[tarantool-patches] [PATCH v2 2/2] ctl: added functionality to detect and prune dead replicas

Olga Arkhangelskaia arkholga at tarantool.org
Fri Oct 12 22:45:57 MSK 2018


Added replicaset_list_wasted(), replica_displace(uuid),
replicaset_trim(uuid_table) functions.

replicaset_list_wasted() - returns a table of dead replicas.
We maintain two values: last active write time - law, and last active
read time - lar. lar/law is the time that has passed since the last
read/write (activity of the applier and relay). Time is measured in hours.
The values can be found in box.info.replication. We do not maintain lar/law
for the current replica. When replicaset_list_wasted() is called, it compares
lar and law with replication_dead_gap, and abs(lar - law) with
replication_rw_gap. If any of these values exceeds its gap, the replica is
considered dead and is stored in the resulting table.

The resulting table can be passed to replicaset_trim(uuid_table). In
this case all replicas from the table will be thrown away from the system
space.

If one knows that some replica is dead, its uuid can be passed to
replica_displace(uuid). In that case it will be thrown away from the system
space.

Closes #3110
---
 src/box/CMakeLists.txt         |   1 +
 src/box/lua/ctl.lua            |  58 ++++++++++
 src/box/lua/info.c             |  10 ++
 src/box/lua/init.c             |   2 +
 src/box/relay.cc               |   6 ++
 src/box/relay.h                |   4 +
 test/replication/trim.lua      |  66 ++++++++++++
 test/replication/trim.result   | 237 +++++++++++++++++++++++++++++++++++++++++
 test/replication/trim.test.lua |  93 ++++++++++++++++
 test/replication/trim1.lua     |   1 +
 test/replication/trim2.lua     |   1 +
 test/replication/trim3.lua     |   1 +
 test/replication/trim4.lua     |   1 +
 13 files changed, 481 insertions(+)
 create mode 100644 src/box/lua/ctl.lua
 create mode 100644 test/replication/trim.lua
 create mode 100644 test/replication/trim.result
 create mode 100644 test/replication/trim.test.lua
 create mode 120000 test/replication/trim1.lua
 create mode 120000 test/replication/trim2.lua
 create mode 120000 test/replication/trim3.lua
 create mode 120000 test/replication/trim4.lua

diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index 52413d3cf..1daa1798e 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -11,6 +11,7 @@ lua_source(lua_sources lua/net_box.lua)
 lua_source(lua_sources lua/upgrade.lua)
 lua_source(lua_sources lua/console.lua)
 lua_source(lua_sources lua/xlog.lua)
+lua_source(lua_sources lua/ctl.lua)
 set(bin_sources)
 bin_source(bin_sources bootstrap.snap bootstrap.h)
 
diff --git a/src/box/lua/ctl.lua b/src/box/lua/ctl.lua
new file mode 100644
index 000000000..852de91d1
--- /dev/null
+++ b/src/box/lua/ctl.lua
@@ -0,0 +1,58 @@
+-- ctl.lua (internal file)
+
+local dead_gap = 0 -- hours; file-local so it does not leak into _G
+local rw_gap = 0 -- hours; refreshed by replicaset_list_wasted()
+
+-- Decide whether a replica looks dead; lar/law are hours since the last
+-- applier read / relay write and either may be absent (nil).
+local function is_dead(replica)
+    local lar, law = replica.lar, replica.law
+    -- no information about applier and relay
+    if lar == nil and law == nil then return true end
+    -- time since the last active read or write exceeds dead_gap
+    if lar ~= nil and lar > dead_gap then return true end
+    if law ~= nil and law > dead_gap then return true end
+    -- only one side is known: rw_gap cannot be evaluated, and comparing
+    -- or subtracting the missing nil value would raise an error
+    if lar == nil or law == nil then return false end
+    -- something happened to relay or applier
+    return math.abs(lar - law) > rw_gap
+end
+
+-- return list of replicas suspected to be dead
+function replicaset_list_wasted()
+    dead_gap = box.cfg.replication_dead_gap
+    rw_gap = box.cfg.replication_rw_gap
+    if not (dead_gap ~= 0 and rw_gap ~= 0) then
+         error("replication_dead_gap and replication_rw_gap must be set")
+    end
+    local suspects = {}
+    for _, peer in pairs(box.info.replication) do
+        -- the local instance is never reported as dead
+        local is_self = peer.uuid == box.info.uuid
+        if not is_self and is_dead(peer) then
+            suspects[#suspects + 1] = peer.uuid
+        end
+    end
+    return suspects
+end
+
+-- throw away any replica from system space
+function replica_displace(uuid)
+    local missing = uuid == nil
+    if missing then error("Usage: replica_displace([uuid])") end
+    -- drop the replica's row from _cluster through its uuid index
+    box.space._cluster.index.uuid:delete({uuid})
+end
+
+-- delete table of dead replica obtained from replicaset_list_wasted() or
+-- formed by admin
+function replicaset_trim(uuid_table)
+    if uuid_table == nil then
+        error("Usage: replicaset_trim([uuid_table])")
+    end
+    for key, uuid in pairs(uuid_table) do
+        print("Deleting replica with uuid "..key.." "..uuid)
+        replica_displace(uuid)
+    end
+end
diff --git a/src/box/lua/info.c b/src/box/lua/info.c
index 655768ec4..c35e72aa9 100644
--- a/src/box/lua/info.c
+++ b/src/box/lua/info.c
@@ -145,6 +145,16 @@ lbox_pushreplica(lua_State *L, struct replica *replica)
 	luaL_pushuint64(L, vclock_get(&replicaset.vclock, replica->id));
 	lua_settable(L, -3);
 
+	if (applier != NULL && applier->state != APPLIER_OFF) {
+		lua_pushstring(L, "lar");
+		lua_pushnumber(L, (ev_monotonic_now(loop()) - applier->last_row_time)/3600.0);
+		lua_settable(L, -3);
+	}
+	if (relay != NULL && relay_get_state(relay) != RELAY_OFF) {
+		lua_pushstring(L, "law");
+		lua_pushnumber(L, (ev_monotonic_now(loop()) - relay_get_lrt(relay))/3600.0);
+		lua_settable(L, -3);
+	}
 	if (applier != NULL && applier->state != APPLIER_OFF) {
 		lua_pushstring(L, "upstream");
 		lbox_pushapplier(L, applier);
diff --git a/src/box/lua/init.c b/src/box/lua/init.c
index 694b5bfd3..55910a85b 100644
--- a/src/box/lua/init.c
+++ b/src/box/lua/init.c
@@ -62,6 +62,7 @@
 extern char session_lua[],
 	tuple_lua[],
 	schema_lua[],
+	ctl_lua[],
 	load_cfg_lua[],
 	xlog_lua[],
 	checkpoint_daemon_lua[],
@@ -81,6 +82,7 @@ static const char *lua_sources[] = {
 	"box/console", console_lua,
 	"box/load_cfg", load_cfg_lua,
 	"box/xlog", xlog_lua,
+	"box/ctl", ctl_lua,
 	NULL
 };
 
diff --git a/src/box/relay.cc b/src/box/relay.cc
index d5df487eb..f20972003 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -146,6 +146,12 @@ relay_get_diag(struct relay *relay)
 	return &relay->diag;
 }
 
+double
+relay_get_lrt(struct relay *relay)
+{
+	return relay->last_row_tm;
+}
+
 enum relay_state
 relay_get_state(const struct relay *relay)
 {
diff --git a/src/box/relay.h b/src/box/relay.h
index 53bf68eb8..0d7324ca1 100644
--- a/src/box/relay.h
+++ b/src/box/relay.h
@@ -73,6 +73,10 @@ relay_delete(struct relay *relay);
 struct diag*
 relay_get_diag(struct relay *relay);
 
+/** Get relay's last row time */
+double
+relay_get_lrt(struct relay *relay);
+
 /** Return the current state of relay. */
 enum relay_state
 relay_get_state(const struct relay *relay);
diff --git a/test/replication/trim.lua b/test/replication/trim.lua
new file mode 100644
index 000000000..1fb9cc6d2
--- /dev/null
+++ b/test/replication/trim.lua
@@ -0,0 +1,66 @@
+#!/usr/bin/env tarantool
+
+-- get instance name from filename (trim1.lua => trim1)
+local INSTANCE_ID = string.match(arg[0], "%d")
+local SOCKET_DIR = require('fio').cwd()
+local TIMEOUT = tonumber(arg[1])
+local CON_TIMEOUT = arg[2] and tonumber(arg[2]) or 30.0
+
+local function instance_uri(instance_id)
+    --return 'localhost:'..(3310 + instance_id)
+    return SOCKET_DIR..'/trim'..instance_id..'.sock';
+end
+
+require('console').listen(os.getenv('ADMIN'))
+
+box.cfg({
+    listen = instance_uri(INSTANCE_ID);
+    replication_timeout = TIMEOUT;
+    replication_connect_timeout = CON_TIMEOUT;
+    replication = {
+        instance_uri(1);
+        instance_uri(2);
+        instance_uri(3);
+        instance_uri(4);
+    };
+})
+
+box.once("bootstrap", function()
+    local test_run = require('test_run').new()
+    box.schema.user.grant("guest", 'replication')
+    box.schema.space.create('test', {engine = test_run:get_cfg('engine')})
+    box.space.test:create_index('primary')
+end)
+
+
+fiber = require('fiber')
+-- Sleep in 80 steps of 0.05 s so the configured gaps have time to elapse.
+function wait()
+    for _ = 1, 80 do fiber.sleep(0.05) end
+end
+
+-- true when the replica with this uuid has law above replication_dead_gap
+function find_wasted_by_law(uuid)
+    local limit = box.cfg.replication_dead_gap
+    for _, peer in pairs(box.info.replication) do
+        if peer.uuid == uuid then return peer.law > limit end
+    end
+    return false
+end
+
+-- true when the replica with this uuid has lar above replication_dead_gap
+function find_wasted_by_lar(uuid)
+    local limit = box.cfg.replication_dead_gap
+    for _, peer in pairs(box.info.replication) do
+        if peer.uuid == uuid then return peer.lar > limit end
+    end
+    return false
+end
+
+function find_wasted_by_rw(uuid)
+    local gap = box.cfg.replication_rw_gap
+    for _, r in pairs(box.info.replication) do
+        if r.uuid == uuid then return math.abs(r.law - r.lar) > gap end
+    end
+    return false -- unknown uuid: answer false like the law/lar helpers
+end
diff --git a/test/replication/trim.result b/test/replication/trim.result
new file mode 100644
index 000000000..69563898f
--- /dev/null
+++ b/test/replication/trim.result
@@ -0,0 +1,237 @@
+test_run = require('test_run').new()
+---
+...
+SERVERS = {'trim1', 'trim2', 'trim3', 'trim4'}
+---
+...
+-- Deploy cluster
+test_run:create_cluster(SERVERS, "replication", {args="0.1"})
+---
+...
+test_run:wait_fullmesh(SERVERS)
+---
+...
+test_run:cmd('switch trim1')
+---
+- true
+...
+box.space._cluster:len() == 4
+---
+- true
+...
+-- errors
+replicaset_list_wasted()
+---
+- error: 'builtin/box/ctl.lua:27: replication_dead_gap and replication_rw_gap must
+    be set'
+...
+replica_displace()
+---
+- error: 'builtin/box/ctl.lua:43: Usage: replica_displace([uuid])'
+...
+-- set dead/rw gap
+box.cfg{replication_dead_gap = 0.001, replication_rw_gap = 10}
+---
+...
+-- stop replication
+test_run:cmd('switch trim4')
+---
+- true
+...
+replication = box.cfg.replication
+---
+...
+box.cfg{replication = {}}
+---
+...
+test_run:cmd('switch trim1')
+---
+- true
+...
+-- must be empty
+table.getn(replicaset_list_wasted()) == 0
+---
+- true
+...
+-- need time to fulfill dead_gap
+wait()
+---
+...
+wasted_replica = replicaset_list_wasted()
+---
+...
+table.getn(wasted_replica) == 1
+---
+- true
+...
+-- found by law
+find_wasted_by_law(wasted_replica[1])
+---
+- true
+...
+find_wasted_by_rw(wasted_replica[1])
+---
+- false
+...
+find_wasted_by_lar(wasted_replica[1])
+---
+- false
+...
+--turn on replication and see empty wasted list
+test_run:cmd('switch trim4')
+---
+- true
+...
+box.cfg{replication = replication}
+---
+...
+test_run:cmd('switch trim1')
+---
+- true
+...
+table.getn(replicaset_list_wasted()) == 0
+---
+- true
+...
+-- look at rw_gap
+box.cfg{replication_dead_gap = 10, replication_rw_gap = 0.001}
+---
+...
+test_run:cmd('switch trim4')
+---
+- true
+...
+box.cfg{replication = {}}
+---
+...
+test_run:cmd('switch trim1')
+---
+- true
+...
+wait()
+---
+...
+table.getn(replicaset_list_wasted()) == 1
+---
+- true
+...
+find_wasted_by_rw(wasted_replica[1])
+---
+- true
+...
+find_wasted_by_law(wasted_replica[1])
+---
+- false
+...
+find_wasted_by_lar(wasted_replica[1])
+---
+- false
+...
+-- look at lar
+test_run:cmd('switch trim4')
+---
+- true
+...
+box.cfg{replication = replication}
+---
+...
+test_run:cmd('switch trim1')
+---
+- true
+...
+table.getn(replicaset_list_wasted()) == 0
+---
+- true
+...
+box.cfg{replication_dead_gap = 0.001, replication_rw_gap = 10}
+---
+...
+test_run:cmd('stop server trim4')
+---
+- true
+...
+table.getn(replicaset_list_wasted()) == 0
+---
+- true
+...
+wait()
+---
+...
+wasted_replica = replicaset_list_wasted()
+---
+...
+table.getn(wasted_replica)  == 1
+---
+- true
+...
+find_wasted_by_lar(wasted_replica[1])
+---
+- true
+...
+find_wasted_by_rw(wasted_replica[1])
+---
+- false
+...
+find_wasted_by_law(wasted_replica[1])
+---
+- true
+...
+-- throw away dead replicas
+-- delete given replica
+box.space._cluster:len() == 4
+---
+- true
+...
+replica_displace(wasted_replica[1])
+---
+...
+box.space._cluster:len() == 3
+---
+- true
+...
+-- trim replicaset
+test_run:cmd('stop server trim2')
+---
+- true
+...
+test_run:cmd('stop server trim3')
+---
+- true
+...
+wait()
+---
+...
+trim_set = replicaset_list_wasted()
+---
+...
+table.getn(trim_set) == 2
+---
+- true
+...
+replicaset_trim(trim_set)
+---
+...
+box.space._cluster:len() == 1
+---
+- true
+...
+-- Cleanup
+test_run:cmd('start server trim2')
+---
+- true
+...
+test_run:cmd('start server trim3')
+---
+- true
+...
+test_run:cmd('start server trim4')
+---
+- true
+...
+test_run:cmd('switch default')
+---
+- true
+...
+test_run:drop_cluster(SERVERS)
+---
+...
diff --git a/test/replication/trim.test.lua b/test/replication/trim.test.lua
new file mode 100644
index 000000000..35dbb8590
--- /dev/null
+++ b/test/replication/trim.test.lua
@@ -0,0 +1,93 @@
+test_run = require('test_run').new()
+
+SERVERS = {'trim1', 'trim2', 'trim3', 'trim4'}
+
+
+-- Deploy cluster
+test_run:create_cluster(SERVERS, "replication", {args="0.1"})
+test_run:wait_fullmesh(SERVERS)
+
+test_run:cmd('switch trim1')
+box.space._cluster:len() == 4
+-- errors
+replicaset_list_wasted()
+replica_displace()
+
+-- set dead/rw gap
+box.cfg{replication_dead_gap = 0.001, replication_rw_gap = 10}
+
+-- stop replication
+test_run:cmd('switch trim4')
+replication = box.cfg.replication
+box.cfg{replication = {}}
+
+test_run:cmd('switch trim1')
+-- must be empty
+table.getn(replicaset_list_wasted()) == 0
+-- need time to fulfill dead_gap
+wait()
+wasted_replica = replicaset_list_wasted()
+table.getn(wasted_replica) == 1
+
+-- found by law
+find_wasted_by_law(wasted_replica[1])
+find_wasted_by_rw(wasted_replica[1])
+find_wasted_by_lar(wasted_replica[1])
+
+--turn on replication and see empty wasted list
+test_run:cmd('switch trim4')
+box.cfg{replication = replication}
+test_run:cmd('switch trim1')
+table.getn(replicaset_list_wasted()) == 0
+
+-- look at rw_gap
+box.cfg{replication_dead_gap = 10, replication_rw_gap = 0.001}
+test_run:cmd('switch trim4')
+box.cfg{replication = {}}
+test_run:cmd('switch trim1')
+wait()
+table.getn(replicaset_list_wasted()) == 1
+
+find_wasted_by_rw(wasted_replica[1])
+find_wasted_by_law(wasted_replica[1])
+find_wasted_by_lar(wasted_replica[1])
+
+-- look at lar
+test_run:cmd('switch trim4')
+box.cfg{replication = replication}
+test_run:cmd('switch trim1')
+table.getn(replicaset_list_wasted()) == 0
+box.cfg{replication_dead_gap = 0.001, replication_rw_gap = 10}
+test_run:cmd('stop server trim4')
+table.getn(replicaset_list_wasted()) == 0
+wait()
+wasted_replica = replicaset_list_wasted()
+table.getn(wasted_replica)  == 1
+
+find_wasted_by_lar(wasted_replica[1])
+find_wasted_by_rw(wasted_replica[1])
+find_wasted_by_law(wasted_replica[1])
+
+-- throw away dead replicas
+-- delete given replica
+box.space._cluster:len() == 4
+replica_displace(wasted_replica[1])
+box.space._cluster:len() == 3
+
+-- trim replicaset
+test_run:cmd('stop server trim2')
+test_run:cmd('stop server trim3')
+
+wait()
+trim_set = replicaset_list_wasted()
+table.getn(trim_set) == 2
+replicaset_trim(trim_set)
+box.space._cluster:len() == 1
+
+-- Cleanup
+test_run:cmd('start server trim2')
+test_run:cmd('start server trim3')
+test_run:cmd('start server trim4')
+
+test_run:cmd('switch default')
+test_run:drop_cluster(SERVERS)
diff --git a/test/replication/trim1.lua b/test/replication/trim1.lua
new file mode 120000
index 000000000..14e98bd68
--- /dev/null
+++ b/test/replication/trim1.lua
@@ -0,0 +1 @@
+trim.lua
\ No newline at end of file
diff --git a/test/replication/trim2.lua b/test/replication/trim2.lua
new file mode 120000
index 000000000..14e98bd68
--- /dev/null
+++ b/test/replication/trim2.lua
@@ -0,0 +1 @@
+trim.lua
\ No newline at end of file
diff --git a/test/replication/trim3.lua b/test/replication/trim3.lua
new file mode 120000
index 000000000..14e98bd68
--- /dev/null
+++ b/test/replication/trim3.lua
@@ -0,0 +1 @@
+trim.lua
\ No newline at end of file
diff --git a/test/replication/trim4.lua b/test/replication/trim4.lua
new file mode 120000
index 000000000..14e98bd68
--- /dev/null
+++ b/test/replication/trim4.lua
@@ -0,0 +1 @@
+trim.lua
\ No newline at end of file
-- 
2.14.3 (Apple Git-98)





More information about the Tarantool-patches mailing list