[tarantool-patches] [PATCH rfc] schema: add possibility to find and throw away dead replicas

Olga Arkhangelskaia arkholga at tarantool.org
Fri Sep 21 21:25:03 MSK 2018


Adds the ability to get the list of alive replicas in a replica set,
to prune from the box.space._cluster system space those that are not
considered alive, and, in case of doubt, to inspect the state of the
replica set.

A replica is considered alive if it has just been added, or if its status
after the timeout period is neither stopped nor disconnected. However, if
it has both roles (master and replica), we consider such an instance dead
only if both its upstream and downstream statuses are stopped or
disconnected.
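
For illustration, the rule described above roughly corresponds to the
following sketch (this is not the exact is_alive() implementation from the
patch, only the decision rule; the status strings are the ones reported by
box.info.replication):

    local function looks_dead(status)
        return status == 'stopped' or status == 'disconnected'
    end

    -- upstream_status / downstream_status are nil when the corresponding
    -- role is absent.
    local function is_alive_sketch(upstream_status, downstream_status)
        local is_replica = upstream_status ~= nil
        local is_master = downstream_status ~= nil
        if not is_replica and not is_master then
            return false -- no stream information at all: treated as dead
        end
        if is_replica and is_master then
            -- both roles: dead only if both streams look dead
            return not (looks_dead(upstream_status) and
                        looks_dead(downstream_status))
        end
        -- single role: dead as soon as its only stream looks dead
        return not looks_dead(upstream_status or downstream_status)
    end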

If a replica is considered dead, its uuid can be pruned from the _cluster
space. If one is not sure whether the replica is dead, or whether there is
any activity on it, it is possible to list the replicas together with their
role, status and lsn statistics.
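
The intended workflow, as a usage sketch (the 5 second observation window is
just an example value; any positive timeout works):

    -- 1. Look at the replicas with their role, status and lsn activity.
    report = box.replication.list_replicas(5)
    for _, line in ipairs(report) do print(line) end

    -- 2. Collect the uuids of the replicas that look alive ...
    alive = box.replication.get_alive_replicas(5)

    -- 3. ... review the list, then drop everything else from _cluster.
    box.replication.prune_replicas(alive)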

If you have ideas on how else we can/should decide whether a replica is dead,
please share.

Closes #3110
---

https://github.com/tarantool/tarantool/issues/3110
https://github.com/tarantool/tarantool/tree/OKriw/gh-3110-prune-dead-replica-from-replicaset-1.10

 src/box/lua/schema.lua               | 145 +++++++++++++++++++++++++++++++++++
 test/replication/prune_dead.lua      |  49 ++++++++++++
 test/replication/prune_dead.result   | 123 +++++++++++++++++++++++++++++
 test/replication/prune_dead.test.lua |  46 +++++++++++
 test/replication/prune_dead1.lua     |   1 +
 test/replication/prune_dead2.lua     |   1 +
 test/replication/prune_dead3.lua     |   1 +
 test/replication/suite.cfg           |   1 +
 8 files changed, 367 insertions(+)
 create mode 100644 test/replication/prune_dead.lua
 create mode 100644 test/replication/prune_dead.result
 create mode 100644 test/replication/prune_dead.test.lua
 create mode 120000 test/replication/prune_dead1.lua
 create mode 120000 test/replication/prune_dead2.lua
 create mode 120000 test/replication/prune_dead3.lua

diff --git a/src/box/lua/schema.lua b/src/box/lua/schema.lua
index 540a2a5fd..f1c46de7a 100644
--- a/src/box/lua/schema.lua
+++ b/src/box/lua/schema.lua
@@ -7,7 +7,8 @@ local fun = require('fun')
 local log = require('log')
 local fio = require('fio')
 local json = require('json')
+local fiber = require('fiber')
 local session = box.session
 local internal = require('box.internal')
 local function setmap(table)
     return setmetatable(table, { __serialize = 'map' })
@@ -2431,3 +2433,146 @@ box.feedback.save = function(file_name)
 end
 
 box.NULL = msgpack.NULL
+
+--
+-- prune dead replicas from replicaset
+--
+box.replication = {}
+
+local function is_alive (replica_info)
+    -- current replica
+    if replica_info ~= nil and replica_info.uuid == box.info.uuid then
+        -- current replica is alive.
+        return true
+    end
+
+    -- no information is available
+    if replica_info == nil then return false end
+
+    -- roles
+    local master = false
+    local replica = false
+    if (replica_info.downstream ~= nil) then master = true end
+    if (replica_info.upstream ~= nil) then replica = true end
+    -- If no upstream/downstream information is available and this is not the
+    -- current replica, there are two possibilities: a dead replica or a cascade
+    -- topology. The latter is not recommended, so such a replica is considered dead.
+    if (not master and not replica) then return false end
+
+    -- only replica
+    if replica and not master then
+        if ((replica_info.upstream.status == "disconnected" or
+             replica_info.upstream.status == "stopped")) then
+             return false
+        end
+    end
+
+    -- master
+    if (master and replica_info.downstream.status ~= nil) then
+        if (not replica) then
+            return false
+        elseif (replica_info.upstream.status == "disconnected" or
+                replica_info.upstream.status == "stopped") then
+            return false
+        end
+    end
+
+    return true
+end
+
+-- List replicas together with their lsn activity delta over the given period.
+-- This gives additional information to decide whether a replica is healthy.
+box.replication.list_replicas = function(timeout)
+    if timeout ~= nil then
+        if (type(timeout) ~= 'number' or timeout <= 0) then
+            error('Usage: box.replication.list_replicas(timeout). Timeout must be a positive number')
+        end
+    else
+        error('No timeout is specified')
+    end
+
+    local replicas = {} -- uuid, id, status, lsn activity delta, role
+    local old_info = box.info.replication
+    local new_info = old_info
+    fiber.sleep(timeout)
+    new_info = box.info.replication
+
+    for i, new in pairs(new_info) do
+        local active = "N"
+        local old = old_info[i]
+        local up = "-"
+        local down = "-"
+        local role = ''
+        if new.upstream ~= nil then
+            up = new.upstream.status
+            role = "R"
+        end
+
+        if new.downstream ~= nil then
+            role = string.format("%sM", role)
+            if new.downstream.status ~= nil then
+                down = new.downstream.status
+            end
+        end
+
+        if new.uuid == box.info.uuid then
+            up = box.info.status
+        end
+        -- old may be nil if the replica appeared during the waiting period
+        if old ~= nil and new.lsn - old.lsn > 0 then
+            active = "Y"
+        end
+        local line = string.format("id: %s uuid: %s status: %s/%s  active: %s role: %s",
+                                   new.id, new.uuid, up, down, active, role)
+        table.insert(replicas, line)
+    end
+    return replicas
+
+end
+
+-- Returns a table of uuids of replicas that are assumed to be alive.
+-- The decision is based on status only. One should also use list_replicas
+-- to look at lsn changes and at the status of the current replica before
+-- passing the resulting table to box.replication.prune_replicas.
+box.replication.get_alive_replicas = function(timeout)
+    if timeout ~= nil then
+        if type(timeout) ~= 'number' or timeout <= 0 then
+            error('Usage: box.replication.get_alive_replicas(timeout). Timeout must be a positive number')
+        end
+    else
+        error('No timeout is specified')
+    end
+
+    local alive = {}
+    local info_old = box.info.replication
+    local info_new = box.info.replication
+    fiber.sleep(timeout)
+    info_new = box.info.replication
+    for i, new_value in pairs(info_new) do
+        local old_value = info_old[i]
+        if old_value == nil or old_value.uuid ~= new_value.uuid then
+            -- The replica was added during the waiting period, so there is no
+            -- previous status to compare with; assume it is alive regardless of
+            -- its status. The uuid would not match only if the old replica was
+            -- deleted and a new one was added meanwhile; assume it is alive too.
+            table.insert(alive, new_value.uuid)
+        elseif is_alive(new_value) then
+            table.insert(alive, new_value.uuid)
+        end
+    end
+    return alive
+end
+
+-- Deletes from the _cluster space every replica whose uuid is not listed
+-- in alive_replicas. The argument is a list of uuids, for example the table
+-- returned by box.replication.get_alive_replicas().
+box.replication.prune_replicas = function(alive_replicas)
+    if type(alive_replicas) ~= 'table' then
+        error("Usage: box.replication.prune_replicas(alive_replicas)")
+    end
+    -- Build a set keyed by uuid so replicas are matched by value, not by
+    -- their position in the list.
+    local alive_set = {}
+    for _, uuid in pairs(alive_replicas) do
+        alive_set[uuid] = true
+    end
+    for _, tuple in box.space._cluster:pairs() do
+        if alive_set[tuple[2]] == nil then
+            box.space._cluster.index.uuid:delete{tuple[2]}
+        end
+    end
+end
diff --git a/test/replication/prune_dead.lua b/test/replication/prune_dead.lua
new file mode 100644
index 000000000..cb4b9ee15
--- /dev/null
+++ b/test/replication/prune_dead.lua
@@ -0,0 +1,49 @@
+#!/usr/bin/env tarantool
+
+-- get instance name from filename (prune_dead.lua => prune_dead1)
+local INSTANCE_ID = string.match(arg[0], "%d")
+
+local SOCKET_DIR = require('fio').cwd()
+
+local function instance_uri(instance_id)
+    --return 'localhost:'..(3310 + instance_id)
+    return SOCKET_DIR..'/prune_dead'..instance_id..'.sock';
+end
+
+-- start console first
+require('console').listen(os.getenv('ADMIN'))
+
+box.cfg({
+    listen = instance_uri(INSTANCE_ID);
+    replication = {
+        instance_uri(1);
+        instance_uri(2);
+        instance_uri(3);
+    };
+})
+
+TIMEOUT = 0.01
+
+box.once("bootstrap", function()
+    local test_run = require('test_run').new()
+    box.schema.user.grant("guest", 'replication')
+    box.schema.space.create('test', {engine = test_run:get_cfg('engine')})
+    box.space.test:create_index('primary')
+end)
+
+-- helper functions
+function contains (uuid_table, value)
+    for i = 1, table.getn(uuid_table) do
+        if (uuid_table[i] == value) then return true end
+    end
+    return false
+end
+
+-- index in uuid_all of the first uuid that is missing from uuid_alive
+function find_excess (uuid_all, uuid_alive)
+    local i = 1
+    while (i <= table.getn(uuid_all)) do
+        if (not contains(uuid_alive, uuid_all[i])) then return i end
+        i = i + 1
+    end
+    return i
+end
diff --git a/test/replication/prune_dead.result b/test/replication/prune_dead.result
new file mode 100644
index 000000000..90c912e21
--- /dev/null
+++ b/test/replication/prune_dead.result
@@ -0,0 +1,123 @@
+test_run = require('test_run').new()
+---
+...
+fiber = require('fiber')
+---
+...
+SERVERS = {'prune_dead1', 'prune_dead2', 'prune_dead3'}
+---
+...
+-- Deploy cluster
+test_run:create_cluster(SERVERS, "replication")
+---
+...
+test_run:wait_fullmesh(SERVERS)
+---
+...
+-- check that we can monitor replica set and all replicas are alive
+test_run:cmd('switch prune_dead1')
+---
+- true
+...
+alive = box.replication.get_alive_replicas(TIMEOUT)
+---
+...
+table.getn(alive) == box.space._cluster:count()
+---
+- true
+...
+box.info.replication[1].uuid == alive[1]
+---
+- true
+...
+box.info.replication[2].uuid == alive[2]
+---
+- true
+...
+box.info.replication[3].uuid == alive[3]
+---
+- true
+...
+-- check that the replica is still considered alive if replication is turned off
+test_run:cmd('switch prune_dead2')
+---
+- true
+...
+replication = box.cfg.replication
+---
+...
+box.cfg{replication = ''}
+---
+...
+test_run:cmd('switch prune_dead1')
+---
+- true
+...
+alive = box.replication.get_alive_replicas(TIMEOUT)
+---
+...
+table.getn(alive) == box.space._cluster:count()
+---
+- true
+...
+test_run:cmd('switch prune_dead2')
+---
+- true
+...
+box.cfg{replication = replication}
+---
+...
+test_run:cmd('switch default')
+---
+- true
+...
+test_run:wait_fullmesh(SERVERS)
+---
+...
+-- stop a replica to check that it is not in the alive list
+test_run:cmd('stop server prune_dead2')
+---
+- true
+...
+test_run:cmd('switch prune_dead1')
+---
+- true
+...
+alive = box.replication.get_alive_replicas(TIMEOUT)
+---
+...
+table.getn(alive) < box.space._cluster:count()
+---
+- true
+...
+all = {box.info.replication[1].uuid, box.info.replication[2].uuid, box.info.replication[3].uuid}
+---
+...
+box.info.replication[find_excess(all, alive)].upstream.status == "disconnected"
+---
+- true
+...
+box.info.replication[find_excess(all, alive)].downstream.status == "stopped"
+---
+- true
+...
+-- prune the dead replica
+box.replication.prune_replicas(alive)
+---
+...
+table.getn(alive) == box.space._cluster:count()
+---
+- true
+...
+-- Cleanup
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd('start server prune_dead2')
+---
+- true
+...
+test_run:drop_cluster(SERVERS)
+---
+...
diff --git a/test/replication/prune_dead.test.lua b/test/replication/prune_dead.test.lua
new file mode 100644
index 000000000..f480472ae
--- /dev/null
+++ b/test/replication/prune_dead.test.lua
@@ -0,0 +1,46 @@
+test_run = require('test_run').new()
+fiber = require('fiber')
+
+SERVERS = {'prune_dead1', 'prune_dead2', 'prune_dead3'}
+
+-- Deploy cluster
+test_run:create_cluster(SERVERS, "replication")
+test_run:wait_fullmesh(SERVERS)
+
+-- check that we can monitor replica set and all replicas are alive
+test_run:cmd('switch prune_dead1')
+alive = box.replication.get_alive_replicas(TIMEOUT)
+table.getn(alive) == box.space._cluster:count()
+box.info.replication[1].uuid == alive[1]
+box.info.replication[2].uuid == alive[2]
+box.info.replication[3].uuid == alive[3]
+
+-- check that the replica is still considered alive if replication is turned off
+test_run:cmd('switch prune_dead2')
+replication = box.cfg.replication
+box.cfg{replication = ''}
+test_run:cmd('switch prune_dead1')
+alive = box.replication.get_alive_replicas(TIMEOUT)
+table.getn(alive) == box.space._cluster:count()
+test_run:cmd('switch prune_dead2')
+box.cfg{replication = replication}
+test_run:cmd('switch default')
+test_run:wait_fullmesh(SERVERS)
+
+-- stop a replica to check that it is not in the alive list
+test_run:cmd('stop server prune_dead2')
+test_run:cmd('switch prune_dead1')
+alive = box.replication.get_alive_replicas(TIMEOUT)
+table.getn(alive) < box.space._cluster:count()
+all = {box.info.replication[1].uuid, box.info.replication[2].uuid, box.info.replication[3].uuid}
+box.info.replication[find_excess(all, alive)].upstream.status == "disconnected"
+box.info.replication[find_excess(all, alive)].downstream.status == "stopped"
+
+-- prune the dead replica
+box.replication.prune_replicas(alive)
+table.getn(alive) == box.space._cluster:count()
+
+-- Cleanup
+test_run:cmd("switch default")
+test_run:cmd('start server prune_dead2')
+test_run:drop_cluster(SERVERS)
diff --git a/test/replication/prune_dead1.lua b/test/replication/prune_dead1.lua
new file mode 120000
index 000000000..20f09ea62
--- /dev/null
+++ b/test/replication/prune_dead1.lua
@@ -0,0 +1 @@
+prune_dead.lua
\ No newline at end of file
diff --git a/test/replication/prune_dead2.lua b/test/replication/prune_dead2.lua
new file mode 120000
index 000000000..20f09ea62
--- /dev/null
+++ b/test/replication/prune_dead2.lua
@@ -0,0 +1 @@
+prune_dead.lua
\ No newline at end of file
diff --git a/test/replication/prune_dead3.lua b/test/replication/prune_dead3.lua
new file mode 120000
index 000000000..20f09ea62
--- /dev/null
+++ b/test/replication/prune_dead3.lua
@@ -0,0 +1 @@
+prune_dead.lua
\ No newline at end of file
diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg
index 95e94e5a2..f819eedd9 100644
--- a/test/replication/suite.cfg
+++ b/test/replication/suite.cfg
@@ -6,6 +6,7 @@
     "wal_off.test.lua": {},
     "hot_standby.test.lua": {},
     "rebootstrap.test.lua": {},
+    "prune_dead.test.lua": {},
     "*": {
         "memtx": {"engine": "memtx"},
         "vinyl": {"engine": "vinyl"}
-- 
2.14.3 (Apple Git-98)




