From mboxrd@z Thu Jan 1 00:00:00 1970
From: Olga Arkhangelskaia
Subject: [tarantool-patches] [PATCH rfc] schema: add possibility to find and throw away dead replicas
Date: Fri, 21 Sep 2018 21:25:03 +0300
Message-Id: <20180921182503.14027-1-arkholga@tarantool.org>
List-Id: tarantool-patches
To: tarantool-patches@freelists.org
Cc: Olga Arkhangelskaia

Adds the possibility to get the list of alive replicas in a replica set,
to prune from box.space._cluster those that are not considered alive,
and, if one has doubts, to inspect the state of the replica set.

A replica is considered alive if it has just been added, or if its status
after the timeout period is neither "stopped" nor "disconnected". However,
if an instance has both roles (master and replica), we consider it dead
only if both its upstream and downstream statuses are "stopped" or
"disconnected". If a replica is considered dead, its uuid can be pruned
from the _cluster space. If one is not sure whether a replica is dead or
whether there is any activity on it, it is possible to list the replicas
together with their roles, statuses and lsn statistics.

If you have ideas about how else we can/should decide whether a replica is
dead, please share.
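As a rough usage sketch (assuming this patch is applied; the 0.5 second
timeout is an arbitrary example value, not a recommendation):

    -- Look at roles, statuses and lsn activity first.
    for _, line in ipairs(box.replication.list_replicas(0.5)) do
        print(line)
    end

    -- Collect the replicas that still look alive after the observation
    -- period and drop everybody else from the _cluster space.
    local alive = box.replication.get_alive_replicas(0.5)
    box.replication.prune_replicas(alive)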
Closes #3110
---
https://github.com/tarantool/tarantool/issues/3110
https://github.com/tarantool/tarantool/tree/OKriw/gh-3110-prune-dead-replica-from-replicaset-1.10

 src/box/lua/schema.lua               | 145 +++++++++++++++++++++++++++++++++++
 test/replication/prune_dead.lua      |  49 ++++++++++++
 test/replication/prune_dead.result   | 123 +++++++++++++++++++++++++++++
 test/replication/prune_dead.test.lua |  46 +++++++++++
 test/replication/prune_dead1.lua     |   1 +
 test/replication/prune_dead2.lua     |   1 +
 test/replication/prune_dead3.lua     |   1 +
 test/replication/suite.cfg           |   1 +
 8 files changed, 367 insertions(+)
 create mode 100644 test/replication/prune_dead.lua
 create mode 100644 test/replication/prune_dead.result
 create mode 100644 test/replication/prune_dead.test.lua
 create mode 120000 test/replication/prune_dead1.lua
 create mode 120000 test/replication/prune_dead2.lua
 create mode 120000 test/replication/prune_dead3.lua

diff --git a/src/box/lua/schema.lua b/src/box/lua/schema.lua
index 540a2a5fd..f1c46de7a 100644
--- a/src/box/lua/schema.lua
+++ b/src/box/lua/schema.lua
@@ -7,7 +7,8 @@ local fun = require('fun')
 local log = require('log')
 local fio = require('fio')
 local json = require('json')
+local fiber = require('fiber')
 local session = box.session
 local internal = require('box.internal')
 local function setmap(table)
     return setmetatable(table, { __serialize = 'map' })
@@ -2431,3 +2433,146 @@ box.feedback.save = function(file_name)
 end

 box.NULL = msgpack.NULL
+
+--
+-- Prune dead replicas from the replica set.
+--
+box.replication = {}
+
+local function is_alive(replica_info)
+    -- Current replica.
+    if replica_info ~= nil and replica_info.uuid == box.info.uuid then
+        -- The current replica is alive.
+        return true
+    end
+
+    -- No information is available.
+    if replica_info == nil then return false end
+
+    -- Roles.
+    local master = false
+    local replica = false
+    if replica_info.downstream ~= nil then master = true end
+    if replica_info.upstream ~= nil then replica = true end
+    -- If no up/downstream information is available and this is not the
+    -- current replica, there are two possibilities: a dead replica or a
+    -- cascade topology; we do not recommend the latter, so it counts as dead.
+    if not master and not replica then return false end
+
+    -- Replica only.
+    if replica and not master then
+        if replica_info.upstream.status == "disconnected" or
+           replica_info.upstream.status == "stopped" then
+            return false
+        end
+    end
+
+    -- Master.
+    if master and replica_info.downstream.status ~= nil then
+        if not replica then
+            return false
+        elseif replica_info.upstream.status == "disconnected" or
+               replica_info.upstream.status == "stopped" then
+            return false
+        end
+    end
+
+    return true
+end
+
+-- List replicas with the lsn delta observed within the given period.
+-- This adds additional info to decide whether everything is OK with a replica.
+box.replication.list_replicas = function(timeout)
+    if timeout ~= nil then
+        if type(timeout) ~= 'number' or timeout <= 0 then
+            error('Usage: box.replication.list_replicas(timeout). ' ..
+                  'Timeout should be a positive number')
+        end
+    else
+        error('No timeout is specified')
+    end
+
+    local replicas = {} -- uuid, id, status, lsn activity delta, role
+    local old_info = box.info.replication
+    local new_info
+    fiber.sleep(timeout)
+    new_info = box.info.replication
+
+    for i, new in pairs(new_info) do
+        local active = "N"
+        local old = old_info[i]
+        local up = "-"
+        local down = "-"
+        local role = ''
+        if new.upstream ~= nil then
+            up = new.upstream.status
+            role = "R"
+        end
+
+        if new.downstream ~= nil then
+            role = string.format("%sM", role)
+            if new.downstream.status ~= nil then
+                down = new.downstream.status
+            end
+        end
+
+        if new.uuid == box.info.uuid then
+            up = box.info.status
+        end
+        -- The replica may have appeared while we were sleeping, in which
+        -- case there is no old lsn to compare with.
+        if old ~= nil and new.lsn - old.lsn > 0 then
+            active = "Y"
+        end
+        local line = string.format("id: %s uuid: %s status: %s/%s active: %s role: %s",
+                                   new.id, new.uuid, up, down, active, role)
+        table.insert(replicas, line)
+    end
+    return replicas
+end
+
+-- Return a table of uuids of replicas that are assumed to be alive.
+-- The decision is based on the status. However, one should also use
+-- list_replicas to look at lsn changes and at the status of the current
+-- replica to form the table of alive replicas that is passed to
+-- box.replication.prune_replicas.
+box.replication.get_alive_replicas = function(timeout)
+    if timeout ~= nil then
+        if type(timeout) ~= 'number' or timeout <= 0 then
+            error('Usage: box.replication.get_alive_replicas(timeout). ' ..
+                  'Timeout should be a positive number')
+        end
+    else
+        error('No timeout is specified')
+    end
+
+    local alive = {}
+    local info_old = box.info.replication
+    local info_new
+    fiber.sleep(timeout)
+    info_new = box.info.replication
+    for i, new_value in pairs(info_new) do
+        local old_value = info_old[i]
+        if old_value == nil or old_value.uuid ~= new_value.uuid then
+            -- The replica was added during the waiting period, so we can't
+            -- compare it with a previous status and assume it is alive
+            -- regardless of its status. The uuids would not match only if
+            -- the old replica was deleted and a new one was added in the
+            -- meantime; if the old replica was recovered with a new id,
+            -- we assume it is alive as well.
+            table.insert(alive, new_value.uuid)
+        elseif is_alive(new_value) then
+            table.insert(alive, new_value.uuid)
+        end
+    end
+    return alive
+end
+
+-- Delete from the _cluster space every replica that is not listed in
+-- alive_replicas. alive_replicas should be a table of replica uuids,
+-- e.g. the one returned by box.replication.get_alive_replicas.
+-- Fails if alive_replicas is not a table.
+box.replication.prune_replicas = function(alive_replicas)
+    if type(alive_replicas) ~= 'table' then
+        error("Usage: box.replication.prune_replicas(alive_replicas)")
+    end
+    local keep = {}
+    for _, uuid in pairs(alive_replicas) do
+        keep[uuid] = true
+    end
+    for _, tuple in box.space._cluster:pairs() do
+        if keep[tuple[2]] == nil then
+            box.space._cluster.index.uuid:delete{tuple[2]}
+        end
+    end
+end
diff --git a/test/replication/prune_dead.lua b/test/replication/prune_dead.lua
new file mode 100644
index 000000000..cb4b9ee15
--- /dev/null
+++ b/test/replication/prune_dead.lua
@@ -0,0 +1,49 @@
+#!/usr/bin/env tarantool
+
+-- get instance id from filename (prune_dead1.lua => 1)
+local INSTANCE_ID = string.match(arg[0], "%d")
+
+local SOCKET_DIR = require('fio').cwd()
+
+local function instance_uri(instance_id)
+    --return 'localhost:'..(3310 + instance_id)
+    return SOCKET_DIR..'/prune_dead'..instance_id..'.sock';
+end
+
+-- start console first
+require('console').listen(os.getenv('ADMIN'))
+
+box.cfg({
+    listen = instance_uri(INSTANCE_ID);
+    replication = {
+        instance_uri(1);
+        instance_uri(2);
+        instance_uri(3);
+    };
+})
+
+TIMEOUT = 0.01
+
+box.once("bootstrap", function()
+    local test_run = require('test_run').new()
+    box.schema.user.grant("guest", 'replication')
+    box.schema.space.create('test', {engine = test_run:get_cfg('engine')})
+    box.space.test:create_index('primary')
+end)
+
+-- helper functions
+function contains(uuid_table, value)
+    for i = 1, table.getn(uuid_table) do
+        if uuid_table[i] == value then return true end
+    end
+    return false
+end
+
+function find_excess(uuid_all, uuid_alive)
+    local i = 1
+    while i <= table.getn(uuid_all) do
+        if not contains(uuid_alive, uuid_all[i]) then return i end
+        i = i + 1
+    end
+    return i
+end
diff --git a/test/replication/prune_dead.result b/test/replication/prune_dead.result
new file mode 100644
index 000000000..90c912e21
--- /dev/null
+++ b/test/replication/prune_dead.result
@@ -0,0 +1,123 @@
+test_run = require('test_run').new()
+---
+...
+fiber = require('fiber')
+---
+...
+SERVERS = {'prune_dead1', 'prune_dead2', 'prune_dead3'}
+---
+...
+-- Deploy cluster
+test_run:create_cluster(SERVERS, "replication")
+---
+...
+test_run:wait_fullmesh(SERVERS)
+---
+...
+-- check that we can monitor replica set and all replicas are alive
+test_run:cmd('switch prune_dead1')
+---
+- true
+...
+alive = box.replication.get_alive_replicas(TIMEOUT)
+---
+...
+table.getn(alive) == box.space._cluster:count()
+---
+- true
+...
+box.info.replication[1].uuid == alive[1]
+---
+- true
+...
+box.info.replication[2].uuid == alive[2]
+---
+- true
+...
+box.info.replication[3].uuid == alive[3]
+---
+- true
+...
+-- check that a replica with replication turned off is still considered alive
+test_run:cmd('switch prune_dead2')
+---
+- true
+...
+replication = box.cfg.replication
+---
+...
+box.cfg{replication = ''}
+---
+...
+test_run:cmd('switch prune_dead1')
+---
+- true
+...
+alive = box.replication.get_alive_replicas(TIMEOUT)
+---
+...
+table.getn(alive) == box.space._cluster:count()
+---
+- true
+...
+test_run:cmd('switch prune_dead2')
+---
+- true
+...
+box.cfg{replication = replication}
+---
+...
+test_run:cmd('switch default')
+---
+- true
+...
+test_run:wait_fullmesh(SERVERS)
+---
+...
+-- stop replica to see that it is not in the alive list
+test_run:cmd('stop server prune_dead2')
+---
+- true
+...
+test_run:cmd('switch prune_dead1')
+---
+- true
+...
+alive = box.replication.get_alive_replicas(TIMEOUT)
+---
+...
+table.getn(alive) < box.space._cluster:count()
+---
+- true
+...
+all = {box.info.replication[1].uuid, box.info.replication[2].uuid, box.info.replication[3].uuid}
+---
+...
+box.info.replication[find_excess(all, alive)].upstream.status == "disconnected"
+---
+- true
+...
+box.info.replication[find_excess(all, alive)].downstream.status == "stopped"
+---
+- true
+...
+-- prune dead replica
+box.replication.prune_replicas(alive)
+---
+...
+table.getn(alive) == box.space._cluster:count()
+---
+- true
+...
+-- Cleanup
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd('start server prune_dead2')
+---
+- true
+...
+test_run:drop_cluster(SERVERS)
+---
+...
diff --git a/test/replication/prune_dead.test.lua b/test/replication/prune_dead.test.lua
new file mode 100644
index 000000000..f480472ae
--- /dev/null
+++ b/test/replication/prune_dead.test.lua
@@ -0,0 +1,46 @@
+test_run = require('test_run').new()
+fiber = require('fiber')
+
+SERVERS = {'prune_dead1', 'prune_dead2', 'prune_dead3'}
+
+-- Deploy cluster
+test_run:create_cluster(SERVERS, "replication")
+test_run:wait_fullmesh(SERVERS)
+
+-- check that we can monitor replica set and all replicas are alive
+test_run:cmd('switch prune_dead1')
+alive = box.replication.get_alive_replicas(TIMEOUT)
+table.getn(alive) == box.space._cluster:count()
+box.info.replication[1].uuid == alive[1]
+box.info.replication[2].uuid == alive[2]
+box.info.replication[3].uuid == alive[3]
+
+-- check that a replica with replication turned off is still considered alive
+test_run:cmd('switch prune_dead2')
+replication = box.cfg.replication
+box.cfg{replication = ''}
+test_run:cmd('switch prune_dead1')
+alive = box.replication.get_alive_replicas(TIMEOUT)
+table.getn(alive) == box.space._cluster:count()
+test_run:cmd('switch prune_dead2')
+box.cfg{replication = replication}
+test_run:cmd('switch default')
+test_run:wait_fullmesh(SERVERS)
+
+-- stop replica to see that it is not in the alive list
+test_run:cmd('stop server prune_dead2')
+test_run:cmd('switch prune_dead1')
+alive = box.replication.get_alive_replicas(TIMEOUT)
+table.getn(alive) < box.space._cluster:count()
+all = {box.info.replication[1].uuid, box.info.replication[2].uuid, box.info.replication[3].uuid}
+box.info.replication[find_excess(all, alive)].upstream.status == "disconnected"
+box.info.replication[find_excess(all, alive)].downstream.status == "stopped"
+
+-- prune dead replica
+box.replication.prune_replicas(alive)
+table.getn(alive) == box.space._cluster:count()
+
+-- Cleanup
+test_run:cmd("switch default")
+test_run:cmd('start server prune_dead2')
+test_run:drop_cluster(SERVERS)
diff --git a/test/replication/prune_dead1.lua b/test/replication/prune_dead1.lua
new file mode 120000
index 000000000..20f09ea62
--- /dev/null
+++ b/test/replication/prune_dead1.lua
@@ -0,0 +1 @@
+prune_dead.lua
\ No newline at end of file
diff --git a/test/replication/prune_dead2.lua b/test/replication/prune_dead2.lua
new file mode 120000
index 000000000..20f09ea62
--- /dev/null
+++ b/test/replication/prune_dead2.lua
@@ -0,0 +1 @@
+prune_dead.lua
\ No newline at end of file
diff --git a/test/replication/prune_dead3.lua b/test/replication/prune_dead3.lua
new file mode 120000
index 000000000..20f09ea62
--- /dev/null
+++ b/test/replication/prune_dead3.lua
@@ -0,0 +1 @@
+prune_dead.lua
\ No newline at end of file
diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg
index 95e94e5a2..f819eedd9 100644
--- a/test/replication/suite.cfg
+++ b/test/replication/suite.cfg
@@ -6,6 +6,7 @@
     "wal_off.test.lua": {},
     "hot_standby.test.lua": {},
     "rebootstrap.test.lua": {},
+    "prune_dead.test.lua": {},
     "*": {
         "memtx": {"engine": "memtx"},
         "vinyl": {"engine": "vinyl"}
-- 
2.14.3 (Apple Git-98)