From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 1693F2C7EF for ; Fri, 12 Oct 2018 15:46:52 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id 85KqRmh5LNDX for ; Fri, 12 Oct 2018 15:46:51 -0400 (EDT) Received: from smtp52.i.mail.ru (smtp52.i.mail.ru [94.100.177.112]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 3A8832C659 for ; Fri, 12 Oct 2018 15:46:51 -0400 (EDT) From: Olga Arkhangelskaia Subject: [tarantool-patches] [PATCH v2 2/2] ctl: added functionality to detect and prune dead replicas Date: Fri, 12 Oct 2018 22:45:57 +0300 Message-Id: <20181012194557.7445-3-arkholga@tarantool.org> In-Reply-To: <20181012194557.7445-1-arkholga@tarantool.org> References: <20181012194557.7445-1-arkholga@tarantool.org> Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: Olga Arkhangelskaia Added replicaset_list_wasted(), replica_displace(uuid), replicaset_trim(uuid_table) functions. replicaset_list_wasted() - returns a table of dead replicas. We maintain two values: last active write time - law, and last active read time - lar. lar/law is the time that has passed since the last read/write (activity of the applier and relay). Time is measured in hours. Values can be found in box.info.replication. We do not maintain lar/law for the current replica. When replicaset_list_wasted() is called, it compares lar and law with replication_dead_gap, and abs(lar-law) with replication_rw_gap. 
If any of these values exceeds its gap, the replica is considered dead and is stored in the resulting table. The resulting table can be passed to replicaset_trim(uuid_table). In this case all replicas from the table will be thrown away from the system space. If one knows that some replica is dead, its uuid can be passed to replica_displace(uuid). In such case it will be thrown away from the system space. Closes #3110 --- src/box/CMakeLists.txt | 1 + src/box/lua/ctl.lua | 58 ++++++++++ src/box/lua/info.c | 10 ++ src/box/lua/init.c | 2 + src/box/relay.cc | 6 ++ src/box/relay.h | 4 + test/replication/trim.lua | 66 ++++++++++++ test/replication/trim.result | 237 +++++++++++++++++++++++++++++++++++++++++ test/replication/trim.test.lua | 93 ++++++++++++++++ test/replication/trim1.lua | 1 + test/replication/trim2.lua | 1 + test/replication/trim3.lua | 1 + test/replication/trim4.lua | 1 + 13 files changed, 481 insertions(+) create mode 100644 src/box/lua/ctl.lua create mode 100644 test/replication/trim.lua create mode 100644 test/replication/trim.result create mode 100644 test/replication/trim.test.lua create mode 120000 test/replication/trim1.lua create mode 120000 test/replication/trim2.lua create mode 120000 test/replication/trim3.lua create mode 120000 test/replication/trim4.lua diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt index 52413d3cf..1daa1798e 100644 --- a/src/box/CMakeLists.txt +++ b/src/box/CMakeLists.txt @@ -11,6 +11,7 @@ lua_source(lua_sources lua/net_box.lua) lua_source(lua_sources lua/upgrade.lua) lua_source(lua_sources lua/console.lua) lua_source(lua_sources lua/xlog.lua) +lua_source(lua_sources lua/ctl.lua) set(bin_sources) bin_source(bin_sources bootstrap.snap bootstrap.h) diff --git a/src/box/lua/ctl.lua b/src/box/lua/ctl.lua new file mode 100644 index 000000000..852de91d1 --- /dev/null +++ b/src/box/lua/ctl.lua @@ -0,0 +1,58 @@ +-- ctl.lua (internal file) + +dead_gap = 0 +rw_gap = 0 + +local function is_dead(replica) + -- no information about applier 
and relay + if replica.lar == nil and replica.law == nil then return true end + + -- time between last active read and now exceeds dead_gap + if replica.lar > dead_gap then return true end + + -- time between last active write and now exceeds dead_gap + if replica.law > dead_gap then return true end + + -- something happened to relay or applier + if math.abs(replica.lar - replica.law) > rw_gap then return true end + + return false +end + +-- return list of replicas suspected to be dead +function replicaset_list_wasted() + dead_gap = box.cfg.replication_dead_gap + rw_gap = box.cfg.replication_rw_gap + if dead_gap == 0 or rw_gap == 0 then + error("replication_dead_gap and replication_rw_gap must be set") + end + local wasted_list = {} + local replicaset = box.info.replication + for i, replica in pairs(replicaset) do + -- current replica is alive + if replica.uuid ~= box.info.uuid and is_dead(replica) then + table.insert(wasted_list, replica.uuid) + end + end + return wasted_list +end + +-- throw away any replica from system space +function replica_displace(uuid) + if uuid == nil then + error("Usage: replica_displace([uuid])") + end + box.space._cluster.index.uuid:delete{uuid} +end + +-- delete table of dead replica obtained from replicaset_list_wasted() or +-- formed by admin +function replicaset_trim(uuid_table) + if uuid_table == nil then + error("Usage: replicaset_trim([uuid_table])") + end + for i in pairs(uuid_table) do + print("Deleting replica with uuid ".. i.. 
" "..uuid_table[i]) + replica_displace(uuid_table[i]) + end +end diff --git a/src/box/lua/info.c b/src/box/lua/info.c index 655768ec4..c35e72aa9 100644 --- a/src/box/lua/info.c +++ b/src/box/lua/info.c @@ -145,6 +145,16 @@ lbox_pushreplica(lua_State *L, struct replica *replica) luaL_pushuint64(L, vclock_get(&replicaset.vclock, replica->id)); lua_settable(L, -3); + if (applier != NULL && applier->state != APPLIER_OFF) { + lua_pushstring(L, "lar"); + lua_pushnumber(L, (ev_monotonic_now(loop()) - applier->last_row_time)/3600.0); + lua_settable(L, -3); + } + if (relay != NULL && relay_get_state(relay) != RELAY_OFF) { + lua_pushstring(L, "law"); + lua_pushnumber(L, (ev_monotonic_now(loop()) - relay_get_lrt(relay))/3600.0); + lua_settable(L, -3); + } if (applier != NULL && applier->state != APPLIER_OFF) { lua_pushstring(L, "upstream"); lbox_pushapplier(L, applier); diff --git a/src/box/lua/init.c b/src/box/lua/init.c index 694b5bfd3..55910a85b 100644 --- a/src/box/lua/init.c +++ b/src/box/lua/init.c @@ -62,6 +62,7 @@ extern char session_lua[], tuple_lua[], schema_lua[], + ctl_lua[], load_cfg_lua[], xlog_lua[], checkpoint_daemon_lua[], @@ -81,6 +82,7 @@ static const char *lua_sources[] = { "box/console", console_lua, "box/load_cfg", load_cfg_lua, "box/xlog", xlog_lua, + "box/ctl", ctl_lua, NULL }; diff --git a/src/box/relay.cc b/src/box/relay.cc index d5df487eb..f20972003 100644 --- a/src/box/relay.cc +++ b/src/box/relay.cc @@ -146,6 +146,12 @@ relay_get_diag(struct relay *relay) return &relay->diag; } +double +relay_get_lrt(struct relay *relay) +{ + return relay->last_row_tm; +} + enum relay_state relay_get_state(const struct relay *relay) { diff --git a/src/box/relay.h b/src/box/relay.h index 53bf68eb8..0d7324ca1 100644 --- a/src/box/relay.h +++ b/src/box/relay.h @@ -73,6 +73,10 @@ relay_delete(struct relay *relay); struct diag* relay_get_diag(struct relay *relay); +/** Get relay's last row time */ +double +relay_get_lrt(struct relay *relay); + /** Return the current 
state of relay. */ enum relay_state relay_get_state(const struct relay *relay); diff --git a/test/replication/trim.lua b/test/replication/trim.lua new file mode 100644 index 000000000..1fb9cc6d2 --- /dev/null +++ b/test/replication/trim.lua @@ -0,0 +1,66 @@ +#!/usr/bin/env tarantool + +-- get instance name from filename (trim1.lua => trim1) +local INSTANCE_ID = string.match(arg[0], "%d") +local SOCKET_DIR = require('fio').cwd() +local TIMEOUT = tonumber(arg[1]) +local CON_TIMEOUT = arg[2] and tonumber(arg[2]) or 30.0 + +local function instance_uri(instance_id) + --return 'localhost:'..(3310 + instance_id) + return SOCKET_DIR..'/trim'..instance_id..'.sock'; +end + +require('console').listen(os.getenv('ADMIN')) + +box.cfg({ + listen = instance_uri(INSTANCE_ID); + replication_timeout = TIMEOUT; + replication_connect_timeout = CON_TIMEOUT; + replication = { + instance_uri(1); + instance_uri(2); + instance_uri(3); + instance_uri(4); + }; +}) + +box.once("bootstrap", function() + local test_run = require('test_run').new() + box.schema.user.grant("guest", 'replication') + box.schema.space.create('test', {engine = test_run:get_cfg('engine')}) + box.space.test:create_index('primary') +end) + + +fiber = require('fiber') +function wait() + local i = 80 + while i~= 0 do fiber.sleep(0.05) i = i - 1 end +end + +function find_wasted_by_law(uuid) + for i, info in pairs(box.info.replication) do + if info.uuid == uuid then + return info.law > box.cfg.replication_dead_gap + end + end + return false +end + +function find_wasted_by_lar(uuid) + for i, info in pairs(box.info.replication) do + if info.uuid == uuid then + return info.lar > box.cfg.replication_dead_gap + end + end + return false +end + +function find_wasted_by_rw(uuid) + for i, info in pairs(box.info.replication) do + if info.uuid == uuid then + return math.abs(info.law - info.lar) > box.cfg.replication_rw_gap + end + end +end diff --git a/test/replication/trim.result b/test/replication/trim.result new file mode 100644 
index 000000000..69563898f --- /dev/null +++ b/test/replication/trim.result @@ -0,0 +1,237 @@ +test_run = require('test_run').new() +--- +... +SERVERS = {'trim1', 'trim2', 'trim3', 'trim4'} +--- +... +-- Deploy cluster +test_run:create_cluster(SERVERS, "replication", {args="0.1"}) +--- +... +test_run:wait_fullmesh(SERVERS) +--- +... +test_run:cmd('switch trim1') +--- +- true +... +box.space._cluster:len() == 4 +--- +- true +... +-- errors +replicaset_list_wasted() +--- +- error: 'builtin/box/ctl.lua:27: replication_dead_gap and replication_rw_gap must + be set' +... +replica_displace() +--- +- error: 'builtin/box/ctl.lua:43: Usage: replica_displace([uuid])' +... +-- set dead/rw gap +box.cfg{replication_dead_gap = 0.001, replication_rw_gap = 10} +--- +... +-- stop replication +test_run:cmd('switch trim4') +--- +- true +... +replication = box.cfg.replication +--- +... +box.cfg{replication = {}} +--- +... +test_run:cmd('switch trim1') +--- +- true +... +-- must be empty +table.getn(replicaset_list_wasted()) == 0 +--- +- true +... +-- need time to fulfill dead_gap +wait() +--- +... +wasted_replica = replicaset_list_wasted() +--- +... +table.getn(wasted_replica) == 1 +--- +- true +... +-- found by law +find_wasted_by_law(wasted_replica[1]) +--- +- true +... +find_wasted_by_rw(wasted_replica[1]) +--- +- false +... +find_wasted_by_lar(wasted_replica[1]) +--- +- false +... +--turn on replication and see empty wasted list +test_run:cmd('switch trim4') +--- +- true +... +box.cfg{replication = replication} +--- +... +test_run:cmd('switch trim1') +--- +- true +... +table.getn(replicaset_list_wasted()) == 0 +--- +- true +... +-- look at rw_gap +box.cfg{replication_dead_gap = 10, replication_rw_gap = 0.001} +--- +... +test_run:cmd('switch trim4') +--- +- true +... +box.cfg{replication = {}} +--- +... +test_run:cmd('switch trim1') +--- +- true +... +wait() +--- +... +table.getn(replicaset_list_wasted()) == 1 +--- +- true +... 
+find_wasted_by_rw(wasted_replica[1]) +--- +- true +... +find_wasted_by_law(wasted_replica[1]) +--- +- false +... +find_wasted_by_lar(wasted_replica[1]) +--- +- false +... +-- look at lar +test_run:cmd('switch trim4') +--- +- true +... +box.cfg{replication = replication} +--- +... +test_run:cmd('switch trim1') +--- +- true +... +table.getn(replicaset_list_wasted()) == 0 +--- +- true +... +box.cfg{replication_dead_gap = 0.001, replication_rw_gap = 10} +--- +... +test_run:cmd('stop server trim4') +--- +- true +... +table.getn(replicaset_list_wasted()) == 0 +--- +- true +... +wait() +--- +... +wasted_replica = replicaset_list_wasted() +--- +... +table.getn(wasted_replica) == 1 +--- +- true +... +find_wasted_by_lar(wasted_replica[1]) +--- +- true +... +find_wasted_by_rw(wasted_replica[1]) +--- +- false +... +find_wasted_by_law(wasted_replica[1]) +--- +- true +... +-- throw away dead replicas +-- delete given replica +box.space._cluster:len() == 4 +--- +- true +... +replica_displace(wasted_replica[1]) +--- +... +box.space._cluster:len() == 3 +--- +- true +... +-- trim replicaset +test_run:cmd('stop server trim2') +--- +- true +... +test_run:cmd('stop server trim3') +--- +- true +... +wait() +--- +... +trim_set = replicaset_list_wasted() +--- +... +table.getn(trim_set) == 2 +--- +- true +... +replicaset_trim(trim_set) +--- +... +box.space._cluster:len() == 1 +--- +- true +... +-- Cleanup +test_run:cmd('start server trim2') +--- +- true +... +test_run:cmd('start server trim3') +--- +- true +... +test_run:cmd('start server trim4') +--- +- true +... +test_run:cmd('switch default') +--- +- true +... +test_run:drop_cluster(SERVERS) +--- +... 
diff --git a/test/replication/trim.test.lua b/test/replication/trim.test.lua new file mode 100644 index 000000000..35dbb8590 --- /dev/null +++ b/test/replication/trim.test.lua @@ -0,0 +1,93 @@ +test_run = require('test_run').new() + +SERVERS = {'trim1', 'trim2', 'trim3', 'trim4'} + + +-- Deploy cluster +test_run:create_cluster(SERVERS, "replication", {args="0.1"}) +test_run:wait_fullmesh(SERVERS) + +test_run:cmd('switch trim1') +box.space._cluster:len() == 4 +-- errors +replicaset_list_wasted() +replica_displace() + +-- set dead/rw gap +box.cfg{replication_dead_gap = 0.001, replication_rw_gap = 10} + +-- stop replication +test_run:cmd('switch trim4') +replication = box.cfg.replication +box.cfg{replication = {}} + +test_run:cmd('switch trim1') +-- must be empty +table.getn(replicaset_list_wasted()) == 0 +-- need time to fulfill dead_gap +wait() +wasted_replica = replicaset_list_wasted() +table.getn(wasted_replica) == 1 + +-- found by law +find_wasted_by_law(wasted_replica[1]) +find_wasted_by_rw(wasted_replica[1]) +find_wasted_by_lar(wasted_replica[1]) + +--turn on replication and see empty wasted list +test_run:cmd('switch trim4') +box.cfg{replication = replication} +test_run:cmd('switch trim1') +table.getn(replicaset_list_wasted()) == 0 + +-- look at rw_gap +box.cfg{replication_dead_gap = 10, replication_rw_gap = 0.001} +test_run:cmd('switch trim4') +box.cfg{replication = {}} +test_run:cmd('switch trim1') +wait() +table.getn(replicaset_list_wasted()) == 1 + +find_wasted_by_rw(wasted_replica[1]) +find_wasted_by_law(wasted_replica[1]) +find_wasted_by_lar(wasted_replica[1]) + +-- look at lar +test_run:cmd('switch trim4') +box.cfg{replication = replication} +test_run:cmd('switch trim1') +table.getn(replicaset_list_wasted()) == 0 +box.cfg{replication_dead_gap = 0.001, replication_rw_gap = 10} +test_run:cmd('stop server trim4') +table.getn(replicaset_list_wasted()) == 0 +wait() +wasted_replica = replicaset_list_wasted() +table.getn(wasted_replica) == 1 + 
+find_wasted_by_lar(wasted_replica[1]) +find_wasted_by_rw(wasted_replica[1]) +find_wasted_by_law(wasted_replica[1]) + +-- throw away dead replicas +-- delete given replica +box.space._cluster:len() == 4 +replica_displace(wasted_replica[1]) +box.space._cluster:len() == 3 + +-- trim replicaset +test_run:cmd('stop server trim2') +test_run:cmd('stop server trim3') + +wait() +trim_set = replicaset_list_wasted() +table.getn(trim_set) == 2 +replicaset_trim(trim_set) +box.space._cluster:len() == 1 + +-- Cleanup +test_run:cmd('start server trim2') +test_run:cmd('start server trim3') +test_run:cmd('start server trim4') + +test_run:cmd('switch default') +test_run:drop_cluster(SERVERS) diff --git a/test/replication/trim1.lua b/test/replication/trim1.lua new file mode 120000 index 000000000..14e98bd68 --- /dev/null +++ b/test/replication/trim1.lua @@ -0,0 +1 @@ +trim.lua \ No newline at end of file diff --git a/test/replication/trim2.lua b/test/replication/trim2.lua new file mode 120000 index 000000000..14e98bd68 --- /dev/null +++ b/test/replication/trim2.lua @@ -0,0 +1 @@ +trim.lua \ No newline at end of file diff --git a/test/replication/trim3.lua b/test/replication/trim3.lua new file mode 120000 index 000000000..14e98bd68 --- /dev/null +++ b/test/replication/trim3.lua @@ -0,0 +1 @@ +trim.lua \ No newline at end of file diff --git a/test/replication/trim4.lua b/test/replication/trim4.lua new file mode 120000 index 000000000..14e98bd68 --- /dev/null +++ b/test/replication/trim4.lua @@ -0,0 +1 @@ +trim.lua \ No newline at end of file -- 2.14.3 (Apple Git-98)