* [tarantool-patches] [PATCH v2 1/2] box: added replication_dead/rw_gap options
2018-10-12 19:45 [tarantool-patches] [PATCH v2 0/2] detect and throw away dead replicas Olga Arkhangelskaia
@ 2018-10-12 19:45 ` Olga Arkhangelskaia
2018-10-15 10:22 ` Vladimir Davydov
2018-10-23 7:10 ` [tarantool-patches] " Konstantin Osipov
2018-10-12 19:45 ` [tarantool-patches] [PATCH v2 2/2] ctl: added functionality to detect and prune dead replicas Olga Arkhangelskaia
1 sibling, 2 replies; 6+ messages in thread
From: Olga Arkhangelskaia @ 2018-10-12 19:45 UTC (permalink / raw)
To: tarantool-patches; +Cc: Olga Arkhangelskaia
In scope of gh-3110 we need options that store periods of time,
to be compared with time of last activity of relay and applier.
This patch introduces replication_dead_gap and replication_rw_gap options.
replication_dead_gap is configured in box.cfg, with default 0 value.
If time that passed from now till last reader/writer activity of given replica
exceeds replication_dead_gap value, replica is suspected to be dead.
replication_dead_gap is measured in hours.
replication_rw_gap is configured in box.cfg, with default 0 value.
If time difference between last reader activity and last writer activity of
given replica exceeds replication_rw_gap value, replica is suspected to be dead.
replication_rw_gap is measured in hours.
---
src/box/box.cc | 34 ++++++++++++++++++++++++++++++++++
src/box/box.h | 2 ++
src/box/lua/cfg.cc | 24 ++++++++++++++++++++++++
src/box/lua/load_cfg.lua | 8 ++++++++
src/box/replication.cc | 3 ++-
src/box/replication.h | 12 ++++++++++++
test/box/admin.result | 4 ++++
test/box/cfg.result | 8 ++++++++
8 files changed, 94 insertions(+), 1 deletion(-)
diff --git a/src/box/box.cc b/src/box/box.cc
index 7e32b9fc7..f74e012f7 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -465,6 +465,28 @@ box_check_replication_sync_timeout(void)
return timeout;
}
+static double
+box_check_replication_dead_gap(void)
+{
+ double gap = cfg_getd("replication_dead_gap");
+ if (gap <= 0) {
+ tnt_raise(ClientError, ER_CFG, "replication_dead_gap",
+ "the value must be grater than 0");
+ }
+ return gap;
+}
+
+static double
+box_check_replication_rw_gap(void)
+{
+ double gap = cfg_getd("replication_rw_gap");
+ if (gap <= 0) {
+ tnt_raise(ClientError, ER_CFG, "replication_dead_gap",
+ "the value must be grater than 0");
+ }
+ return gap;
+}
+
static void
box_check_instance_uuid(struct tt_uuid *uuid)
{
@@ -739,6 +761,18 @@ box_set_replication_sync_timeout(void)
replication_sync_timeout = box_check_replication_sync_timeout();
}
+void
+box_set_replication_dead_gap(void)
+{
+ replication_dead_gap = box_check_replication_dead_gap();
+}
+
+void
+box_set_replication_rw_gap(void)
+{
+ replication_rw_gap = box_check_replication_rw_gap();
+}
+
void
box_set_replication_skip_conflict(void)
{
diff --git a/src/box/box.h b/src/box/box.h
index 9930d4a1a..bfb5bb873 100644
--- a/src/box/box.h
+++ b/src/box/box.h
@@ -198,6 +198,8 @@ void box_set_replication_connect_quorum(void);
void box_set_replication_sync_lag(void);
void box_set_replication_sync_timeout(void);
void box_set_replication_skip_conflict(void);
+void box_set_replication_dead_gap(void);
+void box_set_replication_rw_gap(void);
void box_set_net_msg_max(void);
extern "C" {
diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc
index c3825591c..f34b34bee 100644
--- a/src/box/lua/cfg.cc
+++ b/src/box/lua/cfg.cc
@@ -316,6 +316,28 @@ lbox_cfg_set_replication_sync_timeout(struct lua_State *L)
return 0;
}
+static int
+lbox_cfg_set_replication_dead_gap(struct lua_State *L)
+{
+ try {
+ box_set_replication_dead_gap();
+ } catch (Exception *) {
+ luaT_error(L);
+ }
+ return 0;
+}
+
+static int
+lbox_cfg_set_replication_rw_gap(struct lua_State *L)
+{
+ try {
+ box_set_replication_rw_gap();
+ } catch (Exception *) {
+ luaT_error(L);
+ }
+ return 0;
+}
+
static int
lbox_cfg_set_replication_skip_conflict(struct lua_State *L)
{
@@ -353,6 +375,8 @@ box_lua_cfg_init(struct lua_State *L)
{"cfg_set_replication_sync_lag", lbox_cfg_set_replication_sync_lag},
{"cfg_set_replication_sync_timeout", lbox_cfg_set_replication_sync_timeout},
{"cfg_set_replication_skip_conflict", lbox_cfg_set_replication_skip_conflict},
+ {"cfg_set_replication_dead_gap", lbox_cfg_set_replication_dead_gap},
+ {"cfg_set_replication_rw_gap", lbox_cfg_set_replication_rw_gap},
{"cfg_set_net_msg_max", lbox_cfg_set_net_msg_max},
{NULL, NULL}
};
diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
index f62f4dc1e..c15769dfe 100644
--- a/src/box/lua/load_cfg.lua
+++ b/src/box/lua/load_cfg.lua
@@ -76,6 +76,8 @@ local default_cfg = {
replication_connect_timeout = 30,
replication_connect_quorum = nil, -- connect all
replication_skip_conflict = false,
+ replication_dead_gap = 0,
+ replication_rw_gap = 0,
feedback_enabled = true,
feedback_host = "https://feedback.tarantool.io",
feedback_interval = 3600,
@@ -138,6 +140,8 @@ local template_cfg = {
replication_connect_timeout = 'number',
replication_connect_quorum = 'number',
replication_skip_conflict = 'boolean',
+ replication_dead_gap = 'number',
+ replication_rw_gap = 'number',
feedback_enabled = 'boolean',
feedback_host = 'string',
feedback_interval = 'number',
@@ -232,6 +236,8 @@ local dynamic_cfg = {
replication_sync_lag = private.cfg_set_replication_sync_lag,
replication_sync_timeout = private.cfg_set_replication_sync_timeout,
replication_skip_conflict = private.cfg_set_replication_skip_conflict,
+ replication_dead_gap = private.cfg_set_replication_dead_gap,
+ replication_rw_gap = private.cfg_set_replication_rw_gap,
instance_uuid = check_instance_uuid,
replicaset_uuid = check_replicaset_uuid,
net_msg_max = private.cfg_set_net_msg_max,
@@ -248,6 +254,8 @@ local dynamic_cfg_skip_at_load = {
replication_connect_quorum = true,
replication_sync_lag = true,
replication_sync_timeout = true,
+ replication_dead_gap = true,
+ replication_rw_gap = true,
wal_dir_rescan_delay = true,
custom_proc_title = true,
force_recovery = true,
diff --git a/src/box/replication.cc b/src/box/replication.cc
index 2cb4ec0f8..392f8d9fd 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -53,7 +53,8 @@ int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL;
double replication_sync_lag = 10.0; /* seconds */
double replication_sync_timeout = 300.0; /* seconds */
bool replication_skip_conflict = false;
-
+double replication_dead_gap = 0.0; /* hours */
+double replication_rw_gap = 0.0; /* hours */
struct replicaset replicaset;
static int
diff --git a/src/box/replication.h b/src/box/replication.h
index 2ac620d86..bf0c99fb7 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -137,6 +137,18 @@ extern double replication_sync_timeout;
*/
extern bool replication_skip_conflict;
+/*
+ * If replica is not active during time that exceeds replication_dead_gap it
+ * is considered as dead replica and can be thrown out from system space.
+ */
+extern double replication_dead_gap;
+
+/*
+ * If replica has both roles and gap between read/write activities exceeds this
+ * value it can be considered as dead one.
+ */
+extern double replication_rw_gap;
+
/**
* Wait for the given period of time before trying to reconnect
* to a master.
diff --git a/test/box/admin.result b/test/box/admin.result
index 8048460a1..5341e6f78 100644
--- a/test/box/admin.result
+++ b/test/box/admin.result
@@ -64,6 +64,10 @@ cfg_filter(box.cfg)
- 16320
- - replication_connect_timeout
- 30
+ - - replication_dead_gap
+ - 0
+ - - replication_rw_gap
+ - 0
- - replication_skip_conflict
- false
- - replication_sync_lag
diff --git a/test/box/cfg.result b/test/box/cfg.result
index 515033754..2f6bcd788 100644
--- a/test/box/cfg.result
+++ b/test/box/cfg.result
@@ -60,6 +60,10 @@ cfg_filter(box.cfg)
- 16320
- - replication_connect_timeout
- 30
+ - - replication_dead_gap
+ - 0
+ - - replication_rw_gap
+ - 0
- - replication_skip_conflict
- false
- - replication_sync_lag
@@ -161,6 +165,10 @@ cfg_filter(box.cfg)
- 16320
- - replication_connect_timeout
- 30
+ - - replication_dead_gap
+ - 0
+ - - replication_rw_gap
+ - 0
- - replication_skip_conflict
- false
- - replication_sync_lag
--
2.14.3 (Apple Git-98)
^ permalink raw reply [flat|nested] 6+ messages in thread
* [tarantool-patches] [PATCH v2 2/2] ctl: added functionality to detect and prune dead replicas
2018-10-12 19:45 [tarantool-patches] [PATCH v2 0/2] detect and throw away dead replicas Olga Arkhangelskaia
2018-10-12 19:45 ` [tarantool-patches] [PATCH v2 1/2] box: added replication_dead/rw_gap options Olga Arkhangelskaia
@ 2018-10-12 19:45 ` Olga Arkhangelskaia
2018-10-15 12:43 ` Vladimir Davydov
1 sibling, 1 reply; 6+ messages in thread
From: Olga Arkhangelskaia @ 2018-10-12 19:45 UTC (permalink / raw)
To: tarantool-patches; +Cc: Olga Arkhangelskaia
Added replicaset_list_wasted(), replica_displace(uuid),
replicaset_trim(uuid_table) functions.
replicaset_list_wasted() - returns table of dead replicas.
We maintain two values: last active write time - law, and last active
read - lar. lar/law is time that passed from now till last read/write (activity
of the applier and relay). Tome is measured in hours. Values can be
found in box.info.replication. We do not maintain lar/law for current
replica. If replicaset_list_wasted() is called it compares lar/law/abs(lar-law)
with replication_dead_gap/replication_rw_gap. If this values exceeds
gaps replica is supposed to be dead. And is stored in the resulting
table.
The resulting table cam be passed to replicaset_trim(uuid_table). In
this case all replicas from the table will be thrown away from the system
space.
If one knows that some replica is dead it's uuid can be passed to
replica_displace(uuid). In such case it will be thrown away from suystem
space.
Closes #3110
---
src/box/CMakeLists.txt | 1 +
src/box/lua/ctl.lua | 58 ++++++++++
src/box/lua/info.c | 10 ++
src/box/lua/init.c | 2 +
src/box/relay.cc | 6 ++
src/box/relay.h | 4 +
test/replication/trim.lua | 66 ++++++++++++
test/replication/trim.result | 237 +++++++++++++++++++++++++++++++++++++++++
test/replication/trim.test.lua | 93 ++++++++++++++++
test/replication/trim1.lua | 1 +
test/replication/trim2.lua | 1 +
test/replication/trim3.lua | 1 +
test/replication/trim4.lua | 1 +
13 files changed, 481 insertions(+)
create mode 100644 src/box/lua/ctl.lua
create mode 100644 test/replication/trim.lua
create mode 100644 test/replication/trim.result
create mode 100644 test/replication/trim.test.lua
create mode 120000 test/replication/trim1.lua
create mode 120000 test/replication/trim2.lua
create mode 120000 test/replication/trim3.lua
create mode 120000 test/replication/trim4.lua
diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index 52413d3cf..1daa1798e 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -11,6 +11,7 @@ lua_source(lua_sources lua/net_box.lua)
lua_source(lua_sources lua/upgrade.lua)
lua_source(lua_sources lua/console.lua)
lua_source(lua_sources lua/xlog.lua)
+lua_source(lua_sources lua/ctl.lua)
set(bin_sources)
bin_source(bin_sources bootstrap.snap bootstrap.h)
diff --git a/src/box/lua/ctl.lua b/src/box/lua/ctl.lua
new file mode 100644
index 000000000..852de91d1
--- /dev/null
+++ b/src/box/lua/ctl.lua
@@ -0,0 +1,58 @@
+-- ctl.lua (internal file)
+
+dead_gap = 0
+rw_gap = 0
+
+local function is_dead(replica)
+ -- no information about applier and relay
+ if replica.lar == nil and replica.law == nil then return true end
+
+ -- time between last active read and now exceeds dead_gap
+ if replica.lar > dead_gap then return true end
+
+ -- time between last active write and now exceeds dead_gap
+ if replica.law > dead_gap then return true end
+
+ -- something happened to relay or applier
+ if math.abs(replica.lar - replica.law) > rw_gap then return true end
+
+ return false
+end
+
+-- return list of replicas suspected to be dead
+function replicaset_list_wasted()
+ dead_gap = box.cfg.replication_dead_gap
+ rw_gap = box.cfg.replication_rw_gap
+ if dead_gap == 0 or rw_gap == 0 then
+ error("replication_dead_gap and replication_rw_gap must be set")
+ end
+ local wasted_list = {}
+ local replicaset = box.info.replication
+ for i, replica in pairs(replicaset) do
+ -- current replica is alive
+ if replica.uuid ~= box.info.uuid and is_dead(replica) then
+ table.insert(wasted_list, replica.uuid)
+ end
+ end
+ return wasted_list
+end
+
+-- throw away any replica from system space
+function replica_displace(uuid)
+ if uuid == nil then
+ error("Usage: replica_displace([uuid])")
+ end
+ box.space._cluster.index.uuid:delete{uuid}
+end
+
+-- delete table of dead replica obtained from replicaset_list_wasted() or
+-- formed by admin
+function replicaset_trim(uuid_table)
+ if uuid_table == nil then
+ error("Usage: replicaset_trim([uuid_table])")
+ end
+ for i in pairs(uuid_table) do
+ print("Deleting replica with uuid ".. i.. " "..uuid_table[i])
+ replica_displace(uuid_table[i])
+ end
+end
diff --git a/src/box/lua/info.c b/src/box/lua/info.c
index 655768ec4..c35e72aa9 100644
--- a/src/box/lua/info.c
+++ b/src/box/lua/info.c
@@ -145,6 +145,16 @@ lbox_pushreplica(lua_State *L, struct replica *replica)
luaL_pushuint64(L, vclock_get(&replicaset.vclock, replica->id));
lua_settable(L, -3);
+ if (applier != NULL && applier->state != APPLIER_OFF) {
+ lua_pushstring(L, "lar");
+ lua_pushnumber(L, (ev_monotonic_now(loop()) - applier->last_row_time)/3600.0);
+ lua_settable(L, -3);
+ }
+ if (relay != NULL && relay_get_state(relay) != RELAY_OFF) {
+ lua_pushstring(L, "law");
+ lua_pushnumber(L, (ev_monotonic_now(loop()) - relay_get_lrt(relay))/3600.0);
+ lua_settable(L, -3);
+ }
if (applier != NULL && applier->state != APPLIER_OFF) {
lua_pushstring(L, "upstream");
lbox_pushapplier(L, applier);
diff --git a/src/box/lua/init.c b/src/box/lua/init.c
index 694b5bfd3..55910a85b 100644
--- a/src/box/lua/init.c
+++ b/src/box/lua/init.c
@@ -62,6 +62,7 @@
extern char session_lua[],
tuple_lua[],
schema_lua[],
+ ctl_lua[],
load_cfg_lua[],
xlog_lua[],
checkpoint_daemon_lua[],
@@ -81,6 +82,7 @@ static const char *lua_sources[] = {
"box/console", console_lua,
"box/load_cfg", load_cfg_lua,
"box/xlog", xlog_lua,
+ "box/ctl", ctl_lua,
NULL
};
diff --git a/src/box/relay.cc b/src/box/relay.cc
index d5df487eb..f20972003 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -146,6 +146,12 @@ relay_get_diag(struct relay *relay)
return &relay->diag;
}
+double
+relay_get_lrt(struct relay *relay)
+{
+ return relay->last_row_tm;
+}
+
enum relay_state
relay_get_state(const struct relay *relay)
{
diff --git a/src/box/relay.h b/src/box/relay.h
index 53bf68eb8..0d7324ca1 100644
--- a/src/box/relay.h
+++ b/src/box/relay.h
@@ -73,6 +73,10 @@ relay_delete(struct relay *relay);
struct diag*
relay_get_diag(struct relay *relay);
+/** Get relay's last row time */
+double
+relay_get_lrt(struct relay *relay);
+
/** Return the current state of relay. */
enum relay_state
relay_get_state(const struct relay *relay);
diff --git a/test/replication/trim.lua b/test/replication/trim.lua
new file mode 100644
index 000000000..1fb9cc6d2
--- /dev/null
+++ b/test/replication/trim.lua
@@ -0,0 +1,66 @@
+#!/usr/bin/env tarantool
+
+-- get instance name from filename (trim1.lua => trim1)
+local INSTANCE_ID = string.match(arg[0], "%d")
+local SOCKET_DIR = require('fio').cwd()
+local TIMEOUT = tonumber(arg[1])
+local CON_TIMEOUT = arg[2] and tonumber(arg[2]) or 30.0
+
+local function instance_uri(instance_id)
+ --return 'localhost:'..(3310 + instance_id)
+ return SOCKET_DIR..'/trim'..instance_id..'.sock';
+end
+
+require('console').listen(os.getenv('ADMIN'))
+
+box.cfg({
+ listen = instance_uri(INSTANCE_ID);
+ replication_timeout = TIMEOUT;
+ replication_connect_timeout = CON_TIMEOUT;
+ replication = {
+ instance_uri(1);
+ instance_uri(2);
+ instance_uri(3);
+ instance_uri(4);
+ };
+})
+
+box.once("bootstrap", function()
+ local test_run = require('test_run').new()
+ box.schema.user.grant("guest", 'replication')
+ box.schema.space.create('test', {engine = test_run:get_cfg('engine')})
+ box.space.test:create_index('primary')
+end)
+
+
+fiber = require('fiber')
+function wait()
+ local i = 80
+ while i~= 0 do fiber.sleep(0.05) i = i - 1 end
+end
+
+function find_wasted_by_law(uuid)
+ for i, info in pairs(box.info.replication) do
+ if info.uuid == uuid then
+ return info.law > box.cfg.replication_dead_gap
+ end
+ end
+ return false
+end
+
+function find_wasted_by_lar(uuid)
+ for i, info in pairs(box.info.replication) do
+ if info.uuid == uuid then
+ return info.lar > box.cfg.replication_dead_gap
+ end
+ end
+ return false
+end
+
+function find_wasted_by_rw(uuid)
+ for i, info in pairs(box.info.replication) do
+ if info.uuid == uuid then
+ return math.abs(info.law - info.lar) > box.cfg.replication_rw_gap
+ end
+ end
+end
diff --git a/test/replication/trim.result b/test/replication/trim.result
new file mode 100644
index 000000000..69563898f
--- /dev/null
+++ b/test/replication/trim.result
@@ -0,0 +1,237 @@
+test_run = require('test_run').new()
+---
+...
+SERVERS = {'trim1', 'trim2', 'trim3', 'trim4'}
+---
+...
+-- Deploy cluster
+test_run:create_cluster(SERVERS, "replication", {args="0.1"})
+---
+...
+test_run:wait_fullmesh(SERVERS)
+---
+...
+test_run:cmd('switch trim1')
+---
+- true
+...
+box.space._cluster:len() == 4
+---
+- true
+...
+-- errors
+replicaset_list_wasted()
+---
+- error: 'builtin/box/ctl.lua:27: replication_dead_gap and replication_rw_gap must
+ be set'
+...
+replica_displace()
+---
+- error: 'builtin/box/ctl.lua:43: Usage: replica_displace([uuid])'
+...
+-- set dead/rw gap
+box.cfg{replication_dead_gap = 0.001, replication_rw_gap = 10}
+---
+...
+-- stop replication
+test_run:cmd('switch trim4')
+---
+- true
+...
+replication = box.cfg.replication
+---
+...
+box.cfg{replication = {}}
+---
+...
+test_run:cmd('switch trim1')
+---
+- true
+...
+-- must be empty
+table.getn(replicaset_list_wasted()) == 0
+---
+- true
+...
+-- need time to fulfill dead_gap
+wait()
+---
+...
+wasted_replica = replicaset_list_wasted()
+---
+...
+table.getn(wasted_replica) == 1
+---
+- true
+...
+-- found by law
+find_wasted_by_law(wasted_replica[1])
+---
+- true
+...
+find_wasted_by_rw(wasted_replica[1])
+---
+- false
+...
+find_wasted_by_lar(wasted_replica[1])
+---
+- false
+...
+--turn on replication and see empty wasted list
+test_run:cmd('switch trim4')
+---
+- true
+...
+box.cfg{replication = replication}
+---
+...
+test_run:cmd('switch trim1')
+---
+- true
+...
+table.getn(replicaset_list_wasted()) == 0
+---
+- true
+...
+-- look at rw_gap
+box.cfg{replication_dead_gap = 10, replication_rw_gap = 0.001}
+---
+...
+test_run:cmd('switch trim4')
+---
+- true
+...
+box.cfg{replication = {}}
+---
+...
+test_run:cmd('switch trim1')
+---
+- true
+...
+wait()
+---
+...
+table.getn(replicaset_list_wasted()) == 1
+---
+- true
+...
+find_wasted_by_rw(wasted_replica[1])
+---
+- true
+...
+find_wasted_by_law(wasted_replica[1])
+---
+- false
+...
+find_wasted_by_lar(wasted_replica[1])
+---
+- false
+...
+-- look at lar
+test_run:cmd('switch trim4')
+---
+- true
+...
+box.cfg{replication = replication}
+---
+...
+test_run:cmd('switch trim1')
+---
+- true
+...
+table.getn(replicaset_list_wasted()) == 0
+---
+- true
+...
+box.cfg{replication_dead_gap = 0.001, replication_rw_gap = 10}
+---
+...
+test_run:cmd('stop server trim4')
+---
+- true
+...
+table.getn(replicaset_list_wasted()) == 0
+---
+- true
+...
+wait()
+---
+...
+wasted_replica = replicaset_list_wasted()
+---
+...
+table.getn(wasted_replica) == 1
+---
+- true
+...
+find_wasted_by_lar(wasted_replica[1])
+---
+- true
+...
+find_wasted_by_rw(wasted_replica[1])
+---
+- false
+...
+find_wasted_by_law(wasted_replica[1])
+---
+- true
+...
+-- throw away dead replicas
+-- delete given replica
+box.space._cluster:len() == 4
+---
+- true
+...
+replica_displace(wasted_replica[1])
+---
+...
+box.space._cluster:len() == 3
+---
+- true
+...
+-- trim replicaset
+test_run:cmd('stop server trim2')
+---
+- true
+...
+test_run:cmd('stop server trim3')
+---
+- true
+...
+wait()
+---
+...
+trim_set = replicaset_list_wasted()
+---
+...
+table.getn(trim_set) == 2
+---
+- true
+...
+replicaset_trim(trim_set)
+---
+...
+box.space._cluster:len() == 1
+---
+- true
+...
+-- Cleanup
+test_run:cmd('start server trim2')
+---
+- true
+...
+test_run:cmd('start server trim3')
+---
+- true
+...
+test_run:cmd('start server trim4')
+---
+- true
+...
+test_run:cmd('switch default')
+---
+- true
+...
+test_run:drop_cluster(SERVERS)
+---
+...
diff --git a/test/replication/trim.test.lua b/test/replication/trim.test.lua
new file mode 100644
index 000000000..35dbb8590
--- /dev/null
+++ b/test/replication/trim.test.lua
@@ -0,0 +1,93 @@
+test_run = require('test_run').new()
+
+SERVERS = {'trim1', 'trim2', 'trim3', 'trim4'}
+
+
+-- Deploy cluster
+test_run:create_cluster(SERVERS, "replication", {args="0.1"})
+test_run:wait_fullmesh(SERVERS)
+
+test_run:cmd('switch trim1')
+box.space._cluster:len() == 4
+-- errors
+replicaset_list_wasted()
+replica_displace()
+
+-- set dead/rw gap
+box.cfg{replication_dead_gap = 0.001, replication_rw_gap = 10}
+
+-- stop replication
+test_run:cmd('switch trim4')
+replication = box.cfg.replication
+box.cfg{replication = {}}
+
+test_run:cmd('switch trim1')
+-- must be empty
+table.getn(replicaset_list_wasted()) == 0
+-- need time to fulfill dead_gap
+wait()
+wasted_replica = replicaset_list_wasted()
+table.getn(wasted_replica) == 1
+
+-- found by law
+find_wasted_by_law(wasted_replica[1])
+find_wasted_by_rw(wasted_replica[1])
+find_wasted_by_lar(wasted_replica[1])
+
+--turn on replication and see empty wasted list
+test_run:cmd('switch trim4')
+box.cfg{replication = replication}
+test_run:cmd('switch trim1')
+table.getn(replicaset_list_wasted()) == 0
+
+-- look at rw_gap
+box.cfg{replication_dead_gap = 10, replication_rw_gap = 0.001}
+test_run:cmd('switch trim4')
+box.cfg{replication = {}}
+test_run:cmd('switch trim1')
+wait()
+table.getn(replicaset_list_wasted()) == 1
+
+find_wasted_by_rw(wasted_replica[1])
+find_wasted_by_law(wasted_replica[1])
+find_wasted_by_lar(wasted_replica[1])
+
+-- look at lar
+test_run:cmd('switch trim4')
+box.cfg{replication = replication}
+test_run:cmd('switch trim1')
+table.getn(replicaset_list_wasted()) == 0
+box.cfg{replication_dead_gap = 0.001, replication_rw_gap = 10}
+test_run:cmd('stop server trim4')
+table.getn(replicaset_list_wasted()) == 0
+wait()
+wasted_replica = replicaset_list_wasted()
+table.getn(wasted_replica) == 1
+
+find_wasted_by_lar(wasted_replica[1])
+find_wasted_by_rw(wasted_replica[1])
+find_wasted_by_law(wasted_replica[1])
+
+-- throw away dead replicas
+-- delete given replica
+box.space._cluster:len() == 4
+replica_displace(wasted_replica[1])
+box.space._cluster:len() == 3
+
+-- trim replicaset
+test_run:cmd('stop server trim2')
+test_run:cmd('stop server trim3')
+
+wait()
+trim_set = replicaset_list_wasted()
+table.getn(trim_set) == 2
+replicaset_trim(trim_set)
+box.space._cluster:len() == 1
+
+-- Cleanup
+test_run:cmd('start server trim2')
+test_run:cmd('start server trim3')
+test_run:cmd('start server trim4')
+
+test_run:cmd('switch default')
+test_run:drop_cluster(SERVERS)
diff --git a/test/replication/trim1.lua b/test/replication/trim1.lua
new file mode 120000
index 000000000..14e98bd68
--- /dev/null
+++ b/test/replication/trim1.lua
@@ -0,0 +1 @@
+trim.lua
\ No newline at end of file
diff --git a/test/replication/trim2.lua b/test/replication/trim2.lua
new file mode 120000
index 000000000..14e98bd68
--- /dev/null
+++ b/test/replication/trim2.lua
@@ -0,0 +1 @@
+trim.lua
\ No newline at end of file
diff --git a/test/replication/trim3.lua b/test/replication/trim3.lua
new file mode 120000
index 000000000..14e98bd68
--- /dev/null
+++ b/test/replication/trim3.lua
@@ -0,0 +1 @@
+trim.lua
\ No newline at end of file
diff --git a/test/replication/trim4.lua b/test/replication/trim4.lua
new file mode 120000
index 000000000..14e98bd68
--- /dev/null
+++ b/test/replication/trim4.lua
@@ -0,0 +1 @@
+trim.lua
\ No newline at end of file
--
2.14.3 (Apple Git-98)
^ permalink raw reply [flat|nested] 6+ messages in thread