[tarantool-patches] [PATCH v2 1/2] box: added replication_dead/rw_gap options
Olga Arkhangelskaia
arkholga at tarantool.org
Fri Oct 12 22:45:56 MSK 2018
In the scope of gh-3110 we need options that store periods of time
to be compared with the time of the last activity of the relay and applier.
This patch introduces the replication_dead_gap and replication_rw_gap options.
replication_dead_gap is configured in box.cfg, with a default value of 0.
If the time elapsed since the last reader/writer activity of a given replica
exceeds the replication_dead_gap value, the replica is suspected to be dead.
replication_dead_gap is measured in hours.
replication_rw_gap is configured in box.cfg, with a default value of 0.
If the time difference between the last reader activity and the last writer
activity of a given replica exceeds the replication_rw_gap value, the replica
is suspected to be dead.
replication_rw_gap is measured in hours.
---
src/box/box.cc | 34 ++++++++++++++++++++++++++++++++++
src/box/box.h | 2 ++
src/box/lua/cfg.cc | 24 ++++++++++++++++++++++++
src/box/lua/load_cfg.lua | 8 ++++++++
src/box/replication.cc | 3 ++-
src/box/replication.h | 12 ++++++++++++
test/box/admin.result | 4 ++++
test/box/cfg.result | 8 ++++++++
8 files changed, 94 insertions(+), 1 deletion(-)
diff --git a/src/box/box.cc b/src/box/box.cc
index 7e32b9fc7..f74e012f7 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -465,6 +465,28 @@ box_check_replication_sync_timeout(void)
return timeout;
}
+/**
+ * Fetch and validate the replication_dead_gap option (in hours).
+ * Zero is the default value and must remain settable, so only
+ * negative values are rejected with ER_CFG.
+ */
+static double
+box_check_replication_dead_gap(void)
+{
+	double gap = cfg_getd("replication_dead_gap");
+	if (gap < 0) {
+		tnt_raise(ClientError, ER_CFG, "replication_dead_gap",
+			  "the value must be greater than or equal to 0");
+	}
+	return gap;
+}
+
+/**
+ * Fetch and validate the replication_rw_gap option (in hours).
+ * Zero is the default value and must remain settable, so only
+ * negative values are rejected with ER_CFG. The error names this
+ * option, not replication_dead_gap.
+ */
+static double
+box_check_replication_rw_gap(void)
+{
+	double gap = cfg_getd("replication_rw_gap");
+	if (gap < 0) {
+		tnt_raise(ClientError, ER_CFG, "replication_rw_gap",
+			  "the value must be greater than or equal to 0");
+	}
+	return gap;
+}
+
static void
box_check_instance_uuid(struct tt_uuid *uuid)
{
@@ -739,6 +761,18 @@ box_set_replication_sync_timeout(void)
replication_sync_timeout = box_check_replication_sync_timeout();
}
+/**
+ * Validate the configured replication_dead_gap and, if the check
+ * passes, store it in the global of the same name.
+ */
+void
+box_set_replication_dead_gap(void)
+{
+	double checked_gap = box_check_replication_dead_gap();
+	replication_dead_gap = checked_gap;
+}
+
+/**
+ * Validate the configured replication_rw_gap and, if the check
+ * passes, store it in the global of the same name.
+ */
+void
+box_set_replication_rw_gap(void)
+{
+	double checked_gap = box_check_replication_rw_gap();
+	replication_rw_gap = checked_gap;
+}
+
void
box_set_replication_skip_conflict(void)
{
diff --git a/src/box/box.h b/src/box/box.h
index 9930d4a1a..bfb5bb873 100644
--- a/src/box/box.h
+++ b/src/box/box.h
@@ -198,6 +198,8 @@ void box_set_replication_connect_quorum(void);
void box_set_replication_sync_lag(void);
void box_set_replication_sync_timeout(void);
void box_set_replication_skip_conflict(void);
+void box_set_replication_dead_gap(void);
+void box_set_replication_rw_gap(void);
void box_set_net_msg_max(void);
extern "C" {
diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc
index c3825591c..f34b34bee 100644
--- a/src/box/lua/cfg.cc
+++ b/src/box/lua/cfg.cc
@@ -316,6 +316,28 @@ lbox_cfg_set_replication_sync_timeout(struct lua_State *L)
return 0;
}
+/**
+ * Lua-callable wrapper that applies the replication_dead_gap option
+ * by calling box_set_replication_dead_gap().
+ * Returns 0, i.e. pushes no values onto the Lua stack.
+ */
+static int
+lbox_cfg_set_replication_dead_gap(struct lua_State *L)
+{
+ try {
+ box_set_replication_dead_gap();
+ } catch (Exception *) {
+ /* Presumably converts the pending exception into a Lua error. */
+ luaT_error(L);
+ }
+ return 0;
+}
+
+/**
+ * Lua-callable wrapper that applies the replication_rw_gap option
+ * by calling box_set_replication_rw_gap().
+ * Returns 0, i.e. pushes no values onto the Lua stack.
+ */
+static int
+lbox_cfg_set_replication_rw_gap(struct lua_State *L)
+{
+ try {
+ box_set_replication_rw_gap();
+ } catch (Exception *) {
+ /* Presumably converts the pending exception into a Lua error. */
+ luaT_error(L);
+ }
+ return 0;
+}
+
static int
lbox_cfg_set_replication_skip_conflict(struct lua_State *L)
{
@@ -353,6 +375,8 @@ box_lua_cfg_init(struct lua_State *L)
{"cfg_set_replication_sync_lag", lbox_cfg_set_replication_sync_lag},
{"cfg_set_replication_sync_timeout", lbox_cfg_set_replication_sync_timeout},
{"cfg_set_replication_skip_conflict", lbox_cfg_set_replication_skip_conflict},
+ {"cfg_set_replication_dead_gap", lbox_cfg_set_replication_dead_gap},
+ {"cfg_set_replication_rw_gap", lbox_cfg_set_replication_rw_gap},
{"cfg_set_net_msg_max", lbox_cfg_set_net_msg_max},
{NULL, NULL}
};
diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
index f62f4dc1e..c15769dfe 100644
--- a/src/box/lua/load_cfg.lua
+++ b/src/box/lua/load_cfg.lua
@@ -76,6 +76,8 @@ local default_cfg = {
replication_connect_timeout = 30,
replication_connect_quorum = nil, -- connect all
replication_skip_conflict = false,
+ replication_dead_gap = 0,
+ replication_rw_gap = 0,
feedback_enabled = true,
feedback_host = "https://feedback.tarantool.io",
feedback_interval = 3600,
@@ -138,6 +140,8 @@ local template_cfg = {
replication_connect_timeout = 'number',
replication_connect_quorum = 'number',
replication_skip_conflict = 'boolean',
+ replication_dead_gap = 'number',
+ replication_rw_gap = 'number',
feedback_enabled = 'boolean',
feedback_host = 'string',
feedback_interval = 'number',
@@ -232,6 +236,8 @@ local dynamic_cfg = {
replication_sync_lag = private.cfg_set_replication_sync_lag,
replication_sync_timeout = private.cfg_set_replication_sync_timeout,
replication_skip_conflict = private.cfg_set_replication_skip_conflict,
+ replication_dead_gap = private.cfg_set_replication_dead_gap,
+ replication_rw_gap = private.cfg_set_replication_rw_gap,
instance_uuid = check_instance_uuid,
replicaset_uuid = check_replicaset_uuid,
net_msg_max = private.cfg_set_net_msg_max,
@@ -248,6 +254,8 @@ local dynamic_cfg_skip_at_load = {
replication_connect_quorum = true,
replication_sync_lag = true,
replication_sync_timeout = true,
+ replication_dead_gap = true,
+ replication_rw_gap = true,
wal_dir_rescan_delay = true,
custom_proc_title = true,
force_recovery = true,
diff --git a/src/box/replication.cc b/src/box/replication.cc
index 2cb4ec0f8..392f8d9fd 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -53,7 +53,8 @@ int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL;
double replication_sync_lag = 10.0; /* seconds */
double replication_sync_timeout = 300.0; /* seconds */
bool replication_skip_conflict = false;
-
+double replication_dead_gap = 0.0; /* hours */
+double replication_rw_gap = 0.0; /* hours */
struct replicaset replicaset;
static int
diff --git a/src/box/replication.h b/src/box/replication.h
index 2ac620d86..bf0c99fb7 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -137,6 +137,18 @@ extern double replication_sync_timeout;
*/
extern bool replication_skip_conflict;
+/**
+ * If a replica has been inactive for longer than replication_dead_gap
+ * (in hours), it is considered dead and can be thrown out from system space.
+ */
+extern double replication_dead_gap;
+
+/**
+ * If a replica has both roles and the gap between its read and write
+ * activities exceeds this value (in hours), it can be considered dead.
+ */
+extern double replication_rw_gap;
+
/**
* Wait for the given period of time before trying to reconnect
* to a master.
diff --git a/test/box/admin.result b/test/box/admin.result
index 8048460a1..5341e6f78 100644
--- a/test/box/admin.result
+++ b/test/box/admin.result
@@ -64,6 +64,10 @@ cfg_filter(box.cfg)
- 16320
- - replication_connect_timeout
- 30
+ - - replication_dead_gap
+ - 0
+ - - replication_rw_gap
+ - 0
- - replication_skip_conflict
- false
- - replication_sync_lag
diff --git a/test/box/cfg.result b/test/box/cfg.result
index 515033754..2f6bcd788 100644
--- a/test/box/cfg.result
+++ b/test/box/cfg.result
@@ -60,6 +60,10 @@ cfg_filter(box.cfg)
- 16320
- - replication_connect_timeout
- 30
+ - - replication_dead_gap
+ - 0
+ - - replication_rw_gap
+ - 0
- - replication_skip_conflict
- false
- - replication_sync_lag
@@ -161,6 +165,10 @@ cfg_filter(box.cfg)
- 16320
- - replication_connect_timeout
- 30
+ - - replication_dead_gap
+ - 0
+ - - replication_rw_gap
+ - 0
- - replication_skip_conflict
- false
- - replication_sync_lag
--
2.14.3 (Apple Git-98)
More information about the Tarantool-patches
mailing list