Tarantool development patches archive
 help / color / mirror / Atom feed
From: Olga Arkhangelskaia <arkholga@tarantool.org>
To: tarantool-patches@freelists.org
Cc: Olga Arkhangelskaia <arkholga@tarantool.org>
Subject: [tarantool-patches] [PATCH v2 1/2] box: added replication_dead/rw_gap options
Date: Fri, 12 Oct 2018 22:45:56 +0300	[thread overview]
Message-ID: <20181012194557.7445-2-arkholga@tarantool.org> (raw)
In-Reply-To: <20181012194557.7445-1-arkholga@tarantool.org>

In the scope of gh-3110 we need options that store time periods to be
compared with the time of the last activity of the relay and the applier.
This patch introduces the replication_dead_gap and replication_rw_gap options.

replication_dead_gap is configured in box.cfg; its default value is 0.
If the time elapsed since the last reader/writer activity of a given replica
exceeds replication_dead_gap, the replica is suspected to be dead.
replication_dead_gap is measured in hours.

replication_rw_gap is configured in box.cfg; its default value is 0.
If the difference between the times of the last reader activity and the last
writer activity of a given replica exceeds replication_rw_gap, the replica is
suspected to be dead. replication_rw_gap is measured in hours.
---
 src/box/box.cc           | 34 ++++++++++++++++++++++++++++++++++
 src/box/box.h            |  2 ++
 src/box/lua/cfg.cc       | 24 ++++++++++++++++++++++++
 src/box/lua/load_cfg.lua |  8 ++++++++
 src/box/replication.cc   |  3 ++-
 src/box/replication.h    | 12 ++++++++++++
 test/box/admin.result    |  4 ++++
 test/box/cfg.result      |  8 ++++++++
 8 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index 7e32b9fc7..f74e012f7 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -465,6 +465,28 @@ box_check_replication_sync_timeout(void)
 	return timeout;
 }
 
+static double
+box_check_replication_dead_gap(void)
+{
+	double gap = cfg_getd("replication_dead_gap");
+	if (gap <= 0) {
+		tnt_raise(ClientError, ER_CFG, "replication_dead_gap",
+			  "the value must be greater than 0");
+	}
+	return gap;
+}
+
+static double
+box_check_replication_rw_gap(void)
+{
+	double gap = cfg_getd("replication_rw_gap");
+	if (gap <= 0) {
+		tnt_raise(ClientError, ER_CFG, "replication_rw_gap",
+			  "the value must be greater than 0");
+	}
+	return gap;
+}
+
 static void
 box_check_instance_uuid(struct tt_uuid *uuid)
 {
@@ -739,6 +761,18 @@ box_set_replication_sync_timeout(void)
 	replication_sync_timeout = box_check_replication_sync_timeout();
 }
 
+void
+box_set_replication_dead_gap(void)
+{
+	replication_dead_gap = box_check_replication_dead_gap();
+}
+
+void
+box_set_replication_rw_gap(void)
+{
+	replication_rw_gap = box_check_replication_rw_gap();
+}
+
 void
 box_set_replication_skip_conflict(void)
 {
diff --git a/src/box/box.h b/src/box/box.h
index 9930d4a1a..bfb5bb873 100644
--- a/src/box/box.h
+++ b/src/box/box.h
@@ -198,6 +198,8 @@ void box_set_replication_connect_quorum(void);
 void box_set_replication_sync_lag(void);
 void box_set_replication_sync_timeout(void);
 void box_set_replication_skip_conflict(void);
+void box_set_replication_dead_gap(void);
+void box_set_replication_rw_gap(void);
 void box_set_net_msg_max(void);
 
 extern "C" {
diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc
index c3825591c..f34b34bee 100644
--- a/src/box/lua/cfg.cc
+++ b/src/box/lua/cfg.cc
@@ -316,6 +316,28 @@ lbox_cfg_set_replication_sync_timeout(struct lua_State *L)
 	return 0;
 }
 
+static int
+lbox_cfg_set_replication_dead_gap(struct lua_State *L)
+{
+	try {
+		box_set_replication_dead_gap();
+	} catch (Exception *) {
+		luaT_error(L);
+	}
+	return 0;
+}
+
+static int
+lbox_cfg_set_replication_rw_gap(struct lua_State *L)
+{
+	try {
+		box_set_replication_rw_gap();
+	} catch (Exception *) {
+		luaT_error(L);
+	}
+	return 0;
+}
+
 static int
 lbox_cfg_set_replication_skip_conflict(struct lua_State *L)
 {
@@ -353,6 +375,8 @@ box_lua_cfg_init(struct lua_State *L)
 		{"cfg_set_replication_sync_lag", lbox_cfg_set_replication_sync_lag},
 		{"cfg_set_replication_sync_timeout", lbox_cfg_set_replication_sync_timeout},
 		{"cfg_set_replication_skip_conflict", lbox_cfg_set_replication_skip_conflict},
+		{"cfg_set_replication_dead_gap", lbox_cfg_set_replication_dead_gap},
+		{"cfg_set_replication_rw_gap", lbox_cfg_set_replication_rw_gap},
 		{"cfg_set_net_msg_max", lbox_cfg_set_net_msg_max},
 		{NULL, NULL}
 	};
diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
index f62f4dc1e..c15769dfe 100644
--- a/src/box/lua/load_cfg.lua
+++ b/src/box/lua/load_cfg.lua
@@ -76,6 +76,8 @@ local default_cfg = {
     replication_connect_timeout = 30,
     replication_connect_quorum = nil, -- connect all
     replication_skip_conflict = false,
+    replication_dead_gap = 0,
+    replication_rw_gap = 0,
     feedback_enabled      = true,
     feedback_host         = "https://feedback.tarantool.io",
     feedback_interval     = 3600,
@@ -138,6 +140,8 @@ local template_cfg = {
     replication_connect_timeout = 'number',
     replication_connect_quorum = 'number',
     replication_skip_conflict = 'boolean',
+    replication_dead_gap = 'number',
+    replication_rw_gap  = 'number',
     feedback_enabled      = 'boolean',
     feedback_host         = 'string',
     feedback_interval     = 'number',
@@ -232,6 +236,8 @@ local dynamic_cfg = {
     replication_sync_lag    = private.cfg_set_replication_sync_lag,
     replication_sync_timeout = private.cfg_set_replication_sync_timeout,
     replication_skip_conflict = private.cfg_set_replication_skip_conflict,
+    replication_dead_gap    = private.cfg_set_replication_dead_gap,
+    replication_rw_gap      = private.cfg_set_replication_rw_gap,
     instance_uuid           = check_instance_uuid,
     replicaset_uuid         = check_replicaset_uuid,
     net_msg_max             = private.cfg_set_net_msg_max,
@@ -248,6 +254,8 @@ local dynamic_cfg_skip_at_load = {
     replication_connect_quorum = true,
     replication_sync_lag    = true,
     replication_sync_timeout = true,
+    replication_dead_gap    = true,
+    replication_rw_gap      = true,
     wal_dir_rescan_delay    = true,
     custom_proc_title       = true,
     force_recovery          = true,
diff --git a/src/box/replication.cc b/src/box/replication.cc
index 2cb4ec0f8..392f8d9fd 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -53,7 +53,8 @@ int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL;
 double replication_sync_lag = 10.0; /* seconds */
 double replication_sync_timeout = 300.0; /* seconds */
 bool replication_skip_conflict = false;
-
+double replication_dead_gap = 0.0; /* hours */
+double replication_rw_gap = 0.0; /* hours */
 struct replicaset replicaset;
 
 static int
diff --git a/src/box/replication.h b/src/box/replication.h
index 2ac620d86..bf0c99fb7 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -137,6 +137,18 @@ extern double replication_sync_timeout;
  */
 extern bool replication_skip_conflict;
 
+/*
+ * If a replica has been inactive for longer than replication_dead_gap, it
+ * is considered dead and can be removed from the system space.
+ */
+extern double replication_dead_gap;
+
+/*
+ * If a replica has both roles and the gap between its read and write
+ * activities exceeds this value, it can be considered dead.
+ */
+extern double replication_rw_gap;
+
 /**
  * Wait for the given period of time before trying to reconnect
  * to a master.
diff --git a/test/box/admin.result b/test/box/admin.result
index 8048460a1..5341e6f78 100644
--- a/test/box/admin.result
+++ b/test/box/admin.result
@@ -64,6 +64,10 @@ cfg_filter(box.cfg)
     - 16320
   - - replication_connect_timeout
     - 30
+  - - replication_dead_gap
+    - 0
+  - - replication_rw_gap
+    - 0
   - - replication_skip_conflict
     - false
   - - replication_sync_lag
diff --git a/test/box/cfg.result b/test/box/cfg.result
index 515033754..2f6bcd788 100644
--- a/test/box/cfg.result
+++ b/test/box/cfg.result
@@ -60,6 +60,10 @@ cfg_filter(box.cfg)
     - 16320
   - - replication_connect_timeout
     - 30
+  - - replication_dead_gap
+    - 0
+  - - replication_rw_gap
+    - 0
   - - replication_skip_conflict
     - false
   - - replication_sync_lag
@@ -161,6 +165,10 @@ cfg_filter(box.cfg)
     - 16320
   - - replication_connect_timeout
     - 30
+  - - replication_dead_gap
+    - 0
+  - - replication_rw_gap
+    - 0
   - - replication_skip_conflict
     - false
   - - replication_sync_lag
-- 
2.14.3 (Apple Git-98)

  reply	other threads:[~2018-10-12 19:46 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-12 19:45 [tarantool-patches] [PATCH v2 0/2] detect and throw away dead replicas Olga Arkhangelskaia
2018-10-12 19:45 ` Olga Arkhangelskaia [this message]
2018-10-15 10:22   ` [tarantool-patches] [PATCH v2 1/2] box: added replication_dead/rw_gap options Vladimir Davydov
2018-10-23  7:10   ` [tarantool-patches] " Konstantin Osipov
2018-10-12 19:45 ` [tarantool-patches] [PATCH v2 2/2] ctl: added functionality to detect and prune dead replicas Olga Arkhangelskaia
2018-10-15 12:43   ` Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181012194557.7445-2-arkholga@tarantool.org \
    --to=arkholga@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [tarantool-patches] [PATCH v2 1/2] box: added replication_dead/rw_gap options' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox