[tarantool-patches] [PATCH v2 2/3] box: add replication_sync_timeout

Olga Arkhangelskaia krishtal.olja at gmail.com
Thu Aug 30 17:11:13 MSK 2018


In the scope of gh-3427 we need a timeout for the case when the replica
set waits for synchronization for too long, or even forever. The default
value is 300 seconds.

Closes #3674

@TarantoolBot document
Title: Introduce new option replication_sync_timeout.
After initial bootstrap or after replication configuration changes we
need to sync up with the replication quorum. Sometimes the sync can take
too long, or replication_sync_lag can be smaller than the network
latency, and the replica gets stuck in a sync loop that can't be
cancelled. To avoid such situations replication_sync_timeout can be
used. When the time set in replication_sync_timeout has passed, the
replica enters the orphan state.
The option can be set dynamically. The default value is 300 seconds.
---
https://github.com/tarantool/tarantool/issues/3647
https://github.com/tarantool/tarantool/tree/OKriw/gh-3427-replication-no-sync-1.9

v1:
https://www.freelists.org/post/tarantool-patches/PATCH-23-box-add-replication-sync-lag-timeout

Changes in v2:
- renamed replication_sync_lag_timeout to replication_sync_timeout
- fiber_cond_timeout changed to deadline
- default time is set to 300

 src/box/box.cc                  | 19 ++++++++++++++++++
 src/box/box.h                   |  1 +
 src/box/lua/cfg.cc              | 12 ++++++++++++
 src/box/lua/load_cfg.lua        |  4 ++++
 src/box/replication.cc          | 16 +++++++++++----
 src/box/replication.h           |  6 ++++++
 test/app-tap/init_script.result | 43 +++++++++++++++++++++--------------------
 test/box-tap/cfg.test.lua       |  9 ++++++++-
 test/box/admin.result           |  2 ++
 test/box/cfg.result             |  4 ++++
 10 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index 7155ad085..dcedfd002 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -420,6 +420,17 @@ box_check_replication_sync_lag(void)
 	return lag;
 }
 
+static double
+box_check_replication_sync_timeout(void)
+{
+	double timeout = cfg_getd("replication_sync_timeout");
+	if (timeout <= 0) {
+		tnt_raise(ClientError, ER_CFG, "replication_sync_timeout",
+			  "the value must be greater than 0");
+	}
+	return timeout;
+}
+
 static void
 box_check_instance_uuid(struct tt_uuid *uuid)
 {
@@ -546,6 +557,7 @@ box_check_config()
 	box_check_replication_connect_timeout();
 	box_check_replication_connect_quorum();
 	box_check_replication_sync_lag();
+	box_check_replication_sync_timeout();
 	box_check_readahead(cfg_geti("readahead"));
 	box_check_checkpoint_count(cfg_geti("checkpoint_count"));
 	box_check_wal_max_rows(cfg_geti64("rows_per_wal"));
@@ -662,6 +674,12 @@ box_set_replication_sync_lag(void)
 	replication_sync_lag = box_check_replication_sync_lag();
 }
 
+void
+box_set_replication_sync_timeout(void)
+{
+	replication_sync_timeout = box_check_replication_sync_timeout();
+}
+
 void
 box_bind(void)
 {
@@ -1754,6 +1772,7 @@ box_cfg_xc(void)
 	box_set_replication_connect_timeout();
 	box_set_replication_connect_quorum();
 	box_set_replication_sync_lag();
+	box_set_replication_sync_timeout();
 	xstream_create(&join_stream, apply_initial_join_row);
 	xstream_create(&subscribe_stream, apply_row);
 
diff --git a/src/box/box.h b/src/box/box.h
index 3090fdcdb..6e1c13f59 100644
--- a/src/box/box.h
+++ b/src/box/box.h
@@ -177,6 +177,7 @@ void box_set_replication_timeout(void);
 void box_set_replication_connect_timeout(void);
 void box_set_replication_connect_quorum(void);
 void box_set_replication_sync_lag(void);
+void box_set_replication_sync_timeout(void);
 
 extern "C" {
 #endif /* defined(__cplusplus) */
diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc
index 5442723b5..17431dc9f 100644
--- a/src/box/lua/cfg.cc
+++ b/src/box/lua/cfg.cc
@@ -273,6 +273,17 @@ lbox_cfg_set_replication_sync_lag(struct lua_State *L)
 	return 0;
 }
 
+static int
+lbox_cfg_set_replication_sync_timeout(struct lua_State *L)
+{
+	try {
+		box_set_replication_sync_timeout();
+	} catch (Exception *) {
+		luaT_error(L);
+	}
+	return 0;
+}
+
 void
 box_lua_cfg_init(struct lua_State *L)
 {
@@ -298,6 +309,7 @@ box_lua_cfg_init(struct lua_State *L)
 		{"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout},
 		{"cfg_set_replication_connect_quorum", lbox_cfg_set_replication_connect_quorum},
 		{"cfg_set_replication_sync_lag", lbox_cfg_set_replication_sync_lag},
+		{"cfg_set_replication_sync_timeout", lbox_cfg_set_replication_sync_timeout},
 		{NULL, NULL}
 	};
 
diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
index f803d8987..213904458 100644
--- a/src/box/lua/load_cfg.lua
+++ b/src/box/lua/load_cfg.lua
@@ -72,6 +72,7 @@ local default_cfg = {
     worker_pool_threads = 4,
     replication_timeout = 1,
     replication_sync_lag = 10,
+    replication_sync_timeout = 300,
     replication_connect_timeout = 30,
     replication_connect_quorum = nil, -- connect all
 }
@@ -128,6 +129,7 @@ local template_cfg = {
     worker_pool_threads = 'number',
     replication_timeout = 'number',
     replication_sync_lag = 'number',
+    replication_sync_timeout = 'number',
     replication_connect_timeout = 'number',
     replication_connect_quorum = 'number',
 }
@@ -200,6 +202,7 @@ local dynamic_cfg = {
     replication_connect_timeout = private.cfg_set_replication_connect_timeout,
     replication_connect_quorum = private.cfg_set_replication_connect_quorum,
     replication_sync_lag    = private.cfg_set_replication_sync_lag,
+    replication_sync_timeout    = private.cfg_set_replication_sync_timeout,
     instance_uuid           = function()
         if box.cfg.instance_uuid ~= box.info.uuid then
             box.error(box.error.CFG, 'instance_uuid',
@@ -222,6 +225,7 @@ local dynamic_cfg_skip_at_load = {
     replication_connect_timeout = true,
     replication_connect_quorum = true,
     replication_sync_lag    = true,
+    replication_sync_timeout = true,
     wal_dir_rescan_delay    = true,
     custom_proc_title       = true,
     force_recovery          = true,
diff --git a/src/box/replication.cc b/src/box/replication.cc
index 861ce34ea..be58b0225 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -49,7 +49,7 @@ double replication_timeout = 1.0; /* seconds */
 double replication_connect_timeout = 30.0; /* seconds */
 int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL;
 double replication_sync_lag = 10.0; /* seconds */
-
+double replication_sync_timeout = 300.0; /* seconds */
 struct replicaset replicaset;
 
 static int
@@ -673,12 +673,20 @@ replicaset_sync(void)
 
 	/*
 	 * Wait until all connected replicas synchronize up to
-	 * replication_sync_lag
+	 * replication_sync_lag or return on replication_sync_timeout
 	 */
+	double start_time = ev_monotonic_now(loop());
+	double deadline = start_time + replication_sync_timeout;
 	while (replicaset.applier.synced < quorum &&
 	       replicaset.applier.connected +
-	       replicaset.applier.loading >= quorum)
-		fiber_cond_wait(&replicaset.applier.cond);
+	       replicaset.applier.loading >= quorum) {
+		if (fiber_cond_wait_deadline(&replicaset.applier.cond,
+				            deadline) != 0) {
+			say_crit("replication_sync_timeout fired, entering orphan mode");
+			break;
+		}
+
+	}
 
 	if (replicaset.applier.synced < quorum) {
 		/*
diff --git a/src/box/replication.h b/src/box/replication.h
index 06a2867b6..a6f1dbf69 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -126,6 +126,12 @@ extern int replication_connect_quorum;
  */
 extern double replication_sync_lag;
 
+/**
+ * Time to wait before entering the orphan state in case of
+ * unsuccessful synchronization.
+ */
+extern double replication_sync_timeout;
+
 /**
  * Wait for the given period of time before trying to reconnect
  * to a master.
diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result
index eea9f5bcf..261ddf3a4 100644
--- a/test/app-tap/init_script.result
+++ b/test/app-tap/init_script.result
@@ -23,27 +23,28 @@ box.cfg
 18	readahead:16320
 19	replication_connect_timeout:30
 20	replication_sync_lag:10
-21	replication_timeout:1
-22	rows_per_wal:500000
-23	slab_alloc_factor:1.05
-24	too_long_threshold:0.5
-25	vinyl_bloom_fpr:0.05
-26	vinyl_cache:134217728
-27	vinyl_dir:.
-28	vinyl_max_tuple_size:1048576
-29	vinyl_memory:134217728
-30	vinyl_page_size:8192
-31	vinyl_range_size:1073741824
-32	vinyl_read_threads:1
-33	vinyl_run_count_per_level:2
-34	vinyl_run_size_ratio:3.5
-35	vinyl_timeout:60
-36	vinyl_write_threads:2
-37	wal_dir:.
-38	wal_dir_rescan_delay:2
-39	wal_max_size:268435456
-40	wal_mode:write
-41	worker_pool_threads:4
+21	replication_sync_timeout:300
+22	replication_timeout:1
+23	rows_per_wal:500000
+24	slab_alloc_factor:1.05
+25	too_long_threshold:0.5
+26	vinyl_bloom_fpr:0.05
+27	vinyl_cache:134217728
+28	vinyl_dir:.
+29	vinyl_max_tuple_size:1048576
+30	vinyl_memory:134217728
+31	vinyl_page_size:8192
+32	vinyl_range_size:1073741824
+33	vinyl_read_threads:1
+34	vinyl_run_count_per_level:2
+35	vinyl_run_size_ratio:3.5
+36	vinyl_timeout:60
+37	vinyl_write_threads:2
+38	wal_dir:.
+39	wal_dir_rescan_delay:2
+40	wal_max_size:268435456
+41	wal_mode:write
+42	worker_pool_threads:4
 --
 -- Test insert from detached fiber
 --
diff --git a/test/box-tap/cfg.test.lua b/test/box-tap/cfg.test.lua
index d315346de..023a2af72 100755
--- a/test/box-tap/cfg.test.lua
+++ b/test/box-tap/cfg.test.lua
@@ -6,7 +6,7 @@ local socket = require('socket')
 local fio = require('fio')
 local uuid = require('uuid')
 local msgpack = require('msgpack')
-test:plan(91)
+test:plan(94)
 
 --------------------------------------------------------------------------------
 -- Invalid values
@@ -29,6 +29,8 @@ invalid('replication_timeout', -1)
 invalid('replication_timeout', 0)
 invalid('replication_sync_lag', -1)
 invalid('replication_sync_lag', 0)
+invalid('replication_sync_timeout', -1)
+invalid('replication_sync_timeout', 0)
 invalid('replication_connect_timeout', -1)
 invalid('replication_connect_timeout', 0)
 invalid('replication_connect_quorum', -1)
@@ -100,6 +102,11 @@ status, result = pcall(box.cfg, {replication_sync_lag = 1})
 test:ok(status, "dynamic replication_sync_lag")
 pcall(box.cfg, {repliction_sync_lag = lag})
 
+timeout = box.cfg.replication_sync_timeout
+status, result = pcall(box.cfg, {replication_sync_timeout = 10})
+test:ok(status, "dynamic replication_sync_timeout")
+pcall(box.cfg, {repliction_sync_timeout = timeout})
+
 --------------------------------------------------------------------------------
 -- gh-534: Segmentation fault after two bad wal_mode settings
 --------------------------------------------------------------------------------
diff --git a/test/box/admin.result b/test/box/admin.result
index c3e318a6a..ace88e6e9 100644
--- a/test/box/admin.result
+++ b/test/box/admin.result
@@ -58,6 +58,8 @@ cfg_filter(box.cfg)
     - 30
   - - replication_sync_lag
     - 10
+  - - replication_sync_timeout
+    - 300
   - - replication_timeout
     - 1
   - - rows_per_wal
diff --git a/test/box/cfg.result b/test/box/cfg.result
index a2df83310..816178513 100644
--- a/test/box/cfg.result
+++ b/test/box/cfg.result
@@ -54,6 +54,8 @@ cfg_filter(box.cfg)
     - 30
   - - replication_sync_lag
     - 10
+  - - replication_sync_timeout
+    - 300
   - - replication_timeout
     - 1
   - - rows_per_wal
@@ -143,6 +145,8 @@ cfg_filter(box.cfg)
     - 30
   - - replication_sync_lag
     - 10
+  - - replication_sync_timeout
+    - 300
   - - replication_timeout
     - 1
   - - rows_per_wal
-- 
2.14.3 (Apple Git-98)





More information about the Tarantool-patches mailing list