[tarantool-patches] [PATCH v2 2/3] box: add replication_sync_timeout
Olga Arkhangelskaia
krishtal.olja at gmail.com
Thu Aug 30 17:11:13 MSK 2018
In the scope of gh-3427 we need a timeout in case the replica set waits for
synchronization for too long, or even forever. Default value is 300 seconds.
Closes #3674
@TarantoolBot document
Title: Introduce new option replication_sync_timeout.
After initial bootstrap or after replication configuration changes we
need to sync up with replication quorum. Sometimes sync can take too
long, or replication_sync_lag can be smaller than network latency, and the
replica will get stuck in a sync loop that can't be cancelled. To avoid such
situations replication_sync_timeout can be used. When the time set in
replication_sync_timeout has passed, the replica enters orphan state.
Can be set dynamically. Default value is 300 seconds.
---
https://github.com/tarantool/tarantool/issues/3647
https://github.com/tarantool/tarantool/tree/OKriw/gh-3427-replication-no-sync-1.9
v1:
https://www.freelists.org/post/tarantool-patches/PATCH-23-box-add-replication-sync-lag-timeout
Changes in v2:
- renamed replication_sync_lag_timeout to replication_sync_timeout
- fiber_cond_timeout changed to deadline
- default time is set to 300
src/box/box.cc | 19 ++++++++++++++++++
src/box/box.h | 1 +
src/box/lua/cfg.cc | 12 ++++++++++++
src/box/lua/load_cfg.lua | 4 ++++
src/box/replication.cc | 16 +++++++++++----
src/box/replication.h | 6 ++++++
test/app-tap/init_script.result | 43 +++++++++++++++++++++--------------------
test/box-tap/cfg.test.lua | 9 ++++++++-
test/box/admin.result | 2 ++
test/box/cfg.result | 4 ++++
10 files changed, 90 insertions(+), 26 deletions(-)
diff --git a/src/box/box.cc b/src/box/box.cc
index 7155ad085..dcedfd002 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -420,6 +420,17 @@ box_check_replication_sync_lag(void)
return lag;
}
+static double
+box_check_replication_sync_timeout(void)
+{
+ double timeout = cfg_getd("replication_sync_timeout");
+ if (timeout <= 0) {
+ tnt_raise(ClientError, ER_CFG, "replication_sync_timeout",
+ "the value must be greater than 0");
+ }
+ return timeout;
+}
+
static void
box_check_instance_uuid(struct tt_uuid *uuid)
{
@@ -546,6 +557,7 @@ box_check_config()
box_check_replication_connect_timeout();
box_check_replication_connect_quorum();
box_check_replication_sync_lag();
+ box_check_replication_sync_timeout();
box_check_readahead(cfg_geti("readahead"));
box_check_checkpoint_count(cfg_geti("checkpoint_count"));
box_check_wal_max_rows(cfg_geti64("rows_per_wal"));
@@ -662,6 +674,12 @@ box_set_replication_sync_lag(void)
replication_sync_lag = box_check_replication_sync_lag();
}
+void
+box_set_replication_sync_timeout(void)
+{
+ replication_sync_timeout = box_check_replication_sync_timeout();
+}
+
void
box_bind(void)
{
@@ -1754,6 +1772,7 @@ box_cfg_xc(void)
box_set_replication_connect_timeout();
box_set_replication_connect_quorum();
box_set_replication_sync_lag();
+ box_set_replication_sync_timeout();
xstream_create(&join_stream, apply_initial_join_row);
xstream_create(&subscribe_stream, apply_row);
diff --git a/src/box/box.h b/src/box/box.h
index 3090fdcdb..6e1c13f59 100644
--- a/src/box/box.h
+++ b/src/box/box.h
@@ -177,6 +177,7 @@ void box_set_replication_timeout(void);
void box_set_replication_connect_timeout(void);
void box_set_replication_connect_quorum(void);
void box_set_replication_sync_lag(void);
+void box_set_replication_sync_timeout(void);
extern "C" {
#endif /* defined(__cplusplus) */
diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc
index 5442723b5..17431dc9f 100644
--- a/src/box/lua/cfg.cc
+++ b/src/box/lua/cfg.cc
@@ -273,6 +273,17 @@ lbox_cfg_set_replication_sync_lag(struct lua_State *L)
return 0;
}
+static int
+lbox_cfg_set_replication_sync_timeout(struct lua_State *L)
+{
+ try {
+ box_set_replication_sync_timeout();
+ } catch (Exception *) {
+ luaT_error(L);
+ }
+ return 0;
+}
+
void
box_lua_cfg_init(struct lua_State *L)
{
@@ -298,6 +309,7 @@ box_lua_cfg_init(struct lua_State *L)
{"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout},
{"cfg_set_replication_connect_quorum", lbox_cfg_set_replication_connect_quorum},
{"cfg_set_replication_sync_lag", lbox_cfg_set_replication_sync_lag},
+ {"cfg_set_replication_sync_timeout", lbox_cfg_set_replication_sync_timeout},
{NULL, NULL}
};
diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
index f803d8987..213904458 100644
--- a/src/box/lua/load_cfg.lua
+++ b/src/box/lua/load_cfg.lua
@@ -72,6 +72,7 @@ local default_cfg = {
worker_pool_threads = 4,
replication_timeout = 1,
replication_sync_lag = 10,
+ replication_sync_timeout = 300,
replication_connect_timeout = 30,
replication_connect_quorum = nil, -- connect all
}
@@ -128,6 +129,7 @@ local template_cfg = {
worker_pool_threads = 'number',
replication_timeout = 'number',
replication_sync_lag = 'number',
+ replication_sync_timeout = 'number',
replication_connect_timeout = 'number',
replication_connect_quorum = 'number',
}
@@ -200,6 +202,7 @@ local dynamic_cfg = {
replication_connect_timeout = private.cfg_set_replication_connect_timeout,
replication_connect_quorum = private.cfg_set_replication_connect_quorum,
replication_sync_lag = private.cfg_set_replication_sync_lag,
+ replication_sync_timeout = private.cfg_set_replication_sync_timeout,
instance_uuid = function()
if box.cfg.instance_uuid ~= box.info.uuid then
box.error(box.error.CFG, 'instance_uuid',
@@ -222,6 +225,7 @@ local dynamic_cfg_skip_at_load = {
replication_connect_timeout = true,
replication_connect_quorum = true,
replication_sync_lag = true,
+ replication_sync_timeout = true,
wal_dir_rescan_delay = true,
custom_proc_title = true,
force_recovery = true,
diff --git a/src/box/replication.cc b/src/box/replication.cc
index 861ce34ea..be58b0225 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -49,7 +49,7 @@ double replication_timeout = 1.0; /* seconds */
double replication_connect_timeout = 30.0; /* seconds */
int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL;
double replication_sync_lag = 10.0; /* seconds */
-
+double replication_sync_timeout = 300.0; /* seconds */
struct replicaset replicaset;
static int
@@ -673,12 +673,20 @@ replicaset_sync(void)
/*
* Wait until all connected replicas synchronize up to
- * replication_sync_lag
+ * replication_sync_lag or return on replication_sync_timeout
*/
+ double start_time = ev_monotonic_now(loop());
+ double deadline = start_time + replication_sync_timeout;
while (replicaset.applier.synced < quorum &&
replicaset.applier.connected +
- replicaset.applier.loading >= quorum)
- fiber_cond_wait(&replicaset.applier.cond);
+ replicaset.applier.loading >= quorum) {
+ if (fiber_cond_wait_deadline(&replicaset.applier.cond,
+ deadline) != 0) {
+ say_crit("replication_sync_timeout fired, entering orphan mode");
+ break;
+ }
+
+ }
if (replicaset.applier.synced < quorum) {
/*
diff --git a/src/box/replication.h b/src/box/replication.h
index 06a2867b6..a6f1dbf69 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -126,6 +126,12 @@ extern int replication_connect_quorum;
*/
extern double replication_sync_lag;
+/**
+ * Time to wait before entering orphan state in case of unsuccessful
+ * synchronization.
+ */
+extern double replication_sync_timeout;
+
/**
* Wait for the given period of time before trying to reconnect
* to a master.
diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result
index eea9f5bcf..261ddf3a4 100644
--- a/test/app-tap/init_script.result
+++ b/test/app-tap/init_script.result
@@ -23,27 +23,28 @@ box.cfg
18 readahead:16320
19 replication_connect_timeout:30
20 replication_sync_lag:10
-21 replication_timeout:1
-22 rows_per_wal:500000
-23 slab_alloc_factor:1.05
-24 too_long_threshold:0.5
-25 vinyl_bloom_fpr:0.05
-26 vinyl_cache:134217728
-27 vinyl_dir:.
-28 vinyl_max_tuple_size:1048576
-29 vinyl_memory:134217728
-30 vinyl_page_size:8192
-31 vinyl_range_size:1073741824
-32 vinyl_read_threads:1
-33 vinyl_run_count_per_level:2
-34 vinyl_run_size_ratio:3.5
-35 vinyl_timeout:60
-36 vinyl_write_threads:2
-37 wal_dir:.
-38 wal_dir_rescan_delay:2
-39 wal_max_size:268435456
-40 wal_mode:write
-41 worker_pool_threads:4
+21 replication_sync_timeout:300
+22 replication_timeout:1
+23 rows_per_wal:500000
+24 slab_alloc_factor:1.05
+25 too_long_threshold:0.5
+26 vinyl_bloom_fpr:0.05
+27 vinyl_cache:134217728
+28 vinyl_dir:.
+29 vinyl_max_tuple_size:1048576
+30 vinyl_memory:134217728
+31 vinyl_page_size:8192
+32 vinyl_range_size:1073741824
+33 vinyl_read_threads:1
+34 vinyl_run_count_per_level:2
+35 vinyl_run_size_ratio:3.5
+36 vinyl_timeout:60
+37 vinyl_write_threads:2
+38 wal_dir:.
+39 wal_dir_rescan_delay:2
+40 wal_max_size:268435456
+41 wal_mode:write
+42 worker_pool_threads:4
--
-- Test insert from detached fiber
--
diff --git a/test/box-tap/cfg.test.lua b/test/box-tap/cfg.test.lua
index d315346de..023a2af72 100755
--- a/test/box-tap/cfg.test.lua
+++ b/test/box-tap/cfg.test.lua
@@ -6,7 +6,7 @@ local socket = require('socket')
local fio = require('fio')
local uuid = require('uuid')
local msgpack = require('msgpack')
-test:plan(91)
+test:plan(94)
--------------------------------------------------------------------------------
-- Invalid values
@@ -29,6 +29,8 @@ invalid('replication_timeout', -1)
invalid('replication_timeout', 0)
invalid('replication_sync_lag', -1)
invalid('replication_sync_lag', 0)
+invalid('replication_sync_timeout', -1)
+invalid('replication_sync_timeout', 0)
invalid('replication_connect_timeout', -1)
invalid('replication_connect_timeout', 0)
invalid('replication_connect_quorum', -1)
@@ -100,6 +102,11 @@ status, result = pcall(box.cfg, {replication_sync_lag = 1})
test:ok(status, "dynamic replication_sync_lag")
pcall(box.cfg, {repliction_sync_lag = lag})
+timeout = box.cfg.replication_sync_timeout
+status, result = pcall(box.cfg, {replication_sync_timeout = 10})
+test:ok(status, "dynamic replication_sync_timeout")
+pcall(box.cfg, {repliction_sync_timeout = timeout})
+
--------------------------------------------------------------------------------
-- gh-534: Segmentation fault after two bad wal_mode settings
--------------------------------------------------------------------------------
diff --git a/test/box/admin.result b/test/box/admin.result
index c3e318a6a..ace88e6e9 100644
--- a/test/box/admin.result
+++ b/test/box/admin.result
@@ -58,6 +58,8 @@ cfg_filter(box.cfg)
- 30
- - replication_sync_lag
- 10
+ - - replication_sync_timeout
+ - 300
- - replication_timeout
- 1
- - rows_per_wal
diff --git a/test/box/cfg.result b/test/box/cfg.result
index a2df83310..816178513 100644
--- a/test/box/cfg.result
+++ b/test/box/cfg.result
@@ -54,6 +54,8 @@ cfg_filter(box.cfg)
- 30
- - replication_sync_lag
- 10
+ - - replication_sync_timeout
+ - 300
- - replication_timeout
- 1
- - rows_per_wal
@@ -143,6 +145,8 @@ cfg_filter(box.cfg)
- 30
- - replication_sync_lag
- 10
+ - - replication_sync_timeout
+ - 300
- - replication_timeout
- 1
- - rows_per_wal
--
2.14.3 (Apple Git-98)
More information about the Tarantool-patches
mailing list