* [tarantool-patches] [PATCH v2 2/3] box: add replication_sync_timeout
2018-08-30 14:11 [tarantool-patches] [PATCH 1/3] box: make replication_sync_lag option dynamic Olga Arkhangelskaia
@ 2018-08-30 14:11 ` Olga Arkhangelskaia
2018-08-30 16:11 ` Vladimir Davydov
2018-08-30 14:11 ` [tarantool-patches] [PATCH v6 3/3] box: adds replication sync after cfg. update Olga Arkhangelskaia
1 sibling, 1 reply; 5+ messages in thread
From: Olga Arkhangelskaia @ 2018-08-30 14:11 UTC (permalink / raw)
To: tarantool-patches; +Cc: Olga Arkhangelskaia
In scope of gh-3427 we need a timeout in case the replica set waits for
synchronization for too long, or even forever. Default value is 300.
Closes #3674
@TarantoolBot document
Title: Introduce new option replication_sync_timeout.
After initial bootstrap or after replication configuration changes we
need to sync up with the replication quorum. Sometimes sync can take too
long, or replication_sync_lag can be smaller than network latency, and the
replica will get stuck in a sync loop that can't be cancelled. To avoid
such situations replication_sync_timeout can be used. When the time set in
replication_sync_timeout has passed, the replica enters the orphan state.
Can be set dynamically. Default value is 300 seconds.
---
https://github.com/tarantool/tarantool/issues/3674
https://github.com/tarantool/tarantool/tree/OKriw/gh-3427-replication-no-sync-1.9
v1:
https://www.freelists.org/post/tarantool-patches/PATCH-23-box-add-replication-sync-lag-timeout
Changes in v2:
- renamed replication_sync_lag_timeout to replication_sync_timeout
- fiber_cond_timeout changed to deadline
- default time is set to 300
src/box/box.cc | 19 ++++++++++++++++++
src/box/box.h | 1 +
src/box/lua/cfg.cc | 12 ++++++++++++
src/box/lua/load_cfg.lua | 4 ++++
src/box/replication.cc | 16 +++++++++++----
src/box/replication.h | 6 ++++++
test/app-tap/init_script.result | 43 +++++++++++++++++++++--------------------
test/box-tap/cfg.test.lua | 9 ++++++++-
test/box/admin.result | 2 ++
test/box/cfg.result | 4 ++++
10 files changed, 90 insertions(+), 26 deletions(-)
diff --git a/src/box/box.cc b/src/box/box.cc
index 7155ad085..dcedfd002 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -420,6 +420,17 @@ box_check_replication_sync_lag(void)
return lag;
}
+static double
+box_check_replication_sync_timeout(void)
+{
+ double timeout = cfg_getd("replication_sync_timeout");
+ if (timeout <= 0) {
+ tnt_raise(ClientError, ER_CFG, "replication_sync_timeout",
+ "the value must be greater than 0");
+ }
+ return timeout;
+}
+
static void
box_check_instance_uuid(struct tt_uuid *uuid)
{
@@ -546,6 +557,7 @@ box_check_config()
box_check_replication_connect_timeout();
box_check_replication_connect_quorum();
box_check_replication_sync_lag();
+ box_check_replication_sync_timeout();
box_check_readahead(cfg_geti("readahead"));
box_check_checkpoint_count(cfg_geti("checkpoint_count"));
box_check_wal_max_rows(cfg_geti64("rows_per_wal"));
@@ -662,6 +674,12 @@ box_set_replication_sync_lag(void)
replication_sync_lag = box_check_replication_sync_lag();
}
+void
+box_set_replication_sync_timeout(void)
+{
+ replication_sync_timeout = box_check_replication_sync_timeout();
+}
+
void
box_bind(void)
{
@@ -1754,6 +1772,7 @@ box_cfg_xc(void)
box_set_replication_connect_timeout();
box_set_replication_connect_quorum();
box_set_replication_sync_lag();
+ box_set_replication_sync_timeout();
xstream_create(&join_stream, apply_initial_join_row);
xstream_create(&subscribe_stream, apply_row);
diff --git a/src/box/box.h b/src/box/box.h
index 3090fdcdb..6e1c13f59 100644
--- a/src/box/box.h
+++ b/src/box/box.h
@@ -177,6 +177,7 @@ void box_set_replication_timeout(void);
void box_set_replication_connect_timeout(void);
void box_set_replication_connect_quorum(void);
void box_set_replication_sync_lag(void);
+void box_set_replication_sync_timeout(void);
extern "C" {
#endif /* defined(__cplusplus) */
diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc
index 5442723b5..17431dc9f 100644
--- a/src/box/lua/cfg.cc
+++ b/src/box/lua/cfg.cc
@@ -273,6 +273,17 @@ lbox_cfg_set_replication_sync_lag(struct lua_State *L)
return 0;
}
+static int
+lbox_cfg_set_replication_sync_timeout(struct lua_State *L)
+{
+ try {
+ box_set_replication_sync_timeout();
+ } catch (Exception *) {
+ luaT_error(L);
+ }
+ return 0;
+}
+
void
box_lua_cfg_init(struct lua_State *L)
{
@@ -298,6 +309,7 @@ box_lua_cfg_init(struct lua_State *L)
{"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout},
{"cfg_set_replication_connect_quorum", lbox_cfg_set_replication_connect_quorum},
{"cfg_set_replication_sync_lag", lbox_cfg_set_replication_sync_lag},
+ {"cfg_set_replication_sync_timeout", lbox_cfg_set_replication_sync_timeout},
{NULL, NULL}
};
diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
index f803d8987..213904458 100644
--- a/src/box/lua/load_cfg.lua
+++ b/src/box/lua/load_cfg.lua
@@ -72,6 +72,7 @@ local default_cfg = {
worker_pool_threads = 4,
replication_timeout = 1,
replication_sync_lag = 10,
+ replication_sync_timeout = 300,
replication_connect_timeout = 30,
replication_connect_quorum = nil, -- connect all
}
@@ -128,6 +129,7 @@ local template_cfg = {
worker_pool_threads = 'number',
replication_timeout = 'number',
replication_sync_lag = 'number',
+ replication_sync_timeout = 'number',
replication_connect_timeout = 'number',
replication_connect_quorum = 'number',
}
@@ -200,6 +202,7 @@ local dynamic_cfg = {
replication_connect_timeout = private.cfg_set_replication_connect_timeout,
replication_connect_quorum = private.cfg_set_replication_connect_quorum,
replication_sync_lag = private.cfg_set_replication_sync_lag,
+ replication_sync_timeout = private.cfg_set_replication_sync_timeout,
instance_uuid = function()
if box.cfg.instance_uuid ~= box.info.uuid then
box.error(box.error.CFG, 'instance_uuid',
@@ -222,6 +225,7 @@ local dynamic_cfg_skip_at_load = {
replication_connect_timeout = true,
replication_connect_quorum = true,
replication_sync_lag = true,
+ replication_sync_timeout = true,
wal_dir_rescan_delay = true,
custom_proc_title = true,
force_recovery = true,
diff --git a/src/box/replication.cc b/src/box/replication.cc
index 861ce34ea..be58b0225 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -49,7 +49,7 @@ double replication_timeout = 1.0; /* seconds */
double replication_connect_timeout = 30.0; /* seconds */
int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL;
double replication_sync_lag = 10.0; /* seconds */
-
+double replication_sync_timeout = 300.0; /* seconds */
struct replicaset replicaset;
static int
@@ -673,12 +673,20 @@ replicaset_sync(void)
/*
* Wait until all connected replicas synchronize up to
- * replication_sync_lag
+ * replication_sync_lag or return on replication_sync_timeout
*/
+ double start_time = ev_monotonic_now(loop());
+ double deadline = start_time + replication_sync_timeout;
while (replicaset.applier.synced < quorum &&
replicaset.applier.connected +
- replicaset.applier.loading >= quorum)
- fiber_cond_wait(&replicaset.applier.cond);
+ replicaset.applier.loading >= quorum) {
+ if (fiber_cond_wait_deadline(&replicaset.applier.cond,
+ deadline) != 0) {
+ say_crit("replication_sync_timeout fired, entering orphan mode");
+ break;
+ }
+
+ }
if (replicaset.applier.synced < quorum) {
/*
diff --git a/src/box/replication.h b/src/box/replication.h
index 06a2867b6..a6f1dbf69 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -126,6 +126,12 @@ extern int replication_connect_quorum;
*/
extern double replication_sync_lag;
+/**
+ * Time to wait before entering orphan state in case of unsuccessful
+ * synchronization.
+ */
+extern double replication_sync_timeout;
+
/**
* Wait for the given period of time before trying to reconnect
* to a master.
diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result
index eea9f5bcf..261ddf3a4 100644
--- a/test/app-tap/init_script.result
+++ b/test/app-tap/init_script.result
@@ -23,27 +23,28 @@ box.cfg
18 readahead:16320
19 replication_connect_timeout:30
20 replication_sync_lag:10
-21 replication_timeout:1
-22 rows_per_wal:500000
-23 slab_alloc_factor:1.05
-24 too_long_threshold:0.5
-25 vinyl_bloom_fpr:0.05
-26 vinyl_cache:134217728
-27 vinyl_dir:.
-28 vinyl_max_tuple_size:1048576
-29 vinyl_memory:134217728
-30 vinyl_page_size:8192
-31 vinyl_range_size:1073741824
-32 vinyl_read_threads:1
-33 vinyl_run_count_per_level:2
-34 vinyl_run_size_ratio:3.5
-35 vinyl_timeout:60
-36 vinyl_write_threads:2
-37 wal_dir:.
-38 wal_dir_rescan_delay:2
-39 wal_max_size:268435456
-40 wal_mode:write
-41 worker_pool_threads:4
+21 replication_sync_timeout:300
+22 replication_timeout:1
+23 rows_per_wal:500000
+24 slab_alloc_factor:1.05
+25 too_long_threshold:0.5
+26 vinyl_bloom_fpr:0.05
+27 vinyl_cache:134217728
+28 vinyl_dir:.
+29 vinyl_max_tuple_size:1048576
+30 vinyl_memory:134217728
+31 vinyl_page_size:8192
+32 vinyl_range_size:1073741824
+33 vinyl_read_threads:1
+34 vinyl_run_count_per_level:2
+35 vinyl_run_size_ratio:3.5
+36 vinyl_timeout:60
+37 vinyl_write_threads:2
+38 wal_dir:.
+39 wal_dir_rescan_delay:2
+40 wal_max_size:268435456
+41 wal_mode:write
+42 worker_pool_threads:4
--
-- Test insert from detached fiber
--
diff --git a/test/box-tap/cfg.test.lua b/test/box-tap/cfg.test.lua
index d315346de..023a2af72 100755
--- a/test/box-tap/cfg.test.lua
+++ b/test/box-tap/cfg.test.lua
@@ -6,7 +6,7 @@ local socket = require('socket')
local fio = require('fio')
local uuid = require('uuid')
local msgpack = require('msgpack')
-test:plan(91)
+test:plan(94)
--------------------------------------------------------------------------------
-- Invalid values
@@ -29,6 +29,8 @@ invalid('replication_timeout', -1)
invalid('replication_timeout', 0)
invalid('replication_sync_lag', -1)
invalid('replication_sync_lag', 0)
+invalid('replication_sync_timeout', -1)
+invalid('replication_sync_timeout', 0)
invalid('replication_connect_timeout', -1)
invalid('replication_connect_timeout', 0)
invalid('replication_connect_quorum', -1)
@@ -100,6 +102,11 @@ status, result = pcall(box.cfg, {replication_sync_lag = 1})
test:ok(status, "dynamic replication_sync_lag")
pcall(box.cfg, {repliction_sync_lag = lag})
+timeout = box.cfg.replication_sync_timeout -- remember the current value so it can be restored
+status, result = pcall(box.cfg, {replication_sync_timeout = 10})
+test:ok(status, "dynamic replication_sync_timeout")
+pcall(box.cfg, {replication_sync_timeout = timeout}) -- restore; the option name must be spelled correctly or pcall silently swallows the error
+
--------------------------------------------------------------------------------
-- gh-534: Segmentation fault after two bad wal_mode settings
--------------------------------------------------------------------------------
diff --git a/test/box/admin.result b/test/box/admin.result
index c3e318a6a..ace88e6e9 100644
--- a/test/box/admin.result
+++ b/test/box/admin.result
@@ -58,6 +58,8 @@ cfg_filter(box.cfg)
- 30
- - replication_sync_lag
- 10
+ - - replication_sync_timeout
+ - 300
- - replication_timeout
- 1
- - rows_per_wal
diff --git a/test/box/cfg.result b/test/box/cfg.result
index a2df83310..816178513 100644
--- a/test/box/cfg.result
+++ b/test/box/cfg.result
@@ -54,6 +54,8 @@ cfg_filter(box.cfg)
- 30
- - replication_sync_lag
- 10
+ - - replication_sync_timeout
+ - 300
- - replication_timeout
- 1
- - rows_per_wal
@@ -143,6 +145,8 @@ cfg_filter(box.cfg)
- 30
- - replication_sync_lag
- 10
+ - - replication_sync_timeout
+ - 300
- - replication_timeout
- 1
- - rows_per_wal
--
2.14.3 (Apple Git-98)
^ permalink raw reply [flat|nested] 5+ messages in thread
* [tarantool-patches] [PATCH v6 3/3] box: adds replication sync after cfg. update
2018-08-30 14:11 [tarantool-patches] [PATCH 1/3] box: make replication_sync_lag option dynamic Olga Arkhangelskaia
2018-08-30 14:11 ` [tarantool-patches] [PATCH v2 2/3] box: add replication_sync_timeout Olga Arkhangelskaia
@ 2018-08-30 14:11 ` Olga Arkhangelskaia
2018-08-30 16:41 ` Vladimir Davydov
1 sibling, 1 reply; 5+ messages in thread
From: Olga Arkhangelskaia @ 2018-08-30 14:11 UTC (permalink / raw)
To: tarantool-patches; +Cc: Olga Arkhangelskaia
When a replica reconnects to a replica set not for the first time, no
synchronization is performed. As a result, the replica may give away
outdated data.
Closes #3427
@TarantoolBot document
Title: Orphan status after configuration update or initial bootstrap.
In case of initial bootstrap or after a configuration update we can get
an orphan status in two cases: if we synced up with a number of replicas
that is smaller than the quorum, or if we failed to sync up within the time
specified in replication_sync_timeout.
---
https://github.com/tarantool/tarantool/issues/3427
https://github.com/tarantool/tarantool/tree/OKriw/gh-3427-replication-no-sync-1.9
v1:
https://www.freelists.org/post/tarantool-patches/PATCH-replication-adds-replication-sync-after-cfg-update
v2:
https://www.freelists.org/post/tarantool-patches/PATCH-v2-replication-adds-replication-sync-after-cfg-update
v3:
https://www.freelists.org/post/tarantool-patches/PATCH-v3-box-adds-replication-sync-after-cfg-update
v4:
https://www.freelists.org/post/tarantool-patches/PATCH-v4-22-box-adds-replication-sync-after-cfg-update
v5:
https://www.freelists.org/post/tarantool-patches/PATCH-v5-33-box-adds-replication-sync-after-cfg-update
Changes in v2:
- fixed test
- changed replicaset_sync
Changes in v3:
- now we raise the exception when sync is not successful.
- fixed test
- renamed test
Changes in v4:
- fixed test
- replication_sync_lag is made dynamic in a separate patch
- removed unnecessary error type
- moved say_crit to another place
- in case of sync error we rollback to prev. config
Changes in v5:
- added test case
- now we don't roll back to prev. cfg
Changes in v6:
- set orphan
- added testcases
src/box/box.cc | 6 ++
src/box/replication.cc | 1 +
src/box/replication.h | 5 +-
test/replication/sync.result | 147 +++++++++++++++++++++++++++++++++++++++++
test/replication/sync.test.lua | 71 ++++++++++++++++++++
5 files changed, 227 insertions(+), 3 deletions(-)
create mode 100644 test/replication/sync.result
create mode 100644 test/replication/sync.test.lua
diff --git a/src/box/box.cc b/src/box/box.cc
index dcedfd002..e54a79467 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -646,6 +646,12 @@ box_set_replication(void)
box_sync_replication(true);
/* Follow replica */
replicaset_follow();
+ /* Set orphan and sync replica up to quorum.
+ * If we fail to sync up, replica will be left in orphan state.
+ */
+ is_orphan = true;
+ title("orphan");
+ replicaset_sync();
}
void
diff --git a/src/box/replication.cc b/src/box/replication.cc
index be58b0225..d85700b78 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -700,6 +700,7 @@ replicaset_sync(void)
say_crit("replica set sync complete, quorum of %d "
"replicas formed", quorum);
+ return;
}
void
diff --git a/src/box/replication.h b/src/box/replication.h
index a6f1dbf69..64f6e7f97 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -378,10 +378,9 @@ void
replicaset_follow(void);
/**
- * Wait until a replication quorum is formed.
- * Return immediately if a quorum cannot be
- * formed because of errors.
+ * Wait until a replication quorum is formed and sync up with it.
*/
+
void
replicaset_sync(void);
diff --git a/test/replication/sync.result b/test/replication/sync.result
new file mode 100644
index 000000000..d0a0eb5eb
--- /dev/null
+++ b/test/replication/sync.result
@@ -0,0 +1,147 @@
+fiber = require('fiber')
+---
+...
+--
+-- gh-3427: no sync after configuration update
+--
+--
+-- successful sync
+--
+env = require('test_run')
+---
+...
+test_run = env.new()
+---
+...
+engine = test_run:get_cfg('engine')
+---
+...
+box.schema.user.grant('guest', 'replication')
+---
+...
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+---
+- true
+...
+test_run:cmd("start server replica")
+---
+- true
+...
+s = box.schema.space.create('test', {engine = engine})
+---
+...
+index = s:create_index('primary')
+---
+...
+-- change replica configuration
+test_run:cmd("switch replica")
+---
+- true
+...
+replication = box.cfg.replication
+---
+...
+box.cfg{replication={}}
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+-- insert values on the master while replica is unconfigured
+box.begin() for i = 1, 100 do box.space.test:insert{i, i} end box.commit()
+---
+...
+box.space.test:count()
+---
+- 100
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.cfg{replication = replication}
+---
+...
+box.space.test:count() == 100
+---
+- true
+...
+--
+-- unsuccessful sync entering orphan state
+--
+box.cfg{replication={}}
+---
+...
+box.cfg{replication_sync_timeout = 0.000001}
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+-- insert values on the master while replica is unconfigured
+box.begin() for i = 101, 200 do box.space.test:insert{i, i} end box.commit()
+---
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.cfg{replication = replication}
+---
+...
+box.info.status
+---
+- orphan
+...
+require'fiber'.sleep(0.1)
+---
+...
+box.info.status
+---
+- running
+...
+--
+-- replication_sync_lag is too big
+--
+box.cfg{replication_sync_lag = 100}
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+function f () box.begin() for i = 201, 500 do box.space.test:insert{i, i} end box.commit(); end
+---
+...
+_=fiber.create(f)
+---
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.space.test:count() < 500
+---
+- true
+...
+test_run:cmd("switch default")
+---
+- true
+...
+-- cleanup
+test_run:cmd("stop server replica")
+---
+- true
+...
+test_run:cmd("cleanup server replica")
+---
+- true
+...
+box.space.test:drop()
+---
+...
+box.schema.user.revoke('guest', 'replication')
+---
+...
diff --git a/test/replication/sync.test.lua b/test/replication/sync.test.lua
new file mode 100644
index 000000000..0c4fff483
--- /dev/null
+++ b/test/replication/sync.test.lua
@@ -0,0 +1,71 @@
+fiber = require('fiber')
+--
+-- gh-3427: no sync after configuration update
+--
+
+--
+-- successful sync
+--
+
+env = require('test_run')
+test_run = env.new()
+engine = test_run:get_cfg('engine')
+
+box.schema.user.grant('guest', 'replication')
+
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+test_run:cmd("start server replica")
+
+s = box.schema.space.create('test', {engine = engine})
+index = s:create_index('primary')
+
+-- change replica configuration
+test_run:cmd("switch replica")
+replication = box.cfg.replication
+box.cfg{replication={}}
+
+test_run:cmd("switch default")
+-- insert values on the master while replica is unconfigured
+box.begin() for i = 1, 100 do box.space.test:insert{i, i} end box.commit()
+box.space.test:count()
+
+test_run:cmd("switch replica")
+box.cfg{replication = replication}
+box.space.test:count() == 100
+
+--
+-- unsuccessful sync entering orphan state
+--
+box.cfg{replication={}}
+box.cfg{replication_sync_timeout = 0.000001}
+
+test_run:cmd("switch default")
+-- insert values on the master while replica is unconfigured
+box.begin() for i = 101, 200 do box.space.test:insert{i, i} end box.commit()
+
+test_run:cmd("switch replica")
+box.cfg{replication = replication}
+box.info.status
+require'fiber'.sleep(0.1)
+box.info.status
+
+--
+-- replication_sync_lag is too big
+--
+
+box.cfg{replication_sync_lag = 100}
+
+test_run:cmd("switch default")
+
+function f () box.begin() for i = 201, 500 do box.space.test:insert{i, i} end box.commit(); end
+_=fiber.create(f)
+
+test_run:cmd("switch replica")
+box.space.test:count() < 500
+
+test_run:cmd("switch default")
+-- cleanup
+test_run:cmd("stop server replica")
+test_run:cmd("cleanup server replica")
+box.space.test:drop()
+box.schema.user.revoke('guest', 'replication')
--
2.14.3 (Apple Git-98)
^ permalink raw reply [flat|nested] 5+ messages in thread