[tarantool-patches] [PATCH v2] replication: adds replication sync after cfg. update

Olga Arkhangelskaia krishtal.olja at gmail.com
Wed Aug 22 18:59:30 MSK 2018


When replica reconnects to replica set not for the first time, we
suffer from absence of synchronization. Such behavior leads to giving
away outdated data.

Closes #3427
---
https://github.com/tarantool/tarantool/issues/3427
https://github.com/tarantool/tarantool/tree/OKriw/replication_no_sync-1.9

v1:
https://www.freelists.org/post/tarantool-patches/PATCH-replication-adds-replication-sync-after-cfg-update

Changes in v2:
- fixed test
- changed replicaset_sync

 src/box/box.cc                      |  7 ++-
 src/box/replication.cc              | 34 ++++++++------
 src/box/replication.h               |  7 ++-
 test/replication/orphan.result      | 92 +++++++++++++++++++++++++++++++++++++
 test/replication/orphan.test.lua    | 41 +++++++++++++++++
 test/replication/replica_orphan.lua | 12 +++++
 6 files changed, 177 insertions(+), 16 deletions(-)
 create mode 100644 test/replication/orphan.result
 create mode 100644 test/replication/orphan.test.lua
 create mode 100644 test/replication/replica_orphan.lua

diff --git a/src/box/box.cc b/src/box/box.cc
index 8d7454d1f..8c67c79e8 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -634,6 +634,9 @@ box_set_replication(void)
 	box_sync_replication(true);
 	/* Follow replica */
 	replicaset_follow();
+	/* Sync replica up to quorum */
+	replicaset_sync();
+	say_verbose("synchronization complete");
 }
 
 void
@@ -1941,8 +1944,10 @@ box_cfg_xc(void)
 	fiber_gc();
 	is_box_configured = true;
 
-	if (!is_bootstrap_leader)
+	if (!is_bootstrap_leader) {
 		replicaset_sync();
+		replicaset_is_synced();
+	}
 
 	say_info("ready to accept requests");
 }
diff --git a/src/box/replication.cc b/src/box/replication.cc
index 861ce34ea..c7b11a5d6 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -662,24 +662,12 @@ replicaset_follow(void)
 }
 
 void
-replicaset_sync(void)
+replicaset_is_synced(void)
 {
 	int quorum = replicaset_quorum();
 
 	if (quorum == 0)
 		return;
-
-	say_verbose("synchronizing with %d replicas", quorum);
-
-	/*
-	 * Wait until all connected replicas synchronize up to
-	 * replication_sync_lag
-	 */
-	while (replicaset.applier.synced < quorum &&
-	       replicaset.applier.connected +
-	       replicaset.applier.loading >= quorum)
-		fiber_cond_wait(&replicaset.applier.cond);
-
 	if (replicaset.applier.synced < quorum) {
 		/*
 		 * Not enough replicas connected to form a quorum.
@@ -694,6 +682,26 @@ replicaset_sync(void)
 		 "replicas formed", quorum);
 }
 
+void
+replicaset_sync(void)
+{
+	int quorum = replicaset_quorum();
+
+	if (quorum == 0)
+		return;
+
+	say_verbose("synchronizing with %d replicas", quorum);
+
+	/*
+	 * Wait until all connected replicas synchronize up to
+	 * replication_sync_lag
+	 */
+	while (replicaset.applier.synced < quorum &&
+	       replicaset.applier.connected +
+	       replicaset.applier.loading >= quorum)
+		fiber_cond_wait(&replicaset.applier.cond);
+}
+
 void
 replicaset_check_quorum(void)
 {
diff --git a/src/box/replication.h b/src/box/replication.h
index 06a2867b6..728a73704 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -371,10 +371,13 @@ replicaset_connect(struct applier **appliers, int count,
 void
 replicaset_follow(void);
 
+/**
+ * Check if replicaset is synced with quorum
+ */
+void
+replicaset_is_synced(void);
 /**
  * Wait until a replication quorum is formed.
- * Return immediately if a quorum cannot be
- * formed because of errors.
  */
 void
 replicaset_sync(void);
diff --git a/test/replication/orphan.result b/test/replication/orphan.result
new file mode 100644
index 000000000..818ea75de
--- /dev/null
+++ b/test/replication/orphan.result
@@ -0,0 +1,92 @@
+--
+-- gh-3427: no sync after configuration update
+--
+env = require('test_run')
+---
+...
+test_run = env.new()
+---
+...
+engine = test_run:get_cfg('engine')
+---
+...
+box.schema.user.grant('guest', 'read,write,execute', 'universe')
+---
+...
+box.schema.user.grant('guest', 'replication')
+---
+...
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica_orphan.lua'")
+---
+- true
+...
+test_run:cmd("start server replica")
+---
+- true
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+test_run:cmd("switch default")
+---
+- true
+...
+s = box.schema.space.create('test', {engine = engine})
+---
+...
+index = s:create_index('primary')
+---
+...
+-- change replica configuration
+test_run:cmd("switch replica")
+---
+- true
+...
+box.cfg{replication={}}
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+-- insert values on the master while replica is unconfigured
+a = 100000 box.begin() while a > 0 do a = a-1 box.space.test:insert{a,a} end box.commit()
+---
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+require'fiber'.sleep(0.1)
+---
+...
+box.cfg{replication = os.getenv("MASTER")}
+---
+...
+box.info.replication[1].upstream.lag > 0.1
+---
+- false
+...
+test_run:cmd("switch default")
+---
+- true
+...
+-- cleanup
+test_run:cmd("stop server replica")
+---
+- true
+...
+test_run:cmd("cleanup server replica")
+---
+- true
+...
+box.space.test:drop()
+---
+...
+box.schema.user.revoke('guest', 'replication')
+---
+...
+box.schema.user.revoke('guest', 'read,write,execute', 'universe')
+---
+...
diff --git a/test/replication/orphan.test.lua b/test/replication/orphan.test.lua
new file mode 100644
index 000000000..862fd69af
--- /dev/null
+++ b/test/replication/orphan.test.lua
@@ -0,0 +1,41 @@
+--
+-- gh-3427: no sync after configuration update
+--
+
+env = require('test_run')
+test_run = env.new()
+engine = test_run:get_cfg('engine')
+
+box.schema.user.grant('guest', 'read,write,execute', 'universe')
+
+box.schema.user.grant('guest', 'replication')
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica_orphan.lua'")
+test_run:cmd("start server replica")
+
+test_run:cmd("switch replica")
+test_run:cmd("switch default")
+
+s = box.schema.space.create('test', {engine = engine})
+index = s:create_index('primary')
+
+-- change replica configuration
+test_run:cmd("switch replica")
+box.cfg{replication={}}
+
+test_run:cmd("switch default")
+-- insert values on the master while replica is unconfigured
+a = 100000 box.begin() while a > 0 do a = a-1 box.space.test:insert{a,a} end box.commit()
+
+test_run:cmd("switch replica")
+require'fiber'.sleep(0.1)
+box.cfg{replication = os.getenv("MASTER")}
+
+box.info.replication[1].upstream.lag > 0.1
+test_run:cmd("switch default")
+
+-- cleanup
+test_run:cmd("stop server replica")
+test_run:cmd("cleanup server replica")
+box.space.test:drop()
+box.schema.user.revoke('guest', 'replication')
+box.schema.user.revoke('guest', 'read,write,execute', 'universe')
diff --git a/test/replication/replica_orphan.lua b/test/replication/replica_orphan.lua
new file mode 100644
index 000000000..97740d69a
--- /dev/null
+++ b/test/replication/replica_orphan.lua
@@ -0,0 +1,12 @@
+#!/usr/bin/env tarantool
+
+local TIMEOUT = tonumber(arg[1])
+
+box.cfg({
+    listen              = os.getenv("LISTEN"),
+    replication         = os.getenv("MASTER"),
+    replication_connect_timeout = 0.5,
+    replication_sync_lag = 0.01,
+})
+
+require('console').listen(os.getenv('ADMIN'))
-- 
2.14.3 (Apple Git-98)





More information about the Tarantool-patches mailing list