[tarantool-patches] [PATCH v4 2/2] box: adds replication sync after cfg. update

Olga Arkhangelskaia krishtal.olja at gmail.com
Tue Aug 28 14:43:28 MSK 2018


When a replica reconnects to a replica set not for the first time, it
is not synchronized with the other members. As a result, the replica
may give away outdated data.

Closes #3427
---
https://github.com/tarantool/tarantool/issues/3427
https://github.com/tarantool/tarantool/tree/OKriw/replication_no_sync-1.9

v1:
https://www.freelists.org/post/tarantool-patches/PATCH-replication-adds-replication-sync-after-cfg-update

v2:
https://www.freelists.org/post/tarantool-patches/PATCH-v2-replication-adds-replication-sync-after-cfg-update

v3:
https://www.freelists.org/post/tarantool-patches/PATCH-v3-box-adds-replication-sync-after-cfg-update

Changes in v2:
- fixed test
- changed replicaset_sync

Changes in v3:
- now we raise the exception when sync is not successful.
- fixed test
- renamed test 

Changes in v4:
- fixed test
- replication_sync_lag is made dynamic in a separate patch
- removed unnecessary error type
- moved say_crit to another place
- in case of a sync error we roll back to the previous config

 src/box/box.cc                 |  8 ++++-
 src/box/replication.cc         |  8 ++---
 src/box/replication.h          |  6 ++--
 test/replication/sync.result   | 81 ++++++++++++++++++++++++++++++++++++++++++
 test/replication/sync.test.lua | 38 ++++++++++++++++++++
 5 files changed, 133 insertions(+), 8 deletions(-)
 create mode 100644 test/replication/sync.result
 create mode 100644 test/replication/sync.test.lua

diff --git a/src/box/box.cc b/src/box/box.cc
index be5077da8..aaae4219f 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -634,6 +634,11 @@ box_set_replication(void)
 	box_sync_replication(true);
 	/* Follow replica */
 	replicaset_follow();
+	/* Sync replica up to quorum */
+	if (!replicaset_sync()) {
+		tnt_raise(ClientError, ER_CFG, "replication",
+			  "failed to connect to one or more replicas");
+	}
 }
 
 void
@@ -1948,7 +1953,8 @@ box_cfg_xc(void)
 	is_box_configured = true;
 
 	if (!is_bootstrap_leader)
-		replicaset_sync();
+		if (!replicaset_sync())
+			say_crit("entering orphan mode");
 
 	say_info("ready to accept requests");
 }
diff --git a/src/box/replication.cc b/src/box/replication.cc
index 861ce34ea..9d3b1094c 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -661,13 +661,13 @@ replicaset_follow(void)
 	}
 }
 
-void
+bool
 replicaset_sync(void)
 {
 	int quorum = replicaset_quorum();
 
 	if (quorum == 0)
-		return;
+		return true;
 
 	say_verbose("synchronizing with %d replicas", quorum);
 
@@ -686,12 +686,12 @@ replicaset_sync(void)
 		 * Do not stall configuration, leave the instance
 		 * in 'orphan' state.
 		 */
-		say_crit("entering orphan mode");
-		return;
+		return false;
 	}
 
 	say_crit("replica set sync complete, quorum of %d "
 		 "replicas formed", quorum);
+	return true;
 }
 
 void
diff --git a/src/box/replication.h b/src/box/replication.h
index 06a2867b6..d4e6f7e3e 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -373,10 +373,10 @@ replicaset_follow(void);
 
 /**
  * Wait until a replication quorum is formed.
- * Return immediately if a quorum cannot be
- * formed because of errors.
+ * @return true in case of success.
+ * @return false if a quorum cannot be formed because of errors.
  */
-void
+bool
 replicaset_sync(void);
 
 /**
diff --git a/test/replication/sync.result b/test/replication/sync.result
new file mode 100644
index 000000000..f6ddb02e0
--- /dev/null
+++ b/test/replication/sync.result
@@ -0,0 +1,81 @@
+--
+-- gh-3427: no sync after configuration update
+--
+env = require('test_run')
+---
+...
+test_run = env.new()
+---
+...
+engine = test_run:get_cfg('engine')
+---
+...
+box.schema.user.grant('guest', 'replication')
+---
+...
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+---
+- true
+...
+test_run:cmd("start server replica")
+---
+- true
+...
+s = box.schema.space.create('test', {engine = engine})
+---
+...
+index = s:create_index('primary')
+---
+...
+-- change replica configuration
+test_run:cmd("switch replica")
+---
+- true
+...
+box.cfg{replication_sync_lag = 0.1}
+---
+...
+replication = box.cfg.replication
+---
+...
+box.cfg{replication={}}
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+-- insert values on the master while replica is unconfigured
+a = 3000 box.begin() while a > 0 do a = a-1 box.space.test:insert{a,a} end box.commit()
+---
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.cfg{replication = replication}
+---
+...
+box.space.test:count() == 3000
+---
+- true
+...
+test_run:cmd("switch default")
+---
+- true
+...
+-- cleanup
+test_run:cmd("stop server replica")
+---
+- true
+...
+test_run:cmd("cleanup server replica")
+---
+- true
+...
+box.space.test:drop()
+---
+...
+box.schema.user.revoke('guest', 'replication')
+---
+...
diff --git a/test/replication/sync.test.lua b/test/replication/sync.test.lua
new file mode 100644
index 000000000..4c2b55af8
--- /dev/null
+++ b/test/replication/sync.test.lua
@@ -0,0 +1,38 @@
+--
+-- gh-3427: no sync after configuration update
+--
+
+env = require('test_run')
+test_run = env.new()
+engine = test_run:get_cfg('engine')
+
+box.schema.user.grant('guest', 'replication')
+
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+test_run:cmd("start server replica")
+
+s = box.schema.space.create('test', {engine = engine})
+index = s:create_index('primary')
+
+-- change replica configuration
+test_run:cmd("switch replica")
+box.cfg{replication_sync_lag = 0.1}
+replication = box.cfg.replication
+box.cfg{replication={}}
+
+test_run:cmd("switch default")
+-- insert values on the master while replica is unconfigured
+a = 3000 box.begin() while a > 0 do a = a-1 box.space.test:insert{a,a} end box.commit()
+
+test_run:cmd("switch replica")
+box.cfg{replication = replication}
+
+box.space.test:count() == 3000
+
+test_run:cmd("switch default")
+
+-- cleanup
+test_run:cmd("stop server replica")
+test_run:cmd("cleanup server replica")
+box.space.test:drop()
+box.schema.user.revoke('guest', 'replication')
-- 
2.14.3 (Apple Git-98)





More information about the Tarantool-patches mailing list