[PATCH v2 1/2] replication: stay in orphan mode until replica is synced by vclock
Konstantin Belyavskiy
k.belyavskiy at tarantool.org
Fri Mar 30 17:03:14 MSK 2018
Stay in orphan (read-only) mode while the local vclock is lower than
the master's, to make sure that datasets are the same across the replica set.
Also revert and slightly update catch test.
Needed for 3210
---
src/box/applier.cc | 16 +++++++++++-----
test/replication/catch.result | 15 ++++++++++-----
test/replication/catch.test.lua | 7 ++++---
3 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/src/box/applier.cc b/src/box/applier.cc
index 6bfe5a99a..12bf1f0d2 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -305,7 +305,7 @@ applier_join(struct applier *applier)
* server is 1.6. Since we have
* not initialized replication
* vclock yet, do it now. In 1.7+
- * this vlcock is not used.
+ * this vclock is not used.
*/
xrow_decode_vclock_xc(&row, &replicaset.vclock);
}
@@ -370,6 +370,7 @@ applier_subscribe(struct applier *applier)
struct ev_io *coio = &applier->io;
struct ibuf *ibuf = &applier->ibuf;
struct xrow_header row;
+ struct vclock remote_vclock_at_subscribe;
xrow_encode_subscribe_xc(&row, &REPLICASET_UUID, &INSTANCE_UUID,
&replicaset.vclock);
@@ -411,9 +412,8 @@ applier_subscribe(struct applier *applier)
* In case of successful subscribe, the server
* responds with its current vclock.
*/
- struct vclock vclock;
- vclock_create(&vclock);
- xrow_decode_vclock_xc(&row, &vclock);
+ vclock_create(&remote_vclock_at_subscribe);
+ xrow_decode_vclock_xc(&row, &remote_vclock_at_subscribe);
}
/**
* Tarantool < 1.6.7:
@@ -452,8 +452,14 @@ applier_subscribe(struct applier *applier)
applier_set_state(applier, APPLIER_FOLLOW);
}
+ /*
+ * Must stay in read-only mode until the replica is synchronized.
+ * Check lag and compare local vclock with remote one.
+ */
if (applier->state == APPLIER_SYNC &&
- applier->lag <= replication_sync_lag) {
+ applier->lag <= replication_sync_lag &&
+ vclock_compare(&remote_vclock_at_subscribe,
+ &replicaset.vclock) <= 0) {
/* Applier is synced, switch to "follow". */
applier_set_state(applier, APPLIER_FOLLOW);
}
diff --git a/test/replication/catch.result b/test/replication/catch.result
index 7d61ad26f..681cd77ac 100644
--- a/test/replication/catch.result
+++ b/test/replication/catch.result
@@ -19,11 +19,11 @@ errinj = box.error.injection
box.schema.user.grant('guest', 'replication')
---
...
-test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica_timeout.lua'")
---
- true
...
-test_run:cmd("start server replica")
+test_run:cmd("start server replica with args='0.1'")
---
- true
...
@@ -69,7 +69,7 @@ errinj.set("ERRINJ_RELAY_TIMEOUT", 1000.0)
---
- ok
...
-test_run:cmd("start server replica")
+test_run:cmd("start server replica with args='0.1'")
---
- true
...
@@ -99,10 +99,11 @@ box.space.test ~= nil
...
d = box.space.test:delete{1}
---
+- error: Can't modify data because this instance is in read-only mode.
...
box.space.test:get(1) == nil
---
-- true
+- false
...
-- case #2: delete tuple by net.box
test_run:cmd("switch default")
@@ -116,9 +117,13 @@ test_run:cmd("set variable r_uri to 'replica.listen'")
c = net_box.connect(r_uri)
---
...
+d = c.space.test:delete{1}
+---
+- error: Can't modify data because this instance is in read-only mode.
+...
c.space.test:get(1) == nil
---
-- true
+- false
...
-- check sync
errinj.set("ERRINJ_RELAY_TIMEOUT", 0)
diff --git a/test/replication/catch.test.lua b/test/replication/catch.test.lua
index cb865aa3c..cbfa1c19a 100644
--- a/test/replication/catch.test.lua
+++ b/test/replication/catch.test.lua
@@ -8,8 +8,8 @@ net_box = require('net.box')
errinj = box.error.injection
box.schema.user.grant('guest', 'replication')
-test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
-test_run:cmd("start server replica")
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica_timeout.lua'")
+test_run:cmd("start server replica with args='0.1'")
test_run:cmd("switch replica")
test_run:cmd("switch default")
@@ -29,7 +29,7 @@ for i=1,100 do s:insert{i, 'this is test message12345'} end
-- sleep after every tuple
errinj.set("ERRINJ_RELAY_TIMEOUT", 1000.0)
-test_run:cmd("start server replica")
+test_run:cmd("start server replica with args='0.1'")
test_run:cmd("switch replica")
fiber = require('fiber')
@@ -53,6 +53,7 @@ box.space.test:get(1) == nil
test_run:cmd("switch default")
test_run:cmd("set variable r_uri to 'replica.listen'")
c = net_box.connect(r_uri)
+d = c.space.test:delete{1}
c.space.test:get(1) == nil
-- check sync
--
2.14.3 (Apple Git-98)
More information about the Tarantool-patches
mailing list