[Tarantool-patches] [PATCH v2] replication: fix join vclock obtainment in relay_initial_join

Ilya Kosarev i.kosarev at tarantool.org
Mon Oct 28 15:25:06 MSK 2019


The join_vclock test could fail under heavy load because the in-memory
vclock could advance ahead of what was actually written to the WAL. Now
we explicitly obtain the vclock of the flushed state using
wal_begin_checkpoint. It is also better for the join_vclock test to get
the max index and the index count in a single request. With the fixes
mentioned above the test is no longer fragile.

Closes #4160
---
https://github.com/tarantool/tarantool/tree/i.kosarev/gh-4160-fix-join-vclock
https://github.com/tarantool/tarantool/issues/4160

 src/box/relay.cc                      | 9 +++++----
 test/replication/join_vclock.result   | 5 +----
 test/replication/join_vclock.test.lua | 3 +--
 test/replication/suite.ini            | 1 -
 4 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/src/box/relay.cc b/src/box/relay.cc
index e849fcf4f..d16cd1a56 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -306,13 +306,14 @@ relay_initial_join(int fd, uint64_t sync, struct vclock *vclock)
 	});
 
 	/*
-	 * Sync WAL to make sure that all changes visible from
-	 * the frozen read view are successfully committed.
+	 * Make sure that current database state is flushed to
+	 * the WAL and obtain corresponding vclock.
 	 */
-	if (wal_sync() != 0)
+	struct wal_checkpoint checkpoint;
+	if (wal_begin_checkpoint(&checkpoint) != 0)
 		diag_raise();
 
-	vclock_copy(vclock, &replicaset.vclock);
+	vclock_copy(vclock, &checkpoint.vclock);
 
 	/* Respond to the JOIN request with the current vclock. */
 	struct xrow_header row;
diff --git a/test/replication/join_vclock.result b/test/replication/join_vclock.result
index a9781073d..d6d9af783 100644
--- a/test/replication/join_vclock.result
+++ b/test/replication/join_vclock.result
@@ -67,10 +67,7 @@ test_run:cmd("switch replica1")
 ---
 - true
 ...
-cnt = box.space.test.index[0]:count()
----
-...
-box.space.test.index.primary:max()[1] == cnt - 1
+box.space.test.index.primary:max()[1] == box.space.test.index.primary:count() - 1
 ---
 - true
 ...
diff --git a/test/replication/join_vclock.test.lua b/test/replication/join_vclock.test.lua
index 0b60dffc2..a813ba31f 100644
--- a/test/replication/join_vclock.test.lua
+++ b/test/replication/join_vclock.test.lua
@@ -26,8 +26,7 @@ ch:get()
 
 errinj.set("ERRINJ_RELAY_FINAL_SLEEP", false)
 test_run:cmd("switch replica1")
-cnt = box.space.test.index[0]:count()
-box.space.test.index.primary:max()[1] == cnt - 1
+box.space.test.index.primary:max()[1] == box.space.test.index.primary:count() - 1
 test_run:cmd("switch default")
 
 replica_set.drop_all(test_run)
diff --git a/test/replication/suite.ini b/test/replication/suite.ini
index 384dac677..ed1de3140 100644
--- a/test/replication/suite.ini
+++ b/test/replication/suite.ini
@@ -12,7 +12,6 @@ long_run = prune.test.lua
 is_parallel = True
 pretest_clean = True
 fragile = errinj.test.lua            ; gh-3870
-          join_vclock.test.lua       ; gh-4160
           long_row_timeout.test.lua  ; gh-4351
           skip_conflict_row.test.lua ; gh-4457
           sync.test.lua              ; gh-3835 gh-3877
-- 
2.17.1



More information about the Tarantool-patches mailing list