[Tarantool-patches] [PATCH v3 4/4] box: start counting local space requests separately

Serge Petrenko sergepetrenko at tarantool.org
Mon Mar 23 19:19:35 MSK 2020


Sign local space requests with a zero instance id. This allows to split
local changes aside from the changes, which should be visible to the whole
cluster, and stop sending NOPs to replicas to follow local vclock.

Closes #4114
---
 src/box/box.cc                                |  20 +++
 src/box/relay.cc                              |  15 +--
 src/box/wal.c                                 |  16 ++-
 test/replication/autobootstrap.result         |   6 +
 test/replication/autobootstrap.test.lua       |   2 +
 test/replication/before_replace.result        |   8 +-
 test/replication/before_replace.test.lua      |   4 +-
 .../gh-4114-local-space-replication.result    | 125 ++++++++++++++++++
 .../gh-4114-local-space-replication.test.lua  |  48 +++++++
 test/replication/local_spaces.result          |   4 +
 test/replication/local_spaces.test.lua        |   3 +
 test/replication/misc.result                  |   6 +
 test/replication/misc.test.lua                |   2 +
 test/replication/quorum.result                |   6 +
 test/replication/quorum.test.lua              |   2 +
 test/replication/replica_rejoin.result        |   9 ++
 test/replication/replica_rejoin.test.lua      |   3 +
 test/replication/skip_conflict_row.result     |   3 +
 test/replication/skip_conflict_row.test.lua   |   1 +
 test/replication/suite.cfg                    |   1 +
 test/vinyl/errinj.result                      |   5 +
 test/vinyl/errinj.test.lua                    |   4 +
 22 files changed, 278 insertions(+), 15 deletions(-)
 create mode 100644 test/replication/gh-4114-local-space-replication.result
 create mode 100644 test/replication/gh-4114-local-space-replication.test.lua

diff --git a/src/box/box.cc b/src/box/box.cc
index 6de6b677c..b7ea0de2d 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1882,6 +1882,26 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
 	say_info("remote vclock %s local vclock %s",
 		 vclock_to_string(&replica_clock), vclock_to_string(&vclock));
 
+	/*
+	 * Replica clock is used in gc state and recovery
+	 * initialization, so we need to replace the remote 0-th
+	 * component with our own one. This makes recovery work
+	 * correctly: we're trying to recover from an xlog whose
+	 * vclock is smaller than remote replica clock in each
+	 * component, exluding the 0-th component. This leads to
+	 * finding the correct WAL, if it exists, since we do not
+	 * need to recover local rows (the ones, that contribute
+	 * to the 0-th vclock component). It would be bad to set
+	 * 0-th vclock component to a smaller value, since it
+	 * would unnecessarily require additional WALs, which may
+	 * have already been deleted.
+	 * Same stands for gc. Remote instances do not need this
+	 * instance's local rows, and after gc was reworked to
+	 * track individual vclock components instead of
+	 * signatures it's safe to set the local component to the
+	 * most recent value.
+	 */
+	vclock_set(&replica_clock, 0, vclock_get(&replicaset.vclock, 0));
 	/*
 	 * Process SUBSCRIBE request via replication relay
 	 * Send current recovery vector clock as a marker
diff --git a/src/box/relay.cc b/src/box/relay.cc
index 21b7c0a6f..a91e4b771 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -760,16 +760,15 @@ relay_send_row(struct xstream *stream, struct xrow_header *packet)
 {
 	struct relay *relay = container_of(stream, struct relay, stream);
 	assert(iproto_type_is_dml(packet->type));
-	/*
-	 * Transform replica local requests to IPROTO_NOP so as to
-	 * promote vclock on the replica without actually modifying
-	 * any data.
-	 */
 	if (packet->group_id == GROUP_LOCAL) {
 		/*
-		 * Replica-local requests generated while replica
-		 * was anonymous have a zero instance id. Just
-		 * skip all these rows.
+		 * We do not relay replica-local rows to other
+		 * instances, since we started signing them with
+		 * a zero instance id. However, if replica-local
+		 * rows, signed with a non-zero id are present in
+		 * our WAL, we still need to relay them as NOPs in
+		 * order to correctly promote the vclock on the
+		 * replica.
 		 */
 		if (packet->replica_id == REPLICA_ID_NIL)
 			return;
diff --git a/src/box/wal.c b/src/box/wal.c
index 3b094b0e8..a74bdecd9 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -953,13 +953,19 @@ wal_assign_lsn(struct vclock *vclock_diff, struct vclock *base,
 	/** Assign LSN to all local rows. */
 	for ( ; row < end; row++) {
 		if ((*row)->replica_id == 0) {
-			(*row)->lsn = vclock_inc(vclock_diff, instance_id) +
-				      vclock_get(base, instance_id);
 			/*
-			 * Note, an anonymous replica signs local
-			 * rows whith a zero instance id.
+			 * All rows representing local space data
+			 * manipulations are signed wth a zero
+			 * instance id. This is also true for
+			 * anonymous replicas, since they are
+			 * only capable of writing to local and
+			 * temporary spaces.
 			 */
-			(*row)->replica_id = instance_id;
+			if ((*row)->group_id != GROUP_LOCAL)
+				(*row)->replica_id = instance_id;
+
+			(*row)->lsn = vclock_inc(vclock_diff, (*row)->replica_id) +
+						 vclock_get(base, (*row)->replica_id);
 			/* Use lsn of the first local row as transaction id. */
 			tsn = tsn == 0 ? (*row)->lsn : tsn;
 			(*row)->tsn = tsn;
diff --git a/test/replication/autobootstrap.result b/test/replication/autobootstrap.result
index 743982d47..6918e23ea 100644
--- a/test/replication/autobootstrap.result
+++ b/test/replication/autobootstrap.result
@@ -162,6 +162,9 @@ box.schema.user.revoke('test_u', 'create', 'space')
 vclock = test_run:get_vclock('autobootstrap1')
 ---
 ...
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock("autobootstrap2", vclock)
 ---
 ...
@@ -206,6 +209,9 @@ test_run:wait_fullmesh(SERVERS)
 vclock = test_run:get_vclock("autobootstrap1")
 ---
 ...
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock("autobootstrap2", vclock)
 ---
 ...
diff --git a/test/replication/autobootstrap.test.lua b/test/replication/autobootstrap.test.lua
index 055ea4277..f8bb1c74a 100644
--- a/test/replication/autobootstrap.test.lua
+++ b/test/replication/autobootstrap.test.lua
@@ -74,6 +74,7 @@ box.schema.user.revoke('test_u', 'create', 'space')
 
 -- Synchronize
 vclock = test_run:get_vclock('autobootstrap1')
+vclock[0] = nil
 _ = test_run:wait_vclock("autobootstrap2", vclock)
 _ = test_run:wait_vclock("autobootstrap3", vclock)
 
@@ -95,6 +96,7 @@ _ = test_run:cmd("switch default")
 test_run:wait_fullmesh(SERVERS)
 
 vclock = test_run:get_vclock("autobootstrap1")
+vclock[0] = nil
 _ = test_run:wait_vclock("autobootstrap2", vclock)
 _ = test_run:wait_vclock("autobootstrap3", vclock)
 
diff --git a/test/replication/before_replace.result b/test/replication/before_replace.result
index ced40547e..61a552e84 100644
--- a/test/replication/before_replace.result
+++ b/test/replication/before_replace.result
@@ -266,7 +266,13 @@ box.space.test:replace{1, 1}
 ---
 - [1, 1]
 ...
-_ = test_run:wait_vclock('replica', test_run:get_vclock('default'))
+vclock = test_run:get_vclock('default')
+---
+...
+vclock[0] = nil
+---
+...
+_ = test_run:wait_vclock('replica', vclock)
 ---
 ...
 -- Check that replace{1, 2} coming from the master was suppressed
diff --git a/test/replication/before_replace.test.lua b/test/replication/before_replace.test.lua
index bcc6dc00d..ecd8ff044 100644
--- a/test/replication/before_replace.test.lua
+++ b/test/replication/before_replace.test.lua
@@ -101,7 +101,9 @@ _ = box.space.test:before_replace(function(old, new) return new:update{{'+', 2,
 test_run:cmd("switch default")
 box.space.test:replace{1, 1}
 
-_ = test_run:wait_vclock('replica', test_run:get_vclock('default'))
+vclock = test_run:get_vclock('default')
+vclock[0] = nil
+_ = test_run:wait_vclock('replica', vclock)
 
 -- Check that replace{1, 2} coming from the master was suppressed
 -- by the before_replace trigger on the replica.
diff --git a/test/replication/gh-4114-local-space-replication.result b/test/replication/gh-4114-local-space-replication.result
new file mode 100644
index 000000000..e524c9a1b
--- /dev/null
+++ b/test/replication/gh-4114-local-space-replication.result
@@ -0,0 +1,125 @@
+-- test-run result file version 2
+env = require('test_run')
+ | ---
+ | ...
+test_run = env.new()
+ | ---
+ | ...
+
+--
+-- gh-4114. Account local space changes in a separate vclock
+-- component. Do not replicate local space changes, even as NOPs.
+--
+
+box.schema.user.grant('guest', 'replication')
+ | ---
+ | ...
+_ = box.schema.space.create('test', {is_local=true})
+ | ---
+ | ...
+_ = box.space.test:create_index("pk")
+ | ---
+ | ...
+
+test_run:cmd('create server replica with rpl_master=default, script "replication/replica.lua"')
+ | ---
+ | - true
+ | ...
+test_run:cmd('start server replica with wait=True, wait_load=True')
+ | ---
+ | - true
+ | ...
+
+a = box.info.vclock[0] or 0
+ | ---
+ | ...
+for i = 1,10 do box.space.test:insert{i} end
+ | ---
+ | ...
+box.info.vclock[0] == a + 10 or box.info.vclock[0] - a
+ | ---
+ | - true
+ | ...
+
+test_run:cmd('switch replica')
+ | ---
+ | - true
+ | ...
+box.info.vclock[0]
+ | ---
+ | - null
+ | ...
+box.cfg{checkpoint_count=1}
+ | ---
+ | ...
+box.space.test:select{}
+ | ---
+ | - []
+ | ...
+box.space.test:insert{1}
+ | ---
+ | - [1]
+ | ...
+box.snapshot()
+ | ---
+ | - ok
+ | ...
+box.space.test:insert{2}
+ | ---
+ | - [2]
+ | ...
+box.snapshot()
+ | ---
+ | - ok
+ | ...
+box.space.test:insert{3}
+ | ---
+ | - [3]
+ | ...
+box.snapshot()
+ | ---
+ | - ok
+ | ...
+
+box.info.vclock[0]
+ | ---
+ | - 3
+ | ...
+
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+
+test_run:cmd('set variable repl_source to "replica.listen"')
+ | ---
+ | - true
+ | ...
+
+box.cfg{replication=repl_source}
+ | ---
+ | ...
+test_run:wait_cond(function()\
+                       return box.info.replication[2].upstream and\
+                       box.info.replication[2].upstream.status == 'follow'\
+                   end,\
+                   10)
+ | ---
+ | - true
+ | ...
+
+-- Cleanup.
+test_run:cmd('stop server replica')
+ | ---
+ | - true
+ | ...
+test_run:cmd('delete server replica')
+ | ---
+ | - true
+ | ...
+box.space.test:drop()
+ | ---
+ | ...
+box.schema.user.revoke('guest', 'replication')
+ | ---
+ | ...
diff --git a/test/replication/gh-4114-local-space-replication.test.lua b/test/replication/gh-4114-local-space-replication.test.lua
new file mode 100644
index 000000000..26dccee68
--- /dev/null
+++ b/test/replication/gh-4114-local-space-replication.test.lua
@@ -0,0 +1,48 @@
+env = require('test_run')
+test_run = env.new()
+
+--
+-- gh-4114. Account local space changes in a separate vclock
+-- component. Do not replicate local space changes, even as NOPs.
+--
+
+box.schema.user.grant('guest', 'replication')
+_ = box.schema.space.create('test', {is_local=true})
+_ = box.space.test:create_index("pk")
+
+test_run:cmd('create server replica with rpl_master=default, script "replication/replica.lua"')
+test_run:cmd('start server replica with wait=True, wait_load=True')
+
+a = box.info.vclock[0] or 0
+for i = 1,10 do box.space.test:insert{i} end
+box.info.vclock[0] == a + 10 or box.info.vclock[0] - a
+
+test_run:cmd('switch replica')
+box.info.vclock[0]
+box.cfg{checkpoint_count=1}
+box.space.test:select{}
+box.space.test:insert{1}
+box.snapshot()
+box.space.test:insert{2}
+box.snapshot()
+box.space.test:insert{3}
+box.snapshot()
+
+box.info.vclock[0]
+
+test_run:cmd('switch default')
+
+test_run:cmd('set variable repl_source to "replica.listen"')
+
+box.cfg{replication=repl_source}
+test_run:wait_cond(function()\
+                       return box.info.replication[2].upstream and\
+                       box.info.replication[2].upstream.status == 'follow'\
+                   end,\
+                   10)
+
+-- Cleanup.
+test_run:cmd('stop server replica')
+test_run:cmd('delete server replica')
+box.space.test:drop()
+box.schema.user.revoke('guest', 'replication')
diff --git a/test/replication/local_spaces.result b/test/replication/local_spaces.result
index cf2c52010..4855d8a88 100644
--- a/test/replication/local_spaces.result
+++ b/test/replication/local_spaces.result
@@ -288,6 +288,10 @@ _ = s3:insert{3}
 vclock = test_run:get_vclock('default')
 ---
 ...
+-- Ignore 0-th component when waiting. They don't match.
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock('replica', vclock)
 ---
 ...
diff --git a/test/replication/local_spaces.test.lua b/test/replication/local_spaces.test.lua
index 373e2cd20..c5e224030 100644
--- a/test/replication/local_spaces.test.lua
+++ b/test/replication/local_spaces.test.lua
@@ -112,6 +112,9 @@ _ = s1:insert{3}
 _ = s2:insert{3}
 _ = s3:insert{3}
 vclock = test_run:get_vclock('default')
+
+-- Ignore 0-th component when waiting. They don't match.
+vclock[0] = nil
 _ = test_run:wait_vclock('replica', vclock)
 
 test_run:cmd("switch replica")
diff --git a/test/replication/misc.result b/test/replication/misc.result
index b63d72846..e5d1f560e 100644
--- a/test/replication/misc.result
+++ b/test/replication/misc.result
@@ -214,6 +214,9 @@ box.space.space1:select{}
 vclock = test_run:get_vclock("autobootstrap1")
 ---
 ...
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock("autobootstrap2", vclock)
 ---
 ...
@@ -414,6 +417,9 @@ while box.info.replication[2] == nil do fiber.sleep(0.01) end
 vclock = test_run:get_vclock('default')
 ---
 ...
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock('replica_auth', vclock)
 ---
 ...
diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua
index c454a0992..d285b014a 100644
--- a/test/replication/misc.test.lua
+++ b/test/replication/misc.test.lua
@@ -88,6 +88,7 @@ c.space.space1:insert{box.NULL, "data"} -- fails, but bumps sequence value
 c.space.space1:insert{box.NULL, 1, "data"}
 box.space.space1:select{}
 vclock = test_run:get_vclock("autobootstrap1")
+vclock[0] = nil
 _ = test_run:wait_vclock("autobootstrap2", vclock)
 test_run:cmd("switch autobootstrap2")
 box.space.space1:select{}
@@ -172,6 +173,7 @@ box.schema.user.grant('cluster', 'replication')
 
 while box.info.replication[2] == nil do fiber.sleep(0.01) end
 vclock = test_run:get_vclock('default')
+vclock[0] = nil
 _ = test_run:wait_vclock('replica_auth', vclock)
 
 test_run:cmd("stop server replica_auth")
diff --git a/test/replication/quorum.result b/test/replication/quorum.result
index 07abe7f2a..5ef66bf0a 100644
--- a/test/replication/quorum.result
+++ b/test/replication/quorum.result
@@ -325,6 +325,9 @@ space:insert{2}
 vclock = test_run:get_vclock("default")
 ---
 ...
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock("replica", vclock)
 ---
 ...
@@ -390,6 +393,9 @@ box.cfg{replication = repl}
 vclock = test_run:get_vclock("master_quorum1")
 ---
 ...
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock("master_quorum2", vclock)
 ---
 ...
diff --git a/test/replication/quorum.test.lua b/test/replication/quorum.test.lua
index 5f2872675..da24f34a0 100644
--- a/test/replication/quorum.test.lua
+++ b/test/replication/quorum.test.lua
@@ -125,6 +125,7 @@ test_run:cmd("switch default")
 box.cfg{listen = listen}
 space:insert{2}
 vclock = test_run:get_vclock("default")
+vclock[0] = nil
 _ = test_run:wait_vclock("replica", vclock)
 test_run:cmd("switch replica")
 box.info.status -- running
@@ -145,6 +146,7 @@ box.cfg{replication = ""}
 box.space.test:insert{1}
 box.cfg{replication = repl}
 vclock = test_run:get_vclock("master_quorum1")
+vclock[0] = nil
 _ = test_run:wait_vclock("master_quorum2", vclock)
 test_run:cmd("switch master_quorum2")
 box.space.test:select()
diff --git a/test/replication/replica_rejoin.result b/test/replication/replica_rejoin.result
index b8ed79f14..dd04ae297 100644
--- a/test/replication/replica_rejoin.result
+++ b/test/replication/replica_rejoin.result
@@ -144,6 +144,9 @@ for i = 10, 30, 10 do box.space.test:update(i, {{'!', 1, i}}) end
 vclock = test_run:get_vclock('default')
 ---
 ...
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock('replica', vclock)
 ---
 ...
@@ -295,6 +298,9 @@ for i = 1, 10 do box.space.test:replace{2} end
 vclock = test_run:get_vclock('replica')
 ---
 ...
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock('default', vclock)
 ---
 ...
@@ -345,6 +351,9 @@ for i = 1, 10 do box.space.test:replace{2} end
 vclock = test_run:get_vclock('replica')
 ---
 ...
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock('default', vclock)
 ---
 ...
diff --git a/test/replication/replica_rejoin.test.lua b/test/replication/replica_rejoin.test.lua
index 25c0849ec..410ef44d7 100644
--- a/test/replication/replica_rejoin.test.lua
+++ b/test/replication/replica_rejoin.test.lua
@@ -55,6 +55,7 @@ test_run:cmd("switch default")
 -- Make sure the replica follows new changes.
 for i = 10, 30, 10 do box.space.test:update(i, {{'!', 1, i}}) end
 vclock = test_run:get_vclock('default')
+vclock[0] = nil
 _ = test_run:wait_vclock('replica', vclock)
 test_run:cmd("switch replica")
 box.space.test:select()
@@ -109,6 +110,7 @@ box.space.test:replace{1}
 test_run:cmd("switch replica")
 for i = 1, 10 do box.space.test:replace{2} end
 vclock = test_run:get_vclock('replica')
+vclock[0] = nil
 _ = test_run:wait_vclock('default', vclock)
 -- Restart the master and force garbage collection.
 test_run:cmd("switch default")
@@ -126,6 +128,7 @@ test_run:wait_cond(function() return #fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.
 test_run:cmd("switch replica")
 for i = 1, 10 do box.space.test:replace{2} end
 vclock = test_run:get_vclock('replica')
+vclock[0] = nil
 _ = test_run:wait_vclock('default', vclock)
 -- Restart the replica. It should successfully rebootstrap.
 test_run:cmd("restart server replica with args='true'")
diff --git a/test/replication/skip_conflict_row.result b/test/replication/skip_conflict_row.result
index 9b2777872..d70ac8e2a 100644
--- a/test/replication/skip_conflict_row.result
+++ b/test/replication/skip_conflict_row.result
@@ -54,6 +54,9 @@ box.info.status
 vclock = test_run:get_vclock('default')
 ---
 ...
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock("replica", vclock)
 ---
 ...
diff --git a/test/replication/skip_conflict_row.test.lua b/test/replication/skip_conflict_row.test.lua
index 2982c730a..04fd08136 100644
--- a/test/replication/skip_conflict_row.test.lua
+++ b/test/replication/skip_conflict_row.test.lua
@@ -19,6 +19,7 @@ space:insert{2}
 box.info.status
 
 vclock = test_run:get_vclock('default')
+vclock[0] = nil
 _ = test_run:wait_vclock("replica", vclock)
 test_run:cmd("switch replica")
 box.info.replication[1].upstream.message
diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg
index 90fd53ca6..0907ac17f 100644
--- a/test/replication/suite.cfg
+++ b/test/replication/suite.cfg
@@ -12,6 +12,7 @@
     "on_schema_init.test.lua": {},
     "long_row_timeout.test.lua": {},
     "join_without_snap.test.lua": {},
+    "gh-4114-local-space-replication.test.lua": {},
     "gh-4402-info-errno.test.lua": {},
     "gh-4605-empty-password.test.lua": {},
     "gh-4606-admin-creds.test.lua": {},
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 2635da265..2bd701f70 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -1241,6 +1241,11 @@ test_run:cmd("switch default")
 vclock = test_run:get_vclock("default")
 ---
 ...
+-- Ignore 0-th vclock component. They don't match between
+-- replicas.
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock("replica", vclock)
 ---
 ...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 4230cfae3..750d3bfe8 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -455,6 +455,10 @@ box.cfg{read_only = true}
 
 test_run:cmd("switch default")
 vclock = test_run:get_vclock("default")
+
+-- Ignore 0-th vclock component. They don't match between
+-- replicas.
+vclock[0] = nil
 _ = test_run:wait_vclock("replica", vclock)
 
 test_run:cmd("stop server replica")
-- 
2.21.1 (Apple Git-122.3)



More information about the Tarantool-patches mailing list