[Tarantool-patches] [PATCH 3/3] box: start counting local space requests separately

Serge Petrenko sergepetrenko at tarantool.org
Fri Jan 31 17:58:07 MSK 2020


Sign local space requests with a zero instance id. This separates
local changes from the changes that should be visible to the whole
cluster, and lets us stop sending NOPs to replicas just to make them
follow the local vclock.
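
For example, after this patch a write to a replica-local space only
advances the 0-th vclock component and is not relayed to replicas at
all (a rough illustration of the observable effect; the space name is
arbitrary):

    -- on the master
    s = box.schema.space.create('loc', {is_local = true})
    _ = s:create_index('pk')
    lsn0 = box.info.vclock[0] or 0
    s:insert{1}
    assert(box.info.vclock[0] == lsn0 + 1)
    -- a connected replica does not receive this row, not even as a
    -- NOP, so its own box.info.vclock[0] is left untouched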

Closes #4114
---
 src/box/box.cc                                |  19 +++
 src/box/relay.cc                              |  17 +--
 src/box/wal.c                                 |  16 ++-
 .../gh-4114-local-space-replication.result    | 121 ++++++++++++++++++
 .../gh-4114-local-space-replication.test.lua  |  44 +++++++
 test/replication/local_spaces.result          |   4 +
 test/replication/local_spaces.test.lua        |   3 +
 test/vinyl/errinj.result                      |   5 +
 test/vinyl/errinj.test.lua                    |   4 +
 test/xlog/panic_on_wal_error.result           |   2 +-
 10 files changed, 216 insertions(+), 19 deletions(-)
 create mode 100644 test/replication/gh-4114-local-space-replication.result
 create mode 100644 test/replication/gh-4114-local-space-replication.test.lua

diff --git a/src/box/box.cc b/src/box/box.cc
index 57a43c754..add6e3743 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1859,6 +1859,25 @@ box_process_subscribe(struct ev_io *io, struct xrow_header *header)
 	say_info("remote vclock %s local vclock %s",
 		 vclock_to_string(&replica_clock), vclock_to_string(&vclock));
 
+	/*
+	 * Replica clock is used in gc consumer and recovery
+	 * initialization, so we need to replace the remote 0-th
+	 * component with our own one. This makes recovery work
+	 * correctly: we're trying to recover from an xlog whose
+	 * vclock is smaller than the remote replica clock in
+	 * every component, excluding the 0-th one. This leads to
+	 * finding the correct WAL, if it exists, since we do not
+	 * need to recover local rows (the ones that contribute
+	 * to the 0-th vclock component). It would be bad to set
+	 * the 0-th vclock component to a smaller value, since it
+	 * would unnecessarily require additional WALs, which may
+	 * have already been deleted.
+	 * Speaking of gc, we have to be more conservative here
+	 * and set the 0-th component to the minimal possible
+	 * value, zeroing it out. Otherwise we could delete WALs
+	 * still needed by the replica. This is done in
+	 * relay_subscribe.
+	 */
+	vclock_set(&replica_clock, 0, vclock_get(&replicaset.vclock, 0));
 	/*
 	 * Process SUBSCRIBE request via replication relay
 	 * Send current recovery vector clock as a marker
diff --git a/src/box/relay.cc b/src/box/relay.cc
index e6840541f..7c423e925 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -691,6 +691,9 @@ relay_subscribe(struct replica *replica, int fd, uint64_t sync,
 	 * join.
 	 */
 	if (replica->gc == NULL && !replica->anon) {
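+		/*
+		 * Zero out the 0-th vclock component for the gc
+		 * consumer: being conservative here keeps gc from
+		 * deleting WALs the replica may still need. See
+		 * the comment in box_process_subscribe().
+		 */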
+		struct vclock gc_clock;
+		vclock_copy(&gc_clock, replica_clock);
+		vclock_set(&gc_clock, 0, 0);
-		replica->gc = gc_consumer_register(replica_clock, "replica %s",
+		replica->gc = gc_consumer_register(&gc_clock, "replica %s",
 						   tt_uuid_str(&replica->uuid));
 		if (replica->gc == NULL)
@@ -750,22 +753,10 @@ relay_send_row(struct xstream *stream, struct xrow_header *packet)
 {
 	struct relay *relay = container_of(stream, struct relay, stream);
 	assert(iproto_type_is_dml(packet->type));
-	/*
-	 * Transform replica local requests to IPROTO_NOP so as to
-	 * promote vclock on the replica without actually modifying
-	 * any data.
-	 */
+	/* We do not relay replica-local rows to other instances. */
 	if (packet->group_id == GROUP_LOCAL) {
-		/*
-		 * Replica-local requests generated while replica
-		 * was anonymous have a zero instance id. Just
-		 * skip all these rows.
-		 */
 		if (packet->replica_id == REPLICA_ID_NIL)
 			return;
-		packet->type = IPROTO_NOP;
-		packet->group_id = GROUP_DEFAULT;
-		packet->bodycnt = 0;
 	}
 	/*
 	 * We're feeding a WAL, thus responding to FINAL JOIN or SUBSCRIBE
diff --git a/src/box/wal.c b/src/box/wal.c
index 0ae66ff32..b453022b4 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -939,13 +939,19 @@ wal_assign_lsn(struct vclock *vclock_diff, struct vclock *base,
 	/** Assign LSN to all local rows. */
 	for ( ; row < end; row++) {
 		if ((*row)->replica_id == 0) {
-			(*row)->lsn = vclock_inc(vclock_diff, instance_id) +
-				      vclock_get(base, instance_id);
 			/*
-			 * Note, an anonymous replica signs local
-			 * rows whith a zero instance id.
+			 * All rows representing local space data
+			 * manipulations are signed with a zero
+			 * instance id. This is also true for
+			 * anonymous replicas, since they are
+			 * only capable of writing to local and
+			 * temporary spaces.
 			 */
-			(*row)->replica_id = instance_id;
+			if ((*row)->group_id != GROUP_LOCAL)
+				(*row)->replica_id = instance_id;
+
+			(*row)->lsn = vclock_inc(vclock_diff, (*row)->replica_id) +
+				      vclock_get(base, (*row)->replica_id);
 			/* Use lsn of the first local row as transaction id. */
 			tsn = tsn == 0 ? (*row)->lsn : tsn;
 			(*row)->tsn = tsn;
diff --git a/test/replication/gh-4114-local-space-replication.result b/test/replication/gh-4114-local-space-replication.result
new file mode 100644
index 000000000..ed69c2b6f
--- /dev/null
+++ b/test/replication/gh-4114-local-space-replication.result
@@ -0,0 +1,121 @@
+-- test-run result file version 2
+env = require('test_run')
+ | ---
+ | ...
+test_run = env.new()
+ | ---
+ | ...
+
+--
+-- gh-4114. Account local space changes in a separate vclock
+-- component. Do not replicate local space changes, even as NOPs.
+--
+
+box.schema.user.grant('guest', 'replication')
+ | ---
+ | ...
+_ = box.schema.space.create('test', {is_local=true})
+ | ---
+ | ...
+_ = box.space.test:create_index("pk")
+ | ---
+ | ...
+
+test_run:cmd('create server replica with rpl_master=default, script "replication/replica.lua"')
+ | ---
+ | - true
+ | ...
+test_run:cmd('start server replica with wait=True, wait_load=True')
+ | ---
+ | - true
+ | ...
+
+a = box.info.vclock[0] or 0
+ | ---
+ | ...
+for i = 1,10 do box.space.test:insert{i} end
+ | ---
+ | ...
+box.info.vclock[0] == a + 10 or box.info.vclock[0] - a
+ | ---
+ | - true
+ | ...
+
+test_run:cmd('switch replica')
+ | ---
+ | - true
+ | ...
+box.info.vclock[0]
+ | ---
+ | - null
+ | ...
+box.cfg{checkpoint_count=1}
+ | ---
+ | ...
+box.space.test:select{}
+ | ---
+ | - []
+ | ...
+box.space.test:insert{1}
+ | ---
+ | - [1]
+ | ...
+box.snapshot()
+ | ---
+ | - ok
+ | ...
+box.space.test:insert{2}
+ | ---
+ | - [2]
+ | ...
+box.snapshot()
+ | ---
+ | - ok
+ | ...
+box.space.test:insert{3}
+ | ---
+ | - [3]
+ | ...
+box.snapshot()
+ | ---
+ | - ok
+ | ...
+
+box.info.vclock[0]
+ | ---
+ | - 3
+ | ...
+
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+
+test_run:cmd('set variable repl_source to "replica.listen"')
+ | ---
+ | - true
+ | ...
+
+box.cfg{replication=repl_source}
+ | ---
+ | ...
+box.info.replication[2].upstream.status
+ | ---
+ | - follow
+ | ...
+
+-- Cleanup.
+test_run:cmd('stop server replica')
+ | ---
+ | - true
+ | ...
+test_run:cmd('delete server replica')
+ | ---
+ | - true
+ | ...
+box.space.test:drop()
+ | ---
+ | ...
+box.schema.user.revoke('guest', 'replication')
+ | ---
+ | ...
diff --git a/test/replication/gh-4114-local-space-replication.test.lua b/test/replication/gh-4114-local-space-replication.test.lua
new file mode 100644
index 000000000..415ba1ddc
--- /dev/null
+++ b/test/replication/gh-4114-local-space-replication.test.lua
@@ -0,0 +1,44 @@
+env = require('test_run')
+test_run = env.new()
+
+--
+-- gh-4114. Account local space changes in a separate vclock
+-- component. Do not replicate local space changes, even as NOPs.
+--
+
+box.schema.user.grant('guest', 'replication')
+_ = box.schema.space.create('test', {is_local=true})
+_ = box.space.test:create_index("pk")
+
+test_run:cmd('create server replica with rpl_master=default, script "replication/replica.lua"')
+test_run:cmd('start server replica with wait=True, wait_load=True')
+
+a = box.info.vclock[0] or 0
+for i = 1,10 do box.space.test:insert{i} end
+box.info.vclock[0] == a + 10 or box.info.vclock[0] - a
+
+test_run:cmd('switch replica')
+box.info.vclock[0]
+box.cfg{checkpoint_count=1}
+box.space.test:select{}
+box.space.test:insert{1}
+box.snapshot()
+box.space.test:insert{2}
+box.snapshot()
+box.space.test:insert{3}
+box.snapshot()
+
+box.info.vclock[0]
+
+test_run:cmd('switch default')
+
+test_run:cmd('set variable repl_source to "replica.listen"')
+
+box.cfg{replication=repl_source}
+box.info.replication[2].upstream.status
+
+-- Cleanup.
+test_run:cmd('stop server replica')
+test_run:cmd('delete server replica')
+box.space.test:drop()
+box.schema.user.revoke('guest', 'replication')
diff --git a/test/replication/local_spaces.result b/test/replication/local_spaces.result
index cf2c52010..4855d8a88 100644
--- a/test/replication/local_spaces.result
+++ b/test/replication/local_spaces.result
@@ -288,6 +288,10 @@ _ = s3:insert{3}
 vclock = test_run:get_vclock('default')
 ---
 ...
+-- The 0-th vclock components differ, ignore them when waiting.
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock('replica', vclock)
 ---
 ...
diff --git a/test/replication/local_spaces.test.lua b/test/replication/local_spaces.test.lua
index 373e2cd20..c5e224030 100644
--- a/test/replication/local_spaces.test.lua
+++ b/test/replication/local_spaces.test.lua
@@ -112,6 +112,9 @@ _ = s1:insert{3}
 _ = s2:insert{3}
 _ = s3:insert{3}
 vclock = test_run:get_vclock('default')
+
+-- The 0-th vclock components differ, ignore them when waiting.
+vclock[0] = nil
 _ = test_run:wait_vclock('replica', vclock)
 
 test_run:cmd("switch replica")
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 2635da265..2bd701f70 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -1241,6 +1241,11 @@ test_run:cmd("switch default")
 vclock = test_run:get_vclock("default")
 ---
 ...
+-- The 0-th vclock components differ between instances,
+-- so ignore them when waiting.
+vclock[0] = nil
+---
+...
 _ = test_run:wait_vclock("replica", vclock)
 ---
 ...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 4230cfae3..750d3bfe8 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -455,6 +455,10 @@ box.cfg{read_only = true}
 
 test_run:cmd("switch default")
 vclock = test_run:get_vclock("default")
+
+-- The 0-th vclock components differ between instances,
+-- so ignore them when waiting.
+vclock[0] = nil
 _ = test_run:wait_vclock("replica", vclock)
 
 test_run:cmd("stop server replica")
diff --git a/test/xlog/panic_on_wal_error.result b/test/xlog/panic_on_wal_error.result
index 22f14f912..aa2ba417b 100644
--- a/test/xlog/panic_on_wal_error.result
+++ b/test/xlog/panic_on_wal_error.result
@@ -141,7 +141,7 @@ box.info.replication[1].upstream.status
 ...
 box.info.replication[1].upstream.message
 ---
-- 'Missing .xlog file between LSN 6 {1: 6} and 8 {1: 8}'
+- 'Missing .xlog file between LSN 6 {0: 0, 1: 6} and 8 {1: 8}'
 ...
 box.space.test:select{}
 ---
-- 
2.21.0 (Apple Git-122)


