[PATCH] replication: move cluster id match check to replica

Serge Petrenko sergepetrenko at tarantool.org
Wed Feb 6 17:51:59 MSK 2019


Hi! Thank you for the review!

> On 6 Feb 2019, at 13:48, Vladimir Davydov <vdavydov.dev at gmail.com> wrote:
> 
> On Thu, Jan 31, 2019 at 04:25:26PM +0300, Serge Petrenko wrote:
>> On replica subscribe, the master checks that the replica's cluster id matches
>> 
>> 		 * responds with its current vclock.
>> +		 *
>> +		 * Tarantool 2.1.1 also sends its cluster id to
>> +		 * the replica, and replica has to check whether
>> +		 * its and master's cluster ids match.
>> 		 */
>> 		vclock_create(&remote_vclock_at_subscribe);
>> -		xrow_decode_vclock_xc(&row, &remote_vclock_at_subscribe);
>> +		if (applier->version_id >= version_id(2, 1, 1)) {
> 
> We already released 2.1.1. Should be 2.1.2? Anyway, do we really need to
> check the version here? Let's unconditionally check replicaset_uuid in
> case it's present. It shouldn't break anything, should it?

Yes, you’re right. I removed the if-else.
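
For reference, the payload the replica has to parse is just a two-key
MsgPack map. A rough standalone sketch of the decoding (illustrative
only -- the real xrow_decode_subscribe_response() in xrow.c most likely
reuses the generic subscribe decoder, and a production decoder would
also validate value types with mp_typeof()):

	const char *d = (const char *) row->body[0].iov_base;
	uint32_t map_size = mp_decode_map(&d);
	for (uint32_t i = 0; i < map_size; i++) {
		uint64_t key = mp_decode_uint(&d);
		switch (key) {
		case IPROTO_VCLOCK:
			mp_decode_vclock(&d, vclock);
			break;
		case IPROTO_CLUSTER_UUID:
			xrow_decode_uuid(&d, cluster_id);
			break;
		default:
			mp_next(&d); /* skip unknown keys */
		}
	}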

> 
>> +			xrow_decode_subscribe_response_xc(&row,
>> +							  &cluster_id,
>> +							  &remote_vclock_at_subscribe);
>> +			/*
>> +			 * If master didn't send us its cluster id
>> +			 * assume that it has done all the checks.
>> +			 * In this case cluster_id will remain zero.
>> +			 */
>> +			if (!tt_uuid_is_equal(&cluster_id, &uuid_nil) &&
>> +			    !tt_uuid_is_equal(&cluster_id, &REPLICASET_UUID)) {
>> +				tnt_raise(ClientError, ER_REPLICASET_UUID_MISMATCH,
>> +					  tt_uuid_str(&cluster_id),
>> +					  tt_uuid_str(&REPLICASET_UUID));
>> +			}
>> +		} else {
>> +			xrow_decode_vclock_xc(&row, &remote_vclock_at_subscribe);
>> +		}
>> 	}
>> 	/*
>> 	 * Tarantool < 1.6.7:
>> diff --git a/src/box/xrow.c b/src/box/xrow.c
>> index c4e3073be..84b3473b1 100644
>> --- a/src/box/xrow.c
>> +++ b/src/box/xrow.c
>> @@ -1170,6 +1170,32 @@ xrow_encode_vclock(struct xrow_header *row, const struct vclock *vclock)
>> 	return 0;
>> }
>> 
>> +int
>> +xrow_encode_subscribe_response(struct xrow_header *row,
>> +			       const struct tt_uuid *replicaset_uuid,
>> +			       const struct vclock *vclock)
>> +{
>> +	memset(row, 0, sizeof(*row));
>> +	size_t size = XROW_BODY_LEN_MAX + UUID_STR_LEN + mp_sizeof_vclock(vclock);
> 
> XROW_BODY_LEN_MAX? Why is it here?
> 
> Let's estimate the response size with mp_sizeof please.

OK, now I compute the size properly, sorry.

> 
>> +	char *buf = (char *) region_alloc(&fiber()->gc, size);
>> +	if (buf == NULL) {
>> +		diag_set(OutOfMemory, size, "region_alloc", "buf");
>> +		return -1;
>> +	}
>> +	char *data = buf;
>> +	data = mp_encode_map(data, 2);
>> +	data = mp_encode_uint(data, IPROTO_VCLOCK);
>> +	data = mp_encode_vclock(data, vclock);
>> +	data = mp_encode_uint(data, IPROTO_CLUSTER_UUID);
>> +	data = xrow_encode_uuid(data, replicaset_uuid);
>> +	assert(data <= buf + size);
>> +	row->body[0].iov_base = buf;
>> +	row->body[0].iov_len = (data - buf);
>> +	row->bodycnt = 1;
>> +	row->type = IPROTO_OK;
>> +	return 0;
>> +}
>> diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua
>> index 6a8af05c3..967c4e912 100644
>> --- a/test/replication/misc.test.lua
>> +++ b/test/replication/misc.test.lua
>> @@ -243,3 +243,29 @@ test_run:cmd("stop server replica")
>> test_run:cmd("cleanup server replica")
>> test_run:cmd("delete server replica")
>> test_run:cleanup_cluster()
>> +
>> +--
>> +-- gh-3704 move cluster id check to replica
>> +--
>> +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
>> +box.schema.user.grant("guest", "replication")
>> +test_run:cmd("start server replica")
>> +test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
>> +box.info.replication[2].downstream ~= nil
>> +-- make master generate another cluster uuid and check that
>> +-- replica doesn't connect
>> +test_run:cmd("stop server replica")
>> +test_run:cmd("restart server default with cleanup=1")
>> +box.schema.user.grant("guest", "replication")
>> +test_run:cmd("start server replica")
>> +-- master believes replica is in cluster, but their UUIDS differ.
>> +replica_uuid = test_run:eval("replica", "box.info.uuid")[1]
>> +_ = box.space._cluster:insert{2, replica_uuid}
> 
> You could instead call box.schema._replace{'cluster_id', new_uuid}.
> Not sure if the test would be shorter if you'd done that though.

Done, the test is a bit shorter, indeed.

> 
>> +test_run:cmd("restart server replica")
>> +test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
>> +box.info.replication[2].downstream ~= nil
>> +
>> +test_run:cmd("restart server default with cleanup=1")
>> +test_run:cmd("stop server replica")
>> +test_run:cmd("cleanup server replica")
>> +test_run:cmd("delete server replica»)

Here’s an incremental diff. The fixes are on the branch.

diff --git a/src/box/applier.cc b/src/box/applier.cc
index 704ed6c5b..045c69102 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -415,23 +415,19 @@ applier_subscribe(struct applier *applier)
 		 * its and master's cluster ids match.
 		 */
 		vclock_create(&remote_vclock_at_subscribe);
-		if (applier->version_id >= version_id(2, 1, 1)) {
-			xrow_decode_subscribe_response_xc(&row,
-							  &cluster_id,
-							  &remote_vclock_at_subscribe);
-			/*
-			 * If master didn't send us its cluster id
-			 * assume that it has done all the checks.
-			 * In this case cluster_id will remain zero.
-			 */
-			if (!tt_uuid_is_equal(&cluster_id, &uuid_nil) &&
-			    !tt_uuid_is_equal(&cluster_id, &REPLICASET_UUID)) {
-				tnt_raise(ClientError, ER_REPLICASET_UUID_MISMATCH,
-					  tt_uuid_str(&cluster_id),
-					  tt_uuid_str(&REPLICASET_UUID));
-			}
-		} else {
-			xrow_decode_vclock_xc(&row, &remote_vclock_at_subscribe);
+		xrow_decode_subscribe_response_xc(&row,
+						  &cluster_id,
+						  &remote_vclock_at_subscribe);
+		/*
+		 * If master didn't send us its cluster id
+		 * assume that it has done all the checks.
+		 * In this case cluster_id will remain zero.
+		 */
+		if (!tt_uuid_is_equal(&cluster_id, &uuid_nil) &&
+		    !tt_uuid_is_equal(&cluster_id, &REPLICASET_UUID)) {
+			tnt_raise(ClientError, ER_REPLICASET_UUID_MISMATCH,
+				  tt_uuid_str(&cluster_id),
+				  tt_uuid_str(&REPLICASET_UUID));
 		}
 	}
 	/*
diff --git a/src/box/xrow.c b/src/box/xrow.c
index 84b3473b1..ba3671fed 100644
--- a/src/box/xrow.c
+++ b/src/box/xrow.c
@@ -1176,7 +1176,10 @@ xrow_encode_subscribe_response(struct xrow_header *row,
 			       const struct vclock *vclock)
 {
 	memset(row, 0, sizeof(*row));
-	size_t size = XROW_BODY_LEN_MAX + UUID_STR_LEN + mp_sizeof_vclock(vclock);
+	size_t size = mp_sizeof_map(2) +
+		      mp_sizeof_uint(IPROTO_VCLOCK) + mp_sizeof_vclock(vclock) +
+		      mp_sizeof_uint(IPROTO_CLUSTER_UUID) +
+		      mp_sizeof_str(UUID_STR_LEN);
 	char *buf = (char *) region_alloc(&fiber()->gc, size);
 	if (buf == NULL) {
 		diag_set(OutOfMemory, size, "region_alloc", "buf");
diff --git a/test/replication/misc.result b/test/replication/misc.result
index 1f7e78a2f..2401ac5cf 100644
--- a/test/replication/misc.result
+++ b/test/replication/misc.result
@@ -622,47 +622,32 @@ test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
 ---
 - null
 ...
-box.info.replication[2].downstream ~= nil
+box.info.replication[2].downstream.status
 ---
-- true
+- follow
 ...
--- make master generate another cluster uuid and check that
--- replica doesn't connect
+-- change master's cluster uuid and check that replica doesn't connect.
 test_run:cmd("stop server replica")
 ---
 - true
 ...
-test_run:cmd("restart server default with cleanup=1")
-box.schema.user.grant("guest", "replication")
+_ = box.space._schema:replace{'cluster', tostring(uuid.new())}
 ---
 ...
+-- master believes replica is in cluster, but their cluster UUIDS differ.
 test_run:cmd("start server replica")
 ---
 - true
 ...
--- master believes replica is in cluster, but their UUIDS differ.
-replica_uuid = test_run:eval("replica", "box.info.uuid")[1]
----
-...
-_ = box.space._cluster:insert{2, replica_uuid}
----
-...
-test_run:cmd("restart server replica")
----
-- true
-...
-test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
+test_run:wait_log("replica", "REPLICASET_UUID_MISMATCH", nil, 1.0)
 ---
 - REPLICASET_UUID_MISMATCH
 ...
-box.info.replication[2].downstream ~= nil
+box.info.replication[2].downstream.status
 ---
-- false
+- stopped
 ...
 test_run:cmd("restart server default with cleanup=1")
----
-- true
-...
 test_run:cmd("stop server replica")
 ---
 - true
diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua
index 967c4e912..a1ddf75a8 100644
--- a/test/replication/misc.test.lua
+++ b/test/replication/misc.test.lua
@@ -251,19 +251,14 @@ test_run:cmd("create server replica with rpl_master=default, script='replication
 box.schema.user.grant("guest", "replication")
 test_run:cmd("start server replica")
 test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
-box.info.replication[2].downstream ~= nil
--- make master generate another cluster uuid and check that
--- replica doesn't connect
+box.info.replication[2].downstream.status
+-- change master's cluster uuid and check that replica doesn't connect.
 test_run:cmd("stop server replica")
-test_run:cmd("restart server default with cleanup=1")
-box.schema.user.grant("guest", "replication")
+_ = box.space._schema:replace{'cluster', tostring(uuid.new())}
+-- master believes replica is in cluster, but their cluster UUIDS differ.
 test_run:cmd("start server replica")
--- master believes replica is in cluster, but their UUIDS differ.
-replica_uuid = test_run:eval("replica", "box.info.uuid")[1]
-_ = box.space._cluster:insert{2, replica_uuid}
-test_run:cmd("restart server replica")
-test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
-box.info.replication[2].downstream ~= nil
+test_run:wait_log("replica", "REPLICASET_UUID_MISMATCH", nil, 1.0)
+box.info.replication[2].downstream.status
 
 test_run:cmd("restart server default with cleanup=1")
 test_run:cmd("stop server replica")
