Hi! Thankyou for review!

6 февр. 2019 г., в 13:48, Vladimir Davydov <vdavydov.dev@gmail.com> написал(а):

On Thu, Jan 31, 2019 at 04:25:26PM +0300, Serge Petrenko wrote:
On replica subscribe master checks that replica's cluster id matches

 * responds with its current vclock.
+  *
+  * Tarantool 2.1.1 also sends its cluster id to
+  * the replica, and replica has to check whether
+  * its and master's cluster ids match.
 */
vclock_create(&remote_vclock_at_subscribe);
- xrow_decode_vclock_xc(&row, &remote_vclock_at_subscribe);
+ if (applier->version_id >= version_id(2, 1, 1)) {

We already released 2.1.1. Should be 2.1.2? Anyway, do we really need to
check the version here? Let's unconditionally check replicaset_uuid in
case it's present. It shouldn't break anything, should it?

Yes, you’re right. I removed the if-else.


+ xrow_decode_subscribe_response_xc(&row,
+   &cluster_id,
+   &remote_vclock_at_subscribe);
+ /*
+  * If master didn't send us its cluster id
+  * assume that it has done all the checks.
+  * In this case cluster_id will remain zero.
+  */
+ if (!tt_uuid_is_equal(&cluster_id, &uuid_nil) &&
+     !tt_uuid_is_equal(&cluster_id, &REPLICASET_UUID)) {
+ tnt_raise(ClientError, ER_REPLICASET_UUID_MISMATCH,
+   tt_uuid_str(&cluster_id),
+   tt_uuid_str(&REPLICASET_UUID));
+ }
+ } else {
+ xrow_decode_vclock_xc(&row, &remote_vclock_at_subscribe);
+ }
}
/*
 * Tarantool < 1.6.7:
diff --git a/src/box/xrow.c b/src/box/xrow.c
index c4e3073be..84b3473b1 100644
--- a/src/box/xrow.c
+++ b/src/box/xrow.c
@@ -1170,6 +1170,32 @@ xrow_encode_vclock(struct xrow_header *row, const struct vclock *vclock)
return 0;
}

+int
+xrow_encode_subscribe_response(struct xrow_header *row,
+        const struct tt_uuid *replicaset_uuid,
+        const struct vclock *vclock)
+{
+ memset(row, 0, sizeof(*row));
+ size_t size = XROW_BODY_LEN_MAX + UUID_STR_LEN + mp_sizeof_vclock(vclock);

XROW_BODY_LEN_MAX? Why is it here?

Let's estimate the response size with mp_sizeof please.

Ok, now I count it properly, sorry.


+ char *buf = (char *) region_alloc(&fiber()->gc, size);
+ if (buf == NULL) {
+ diag_set(OutOfMemory, size, "region_alloc", "buf");
+ return -1;
+ }
+ char *data = buf;
+ data = mp_encode_map(data, 2);
+ data = mp_encode_uint(data, IPROTO_VCLOCK);
+ data = mp_encode_vclock(data, vclock);
+ data = mp_encode_uint(data, IPROTO_CLUSTER_UUID);
+ data = xrow_encode_uuid(data, replicaset_uuid);
+ assert(data <= buf + size);
+ row->body[0].iov_base = buf;
+ row->body[0].iov_len = (data - buf);
+ row->bodycnt = 1;
+ row->type = IPROTO_OK;
+ return 0;
+}
diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua
index 6a8af05c3..967c4e912 100644
--- a/test/replication/misc.test.lua
+++ b/test/replication/misc.test.lua
@@ -243,3 +243,29 @@ test_run:cmd("stop server replica")
test_run:cmd("cleanup server replica")
test_run:cmd("delete server replica")
test_run:cleanup_cluster()
+
+--
+-- gh-3704 move cluster id check to replica
+--
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'")
+box.schema.user.grant("guest", "replication")
+test_run:cmd("start server replica")
+test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
+box.info.replication[2].downstream ~= nil
+-- make master generate another cluster uuid and check that
+-- replica doesn't connect
+test_run:cmd("stop server replica")
+test_run:cmd("restart server default with cleanup=1")
+box.schema.user.grant("guest", "replication")
+test_run:cmd("start server replica")
+-- master believes replica is in cluster, but their UUIDS differ.
+replica_uuid = test_run:eval("replica", "box.info.uuid")[1]
+_ = box.space._cluster:insert{2, replica_uuid}

You could instead call box.schema._replace{'cluster_id', new_uuid}.
Not sure if the test would be shorter if you'd done that though.

Done, the test is a bit shorter, indeed.


+test_run:cmd("restart server replica")
+test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
+box.info.replication[2].downstream ~= nil
+
+test_run:cmd("restart server default with cleanup=1")
+test_run:cmd("stop server replica")
+test_run:cmd("cleanup server replica")
+test_run:cmd("delete server replica»)

Here’s an incremental diff. The fixes are on the branch.

diff --git a/src/box/applier.cc b/src/box/applier.cc
index 704ed6c5b..045c69102 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -415,23 +415,19 @@ applier_subscribe(struct applier *applier)
   * its and master's cluster ids match.
   */
  vclock_create(&remote_vclock_at_subscribe);
- if (applier->version_id >= version_id(2, 1, 1)) {
- xrow_decode_subscribe_response_xc(&row,
-   &cluster_id,
-   &remote_vclock_at_subscribe);
- /*
-  * If master didn't send us its cluster id
-  * assume that it has done all the checks.
-  * In this case cluster_id will remain zero.
-  */
- if (!tt_uuid_is_equal(&cluster_id, &uuid_nil) &&
-     !tt_uuid_is_equal(&cluster_id, &REPLICASET_UUID)) {
- tnt_raise(ClientError, ER_REPLICASET_UUID_MISMATCH,
-   tt_uuid_str(&cluster_id),
-   tt_uuid_str(&REPLICASET_UUID));
- }
- } else {
- xrow_decode_vclock_xc(&row, &remote_vclock_at_subscribe);
+ xrow_decode_subscribe_response_xc(&row,
+   &cluster_id,
+   &remote_vclock_at_subscribe);
+ /*
+  * If master didn't send us its cluster id
+  * assume that it has done all the checks.
+  * In this case cluster_id will remain zero.
+  */
+ if (!tt_uuid_is_equal(&cluster_id, &uuid_nil) &&
+     !tt_uuid_is_equal(&cluster_id, &REPLICASET_UUID)) {
+ tnt_raise(ClientError, ER_REPLICASET_UUID_MISMATCH,
+   tt_uuid_str(&cluster_id),
+   tt_uuid_str(&REPLICASET_UUID));
  }
  }
  /*
diff --git a/src/box/xrow.c b/src/box/xrow.c
index 84b3473b1..ba3671fed 100644
--- a/src/box/xrow.c
+++ b/src/box/xrow.c
@@ -1176,7 +1176,10 @@ xrow_encode_subscribe_response(struct xrow_header *row,
         const struct vclock *vclock)
 {
  memset(row, 0, sizeof(*row));
- size_t size = XROW_BODY_LEN_MAX + UUID_STR_LEN + mp_sizeof_vclock(vclock);
+ size_t size = mp_sizeof_map(2) +
+       mp_sizeof_uint(IPROTO_VCLOCK) + mp_sizeof_vclock(vclock) +
+       mp_sizeof_uint(IPROTO_CLUSTER_UUID) +
+       mp_sizeof_str(UUID_STR_LEN);
  char *buf = (char *) region_alloc(&fiber()->gc, size);
  if (buf == NULL) {
  diag_set(OutOfMemory, size, "region_alloc", "buf");
diff --git a/test/replication/misc.result b/test/replication/misc.result
index 1f7e78a2f..2401ac5cf 100644
--- a/test/replication/misc.result
+++ b/test/replication/misc.result
@@ -622,47 +622,32 @@ test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
 ---
 - null
 ...
-box.info.replication[2].downstream ~= nil
+box.info.replication[2].downstream.status
 ---
-- true
+- follow
 ...
--- make master generate another cluster uuid and check that
--- replica doesn't connect
+-- change master's cluster uuid and check that replica doesn't connect.
 test_run:cmd("stop server replica")
 ---
 - true
 ...
-test_run:cmd("restart server default with cleanup=1")
-box.schema.user.grant("guest", "replication")
+_ = box.space._schema:replace{'cluster', tostring(uuid.new())}
 ---
 ...
+-- master believes replica is in cluster, but their cluster UUIDS differ.
 test_run:cmd("start server replica")
 ---
 - true
 ...
--- master believes replica is in cluster, but their UUIDS differ.
-replica_uuid = test_run:eval("replica", "box.info.uuid")[1]
----
-...
-_ = box.space._cluster:insert{2, replica_uuid}
----
-...
-test_run:cmd("restart server replica")
----
-- true
-...
-test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
+test_run:wait_log("replica", "REPLICASET_UUID_MISMATCH", nil, 1.0)
 ---
 - REPLICASET_UUID_MISMATCH
 ...
-box.info.replication[2].downstream ~= nil
+box.info.replication[2].downstream.status
 ---
-- false
+- stopped
 ...
 test_run:cmd("restart server default with cleanup=1")
----
-- true
-...
 test_run:cmd("stop server replica")
 ---
 - true
diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua
index 967c4e912..a1ddf75a8 100644
--- a/test/replication/misc.test.lua
+++ b/test/replication/misc.test.lua
@@ -251,19 +251,14 @@ test_run:cmd("create server replica with rpl_master=default, script='replication
 box.schema.user.grant("guest", "replication")
 test_run:cmd("start server replica")
 test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
-box.info.replication[2].downstream ~= nil
--- make master generate another cluster uuid and check that
--- replica doesn't connect
+box.info.replication[2].downstream.status
+-- change master's cluster uuid and check that replica doesn't connect.
 test_run:cmd("stop server replica")
-test_run:cmd("restart server default with cleanup=1")
-box.schema.user.grant("guest", "replication")
+_ = box.space._schema:replace{'cluster', tostring(uuid.new())}
+-- master believes replica is in cluster, but their cluster UUIDS differ.
 test_run:cmd("start server replica")
--- master believes replica is in cluster, but their UUIDS differ.
-replica_uuid = test_run:eval("replica", "box.info.uuid")[1]
-_ = box.space._cluster:insert{2, replica_uuid}
-test_run:cmd("restart server replica")
-test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH")
-box.info.replication[2].downstream ~= nil
+test_run:wait_log("replica", "REPLICASET_UUID_MISMATCH", nil, 1.0)
+box.info.replication[2].downstream.status
 
 test_run:cmd("restart server default with cleanup=1")
 test_run:cmd("stop server replica")