From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Wed, 6 Feb 2019 13:48:11 +0300 From: Vladimir Davydov Subject: Re: [PATCH] replication: move cluster id match check to replica Message-ID: <20190206104811.nnn7bni4zfa74syq@esperanza> References: <20190131132526.72263-1-sergepetrenko@tarantool.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20190131132526.72263-1-sergepetrenko@tarantool.org> To: Serge Petrenko Cc: tarantool-patches@freelists.org, georgy@tarantool.org List-ID: On Thu, Jan 31, 2019 at 04:25:26PM +0300, Serge Petrenko wrote: > On replica subscribe master checks that replica's cluster id matches > master's one, and disallows replication in case of mismatch. > This behaviour blocks impplementation of anonymous replicas, which > shouldn't pollute _cluster space and could accumulate changes from > multiple clusters at once. > So let's move the check to replica to let it decide which action to take > in case of mismatch. > > Needed for #3186 > Closes #3704 > --- > Issue: https://github.com/tarantool/tarantool/issues/3704 > Branch: https://github.com/tarantool/tarantool/tree/sp/gh-3704-cluster-id-check-on-replica > > src/box/applier.cc | 24 +++++++++++- > src/box/box.cc | 22 +++++------ > src/box/xrow.c | 26 +++++++++++++ > src/box/xrow.h | 51 ++++++++++++++++++++++++ > test/replication/misc.result | 71 ++++++++++++++++++++++++++++++++++ > test/replication/misc.test.lua | 26 +++++++++++++ > 6 files changed, 206 insertions(+), 14 deletions(-) > > diff --git a/src/box/applier.cc b/src/box/applier.cc > index 21d2e6bcb..704ed6c5b 100644 > --- a/src/box/applier.cc > +++ b/src/box/applier.cc > @@ -391,6 +391,7 @@ applier_subscribe(struct applier *applier) > struct ibuf *ibuf = &applier->ibuf; > struct xrow_header row; > struct vclock remote_vclock_at_subscribe; > + struct tt_uuid cluster_id = uuid_nil; > > xrow_encode_subscribe_xc(&row, &REPLICASET_UUID, &INSTANCE_UUID, > &replicaset.vclock); > @@ -408,9 +409,30 @@ applier_subscribe(struct applier *applier) > /* > * In case of successful subscribe, the server > * responds with its current vclock. > + * > + * Tarantool 2.1.1 also sends its cluster id to > + * the replica, and replica has to check whether > + * its and master's cluster ids match. > */ > vclock_create(&remote_vclock_at_subscribe); > - xrow_decode_vclock_xc(&row, &remote_vclock_at_subscribe); > + if (applier->version_id >= version_id(2, 1, 1)) { We already released 2.1.1. Should be 2.1.2? Anyway, do we really need to check the version here? Let's unconditionally check replicaset_uuid in case it's present. It shouldn't break anything, should it? > + xrow_decode_subscribe_response_xc(&row, > + &cluster_id, > + &remote_vclock_at_subscribe); > + /* > + * If master didn't send us its cluster id > + * assume that it has done all the checks. > + * In this case cluster_id will remain zero. > + */ > + if (!tt_uuid_is_equal(&cluster_id, &uuid_nil) && > + !tt_uuid_is_equal(&cluster_id, &REPLICASET_UUID)) { > + tnt_raise(ClientError, ER_REPLICASET_UUID_MISMATCH, > + tt_uuid_str(&cluster_id), > + tt_uuid_str(&REPLICASET_UUID)); > + } > + } else { > + xrow_decode_vclock_xc(&row, &remote_vclock_at_subscribe); > + } > } > /* > * Tarantool < 1.6.7: > diff --git a/src/box/xrow.c b/src/box/xrow.c > index c4e3073be..84b3473b1 100644 > --- a/src/box/xrow.c > +++ b/src/box/xrow.c > @@ -1170,6 +1170,32 @@ xrow_encode_vclock(struct xrow_header *row, const struct vclock *vclock) > return 0; > } > > +int > +xrow_encode_subscribe_response(struct xrow_header *row, > + const struct tt_uuid *replicaset_uuid, > + const struct vclock *vclock) > +{ > + memset(row, 0, sizeof(*row)); > + size_t size = XROW_BODY_LEN_MAX + UUID_STR_LEN + mp_sizeof_vclock(vclock); XROW_BODY_LEN_MAX? Why is it here? Let's estimate the response size with mp_sizeof please. > + char *buf = (char *) region_alloc(&fiber()->gc, size); > + if (buf == NULL) { > + diag_set(OutOfMemory, size, "region_alloc", "buf"); > + return -1; > + } > + char *data = buf; > + data = mp_encode_map(data, 2); > + data = mp_encode_uint(data, IPROTO_VCLOCK); > + data = mp_encode_vclock(data, vclock); > + data = mp_encode_uint(data, IPROTO_CLUSTER_UUID); > + data = xrow_encode_uuid(data, replicaset_uuid); > + assert(data <= buf + size); > + row->body[0].iov_base = buf; > + row->body[0].iov_len = (data - buf); > + row->bodycnt = 1; > + row->type = IPROTO_OK; > + return 0; > +} > diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua > index 6a8af05c3..967c4e912 100644 > --- a/test/replication/misc.test.lua > +++ b/test/replication/misc.test.lua > @@ -243,3 +243,29 @@ test_run:cmd("stop server replica") > test_run:cmd("cleanup server replica") > test_run:cmd("delete server replica") > test_run:cleanup_cluster() > + > +-- > +-- gh-3704 move cluster id check to replica > +-- > +test_run:cmd("create server replica with rpl_master=default, script='replication/replica.lua'") > +box.schema.user.grant("guest", "replication") > +test_run:cmd("start server replica") > +test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH") > +box.info.replication[2].downstream ~= nil > +-- make master generate another cluster uuid and check that > +-- replica doesn't connect > +test_run:cmd("stop server replica") > +test_run:cmd("restart server default with cleanup=1") > +box.schema.user.grant("guest", "replication") > +test_run:cmd("start server replica") > +-- master believes replica is in cluster, but their UUIDS differ. > +replica_uuid = test_run:eval("replica", "box.info.uuid")[1] > +_ = box.space._cluster:insert{2, replica_uuid} You could instead call box.schema._replace{'cluster_id', new_uuid}. Not sure if the test would be shorter if you'd done that though. > +test_run:cmd("restart server replica") > +test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH") > +box.info.replication[2].downstream ~= nil > + > +test_run:cmd("restart server default with cleanup=1") > +test_run:cmd("stop server replica") > +test_run:cmd("cleanup server replica") > +test_run:cmd("delete server replica")