From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Content-Type: text/plain; charset=utf-8 Mime-Version: 1.0 (Mac OS X Mail 12.2 \(3445.102.3\)) Subject: Re: [PATCH] replication: move cluster id match check to replica From: Serge Petrenko In-Reply-To: <20190131132526.72263-1-sergepetrenko@tarantool.org> Date: Thu, 31 Jan 2019 16:30:50 +0300 Content-Transfer-Encoding: quoted-printable Message-Id: References: <20190131132526.72263-1-sergepetrenko@tarantool.org> To: Vladimir Davydov Cc: tarantool-patches@freelists.org, Georgy Kirichenko List-ID: Sorry, the mailing list didn=E2=80=99t forward it for some reason > 31 =D1=8F=D0=BD=D0=B2. 2019 =D0=B3., =D0=B2 16:25, Serge Petrenko = =D0=BD=D0=B0=D0=BF=D0=B8=D1=81=D0=B0=D0=BB(=D0= =B0): >=20 > On replica subscribe master checks that replica's cluster id matches > master's one, and disallows replication in case of mismatch. > This behaviour blocks impplementation of anonymous replicas, which > shouldn't pollute _cluster space and could accumulate changes from > multiple clusters at once. > So let's move the check to replica to let it decide which action to = take > in case of mismatch. >=20 > Needed for #3186 > Closes #3704 > --- > Issue: https://github.com/tarantool/tarantool/issues/3704 > Branch: = https://github.com/tarantool/tarantool/tree/sp/gh-3704-cluster-id-check-on= -replica >=20 > src/box/applier.cc | 24 +++++++++++- > src/box/box.cc | 22 +++++------ > src/box/xrow.c | 26 +++++++++++++ > src/box/xrow.h | 51 ++++++++++++++++++++++++ > test/replication/misc.result | 71 ++++++++++++++++++++++++++++++++++ > test/replication/misc.test.lua | 26 +++++++++++++ > 6 files changed, 206 insertions(+), 14 deletions(-) >=20 > diff --git a/src/box/applier.cc b/src/box/applier.cc > index 21d2e6bcb..704ed6c5b 100644 > --- a/src/box/applier.cc > +++ b/src/box/applier.cc > @@ -391,6 +391,7 @@ applier_subscribe(struct applier *applier) > struct ibuf *ibuf =3D &applier->ibuf; > struct xrow_header row; > struct vclock remote_vclock_at_subscribe; > + struct tt_uuid cluster_id =3D uuid_nil; >=20 > xrow_encode_subscribe_xc(&row, &REPLICASET_UUID, &INSTANCE_UUID, > &replicaset.vclock); > @@ -408,9 +409,30 @@ applier_subscribe(struct applier *applier) > /* > * In case of successful subscribe, the server > * responds with its current vclock. > + * > + * Tarantool 2.1.1 also sends its cluster id to > + * the replica, and replica has to check whether > + * its and master's cluster ids match. > */ > vclock_create(&remote_vclock_at_subscribe); > - xrow_decode_vclock_xc(&row, = &remote_vclock_at_subscribe); > + if (applier->version_id >=3D version_id(2, 1, 1)) { > + xrow_decode_subscribe_response_xc(&row, > + &cluster_id, > + = &remote_vclock_at_subscribe); > + /* > + * If master didn't send us its cluster id > + * assume that it has done all the checks. > + * In this case cluster_id will remain zero. > + */ > + if (!tt_uuid_is_equal(&cluster_id, &uuid_nil) && > + !tt_uuid_is_equal(&cluster_id, = &REPLICASET_UUID)) { > + tnt_raise(ClientError, = ER_REPLICASET_UUID_MISMATCH, > + tt_uuid_str(&cluster_id), > + = tt_uuid_str(&REPLICASET_UUID)); > + } > + } else { > + xrow_decode_vclock_xc(&row, = &remote_vclock_at_subscribe); > + } > } > /* > * Tarantool < 1.6.7: > diff --git a/src/box/box.cc b/src/box/box.cc > index 8892d0f0e..8a8c08167 100644 > --- a/src/box/box.cc > +++ b/src/box/box.cc > @@ -1584,18 +1584,6 @@ box_process_subscribe(struct ev_io *io, struct = xrow_header *header) > /* Check permissions */ > access_check_universe_xc(PRIV_R); >=20 > - /** > - * Check that the given UUID matches the UUID of the > - * replica set this replica belongs to. Used to handshake > - * replica connect, and refuse a connection from a replica > - * which belongs to a different replica set. > - */ > - if (!tt_uuid_is_equal(&replicaset_uuid, &REPLICASET_UUID)) { > - tnt_raise(ClientError, ER_REPLICASET_UUID_MISMATCH, > - tt_uuid_str(&REPLICASET_UUID), > - tt_uuid_str(&replicaset_uuid)); > - } > - > /* Check replica uuid */ > struct replica *replica =3D replica_by_uuid(&replica_uuid); > if (replica =3D=3D NULL || replica->id =3D=3D REPLICA_ID_NIL) { > @@ -1620,9 +1608,17 @@ box_process_subscribe(struct ev_io *io, struct = xrow_header *header) > * Send a response to SUBSCRIBE request, tell > * the replica how many rows we have in stock for it, > * and identify ourselves with our own replica id. > + * > + * Since Tarantool version 2.1.1 master doesn't check > + * that replica and master have the same cluster id. > + * It sends its cluster id to replica instead, and replica > + * checks that its cluster id matches master's one. > + * Older versions will just ignore the additional field. > */ > struct xrow_header row; > - xrow_encode_vclock_xc(&row, &replicaset.vclock); > + xrow_encode_subscribe_response_xc(&row, > + &REPLICASET_UUID, > + &replicaset.vclock); > /* > * Identify the message with the replica id of this > * instance, this is the only way for a replica to find > diff --git a/src/box/xrow.c b/src/box/xrow.c > index c4e3073be..84b3473b1 100644 > --- a/src/box/xrow.c > +++ b/src/box/xrow.c > @@ -1170,6 +1170,32 @@ xrow_encode_vclock(struct xrow_header *row, = const struct vclock *vclock) > return 0; > } >=20 > +int > +xrow_encode_subscribe_response(struct xrow_header *row, > + const struct tt_uuid *replicaset_uuid, > + const struct vclock *vclock) > +{ > + memset(row, 0, sizeof(*row)); > + size_t size =3D XROW_BODY_LEN_MAX + UUID_STR_LEN + = mp_sizeof_vclock(vclock); > + char *buf =3D (char *) region_alloc(&fiber()->gc, size); > + if (buf =3D=3D NULL) { > + diag_set(OutOfMemory, size, "region_alloc", "buf"); > + return -1; > + } > + char *data =3D buf; > + data =3D mp_encode_map(data, 2); > + data =3D mp_encode_uint(data, IPROTO_VCLOCK); > + data =3D mp_encode_vclock(data, vclock); > + data =3D mp_encode_uint(data, IPROTO_CLUSTER_UUID); > + data =3D xrow_encode_uuid(data, replicaset_uuid); > + assert(data <=3D buf + size); > + row->body[0].iov_base =3D buf; > + row->body[0].iov_len =3D (data - buf); > + row->bodycnt =3D 1; > + row->type =3D IPROTO_OK; > + return 0; > +} > + > void > xrow_encode_timestamp(struct xrow_header *row, uint32_t replica_id, = double tm) > { > diff --git a/src/box/xrow.h b/src/box/xrow.h > index 2654e35e6..719add4f0 100644 > --- a/src/box/xrow.h > +++ b/src/box/xrow.h > @@ -348,6 +348,37 @@ xrow_decode_vclock(struct xrow_header *row, = struct vclock *vclock) > return xrow_decode_subscribe(row, NULL, NULL, vclock, NULL); > } >=20 > +/** > + * Encode a response to subscribe request. > + * @param row[out] Row to encode into. > + * @param replicaset_uuid. > + * @param vclock. > + * > + * @retval 0 Success. > + * @retval -1 Memory error. > + */ > +int > +xrow_encode_subscribe_response(struct xrow_header *row, > + const struct tt_uuid *replicaset_uuid, > + const struct vclock *vclock); > + > +/** > + * Decode a response to subscribe request. > + * @param row Row to decode. > + * @param[out] replicaset_uuid. > + * @param[out] vclock. > + * > + * @retval 0 Success. > + * @retval -1 Memory or format error. > + */ > +static inline int > +xrow_decode_subscribe_response(struct xrow_header *row, > + struct tt_uuid *replicaset_uuid, > + struct vclock *vclock) > +{ > + return xrow_decode_subscribe(row, replicaset_uuid, NULL, vclock, = NULL); > +} > + > /** > * Encode a heartbeat message. > * @param row[out] Row to encode into. > @@ -759,6 +790,26 @@ xrow_decode_vclock_xc(struct xrow_header *row, = struct vclock *vclock) > diag_raise(); > } >=20 > +/** @copydoc xrow_encode_subscribe_response. */ > +static inline void > +xrow_encode_subscribe_response_xc(struct xrow_header *row, > + const struct tt_uuid *replicaset_uuid, > + const struct vclock *vclock) > +{ > + if (xrow_encode_subscribe_response(row, replicaset_uuid, vclock) = !=3D 0) > + diag_raise(); > +} > + > +/** @copydoc xrow_decode_subscribe_response. */ > +static inline void > +xrow_decode_subscribe_response_xc(struct xrow_header *row, > + struct tt_uuid *replicaset_uuid, > + struct vclock *vclock) > +{ > + if (xrow_decode_subscribe_response(row, replicaset_uuid, vclock) = !=3D 0) > + diag_raise(); > +} > + > /** @copydoc iproto_reply_ok. */ > static inline void > iproto_reply_ok_xc(struct obuf *out, uint64_t sync, uint32_t = schema_version) > diff --git a/test/replication/misc.result = b/test/replication/misc.result > index c32681a7a..1f7e78a2f 100644 > --- a/test/replication/misc.result > +++ b/test/replication/misc.result > @@ -604,3 +604,74 @@ test_run:cmd("delete server replica") > test_run:cleanup_cluster() > --- > ... > +-- > +-- gh-3704 move cluster id check to replica > +-- > +test_run:cmd("create server replica with rpl_master=3Ddefault, = script=3D'replication/replica.lua'") > +--- > +- true > +... > +box.schema.user.grant("guest", "replication") > +--- > +... > +test_run:cmd("start server replica") > +--- > +- true > +... > +test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH") > +--- > +- null > +... > +box.info.replication[2].downstream ~=3D nil > +--- > +- true > +... > +-- make master generate another cluster uuid and check that > +-- replica doesn't connect > +test_run:cmd("stop server replica") > +--- > +- true > +... > +test_run:cmd("restart server default with cleanup=3D1") > +box.schema.user.grant("guest", "replication") > +--- > +... > +test_run:cmd("start server replica") > +--- > +- true > +... > +-- master believes replica is in cluster, but their UUIDS differ. > +replica_uuid =3D test_run:eval("replica", "box.info.uuid")[1] > +--- > +... > +_ =3D box.space._cluster:insert{2, replica_uuid} > +--- > +... > +test_run:cmd("restart server replica") > +--- > +- true > +... > +test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH") > +--- > +- REPLICASET_UUID_MISMATCH > +... > +box.info.replication[2].downstream ~=3D nil > +--- > +- false > +... > +test_run:cmd("restart server default with cleanup=3D1") > +--- > +- true > +... > +test_run:cmd("stop server replica") > +--- > +- true > +... > +test_run:cmd("cleanup server replica") > +--- > +- true > +... > +test_run:cmd("delete server replica") > +--- > +- true > +... > diff --git a/test/replication/misc.test.lua = b/test/replication/misc.test.lua > index 6a8af05c3..967c4e912 100644 > --- a/test/replication/misc.test.lua > +++ b/test/replication/misc.test.lua > @@ -243,3 +243,29 @@ test_run:cmd("stop server replica") > test_run:cmd("cleanup server replica") > test_run:cmd("delete server replica") > test_run:cleanup_cluster() > + > +-- > +-- gh-3704 move cluster id check to replica > +-- > +test_run:cmd("create server replica with rpl_master=3Ddefault, = script=3D'replication/replica.lua'") > +box.schema.user.grant("guest", "replication") > +test_run:cmd("start server replica") > +test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH") > +box.info.replication[2].downstream ~=3D nil > +-- make master generate another cluster uuid and check that > +-- replica doesn't connect > +test_run:cmd("stop server replica") > +test_run:cmd("restart server default with cleanup=3D1") > +box.schema.user.grant("guest", "replication") > +test_run:cmd("start server replica") > +-- master believes replica is in cluster, but their UUIDS differ. > +replica_uuid =3D test_run:eval("replica", "box.info.uuid")[1] > +_ =3D box.space._cluster:insert{2, replica_uuid} > +test_run:cmd("restart server replica") > +test_run:grep_log("replica", "REPLICASET_UUID_MISMATCH") > +box.info.replication[2].downstream ~=3D nil > + > +test_run:cmd("restart server default with cleanup=3D1") > +test_run:cmd("stop server replica") > +test_run:cmd("cleanup server replica") > +test_run:cmd("delete server replica") > --=20 > 2.17.2 (Apple Git-113) >=20