[Tarantool-patches] [PATCH v2 3/4] replication: add is_anon flag to ballot
Serge Petrenko
sergepetrenko at tarantool.org
Tue Sep 15 10:46:37 MSK 2020
15.09.2020 02:11, Vladislav Shpilevoy пишет:
> Ballot is a message sent in response on vote request, which is
> sent by applier first thing after connection establishment.
>
> It contains basic info about the remote instance such as whether
> it is read only, if it is still loading, and more.
>
> The ballot didn't contain a flag whether the instance is
> anonymous. That led to a problem, when applier was connected to a
> remote instance, was added to struct replicaset inside a struct
> replica object, but it was unknown whether it is anonymous. It was
> added as not anonymous by default.
>
> If the remote instance was in fact anonymous and sent a subscribe
> response back to the first instance with the anon flag = true,
> then it looked like the remote instance was not anonymous, and
> suddenly became such, without even a reconnect. It could lead to
> an assertion.
>
> The bug is hidden behind another bug, because of which the leader
> instance on boostrap registers all replicas listed in its
> box.cfg.replication, even anonymous ones.
>
> The patch makes the ballot contain the anon flag. Now both relay
> and applier send whether their host is anonymous. Relay does it by
> sending the ballot, applier sends it in scope of subscribe
> request. By the time a replica gets UUID and is added into struct
> replicaset, its anon flag is determined.
>
> Also the patch makes anon_count updated on each replica hash table
> change. Previously it was only updated when something related to
> relay was done. Now anon is updated by applier actions too, and
> it is not ok to update the counter on relay-specific actions.
>
> The early registration bug is a subject for a next patch.
>
> Part of #5287
>
> @TarantoolBot document
> Title: IPROTO_BALLOT_IS_ANON flag
>
> There is a request type IPROTO_BALLOT, with code 0x29. It has
> fields IPROTO_BALLOT_IS_RO (0x01), IPROTO_BALLOT_VCLOCK (0x02),
> IPROTO_BALLOT_GC_VCLOCK (0x03), IPROTO_BALLOT_IS_LOADING (0x04).
>
> Now it gets a new field IPROTO_BALLOT_IS_ANON (0x05). The field
> is a boolean, and equals to box.cfg.replication_anon of the
> sender.
> ---
Hi! Thanks for the patch!
Please see one comment below.
> src/box/box.cc | 1 +
> src/box/iproto_constants.h | 1 +
> src/box/replication.cc | 14 ++++++++++++--
> src/box/xrow.c | 14 ++++++++++++--
> src/box/xrow.h | 5 +++++
> 5 files changed, 31 insertions(+), 4 deletions(-)
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index faffd5769..145b53ec8 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -2080,6 +2080,7 @@ void
> box_process_vote(struct ballot *ballot)
> {
> ballot->is_ro = cfg_geti("read_only") != 0;
> + ballot->is_anon = replication_anon;
> /*
> * is_ro is true on initial load and is set to box.cfg.read_only
> * after box_cfg() returns, during dynamic box.cfg parameters setting.
> diff --git a/src/box/iproto_constants.h b/src/box/iproto_constants.h
> index 4f5a2b195..9c2fb6058 100644
> --- a/src/box/iproto_constants.h
> +++ b/src/box/iproto_constants.h
> @@ -149,6 +149,7 @@ enum iproto_ballot_key {
> IPROTO_BALLOT_VCLOCK = 0x02,
> IPROTO_BALLOT_GC_VCLOCK = 0x03,
> IPROTO_BALLOT_IS_LOADING = 0x04,
> + IPROTO_BALLOT_IS_ANON = 0x05,
> };
>
> #define bit(c) (1ULL<<IPROTO_##c)
> diff --git a/src/box/replication.cc b/src/box/replication.cc
> index ef0e2411d..8408f395e 100644
> --- a/src/box/replication.cc
> +++ b/src/box/replication.cc
> @@ -290,6 +290,8 @@ replica_clear_id(struct replica *replica)
> }
> if (replica_is_orphan(replica)) {
> replica_hash_remove(&replicaset.hash, replica);
> + replicaset.anon_count -= replica->anon;
> + assert(replicaset.anon_count >= 0);
replica_clear_id() is only called on _cluster entry deletion.
So you're surely working with a normal replica, not anonymous.
We may add an assset(!replica->anon) somewhere.
Don't know if it's really needed though.
> replica_delete(replica);
> }
> }
> @@ -332,6 +334,7 @@ replica_on_applier_connect(struct replica *replica)
> assert(replica->applier_sync_state == APPLIER_DISCONNECTED);
>
> replica->uuid = applier->uuid;
> + replica->anon = applier->ballot.is_anon;
> replica->applier_sync_state = APPLIER_CONNECTED;
> replicaset.applier.connected++;
>
> @@ -362,6 +365,7 @@ replica_on_applier_connect(struct replica *replica)
> } else {
> /* Add a new struct replica */
> replica_hash_insert(&replicaset.hash, replica);
> + replicaset.anon_count += replica->anon;
> }
> }
>
> @@ -390,7 +394,9 @@ replica_on_applier_reconnect(struct replica *replica)
> if (orig == NULL) {
> orig = replica_new();
> orig->uuid = applier->uuid;
> + orig->anon = applier->ballot.is_anon;
> replica_hash_insert(&replicaset.hash, orig);
> + replicaset.anon_count += orig->anon;
> }
>
> if (orig->applier != NULL) {
> @@ -526,6 +532,7 @@ replicaset_update(struct applier **appliers, int count)
>
> assert(!tt_uuid_is_nil(&applier->uuid));
> replica->uuid = applier->uuid;
> + replica->anon = applier->ballot.is_anon;
>
> if (replica_hash_search(&uniq, replica) != NULL) {
> replica_clear_applier(replica);
> @@ -582,6 +589,7 @@ replicaset_update(struct applier **appliers, int count)
> } else {
> /* Add a new struct replica */
> replica_hash_insert(&replicaset.hash, replica);
> + replicaset.anon_count += replica->anon;
> }
>
> replica->applier_sync_state = APPLIER_CONNECTED;
> @@ -593,6 +601,8 @@ replicaset_update(struct applier **appliers, int count)
> replica_hash_foreach_safe(&replicaset.hash, replica, next) {
> if (replica_is_orphan(replica)) {
> replica_hash_remove(&replicaset.hash, replica);
> + replicaset.anon_count -= replica->anon;
> + assert(replicaset.anon_count >= 0);
> replica_delete(replica);
> }
> }
> @@ -907,12 +917,12 @@ replica_on_relay_stop(struct replica *replica)
> replica->gc = NULL;
> } else {
> assert(replica->gc == NULL);
> - assert(replicaset.anon_count > 0);
> - replicaset.anon_count--;
> }
> }
> if (replica_is_orphan(replica)) {
> replica_hash_remove(&replicaset.hash, replica);
> + replicaset.anon_count -= replica->anon;
> + assert(replicaset.anon_count >= 0);
> replica_delete(replica);
> }
> }
> diff --git a/src/box/xrow.c b/src/box/xrow.c
> index 95ddb1fe7..2edcb2f8f 100644
> --- a/src/box/xrow.c
> +++ b/src/box/xrow.c
> @@ -447,9 +447,11 @@ iproto_reply_vote(struct obuf *out, const struct ballot *ballot,
> uint64_t sync, uint32_t schema_version)
> {
> size_t max_size = IPROTO_HEADER_LEN + mp_sizeof_map(1) +
> - mp_sizeof_uint(UINT32_MAX) + mp_sizeof_map(4) +
> + mp_sizeof_uint(UINT32_MAX) + mp_sizeof_map(5) +
> mp_sizeof_uint(UINT32_MAX) + mp_sizeof_bool(ballot->is_ro) +
> mp_sizeof_uint(UINT32_MAX) + mp_sizeof_bool(ballot->is_loading) +
> + mp_sizeof_uint(IPROTO_BALLOT_IS_ANON) +
> + mp_sizeof_bool(ballot->is_anon) +
> mp_sizeof_uint(UINT32_MAX) +
> mp_sizeof_vclock_ignore0(&ballot->vclock) +
> mp_sizeof_uint(UINT32_MAX) +
> @@ -465,11 +467,13 @@ iproto_reply_vote(struct obuf *out, const struct ballot *ballot,
> char *data = buf + IPROTO_HEADER_LEN;
> data = mp_encode_map(data, 1);
> data = mp_encode_uint(data, IPROTO_BALLOT);
> - data = mp_encode_map(data, 4);
> + data = mp_encode_map(data, 5);
> data = mp_encode_uint(data, IPROTO_BALLOT_IS_RO);
> data = mp_encode_bool(data, ballot->is_ro);
> data = mp_encode_uint(data, IPROTO_BALLOT_IS_LOADING);
> data = mp_encode_bool(data, ballot->is_loading);
> + data = mp_encode_uint(data, IPROTO_BALLOT_IS_ANON);
> + data = mp_encode_bool(data, ballot->is_anon);
> data = mp_encode_uint(data, IPROTO_BALLOT_VCLOCK);
> data = mp_encode_vclock_ignore0(data, &ballot->vclock);
> data = mp_encode_uint(data, IPROTO_BALLOT_GC_VCLOCK);
> @@ -1227,6 +1231,7 @@ xrow_decode_ballot(struct xrow_header *row, struct ballot *ballot)
> {
> ballot->is_ro = false;
> ballot->is_loading = false;
> + ballot->is_anon = false;
> vclock_create(&ballot->vclock);
>
> const char *start = NULL;
> @@ -1276,6 +1281,11 @@ xrow_decode_ballot(struct xrow_header *row, struct ballot *ballot)
> goto err;
> ballot->is_loading = mp_decode_bool(&data);
> break;
> + case IPROTO_BALLOT_IS_ANON:
> + if (mp_typeof(*data) != MP_BOOL)
> + goto err;
> + ballot->is_anon = mp_decode_bool(&data);
> + break;
> case IPROTO_BALLOT_VCLOCK:
> if (mp_decode_vclock_ignore0(&data,
> &ballot->vclock) != 0)
> diff --git a/src/box/xrow.h b/src/box/xrow.h
> index 58d47b12d..0dc9eb71a 100644
> --- a/src/box/xrow.h
> +++ b/src/box/xrow.h
> @@ -332,6 +332,11 @@ xrow_encode_auth(struct xrow_header *row, const char *salt, size_t salt_len,
> struct ballot {
> /** Set if the instance is running in read-only mode. */
> bool is_ro;
> + /**
> + * A flag whether the instance is anonymous, not having an
> + * ID, and not going to request it.
> + */
> + bool is_anon;
> /**
> * Set if the instance hasn't finished bootstrap or recovery, or
> * is syncing with other replicas in the replicaset.
--
Serge Petrenko
More information about the Tarantool-patches
mailing list