[Tarantool-patches] [PATCH v2 3/4] replication: add is_anon flag to ballot

Serge Petrenko sergepetrenko at tarantool.org
Tue Sep 15 10:46:37 MSK 2020


15.09.2020 02:11, Vladislav Shpilevoy пишет:
> Ballot is a message sent in response on vote request, which is
> sent by applier first thing after connection establishment.
>
> It contains basic info about the remote instance such as whether
> it is read only, if it is still loading, and more.
>
> The ballot didn't contain a flag whether the instance is
> anonymous. That led to a problem, when applier was connected to a
> remote instance, was added to struct replicaset inside a struct
> replica object, but it was unknown whether it is anonymous. It was
> added as not anonymous by default.
>
> If the remote instance was in fact anonymous and sent a subscribe
> response back to the first instance with the anon flag = true,
> then it looked like the remote instance was not anonymous, and
> suddenly became such, without even a reconnect. It could lead to
> an assertion.
>
> The bug is hidden behind another bug, because of which the leader
> instance on boostrap registers all replicas listed in its
> box.cfg.replication, even anonymous ones.
>
> The patch makes the ballot contain the anon flag. Now both relay
> and applier send whether their host is anonymous. Relay does it by
> sending the ballot, applier sends it in scope of subscribe
> request. By the time a replica gets UUID and is added into struct
> replicaset, its anon flag is determined.
>
> Also the patch makes anon_count updated on each replica hash table
> change. Previously it was only updated when something related to
> relay was done. Now anon is updated by applier actions too, and
> it is not ok to update the counter on relay-specific actions.
>
> The early registration bug is a subject for a next patch.
>
> Part of #5287
>
> @TarantoolBot document
> Title: IPROTO_BALLOT_IS_ANON flag
>
> There is a request type IPROTO_BALLOT, with code 0x29. It has
> fields IPROTO_BALLOT_IS_RO (0x01), IPROTO_BALLOT_VCLOCK (0x02),
> IPROTO_BALLOT_GC_VCLOCK (0x03), IPROTO_BALLOT_IS_LOADING (0x04).
>
> Now it gets a new field IPROTO_BALLOT_IS_ANON (0x05). The field
> is a boolean, and equals to box.cfg.replication_anon of the
> sender.
> ---
Hi! Thanks for the patch!
Please see one comment below.
>   src/box/box.cc             |  1 +
>   src/box/iproto_constants.h |  1 +
>   src/box/replication.cc     | 14 ++++++++++++--
>   src/box/xrow.c             | 14 ++++++++++++--
>   src/box/xrow.h             |  5 +++++
>   5 files changed, 31 insertions(+), 4 deletions(-)
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index faffd5769..145b53ec8 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -2080,6 +2080,7 @@ void
>   box_process_vote(struct ballot *ballot)
>   {
>   	ballot->is_ro = cfg_geti("read_only") != 0;
> +	ballot->is_anon = replication_anon;
>   	/*
>   	 * is_ro is true on initial load and is set to box.cfg.read_only
>   	 * after box_cfg() returns, during dynamic box.cfg parameters setting.
> diff --git a/src/box/iproto_constants.h b/src/box/iproto_constants.h
> index 4f5a2b195..9c2fb6058 100644
> --- a/src/box/iproto_constants.h
> +++ b/src/box/iproto_constants.h
> @@ -149,6 +149,7 @@ enum iproto_ballot_key {
>   	IPROTO_BALLOT_VCLOCK = 0x02,
>   	IPROTO_BALLOT_GC_VCLOCK = 0x03,
>   	IPROTO_BALLOT_IS_LOADING = 0x04,
> +	IPROTO_BALLOT_IS_ANON = 0x05,
>   };
>   
>   #define bit(c) (1ULL<<IPROTO_##c)
> diff --git a/src/box/replication.cc b/src/box/replication.cc
> index ef0e2411d..8408f395e 100644
> --- a/src/box/replication.cc
> +++ b/src/box/replication.cc
> @@ -290,6 +290,8 @@ replica_clear_id(struct replica *replica)
>   	}
>   	if (replica_is_orphan(replica)) {
>   		replica_hash_remove(&replicaset.hash, replica);
> +		replicaset.anon_count -= replica->anon;
> +		assert(replicaset.anon_count >= 0);

replica_clear_id() is only called on _cluster entry deletion.
So you're surely working with a normal replica, not anonymous.

We may add an assset(!replica->anon) somewhere.
Don't know if it's really needed though.

>   		replica_delete(replica);
>   	}
>   }
> @@ -332,6 +334,7 @@ replica_on_applier_connect(struct replica *replica)
>   	assert(replica->applier_sync_state == APPLIER_DISCONNECTED);
>   
>   	replica->uuid = applier->uuid;
> +	replica->anon = applier->ballot.is_anon;
>   	replica->applier_sync_state = APPLIER_CONNECTED;
>   	replicaset.applier.connected++;
>   
> @@ -362,6 +365,7 @@ replica_on_applier_connect(struct replica *replica)
>   	} else {
>   		/* Add a new struct replica */
>   		replica_hash_insert(&replicaset.hash, replica);
> +		replicaset.anon_count += replica->anon;
>   	}
>   }
>   
> @@ -390,7 +394,9 @@ replica_on_applier_reconnect(struct replica *replica)
>   		if (orig == NULL) {
>   			orig = replica_new();
>   			orig->uuid = applier->uuid;
> +			orig->anon = applier->ballot.is_anon;
>   			replica_hash_insert(&replicaset.hash, orig);
> +			replicaset.anon_count += orig->anon;
>   		}
>   
>   		if (orig->applier != NULL) {
> @@ -526,6 +532,7 @@ replicaset_update(struct applier **appliers, int count)
>   
>   		assert(!tt_uuid_is_nil(&applier->uuid));
>   		replica->uuid = applier->uuid;
> +		replica->anon = applier->ballot.is_anon;
>   
>   		if (replica_hash_search(&uniq, replica) != NULL) {
>   			replica_clear_applier(replica);
> @@ -582,6 +589,7 @@ replicaset_update(struct applier **appliers, int count)
>   		} else {
>   			/* Add a new struct replica */
>   			replica_hash_insert(&replicaset.hash, replica);
> +			replicaset.anon_count += replica->anon;
>   		}
>   
>   		replica->applier_sync_state = APPLIER_CONNECTED;
> @@ -593,6 +601,8 @@ replicaset_update(struct applier **appliers, int count)
>   	replica_hash_foreach_safe(&replicaset.hash, replica, next) {
>   		if (replica_is_orphan(replica)) {
>   			replica_hash_remove(&replicaset.hash, replica);
> +			replicaset.anon_count -= replica->anon;
> +			assert(replicaset.anon_count >= 0);
>   			replica_delete(replica);
>   		}
>   	}
> @@ -907,12 +917,12 @@ replica_on_relay_stop(struct replica *replica)
>   			replica->gc = NULL;
>   		} else {
>   			assert(replica->gc == NULL);
> -			assert(replicaset.anon_count > 0);
> -			replicaset.anon_count--;
>   		}
>   	}
>   	if (replica_is_orphan(replica)) {
>   		replica_hash_remove(&replicaset.hash, replica);
> +		replicaset.anon_count -= replica->anon;
> +		assert(replicaset.anon_count >= 0);
>   		replica_delete(replica);
>   	}
>   }
> diff --git a/src/box/xrow.c b/src/box/xrow.c
> index 95ddb1fe7..2edcb2f8f 100644
> --- a/src/box/xrow.c
> +++ b/src/box/xrow.c
> @@ -447,9 +447,11 @@ iproto_reply_vote(struct obuf *out, const struct ballot *ballot,
>   		  uint64_t sync, uint32_t schema_version)
>   {
>   	size_t max_size = IPROTO_HEADER_LEN + mp_sizeof_map(1) +
> -		mp_sizeof_uint(UINT32_MAX) + mp_sizeof_map(4) +
> +		mp_sizeof_uint(UINT32_MAX) + mp_sizeof_map(5) +
>   		mp_sizeof_uint(UINT32_MAX) + mp_sizeof_bool(ballot->is_ro) +
>   		mp_sizeof_uint(UINT32_MAX) + mp_sizeof_bool(ballot->is_loading) +
> +		mp_sizeof_uint(IPROTO_BALLOT_IS_ANON) +
> +		mp_sizeof_bool(ballot->is_anon) +
>   		mp_sizeof_uint(UINT32_MAX) +
>   		mp_sizeof_vclock_ignore0(&ballot->vclock) +
>   		mp_sizeof_uint(UINT32_MAX) +
> @@ -465,11 +467,13 @@ iproto_reply_vote(struct obuf *out, const struct ballot *ballot,
>   	char *data = buf + IPROTO_HEADER_LEN;
>   	data = mp_encode_map(data, 1);
>   	data = mp_encode_uint(data, IPROTO_BALLOT);
> -	data = mp_encode_map(data, 4);
> +	data = mp_encode_map(data, 5);
>   	data = mp_encode_uint(data, IPROTO_BALLOT_IS_RO);
>   	data = mp_encode_bool(data, ballot->is_ro);
>   	data = mp_encode_uint(data, IPROTO_BALLOT_IS_LOADING);
>   	data = mp_encode_bool(data, ballot->is_loading);
> +	data = mp_encode_uint(data, IPROTO_BALLOT_IS_ANON);
> +	data = mp_encode_bool(data, ballot->is_anon);
>   	data = mp_encode_uint(data, IPROTO_BALLOT_VCLOCK);
>   	data = mp_encode_vclock_ignore0(data, &ballot->vclock);
>   	data = mp_encode_uint(data, IPROTO_BALLOT_GC_VCLOCK);
> @@ -1227,6 +1231,7 @@ xrow_decode_ballot(struct xrow_header *row, struct ballot *ballot)
>   {
>   	ballot->is_ro = false;
>   	ballot->is_loading = false;
> +	ballot->is_anon = false;
>   	vclock_create(&ballot->vclock);
>   
>   	const char *start = NULL;
> @@ -1276,6 +1281,11 @@ xrow_decode_ballot(struct xrow_header *row, struct ballot *ballot)
>   				goto err;
>   			ballot->is_loading = mp_decode_bool(&data);
>   			break;
> +		case IPROTO_BALLOT_IS_ANON:
> +			if (mp_typeof(*data) != MP_BOOL)
> +				goto err;
> +			ballot->is_anon = mp_decode_bool(&data);
> +			break;
>   		case IPROTO_BALLOT_VCLOCK:
>   			if (mp_decode_vclock_ignore0(&data,
>   						     &ballot->vclock) != 0)
> diff --git a/src/box/xrow.h b/src/box/xrow.h
> index 58d47b12d..0dc9eb71a 100644
> --- a/src/box/xrow.h
> +++ b/src/box/xrow.h
> @@ -332,6 +332,11 @@ xrow_encode_auth(struct xrow_header *row, const char *salt, size_t salt_len,
>   struct ballot {
>   	/** Set if the instance is running in read-only mode. */
>   	bool is_ro;
> +	/**
> +	 * A flag whether the instance is anonymous, not having an
> +	 * ID, and not going to request it.
> +	 */
> +	bool is_anon;
>   	/**
>   	 * Set if the instance hasn't finished bootstrap or recovery, or
>   	 * is syncing with other replicas in the replicaset.

-- 
Serge Petrenko



More information about the Tarantool-patches mailing list