[Tarantool-patches] [PATCH 07/12] raft: stop using replicaset.vclock

Serge Petrenko sergepetrenko at tarantool.org
Tue Nov 17 12:23:17 MSK 2020


17.11.2020 03:02, Vladislav Shpilevoy пишет:
> Raft is being moved to a separate library in src/lib. This means
> it can't depend on anything from box/.
>
> The patch makes raft stop using replicaset.vclock.
>
> Instead, it has a new option 'vclock'. It is stored inside struct
> raft by pointer and should be configured using raft_cfg_vclock().
>
> Box configures it to point at replicaset.vclock like before. But
> now raftlib code does not depend on it explicitly.
>
> Vclock is stored in Raft by pointer instead of by value so that it
> does not have to be updated on each transaction. That would be too
> high a price to pay for Raft's independence from box.
>
> Part of #5303
> ---


LGTM.


>   src/box/box.cc    |  1 +
>   src/box/raftlib.c | 15 +++++++++++----
>   src/box/raftlib.h | 16 ++++++++++++++++
>   3 files changed, 28 insertions(+), 4 deletions(-)
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index 78fca928e..ff80e45a4 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -2768,6 +2768,7 @@ box_cfg_xc(void)
>   	 */
>   	if (!replication_anon)
>   		raft_cfg_instance_id(box_raft(), instance_id);
> +	raft_cfg_vclock(box_raft(), &replicaset.vclock);
>   
>   	if (box_set_election_timeout() != 0)
>   		diag_raise();
> diff --git a/src/box/raftlib.c b/src/box/raftlib.c
> index 78164bf91..ab2e27fd8 100644
> --- a/src/box/raftlib.c
> +++ b/src/box/raftlib.c
> @@ -125,8 +125,7 @@ raft_new_random_election_shift(const struct raft *raft)
>   static inline bool
>   raft_can_vote_for(const struct raft *raft, const struct vclock *v)
>   {
> -	(void)raft;
> -	int cmp = vclock_compare_ignore0(v, &replicaset.vclock);
> +	int cmp = vclock_compare_ignore0(v, raft->vclock);
>   	return cmp == 0 || cmp == 1;
>   }
>   
> @@ -597,7 +596,7 @@ raft_worker_handle_broadcast(struct raft *raft)
>   	req.state = raft->state;
>   	if (req.state == RAFT_STATE_CANDIDATE) {
>   		assert(raft->vote == raft->self);
> -		req.vclock = &replicaset.vclock;
> +		req.vclock = raft->vclock;
>   	}
>   	replicaset_foreach(replica)
>   		relay_push_raft(replica->relay, &req);
> @@ -865,7 +864,7 @@ raft_serialize_for_network(const struct raft *raft, struct raft_request *req)
>   	 * Vclock is sent out only by candidate instances.
>   	 */
>   	if (req->state == RAFT_STATE_CANDIDATE)
> -		req->vclock = &replicaset.vclock;
> +		req->vclock = raft->vclock;
>   }
>   
>   void
> @@ -984,6 +983,14 @@ raft_cfg_instance_id(struct raft *raft, uint32_t instance_id)
>   	raft->self = instance_id;
>   }
>   
> +void
> +raft_cfg_vclock(struct raft *raft, const struct vclock *vclock)
> +{
> +	assert(raft->vclock == NULL);
> +	assert(vclock != NULL);
> +	raft->vclock = vclock;
> +}
> +
>   void
>   raft_new_term(struct raft *raft)
>   {
> diff --git a/src/box/raftlib.h b/src/box/raftlib.h
> index 2da3cec86..8d0d03da0 100644
> --- a/src/box/raftlib.h
> +++ b/src/box/raftlib.h
> @@ -154,6 +154,15 @@ struct raft {
>   	int vote_count;
>   	/** Number of votes necessary for successful election. */
>   	int election_quorum;
> +	/**
> +	 * Vclock of the Raft node owner. Raft never changes it, only watches,
> +	 * and makes decisions based on it. The value is not stored by copy so
> +	 * as to avoid frequent updates. If every transaction needed to
> +	 * update several vclocks in different places, it would be too
> +	 * expensive. So they update only one vclock, which is shared between
> +	 * subsystems, such as Raft.
> +	 */
> +	const struct vclock *vclock;
>   	/** State machine timed event trigger. */
>   	struct ev_timer timer;
>   	/** Worker fiber to execute blocking tasks like IO. */
> @@ -250,6 +259,13 @@ raft_cfg_death_timeout(struct raft *raft, double death_timeout);
>   void
>   raft_cfg_instance_id(struct raft *raft, uint32_t instance_id);
>   
> +/**
> + * Configure vclock of the given Raft instance. The vclock is not copied, so the
> + * caller must keep it valid.
> + */
> +void
> +raft_cfg_vclock(struct raft *raft, const struct vclock *vclock);
> +
>   /**
>    * Bump the term. When it is persisted, the node checks if there is a leader,
>    * and if there is not, a new election is started. That said, this function can

-- 
Serge Petrenko



More information about the Tarantool-patches mailing list