[Tarantool-patches] [PATCH 08/12] raft: introduce vtab for disk and network

Serge Petrenko sergepetrenko at tarantool.org
Tue Nov 17 12:35:58 MSK 2020


17.11.2020 03:02, Vladislav Shpilevoy пишет:
> Raft is being moved to a separate library in src/lib. That means
> it can't depend on anything from box/.
>
> The patch makes raft stop using replicaset and journal objects.
> They were used to broadcast messages to all the other nodes, and
> to persist updates.
>
> Now Raft does the same through vtab, which is configured by box.
> Broadcast still sends messages via relays, and disk write still
> uses the journal. But Raft does not depend on any specific journal
> or network API.
>
> Part of #5303
> ---
>   src/box/raft.c    | 63 ++++++++++++++++++++++++++++++++++++++-
>   src/box/raftlib.c | 75 ++++++++++-------------------------------------
>   src/box/raftlib.h | 24 ++++++++++++++-
>   3 files changed, 100 insertions(+), 62 deletions(-)
>
>
>   /* Dump Raft state to WAL in a blocking way. */
>   static void
>   raft_worker_handle_io(struct raft *raft)
> @@ -567,8 +513,17 @@ end_dump:
>   		assert(raft->volatile_term >= raft->term);
>   		req.term = raft->volatile_term;
>   		req.vote = raft->volatile_vote;
> -
> -		raft_write_request(&req);
> +		/*
> +		 * Skip vclock. It is only sent over the network when the node
> +		 * votes for itself. Persisting it, in whatever way, is the job
> +		 * of the vclock owner.
> +		 *
> +		 * Skip state. Persisting it would be strictly against the Raft
> +		 * protocol. The reason is that it makes little sense - even if
> +		 * the node is a leader now, by the time it is restarted another
> +		 * leader will likely have been elected.
> +		 */
> +		raft->vtab->write(raft, &req);
>   		say_info("RAFT: persisted state %s",
>   			 raft_request_to_string(&req));
>   
> @@ -598,8 +553,7 @@ raft_worker_handle_broadcast(struct raft *raft)
>   		assert(raft->vote == raft->self);
>   		req.vclock = raft->vclock;
>   	}
> -	replicaset_foreach(replica)
> -		relay_push_raft(replica->relay, &req);
> +	raft->vtab->broadcast(raft, &req);


I'd introduce helpers, like raft_write() and raft_broadcast(),
to hide raft->vtab->... calls. Up to you, though.

Other than that LGTM.


>   	trigger_run(&raft->on_update, raft);
>   	raft->is_broadcast_scheduled = false;
>   }
> @@ -1038,7 +992,7 @@ raft_schedule_broadcast(struct raft *raft)
>   }
>   
>   void
> -raft_create(struct raft *raft)
> +raft_create(struct raft *raft, const struct raft_vtab *vtab)
>   {
>   	*raft = (struct raft) {
>   		.state = RAFT_STATE_FOLLOWER,
> @@ -1047,6 +1001,7 @@ raft_create(struct raft *raft)
>   		.election_quorum = 1,
>   		.election_timeout = 5,
>   		.death_timeout = 5,
> +		.vtab = vtab,
>   	};
>   	ev_timer_init(&raft->timer, raft_sm_schedule_new_election_cb, 0, 0);
>   	raft->timer.data = raft;
> diff --git a/src/box/raftlib.h b/src/box/raftlib.h
> index 8d0d03da0..6181d9d49 100644
> --- a/src/box/raftlib.h
> +++ b/src/box/raftlib.h
> @@ -295,8 +313,12 @@ raft_serialize_for_disk(const struct raft *raft, struct raft_request *req);
>   void
>   raft_on_update(struct raft *raft, struct trigger *trigger);
>   
> +/**
> + * Create a Raft node. The vtab is not copied. Its memory should stay valid even
> + * after the creation.
> + */
>   void
> -raft_create(struct raft *raft);
> +raft_create(struct raft *raft, const struct raft_vtab *vtab);
>   
>   void
>   raft_destroy(struct raft *raft);

-- 
Serge Petrenko



More information about the Tarantool-patches mailing list