[Tarantool-patches] [PATCH v2 11/11] [tosquash] raft: a swarm of minor fixes
Serge Petrenko
sergepetrenko at tarantool.org
Mon Sep 14 13:13:37 MSK 2020
10.09.2020 02:16, Vladislav Shpilevoy пишет:
> Major change - ev_check/ev_prepare didn't work. They are invoked
> from a context, where yields are not possible. To a blocking WAL
> write can't be done there. And an async WAL write requires too
> many changes.
>
> This patch adds a worker fiber to Raft to perform state dumps.
>
> Other changes are fixes of typos and of minor bugs.
> ---
> src/box/raft.c | 58 ++++++++++++++++++++++++++++++++++----------------
> src/box/raft.h | 7 +++---
> 2 files changed, 43 insertions(+), 22 deletions(-)
>
> diff --git a/src/box/raft.c b/src/box/raft.c
> index b01e65ced..e4e0b037c 100644
> --- a/src/box/raft.c
> +++ b/src/box/raft.c
> @@ -64,6 +64,7 @@ struct raft raft = {
> .vote = 0,
> .vote_mask = 0,
> .vote_count = 0,
> + .worker = NULL,
> .election_timeout = 5,
> };
>
> @@ -250,13 +251,6 @@ raft_sm_schedule_new_election_cb(struct ev_loop *loop, struct ev_timer *timer,
> static void
> raft_sm_pause_and_dump(void);
>
> -/**
> - * Flush Raft state changes to WAL. The callback resets itself, if during the
> - * write more changes appear.
> - */
> -static void
> -raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events);
> -
> void
> raft_process_recovery(const struct raft_request *req)
> {
> @@ -348,6 +342,7 @@ raft_process_msg(const struct raft_request *req, uint32_t source)
> break;
> raft.state = RAFT_STATE_LEADER;
> raft.leader = instance_id;
> + ev_timer_stop(loop(), &raft.timer);
> break;
> default:
> unreachable();
> @@ -374,6 +369,10 @@ raft_process_msg(const struct raft_request *req, uint32_t source)
> /* New leader was elected. */
> raft.state = RAFT_STATE_FOLLOWER;
> raft.leader = source;
> + if (!raft.is_write_in_progress) {
> + ev_timer_stop(loop(), &raft.timer);
> + raft_sm_wait_leader_dead();
> + }
> end:
> if (raft.state != old_state) {
> /*
> @@ -406,6 +405,12 @@ raft_process_heartbeat(uint32_t source)
> /* Not interested in heartbeats from not a leader. */
> if (raft.leader != source)
> return;
> + /*
> + * The instance currently is busy with writing something on disk. Can't
> + * react to heartbeats.
> + */
> + if (raft.is_write_in_progress)
> + return;
> /*
> * XXX: it may be expensive to reset the timer like that. It may be less
> * expensive to let the timer work, and remember last timestamp when
> @@ -473,10 +478,8 @@ fail:
> }
>
> static void
> -raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events)
> +raft_worker_handle_io(void)
> {
> - assert(watcher == &raft.io);
> - (void) events;
> assert(raft.is_write_in_progress);
> /* During write Raft can't be anything but a follower. */
> assert(raft.state == RAFT_STATE_FOLLOWER);
> @@ -488,7 +491,6 @@ raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events)
> if (raft_is_fully_on_disk()) {
> end_dump:
> raft.is_write_in_progress = false;
> - ev_check_stop(loop, watcher);
> /*
> * The state machine is stable. Can see now, to what state to
> * go.
> @@ -583,6 +585,25 @@ end_dump:
> raft_broadcast(&req);
> }
>
> +static int
> +raft_worker_f(va_list args)
> +{
> + (void)args;
> + while (!fiber_is_cancelled()) {
> + if (!raft.is_write_in_progress)
> + goto idle;
> + raft_worker_handle_io();
> + if (!raft.is_write_in_progress)
> + goto idle;
> + fiber_sleep(0);
> + continue;
> + idle:
> + assert(raft_is_fully_on_disk());
> + fiber_yield();
> + }
> + return 0;
> +}
> +
> static void
> raft_sm_pause_and_dump(void)
> {
> @@ -590,8 +611,10 @@ raft_sm_pause_and_dump(void)
> if (raft.is_write_in_progress)
> return;
> ev_timer_stop(loop(), &raft.timer);
> - ev_check_start(loop(), &raft.io);
> raft.is_write_in_progress = true;
> + if (raft.worker == NULL)
> + raft.worker = fiber_new("raft_worker", raft_worker_f);
> + fiber_wakeup(raft.worker);
> }
>
> static void
> @@ -620,7 +643,6 @@ raft_sm_schedule_new_election(void)
> {
> assert(raft_is_fully_on_disk());
> assert(raft.is_candidate);
> - assert(raft.leader == 0);
> /* Everyone is a follower until its vote for self is persisted. */
> raft_sm_schedule_new_term(raft.term + 1);
> raft_sm_schedule_new_vote(instance_id);
> @@ -641,20 +663,19 @@ static void
> raft_sm_wait_leader_dead(void)
> {
> assert(!ev_is_active(&raft.timer));
> - assert(!ev_is_active(&raft.io));
> assert(!raft.is_write_in_progress);
> assert(raft.is_candidate);
> assert(raft.state == RAFT_STATE_FOLLOWER);
> assert(raft.leader != 0);
> double death_timeout = replication_disconnect_timeout();
> ev_timer_set(&raft.timer, death_timeout, death_timeout);
> + ev_timer_start(loop(), &raft.timer);
> }
>
> static void
> raft_sm_wait_election_end(void)
> {
> assert(!ev_is_active(&raft.timer));
> - assert(!ev_is_active(&raft.io));
> assert(!raft.is_write_in_progress);
> assert(raft.is_candidate);
> assert(raft.state == RAFT_STATE_FOLLOWER ||
> @@ -664,13 +685,13 @@ raft_sm_wait_election_end(void)
> double election_timeout = raft.election_timeout +
> raft_new_random_election_shift();
> ev_timer_set(&raft.timer, election_timeout, election_timeout);
> + ev_timer_start(loop(), &raft.timer);
> }
>
> static void
> raft_sm_start(void)
> {
> assert(!ev_is_active(&raft.timer));
> - assert(!ev_is_active(&raft.io));
> assert(!raft.is_write_in_progress);
> assert(!raft.is_enabled);
> assert(raft.state == RAFT_STATE_FOLLOWER);
> @@ -769,12 +790,13 @@ raft_cfg_election_timeout(double timeout)
> return;
>
> raft.election_timeout = timeout;
> - if (raft.vote != 0 && raft.leader == 0) {
> + if (raft.vote != 0 && raft.leader == 0 && raft.is_candidate) {
> assert(ev_is_active(&raft.timer));
> double timeout = ev_timer_remaining(loop(), &raft.timer) -
> raft.timer.at + raft.election_timeout;
> ev_timer_stop(loop(), &raft.timer);
> ev_timer_set(&raft.timer, timeout, timeout);
> + ev_timer_start(loop(), &raft.timer);
> }
> }
>
> @@ -808,6 +830,7 @@ raft_cfg_death_timeout(void)
> raft.timer.at + death_timeout;
> ev_timer_stop(loop(), &raft.timer);
> ev_timer_set(&raft.timer, timeout, timeout);
> + ev_timer_start(loop(), &raft.timer);
> }
> }
>
> @@ -826,5 +849,4 @@ void
> raft_init(void)
> {
> ev_timer_init(&raft.timer, raft_sm_schedule_new_election_cb, 0, 0);
> - ev_check_init(&raft.io, raft_sm_dump_step);
> }
> diff --git a/src/box/raft.h b/src/box/raft.h
> index 111a9c16e..23aedfe10 100644
> --- a/src/box/raft.h
> +++ b/src/box/raft.h
> @@ -65,6 +65,7 @@ extern "C" {
> * than the configured one. See more details in the code.
> */
>
> +struct fiber;
> struct raft_request;
> struct vclock;
>
> @@ -138,10 +139,8 @@ struct raft {
> int vote_count;
> /** State machine timed event trigger. */
> struct ev_timer timer;
> - /**
> - * Dump of Raft state in the end of event loop, when it is changed.
> - */
> - struct ev_check io;
> + /** Worker fiber to execute blocking tasks like IO. */
> + struct fiber *worker;
> /** Configured election timeout in seconds. */
> double election_timeout;
> };
LGTM
--
Serge Petrenko
More information about the Tarantool-patches
mailing list