From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> To: tarantool-patches@dev.tarantool.org, sergepetrenko@tarantool.org, gorcunov@gmail.com Subject: [Tarantool-patches] [PATCH v2 11/11] [tosquash] raft: a swarm of minor fixes Date: Thu, 10 Sep 2020 01:16:52 +0200 [thread overview] Message-ID: <4899ce311ec02592ceb6fe2dd1d6e099df6e7c51.1599693319.git.v.shpilevoy@tarantool.org> (raw) In-Reply-To: <cover.1599693319.git.v.shpilevoy@tarantool.org> Major change - ev_check/ev_prepare didn't work. They are invoked from a context where yields are not possible. So a blocking WAL write can't be done there. And an async WAL write requires too many changes. This patch adds a worker fiber to Raft to perform state dumps. Other changes are fixes of typos and of minor bugs. --- src/box/raft.c | 58 ++++++++++++++++++++++++++++++++++---------------- src/box/raft.h | 7 +++--- 2 files changed, 43 insertions(+), 22 deletions(-) diff --git a/src/box/raft.c b/src/box/raft.c index b01e65ced..e4e0b037c 100644 --- a/src/box/raft.c +++ b/src/box/raft.c @@ -64,6 +64,7 @@ struct raft raft = { .vote = 0, .vote_mask = 0, .vote_count = 0, + .worker = NULL, .election_timeout = 5, }; @@ -250,13 +251,6 @@ raft_sm_schedule_new_election_cb(struct ev_loop *loop, struct ev_timer *timer, static void raft_sm_pause_and_dump(void); -/** - * Flush Raft state changes to WAL. The callback resets itself, if during the - * write more changes appear. - */ -static void -raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events); - void raft_process_recovery(const struct raft_request *req) { @@ -348,6 +342,7 @@ raft_process_msg(const struct raft_request *req, uint32_t source) break; raft.state = RAFT_STATE_LEADER; raft.leader = instance_id; + ev_timer_stop(loop(), &raft.timer); break; default: unreachable(); @@ -374,6 +369,10 @@ raft_process_msg(const struct raft_request *req, uint32_t source) /* New leader was elected. 
*/ raft.state = RAFT_STATE_FOLLOWER; raft.leader = source; + if (!raft.is_write_in_progress) { + ev_timer_stop(loop(), &raft.timer); + raft_sm_wait_leader_dead(); + } end: if (raft.state != old_state) { /* @@ -406,6 +405,12 @@ raft_process_heartbeat(uint32_t source) /* Not interested in heartbeats from not a leader. */ if (raft.leader != source) return; + /* + * The instance currently is busy with writing something on disk. Can't + * react to heartbeats. + */ + if (raft.is_write_in_progress) + return; /* * XXX: it may be expensive to reset the timer like that. It may be less * expensive to let the timer work, and remember last timestamp when @@ -473,10 +478,8 @@ fail: } static void -raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events) +raft_worker_handle_io(void) { - assert(watcher == &raft.io); - (void) events; assert(raft.is_write_in_progress); /* During write Raft can't be anything but a follower. */ assert(raft.state == RAFT_STATE_FOLLOWER); @@ -488,7 +491,6 @@ raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events) if (raft_is_fully_on_disk()) { end_dump: raft.is_write_in_progress = false; - ev_check_stop(loop, watcher); /* * The state machine is stable. Can see now, to what state to * go. 
@@ -583,6 +585,25 @@ end_dump: raft_broadcast(&req); } +static int +raft_worker_f(va_list args) +{ + (void)args; + while (!fiber_is_cancelled()) { + if (!raft.is_write_in_progress) + goto idle; + raft_worker_handle_io(); + if (!raft.is_write_in_progress) + goto idle; + fiber_sleep(0); + continue; + idle: + assert(raft_is_fully_on_disk()); + fiber_yield(); + } + return 0; +} + static void raft_sm_pause_and_dump(void) { @@ -590,8 +611,10 @@ raft_sm_pause_and_dump(void) if (raft.is_write_in_progress) return; ev_timer_stop(loop(), &raft.timer); - ev_check_start(loop(), &raft.io); raft.is_write_in_progress = true; + if (raft.worker == NULL) + raft.worker = fiber_new("raft_worker", raft_worker_f); + fiber_wakeup(raft.worker); } static void @@ -620,7 +643,6 @@ raft_sm_schedule_new_election(void) { assert(raft_is_fully_on_disk()); assert(raft.is_candidate); - assert(raft.leader == 0); /* Everyone is a follower until its vote for self is persisted. */ raft_sm_schedule_new_term(raft.term + 1); raft_sm_schedule_new_vote(instance_id); @@ -641,20 +663,19 @@ static void raft_sm_wait_leader_dead(void) { assert(!ev_is_active(&raft.timer)); - assert(!ev_is_active(&raft.io)); assert(!raft.is_write_in_progress); assert(raft.is_candidate); assert(raft.state == RAFT_STATE_FOLLOWER); assert(raft.leader != 0); double death_timeout = replication_disconnect_timeout(); ev_timer_set(&raft.timer, death_timeout, death_timeout); + ev_timer_start(loop(), &raft.timer); } static void raft_sm_wait_election_end(void) { assert(!ev_is_active(&raft.timer)); - assert(!ev_is_active(&raft.io)); assert(!raft.is_write_in_progress); assert(raft.is_candidate); assert(raft.state == RAFT_STATE_FOLLOWER || @@ -664,13 +685,13 @@ raft_sm_wait_election_end(void) double election_timeout = raft.election_timeout + raft_new_random_election_shift(); ev_timer_set(&raft.timer, election_timeout, election_timeout); + ev_timer_start(loop(), &raft.timer); } static void raft_sm_start(void) { assert(!ev_is_active(&raft.timer)); 
- assert(!ev_is_active(&raft.io)); assert(!raft.is_write_in_progress); assert(!raft.is_enabled); assert(raft.state == RAFT_STATE_FOLLOWER); @@ -769,12 +790,13 @@ raft_cfg_election_timeout(double timeout) return; raft.election_timeout = timeout; - if (raft.vote != 0 && raft.leader == 0) { + if (raft.vote != 0 && raft.leader == 0 && raft.is_candidate) { assert(ev_is_active(&raft.timer)); double timeout = ev_timer_remaining(loop(), &raft.timer) - raft.timer.at + raft.election_timeout; ev_timer_stop(loop(), &raft.timer); ev_timer_set(&raft.timer, timeout, timeout); + ev_timer_start(loop(), &raft.timer); } } @@ -808,6 +830,7 @@ raft_cfg_death_timeout(void) raft.timer.at + death_timeout; ev_timer_stop(loop(), &raft.timer); ev_timer_set(&raft.timer, timeout, timeout); + ev_timer_start(loop(), &raft.timer); } } @@ -826,5 +849,4 @@ void raft_init(void) { ev_timer_init(&raft.timer, raft_sm_schedule_new_election_cb, 0, 0); - ev_check_init(&raft.io, raft_sm_dump_step); } diff --git a/src/box/raft.h b/src/box/raft.h index 111a9c16e..23aedfe10 100644 --- a/src/box/raft.h +++ b/src/box/raft.h @@ -65,6 +65,7 @@ extern "C" { * than the configured one. See more details in the code. */ +struct fiber; struct raft_request; struct vclock; @@ -138,10 +139,8 @@ struct raft { int vote_count; /** State machine timed event trigger. */ struct ev_timer timer; - /** - * Dump of Raft state in the end of event loop, when it is changed. - */ - struct ev_check io; + /** Worker fiber to execute blocking tasks like IO. */ + struct fiber *worker; /** Configured election timeout in seconds. */ double election_timeout; }; -- 2.21.1 (Apple Git-122.3)
next prev parent reply other threads:[~2020-09-09 23:17 UTC|newest] Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-09-09 23:16 [Tarantool-patches] [PATCH v2 00/11] dRaft Vladislav Shpilevoy 2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 01/11] applier: store instance_id in struct applier Vladislav Shpilevoy 2020-09-14 9:38 ` Serge Petrenko 2020-09-19 15:44 ` Vladislav Shpilevoy 2020-09-21 6:23 ` Serge Petrenko 2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 10/11] raft: introduce box.info.raft Vladislav Shpilevoy 2020-09-14 9:42 ` Serge Petrenko 2020-09-09 23:16 ` Vladislav Shpilevoy [this message] 2020-09-14 10:13 ` [Tarantool-patches] [PATCH v2 11/11] [tosquash] raft: a swarm of minor fixes Serge Petrenko 2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 02/11] box: introduce summary RO flag Vladislav Shpilevoy 2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 03/11] wal: don't touch box.cfg.wal_dir more than once Vladislav Shpilevoy 2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 04/11] replication: track registered replica count Vladislav Shpilevoy 2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 05/11] [wip] box: do not register outgoing connections Vladislav Shpilevoy 2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 06/11] raft: introduce persistent raft state Vladislav Shpilevoy 2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 07/11] raft: introduce box.cfg.raft_* options Vladislav Shpilevoy 2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 08/11] raft: relay status updates to followers Vladislav Shpilevoy 2020-09-20 17:17 ` Vladislav Shpilevoy 2020-09-21 7:13 ` Serge Petrenko 2020-09-21 10:50 ` Serge Petrenko 2020-09-21 22:47 ` Vladislav Shpilevoy 2020-09-22 8:48 ` Serge Petrenko 2020-09-21 22:47 ` Vladislav Shpilevoy 2020-09-22 8:47 ` Serge Petrenko 2020-09-09 23:17 ` [Tarantool-patches] [PATCH v2 09/11] raft: introduce state machine Vladislav Shpilevoy 2020-09-19 15:49 ` Vladislav Shpilevoy 2020-09-19 15:50 ` Vladislav 
Shpilevoy 2020-09-21 8:20 ` Serge Petrenko 2020-09-21 8:22 ` Serge Petrenko 2020-09-21 8:34 ` Serge Petrenko 2020-09-21 22:47 ` Vladislav Shpilevoy 2020-09-22 8:49 ` Serge Petrenko 2020-09-22 22:48 ` Vladislav Shpilevoy 2020-09-23 9:59 ` Serge Petrenko 2020-09-23 20:31 ` Vladislav Shpilevoy 2020-09-24 9:34 ` Serge Petrenko 2020-09-19 15:58 ` [Tarantool-patches] [PATCH v2 12/11] dRaft Vladislav Shpilevoy 2020-09-19 15:59 ` Vladislav Shpilevoy 2020-09-21 7:24 ` Serge Petrenko 2020-09-21 22:48 ` [Tarantool-patches] [PATCH v2 12/11] raft: add tests Vladislav Shpilevoy 2020-09-30 10:56 ` [Tarantool-patches] [PATCH v2 00/11] dRaft Kirill Yukhin
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=4899ce311ec02592ceb6fe2dd1d6e099df6e7c51.1599693319.git.v.shpilevoy@tarantool.org \ --to=v.shpilevoy@tarantool.org \ --cc=gorcunov@gmail.com \ --cc=sergepetrenko@tarantool.org \ --cc=tarantool-patches@dev.tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH v2 11/11] [tosquash] raft: a swarm of minor fixes' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.