From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp15.mail.ru (smtp15.mail.ru [94.100.176.133]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 1C8CA469719 for ; Mon, 14 Sep 2020 13:13:39 +0300 (MSK) References: <4899ce311ec02592ceb6fe2dd1d6e099df6e7c51.1599693319.git.v.shpilevoy@tarantool.org> From: Serge Petrenko Message-ID: <4ae8634e-4331-9cab-ebf6-7b835e9a2472@tarantool.org> Date: Mon, 14 Sep 2020 13:13:37 +0300 MIME-Version: 1.0 In-Reply-To: <4899ce311ec02592ceb6fe2dd1d6e099df6e7c51.1599693319.git.v.shpilevoy@tarantool.org> Content-Type: text/plain; charset="utf-8"; format="flowed" Content-Transfer-Encoding: 8bit Content-Language: ru Subject: Re: [Tarantool-patches] [PATCH v2 11/11] [tosquash] raft: a swarm of minor fixes List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Vladislav Shpilevoy , tarantool-patches@dev.tarantool.org, gorcunov@gmail.com 10.09.2020 02:16, Vladislav Shpilevoy пишет: > Major change - ev_check/ev_prepare didn't work. They are invoked > from a context, where yields are not possible. To a blocking WAL > write can't be done there. And an async WAL write requires too > many changes. > > This patch adds a worker fiber to Raft to perform state dumps. > > Other changes are fixes of typos and of minor bugs. > --- > src/box/raft.c | 58 ++++++++++++++++++++++++++++++++++---------------- > src/box/raft.h | 7 +++--- > 2 files changed, 43 insertions(+), 22 deletions(-) > > diff --git a/src/box/raft.c b/src/box/raft.c > index b01e65ced..e4e0b037c 100644 > --- a/src/box/raft.c > +++ b/src/box/raft.c > @@ -64,6 +64,7 @@ struct raft raft = { > .vote = 0, > .vote_mask = 0, > .vote_count = 0, > + .worker = NULL, > .election_timeout = 5, > }; > > @@ -250,13 +251,6 @@ raft_sm_schedule_new_election_cb(struct ev_loop *loop, struct ev_timer *timer, > static void > raft_sm_pause_and_dump(void); > > -/** > - * Flush Raft state changes to WAL. The callback resets itself, if during the > - * write more changes appear. > - */ > -static void > -raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events); > - > void > raft_process_recovery(const struct raft_request *req) > { > @@ -348,6 +342,7 @@ raft_process_msg(const struct raft_request *req, uint32_t source) > break; > raft.state = RAFT_STATE_LEADER; > raft.leader = instance_id; > + ev_timer_stop(loop(), &raft.timer); > break; > default: > unreachable(); > @@ -374,6 +369,10 @@ raft_process_msg(const struct raft_request *req, uint32_t source) > /* New leader was elected. */ > raft.state = RAFT_STATE_FOLLOWER; > raft.leader = source; > + if (!raft.is_write_in_progress) { > + ev_timer_stop(loop(), &raft.timer); > + raft_sm_wait_leader_dead(); > + } > end: > if (raft.state != old_state) { > /* > @@ -406,6 +405,12 @@ raft_process_heartbeat(uint32_t source) > /* Not interested in heartbeats from not a leader. */ > if (raft.leader != source) > return; > + /* > + * The instance currently is busy with writing something on disk. Can't > + * react to heartbeats. > + */ > + if (raft.is_write_in_progress) > + return; > /* > * XXX: it may be expensive to reset the timer like that. It may be less > * expensive to let the timer work, and remember last timestamp when > @@ -473,10 +478,8 @@ fail: > } > > static void > -raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events) > +raft_worker_handle_io(void) > { > - assert(watcher == &raft.io); > - (void) events; > assert(raft.is_write_in_progress); > /* During write Raft can't be anything but a follower. */ > assert(raft.state == RAFT_STATE_FOLLOWER); > @@ -488,7 +491,6 @@ raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events) > if (raft_is_fully_on_disk()) { > end_dump: > raft.is_write_in_progress = false; > - ev_check_stop(loop, watcher); > /* > * The state machine is stable. Can see now, to what state to > * go. > @@ -583,6 +585,25 @@ end_dump: > raft_broadcast(&req); > } > > +static int > +raft_worker_f(va_list args) > +{ > + (void)args; > + while (!fiber_is_cancelled()) { > + if (!raft.is_write_in_progress) > + goto idle; > + raft_worker_handle_io(); > + if (!raft.is_write_in_progress) > + goto idle; > + fiber_sleep(0); > + continue; > + idle: > + assert(raft_is_fully_on_disk()); > + fiber_yield(); > + } > + return 0; > +} > + > static void > raft_sm_pause_and_dump(void) > { > @@ -590,8 +611,10 @@ raft_sm_pause_and_dump(void) > if (raft.is_write_in_progress) > return; > ev_timer_stop(loop(), &raft.timer); > - ev_check_start(loop(), &raft.io); > raft.is_write_in_progress = true; > + if (raft.worker == NULL) > + raft.worker = fiber_new("raft_worker", raft_worker_f); > + fiber_wakeup(raft.worker); > } > > static void > @@ -620,7 +643,6 @@ raft_sm_schedule_new_election(void) > { > assert(raft_is_fully_on_disk()); > assert(raft.is_candidate); > - assert(raft.leader == 0); > /* Everyone is a follower until its vote for self is persisted. */ > raft_sm_schedule_new_term(raft.term + 1); > raft_sm_schedule_new_vote(instance_id); > @@ -641,20 +663,19 @@ static void > raft_sm_wait_leader_dead(void) > { > assert(!ev_is_active(&raft.timer)); > - assert(!ev_is_active(&raft.io)); > assert(!raft.is_write_in_progress); > assert(raft.is_candidate); > assert(raft.state == RAFT_STATE_FOLLOWER); > assert(raft.leader != 0); > double death_timeout = replication_disconnect_timeout(); > ev_timer_set(&raft.timer, death_timeout, death_timeout); > + ev_timer_start(loop(), &raft.timer); > } > > static void > raft_sm_wait_election_end(void) > { > assert(!ev_is_active(&raft.timer)); > - assert(!ev_is_active(&raft.io)); > assert(!raft.is_write_in_progress); > assert(raft.is_candidate); > assert(raft.state == RAFT_STATE_FOLLOWER || > @@ -664,13 +685,13 @@ raft_sm_wait_election_end(void) > double election_timeout = raft.election_timeout + > raft_new_random_election_shift(); > ev_timer_set(&raft.timer, election_timeout, election_timeout); > + ev_timer_start(loop(), &raft.timer); > } > > static void > raft_sm_start(void) > { > assert(!ev_is_active(&raft.timer)); > - assert(!ev_is_active(&raft.io)); > assert(!raft.is_write_in_progress); > assert(!raft.is_enabled); > assert(raft.state == RAFT_STATE_FOLLOWER); > @@ -769,12 +790,13 @@ raft_cfg_election_timeout(double timeout) > return; > > raft.election_timeout = timeout; > - if (raft.vote != 0 && raft.leader == 0) { > + if (raft.vote != 0 && raft.leader == 0 && raft.is_candidate) { > assert(ev_is_active(&raft.timer)); > double timeout = ev_timer_remaining(loop(), &raft.timer) - > raft.timer.at + raft.election_timeout; > ev_timer_stop(loop(), &raft.timer); > ev_timer_set(&raft.timer, timeout, timeout); > + ev_timer_start(loop(), &raft.timer); > } > } > > @@ -808,6 +830,7 @@ raft_cfg_death_timeout(void) > raft.timer.at + death_timeout; > ev_timer_stop(loop(), &raft.timer); > ev_timer_set(&raft.timer, timeout, timeout); > + ev_timer_start(loop(), &raft.timer); > } > } > > @@ -826,5 +849,4 @@ void > raft_init(void) > { > ev_timer_init(&raft.timer, raft_sm_schedule_new_election_cb, 0, 0); > - ev_check_init(&raft.io, raft_sm_dump_step); > } > diff --git a/src/box/raft.h b/src/box/raft.h > index 111a9c16e..23aedfe10 100644 > --- a/src/box/raft.h > +++ b/src/box/raft.h > @@ -65,6 +65,7 @@ extern "C" { > * than the configured one. See more details in the code. > */ > > +struct fiber; > struct raft_request; > struct vclock; > > @@ -138,10 +139,8 @@ struct raft { > int vote_count; > /** State machine timed event trigger. */ > struct ev_timer timer; > - /** > - * Dump of Raft state in the end of event loop, when it is changed. > - */ > - struct ev_check io; > + /** Worker fiber to execute blocking tasks like IO. */ > + struct fiber *worker; > /** Configured election timeout in seconds. */ > double election_timeout; > }; LGTM -- Serge Petrenko