From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: tarantool-patches@dev.tarantool.org, sergepetrenko@tarantool.org,
gorcunov@gmail.com
Subject: [Tarantool-patches] [PATCH v2 11/11] [tosquash] raft: a swarm of minor fixes
Date: Thu, 10 Sep 2020 01:16:52 +0200 [thread overview]
Message-ID: <4899ce311ec02592ceb6fe2dd1d6e099df6e7c51.1599693319.git.v.shpilevoy@tarantool.org> (raw)
In-Reply-To: <cover.1599693319.git.v.shpilevoy@tarantool.org>
Major change - ev_check/ev_prepare didn't work. They are invoked
from a context where yields are not possible, so a blocking WAL
write can't be done there. And an async WAL write requires too
many changes.
This patch adds a worker fiber to Raft to perform state dumps.
Other changes are fixes of typos and of minor bugs.
---
src/box/raft.c | 58 ++++++++++++++++++++++++++++++++++----------------
src/box/raft.h | 7 +++---
2 files changed, 43 insertions(+), 22 deletions(-)
diff --git a/src/box/raft.c b/src/box/raft.c
index b01e65ced..e4e0b037c 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -64,6 +64,7 @@ struct raft raft = {
.vote = 0,
.vote_mask = 0,
.vote_count = 0,
+ .worker = NULL,
.election_timeout = 5,
};
@@ -250,13 +251,6 @@ raft_sm_schedule_new_election_cb(struct ev_loop *loop, struct ev_timer *timer,
static void
raft_sm_pause_and_dump(void);
-/**
- * Flush Raft state changes to WAL. The callback resets itself, if during the
- * write more changes appear.
- */
-static void
-raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events);
-
void
raft_process_recovery(const struct raft_request *req)
{
@@ -348,6 +342,7 @@ raft_process_msg(const struct raft_request *req, uint32_t source)
break;
raft.state = RAFT_STATE_LEADER;
raft.leader = instance_id;
+ ev_timer_stop(loop(), &raft.timer);
break;
default:
unreachable();
@@ -374,6 +369,10 @@ raft_process_msg(const struct raft_request *req, uint32_t source)
/* New leader was elected. */
raft.state = RAFT_STATE_FOLLOWER;
raft.leader = source;
+ if (!raft.is_write_in_progress) {
+ ev_timer_stop(loop(), &raft.timer);
+ raft_sm_wait_leader_dead();
+ }
end:
if (raft.state != old_state) {
/*
@@ -406,6 +405,12 @@ raft_process_heartbeat(uint32_t source)
/* Not interested in heartbeats from not a leader. */
if (raft.leader != source)
return;
+ /*
+ * The instance currently is busy with writing something on disk. Can't
+ * react to heartbeats.
+ */
+ if (raft.is_write_in_progress)
+ return;
/*
* XXX: it may be expensive to reset the timer like that. It may be less
* expensive to let the timer work, and remember last timestamp when
@@ -473,10 +478,8 @@ fail:
}
static void
-raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events)
+raft_worker_handle_io(void)
{
- assert(watcher == &raft.io);
- (void) events;
assert(raft.is_write_in_progress);
/* During write Raft can't be anything but a follower. */
assert(raft.state == RAFT_STATE_FOLLOWER);
@@ -488,7 +491,6 @@ raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events)
if (raft_is_fully_on_disk()) {
end_dump:
raft.is_write_in_progress = false;
- ev_check_stop(loop, watcher);
/*
* The state machine is stable. Can see now, to what state to
* go.
@@ -583,6 +585,25 @@ end_dump:
raft_broadcast(&req);
}
+static int
+raft_worker_f(va_list args)
+{
+ (void)args;
+ while (!fiber_is_cancelled()) {
+ if (!raft.is_write_in_progress)
+ goto idle;
+ raft_worker_handle_io();
+ if (!raft.is_write_in_progress)
+ goto idle;
+ fiber_sleep(0);
+ continue;
+ idle:
+ assert(raft_is_fully_on_disk());
+ fiber_yield();
+ }
+ return 0;
+}
+
static void
raft_sm_pause_and_dump(void)
{
@@ -590,8 +611,10 @@ raft_sm_pause_and_dump(void)
if (raft.is_write_in_progress)
return;
ev_timer_stop(loop(), &raft.timer);
- ev_check_start(loop(), &raft.io);
raft.is_write_in_progress = true;
+ if (raft.worker == NULL)
+ raft.worker = fiber_new("raft_worker", raft_worker_f);
+ fiber_wakeup(raft.worker);
}
static void
@@ -620,7 +643,6 @@ raft_sm_schedule_new_election(void)
{
assert(raft_is_fully_on_disk());
assert(raft.is_candidate);
- assert(raft.leader == 0);
/* Everyone is a follower until its vote for self is persisted. */
raft_sm_schedule_new_term(raft.term + 1);
raft_sm_schedule_new_vote(instance_id);
@@ -641,20 +663,19 @@ static void
raft_sm_wait_leader_dead(void)
{
assert(!ev_is_active(&raft.timer));
- assert(!ev_is_active(&raft.io));
assert(!raft.is_write_in_progress);
assert(raft.is_candidate);
assert(raft.state == RAFT_STATE_FOLLOWER);
assert(raft.leader != 0);
double death_timeout = replication_disconnect_timeout();
ev_timer_set(&raft.timer, death_timeout, death_timeout);
+ ev_timer_start(loop(), &raft.timer);
}
static void
raft_sm_wait_election_end(void)
{
assert(!ev_is_active(&raft.timer));
- assert(!ev_is_active(&raft.io));
assert(!raft.is_write_in_progress);
assert(raft.is_candidate);
assert(raft.state == RAFT_STATE_FOLLOWER ||
@@ -664,13 +685,13 @@ raft_sm_wait_election_end(void)
double election_timeout = raft.election_timeout +
raft_new_random_election_shift();
ev_timer_set(&raft.timer, election_timeout, election_timeout);
+ ev_timer_start(loop(), &raft.timer);
}
static void
raft_sm_start(void)
{
assert(!ev_is_active(&raft.timer));
- assert(!ev_is_active(&raft.io));
assert(!raft.is_write_in_progress);
assert(!raft.is_enabled);
assert(raft.state == RAFT_STATE_FOLLOWER);
@@ -769,12 +790,13 @@ raft_cfg_election_timeout(double timeout)
return;
raft.election_timeout = timeout;
- if (raft.vote != 0 && raft.leader == 0) {
+ if (raft.vote != 0 && raft.leader == 0 && raft.is_candidate) {
assert(ev_is_active(&raft.timer));
double timeout = ev_timer_remaining(loop(), &raft.timer) -
raft.timer.at + raft.election_timeout;
ev_timer_stop(loop(), &raft.timer);
ev_timer_set(&raft.timer, timeout, timeout);
+ ev_timer_start(loop(), &raft.timer);
}
}
@@ -808,6 +830,7 @@ raft_cfg_death_timeout(void)
raft.timer.at + death_timeout;
ev_timer_stop(loop(), &raft.timer);
ev_timer_set(&raft.timer, timeout, timeout);
+ ev_timer_start(loop(), &raft.timer);
}
}
@@ -826,5 +849,4 @@ void
raft_init(void)
{
ev_timer_init(&raft.timer, raft_sm_schedule_new_election_cb, 0, 0);
- ev_check_init(&raft.io, raft_sm_dump_step);
}
diff --git a/src/box/raft.h b/src/box/raft.h
index 111a9c16e..23aedfe10 100644
--- a/src/box/raft.h
+++ b/src/box/raft.h
@@ -65,6 +65,7 @@ extern "C" {
* than the configured one. See more details in the code.
*/
+struct fiber;
struct raft_request;
struct vclock;
@@ -138,10 +139,8 @@ struct raft {
int vote_count;
/** State machine timed event trigger. */
struct ev_timer timer;
- /**
- * Dump of Raft state in the end of event loop, when it is changed.
- */
- struct ev_check io;
+ /** Worker fiber to execute blocking tasks like IO. */
+ struct fiber *worker;
/** Configured election timeout in seconds. */
double election_timeout;
};
--
2.21.1 (Apple Git-122.3)
next prev parent reply other threads:[~2020-09-09 23:17 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-09 23:16 [Tarantool-patches] [PATCH v2 00/11] dRaft Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 01/11] applier: store instance_id in struct applier Vladislav Shpilevoy
2020-09-14 9:38 ` Serge Petrenko
2020-09-19 15:44 ` Vladislav Shpilevoy
2020-09-21 6:23 ` Serge Petrenko
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 10/11] raft: introduce box.info.raft Vladislav Shpilevoy
2020-09-14 9:42 ` Serge Petrenko
2020-09-09 23:16 ` Vladislav Shpilevoy [this message]
2020-09-14 10:13 ` [Tarantool-patches] [PATCH v2 11/11] [tosquash] raft: a swarm of minor fixes Serge Petrenko
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 02/11] box: introduce summary RO flag Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 03/11] wal: don't touch box.cfg.wal_dir more than once Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 04/11] replication: track registered replica count Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 05/11] [wip] box: do not register outgoing connections Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 06/11] raft: introduce persistent raft state Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 07/11] raft: introduce box.cfg.raft_* options Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 08/11] raft: relay status updates to followers Vladislav Shpilevoy
2020-09-20 17:17 ` Vladislav Shpilevoy
2020-09-21 7:13 ` Serge Petrenko
2020-09-21 10:50 ` Serge Petrenko
2020-09-21 22:47 ` Vladislav Shpilevoy
2020-09-22 8:48 ` Serge Petrenko
2020-09-21 22:47 ` Vladislav Shpilevoy
2020-09-22 8:47 ` Serge Petrenko
2020-09-09 23:17 ` [Tarantool-patches] [PATCH v2 09/11] raft: introduce state machine Vladislav Shpilevoy
2020-09-19 15:49 ` Vladislav Shpilevoy
2020-09-19 15:50 ` Vladislav Shpilevoy
2020-09-21 8:20 ` Serge Petrenko
2020-09-21 8:22 ` Serge Petrenko
2020-09-21 8:34 ` Serge Petrenko
2020-09-21 22:47 ` Vladislav Shpilevoy
2020-09-22 8:49 ` Serge Petrenko
2020-09-22 22:48 ` Vladislav Shpilevoy
2020-09-23 9:59 ` Serge Petrenko
2020-09-23 20:31 ` Vladislav Shpilevoy
2020-09-24 9:34 ` Serge Petrenko
2020-09-19 15:58 ` [Tarantool-patches] [PATCH v2 12/11] dRaft Vladislav Shpilevoy
2020-09-19 15:59 ` Vladislav Shpilevoy
2020-09-21 7:24 ` Serge Petrenko
2020-09-21 22:48 ` [Tarantool-patches] [PATCH v2 12/11] raft: add tests Vladislav Shpilevoy
2020-09-30 10:56 ` [Tarantool-patches] [PATCH v2 00/11] dRaft Kirill Yukhin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4899ce311ec02592ceb6fe2dd1d6e099df6e7c51.1599693319.git.v.shpilevoy@tarantool.org \
--to=v.shpilevoy@tarantool.org \
--cc=gorcunov@gmail.com \
--cc=sergepetrenko@tarantool.org \
--cc=tarantool-patches@dev.tarantool.org \
--subject='Re: [Tarantool-patches] [PATCH v2 11/11] [tosquash] raft: a swarm of minor fixes' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox