Tarantool development patches archive
 help / color / mirror / Atom feed
From: Serge Petrenko <sergepetrenko@tarantool.org>
To: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>,
	tarantool-patches@dev.tarantool.org, gorcunov@gmail.com
Subject: Re: [Tarantool-patches] [PATCH v2 11/11] [tosquash] raft: a swarm of minor fixes
Date: Mon, 14 Sep 2020 13:13:37 +0300	[thread overview]
Message-ID: <4ae8634e-4331-9cab-ebf6-7b835e9a2472@tarantool.org> (raw)
In-Reply-To: <4899ce311ec02592ceb6fe2dd1d6e099df6e7c51.1599693319.git.v.shpilevoy@tarantool.org>


10.09.2020 02:16, Vladislav Shpilevoy пишет:
> Major change - ev_check/ev_prepare didn't work. They are invoked
> from a context where yields are not possible. So a blocking WAL
> write can't be done there. And an async WAL write requires too
> many changes.
>
> This patch adds a worker fiber to Raft to perform state dumps.
>
> Other changes are fixes of typos and of minor bugs.
> ---
>   src/box/raft.c | 58 ++++++++++++++++++++++++++++++++++----------------
>   src/box/raft.h |  7 +++---
>   2 files changed, 43 insertions(+), 22 deletions(-)
>
> diff --git a/src/box/raft.c b/src/box/raft.c
> index b01e65ced..e4e0b037c 100644
> --- a/src/box/raft.c
> +++ b/src/box/raft.c
> @@ -64,6 +64,7 @@ struct raft raft = {
>   	.vote = 0,
>   	.vote_mask = 0,
>   	.vote_count = 0,
> +	.worker = NULL,
>   	.election_timeout = 5,
>   };
>   
> @@ -250,13 +251,6 @@ raft_sm_schedule_new_election_cb(struct ev_loop *loop, struct ev_timer *timer,
>   static void
>   raft_sm_pause_and_dump(void);
>   
> -/**
> - * Flush Raft state changes to WAL. The callback resets itself, if during the
> - * write more changes appear.
> - */
> -static void
> -raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events);
> -
>   void
>   raft_process_recovery(const struct raft_request *req)
>   {
> @@ -348,6 +342,7 @@ raft_process_msg(const struct raft_request *req, uint32_t source)
>   				break;
>   			raft.state = RAFT_STATE_LEADER;
>   			raft.leader = instance_id;
> +			ev_timer_stop(loop(), &raft.timer);
>   			break;
>   		default:
>   			unreachable();
> @@ -374,6 +369,10 @@ raft_process_msg(const struct raft_request *req, uint32_t source)
>   	/* New leader was elected. */
>   	raft.state = RAFT_STATE_FOLLOWER;
>   	raft.leader = source;
> +	if (!raft.is_write_in_progress) {
> +		ev_timer_stop(loop(), &raft.timer);
> +		raft_sm_wait_leader_dead();
> +	}
>   end:
>   	if (raft.state != old_state) {
>   		/*
> @@ -406,6 +405,12 @@ raft_process_heartbeat(uint32_t source)
>   	/* Not interested in heartbeats from not a leader. */
>   	if (raft.leader != source)
>   		return;
> +	/*
> +	 * The instance currently is busy with writing something on disk. Can't
> +	 * react to heartbeats.
> +	 */
> +	if (raft.is_write_in_progress)
> +		return;
>   	/*
>   	 * XXX: it may be expensive to reset the timer like that. It may be less
>   	 * expensive to let the timer work, and remember last timestamp when
> @@ -473,10 +478,8 @@ fail:
>   }
>   
>   static void
> -raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events)
> +raft_worker_handle_io(void)
>   {
> -	assert(watcher == &raft.io);
> -	(void) events;
>   	assert(raft.is_write_in_progress);
>   	/* During write Raft can't be anything but a follower. */
>   	assert(raft.state == RAFT_STATE_FOLLOWER);
> @@ -488,7 +491,6 @@ raft_sm_dump_step(struct ev_loop *loop, struct ev_check *watcher, int events)
>   	if (raft_is_fully_on_disk()) {
>   end_dump:
>   		raft.is_write_in_progress = false;
> -		ev_check_stop(loop, watcher);
>   		/*
>   		 * The state machine is stable. Can see now, to what state to
>   		 * go.
> @@ -583,6 +585,25 @@ end_dump:
>   		raft_broadcast(&req);
>   }
>   
> +static int
> +raft_worker_f(va_list args)
> +{
> +	(void)args;
> +	while (!fiber_is_cancelled()) {
> +		if (!raft.is_write_in_progress)
> +			goto idle;
> +		raft_worker_handle_io();
> +		if (!raft.is_write_in_progress)
> +			goto idle;
> +		fiber_sleep(0);
> +		continue;
> +	idle:
> +		assert(raft_is_fully_on_disk());
> +		fiber_yield();
> +	}
> +	return 0;
> +}
> +
>   static void
>   raft_sm_pause_and_dump(void)
>   {
> @@ -590,8 +611,10 @@ raft_sm_pause_and_dump(void)
>   	if (raft.is_write_in_progress)
>   		return;
>   	ev_timer_stop(loop(), &raft.timer);
> -	ev_check_start(loop(), &raft.io);
>   	raft.is_write_in_progress = true;
> +	if (raft.worker == NULL)
> +		raft.worker = fiber_new("raft_worker", raft_worker_f);
> +	fiber_wakeup(raft.worker);
>   }
>   
>   static void
> @@ -620,7 +643,6 @@ raft_sm_schedule_new_election(void)
>   {
>   	assert(raft_is_fully_on_disk());
>   	assert(raft.is_candidate);
> -	assert(raft.leader == 0);
>   	/* Everyone is a follower until its vote for self is persisted. */
>   	raft_sm_schedule_new_term(raft.term + 1);
>   	raft_sm_schedule_new_vote(instance_id);
> @@ -641,20 +663,19 @@ static void
>   raft_sm_wait_leader_dead(void)
>   {
>   	assert(!ev_is_active(&raft.timer));
> -	assert(!ev_is_active(&raft.io));
>   	assert(!raft.is_write_in_progress);
>   	assert(raft.is_candidate);
>   	assert(raft.state == RAFT_STATE_FOLLOWER);
>   	assert(raft.leader != 0);
>   	double death_timeout = replication_disconnect_timeout();
>   	ev_timer_set(&raft.timer, death_timeout, death_timeout);
> +	ev_timer_start(loop(), &raft.timer);
>   }
>   
>   static void
>   raft_sm_wait_election_end(void)
>   {
>   	assert(!ev_is_active(&raft.timer));
> -	assert(!ev_is_active(&raft.io));
>   	assert(!raft.is_write_in_progress);
>   	assert(raft.is_candidate);
>   	assert(raft.state == RAFT_STATE_FOLLOWER ||
> @@ -664,13 +685,13 @@ raft_sm_wait_election_end(void)
>   	double election_timeout = raft.election_timeout +
>   				  raft_new_random_election_shift();
>   	ev_timer_set(&raft.timer, election_timeout, election_timeout);
> +	ev_timer_start(loop(), &raft.timer);
>   }
>   
>   static void
>   raft_sm_start(void)
>   {
>   	assert(!ev_is_active(&raft.timer));
> -	assert(!ev_is_active(&raft.io));
>   	assert(!raft.is_write_in_progress);
>   	assert(!raft.is_enabled);
>   	assert(raft.state == RAFT_STATE_FOLLOWER);
> @@ -769,12 +790,13 @@ raft_cfg_election_timeout(double timeout)
>   		return;
>   
>   	raft.election_timeout = timeout;
> -	if (raft.vote != 0 && raft.leader == 0) {
> +	if (raft.vote != 0 && raft.leader == 0 && raft.is_candidate) {
>   		assert(ev_is_active(&raft.timer));
>   		double timeout = ev_timer_remaining(loop(), &raft.timer) -
>   				 raft.timer.at + raft.election_timeout;
>   		ev_timer_stop(loop(), &raft.timer);
>   		ev_timer_set(&raft.timer, timeout, timeout);
> +		ev_timer_start(loop(), &raft.timer);
>   	}
>   }
>   
> @@ -808,6 +830,7 @@ raft_cfg_death_timeout(void)
>   				 raft.timer.at + death_timeout;
>   		ev_timer_stop(loop(), &raft.timer);
>   		ev_timer_set(&raft.timer, timeout, timeout);
> +		ev_timer_start(loop(), &raft.timer);
>   	}
>   }
>   
> @@ -826,5 +849,4 @@ void
>   raft_init(void)
>   {
>   	ev_timer_init(&raft.timer, raft_sm_schedule_new_election_cb, 0, 0);
> -	ev_check_init(&raft.io, raft_sm_dump_step);
>   }
> diff --git a/src/box/raft.h b/src/box/raft.h
> index 111a9c16e..23aedfe10 100644
> --- a/src/box/raft.h
> +++ b/src/box/raft.h
> @@ -65,6 +65,7 @@ extern "C" {
>    * than the configured one. See more details in the code.
>    */
>   
> +struct fiber;
>   struct raft_request;
>   struct vclock;
>   
> @@ -138,10 +139,8 @@ struct raft {
>   	int vote_count;
>   	/** State machine timed event trigger. */
>   	struct ev_timer timer;
> -	/**
> -	 * Dump of Raft state in the end of event loop, when it is changed.
> -	 */
> -	struct ev_check io;
> +	/** Worker fiber to execute blocking tasks like IO. */
> +	struct fiber *worker;
>   	/** Configured election timeout in seconds. */
>   	double election_timeout;
>   };
LGTM

-- 
Serge Petrenko

  reply	other threads:[~2020-09-14 10:13 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-09 23:16 [Tarantool-patches] [PATCH v2 00/11] dRaft Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 01/11] applier: store instance_id in struct applier Vladislav Shpilevoy
2020-09-14  9:38   ` Serge Petrenko
2020-09-19 15:44   ` Vladislav Shpilevoy
2020-09-21  6:23     ` Serge Petrenko
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 10/11] raft: introduce box.info.raft Vladislav Shpilevoy
2020-09-14  9:42   ` Serge Petrenko
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 11/11] [tosquash] raft: a swarm of minor fixes Vladislav Shpilevoy
2020-09-14 10:13   ` Serge Petrenko [this message]
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 02/11] box: introduce summary RO flag Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 03/11] wal: don't touch box.cfg.wal_dir more than once Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 04/11] replication: track registered replica count Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 05/11] [wip] box: do not register outgoing connections Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 06/11] raft: introduce persistent raft state Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 07/11] raft: introduce box.cfg.raft_* options Vladislav Shpilevoy
2020-09-09 23:16 ` [Tarantool-patches] [PATCH v2 08/11] raft: relay status updates to followers Vladislav Shpilevoy
2020-09-20 17:17   ` Vladislav Shpilevoy
2020-09-21  7:13     ` Serge Petrenko
2020-09-21 10:50   ` Serge Petrenko
2020-09-21 22:47     ` Vladislav Shpilevoy
2020-09-22  8:48       ` Serge Petrenko
2020-09-21 22:47   ` Vladislav Shpilevoy
2020-09-22  8:47     ` Serge Petrenko
2020-09-09 23:17 ` [Tarantool-patches] [PATCH v2 09/11] raft: introduce state machine Vladislav Shpilevoy
2020-09-19 15:49   ` Vladislav Shpilevoy
2020-09-19 15:50     ` Vladislav Shpilevoy
2020-09-21  8:20       ` Serge Petrenko
2020-09-21  8:22     ` Serge Petrenko
2020-09-21  8:34       ` Serge Petrenko
2020-09-21 22:47     ` Vladislav Shpilevoy
2020-09-22  8:49       ` Serge Petrenko
2020-09-22 22:48   ` Vladislav Shpilevoy
2020-09-23  9:59     ` Serge Petrenko
2020-09-23 20:31       ` Vladislav Shpilevoy
2020-09-24  9:34         ` Serge Petrenko
2020-09-19 15:58 ` [Tarantool-patches] [PATCH v2 12/11] dRaft Vladislav Shpilevoy
2020-09-19 15:59   ` Vladislav Shpilevoy
2020-09-21  7:24     ` Serge Petrenko
2020-09-21 22:48   ` [Tarantool-patches] [PATCH v2 12/11] raft: add tests Vladislav Shpilevoy
2020-09-30 10:56 ` [Tarantool-patches] [PATCH v2 00/11] dRaft Kirill Yukhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4ae8634e-4331-9cab-ebf6-7b835e9a2472@tarantool.org \
    --to=sergepetrenko@tarantool.org \
    --cc=gorcunov@gmail.com \
    --cc=tarantool-patches@dev.tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v2 11/11] [tosquash] raft: a swarm of minor fixes' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox