Tarantool development patches archive
 help / color / mirror / Atom feed
From: Serge Petrenko via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Cyrill Gorcunov <gorcunov@gmail.com>,
	tml <tarantool-patches@dev.tarantool.org>
Cc: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Subject: Re: [Tarantool-patches] [PATCH v3 1/3] gc/xlog: delay xlog cleanup until relays are subscribed
Date: Wed, 24 Mar 2021 16:09:56 +0300	[thread overview]
Message-ID: <1498cc0d-1a3a-619b-8cde-d484eca81758@tarantool.org> (raw)
In-Reply-To: <20210323154710.1696442-2-gorcunov@gmail.com>

Hi! Thanks for the patch! Please see my 2 comments below.


23.03.2021 18:47, Cyrill Gorcunov пишет:
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -771,6 +771,19 @@ box_check_wal_queue_max_size(void)
>   	return size;
>   }
>   
> +static double
> +box_check_wal_cleanup_delay(void)
> +{
> +	double value = cfg_getd("wal_cleanup_delay");
> +	if (value < 0) {
> +		diag_set(ClientError, ER_CFG, "wal_cleanup_delay",
> +			 "the value must be >= 0");
> +		return -1;
> +	}
> +
> +	return value;
> +}
> +
>   static void
>   box_check_readahead(int readahead)
>   {
> @@ -918,6 +931,8 @@ box_check_config(void)
>   	box_check_wal_mode(cfg_gets("wal_mode"));
>   	if (box_check_wal_queue_max_size() < 0)
>   		diag_raise();
> +	if (box_check_wal_cleanup_delay() < 0)
> +		diag_raise();
>   	if (box_check_memory_quota("memtx_memory") < 0)
>   		diag_raise();
>   	box_check_memtx_min_tuple_size(cfg_geti64("memtx_min_tuple_size"));
> @@ -1465,6 +1480,23 @@ box_set_wal_queue_max_size(void)
>   	return 0;
>   }
>   
> +int
> +box_set_wal_cleanup_delay(void)
> +{
> +	double delay = box_check_wal_cleanup_delay();
> +	if (delay < 0)
> +		return -1;
> +	/*
> +	 * Anonymous replicas do not require
> +	 * delay the cleanup procedure since they
> +	 * are read only.
> +	 */
> +	if (replication_anon)
> +		delay = 0;
> +	gc_set_wal_cleanup_delay(delay);
> +	return 0;
> +}
> +
>   void
>   box_set_vinyl_memory(void)
>   {
> @@ -3000,7 +3032,7 @@ box_cfg_xc(void)
>   	rmean_box = rmean_new(iproto_type_strs, IPROTO_TYPE_STAT_MAX);
>   	rmean_error = rmean_new(rmean_error_strings, RMEAN_ERROR_LAST);
>   
> -	gc_init();
> +	gc_init(box_check_wal_cleanup_delay());

You didn't add `wal_cleanup_delay` to `dynamic_cfg_skip_at_load`,
and that's correct, because we need to disable it if replication_anon is set.

So wal_cleanup_delay will be reapplied once box_cfg exits.

I propose initializing gc with TIMEOUT_INFINITY then. IMO that would look
simpler than setting the same value twice.

> diff --git a/src/box/gc.c b/src/box/gc.c
> index 9af4ef958..e1d7a1187 100644
> --- a/src/box/gc.c
> +++ b/src/box/gc.c
> @@ -102,11 +102,18 @@ gc_checkpoint_delete(struct gc_checkpoint *checkpoint)
>   }
>   
>   void
> -gc_init(void)
> +gc_init(double wal_cleanup_delay)
>   {
>   	/* Don't delete any files until recovery is complete. */
>   	gc.min_checkpoint_count = INT_MAX;
>   
> +	gc.wal_cleanup_delay = wal_cleanup_delay;
> +	gc.is_paused = wal_cleanup_delay > 0;
> +	gc.delay_ref = 0;
> +
> +	if (gc.is_paused)
> +		say_info("wal/engine cleanup is paused");
> +
>   	vclock_create(&gc.vclock);
>   	rlist_create(&gc.checkpoints);
>   	gc_tree_new(&gc.consumers);
> @@ -238,6 +245,39 @@ static int
>   gc_cleanup_fiber_f(va_list ap)
>   {
>   	(void)ap;
> +
> +	/*
> +	 * Stage 1 (optional): in case if we're booting
> +	 * up with cleanup disabled lets do wait in a
> +	 * separate cycle to minimize branching on stage 2.
> +	 */
> +	if (gc.is_paused) {
> +		double start_time = fiber_clock();
> +		while (!fiber_is_cancelled()) {
> +			double deadline = start_time + gc.wal_cleanup_delay;
> +			double timeout = gc.wal_cleanup_delay;
> +
> +			if (fiber_clock() >= deadline ||
> +			    fiber_yield_timeout(timeout)) {
> +				say_info("wal/engine cleanup is resumed "
> +					 "due to timeout expiration");
> +				gc.is_paused = false;
> +				gc.delay_ref = 0;
> +				break;
> +			}
> +
> +			/*
> +			 * If a last reference is dropped
> +			 * we can exit out early.
> +			 */
> +			if (!gc.is_paused)
> +				break;
> +		}
> +	}
> +
> +	/*
> +	 * Stage 2: a regular cleanup cycle.
> +	 */
>   	while (!fiber_is_cancelled()) {
>   		int64_t delta = gc.cleanup_scheduled - gc.cleanup_completed;
>   		if (delta == 0) {
> @@ -253,6 +293,43 @@ gc_cleanup_fiber_f(va_list ap)
>   	return 0;
>   }
>   
> +void
> +gc_set_wal_cleanup_delay(double wal_cleanup_delay)
> +{
> +	gc.wal_cleanup_delay = wal_cleanup_delay;
> +	/*
> +	 * This routine may be called at arbitrary
> +	 * moment thus we must be sure the cleanup
> +	 * fiber is paused to not wake up it when
> +	 * it is already in a regular cleanup stage.
> +	 */
> +	if (gc.is_paused)
> +		fiber_wakeup(gc.cleanup_fiber);
> +}
> +
> +void
> +gc_delay_ref(void)
> +{
> +	if (gc.is_paused) {
> +		assert(gc.delay_ref >= 0);
> +		gc.delay_ref++;
> +	}
> +}
> +
> +void
> +gc_delay_unref(void)
> +{
> +	if (gc.is_paused) {
> +		assert(gc.delay_ref > 0);
> +		gc.delay_ref--;
> +		if (gc.delay_ref == 0) {
> +			say_info("wal/engine cleanup is resumed");
> +			gc.is_paused = false;
> +			fiber_wakeup(gc.cleanup_fiber);

I'd move the info message to the cleanup fiber.
There you can deduce the reason for the resume (timeout expired or
replicas connected) and print it.
Or don't show the reason for the resume at all and leave a single info message.

I don't insist on you doing this though, feel free to ignore.

-- 
Serge Petrenko


  reply	other threads:[~2021-03-24 13:09 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-23 15:47 [Tarantool-patches] [PATCH v3 0/3] " Cyrill Gorcunov via Tarantool-patches
2021-03-23 15:47 ` [Tarantool-patches] [PATCH v3 1/3] " Cyrill Gorcunov via Tarantool-patches
2021-03-24 13:09   ` Serge Petrenko via Tarantool-patches [this message]
2021-03-24 15:00     ` Cyrill Gorcunov via Tarantool-patches
2021-03-25  7:12       ` Serge Petrenko via Tarantool-patches
2021-03-23 15:47 ` [Tarantool-patches] [PATCH v3 2/3] test: add a test for wal_cleanup_delay option Cyrill Gorcunov via Tarantool-patches
2021-03-24 13:20   ` Serge Petrenko via Tarantool-patches
2021-03-23 15:47 ` [Tarantool-patches] [PATCH v3 3/3] test: box-tap/gc -- add test for is_paused field Cyrill Gorcunov via Tarantool-patches
2021-03-24 13:27   ` Serge Petrenko via Tarantool-patches
2021-03-23 18:33 ` [Tarantool-patches] [PATCH v3 0/3] gc/xlog: delay xlog cleanup until relays are subscribed Cyrill Gorcunov via Tarantool-patches
2021-03-23 19:07   ` Cyrill Gorcunov via Tarantool-patches
2021-03-24 12:47 ` Serge Petrenko via Tarantool-patches
2021-03-24 14:42   ` Cyrill Gorcunov via Tarantool-patches
2021-03-25  7:13     ` Serge Petrenko via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1498cc0d-1a3a-619b-8cde-d484eca81758@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=gorcunov@gmail.com \
    --cc=sergepetrenko@tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v3 1/3] gc/xlog: delay xlog cleanup until relays are subscribed' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox