[Tarantool-patches] [PATCH v3 1/3] gc/xlog: delay xlog cleanup until relays are subscribed

Serge Petrenko sergepetrenko at tarantool.org
Wed Mar 24 16:09:56 MSK 2021


Hi! Thanks for the patch! Please see my 2 comments below.


23.03.2021 18:47, Cyrill Gorcunov пишет:
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -771,6 +771,19 @@ box_check_wal_queue_max_size(void)
>   	return size;
>   }
>   
> +static double
> +box_check_wal_cleanup_delay(void)
> +{
> +	double value = cfg_getd("wal_cleanup_delay");
> +	if (value < 0) {
> +		diag_set(ClientError, ER_CFG, "wal_cleanup_delay",
> +			 "the value must be >= 0");
> +		return -1;
> +	}
> +
> +	return value;
> +}
> +
>   static void
>   box_check_readahead(int readahead)
>   {
> @@ -918,6 +931,8 @@ box_check_config(void)
>   	box_check_wal_mode(cfg_gets("wal_mode"));
>   	if (box_check_wal_queue_max_size() < 0)
>   		diag_raise();
> +	if (box_check_wal_cleanup_delay() < 0)
> +		diag_raise();
>   	if (box_check_memory_quota("memtx_memory") < 0)
>   		diag_raise();
>   	box_check_memtx_min_tuple_size(cfg_geti64("memtx_min_tuple_size"));
> @@ -1465,6 +1480,23 @@ box_set_wal_queue_max_size(void)
>   	return 0;
>   }
>   
> +int
> +box_set_wal_cleanup_delay(void)
> +{
> +	double delay = box_check_wal_cleanup_delay();
> +	if (delay < 0)
> +		return -1;
> +	/*
> +	 * Anonymous replicas do not require
> +	 * delay the cleanup procedure since they
> +	 * are read only.
> +	 */
> +	if (replication_anon)
> +		delay = 0;
> +	gc_set_wal_cleanup_delay(delay);
> +	return 0;
> +}
> +
>   void
>   box_set_vinyl_memory(void)
>   {
> @@ -3000,7 +3032,7 @@ box_cfg_xc(void)
>   	rmean_box = rmean_new(iproto_type_strs, IPROTO_TYPE_STAT_MAX);
>   	rmean_error = rmean_new(rmean_error_strings, RMEAN_ERROR_LAST);
>   
> -	gc_init();
> +	gc_init(box_check_wal_cleanup_delay());

You didn't add `wal_cleanup_delay` to `dynamic_cfg_skip_at_load`,
and that's correct, because we need to disable it if replication_anon is set.

So wal_cleanup_delay will be reapplied once box_cfg exits.

I propose initializing gc with TIMEOUT_INFINITY then. IMO it would look
simpler than setting the same value twice.

> diff --git a/src/box/gc.c b/src/box/gc.c
> index 9af4ef958..e1d7a1187 100644
> --- a/src/box/gc.c
> +++ b/src/box/gc.c
> @@ -102,11 +102,18 @@ gc_checkpoint_delete(struct gc_checkpoint *checkpoint)
>   }
>   
>   void
> -gc_init(void)
> +gc_init(double wal_cleanup_delay)
>   {
>   	/* Don't delete any files until recovery is complete. */
>   	gc.min_checkpoint_count = INT_MAX;
>   
> +	gc.wal_cleanup_delay = wal_cleanup_delay;
> +	gc.is_paused = wal_cleanup_delay > 0;
> +	gc.delay_ref = 0;
> +
> +	if (gc.is_paused)
> +		say_info("wal/engine cleanup is paused");
> +
>   	vclock_create(&gc.vclock);
>   	rlist_create(&gc.checkpoints);
>   	gc_tree_new(&gc.consumers);
> @@ -238,6 +245,39 @@ static int
>   gc_cleanup_fiber_f(va_list ap)
>   {
>   	(void)ap;
> +
> +	/*
> +	 * Stage 1 (optional): in case if we're booting
> +	 * up with cleanup disabled lets do wait in a
> +	 * separate cycle to minimize branching on stage 2.
> +	 */
> +	if (gc.is_paused) {
> +		double start_time = fiber_clock();
> +		while (!fiber_is_cancelled()) {
> +			double deadline = start_time + gc.wal_cleanup_delay;
> +			double timeout = gc.wal_cleanup_delay;
> +
> +			if (fiber_clock() >= deadline ||
> +			    fiber_yield_timeout(timeout)) {
> +				say_info("wal/engine cleanup is resumed "
> +					 "due to timeout expiration");
> +				gc.is_paused = false;
> +				gc.delay_ref = 0;
> +				break;
> +			}
> +
> +			/*
> +			 * If a last reference is dropped
> +			 * we can exit out early.
> +			 */
> +			if (!gc.is_paused)
> +				break;
> +		}
> +	}
> +
> +	/*
> +	 * Stage 2: a regular cleanup cycle.
> +	 */
>   	while (!fiber_is_cancelled()) {
>   		int64_t delta = gc.cleanup_scheduled - gc.cleanup_completed;
>   		if (delta == 0) {
> @@ -253,6 +293,43 @@ gc_cleanup_fiber_f(va_list ap)
>   	return 0;
>   }
>   
> +void
> +gc_set_wal_cleanup_delay(double wal_cleanup_delay)
> +{
> +	gc.wal_cleanup_delay = wal_cleanup_delay;
> +	/*
> +	 * This routine may be called at arbitrary
> +	 * moment thus we must be sure the cleanup
> +	 * fiber is paused to not wake up it when
> +	 * it is already in a regular cleanup stage.
> +	 */
> +	if (gc.is_paused)
> +		fiber_wakeup(gc.cleanup_fiber);
> +}
> +
> +void
> +gc_delay_ref(void)
> +{
> +	if (gc.is_paused) {
> +		assert(gc.delay_ref >= 0);
> +		gc.delay_ref++;
> +	}
> +}
> +
> +void
> +gc_delay_unref(void)
> +{
> +	if (gc.is_paused) {
> +		assert(gc.delay_ref > 0);
> +		gc.delay_ref--;
> +		if (gc.delay_ref == 0) {
> +			say_info("wal/engine cleanup is resumed");
> +			gc.is_paused = false;
> +			fiber_wakeup(gc.cleanup_fiber);

I'd move the info message to the cleanup fiber.
There you can deduce the reason for the resume (timeout expired or
replicas connected) and print it.
Alternatively, don't show the reason for the resume at all and keep a single info message.

I don't insist on you doing this though, feel free to ignore.

-- 
Serge Petrenko



More information about the Tarantool-patches mailing list