[Tarantool-patches] [PATCH v3 1/3] gc/xlog: delay xlog cleanup until relays are subscribed
Serge Petrenko
sergepetrenko at tarantool.org
Wed Mar 24 16:09:56 MSK 2021
Hi! Thanks for the patch! Please see my 2 comments below.
23.03.2021 18:47, Cyrill Gorcunov пишет:
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -771,6 +771,19 @@ box_check_wal_queue_max_size(void)
> return size;
> }
>
> +static double
> +box_check_wal_cleanup_delay(void)
> +{
> + double value = cfg_getd("wal_cleanup_delay");
> + if (value < 0) {
> + diag_set(ClientError, ER_CFG, "wal_cleanup_delay",
> + "the value must be >= 0");
> + return -1;
> + }
> +
> + return value;
> +}
> +
> static void
> box_check_readahead(int readahead)
> {
> @@ -918,6 +931,8 @@ box_check_config(void)
> box_check_wal_mode(cfg_gets("wal_mode"));
> if (box_check_wal_queue_max_size() < 0)
> diag_raise();
> + if (box_check_wal_cleanup_delay() < 0)
> + diag_raise();
> if (box_check_memory_quota("memtx_memory") < 0)
> diag_raise();
> box_check_memtx_min_tuple_size(cfg_geti64("memtx_min_tuple_size"));
> @@ -1465,6 +1480,23 @@ box_set_wal_queue_max_size(void)
> return 0;
> }
>
> +int
> +box_set_wal_cleanup_delay(void)
> +{
> + double delay = box_check_wal_cleanup_delay();
> + if (delay < 0)
> + return -1;
> + /*
> + * Anonymous replicas do not require
> + * delay the cleanup procedure since they
> + * are read only.
> + */
> + if (replication_anon)
> + delay = 0;
> + gc_set_wal_cleanup_delay(delay);
> + return 0;
> +}
> +
> void
> box_set_vinyl_memory(void)
> {
> @@ -3000,7 +3032,7 @@ box_cfg_xc(void)
> rmean_box = rmean_new(iproto_type_strs, IPROTO_TYPE_STAT_MAX);
> rmean_error = rmean_new(rmean_error_strings, RMEAN_ERROR_LAST);
>
> - gc_init();
> + gc_init(box_check_wal_cleanup_delay());
You didn't add `wal_cleanup_delay` to `dynamic_cfg_skip_at_load`,
and that's correct, because we need to disable the delay if replication_anon is set.
So wal_cleanup_delay will be reapplied once box_cfg exits.
I propose initializing gc with TIMEOUT_INFINITY then. IMO it would look simpler than
setting the same value twice.
> diff --git a/src/box/gc.c b/src/box/gc.c
> index 9af4ef958..e1d7a1187 100644
> --- a/src/box/gc.c
> +++ b/src/box/gc.c
> @@ -102,11 +102,18 @@ gc_checkpoint_delete(struct gc_checkpoint *checkpoint)
> }
>
> void
> -gc_init(void)
> +gc_init(double wal_cleanup_delay)
> {
> /* Don't delete any files until recovery is complete. */
> gc.min_checkpoint_count = INT_MAX;
>
> + gc.wal_cleanup_delay = wal_cleanup_delay;
> + gc.is_paused = wal_cleanup_delay > 0;
> + gc.delay_ref = 0;
> +
> + if (gc.is_paused)
> + say_info("wal/engine cleanup is paused");
> +
> vclock_create(&gc.vclock);
> rlist_create(&gc.checkpoints);
> gc_tree_new(&gc.consumers);
> @@ -238,6 +245,39 @@ static int
> gc_cleanup_fiber_f(va_list ap)
> {
> (void)ap;
> +
> + /*
> + * Stage 1 (optional): in case if we're booting
> + * up with cleanup disabled lets do wait in a
> + * separate cycle to minimize branching on stage 2.
> + */
> + if (gc.is_paused) {
> + double start_time = fiber_clock();
> + while (!fiber_is_cancelled()) {
> + double deadline = start_time + gc.wal_cleanup_delay;
> + double timeout = gc.wal_cleanup_delay;
> +
> + if (fiber_clock() >= deadline ||
> + fiber_yield_timeout(timeout)) {
> + say_info("wal/engine cleanup is resumed "
> + "due to timeout expiration");
> + gc.is_paused = false;
> + gc.delay_ref = 0;
> + break;
> + }
> +
> + /*
> + * If a last reference is dropped
> + * we can exit out early.
> + */
> + if (!gc.is_paused)
> + break;
> + }
> + }
> +
> + /*
> + * Stage 2: a regular cleanup cycle.
> + */
> while (!fiber_is_cancelled()) {
> int64_t delta = gc.cleanup_scheduled - gc.cleanup_completed;
> if (delta == 0) {
> @@ -253,6 +293,43 @@ gc_cleanup_fiber_f(va_list ap)
> return 0;
> }
>
> +void
> +gc_set_wal_cleanup_delay(double wal_cleanup_delay)
> +{
> + gc.wal_cleanup_delay = wal_cleanup_delay;
> + /*
> + * This routine may be called at arbitrary
> + * moment thus we must be sure the cleanup
> + * fiber is paused to not wake up it when
> + * it is already in a regular cleanup stage.
> + */
> + if (gc.is_paused)
> + fiber_wakeup(gc.cleanup_fiber);
> +}
> +
> +void
> +gc_delay_ref(void)
> +{
> + if (gc.is_paused) {
> + assert(gc.delay_ref >= 0);
> + gc.delay_ref++;
> + }
> +}
> +
> +void
> +gc_delay_unref(void)
> +{
> + if (gc.is_paused) {
> + assert(gc.delay_ref > 0);
> + gc.delay_ref--;
> + if (gc.delay_ref == 0) {
> + say_info("wal/engine cleanup is resumed");
> + gc.is_paused = false;
> + fiber_wakeup(gc.cleanup_fiber);
I'd move the info message to the cleanup fiber.
There you can deduce the reason for the resume (timeout expired or replicas connected)
and print it.
Or don't show the reason for the resume at all and leave a single info message.
I don't insist on this, though; feel free to ignore.
--
Serge Petrenko
More information about the Tarantool-patches
mailing list