From: Vladimir Davydov <vdavydov.dev@gmail.com> To: kostja@tarantool.org Cc: tarantool-patches@freelists.org Subject: [PATCH 09/13] gc: cleanup garbage collection procedure Date: Thu, 4 Oct 2018 20:20:11 +0300 [thread overview] Message-ID: <554ab508361da2d4015c56712f7406182019f89a.1538671546.git.vdavydov.dev@gmail.com> (raw) In-Reply-To: <cover.1538671546.git.vdavydov.dev@gmail.com> In-Reply-To: <cover.1538671546.git.vdavydov.dev@gmail.com> Do some refactoring intended to make the code of gc_run() easier for understanding: - Remove gc_state::checkpoint_vclock. It was used to avoid rerunning engine gc callback in case no checkpoint was deleted. Since we maintain a list of all available checkpoints, we don't need it for this anymore - we can run gc only if a checkpoint was actually removed from the list. - Rename gc_state::wal_vclock back to gc_state::vclock. - Use bool variables with descriptive names instead of comparing vclock signatures. - Add some comments. --- src/box/box.cc | 2 +- src/box/gc.c | 52 +++++++++++++++++++++++++--------------------------- src/box/gc.h | 6 ++---- 3 files changed, 28 insertions(+), 32 deletions(-) diff --git a/src/box/box.cc b/src/box/box.cc index a33b80ef..2b2d2307 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -1614,7 +1614,7 @@ box_process_vote(struct ballot *ballot) { ballot->is_ro = cfg_geti("read_only") != 0; vclock_copy(&ballot->vclock, &replicaset.vclock); - vclock_copy(&ballot->gc_vclock, &gc.wal_vclock); + vclock_copy(&ballot->gc_vclock, &gc.vclock); } /** Insert a new cluster into _schema */ diff --git a/src/box/gc.c b/src/box/gc.c index e6951f4c..eb07a3b8 100644 --- a/src/box/gc.c +++ b/src/box/gc.c @@ -80,9 +80,8 @@ gc_init(void) /* Don't delete any files until recovery is complete. */ gc.min_checkpoint_count = INT_MAX; + vclock_create(&gc.vclock); rlist_create(&gc.checkpoints); - vclock_create(&gc.wal_vclock); - vclock_create(&gc.checkpoint_vclock); gc_tree_new(&gc.consumers); latch_create(&gc.latch); } @@ -125,26 +124,23 @@ gc_tree_first_checkpoint(gc_tree_t *consumers) static void gc_run(void) { - /* Look up the consumer that uses the oldest WAL. */ - struct gc_consumer *leftmost = gc_tree_first(&gc.consumers); /* Look up the consumer that uses the oldest checkpoint. */ struct gc_consumer *leftmost_checkpoint = gc_tree_first_checkpoint(&gc.consumers); + bool run_wal_gc = false; + bool run_engine_gc = false; + /* * Find the oldest checkpoint that must be preserved. * We have to preserve @min_checkpoint_count oldest * checkpoints, plus we can't remove checkpoints that * are still in use. */ - struct vclock gc_checkpoint_vclock; - vclock_create(&gc_checkpoint_vclock); - struct gc_checkpoint *checkpoint = NULL; while (true) { checkpoint = rlist_first_entry(&gc.checkpoints, struct gc_checkpoint, in_checkpoints); - vclock_copy(&gc_checkpoint_vclock, &checkpoint->vclock); if (gc.checkpoint_count <= gc.min_checkpoint_count) break; if (leftmost_checkpoint != NULL && @@ -154,20 +150,29 @@ gc_run(void) rlist_del_entry(checkpoint, in_checkpoints); free(checkpoint); gc.checkpoint_count--; + run_engine_gc = true; } /* At least one checkpoint must always be available. */ assert(checkpoint != NULL); - struct vclock gc_wal_vclock; - if (leftmost != NULL && - vclock_sum(&leftmost->vclock) < vclock_sum(&gc_checkpoint_vclock)) - vclock_copy(&gc_wal_vclock, &leftmost->vclock); - else - vclock_copy(&gc_wal_vclock, &gc_checkpoint_vclock); + /* + * Find the vclock of the oldest WAL row to keep. + * Note, we must keep all WALs created after the + * oldest checkpoint, even if no consumer needs them. + */ + const struct vclock *vclock = (gc_tree_empty(&gc.consumers) ? NULL : + &gc_tree_first(&gc.consumers)->vclock); + if (vclock == NULL || + vclock_sum(vclock) > vclock_sum(&checkpoint->vclock)) + vclock = &checkpoint->vclock; + + if (vclock_sum(vclock) > vclock_sum(&gc.vclock)) { + vclock_copy(&gc.vclock, vclock); + run_wal_gc = true; + } - if (vclock_sum(&gc_wal_vclock) <= vclock_sum(&gc.wal_vclock) && - vclock_sum(&gc_checkpoint_vclock) <= vclock_sum(&gc.checkpoint_vclock)) + if (!run_engine_gc && !run_wal_gc) return; /* nothing to do */ /* @@ -185,17 +190,10 @@ gc_run(void) * fails - see comment to memtx_engine_collect_garbage(). */ int rc = 0; - - if (vclock_sum(&gc_checkpoint_vclock) > vclock_sum(&gc.checkpoint_vclock)) { - vclock_copy(&gc.checkpoint_vclock, &gc_checkpoint_vclock); - rc = engine_collect_garbage(vclock_sum(&gc_checkpoint_vclock)); - } - if (vclock_sum(&gc_wal_vclock) > vclock_sum(&gc.wal_vclock)) { - vclock_copy(&gc.wal_vclock, &gc_wal_vclock); - if (rc == 0) - wal_collect_garbage(vclock_sum(&gc_wal_vclock)); - } - + if (run_engine_gc) + rc = engine_collect_garbage(vclock_sum(&checkpoint->vclock)); + if (run_wal_gc && rc == 0) + wal_collect_garbage(vclock_sum(vclock)); latch_unlock(&gc.latch); } diff --git a/src/box/gc.h b/src/box/gc.h index e26b1017..f6f2279e 100644 --- a/src/box/gc.h +++ b/src/box/gc.h @@ -87,6 +87,8 @@ typedef rb_tree(struct gc_consumer) gc_tree_t; /** Garbage collection state. */ struct gc_state { + /** VClock of the oldest WAL row available on the instance. */ + struct vclock vclock; /** * Minimal number of checkpoints to preserve. * Configured by box.cfg.checkpoint_count. @@ -103,10 +105,6 @@ struct gc_state { * to the tail. Linked by gc_checkpoint::in_checkpoints. */ struct rlist checkpoints; - /** Max vclock WAL garbage collection has been called for. */ - struct vclock wal_vclock; - /** Max vclock checkpoint garbage collection has been called for. */ - struct vclock checkpoint_vclock; /** Registered consumers, linked by gc_consumer::node. */ gc_tree_t consumers; /** -- 2.11.0
next prev parent reply other threads:[~2018-10-04 17:20 UTC|newest] Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-10-04 17:20 [PATCH 00/13] box: garbage collection refactoring and fixes Vladimir Davydov 2018-10-04 17:20 ` [PATCH 01/13] vinyl: fix master crash on replica join failure Vladimir Davydov 2018-10-04 21:43 ` Konstantin Osipov 2018-10-04 17:20 ` [PATCH 02/13] vinyl: force deletion of runs left from unfinished indexes on restart Vladimir Davydov 2018-10-04 21:44 ` Konstantin Osipov 2018-10-04 17:20 ` [PATCH 03/13] gc: make gc_consumer and gc_state structs transparent Vladimir Davydov 2018-10-04 21:47 ` Konstantin Osipov 2018-10-04 17:20 ` [PATCH 04/13] gc: use fixed length buffer for storing consumer name Vladimir Davydov 2018-10-04 21:47 ` Konstantin Osipov 2018-10-04 17:20 ` [PATCH 05/13] gc: fold gc_consumer_new and gc_consumer_delete Vladimir Davydov 2018-10-04 21:50 ` Konstantin Osipov 2018-10-05 8:56 ` Vladimir Davydov 2018-10-04 17:20 ` [PATCH 06/13] gc: format consumer name in gc_consumer_register Vladimir Davydov 2018-10-04 21:50 ` Konstantin Osipov 2018-10-04 17:20 ` [PATCH 07/13] gc: rename checkpoint_count to min_checkpoint_count Vladimir Davydov 2018-10-04 21:51 ` Konstantin Osipov 2018-10-04 17:20 ` [PATCH 08/13] gc: keep track of available checkpoints Vladimir Davydov 2018-10-04 21:59 ` Konstantin Osipov 2018-10-05 8:50 ` Vladimir Davydov 2018-10-04 17:20 ` Vladimir Davydov [this message] 2018-10-04 22:00 ` [PATCH 09/13] gc: cleanup garbage collection procedure Konstantin Osipov 2018-10-04 17:20 ` [PATCH 10/13] gc: improve box.info.gc output Vladimir Davydov 2018-10-04 22:01 ` Konstantin Osipov 2018-10-04 17:20 ` [PATCH 11/13] gc: separate checkpoint references from wal consumers Vladimir Davydov 2018-10-04 22:05 ` Konstantin Osipov 2018-10-04 17:20 ` [PATCH 12/13] gc: call gc_run unconditionally when consumer is advanced Vladimir Davydov 2018-10-04 22:26 ` Konstantin Osipov 2018-10-04 17:20 ` [PATCH 13/13] replication: ref checkpoint needed to join replica Vladimir Davydov 2018-10-04 22:27 ` Konstantin Osipov 2018-10-05 17:03 ` [PATCH 00/13] box: garbage collection refactoring and fixes Vladimir Davydov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=554ab508361da2d4015c56712f7406182019f89a.1538671546.git.vdavydov.dev@gmail.com \ --to=vdavydov.dev@gmail.com \ --cc=kostja@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='Re: [PATCH 09/13] gc: cleanup garbage collection procedure' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox