From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH 09/13] gc: cleanup garbage collection procedure Date: Thu, 4 Oct 2018 20:20:11 +0300 Message-Id: <554ab508361da2d4015c56712f7406182019f89a.1538671546.git.vdavydov.dev@gmail.com> In-Reply-To: References: In-Reply-To: References: To: kostja@tarantool.org Cc: tarantool-patches@freelists.org List-ID: Do some refactoring intended to make the code of gc_run() easier for understanding: - Remove gc_state::checkpoint_vclock. It was used to avoid rerunning engine gc callback in case no checkpoint was deleted. Since we maintain a list of all available checkpoints, we don't need it for this anymore - we can run gc only if a checkpoint was actually removed from the list. - Rename gc_state::wal_vclock back to gc_state::vclock. - Use bool variables with descriptive names instead of comparing vclock signatures. - Add some comments. --- src/box/box.cc | 2 +- src/box/gc.c | 52 +++++++++++++++++++++++++--------------------------- src/box/gc.h | 6 ++---- 3 files changed, 28 insertions(+), 32 deletions(-) diff --git a/src/box/box.cc b/src/box/box.cc index a33b80ef..2b2d2307 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -1614,7 +1614,7 @@ box_process_vote(struct ballot *ballot) { ballot->is_ro = cfg_geti("read_only") != 0; vclock_copy(&ballot->vclock, &replicaset.vclock); - vclock_copy(&ballot->gc_vclock, &gc.wal_vclock); + vclock_copy(&ballot->gc_vclock, &gc.vclock); } /** Insert a new cluster into _schema */ diff --git a/src/box/gc.c b/src/box/gc.c index e6951f4c..eb07a3b8 100644 --- a/src/box/gc.c +++ b/src/box/gc.c @@ -80,9 +80,8 @@ gc_init(void) /* Don't delete any files until recovery is complete. */ gc.min_checkpoint_count = INT_MAX; + vclock_create(&gc.vclock); rlist_create(&gc.checkpoints); - vclock_create(&gc.wal_vclock); - vclock_create(&gc.checkpoint_vclock); gc_tree_new(&gc.consumers); latch_create(&gc.latch); } @@ -125,26 +124,23 @@ gc_tree_first_checkpoint(gc_tree_t *consumers) static void gc_run(void) { - /* Look up the consumer that uses the oldest WAL. */ - struct gc_consumer *leftmost = gc_tree_first(&gc.consumers); /* Look up the consumer that uses the oldest checkpoint. */ struct gc_consumer *leftmost_checkpoint = gc_tree_first_checkpoint(&gc.consumers); + bool run_wal_gc = false; + bool run_engine_gc = false; + /* * Find the oldest checkpoint that must be preserved. * We have to preserve @min_checkpoint_count oldest * checkpoints, plus we can't remove checkpoints that * are still in use. */ - struct vclock gc_checkpoint_vclock; - vclock_create(&gc_checkpoint_vclock); - struct gc_checkpoint *checkpoint = NULL; while (true) { checkpoint = rlist_first_entry(&gc.checkpoints, struct gc_checkpoint, in_checkpoints); - vclock_copy(&gc_checkpoint_vclock, &checkpoint->vclock); if (gc.checkpoint_count <= gc.min_checkpoint_count) break; if (leftmost_checkpoint != NULL && @@ -154,20 +150,29 @@ gc_run(void) rlist_del_entry(checkpoint, in_checkpoints); free(checkpoint); gc.checkpoint_count--; + run_engine_gc = true; } /* At least one checkpoint must always be available. */ assert(checkpoint != NULL); - struct vclock gc_wal_vclock; - if (leftmost != NULL && - vclock_sum(&leftmost->vclock) < vclock_sum(&gc_checkpoint_vclock)) - vclock_copy(&gc_wal_vclock, &leftmost->vclock); - else - vclock_copy(&gc_wal_vclock, &gc_checkpoint_vclock); + /* + * Find the vclock of the oldest WAL row to keep. + * Note, we must keep all WALs created after the + * oldest checkpoint, even if no consumer needs them. + */ + const struct vclock *vclock = (gc_tree_empty(&gc.consumers) ? NULL : + &gc_tree_first(&gc.consumers)->vclock); + if (vclock == NULL || + vclock_sum(vclock) > vclock_sum(&checkpoint->vclock)) + vclock = &checkpoint->vclock; + + if (vclock_sum(vclock) > vclock_sum(&gc.vclock)) { + vclock_copy(&gc.vclock, vclock); + run_wal_gc = true; + } - if (vclock_sum(&gc_wal_vclock) <= vclock_sum(&gc.wal_vclock) && - vclock_sum(&gc_checkpoint_vclock) <= vclock_sum(&gc.checkpoint_vclock)) + if (!run_engine_gc && !run_wal_gc) return; /* nothing to do */ /* @@ -185,17 +190,10 @@ gc_run(void) * fails - see comment to memtx_engine_collect_garbage(). */ int rc = 0; - - if (vclock_sum(&gc_checkpoint_vclock) > vclock_sum(&gc.checkpoint_vclock)) { - vclock_copy(&gc.checkpoint_vclock, &gc_checkpoint_vclock); - rc = engine_collect_garbage(vclock_sum(&gc_checkpoint_vclock)); - } - if (vclock_sum(&gc_wal_vclock) > vclock_sum(&gc.wal_vclock)) { - vclock_copy(&gc.wal_vclock, &gc_wal_vclock); - if (rc == 0) - wal_collect_garbage(vclock_sum(&gc_wal_vclock)); - } - + if (run_engine_gc) + rc = engine_collect_garbage(vclock_sum(&checkpoint->vclock)); + if (run_wal_gc && rc == 0) + wal_collect_garbage(vclock_sum(vclock)); latch_unlock(&gc.latch); } diff --git a/src/box/gc.h b/src/box/gc.h index e26b1017..f6f2279e 100644 --- a/src/box/gc.h +++ b/src/box/gc.h @@ -87,6 +87,8 @@ typedef rb_tree(struct gc_consumer) gc_tree_t; /** Garbage collection state. */ struct gc_state { + /** VClock of the oldest WAL row available on the instance. */ + struct vclock vclock; /** * Minimal number of checkpoints to preserve. * Configured by box.cfg.checkpoint_count. @@ -103,10 +105,6 @@ struct gc_state { * to the tail. Linked by gc_checkpoint::in_checkpoints. */ struct rlist checkpoints; - /** Max vclock WAL garbage collection has been called for. */ - struct vclock wal_vclock; - /** Max vclock checkpoint garbage collection has been called for. */ - struct vclock checkpoint_vclock; /** Registered consumers, linked by gc_consumer::node. */ gc_tree_t consumers; /** -- 2.11.0