[PATCH 09/13] gc: cleanup garbage collection procedure

Vladimir Davydov vdavydov.dev at gmail.com
Thu Oct 4 20:20:11 MSK 2018


Do some refactoring intended to make the code of gc_run() easier for
understanding:
 - Remove gc_state::checkpoint_vclock. It was used to avoid rerunning
   engine gc callback in case no checkpoint was deleted. Since we
   maintain a list of all available checkpoints, we don't need it for
   this anymore - we can run gc only if a checkpoint was actually
   removed from the list.
 - Rename gc_state::wal_vclock back to gc_state::vclock.
 - Use bool variables with descriptive names instead of comparing
   vclock signatures.
 - Add some comments.
---
 src/box/box.cc |  2 +-
 src/box/gc.c   | 52 +++++++++++++++++++++++++---------------------------
 src/box/gc.h   |  6 ++----
 3 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index a33b80ef..2b2d2307 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1614,7 +1614,7 @@ box_process_vote(struct ballot *ballot)
 {
 	ballot->is_ro = cfg_geti("read_only") != 0;
 	vclock_copy(&ballot->vclock, &replicaset.vclock);
-	vclock_copy(&ballot->gc_vclock, &gc.wal_vclock);
+	vclock_copy(&ballot->gc_vclock, &gc.vclock);
 }
 
 /** Insert a new cluster into _schema */
diff --git a/src/box/gc.c b/src/box/gc.c
index e6951f4c..eb07a3b8 100644
--- a/src/box/gc.c
+++ b/src/box/gc.c
@@ -80,9 +80,8 @@ gc_init(void)
 	/* Don't delete any files until recovery is complete. */
 	gc.min_checkpoint_count = INT_MAX;
 
+	vclock_create(&gc.vclock);
 	rlist_create(&gc.checkpoints);
-	vclock_create(&gc.wal_vclock);
-	vclock_create(&gc.checkpoint_vclock);
 	gc_tree_new(&gc.consumers);
 	latch_create(&gc.latch);
 }
@@ -125,26 +124,23 @@ gc_tree_first_checkpoint(gc_tree_t *consumers)
 static void
 gc_run(void)
 {
-	/* Look up the consumer that uses the oldest WAL. */
-	struct gc_consumer *leftmost = gc_tree_first(&gc.consumers);
 	/* Look up the consumer that uses the oldest checkpoint. */
 	struct gc_consumer *leftmost_checkpoint =
 		gc_tree_first_checkpoint(&gc.consumers);
 
+	bool run_wal_gc = false;
+	bool run_engine_gc = false;
+
 	/*
 	 * Find the oldest checkpoint that must be preserved.
 	 * We have to preserve @min_checkpoint_count oldest
 	 * checkpoints, plus we can't remove checkpoints that
 	 * are still in use.
 	 */
-	struct vclock gc_checkpoint_vclock;
-	vclock_create(&gc_checkpoint_vclock);
-
 	struct gc_checkpoint *checkpoint = NULL;
 	while (true) {
 		checkpoint = rlist_first_entry(&gc.checkpoints,
 				struct gc_checkpoint, in_checkpoints);
-		vclock_copy(&gc_checkpoint_vclock, &checkpoint->vclock);
 		if (gc.checkpoint_count <= gc.min_checkpoint_count)
 			break;
 		if (leftmost_checkpoint != NULL &&
@@ -154,20 +150,29 @@ gc_run(void)
 		rlist_del_entry(checkpoint, in_checkpoints);
 		free(checkpoint);
 		gc.checkpoint_count--;
+		run_engine_gc = true;
 	}
 
 	/* At least one checkpoint must always be available. */
 	assert(checkpoint != NULL);
 
-	struct vclock gc_wal_vclock;
-	if (leftmost != NULL &&
-	    vclock_sum(&leftmost->vclock) < vclock_sum(&gc_checkpoint_vclock))
-		vclock_copy(&gc_wal_vclock, &leftmost->vclock);
-	else
-		vclock_copy(&gc_wal_vclock, &gc_checkpoint_vclock);
+	/*
+	 * Find the vclock of the oldest WAL row to keep.
+	 * Note, we must keep all WALs created after the
+	 * oldest checkpoint, even if no consumer needs them.
+	 */
+	const struct vclock *vclock = (gc_tree_empty(&gc.consumers) ? NULL :
+				       &gc_tree_first(&gc.consumers)->vclock);
+	if (vclock == NULL ||
+	    vclock_sum(vclock) > vclock_sum(&checkpoint->vclock))
+		vclock = &checkpoint->vclock;
+
+	if (vclock_sum(vclock) > vclock_sum(&gc.vclock)) {
+		vclock_copy(&gc.vclock, vclock);
+		run_wal_gc = true;
+	}
 
-	if (vclock_sum(&gc_wal_vclock) <= vclock_sum(&gc.wal_vclock) &&
-	    vclock_sum(&gc_checkpoint_vclock) <= vclock_sum(&gc.checkpoint_vclock))
+	if (!run_engine_gc && !run_wal_gc)
 		return; /* nothing to do */
 
 	/*
@@ -185,17 +190,10 @@ gc_run(void)
 	 * fails - see comment to memtx_engine_collect_garbage().
 	 */
 	int rc = 0;
-
-	if (vclock_sum(&gc_checkpoint_vclock) > vclock_sum(&gc.checkpoint_vclock)) {
-		vclock_copy(&gc.checkpoint_vclock, &gc_checkpoint_vclock);
-		rc = engine_collect_garbage(vclock_sum(&gc_checkpoint_vclock));
-	}
-	if (vclock_sum(&gc_wal_vclock) > vclock_sum(&gc.wal_vclock)) {
-		vclock_copy(&gc.wal_vclock, &gc_wal_vclock);
-		if (rc == 0)
-			wal_collect_garbage(vclock_sum(&gc_wal_vclock));
-	}
-
+	if (run_engine_gc)
+		rc = engine_collect_garbage(vclock_sum(&checkpoint->vclock));
+	if (run_wal_gc && rc == 0)
+		wal_collect_garbage(vclock_sum(vclock));
 	latch_unlock(&gc.latch);
 }
 
diff --git a/src/box/gc.h b/src/box/gc.h
index e26b1017..f6f2279e 100644
--- a/src/box/gc.h
+++ b/src/box/gc.h
@@ -87,6 +87,8 @@ typedef rb_tree(struct gc_consumer) gc_tree_t;
 
 /** Garbage collection state. */
 struct gc_state {
+	/** VClock of the oldest WAL row available on the instance. */
+	struct vclock vclock;
 	/**
 	 * Minimal number of checkpoints to preserve.
 	 * Configured by box.cfg.checkpoint_count.
@@ -103,10 +105,6 @@ struct gc_state {
 	 * to the tail. Linked by gc_checkpoint::in_checkpoints.
 	 */
 	struct rlist checkpoints;
-	/** Max vclock WAL garbage collection has been called for. */
-	struct vclock wal_vclock;
-	/** Max vclock checkpoint garbage collection has been called for. */
-	struct vclock checkpoint_vclock;
 	/** Registered consumers, linked by gc_consumer::node. */
 	gc_tree_t consumers;
 	/**
-- 
2.11.0




More information about the Tarantool-patches mailing list