From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH 09/13] gc: cleanup garbage collection procedure
Date: Thu,  4 Oct 2018 20:20:11 +0300	[thread overview]
Message-ID: <554ab508361da2d4015c56712f7406182019f89a.1538671546.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1538671546.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1538671546.git.vdavydov.dev@gmail.com>
Do some refactoring intended to make the code of gc_run() easier for
understanding:
 - Remove gc_state::checkpoint_vclock. It was used to avoid rerunning
   engine gc callback in case no checkpoint was deleted. Since we
   maintain a list of all available checkpoints, we don't need it for
   this anymore - we can run gc only if a checkpoint was actually
   removed from the list.
 - Rename gc_state::wal_vclock back to gc_state::vclock.
 - Use bool variables with descriptive names instead of comparing
   vclock signatures.
 - Add some comments.
---
 src/box/box.cc |  2 +-
 src/box/gc.c   | 52 +++++++++++++++++++++++++---------------------------
 src/box/gc.h   |  6 ++----
 3 files changed, 28 insertions(+), 32 deletions(-)
diff --git a/src/box/box.cc b/src/box/box.cc
index a33b80ef..2b2d2307 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1614,7 +1614,7 @@ box_process_vote(struct ballot *ballot)
 {
 	ballot->is_ro = cfg_geti("read_only") != 0;
 	vclock_copy(&ballot->vclock, &replicaset.vclock);
-	vclock_copy(&ballot->gc_vclock, &gc.wal_vclock);
+	vclock_copy(&ballot->gc_vclock, &gc.vclock);
 }
 
 /** Insert a new cluster into _schema */
diff --git a/src/box/gc.c b/src/box/gc.c
index e6951f4c..eb07a3b8 100644
--- a/src/box/gc.c
+++ b/src/box/gc.c
@@ -80,9 +80,8 @@ gc_init(void)
 	/* Don't delete any files until recovery is complete. */
 	gc.min_checkpoint_count = INT_MAX;
 
+	vclock_create(&gc.vclock);
 	rlist_create(&gc.checkpoints);
-	vclock_create(&gc.wal_vclock);
-	vclock_create(&gc.checkpoint_vclock);
 	gc_tree_new(&gc.consumers);
 	latch_create(&gc.latch);
 }
@@ -125,26 +124,23 @@ gc_tree_first_checkpoint(gc_tree_t *consumers)
 static void
 gc_run(void)
 {
-	/* Look up the consumer that uses the oldest WAL. */
-	struct gc_consumer *leftmost = gc_tree_first(&gc.consumers);
 	/* Look up the consumer that uses the oldest checkpoint. */
 	struct gc_consumer *leftmost_checkpoint =
 		gc_tree_first_checkpoint(&gc.consumers);
 
+	bool run_wal_gc = false;
+	bool run_engine_gc = false;
+
 	/*
 	 * Find the oldest checkpoint that must be preserved.
 	 * We have to preserve @min_checkpoint_count oldest
 	 * checkpoints, plus we can't remove checkpoints that
 	 * are still in use.
 	 */
-	struct vclock gc_checkpoint_vclock;
-	vclock_create(&gc_checkpoint_vclock);
-
 	struct gc_checkpoint *checkpoint = NULL;
 	while (true) {
 		checkpoint = rlist_first_entry(&gc.checkpoints,
 				struct gc_checkpoint, in_checkpoints);
-		vclock_copy(&gc_checkpoint_vclock, &checkpoint->vclock);
 		if (gc.checkpoint_count <= gc.min_checkpoint_count)
 			break;
 		if (leftmost_checkpoint != NULL &&
@@ -154,20 +150,29 @@ gc_run(void)
 		rlist_del_entry(checkpoint, in_checkpoints);
 		free(checkpoint);
 		gc.checkpoint_count--;
+		run_engine_gc = true;
 	}
 
 	/* At least one checkpoint must always be available. */
 	assert(checkpoint != NULL);
 
-	struct vclock gc_wal_vclock;
-	if (leftmost != NULL &&
-	    vclock_sum(&leftmost->vclock) < vclock_sum(&gc_checkpoint_vclock))
-		vclock_copy(&gc_wal_vclock, &leftmost->vclock);
-	else
-		vclock_copy(&gc_wal_vclock, &gc_checkpoint_vclock);
+	/*
+	 * Find the vclock of the oldest WAL row to keep.
+	 * Note, we must keep all WALs created after the
+	 * oldest checkpoint, even if no consumer needs them.
+	 */
+	const struct vclock *vclock = (gc_tree_empty(&gc.consumers) ? NULL :
+				       &gc_tree_first(&gc.consumers)->vclock);
+	if (vclock == NULL ||
+	    vclock_sum(vclock) > vclock_sum(&checkpoint->vclock))
+		vclock = &checkpoint->vclock;
+
+	if (vclock_sum(vclock) > vclock_sum(&gc.vclock)) {
+		vclock_copy(&gc.vclock, vclock);
+		run_wal_gc = true;
+	}
 
-	if (vclock_sum(&gc_wal_vclock) <= vclock_sum(&gc.wal_vclock) &&
-	    vclock_sum(&gc_checkpoint_vclock) <= vclock_sum(&gc.checkpoint_vclock))
+	if (!run_engine_gc && !run_wal_gc)
 		return; /* nothing to do */
 
 	/*
@@ -185,17 +190,10 @@ gc_run(void)
 	 * fails - see comment to memtx_engine_collect_garbage().
 	 */
 	int rc = 0;
-
-	if (vclock_sum(&gc_checkpoint_vclock) > vclock_sum(&gc.checkpoint_vclock)) {
-		vclock_copy(&gc.checkpoint_vclock, &gc_checkpoint_vclock);
-		rc = engine_collect_garbage(vclock_sum(&gc_checkpoint_vclock));
-	}
-	if (vclock_sum(&gc_wal_vclock) > vclock_sum(&gc.wal_vclock)) {
-		vclock_copy(&gc.wal_vclock, &gc_wal_vclock);
-		if (rc == 0)
-			wal_collect_garbage(vclock_sum(&gc_wal_vclock));
-	}
-
+	if (run_engine_gc)
+		rc = engine_collect_garbage(vclock_sum(&checkpoint->vclock));
+	if (run_wal_gc && rc == 0)
+		wal_collect_garbage(vclock_sum(vclock));
 	latch_unlock(&gc.latch);
 }
 
diff --git a/src/box/gc.h b/src/box/gc.h
index e26b1017..f6f2279e 100644
--- a/src/box/gc.h
+++ b/src/box/gc.h
@@ -87,6 +87,8 @@ typedef rb_tree(struct gc_consumer) gc_tree_t;
 
 /** Garbage collection state. */
 struct gc_state {
+	/** VClock of the oldest WAL row available on the instance. */
+	struct vclock vclock;
 	/**
 	 * Minimal number of checkpoints to preserve.
 	 * Configured by box.cfg.checkpoint_count.
@@ -103,10 +105,6 @@ struct gc_state {
 	 * to the tail. Linked by gc_checkpoint::in_checkpoints.
 	 */
 	struct rlist checkpoints;
-	/** Max vclock WAL garbage collection has been called for. */
-	struct vclock wal_vclock;
-	/** Max vclock checkpoint garbage collection has been called for. */
-	struct vclock checkpoint_vclock;
 	/** Registered consumers, linked by gc_consumer::node. */
 	gc_tree_t consumers;
 	/**
-- 
2.11.0
next prev parent reply	other threads:[~2018-10-04 17:20 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-04 17:20 [PATCH 00/13] box: garbage collection refactoring and fixes Vladimir Davydov
2018-10-04 17:20 ` [PATCH 01/13] vinyl: fix master crash on replica join failure Vladimir Davydov
2018-10-04 21:43   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 02/13] vinyl: force deletion of runs left from unfinished indexes on restart Vladimir Davydov
2018-10-04 21:44   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 03/13] gc: make gc_consumer and gc_state structs transparent Vladimir Davydov
2018-10-04 21:47   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 04/13] gc: use fixed length buffer for storing consumer name Vladimir Davydov
2018-10-04 21:47   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 05/13] gc: fold gc_consumer_new and gc_consumer_delete Vladimir Davydov
2018-10-04 21:50   ` Konstantin Osipov
2018-10-05  8:56     ` Vladimir Davydov
2018-10-04 17:20 ` [PATCH 06/13] gc: format consumer name in gc_consumer_register Vladimir Davydov
2018-10-04 21:50   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 07/13] gc: rename checkpoint_count to min_checkpoint_count Vladimir Davydov
2018-10-04 21:51   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 08/13] gc: keep track of available checkpoints Vladimir Davydov
2018-10-04 21:59   ` Konstantin Osipov
2018-10-05  8:50     ` Vladimir Davydov
2018-10-04 17:20 ` Vladimir Davydov [this message]
2018-10-04 22:00   ` [PATCH 09/13] gc: cleanup garbage collection procedure Konstantin Osipov
2018-10-04 17:20 ` [PATCH 10/13] gc: improve box.info.gc output Vladimir Davydov
2018-10-04 22:01   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 11/13] gc: separate checkpoint references from wal consumers Vladimir Davydov
2018-10-04 22:05   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 12/13] gc: call gc_run unconditionally when consumer is advanced Vladimir Davydov
2018-10-04 22:26   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 13/13] replication: ref checkpoint needed to join replica Vladimir Davydov
2018-10-04 22:27   ` Konstantin Osipov
2018-10-05 17:03 ` [PATCH 00/13] box: garbage collection refactoring and fixes Vladimir Davydov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox
  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):
  git send-email \
    --in-reply-to=554ab508361da2d4015c56712f7406182019f89a.1538671546.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH 09/13] gc: cleanup garbage collection procedure' \
    /path/to/YOUR_REPLY
  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox