[PATCH 09/13] gc: cleanup garbage collection procedure
Vladimir Davydov
vdavydov.dev at gmail.com
Thu Oct 4 20:20:11 MSK 2018
Do some refactoring intended to make the code of gc_run() easier for
understanding:
- Remove gc_state::checkpoint_vclock. It was used to avoid rerunning
engine gc callback in case no checkpoint was deleted. Since we
maintain a list of all available checkpoints, we don't need it for
this anymore - we can run gc only if a checkpoint was actually
removed from the list.
- Rename gc_state::wal_vclock back to gc_state::vclock.
- Use bool variables with descriptive names instead of comparing
vclock signatures.
- Add some comments.
---
src/box/box.cc | 2 +-
src/box/gc.c | 52 +++++++++++++++++++++++++---------------------------
src/box/gc.h | 6 ++----
3 files changed, 28 insertions(+), 32 deletions(-)
diff --git a/src/box/box.cc b/src/box/box.cc
index a33b80ef..2b2d2307 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1614,7 +1614,7 @@ box_process_vote(struct ballot *ballot)
{
ballot->is_ro = cfg_geti("read_only") != 0;
vclock_copy(&ballot->vclock, &replicaset.vclock);
- vclock_copy(&ballot->gc_vclock, &gc.wal_vclock);
+ vclock_copy(&ballot->gc_vclock, &gc.vclock);
}
/** Insert a new cluster into _schema */
diff --git a/src/box/gc.c b/src/box/gc.c
index e6951f4c..eb07a3b8 100644
--- a/src/box/gc.c
+++ b/src/box/gc.c
@@ -80,9 +80,8 @@ gc_init(void)
/* Don't delete any files until recovery is complete. */
gc.min_checkpoint_count = INT_MAX;
+ vclock_create(&gc.vclock);
rlist_create(&gc.checkpoints);
- vclock_create(&gc.wal_vclock);
- vclock_create(&gc.checkpoint_vclock);
gc_tree_new(&gc.consumers);
latch_create(&gc.latch);
}
@@ -125,26 +124,23 @@ gc_tree_first_checkpoint(gc_tree_t *consumers)
static void
gc_run(void)
{
- /* Look up the consumer that uses the oldest WAL. */
- struct gc_consumer *leftmost = gc_tree_first(&gc.consumers);
/* Look up the consumer that uses the oldest checkpoint. */
struct gc_consumer *leftmost_checkpoint =
gc_tree_first_checkpoint(&gc.consumers);
+ bool run_wal_gc = false;
+ bool run_engine_gc = false;
+
/*
* Find the oldest checkpoint that must be preserved.
* We have to preserve @min_checkpoint_count oldest
* checkpoints, plus we can't remove checkpoints that
* are still in use.
*/
- struct vclock gc_checkpoint_vclock;
- vclock_create(&gc_checkpoint_vclock);
-
struct gc_checkpoint *checkpoint = NULL;
while (true) {
checkpoint = rlist_first_entry(&gc.checkpoints,
struct gc_checkpoint, in_checkpoints);
- vclock_copy(&gc_checkpoint_vclock, &checkpoint->vclock);
if (gc.checkpoint_count <= gc.min_checkpoint_count)
break;
if (leftmost_checkpoint != NULL &&
@@ -154,20 +150,29 @@ gc_run(void)
rlist_del_entry(checkpoint, in_checkpoints);
free(checkpoint);
gc.checkpoint_count--;
+ run_engine_gc = true;
}
/* At least one checkpoint must always be available. */
assert(checkpoint != NULL);
- struct vclock gc_wal_vclock;
- if (leftmost != NULL &&
- vclock_sum(&leftmost->vclock) < vclock_sum(&gc_checkpoint_vclock))
- vclock_copy(&gc_wal_vclock, &leftmost->vclock);
- else
- vclock_copy(&gc_wal_vclock, &gc_checkpoint_vclock);
+ /*
+ * Find the vclock of the oldest WAL row to keep.
+ * Note, we must keep all WALs created after the
+ * oldest checkpoint, even if no consumer needs them.
+ */
+ const struct vclock *vclock = (gc_tree_empty(&gc.consumers) ? NULL :
+ &gc_tree_first(&gc.consumers)->vclock);
+ if (vclock == NULL ||
+ vclock_sum(vclock) > vclock_sum(&checkpoint->vclock))
+ vclock = &checkpoint->vclock;
+
+ if (vclock_sum(vclock) > vclock_sum(&gc.vclock)) {
+ vclock_copy(&gc.vclock, vclock);
+ run_wal_gc = true;
+ }
- if (vclock_sum(&gc_wal_vclock) <= vclock_sum(&gc.wal_vclock) &&
- vclock_sum(&gc_checkpoint_vclock) <= vclock_sum(&gc.checkpoint_vclock))
+ if (!run_engine_gc && !run_wal_gc)
return; /* nothing to do */
/*
@@ -185,17 +190,10 @@ gc_run(void)
* fails - see comment to memtx_engine_collect_garbage().
*/
int rc = 0;
-
- if (vclock_sum(&gc_checkpoint_vclock) > vclock_sum(&gc.checkpoint_vclock)) {
- vclock_copy(&gc.checkpoint_vclock, &gc_checkpoint_vclock);
- rc = engine_collect_garbage(vclock_sum(&gc_checkpoint_vclock));
- }
- if (vclock_sum(&gc_wal_vclock) > vclock_sum(&gc.wal_vclock)) {
- vclock_copy(&gc.wal_vclock, &gc_wal_vclock);
- if (rc == 0)
- wal_collect_garbage(vclock_sum(&gc_wal_vclock));
- }
-
+ if (run_engine_gc)
+ rc = engine_collect_garbage(vclock_sum(&checkpoint->vclock));
+ if (run_wal_gc && rc == 0)
+ wal_collect_garbage(vclock_sum(vclock));
latch_unlock(&gc.latch);
}
diff --git a/src/box/gc.h b/src/box/gc.h
index e26b1017..f6f2279e 100644
--- a/src/box/gc.h
+++ b/src/box/gc.h
@@ -87,6 +87,8 @@ typedef rb_tree(struct gc_consumer) gc_tree_t;
/** Garbage collection state. */
struct gc_state {
+ /** VClock of the oldest WAL row available on the instance. */
+ struct vclock vclock;
/**
* Minimal number of checkpoints to preserve.
* Configured by box.cfg.checkpoint_count.
@@ -103,10 +105,6 @@ struct gc_state {
* to the tail. Linked by gc_checkpoint::in_checkpoints.
*/
struct rlist checkpoints;
- /** Max vclock WAL garbage collection has been called for. */
- struct vclock wal_vclock;
- /** Max vclock checkpoint garbage collection has been called for. */
- struct vclock checkpoint_vclock;
/** Registered consumers, linked by gc_consumer::node. */
gc_tree_t consumers;
/**
--
2.11.0
More information about the Tarantool-patches
mailing list