Tarantool development patches archive
 help / color / mirror / Atom feed
* [PATCH v2] replication: remove old snapshot files not needed by replicas
@ 2018-06-28 12:09 Serge Petrenko
  2018-06-28 13:00 ` Vladimir Davydov
  0 siblings, 1 reply; 4+ messages in thread
From: Serge Petrenko @ 2018-06-28 12:09 UTC (permalink / raw)
  To: vdavydov.dev; +Cc: tarantool-patches, Serge Petrenko

Garbage collection doesn't distinguish consumers which need checkpoint
files, such as backup, and the ones, who only need WALS, such as
replicas. A disconnected replica will 'hold' all checkpoint files, created
after it got unsynchronised, even though it doesn't need them, which may
lead to disk space shortage. To fix this, we store consumer's type, and
treat consumers differently during garbage collection: now only the old
WALS are stored for replicas, and old checkpoints are stored for backup,
if any. Also changed the tests to check updated garbage collection correctly.

Closes #3444
---
https://github.com/tarantool/tarantool/tree/sergepetrenko/gh-3444-remove-old-shapshots-for-replicas
https://github.com/tarantool/tarantool/issues/3444

Changes in v2:
- prefixed variable names with prefix 'checkpoint_'
  instead of 'snap_', so that there is no
  confusion with memtx snapshots
- same with changing variable name
  xlog_only to wal_only
- rewrote gc_run so that there is only a single
  loop over checkpoints, and also one excess old
  WAL is removed (it was unneeded, but kept due
  to a mistake). Now wal_collect_garbage or
  engine_collect_garbage are called only if they
  have work to do.
- fix tests to correctly check the amount of xlogs
  kept by garbage collection

 src/box/gc.c                 | 90 +++++++++++++++++++-------------------------
 src/box/gc.h                 | 10 ++---
 test/replication/gc.result   | 26 ++++++++++++-
 test/replication/gc.test.lua | 14 ++++++-
 4 files changed, 80 insertions(+), 60 deletions(-)

diff --git a/src/box/gc.c b/src/box/gc.c
index 288cc7236..06d2fbd35 100644
--- a/src/box/gc.c
+++ b/src/box/gc.c
@@ -61,8 +61,8 @@ struct gc_consumer {
 	char *name;
 	/** The vclock signature tracked by this consumer. */
 	int64_t signature;
-	/** The flag indicating that consumer only consumes xlog files. */
-	bool xlog_only;
+	/** The flag indicating that consumer only consumes WAL files. */
+	bool wal_only;
 };
 
 typedef rb_tree(struct gc_consumer) gc_tree_t;
@@ -72,9 +72,9 @@ struct gc_state {
 	/** Number of checkpoints to maintain. */
 	int checkpoint_count;
 	/** Max signature WAL garbage collection has been called for. */
-	int64_t xlog_signature;
-	/** Max signature snapshot garbage collection has been called for. */
-	int64_t snap_signature;
+	int64_t wal_signature;
+	/** Max signature checkpoint garbage collection has been called for. */
+	int64_t checkpoint_signature;
 	/** Registered consumers, linked by gc_consumer::node. */
 	gc_tree_t consumers;
 	/**
@@ -108,7 +108,7 @@ rb_gen(MAYBE_UNUSED static inline, gc_tree_, gc_tree_t,
 
 /** Allocate a consumer object. */
 static struct gc_consumer *
-gc_consumer_new(const char *name, int64_t signature, bool xlog_only)
+gc_consumer_new(const char *name, int64_t signature, bool wal_only)
 {
 	struct gc_consumer *consumer = calloc(1, sizeof(*consumer));
 	if (consumer == NULL) {
@@ -124,7 +124,7 @@ gc_consumer_new(const char *name, int64_t signature, bool xlog_only)
 		return NULL;
 	}
 	consumer->signature = signature;
-	consumer->xlog_only = xlog_only;
+	consumer->wal_only = wal_only;
 	return consumer;
 }
 
@@ -140,8 +140,8 @@ gc_consumer_delete(struct gc_consumer *consumer)
 void
 gc_init(void)
 {
-	gc.xlog_signature = -1;
-	gc.snap_signature = -1;
+	gc.wal_signature = -1;
+	gc.checkpoint_signature = -1;
 	gc_tree_new(&gc.consumers);
 	latch_create(&gc.latch);
 }
@@ -161,12 +161,12 @@ gc_free(void)
 	latch_destroy(&gc.latch);
 }
 
-/** Find the consumer that uses the oldest snapshot */
+/** Find the consumer that uses the oldest checkpoint */
 struct gc_consumer *
-gc_first_snap(gc_tree_t *consumers)
+gc_tree_first_checkpoint(gc_tree_t *consumers)
 {
 	struct gc_consumer *consumer = gc_tree_first(consumers);
-	while (consumer != NULL && consumer->xlog_only)
+	while (consumer != NULL && consumer->wal_only)
 		consumer = gc_tree_next(consumers, consumer);
 	return consumer;
 }
@@ -179,15 +179,16 @@ gc_run(void)
 
 	/* Look up the consumer that uses the oldest WAL */
 	struct gc_consumer *leftmost = gc_tree_first(&gc.consumers);
-	/* Look up the consumer that uses the oldest snapshot. */
-	struct gc_consumer *leftmost_snap = gc_first_snap(&gc.consumers);
+	/* Look up the consumer that uses the oldest checkpoint. */
+	struct gc_consumer *leftmost_checkpoint =
+	  gc_tree_first_checkpoint(&gc.consumers);
 
 	/*
 	 * Find the oldest checkpoint that must be preserved.
-	 * We have to maintain @checkpoint_count oldest snapshots,
-	 * plus we can't remove snapshots that are still in use.
+	 * We have to maintain @checkpoint_count oldest checkpoints,
+	 * plus we can't remove checkpoints that are still in use.
 	 */
-	int64_t gc_xlog_signature = -1;
+	int64_t gc_checkpoint_signature = -1;
 
 	struct checkpoint_iterator checkpoints;
 	checkpoint_iterator_init(&checkpoints);
@@ -196,37 +197,20 @@ gc_run(void)
 	while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) {
 		if (--checkpoint_count > 0)
 			continue;
-		if (leftmost != NULL &&
-		    leftmost->signature < vclock_sum(vclock))
+		if (leftmost_checkpoint != NULL &&
+		    leftmost_checkpoint->signature < vclock_sum(vclock))
 			continue;
-		gc_xlog_signature = vclock_sum(vclock);
+		gc_checkpoint_signature = vclock_sum(vclock);
 		break;
 	}
 
-	int64_t gc_snap_signature = -1;
-	checkpoint_count = gc.checkpoint_count;
+	int64_t gc_wal_signature = MIN(gc_checkpoint_signature, leftmost != NULL ?
+	  leftmost->signature : INT64_MAX);
 
-	checkpoint_iterator_init(&checkpoints);
-
-	while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) {
-		if (--checkpoint_count > 0)
-			continue;
-		if (leftmost_snap != NULL &&
-		    leftmost_snap->signature < vclock_sum(vclock))
-			continue;
-		gc_snap_signature = vclock_sum(vclock);
-		break;
-	}
-
-	if (gc_snap_signature <= gc.snap_signature &&
-	    gc_xlog_signature <= gc.xlog_signature)
+	if (gc_checkpoint_signature <= gc.checkpoint_signature &&
+	    gc_wal_signature <= gc.wal_signature)
 		return; /* nothing to do */
 
-	if (gc_snap_signature > gc.snap_signature)
-		gc.snap_signature = gc_snap_signature;
-	if (gc_xlog_signature > gc.xlog_signature)
-		gc.xlog_signature = gc_xlog_signature;
-
 	/*
 	 * Engine callbacks may sleep, because they use coio for
 	 * removing files. Make sure we won't try to remove the
@@ -234,6 +218,7 @@ gc_run(void)
 	 * executions.
 	 */
 	latch_lock(&gc.latch);
+
 	/*
 	 * Run garbage collection.
 	 *
@@ -241,8 +226,17 @@ gc_run(void)
 	 * collection for memtx snapshots first and abort if it
 	 * fails - see comment to memtx_engine_collect_garbage().
 	 */
-	if (engine_collect_garbage(gc_snap_signature) == 0)
-		wal_collect_garbage(gc_xlog_signature);
+	int rc = 0;
+
+	if (gc_checkpoint_signature > gc.checkpoint_signature) {
+		gc.checkpoint_signature = gc_checkpoint_signature;
+		rc = engine_collect_garbage(gc_checkpoint_signature);
+	}
+	if (gc_wal_signature > gc.wal_signature) {
+		gc.wal_signature = gc_wal_signature;
+		if (rc == 0)
+			wal_collect_garbage(gc_wal_signature);
+	}
 
 	latch_unlock(&gc.latch);
 }
@@ -254,9 +248,9 @@ gc_set_checkpoint_count(int checkpoint_count)
 }
 
 struct gc_consumer *
-gc_consumer_register(const char *name, int64_t signature, bool xlog_only)
+gc_consumer_register(const char *name, int64_t signature, bool wal_only)
 {
-	struct gc_consumer *consumer = gc_consumer_new(name, signature, xlog_only);
+	struct gc_consumer *consumer = gc_consumer_new(name, signature, wal_only);
 	if (consumer != NULL)
 		gc_tree_insert(&gc.consumers, consumer);
 	return consumer;
@@ -324,12 +318,6 @@ gc_consumer_signature(const struct gc_consumer *consumer)
 	return consumer->signature;
 }
 
-bool
-gc_consumer_xlog_only(const struct gc_consumer *consumer)
-{
-	return consumer->xlog_only;
-}
-
 struct gc_consumer *
 gc_consumer_iterator_next(struct gc_consumer_iterator *it)
 {
diff --git a/src/box/gc.h b/src/box/gc.h
index c9a1d6558..36edd7740 100644
--- a/src/box/gc.h
+++ b/src/box/gc.h
@@ -75,14 +75,14 @@ gc_set_checkpoint_count(int checkpoint_count);
  * @signature until the consumer is unregistered or advanced.
  * @name is a human-readable name of the consumer, it will
  * be used for reporting the consumer to the user.
- * @xlog_only is a flag reporting whether consumer only consumes
- * xlog files.
+ * @wal_only is a flag reporting whether consumer only depends
+ * on WAL files.
  *
  * Returns a pointer to the new consumer object or NULL on
  * memory allocation failure.
  */
 struct gc_consumer *
-gc_consumer_register(const char *name, int64_t signature, bool xlog_only);
+gc_consumer_register(const char *name, int64_t signature, bool wal_only);
 
 /**
  * Unregister a consumer and invoke garbage collection
@@ -106,10 +106,6 @@ gc_consumer_name(const struct gc_consumer *consumer);
 int64_t
 gc_consumer_signature(const struct gc_consumer *consumer);
 
-/** Return whether consumer only consumes xlog files. */
-bool
-gc_consumer_xlog_only(const struct gc_consumer *consumer);
-
 /**
  * Iterator over registered consumers. The iterator is valid
  * as long as the caller doesn't yield.
diff --git a/test/replication/gc.result b/test/replication/gc.result
index adbe04ca2..084530e8a 100644
--- a/test/replication/gc.result
+++ b/test/replication/gc.result
@@ -129,8 +129,22 @@ box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05)
 - ok
 ...
 -- Send more data to the replica.
-for i = 1, 100 do s:auto_increment{} end
+-- Need to do 2 snapshots here, otherwise the replica would
+-- only require 1 xlog and that case would be
+-- undistingvishable from wrong operation.
+for i = 1, 50 do s:auto_increment{} end
+---
+...
+box.snapshot()
+---
+- ok
+...
+for i = 1, 50 do s:auto_increment{} end
+---
+...
+box.snapshot()
 ---
+- ok
 ...
 -- Invoke garbage collection. Check that it doesn't remove
 -- xlogs needed by the replica.
@@ -299,6 +313,16 @@ test_run:cmd("cleanup server replica")
 ...
 -- Invoke garbage collection. Check that it removes the old
 -- checkpoint, but keeps the xlog last used by the replica.
+-- once again, need 2 snapshots because after 1 snapshot
+-- with no insertions after it the replica would need only
+-- 1 xlog, which is stored anyways.
+_ = s:auto_increment{}
+---
+...
+box.snapshot()
+---
+- ok
+...
 _ = s:auto_increment{}
 ---
 ...
diff --git a/test/replication/gc.test.lua b/test/replication/gc.test.lua
index 2b9ab0cf0..710c99ea7 100644
--- a/test/replication/gc.test.lua
+++ b/test/replication/gc.test.lua
@@ -67,13 +67,20 @@ wait_gc(1)
 box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05)
 
 -- Send more data to the replica.
-for i = 1, 100 do s:auto_increment{} end
+-- Need to do 2 snapshots here, otherwise the replica would
+-- only require 1 xlog and that case would be
+-- undistingvishable from wrong operation.
+for i = 1, 50 do s:auto_increment{} end
+box.snapshot()
+for i = 1, 50 do s:auto_increment{} end
+box.snapshot()
 
 -- Invoke garbage collection. Check that it doesn't remove
 -- xlogs needed by the replica.
 box.snapshot()
 #box.info.gc().checkpoints == 1 or box.info.gc()
 #fio.glob('./master/*.xlog') == 2 or fio.listdir('./master')
+
 -- Remove the timeout injection so that the replica catches
 -- up quickly.
 box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0)
@@ -134,6 +141,11 @@ test_run:cmd("cleanup server replica")
 
 -- Invoke garbage collection. Check that it removes the old
 -- checkpoint, but keeps the xlog last used by the replica.
+-- once again, need 2 snapshots because after 1 snapshot
+-- with no insertions after it the replica would need only
+-- 1 xlog, which is stored anyways.
+_ = s:auto_increment{}
+box.snapshot()
 _ = s:auto_increment{}
 box.snapshot()
 #box.info.gc().checkpoints == 1 or box.info.gc()
-- 
2.15.2 (Apple Git-101.1)

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-06-28 15:18 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-28 12:09 [PATCH v2] replication: remove old snapshot files not needed by replicas Serge Petrenko
2018-06-28 13:00 ` Vladimir Davydov
2018-06-28 14:45   ` Re[2]: " Sergey Petrenko
2018-06-28 15:18     ` Vladimir Davydov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox