From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Serge Petrenko Subject: [PATCH v2] replication: remove old snapshot files not needed by replicas Date: Thu, 28 Jun 2018 15:09:08 +0300 Message-Id: <20180628120908.78984-1-sergepetrenko@tarantool.org> To: vdavydov.dev@gmail.com Cc: tarantool-patches@freelists.org, Serge Petrenko List-ID: Garbage collection doesn't distinguish between consumers that need checkpoint files, such as backups, and those that only need WALs, such as replicas. A disconnected replica will 'hold' all checkpoint files created after it fell out of sync, even though it doesn't need them, which may lead to a disk space shortage. To fix this, we store the consumer's type and treat consumers differently during garbage collection: now only old WALs are kept for replicas, while old checkpoints are kept for backups, if any. Also changed the tests to check the updated garbage collection correctly. Closes #3444 --- https://github.com/tarantool/tarantool/tree/sergepetrenko/gh-3444-remove-old-shapshots-for-replicas https://github.com/tarantool/tarantool/issues/3444 Changes in v2: - prefixed variable names with 'checkpoint_' instead of 'snap_', so that there is no confusion with memtx snapshots - likewise renamed the variable xlog_only to wal_only - rewrote gc_run so that there is only a single loop over checkpoints, and one excess old WAL is now removed (it was not needed, but was kept by mistake). Now wal_collect_garbage and engine_collect_garbage are called only if they have work to do. 
- fix tests to correctly check the amount of xlogs kept by garbage collection src/box/gc.c | 90 +++++++++++++++++++------------------------- src/box/gc.h | 10 ++--- test/replication/gc.result | 26 ++++++++++++- test/replication/gc.test.lua | 14 ++++++- 4 files changed, 80 insertions(+), 60 deletions(-) diff --git a/src/box/gc.c b/src/box/gc.c index 288cc7236..06d2fbd35 100644 --- a/src/box/gc.c +++ b/src/box/gc.c @@ -61,8 +61,8 @@ struct gc_consumer { char *name; /** The vclock signature tracked by this consumer. */ int64_t signature; - /** The flag indicating that consumer only consumes xlog files. */ - bool xlog_only; + /** The flag indicating that consumer only consumes WAL files. */ + bool wal_only; }; typedef rb_tree(struct gc_consumer) gc_tree_t; @@ -72,9 +72,9 @@ struct gc_state { /** Number of checkpoints to maintain. */ int checkpoint_count; /** Max signature WAL garbage collection has been called for. */ - int64_t xlog_signature; - /** Max signature snapshot garbage collection has been called for. */ - int64_t snap_signature; + int64_t wal_signature; + /** Max signature checkpoint garbage collection has been called for. */ + int64_t checkpoint_signature; /** Registered consumers, linked by gc_consumer::node. */ gc_tree_t consumers; /** @@ -108,7 +108,7 @@ rb_gen(MAYBE_UNUSED static inline, gc_tree_, gc_tree_t, /** Allocate a consumer object. 
*/ static struct gc_consumer * -gc_consumer_new(const char *name, int64_t signature, bool xlog_only) +gc_consumer_new(const char *name, int64_t signature, bool wal_only) { struct gc_consumer *consumer = calloc(1, sizeof(*consumer)); if (consumer == NULL) { @@ -124,7 +124,7 @@ gc_consumer_new(const char *name, int64_t signature, bool xlog_only) return NULL; } consumer->signature = signature; - consumer->xlog_only = xlog_only; + consumer->wal_only = wal_only; return consumer; } @@ -140,8 +140,8 @@ gc_consumer_delete(struct gc_consumer *consumer) void gc_init(void) { - gc.xlog_signature = -1; - gc.snap_signature = -1; + gc.wal_signature = -1; + gc.checkpoint_signature = -1; gc_tree_new(&gc.consumers); latch_create(&gc.latch); } @@ -161,12 +161,12 @@ gc_free(void) latch_destroy(&gc.latch); } -/** Find the consumer that uses the oldest snapshot */ +/** Find the consumer that uses the oldest checkpoint */ struct gc_consumer * -gc_first_snap(gc_tree_t *consumers) +gc_tree_first_checkpoint(gc_tree_t *consumers) { struct gc_consumer *consumer = gc_tree_first(consumers); - while (consumer != NULL && consumer->xlog_only) + while (consumer != NULL && consumer->wal_only) consumer = gc_tree_next(consumers, consumer); return consumer; } @@ -179,15 +179,16 @@ gc_run(void) /* Look up the consumer that uses the oldest WAL */ struct gc_consumer *leftmost = gc_tree_first(&gc.consumers); - /* Look up the consumer that uses the oldest snapshot. */ - struct gc_consumer *leftmost_snap = gc_first_snap(&gc.consumers); + /* Look up the consumer that uses the oldest checkpoint. */ + struct gc_consumer *leftmost_checkpoint = + gc_tree_first_checkpoint(&gc.consumers); /* * Find the oldest checkpoint that must be preserved. - * We have to maintain @checkpoint_count oldest snapshots, - * plus we can't remove snapshots that are still in use. + * We have to maintain @checkpoint_count oldest checkpoints, + * plus we can't remove checkpoints that are still in use. 
*/ - int64_t gc_xlog_signature = -1; + int64_t gc_checkpoint_signature = -1; struct checkpoint_iterator checkpoints; checkpoint_iterator_init(&checkpoints); @@ -196,37 +197,20 @@ gc_run(void) while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) { if (--checkpoint_count > 0) continue; - if (leftmost != NULL && - leftmost->signature < vclock_sum(vclock)) + if (leftmost_checkpoint != NULL && + leftmost_checkpoint->signature < vclock_sum(vclock)) continue; - gc_xlog_signature = vclock_sum(vclock); + gc_checkpoint_signature = vclock_sum(vclock); break; } - int64_t gc_snap_signature = -1; - checkpoint_count = gc.checkpoint_count; + int64_t gc_wal_signature = MIN(gc_checkpoint_signature, leftmost != NULL ? + leftmost->signature : INT64_MAX); - checkpoint_iterator_init(&checkpoints); - - while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) { - if (--checkpoint_count > 0) - continue; - if (leftmost_snap != NULL && - leftmost_snap->signature < vclock_sum(vclock)) - continue; - gc_snap_signature = vclock_sum(vclock); - break; - } - - if (gc_snap_signature <= gc.snap_signature && - gc_xlog_signature <= gc.xlog_signature) + if (gc_checkpoint_signature <= gc.checkpoint_signature && + gc_wal_signature <= gc.wal_signature) return; /* nothing to do */ - if (gc_snap_signature > gc.snap_signature) - gc.snap_signature = gc_snap_signature; - if (gc_xlog_signature > gc.xlog_signature) - gc.xlog_signature = gc_xlog_signature; - /* * Engine callbacks may sleep, because they use coio for * removing files. Make sure we won't try to remove the @@ -234,6 +218,7 @@ gc_run(void) * executions. */ latch_lock(&gc.latch); + /* * Run garbage collection. * @@ -241,8 +226,17 @@ gc_run(void) * collection for memtx snapshots first and abort if it * fails - see comment to memtx_engine_collect_garbage(). 
*/ - if (engine_collect_garbage(gc_snap_signature) == 0) - wal_collect_garbage(gc_xlog_signature); + int rc = 0; + + if (gc_checkpoint_signature > gc.checkpoint_signature) { + gc.checkpoint_signature = gc_checkpoint_signature; + rc = engine_collect_garbage(gc_checkpoint_signature); + } + if (gc_wal_signature > gc.wal_signature) { + gc.wal_signature = gc_wal_signature; + if (rc == 0) + wal_collect_garbage(gc_wal_signature); + } latch_unlock(&gc.latch); } @@ -254,9 +248,9 @@ gc_set_checkpoint_count(int checkpoint_count) } struct gc_consumer * -gc_consumer_register(const char *name, int64_t signature, bool xlog_only) +gc_consumer_register(const char *name, int64_t signature, bool wal_only) { - struct gc_consumer *consumer = gc_consumer_new(name, signature, xlog_only); + struct gc_consumer *consumer = gc_consumer_new(name, signature, wal_only); if (consumer != NULL) gc_tree_insert(&gc.consumers, consumer); return consumer; @@ -324,12 +318,6 @@ gc_consumer_signature(const struct gc_consumer *consumer) return consumer->signature; } -bool -gc_consumer_xlog_only(const struct gc_consumer *consumer) -{ - return consumer->xlog_only; -} - struct gc_consumer * gc_consumer_iterator_next(struct gc_consumer_iterator *it) { diff --git a/src/box/gc.h b/src/box/gc.h index c9a1d6558..36edd7740 100644 --- a/src/box/gc.h +++ b/src/box/gc.h @@ -75,14 +75,14 @@ gc_set_checkpoint_count(int checkpoint_count); * @signature until the consumer is unregistered or advanced. * @name is a human-readable name of the consumer, it will * be used for reporting the consumer to the user. - * @xlog_only is a flag reporting whether consumer only consumes - * xlog files. + * @wal_only is a flag reporting whether consumer only depends + * on WAL files. * * Returns a pointer to the new consumer object or NULL on * memory allocation failure. 
*/ struct gc_consumer * -gc_consumer_register(const char *name, int64_t signature, bool xlog_only); +gc_consumer_register(const char *name, int64_t signature, bool wal_only); /** * Unregister a consumer and invoke garbage collection @@ -106,10 +106,6 @@ gc_consumer_name(const struct gc_consumer *consumer); int64_t gc_consumer_signature(const struct gc_consumer *consumer); -/** Return whether consumer only consumes xlog files. */ -bool -gc_consumer_xlog_only(const struct gc_consumer *consumer); - /** * Iterator over registered consumers. The iterator is valid * as long as the caller doesn't yield. diff --git a/test/replication/gc.result b/test/replication/gc.result index adbe04ca2..084530e8a 100644 --- a/test/replication/gc.result +++ b/test/replication/gc.result @@ -129,8 +129,22 @@ box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05) - ok ... -- Send more data to the replica. -for i = 1, 100 do s:auto_increment{} end +-- Need to do 2 snapshots here, otherwise the replica would +-- only require 1 xlog and that case would be +-- undistingvishable from wrong operation. +for i = 1, 50 do s:auto_increment{} end +--- +... +box.snapshot() +--- +- ok +... +for i = 1, 50 do s:auto_increment{} end +--- +... +box.snapshot() --- +- ok ... -- Invoke garbage collection. Check that it doesn't remove -- xlogs needed by the replica. @@ -299,6 +313,16 @@ test_run:cmd("cleanup server replica") ... -- Invoke garbage collection. Check that it removes the old -- checkpoint, but keeps the xlog last used by the replica. +-- once again, need 2 snapshots because after 1 snapshot +-- with no insertions after it the replica would need only +-- 1 xlog, which is stored anyways. +_ = s:auto_increment{} +--- +... +box.snapshot() +--- +- ok +... _ = s:auto_increment{} --- ... 
diff --git a/test/replication/gc.test.lua b/test/replication/gc.test.lua index 2b9ab0cf0..710c99ea7 100644 --- a/test/replication/gc.test.lua +++ b/test/replication/gc.test.lua @@ -67,13 +67,20 @@ wait_gc(1) box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05) -- Send more data to the replica. -for i = 1, 100 do s:auto_increment{} end +-- Need to do 2 snapshots here, otherwise the replica would +-- only require 1 xlog and that case would be +-- undistingvishable from wrong operation. +for i = 1, 50 do s:auto_increment{} end +box.snapshot() +for i = 1, 50 do s:auto_increment{} end +box.snapshot() -- Invoke garbage collection. Check that it doesn't remove -- xlogs needed by the replica. box.snapshot() #box.info.gc().checkpoints == 1 or box.info.gc() #fio.glob('./master/*.xlog') == 2 or fio.listdir('./master') + -- Remove the timeout injection so that the replica catches -- up quickly. box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0) @@ -134,6 +141,11 @@ test_run:cmd("cleanup server replica") -- Invoke garbage collection. Check that it removes the old -- checkpoint, but keeps the xlog last used by the replica. +-- once again, need 2 snapshots because after 1 snapshot +-- with no insertions after it the replica would need only +-- 1 xlog, which is stored anyways. +_ = s:auto_increment{} +box.snapshot() _ = s:auto_increment{} box.snapshot() #box.info.gc().checkpoints == 1 or box.info.gc() -- 2.15.2 (Apple Git-101.1)