>Четверг, 28 июня 2018, 16:00 +03:00 от Vladimir Davydov : > >On Thu, Jun 28, 2018 at 03:09:08PM +0300, Serge Petrenko wrote: >> Garbage collection doesn't distinguish consumers which need checkpoint >> files, such as backup, and the ones, who only need WALS, such as >> replicas. A disconnected replica will 'hold' all checkpoint files, created >> after it got unsynchronised, even though it doesn't need them, which may >> lead to disk space shortage. To fix this, we store consumer's type, and >> treat consumers differently during garbage collection: now only the old >> WALS are stored for replicas, and old checkpoints are stored for backup, >> if any. Also changed the tests to check updated garbage collection correctly. >> >> Closes #3444 >> --- >> https://github.com/tarantool/tarantool/tree/sergepetrenko/gh-3444-remove-old-shapshots-for-replicas >> https://github.com/tarantool/tarantool/issues/3444 > >After fixing your code according to review comments, you should amend >your old commit, not make a new one (take a look at `git commit --amend` >and `git revert -i`), then send the amended patch to the mailing list. >Sometimes, when the change set is fairly small (which isn't the case >this time), you might want to send an incremental diff in reply to the >email with review remarks, but you should amend your branch anyway. >Please do. Also, see a few minor comments below. Sorry, all done. > >> >> Changes in v2: >> - prefixed variable names with prefix 'checkpoint_' >> instead of 'snap_', so that there is no >> confusion with memtx snapshots >> - same with changing variable name >> xlog_only to wal_only >> - rewrote gc_run so that there is only a single >> loop over checkpoints, and also one excess old >> WAL is removed (it was unneeded, but kept due >> to a mistake). Now wal_collect_garbage or >> engine_collect_garbage are called only if they >> have work to do. >> - fix tests to correctly check the amount of xlogs >> kept by garbage collection >> >> src/box/gc.c | 90 +++++++++++++++++++------------------------- >> src/box/gc.h | 10 ++--- >> test/replication/gc.result | 26 ++++++++++++- >> test/replication/gc.test.lua | 14 ++++++- >> 4 files changed, 80 insertions(+), 60 deletions(-) >> >> diff --git a/src/box/gc.c b/src/box/gc.c >> index 288cc7236..06d2fbd35 100644 >> --- a/src/box/gc.c >> +++ b/src/box/gc.c >> @@ -161,12 +161,12 @@ gc_free(void) >> latch_destroy(&gc.latch); >> } >> >> -/** Find the consumer that uses the oldest snapshot */ >> +/** Find the consumer that uses the oldest checkpoint */ > >Comment-style: a dot at the end of the sentence is missing. Fixed. > >> struct gc_consumer * >> -gc_first_snap(gc_tree_t *consumers) >> +gc_tree_first_checkpoint(gc_tree_t *consumers) >> { >> struct gc_consumer *consumer = gc_tree_first(consumers); >> - while (consumer != NULL && consumer->xlog_only) >> + while (consumer != NULL && consumer->wal_only) >> consumer = gc_tree_next(consumers, consumer); >> return consumer; >> } >> @@ -179,15 +179,16 @@ gc_run(void) >> >> /* Look up the consumer that uses the oldest WAL */ >> struct gc_consumer *leftmost = gc_tree_first(&gc.consumers); >> - /* Look up the consumer that uses the oldest snapshot. */ >> - struct gc_consumer *leftmost_snap = gc_first_snap(&gc.consumers); >> + /* Look up the consumer that uses the oldest checkpoint. */ >> + struct gc_consumer *leftmost_checkpoint = >> + gc_tree_first_checkpoint(&gc.consumers); > >Coding-style: when continuing an expression to the next line, indent it >with tabs, please: > >struct gc_consumer *leftmost_checkpoint = >gc_tree_first_checkpoint(&gc.consumers). Fixed. > >> >> /* >> * Find the oldest checkpoint that must be preserved. >> - * We have to maintain @checkpoint_count oldest snapshots, >> - * plus we can't remove snapshots that are still in use. >> + * We have to maintain @checkpoint_count oldest checkpoints, >> + * plus we can't remove checkpoints that are still in use. >> */ >> - int64_t gc_xlog_signature = -1; >> + int64_t gc_checkpoint_signature = -1; >> >> struct checkpoint_iterator checkpoints; >> checkpoint_iterator_init(&checkpoints); >> @@ -196,37 +197,20 @@ gc_run(void) >> while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) { >> if (--checkpoint_count > 0) >> continue; >> - if (leftmost != NULL && >> - leftmost->signature < vclock_sum(vclock)) >> + if (leftmost_checkpoint != NULL && >> + leftmost_checkpoint->signature < vclock_sum(vclock)) >> continue; >> - gc_xlog_signature = vclock_sum(vclock); >> + gc_checkpoint_signature = vclock_sum(vclock); >> break; >> } >> >> - int64_t gc_snap_signature = -1; >> - checkpoint_count = gc.checkpoint_count; >> + int64_t gc_wal_signature = MIN(gc_checkpoint_signature, leftmost != NULL ? >> + leftmost->signature : INT64_MAX); > >Ditto. Fixed. > >> >> - checkpoint_iterator_init(&checkpoints); >> - >> - while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) { >> - if (--checkpoint_count > 0) >> - continue; >> - if (leftmost_snap != NULL && >> - leftmost_snap->signature < vclock_sum(vclock)) >> - continue; >> - gc_snap_signature = vclock_sum(vclock); >> - break; >> - } >> - >> - if (gc_snap_signature <= gc.snap_signature && >> - gc_xlog_signature <= gc.xlog_signature) >> + if (gc_checkpoint_signature <= gc.checkpoint_signature && >> + gc_wal_signature <= gc.wal_signature) >> return; /* nothing to do */ >> >> - if (gc_snap_signature > gc.snap_signature) >> - gc.snap_signature = gc_snap_signature; >> - if (gc_xlog_signature > gc.xlog_signature) >> - gc.xlog_signature = gc_xlog_signature; >> - >> /* >> * Engine callbacks may sleep, because they use coio for >> * removing files. Make sure we won't try to remove the >> @@ -234,6 +218,7 @@ gc_run(void) >> * executions. >> */ >> latch_lock(&gc.latch); >> + >> /* >> * Run garbage collection. >> * > >Please try to avoid stray hunks, like this one, when preparing a patch. >They complicate review. Wym? > >> @@ -241,8 +226,17 @@ gc_run(void) >> * collection for memtx snapshots first and abort if it >> * fails - see comment to memtx_engine_collect_garbage(). >> */ >> - if (engine_collect_garbage(gc_snap_signature) == 0) >> - wal_collect_garbage(gc_xlog_signature); >> + int rc = 0; >> + >> + if (gc_checkpoint_signature > gc.checkpoint_signature) { >> + gc.checkpoint_signature = gc_checkpoint_signature; >> + rc = engine_collect_garbage(gc_checkpoint_signature); >> + } >> + if (gc_wal_signature > gc.wal_signature) { >> + gc.wal_signature = gc_wal_signature; >> + if (rc == 0) >> + wal_collect_garbage(gc_wal_signature); >> + } > >I guess, in case engine_collect_garbage() fails and we don't call >wal_collect_garbage(), we shouldn't advance gc.wal_signature: > >if (rc == 0 && gc_wal_signature > gc.wal_signature) { >gc.wal_signature = gc_wal_signature; >wal_collect_garbage(gc_wal_signature); >} That's how it was done previously. First update gc.signature, and only then try to run encgine_collect_garbage() and wal_collect_garbage(), so I decided to leave it as is. --- src/box/gc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/box/gc.c b/src/box/gc.c index 06d2fbd35..4d8f321ff 100644 --- a/src/box/gc.c +++ b/src/box/gc.c @@ -161,7 +161,7 @@ gc_free(void) latch_destroy(&gc.latch); } -/** Find the consumer that uses the oldest checkpoint */ +/** Find the consumer that uses the oldest checkpoint. */ struct gc_consumer * gc_tree_first_checkpoint(gc_tree_t *consumers) { @@ -181,7 +181,7 @@ gc_run(void) struct gc_consumer *leftmost = gc_tree_first(&gc.consumers); /* Look up the consumer that uses the oldest checkpoint. */ struct gc_consumer *leftmost_checkpoint = - gc_tree_first_checkpoint(&gc.consumers); + gc_tree_first_checkpoint(&gc.consumers); /* * Find the oldest checkpoint that must be preserved. @@ -198,17 +198,17 @@ gc_run(void) if (--checkpoint_count > 0) continue; if (leftmost_checkpoint != NULL && - leftmost_checkpoint->signature < vclock_sum(vclock)) + leftmost_checkpoint->signature < vclock_sum(vclock)) continue; gc_checkpoint_signature = vclock_sum(vclock); break; } int64_t gc_wal_signature = MIN(gc_checkpoint_signature, leftmost != NULL ? - leftmost->signature : INT64_MAX); + leftmost->signature : INT64_MAX); if (gc_checkpoint_signature <= gc.checkpoint_signature && - gc_wal_signature <= gc.wal_signature) + gc_wal_signature <= gc.wal_signature) return; /* nothing to do */ /* -- 2.15.2 (Apple Git-101.1) -- Best regards, Sergey Petrenko