Tarantool development patches archive
 help / color / mirror / Atom feed
* [PATCH v2] replication: remove old snapshot files not needed by replicas
@ 2018-06-28 12:09 Serge Petrenko
  2018-06-28 13:00 ` Vladimir Davydov
  0 siblings, 1 reply; 4+ messages in thread
From: Serge Petrenko @ 2018-06-28 12:09 UTC (permalink / raw)
  To: vdavydov.dev; +Cc: tarantool-patches, Serge Petrenko

Garbage collection doesn't distinguish consumers which need checkpoint
files, such as backup, and the ones, who only need WALS, such as
replicas. A disconnected replica will 'hold' all checkpoint files, created
after it got unsynchronised, even though it doesn't need them, which may
lead to disk space shortage. To fix this, we store consumer's type, and
treat consumers differently during garbage collection: now only the old
WALS are stored for replicas, and old checkpoints are stored for backup,
if any. Also changed the tests to check updated garbage collection correctly.

Closes #3444
---
https://github.com/tarantool/tarantool/tree/sergepetrenko/gh-3444-remove-old-shapshots-for-replicas
https://github.com/tarantool/tarantool/issues/3444

Changes in v2:
- prefixed variable names with prefix 'checkpoint_'
  instead of 'snap_', so that there is no
  confusion with memtx snapshots
- same with changing variable name
  xlog_only to wal_only
- rewrote gc_run so that there is only a single
  loop over checkpoints, and also one excess old
  WAL is removed (it was unneeded, but kept due
  to a mistake). Now wal_collect_garbage or
  engine_collect_garbage are called only if they
  have work to do.
- fix tests to correctly check the amount of xlogs
  kept by garbage collection

 src/box/gc.c                 | 90 +++++++++++++++++++-------------------------
 src/box/gc.h                 | 10 ++---
 test/replication/gc.result   | 26 ++++++++++++-
 test/replication/gc.test.lua | 14 ++++++-
 4 files changed, 80 insertions(+), 60 deletions(-)

diff --git a/src/box/gc.c b/src/box/gc.c
index 288cc7236..06d2fbd35 100644
--- a/src/box/gc.c
+++ b/src/box/gc.c
@@ -61,8 +61,8 @@ struct gc_consumer {
 	char *name;
 	/** The vclock signature tracked by this consumer. */
 	int64_t signature;
-	/** The flag indicating that consumer only consumes xlog files. */
-	bool xlog_only;
+	/** The flag indicating that consumer only consumes WAL files. */
+	bool wal_only;
 };
 
 typedef rb_tree(struct gc_consumer) gc_tree_t;
@@ -72,9 +72,9 @@ struct gc_state {
 	/** Number of checkpoints to maintain. */
 	int checkpoint_count;
 	/** Max signature WAL garbage collection has been called for. */
-	int64_t xlog_signature;
-	/** Max signature snapshot garbage collection has been called for. */
-	int64_t snap_signature;
+	int64_t wal_signature;
+	/** Max signature checkpoint garbage collection has been called for. */
+	int64_t checkpoint_signature;
 	/** Registered consumers, linked by gc_consumer::node. */
 	gc_tree_t consumers;
 	/**
@@ -108,7 +108,7 @@ rb_gen(MAYBE_UNUSED static inline, gc_tree_, gc_tree_t,
 
 /** Allocate a consumer object. */
 static struct gc_consumer *
-gc_consumer_new(const char *name, int64_t signature, bool xlog_only)
+gc_consumer_new(const char *name, int64_t signature, bool wal_only)
 {
 	struct gc_consumer *consumer = calloc(1, sizeof(*consumer));
 	if (consumer == NULL) {
@@ -124,7 +124,7 @@ gc_consumer_new(const char *name, int64_t signature, bool xlog_only)
 		return NULL;
 	}
 	consumer->signature = signature;
-	consumer->xlog_only = xlog_only;
+	consumer->wal_only = wal_only;
 	return consumer;
 }
 
@@ -140,8 +140,8 @@ gc_consumer_delete(struct gc_consumer *consumer)
 void
 gc_init(void)
 {
-	gc.xlog_signature = -1;
-	gc.snap_signature = -1;
+	gc.wal_signature = -1;
+	gc.checkpoint_signature = -1;
 	gc_tree_new(&gc.consumers);
 	latch_create(&gc.latch);
 }
@@ -161,12 +161,12 @@ gc_free(void)
 	latch_destroy(&gc.latch);
 }
 
-/** Find the consumer that uses the oldest snapshot */
+/** Find the consumer that uses the oldest checkpoint */
 struct gc_consumer *
-gc_first_snap(gc_tree_t *consumers)
+gc_tree_first_checkpoint(gc_tree_t *consumers)
 {
 	struct gc_consumer *consumer = gc_tree_first(consumers);
-	while (consumer != NULL && consumer->xlog_only)
+	while (consumer != NULL && consumer->wal_only)
 		consumer = gc_tree_next(consumers, consumer);
 	return consumer;
 }
@@ -179,15 +179,16 @@ gc_run(void)
 
 	/* Look up the consumer that uses the oldest WAL */
 	struct gc_consumer *leftmost = gc_tree_first(&gc.consumers);
-	/* Look up the consumer that uses the oldest snapshot. */
-	struct gc_consumer *leftmost_snap = gc_first_snap(&gc.consumers);
+	/* Look up the consumer that uses the oldest checkpoint. */
+	struct gc_consumer *leftmost_checkpoint =
+	  gc_tree_first_checkpoint(&gc.consumers);
 
 	/*
 	 * Find the oldest checkpoint that must be preserved.
-	 * We have to maintain @checkpoint_count oldest snapshots,
-	 * plus we can't remove snapshots that are still in use.
+	 * We have to maintain @checkpoint_count oldest checkpoints,
+	 * plus we can't remove checkpoints that are still in use.
 	 */
-	int64_t gc_xlog_signature = -1;
+	int64_t gc_checkpoint_signature = -1;
 
 	struct checkpoint_iterator checkpoints;
 	checkpoint_iterator_init(&checkpoints);
@@ -196,37 +197,20 @@ gc_run(void)
 	while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) {
 		if (--checkpoint_count > 0)
 			continue;
-		if (leftmost != NULL &&
-		    leftmost->signature < vclock_sum(vclock))
+		if (leftmost_checkpoint != NULL &&
+		    leftmost_checkpoint->signature < vclock_sum(vclock))
 			continue;
-		gc_xlog_signature = vclock_sum(vclock);
+		gc_checkpoint_signature = vclock_sum(vclock);
 		break;
 	}
 
-	int64_t gc_snap_signature = -1;
-	checkpoint_count = gc.checkpoint_count;
+	int64_t gc_wal_signature = MIN(gc_checkpoint_signature, leftmost != NULL ?
+	  leftmost->signature : INT64_MAX);
 
-	checkpoint_iterator_init(&checkpoints);
-
-	while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) {
-		if (--checkpoint_count > 0)
-			continue;
-		if (leftmost_snap != NULL &&
-		    leftmost_snap->signature < vclock_sum(vclock))
-			continue;
-		gc_snap_signature = vclock_sum(vclock);
-		break;
-	}
-
-	if (gc_snap_signature <= gc.snap_signature &&
-	    gc_xlog_signature <= gc.xlog_signature)
+	if (gc_checkpoint_signature <= gc.checkpoint_signature &&
+	    gc_wal_signature <= gc.wal_signature)
 		return; /* nothing to do */
 
-	if (gc_snap_signature > gc.snap_signature)
-		gc.snap_signature = gc_snap_signature;
-	if (gc_xlog_signature > gc.xlog_signature)
-		gc.xlog_signature = gc_xlog_signature;
-
 	/*
 	 * Engine callbacks may sleep, because they use coio for
 	 * removing files. Make sure we won't try to remove the
@@ -234,6 +218,7 @@ gc_run(void)
 	 * executions.
 	 */
 	latch_lock(&gc.latch);
+
 	/*
 	 * Run garbage collection.
 	 *
@@ -241,8 +226,17 @@ gc_run(void)
 	 * collection for memtx snapshots first and abort if it
 	 * fails - see comment to memtx_engine_collect_garbage().
 	 */
-	if (engine_collect_garbage(gc_snap_signature) == 0)
-		wal_collect_garbage(gc_xlog_signature);
+	int rc = 0;
+
+	if (gc_checkpoint_signature > gc.checkpoint_signature) {
+		gc.checkpoint_signature = gc_checkpoint_signature;
+		rc = engine_collect_garbage(gc_checkpoint_signature);
+	}
+	if (gc_wal_signature > gc.wal_signature) {
+		gc.wal_signature = gc_wal_signature;
+		if (rc == 0)
+			wal_collect_garbage(gc_wal_signature);
+	}
 
 	latch_unlock(&gc.latch);
 }
@@ -254,9 +248,9 @@ gc_set_checkpoint_count(int checkpoint_count)
 }
 
 struct gc_consumer *
-gc_consumer_register(const char *name, int64_t signature, bool xlog_only)
+gc_consumer_register(const char *name, int64_t signature, bool wal_only)
 {
-	struct gc_consumer *consumer = gc_consumer_new(name, signature, xlog_only);
+	struct gc_consumer *consumer = gc_consumer_new(name, signature, wal_only);
 	if (consumer != NULL)
 		gc_tree_insert(&gc.consumers, consumer);
 	return consumer;
@@ -324,12 +318,6 @@ gc_consumer_signature(const struct gc_consumer *consumer)
 	return consumer->signature;
 }
 
-bool
-gc_consumer_xlog_only(const struct gc_consumer *consumer)
-{
-	return consumer->xlog_only;
-}
-
 struct gc_consumer *
 gc_consumer_iterator_next(struct gc_consumer_iterator *it)
 {
diff --git a/src/box/gc.h b/src/box/gc.h
index c9a1d6558..36edd7740 100644
--- a/src/box/gc.h
+++ b/src/box/gc.h
@@ -75,14 +75,14 @@ gc_set_checkpoint_count(int checkpoint_count);
  * @signature until the consumer is unregistered or advanced.
  * @name is a human-readable name of the consumer, it will
  * be used for reporting the consumer to the user.
- * @xlog_only is a flag reporting whether consumer only consumes
- * xlog files.
+ * @wal_only is a flag reporting whether consumer only depends
+ * on WAL files.
  *
  * Returns a pointer to the new consumer object or NULL on
  * memory allocation failure.
  */
 struct gc_consumer *
-gc_consumer_register(const char *name, int64_t signature, bool xlog_only);
+gc_consumer_register(const char *name, int64_t signature, bool wal_only);
 
 /**
  * Unregister a consumer and invoke garbage collection
@@ -106,10 +106,6 @@ gc_consumer_name(const struct gc_consumer *consumer);
 int64_t
 gc_consumer_signature(const struct gc_consumer *consumer);
 
-/** Return whether consumer only consumes xlog files. */
-bool
-gc_consumer_xlog_only(const struct gc_consumer *consumer);
-
 /**
  * Iterator over registered consumers. The iterator is valid
  * as long as the caller doesn't yield.
diff --git a/test/replication/gc.result b/test/replication/gc.result
index adbe04ca2..084530e8a 100644
--- a/test/replication/gc.result
+++ b/test/replication/gc.result
@@ -129,8 +129,22 @@ box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05)
 - ok
 ...
 -- Send more data to the replica.
-for i = 1, 100 do s:auto_increment{} end
+-- Need to do 2 snapshots here, otherwise the replica would
+-- only require 1 xlog and that case would be
+-- undistingvishable from wrong operation.
+for i = 1, 50 do s:auto_increment{} end
+---
+...
+box.snapshot()
+---
+- ok
+...
+for i = 1, 50 do s:auto_increment{} end
+---
+...
+box.snapshot()
 ---
+- ok
 ...
 -- Invoke garbage collection. Check that it doesn't remove
 -- xlogs needed by the replica.
@@ -299,6 +313,16 @@ test_run:cmd("cleanup server replica")
 ...
 -- Invoke garbage collection. Check that it removes the old
 -- checkpoint, but keeps the xlog last used by the replica.
+-- once again, need 2 snapshots because after 1 snapshot
+-- with no insertions after it the replica would need only
+-- 1 xlog, which is stored anyways.
+_ = s:auto_increment{}
+---
+...
+box.snapshot()
+---
+- ok
+...
 _ = s:auto_increment{}
 ---
 ...
diff --git a/test/replication/gc.test.lua b/test/replication/gc.test.lua
index 2b9ab0cf0..710c99ea7 100644
--- a/test/replication/gc.test.lua
+++ b/test/replication/gc.test.lua
@@ -67,13 +67,20 @@ wait_gc(1)
 box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.05)
 
 -- Send more data to the replica.
-for i = 1, 100 do s:auto_increment{} end
+-- Need to do 2 snapshots here, otherwise the replica would
+-- only require 1 xlog and that case would be
+-- undistingvishable from wrong operation.
+for i = 1, 50 do s:auto_increment{} end
+box.snapshot()
+for i = 1, 50 do s:auto_increment{} end
+box.snapshot()
 
 -- Invoke garbage collection. Check that it doesn't remove
 -- xlogs needed by the replica.
 box.snapshot()
 #box.info.gc().checkpoints == 1 or box.info.gc()
 #fio.glob('./master/*.xlog') == 2 or fio.listdir('./master')
+
 -- Remove the timeout injection so that the replica catches
 -- up quickly.
 box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0)
@@ -134,6 +141,11 @@ test_run:cmd("cleanup server replica")
 
 -- Invoke garbage collection. Check that it removes the old
 -- checkpoint, but keeps the xlog last used by the replica.
+-- once again, need 2 snapshots because after 1 snapshot
+-- with no insertions after it the replica would need only
+-- 1 xlog, which is stored anyways.
+_ = s:auto_increment{}
+box.snapshot()
 _ = s:auto_increment{}
 box.snapshot()
 #box.info.gc().checkpoints == 1 or box.info.gc()
-- 
2.15.2 (Apple Git-101.1)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] replication: remove old snapshot files not needed by replicas
  2018-06-28 12:09 [PATCH v2] replication: remove old snapshot files not needed by replicas Serge Petrenko
@ 2018-06-28 13:00 ` Vladimir Davydov
  2018-06-28 14:45   ` Re[2]: " Sergey Petrenko
  0 siblings, 1 reply; 4+ messages in thread
From: Vladimir Davydov @ 2018-06-28 13:00 UTC (permalink / raw)
  To: Serge Petrenko; +Cc: tarantool-patches

On Thu, Jun 28, 2018 at 03:09:08PM +0300, Serge Petrenko wrote:
> Garbage collection doesn't distinguish consumers which need checkpoint
> files, such as backup, and the ones, who only need WALS, such as
> replicas. A disconnected replica will 'hold' all checkpoint files, created
> after it got unsynchronised, even though it doesn't need them, which may
> lead to disk space shortage. To fix this, we store consumer's type, and
> treat consumers differently during garbage collection: now only the old
> WALS are stored for replicas, and old checkpoints are stored for backup,
> if any. Also changed the tests to check updated garbage collection correctly.
> 
> Closes #3444
> ---
> https://github.com/tarantool/tarantool/tree/sergepetrenko/gh-3444-remove-old-shapshots-for-replicas
> https://github.com/tarantool/tarantool/issues/3444

After fixing your code according to review comments, you should amend
your old commit, not make a new one (take a look at `git commit --amend`
and `git revert -i`), then send the amended patch to the mailing list.
Sometimes, when the change set is fairly small (which isn't the case
this time), you might want to send an incremental diff in reply to the
email with review remarks, but you should amend your branch anyway.
Please do. Also, see a few minor comments below.

> 
> Changes in v2:
> - prefixed variable names with prefix 'checkpoint_'
>   instead of 'snap_', so that there is no
>   confusion with memtx snapshots
> - same with changing variable name
>   xlog_only to wal_only
> - rewrote gc_run so that there is only a single
>   loop over checkpoints, and also one excess old
>   WAL is removed (it was unneeded, but kept due
>   to a mistake). Now wal_collect_garbage or
>   engine_collect_garbage are called only if they
>   have work to do.
> - fix tests to correctly check the amount of xlogs
>   kept by garbage collection
> 
>  src/box/gc.c                 | 90 +++++++++++++++++++-------------------------
>  src/box/gc.h                 | 10 ++---
>  test/replication/gc.result   | 26 ++++++++++++-
>  test/replication/gc.test.lua | 14 ++++++-
>  4 files changed, 80 insertions(+), 60 deletions(-)
> 
> diff --git a/src/box/gc.c b/src/box/gc.c
> index 288cc7236..06d2fbd35 100644
> --- a/src/box/gc.c
> +++ b/src/box/gc.c
> @@ -161,12 +161,12 @@ gc_free(void)
>  	latch_destroy(&gc.latch);
>  }
>  
> -/** Find the consumer that uses the oldest snapshot */
> +/** Find the consumer that uses the oldest checkpoint */

Comment-style: a dot at the end of the sentence is missing.

>  struct gc_consumer *
> -gc_first_snap(gc_tree_t *consumers)
> +gc_tree_first_checkpoint(gc_tree_t *consumers)
>  {
>  	struct gc_consumer *consumer = gc_tree_first(consumers);
> -	while (consumer != NULL && consumer->xlog_only)
> +	while (consumer != NULL && consumer->wal_only)
>  		consumer = gc_tree_next(consumers, consumer);
>  	return consumer;
>  }
> @@ -179,15 +179,16 @@ gc_run(void)
>  
>  	/* Look up the consumer that uses the oldest WAL */
>  	struct gc_consumer *leftmost = gc_tree_first(&gc.consumers);
> -	/* Look up the consumer that uses the oldest snapshot. */
> -	struct gc_consumer *leftmost_snap = gc_first_snap(&gc.consumers);
> +	/* Look up the consumer that uses the oldest checkpoint. */
> +	struct gc_consumer *leftmost_checkpoint =
> +	  gc_tree_first_checkpoint(&gc.consumers);

Coding-style: when continuing an expression to the next line, indent it
with tabs, please:

	struct gc_consumer *leftmost_checkpoint =
		gc_tree_first_checkpoint(&gc.consumers).

>  
>  	/*
>  	 * Find the oldest checkpoint that must be preserved.
> -	 * We have to maintain @checkpoint_count oldest snapshots,
> -	 * plus we can't remove snapshots that are still in use.
> +	 * We have to maintain @checkpoint_count oldest checkpoints,
> +	 * plus we can't remove checkpoints that are still in use.
>  	 */
> -	int64_t gc_xlog_signature = -1;
> +	int64_t gc_checkpoint_signature = -1;
>  
>  	struct checkpoint_iterator checkpoints;
>  	checkpoint_iterator_init(&checkpoints);
> @@ -196,37 +197,20 @@ gc_run(void)
>  	while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) {
>  		if (--checkpoint_count > 0)
>  			continue;
> -		if (leftmost != NULL &&
> -		    leftmost->signature < vclock_sum(vclock))
> +		if (leftmost_checkpoint != NULL &&
> +		    leftmost_checkpoint->signature < vclock_sum(vclock))
>  			continue;
> -		gc_xlog_signature = vclock_sum(vclock);
> +		gc_checkpoint_signature = vclock_sum(vclock);
>  		break;
>  	}
>  
> -	int64_t gc_snap_signature = -1;
> -	checkpoint_count = gc.checkpoint_count;
> +	int64_t gc_wal_signature = MIN(gc_checkpoint_signature, leftmost != NULL ?
> +	  leftmost->signature : INT64_MAX);

Ditto.

>  
> -	checkpoint_iterator_init(&checkpoints);
> -
> -	while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) {
> -		if (--checkpoint_count > 0)
> -			continue;
> -		if (leftmost_snap != NULL &&
> -		    leftmost_snap->signature < vclock_sum(vclock))
> -			continue;
> -		gc_snap_signature = vclock_sum(vclock);
> -		break;
> -	}
> -
> -	if (gc_snap_signature <= gc.snap_signature &&
> -	    gc_xlog_signature <= gc.xlog_signature)
> +	if (gc_checkpoint_signature <= gc.checkpoint_signature &&
> +	    gc_wal_signature <= gc.wal_signature)
>  		return; /* nothing to do */
>  
> -	if (gc_snap_signature > gc.snap_signature)
> -		gc.snap_signature = gc_snap_signature;
> -	if (gc_xlog_signature > gc.xlog_signature)
> -		gc.xlog_signature = gc_xlog_signature;
> -
>  	/*
>  	 * Engine callbacks may sleep, because they use coio for
>  	 * removing files. Make sure we won't try to remove the
> @@ -234,6 +218,7 @@ gc_run(void)
>  	 * executions.
>  	 */
>  	latch_lock(&gc.latch);
> +
>  	/*
>  	 * Run garbage collection.
>  	 *

Please try to avoid stray hunks, like this one, when preparing a patch.
They complicate review.

> @@ -241,8 +226,17 @@ gc_run(void)
>  	 * collection for memtx snapshots first and abort if it
>  	 * fails - see comment to memtx_engine_collect_garbage().
>  	 */
> -	if (engine_collect_garbage(gc_snap_signature) == 0)
> -		wal_collect_garbage(gc_xlog_signature);
> +	int rc = 0;
> +
> +	if (gc_checkpoint_signature > gc.checkpoint_signature) {
> +		gc.checkpoint_signature = gc_checkpoint_signature;
> +		rc = engine_collect_garbage(gc_checkpoint_signature);
> +	}
> +	if (gc_wal_signature > gc.wal_signature) {
> +		gc.wal_signature = gc_wal_signature;
> +		if (rc == 0)
> +			wal_collect_garbage(gc_wal_signature);
> +	}

I guess, in case engine_collect_garbage() fails and we don't call
wal_collect_garbage(), we shouldn't advance gc.wal_signature:

	if (rc == 0 && gc_wal_signature > gc.wal_signature) {
		gc.wal_signature = gc_wal_signature;
		wal_collect_garbage(gc_wal_signature);
	}

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re[2]: [PATCH v2] replication: remove old snapshot files not needed by replicas
  2018-06-28 13:00 ` Vladimir Davydov
@ 2018-06-28 14:45   ` Sergey Petrenko
  2018-06-28 15:18     ` Vladimir Davydov
  0 siblings, 1 reply; 4+ messages in thread
From: Sergey Petrenko @ 2018-06-28 14:45 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: tarantool-patches

[-- Attachment #1: Type: text/plain, Size: 8681 bytes --]


>Четверг, 28 июня 2018, 16:00 +03:00 от Vladimir Davydov <vdavydov.dev@gmail.com>:
>
>On Thu, Jun 28, 2018 at 03:09:08PM +0300, Serge Petrenko wrote:
>> Garbage collection doesn't distinguish consumers which need checkpoint
>> files, such as backup, and the ones, who only need WALS, such as
>> replicas. A disconnected replica will 'hold' all checkpoint files, created
>> after it got unsynchronised, even though it doesn't need them, which may
>> lead to disk space shortage. To fix this, we store consumer's type, and
>> treat consumers differently during garbage collection: now only the old
>> WALS are stored for replicas, and old checkpoints are stored for backup,
>> if any. Also changed the tests to check updated garbage collection correctly.
>> 
>> Closes #3444
>> ---
>>  https://github.com/tarantool/tarantool/tree/sergepetrenko/gh-3444-remove-old-shapshots-for-replicas
>>  https://github.com/tarantool/tarantool/issues/3444
>
>After fixing your code according to review comments, you should amend
>your old commit, not make a new one (take a look at `git commit --amend`
>and `git revert -i`), then send the amended patch to the mailing list.
>Sometimes, when the change set is fairly small (which isn't the case
>this time), you might want to send an incremental diff in reply to the
>email with review remarks, but you should amend your branch anyway.
>Please do. Also, see a few minor comments below. 
Sorry, all done.
>
>> 
>> Changes in v2:
>> - prefixed variable names with prefix 'checkpoint_'
>>   instead of 'snap_', so that there is no
>>   confusion with memtx snapshots
>> - same with changing variable name
>>   xlog_only to wal_only
>> - rewrote gc_run so that there is only a single
>>   loop over checkpoints, and also one excess old
>>   WAL is removed (it was unneeded, but kept due
>>   to a mistake). Now wal_collect_garbage or
>>   engine_collect_garbage are called only if they
>>   have work to do.
>> - fix tests to correctly check the amount of xlogs
>>   kept by garbage collection
>> 
>>  src/box/gc.c                 | 90 +++++++++++++++++++-------------------------
>>  src/box/gc.h                 | 10 ++---
>>  test/replication/gc.result   | 26 ++++++++++++-
>>  test/replication/gc.test.lua | 14 ++++++-
>>  4 files changed, 80 insertions(+), 60 deletions(-)
>> 
>> diff --git a/src/box/gc.c b/src/box/gc.c
>> index 288cc7236..06d2fbd35 100644
>> --- a/src/box/gc.c
>> +++ b/src/box/gc.c
>> @@ -161,12 +161,12 @@ gc_free(void)
>>  	latch_destroy(&gc.latch);
>>  }
>> 
>> -/** Find the consumer that uses the oldest snapshot */
>> +/** Find the consumer that uses the oldest checkpoint */
>
>Comment-style: a dot at the end of the sentence is missing. 
Fixed.
>
>>  struct gc_consumer *
>> -gc_first_snap(gc_tree_t *consumers)
>> +gc_tree_first_checkpoint(gc_tree_t *consumers)
>>  {
>>  	struct gc_consumer *consumer = gc_tree_first(consumers);
>> -	while (consumer != NULL && consumer->xlog_only)
>> +	while (consumer != NULL && consumer->wal_only)
>>  		consumer = gc_tree_next(consumers, consumer);
>>  	return consumer;
>>  }
>> @@ -179,15 +179,16 @@ gc_run(void)
>> 
>>  	/* Look up the consumer that uses the oldest WAL */
>>  	struct gc_consumer *leftmost = gc_tree_first(&gc.consumers);
>> -	/* Look up the consumer that uses the oldest snapshot. */
>> -	struct gc_consumer *leftmost_snap = gc_first_snap(&gc.consumers);
>> +	/* Look up the consumer that uses the oldest checkpoint. */
>> +	struct gc_consumer *leftmost_checkpoint =
>> +	  gc_tree_first_checkpoint(&gc.consumers);
>
>Coding-style: when continuing an expression to the next line, indent it
>with tabs, please:
>
>struct gc_consumer *leftmost_checkpoint =
>gc_tree_first_checkpoint(&gc.consumers). 
Fixed.
>
>> 
>>  	/*
>>  	 * Find the oldest checkpoint that must be preserved.
>> -	 * We have to maintain @checkpoint_count oldest snapshots,
>> -	 * plus we can't remove snapshots that are still in use.
>> +	 * We have to maintain @checkpoint_count oldest checkpoints,
>> +	 * plus we can't remove checkpoints that are still in use.
>>  	 */
>> -	int64_t gc_xlog_signature = -1;
>> +	int64_t gc_checkpoint_signature = -1;
>> 
>>  	struct checkpoint_iterator checkpoints;
>>  	checkpoint_iterator_init(&checkpoints);
>> @@ -196,37 +197,20 @@ gc_run(void)
>>  	while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) {
>>  		if (--checkpoint_count > 0)
>>  			continue;
>> -		if (leftmost != NULL &&
>> -		    leftmost->signature < vclock_sum(vclock))
>> +		if (leftmost_checkpoint != NULL &&
>> +		    leftmost_checkpoint->signature < vclock_sum(vclock))
>>  			continue;
>> -		gc_xlog_signature = vclock_sum(vclock);
>> +		gc_checkpoint_signature = vclock_sum(vclock);
>>  		break;
>>  	}
>> 
>> -	int64_t gc_snap_signature = -1;
>> -	checkpoint_count = gc.checkpoint_count;
>> +	int64_t gc_wal_signature = MIN(gc_checkpoint_signature, leftmost != NULL ?
>> +	  leftmost->signature : INT64_MAX);
>
>Ditto. 
Fixed.
>
>> 
>> -	checkpoint_iterator_init(&checkpoints);
>> -
>> -	while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) {
>> -		if (--checkpoint_count > 0)
>> -			continue;
>> -		if (leftmost_snap != NULL &&
>> -		    leftmost_snap->signature < vclock_sum(vclock))
>> -			continue;
>> -		gc_snap_signature = vclock_sum(vclock);
>> -		break;
>> -	}
>> -
>> -	if (gc_snap_signature <= gc.snap_signature &&
>> -	    gc_xlog_signature <= gc.xlog_signature)
>> +	if (gc_checkpoint_signature <= gc.checkpoint_signature &&
>> +	    gc_wal_signature <= gc.wal_signature)
>>  		return; /* nothing to do */
>> 
>> -	if (gc_snap_signature > gc.snap_signature)
>> -		gc.snap_signature = gc_snap_signature;
>> -	if (gc_xlog_signature > gc.xlog_signature)
>> -		gc.xlog_signature = gc_xlog_signature;
>> -
>>  	/*
>>  	 * Engine callbacks may sleep, because they use coio for
>>  	 * removing files. Make sure we won't try to remove the
>> @@ -234,6 +218,7 @@ gc_run(void)
>>  	 * executions.
>>  	 */
>>  	latch_lock(&gc.latch);
>> +
>>  	/*
>>  	 * Run garbage collection.
>>  	 *
>
>Please try to avoid stray hunks, like this one, when preparing a patch.
>They complicate review. 
Wym?
>
>> @@ -241,8 +226,17 @@ gc_run(void)
>>  	 * collection for memtx snapshots first and abort if it
>>  	 * fails - see comment to memtx_engine_collect_garbage().
>>  	 */
>> -	if (engine_collect_garbage(gc_snap_signature) == 0)
>> -		wal_collect_garbage(gc_xlog_signature);
>> +	int rc = 0;
>> +
>> +	if (gc_checkpoint_signature > gc.checkpoint_signature) {
>> +		gc.checkpoint_signature = gc_checkpoint_signature;
>> +		rc = engine_collect_garbage(gc_checkpoint_signature);
>> +	}
>> +	if (gc_wal_signature > gc.wal_signature) {
>> +		gc.wal_signature = gc_wal_signature;
>> +		if (rc == 0)
>> +			wal_collect_garbage(gc_wal_signature);
>> +	}
>
>I guess, in case engine_collect_garbage() fails and we don't call
>wal_collect_garbage(), we shouldn't advance gc.wal_signature:
>
>if (rc == 0 && gc_wal_signature > gc.wal_signature) {
>gc.wal_signature = gc_wal_signature;
>wal_collect_garbage(gc_wal_signature);
>}
That's how it was done previously. First update gc.signature, and only then
try to run encgine_collect_garbage() and wal_collect_garbage(), so I decided
to leave it as is.

---
src/box/gc.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/box/gc.c b/src/box/gc.c
index 06d2fbd35..4d8f321ff 100644
--- a/src/box/gc.c
+++ b/src/box/gc.c
@@ -161,7 +161,7 @@ gc_free(void)
latch_destroy(&gc.latch);
}

-/** Find the consumer that uses the oldest checkpoint */
+/** Find the consumer that uses the oldest checkpoint. */
struct gc_consumer *
gc_tree_first_checkpoint(gc_tree_t *consumers)
{
@@ -181,7 +181,7 @@ gc_run(void)
struct gc_consumer *leftmost = gc_tree_first(&gc.consumers);
/* Look up the consumer that uses the oldest checkpoint. */
struct gc_consumer *leftmost_checkpoint =
- gc_tree_first_checkpoint(&gc.consumers);
+ gc_tree_first_checkpoint(&gc.consumers);

/*
* Find the oldest checkpoint that must be preserved.
@@ -198,17 +198,17 @@ gc_run(void)
if (--checkpoint_count > 0)
continue;
if (leftmost_checkpoint != NULL &&
- leftmost_checkpoint->signature < vclock_sum(vclock))
+ leftmost_checkpoint->signature < vclock_sum(vclock))
continue;
gc_checkpoint_signature = vclock_sum(vclock);
break;
}

int64_t gc_wal_signature = MIN(gc_checkpoint_signature, leftmost != NULL ?
- leftmost->signature : INT64_MAX);
+ leftmost->signature : INT64_MAX);

if (gc_checkpoint_signature <= gc.checkpoint_signature &&
- gc_wal_signature <= gc.wal_signature)
+ gc_wal_signature <= gc.wal_signature)
return; /* nothing to do */

/*
-- 
2.15.2 (Apple Git-101.1)

-- 
Best regards,
Sergey Petrenko

[-- Attachment #2: Type: text/html, Size: 11681 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] replication: remove old snapshot files not needed by replicas
  2018-06-28 14:45   ` Re[2]: " Sergey Petrenko
@ 2018-06-28 15:18     ` Vladimir Davydov
  0 siblings, 0 replies; 4+ messages in thread
From: Vladimir Davydov @ 2018-06-28 15:18 UTC (permalink / raw)
  To: Sergey Petrenko; +Cc: tarantool-patches

On Thu, Jun 28, 2018 at 05:45:12PM +0300, Sergey Petrenko wrote:
> >> @@ -234,6 +218,7 @@ gc_run(void)
> >>  	 * executions.
> >>  	 */
> >>  	latch_lock(&gc.latch);
> >> +
> >>  	/*
> >>  	 * Run garbage collection.
> >>  	 *
> >
> >Please try to avoid stray hunks, like this one, when preparing a patch.
> >They complicate review. 
> Wym?

I mean, before submitting your patch, please self review it and make
sure, there's no changes that are irrelevant (or not necessary) and that
add new @@ sections to the patch. Please don't add/remove extra lines or
fix comments or fix coding-style if this adds new hunks (@@) to your
patch.

> >
> >> @@ -241,8 +226,17 @@ gc_run(void)
> >>  	 * collection for memtx snapshots first and abort if it
> >>  	 * fails - see comment to memtx_engine_collect_garbage().
> >>  	 */
> >> -	if (engine_collect_garbage(gc_snap_signature) == 0)
> >> -		wal_collect_garbage(gc_xlog_signature);
> >> +	int rc = 0;
> >> +
> >> +	if (gc_checkpoint_signature > gc.checkpoint_signature) {
> >> +		gc.checkpoint_signature = gc_checkpoint_signature;
> >> +		rc = engine_collect_garbage(gc_checkpoint_signature);
> >> +	}
> >> +	if (gc_wal_signature > gc.wal_signature) {
> >> +		gc.wal_signature = gc_wal_signature;
> >> +		if (rc == 0)
> >> +			wal_collect_garbage(gc_wal_signature);
> >> +	}
> >
> >I guess, in case engine_collect_garbage() fails and we don't call
> >wal_collect_garbage(), we shouldn't advance gc.wal_signature:
> >
> >if (rc == 0 && gc_wal_signature > gc.wal_signature) {
> >gc.wal_signature = gc_wal_signature;
> >wal_collect_garbage(gc_wal_signature);
> >}
> That's how it was done previously. First update gc.signature, and only then
> try to run encgine_collect_garbage() and wal_collect_garbage(), so I decided
> to leave it as is.

OK.

Your mail agent mangles indentation so that the patch you pasted
below is unreadable. Please install a patch-friendly mail agent,
e.g. Thunderbird.

Regarding indentation, I guess I didn't make myself quite clear: if
there's a parenthesis, try to keep the code aligned to it using both
tabs and spaces, otherwise use only tabs, e.g.

	if (long_condition_one &&
	    another_condition)
		do_something();

but

	struct long_type_name long_var_name =
		initializer_function();

Rule of thumb: look at the code around you and follow it closely.

Also, try to fit your code in 80 chars.

I fixed indentation by myself on the branch. The patch looks good to me
now. Please send v3 (no changes, just git-send-email + don't forget to
add changelog) so that I can ack it and Kostja can review it.

> 
> ---
> src/box/gc.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
> diff --git a/src/box/gc.c b/src/box/gc.c
> index 06d2fbd35..4d8f321ff 100644
> --- a/src/box/gc.c
> +++ b/src/box/gc.c
> @@ -161,7 +161,7 @@ gc_free(void)
> latch_destroy(&gc.latch);
> }
> 
> -/** Find the consumer that uses the oldest checkpoint */
> +/** Find the consumer that uses the oldest checkpoint. */
> struct gc_consumer *
> gc_tree_first_checkpoint(gc_tree_t *consumers)
> {
> @@ -181,7 +181,7 @@ gc_run(void)
> struct gc_consumer *leftmost = gc_tree_first(&gc.consumers);
> /* Look up the consumer that uses the oldest checkpoint. */
> struct gc_consumer *leftmost_checkpoint =
> - gc_tree_first_checkpoint(&gc.consumers);
> + gc_tree_first_checkpoint(&gc.consumers);
> 
> /*
> * Find the oldest checkpoint that must be preserved.
> @@ -198,17 +198,17 @@ gc_run(void)
> if (--checkpoint_count > 0)
> continue;
> if (leftmost_checkpoint != NULL &&
> - leftmost_checkpoint->signature < vclock_sum(vclock))
> + leftmost_checkpoint->signature < vclock_sum(vclock))
> continue;
> gc_checkpoint_signature = vclock_sum(vclock);
> break;
> }
> 
> int64_t gc_wal_signature = MIN(gc_checkpoint_signature, leftmost != NULL ?
> - leftmost->signature : INT64_MAX);
> + leftmost->signature : INT64_MAX);
> 
> if (gc_checkpoint_signature <= gc.checkpoint_signature &&
> - gc_wal_signature <= gc.wal_signature)
> + gc_wal_signature <= gc.wal_signature)
> return; /* nothing to do */

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-06-28 15:18 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-28 12:09 [PATCH v2] replication: remove old snapshot files not needed by replicas Serge Petrenko
2018-06-28 13:00 ` Vladimir Davydov
2018-06-28 14:45   ` Re[2]: " Sergey Petrenko
2018-06-28 15:18     ` Vladimir Davydov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox