[tarantool-patches] [PATCH] replication: remove old snapshot files not needed by replicas

Vladimir Davydov vdavydov.dev at gmail.com
Thu Jun 28 11:58:33 MSK 2018


On Wed, Jun 27, 2018 at 05:08:22PM +0300, Serge Petrenko wrote:
> Closes #3444

Commit message body should give a detailed description what and why
you're doing here, see

  https://tarantool.io/en/doc/1.9/dev_guide/developer_guidelines/#how-to-write-a-commit-message

Please write one.

> 
> branch: https://github.com/tarantool/tarantool/tree/sergepetrenko/gh-3444-remove-old-shapshots-for-replicas
> issue [3444]: https://github.com/tarantool/tarantool/issues/3444

Links to the branch and the ticket should go after '---' (then they will
be ignored by git-am). Also, there's no need to prefix the links with
'branch:', and 'issue:'. Here's a couple of examples of a good formed
patch email:

  https://www.freelists.org/post/tarantool-patches/PATCH-socket-fix-race-between-unix-tcp-server-stop-and-start
  https://www.freelists.org/post/tarantool-patches/PATCH-v2-Allow-to-increase-boxcfgvinyl-memory-and-memtx-memory-at-runtime

Please fix when you submit a new version of the patch. And don't forget
to add a brief change log (should go after --- too).

> ---
>  src/box/box.cc               |  4 +--
>  src/box/gc.c                 | 69 +++++++++++++++++++++++++++++++++++---------
>  src/box/gc.h                 |  9 +++++-
>  src/box/relay.cc             |  2 +-
>  test/replication/gc.result   | 47 +++++++++++++++++++++++++-----
>  test/replication/gc.test.lua | 29 ++++++++++---------
>  6 files changed, 122 insertions(+), 38 deletions(-)

> diff --git a/src/box/gc.c b/src/box/gc.c
> index 12e68f3dc..288cc7236 100644
> --- a/src/box/gc.c
> +++ b/src/box/gc.c
> @@ -61,6 +61,8 @@ struct gc_consumer {
>  	char *name;
>  	/** The vclock signature tracked by this consumer. */
>  	int64_t signature;
> +	/** The flag indicating that consumer only consumes xlog files. */
> +	bool xlog_only;
>  };
>  
>  typedef rb_tree(struct gc_consumer) gc_tree_t;
> @@ -69,8 +71,10 @@ typedef rb_tree(struct gc_consumer) gc_tree_t;
>  struct gc_state {
>  	/** Number of checkpoints to maintain. */
>  	int checkpoint_count;
> -	/** Max signature garbage collection has been called for. */
> -	int64_t signature;
> +	/** Max signature WAL garbage collection has been called for. */
> +	int64_t xlog_signature;
> +	/** Max signature snapshot garbage collection has been called for. */
> +	int64_t snap_signature;

We call snapshots checkpoints nowadays, so as not to confuse a
checkpoint with a snap file (which is a memtx snapshot). Please
rename the variables appropriately.

>  	/** Registered consumers, linked by gc_consumer::node. */
>  	gc_tree_t consumers;
>  	/**
> @@ -104,7 +108,7 @@ rb_gen(MAYBE_UNUSED static inline, gc_tree_, gc_tree_t,
>  
>  /** Allocate a consumer object. */
>  static struct gc_consumer *
> -gc_consumer_new(const char *name, int64_t signature)
> +gc_consumer_new(const char *name, int64_t signature, bool xlog_only)
>  {
>  	struct gc_consumer *consumer = calloc(1, sizeof(*consumer));
>  	if (consumer == NULL) {
> @@ -120,6 +124,7 @@ gc_consumer_new(const char *name, int64_t signature)
>  		return NULL;
>  	}
>  	consumer->signature = signature;
> +	consumer->xlog_only = xlog_only;
>  	return consumer;
>  }
>  
> @@ -135,7 +140,8 @@ gc_consumer_delete(struct gc_consumer *consumer)
>  void
>  gc_init(void)
>  {
> -	gc.signature = -1;
> +	gc.xlog_signature = -1;
> +	gc.snap_signature = -1;
>  	gc_tree_new(&gc.consumers);
>  	latch_create(&gc.latch);
>  }
> @@ -155,21 +161,33 @@ gc_free(void)
>  	latch_destroy(&gc.latch);
>  }
>  
> +/** Find the consumer that uses the oldest snapshot */
> +struct gc_consumer *
> +gc_first_snap(gc_tree_t *consumers)

I'd call this function gc_tree_first_checkpoint, for the sake of
consistency with gc_tree_first.

> +{
> +	struct gc_consumer *consumer = gc_tree_first(consumers);
> +	while (consumer != NULL && consumer->xlog_only)
> +		consumer = gc_tree_next(consumers, consumer);
> +	return consumer;
> +}
> +
>  void
>  gc_run(void)
>  {
>  	int checkpoint_count = gc.checkpoint_count;
>  	assert(checkpoint_count > 0);
>  
> -	/* Look up the consumer that uses the oldest snapshot. */
> +	/* Look up the consumer that uses the oldest WAL */
>  	struct gc_consumer *leftmost = gc_tree_first(&gc.consumers);
> +	/* Look up the consumer that uses the oldest snapshot. */
> +	struct gc_consumer *leftmost_snap = gc_first_snap(&gc.consumers);
>  
>  	/*
>  	 * Find the oldest checkpoint that must be preserved.
>  	 * We have to maintain @checkpoint_count oldest snapshots,
>  	 * plus we can't remove snapshots that are still in use.
>  	 */
> -	int64_t gc_signature = -1;
> +	int64_t gc_xlog_signature = -1;
>  
>  	struct checkpoint_iterator checkpoints;
>  	checkpoint_iterator_init(&checkpoints);
> @@ -181,14 +199,33 @@ gc_run(void)
>  		if (leftmost != NULL &&
>  		    leftmost->signature < vclock_sum(vclock))
>  			continue;
> -		gc_signature = vclock_sum(vclock);
> +		gc_xlog_signature = vclock_sum(vclock);
>  		break;
>  	}
>  
> -	if (gc_signature <= gc.signature)
> +	int64_t gc_snap_signature = -1;
> +	checkpoint_count = gc.checkpoint_count;
> +
> +	checkpoint_iterator_init(&checkpoints);
> +
> +	while ((vclock = checkpoint_iterator_prev(&checkpoints)) != NULL) {
> +		if (--checkpoint_count > 0)
> +			continue;
> +		if (leftmost_snap != NULL &&
> +		    leftmost_snap->signature < vclock_sum(vclock))
> +			continue;
> +		gc_snap_signature = vclock_sum(vclock);
> +		break;
> +	}

I don't understand why you loop over checkpoints for the second time.
AFAIU what you need to do here is find the oldest checkpoint to save,
which is done right above, and then call wal_collect_garbage for

  gc_xlog_signature = MIN(gc_checkpoint_signature, leftmost->signature)

No?

> +
> +	if (gc_snap_signature <= gc.snap_signature &&
> +	    gc_xlog_signature <= gc.xlog_signature)
>  		return; /* nothing to do */

That is, gc_run() will call engine_collect_garbage() every time it needs
to delete a WAL, even if no checkpoints needs to be removed. I don't
think it's good. Please fix.

>  
> -	gc.signature = gc_signature;
> +	if (gc_snap_signature > gc.snap_signature)
> +		gc.snap_signature = gc_snap_signature;
> +	if (gc_xlog_signature > gc.xlog_signature)
> +		gc.xlog_signature = gc_xlog_signature;
>  
>  	/*
>  	 * Engine callbacks may sleep, because they use coio for
> @@ -204,8 +241,8 @@ gc_run(void)
>  	 * collection for memtx snapshots first and abort if it
>  	 * fails - see comment to memtx_engine_collect_garbage().
>  	 */
> -	if (engine_collect_garbage(gc_signature) == 0)
> -		wal_collect_garbage(gc_signature);
> +	if (engine_collect_garbage(gc_snap_signature) == 0)
> +		wal_collect_garbage(gc_xlog_signature);
>  
>  	latch_unlock(&gc.latch);
>  }
> @@ -217,9 +254,9 @@ gc_set_checkpoint_count(int checkpoint_count)
>  }
>  
>  struct gc_consumer *
> -gc_consumer_register(const char *name, int64_t signature)
> +gc_consumer_register(const char *name, int64_t signature, bool xlog_only)
>  {
> -	struct gc_consumer *consumer = gc_consumer_new(name, signature);
> +	struct gc_consumer *consumer = gc_consumer_new(name, signature, xlog_only);
>  	if (consumer != NULL)
>  		gc_tree_insert(&gc.consumers, consumer);
>  	return consumer;
> @@ -287,6 +324,12 @@ gc_consumer_signature(const struct gc_consumer *consumer)
>  	return consumer->signature;
>  }
>  
> +bool
> +gc_consumer_xlog_only(const struct gc_consumer *consumer)
> +{
> +	return consumer->xlog_only;
> +}
> +

This function is not used anywhere. Please remove (and don't forget to
remove its declaration in the header, too).



More information about the Tarantool-patches mailing list