Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH 11/13] gc: separate checkpoint references from wal consumers
Date: Thu,  4 Oct 2018 20:20:13 +0300	[thread overview]
Message-ID: <df173df606d0b98534117bd2ee6d2f7b14a40f7d.1538671546.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1538671546.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1538671546.git.vdavydov.dev@gmail.com>

Initially, gc_consumer object was used for pinning both checkpoint and
WAL files, but commit 9c5d851d7830 ("replication: remove old snapshot
files not needed by replicas") changed that. Now whether a consumer pins
WALs or checkpoints or both depends on gc_consumer_type. This was done
so that replicas wouldn't prevent garbage collection of checkpoint
files, which they don't need after initial join is complete.

The way the feature was implemented is rather questionable though:
 - Since consumers of both types are stored in the same binary search
   tree, we have to iterate through the tree to find the leftmost
   checkpoint consumer, see gc_tree_first_checkpoint. This looks
   inefficient and ugly.
 - The notion of advancing a checkpoint consumer (gc_consumer_advance)
   is dubious: there's no point to move on to the next checkpoint after
   reading one - instead the consumer needs incremental changes, i.e.
   WALs.

To eliminate those questionable aspects and make the code easier for
understanding, let's separate WAL and checkpoint consumers. We do this
by removing gc_consumer_type and making gc_consumer track WALs only.
For pinning the files corresponding to a checkpoint a new object class
is introduced, gc_checkpoint_ref. To pin a checkpoint, gc_ref_checkpoint
needs to be called. It is passed the gc_checkpoint object to pin, the
consumer name, and the gc_checkpoint_ref to store the ref in. To unpin a
previously pinned checkpoint, gc_checkpoint_unref should be called.

References are listed by box.info.gc() for each checkpoint under
'references' key.
---
 src/box/box.cc     | 30 ++++++++++++++----------
 src/box/gc.c       | 43 +++++++++++++++++-----------------
 src/box/gc.h       | 69 ++++++++++++++++++++++++++++++++++++++++--------------
 src/box/lua/info.c | 10 ++++++++
 src/box/relay.cc   |  5 ++--
 5 files changed, 102 insertions(+), 55 deletions(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index 2b2d2307..2679f6da 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -89,11 +89,17 @@ static void title(const char *new_status)
 bool box_checkpoint_is_in_progress = false;
 
 /**
- * If backup is in progress, this points to the gc consumer
+ * Set if backup is in progress, i.e. box_backup_start() was
+ * called but box_backup_stop() hasn't been yet.
+ */
+static bool backup_is_in_progress;
+
+/**
+ * If backup is in progress, this points to the gc reference
  * object that prevents the garbage collector from deleting
  * the checkpoint files that are currently being backed up.
  */
-static struct gc_consumer *backup_gc;
+static struct gc_checkpoint_ref backup_gc;
 
 /**
  * The instance is in read-write mode: the local checkpoint
@@ -1461,7 +1467,7 @@ box_process_join(struct ev_io *io, struct xrow_header *header)
 
 	/* Register the replica with the garbage collector. */
 	struct gc_consumer *gc = gc_consumer_register(&start_vclock,
-		GC_CONSUMER_WAL, "replica %s", tt_uuid_str(&instance_uuid));
+			"replica %s", tt_uuid_str(&instance_uuid));
 	if (gc == NULL)
 		diag_raise();
 	auto gc_guard = make_scoped_guard([=]{
@@ -2167,7 +2173,7 @@ int
 box_backup_start(int checkpoint_idx, box_backup_cb cb, void *cb_arg)
 {
 	assert(checkpoint_idx >= 0);
-	if (backup_gc != NULL) {
+	if (backup_is_in_progress) {
 		diag_set(ClientError, ER_BACKUP_IN_PROGRESS);
 		return -1;
 	}
@@ -2180,14 +2186,12 @@ box_backup_start(int checkpoint_idx, box_backup_cb cb, void *cb_arg)
 		diag_set(ClientError, ER_MISSING_SNAPSHOT);
 		return -1;
 	}
-	backup_gc = gc_consumer_register(&checkpoint->vclock,
-					 GC_CONSUMER_ALL, "backup");
-	if (backup_gc == NULL)
-		return -1;
+	backup_is_in_progress = true;
+	gc_ref_checkpoint(checkpoint, &backup_gc, "backup");
 	int rc = engine_backup(&checkpoint->vclock, cb, cb_arg);
 	if (rc != 0) {
-		gc_consumer_unregister(backup_gc);
-		backup_gc = NULL;
+		gc_unref_checkpoint(&backup_gc);
+		backup_is_in_progress = false;
 	}
 	return rc;
 }
@@ -2195,9 +2199,9 @@ box_backup_start(int checkpoint_idx, box_backup_cb cb, void *cb_arg)
 void
 box_backup_stop(void)
 {
-	if (backup_gc != NULL) {
-		gc_consumer_unregister(backup_gc);
-		backup_gc = NULL;
+	if (backup_is_in_progress) {
+		gc_unref_checkpoint(&backup_gc);
+		backup_is_in_progress = false;
 	}
 }
 
diff --git a/src/box/gc.c b/src/box/gc.c
index eb07a3b8..f8100e3f 100644
--- a/src/box/gc.c
+++ b/src/box/gc.c
@@ -106,16 +106,6 @@ gc_free(void)
 	}
 }
 
-/** Find the consumer that uses the oldest checkpoint. */
-struct gc_consumer *
-gc_tree_first_checkpoint(gc_tree_t *consumers)
-{
-	struct gc_consumer *consumer = gc_tree_first(consumers);
-	while (consumer != NULL && consumer->type == GC_CONSUMER_WAL)
-		consumer = gc_tree_next(consumers, consumer);
-	return consumer;
-}
-
 /**
  * Invoke garbage collection in order to remove files left
  * from old checkpoints. The number of checkpoints saved by
@@ -124,10 +114,6 @@ gc_tree_first_checkpoint(gc_tree_t *consumers)
 static void
 gc_run(void)
 {
-	/* Look up the consumer that uses the oldest checkpoint. */
-	struct gc_consumer *leftmost_checkpoint =
-		gc_tree_first_checkpoint(&gc.consumers);
-
 	bool run_wal_gc = false;
 	bool run_engine_gc = false;
 
@@ -143,9 +129,7 @@ gc_run(void)
 				struct gc_checkpoint, in_checkpoints);
 		if (gc.checkpoint_count <= gc.min_checkpoint_count)
 			break;
-		if (leftmost_checkpoint != NULL &&
-		    vclock_sum(&checkpoint->vclock) >=
-		    vclock_sum(&leftmost_checkpoint->vclock))
+		if (!rlist_empty(&checkpoint->refs))
 			break; /* checkpoint is in use */
 		rlist_del_entry(checkpoint, in_checkpoints);
 		free(checkpoint);
@@ -229,6 +213,7 @@ gc_add_checkpoint(const struct vclock *vclock)
 	if (checkpoint == NULL)
 		panic("out of memory");
 
+	rlist_create(&checkpoint->refs);
 	vclock_copy(&checkpoint->vclock, vclock);
 	rlist_add_tail_entry(&gc.checkpoints, checkpoint, in_checkpoints);
 	gc.checkpoint_count++;
@@ -236,9 +221,27 @@ gc_add_checkpoint(const struct vclock *vclock)
 	gc_run();
 }
 
+void
+gc_ref_checkpoint(struct gc_checkpoint *checkpoint,
+		  struct gc_checkpoint_ref *ref, const char *format, ...)
+{
+	va_list ap;
+	va_start(ap, format);
+	vsnprintf(ref->name, GC_NAME_MAX, format, ap);
+	va_end(ap);
+
+	rlist_add_tail_entry(&checkpoint->refs, ref, in_refs);
+}
+
+void
+gc_unref_checkpoint(struct gc_checkpoint_ref *ref)
+{
+	rlist_del_entry(ref, in_refs);
+	gc_run();
+}
+
 struct gc_consumer *
-gc_consumer_register(const struct vclock *vclock, enum gc_consumer_type type,
-		     const char *format, ...)
+gc_consumer_register(const struct vclock *vclock, const char *format, ...)
 {
 	struct gc_consumer *consumer = calloc(1, sizeof(*consumer));
 	if (consumer == NULL) {
@@ -253,8 +256,6 @@ gc_consumer_register(const struct vclock *vclock, enum gc_consumer_type type,
 	va_end(ap);
 
 	vclock_copy(&consumer->vclock, vclock);
-	consumer->type = type;
-
 	gc_tree_insert(&gc.consumers, consumer);
 	return consumer;
 }
diff --git a/src/box/gc.h b/src/box/gc.h
index f6f2279e..a5392cef 100644
--- a/src/box/gc.h
+++ b/src/box/gc.h
@@ -46,13 +46,6 @@ struct gc_consumer;
 
 enum { GC_NAME_MAX = 64 };
 
-/** Consumer type: WAL consumer, or SNAP */
-enum gc_consumer_type {
-	GC_CONSUMER_WAL = 1,
-	GC_CONSUMER_SNAP = 2,
-	GC_CONSUMER_ALL = 3,
-};
-
 typedef rb_node(struct gc_consumer) gc_node_t;
 
 /**
@@ -64,11 +57,30 @@ struct gc_checkpoint {
 	struct rlist in_checkpoints;
 	/** VClock of the checkpoint. */
 	struct vclock vclock;
+	/**
+	 * List of checkpoint references, linked by
+	 * gc_checkpoint_ref::in_refs.
+	 *
+	 * We use a list rather than a reference counter so
+	 * that we can list reference names in box.info.gc().
+	 */
+	struct rlist refs;
+};
+
+/**
+ * The following structure represents a checkpoint reference.
+ * See also gc_checkpoint::refs.
+ */
+struct gc_checkpoint_ref {
+	/** Link in gc_checkpoint::refs. */
+	struct rlist in_refs;
+	/** Human-readable name of this checkpoint reference. */
+	char name[GC_NAME_MAX];
 };
 
 /**
  * The object of this type is used to prevent garbage
- * collection from removing files that are still in use.
+ * collection from removing WALs that are still in use.
  */
 struct gc_consumer {
 	/** Link in gc_state::consumers. */
@@ -77,10 +89,6 @@ struct gc_consumer {
 	char name[GC_NAME_MAX];
 	/** The vclock tracked by this consumer. */
 	struct vclock vclock;
-	/** Consumer type, indicating that consumer only consumes
-	 * WAL files, or both - SNAP and WAL.
-	 */
-	enum gc_consumer_type type;
 };
 
 typedef rb_tree(struct gc_consumer) gc_tree_t;
@@ -131,6 +139,12 @@ extern struct gc_state gc;
 	rlist_foreach_entry_reverse(checkpoint, &gc.checkpoints, in_checkpoints)
 
 /**
+ * Iterate over all references to the given checkpoint.
+ */
+#define gc_foreach_checkpoint_ref(ref, checkpoint) \
+	rlist_foreach_entry(ref, &(checkpoint)->refs, in_refs)
+
+/**
  * Return the last (newest) checkpoint known to the garbage
  * collector. If there's no checkpoint, return NULL.
  */
@@ -176,22 +190,41 @@ void
 gc_add_checkpoint(const struct vclock *vclock);
 
 /**
+ * Get a reference to @checkpoint and store it in @ref.
+ * This will block the garbage collector from deleting
+ * the checkpoint files until the reference is released
+ * with gc_put_checkpoint_ref().
+ *
+ * @format... specifies a human-readable name that will be
+ * used for listing the reference in box.info.gc().
+ */
+CFORMAT(printf, 3, 4)
+void
+gc_ref_checkpoint(struct gc_checkpoint *checkpoint,
+		  struct gc_checkpoint_ref *ref, const char *format, ...);
+
+/**
+ * Release a reference to a checkpoint previously taken
+ * with gc_ref_checkpoint(). This function may trigger
+ * garbage collection.
+ */
+void
+gc_unref_checkpoint(struct gc_checkpoint_ref *ref);
+
+/**
  * Register a consumer.
  *
- * This will stop garbage collection of objects newer than
+ * This will stop garbage collection of WAL files newer than
  * @vclock until the consumer is unregistered or advanced.
  * @format... specifies a human-readable name of the consumer,
  * it will be used for listing the consumer in box.info.gc().
- * @type consumer type, reporting whether consumer only depends
- * on WAL files.
  *
  * Returns a pointer to the new consumer object or NULL on
  * memory allocation failure.
  */
-CFORMAT(printf, 3, 4)
+CFORMAT(printf, 2, 3)
 struct gc_consumer *
-gc_consumer_register(const struct vclock *vclock, enum gc_consumer_type type,
-		     const char *format, ...);
+gc_consumer_register(const struct vclock *vclock, const char *format, ...);
 
 /**
  * Unregister a consumer and invoke garbage collection
diff --git a/src/box/lua/info.c b/src/box/lua/info.c
index 9e51b823..655768ec 100644
--- a/src/box/lua/info.c
+++ b/src/box/lua/info.c
@@ -389,6 +389,16 @@ lbox_info_gc_call(struct lua_State *L)
 		luaL_pushint64(L, vclock_sum(&checkpoint->vclock));
 		lua_settable(L, -3);
 
+		lua_pushstring(L, "references");
+		lua_newtable(L);
+		int ref_idx = 0;
+		struct gc_checkpoint_ref *ref;
+		gc_foreach_checkpoint_ref(ref, checkpoint) {
+			lua_pushstring(L, ref->name);
+			lua_rawseti(L, -2, ++ref_idx);
+		}
+		lua_settable(L, -3);
+
 		lua_rawseti(L, -2, ++count);
 	}
 	lua_settable(L, -3);
diff --git a/src/box/relay.cc b/src/box/relay.cc
index 88430856..d5df487e 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -592,9 +592,8 @@ relay_subscribe(struct replica *replica, int fd, uint64_t sync,
 	 * join.
 	 */
 	if (replica->gc == NULL) {
-		replica->gc = gc_consumer_register(replica_clock,
-				GC_CONSUMER_WAL, "replica %s",
-				tt_uuid_str(&replica->uuid));
+		replica->gc = gc_consumer_register(replica_clock, "replica %s",
+						   tt_uuid_str(&replica->uuid));
 		if (replica->gc == NULL)
 			diag_raise();
 	}
-- 
2.11.0

  parent reply	other threads:[~2018-10-04 17:20 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-04 17:20 [PATCH 00/13] box: garbage collection refactoring and fixes Vladimir Davydov
2018-10-04 17:20 ` [PATCH 01/13] vinyl: fix master crash on replica join failure Vladimir Davydov
2018-10-04 21:43   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 02/13] vinyl: force deletion of runs left from unfinished indexes on restart Vladimir Davydov
2018-10-04 21:44   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 03/13] gc: make gc_consumer and gc_state structs transparent Vladimir Davydov
2018-10-04 21:47   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 04/13] gc: use fixed length buffer for storing consumer name Vladimir Davydov
2018-10-04 21:47   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 05/13] gc: fold gc_consumer_new and gc_consumer_delete Vladimir Davydov
2018-10-04 21:50   ` Konstantin Osipov
2018-10-05  8:56     ` Vladimir Davydov
2018-10-04 17:20 ` [PATCH 06/13] gc: format consumer name in gc_consumer_register Vladimir Davydov
2018-10-04 21:50   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 07/13] gc: rename checkpoint_count to min_checkpoint_count Vladimir Davydov
2018-10-04 21:51   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 08/13] gc: keep track of available checkpoints Vladimir Davydov
2018-10-04 21:59   ` Konstantin Osipov
2018-10-05  8:50     ` Vladimir Davydov
2018-10-04 17:20 ` [PATCH 09/13] gc: cleanup garbage collection procedure Vladimir Davydov
2018-10-04 22:00   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 10/13] gc: improve box.info.gc output Vladimir Davydov
2018-10-04 22:01   ` Konstantin Osipov
2018-10-04 17:20 ` Vladimir Davydov [this message]
2018-10-04 22:05   ` [PATCH 11/13] gc: separate checkpoint references from wal consumers Konstantin Osipov
2018-10-04 17:20 ` [PATCH 12/13] gc: call gc_run unconditionally when consumer is advanced Vladimir Davydov
2018-10-04 22:26   ` Konstantin Osipov
2018-10-04 17:20 ` [PATCH 13/13] replication: ref checkpoint needed to join replica Vladimir Davydov
2018-10-04 22:27   ` Konstantin Osipov
2018-10-05 17:03 ` [PATCH 00/13] box: garbage collection refactoring and fixes Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=df173df606d0b98534117bd2ee6d2f7b14a40f7d.1538671546.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH 11/13] gc: separate checkpoint references from wal consumers' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox