[PATCH 11/13] gc: separate checkpoint references from wal consumers

Vladimir Davydov vdavydov.dev at gmail.com
Thu Oct 4 20:20:13 MSK 2018


Initially, gc_consumer object was used for pinning both checkpoint and
WAL files, but commit 9c5d851d7830 ("replication: remove old snapshot
files not needed by replicas") changed that. Now whether a consumer pins
WALs or checkpoints or both depends on gc_consumer_type. This was done
so that replicas wouldn't prevent garbage collection of checkpoint
files, which they don't need after initial join is complete.

The way the feature was implemented is rather questionable though:
 - Since consumers of both types are stored in the same binary search
   tree, we have to iterate through the tree to find the leftmost
   checkpoint consumer, see gc_tree_first_checkpoint. This looks
   inefficient and ugly.
 - The notion of advancing a checkpoint consumer (gc_consumer_advance)
   is dubious: there's no point to move on to the next checkpoint after
   reading one - instead the consumer needs incremental changes, i.e.
   WALs.

To eliminate those questionable aspects and make the code easier for
understanding, let's separate WAL and checkpoint consumers. We do this
by removing gc_consumer_type and making gc_consumer track WALs only.
For pinning the files corresponding to a checkpoint a new object class
is introduced, gc_checkpoint_ref. To pin a checkpoint, gc_ref_checkpoint
needs to be called. It is passed the gc_checkpoint object to pin, the
consumer name, and the gc_checkpoint_ref to store the ref in. To unpin a
previously pinned checkpoint, gc_checkpoint_unref should be called.

References are listed by box.info.gc() for each checkpoint under
'references' key.
---
 src/box/box.cc     | 30 ++++++++++++++----------
 src/box/gc.c       | 43 +++++++++++++++++-----------------
 src/box/gc.h       | 69 ++++++++++++++++++++++++++++++++++++++++--------------
 src/box/lua/info.c | 10 ++++++++
 src/box/relay.cc   |  5 ++--
 5 files changed, 102 insertions(+), 55 deletions(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index 2b2d2307..2679f6da 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -89,11 +89,17 @@ static void title(const char *new_status)
 bool box_checkpoint_is_in_progress = false;
 
 /**
- * If backup is in progress, this points to the gc consumer
+ * Set if backup is in progress, i.e. box_backup_start() was
+ * called but box_backup_stop() hasn't been yet.
+ */
+static bool backup_is_in_progress;
+
+/**
+ * If backup is in progress, this points to the gc reference
  * object that prevents the garbage collector from deleting
  * the checkpoint files that are currently being backed up.
  */
-static struct gc_consumer *backup_gc;
+static struct gc_checkpoint_ref backup_gc;
 
 /**
  * The instance is in read-write mode: the local checkpoint
@@ -1461,7 +1467,7 @@ box_process_join(struct ev_io *io, struct xrow_header *header)
 
 	/* Register the replica with the garbage collector. */
 	struct gc_consumer *gc = gc_consumer_register(&start_vclock,
-		GC_CONSUMER_WAL, "replica %s", tt_uuid_str(&instance_uuid));
+			"replica %s", tt_uuid_str(&instance_uuid));
 	if (gc == NULL)
 		diag_raise();
 	auto gc_guard = make_scoped_guard([=]{
@@ -2167,7 +2173,7 @@ int
 box_backup_start(int checkpoint_idx, box_backup_cb cb, void *cb_arg)
 {
 	assert(checkpoint_idx >= 0);
-	if (backup_gc != NULL) {
+	if (backup_is_in_progress) {
 		diag_set(ClientError, ER_BACKUP_IN_PROGRESS);
 		return -1;
 	}
@@ -2180,14 +2186,12 @@ box_backup_start(int checkpoint_idx, box_backup_cb cb, void *cb_arg)
 		diag_set(ClientError, ER_MISSING_SNAPSHOT);
 		return -1;
 	}
-	backup_gc = gc_consumer_register(&checkpoint->vclock,
-					 GC_CONSUMER_ALL, "backup");
-	if (backup_gc == NULL)
-		return -1;
+	backup_is_in_progress = true;
+	gc_ref_checkpoint(checkpoint, &backup_gc, "backup");
 	int rc = engine_backup(&checkpoint->vclock, cb, cb_arg);
 	if (rc != 0) {
-		gc_consumer_unregister(backup_gc);
-		backup_gc = NULL;
+		gc_unref_checkpoint(&backup_gc);
+		backup_is_in_progress = false;
 	}
 	return rc;
 }
@@ -2195,9 +2199,9 @@ box_backup_start(int checkpoint_idx, box_backup_cb cb, void *cb_arg)
 void
 box_backup_stop(void)
 {
-	if (backup_gc != NULL) {
-		gc_consumer_unregister(backup_gc);
-		backup_gc = NULL;
+	if (backup_is_in_progress) {
+		gc_unref_checkpoint(&backup_gc);
+		backup_is_in_progress = false;
 	}
 }
 
diff --git a/src/box/gc.c b/src/box/gc.c
index eb07a3b8..f8100e3f 100644
--- a/src/box/gc.c
+++ b/src/box/gc.c
@@ -106,16 +106,6 @@ gc_free(void)
 	}
 }
 
-/** Find the consumer that uses the oldest checkpoint. */
-struct gc_consumer *
-gc_tree_first_checkpoint(gc_tree_t *consumers)
-{
-	struct gc_consumer *consumer = gc_tree_first(consumers);
-	while (consumer != NULL && consumer->type == GC_CONSUMER_WAL)
-		consumer = gc_tree_next(consumers, consumer);
-	return consumer;
-}
-
 /**
  * Invoke garbage collection in order to remove files left
  * from old checkpoints. The number of checkpoints saved by
@@ -124,10 +114,6 @@ gc_tree_first_checkpoint(gc_tree_t *consumers)
 static void
 gc_run(void)
 {
-	/* Look up the consumer that uses the oldest checkpoint. */
-	struct gc_consumer *leftmost_checkpoint =
-		gc_tree_first_checkpoint(&gc.consumers);
-
 	bool run_wal_gc = false;
 	bool run_engine_gc = false;
 
@@ -143,9 +129,7 @@ gc_run(void)
 				struct gc_checkpoint, in_checkpoints);
 		if (gc.checkpoint_count <= gc.min_checkpoint_count)
 			break;
-		if (leftmost_checkpoint != NULL &&
-		    vclock_sum(&checkpoint->vclock) >=
-		    vclock_sum(&leftmost_checkpoint->vclock))
+		if (!rlist_empty(&checkpoint->refs))
 			break; /* checkpoint is in use */
 		rlist_del_entry(checkpoint, in_checkpoints);
 		free(checkpoint);
@@ -229,6 +213,7 @@ gc_add_checkpoint(const struct vclock *vclock)
 	if (checkpoint == NULL)
 		panic("out of memory");
 
+	rlist_create(&checkpoint->refs);
 	vclock_copy(&checkpoint->vclock, vclock);
 	rlist_add_tail_entry(&gc.checkpoints, checkpoint, in_checkpoints);
 	gc.checkpoint_count++;
@@ -236,9 +221,27 @@ gc_add_checkpoint(const struct vclock *vclock)
 	gc_run();
 }
 
+void
+gc_ref_checkpoint(struct gc_checkpoint *checkpoint,
+		  struct gc_checkpoint_ref *ref, const char *format, ...)
+{
+	va_list ap;
+	va_start(ap, format);
+	vsnprintf(ref->name, GC_NAME_MAX, format, ap);
+	va_end(ap);
+
+	rlist_add_tail_entry(&checkpoint->refs, ref, in_refs);
+}
+
+void
+gc_unref_checkpoint(struct gc_checkpoint_ref *ref)
+{
+	rlist_del_entry(ref, in_refs);
+	gc_run();
+}
+
 struct gc_consumer *
-gc_consumer_register(const struct vclock *vclock, enum gc_consumer_type type,
-		     const char *format, ...)
+gc_consumer_register(const struct vclock *vclock, const char *format, ...)
 {
 	struct gc_consumer *consumer = calloc(1, sizeof(*consumer));
 	if (consumer == NULL) {
@@ -253,8 +256,6 @@ gc_consumer_register(const struct vclock *vclock, enum gc_consumer_type type,
 	va_end(ap);
 
 	vclock_copy(&consumer->vclock, vclock);
-	consumer->type = type;
-
 	gc_tree_insert(&gc.consumers, consumer);
 	return consumer;
 }
diff --git a/src/box/gc.h b/src/box/gc.h
index f6f2279e..a5392cef 100644
--- a/src/box/gc.h
+++ b/src/box/gc.h
@@ -46,13 +46,6 @@ struct gc_consumer;
 
 enum { GC_NAME_MAX = 64 };
 
-/** Consumer type: WAL consumer, or SNAP */
-enum gc_consumer_type {
-	GC_CONSUMER_WAL = 1,
-	GC_CONSUMER_SNAP = 2,
-	GC_CONSUMER_ALL = 3,
-};
-
 typedef rb_node(struct gc_consumer) gc_node_t;
 
 /**
@@ -64,11 +57,30 @@ struct gc_checkpoint {
 	struct rlist in_checkpoints;
 	/** VClock of the checkpoint. */
 	struct vclock vclock;
+	/**
+	 * List of checkpoint references, linked by
+	 * gc_checkpoint_ref::in_refs.
+	 *
+	 * We use a list rather than a reference counter so
+	 * that we can list reference names in box.info.gc().
+	 */
+	struct rlist refs;
+};
+
+/**
+ * The following structure represents a checkpoint reference.
+ * See also gc_checkpoint::refs.
+ */
+struct gc_checkpoint_ref {
+	/** Link in gc_checkpoint::refs. */
+	struct rlist in_refs;
+	/** Human-readable name of this checkpoint reference. */
+	char name[GC_NAME_MAX];
 };
 
 /**
  * The object of this type is used to prevent garbage
- * collection from removing files that are still in use.
+ * collection from removing WALs that are still in use.
  */
 struct gc_consumer {
 	/** Link in gc_state::consumers. */
@@ -77,10 +89,6 @@ struct gc_consumer {
 	char name[GC_NAME_MAX];
 	/** The vclock tracked by this consumer. */
 	struct vclock vclock;
-	/** Consumer type, indicating that consumer only consumes
-	 * WAL files, or both - SNAP and WAL.
-	 */
-	enum gc_consumer_type type;
 };
 
 typedef rb_tree(struct gc_consumer) gc_tree_t;
@@ -131,6 +139,12 @@ extern struct gc_state gc;
 	rlist_foreach_entry_reverse(checkpoint, &gc.checkpoints, in_checkpoints)
 
 /**
+ * Iterate over all references to the given checkpoint.
+ */
+#define gc_foreach_checkpoint_ref(ref, checkpoint) \
+	rlist_foreach_entry(ref, &(checkpoint)->refs, in_refs)
+
+/**
  * Return the last (newest) checkpoint known to the garbage
  * collector. If there's no checkpoint, return NULL.
  */
@@ -176,22 +190,41 @@ void
 gc_add_checkpoint(const struct vclock *vclock);
 
 /**
+ * Get a reference to @checkpoint and store it in @ref.
+ * This will block the garbage collector from deleting
+ * the checkpoint files until the reference is released
+ * with gc_put_checkpoint_ref().
+ *
+ * @format... specifies a human-readable name that will be
+ * used for listing the reference in box.info.gc().
+ */
+CFORMAT(printf, 3, 4)
+void
+gc_ref_checkpoint(struct gc_checkpoint *checkpoint,
+		  struct gc_checkpoint_ref *ref, const char *format, ...);
+
+/**
+ * Release a reference to a checkpoint previously taken
+ * with gc_ref_checkpoint(). This function may trigger
+ * garbage collection.
+ */
+void
+gc_unref_checkpoint(struct gc_checkpoint_ref *ref);
+
+/**
  * Register a consumer.
  *
- * This will stop garbage collection of objects newer than
+ * This will stop garbage collection of WAL files newer than
  * @vclock until the consumer is unregistered or advanced.
  * @format... specifies a human-readable name of the consumer,
  * it will be used for listing the consumer in box.info.gc().
- * @type consumer type, reporting whether consumer only depends
- * on WAL files.
  *
  * Returns a pointer to the new consumer object or NULL on
  * memory allocation failure.
  */
-CFORMAT(printf, 3, 4)
+CFORMAT(printf, 2, 3)
 struct gc_consumer *
-gc_consumer_register(const struct vclock *vclock, enum gc_consumer_type type,
-		     const char *format, ...);
+gc_consumer_register(const struct vclock *vclock, const char *format, ...);
 
 /**
  * Unregister a consumer and invoke garbage collection
diff --git a/src/box/lua/info.c b/src/box/lua/info.c
index 9e51b823..655768ec 100644
--- a/src/box/lua/info.c
+++ b/src/box/lua/info.c
@@ -389,6 +389,16 @@ lbox_info_gc_call(struct lua_State *L)
 		luaL_pushint64(L, vclock_sum(&checkpoint->vclock));
 		lua_settable(L, -3);
 
+		lua_pushstring(L, "references");
+		lua_newtable(L);
+		int ref_idx = 0;
+		struct gc_checkpoint_ref *ref;
+		gc_foreach_checkpoint_ref(ref, checkpoint) {
+			lua_pushstring(L, ref->name);
+			lua_rawseti(L, -2, ++ref_idx);
+		}
+		lua_settable(L, -3);
+
 		lua_rawseti(L, -2, ++count);
 	}
 	lua_settable(L, -3);
diff --git a/src/box/relay.cc b/src/box/relay.cc
index 88430856..d5df487e 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -592,9 +592,8 @@ relay_subscribe(struct replica *replica, int fd, uint64_t sync,
 	 * join.
 	 */
 	if (replica->gc == NULL) {
-		replica->gc = gc_consumer_register(replica_clock,
-				GC_CONSUMER_WAL, "replica %s",
-				tt_uuid_str(&replica->uuid));
+		replica->gc = gc_consumer_register(replica_clock, "replica %s",
+						   tt_uuid_str(&replica->uuid));
 		if (replica->gc == NULL)
 			diag_raise();
 	}
-- 
2.11.0




More information about the Tarantool-patches mailing list