Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: tarantool-patches@freelists.org
Subject: [PATCH 07/13] memtx: use ref counting to pin indexes for snapshot
Date: Sat, 10 Aug 2019 13:03:34 +0300	[thread overview]
Message-ID: <e22878763f483a93811511f48724131e05740e2c.1565430177.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1565430177.git.vdavydov.dev@gmail.com>

Currently, to prevent an index from going away while it is being
written to a snapshot, we postpone memtx_gc_task's free() invocation
until checkpointing is complete, see commit 94de0a081b3a ("Don't take
schema lock for checkpointing"). This works fine, but makes it rather
difficult to reuse snapshot iterators for other purposes, e.g. feeding
a consistent read view to a newly joined replica.

Let's instead use index reference counting for pinning indexes for
checkpointing. A reference is taken in a snapshot iterator constructor
and released when the snapshot iterator is destroyed.
---
 src/box/memtx_engine.c | 26 +-------------------------
 src/box/memtx_engine.h |  5 -----
 src/box/memtx_hash.c   | 15 +++++++++------
 src/box/memtx_tree.c   | 16 +++++++++-------
 4 files changed, 19 insertions(+), 43 deletions(-)

diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index 59ad1682..ec667e7a 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -651,19 +651,6 @@ memtx_engine_wait_checkpoint(struct engine *engine,
 	return result;
 }
 
-/**
- * Called after checkpointing is complete to free indexes dropped
- * while checkpointing was in progress, see memtx_engine_run_gc().
- */
-static void
-memtx_engine_gc_after_checkpoint(struct memtx_engine *memtx)
-{
-	struct memtx_gc_task *task, *next;
-	stailq_foreach_entry_safe(task, next, &memtx->gc_to_free, link)
-		task->vtab->free(task);
-	stailq_create(&memtx->gc_to_free);
-}
-
 static void
 memtx_engine_commit_checkpoint(struct engine *engine,
 			       const struct vclock *vclock)
@@ -701,8 +688,6 @@ memtx_engine_commit_checkpoint(struct engine *engine,
 
 	checkpoint_delete(memtx->checkpoint);
 	memtx->checkpoint = NULL;
-
-	memtx_engine_gc_after_checkpoint(memtx);
 }
 
 static void
@@ -887,15 +872,7 @@ memtx_engine_run_gc(struct memtx_engine *memtx, bool *stop)
 	task->vtab->run(task, &task_done);
 	if (task_done) {
 		stailq_shift(&memtx->gc_queue);
-		/*
-		 * If checkpointing is in progress, the index may be
-		 * used by the checkpoint thread so we postpone freeing
-		 * until checkpointing is complete.
-		 */
-		if (memtx->checkpoint == NULL)
-			task->vtab->free(task);
-		else
-			stailq_add_entry(&memtx->gc_to_free, task, link);
+		task->vtab->free(task);
 	}
 }
 
@@ -967,7 +944,6 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery,
 	}
 
 	stailq_create(&memtx->gc_queue);
-	stailq_create(&memtx->gc_to_free);
 	memtx->gc_fiber = fiber_new("memtx.gc", memtx_engine_gc_f);
 	if (memtx->gc_fiber == NULL)
 		goto fail;
diff --git a/src/box/memtx_engine.h b/src/box/memtx_engine.h
index fcf595e7..ccb51678 100644
--- a/src/box/memtx_engine.h
+++ b/src/box/memtx_engine.h
@@ -155,11 +155,6 @@ struct memtx_engine {
 	 * memtx_gc_task::link.
 	 */
 	struct stailq gc_queue;
-	/**
-	 * List of tasks awaiting to be freed once checkpointing
-	 * is complete, linked by memtx_gc_task::link.
-	 */
-	struct stailq gc_to_free;
 };
 
 struct memtx_gc_task;
diff --git a/src/box/memtx_hash.c b/src/box/memtx_hash.c
index e30170d1..2762d973 100644
--- a/src/box/memtx_hash.c
+++ b/src/box/memtx_hash.c
@@ -399,7 +399,7 @@ memtx_hash_index_create_iterator(struct index *base, enum iterator_type type,
 
 struct hash_snapshot_iterator {
 	struct snapshot_iterator base;
-	struct light_index_core *hash_table;
+	struct memtx_hash_index *index;
 	struct light_index_iterator iterator;
 };
 
@@ -414,7 +414,8 @@ hash_snapshot_iterator_free(struct snapshot_iterator *iterator)
 	assert(iterator->free == hash_snapshot_iterator_free);
 	struct hash_snapshot_iterator *it =
 		(struct hash_snapshot_iterator *) iterator;
-	light_index_iterator_destroy(it->hash_table, &it->iterator);
+	light_index_iterator_destroy(&it->index->hash_table, &it->iterator);
+	index_unref(&it->index->base);
 	free(iterator);
 }
 
@@ -429,7 +430,8 @@ hash_snapshot_iterator_next(struct snapshot_iterator *iterator, uint32_t *size)
 	assert(iterator->free == hash_snapshot_iterator_free);
 	struct hash_snapshot_iterator *it =
 		(struct hash_snapshot_iterator *) iterator;
-	struct tuple **res = light_index_iterator_get_and_next(it->hash_table,
+	struct light_index_core *hash_table = &it->index->hash_table;
+	struct tuple **res = light_index_iterator_get_and_next(hash_table,
 							       &it->iterator);
 	if (res == NULL)
 		return NULL;
@@ -455,9 +457,10 @@ memtx_hash_index_create_snapshot_iterator(struct index *base)
 
 	it->base.next = hash_snapshot_iterator_next;
 	it->base.free = hash_snapshot_iterator_free;
-	it->hash_table = &index->hash_table;
-	light_index_iterator_begin(it->hash_table, &it->iterator);
-	light_index_iterator_freeze(it->hash_table, &it->iterator);
+	it->index = index;
+	index_ref(base);
+	light_index_iterator_begin(&index->hash_table, &it->iterator);
+	light_index_iterator_freeze(&index->hash_table, &it->iterator);
 	return (struct snapshot_iterator *) it;
 }
 
diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
index aeba2ba3..77223a6d 100644
--- a/src/box/memtx_tree.c
+++ b/src/box/memtx_tree.c
@@ -1205,7 +1205,7 @@ memtx_tree_index_end_build(struct index *base)
 
 struct tree_snapshot_iterator {
 	struct snapshot_iterator base;
-	struct memtx_tree *tree;
+	struct memtx_tree_index *index;
 	struct memtx_tree_iterator tree_iterator;
 };
 
@@ -1215,8 +1215,8 @@ tree_snapshot_iterator_free(struct snapshot_iterator *iterator)
 	assert(iterator->free == tree_snapshot_iterator_free);
 	struct tree_snapshot_iterator *it =
 		(struct tree_snapshot_iterator *)iterator;
-	struct memtx_tree *tree = (struct memtx_tree *)it->tree;
-	memtx_tree_iterator_destroy(tree, &it->tree_iterator);
+	memtx_tree_iterator_destroy(&it->index->tree, &it->tree_iterator);
+	index_unref(&it->index->base);
 	free(iterator);
 }
 
@@ -1226,11 +1226,12 @@ tree_snapshot_iterator_next(struct snapshot_iterator *iterator, uint32_t *size)
 	assert(iterator->free == tree_snapshot_iterator_free);
 	struct tree_snapshot_iterator *it =
 		(struct tree_snapshot_iterator *)iterator;
-	struct memtx_tree_data *res =
-		memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
+	struct memtx_tree *tree = &it->index->tree;
+	struct memtx_tree_data *res = memtx_tree_iterator_get_elem(tree,
+							&it->tree_iterator);
 	if (res == NULL)
 		return NULL;
-	memtx_tree_iterator_next(it->tree, &it->tree_iterator);
+	memtx_tree_iterator_next(tree, &it->tree_iterator);
 	return tuple_data_range(res->tuple, size);
 }
 
@@ -1253,7 +1254,8 @@ memtx_tree_index_create_snapshot_iterator(struct index *base)
 
 	it->base.free = tree_snapshot_iterator_free;
 	it->base.next = tree_snapshot_iterator_next;
-	it->tree = &index->tree;
+	it->index = index;
+	index_ref(base);
 	it->tree_iterator = memtx_tree_iterator_first(&index->tree);
 	memtx_tree_iterator_freeze(&index->tree, &it->tree_iterator);
 	return (struct snapshot_iterator *) it;
-- 
2.20.1

  parent reply	other threads:[~2019-08-10 10:03 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-10 10:03 [PATCH 00/13] Join replicas off the current read view Vladimir Davydov
2019-08-10 10:03 ` [PATCH 01/13] vinyl: embed engine in vy_env Vladimir Davydov
2019-08-12 22:14   ` [tarantool-patches] " Konstantin Osipov
2019-08-14 13:09   ` Vladimir Davydov
2019-08-10 10:03 ` [PATCH 02/13] vinyl: embed index in vy_lsm Vladimir Davydov
2019-08-12 22:14   ` [tarantool-patches] " Konstantin Osipov
2019-08-14 13:09   ` Vladimir Davydov
2019-08-10 10:03 ` [PATCH 03/13] vinyl: move reference counting from vy_lsm to index Vladimir Davydov
2019-08-12 22:16   ` [tarantool-patches] " Konstantin Osipov
2019-08-14 13:09   ` Vladimir Davydov
2019-08-10 10:03 ` [PATCH 04/13] vinyl: don't pin index for iterator lifetime Vladimir Davydov
2019-08-10 10:03 ` [PATCH 05/13] vinyl: don't exempt dropped indexes from dump and compaction Vladimir Davydov
2019-08-10 10:03 ` [PATCH 06/13] memtx: don't store pointers to index internals in iterator Vladimir Davydov
2019-08-12 22:21   ` [tarantool-patches] " Konstantin Osipov
2019-08-14 13:10   ` Vladimir Davydov
2019-08-10 10:03 ` Vladimir Davydov [this message]
2019-08-12 22:24   ` [tarantool-patches] Re: [PATCH 07/13] memtx: use ref counting to pin indexes for snapshot Konstantin Osipov
2019-08-13 10:56     ` Vladimir Davydov
2019-08-13 16:08       ` Georgy Kirichenko
2019-08-10 10:03 ` [PATCH 08/13] memtx: allow snapshot iterator to fail Vladimir Davydov
2019-08-12 22:25   ` [tarantool-patches] " Konstantin Osipov
2019-08-14 13:10   ` Vladimir Davydov
2019-08-10 10:03 ` [PATCH 09/13] memtx: enter small delayed free mode from snapshot iterator Vladimir Davydov
2019-08-12 22:27   ` [tarantool-patches] " Konstantin Osipov
2019-08-13 10:59     ` Vladimir Davydov
2019-08-10 10:03 ` [PATCH 10/13] wal: make wal_sync fail on write error Vladimir Davydov
2019-08-12 22:29   ` [tarantool-patches] " Konstantin Osipov
2019-08-14 16:48   ` Vladimir Davydov
2019-08-10 10:03 ` [PATCH 11/13] xrow: factor out helper for setting REPLACE request body Vladimir Davydov
2019-08-12 22:29   ` [tarantool-patches] " Konstantin Osipov
2019-08-14 13:11   ` Vladimir Davydov
2019-08-10 10:03 ` [PATCH 12/13] test: disable replication/on_schema_init Vladimir Davydov
2019-08-12 22:31   ` [tarantool-patches] " Konstantin Osipov
2019-08-10 10:03 ` [PATCH 13/13] relay: join new replicas off read view Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e22878763f483a93811511f48724131e05740e2c.1565430177.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH 07/13] memtx: use ref counting to pin indexes for snapshot' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox