Tarantool development patches archive
 help / color / mirror / Atom feed
* [PATCH] memtx: free tuples asynchronously when primary index is dropped
@ 2018-05-20 15:06 Vladimir Davydov
  2018-05-20 21:31 ` Konstantin Osipov
  0 siblings, 1 reply; 3+ messages in thread
From: Vladimir Davydov @ 2018-05-20 15:06 UTC (permalink / raw)
  To: kostja; +Cc: tarantool-patches

When a memtx space is dropped or truncated, we have to unreference all
tuples stored in it. Currently, we do it synchronously, thus blocking
the tx thread. If a space is big, tx thread may remain blocked for
several seconds, which is unacceptable. This patch makes drop/truncate
hand actual work to a background fiber.

Before this patch, drop of a space with 10M 64-byte records took more
than 0.5 seconds. After this patch, it takes less than 1 millisecond.

Closes #3408
---
https://github.com/tarantool/tarantool/issues/3408
https://github.com/tarantool/tarantool/commits/gh-3408-memtx-async-index-destruction

 src/box/memtx_bitset.c |  5 +--
 src/box/memtx_engine.c | 91 ++++++++++++++++++++++----------------------------
 src/box/memtx_engine.h | 40 ++++++++++++++++------
 src/box/memtx_hash.c   | 53 ++++++++++++++++++++++++++---
 src/box/memtx_hash.h   |  2 ++
 src/box/memtx_rtree.c  |  5 +--
 src/box/memtx_tree.c   | 53 ++++++++++++++++++++++++++---
 src/box/memtx_tree.h   |  1 +
 8 files changed, 176 insertions(+), 74 deletions(-)

diff --git a/src/box/memtx_bitset.c b/src/box/memtx_bitset.c
index 7b87560b..2322af37 100644
--- a/src/box/memtx_bitset.c
+++ b/src/box/memtx_bitset.c
@@ -457,9 +457,9 @@ memtx_bitset_index_count(struct index *base, enum iterator_type type,
 static const struct index_vtab memtx_bitset_index_vtab = {
 	/* .destroy = */ memtx_bitset_index_destroy,
 	/* .commit_create = */ generic_index_commit_create,
-	/* .abort_create = */ memtx_index_abort_create,
+	/* .abort_create = */ generic_index_abort_create,
 	/* .commit_modify = */ generic_index_commit_modify,
-	/* .commit_drop = */ memtx_index_commit_drop,
+	/* .commit_drop = */ generic_index_commit_drop,
 	/* .update_def = */ generic_index_update_def,
 	/* .depends_on_pk = */ generic_index_depends_on_pk,
 	/* .def_change_requires_rebuild = */
@@ -486,6 +486,7 @@ static const struct index_vtab memtx_bitset_index_vtab = {
 struct memtx_bitset_index *
 memtx_bitset_index_new(struct memtx_engine *memtx, struct index_def *def)
 {
+	assert(def->iid > 0);
 	assert(!def->opts.is_unique);
 
 	memtx_index_arena_init();
diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index 388cc4fe..ee3afaec 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -35,6 +35,7 @@
 #include <small/small.h>
 #include <small/mempool.h>
 
+#include "fiber.h"
 #include "coio_file.h"
 #include "tuple.h"
 #include "txn.h"
@@ -928,6 +929,23 @@ static const struct engine_vtab memtx_engine_vtab = {
 	/* .check_space_def = */ memtx_engine_check_space_def,
 };
 
+static int
+memtx_engine_gc_f(va_list va)
+{
+	struct memtx_engine *memtx = va_arg(va, struct memtx_engine *);
+	while (!fiber_is_cancelled()) {
+		if (stailq_empty(&memtx->gc_queue)) {
+			fiber_yield_timeout(TIMEOUT_INFINITY);
+			continue;
+		}
+		struct memtx_gc_task *task;
+		task = stailq_shift_entry(&memtx->gc_queue,
+					  struct memtx_gc_task, link);
+		task->func(task);
+	}
+	return 0;
+}
+
 struct memtx_engine *
 memtx_engine_new(const char *snap_dirname, bool force_recovery,
 		 uint64_t tuple_arena_max_size, uint32_t objsize_min,
@@ -945,18 +963,34 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery,
 	xdir_create(&memtx->snap_dir, snap_dirname, SNAP, &INSTANCE_UUID);
 	memtx->snap_dir.force_recovery = force_recovery;
 
-	if (xdir_scan(&memtx->snap_dir) != 0) {
-		xdir_destroy(&memtx->snap_dir);
-		free(memtx);
-		return NULL;
-	}
+	if (xdir_scan(&memtx->snap_dir) != 0)
+		goto fail;
+
+	stailq_create(&memtx->gc_queue);
+	memtx->gc_fiber = fiber_new("memtx.gc", memtx_engine_gc_f);
+	if (memtx->gc_fiber == NULL)
+		goto fail;
 
 	memtx->state = MEMTX_INITIALIZED;
 	memtx->force_recovery = force_recovery;
 
 	memtx->base.vtab = &memtx_engine_vtab;
 	memtx->base.name = "memtx";
+
+	fiber_start(memtx->gc_fiber, memtx);
 	return memtx;
+fail:
+	xdir_destroy(&memtx->snap_dir);
+	free(memtx);
+	return NULL;
+}
+
+void
+memtx_engine_schedule_gc(struct memtx_engine *memtx,
+			 struct memtx_gc_task *task)
+{
+	stailq_add_tail_entry(&memtx->gc_queue, task, link);
+	fiber_wakeup(memtx->gc_fiber);
 }
 
 void
@@ -1063,53 +1097,6 @@ memtx_index_extent_reserve(int num)
 	return 0;
 }
 
-static void
-memtx_index_prune(struct index *index)
-{
-	if (index->def->iid > 0)
-		return;
-
-	/*
-	 * Tuples stored in a memtx space are referenced by the
-	 * primary index so when the primary index is dropped we
-	 * should delete them.
-	 */
-	struct iterator *it = index_create_iterator(index, ITER_ALL, NULL, 0);
-	if (it == NULL)
-		goto fail;
-	int rc;
-	struct tuple *tuple;
-	while ((rc = iterator_next(it, &tuple)) == 0 && tuple != NULL)
-		tuple_unref(tuple);
-	iterator_delete(it);
-	if (rc != 0)
-		goto fail;
-
-	return;
-fail:
-	/*
-	 * This function is called after WAL write so we have no
-	 * other choice but panic in case of any error. The good
-	 * news is memtx iterators do not fail so we should not
-	 * normally get here.
-	 */
-	diag_log();
-	unreachable();
-	panic("failed to drop index");
-}
-
-void
-memtx_index_abort_create(struct index *index)
-{
-	memtx_index_prune(index);
-}
-
-void
-memtx_index_commit_drop(struct index *index)
-{
-	memtx_index_prune(index);
-}
-
 bool
 memtx_index_def_change_requires_rebuild(struct index *index,
 					const struct index_def *new_def)
diff --git a/src/box/memtx_engine.h b/src/box/memtx_engine.h
index c1f2652c..2ce597b4 100644
--- a/src/box/memtx_engine.h
+++ b/src/box/memtx_engine.h
@@ -37,12 +37,14 @@
 
 #include "engine.h"
 #include "xlog.h"
+#include "salad/stailq.h"
 
 #if defined(__cplusplus)
 extern "C" {
 #endif /* defined(__cplusplus) */
 
 struct index;
+struct fiber;
 
 /**
  * The state of memtx recovery process.
@@ -101,8 +103,36 @@ struct memtx_engine {
 	struct mempool hash_iterator_pool;
 	/** Memory pool for bitset index iterator. */
 	struct mempool bitset_iterator_pool;
+	/**
+	 * Garbage collection fiber. Used for asynchronous
+	 * destruction of dropped indexes.
+	 */
+	struct fiber *gc_fiber;
+	/**
+	 * Scheduled garbage collection tasks, linked by
+	 * memtx_gc_task::link.
+	 */
+	struct stailq gc_queue;
 };
 
+struct memtx_gc_task;
+typedef void (*memtx_gc_func)(struct memtx_gc_task *);
+
+/** Garbage collection task. */
+struct memtx_gc_task {
+	/** Link in memtx_engine::gc_queue. */
+	struct stailq_entry link;
+	/** Function that will perform the task. */
+	memtx_gc_func func;
+};
+
+/**
+ * Schedule a garbage collection task for execution.
+ */
+void
+memtx_engine_schedule_gc(struct memtx_engine *memtx,
+			 struct memtx_gc_task *task);
+
 struct memtx_engine *
 memtx_engine_new(const char *snap_dirname, bool force_recovery,
 		 uint64_t tuple_arena_max_size,
@@ -151,16 +181,6 @@ memtx_index_extent_free(void *ctx, void *extent);
 int
 memtx_index_extent_reserve(int num);
 
-/*
- * The following two methods are used by all kinds of memtx indexes
- * to delete tuples stored in the space when the primary index is
- * destroyed.
- */
-void
-memtx_index_abort_create(struct index *index);
-void
-memtx_index_commit_drop(struct index *index);
-
 /**
  * Generic implementation of index_vtab::def_change_requires_rebuild,
  * common for all kinds of memtx indexes.
diff --git a/src/box/memtx_hash.c b/src/box/memtx_hash.c
index 24138f42..7e4c0474 100644
--- a/src/box/memtx_hash.c
+++ b/src/box/memtx_hash.c
@@ -129,15 +129,57 @@ hash_iterator_eq(struct iterator *it, struct tuple **ret)
 /* {{{ MemtxHash -- implementation of all hashes. **********************/
 
 static void
-memtx_hash_index_destroy(struct index *base)
+memtx_hash_index_free(struct memtx_hash_index *index)
 {
-	struct memtx_hash_index *index = (struct memtx_hash_index *)base;
 	light_index_destroy(index->hash_table);
 	free(index->hash_table);
 	free(index);
 }
 
 static void
+memtx_hash_index_destroy_f(struct memtx_gc_task *task)
+{
+	enum { YIELD_LOOPS = 128 };
+
+	struct memtx_hash_index *index = container_of(task,
+			struct memtx_hash_index, gc_task);
+	struct light_index_core *hash = index->hash_table;
+
+	struct light_index_iterator itr;
+	light_index_iterator_begin(hash, &itr);
+
+	struct tuple **res;
+	unsigned int loops = 0;
+	while ((res = light_index_iterator_get_and_next(hash, &itr)) != NULL) {
+		tuple_unref(*res);
+		if (++loops % YIELD_LOOPS == 0)
+			fiber_sleep(0);
+	}
+	memtx_hash_index_free(index);
+}
+
+static void
+memtx_hash_index_destroy(struct index *base)
+{
+	struct memtx_hash_index *index = (struct memtx_hash_index *)base;
+	struct memtx_engine *memtx = (struct memtx_engine *)base->engine;
+	if (index->gc_task.func != NULL) {
+		/*
+		 * Primary index. We need to free all tuples stored
+		 * in the index, which may take a while. Schedule a
+		 * background task in order not to block tx thread.
+		 */
+		memtx_engine_schedule_gc(memtx, &index->gc_task);
+	} else {
+		/*
+		 * Secondary index. Destruction is fast, no need to
+		 * hand over to background fiber.
+		 */
+		memtx_hash_index_free(index);
+	}
+}
+
+static void
 memtx_hash_index_update_def(struct index *base)
 {
 	struct memtx_hash_index *index = (struct memtx_hash_index *)base;
@@ -381,9 +423,9 @@ memtx_hash_index_create_snapshot_iterator(struct index *base)
 static const struct index_vtab memtx_hash_index_vtab = {
 	/* .destroy = */ memtx_hash_index_destroy,
 	/* .commit_create = */ generic_index_commit_create,
-	/* .abort_create = */ memtx_index_abort_create,
+	/* .abort_create = */ generic_index_abort_create,
 	/* .commit_modify = */ generic_index_commit_modify,
-	/* .commit_drop = */ memtx_index_commit_drop,
+	/* .commit_drop = */ generic_index_commit_drop,
 	/* .update_def = */ memtx_hash_index_update_def,
 	/* .depends_on_pk = */ generic_index_depends_on_pk,
 	/* .def_change_requires_rebuild = */
@@ -443,6 +485,9 @@ memtx_hash_index_new(struct memtx_engine *memtx, struct index_def *def)
 			   memtx_index_extent_alloc, memtx_index_extent_free,
 			   NULL, index->base.def->key_def);
 	index->hash_table = hash_table;
+
+	if (def->iid == 0)
+		index->gc_task.func = memtx_hash_index_destroy_f;
 	return index;
 }
 
diff --git a/src/box/memtx_hash.h b/src/box/memtx_hash.h
index bdadbd1e..99001079 100644
--- a/src/box/memtx_hash.h
+++ b/src/box/memtx_hash.h
@@ -31,6 +31,7 @@
  * SUCH DAMAGE.
  */
 #include "index.h"
+#include "memtx_engine.h"
 
 #if defined(__cplusplus)
 extern "C" {
@@ -42,6 +43,7 @@ struct light_index_core;
 struct memtx_hash_index {
 	struct index base;
 	struct light_index_core *hash_table;
+	struct memtx_gc_task gc_task;
 };
 
 struct memtx_hash_index *
diff --git a/src/box/memtx_rtree.c b/src/box/memtx_rtree.c
index 653343a7..bcf8e4d4 100644
--- a/src/box/memtx_rtree.c
+++ b/src/box/memtx_rtree.c
@@ -301,9 +301,9 @@ memtx_rtree_index_create_iterator(struct index *base,  enum iterator_type type,
 static const struct index_vtab memtx_rtree_index_vtab = {
 	/* .destroy = */ memtx_rtree_index_destroy,
 	/* .commit_create = */ generic_index_commit_create,
-	/* .abort_create = */ memtx_index_abort_create,
+	/* .abort_create = */ generic_index_abort_create,
 	/* .commit_modify = */ generic_index_commit_modify,
-	/* .commit_drop = */ memtx_index_commit_drop,
+	/* .commit_drop = */ generic_index_commit_drop,
 	/* .update_def = */ generic_index_update_def,
 	/* .depends_on_pk = */ generic_index_depends_on_pk,
 	/* .def_change_requires_rebuild = */
@@ -330,6 +330,7 @@ static const struct index_vtab memtx_rtree_index_vtab = {
 struct memtx_rtree_index *
 memtx_rtree_index_new(struct memtx_engine *memtx, struct index_def *def)
 {
+	assert(def->iid > 0);
 	assert(def->key_def->part_count == 1);
 	assert(def->key_def->parts[0].type == FIELD_TYPE_ARRAY);
 	assert(def->opts.is_unique == false);
diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
index 073006b4..97452c5f 100644
--- a/src/box/memtx_tree.c
+++ b/src/box/memtx_tree.c
@@ -301,15 +301,57 @@ memtx_tree_index_cmp_def(struct memtx_tree_index *index)
 }
 
 static void
-memtx_tree_index_destroy(struct index *base)
+memtx_tree_index_free(struct memtx_tree_index *index)
 {
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
 	memtx_tree_destroy(&index->tree);
 	free(index->build_array);
 	free(index);
 }
 
 static void
+memtx_tree_index_destroy_f(struct memtx_gc_task *task)
+{
+	enum { YIELD_LOOPS = 128 };
+
+	struct memtx_tree_index *index = container_of(task,
+			struct memtx_tree_index, gc_task);
+	struct memtx_tree *tree = &index->tree;
+
+	unsigned int loops = 0;
+	struct memtx_tree_iterator itr;
+	for (itr = memtx_tree_iterator_first(tree);
+	     !memtx_tree_iterator_is_invalid(&itr);
+	     memtx_tree_iterator_next(tree, &itr)) {
+		struct tuple *tuple = *memtx_tree_iterator_get_elem(tree, &itr);
+		tuple_unref(tuple);
+		if (++loops % YIELD_LOOPS == 0)
+			fiber_sleep(0);
+	}
+	memtx_tree_index_free(index);
+}
+
+static void
+memtx_tree_index_destroy(struct index *base)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	struct memtx_engine *memtx = (struct memtx_engine *)base->engine;
+	if (index->gc_task.func != NULL) {
+		/*
+		 * Primary index. We need to free all tuples stored
+		 * in the index, which may take a while. Schedule a
+		 * background task in order not to block tx thread.
+		 */
+		memtx_engine_schedule_gc(memtx, &index->gc_task);
+	} else {
+		/*
+		 * Secondary index. Destruction is fast, no need to
+		 * hand over to background fiber.
+		 */
+		memtx_tree_index_free(index);
+	}
+}
+
+static void
 memtx_tree_index_update_def(struct index *base)
 {
 	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
@@ -590,9 +632,9 @@ memtx_tree_index_create_snapshot_iterator(struct index *base)
 static const struct index_vtab memtx_tree_index_vtab = {
 	/* .destroy = */ memtx_tree_index_destroy,
 	/* .commit_create = */ generic_index_commit_create,
-	/* .abort_create = */ memtx_index_abort_create,
+	/* .abort_create = */ generic_index_abort_create,
 	/* .commit_modify = */ generic_index_commit_modify,
-	/* .commit_drop = */ memtx_index_commit_drop,
+	/* .commit_drop = */ generic_index_commit_drop,
 	/* .update_def = */ memtx_tree_index_update_def,
 	/* .depends_on_pk = */ memtx_tree_index_depends_on_pk,
 	/* .def_change_requires_rebuild = */
@@ -643,5 +685,8 @@ memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
 	memtx_tree_create(&index->tree, cmp_def,
 			  memtx_index_extent_alloc,
 			  memtx_index_extent_free, NULL);
+
+	if (def->iid == 0)
+		index->gc_task.func = memtx_tree_index_destroy_f;
 	return index;
 }
diff --git a/src/box/memtx_tree.h b/src/box/memtx_tree.h
index 28b6fbd6..2e7acde2 100644
--- a/src/box/memtx_tree.h
+++ b/src/box/memtx_tree.h
@@ -97,6 +97,7 @@ struct memtx_tree_index {
 	struct memtx_tree tree;
 	struct tuple **build_array;
 	size_t build_array_size, build_array_alloc_size;
+	struct memtx_gc_task gc_task;
 };
 
 struct memtx_tree_index *
-- 
2.11.0

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] memtx: free tuples asynchronously when primary index is dropped
  2018-05-20 15:06 [PATCH] memtx: free tuples asynchronously when primary index is dropped Vladimir Davydov
@ 2018-05-20 21:31 ` Konstantin Osipov
  2018-05-21  7:17   ` Vladimir Davydov
  0 siblings, 1 reply; 3+ messages in thread
From: Konstantin Osipov @ 2018-05-20 21:31 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: tarantool-patches

* Vladimir Davydov <vdavydov.dev@gmail.com> [18/05/20 18:07]:
> When a memtx space is dropped or truncated, we have to unreference all
> tuples stored in it. Currently, we do it synchronously, thus blocking
> the tx thread. If a space is big, tx thread may remain blocked for
> several seconds, which is unacceptable. This patch makes drop/truncate
> hand actual work to a background fiber.
> 
> Before this patch, drop of a space with 10M 64-byte records took more
> than 0.5 seconds. After this patch, it takes less than 1 millisecond.
> 
> Closes #3408

This is a duplicate of https://github.com/tarantool/tarantool/issues/444

It's OK to push.

You can test it either using error injection or by adding metrics.

Choice of the constant - 128 iterations of the iterator loop per
yield - is puzzling. Did you do any math? How much does a fiber
yield cost?

> https://github.com/tarantool/tarantool/issues/3408
> https://github.com/tarantool/tarantool/commits/gh-3408-memtx-async-index-destruction
> 
>  src/box/memtx_bitset.c |  5 +--
>  src/box/memtx_engine.c | 91 ++++++++++++++++++++++----------------------------
>  src/box/memtx_engine.h | 40 ++++++++++++++++------
>  src/box/memtx_hash.c   | 53 ++++++++++++++++++++++++++---
>  src/box/memtx_hash.h   |  2 ++
>  src/box/memtx_rtree.c  |  5 +--
>  src/box/memtx_tree.c   | 53 ++++++++++++++++++++++++++---
>  src/box/memtx_tree.h   |  1 +
>  8 files changed, 176 insertions(+), 74 deletions(-)
> 
> diff --git a/src/box/memtx_bitset.c b/src/box/memtx_bitset.c
> index 7b87560b..2322af37 100644
> --- a/src/box/memtx_bitset.c
> +++ b/src/box/memtx_bitset.c
> @@ -457,9 +457,9 @@ memtx_bitset_index_count(struct index *base, enum iterator_type type,
>  static const struct index_vtab memtx_bitset_index_vtab = {
>  	/* .destroy = */ memtx_bitset_index_destroy,
>  	/* .commit_create = */ generic_index_commit_create,
> -	/* .abort_create = */ memtx_index_abort_create,
> +	/* .abort_create = */ generic_index_abort_create,
>  	/* .commit_modify = */ generic_index_commit_modify,
> -	/* .commit_drop = */ memtx_index_commit_drop,
> +	/* .commit_drop = */ generic_index_commit_drop,
>  	/* .update_def = */ generic_index_update_def,
>  	/* .depends_on_pk = */ generic_index_depends_on_pk,
>  	/* .def_change_requires_rebuild = */
> @@ -486,6 +486,7 @@ static const struct index_vtab memtx_bitset_index_vtab = {
>  struct memtx_bitset_index *
>  memtx_bitset_index_new(struct memtx_engine *memtx, struct index_def *def)
>  {
> +	assert(def->iid > 0);
>  	assert(!def->opts.is_unique);
>  
>  	memtx_index_arena_init();
> diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
> index 388cc4fe..ee3afaec 100644
> --- a/src/box/memtx_engine.c
> +++ b/src/box/memtx_engine.c
> @@ -35,6 +35,7 @@
>  #include <small/small.h>
>  #include <small/mempool.h>
>  
> +#include "fiber.h"
>  #include "coio_file.h"
>  #include "tuple.h"
>  #include "txn.h"
> @@ -928,6 +929,23 @@ static const struct engine_vtab memtx_engine_vtab = {
>  	/* .check_space_def = */ memtx_engine_check_space_def,
>  };
>  
> +static int
> +memtx_engine_gc_f(va_list va)
> +{
> +	struct memtx_engine *memtx = va_arg(va, struct memtx_engine *);
> +	while (!fiber_is_cancelled()) {
> +		if (stailq_empty(&memtx->gc_queue)) {
> +			fiber_yield_timeout(TIMEOUT_INFINITY);
> +			continue;
> +		}
> +		struct memtx_gc_task *task;
> +		task = stailq_shift_entry(&memtx->gc_queue,
> +					  struct memtx_gc_task, link);
> +		task->func(task);
> +	}
> +	return 0;
> +}
> +
>  struct memtx_engine *
>  memtx_engine_new(const char *snap_dirname, bool force_recovery,
>  		 uint64_t tuple_arena_max_size, uint32_t objsize_min,
> @@ -945,18 +963,34 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery,
>  	xdir_create(&memtx->snap_dir, snap_dirname, SNAP, &INSTANCE_UUID);
>  	memtx->snap_dir.force_recovery = force_recovery;
>  
> -	if (xdir_scan(&memtx->snap_dir) != 0) {
> -		xdir_destroy(&memtx->snap_dir);
> -		free(memtx);
> -		return NULL;
> -	}
> +	if (xdir_scan(&memtx->snap_dir) != 0)
> +		goto fail;
> +
> +	stailq_create(&memtx->gc_queue);
> +	memtx->gc_fiber = fiber_new("memtx.gc", memtx_engine_gc_f);
> +	if (memtx->gc_fiber == NULL)
> +		goto fail;
>  
>  	memtx->state = MEMTX_INITIALIZED;
>  	memtx->force_recovery = force_recovery;
>  
>  	memtx->base.vtab = &memtx_engine_vtab;
>  	memtx->base.name = "memtx";
> +
> +	fiber_start(memtx->gc_fiber, memtx);
>  	return memtx;
> +fail:
> +	xdir_destroy(&memtx->snap_dir);
> +	free(memtx);
> +	return NULL;
> +}
> +
> +void
> +memtx_engine_schedule_gc(struct memtx_engine *memtx,
> +			 struct memtx_gc_task *task)
> +{
> +	stailq_add_tail_entry(&memtx->gc_queue, task, link);
> +	fiber_wakeup(memtx->gc_fiber);
>  }
>  
>  void
> @@ -1063,53 +1097,6 @@ memtx_index_extent_reserve(int num)
>  	return 0;
>  }
>  
> -static void
> -memtx_index_prune(struct index *index)
> -{
> -	if (index->def->iid > 0)
> -		return;
> -
> -	/*
> -	 * Tuples stored in a memtx space are referenced by the
> -	 * primary index so when the primary index is dropped we
> -	 * should delete them.
> -	 */
> -	struct iterator *it = index_create_iterator(index, ITER_ALL, NULL, 0);
> -	if (it == NULL)
> -		goto fail;
> -	int rc;
> -	struct tuple *tuple;
> -	while ((rc = iterator_next(it, &tuple)) == 0 && tuple != NULL)
> -		tuple_unref(tuple);
> -	iterator_delete(it);
> -	if (rc != 0)
> -		goto fail;
> -
> -	return;
> -fail:
> -	/*
> -	 * This function is called after WAL write so we have no
> -	 * other choice but panic in case of any error. The good
> -	 * news is memtx iterators do not fail so we should not
> -	 * normally get here.
> -	 */
> -	diag_log();
> -	unreachable();
> -	panic("failed to drop index");
> -}
> -
> -void
> -memtx_index_abort_create(struct index *index)
> -{
> -	memtx_index_prune(index);
> -}
> -
> -void
> -memtx_index_commit_drop(struct index *index)
> -{
> -	memtx_index_prune(index);
> -}
> -
>  bool
>  memtx_index_def_change_requires_rebuild(struct index *index,
>  					const struct index_def *new_def)
> diff --git a/src/box/memtx_engine.h b/src/box/memtx_engine.h
> index c1f2652c..2ce597b4 100644
> --- a/src/box/memtx_engine.h
> +++ b/src/box/memtx_engine.h
> @@ -37,12 +37,14 @@
>  
>  #include "engine.h"
>  #include "xlog.h"
> +#include "salad/stailq.h"
>  
>  #if defined(__cplusplus)
>  extern "C" {
>  #endif /* defined(__cplusplus) */
>  
>  struct index;
> +struct fiber;
>  
>  /**
>   * The state of memtx recovery process.
> @@ -101,8 +103,36 @@ struct memtx_engine {
>  	struct mempool hash_iterator_pool;
>  	/** Memory pool for bitset index iterator. */
>  	struct mempool bitset_iterator_pool;
> +	/**
> +	 * Garbage collection fiber. Used for asynchronous
> +	 * destruction of dropped indexes.
> +	 */
> +	struct fiber *gc_fiber;
> +	/**
> +	 * Scheduled garbage collection tasks, linked by
> +	 * memtx_gc_task::link.
> +	 */
> +	struct stailq gc_queue;
>  };
>  
> +struct memtx_gc_task;
> +typedef void (*memtx_gc_func)(struct memtx_gc_task *);
> +
> +/** Garbage collection task. */
> +struct memtx_gc_task {
> +	/** Link in memtx_engine::gc_queue. */
> +	struct stailq_entry link;
> +	/** Function that will perform the task. */
> +	memtx_gc_func func;
> +};
> +
> +/**
> + * Schedule a garbage collection task for execution.
> + */
> +void
> +memtx_engine_schedule_gc(struct memtx_engine *memtx,
> +			 struct memtx_gc_task *task);
> +
>  struct memtx_engine *
>  memtx_engine_new(const char *snap_dirname, bool force_recovery,
>  		 uint64_t tuple_arena_max_size,
> @@ -151,16 +181,6 @@ memtx_index_extent_free(void *ctx, void *extent);
>  int
>  memtx_index_extent_reserve(int num);
>  
> -/*
> - * The following two methods are used by all kinds of memtx indexes
> - * to delete tuples stored in the space when the primary index is
> - * destroyed.
> - */
> -void
> -memtx_index_abort_create(struct index *index);
> -void
> -memtx_index_commit_drop(struct index *index);
> -
>  /**
>   * Generic implementation of index_vtab::def_change_requires_rebuild,
>   * common for all kinds of memtx indexes.
> diff --git a/src/box/memtx_hash.c b/src/box/memtx_hash.c
> index 24138f42..7e4c0474 100644
> --- a/src/box/memtx_hash.c
> +++ b/src/box/memtx_hash.c
> @@ -129,15 +129,57 @@ hash_iterator_eq(struct iterator *it, struct tuple **ret)
>  /* {{{ MemtxHash -- implementation of all hashes. **********************/
>  
>  static void
> -memtx_hash_index_destroy(struct index *base)
> +memtx_hash_index_free(struct memtx_hash_index *index)
>  {
> -	struct memtx_hash_index *index = (struct memtx_hash_index *)base;
>  	light_index_destroy(index->hash_table);
>  	free(index->hash_table);
>  	free(index);
>  }
>  
>  static void
> +memtx_hash_index_destroy_f(struct memtx_gc_task *task)
> +{
> +	enum { YIELD_LOOPS = 128 };
> +
> +	struct memtx_hash_index *index = container_of(task,
> +			struct memtx_hash_index, gc_task);
> +	struct light_index_core *hash = index->hash_table;
> +
> +	struct light_index_iterator itr;
> +	light_index_iterator_begin(hash, &itr);
> +
> +	struct tuple **res;
> +	unsigned int loops = 0;
> +	while ((res = light_index_iterator_get_and_next(hash, &itr)) != NULL) {
> +		tuple_unref(*res);
> +		if (++loops % YIELD_LOOPS == 0)
> +			fiber_sleep(0);
> +	}
> +	memtx_hash_index_free(index);
> +}
> +
> +static void
> +memtx_hash_index_destroy(struct index *base)
> +{
> +	struct memtx_hash_index *index = (struct memtx_hash_index *)base;
> +	struct memtx_engine *memtx = (struct memtx_engine *)base->engine;
> +	if (index->gc_task.func != NULL) {
> +		/*
> +		 * Primary index. We need to free all tuples stored
> +		 * in the index, which may take a while. Schedule a
> +		 * background task in order not to block tx thread.
> +		 */
> +		memtx_engine_schedule_gc(memtx, &index->gc_task);
> +	} else {
> +		/*
> +		 * Secondary index. Destruction is fast, no need to
> +		 * hand over to background fiber.
> +		 */
> +		memtx_hash_index_free(index);
> +	}
> +}
> +
> +static void
>  memtx_hash_index_update_def(struct index *base)
>  {
>  	struct memtx_hash_index *index = (struct memtx_hash_index *)base;
> @@ -381,9 +423,9 @@ memtx_hash_index_create_snapshot_iterator(struct index *base)
>  static const struct index_vtab memtx_hash_index_vtab = {
>  	/* .destroy = */ memtx_hash_index_destroy,
>  	/* .commit_create = */ generic_index_commit_create,
> -	/* .abort_create = */ memtx_index_abort_create,
> +	/* .abort_create = */ generic_index_abort_create,
>  	/* .commit_modify = */ generic_index_commit_modify,
> -	/* .commit_drop = */ memtx_index_commit_drop,
> +	/* .commit_drop = */ generic_index_commit_drop,
>  	/* .update_def = */ memtx_hash_index_update_def,
>  	/* .depends_on_pk = */ generic_index_depends_on_pk,
>  	/* .def_change_requires_rebuild = */
> @@ -443,6 +485,9 @@ memtx_hash_index_new(struct memtx_engine *memtx, struct index_def *def)
>  			   memtx_index_extent_alloc, memtx_index_extent_free,
>  			   NULL, index->base.def->key_def);
>  	index->hash_table = hash_table;
> +
> +	if (def->iid == 0)
> +		index->gc_task.func = memtx_hash_index_destroy_f;
>  	return index;
>  }
>  
> diff --git a/src/box/memtx_hash.h b/src/box/memtx_hash.h
> index bdadbd1e..99001079 100644
> --- a/src/box/memtx_hash.h
> +++ b/src/box/memtx_hash.h
> @@ -31,6 +31,7 @@
>   * SUCH DAMAGE.
>   */
>  #include "index.h"
> +#include "memtx_engine.h"
>  
>  #if defined(__cplusplus)
>  extern "C" {
> @@ -42,6 +43,7 @@ struct light_index_core;
>  struct memtx_hash_index {
>  	struct index base;
>  	struct light_index_core *hash_table;
> +	struct memtx_gc_task gc_task;
>  };
>  
>  struct memtx_hash_index *
> diff --git a/src/box/memtx_rtree.c b/src/box/memtx_rtree.c
> index 653343a7..bcf8e4d4 100644
> --- a/src/box/memtx_rtree.c
> +++ b/src/box/memtx_rtree.c
> @@ -301,9 +301,9 @@ memtx_rtree_index_create_iterator(struct index *base,  enum iterator_type type,
>  static const struct index_vtab memtx_rtree_index_vtab = {
>  	/* .destroy = */ memtx_rtree_index_destroy,
>  	/* .commit_create = */ generic_index_commit_create,
> -	/* .abort_create = */ memtx_index_abort_create,
> +	/* .abort_create = */ generic_index_abort_create,
>  	/* .commit_modify = */ generic_index_commit_modify,
> -	/* .commit_drop = */ memtx_index_commit_drop,
> +	/* .commit_drop = */ generic_index_commit_drop,
>  	/* .update_def = */ generic_index_update_def,
>  	/* .depends_on_pk = */ generic_index_depends_on_pk,
>  	/* .def_change_requires_rebuild = */
> @@ -330,6 +330,7 @@ static const struct index_vtab memtx_rtree_index_vtab = {
>  struct memtx_rtree_index *
>  memtx_rtree_index_new(struct memtx_engine *memtx, struct index_def *def)
>  {
> +	assert(def->iid > 0);
>  	assert(def->key_def->part_count == 1);
>  	assert(def->key_def->parts[0].type == FIELD_TYPE_ARRAY);
>  	assert(def->opts.is_unique == false);
> diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
> index 073006b4..97452c5f 100644
> --- a/src/box/memtx_tree.c
> +++ b/src/box/memtx_tree.c
> @@ -301,15 +301,57 @@ memtx_tree_index_cmp_def(struct memtx_tree_index *index)
>  }
>  
>  static void
> -memtx_tree_index_destroy(struct index *base)
> +memtx_tree_index_free(struct memtx_tree_index *index)
>  {
> -	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
>  	memtx_tree_destroy(&index->tree);
>  	free(index->build_array);
>  	free(index);
>  }
>  
>  static void
> +memtx_tree_index_destroy_f(struct memtx_gc_task *task)
> +{
> +	enum { YIELD_LOOPS = 128 };
> +
> +	struct memtx_tree_index *index = container_of(task,
> +			struct memtx_tree_index, gc_task);
> +	struct memtx_tree *tree = &index->tree;
> +
> +	unsigned int loops = 0;
> +	struct memtx_tree_iterator itr;
> +	for (itr = memtx_tree_iterator_first(tree);
> +	     !memtx_tree_iterator_is_invalid(&itr);
> +	     memtx_tree_iterator_next(tree, &itr)) {
> +		struct tuple *tuple = *memtx_tree_iterator_get_elem(tree, &itr);
> +		tuple_unref(tuple);
> +		if (++loops % YIELD_LOOPS == 0)
> +			fiber_sleep(0);
> +	}
> +	memtx_tree_index_free(index);
> +}
> +
> +static void
> +memtx_tree_index_destroy(struct index *base)
> +{
> +	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> +	struct memtx_engine *memtx = (struct memtx_engine *)base->engine;
> +	if (index->gc_task.func != NULL) {
> +		/*
> +		 * Primary index. We need to free all tuples stored
> +		 * in the index, which may take a while. Schedule a
> +		 * background task in order not to block tx thread.
> +		 */
> +		memtx_engine_schedule_gc(memtx, &index->gc_task);
> +	} else {
> +		/*
> +		 * Secondary index. Destruction is fast, no need to
> +		 * hand over to background fiber.
> +		 */
> +		memtx_tree_index_free(index);
> +	}
> +}
> +
> +static void
>  memtx_tree_index_update_def(struct index *base)
>  {
>  	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> @@ -590,9 +632,9 @@ memtx_tree_index_create_snapshot_iterator(struct index *base)
>  static const struct index_vtab memtx_tree_index_vtab = {
>  	/* .destroy = */ memtx_tree_index_destroy,
>  	/* .commit_create = */ generic_index_commit_create,
> -	/* .abort_create = */ memtx_index_abort_create,
> +	/* .abort_create = */ generic_index_abort_create,
>  	/* .commit_modify = */ generic_index_commit_modify,
> -	/* .commit_drop = */ memtx_index_commit_drop,
> +	/* .commit_drop = */ generic_index_commit_drop,
>  	/* .update_def = */ memtx_tree_index_update_def,
>  	/* .depends_on_pk = */ memtx_tree_index_depends_on_pk,
>  	/* .def_change_requires_rebuild = */
> @@ -643,5 +685,8 @@ memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
>  	memtx_tree_create(&index->tree, cmp_def,
>  			  memtx_index_extent_alloc,
>  			  memtx_index_extent_free, NULL);
> +
> +	if (def->iid == 0)
> +		index->gc_task.func = memtx_tree_index_destroy_f;
>  	return index;
>  }
> diff --git a/src/box/memtx_tree.h b/src/box/memtx_tree.h
> index 28b6fbd6..2e7acde2 100644
> --- a/src/box/memtx_tree.h
> +++ b/src/box/memtx_tree.h
> @@ -97,6 +97,7 @@ struct memtx_tree_index {
>  	struct memtx_tree tree;
>  	struct tuple **build_array;
>  	size_t build_array_size, build_array_alloc_size;
> +	struct memtx_gc_task gc_task;
>  };
>  
>  struct memtx_tree_index *
> -- 
> 2.11.0

-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] memtx: free tuples asynchronously when primary index is dropped
  2018-05-20 21:31 ` Konstantin Osipov
@ 2018-05-21  7:17   ` Vladimir Davydov
  0 siblings, 0 replies; 3+ messages in thread
From: Vladimir Davydov @ 2018-05-21  7:17 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tarantool-patches

On Mon, May 21, 2018 at 12:31:18AM +0300, Konstantin Osipov wrote:
> * Vladimir Davydov <vdavydov.dev@gmail.com> [18/05/20 18:07]:
> > When a memtx space is dropped or truncated, we have to unreference all
> > tuples stored in it. Currently, we do it synchronously, thus blocking
> > the tx thread. If a space is big, tx thread may remain blocked for
> > several seconds, which is unacceptable. This patch makes drop/truncate
> > hand actual work to a background fiber.
> > 
> > Before this patch, drop of a space with 10M 64-byte records took more
> > than 0.5 seconds. After this patch, it takes less than 1 millisecond.
> > 
> > Closes #3408
> 
> This is a duplicate of https://github.com/tarantool/tarantool/issues/444
> 
> It's OK to push.
> 
> You can test it either using error injection or by adding metrics.

I'll add error injection.

> 
> Choice of the constant - 128 iterations of the iterator loop per
> yield - is puzzling. Did you do any math? How much does a fiber
> yield cost?

Choice of 128 was arbitrary. After you noticed that, I did some testing.
If we want to keep max latency < 0.1 ms, which seems to be reasonable,
it's enough to yield every 1000 tuples. The diff is below.

diff --git a/src/box/memtx_hash.c b/src/box/memtx_hash.c
index 7e4c0474..55131740 100644
--- a/src/box/memtx_hash.c
+++ b/src/box/memtx_hash.c
@@ -139,7 +139,8 @@ memtx_hash_index_free(struct memtx_hash_index *index)
 static void
 memtx_hash_index_destroy_f(struct memtx_gc_task *task)
 {
-	enum { YIELD_LOOPS = 128 };
+	/* Yield every 1K tuples to keep latency < 0.1 ms. */
+	enum { YIELD_LOOPS = 1000 };
 
 	struct memtx_hash_index *index = container_of(task,
 			struct memtx_hash_index, gc_task);
diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
index 97452c5f..c72b8fa8 100644
--- a/src/box/memtx_tree.c
+++ b/src/box/memtx_tree.c
@@ -311,7 +311,8 @@ memtx_tree_index_free(struct memtx_tree_index *index)
 static void
 memtx_tree_index_destroy_f(struct memtx_gc_task *task)
 {
-	enum { YIELD_LOOPS = 128 };
+	/* Yield every 1K tuples to keep latency < 0.1 ms. */
+	enum { YIELD_LOOPS = 1000 };
 
 	struct memtx_tree_index *index = container_of(task,
 			struct memtx_tree_index, gc_task);

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2018-05-21  7:17 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-20 15:06 [PATCH] memtx: free tuples asynchronously when primary index is dropped Vladimir Davydov
2018-05-20 21:31 ` Konstantin Osipov
2018-05-21  7:17   ` Vladimir Davydov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox