* [PATCH v2 1/2] memtx: rework background garbage collection procedure
2018-05-22 17:25 [PATCH v2 0/2] Follow-up on async memtx index cleanup Vladimir Davydov
@ 2018-05-22 17:25 ` Vladimir Davydov
2018-05-23 17:56 ` Konstantin Osipov
2018-05-22 17:25 ` [PATCH v2 2/2] memtx: run garbage collection on demand Vladimir Davydov
1 sibling, 1 reply; 7+ messages in thread
From: Vladimir Davydov @ 2018-05-22 17:25 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
Currently, the engine has not control over yields issued during
asynchronous index destruction. As a result, it can't force gc when
there's not enough memory. To fix that, let's make gc callback stateful:
now it's supposed to free some objects and return true if there's still
more objects to free or false otherwise. Yields are now done by the
memtx engine itself after each gc callback invocation.
---
src/box/index.cc | 7 ++++++-
src/box/memtx_engine.c | 34 +++++++++++++++++++++++++++++-----
src/box/memtx_engine.h | 17 ++++++++++++++---
src/box/memtx_hash.c | 35 ++++++++++++++++++++++++-----------
src/box/memtx_hash.h | 1 +
src/box/memtx_tree.c | 38 ++++++++++++++++++++++++++------------
src/box/memtx_tree.h | 1 +
7 files changed, 101 insertions(+), 32 deletions(-)
diff --git a/src/box/index.cc b/src/box/index.cc
index 978935b3..b67f8bc6 100644
--- a/src/box/index.cc
+++ b/src/box/index.cc
@@ -496,8 +496,13 @@ index_create(struct index *index, struct engine *engine,
void
index_delete(struct index *index)
{
- index_def_delete(index->def);
+ /*
+ * Free index_def after destroying the index as
+ * engine might still need it.
+ */
+ struct index_def *def = index->def;
index->vtab->destroy(index);
+ index_def_delete(def);
}
int
diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index b935c3f6..9bde98a0 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -933,19 +933,43 @@ static const struct engine_vtab memtx_engine_vtab = {
/* .check_space_def = */ memtx_engine_check_space_def,
};
+/**
+ * Run one iteration of garbage collection. Set @stop if
+ * there is no more objects to free.
+ */
+static void
+memtx_engine_run_gc(struct memtx_engine *memtx, bool *stop)
+{
+ *stop = stailq_empty(&memtx->gc_queue);
+ if (*stop)
+ return;
+
+ struct memtx_gc_task *task = stailq_first_entry(&memtx->gc_queue,
+ struct memtx_gc_task, link);
+ bool task_done;
+ task->vtab->run(task, &task_done);
+ if (task_done) {
+ stailq_shift(&memtx->gc_queue);
+ task->vtab->free(task);
+ }
+}
+
static int
memtx_engine_gc_f(va_list va)
{
struct memtx_engine *memtx = va_arg(va, struct memtx_engine *);
while (!fiber_is_cancelled()) {
- if (stailq_empty(&memtx->gc_queue)) {
+ bool stop;
+ memtx_engine_run_gc(memtx, &stop);
+ if (stop) {
fiber_yield_timeout(TIMEOUT_INFINITY);
continue;
}
- struct memtx_gc_task *task;
- task = stailq_shift_entry(&memtx->gc_queue,
- struct memtx_gc_task, link);
- task->func(task);
+ /*
+ * Yield after each iteration so as not to block
+ * tx thread for too long.
+ */
+ fiber_sleep(0);
}
return 0;
}
diff --git a/src/box/memtx_engine.h b/src/box/memtx_engine.h
index 0bcd24ac..97dba177 100644
--- a/src/box/memtx_engine.h
+++ b/src/box/memtx_engine.h
@@ -150,14 +150,25 @@ struct memtx_engine {
};
struct memtx_gc_task;
-typedef void (*memtx_gc_func)(struct memtx_gc_task *);
+
+struct memtx_gc_task_vtab {
+ /**
+ * Free some objects associated with @task. If @task has
+ * no more objects to free, set flag @done.
+ */
+ void (*run)(struct memtx_gc_task *task, bool *done);
+ /**
+ * Destroy @task.
+ */
+ void (*free)(struct memtx_gc_task *task);
+};
/** Garbage collection task. */
struct memtx_gc_task {
/** Link in memtx_engine::gc_queue. */
struct stailq_entry link;
- /** Function that will perform the task. */
- memtx_gc_func func;
+ /** Virtual function table. */
+ const struct memtx_gc_task_vtab *vtab;
};
/**
diff --git a/src/box/memtx_hash.c b/src/box/memtx_hash.c
index e1fdad54..b91d604e 100644
--- a/src/box/memtx_hash.c
+++ b/src/box/memtx_hash.c
@@ -110,7 +110,7 @@ memtx_hash_index_free(struct memtx_hash_index *index)
}
static void
-memtx_hash_index_destroy_f(struct memtx_gc_task *task)
+memtx_hash_index_gc_run(struct memtx_gc_task *task, bool *done)
{
/*
* Yield every 1K tuples to keep latency < 0.1 ms.
@@ -125,31 +125,47 @@ memtx_hash_index_destroy_f(struct memtx_gc_task *task)
struct memtx_hash_index *index = container_of(task,
struct memtx_hash_index, gc_task);
struct light_index_core *hash = &index->hash_table;
-
- struct light_index_iterator itr;
- light_index_iterator_begin(hash, &itr);
+ struct light_index_iterator *itr = &index->gc_iterator;
struct tuple **res;
unsigned int loops = 0;
- while ((res = light_index_iterator_get_and_next(hash, &itr)) != NULL) {
+ while ((res = light_index_iterator_get_and_next(hash, itr)) != NULL) {
tuple_unref(*res);
- if (++loops % YIELD_LOOPS == 0)
- fiber_sleep(0);
+ if (++loops >= YIELD_LOOPS) {
+ *done = false;
+ return;
+ }
}
+ *done = true;
+}
+
+static void
+memtx_hash_index_gc_free(struct memtx_gc_task *task)
+{
+ struct memtx_hash_index *index = container_of(task,
+ struct memtx_hash_index, gc_task);
memtx_hash_index_free(index);
}
+static const struct memtx_gc_task_vtab memtx_hash_index_gc_vtab = {
+ .run = memtx_hash_index_gc_run,
+ .free = memtx_hash_index_gc_free,
+};
+
static void
memtx_hash_index_destroy(struct index *base)
{
struct memtx_hash_index *index = (struct memtx_hash_index *)base;
struct memtx_engine *memtx = (struct memtx_engine *)base->engine;
- if (index->gc_task.func != NULL) {
+ if (base->def->iid == 0) {
/*
* Primary index. We need to free all tuples stored
* in the index, which may take a while. Schedule a
* background task in order not to block tx thread.
*/
+ index->gc_task.vtab = &memtx_hash_index_gc_vtab;
+ light_index_iterator_begin(&index->hash_table,
+ &index->gc_iterator);
memtx_engine_schedule_gc(memtx, &index->gc_task);
} else {
/*
@@ -456,9 +472,6 @@ memtx_hash_index_new(struct memtx_engine *memtx, struct index_def *def)
light_index_create(&index->hash_table, MEMTX_EXTENT_SIZE,
memtx_index_extent_alloc, memtx_index_extent_free,
memtx, index->base.def->key_def);
-
- if (def->iid == 0)
- index->gc_task.func = memtx_hash_index_destroy_f;
return index;
}
diff --git a/src/box/memtx_hash.h b/src/box/memtx_hash.h
index 7a08dc87..a3b48052 100644
--- a/src/box/memtx_hash.h
+++ b/src/box/memtx_hash.h
@@ -72,6 +72,7 @@ struct memtx_hash_index {
struct index base;
struct light_index_core hash_table;
struct memtx_gc_task gc_task;
+ struct light_index_iterator gc_iterator;
};
struct memtx_hash_index *
diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
index 8814b13b..e4b1cd65 100644
--- a/src/box/memtx_tree.c
+++ b/src/box/memtx_tree.c
@@ -309,7 +309,7 @@ memtx_tree_index_free(struct memtx_tree_index *index)
}
static void
-memtx_tree_index_destroy_f(struct memtx_gc_task *task)
+memtx_tree_index_gc_run(struct memtx_gc_task *task, bool *done)
{
/*
* Yield every 1K tuples to keep latency < 0.1 ms.
@@ -320,34 +320,51 @@ memtx_tree_index_destroy_f(struct memtx_gc_task *task)
#else
enum { YIELD_LOOPS = 10 };
#endif
+
struct memtx_tree_index *index = container_of(task,
struct memtx_tree_index, gc_task);
struct memtx_tree *tree = &index->tree;
+ struct memtx_tree_iterator *itr = &index->gc_iterator;
unsigned int loops = 0;
- struct memtx_tree_iterator itr;
- for (itr = memtx_tree_iterator_first(tree);
- !memtx_tree_iterator_is_invalid(&itr);
- memtx_tree_iterator_next(tree, &itr)) {
- struct tuple *tuple = *memtx_tree_iterator_get_elem(tree, &itr);
+ while (!memtx_tree_iterator_is_invalid(itr)) {
+ struct tuple *tuple = *memtx_tree_iterator_get_elem(tree, itr);
+ memtx_tree_iterator_next(tree, itr);
tuple_unref(tuple);
- if (++loops % YIELD_LOOPS == 0)
- fiber_sleep(0);
+ if (++loops >= YIELD_LOOPS) {
+ *done = false;
+ return;
+ }
}
+ *done = true;
+}
+
+static void
+memtx_tree_index_gc_free(struct memtx_gc_task *task)
+{
+ struct memtx_tree_index *index = container_of(task,
+ struct memtx_tree_index, gc_task);
memtx_tree_index_free(index);
}
+static const struct memtx_gc_task_vtab memtx_tree_index_gc_vtab = {
+ .run = memtx_tree_index_gc_run,
+ .free = memtx_tree_index_gc_free,
+};
+
static void
memtx_tree_index_destroy(struct index *base)
{
struct memtx_tree_index *index = (struct memtx_tree_index *)base;
struct memtx_engine *memtx = (struct memtx_engine *)base->engine;
- if (index->gc_task.func != NULL) {
+ if (base->def->iid == 0) {
/*
* Primary index. We need to free all tuples stored
* in the index, which may take a while. Schedule a
* background task in order not to block tx thread.
*/
+ index->gc_task.vtab = &memtx_tree_index_gc_vtab;
+ index->gc_iterator = memtx_tree_iterator_first(&index->tree);
memtx_engine_schedule_gc(memtx, &index->gc_task);
} else {
/*
@@ -690,8 +707,5 @@ memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
memtx_tree_create(&index->tree, cmp_def, memtx_index_extent_alloc,
memtx_index_extent_free, memtx);
-
- if (def->iid == 0)
- index->gc_task.func = memtx_tree_index_destroy_f;
return index;
}
diff --git a/src/box/memtx_tree.h b/src/box/memtx_tree.h
index 2e7acde2..bbc8b241 100644
--- a/src/box/memtx_tree.h
+++ b/src/box/memtx_tree.h
@@ -98,6 +98,7 @@ struct memtx_tree_index {
struct tuple **build_array;
size_t build_array_size, build_array_alloc_size;
struct memtx_gc_task gc_task;
+ struct memtx_tree_iterator gc_iterator;
};
struct memtx_tree_index *
--
2.11.0
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v2 2/2] memtx: run garbage collection on demand
2018-05-22 17:25 [PATCH v2 0/2] Follow-up on async memtx index cleanup Vladimir Davydov
2018-05-22 17:25 ` [PATCH v2 1/2] memtx: rework background garbage collection procedure Vladimir Davydov
@ 2018-05-22 17:25 ` Vladimir Davydov
2018-05-23 17:58 ` Konstantin Osipov
1 sibling, 1 reply; 7+ messages in thread
From: Vladimir Davydov @ 2018-05-22 17:25 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
When a memtx space is dropped or truncated, we delegate freeing tuples
stored in it to a background fiber so as not to block the caller (and tx
thread) for too long. Turns out it doesn't work out well for ephemeral
spaces, which share the destruction code with normal spaces: the problem
is the user might issue a lot of complex SQL SELECT statements that
create a lot of ephemeral spaces and do not yield and hence don't give
the garbage collection fiber a chance to clean up. There's a test that
emulates this, 2.0:test/sql-tap/gh-3083-ephemeral-unref-tuples.test.lua.
For this test to pass, let's run garbage collection procedure on demand,
i.e. when any of memtx allocation functions fails to allocate memory.
Follow-up #3408
---
src/box/memtx_engine.c | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index 9bde98a0..b4c1582a 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -1070,7 +1070,13 @@ memtx_tuple_new(struct tuple_format *format, const char *data, const char *end)
return NULL;
}
- struct memtx_tuple *memtx_tuple = smalloc(&memtx->alloc, total);
+ struct memtx_tuple *memtx_tuple;
+ while ((memtx_tuple = smalloc(&memtx->alloc, total)) == NULL) {
+ bool stop;
+ memtx_engine_run_gc(memtx, &stop);
+ if (stop)
+ break;
+ }
if (memtx_tuple == NULL) {
diag_set(OutOfMemory, total, "slab allocator", "memtx_tuple");
return NULL;
@@ -1151,7 +1157,13 @@ memtx_index_extent_alloc(void *ctx)
"mempool", "new slab");
return NULL;
});
- void *ret = mempool_alloc(&memtx->index_extent_pool);
+ void *ret;
+ while ((ret = mempool_alloc(&memtx->index_extent_pool)) == NULL) {
+ bool stop;
+ memtx_engine_run_gc(memtx, &stop);
+ if (stop)
+ break;
+ }
if (ret == NULL)
diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
"mempool", "new slab");
@@ -1184,6 +1196,10 @@ memtx_index_extent_reserve(struct memtx_engine *memtx, int num)
while (memtx->num_reserved_extents < num) {
void *ext = mempool_alloc(&memtx->index_extent_pool);
if (ext == NULL) {
+ bool stop;
+ memtx_engine_run_gc(memtx, &stop);
+ if (!stop)
+ continue;
diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
"mempool", "new slab");
return -1;
--
2.11.0
^ permalink raw reply [flat|nested] 7+ messages in thread