[PATCH v1 2/4] box: rework memtx_tree class to be reusable

Kirill Shcherbatov kshcherbatov at tarantool.org
Tue Feb 5 14:58:37 MSK 2019


The memtx tree class has been reworked so that it can be used
to store arbitrary structures in the future. This makes it
possible to implement type hints in memtx in subsequent patches.

Needed for #3961
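
To illustrate the new template contract, here is a hypothetical
second instantiation that caches a comparison hint next to each
tuple pointer. All memtx_hinted_tree_* names and macro bodies
below are illustrative and not part of this patch; computing
real hints is out of scope here, so the sketch stores zero hints
and the comparators fall back to plain tuple comparison:

    /* Element: a tuple pointer plus a cached comparison hint. */
    struct memtx_hinted_tree_elem {
            struct tuple *tuple;
            uint64_t hint;
    };

    /* Key: msgpacked search fields plus a cached hint. */
    struct memtx_hinted_tree_key_data {
            const char *key;
            uint32_t part_count;
            uint64_t hint;
    };

    #define MEMTX_TREE_NAME memtx_hinted_tree
    #define memtx_tree_elem struct memtx_hinted_tree_elem
    #define memtx_tree_key struct memtx_hinted_tree_key_data
    #define MEMTX_TREE_ELEM_EQUAL(a, b) ((a)->tuple == (b)->tuple)
    /* Unequal hints order elements; equal hints fall back. */
    #define MEMTX_TREE_ELEM_CMP(a, b, key_def)                      \
            ((a)->hint != (b)->hint ?                               \
             ((a)->hint < (b)->hint ? -1 : 1) :                     \
             tuple_compare((a)->tuple, (b)->tuple, key_def))
    #define MEMTX_TREE_ELEM_WITH_KEY_CMP(elem, key, key_def)        \
            ((elem)->hint != (key)->hint ?                          \
             ((elem)->hint < (key)->hint ? -1 : 1) :                \
             tuple_compare_with_key((elem)->tuple, (key)->key,      \
                                    (key)->part_count, key_def))
    #define MEMTX_TREE_ELEM_SET(elem, tuple_arg, key_def)           \
            ({(void)key_def; (elem)->tuple = (tuple_arg);           \
             (elem)->hint = 0;})
    #define MEMTX_TREE_KEY_SET(key_ptr, key_val, part_count_val,    \
                               key_def)                             \
            ({(void)key_def; (key_ptr)->key = (key_val);            \
             (key_ptr)->part_count = (part_count_val);              \
             (key_ptr)->hint = 0;})
    #define MEMTX_TREE_ELEM_GET(elem) ((elem)->tuple)
    #define MEMTX_TREE_KEY_GET(key_ptr, part_count_ptr)             \
            ({*(part_count_ptr) = (key_ptr)->part_count;            \
             (key_ptr)->key;})

    #include "memtx_tree_impl.h"

This generates memtx_hinted_tree_index_new() with the same index
vtab as the tuple tree below.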
---
 src/box/memtx_tree.c      | 715 ++--------------------------
 src/box/memtx_tree.h      |  68 +--
 src/box/memtx_tree_impl.h | 960 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 1008 insertions(+), 735 deletions(-)
 create mode 100644 src/box/memtx_tree_impl.h

diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
index f851fb869..6b22883c2 100644
--- a/src/box/memtx_tree.c
+++ b/src/box/memtx_tree.c
@@ -28,684 +28,59 @@
  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
+#include <stdbool.h>
+#include <stdint.h>
+#include "tuple_compare.h"
 #include "memtx_tree.h"
-#include "memtx_engine.h"
-#include "space.h"
-#include "schema.h" /* space_cache_find() */
-#include "errinj.h"
-#include "memory.h"
-#include "fiber.h"
-#include "tuple.h"
-#include <third_party/qsort_arg.h>
-#include <small/mempool.h>
 
-/* {{{ Utilities. *************************************************/
+/* {{{ Memtx tree of tuples class. ******************************/
 
-static int
-memtx_tree_qcompare(const void* a, const void *b, void *c)
-{
-	return tuple_compare(*(struct tuple **)a,
-		*(struct tuple **)b, (struct key_def *)c);
-}
-
-/* {{{ MemtxTree Iterators ****************************************/
-struct tree_iterator {
-	struct iterator base;
-	const struct memtx_tree *tree;
-	struct index_def *index_def;
-	struct memtx_tree_iterator tree_iterator;
-	enum iterator_type type;
-	struct memtx_tree_key_data key_data;
-	struct tuple *current_tuple;
-	/** Memory pool the iterator was allocated from. */
-	struct mempool *pool;
+/** Struct that is used as a key in BPS tree definition. */
+struct memtx_tuple_tree_key_data {
+	/** Sequence of msgpacked search fields. */
+	const char *key;
+	/** Number of msgpacked search fields. */
+	uint32_t part_count;
 };
 
-static void
-tree_iterator_free(struct iterator *iterator);
-
-static inline struct tree_iterator *
-tree_iterator(struct iterator *it)
-{
-	assert(it->free == tree_iterator_free);
-	return (struct tree_iterator *) it;
-}
-
-static void
-tree_iterator_free(struct iterator *iterator)
-{
-	struct tree_iterator *it = tree_iterator(iterator);
-	if (it->current_tuple != NULL)
-		tuple_unref(it->current_tuple);
-	mempool_free(it->pool, it);
-}
-
-static int
-tree_iterator_dummie(struct iterator *iterator, struct tuple **ret)
-{
-	(void)iterator;
-	*ret = NULL;
-	return 0;
-}
-
-static int
-tree_iterator_next(struct iterator *iterator, struct tuple **ret)
-{
-	struct tuple **res;
-	struct tree_iterator *it = tree_iterator(iterator);
-	assert(it->current_tuple != NULL);
-	struct tuple **check = memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
-	if (check == NULL || *check != it->current_tuple)
-		it->tree_iterator =
-			memtx_tree_upper_bound_elem(it->tree, it->current_tuple,
-						    NULL);
-	else
-		memtx_tree_iterator_next(it->tree, &it->tree_iterator);
-	tuple_unref(it->current_tuple);
-	it->current_tuple = NULL;
-	res = memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
-	if (res == NULL) {
-		iterator->next = tree_iterator_dummie;
-		*ret = NULL;
-	} else {
-		*ret = it->current_tuple = *res;
-		tuple_ref(it->current_tuple);
-	}
-	return 0;
-}
-
-static int
-tree_iterator_prev(struct iterator *iterator, struct tuple **ret)
-{
-	struct tree_iterator *it = tree_iterator(iterator);
-	assert(it->current_tuple != NULL);
-	struct tuple **check = memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
-	if (check == NULL || *check != it->current_tuple)
-		it->tree_iterator =
-			memtx_tree_lower_bound_elem(it->tree, it->current_tuple,
-						    NULL);
-	memtx_tree_iterator_prev(it->tree, &it->tree_iterator);
-	tuple_unref(it->current_tuple);
-	it->current_tuple = NULL;
-	struct tuple **res = memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
-	if (!res) {
-		iterator->next = tree_iterator_dummie;
-		*ret = NULL;
-	} else {
-		*ret = it->current_tuple = *res;
-		tuple_ref(it->current_tuple);
-	}
-	return 0;
-}
-
-static int
-tree_iterator_next_equal(struct iterator *iterator, struct tuple **ret)
-{
-	struct tree_iterator *it = tree_iterator(iterator);
-	assert(it->current_tuple != NULL);
-	struct tuple **check = memtx_tree_iterator_get_elem(it->tree,
-						&it->tree_iterator);
-	if (check == NULL || *check != it->current_tuple)
-		it->tree_iterator =
-			memtx_tree_upper_bound_elem(it->tree, it->current_tuple,
-						    NULL);
-	else
-		memtx_tree_iterator_next(it->tree, &it->tree_iterator);
-	tuple_unref(it->current_tuple);
-	it->current_tuple = NULL;
-	struct tuple **res = memtx_tree_iterator_get_elem(it->tree,
-						&it->tree_iterator);
-	/* Use user key def to save a few loops. */
-	if (!res || memtx_tree_compare_key(*res, &it->key_data,
-					   it->index_def->key_def) != 0) {
-		iterator->next = tree_iterator_dummie;
-		*ret = NULL;
-	} else {
-		*ret = it->current_tuple = *res;
-		tuple_ref(it->current_tuple);
-	}
-	return 0;
-}
-
-static int
-tree_iterator_prev_equal(struct iterator *iterator, struct tuple **ret)
-{
-	struct tree_iterator *it = tree_iterator(iterator);
-	assert(it->current_tuple != NULL);
-	struct tuple **check = memtx_tree_iterator_get_elem(it->tree,
-						&it->tree_iterator);
-	if (check == NULL || *check != it->current_tuple)
-		it->tree_iterator =
-			memtx_tree_lower_bound_elem(it->tree, it->current_tuple,
-						    NULL);
-	memtx_tree_iterator_prev(it->tree, &it->tree_iterator);
-	tuple_unref(it->current_tuple);
-	it->current_tuple = NULL;
-	struct tuple **res = memtx_tree_iterator_get_elem(it->tree,
-						&it->tree_iterator);
-	/* Use user key def to save a few loops. */
-	if (!res || memtx_tree_compare_key(*res, &it->key_data,
-					   it->index_def->key_def) != 0) {
-		iterator->next = tree_iterator_dummie;
-		*ret = NULL;
-	} else {
-		*ret = it->current_tuple = *res;
-		tuple_ref(it->current_tuple);
-	}
-	return 0;
-}
-
-static void
-tree_iterator_set_next_method(struct tree_iterator *it)
-{
-	assert(it->current_tuple != NULL);
-	switch (it->type) {
-	case ITER_EQ:
-		it->base.next = tree_iterator_next_equal;
-		break;
-	case ITER_REQ:
-		it->base.next = tree_iterator_prev_equal;
-		break;
-	case ITER_ALL:
-		it->base.next = tree_iterator_next;
-		break;
-	case ITER_LT:
-	case ITER_LE:
-		it->base.next = tree_iterator_prev;
-		break;
-	case ITER_GE:
-	case ITER_GT:
-		it->base.next = tree_iterator_next;
-		break;
-	default:
-		/* The type was checked in initIterator */
-		assert(false);
-	}
-}
-
-static int
-tree_iterator_start(struct iterator *iterator, struct tuple **ret)
-{
-	*ret = NULL;
-	struct tree_iterator *it = tree_iterator(iterator);
-	it->base.next = tree_iterator_dummie;
-	const struct memtx_tree *tree = it->tree;
-	enum iterator_type type = it->type;
-	bool exact = false;
-	assert(it->current_tuple == NULL);
-	if (it->key_data.key == 0) {
-		if (iterator_type_is_reverse(it->type))
-			it->tree_iterator = memtx_tree_iterator_last(tree);
-		else
-			it->tree_iterator = memtx_tree_iterator_first(tree);
-	} else {
-		if (type == ITER_ALL || type == ITER_EQ ||
-		    type == ITER_GE || type == ITER_LT) {
-			it->tree_iterator =
-				memtx_tree_lower_bound(tree, &it->key_data,
-						       &exact);
-			if (type == ITER_EQ && !exact)
-				return 0;
-		} else { // ITER_GT, ITER_REQ, ITER_LE
-			it->tree_iterator =
-				memtx_tree_upper_bound(tree, &it->key_data,
-						       &exact);
-			if (type == ITER_REQ && !exact)
-				return 0;
-		}
-		if (iterator_type_is_reverse(type)) {
-			/*
-			 * Because of limitations of tree search API we use use
-			 * lower_bound for LT search and upper_bound for LE
-			 * and REQ searches. Thus we found position to the
-			 * right of the target one. Let's make a step to the
-			 * left to reach target position.
-			 * If we found an invalid iterator all the elements in
-			 * the tree are less (less or equal) to the key, and
-			 * iterator_next call will convert the iterator to the
-			 * last position in the tree, that's what we need.
-			 */
-			memtx_tree_iterator_prev(it->tree, &it->tree_iterator);
-		}
-	}
-
-	struct tuple **res = memtx_tree_iterator_get_elem(it->tree,
-						&it->tree_iterator);
-	if (!res)
-		return 0;
-	*ret = it->current_tuple = *res;
-	tuple_ref(it->current_tuple);
-	tree_iterator_set_next_method(it);
-	return 0;
-}
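+/*
+ * Parameterize and instantiate the generic tree implementation
+ * for the plain tuple pointer element.
+ */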
+#define MEMTX_TREE_NAME memtx_tuple_tree
+#define memtx_tree_elem struct tuple *
+#define memtx_tree_key struct memtx_tuple_tree_key_data
+#define MEMTX_TREE_ELEM_EQUAL(elem_a_ptr, elem_b_ptr)				\
+	(*elem_a_ptr == *elem_b_ptr)
+#define MEMTX_TREE_ELEM_CMP(elem_a_ptr, elem_b_ptr, key_def)			\
+	tuple_compare(*elem_a_ptr, *elem_b_ptr, key_def)
+#define MEMTX_TREE_ELEM_WITH_KEY_CMP(elem_ptr, key_ptr, key_def)		\
+	tuple_compare_with_key(*elem_ptr, (key_ptr)->key,			\
+			       (key_ptr)->part_count, key_def)
+#define MEMTX_TREE_ELEM_SET(elem_ptr, tuple, key_def)				\
+	({(void)key_def; *elem_ptr = tuple;})
+#define MEMTX_TREE_KEY_SET(key_ptr, key_val, part_count_val, key_def)		\
+	({(void)key_def; (key_ptr)->key = key_val;				\
+	 (key_ptr)->part_count = part_count_val;})
+#define MEMTX_TREE_ELEM_GET(elem_ptr) (*(elem_ptr))
+#define MEMTX_TREE_KEY_GET(key_ptr, part_count_ptr)				\
+	({*part_count_ptr = (key_ptr)->part_count; (key_ptr)->key;})
+
+#include "memtx_tree_impl.h"
+
+#undef MEMTX_TREE_KEY_GET
+#undef MEMTX_TREE_ELEM_GET
+#undef MEMTX_TREE_KEY_SET
+#undef MEMTX_TREE_ELEM_SET
+#undef MEMTX_TREE_ELEM_WITH_KEY_CMP
+#undef MEMTX_TREE_ELEM_CMP
+#undef MEMTX_TREE_ELEM_EQUAL
+#undef memtx_tree_key
+#undef memtx_tree_elem
+#undef MEMTX_TREE_NAME
 
 /* }}} */
 
-/* {{{ MemtxTree  **********************************************************/
-
-/**
- * Return the key def to use for comparing tuples stored
- * in the given tree index.
- *
- * We use extended key def for non-unique and nullable
- * indexes. Unique but nullable index can store multiple
- * NULLs. To correctly compare these NULLs extended key
- * def must be used. For details @sa tuple_compare.cc.
- */
-static struct key_def *
-memtx_tree_index_cmp_def(struct memtx_tree_index *index)
-{
-	struct index_def *def = index->base.def;
-	return def->opts.is_unique && !def->key_def->is_nullable ?
-		def->key_def : def->cmp_def;
-}
-
-static void
-memtx_tree_index_free(struct memtx_tree_index *index)
-{
-	memtx_tree_destroy(&index->tree);
-	free(index->build_array);
-	free(index);
-}
-
-static void
-memtx_tree_index_gc_run(struct memtx_gc_task *task, bool *done)
-{
-	/*
-	 * Yield every 1K tuples to keep latency < 0.1 ms.
-	 * Yield more often in debug mode.
-	 */
-#ifdef NDEBUG
-	enum { YIELD_LOOPS = 1000 };
-#else
-	enum { YIELD_LOOPS = 10 };
-#endif
-
-	struct memtx_tree_index *index = container_of(task,
-			struct memtx_tree_index, gc_task);
-	struct memtx_tree *tree = &index->tree;
-	struct memtx_tree_iterator *itr = &index->gc_iterator;
-
-	unsigned int loops = 0;
-	while (!memtx_tree_iterator_is_invalid(itr)) {
-		struct tuple *tuple = *memtx_tree_iterator_get_elem(tree, itr);
-		memtx_tree_iterator_next(tree, itr);
-		tuple_unref(tuple);
-		if (++loops >= YIELD_LOOPS) {
-			*done = false;
-			return;
-		}
-	}
-	*done = true;
-}
-
-static void
-memtx_tree_index_gc_free(struct memtx_gc_task *task)
-{
-	struct memtx_tree_index *index = container_of(task,
-			struct memtx_tree_index, gc_task);
-	memtx_tree_index_free(index);
-}
-
-static const struct memtx_gc_task_vtab memtx_tree_index_gc_vtab = {
-	.run = memtx_tree_index_gc_run,
-	.free = memtx_tree_index_gc_free,
-};
-
-static void
-memtx_tree_index_destroy(struct index *base)
-{
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	struct memtx_engine *memtx = (struct memtx_engine *)base->engine;
-	if (base->def->iid == 0) {
-		/*
-		 * Primary index. We need to free all tuples stored
-		 * in the index, which may take a while. Schedule a
-		 * background task in order not to block tx thread.
-		 */
-		index->gc_task.vtab = &memtx_tree_index_gc_vtab;
-		index->gc_iterator = memtx_tree_iterator_first(&index->tree);
-		memtx_engine_schedule_gc(memtx, &index->gc_task);
-	} else {
-		/*
-		 * Secondary index. Destruction is fast, no need to
-		 * hand over to background fiber.
-		 */
-		memtx_tree_index_free(index);
-	}
-}
-
-static void
-memtx_tree_index_update_def(struct index *base)
-{
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	index->tree.arg = memtx_tree_index_cmp_def(index);
-}
-
-static bool
-memtx_tree_index_depends_on_pk(struct index *base)
-{
-	struct index_def *def = base->def;
-	/* See comment to memtx_tree_index_cmp_def(). */
-	return !def->opts.is_unique || def->key_def->is_nullable;
-}
-
-static ssize_t
-memtx_tree_index_size(struct index *base)
-{
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	return memtx_tree_size(&index->tree);
-}
-
-static ssize_t
-memtx_tree_index_bsize(struct index *base)
-{
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	return memtx_tree_mem_used(&index->tree);
-}
-
-static int
-memtx_tree_index_random(struct index *base, uint32_t rnd, struct tuple **result)
-{
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	struct tuple **res = memtx_tree_random(&index->tree, rnd);
-	*result = res != NULL ? *res : NULL;
-	return 0;
-}
-
-static ssize_t
-memtx_tree_index_count(struct index *base, enum iterator_type type,
-		       const char *key, uint32_t part_count)
-{
-	if (type == ITER_ALL)
-		return memtx_tree_index_size(base); /* optimization */
-	return generic_index_count(base, type, key, part_count);
-}
-
-static int
-memtx_tree_index_get(struct index *base, const char *key,
-		     uint32_t part_count, struct tuple **result)
-{
-	assert(base->def->opts.is_unique &&
-	       part_count == base->def->key_def->part_count);
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	struct memtx_tree_key_data key_data;
-	key_data.key = key;
-	key_data.part_count = part_count;
-	struct tuple **res = memtx_tree_find(&index->tree, &key_data);
-	*result = res != NULL ? *res : NULL;
-	return 0;
-}
-
-static int
-memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
-			 struct tuple *new_tuple, enum dup_replace_mode mode,
-			 struct tuple **result)
-{
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	if (new_tuple) {
-		struct tuple *dup_tuple = NULL;
-
-		/* Try to optimistically replace the new_tuple. */
-		int tree_res = memtx_tree_insert(&index->tree,
-						 new_tuple, &dup_tuple);
-		if (tree_res) {
-			diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
-				 "memtx_tree_index", "replace");
-			return -1;
-		}
-
-		uint32_t errcode = replace_check_dup(old_tuple,
-						     dup_tuple, mode);
-		if (errcode) {
-			memtx_tree_delete(&index->tree, new_tuple);
-			if (dup_tuple)
-				memtx_tree_insert(&index->tree, dup_tuple, 0);
-			struct space *sp = space_cache_find(base->def->space_id);
-			if (sp != NULL)
-				diag_set(ClientError, errcode, base->def->name,
-					 space_name(sp));
-			return -1;
-		}
-		if (dup_tuple) {
-			*result = dup_tuple;
-			return 0;
-		}
-	}
-	if (old_tuple) {
-		memtx_tree_delete(&index->tree, old_tuple);
-	}
-	*result = old_tuple;
-	return 0;
-}
-
-static struct iterator *
-memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
-				 const char *key, uint32_t part_count)
-{
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	struct memtx_engine *memtx = (struct memtx_engine *)base->engine;
-
-	assert(part_count == 0 || key != NULL);
-	if (type > ITER_GT) {
-		diag_set(UnsupportedIndexFeature, base->def,
-			 "requested iterator type");
-		return NULL;
-	}
-
-	if (part_count == 0) {
-		/*
-		 * If no key is specified, downgrade equality
-		 * iterators to a full range.
-		 */
-		type = iterator_type_is_reverse(type) ? ITER_LE : ITER_GE;
-		key = NULL;
-	}
-
-	struct tree_iterator *it = mempool_alloc(&memtx->tree_iterator_pool);
-	if (it == NULL) {
-		diag_set(OutOfMemory, sizeof(struct tree_iterator),
-			 "memtx_tree_index", "iterator");
-		return NULL;
-	}
-	iterator_create(&it->base, base);
-	it->pool = &memtx->tree_iterator_pool;
-	it->base.next = tree_iterator_start;
-	it->base.free = tree_iterator_free;
-	it->type = type;
-	it->key_data.key = key;
-	it->key_data.part_count = part_count;
-	it->index_def = base->def;
-	it->tree = &index->tree;
-	it->tree_iterator = memtx_tree_invalid_iterator();
-	it->current_tuple = NULL;
-	return (struct iterator *)it;
-}
-
-static void
-memtx_tree_index_begin_build(struct index *base)
-{
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	assert(memtx_tree_size(&index->tree) == 0);
-	(void)index;
-}
-
-static int
-memtx_tree_index_reserve(struct index *base, uint32_t size_hint)
-{
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	if (size_hint < index->build_array_alloc_size)
-		return 0;
-	struct tuple **tmp = (struct tuple **)realloc(index->build_array,
-						      size_hint * sizeof(*tmp));
-	if (tmp == NULL) {
-		diag_set(OutOfMemory, size_hint * sizeof(*tmp),
-			 "memtx_tree_index", "reserve");
-		return -1;
-	}
-	index->build_array = tmp;
-	index->build_array_alloc_size = size_hint;
-	return 0;
-}
-
-static int
-memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
-{
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	if (index->build_array == NULL) {
-		index->build_array = (struct tuple **)malloc(MEMTX_EXTENT_SIZE);
-		if (index->build_array == NULL) {
-			diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
-				 "memtx_tree_index", "build_next");
-			return -1;
-		}
-		index->build_array_alloc_size =
-			MEMTX_EXTENT_SIZE / sizeof(struct tuple*);
-	}
-	assert(index->build_array_size <= index->build_array_alloc_size);
-	if (index->build_array_size == index->build_array_alloc_size) {
-		index->build_array_alloc_size = index->build_array_alloc_size +
-					index->build_array_alloc_size / 2;
-		struct tuple **tmp = (struct tuple **)
-			realloc(index->build_array,
-				index->build_array_alloc_size * sizeof(*tmp));
-		if (tmp == NULL) {
-			diag_set(OutOfMemory, index->build_array_alloc_size *
-				 sizeof(*tmp), "memtx_tree_index", "build_next");
-			return -1;
-		}
-		index->build_array = tmp;
-	}
-	index->build_array[index->build_array_size++] = tuple;
-	return 0;
-}
-
-static void
-memtx_tree_index_end_build(struct index *base)
-{
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
-	qsort_arg(index->build_array, index->build_array_size,
-		  sizeof(struct tuple *),
-		  memtx_tree_qcompare, cmp_def);
-	memtx_tree_build(&index->tree, index->build_array,
-			 index->build_array_size);
-
-	free(index->build_array);
-	index->build_array = NULL;
-	index->build_array_size = 0;
-	index->build_array_alloc_size = 0;
-}
-
-struct tree_snapshot_iterator {
-	struct snapshot_iterator base;
-	struct memtx_tree *tree;
-	struct memtx_tree_iterator tree_iterator;
-};
-
-static void
-tree_snapshot_iterator_free(struct snapshot_iterator *iterator)
-{
-	assert(iterator->free == tree_snapshot_iterator_free);
-	struct tree_snapshot_iterator *it =
-		(struct tree_snapshot_iterator *)iterator;
-	struct memtx_tree *tree = (struct memtx_tree *)it->tree;
-	memtx_tree_iterator_destroy(tree, &it->tree_iterator);
-	free(iterator);
-}
-
-static const char *
-tree_snapshot_iterator_next(struct snapshot_iterator *iterator, uint32_t *size)
-{
-	assert(iterator->free == tree_snapshot_iterator_free);
-	struct tree_snapshot_iterator *it =
-		(struct tree_snapshot_iterator *)iterator;
-	struct tuple **res = memtx_tree_iterator_get_elem(it->tree,
-						&it->tree_iterator);
-	if (res == NULL)
-		return NULL;
-	memtx_tree_iterator_next(it->tree, &it->tree_iterator);
-	return tuple_data_range(*res, size);
-}
-
-/**
- * Create an ALL iterator with personal read view so further
- * index modifications will not affect the iteration results.
- * Must be destroyed by iterator->free after usage.
- */
-static struct snapshot_iterator *
-memtx_tree_index_create_snapshot_iterator(struct index *base)
-{
-	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
-	struct tree_snapshot_iterator *it = (struct tree_snapshot_iterator *)
-		calloc(1, sizeof(*it));
-	if (it == NULL) {
-		diag_set(OutOfMemory, sizeof(struct tree_snapshot_iterator),
-			 "memtx_tree_index", "create_snapshot_iterator");
-		return NULL;
-	}
-
-	it->base.free = tree_snapshot_iterator_free;
-	it->base.next = tree_snapshot_iterator_next;
-	it->tree = &index->tree;
-	it->tree_iterator = memtx_tree_iterator_first(&index->tree);
-	memtx_tree_iterator_freeze(&index->tree, &it->tree_iterator);
-	return (struct snapshot_iterator *) it;
-}
-
-static const struct index_vtab memtx_tree_index_vtab = {
-	/* .destroy = */ memtx_tree_index_destroy,
-	/* .commit_create = */ generic_index_commit_create,
-	/* .abort_create = */ generic_index_abort_create,
-	/* .commit_modify = */ generic_index_commit_modify,
-	/* .commit_drop = */ generic_index_commit_drop,
-	/* .update_def = */ memtx_tree_index_update_def,
-	/* .depends_on_pk = */ memtx_tree_index_depends_on_pk,
-	/* .def_change_requires_rebuild = */
-		memtx_index_def_change_requires_rebuild,
-	/* .size = */ memtx_tree_index_size,
-	/* .bsize = */ memtx_tree_index_bsize,
-	/* .min = */ generic_index_min,
-	/* .max = */ generic_index_max,
-	/* .random = */ memtx_tree_index_random,
-	/* .count = */ memtx_tree_index_count,
-	/* .get = */ memtx_tree_index_get,
-	/* .replace = */ memtx_tree_index_replace,
-	/* .create_iterator = */ memtx_tree_index_create_iterator,
-	/* .create_snapshot_iterator = */
-		memtx_tree_index_create_snapshot_iterator,
-	/* .stat = */ generic_index_stat,
-	/* .compact = */ generic_index_compact,
-	/* .reset_stat = */ generic_index_reset_stat,
-	/* .begin_build = */ memtx_tree_index_begin_build,
-	/* .reserve = */ memtx_tree_index_reserve,
-	/* .build_next = */ memtx_tree_index_build_next,
-	/* .end_build = */ memtx_tree_index_end_build,
-};
-
-struct memtx_tree_index *
+struct index *
 memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
 {
-	if (!mempool_is_initialized(&memtx->tree_iterator_pool)) {
-		mempool_create(&memtx->tree_iterator_pool, cord_slab_cache(),
-			       sizeof(struct tree_iterator));
-	}
-
-	struct memtx_tree_index *index =
-		(struct memtx_tree_index *)calloc(1, sizeof(*index));
-	if (index == NULL) {
-		diag_set(OutOfMemory, sizeof(*index),
-			 "malloc", "struct memtx_tree_index");
-		return NULL;
-	}
-	if (index_create(&index->base, (struct engine *)memtx,
-			 &memtx_tree_index_vtab, def) != 0) {
-		free(index);
-		return NULL;
-	}
-
-	struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
-	memtx_tree_create(&index->tree, cmp_def, memtx_index_extent_alloc,
-			  memtx_index_extent_free, memtx);
-	return index;
+	return memtx_tuple_tree_index_new(memtx, def);
 }
diff --git a/src/box/memtx_tree.h b/src/box/memtx_tree.h
index bbc8b2419..edeaebab9 100644
--- a/src/box/memtx_tree.h
+++ b/src/box/memtx_tree.h
@@ -30,78 +30,16 @@
  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-#include <stddef.h>
-#include <stdint.h>
-
-#include "index.h"
-#include "memtx_engine.h"
 
 #if defined(__cplusplus)
 extern "C" {
 #endif /* defined(__cplusplus) */
 
+struct index;
+struct index_def;
 struct memtx_engine;
 
-/**
- * Struct that is used as a key in BPS tree definition.
- */
-struct memtx_tree_key_data
-{
-	/** Sequence of msgpacked search fields */
-	const char *key;
-	/** Number of msgpacked search fields */
-	uint32_t part_count;
-};
-
-/**
- * BPS tree element vs key comparator.
- * Defined in header in order to allow compiler to inline it.
- * @param tuple - tuple to compare.
- * @param key_data - key to compare with.
- * @param def - key definition.
- * @retval 0  if tuple == key in terms of def.
- * @retval <0 if tuple < key in terms of def.
- * @retval >0 if tuple > key in terms of def.
- */
-static inline int
-memtx_tree_compare_key(const struct tuple *tuple,
-		       const struct memtx_tree_key_data *key_data,
-		       struct key_def *def)
-{
-	return tuple_compare_with_key(tuple, key_data->key,
-				      key_data->part_count, def);
-}
-
-#define BPS_TREE_NAME memtx_tree
-#define BPS_TREE_BLOCK_SIZE (512)
-#define BPS_TREE_EXTENT_SIZE MEMTX_EXTENT_SIZE
-#define BPS_TREE_COMPARE(a, b, arg) tuple_compare(a, b, arg)
-#define BPS_TREE_COMPARE_KEY(a, b, arg) memtx_tree_compare_key(a, b, arg)
-#define bps_tree_elem_t struct tuple *
-#define bps_tree_key_t struct memtx_tree_key_data *
-#define bps_tree_arg_t struct key_def *
-
-#include "salad/bps_tree.h"
-
-#undef BPS_TREE_NAME
-#undef BPS_TREE_BLOCK_SIZE
-#undef BPS_TREE_EXTENT_SIZE
-#undef BPS_TREE_COMPARE
-#undef BPS_TREE_COMPARE_KEY
-#undef bps_tree_elem_t
-#undef bps_tree_key_t
-#undef bps_tree_arg_t
-
-struct memtx_tree_index {
-	struct index base;
-	struct memtx_tree tree;
-	struct tuple **build_array;
-	size_t build_array_size, build_array_alloc_size;
-	struct memtx_gc_task gc_task;
-	struct memtx_tree_iterator gc_iterator;
-};
-
-struct memtx_tree_index *
+struct index *
 memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def);
 
 #if defined(__cplusplus)
diff --git a/src/box/memtx_tree_impl.h b/src/box/memtx_tree_impl.h
new file mode 100644
index 000000000..20b88ad3b
--- /dev/null
+++ b/src/box/memtx_tree_impl.h
@@ -0,0 +1,960 @@
+/*
+ * Copyright 2010-2019 Tarantool AUTHORS: please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
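+
+/**
+ * Generic tree index implementation template.
+ * Including this file produces a complete TREE index
+ * implementation; its public entry point is
+ * <MEMTX_TREE_NAME>_index_new(). The including file must define
+ * the following macros beforehand:
+ *
+ * MEMTX_TREE_NAME - prefix for all names generated here.
+ * memtx_tree_elem - type of a tree element.
+ * memtx_tree_key - type of a tree key.
+ * MEMTX_TREE_ELEM_EQUAL(elem_a_ptr, elem_b_ptr) - element
+ *   identity test.
+ * MEMTX_TREE_ELEM_CMP(elem_a_ptr, elem_b_ptr, key_def) -
+ *   three-way comparison of two elements.
+ * MEMTX_TREE_ELEM_WITH_KEY_CMP(elem_ptr, key_ptr, key_def) -
+ *   three-way comparison of an element with a key.
+ * MEMTX_TREE_ELEM_SET(elem_ptr, tuple, key_def) - initialize
+ *   an element with a tuple.
+ * MEMTX_TREE_KEY_SET(key_ptr, key, part_count, key_def) -
+ *   initialize a key with msgpacked key data and a part count.
+ * MEMTX_TREE_ELEM_GET(elem_ptr) - return the element's tuple.
+ * MEMTX_TREE_KEY_GET(key_ptr, part_count_ptr) - return the
+ *   key's msgpack data and store its part count.
+ */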
+#include <small/mempool.h>
+#include <third_party/qsort_arg.h>
+#include "memtx_engine.h"
+#include "schema.h"
+
+#ifndef MEMTX_TREE_NAME
+#error "MEMTX_TREE_NAME must be defined"
+#endif
+
+#ifndef memtx_tree_elem
+#error "memtx_tree_elem must be defined"
+#endif
+
+#ifndef memtx_tree_key
+#error "memtx_tree_key must be defined"
+#endif
+
+#ifndef MEMTX_TREE_ELEM_EQUAL
+#error "MEMTX_TREE_ELEM_EQUAL must be defined"
+#endif
+
+#ifndef MEMTX_TREE_ELEM_CMP
+#error "MEMTX_TREE_ELEM_CMP must be defined"
+#endif
+
+#ifndef MEMTX_TREE_ELEM_WITH_KEY_CMP
+#error "MEMTX_TREE_ELEM_WITH_KEY_CMP must be defined"
+#endif
+
+#ifndef MEMTX_TREE_ELEM_SET
+#error "MEMTX_TREE_ELEM_SET must be defined"
+#endif
+
+#ifndef MEMTX_TREE_KEY_SET
+#error "MEMTX_TREE_KEY_SET must be defined"
+#endif
+
+#ifndef MEMTX_TREE_ELEM_GET
+#error "MEMTX_TREE_ELEM_GET must be defined"
+#endif
+
+#ifndef MEMTX_TREE_KEY_GET
+#error "MEMTX_TREE_KEY_GET must be defined"
+#endif
+
+/**
+ * The size of the biggest memtx tree iterator. Used with
+ * mempool_create. This is the size of the block that will be
+ * allocated for each iterator.
+ */
+#define MEMTX_TREE_ITERATOR_SIZE (136)
+
+#define bps_tree_elem_t memtx_tree_elem
+#define bps_tree_key_t memtx_tree_key *
+#define bps_tree_arg_t struct key_def *
+#define BPS_TREE_NAME CONCAT3(MEMTX_TREE_NAME, _, tree)
+#define BPS_TREE_BLOCK_SIZE (512)
+#define BPS_TREE_EXTENT_SIZE MEMTX_EXTENT_SIZE
+#define BPS_TREE_EQUAL(a, b) MEMTX_TREE_ELEM_EQUAL(&a, &b)
+#define BPS_TREE_COMPARE(a, b, arg) MEMTX_TREE_ELEM_CMP(&a, &b, arg)
+#define BPS_TREE_COMPARE_KEY(a, b, arg) MEMTX_TREE_ELEM_WITH_KEY_CMP(&a, b, arg)
+
+#include "salad/bps_tree.h"
+
+#undef bps_tree_elem_t
+#undef bps_tree_key_t
+#undef bps_tree_arg_t
+#undef BPS_TREE_NAME
+#undef BPS_TREE_BLOCK_SIZE
+#undef BPS_TREE_EXTENT_SIZE
+#undef BPS_TREE_EQUAL
+#undef BPS_TREE_COMPARE
+#undef BPS_TREE_COMPARE_KEY
+
+#define bps_tree CONCAT3(MEMTX_TREE_NAME, _, tree)
+#define _tree_api_name(postfix) CONCAT5(MEMTX_TREE_NAME, _, tree, _, postfix)
+#define bps_tree_iterator _tree_api_name(iterator)
+#define bps_tree_iterator_is_invalid _tree_api_name(iterator_is_invalid)
+#define bps_tree_iterator_first _tree_api_name(iterator_first)
+#define bps_tree_iterator_last _tree_api_name(iterator_last)
+#define bps_tree_iterator_get_elem _tree_api_name(iterator_get_elem)
+#define bps_tree_iterator_next _tree_api_name(iterator_next)
+#define bps_tree_iterator_prev _tree_api_name(iterator_prev)
+#define bps_tree_iterator_freeze _tree_api_name(iterator_freeze)
+#define bps_tree_iterator_destroy _tree_api_name(iterator_destroy)
+#define bps_tree_create _tree_api_name(create)
+#define bps_tree_build _tree_api_name(build)
+#define bps_tree_destroy _tree_api_name(destroy)
+#define bps_tree_find _tree_api_name(find)
+#define bps_tree_insert _tree_api_name(insert)
+#define bps_tree_delete _tree_api_name(delete)
+#define bps_tree_size _tree_api_name(size)
+#define bps_tree_mem_used _tree_api_name(mem_used)
+#define bps_tree_random _tree_api_name(random)
+#define bps_tree_invalid_iterator _tree_api_name(invalid_iterator)
+#define bps_tree_lower_bound _tree_api_name(lower_bound)
+#define bps_tree_upper_bound _tree_api_name(upper_bound)
+#define bps_tree_lower_bound_elem _tree_api_name(lower_bound_elem)
+#define bps_tree_upper_bound_elem _tree_api_name(upper_bound_elem)
+
+#undef _api_name
+#define _api_name(postfix) CONCAT3(MEMTX_TREE_NAME, _, postfix)
+#define memtx_tree_index _api_name(index)
+#define memtx_tree_qcompare _api_name(qcompare)
+#define memtx_tree_iterator _api_name(iterator)
+#define memtx_tree_iterator_free _api_name(iterator_free)
+#define memtx_tree_iterator_dummie _api_name(iterator_dummie)
+#define memtx_tree_iterator_next _api_name(iterator_next)
+#define memtx_tree_iterator_prev _api_name(iterator_prev)
+#define memtx_tree_iterator_next_equal _api_name(iterator_next_equal)
+#define memtx_tree_iterator_prev_equal _api_name(iterator_prev_equal)
+#define memtx_tree_iterator_set_next_method _api_name(iterator_set_next_method)
+#define memtx_tree_iterator_start _api_name(iterator_start)
+#define memtx_tree_index_cmp_def _api_name(index_cmp_def)
+#define memtx_tree_index_free _api_name(index_free)
+#define memtx_tree_index_gc_run _api_name(index_gc_run)
+#define memtx_tree_index_gc_free _api_name(index_gc_free)
+#define memtx_tree_index_gc_vtab _api_name(index_gc_vtab)
+#define memtx_tree_index_destroy _api_name(index_destroy)
+#define memtx_tree_index_update_def _api_name(index_update_def)
+#define memtx_tree_index_depends_on_pk _api_name(index_depends_on_pk)
+#define memtx_tree_index_size _api_name(index_size)
+#define memtx_tree_index_bsize _api_name(index_bsize)
+#define memtx_tree_index_random _api_name(index_random)
+#define memtx_tree_index_count _api_name(index_count)
+#define memtx_tree_index_get _api_name(index_get)
+#define memtx_tree_index_insert_tuple _api_name(index_insert_tuple)
+#define memtx_tree_index_delete_tuple _api_name(index_delete_tuple)
+#define memtx_tree_index_replace _api_name(index_replace)
+#define memtx_tree_index_create_iterator _api_name(index_create_iterator)
+#define memtx_tree_index_begin_build _api_name(index_begin_build)
+#define memtx_tree_index_build_next _api_name(index_build_next)
+#define memtx_tree_index_reserve _api_name(index_reserve)
+#define memtx_tree_index_end_build _api_name(index_end_build)
+#define memtx_tree_snapshot_iterator _api_name(snapshot_iterator)
+#define memtx_tree_snapshot_iterator_free _api_name(snapshot_iterator_free)
+#define memtx_tree_snapshot_iterator_next _api_name(snapshot_iterator_next)
+#define memtx_tree_index_create_snapshot_iterator _api_name(index_create_snapshot_iterator)
+#define memtx_tree_index_vtab _api_name(index_vtab)
+#define memtx_tree_index_new _api_name(index_new)
+
+struct memtx_tree_index {
+	struct index base;
+	/** Elements array used for bulk index build. */
+	memtx_tree_elem *build_array;
+	size_t build_array_size, build_array_alloc_size;
+	/** Task to unref tuples in background on destroy. */
+	struct memtx_gc_task gc_task;
+	struct bps_tree tree;
+	/** Position of the background GC task in the tree. */
+	struct bps_tree_iterator gc_iterator;
+};
+
+/* {{{ Utilities. ***********************************************/
+
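+/** qsort_arg() comparator for sorting the index build array. */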
+static int
+memtx_tree_qcompare(const void *a, const void *b, void *c)
+{
+	return MEMTX_TREE_ELEM_CMP((memtx_tree_elem *)a,
+				   (memtx_tree_elem *)b, c);
+}
+
+/* {{{ MemtxTree Iterators **************************************/
+struct memtx_tree_iterator {
+	struct iterator base;
+	const struct bps_tree *tree;
+	struct index_def *index_def;
+	struct bps_tree_iterator tree_iterator;
+	enum iterator_type type;
+	/** Memory pool the iterator was allocated from. */
+	struct mempool *pool;
+	memtx_tree_key key_data;
+	memtx_tree_elem current;
+};
+
+static_assert(sizeof(struct memtx_tree_iterator) <= MEMTX_TREE_ITERATOR_SIZE,
+	      "sizeof(struct memtx_tree_iterator) must not exceed "
+	      "MEMTX_TREE_ITERATOR_SIZE");
+
+static void
+memtx_tree_iterator_free(struct iterator *iterator)
+{
+	struct memtx_tree_iterator *it = (struct memtx_tree_iterator *)iterator;
+	if (MEMTX_TREE_ELEM_GET(&it->current) != NULL)
+		tuple_unref(MEMTX_TREE_ELEM_GET(&it->current));
+	mempool_free(it->pool, it);
+}
+
+static int
+memtx_tree_iterator_dummie(struct iterator *iterator, struct tuple **ret)
+{
+	(void)iterator;
+	*ret = NULL;
+	return 0;
+}
+
+static int
+memtx_tree_iterator_next(struct iterator *iterator, struct tuple **ret)
+{
+	struct memtx_tree_iterator *it = (struct memtx_tree_iterator *)iterator;
+	assert(MEMTX_TREE_ELEM_GET(&it->current) != NULL);
+	memtx_tree_elem *check =
+		bps_tree_iterator_get_elem(it->tree, &it->tree_iterator);
+	if (check == NULL || !MEMTX_TREE_ELEM_EQUAL(check, &it->current)) {
+		it->tree_iterator =
+			bps_tree_upper_bound_elem(it->tree, it->current, NULL);
+	} else {
+		bps_tree_iterator_next(it->tree, &it->tree_iterator);
+	}
+	tuple_unref(MEMTX_TREE_ELEM_GET(&it->current));
+	MEMTX_TREE_ELEM_SET(&it->current, NULL, it->index_def->key_def);
+	memtx_tree_elem *res =
+		bps_tree_iterator_get_elem(it->tree, &it->tree_iterator);
+	if (res == NULL) {
+		iterator->next = memtx_tree_iterator_dummie;
+		*ret = NULL;
+	} else {
+		*ret = MEMTX_TREE_ELEM_GET(res);
+		MEMTX_TREE_ELEM_SET(&it->current, *ret, it->index_def->key_def);
+		tuple_ref(*ret);
+	}
+	return 0;
+}
+
+static int
+memtx_tree_iterator_prev(struct iterator *iterator, struct tuple **ret)
+{
+	struct memtx_tree_iterator *it = (struct memtx_tree_iterator *)iterator;
+	assert(MEMTX_TREE_ELEM_GET(&it->current) != NULL);
+	memtx_tree_elem *check =
+		bps_tree_iterator_get_elem(it->tree, &it->tree_iterator);
+	if (check == NULL || !MEMTX_TREE_ELEM_EQUAL(check, &it->current)) {
+		it->tree_iterator =
+			bps_tree_lower_bound_elem(it->tree, it->current, NULL);
+	}
+	bps_tree_iterator_prev(it->tree, &it->tree_iterator);
+	tuple_unref(MEMTX_TREE_ELEM_GET(&it->current));
+	MEMTX_TREE_ELEM_SET(&it->current, NULL, it->index_def->key_def);
+	memtx_tree_elem *res =
+		bps_tree_iterator_get_elem(it->tree, &it->tree_iterator);
+	if (!res) {
+		iterator->next = memtx_tree_iterator_dummie;
+		*ret = NULL;
+	} else {
+		*ret = MEMTX_TREE_ELEM_GET(res);
+		MEMTX_TREE_ELEM_SET(&it->current, *ret, it->index_def->key_def);
+		tuple_ref(*ret);
+	}
+	return 0;
+}
+
+static int
+memtx_tree_iterator_next_equal(struct iterator *iterator, struct tuple **ret)
+{
+	struct memtx_tree_iterator *it = (struct memtx_tree_iterator *)iterator;
+	assert(MEMTX_TREE_ELEM_GET(&it->current) != NULL);
+	memtx_tree_elem *check =
+		bps_tree_iterator_get_elem(it->tree, &it->tree_iterator);
+	if (check == NULL || !MEMTX_TREE_ELEM_EQUAL(check, &it->current)) {
+		it->tree_iterator =
+			bps_tree_upper_bound_elem(it->tree, it->current, NULL);
+	} else {
+		bps_tree_iterator_next(it->tree, &it->tree_iterator);
+	}
+	tuple_unref(MEMTX_TREE_ELEM_GET(&it->current));
+	MEMTX_TREE_ELEM_SET(&it->current, NULL, it->index_def->key_def);
+	memtx_tree_elem *res =
+		bps_tree_iterator_get_elem(it->tree, &it->tree_iterator);
+	/* Use user key def to save a few loops. */
+	if (!res ||
+	    MEMTX_TREE_ELEM_WITH_KEY_CMP(res, &it->key_data,
+					 it->index_def->key_def) != 0) {
+		iterator->next = memtx_tree_iterator_dummie;
+		*ret = NULL;
+	} else {
+		*ret = MEMTX_TREE_ELEM_GET(res);
+		MEMTX_TREE_ELEM_SET(&it->current, *ret, it->index_def->key_def);
+		tuple_ref(*ret);
+	}
+	return 0;
+}
+
+static int
+memtx_tree_iterator_prev_equal(struct iterator *iterator, struct tuple **ret)
+{
+	struct memtx_tree_iterator *it = (struct memtx_tree_iterator *)iterator;
+	assert(MEMTX_TREE_ELEM_GET(&it->current) != NULL);
+	memtx_tree_elem *check =
+		bps_tree_iterator_get_elem(it->tree, &it->tree_iterator);
+	if (check == NULL || !MEMTX_TREE_ELEM_EQUAL(check, &it->current)) {
+		it->tree_iterator =
+			bps_tree_lower_bound_elem(it->tree, it->current, NULL);
+	}
+	bps_tree_iterator_prev(it->tree, &it->tree_iterator);
+	tuple_unref(MEMTX_TREE_ELEM_GET(&it->current));
+	MEMTX_TREE_ELEM_SET(&it->current, NULL, it->index_def->key_def);
+	memtx_tree_elem *res =
+		bps_tree_iterator_get_elem(it->tree, &it->tree_iterator);
+	/* Use user key def to save a few loops. */
+	if (!res ||
+	    MEMTX_TREE_ELEM_WITH_KEY_CMP(res, &it->key_data,
+					 it->index_def->key_def) != 0) {
+		iterator->next = memtx_tree_iterator_dummie;
+		*ret = NULL;
+	} else {
+		*ret = MEMTX_TREE_ELEM_GET(res);
+		MEMTX_TREE_ELEM_SET(&it->current, *ret, it->index_def->key_def);
+		tuple_ref(*ret);
+	}
+	return 0;
+}
+
+static void
+memtx_tree_iterator_set_next_method(struct memtx_tree_iterator *it)
+{
+	assert(MEMTX_TREE_ELEM_GET(&it->current) != NULL);
+	switch (it->type) {
+	case ITER_EQ:
+		it->base.next = memtx_tree_iterator_next_equal;
+		break;
+	case ITER_REQ:
+		it->base.next = memtx_tree_iterator_prev_equal;
+		break;
+	case ITER_ALL:
+		it->base.next = memtx_tree_iterator_next;
+		break;
+	case ITER_LT:
+	case ITER_LE:
+		it->base.next = memtx_tree_iterator_prev;
+		break;
+	case ITER_GE:
+	case ITER_GT:
+		it->base.next = memtx_tree_iterator_next;
+		break;
+	default:
+		/* The type was checked in initIterator. */
+		assert(false);
+	}
+}
+
+static int
+memtx_tree_iterator_start(struct iterator *iterator, struct tuple **ret)
+{
+	*ret = NULL;
+	struct memtx_tree_iterator *it = (struct memtx_tree_iterator *)iterator;
+	it->base.next = memtx_tree_iterator_dummie;
+	const struct bps_tree *tree = it->tree;
+	enum iterator_type type = it->type;
+	bool exact = false;
+	assert(MEMTX_TREE_ELEM_GET(&it->current) == NULL);
+	uint32_t part_count;
+	const char *key = MEMTX_TREE_KEY_GET(&it->key_data, &part_count);
+	if (key == NULL) {
+		if (iterator_type_is_reverse(it->type))
+			it->tree_iterator = bps_tree_iterator_last(tree);
+		else
+			it->tree_iterator = bps_tree_iterator_first(tree);
+	} else {
+		if (type == ITER_ALL || type == ITER_EQ ||
+		    type == ITER_GE || type == ITER_LT) {
+			it->tree_iterator =
+				bps_tree_lower_bound(tree, &it->key_data,
+						     &exact);
+			if (type == ITER_EQ && !exact)
+				return 0;
+		} else {
+			/* ITER_GT, ITER_REQ, ITER_LE. */
+			it->tree_iterator =
+				bps_tree_upper_bound(tree, &it->key_data,
+						     &exact);
+			if (type == ITER_REQ && !exact)
+				return 0;
+		}
+		if (iterator_type_is_reverse(type)) {
+			/*
+			 * Because of limitations of tree search
+			 * API we use use lower_bound for LT
+			 * search and upper_bound for LE and REQ
+			 * searches. Thus we found position to the
+			 * right of the target one. Let's make a
+			 * step to the left to reach target
+			 * position.
+			 * If we found an invalid iterator all the
+			 * elements in the tree are less (less or
+			 * equal) to the key, and iterator_next
+			 * call will convert the iterator to the
+			 * last position in the tree, that's what
+			 * we need.
+			 */
+			bps_tree_iterator_prev(it->tree, &it->tree_iterator);
+		}
+	}
+
+	memtx_tree_elem *res =
+		bps_tree_iterator_get_elem(it->tree, &it->tree_iterator);
+	if (!res)
+		return 0;
+	*ret = MEMTX_TREE_ELEM_GET(res);
+	MEMTX_TREE_ELEM_SET(&it->current, *ret, it->index_def->key_def);
+	tuple_ref(*ret);
+	memtx_tree_iterator_set_next_method(it);
+	return 0;
+}
+
+/* }}} */
+
+/* {{{ MemtxTree  ***********************************************/
+
+/**
+ * Return the key def to use for comparing tuples stored
+ * in the given tree index.
+ *
+ * We use extended key def for non-unique and nullable
+ * indexes. Unique but nullable index can store multiple
+ * NULLs. To correctly compare these NULLs extended key
+ * def must be used. For details @sa tuple_compare.cc.
+ */
+static struct key_def *
+memtx_tree_index_cmp_def(struct memtx_tree_index *index)
+{
+	struct index_def *def = index->base.def;
+	return def->opts.is_unique && !def->key_def->is_nullable ?
+	       def->key_def : def->cmp_def;
+}
+
+static void
+memtx_tree_index_free(struct memtx_tree_index *index)
+{
+	bps_tree_destroy(&index->tree);
+	free(index->build_array);
+	free(index);
+}
+
+static void
+memtx_tree_index_gc_run(struct memtx_gc_task *task, bool *done)
+{
+	/*
+	 * Yield every 1K tuples to keep latency < 0.1 ms.
+	 * Yield more often in debug mode.
+	 */
+#ifdef NDEBUG
+	enum { YIELD_LOOPS = 1000 };
+#else
+	enum { YIELD_LOOPS = 10 };
+#endif
+
+	struct memtx_tree_index *index =
+		container_of(task, struct memtx_tree_index, gc_task);
+	struct bps_tree *tree = &index->tree;
+	struct bps_tree_iterator *itr = &index->gc_iterator;
+
+	unsigned int loops = 0;
+	while (!bps_tree_iterator_is_invalid(itr)) {
+		memtx_tree_elem *res =
+			bps_tree_iterator_get_elem(tree, itr);
+		struct tuple *tuple = MEMTX_TREE_ELEM_GET(res);
+		bps_tree_iterator_next(tree, itr);
+		tuple_unref(tuple);
+		if (++loops >= YIELD_LOOPS) {
+			*done = false;
+			return;
+		}
+	}
+	*done = true;
+}
+
+static void
+memtx_tree_index_gc_free(struct memtx_gc_task *task)
+{
+	struct memtx_tree_index *index =
+		container_of(task, struct memtx_tree_index, gc_task);
+	memtx_tree_index_free(index);
+}
+
+static const struct memtx_gc_task_vtab memtx_tree_index_gc_vtab = {
+	.run = memtx_tree_index_gc_run,
+	.free = memtx_tree_index_gc_free,
+};
+
+static void
+memtx_tree_index_destroy(struct index *base)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	struct memtx_engine *memtx = (struct memtx_engine *)base->engine;
+	if (base->def->iid == 0) {
+		/*
+		 * Primary index. We need to free all tuples
+		 * stored in the index, which may take a while.
+		 * Schedule a background task in order not to
+		 * block tx thread.
+		 */
+		index->gc_task.vtab = &memtx_tree_index_gc_vtab;
+		index->gc_iterator = bps_tree_iterator_first(&index->tree);
+		memtx_engine_schedule_gc(memtx, &index->gc_task);
+	} else {
+		/*
+		 * Secondary index. Destruction is fast, no need
+		 * to hand over to background fiber.
+		 */
+		memtx_tree_index_free(index);
+	}
+}
+
+static void
+memtx_tree_index_update_def(struct index *base)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	index->tree.arg = memtx_tree_index_cmp_def(index);
+}
+
+static bool
+memtx_tree_index_depends_on_pk(struct index *base)
+{
+	struct index_def *def = base->def;
+	/* See comment to memtx_tree_index_cmp_def(). */
+	return !def->opts.is_unique || def->key_def->is_nullable;
+}
+
+static ssize_t
+memtx_tree_index_size(struct index *base)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	return bps_tree_size(&index->tree);
+}
+
+static ssize_t
+memtx_tree_index_bsize(struct index *base)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	return bps_tree_mem_used(&index->tree);
+}
+
+static int
+memtx_tree_index_random(struct index *base, uint32_t rnd, struct tuple **result)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	memtx_tree_elem *res = bps_tree_random(&index->tree, rnd);
+	*result = res != NULL ? MEMTX_TREE_ELEM_GET(res) : NULL;
+	return 0;
+}
+
+static ssize_t
+memtx_tree_index_count(struct index *base, enum iterator_type type,
+		       const char *key, uint32_t part_count)
+{
+	if (type == ITER_ALL) {
+		/* Optimization. */
+		return memtx_tree_index_size(base);
+	}
+	return generic_index_count(base, type, key, part_count);
+}
+
+static int
+memtx_tree_index_get(struct index *base, const char *key,
+		     uint32_t part_count, struct tuple **result)
+{
+	assert(base->def->opts.is_unique &&
+	       part_count == base->def->key_def->part_count);
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	memtx_tree_key key_data;
+	struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
+	MEMTX_TREE_KEY_SET(&key_data, key, part_count, cmp_def);
+	memtx_tree_elem *res = bps_tree_find(&index->tree, &key_data);
+	*result = res != NULL ? MEMTX_TREE_ELEM_GET(res) : NULL;
+	return 0;
+}
+
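+/**
+ * Insert a tuple into the tree. If a matching element is
+ * replaced, return its tuple in @replaced. Return non-zero on
+ * memory allocation failure.
+ */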
+static int
+memtx_tree_index_insert_tuple(struct index *base, struct tuple *tuple,
+			      struct tuple **replaced)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	memtx_tree_elem data;
+	MEMTX_TREE_ELEM_SET(&data, tuple, index->tree.arg);
+	memtx_tree_elem data_replaced;
+	memset(&data_replaced, 0, sizeof(data_replaced));
+	int rc = bps_tree_insert(&index->tree, data, &data_replaced);
+	if (replaced != NULL)
+		*replaced = MEMTX_TREE_ELEM_GET(&data_replaced);
+	return rc;
+}
+
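+/** Delete the element built from @tuple from the tree. */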
+static void
+memtx_tree_index_delete_tuple(struct index *base, struct tuple *tuple)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	memtx_tree_elem data;
+	MEMTX_TREE_ELEM_SET(&data, tuple, index->tree.arg);
+	bps_tree_delete(&index->tree, data);
+}
+
+static int
+memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
+			 struct tuple *new_tuple, enum dup_replace_mode mode,
+			 struct tuple **result)
+{
+	if (new_tuple) {
+		struct tuple *dup_tuple = NULL;
+
+		/* Try to optimistically replace the new_tuple. */
+		int tree_res =
+			memtx_tree_index_insert_tuple(base, new_tuple,
+						      &dup_tuple);
+		if (tree_res) {
+			diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
+				 "memtx_tree_index", "replace");
+			return -1;
+		}
+
+		uint32_t errcode =
+			replace_check_dup(old_tuple, dup_tuple, mode);
+		if (errcode) {
+			memtx_tree_index_delete_tuple(base, new_tuple);
+			if (dup_tuple)
+				memtx_tree_index_insert_tuple(base, dup_tuple,
+							      NULL);
+			struct space *sp = space_cache_find(base->def->space_id);
+			if (sp != NULL) {
+				diag_set(ClientError, errcode, base->def->name,
+					 space_name(sp));
+			}
+			return -1;
+		}
+		if (dup_tuple) {
+			*result = dup_tuple;
+			return 0;
+		}
+	}
+	if (old_tuple)
+		memtx_tree_index_delete_tuple(base, old_tuple);
+	*result = old_tuple;
+	return 0;
+}
+
+static struct iterator *
+memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
+				 const char *key, uint32_t part_count)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	struct memtx_engine *memtx = (struct memtx_engine *)base->engine;
+
+	assert(part_count == 0 || key != NULL);
+	if (type > ITER_GT) {
+		diag_set(UnsupportedIndexFeature, base->def,
+			 "requested iterator type");
+		return NULL;
+	}
+
+	if (part_count == 0) {
+		/*
+		 * If no key is specified, downgrade equality
+		 * iterators to a full range.
+		 */
+		type = iterator_type_is_reverse(type) ? ITER_LE : ITER_GE;
+		key = NULL;
+	}
+
+	struct memtx_tree_iterator *it =
+		mempool_alloc(&memtx->tree_iterator_pool);
+	if (it == NULL) {
+		diag_set(OutOfMemory, sizeof(struct memtx_tree_iterator),
+			 "memtx_tree_index", "iterator");
+		return NULL;
+	}
+	iterator_create(&it->base, base);
+	it->pool = &memtx->tree_iterator_pool;
+	it->base.next = memtx_tree_iterator_start;
+	it->base.free = memtx_tree_iterator_free;
+	it->type = type;
+	struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
+	MEMTX_TREE_KEY_SET(&it->key_data, key, part_count, cmp_def);
+	it->index_def = base->def;
+	it->tree = &index->tree;
+	it->tree_iterator = bps_tree_invalid_iterator();
+	MEMTX_TREE_ELEM_SET(&it->current, NULL, it->index_def->key_def);
+	return (struct iterator *)it;
+}
+
+static void
+memtx_tree_index_begin_build(struct index *base)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	assert(bps_tree_size(&index->tree) == 0);
+	(void)index;
+}
+
+static int
+memtx_tree_index_reserve(struct index *base, uint32_t size_hint)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	if (size_hint < index->build_array_alloc_size)
+		return 0;
+	memtx_tree_elem *tmp =
+		realloc(index->build_array, size_hint * sizeof(*tmp));
+	if (tmp == NULL) {
+		diag_set(OutOfMemory, size_hint * sizeof(*tmp),
+			 "memtx_tree_index", "reserve");
+		return -1;
+	}
+	index->build_array = tmp;
+	index->build_array_alloc_size = size_hint;
+	return 0;
+}
+
+static int
+memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	if (index->build_array == NULL) {
+		index->build_array =
+			(memtx_tree_elem *)malloc(MEMTX_EXTENT_SIZE);
+		if (index->build_array == NULL) {
+			diag_set(OutOfMemory, MEMTX_EXTENT_SIZE,
+				 "memtx_tree_index", "build_next");
+			return -1;
+		}
+		index->build_array_alloc_size =
+			MEMTX_EXTENT_SIZE / sizeof(index->build_array[0]);
+	}
+	assert(index->build_array_size <= index->build_array_alloc_size);
+	if (index->build_array_size == index->build_array_alloc_size) {
+		index->build_array_alloc_size = index->build_array_alloc_size +
+					index->build_array_alloc_size / 2;
+		memtx_tree_elem *tmp = realloc(index->build_array,
+				index->build_array_alloc_size * sizeof(*tmp));
+		if (tmp == NULL) {
+			diag_set(OutOfMemory, index->build_array_alloc_size *
+				 sizeof(*tmp), "memtx_tree_index", "build_next");
+			return -1;
+		}
+		index->build_array = tmp;
+	}
+	MEMTX_TREE_ELEM_SET(&index->build_array[index->build_array_size++],
+			    tuple, memtx_tree_index_cmp_def(index));
+	return 0;
+}
+
+static void
+memtx_tree_index_end_build(struct index *base)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
+	qsort_arg(index->build_array, index->build_array_size,
+		  sizeof(index->build_array[0]), memtx_tree_qcompare, cmp_def);
+	bps_tree_build(&index->tree, index->build_array,
+		       index->build_array_size);
+
+	free(index->build_array);
+	index->build_array = NULL;
+	index->build_array_size = 0;
+	index->build_array_alloc_size = 0;
+}
+
+struct memtx_tree_snapshot_iterator {
+	struct snapshot_iterator base;
+	struct bps_tree *tree;
+	struct bps_tree_iterator tree_iterator;
+};
+
+static void
+memtx_tree_snapshot_iterator_free(struct snapshot_iterator *iterator)
+{
+	assert(iterator->free == memtx_tree_snapshot_iterator_free);
+	struct memtx_tree_snapshot_iterator *it =
+		(struct memtx_tree_snapshot_iterator *)iterator;
+	bps_tree_iterator_destroy(it->tree, &it->tree_iterator);
+	free(iterator);
+}
+
+static const char *
+memtx_tree_snapshot_iterator_next(struct snapshot_iterator *iterator,
+				  uint32_t *size)
+{
+	assert(iterator->free == memtx_tree_snapshot_iterator_free);
+	struct memtx_tree_snapshot_iterator *it =
+		(struct memtx_tree_snapshot_iterator *)iterator;
+	memtx_tree_elem *res =
+		bps_tree_iterator_get_elem(it->tree, &it->tree_iterator);
+	if (res == NULL)
+		return NULL;
+	bps_tree_iterator_next(it->tree, &it->tree_iterator);
+	return tuple_data_range(MEMTX_TREE_ELEM_GET(res), size);
+}
+
+/**
+ * Create an ALL iterator with personal read view so further
+ * index modifications will not affect the iteration results.
+ * Must be destroyed by iterator->free after usage.
+ */
+static struct snapshot_iterator *
+memtx_tree_index_create_snapshot_iterator(struct index *base)
+{
+	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+	struct memtx_tree_snapshot_iterator *it =
+		(struct memtx_tree_snapshot_iterator *)calloc(1, sizeof(*it));
+	if (it == NULL) {
+		diag_set(OutOfMemory, sizeof(struct memtx_tree_snapshot_iterator),
+			 "memtx_tree_index", "create_snapshot_iterator");
+		return NULL;
+	}
+
+	it->base.free = memtx_tree_snapshot_iterator_free;
+	it->base.next = memtx_tree_snapshot_iterator_next;
+	it->tree = &index->tree;
+	it->tree_iterator = bps_tree_iterator_first(&index->tree);
+	bps_tree_iterator_freeze(&index->tree, &it->tree_iterator);
+	return (struct snapshot_iterator *) it;
+}
+
+static const struct index_vtab memtx_tree_index_vtab = {
+	/* .destroy = */ memtx_tree_index_destroy,
+	/* .commit_create = */ generic_index_commit_create,
+	/* .abort_create = */ generic_index_abort_create,
+	/* .commit_modify = */ generic_index_commit_modify,
+	/* .commit_drop = */ generic_index_commit_drop,
+	/* .update_def = */ memtx_tree_index_update_def,
+	/* .depends_on_pk = */ memtx_tree_index_depends_on_pk,
+	/* .def_change_requires_rebuild = */
+		memtx_index_def_change_requires_rebuild,
+	/* .size = */ memtx_tree_index_size,
+	/* .bsize = */ memtx_tree_index_bsize,
+	/* .min = */ generic_index_min,
+	/* .max = */ generic_index_max,
+	/* .random = */ memtx_tree_index_random,
+	/* .count = */ memtx_tree_index_count,
+	/* .get = */ memtx_tree_index_get,
+	/* .replace = */ memtx_tree_index_replace,
+	/* .create_iterator = */ memtx_tree_index_create_iterator,
+	/* .create_snapshot_iterator = */
+		memtx_tree_index_create_snapshot_iterator,
+	/* .stat = */ generic_index_stat,
+	/* .compact = */ generic_index_compact,
+	/* .reset_stat = */ generic_index_reset_stat,
+	/* .begin_build = */ memtx_tree_index_begin_build,
+	/* .reserve = */ memtx_tree_index_reserve,
+	/* .build_next = */ memtx_tree_index_build_next,
+	/* .end_build = */ memtx_tree_index_end_build,
+};
+
+struct index *
+memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
+{
+	if (!mempool_is_initialized(&memtx->tree_iterator_pool)) {
+		mempool_create(&memtx->tree_iterator_pool, cord_slab_cache(),
+			       MEMTX_TREE_ITERATOR_SIZE);
+	}
+
+	struct memtx_tree_index *index =
+		(struct memtx_tree_index *)calloc(1, sizeof(*index));
+	if (index == NULL) {
+		diag_set(OutOfMemory, sizeof(*index),
+			 "malloc", "struct memtx_tree_index");
+		return NULL;
+	}
+	if (index_create(&index->base, (struct engine *)memtx,
+			 &memtx_tree_index_vtab, def) != 0) {
+		free(index);
+		return NULL;
+	}
+
+	struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
+	bps_tree_create(&index->tree, cmp_def, memtx_index_extent_alloc,
+			memtx_index_extent_free, memtx);
+	return (struct index *)index;
+}
+
+/* }}} */
+
+#undef bps_tree
+#undef _tree_api_name
+#undef bps_tree_iterator
+#undef bps_tree_iterator_is_invalid
+#undef bps_tree_iterator_first
+#undef bps_tree_iterator_last
+#undef bps_tree_iterator_get_elem
+#undef bps_tree_iterator_next
+#undef bps_tree_iterator_prev
+#undef bps_tree_iterator_freeze
+#undef bps_tree_iterator_destroy
+#undef bps_tree_create
+#undef bps_tree_build
+#undef bps_tree_destroy
+#undef bps_tree_find
+#undef bps_tree_insert
+#undef bps_tree_delete
+#undef bps_tree_size
+#undef bps_tree_mem_used
+#undef bps_tree_random
+#undef bps_tree_invalid_iterator
+#undef bps_tree_lower_bound
+#undef bps_tree_upper_bound
+#undef bps_tree_lower_bound_elem
+#undef bps_tree_upper_bound_elem
+
+#undef _api_name
+#undef memtx_tree_index
+#undef memtx_tree_qcompare
+#undef memtx_tree_iterator
+#undef memtx_tree_iterator_free
+#undef memtx_tree_iterator_dummie
+#undef memtx_tree_iterator_next
+#undef memtx_tree_iterator_prev
+#undef memtx_tree_iterator_next_equal
+#undef memtx_tree_iterator_prev_equal
+#undef memtx_tree_iterator_set_next_method
+#undef memtx_tree_iterator_start
+#undef memtx_tree_index_cmp_def
+#undef memtx_tree_index_free
+#undef memtx_tree_index_gc_run
+#undef memtx_tree_index_gc_free
+#undef memtx_tree_index_gc_vtab
+#undef memtx_tree_index_destroy
+#undef memtx_tree_index_update_def
+#undef memtx_tree_index_depends_on_pk
+#undef memtx_tree_index_size
+#undef memtx_tree_index_bsize
+#undef memtx_tree_index_random
+#undef memtx_tree_index_count
+#undef memtx_tree_index_get
+#undef memtx_tree_index_insert_tuple
+#undef memtx_tree_index_delete_tuple
+#undef memtx_tree_index_replace
+#undef memtx_tree_index_create_iterator
+#undef memtx_tree_index_begin_build
+#undef memtx_tree_index_build_next
+#undef memtx_tree_index_reserve
+#undef memtx_tree_index_end_build
+#undef memtx_tree_snapshot_iterator
+#undef memtx_tree_snapshot_iterator_free
+#undef memtx_tree_snapshot_iterator_next
+#undef memtx_tree_index_create_snapshot_iterator
+#undef memtx_tree_index_vtab
+#undef memtx_tree_index_new
+
-- 
2.20.1