[PATCH v2 2/4] box: rework memtx_tree to store arbitrary nodes

Vladimir Davydov vdavydov.dev at gmail.com
Thu Feb 21 18:01:55 MSK 2019


On Wed, Feb 13, 2019 at 12:32:26PM +0300, Kirill Shcherbatov wrote:
> Reworked memtx_tree class to use arbitrary containers as a tree
> nodes. This makes possible to implement tuple hints in subsequent
> patches.
> 
> The memtx_tree.c is currently used in a header manner. This is to
> ensure that functional changes in the patch do not mix with
> refactoring, that will be finished in the next commit.
> 
> Needed for #3961
> ---
>  src/box/CMakeLists.txt    |   2 +-
>  src/box/memtx_tree.c      | 498 ++++++++++++++++++++++++++------------
>  src/box/memtx_tree_decl.c |  84 +++++++
>  3 files changed, 434 insertions(+), 150 deletions(-)
>  create mode 100644 src/box/memtx_tree_decl.c
> 
> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
> index 5521e489e..dc8cc2cd5 100644
> --- a/src/box/CMakeLists.txt
> +++ b/src/box/CMakeLists.txt
> @@ -64,7 +64,7 @@ add_library(box STATIC
>      index_def.c
>      iterator_type.c
>      memtx_hash.c
> -    memtx_tree.c
> +    memtx_tree_decl.c
>      memtx_rtree.c
>      memtx_bitset.c
>      engine.c
> diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
> index 2e8adb682..0874b7dc8 100644
> --- a/src/box/memtx_tree.c
> +++ b/src/box/memtx_tree.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright 2010-2016, Tarantool AUTHORS, please see AUTHORS file.
> + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
>   *
>   * Redistribution and use in source and binary forms, with or
>   * without modification, are permitted provided that the following
> @@ -28,72 +28,156 @@
>   * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>   * SUCH DAMAGE.
>   */
> -#include "memtx_tree.h"
> -#include "memtx_engine.h"
> -#include "space.h"
> -#include "schema.h" /* space_cache_find() */
> -#include "errinj.h"
> -#include "memory.h"
> -#include "fiber.h"
> -#include "tuple.h"
> -#include <third_party/qsort_arg.h>

Why touch header inclusions?

> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif /* defined(__cplusplus) */
> +

This isn't needed in a header like this.

>  #include <small/mempool.h>
> +#include <third_party/qsort_arg.h>
> +#include "memtx_engine.h"
> +#include "schema.h"
>  
> -/**
> - * Struct that is used as a key in BPS tree definition.
> - */
> -struct memtx_tree_key_data {
> -	/** Sequence of msgpacked search fields. */
> -	const char *key;
> -	/** Number of msgpacked search fields. */
> -	uint32_t part_count;
> -};
> +#ifndef MEMTX_TREE_NAME
> +#error "MEMTX_TREE_NAME must be defined"
> +#endif
> +
> +#ifndef memtx_tree_elem
> +#error "memtx_tree_elem must be defined"
> +#endif
> +
> +#ifndef memtx_tree_key
> +#error "memtx_tree_key must be defined"
> +#endif

Let's please use _t suffix for types (memtx_tree_elem_t,
memtx_tree_key_t).

> +
> +#ifndef MEMTX_TREE_ELEM_CMP
> +#error "MEMTX_TREE_ELEM_CMP must be defined"
> +#endif
> +
> +#ifndef MEMTX_TREE_ELEM_WITH_KEY_CMP
> +#error "MEMTX_TREE_ELEM_WITH_KEY_CMP must be defined"
> +#endif

May be, call these MEMTX_TREE_COMPARE and MEMTX_TREE_COMPARE_KEY to
match BPS tree macros?

> +
> +#ifndef MEMTX_TREE_ELEM_SET
> +#error "MEMTX_TREE_ELEM_SET must be defined"
> +#endif
> +
> +#ifndef MEMTX_TREE_KEY_SET
> +#error "MEMTX_TREE_KEY_SET must be defined"
> +#endif
> +
> +#ifndef MEMTX_TREE_ELEM_GET
> +#error "MEMTX_TREE_ELEM_GET must be defined"
> +#endif
> +
> +#ifndef MEMTX_TREE_KEY_GET
> +#error "MEMTX_TREE_KEY_GET must be defined"
> +#endif

Please add a comment to each of the macros describing what it is
supposed to do.

>  
>  /**
> - * BPS tree element vs key comparator.
> - * Defined in header in order to allow compiler to inline it.
> - * @param tuple - tuple to compare.
> - * @param key_data - key to compare with.
> - * @param def - key definition.
> - * @retval 0  if tuple == key in terms of def.
> - * @retval <0 if tuple < key in terms of def.
> - * @retval >0 if tuple > key in terms of def.
> + * The size of the biggest memtx tree iterator. Used with
> + * mempool_create. This is the size of the block that will be
> + * allocated for each iterator.
>   */
> -static int
> -memtx_tree_compare_key(const struct tuple *tuple,
> -		       const struct memtx_tree_key_data *key_data,
> -		       struct key_def *def)
> -{
> -	return tuple_compare_with_key(tuple, key_data->key,
> -				      key_data->part_count, def);
> -}
> +#define MEMTX_TREE_ITERATOR_SIZE (152)

Take a look at struct memtx_engine:

	/** Memory pool for tree index iterator. */
	struct mempool tree_iterator_pool;
	/** Memory pool for rtree index iterator. */
	struct mempool rtree_iterator_pool;
	/** Memory pool for hash index iterator. */
	struct mempool hash_iterator_pool;
	/** Memory pool for bitset index iterator. */
	struct mempool bitset_iterator_pool;

This looks ugly. Let's use this trick with static iterator size for all
index types. Please do it in a separate patch.

>  
> -#define BPS_TREE_NAME memtx_tree
> +#define bps_tree_elem_t memtx_tree_elem
> +#define bps_tree_key_t memtx_tree_key *
> +#define bps_tree_arg_t struct key_def *
> +#define BPS_TREE_NAME CONCAT3(MEMTX_TREE_NAME, _, tree)
>  #define BPS_TREE_BLOCK_SIZE (512)
>  #define BPS_TREE_EXTENT_SIZE MEMTX_EXTENT_SIZE
> -#define BPS_TREE_COMPARE(a, b, arg) tuple_compare(a, b, arg)
> -#define BPS_TREE_COMPARE_KEY(a, b, arg) memtx_tree_compare_key(a, b, arg)
> -#define bps_tree_elem_t struct tuple *
> -#define bps_tree_key_t struct memtx_tree_key_data *
> -#define bps_tree_arg_t struct key_def *
> +#define BPS_TREE_COMPARE(a, b, arg) MEMTX_TREE_ELEM_CMP(&a, &b, arg)
> +#define BPS_TREE_COMPARE_KEY(a, b, arg) MEMTX_TREE_ELEM_WITH_KEY_CMP(&a, b, arg)
> +#define BPS_TREE_NO_DEBUG
>  
>  #include "salad/bps_tree.h"
>  
> +#undef bps_tree_elem_t
> +#undef bps_tree_key_t
> +#undef bps_tree_arg_t
>  #undef BPS_TREE_NAME
>  #undef BPS_TREE_BLOCK_SIZE
>  #undef BPS_TREE_EXTENT_SIZE
>  #undef BPS_TREE_COMPARE
>  #undef BPS_TREE_COMPARE_KEY
> -#undef bps_tree_elem_t
> -#undef bps_tree_key_t
> -#undef bps_tree_arg_t
> +#undef BPS_TREE_NO_DEBUG
> +
> +#define memtx_tree CONCAT3(MEMTX_TREE_NAME, _, tree)
> +#define _tree_api_name(postfix) CONCAT5(MEMTX_TREE_NAME, _, tree, _, postfix)

Why not _api_name?

> +#define memtx_tree_iterator _tree_api_name(iterator)
> +#define memtx_tree_iterator_is_invalid _tree_api_name(iterator_is_invalid)
> +#define memtx_tree_iterator_first _tree_api_name(iterator_first)
> +#define memtx_tree_iterator_last _tree_api_name(iterator_last)
> +#define memtx_tree_iterator_get_elem _tree_api_name(iterator_get_elem)
> +#define memtx_tree_iterator_next _tree_api_name(iterator_next)
> +#define memtx_tree_iterator_prev _tree_api_name(iterator_prev)
> +#define memtx_tree_iterator_freeze _tree_api_name(iterator_freeze)
> +#define memtx_tree_iterator_destroy _tree_api_name(iterator_destroy)
> +#define memtx_tree_create _tree_api_name(create)
> +#define memtx_tree_build _tree_api_name(build)
> +#define memtx_tree_destroy _tree_api_name(destroy)
> +#define memtx_tree_find _tree_api_name(find)
> +#define memtx_tree_insert _tree_api_name(insert)
> +#define memtx_tree_delete _tree_api_name(delete)
> +#define memtx_tree_size _tree_api_name(size)
> +#define memtx_tree_mem_used _tree_api_name(mem_used)
> +#define memtx_tree_random _tree_api_name(random)
> +#define memtx_tree_invalid_iterator _tree_api_name(invalid_iterator)
> +#define memtx_tree_lower_bound _tree_api_name(lower_bound)
> +#define memtx_tree_upper_bound _tree_api_name(upper_bound)
> +#define memtx_tree_lower_bound_elem _tree_api_name(lower_bound_elem)
> +#define memtx_tree_upper_bound_elem _tree_api_name(upper_bound_elem)
> +
> +#undef _api_name

BPS tree should undef all macros it defines on its own. Please fix it in
a separate patch.

> +#define _api_name(postfix) CONCAT3(MEMTX_TREE_NAME, _, postfix)
> +#define memtx_tree_index _api_name(index)
> +#define memtx_tree_qcompare _api_name(qcompare)
> +#define tree_iterator _api_name(iterator)
> +#define tree_iterator_free _api_name(iterator_free)
> +#define tree_iterator_cast _api_name(iterator_cast)
> +#define tree_iterator_dummie _api_name(iterator_dummie)
> +#define tree_iterator_next _api_name(iterator_next)
> +#define tree_iterator_prev _api_name(iterator_prev)
> +#define tree_iterator_next_equal _api_name(iterator_next_equal)
> +#define tree_iterator_prev_equal _api_name(iterator_prev_equal)
> +#define tree_iterator_set_next_method _api_name(iterator_set_next_method)
> +#define tree_iterator_start _api_name(iterator_start)
> +#define memtx_tree_index_cmp_def _api_name(index_cmp_def)
> +#define memtx_tree_index_free _api_name(index_free)
> +#define memtx_tree_index_gc_run _api_name(index_gc_run)
> +#define memtx_tree_index_gc_free _api_name(index_gc_free)
> +#define memtx_tree_index_gc_vtab _api_name(index_gc_vtab)
> +#define memtx_tree_index_destroy _api_name(index_destroy)
> +#define memtx_tree_index_update_def _api_name(index_update_def)
> +#define memtx_tree_index_depends_on_pk _api_name(index_depends_on_pk)
> +#define memtx_tree_index_size _api_name(index_size)
> +#define memtx_tree_index_bsize _api_name(index_bsize)
> +#define memtx_tree_index_random _api_name(index_random)
> +#define memtx_tree_index_count _api_name(index_count)
> +#define memtx_tree_index_get _api_name(index_get)
> +#define memtx_tree_index_insert_tuple _api_name(index_insert_tuple)
> +#define memtx_tree_index_delete_tuple _api_name(index_delete_tuple)
> +#define memtx_tree_index_replace _api_name(index_replace)
> +#define memtx_tree_index_create_iterator _api_name(index_create_iterator)
> +#define memtx_tree_index_begin_build _api_name(index_begin_build)
> +#define memtx_tree_index_build_next _api_name(index_build_next)
> +#define memtx_tree_index_reserve _api_name(index_reserve)
> +#define memtx_tree_index_end_build _api_name(index_end_build)
> +#define tree_snapshot_iterator _api_name(snapshot_iterator)
> +#define tree_snapshot_iterator_free _api_name(snapshot_iterator_free)
> +#define tree_snapshot_iterator_next _api_name(snapshot_iterator_next)
> +#define memtx_tree_index_create_snapshot_iterator\
> +	_api_name(index_create_snapshot_iterator)
> +#define memtx_tree_index_vtab _api_name(index_vtab)
> +#define memtx_tree_index_new _api_name(index_new)

I'd undef _api_name right here, because we don't need it anywhere else.

>  
>  struct memtx_tree_index {
>  	struct index base;
> -	struct memtx_tree tree;
> -	struct tuple **build_array;
> +	memtx_tree_elem *build_array;
>  	size_t build_array_size, build_array_alloc_size;
>  	struct memtx_gc_task gc_task;
> +	struct memtx_tree tree;

Nit: pointless change (moving 'tree' member).

>  	struct memtx_tree_iterator gc_iterator;
>  };
>  
> @@ -102,8 +186,8 @@ struct memtx_tree_index {
>  static int
>  memtx_tree_qcompare(const void* a, const void *b, void *c)
>  {
> -	return tuple_compare(*(struct tuple **)a,
> -		*(struct tuple **)b, (struct key_def *)c);
> +	return MEMTX_TREE_ELEM_CMP((memtx_tree_elem *)a,
> +				   (memtx_tree_elem *)b, c);
>  }
>  
>  /* {{{ MemtxTree Iterators ****************************************/
> @@ -113,17 +197,21 @@ struct tree_iterator {
>  	struct index_def *index_def;
>  	struct memtx_tree_iterator tree_iterator;
>  	enum iterator_type type;
> -	struct memtx_tree_key_data key_data;
> -	struct tuple *current_tuple;
> +	memtx_tree_key key_data;
> +	memtx_tree_elem current;
>  	/** Memory pool the iterator was allocated from. */
>  	struct mempool *pool;
>  };
>  
> +static_assert(sizeof(struct tree_iterator) <= MEMTX_TREE_ITERATOR_SIZE,
> +	      "tree_iterator must be less or equal than "
> +	      "MEMTX_TREE_ITERATOR_SIZE");
> +
>  static void
>  tree_iterator_free(struct iterator *iterator);
>  
>  static inline struct tree_iterator *
> -tree_iterator(struct iterator *it)
> +tree_iterator_cast(struct iterator *it)
>  {
>  	assert(it->free == tree_iterator_free);
>  	return (struct tree_iterator *) it;
> @@ -132,9 +220,10 @@ tree_iterator(struct iterator *it)
>  static void
>  tree_iterator_free(struct iterator *iterator)
>  {
> -	struct tree_iterator *it = tree_iterator(iterator);
> -	if (it->current_tuple != NULL)
> -		tuple_unref(it->current_tuple);
> +	struct tree_iterator *it = tree_iterator_cast(iterator);
> +	struct tuple *tuple = MEMTX_TREE_ELEM_GET(&it->current);
> +	if (tuple != NULL)
> +		tuple_unref(tuple);
>  	mempool_free(it->pool, it);
>  }
>  
> @@ -149,25 +238,28 @@ tree_iterator_dummie(struct iterator *iterator, struct tuple **ret)
>  static int
>  tree_iterator_next(struct iterator *iterator, struct tuple **ret)
>  {
> -	struct tuple **res;
> -	struct tree_iterator *it = tree_iterator(iterator);
> -	assert(it->current_tuple != NULL);
> -	struct tuple **check = memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
> -	if (check == NULL || *check != it->current_tuple)
> +	struct tree_iterator *it = tree_iterator_cast(iterator);
> +	assert(MEMTX_TREE_ELEM_GET(&it->current) != NULL);
> +	memtx_tree_elem *check =
> +		memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
> +	if (check == NULL ||
> +	    MEMTX_TREE_ELEM_GET(check) != MEMTX_TREE_ELEM_GET(&it->current)) {

AFAIU this check isn't quite correct: we want to make sure that
it->current and check refer to the same tree element. So in case
of a multikey index it isn't enough to just check the tuples they
refer to - you also need to check the key offsets.

> diff --git a/src/box/memtx_tree_decl.c b/src/box/memtx_tree_decl.c
> new file mode 100644
> index 000000000..03e0e0ed7
> --- /dev/null
> +++ b/src/box/memtx_tree_decl.c
> @@ -0,0 +1,84 @@
> +/*
> + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
> + *
> + * Redistribution and use in source and binary forms, with or
> + * without modification, are permitted provided that the following
> + * conditions are met:
> + *
> + * 1. Redistributions of source code must retain the above
> + *    copyright notice, this list of conditions and the
> + *    following disclaimer.
> + *
> + * 2. Redistributions in binary form must reproduce the above
> + *    copyright notice, this list of conditions and the following
> + *    disclaimer in the documentation and/or other materials
> + *    provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
> + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
> + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include "tuple_compare.h"
> +#include "memtx_tree.h"
> +
> +/* {{{ Memtx tree of tuples class. ******************************/
> +
> +/** Struct that is used as a key in BPS tree definition. */
> +struct memtx_tuple_tree_key_data {
> +	/** Sequence of msgpacked search fields. */
> +	const char *key;
> +	/** Number of msgpacked search fields. */
> +	uint32_t part_count;
> +};
> +
> +#define MEMTX_TREE_NAME memtx_tuple_tree

I don't quite like the name, because all memtx indexes store tuples,
eventually. May be, memtx_simple_tree or memtx_basic_tree?

> +#define memtx_tree_elem struct tuple *
> +#define memtx_tree_key struct memtx_tuple_tree_key_data
> +#define MEMTX_TREE_ELEM_CMP(elem_a_ptr, elem_b_ptr, key_def)			\
> +	tuple_compare(*elem_a_ptr, *elem_b_ptr, key_def)
> +#define MEMTX_TREE_ELEM_WITH_KEY_CMP(elem_ptr, key_ptr, key_def)		\
> +	tuple_compare_with_key(*elem_ptr, (key_ptr)->key,			\
> +			       (key_ptr)->part_count, key_def)
> +#define MEMTX_TREE_ELEM_SET(elem_ptr, tuple, key_def)				\
> +	({(void)key_def; *elem_ptr = tuple;})
> +#define MEMTX_TREE_KEY_SET(key_ptr, key_val, part_count_val, key_def)		\
> +	({(void)key_def; (key_ptr)->key = key_val;				\
> +	 (key_ptr)->part_count = part_count_val;})
> +#define MEMTX_TREE_ELEM_GET(elem_ptr) (*(elem_ptr))
> +#define MEMTX_TREE_KEY_GET(key_ptr, part_count_ptr)				\
> +	({*part_count_ptr = (key_ptr)->part_count; (key_ptr)->key;})
> +
> +#include "memtx_tree.c"
> +
> +#undef memtx_tree_key
> +#undef memtx_tree_elem
> +#undef MEMTX_TREE_KEY_GET
> +#undef MEMTX_TREE_ELEM_GET
> +#undef MEMTX_TREE_KEY_SET
> +#undef MEMTX_TREE_ELEM_SET
> +#undef MEMTX_TREE_ELEM_WITH_KEY_CMP
> +#undef MEMTX_TREE_ELEM_CMP

> +#undef MEMTX_TREE_ELEM_EQUAL

This one is never defined.

> +#undef memtx_tree_key
> +#undef memtx_tree_elem

You undef those twice.

> +#undef MEMTX_TREE_NAME
> +
> +/* }}} */
> +
> +struct index *
> +memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
> +{
> +	return memtx_tuple_tree_index_new(memtx, def);
> +}



More information about the Tarantool-patches mailing list