[PATCH v2 2/4] box: rework memtx_tree to store arbitrary nodes
Vladimir Davydov
vdavydov.dev at gmail.com
Thu Feb 21 18:01:55 MSK 2019
On Wed, Feb 13, 2019 at 12:32:26PM +0300, Kirill Shcherbatov wrote:
> Reworked memtx_tree class to use arbitrary containers as a tree
> nodes. This makes possible to implement tuple hints in subsequent
> patches.
>
> The memtx_tree.c is currently used in a header manner. This is to
> ensure that functional changes in the patch do not mix with
> refactoring, that will be finished in the next commit.
>
> Needed for #3961
> ---
> src/box/CMakeLists.txt | 2 +-
> src/box/memtx_tree.c | 498 ++++++++++++++++++++++++++------------
> src/box/memtx_tree_decl.c | 84 +++++++
> 3 files changed, 434 insertions(+), 150 deletions(-)
> create mode 100644 src/box/memtx_tree_decl.c
>
> diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
> index 5521e489e..dc8cc2cd5 100644
> --- a/src/box/CMakeLists.txt
> +++ b/src/box/CMakeLists.txt
> @@ -64,7 +64,7 @@ add_library(box STATIC
> index_def.c
> iterator_type.c
> memtx_hash.c
> - memtx_tree.c
> + memtx_tree_decl.c
> memtx_rtree.c
> memtx_bitset.c
> engine.c
> diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
> index 2e8adb682..0874b7dc8 100644
> --- a/src/box/memtx_tree.c
> +++ b/src/box/memtx_tree.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright 2010-2016, Tarantool AUTHORS, please see AUTHORS file.
> + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
> *
> * Redistribution and use in source and binary forms, with or
> * without modification, are permitted provided that the following
> @@ -28,72 +28,156 @@
> * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> * SUCH DAMAGE.
> */
> -#include "memtx_tree.h"
> -#include "memtx_engine.h"
> -#include "space.h"
> -#include "schema.h" /* space_cache_find() */
> -#include "errinj.h"
> -#include "memory.h"
> -#include "fiber.h"
> -#include "tuple.h"
> -#include <third_party/qsort_arg.h>
Why touch header inclusions?
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif /* defined(__cplusplus) */
> +
This isn't needed in a header like this.
> #include <small/mempool.h>
> +#include <third_party/qsort_arg.h>
> +#include "memtx_engine.h"
> +#include "schema.h"
>
> -/**
> - * Struct that is used as a key in BPS tree definition.
> - */
> -struct memtx_tree_key_data {
> - /** Sequence of msgpacked search fields. */
> - const char *key;
> - /** Number of msgpacked search fields. */
> - uint32_t part_count;
> -};
> +#ifndef MEMTX_TREE_NAME
> +#error "MEMTX_TREE_NAME must be defined"
> +#endif
> +
> +#ifndef memtx_tree_elem
> +#error "memtx_tree_elem must be defined"
> +#endif
> +
> +#ifndef memtx_tree_key
> +#error "memtx_tree_key must be defined"
> +#endif
Let's please use _t suffix for types (memtx_tree_elem_t,
memtx_tree_key_t).
> +
> +#ifndef MEMTX_TREE_ELEM_CMP
> +#error "MEMTX_TREE_ELEM_CMP must be defined"
> +#endif
> +
> +#ifndef MEMTX_TREE_ELEM_WITH_KEY_CMP
> +#error "MEMTX_TREE_ELEM_WITH_KEY_CMP must be defined"
> +#endif
May be, call these MEMTX_TREE_COMPARE and MEMTX_TREE_COMPARE_KEY to
match BPS tree macros?
> +
> +#ifndef MEMTX_TREE_ELEM_SET
> +#error "MEMTX_TREE_ELEM_SET must be defined"
> +#endif
> +
> +#ifndef MEMTX_TREE_KEY_SET
> +#error "MEMTX_TREE_KEY_SET must be defined"
> +#endif
> +
> +#ifndef MEMTX_TREE_ELEM_GET
> +#error "MEMTX_TREE_ELEM_GET must be defined"
> +#endif
> +
> +#ifndef MEMTX_TREE_KEY_GET
> +#error "MEMTX_TREE_KEY_GET must be defined"
> +#endif
Please add a comment to each of the macros describing what it is
supposed to do.
>
> /**
> - * BPS tree element vs key comparator.
> - * Defined in header in order to allow compiler to inline it.
> - * @param tuple - tuple to compare.
> - * @param key_data - key to compare with.
> - * @param def - key definition.
> - * @retval 0 if tuple == key in terms of def.
> - * @retval <0 if tuple < key in terms of def.
> - * @retval >0 if tuple > key in terms of def.
> + * The size of the biggest memtx tree iterator. Used with
> + * mempool_create. This is the size of the block that will be
> + * allocated for each iterator.
> */
> -static int
> -memtx_tree_compare_key(const struct tuple *tuple,
> - const struct memtx_tree_key_data *key_data,
> - struct key_def *def)
> -{
> - return tuple_compare_with_key(tuple, key_data->key,
> - key_data->part_count, def);
> -}
> +#define MEMTX_TREE_ITERATOR_SIZE (152)
Take a look at struct memtx_engine:
/** Memory pool for tree index iterator. */
struct mempool tree_iterator_pool;
/** Memory pool for rtree index iterator. */
struct mempool rtree_iterator_pool;
/** Memory pool for hash index iterator. */
struct mempool hash_iterator_pool;
/** Memory pool for bitset index iterator. */
struct mempool bitset_iterator_pool;
This looks ugly. Let's use this trick with static iterator size for all
index types. Please do it in a separate patch.
>
> -#define BPS_TREE_NAME memtx_tree
> +#define bps_tree_elem_t memtx_tree_elem
> +#define bps_tree_key_t memtx_tree_key *
> +#define bps_tree_arg_t struct key_def *
> +#define BPS_TREE_NAME CONCAT3(MEMTX_TREE_NAME, _, tree)
> #define BPS_TREE_BLOCK_SIZE (512)
> #define BPS_TREE_EXTENT_SIZE MEMTX_EXTENT_SIZE
> -#define BPS_TREE_COMPARE(a, b, arg) tuple_compare(a, b, arg)
> -#define BPS_TREE_COMPARE_KEY(a, b, arg) memtx_tree_compare_key(a, b, arg)
> -#define bps_tree_elem_t struct tuple *
> -#define bps_tree_key_t struct memtx_tree_key_data *
> -#define bps_tree_arg_t struct key_def *
> +#define BPS_TREE_COMPARE(a, b, arg) MEMTX_TREE_ELEM_CMP(&a, &b, arg)
> +#define BPS_TREE_COMPARE_KEY(a, b, arg) MEMTX_TREE_ELEM_WITH_KEY_CMP(&a, b, arg)
> +#define BPS_TREE_NO_DEBUG
>
> #include "salad/bps_tree.h"
>
> +#undef bps_tree_elem_t
> +#undef bps_tree_key_t
> +#undef bps_tree_arg_t
> #undef BPS_TREE_NAME
> #undef BPS_TREE_BLOCK_SIZE
> #undef BPS_TREE_EXTENT_SIZE
> #undef BPS_TREE_COMPARE
> #undef BPS_TREE_COMPARE_KEY
> -#undef bps_tree_elem_t
> -#undef bps_tree_key_t
> -#undef bps_tree_arg_t
> +#undef BPS_TREE_NO_DEBUG
> +
> +#define memtx_tree CONCAT3(MEMTX_TREE_NAME, _, tree)
> +#define _tree_api_name(postfix) CONCAT5(MEMTX_TREE_NAME, _, tree, _, postfix)
Why not _api_name?
> +#define memtx_tree_iterator _tree_api_name(iterator)
> +#define memtx_tree_iterator_is_invalid _tree_api_name(iterator_is_invalid)
> +#define memtx_tree_iterator_first _tree_api_name(iterator_first)
> +#define memtx_tree_iterator_last _tree_api_name(iterator_last)
> +#define memtx_tree_iterator_get_elem _tree_api_name(iterator_get_elem)
> +#define memtx_tree_iterator_next _tree_api_name(iterator_next)
> +#define memtx_tree_iterator_prev _tree_api_name(iterator_prev)
> +#define memtx_tree_iterator_freeze _tree_api_name(iterator_freeze)
> +#define memtx_tree_iterator_destroy _tree_api_name(iterator_destroy)
> +#define memtx_tree_create _tree_api_name(create)
> +#define memtx_tree_build _tree_api_name(build)
> +#define memtx_tree_destroy _tree_api_name(destroy)
> +#define memtx_tree_find _tree_api_name(find)
> +#define memtx_tree_insert _tree_api_name(insert)
> +#define memtx_tree_delete _tree_api_name(delete)
> +#define memtx_tree_size _tree_api_name(size)
> +#define memtx_tree_mem_used _tree_api_name(mem_used)
> +#define memtx_tree_random _tree_api_name(random)
> +#define memtx_tree_invalid_iterator _tree_api_name(invalid_iterator)
> +#define memtx_tree_lower_bound _tree_api_name(lower_bound)
> +#define memtx_tree_upper_bound _tree_api_name(upper_bound)
> +#define memtx_tree_lower_bound_elem _tree_api_name(lower_bound_elem)
> +#define memtx_tree_upper_bound_elem _tree_api_name(upper_bound_elem)
> +
> +#undef _api_name
BPS tree should undef all macros it defines on its own. Please fix it in
a separate patch.
> +#define _api_name(postfix) CONCAT3(MEMTX_TREE_NAME, _, postfix)
> +#define memtx_tree_index _api_name(index)
> +#define memtx_tree_qcompare _api_name(qcompare)
> +#define tree_iterator _api_name(iterator)
> +#define tree_iterator_free _api_name(iterator_free)
> +#define tree_iterator_cast _api_name(iterator_cast)
> +#define tree_iterator_dummie _api_name(iterator_dummie)
> +#define tree_iterator_next _api_name(iterator_next)
> +#define tree_iterator_prev _api_name(iterator_prev)
> +#define tree_iterator_next_equal _api_name(iterator_next_equal)
> +#define tree_iterator_prev_equal _api_name(iterator_prev_equal)
> +#define tree_iterator_set_next_method _api_name(iterator_set_next_method)
> +#define tree_iterator_start _api_name(iterator_start)
> +#define memtx_tree_index_cmp_def _api_name(index_cmp_def)
> +#define memtx_tree_index_free _api_name(index_free)
> +#define memtx_tree_index_gc_run _api_name(index_gc_run)
> +#define memtx_tree_index_gc_free _api_name(index_gc_free)
> +#define memtx_tree_index_gc_vtab _api_name(index_gc_vtab)
> +#define memtx_tree_index_destroy _api_name(index_destroy)
> +#define memtx_tree_index_update_def _api_name(index_update_def)
> +#define memtx_tree_index_depends_on_pk _api_name(index_depends_on_pk)
> +#define memtx_tree_index_size _api_name(index_size)
> +#define memtx_tree_index_bsize _api_name(index_bsize)
> +#define memtx_tree_index_random _api_name(index_random)
> +#define memtx_tree_index_count _api_name(index_count)
> +#define memtx_tree_index_get _api_name(index_get)
> +#define memtx_tree_index_insert_tuple _api_name(index_insert_tuple)
> +#define memtx_tree_index_delete_tuple _api_name(index_delete_tuple)
> +#define memtx_tree_index_replace _api_name(index_replace)
> +#define memtx_tree_index_create_iterator _api_name(index_create_iterator)
> +#define memtx_tree_index_begin_build _api_name(index_begin_build)
> +#define memtx_tree_index_build_next _api_name(index_build_next)
> +#define memtx_tree_index_reserve _api_name(index_reserve)
> +#define memtx_tree_index_end_build _api_name(index_end_build)
> +#define tree_snapshot_iterator _api_name(snapshot_iterator)
> +#define tree_snapshot_iterator_free _api_name(snapshot_iterator_free)
> +#define tree_snapshot_iterator_next _api_name(snapshot_iterator_next)
> +#define memtx_tree_index_create_snapshot_iterator\
> + _api_name(index_create_snapshot_iterator)
> +#define memtx_tree_index_vtab _api_name(index_vtab)
> +#define memtx_tree_index_new _api_name(index_new)
I'd undef _api_name right here, because we don't need it anywhere else.
>
> struct memtx_tree_index {
> struct index base;
> - struct memtx_tree tree;
> - struct tuple **build_array;
> + memtx_tree_elem *build_array;
> size_t build_array_size, build_array_alloc_size;
> struct memtx_gc_task gc_task;
> + struct memtx_tree tree;
Nit: pointless change (moving 'tree' member).
> struct memtx_tree_iterator gc_iterator;
> };
>
> @@ -102,8 +186,8 @@ struct memtx_tree_index {
> static int
> memtx_tree_qcompare(const void* a, const void *b, void *c)
> {
> - return tuple_compare(*(struct tuple **)a,
> - *(struct tuple **)b, (struct key_def *)c);
> + return MEMTX_TREE_ELEM_CMP((memtx_tree_elem *)a,
> + (memtx_tree_elem *)b, c);
> }
>
> /* {{{ MemtxTree Iterators ****************************************/
> @@ -113,17 +197,21 @@ struct tree_iterator {
> struct index_def *index_def;
> struct memtx_tree_iterator tree_iterator;
> enum iterator_type type;
> - struct memtx_tree_key_data key_data;
> - struct tuple *current_tuple;
> + memtx_tree_key key_data;
> + memtx_tree_elem current;
> /** Memory pool the iterator was allocated from. */
> struct mempool *pool;
> };
>
> +static_assert(sizeof(struct tree_iterator) <= MEMTX_TREE_ITERATOR_SIZE,
> + "tree_iterator must be less or equal than "
> + "MEMTX_TREE_ITERATOR_SIZE");
> +
> static void
> tree_iterator_free(struct iterator *iterator);
>
> static inline struct tree_iterator *
> -tree_iterator(struct iterator *it)
> +tree_iterator_cast(struct iterator *it)
> {
> assert(it->free == tree_iterator_free);
> return (struct tree_iterator *) it;
> @@ -132,9 +220,10 @@ tree_iterator(struct iterator *it)
> static void
> tree_iterator_free(struct iterator *iterator)
> {
> - struct tree_iterator *it = tree_iterator(iterator);
> - if (it->current_tuple != NULL)
> - tuple_unref(it->current_tuple);
> + struct tree_iterator *it = tree_iterator_cast(iterator);
> + struct tuple *tuple = MEMTX_TREE_ELEM_GET(&it->current);
> + if (tuple != NULL)
> + tuple_unref(tuple);
> mempool_free(it->pool, it);
> }
>
> @@ -149,25 +238,28 @@ tree_iterator_dummie(struct iterator *iterator, struct tuple **ret)
> static int
> tree_iterator_next(struct iterator *iterator, struct tuple **ret)
> {
> - struct tuple **res;
> - struct tree_iterator *it = tree_iterator(iterator);
> - assert(it->current_tuple != NULL);
> - struct tuple **check = memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
> - if (check == NULL || *check != it->current_tuple)
> + struct tree_iterator *it = tree_iterator_cast(iterator);
> + assert(MEMTX_TREE_ELEM_GET(&it->current) != NULL);
> + memtx_tree_elem *check =
> + memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
> + if (check == NULL ||
> + MEMTX_TREE_ELEM_GET(check) != MEMTX_TREE_ELEM_GET(&it->current)) {
AFAIU this check isn't quite correct: we want to make sure that
it->current and check refer to the same tree element. So in case
of a multikey index it isn't enough to just check the tuples they
refer to - you also need to check the key offsets.
> diff --git a/src/box/memtx_tree_decl.c b/src/box/memtx_tree_decl.c
> new file mode 100644
> index 000000000..03e0e0ed7
> --- /dev/null
> +++ b/src/box/memtx_tree_decl.c
> @@ -0,0 +1,84 @@
> +/*
> + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
> + *
> + * Redistribution and use in source and binary forms, with or
> + * without modification, are permitted provided that the following
> + * conditions are met:
> + *
> + * 1. Redistributions of source code must retain the above
> + * copyright notice, this list of conditions and the
> + * following disclaimer.
> + *
> + * 2. Redistributions in binary form must reproduce the above
> + * copyright notice, this list of conditions and the following
> + * disclaimer in the documentation and/or other materials
> + * provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
> + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
> + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include "tuple_compare.h"
> +#include "memtx_tree.h"
> +
> +/* {{{ Memtx tree of tuples class. ******************************/
> +
> +/** Struct that is used as a key in BPS tree definition. */
> +struct memtx_tuple_tree_key_data {
> + /** Sequence of msgpacked search fields. */
> + const char *key;
> + /** Number of msgpacked search fields. */
> + uint32_t part_count;
> +};
> +
> +#define MEMTX_TREE_NAME memtx_tuple_tree
I don't quite like the name, because all memtx indexes store tuples,
eventually. May be, memtx_simple_tree or memtx_basic_tree?
> +#define memtx_tree_elem struct tuple *
> +#define memtx_tree_key struct memtx_tuple_tree_key_data
> +#define MEMTX_TREE_ELEM_CMP(elem_a_ptr, elem_b_ptr, key_def) \
> + tuple_compare(*elem_a_ptr, *elem_b_ptr, key_def)
> +#define MEMTX_TREE_ELEM_WITH_KEY_CMP(elem_ptr, key_ptr, key_def) \
> + tuple_compare_with_key(*elem_ptr, (key_ptr)->key, \
> + (key_ptr)->part_count, key_def)
> +#define MEMTX_TREE_ELEM_SET(elem_ptr, tuple, key_def) \
> + ({(void)key_def; *elem_ptr = tuple;})
> +#define MEMTX_TREE_KEY_SET(key_ptr, key_val, part_count_val, key_def) \
> + ({(void)key_def; (key_ptr)->key = key_val; \
> + (key_ptr)->part_count = part_count_val;})
> +#define MEMTX_TREE_ELEM_GET(elem_ptr) (*(elem_ptr))
> +#define MEMTX_TREE_KEY_GET(key_ptr, part_count_ptr) \
> + ({*part_count_ptr = (key_ptr)->part_count; (key_ptr)->key;})
> +
> +#include "memtx_tree.c"
> +
> +#undef memtx_tree_key
> +#undef memtx_tree_elem
> +#undef MEMTX_TREE_KEY_GET
> +#undef MEMTX_TREE_ELEM_GET
> +#undef MEMTX_TREE_KEY_SET
> +#undef MEMTX_TREE_ELEM_SET
> +#undef MEMTX_TREE_ELEM_WITH_KEY_CMP
> +#undef MEMTX_TREE_ELEM_CMP
> +#undef MEMTX_TREE_ELEM_EQUAL
This one is never defined.
> +#undef memtx_tree_key
> +#undef memtx_tree_elem
You undef those twice.
> +#undef MEMTX_TREE_NAME
> +
> +/* }}} */
> +
> +struct index *
> +memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
> +{
> + return memtx_tuple_tree_index_new(memtx, def);
> +}
More information about the Tarantool-patches
mailing list