From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Subject: Re: [tarantool-patches] Re: [PATCH v6 2/3] memtx: introduce tuple compare hint References: <004880d7afdf9513bf874d6f9b13982e6294be46.1552478226.git.kshcherbatov@tarantool.org> <20190314081932.ebf245jj2q4wk52t@esperanza> From: Kirill Shcherbatov Message-ID: <4d1312d3-c66e-de4e-ca9f-bb67b7a3bbb1@tarantool.org> Date: Fri, 15 Mar 2019 13:20:09 +0300 MIME-Version: 1.0 In-Reply-To: <20190314081932.ebf245jj2q4wk52t@esperanza> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit To: tarantool-patches@freelists.org, Vladimir Davydov List-ID: > Please refresh the comment. It isn't quite correct. I've stripped it a bit. > On the second thought, we might want to reduce the hint size or increase > it in future. Would it be better to introduce a helper type for hints so > that we could easily do this if we need to? > > typedef uint64_t tuple_hint_t; > > What do you think? Would it be worthwhile? Would it look OK? > > Sorry, I asked you to use uint64_t before. Looks to me now that > I was wrong... :-( tuple_hint_t name is not really good, I believe. At first, tuple_hint() is a routine that calculates such hint for tuple. I prefer hint_t type that would be ok even later with multikey index intriduction. > Hmm, (&a)->tuple is equivalent to (a).tuple, isn't it? I don't understand why, by it doesn't work in such way. > Accessing index->tree.arg looks ugly - it violates encapsulation. > Besides, right above you use index_def_cmp_def instead. I think we > better introduce a helper function for accessing index->tree.arg > and use it everywhere: > > memtx_tree_cmp_def(&index->tree) Now I use memtx_tree_index_cmp_def() routine that already present in code and works fine in such scenario. ============================================================== Implement functions for retrieving tuple hints for a particular key_def. Hint is an integer that can be used for tuple comparison optimization: if a hint of one tuple is less than a hint of another then the first tuple is definitely less than the second; only if hints are equal tuple_compare must be called for getting comparison result. Hints are calculated using only the first part of key_def. Close #3961 --- src/box/key_def.h | 128 +++++++++++++ src/box/memtx_tree.c | 40 +++- src/box/tuple_compare.cc | 394 +++++++++++++++++++++++++++++++++++++-- src/lib/coll/coll.c | 29 +++ src/lib/coll/coll.h | 5 + test/engine/ddl.result | 180 ++++++++++++++++++ test/engine/ddl.test.lua | 49 +++++ 7 files changed, 800 insertions(+), 25 deletions(-) diff --git a/src/box/key_def.h b/src/box/key_def.h index 7c9f8c6b5..174790f72 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -116,6 +116,41 @@ struct key_part { struct key_def; struct tuple; +/** + * Tuple comparison hint h(t) is such a function of tuple t that + * the following conditions always hold for any pair of tuples + * t1 and t2: + * if h(t1) < h(t2) then t1 < t2; + * if h(t1) > h(t2) then t1 > t2; + * if h(t1) == h(t2) then t1 may or may not be equal to t2. + * These rules mean that instead of direct tuple vs tuple + * (or tuple vs key) comparison one may compare theirs + * hints first; and only if theirs hints equal compare + * the tuples themselves. + * + * The comparison hint has the following structure: + * uint64_t: [ HINT_TYPE | HINT_VALUE ] + * 64 60 0 bit + * + * To make the values of hints for numeric types(unsigned, + * integer, float, double) compatible, define a linear mapping + * of the segment f(x) = k*x + b = x + max + 1: + * f([-max - 1, max]) = [0, 2 * max + 1]. + * It is easy to make sure that there is a < b <=> f (a) < f(b). + * The max value is determined by the number of bits allocated + * for storage of HINT_VALUE. + */ +typedef uint64_t hint_t; + +#define HINT_VALUE_BITS 60 +#define HINT_MAX (((int64_t)1 << (HINT_VALUE_BITS - 1)) - 1) +#define HINT_MIN (-HINT_MAX - 1) + +/** + * The reserved value to use when comparison hint is undefined. + */ +#define HINT_NONE ((hint_t)UINT64_MAX) + /** * Get is_nullable property of key_part. * @param key_part for which attribute is being fetched @@ -137,6 +172,19 @@ typedef int (*tuple_compare_with_key_t)(const struct tuple *tuple_a, typedef int (*tuple_compare_t)(const struct tuple *tuple_a, const struct tuple *tuple_b, struct key_def *key_def); +/** @copydoc tuple_compare_with_key_hinted() */ +typedef int (*tuple_compare_with_key_hinted_t)(const struct tuple *tuple, + hint_t tuple_hint, + const char *key, + uint32_t part_count, + hint_t key_hint, + struct key_def *key_def); +/** @copydoc tuple_compare_hinted() */ +typedef int (*tuple_compare_hinted_t)(const struct tuple *tuple_a, + hint_t tuple_a_hint, + const struct tuple *tuple_b, + hint_t tuple_b_hint, + struct key_def *key_def); /** @copydoc tuple_extract_key() */ typedef char *(*tuple_extract_key_t)(const struct tuple *tuple, struct key_def *key_def, @@ -152,6 +200,11 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple, /** @copydoc key_hash() */ typedef uint32_t (*key_hash_t)(const char *key, struct key_def *key_def); +/** @copydoc tuple_hint() */ +typedef hint_t (*tuple_hint_t)(const struct tuple *tuple, + struct key_def *key_def); +/** @copydoc key_hint() */ +typedef hint_t (*key_hint_t)(const char *key, struct key_def *key_def); /* Definition of a multipart key. */ struct key_def { @@ -159,6 +212,10 @@ struct key_def { tuple_compare_t tuple_compare; /** @see tuple_compare_with_key() */ tuple_compare_with_key_t tuple_compare_with_key; + /** @see tuple_compare_with_key_hinted() */ + tuple_compare_with_key_hinted_t tuple_compare_with_key_hinted; + /** @see tuple_compare_hinted() */ + tuple_compare_hinted_t tuple_compare_hinted; /** @see tuple_extract_key() */ tuple_extract_key_t tuple_extract_key; /** @see tuple_extract_key_raw() */ @@ -167,6 +224,10 @@ struct key_def { tuple_hash_t tuple_hash; /** @see key_hash() */ key_hash_t key_hash; + /** @see tuple_hint() */ + tuple_hint_t tuple_hint; + /** @see key_hint() */ + key_hint_t key_hint; /** * Minimal part count which always is unique. For example, * if a secondary index is unique, then @@ -575,6 +636,49 @@ tuple_compare_with_key(const struct tuple *tuple, const char *key, return key_def->tuple_compare_with_key(tuple, key, part_count, key_def); } +/** + * Compare tuples using the key definition and comparison hints. + * @param tuple_a First tuple. + * @param tuple_a_hint Comparison hint of @a tuple_a. + * @param tuple_b Second tuple. + * @param tuple_b_hint Comparison hint of @a tuple_b. + * @param key_def Key definition. + * @retval 0 if key_fields(tuple_a) == key_fields(tuple_b). + * @retval <0 if key_fields(tuple_a) < key_fields(tuple_b). + * @retval >0 if key_fields(tuple_a) > key_fields(tuple_b). + */ +static inline int +tuple_compare_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint, + const struct tuple *tuple_b, hint_t tuple_b_hint, + struct key_def *key_def) +{ + return key_def->tuple_compare_hinted(tuple_a, tuple_a_hint, tuple_b, + tuple_b_hint, key_def); +} + +/** + * Compare tuple with key using the key definition and + * comparison hints. + * @param tuple Tuple. + * @param tuple_hint Comparison hint of @a tuple. + * @param key Key parts without MessagePack array header. + * @param part_count The number of parts in @a key. + * @param key_hint t Comparison hint of @a key. + * @param key_def Key definition. + * @retval 0 if key_fields(tuple) == parts(key). + * @retval <0 if key_fields(tuple) < parts(key). + * @retval >0 if key_fields(tuple) > parts(key). + */ +static inline int +tuple_compare_with_key_hinted(const struct tuple *tuple, hint_t tuple_hint, + const char *key, uint32_t part_count, + hint_t key_hint, struct key_def *key_def) +{ + return key_def->tuple_compare_with_key_hinted(tuple, tuple_hint, key, + part_count, key_hint, + key_def); +} + /** * Compute hash of a tuple field. * @param ph1 - pointer to running hash @@ -628,6 +732,30 @@ key_hash(const char *key, struct key_def *key_def) return key_def->key_hash(key, key_def); } + /* + * Get comparison hint for a @a tuple. + * @param tuple Tuple to compute the hint for. + * @param key_def Key definition used for tuple comparison. + * @return The hint. + */ +static inline hint_t +tuple_hint(const struct tuple *tuple, struct key_def *key_def) +{ + return key_def->tuple_hint(tuple, key_def); +} + +/** + * Get a comparison hint of a @a key. + * @param key Key to compute the hint for. + * @param key_def Key definition used for tuple comparison. + * @return The hint. + */ +static inline hint_t +key_hint(const char *key, struct key_def *key_def) +{ + return key_def->key_hint(key, key_def); +} + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c index d5b42efb6..c2c8a19e7 100644 --- a/src/box/memtx_tree.c +++ b/src/box/memtx_tree.c @@ -47,6 +47,8 @@ struct memtx_tree_key_data { const char *key; /** Number of msgpacked search fields. */ uint32_t part_count; + /** Comparison hint, see tuple_hint(). */ + hint_t hint; }; /** @@ -55,6 +57,8 @@ struct memtx_tree_key_data { struct memtx_tree_data { /* Tuple that this node is represents. */ struct tuple *tuple; + /** Comparison hint, see key_hint(). */ + hint_t hint; }; /** @@ -69,16 +73,18 @@ static bool memtx_tree_data_identical(const struct memtx_tree_data *a, const struct memtx_tree_data *b) { - return a->tuple == b->tuple; + return a->tuple == b->tuple && a->hint == b->hint; } #define BPS_TREE_NAME memtx_tree #define BPS_TREE_BLOCK_SIZE (512) #define BPS_TREE_EXTENT_SIZE MEMTX_EXTENT_SIZE #define BPS_TREE_COMPARE(a, b, arg)\ - tuple_compare((&a)->tuple, (&b)->tuple, arg) + tuple_compare_hinted((&a)->tuple, (&a)->hint, (&b)->tuple,\ + (&b)->hint, arg) #define BPS_TREE_COMPARE_KEY(a, b, arg)\ - tuple_compare_with_key((&a)->tuple, (b)->key, (b)->part_count, arg) + tuple_compare_with_key_hinted((&a)->tuple, (&a)->hint, (b)->key,\ + (b)->part_count, (b)->hint, arg) #define BPS_TREE_IDENTICAL(a, b) memtx_tree_data_identical(&a, &b) #define bps_tree_elem_t struct memtx_tree_data #define bps_tree_key_t struct memtx_tree_key_data * @@ -113,7 +119,8 @@ memtx_tree_qcompare(const void* a, const void *b, void *c) const struct memtx_tree_data *data_a = a; const struct memtx_tree_data *data_b = b; struct key_def *key_def = c; - return tuple_compare(data_a->tuple, data_b->tuple, key_def); + return tuple_compare_hinted(data_a->tuple, data_a->hint, data_b->tuple, + data_b->hint, key_def); } /* {{{ MemtxTree Iterators ****************************************/ @@ -235,9 +242,11 @@ tree_iterator_next_equal(struct iterator *iterator, struct tuple **ret) memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator); /* Use user key def to save a few loops. */ if (res == NULL || - tuple_compare_with_key(res->tuple, it->key_data.key, - it->key_data.part_count, - it->index_def->key_def) != 0) { + tuple_compare_with_key_hinted(res->tuple, res->hint, + it->key_data.key, + it->key_data.part_count, + it->key_data.hint, + it->index_def->key_def) != 0) { iterator->next = tree_iterator_dummie; it->current.tuple = NULL; *ret = NULL; @@ -266,9 +275,11 @@ tree_iterator_prev_equal(struct iterator *iterator, struct tuple **ret) memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator); /* Use user key def to save a few loops. */ if (res == NULL || - tuple_compare_with_key(res->tuple, it->key_data.key, - it->key_data.part_count, - it->index_def->key_def) != 0) { + tuple_compare_with_key_hinted(res->tuple, res->hint, + it->key_data.key, + it->key_data.part_count, + it->key_data.hint, + it->index_def->key_def) != 0) { iterator->next = tree_iterator_dummie; it->current.tuple = NULL; *ret = NULL; @@ -516,9 +527,11 @@ memtx_tree_index_get(struct index *base, const char *key, assert(base->def->opts.is_unique && part_count == base->def->key_def->part_count); struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *cmp_def = memtx_tree_index_cmp_def(index); struct memtx_tree_key_data key_data; key_data.key = key; key_data.part_count = part_count; + key_data.hint = key_hint(key, cmp_def); struct memtx_tree_data *res = memtx_tree_find(&index->tree, &key_data); *result = res != NULL ? res->tuple : NULL; return 0; @@ -530,9 +543,11 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple, struct tuple **result) { struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *key_def = memtx_tree_index_cmp_def(index); if (new_tuple) { struct memtx_tree_data new_data; new_data.tuple = new_tuple; + new_data.hint = tuple_hint(new_tuple, key_def); struct memtx_tree_data dup_data; dup_data.tuple = NULL; @@ -565,6 +580,7 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple, if (old_tuple) { struct memtx_tree_data old_data; old_data.tuple = old_tuple; + old_data.hint = tuple_hint(old_tuple, key_def); memtx_tree_delete(&index->tree, old_data); } *result = old_tuple; @@ -607,6 +623,8 @@ memtx_tree_index_create_iterator(struct index *base, enum iterator_type type, it->type = type; it->key_data.key = key; it->key_data.part_count = part_count; + struct key_def *cmp_def = memtx_tree_index_cmp_def(index); + it->key_data.hint = key_hint(key, cmp_def); it->index_def = base->def; it->tree = &index->tree; it->tree_iterator = memtx_tree_invalid_iterator(); @@ -671,6 +689,8 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple) struct memtx_tree_data *elem = &index->build_array[index->build_array_size++]; elem->tuple = tuple; + struct key_def *key_def = memtx_tree_index_cmp_def(index); + elem->hint = tuple_hint(tuple, key_def); return 0; } diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc index 7fe1766a8..54a977926 100644 --- a/src/box/tuple_compare.cc +++ b/src/box/tuple_compare.cc @@ -34,6 +34,14 @@ #include "trivia/util.h" /* NOINLINE */ #include +/** + * Perform hints comparison according hint_t definition. + * Returns 1 when hint_a > hint_b, -1 when hint_b > hint_a; + * When tuple_hints are incompatible or equal, return 0. + */ +static int +hint_cmp(hint_t hint_a, hint_t hint_b); + /* {{{ tuple_compare */ /* @@ -427,13 +435,17 @@ tuple_compare_field_with_hint(const char *field_a, enum mp_type a_type, template static inline int -tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, - struct key_def *key_def) +tuple_compare_slowpath_hinted(const struct tuple *tuple_a, + hint_t tuple_a_hint, const struct tuple *tuple_b, + hint_t tuple_b_hint, struct key_def *key_def) { assert(has_json_paths == key_def->has_json_paths); assert(!has_optional_parts || is_nullable); assert(is_nullable == key_def->is_nullable); assert(has_optional_parts == key_def->has_optional_parts); + int rc = 0; + if ((rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0) + return rc; struct key_part *part = key_def->parts; const char *tuple_a_raw = tuple_data(tuple_a); const char *tuple_b_raw = tuple_data(tuple_b); @@ -469,7 +481,6 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, struct key_part *end; const char *field_a, *field_b; enum mp_type a_type, b_type; - int rc; if (is_nullable) end = part + key_def->unique_part_count; else @@ -559,8 +570,19 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, template static inline int -tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, - uint32_t part_count, struct key_def *key_def) +tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, + struct key_def *key_def) +{ + return tuple_compare_slowpath_hinted(tuple_a, HINT_NONE, tuple_b, HINT_NONE, + key_def); +} + +template +static inline int +tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple, + hint_t tuple_hint, const char *key, uint32_t part_count, + hint_t key_hint, struct key_def *key_def) { assert(has_json_paths == key_def->has_json_paths); assert(!has_optional_parts || is_nullable); @@ -568,6 +590,9 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, assert(has_optional_parts == key_def->has_optional_parts); assert(key != NULL || part_count == 0); assert(part_count <= key_def->part_count); + int rc = 0; + if ((rc = hint_cmp(tuple_hint, key_hint)) != 0) + return rc; struct key_part *part = key_def->parts; struct tuple_format *format = tuple_format(tuple); const char *tuple_raw = tuple_data(tuple); @@ -603,7 +628,6 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, } struct key_part *end = part + part_count; - int rc; for (; part < end; ++part, mp_next(&key)) { const char *field; if (has_json_paths) { @@ -642,6 +666,17 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, return 0; } +template +static inline int +tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, + uint32_t part_count, struct key_def *key_def) +{ + return tuple_compare_with_key_slowpath_hinted(tuple, + HINT_NONE, key, part_count, + HINT_NONE, key_def); +} + template static inline int key_compare_parts(const char *key_a, const char *key_b, uint32_t part_count, @@ -700,13 +735,17 @@ key_compare_parts(const char *key_a, const char *key_b, uint32_t part_count, template static inline int -tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key, - uint32_t part_count, struct key_def *key_def) +tuple_compare_with_key_sequential_hinted(const struct tuple *tuple, + hint_t tuple_hint, const char *key, uint32_t part_count, + hint_t key_hint, struct key_def *key_def) { assert(!has_optional_parts || is_nullable); assert(key_def_is_sequential(key_def)); assert(is_nullable == key_def->is_nullable); assert(has_optional_parts == key_def->has_optional_parts); + int rc = hint_cmp(tuple_hint, key_hint); + if (rc != 0) + return rc; const char *tuple_key = tuple_data(tuple); uint32_t field_count = mp_decode_array(&tuple_key); uint32_t cmp_part_count; @@ -716,8 +755,8 @@ tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key, assert(field_count >= part_count); cmp_part_count = part_count; } - int rc = key_compare_parts(tuple_key, key, cmp_part_count, - key_def); + rc = key_compare_parts(tuple_key, key, cmp_part_count, + key_def); if (!has_optional_parts || rc != 0) return rc; /* @@ -739,6 +778,16 @@ tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key, return 0; } +template +static inline int +tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key, + uint32_t part_count, struct key_def *key_def) +{ + return tuple_compare_with_key_sequential_hinted(tuple, HINT_NONE, key, + part_count, HINT_NONE, key_def); +} + int key_compare(const char *key_a, const char *key_b, struct key_def *key_def) { @@ -759,13 +808,17 @@ key_compare(const char *key_a, const char *key_b, struct key_def *key_def) template static int -tuple_compare_sequential(const struct tuple *tuple_a, - const struct tuple *tuple_b, key_def *key_def) +tuple_compare_sequential_hinted(const struct tuple *tuple_a, + hint_t tuple_a_hint, const struct tuple *tuple_b, + hint_t tuple_b_hint, key_def *key_def) { assert(!has_optional_parts || is_nullable); assert(has_optional_parts == key_def->has_optional_parts); assert(key_def_is_sequential(key_def)); assert(is_nullable == key_def->is_nullable); + int rc = hint_cmp(tuple_a_hint, tuple_b_hint); + if (rc != 0) + return rc; const char *key_a = tuple_data(tuple_a); uint32_t fc_a = mp_decode_array(&key_a); const char *key_b = tuple_data(tuple_b); @@ -779,7 +832,6 @@ tuple_compare_sequential(const struct tuple *tuple_a, bool was_null_met = false; struct key_part *part = key_def->parts; struct key_part *end = part + key_def->unique_part_count; - int rc; uint32_t i = 0; for (; part < end; ++part, ++i) { enum mp_type a_type, b_type; @@ -826,6 +878,16 @@ tuple_compare_sequential(const struct tuple *tuple_a, return 0; } +template +static int +tuple_compare_sequential(const struct tuple *tuple_a, + const struct tuple *tuple_b, key_def *key_def) +{ + return tuple_compare_sequential_hinted(tuple_a, HINT_NONE, tuple_b, + HINT_NONE, key_def); +} + template static inline int field_compare(const char **field_a, const char **field_b); @@ -956,6 +1018,18 @@ struct TupleCompare compare(tuple_a, tuple_b, format_a, format_b, field_a, field_b); } + + static int compare_hinted(const struct tuple *tuple_a, + hint_t tuple_a_hint, + const struct tuple *tuple_b, + hint_t tuple_b_hint, + struct key_def *key_def) + { + int rc = hint_cmp(tuple_a_hint, tuple_b_hint); + if (rc != 0) + return rc; + return compare(tuple_a, tuple_b, key_def); + } }; template @@ -973,15 +1047,30 @@ struct TupleCompare<0, TYPE, MORE_TYPES...> { return FieldCompare<0, TYPE, MORE_TYPES...>::compare(tuple_a, tuple_b, format_a, format_b, field_a, field_b); } + + static int compare_hinted(const struct tuple *tuple_a, + hint_t tuple_a_hint, + const struct tuple *tuple_b, + hint_t tuple_b_hint, + struct key_def *key_def) + { + int rc = hint_cmp(tuple_a_hint, tuple_b_hint); + if (rc != 0) + return rc; + return compare(tuple_a, tuple_b, key_def); + } }; } /* end of anonymous namespace */ struct comparator_signature { tuple_compare_t f; + tuple_compare_hinted_t f_hinted; uint32_t p[64]; }; #define COMPARATOR(...) \ - { TupleCompare<__VA_ARGS__>::compare, { __VA_ARGS__, UINT32_MAX } }, + { TupleCompare<__VA_ARGS__>::compare, \ + TupleCompare<__VA_ARGS__>::compare_hinted, \ + { __VA_ARGS__, UINT32_MAX } }, /** * field1 no, field1 type, field2 no, field2 type, ... @@ -1133,6 +1222,17 @@ struct TupleCompareWithKey compare(tuple, key, part_count, key_def, format, field); } + + static int + compare_hinted(const struct tuple *tuple, hint_t tuple_hint, + const char *key, uint32_t part_count, hint_t key_hint, + struct key_def *key_def) + { + int rc = hint_cmp(tuple_hint, key_hint); + if (rc != 0) + return rc; + return compare(tuple, key, part_count, key_def); + } }; template @@ -1153,6 +1253,17 @@ struct TupleCompareWithKey<0, 0, TYPE, MORE_TYPES...> compare(tuple, key, part_count, key_def, format, field); } + + static int + compare_hinted(const struct tuple *tuple, hint_t tuple_hint, + const char *key, uint32_t part_count, hint_t key_hint, + struct key_def *key_def) + { + int rc = hint_cmp(tuple_hint, key_hint); + if (rc != 0) + return rc; + return compare(tuple, key, part_count, key_def); + } }; } /* end of anonymous namespace */ @@ -1160,11 +1271,14 @@ struct TupleCompareWithKey<0, 0, TYPE, MORE_TYPES...> struct comparator_with_key_signature { tuple_compare_with_key_t f; + tuple_compare_with_key_hinted_t f_hinted; uint32_t p[64]; }; #define KEY_COMPARATOR(...) \ - { TupleCompareWithKey<0, __VA_ARGS__>::compare, { __VA_ARGS__ } }, + { TupleCompareWithKey<0, __VA_ARGS__>::compare, \ + TupleCompareWithKey<0, __VA_ARGS__>::compare_hinted, \ + { __VA_ARGS__ } }, static const comparator_with_key_signature precompiled_cmp_wk_arr[] = { KEY_COMPARATOR(0, FIELD_TYPE_UNSIGNED, 1, FIELD_TYPE_UNSIGNED, 2, FIELD_TYPE_UNSIGNED) @@ -1187,16 +1301,22 @@ static const comparator_with_key_signature precompiled_cmp_wk_arr[] = { #define COMPARE_SLOWPATH(...) \ {tuple_compare_slowpath<__VA_ARGS__>, \ tuple_compare_with_key_slowpath<__VA_ARGS__>, \ + tuple_compare_slowpath_hinted<__VA_ARGS__>, \ + tuple_compare_with_key_slowpath_hinted<__VA_ARGS__>, \ __VA_ARGS__, false} #define COMPARE_SEQUENTIAL(...) \ {tuple_compare_sequential<__VA_ARGS__>, \ tuple_compare_with_key_sequential<__VA_ARGS__>, \ + tuple_compare_sequential_hinted<__VA_ARGS__>, \ + tuple_compare_with_key_sequential_hinted<__VA_ARGS__>, \ __VA_ARGS__, false, true} static struct { tuple_compare_t tuple_compare; tuple_compare_with_key_t tuple_compare_with_key; + tuple_compare_hinted_t tuple_compare_hinted; + tuple_compare_with_key_hinted_t tuple_compare_with_key_hinted; bool is_nullable; bool has_optional_parts; bool has_json_paths; @@ -1221,11 +1341,236 @@ static struct { /* }}} tuple_compare_with_key */ +static inline hint_t +tuple_hint_new(enum mp_class type, uint64_t val) +{ + assert(((uint64_t)type >> (sizeof(hint_t) * CHAR_BIT - HINT_VALUE_BITS)) == 0); + assert(val >> HINT_VALUE_BITS == 0); + return (hint_t)(((uint64_t)type << HINT_VALUE_BITS) | val); +} + +static int +hint_cmp(hint_t hint_a, hint_t hint_b) +{ + if (hint_a != HINT_NONE && hint_b != HINT_NONE && hint_a != hint_b) + return hint_a < hint_b ? -1 : 1; + else + return 0; +} + +static hint_t +field_hint_unsigned(const char *field) +{ + assert(mp_typeof(*field) == MP_UINT); + uint64_t val = mp_decode_uint(&field); + if (val > HINT_MAX) + val = HINT_MAX; + uint64_t ret = val - (uint64_t)HINT_MIN; + return tuple_hint_new(MP_CLASS_NUMBER, ret); +} + +static hint_t +field_hint_integer(const char *field) +{ + switch (mp_typeof(*field)) { + case MP_INT: { + int64_t val = mp_decode_int(&field); + if (val > HINT_MAX) + val = HINT_MAX; + else if (val < HINT_MIN) + val = HINT_MIN; + uint64_t ret = (uint64_t)val - (uint64_t)HINT_MIN; + return tuple_hint_new(MP_CLASS_NUMBER, ret); + } + case MP_UINT: + return field_hint_unsigned(field); + default: + unreachable(); + } + return HINT_NONE; +} + +static hint_t +field_hint_number(const char *field) +{ + enum mp_type type = mp_typeof(*field); + switch (type) { + case MP_FLOAT: + case MP_DOUBLE: { + double f = type == MP_FLOAT ? + mp_decode_float(&field) : mp_decode_double(&field); + if (!isfinite(f)) + return HINT_NONE; + uint64_t ret; + if (f > exp2(HINT_VALUE_BITS - 1)) + ret = (uint64_t)HINT_MAX - (uint64_t)HINT_MIN; + else if (f < -exp2(HINT_VALUE_BITS - 1)) + ret = 0; + else { + double val; + (void)modf(f, &val); + ret = (uint64_t)val - (uint64_t)HINT_MIN; + } + return tuple_hint_new(MP_CLASS_NUMBER, ret); + } + case MP_UINT: + return field_hint_unsigned(field); + case MP_INT: + return field_hint_integer(field); + default: + unreachable(); + } + return HINT_NONE; +} + +static hint_t +field_hint_boolean(const char *field) +{ + assert(mp_typeof(*field) == MP_BOOL); + return tuple_hint_new(MP_CLASS_BOOL, mp_decode_bool(&field) ? 1 : 0); +} + +static uint64_t +memory_buffer_hint_val(const unsigned char *raw, uint32_t size) +{ + int process_len = MIN((int)size, 7); + uint64_t result = 0; + for (int i = 0; i < process_len; i++) { + result <<= CHAR_BIT; + result |= raw[i]; + } + result <<= CHAR_BIT * (7 - process_len); + return result; +} + +template +static hint_t +field_hint_string(const char *field, struct coll *coll) +{ + assert(has_collation == (coll != NULL)); + assert(mp_typeof(*field) == MP_STR); + char buff[7]; + uint32_t len; + const char *str = mp_decode_str(&field, &len); + if (has_collation) { + len = coll->get_sort_key(str, len, buff, sizeof(buff), coll); + str = buff; + } + uint64_t ret = memory_buffer_hint_val((const unsigned char *)str, len); + return tuple_hint_new(MP_CLASS_STR, ret); +} + +template +static hint_t +field_hint_scalar(const char *field, struct coll *coll) +{ + switch (mp_classof(mp_typeof(*field))) { + case MP_CLASS_NUMBER: + return field_hint_number(field); + case MP_CLASS_BOOL: + return field_hint_boolean(field); + case MP_CLASS_STR: + return field_hint_string(field, coll); + case MP_CLASS_BIN: { + assert(coll == NULL); + uint32_t size; + const unsigned char *raw = + (const unsigned char *)mp_decode_bin(&field, &size); + uint64_t ret = memory_buffer_hint_val(raw, size); + return tuple_hint_new(MP_CLASS_BIN, ret); + } + default: + unreachable(); + } + return HINT_NONE; +} + +template +static hint_t +field_hint(const char *field, struct key_def *key_def) +{ + assert(type == key_def->parts->type); + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(has_collation == (key_def->parts->coll != NULL)); + switch (type) { + case FIELD_TYPE_UNSIGNED: + return field_hint_unsigned(field); + case FIELD_TYPE_INTEGER: + return field_hint_integer(field); + case FIELD_TYPE_NUMBER: + return field_hint_number(field); + case FIELD_TYPE_BOOLEAN: + return field_hint_boolean(field); + case FIELD_TYPE_STRING: + return field_hint_string(field, + key_def->parts->coll); + case FIELD_TYPE_SCALAR: + return field_hint_scalar(field, + key_def->parts->coll); + default: + unreachable(); + } + return HINT_NONE; +} + +template +static hint_t +key_hint(const char *key, struct key_def *key_def) +{ + if (key == NULL || (is_nullable && mp_typeof(*key) == MP_NIL)) + return tuple_hint_new(MP_CLASS_NIL, 0); + return field_hint(key, key_def); +} + +template +static hint_t +tuple_hint(const struct tuple *tuple, struct key_def *key_def) +{ + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && (field == NULL || mp_typeof(*field) == MP_NIL)) + return tuple_hint_new(MP_CLASS_NIL, 0); + return field_hint(field, key_def); +} + +#define HINT(...) { \ + tuple_hint<__VA_ARGS__>, key_hint<__VA_ARGS__>, __VA_ARGS__ } + +static const struct { + tuple_hint_t tuple_hint; + key_hint_t key_hint; + enum field_type type; + bool is_nullable; + bool has_collation; +} hint_arr[] = { + HINT(FIELD_TYPE_UNSIGNED, false, false), + HINT(FIELD_TYPE_STRING, false, false), + HINT(FIELD_TYPE_NUMBER, false, false), + HINT(FIELD_TYPE_INTEGER, false, false), + HINT(FIELD_TYPE_BOOLEAN, false, false), + HINT(FIELD_TYPE_SCALAR, false, false), + HINT(FIELD_TYPE_UNSIGNED, true, false), + HINT(FIELD_TYPE_STRING, true, false), + HINT(FIELD_TYPE_NUMBER, true, false), + HINT(FIELD_TYPE_INTEGER, true, false), + HINT(FIELD_TYPE_BOOLEAN, true, false), + HINT(FIELD_TYPE_SCALAR, true, false), + HINT(FIELD_TYPE_STRING, false, true), + HINT(FIELD_TYPE_SCALAR, false, true), + HINT(FIELD_TYPE_STRING, true, true), + HINT(FIELD_TYPE_SCALAR, true, true) +}; + +#undef HINT + void key_def_set_compare_func(struct key_def *def) { def->tuple_compare = NULL; def->tuple_compare_with_key = NULL; + def->tuple_compare_hinted = NULL; + def->tuple_compare_with_key_hinted = NULL; + def->key_hint = NULL; + def->tuple_hint = NULL; if (!def->is_nullable && !key_def_has_collation(def) && !def->has_json_paths) { /* Precalculated comparators don't use collation */ @@ -1242,6 +1587,8 @@ key_def_set_compare_func(struct key_def *def) if (i == def->part_count && precompiled_cmp_arr[k].p[i * 2] == UINT32_MAX) { def->tuple_compare = precompiled_cmp_arr[k].f; + def->tuple_compare_hinted = + precompiled_cmp_arr[k].f_hinted; break; } } @@ -1258,6 +1605,8 @@ key_def_set_compare_func(struct key_def *def) if (i == def->part_count) { def->tuple_compare_with_key = precompiled_cmp_wk_arr[k].f; + def->tuple_compare_with_key_hinted = + precompiled_cmp_wk_arr[k].f_hinted; break; } } @@ -1274,11 +1623,16 @@ key_def_set_compare_func(struct key_def *def) if (def->tuple_compare == NULL) { def->tuple_compare = cmp_arr[k].tuple_compare; + def->tuple_compare_hinted = + cmp_arr[k].tuple_compare_hinted; } if (def->tuple_compare_with_key == NULL) { def->tuple_compare_with_key = cmp_arr[k]. tuple_compare_with_key; + def->tuple_compare_with_key_hinted = + cmp_arr[k]. + tuple_compare_with_key_hinted; } break; } @@ -1286,4 +1640,14 @@ key_def_set_compare_func(struct key_def *def) } assert(def->tuple_compare != NULL && def->tuple_compare_with_key != NULL); + for (uint32_t k = 0; k < sizeof(hint_arr) / sizeof(hint_arr[0]); k++) { + if (def->parts->type == hint_arr[k].type && + key_part_is_nullable(def->parts) == + hint_arr[k].is_nullable && + (def->parts->coll != NULL) == hint_arr[k].has_collation) { + def->key_hint = hint_arr[k].key_hint; + def->tuple_hint = hint_arr[k].tuple_hint; + break; + } + } } diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c index afc15e809..d110b156b 100644 --- a/src/lib/coll/coll.c +++ b/src/lib/coll/coll.c @@ -133,6 +133,33 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, return s_len; } +int +coll_icu_get_sort_key(const char *s, size_t s_len, char *buff, size_t buff_len, + struct coll *coll) +{ + assert(coll->type == COLL_TYPE_ICU); + UCharIterator itr; + uiter_setUTF8(&itr, s, s_len); + uint32_t state[2] = {0, 0}; + UErrorCode status = U_ZERO_ERROR; + int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, + (uint8_t *)buff, buff_len, &status); + assert(got >= 0 && (size_t)got <= buff_len); + assert(U_SUCCESS(status)); + return got; +} + +int +coll_bin_get_sort_key(const char *s, size_t s_len, char *buff, size_t buff_len, + struct coll *coll) +{ + (void)coll; + assert(coll->type == COLL_TYPE_BINARY); + int to_copy = (int)MIN(s_len, buff_len); + memcpy(buff, s, to_copy); + return to_copy; +} + /** * Set up ICU collator and init cmp and hash members of collation. * @param coll Collation to set up. @@ -242,6 +269,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def) } coll->cmp = coll_icu_cmp; coll->hash = coll_icu_hash; + coll->get_sort_key = coll_icu_get_sort_key; return 0; } @@ -356,6 +384,7 @@ coll_new(const struct coll_def *def) coll->collator = NULL; coll->cmp = coll_bin_cmp; coll->hash = coll_bin_hash; + coll->get_sort_key = coll_bin_get_sort_key; break; default: unreachable(); diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h index 8c9f94293..a68668ec5 100644 --- a/src/lib/coll/coll.h +++ b/src/lib/coll/coll.h @@ -48,6 +48,9 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t, typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, struct coll *coll); +typedef int (*coll_get_sort_key_f)(const char *s, size_t s_len, char *buff, + size_t buff_len, struct coll *coll); + struct UCollator; /** @@ -62,6 +65,8 @@ struct coll { /** String comparator. */ coll_cmp_f cmp; coll_hash_f hash; + /** Fill memory buffer with data. */ + coll_get_sort_key_f get_sort_key; /** Reference counter. */ int refs; /** diff --git a/test/engine/ddl.result b/test/engine/ddl.result index 8d34d5ef4..1791138c6 100644 --- a/test/engine/ddl.result +++ b/test/engine/ddl.result @@ -1935,6 +1935,186 @@ s:drop() --- ... -- +-- gh-3961: Introduce tuple comparison hints +-- +s = box.schema.space.create('test', {engine = engine}) +--- +... +pk = s:create_index('pk') +--- +... +i1 = s:create_index('i1', {unique = true, parts = {2, 'scalar'}}) +--- +... +bdbl = (2.1)^62; +--- +... +nbdbl = -bdbl +--- +... +_ = s:insert{0, bdbl} +--- +... +_ = s:insert{1, nbdbl} +--- +... +_ = s:insert{2, -2.3} +--- +... +_ = s:insert{3, -2} +--- +... +_ = s:insert{4, -1.3} +--- +... +_ = s:insert{5, -1} +--- +... +_ = s:insert{6, 0} +--- +... +_ = s:insert{7, 1} +--- +... +_ = s:insert{8, 1.3} +--- +... +_ = s:insert{9, 2} +--- +... +_ = s:insert{10, 2.3} +--- +... +i1:select() +--- +- - [1, -9.4972150816945e+19] + - [2, -2.3] + - [3, -2] + - [4, -1.3] + - [5, -1] + - [6, 0] + - [7, 1] + - [8, 1.3] + - [9, 2] + - [10, 2.3] + - [0, 9.4972150816945e+19] +... +i1:alter{parts = {2, 'number'}} +--- +... +_ = s:insert{11, -1.2} +--- +... +_ = s:insert{12, 1.2} +--- +... +_ = s:insert{13, -5} +--- +... +_ = s:insert{14, 5} +--- +... +i1:select() +--- +- - [1, -9.4972150816945e+19] + - [13, -5] + - [2, -2.3] + - [3, -2] + - [4, -1.3] + - [11, -1.2] + - [5, -1] + - [6, 0] + - [7, 1] + - [12, 1.2] + - [8, 1.3] + - [9, 2] + - [10, 2.3] + - [14, 5] + - [0, 9.4972150816945e+19] +... +_ = i1:delete({-2.3}) +--- +... +_ = i1:delete({-1.3}) +--- +... +_ = i1:delete({1.3}) +--- +... +_ = i1:delete({2.3}) +--- +... +_ = i1:delete({-1.2}) +--- +... +_ = i1:delete({1.2}) +--- +... +_ = i1:delete({bdbl}) +--- +... +_ = i1:delete({nbdbl}) +--- +... +i1:alter{parts = {2, 'integer'}} +--- +... +_ = s:insert{15, -4} +--- +... +_ = s:insert{16, 4} +--- +... +i1:select() +--- +- - [13, -5] + - [15, -4] + - [3, -2] + - [5, -1] + - [6, 0] + - [7, 1] + - [9, 2] + - [16, 4] + - [14, 5] +... +_ = i1:delete({-5}) +--- +... +_ = i1:delete({-4}) +--- +... +_ = i1:delete({-2}) +--- +... +_ = i1:delete({-1}) +--- +... +i1:alter{parts = {2, 'unsigned'}} +--- +... +_ = s:insert({17, 10}) +--- +... +i1:alter{parts = {2, 'scalar'}} +--- +... +_ = s:insert({18, 1.1}) +--- +... +i1:select() +--- +- - [6, 0] + - [7, 1] + - [18, 1.1] + - [9, 2] + - [16, 4] + - [14, 5] + - [17, 10] +... +s:drop() +--- +... +-- -- Creating/altering a secondary index of a non-empty space. -- s = box.schema.space.create('test', {engine = engine}) diff --git a/test/engine/ddl.test.lua b/test/engine/ddl.test.lua index cdaf7a5bf..a73075498 100644 --- a/test/engine/ddl.test.lua +++ b/test/engine/ddl.test.lua @@ -710,6 +710,55 @@ _ = s:create_index('sk', {parts = {2, 'unsigned'}}) c:get() s:drop() +-- +-- gh-3961: Introduce tuple comparison hints +-- +s = box.schema.space.create('test', {engine = engine}) +pk = s:create_index('pk') +i1 = s:create_index('i1', {unique = true, parts = {2, 'scalar'}}) +bdbl = (2.1)^62; +nbdbl = -bdbl +_ = s:insert{0, bdbl} +_ = s:insert{1, nbdbl} +_ = s:insert{2, -2.3} +_ = s:insert{3, -2} +_ = s:insert{4, -1.3} +_ = s:insert{5, -1} +_ = s:insert{6, 0} +_ = s:insert{7, 1} +_ = s:insert{8, 1.3} +_ = s:insert{9, 2} +_ = s:insert{10, 2.3} +i1:select() +i1:alter{parts = {2, 'number'}} +_ = s:insert{11, -1.2} +_ = s:insert{12, 1.2} +_ = s:insert{13, -5} +_ = s:insert{14, 5} +i1:select() +_ = i1:delete({-2.3}) +_ = i1:delete({-1.3}) +_ = i1:delete({1.3}) +_ = i1:delete({2.3}) +_ = i1:delete({-1.2}) +_ = i1:delete({1.2}) +_ = i1:delete({bdbl}) +_ = i1:delete({nbdbl}) +i1:alter{parts = {2, 'integer'}} +_ = s:insert{15, -4} +_ = s:insert{16, 4} +i1:select() +_ = i1:delete({-5}) +_ = i1:delete({-4}) +_ = i1:delete({-2}) +_ = i1:delete({-1}) +i1:alter{parts = {2, 'unsigned'}} +_ = s:insert({17, 10}) +i1:alter{parts = {2, 'scalar'}} +_ = s:insert({18, 1.1}) +i1:select() +s:drop() + -- -- Creating/altering a secondary index of a non-empty space. -- -- 2.21.0