From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v3 7/7] memtx: introduce tuple compare hint Date: Fri, 22 Feb 2019 18:42:32 +0300 Message-Id: In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: Alexandr Lyapunov List-ID: From: Alexandr Lyapunov Implement functions for retrieving tuple hints for a particular key_def. Hint is an integer that can be used for tuple comparison optimization: if a hint of one tuple is less than a hint of another then the first tuple is definitely less than the second; only if hints are equal tuple_compare must be called for getting comparison result. Hints are calculated using only the first part of key_def. Hints are only useful when: * they are precalculated and stored along with the tuple; calculation is not cheap (cheaper than tuple_compare btw) but precalculated hints allow to compare tuples without even fetching tuple data. * first part of key_def is 'string'(for now) * since hint is calculated using only the first part of key_def (and only first several characters if it is a string) this part must be different with high probability for every tuple pair. Closes #3961 --- src/box/CMakeLists.txt | 1 + src/box/key_def.c | 2 + src/box/key_def.h | 44 ++++++++ src/box/memtx_tree_decl.c | 89 ++++++++++++++++ src/box/tuple_hint.cc | 210 ++++++++++++++++++++++++++++++++++++++ src/box/tuple_hint.h | 51 +++++++++ src/coll.c | 33 ++++++ src/coll.h | 4 + 8 files changed, 434 insertions(+) create mode 100644 src/box/tuple_hint.cc create mode 100644 src/box/tuple_hint.h diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt index dc8cc2cd5..05a4f40f6 100644 --- a/src/box/CMakeLists.txt +++ b/src/box/CMakeLists.txt @@ -38,6 +38,7 @@ add_library(tuple STATIC tuple_format.c tuple_update.c tuple_compare.cc + tuple_hint.cc tuple_extract_key.cc tuple_hash.cc tuple_bloom.c diff --git a/src/box/key_def.c b/src/box/key_def.c index 432b72a97..ced5b0580 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -34,6 +34,7 @@ #include "tuple_compare.h" #include "tuple_extract_key.h" #include "tuple_hash.h" +#include "tuple_hint.h" #include "column_mask.h" #include "schema_def.h" #include "coll_id_cache.h" @@ -135,6 +136,7 @@ key_def_set_func(struct key_def *def) key_def_set_compare_func(def); key_def_set_hash_func(def); key_def_set_extract_func(def); + key_def_set_hint_func(def); } static void diff --git a/src/box/key_def.h b/src/box/key_def.h index dd62f6a35..00775368f 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -153,6 +153,13 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple, typedef uint32_t (*key_hash_t)(const char *key, struct key_def *key_def); +/** @copydoc tuple_hint() */ +typedef uint64_t (*tuple_hint_t)(const struct tuple *tuple, + struct key_def *key_def); + +/** @copydoc key_hint() */ +typedef uint64_t (*key_hint_t)(const char *key, struct key_def *key_def); + /* Definition of a multipart key. */ struct key_def { /** @see tuple_compare() */ @@ -167,6 +174,10 @@ struct key_def { tuple_hash_t tuple_hash; /** @see key_hash() */ key_hash_t key_hash; + /** @see tuple_hint() */ + tuple_hint_t tuple_hint; + /** @see key_hint() */ + key_hint_t key_hint; /** * Minimal part count which always is unique. For example, * if a secondary index is unique, then @@ -624,6 +635,39 @@ key_hash(const char *key, struct key_def *key_def) return key_def->key_hash(key, key_def); } + /* + * Get a comparison hint of a tuple. + * Hint is such a function h(tuple) in terms of particular key_def that + * has follows rules: + * if h(t1) < h(t2) then t1 < t2; + * if h(t1) > h(t2) then t1 > t2; + * if t1 == t2 then h(t1) == h(t2); + * These rules means that instead of direct tuple vs tuple (or tuple vs key) + * comparison one may compare theirs hints first; and only if theirs hints + * are equal compare the tuples themselves. + * @param tuple - tuple to get hint of. + * @param key_def - key_def that defines which comparison is used. + * @return the hint. + */ +static inline uint64_t +tuple_hint(const struct tuple *tuple, struct key_def *key_def) +{ + return key_def->tuple_hint(tuple, key_def); +} + +/** + * Get a comparison hint of a key. + * @See tuple_hint for hint term definition. + * @param key - key to get hint of. + * @param key_def - key_def that defines which comparison is used. + * @return the hint. + */ +static inline uint64_t +key_hint(const char *key, struct key_def *key_def) +{ + return key_def->key_hint(key, key_def); +} + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/box/memtx_tree_decl.c b/src/box/memtx_tree_decl.c index 82209eaab..a28fdd9fd 100644 --- a/src/box/memtx_tree_decl.c +++ b/src/box/memtx_tree_decl.c @@ -77,8 +77,97 @@ struct memtx_basic_tree_key_data { /* }}} */ +/* {{{ Memtx hinted tree class. *********************************/ + +/** + * Struct that is used as a key in BPS tree definition in + * memtx_hint_only_tree and memtx_hinted_tree. +*/ +struct memtx_hinted_tree_key_data { + /** Sequence of msgpacked search fields. */ + const char *key; + /** Number of msgpacked search fields. */ + uint32_t part_count; + /** + * Comparison hint. Calculated automatically on 'set' + * operation with MEMTX_TREE_KEY_SET(). + */ + uint64_t hint; +}; + +/** + * Struct that is used as a key in BPS tree definition in + * memtx_hint_only_tree and memtx_hinted_tree. + */ +struct memtx_hinted_tree_data { + /** Tuple this node is represents. */ + struct tuple *tuple; + /** + * Comparison hint. Calculated automatically on 'set' + * operation with MEMTX_TREE_ELEM_SET(). + */ + uint64_t hint; +}; + +#define MEMTX_TREE_NAME memtx_hinted_tree +#define memtx_tree_elem_t struct memtx_hinted_tree_data +#define memtx_tree_key_t struct memtx_hinted_tree_key_data +#define MEMTX_TREE_COMPARE(elem_a_ptr, elem_b_ptr, key_def) ({ \ + int rc; \ + if ((elem_a_ptr)->hint != (elem_b_ptr)->hint) { \ + rc = (elem_a_ptr)->hint < (elem_b_ptr)->hint ? -1 : 1; \ + } else { \ + rc = tuple_compare((elem_a_ptr)->tuple, (elem_b_ptr)->tuple, \ + key_def); \ + } \ + rc; \ +}) +#define MEMTX_TREE_COMPARE_KEY(elem_ptr, key_ptr, key_def) ({ \ + int rc; \ + if ((elem_ptr)->hint != (key_ptr)->hint) { \ + rc = (elem_ptr)->hint < (key_ptr)->hint ? -1 : 1; \ + } else { \ + rc = tuple_compare_with_key((elem_ptr)->tuple, (key_ptr)->key, \ + (key_ptr)->part_count, key_def); \ + } \ + rc; \ +}) +#define MEMTX_TREE_ELEM_SET(elem_ptr, info, key_def) ({ \ + (elem_ptr)->tuple = info; \ + (elem_ptr)->hint = tuple_hint(info, key_def); \ +}) +#define MEMTX_TREE_KEY_SET(key_ptr, key_val, part_count_val, key_def) ({ \ + (key_ptr)->key = key_val; \ + (key_ptr)->part_count = part_count_val; \ + (key_ptr)->hint = part_count_val > 0 ? key_hint(key_val, key_def) : 0; \ +}) +#define MEMTX_TREE_ELEM_GET(elem_ptr) ((elem_ptr)->tuple) +#define MEMTX_TREE_KEY_GET(key_ptr, part_count_ptr) \ + ({*part_count_ptr = (key_ptr)->part_count; (key_ptr)->key;}) +#define MEMTX_TREE_IDENTICAL(elem_a_ptr, elem_b_ptr) \ + ({(elem_a_ptr)->tuple == (elem_b_ptr)->tuple;}) + +#include "memtx_tree_impl.h" + +#undef memtx_tree_key_t +#undef memtx_tree_elem_t +#undef MEMTX_TREE_KEY_GET +#undef MEMTX_TREE_ELEM_GET +#undef MEMTX_TREE_KEY_SET +#undef MEMTX_TREE_ELEM_SET +#undef MEMTX_TREE_COMPARE_KEY +#undef MEMTX_TREE_COMPARE +#undef MEMTX_TREE_NAME +#undef MEMTX_TREE_IDENTICAL + +/* }}} */ + struct index * memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def) { + if (def->cmp_def->parts->type == FIELD_TYPE_STRING || + def->cmp_def->parts->type == FIELD_TYPE_UNSIGNED || + def->cmp_def->parts->type == FIELD_TYPE_INTEGER) + return memtx_hinted_tree_index_new(memtx, def); return memtx_basic_tree_index_new(memtx, def); } diff --git a/src/box/tuple_hint.cc b/src/box/tuple_hint.cc new file mode 100644 index 000000000..7b73403be --- /dev/null +++ b/src/box/tuple_hint.cc @@ -0,0 +1,210 @@ +/* + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "coll.h" +#include "tuple.h" +#include "tuple_hint.h" + +static uint64_t +key_hint_default(const char *key, struct key_def *key_def) +{ + (void)key; + (void)key_def; + return 0; +} + +static uint64_t +tuple_hint_default(const struct tuple *tuple, struct key_def *key_def) +{ + (void)tuple; + (void)key_def; + return 0; +} + +template +static uint64_t +key_hint_uint(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_UNSIGNED || + key_def->parts->type == FIELD_TYPE_INTEGER); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + assert(mp_typeof(*key) == MP_UINT); + uint64_t val = mp_decode_uint(&key); + if (val > INT64_MAX) + return INT64_MAX; + return val - (uint64_t)INT64_MIN; +} + +template +static uint64_t +tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_UNSIGNED); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_uint(field, key_def); +} + +template +static uint64_t +key_hint_int(const char *key, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_INTEGER); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + if (mp_typeof(*key) == MP_UINT) { + return key_hint_uint(key, key_def); + } else { + assert(mp_typeof(*key) == MP_INT); + int64_t val = mp_decode_int(&key); + return (uint64_t)val - (uint64_t)INT64_MIN; + } +} + +template +static uint64_t +tuple_hint_int(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_INTEGER); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_int(field, key_def); +} + +template +static uint64_t +key_hint_string(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_STRING); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + assert(key_def->parts->type == FIELD_TYPE_STRING); + assert(mp_typeof(*key) == MP_STR); + uint32_t len; + const unsigned char *str = + (const unsigned char *)mp_decode_str(&key, &len); + uint64_t result = 0; + uint32_t process_len = MIN(len, 8); + for (uint32_t i = 0; i < process_len; i++) { + result <<= 8; + result |= str[i]; + } + result <<= 8 * (8 - process_len); + return result; +} + +template +static uint64_t +tuple_hint_string(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_STRING); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_string(field, key_def); +} + +template +static uint64_t +key_hint_string_coll(const char *key, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_STRING && + key_def->parts->coll != NULL); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + assert(key_def->parts->type == FIELD_TYPE_STRING); + assert(mp_typeof(*key) == MP_STR); + assert(key_def->parts->coll != NULL); + uint32_t len; + const char *str = mp_decode_str(&key, &len); + return key_def->parts->coll->hint(str, len, key_def->parts->coll); +} + +template +static uint64_t +tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_STRING && + key_def->parts->coll != NULL); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_string_coll(field, key_def); +} + +void +key_def_set_hint_func(struct key_def *def) +{ + def->key_hint = key_hint_default; + def->tuple_hint = tuple_hint_default; + bool is_nullable = key_part_is_nullable(def->parts); + if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL) { + def->key_hint = is_nullable ? key_hint_string_coll : + key_hint_string_coll; + def->tuple_hint = is_nullable ? tuple_hint_string_coll : + tuple_hint_string_coll; + return; + } + switch (def->parts->type) { + case FIELD_TYPE_UNSIGNED: + def->key_hint = is_nullable ? key_hint_uint : + key_hint_uint; + def->tuple_hint = is_nullable ? tuple_hint_uint : + tuple_hint_uint; + break; + case FIELD_TYPE_INTEGER: + def->key_hint = is_nullable ? key_hint_int : + key_hint_int; + def->tuple_hint = is_nullable ? tuple_hint_int : + tuple_hint_int; + break; + case FIELD_TYPE_STRING: + def->key_hint = is_nullable ? key_hint_string : + key_hint_string; + def->tuple_hint = is_nullable ? tuple_hint_string : + tuple_hint_string; + break; + default: + break; + }; +} diff --git a/src/box/tuple_hint.h b/src/box/tuple_hint.h new file mode 100644 index 000000000..d38a96c8f --- /dev/null +++ b/src/box/tuple_hint.h @@ -0,0 +1,51 @@ +#ifndef TARANTOOL_BOX_TUPLE_HINT_H_INCLUDED +#define TARANTOOL_BOX_TUPLE_HINT_H_INCLUDED +/* + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#if defined(__cplusplus) +extern "C" { +#endif /* defined(__cplusplus) */ + +struct key_def; + +/** + * Initialize tuple_hint() and key_hint() functions for the + * key_def. + * @param key_def key definition to set up. + */ +void +key_def_set_hint_func(struct key_def *key_def); + +#if defined(__cplusplus) +} /* extern "C" */ +#endif /* defined(__cplusplus) */ + +#endif /* TARANTOOL_BOX_TUPLE_HINT_H_INCLUDED */ diff --git a/src/coll.c b/src/coll.c index 6d9c44dbf..bf892087d 100644 --- a/src/coll.c +++ b/src/coll.c @@ -133,6 +133,37 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, return s_len; } +/** Get a compare hint of a binary collation. */ +static uint64_t +coll_bin_hint(const char *s, size_t s_len, struct coll *coll) +{ + (void)coll; + uint64_t result = 0; + uint32_t process_len = MIN(s_len, 8); + for (uint32_t i = 0; i < process_len; i++) { + result <<= 8; + result |= ((unsigned char *)s)[i]; + } + result <<= 8 * (8 - process_len); + return result; +} + +/** Get a compare hint of a string using ICU collation. */ +static uint64_t +coll_icu_hint(const char *s, size_t s_len, struct coll *coll) +{ + assert(coll->type == COLL_TYPE_ICU); + UCharIterator itr; + uiter_setUTF8(&itr, s, s_len); + uint8_t buf[7]; + uint32_t state[2] = {0, 0}; + UErrorCode status = U_ZERO_ERROR; + int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf, + sizeof(buf), &status); + assert(got >= 0 && got <= 7); + return coll_bin_hint((const char *)buf, got, NULL); +} + /** * Set up ICU collator and init cmp and hash members of collation. * @param coll Collation to set up. @@ -242,6 +273,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def) } coll->cmp = coll_icu_cmp; coll->hash = coll_icu_hash; + coll->hint = coll_icu_hint; return 0; } @@ -332,6 +364,7 @@ coll_new(const struct coll_def *def) coll->collator = NULL; coll->cmp = coll_bin_cmp; coll->hash = coll_bin_hash; + coll->hint = coll_bin_hint; break; default: unreachable(); diff --git a/src/coll.h b/src/coll.h index 9c725712a..53133dae3 100644 --- a/src/coll.h +++ b/src/coll.h @@ -47,6 +47,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t, typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, struct coll *coll); +typedef uint64_t (*coll_hint_f)(const char *s, size_t s_len, struct coll *coll); + struct UCollator; /** @@ -61,6 +63,8 @@ struct coll { /** String comparator. */ coll_cmp_f cmp; coll_hash_f hash; + /** The pointer to routine calculating tuple hint. */ + coll_hint_f hint; /** Reference counter. */ int refs; /** -- 2.20.1