From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v4 3/3] memtx: introduce tuple compare hint Date: Thu, 28 Feb 2019 16:38:49 +0300 Message-Id: <9137be7b9366befaa48ee5e7a396f85b76d40dd4.1551360482.git.kshcherbatov@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: kostja@tarantool.org, Alexandr Lyapunov List-ID: From: Alexandr Lyapunov Implement functions for retrieving tuple hints for a particular key_def. Hint is an integer that can be used for tuple comparison optimization: if a hint of one tuple is less than a hint of another then the first tuple is definitely less than the second; only if hints are equal tuple_compare must be called for getting comparison result. Hints are calculated using only the first part of key_def. Hints are only useful when: * they are precalculated and stored along with the tuple; calculation is not cheap (cheaper than tuple_compare btw) but precalculated hints allow to compare tuples without even fetching tuple data. * first part of key_def is 'string'(for now) * since hint is calculated using only the first part of key_def (and only first several characters if it is a string) this part must be different with high probability for every tuple pair. @kshcherbatov: massive refactoring, new hints for number, boolean, binary collation. Closes #3961 --- src/box/CMakeLists.txt | 1 + src/box/key_def.c | 2 + src/box/key_def.h | 44 ++++++ src/box/memtx_tree.cc | 43 +++++- src/box/tuple_hint.cc | 301 +++++++++++++++++++++++++++++++++++++++++ src/box/tuple_hint.h | 60 ++++++++ src/lib/coll/coll.c | 33 +++++ src/lib/coll/coll.h | 4 + 8 files changed, 484 insertions(+), 4 deletions(-) create mode 100644 src/box/tuple_hint.cc create mode 100644 src/box/tuple_hint.h diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt index d4d9b7aeb..f0c508c02 100644 --- a/src/box/CMakeLists.txt +++ b/src/box/CMakeLists.txt @@ -38,6 +38,7 @@ add_library(tuple STATIC tuple_format.c tuple_update.c tuple_compare.cc + tuple_hint.cc tuple_extract_key.cc tuple_hash.cc tuple_bloom.c diff --git a/src/box/key_def.c b/src/box/key_def.c index 432b72a97..ced5b0580 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -34,6 +34,7 @@ #include "tuple_compare.h" #include "tuple_extract_key.h" #include "tuple_hash.h" +#include "tuple_hint.h" #include "column_mask.h" #include "schema_def.h" #include "coll_id_cache.h" @@ -135,6 +136,7 @@ key_def_set_func(struct key_def *def) key_def_set_compare_func(def); key_def_set_hash_func(def); key_def_set_extract_func(def); + key_def_set_hint_func(def); } static void diff --git a/src/box/key_def.h b/src/box/key_def.h index dd62f6a35..00775368f 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -153,6 +153,13 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple, typedef uint32_t (*key_hash_t)(const char *key, struct key_def *key_def); +/** @copydoc tuple_hint() */ +typedef uint64_t (*tuple_hint_t)(const struct tuple *tuple, + struct key_def *key_def); + +/** @copydoc key_hint() */ +typedef uint64_t (*key_hint_t)(const char *key, struct key_def *key_def); + /* Definition of a multipart key. */ struct key_def { /** @see tuple_compare() */ @@ -167,6 +174,10 @@ struct key_def { tuple_hash_t tuple_hash; /** @see key_hash() */ key_hash_t key_hash; + /** @see tuple_hint() */ + tuple_hint_t tuple_hint; + /** @see key_hint() */ + key_hint_t key_hint; /** * Minimal part count which always is unique. For example, * if a secondary index is unique, then @@ -624,6 +635,39 @@ key_hash(const char *key, struct key_def *key_def) return key_def->key_hash(key, key_def); } + /* + * Get a comparison hint of a tuple. + * Hint is such a function h(tuple) in terms of particular key_def that + * has follows rules: + * if h(t1) < h(t2) then t1 < t2; + * if h(t1) > h(t2) then t1 > t2; + * if t1 == t2 then h(t1) == h(t2); + * These rules means that instead of direct tuple vs tuple (or tuple vs key) + * comparison one may compare theirs hints first; and only if theirs hints + * are equal compare the tuples themselves. + * @param tuple - tuple to get hint of. + * @param key_def - key_def that defines which comparison is used. + * @return the hint. + */ +static inline uint64_t +tuple_hint(const struct tuple *tuple, struct key_def *key_def) +{ + return key_def->tuple_hint(tuple, key_def); +} + +/** + * Get a comparison hint of a key. + * @See tuple_hint for hint term definition. + * @param key - key to get hint of. + * @param key_def - key_def that defines which comparison is used. + * @return the hint. + */ +static inline uint64_t +key_hint(const char *key, struct key_def *key_def) +{ + return key_def->key_hint(key, key_def); +} + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/box/memtx_tree.cc b/src/box/memtx_tree.cc index 346585dea..2b8a07f00 100644 --- a/src/box/memtx_tree.cc +++ b/src/box/memtx_tree.cc @@ -36,6 +36,7 @@ #include "memory.h" #include "fiber.h" #include "tuple.h" +#include "tuple_hint.h" #include #include @@ -47,6 +48,11 @@ class MemtxTreeKeyData { const char *key; /** Number of msgpacked search fields. */ uint32_t part_count; + /** + * Comparison hint. Calculated automatically on method + * MemtxTreeKeyData::set. + */ + uint64_t hint; public: /** * Return "payload" data that this object stores: @@ -59,6 +65,16 @@ public: return this->key; } + /** + * Return comparasion hint is calculated before "payload" + * store on MemtxTreeKeyData::set call. + */ + uint64_t + get_hint(void) const + { + return this->hint; + } + /** * Perform this MemtxTreeKeyData object * initialization. @@ -66,9 +82,9 @@ public: void set(const char *key, uint32_t part_count, struct key_def *key_def) { - (void)key_def; this->key = key; this->part_count = part_count; + this->hint = part_count > 0 ? key_hint(key, key_def) : 0; } }; @@ -78,6 +94,11 @@ public: class MemtxTreeData { /** Data tuple pointer. */ struct tuple *tuple; + /** + * Comparison hint. Calculated automatically on method + * MemtxTreeData::set. + */ + uint64_t hint; public: /** * Return "payload" data that this object stores: @@ -95,6 +116,7 @@ public: { (void)key_def; this->tuple = tuple; + this->hint = tuple_hint(tuple, key_def); } /** Clear this MemtxTreeData object. */ @@ -124,7 +146,13 @@ public: int compare(const MemtxTreeData *elem, struct key_def *key_def) const { - return tuple_compare(this->tuple, elem->tuple, key_def); + if (likely(this->hint != elem->hint && + this->hint != INVALID_HINT && + elem->hint != INVALID_HINT)) { + return this->hint < elem->hint ? -1 : 1; + } else { + return tuple_compare(this->tuple, elem->tuple, key_def); + } } /** @@ -137,8 +165,15 @@ public: { uint32_t part_count; const char *key_data = key->get(&part_count); - return tuple_compare_with_key(this->tuple, key_data, part_count, - key_def); + uint64_t key_hint = key->get_hint(); + if (likely(this->hint != key_hint && + this->hint != INVALID_HINT && + key_hint != INVALID_HINT)) { + return this->hint < key_hint ? -1 : 1; + } else { + return tuple_compare_with_key(this->tuple, key_data, + part_count, key_def); + } } }; diff --git a/src/box/tuple_hint.cc b/src/box/tuple_hint.cc new file mode 100644 index 000000000..5acabc985 --- /dev/null +++ b/src/box/tuple_hint.cc @@ -0,0 +1,301 @@ +/* + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "tuple.h" +#include "tuple_hint.h" +#include "lib/coll/coll.h" +#include + +static uint64_t +key_hint_default(const char *key, struct key_def *key_def) +{ + (void)key; + (void)key_def; + return INVALID_HINT; +} + +static uint64_t +tuple_hint_default(const struct tuple *tuple, struct key_def *key_def) +{ + (void)tuple; + (void)key_def; + return INVALID_HINT; +} + +template +static uint64_t +key_hint_uint(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_UNSIGNED); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + assert(mp_typeof(*key) == MP_UINT); + uint64_t val = mp_decode_uint(&key); + if (unlikely(val > INT64_MAX)) + return INT64_MAX; + return val - (uint64_t)INT64_MIN; +} + +template +static uint64_t +tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_UNSIGNED); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_uint(field, key_def); +} + +template +static uint64_t +key_hint_int(const char *key, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_INTEGER); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + if (mp_typeof(*key) == MP_UINT) { + uint64_t val = mp_decode_uint(&key); + if (unlikely(val > INT64_MAX)) + return INT64_MAX; + return val - (uint64_t)INT64_MIN; + } else { + assert(mp_typeof(*key) == MP_INT); + int64_t val = mp_decode_int(&key); + return (uint64_t)val - (uint64_t)INT64_MIN; + } +} + +template +static uint64_t +tuple_hint_int(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_INTEGER); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_int(field, key_def); +} + +template +static uint64_t +key_hint_number(const char *key, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_NUMBER); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + uint64_t val = 0; + switch (mp_typeof(*key)) { + case MP_FLOAT: + case MP_DOUBLE: { + double f = mp_typeof(*key) == MP_FLOAT ? + mp_decode_float(&key) : mp_decode_double(&key); + if (isnan(f) || isinf(f)) + return INVALID_HINT; + double ival; + (void)modf(f, &ival); + if (unlikely(ival >= (double)INT64_MAX)) + return INT64_MAX; + if (unlikely(ival <= (double)INT64_MIN)) + return 0; + val = (uint64_t)ival; + break; + } + case MP_INT: { + val = (uint64_t)mp_decode_int(&key); + break; + } + case MP_UINT: { + val = mp_decode_uint(&key); + if (val > INT64_MAX) + return INT64_MAX; + break; + } + default: + unreachable(); + } + return val - (uint64_t)INT64_MIN; +} + +template +static uint64_t +tuple_hint_number(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_NUMBER); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_number(field, key_def); +} + +template +static uint64_t +key_hint_boolean(const char *key, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_BOOLEAN); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + bool val = mp_decode_bool(&key); + return (uint64_t)val - (uint64_t)INT64_MIN; +} + +template +static uint64_t +tuple_hint_boolean(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_BOOLEAN); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_boolean(field, key_def); +} + +template +static uint64_t +key_hint_string(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->coll == NULL); + assert(key_def->parts->type == FIELD_TYPE_STRING); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + assert(mp_typeof(*key) == MP_STR); + uint32_t len; + const unsigned char *str = + (const unsigned char *)mp_decode_str(&key, &len); + uint64_t result = 0; + uint32_t process_len = MIN(len, 8); + for (uint32_t i = 0; i < process_len; i++) { + result <<= 8; + result |= str[i]; + } + result <<= 8 * (8 - process_len); + return result; +} + +template +static uint64_t +tuple_hint_string(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->coll == NULL); + assert(key_def->parts->type == FIELD_TYPE_STRING); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_string(field, key_def); +} + +template +static uint64_t +key_hint_string_coll(const char *key, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_STRING && + key_def->parts->coll != NULL); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + assert(mp_typeof(*key) == MP_STR); + uint32_t len; + const char *str = mp_decode_str(&key, &len); + return key_def->parts->coll->hint(str, len, key_def->parts->coll); +} + +template +static uint64_t +tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_STRING && + key_def->parts->coll != NULL); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_string_coll(field, key_def); +} + +void +key_def_set_hint_func(struct key_def *def) +{ + def->key_hint = key_hint_default; + def->tuple_hint = tuple_hint_default; + bool is_nullable = key_part_is_nullable(def->parts); + if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL) { + def->key_hint = is_nullable ? key_hint_string_coll : + key_hint_string_coll; + def->tuple_hint = is_nullable ? tuple_hint_string_coll : + tuple_hint_string_coll; + return; + } + switch (def->parts->type) { + case FIELD_TYPE_UNSIGNED: + def->key_hint = is_nullable ? key_hint_uint : + key_hint_uint; + def->tuple_hint = is_nullable ? tuple_hint_uint : + tuple_hint_uint; + break; + case FIELD_TYPE_INTEGER: + def->key_hint = is_nullable ? key_hint_int : + key_hint_int; + def->tuple_hint = is_nullable ? tuple_hint_int : + tuple_hint_int; + break; + case FIELD_TYPE_STRING: + def->key_hint = is_nullable ? key_hint_string : + key_hint_string; + def->tuple_hint = is_nullable ? tuple_hint_string : + tuple_hint_string; + break; + case FIELD_TYPE_NUMBER: + def->key_hint = is_nullable ? key_hint_number : + key_hint_number; + def->tuple_hint = is_nullable ? tuple_hint_number : + tuple_hint_number; + break; + case FIELD_TYPE_BOOLEAN: + def->key_hint = is_nullable ? key_hint_boolean : + key_hint_boolean; + def->tuple_hint = is_nullable ? tuple_hint_boolean : + tuple_hint_boolean; + break; + default: + break; + }; +} diff --git a/src/box/tuple_hint.h b/src/box/tuple_hint.h new file mode 100644 index 000000000..19b08dec1 --- /dev/null +++ b/src/box/tuple_hint.h @@ -0,0 +1,60 @@ +#ifndef TARANTOOL_BOX_TUPLE_HINT_H_INCLUDED +#define TARANTOOL_BOX_TUPLE_HINT_H_INCLUDED +/* + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#if defined(__cplusplus) +extern "C" { +#endif /* defined(__cplusplus) */ + +#include +#include + +struct key_def; + +/** + * If it is impossible to calculate hint, this value is returned. + * Such hint must not be used for comparisons. + */ +#define INVALID_HINT UINT64_MAX + +/** + * Initialize tuple_hint() and key_hint() functions for the + * key_def. + * @param key_def key definition to set up. + */ +void +key_def_set_hint_func(struct key_def *key_def); + +#if defined(__cplusplus) +} /* extern "C" */ +#endif /* defined(__cplusplus) */ + +#endif /* TARANTOOL_BOX_TUPLE_HINT_H_INCLUDED */ diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c index 6d9c44dbf..fe0ff171f 100644 --- a/src/lib/coll/coll.c +++ b/src/lib/coll/coll.c @@ -133,6 +133,37 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, return s_len; } +/** Get a compare hint of a binary collation. */ +static uint64_t +coll_bin_hint(const char *s, size_t s_len, struct coll *coll) +{ + (void)coll; + uint64_t result = 0; + uint32_t process_len = MIN(s_len, 8); + for (uint32_t i = 0; i < process_len; i++) { + result <<= 8; + result |= ((unsigned char *)s)[i]; + } + result <<= 8 * (8 - process_len); + return result; +} + +/** Get a compare hint of a string using ICU collation. */ +static uint64_t +coll_icu_hint(const char *s, size_t s_len, struct coll *coll) +{ + assert(coll->type == COLL_TYPE_ICU); + UCharIterator itr; + uiter_setUTF8(&itr, s, s_len); + uint8_t buf[8]; + uint32_t state[2] = {0, 0}; + UErrorCode status = U_ZERO_ERROR; + int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf, + sizeof(buf), &status); + assert(got >= 0 && got <= 8); + return coll_bin_hint((const char *)buf, got, NULL); +} + /** * Set up ICU collator and init cmp and hash members of collation. * @param coll Collation to set up. @@ -242,6 +273,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def) } coll->cmp = coll_icu_cmp; coll->hash = coll_icu_hash; + coll->hint = coll_icu_hint; return 0; } @@ -332,6 +364,7 @@ coll_new(const struct coll_def *def) coll->collator = NULL; coll->cmp = coll_bin_cmp; coll->hash = coll_bin_hash; + coll->hint = coll_bin_hint; break; default: unreachable(); diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h index 9c725712a..53133dae3 100644 --- a/src/lib/coll/coll.h +++ b/src/lib/coll/coll.h @@ -47,6 +47,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t, typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, struct coll *coll); +typedef uint64_t (*coll_hint_f)(const char *s, size_t s_len, struct coll *coll); + struct UCollator; /** @@ -61,6 +63,8 @@ struct coll { /** String comparator. */ coll_cmp_f cmp; coll_hash_f hash; + /** The pointer to routine calculating tuple hint. */ + coll_hint_f hint; /** Reference counter. */ int refs; /** -- 2.21.0