From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Subject: Re: [tarantool-patches] [PATCH v2 4/4] box: introduce tuple compare hint for string index From: Kirill Shcherbatov Reply-To: tarantool-patches@freelists.org References: <48f09c5c8f53bfa63a52da90ffe4672ade7ad9ae.1550050245.git.kshcherbatov@tarantool.org> Message-ID: <414fffe7-3d58-fd42-3334-d7072c78afb4@tarantool.org> Date: Fri, 15 Feb 2019 21:34:27 +0300 MIME-Version: 1.0 In-Reply-To: <48f09c5c8f53bfa63a52da90ffe4672ade7ad9ae.1550050245.git.kshcherbatov@tarantool.org> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit To: tarantool-patches@freelists.org, kostja@tarantool.org, vdavydov.dev@gmail.com Cc: Alexandr Lyapunov List-ID: Hi! I've implemented Kostya's concepts for compatible hints for UINT/INT. Also fixed nullability troubles. Now alter looks fine for me. I've thought about number/integer convertions, but such alter would not pass, I guess. ========================================= Implement functions for retrieving tuple hints for a particular key_def. Hint is an integer that can be used for tuple comparison optimization: if a hint of one tuple is less than a hint of another then the first tuple is definitely less than the second; only if hints are equal tuple_compare must be called for getting comparison result. Hints are calculated using only the first part of key_def. Hints are only useful when: * they are precalculated and stored along with the tuple; calculation is not cheap (cheaper than tuple_compare btw) but precalculated hints allow to compare tuples without even fetching tuple data. * first part of key_def is 'string'(for now) * since hint is calculated using only the first part of key_def (and only first several characters if it is a string) this part must be different with high probability for every tuple pair. Part of #3961 --- src/box/key_def.c | 1 + src/box/key_def.h | 44 +++++++++ src/box/memtx_tree_decl.c | 125 ++++++++++++++++++++++++++ src/box/tuple_compare.cc | 182 ++++++++++++++++++++++++++++++++++++++ src/box/tuple_compare.h | 7 ++ src/coll.c | 45 ++++++++++ src/coll.h | 4 + 7 files changed, 408 insertions(+) diff --git a/src/box/key_def.c b/src/box/key_def.c index 9411ade39..ceca1ebee 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -135,6 +135,7 @@ key_def_set_cmp(struct key_def *def) def->tuple_compare_with_key = tuple_compare_with_key_create(def); tuple_hash_func_set(def); tuple_extract_key_set(def); + tuple_hint_set(def); } static void diff --git a/src/box/key_def.h b/src/box/key_def.h index bf3e63b6d..a2a0bbecb 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -153,6 +153,13 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple, typedef uint32_t (*key_hash_t)(const char *key, struct key_def *key_def); +/** @copydoc tuple_hint() */ +typedef uint64_t (*tuple_hint_t)(const struct tuple *tuple, + struct key_def *key_def); + +/** @copydoc key_hint() */ +typedef uint64_t (*key_hint_t)(const char *key, struct key_def *key_def); + /* Definition of a multipart key. */ struct key_def { /** @see tuple_compare() */ @@ -167,6 +174,10 @@ struct key_def { tuple_hash_t tuple_hash; /** @see key_hash() */ key_hash_t key_hash; + /** @see tuple_hint() */ + tuple_hint_t tuple_hint; + /** @see key_hint() */ + key_hint_t key_hint; /** * Minimal part count which always is unique. For example, * if a secondary index is unique, then @@ -551,6 +562,39 @@ tuple_compare_with_key(const struct tuple *tuple, const char *key, return key_def->tuple_compare_with_key(tuple, key, part_count, key_def); } +/** + * Get a comparison hint of a tuple. + * Hint is such a function h(tuple) in terms of particular key_def that + * has follows rules: + * if h(t1) < h(t2) then t1 < t2; + * if h(t1) > h(t2) then t1 > t2; + * if t1 == t2 then h(t1) == h(t2); + * These rules means that instead of direct tuple vs tuple (or tuple vs key) + * comparison one may compare theirs hints first; and only if theirs hints + * are equal compare the tuples themselves. + * @param tuple - tuple to get hint of. + * @param key_def - key_def that defines which comparison is used. + * @return the hint. + */ +static inline uint64_t +tuple_hint(const struct tuple *tuple, struct key_def *key_def) +{ + return key_def->tuple_hint(tuple, key_def); +} + +/** + * Get a comparison hint of a key. + * @See tuple_hint for hint term definition. + * @param key - key to get hint of. + * @param key_def - key_def that defines which comparison is used. + * @return the hint. + */ +static inline uint64_t +key_hint(const char *key, struct key_def *key_def) +{ + return key_def->key_hint(key, key_def); +} + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/box/memtx_tree_decl.c b/src/box/memtx_tree_decl.c index 084a0594f..55016148b 100644 --- a/src/box/memtx_tree_decl.c +++ b/src/box/memtx_tree_decl.c @@ -77,8 +77,133 @@ struct memtx_tuple_tree_key_data { /* }}} */ +/* {{{ Memtx hinted and hint_only tree class. *******************/ + +/** + * Struct that is used as a key in BPS tree definition in + * memtx_hint_only_tree and memtx_hinted_tree. +*/ +struct memtx_hinted_tree_key_data { + /** Sequence of msgpacked search fields. */ + const char *key; + /** Number of msgpacked search fields. */ + uint32_t part_count; + /** + * Compare hint. Is calculated automatically on 'set' + * operation with memtx_hinted_tree_key_data_set(). + */ + uint64_t hint; +}; + +/** + * Struct that is used as a key in BPS tree definition in + * memtx_hint_only_tree and memtx_hinted_tree. + */ +struct memtx_hinted_tree_data { + /** Tuple this node is representing. */ + struct tuple *tuple; + /** + * Compare hint. Is calculated automatically on 'set' + * operation with memtx_hinted_tree_data_set(). + */ + uint64_t hint; +}; + +/** + * Compare memtx_hinted_tree records. + */ +static int +memtx_hinted_tree_data_cmp(struct memtx_hinted_tree_data *a, + struct memtx_hinted_tree_data *b, + struct key_def *key_def) +{ + if (a->hint != b->hint) + return a->hint < b->hint ? -1 : 1; + return tuple_compare(a->tuple, b->tuple, key_def); +} + +/** + * Compare memtx_hinted_tree record with key. + */ +static int +memtx_hinted_tree_data_cmp_with_key(struct memtx_hinted_tree_data *a, + struct memtx_hinted_tree_key_data *key, + struct key_def *key_def) +{ + if (a->hint != key->hint) + return a->hint < key->hint ? -1 : 1; + return tuple_compare_with_key(a->tuple, key->key, key->part_count, + key_def); +} + +/** + * Initialize memtx_hinted_tree or memtx_hint_only_tree record + * with tuple and recalculate internal hint field. + */ +static void +memtx_hinted_tree_data_set(struct memtx_hinted_tree_data *data, + struct tuple *tuple, struct key_def *key_def) +{ + data->tuple = tuple; + data->hint = tuple_hint(tuple, key_def); +} + +/** + * Initialize memtx_hinted_tree or memtx_hint_only_tree key with + * key raw and part count and recalculate internal hint field. + */ +static void +memtx_hinted_tree_key_data_set(struct memtx_hinted_tree_key_data *key_data, + const char *key, uint32_t part_count, + struct key_def *key_def) +{ + key_data->key = key; + key_data->part_count = part_count; + key_data->hint = key != NULL && part_count > 0 ? + key_hint(key, key_def) : 0; +} + +#define MEMTX_TREE_NAME memtx_hinted_tree +#define memtx_tree_elem struct memtx_hinted_tree_data +#define memtx_tree_key struct memtx_hinted_tree_key_data +#define MEMTX_TREE_ELEM_EQUAL(elem_a_ptr, elem_b_ptr) \ + ((elem_a_ptr)->tuple == (elem_b_ptr)->tuple) +#define MEMTX_TREE_ELEM_CMP(elem_a_ptr, elem_b_ptr, key_def) \ + memtx_hinted_tree_data_cmp(elem_a_ptr, elem_b_ptr, key_def) +#define MEMTX_TREE_ELEM_WITH_KEY_CMP(elem_ptr, key_ptr, key_def) \ + memtx_hinted_tree_data_cmp_with_key(elem_ptr, key_ptr, key_def) +#define MEMTX_TREE_ELEM_SET(elem_ptr, tuple, key_def) \ + memtx_hinted_tree_data_set(elem_ptr, tuple, key_def) +#define MEMTX_TREE_KEY_SET(key_ptr, key_val, part_count_val, key_def) \ + memtx_hinted_tree_key_data_set(key_ptr, key_val, part_count_val, \ + key_def) +#define MEMTX_TREE_ELEM_GET(elem_ptr) ((elem_ptr)->tuple) +#define MEMTX_TREE_KEY_GET(key_ptr, part_count_ptr) \ + ({*part_count_ptr = (key_ptr)->part_count; (key_ptr)->key;}) + +#include "memtx_tree_impl.h" + +#undef memtx_tree_key +#undef memtx_tree_elem +#undef MEMTX_TREE_KEY_GET +#undef MEMTX_TREE_ELEM_GET +#undef MEMTX_TREE_KEY_SET +#undef MEMTX_TREE_ELEM_SET +#undef MEMTX_TREE_ELEM_WITH_KEY_CMP +#undef MEMTX_TREE_ELEM_CMP +#undef MEMTX_TREE_ELEM_EQUAL +#undef memtx_tree_key +#undef memtx_tree_elem +#undef MEMTX_TREE_NAME + +/* }}} */ + struct index * memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def) { + if (def->cmp_def->parts->type == FIELD_TYPE_STRING || + def->cmp_def->parts->type == FIELD_TYPE_UNSIGNED || + def->cmp_def->parts->type == FIELD_TYPE_INTEGER) + return memtx_hinted_tree_index_new(memtx, def); return memtx_tuple_tree_index_new(memtx, def); } diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc index ded802c7d..6ebf625a4 100644 --- a/src/box/tuple_compare.cc +++ b/src/box/tuple_compare.cc @@ -1321,4 +1321,186 @@ tuple_compare_with_key_create(const struct key_def *def) compare_with_key_slowpath_funcs[cmp_func_idx]; } +static uint64_t +key_hint_default(const char *key, struct key_def *key_def) +{ + (void)key; + (void)key_def; + return 0; +} + +static uint64_t +tuple_hint_default(const struct tuple *tuple, struct key_def *key_def) +{ + (void)tuple; + (void)key_def; + return 0; +} + +template +static uint64_t +key_hint_uint(const char *key, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + assert(mp_typeof(*key) == MP_UINT); + uint64_t val = mp_decode_uint(&key); + if (val > INT64_MAX) + return INT64_MAX; + return val - (uint64_t)INT64_MIN; +} + +template +static uint64_t +tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_uint(field, key_def); +} + +template +static uint64_t +key_hint_int(const char *key, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + if (mp_typeof(*key) == MP_UINT) { + return key_hint_uint(key, key_def); + } else { + assert(key_def->parts->type == FIELD_TYPE_INTEGER); + assert(mp_typeof(*key) == MP_INT); + int64_t val = mp_decode_int(&key); + return (uint64_t)val - (uint64_t)INT64_MIN; + } +} + +template +static uint64_t +tuple_hint_int(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_int(field, key_def); +} + +template +static uint64_t +key_hint_string(const char *key, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + assert(key_def->parts->type == FIELD_TYPE_STRING); + assert(mp_typeof(*key) == MP_STR); + uint32_t len; + const unsigned char *str = + (const unsigned char *)mp_decode_str(&key, &len); + uint64_t result = 0; + /* + * Map the sequence of characters in the hint's bytes in + * the reverse order: the first character will be mapped + * in the highest byte of the number, etc. + * One additional bit encodes the presence of the next + * sign. + * 0bCHARBYTEU CHARBYTEU CHARBYTEU ... CHARBYTE0 + * The sequence constructed in this way provides a + * comparison transitivity of strings no longer than 7 + * bytes. + */ + uint32_t process_len = MIN(len, 7); + for (uint32_t i = 0; i < process_len; i++) { + result <<= 9; + result |= 0x100; + result |= str[i]; + } + /* + * If the length of the string is less than the 64-bit + * hint can accommodate, the insignificant positions are + * filled with 0. + */ + result <<= 9 * (7 - process_len); + return result; +} + +template +static uint64_t +tuple_hint_string(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_string(field, key_def); +} + +template +static uint64_t +key_hint_string_coll(const char *key, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + if (is_nullable && mp_typeof(*key) == MP_NIL) + return 0; + assert(key_def->parts->type == FIELD_TYPE_STRING); + assert(mp_typeof(*key) == MP_STR); + assert(key_def->parts->coll != NULL); + uint32_t len; + const char *str = mp_decode_str(&key, &len); + return key_def->parts->coll->hint(str, len, key_def->parts->coll); +} + +template +static uint64_t +tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return 0; + return key_hint_string_coll(field, key_def); +} + +void +tuple_hint_set(struct key_def *def) +{ + def->key_hint = key_hint_default; + def->tuple_hint = tuple_hint_default; + bool is_nullable = key_part_is_nullable(def->parts); + if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL) { + def->key_hint = is_nullable ? key_hint_string_coll : + key_hint_string_coll; + def->tuple_hint = is_nullable ? tuple_hint_string_coll : + tuple_hint_string_coll; + return; + } + switch (def->parts->type) { + case FIELD_TYPE_UNSIGNED: + def->key_hint = is_nullable ? key_hint_uint : + key_hint_uint; + def->tuple_hint = is_nullable ? tuple_hint_uint : + tuple_hint_uint; + break; + case FIELD_TYPE_INTEGER: + def->key_hint = is_nullable ? key_hint_int : + key_hint_int; + def->tuple_hint = is_nullable ? tuple_hint_int : + tuple_hint_int; + break; + case FIELD_TYPE_STRING: + def->key_hint = is_nullable ? key_hint_string : + key_hint_string; + def->tuple_hint = is_nullable ? tuple_hint_string : + tuple_hint_string; + break; + default: + break; + }; +} + /* }}} tuple_compare_with_key */ diff --git a/src/box/tuple_compare.h b/src/box/tuple_compare.h index e3a63204f..405460086 100644 --- a/src/box/tuple_compare.h +++ b/src/box/tuple_compare.h @@ -67,6 +67,13 @@ tuple_compare_create(const struct key_def *key_def); tuple_compare_with_key_t tuple_compare_with_key_create(const struct key_def *key_def); +/** + * Initialize tuple_hint() and key_hint() functions for the key_def. + * @param key_def key definition to set up. + */ +void +tuple_hint_set(struct key_def *key_def); + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/coll.c b/src/coll.c index 6d9c44dbf..0e7c8ec92 100644 --- a/src/coll.c +++ b/src/coll.c @@ -33,6 +33,7 @@ #include "third_party/PMurHash.h" #include "diag.h" #include "assoc.h" +#include "limits.h" #include #include @@ -133,6 +134,48 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, return s_len; } +/** Get a compare hint of a string using ICU collation. */ +static uint64_t +coll_icu_hint(const char *s, size_t s_len, struct coll *coll) +{ + UCharIterator itr; + uiter_setUTF8(&itr, s, s_len); + uint8_t buf[7]; + uint32_t state[2] = {0, 0}; + UErrorCode status = U_ZERO_ERROR; + int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf, + sizeof(buf), &status); + assert(got >= 0 && got <= 7); + uint64_t result = 0; + for (int32_t i = 0; i < got; i++) { + result <<= 9; + result |= 0x100; + result |= buf[i]; + } + result <<= 9 * (7 - got); + return result; +} + +/** + * Get a compare hint of a string using BINARY collation. + * Works similar to key_hint_string, read it's comment for more + * details. + */ +static uint64_t +coll_bin_hint(const char *s, size_t s_len, struct coll *coll) +{ + (void)coll; + uint64_t result = 0; + uint32_t process_len = MIN(s_len, 7); + for (uint32_t i = 0; i < process_len; i++) { + result <<= 9; + result |= 0x100; + result |= s[i]; + } + result <<= 9 * (7 - process_len); + return result; +} + /** * Set up ICU collator and init cmp and hash members of collation. * @param coll Collation to set up. @@ -242,6 +285,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def) } coll->cmp = coll_icu_cmp; coll->hash = coll_icu_hash; + coll->hint = coll_icu_hint; return 0; } @@ -332,6 +376,7 @@ coll_new(const struct coll_def *def) coll->collator = NULL; coll->cmp = coll_bin_cmp; coll->hash = coll_bin_hash; + coll->hint = coll_bin_hint; break; default: unreachable(); diff --git a/src/coll.h b/src/coll.h index 9c725712a..53133dae3 100644 --- a/src/coll.h +++ b/src/coll.h @@ -47,6 +47,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t, typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, struct coll *coll); +typedef uint64_t (*coll_hint_f)(const char *s, size_t s_len, struct coll *coll); + struct UCollator; /** @@ -61,6 +63,8 @@ struct coll { /** String comparator. */ coll_cmp_f cmp; coll_hash_f hash; + /** The pointer to routine calculating tuple hint. */ + coll_hint_f hint; /** Reference counter. */ int refs; /** -- 2.20.1