From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v1 3/4] box: introduce tuple compare hint Date: Tue, 5 Feb 2019 14:58:38 +0300 Message-Id: <535f26d85705f8157737bca7ed4978d4c9a016f4.1549367041.git.kshcherbatov@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: Alexandr Lyapunov List-ID: From: Alexandr Lyapunov Implement functions for retrieving tuple hints for a particular key_def. Hint is an integer that can be used for tuple comparison optimization: if a hint of one tuple is less than a hint of another then the first tuple is definitely less than the second; only if hints are equal tuple_compare must be called for getting comparison result. Hints are calculated using only the first part of key_def. Hints are only useful when: * they are precalculated and stored along with the tuple; calculation is not cheap (cheaper than tuple_compare btw) but precalculated hints allow to compare tuples without even fetching tuple data. * first part of key_def is 'string', 'unsigned' or 'integer' * since hint is calculated using only the first part of key_def (and only first several characters if it is a string) this part must be different with high probability for every tuple pair. @kshcherbatov: rebase to 2.1, fix conflicts Needed for #3961 --- src/box/key_def.c | 1 + src/box/key_def.h | 11 ++++ src/box/tuple_compare.cc | 131 +++++++++++++++++++++++++++++++++++++++ src/box/tuple_compare.h | 40 ++++++++++++ src/coll.c | 23 +++++++ src/coll.h | 3 + 6 files changed, 209 insertions(+) diff --git a/src/box/key_def.c b/src/box/key_def.c index 9411ade39..ceca1ebee 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -135,6 +135,7 @@ key_def_set_cmp(struct key_def *def) def->tuple_compare_with_key = tuple_compare_with_key_create(def); tuple_hash_func_set(def); tuple_extract_key_set(def); + tuple_hint_set(def); } static void diff --git a/src/box/key_def.h b/src/box/key_def.h index 85bed92bb..ff86de3ca 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -153,6 +153,13 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple, typedef uint32_t (*key_hash_t)(const char *key, struct key_def *key_def); +/** @copydoc tuple_hint() */ +typedef uint64_t (*tuple_hint_t)(const struct tuple *tuple, + struct key_def *key_def); + +/** @copydoc key_hint() */ +typedef uint64_t (*key_hint_t)(const char *key, struct key_def *key_def); + /* Definition of a multipart key. */ struct key_def { /** @see tuple_compare() */ @@ -167,6 +174,10 @@ struct key_def { tuple_hash_t tuple_hash; /** @see key_hash() */ key_hash_t key_hash; + /** @see tuple_hint() */ + tuple_hint_t tuple_hint; + /** @see key_hint() */ + key_hint_t key_hint; /** * Minimal part count which always is unique. For example, * if a secondary index is unique, then diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc index 1d1ab8711..b8f91972c 100644 --- a/src/box/tuple_compare.cc +++ b/src/box/tuple_compare.cc @@ -1321,4 +1321,135 @@ tuple_compare_with_key_create(const struct key_def *def) compare_with_key_slowpath_funcs[cmp_func_idx]; } +static uint64_t +key_hint_default(const char *key, struct key_def *key_def) +{ + (void)key; + (void)key_def; + return 0; +} + +static uint64_t +tuple_hint_default(const struct tuple *tuple, struct key_def *key_def) +{ + (void)tuple; + (void)key_def; + return 0; +} + +static uint64_t +key_hint_uint(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_def->parts->type == FIELD_TYPE_UNSIGNED); + assert(mp_typeof(*key) == MP_UINT); + return mp_decode_uint(&key); +} + +static uint64_t +tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def) +{ + const char *field = tuple_field_by_part(tuple, key_def->parts); + return key_hint_uint(field, key_def); +} + +static uint64_t +key_hint_int(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_def->parts->type == FIELD_TYPE_INTEGER); + if (mp_typeof(*key) == MP_UINT) { + uint64_t val = mp_decode_uint(&key); + if (val > INT64_MAX) + return INT64_MAX; + return val - (uint64_t)INT64_MIN; + } else { + assert(mp_typeof(*key) == MP_INT); + int64_t val = mp_decode_int(&key); + return (uint64_t)val - (uint64_t)INT64_MIN; + } +} + +static uint64_t +tuple_hint_int(const struct tuple *tuple, struct key_def *key_def) +{ + const char *field = tuple_field_by_part(tuple, key_def->parts); + return key_hint_int(field, key_def); +} + +static uint64_t +key_hint_string(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_def->parts->type == FIELD_TYPE_STRING); + assert(mp_typeof(*key) == MP_STR); + uint32_t len; + const unsigned char *str = + (const unsigned char *)mp_decode_str(&key, &len); + uint64_t result = 0; + uint32_t process_len = MIN(len, 7); + for (uint32_t i = 0; i < process_len; i++) { + result <<= 9; + result |= 0x100; + result |= str[i]; + } + result <<= 9 * (7 - process_len); + return result; +} + +static uint64_t +tuple_hint_string(const struct tuple *tuple, struct key_def *key_def) +{ + const char *field = tuple_field_by_part(tuple, key_def->parts); + return key_hint_string(field, key_def); +} + +static uint64_t +key_hint_string_coll(const char *key, struct key_def *key_def) +{ + assert(key_def->parts->type == FIELD_TYPE_STRING); + assert(mp_typeof(*key) == MP_STR); + assert(key_def->parts->coll != NULL); + uint32_t len; + const char *str = mp_decode_str(&key, &len); + return key_def->parts->coll->hint(str, len, key_def->parts->coll); +} + +static uint64_t +tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def) +{ + const char *field = tuple_field_by_part(tuple, key_def->parts); + return key_hint_string_coll(field, key_def); +} + +void +tuple_hint_set(struct key_def *def) +{ + def->key_hint = key_hint_default; + def->tuple_hint = tuple_hint_default; + if (key_part_is_nullable(def->parts)) + return; + if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL) { + def->key_hint = key_hint_string_coll; + def->tuple_hint = tuple_hint_string_coll; + return; + } + switch (def->parts->type) { + case FIELD_TYPE_UNSIGNED: + def->key_hint = key_hint_uint; + def->tuple_hint = tuple_hint_uint; + break; + case FIELD_TYPE_INTEGER: + def->key_hint = key_hint_int; + def->tuple_hint = tuple_hint_int; + break; + case FIELD_TYPE_STRING: + def->key_hint = key_hint_string; + def->tuple_hint = tuple_hint_string; + break; + default: + break; + }; +} + /* }}} tuple_compare_with_key */ diff --git a/src/box/tuple_compare.h b/src/box/tuple_compare.h index e3a63204f..be96ba38c 100644 --- a/src/box/tuple_compare.h +++ b/src/box/tuple_compare.h @@ -67,6 +67,46 @@ tuple_compare_create(const struct key_def *key_def); tuple_compare_with_key_t tuple_compare_with_key_create(const struct key_def *key_def); +/** + * Get a comparison hint of a tuple. + * Hint is such a function h(tuple) in terms of particular key_def that + * has follows rules: + * if h(t1) < h(t2) then t1 < t2; + * if h(t1) > h(t2) then t1 > t2; + * if t1 == t2 then h(t1) == h(t2); + * These rules means that instead of direct tuple vs tuple (or tuple vs key) + * comparison one may compare theirs hints first; and only if theirs hints + * are equal compare the tuples themselves. + * @param tuple - tuple to get hint of. + * @param key_def - key_def that defines which comparison is used. + * @return the hint. + */ +static inline uint64_t +tuple_hint(const struct tuple *tuple, struct key_def *key_def) +{ + return key_def->tuple_hint(tuple, key_def); +} + +/** + * Get a comparison hint of a key. + * @See tuple_hint for hint term definition. + * @param key - key to get hint of. + * @param key_def - key_def that defines which comparison is used. + * @return the hint. + */ +static inline uint64_t +key_hint(const char *key, struct key_def *key_def) +{ + return key_def->key_hint(key, key_def); +} + +/** + * Initialize tuple_hint() and key_hint() functions for the key_def. + * @param key_def key definition to set up. + */ +void +tuple_hint_set(struct key_def *key_def); + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/coll.c b/src/coll.c index 6d9c44dbf..cff2899cd 100644 --- a/src/coll.c +++ b/src/coll.c @@ -133,6 +133,28 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, return s_len; } +/** Get a compare hint of a string using ICU collation. */ +static uint64_t +coll_icu_hint(const char *s, size_t s_len, struct coll *coll) +{ + UCharIterator itr; + uiter_setUTF8(&itr, s, s_len); + uint8_t buf[7]; + uint32_t state[2] = {0, 0}; + UErrorCode status = U_ZERO_ERROR; + int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf, + sizeof(buf), &status); + assert(got >= 0 && got <= 7); + uint64_t result = 0; + for (int32_t i = 0; i < got; i++) { + result <<= 9; + result |= 0x100; + result |= buf[i]; + } + result <<= 9 * (7 - got); + return result; +} + /** * Set up ICU collator and init cmp and hash members of collation. * @param coll Collation to set up. @@ -242,6 +264,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def) } coll->cmp = coll_icu_cmp; coll->hash = coll_icu_hash; + coll->hint = coll_icu_hint; return 0; } diff --git a/src/coll.h b/src/coll.h index 9c725712a..4030d38de 100644 --- a/src/coll.h +++ b/src/coll.h @@ -47,6 +47,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t, typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, struct coll *coll); +typedef uint64_t (*coll_hint_f)(const char *s, size_t s_len, struct coll *coll); + struct UCollator; /** @@ -61,6 +63,7 @@ struct coll { /** String comparator. */ coll_cmp_f cmp; coll_hash_f hash; + coll_hint_f hint; /** Reference counter. */ int refs; /** -- 2.20.1