From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v5 2/4] memtx: introduce tuple compare hint Date: Thu, 7 Mar 2019 12:44:06 +0300 Message-Id: <53e165cf566611452821c692d79d621628e839ff.1551951540.git.kshcherbatov@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: Kirill Shcherbatov List-ID: Implement functions for retrieving tuple hints for a particular key_def. Hint is an integer that can be used for tuple comparison optimization: if a hint of one tuple is less than a hint of another then the first tuple is definitely less than the second; only if hints are equal tuple_compare must be called for getting comparison result. Hints are calculated using only the first part of key_def. Hints are only useful when: * they are precalculated and stored along with the tuple; calculation is not cheap (cheaper than tuple_compare btw) but precalculated hints allow to compare tuples without even fetching tuple data. * first part of key_def is 'string'(for now) * since hint is calculated using only the first part of key_def (and only first several characters if it is a string) this part must be different with high probability for every tuple pair. Closes #3961 --- src/box/key_def.c | 1 + src/box/key_def.h | 119 ++++++++++++ src/box/memtx_tree.c | 32 +++- src/box/tuple_compare.cc | 381 +++++++++++++++++++++++++++++++++++++++ src/box/tuple_compare.h | 7 + src/lib/coll/coll.c | 33 ++++ src/lib/coll/coll.h | 4 + 7 files changed, 567 insertions(+), 10 deletions(-) diff --git a/src/box/key_def.c b/src/box/key_def.c index d4c979aa1..771c30172 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -136,6 +136,7 @@ key_def_set_func(struct key_def *def) key_def_set_compare_func(def); key_def_set_hash_func(def); key_def_set_extract_func(def); + key_def_set_cmp_aux_func(def); } static void diff --git a/src/box/key_def.h b/src/box/key_def.h index dd62f6a35..c630e6b43 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -115,6 +115,28 @@ struct key_part { struct key_def; struct tuple; +/** Comparasion auxiliary information. */ +typedef union { + /** + * Hint is a value h(t) is calculated for tuple in terms + * of particular key_def that has follows rules: + * if h(t1) < h(t2) then t1 < t2; + * if h(t1) > h(t2) then t1 > t2; + * if t1 == t2 then h(t1) == h(t2); + * These rules means that instead of direct tuple vs tuple + * (or tuple vs key) comparison one may compare theirs + * hints first; and only if theirs hints are equal compare + * the tuples themselves. + */ + uint64_t hint; +} cmp_aux_t; + +/** Test if cmp_aux_t a and b are equal. */ +static inline bool +cmp_aux_equal(cmp_aux_t a, cmp_aux_t b) +{ + return a.hint == b.hint; +} /** * Get is_nullable property of key_part. @@ -137,6 +159,18 @@ typedef int (*tuple_compare_with_key_t)(const struct tuple *tuple_a, typedef int (*tuple_compare_t)(const struct tuple *tuple_a, const struct tuple *tuple_b, struct key_def *key_def); +/** @copydoc tuple_aux_compare_with_key() */ +typedef int (*tuple_aux_compare_with_key_t)(const struct tuple *tuple, + cmp_aux_t tuple_cmp_aux, + const char *key, uint32_t part_count, + cmp_aux_t key_cmp_aux, + struct key_def *key_def); +/** @copydoc tuple_aux_compare() */ +typedef int (*tuple_aux_compare_t)(const struct tuple *tuple_a, + cmp_aux_t tuple_a_cmp_aux, + const struct tuple *tuple_b, + cmp_aux_t tuple_b_cmp_aux, + struct key_def *key_def); /** @copydoc tuple_extract_key() */ typedef char *(*tuple_extract_key_t)(const struct tuple *tuple, struct key_def *key_def, @@ -153,12 +187,23 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple, typedef uint32_t (*key_hash_t)(const char *key, struct key_def *key_def); +/** @copydoc tuple_cmp_aux() */ +typedef cmp_aux_t (*tuple_cmp_aux_t)(const struct tuple *tuple, + struct key_def *key_def); + +/** @copydoc key_cmp_aux() */ +typedef cmp_aux_t (*key_cmp_aux_t)(const char *key, struct key_def *key_def); + /* Definition of a multipart key. */ struct key_def { /** @see tuple_compare() */ tuple_compare_t tuple_compare; /** @see tuple_compare_with_key() */ tuple_compare_with_key_t tuple_compare_with_key; + /** @see tuple_aux_compare_with_key() */ + tuple_aux_compare_with_key_t tuple_aux_compare_with_key; + /** @see tuple_aux_compare() */ + tuple_aux_compare_t tuple_aux_compare; /** @see tuple_extract_key() */ tuple_extract_key_t tuple_extract_key; /** @see tuple_extract_key_raw() */ @@ -167,6 +212,10 @@ struct key_def { tuple_hash_t tuple_hash; /** @see key_hash() */ key_hash_t key_hash; + /** @see tuple_cmp_aux() */ + tuple_cmp_aux_t tuple_cmp_aux; + /** @see key_cmp_aux() */ + key_cmp_aux_t key_cmp_aux; /** * Minimal part count which always is unique. For example, * if a secondary index is unique, then @@ -571,6 +620,52 @@ tuple_compare_with_key(const struct tuple *tuple, const char *key, return key_def->tuple_compare_with_key(tuple, key, part_count, key_def); } +/** + * Compare tuples using the key definition and comparasion + * auxillary information. + * @param tuple_a First tuple. + * @param tuple_a_cmp_aux Comparasion auxiliary information + * for the tuple_a. + * @param tuple_b Second tuple. + * @param tuple_b_cmp_aux Comparasion auxilary information + * for the tuple_b. + * @param key_def Key definition. + * @retval 0 if key_fields(tuple_a) == key_fields(tuple_b) + * @retval <0 if key_fields(tuple_a) < key_fields(tuple_b) + * @retval >0 if key_fields(tuple_a) > key_fields(tuple_b) + */ +static inline int +tuple_aux_compare(const struct tuple *tuple_a, cmp_aux_t tuple_a_cmp_aux, + const struct tuple *tuple_b, cmp_aux_t tuple_b_cmp_aux, + struct key_def *key_def) +{ + return key_def->tuple_aux_compare(tuple_a, tuple_a_cmp_aux, tuple_b, + tuple_b_cmp_aux, key_def); +} + +/** + * Compare tuple with key using the key definition and + * comparasion auxilary information. + * @param tuple tuple + * @param tuple_cmp_aux tuple compare auxiliary information. + * @param key key parts without MessagePack array header + * @param part_count the number of parts in @a key + * @param key_cmp_aux t Key compare auxiliary information. + * @param key_def key definition + * @retval 0 if key_fields(tuple) == parts(key) + * @retval <0 if key_fields(tuple) < parts(key) + * @retval >0 if key_fields(tuple) > parts(key) + */ +static inline int +tuple_aux_compare_with_key(const struct tuple *tuple, cmp_aux_t tuple_cmp_aux, + const char *key, uint32_t part_count, + cmp_aux_t key_cmp_aux, struct key_def *key_def) +{ + return key_def->tuple_aux_compare_with_key(tuple, tuple_cmp_aux, key, + part_count, key_cmp_aux, + key_def); +} + /** * Compute hash of a tuple field. * @param ph1 - pointer to running hash @@ -624,6 +719,30 @@ key_hash(const char *key, struct key_def *key_def) return key_def->key_hash(key, key_def); } + /* + * Get a comparison auxiliary information for a tuple. + * @param tuple - tuple to get cmp_aux_t of. + * @param key_def - key_def that defines which comparison is used. + * @return the comparison auxiliary information. + */ +static inline cmp_aux_t +tuple_cmp_aux(const struct tuple *tuple, struct key_def *key_def) +{ + return key_def->tuple_cmp_aux(tuple, key_def); +} + +/** + * Get a comparison hint of a key. + * @param key - key to get hint of. + * @param key_def - key_def that defines which comparison is used. + * @return the comparison auxiliary information. + */ +static inline cmp_aux_t +key_cmp_aux(const char *key, struct key_def *key_def) +{ + return key_def->key_cmp_aux(key, key_def); +} + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c index 688f09597..4b0886bef 100644 --- a/src/box/memtx_tree.c +++ b/src/box/memtx_tree.c @@ -47,6 +47,8 @@ struct memtx_tree_key_data { const char *key; /** Number of msgpacked search fields. */ uint32_t part_count; + /** Comparison auxilary information. */ + cmp_aux_t aux_info; }; /** @@ -55,6 +57,8 @@ struct memtx_tree_key_data { struct memtx_tree_data { /* Tuple that this node is represents. */ struct tuple *tuple; + /** Comparison auxilary information. */ + cmp_aux_t aux_info; }; /** @@ -69,7 +73,7 @@ static bool memtx_tree_data_identical(const struct memtx_tree_data *a, const struct memtx_tree_data *b) { - return a->tuple == b->tuple; + return a->tuple == b->tuple && cmp_aux_equal(a->aux_info, b->aux_info); } /** @@ -91,6 +95,7 @@ memtx_tree_data_set(struct memtx_tree_data *data, struct tuple *tuple, { (void)key_def; data->tuple = tuple; + data->aux_info = tuple_cmp_aux(tuple, key_def); } /** @@ -103,15 +108,18 @@ memtx_tree_key_data_set(struct memtx_tree_key_data *key_data, const char *key, (void)key_def; key_data->key = key; key_data->part_count = part_count; + key_data->aux_info = key_cmp_aux(key, key_def); } #define BPS_TREE_NAME memtx_tree #define BPS_TREE_BLOCK_SIZE (512) #define BPS_TREE_EXTENT_SIZE MEMTX_EXTENT_SIZE #define BPS_TREE_COMPARE(a, b, arg)\ - tuple_compare((&a)->tuple, (&b)->tuple, arg) + tuple_aux_compare((&a)->tuple, (&a)->aux_info, (&b)->tuple,\ + (&b)->aux_info, arg) #define BPS_TREE_COMPARE_KEY(a, b, arg)\ - tuple_compare_with_key((&a)->tuple, (b)->key, (b)->part_count, arg) + tuple_aux_compare_with_key((&a)->tuple, (&a)->aux_info, (b)->key,\ + (b)->part_count, (b)->aux_info, arg) #define BPS_TREE_IDENTICAL(a, b) memtx_tree_data_identical(&a, &b) #define bps_tree_elem_t struct memtx_tree_data #define bps_tree_key_t struct memtx_tree_key_data * @@ -146,7 +154,8 @@ memtx_tree_qcompare(const void* a, const void *b, void *c) const struct memtx_tree_data *data_a = a; const struct memtx_tree_data *data_b = b; struct key_def *key_def = c; - return tuple_compare(data_a->tuple, data_b->tuple, key_def); + return tuple_aux_compare(data_a->tuple, data_a->aux_info, data_b->tuple, + data_b->aux_info, key_def); } /* {{{ MemtxTree Iterators ****************************************/ @@ -268,9 +277,10 @@ tree_iterator_next_equal(struct iterator *iterator, struct tuple **ret) memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator); /* Use user key def to save a few loops. */ if (res == NULL || - tuple_compare_with_key(res->tuple, it->key_data.key, - it->key_data.part_count, - it->index_def->key_def) != 0) { + tuple_aux_compare_with_key(res->tuple, res->aux_info, + it->key_data.key, it->key_data.part_count, + it->key_data.aux_info, + it->index_def->key_def) != 0) { iterator->next = tree_iterator_dummie; memtx_tree_data_clear(&it->current); *ret = NULL; @@ -299,9 +309,10 @@ tree_iterator_prev_equal(struct iterator *iterator, struct tuple **ret) memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator); /* Use user key def to save a few loops. */ if (res == NULL || - tuple_compare_with_key(res->tuple, it->key_data.key, - it->key_data.part_count, - it->index_def->key_def) != 0) { + tuple_aux_compare_with_key(res->tuple, res->aux_info, + it->key_data.key, it->key_data.part_count, + it->key_data.aux_info, + it->index_def->key_def) != 0) { iterator->next = tree_iterator_dummie; memtx_tree_data_clear(&it->current); *ret = NULL; @@ -549,6 +560,7 @@ memtx_tree_index_get(struct index *base, const char *key, assert(base->def->opts.is_unique && part_count == base->def->key_def->part_count); struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct memtx_tree_key_data key_data; struct key_def *cmp_def = memtx_tree_index_cmp_def(index); memtx_tree_key_data_set(&key_data, key, part_count, cmp_def); diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc index cf4519ccb..5b06e06b3 100644 --- a/src/box/tuple_compare.cc +++ b/src/box/tuple_compare.cc @@ -1323,9 +1323,390 @@ tuple_compare_with_key_create(const struct key_def *def) /* }}} tuple_compare_with_key */ +/* {{{ tuple_aux_compare */ + +#define CMP_HINT_INVALID ((uint64_t)UINT64_MAX) + +static int +tuple_hinted_compare(const struct tuple *tuple_a, cmp_aux_t tuple_a_cmp_aux, + const struct tuple *tuple_b, cmp_aux_t tuple_b_cmp_aux, + struct key_def *key_def) +{ + uint64_t tuple_a_hint = tuple_a_cmp_aux.hint; + uint64_t tuple_b_hint = tuple_b_cmp_aux.hint; + if (likely(tuple_a_hint != tuple_b_hint && + tuple_a_hint != CMP_HINT_INVALID && + tuple_b_hint != CMP_HINT_INVALID)) + return tuple_a_hint < tuple_b_hint ? -1 : 1; + else + return tuple_compare(tuple_a, tuple_b, key_def); +} + +static tuple_aux_compare_t +tuple_aux_compare_create(const struct key_def *def) +{ + (void)def; + return tuple_hinted_compare; +} + +/* }}} tuple_aux_compare */ + +/* {{{ tuple_aux_compare_with_key */ + +static int +tuple_hinted_compare_with_key(const struct tuple *tuple, cmp_aux_t tuple_cmp_aux, + const char *key, uint32_t part_count, + cmp_aux_t key_cmp_aux, struct key_def *key_def) +{ + uint64_t tuple_hint = tuple_cmp_aux.hint; + uint64_t key_hint = key_cmp_aux.hint; + if (likely(tuple_hint != key_hint && tuple_hint != CMP_HINT_INVALID && + key_hint != CMP_HINT_INVALID)) + return tuple_hint < key_hint ? -1 : 1; + else + return tuple_compare_with_key(tuple, key, part_count, key_def); +} + +static tuple_aux_compare_with_key_t +tuple_aux_compare_with_key_create(const struct key_def *def) +{ + (void)def; + return tuple_hinted_compare_with_key; +} + +/* }}} tuple_aux_compare_with_key */ + void key_def_set_compare_func(struct key_def *def) { def->tuple_compare = tuple_compare_create(def); def->tuple_compare_with_key = tuple_compare_with_key_create(def); + def->tuple_aux_compare = tuple_aux_compare_create(def); + def->tuple_aux_compare_with_key = + tuple_aux_compare_with_key_create(def); +} + +/* Tuple hints */ + +static cmp_aux_t +key_hint_default(const char *key, struct key_def *key_def) +{ + (void)key; + (void)key_def; + cmp_aux_t ret; + ret.hint = CMP_HINT_INVALID; + return ret; +} + +static cmp_aux_t +tuple_hint_default(const struct tuple *tuple, struct key_def *key_def) +{ + (void)tuple; + (void)key_def; + cmp_aux_t ret; + ret.hint = CMP_HINT_INVALID; + return ret; +} + +template +static cmp_aux_t +key_hint_uint(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_UNSIGNED); + cmp_aux_t ret; + if (key == NULL || (is_nullable && mp_typeof(*key) == MP_NIL)) { + ret.hint = 0; + return ret; + } + assert(mp_typeof(*key) == MP_UINT); + uint64_t val = mp_decode_uint(&key); + ret.hint = unlikely(val > INT64_MAX) ? INT64_MAX : + val - (uint64_t)INT64_MIN;; + return ret; +} + +template +static cmp_aux_t +tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_UNSIGNED); + cmp_aux_t ret; + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) { + ret.hint = 0; + return ret; + } + return key_hint_uint(field, key_def); +} + +template +static cmp_aux_t +key_hint_int(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_INTEGER); + cmp_aux_t ret; + if (key == NULL || (is_nullable && mp_typeof(*key) == MP_NIL)) { + ret.hint = 0; + return ret; + } + if (mp_typeof(*key) == MP_UINT) { + uint64_t val = mp_decode_uint(&key); + ret.hint = unlikely(val > INT64_MAX) ? INT64_MAX : + val - (uint64_t)INT64_MIN; + } else { + assert(mp_typeof(*key) == MP_INT); + int64_t val = mp_decode_int(&key); + ret.hint = (uint64_t)val - (uint64_t)INT64_MIN; + } + return ret; +} + +template +static cmp_aux_t +tuple_hint_int(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_INTEGER); + cmp_aux_t ret; + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) { + ret.hint = 0; + return ret; + } + return key_hint_int(field, key_def); +} + +template +static cmp_aux_t +key_hint_number(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_NUMBER); + cmp_aux_t ret; + if (key == NULL || (is_nullable && mp_typeof(*key) == MP_NIL)) { + ret.hint = 0; + return ret; + } + uint64_t val = 0; + switch (mp_typeof(*key)) { + case MP_FLOAT: + case MP_DOUBLE: { + double f = mp_typeof(*key) == MP_FLOAT ? + mp_decode_float(&key) : mp_decode_double(&key); + if (isnan(f) || isinf(f)) { + ret.hint = CMP_HINT_INVALID; + return ret; + } + double ival; + (void)modf(f, &ival); + if (unlikely(ival >= (double)INT64_MAX)) { + ret.hint = INT64_MAX; + return ret; + } + if (unlikely(ival <= (double)INT64_MIN)) { + ret.hint = 0; + return ret; + } + val = (uint64_t)ival; + break; + } + case MP_INT: { + val = (uint64_t)mp_decode_int(&key); + break; + } + case MP_UINT: { + val = mp_decode_uint(&key); + if (val > INT64_MAX) { + ret.hint = INT64_MAX; + return ret; + } + break; + } + default: + unreachable(); + } + ret.hint = val - (uint64_t)INT64_MIN; + return ret; +} + +template +static cmp_aux_t +tuple_hint_number(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_NUMBER); + cmp_aux_t ret; + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) { + ret.hint = 0; + return ret; + } + return key_hint_number(field, key_def); +} + +template +static cmp_aux_t +key_hint_boolean(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_BOOLEAN); + cmp_aux_t ret; + if (key == NULL || (is_nullable && mp_typeof(*key) == MP_NIL)) { + ret.hint = 0; + return ret; + } + bool val = mp_decode_bool(&key); + ret.hint = (uint64_t)val - (uint64_t)INT64_MIN; + return ret; +} + +template +static cmp_aux_t +tuple_hint_boolean(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_BOOLEAN); + const char *field = tuple_field_by_part(tuple, key_def->parts); + cmp_aux_t ret; + if (is_nullable && field == NULL) { + ret.hint = 0; + return ret; + } + return key_hint_boolean(field, key_def); +} + +template +static cmp_aux_t +key_hint_string(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->coll == NULL); + assert(key_def->parts->type == FIELD_TYPE_STRING); + cmp_aux_t ret; + if (key == NULL || (is_nullable && mp_typeof(*key) == MP_NIL)) { + ret.hint = 0; + return ret; + } + assert(mp_typeof(*key) == MP_STR); + uint32_t len; + const unsigned char *str = + (const unsigned char *)mp_decode_str(&key, &len); + uint64_t result = 0; + uint32_t process_len = MIN(len, 8); + for (uint32_t i = 0; i < process_len; i++) { + result <<= 8; + result |= str[i]; + } + result <<= 8 * (8 - process_len); + ret.hint = result; + return ret; +} + +template +static cmp_aux_t +tuple_hint_string(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->coll == NULL); + assert(key_def->parts->type == FIELD_TYPE_STRING); + cmp_aux_t ret; + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) { + ret.hint = 0; + return ret; + } + return key_hint_string(field, key_def); +} + +template +static cmp_aux_t +key_hint_string_coll(const char *key, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_STRING && + key_def->parts->coll != NULL); + cmp_aux_t ret; + if (key == NULL || (is_nullable && mp_typeof(*key) == MP_NIL)) { + ret.hint = 0; + return ret; + } + assert(mp_typeof(*key) == MP_STR); + uint32_t len; + const char *str = mp_decode_str(&key, &len); + ret.hint = key_def->parts->coll->hint(str, len, key_def->parts->coll); + return ret; +} + +template +static cmp_aux_t +tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_STRING && + key_def->parts->coll != NULL); + const char *field = tuple_field_by_part(tuple, key_def->parts); + cmp_aux_t ret; + if (is_nullable && field == NULL) { + ret.hint = 0; + return ret; + } + return key_hint_string_coll(field, key_def); +} + +void +key_def_set_cmp_aux_func(struct key_def *def) +{ + def->key_cmp_aux = key_hint_default; + def->tuple_cmp_aux = tuple_hint_default; + bool is_nullable = key_part_is_nullable(def->parts); + if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL) { + def->key_cmp_aux = is_nullable ? key_hint_string_coll : + key_hint_string_coll; + def->tuple_cmp_aux = is_nullable ? + tuple_hint_string_coll : + tuple_hint_string_coll; + return; + } + switch (def->parts->type) { + case FIELD_TYPE_UNSIGNED: + def->key_cmp_aux = is_nullable ? key_hint_uint : + key_hint_uint; + def->tuple_cmp_aux = is_nullable ? tuple_hint_uint : + tuple_hint_uint; + break; + case FIELD_TYPE_INTEGER: + def->key_cmp_aux = is_nullable ? key_hint_int : + key_hint_int; + def->tuple_cmp_aux = is_nullable ? tuple_hint_int : + tuple_hint_int; + break; + case FIELD_TYPE_STRING: + def->key_cmp_aux = is_nullable ? key_hint_string : + key_hint_string; + def->tuple_cmp_aux = is_nullable ? tuple_hint_string : + tuple_hint_string; + break; + case FIELD_TYPE_NUMBER: + def->key_cmp_aux = is_nullable ? key_hint_number : + key_hint_number; + def->tuple_cmp_aux = is_nullable ? tuple_hint_number : + tuple_hint_number; + break; + case FIELD_TYPE_BOOLEAN: + def->key_cmp_aux = is_nullable ? key_hint_boolean : + key_hint_boolean; + def->tuple_cmp_aux = is_nullable ? tuple_hint_boolean : + tuple_hint_boolean; + break; + default: + break; + }; } diff --git a/src/box/tuple_compare.h b/src/box/tuple_compare.h index 6538d5fc0..160135af4 100644 --- a/src/box/tuple_compare.h +++ b/src/box/tuple_compare.h @@ -43,6 +43,13 @@ struct key_def; void key_def_set_compare_func(struct key_def *def); +/** + * Initialize cmp_aux calculation functions for the key_def. + * @param key_def key definition + */ +void +key_def_set_cmp_aux_func(struct key_def *def); + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c index afc15e809..9c267d384 100644 --- a/src/lib/coll/coll.c +++ b/src/lib/coll/coll.c @@ -133,6 +133,37 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, return s_len; } +/** Get a compare hint of a binary collation. */ +static uint64_t +coll_bin_hint(const char *s, size_t s_len, struct coll *coll) +{ + (void)coll; + uint64_t result = 0; + uint32_t process_len = MIN(s_len, 8); + for (uint32_t i = 0; i < process_len; i++) { + result <<= 8; + result |= ((unsigned char *)s)[i]; + } + result <<= 8 * (8 - process_len); + return result; +} + +/** Get a compare hint of a string using ICU collation. */ +static uint64_t +coll_icu_hint(const char *s, size_t s_len, struct coll *coll) +{ + assert(coll->type == COLL_TYPE_ICU); + UCharIterator itr; + uiter_setUTF8(&itr, s, s_len); + uint8_t buf[8]; + uint32_t state[2] = {0, 0}; + UErrorCode status = U_ZERO_ERROR; + int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf, + sizeof(buf), &status); + assert(got >= 0 && got <= 8); + return coll_bin_hint((const char *)buf, got, NULL); +} + /** * Set up ICU collator and init cmp and hash members of collation. * @param coll Collation to set up. @@ -242,6 +273,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def) } coll->cmp = coll_icu_cmp; coll->hash = coll_icu_hash; + coll->hint = coll_icu_hint; return 0; } @@ -356,6 +388,7 @@ coll_new(const struct coll_def *def) coll->collator = NULL; coll->cmp = coll_bin_cmp; coll->hash = coll_bin_hash; + coll->hint = coll_bin_hint; break; default: unreachable(); diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h index 8c9f94293..d695a02ad 100644 --- a/src/lib/coll/coll.h +++ b/src/lib/coll/coll.h @@ -48,6 +48,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t, typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, struct coll *coll); +typedef uint64_t (*coll_hint_f)(const char *s, size_t s_len, struct coll *coll); + struct UCollator; /** @@ -62,6 +64,8 @@ struct coll { /** String comparator. */ coll_cmp_f cmp; coll_hash_f hash; + /** The pointer to routine calculating tuple hint. */ + coll_hint_f hint; /** Reference counter. */ int refs; /** -- 2.21.0