From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v6 2/3] memtx: introduce tuple compare hint Date: Wed, 13 Mar 2019 15:15:38 +0300 Message-Id: <004880d7afdf9513bf874d6f9b13982e6294be46.1552478226.git.kshcherbatov@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: Kirill Shcherbatov List-ID: Implement functions for retrieving tuple hints for a particular key_def. Hint is an integer that can be used for tuple comparison optimization: if a hint of one tuple is less than a hint of another then the first tuple is definitely less than the second; only if hints are equal tuple_compare must be called for getting comparison result. Hints are calculated using only the first part of key_def. Hints are only useful when: * they are precalculated and stored along with the tuple; calculation is not cheap (cheaper than tuple_compare btw) but precalculated hints allow to compare tuples without even fetching tuple data. * first part of key_def is 'string'(for now) * since hint is calculated using only the first part of key_def (and only first several characters if it is a string) this part must be different with high probability for every tuple pair. Closes #3961 --- src/box/key_def.h | 95 +++++++ src/box/memtx_tree.c | 46 ++- src/box/tuple_compare.cc | 584 +++++++++++++++++++++++++++++++++++++-- src/lib/coll/coll.c | 33 +++ src/lib/coll/coll.h | 4 + 5 files changed, 730 insertions(+), 32 deletions(-) diff --git a/src/box/key_def.h b/src/box/key_def.h index dd62f6a35..0d8f3112e 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -137,6 +137,19 @@ typedef int (*tuple_compare_with_key_t)(const struct tuple *tuple_a, typedef int (*tuple_compare_t)(const struct tuple *tuple_a, const struct tuple *tuple_b, struct key_def *key_def); +/** @copydoc tuple_compare_with_key_hinted() */ +typedef int (*tuple_compare_with_key_hinted_t)(const struct tuple *tuple, + uint64_t tuple_hint, + const char *key, + uint32_t part_count, + uint64_t key_hint, + struct key_def *key_def); +/** @copydoc tuple_compare_hinted() */ +typedef int (*tuple_compare_hinted_t)(const struct tuple *tuple_a, + uint64_t tuple_a_hint, + const struct tuple *tuple_b, + uint64_t tuple_b_hint, + struct key_def *key_def); /** @copydoc tuple_extract_key() */ typedef char *(*tuple_extract_key_t)(const struct tuple *tuple, struct key_def *key_def, @@ -152,6 +165,11 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple, /** @copydoc key_hash() */ typedef uint32_t (*key_hash_t)(const char *key, struct key_def *key_def); +/** @copydoc tuple_hint() */ +typedef uint64_t (*tuple_hint_t)(const struct tuple *tuple, + struct key_def *key_def); +/** @copydoc key_hint() */ +typedef uint64_t (*key_hint_t)(const char *key, struct key_def *key_def); /* Definition of a multipart key. */ struct key_def { @@ -159,6 +177,10 @@ struct key_def { tuple_compare_t tuple_compare; /** @see tuple_compare_with_key() */ tuple_compare_with_key_t tuple_compare_with_key; + /** @see tuple_compare_with_key_hinted() */ + tuple_compare_with_key_hinted_t tuple_compare_with_key_hinted; + /** @see tuple_compare_hinted() */ + tuple_compare_hinted_t tuple_compare_hinted; /** @see tuple_extract_key() */ tuple_extract_key_t tuple_extract_key; /** @see tuple_extract_key_raw() */ @@ -167,6 +189,10 @@ struct key_def { tuple_hash_t tuple_hash; /** @see key_hash() */ key_hash_t key_hash; + /** @see tuple_hint() */ + tuple_hint_t tuple_hint; + /** @see key_hint() */ + key_hint_t key_hint; /** * Minimal part count which always is unique. For example, * if a secondary index is unique, then @@ -571,6 +597,51 @@ tuple_compare_with_key(const struct tuple *tuple, const char *key, return key_def->tuple_compare_with_key(tuple, key, part_count, key_def); } +/** + * Compare tuples using the key definition and comparison hints. + * @param tuple_a First tuple. + * @param tuple_a_hint Comparison hint is calculated for the + * @a tuple_a. + * @param tuple_b Second tuple. + * @param tuple_b_hint Comparison hint is calculated for the + * @a tuple_b. + * @param key_def Key definition. + * @retval 0 if key_fields(tuple_a) == key_fields(tuple_b) + * @retval <0 if key_fields(tuple_a) < key_fields(tuple_b) + * @retval >0 if key_fields(tuple_a) > key_fields(tuple_b) + */ +static inline int +tuple_compare_hinted(const struct tuple *tuple_a, uint64_t tuple_a_hint, + const struct tuple *tuple_b, uint64_t tuple_b_hint, + struct key_def *key_def) +{ + return key_def->tuple_compare_hinted(tuple_a, tuple_a_hint, tuple_b, + tuple_b_hint, key_def); +} + +/** + * Compare tuple with key using the key definition and + * comparison hints. + * @param tuple tuple + * @param tuple_hint Comparison hint is calculated for @a tuple. + * @param key key parts without MessagePack array header + * @param part_count the number of parts in @a key + * @param key_hint t Comparison key kent is calculated for @a key. + * @param key_def key definition + * @retval 0 if key_fields(tuple) == parts(key) + * @retval <0 if key_fields(tuple) < parts(key) + * @retval >0 if key_fields(tuple) > parts(key) + */ +static inline int +tuple_compare_with_key_hinted(const struct tuple *tuple, uint64_t tuple_hint, + const char *key, uint32_t part_count, + uint64_t key_hint, struct key_def *key_def) +{ + return key_def->tuple_compare_with_key_hinted(tuple, tuple_hint, key, + part_count, key_hint, + key_def); +} + /** * Compute hash of a tuple field. * @param ph1 - pointer to running hash @@ -624,6 +695,30 @@ key_hash(const char *key, struct key_def *key_def) return key_def->key_hash(key, key_def); } + /* + * Get a comparison hint for a @a tuple. + * @param tuple - tuple to get uint64_t of. + * @param key_def - key_def that defines which comparison is used. + * @return the comparison auxiliary information. + */ +static inline uint64_t +tuple_hint(const struct tuple *tuple, struct key_def *key_def) +{ + return key_def->tuple_hint(tuple, key_def); +} + +/** + * Get a comparison hint of a @a key. + * @param key - key to get hint of. + * @param key_def - key_def that defines which comparison is used. + * @return the comparison auxiliary information. + */ +static inline uint64_t +key_hint(const char *key, struct key_def *key_def) +{ + return key_def->key_hint(key, key_def); +} + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c index d5b42efb6..2c1933ceb 100644 --- a/src/box/memtx_tree.c +++ b/src/box/memtx_tree.c @@ -47,6 +47,11 @@ struct memtx_tree_key_data { const char *key; /** Number of msgpacked search fields. */ uint32_t part_count; + /** + * Comparison hint is calculated with + * key_hint(key, ...). + */ + uint64_t hint; }; /** @@ -55,6 +60,11 @@ struct memtx_tree_key_data { struct memtx_tree_data { /* Tuple that this node is represents. */ struct tuple *tuple; + /** + * Comparison hint is calculated with + * tuple_hint(tuple, ...). + */ + uint64_t hint; }; /** @@ -69,16 +79,18 @@ static bool memtx_tree_data_identical(const struct memtx_tree_data *a, const struct memtx_tree_data *b) { - return a->tuple == b->tuple; + return a->tuple == b->tuple && a->hint == b->hint; } #define BPS_TREE_NAME memtx_tree #define BPS_TREE_BLOCK_SIZE (512) #define BPS_TREE_EXTENT_SIZE MEMTX_EXTENT_SIZE #define BPS_TREE_COMPARE(a, b, arg)\ - tuple_compare((&a)->tuple, (&b)->tuple, arg) + tuple_compare_hinted((&a)->tuple, (&a)->hint, (&b)->tuple,\ + (&b)->hint, arg) #define BPS_TREE_COMPARE_KEY(a, b, arg)\ - tuple_compare_with_key((&a)->tuple, (b)->key, (b)->part_count, arg) + tuple_compare_with_key_hinted((&a)->tuple, (&a)->hint, (b)->key,\ + (b)->part_count, (b)->hint, arg) #define BPS_TREE_IDENTICAL(a, b) memtx_tree_data_identical(&a, &b) #define bps_tree_elem_t struct memtx_tree_data #define bps_tree_key_t struct memtx_tree_key_data * @@ -113,7 +125,8 @@ memtx_tree_qcompare(const void* a, const void *b, void *c) const struct memtx_tree_data *data_a = a; const struct memtx_tree_data *data_b = b; struct key_def *key_def = c; - return tuple_compare(data_a->tuple, data_b->tuple, key_def); + return tuple_compare_hinted(data_a->tuple, data_a->hint, data_b->tuple, + data_b->hint, key_def); } /* {{{ MemtxTree Iterators ****************************************/ @@ -235,9 +248,11 @@ tree_iterator_next_equal(struct iterator *iterator, struct tuple **ret) memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator); /* Use user key def to save a few loops. */ if (res == NULL || - tuple_compare_with_key(res->tuple, it->key_data.key, - it->key_data.part_count, - it->index_def->key_def) != 0) { + tuple_compare_with_key_hinted(res->tuple, res->hint, + it->key_data.key, + it->key_data.part_count, + it->key_data.hint, + it->index_def->key_def) != 0) { iterator->next = tree_iterator_dummie; it->current.tuple = NULL; *ret = NULL; @@ -266,9 +281,11 @@ tree_iterator_prev_equal(struct iterator *iterator, struct tuple **ret) memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator); /* Use user key def to save a few loops. */ if (res == NULL || - tuple_compare_with_key(res->tuple, it->key_data.key, - it->key_data.part_count, - it->index_def->key_def) != 0) { + tuple_compare_with_key_hinted(res->tuple, res->hint, + it->key_data.key, + it->key_data.part_count, + it->key_data.hint, + it->index_def->key_def) != 0) { iterator->next = tree_iterator_dummie; it->current.tuple = NULL; *ret = NULL; @@ -516,9 +533,11 @@ memtx_tree_index_get(struct index *base, const char *key, assert(base->def->opts.is_unique && part_count == base->def->key_def->part_count); struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *cmp_def = memtx_tree_index_cmp_def(index); struct memtx_tree_key_data key_data; key_data.key = key; key_data.part_count = part_count; + key_data.hint = key_hint(key, cmp_def); struct memtx_tree_data *res = memtx_tree_find(&index->tree, &key_data); *result = res != NULL ? res->tuple : NULL; return 0; @@ -530,9 +549,11 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple, struct tuple **result) { struct memtx_tree_index *index = (struct memtx_tree_index *)base; + struct key_def *key_def = index->tree.arg; if (new_tuple) { struct memtx_tree_data new_data; new_data.tuple = new_tuple; + new_data.hint = tuple_hint(new_tuple, key_def); struct memtx_tree_data dup_data; dup_data.tuple = NULL; @@ -565,6 +586,7 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple, if (old_tuple) { struct memtx_tree_data old_data; old_data.tuple = old_tuple; + old_data.hint = tuple_hint(old_tuple, key_def); memtx_tree_delete(&index->tree, old_data); } *result = old_tuple; @@ -607,6 +629,8 @@ memtx_tree_index_create_iterator(struct index *base, enum iterator_type type, it->type = type; it->key_data.key = key; it->key_data.part_count = part_count; + struct key_def *cmp_def = memtx_tree_index_cmp_def(index); + it->key_data.hint = key_hint(key, cmp_def); it->index_def = base->def; it->tree = &index->tree; it->tree_iterator = memtx_tree_invalid_iterator(); @@ -671,6 +695,8 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple) struct memtx_tree_data *elem = &index->build_array[index->build_array_size++]; elem->tuple = tuple; + struct key_def *key_def = index->tree.arg; + elem->hint = tuple_hint(tuple, key_def); return 0; } diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc index 1bcff2ca3..86922c9bb 100644 --- a/src/box/tuple_compare.cc +++ b/src/box/tuple_compare.cc @@ -34,6 +34,383 @@ #include "trivia/util.h" /* NOINLINE */ #include +/** + * Tuple hints + * + * Hint is a value h(t) is calculated for tuple in terms + * of particular key_def that has the following rules: + * if h(t1) < h(t2) then t1 < t2; + * if h(t1) > h(t2) then t1 > t2; + * if t1 == t2 then regular comparision is required; + * These rules mean that instead of direct tuple vs tuple + * (or tuple vs key) comparison one may compare theirs + * hints first; and only if theirs hints equal compare + * the tuples themselves. + * + * The comparison hint has the following structure: + * uint64_t: [ CMP_HINT_TYPE | CMP_HINT_VALUE ] + * 64 62 0 bit + * + * The reserved value HINT_INVALID is returned when hint is + * undefined. + */ +#define HINT_MAX (((int64_t)1 << 61) - 1) +#define HINT_MIN (-HINT_MAX - 1) +#define HINT_UMAX (((uint64_t)1 << 62) - 1) + +#define HINT_INVALID UINT64_MAX + +#define HINT_TYPE_NUMBER ((uint8_t)1) +#define HINT_TYPE_STRING ((uint8_t)2) +#define HINT_TYPE_BOOL ((uint8_t)3) + +#define HINT_TYPE(hint) ((uint64_t)hint >> 62) +#define HINT_VALUE(hint) ((uint64_t)hint & (((uint64_t)1 << 62) - 1)) +#define HINT(type, value) ({ \ + assert(HINT_TYPE(value) == 0); \ + (((uint64_t)type << 62) | (uint64_t)value);}) + +/** + * Perform hints comparison. + * Returns 1 when hint_a > hint_b, -1 when hint_b > hint_a; + * If the hints are incompatible or equal, a value of 0 is + * returned, and the further comparison of the original data is + * required. + */ +static int +hint_cmp(uint64_t hint_a, uint64_t hint_b) +{ + if (hint_a != HINT_INVALID && hint_b != HINT_INVALID && + HINT_TYPE(hint_a) == HINT_TYPE(hint_b) && + HINT_VALUE(hint_a) != HINT_VALUE(hint_b)) + return hint_a < hint_b ? -1 : 1; + else + return 0; +} + +template +static uint64_t +key_hint_uint(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_UNSIGNED); + uint64_t ret; + if ((is_optional && key == NULL) || + (is_nullable && mp_typeof(*key) == MP_NIL)) + return HINT(HINT_TYPE_NUMBER, 0); + assert(mp_typeof(*key) == MP_UINT); + uint64_t val = mp_decode_uint(&key); + ret = val > HINT_MAX ? HINT_UMAX : val - (uint64_t)HINT_MIN; + return HINT(HINT_TYPE_NUMBER, ret); +} + +template +static uint64_t +tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_UNSIGNED); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return HINT(HINT_TYPE_NUMBER, 0); + return key_hint_uint(field, key_def); +} + +template +static uint64_t +key_hint_int(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_INTEGER); + uint64_t ret; + if ((is_optional && key == NULL) || + (is_nullable && mp_typeof(*key) == MP_NIL)) + return HINT(HINT_TYPE_NUMBER, 0); + if (mp_typeof(*key) == MP_UINT) { + uint64_t val = mp_decode_uint(&key); + ret = val > HINT_MAX ? HINT_UMAX : val - (uint64_t)HINT_MIN; + } else { + assert(mp_typeof(*key) == MP_INT); + int64_t val = mp_decode_int(&key); + ret = val > HINT_MAX ? HINT_UMAX : + val < HINT_MIN ? 0 : + (uint64_t)val - (uint64_t)HINT_MIN; + } + return HINT(HINT_TYPE_NUMBER, ret); +} + +template +static uint64_t +tuple_hint_int(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_INTEGER); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return HINT(HINT_TYPE_NUMBER, 0); + return key_hint_int(field, key_def); +} + +template +static uint64_t +key_hint_number(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_NUMBER || + key_def->parts->type == FIELD_TYPE_SCALAR); + if ((is_optional && key == NULL) || + (is_nullable && mp_typeof(*key) == MP_NIL)) + return HINT(HINT_TYPE_NUMBER, 0); + uint64_t ret; + switch (mp_typeof(*key)) { + case MP_FLOAT: + case MP_DOUBLE: { + double f = mp_typeof(*key) == MP_FLOAT ? + mp_decode_float(&key) : mp_decode_double(&key); + if (!isfinite(f)) + return HINT_INVALID; + double val; + (void)modf(f, &val); + ret = val > (double)HINT_MAX ? HINT_UMAX : + val < (double)HINT_MIN ? 0 : + (uint64_t)val - (uint64_t)HINT_MIN; + break; + } + case MP_INT: { + int64_t val = (uint64_t)mp_decode_int(&key); + ret = val > HINT_MAX ? HINT_UMAX : + val < HINT_MIN ? 0 : + (uint64_t)val - (uint64_t)HINT_MIN; + break; + } + case MP_UINT: { + uint64_t val = mp_decode_uint(&key); + ret = val > HINT_MAX ? HINT_UMAX : val - (uint64_t)HINT_MIN; + break; + } + default: + unreachable(); + } + return HINT(HINT_TYPE_NUMBER, ret); +} + +template +static uint64_t +tuple_hint_number(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_NUMBER); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return HINT(HINT_TYPE_NUMBER, 0); + return key_hint_number(field, key_def); +} + +template +static uint64_t +key_hint_boolean(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_BOOLEAN || + key_def->parts->type == FIELD_TYPE_SCALAR); + if ((is_optional && key == NULL) || + (is_nullable && mp_typeof(*key) == MP_NIL)) + return HINT(HINT_TYPE_BOOL, 0); + return (uint64_t)mp_decode_bool(&key); +} + +template +static uint64_t +tuple_hint_boolean(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_BOOLEAN); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return HINT(HINT_TYPE_BOOL, 0); + return key_hint_boolean(field, key_def); +} + +template +static uint64_t +key_hint_string(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->coll == NULL); + assert(key_def->parts->type == FIELD_TYPE_STRING || + key_def->parts->type == FIELD_TYPE_SCALAR); + if ((is_optional && key == NULL) || + (is_nullable && mp_typeof(*key) == MP_NIL)) + return HINT(HINT_TYPE_STRING, 0); + assert(mp_typeof(*key) == MP_STR); + uint32_t len; + const unsigned char *str = + (const unsigned char *)mp_decode_str(&key, &len); + uint64_t result = 0; + uint32_t process_len = MIN(len, 7); + for (uint32_t i = 0; i < process_len; i++) { + result <<= 8; + result |= str[i]; + } + result <<= 8 * (7 - process_len); + return HINT(HINT_TYPE_STRING, result); +} + +template +static uint64_t +tuple_hint_string(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->coll == NULL); + assert(key_def->parts->type == FIELD_TYPE_STRING); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return HINT(HINT_TYPE_STRING, 0); + return key_hint_string(field, key_def); +} + +template +static uint64_t +key_hint_string_coll(const char *key, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert((key_def->parts->type == FIELD_TYPE_STRING || + key_def->parts->type == FIELD_TYPE_SCALAR) && + key_def->parts->coll != NULL); + if ((is_optional && key == NULL) || + (is_nullable && mp_typeof(*key) == MP_NIL)) + return HINT(HINT_TYPE_STRING, 0); + assert(mp_typeof(*key) == MP_STR); + uint32_t len; + const char *str = mp_decode_str(&key, &len); + return key_def->parts->coll->hint(str, len, key_def->parts->coll); +} + +template +static uint64_t +tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_STRING && + key_def->parts->coll != NULL); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return HINT(HINT_TYPE_STRING, 0); + return key_hint_string_coll(field, key_def); +} + +template +static uint64_t +key_hint_scalar(const char *key, struct key_def *key_def) +{ + (void)key_def; + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_SCALAR); + if ((is_optional && key == NULL) || + (is_nullable && mp_typeof(*key) == MP_NIL)) + return HINT(HINT_TYPE_BOOL, 0); + switch (mp_typeof(*key)) { + case MP_INT: + case MP_UINT: + case MP_FLOAT: + case MP_DOUBLE: + return key_hint_number(key, key_def); + case MP_BOOL: + return key_hint_boolean(key, key_def); + case MP_STR: + if (key_def->parts->coll == NULL) + return key_hint_string(key, key_def); + else + return key_hint_string_coll(key, key_def); + default: + return HINT_INVALID; + } +} + +template +static uint64_t +tuple_hint_scalar(const struct tuple *tuple, struct key_def *key_def) +{ + assert(key_part_is_nullable(key_def->parts) == is_nullable); + assert(key_def->parts->type == FIELD_TYPE_SCALAR); + const char *field = tuple_field_by_part(tuple, key_def->parts); + if (is_nullable && field == NULL) + return HINT(HINT_TYPE_BOOL, 0); + return key_hint_scalar(field, key_def); +} + +void +key_def_set_hint_func(struct key_def *def) +{ + def->key_hint = NULL; + def->tuple_hint = NULL; + bool is_nullable = key_part_is_nullable(def->parts); + switch (def->parts->type) { + case FIELD_TYPE_UNSIGNED: { + def->key_hint = is_nullable ? key_hint_uint : + key_hint_uint; + def->tuple_hint = is_nullable ? tuple_hint_uint : + tuple_hint_uint; + break; + } + case FIELD_TYPE_INTEGER: { + def->key_hint = is_nullable ? key_hint_int : + key_hint_int; + def->tuple_hint = is_nullable ? tuple_hint_int : + tuple_hint_int; + break; + } + case FIELD_TYPE_STRING: { + if (def->parts->coll != NULL) { + def->key_hint = + is_nullable ? key_hint_string_coll : + key_hint_string_coll; + def->tuple_hint = + is_nullable ? tuple_hint_string_coll : + tuple_hint_string_coll; + } else { + def->key_hint = + is_nullable ? key_hint_string : + key_hint_string; + def->tuple_hint = is_nullable ? tuple_hint_string : + tuple_hint_string; + } + break; + } + case FIELD_TYPE_NUMBER: { + def->key_hint = is_nullable ? key_hint_number : + key_hint_number; + def->tuple_hint = is_nullable ? tuple_hint_number : + tuple_hint_number; + break; + } + case FIELD_TYPE_BOOLEAN: { + def->key_hint = is_nullable ? key_hint_boolean : + key_hint_boolean; + def->tuple_hint = is_nullable ? tuple_hint_boolean : + tuple_hint_boolean; + break; + } + case FIELD_TYPE_SCALAR: { + def->key_hint = is_nullable ? key_hint_scalar : + key_hint_scalar; + def->tuple_hint = is_nullable ? tuple_hint_scalar : + tuple_hint_scalar; + break; + } + default: { + break; + } + } +} + /* {{{ tuple_compare */ /* @@ -460,13 +837,17 @@ tuple_common_key_parts(const struct tuple *tuple_a, const struct tuple *tuple_b, template static inline int -tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, - struct key_def *key_def) +tuple_compare_slowpath_hinted(const struct tuple *tuple_a, + uint64_t tuple_a_hint, const struct tuple *tuple_b, + uint64_t tuple_b_hint, struct key_def *key_def) { assert(has_json_paths == key_def->has_json_paths); assert(!has_optional_parts || is_nullable); assert(is_nullable == key_def->is_nullable); assert(has_optional_parts == key_def->has_optional_parts); + int rc = 0; + if ((rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0) + return rc; struct key_part *part = key_def->parts; const char *tuple_a_raw = tuple_data(tuple_a); const char *tuple_b_raw = tuple_data(tuple_b); @@ -502,7 +883,6 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, struct key_part *end; const char *field_a, *field_b; enum mp_type a_type, b_type; - int rc; if (is_nullable) end = part + key_def->unique_part_count; else @@ -592,8 +972,19 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, template static inline int -tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, - uint32_t part_count, struct key_def *key_def) +tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b, + struct key_def *key_def) +{ + return tuple_compare_slowpath_hinted(tuple_a, HINT_INVALID, tuple_b, + HINT_INVALID, key_def); +} + +template +static inline int +tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple, + uint64_t tuple_hint, const char *key, uint32_t part_count, + uint64_t key_hint, struct key_def *key_def) { assert(has_json_paths == key_def->has_json_paths); assert(!has_optional_parts || is_nullable); @@ -601,6 +992,9 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, assert(has_optional_parts == key_def->has_optional_parts); assert(key != NULL || part_count == 0); assert(part_count <= key_def->part_count); + int rc = 0; + if ((rc = hint_cmp(tuple_hint, key_hint)) != 0) + return rc; struct key_part *part = key_def->parts; struct tuple_format *format = tuple_format(tuple); const char *tuple_raw = tuple_data(tuple); @@ -636,7 +1030,6 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, } struct key_part *end = part + part_count; - int rc; for (; part < end; ++part, mp_next(&key)) { const char *field; if (has_json_paths) { @@ -675,6 +1068,17 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, return 0; } +template +static inline int +tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key, + uint32_t part_count, struct key_def *key_def) +{ + return tuple_compare_with_key_slowpath_hinted(tuple, + HINT_INVALID, key, part_count, + HINT_INVALID, key_def); +} + template static inline int key_compare_parts(const char *key_a, const char *key_b, uint32_t part_count, @@ -733,13 +1137,17 @@ key_compare_parts(const char *key_a, const char *key_b, uint32_t part_count, template static inline int -tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key, - uint32_t part_count, struct key_def *key_def) +tuple_compare_with_key_sequential_hinted(const struct tuple *tuple, + uint64_t tuple_hint, const char *key, uint32_t part_count, + uint64_t key_hint, struct key_def *key_def) { assert(!has_optional_parts || is_nullable); assert(key_def_is_sequential(key_def)); assert(is_nullable == key_def->is_nullable); assert(has_optional_parts == key_def->has_optional_parts); + int rc = hint_cmp(tuple_hint, key_hint); + if (rc != 0) + return rc; const char *tuple_key = tuple_data(tuple); uint32_t field_count = mp_decode_array(&tuple_key); uint32_t cmp_part_count; @@ -749,8 +1157,8 @@ tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key, assert(field_count >= part_count); cmp_part_count = part_count; } - int rc = key_compare_parts(tuple_key, key, cmp_part_count, - key_def); + rc = key_compare_parts(tuple_key, key, cmp_part_count, + key_def); if (!has_optional_parts || rc != 0) return rc; /* @@ -772,6 +1180,16 @@ tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key, return 0; } +template +static inline int +tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key, + uint32_t part_count, struct key_def *key_def) +{ + return tuple_compare_with_key_sequential_hinted(tuple, HINT_INVALID, key, + part_count, HINT_INVALID, key_def); +} + int key_compare(const char *key_a, const char *key_b, struct key_def *key_def) { @@ -792,9 +1210,13 @@ key_compare(const char *key_a, const char *key_b, struct key_def *key_def) template static int -tuple_compare_sequential(const struct tuple *tuple_a, - const struct tuple *tuple_b, key_def *key_def) +tuple_compare_sequential_hinted(const struct tuple *tuple_a, + uint64_t tuple_a_hint, const struct tuple *tuple_b, + uint64_t tuple_b_hint, key_def *key_def) { + int rc = hint_cmp(tuple_a_hint, tuple_b_hint); + if (rc != 0) + return rc; assert(!has_optional_parts || is_nullable); assert(has_optional_parts == key_def->has_optional_parts); assert(key_def_is_sequential(key_def)); @@ -812,7 +1234,6 @@ tuple_compare_sequential(const struct tuple *tuple_a, bool was_null_met = false; struct key_part *part = key_def->parts; struct key_part *end = part + key_def->unique_part_count; - int rc; uint32_t i = 0; for (; part < end; ++part, ++i) { enum mp_type a_type, b_type; @@ -859,6 +1280,16 @@ tuple_compare_sequential(const struct tuple *tuple_a, return 0; } +template +static int +tuple_compare_sequential(const struct tuple *tuple_a, + const struct tuple *tuple_b, key_def *key_def) +{ + return tuple_compare_sequential_hinted(tuple_a, HINT_INVALID, tuple_b, + HINT_INVALID, key_def); +} + template static inline int field_compare(const char **field_a, const char **field_b); @@ -989,6 +1420,18 @@ struct TupleCompare compare(tuple_a, tuple_b, format_a, format_b, field_a, field_b); } + + static int compare_hinted(const struct tuple *tuple_a, + uint64_t tuple_a_hint, + const struct tuple *tuple_b, + uint64_t tuple_b_hint, + struct key_def *key_def) + { + int rc = hint_cmp(tuple_a_hint, tuple_b_hint); + if (rc != 0) + return rc; + return compare(tuple_a, tuple_b, key_def); + } }; template @@ -1006,15 +1449,30 @@ struct TupleCompare<0, TYPE, MORE_TYPES...> { return FieldCompare<0, TYPE, MORE_TYPES...>::compare(tuple_a, tuple_b, format_a, format_b, field_a, field_b); } + + static int compare_hinted(const struct tuple *tuple_a, + uint64_t tuple_a_hint, + const struct tuple *tuple_b, + uint64_t tuple_b_hint, + struct key_def *key_def) + { + int rc = hint_cmp(tuple_a_hint, tuple_b_hint); + if (rc != 0) + return rc; + return compare(tuple_a, tuple_b, key_def); + } }; } /* end of anonymous namespace */ struct comparator_signature { tuple_compare_t f; + tuple_compare_hinted_t f_hinted; uint32_t p[64]; }; #define COMPARATOR(...) \ - { TupleCompare<__VA_ARGS__>::compare, { __VA_ARGS__, UINT32_MAX } }, + { TupleCompare<__VA_ARGS__>::compare, \ + TupleCompare<__VA_ARGS__>::compare_hinted, \ + { __VA_ARGS__, UINT32_MAX } }, /** * field1 no, field1 type, field2 no, field2 type, ... @@ -1049,6 +1507,17 @@ static const tuple_compare_t compare_slowpath_funcs[] = { tuple_compare_slowpath }; +static const tuple_compare_hinted_t compare_slowpath_hinted_funcs[] = { + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted, + tuple_compare_slowpath_hinted +}; + void key_def_set_tuple_compare(struct key_def *def) { @@ -1060,13 +1529,21 @@ key_def_set_tuple_compare(struct key_def *def) if (def->has_optional_parts) { def->tuple_compare = tuple_compare_sequential; + def->tuple_compare_hinted = + tuple_compare_sequential_hinted; } else { def->tuple_compare = tuple_compare_sequential; + def->tuple_compare_hinted = + tuple_compare_sequential_hinted; } } else { def->tuple_compare = compare_slowpath_funcs[cmp_func_idx]; + def->tuple_compare_hinted = + compare_slowpath_hinted_funcs[cmp_func_idx]; } return; } @@ -1085,13 +1562,20 @@ key_def_set_tuple_compare(struct key_def *def) if (i == def->part_count && cmp_arr[k].p[i * 2] == UINT32_MAX) { def->tuple_compare = cmp_arr[k].f; + def->tuple_compare_hinted = cmp_arr[k].f_hinted; return; } } } - def->tuple_compare = key_def_is_sequential(def) ? - tuple_compare_sequential : - compare_slowpath_funcs[cmp_func_idx]; + if (key_def_is_sequential(def)) { + def->tuple_compare = tuple_compare_sequential; + def->tuple_compare_hinted = + tuple_compare_sequential_hinted; + } else { + def->tuple_compare = compare_slowpath_funcs[cmp_func_idx]; + def->tuple_compare_hinted = + compare_slowpath_hinted_funcs[cmp_func_idx]; + } } /* }}} tuple_compare */ @@ -1222,6 +1706,17 @@ struct TupleCompareWithKey compare(tuple, key, part_count, key_def, format, field); } + + static int + compare_hinted(const struct tuple *tuple, uint64_t tuple_hint, + const char *key, uint32_t part_count, uint64_t key_hint, + struct key_def *key_def) + { + int rc = hint_cmp(tuple_hint, key_hint); + if (rc != 0) + return rc; + return compare(tuple, key, part_count, key_def); + } }; template @@ -1242,6 +1737,17 @@ struct TupleCompareWithKey<0, 0, TYPE, MORE_TYPES...> compare(tuple, key, part_count, key_def, format, field); } + + static int + compare_hinted(const struct tuple *tuple, uint64_t tuple_hint, + const char *key, uint32_t part_count, uint64_t key_hint, + struct key_def *key_def) + { + int rc = hint_cmp(tuple_hint, key_hint); + if (rc != 0) + return rc; + return compare(tuple, key, part_count, key_def); + } }; } /* end of anonymous namespace */ @@ -1249,11 +1755,14 @@ struct TupleCompareWithKey<0, 0, TYPE, MORE_TYPES...> struct comparator_with_key_signature { tuple_compare_with_key_t f; + tuple_compare_with_key_hinted_t f_hinted; uint32_t p[64]; }; #define KEY_COMPARATOR(...) \ - { TupleCompareWithKey<0, __VA_ARGS__>::compare, { __VA_ARGS__ } }, + { TupleCompareWithKey<0, __VA_ARGS__>::compare, \ + TupleCompareWithKey<0, __VA_ARGS__>::compare_hinted, \ + { __VA_ARGS__ } }, static const comparator_with_key_signature cmp_wk_arr[] = { KEY_COMPARATOR(0, FIELD_TYPE_UNSIGNED, 1, FIELD_TYPE_UNSIGNED, 2, FIELD_TYPE_UNSIGNED) @@ -1284,6 +1793,18 @@ static const tuple_compare_with_key_t compare_with_key_slowpath_funcs[] = { tuple_compare_with_key_slowpath }; +static const tuple_compare_with_key_hinted_t + compare_with_key_slowpath_hinted_funcs[] = { + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted, + tuple_compare_with_key_slowpath_hinted +}; + void key_def_set_tuple_compare_with_key(struct key_def *def) { @@ -1296,14 +1817,23 @@ key_def_set_tuple_compare_with_key(struct key_def *def) def->tuple_compare_with_key = tuple_compare_with_key_sequential; + def->tuple_compare_with_key_hinted = + tuple_compare_with_key_sequential_hinted< + true, true>; } else { def->tuple_compare_with_key = tuple_compare_with_key_sequential; + def->tuple_compare_with_key_hinted = + tuple_compare_with_key_sequential_hinted< + true, false>; } } else { def->tuple_compare_with_key = compare_with_key_slowpath_funcs[cmp_func_idx]; + def->tuple_compare_with_key_hinted = + compare_with_key_slowpath_hinted_funcs[ + cmp_func_idx]; } return; } @@ -1325,14 +1855,23 @@ key_def_set_tuple_compare_with_key(struct key_def *def) } if (i == def->part_count) { def->tuple_compare_with_key = cmp_wk_arr[k].f; + def->tuple_compare_with_key_hinted = + cmp_wk_arr[k].f_hinted; return; } } } - def->tuple_compare_with_key = - key_def_is_sequential(def) ? - tuple_compare_with_key_sequential : - compare_with_key_slowpath_funcs[cmp_func_idx]; + if (key_def_is_sequential(def)) { + def->tuple_compare_with_key = + tuple_compare_with_key_sequential; + def->tuple_compare_with_key_hinted = + tuple_compare_with_key_sequential_hinted; + } else { + def->tuple_compare_with_key = + compare_with_key_slowpath_funcs[cmp_func_idx]; + def->tuple_compare_with_key_hinted = + compare_with_key_slowpath_hinted_funcs[cmp_func_idx]; + } } /* }}} tuple_compare_with_key */ @@ -1342,4 +1881,5 @@ key_def_set_compare_func(struct key_def *def) { key_def_set_tuple_compare(def); key_def_set_tuple_compare_with_key(def); + key_def_set_hint_func(def); } diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c index afc15e809..3bd7c44f5 100644 --- a/src/lib/coll/coll.c +++ b/src/lib/coll/coll.c @@ -133,6 +133,37 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, return s_len; } +/** Get a compare hint of a binary collation. */ +static uint64_t +coll_bin_hint(const char *s, size_t s_len, struct coll *coll) +{ + (void)coll; + uint64_t result = 0; + uint32_t process_len = MIN(s_len, 7); + for (uint32_t i = 0; i < process_len; i++) { + result <<= 8; + result |= ((unsigned char *)s)[i]; + } + result <<= 8 * (7 - process_len); + return result; +} + +/** Get a compare hint of a string using ICU collation. */ +static uint64_t +coll_icu_hint(const char *s, size_t s_len, struct coll *coll) +{ + assert(coll->type == COLL_TYPE_ICU); + UCharIterator itr; + uiter_setUTF8(&itr, s, s_len); + uint8_t buf[7]; + uint32_t state[2] = {0, 0}; + UErrorCode status = U_ZERO_ERROR; + int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf, + sizeof(buf), &status); + assert(got >= 0 && got <= 7); + return coll_bin_hint((const char *)buf, got, NULL); +} + /** * Set up ICU collator and init cmp and hash members of collation. * @param coll Collation to set up. @@ -242,6 +273,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def) } coll->cmp = coll_icu_cmp; coll->hash = coll_icu_hash; + coll->hint = coll_icu_hint; return 0; } @@ -356,6 +388,7 @@ coll_new(const struct coll_def *def) coll->collator = NULL; coll->cmp = coll_bin_cmp; coll->hash = coll_bin_hash; + coll->hint = coll_bin_hint; break; default: unreachable(); diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h index 8c9f94293..d695a02ad 100644 --- a/src/lib/coll/coll.h +++ b/src/lib/coll/coll.h @@ -48,6 +48,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t, typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, struct coll *coll); +typedef uint64_t (*coll_hint_f)(const char *s, size_t s_len, struct coll *coll); + struct UCollator; /** @@ -62,6 +64,8 @@ struct coll { /** String comparator. */ coll_cmp_f cmp; coll_hash_f hash; + /** The pointer to routine calculating tuple hint. */ + coll_hint_f hint; /** Reference counter. */ int refs; /** -- 2.21.0