From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Thu, 21 Feb 2019 19:06:07 +0300 From: Vladimir Davydov Subject: Re: [tarantool-patches] Re: [PATCH v2 4/4] box: introduce tuple compare hint for string index Message-ID: <20190221160607.4s4ighjoxu24lamx@esperanza> References: <48f09c5c8f53bfa63a52da90ffe4672ade7ad9ae.1550050245.git.kshcherbatov@tarantool.org> <414fffe7-3d58-fd42-3334-d7072c78afb4@tarantool.org> <3c155f7e-94ac-8ff1-2100-99f8b0c42dfc@tarantool.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <3c155f7e-94ac-8ff1-2100-99f8b0c42dfc@tarantool.org> To: Kirill Shcherbatov Cc: tarantool-patches@freelists.org List-ID: On Tue, Feb 19, 2019 at 06:02:55PM +0300, Kirill Shcherbatov wrote: > diff --git a/src/box/memtx_tree_decl.c b/src/box/memtx_tree_decl.c > index 084a0594f..55016148b 100644 > --- a/src/box/memtx_tree_decl.c > +++ b/src/box/memtx_tree_decl.c > @@ -77,8 +77,133 @@ struct memtx_tuple_tree_key_data { > > /* }}} */ > > +/* {{{ Memtx hinted and hint_only tree class. *******************/ There's no hint_only tree anymore. Please update the comments. > + > +/** > + * Struct that is used as a key in BPS tree definition in > + * memtx_hint_only_tree and memtx_hinted_tree. > +*/ > +struct memtx_hinted_tree_key_data { > + /** Sequence of msgpacked search fields. */ > + const char *key; > + /** Number of msgpacked search fields. */ > + uint32_t part_count; > + /** > + * Compare hint. Is calculated automatically on 'set' Comparison hint. 'Is' isn't necessary - could be simply Calculated ... > + * operation with memtx_hinted_tree_key_data_set(). > + */ > + uint64_t hint; > +}; > + > +/** > + * Struct that is used as a key in BPS tree definition in > + * memtx_hint_only_tree and memtx_hinted_tree. > + */ > +struct memtx_hinted_tree_data { > + /** Tuple this node is representing. */ represents > + struct tuple *tuple; > + /** > + * Compare hint. Is calculated automatically on 'set' > + * operation with memtx_hinted_tree_data_set(). > + */ > + uint64_t hint; > +}; > + > +/** > + * Compare memtx_hinted_tree records. > + */ > +static int > +memtx_hinted_tree_data_cmp(struct memtx_hinted_tree_data *a, > + struct memtx_hinted_tree_data *b, > + struct key_def *key_def) > +{ > + if (a->hint != b->hint) > + return a->hint < b->hint ? -1 : 1; > + return tuple_compare(a->tuple, b->tuple, key_def); > +} > + > +/** > + * Compare memtx_hinted_tree record with key. > + */ > +static int > +memtx_hinted_tree_data_cmp_with_key(struct memtx_hinted_tree_data *a, > + struct memtx_hinted_tree_key_data *key, > + struct key_def *key_def) > +{ > + if (a->hint != key->hint) > + return a->hint < key->hint ? -1 : 1; > + return tuple_compare_with_key(a->tuple, key->key, key->part_count, > + key_def); > +} > + > +/** > + * Initialize memtx_hinted_tree or memtx_hint_only_tree record > + * with tuple and recalculate internal hint field. > + */ > +static void > +memtx_hinted_tree_data_set(struct memtx_hinted_tree_data *data, > + struct tuple *tuple, struct key_def *key_def) > +{ > + data->tuple = tuple; > + data->hint = tuple_hint(tuple, key_def); > +} > + > +/** > + * Initialize memtx_hinted_tree or memtx_hint_only_tree key with > + * key raw and part count and recalculate internal hint field. > + */ > +static void > +memtx_hinted_tree_key_data_set(struct memtx_hinted_tree_key_data *key_data, > + const char *key, uint32_t part_count, > + struct key_def *key_def) All these functions aren't used anywhere except in MEMTX_TREE_ macros, and they're pretty short. Worth inlining them there? > +{ > + key_data->key = key; > + key_data->part_count = part_count; > + key_data->hint = key != NULL && part_count > 0 ? No need to check both part_count and key. Either of them is enough. > + key_hint(key, key_def) : 0; > +} > + > +#define MEMTX_TREE_NAME memtx_hinted_tree > +#define memtx_tree_elem struct memtx_hinted_tree_data > +#define memtx_tree_key struct memtx_hinted_tree_key_data > +#define MEMTX_TREE_ELEM_EQUAL(elem_a_ptr, elem_b_ptr) \ > + ((elem_a_ptr)->tuple == (elem_b_ptr)->tuple) This macro isn't required as far as I recall. > +#define MEMTX_TREE_ELEM_CMP(elem_a_ptr, elem_b_ptr, key_def) \ > + memtx_hinted_tree_data_cmp(elem_a_ptr, elem_b_ptr, key_def) > +#define MEMTX_TREE_ELEM_WITH_KEY_CMP(elem_ptr, key_ptr, key_def) \ > + memtx_hinted_tree_data_cmp_with_key(elem_ptr, key_ptr, key_def) > +#define MEMTX_TREE_ELEM_SET(elem_ptr, tuple, key_def) \ > + memtx_hinted_tree_data_set(elem_ptr, tuple, key_def) > +#define MEMTX_TREE_KEY_SET(key_ptr, key_val, part_count_val, key_def) \ > + memtx_hinted_tree_key_data_set(key_ptr, key_val, part_count_val, \ > + key_def) > +#define MEMTX_TREE_ELEM_GET(elem_ptr) ((elem_ptr)->tuple) > +#define MEMTX_TREE_KEY_GET(key_ptr, part_count_ptr) \ > + ({*part_count_ptr = (key_ptr)->part_count; (key_ptr)->key;}) > + > +#include "memtx_tree_impl.h" > + > +#undef memtx_tree_key > +#undef memtx_tree_elem > +#undef MEMTX_TREE_KEY_GET > +#undef MEMTX_TREE_ELEM_GET > +#undef MEMTX_TREE_KEY_SET > +#undef MEMTX_TREE_ELEM_SET > +#undef MEMTX_TREE_ELEM_WITH_KEY_CMP > +#undef MEMTX_TREE_ELEM_CMP > +#undef MEMTX_TREE_ELEM_EQUAL > +#undef memtx_tree_key > +#undef memtx_tree_elem > +#undef MEMTX_TREE_NAME > + > +/* }}} */ > + > struct index * > memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def) > { > + if (def->cmp_def->parts->type == FIELD_TYPE_STRING || > + def->cmp_def->parts->type == FIELD_TYPE_UNSIGNED || > + def->cmp_def->parts->type == FIELD_TYPE_INTEGER) > + return memtx_hinted_tree_index_new(memtx, def); > return memtx_tuple_tree_index_new(memtx, def); > } > diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc > index ded802c7d..5845235ff 100644 > --- a/src/box/tuple_compare.cc > +++ b/src/box/tuple_compare.cc > @@ -1321,4 +1321,189 @@ tuple_compare_with_key_create(const struct key_def *def) > compare_with_key_slowpath_funcs[cmp_func_idx]; > } I think all the hints should be defined in new files tuple_hint.{h,cc} > > +static uint64_t > +key_hint_default(const char *key, struct key_def *key_def) > +{ > + (void)key; > + (void)key_def; > + return 0; > +} > + > +static uint64_t > +tuple_hint_default(const struct tuple *tuple, struct key_def *key_def) > +{ > + (void)tuple; > + (void)key_def; > + return 0; > +} > + > +template I doubt we need to use templates here, but OK, let them be. > +static uint64_t > +key_hint_uint(const char *key, struct key_def *key_def) > +{ > + (void)key_def; > + assert(key_part_is_nullable(key_def->parts) == is_nullable); > + if (is_nullable && mp_typeof(*key) == MP_NIL) > + return 0; > + assert(mp_typeof(*key) == MP_UINT); > + uint64_t val = mp_decode_uint(&key); > + if (val > INT64_MAX) > + return INT64_MAX; > + return val - (uint64_t)INT64_MIN; > +} > + > +template > +static uint64_t > +tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def) > +{ > + assert(key_part_is_nullable(key_def->parts) == is_nullable); > + const char *field = tuple_field_by_part(tuple, key_def->parts); > + if (is_nullable && field == NULL) > + return 0; > + return key_hint_uint(field, key_def); > +} > + > +template > +static uint64_t > +key_hint_int(const char *key, struct key_def *key_def) > +{ > + assert(key_part_is_nullable(key_def->parts) == is_nullable); > + if (is_nullable && mp_typeof(*key) == MP_NIL) > + return 0; > + if (mp_typeof(*key) == MP_UINT) { > + return key_hint_uint(key, key_def); > + } else { > + assert(key_def->parts->type == FIELD_TYPE_INTEGER); This assertion should be in the beginning of this function, no? Please also add assertions checking part->type to other hint functions. > + assert(mp_typeof(*key) == MP_INT); > + int64_t val = mp_decode_int(&key); > + return (uint64_t)val - (uint64_t)INT64_MIN; > + } > +} > + > +template > +static uint64_t > +tuple_hint_int(const struct tuple *tuple, struct key_def *key_def) > +{ > + assert(key_part_is_nullable(key_def->parts) == is_nullable); > + const char *field = tuple_field_by_part(tuple, key_def->parts); > + if (is_nullable && field == NULL) > + return 0; > + return key_hint_int(field, key_def); > +} > + > +template > +static uint64_t > +key_hint_string(const char *key, struct key_def *key_def) > +{ > + (void)key_def; > + assert(key_part_is_nullable(key_def->parts) == is_nullable); > + if (is_nullable && mp_typeof(*key) == MP_NIL) > + return 0; > + assert(key_def->parts->type == FIELD_TYPE_STRING); > + assert(mp_typeof(*key) == MP_STR); > + uint32_t len; > + const unsigned char *str = > + (const unsigned char *)mp_decode_str(&key, &len); > + uint64_t result = 0; > + /* > + * Map the sequence of characters in the hint's bytes in > + * the reverse order: the first character will be mapped > + * in the highest byte of the number, etc. > + * One additional bit encodes the presence of the next > + * sign. > + * 0bCHARBYTEU CHARBYTEU CHARBYTEU ... CHARBYTE0 > + * The sequence constructed in this way provides a > + * comparison transitivity of strings no longer than 7 > + * bytes. Still I don't understand why we need this transformation and why simply using a sequence of bytes constituting the string isn't enough. Could you give an example? > + */ > + uint32_t process_len = MIN(len, 7); > + for (uint32_t i = 0; i < process_len; i++) { > + result <<= 9; > + result |= 0x100; > + result |= str[i]; > + } > + /* > + * If the length of the string is less than the 64-bit > + * hint can accommodate, the insignificant positions are > + * filled with 0. > + */ > + result <<= 9 * (7 - process_len); > + return result; > +} > + > +template > +static uint64_t > +tuple_hint_string(const struct tuple *tuple, struct key_def *key_def) > +{ > + assert(key_part_is_nullable(key_def->parts) == is_nullable); > + const char *field = tuple_field_by_part(tuple, key_def->parts); > + if (is_nullable && field == NULL) > + return 0; > + return key_hint_string(field, key_def); > +} > + > +template > +static uint64_t > +key_hint_string_coll(const char *key, struct key_def *key_def) > +{ > + assert(key_part_is_nullable(key_def->parts) == is_nullable); > + if (is_nullable && mp_typeof(*key) == MP_NIL) > + return 0; > + assert(key_def->parts->type == FIELD_TYPE_STRING); > + assert(mp_typeof(*key) == MP_STR); > + assert(key_def->parts->coll != NULL); > + uint32_t len; > + const char *str = mp_decode_str(&key, &len); > + return key_def->parts->coll->hint(str, len, key_def->parts->coll); > +} > + > +template > +static uint64_t > +tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def) > +{ > + assert(key_part_is_nullable(key_def->parts) == is_nullable); > + const char *field = tuple_field_by_part(tuple, key_def->parts); > + if (is_nullable && field == NULL) > + return 0; > + return key_hint_string_coll(field, key_def); > +} > + > +void > +tuple_hint_set(struct key_def *def) Should be called key_def_set_hint_func. Please rename. > +{ > + def->key_hint = key_hint_default; > + def->tuple_hint = tuple_hint_default; > + bool is_nullable = key_part_is_nullable(def->parts); > + if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL && > + def->parts->coll->type != COLL_TYPE_BINARY) { > + def->key_hint = is_nullable ? key_hint_string_coll : > + key_hint_string_coll; > + def->tuple_hint = is_nullable ? tuple_hint_string_coll : > + tuple_hint_string_coll; > + return; > + } > + switch (def->parts->type) { > + case FIELD_TYPE_UNSIGNED: > + def->key_hint = is_nullable ? key_hint_uint : > + key_hint_uint; > + def->tuple_hint = is_nullable ? tuple_hint_uint : > + tuple_hint_uint; > + break; > + case FIELD_TYPE_INTEGER: > + def->key_hint = is_nullable ? key_hint_int : > + key_hint_int; > + def->tuple_hint = is_nullable ? tuple_hint_int : > + tuple_hint_int; > + break; > + case FIELD_TYPE_STRING: > + def->key_hint = is_nullable ? key_hint_string : > + key_hint_string; > + def->tuple_hint = is_nullable ? tuple_hint_string : > + tuple_hint_string; > + break; > + default: > + break; > + }; > +}