[tarantool-patches] Re: [PATCH v2 4/4] box: introduce tuple compare hint for string index
Vladimir Davydov
vdavydov.dev at gmail.com
Thu Feb 21 19:06:07 MSK 2019
On Tue, Feb 19, 2019 at 06:02:55PM +0300, Kirill Shcherbatov wrote:
> diff --git a/src/box/memtx_tree_decl.c b/src/box/memtx_tree_decl.c
> index 084a0594f..55016148b 100644
> --- a/src/box/memtx_tree_decl.c
> +++ b/src/box/memtx_tree_decl.c
> @@ -77,8 +77,133 @@ struct memtx_tuple_tree_key_data {
>
> /* }}} */
>
> +/* {{{ Memtx hinted and hint_only tree class. *******************/
There's no hint_only tree anymore. Please update the comments.
> +
> +/**
> + * Struct that is used as a key in BPS tree definition in
> + * memtx_hint_only_tree and memtx_hinted_tree.
> +*/
> +struct memtx_hinted_tree_key_data {
> + /** Sequence of msgpacked search fields. */
> + const char *key;
> + /** Number of msgpacked search fields. */
> + uint32_t part_count;
> + /**
> + * Compare hint. Is calculated automatically on 'set'
Comparison hint. 'Is' isn't necessary - could be simply Calculated ...
> + * operation with memtx_hinted_tree_key_data_set().
> + */
> + uint64_t hint;
> +};
> +
> +/**
> + * Struct that is used as a key in BPS tree definition in
> + * memtx_hint_only_tree and memtx_hinted_tree.
> + */
> +struct memtx_hinted_tree_data {
> + /** Tuple this node is representing. */
represents
> + struct tuple *tuple;
> + /**
> + * Compare hint. Is calculated automatically on 'set'
> + * operation with memtx_hinted_tree_data_set().
> + */
> + uint64_t hint;
> +};
> +
> +/**
> + * Compare memtx_hinted_tree records.
> + */
> +static int
> +memtx_hinted_tree_data_cmp(struct memtx_hinted_tree_data *a,
> + struct memtx_hinted_tree_data *b,
> + struct key_def *key_def)
> +{
> + if (a->hint != b->hint)
> + return a->hint < b->hint ? -1 : 1;
> + return tuple_compare(a->tuple, b->tuple, key_def);
> +}
> +
> +/**
> + * Compare memtx_hinted_tree record with key.
> + */
> +static int
> +memtx_hinted_tree_data_cmp_with_key(struct memtx_hinted_tree_data *a,
> + struct memtx_hinted_tree_key_data *key,
> + struct key_def *key_def)
> +{
> + if (a->hint != key->hint)
> + return a->hint < key->hint ? -1 : 1;
> + return tuple_compare_with_key(a->tuple, key->key, key->part_count,
> + key_def);
> +}
> +
> +/**
> + * Initialize memtx_hinted_tree or memtx_hint_only_tree record
> + * with tuple and recalculate internal hint field.
> + */
> +static void
> +memtx_hinted_tree_data_set(struct memtx_hinted_tree_data *data,
> + struct tuple *tuple, struct key_def *key_def)
> +{
> + data->tuple = tuple;
> + data->hint = tuple_hint(tuple, key_def);
> +}
> +
> +/**
> + * Initialize memtx_hinted_tree or memtx_hint_only_tree key with
> + * key raw and part count and recalculate internal hint field.
> + */
> +static void
> +memtx_hinted_tree_key_data_set(struct memtx_hinted_tree_key_data *key_data,
> + const char *key, uint32_t part_count,
> + struct key_def *key_def)
All these functions aren't used anywhere except in MEMTX_TREE_ macros,
and they're pretty short. Worth inlining them there?
> +{
> + key_data->key = key;
> + key_data->part_count = part_count;
> + key_data->hint = key != NULL && part_count > 0 ?
No need to check both part_count and key. Either of them is enough.
> + key_hint(key, key_def) : 0;
> +}
> +
> +#define MEMTX_TREE_NAME memtx_hinted_tree
> +#define memtx_tree_elem struct memtx_hinted_tree_data
> +#define memtx_tree_key struct memtx_hinted_tree_key_data
> +#define MEMTX_TREE_ELEM_EQUAL(elem_a_ptr, elem_b_ptr) \
> + ((elem_a_ptr)->tuple == (elem_b_ptr)->tuple)
This macro isn't required as far as I recall.
> +#define MEMTX_TREE_ELEM_CMP(elem_a_ptr, elem_b_ptr, key_def) \
> + memtx_hinted_tree_data_cmp(elem_a_ptr, elem_b_ptr, key_def)
> +#define MEMTX_TREE_ELEM_WITH_KEY_CMP(elem_ptr, key_ptr, key_def) \
> + memtx_hinted_tree_data_cmp_with_key(elem_ptr, key_ptr, key_def)
> +#define MEMTX_TREE_ELEM_SET(elem_ptr, tuple, key_def) \
> + memtx_hinted_tree_data_set(elem_ptr, tuple, key_def)
> +#define MEMTX_TREE_KEY_SET(key_ptr, key_val, part_count_val, key_def) \
> + memtx_hinted_tree_key_data_set(key_ptr, key_val, part_count_val, \
> + key_def)
> +#define MEMTX_TREE_ELEM_GET(elem_ptr) ((elem_ptr)->tuple)
> +#define MEMTX_TREE_KEY_GET(key_ptr, part_count_ptr) \
> + ({*part_count_ptr = (key_ptr)->part_count; (key_ptr)->key;})
> +
> +#include "memtx_tree_impl.h"
> +
> +#undef memtx_tree_key
> +#undef memtx_tree_elem
> +#undef MEMTX_TREE_KEY_GET
> +#undef MEMTX_TREE_ELEM_GET
> +#undef MEMTX_TREE_KEY_SET
> +#undef MEMTX_TREE_ELEM_SET
> +#undef MEMTX_TREE_ELEM_WITH_KEY_CMP
> +#undef MEMTX_TREE_ELEM_CMP
> +#undef MEMTX_TREE_ELEM_EQUAL
> +#undef memtx_tree_key
> +#undef memtx_tree_elem
> +#undef MEMTX_TREE_NAME
> +
> +/* }}} */
> +
> struct index *
> memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
> {
> + if (def->cmp_def->parts->type == FIELD_TYPE_STRING ||
> + def->cmp_def->parts->type == FIELD_TYPE_UNSIGNED ||
> + def->cmp_def->parts->type == FIELD_TYPE_INTEGER)
> + return memtx_hinted_tree_index_new(memtx, def);
> return memtx_tuple_tree_index_new(memtx, def);
> }
> diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
> index ded802c7d..5845235ff 100644
> --- a/src/box/tuple_compare.cc
> +++ b/src/box/tuple_compare.cc
> @@ -1321,4 +1321,189 @@ tuple_compare_with_key_create(const struct key_def *def)
> compare_with_key_slowpath_funcs[cmp_func_idx];
> }
I think all the hints should be defined in new files tuple_hint.{h,cc}
>
> +static uint64_t
> +key_hint_default(const char *key, struct key_def *key_def)
> +{
> + (void)key;
> + (void)key_def;
> + return 0;
> +}
> +
> +static uint64_t
> +tuple_hint_default(const struct tuple *tuple, struct key_def *key_def)
> +{
> + (void)tuple;
> + (void)key_def;
> + return 0;
> +}
> +
> +template<bool is_nullable>
I doubt we need to use templates here, but OK, let them be.
> +static uint64_t
> +key_hint_uint(const char *key, struct key_def *key_def)
> +{
> + (void)key_def;
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + if (is_nullable && mp_typeof(*key) == MP_NIL)
> + return 0;
> + assert(mp_typeof(*key) == MP_UINT);
> + uint64_t val = mp_decode_uint(&key);
> + if (val > INT64_MAX)
> + return INT64_MAX;
> + return val - (uint64_t)INT64_MIN;
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + const char *field = tuple_field_by_part(tuple, key_def->parts);
> + if (is_nullable && field == NULL)
> + return 0;
> + return key_hint_uint<is_nullable>(field, key_def);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +key_hint_int(const char *key, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + if (is_nullable && mp_typeof(*key) == MP_NIL)
> + return 0;
> + if (mp_typeof(*key) == MP_UINT) {
> + return key_hint_uint<is_nullable>(key, key_def);
> + } else {
> + assert(key_def->parts->type == FIELD_TYPE_INTEGER);
This assertion should be in the beginning of this function, no?
Please also add assertions checking part->type to other hint functions.
> + assert(mp_typeof(*key) == MP_INT);
> + int64_t val = mp_decode_int(&key);
> + return (uint64_t)val - (uint64_t)INT64_MIN;
> + }
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_int(const struct tuple *tuple, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + const char *field = tuple_field_by_part(tuple, key_def->parts);
> + if (is_nullable && field == NULL)
> + return 0;
> + return key_hint_int<is_nullable>(field, key_def);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +key_hint_string(const char *key, struct key_def *key_def)
> +{
> + (void)key_def;
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + if (is_nullable && mp_typeof(*key) == MP_NIL)
> + return 0;
> + assert(key_def->parts->type == FIELD_TYPE_STRING);
> + assert(mp_typeof(*key) == MP_STR);
> + uint32_t len;
> + const unsigned char *str =
> + (const unsigned char *)mp_decode_str(&key, &len);
> + uint64_t result = 0;
> + /*
> + * Map the sequence of characters in the hint's bytes in
> + * the reverse order: the first character will be mapped
> + * in the highest byte of the number, etc.
> + * One additional bit encodes the presence of the next
> + * sign.
> + * 0bCHARBYTEU CHARBYTEU CHARBYTEU ... CHARBYTE0
> + * The sequence constructed in this way provides a
> + * comparison transitivity of strings no longer than 7
> + * bytes.
Still I don't understand why we need this transformation and why simply
using a sequence of bytes constituting the string isn't enough. Could
you give an example?
> + */
> + uint32_t process_len = MIN(len, 7);
> + for (uint32_t i = 0; i < process_len; i++) {
> + result <<= 9;
> + result |= 0x100;
> + result |= str[i];
> + }
> + /*
> + * If the length of the string is less than the 64-bit
> + * hint can accommodate, the insignificant positions are
> + * filled with 0.
> + */
> + result <<= 9 * (7 - process_len);
> + return result;
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_string(const struct tuple *tuple, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + const char *field = tuple_field_by_part(tuple, key_def->parts);
> + if (is_nullable && field == NULL)
> + return 0;
> + return key_hint_string<is_nullable>(field, key_def);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +key_hint_string_coll(const char *key, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + if (is_nullable && mp_typeof(*key) == MP_NIL)
> + return 0;
> + assert(key_def->parts->type == FIELD_TYPE_STRING);
> + assert(mp_typeof(*key) == MP_STR);
> + assert(key_def->parts->coll != NULL);
> + uint32_t len;
> + const char *str = mp_decode_str(&key, &len);
> + return key_def->parts->coll->hint(str, len, key_def->parts->coll);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + const char *field = tuple_field_by_part(tuple, key_def->parts);
> + if (is_nullable && field == NULL)
> + return 0;
> + return key_hint_string_coll<is_nullable>(field, key_def);
> +}
> +
> +void
> +tuple_hint_set(struct key_def *def)
Should be called key_def_set_hint_func. Please rename.
> +{
> + def->key_hint = key_hint_default;
> + def->tuple_hint = tuple_hint_default;
> + bool is_nullable = key_part_is_nullable(def->parts);
> + if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL &&
> + def->parts->coll->type != COLL_TYPE_BINARY) {
> + def->key_hint = is_nullable ? key_hint_string_coll<true> :
> + key_hint_string_coll<false>;
> + def->tuple_hint = is_nullable ? tuple_hint_string_coll<true> :
> + tuple_hint_string_coll<false>;
> + return;
> + }
> + switch (def->parts->type) {
> + case FIELD_TYPE_UNSIGNED:
> + def->key_hint = is_nullable ? key_hint_uint<true> :
> + key_hint_uint<false>;
> + def->tuple_hint = is_nullable ? tuple_hint_uint<true> :
> + tuple_hint_uint<false>;
> + break;
> + case FIELD_TYPE_INTEGER:
> + def->key_hint = is_nullable ? key_hint_int<true> :
> + key_hint_int<false>;
> + def->tuple_hint = is_nullable ? tuple_hint_int<true> :
> + tuple_hint_int<false>;
> + break;
> + case FIELD_TYPE_STRING:
> + def->key_hint = is_nullable ? key_hint_string<true> :
> + key_hint_string<false>;
> + def->tuple_hint = is_nullable ? tuple_hint_string<true> :
> + tuple_hint_string<false>;
> + break;
> + default:
> + break;
> + };
> +}
More information about the Tarantool-patches
mailing list