[PATCH v6 2/3] memtx: introduce tuple compare hint
Vladimir Davydov
vdavydov.dev at gmail.com
Thu Mar 14 11:19:32 MSK 2019
On Wed, Mar 13, 2019 at 03:15:38PM +0300, Kirill Shcherbatov wrote:
> Implement functions for retrieving tuple hints for a particular
> key_def. Hint is an integer that can be used for tuple comparison
> optimization: if a hint of one tuple is less than a hint of another
> then the first tuple is definitely less than the second; only if
> hints are equal tuple_compare must be called for getting comparison
> result.
>
> Hints are calculated using only the first part of key_def.
>
> Hints are only useful when:
> * they are precalculated and stored along with the tuple;
> calculation is not cheap (cheaper than tuple_compare btw) but
> precalculated hints allow to compare tuples without even fetching
> tuple data.
> * first part of key_def is 'string'(for now)
> * since hint is calculated using only the first part of key_def
> (and only first several characters if it is a string) this part
> must be different with high probability for every tuple pair.
Please refresh the comment. It isn't quite correct.
>
> Closes #3961
> ---
> src/box/key_def.h | 95 +++++++
> src/box/memtx_tree.c | 46 ++-
> src/box/tuple_compare.cc | 584 +++++++++++++++++++++++++++++++++++++--
> src/lib/coll/coll.c | 33 +++
> src/lib/coll/coll.h | 4 +
> 5 files changed, 730 insertions(+), 32 deletions(-)
Please add tests checking that changing an indexed field type from
scalar to a basic type and vice versa without rebuilding the index
doesn't break ordering.
>
> diff --git a/src/box/key_def.h b/src/box/key_def.h
> index dd62f6a35..0d8f3112e 100644
> --- a/src/box/key_def.h
> +++ b/src/box/key_def.h
> @@ -137,6 +137,19 @@ typedef int (*tuple_compare_with_key_t)(const struct tuple *tuple_a,
> typedef int (*tuple_compare_t)(const struct tuple *tuple_a,
> const struct tuple *tuple_b,
> struct key_def *key_def);
> +/** @copydoc tuple_compare_with_key_hinted() */
> +typedef int (*tuple_compare_with_key_hinted_t)(const struct tuple *tuple,
> + uint64_t tuple_hint,
> + const char *key,
> + uint32_t part_count,
> + uint64_t key_hint,
> + struct key_def *key_def);
On the second thought, we might want to reduce the hint size or increase
it in future. Would it be better to introduce a helper type for hints so
that we could easily do this if we need to?
typedef uint64_t tuple_hint_t;
What do you think? Would it be worthwhile? Would it look OK?
Sorry, I asked you to use uint64_t before. Looks to me now that
I was wrong... :-(
> @@ -571,6 +597,51 @@ tuple_compare_with_key(const struct tuple *tuple, const char *key,
> return key_def->tuple_compare_with_key(tuple, key, part_count, key_def);
> }
>
> +/**
> + * Compare tuples using the key definition and comparison hints.
> + * @param tuple_a First tuple.
> + * @param tuple_a_hint Comparison hint is calculated for the
> + * @a tuple_a.
Should be "Comparison hint of @a tuple_a"
Please fix here and everywhere else where appropriate.
> + * @param tuple_b Second tuple.
> + * @param tuple_b_hint Comparison hint is calculated for the
> + * @a tuple_b.
> + * @param key_def Key definition.
> + * @retval 0 if key_fields(tuple_a) == key_fields(tuple_b)
> + * @retval <0 if key_fields(tuple_a) < key_fields(tuple_b)
> + * @retval >0 if key_fields(tuple_a) > key_fields(tuple_b)
> + */
> +static inline int
> +tuple_compare_hinted(const struct tuple *tuple_a, uint64_t tuple_a_hint,
> + const struct tuple *tuple_b, uint64_t tuple_b_hint,
> + struct key_def *key_def)
> +{
> + return key_def->tuple_compare_hinted(tuple_a, tuple_a_hint, tuple_b,
> + tuple_b_hint, key_def);
> +}
> +
> +/**
> + * Compare tuple with key using the key definition and
> + * comparison hints.
> + * @param tuple tuple
> + * @param tuple_hint Comparison hint is calculated for @a tuple.
> + * @param key key parts without MessagePack array header
> + * @param part_count the number of parts in @a key
This is nitpicking, of course, but please be consistent. If you put dot
(.) at the end of a sentence, put it everywhere.
> + * @param key_hint t Comparison key kent is calculated for @a key.
Comparison hint of @a key.
> + * @param key_def key definition
> + * @retval 0 if key_fields(tuple) == parts(key)
> + * @retval <0 if key_fields(tuple) < parts(key)
> + * @retval >0 if key_fields(tuple) > parts(key)
> + */
> +static inline int
> +tuple_compare_with_key_hinted(const struct tuple *tuple, uint64_t tuple_hint,
> + const char *key, uint32_t part_count,
> + uint64_t key_hint, struct key_def *key_def)
> +{
> + return key_def->tuple_compare_with_key_hinted(tuple, tuple_hint, key,
> + part_count, key_hint,
> + key_def);
> +}
> +
> /**
> * Compute hash of a tuple field.
> * @param ph1 - pointer to running hash
> @@ -624,6 +695,30 @@ key_hash(const char *key, struct key_def *key_def)
> return key_def->key_hash(key, key_def);
> }
>
> + /*
> + * Get a comparison hint for a @a tuple.
The article before 'tuple' ('a') is redundant.
> + * @param tuple - tuple to get uint64_t of.
tuple to compute the hint for.
> + * @param key_def - key_def that defines which comparison is used.
key definition used for tuple comparison.
> + * @return the comparison auxiliary information.
the hint.
> + */
> +static inline uint64_t
> +tuple_hint(const struct tuple *tuple, struct key_def *key_def)
> +{
> + return key_def->tuple_hint(tuple, key_def);
> +}
> +
> +/**
> + * Get a comparison hint of a @a key.
> + * @param key - key to get hint of.
> + * @param key_def - key_def that defines which comparison is used.
> + * @return the comparison auxiliary information.
Ditto.
> + */
> +static inline uint64_t
> +key_hint(const char *key, struct key_def *key_def)
> +{
> + return key_def->key_hint(key, key_def);
> +}
> +
> #if defined(__cplusplus)
> } /* extern "C" */
> #endif /* defined(__cplusplus) */
> diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
> index d5b42efb6..2c1933ceb 100644
> --- a/src/box/memtx_tree.c
> +++ b/src/box/memtx_tree.c
> @@ -47,6 +47,11 @@ struct memtx_tree_key_data {
> const char *key;
> /** Number of msgpacked search fields. */
> uint32_t part_count;
> + /**
> + * Comparison hint is calculated with
> + * key_hint(key, ...).
> + */
/** Comparison hint, see tuple_hint(). */
> + uint64_t hint;
> };
>
> /**
> @@ -55,6 +60,11 @@ struct memtx_tree_key_data {
> struct memtx_tree_data {
> /* Tuple that this node is represents. */
> struct tuple *tuple;
> + /**
> + * Comparison hint is calculated with
> + * tuple_hint(tuple, ...).
> + */
/** Comparison hint, see key_hint(). */
> + uint64_t hint;
> };
>
> /**
> @@ -69,16 +79,18 @@ static bool
> memtx_tree_data_identical(const struct memtx_tree_data *a,
> const struct memtx_tree_data *b)
> {
> - return a->tuple == b->tuple;
> + return a->tuple == b->tuple && a->hint == b->hint;
> }
>
> #define BPS_TREE_NAME memtx_tree
> #define BPS_TREE_BLOCK_SIZE (512)
> #define BPS_TREE_EXTENT_SIZE MEMTX_EXTENT_SIZE
> #define BPS_TREE_COMPARE(a, b, arg)\
> - tuple_compare((&a)->tuple, (&b)->tuple, arg)
> + tuple_compare_hinted((&a)->tuple, (&a)->hint, (&b)->tuple,\
> + (&b)->hint, arg)
Hmm, (&a)->tuple is equivalent to (a).tuple, isn't it?
> @@ -516,9 +533,11 @@ memtx_tree_index_get(struct index *base, const char *key,
> assert(base->def->opts.is_unique &&
> part_count == base->def->key_def->part_count);
> struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> + struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
> struct memtx_tree_key_data key_data;
> key_data.key = key;
> key_data.part_count = part_count;
> + key_data.hint = key_hint(key, cmp_def);
> struct memtx_tree_data *res = memtx_tree_find(&index->tree, &key_data);
> *result = res != NULL ? res->tuple : NULL;
> return 0;
> @@ -530,9 +549,11 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
> struct tuple **result)
> {
> struct memtx_tree_index *index = (struct memtx_tree_index *)base;
> + struct key_def *key_def = index->tree.arg;
Accessing index->tree.arg looks ugly - it violates encapsulation.
Besides, right above you use index_def_cmp_def instead. I think we
better introduce a helper function for accessing index->tree.arg
and use it everywhere:
memtx_tree_cmp_def(&index->tree)
> diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
> index 1bcff2ca3..86922c9bb 100644
> --- a/src/box/tuple_compare.cc
> +++ b/src/box/tuple_compare.cc
> @@ -34,6 +34,383 @@
> #include "trivia/util.h" /* NOINLINE */
> #include <math.h>
>
> +/**
> + * Tuple hints
If you agree to add tuple_hint_t, then please move this comment to
the type declaration. Don't forget to update it when you introduce
multikey indexes.
> + *
> + * Hint is a value h(t) is calculated for tuple in terms
> + * of particular key_def that has the following rules:
Tuple comparison hint h(t) is such a function of tuple t that
the following conditions always hold for any pair of tuples
t1 and t2:
> + * if h(t1) < h(t2) then t1 < t2;
> + * if h(t1) > h(t2) then t1 > t2;
> + * if t1 == t2 then regular comparision is required;
if h(t1) == h(t2) then t1 may or may not be equal to t2.
> + * These rules mean that instead of direct tuple vs tuple
> + * (or tuple vs key) comparison one may compare theirs
> + * hints first; and only if theirs hints equal compare
> + * the tuples themselves.
> + *
> + * The comparison hint has the following structure:
> + * uint64_t: [ CMP_HINT_TYPE | CMP_HINT_VALUE ]
> + * 64 62 0 bit
> + *
> + * The reserved value HINT_INVALID is returned when hint is
> + * undefined.
> + */
> +#define HINT_MAX (((int64_t)1 << 61) - 1)
> +#define HINT_MIN (-HINT_MAX - 1)
> +#define HINT_UMAX (((uint64_t)1 << 62) - 1)
What's HINT_UMAX? Please remove if possible.
> +
> +#define HINT_INVALID UINT64_MAX
HINT_NONE would be a better name IMO, because it would stress on
the fact that the hint is absent.
> +
> +#define HINT_TYPE_NUMBER ((uint8_t)1)
> +#define HINT_TYPE_STRING ((uint8_t)2)
> +#define HINT_TYPE_BOOL ((uint8_t)3)
> +
> +#define HINT_TYPE(hint) ((uint64_t)hint >> 62)
> +#define HINT_VALUE(hint) ((uint64_t)hint & (((uint64_t)1 << 62) - 1))
Using 62 directly is error-prone (in case we decide to change it).
Please define constants for all numbers you use.
> +#define HINT(type, value) ({ \
> + assert(HINT_TYPE(value) == 0); \
> + (((uint64_t)type << 62) | (uint64_t)value);})
Would be better to make these helpers static inline functions.
> +
> +/**
> + * Perform hints comparison.
> + * Returns 1 when hint_a > hint_b, -1 when hint_b > hint_a;
> + * If the hints are incompatible or equal, a value of 0 is
> + * returned, and the further comparison of the original data is
> + * required.
> + */
> +static int
> +hint_cmp(uint64_t hint_a, uint64_t hint_b)
> +{
> + if (hint_a != HINT_INVALID && hint_b != HINT_INVALID &&
> + HINT_TYPE(hint_a) == HINT_TYPE(hint_b) &&
> + HINT_VALUE(hint_a) != HINT_VALUE(hint_b))
> + return hint_a < hint_b ? -1 : 1;
Hints must be defined in such a way that we don't need to compare types,
i.e. hint(123) < hint("abc").
I suggest you to take a look at mp_class. May be, we could reuse it for
hint types?
> + else
> + return 0;
> +}
> +
> +template<bool is_optional, bool is_nullable>
> +static uint64_t
> +key_hint_uint(const char *key, struct key_def *key_def)
Please use unsigned/integer for these functions so as not to confuse
them with MP_INT/UINT.
> +{
> + (void)key_def;
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert(key_def->parts->type == FIELD_TYPE_UNSIGNED);
> + uint64_t ret;
> + if ((is_optional && key == NULL) ||
> + (is_nullable && mp_typeof(*key) == MP_NIL))
> + return HINT(HINT_TYPE_NUMBER, 0);
> + assert(mp_typeof(*key) == MP_UINT);
> + uint64_t val = mp_decode_uint(&key);
> + ret = val > HINT_MAX ? HINT_UMAX : val - (uint64_t)HINT_MIN;
> + return HINT(HINT_TYPE_NUMBER, ret);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert(key_def->parts->type == FIELD_TYPE_UNSIGNED);
> + const char *field = tuple_field_by_part(tuple, key_def->parts);
> + if (is_nullable && field == NULL)
> + return HINT(HINT_TYPE_NUMBER, 0);
> + return key_hint_uint<false, is_nullable>(field, key_def);
> +}
Although I didn't complain about the way you define hint functions
before, I've always had an inkling that something is wrong, that calling
key_hint from tuple_hint looks weird. The 'is_optional' template
argument you added recently has only strengthened my suspicions: it
looks confusing, because in fact it doesn't have anything to do with
optional parts we use in tuple_compare templates.
I propose the following refactoring. Let's introduce field_hint_*
functions (not using templates) that would take only binary data
of a particular type and return a hint, e.g.
tuple_hint_t
field_hint_unsigned(const char *field) {
assert(mp_typeof(*field) == MP_UINT);
...
}
Then add two parametrised functions for computing a hint for a key and
a tuple that would look like this:
template<enum field_type type, ...>
tuple_hint(const struct tuple *tuple, struct key_def *key_def) {
switch (type) {
case FIELD_TYPE_UNSIGNED:
return field_hint_unsigned(...);
case FIELD_TYPE_INTEGER:
return field_hint_integer(...);
...
}
}
template<enum field_type type, ...>
key_hint(...) {
switch (type) {
case FIELD_TYPE_UNSIGNED:
return field_hint_unsigned(...);
case FIELD_TYPE_INTEGER:
return field_hint_integer(...);
...
}
}
This way all the logic computing hints would be consolidated in
field_hint_* family of functions which would be simple and wouldn't
even take key_def. This would also allow you to get rid of that ugly
switch-case in key_def_set_hint_func.
> +
> +template<bool is_optional, bool is_nullable>
> +static uint64_t
> +key_hint_int(const char *key, struct key_def *key_def)
> +{
> + (void)key_def;
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert(key_def->parts->type == FIELD_TYPE_INTEGER);
> + uint64_t ret;
> + if ((is_optional && key == NULL) ||
> + (is_nullable && mp_typeof(*key) == MP_NIL))
> + return HINT(HINT_TYPE_NUMBER, 0);
> + if (mp_typeof(*key) == MP_UINT) {
> + uint64_t val = mp_decode_uint(&key);
> + ret = val > HINT_MAX ? HINT_UMAX : val - (uint64_t)HINT_MIN;
> + } else {
> + assert(mp_typeof(*key) == MP_INT);
> + int64_t val = mp_decode_int(&key);
> + ret = val > HINT_MAX ? HINT_UMAX :
> + val < HINT_MIN ? 0 :
> + (uint64_t)val - (uint64_t)HINT_MIN;
> + }
> + return HINT(HINT_TYPE_NUMBER, ret);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_int(const struct tuple *tuple, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert(key_def->parts->type == FIELD_TYPE_INTEGER);
> + const char *field = tuple_field_by_part(tuple, key_def->parts);
> + if (is_nullable && field == NULL)
> + return HINT(HINT_TYPE_NUMBER, 0);
> + return key_hint_int<false, is_nullable>(field, key_def);
> +}
> +
> +template<bool is_optional, bool is_nullable>
> +static uint64_t
> +key_hint_number(const char *key, struct key_def *key_def)
> +{
> + (void)key_def;
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert(key_def->parts->type == FIELD_TYPE_NUMBER ||
> + key_def->parts->type == FIELD_TYPE_SCALAR);
> + if ((is_optional && key == NULL) ||
> + (is_nullable && mp_typeof(*key) == MP_NIL))
> + return HINT(HINT_TYPE_NUMBER, 0);
> + uint64_t ret;
> + switch (mp_typeof(*key)) {
> + case MP_FLOAT:
> + case MP_DOUBLE: {
> + double f = mp_typeof(*key) == MP_FLOAT ?
> + mp_decode_float(&key) : mp_decode_double(&key);
> + if (!isfinite(f))
> + return HINT_INVALID;
> + double val;
> + (void)modf(f, &val);
> + ret = val > (double)HINT_MAX ? HINT_UMAX :
> + val < (double)HINT_MIN ? 0 :
As I told you before, comparing doulbe with int64_t like this is
incorrect:
#include <stdio.h>
#include <stdint.h>
int main()
{
int64_t x = (1LL << 61) - 1;
double y = x;
printf("x is %lld (int64_t)\n", x);
printf("y is %lf (double)\n", y);
printf("y <= (double)x is %s\n",
y <= (double)x ? "true" : "false");
printf("(int64_t)y is %lld\n", (int64_t)y);
}
prints
x is 2305843009213693951 (int64_t)
y is 2305843009213693952.000000 (double)
y <= (double)x is true
(int64_t)y is 2305843009213693952
Please fix and don't forget to add a test case.
> + (uint64_t)val - (uint64_t)HINT_MIN;
> + break;
> + }
> + case MP_INT: {
> + int64_t val = (uint64_t)mp_decode_int(&key);
> + ret = val > HINT_MAX ? HINT_UMAX :
> + val < HINT_MIN ? 0 :
> + (uint64_t)val - (uint64_t)HINT_MIN;
> + break;
> + }
> + case MP_UINT: {
> + uint64_t val = mp_decode_uint(&key);
> + ret = val > HINT_MAX ? HINT_UMAX : val - (uint64_t)HINT_MIN;
> + break;
> + }
> + default:
> + unreachable();
> + }
> + return HINT(HINT_TYPE_NUMBER, ret);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_number(const struct tuple *tuple, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert(key_def->parts->type == FIELD_TYPE_NUMBER);
> + const char *field = tuple_field_by_part(tuple, key_def->parts);
> + if (is_nullable && field == NULL)
> + return HINT(HINT_TYPE_NUMBER, 0);
> + return key_hint_number<false, is_nullable>(field, key_def);
> +}
> +
> +template<bool is_optional, bool is_nullable>
> +static uint64_t
> +key_hint_boolean(const char *key, struct key_def *key_def)
> +{
> + (void)key_def;
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert(key_def->parts->type == FIELD_TYPE_BOOLEAN ||
> + key_def->parts->type == FIELD_TYPE_SCALAR);
> + if ((is_optional && key == NULL) ||
> + (is_nullable && mp_typeof(*key) == MP_NIL))
> + return HINT(HINT_TYPE_BOOL, 0);
> + return (uint64_t)mp_decode_bool(&key);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_boolean(const struct tuple *tuple, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert(key_def->parts->type == FIELD_TYPE_BOOLEAN);
> + const char *field = tuple_field_by_part(tuple, key_def->parts);
> + if (is_nullable && field == NULL)
> + return HINT(HINT_TYPE_BOOL, 0);
> + return key_hint_boolean<false, is_nullable>(field, key_def);
> +}
> +
> +template<bool is_optional, bool is_nullable>
> +static uint64_t
> +key_hint_string(const char *key, struct key_def *key_def)
> +{
> + (void)key_def;
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert(key_def->parts->coll == NULL);
> + assert(key_def->parts->type == FIELD_TYPE_STRING ||
> + key_def->parts->type == FIELD_TYPE_SCALAR);
> + if ((is_optional && key == NULL) ||
> + (is_nullable && mp_typeof(*key) == MP_NIL))
> + return HINT(HINT_TYPE_STRING, 0);
> + assert(mp_typeof(*key) == MP_STR);
> + uint32_t len;
> + const unsigned char *str =
> + (const unsigned char *)mp_decode_str(&key, &len);
> + uint64_t result = 0;
> + uint32_t process_len = MIN(len, 7);
> + for (uint32_t i = 0; i < process_len; i++) {
> + result <<= 8;
> + result |= str[i];
> + }
> + result <<= 8 * (7 - process_len);
Please define an appropriate constant for 7.
> + return HINT(HINT_TYPE_STRING, result);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_string(const struct tuple *tuple, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert(key_def->parts->coll == NULL);
> + assert(key_def->parts->type == FIELD_TYPE_STRING);
> + const char *field = tuple_field_by_part(tuple, key_def->parts);
> + if (is_nullable && field == NULL)
> + return HINT(HINT_TYPE_STRING, 0);
> + return key_hint_string<false, is_nullable>(field, key_def);
> +}
> +
> +template<bool is_optional, bool is_nullable>
> +static uint64_t
> +key_hint_string_coll(const char *key, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert((key_def->parts->type == FIELD_TYPE_STRING ||
> + key_def->parts->type == FIELD_TYPE_SCALAR) &&
> + key_def->parts->coll != NULL);
> + if ((is_optional && key == NULL) ||
> + (is_nullable && mp_typeof(*key) == MP_NIL))
> + return HINT(HINT_TYPE_STRING, 0);
> + assert(mp_typeof(*key) == MP_STR);
> + uint32_t len;
> + const char *str = mp_decode_str(&key, &len);
> + return key_def->parts->coll->hint(str, len, key_def->parts->coll);
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert(key_def->parts->type == FIELD_TYPE_STRING &&
> + key_def->parts->coll != NULL);
> + const char *field = tuple_field_by_part(tuple, key_def->parts);
> + if (is_nullable && field == NULL)
> + return HINT(HINT_TYPE_STRING, 0);
> + return key_hint_string_coll<false, is_nullable>(field, key_def);
> +}
> +
> +template<bool is_optional, bool is_nullable>
> +static uint64_t
> +key_hint_scalar(const char *key, struct key_def *key_def)
> +{
> + (void)key_def;
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert(key_def->parts->type == FIELD_TYPE_SCALAR);
> + if ((is_optional && key == NULL) ||
> + (is_nullable && mp_typeof(*key) == MP_NIL))
> + return HINT(HINT_TYPE_BOOL, 0);
> + switch (mp_typeof(*key)) {
> + case MP_INT:
> + case MP_UINT:
> + case MP_FLOAT:
> + case MP_DOUBLE:
> + return key_hint_number<is_optional, is_nullable>(key, key_def);
> + case MP_BOOL:
> + return key_hint_boolean<is_optional, is_nullable>(key, key_def);
> + case MP_STR:
> + if (key_def->parts->coll == NULL)
> + return key_hint_string<is_optional,
> + is_nullable>(key, key_def);
> + else
> + return key_hint_string_coll<is_optional,
> + is_nullable>(key, key_def);
Please make sure this if() is resolved at compile time. Since we've
chosen the path of using templates for optimization, we should stick
to it till the end.
> + default:
> + return HINT_INVALID;
Should be unreachable(). BTW you forgot MP_BIN.
> + }
> +}
> +
> +template<bool is_nullable>
> +static uint64_t
> +tuple_hint_scalar(const struct tuple *tuple, struct key_def *key_def)
> +{
> + assert(key_part_is_nullable(key_def->parts) == is_nullable);
> + assert(key_def->parts->type == FIELD_TYPE_SCALAR);
> + const char *field = tuple_field_by_part(tuple, key_def->parts);
> + if (is_nullable && field == NULL)
> + return HINT(HINT_TYPE_BOOL, 0);
> + return key_hint_scalar<false, is_nullable>(field, key_def);
> +}
> +
> +void
> +key_def_set_hint_func(struct key_def *def)
> +{
> + def->key_hint = NULL;
> + def->tuple_hint = NULL;
> + bool is_nullable = key_part_is_nullable(def->parts);
> + switch (def->parts->type) {
> + case FIELD_TYPE_UNSIGNED: {
> + def->key_hint = is_nullable ? key_hint_uint<true, true> :
> + key_hint_uint<true, false>;
> + def->tuple_hint = is_nullable ? tuple_hint_uint<true> :
> + tuple_hint_uint<false>;
> + break;
> + }
> + case FIELD_TYPE_INTEGER: {
> + def->key_hint = is_nullable ? key_hint_int<true, true> :
> + key_hint_int<true, false>;
> + def->tuple_hint = is_nullable ? tuple_hint_int<true> :
> + tuple_hint_int<false>;
> + break;
> + }
> + case FIELD_TYPE_STRING: {
> + if (def->parts->coll != NULL) {
> + def->key_hint =
> + is_nullable ? key_hint_string_coll<true, true> :
> + key_hint_string_coll<true, false>;
> + def->tuple_hint =
> + is_nullable ? tuple_hint_string_coll<true> :
> + tuple_hint_string_coll<false>;
> + } else {
> + def->key_hint =
> + is_nullable ? key_hint_string<true, true> :
> + key_hint_string<true, false>;
> + def->tuple_hint = is_nullable ? tuple_hint_string<true> :
> + tuple_hint_string<false>;
> + }
> + break;
> + }
> + case FIELD_TYPE_NUMBER: {
> + def->key_hint = is_nullable ? key_hint_number<true, true> :
> + key_hint_number<true, false>;
> + def->tuple_hint = is_nullable ? tuple_hint_number<true> :
> + tuple_hint_number<false>;
> + break;
> + }
> + case FIELD_TYPE_BOOLEAN: {
> + def->key_hint = is_nullable ? key_hint_boolean<true, true> :
> + key_hint_boolean<true, false>;
> + def->tuple_hint = is_nullable ? tuple_hint_boolean<true> :
> + tuple_hint_boolean<false>;
> + break;
> + }
> + case FIELD_TYPE_SCALAR: {
> + def->key_hint = is_nullable ? key_hint_scalar<true, true> :
> + key_hint_scalar<true, false>;
> + def->tuple_hint = is_nullable ? tuple_hint_scalar<true> :
> + tuple_hint_scalar<false>;
> + break;
> + }
> + default: {
> + break;
> + }
> + }
> +}
> +
> /* {{{ tuple_compare */
>
> /*
> @@ -792,9 +1210,13 @@ key_compare(const char *key_a, const char *key_b, struct key_def *key_def)
>
> template <bool is_nullable, bool has_optional_parts>
> static int
> -tuple_compare_sequential(const struct tuple *tuple_a,
> - const struct tuple *tuple_b, key_def *key_def)
> +tuple_compare_sequential_hinted(const struct tuple *tuple_a,
> + uint64_t tuple_a_hint, const struct tuple *tuple_b,
> + uint64_t tuple_b_hint, key_def *key_def)
> {
> + int rc = hint_cmp(tuple_a_hint, tuple_b_hint);
> + if (rc != 0)
> + return rc;
This should go after all the assertions below.
> assert(!has_optional_parts || is_nullable);
> assert(has_optional_parts == key_def->has_optional_parts);
> assert(key_def_is_sequential(key_def));
> diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c
> index afc15e809..3bd7c44f5 100644
> --- a/src/lib/coll/coll.c
> +++ b/src/lib/coll/coll.c
> @@ -133,6 +133,37 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
> return s_len;
> }
>
> +/** Get a compare hint of a binary collation. */
> +static uint64_t
> +coll_bin_hint(const char *s, size_t s_len, struct coll *coll)
> +{
> + (void)coll;
> + uint64_t result = 0;
> + uint32_t process_len = MIN(s_len, 7);
> + for (uint32_t i = 0; i < process_len; i++) {
> + result <<= 8;
> + result |= ((unsigned char *)s)[i];
> + }
> + result <<= 8 * (7 - process_len);
I don't like that you copy-n-paste string hint computation logic here
and in tuple_compare.cc. I think that it should be consolidated in
tuple_compare.cc while coll->hint should return a hint in some raw
format. I guess it could be just a wrapper around ucol_nextSortKeyPart.
May be, we should even pick another name for it.
> + return result;
> +}
> +
> +/** Get a compare hint of a string using ICU collation. */
> +static uint64_t
> +coll_icu_hint(const char *s, size_t s_len, struct coll *coll)
> +{
> + assert(coll->type == COLL_TYPE_ICU);
> + UCharIterator itr;
> + uiter_setUTF8(&itr, s, s_len);
> + uint8_t buf[7];
> + uint32_t state[2] = {0, 0};
> + UErrorCode status = U_ZERO_ERROR;
> + int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf,
> + sizeof(buf), &status);
> + assert(got >= 0 && got <= 7);
> + return coll_bin_hint((const char *)buf, got, NULL);
> +}
More information about the Tarantool-patches
mailing list