[PATCH v6 2/3] memtx: introduce tuple compare hint
Kirill Shcherbatov
kshcherbatov at tarantool.org
Wed Mar 13 15:15:38 MSK 2019
Implement functions for retrieving tuple hints for a particular
key_def. Hint is an integer that can be used for tuple comparison
optimization: if a hint of one tuple is less than a hint of another
then the first tuple is definitely less than the second; only if
hints are equal tuple_compare must be called for getting comparison
result.
Hints are calculated using only the first part of key_def.
Hints are only useful when:
* they are precalculated and stored along with the tuple;
calculation is not cheap (cheaper than tuple_compare btw) but
precalculated hints allow to compare tuples without even fetching
tuple data.
* first part of key_def is 'string'(for now)
* since hint is calculated using only the first part of key_def
(and only first several characters if it is a string) this part
must be different with high probability for every tuple pair.
Closes #3961
---
src/box/key_def.h | 95 +++++++
src/box/memtx_tree.c | 46 ++-
src/box/tuple_compare.cc | 584 +++++++++++++++++++++++++++++++++++++--
src/lib/coll/coll.c | 33 +++
src/lib/coll/coll.h | 4 +
5 files changed, 730 insertions(+), 32 deletions(-)
diff --git a/src/box/key_def.h b/src/box/key_def.h
index dd62f6a35..0d8f3112e 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -137,6 +137,19 @@ typedef int (*tuple_compare_with_key_t)(const struct tuple *tuple_a,
typedef int (*tuple_compare_t)(const struct tuple *tuple_a,
const struct tuple *tuple_b,
struct key_def *key_def);
+/** @copydoc tuple_compare_with_key_hinted() */
+typedef int (*tuple_compare_with_key_hinted_t)(const struct tuple *tuple,
+ uint64_t tuple_hint,
+ const char *key,
+ uint32_t part_count,
+ uint64_t key_hint,
+ struct key_def *key_def);
+/** @copydoc tuple_compare_hinted() */
+typedef int (*tuple_compare_hinted_t)(const struct tuple *tuple_a,
+ uint64_t tuple_a_hint,
+ const struct tuple *tuple_b,
+ uint64_t tuple_b_hint,
+ struct key_def *key_def);
/** @copydoc tuple_extract_key() */
typedef char *(*tuple_extract_key_t)(const struct tuple *tuple,
struct key_def *key_def,
@@ -152,6 +165,11 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple,
/** @copydoc key_hash() */
typedef uint32_t (*key_hash_t)(const char *key,
struct key_def *key_def);
+/** @copydoc tuple_hint() */
+typedef uint64_t (*tuple_hint_t)(const struct tuple *tuple,
+ struct key_def *key_def);
+/** @copydoc key_hint() */
+typedef uint64_t (*key_hint_t)(const char *key, struct key_def *key_def);
/* Definition of a multipart key. */
struct key_def {
@@ -159,6 +177,10 @@ struct key_def {
tuple_compare_t tuple_compare;
/** @see tuple_compare_with_key() */
tuple_compare_with_key_t tuple_compare_with_key;
+ /** @see tuple_compare_with_key_hinted() */
+ tuple_compare_with_key_hinted_t tuple_compare_with_key_hinted;
+ /** @see tuple_compare_hinted() */
+ tuple_compare_hinted_t tuple_compare_hinted;
/** @see tuple_extract_key() */
tuple_extract_key_t tuple_extract_key;
/** @see tuple_extract_key_raw() */
@@ -167,6 +189,10 @@ struct key_def {
tuple_hash_t tuple_hash;
/** @see key_hash() */
key_hash_t key_hash;
+ /** @see tuple_hint() */
+ tuple_hint_t tuple_hint;
+ /** @see key_hint() */
+ key_hint_t key_hint;
/**
* Minimal part count which always is unique. For example,
* if a secondary index is unique, then
@@ -571,6 +597,51 @@ tuple_compare_with_key(const struct tuple *tuple, const char *key,
return key_def->tuple_compare_with_key(tuple, key, part_count, key_def);
}
+/**
+ * Compare tuples using the key definition and comparison hints.
+ * @param tuple_a First tuple.
+ * @param tuple_a_hint Comparison hint is calculated for the
+ * @a tuple_a.
+ * @param tuple_b Second tuple.
+ * @param tuple_b_hint Comparison hint is calculated for the
+ * @a tuple_b.
+ * @param key_def Key definition.
+ * @retval 0 if key_fields(tuple_a) == key_fields(tuple_b)
+ * @retval <0 if key_fields(tuple_a) < key_fields(tuple_b)
+ * @retval >0 if key_fields(tuple_a) > key_fields(tuple_b)
+ */
+static inline int
+tuple_compare_hinted(const struct tuple *tuple_a, uint64_t tuple_a_hint,
+ const struct tuple *tuple_b, uint64_t tuple_b_hint,
+ struct key_def *key_def)
+{
+ return key_def->tuple_compare_hinted(tuple_a, tuple_a_hint, tuple_b,
+ tuple_b_hint, key_def);
+}
+
+/**
+ * Compare tuple with key using the key definition and
+ * comparison hints.
+ * @param tuple tuple
+ * @param tuple_hint Comparison hint is calculated for @a tuple.
+ * @param key key parts without MessagePack array header
+ * @param part_count the number of parts in @a key
+ * @param key_hint t Comparison key kent is calculated for @a key.
+ * @param key_def key definition
+ * @retval 0 if key_fields(tuple) == parts(key)
+ * @retval <0 if key_fields(tuple) < parts(key)
+ * @retval >0 if key_fields(tuple) > parts(key)
+ */
+static inline int
+tuple_compare_with_key_hinted(const struct tuple *tuple, uint64_t tuple_hint,
+ const char *key, uint32_t part_count,
+ uint64_t key_hint, struct key_def *key_def)
+{
+ return key_def->tuple_compare_with_key_hinted(tuple, tuple_hint, key,
+ part_count, key_hint,
+ key_def);
+}
+
/**
* Compute hash of a tuple field.
* @param ph1 - pointer to running hash
@@ -624,6 +695,30 @@ key_hash(const char *key, struct key_def *key_def)
return key_def->key_hash(key, key_def);
}
+ /*
+ * Get a comparison hint for a @a tuple.
+ * @param tuple - tuple to get uint64_t of.
+ * @param key_def - key_def that defines which comparison is used.
+ * @return the comparison auxiliary information.
+ */
+static inline uint64_t
+tuple_hint(const struct tuple *tuple, struct key_def *key_def)
+{
+ return key_def->tuple_hint(tuple, key_def);
+}
+
+/**
+ * Get a comparison hint of a @a key.
+ * @param key - key to get hint of.
+ * @param key_def - key_def that defines which comparison is used.
+ * @return the comparison auxiliary information.
+ */
+static inline uint64_t
+key_hint(const char *key, struct key_def *key_def)
+{
+ return key_def->key_hint(key, key_def);
+}
+
#if defined(__cplusplus)
} /* extern "C" */
#endif /* defined(__cplusplus) */
diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
index d5b42efb6..2c1933ceb 100644
--- a/src/box/memtx_tree.c
+++ b/src/box/memtx_tree.c
@@ -47,6 +47,11 @@ struct memtx_tree_key_data {
const char *key;
/** Number of msgpacked search fields. */
uint32_t part_count;
+ /**
+ * Comparison hint is calculated with
+ * key_hint(key, ...).
+ */
+ uint64_t hint;
};
/**
@@ -55,6 +60,11 @@ struct memtx_tree_key_data {
struct memtx_tree_data {
/* Tuple that this node is represents. */
struct tuple *tuple;
+ /**
+ * Comparison hint is calculated with
+ * tuple_hint(tuple, ...).
+ */
+ uint64_t hint;
};
/**
@@ -69,16 +79,18 @@ static bool
memtx_tree_data_identical(const struct memtx_tree_data *a,
const struct memtx_tree_data *b)
{
- return a->tuple == b->tuple;
+ return a->tuple == b->tuple && a->hint == b->hint;
}
#define BPS_TREE_NAME memtx_tree
#define BPS_TREE_BLOCK_SIZE (512)
#define BPS_TREE_EXTENT_SIZE MEMTX_EXTENT_SIZE
#define BPS_TREE_COMPARE(a, b, arg)\
- tuple_compare((&a)->tuple, (&b)->tuple, arg)
+ tuple_compare_hinted((&a)->tuple, (&a)->hint, (&b)->tuple,\
+ (&b)->hint, arg)
#define BPS_TREE_COMPARE_KEY(a, b, arg)\
- tuple_compare_with_key((&a)->tuple, (b)->key, (b)->part_count, arg)
+ tuple_compare_with_key_hinted((&a)->tuple, (&a)->hint, (b)->key,\
+ (b)->part_count, (b)->hint, arg)
#define BPS_TREE_IDENTICAL(a, b) memtx_tree_data_identical(&a, &b)
#define bps_tree_elem_t struct memtx_tree_data
#define bps_tree_key_t struct memtx_tree_key_data *
@@ -113,7 +125,8 @@ memtx_tree_qcompare(const void* a, const void *b, void *c)
const struct memtx_tree_data *data_a = a;
const struct memtx_tree_data *data_b = b;
struct key_def *key_def = c;
- return tuple_compare(data_a->tuple, data_b->tuple, key_def);
+ return tuple_compare_hinted(data_a->tuple, data_a->hint, data_b->tuple,
+ data_b->hint, key_def);
}
/* {{{ MemtxTree Iterators ****************************************/
@@ -235,9 +248,11 @@ tree_iterator_next_equal(struct iterator *iterator, struct tuple **ret)
memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
/* Use user key def to save a few loops. */
if (res == NULL ||
- tuple_compare_with_key(res->tuple, it->key_data.key,
- it->key_data.part_count,
- it->index_def->key_def) != 0) {
+ tuple_compare_with_key_hinted(res->tuple, res->hint,
+ it->key_data.key,
+ it->key_data.part_count,
+ it->key_data.hint,
+ it->index_def->key_def) != 0) {
iterator->next = tree_iterator_dummie;
it->current.tuple = NULL;
*ret = NULL;
@@ -266,9 +281,11 @@ tree_iterator_prev_equal(struct iterator *iterator, struct tuple **ret)
memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
/* Use user key def to save a few loops. */
if (res == NULL ||
- tuple_compare_with_key(res->tuple, it->key_data.key,
- it->key_data.part_count,
- it->index_def->key_def) != 0) {
+ tuple_compare_with_key_hinted(res->tuple, res->hint,
+ it->key_data.key,
+ it->key_data.part_count,
+ it->key_data.hint,
+ it->index_def->key_def) != 0) {
iterator->next = tree_iterator_dummie;
it->current.tuple = NULL;
*ret = NULL;
@@ -516,9 +533,11 @@ memtx_tree_index_get(struct index *base, const char *key,
assert(base->def->opts.is_unique &&
part_count == base->def->key_def->part_count);
struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+ struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
struct memtx_tree_key_data key_data;
key_data.key = key;
key_data.part_count = part_count;
+ key_data.hint = key_hint(key, cmp_def);
struct memtx_tree_data *res = memtx_tree_find(&index->tree, &key_data);
*result = res != NULL ? res->tuple : NULL;
return 0;
@@ -530,9 +549,11 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
struct tuple **result)
{
struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+ struct key_def *key_def = index->tree.arg;
if (new_tuple) {
struct memtx_tree_data new_data;
new_data.tuple = new_tuple;
+ new_data.hint = tuple_hint(new_tuple, key_def);
struct memtx_tree_data dup_data;
dup_data.tuple = NULL;
@@ -565,6 +586,7 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
if (old_tuple) {
struct memtx_tree_data old_data;
old_data.tuple = old_tuple;
+ old_data.hint = tuple_hint(old_tuple, key_def);
memtx_tree_delete(&index->tree, old_data);
}
*result = old_tuple;
@@ -607,6 +629,8 @@ memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
it->type = type;
it->key_data.key = key;
it->key_data.part_count = part_count;
+ struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
+ it->key_data.hint = key_hint(key, cmp_def);
it->index_def = base->def;
it->tree = &index->tree;
it->tree_iterator = memtx_tree_invalid_iterator();
@@ -671,6 +695,8 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
struct memtx_tree_data *elem =
&index->build_array[index->build_array_size++];
elem->tuple = tuple;
+ struct key_def *key_def = index->tree.arg;
+ elem->hint = tuple_hint(tuple, key_def);
return 0;
}
diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
index 1bcff2ca3..86922c9bb 100644
--- a/src/box/tuple_compare.cc
+++ b/src/box/tuple_compare.cc
@@ -34,6 +34,383 @@
#include "trivia/util.h" /* NOINLINE */
#include <math.h>
+/**
+ * Tuple hints
+ *
+ * Hint is a value h(t) is calculated for tuple in terms
+ * of particular key_def that has the following rules:
+ * if h(t1) < h(t2) then t1 < t2;
+ * if h(t1) > h(t2) then t1 > t2;
+ * if t1 == t2 then regular comparision is required;
+ * These rules mean that instead of direct tuple vs tuple
+ * (or tuple vs key) comparison one may compare theirs
+ * hints first; and only if theirs hints equal compare
+ * the tuples themselves.
+ *
+ * The comparison hint has the following structure:
+ * uint64_t: [ CMP_HINT_TYPE | CMP_HINT_VALUE ]
+ * 64 62 0 bit
+ *
+ * The reserved value HINT_INVALID is returned when hint is
+ * undefined.
+ */
+#define HINT_MAX (((int64_t)1 << 61) - 1)
+#define HINT_MIN (-HINT_MAX - 1)
+#define HINT_UMAX (((uint64_t)1 << 62) - 1)
+
+#define HINT_INVALID UINT64_MAX
+
+#define HINT_TYPE_NUMBER ((uint8_t)1)
+#define HINT_TYPE_STRING ((uint8_t)2)
+#define HINT_TYPE_BOOL ((uint8_t)3)
+
+#define HINT_TYPE(hint) ((uint64_t)hint >> 62)
+#define HINT_VALUE(hint) ((uint64_t)hint & (((uint64_t)1 << 62) - 1))
+#define HINT(type, value) ({ \
+ assert(HINT_TYPE(value) == 0); \
+ (((uint64_t)type << 62) | (uint64_t)value);})
+
+/**
+ * Perform hints comparison.
+ * Returns 1 when hint_a > hint_b, -1 when hint_b > hint_a;
+ * If the hints are incompatible or equal, a value of 0 is
+ * returned, and the further comparison of the original data is
+ * required.
+ */
+static int
+hint_cmp(uint64_t hint_a, uint64_t hint_b)
+{
+ if (hint_a != HINT_INVALID && hint_b != HINT_INVALID &&
+ HINT_TYPE(hint_a) == HINT_TYPE(hint_b) &&
+ HINT_VALUE(hint_a) != HINT_VALUE(hint_b))
+ return hint_a < hint_b ? -1 : 1;
+ else
+ return 0;
+}
+
+template<bool is_optional, bool is_nullable>
+static uint64_t
+key_hint_uint(const char *key, struct key_def *key_def)
+{
+ (void)key_def;
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_UNSIGNED);
+ uint64_t ret;
+ if ((is_optional && key == NULL) ||
+ (is_nullable && mp_typeof(*key) == MP_NIL))
+ return HINT(HINT_TYPE_NUMBER, 0);
+ assert(mp_typeof(*key) == MP_UINT);
+ uint64_t val = mp_decode_uint(&key);
+ ret = val > HINT_MAX ? HINT_UMAX : val - (uint64_t)HINT_MIN;
+ return HINT(HINT_TYPE_NUMBER, ret);
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_UNSIGNED);
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ if (is_nullable && field == NULL)
+ return HINT(HINT_TYPE_NUMBER, 0);
+ return key_hint_uint<false, is_nullable>(field, key_def);
+}
+
+template<bool is_optional, bool is_nullable>
+static uint64_t
+key_hint_int(const char *key, struct key_def *key_def)
+{
+ (void)key_def;
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_INTEGER);
+ uint64_t ret;
+ if ((is_optional && key == NULL) ||
+ (is_nullable && mp_typeof(*key) == MP_NIL))
+ return HINT(HINT_TYPE_NUMBER, 0);
+ if (mp_typeof(*key) == MP_UINT) {
+ uint64_t val = mp_decode_uint(&key);
+ ret = val > HINT_MAX ? HINT_UMAX : val - (uint64_t)HINT_MIN;
+ } else {
+ assert(mp_typeof(*key) == MP_INT);
+ int64_t val = mp_decode_int(&key);
+ ret = val > HINT_MAX ? HINT_UMAX :
+ val < HINT_MIN ? 0 :
+ (uint64_t)val - (uint64_t)HINT_MIN;
+ }
+ return HINT(HINT_TYPE_NUMBER, ret);
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_int(const struct tuple *tuple, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_INTEGER);
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ if (is_nullable && field == NULL)
+ return HINT(HINT_TYPE_NUMBER, 0);
+ return key_hint_int<false, is_nullable>(field, key_def);
+}
+
+template<bool is_optional, bool is_nullable>
+static uint64_t
+key_hint_number(const char *key, struct key_def *key_def)
+{
+ (void)key_def;
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_NUMBER ||
+ key_def->parts->type == FIELD_TYPE_SCALAR);
+ if ((is_optional && key == NULL) ||
+ (is_nullable && mp_typeof(*key) == MP_NIL))
+ return HINT(HINT_TYPE_NUMBER, 0);
+ uint64_t ret;
+ switch (mp_typeof(*key)) {
+ case MP_FLOAT:
+ case MP_DOUBLE: {
+ double f = mp_typeof(*key) == MP_FLOAT ?
+ mp_decode_float(&key) : mp_decode_double(&key);
+ if (!isfinite(f))
+ return HINT_INVALID;
+ double val;
+ (void)modf(f, &val);
+ ret = val > (double)HINT_MAX ? HINT_UMAX :
+ val < (double)HINT_MIN ? 0 :
+ (uint64_t)val - (uint64_t)HINT_MIN;
+ break;
+ }
+ case MP_INT: {
+ int64_t val = (uint64_t)mp_decode_int(&key);
+ ret = val > HINT_MAX ? HINT_UMAX :
+ val < HINT_MIN ? 0 :
+ (uint64_t)val - (uint64_t)HINT_MIN;
+ break;
+ }
+ case MP_UINT: {
+ uint64_t val = mp_decode_uint(&key);
+ ret = val > HINT_MAX ? HINT_UMAX : val - (uint64_t)HINT_MIN;
+ break;
+ }
+ default:
+ unreachable();
+ }
+ return HINT(HINT_TYPE_NUMBER, ret);
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_number(const struct tuple *tuple, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_NUMBER);
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ if (is_nullable && field == NULL)
+ return HINT(HINT_TYPE_NUMBER, 0);
+ return key_hint_number<false, is_nullable>(field, key_def);
+}
+
+template<bool is_optional, bool is_nullable>
+static uint64_t
+key_hint_boolean(const char *key, struct key_def *key_def)
+{
+ (void)key_def;
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_BOOLEAN ||
+ key_def->parts->type == FIELD_TYPE_SCALAR);
+ if ((is_optional && key == NULL) ||
+ (is_nullable && mp_typeof(*key) == MP_NIL))
+ return HINT(HINT_TYPE_BOOL, 0);
+ return (uint64_t)mp_decode_bool(&key);
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_boolean(const struct tuple *tuple, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_BOOLEAN);
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ if (is_nullable && field == NULL)
+ return HINT(HINT_TYPE_BOOL, 0);
+ return key_hint_boolean<false, is_nullable>(field, key_def);
+}
+
+template<bool is_optional, bool is_nullable>
+static uint64_t
+key_hint_string(const char *key, struct key_def *key_def)
+{
+ (void)key_def;
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->coll == NULL);
+ assert(key_def->parts->type == FIELD_TYPE_STRING ||
+ key_def->parts->type == FIELD_TYPE_SCALAR);
+ if ((is_optional && key == NULL) ||
+ (is_nullable && mp_typeof(*key) == MP_NIL))
+ return HINT(HINT_TYPE_STRING, 0);
+ assert(mp_typeof(*key) == MP_STR);
+ uint32_t len;
+ const unsigned char *str =
+ (const unsigned char *)mp_decode_str(&key, &len);
+ uint64_t result = 0;
+ uint32_t process_len = MIN(len, 7);
+ for (uint32_t i = 0; i < process_len; i++) {
+ result <<= 8;
+ result |= str[i];
+ }
+ result <<= 8 * (7 - process_len);
+ return HINT(HINT_TYPE_STRING, result);
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_string(const struct tuple *tuple, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->coll == NULL);
+ assert(key_def->parts->type == FIELD_TYPE_STRING);
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ if (is_nullable && field == NULL)
+ return HINT(HINT_TYPE_STRING, 0);
+ return key_hint_string<false, is_nullable>(field, key_def);
+}
+
+template<bool is_optional, bool is_nullable>
+static uint64_t
+key_hint_string_coll(const char *key, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert((key_def->parts->type == FIELD_TYPE_STRING ||
+ key_def->parts->type == FIELD_TYPE_SCALAR) &&
+ key_def->parts->coll != NULL);
+ if ((is_optional && key == NULL) ||
+ (is_nullable && mp_typeof(*key) == MP_NIL))
+ return HINT(HINT_TYPE_STRING, 0);
+ assert(mp_typeof(*key) == MP_STR);
+ uint32_t len;
+ const char *str = mp_decode_str(&key, &len);
+ return key_def->parts->coll->hint(str, len, key_def->parts->coll);
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_STRING &&
+ key_def->parts->coll != NULL);
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ if (is_nullable && field == NULL)
+ return HINT(HINT_TYPE_STRING, 0);
+ return key_hint_string_coll<false, is_nullable>(field, key_def);
+}
+
+template<bool is_optional, bool is_nullable>
+static uint64_t
+key_hint_scalar(const char *key, struct key_def *key_def)
+{
+ (void)key_def;
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_SCALAR);
+ if ((is_optional && key == NULL) ||
+ (is_nullable && mp_typeof(*key) == MP_NIL))
+ return HINT(HINT_TYPE_BOOL, 0);
+ switch (mp_typeof(*key)) {
+ case MP_INT:
+ case MP_UINT:
+ case MP_FLOAT:
+ case MP_DOUBLE:
+ return key_hint_number<is_optional, is_nullable>(key, key_def);
+ case MP_BOOL:
+ return key_hint_boolean<is_optional, is_nullable>(key, key_def);
+ case MP_STR:
+ if (key_def->parts->coll == NULL)
+ return key_hint_string<is_optional,
+ is_nullable>(key, key_def);
+ else
+ return key_hint_string_coll<is_optional,
+ is_nullable>(key, key_def);
+ default:
+ return HINT_INVALID;
+ }
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_scalar(const struct tuple *tuple, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_SCALAR);
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ if (is_nullable && field == NULL)
+ return HINT(HINT_TYPE_BOOL, 0);
+ return key_hint_scalar<false, is_nullable>(field, key_def);
+}
+
+void
+key_def_set_hint_func(struct key_def *def)
+{
+ def->key_hint = NULL;
+ def->tuple_hint = NULL;
+ bool is_nullable = key_part_is_nullable(def->parts);
+ switch (def->parts->type) {
+ case FIELD_TYPE_UNSIGNED: {
+ def->key_hint = is_nullable ? key_hint_uint<true, true> :
+ key_hint_uint<true, false>;
+ def->tuple_hint = is_nullable ? tuple_hint_uint<true> :
+ tuple_hint_uint<false>;
+ break;
+ }
+ case FIELD_TYPE_INTEGER: {
+ def->key_hint = is_nullable ? key_hint_int<true, true> :
+ key_hint_int<true, false>;
+ def->tuple_hint = is_nullable ? tuple_hint_int<true> :
+ tuple_hint_int<false>;
+ break;
+ }
+ case FIELD_TYPE_STRING: {
+ if (def->parts->coll != NULL) {
+ def->key_hint =
+ is_nullable ? key_hint_string_coll<true, true> :
+ key_hint_string_coll<true, false>;
+ def->tuple_hint =
+ is_nullable ? tuple_hint_string_coll<true> :
+ tuple_hint_string_coll<false>;
+ } else {
+ def->key_hint =
+ is_nullable ? key_hint_string<true, true> :
+ key_hint_string<true, false>;
+ def->tuple_hint = is_nullable ? tuple_hint_string<true> :
+ tuple_hint_string<false>;
+ }
+ break;
+ }
+ case FIELD_TYPE_NUMBER: {
+ def->key_hint = is_nullable ? key_hint_number<true, true> :
+ key_hint_number<true, false>;
+ def->tuple_hint = is_nullable ? tuple_hint_number<true> :
+ tuple_hint_number<false>;
+ break;
+ }
+ case FIELD_TYPE_BOOLEAN: {
+ def->key_hint = is_nullable ? key_hint_boolean<true, true> :
+ key_hint_boolean<true, false>;
+ def->tuple_hint = is_nullable ? tuple_hint_boolean<true> :
+ tuple_hint_boolean<false>;
+ break;
+ }
+ case FIELD_TYPE_SCALAR: {
+ def->key_hint = is_nullable ? key_hint_scalar<true, true> :
+ key_hint_scalar<true, false>;
+ def->tuple_hint = is_nullable ? tuple_hint_scalar<true> :
+ tuple_hint_scalar<false>;
+ break;
+ }
+ default: {
+ break;
+ }
+ }
+}
+
/* {{{ tuple_compare */
/*
@@ -460,13 +837,17 @@ tuple_common_key_parts(const struct tuple *tuple_a, const struct tuple *tuple_b,
template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
static inline int
-tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
- struct key_def *key_def)
+tuple_compare_slowpath_hinted(const struct tuple *tuple_a,
+ uint64_t tuple_a_hint, const struct tuple *tuple_b,
+ uint64_t tuple_b_hint, struct key_def *key_def)
{
assert(has_json_paths == key_def->has_json_paths);
assert(!has_optional_parts || is_nullable);
assert(is_nullable == key_def->is_nullable);
assert(has_optional_parts == key_def->has_optional_parts);
+ int rc = 0;
+ if ((rc = hint_cmp(tuple_a_hint, tuple_b_hint)) != 0)
+ return rc;
struct key_part *part = key_def->parts;
const char *tuple_a_raw = tuple_data(tuple_a);
const char *tuple_b_raw = tuple_data(tuple_b);
@@ -502,7 +883,6 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
struct key_part *end;
const char *field_a, *field_b;
enum mp_type a_type, b_type;
- int rc;
if (is_nullable)
end = part + key_def->unique_part_count;
else
@@ -592,8 +972,19 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
static inline int
-tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
- uint32_t part_count, struct key_def *key_def)
+tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
+ struct key_def *key_def)
+{
+ return tuple_compare_slowpath_hinted<is_nullable, has_optional_parts,
+ has_json_paths>(tuple_a, HINT_INVALID, tuple_b,
+ HINT_INVALID, key_def);
+}
+
+template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
+static inline int
+tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
+ uint64_t tuple_hint, const char *key, uint32_t part_count,
+ uint64_t key_hint, struct key_def *key_def)
{
assert(has_json_paths == key_def->has_json_paths);
assert(!has_optional_parts || is_nullable);
@@ -601,6 +992,9 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
assert(has_optional_parts == key_def->has_optional_parts);
assert(key != NULL || part_count == 0);
assert(part_count <= key_def->part_count);
+ int rc = 0;
+ if ((rc = hint_cmp(tuple_hint, key_hint)) != 0)
+ return rc;
struct key_part *part = key_def->parts;
struct tuple_format *format = tuple_format(tuple);
const char *tuple_raw = tuple_data(tuple);
@@ -636,7 +1030,6 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
}
struct key_part *end = part + part_count;
- int rc;
for (; part < end; ++part, mp_next(&key)) {
const char *field;
if (has_json_paths) {
@@ -675,6 +1068,17 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
return 0;
}
+template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
+static inline int
+tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
+ uint32_t part_count, struct key_def *key_def)
+{
+ return tuple_compare_with_key_slowpath_hinted<is_nullable,
+ has_optional_parts, has_json_paths>(tuple,
+ HINT_INVALID, key, part_count,
+ HINT_INVALID, key_def);
+}
+
template<bool is_nullable>
static inline int
key_compare_parts(const char *key_a, const char *key_b, uint32_t part_count,
@@ -733,13 +1137,17 @@ key_compare_parts(const char *key_a, const char *key_b, uint32_t part_count,
template<bool is_nullable, bool has_optional_parts>
static inline int
-tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key,
- uint32_t part_count, struct key_def *key_def)
+tuple_compare_with_key_sequential_hinted(const struct tuple *tuple,
+ uint64_t tuple_hint, const char *key, uint32_t part_count,
+ uint64_t key_hint, struct key_def *key_def)
{
assert(!has_optional_parts || is_nullable);
assert(key_def_is_sequential(key_def));
assert(is_nullable == key_def->is_nullable);
assert(has_optional_parts == key_def->has_optional_parts);
+ int rc = hint_cmp(tuple_hint, key_hint);
+ if (rc != 0)
+ return rc;
const char *tuple_key = tuple_data(tuple);
uint32_t field_count = mp_decode_array(&tuple_key);
uint32_t cmp_part_count;
@@ -749,8 +1157,8 @@ tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key,
assert(field_count >= part_count);
cmp_part_count = part_count;
}
- int rc = key_compare_parts<is_nullable>(tuple_key, key, cmp_part_count,
- key_def);
+ rc = key_compare_parts<is_nullable>(tuple_key, key, cmp_part_count,
+ key_def);
if (!has_optional_parts || rc != 0)
return rc;
/*
@@ -772,6 +1180,16 @@ tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key,
return 0;
}
+template<bool is_nullable, bool has_optional_parts>
+static inline int
+tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key,
+ uint32_t part_count, struct key_def *key_def)
+{
+ return tuple_compare_with_key_sequential_hinted<is_nullable,
+ has_optional_parts>(tuple, HINT_INVALID, key,
+ part_count, HINT_INVALID, key_def);
+}
+
int
key_compare(const char *key_a, const char *key_b, struct key_def *key_def)
{
@@ -792,9 +1210,13 @@ key_compare(const char *key_a, const char *key_b, struct key_def *key_def)
template <bool is_nullable, bool has_optional_parts>
static int
-tuple_compare_sequential(const struct tuple *tuple_a,
- const struct tuple *tuple_b, key_def *key_def)
+tuple_compare_sequential_hinted(const struct tuple *tuple_a,
+ uint64_t tuple_a_hint, const struct tuple *tuple_b,
+ uint64_t tuple_b_hint, key_def *key_def)
{
+ int rc = hint_cmp(tuple_a_hint, tuple_b_hint);
+ if (rc != 0)
+ return rc;
assert(!has_optional_parts || is_nullable);
assert(has_optional_parts == key_def->has_optional_parts);
assert(key_def_is_sequential(key_def));
@@ -812,7 +1234,6 @@ tuple_compare_sequential(const struct tuple *tuple_a,
bool was_null_met = false;
struct key_part *part = key_def->parts;
struct key_part *end = part + key_def->unique_part_count;
- int rc;
uint32_t i = 0;
for (; part < end; ++part, ++i) {
enum mp_type a_type, b_type;
@@ -859,6 +1280,16 @@ tuple_compare_sequential(const struct tuple *tuple_a,
return 0;
}
+template <bool is_nullable, bool has_optional_parts>
+static int
+tuple_compare_sequential(const struct tuple *tuple_a,
+ const struct tuple *tuple_b, key_def *key_def)
+{
+ return tuple_compare_sequential_hinted<is_nullable,
+ has_optional_parts>(tuple_a, HINT_INVALID, tuple_b,
+ HINT_INVALID, key_def);
+}
+
template <int TYPE>
static inline int
field_compare(const char **field_a, const char **field_b);
@@ -989,6 +1420,18 @@ struct TupleCompare
compare(tuple_a, tuple_b, format_a,
format_b, field_a, field_b);
}
+
+ static int compare_hinted(const struct tuple *tuple_a,
+ uint64_t tuple_a_hint,
+ const struct tuple *tuple_b,
+ uint64_t tuple_b_hint,
+ struct key_def *key_def)
+ {
+ int rc = hint_cmp(tuple_a_hint, tuple_b_hint);
+ if (rc != 0)
+ return rc;
+ return compare(tuple_a, tuple_b, key_def);
+ }
};
template <int TYPE, int ...MORE_TYPES>
@@ -1006,15 +1449,30 @@ struct TupleCompare<0, TYPE, MORE_TYPES...> {
return FieldCompare<0, TYPE, MORE_TYPES...>::compare(tuple_a, tuple_b,
format_a, format_b, field_a, field_b);
}
+
+ static int compare_hinted(const struct tuple *tuple_a,
+ uint64_t tuple_a_hint,
+ const struct tuple *tuple_b,
+ uint64_t tuple_b_hint,
+ struct key_def *key_def)
+ {
+ int rc = hint_cmp(tuple_a_hint, tuple_b_hint);
+ if (rc != 0)
+ return rc;
+ return compare(tuple_a, tuple_b, key_def);
+ }
};
} /* end of anonymous namespace */
struct comparator_signature {
tuple_compare_t f;
+ tuple_compare_hinted_t f_hinted;
uint32_t p[64];
};
#define COMPARATOR(...) \
- { TupleCompare<__VA_ARGS__>::compare, { __VA_ARGS__, UINT32_MAX } },
+ { TupleCompare<__VA_ARGS__>::compare, \
+ TupleCompare<__VA_ARGS__>::compare_hinted, \
+ { __VA_ARGS__, UINT32_MAX } },
/**
* field1 no, field1 type, field2 no, field2 type, ...
@@ -1049,6 +1507,17 @@ static const tuple_compare_t compare_slowpath_funcs[] = {
tuple_compare_slowpath<true, true, true>
};
+static const tuple_compare_hinted_t compare_slowpath_hinted_funcs[] = {
+ tuple_compare_slowpath_hinted<false, false, false>,
+ tuple_compare_slowpath_hinted<true, false, false>,
+ tuple_compare_slowpath_hinted<false, true, false>,
+ tuple_compare_slowpath_hinted<true, true, false>,
+ tuple_compare_slowpath_hinted<false, false, true>,
+ tuple_compare_slowpath_hinted<true, false, true>,
+ tuple_compare_slowpath_hinted<false, true, true>,
+ tuple_compare_slowpath_hinted<true, true, true>
+};
+
void
key_def_set_tuple_compare(struct key_def *def)
{
@@ -1060,13 +1529,21 @@ key_def_set_tuple_compare(struct key_def *def)
if (def->has_optional_parts) {
def->tuple_compare =
tuple_compare_sequential<true, true>;
+ def->tuple_compare_hinted =
+ tuple_compare_sequential_hinted<true,
+ true>;
} else {
def->tuple_compare =
tuple_compare_sequential<true, false>;
+ def->tuple_compare_hinted =
+ tuple_compare_sequential_hinted<true,
+ false>;
}
} else {
def->tuple_compare =
compare_slowpath_funcs[cmp_func_idx];
+ def->tuple_compare_hinted =
+ compare_slowpath_hinted_funcs[cmp_func_idx];
}
return;
}
@@ -1085,13 +1562,20 @@ key_def_set_tuple_compare(struct key_def *def)
if (i == def->part_count &&
cmp_arr[k].p[i * 2] == UINT32_MAX) {
def->tuple_compare = cmp_arr[k].f;
+ def->tuple_compare_hinted = cmp_arr[k].f_hinted;
return;
}
}
}
- def->tuple_compare = key_def_is_sequential(def) ?
- tuple_compare_sequential<false, false> :
- compare_slowpath_funcs[cmp_func_idx];
+ if (key_def_is_sequential(def)) {
+ def->tuple_compare = tuple_compare_sequential<false, false>;
+ def->tuple_compare_hinted =
+ tuple_compare_sequential_hinted<false, false>;
+ } else {
+ def->tuple_compare = compare_slowpath_funcs[cmp_func_idx];
+ def->tuple_compare_hinted =
+ compare_slowpath_hinted_funcs[cmp_func_idx];
+ }
}
/* }}} tuple_compare */
@@ -1222,6 +1706,17 @@ struct TupleCompareWithKey
compare(tuple, key, part_count,
key_def, format, field);
}
+
+ static int
+ compare_hinted(const struct tuple *tuple, uint64_t tuple_hint,
+ const char *key, uint32_t part_count, uint64_t key_hint,
+ struct key_def *key_def)
+ {
+ int rc = hint_cmp(tuple_hint, key_hint);
+ if (rc != 0)
+ return rc;
+ return compare(tuple, key, part_count, key_def);
+ }
};
template <int TYPE, int ...MORE_TYPES>
@@ -1242,6 +1737,17 @@ struct TupleCompareWithKey<0, 0, TYPE, MORE_TYPES...>
compare(tuple, key, part_count,
key_def, format, field);
}
+
+ static int
+ compare_hinted(const struct tuple *tuple, uint64_t tuple_hint,
+ const char *key, uint32_t part_count, uint64_t key_hint,
+ struct key_def *key_def)
+ {
+ int rc = hint_cmp(tuple_hint, key_hint);
+ if (rc != 0)
+ return rc;
+ return compare(tuple, key, part_count, key_def);
+ }
};
} /* end of anonymous namespace */
@@ -1249,11 +1755,14 @@ struct TupleCompareWithKey<0, 0, TYPE, MORE_TYPES...>
struct comparator_with_key_signature
{
tuple_compare_with_key_t f;
+ tuple_compare_with_key_hinted_t f_hinted;
uint32_t p[64];
};
#define KEY_COMPARATOR(...) \
- { TupleCompareWithKey<0, __VA_ARGS__>::compare, { __VA_ARGS__ } },
+ { TupleCompareWithKey<0, __VA_ARGS__>::compare, \
+ TupleCompareWithKey<0, __VA_ARGS__>::compare_hinted, \
+ { __VA_ARGS__ } },
static const comparator_with_key_signature cmp_wk_arr[] = {
KEY_COMPARATOR(0, FIELD_TYPE_UNSIGNED, 1, FIELD_TYPE_UNSIGNED, 2, FIELD_TYPE_UNSIGNED)
@@ -1284,6 +1793,18 @@ static const tuple_compare_with_key_t compare_with_key_slowpath_funcs[] = {
tuple_compare_with_key_slowpath<true, true, true>
};
+static const tuple_compare_with_key_hinted_t
+ compare_with_key_slowpath_hinted_funcs[] = {
+ tuple_compare_with_key_slowpath_hinted<false, false, false>,
+ tuple_compare_with_key_slowpath_hinted<true, false, false>,
+ tuple_compare_with_key_slowpath_hinted<false, true, false>,
+ tuple_compare_with_key_slowpath_hinted<true, true, false>,
+ tuple_compare_with_key_slowpath_hinted<false, false, true>,
+ tuple_compare_with_key_slowpath_hinted<true, false, true>,
+ tuple_compare_with_key_slowpath_hinted<false, true, true>,
+ tuple_compare_with_key_slowpath_hinted<true, true, true>
+};
+
void
key_def_set_tuple_compare_with_key(struct key_def *def)
{
@@ -1296,14 +1817,23 @@ key_def_set_tuple_compare_with_key(struct key_def *def)
def->tuple_compare_with_key =
tuple_compare_with_key_sequential<true,
true>;
+ def->tuple_compare_with_key_hinted =
+ tuple_compare_with_key_sequential_hinted<
+ true, true>;
} else {
def->tuple_compare_with_key =
tuple_compare_with_key_sequential<true,
false>;
+ def->tuple_compare_with_key_hinted =
+ tuple_compare_with_key_sequential_hinted<
+ true, false>;
}
} else {
def->tuple_compare_with_key =
compare_with_key_slowpath_funcs[cmp_func_idx];
+ def->tuple_compare_with_key_hinted =
+ compare_with_key_slowpath_hinted_funcs[
+ cmp_func_idx];
}
return;
}
@@ -1325,14 +1855,23 @@ key_def_set_tuple_compare_with_key(struct key_def *def)
}
if (i == def->part_count) {
def->tuple_compare_with_key = cmp_wk_arr[k].f;
+ def->tuple_compare_with_key_hinted =
+ cmp_wk_arr[k].f_hinted;
return;
}
}
}
- def->tuple_compare_with_key =
- key_def_is_sequential(def) ?
- tuple_compare_with_key_sequential<false, false> :
- compare_with_key_slowpath_funcs[cmp_func_idx];
+ if (key_def_is_sequential(def)) {
+ def->tuple_compare_with_key =
+ tuple_compare_with_key_sequential<false, false>;
+ def->tuple_compare_with_key_hinted =
+ tuple_compare_with_key_sequential_hinted<false, false>;
+ } else {
+ def->tuple_compare_with_key =
+ compare_with_key_slowpath_funcs[cmp_func_idx];
+ def->tuple_compare_with_key_hinted =
+ compare_with_key_slowpath_hinted_funcs[cmp_func_idx];
+ }
}
/* }}} tuple_compare_with_key */
@@ -1342,4 +1881,5 @@ key_def_set_compare_func(struct key_def *def)
{
key_def_set_tuple_compare(def);
key_def_set_tuple_compare_with_key(def);
+ key_def_set_hint_func(def);
}
diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c
index afc15e809..3bd7c44f5 100644
--- a/src/lib/coll/coll.c
+++ b/src/lib/coll/coll.c
@@ -133,6 +133,37 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
return s_len;
}
+/** Get a compare hint of a binary collation. */
+static uint64_t
+coll_bin_hint(const char *s, size_t s_len, struct coll *coll)
+{
+ (void)coll;
+ uint64_t result = 0;
+ uint32_t process_len = MIN(s_len, 7);
+ for (uint32_t i = 0; i < process_len; i++) {
+ result <<= 8;
+ result |= ((unsigned char *)s)[i];
+ }
+ result <<= 8 * (7 - process_len);
+ return result;
+}
+
+/** Get a compare hint of a string using ICU collation. */
+static uint64_t
+coll_icu_hint(const char *s, size_t s_len, struct coll *coll)
+{
+ assert(coll->type == COLL_TYPE_ICU);
+ UCharIterator itr;
+ uiter_setUTF8(&itr, s, s_len);
+ uint8_t buf[7];
+ uint32_t state[2] = {0, 0};
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf,
+ sizeof(buf), &status);
+ assert(got >= 0 && got <= 7);
+ return coll_bin_hint((const char *)buf, got, NULL);
+}
+
/**
* Set up ICU collator and init cmp and hash members of collation.
* @param coll Collation to set up.
@@ -242,6 +273,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
}
coll->cmp = coll_icu_cmp;
coll->hash = coll_icu_hash;
+ coll->hint = coll_icu_hint;
return 0;
}
@@ -356,6 +388,7 @@ coll_new(const struct coll_def *def)
coll->collator = NULL;
coll->cmp = coll_bin_cmp;
coll->hash = coll_bin_hash;
+ coll->hint = coll_bin_hint;
break;
default:
unreachable();
diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h
index 8c9f94293..d695a02ad 100644
--- a/src/lib/coll/coll.h
+++ b/src/lib/coll/coll.h
@@ -48,6 +48,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t,
typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph,
uint32_t *pcarry, struct coll *coll);
+typedef uint64_t (*coll_hint_f)(const char *s, size_t s_len, struct coll *coll);
+
struct UCollator;
/**
@@ -62,6 +64,8 @@ struct coll {
/** String comparator. */
coll_cmp_f cmp;
coll_hash_f hash;
+ /** The pointer to routine calculating tuple hint. */
+ coll_hint_f hint;
/** Reference counter. */
int refs;
/**
--
2.21.0
More information about the Tarantool-patches
mailing list