[tarantool-patches] Re: [PATCH v6 2/3] memtx: introduce tuple compare hint
Vladimir Davydov
vdavydov.dev at gmail.com
Wed Mar 20 21:08:40 MSK 2019
Pushed to master with some changes. You will find the new patch below.
I removed the tests, because I find it insufficient. Please cover all
possible test cases and submit a separate patch.
---
>From 9fba29abb4e05babb9b23b4413bd8083f0fba933 Mon Sep 17 00:00:00 2001
Message-Id: <9fba29abb4e05babb9b23b4413bd8083f0fba933.1553105230.git.vdavydov.dev at gmail.com>
From: Kirill Shcherbatov <kshcherbatov at tarantool.org>
Date: Wed, 13 Mar 2019 13:06:30 +0300
Subject: [PATCH] memtx: introduce tuple compare hint
Implement functions for retrieving tuple hints for a particular
key_def. Hint is an integer that can be used for tuple comparison
optimization: if a hint of one tuple is less than a hint of another
then the first tuple is definitely less than the second; only if
hints are equal tuple_compare must be called for getting comparison
result. Hints are calculated using only the first part of key_def.
@locker:
- Rework key_def_set_hint_func.
- Refactor functions calculating hints.
- Drop has_collation template argument (we don't use templates
for collations anywhere else).
- Add part_count argument to key_hint (it's conventional to pass
part_count along with decoded key).
- Improve comments, rename a few functions, and cleanup code.
Close #3961
---
src/box/key_def.h | 115 ++++++++++
src/box/memtx_tree.c | 40 +++-
src/box/tuple_compare.cc | 535 +++++++++++++++++++++++++++++++++++++++++++++--
src/lib/coll/coll.c | 26 +++
src/lib/coll/coll.h | 12 ++
5 files changed, 702 insertions(+), 26 deletions(-)
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 7c9f8c6b..288cf727 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -117,6 +117,27 @@ struct key_def;
struct tuple;
/**
+ * Tuple comparison hint h(t) is such a function of tuple t that
+ * the following conditions always hold for any pair of tuples
+ * t1 and t2:
+ *
+ * if h(t1) < h(t2) then t1 < t2;
+ * if h(t1) > h(t2) then t1 > t2;
+ * if h(t1) == h(t2) then t1 may or may not be equal to t2.
+ *
+ * These rules mean that instead of direct tuple vs tuple
+ * (or tuple vs key) comparison one may compare their hints
+ * first and only if theirs hints equal compare the tuples
+ * themselves.
+ */
+typedef uint64_t hint_t;
+
+/**
+ * Reserved value to use when comparison hint is undefined.
+ */
+#define HINT_NONE ((hint_t)UINT64_MAX)
+
+/**
* Get is_nullable property of key_part.
* @param key_part for which attribute is being fetched
*
@@ -133,10 +154,23 @@ typedef int (*tuple_compare_with_key_t)(const struct tuple *tuple_a,
const char *key,
uint32_t part_count,
struct key_def *key_def);
+/** @copydoc tuple_compare_with_key_hinted() */
+typedef int (*tuple_compare_with_key_hinted_t)(const struct tuple *tuple,
+ hint_t tuple_hint,
+ const char *key,
+ uint32_t part_count,
+ hint_t key_hint,
+ struct key_def *key_def);
/** @copydoc tuple_compare() */
typedef int (*tuple_compare_t)(const struct tuple *tuple_a,
const struct tuple *tuple_b,
struct key_def *key_def);
+/** @copydoc tuple_compare_hinted() */
+typedef int (*tuple_compare_hinted_t)(const struct tuple *tuple_a,
+ hint_t tuple_a_hint,
+ const struct tuple *tuple_b,
+ hint_t tuple_b_hint,
+ struct key_def *key_def);
/** @copydoc tuple_extract_key() */
typedef char *(*tuple_extract_key_t)(const struct tuple *tuple,
struct key_def *key_def,
@@ -152,13 +186,23 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple,
/** @copydoc key_hash() */
typedef uint32_t (*key_hash_t)(const char *key,
struct key_def *key_def);
+/** @copydoc tuple_hint() */
+typedef hint_t (*tuple_hint_t)(const struct tuple *tuple,
+ struct key_def *key_def);
+/** @copydoc key_hint() */
+typedef hint_t (*key_hint_t)(const char *key, uint32_t part_count,
+ struct key_def *key_def);
/* Definition of a multipart key. */
struct key_def {
/** @see tuple_compare() */
tuple_compare_t tuple_compare;
+ /** @see tuple_compare_hinted() */
+ tuple_compare_hinted_t tuple_compare_hinted;
/** @see tuple_compare_with_key() */
tuple_compare_with_key_t tuple_compare_with_key;
+ /** @see tuple_compare_with_key_hinted() */
+ tuple_compare_with_key_hinted_t tuple_compare_with_key_hinted;
/** @see tuple_extract_key() */
tuple_extract_key_t tuple_extract_key;
/** @see tuple_extract_key_raw() */
@@ -167,6 +211,10 @@ struct key_def {
tuple_hash_t tuple_hash;
/** @see key_hash() */
key_hash_t key_hash;
+ /** @see tuple_hint() */
+ tuple_hint_t tuple_hint;
+ /** @see key_hint() */
+ key_hint_t key_hint;
/**
* Minimal part count which always is unique. For example,
* if a secondary index is unique, then
@@ -558,6 +606,26 @@ tuple_compare(const struct tuple *tuple_a, const struct tuple *tuple_b,
}
/**
+ * Compare tuples using the key definition and comparison hints.
+ * @param tuple_a first tuple
+ * @param tuple_a_hint comparison hint of @a tuple_a
+ * @param tuple_b second tuple
+ * @param tuple_b_hint comparison hint of @a tuple_b
+ * @param key_def key definition
+ * @retval 0 if key_fields(tuple_a) == key_fields(tuple_b)
+ * @retval <0 if key_fields(tuple_a) < key_fields(tuple_b)
+ * @retval >0 if key_fields(tuple_a) > key_fields(tuple_b)
+ */
+static inline int
+tuple_compare_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint,
+ const struct tuple *tuple_b, hint_t tuple_b_hint,
+ struct key_def *key_def)
+{
+ return key_def->tuple_compare_hinted(tuple_a, tuple_a_hint, tuple_b,
+ tuple_b_hint, key_def);
+}
+
+/**
* @brief Compare tuple with key using the key definition.
* @param tuple tuple
* @param key key parts without MessagePack array header
@@ -576,6 +644,29 @@ tuple_compare_with_key(const struct tuple *tuple, const char *key,
}
/**
+ * Compare tuple with key using the key definition and
+ * comparison hints
+ * @param tuple tuple
+ * @param tuple_hint comparison hint of @a tuple
+ * @param key key parts without MessagePack array header
+ * @param part_count the number of parts in @a key
+ * @param key_hint comparison hint of @a key
+ * @param key_def key definition
+ * @retval 0 if key_fields(tuple) == parts(key)
+ * @retval <0 if key_fields(tuple) < parts(key)
+ * @retval >0 if key_fields(tuple) > parts(key)
+ */
+static inline int
+tuple_compare_with_key_hinted(const struct tuple *tuple, hint_t tuple_hint,
+ const char *key, uint32_t part_count,
+ hint_t key_hint, struct key_def *key_def)
+{
+ return key_def->tuple_compare_with_key_hinted(tuple, tuple_hint, key,
+ part_count, key_hint,
+ key_def);
+}
+
+/**
* Compute hash of a tuple field.
* @param ph1 - pointer to running hash
* @param pcarry - pointer to carry
@@ -628,6 +719,30 @@ key_hash(const char *key, struct key_def *key_def)
return key_def->key_hash(key, key_def);
}
+ /*
+ * Get comparison hint for a tuple.
+ * @param tuple - tuple to compute the hint for
+ * @param key_def - key_def used for tuple comparison
+ * @return - hint value
+ */
+static inline hint_t
+tuple_hint(const struct tuple *tuple, struct key_def *key_def)
+{
+ return key_def->tuple_hint(tuple, key_def);
+}
+
+/**
+ * Get a comparison hint of a key.
+ * @param key - key to compute the hint for
+ * @param key_def - key_def used for tuple comparison
+ * @return - hint value
+ */
+static inline hint_t
+key_hint(const char *key, uint32_t part_count, struct key_def *key_def)
+{
+ return key_def->key_hint(key, part_count, key_def);
+}
+
#if defined(__cplusplus)
} /* extern "C" */
#endif /* defined(__cplusplus) */
diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
index e24c4032..fe037c54 100644
--- a/src/box/memtx_tree.c
+++ b/src/box/memtx_tree.c
@@ -47,6 +47,8 @@ struct memtx_tree_key_data {
const char *key;
/** Number of msgpacked search fields. */
uint32_t part_count;
+ /** Comparison hint, see tuple_hint(). */
+ hint_t hint;
};
/**
@@ -55,6 +57,8 @@ struct memtx_tree_key_data {
struct memtx_tree_data {
/* Tuple that this node is represents. */
struct tuple *tuple;
+ /** Comparison hint, see key_hint(). */
+ hint_t hint;
};
/**
@@ -69,16 +73,18 @@ static bool
memtx_tree_data_identical(const struct memtx_tree_data *a,
const struct memtx_tree_data *b)
{
- return a->tuple == b->tuple;
+ return a->tuple == b->tuple && a->hint == b->hint;
}
#define BPS_TREE_NAME memtx_tree
#define BPS_TREE_BLOCK_SIZE (512)
#define BPS_TREE_EXTENT_SIZE MEMTX_EXTENT_SIZE
#define BPS_TREE_COMPARE(a, b, arg)\
- tuple_compare((&a)->tuple, (&b)->tuple, arg)
+ tuple_compare_hinted((&a)->tuple, (&a)->hint, (&b)->tuple,\
+ (&b)->hint, arg)
#define BPS_TREE_COMPARE_KEY(a, b, arg)\
- tuple_compare_with_key((&a)->tuple, (b)->key, (b)->part_count, arg)
+ tuple_compare_with_key_hinted((&a)->tuple, (&a)->hint, (b)->key,\
+ (b)->part_count, (b)->hint, arg)
#define BPS_TREE_IDENTICAL(a, b) memtx_tree_data_identical(&a, &b)
#define bps_tree_elem_t struct memtx_tree_data
#define bps_tree_key_t struct memtx_tree_key_data *
@@ -119,7 +125,8 @@ memtx_tree_qcompare(const void* a, const void *b, void *c)
const struct memtx_tree_data *data_a = a;
const struct memtx_tree_data *data_b = b;
struct key_def *key_def = c;
- return tuple_compare(data_a->tuple, data_b->tuple, key_def);
+ return tuple_compare_hinted(data_a->tuple, data_a->hint, data_b->tuple,
+ data_b->hint, key_def);
}
/* {{{ MemtxTree Iterators ****************************************/
@@ -241,9 +248,11 @@ tree_iterator_next_equal(struct iterator *iterator, struct tuple **ret)
memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
/* Use user key def to save a few loops. */
if (res == NULL ||
- tuple_compare_with_key(res->tuple, it->key_data.key,
- it->key_data.part_count,
- it->index_def->key_def) != 0) {
+ tuple_compare_with_key_hinted(res->tuple, res->hint,
+ it->key_data.key,
+ it->key_data.part_count,
+ it->key_data.hint,
+ it->index_def->key_def) != 0) {
iterator->next = tree_iterator_dummie;
it->current.tuple = NULL;
*ret = NULL;
@@ -272,9 +281,11 @@ tree_iterator_prev_equal(struct iterator *iterator, struct tuple **ret)
memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
/* Use user key def to save a few loops. */
if (res == NULL ||
- tuple_compare_with_key(res->tuple, it->key_data.key,
- it->key_data.part_count,
- it->index_def->key_def) != 0) {
+ tuple_compare_with_key_hinted(res->tuple, res->hint,
+ it->key_data.key,
+ it->key_data.part_count,
+ it->key_data.hint,
+ it->index_def->key_def) != 0) {
iterator->next = tree_iterator_dummie;
it->current.tuple = NULL;
*ret = NULL;
@@ -513,9 +524,11 @@ memtx_tree_index_get(struct index *base, const char *key,
assert(base->def->opts.is_unique &&
part_count == base->def->key_def->part_count);
struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+ struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
struct memtx_tree_key_data key_data;
key_data.key = key;
key_data.part_count = part_count;
+ key_data.hint = key_hint(key, part_count, cmp_def);
struct memtx_tree_data *res = memtx_tree_find(&index->tree, &key_data);
*result = res != NULL ? res->tuple : NULL;
return 0;
@@ -527,9 +540,11 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
struct tuple **result)
{
struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+ struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
if (new_tuple) {
struct memtx_tree_data new_data;
new_data.tuple = new_tuple;
+ new_data.hint = tuple_hint(new_tuple, cmp_def);
struct memtx_tree_data dup_data;
dup_data.tuple = NULL;
@@ -562,6 +577,7 @@ memtx_tree_index_replace(struct index *base, struct tuple *old_tuple,
if (old_tuple) {
struct memtx_tree_data old_data;
old_data.tuple = old_tuple;
+ old_data.hint = tuple_hint(old_tuple, cmp_def);
memtx_tree_delete(&index->tree, old_data);
}
*result = old_tuple;
@@ -574,6 +590,7 @@ memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
{
struct memtx_tree_index *index = (struct memtx_tree_index *)base;
struct memtx_engine *memtx = (struct memtx_engine *)base->engine;
+ struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
assert(part_count == 0 || key != NULL);
if (type > ITER_GT) {
@@ -604,6 +621,7 @@ memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
it->type = type;
it->key_data.key = key;
it->key_data.part_count = part_count;
+ it->key_data.hint = key_hint(key, part_count, cmp_def);
it->index_def = base->def;
it->tree = &index->tree;
it->tree_iterator = memtx_tree_invalid_iterator();
@@ -641,6 +659,7 @@ static int
memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
{
struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+ struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
if (index->build_array == NULL) {
index->build_array = malloc(MEMTX_EXTENT_SIZE);
if (index->build_array == NULL) {
@@ -668,6 +687,7 @@ memtx_tree_index_build_next(struct index *base, struct tuple *tuple)
struct memtx_tree_data *elem =
&index->build_array[index->build_array_size++];
elem->tuple = tuple;
+ elem->hint = tuple_hint(tuple, cmp_def);
return 0;
}
diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
index 595f6f25..93756365 100644
--- a/src/box/tuple_compare.cc
+++ b/src/box/tuple_compare.cc
@@ -36,6 +36,25 @@
/* {{{ tuple_compare */
+/**
+ * Compare two tuple hints.
+ *
+ * Returns:
+ *
+ * -1 if the first tuple is less than the second tuple
+ * +1 if the first tuple is greater than the second tuple
+ * 0 if the first tuple may be less than, equal to, or
+ * greater than the second tuple, and a full tuple
+ * comparison is needed to determine the order.
+ */
+static inline int
+hint_cmp(hint_t hint_a, hint_t hint_b)
+{
+ if (hint_a != HINT_NONE && hint_b != HINT_NONE && hint_a != hint_b)
+ return hint_a < hint_b ? -1 : 1;
+ return 0;
+}
+
/*
* Compare two tuple fields.
* Separate version exists since compare is a very
@@ -53,7 +72,8 @@ enum mp_class {
MP_CLASS_STR,
MP_CLASS_BIN,
MP_CLASS_ARRAY,
- MP_CLASS_MAP
+ MP_CLASS_MAP,
+ mp_class_max,
};
static enum mp_class mp_classes[] = {
@@ -427,13 +447,17 @@ tuple_compare_field_with_type(const char *field_a, enum mp_type a_type,
template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
static inline int
-tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
- struct key_def *key_def)
+tuple_compare_slowpath_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint,
+ const struct tuple *tuple_b, hint_t tuple_b_hint,
+ struct key_def *key_def)
{
assert(has_json_paths == key_def->has_json_paths);
assert(!has_optional_parts || is_nullable);
assert(is_nullable == key_def->is_nullable);
assert(has_optional_parts == key_def->has_optional_parts);
+ int rc = hint_cmp(tuple_a_hint, tuple_b_hint);
+ if (rc != 0)
+ return rc;
struct key_part *part = key_def->parts;
const char *tuple_a_raw = tuple_data(tuple_a);
const char *tuple_b_raw = tuple_data(tuple_b);
@@ -469,7 +493,6 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
struct key_part *end;
const char *field_a, *field_b;
enum mp_type a_type, b_type;
- int rc;
if (is_nullable)
end = part + key_def->unique_part_count;
else
@@ -559,8 +582,19 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
static inline int
-tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
- uint32_t part_count, struct key_def *key_def)
+tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
+ struct key_def *key_def)
+{
+ return tuple_compare_slowpath_hinted
+ <is_nullable, has_optional_parts, has_json_paths>
+ (tuple_a, HINT_NONE, tuple_b, HINT_NONE, key_def);
+}
+
+template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
+static inline int
+tuple_compare_with_key_slowpath_hinted(const struct tuple *tuple,
+ hint_t tuple_hint, const char *key, uint32_t part_count,
+ hint_t key_hint, struct key_def *key_def)
{
assert(has_json_paths == key_def->has_json_paths);
assert(!has_optional_parts || is_nullable);
@@ -568,6 +602,9 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
assert(has_optional_parts == key_def->has_optional_parts);
assert(key != NULL || part_count == 0);
assert(part_count <= key_def->part_count);
+ int rc = hint_cmp(tuple_hint, key_hint);
+ if (rc != 0)
+ return rc;
struct key_part *part = key_def->parts;
struct tuple_format *format = tuple_format(tuple);
const char *tuple_raw = tuple_data(tuple);
@@ -603,7 +640,6 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
}
struct key_part *end = part + part_count;
- int rc;
for (; part < end; ++part, mp_next(&key)) {
const char *field;
if (has_json_paths) {
@@ -642,6 +678,16 @@ tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
return 0;
}
+template<bool is_nullable, bool has_optional_parts, bool has_json_paths>
+static inline int
+tuple_compare_with_key_slowpath(const struct tuple *tuple, const char *key,
+ uint32_t part_count, struct key_def *key_def)
+{
+ return tuple_compare_with_key_slowpath_hinted
+ <is_nullable, has_optional_parts, has_json_paths>
+ (tuple, HINT_NONE, key, part_count, HINT_NONE, key_def);
+}
+
template<bool is_nullable>
static inline int
key_compare_parts(const char *key_a, const char *key_b, uint32_t part_count,
@@ -700,13 +746,17 @@ key_compare_parts(const char *key_a, const char *key_b, uint32_t part_count,
template<bool is_nullable, bool has_optional_parts>
static inline int
-tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key,
- uint32_t part_count, struct key_def *key_def)
+tuple_compare_with_key_sequential_hinted(const struct tuple *tuple,
+ hint_t tuple_hint, const char *key, uint32_t part_count,
+ hint_t key_hint, struct key_def *key_def)
{
assert(!has_optional_parts || is_nullable);
assert(key_def_is_sequential(key_def));
assert(is_nullable == key_def->is_nullable);
assert(has_optional_parts == key_def->has_optional_parts);
+ int rc = hint_cmp(tuple_hint, key_hint);
+ if (rc != 0)
+ return rc;
const char *tuple_key = tuple_data(tuple);
uint32_t field_count = mp_decode_array(&tuple_key);
uint32_t cmp_part_count;
@@ -716,8 +766,8 @@ tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key,
assert(field_count >= part_count);
cmp_part_count = part_count;
}
- int rc = key_compare_parts<is_nullable>(tuple_key, key, cmp_part_count,
- key_def);
+ rc = key_compare_parts<is_nullable>(tuple_key, key, cmp_part_count,
+ key_def);
if (!has_optional_parts || rc != 0)
return rc;
/*
@@ -739,6 +789,16 @@ tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key,
return 0;
}
+template<bool is_nullable, bool has_optional_parts>
+static inline int
+tuple_compare_with_key_sequential(const struct tuple *tuple, const char *key,
+ uint32_t part_count, struct key_def *key_def)
+{
+ return tuple_compare_with_key_sequential_hinted
+ <is_nullable, has_optional_parts>
+ (tuple, HINT_NONE, key, part_count, HINT_NONE, key_def);
+}
+
int
key_compare(const char *key_a, const char *key_b, struct key_def *key_def)
{
@@ -759,13 +819,17 @@ key_compare(const char *key_a, const char *key_b, struct key_def *key_def)
template <bool is_nullable, bool has_optional_parts>
static int
-tuple_compare_sequential(const struct tuple *tuple_a,
- const struct tuple *tuple_b, key_def *key_def)
+tuple_compare_sequential_hinted(const struct tuple *tuple_a, hint_t tuple_a_hint,
+ const struct tuple *tuple_b, hint_t tuple_b_hint,
+ struct key_def *key_def)
{
assert(!has_optional_parts || is_nullable);
assert(has_optional_parts == key_def->has_optional_parts);
assert(key_def_is_sequential(key_def));
assert(is_nullable == key_def->is_nullable);
+ int rc = hint_cmp(tuple_a_hint, tuple_b_hint);
+ if (rc != 0)
+ return rc;
const char *key_a = tuple_data(tuple_a);
uint32_t fc_a = mp_decode_array(&key_a);
const char *key_b = tuple_data(tuple_b);
@@ -779,7 +843,6 @@ tuple_compare_sequential(const struct tuple *tuple_a,
bool was_null_met = false;
struct key_part *part = key_def->parts;
struct key_part *end = part + key_def->unique_part_count;
- int rc;
uint32_t i = 0;
for (; part < end; ++part, ++i) {
enum mp_type a_type, b_type;
@@ -826,6 +889,16 @@ tuple_compare_sequential(const struct tuple *tuple_a,
return 0;
}
+template <bool is_nullable, bool has_optional_parts>
+static int
+tuple_compare_sequential(const struct tuple *tuple_a,
+ const struct tuple *tuple_b, struct key_def *key_def)
+{
+ return tuple_compare_sequential_hinted
+ <is_nullable, has_optional_parts>
+ (tuple_a, HINT_NONE, tuple_b, HINT_NONE, key_def);
+}
+
template <int TYPE>
static inline int
field_compare(const char **field_a, const char **field_b);
@@ -956,6 +1029,18 @@ struct TupleCompare
compare(tuple_a, tuple_b, format_a,
format_b, field_a, field_b);
}
+
+ static int compare_hinted(const struct tuple *tuple_a,
+ hint_t tuple_a_hint,
+ const struct tuple *tuple_b,
+ hint_t tuple_b_hint,
+ struct key_def *key_def)
+ {
+ int rc = hint_cmp(tuple_a_hint, tuple_b_hint);
+ if (rc != 0)
+ return rc;
+ return compare(tuple_a, tuple_b, key_def);
+ }
};
template <int TYPE, int ...MORE_TYPES>
@@ -973,15 +1058,30 @@ struct TupleCompare<0, TYPE, MORE_TYPES...> {
return FieldCompare<0, TYPE, MORE_TYPES...>::compare(tuple_a, tuple_b,
format_a, format_b, field_a, field_b);
}
+
+ static int compare_hinted(const struct tuple *tuple_a,
+ hint_t tuple_a_hint,
+ const struct tuple *tuple_b,
+ hint_t tuple_b_hint,
+ struct key_def *key_def)
+ {
+ int rc = hint_cmp(tuple_a_hint, tuple_b_hint);
+ if (rc != 0)
+ return rc;
+ return compare(tuple_a, tuple_b, key_def);
+ }
};
} /* end of anonymous namespace */
struct comparator_signature {
tuple_compare_t f;
+ tuple_compare_hinted_t f_hinted;
uint32_t p[64];
};
#define COMPARATOR(...) \
- { TupleCompare<__VA_ARGS__>::compare, { __VA_ARGS__, UINT32_MAX } },
+ { TupleCompare<__VA_ARGS__>::compare, \
+ TupleCompare<__VA_ARGS__>::compare_hinted, \
+ { __VA_ARGS__, UINT32_MAX } },
/**
* field1 no, field1 type, field2 no, field2 type, ...
@@ -1133,6 +1233,17 @@ struct TupleCompareWithKey
compare(tuple, key, part_count,
key_def, format, field);
}
+
+ static int
+ compare_hinted(const struct tuple *tuple, hint_t tuple_hint,
+ const char *key, uint32_t part_count, hint_t key_hint,
+ struct key_def *key_def)
+ {
+ int rc = hint_cmp(tuple_hint, key_hint);
+ if (rc != 0)
+ return rc;
+ return compare(tuple, key, part_count, key_def);
+ }
};
template <int TYPE, int ...MORE_TYPES>
@@ -1153,6 +1264,17 @@ struct TupleCompareWithKey<0, 0, TYPE, MORE_TYPES...>
compare(tuple, key, part_count,
key_def, format, field);
}
+
+ static int
+ compare_hinted(const struct tuple *tuple, hint_t tuple_hint,
+ const char *key, uint32_t part_count, hint_t key_hint,
+ struct key_def *key_def)
+ {
+ int rc = hint_cmp(tuple_hint, key_hint);
+ if (rc != 0)
+ return rc;
+ return compare(tuple, key, part_count, key_def);
+ }
};
} /* end of anonymous namespace */
@@ -1160,11 +1282,14 @@ struct TupleCompareWithKey<0, 0, TYPE, MORE_TYPES...>
struct comparator_with_key_signature
{
tuple_compare_with_key_t f;
+ tuple_compare_with_key_hinted_t f_hinted;
uint32_t p[64];
};
#define KEY_COMPARATOR(...) \
- { TupleCompareWithKey<0, __VA_ARGS__>::compare, { __VA_ARGS__ } },
+ { TupleCompareWithKey<0, __VA_ARGS__>::compare, \
+ TupleCompareWithKey<0, __VA_ARGS__>::compare_hinted, \
+ { __VA_ARGS__ } },
static const comparator_with_key_signature cmp_wk_arr[] = {
KEY_COMPARATOR(0, FIELD_TYPE_UNSIGNED, 1, FIELD_TYPE_UNSIGNED, 2, FIELD_TYPE_UNSIGNED)
@@ -1186,6 +1311,356 @@ static const comparator_with_key_signature cmp_wk_arr[] = {
/* }}} tuple_compare_with_key */
+/* {{{ tuple_hint */
+
+/**
+ * A comparison hint is an unsigned integer number that has
+ * the following layout:
+ *
+ * [ class | value ]
+ * <-- HINT_CLASS_BITS --> <-- HINT_VALUE_BITS -->
+ * <----------------- HINT_BITS ----------------->
+ *
+ * For simplicity we construct it using the first key part only;
+ * other key parts don't participate in hint construction. As a
+ * consequence, tuple hints are useless if the first key part
+ * doesn't differ among indexed tuples.
+ *
+ * Hint class stores one of mp_class enum values corresponding
+ * to the field type. We store it in upper bits of a hint so
+ * as to guarantee that tuple fields that have different types
+ * are compared correctly in a scalar index. We use the same
+ * layout for indexes of base types too so that we don't need
+ * to recalculate hints when altering an index from scalar to
+ * one of base types or vice versa without index rebuild.
+ *
+ * Hint value is stored in lower bits of a hint and chosen so
+ * as to satisfy the hint property (see the comment to hint_t).
+ * Also, hint values must be compatible among all kinds of numbers
+ * (integer, unsigned, floating point) so that we can alter an
+ * index between any two of them without touching the hints.
+ *
+ * The value depends on the field type:
+ *
+ * - For an integer field, the hint value equals the number
+ * stored in the field minus the minimal (negative) integer
+ * number that fits in a hint. If the number is too large
+ * or too small to fit in a hint, we use the max or the min
+ * number that fits. This guarantees correct order for all
+ * positive and negative integers.
+ *
+ * - For a field storing a floating point number, we use
+ * the hint computed for the integral part. This ensures
+ * compatibility between floating point and integer field
+ * hints.
+ *
+ * - For a boolean field, the hint value is simply 0 or 1.
+ *
+ * - For a string field, we take the first few characters that
+ * fit in a hint and shift them in such a way that comparing
+ * them is equivalent to strcmp(). If there's a collation,
+ * we use the sort key instead of the string (see coll->hint).
+ *
+ * - For a field containing NULL, the value is 0, and we rely on
+ * mp_class comparison rules for arranging nullable fields.
+ */
+#define HINT_BITS (sizeof(hint_t) * CHAR_BIT)
+#define HINT_CLASS_BITS 4
+#define HINT_VALUE_BITS (HINT_BITS - HINT_CLASS_BITS)
+
+/** Number of bytes that fit in a hint value. */
+#define HINT_VALUE_BYTES (HINT_VALUE_BITS / CHAR_BIT)
+
+/** Max unsigned integer that can be stored in a hint value. */
+#define HINT_VALUE_MAX ((1ULL << HINT_VALUE_BITS) - 1)
+
+/**
+ * Max and min signed integer numbers that fit in a hint value.
+ * For numbers > MAX and < MIN we store MAX and MIN, respectively.
+ */
+#define HINT_VALUE_INT_MAX ((1LL << (HINT_VALUE_BITS - 1)) - 1)
+#define HINT_VALUE_INT_MIN (-(1LL << (HINT_VALUE_BITS - 1)))
+
+/**
+ * Max and min floating point numbers whose integral parts fit
+ * in a hint value. Note, we can't compare a floating point number
+ * with HINT_VALUE_INT_{MIN,MAX} because of rounding errors.
+ */
+#define HINT_VALUE_DOUBLE_MAX (exp2(HINT_VALUE_BITS - 1) - 1)
+#define HINT_VALUE_DOUBLE_MIN (-exp2(HINT_VALUE_BITS - 1))
+
+/*
+ * HINT_CLASS_BITS should be big enough to store any mp_class value.
+ * Note, ((1 << HINT_CLASS_BITS) - 1) is reserved for HINT_NONE.
+ */
+static_assert(mp_class_max < (1 << HINT_CLASS_BITS) - 1,
+ "mp_class must fit in tuple hint");
+
+static inline hint_t
+hint_create(enum mp_class c, uint64_t val)
+{
+ assert((val >> HINT_VALUE_BITS) == 0);
+ return (hint_t)(((uint64_t)c << HINT_VALUE_BITS) | val);
+}
+
+static inline hint_t
+hint_nil(void)
+{
+ return hint_create(MP_CLASS_NIL, 0);
+}
+
+static inline hint_t
+hint_bool(bool b)
+{
+ return hint_create(MP_CLASS_BOOL, b ? 1 : 0);
+}
+
+static inline hint_t
+hint_uint(uint64_t u)
+{
+ uint64_t val = (u >= (uint64_t)HINT_VALUE_INT_MAX ?
+ HINT_VALUE_MAX : u - HINT_VALUE_INT_MIN);
+ return hint_create(MP_CLASS_NUMBER, val);
+}
+
+static inline hint_t
+hint_int(int64_t i)
+{
+ assert(i < 0);
+ uint64_t val = (i <= HINT_VALUE_INT_MIN ? 0 : i - HINT_VALUE_INT_MIN);
+ return hint_create(MP_CLASS_NUMBER, val);
+}
+
+static inline hint_t
+hint_double(double d)
+{
+ if (!isfinite(d))
+ return HINT_NONE;
+
+ uint64_t val;
+ if (d >= HINT_VALUE_DOUBLE_MAX)
+ val = HINT_VALUE_MAX;
+ else if (d <= HINT_VALUE_DOUBLE_MIN)
+ val = 0;
+ else
+ val = (int64_t)d - HINT_VALUE_INT_MIN;
+
+ return hint_create(MP_CLASS_NUMBER, val);
+}
+
+static inline uint64_t
+hint_str_raw(const char *s, uint32_t len)
+{
+ len = MIN(len, HINT_VALUE_BYTES);
+ uint64_t val = 0;
+ for (uint32_t i = 0; i < len; i++) {
+ val <<= CHAR_BIT;
+ val |= (unsigned char)s[i];
+ }
+ val <<= CHAR_BIT * (HINT_VALUE_BYTES - len);
+ return val;
+}
+
+static inline hint_t
+hint_str(const char *s, uint32_t len)
+{
+ uint64_t val = hint_str_raw(s, len);
+ return hint_create(MP_CLASS_STR, val);
+}
+
+static inline hint_t
+hint_str_coll(const char *s, uint32_t len, struct coll *coll)
+{
+ char buf[HINT_VALUE_BYTES];
+ uint32_t buf_len = coll->hint(s, len, buf, sizeof(buf), coll);
+ uint64_t val = hint_str_raw(buf, buf_len);
+ return hint_create(MP_CLASS_STR, val);
+}
+
+static inline hint_t
+hint_bin(const char *s, uint32_t len)
+{
+ uint64_t val = hint_str_raw(s, len);
+ return hint_create(MP_CLASS_BIN, val);
+}
+
+static inline hint_t
+field_hint_boolean(const char *field)
+{
+ assert(mp_typeof(*field) == MP_BOOL);
+ return hint_bool(mp_decode_bool(&field));
+}
+
+static inline hint_t
+field_hint_unsigned(const char *field)
+{
+ assert(mp_typeof(*field) == MP_UINT);
+ return hint_uint(mp_decode_uint(&field));
+}
+
+static inline hint_t
+field_hint_integer(const char *field)
+{
+ switch (mp_typeof(*field)) {
+ case MP_UINT:
+ return hint_uint(mp_decode_uint(&field));
+ case MP_INT:
+ return hint_int(mp_decode_int(&field));
+ default:
+ unreachable();
+ }
+ return HINT_NONE;
+}
+
+static inline hint_t
+field_hint_number(const char *field)
+{
+ switch (mp_typeof(*field)) {
+ case MP_UINT:
+ return hint_uint(mp_decode_uint(&field));
+ case MP_INT:
+ return hint_int(mp_decode_int(&field));
+ case MP_FLOAT:
+ return hint_double(mp_decode_float(&field));
+ case MP_DOUBLE:
+ return hint_double(mp_decode_double(&field));
+ default:
+ unreachable();
+ }
+ return HINT_NONE;
+}
+
+static inline hint_t
+field_hint_string(const char *field, struct coll *coll)
+{
+ assert(mp_typeof(*field) == MP_STR);
+ uint32_t len = mp_decode_strl(&field);
+ return coll == NULL ? hint_str(field, len) :
+ hint_str_coll(field, len, coll);
+}
+
+static inline hint_t
+field_hint_scalar(const char *field, struct coll *coll)
+{
+ uint32_t len;
+ switch(mp_typeof(*field)) {
+ case MP_BOOL:
+ return hint_bool(mp_decode_bool(&field));
+ case MP_UINT:
+ return hint_uint(mp_decode_uint(&field));
+ case MP_INT:
+ return hint_int(mp_decode_int(&field));
+ case MP_FLOAT:
+ return hint_double(mp_decode_float(&field));
+ case MP_DOUBLE:
+ return hint_double(mp_decode_double(&field));
+ case MP_STR:
+ len = mp_decode_strl(&field);
+ return coll == NULL ? hint_str(field, len) :
+ hint_str_coll(field, len, coll);
+ case MP_BIN:
+ len = mp_decode_binl(&field);
+ return hint_bin(field, len);
+ default:
+ unreachable();
+ }
+ return HINT_NONE;
+}
+
+template <enum field_type type, bool is_nullable>
+static inline hint_t
+field_hint(const char *field, struct coll *coll)
+{
+ if (is_nullable && mp_typeof(*field) == MP_NIL)
+ return hint_nil();
+ switch (type) {
+ case FIELD_TYPE_BOOLEAN:
+ return field_hint_boolean(field);
+ case FIELD_TYPE_UNSIGNED:
+ return field_hint_unsigned(field);
+ case FIELD_TYPE_INTEGER:
+ return field_hint_integer(field);
+ case FIELD_TYPE_NUMBER:
+ return field_hint_number(field);
+ case FIELD_TYPE_STRING:
+ return field_hint_string(field, coll);
+ case FIELD_TYPE_SCALAR:
+ return field_hint_scalar(field, coll);
+ default:
+ unreachable();
+ }
+ return HINT_NONE;
+}
+
+template <enum field_type type, bool is_nullable>
+static hint_t
+key_hint(const char *key, uint32_t part_count, struct key_def *key_def)
+{
+ if (part_count == 0)
+ return HINT_NONE;
+ return field_hint<type, is_nullable>(key, key_def->parts->coll);
+}
+
+template <enum field_type type, bool is_nullable>
+static hint_t
+tuple_hint(const struct tuple *tuple, struct key_def *key_def)
+{
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ if (is_nullable && field == NULL)
+ return hint_nil();
+ return field_hint<type, is_nullable>(field, key_def->parts->coll);
+}
+
+template<enum field_type type, bool is_nullable>
+static void
+key_def_set_hint_func(struct key_def *def)
+{
+ def->key_hint = key_hint<type, is_nullable>;
+ def->tuple_hint = tuple_hint<type, is_nullable>;
+}
+
+template<enum field_type type>
+static void
+key_def_set_hint_func(struct key_def *def)
+{
+ if (key_part_is_nullable(def->parts))
+ key_def_set_hint_func<type, true>(def);
+ else
+ key_def_set_hint_func<type, false>(def);
+}
+
+static void
+key_def_set_hint_func(struct key_def *def)
+{
+ switch (def->parts->type) {
+ case FIELD_TYPE_BOOLEAN:
+ key_def_set_hint_func<FIELD_TYPE_BOOLEAN>(def);
+ break;
+ case FIELD_TYPE_UNSIGNED:
+ key_def_set_hint_func<FIELD_TYPE_UNSIGNED>(def);
+ break;
+ case FIELD_TYPE_INTEGER:
+ key_def_set_hint_func<FIELD_TYPE_INTEGER>(def);
+ break;
+ case FIELD_TYPE_NUMBER:
+ key_def_set_hint_func<FIELD_TYPE_NUMBER>(def);
+ break;
+ case FIELD_TYPE_STRING:
+ key_def_set_hint_func<FIELD_TYPE_STRING>(def);
+ break;
+ case FIELD_TYPE_SCALAR:
+ key_def_set_hint_func<FIELD_TYPE_SCALAR>(def);
+ break;
+ default:
+ /* Invalid key definition. */
+ def->key_hint = NULL;
+ def->tuple_hint = NULL;
+ break;
+ }
+}
+
+/* }}} tuple_hint */
+
static void
key_def_set_compare_func_fast(struct key_def *def)
{
@@ -1195,7 +1670,9 @@ key_def_set_compare_func_fast(struct key_def *def)
assert(!key_def_has_collation(def));
tuple_compare_t cmp = NULL;
+ tuple_compare_hinted_t cmp_hinted = NULL;
tuple_compare_with_key_t cmp_wk = NULL;
+ tuple_compare_with_key_hinted_t cmp_wk_hinted = NULL;
bool is_sequential = key_def_is_sequential(def);
/*
@@ -1210,6 +1687,7 @@ key_def_set_compare_func_fast(struct key_def *def)
break;
if (i == def->part_count && cmp_arr[k].p[i * 2] == UINT32_MAX) {
cmp = cmp_arr[k].f;
+ cmp_hinted = cmp_arr[k].f_hinted;
break;
}
}
@@ -1222,6 +1700,7 @@ key_def_set_compare_func_fast(struct key_def *def)
}
if (i == def->part_count) {
cmp_wk = cmp_wk_arr[k].f;
+ cmp_wk_hinted = cmp_wk_arr[k].f_hinted;
break;
}
}
@@ -1229,15 +1708,23 @@ key_def_set_compare_func_fast(struct key_def *def)
cmp = is_sequential ?
tuple_compare_sequential<false, false> :
tuple_compare_slowpath<false, false, false>;
+ cmp_hinted = is_sequential ?
+ tuple_compare_sequential_hinted<false, false> :
+ tuple_compare_slowpath_hinted<false, false, false>;
}
if (cmp_wk == NULL) {
cmp_wk = is_sequential ?
tuple_compare_with_key_sequential<false, false> :
tuple_compare_with_key_slowpath<false, false, false>;
+ cmp_wk_hinted = is_sequential ?
+ tuple_compare_with_key_sequential_hinted<false, false> :
+ tuple_compare_with_key_slowpath_hinted<false, false, false>;
}
def->tuple_compare = cmp;
+ def->tuple_compare_hinted = cmp_hinted;
def->tuple_compare_with_key = cmp_wk;
+ def->tuple_compare_with_key_hinted = cmp_wk_hinted;
}
template<bool is_nullable, bool has_optional_parts>
@@ -1248,13 +1735,23 @@ key_def_set_compare_func_plain(struct key_def *def)
if (key_def_is_sequential(def)) {
def->tuple_compare = tuple_compare_sequential
<is_nullable, has_optional_parts>;
+ def->tuple_compare_hinted = tuple_compare_sequential_hinted
+ <is_nullable, has_optional_parts>;
def->tuple_compare_with_key = tuple_compare_with_key_sequential
<is_nullable, has_optional_parts>;
+ def->tuple_compare_with_key_hinted =
+ tuple_compare_with_key_sequential_hinted
+ <is_nullable, has_optional_parts>;
} else {
def->tuple_compare = tuple_compare_slowpath
<is_nullable, has_optional_parts, false>;
+ def->tuple_compare_hinted = tuple_compare_slowpath_hinted
+ <is_nullable, has_optional_parts, false>;
def->tuple_compare_with_key = tuple_compare_with_key_slowpath
<is_nullable, has_optional_parts, false>;
+ def->tuple_compare_with_key_hinted =
+ tuple_compare_with_key_slowpath_hinted
+ <is_nullable, has_optional_parts, false>;
}
}
@@ -1265,8 +1762,13 @@ key_def_set_compare_func_json(struct key_def *def)
assert(def->has_json_paths);
def->tuple_compare = tuple_compare_slowpath
<is_nullable, has_optional_parts, true>;
+ def->tuple_compare_hinted = tuple_compare_slowpath_hinted
+ <is_nullable, has_optional_parts, true>;
def->tuple_compare_with_key = tuple_compare_with_key_slowpath
<is_nullable, has_optional_parts, true>;
+ def->tuple_compare_with_key_hinted =
+ tuple_compare_with_key_slowpath_hinted
+ <is_nullable, has_optional_parts, true>;
}
void
@@ -1294,4 +1796,5 @@ key_def_set_compare_func(struct key_def *def)
key_def_set_compare_func_json<false, false>(def);
}
}
+ key_def_set_hint_func(def);
}
diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c
index afc15e80..b83f0fdc 100644
--- a/src/lib/coll/coll.c
+++ b/src/lib/coll/coll.c
@@ -133,6 +133,30 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
return s_len;
}
+static size_t
+coll_icu_hint(const char *s, size_t s_len, char *buf, size_t buf_len,
+ struct coll *coll)
+{
+ assert(coll->type == COLL_TYPE_ICU);
+ UCharIterator itr;
+ uiter_setUTF8(&itr, s, s_len);
+ uint32_t state[2] = {0, 0};
+ UErrorCode status = U_ZERO_ERROR;
+ return ucol_nextSortKeyPart(coll->collator, &itr, state,
+ (uint8_t *)buf, buf_len, &status);
+}
+
+static size_t
+coll_bin_hint(const char *s, size_t s_len, char *buf, size_t buf_len,
+ struct coll *coll)
+{
+ (void)coll;
+ assert(coll->type == COLL_TYPE_BINARY);
+ size_t len = MIN(s_len, buf_len);
+ memcpy(buf, s, len);
+ return len;
+}
+
/**
* Set up ICU collator and init cmp and hash members of collation.
* @param coll Collation to set up.
@@ -242,6 +266,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
}
coll->cmp = coll_icu_cmp;
coll->hash = coll_icu_hash;
+ coll->hint = coll_icu_hint;
return 0;
}
@@ -356,6 +381,7 @@ coll_new(const struct coll_def *def)
coll->collator = NULL;
coll->cmp = coll_bin_cmp;
coll->hash = coll_bin_hash;
+ coll->hint = coll_bin_hint;
break;
default:
unreachable();
diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h
index 8c9f9429..c505804f 100644
--- a/src/lib/coll/coll.h
+++ b/src/lib/coll/coll.h
@@ -48,6 +48,9 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t,
typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph,
uint32_t *pcarry, struct coll *coll);
+typedef size_t (*coll_hint_f)(const char *s, size_t s_len, char *buf,
+ size_t buf_len, struct coll *coll);
+
struct UCollator;
/**
@@ -61,7 +64,16 @@ struct coll {
struct UCollator *collator;
/** String comparator. */
coll_cmp_f cmp;
+ /** String hash function. */
coll_hash_f hash;
+ /**
+ * String comparison hint.
+ *
+ * This function copies a sort key for a string to
+ * the given buffer and returns the number of bytes
+ * copied. Sort keys may be compared using strcmp().
+ */
+ coll_hint_f hint;
/** Reference counter. */
int refs;
/**
More information about the Tarantool-patches
mailing list