[PATCH v1 3/4] box: introduce tuple compare hint
Kirill Shcherbatov
kshcherbatov at tarantool.org
Tue Feb 5 14:58:38 MSK 2019
From: Alexandr Lyapunov <aleks at tarantool.org>
Implement functions for retrieving tuple hints for a particular
key_def. Hint is an integer that can be used for tuple comparison
optimization: if a hint of one tuple is less than a hint of another
then the first tuple is definitely less than the second; only if
hints are equal tuple_compare must be called for getting comparison
result.
Hints are calculated using only the first part of key_def.
Hints are only useful when:
* they are precalculated and stored along with the tuple;
calculation is not cheap (cheaper than tuple_compare btw) but
precalculated hints allow to compare tuples without even fetching
tuple data.
* first part of key_def is 'string', 'unsigned' or 'integer'
* since hint is calculated using only the first part of key_def
(and only first several characters if it is a string) this part
must be different with high probability for every tuple pair.
@kshcherbatov: rebase to 2.1, fix conflicts
Needed for #3961
---
src/box/key_def.c | 1 +
src/box/key_def.h | 11 ++++
src/box/tuple_compare.cc | 131 +++++++++++++++++++++++++++++++++++++++
src/box/tuple_compare.h | 40 ++++++++++++
src/coll.c | 23 +++++++
src/coll.h | 3 +
6 files changed, 209 insertions(+)
diff --git a/src/box/key_def.c b/src/box/key_def.c
index 9411ade39..ceca1ebee 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -135,6 +135,7 @@ key_def_set_cmp(struct key_def *def)
def->tuple_compare_with_key = tuple_compare_with_key_create(def);
tuple_hash_func_set(def);
tuple_extract_key_set(def);
+ tuple_hint_set(def);
}
static void
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 85bed92bb..ff86de3ca 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -153,6 +153,13 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple,
typedef uint32_t (*key_hash_t)(const char *key,
struct key_def *key_def);
+/** @copydoc tuple_hint() */
+typedef uint64_t (*tuple_hint_t)(const struct tuple *tuple,
+ struct key_def *key_def);
+
+/** @copydoc key_hint() */
+typedef uint64_t (*key_hint_t)(const char *key, struct key_def *key_def);
+
/* Definition of a multipart key. */
struct key_def {
/** @see tuple_compare() */
@@ -167,6 +174,10 @@ struct key_def {
tuple_hash_t tuple_hash;
/** @see key_hash() */
key_hash_t key_hash;
+ /** @see tuple_hint() */
+ tuple_hint_t tuple_hint;
+ /** @see key_hint() */
+ key_hint_t key_hint;
/**
* Minimal part count which always is unique. For example,
* if a secondary index is unique, then
diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
index 1d1ab8711..b8f91972c 100644
--- a/src/box/tuple_compare.cc
+++ b/src/box/tuple_compare.cc
@@ -1321,4 +1321,135 @@ tuple_compare_with_key_create(const struct key_def *def)
compare_with_key_slowpath_funcs[cmp_func_idx];
}
+static uint64_t
+key_hint_default(const char *key, struct key_def *key_def)
+{
+ (void)key;
+ (void)key_def;
+ return 0;
+}
+
+static uint64_t
+tuple_hint_default(const struct tuple *tuple, struct key_def *key_def)
+{
+ (void)tuple;
+ (void)key_def;
+ return 0;
+}
+
+static uint64_t
+key_hint_uint(const char *key, struct key_def *key_def)
+{
+ (void)key_def;
+ assert(key_def->parts->type == FIELD_TYPE_UNSIGNED);
+ assert(mp_typeof(*key) == MP_UINT);
+ return mp_decode_uint(&key);
+}
+
+static uint64_t
+tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def)
+{
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ return key_hint_uint(field, key_def);
+}
+
+static uint64_t
+key_hint_int(const char *key, struct key_def *key_def)
+{
+ (void)key_def;
+ assert(key_def->parts->type == FIELD_TYPE_INTEGER);
+ if (mp_typeof(*key) == MP_UINT) {
+ uint64_t val = mp_decode_uint(&key);
+ if (val > INT64_MAX)
+ return INT64_MAX;
+ return val - (uint64_t)INT64_MIN;
+ } else {
+ assert(mp_typeof(*key) == MP_INT);
+ int64_t val = mp_decode_int(&key);
+ return (uint64_t)val - (uint64_t)INT64_MIN;
+ }
+}
+
+static uint64_t
+tuple_hint_int(const struct tuple *tuple, struct key_def *key_def)
+{
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ return key_hint_int(field, key_def);
+}
+
+static uint64_t
+key_hint_string(const char *key, struct key_def *key_def)
+{
+ (void)key_def;
+ assert(key_def->parts->type == FIELD_TYPE_STRING);
+ assert(mp_typeof(*key) == MP_STR);
+ uint32_t len;
+ const unsigned char *str =
+ (const unsigned char *)mp_decode_str(&key, &len);
+ uint64_t result = 0;
+ uint32_t process_len = MIN(len, 7);
+ for (uint32_t i = 0; i < process_len; i++) {
+ result <<= 9;
+ result |= 0x100;
+ result |= str[i];
+ }
+ result <<= 9 * (7 - process_len);
+ return result;
+}
+
+static uint64_t
+tuple_hint_string(const struct tuple *tuple, struct key_def *key_def)
+{
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ return key_hint_string(field, key_def);
+}
+
+static uint64_t
+key_hint_string_coll(const char *key, struct key_def *key_def)
+{
+ assert(key_def->parts->type == FIELD_TYPE_STRING);
+ assert(mp_typeof(*key) == MP_STR);
+ assert(key_def->parts->coll != NULL);
+ uint32_t len;
+ const char *str = mp_decode_str(&key, &len);
+ return key_def->parts->coll->hint(str, len, key_def->parts->coll);
+}
+
+static uint64_t
+tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def)
+{
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ return key_hint_string_coll(field, key_def);
+}
+
+void
+tuple_hint_set(struct key_def *def)
+{
+ def->key_hint = key_hint_default;
+ def->tuple_hint = tuple_hint_default;
+ if (key_part_is_nullable(def->parts))
+ return;
+ if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL) {
+ def->key_hint = key_hint_string_coll;
+ def->tuple_hint = tuple_hint_string_coll;
+ return;
+ }
+ switch (def->parts->type) {
+ case FIELD_TYPE_UNSIGNED:
+ def->key_hint = key_hint_uint;
+ def->tuple_hint = tuple_hint_uint;
+ break;
+ case FIELD_TYPE_INTEGER:
+ def->key_hint = key_hint_int;
+ def->tuple_hint = tuple_hint_int;
+ break;
+ case FIELD_TYPE_STRING:
+ def->key_hint = key_hint_string;
+ def->tuple_hint = tuple_hint_string;
+ break;
+ default:
+ break;
+ };
+}
+
/* }}} tuple_compare_with_key */
diff --git a/src/box/tuple_compare.h b/src/box/tuple_compare.h
index e3a63204f..be96ba38c 100644
--- a/src/box/tuple_compare.h
+++ b/src/box/tuple_compare.h
@@ -67,6 +67,46 @@ tuple_compare_create(const struct key_def *key_def);
tuple_compare_with_key_t
tuple_compare_with_key_create(const struct key_def *key_def);
+/**
+ * Get a comparison hint of a tuple.
+ * Hint is such a function h(tuple) in terms of particular key_def that
+ * has follows rules:
+ * if h(t1) < h(t2) then t1 < t2;
+ * if h(t1) > h(t2) then t1 > t2;
+ * if t1 == t2 then h(t1) == h(t2);
+ * These rules means that instead of direct tuple vs tuple (or tuple vs key)
+ * comparison one may compare theirs hints first; and only if theirs hints
+ * are equal compare the tuples themselves.
+ * @param tuple - tuple to get hint of.
+ * @param key_def - key_def that defines which comparison is used.
+ * @return the hint.
+ */
+static inline uint64_t
+tuple_hint(const struct tuple *tuple, struct key_def *key_def)
+{
+ return key_def->tuple_hint(tuple, key_def);
+}
+
+/**
+ * Get a comparison hint of a key.
+ * @See tuple_hint for hint term definition.
+ * @param key - key to get hint of.
+ * @param key_def - key_def that defines which comparison is used.
+ * @return the hint.
+ */
+static inline uint64_t
+key_hint(const char *key, struct key_def *key_def)
+{
+ return key_def->key_hint(key, key_def);
+}
+
+/**
+ * Initialize tuple_hint() and key_hint() functions for the key_def.
+ * @param key_def key definition to set up.
+ */
+void
+tuple_hint_set(struct key_def *key_def);
+
#if defined(__cplusplus)
} /* extern "C" */
#endif /* defined(__cplusplus) */
diff --git a/src/coll.c b/src/coll.c
index 6d9c44dbf..cff2899cd 100644
--- a/src/coll.c
+++ b/src/coll.c
@@ -133,6 +133,28 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
return s_len;
}
+/** Get a compare hint of a string using ICU collation. */
+static uint64_t
+coll_icu_hint(const char *s, size_t s_len, struct coll *coll)
+{
+ UCharIterator itr;
+ uiter_setUTF8(&itr, s, s_len);
+ uint8_t buf[7];
+ uint32_t state[2] = {0, 0};
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf,
+ sizeof(buf), &status);
+ assert(got >= 0 && got <= 7);
+ uint64_t result = 0;
+ for (int32_t i = 0; i < got; i++) {
+ result <<= 9;
+ result |= 0x100;
+ result |= buf[i];
+ }
+ result <<= 9 * (7 - got);
+ return result;
+}
+
/**
* Set up ICU collator and init cmp and hash members of collation.
* @param coll Collation to set up.
@@ -242,6 +264,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
}
coll->cmp = coll_icu_cmp;
coll->hash = coll_icu_hash;
+ coll->hint = coll_icu_hint;
return 0;
}
diff --git a/src/coll.h b/src/coll.h
index 9c725712a..4030d38de 100644
--- a/src/coll.h
+++ b/src/coll.h
@@ -47,6 +47,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t,
typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph,
uint32_t *pcarry, struct coll *coll);
+typedef uint64_t (*coll_hint_f)(const char *s, size_t s_len, struct coll *coll);
+
struct UCollator;
/**
@@ -61,6 +63,7 @@ struct coll {
/** String comparator. */
coll_cmp_f cmp;
coll_hash_f hash;
+ coll_hint_f hint;
/** Reference counter. */
int refs;
/**
--
2.20.1
More information about the Tarantool-patches
mailing list