[PATCH v3 7/7] memtx: introduce tuple compare hint
Kirill Shcherbatov
kshcherbatov at tarantool.org
Fri Feb 22 18:42:32 MSK 2019
From: Alexandr Lyapunov <aleks at tarantool.org>
Implement functions for retrieving tuple hints for a particular
key_def. Hint is an integer that can be used for tuple comparison
optimization: if a hint of one tuple is less than a hint of another
then the first tuple is definitely less than the second; only if
hints are equal tuple_compare must be called for getting comparison
result.
Hints are calculated using only the first part of key_def.
Hints are only useful when:
* they are precalculated and stored along with the tuple;
calculation is not cheap (cheaper than tuple_compare btw) but
precalculated hints allow to compare tuples without even fetching
tuple data.
* first part of key_def is 'string'(for now)
* since hint is calculated using only the first part of key_def
(and only first several characters if it is a string) this part
must be different with high probability for every tuple pair.
Closes #3961
---
src/box/CMakeLists.txt | 1 +
src/box/key_def.c | 2 +
src/box/key_def.h | 44 ++++++++
src/box/memtx_tree_decl.c | 89 ++++++++++++++++
src/box/tuple_hint.cc | 210 ++++++++++++++++++++++++++++++++++++++
src/box/tuple_hint.h | 51 +++++++++
src/coll.c | 33 ++++++
src/coll.h | 4 +
8 files changed, 434 insertions(+)
create mode 100644 src/box/tuple_hint.cc
create mode 100644 src/box/tuple_hint.h
diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index dc8cc2cd5..05a4f40f6 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -38,6 +38,7 @@ add_library(tuple STATIC
tuple_format.c
tuple_update.c
tuple_compare.cc
+ tuple_hint.cc
tuple_extract_key.cc
tuple_hash.cc
tuple_bloom.c
diff --git a/src/box/key_def.c b/src/box/key_def.c
index 432b72a97..ced5b0580 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -34,6 +34,7 @@
#include "tuple_compare.h"
#include "tuple_extract_key.h"
#include "tuple_hash.h"
+#include "tuple_hint.h"
#include "column_mask.h"
#include "schema_def.h"
#include "coll_id_cache.h"
@@ -135,6 +136,7 @@ key_def_set_func(struct key_def *def)
key_def_set_compare_func(def);
key_def_set_hash_func(def);
key_def_set_extract_func(def);
+ key_def_set_hint_func(def);
}
static void
diff --git a/src/box/key_def.h b/src/box/key_def.h
index dd62f6a35..00775368f 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -153,6 +153,13 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple,
typedef uint32_t (*key_hash_t)(const char *key,
struct key_def *key_def);
+/** @copydoc tuple_hint() */
+typedef uint64_t (*tuple_hint_t)(const struct tuple *tuple,
+ struct key_def *key_def);
+
+/** @copydoc key_hint() */
+typedef uint64_t (*key_hint_t)(const char *key, struct key_def *key_def);
+
/* Definition of a multipart key. */
struct key_def {
/** @see tuple_compare() */
@@ -167,6 +174,10 @@ struct key_def {
tuple_hash_t tuple_hash;
/** @see key_hash() */
key_hash_t key_hash;
+ /** @see tuple_hint() */
+ tuple_hint_t tuple_hint;
+ /** @see key_hint() */
+ key_hint_t key_hint;
/**
* Minimal part count which always is unique. For example,
* if a secondary index is unique, then
@@ -624,6 +635,39 @@ key_hash(const char *key, struct key_def *key_def)
return key_def->key_hash(key, key_def);
}
+ /*
+ * Get a comparison hint of a tuple.
+ * Hint is such a function h(tuple) in terms of particular key_def that
+ * has follows rules:
+ * if h(t1) < h(t2) then t1 < t2;
+ * if h(t1) > h(t2) then t1 > t2;
+ * if t1 == t2 then h(t1) == h(t2);
+ * These rules means that instead of direct tuple vs tuple (or tuple vs key)
+ * comparison one may compare theirs hints first; and only if theirs hints
+ * are equal compare the tuples themselves.
+ * @param tuple - tuple to get hint of.
+ * @param key_def - key_def that defines which comparison is used.
+ * @return the hint.
+ */
+static inline uint64_t
+tuple_hint(const struct tuple *tuple, struct key_def *key_def)
+{
+ return key_def->tuple_hint(tuple, key_def);
+}
+
+/**
+ * Get a comparison hint of a key.
+ * @See tuple_hint for hint term definition.
+ * @param key - key to get hint of.
+ * @param key_def - key_def that defines which comparison is used.
+ * @return the hint.
+ */
+static inline uint64_t
+key_hint(const char *key, struct key_def *key_def)
+{
+ return key_def->key_hint(key, key_def);
+}
+
#if defined(__cplusplus)
} /* extern "C" */
#endif /* defined(__cplusplus) */
diff --git a/src/box/memtx_tree_decl.c b/src/box/memtx_tree_decl.c
index 82209eaab..a28fdd9fd 100644
--- a/src/box/memtx_tree_decl.c
+++ b/src/box/memtx_tree_decl.c
@@ -77,8 +77,97 @@ struct memtx_basic_tree_key_data {
/* }}} */
+/* {{{ Memtx hinted tree class. *********************************/
+
+/**
+ * Struct that is used as a key in BPS tree definition in
+ * memtx_hint_only_tree and memtx_hinted_tree.
+*/
+struct memtx_hinted_tree_key_data {
+ /** Sequence of msgpacked search fields. */
+ const char *key;
+ /** Number of msgpacked search fields. */
+ uint32_t part_count;
+ /**
+ * Comparison hint. Calculated automatically on 'set'
+ * operation with MEMTX_TREE_KEY_SET().
+ */
+ uint64_t hint;
+};
+
+/**
+ * Struct that is used as a key in BPS tree definition in
+ * memtx_hint_only_tree and memtx_hinted_tree.
+ */
+struct memtx_hinted_tree_data {
+ /** Tuple this node is represents. */
+ struct tuple *tuple;
+ /**
+ * Comparison hint. Calculated automatically on 'set'
+ * operation with MEMTX_TREE_ELEM_SET().
+ */
+ uint64_t hint;
+};
+
+#define MEMTX_TREE_NAME memtx_hinted_tree
+#define memtx_tree_elem_t struct memtx_hinted_tree_data
+#define memtx_tree_key_t struct memtx_hinted_tree_key_data
+#define MEMTX_TREE_COMPARE(elem_a_ptr, elem_b_ptr, key_def) ({ \
+ int rc; \
+ if ((elem_a_ptr)->hint != (elem_b_ptr)->hint) { \
+ rc = (elem_a_ptr)->hint < (elem_b_ptr)->hint ? -1 : 1; \
+ } else { \
+ rc = tuple_compare((elem_a_ptr)->tuple, (elem_b_ptr)->tuple, \
+ key_def); \
+ } \
+ rc; \
+})
+#define MEMTX_TREE_COMPARE_KEY(elem_ptr, key_ptr, key_def) ({ \
+ int rc; \
+ if ((elem_ptr)->hint != (key_ptr)->hint) { \
+ rc = (elem_ptr)->hint < (key_ptr)->hint ? -1 : 1; \
+ } else { \
+ rc = tuple_compare_with_key((elem_ptr)->tuple, (key_ptr)->key, \
+ (key_ptr)->part_count, key_def); \
+ } \
+ rc; \
+})
+#define MEMTX_TREE_ELEM_SET(elem_ptr, info, key_def) ({ \
+ (elem_ptr)->tuple = info; \
+ (elem_ptr)->hint = tuple_hint(info, key_def); \
+})
+#define MEMTX_TREE_KEY_SET(key_ptr, key_val, part_count_val, key_def) ({ \
+ (key_ptr)->key = key_val; \
+ (key_ptr)->part_count = part_count_val; \
+ (key_ptr)->hint = part_count_val > 0 ? key_hint(key_val, key_def) : 0; \
+})
+#define MEMTX_TREE_ELEM_GET(elem_ptr) ((elem_ptr)->tuple)
+#define MEMTX_TREE_KEY_GET(key_ptr, part_count_ptr) \
+ ({*part_count_ptr = (key_ptr)->part_count; (key_ptr)->key;})
+#define MEMTX_TREE_IDENTICAL(elem_a_ptr, elem_b_ptr) \
+ ({(elem_a_ptr)->tuple == (elem_b_ptr)->tuple;})
+
+#include "memtx_tree_impl.h"
+
+#undef memtx_tree_key_t
+#undef memtx_tree_elem_t
+#undef MEMTX_TREE_KEY_GET
+#undef MEMTX_TREE_ELEM_GET
+#undef MEMTX_TREE_KEY_SET
+#undef MEMTX_TREE_ELEM_SET
+#undef MEMTX_TREE_COMPARE_KEY
+#undef MEMTX_TREE_COMPARE
+#undef MEMTX_TREE_NAME
+#undef MEMTX_TREE_IDENTICAL
+
+/* }}} */
+
struct index *
memtx_tree_index_new(struct memtx_engine *memtx, struct index_def *def)
{
+ if (def->cmp_def->parts->type == FIELD_TYPE_STRING ||
+ def->cmp_def->parts->type == FIELD_TYPE_UNSIGNED ||
+ def->cmp_def->parts->type == FIELD_TYPE_INTEGER)
+ return memtx_hinted_tree_index_new(memtx, def);
return memtx_basic_tree_index_new(memtx, def);
}
diff --git a/src/box/tuple_hint.cc b/src/box/tuple_hint.cc
new file mode 100644
index 000000000..7b73403be
--- /dev/null
+++ b/src/box/tuple_hint.cc
@@ -0,0 +1,210 @@
+/*
+ * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "coll.h"
+#include "tuple.h"
+#include "tuple_hint.h"
+
+static uint64_t
+key_hint_default(const char *key, struct key_def *key_def)
+{
+ (void)key;
+ (void)key_def;
+ return 0;
+}
+
+static uint64_t
+tuple_hint_default(const struct tuple *tuple, struct key_def *key_def)
+{
+ (void)tuple;
+ (void)key_def;
+ return 0;
+}
+
+template<bool is_nullable>
+static uint64_t
+key_hint_uint(const char *key, struct key_def *key_def)
+{
+ (void)key_def;
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_UNSIGNED ||
+ key_def->parts->type == FIELD_TYPE_INTEGER);
+ if (is_nullable && mp_typeof(*key) == MP_NIL)
+ return 0;
+ assert(mp_typeof(*key) == MP_UINT);
+ uint64_t val = mp_decode_uint(&key);
+ if (val > INT64_MAX)
+ return INT64_MAX;
+ return val - (uint64_t)INT64_MIN;
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_UNSIGNED);
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ if (is_nullable && field == NULL)
+ return 0;
+ return key_hint_uint<is_nullable>(field, key_def);
+}
+
+template<bool is_nullable>
+static uint64_t
+key_hint_int(const char *key, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_INTEGER);
+ if (is_nullable && mp_typeof(*key) == MP_NIL)
+ return 0;
+ if (mp_typeof(*key) == MP_UINT) {
+ return key_hint_uint<is_nullable>(key, key_def);
+ } else {
+ assert(mp_typeof(*key) == MP_INT);
+ int64_t val = mp_decode_int(&key);
+ return (uint64_t)val - (uint64_t)INT64_MIN;
+ }
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_int(const struct tuple *tuple, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_INTEGER);
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ if (is_nullable && field == NULL)
+ return 0;
+ return key_hint_int<is_nullable>(field, key_def);
+}
+
+template<bool is_nullable>
+static uint64_t
+key_hint_string(const char *key, struct key_def *key_def)
+{
+ (void)key_def;
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_STRING);
+ if (is_nullable && mp_typeof(*key) == MP_NIL)
+ return 0;
+ assert(key_def->parts->type == FIELD_TYPE_STRING);
+ assert(mp_typeof(*key) == MP_STR);
+ uint32_t len;
+ const unsigned char *str =
+ (const unsigned char *)mp_decode_str(&key, &len);
+ uint64_t result = 0;
+ uint32_t process_len = MIN(len, 8);
+ for (uint32_t i = 0; i < process_len; i++) {
+ result <<= 8;
+ result |= str[i];
+ }
+ result <<= 8 * (8 - process_len);
+ return result;
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_string(const struct tuple *tuple, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_STRING);
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ if (is_nullable && field == NULL)
+ return 0;
+ return key_hint_string<is_nullable>(field, key_def);
+}
+
+template<bool is_nullable>
+static uint64_t
+key_hint_string_coll(const char *key, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_STRING &&
+ key_def->parts->coll != NULL);
+ if (is_nullable && mp_typeof(*key) == MP_NIL)
+ return 0;
+ assert(key_def->parts->type == FIELD_TYPE_STRING);
+ assert(mp_typeof(*key) == MP_STR);
+ assert(key_def->parts->coll != NULL);
+ uint32_t len;
+ const char *str = mp_decode_str(&key, &len);
+ return key_def->parts->coll->hint(str, len, key_def->parts->coll);
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def)
+{
+ assert(key_part_is_nullable(key_def->parts) == is_nullable);
+ assert(key_def->parts->type == FIELD_TYPE_STRING &&
+ key_def->parts->coll != NULL);
+ const char *field = tuple_field_by_part(tuple, key_def->parts);
+ if (is_nullable && field == NULL)
+ return 0;
+ return key_hint_string_coll<is_nullable>(field, key_def);
+}
+
+void
+key_def_set_hint_func(struct key_def *def)
+{
+ def->key_hint = key_hint_default;
+ def->tuple_hint = tuple_hint_default;
+ bool is_nullable = key_part_is_nullable(def->parts);
+ if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL) {
+ def->key_hint = is_nullable ? key_hint_string_coll<true> :
+ key_hint_string_coll<false>;
+ def->tuple_hint = is_nullable ? tuple_hint_string_coll<true> :
+ tuple_hint_string_coll<false>;
+ return;
+ }
+ switch (def->parts->type) {
+ case FIELD_TYPE_UNSIGNED:
+ def->key_hint = is_nullable ? key_hint_uint<true> :
+ key_hint_uint<false>;
+ def->tuple_hint = is_nullable ? tuple_hint_uint<true> :
+ tuple_hint_uint<false>;
+ break;
+ case FIELD_TYPE_INTEGER:
+ def->key_hint = is_nullable ? key_hint_int<true> :
+ key_hint_int<false>;
+ def->tuple_hint = is_nullable ? tuple_hint_int<true> :
+ tuple_hint_int<false>;
+ break;
+ case FIELD_TYPE_STRING:
+ def->key_hint = is_nullable ? key_hint_string<true> :
+ key_hint_string<false>;
+ def->tuple_hint = is_nullable ? tuple_hint_string<true> :
+ tuple_hint_string<false>;
+ break;
+ default:
+ break;
+ };
+}
diff --git a/src/box/tuple_hint.h b/src/box/tuple_hint.h
new file mode 100644
index 000000000..d38a96c8f
--- /dev/null
+++ b/src/box/tuple_hint.h
@@ -0,0 +1,51 @@
+#ifndef TARANTOOL_BOX_TUPLE_HINT_H_INCLUDED
+#define TARANTOOL_BOX_TUPLE_HINT_H_INCLUDED
+/*
+ * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+struct key_def;
+
+/**
+ * Initialize tuple_hint() and key_hint() functions for the
+ * key_def.
+ * @param key_def key definition to set up.
+ */
+void
+key_def_set_hint_func(struct key_def *key_def);
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
+
+#endif /* TARANTOOL_BOX_TUPLE_HINT_H_INCLUDED */
diff --git a/src/coll.c b/src/coll.c
index 6d9c44dbf..bf892087d 100644
--- a/src/coll.c
+++ b/src/coll.c
@@ -133,6 +133,37 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
return s_len;
}
+/** Get a compare hint of a binary collation. */
+static uint64_t
+coll_bin_hint(const char *s, size_t s_len, struct coll *coll)
+{
+ (void)coll;
+ uint64_t result = 0;
+ uint32_t process_len = MIN(s_len, 8);
+ for (uint32_t i = 0; i < process_len; i++) {
+ result <<= 8;
+ result |= ((unsigned char *)s)[i];
+ }
+ result <<= 8 * (8 - process_len);
+ return result;
+}
+
+/** Get a compare hint of a string using ICU collation. */
+static uint64_t
+coll_icu_hint(const char *s, size_t s_len, struct coll *coll)
+{
+ assert(coll->type == COLL_TYPE_ICU);
+ UCharIterator itr;
+ uiter_setUTF8(&itr, s, s_len);
+ uint8_t buf[7];
+ uint32_t state[2] = {0, 0};
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf,
+ sizeof(buf), &status);
+ assert(got >= 0 && got <= 7);
+ return coll_bin_hint((const char *)buf, got, NULL);
+}
+
/**
* Set up ICU collator and init cmp and hash members of collation.
* @param coll Collation to set up.
@@ -242,6 +273,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
}
coll->cmp = coll_icu_cmp;
coll->hash = coll_icu_hash;
+ coll->hint = coll_icu_hint;
return 0;
}
@@ -332,6 +364,7 @@ coll_new(const struct coll_def *def)
coll->collator = NULL;
coll->cmp = coll_bin_cmp;
coll->hash = coll_bin_hash;
+ coll->hint = coll_bin_hint;
break;
default:
unreachable();
diff --git a/src/coll.h b/src/coll.h
index 9c725712a..53133dae3 100644
--- a/src/coll.h
+++ b/src/coll.h
@@ -47,6 +47,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t,
typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph,
uint32_t *pcarry, struct coll *coll);
+typedef uint64_t (*coll_hint_f)(const char *s, size_t s_len, struct coll *coll);
+
struct UCollator;
/**
@@ -61,6 +63,8 @@ struct coll {
/** String comparator. */
coll_cmp_f cmp;
coll_hash_f hash;
+ /** The pointer to routine calculating tuple hint. */
+ coll_hint_f hint;
/** Reference counter. */
int refs;
/**
--
2.20.1
More information about the Tarantool-patches
mailing list