[PATCH v4 3/3] memtx: introduce tuple compare hint

Kirill Shcherbatov kshcherbatov at tarantool.org
Thu Feb 28 16:38:49 MSK 2019


From: Alexandr Lyapunov <aleks at tarantool.org>

Implement functions for retrieving tuple hints for a particular
key_def. Hint is an integer that can be used for tuple comparison
optimization: if a hint of one tuple is less than a hint of another
then the first tuple is definitely less than the second; only if
hints are equal tuple_compare must be called for getting comparison
result.

Hints are calculated using only the first part of key_def.

Hints are only useful when:
 * they are precalculated and stored along with the tuple;
calculation is not cheap (cheaper than tuple_compare btw) but
precalculated hints allow to compare tuples without even fetching
tuple data.
 * first part of key_def is 'string'(for now)
 * since hint is calculated using only the first part of key_def
(and only first several characters if it is a string) this part
must be different with high probability for every tuple pair.

@kshcherbatov: massive refactoring, new hints for number,
               boolean, binary collation.

Closes #3961
---
 src/box/CMakeLists.txt |   1 +
 src/box/key_def.c      |   2 +
 src/box/key_def.h      |  44 ++++++
 src/box/memtx_tree.cc  |  43 +++++-
 src/box/tuple_hint.cc  | 301 +++++++++++++++++++++++++++++++++++++++++
 src/box/tuple_hint.h   |  60 ++++++++
 src/lib/coll/coll.c    |  33 +++++
 src/lib/coll/coll.h    |   4 +
 8 files changed, 484 insertions(+), 4 deletions(-)
 create mode 100644 src/box/tuple_hint.cc
 create mode 100644 src/box/tuple_hint.h

diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index d4d9b7aeb..f0c508c02 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -38,6 +38,7 @@ add_library(tuple STATIC
     tuple_format.c
     tuple_update.c
     tuple_compare.cc
+    tuple_hint.cc
     tuple_extract_key.cc
     tuple_hash.cc
     tuple_bloom.c
diff --git a/src/box/key_def.c b/src/box/key_def.c
index 432b72a97..ced5b0580 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -34,6 +34,7 @@
 #include "tuple_compare.h"
 #include "tuple_extract_key.h"
 #include "tuple_hash.h"
+#include "tuple_hint.h"
 #include "column_mask.h"
 #include "schema_def.h"
 #include "coll_id_cache.h"
@@ -135,6 +136,7 @@ key_def_set_func(struct key_def *def)
 	key_def_set_compare_func(def);
 	key_def_set_hash_func(def);
 	key_def_set_extract_func(def);
+	key_def_set_hint_func(def);
 }
 
 static void
diff --git a/src/box/key_def.h b/src/box/key_def.h
index dd62f6a35..00775368f 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -153,6 +153,13 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple,
 typedef uint32_t (*key_hash_t)(const char *key,
 				struct key_def *key_def);
 
+/** @copydoc tuple_hint() */
+typedef uint64_t (*tuple_hint_t)(const struct tuple *tuple,
+				 struct key_def *key_def);
+
+/** @copydoc key_hint() */
+typedef uint64_t (*key_hint_t)(const char *key, struct key_def *key_def);
+
 /* Definition of a multipart key. */
 struct key_def {
 	/** @see tuple_compare() */
@@ -167,6 +174,10 @@ struct key_def {
 	tuple_hash_t tuple_hash;
 	/** @see key_hash() */
 	key_hash_t key_hash;
+	/** @see tuple_hint() */
+	tuple_hint_t tuple_hint;
+	/** @see key_hint() */
+	key_hint_t key_hint;
 	/**
 	 * Minimal part count which always is unique. For example,
 	 * if a secondary index is unique, then
@@ -624,6 +635,39 @@ key_hash(const char *key, struct key_def *key_def)
 	return key_def->key_hash(key, key_def);
 }
 
+ /*
+ * Get a comparison hint of a tuple.
+ * Hint is such a function h(tuple) in terms of particular key_def that
+ * has follows rules:
+ * if h(t1) < h(t2) then t1 < t2;
+ * if h(t1) > h(t2) then t1 > t2;
+ * if t1 == t2 then h(t1) == h(t2);
+ * These rules means that instead of direct tuple vs tuple (or tuple vs key)
+ * comparison one may compare theirs hints first; and only if theirs hints
+ * are equal compare the tuples themselves.
+ * @param tuple - tuple to get hint of.
+ * @param key_def - key_def that defines which comparison is used.
+ * @return the hint.
+ */
+static inline uint64_t
+tuple_hint(const struct tuple *tuple, struct key_def *key_def)
+{
+	return key_def->tuple_hint(tuple, key_def);
+}
+
+/**
+ * Get a comparison hint of a key.
+ * @See tuple_hint for hint term definition.
+ * @param key - key to get hint of.
+ * @param key_def - key_def that defines which comparison is used.
+ * @return the hint.
+ */
+static inline uint64_t
+key_hint(const char *key, struct key_def *key_def)
+{
+	return key_def->key_hint(key, key_def);
+}
+
 #if defined(__cplusplus)
 } /* extern "C" */
 #endif /* defined(__cplusplus) */
diff --git a/src/box/memtx_tree.cc b/src/box/memtx_tree.cc
index 346585dea..2b8a07f00 100644
--- a/src/box/memtx_tree.cc
+++ b/src/box/memtx_tree.cc
@@ -36,6 +36,7 @@
 #include "memory.h"
 #include "fiber.h"
 #include "tuple.h"
+#include "tuple_hint.h"
 #include <third_party/qsort_arg.h>
 #include <small/mempool.h>
 
@@ -47,6 +48,11 @@ class MemtxTreeKeyData {
 	const char *key;
 	/** Number of msgpacked search fields. */
 	uint32_t part_count;
+	/**
+	 * Comparison hint. Calculated automatically on method
+	 * MemtxTreeKeyData::set.
+	 */
+	uint64_t hint;
 public:
 	/**
 	 * Return "payload" data that this object stores:
@@ -59,6 +65,16 @@ public:
 		return this->key;
 	}
 
+	/**
+	 * Return comparasion hint is calculated before "payload"
+	 * store on MemtxTreeKeyData::set call.
+	 */
+	uint64_t
+	get_hint(void) const
+	{
+		return this->hint;
+	}
+
 	/**
 	 * Perform this MemtxTreeKeyData object
 	 * initialization.
@@ -66,9 +82,9 @@ public:
 	void
 	set(const char *key, uint32_t part_count, struct key_def *key_def)
 	{
-		(void)key_def;
 		this->key = key;
 		this->part_count = part_count;
+		this->hint = part_count > 0 ? key_hint(key, key_def) : 0;
 	}
 };
 
@@ -78,6 +94,11 @@ public:
 class MemtxTreeData {
 	/** Data tuple pointer. */
 	struct tuple *tuple;
+	/**
+	 * Comparison hint. Calculated automatically on method
+	 * MemtxTreeData::set.
+	 */
+	uint64_t hint;
 public:
 	/**
 	 * Return "payload" data that this object stores:
@@ -95,6 +116,7 @@ public:
 	{
 		(void)key_def;
 		this->tuple = tuple;
+		this->hint = tuple_hint(tuple, key_def);
 	}
 
 	/** Clear this MemtxTreeData object. */
@@ -124,7 +146,13 @@ public:
 	int
 	compare(const MemtxTreeData *elem, struct key_def *key_def) const
 	{
-		return tuple_compare(this->tuple, elem->tuple, key_def);
+		if (likely(this->hint != elem->hint &&
+			   this->hint != INVALID_HINT &&
+			   elem->hint != INVALID_HINT)) {
+			return this->hint < elem->hint ? -1 : 1;
+		} else {
+			return tuple_compare(this->tuple, elem->tuple, key_def);
+		}
 	}
 
 	/**
@@ -137,8 +165,15 @@ public:
 	{
 		uint32_t part_count;
 		const char *key_data = key->get(&part_count);
-		return tuple_compare_with_key(this->tuple, key_data, part_count,
-					      key_def);
+		uint64_t key_hint = key->get_hint();
+		if (likely(this->hint != key_hint &&
+			   this->hint != INVALID_HINT &&
+			   key_hint != INVALID_HINT)) {
+			return this->hint < key_hint ? -1 : 1;
+		} else {
+			return tuple_compare_with_key(this->tuple, key_data,
+						      part_count, key_def);
+		}
 	}
 };
 
diff --git a/src/box/tuple_hint.cc b/src/box/tuple_hint.cc
new file mode 100644
index 000000000..5acabc985
--- /dev/null
+++ b/src/box/tuple_hint.cc
@@ -0,0 +1,301 @@
+/*
+ * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "tuple.h"
+#include "tuple_hint.h"
+#include "lib/coll/coll.h"
+#include <math.h>
+
+static uint64_t
+key_hint_default(const char *key, struct key_def *key_def)
+{
+	(void)key;
+	(void)key_def;
+	return INVALID_HINT;
+}
+
+static uint64_t
+tuple_hint_default(const struct tuple *tuple, struct key_def *key_def)
+{
+	(void)tuple;
+	(void)key_def;
+	return INVALID_HINT;
+}
+
+template<bool is_nullable>
+static uint64_t
+key_hint_uint(const char *key, struct key_def *key_def)
+{
+	(void)key_def;
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_UNSIGNED);
+	if (is_nullable && mp_typeof(*key) == MP_NIL)
+		return 0;
+	assert(mp_typeof(*key) == MP_UINT);
+	uint64_t val = mp_decode_uint(&key);
+	if (unlikely(val > INT64_MAX))
+		return INT64_MAX;
+	return val - (uint64_t)INT64_MIN;
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_UNSIGNED);
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	if (is_nullable && field == NULL)
+		return 0;
+	return key_hint_uint<is_nullable>(field, key_def);
+}
+
+template<bool is_nullable>
+static uint64_t
+key_hint_int(const char *key, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_INTEGER);
+	if (is_nullable && mp_typeof(*key) == MP_NIL)
+		return 0;
+	if (mp_typeof(*key) == MP_UINT) {
+		uint64_t val = mp_decode_uint(&key);
+		if (unlikely(val > INT64_MAX))
+			return INT64_MAX;
+		return val - (uint64_t)INT64_MIN;
+	} else {
+		assert(mp_typeof(*key) == MP_INT);
+		int64_t val = mp_decode_int(&key);
+		return (uint64_t)val - (uint64_t)INT64_MIN;
+	}
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_int(const struct tuple *tuple, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_INTEGER);
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	if (is_nullable && field == NULL)
+		return 0;
+	return key_hint_int<is_nullable>(field, key_def);
+}
+
+template<bool is_nullable>
+static uint64_t
+key_hint_number(const char *key, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_NUMBER);
+	if (is_nullable && mp_typeof(*key) == MP_NIL)
+		return 0;
+	uint64_t val = 0;
+	switch (mp_typeof(*key)) {
+	case MP_FLOAT:
+	case MP_DOUBLE: {
+		double f = mp_typeof(*key) == MP_FLOAT ?
+			   mp_decode_float(&key) : mp_decode_double(&key);
+		if (isnan(f) || isinf(f))
+			return INVALID_HINT;
+		double ival;
+		(void)modf(f, &ival);
+		if (unlikely(ival >= (double)INT64_MAX))
+			return INT64_MAX;
+		if (unlikely(ival <= (double)INT64_MIN))
+			return 0;
+		val = (uint64_t)ival;
+		break;
+	}
+	case MP_INT: {
+		val = (uint64_t)mp_decode_int(&key);
+		break;
+	}
+	case MP_UINT: {
+		val = mp_decode_uint(&key);
+		if (val > INT64_MAX)
+			return INT64_MAX;
+		break;
+	}
+	default:
+		unreachable();
+	}
+	return val - (uint64_t)INT64_MIN;
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_number(const struct tuple *tuple, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_NUMBER);
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	if (is_nullable && field == NULL)
+		return 0;
+	return key_hint_number<is_nullable>(field, key_def);
+}
+
+template<bool is_nullable>
+static uint64_t
+key_hint_boolean(const char *key, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_BOOLEAN);
+	if (is_nullable && mp_typeof(*key) == MP_NIL)
+		return 0;
+	bool val = mp_decode_bool(&key);
+	return (uint64_t)val - (uint64_t)INT64_MIN;
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_boolean(const struct tuple *tuple, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_BOOLEAN);
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	if (is_nullable && field == NULL)
+		return 0;
+	return key_hint_boolean<is_nullable>(field, key_def);
+}
+
+template<bool is_nullable>
+static uint64_t
+key_hint_string(const char *key, struct key_def *key_def)
+{
+	(void)key_def;
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->coll == NULL);
+	assert(key_def->parts->type == FIELD_TYPE_STRING);
+	if (is_nullable && mp_typeof(*key) == MP_NIL)
+		return 0;
+	assert(mp_typeof(*key) == MP_STR);
+	uint32_t len;
+	const unsigned char *str =
+		(const unsigned char *)mp_decode_str(&key, &len);
+	uint64_t result = 0;
+	uint32_t process_len = MIN(len, 8);
+	for (uint32_t i = 0; i < process_len; i++) {
+		result <<= 8;
+		result |= str[i];
+	}
+	result <<= 8 * (8 - process_len);
+	return result;
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_string(const struct tuple *tuple, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->coll == NULL);
+	assert(key_def->parts->type == FIELD_TYPE_STRING);
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	if (is_nullable && field == NULL)
+		return 0;
+	return key_hint_string<is_nullable>(field, key_def);
+}
+
+template<bool is_nullable>
+static uint64_t
+key_hint_string_coll(const char *key, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_STRING &&
+	        key_def->parts->coll != NULL);
+	if (is_nullable && mp_typeof(*key) == MP_NIL)
+		return 0;
+	assert(mp_typeof(*key) == MP_STR);
+	uint32_t len;
+	const char *str = mp_decode_str(&key, &len);
+	return key_def->parts->coll->hint(str, len, key_def->parts->coll);
+}
+
+template<bool is_nullable>
+static uint64_t
+tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_STRING &&
+	        key_def->parts->coll != NULL);
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	if (is_nullable && field == NULL)
+		return 0;
+	return key_hint_string_coll<is_nullable>(field, key_def);
+}
+
+void
+key_def_set_hint_func(struct key_def *def)
+{
+	def->key_hint = key_hint_default;
+	def->tuple_hint = tuple_hint_default;
+	bool is_nullable = key_part_is_nullable(def->parts);
+	if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL) {
+		def->key_hint = is_nullable ? key_hint_string_coll<true> :
+					      key_hint_string_coll<false>;
+		def->tuple_hint = is_nullable ? tuple_hint_string_coll<true> :
+						tuple_hint_string_coll<false>;
+		return;
+	}
+	switch (def->parts->type) {
+	case FIELD_TYPE_UNSIGNED:
+		def->key_hint = is_nullable ? key_hint_uint<true> :
+					      key_hint_uint<false>;
+		def->tuple_hint = is_nullable ? tuple_hint_uint<true> :
+						tuple_hint_uint<false>;
+		break;
+	case FIELD_TYPE_INTEGER:
+		def->key_hint = is_nullable ? key_hint_int<true> :
+					      key_hint_int<false>;
+		def->tuple_hint = is_nullable ? tuple_hint_int<true> :
+						tuple_hint_int<false>;
+		break;
+	case FIELD_TYPE_STRING:
+		def->key_hint = is_nullable ? key_hint_string<true> :
+					      key_hint_string<false>;
+		def->tuple_hint = is_nullable ? tuple_hint_string<true> :
+						tuple_hint_string<false>;
+		break;
+	case FIELD_TYPE_NUMBER:
+		def->key_hint = is_nullable ? key_hint_number<true> :
+					      key_hint_number<false>;
+		def->tuple_hint = is_nullable ? tuple_hint_number<true> :
+						tuple_hint_number<false>;
+		break;
+	case FIELD_TYPE_BOOLEAN:
+		def->key_hint = is_nullable ? key_hint_boolean<true> :
+					      key_hint_boolean<false>;
+		def->tuple_hint = is_nullable ? tuple_hint_boolean<true> :
+						tuple_hint_boolean<false>;
+		break;
+	default:
+		break;
+	};
+}
diff --git a/src/box/tuple_hint.h b/src/box/tuple_hint.h
new file mode 100644
index 000000000..19b08dec1
--- /dev/null
+++ b/src/box/tuple_hint.h
@@ -0,0 +1,60 @@
+#ifndef TARANTOOL_BOX_TUPLE_HINT_H_INCLUDED
+#define TARANTOOL_BOX_TUPLE_HINT_H_INCLUDED
+/*
+ * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+#include <stdint.h>
+#include <trivia/util.h>
+
+struct key_def;
+
+/**
+ * If it is impossible to calculate hint, this value is returned.
+ * Such hint must not be used for comparisons.
+ */
+#define INVALID_HINT UINT64_MAX
+
+/**
+ * Initialize tuple_hint() and key_hint() functions for the
+ * key_def.
+ * @param key_def key definition to set up.
+ */
+void
+key_def_set_hint_func(struct key_def *key_def);
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
+
+#endif /* TARANTOOL_BOX_TUPLE_HINT_H_INCLUDED */
diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c
index 6d9c44dbf..fe0ff171f 100644
--- a/src/lib/coll/coll.c
+++ b/src/lib/coll/coll.c
@@ -133,6 +133,37 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
 	return s_len;
 }
 
+/** Get a compare hint of a binary collation. */
+static uint64_t
+coll_bin_hint(const char *s, size_t s_len, struct coll *coll)
+{
+	(void)coll;
+	uint64_t result = 0;
+	uint32_t process_len = MIN(s_len, 8);
+	for (uint32_t i = 0; i < process_len; i++) {
+		result <<= 8;
+		result |= ((unsigned char *)s)[i];
+	}
+	result <<= 8 * (8 - process_len);
+	return result;
+}
+
+/** Get a compare hint of a string using ICU collation. */
+static uint64_t
+coll_icu_hint(const char *s, size_t s_len, struct coll *coll)
+{
+	assert(coll->type == COLL_TYPE_ICU);
+	UCharIterator itr;
+	uiter_setUTF8(&itr, s, s_len);
+	uint8_t buf[8];
+	uint32_t state[2] = {0, 0};
+	UErrorCode status = U_ZERO_ERROR;
+	int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf,
+					   sizeof(buf), &status);
+	assert(got >= 0 && got <= 8);
+	return coll_bin_hint((const char *)buf, got, NULL);
+}
+
 /**
  * Set up ICU collator and init cmp and hash members of collation.
  * @param coll Collation to set up.
@@ -242,6 +273,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
 	}
 	coll->cmp = coll_icu_cmp;
 	coll->hash = coll_icu_hash;
+	coll->hint = coll_icu_hint;
 	return 0;
 }
 
@@ -332,6 +364,7 @@ coll_new(const struct coll_def *def)
 		coll->collator = NULL;
 		coll->cmp = coll_bin_cmp;
 		coll->hash = coll_bin_hash;
+		coll->hint = coll_bin_hint;
 		break;
 	default:
 		unreachable();
diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h
index 9c725712a..53133dae3 100644
--- a/src/lib/coll/coll.h
+++ b/src/lib/coll/coll.h
@@ -47,6 +47,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t,
 typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph,
 				uint32_t *pcarry, struct coll *coll);
 
+typedef uint64_t (*coll_hint_f)(const char *s, size_t s_len, struct coll *coll);
+
 struct UCollator;
 
 /**
@@ -61,6 +63,8 @@ struct coll {
 	/** String comparator. */
 	coll_cmp_f cmp;
 	coll_hash_f hash;
+	/** The pointer to routine calculating tuple hint. */
+	coll_hint_f hint;
 	/** Reference counter. */
 	int refs;
 	/**
-- 
2.21.0




More information about the Tarantool-patches mailing list