Tarantool development patches archive
 help / color / mirror / Atom feed
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com
Cc: Kirill Shcherbatov <kshcherbatov@tarantool.org>
Subject: [PATCH v5 2/4] memtx: introduce tuple compare hint
Date: Thu,  7 Mar 2019 12:44:06 +0300	[thread overview]
Message-ID: <53e165cf566611452821c692d79d621628e839ff.1551951540.git.kshcherbatov@tarantool.org> (raw)
In-Reply-To: <cover.1551951540.git.kshcherbatov@tarantool.org>

Implement functions for retrieving tuple hints for a particular
key_def. Hint is an integer that can be used for tuple comparison
optimization: if a hint of one tuple is less than a hint of another
then the first tuple is definitely less than the second; only if
hints are equal tuple_compare must be called for getting comparison
result.

Hints are calculated using only the first part of key_def.

Hints are only useful when:
 * they are precalculated and stored along with the tuple;
calculation is not cheap (cheaper than tuple_compare btw) but
precalculated hints allow to compare tuples without even fetching
tuple data.
 * first part of key_def is 'string'(for now)
 * since hint is calculated using only the first part of key_def
(and only first several characters if it is a string) this part
must be different with high probability for every tuple pair.

Closes #3961
---
 src/box/key_def.c        |   1 +
 src/box/key_def.h        | 119 ++++++++++++
 src/box/memtx_tree.c     |  32 +++-
 src/box/tuple_compare.cc | 381 +++++++++++++++++++++++++++++++++++++++
 src/box/tuple_compare.h  |   7 +
 src/lib/coll/coll.c      |  33 ++++
 src/lib/coll/coll.h      |   4 +
 7 files changed, 567 insertions(+), 10 deletions(-)

diff --git a/src/box/key_def.c b/src/box/key_def.c
index d4c979aa1..771c30172 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -136,6 +136,7 @@ key_def_set_func(struct key_def *def)
 	key_def_set_compare_func(def);
 	key_def_set_hash_func(def);
 	key_def_set_extract_func(def);
+	key_def_set_cmp_aux_func(def);
 }
 
 static void
diff --git a/src/box/key_def.h b/src/box/key_def.h
index dd62f6a35..c630e6b43 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -115,6 +115,28 @@ struct key_part {
 
 struct key_def;
 struct tuple;
+/** Comparasion auxiliary information. */
+typedef union {
+	/**
+	 * Hint is a value h(t) is calculated for tuple in terms
+	 * of particular key_def that has follows rules:
+	 * if h(t1) < h(t2) then t1 < t2;
+	 * if h(t1) > h(t2) then t1 > t2;
+	 * if t1 == t2 then h(t1) == h(t2);
+	 * These rules means that instead of direct tuple vs tuple
+	 * (or tuple vs key) comparison one may compare theirs
+	 * hints first; and only if theirs hints are equal compare
+	 * the tuples themselves.
+	 */
+	uint64_t hint;
+} cmp_aux_t;
+
+/** Test if cmp_aux_t a and b are equal. */
+static inline bool
+cmp_aux_equal(cmp_aux_t a, cmp_aux_t b)
+{
+	return a.hint == b.hint;
+}
 
 /**
  * Get is_nullable property of key_part.
@@ -137,6 +159,18 @@ typedef int (*tuple_compare_with_key_t)(const struct tuple *tuple_a,
 typedef int (*tuple_compare_t)(const struct tuple *tuple_a,
 			       const struct tuple *tuple_b,
 			       struct key_def *key_def);
+/** @copydoc tuple_aux_compare_with_key() */
+typedef int (*tuple_aux_compare_with_key_t)(const struct tuple *tuple,
+					    cmp_aux_t tuple_cmp_aux,
+					    const char *key, uint32_t part_count,
+					    cmp_aux_t key_cmp_aux,
+					    struct key_def *key_def);
+/** @copydoc tuple_aux_compare() */
+typedef int (*tuple_aux_compare_t)(const struct tuple *tuple_a,
+				   cmp_aux_t tuple_a_cmp_aux,
+				   const struct tuple *tuple_b,
+				   cmp_aux_t tuple_b_cmp_aux,
+				   struct key_def *key_def);
 /** @copydoc tuple_extract_key() */
 typedef char *(*tuple_extract_key_t)(const struct tuple *tuple,
 				     struct key_def *key_def,
@@ -153,12 +187,23 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple,
 typedef uint32_t (*key_hash_t)(const char *key,
 				struct key_def *key_def);
 
+/** @copydoc tuple_cmp_aux() */
+typedef cmp_aux_t (*tuple_cmp_aux_t)(const struct tuple *tuple,
+				     struct key_def *key_def);
+
+/** @copydoc key_cmp_aux() */
+typedef cmp_aux_t (*key_cmp_aux_t)(const char *key, struct key_def *key_def);
+
 /* Definition of a multipart key. */
 struct key_def {
 	/** @see tuple_compare() */
 	tuple_compare_t tuple_compare;
 	/** @see tuple_compare_with_key() */
 	tuple_compare_with_key_t tuple_compare_with_key;
+	/** @see tuple_aux_compare_with_key() */
+	tuple_aux_compare_with_key_t tuple_aux_compare_with_key;
+	/** @see tuple_aux_compare() */
+	tuple_aux_compare_t tuple_aux_compare;
 	/** @see tuple_extract_key() */
 	tuple_extract_key_t tuple_extract_key;
 	/** @see tuple_extract_key_raw() */
@@ -167,6 +212,10 @@ struct key_def {
 	tuple_hash_t tuple_hash;
 	/** @see key_hash() */
 	key_hash_t key_hash;
+	/** @see tuple_cmp_aux() */
+	tuple_cmp_aux_t tuple_cmp_aux;
+	/** @see key_cmp_aux() */
+	key_cmp_aux_t key_cmp_aux;
 	/**
 	 * Minimal part count which always is unique. For example,
 	 * if a secondary index is unique, then
@@ -571,6 +620,52 @@ tuple_compare_with_key(const struct tuple *tuple, const char *key,
 	return key_def->tuple_compare_with_key(tuple, key, part_count, key_def);
 }
 
+/**
+ * Compare tuples using the key definition and comparasion
+ * auxillary information.
+ * @param tuple_a First tuple.
+ * @param tuple_a_cmp_aux Comparasion auxiliary information
+ *                        for the tuple_a.
+ * @param tuple_b Second tuple.
+ * @param tuple_b_cmp_aux Comparasion auxilary information
+ *                        for the tuple_b.
+ * @param key_def Key definition.
+ * @retval 0  if key_fields(tuple_a) == key_fields(tuple_b)
+ * @retval <0 if key_fields(tuple_a) < key_fields(tuple_b)
+ * @retval >0 if key_fields(tuple_a) > key_fields(tuple_b)
+ */
+static inline int
+tuple_aux_compare(const struct tuple *tuple_a, cmp_aux_t tuple_a_cmp_aux,
+		  const struct tuple *tuple_b, cmp_aux_t tuple_b_cmp_aux,
+		  struct key_def *key_def)
+{
+	return key_def->tuple_aux_compare(tuple_a, tuple_a_cmp_aux, tuple_b,
+					  tuple_b_cmp_aux, key_def);
+}
+
+/**
+ * Compare tuple with key using the key definition and
+ * comparasion auxilary information.
+ * @param tuple tuple
+ * @param tuple_cmp_aux tuple compare auxiliary information.
+ * @param key key parts without MessagePack array header
+ * @param part_count the number of parts in @a key
+ * @param key_cmp_aux t Key compare auxiliary information.
+ * @param key_def key definition
+ * @retval 0  if key_fields(tuple) == parts(key)
+ * @retval <0 if key_fields(tuple) < parts(key)
+ * @retval >0 if key_fields(tuple) > parts(key)
+ */
+static inline int
+tuple_aux_compare_with_key(const struct tuple *tuple, cmp_aux_t tuple_cmp_aux,
+			   const char *key, uint32_t part_count,
+			   cmp_aux_t key_cmp_aux, struct key_def *key_def)
+{
+	return key_def->tuple_aux_compare_with_key(tuple, tuple_cmp_aux, key,
+						   part_count, key_cmp_aux,
+						   key_def);
+}
+
 /**
  * Compute hash of a tuple field.
  * @param ph1 - pointer to running hash
@@ -624,6 +719,30 @@ key_hash(const char *key, struct key_def *key_def)
 	return key_def->key_hash(key, key_def);
 }
 
+ /*
+ * Get a comparison auxiliary information for a tuple.
+ * @param tuple - tuple to get cmp_aux_t of.
+ * @param key_def - key_def that defines which comparison is used.
+ * @return the comparison auxiliary information.
+ */
+static inline cmp_aux_t
+tuple_cmp_aux(const struct tuple *tuple, struct key_def *key_def)
+{
+	return key_def->tuple_cmp_aux(tuple, key_def);
+}
+
+/**
+ * Get a comparison hint of a key.
+ * @param key - key to get hint of.
+ * @param key_def - key_def that defines which comparison is used.
+ * @return the comparison auxiliary information.
+ */
+static inline cmp_aux_t
+key_cmp_aux(const char *key, struct key_def *key_def)
+{
+	return key_def->key_cmp_aux(key, key_def);
+}
+
 #if defined(__cplusplus)
 } /* extern "C" */
 #endif /* defined(__cplusplus) */
diff --git a/src/box/memtx_tree.c b/src/box/memtx_tree.c
index 688f09597..4b0886bef 100644
--- a/src/box/memtx_tree.c
+++ b/src/box/memtx_tree.c
@@ -47,6 +47,8 @@ struct memtx_tree_key_data {
 	const char *key;
 	/** Number of msgpacked search fields. */
 	uint32_t part_count;
+	/** Comparison auxilary information. */
+	cmp_aux_t aux_info;
 };
 
 /**
@@ -55,6 +57,8 @@ struct memtx_tree_key_data {
 struct memtx_tree_data {
 	/* Tuple that this node is represents. */
 	struct tuple *tuple;
+	/** Comparison auxilary information. */
+	cmp_aux_t aux_info;
 };
 
 /**
@@ -69,7 +73,7 @@ static bool
 memtx_tree_data_identical(const struct memtx_tree_data *a,
 			  const struct memtx_tree_data *b)
 {
-	return a->tuple == b->tuple;
+	return a->tuple == b->tuple && cmp_aux_equal(a->aux_info, b->aux_info);
 }
 
 /**
@@ -91,6 +95,7 @@ memtx_tree_data_set(struct memtx_tree_data *data, struct tuple *tuple,
 {
 	(void)key_def;
 	data->tuple = tuple;
+	data->aux_info = tuple_cmp_aux(tuple, key_def);
 }
 
 /**
@@ -103,15 +108,18 @@ memtx_tree_key_data_set(struct memtx_tree_key_data *key_data, const char *key,
 	(void)key_def;
 	key_data->key = key;
 	key_data->part_count = part_count;
+	key_data->aux_info = key_cmp_aux(key, key_def);
 }
 
 #define BPS_TREE_NAME memtx_tree
 #define BPS_TREE_BLOCK_SIZE (512)
 #define BPS_TREE_EXTENT_SIZE MEMTX_EXTENT_SIZE
 #define BPS_TREE_COMPARE(a, b, arg)\
-	tuple_compare((&a)->tuple, (&b)->tuple, arg)
+	tuple_aux_compare((&a)->tuple, (&a)->aux_info, (&b)->tuple,\
+			  (&b)->aux_info, arg)
 #define BPS_TREE_COMPARE_KEY(a, b, arg)\
-	tuple_compare_with_key((&a)->tuple, (b)->key, (b)->part_count, arg)
+	tuple_aux_compare_with_key((&a)->tuple, (&a)->aux_info, (b)->key,\
+				   (b)->part_count, (b)->aux_info, arg)
 #define BPS_TREE_IDENTICAL(a, b) memtx_tree_data_identical(&a, &b)
 #define bps_tree_elem_t struct memtx_tree_data
 #define bps_tree_key_t struct memtx_tree_key_data *
@@ -146,7 +154,8 @@ memtx_tree_qcompare(const void* a, const void *b, void *c)
 	const struct memtx_tree_data *data_a = a;
 	const struct memtx_tree_data *data_b = b;
 	struct key_def *key_def = c;
-	return tuple_compare(data_a->tuple, data_b->tuple, key_def);
+	return tuple_aux_compare(data_a->tuple, data_a->aux_info, data_b->tuple,
+				 data_b->aux_info, key_def);
 }
 
 /* {{{ MemtxTree Iterators ****************************************/
@@ -268,9 +277,10 @@ tree_iterator_next_equal(struct iterator *iterator, struct tuple **ret)
 		memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
 	/* Use user key def to save a few loops. */
 	if (res == NULL ||
-	    tuple_compare_with_key(res->tuple, it->key_data.key,
-				   it->key_data.part_count,
-				   it->index_def->key_def) != 0) {
+	    tuple_aux_compare_with_key(res->tuple, res->aux_info,
+				       it->key_data.key, it->key_data.part_count,
+				       it->key_data.aux_info,
+				       it->index_def->key_def) != 0) {
 		iterator->next = tree_iterator_dummie;
 		memtx_tree_data_clear(&it->current);
 		*ret = NULL;
@@ -299,9 +309,10 @@ tree_iterator_prev_equal(struct iterator *iterator, struct tuple **ret)
 		memtx_tree_iterator_get_elem(it->tree, &it->tree_iterator);
 	/* Use user key def to save a few loops. */
 	if (res == NULL ||
-	    tuple_compare_with_key(res->tuple, it->key_data.key,
-				   it->key_data.part_count,
-				   it->index_def->key_def) != 0) {
+	    tuple_aux_compare_with_key(res->tuple, res->aux_info,
+				       it->key_data.key, it->key_data.part_count,
+				       it->key_data.aux_info,
+				       it->index_def->key_def) != 0) {
 		iterator->next = tree_iterator_dummie;
 		memtx_tree_data_clear(&it->current);
 		*ret = NULL;
@@ -549,6 +560,7 @@ memtx_tree_index_get(struct index *base, const char *key,
 	assert(base->def->opts.is_unique &&
 	       part_count == base->def->key_def->part_count);
 	struct memtx_tree_index *index = (struct memtx_tree_index *)base;
+
 	struct memtx_tree_key_data key_data;
 	struct key_def *cmp_def = memtx_tree_index_cmp_def(index);
 	memtx_tree_key_data_set(&key_data, key, part_count, cmp_def);
diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
index cf4519ccb..5b06e06b3 100644
--- a/src/box/tuple_compare.cc
+++ b/src/box/tuple_compare.cc
@@ -1323,9 +1323,390 @@ tuple_compare_with_key_create(const struct key_def *def)
 
 /* }}} tuple_compare_with_key */
 
+/* {{{ tuple_aux_compare */
+
+#define CMP_HINT_INVALID ((uint64_t)UINT64_MAX)
+
+static int
+tuple_hinted_compare(const struct tuple *tuple_a, cmp_aux_t tuple_a_cmp_aux,
+		     const struct tuple *tuple_b, cmp_aux_t tuple_b_cmp_aux,
+		     struct key_def *key_def)
+{
+	uint64_t tuple_a_hint = tuple_a_cmp_aux.hint;
+	uint64_t tuple_b_hint = tuple_b_cmp_aux.hint;
+	if (likely(tuple_a_hint != tuple_b_hint &&
+		   tuple_a_hint != CMP_HINT_INVALID &&
+		   tuple_b_hint != CMP_HINT_INVALID))
+		return tuple_a_hint < tuple_b_hint ? -1 : 1;
+	else
+		return tuple_compare(tuple_a, tuple_b, key_def);
+}
+
+static tuple_aux_compare_t
+tuple_aux_compare_create(const struct key_def *def)
+{
+	(void)def;
+	return tuple_hinted_compare;
+}
+
+/* }}} tuple_aux_compare */
+
+/* {{{ tuple_aux_compare_with_key */
+
+static int
+tuple_hinted_compare_with_key(const struct tuple *tuple, cmp_aux_t tuple_cmp_aux,
+			      const char *key, uint32_t part_count,
+			      cmp_aux_t key_cmp_aux, struct key_def *key_def)
+{
+	uint64_t tuple_hint = tuple_cmp_aux.hint;
+	uint64_t key_hint = key_cmp_aux.hint;
+	if (likely(tuple_hint != key_hint && tuple_hint != CMP_HINT_INVALID &&
+		   key_hint != CMP_HINT_INVALID))
+		return tuple_hint < key_hint ? -1 : 1;
+	else
+		return tuple_compare_with_key(tuple, key, part_count, key_def);
+}
+
+static tuple_aux_compare_with_key_t
+tuple_aux_compare_with_key_create(const struct key_def *def)
+{
+	(void)def;
+	return tuple_hinted_compare_with_key;
+}
+
+/* }}} tuple_aux_compare_with_key */
+
 void
 key_def_set_compare_func(struct key_def *def)
 {
 	def->tuple_compare = tuple_compare_create(def);
 	def->tuple_compare_with_key = tuple_compare_with_key_create(def);
+	def->tuple_aux_compare = tuple_aux_compare_create(def);
+	def->tuple_aux_compare_with_key =
+		tuple_aux_compare_with_key_create(def);
+}
+
+/* Tuple hints */
+
+static cmp_aux_t
+key_hint_default(const char *key, struct key_def *key_def)
+{
+	(void)key;
+	(void)key_def;
+	cmp_aux_t ret;
+	ret.hint = CMP_HINT_INVALID;
+	return ret;
+}
+
+static cmp_aux_t
+tuple_hint_default(const struct tuple *tuple, struct key_def *key_def)
+{
+	(void)tuple;
+	(void)key_def;
+	cmp_aux_t ret;
+	ret.hint = CMP_HINT_INVALID;
+	return ret;
+}
+
+template<bool is_nullable>
+static cmp_aux_t
+key_hint_uint(const char *key, struct key_def *key_def)
+{
+	(void)key_def;
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_UNSIGNED);
+	cmp_aux_t ret;
+	if (key == NULL || (is_nullable && mp_typeof(*key) == MP_NIL)) {
+		ret.hint = 0;
+		return ret;
+	}
+	assert(mp_typeof(*key) == MP_UINT);
+	uint64_t val = mp_decode_uint(&key);
+	ret.hint = unlikely(val > INT64_MAX) ? INT64_MAX :
+		   val - (uint64_t)INT64_MIN;;
+	return ret;
+}
+
+template<bool is_nullable>
+static cmp_aux_t
+tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_UNSIGNED);
+	cmp_aux_t ret;
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	if (is_nullable && field == NULL) {
+		ret.hint = 0;
+		return ret;
+	}
+	return key_hint_uint<is_nullable>(field, key_def);
+}
+
+template<bool is_nullable>
+static cmp_aux_t
+key_hint_int(const char *key, struct key_def *key_def)
+{
+	(void)key_def;
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_INTEGER);
+	cmp_aux_t ret;
+	if (key == NULL || (is_nullable && mp_typeof(*key) == MP_NIL)) {
+		ret.hint = 0;
+		return ret;
+	}
+	if (mp_typeof(*key) == MP_UINT) {
+		uint64_t val = mp_decode_uint(&key);
+		ret.hint = unlikely(val > INT64_MAX) ? INT64_MAX :
+			   val - (uint64_t)INT64_MIN;
+	} else {
+		assert(mp_typeof(*key) == MP_INT);
+		int64_t val = mp_decode_int(&key);
+		ret.hint = (uint64_t)val - (uint64_t)INT64_MIN;
+	}
+	return ret;
+}
+
+template<bool is_nullable>
+static cmp_aux_t
+tuple_hint_int(const struct tuple *tuple, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_INTEGER);
+	cmp_aux_t ret;
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	if (is_nullable && field == NULL) {
+		ret.hint = 0;
+		return ret;
+	}
+	return key_hint_int<is_nullable>(field, key_def);
+}
+
+template<bool is_nullable>
+static cmp_aux_t
+key_hint_number(const char *key, struct key_def *key_def)
+{
+	(void)key_def;
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_NUMBER);
+	cmp_aux_t ret;
+	if (key == NULL || (is_nullable && mp_typeof(*key) == MP_NIL)) {
+		ret.hint = 0;
+		return ret;
+	}
+	uint64_t val = 0;
+	switch (mp_typeof(*key)) {
+	case MP_FLOAT:
+	case MP_DOUBLE: {
+		double f = mp_typeof(*key) == MP_FLOAT ?
+			   mp_decode_float(&key) : mp_decode_double(&key);
+		if (isnan(f) || isinf(f)) {
+			ret.hint = CMP_HINT_INVALID;
+			return ret;
+		}
+		double ival;
+		(void)modf(f, &ival);
+		if (unlikely(ival >= (double)INT64_MAX)) {
+			ret.hint = INT64_MAX;
+			return ret;
+		}
+		if (unlikely(ival <= (double)INT64_MIN)) {
+			ret.hint = 0;
+			return ret;
+		}
+		val = (uint64_t)ival;
+		break;
+	}
+	case MP_INT: {
+		val = (uint64_t)mp_decode_int(&key);
+		break;
+	}
+	case MP_UINT: {
+		val = mp_decode_uint(&key);
+		if (val > INT64_MAX) {
+			ret.hint = INT64_MAX;
+			return ret;
+		}
+		break;
+	}
+	default:
+		unreachable();
+	}
+	ret.hint = val - (uint64_t)INT64_MIN;
+	return ret;
+}
+
+template<bool is_nullable>
+static cmp_aux_t
+tuple_hint_number(const struct tuple *tuple, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_NUMBER);
+	cmp_aux_t ret;
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	if (is_nullable && field == NULL) {
+		ret.hint = 0;
+		return ret;
+	}
+	return key_hint_number<is_nullable>(field, key_def);
+}
+
+template<bool is_nullable>
+static cmp_aux_t
+key_hint_boolean(const char *key, struct key_def *key_def)
+{
+	(void)key_def;
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_BOOLEAN);
+	cmp_aux_t ret;
+	if (key == NULL || (is_nullable && mp_typeof(*key) == MP_NIL)) {
+		ret.hint = 0;
+		return ret;
+	}
+	bool val = mp_decode_bool(&key);
+	ret.hint = (uint64_t)val - (uint64_t)INT64_MIN;
+	return ret;
+}
+
+template<bool is_nullable>
+static cmp_aux_t
+tuple_hint_boolean(const struct tuple *tuple, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_BOOLEAN);
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	cmp_aux_t ret;
+	if (is_nullable && field == NULL) {
+		ret.hint = 0;
+		return ret;
+	}
+	return key_hint_boolean<is_nullable>(field, key_def);
+}
+
+template<bool is_nullable>
+static cmp_aux_t
+key_hint_string(const char *key, struct key_def *key_def)
+{
+	(void)key_def;
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->coll == NULL);
+	assert(key_def->parts->type == FIELD_TYPE_STRING);
+	cmp_aux_t ret;
+	if (key == NULL || (is_nullable && mp_typeof(*key) == MP_NIL)) {
+		ret.hint = 0;
+		return ret;
+	}
+	assert(mp_typeof(*key) == MP_STR);
+	uint32_t len;
+	const unsigned char *str =
+		(const unsigned char *)mp_decode_str(&key, &len);
+	uint64_t result = 0;
+	uint32_t process_len = MIN(len, 8);
+	for (uint32_t i = 0; i < process_len; i++) {
+		result <<= 8;
+		result |= str[i];
+	}
+	result <<= 8 * (8 - process_len);
+	ret.hint = result;
+	return ret;
+}
+
+template<bool is_nullable>
+static cmp_aux_t
+tuple_hint_string(const struct tuple *tuple, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->coll == NULL);
+	assert(key_def->parts->type == FIELD_TYPE_STRING);
+	cmp_aux_t ret;
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	if (is_nullable && field == NULL) {
+		ret.hint = 0;
+		return ret;
+	}
+	return key_hint_string<is_nullable>(field, key_def);
+}
+
+template<bool is_nullable>
+static cmp_aux_t
+key_hint_string_coll(const char *key, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_STRING &&
+	        key_def->parts->coll != NULL);
+	cmp_aux_t ret;
+	if (key == NULL || (is_nullable && mp_typeof(*key) == MP_NIL)) {
+		ret.hint = 0;
+		return ret;
+	}
+	assert(mp_typeof(*key) == MP_STR);
+	uint32_t len;
+	const char *str = mp_decode_str(&key, &len);
+	ret.hint = key_def->parts->coll->hint(str, len, key_def->parts->coll);
+	return ret;
+}
+
+template<bool is_nullable>
+static cmp_aux_t
+tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def)
+{
+	assert(key_part_is_nullable(key_def->parts) == is_nullable);
+	assert(key_def->parts->type == FIELD_TYPE_STRING &&
+	        key_def->parts->coll != NULL);
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	cmp_aux_t ret;
+	if (is_nullable && field == NULL) {
+		ret.hint = 0;
+		return ret;
+	}
+	return key_hint_string_coll<is_nullable>(field, key_def);
+}
+
+void
+key_def_set_cmp_aux_func(struct key_def *def)
+{
+	def->key_cmp_aux = key_hint_default;
+	def->tuple_cmp_aux = tuple_hint_default;
+	bool is_nullable = key_part_is_nullable(def->parts);
+	if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL) {
+		def->key_cmp_aux = is_nullable ? key_hint_string_coll<true> :
+						 key_hint_string_coll<false>;
+		def->tuple_cmp_aux = is_nullable ?
+				     tuple_hint_string_coll<true> :
+				     tuple_hint_string_coll<false>;
+		return;
+	}
+	switch (def->parts->type) {
+	case FIELD_TYPE_UNSIGNED:
+		def->key_cmp_aux = is_nullable ? key_hint_uint<true> :
+						 key_hint_uint<false>;
+		def->tuple_cmp_aux = is_nullable ? tuple_hint_uint<true> :
+						   tuple_hint_uint<false>;
+		break;
+	case FIELD_TYPE_INTEGER:
+		def->key_cmp_aux = is_nullable ? key_hint_int<true> :
+						 key_hint_int<false>;
+		def->tuple_cmp_aux = is_nullable ? tuple_hint_int<true> :
+						   tuple_hint_int<false>;
+		break;
+	case FIELD_TYPE_STRING:
+		def->key_cmp_aux = is_nullable ? key_hint_string<true> :
+						 key_hint_string<false>;
+		def->tuple_cmp_aux = is_nullable ? tuple_hint_string<true> :
+						   tuple_hint_string<false>;
+		break;
+	case FIELD_TYPE_NUMBER:
+		def->key_cmp_aux = is_nullable ? key_hint_number<true> :
+						 key_hint_number<false>;
+		def->tuple_cmp_aux = is_nullable ? tuple_hint_number<true> :
+						   tuple_hint_number<false>;
+		break;
+	case FIELD_TYPE_BOOLEAN:
+		def->key_cmp_aux = is_nullable ? key_hint_boolean<true> :
+						 key_hint_boolean<false>;
+		def->tuple_cmp_aux = is_nullable ? tuple_hint_boolean<true> :
+						   tuple_hint_boolean<false>;
+		break;
+	default:
+		break;
+	};
 }
diff --git a/src/box/tuple_compare.h b/src/box/tuple_compare.h
index 6538d5fc0..160135af4 100644
--- a/src/box/tuple_compare.h
+++ b/src/box/tuple_compare.h
@@ -43,6 +43,13 @@ struct key_def;
 void
 key_def_set_compare_func(struct key_def *def);
 
+/**
+ * Initialize cmp_aux calculation functions for the key_def.
+ * @param key_def key definition
+ */
+void
+key_def_set_cmp_aux_func(struct key_def *def);
+
 #if defined(__cplusplus)
 } /* extern "C" */
 #endif /* defined(__cplusplus) */
diff --git a/src/lib/coll/coll.c b/src/lib/coll/coll.c
index afc15e809..9c267d384 100644
--- a/src/lib/coll/coll.c
+++ b/src/lib/coll/coll.c
@@ -133,6 +133,37 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
 	return s_len;
 }
 
+/** Get a compare hint of a binary collation. */
+static uint64_t
+coll_bin_hint(const char *s, size_t s_len, struct coll *coll)
+{
+	(void)coll;
+	uint64_t result = 0;
+	uint32_t process_len = MIN(s_len, 8);
+	for (uint32_t i = 0; i < process_len; i++) {
+		result <<= 8;
+		result |= ((unsigned char *)s)[i];
+	}
+	result <<= 8 * (8 - process_len);
+	return result;
+}
+
+/** Get a compare hint of a string using ICU collation. */
+static uint64_t
+coll_icu_hint(const char *s, size_t s_len, struct coll *coll)
+{
+	assert(coll->type == COLL_TYPE_ICU);
+	UCharIterator itr;
+	uiter_setUTF8(&itr, s, s_len);
+	uint8_t buf[8];
+	uint32_t state[2] = {0, 0};
+	UErrorCode status = U_ZERO_ERROR;
+	int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf,
+					   sizeof(buf), &status);
+	assert(got >= 0 && got <= 8);
+	return coll_bin_hint((const char *)buf, got, NULL);
+}
+
 /**
  * Set up ICU collator and init cmp and hash members of collation.
  * @param coll Collation to set up.
@@ -242,6 +273,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
 	}
 	coll->cmp = coll_icu_cmp;
 	coll->hash = coll_icu_hash;
+	coll->hint = coll_icu_hint;
 	return 0;
 }
 
@@ -356,6 +388,7 @@ coll_new(const struct coll_def *def)
 		coll->collator = NULL;
 		coll->cmp = coll_bin_cmp;
 		coll->hash = coll_bin_hash;
+		coll->hint = coll_bin_hint;
 		break;
 	default:
 		unreachable();
diff --git a/src/lib/coll/coll.h b/src/lib/coll/coll.h
index 8c9f94293..d695a02ad 100644
--- a/src/lib/coll/coll.h
+++ b/src/lib/coll/coll.h
@@ -48,6 +48,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t,
 typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph,
 				uint32_t *pcarry, struct coll *coll);
 
+typedef uint64_t (*coll_hint_f)(const char *s, size_t s_len, struct coll *coll);
+
 struct UCollator;
 
 /**
@@ -62,6 +64,8 @@ struct coll {
 	/** String comparator. */
 	coll_cmp_f cmp;
 	coll_hash_f hash;
+	/** The pointer to routine calculating tuple hint. */
+	coll_hint_f hint;
 	/** Reference counter. */
 	int refs;
 	/**
-- 
2.21.0

  parent reply	other threads:[~2019-03-07  9:44 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-07  9:44 [PATCH v5 0/4] box: introduce hint option for memtx tree index Kirill Shcherbatov
2019-03-07  9:44 ` [PATCH v5 1/4] memtx: rework memtx_tree to store arbitrary nodes Kirill Shcherbatov
2019-03-11 10:34   ` Vladimir Davydov
2019-03-11 16:53     ` [tarantool-patches] " Kirill Shcherbatov
2019-03-12 10:45       ` Vladimir Davydov
2019-03-07  9:44 ` Kirill Shcherbatov [this message]
2019-03-07 10:42   ` [tarantool-patches] [PATCH v5 2/4] memtx: introduce tuple compare hint Konstantin Osipov
2019-03-07 10:59     ` Vladimir Davydov
2019-03-11 10:39   ` Vladimir Davydov
2019-03-11 17:03   ` Vladimir Davydov
2019-03-12 13:00   ` Vladimir Davydov
2019-03-07  9:44 ` [PATCH v5 3/4] box: move offset_slot init to tuple_format_add_field Kirill Shcherbatov
2019-03-07 15:53   ` [tarantool-patches] " Kirill Shcherbatov
2019-03-07  9:44 ` [PATCH v5 4/4] box: introduce multikey indexes Kirill Shcherbatov
2019-03-07 15:55   ` [tarantool-patches] " Kirill Shcherbatov
2019-03-12 13:24     ` Vladimir Davydov
2019-03-07 10:45 ` [tarantool-patches] [PATCH v5 0/4] box: introduce hint option for memtx tree index Konstantin Osipov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=53e165cf566611452821c692d79d621628e839ff.1551951540.git.kshcherbatov@tarantool.org \
    --to=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=vdavydov.dev@gmail.com \
    --subject='Re: [PATCH v5 2/4] memtx: introduce tuple compare hint' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox