Tarantool development patches archive
 help / color / mirror / Atom feed
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com
Cc: Alexandr Lyapunov <aleks@tarantool.org>
Subject: [PATCH v1 3/4] box: introduce tuple compare hint
Date: Tue,  5 Feb 2019 14:58:38 +0300	[thread overview]
Message-ID: <535f26d85705f8157737bca7ed4978d4c9a016f4.1549367041.git.kshcherbatov@tarantool.org> (raw)
In-Reply-To: <cover.1549367041.git.kshcherbatov@tarantool.org>

From: Alexandr Lyapunov <aleks@tarantool.org>

Implement functions for retrieving tuple hints for a particular
key_def. Hint is an integer that can be used for tuple comparison
optimization: if a hint of one tuple is less than a hint of another
then the first tuple is definitely less than the second; only if
hints are equal tuple_compare must be called for getting comparison
result.

Hints are calculated using only the first part of key_def.

Hints are only useful when:
 * they are precalculated and stored along with the tuple;
calculation is not cheap (cheaper than tuple_compare btw) but
precalculated hints allow to compare tuples without even fetching
tuple data.
 * first part of key_def is 'string', 'unsigned' or 'integer'
 * since hint is calculated using only the first part of key_def
(and only first several characters if it is a string) this part
must be different with high probability for every tuple pair.

@kshcherbatov: rebase to 2.1, fix conflicts

Needed for #3961
---
 src/box/key_def.c        |   1 +
 src/box/key_def.h        |  11 ++++
 src/box/tuple_compare.cc | 131 +++++++++++++++++++++++++++++++++++++++
 src/box/tuple_compare.h  |  40 ++++++++++++
 src/coll.c               |  23 +++++++
 src/coll.h               |   3 +
 6 files changed, 209 insertions(+)

diff --git a/src/box/key_def.c b/src/box/key_def.c
index 9411ade39..ceca1ebee 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -135,6 +135,7 @@ key_def_set_cmp(struct key_def *def)
 	def->tuple_compare_with_key = tuple_compare_with_key_create(def);
 	tuple_hash_func_set(def);
 	tuple_extract_key_set(def);
+	tuple_hint_set(def);
 }
 
 static void
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 85bed92bb..ff86de3ca 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -153,6 +153,13 @@ typedef uint32_t (*tuple_hash_t)(const struct tuple *tuple,
 typedef uint32_t (*key_hash_t)(const char *key,
 				struct key_def *key_def);
 
+/** @copydoc tuple_hint() */
+typedef uint64_t (*tuple_hint_t)(const struct tuple *tuple,
+				 struct key_def *key_def);
+
+/** @copydoc key_hint() */
+typedef uint64_t (*key_hint_t)(const char *key, struct key_def *key_def);
+
 /* Definition of a multipart key. */
 struct key_def {
 	/** @see tuple_compare() */
@@ -167,6 +174,10 @@ struct key_def {
 	tuple_hash_t tuple_hash;
 	/** @see key_hash() */
 	key_hash_t key_hash;
+	/** @see tuple_hint() */
+	tuple_hint_t tuple_hint;
+	/** @see key_hint() */
+	key_hint_t key_hint;
 	/**
 	 * Minimal part count which always is unique. For example,
 	 * if a secondary index is unique, then
diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
index 1d1ab8711..b8f91972c 100644
--- a/src/box/tuple_compare.cc
+++ b/src/box/tuple_compare.cc
@@ -1321,4 +1321,135 @@ tuple_compare_with_key_create(const struct key_def *def)
 	       compare_with_key_slowpath_funcs[cmp_func_idx];
 }
 
+static uint64_t
+key_hint_default(const char *key, struct key_def *key_def)
+{
+	(void)key;
+	(void)key_def;
+	return 0;
+}
+
+static uint64_t
+tuple_hint_default(const struct tuple *tuple, struct key_def *key_def)
+{
+	(void)tuple;
+	(void)key_def;
+	return 0;
+}
+
+static uint64_t
+key_hint_uint(const char *key, struct key_def *key_def)
+{
+	(void)key_def;
+	assert(key_def->parts->type == FIELD_TYPE_UNSIGNED);
+	assert(mp_typeof(*key) == MP_UINT);
+	return mp_decode_uint(&key);
+}
+
+static uint64_t
+tuple_hint_uint(const struct tuple *tuple, struct key_def *key_def)
+{
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	return key_hint_uint(field, key_def);
+}
+
+static uint64_t
+key_hint_int(const char *key, struct key_def *key_def)
+{
+	(void)key_def;
+	assert(key_def->parts->type == FIELD_TYPE_INTEGER);
+	if (mp_typeof(*key) == MP_UINT) {
+		uint64_t val = mp_decode_uint(&key);
+		if (val > INT64_MAX)
+			return INT64_MAX;
+		return val - (uint64_t)INT64_MIN;
+	} else {
+		assert(mp_typeof(*key) == MP_INT);
+		int64_t val = mp_decode_int(&key);
+		return (uint64_t)val - (uint64_t)INT64_MIN;
+	}
+}
+
+static uint64_t
+tuple_hint_int(const struct tuple *tuple, struct key_def *key_def)
+{
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	return key_hint_int(field, key_def);
+}
+
+static uint64_t
+key_hint_string(const char *key, struct key_def *key_def)
+{
+	(void)key_def;
+	assert(key_def->parts->type == FIELD_TYPE_STRING);
+	assert(mp_typeof(*key) == MP_STR);
+	uint32_t len;
+	const unsigned char *str =
+		(const unsigned char *)mp_decode_str(&key, &len);
+	uint64_t result = 0;
+	uint32_t process_len = MIN(len, 7);
+	for (uint32_t i = 0; i < process_len; i++) {
+		result <<= 9;
+		result |= 0x100;
+		result |= str[i];
+	}
+	result <<= 9 * (7 - process_len);
+	return result;
+}
+
+static uint64_t
+tuple_hint_string(const struct tuple *tuple, struct key_def *key_def)
+{
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	return key_hint_string(field, key_def);
+}
+
+static uint64_t
+key_hint_string_coll(const char *key, struct key_def *key_def)
+{
+	assert(key_def->parts->type == FIELD_TYPE_STRING);
+	assert(mp_typeof(*key) == MP_STR);
+	assert(key_def->parts->coll != NULL);
+	uint32_t len;
+	const char *str = mp_decode_str(&key, &len);
+	return key_def->parts->coll->hint(str, len, key_def->parts->coll);
+}
+
+static uint64_t
+tuple_hint_string_coll(const struct tuple *tuple, struct key_def *key_def)
+{
+	const char *field = tuple_field_by_part(tuple, key_def->parts);
+	return key_hint_string_coll(field, key_def);
+}
+
+void
+tuple_hint_set(struct key_def *def)
+{
+	def->key_hint = key_hint_default;
+	def->tuple_hint = tuple_hint_default;
+	if (key_part_is_nullable(def->parts))
+		return;
+	if (def->parts->type == FIELD_TYPE_STRING && def->parts->coll != NULL) {
+		def->key_hint = key_hint_string_coll;
+		def->tuple_hint = tuple_hint_string_coll;
+		return;
+	}
+	switch (def->parts->type) {
+	case FIELD_TYPE_UNSIGNED:
+		def->key_hint = key_hint_uint;
+		def->tuple_hint = tuple_hint_uint;
+		break;
+	case FIELD_TYPE_INTEGER:
+		def->key_hint = key_hint_int;
+		def->tuple_hint = tuple_hint_int;
+		break;
+	case FIELD_TYPE_STRING:
+		def->key_hint = key_hint_string;
+		def->tuple_hint = tuple_hint_string;
+		break;
+	default:
+		break;
+	};
+}
+
 /* }}} tuple_compare_with_key */
diff --git a/src/box/tuple_compare.h b/src/box/tuple_compare.h
index e3a63204f..be96ba38c 100644
--- a/src/box/tuple_compare.h
+++ b/src/box/tuple_compare.h
@@ -67,6 +67,46 @@ tuple_compare_create(const struct key_def *key_def);
 tuple_compare_with_key_t
 tuple_compare_with_key_create(const struct key_def *key_def);
 
+/**
+ * Get a comparison hint of a tuple.
+ * Hint is such a function h(tuple) in terms of particular key_def that
+ * has follows rules:
+ * if h(t1) < h(t2) then t1 < t2;
+ * if h(t1) > h(t2) then t1 > t2;
+ * if t1 == t2 then h(t1) == h(t2);
+ * These rules means that instead of direct tuple vs tuple (or tuple vs key)
+ * comparison one may compare theirs hints first; and only if theirs hints
+ * are equal compare the tuples themselves.
+ * @param tuple - tuple to get hint of.
+ * @param key_def - key_def that defines which comparison is used.
+ * @return the hint.
+ */
+static inline uint64_t
+tuple_hint(const struct tuple *tuple, struct key_def *key_def)
+{
+	return key_def->tuple_hint(tuple, key_def);
+}
+
+/**
+ * Get a comparison hint of a key.
+ * @See tuple_hint for hint term definition.
+ * @param key - key to get hint of.
+ * @param key_def - key_def that defines which comparison is used.
+ * @return the hint.
+ */
+static inline uint64_t
+key_hint(const char *key, struct key_def *key_def)
+{
+	return key_def->key_hint(key, key_def);
+}
+
+/**
+ * Initialize tuple_hint() and key_hint() functions for the key_def.
+ * @param key_def key definition to set up.
+ */
+void
+tuple_hint_set(struct key_def *key_def);
+
 #if defined(__cplusplus)
 } /* extern "C" */
 #endif /* defined(__cplusplus) */
diff --git a/src/coll.c b/src/coll.c
index 6d9c44dbf..cff2899cd 100644
--- a/src/coll.c
+++ b/src/coll.c
@@ -133,6 +133,28 @@ coll_bin_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
 	return s_len;
 }
 
+/** Get a compare hint of a string using ICU collation. */
+static uint64_t
+coll_icu_hint(const char *s, size_t s_len, struct coll *coll)
+{
+	UCharIterator itr;
+	uiter_setUTF8(&itr, s, s_len);
+	uint8_t buf[7];
+	uint32_t state[2] = {0, 0};
+	UErrorCode status = U_ZERO_ERROR;
+	int32_t got = ucol_nextSortKeyPart(coll->collator, &itr, state, buf,
+					   sizeof(buf), &status);
+	assert(got >= 0 && got <= 7);
+	uint64_t result = 0;
+	for (int32_t i = 0; i < got; i++) {
+		result <<= 9;
+		result |= 0x100;
+		result |= buf[i];
+	}
+	result <<= 9 * (7 - got);
+	return result;
+}
+
 /**
  * Set up ICU collator and init cmp and hash members of collation.
  * @param coll Collation to set up.
@@ -242,6 +264,7 @@ coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
 	}
 	coll->cmp = coll_icu_cmp;
 	coll->hash = coll_icu_hash;
+	coll->hint = coll_icu_hint;
 	return 0;
 }
 
diff --git a/src/coll.h b/src/coll.h
index 9c725712a..4030d38de 100644
--- a/src/coll.h
+++ b/src/coll.h
@@ -47,6 +47,8 @@ typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t,
 typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph,
 				uint32_t *pcarry, struct coll *coll);
 
+typedef uint64_t (*coll_hint_f)(const char *s, size_t s_len, struct coll *coll);
+
 struct UCollator;
 
 /**
@@ -61,6 +63,7 @@ struct coll {
 	/** String comparator. */
 	coll_cmp_f cmp;
 	coll_hash_f hash;
+	coll_hint_f hint;
 	/** Reference counter. */
 	int refs;
 	/**
-- 
2.20.1

  parent reply	other threads:[~2019-02-05 11:58 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-05 11:58 [PATCH v1 0/4] box: introduce hint option for memtx tree index Kirill Shcherbatov
2019-02-05 11:58 ` [PATCH v1 1/4] lib: introduce BPS_TREE_EQUAL custom comparator Kirill Shcherbatov
2019-02-11 15:46   ` Vladimir Davydov
2019-02-05 11:58 ` [PATCH v1 2/4] box: rework memtx_tree class to be reusable Kirill Shcherbatov
2019-02-11 15:49   ` Vladimir Davydov
2019-02-05 11:58 ` Kirill Shcherbatov [this message]
2019-02-11 15:57   ` [PATCH v1 3/4] box: introduce tuple compare hint Vladimir Davydov
2019-02-05 11:58 ` [PATCH v1 4/4] box: introduce hint option for memtx tree index Kirill Shcherbatov
2019-02-11 16:12   ` Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=535f26d85705f8157737bca7ed4978d4c9a016f4.1549367041.git.kshcherbatov@tarantool.org \
    --to=kshcherbatov@tarantool.org \
    --cc=aleks@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=vdavydov.dev@gmail.com \
    --subject='Re: [PATCH v1 3/4] box: introduce tuple compare hint' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox