[PATCH 05/13] vinyl: store tuple comparison hints in range tree

Vladimir Davydov vdavydov.dev at gmail.com
Tue Apr 2 20:33:42 MSK 2019


This patch incorporates tuple comparison hints into the vy_range_tree.
Now, vy_range->begin and ->end are augmented with hints which are used
for searching a range in an LSM tree.

Although this isn't strictly mandatory for multikey support, since we
only use key statements for range boundaries, which can't store multikey
fields, we still need to be able to look up a hinted tuple in a tree,
so augmenting range boundaries with hints makes the code look more
consistent. Besides, it should speed up LSM tree lookups a little.
---
 src/box/vy_lsm.c            | 44 +++++++++++++++++++++++++++++++-------------
 src/box/vy_point_lookup.c   |  7 ++++---
 src/box/vy_range.c          | 32 ++++++++++++++++++++------------
 src/box/vy_range.h          | 31 +++++++++++++++++++++----------
 src/box/vy_read_iterator.c  |  6 +++++-
 test/unit/vy_point_lookup.c |  3 ++-
 6 files changed, 83 insertions(+), 40 deletions(-)

diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c
index 1e08c0e1..44973a3a 100644
--- a/src/box/vy_lsm.c
+++ b/src/box/vy_lsm.c
@@ -325,8 +325,8 @@ vy_lsm_create(struct vy_lsm *lsm)
 	int64_t id = vy_log_next_id();
 
 	/* Create the initial range. */
-	struct vy_range *range = vy_range_new(vy_log_next_id(), NULL, NULL,
-					      lsm->cmp_def);
+	struct vy_range *range = vy_range_new(vy_log_next_id(), NULL, HINT_NONE,
+					      NULL, HINT_NONE, lsm->cmp_def);
 	if (range == NULL)
 		return -1;
 	assert(lsm->range_count == 0);
@@ -443,6 +443,7 @@ vy_lsm_recover_range(struct vy_lsm *lsm,
 		     struct vy_run_env *run_env, bool force_recovery)
 {
 	struct tuple *begin = NULL, *end = NULL;
+	hint_t begin_hint = HINT_NONE, end_hint = HINT_NONE;
 	struct vy_range *range = NULL;
 
 	if (range_info->begin != NULL) {
@@ -450,22 +451,26 @@ vy_lsm_recover_range(struct vy_lsm *lsm,
 					    range_info->begin);
 		if (begin == NULL)
 			goto out;
+		begin_hint = vy_stmt_hint(begin, lsm->cmp_def);
 	}
 	if (range_info->end != NULL) {
 		end = vy_key_from_msgpack(lsm->env->key_format,
 					  range_info->end);
 		if (end == NULL)
 			goto out;
+		end_hint = vy_stmt_hint(end, lsm->cmp_def);
 	}
 	if (begin != NULL && end != NULL &&
-	    vy_stmt_compare(begin, end, lsm->cmp_def) >= 0) {
+	    vy_stmt_compare_hinted(begin, begin_hint, end, end_hint,
+				   lsm->cmp_def) >= 0) {
 		diag_set(ClientError, ER_INVALID_VYLOG_FILE,
 			 tt_sprintf("begin >= end for range %lld",
 				    (long long)range_info->id));
 		goto out;
 	}
 
-	range = vy_range_new(range_info->id, begin, end, lsm->cmp_def);
+	range = vy_range_new(range_info->id, begin, begin_hint,
+			     end, end_hint, lsm->cmp_def);
 	if (range == NULL)
 		goto out;
 
@@ -584,8 +589,9 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery,
 		 * We need range tree initialized for all LSM trees,
 		 * even for dropped ones.
 		 */
-		struct vy_range *range = vy_range_new(vy_log_next_id(),
-						      NULL, NULL, lsm->cmp_def);
+		struct vy_range *range;
+		range = vy_range_new(vy_log_next_id(), NULL, HINT_NONE,
+				     NULL, HINT_NONE, lsm->cmp_def);
 		if (range == NULL)
 			return -1;
 		vy_lsm_add_range(lsm, range);
@@ -639,8 +645,9 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery,
 		int cmp = 0;
 		if (prev != NULL &&
 		    (prev->end == NULL || range->begin == NULL ||
-		     (cmp = vy_stmt_compare(prev->end, range->begin,
-					    lsm->cmp_def)) != 0)) {
+		     (cmp = vy_stmt_compare_hinted(prev->end, prev->end_hint,
+						   range->begin, range->begin_hint,
+						   lsm->cmp_def)) != 0)) {
 			const char *errmsg = cmp > 0 ?
 				"Nearby ranges %lld and %lld overlap" :
 				"Keys between ranges %lld and %lld not spanned";
@@ -1071,18 +1078,23 @@ vy_lsm_find_range_intersection(struct vy_lsm *lsm,
 		struct vy_range **begin, struct vy_range **end)
 {
 	struct tuple_format *key_format = lsm->env->key_format;
+	struct vy_range_tree_key tree_key;
 	struct tuple *stmt;
 
 	stmt = vy_key_from_msgpack(key_format, min_key);
 	if (stmt == NULL)
 		return -1;
-	*begin = vy_range_tree_psearch(&lsm->range_tree, stmt);
+	tree_key.stmt = stmt;
+	tree_key.hint = vy_stmt_hint(stmt, lsm->cmp_def);
+	*begin = vy_range_tree_psearch(&lsm->range_tree, &tree_key);
 	tuple_unref(stmt);
 
 	stmt = vy_key_from_msgpack(key_format, max_key);
 	if (stmt == NULL)
 		return -1;
-	*end = vy_range_tree_psearch(&lsm->range_tree, stmt);
+	tree_key.stmt = stmt;
+	tree_key.hint = vy_stmt_hint(stmt, lsm->cmp_def);
+	*end = vy_range_tree_psearch(&lsm->range_tree, &tree_key);
 	*end = vy_range_tree_next(&lsm->range_tree, *end);
 	tuple_unref(stmt);
 
@@ -1115,6 +1127,11 @@ vy_lsm_split_range(struct vy_lsm *lsm, struct vy_range *range)
 	keys[1] = split_key;
 	keys[2] = range->end;
 
+	hint_t hints[3];
+	hints[0] = range->begin_hint;
+	hints[1] = vy_stmt_hint(split_key, lsm->cmp_def);
+	hints[2] = range->end_hint;
+
 	/*
 	 * Allocate new ranges and create slices of
 	 * the old range's runs for them.
@@ -1122,8 +1139,8 @@ vy_lsm_split_range(struct vy_lsm *lsm, struct vy_range *range)
 	struct vy_slice *slice, *new_slice;
 	struct vy_range *part, *parts[2] = {NULL, };
 	for (int i = 0; i < n_parts; i++) {
-		part = vy_range_new(vy_log_next_id(), keys[i], keys[i + 1],
-				    lsm->cmp_def);
+		part = vy_range_new(vy_log_next_id(), keys[i], hints[i],
+				    keys[i + 1], hints[i + 1], lsm->cmp_def);
 		if (part == NULL)
 			goto fail;
 		parts[i] = part;
@@ -1209,7 +1226,8 @@ vy_lsm_coalesce_range(struct vy_lsm *lsm, struct vy_range *range)
 		return false;
 
 	struct vy_range *result = vy_range_new(vy_log_next_id(),
-			first->begin, last->end, lsm->cmp_def);
+				first->begin, first->begin_hint,
+				last->end, last->end_hint, lsm->cmp_def);
 	if (result == NULL)
 		goto fail_range;
 
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index 5f1be274..aa0625fb 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -161,10 +161,11 @@ vy_point_lookup_scan_slice(struct vy_lsm *lsm, struct vy_slice *slice,
  */
 static int
 vy_point_lookup_scan_slices(struct vy_lsm *lsm, const struct vy_read_view **rv,
-			    struct tuple *key, struct vy_history *history)
+			    struct tuple *key, hint_t hint,
+			    struct vy_history *history)
 {
 	struct vy_range *range = vy_range_tree_find_by_key(&lsm->range_tree,
-							   ITER_EQ, key);
+							   ITER_EQ, key, hint);
 	assert(range != NULL);
 	int slice_count = range->slice_count;
 	struct vy_slice **slices = (struct vy_slice **)
@@ -231,7 +232,7 @@ restart:
 	uint32_t mem_version = lsm->mem->version;
 	uint32_t mem_list_version = lsm->mem_list_version;
 
-	rc = vy_point_lookup_scan_slices(lsm, rv, key, &disk_history);
+	rc = vy_point_lookup_scan_slices(lsm, rv, key, hint, &disk_history);
 	if (rc != 0)
 		goto done;
 
diff --git a/src/box/vy_range.c b/src/box/vy_range.c
index f76615c4..edeb8a5f 100644
--- a/src/box/vy_range.c
+++ b/src/box/vy_range.c
@@ -63,23 +63,25 @@ vy_range_tree_cmp(struct vy_range *range_a, struct vy_range *range_b)
 		return 1;
 
 	assert(range_a->cmp_def == range_b->cmp_def);
-	return vy_stmt_compare(range_a->begin, range_b->begin,
-			       range_a->cmp_def);
+	return vy_stmt_compare_hinted(range_a->begin, range_a->begin_hint,
+				      range_b->begin, range_b->begin_hint,
+				      range_a->cmp_def);
 }
 
 int
-vy_range_tree_key_cmp(const struct tuple *stmt, struct vy_range *range)
+vy_range_tree_key_cmp(struct vy_range_tree_key *key, struct vy_range *range)
 {
 	/* Any key > -inf. */
 	if (range->begin == NULL)
 		return 1;
-	return vy_stmt_compare(stmt, range->begin, range->cmp_def);
+	return vy_stmt_compare_hinted(key->stmt, key->hint, range->begin,
+				      range->begin_hint, range->cmp_def);
 }
 
 struct vy_range *
 vy_range_tree_find_by_key(vy_range_tree_t *tree,
 			  enum iterator_type iterator_type,
-			  const struct tuple *key)
+			  const struct tuple *key, hint_t hint)
 {
 	if (vy_stmt_is_empty_key(key)) {
 		switch (iterator_type) {
@@ -97,6 +99,7 @@ vy_range_tree_find_by_key(vy_range_tree_t *tree,
 		}
 	}
 	struct vy_range *range;
+	struct vy_range_tree_key tree_key = { key, hint };
 	if (iterator_type == ITER_GE || iterator_type == ITER_GT ||
 	    iterator_type == ITER_EQ) {
 		/**
@@ -121,11 +124,13 @@ vy_range_tree_find_by_key(vy_range_tree_t *tree,
 		 * vy_range_tree_psearch finds least range with begin == key
 		 * or previous if equal was not found
 		 */
-		range = vy_range_tree_psearch(tree, key);
+		range = vy_range_tree_psearch(tree, &tree_key);
 		/* switch to previous for case (4) */
 		if (range != NULL && range->begin != NULL &&
 		    !vy_stmt_is_full_key(key, range->cmp_def) &&
-		    vy_stmt_compare(key, range->begin, range->cmp_def) == 0)
+		    vy_stmt_compare_hinted(key, hint, range->begin,
+					   range->begin_hint,
+					   range->cmp_def) == 0)
 			range = vy_range_tree_prev(tree, range);
 		/* for case 5 or subcase of case 4 */
 		if (range == NULL)
@@ -155,12 +160,13 @@ vy_range_tree_find_by_key(vy_range_tree_t *tree,
 		 * vy_range_tree_nsearch finds most range with begin == key
 		 * or next if equal was not found
 		 */
-		range = vy_range_tree_nsearch(tree, key);
+		range = vy_range_tree_nsearch(tree, &tree_key);
 		if (range != NULL) {
 			/* fix curr_range for cases 2 and 3 */
 			if (range->begin != NULL &&
-			    vy_stmt_compare(key, range->begin,
-					    range->cmp_def) != 0) {
+			    vy_stmt_compare_hinted(key, hint, range->begin,
+						   range->begin_hint,
+						   range->cmp_def) != 0) {
 				struct vy_range *prev;
 				prev = vy_range_tree_prev(tree, range);
 				if (prev != NULL)
@@ -175,8 +181,8 @@ vy_range_tree_find_by_key(vy_range_tree_t *tree,
 }
 
 struct vy_range *
-vy_range_new(int64_t id, struct tuple *begin, struct tuple *end,
-	     struct key_def *cmp_def)
+vy_range_new(int64_t id, struct tuple *begin, hint_t begin_hint,
+	     struct tuple *end, hint_t end_hint, struct key_def *cmp_def)
 {
 	struct vy_range *range = calloc(1, sizeof(*range));
 	if (range == NULL) {
@@ -193,6 +199,8 @@ vy_range_new(int64_t id, struct tuple *begin, struct tuple *end,
 		tuple_ref(end);
 		range->end = end;
 	}
+	range->begin_hint = begin_hint;
+	range->end_hint = end_hint;
 	range->cmp_def = cmp_def;
 	rlist_create(&range->slices);
 	heap_node_create(&range->heap_node);
diff --git a/src/box/vy_range.h b/src/box/vy_range.h
index 91f2682c..7b5988bf 100644
--- a/src/box/vy_range.h
+++ b/src/box/vy_range.h
@@ -39,6 +39,7 @@
 #include <small/rlist.h>
 
 #include "iterator_type.h"
+#include "key_def.h"
 #define HEAP_FORWARD_DECLARATION
 #include "salad/heap.h"
 #include "trivia/util.h"
@@ -49,7 +50,6 @@ extern "C" {
 #endif /* defined(__cplusplus) */
 
 struct index_opts;
-struct key_def;
 struct tuple;
 struct vy_slice;
 
@@ -65,8 +65,12 @@ struct vy_range {
 	 * the full idexed key.
 	 */
 	struct tuple *begin;
+	/** Comparison hint of the range lower bound. */
+	hint_t begin_hint;
 	/** Range upper bound. NULL if range is rightmost. */
 	struct tuple *end;
+	/** Comparison hint of the range upper bound. */
+	hint_t end_hint;
 	/** Key definition for comparing range boundaries.
 	 * Contains secondary and primary key parts for secondary
 	 * keys, to ensure an always distinct result for
@@ -164,15 +168,19 @@ vy_range_is_scheduled(struct vy_range *range)
  * vy_range->begin. Ranges in a tree are supposed to span
  * all possible keys without overlaps.
  */
+struct vy_range_tree_key {
+	const struct tuple *stmt;
+	hint_t hint;
+};
 int
 vy_range_tree_cmp(struct vy_range *range_a, struct vy_range *range_b);
 int
-vy_range_tree_key_cmp(const struct tuple *stmt, struct vy_range *range);
+vy_range_tree_key_cmp(struct vy_range_tree_key *key, struct vy_range *range);
 
 typedef rb_tree(struct vy_range) vy_range_tree_t;
 rb_gen_ext_key(MAYBE_UNUSED static inline, vy_range_tree_, vy_range_tree_t,
 	       struct vy_range, tree_node, vy_range_tree_cmp,
-	       const struct tuple *, vy_range_tree_key_cmp);
+	       struct vy_range_tree_key *, vy_range_tree_key_cmp);
 
 /**
  * Find the first range in which a given key should be looked up.
@@ -180,29 +188,32 @@ rb_gen_ext_key(MAYBE_UNUSED static inline, vy_range_tree_, vy_range_tree_t,
  * @param tree          Range tree to search.
  * @param iterator_type Iterator type.
  * @param key           Key to look up.
+ * @param hint          Comparison hint of @key.
  *
  * @retval              The first range to look up the key in.
  */
 struct vy_range *
 vy_range_tree_find_by_key(vy_range_tree_t *tree,
 			  enum iterator_type iterator_type,
-			  const struct tuple *key);
+			  const struct tuple *key, hint_t hint);
 
 /**
  * Allocate and initialize a range (either a new one or for
  * restore from disk).
  *
- * @param id        Range id.
- * @param begin     Range begin (inclusive) or NULL for -inf.
- * @param end       Range end (exclusive) or NULL for +inf.
- * @param cmp_def   Key definition for comparing range boundaries.
+ * @param id          Range id.
+ * @param begin       Range begin (inclusive) or NULL for -inf.
+ * @param begin_hint  Comparison hint of @begin.
+ * @param end         Range end (exclusive) or NULL for +inf.
+ * @param end_hint    Comparison hint of @end.
+ * @param cmp_def     Key definition for comparing range boundaries.
  *
  * @retval not NULL The new range.
  * @retval NULL     Out of memory.
  */
 struct vy_range *
-vy_range_new(int64_t id, struct tuple *begin, struct tuple *end,
-	     struct key_def *cmp_def);
+vy_range_new(int64_t id, struct tuple *begin, hint_t begin_hint,
+	     struct tuple *end, hint_t end_hint, struct key_def *cmp_def);
 
 /**
  * Free a range and all its slices.
diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c
index b5012748..f1d8c8e4 100644
--- a/src/box/vy_read_iterator.c
+++ b/src/box/vy_read_iterator.c
@@ -730,10 +730,14 @@ vy_read_iterator_restore(struct vy_read_iterator *itr)
 {
 	vy_read_iterator_cleanup(itr);
 
+	const struct tuple *key = itr->last_stmt != NULL ?
+				  itr->last_stmt : itr->key;
+	hint_t hint = vy_stmt_hint(key, itr->lsm->cmp_def);
+
 	itr->mem_list_version = itr->lsm->mem_list_version;
 	itr->range_tree_version = itr->lsm->range_tree_version;
 	itr->curr_range = vy_range_tree_find_by_key(&itr->lsm->range_tree,
-			itr->iterator_type, itr->last_stmt ?: itr->key);
+				itr->iterator_type, key, hint);
 	itr->range_version = itr->curr_range->version;
 
 	if (itr->tx != NULL) {
diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c
index f3cd84d4..8fb1a8d1 100644
--- a/test/unit/vy_point_lookup.c
+++ b/test/unit/vy_point_lookup.c
@@ -98,7 +98,8 @@ test_basic()
 				       index_def, format, NULL, 0);
 	isnt(pk, NULL, "lsm is not NULL")
 
-	struct vy_range *range = vy_range_new(1, NULL, NULL, pk->cmp_def);
+	struct vy_range *range = vy_range_new(1, NULL, HINT_NONE,
+					      NULL, HINT_NONE, pk->cmp_def);
 
 	isnt(pk, NULL, "range is not NULL")
 	vy_lsm_add_range(pk, range);
-- 
2.11.0




More information about the Tarantool-patches mailing list