[PATCH 08/13] vinyl: propagate tuple comparison hints to write iterator

Vladimir Davydov vdavydov.dev at gmail.com
Tue Apr 2 20:33:45 MSK 2019


This patch propagates hints from source streams (mem, run) to the write
iterator, which now uses the hints to efficiently compare tuples and
returns a hint along with a resulting statement.

Apart from speeding up lookups, this is also needed for multikey index
support, because multikey indexes will reuse hints to store offsets of
indexed array entries.

Note, presently the write iterator doesn't really need to return hints,
because a hint isn't needed to write a statement to a run. The only
reason to do this now is for the write iterator to conform to the
vy_stmt_stream interface. However, once multikey indexes are introduced,
we will need hints, because the write iterator will be allowed to return
a multikey tuple coming directly from the memory level, from which it's
impossible to extract a key without a hint.
---
 src/box/vinyl.c               |  3 +-
 src/box/vy_mem.c              |  5 ++-
 src/box/vy_run.c              |  6 +++-
 src/box/vy_scheduler.c        |  3 +-
 src/box/vy_stmt_stream.h      |  4 ++-
 src/box/vy_write_iterator.c   | 83 +++++++++++++++++++++++++++++++------------
 test/unit/vy_point_lookup.c   |  3 +-
 test/unit/vy_write_iterator.c |  3 +-
 8 files changed, 81 insertions(+), 29 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 80b6aecd..d8e22eb8 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -3001,11 +3001,12 @@ vy_send_range_f(struct cbus_call_msg *cmsg)
 {
 	struct vy_join_ctx *ctx = container_of(cmsg, struct vy_join_ctx, cmsg);
 
+	hint_t hint;
 	struct tuple *stmt;
 	int rc = ctx->wi->iface->start(ctx->wi);
 	if (rc != 0)
 		goto err;
-	while ((rc = ctx->wi->iface->next(ctx->wi, &stmt)) == 0 &&
+	while ((rc = ctx->wi->iface->next(ctx->wi, &stmt, &hint)) == 0 &&
 	       stmt != NULL) {
 		struct xrow_header xrow;
 		rc = vy_stmt_encode_primary(stmt, ctx->key_def,
diff --git a/src/box/vy_mem.c b/src/box/vy_mem.c
index aea0ecfb..87ad0661 100644
--- a/src/box/vy_mem.c
+++ b/src/box/vy_mem.c
@@ -641,13 +641,15 @@ vy_mem_iterator_close(struct vy_mem_iterator *itr)
 }
 
 static NODISCARD int
-vy_mem_stream_next(struct vy_stmt_stream *virt_stream, struct tuple **ret)
+vy_mem_stream_next(struct vy_stmt_stream *virt_stream,
+		   struct tuple **ret, hint_t *ret_hint)
 {
 	assert(virt_stream->iface->next == vy_mem_stream_next);
 	struct vy_mem_stream *stream = (struct vy_mem_stream *)virt_stream;
 
 	if (vy_mem_tree_iterator_is_invalid(&stream->curr_pos)) {
 		*ret = NULL;
+		*ret_hint = HINT_NONE;
 		return 0;
 	}
 
@@ -655,6 +657,7 @@ vy_mem_stream_next(struct vy_stmt_stream *virt_stream, struct tuple **ret)
 	elem = *vy_mem_tree_iterator_get_elem(&stream->mem->tree,
 					      &stream->curr_pos);
 	*ret = (struct tuple *)elem.stmt;
+	*ret_hint = elem.hint;
 	vy_mem_tree_iterator_next(&stream->mem->tree, &stream->curr_pos);
 	return 0;
 }
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index 897ece2a..2aac2b42 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -2676,11 +2676,14 @@ vy_slice_stream_search(struct vy_stmt_stream *virt_stream)
  * @return 0 on success, -1 on memory or read error.
  */
 static NODISCARD int
-vy_slice_stream_next(struct vy_stmt_stream *virt_stream, struct tuple **ret)
+vy_slice_stream_next(struct vy_stmt_stream *virt_stream,
+		     struct tuple **ret, hint_t *ret_hint)
 {
 	assert(virt_stream->iface->next == vy_slice_stream_next);
 	struct vy_slice_stream *stream = (struct vy_slice_stream *)virt_stream;
+
 	*ret = NULL;
+	*ret_hint = HINT_NONE;
 
 	/* If the slice is ended, return EOF */
 	if (stream->page_no > stream->slice->last_page_no)
@@ -2711,6 +2714,7 @@ vy_slice_stream_next(struct vy_stmt_stream *virt_stream, struct tuple **ret)
 		tuple_unref(stream->tuple);
 	stream->tuple = tuple;
 	*ret = tuple;
+	*ret_hint = hint;
 
 	/* Increment position */
 	stream->pos_in_page++;
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 723bc9ee..78c4a826 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -1054,8 +1054,9 @@ vy_task_write_run(struct vy_task *task)
 		goto fail_abort_writer;
 	int rc;
 	int loops = 0;
+	hint_t hint;
 	struct tuple *stmt = NULL;
-	while ((rc = wi->iface->next(wi, &stmt)) == 0 && stmt != NULL) {
+	while ((rc = wi->iface->next(wi, &stmt, &hint)) == 0 && stmt != NULL) {
 		inj = errinj(ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT, ERRINJ_DOUBLE);
 		if (inj != NULL && inj->dparam > 0)
 			usleep(inj->dparam * 1000000);
diff --git a/src/box/vy_stmt_stream.h b/src/box/vy_stmt_stream.h
index 098cc8eb..dd93211c 100644
--- a/src/box/vy_stmt_stream.h
+++ b/src/box/vy_stmt_stream.h
@@ -31,6 +31,7 @@
  * SUCH DAMAGE.
  */
 
+#include "key_def.h"
 #include <trivia/util.h>
 
 #if defined(__cplusplus)
@@ -55,7 +56,8 @@ typedef NODISCARD int
  * Get next tuple from a stream.
  */
 typedef NODISCARD int
-(*vy_stream_next_f)(struct vy_stmt_stream *virt_stream, struct tuple **ret);
+(*vy_stream_next_f)(struct vy_stmt_stream *virt_stream,
+		    struct tuple **ret, hint_t *ret_hint);
 
 /**
  * Close the stream.
diff --git a/src/box/vy_write_iterator.c b/src/box/vy_write_iterator.c
index 6818a31c..a2a782fb 100644
--- a/src/box/vy_write_iterator.c
+++ b/src/box/vy_write_iterator.c
@@ -47,6 +47,8 @@ struct vy_write_src {
 	struct heap_node heap_node;
 	/* Current tuple in the source (with minimal key and maximal LSN) */
 	struct tuple *tuple;
+	/** Comparison hint of the current tuple. */
+	hint_t hint;
 	/**
 	 * If this flag is set, this is a so called "virtual"
 	 * source. A virtual source does not stand for any mem or
@@ -84,6 +86,8 @@ struct vy_write_history {
 	struct vy_write_history *next;
 	/** Key. */
 	struct tuple *tuple;
+	/** Tuple comparison hint. */
+	hint_t hint;
 };
 
 /**
@@ -93,19 +97,22 @@ struct vy_write_history {
  * orders statements on the same key chronologically.
  *
  * @param tuple Key version.
+ * @param hint Tuple comparison hint.
  * @param next Next version of the key.
  *
  * @retval not NULL Created object.
  * @retval NULL     Memory error.
  */
 static inline struct vy_write_history *
-vy_write_history_new(struct tuple *tuple, struct vy_write_history *next)
+vy_write_history_new(struct tuple *tuple, hint_t hint,
+		     struct vy_write_history *next)
 {
 	struct vy_write_history *h;
 	h = region_alloc_object(&fiber()->gc, struct vy_write_history);
 	if (h == NULL)
 		return NULL;
 	h->tuple = tuple;
+	h->hint = hint;
 	assert(next == NULL || (next->tuple != NULL &&
 	       vy_stmt_lsn(next->tuple) > vy_stmt_lsn(tuple)));
 	h->next = next;
@@ -134,6 +141,8 @@ struct vy_read_view_stmt {
 	int64_t vlsn;
 	/** Result key version, visible to this @vlsn. */
 	struct tuple *tuple;
+	/** Tuple comparison hint. */
+	hint_t hint;
 	/**
 	 * A history of changes building up to this read
 	 * view. Once built, it is merged into a single
@@ -153,6 +162,7 @@ vy_read_view_stmt_destroy(struct vy_read_view_stmt *rv)
 	if (rv->tuple != NULL)
 		vy_stmt_unref_if_possible(rv->tuple);
 	rv->tuple = NULL;
+	rv->hint = HINT_NONE;
 	if (rv->history != NULL)
 		vy_write_history_destroy(rv->history);
 	rv->history = NULL;
@@ -184,6 +194,8 @@ struct vy_write_iterator {
 	 * of the old tuple from secondary indexes.
 	 */
 	struct tuple *deferred_delete_stmt;
+	/** Comparison hint of the deferred DELETE statement. */
+	hint_t deferred_delete_hint;
 	/** Length of the @read_views. */
 	int rv_count;
 	/**
@@ -223,7 +235,9 @@ heap_less(heap_t *heap, struct vy_write_src *src1, struct vy_write_src *src2)
 	struct vy_write_iterator *stream =
 		container_of(heap, struct vy_write_iterator, src_heap);
 
-	int cmp = vy_stmt_compare(src1->tuple, src2->tuple, stream->cmp_def);
+	int cmp = vy_stmt_compare_hinted(src1->tuple, src1->hint,
+					 src2->tuple, src2->hint,
+					 stream->cmp_def);
 	if (cmp != 0)
 		return cmp < 0;
 
@@ -298,7 +312,7 @@ vy_write_iterator_add_src(struct vy_write_iterator *stream,
 		if (rc != 0)
 			return rc;
 	}
-	int rc = src->stream.iface->next(&src->stream, &src->tuple);
+	int rc = src->stream.iface->next(&src->stream, &src->tuple, &src->hint);
 	if (rc != 0 || src->tuple == NULL)
 		goto stop;
 
@@ -363,12 +377,17 @@ vy_write_iterator_new(struct key_def *cmp_def, bool is_primary,
 	}
 	stream->stmt_i = -1;
 	stream->rv_count = count;
+	stream->read_views[0].hint = HINT_NONE;
 	stream->read_views[0].vlsn = INT64_MAX;
 	count--;
 	struct vy_read_view *rv;
 	/* Descending order. */
-	rlist_foreach_entry(rv, read_views, in_read_views)
-		stream->read_views[count--].vlsn = rv->vlsn;
+	rlist_foreach_entry(rv, read_views, in_read_views) {
+		struct vy_read_view_stmt *rv_stmt;
+		rv_stmt = &stream->read_views[count--];
+		rv_stmt->hint = HINT_NONE;
+		rv_stmt->vlsn = rv->vlsn;
+	}
 	assert(count == 0);
 
 	stream->base.iface = &vy_slice_stream_iface;
@@ -378,6 +397,7 @@ vy_write_iterator_new(struct key_def *cmp_def, bool is_primary,
 	stream->is_primary = is_primary;
 	stream->is_last_level = is_last_level;
 	stream->deferred_delete_handler = handler;
+	stream->deferred_delete_hint = HINT_NONE;
 	return &stream->base;
 }
 
@@ -423,6 +443,7 @@ vy_write_iterator_stop(struct vy_stmt_stream *vstream)
 	if (stream->deferred_delete_stmt != NULL) {
 		vy_stmt_unref_if_possible(stream->deferred_delete_stmt);
 		stream->deferred_delete_stmt = NULL;
+		stream->deferred_delete_hint = HINT_NONE;
 	}
 	struct vy_deferred_delete_handler *handler =
 			stream->deferred_delete_handler;
@@ -489,7 +510,7 @@ vy_write_iterator_merge_step(struct vy_write_iterator *stream)
 {
 	struct vy_write_src *src = vy_source_heap_top(&stream->src_heap);
 	assert(src != NULL);
-	int rc = src->stream.iface->next(&src->stream, &src->tuple);
+	int rc = src->stream.iface->next(&src->stream, &src->tuple, &src->hint);
 	if (rc != 0)
 		return rc;
 	if (src->tuple != NULL)
@@ -519,11 +540,10 @@ vy_write_iterator_get_vlsn(struct vy_write_iterator *stream, int rv_i)
 }
 
 /**
- * Remember the current tuple of the @src as a part of the
- * current read view.
- * @param History objects allocator.
+ * Remember a tuple as a part of the current read view.
  * @param stream Write iterator.
- * @param src Source of the wanted tuple.
+ * @param tuple Tuple to push.
+ * @param hint Tuple comparison hint.
  * @param current_rv_i Index of the current read view.
  *
  * @retval  0 Success.
@@ -531,13 +551,13 @@ vy_write_iterator_get_vlsn(struct vy_write_iterator *stream, int rv_i)
  */
 static inline int
 vy_write_iterator_push_rv(struct vy_write_iterator *stream,
-			  struct tuple *tuple, int current_rv_i)
+			  struct tuple *tuple, hint_t hint, int current_rv_i)
 {
 	assert(current_rv_i < stream->rv_count);
 	struct vy_read_view_stmt *rv = &stream->read_views[current_rv_i];
 	assert(rv->vlsn >= vy_stmt_lsn(tuple));
 	struct vy_write_history *h =
-		vy_write_history_new(tuple, rv->history);
+		vy_write_history_new(tuple, hint, rv->history);
 	if (h == NULL)
 		return -1;
 	rv->history = h;
@@ -557,11 +577,14 @@ vy_write_iterator_push_rv(struct vy_write_iterator *stream,
  * @retval     NULL End of the key (not the end of the sources).
  */
 static inline struct tuple *
-vy_write_iterator_pop_read_view_stmt(struct vy_write_iterator *stream)
+vy_write_iterator_pop_read_view_stmt(struct vy_write_iterator *stream,
+				     hint_t *hint)
 {
 	struct vy_read_view_stmt *rv;
-	if (stream->rv_used_count == 0)
+	if (stream->rv_used_count == 0) {
+		*hint = HINT_NONE;
 		return NULL;
+	}
 	/* Find a next non-empty history element. */
 	do {
 		assert(stream->stmt_i + 1 < stream->rv_count);
@@ -574,7 +597,9 @@ vy_write_iterator_pop_read_view_stmt(struct vy_write_iterator *stream)
 	if (stream->last_stmt != NULL)
 		vy_stmt_unref_if_possible(stream->last_stmt);
 	stream->last_stmt = rv->tuple;
+	*hint = rv->hint;
 	rv->tuple = NULL;
+	rv->hint = HINT_NONE;
 	return stream->last_stmt;
 }
 
@@ -590,7 +615,7 @@ vy_write_iterator_pop_read_view_stmt(struct vy_write_iterator *stream)
  */
 static int
 vy_write_iterator_deferred_delete(struct vy_write_iterator *stream,
-				  struct tuple *stmt)
+				  struct tuple *stmt, hint_t hint)
 {
 	/*
 	 * UPSERTs cannot change secondary index parts neither
@@ -613,6 +638,7 @@ vy_write_iterator_deferred_delete(struct vy_write_iterator *stream,
 			return -1;
 		vy_stmt_unref_if_possible(stream->deferred_delete_stmt);
 		stream->deferred_delete_stmt = NULL;
+		stream->deferred_delete_hint = HINT_NONE;
 	}
 	/*
 	 * Remember the current statement if it is marked with
@@ -625,6 +651,7 @@ vy_write_iterator_deferred_delete(struct vy_write_iterator *stream,
 		       vy_stmt_type(stmt) == IPROTO_REPLACE);
 		vy_stmt_ref_if_possible(stmt);
 		stream->deferred_delete_stmt = stmt;
+		stream->deferred_delete_hint = hint;
 	}
 	return 0;
 }
@@ -672,6 +699,7 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream,
 	struct vy_write_src end_of_key_src;
 	end_of_key_src.is_end_of_key = true;
 	end_of_key_src.tuple = src->tuple;
+	end_of_key_src.hint = src->hint;
 	int rc = vy_source_heap_insert(&stream->src_heap, &end_of_key_src);
 	if (rc) {
 		diag_set(OutOfMemory, sizeof(void *),
@@ -713,7 +741,7 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream,
 		 */
 		if (stream->is_primary) {
 			rc = vy_write_iterator_deferred_delete(stream,
-							       src->tuple);
+						src->tuple, src->hint);
 			if (rc != 0)
 				break;
 		}
@@ -751,7 +779,7 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream,
 			goto next_lsn;
 		}
 
-		rc = vy_write_iterator_push_rv(stream, src->tuple,
+		rc = vy_write_iterator_push_rv(stream, src->tuple, src->hint,
 					       current_rv_i);
 		if (rc != 0)
 			break;
@@ -804,6 +832,7 @@ next_lsn:
  *
  * @param stream Write iterator.
  * @param prev_tuple Tuple from the previous read view (can be NULL).
+ * @param prev_hint Comparison hint of the previous tuple.
  * @param rv Read view to merge.
  * @param is_first_insert Set if the oldest statement for the
  * current key among all sources is an INSERT.
@@ -812,11 +841,13 @@ next_lsn:
  * @retval -1 Memory error.
  */
 static NODISCARD int
-vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *prev_tuple,
+vy_read_view_merge(struct vy_write_iterator *stream,
+		   struct tuple *prev_tuple, hint_t prev_hint,
 		   struct vy_read_view_stmt *rv, bool is_first_insert)
 {
 	assert(rv != NULL);
 	assert(rv->tuple == NULL);
+	assert(rv->hint == HINT_NONE);
 	assert(rv->history != NULL);
 	struct vy_write_history *h = rv->history;
 	/*
@@ -851,6 +882,7 @@ vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *prev_tuple,
 			return -1;
 		vy_stmt_unref_if_possible(h->tuple);
 		h->tuple = applied;
+		h->hint = prev_hint;
 	}
 	/* Squash the rest of UPSERTs. */
 	struct vy_write_history *result = h;
@@ -874,8 +906,10 @@ vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *prev_tuple,
 		result->next = h;
 	}
 	rv->tuple = result->tuple;
+	rv->hint = result->hint;
 	rv->history = NULL;
 	result->tuple = NULL;
+	result->hint = HINT_NONE;
 	assert(result->next == NULL);
 	/*
 	 * The write iterator generates deferred DELETEs for all
@@ -908,6 +942,7 @@ vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *prev_tuple,
 		 */
 		vy_stmt_unref_if_possible(rv->tuple);
 		rv->tuple = NULL;
+		rv->hint = HINT_NONE;
 	} else if ((is_first_insert &&
 		    vy_stmt_type(rv->tuple) == IPROTO_REPLACE) ||
 		   (!is_first_insert &&
@@ -975,11 +1010,12 @@ vy_write_iterator_build_read_views(struct vy_write_iterator *stream, int *count)
 	 * here > 0.
 	 */
 	assert(rv >= &stream->read_views[0] && rv->history != NULL);
+	hint_t prev_hint = HINT_NONE;
 	struct tuple *prev_tuple = NULL;
 	for (; rv >= &stream->read_views[0]; --rv) {
 		if (rv->history == NULL)
 			continue;
-		if (vy_read_view_merge(stream, prev_tuple, rv,
+		if (vy_read_view_merge(stream, prev_tuple, prev_hint, rv,
 				       is_first_insert) != 0)
 			goto error;
 		assert(rv->history == NULL);
@@ -988,6 +1024,7 @@ vy_write_iterator_build_read_views(struct vy_write_iterator *stream, int *count)
 		stream->rv_used_count++;
 		++*count;
 		prev_tuple = rv->tuple;
+		prev_hint = rv->hint;
 	}
 	region_truncate(region, used);
 	return 0;
@@ -1007,7 +1044,7 @@ error:
  */
 static NODISCARD int
 vy_write_iterator_next(struct vy_stmt_stream *vstream,
-		       struct tuple **ret)
+		       struct tuple **ret, hint_t *ret_hint)
 {
 	assert(vstream->iface->next == vy_write_iterator_next);
 	struct vy_write_iterator *stream = (struct vy_write_iterator *)vstream;
@@ -1015,7 +1052,7 @@ vy_write_iterator_next(struct vy_stmt_stream *vstream,
 	 * Try to get the next statement from the current key
 	 * read view statements sequence.
 	 */
-	*ret = vy_write_iterator_pop_read_view_stmt(stream);
+	*ret = vy_write_iterator_pop_read_view_stmt(stream, ret_hint);
 	if (*ret != NULL)
 		return 0;
 	/*
@@ -1036,7 +1073,9 @@ vy_write_iterator_next(struct vy_stmt_stream *vstream,
 			if (stream->last_stmt != NULL)
 				vy_stmt_unref_if_possible(stream->last_stmt);
 			*ret = stream->last_stmt = stream->deferred_delete_stmt;
+			*ret_hint = stream->deferred_delete_hint;
 			stream->deferred_delete_stmt = NULL;
+			stream->deferred_delete_hint = HINT_NONE;
 			return 0;
 		}
 	}
@@ -1059,7 +1098,7 @@ vy_write_iterator_next(struct vy_stmt_stream *vstream,
 			break;
 	}
 	/* Again try to get the statement, after calling next_key(). */
-	*ret = vy_write_iterator_pop_read_view_stmt(stream);
+	*ret = vy_write_iterator_pop_read_view_stmt(stream, ret_hint);
 	return 0;
 }
 
diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c
index bb9798a9..eef50c30 100644
--- a/test/unit/vy_point_lookup.c
+++ b/test/unit/vy_point_lookup.c
@@ -30,8 +30,9 @@ write_run(struct vy_run *run, const char *dir_name,
 	if (wi->iface->start(wi) != 0)
 		goto fail_abort_writer;
 	int rc;
+	hint_t hint;
 	struct tuple *stmt = NULL;
-	while ((rc = wi->iface->next(wi, &stmt)) == 0 && stmt != NULL) {
+	while ((rc = wi->iface->next(wi, &stmt, &hint)) == 0 && stmt != NULL) {
 		rc = vy_run_writer_append_stmt(&writer, stmt);
 		if (rc != 0)
 			break;
diff --git a/test/unit/vy_write_iterator.c b/test/unit/vy_write_iterator.c
index ecbc6281..4d55f743 100644
--- a/test/unit/vy_write_iterator.c
+++ b/test/unit/vy_write_iterator.c
@@ -110,11 +110,12 @@ compare_write_iterator_results(const struct vy_stmt_template *content,
 	fail_if(wi == NULL);
 	fail_if(vy_write_iterator_new_mem(wi, mem) != 0);
 
+	hint_t hint;
 	struct tuple *ret;
 	fail_if(wi->iface->start(wi) != 0);
 	int i = 0;
 	do {
-		fail_if(wi->iface->next(wi, &ret) != 0);
+		fail_if(wi->iface->next(wi, &ret, &hint) != 0);
 		if (ret == NULL)
 			break;
 		fail_if(i >= expected_count);
-- 
2.11.0




More information about the Tarantool-patches mailing list