Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH 18/25] vinyl: teach write iterator to return overwritten tuples
Date: Fri, 27 Jul 2018 14:29:58 +0300	[thread overview]
Message-ID: <43644fa2a34a37d5006113f405d8125964dc1653.1532689066.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1532689065.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1532689065.git.vdavydov.dev@gmail.com>

A REPLACE/DELETE request is supposed to delete the old tuple from all
indexes. In order to generate a DELETE statement for a secondary index,
we need to look up the old tuple in the primary index, which is costly
as it implies a random disk access. In the scope of #2129 we are
planning to optimize out the lookup by deferring generation of the
DELETE statement until primary index compaction.

To do that, we need to differentiate statements for which DELETE was
deferred from those for which it was inserted when the request was
executed (as it is the case for UPDATE). So this patch introduces a per
statement flag, VY_STMT_DEFERRED_DELETE. If set for a REPLACE or DELETE
statement, it will make the write iterator to return the overwritten
statement to the caller via a callback.

Needed for #2129
---
 src/box/vinyl.c                    |   3 +-
 src/box/vy_scheduler.c             |   4 +-
 src/box/vy_stmt.h                  |  19 +++
 src/box/vy_write_iterator.c        | 135 ++++++++++++++++++++-
 src/box/vy_write_iterator.h        |  27 ++++-
 test/unit/vy_iterators_helper.c    |   5 +
 test/unit/vy_iterators_helper.h    |  12 +-
 test/unit/vy_point_lookup.c        |   4 +-
 test/unit/vy_write_iterator.c      | 232 ++++++++++++++++++++++++++++++++++---
 test/unit/vy_write_iterator.result |  22 +++-
 10 files changed, 430 insertions(+), 33 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index f2f93736..ddaa22bb 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -3007,7 +3007,8 @@ vy_send_range(struct vy_join_ctx *ctx,
 	struct rlist fake_read_views;
 	rlist_create(&fake_read_views);
 	ctx->wi = vy_write_iterator_new(ctx->key_def, ctx->format,
-					true, true, &fake_read_views);
+					true, true, &fake_read_views,
+					NULL, NULL);
 	if (ctx->wi == NULL) {
 		rc = -1;
 		goto out;
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index b206a605..06dbb1f8 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -1006,7 +1006,7 @@ vy_task_dump_new(struct vy_scheduler *scheduler, struct vy_lsm *lsm,
 	bool is_last_level = (lsm->run_count == 0);
 	wi = vy_write_iterator_new(task->cmp_def, lsm->disk_format,
 				   lsm->index_id == 0, is_last_level,
-				   scheduler->read_views);
+				   scheduler->read_views, NULL, NULL);
 	if (wi == NULL)
 		goto err_wi;
 	rlist_foreach_entry(mem, &lsm->sealed, in_sealed) {
@@ -1273,7 +1273,7 @@ vy_task_compact_new(struct vy_scheduler *scheduler, struct vy_lsm *lsm,
 	bool is_last_level = (range->compact_priority == range->slice_count);
 	wi = vy_write_iterator_new(task->cmp_def, lsm->disk_format,
 				   lsm->index_id == 0, is_last_level,
-				   scheduler->read_views);
+				   scheduler->read_views, NULL, NULL);
 	if (wi == NULL)
 		goto err_wi;
 
diff --git a/src/box/vy_stmt.h b/src/box/vy_stmt.h
index bcf855dd..8de8aa84 100644
--- a/src/box/vy_stmt.h
+++ b/src/box/vy_stmt.h
@@ -70,6 +70,25 @@ extern struct tuple_format_vtab vy_tuple_format_vtab;
  */
 extern size_t vy_max_tuple_size;
 
+/** Statement flags. */
+enum {
+	/**
+	 * A REPLACE/DELETE request is supposed to delete the old
+	 * tuple from all indexes. In order to generate a DELETE
+	 * statement for a secondary index, we need to look up the
+	 * old tuple in the primary index, which is expensive as
+	 * it implies a random disk access. We can optimize out the
+	 * lookup by deferring generation of the DELETE statement
+	 * until primary index compaction.
+	 *
+	 * The following flag is set for those REPLACE and DELETE
+	 * statements that skipped deletion of the old tuple from
+	 * secondary indexes. It makes the write iterator generate
+	 * DELETE statements for them during compaction.
+	 */
+	VY_STMT_DEFERRED_DELETE		= 1 << 0,
+};
+
 /**
  * There are two groups of statements:
  *
diff --git a/src/box/vy_write_iterator.c b/src/box/vy_write_iterator.c
index 06ae342b..b76c2ccb 100644
--- a/src/box/vy_write_iterator.c
+++ b/src/box/vy_write_iterator.c
@@ -177,7 +177,16 @@ struct vy_write_iterator {
 	 * key and its tuple format is different.
 	 */
 	bool is_primary;
-
+	/** Callback for generating deferred DELETE statements. */
+	vy_deferred_delete_f deferred_delete_cb;
+	/** Context passed to @deferred_delete_cb. */
+	void *deferred_delete_ctx;
+	/**
+	 * Last scanned REPLACE or DELETE statement that was
+	 * inserted into the primary index without deletion
+	 * of the old tuple from secondary indexes.
+	 */
+	struct tuple *deferred_delete_stmt;
 	/** Length of the @read_views. */
 	int rv_count;
 	/**
@@ -331,9 +340,10 @@ static const struct vy_stmt_stream_iface vy_slice_stream_iface;
  */
 struct vy_stmt_stream *
 vy_write_iterator_new(const struct key_def *cmp_def,
-		      struct tuple_format *format,
-		      bool is_primary, bool is_last_level,
-		      struct rlist *read_views)
+		      struct tuple_format *format, bool is_primary,
+		      bool is_last_level, struct rlist *read_views,
+		      vy_deferred_delete_f deferred_delete_cb,
+		      void *deferred_delete_ctx)
 {
 	/*
 	 * One is reserved for INT64_MAX - maximal read view.
@@ -368,6 +378,8 @@ vy_write_iterator_new(const struct key_def *cmp_def,
 	tuple_format_ref(stream->format);
 	stream->is_primary = is_primary;
 	stream->is_last_level = is_last_level;
+	stream->deferred_delete_cb = deferred_delete_cb;
+	stream->deferred_delete_ctx = deferred_delete_ctx;
 	return &stream->base;
 }
 
@@ -406,6 +418,10 @@ vy_write_iterator_stop(struct vy_stmt_stream *vstream)
 		vy_stmt_unref_if_possible(stream->last_stmt);
 		stream->last_stmt = NULL;
 	}
+	if (stream->deferred_delete_stmt != NULL) {
+		vy_stmt_unref_if_possible(stream->deferred_delete_stmt);
+		stream->deferred_delete_stmt = NULL;
+	}
 }
 
 /**
@@ -554,6 +570,62 @@ vy_write_iterator_pop_read_view_stmt(struct vy_write_iterator *stream)
 }
 
 /**
+ * Generate a DELETE statement for the given tuple if its
+ * deletion from secondary indexes was deferred.
+ *
+ * @param stream Write iterator.
+ * @param stmt Current statement.
+ *
+ * @retval  0 Success.
+ * @retval -1 Error.
+ */
+static int
+vy_write_iterator_deferred_delete(struct vy_write_iterator *stream,
+				  struct tuple *stmt)
+{
+	/*
+	 * Nothing to do if the caller isn't interested in
+	 * deferred DELETEs.
+	 */
+	if (stream->deferred_delete_cb == NULL)
+		return 0;
+	/*
+	 * UPSERTs cannot change secondary index parts neither
+	 * can they produce deferred DELETEs, so we skip them.
+	 */
+	if (vy_stmt_type(stmt) == IPROTO_UPSERT) {
+		assert((vy_stmt_flags(stmt) & VY_STMT_DEFERRED_DELETE) == 0);
+		return 0;
+	}
+	/*
+	 * Invoke the callback to generate a deferred DELETE
+	 * in case the current tuple was overwritten.
+	 */
+	if (stream->deferred_delete_stmt != NULL) {
+		if (vy_stmt_type(stmt) != IPROTO_DELETE &&
+		    stream->deferred_delete_cb(stmt,
+				stream->deferred_delete_stmt,
+				stream->deferred_delete_ctx) != 0)
+			return -1;
+		vy_stmt_unref_if_possible(stream->deferred_delete_stmt);
+		stream->deferred_delete_stmt = NULL;
+	}
+	/*
+	 * Remember the current statement if it is marked with
+	 * VY_STMT_DEFERRED_DELETE so that we can use it to
+	 * generate a DELETE for the overwritten tuple when this
+	 * function is called next time.
+	 */
+	if ((vy_stmt_flags(stmt) & VY_STMT_DEFERRED_DELETE) != 0) {
+		assert(vy_stmt_type(stmt) == IPROTO_DELETE ||
+		       vy_stmt_type(stmt) == IPROTO_REPLACE);
+		vy_stmt_ref_if_possible(stmt);
+		stream->deferred_delete_stmt = stmt;
+	}
+	return 0;
+}
+
+/**
  * Build the history of the current key.
  * Apply optimizations 1, 2 and 3 (@sa vy_write_iterator.h).
  * When building a history, some statements can be
@@ -578,6 +650,7 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream,
 	*count = 0;
 	*is_first_insert = false;
 	assert(stream->stmt_i == -1);
+	assert(stream->deferred_delete_stmt == NULL);
 	struct heap_node *node = vy_source_heap_top(&stream->src_heap);
 	if (node == NULL)
 		return 0; /* no more data */
@@ -630,6 +703,10 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream,
 				*is_first_insert = true;
 		}
 
+		rc = vy_write_iterator_deferred_delete(stream, src->tuple);
+		if (rc != 0)
+			break;
+
 		if (vy_stmt_lsn(src->tuple) > current_rv_lsn) {
 			/*
 			 * Skip statements invisible to the current read
@@ -710,6 +787,17 @@ next_lsn:
 			break;
 	}
 
+	/*
+	 * No point in keeping the last VY_STMT_DEFERRED_DELETE
+	 * statement around if this is major compaction, because
+	 * there's no tuple it could overwrite.
+	 */
+	if (rc == 0 && stream->is_last_level &&
+	    stream->deferred_delete_stmt != NULL) {
+		vy_stmt_unref_if_possible(stream->deferred_delete_stmt);
+		stream->deferred_delete_stmt = NULL;
+	}
+
 	vy_source_heap_delete(&stream->src_heap, &end_of_key_src.heap_node);
 	vy_stmt_unref_if_possible(end_of_key_src.tuple);
 	return rc;
@@ -794,6 +882,23 @@ vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *hint,
 	rv->history = NULL;
 	result->tuple = NULL;
 	assert(result->next == NULL);
+	/*
+	 * The write iterator generates deferred DELETEs for all
+	 * VY_STMT_DEFERRED_DELETE statements, except, may be,
+	 * the last seen one. Clear the flag for all other output
+	 * statements so as not to generate the same DELETEs on
+	 * the next compaction.
+	 */
+	uint8_t flags = vy_stmt_flags(rv->tuple);
+	if (rv->tuple != stream->deferred_delete_stmt &&
+	    (flags & VY_STMT_DEFERRED_DELETE) != 0) {
+		if (!vy_stmt_is_refable(rv->tuple)) {
+			rv->tuple = vy_stmt_dup(rv->tuple);
+			if (rv->tuple == NULL)
+				return -1;
+		}
+		vy_stmt_set_flags(rv->tuple, flags & ~VY_STMT_DEFERRED_DELETE);
+	}
 	if (hint != NULL) {
 		/* Not the first statement. */
 		return 0;
@@ -918,6 +1023,28 @@ vy_write_iterator_next(struct vy_stmt_stream *vstream,
 	*ret = vy_write_iterator_pop_read_view_stmt(stream);
 	if (*ret != NULL)
 		return 0;
+	/*
+	 * If we didn't generate a deferred DELETE corresponding to
+	 * the last seen VY_STMT_DEFERRED_DELETE statement, we must
+	 * include it into the output, because there still might be
+	 * an overwritten tuple in an older source.
+	 */
+	if (stream->deferred_delete_stmt != NULL) {
+		if (stream->deferred_delete_stmt == stream->last_stmt) {
+			/*
+			 * The statement was returned via a read view.
+			 * Nothing to do.
+			 */
+			vy_stmt_unref_if_possible(stream->deferred_delete_stmt);
+			stream->deferred_delete_stmt = NULL;
+		} else {
+			if (stream->last_stmt != NULL)
+				vy_stmt_unref_if_possible(stream->last_stmt);
+			*ret = stream->last_stmt = stream->deferred_delete_stmt;
+			stream->deferred_delete_stmt = NULL;
+			return 0;
+		}
+	}
 
 	/* Build the next key sequence. */
 	stream->stmt_i = -1;
diff --git a/src/box/vy_write_iterator.h b/src/box/vy_write_iterator.h
index ea14b07a..3430bbd2 100644
--- a/src/box/vy_write_iterator.h
+++ b/src/box/vy_write_iterator.h
@@ -220,6 +220,24 @@ struct vy_mem;
 struct vy_slice;
 
 /**
+ * Callback invoked by the write iterator for tuples that were
+ * overwritten or deleted without generating DELETE statement
+ * for secondary indexes.
+ *
+ * @param old_stmt Overwritten tuple.
+ * @param new_stmt Statement that overwrote @old_stmt.
+ * @param ctx Callback context.
+ *
+ * @retval  0 Success.
+ * @retval -1 Error.
+ *
+ * @sa VY_STMT_DEFERRED_DELETE.
+ */
+typedef int
+(*vy_deferred_delete_f)(struct tuple *old_stmt,
+			struct tuple *new_stmt, void *ctx);
+
+/**
  * Open an empty write iterator. To add sources to the iterator
  * use vy_write_iterator_add_* functions.
  * @param cmp_def - key definition for tuple compare.
@@ -227,13 +245,16 @@ struct vy_slice;
  * @param LSM tree is_primary - set if this iterator is for a primary index.
  * @param is_last_level - there is no older level than the one we're writing to.
  * @param read_views - Opened read views.
+ * @param deferred_delete_cb - Callback for generating deferred DELETEs.
+ * @param deferred_delete_ctx - Context passed to @deferred_delete_cb.
  * @return the iterator or NULL on error (diag is set).
  */
 struct vy_stmt_stream *
 vy_write_iterator_new(const struct key_def *cmp_def,
-		      struct tuple_format *format,
-		      bool is_primary, bool is_last_level,
-		      struct rlist *read_views);
+		      struct tuple_format *format, bool is_primary,
+		      bool is_last_level, struct rlist *read_views,
+		      vy_deferred_delete_f deferred_delete_cb,
+		      void *deferred_delete_ctx);
 
 /**
  * Add a mem as a source to the iterator.
diff --git a/test/unit/vy_iterators_helper.c b/test/unit/vy_iterators_helper.c
index 642d8bf2..89603376 100644
--- a/test/unit/vy_iterators_helper.c
+++ b/test/unit/vy_iterators_helper.c
@@ -136,6 +136,7 @@ vy_new_simple_stmt(struct tuple_format *format,
 	}
 	free(buf);
 	vy_stmt_set_lsn(ret, templ->lsn);
+	vy_stmt_set_flags(ret, templ->flags);
 	if (templ->optimize_update)
 		vy_stmt_set_column_mask(ret, 0);
 	return ret;
@@ -277,6 +278,10 @@ vy_stmt_are_same(const struct tuple *actual,
 		tuple_unref(tmp);
 		return false;
 	}
+	if (vy_stmt_flags(actual) != expected->flags) {
+		tuple_unref(tmp);
+		return false;
+	}
 	bool rc = memcmp(a, b, a_len) == 0;
 	tuple_unref(tmp);
 	return rc;
diff --git a/test/unit/vy_iterators_helper.h b/test/unit/vy_iterators_helper.h
index e38ec295..24641df3 100644
--- a/test/unit/vy_iterators_helper.h
+++ b/test/unit/vy_iterators_helper.h
@@ -43,10 +43,16 @@
 #define vyend 99999999
 #define MAX_FIELDS_COUNT 100
 #define STMT_TEMPLATE(lsn, type, ...) \
-{ { __VA_ARGS__, vyend }, IPROTO_##type, lsn, false, 0, 0 }
+{ { __VA_ARGS__, vyend }, IPROTO_##type, lsn, false, 0, 0, 0 }
 
 #define STMT_TEMPLATE_OPTIMIZED(lsn, type, ...) \
-{ { __VA_ARGS__, vyend }, IPROTO_##type, lsn, true, 0, 0 }
+{ { __VA_ARGS__, vyend }, IPROTO_##type, lsn, true, 0, 0, 0 }
+
+#define STMT_TEMPLATE_FLAGS(lsn, type, flags, ...) \
+{ { __VA_ARGS__, vyend }, IPROTO_##type, lsn, false, flags, 0, 0 }
+
+#define STMT_TEMPLATE_DEFERRED_DELETE(lsn, type, ...) \
+STMT_TEMPLATE_FLAGS(lsn, type, VY_STMT_DEFERRED_DELETE, __VA_ARGS__)
 
 extern struct tuple_format_vtab vy_tuple_format_vtab;
 extern struct tuple_format *vy_key_format;
@@ -82,6 +88,8 @@ struct vy_stmt_template {
 	 * to skip it in the write_iterator.
 	 */
 	bool optimize_update;
+	/** Statement flags. */
+	uint8_t flags;
 	/*
 	 * In case of upsert it is possible to use only one 'add' operation.
 	 * This is the column number of the operation.
diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c
index b9b7d6ff..54a34e98 100644
--- a/test/unit/vy_point_lookup.c
+++ b/test/unit/vy_point_lookup.c
@@ -192,7 +192,7 @@ test_basic()
 	}
 	struct vy_stmt_stream *write_stream
 		= vy_write_iterator_new(pk->cmp_def, pk->disk_format,
-					true, true, &read_views);
+					true, true, &read_views, NULL, NULL);
 	vy_write_iterator_new_mem(write_stream, run_mem);
 	struct vy_run *run = vy_run_new(&run_env, 1);
 	isnt(run, NULL, "vy_run_new");
@@ -225,7 +225,7 @@ test_basic()
 	}
 	write_stream
 		= vy_write_iterator_new(pk->cmp_def, pk->disk_format,
-					true, true, &read_views);
+					true, true, &read_views, NULL, NULL);
 	vy_write_iterator_new_mem(write_stream, run_mem);
 	run = vy_run_new(&run_env, 2);
 	isnt(run, NULL, "vy_run_new");
diff --git a/test/unit/vy_write_iterator.c b/test/unit/vy_write_iterator.c
index 25a346af..9f37bbf4 100644
--- a/test/unit/vy_write_iterator.c
+++ b/test/unit/vy_write_iterator.c
@@ -3,6 +3,41 @@
 #include "vy_write_iterator.h"
 #include "vy_iterators_helper.h"
 
+enum { MAX_DEFERRED_COUNT = 32 };
+
+/** Argument passed to @deferred_cb. */
+struct deferred_ctx {
+	/** Format to use for making DELETEs. */
+	struct tuple_format *format;
+	/** Deferred DELETEs generated by the write iterator. */
+	struct tuple *stmt[MAX_DEFERRED_COUNT];
+	/** Number of elements in @stmt array. */
+	int count;
+};
+
+/**
+ * Callback passed to the write iterator for generating deferred
+ * DELETE statements.
+ */
+static int
+deferred_cb(struct tuple *old_stmt, struct tuple *new_stmt, void *arg)
+{
+	struct deferred_ctx *ctx = arg;
+
+	fail_if(vy_stmt_type(old_stmt) == IPROTO_DELETE);
+	fail_if(vy_stmt_type(new_stmt) != IPROTO_DELETE &&
+		vy_stmt_type(new_stmt) != IPROTO_REPLACE);
+
+	struct tuple *delete = vy_stmt_new_surrogate_delete(ctx->format,
+							    old_stmt);
+	fail_if(delete == NULL);
+	vy_stmt_set_lsn(delete, vy_stmt_lsn(new_stmt));
+
+	fail_if(ctx->count >= MAX_DEFERRED_COUNT);
+	ctx->stmt[ctx->count++] = delete;
+	return 0;
+}
+
 /**
  * Create a mem with the specified content, iterate over it with
  * write_iterator and compare actual result statements with the
@@ -12,6 +47,8 @@
  * @param content_count Size of the @content.
  * @param expected Expected results of the iteration.
  * @param expected_count Size of the @expected.
+ * @param deferred Expected deferred DELETEs returned by the iteration.
+ * @param deferred_count Size of @deferred.
  * @param vlsns Read view lsns for the write iterator.
  * @param vlsns_count Size of the @vlsns.
  * @param is_primary True, if the new mem belongs to the primary
@@ -23,6 +60,8 @@ compare_write_iterator_results(const struct vy_stmt_template *content,
 			       int content_count,
 			       const struct vy_stmt_template *expected,
 			       int expected_count,
+			       const struct vy_stmt_template *deferred,
+			       int deferred_count,
 			       const int *vlsns, int vlsns_count,
 			       bool is_primary, bool is_last_level)
 {
@@ -37,9 +76,14 @@ compare_write_iterator_results(const struct vy_stmt_template *content,
 	struct vy_read_view *rv_array = malloc(sizeof(*rv_array) * vlsns_count);
 	fail_if(rv_array == NULL);
 	init_read_views_list(&rv_list, rv_array, vlsns, vlsns_count);
-
-	struct vy_stmt_stream *wi = vy_write_iterator_new(key_def, mem->format,
-					is_primary, is_last_level, &rv_list);
+	struct deferred_ctx deferred_ctx;
+	memset(&deferred_ctx, 0, sizeof(deferred_ctx));
+	deferred_ctx.format = mem->format;
+	struct vy_stmt_stream *wi;
+	wi = vy_write_iterator_new(key_def, mem->format, is_primary,
+				   is_last_level, &rv_list,
+				   deferred == NULL ? NULL :
+				   deferred_cb, &deferred_ctx);
 	fail_if(wi == NULL);
 	fail_if(vy_write_iterator_new_mem(wi, mem) != 0);
 
@@ -57,6 +101,18 @@ compare_write_iterator_results(const struct vy_stmt_template *content,
 		++i;
 	} while (ret != NULL);
 	ok(i == expected_count, "correct results count");
+	wi->iface->stop(wi);
+
+	for (i = 0; i < MIN(deferred_ctx.count, deferred_count); i++) {
+		ok(vy_stmt_are_same(deferred_ctx.stmt[i], &deferred[i],
+				    deferred_ctx.format, NULL),
+		   "deferred stmt %d is correct", i);
+		tuple_unref(deferred_ctx.stmt[i]);
+	}
+	if (deferred != NULL) {
+		ok(deferred_ctx.count == deferred_count,
+		   "correct deferred stmt count");
+	}
 
 	/* Clean up */
 	wi->iface->close(wi);
@@ -69,7 +125,7 @@ void
 test_basic(void)
 {
 	header();
-	plan(46);
+	plan(66);
 {
 /*
  * STATEMENT: REPL REPL REPL  DEL  REPL  REPL  REPL  REPL  REPL  REPL
@@ -98,7 +154,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, true);
 }
 {
@@ -132,7 +188,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, false);
 }
 {
@@ -160,7 +216,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, true);
 }
 {
@@ -180,7 +236,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, true);
 }
 {
@@ -204,7 +260,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, true);
 }
 {
@@ -227,7 +283,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, false);
 }
 {
@@ -255,7 +311,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, false, true);
 }
 {
@@ -275,7 +331,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, false, false);
 }
 {
@@ -302,7 +358,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, false);
 }
 {
@@ -330,7 +386,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, true);
 }
 {
@@ -355,7 +411,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, false, false);
 }
 {
@@ -380,7 +436,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, false);
 }
 {
@@ -410,7 +466,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, false);
 }
 {
@@ -451,7 +507,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, false);
 }
 {
@@ -491,7 +547,147 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
+				       expected, expected_count, NULL, 0,
+				       vlsns, vlsns_count, true, false);
+}
+{
+/*
+ * STATEMENT:    REPL DEL REPL REPL DEL DEL DEL REPL DEL INS DEL INS REPL
+ * LSN:            4   5    6    7   8   9  10   11  12  13  14  15   16
+ * DEFERRED DEL:   +   +    +        +   +        +           +        +
+ * READ VIEW:          *         *                *
+ *
+ * is_last_level = true
+ *
+ * Test generation of deferred DELETEs for various combinations
+ * of input statements.
+ */
+	const struct vy_stmt_template content[] = {
+		STMT_TEMPLATE_DEFERRED_DELETE(4, REPLACE, 1, 2),
+		STMT_TEMPLATE_DEFERRED_DELETE(5, DELETE, 1),
+		STMT_TEMPLATE_DEFERRED_DELETE(6, REPLACE, 1, 3),
+		STMT_TEMPLATE(7, REPLACE, 1, 4),
+		STMT_TEMPLATE_DEFERRED_DELETE(8, DELETE, 1),
+		STMT_TEMPLATE_DEFERRED_DELETE(9, DELETE, 1),
+		STMT_TEMPLATE(10, DELETE, 1),
+		STMT_TEMPLATE_DEFERRED_DELETE(11, REPLACE, 1, 5),
+		STMT_TEMPLATE(12, DELETE, 1),
+		STMT_TEMPLATE(13, INSERT, 1, 6),
+		STMT_TEMPLATE_DEFERRED_DELETE(14, DELETE, 1),
+		STMT_TEMPLATE(15, INSERT, 1, 7),
+		STMT_TEMPLATE_DEFERRED_DELETE(16, REPLACE, 1, 8),
+	};
+	const struct vy_stmt_template expected[] = {
+		STMT_TEMPLATE(16, REPLACE, 1, 8),
+		STMT_TEMPLATE(11, REPLACE, 1, 5),
+		STMT_TEMPLATE(7, REPLACE, 1, 4),
+	};
+	const struct vy_stmt_template deferred[] = {
+		STMT_TEMPLATE(16, DELETE, 1, 7),
+		STMT_TEMPLATE(14, DELETE, 1, 6),
+		STMT_TEMPLATE(8, DELETE, 1, 4),
+		STMT_TEMPLATE(5, DELETE, 1, 2),
+	};
+	const int vlsns[] = {5, 7, 11};
+	int content_count = sizeof(content) / sizeof(content[0]);
+	int expected_count = sizeof(expected) / sizeof(expected[0]);
+	int deferred_count = sizeof(deferred) / sizeof(deferred[0]);
+	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
+	compare_write_iterator_results(content, content_count,
+				       expected, expected_count,
+				       deferred, deferred_count,
+				       vlsns, vlsns_count, true, true);
+}
+{
+/*
+ * STATEMENT:    REPL REPL DEL
+ * LSN:            7    8   9
+ * DEFERRED DEL:   +
+ *
+ * is_last_level = false
+ *
+ * Check that the oldest VY_STMT_DEFERRED_DELETE statement is
+ * preserved in case it doesn't overwrite a terminal statement
+ * and this is not a major compaction.
+ */
+	const struct vy_stmt_template content[] = {
+		STMT_TEMPLATE_DEFERRED_DELETE(7, REPLACE, 1, 1),
+		STMT_TEMPLATE(8, REPLACE, 1, 2),
+		STMT_TEMPLATE(9, DELETE, 1, 3),
+	};
+	const struct vy_stmt_template expected[] = {
+		STMT_TEMPLATE(9, DELETE, 1, 1),
+		STMT_TEMPLATE_DEFERRED_DELETE(7, REPLACE, 1, 1),
+	};
+	const struct vy_stmt_template deferred[] = {};
+	const int vlsns[] = {};
+	int content_count = sizeof(content) / sizeof(content[0]);
+	int expected_count = sizeof(expected) / sizeof(expected[0]);
+	int deferred_count = sizeof(deferred) / sizeof(deferred[0]);
+	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
+	compare_write_iterator_results(content, content_count,
+				       expected, expected_count,
+				       deferred, deferred_count,
+				       vlsns, vlsns_count, true, false);
+}
+{
+/*
+ * STATEMENT:    REPL REPL DEL
+ * LSN:            7    8   9
+ * DEFERRED DEL:   +
+ * READ VIEW:      *
+ *
+ * is_last_level = false
+ *
+ * Check that the oldest VY_STMT_DEFERRED_DELETE statement is
+ * not returned twice if it is referenced by a read view.
+ */
+	const struct vy_stmt_template content[] = {
+		STMT_TEMPLATE_DEFERRED_DELETE(7, REPLACE, 1, 1),
+		STMT_TEMPLATE(8, REPLACE, 1, 2),
+		STMT_TEMPLATE(9, DELETE, 1, 3),
+	};
+	const struct vy_stmt_template expected[] = {
+		STMT_TEMPLATE(9, DELETE, 1, 1),
+		STMT_TEMPLATE_DEFERRED_DELETE(7, REPLACE, 1, 1),
+	};
+	const struct vy_stmt_template deferred[] = {};
+	const int vlsns[] = {7};
+	int content_count = sizeof(content) / sizeof(content[0]);
+	int expected_count = sizeof(expected) / sizeof(expected[0]);
+	int deferred_count = sizeof(deferred) / sizeof(deferred[0]);
+	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
+	compare_write_iterator_results(content, content_count,
+				       expected, expected_count,
+				       deferred, deferred_count,
+				       vlsns, vlsns_count, true, false);
+}
+{
+/*
+ * STATEMENT:    REPL
+ * LSN:            7
+ * DEFERRED DEL:   +
+ *
+ * is_last_level = false
+ *
+ * Check that the oldest VY_STMT_DEFERRED_DELETE statement is
+ * not returned twice if it is the only statement in the output.
+ */
+	const struct vy_stmt_template content[] = {
+		STMT_TEMPLATE_DEFERRED_DELETE(7, REPLACE, 1, 1),
+	};
+	const struct vy_stmt_template expected[] = {
+		STMT_TEMPLATE_DEFERRED_DELETE(7, REPLACE, 1, 1),
+	};
+	const struct vy_stmt_template deferred[] = {};
+	const int vlsns[] = {};
+	int content_count = sizeof(content) / sizeof(content[0]);
+	int expected_count = sizeof(expected) / sizeof(expected[0]);
+	int deferred_count = sizeof(deferred) / sizeof(deferred[0]);
+	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
+	compare_write_iterator_results(content, content_count,
 				       expected, expected_count,
+				       deferred, deferred_count,
 				       vlsns, vlsns_count, true, false);
 }
 	fiber_gc();
diff --git a/test/unit/vy_write_iterator.result b/test/unit/vy_write_iterator.result
index 56d8cb1f..4f95aeb9 100644
--- a/test/unit/vy_write_iterator.result
+++ b/test/unit/vy_write_iterator.result
@@ -1,5 +1,5 @@
 	*** test_basic ***
-1..46
+1..66
 ok 1 - stmt 0 is correct
 ok 2 - stmt 1 is correct
 ok 3 - stmt 2 is correct
@@ -46,4 +46,24 @@ ok 43 - stmt 0 is correct
 ok 44 - stmt 1 is correct
 ok 45 - stmt 2 is correct
 ok 46 - correct results count
+ok 47 - stmt 0 is correct
+ok 48 - stmt 1 is correct
+ok 49 - stmt 2 is correct
+ok 50 - correct results count
+ok 51 - deferred stmt 0 is correct
+ok 52 - deferred stmt 1 is correct
+ok 53 - deferred stmt 2 is correct
+ok 54 - deferred stmt 3 is correct
+ok 55 - correct deferred stmt count
+ok 56 - stmt 0 is correct
+ok 57 - stmt 1 is correct
+ok 58 - correct results count
+ok 59 - correct deferred stmt count
+ok 60 - stmt 0 is correct
+ok 61 - stmt 1 is correct
+ok 62 - correct results count
+ok 63 - correct deferred stmt count
+ok 64 - stmt 0 is correct
+ok 65 - correct results count
+ok 66 - correct deferred stmt count
 	*** test_basic: done ***
-- 
2.11.0

  parent reply	other threads:[~2018-07-27 11:29 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-27 11:29 [PATCH 00/25] vinyl: eliminate disk read on REPLACE/DELETE Vladimir Davydov
2018-07-27 11:29 ` [PATCH 01/25] vinyl: make point lookup always return the latest tuple version Vladimir Davydov
2018-07-27 11:29 ` [PATCH 02/25] vinyl: simplify vy_squash_process Vladimir Davydov
2018-07-27 11:29 ` [PATCH 03/25] vinyl: always get full tuple from pk after reading from secondary index Vladimir Davydov
2018-07-27 11:29 ` [PATCH 04/25] vinyl: fold vy_replace_one and vy_replace_impl Vladimir Davydov
2018-07-27 11:29 ` [PATCH 05/25] vinyl: fold vy_delete_impl Vladimir Davydov
2018-07-27 11:29 ` [PATCH 06/25] vinyl: refactor unique check Vladimir Davydov
2018-07-27 11:29 ` [PATCH 07/25] vinyl: check key uniqueness before modifying tx write set Vladimir Davydov
2018-07-27 11:29 ` [PATCH 08/25] vinyl: remove env argument of vy_check_is_unique_{primary,secondary} Vladimir Davydov
2018-07-31 20:45   ` [tarantool-patches] " Konstantin Osipov
2018-07-27 11:29 ` [PATCH 09/25] vinyl: store full tuples in secondary index cache Vladimir Davydov
2018-07-31 20:47   ` Konstantin Osipov
2018-07-27 11:29 ` [PATCH 10/25] vinyl: do not free pending tasks on shutdown Vladimir Davydov
2018-07-31 20:48   ` Konstantin Osipov
2018-07-27 11:29 ` [PATCH 11/25] vinyl: store pointer to scheduler in struct vy_task Vladimir Davydov
2018-07-31 20:49   ` Konstantin Osipov
2018-07-27 11:29 ` [PATCH 12/25] vinyl: rename some members of vy_scheduler and vy_task struct Vladimir Davydov
2018-07-27 11:29 ` [PATCH 13/25] vinyl: use cbus for communication between scheduler and worker threads Vladimir Davydov
2018-07-27 11:29 ` [PATCH 14/25] vinyl: zap vy_scheduler::is_worker_pool_running Vladimir Davydov
2018-07-27 11:29 ` [PATCH 15/25] vinyl: rename vy_task::status to is_failed Vladimir Davydov
2018-07-27 11:29 ` [PATCH 16/25] xrow: allow to store flags in DML requests Vladimir Davydov
2018-07-27 11:29 ` [PATCH 17/25] vinyl: pin last statement returned by write iterator explicitly Vladimir Davydov
2018-07-27 11:29 ` Vladimir Davydov [this message]
2018-07-27 11:29 ` [PATCH 19/25] vinyl: prepare write iterator heap comparator for deferred DELETEs Vladimir Davydov
2018-07-27 11:30 ` [PATCH 20/25] vinyl: allow to skip certain statements on read Vladimir Davydov
2018-07-27 11:30 ` [PATCH 21/25] vinyl: add function to create surrogate deletes from raw msgpack Vladimir Davydov
2018-07-27 11:30 ` [PATCH 22/25] vinyl: remove pointless assertion from vy_stmt_new_surrogate_delete Vladimir Davydov
2018-07-27 11:30 ` [PATCH 23/25] txn: add helper to detect transaction boundaries Vladimir Davydov
2018-07-31 20:52   ` [tarantool-patches] " Konstantin Osipov
2018-07-27 11:30 ` [PATCH 24/25] Introduce _vinyl_deferred_delete system space Vladimir Davydov
2018-07-31 20:54   ` Konstantin Osipov
2018-08-01 14:00     ` Vladimir Davydov
2018-08-01 20:25       ` [tarantool-patches] " Konstantin Osipov
2018-08-02  9:43         ` Vladimir Davydov
2018-08-06  8:42           ` Vladimir Davydov
2018-07-27 11:30 ` [PATCH 25/25] vinyl: eliminate disk read on REPLACE/DELETE Vladimir Davydov
2018-07-31 20:55   ` Konstantin Osipov
2018-08-01 16:03     ` Vladimir Davydov
2018-08-01 16:51     ` Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=43644fa2a34a37d5006113f405d8125964dc1653.1532689066.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH 18/25] vinyl: teach write iterator to return overwritten tuples' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox