[PATCH v2 2/7] vinyl: teach write iterator to return overwritten tuples

Vladimir Davydov vdavydov.dev at gmail.com
Tue Aug 21 14:15:35 MSK 2018


A REPLACE/DELETE request is supposed to delete the old tuple from all
indexes. In order to generate a DELETE statement for a secondary index,
we need to look up the old tuple in the primary index, which is costly
as it implies a random disk access. In the scope of #2129 we are
planning to optimize out the lookup by deferring generation of the
DELETE statement until primary index compaction.

To do that, we need to differentiate statements for which DELETE was
deferred from those for which it was inserted when the request was
executed (as is the case for UPDATE). So this patch introduces a
per-statement flag, VY_STMT_DEFERRED_DELETE. If set for a REPLACE or
DELETE statement, it makes the write iterator return the overwritten
statement to the caller via a callback.

Needed for #2129
---
 src/box/vinyl.c                    |   2 +-
 src/box/vy_scheduler.c             |   4 +-
 src/box/vy_stmt.h                  |  19 +++
 src/box/vy_write_iterator.c        | 140 +++++++++++++++++++-
 src/box/vy_write_iterator.h        |  45 ++++++-
 test/unit/vy_iterators_helper.c    |   5 +
 test/unit/vy_iterators_helper.h    |  12 +-
 test/unit/vy_point_lookup.c        |   4 +-
 test/unit/vy_write_iterator.c      | 254 ++++++++++++++++++++++++++++++++++---
 test/unit/vy_write_iterator.result |  22 +++-
 10 files changed, 475 insertions(+), 32 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index f2f93736..fd14d1e7 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -3007,7 +3007,7 @@ vy_send_range(struct vy_join_ctx *ctx,
 	struct rlist fake_read_views;
 	rlist_create(&fake_read_views);
 	ctx->wi = vy_write_iterator_new(ctx->key_def, ctx->format,
-					true, true, &fake_read_views);
+					true, true, &fake_read_views, NULL);
 	if (ctx->wi == NULL) {
 		rc = -1;
 		goto out;
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index b206a605..4e8b476b 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -1006,7 +1006,7 @@ vy_task_dump_new(struct vy_scheduler *scheduler, struct vy_lsm *lsm,
 	bool is_last_level = (lsm->run_count == 0);
 	wi = vy_write_iterator_new(task->cmp_def, lsm->disk_format,
 				   lsm->index_id == 0, is_last_level,
-				   scheduler->read_views);
+				   scheduler->read_views, NULL);
 	if (wi == NULL)
 		goto err_wi;
 	rlist_foreach_entry(mem, &lsm->sealed, in_sealed) {
@@ -1273,7 +1273,7 @@ vy_task_compact_new(struct vy_scheduler *scheduler, struct vy_lsm *lsm,
 	bool is_last_level = (range->compact_priority == range->slice_count);
 	wi = vy_write_iterator_new(task->cmp_def, lsm->disk_format,
 				   lsm->index_id == 0, is_last_level,
-				   scheduler->read_views);
+				   scheduler->read_views, NULL);
 	if (wi == NULL)
 		goto err_wi;
 
diff --git a/src/box/vy_stmt.h b/src/box/vy_stmt.h
index bee3c21e..8051f1e2 100644
--- a/src/box/vy_stmt.h
+++ b/src/box/vy_stmt.h
@@ -70,6 +70,25 @@ extern struct tuple_format_vtab vy_tuple_format_vtab;
  */
 extern size_t vy_max_tuple_size;
 
+/** Statement flags. */
+enum {
+	/**
+	 * A REPLACE/DELETE request is supposed to delete the old
+	 * tuple from all indexes. In order to generate a DELETE
+	 * statement for a secondary index, we need to look up the
+	 * old tuple in the primary index, which is expensive as
+	 * it implies a random disk access. We can optimize out the
+	 * lookup by deferring generation of the DELETE statement
+	 * until primary index compaction.
+	 *
+	 * The following flag is set for those REPLACE and DELETE
+	 * statements that skipped deletion of the old tuple from
+	 * secondary indexes. It makes the write iterator generate
+	 * DELETE statements for them during compaction.
+	 */
+	VY_STMT_DEFERRED_DELETE		= 1 << 0,
+};
+
 /**
  * There are two groups of statements:
  *
diff --git a/src/box/vy_write_iterator.c b/src/box/vy_write_iterator.c
index 06ae342b..50c51f2b 100644
--- a/src/box/vy_write_iterator.c
+++ b/src/box/vy_write_iterator.c
@@ -177,7 +177,14 @@ struct vy_write_iterator {
 	 * key and its tuple format is different.
 	 */
 	bool is_primary;
-
+	/** Deferred DELETE handler. */
+	struct vy_deferred_delete_handler *deferred_delete_handler;
+	/**
+	 * Last scanned REPLACE or DELETE statement that was
+	 * inserted into the primary index without deletion
+	 * of the old tuple from secondary indexes.
+	 */
+	struct tuple *deferred_delete_stmt;
 	/** Length of the @read_views. */
 	int rv_count;
 	/**
@@ -331,11 +338,16 @@ static const struct vy_stmt_stream_iface vy_slice_stream_iface;
  */
 struct vy_stmt_stream *
 vy_write_iterator_new(const struct key_def *cmp_def,
-		      struct tuple_format *format,
-		      bool is_primary, bool is_last_level,
-		      struct rlist *read_views)
+		      struct tuple_format *format, bool is_primary,
+		      bool is_last_level, struct rlist *read_views,
+		      struct vy_deferred_delete_handler *handler)
 {
 	/*
+	 * Deferred DELETE statements can only be produced by
+	 * primary index compaction.
+	 */
+	assert(is_primary || handler == NULL);
+	/*
 	 * One is reserved for INT64_MAX - maximal read view.
 	 */
 	int count = 1;
@@ -368,6 +380,7 @@ vy_write_iterator_new(const struct key_def *cmp_def,
 	tuple_format_ref(stream->format);
 	stream->is_primary = is_primary;
 	stream->is_last_level = is_last_level;
+	stream->deferred_delete_handler = handler;
 	return &stream->base;
 }
 
@@ -406,6 +419,16 @@ vy_write_iterator_stop(struct vy_stmt_stream *vstream)
 		vy_stmt_unref_if_possible(stream->last_stmt);
 		stream->last_stmt = NULL;
 	}
+	if (stream->deferred_delete_stmt != NULL) {
+		vy_stmt_unref_if_possible(stream->deferred_delete_stmt);
+		stream->deferred_delete_stmt = NULL;
+	}
+	struct vy_deferred_delete_handler *handler =
+			stream->deferred_delete_handler;
+	if (handler != NULL) {
+		handler->iface->destroy(handler);
+		stream->deferred_delete_handler = NULL;
+	}
 }
 
 /**
@@ -554,6 +577,60 @@ vy_write_iterator_pop_read_view_stmt(struct vy_write_iterator *stream)
 }
 
 /**
+ * Generate a DELETE statement for the given tuple if its
+ * deletion from secondary indexes was deferred.
+ *
+ * @param stream Write iterator.
+ * @param stmt Current statement.
+ *
+ * @retval  0 Success.
+ * @retval -1 Error.
+ */
+static int
+vy_write_iterator_deferred_delete(struct vy_write_iterator *stream,
+				  struct tuple *stmt)
+{
+	if (!stream->is_primary)
+		return 0;
+
+	/*
+	 * UPSERTs cannot change secondary index parts, nor can
+	 * they produce deferred DELETEs, so we skip them.
+	 */
+	if (vy_stmt_type(stmt) == IPROTO_UPSERT) {
+		assert((vy_stmt_flags(stmt) & VY_STMT_DEFERRED_DELETE) == 0);
+		return 0;
+	}
+	/*
+	 * Invoke the callback to generate a deferred DELETE
+	 * in case the current tuple was overwritten.
+	 */
+	if (stream->deferred_delete_stmt != NULL) {
+		struct vy_deferred_delete_handler *handler =
+				stream->deferred_delete_handler;
+		if (handler != NULL && vy_stmt_type(stmt) != IPROTO_DELETE &&
+		    handler->iface->process(handler, stmt,
+					    stream->deferred_delete_stmt) != 0)
+			return -1;
+		vy_stmt_unref_if_possible(stream->deferred_delete_stmt);
+		stream->deferred_delete_stmt = NULL;
+	}
+	/*
+	 * Remember the current statement if it is marked with
+	 * VY_STMT_DEFERRED_DELETE so that we can use it to
+	 * generate a DELETE for the overwritten tuple when this
+	 * function is called next time.
+	 */
+	if ((vy_stmt_flags(stmt) & VY_STMT_DEFERRED_DELETE) != 0) {
+		assert(vy_stmt_type(stmt) == IPROTO_DELETE ||
+		       vy_stmt_type(stmt) == IPROTO_REPLACE);
+		vy_stmt_ref_if_possible(stmt);
+		stream->deferred_delete_stmt = stmt;
+	}
+	return 0;
+}
+
+/**
  * Build the history of the current key.
  * Apply optimizations 1, 2 and 3 (@sa vy_write_iterator.h).
  * When building a history, some statements can be
@@ -578,6 +655,7 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream,
 	*count = 0;
 	*is_first_insert = false;
 	assert(stream->stmt_i == -1);
+	assert(stream->deferred_delete_stmt == NULL);
 	struct heap_node *node = vy_source_heap_top(&stream->src_heap);
 	if (node == NULL)
 		return 0; /* no more data */
@@ -630,6 +708,10 @@ vy_write_iterator_build_history(struct vy_write_iterator *stream,
 				*is_first_insert = true;
 		}
 
+		rc = vy_write_iterator_deferred_delete(stream, src->tuple);
+		if (rc != 0)
+			break;
+
 		if (vy_stmt_lsn(src->tuple) > current_rv_lsn) {
 			/*
 			 * Skip statements invisible to the current read
@@ -710,6 +792,17 @@ next_lsn:
 			break;
 	}
 
+	/*
+	 * No point in keeping the last VY_STMT_DEFERRED_DELETE
+	 * statement around if this is major compaction, because
+	 * there's no tuple it could overwrite.
+	 */
+	if (rc == 0 && stream->is_last_level &&
+	    stream->deferred_delete_stmt != NULL) {
+		vy_stmt_unref_if_possible(stream->deferred_delete_stmt);
+		stream->deferred_delete_stmt = NULL;
+	}
+
 	vy_source_heap_delete(&stream->src_heap, &end_of_key_src.heap_node);
 	vy_stmt_unref_if_possible(end_of_key_src.tuple);
 	return rc;
@@ -794,6 +887,23 @@ vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *hint,
 	rv->history = NULL;
 	result->tuple = NULL;
 	assert(result->next == NULL);
+	/*
+	 * The write iterator generates deferred DELETEs for all
+	 * VY_STMT_DEFERRED_DELETE statements except, possibly,
+	 * the last seen one. Clear the flag for all other output
+	 * statements so as not to generate the same DELETEs on
+	 * the next compaction.
+	 */
+	uint8_t flags = vy_stmt_flags(rv->tuple);
+	if ((flags & VY_STMT_DEFERRED_DELETE) != 0 &&
+	    rv->tuple != stream->deferred_delete_stmt) {
+		if (!vy_stmt_is_refable(rv->tuple)) {
+			rv->tuple = vy_stmt_dup(rv->tuple);
+			if (rv->tuple == NULL)
+				return -1;
+		}
+		vy_stmt_set_flags(rv->tuple, flags & ~VY_STMT_DEFERRED_DELETE);
+	}
 	if (hint != NULL) {
 		/* Not the first statement. */
 		return 0;
@@ -918,6 +1028,28 @@ vy_write_iterator_next(struct vy_stmt_stream *vstream,
 	*ret = vy_write_iterator_pop_read_view_stmt(stream);
 	if (*ret != NULL)
 		return 0;
+	/*
+	 * If we didn't generate a deferred DELETE corresponding to
+	 * the last seen VY_STMT_DEFERRED_DELETE statement, we must
+	 * include it in the output, because there might still be
+	 * an overwritten tuple in an older source.
+	 */
+	if (stream->deferred_delete_stmt != NULL) {
+		if (stream->deferred_delete_stmt == stream->last_stmt) {
+			/*
+			 * The statement was returned via a read view.
+			 * Nothing to do.
+			 */
+			vy_stmt_unref_if_possible(stream->deferred_delete_stmt);
+			stream->deferred_delete_stmt = NULL;
+		} else {
+			if (stream->last_stmt != NULL)
+				vy_stmt_unref_if_possible(stream->last_stmt);
+			*ret = stream->last_stmt = stream->deferred_delete_stmt;
+			stream->deferred_delete_stmt = NULL;
+			return 0;
+		}
+	}
 
 	/* Build the next key sequence. */
 	stream->stmt_i = -1;
diff --git a/src/box/vy_write_iterator.h b/src/box/vy_write_iterator.h
index ea14b07a..5214b60c 100644
--- a/src/box/vy_write_iterator.h
+++ b/src/box/vy_write_iterator.h
@@ -213,6 +213,7 @@
  */
 
 struct vy_write_iterator;
+struct vy_deferred_delete_handler;
 struct key_def;
 struct tuple_format;
 struct tuple;
@@ -220,6 +221,41 @@ struct vy_mem;
 struct vy_slice;
 
 /**
+ * Callback invoked by the write iterator for tuples that were
+ * overwritten or deleted in the primary index without generating
+ * a DELETE statement for secondary indexes. It is supposed to
+ * produce a DELETE statement and insert it into secondary indexes.
+ *
+ * @param handler  Deferred DELETE handler.
+ * @param old_stmt Overwritten tuple.
+ * @param new_stmt Statement that overwrote @old_stmt.
+ *
+ * @retval  0 Success.
+ * @retval -1 Error.
+ *
+ * @sa VY_STMT_DEFERRED_DELETE.
+ */
+typedef int
+(*vy_deferred_delete_process_f)(struct vy_deferred_delete_handler *handler,
+				struct tuple *old_stmt, struct tuple *new_stmt);
+
+/**
+ * Callback invoked by the write iterator to destroy a deferred
+ * DELETE handler when the iteration is stopped.
+ */
+typedef void
+(*vy_deferred_delete_destroy_f)(struct vy_deferred_delete_handler *handler);
+
+struct vy_deferred_delete_handler_iface {
+	vy_deferred_delete_process_f process;
+	vy_deferred_delete_destroy_f destroy;
+};
+
+struct vy_deferred_delete_handler {
+	const struct vy_deferred_delete_handler_iface *iface;
+};
+
+/**
  * Open an empty write iterator. To add sources to the iterator
  * use vy_write_iterator_add_* functions.
  * @param cmp_def - key definition for tuple compare.
@@ -227,13 +263,16 @@ struct vy_slice;
  * @param LSM tree is_primary - set if this iterator is for a primary index.
  * @param is_last_level - there is no older level than the one we're writing to.
  * @param read_views - Opened read views.
+ * @param handler - Deferred DELETE handler or NULL if no deferred DELETEs
+ * are expected. Only relevant to primary index compaction. For secondary
+ * indexes this argument must be set to NULL.
  * @return the iterator or NULL on error (diag is set).
  */
 struct vy_stmt_stream *
 vy_write_iterator_new(const struct key_def *cmp_def,
-		      struct tuple_format *format,
-		      bool is_primary, bool is_last_level,
-		      struct rlist *read_views);
+		      struct tuple_format *format, bool is_primary,
+		      bool is_last_level, struct rlist *read_views,
+		      struct vy_deferred_delete_handler *handler);
 
 /**
  * Add a mem as a source to the iterator.
diff --git a/test/unit/vy_iterators_helper.c b/test/unit/vy_iterators_helper.c
index 642d8bf2..89603376 100644
--- a/test/unit/vy_iterators_helper.c
+++ b/test/unit/vy_iterators_helper.c
@@ -136,6 +136,7 @@ vy_new_simple_stmt(struct tuple_format *format,
 	}
 	free(buf);
 	vy_stmt_set_lsn(ret, templ->lsn);
+	vy_stmt_set_flags(ret, templ->flags);
 	if (templ->optimize_update)
 		vy_stmt_set_column_mask(ret, 0);
 	return ret;
@@ -277,6 +278,10 @@ vy_stmt_are_same(const struct tuple *actual,
 		tuple_unref(tmp);
 		return false;
 	}
+	if (vy_stmt_flags(actual) != expected->flags) {
+		tuple_unref(tmp);
+		return false;
+	}
 	bool rc = memcmp(a, b, a_len) == 0;
 	tuple_unref(tmp);
 	return rc;
diff --git a/test/unit/vy_iterators_helper.h b/test/unit/vy_iterators_helper.h
index e38ec295..24641df3 100644
--- a/test/unit/vy_iterators_helper.h
+++ b/test/unit/vy_iterators_helper.h
@@ -43,10 +43,16 @@
 #define vyend 99999999
 #define MAX_FIELDS_COUNT 100
 #define STMT_TEMPLATE(lsn, type, ...) \
-{ { __VA_ARGS__, vyend }, IPROTO_##type, lsn, false, 0, 0 }
+{ { __VA_ARGS__, vyend }, IPROTO_##type, lsn, false, 0, 0, 0 }
 
 #define STMT_TEMPLATE_OPTIMIZED(lsn, type, ...) \
-{ { __VA_ARGS__, vyend }, IPROTO_##type, lsn, true, 0, 0 }
+{ { __VA_ARGS__, vyend }, IPROTO_##type, lsn, true, 0, 0, 0 }
+
+#define STMT_TEMPLATE_FLAGS(lsn, type, flags, ...) \
+{ { __VA_ARGS__, vyend }, IPROTO_##type, lsn, false, flags, 0, 0 }
+
+#define STMT_TEMPLATE_DEFERRED_DELETE(lsn, type, ...) \
+STMT_TEMPLATE_FLAGS(lsn, type, VY_STMT_DEFERRED_DELETE, __VA_ARGS__)
 
 extern struct tuple_format_vtab vy_tuple_format_vtab;
 extern struct tuple_format *vy_key_format;
@@ -82,6 +88,8 @@ struct vy_stmt_template {
 	 * to skip it in the write_iterator.
 	 */
 	bool optimize_update;
+	/** Statement flags. */
+	uint8_t flags;
 	/*
 	 * In case of upsert it is possible to use only one 'add' operation.
 	 * This is the column number of the operation.
diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c
index b9b7d6ff..87f26900 100644
--- a/test/unit/vy_point_lookup.c
+++ b/test/unit/vy_point_lookup.c
@@ -192,7 +192,7 @@ test_basic()
 	}
 	struct vy_stmt_stream *write_stream
 		= vy_write_iterator_new(pk->cmp_def, pk->disk_format,
-					true, true, &read_views);
+					true, true, &read_views, NULL);
 	vy_write_iterator_new_mem(write_stream, run_mem);
 	struct vy_run *run = vy_run_new(&run_env, 1);
 	isnt(run, NULL, "vy_run_new");
@@ -225,7 +225,7 @@ test_basic()
 	}
 	write_stream
 		= vy_write_iterator_new(pk->cmp_def, pk->disk_format,
-					true, true, &read_views);
+					true, true, &read_views, NULL);
 	vy_write_iterator_new_mem(write_stream, run_mem);
 	run = vy_run_new(&run_env, 2);
 	isnt(run, NULL, "vy_run_new");
diff --git a/test/unit/vy_write_iterator.c b/test/unit/vy_write_iterator.c
index 25a346af..337e27ac 100644
--- a/test/unit/vy_write_iterator.c
+++ b/test/unit/vy_write_iterator.c
@@ -3,6 +3,65 @@
 #include "vy_write_iterator.h"
 #include "vy_iterators_helper.h"
 
+enum { MAX_DEFERRED_COUNT = 32 };
+
+/** Test deferred delete handler. */
+struct test_handler {
+	struct vy_deferred_delete_handler base;
+	/** Format to use for making DELETEs. */
+	struct tuple_format *format;
+	/** Deferred DELETEs generated by the write iterator. */
+	struct tuple *stmt[MAX_DEFERRED_COUNT];
+	/** Number of elements in @stmt array. */
+	int count;
+};
+
+/**
+ * Callback passed to the write iterator for generating deferred
+ * DELETE statements.
+ */
+static int
+test_handler_process(struct vy_deferred_delete_handler *base,
+		     struct tuple *old_stmt, struct tuple *new_stmt)
+{
+	struct test_handler *handler = (struct test_handler *)base;
+
+	fail_if(vy_stmt_type(old_stmt) == IPROTO_DELETE);
+	fail_if(vy_stmt_type(new_stmt) != IPROTO_DELETE &&
+		vy_stmt_type(new_stmt) != IPROTO_REPLACE);
+
+	struct tuple *delete = vy_stmt_new_surrogate_delete(handler->format,
+							    old_stmt);
+	fail_if(delete == NULL);
+	vy_stmt_set_lsn(delete, vy_stmt_lsn(new_stmt));
+
+	fail_if(handler->count >= MAX_DEFERRED_COUNT);
+	handler->stmt[handler->count++] = delete;
+	return 0;
+}
+
+static void
+test_handler_destroy(struct vy_deferred_delete_handler *base)
+{
+	struct test_handler *handler = (struct test_handler *)base;
+	for (int i = 0; i < handler->count; i++)
+		tuple_unref(handler->stmt[i]);
+}
+
+static const struct vy_deferred_delete_handler_iface test_handler_iface = {
+	.process = test_handler_process,
+	.destroy = test_handler_destroy,
+};
+
+static void
+test_handler_create(struct test_handler *handler, struct tuple_format *format)
+{
+	memset(handler, 0, sizeof(*handler));
+	handler->base.iface = &test_handler_iface;
+	handler->format = format;
+	tuple_format_ref(format);
+}
+
 /**
  * Create a mem with the specified content, iterate over it with
  * write_iterator and compare actual result statements with the
@@ -12,6 +71,8 @@
  * @param content_count Size of the @content.
  * @param expected Expected results of the iteration.
  * @param expected_count Size of the @expected.
+ * @param deferred Expected deferred DELETEs returned by the iteration.
+ * @param deferred_count Size of @deferred.
  * @param vlsns Read view lsns for the write iterator.
  * @param vlsns_count Size of the @vlsns.
  * @param is_primary True, if the new mem belongs to the primary
@@ -23,6 +84,8 @@ compare_write_iterator_results(const struct vy_stmt_template *content,
 			       int content_count,
 			       const struct vy_stmt_template *expected,
 			       int expected_count,
+			       const struct vy_stmt_template *deferred,
+			       int deferred_count,
 			       const int *vlsns, int vlsns_count,
 			       bool is_primary, bool is_last_level)
 {
@@ -38,8 +101,13 @@ compare_write_iterator_results(const struct vy_stmt_template *content,
 	fail_if(rv_array == NULL);
 	init_read_views_list(&rv_list, rv_array, vlsns, vlsns_count);
 
-	struct vy_stmt_stream *wi = vy_write_iterator_new(key_def, mem->format,
-					is_primary, is_last_level, &rv_list);
+	struct test_handler handler;
+	test_handler_create(&handler, mem->format);
+
+	struct vy_stmt_stream *wi;
+	wi = vy_write_iterator_new(key_def, mem->format, is_primary,
+				   is_last_level, &rv_list,
+				   is_primary ? &handler.base : NULL);
 	fail_if(wi == NULL);
 	fail_if(vy_write_iterator_new_mem(wi, mem) != 0);
 
@@ -58,7 +126,19 @@ compare_write_iterator_results(const struct vy_stmt_template *content,
 	} while (ret != NULL);
 	ok(i == expected_count, "correct results count");
 
+	for (i = 0; i < handler.count; i++) {
+		fail_if(i >= deferred_count);
+		ok(vy_stmt_are_same(handler.stmt[i], &deferred[i],
+				    handler.format, NULL),
+		   "deferred stmt %d is correct", i);
+	}
+	if (deferred != NULL) {
+		ok(handler.count == deferred_count,
+		   "correct deferred stmt count");
+	}
+
 	/* Clean up */
+	wi->iface->stop(wi);
 	wi->iface->close(wi);
 	vy_mem_delete(mem);
 	box_key_def_delete(key_def);
@@ -69,7 +149,7 @@ void
 test_basic(void)
 {
 	header();
-	plan(46);
+	plan(66);
 {
 /*
  * STATEMENT: REPL REPL REPL  DEL  REPL  REPL  REPL  REPL  REPL  REPL
@@ -98,7 +178,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, true);
 }
 {
@@ -132,7 +212,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, false);
 }
 {
@@ -160,7 +240,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, true);
 }
 {
@@ -180,7 +260,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, true);
 }
 {
@@ -204,7 +284,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, true);
 }
 {
@@ -227,7 +307,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, false);
 }
 {
@@ -255,7 +335,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, false, true);
 }
 {
@@ -275,7 +355,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, false, false);
 }
 {
@@ -302,7 +382,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, false);
 }
 {
@@ -330,7 +410,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, true);
 }
 {
@@ -355,7 +435,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, false, false);
 }
 {
@@ -380,7 +460,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, false);
 }
 {
@@ -410,7 +490,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, false);
 }
 {
@@ -451,7 +531,7 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
-				       expected, expected_count,
+				       expected, expected_count, NULL, 0,
 				       vlsns, vlsns_count, true, false);
 }
 {
@@ -491,7 +571,147 @@ test_basic(void)
 	int expected_count = sizeof(expected) / sizeof(expected[0]);
 	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
 	compare_write_iterator_results(content, content_count,
+				       expected, expected_count, NULL, 0,
+				       vlsns, vlsns_count, true, false);
+}
+{
+/*
+ * STATEMENT:    REPL DEL REPL REPL DEL DEL DEL REPL DEL INS DEL INS REPL
+ * LSN:            4   5    6    7   8   9  10   11  12  13  14  15   16
+ * DEFERRED DEL:   +   +    +        +   +        +           +        +
+ * READ VIEW:          *         *                *
+ *
+ * is_last_level = true
+ *
+ * Test generation of deferred DELETEs for various combinations
+ * of input statements.
+ */
+	const struct vy_stmt_template content[] = {
+		STMT_TEMPLATE_DEFERRED_DELETE(4, REPLACE, 1, 2),
+		STMT_TEMPLATE_DEFERRED_DELETE(5, DELETE, 1),
+		STMT_TEMPLATE_DEFERRED_DELETE(6, REPLACE, 1, 3),
+		STMT_TEMPLATE(7, REPLACE, 1, 4),
+		STMT_TEMPLATE_DEFERRED_DELETE(8, DELETE, 1),
+		STMT_TEMPLATE_DEFERRED_DELETE(9, DELETE, 1),
+		STMT_TEMPLATE(10, DELETE, 1),
+		STMT_TEMPLATE_DEFERRED_DELETE(11, REPLACE, 1, 5),
+		STMT_TEMPLATE(12, DELETE, 1),
+		STMT_TEMPLATE(13, INSERT, 1, 6),
+		STMT_TEMPLATE_DEFERRED_DELETE(14, DELETE, 1),
+		STMT_TEMPLATE(15, INSERT, 1, 7),
+		STMT_TEMPLATE_DEFERRED_DELETE(16, REPLACE, 1, 8),
+	};
+	const struct vy_stmt_template expected[] = {
+		STMT_TEMPLATE(16, REPLACE, 1, 8),
+		STMT_TEMPLATE(11, REPLACE, 1, 5),
+		STMT_TEMPLATE(7, REPLACE, 1, 4),
+	};
+	const struct vy_stmt_template deferred[] = {
+		STMT_TEMPLATE(16, DELETE, 1, 7),
+		STMT_TEMPLATE(14, DELETE, 1, 6),
+		STMT_TEMPLATE(8, DELETE, 1, 4),
+		STMT_TEMPLATE(5, DELETE, 1, 2),
+	};
+	const int vlsns[] = {5, 7, 11};
+	int content_count = sizeof(content) / sizeof(content[0]);
+	int expected_count = sizeof(expected) / sizeof(expected[0]);
+	int deferred_count = sizeof(deferred) / sizeof(deferred[0]);
+	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
+	compare_write_iterator_results(content, content_count,
+				       expected, expected_count,
+				       deferred, deferred_count,
+				       vlsns, vlsns_count, true, true);
+}
+{
+/*
+ * STATEMENT:    REPL REPL DEL
+ * LSN:            7    8   9
+ * DEFERRED DEL:   +
+ *
+ * is_last_level = false
+ *
+ * Check that the oldest VY_STMT_DEFERRED_DELETE statement is
+ * preserved in case it doesn't overwrite a terminal statement
+ * and this is not a major compaction.
+ */
+	const struct vy_stmt_template content[] = {
+		STMT_TEMPLATE_DEFERRED_DELETE(7, REPLACE, 1, 1),
+		STMT_TEMPLATE(8, REPLACE, 1, 2),
+		STMT_TEMPLATE(9, DELETE, 1, 3),
+	};
+	const struct vy_stmt_template expected[] = {
+		STMT_TEMPLATE(9, DELETE, 1, 1),
+		STMT_TEMPLATE_DEFERRED_DELETE(7, REPLACE, 1, 1),
+	};
+	const struct vy_stmt_template deferred[] = {};
+	const int vlsns[] = {};
+	int content_count = sizeof(content) / sizeof(content[0]);
+	int expected_count = sizeof(expected) / sizeof(expected[0]);
+	int deferred_count = sizeof(deferred) / sizeof(deferred[0]);
+	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
+	compare_write_iterator_results(content, content_count,
+				       expected, expected_count,
+				       deferred, deferred_count,
+				       vlsns, vlsns_count, true, false);
+}
+{
+/*
+ * STATEMENT:    REPL REPL DEL
+ * LSN:            7    8   9
+ * DEFERRED DEL:   +
+ * READ VIEW:      *
+ *
+ * is_last_level = false
+ *
+ * Check that the oldest VY_STMT_DEFERRED_DELETE statement is
+ * not returned twice if it is referenced by a read view.
+ */
+	const struct vy_stmt_template content[] = {
+		STMT_TEMPLATE_DEFERRED_DELETE(7, REPLACE, 1, 1),
+		STMT_TEMPLATE(8, REPLACE, 1, 2),
+		STMT_TEMPLATE(9, DELETE, 1, 3),
+	};
+	const struct vy_stmt_template expected[] = {
+		STMT_TEMPLATE(9, DELETE, 1, 1),
+		STMT_TEMPLATE_DEFERRED_DELETE(7, REPLACE, 1, 1),
+	};
+	const struct vy_stmt_template deferred[] = {};
+	const int vlsns[] = {7};
+	int content_count = sizeof(content) / sizeof(content[0]);
+	int expected_count = sizeof(expected) / sizeof(expected[0]);
+	int deferred_count = sizeof(deferred) / sizeof(deferred[0]);
+	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
+	compare_write_iterator_results(content, content_count,
+				       expected, expected_count,
+				       deferred, deferred_count,
+				       vlsns, vlsns_count, true, false);
+}
+{
+/*
+ * STATEMENT:    REPL
+ * LSN:            7
+ * DEFERRED DEL:   +
+ *
+ * is_last_level = false
+ *
+ * Check that the oldest VY_STMT_DEFERRED_DELETE statement is
+ * not returned twice if it is the only statement in the output.
+ */
+	const struct vy_stmt_template content[] = {
+		STMT_TEMPLATE_DEFERRED_DELETE(7, REPLACE, 1, 1),
+	};
+	const struct vy_stmt_template expected[] = {
+		STMT_TEMPLATE_DEFERRED_DELETE(7, REPLACE, 1, 1),
+	};
+	const struct vy_stmt_template deferred[] = {};
+	const int vlsns[] = {};
+	int content_count = sizeof(content) / sizeof(content[0]);
+	int expected_count = sizeof(expected) / sizeof(expected[0]);
+	int deferred_count = sizeof(deferred) / sizeof(deferred[0]);
+	int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]);
+	compare_write_iterator_results(content, content_count,
 				       expected, expected_count,
+				       deferred, deferred_count,
 				       vlsns, vlsns_count, true, false);
 }
 	fiber_gc();
diff --git a/test/unit/vy_write_iterator.result b/test/unit/vy_write_iterator.result
index 56d8cb1f..4f95aeb9 100644
--- a/test/unit/vy_write_iterator.result
+++ b/test/unit/vy_write_iterator.result
@@ -1,5 +1,5 @@
 	*** test_basic ***
-1..46
+1..66
 ok 1 - stmt 0 is correct
 ok 2 - stmt 1 is correct
 ok 3 - stmt 2 is correct
@@ -46,4 +46,24 @@ ok 43 - stmt 0 is correct
 ok 44 - stmt 1 is correct
 ok 45 - stmt 2 is correct
 ok 46 - correct results count
+ok 47 - stmt 0 is correct
+ok 48 - stmt 1 is correct
+ok 49 - stmt 2 is correct
+ok 50 - correct results count
+ok 51 - deferred stmt 0 is correct
+ok 52 - deferred stmt 1 is correct
+ok 53 - deferred stmt 2 is correct
+ok 54 - deferred stmt 3 is correct
+ok 55 - correct deferred stmt count
+ok 56 - stmt 0 is correct
+ok 57 - stmt 1 is correct
+ok 58 - correct results count
+ok 59 - correct deferred stmt count
+ok 60 - stmt 0 is correct
+ok 61 - stmt 1 is correct
+ok 62 - correct results count
+ok 63 - correct deferred stmt count
+ok 64 - stmt 0 is correct
+ok 65 - correct results count
+ok 66 - correct deferred stmt count
 	*** test_basic: done ***
-- 
2.11.0




More information about the Tarantool-patches mailing list