Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH 20/25] vinyl: allow to skip certain statements on read
Date: Fri, 27 Jul 2018 14:30:00 +0300	[thread overview]
Message-ID: <ebcbb8de39327e1b974ba46f5e9f87058ac51e24.1532689066.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1532689065.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1532689065.git.vdavydov.dev@gmail.com>

In the scope of #2129 we will defer insertion of certain DELETE
statements into secondary indexes until primary index compaction.
However, by the time we invoke compaction, new statements might
have been inserted into the space for the same set of keys.
If that happens, insertion of a deferred DELETE will break the
invariant which the read iterator relies upon: that for any key
older sources store older statements. To avoid that, let's add
a new per statement flag, VY_STMT_SKIP_READ, and make the read
iterator ignore statements marked with it.

Needed for #2129
---
 src/box/vy_mem.c  | 19 ++++++++++++-------
 src/box/vy_run.c  |  7 ++++++-
 src/box/vy_stmt.h | 10 ++++++++++
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/src/box/vy_mem.c b/src/box/vy_mem.c
index 3313ae54..0c46b93c 100644
--- a/src/box/vy_mem.c
+++ b/src/box/vy_mem.c
@@ -324,7 +324,8 @@ vy_mem_iterator_find_lsn(struct vy_mem_iterator *itr,
 	assert(!vy_mem_tree_iterator_is_invalid(&itr->curr_pos));
 	assert(itr->curr_stmt == vy_mem_iterator_curr_stmt(itr));
 	const struct key_def *cmp_def = itr->mem->cmp_def;
-	while (vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn) {
+	while (vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn ||
+	       vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ) {
 		if (vy_mem_iterator_step(itr, iterator_type) != 0 ||
 		    (iterator_type == ITER_EQ &&
 		     vy_stmt_compare(key, itr->curr_stmt, cmp_def))) {
@@ -341,6 +342,7 @@ vy_mem_iterator_find_lsn(struct vy_mem_iterator *itr,
 				*vy_mem_tree_iterator_get_elem(&itr->mem->tree,
 							       &prev_pos);
 			if (vy_stmt_lsn(prev_stmt) > (**itr->read_view).vlsn ||
+			    vy_stmt_flags(prev_stmt) & VY_STMT_SKIP_READ ||
 			    vy_tuple_compare(itr->curr_stmt, prev_stmt,
 					     cmp_def) != 0)
 				break;
@@ -496,18 +498,21 @@ vy_mem_iterator_next_lsn(struct vy_mem_iterator *itr)
 	const struct key_def *cmp_def = itr->mem->cmp_def;
 
 	struct vy_mem_tree_iterator next_pos = itr->curr_pos;
+next:
 	vy_mem_tree_iterator_next(&itr->mem->tree, &next_pos);
 	if (vy_mem_tree_iterator_is_invalid(&next_pos))
 		return 1; /* EOF */
 
 	const struct tuple *next_stmt;
 	next_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, &next_pos);
-	if (vy_tuple_compare(itr->curr_stmt, next_stmt, cmp_def) == 0) {
-		itr->curr_pos = next_pos;
-		itr->curr_stmt = next_stmt;
-		return 0;
-	}
-	return 1;
+	if (vy_tuple_compare(itr->curr_stmt, next_stmt, cmp_def) != 0)
+		return 1;
+
+	itr->curr_pos = next_pos;
+	itr->curr_stmt = next_stmt;
+	if (vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ)
+		goto next;
+	return 0;
 }
 
 /**
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index eae3e74d..f107e3a9 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -1157,7 +1157,8 @@ vy_run_iterator_find_lsn(struct vy_run_iterator *itr,
 	assert(itr->curr_stmt != NULL);
 	assert(itr->curr_pos.page_no < slice->run->info.page_count);
 
-	while (vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn) {
+	while (vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn ||
+	       vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ) {
 		if (vy_run_iterator_next_pos(itr, iterator_type,
 					     &itr->curr_pos) != 0) {
 			vy_run_iterator_stop(itr);
@@ -1183,6 +1184,7 @@ vy_run_iterator_find_lsn(struct vy_run_iterator *itr,
 						 &test_stmt) != 0)
 				return -1;
 			if (vy_stmt_lsn(test_stmt) > (**itr->read_view).vlsn ||
+			    vy_stmt_flags(test_stmt) & VY_STMT_SKIP_READ ||
 			    vy_tuple_compare(itr->curr_stmt, test_stmt,
 					     cmp_def) != 0) {
 				tuple_unref(test_stmt);
@@ -1478,6 +1480,7 @@ vy_run_iterator_next_lsn(struct vy_run_iterator *itr, struct tuple **ret)
 	assert(itr->curr_pos.page_no < itr->slice->run->info.page_count);
 
 	struct vy_run_iterator_pos next_pos;
+next:
 	if (vy_run_iterator_next_pos(itr, ITER_GE, &next_pos) != 0) {
 		vy_run_iterator_stop(itr);
 		return 0;
@@ -1495,6 +1498,8 @@ vy_run_iterator_next_lsn(struct vy_run_iterator *itr, struct tuple **ret)
 	tuple_unref(itr->curr_stmt);
 	itr->curr_stmt = next_key;
 	itr->curr_pos = next_pos;
+	if (vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ)
+		goto next;
 
 	vy_stmt_counter_acct_tuple(&itr->stat->get, itr->curr_stmt);
 	*ret = itr->curr_stmt;
diff --git a/src/box/vy_stmt.h b/src/box/vy_stmt.h
index 8de8aa84..878a27f7 100644
--- a/src/box/vy_stmt.h
+++ b/src/box/vy_stmt.h
@@ -87,6 +87,16 @@ enum {
 	 * DELETE statements for them during compaction.
 	 */
 	VY_STMT_DEFERRED_DELETE		= 1 << 0,
+	/**
+	 * Statements that have this flag set are ignored by the
+	 * read iterator.
+	 *
+	 * We set this flag for deferred DELETE statements, because
+	 * they may violate the invariant which the read relies upon:
+	 * the older a source, the older statements it stores for a
+	 * particular key.
+	 */
+	VY_STMT_SKIP_READ		= 1 << 1,
 };
 
 /**
-- 
2.11.0

  parent reply	other threads:[~2018-07-27 11:30 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-27 11:29 [PATCH 00/25] vinyl: eliminate disk read on REPLACE/DELETE Vladimir Davydov
2018-07-27 11:29 ` [PATCH 01/25] vinyl: make point lookup always return the latest tuple version Vladimir Davydov
2018-07-27 11:29 ` [PATCH 02/25] vinyl: simplify vy_squash_process Vladimir Davydov
2018-07-27 11:29 ` [PATCH 03/25] vinyl: always get full tuple from pk after reading from secondary index Vladimir Davydov
2018-07-27 11:29 ` [PATCH 04/25] vinyl: fold vy_replace_one and vy_replace_impl Vladimir Davydov
2018-07-27 11:29 ` [PATCH 05/25] vinyl: fold vy_delete_impl Vladimir Davydov
2018-07-27 11:29 ` [PATCH 06/25] vinyl: refactor unique check Vladimir Davydov
2018-07-27 11:29 ` [PATCH 07/25] vinyl: check key uniqueness before modifying tx write set Vladimir Davydov
2018-07-27 11:29 ` [PATCH 08/25] vinyl: remove env argument of vy_check_is_unique_{primary,secondary} Vladimir Davydov
2018-07-31 20:45   ` [tarantool-patches] " Konstantin Osipov
2018-07-27 11:29 ` [PATCH 09/25] vinyl: store full tuples in secondary index cache Vladimir Davydov
2018-07-31 20:47   ` Konstantin Osipov
2018-07-27 11:29 ` [PATCH 10/25] vinyl: do not free pending tasks on shutdown Vladimir Davydov
2018-07-31 20:48   ` Konstantin Osipov
2018-07-27 11:29 ` [PATCH 11/25] vinyl: store pointer to scheduler in struct vy_task Vladimir Davydov
2018-07-31 20:49   ` Konstantin Osipov
2018-07-27 11:29 ` [PATCH 12/25] vinyl: rename some members of vy_scheduler and vy_task struct Vladimir Davydov
2018-07-27 11:29 ` [PATCH 13/25] vinyl: use cbus for communication between scheduler and worker threads Vladimir Davydov
2018-07-27 11:29 ` [PATCH 14/25] vinyl: zap vy_scheduler::is_worker_pool_running Vladimir Davydov
2018-07-27 11:29 ` [PATCH 15/25] vinyl: rename vy_task::status to is_failed Vladimir Davydov
2018-07-27 11:29 ` [PATCH 16/25] xrow: allow to store flags in DML requests Vladimir Davydov
2018-07-27 11:29 ` [PATCH 17/25] vinyl: pin last statement returned by write iterator explicitly Vladimir Davydov
2018-07-27 11:29 ` [PATCH 18/25] vinyl: teach write iterator to return overwritten tuples Vladimir Davydov
2018-07-27 11:29 ` [PATCH 19/25] vinyl: prepare write iterator heap comparator for deferred DELETEs Vladimir Davydov
2018-07-27 11:30 ` Vladimir Davydov [this message]
2018-07-27 11:30 ` [PATCH 21/25] vinyl: add function to create surrogate deletes from raw msgpack Vladimir Davydov
2018-07-27 11:30 ` [PATCH 22/25] vinyl: remove pointless assertion from vy_stmt_new_surrogate_delete Vladimir Davydov
2018-07-27 11:30 ` [PATCH 23/25] txn: add helper to detect transaction boundaries Vladimir Davydov
2018-07-31 20:52   ` [tarantool-patches] " Konstantin Osipov
2018-07-27 11:30 ` [PATCH 24/25] Introduce _vinyl_deferred_delete system space Vladimir Davydov
2018-07-31 20:54   ` Konstantin Osipov
2018-08-01 14:00     ` Vladimir Davydov
2018-08-01 20:25       ` [tarantool-patches] " Konstantin Osipov
2018-08-02  9:43         ` Vladimir Davydov
2018-08-06  8:42           ` Vladimir Davydov
2018-07-27 11:30 ` [PATCH 25/25] vinyl: eliminate disk read on REPLACE/DELETE Vladimir Davydov
2018-07-31 20:55   ` Konstantin Osipov
2018-08-01 16:03     ` Vladimir Davydov
2018-08-01 16:51     ` Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ebcbb8de39327e1b974ba46f5e9f87058ac51e24.1532689066.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH 20/25] vinyl: allow to skip certain statements on read' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox