From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [RFC PATCH 16/23] vinyl: allow to skip certain statements on read Date: Sun, 8 Jul 2018 19:48:47 +0300 Message-Id: In-Reply-To: In-Reply-To: References: To: kostja@tarantool.org Cc: tarantool-patches@freelists.org List-ID: In the scope of #2129 we will defer insertion of certain DELETE statements into secondary indexes until primary index compaction. However, by the time we invoke compaction, new statements might have been inserted into the space for the same set of keys. If that happens, insertion of a deferred DELETE will break the invariant which the read iterator relies upon: for any key, older sources store older statements. To avoid that, let's add a new per-statement flag, VY_STMT_SKIP_READ, and make the read iterator ignore statements marked with it. Needed for #2129 --- src/box/vy_mem.c | 19 ++++++++++++------- src/box/vy_run.c | 7 ++++++- src/box/vy_stmt.h | 10 ++++++++++ 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/box/vy_mem.c b/src/box/vy_mem.c index 7c9690ef..dadd73cb 100644 --- a/src/box/vy_mem.c +++ b/src/box/vy_mem.c @@ -323,7 +323,8 @@ vy_mem_iterator_find_lsn(struct vy_mem_iterator *itr, assert(!vy_mem_tree_iterator_is_invalid(&itr->curr_pos)); assert(itr->curr_stmt == vy_mem_iterator_curr_stmt(itr)); const struct key_def *cmp_def = itr->mem->cmp_def; - while (vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn) { + while (vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn || + vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ) { if (vy_mem_iterator_step(itr, iterator_type) != 0 || (iterator_type == ITER_EQ && vy_stmt_compare(key, itr->curr_stmt, cmp_def))) { @@ -340,6 +341,7 @@ vy_mem_iterator_find_lsn(struct vy_mem_iterator *itr, *vy_mem_tree_iterator_get_elem(&itr->mem->tree, &prev_pos); if (vy_stmt_lsn(prev_stmt) > (**itr->read_view).vlsn || + vy_stmt_flags(prev_stmt) & VY_STMT_SKIP_READ || vy_tuple_compare(itr->curr_stmt, 
prev_stmt, cmp_def) != 0) break; @@ -495,18 +497,21 @@ vy_mem_iterator_next_lsn(struct vy_mem_iterator *itr) const struct key_def *cmp_def = itr->mem->cmp_def; struct vy_mem_tree_iterator next_pos = itr->curr_pos; +next: vy_mem_tree_iterator_next(&itr->mem->tree, &next_pos); if (vy_mem_tree_iterator_is_invalid(&next_pos)) return 1; /* EOF */ const struct tuple *next_stmt; next_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, &next_pos); - if (vy_tuple_compare(itr->curr_stmt, next_stmt, cmp_def) == 0) { - itr->curr_pos = next_pos; - itr->curr_stmt = next_stmt; - return 0; - } - return 1; + if (vy_tuple_compare(itr->curr_stmt, next_stmt, cmp_def) != 0) + return 1; + + itr->curr_pos = next_pos; + itr->curr_stmt = next_stmt; + if (vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ) + goto next; + return 0; } /** diff --git a/src/box/vy_run.c b/src/box/vy_run.c index dc837c2b..6f7fb82a 100644 --- a/src/box/vy_run.c +++ b/src/box/vy_run.c @@ -1157,7 +1157,8 @@ vy_run_iterator_find_lsn(struct vy_run_iterator *itr, assert(itr->curr_stmt != NULL); assert(itr->curr_pos.page_no < slice->run->info.page_count); - while (vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn) { + while (vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn || + vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ) { if (vy_run_iterator_next_pos(itr, iterator_type, &itr->curr_pos) != 0) { vy_run_iterator_stop(itr); @@ -1183,6 +1184,7 @@ vy_run_iterator_find_lsn(struct vy_run_iterator *itr, &test_stmt) != 0) return -1; if (vy_stmt_lsn(test_stmt) > (**itr->read_view).vlsn || + vy_stmt_flags(test_stmt) & VY_STMT_SKIP_READ || vy_tuple_compare(itr->curr_stmt, test_stmt, cmp_def) != 0) { tuple_unref(test_stmt); @@ -1478,6 +1480,7 @@ vy_run_iterator_next_lsn(struct vy_run_iterator *itr, struct tuple **ret) assert(itr->curr_pos.page_no < itr->slice->run->info.page_count); struct vy_run_iterator_pos next_pos; +next: if (vy_run_iterator_next_pos(itr, ITER_GE, &next_pos) != 0) { vy_run_iterator_stop(itr); 
return 0; @@ -1495,6 +1498,8 @@ vy_run_iterator_next_lsn(struct vy_run_iterator *itr, struct tuple **ret) tuple_unref(itr->curr_stmt); itr->curr_stmt = next_key; itr->curr_pos = next_pos; + if (vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ) + goto next; vy_stmt_counter_acct_tuple(&itr->stat->get, itr->curr_stmt); *ret = itr->curr_stmt; diff --git a/src/box/vy_stmt.h b/src/box/vy_stmt.h index 8de8aa84..878a27f7 100644 --- a/src/box/vy_stmt.h +++ b/src/box/vy_stmt.h @@ -87,6 +87,16 @@ enum { * DELETE statements for them during compaction. */ VY_STMT_DEFERRED_DELETE = 1 << 0, + /** + * Statements that have this flag set are ignored by the + * read iterator. + * + * We set this flag for deferred DELETE statements, because + * they may violate the invariant which the read iterator relies upon: + * the older a source, the older statements it stores for a + * particular key. + */ + VY_STMT_SKIP_READ = 1 << 1, }; /** -- 2.11.0