* [PATCH 00/12] vinyl: prepare read iterator for index rebuild
@ 2018-04-15 19:55 Vladimir Davydov
2018-04-15 19:55 ` [PATCH 01/12] vinyl: rename vy_stmt_history to vy_history Vladimir Davydov
` (12 more replies)
0 siblings, 13 replies; 17+ messages in thread
From: Vladimir Davydov @ 2018-04-15 19:55 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
To be able to use read iterator for building secondary indexes in vinyl,
we need it to guarantee that the returned tuple is always the newest
version, which is currently not true. Fix that.
https://github.com/tarantool/tarantool/commits/vy-prep-read-iterator-for-alter
Vladimir Davydov (12):
vinyl: rename vy_stmt_history to vy_history
vinyl: factor out vy_history_apply from vy_point_lookup_apply_history
vinyl: add vy_history_append_stmt helper
vinyl: zap iterator_src_type enum
vinyl: encapsulate key history with struct
vinyl: refine vy_history_cleanup
vinyl: move vy_history to its own source file
vinyl: use mempool for vy_history_node allocations
vinyl: consolidate skip optimization checks in read iterator
vinyl: refactor vy_read_iterator_next
vinyl: make read iterator always return newest tuple version
vinyl: zap vy_read_iterator::curr_stmt
src/box/CMakeLists.txt | 1 +
src/box/vy_cache.c | 51 ++---
src/box/vy_cache.h | 31 +--
src/box/vy_history.c | 115 ++++++++++
src/box/vy_history.h | 165 +++++++++++++
src/box/vy_lsm.c | 5 +
src/box/vy_lsm.h | 4 +-
src/box/vy_mem.c | 93 +++-----
src/box/vy_mem.h | 33 +--
src/box/vy_point_lookup.c | 228 +++---------------
src/box/vy_read_iterator.c | 560 +++++++++++++++++++--------------------------
src/box/vy_read_iterator.h | 11 +-
src/box/vy_run.c | 68 ++++--
src/box/vy_run.h | 23 +-
src/box/vy_tx.c | 55 +++--
src/box/vy_tx.h | 29 ++-
test/unit/CMakeLists.txt | 3 +
test/unit/vy_cache.c | 16 +-
test/unit/vy_mem.c | 24 +-
test/vinyl/upsert.result | 8 +-
test/vinyl/upsert.test.lua | 6 +-
21 files changed, 788 insertions(+), 741 deletions(-)
create mode 100644 src/box/vy_history.c
create mode 100644 src/box/vy_history.h
--
2.11.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 01/12] vinyl: rename vy_stmt_history to vy_history
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
@ 2018-04-15 19:55 ` Vladimir Davydov
2018-04-15 19:55 ` [PATCH 02/12] vinyl: factor out vy_history_apply from vy_point_lookup_apply_history Vladimir Davydov
` (11 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Vladimir Davydov @ 2018-04-15 19:55 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
It's shorter, but still perfectly clear.
---
src/box/vy_point_lookup.c | 48 +++++++++++++++++++++++------------------------
1 file changed, 24 insertions(+), 24 deletions(-)
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index 0618590a..32048654 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -62,7 +62,7 @@ enum iterator_src_type {
* same key in order of decreasing lsn. The history can be represented as a
* list, the structure below describes one node of the list.
*/
-struct vy_stmt_history_node {
+struct vy_history_node {
/* Type of source that the history statement came from */
enum iterator_src_type src_type;
/* The history statement. Referenced for runs. */
@@ -75,14 +75,14 @@ struct vy_stmt_history_node {
* Allocate (region) new history node.
* @return new node or NULL on memory error (diag is set).
*/
-static struct vy_stmt_history_node *
-vy_stmt_history_node_new(void)
+static struct vy_history_node *
+vy_history_node_new(void)
{
struct region *region = &fiber()->gc;
- struct vy_stmt_history_node *node = region_alloc(region, sizeof(*node));
+ struct vy_history_node *node = region_alloc(region, sizeof(*node));
if (node == NULL)
diag_set(OutOfMemory, sizeof(*node), "region",
- "struct vy_stmt_history_node");
+ "struct vy_history_node");
return node;
}
@@ -90,9 +90,9 @@ vy_stmt_history_node_new(void)
* Unref statement if necessary, remove node from history if it's there.
*/
static void
-vy_stmt_history_cleanup(struct rlist *history, size_t region_svp)
+vy_history_cleanup(struct rlist *history, size_t region_svp)
{
- struct vy_stmt_history_node *node;
+ struct vy_history_node *node;
rlist_foreach_entry(node, history, link)
if (node->src_type == ITER_SRC_RUN)
tuple_unref(node->stmt);
@@ -105,12 +105,12 @@ vy_stmt_history_cleanup(struct rlist *history, size_t region_svp)
* i.e. REPLACE of DELETE statement.
*/
static bool
-vy_stmt_history_is_terminal(struct rlist *history)
+vy_history_is_terminal(struct rlist *history)
{
if (rlist_empty(history))
return false;
- struct vy_stmt_history_node *node =
- rlist_last_entry(history, struct vy_stmt_history_node, link);
+ struct vy_history_node *node = rlist_last_entry(history,
+ struct vy_history_node, link);
assert(vy_stmt_type(node->stmt) == IPROTO_REPLACE ||
vy_stmt_type(node->stmt) == IPROTO_DELETE ||
vy_stmt_type(node->stmt) == IPROTO_INSERT ||
@@ -136,7 +136,7 @@ vy_point_lookup_scan_txw(struct vy_lsm *lsm, struct vy_tx *tx,
return 0;
vy_stmt_counter_acct_tuple(&lsm->stat.txw.iterator.get,
txv->stmt);
- struct vy_stmt_history_node *node = vy_stmt_history_node_new();
+ struct vy_history_node *node = vy_history_node_new();
if (node == NULL)
return -1;
node->src_type = ITER_SRC_TXW;
@@ -161,7 +161,7 @@ vy_point_lookup_scan_cache(struct vy_lsm *lsm,
return 0;
vy_stmt_counter_acct_tuple(&lsm->cache.stat.get, stmt);
- struct vy_stmt_history_node *node = vy_stmt_history_node_new();
+ struct vy_history_node *node = vy_history_node_new();
if (node == NULL)
return -1;
@@ -198,7 +198,7 @@ vy_point_lookup_scan_mem(struct vy_lsm *lsm, struct vy_mem *mem,
return 0;
while (true) {
- struct vy_stmt_history_node *node = vy_stmt_history_node_new();
+ struct vy_history_node *node = vy_history_node_new();
if (node == NULL)
return -1;
@@ -208,7 +208,7 @@ vy_point_lookup_scan_mem(struct vy_lsm *lsm, struct vy_mem *mem,
node->src_type = ITER_SRC_MEM;
node->stmt = (struct tuple *)stmt;
rlist_add_tail(history, &node->link);
- if (vy_stmt_history_is_terminal(history))
+ if (vy_history_is_terminal(history))
break;
if (!vy_mem_tree_iterator_next(&mem->tree, &mem_itr))
@@ -237,7 +237,7 @@ vy_point_lookup_scan_mems(struct vy_lsm *lsm, const struct vy_read_view **rv,
int rc = vy_point_lookup_scan_mem(lsm, lsm->mem, rv, key, history);
struct vy_mem *mem;
rlist_foreach_entry(mem, &lsm->sealed, in_sealed) {
- if (rc != 0 || vy_stmt_history_is_terminal(history))
+ if (rc != 0 || vy_history_is_terminal(history))
return rc;
rc = vy_point_lookup_scan_mem(lsm, mem, rv, key, history);
@@ -269,7 +269,7 @@ vy_point_lookup_scan_slice(struct vy_lsm *lsm, struct vy_slice *slice,
struct tuple *stmt;
rc = vy_run_iterator_next_key(&run_itr, &stmt);
while (rc == 0 && stmt != NULL) {
- struct vy_stmt_history_node *node = vy_stmt_history_node_new();
+ struct vy_history_node *node = vy_history_node_new();
if (node == NULL) {
rc = -1;
break;
@@ -341,9 +341,9 @@ vy_point_lookup_apply_history(struct vy_lsm *lsm,
return 0;
struct tuple *curr_stmt = NULL;
- struct vy_stmt_history_node *node =
- rlist_last_entry(history, struct vy_stmt_history_node, link);
- if (vy_stmt_history_is_terminal(history)) {
+ struct vy_history_node *node = rlist_last_entry(history,
+ struct vy_history_node, link);
+ if (vy_history_is_terminal(history)) {
if (vy_stmt_type(node->stmt) == IPROTO_DELETE) {
/* Ignore terminal delete */
} else if (node->src_type == ITER_SRC_MEM) {
@@ -401,15 +401,15 @@ restart:
rlist_create(&history);
rc = vy_point_lookup_scan_txw(lsm, tx, key, &history);
- if (rc != 0 || vy_stmt_history_is_terminal(&history))
+ if (rc != 0 || vy_history_is_terminal(&history))
goto done;
rc = vy_point_lookup_scan_cache(lsm, rv, key, &history);
- if (rc != 0 || vy_stmt_history_is_terminal(&history))
+ if (rc != 0 || vy_history_is_terminal(&history))
goto done;
rc = vy_point_lookup_scan_mems(lsm, rv, key, &history);
- if (rc != 0 || vy_stmt_history_is_terminal(&history))
+ if (rc != 0 || vy_history_is_terminal(&history))
goto done;
/* Save version before yield */
@@ -434,7 +434,7 @@ restart:
* This in unnecessary in case of rotation but since we
* cannot distinguish these two cases we always restart.
*/
- vy_stmt_history_cleanup(&history, region_svp);
+ vy_history_cleanup(&history, region_svp);
goto restart;
}
@@ -443,7 +443,7 @@ done:
rc = vy_point_lookup_apply_history(lsm, rv, key,
&history, ret);
}
- vy_stmt_history_cleanup(&history, region_svp);
+ vy_history_cleanup(&history, region_svp);
if (rc != 0)
return -1;
--
2.11.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 02/12] vinyl: factor out vy_history_apply from vy_point_lookup_apply_history
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
2018-04-15 19:55 ` [PATCH 01/12] vinyl: rename vy_stmt_history to vy_history Vladimir Davydov
@ 2018-04-15 19:55 ` Vladimir Davydov
2018-05-14 18:19 ` [tarantool-patches] " Vladislav Shpilevoy
2018-04-15 19:55 ` [PATCH 03/12] vinyl: add vy_history_append_stmt helper Vladimir Davydov
` (10 subsequent siblings)
12 siblings, 1 reply; 17+ messages in thread
From: Vladimir Davydov @ 2018-04-15 19:55 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
Apart from applying a key history, vy_point_lookup_apply_history also
adds the resultant tuple to the cache and updates LSM stats. Let's
factor out history manipulation into a separate function and put
everything else in vy_point_lookup so that we can make vy_history an
independent entity.
---
src/box/vy_point_lookup.c | 41 ++++++++++++++++++-----------------------
1 file changed, 18 insertions(+), 23 deletions(-)
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index 32048654..5d3076d9 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -328,15 +328,15 @@ vy_point_lookup_scan_slices(struct vy_lsm *lsm, const struct vy_read_view **rv,
}
/**
- * Get a resultant statement from collected history. Add to cache if possible.
+ * Get a resultant statement from collected history.
*/
static int
-vy_point_lookup_apply_history(struct vy_lsm *lsm,
- const struct vy_read_view **rv,
- struct tuple *key, struct rlist *history,
- struct tuple **ret)
+vy_history_apply(struct rlist *history, const struct key_def *cmp_def,
+ struct tuple_format *format, int *upserts_applied,
+ struct tuple **ret)
{
*ret = NULL;
+ *upserts_applied = 0;
if (rlist_empty(history))
return 0;
@@ -355,14 +355,9 @@ vy_point_lookup_apply_history(struct vy_lsm *lsm,
node = rlist_prev_entry_safe(node, history, link);
}
while (node != NULL) {
- assert(vy_stmt_type(node->stmt) == IPROTO_UPSERT);
- /* We could not read the data that is invisible now */
- assert(node->src_type == ITER_SRC_TXW ||
- vy_stmt_lsn(node->stmt) <= (*rv)->vlsn);
-
struct tuple *stmt = vy_apply_upsert(node->stmt, curr_stmt,
- lsm->cmp_def, lsm->mem_format, true);
- lsm->stat.upsert.applied++;
+ cmp_def, format, true);
+ ++*upserts_applied;
if (stmt == NULL)
return -1;
if (curr_stmt != NULL)
@@ -370,15 +365,7 @@ vy_point_lookup_apply_history(struct vy_lsm *lsm,
curr_stmt = stmt;
node = rlist_prev_entry_safe(node, history, link);
}
- if (curr_stmt != NULL) {
- vy_stmt_counter_acct_tuple(&lsm->stat.get, curr_stmt);
- *ret = curr_stmt;
- }
- /**
- * Add a statement to the cache
- */
- if ((*rv)->vlsn == INT64_MAX) /* Do not store non-latest data */
- vy_cache_add(&lsm->cache, curr_stmt, NULL, key, ITER_EQ);
+ *ret = curr_stmt;
return 0;
}
@@ -440,14 +427,22 @@ restart:
done:
if (rc == 0) {
- rc = vy_point_lookup_apply_history(lsm, rv, key,
- &history, ret);
+ int upserts_applied;
+ rc = vy_history_apply(&history, lsm->cmp_def, lsm->mem_format,
+ &upserts_applied, ret);
+ lsm->stat.upsert.applied += upserts_applied;
}
vy_history_cleanup(&history, region_svp);
if (rc != 0)
return -1;
+ if (*ret != NULL) {
+ vy_stmt_counter_acct_tuple(&lsm->stat.get, *ret);
+ if ((*rv)->vlsn == INT64_MAX)
+ vy_cache_add(&lsm->cache, *ret, NULL, key, ITER_EQ);
+ }
+
double latency = ev_monotonic_now(loop()) - start_time;
latency_collect(&lsm->stat.latency, latency);
--
2.11.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 03/12] vinyl: add vy_history_append_stmt helper
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
2018-04-15 19:55 ` [PATCH 01/12] vinyl: rename vy_stmt_history to vy_history Vladimir Davydov
2018-04-15 19:55 ` [PATCH 02/12] vinyl: factor out vy_history_apply from vy_point_lookup_apply_history Vladimir Davydov
@ 2018-04-15 19:55 ` Vladimir Davydov
2018-04-15 19:55 ` [PATCH 04/12] vinyl: zap iterator_src_type enum Vladimir Davydov
` (9 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Vladimir Davydov @ 2018-04-15 19:55 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
Currently, we append statements to a history list in an open-coded
manner, using list manipulation functions. This violates encapsulation
and prevents us from making vy_history an independent entity that
could be reused e.g. in vy_read_iterator. Let's introduce a helper
vy_history_append_stmt to hide vy_history implementation details.
---
src/box/vy_point_lookup.c | 51 ++++++++++++++++++-----------------------------
1 file changed, 19 insertions(+), 32 deletions(-)
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index 5d3076d9..d985bdc2 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -72,18 +72,26 @@ struct vy_history_node {
};
/**
- * Allocate (region) new history node.
- * @return new node or NULL on memory error (diag is set).
+ * Append an (older) statement to a history list.
+ * Returns 0 on success, -1 on memory allocation error.
*/
-static struct vy_history_node *
-vy_history_node_new(void)
+static int
+vy_history_append_stmt(struct rlist *history, struct tuple *stmt,
+ enum iterator_src_type src_type)
{
struct region *region = &fiber()->gc;
struct vy_history_node *node = region_alloc(region, sizeof(*node));
- if (node == NULL)
+ if (node == NULL) {
diag_set(OutOfMemory, sizeof(*node), "region",
"struct vy_history_node");
- return node;
+ return -1;
+ }
+ node->src_type = src_type;
+ if (node->src_type == ITER_SRC_RUN)
+ tuple_ref(stmt);
+ node->stmt = stmt;
+ rlist_add_tail_entry(history, node, link);
+ return 0;
}
/**
@@ -136,13 +144,7 @@ vy_point_lookup_scan_txw(struct vy_lsm *lsm, struct vy_tx *tx,
return 0;
vy_stmt_counter_acct_tuple(&lsm->stat.txw.iterator.get,
txv->stmt);
- struct vy_history_node *node = vy_history_node_new();
- if (node == NULL)
- return -1;
- node->src_type = ITER_SRC_TXW;
- node->stmt = txv->stmt;
- rlist_add_tail(history, &node->link);
- return 0;
+ return vy_history_append_stmt(history, txv->stmt, ITER_SRC_TXW);
}
/**
@@ -161,14 +163,7 @@ vy_point_lookup_scan_cache(struct vy_lsm *lsm,
return 0;
vy_stmt_counter_acct_tuple(&lsm->cache.stat.get, stmt);
- struct vy_history_node *node = vy_history_node_new();
- if (node == NULL)
- return -1;
-
- node->src_type = ITER_SRC_CACHE;
- node->stmt = stmt;
- rlist_add_tail(history, &node->link);
- return 0;
+ return vy_history_append_stmt(history, stmt, ITER_SRC_CACHE);
}
/**
@@ -198,16 +193,13 @@ vy_point_lookup_scan_mem(struct vy_lsm *lsm, struct vy_mem *mem,
return 0;
while (true) {
- struct vy_history_node *node = vy_history_node_new();
- if (node == NULL)
+ if (vy_history_append_stmt(history, (struct tuple *)stmt,
+ ITER_SRC_MEM) != 0)
return -1;
vy_stmt_counter_acct_tuple(&lsm->stat.memory.iterator.get,
stmt);
- node->src_type = ITER_SRC_MEM;
- node->stmt = (struct tuple *)stmt;
- rlist_add_tail(history, &node->link);
if (vy_history_is_terminal(history))
break;
@@ -269,15 +261,10 @@ vy_point_lookup_scan_slice(struct vy_lsm *lsm, struct vy_slice *slice,
struct tuple *stmt;
rc = vy_run_iterator_next_key(&run_itr, &stmt);
while (rc == 0 && stmt != NULL) {
- struct vy_history_node *node = vy_history_node_new();
- if (node == NULL) {
+ if (vy_history_append_stmt(history, stmt, ITER_SRC_RUN) != 0) {
rc = -1;
break;
}
- node->src_type = ITER_SRC_RUN;
- node->stmt = stmt;
- tuple_ref(stmt);
- rlist_add_tail(history, &node->link);
if (vy_stmt_type(stmt) != IPROTO_UPSERT) {
*terminal_found = true;
break;
--
2.11.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 04/12] vinyl: zap iterator_src_type enum
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
` (2 preceding siblings ...)
2018-04-15 19:55 ` [PATCH 03/12] vinyl: add vy_history_append_stmt helper Vladimir Davydov
@ 2018-04-15 19:55 ` Vladimir Davydov
2018-04-15 19:55 ` [PATCH 05/12] vinyl: encapsulate key history with struct Vladimir Davydov
` (8 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Vladimir Davydov @ 2018-04-15 19:55 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
vy_history_append_stmt requires the caller to pass the statement type,
a value of iterator_src_type enumeration. It is needed to reference
statements stored in runs (ITER_SRC_RUN). Point lookup doesn't need to
reference other statements: txw is immutable during point lookup; if a
statement is found in cache, point lookup returns immediately so nothing
has to be referenced; mem statements are unrefable, and point lookup
uses mem_tree_version to track their lifetime.
To make vy_history usable by vy_read_iterator, we need to reference all
kinds of statements except mem statements. The latter can be identified
with vy_stmt_is_refable helper so there's no need in that extra argument
nor in iterator_src_type enum. Let's zap them. However, we do need to
remember whether the statement was refable at the time of insertion into
a history, because by the time we clean up the history, mem statements
stored in it might have been deleted, thus making vy_stmt_is_refable
unusable.
---
src/box/vy_point_lookup.c | 52 +++++++++++++++++++++++------------------------
1 file changed, 26 insertions(+), 26 deletions(-)
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index d985bdc2..805e5455 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -48,27 +48,29 @@
#include "vy_upsert.h"
/**
- * ID of an iterator source type. Can be used in bitmaps.
- */
-enum iterator_src_type {
- ITER_SRC_TXW = 1,
- ITER_SRC_CACHE = 2,
- ITER_SRC_MEM = 4,
- ITER_SRC_RUN = 8,
-};
-
-/**
* History of a key in vinyl is a continuous sequence of statements of the
* same key in order of decreasing lsn. The history can be represented as a
* list, the structure below describes one node of the list.
*/
struct vy_history_node {
- /* Type of source that the history statement came from */
- enum iterator_src_type src_type;
- /* The history statement. Referenced for runs. */
- struct tuple *stmt;
- /* Link in the history list */
+ /** Link in a history list. */
struct rlist link;
+ /** History statement. Referenced if @is_refable is set. */
+ struct tuple *stmt;
+ /**
+ * Set if the statement stored in this node is refable,
+ * i.e. has a reference counter that can be incremented
+ * to pin the statement in memory. Refable statements are
+ * referenced by the history. It is a responsibility of
+ * the user of the history to track lifetime of unrefable
+ * statements.
+ *
+ * Note, we need to store this flag here, because by the
+ * time we clean up a history list, unrefable statements
+ * stored in it might have been deleted, thus making
+ * vy_stmt_is_refable() unusable.
+ */
+ bool is_refable;
};
/**
@@ -76,8 +78,7 @@ struct vy_history_node {
* Returns 0 on success, -1 on memory allocation error.
*/
static int
-vy_history_append_stmt(struct rlist *history, struct tuple *stmt,
- enum iterator_src_type src_type)
+vy_history_append_stmt(struct rlist *history, struct tuple *stmt)
{
struct region *region = &fiber()->gc;
struct vy_history_node *node = region_alloc(region, sizeof(*node));
@@ -86,8 +87,8 @@ vy_history_append_stmt(struct rlist *history, struct tuple *stmt,
"struct vy_history_node");
return -1;
}
- node->src_type = src_type;
- if (node->src_type == ITER_SRC_RUN)
+ node->is_refable = vy_stmt_is_refable(stmt);
+ if (node->is_refable)
tuple_ref(stmt);
node->stmt = stmt;
rlist_add_tail_entry(history, node, link);
@@ -102,7 +103,7 @@ vy_history_cleanup(struct rlist *history, size_t region_svp)
{
struct vy_history_node *node;
rlist_foreach_entry(node, history, link)
- if (node->src_type == ITER_SRC_RUN)
+ if (node->is_refable)
tuple_unref(node->stmt);
region_truncate(&fiber()->gc, region_svp);
@@ -144,7 +145,7 @@ vy_point_lookup_scan_txw(struct vy_lsm *lsm, struct vy_tx *tx,
return 0;
vy_stmt_counter_acct_tuple(&lsm->stat.txw.iterator.get,
txv->stmt);
- return vy_history_append_stmt(history, txv->stmt, ITER_SRC_TXW);
+ return vy_history_append_stmt(history, txv->stmt);
}
/**
@@ -163,7 +164,7 @@ vy_point_lookup_scan_cache(struct vy_lsm *lsm,
return 0;
vy_stmt_counter_acct_tuple(&lsm->cache.stat.get, stmt);
- return vy_history_append_stmt(history, stmt, ITER_SRC_CACHE);
+ return vy_history_append_stmt(history, stmt);
}
/**
@@ -193,8 +194,7 @@ vy_point_lookup_scan_mem(struct vy_lsm *lsm, struct vy_mem *mem,
return 0;
while (true) {
- if (vy_history_append_stmt(history, (struct tuple *)stmt,
- ITER_SRC_MEM) != 0)
+ if (vy_history_append_stmt(history, (struct tuple *)stmt) != 0)
return -1;
vy_stmt_counter_acct_tuple(&lsm->stat.memory.iterator.get,
@@ -261,7 +261,7 @@ vy_point_lookup_scan_slice(struct vy_lsm *lsm, struct vy_slice *slice,
struct tuple *stmt;
rc = vy_run_iterator_next_key(&run_itr, &stmt);
while (rc == 0 && stmt != NULL) {
- if (vy_history_append_stmt(history, stmt, ITER_SRC_RUN) != 0) {
+ if (vy_history_append_stmt(history, stmt) != 0) {
rc = -1;
break;
}
@@ -333,7 +333,7 @@ vy_history_apply(struct rlist *history, const struct key_def *cmp_def,
if (vy_history_is_terminal(history)) {
if (vy_stmt_type(node->stmt) == IPROTO_DELETE) {
/* Ignore terminal delete */
- } else if (node->src_type == ITER_SRC_MEM) {
+ } else if (!node->is_refable) {
curr_stmt = vy_stmt_dup(node->stmt);
} else {
curr_stmt = node->stmt;
--
2.11.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 05/12] vinyl: encapsulate key history with struct
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
` (3 preceding siblings ...)
2018-04-15 19:55 ` [PATCH 04/12] vinyl: zap iterator_src_type enum Vladimir Davydov
@ 2018-04-15 19:55 ` Vladimir Davydov
2018-04-15 19:55 ` [PATCH 06/12] vinyl: refine vy_history_cleanup Vladimir Davydov
` (7 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Vladimir Davydov @ 2018-04-15 19:55 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
Currently, a key history is represented by rlist, which makes it
difficult to extend it or change its implementation. So let's hide
the implementation behind a struct.
---
src/box/vy_point_lookup.c | 67 ++++++++++++++++++++++++++++-------------------
1 file changed, 40 insertions(+), 27 deletions(-)
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index 805e5455..f932f07f 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -47,11 +47,16 @@
#include "vy_cache.h"
#include "vy_upsert.h"
-/**
- * History of a key in vinyl is a continuous sequence of statements of the
- * same key in order of decreasing lsn. The history can be represented as a
- * list, the structure below describes one node of the list.
- */
+/** Key history. */
+struct vy_history {
+ /**
+ * List of statements sorted by LSN in descending order.
+ * Linked by vy_history_node::link.
+ */
+ struct rlist stmts;
+};
+
+/** Key history node. */
struct vy_history_node {
/** Link in a history list. */
struct rlist link;
@@ -74,11 +79,20 @@ struct vy_history_node {
};
/**
+ * Initialize a history list.
+ */
+static void
+vy_history_create(struct vy_history *history)
+{
+ rlist_create(&history->stmts);
+}
+
+/**
* Append an (older) statement to a history list.
* Returns 0 on success, -1 on memory allocation error.
*/
static int
-vy_history_append_stmt(struct rlist *history, struct tuple *stmt)
+vy_history_append_stmt(struct vy_history *history, struct tuple *stmt)
{
struct region *region = &fiber()->gc;
struct vy_history_node *node = region_alloc(region, sizeof(*node));
@@ -91,7 +105,7 @@ vy_history_append_stmt(struct rlist *history, struct tuple *stmt)
if (node->is_refable)
tuple_ref(stmt);
node->stmt = stmt;
- rlist_add_tail_entry(history, node, link);
+ rlist_add_tail_entry(&history->stmts, node, link);
return 0;
}
@@ -99,10 +113,10 @@ vy_history_append_stmt(struct rlist *history, struct tuple *stmt)
* Unref statement if necessary, remove node from history if it's there.
*/
static void
-vy_history_cleanup(struct rlist *history, size_t region_svp)
+vy_history_cleanup(struct vy_history *history, size_t region_svp)
{
struct vy_history_node *node;
- rlist_foreach_entry(node, history, link)
+ rlist_foreach_entry(node, &history->stmts, link)
if (node->is_refable)
tuple_unref(node->stmt);
@@ -114,11 +128,11 @@ vy_history_cleanup(struct rlist *history, size_t region_svp)
* i.e. REPLACE of DELETE statement.
*/
static bool
-vy_history_is_terminal(struct rlist *history)
+vy_history_is_terminal(struct vy_history *history)
{
- if (rlist_empty(history))
+ if (rlist_empty(&history->stmts))
return false;
- struct vy_history_node *node = rlist_last_entry(history,
+ struct vy_history_node *node = rlist_last_entry(&history->stmts,
struct vy_history_node, link);
assert(vy_stmt_type(node->stmt) == IPROTO_REPLACE ||
vy_stmt_type(node->stmt) == IPROTO_DELETE ||
@@ -133,7 +147,7 @@ vy_history_is_terminal(struct rlist *history)
*/
static int
vy_point_lookup_scan_txw(struct vy_lsm *lsm, struct vy_tx *tx,
- struct tuple *key, struct rlist *history)
+ struct tuple *key, struct vy_history *history)
{
if (tx == NULL)
return 0;
@@ -153,9 +167,8 @@ vy_point_lookup_scan_txw(struct vy_lsm *lsm, struct vy_tx *tx,
* Add one or no statement to the history list.
*/
static int
-vy_point_lookup_scan_cache(struct vy_lsm *lsm,
- const struct vy_read_view **rv,
- struct tuple *key, struct rlist *history)
+vy_point_lookup_scan_cache(struct vy_lsm *lsm, const struct vy_read_view **rv,
+ struct tuple *key, struct vy_history *history)
{
lsm->cache.stat.lookup++;
struct tuple *stmt = vy_cache_get(&lsm->cache, key);
@@ -174,7 +187,7 @@ vy_point_lookup_scan_cache(struct vy_lsm *lsm,
static int
vy_point_lookup_scan_mem(struct vy_lsm *lsm, struct vy_mem *mem,
const struct vy_read_view **rv,
- struct tuple *key, struct rlist *history)
+ struct tuple *key, struct vy_history *history)
{
struct tree_mem_key tree_key;
tree_key.stmt = key;
@@ -223,7 +236,7 @@ vy_point_lookup_scan_mem(struct vy_lsm *lsm, struct vy_mem *mem,
*/
static int
vy_point_lookup_scan_mems(struct vy_lsm *lsm, const struct vy_read_view **rv,
- struct tuple *key, struct rlist *history)
+ struct tuple *key, struct vy_history *history)
{
assert(lsm->mem != NULL);
int rc = vy_point_lookup_scan_mem(lsm, lsm->mem, rv, key, history);
@@ -246,7 +259,7 @@ vy_point_lookup_scan_mems(struct vy_lsm *lsm, const struct vy_read_view **rv,
static int
vy_point_lookup_scan_slice(struct vy_lsm *lsm, struct vy_slice *slice,
const struct vy_read_view **rv, struct tuple *key,
- struct rlist *history, bool *terminal_found)
+ struct vy_history *history, bool *terminal_found)
{
int rc = 0;
/*
@@ -283,7 +296,7 @@ vy_point_lookup_scan_slice(struct vy_lsm *lsm, struct vy_slice *slice,
*/
static int
vy_point_lookup_scan_slices(struct vy_lsm *lsm, const struct vy_read_view **rv,
- struct tuple *key, struct rlist *history)
+ struct tuple *key, struct vy_history *history)
{
struct vy_range *range = vy_range_tree_find_by_key(lsm->tree,
ITER_EQ, key);
@@ -318,17 +331,17 @@ vy_point_lookup_scan_slices(struct vy_lsm *lsm, const struct vy_read_view **rv,
* Get a resultant statement from collected history.
*/
static int
-vy_history_apply(struct rlist *history, const struct key_def *cmp_def,
+vy_history_apply(struct vy_history *history, const struct key_def *cmp_def,
struct tuple_format *format, int *upserts_applied,
struct tuple **ret)
{
*ret = NULL;
*upserts_applied = 0;
- if (rlist_empty(history))
+ if (rlist_empty(&history->stmts))
return 0;
struct tuple *curr_stmt = NULL;
- struct vy_history_node *node = rlist_last_entry(history,
+ struct vy_history_node *node = rlist_last_entry(&history->stmts,
struct vy_history_node, link);
if (vy_history_is_terminal(history)) {
if (vy_stmt_type(node->stmt) == IPROTO_DELETE) {
@@ -339,7 +352,7 @@ vy_history_apply(struct rlist *history, const struct key_def *cmp_def,
curr_stmt = node->stmt;
tuple_ref(curr_stmt);
}
- node = rlist_prev_entry_safe(node, history, link);
+ node = rlist_prev_entry_safe(node, &history->stmts, link);
}
while (node != NULL) {
struct tuple *stmt = vy_apply_upsert(node->stmt, curr_stmt,
@@ -350,7 +363,7 @@ vy_history_apply(struct rlist *history, const struct key_def *cmp_def,
if (curr_stmt != NULL)
tuple_unref(curr_stmt);
curr_stmt = stmt;
- node = rlist_prev_entry_safe(node, history, link);
+ node = rlist_prev_entry_safe(node, &history->stmts, link);
}
*ret = curr_stmt;
return 0;
@@ -370,9 +383,9 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx,
lsm->stat.lookup++;
/* History list */
- struct rlist history;
+ struct vy_history history;
restart:
- rlist_create(&history);
+ vy_history_create(&history);
rc = vy_point_lookup_scan_txw(lsm, tx, key, &history);
if (rc != 0 || vy_history_is_terminal(&history))
--
2.11.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 06/12] vinyl: refine vy_history_cleanup
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
` (4 preceding siblings ...)
2018-04-15 19:55 ` [PATCH 05/12] vinyl: encapsulate key history with struct Vladimir Davydov
@ 2018-04-15 19:55 ` Vladimir Davydov
2018-04-15 19:55 ` [PATCH 07/12] vinyl: move vy_history to its own source file Vladimir Davydov
` (6 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Vladimir Davydov @ 2018-04-15 19:55 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
- Remove region_truncate from vy_history_cleanup. There may be other
objects apart from vy_history_node allocated on the region so it is
conceptually wrong to free memory in vy_history_cleanup.
- Reinitialize the history list in vy_history_cleanup so that it isn't
incumbent on the caller to call vy_history_create to reuse a history
after cleanup. This will simplify usage of vy_history in the read
iterator.
---
src/box/vy_point_lookup.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index f932f07f..7c9995ed 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -110,17 +110,17 @@ vy_history_append_stmt(struct vy_history *history, struct tuple *stmt)
}
/**
- * Unref statement if necessary, remove node from history if it's there.
+ * Release all statements stored in the given history and
+ * reinitialize the history list.
*/
static void
-vy_history_cleanup(struct vy_history *history, size_t region_svp)
+vy_history_cleanup(struct vy_history *history)
{
struct vy_history_node *node;
rlist_foreach_entry(node, &history->stmts, link)
if (node->is_refable)
tuple_unref(node->stmt);
-
- region_truncate(&fiber()->gc, region_svp);
+ rlist_create(&history->stmts);
}
/**
@@ -384,9 +384,8 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx,
lsm->stat.lookup++;
/* History list */
struct vy_history history;
-restart:
vy_history_create(&history);
-
+restart:
rc = vy_point_lookup_scan_txw(lsm, tx, key, &history);
if (rc != 0 || vy_history_is_terminal(&history))
goto done;
@@ -421,7 +420,8 @@ restart:
* This in unnecessary in case of rotation but since we
* cannot distinguish these two cases we always restart.
*/
- vy_history_cleanup(&history, region_svp);
+ vy_history_cleanup(&history);
+ region_truncate(&fiber()->gc, region_svp);
goto restart;
}
@@ -432,7 +432,8 @@ done:
&upserts_applied, ret);
lsm->stat.upsert.applied += upserts_applied;
}
- vy_history_cleanup(&history, region_svp);
+ vy_history_cleanup(&history);
+ region_truncate(&fiber()->gc, region_svp);
if (rc != 0)
return -1;
--
2.11.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 07/12] vinyl: move vy_history to its own source file
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
` (5 preceding siblings ...)
2018-04-15 19:55 ` [PATCH 06/12] vinyl: refine vy_history_cleanup Vladimir Davydov
@ 2018-04-15 19:55 ` Vladimir Davydov
2018-04-15 19:55 ` [PATCH 08/12] vinyl: use mempool for vy_history_node allocations Vladimir Davydov
` (5 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Vladimir Davydov @ 2018-04-15 19:55 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
So that it can be reused by vy_read_iterator. No functional changes,
this patch just moves pieces of code.
---
src/box/CMakeLists.txt | 1 +
src/box/vy_history.c | 111 +++++++++++++++++++++++++++++++++++++
src/box/vy_history.h | 133 ++++++++++++++++++++++++++++++++++++++++++++
src/box/vy_point_lookup.c | 138 +---------------------------------------------
test/unit/CMakeLists.txt | 1 +
5 files changed, 247 insertions(+), 137 deletions(-)
create mode 100644 src/box/vy_history.c
create mode 100644 src/box/vy_history.h
diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index ef7225d1..807ee566 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -84,6 +84,7 @@ add_library(box STATIC
vy_cache.c
vy_log.c
vy_upsert.c
+ vy_history.c
vy_read_set.c
vy_scheduler.c
request.c
diff --git a/src/box/vy_history.c b/src/box/vy_history.c
new file mode 100644
index 00000000..a11705a6
--- /dev/null
+++ b/src/box/vy_history.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2010-2018, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "vy_history.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <small/region.h>
+#include <small/rlist.h>
+
+#include "diag.h"
+#include "fiber.h"
+#include "tuple.h"
+#include "iproto_constants.h"
+#include "vy_stmt.h"
+#include "vy_upsert.h"
+
+int
+vy_history_append_stmt(struct vy_history *history, struct tuple *stmt)
+{
+ struct region *region = &fiber()->gc;
+ struct vy_history_node *node = region_alloc(region, sizeof(*node));
+ if (node == NULL) {
+ diag_set(OutOfMemory, sizeof(*node), "region",
+ "struct vy_history_node");
+ return -1;
+ }
+ node->is_refable = vy_stmt_is_refable(stmt);
+ if (node->is_refable)
+ tuple_ref(stmt);
+ node->stmt = stmt;
+ rlist_add_tail_entry(&history->stmts, node, link);
+ return 0;
+}
+
+void
+vy_history_cleanup(struct vy_history *history)
+{
+ struct vy_history_node *node;
+ rlist_foreach_entry(node, &history->stmts, link) {
+ if (node->is_refable)
+ tuple_unref(node->stmt);
+ }
+ rlist_create(&history->stmts);
+}
+
+int
+vy_history_apply(struct vy_history *history, const struct key_def *cmp_def,
+ struct tuple_format *format, int *upserts_applied,
+ struct tuple **ret)
+{
+ *ret = NULL;
+ *upserts_applied = 0;
+ if (rlist_empty(&history->stmts))
+ return 0;
+
+ struct tuple *curr_stmt = NULL;
+ struct vy_history_node *node = rlist_last_entry(&history->stmts,
+ struct vy_history_node, link);
+ if (vy_history_is_terminal(history)) {
+ if (vy_stmt_type(node->stmt) == IPROTO_DELETE) {
+ /* Ignore terminal delete */
+ } else if (!node->is_refable) {
+ curr_stmt = vy_stmt_dup(node->stmt);
+ } else {
+ curr_stmt = node->stmt;
+ tuple_ref(curr_stmt);
+ }
+ node = rlist_prev_entry_safe(node, &history->stmts, link);
+ }
+ while (node != NULL) {
+ struct tuple *stmt = vy_apply_upsert(node->stmt, curr_stmt,
+ cmp_def, format, true);
+ ++*upserts_applied;
+ if (stmt == NULL)
+ return -1;
+ if (curr_stmt != NULL)
+ tuple_unref(curr_stmt);
+ curr_stmt = stmt;
+ node = rlist_prev_entry_safe(node, &history->stmts, link);
+ }
+ *ret = curr_stmt;
+ return 0;
+}
diff --git a/src/box/vy_history.h b/src/box/vy_history.h
new file mode 100644
index 00000000..01f5364c
--- /dev/null
+++ b/src/box/vy_history.h
@@ -0,0 +1,133 @@
+#ifndef INCLUDES_TARANTOOL_BOX_VY_HISTORY_H
+#define INCLUDES_TARANTOOL_BOX_VY_HISTORY_H
+/*
+ * Copyright 2010-2018, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <small/rlist.h>
+
+#include "iproto_constants.h"
+#include "vy_stmt.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+struct key_def;
+struct tuple;
+struct tuple_format;
+
+/** Key history. */
+struct vy_history {
+ /**
+ * List of statements sorted by LSN in descending order.
+ * Linked by vy_history_node::link.
+ */
+ struct rlist stmts;
+};
+
+/** Key history node. */
+struct vy_history_node {
+ /** Link in a history list. */
+ struct rlist link;
+ /** History statement. Referenced if @is_refable is set. */
+ struct tuple *stmt;
+ /**
+ * Set if the statement stored in this node is refable,
+ * i.e. has a reference counter that can be incremented
+ * to pin the statement in memory. Refable statements are
+ * referenced by the history. It is a responsibility of
+ * the user of the history to track lifetime of unrefable
+ * statements.
+ *
+ * Note, we need to store this flag here, because by the
+ * time we clean up a history list, unrefable statements
+ * stored in it might have been deleted, thus making
+ * vy_stmt_is_refable() unusable.
+ */
+ bool is_refable;
+};
+
+/**
+ * Initialize a history list.
+ */
+static inline void
+vy_history_create(struct vy_history *history)
+{
+ rlist_create(&history->stmts);
+}
+
+/**
+ * Return true if the history of a key contains terminal node in the end,
+ * i.e. REPLACE of DELETE statement.
+ */
+static inline bool
+vy_history_is_terminal(struct vy_history *history)
+{
+ if (rlist_empty(&history->stmts))
+ return false;
+ struct vy_history_node *node = rlist_last_entry(&history->stmts,
+ struct vy_history_node, link);
+ assert(vy_stmt_type(node->stmt) == IPROTO_REPLACE ||
+ vy_stmt_type(node->stmt) == IPROTO_DELETE ||
+ vy_stmt_type(node->stmt) == IPROTO_INSERT ||
+ vy_stmt_type(node->stmt) == IPROTO_UPSERT);
+ return vy_stmt_type(node->stmt) != IPROTO_UPSERT;
+}
+
+/**
+ * Append an (older) statement to a history list.
+ * Returns 0 on success, -1 on memory allocation error.
+ */
+int
+vy_history_append_stmt(struct vy_history *history, struct tuple *stmt);
+
+/**
+ * Release all statements stored in the given history and
+ * reinitialize the history list.
+ */
+void
+vy_history_cleanup(struct vy_history *history);
+
+/**
+ * Get a resultant statement from collected history.
+ */
+int
+vy_history_apply(struct vy_history *history, const struct key_def *cmp_def,
+ struct tuple_format *format, int *upserts_applied,
+ struct tuple **ret);
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
+
+#endif /* INCLUDES_TARANTOOL_BOX_VY_HISTORY_H */
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index 7c9995ed..6994ebfd 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -45,101 +45,7 @@
#include "vy_mem.h"
#include "vy_run.h"
#include "vy_cache.h"
-#include "vy_upsert.h"
-
-/** Key history. */
-struct vy_history {
- /**
- * List of statements sorted by LSN in descending order.
- * Linked by vy_history_node::link.
- */
- struct rlist stmts;
-};
-
-/** Key history node. */
-struct vy_history_node {
- /** Link in a history list. */
- struct rlist link;
- /** History statement. Referenced if @is_refable is set. */
- struct tuple *stmt;
- /**
- * Set if the statement stored in this node is refable,
- * i.e. has a reference counter that can be incremented
- * to pin the statement in memory. Refable statements are
- * referenced by the history. It is a responsibility of
- * the user of the history to track lifetime of unrefable
- * statements.
- *
- * Note, we need to store this flag here, because by the
- * time we clean up a history list, unrefable statements
- * stored in it might have been deleted, thus making
- * vy_stmt_is_refable() unusable.
- */
- bool is_refable;
-};
-
-/**
- * Initialize a history list.
- */
-static void
-vy_history_create(struct vy_history *history)
-{
- rlist_create(&history->stmts);
-}
-
-/**
- * Append an (older) statement to a history list.
- * Returns 0 on success, -1 on memory allocation error.
- */
-static int
-vy_history_append_stmt(struct vy_history *history, struct tuple *stmt)
-{
- struct region *region = &fiber()->gc;
- struct vy_history_node *node = region_alloc(region, sizeof(*node));
- if (node == NULL) {
- diag_set(OutOfMemory, sizeof(*node), "region",
- "struct vy_history_node");
- return -1;
- }
- node->is_refable = vy_stmt_is_refable(stmt);
- if (node->is_refable)
- tuple_ref(stmt);
- node->stmt = stmt;
- rlist_add_tail_entry(&history->stmts, node, link);
- return 0;
-}
-
-/**
- * Release all statements stored in the given history and
- * reinitialize the history list.
- */
-static void
-vy_history_cleanup(struct vy_history *history)
-{
- struct vy_history_node *node;
- rlist_foreach_entry(node, &history->stmts, link)
- if (node->is_refable)
- tuple_unref(node->stmt);
- rlist_create(&history->stmts);
-}
-
-/**
- * Return true if the history of a key contains terminal node in the end,
- * i.e. REPLACE of DELETE statement.
- */
-static bool
-vy_history_is_terminal(struct vy_history *history)
-{
- if (rlist_empty(&history->stmts))
- return false;
- struct vy_history_node *node = rlist_last_entry(&history->stmts,
- struct vy_history_node, link);
- assert(vy_stmt_type(node->stmt) == IPROTO_REPLACE ||
- vy_stmt_type(node->stmt) == IPROTO_DELETE ||
- vy_stmt_type(node->stmt) == IPROTO_INSERT ||
- vy_stmt_type(node->stmt) == IPROTO_UPSERT);
- return vy_stmt_type(node->stmt) != IPROTO_UPSERT;
-}
+#include "vy_history.h"
/**
* Scan TX write set for given key.
@@ -327,48 +233,6 @@ vy_point_lookup_scan_slices(struct vy_lsm *lsm, const struct vy_read_view **rv,
return rc;
}
-/**
- * Get a resultant statement from collected history.
- */
-static int
-vy_history_apply(struct vy_history *history, const struct key_def *cmp_def,
- struct tuple_format *format, int *upserts_applied,
- struct tuple **ret)
-{
- *ret = NULL;
- *upserts_applied = 0;
- if (rlist_empty(&history->stmts))
- return 0;
-
- struct tuple *curr_stmt = NULL;
- struct vy_history_node *node = rlist_last_entry(&history->stmts,
- struct vy_history_node, link);
- if (vy_history_is_terminal(history)) {
- if (vy_stmt_type(node->stmt) == IPROTO_DELETE) {
- /* Ignore terminal delete */
- } else if (!node->is_refable) {
- curr_stmt = vy_stmt_dup(node->stmt);
- } else {
- curr_stmt = node->stmt;
- tuple_ref(curr_stmt);
- }
- node = rlist_prev_entry_safe(node, &history->stmts, link);
- }
- while (node != NULL) {
- struct tuple *stmt = vy_apply_upsert(node->stmt, curr_stmt,
- cmp_def, format, true);
- ++*upserts_applied;
- if (stmt == NULL)
- return -1;
- if (curr_stmt != NULL)
- tuple_unref(curr_stmt);
- curr_stmt = stmt;
- node = rlist_prev_entry_safe(node, &history->stmts, link);
- }
- *ret = curr_stmt;
- return 0;
-}
-
int
vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx,
const struct vy_read_view **rv,
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 7e1c95ed..db270de6 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -160,6 +160,7 @@ add_executable(vy_point_lookup.test
${PROJECT_SOURCE_DIR}/src/box/vy_tx.c
${PROJECT_SOURCE_DIR}/src/box/vy_read_set.c
${PROJECT_SOURCE_DIR}/src/box/vy_upsert.c
+ ${PROJECT_SOURCE_DIR}/src/box/vy_history.c
${PROJECT_SOURCE_DIR}/src/box/vy_lsm.c
${PROJECT_SOURCE_DIR}/src/box/vy_cache.c
${PROJECT_SOURCE_DIR}/src/box/index_def.c
--
2.11.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 08/12] vinyl: use mempool for vy_history_node allocations
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
` (6 preceding siblings ...)
2018-04-15 19:55 ` [PATCH 07/12] vinyl: move vy_history to its own source file Vladimir Davydov
@ 2018-04-15 19:55 ` Vladimir Davydov
2018-04-15 19:55 ` [PATCH 09/12] vinyl: consolidate skip optimization checks in read iterator Vladimir Davydov
` (4 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Vladimir Davydov @ 2018-04-15 19:55 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
To reuse vy_history in read iterator, we need to persist history nodes
between calls to vy_read_iterator_next so we can't allocate them on the
region. So let's introduce a memory pool for them.
---
src/box/vy_history.c | 15 ++++++++-------
src/box/vy_history.h | 9 +++++++--
src/box/vy_lsm.c | 5 +++++
src/box/vy_lsm.h | 4 +++-
src/box/vy_point_lookup.c | 5 +----
5 files changed, 24 insertions(+), 14 deletions(-)
diff --git a/src/box/vy_history.c b/src/box/vy_history.c
index a11705a6..c1fe6c6a 100644
--- a/src/box/vy_history.c
+++ b/src/box/vy_history.c
@@ -30,13 +30,13 @@
*/
#include "vy_history.h"
+#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
-#include <small/region.h>
+#include <small/mempool.h>
#include <small/rlist.h>
#include "diag.h"
-#include "fiber.h"
#include "tuple.h"
#include "iproto_constants.h"
#include "vy_stmt.h"
@@ -45,10 +45,10 @@
int
vy_history_append_stmt(struct vy_history *history, struct tuple *stmt)
{
- struct region *region = &fiber()->gc;
- struct vy_history_node *node = region_alloc(region, sizeof(*node));
+ assert(history->pool->objsize == sizeof(struct vy_history_node));
+ struct vy_history_node *node = mempool_alloc(history->pool);
if (node == NULL) {
- diag_set(OutOfMemory, sizeof(*node), "region",
+ diag_set(OutOfMemory, sizeof(*node), "mempool",
"struct vy_history_node");
return -1;
}
@@ -63,10 +63,11 @@ vy_history_append_stmt(struct vy_history *history, struct tuple *stmt)
void
vy_history_cleanup(struct vy_history *history)
{
- struct vy_history_node *node;
- rlist_foreach_entry(node, &history->stmts, link) {
+ struct vy_history_node *node, *tmp;
+ rlist_foreach_entry_safe(node, &history->stmts, link, tmp) {
if (node->is_refable)
tuple_unref(node->stmt);
+ mempool_free(history->pool, node);
}
rlist_create(&history->stmts);
}
diff --git a/src/box/vy_history.h b/src/box/vy_history.h
index 01f5364c..bb4ed28e 100644
--- a/src/box/vy_history.h
+++ b/src/box/vy_history.h
@@ -42,6 +42,7 @@
extern "C" {
#endif /* defined(__cplusplus) */
+struct mempool;
struct key_def;
struct tuple;
struct tuple_format;
@@ -53,6 +54,8 @@ struct vy_history {
* Linked by vy_history_node::link.
*/
struct rlist stmts;
+ /** Memory pool for vy_history_node allocations. */
+ struct mempool *pool;
};
/** Key history node. */
@@ -78,11 +81,13 @@ struct vy_history_node {
};
/**
- * Initialize a history list.
+ * Initialize a history list. The 'pool' argument specifies
+ * the memory pool to use for node allocations.
*/
static inline void
-vy_history_create(struct vy_history *history)
+vy_history_create(struct vy_history *history, struct mempool *pool)
{
+ history->pool = pool;
rlist_create(&history->stmts);
}
diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c
index 73196b10..4dfc0a25 100644
--- a/src/box/vy_lsm.c
+++ b/src/box/vy_lsm.c
@@ -35,6 +35,7 @@
#include <stddef.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include <small/mempool.h>
#include "diag.h"
#include "errcode.h"
@@ -50,6 +51,7 @@
#include "vy_stat.h"
#include "vy_stmt.h"
#include "vy_upsert.h"
+#include "vy_history.h"
#include "vy_read_set.h"
int
@@ -74,6 +76,8 @@ vy_lsm_env_create(struct vy_lsm_env *env, const char *path,
env->upsert_thresh_arg = upsert_thresh_arg;
env->too_long_threshold = TIMEOUT_INFINITY;
env->lsm_count = 0;
+ mempool_create(&env->history_node_pool, cord_slab_cache(),
+ sizeof(struct vy_history_node));
return 0;
}
@@ -82,6 +86,7 @@ vy_lsm_env_destroy(struct vy_lsm_env *env)
{
tuple_unref(env->empty_key);
tuple_format_unref(env->key_format);
+ mempool_destroy(&env->history_node_pool);
}
const char *
diff --git a/src/box/vy_lsm.h b/src/box/vy_lsm.h
index 2ef190ba..c7ccb7d6 100644
--- a/src/box/vy_lsm.h
+++ b/src/box/vy_lsm.h
@@ -34,7 +34,7 @@
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
-
+#include <small/mempool.h>
#include <small/rlist.h>
#include "index_def.h"
@@ -90,6 +90,8 @@ struct vy_lsm_env {
size_t bloom_size;
/** Size of memory used for page index. */
size_t page_index_size;
+ /** Memory pool for vy_history_node allocations. */
+ struct mempool history_node_pool;
};
/** Create a common LSM tree environment. */
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index 6994ebfd..b48a2332 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -241,14 +241,13 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx,
assert(tuple_field_count(key) >= lsm->cmp_def->part_count);
*ret = NULL;
- size_t region_svp = region_used(&fiber()->gc);
double start_time = ev_monotonic_now(loop());
int rc = 0;
lsm->stat.lookup++;
/* History list */
struct vy_history history;
- vy_history_create(&history);
+ vy_history_create(&history, &lsm->env->history_node_pool);
restart:
rc = vy_point_lookup_scan_txw(lsm, tx, key, &history);
if (rc != 0 || vy_history_is_terminal(&history))
@@ -285,7 +284,6 @@ restart:
* cannot distinguish these two cases we always restart.
*/
vy_history_cleanup(&history);
- region_truncate(&fiber()->gc, region_svp);
goto restart;
}
@@ -297,7 +295,6 @@ done:
lsm->stat.upsert.applied += upserts_applied;
}
vy_history_cleanup(&history);
- region_truncate(&fiber()->gc, region_svp);
if (rc != 0)
return -1;
--
2.11.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 09/12] vinyl: consolidate skip optimization checks in read iterator
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
` (7 preceding siblings ...)
2018-04-15 19:55 ` [PATCH 08/12] vinyl: use mempool for vy_history_node allocations Vladimir Davydov
@ 2018-04-15 19:55 ` Vladimir Davydov
2018-05-14 18:25 ` [tarantool-patches] " Vladislav Shpilevoy
2018-04-15 19:55 ` [PATCH 10/12] vinyl: refactor vy_read_iterator_next Vladimir Davydov
` (3 subsequent siblings)
12 siblings, 1 reply; 17+ messages in thread
From: Vladimir Davydov @ 2018-04-15 19:55 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
Each kind of source iterator has 'skip' method, which is called to
position the iterator to a specified key. It is used to advance
sources that were skipped on the previous iteration (e.g. because
the result was found in the cache). The method has an optimization:
it avoids a lookup in the index if it is already positioned at a
statement following the specified key. This optimization makes the
method difficult to use if we want to keep a key history in each
source instead of a single statement, as we don't know whether 'skip'
changed the position or not and hence whether we need to rebuild key
history or not. Let's move the optimization to the read iterator and
make 'skip' plain and simple: let it always reposition the iterator
to the first statement following a given key.
---
src/box/vy_cache.c | 19 -------------------
src/box/vy_mem.c | 14 --------------
src/box/vy_read_iterator.c | 23 ++++++++++++++++++++---
src/box/vy_run.c | 13 -------------
src/box/vy_tx.c | 14 --------------
5 files changed, 20 insertions(+), 63 deletions(-)
diff --git a/src/box/vy_cache.c b/src/box/vy_cache.c
index 1c3e3692..d4cdcdff 100644
--- a/src/box/vy_cache.c
+++ b/src/box/vy_cache.c
@@ -703,25 +703,6 @@ vy_cache_iterator_skip(struct vy_cache_iterator *itr,
assert(!itr->search_started || itr->version == itr->cache->version);
- /*
- * Check if the iterator is already positioned
- * at the statement following last_stmt.
- */
- if (itr->search_started &&
- (itr->curr_stmt == NULL || last_stmt == NULL ||
- iterator_direction(itr->iterator_type) *
- vy_tuple_compare(itr->curr_stmt, last_stmt,
- itr->cache->cmp_def) > 0)) {
- if (itr->curr_stmt == NULL)
- return;
- struct vy_cache_tree *tree = &itr->cache->cache_tree;
- struct vy_cache_entry *entry =
- *vy_cache_tree_iterator_get_elem(tree, &itr->curr_pos);
- *ret = itr->curr_stmt;
- *stop = vy_cache_iterator_is_stop(itr, entry);
- return;
- }
-
itr->search_started = true;
itr->version = itr->cache->version;
if (itr->curr_stmt != NULL)
diff --git a/src/box/vy_mem.c b/src/box/vy_mem.c
index 22cd33fa..9d40f76e 100644
--- a/src/box/vy_mem.c
+++ b/src/box/vy_mem.c
@@ -544,20 +544,6 @@ vy_mem_iterator_skip(struct vy_mem_iterator *itr,
*ret = NULL;
assert(!itr->search_started || itr->version == itr->mem->version);
- /*
- * Check if the iterator is already positioned
- * at the statement following last_stmt.
- */
- if (itr->search_started &&
- (itr->curr_stmt == NULL || last_stmt == NULL ||
- iterator_direction(itr->iterator_type) *
- vy_tuple_compare(itr->curr_stmt, last_stmt,
- itr->mem->cmp_def) > 0)) {
- if (itr->curr_stmt != NULL)
- *ret = itr->last_stmt;
- return 0;
- }
-
const struct tuple *key = itr->key;
enum iterator_type iterator_type = itr->iterator_type;
if (last_stmt != NULL) {
diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c
index 2cad2337..548cf234 100644
--- a/src/box/vy_read_iterator.c
+++ b/src/box/vy_read_iterator.c
@@ -242,6 +242,23 @@ vy_read_iterator_evaluate_src(struct vy_read_iterator *itr,
}
}
+/**
+ * Check if a read iterator source is behind the current read
+ * iterator position and hence needs to be fast-forwarded.
+ */
+static inline bool
+vy_read_src_is_behind(struct vy_read_iterator *itr, struct vy_read_src *src)
+{
+ uint32_t src_id = src - itr->src;
+ if (!src->is_started)
+ return true;
+ if (src_id < itr->skipped_src)
+ return false;
+ if (vy_read_iterator_cmp_stmt(itr, src->stmt, itr->last_stmt) > 0)
+ return false;
+ return true;
+}
+
/*
* Each of the functions from the vy_read_iterator_scan_* family
* is used by vy_read_iterator_next_key() to:
@@ -300,7 +317,7 @@ vy_read_iterator_scan_cache(struct vy_read_iterator *itr, bool *stop)
int rc = vy_cache_iterator_restore(src_itr, itr->last_stmt,
&src->stmt, &is_interval);
if (rc == 0) {
- if (!src->is_started || itr->cache_src >= itr->skipped_src) {
+ if (vy_read_src_is_behind(itr, src)) {
vy_cache_iterator_skip(src_itr, itr->last_stmt,
&src->stmt, &is_interval);
} else if (src->front_id == itr->prev_front_id) {
@@ -329,7 +346,7 @@ vy_read_iterator_scan_mem(struct vy_read_iterator *itr,
rc = vy_mem_iterator_restore(src_itr, itr->last_stmt, &src->stmt);
if (rc == 0) {
- if (!src->is_started || mem_src >= itr->skipped_src) {
+ if (vy_read_src_is_behind(itr, src)) {
rc = vy_mem_iterator_skip(src_itr, itr->last_stmt,
&src->stmt);
} else if (src->front_id == itr->prev_front_id) {
@@ -354,7 +371,7 @@ vy_read_iterator_scan_disk(struct vy_read_iterator *itr,
assert(disk_src >= itr->disk_src && disk_src < itr->src_count);
- if (!src->is_started || disk_src >= itr->skipped_src)
+ if (vy_read_src_is_behind(itr, src))
rc = vy_run_iterator_skip(src_itr, itr->last_stmt, &src->stmt);
else if (src->front_id == itr->prev_front_id)
rc = vy_run_iterator_next_key(src_itr, &src->stmt);
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index 49caa341..fa5670d3 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -1485,19 +1485,6 @@ vy_run_iterator_skip(struct vy_run_iterator *itr,
if (itr->search_ended)
return 0;
- /*
- * Check if the iterator is already positioned
- * at the statement following last_stmt.
- */
- if (itr->search_started &&
- (itr->curr_stmt == NULL || last_stmt == NULL ||
- iterator_direction(itr->iterator_type) *
- vy_tuple_compare(itr->curr_stmt, last_stmt,
- itr->cmp_def) > 0)) {
- *ret = itr->curr_stmt;
- return 0;
- }
-
const struct tuple *key = itr->key;
enum iterator_type iterator_type = itr->iterator_type;
if (last_stmt != NULL) {
diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c
index 285af8a6..4812b794 100644
--- a/src/box/vy_tx.c
+++ b/src/box/vy_tx.c
@@ -979,20 +979,6 @@ vy_txw_iterator_skip(struct vy_txw_iterator *itr,
assert(!itr->search_started ||
itr->version == itr->tx->write_set_version);
- /*
- * Check if the iterator is already positioned
- * at the statement following last_stmt.
- */
- if (itr->search_started &&
- (itr->curr_txv == NULL || last_stmt == NULL ||
- iterator_direction(itr->iterator_type) *
- vy_tuple_compare(itr->curr_txv->stmt, last_stmt,
- itr->lsm->cmp_def) > 0)) {
- if (itr->curr_txv != NULL)
- *ret = itr->curr_txv->stmt;
- return;
- }
-
const struct tuple *key = itr->key;
enum iterator_type iterator_type = itr->iterator_type;
if (last_stmt != NULL) {
--
2.11.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 10/12] vinyl: refactor vy_read_iterator_next
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
` (8 preceding siblings ...)
2018-04-15 19:55 ` [PATCH 09/12] vinyl: consolidate skip optimization checks in read iterator Vladimir Davydov
@ 2018-04-15 19:55 ` Vladimir Davydov
2018-04-15 19:55 ` [PATCH 11/12] vinyl: make read iterator always return newest tuple version Vladimir Davydov
` (2 subsequent siblings)
12 siblings, 0 replies; 17+ messages in thread
From: Vladimir Davydov @ 2018-04-15 19:55 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
The function looks rather messy now. Let's rearrange its code to make it
look better and facilitate further development:
- Move the debug checks making sure that the next key meets search
criteria and doesn't violate the iteration order to the place where
we look up the next key, i.e. to vy_read_iterator_next_key.
- Move result tracking in tx read set from vy_read_iterator_next_key
to vy_read_iterator_next so that the former now just advances the
iterator to the next key and does nothing else.
- Remove vy_read_iterator::search_started. We don't really need it as
we can use (vy_read_iterator::last_stmt == NULL) condition instead.
- Rearrange the code of vy_read_iterator_next to reduce indentation
level.
- Remove/rename some variables and labels. Add some comments.
---
src/box/vy_read_iterator.c | 195 +++++++++++++++++++++------------------------
src/box/vy_read_iterator.h | 2 -
2 files changed, 90 insertions(+), 107 deletions(-)
diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c
index 548cf234..0695eedb 100644
--- a/src/box/vy_read_iterator.c
+++ b/src/box/vy_read_iterator.c
@@ -443,9 +443,6 @@ vy_read_iterator_restore(struct vy_read_iterator *itr);
static void
vy_read_iterator_next_range(struct vy_read_iterator *itr);
-static int
-vy_read_iterator_track_read(struct vy_read_iterator *itr, struct tuple *stmt);
-
/**
* Iterate to the next key
* @retval 0 success or EOF (*ret == NULL)
@@ -454,9 +451,6 @@ vy_read_iterator_track_read(struct vy_read_iterator *itr, struct tuple *stmt);
static NODISCARD int
vy_read_iterator_next_key(struct vy_read_iterator *itr, struct tuple **ret)
{
- uint32_t i;
- bool stop = false;
-
if (itr->last_stmt != NULL && (itr->iterator_type == ITER_EQ ||
itr->iterator_type == ITER_REQ) &&
tuple_field_count(itr->key) >= itr->lsm->cmp_def->part_count) {
@@ -469,9 +463,10 @@ vy_read_iterator_next_key(struct vy_read_iterator *itr, struct tuple **ret)
}
/*
* Restore the iterator position if the LSM tree has changed
- * since the last iteration.
+ * since the last iteration or this is the first iteration.
*/
- if (itr->mem_list_version != itr->lsm->mem_list_version ||
+ if (itr->last_stmt == NULL ||
+ itr->mem_list_version != itr->lsm->mem_list_version ||
itr->range_tree_version != itr->lsm->range_tree_version ||
itr->range_version != itr->curr_range->version) {
vy_read_iterator_restore(itr);
@@ -487,6 +482,7 @@ restart:
* Look up the next key in read sources starting
* from the one that stores newest data.
*/
+ bool stop = false;
vy_read_iterator_scan_txw(itr, &stop);
if (stop)
goto done;
@@ -494,7 +490,7 @@ restart:
if (stop)
goto done;
- for (i = itr->mem_src; i < itr->disk_src; i++) {
+ for (uint32_t i = itr->mem_src; i < itr->disk_src; i++) {
if (vy_read_iterator_scan_mem(itr, i, &stop) != 0)
return -1;
if (stop)
@@ -503,9 +499,11 @@ restart:
rescan_disk:
/* The following code may yield as it needs to access disk. */
vy_read_iterator_pin_slices(itr);
- for (i = itr->disk_src; i < itr->src_count; i++) {
- if (vy_read_iterator_scan_disk(itr, i, &stop) != 0)
- goto err_disk;
+ for (uint32_t i = itr->disk_src; i < itr->src_count; i++) {
+ if (vy_read_iterator_scan_disk(itr, i, &stop) != 0) {
+ vy_read_iterator_unpin_slices(itr);
+ return -1;
+ }
if (stop)
break;
}
@@ -539,24 +537,34 @@ rescan_disk:
goto rescan_disk;
}
done:
- if (itr->last_stmt != NULL && itr->curr_stmt != NULL)
+#ifndef NDEBUG
+ /* Check that the statement meets search criteria. */
+ if (itr->curr_stmt != NULL) {
+ int cmp = vy_stmt_compare(itr->curr_stmt, itr->key,
+ itr->lsm->cmp_def);
+ cmp *= iterator_direction(itr->iterator_type);
+ if (itr->iterator_type == ITER_GT ||
+ itr->iterator_type == ITER_LT)
+ assert(cmp > 0);
+ else
+ assert(cmp >= 0);
+ }
+ /*
+ * Ensure the read iterator does not return duplicates
+ * and respects statement order.
+ */
+ if (itr->last_stmt != NULL && itr->curr_stmt != NULL) {
assert(vy_read_iterator_cmp_stmt(itr, itr->curr_stmt,
itr->last_stmt) > 0);
-
+ }
+#endif
if (itr->need_check_eq && itr->curr_stmt != NULL &&
vy_stmt_compare(itr->curr_stmt, itr->key,
itr->lsm->cmp_def) != 0)
itr->curr_stmt = NULL;
- if (vy_read_iterator_track_read(itr, itr->curr_stmt) != 0)
- return -1;
-
*ret = itr->curr_stmt;
return 0;
-
-err_disk:
- vy_read_iterator_unpin_slices(itr);
- return -1;
}
/**
@@ -960,99 +968,70 @@ vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result)
{
ev_tstamp start_time = ev_monotonic_now(loop());
- *result = NULL;
-
- if (!itr->search_started) {
- itr->search_started = true;
- itr->lsm->stat.lookup++;
- vy_read_iterator_restore(itr);
- }
-
- struct tuple *prev_key = itr->last_stmt;
- if (prev_key != NULL)
- tuple_ref(prev_key);
- bool skipped_txw_delete = false;
-
- struct tuple *t = NULL;
struct vy_lsm *lsm = itr->lsm;
- int rc = 0;
- while (true) {
- rc = vy_read_iterator_next_key(itr, &t);
- if (rc != 0)
- goto clear;
- if (t == NULL) {
- if (itr->last_stmt != NULL)
- tuple_unref(itr->last_stmt);
- itr->last_stmt = NULL;
- rc = 0; /* No more data. */
- break;
- }
- rc = vy_read_iterator_squash_upsert(itr, &t);
- if (rc != 0)
- goto clear;
- if (itr->last_stmt != NULL)
- tuple_unref(itr->last_stmt);
- itr->last_stmt = t;
- if (vy_stmt_type(t) == IPROTO_INSERT ||
- vy_stmt_type(t) == IPROTO_REPLACE)
- break;
- assert(vy_stmt_type(t) == IPROTO_DELETE);
- if (vy_stmt_lsn(t) == INT64_MAX) /* t is from write set */
- skipped_txw_delete = true;
- }
+ struct tuple *stmt, *prev_stmt;
- *result = itr->last_stmt;
- assert(*result == NULL ||
- vy_stmt_type(*result) == IPROTO_INSERT ||
- vy_stmt_type(*result) == IPROTO_REPLACE);
- if (*result != NULL)
- vy_stmt_counter_acct_tuple(&lsm->stat.get, *result);
-
-#ifndef NDEBUG
- /* Check constraints. */
- int dir = iterator_direction(itr->iterator_type);
/*
- * Each result statement with iterator type GE/GT must
- * be >= iterator key. And with LT/LE must
- * be <= iterator_key. @sa gh-2614.
+ * Remember the statement returned by the last iteration.
+ * We will need it to update the cache.
*/
- if (itr->last_stmt != NULL && tuple_field_count(itr->key) > 0) {
- int cmp = dir * vy_stmt_compare(*result, itr->key,
- itr->lsm->cmp_def);
- assert(cmp >= 0);
- }
+ prev_stmt = itr->last_stmt;
+ if (prev_stmt != NULL)
+ tuple_ref(prev_stmt);
+ else /* first iteration */
+ lsm->stat.lookup++;
+next_key:
+ if (vy_read_iterator_next_key(itr, &stmt) != 0)
+ goto err;
+
/*
- * Ensure the read iterator does not return duplicates
- * and respects statements order (lsm->cmp_def includes
- * primary parts, so prev_key != itr->last_stmt for any
- * LSM tree).
+ * Fetching an older statement of the current key may yield
+ * so we must track the read before applying UPSERTs.
*/
- if (prev_key != NULL && itr->last_stmt != NULL) {
- assert(dir * vy_tuple_compare(prev_key, itr->last_stmt,
- lsm->cmp_def) < 0);
+ if (vy_read_iterator_track_read(itr, stmt) != 0)
+ goto err;
+
+ if (stmt != NULL &&
+ vy_read_iterator_squash_upsert(itr, &stmt) != 0)
+ goto err;
+
+ if (itr->last_stmt != NULL)
+ tuple_unref(itr->last_stmt);
+ itr->last_stmt = stmt;
+
+ if (stmt != NULL && vy_stmt_type(stmt) == IPROTO_DELETE) {
+ /*
+ * We don't return DELETEs so skip to the next key.
+ * If the DELETE was read from TX write set, there
+ * is a good chance that the space actually has
+ * the deleted key and hence we must not consider
+ * previous + current tuple as an unbroken chain.
+ */
+ if (vy_stmt_lsn(stmt) == INT64_MAX) {
+ if (prev_stmt != NULL)
+ tuple_unref(prev_stmt);
+ prev_stmt = NULL;
+ }
+ goto next_key;
}
-#endif
+ assert(stmt == NULL ||
+ vy_stmt_type(stmt) == IPROTO_INSERT ||
+ vy_stmt_type(stmt) == IPROTO_REPLACE);
- /**
- * Add a statement to the cache
+ /*
+ * Store the result in the cache provided we are reading
+ * the latest data.
*/
- if ((**itr->read_view).vlsn == INT64_MAX) { /* Do not store non-latest data */
- struct tuple *cache_prev = prev_key;
- if (skipped_txw_delete) {
- /*
- * If we skipped DELETE that was read from TX write
- * set, there is a chance that the database actually
- * has the deleted key and we must not consider
- * previous+current tuple as an unbroken chain.
- */
- cache_prev = NULL;
- }
- vy_cache_add(&itr->lsm->cache, *result, cache_prev,
+ if ((**itr->read_view).vlsn == INT64_MAX) {
+ vy_cache_add(&lsm->cache, stmt, prev_stmt,
itr->key, itr->iterator_type);
}
-clear:
- if (prev_key != NULL)
- tuple_unref(prev_key);
+ if (prev_stmt != NULL)
+ tuple_unref(prev_stmt);
+
+ /* Update LSM tree stats. */
+ if (stmt != NULL)
+ vy_stmt_counter_acct_tuple(&lsm->stat.get, stmt);
ev_tstamp latency = ev_monotonic_now(loop()) - start_time;
latency_collect(&lsm->stat.latency, latency);
@@ -1061,9 +1040,15 @@ clear:
say_warn("%s: select(%s, %s) => %s took too long: %.3f sec",
vy_lsm_name(lsm), tuple_str(itr->key),
iterator_type_strs[itr->iterator_type],
- vy_stmt_str(itr->last_stmt), latency);
+ vy_stmt_str(stmt), latency);
}
- return rc;
+
+ *result = stmt;
+ return 0;
+err:
+ if (prev_stmt != NULL)
+ tuple_unref(prev_stmt);
+ return -1;
}
/**
diff --git a/src/box/vy_read_iterator.h b/src/box/vy_read_iterator.h
index 4f9d3d4b..6aa84540 100644
--- a/src/box/vy_read_iterator.h
+++ b/src/box/vy_read_iterator.h
@@ -62,8 +62,6 @@ struct vy_read_iterator {
* checked to match the search key.
*/
bool need_check_eq;
- /** Set on the first call to vy_read_iterator_next(). */
- bool search_started;
/** Last statement returned by vy_read_iterator_next(). */
struct tuple *last_stmt;
/**
--
2.11.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 11/12] vinyl: make read iterator always return newest tuple version
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
` (9 preceding siblings ...)
2018-04-15 19:55 ` [PATCH 10/12] vinyl: refactor vy_read_iterator_next Vladimir Davydov
@ 2018-04-15 19:55 ` Vladimir Davydov
2018-04-15 19:55 ` [PATCH 12/12] vinyl: zap vy_read_iterator::curr_stmt Vladimir Davydov
2018-05-04 18:05 ` [tarantool-patches] Re: [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladislav Shpilevoy
12 siblings, 0 replies; 17+ messages in thread
From: Vladimir Davydov @ 2018-04-15 19:55 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
vy_read_iterator_next() first scans all sources to find the newest
statement for the next key. If the statement turns out to be UPSERT,
it retrieves older statements until a terminal statement is found.
While retrieving older statements, it may yield, which opens a window
for other fibers to insert newer statements for the same key. Those
statements will be ignored by the read iterator even if they are
visible from the iterator read view.
To be able to use read iterator for building secondary indexes (i.e.
iterate over tuples stored in pk and insert them into the new index),
we need to lift that limitation, otherwise there's a good chance that
while building a new index we will overwrite statements inserted by
concurrent transactions. So this patch reworks the read iterator to
guarantee that it always returns the newest tuple version.
To achieve that, we now retrieve the whole key history instead of
only the last statement first time we scan a source. Respectively,
we store a vy_history instead of tuple in vy_read_src. All source
iterators now return vy_history too. As a result, we don't need to
iterate to the next LSN (and possibly yield) to apply UPSERTs - we
simply splice all histories corresponding to the next key and call
vy_history_apply() to return the resultant tuple.
Note, this patch changes the output of vinyl/upsert.test.lua. This
is OK as now UPSERTs are applied in the reverse order, from the
oldest statement to the newest.
---
src/box/vy_cache.c | 32 +++--
src/box/vy_cache.h | 31 +++--
src/box/vy_history.c | 11 +-
src/box/vy_history.h | 31 ++++-
src/box/vy_mem.c | 81 ++++++------
src/box/vy_mem.h | 33 ++---
src/box/vy_point_lookup.c | 29 ++---
src/box/vy_read_iterator.c | 306 +++++++++++++++------------------------------
src/box/vy_read_iterator.h | 2 -
src/box/vy_run.c | 55 ++++++--
src/box/vy_run.h | 23 ++--
src/box/vy_tx.c | 43 ++++---
src/box/vy_tx.h | 29 +++--
test/unit/CMakeLists.txt | 2 +
test/unit/vy_cache.c | 16 ++-
test/unit/vy_mem.c | 24 +++-
test/vinyl/upsert.result | 8 +-
test/vinyl/upsert.test.lua | 6 +-
18 files changed, 378 insertions(+), 384 deletions(-)
diff --git a/src/box/vy_cache.c b/src/box/vy_cache.c
index d4cdcdff..2b9e40fa 100644
--- a/src/box/vy_cache.c
+++ b/src/box/vy_cache.c
@@ -32,6 +32,7 @@
#include "diag.h"
#include "fiber.h"
#include "schema_def.h"
+#include "vy_history.h"
#ifndef CT_ASSERT_G
#define CT_ASSERT_G(e) typedef char CONCAT(__ct_assert_, __LINE__)[(e) ? 1 :-1]
@@ -658,12 +659,12 @@ vy_cache_iterator_seek(struct vy_cache_iterator *itr,
*entry = *vy_cache_tree_iterator_get_elem(tree, &itr->curr_pos);
}
-void
+NODISCARD int
vy_cache_iterator_next(struct vy_cache_iterator *itr,
- struct tuple **ret, bool *stop)
+ struct vy_history *history, bool *stop)
{
- *ret = NULL;
*stop = false;
+ vy_history_cleanup(history);
if (!itr->search_started) {
assert(itr->curr_stmt == NULL);
@@ -673,33 +674,34 @@ vy_cache_iterator_next(struct vy_cache_iterator *itr,
vy_cache_iterator_seek(itr, itr->iterator_type,
itr->key, &entry);
if (entry == NULL)
- return;
+ return 0;
itr->curr_stmt = entry->stmt;
*stop = vy_cache_iterator_is_stop(itr, entry);
} else {
assert(itr->version == itr->cache->version);
if (itr->curr_stmt == NULL)
- return;
+ return 0;
tuple_unref(itr->curr_stmt);
*stop = vy_cache_iterator_step(itr, &itr->curr_stmt);
}
vy_cache_iterator_skip_to_read_view(itr, stop);
if (itr->curr_stmt != NULL) {
- *ret = itr->curr_stmt;
tuple_ref(itr->curr_stmt);
vy_stmt_counter_acct_tuple(&itr->cache->stat.get,
itr->curr_stmt);
+ return vy_history_append_stmt(history, itr->curr_stmt);
}
+ return 0;
}
-void
+NODISCARD int
vy_cache_iterator_skip(struct vy_cache_iterator *itr,
const struct tuple *last_stmt,
- struct tuple **ret, bool *stop)
+ struct vy_history *history, bool *stop)
{
- *ret = NULL;
*stop = false;
+ vy_history_cleanup(history);
assert(!itr->search_started || itr->version == itr->cache->version);
@@ -732,17 +734,18 @@ vy_cache_iterator_skip(struct vy_cache_iterator *itr,
vy_cache_iterator_skip_to_read_view(itr, stop);
if (itr->curr_stmt != NULL) {
- *ret = itr->curr_stmt;
tuple_ref(itr->curr_stmt);
vy_stmt_counter_acct_tuple(&itr->cache->stat.get,
itr->curr_stmt);
+ return vy_history_append_stmt(history, itr->curr_stmt);
}
+ return 0;
}
-int
+NODISCARD int
vy_cache_iterator_restore(struct vy_cache_iterator *itr,
const struct tuple *last_stmt,
- struct tuple **ret, bool *stop)
+ struct vy_history *history, bool *stop)
{
struct key_def *def = itr->cache->cmp_def;
int dir = iterator_direction(itr->iterator_type);
@@ -818,11 +821,14 @@ vy_cache_iterator_restore(struct vy_cache_iterator *itr,
break;
}
}
- *ret = itr->curr_stmt;
+
+ vy_history_cleanup(history);
if (itr->curr_stmt != NULL) {
tuple_ref(itr->curr_stmt);
vy_stmt_counter_acct_tuple(&itr->cache->stat.get,
itr->curr_stmt);
+ if (vy_history_append_stmt(history, itr->curr_stmt) != 0)
+ return -1;
return prev_stmt != itr->curr_stmt;
}
return 0;
diff --git a/src/box/vy_cache.h b/src/box/vy_cache.h
index 54fd2892..d2545152 100644
--- a/src/box/vy_cache.h
+++ b/src/box/vy_cache.h
@@ -46,6 +46,8 @@
extern "C" {
#endif /* defined(__cplusplus) */
+struct vy_history;
+
/**
* A record in tuple cache
*/
@@ -263,34 +265,37 @@ vy_cache_iterator_open(struct vy_cache_iterator *itr, struct vy_cache *cache,
const struct tuple *key, const struct vy_read_view **rv);
/**
- * Advance a cache iterator to the next statement.
- * The next statement is returned in @ret (NULL if EOF).
+ * Advance a cache iterator to the next key.
+ * The key history is returned in @history (empty if EOF).
* @stop flag is set if a chain was found in the cache
* and so there shouldn't be statements preceding the
- * returned statement in memory or on disk.
+ * returned statement in memory or on disk. The function
+ * returns 0 on success, -1 on memory allocation error.
*/
-void
+NODISCARD int
vy_cache_iterator_next(struct vy_cache_iterator *itr,
- struct tuple **ret, bool *stop);
+ struct vy_history *history, bool *stop);
/**
- * Advance a cache iterator to the statement following @last_stmt.
- * The statement is returned in @ret (NULL if EOF).
+ * Advance a cache iterator to the key following @last_stmt.
+ * The key history is returned in @history (empty if EOF).
+ * Returns 0 on success, -1 on memory allocation error.
*/
-void
+NODISCARD int
vy_cache_iterator_skip(struct vy_cache_iterator *itr,
const struct tuple *last_stmt,
- struct tuple **ret, bool *stop);
+ struct vy_history *history, bool *stop);
/**
* Check if a cache iterator was invalidated and needs to be restored.
- * If it does, set the iterator position to the statement following
- * @last_stmt and return 1, otherwise return 0.
+ * If it does, set the iterator position to the first key following
+ * @last_stmt and return 1, otherwise return 0. Returns -1 on memory
+ * allocation error.
*/
-int
+NODISCARD int
vy_cache_iterator_restore(struct vy_cache_iterator *itr,
const struct tuple *last_stmt,
- struct tuple **ret, bool *stop);
+ struct vy_history *history, bool *stop);
/**
* Close a cache iterator.
diff --git a/src/box/vy_history.c b/src/box/vy_history.c
index c1fe6c6a..79d80653 100644
--- a/src/box/vy_history.c
+++ b/src/box/vy_history.c
@@ -74,8 +74,8 @@ vy_history_cleanup(struct vy_history *history)
int
vy_history_apply(struct vy_history *history, const struct key_def *cmp_def,
- struct tuple_format *format, int *upserts_applied,
- struct tuple **ret)
+ struct tuple_format *format, bool keep_delete,
+ int *upserts_applied, struct tuple **ret)
{
*ret = NULL;
*upserts_applied = 0;
@@ -86,8 +86,11 @@ vy_history_apply(struct vy_history *history, const struct key_def *cmp_def,
struct vy_history_node *node = rlist_last_entry(&history->stmts,
struct vy_history_node, link);
if (vy_history_is_terminal(history)) {
- if (vy_stmt_type(node->stmt) == IPROTO_DELETE) {
- /* Ignore terminal delete */
+ if (!keep_delete && vy_stmt_type(node->stmt) == IPROTO_DELETE) {
+ /*
+ * Ignore terminal delete unless the caller
+ * explicitly asked to keep it.
+ */
} else if (!node->is_refable) {
curr_stmt = vy_stmt_dup(node->stmt);
} else {
diff --git a/src/box/vy_history.h b/src/box/vy_history.h
index bb4ed28e..1f8bb591 100644
--- a/src/box/vy_history.h
+++ b/src/box/vy_history.h
@@ -110,6 +110,31 @@ vy_history_is_terminal(struct vy_history *history)
}
/**
+ * Return the last (newest, having max LSN) statement of the given
+ * key history or NULL if the history is empty.
+ */
+static inline struct tuple *
+vy_history_last_stmt(struct vy_history *history)
+{
+ if (rlist_empty(&history->stmts))
+ return NULL;
+ /* Newest statement is at the head of the list. */
+ struct vy_history_node *node = rlist_first_entry(&history->stmts,
+ struct vy_history_node, link);
+ return node->stmt;
+}
+
+/**
+ * Append all statements of history @src to history @dst.
+ */
+static inline void
+vy_history_splice(struct vy_history *dst, struct vy_history *src)
+{
+ assert(dst->pool == src->pool);
+ rlist_splice_tail(&dst->stmts, &src->stmts);
+}
+
+/**
* Append an (older) statement to a history list.
* Returns 0 on success, -1 on memory allocation error.
*/
@@ -125,11 +150,13 @@ vy_history_cleanup(struct vy_history *history);
/**
* Get a resultant statement from collected history.
+ * If the resultant statement is a DELETE, the function
+ * will return NULL unless @keep_delete flag is set.
*/
int
vy_history_apply(struct vy_history *history, const struct key_def *cmp_def,
- struct tuple_format *format, int *upserts_applied,
- struct tuple **ret);
+ struct tuple_format *format, bool keep_delete,
+ int *upserts_applied, struct tuple **ret);
#if defined(__cplusplus)
} /* extern "C" */
diff --git a/src/box/vy_mem.c b/src/box/vy_mem.c
index 9d40f76e..65e31ea5 100644
--- a/src/box/vy_mem.c
+++ b/src/box/vy_mem.c
@@ -39,6 +39,7 @@
#include "diag.h"
#include "tuple.h"
+#include "vy_history.h"
/** {{{ vy_mem_env */
@@ -269,26 +270,6 @@ vy_mem_rollback_stmt(struct vy_mem *mem, const struct tuple *stmt)
/* {{{ vy_mem_iterator support functions */
/**
- * Copy current statement into the out parameter. It is necessary
- * because vy_mem stores its tuples in the lsregion allocated
- * area, and lsregion tuples can't be referenced or unreferenced.
- */
-static int
-vy_mem_iterator_copy_to(struct vy_mem_iterator *itr, struct tuple **ret)
-{
- assert(itr->curr_stmt != NULL);
- if (itr->last_stmt)
- tuple_unref(itr->last_stmt);
- itr->last_stmt = vy_stmt_dup(itr->curr_stmt);
- *ret = itr->last_stmt;
- if (itr->last_stmt != NULL) {
- vy_stmt_counter_acct_tuple(&itr->stat->get, *ret);
- return 0;
- }
- return -1;
-}
-
-/**
* Get a stmt by current position
*/
static const struct tuple *
@@ -450,7 +431,6 @@ vy_mem_iterator_open(struct vy_mem_iterator *itr, struct vy_mem_iterator_stat *s
itr->curr_pos = vy_mem_tree_invalid_iterator();
itr->curr_stmt = NULL;
- itr->last_stmt = NULL;
itr->search_started = false;
}
@@ -461,7 +441,7 @@ vy_mem_iterator_open(struct vy_mem_iterator *itr, struct vy_mem_iterator_stat *s
* @retval 1 Not found
*/
static NODISCARD int
-vy_mem_iterator_next_key_impl(struct vy_mem_iterator *itr)
+vy_mem_iterator_next_key(struct vy_mem_iterator *itr)
{
if (!itr->search_started)
return vy_mem_iterator_start(itr);
@@ -488,22 +468,13 @@ vy_mem_iterator_next_key_impl(struct vy_mem_iterator *itr)
return vy_mem_iterator_find_lsn(itr, itr->iterator_type, itr->key);
}
-NODISCARD int
-vy_mem_iterator_next_key(struct vy_mem_iterator *itr, struct tuple **ret)
-{
- *ret = NULL;
- if (vy_mem_iterator_next_key_impl(itr) == 0)
- return vy_mem_iterator_copy_to(itr, ret);
- return 0;
-}
-
/*
* Find next (lower, older) record with the same key as current
* @retval 0 Found
* @retval 1 Not found
*/
static NODISCARD int
-vy_mem_iterator_next_lsn_impl(struct vy_mem_iterator *itr)
+vy_mem_iterator_next_lsn(struct vy_mem_iterator *itr)
{
assert(itr->search_started);
if (!itr->curr_stmt) /* End of search. */
@@ -528,22 +499,45 @@ vy_mem_iterator_next_lsn_impl(struct vy_mem_iterator *itr)
return 1;
}
+/**
+ * Append statements for the current key to a statement history
+ * until a terminal statement is found. Returns 0 on success, -1
+ * on memory allocation error.
+ */
+static NODISCARD int
+vy_mem_iterator_get_history(struct vy_mem_iterator *itr,
+ struct vy_history *history)
+{
+ do {
+ struct tuple *stmt = (struct tuple *)itr->curr_stmt;
+ vy_stmt_counter_acct_tuple(&itr->stat->get, stmt);
+ if (vy_history_append_stmt(history, stmt) != 0)
+ return -1;
+ if (vy_history_is_terminal(history))
+ break;
+ } while (vy_mem_iterator_next_lsn(itr) == 0);
+ return 0;
+}
+
NODISCARD int
-vy_mem_iterator_next_lsn(struct vy_mem_iterator *itr, struct tuple **ret)
+vy_mem_iterator_next(struct vy_mem_iterator *itr,
+ struct vy_history *history)
{
- *ret = NULL;
- if (vy_mem_iterator_next_lsn_impl(itr) == 0)
- return vy_mem_iterator_copy_to(itr, ret);
+ vy_history_cleanup(history);
+ if (vy_mem_iterator_next_key(itr) == 0)
+ return vy_mem_iterator_get_history(itr, history);
return 0;
}
NODISCARD int
vy_mem_iterator_skip(struct vy_mem_iterator *itr,
- const struct tuple *last_stmt, struct tuple **ret)
+ const struct tuple *last_stmt,
+ struct vy_history *history)
{
- *ret = NULL;
assert(!itr->search_started || itr->version == itr->mem->version);
+ vy_history_cleanup(history);
+
const struct tuple *key = itr->key;
enum iterator_type iterator_type = itr->iterator_type;
if (last_stmt != NULL) {
@@ -561,13 +555,14 @@ vy_mem_iterator_skip(struct vy_mem_iterator *itr,
itr->curr_stmt = NULL;
if (itr->curr_stmt != NULL)
- return vy_mem_iterator_copy_to(itr, ret);
+ return vy_mem_iterator_get_history(itr, history);
return 0;
}
NODISCARD int
vy_mem_iterator_restore(struct vy_mem_iterator *itr,
- const struct tuple *last_stmt, struct tuple **ret)
+ const struct tuple *last_stmt,
+ struct vy_history *history)
{
if (!itr->search_started || itr->version == itr->mem->version)
return 0;
@@ -590,9 +585,9 @@ vy_mem_iterator_restore(struct vy_mem_iterator *itr,
if (prev_stmt == itr->curr_stmt)
return 0;
- *ret = NULL;
+ vy_history_cleanup(history);
if (itr->curr_stmt != NULL &&
- vy_mem_iterator_copy_to(itr, ret) < 0)
+ vy_mem_iterator_get_history(itr, history) != 0)
return -1;
return 1;
}
@@ -600,8 +595,6 @@ vy_mem_iterator_restore(struct vy_mem_iterator *itr,
void
vy_mem_iterator_close(struct vy_mem_iterator *itr)
{
- if (itr->last_stmt != NULL)
- tuple_unref(itr->last_stmt);
TRASH(itr);
}
diff --git a/src/box/vy_mem.h b/src/box/vy_mem.h
index c2917efb..a35b140f 100644
--- a/src/box/vy_mem.h
+++ b/src/box/vy_mem.h
@@ -50,6 +50,8 @@
extern "C" {
#endif /* defined(__cplusplus) */
+struct vy_history;
+
/** Vinyl memory environment. */
struct vy_mem_env {
struct lsregion allocator;
@@ -349,12 +351,6 @@ struct vy_mem_iterator {
* valid statement.
*/
const struct tuple *curr_stmt;
- /*
- * Copy of the statement returned from one of public methods
- * (restore/next_lsn/next_key). Need to store the copy, because can't
- * return region allocated curr_stmt.
- */
- struct tuple *last_stmt;
/* data version from vy_mem */
uint32_t version;
@@ -371,29 +367,23 @@ vy_mem_iterator_open(struct vy_mem_iterator *itr, struct vy_mem_iterator_stat *s
const struct tuple *key, const struct vy_read_view **rv);
/**
- * Advance a mem iterator to the newest statement for the next key.
- * The statement is returned in @ret (NULL if EOF).
- * Returns 0 on success, -1 on memory allocation error.
- */
-NODISCARD int
-vy_mem_iterator_next_key(struct vy_mem_iterator *itr, struct tuple **ret);
-
-/**
- * Advance a mem iterator to the older statement for the same key.
- * The statement is returned in @ret (NULL if EOF).
+ * Advance a mem iterator to the next key.
+ * The key history is returned in @history (empty if EOF).
* Returns 0 on success, -1 on memory allocation error.
*/
NODISCARD int
-vy_mem_iterator_next_lsn(struct vy_mem_iterator *itr, struct tuple **ret);
+vy_mem_iterator_next(struct vy_mem_iterator *itr,
+ struct vy_history *history);
/**
- * Advance a mem iterator to the newest statement for the first key
- * following @last_stmt. The statement is returned in @ret (NULL if EOF).
+ * Advance a mem iterator to the key following @last_stmt.
+ * The key history is returned in @history (empty if EOF).
* Returns 0 on success, -1 on memory allocation error.
*/
NODISCARD int
vy_mem_iterator_skip(struct vy_mem_iterator *itr,
- const struct tuple *last_stmt, struct tuple **ret);
+ const struct tuple *last_stmt,
+ struct vy_history *history);
/**
* Check if a mem iterator was invalidated and needs to be restored.
@@ -403,7 +393,8 @@ vy_mem_iterator_skip(struct vy_mem_iterator *itr,
*/
NODISCARD int
vy_mem_iterator_restore(struct vy_mem_iterator *itr,
- const struct tuple *last_stmt, struct tuple **ret);
+ const struct tuple *last_stmt,
+ struct vy_history *history);
/**
* Close a mem iterator.
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index b48a2332..91dc1cca 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -159,15 +159,12 @@ vy_point_lookup_scan_mems(struct vy_lsm *lsm, const struct vy_read_view **rv,
/**
* Scan one particular slice.
* Add found statements to the history list up to terminal statement.
- * Set *terminal_found to true if the terminal statement (DELETE or REPLACE)
- * was found.
*/
static int
vy_point_lookup_scan_slice(struct vy_lsm *lsm, struct vy_slice *slice,
const struct vy_read_view **rv, struct tuple *key,
- struct vy_history *history, bool *terminal_found)
+ struct vy_history *history)
{
- int rc = 0;
/*
* The format of the statement must be exactly the space
* format with the same identifier to fully match the
@@ -177,19 +174,10 @@ vy_point_lookup_scan_slice(struct vy_lsm *lsm, struct vy_slice *slice,
vy_run_iterator_open(&run_itr, &lsm->stat.disk.iterator, slice,
ITER_EQ, key, rv, lsm->cmp_def, lsm->key_def,
lsm->disk_format, lsm->index_id == 0);
- struct tuple *stmt;
- rc = vy_run_iterator_next_key(&run_itr, &stmt);
- while (rc == 0 && stmt != NULL) {
- if (vy_history_append_stmt(history, stmt) != 0) {
- rc = -1;
- break;
- }
- if (vy_stmt_type(stmt) != IPROTO_UPSERT) {
- *terminal_found = true;
- break;
- }
- rc = vy_run_iterator_next_lsn(&run_itr, &stmt);
- }
+ struct vy_history slice_history;
+ vy_history_create(&slice_history, &lsm->env->history_node_pool);
+ int rc = vy_run_iterator_next(&run_itr, &slice_history);
+ vy_history_splice(history, &slice_history);
vy_run_iterator_close(&run_itr);
return rc;
}
@@ -223,11 +211,10 @@ vy_point_lookup_scan_slices(struct vy_lsm *lsm, const struct vy_read_view **rv,
}
assert(i == slice_count);
int rc = 0;
- bool terminal_found = false;
for (i = 0; i < slice_count; i++) {
- if (rc == 0 && !terminal_found)
+ if (rc == 0 && !vy_history_is_terminal(history))
rc = vy_point_lookup_scan_slice(lsm, slices[i],
- rv, key, history, &terminal_found);
+ rv, key, history);
vy_slice_unpin(slices[i]);
}
return rc;
@@ -291,7 +278,7 @@ done:
if (rc == 0) {
int upserts_applied;
rc = vy_history_apply(&history, lsm->cmp_def, lsm->mem_format,
- &upserts_applied, ret);
+ false, &upserts_applied, ret);
lsm->stat.upsert.applied += upserts_applied;
}
vy_history_cleanup(&history);
diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c
index 0695eedb..b171aa60 100644
--- a/src/box/vy_read_iterator.c
+++ b/src/box/vy_read_iterator.c
@@ -34,7 +34,7 @@
#include "vy_cache.h"
#include "vy_tx.h"
#include "fiber.h"
-#include "vy_upsert.h"
+#include "vy_history.h"
#include "vy_lsm.h"
#include "vy_stat.h"
@@ -54,8 +54,8 @@ struct vy_read_src {
bool is_started;
/** See vy_read_iterator->front_id. */
uint32_t front_id;
- /** Statement the iterator is at. */
- struct tuple *stmt;
+ /** History of the key the iterator is positioned at. */
+ struct vy_history history;
};
/**
@@ -74,10 +74,13 @@ vy_read_iterator_reserve(struct vy_read_iterator *itr, uint32_t capacity)
"calloc", "new_src");
return -1;
}
- if (itr->src_count > 0) {
- memcpy(new_src, itr->src, itr->src_count * sizeof(*new_src));
- free(itr->src);
+ memcpy(new_src, itr->src, itr->src_count * sizeof(*new_src));
+ for (uint32_t i = 0; i < itr->src_count; i++) {
+ vy_history_create(&new_src[i].history,
+ &itr->lsm->env->history_node_pool);
+ vy_history_splice(&new_src[i].history, &itr->src[i].history);
}
+ free(itr->src);
itr->src = new_src;
itr->src_capacity = capacity;
return 0;
@@ -94,9 +97,9 @@ vy_read_iterator_add_src(struct vy_read_iterator *itr)
if (vy_read_iterator_reserve(itr, itr->src_count + 1) != 0)
return NULL;
}
- itr->src[itr->src_count].front_id = 0;
struct vy_read_src *src = &itr->src[itr->src_count++];
memset(src, 0, sizeof(*src));
+ vy_history_create(&src->history, &itr->lsm->env->history_node_pool);
return src;
}
@@ -221,14 +224,11 @@ vy_read_iterator_evaluate_src(struct vy_read_iterator *itr,
struct vy_read_src *src, bool *stop)
{
uint32_t src_id = src - itr->src;
- int cmp = vy_read_iterator_cmp_stmt(itr, src->stmt, itr->curr_stmt);
+ struct tuple *stmt = vy_history_last_stmt(&src->history);
+ int cmp = vy_read_iterator_cmp_stmt(itr, stmt, itr->curr_stmt);
if (cmp < 0) {
- assert(src->stmt != NULL);
- tuple_ref(src->stmt);
- if (itr->curr_stmt != NULL)
- tuple_unref(itr->curr_stmt);
- itr->curr_stmt = src->stmt;
- itr->curr_src = src_id;
+ assert(stmt != NULL);
+ itr->curr_stmt = stmt;
itr->front_id++;
}
if (cmp <= 0)
@@ -236,7 +236,8 @@ vy_read_iterator_evaluate_src(struct vy_read_iterator *itr,
itr->skipped_src = MAX(itr->skipped_src, src_id + 1);
- if (cmp < 0 && vy_read_iterator_is_exact_match(itr, src->stmt)) {
+ if (cmp < 0 && vy_history_is_terminal(&src->history) &&
+ vy_read_iterator_is_exact_match(itr, stmt)) {
itr->skipped_src = src_id + 1;
*stop = true;
}
@@ -254,14 +255,15 @@ vy_read_src_is_behind(struct vy_read_iterator *itr, struct vy_read_src *src)
return true;
if (src_id < itr->skipped_src)
return false;
- if (vy_read_iterator_cmp_stmt(itr, src->stmt, itr->last_stmt) > 0)
+ struct tuple *stmt = vy_history_last_stmt(&src->history);
+ if (vy_read_iterator_cmp_stmt(itr, stmt, itr->last_stmt) > 0)
return false;
return true;
}
/*
* Each of the functions from the vy_read_iterator_scan_* family
- * is used by vy_read_iterator_next_key() to:
+ * is used by vy_read_iterator_advance() to:
*
* 1. Update the position of a read source, which implies:
*
@@ -283,31 +285,36 @@ vy_read_src_is_behind(struct vy_read_iterator *itr, struct vy_read_src *src)
* See also vy_read_iterator_evaluate_src().
*/
-static void
+static NODISCARD int
vy_read_iterator_scan_txw(struct vy_read_iterator *itr, bool *stop)
{
struct vy_read_src *src = &itr->src[itr->txw_src];
struct vy_txw_iterator *src_itr = &src->txw_iterator;
if (itr->tx == NULL)
- return;
+ return 0;
assert(itr->txw_src < itr->skipped_src);
- int rc = vy_txw_iterator_restore(src_itr, itr->last_stmt, &src->stmt);
+ int rc = vy_txw_iterator_restore(src_itr, itr->last_stmt,
+ &src->history);
if (rc == 0) {
if (!src->is_started) {
- vy_txw_iterator_skip(src_itr, itr->last_stmt,
- &src->stmt);
+ rc = vy_txw_iterator_skip(src_itr, itr->last_stmt,
+ &src->history);
} else if (src->front_id == itr->prev_front_id) {
- vy_txw_iterator_next(src_itr, &src->stmt);
+ rc = vy_txw_iterator_next(src_itr, &src->history);
}
src->is_started = true;
}
+ if (rc < 0)
+ return -1;
+
vy_read_iterator_evaluate_src(itr, src, stop);
+ return 0;
}
-static void
+static NODISCARD int
vy_read_iterator_scan_cache(struct vy_read_iterator *itr, bool *stop)
{
bool is_interval = false;
@@ -315,23 +322,26 @@ vy_read_iterator_scan_cache(struct vy_read_iterator *itr, bool *stop)
struct vy_cache_iterator *src_itr = &src->cache_iterator;
int rc = vy_cache_iterator_restore(src_itr, itr->last_stmt,
- &src->stmt, &is_interval);
+ &src->history, &is_interval);
if (rc == 0) {
if (vy_read_src_is_behind(itr, src)) {
- vy_cache_iterator_skip(src_itr, itr->last_stmt,
- &src->stmt, &is_interval);
+ rc = vy_cache_iterator_skip(src_itr, itr->last_stmt,
+ &src->history, &is_interval);
} else if (src->front_id == itr->prev_front_id) {
- vy_cache_iterator_next(src_itr, &src->stmt,
- &is_interval);
+ rc = vy_cache_iterator_next(src_itr, &src->history,
+ &is_interval);
}
src->is_started = true;
}
- vy_read_iterator_evaluate_src(itr, src, stop);
+ if (rc < 0)
+ return -1;
+ vy_read_iterator_evaluate_src(itr, src, stop);
if (is_interval) {
itr->skipped_src = itr->cache_src + 1;
*stop = true;
}
+ return 0;
}
static NODISCARD int
@@ -344,13 +354,13 @@ vy_read_iterator_scan_mem(struct vy_read_iterator *itr,
assert(mem_src >= itr->mem_src && mem_src < itr->disk_src);
- rc = vy_mem_iterator_restore(src_itr, itr->last_stmt, &src->stmt);
+ rc = vy_mem_iterator_restore(src_itr, itr->last_stmt, &src->history);
if (rc == 0) {
if (vy_read_src_is_behind(itr, src)) {
rc = vy_mem_iterator_skip(src_itr, itr->last_stmt,
- &src->stmt);
+ &src->history);
} else if (src->front_id == itr->prev_front_id) {
- rc = vy_mem_iterator_next_key(src_itr, &src->stmt);
+ rc = vy_mem_iterator_next(src_itr, &src->history);
}
src->is_started = true;
}
@@ -372,9 +382,10 @@ vy_read_iterator_scan_disk(struct vy_read_iterator *itr,
assert(disk_src >= itr->disk_src && disk_src < itr->src_count);
if (vy_read_src_is_behind(itr, src))
- rc = vy_run_iterator_skip(src_itr, itr->last_stmt, &src->stmt);
+ rc = vy_run_iterator_skip(src_itr, itr->last_stmt,
+ &src->history);
else if (src->front_id == itr->prev_front_id)
- rc = vy_run_iterator_next_key(src_itr, &src->stmt);
+ rc = vy_run_iterator_next(src_itr, &src->history);
src->is_started = true;
if (rc < 0)
@@ -397,13 +408,14 @@ vy_read_iterator_restore_mem(struct vy_read_iterator *itr)
struct vy_read_src *src = &itr->src[itr->mem_src];
rc = vy_mem_iterator_restore(&src->mem_iterator,
- itr->last_stmt, &src->stmt);
+ itr->last_stmt, &src->history);
if (rc < 0)
return -1; /* memory allocation error */
if (rc == 0)
return 0; /* nothing changed */
- cmp = vy_read_iterator_cmp_stmt(itr, src->stmt, itr->curr_stmt);
+ struct tuple *stmt = vy_history_last_stmt(&src->history);
+ cmp = vy_read_iterator_cmp_stmt(itr, stmt, itr->curr_stmt);
if (cmp > 0) {
/*
* Memory trees are append-only so if the
@@ -413,26 +425,21 @@ vy_read_iterator_restore_mem(struct vy_read_iterator *itr)
assert(src->front_id < itr->front_id);
return 0;
}
- if (cmp < 0 || itr->curr_src != itr->txw_src) {
+ if (cmp < 0) {
/*
* The new statement precedes the current
- * candidate for the next key or it is a
- * newer version of the same key.
+ * candidate for the next key.
*/
- tuple_ref(src->stmt);
- if (itr->curr_stmt != NULL)
- tuple_unref(itr->curr_stmt);
- itr->curr_stmt = src->stmt;
- itr->curr_src = itr->mem_src;
+ itr->curr_stmt = stmt;
+ itr->front_id++;
} else {
/*
+ * The new statement updates the next key.
* Make sure we don't read the old value
* from the cache while applying UPSERTs.
*/
itr->src[itr->cache_src].front_id = 0;
}
- if (cmp < 0)
- itr->front_id++;
src->front_id = itr->front_id;
return 0;
}
@@ -444,12 +451,11 @@ static void
vy_read_iterator_next_range(struct vy_read_iterator *itr);
/**
- * Iterate to the next key
- * @retval 0 success or EOF (*ret == NULL)
- * @retval -1 read error
+ * Advance the iterator to the next key.
+ * Returns 0 on success, -1 on error.
*/
static NODISCARD int
-vy_read_iterator_next_key(struct vy_read_iterator *itr, struct tuple **ret)
+vy_read_iterator_advance(struct vy_read_iterator *itr)
{
if (itr->last_stmt != NULL && (itr->iterator_type == ITER_EQ ||
itr->iterator_type == ITER_REQ) &&
@@ -458,7 +464,7 @@ vy_read_iterator_next_key(struct vy_read_iterator *itr, struct tuple **ret)
* There may be one statement at max satisfying
* EQ with a full key.
*/
- *ret = NULL;
+ itr->curr_stmt = NULL;
return 0;
}
/*
@@ -472,10 +478,7 @@ vy_read_iterator_next_key(struct vy_read_iterator *itr, struct tuple **ret)
vy_read_iterator_restore(itr);
}
restart:
- if (itr->curr_stmt != NULL)
- tuple_unref(itr->curr_stmt);
itr->curr_stmt = NULL;
- itr->curr_src = UINT32_MAX;
itr->prev_front_id = itr->front_id;
/*
@@ -483,10 +486,12 @@ restart:
* from the one that stores newest data.
*/
bool stop = false;
- vy_read_iterator_scan_txw(itr, &stop);
+ if (vy_read_iterator_scan_txw(itr, &stop) != 0)
+ return -1;
if (stop)
goto done;
- vy_read_iterator_scan_cache(itr, &stop);
+ if (vy_read_iterator_scan_cache(itr, &stop) != 0)
+ return -1;
if (stop)
goto done;
@@ -562,138 +567,6 @@ done:
vy_stmt_compare(itr->curr_stmt, itr->key,
itr->lsm->cmp_def) != 0)
itr->curr_stmt = NULL;
-
- *ret = itr->curr_stmt;
- return 0;
-}
-
-/**
- * Iterate to the next (elder) version of the same key
- * @retval 0 success or EOF (*ret == NULL)
- * @retval -1 read error
- */
-static NODISCARD int
-vy_read_iterator_next_lsn(struct vy_read_iterator *itr, struct tuple **ret)
-{
- uint32_t i;
- bool unused;
- struct vy_read_src *src;
-
- assert(itr->curr_stmt != NULL);
- assert(itr->curr_src < itr->skipped_src);
-
- /* Cache stores only terminal statements. */
- assert(itr->curr_src != itr->cache_src);
-
- if (itr->curr_src == itr->txw_src) {
- /*
- * Write set does not store statement history.
- * Look up the older statement in the cache and
- * if it isn't there proceed to mems and runs.
- */
- src = &itr->src[itr->cache_src];
- if (itr->cache_src >= itr->skipped_src)
- vy_read_iterator_scan_cache(itr, &unused);
- if (src->front_id == itr->front_id)
- goto found;
- }
-
- /* Look up the older statement in in-memory trees. */
- for (i = MAX(itr->curr_src, itr->mem_src); i < itr->disk_src; i++) {
- src = &itr->src[i];
- if (i >= itr->skipped_src &&
- vy_read_iterator_scan_mem(itr, i, &unused) != 0)
- return -1;
- if (src->front_id != itr->front_id)
- continue;
- if (i == itr->curr_src &&
- vy_mem_iterator_next_lsn(&src->mem_iterator,
- &src->stmt) != 0)
- return -1;
- if (src->stmt != NULL)
- goto found;
- }
-
- /*
- * Look up the older statement in on-disk runs.
- *
- * Note, we don't need to check the LSM tree version after the yield
- * caused by the disk read, because once we've come to this point,
- * we won't read any source except run slices, which are pinned
- * and hence cannot be removed during the yield.
- */
- vy_read_iterator_pin_slices(itr);
- for (i = MAX(itr->curr_src, itr->disk_src); i < itr->src_count; i++) {
- src = &itr->src[i];
- if (i >= itr->skipped_src &&
- vy_read_iterator_scan_disk(itr, i, &unused) != 0)
- goto err_disk;
- if (src->front_id != itr->front_id)
- continue;
- if (i == itr->curr_src &&
- vy_run_iterator_next_lsn(&src->run_iterator,
- &src->stmt) != 0)
- goto err_disk;
- if (src->stmt != NULL)
- break;
- }
- vy_read_iterator_unpin_slices(itr);
-
- if (i < itr->src_count)
- goto found;
-
- /* Searched everywhere, found nothing. */
- *ret = NULL;
- return 0;
-found:
- tuple_ref(src->stmt);
- if (itr->curr_stmt != NULL)
- tuple_unref(itr->curr_stmt);
- itr->curr_stmt = src->stmt;
- itr->curr_src = src - itr->src;
- *ret = itr->curr_stmt;
- return 0;
-
-err_disk:
- vy_read_iterator_unpin_slices(itr);
- return -1;
-}
-
-/**
- * Squash in a single REPLACE all UPSERTs for the current key.
- *
- * @retval 0 success
- * @retval -1 error
- */
-static NODISCARD int
-vy_read_iterator_squash_upsert(struct vy_read_iterator *itr,
- struct tuple **ret)
-{
- *ret = NULL;
- struct vy_lsm *lsm = itr->lsm;
- struct tuple *t = itr->curr_stmt;
-
- /* Upserts enabled only in the primary index LSM tree. */
- assert(vy_stmt_type(t) != IPROTO_UPSERT || lsm->index_id == 0);
- tuple_ref(t);
- while (vy_stmt_type(t) == IPROTO_UPSERT) {
- struct tuple *next;
- int rc = vy_read_iterator_next_lsn(itr, &next);
- if (rc != 0) {
- tuple_unref(t);
- return rc;
- }
- struct tuple *applied = vy_apply_upsert(t, next,
- lsm->cmp_def, lsm->mem_format, true);
- lsm->stat.upsert.applied++;
- tuple_unref(t);
- if (applied == NULL)
- return -1;
- t = applied;
- if (next == NULL)
- break;
- }
- *ret = t;
return 0;
}
@@ -792,25 +665,26 @@ vy_read_iterator_cleanup(struct vy_read_iterator *itr)
if (itr->txw_src < itr->src_count) {
src = &itr->src[itr->txw_src];
+ vy_history_cleanup(&src->history);
vy_txw_iterator_close(&src->txw_iterator);
}
if (itr->cache_src < itr->src_count) {
src = &itr->src[itr->cache_src];
+ vy_history_cleanup(&src->history);
vy_cache_iterator_close(&src->cache_iterator);
}
for (i = itr->mem_src; i < itr->disk_src; i++) {
src = &itr->src[i];
+ vy_history_cleanup(&src->history);
vy_mem_iterator_close(&src->mem_iterator);
}
for (i = itr->disk_src; i < itr->src_count; i++) {
src = &itr->src[i];
+ vy_history_cleanup(&src->history);
vy_run_iterator_close(&src->run_iterator);
}
- if (itr->curr_stmt != NULL)
- tuple_unref(itr->curr_stmt);
itr->curr_stmt = NULL;
- itr->curr_src = UINT32_MAX;
itr->txw_src = UINT32_MAX;
itr->cache_src = UINT32_MAX;
itr->mem_src = UINT32_MAX;
@@ -937,6 +811,41 @@ vy_read_iterator_next_range(struct vy_read_iterator *itr)
}
/**
+ * Get a resultant statement for the current key.
+ * Returns 0 on success, -1 on error.
+ */
+static NODISCARD int
+vy_read_iterator_apply_history(struct vy_read_iterator *itr,
+ struct tuple **ret)
+{
+ if (itr->curr_stmt == NULL) {
+ *ret = NULL;
+ return 0;
+ }
+
+ struct vy_lsm *lsm = itr->lsm;
+ struct vy_history history;
+ vy_history_create(&history, &lsm->env->history_node_pool);
+
+ for (uint32_t i = 0; i < itr->src_count; i++) {
+ struct vy_read_src *src = &itr->src[i];
+ if (src->front_id == itr->front_id) {
+ vy_history_splice(&history, &src->history);
+ if (vy_history_is_terminal(&history))
+ break;
+ }
+ }
+
+ int upserts_applied = 0;
+ int rc = vy_history_apply(&history, lsm->cmp_def, lsm->mem_format,
+ true, &upserts_applied, ret);
+
+ lsm->stat.upsert.applied += upserts_applied;
+ vy_history_cleanup(&history);
+ return rc;
+}
+
+/**
* Track a read in the conflict manager.
*/
static int
@@ -981,18 +890,11 @@ vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result)
else /* first iteration */
lsm->stat.lookup++;
next_key:
- if (vy_read_iterator_next_key(itr, &stmt) != 0)
+ if (vy_read_iterator_advance(itr) != 0)
goto err;
-
- /*
- * Fetching an older statement of the current key may yield
- * so we must track the read before applying UPSERTs.
- */
- if (vy_read_iterator_track_read(itr, stmt) != 0)
+ if (vy_read_iterator_apply_history(itr, &stmt) != 0)
goto err;
-
- if (stmt != NULL &&
- vy_read_iterator_squash_upsert(itr, &stmt) != 0)
+ if (vy_read_iterator_track_read(itr, stmt) != 0)
goto err;
if (itr->last_stmt != NULL)
diff --git a/src/box/vy_read_iterator.h b/src/box/vy_read_iterator.h
index 6aa84540..07c6ef8f 100644
--- a/src/box/vy_read_iterator.h
+++ b/src/box/vy_read_iterator.h
@@ -92,8 +92,6 @@ struct vy_read_iterator {
uint32_t src_count;
/** Maximal capacity of the src array. */
uint32_t src_capacity;
- /** Offset of the current merge source. */
- uint32_t curr_src;
/** Statement returned by the current merge source. */
struct tuple *curr_stmt;
/** Offset of the transaction write set source. */
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index fa5670d3..587cb002 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -44,6 +44,7 @@
#include "tuple_compare.h"
#include "xlog.h"
#include "xrow.h"
+#include "vy_history.h"
static const uint64_t vy_page_info_key_map = (1 << VY_PAGE_INFO_OFFSET) |
(1 << VY_PAGE_INFO_SIZE) |
@@ -1401,7 +1402,12 @@ vy_run_iterator_open(struct vy_run_iterator *itr,
itr->search_ended = false;
}
-NODISCARD int
+/**
+ * Advance a run iterator to the newest statement for the next key.
+ * The statement is returned in @ret (NULL if EOF).
+ * Returns 0 on success, -1 on memory allocation or IO error.
+ */
+static NODISCARD int
vy_run_iterator_next_key(struct vy_run_iterator *itr, struct tuple **ret)
{
*ret = NULL;
@@ -1441,7 +1447,12 @@ vy_run_iterator_next_key(struct vy_run_iterator *itr, struct tuple **ret)
return vy_run_iterator_find_lsn(itr, itr->iterator_type, itr->key, ret);
}
-NODISCARD int
+/**
+ * Advance a run iterator to the newest statement for the first key
+ * following @last_stmt. The statement is returned in @ret (NULL if EOF).
+ * Returns 0 on success, -1 on memory allocation or IO error.
+ */
+static NODISCARD int
vy_run_iterator_next_lsn(struct vy_run_iterator *itr, struct tuple **ret)
{
*ret = NULL;
@@ -1478,10 +1489,30 @@ vy_run_iterator_next_lsn(struct vy_run_iterator *itr, struct tuple **ret)
}
NODISCARD int
+vy_run_iterator_next(struct vy_run_iterator *itr,
+ struct vy_history *history)
+{
+ vy_history_cleanup(history);
+ struct tuple *stmt;
+ if (vy_run_iterator_next_key(itr, &stmt) != 0)
+ return -1;
+ while (stmt != NULL) {
+ if (vy_history_append_stmt(history, stmt) != 0)
+ return -1;
+ if (vy_history_is_terminal(history))
+ break;
+ if (vy_run_iterator_next_lsn(itr, &stmt) != 0)
+ return -1;
+ }
+ return 0;
+}
+
+NODISCARD int
vy_run_iterator_skip(struct vy_run_iterator *itr,
- const struct tuple *last_stmt, struct tuple **ret)
+ const struct tuple *last_stmt,
+ struct vy_history *history)
{
- *ret = NULL;
+ vy_history_cleanup(history);
if (itr->search_ended)
return 0;
@@ -1494,14 +1525,24 @@ vy_run_iterator_skip(struct vy_run_iterator *itr,
}
itr->search_started = true;
- if (vy_run_iterator_seek(itr, iterator_type, key, ret) != 0)
+ struct tuple *stmt;
+ if (vy_run_iterator_seek(itr, iterator_type, key, &stmt) != 0)
return -1;
if (itr->iterator_type == ITER_EQ && last_stmt != NULL &&
- *ret != NULL && vy_stmt_compare(itr->key, *ret,
+ stmt != NULL && vy_stmt_compare(itr->key, stmt,
itr->cmp_def) != 0) {
vy_run_iterator_stop(itr);
- *ret = NULL;
+ return 0;
+ }
+
+ while (stmt != NULL) {
+ if (vy_history_append_stmt(history, stmt) != 0)
+ return -1;
+ if (vy_history_is_terminal(history))
+ break;
+ if (vy_run_iterator_next_lsn(itr, &stmt) != 0)
+ return -1;
}
return 0;
}
diff --git a/src/box/vy_run.h b/src/box/vy_run.h
index 85bb66a7..6551191b 100644
--- a/src/box/vy_run.h
+++ b/src/box/vy_run.h
@@ -49,6 +49,7 @@
extern "C" {
#endif /* defined(__cplusplus) */
+struct vy_history;
struct vy_run_reader;
/** Part of vinyl environment for run read/write */
@@ -496,29 +497,23 @@ vy_run_iterator_open(struct vy_run_iterator *itr,
struct tuple_format *format, bool is_primary);
/**
- * Advance a run iterator to the newest statement for the next key.
- * The statement is returned in @ret (NULL if EOF).
+ * Advance a run iterator to the next key.
+ * The key history is returned in @history (empty if EOF).
* Returns 0 on success, -1 on memory allocation or IO error.
*/
NODISCARD int
-vy_run_iterator_next_key(struct vy_run_iterator *itr, struct tuple **ret);
+vy_run_iterator_next(struct vy_run_iterator *itr,
+ struct vy_history *history);
/**
- * Advance a run iterator to the older statement for the same key.
- * The statement is returned in @ret (NULL if EOF).
- * Returns 0 on success, -1 on memory allocation or IO error.
- */
-NODISCARD int
-vy_run_iterator_next_lsn(struct vy_run_iterator *itr, struct tuple **ret);
-
-/**
- * Advance a run iterator to the newest statement for the first key
- * following @last_stmt. The statement is returned in @ret (NULL if EOF).
+ * Advance a run iterator to the key following @last_stmt.
+ * The key history is returned in @history (empty if EOF).
* Returns 0 on success, -1 on memory allocation or IO error.
*/
NODISCARD int
vy_run_iterator_skip(struct vy_run_iterator *itr,
- const struct tuple *last_stmt, struct tuple **ret);
+ const struct tuple *last_stmt,
+ struct vy_history *history);
/**
* Close a run iterator.
diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c
index 4812b794..04792ce7 100644
--- a/src/box/vy_tx.c
+++ b/src/box/vy_tx.c
@@ -55,6 +55,7 @@
#include "vy_stat.h"
#include "vy_stmt.h"
#include "vy_upsert.h"
+#include "vy_history.h"
#include "vy_read_set.h"
#include "vy_read_view.h"
@@ -942,10 +943,11 @@ vy_txw_iterator_seek(struct vy_txw_iterator *itr,
itr->curr_txv = txv;
}
-void
-vy_txw_iterator_next(struct vy_txw_iterator *itr, struct tuple **ret)
+NODISCARD int
+vy_txw_iterator_next(struct vy_txw_iterator *itr,
+ struct vy_history *history)
{
- *ret = NULL;
+ vy_history_cleanup(history);
if (!itr->search_started) {
itr->search_started = true;
vy_txw_iterator_seek(itr, itr->iterator_type, itr->key);
@@ -953,7 +955,7 @@ vy_txw_iterator_next(struct vy_txw_iterator *itr, struct tuple **ret)
}
assert(itr->version == itr->tx->write_set_version);
if (itr->curr_txv == NULL)
- return;
+ return 0;
if (itr->iterator_type == ITER_LE || itr->iterator_type == ITER_LT)
itr->curr_txv = write_set_prev(&itr->tx->write_set, itr->curr_txv);
else
@@ -966,19 +968,23 @@ vy_txw_iterator_next(struct vy_txw_iterator *itr, struct tuple **ret)
itr->curr_txv = NULL;
out:
if (itr->curr_txv != NULL) {
- *ret = itr->curr_txv->stmt;
- vy_stmt_counter_acct_tuple(&itr->stat->get, *ret);
+ vy_stmt_counter_acct_tuple(&itr->stat->get,
+ itr->curr_txv->stmt);
+ return vy_history_append_stmt(history, itr->curr_txv->stmt);
}
+ return 0;
}
-void
+NODISCARD int
vy_txw_iterator_skip(struct vy_txw_iterator *itr,
- const struct tuple *last_stmt, struct tuple **ret)
+ const struct tuple *last_stmt,
+ struct vy_history *history)
{
- *ret = NULL;
assert(!itr->search_started ||
itr->version == itr->tx->write_set_version);
+ vy_history_cleanup(history);
+
const struct tuple *key = itr->key;
enum iterator_type iterator_type = itr->iterator_type;
if (last_stmt != NULL) {
@@ -996,14 +1002,17 @@ vy_txw_iterator_skip(struct vy_txw_iterator *itr,
itr->curr_txv = NULL;
if (itr->curr_txv != NULL) {
- *ret = itr->curr_txv->stmt;
- vy_stmt_counter_acct_tuple(&itr->stat->get, *ret);
+ vy_stmt_counter_acct_tuple(&itr->stat->get,
+ itr->curr_txv->stmt);
+ return vy_history_append_stmt(history, itr->curr_txv->stmt);
}
+ return 0;
}
-int
+NODISCARD int
vy_txw_iterator_restore(struct vy_txw_iterator *itr,
- const struct tuple *last_stmt, struct tuple **ret)
+ const struct tuple *last_stmt,
+ struct vy_history *history)
{
if (!itr->search_started || itr->version == itr->tx->write_set_version)
return 0;
@@ -1027,10 +1036,12 @@ vy_txw_iterator_restore(struct vy_txw_iterator *itr,
if (prev_txv == itr->curr_txv)
return 0;
- *ret = NULL;
+ vy_history_cleanup(history);
if (itr->curr_txv != NULL) {
- *ret = itr->curr_txv->stmt;
- vy_stmt_counter_acct_tuple(&itr->stat->get, *ret);
+ vy_stmt_counter_acct_tuple(&itr->stat->get,
+ itr->curr_txv->stmt);
+ if (vy_history_append_stmt(history, itr->curr_txv->stmt) != 0)
+ return -1;
}
return 1;
}
diff --git a/src/box/vy_tx.h b/src/box/vy_tx.h
index a59145be..9c35e2bd 100644
--- a/src/box/vy_tx.h
+++ b/src/box/vy_tx.h
@@ -55,6 +55,7 @@ struct tuple;
struct tx_manager;
struct vy_mem;
struct vy_tx;
+struct vy_history;
/** Transaction state. */
enum tx_state {
@@ -403,28 +404,34 @@ vy_txw_iterator_open(struct vy_txw_iterator *itr,
const struct tuple *key);
/**
- * Advance a txw iterator to the next statement.
- * The next statement is returned in @ret (NULL if EOF).
+ * Advance a txw iterator to the next key.
+ * The key history is returned in @history (empty if EOF).
+ * Returns 0 on success, -1 on memory allocation error.
*/
-void
-vy_txw_iterator_next(struct vy_txw_iterator *itr, struct tuple **ret);
+NODISCARD int
+vy_txw_iterator_next(struct vy_txw_iterator *itr,
+ struct vy_history *history);
/**
- * Advance a txw iterator to the statement following @last_stmt.
- * The statement is returned in @ret (NULL if EOF).
+ * Advance a txw iterator to the key following @last_stmt.
+ * The key history is returned in @history (empty if EOF).
+ * Returns 0 on success, -1 on memory allocation error.
*/
-void
+NODISCARD int
vy_txw_iterator_skip(struct vy_txw_iterator *itr,
- const struct tuple *last_stmt, struct tuple **ret);
+ const struct tuple *last_stmt,
+ struct vy_history *history);
/**
* Check if a txw iterator was invalidated and needs to be restored.
- * If it does, set the iterator position to the statement following
- * @last_stmt and return 1, otherwise return 0.
+ * If it does, set the iterator position to the first key following
+ * @last_stmt and return 1, otherwise return 0. Returns -1 on memory
+ * allocation error.
*/
int
vy_txw_iterator_restore(struct vy_txw_iterator *itr,
- const struct tuple *last_stmt, struct tuple **ret);
+ const struct tuple *last_stmt,
+ struct vy_history *history);
/**
* Close a txw iterator.
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index db270de6..9fc645a1 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -140,6 +140,8 @@ target_link_libraries(say.test core unit)
set(ITERATOR_TEST_SOURCES
vy_iterators_helper.c
${PROJECT_SOURCE_DIR}/src/box/vy_stmt.c
+ ${PROJECT_SOURCE_DIR}/src/box/vy_upsert.c
+ ${PROJECT_SOURCE_DIR}/src/box/vy_history.c
${PROJECT_SOURCE_DIR}/src/box/vy_mem.c
${PROJECT_SOURCE_DIR}/src/box/vy_cache.c)
set(ITERATOR_TEST_LIBS core tuple xrow unit)
diff --git a/test/unit/vy_cache.c b/test/unit/vy_cache.c
index 6b543d85..5d296aa6 100644
--- a/test/unit/vy_cache.c
+++ b/test/unit/vy_cache.c
@@ -1,5 +1,7 @@
#include "trivia/util.h"
#include "vy_iterators_helper.h"
+#include "vy_history.h"
+#include "fiber.h"
#include "unit.h"
const struct vy_stmt_template key_template = STMT_TEMPLATE(0, SELECT, vyend);
@@ -19,6 +21,10 @@ test_basic()
struct tuple *select_all = vy_new_simple_stmt(format, NULL,
&key_template);
+ struct mempool history_node_pool;
+ mempool_create(&history_node_pool, cord_slab_cache(),
+ sizeof(struct vy_history_node));
+
/*
* Fill the cache with 3 chains.
*/
@@ -85,8 +91,11 @@ test_basic()
/* Start iterator and make several steps. */
struct tuple *ret;
bool unused;
+ struct vy_history history;
+ vy_history_create(&history, &history_node_pool);
for (int i = 0; i < 4; ++i)
- vy_cache_iterator_next(&itr, &ret, &unused);
+ vy_cache_iterator_next(&itr, &history, &unused);
+ ret = vy_history_last_stmt(&history);
ok(vy_stmt_are_same(ret, &chain1[3], format, NULL),
"next_key * 4");
@@ -107,14 +116,17 @@ test_basic()
* must be chain1[1].
*/
struct tuple *last_stmt = vy_new_simple_stmt(format, NULL, &chain1[0]);
- ok(vy_cache_iterator_restore(&itr, last_stmt, &ret, &unused) >= 0,
+ ok(vy_cache_iterator_restore(&itr, last_stmt, &history, &unused) >= 0,
"restore");
+ ret = vy_history_last_stmt(&history);
ok(vy_stmt_are_same(ret, &chain1[1], format, NULL),
"restore on position after last");
tuple_unref(last_stmt);
+ vy_history_cleanup(&history);
vy_cache_iterator_close(&itr);
+ mempool_destroy(&history_node_pool);
tuple_unref(select_all);
destroy_test_cache(&cache, key_def, format);
check_plan();
diff --git a/test/unit/vy_mem.c b/test/unit/vy_mem.c
index 60fcf084..6641dc0c 100644
--- a/test/unit/vy_mem.c
+++ b/test/unit/vy_mem.c
@@ -1,6 +1,7 @@
#include <trivia/config.h>
#include "memory.h"
#include "fiber.h"
+#include "vy_history.h"
#include "vy_iterators_helper.h"
static void
@@ -91,6 +92,10 @@ test_iterator_restore_after_insertion()
struct tuple *select_key = vy_stmt_new_select(format, "", 0);
+ struct mempool history_node_pool;
+ mempool_create(&history_node_pool, cord_slab_cache(),
+ sizeof(struct vy_history_node));
+
uint64_t restore_on_value = 20;
uint64_t restore_on_value_reverse = 60;
char data[16];
@@ -189,7 +194,10 @@ test_iterator_restore_after_insertion()
direct ? ITER_GE : ITER_LE, select_key,
&prv);
struct tuple *t;
- int rc = vy_mem_iterator_next_key(&itr, &t);
+ struct vy_history history;
+ vy_history_create(&history, &history_node_pool);
+ int rc = vy_mem_iterator_next(&itr, &history);
+ t = vy_history_last_stmt(&history);
assert(rc == 0);
size_t j = 0;
while (t != NULL) {
@@ -209,7 +217,8 @@ test_iterator_restore_after_insertion()
break;
else if(!direct && val <= middle_value)
break;
- int rc = vy_mem_iterator_next_key(&itr, &t);
+ int rc = vy_mem_iterator_next(&itr, &history);
+ t = vy_history_last_stmt(&history);
assert(rc == 0);
}
if (t == NULL && j != expected_count)
@@ -261,9 +270,10 @@ test_iterator_restore_after_insertion()
}
if (direct)
- rc = vy_mem_iterator_restore(&itr, restore_on_key, &t);
+ rc = vy_mem_iterator_restore(&itr, restore_on_key, &history);
else
- rc = vy_mem_iterator_restore(&itr, restore_on_key_reverse, &t);
+ rc = vy_mem_iterator_restore(&itr, restore_on_key_reverse, &history);
+ t = vy_history_last_stmt(&history);
j = 0;
while (t != NULL) {
@@ -279,7 +289,8 @@ test_iterator_restore_after_insertion()
break;
}
j++;
- int rc = vy_mem_iterator_next_key(&itr, &t);
+ int rc = vy_mem_iterator_next(&itr, &history);
+ t = vy_history_last_stmt(&history);
assert(rc == 0);
}
if (j != expected_count)
@@ -289,6 +300,7 @@ test_iterator_restore_after_insertion()
break;
}
+ vy_history_cleanup(&history);
vy_mem_delete(mem);
lsregion_gc(&lsregion, 2);
}
@@ -296,6 +308,8 @@ test_iterator_restore_after_insertion()
ok(!wrong_output, "check wrong_output %d", i_fail);
/* Clean up */
+ mempool_destroy(&history_node_pool);
+
tuple_unref(select_key);
tuple_unref(restore_on_key);
tuple_unref(restore_on_key_reverse);
diff --git a/test/vinyl/upsert.result b/test/vinyl/upsert.result
index cd6070a6..76728024 100644
--- a/test/vinyl/upsert.result
+++ b/test/vinyl/upsert.result
@@ -684,15 +684,15 @@ box.snapshot()
---
- ok
...
-s:upsert({1, 1}, {{'+', 1, 1}})
+s:upsert({1, 1}, {{'+', 1, 1}}) -- ignored due to primary key changed
---
...
-s:upsert({1, 1}, {{'+', 2, 1}})
+s:upsert({1, 1}, {{'+', 2, 1}}) -- applied to the previous statement
---
...
-s:select() --both upserts are ignored due to primary key change
+s:select()
---
-- - [1, 1]
+- - [1, 2]
...
--
-- gh-2520 use cache as a hint when applying upserts.
diff --git a/test/vinyl/upsert.test.lua b/test/vinyl/upsert.test.lua
index 50315ed3..a16d2cf2 100644
--- a/test/vinyl/upsert.test.lua
+++ b/test/vinyl/upsert.test.lua
@@ -276,9 +276,9 @@ i = s:create_index('test', { run_count_per_level = 20 })
s:replace({1, 1})
box.snapshot()
-s:upsert({1, 1}, {{'+', 1, 1}})
-s:upsert({1, 1}, {{'+', 2, 1}})
-s:select() --both upserts are ignored due to primary key change
+s:upsert({1, 1}, {{'+', 1, 1}}) -- ignored due to primary key changed
+s:upsert({1, 1}, {{'+', 2, 1}}) -- applied to the previous statement
+s:select()
--
-- gh-2520 use cache as a hint when applying upserts.
--
2.11.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 12/12] vinyl: zap vy_read_iterator::curr_stmt
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
` (10 preceding siblings ...)
2018-04-15 19:55 ` [PATCH 11/12] vinyl: make read iterator always return newest tuple version Vladimir Davydov
@ 2018-04-15 19:55 ` Vladimir Davydov
2018-05-04 18:05 ` [tarantool-patches] Re: [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladislav Shpilevoy
12 siblings, 0 replies; 17+ messages in thread
From: Vladimir Davydov @ 2018-04-15 19:55 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
It isn't really a part of the iterator state - it is only used while
looking up the next key. The name is confusing, too. It isn't the
current statement - it's the current candidate for the next key.
Let's remove it from vy_read_iterator struct and pass it explictily
in arguments, and call it next_key.
---
src/box/vy_read_iterator.c | 96 +++++++++++++++++++++++-----------------------
src/box/vy_read_iterator.h | 7 ++--
2 files changed, 50 insertions(+), 53 deletions(-)
diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c
index b171aa60..61c3b683 100644
--- a/src/box/vy_read_iterator.c
+++ b/src/box/vy_read_iterator.c
@@ -131,8 +131,8 @@ vy_read_iterator_unpin_slices(struct vy_read_iterator *itr)
}
/**
- * Return true if the current statement is outside the current
- * range and hence we should move to the next range.
+ * Return true if the current candidate for the next key is outside
+ * the current range and hence we should move to the next range.
*
* If we are looking for a match (EQ, REQ) and the search key
* doesn't intersect with the current range's boundary, the next
@@ -140,23 +140,23 @@ vy_read_iterator_unpin_slices(struct vy_read_iterator *itr)
* and hence there's no point in iterating to it.
*/
static bool
-vy_read_iterator_range_is_done(struct vy_read_iterator *itr)
+vy_read_iterator_range_is_done(struct vy_read_iterator *itr,
+ struct tuple *next_key)
{
- struct tuple *stmt = itr->curr_stmt;
struct vy_range *range = itr->curr_range;
struct key_def *cmp_def = itr->lsm->cmp_def;
int dir = iterator_direction(itr->iterator_type);
if (dir > 0 && range->end != NULL &&
- (stmt == NULL || vy_tuple_compare_with_key(stmt,
- range->end, cmp_def) >= 0) &&
+ (next_key == NULL || vy_tuple_compare_with_key(next_key,
+ range->end, cmp_def) >= 0) &&
(itr->iterator_type != ITER_EQ ||
vy_stmt_compare_with_key(itr->key, range->end, cmp_def) >= 0))
return true;
if (dir < 0 && range->begin != NULL &&
- (stmt == NULL || vy_tuple_compare_with_key(stmt,
- range->begin, cmp_def) < 0) &&
+ (next_key == NULL || vy_tuple_compare_with_key(next_key,
+ range->begin, cmp_def) < 0) &&
(itr->iterator_type != ITER_REQ ||
vy_stmt_compare_with_key(itr->key, range->begin, cmp_def) <= 0))
return true;
@@ -215,20 +215,21 @@ vy_read_iterator_is_exact_match(struct vy_read_iterator *itr,
/**
* Check if the statement at which the given read source
* is positioned precedes the current candidate for the
- * next key ('curr_stmt') and update the latter if so.
+ * next key ('next_key') and update the latter if so.
* The 'stop' flag is set if the next key is found and
* older sources don't need to be evaluated.
*/
static void
vy_read_iterator_evaluate_src(struct vy_read_iterator *itr,
- struct vy_read_src *src, bool *stop)
+ struct vy_read_src *src,
+ struct tuple **next_key, bool *stop)
{
uint32_t src_id = src - itr->src;
struct tuple *stmt = vy_history_last_stmt(&src->history);
- int cmp = vy_read_iterator_cmp_stmt(itr, stmt, itr->curr_stmt);
+ int cmp = vy_read_iterator_cmp_stmt(itr, stmt, *next_key);
if (cmp < 0) {
assert(stmt != NULL);
- itr->curr_stmt = stmt;
+ *next_key = stmt;
itr->front_id++;
}
if (cmp <= 0)
@@ -278,7 +279,7 @@ vy_read_src_is_behind(struct vy_read_iterator *itr, struct vy_read_src *src)
* front_id of the read iterator were used on the previous
* iteration and hence need to be advanced.
*
- * 2. Update the candidate for the next key ('curr_stmt') if the
+ * 2. Update the candidate for the next key ('next_key') if the
* statement at which the source is positioned precedes it.
* The 'stop' flag is set if older sources do not need to be
* scanned (e.g. because a chain was found in the cache).
@@ -286,7 +287,8 @@ vy_read_src_is_behind(struct vy_read_iterator *itr, struct vy_read_src *src)
*/
static NODISCARD int
-vy_read_iterator_scan_txw(struct vy_read_iterator *itr, bool *stop)
+vy_read_iterator_scan_txw(struct vy_read_iterator *itr,
+ struct tuple **next_key, bool *stop)
{
struct vy_read_src *src = &itr->src[itr->txw_src];
struct vy_txw_iterator *src_itr = &src->txw_iterator;
@@ -310,12 +312,13 @@ vy_read_iterator_scan_txw(struct vy_read_iterator *itr, bool *stop)
if (rc < 0)
return -1;
- vy_read_iterator_evaluate_src(itr, src, stop);
+ vy_read_iterator_evaluate_src(itr, src, next_key, stop);
return 0;
}
static NODISCARD int
-vy_read_iterator_scan_cache(struct vy_read_iterator *itr, bool *stop)
+vy_read_iterator_scan_cache(struct vy_read_iterator *itr,
+ struct tuple **next_key, bool *stop)
{
bool is_interval = false;
struct vy_read_src *src = &itr->src[itr->cache_src];
@@ -336,7 +339,7 @@ vy_read_iterator_scan_cache(struct vy_read_iterator *itr, bool *stop)
if (rc < 0)
return -1;
- vy_read_iterator_evaluate_src(itr, src, stop);
+ vy_read_iterator_evaluate_src(itr, src, next_key, stop);
if (is_interval) {
itr->skipped_src = itr->cache_src + 1;
*stop = true;
@@ -345,8 +348,8 @@ vy_read_iterator_scan_cache(struct vy_read_iterator *itr, bool *stop)
}
static NODISCARD int
-vy_read_iterator_scan_mem(struct vy_read_iterator *itr,
- uint32_t mem_src, bool *stop)
+vy_read_iterator_scan_mem(struct vy_read_iterator *itr, uint32_t mem_src,
+ struct tuple **next_key, bool *stop)
{
int rc;
struct vy_read_src *src = &itr->src[mem_src];
@@ -367,13 +370,13 @@ vy_read_iterator_scan_mem(struct vy_read_iterator *itr,
if (rc < 0)
return -1;
- vy_read_iterator_evaluate_src(itr, src, stop);
+ vy_read_iterator_evaluate_src(itr, src, next_key, stop);
return 0;
}
static NODISCARD int
-vy_read_iterator_scan_disk(struct vy_read_iterator *itr,
- uint32_t disk_src, bool *stop)
+vy_read_iterator_scan_disk(struct vy_read_iterator *itr, uint32_t disk_src,
+ struct tuple **next_key, bool *stop)
{
int rc = 0;
struct vy_read_src *src = &itr->src[disk_src];
@@ -391,17 +394,18 @@ vy_read_iterator_scan_disk(struct vy_read_iterator *itr,
if (rc < 0)
return -1;
- vy_read_iterator_evaluate_src(itr, src, stop);
+ vy_read_iterator_evaluate_src(itr, src, next_key, stop);
return 0;
}
/**
* Restore the position of the active in-memory tree iterator
- * after a yield caused by a disk read and update 'curr_stmt'
+ * after a yield caused by a disk read and update 'next_key'
* if necessary.
*/
static NODISCARD int
-vy_read_iterator_restore_mem(struct vy_read_iterator *itr)
+vy_read_iterator_restore_mem(struct vy_read_iterator *itr,
+ struct tuple **next_key)
{
int rc;
int cmp;
@@ -415,7 +419,7 @@ vy_read_iterator_restore_mem(struct vy_read_iterator *itr)
return 0; /* nothing changed */
struct tuple *stmt = vy_history_last_stmt(&src->history);
- cmp = vy_read_iterator_cmp_stmt(itr, stmt, itr->curr_stmt);
+ cmp = vy_read_iterator_cmp_stmt(itr, stmt, *next_key);
if (cmp > 0) {
/*
* Memory trees are append-only so if the
@@ -430,7 +434,7 @@ vy_read_iterator_restore_mem(struct vy_read_iterator *itr)
* The new statement precedes the current
* candidate for the next key.
*/
- itr->curr_stmt = stmt;
+ *next_key = stmt;
itr->front_id++;
} else {
/*
@@ -464,7 +468,7 @@ vy_read_iterator_advance(struct vy_read_iterator *itr)
* There may be one statement at max satisfying
* EQ with a full key.
*/
- itr->curr_stmt = NULL;
+ itr->front_id++;
return 0;
}
/*
@@ -478,25 +482,26 @@ vy_read_iterator_advance(struct vy_read_iterator *itr)
vy_read_iterator_restore(itr);
}
restart:
- itr->curr_stmt = NULL;
itr->prev_front_id = itr->front_id;
+ itr->front_id++;
/*
* Look up the next key in read sources starting
* from the one that stores newest data.
*/
bool stop = false;
- if (vy_read_iterator_scan_txw(itr, &stop) != 0)
+ struct tuple *next_key = NULL;
+ if (vy_read_iterator_scan_txw(itr, &next_key, &stop) != 0)
return -1;
if (stop)
goto done;
- if (vy_read_iterator_scan_cache(itr, &stop) != 0)
+ if (vy_read_iterator_scan_cache(itr, &next_key, &stop) != 0)
return -1;
if (stop)
goto done;
for (uint32_t i = itr->mem_src; i < itr->disk_src; i++) {
- if (vy_read_iterator_scan_mem(itr, i, &stop) != 0)
+ if (vy_read_iterator_scan_mem(itr, i, &next_key, &stop) != 0)
return -1;
if (stop)
goto done;
@@ -505,7 +510,7 @@ rescan_disk:
/* The following code may yield as it needs to access disk. */
vy_read_iterator_pin_slices(itr);
for (uint32_t i = itr->disk_src; i < itr->src_count; i++) {
- if (vy_read_iterator_scan_disk(itr, i, &stop) != 0) {
+ if (vy_read_iterator_scan_disk(itr, i, &next_key, &stop) != 0) {
vy_read_iterator_unpin_slices(itr);
return -1;
}
@@ -531,21 +536,21 @@ rescan_disk:
* as it is owned exclusively by the current fiber so the only
* source to check is the active in-memory tree.
*/
- if (vy_read_iterator_restore_mem(itr) != 0)
+ if (vy_read_iterator_restore_mem(itr, &next_key) != 0)
return -1;
/*
* Scan the next range in case we transgressed the current
* range's boundaries.
*/
- if (vy_read_iterator_range_is_done(itr)) {
+ if (vy_read_iterator_range_is_done(itr, next_key)) {
vy_read_iterator_next_range(itr);
goto rescan_disk;
}
done:
#ifndef NDEBUG
/* Check that the statement meets search criteria. */
- if (itr->curr_stmt != NULL) {
- int cmp = vy_stmt_compare(itr->curr_stmt, itr->key,
+ if (next_key != NULL) {
+ int cmp = vy_stmt_compare(next_key, itr->key,
itr->lsm->cmp_def);
cmp *= iterator_direction(itr->iterator_type);
if (itr->iterator_type == ITER_GT ||
@@ -558,15 +563,14 @@ done:
* Ensure the read iterator does not return duplicates
* and respects statement order.
*/
- if (itr->last_stmt != NULL && itr->curr_stmt != NULL) {
- assert(vy_read_iterator_cmp_stmt(itr, itr->curr_stmt,
+ if (itr->last_stmt != NULL && next_key != NULL) {
+ assert(vy_read_iterator_cmp_stmt(itr, next_key,
itr->last_stmt) > 0);
}
#endif
- if (itr->need_check_eq && itr->curr_stmt != NULL &&
- vy_stmt_compare(itr->curr_stmt, itr->key,
- itr->lsm->cmp_def) != 0)
- itr->curr_stmt = NULL;
+ if (itr->need_check_eq && next_key != NULL &&
+ vy_stmt_compare(next_key, itr->key, itr->lsm->cmp_def) != 0)
+ itr->front_id++;
return 0;
}
@@ -684,7 +688,6 @@ vy_read_iterator_cleanup(struct vy_read_iterator *itr)
vy_run_iterator_close(&src->run_iterator);
}
- itr->curr_stmt = NULL;
itr->txw_src = UINT32_MAX;
itr->cache_src = UINT32_MAX;
itr->mem_src = UINT32_MAX;
@@ -818,11 +821,6 @@ static NODISCARD int
vy_read_iterator_apply_history(struct vy_read_iterator *itr,
struct tuple **ret)
{
- if (itr->curr_stmt == NULL) {
- *ret = NULL;
- return 0;
- }
-
struct vy_lsm *lsm = itr->lsm;
struct vy_history history;
vy_history_create(&history, &lsm->env->history_node_pool);
diff --git a/src/box/vy_read_iterator.h b/src/box/vy_read_iterator.h
index 07c6ef8f..2cac1087 100644
--- a/src/box/vy_read_iterator.h
+++ b/src/box/vy_read_iterator.h
@@ -92,8 +92,6 @@ struct vy_read_iterator {
uint32_t src_count;
/** Maximal capacity of the src array. */
uint32_t src_capacity;
- /** Statement returned by the current merge source. */
- struct tuple *curr_stmt;
/** Offset of the transaction write set source. */
uint32_t txw_src;
/** Offset of the cache source. */
@@ -105,8 +103,9 @@ struct vy_read_iterator {
/** Offset of the first skipped source. */
uint32_t skipped_src;
/**
- * front_id of the current source and all sources
- * that are on the same key.
+ * vy_read_src::front_id <= front_id for any source.
+ * vy_read_src::front_id == front_id iff the source
+ * iterator is positioned at the next key.
*/
uint32_t front_id;
/**
--
2.11.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* [tarantool-patches] Re: [PATCH 00/12] vinyl: prepare read iterator for index rebuild
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
` (11 preceding siblings ...)
2018-04-15 19:55 ` [PATCH 12/12] vinyl: zap vy_read_iterator::curr_stmt Vladimir Davydov
@ 2018-05-04 18:05 ` Vladislav Shpilevoy
12 siblings, 0 replies; 17+ messages in thread
From: Vladislav Shpilevoy @ 2018-05-04 18:05 UTC (permalink / raw)
To: tarantool-patches, kostja
Hello. The whole patchset LGTM except one memory leak. Kostja
already pushed it, so I fixed it on the branch: vy-fix-memleak.
On 15/04/2018 22:55, Vladimir Davydov wrote:
> To be able to use read iterator for building secondary indexes in vinyl,
> we need it to guarantee that the returned tuple is always the newest
> version, which is currently not true. Fix that.
>
> https://github.com/tarantool/tarantool/commits/vy-prep-read-iterator-for-alter
>
> Vladimir Davydov (12):
> vinyl: rename vy_stmt_history to vy_history
> vinyl: factor out vy_history_apply from vy_point_lookup_apply_history
> vinyl: add vy_history_append_stmt helper
> vinyl: zap iterator_src_type enum
> vinyl: encapsulate key history with struct
> vinyl: refine vy_history_cleanup
> vinyl: move vy_history to its own source file
> vinyl: use mempool for vy_history_node allocations
> vinyl: consolidate skip optimization checks in read iterator
> vinyl: refactor vy_read_iterator_next
> vinyl: make read iterator always return newest tuple version
> vinyl: zap vy_read_iterator::curr_stmt
>
> src/box/CMakeLists.txt | 1 +
> src/box/vy_cache.c | 51 ++---
> src/box/vy_cache.h | 31 +--
> src/box/vy_history.c | 115 ++++++++++
> src/box/vy_history.h | 165 +++++++++++++
> src/box/vy_lsm.c | 5 +
> src/box/vy_lsm.h | 4 +-
> src/box/vy_mem.c | 93 +++-----
> src/box/vy_mem.h | 33 +--
> src/box/vy_point_lookup.c | 228 +++---------------
> src/box/vy_read_iterator.c | 560 +++++++++++++++++++--------------------------
> src/box/vy_read_iterator.h | 11 +-
> src/box/vy_run.c | 68 ++++--
> src/box/vy_run.h | 23 +-
> src/box/vy_tx.c | 55 +++--
> src/box/vy_tx.h | 29 ++-
> test/unit/CMakeLists.txt | 3 +
> test/unit/vy_cache.c | 16 +-
> test/unit/vy_mem.c | 24 +-
> test/vinyl/upsert.result | 8 +-
> test/vinyl/upsert.test.lua | 6 +-
> 21 files changed, 788 insertions(+), 741 deletions(-)
> create mode 100644 src/box/vy_history.c
> create mode 100644 src/box/vy_history.h
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* [tarantool-patches] Re: [PATCH 02/12] vinyl: factor out vy_history_apply from vy_point_lookup_apply_history
2018-04-15 19:55 ` [PATCH 02/12] vinyl: factor out vy_history_apply from vy_point_lookup_apply_history Vladimir Davydov
@ 2018-05-14 18:19 ` Vladislav Shpilevoy
0 siblings, 0 replies; 17+ messages in thread
From: Vladislav Shpilevoy @ 2018-05-14 18:19 UTC (permalink / raw)
To: tarantool-patches, kostja
Hello. I found one moment, that can be checked.
On 15/04/2018 22:55, Vladimir Davydov wrote:
> Apart from applying a key history, vy_point_lookup_apply_history also
> adds the resultant tuple to the cache and updates LSM stats. Let's
> factor out history manipulation into a separate function and put
> everything else in vy_point_lookup so that we can make vy_history an
> independent entity.
> ---
> src/box/vy_point_lookup.c | 41 ++++++++++++++++++-----------------------
> 1 file changed, 18 insertions(+), 23 deletions(-)
>
> diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
> index 32048654..5d3076d9 100644
> --- a/src/box/vy_point_lookup.c
> +++ b/src/box/vy_point_lookup.c
> @@ -440,14 +427,22 @@ vy_point_lookup_apply_history(struct vy_lsm *lsm,
>
> done:
> if (rc == 0) {
> - rc = vy_point_lookup_apply_history(lsm, rv, key,
> - &history, ret);
> + int upserts_applied;
> + rc = vy_history_apply(&history, lsm->cmp_def, lsm->mem_format,
> + &upserts_applied, ret);
> + lsm->stat.upsert.applied += upserts_applied;
> }
> vy_history_cleanup(&history, region_svp);
>
> if (rc != 0)
> return -1;
>
> + if (*ret != NULL) {
> + vy_stmt_counter_acct_tuple(&lsm->stat.get, *ret);
> + if ((*rv)->vlsn == INT64_MAX)
> + vy_cache_add(&lsm->cache, *ret, NULL, key, ITER_EQ);
In the previous history applier vy_cache_add was called always, even if *ret is
not found (== NULL). But now it is called only when it is not NULL.
Maybe it is ok, because vy_cached_add does nothing on [NULL, NULL] chain. I wrote
it just for record.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [tarantool-patches] Re: [PATCH 09/12] vinyl: consolidate skip optimization checks in read iterator
2018-04-15 19:55 ` [PATCH 09/12] vinyl: consolidate skip optimization checks in read iterator Vladimir Davydov
@ 2018-05-14 18:25 ` Vladislav Shpilevoy
2018-05-15 15:00 ` Vladimir Davydov
0 siblings, 1 reply; 17+ messages in thread
From: Vladislav Shpilevoy @ 2018-05-14 18:25 UTC (permalink / raw)
To: tarantool-patches, kostja
Hello. I see several unsettling problems here.
1. Old skip functions had returned their last_stmt (itr->last_stmt), that
updates their current statement (itr->curr_stmt). This was done if an iterator
is not 'behind' according to the new terminology. But now if an iterator is
behind, it does nothing and does not update itr->curr_stmt as well.
2. Old vy_cache_iterator_skip in the non-behind state could set 'stop'
flag triggering skipped_src propagation, but now if cache is not behind,
stop flag is always false, and skipped_src is not propagated.
3. vy_read_iterator_scan_txw does not use vy_read_src_is_behind() - it
continues old way usage:
if (!src->is_started)
vy_txw_iterator_skip(src_itr, itr->last_stmt, &src->stmt);
On 15/04/2018 22:55, Vladimir Davydov wrote:
> Each kind of source iterator has 'skip' method, which is called to
> position the iterator to a specified key. It is used to advance
> sources that were skipped on the previous iteration (e.g. because
> the result was found in the cache). The method has an optimization:
> it avoids a lookup in the index if it is already positioned at a
> statement following the specified key. This optimization makes the
> method difficult to use if we want to keep a key history in each
> source instead of a single statement, as we don't know whether 'skip'
> changed the position or not and hence whether we need to rebuild key
> history or not. Let's move the optimization to the read iterator and
> make 'skip' plain and simple: let it always reposition the iterator
> to the first statement following a given key.
> ---
> src/box/vy_cache.c | 19 -------------------
> src/box/vy_mem.c | 14 --------------
> src/box/vy_read_iterator.c | 23 ++++++++++++++++++++---
> src/box/vy_run.c | 13 -------------
> src/box/vy_tx.c | 14 --------------
> 5 files changed, 20 insertions(+), 63 deletions(-)
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [tarantool-patches] Re: [PATCH 09/12] vinyl: consolidate skip optimization checks in read iterator
2018-05-14 18:25 ` [tarantool-patches] " Vladislav Shpilevoy
@ 2018-05-15 15:00 ` Vladimir Davydov
0 siblings, 0 replies; 17+ messages in thread
From: Vladimir Davydov @ 2018-05-15 15:00 UTC (permalink / raw)
To: Vladislav Shpilevoy; +Cc: tarantool-patches, kostja
On Mon, May 14, 2018 at 09:25:45PM +0300, Vladislav Shpilevoy wrote:
> 1. Old skip functions had returned their last_stmt (itr->last_stmt), that
> updates their current statement (itr->curr_stmt). This was done if an iterator
> is not 'behind' according to the new terminology. But now if an iterator is
> behind, it does nothing and does not update itr->curr_stmt as well.
If a source iterator is behind the current read iterator position,
'skip' method will update src_itr->curr_stmt, just like before.
Otherwise, there's no need to update src_itr->curr_stmt, because
the position is up-to-date as the source was not skipped at the
previous iteration, neither was it restored. All we need to do in
this case is move the source to the next key provided it was used
at the previous iteration.
>
> 2. Old vy_cache_iterator_skip in the non-behind state could set 'stop'
> flag triggering skipped_src propagation, but now if cache is not behind,
> stop flag is always false, and skipped_src is not propagated.
If the cache iterator is up-to-date with the current read iterator
position (not behind), its 'skip' method is not called, but it still can
(and does) set the 'stop' flag in the 'next_key' method.
>
> 3. vy_read_iterator_scan_txw does not use vy_read_src_is_behind() - it
> continues old way usage:
> if (!src->is_started)
> vy_txw_iterator_skip(src_itr, itr->last_stmt, &src->stmt);
Because txw iterator cannot be behind the current position (skipped),
and there's an assertion for this in vy_read_iterator_scan_txw():
assert(itr->txw_src < itr->skipped_src);
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2018-05-15 15:00 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
2018-04-15 19:55 ` [PATCH 01/12] vinyl: rename vy_stmt_history to vy_history Vladimir Davydov
2018-04-15 19:55 ` [PATCH 02/12] vinyl: factor out vy_history_apply from vy_point_lookup_apply_history Vladimir Davydov
2018-05-14 18:19 ` [tarantool-patches] " Vladislav Shpilevoy
2018-04-15 19:55 ` [PATCH 03/12] vinyl: add vy_history_append_stmt helper Vladimir Davydov
2018-04-15 19:55 ` [PATCH 04/12] vinyl: zap iterator_src_type enum Vladimir Davydov
2018-04-15 19:55 ` [PATCH 05/12] vinyl: encapsulate key history with struct Vladimir Davydov
2018-04-15 19:55 ` [PATCH 06/12] vinyl: refine vy_history_cleanup Vladimir Davydov
2018-04-15 19:55 ` [PATCH 07/12] vinyl: move vy_history to its own source file Vladimir Davydov
2018-04-15 19:55 ` [PATCH 08/12] vinyl: use mempool for vy_history_node allocations Vladimir Davydov
2018-04-15 19:55 ` [PATCH 09/12] vinyl: consolidate skip optimization checks in read iterator Vladimir Davydov
2018-05-14 18:25 ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-15 15:00 ` Vladimir Davydov
2018-04-15 19:55 ` [PATCH 10/12] vinyl: refactor vy_read_iterator_next Vladimir Davydov
2018-04-15 19:55 ` [PATCH 11/12] vinyl: make read iterator always return newest tuple version Vladimir Davydov
2018-04-15 19:55 ` [PATCH 12/12] vinyl: zap vy_read_iterator::curr_stmt Vladimir Davydov
2018-05-04 18:05 ` [tarantool-patches] Re: [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladislav Shpilevoy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox