* [PATCH] vinyl: account statements skipped on read
@ 2019-04-12 8:34 Vladimir Davydov
2019-04-12 9:53 ` Vladimir Davydov
0 siblings, 1 reply; 2+ messages in thread
From: Vladimir Davydov @ 2019-04-12 8:34 UTC (permalink / raw)
To: kostja; +Cc: tarantool-patches
After we retrieve a statement from a secondary index, we always do
a lookup in the primary index to get the full tuple corresponding to
the found secondary key. It may turn out that the full tuple doesn't
match the secondary key, which means the key was overwritten, but
the DELETE statement hasn't been propagated yet (aka deferred DELETE).
Currently, there's no way to figure out how often this happens as all
tuples read from an LSM tree are accounted under 'get' counter.
So this patch splits 'get' in two: 'get', which now accounts only
tuples actually returned to the user, and 'skip', which accounts
skipped tuples.
---
https://github.com/tarantool/tarantool/commits/dv/vy-account-skipped-statements
src/box/vinyl.c | 17 ++++++-
src/box/vy_point_lookup.c | 5 --
src/box/vy_read_iterator.c | 7 ---
src/box/vy_stat.h | 2 +
test/vinyl/deferred_delete.result | 88 +++++++++++++++++++++++++++++++----
test/vinyl/deferred_delete.test.lua | 26 +++++++++--
test/vinyl/errinj.result | 6 +--
test/vinyl/errinj.test.lua | 6 +--
test/vinyl/errinj_ddl.result | 8 ++--
test/vinyl/errinj_ddl.test.lua | 8 ++--
test/vinyl/stat.result | 92 ++++++++++++++++++++-----------------
11 files changed, 183 insertions(+), 82 deletions(-)
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index b9bf721a..2e1e717e 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -382,6 +382,7 @@ vinyl_index_stat(struct index *index, struct info_handler *h)
info_append_int(h, "lookup", stat->lookup);
vy_info_append_stmt_counter(h, "get", &stat->get);
+ vy_info_append_stmt_counter(h, "skip", &stat->skip);
vy_info_append_stmt_counter(h, "put", &stat->put);
info_table_begin(h, "latency");
@@ -484,6 +485,7 @@ vinyl_index_reset_stat(struct index *index)
stat->lookup = 0;
latency_reset(&stat->latency);
vy_stmt_counter_reset(&stat->get);
+ vy_stmt_counter_reset(&stat->skip);
vy_stmt_counter_reset(&stat->put);
memset(&stat->upsert, 0, sizeof(stat->upsert));
@@ -1344,6 +1346,8 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx,
}
key.hint = vy_stmt_hint(key.stmt, lsm->pk->cmp_def);
+ lsm->pk->stat.lookup++;
+
if (vy_point_lookup(lsm->pk, tx, rv, key, result) != 0) {
rc = -1;
goto out;
@@ -1364,7 +1368,10 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx,
* propagated to the secondary index yet. In this
* case silently skip this tuple.
*/
+ vy_stmt_counter_acct_tuple(&lsm->stat.skip, entry.stmt);
if (result->stmt != NULL) {
+ vy_stmt_counter_acct_tuple(&lsm->pk->stat.skip,
+ result->stmt);
tuple_unref(result->stmt);
*result = vy_entry_none();
}
@@ -1399,6 +1406,8 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx,
/* Inherit the hint from the secondary index entry. */
result->hint = entry.hint;
+
+ vy_stmt_counter_acct_tuple(&lsm->pk->stat.get, result->stmt);
out:
tuple_unref(key.stmt);
return rc;
@@ -1435,6 +1444,8 @@ vy_get(struct vy_lsm *lsm, struct vy_tx *tx,
key.stmt = key_stmt;
key.hint = vy_stmt_hint(key.stmt, lsm->cmp_def);
+ lsm->stat.lookup++;
+
if (vy_stmt_is_full_key(key.stmt, lsm->cmp_def)) {
/*
* Use point lookup for a full key.
@@ -1489,6 +1500,8 @@ out:
vy_lsm_name(lsm), tuple_str(key.stmt),
tuple_str(*result), latency);
}
+ if (*result != NULL)
+ vy_stmt_counter_acct_tuple(&lsm->stat.get, *result);
return 0;
}
@@ -3797,7 +3810,8 @@ vinyl_iterator_account_read(struct vinyl_iterator *it, double start_time,
iterator_type_strs[type],
tuple_str(result), latency);
}
-
+ if (result != NULL)
+ vy_stmt_counter_acct_tuple(&lsm->stat.get, result);
}
static int
@@ -3947,6 +3961,7 @@ vinyl_index_create_iterator(struct index *base, enum iterator_type type,
}
it->tx = tx;
+ lsm->stat.lookup++;
vy_read_iterator_open(&it->iterator, lsm, tx, type, it->key,
(const struct vy_read_view **)&tx->read_view);
return (struct iterator *)it;
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index becc0486..ecdcde7d 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -203,8 +203,6 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx,
*ret = vy_entry_none();
int rc = 0;
- lsm->stat.lookup++;
-
/* History list */
struct vy_history history, mem_history, disk_history;
vy_history_create(&history, &lsm->env->history_node_pool);
@@ -294,9 +292,6 @@ done:
if (rc != 0)
return -1;
- if (ret->stmt != NULL)
- vy_stmt_counter_acct_tuple(&lsm->stat.get, ret->stmt);
-
return 0;
}
diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c
index b26750e9..9ea07709 100644
--- a/src/box/vy_read_iterator.c
+++ b/src/box/vy_read_iterator.c
@@ -857,11 +857,7 @@ vy_read_iterator_next(struct vy_read_iterator *itr, struct vy_entry *result)
{
assert(itr->tx == NULL || itr->tx->state == VINYL_TX_READY);
- struct vy_lsm *lsm = itr->lsm;
struct vy_entry entry;
-
- if (itr->last.stmt == NULL)
- lsm->stat.lookup++; /* first iteration */
next_key:
if (vy_read_iterator_advance(itr) != 0)
return -1;
@@ -893,9 +889,6 @@ next_key:
vy_stmt_type(entry.stmt) == IPROTO_INSERT ||
vy_stmt_type(entry.stmt) == IPROTO_REPLACE);
- if (entry.stmt != NULL)
- vy_stmt_counter_acct_tuple(&lsm->stat.get, entry.stmt);
-
*result = entry;
return 0;
}
diff --git a/src/box/vy_stat.h b/src/box/vy_stat.h
index b7f50d93..a3021a0c 100644
--- a/src/box/vy_stat.h
+++ b/src/box/vy_stat.h
@@ -117,6 +117,8 @@ struct vy_lsm_stat {
int64_t lookup;
/** Number of statements read from this LSM tree. */
struct vy_stmt_counter get;
+ /** Number of statements skipped on read. */
+ struct vy_stmt_counter skip;
/** Number of statements written to this LSM tree. */
struct vy_stmt_counter put;
/** Read latency. */
diff --git a/test/vinyl/deferred_delete.result b/test/vinyl/deferred_delete.result
index 0a380ed5..422f2fe0 100644
--- a/test/vinyl/deferred_delete.result
+++ b/test/vinyl/deferred_delete.result
@@ -71,14 +71,26 @@ i1:select()
- [8, 80, 800]
- [10, 100, 1000]
...
-i1:stat().get.rows -- 15
+i1:stat().get.rows -- 5
---
-- 15
+- 5
+...
+i1:stat().skip.rows -- 10
+---
+- 10
...
pk:stat().lookup -- 15
---
- 15
...
+pk:stat().get.rows -- 5
+---
+- 5
+...
+pk:stat().skip.rows -- 5
+---
+- 5
+...
i2:select()
---
- - [2, 20, 200]
@@ -87,14 +99,26 @@ i2:select()
- [8, 80, 800]
- [10, 100, 1000]
...
-i2:stat().get.rows -- 15
+i2:stat().get.rows -- 5
---
-- 15
+- 5
+...
+i2:stat().skip.rows -- 10
+---
+- 10
...
pk:stat().lookup -- 30
---
- 30
...
+pk:stat().get.rows -- 10
+---
+- 10
+...
+pk:stat().skip.rows -- 10
+---
+- 10
+...
-- Overwritten/deleted tuples are not stored in the cache so calling
-- SELECT for a second time does only 5 lookups.
box.stat.reset()
@@ -112,10 +136,22 @@ i1:stat().get.rows -- 5
---
- 5
...
+i1:stat().skip.rows -- 0
+---
+- 0
+...
pk:stat().lookup -- 5
---
- 5
...
+pk:stat().get.rows -- 5
+---
+- 5
+...
+pk:stat().skip.rows -- 0
+---
+- 0
+...
i2:select()
---
- - [2, 20, 200]
@@ -128,10 +164,22 @@ i2:stat().get.rows -- 5
---
- 5
...
+i2:stat().skip.rows -- 0
+---
+- 0
+...
pk:stat().lookup -- 10
---
- 10
...
+pk:stat().get.rows -- 10
+---
+- 10
+...
+pk:stat().skip.rows -- 0
+---
+- 0
+...
-- Cleanup the cache.
vinyl_cache = box.cfg.vinyl_cache
---
@@ -179,14 +227,26 @@ i1:select()
- [8, 80, 800]
- [10, 100, 1000]
...
-i1:stat().get.rows -- 15
+i1:stat().get.rows -- 5
---
-- 15
+- 5
+...
+i1:stat().skip.rows -- 10
+---
+- 10
...
pk:stat().lookup -- 15
---
- 15
...
+pk:stat().get.rows -- 5
+---
+- 5
+...
+pk:stat().skip.rows -- 5
+---
+- 5
+...
i2:select()
---
- - [2, 20, 200]
@@ -195,14 +255,26 @@ i2:select()
- [8, 80, 800]
- [10, 100, 1000]
...
-i2:stat().get.rows -- 15
+i2:stat().get.rows -- 5
---
-- 15
+- 5
+...
+i2:stat().skip.rows -- 10
+---
+- 10
...
pk:stat().lookup -- 30
---
- 30
...
+pk:stat().get.rows -- 10
+---
+- 10
+...
+pk:stat().skip.rows -- 10
+---
+- 10
+...
-- Check that deferred DELETEs are not lost after restart.
test_run:cmd("restart server default")
fiber = require('fiber')
diff --git a/test/vinyl/deferred_delete.test.lua b/test/vinyl/deferred_delete.test.lua
index 240429fc..6dfcbde5 100644
--- a/test/vinyl/deferred_delete.test.lua
+++ b/test/vinyl/deferred_delete.test.lua
@@ -33,21 +33,33 @@ i2:stat().rows -- ditto
-- overwritten tuples in the primary index hence 15 lookups per SELECT
-- in a secondary index.
i1:select()
-i1:stat().get.rows -- 15
+i1:stat().get.rows -- 5
+i1:stat().skip.rows -- 10
pk:stat().lookup -- 15
+pk:stat().get.rows -- 5
+pk:stat().skip.rows -- 5
i2:select()
-i2:stat().get.rows -- 15
+i2:stat().get.rows -- 5
+i2:stat().skip.rows -- 10
pk:stat().lookup -- 30
+pk:stat().get.rows -- 10
+pk:stat().skip.rows -- 10
-- Overwritten/deleted tuples are not stored in the cache so calling
-- SELECT for a second time does only 5 lookups.
box.stat.reset()
i1:select()
i1:stat().get.rows -- 5
+i1:stat().skip.rows -- 0
pk:stat().lookup -- 5
+pk:stat().get.rows -- 5
+pk:stat().skip.rows -- 0
i2:select()
i2:stat().get.rows -- 5
+i2:stat().skip.rows -- 0
pk:stat().lookup -- 10
+pk:stat().get.rows -- 10
+pk:stat().skip.rows -- 0
-- Cleanup the cache.
vinyl_cache = box.cfg.vinyl_cache
@@ -67,11 +79,17 @@ i2:stat().rows -- ditto
-- the number of lookups.
box.stat.reset()
i1:select()
-i1:stat().get.rows -- 15
+i1:stat().get.rows -- 5
+i1:stat().skip.rows -- 10
pk:stat().lookup -- 15
+pk:stat().get.rows -- 5
+pk:stat().skip.rows -- 5
i2:select()
-i2:stat().get.rows -- 15
+i2:stat().get.rows -- 5
+i2:stat().skip.rows -- 10
pk:stat().lookup -- 30
+pk:stat().get.rows -- 10
+pk:stat().skip.rows -- 10
-- Check that deferred DELETEs are not lost after restart.
test_run:cmd("restart server default")
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index f6d37769..6fc4d9e5 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -825,15 +825,15 @@ errinj.set("ERRINJ_VY_DELAY_PK_LOOKUP", true)
_ = fiber.create(do_read)
---
...
-test_run:wait_cond(function() return sk:stat().get.rows > 0 end, 60)
+test_run:wait_cond(function() return sk:stat().disk.iterator.get.rows > 0 end, 60)
---
- true
...
-pk:stat().get.rows -- 0
+pk:stat().disk.iterator.get.rows -- 0
---
- 0
...
-sk:stat().get.rows -- 1
+sk:stat().disk.iterator.get.rows -- 1
---
- 1
...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 4820ef97..b500b33c 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -297,9 +297,9 @@ ret = nil
function do_read() ret = sk:select({2}, {iterator = 'GE'}) end
errinj.set("ERRINJ_VY_DELAY_PK_LOOKUP", true)
_ = fiber.create(do_read)
-test_run:wait_cond(function() return sk:stat().get.rows > 0 end, 60)
-pk:stat().get.rows -- 0
-sk:stat().get.rows -- 1
+test_run:wait_cond(function() return sk:stat().disk.iterator.get.rows > 0 end, 60)
+pk:stat().disk.iterator.get.rows -- 0
+sk:stat().disk.iterator.get.rows -- 1
s:replace{2, 2}
errinj.set("ERRINJ_VY_DELAY_PK_LOOKUP", false)
test_run:wait_cond(function() return pk:stat().get.rows > 0 end, 60)
diff --git a/test/vinyl/errinj_ddl.result b/test/vinyl/errinj_ddl.result
index af5f73d4..45e116a1 100644
--- a/test/vinyl/errinj_ddl.result
+++ b/test/vinyl/errinj_ddl.result
@@ -454,10 +454,10 @@ test_run:cmd("setopt delimiter ''");
- true
...
-- Wait until DDL starts scanning the altered space.
-lookup = i1:stat().lookup
+lookup = i1:stat().disk.iterator.lookup
---
...
-wait_cond = function() return i1:stat().lookup > lookup end
+wait_cond = function() return i1:stat().disk.iterator.lookup > lookup end
---
...
c1 = async_replace(s1, {1}, wait_cond)
@@ -501,10 +501,10 @@ s1:format{}
---
...
-- Wait until DDL starts scanning the altered space.
-lookup = i1:stat().lookup
+lookup = i1:stat().disk.iterator.lookup
---
...
-wait_cond = function() return i1:stat().lookup > lookup end
+wait_cond = function() return i1:stat().disk.iterator.lookup > lookup end
---
...
c1 = async_replace(s1, {2}, wait_cond)
diff --git a/test/vinyl/errinj_ddl.test.lua b/test/vinyl/errinj_ddl.test.lua
index 57af9c65..5870a0b0 100644
--- a/test/vinyl/errinj_ddl.test.lua
+++ b/test/vinyl/errinj_ddl.test.lua
@@ -204,8 +204,8 @@ end;
test_run:cmd("setopt delimiter ''");
-- Wait until DDL starts scanning the altered space.
-lookup = i1:stat().lookup
-wait_cond = function() return i1:stat().lookup > lookup end
+lookup = i1:stat().disk.iterator.lookup
+wait_cond = function() return i1:stat().disk.iterator.lookup > lookup end
c1 = async_replace(s1, {1}, wait_cond)
c2 = async_replace(s2, {1}, wait_cond)
@@ -222,8 +222,8 @@ s1:format()
s1:format{}
-- Wait until DDL starts scanning the altered space.
-lookup = i1:stat().lookup
-wait_cond = function() return i1:stat().lookup > lookup end
+lookup = i1:stat().disk.iterator.lookup
+wait_cond = function() return i1:stat().disk.iterator.lookup > lookup end
c1 = async_replace(s1, {2}, wait_cond)
c2 = async_replace(s2, {2}, wait_cond)
diff --git a/test/vinyl/stat.result b/test/vinyl/stat.result
index f49394f7..aa47a7be 100644
--- a/test/vinyl/stat.result
+++ b/test/vinyl/stat.result
@@ -129,10 +129,8 @@ test_run:cmd("setopt delimiter ''");
-- initially stats are empty
istat()
---
-- upsert:
- squashed: 0
- applied: 0
- bytes: 0
+- range_count: 1
+ lookup: 0
cache:
invalidate:
rows: 0
@@ -212,20 +210,25 @@ istat()
rows: 0
run_avg: 0
dumps_per_compaction: 0
- lookup: 0
- range_count: 1
- txw:
+ upsert:
+ squashed: 0
+ applied: 0
+ bytes: 0
+ put:
+ rows: 0
bytes: 0
+ skip:
rows: 0
- iterator:
- lookup: 0
- get:
- rows: 0
- bytes: 0
+ bytes: 0
run_count: 0
- put:
- rows: 0
+ txw:
bytes: 0
+ rows: 0
+ iterator:
+ lookup: 0
+ get:
+ rows: 0
+ bytes: 0
memory:
bytes: 0
index_size: 0
@@ -348,7 +351,6 @@ stat_diff(istat(), st)
- put:
rows: 50
bytes: 53050
- rows: 25
bytes: 26042
disk:
last_level:
@@ -385,6 +387,7 @@ stat_diff(istat(), st)
pages: 13
bytes_compressed: <bytes_compressed>
rows: 50
+ rows: 25
...
-- point lookup from disk + cache put
st = istat()
@@ -404,6 +407,7 @@ stat_diff(istat(), st)
put:
rows: 1
bytes: 1061
+ lookup: 1
disk:
iterator:
read:
@@ -415,7 +419,6 @@ stat_diff(istat(), st)
get:
rows: 1
bytes: 1061
- lookup: 1
memory:
iterator:
lookup: 1
@@ -655,19 +658,6 @@ stat_diff(istat(), st)
put:
rows: 51
bytes: 54111
- lookup: 1
- txw:
- iterator:
- lookup: 1
- get:
- rows: 50
- bytes: 53050
- memory:
- iterator:
- lookup: 1
- get:
- rows: 100
- bytes: 106100
disk:
iterator:
read:
@@ -679,6 +669,19 @@ stat_diff(istat(), st)
get:
rows: 100
bytes: 106100
+ txw:
+ iterator:
+ lookup: 1
+ get:
+ rows: 50
+ bytes: 53050
+ memory:
+ iterator:
+ lookup: 1
+ get:
+ rows: 100
+ bytes: 106100
+ lookup: 1
get:
rows: 100
bytes: 106100
@@ -1001,10 +1004,8 @@ box.stat.reset()
...
istat()
---
-- upsert:
- squashed: 0
- applied: 0
- bytes: 317731
+- range_count: 2
+ lookup: 0
cache:
invalidate:
rows: 0
@@ -1084,20 +1085,25 @@ istat()
rows: 306
run_avg: 1
dumps_per_compaction: 1
- lookup: 0
- range_count: 2
- txw:
+ upsert:
+ squashed: 0
+ applied: 0
+ bytes: 317731
+ put:
+ rows: 0
bytes: 0
+ skip:
rows: 0
- iterator:
- lookup: 0
- get:
- rows: 0
- bytes: 0
+ bytes: 0
run_count: 2
- put:
- rows: 0
+ txw:
bytes: 0
+ rows: 0
+ iterator:
+ lookup: 0
+ get:
+ rows: 0
+ bytes: 0
memory:
bytes: 213431
index_size: 49152
--
2.11.0
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-04-12 9:53 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-12 8:34 [PATCH] vinyl: account statements skipped on read Vladimir Davydov
2019-04-12 9:53 ` Vladimir Davydov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox