[PATCH] vinyl: account statements skipped on read

Vladimir Davydov vdavydov.dev at gmail.com
Fri Apr 12 11:34:14 MSK 2019


After we retrieve a statement from a secondary index, we always do
a lookup in the primary index to get the full tuple corresponding to
the found secondary key. It may turn out that the full tuple doesn't
match the secondary key, which means the key was overwritten, but
the DELETE statement hasn't been propagated yet (aka deferred DELETE).
Currently, there's no way to figure out how often this happens as all
tuples read from an LSM tree are accounted under 'get' counter.

So this patch splits 'get' in two: 'get', which now accounts only
tuples actually returned to the user, and 'skip', which accounts
skipped tuples.
---
https://github.com/tarantool/tarantool/commits/dv/vy-account-skipped-statements

 src/box/vinyl.c                     | 17 ++++++-
 src/box/vy_point_lookup.c           |  5 --
 src/box/vy_read_iterator.c          |  7 ---
 src/box/vy_stat.h                   |  2 +
 test/vinyl/deferred_delete.result   | 88 +++++++++++++++++++++++++++++++----
 test/vinyl/deferred_delete.test.lua | 26 +++++++++--
 test/vinyl/errinj.result            |  6 +--
 test/vinyl/errinj.test.lua          |  6 +--
 test/vinyl/errinj_ddl.result        |  8 ++--
 test/vinyl/errinj_ddl.test.lua      |  8 ++--
 test/vinyl/stat.result              | 92 ++++++++++++++++++++-----------------
 11 files changed, 183 insertions(+), 82 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index b9bf721a..2e1e717e 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -382,6 +382,7 @@ vinyl_index_stat(struct index *index, struct info_handler *h)
 
 	info_append_int(h, "lookup", stat->lookup);
 	vy_info_append_stmt_counter(h, "get", &stat->get);
+	vy_info_append_stmt_counter(h, "skip", &stat->skip);
 	vy_info_append_stmt_counter(h, "put", &stat->put);
 
 	info_table_begin(h, "latency");
@@ -484,6 +485,7 @@ vinyl_index_reset_stat(struct index *index)
 	stat->lookup = 0;
 	latency_reset(&stat->latency);
 	vy_stmt_counter_reset(&stat->get);
+	vy_stmt_counter_reset(&stat->skip);
 	vy_stmt_counter_reset(&stat->put);
 	memset(&stat->upsert, 0, sizeof(stat->upsert));
 
@@ -1344,6 +1346,8 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx,
 	}
 	key.hint = vy_stmt_hint(key.stmt, lsm->pk->cmp_def);
 
+	lsm->pk->stat.lookup++;
+
 	if (vy_point_lookup(lsm->pk, tx, rv, key, result) != 0) {
 		rc = -1;
 		goto out;
@@ -1364,7 +1368,10 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx,
 		 * propagated to the secondary index yet. In this
 		 * case silently skip this tuple.
 		 */
+		vy_stmt_counter_acct_tuple(&lsm->stat.skip, entry.stmt);
 		if (result->stmt != NULL) {
+			vy_stmt_counter_acct_tuple(&lsm->pk->stat.skip,
+						   result->stmt);
 			tuple_unref(result->stmt);
 			*result = vy_entry_none();
 		}
@@ -1399,6 +1406,8 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx,
 
 	/* Inherit the hint from the secondary index entry. */
 	result->hint = entry.hint;
+
+	vy_stmt_counter_acct_tuple(&lsm->pk->stat.get, result->stmt);
 out:
 	tuple_unref(key.stmt);
 	return rc;
@@ -1435,6 +1444,8 @@ vy_get(struct vy_lsm *lsm, struct vy_tx *tx,
 	key.stmt = key_stmt;
 	key.hint = vy_stmt_hint(key.stmt, lsm->cmp_def);
 
+	lsm->stat.lookup++;
+
 	if (vy_stmt_is_full_key(key.stmt, lsm->cmp_def)) {
 		/*
 		 * Use point lookup for a full key.
@@ -1489,6 +1500,8 @@ out:
 				     vy_lsm_name(lsm), tuple_str(key.stmt),
 				     tuple_str(*result), latency);
 	}
+	if (*result != NULL)
+		vy_stmt_counter_acct_tuple(&lsm->stat.get, *result);
 	return 0;
 }
 
@@ -3797,7 +3810,8 @@ vinyl_iterator_account_read(struct vinyl_iterator *it, double start_time,
 				     iterator_type_strs[type],
 				     tuple_str(result), latency);
 	}
-
+	if (result != NULL)
+		vy_stmt_counter_acct_tuple(&lsm->stat.get, result);
 }
 
 static int
@@ -3947,6 +3961,7 @@ vinyl_index_create_iterator(struct index *base, enum iterator_type type,
 	}
 	it->tx = tx;
 
+	lsm->stat.lookup++;
 	vy_read_iterator_open(&it->iterator, lsm, tx, type, it->key,
 			      (const struct vy_read_view **)&tx->read_view);
 	return (struct iterator *)it;
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index becc0486..ecdcde7d 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -203,8 +203,6 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx,
 	*ret = vy_entry_none();
 	int rc = 0;
 
-	lsm->stat.lookup++;
-
 	/* History list */
 	struct vy_history history, mem_history, disk_history;
 	vy_history_create(&history, &lsm->env->history_node_pool);
@@ -294,9 +292,6 @@ done:
 	if (rc != 0)
 		return -1;
 
-	if (ret->stmt != NULL)
-		vy_stmt_counter_acct_tuple(&lsm->stat.get, ret->stmt);
-
 	return 0;
 }
 
diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c
index b26750e9..9ea07709 100644
--- a/src/box/vy_read_iterator.c
+++ b/src/box/vy_read_iterator.c
@@ -857,11 +857,7 @@ vy_read_iterator_next(struct vy_read_iterator *itr, struct vy_entry *result)
 {
 	assert(itr->tx == NULL || itr->tx->state == VINYL_TX_READY);
 
-	struct vy_lsm *lsm = itr->lsm;
 	struct vy_entry entry;
-
-	if (itr->last.stmt == NULL)
-		lsm->stat.lookup++; /* first iteration */
 next_key:
 	if (vy_read_iterator_advance(itr) != 0)
 		return -1;
@@ -893,9 +889,6 @@ next_key:
 	       vy_stmt_type(entry.stmt) == IPROTO_INSERT ||
 	       vy_stmt_type(entry.stmt) == IPROTO_REPLACE);
 
-	if (entry.stmt != NULL)
-		vy_stmt_counter_acct_tuple(&lsm->stat.get, entry.stmt);
-
 	*result = entry;
 	return 0;
 }
diff --git a/src/box/vy_stat.h b/src/box/vy_stat.h
index b7f50d93..a3021a0c 100644
--- a/src/box/vy_stat.h
+++ b/src/box/vy_stat.h
@@ -117,6 +117,8 @@ struct vy_lsm_stat {
 	int64_t lookup;
 	/** Number of statements read from this LSM tree. */
 	struct vy_stmt_counter get;
+	/** Number of statements skipped on read. */
+	struct vy_stmt_counter skip;
 	/** Number of statements written to this LSM tree. */
 	struct vy_stmt_counter put;
 	/** Read latency. */
diff --git a/test/vinyl/deferred_delete.result b/test/vinyl/deferred_delete.result
index 0a380ed5..422f2fe0 100644
--- a/test/vinyl/deferred_delete.result
+++ b/test/vinyl/deferred_delete.result
@@ -71,14 +71,26 @@ i1:select()
   - [8, 80, 800]
   - [10, 100, 1000]
 ...
-i1:stat().get.rows -- 15
+i1:stat().get.rows -- 5
 ---
-- 15
+- 5
+...
+i1:stat().skip.rows -- 10
+---
+- 10
 ...
 pk:stat().lookup -- 15
 ---
 - 15
 ...
+pk:stat().get.rows -- 5
+---
+- 5
+...
+pk:stat().skip.rows -- 5
+---
+- 5
+...
 i2:select()
 ---
 - - [2, 20, 200]
@@ -87,14 +99,26 @@ i2:select()
   - [8, 80, 800]
   - [10, 100, 1000]
 ...
-i2:stat().get.rows -- 15
+i2:stat().get.rows -- 5
 ---
-- 15
+- 5
+...
+i2:stat().skip.rows -- 10
+---
+- 10
 ...
 pk:stat().lookup -- 30
 ---
 - 30
 ...
+pk:stat().get.rows -- 10
+---
+- 10
+...
+pk:stat().skip.rows -- 10
+---
+- 10
+...
 -- Overwritten/deleted tuples are not stored in the cache so calling
 -- SELECT for a second time does only 5 lookups.
 box.stat.reset()
@@ -112,10 +136,22 @@ i1:stat().get.rows -- 5
 ---
 - 5
 ...
+i1:stat().skip.rows -- 0
+---
+- 0
+...
 pk:stat().lookup -- 5
 ---
 - 5
 ...
+pk:stat().get.rows -- 5
+---
+- 5
+...
+pk:stat().skip.rows -- 0
+---
+- 0
+...
 i2:select()
 ---
 - - [2, 20, 200]
@@ -128,10 +164,22 @@ i2:stat().get.rows -- 5
 ---
 - 5
 ...
+i2:stat().skip.rows -- 0
+---
+- 0
+...
 pk:stat().lookup -- 10
 ---
 - 10
 ...
+pk:stat().get.rows -- 10
+---
+- 10
+...
+pk:stat().skip.rows -- 0
+---
+- 0
+...
 -- Cleanup the cache.
 vinyl_cache = box.cfg.vinyl_cache
 ---
@@ -179,14 +227,26 @@ i1:select()
   - [8, 80, 800]
   - [10, 100, 1000]
 ...
-i1:stat().get.rows -- 15
+i1:stat().get.rows -- 5
 ---
-- 15
+- 5
+...
+i1:stat().skip.rows -- 10
+---
+- 10
 ...
 pk:stat().lookup -- 15
 ---
 - 15
 ...
+pk:stat().get.rows -- 5
+---
+- 5
+...
+pk:stat().skip.rows -- 5
+---
+- 5
+...
 i2:select()
 ---
 - - [2, 20, 200]
@@ -195,14 +255,26 @@ i2:select()
   - [8, 80, 800]
   - [10, 100, 1000]
 ...
-i2:stat().get.rows -- 15
+i2:stat().get.rows -- 5
 ---
-- 15
+- 5
+...
+i2:stat().skip.rows -- 10
+---
+- 10
 ...
 pk:stat().lookup -- 30
 ---
 - 30
 ...
+pk:stat().get.rows -- 10
+---
+- 10
+...
+pk:stat().skip.rows -- 10
+---
+- 10
+...
 -- Check that deferred DELETEs are not lost after restart.
 test_run:cmd("restart server default")
 fiber = require('fiber')
diff --git a/test/vinyl/deferred_delete.test.lua b/test/vinyl/deferred_delete.test.lua
index 240429fc..6dfcbde5 100644
--- a/test/vinyl/deferred_delete.test.lua
+++ b/test/vinyl/deferred_delete.test.lua
@@ -33,21 +33,33 @@ i2:stat().rows -- ditto
 -- overwritten tuples in the primary index hence 15 lookups per SELECT
 -- in a secondary index.
 i1:select()
-i1:stat().get.rows -- 15
+i1:stat().get.rows -- 5
+i1:stat().skip.rows -- 10
 pk:stat().lookup -- 15
+pk:stat().get.rows -- 5
+pk:stat().skip.rows -- 5
 i2:select()
-i2:stat().get.rows -- 15
+i2:stat().get.rows -- 5
+i2:stat().skip.rows -- 10
 pk:stat().lookup -- 30
+pk:stat().get.rows -- 10
+pk:stat().skip.rows -- 10
 
 -- Overwritten/deleted tuples are not stored in the cache so calling
 -- SELECT for a second time does only 5 lookups.
 box.stat.reset()
 i1:select()
 i1:stat().get.rows -- 5
+i1:stat().skip.rows -- 0
 pk:stat().lookup -- 5
+pk:stat().get.rows -- 5
+pk:stat().skip.rows -- 0
 i2:select()
 i2:stat().get.rows -- 5
+i2:stat().skip.rows -- 0
 pk:stat().lookup -- 10
+pk:stat().get.rows -- 10
+pk:stat().skip.rows -- 0
 
 -- Cleanup the cache.
 vinyl_cache = box.cfg.vinyl_cache
@@ -67,11 +79,17 @@ i2:stat().rows -- ditto
 -- the number of lookups.
 box.stat.reset()
 i1:select()
-i1:stat().get.rows -- 15
+i1:stat().get.rows -- 5
+i1:stat().skip.rows -- 10
 pk:stat().lookup -- 15
+pk:stat().get.rows -- 5
+pk:stat().skip.rows -- 5
 i2:select()
-i2:stat().get.rows -- 15
+i2:stat().get.rows -- 5
+i2:stat().skip.rows -- 10
 pk:stat().lookup -- 30
+pk:stat().get.rows -- 10
+pk:stat().skip.rows -- 10
 
 -- Check that deferred DELETEs are not lost after restart.
 test_run:cmd("restart server default")
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index f6d37769..6fc4d9e5 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -825,15 +825,15 @@ errinj.set("ERRINJ_VY_DELAY_PK_LOOKUP", true)
 _ = fiber.create(do_read)
 ---
 ...
-test_run:wait_cond(function() return sk:stat().get.rows > 0 end, 60)
+test_run:wait_cond(function() return sk:stat().disk.iterator.get.rows > 0 end, 60)
 ---
 - true
 ...
-pk:stat().get.rows -- 0
+pk:stat().disk.iterator.get.rows -- 0
 ---
 - 0
 ...
-sk:stat().get.rows -- 1
+sk:stat().disk.iterator.get.rows -- 1
 ---
 - 1
 ...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 4820ef97..b500b33c 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -297,9 +297,9 @@ ret = nil
 function do_read() ret = sk:select({2}, {iterator = 'GE'}) end
 errinj.set("ERRINJ_VY_DELAY_PK_LOOKUP", true)
 _ = fiber.create(do_read)
-test_run:wait_cond(function() return sk:stat().get.rows > 0 end, 60)
-pk:stat().get.rows -- 0
-sk:stat().get.rows -- 1
+test_run:wait_cond(function() return sk:stat().disk.iterator.get.rows > 0 end, 60)
+pk:stat().disk.iterator.get.rows -- 0
+sk:stat().disk.iterator.get.rows -- 1
 s:replace{2, 2}
 errinj.set("ERRINJ_VY_DELAY_PK_LOOKUP", false)
 test_run:wait_cond(function() return pk:stat().get.rows > 0 end, 60)
diff --git a/test/vinyl/errinj_ddl.result b/test/vinyl/errinj_ddl.result
index af5f73d4..45e116a1 100644
--- a/test/vinyl/errinj_ddl.result
+++ b/test/vinyl/errinj_ddl.result
@@ -454,10 +454,10 @@ test_run:cmd("setopt delimiter ''");
 - true
 ...
 -- Wait until DDL starts scanning the altered space.
-lookup = i1:stat().lookup
+lookup = i1:stat().disk.iterator.lookup
 ---
 ...
-wait_cond = function() return i1:stat().lookup > lookup end
+wait_cond = function() return i1:stat().disk.iterator.lookup > lookup end
 ---
 ...
 c1 = async_replace(s1, {1}, wait_cond)
@@ -501,10 +501,10 @@ s1:format{}
 ---
 ...
 -- Wait until DDL starts scanning the altered space.
-lookup = i1:stat().lookup
+lookup = i1:stat().disk.iterator.lookup
 ---
 ...
-wait_cond = function() return i1:stat().lookup > lookup end
+wait_cond = function() return i1:stat().disk.iterator.lookup > lookup end
 ---
 ...
 c1 = async_replace(s1, {2}, wait_cond)
diff --git a/test/vinyl/errinj_ddl.test.lua b/test/vinyl/errinj_ddl.test.lua
index 57af9c65..5870a0b0 100644
--- a/test/vinyl/errinj_ddl.test.lua
+++ b/test/vinyl/errinj_ddl.test.lua
@@ -204,8 +204,8 @@ end;
 test_run:cmd("setopt delimiter ''");
 
 -- Wait until DDL starts scanning the altered space.
-lookup = i1:stat().lookup
-wait_cond = function() return i1:stat().lookup > lookup end
+lookup = i1:stat().disk.iterator.lookup
+wait_cond = function() return i1:stat().disk.iterator.lookup > lookup end
 c1 = async_replace(s1, {1}, wait_cond)
 c2 = async_replace(s2, {1}, wait_cond)
 
@@ -222,8 +222,8 @@ s1:format()
 s1:format{}
 
 -- Wait until DDL starts scanning the altered space.
-lookup = i1:stat().lookup
-wait_cond = function() return i1:stat().lookup > lookup end
+lookup = i1:stat().disk.iterator.lookup
+wait_cond = function() return i1:stat().disk.iterator.lookup > lookup end
 c1 = async_replace(s1, {2}, wait_cond)
 c2 = async_replace(s2, {2}, wait_cond)
 
diff --git a/test/vinyl/stat.result b/test/vinyl/stat.result
index f49394f7..aa47a7be 100644
--- a/test/vinyl/stat.result
+++ b/test/vinyl/stat.result
@@ -129,10 +129,8 @@ test_run:cmd("setopt delimiter ''");
 -- initially stats are empty
 istat()
 ---
-- upsert:
-    squashed: 0
-    applied: 0
-  bytes: 0
+- range_count: 1
+  lookup: 0
   cache:
     invalidate:
       rows: 0
@@ -212,20 +210,25 @@ istat()
   rows: 0
   run_avg: 0
   dumps_per_compaction: 0
-  lookup: 0
-  range_count: 1
-  txw:
+  upsert:
+    squashed: 0
+    applied: 0
+  bytes: 0
+  put:
+    rows: 0
     bytes: 0
+  skip:
     rows: 0
-    iterator:
-      lookup: 0
-      get:
-        rows: 0
-        bytes: 0
+    bytes: 0
   run_count: 0
-  put:
-    rows: 0
+  txw:
     bytes: 0
+    rows: 0
+    iterator:
+      lookup: 0
+      get:
+        rows: 0
+        bytes: 0
   memory:
     bytes: 0
     index_size: 0
@@ -348,7 +351,6 @@ stat_diff(istat(), st)
 - put:
     rows: 50
     bytes: 53050
-  rows: 25
   bytes: 26042
   disk:
     last_level:
@@ -385,6 +387,7 @@ stat_diff(istat(), st)
         pages: 13
         bytes_compressed: <bytes_compressed>
         rows: 50
+  rows: 25
 ...
 -- point lookup from disk + cache put
 st = istat()
@@ -404,6 +407,7 @@ stat_diff(istat(), st)
     put:
       rows: 1
       bytes: 1061
+  lookup: 1
   disk:
     iterator:
       read:
@@ -415,7 +419,6 @@ stat_diff(istat(), st)
       get:
         rows: 1
         bytes: 1061
-  lookup: 1
   memory:
     iterator:
       lookup: 1
@@ -655,19 +658,6 @@ stat_diff(istat(), st)
     put:
       rows: 51
       bytes: 54111
-  lookup: 1
-  txw:
-    iterator:
-      lookup: 1
-      get:
-        rows: 50
-        bytes: 53050
-  memory:
-    iterator:
-      lookup: 1
-      get:
-        rows: 100
-        bytes: 106100
   disk:
     iterator:
       read:
@@ -679,6 +669,19 @@ stat_diff(istat(), st)
       get:
         rows: 100
         bytes: 106100
+  txw:
+    iterator:
+      lookup: 1
+      get:
+        rows: 50
+        bytes: 53050
+  memory:
+    iterator:
+      lookup: 1
+      get:
+        rows: 100
+        bytes: 106100
+  lookup: 1
   get:
     rows: 100
     bytes: 106100
@@ -1001,10 +1004,8 @@ box.stat.reset()
 ...
 istat()
 ---
-- upsert:
-    squashed: 0
-    applied: 0
-  bytes: 317731
+- range_count: 2
+  lookup: 0
   cache:
     invalidate:
       rows: 0
@@ -1084,20 +1085,25 @@ istat()
   rows: 306
   run_avg: 1
   dumps_per_compaction: 1
-  lookup: 0
-  range_count: 2
-  txw:
+  upsert:
+    squashed: 0
+    applied: 0
+  bytes: 317731
+  put:
+    rows: 0
     bytes: 0
+  skip:
     rows: 0
-    iterator:
-      lookup: 0
-      get:
-        rows: 0
-        bytes: 0
+    bytes: 0
   run_count: 2
-  put:
-    rows: 0
+  txw:
     bytes: 0
+    rows: 0
+    iterator:
+      lookup: 0
+      get:
+        rows: 0
+        bytes: 0
   memory:
     bytes: 213431
     index_size: 49152
-- 
2.11.0




More information about the Tarantool-patches mailing list