[PATCH 12/12] vinyl: add last level size to statistics
Vladimir Davydov
vdavydov.dev at gmail.com
Tue Jan 15 17:17:21 MSK 2019
In order to estimate space amplification of a vinyl database, we need to
know the size of data stored at the last LSM tree level. So this patch
adds such a counter both per index and globally.
Per-index it is reported under disk.last_level, in rows, bytes, bytes
after compression, and pages, just like any other disk counter.
Globally, it is reported in bytes only under disk.data_compacted. Note,
to be consistent with disk.data, it doesn't include the last level of
secondary indexes.
---
src/box/vinyl.c | 3 +
src/box/vy_lsm.c | 19 ++++
src/box/vy_lsm.h | 13 +++
src/box/vy_stat.h | 2 +
test/vinyl/stat.result | 248 +++++++++++++++++++++++++++++++++++++----------
test/vinyl/stat.test.lua | 42 ++++++++
6 files changed, 276 insertions(+), 51 deletions(-)
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 4e5903c2..01daa435 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -318,6 +318,7 @@ vy_info_append_disk(struct vy_env *env, struct info_handler *h)
info_table_begin(h, "disk");
info_append_int(h, "data", env->lsm_env.disk_data_size);
info_append_int(h, "index", env->lsm_env.disk_index_size);
+ info_append_int(h, "data_compacted", env->lsm_env.compacted_data_size);
info_table_end(h); /* disk */
}
@@ -403,6 +404,8 @@ vinyl_index_stat(struct index *index, struct info_handler *h)
info_table_begin(h, "disk");
vy_info_append_disk_stmt_counter(h, NULL, &stat->disk.count);
+ vy_info_append_disk_stmt_counter(h, "last_level",
+ &stat->disk.last_level_count);
info_table_begin(h, "statement");
info_append_int(h, "inserts", stat->disk.stmt.inserts);
info_append_int(h, "replaces", stat->disk.stmt.replaces);
diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c
index 07ddc646..efaae377 100644
--- a/src/box/vy_lsm.c
+++ b/src/box/vy_lsm.c
@@ -239,6 +239,9 @@ vy_lsm_delete(struct vy_lsm *lsm)
assert(lsm->env->lsm_count > 0);
lsm->env->lsm_count--;
+ if (lsm->index_id == 0)
+ lsm->env->compacted_data_size -=
+ lsm->stat.disk.last_level_count.bytes;
if (lsm->pk != NULL)
vy_lsm_unref(lsm->pk);
@@ -748,6 +751,14 @@ vy_lsm_acct_range(struct vy_lsm *lsm, struct vy_range *range)
histogram_collect(lsm->run_hist, range->slice_count);
vy_disk_stmt_counter_add(&lsm->stat.disk.compaction.queue,
&range->compaction_queue);
+ if (!rlist_empty(&range->slices)) {
+ struct vy_slice *slice = rlist_last_entry(&range->slices,
+ struct vy_slice, in_range);
+ vy_disk_stmt_counter_add(&lsm->stat.disk.last_level_count,
+ &slice->count);
+ if (lsm->index_id == 0)
+ lsm->env->compacted_data_size += slice->count.bytes;
+ }
}
void
@@ -756,6 +767,14 @@ vy_lsm_unacct_range(struct vy_lsm *lsm, struct vy_range *range)
histogram_discard(lsm->run_hist, range->slice_count);
vy_disk_stmt_counter_sub(&lsm->stat.disk.compaction.queue,
&range->compaction_queue);
+ if (!rlist_empty(&range->slices)) {
+ struct vy_slice *slice = rlist_last_entry(&range->slices,
+ struct vy_slice, in_range);
+ vy_disk_stmt_counter_sub(&lsm->stat.disk.last_level_count,
+ &slice->count);
+ if (lsm->index_id == 0)
+ lsm->env->compacted_data_size -= slice->count.bytes;
+ }
}
void
diff --git a/src/box/vy_lsm.h b/src/box/vy_lsm.h
index 97b567d4..2b779ca0 100644
--- a/src/box/vy_lsm.h
+++ b/src/box/vy_lsm.h
@@ -107,6 +107,17 @@ struct vy_lsm_env {
* is consistent with index.bsize().
*/
int64_t disk_index_size;
+ /**
+ * Min size of disk space required to store data of all
+ * spaces of the database. In other words, the size of
+ * disk space the database would occupy if all spaces were
+ * compacted and there were no indexes. Accounted in bytes,
+ * without taking into account disk compression. Estimated
+ * as the size of data stored in the last level of primary
+ * LSM trees. Along with disk_data_size and disk_index_size,
+ * it can be used for evaluating space amplification.
+ */
+ int64_t compacted_data_size;
/** Memory pool for vy_history_node allocations. */
struct mempool history_node_pool;
};
@@ -452,6 +463,8 @@ vy_lsm_remove_range(struct vy_lsm *lsm, struct vy_range *range);
* a range of the LSM tree.
* - vy_lsm::stat::disk::compaction::queue after compaction priority
* of a range is updated.
+ * - vy_lsm::stat::disk::last_level_count and vy_lsm_env::compacted_data_size
+ * after a range is compacted.
*/
void
vy_lsm_acct_range(struct vy_lsm *lsm, struct vy_range *range);
diff --git a/src/box/vy_stat.h b/src/box/vy_stat.h
index 6f6db6ac..1eb5a461 100644
--- a/src/box/vy_stat.h
+++ b/src/box/vy_stat.h
@@ -139,6 +139,8 @@ struct vy_lsm_stat {
struct {
/** Number of statements stored on disk. */
struct vy_disk_stmt_counter count;
+ /** Number of statements stored in the last LSM level. */
+ struct vy_disk_stmt_counter last_level_count;
/** Statement statistics. */
struct vy_stmt_stat stmt;
/** Run iterator statistics. */
diff --git a/test/vinyl/stat.result b/test/vinyl/stat.result
index 0920e3da..419d3e6c 100644
--- a/test/vinyl/stat.result
+++ b/test/vinyl/stat.result
@@ -159,24 +159,12 @@ istat()
rows: 0
bytes: 0
disk:
- index_size: 0
- compaction:
- input:
- bytes_compressed: 0
- pages: 0
- rows: 0
- bytes: 0
- queue:
- bytes_compressed: 0
- pages: 0
- rows: 0
- bytes: 0
- output:
- bytes_compressed: 0
- pages: 0
- rows: 0
- bytes: 0
- count: 0
+ last_level:
+ bytes_compressed: 0
+ pages: 0
+ rows: 0
+ bytes: 0
+ rows: 0
statement:
inserts: 0
replaces: 0
@@ -193,6 +181,7 @@ istat()
bytes: 0
count: 0
bloom_size: 0
+ index_size: 0
iterator:
read:
bytes_compressed: 0
@@ -206,10 +195,26 @@ istat()
get:
rows: 0
bytes: 0
- bytes: 0
+ compaction:
+ input:
+ bytes_compressed: 0
+ pages: 0
+ rows: 0
+ bytes: 0
+ queue:
+ bytes_compressed: 0
+ pages: 0
+ rows: 0
+ bytes: 0
+ output:
+ bytes_compressed: 0
+ pages: 0
+ rows: 0
+ bytes: 0
+ count: 0
pages: 0
bytes_compressed: 0
- rows: 0
+ bytes: 0
txw:
bytes: 0
rows: 0
@@ -249,6 +254,7 @@ gstat()
page_index: 0
bloom_filter: 0
disk:
+ data_compacted: 0
data: 0
index: 0
scheduler:
@@ -292,6 +298,14 @@ stat_diff(istat(), st)
run_avg: 1
run_count: 1
disk:
+ last_level:
+ bytes: 26049
+ pages: 7
+ bytes_compressed: <bytes_compressed>
+ rows: 25
+ rows: 25
+ statement:
+ replaces: 25
dump:
input:
rows: 25
@@ -302,14 +316,11 @@ stat_diff(istat(), st)
pages: 7
bytes_compressed: <bytes_compressed>
rows: 25
+ bytes: 26049
index_size: 294
- rows: 25
- bytes_compressed: <bytes_compressed>
pages: 7
+ bytes_compressed: <bytes_compressed>
bloom_size: 70
- statement:
- replaces: 25
- bytes: 26049
bytes: 26049
put:
rows: 25
@@ -332,6 +343,14 @@ wait(istat, st, 'disk.compaction.count', 1)
stat_diff(istat(), st)
---
- disk:
+ last_level:
+ bytes: 26042
+ pages: 6
+ bytes_compressed: <bytes_compressed>
+ rows: 25
+ rows: 25
+ statement:
+ replaces: 25
dump:
input:
rows: 50
@@ -342,7 +361,10 @@ stat_diff(istat(), st)
pages: 13
bytes_compressed: <bytes_compressed>
rows: 50
+ bytes: 26042
index_size: 252
+ pages: 6
+ bytes_compressed: <bytes_compressed>
compaction:
input:
bytes: 78140
@@ -355,12 +377,6 @@ stat_diff(istat(), st)
pages: 13
bytes_compressed: <bytes_compressed>
rows: 50
- rows: 25
- bytes_compressed: <bytes_compressed>
- pages: 6
- statement:
- replaces: 25
- bytes: 26042
put:
rows: 50
bytes: 53050
@@ -1012,24 +1028,12 @@ istat()
rows: 0
bytes: 0
disk:
- index_size: 1050
- compaction:
- input:
- bytes_compressed: <bytes_compressed>
- pages: 0
- rows: 0
- bytes: 0
- queue:
- bytes_compressed: <bytes_compressed>
- pages: 0
- rows: 0
- bytes: 0
- output:
- bytes_compressed: <bytes_compressed>
- pages: 0
- rows: 0
- bytes: 0
- count: 0
+ last_level:
+ bytes_compressed: <bytes_compressed>
+ pages: 25
+ rows: 100
+ bytes: 104300
+ rows: 100
statement:
inserts: 0
replaces: 100
@@ -1046,6 +1050,7 @@ istat()
bytes: 0
count: 0
bloom_size: 140
+ index_size: 1050
iterator:
read:
bytes_compressed: <bytes_compressed>
@@ -1059,10 +1064,26 @@ istat()
get:
rows: 0
bytes: 0
- bytes: 104300
+ compaction:
+ input:
+ bytes_compressed: <bytes_compressed>
+ pages: 0
+ rows: 0
+ bytes: 0
+ queue:
+ bytes_compressed: <bytes_compressed>
+ pages: 0
+ rows: 0
+ bytes: 0
+ output:
+ bytes_compressed: <bytes_compressed>
+ pages: 0
+ rows: 0
+ bytes: 0
+ count: 0
pages: 25
bytes_compressed: <bytes_compressed>
- rows: 100
+ bytes: 104300
txw:
bytes: 0
rows: 0
@@ -1102,6 +1123,7 @@ gstat()
page_index: 1050
bloom_filter: 140
disk:
+ data_compacted: 104300
data: 104300
index: 1190
scheduler:
@@ -1557,6 +1579,9 @@ test_run:cmd('restart server test')
fiber = require('fiber')
---
...
+digest = require('digest')
+---
+...
s = box.space.test
---
...
@@ -1586,6 +1611,127 @@ i:stat().disk.statement
s:drop()
---
...
+--
+-- Last level size.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+i1 = s:create_index('i1', {parts = {1, 'unsigned'}})
+---
+...
+i2 = s:create_index('i2', {parts = {2, 'unsigned'}})
+---
+...
+i1:stat().disk.last_level
+---
+- bytes_compressed: <bytes_compressed>
+ pages: 0
+ rows: 0
+ bytes: 0
+...
+i2:stat().disk.last_level
+---
+- bytes_compressed: <bytes_compressed>
+ pages: 0
+ rows: 0
+ bytes: 0
+...
+box.stat.vinyl().disk.data_compacted
+---
+- 0
+...
+for i = 1, 100 do s:replace{i, i, digest.urandom(100)} end
+---
+...
+box.snapshot()
+---
+- ok
+...
+i1:stat().disk.last_level
+---
+- bytes_compressed: <bytes_compressed>
+ pages: 2
+ rows: 100
+ bytes: 11815
+...
+i2:stat().disk.last_level
+---
+- bytes_compressed: <bytes_compressed>
+ pages: 1
+ rows: 100
+ bytes: 1608
+...
+box.stat.vinyl().disk.data_compacted
+---
+- 11815
+...
+for i = 1, 100, 10 do s:replace{i, i * 1000, digest.urandom(100)} end
+---
+...
+box.snapshot()
+---
+- ok
+...
+i1:stat().disk.last_level
+---
+- bytes_compressed: <bytes_compressed>
+ pages: 2
+ rows: 100
+ bytes: 11815
+...
+i2:stat().disk.last_level
+---
+- bytes_compressed: <bytes_compressed>
+ pages: 1
+ rows: 100
+ bytes: 1608
+...
+box.stat.vinyl().disk.data_compacted
+---
+- 11815
+...
+i1:compact()
+---
+...
+while i1:stat().disk.compaction.count == 0 do fiber.sleep(0.01) end
+---
+...
+i1:stat().disk.last_level
+---
+- bytes_compressed: <bytes_compressed>
+ pages: 2
+ rows: 100
+ bytes: 11841
+...
+box.stat.vinyl().disk.data_compacted
+---
+- 11841
+...
+i2:compact()
+---
+...
+while i2:stat().disk.compaction.count == 0 do fiber.sleep(0.01) end
+---
+...
+i2:stat().disk.last_level
+---
+- bytes_compressed: <bytes_compressed>
+ pages: 1
+ rows: 110
+ bytes: 1794
+...
+box.stat.vinyl().disk.data_compacted
+---
+- 11841
+...
+s:drop()
+---
+...
+box.stat.vinyl().disk.data_compacted
+---
+- 0
+...
test_run:cmd('switch default')
---
- true
diff --git a/test/vinyl/stat.test.lua b/test/vinyl/stat.test.lua
index 818ec730..4a955682 100644
--- a/test/vinyl/stat.test.lua
+++ b/test/vinyl/stat.test.lua
@@ -473,6 +473,7 @@ i:stat().disk.statement
test_run:cmd('restart server test')
fiber = require('fiber')
+digest = require('digest')
s = box.space.test
i = s.index.primary
@@ -486,6 +487,47 @@ i:stat().disk.statement
s:drop()
+--
+-- Last level size.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+i1 = s:create_index('i1', {parts = {1, 'unsigned'}})
+i2 = s:create_index('i2', {parts = {2, 'unsigned'}})
+
+i1:stat().disk.last_level
+i2:stat().disk.last_level
+box.stat.vinyl().disk.data_compacted
+
+for i = 1, 100 do s:replace{i, i, digest.urandom(100)} end
+box.snapshot()
+
+i1:stat().disk.last_level
+i2:stat().disk.last_level
+box.stat.vinyl().disk.data_compacted
+
+for i = 1, 100, 10 do s:replace{i, i * 1000, digest.urandom(100)} end
+box.snapshot()
+
+i1:stat().disk.last_level
+i2:stat().disk.last_level
+box.stat.vinyl().disk.data_compacted
+
+i1:compact()
+while i1:stat().disk.compaction.count == 0 do fiber.sleep(0.01) end
+
+i1:stat().disk.last_level
+box.stat.vinyl().disk.data_compacted
+
+i2:compact()
+while i2:stat().disk.compaction.count == 0 do fiber.sleep(0.01) end
+
+i2:stat().disk.last_level
+box.stat.vinyl().disk.data_compacted
+
+s:drop()
+
+box.stat.vinyl().disk.data_compacted
+
test_run:cmd('switch default')
test_run:cmd('stop server test')
test_run:cmd('cleanup server test')
--
2.11.0
More information about the Tarantool-patches
mailing list