From: Vladimir Davydov <vdavydov.dev@gmail.com> To: tarantool-patches@freelists.org Subject: [PATCH 12/12] vinyl: add last level size to statistics Date: Tue, 15 Jan 2019 17:17:21 +0300 [thread overview] Message-ID: <35758e4f6ac6424f88735a362fe10754e31fbb87.1547558871.git.vdavydov.dev@gmail.com> (raw) In-Reply-To: <cover.1547558871.git.vdavydov.dev@gmail.com> In-Reply-To: <cover.1547558871.git.vdavydov.dev@gmail.com> In order to estimate space amplification of a vinyl database, we need to know the size of data stored at the last LSM tree level. So this patch adds such a counter both per index and globally. Per-index it is reported under disk.last_level, in rows, bytes, bytes after compression, and pages, just like any other disk counter. Globally it is reported in bytes only under disk.data_compacted. Note, to be consistent with disk.data, it doesn't include the last level of secondary indexes. --- src/box/vinyl.c | 3 + src/box/vy_lsm.c | 19 ++++ src/box/vy_lsm.h | 13 +++ src/box/vy_stat.h | 2 + test/vinyl/stat.result | 248 +++++++++++++++++++++++++++++++++++++---------- test/vinyl/stat.test.lua | 42 ++++++++ 6 files changed, 276 insertions(+), 51 deletions(-) diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 4e5903c2..01daa435 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -318,6 +318,7 @@ vy_info_append_disk(struct vy_env *env, struct info_handler *h) info_table_begin(h, "disk"); info_append_int(h, "data", env->lsm_env.disk_data_size); info_append_int(h, "index", env->lsm_env.disk_index_size); + info_append_int(h, "data_compacted", env->lsm_env.compacted_data_size); info_table_end(h); /* disk */ } @@ -403,6 +404,8 @@ vinyl_index_stat(struct index *index, struct info_handler *h) info_table_begin(h, "disk"); vy_info_append_disk_stmt_counter(h, NULL, &stat->disk.count); + vy_info_append_disk_stmt_counter(h, "last_level", + &stat->disk.last_level_count); info_table_begin(h, "statement"); info_append_int(h, "inserts", stat->disk.stmt.inserts); 
info_append_int(h, "replaces", stat->disk.stmt.replaces); diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c index 07ddc646..efaae377 100644 --- a/src/box/vy_lsm.c +++ b/src/box/vy_lsm.c @@ -239,6 +239,9 @@ vy_lsm_delete(struct vy_lsm *lsm) assert(lsm->env->lsm_count > 0); lsm->env->lsm_count--; + if (lsm->index_id == 0) + lsm->env->compacted_data_size -= + lsm->stat.disk.last_level_count.bytes; if (lsm->pk != NULL) vy_lsm_unref(lsm->pk); @@ -748,6 +751,14 @@ vy_lsm_acct_range(struct vy_lsm *lsm, struct vy_range *range) histogram_collect(lsm->run_hist, range->slice_count); vy_disk_stmt_counter_add(&lsm->stat.disk.compaction.queue, &range->compaction_queue); + if (!rlist_empty(&range->slices)) { + struct vy_slice *slice = rlist_last_entry(&range->slices, + struct vy_slice, in_range); + vy_disk_stmt_counter_add(&lsm->stat.disk.last_level_count, + &slice->count); + if (lsm->index_id == 0) + lsm->env->compacted_data_size += slice->count.bytes; + } } void @@ -756,6 +767,14 @@ vy_lsm_unacct_range(struct vy_lsm *lsm, struct vy_range *range) histogram_discard(lsm->run_hist, range->slice_count); vy_disk_stmt_counter_sub(&lsm->stat.disk.compaction.queue, &range->compaction_queue); + if (!rlist_empty(&range->slices)) { + struct vy_slice *slice = rlist_last_entry(&range->slices, + struct vy_slice, in_range); + vy_disk_stmt_counter_sub(&lsm->stat.disk.last_level_count, + &slice->count); + if (lsm->index_id == 0) + lsm->env->compacted_data_size -= slice->count.bytes; + } } void diff --git a/src/box/vy_lsm.h b/src/box/vy_lsm.h index 97b567d4..2b779ca0 100644 --- a/src/box/vy_lsm.h +++ b/src/box/vy_lsm.h @@ -107,6 +107,17 @@ struct vy_lsm_env { * is consistent with index.bsize(). */ int64_t disk_index_size; + /** + * Min size of disk space required to store data of all + * spaces of the database. In other words, the size of + * disk space the database would occupy if all spaces were + * compacted and there were no indexes. 
Accounted in bytes, + * without taking into account disk compression. Estimated + * as the size of data stored in the last level of primary + * LSM trees. Along with disk_data_size and disk_index_size, + * it can be used for evaluating space amplification. + */ + int64_t compacted_data_size; /** Memory pool for vy_history_node allocations. */ struct mempool history_node_pool; }; @@ -452,6 +463,8 @@ vy_lsm_remove_range(struct vy_lsm *lsm, struct vy_range *range); * a range of the LSM tree. * - vy_lsm::stat::disk::compaction::queue after compaction priority * of a range is updated. + * - vy_lsm::stat::disk::last_level_count and vy_lsm_env::compacted_data_size + * after a range is compacted. */ void vy_lsm_acct_range(struct vy_lsm *lsm, struct vy_range *range); diff --git a/src/box/vy_stat.h b/src/box/vy_stat.h index 6f6db6ac..1eb5a461 100644 --- a/src/box/vy_stat.h +++ b/src/box/vy_stat.h @@ -139,6 +139,8 @@ struct vy_lsm_stat { struct { /** Number of statements stored on disk. */ struct vy_disk_stmt_counter count; + /** Number of statements stored in the last LSM level. */ + struct vy_disk_stmt_counter last_level_count; /** Statement statistics. */ struct vy_stmt_stat stmt; /** Run iterator statistics. 
*/ diff --git a/test/vinyl/stat.result b/test/vinyl/stat.result index 0920e3da..419d3e6c 100644 --- a/test/vinyl/stat.result +++ b/test/vinyl/stat.result @@ -159,24 +159,12 @@ istat() rows: 0 bytes: 0 disk: - index_size: 0 - compaction: - input: - bytes_compressed: 0 - pages: 0 - rows: 0 - bytes: 0 - queue: - bytes_compressed: 0 - pages: 0 - rows: 0 - bytes: 0 - output: - bytes_compressed: 0 - pages: 0 - rows: 0 - bytes: 0 - count: 0 + last_level: + bytes_compressed: 0 + pages: 0 + rows: 0 + bytes: 0 + rows: 0 statement: inserts: 0 replaces: 0 @@ -193,6 +181,7 @@ istat() bytes: 0 count: 0 bloom_size: 0 + index_size: 0 iterator: read: bytes_compressed: 0 @@ -206,10 +195,26 @@ istat() get: rows: 0 bytes: 0 - bytes: 0 + compaction: + input: + bytes_compressed: 0 + pages: 0 + rows: 0 + bytes: 0 + queue: + bytes_compressed: 0 + pages: 0 + rows: 0 + bytes: 0 + output: + bytes_compressed: 0 + pages: 0 + rows: 0 + bytes: 0 + count: 0 pages: 0 bytes_compressed: 0 - rows: 0 + bytes: 0 txw: bytes: 0 rows: 0 @@ -249,6 +254,7 @@ gstat() page_index: 0 bloom_filter: 0 disk: + data_compacted: 0 data: 0 index: 0 scheduler: @@ -292,6 +298,14 @@ stat_diff(istat(), st) run_avg: 1 run_count: 1 disk: + last_level: + bytes: 26049 + pages: 7 + bytes_compressed: <bytes_compressed> + rows: 25 + rows: 25 + statement: + replaces: 25 dump: input: rows: 25 @@ -302,14 +316,11 @@ stat_diff(istat(), st) pages: 7 bytes_compressed: <bytes_compressed> rows: 25 + bytes: 26049 index_size: 294 - rows: 25 - bytes_compressed: <bytes_compressed> pages: 7 + bytes_compressed: <bytes_compressed> bloom_size: 70 - statement: - replaces: 25 - bytes: 26049 bytes: 26049 put: rows: 25 @@ -332,6 +343,14 @@ wait(istat, st, 'disk.compaction.count', 1) stat_diff(istat(), st) --- - disk: + last_level: + bytes: 26042 + pages: 6 + bytes_compressed: <bytes_compressed> + rows: 25 + rows: 25 + statement: + replaces: 25 dump: input: rows: 50 @@ -342,7 +361,10 @@ stat_diff(istat(), st) pages: 13 bytes_compressed: 
<bytes_compressed> rows: 50 + bytes: 26042 index_size: 252 + pages: 6 + bytes_compressed: <bytes_compressed> compaction: input: bytes: 78140 @@ -355,12 +377,6 @@ stat_diff(istat(), st) pages: 13 bytes_compressed: <bytes_compressed> rows: 50 - rows: 25 - bytes_compressed: <bytes_compressed> - pages: 6 - statement: - replaces: 25 - bytes: 26042 put: rows: 50 bytes: 53050 @@ -1012,24 +1028,12 @@ istat() rows: 0 bytes: 0 disk: - index_size: 1050 - compaction: - input: - bytes_compressed: <bytes_compressed> - pages: 0 - rows: 0 - bytes: 0 - queue: - bytes_compressed: <bytes_compressed> - pages: 0 - rows: 0 - bytes: 0 - output: - bytes_compressed: <bytes_compressed> - pages: 0 - rows: 0 - bytes: 0 - count: 0 + last_level: + bytes_compressed: <bytes_compressed> + pages: 25 + rows: 100 + bytes: 104300 + rows: 100 statement: inserts: 0 replaces: 100 @@ -1046,6 +1050,7 @@ istat() bytes: 0 count: 0 bloom_size: 140 + index_size: 1050 iterator: read: bytes_compressed: <bytes_compressed> @@ -1059,10 +1064,26 @@ istat() get: rows: 0 bytes: 0 - bytes: 104300 + compaction: + input: + bytes_compressed: <bytes_compressed> + pages: 0 + rows: 0 + bytes: 0 + queue: + bytes_compressed: <bytes_compressed> + pages: 0 + rows: 0 + bytes: 0 + output: + bytes_compressed: <bytes_compressed> + pages: 0 + rows: 0 + bytes: 0 + count: 0 pages: 25 bytes_compressed: <bytes_compressed> - rows: 100 + bytes: 104300 txw: bytes: 0 rows: 0 @@ -1102,6 +1123,7 @@ gstat() page_index: 1050 bloom_filter: 140 disk: + data_compacted: 104300 data: 104300 index: 1190 scheduler: @@ -1557,6 +1579,9 @@ test_run:cmd('restart server test') fiber = require('fiber') --- ... +digest = require('digest') +--- +... s = box.space.test --- ... @@ -1586,6 +1611,127 @@ i:stat().disk.statement s:drop() --- ... +-- +-- Last level size. +-- +s = box.schema.space.create('test', {engine = 'vinyl'}) +--- +... +i1 = s:create_index('i1', {parts = {1, 'unsigned'}}) +--- +... +i2 = s:create_index('i2', {parts = {2, 'unsigned'}}) +--- +... 
+i1:stat().disk.last_level +--- +- bytes_compressed: <bytes_compressed> + pages: 0 + rows: 0 + bytes: 0 +... +i2:stat().disk.last_level +--- +- bytes_compressed: <bytes_compressed> + pages: 0 + rows: 0 + bytes: 0 +... +box.stat.vinyl().disk.data_compacted +--- +- 0 +... +for i = 1, 100 do s:replace{i, i, digest.urandom(100)} end +--- +... +box.snapshot() +--- +- ok +... +i1:stat().disk.last_level +--- +- bytes_compressed: <bytes_compressed> + pages: 2 + rows: 100 + bytes: 11815 +... +i2:stat().disk.last_level +--- +- bytes_compressed: <bytes_compressed> + pages: 1 + rows: 100 + bytes: 1608 +... +box.stat.vinyl().disk.data_compacted +--- +- 11815 +... +for i = 1, 100, 10 do s:replace{i, i * 1000, digest.urandom(100)} end +--- +... +box.snapshot() +--- +- ok +... +i1:stat().disk.last_level +--- +- bytes_compressed: <bytes_compressed> + pages: 2 + rows: 100 + bytes: 11815 +... +i2:stat().disk.last_level +--- +- bytes_compressed: <bytes_compressed> + pages: 1 + rows: 100 + bytes: 1608 +... +box.stat.vinyl().disk.data_compacted +--- +- 11815 +... +i1:compact() +--- +... +while i1:stat().disk.compaction.count == 0 do fiber.sleep(0.01) end +--- +... +i1:stat().disk.last_level +--- +- bytes_compressed: <bytes_compressed> + pages: 2 + rows: 100 + bytes: 11841 +... +box.stat.vinyl().disk.data_compacted +--- +- 11841 +... +i2:compact() +--- +... +while i2:stat().disk.compaction.count == 0 do fiber.sleep(0.01) end +--- +... +i2:stat().disk.last_level +--- +- bytes_compressed: <bytes_compressed> + pages: 1 + rows: 110 + bytes: 1794 +... +box.stat.vinyl().disk.data_compacted +--- +- 11841 +... +s:drop() +--- +... +box.stat.vinyl().disk.data_compacted +--- +- 0 +... 
test_run:cmd('switch default') --- - true diff --git a/test/vinyl/stat.test.lua b/test/vinyl/stat.test.lua index 818ec730..4a955682 100644 --- a/test/vinyl/stat.test.lua +++ b/test/vinyl/stat.test.lua @@ -473,6 +473,7 @@ i:stat().disk.statement test_run:cmd('restart server test') fiber = require('fiber') +digest = require('digest') s = box.space.test i = s.index.primary @@ -486,6 +487,47 @@ i:stat().disk.statement s:drop() +-- +-- Last level size. +-- +s = box.schema.space.create('test', {engine = 'vinyl'}) +i1 = s:create_index('i1', {parts = {1, 'unsigned'}}) +i2 = s:create_index('i2', {parts = {2, 'unsigned'}}) + +i1:stat().disk.last_level +i2:stat().disk.last_level +box.stat.vinyl().disk.data_compacted + +for i = 1, 100 do s:replace{i, i, digest.urandom(100)} end +box.snapshot() + +i1:stat().disk.last_level +i2:stat().disk.last_level +box.stat.vinyl().disk.data_compacted + +for i = 1, 100, 10 do s:replace{i, i * 1000, digest.urandom(100)} end +box.snapshot() + +i1:stat().disk.last_level +i2:stat().disk.last_level +box.stat.vinyl().disk.data_compacted + +i1:compact() +while i1:stat().disk.compaction.count == 0 do fiber.sleep(0.01) end + +i1:stat().disk.last_level +box.stat.vinyl().disk.data_compacted + +i2:compact() +while i2:stat().disk.compaction.count == 0 do fiber.sleep(0.01) end + +i2:stat().disk.last_level +box.stat.vinyl().disk.data_compacted + +s:drop() + +box.stat.vinyl().disk.data_compacted + test_run:cmd('switch default') test_run:cmd('stop server test') test_run:cmd('cleanup server test') -- 2.11.0
next prev parent reply other threads:[~2019-01-15 14:17 UTC|newest] Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-01-15 14:17 [PATCH 00/12] vinyl: statistics improvements Vladimir Davydov 2019-01-15 14:17 ` [PATCH 01/12] test: rename vinyl/info to vinyl/stat Vladimir Davydov 2019-01-17 11:32 ` [tarantool-patches] " Konstantin Osipov 2019-01-15 14:17 ` [PATCH 02/12] test: split vinyl/errinj Vladimir Davydov 2019-01-17 11:33 ` [tarantool-patches] " Konstantin Osipov 2019-01-15 14:17 ` [PATCH 03/12] vinyl: rename dump/compact in/out to input/output Vladimir Davydov 2019-01-17 11:33 ` [tarantool-patches] " Konstantin Osipov 2019-01-15 14:17 ` [PATCH 04/12] vinyl: rename compact to compaction Vladimir Davydov 2019-01-17 11:34 ` [tarantool-patches] " Konstantin Osipov 2019-01-17 12:08 ` Vladimir Davydov 2019-01-17 13:51 ` Konstantin Osipov 2019-01-15 14:17 ` [PATCH 05/12] vinyl: bump range version in vy_range.c Vladimir Davydov 2019-01-15 14:17 ` [PATCH 06/12] vinyl: don't add dropped LSM trees to the scheduler during recovery Vladimir Davydov 2019-01-15 14:17 ` [PATCH 07/12] vinyl: move global dump/compaction statistics to scheduler Vladimir Davydov 2019-01-16 16:36 ` Vladimir Davydov 2019-01-15 14:17 ` [PATCH 08/12] vinyl: add dump count to global scheduler statistics Vladimir Davydov 2019-01-15 14:17 ` [PATCH 09/12] vinyl: don't account secondary indexes to scheduler.dump_input Vladimir Davydov 2019-01-15 14:17 ` [PATCH 10/12] vinyl: add task accounting to global scheduler statistics Vladimir Davydov 2019-01-15 14:17 ` [PATCH 11/12] vinyl: add dump/compaction time to statistics Vladimir Davydov 2019-01-15 14:17 ` Vladimir Davydov [this message] 2019-01-17 11:35 ` [tarantool-patches] Re: [PATCH 12/12] vinyl: add last level size " Konstantin Osipov 2019-01-17 11:32 ` [tarantool-patches] Re: [PATCH 00/12] vinyl: statistics improvements Konstantin Osipov 2019-01-17 12:06 ` Vladimir Davydov 2019-01-20 21:16 ` Vladimir Davydov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=35758e4f6ac6424f88735a362fe10754e31fbb87.1547558871.git.vdavydov.dev@gmail.com \ --to=vdavydov.dev@gmail.com \ --cc=tarantool-patches@freelists.org \ --subject='Re: [PATCH 12/12] vinyl: add last level size to statistics' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox