From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Wed, 16 Jan 2019 19:36:50 +0300 From: Vladimir Davydov Subject: Re: [PATCH 07/12] vinyl: move global dump/compaction statistics to scheduler Message-ID: <20190116163650.3bngy7gusgv3iayi@esperanza> References: MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: To: tarantool-patches@freelists.org List-ID: On Tue, Jan 15, 2019 at 05:17:16PM +0300, Vladimir Davydov wrote: > +/** > + * Account an LSM tree to the compaction queue stats. > + * Called after updating LSM tree compaction priority. > + */ > +static void > +vy_scheduler_acct_lsm(struct vy_scheduler *scheduler, struct vy_lsm *lsm) > +{ > + if (lsm->is_dropped) > + return; > + scheduler->stat.compaction_queue += lsm->stat.disk.compaction.queue.bytes; > +} > + > +/** > + * Unaccount an LSM tree from the compaction queue stats. > + * Called before updating LSM tree compaction priority. > + */ > +static void > +vy_scheduler_unacct_lsm(struct vy_scheduler *scheduler, struct vy_lsm *lsm) > +{ > + if (lsm->is_dropped) > + return; > + scheduler->stat.compaction_queue -= lsm->stat.disk.compaction.queue.bytes; > +} > @@ -1636,8 +1682,11 @@ vy_task_compaction_new(struct vy_scheduler *scheduler, struct vy_worker *worker, > range = container_of(range_node, struct vy_range, heap_node); > assert(range->compaction_priority > 1); > > - if (vy_lsm_split_range(lsm, range) || > - vy_lsm_coalesce_range(lsm, range)) { > + vy_scheduler_unacct_lsm(scheduler, lsm); > + bool lsm_changed = (vy_lsm_split_range(lsm, range) || > + vy_lsm_coalesce_range(lsm, range)); > + vy_scheduler_acct_lsm(scheduler, lsm); > + if (lsm_changed) { > vy_scheduler_update_lsm(scheduler, lsm); > return 0; > } Come to think of it, moving compaction_queue from vy_lsm_env to vy_scheduler isn't worth the complexity it introduces. 
Let's leave it in vy_lsm_env along with disk_data_size, disk_index_size, and last_level_size, but report it in the 'scheduler' section of box.stat.vinyl(). The new version of the patch goes right below. It doesn't affect the rest of the series. --- >From 1d23a1b10ac99fa197c443aa1f621535549b1f42 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Mon, 14 Jan 2019 16:49:42 +0300 Subject: [PATCH] vinyl: move global dump/compaction statistics to scheduler Although it's convenient to maintain dump/compaction input/output metrics in vy_lsm_env, semantically it's incorrect as those metrics characterize the scheduler, not the LSM environment. Also, we can't easily extend those stats with e.g. the number of completed dumps or the number of tasks in progress, because those are only known to the scheduler. That said, let's introduce a 'scheduler' section in box.stat.vinyl() and move dump/compaction stats from 'disk' to the new section. Let's also move the stats accounting from vy_lsm.c to vy_scheduler.c. The 'disk' section now stores only the size of data and index on disk and no cumulative statistics, which makes it similar to the 'memory' section. Note, this patch flattens the stats (disk.compaction.input is moved to scheduler.compaction_input and so forth), because all other global stats are reported without using nested tables. 
diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 5965700b..a89a1c27 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -245,6 +245,21 @@ static struct trigger on_replace_vinyl_deferred_delete; /** {{{ Introspection */ static void +vy_info_append_scheduler(struct vy_env *env, struct info_handler *h) +{ + struct vy_scheduler_stat *stat = &env->scheduler.stat; + + info_table_begin(h, "scheduler"); + info_append_int(h, "dump_input", stat->dump_input); + info_append_int(h, "dump_output", stat->dump_output); + info_append_int(h, "compaction_input", stat->compaction_input); + info_append_int(h, "compaction_output", stat->compaction_output); + info_append_int(h, "compaction_queue", + env->lsm_env.compaction_queue_size); + info_table_end(h); /* scheduler */ +} + +static void vy_info_append_regulator(struct vy_env *env, struct info_handler *h) { struct vy_regulator *r = &env->regulator; @@ -295,24 +310,9 @@ vy_info_append_memory(struct vy_env *env, struct info_handler *h) static void vy_info_append_disk(struct vy_env *env, struct info_handler *h) { - struct vy_disk_stat *stat = &env->lsm_env.disk_stat; - info_table_begin(h, "disk"); - - info_append_int(h, "data", stat->data); - info_append_int(h, "index", stat->index); - - info_table_begin(h, "dump"); - info_append_int(h, "input", stat->dump.input); - info_append_int(h, "output", stat->dump.output); - info_table_end(h); /* dump */ - - info_table_begin(h, "compaction"); - info_append_int(h, "input", stat->compaction.input); - info_append_int(h, "output", stat->compaction.output); - info_append_int(h, "queue", stat->compaction.queue); - info_table_end(h); /* compaction */ - + info_append_int(h, "data", env->lsm_env.disk_data_size); + info_append_int(h, "index", env->lsm_env.disk_index_size); info_table_end(h); /* disk */ } @@ -325,6 +325,7 @@ vinyl_engine_stat(struct vinyl_engine *vinyl, struct info_handler *h) vy_info_append_tx(env, h); vy_info_append_memory(env, h); vy_info_append_disk(env, h); + 
vy_info_append_scheduler(env, h); vy_info_append_regulator(env, h); info_end(h); } @@ -514,11 +515,7 @@ vinyl_engine_reset_stat(struct engine *engine) struct tx_manager *xm = env->xm; memset(&xm->stat, 0, sizeof(xm->stat)); - struct vy_disk_stat *disk_stat = &env->lsm_env.disk_stat; - disk_stat->dump.input = 0; - disk_stat->dump.output = 0; - disk_stat->compaction.input = 0; - disk_stat->compaction.output = 0; + vy_scheduler_reset_stat(&env->scheduler); } /** }}} Introspection */ diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c index abadab5c..2aafe354 100644 --- a/src/box/vy_lsm.c +++ b/src/box/vy_lsm.c @@ -239,7 +239,7 @@ vy_lsm_delete(struct vy_lsm *lsm) assert(lsm->env->lsm_count > 0); lsm->env->lsm_count--; - lsm->env->disk_stat.compaction.queue -= + lsm->env->compaction_queue_size -= lsm->stat.disk.compaction.queue.bytes; if (lsm->pk != NULL) @@ -691,11 +691,11 @@ vy_lsm_add_run(struct vy_lsm *lsm, struct vy_run *run) /* Data size is consistent with space.bsize. */ if (lsm->index_id == 0) - env->disk_stat.data += run->count.bytes; + env->disk_data_size += run->count.bytes; /* Index size is consistent with index.bsize. */ - env->disk_stat.index += bloom_size + page_index_size; + env->disk_index_size += bloom_size + page_index_size; if (lsm->index_id > 0) - env->disk_stat.index += run->count.bytes; + env->disk_index_size += run->count.bytes; } void @@ -720,11 +720,11 @@ vy_lsm_remove_run(struct vy_lsm *lsm, struct vy_run *run) /* Data size is consistent with space.bsize. */ if (lsm->index_id == 0) - env->disk_stat.data -= run->count.bytes; + env->disk_data_size -= run->count.bytes; /* Index size is consistent with index.bsize. 
*/ - env->disk_stat.index -= bloom_size + page_index_size; + env->disk_index_size -= bloom_size + page_index_size; if (lsm->index_id > 0) - env->disk_stat.index -= run->count.bytes; + env->disk_index_size -= run->count.bytes; } void @@ -751,7 +751,7 @@ vy_lsm_acct_range(struct vy_lsm *lsm, struct vy_range *range) histogram_collect(lsm->run_hist, range->slice_count); vy_disk_stmt_counter_add(&lsm->stat.disk.compaction.queue, &range->compaction_queue); - lsm->env->disk_stat.compaction.queue += range->compaction_queue.bytes; + lsm->env->compaction_queue_size += range->compaction_queue.bytes; } void @@ -760,7 +760,7 @@ vy_lsm_unacct_range(struct vy_lsm *lsm, struct vy_range *range) histogram_discard(lsm->run_hist, range->slice_count); vy_disk_stmt_counter_sub(&lsm->stat.disk.compaction.queue, &range->compaction_queue); - lsm->env->disk_stat.compaction.queue -= range->compaction_queue.bytes; + lsm->env->compaction_queue_size -= range->compaction_queue.bytes; } void @@ -771,9 +771,6 @@ vy_lsm_acct_dump(struct vy_lsm *lsm, lsm->stat.disk.dump.count++; vy_stmt_counter_add(&lsm->stat.disk.dump.input, input); vy_disk_stmt_counter_add(&lsm->stat.disk.dump.output, output); - - lsm->env->disk_stat.dump.input += input->bytes; - lsm->env->disk_stat.dump.output += output->bytes; } void @@ -784,9 +781,6 @@ vy_lsm_acct_compaction(struct vy_lsm *lsm, lsm->stat.disk.compaction.count++; vy_disk_stmt_counter_add(&lsm->stat.disk.compaction.input, input); vy_disk_stmt_counter_add(&lsm->stat.disk.compaction.output, output); - - lsm->env->disk_stat.compaction.input += input->bytes; - lsm->env->disk_stat.compaction.output += output->bytes; } int diff --git a/src/box/vy_lsm.h b/src/box/vy_lsm.h index e7487995..6b51c5c7 100644 --- a/src/box/vy_lsm.h +++ b/src/box/vy_lsm.h @@ -91,8 +91,27 @@ struct vy_lsm_env { size_t bloom_size; /** Size of memory used for page index. */ size_t page_index_size; - /** Global disk statistics. 
*/ - struct vy_disk_stat disk_stat; + /** + * Size of disk space used for storing data of all spaces, + * in bytes, without taking into account disk compression. + * By 'data' we mean statements stored in primary indexes + * only, which is consistent with space.bsize(). + */ + int64_t disk_data_size; + /** + * Size of disk space used for indexing data in all spaces, + * in bytes, without taking into account disk compression. + * This consists of page indexes and bloom filters, which + * are stored in .index files, as well as the total size of + * statements stored in secondary index .run files, which + * is consistent with index.bsize(). + */ + int64_t disk_index_size; + /** + * Size of data of all spaces that need to be compacted, + * in bytes, without taking into account disk compression. + */ + int64_t compaction_queue_size; /** Memory pool for vy_history_node allocations. */ struct mempool history_node_pool; }; diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c index f431eb24..f65f4b14 100644 --- a/src/box/vy_scheduler.c +++ b/src/box/vy_scheduler.c @@ -501,6 +501,16 @@ vy_scheduler_destroy(struct vy_scheduler *scheduler) } void +vy_scheduler_reset_stat(struct vy_scheduler *scheduler) +{ + struct vy_scheduler_stat *stat = &scheduler->stat; + stat->dump_input = 0; + stat->dump_output = 0; + stat->compaction_input = 0; + stat->compaction_output = 0; +} + +void vy_scheduler_add_lsm(struct vy_scheduler *scheduler, struct vy_lsm *lsm) { assert(!lsm->is_dropped); @@ -1227,6 +1237,8 @@ delete_mems: } lsm->dump_lsn = MAX(lsm->dump_lsn, dump_lsn); vy_lsm_acct_dump(lsm, &dump_input, &dump_output); + scheduler->stat.dump_input += dump_input.bytes; + scheduler->stat.dump_output += dump_output.bytes; /* The iterator has been cleaned up in a worker thread. 
*/ task->wi->iface->close(task->wi); @@ -1564,6 +1576,8 @@ vy_task_compaction_complete(struct vy_task *task) vy_range_update_compaction_priority(range, &lsm->opts); vy_lsm_acct_range(lsm, range); vy_lsm_acct_compaction(lsm, &compaction_input, &compaction_output); + scheduler->stat.compaction_input += compaction_input.bytes; + scheduler->stat.compaction_output += compaction_output.bytes; /* * Unaccount unused runs and delete compacted slices. diff --git a/src/box/vy_scheduler.h b/src/box/vy_scheduler.h index 5b09f964..2d4352d7 100644 --- a/src/box/vy_scheduler.h +++ b/src/box/vy_scheduler.h @@ -41,6 +41,7 @@ #define HEAP_FORWARD_DECLARATION #include "salad/heap.h" #include "salad/stailq.h" +#include "vy_stat.h" #if defined(__cplusplus) extern "C" { @@ -139,6 +140,8 @@ struct vy_scheduler { double dump_start; /** Signaled on dump round completion. */ struct fiber_cond dump_cond; + /** Scheduler statistics. */ + struct vy_scheduler_stat stat; /** * Function called by the scheduler upon dump round * completion. It is supposed to free memory released @@ -184,6 +187,12 @@ void vy_scheduler_destroy(struct vy_scheduler *scheduler); /** + * Reset scheduler statistics (called by box.stat.reset). + */ +void +vy_scheduler_reset_stat(struct vy_scheduler *scheduler); + +/** * Add an LSM tree to scheduler dump/compaction queues. */ void diff --git a/src/box/vy_stat.h b/src/box/vy_stat.h index 7ed55ff5..62ef2b75 100644 --- a/src/box/vy_stat.h +++ b/src/box/vy_stat.h @@ -206,23 +206,20 @@ struct vy_tx_stat { }; /** - * Global disk statistics. + * Scheduler statistics. * - * Fields correspond to those of per LSM tree statistics. - * All counters are given in bytes, uncompressed. + * All byte counters are given without taking into account + * disk compression. 
*/ -struct vy_disk_stat { - int64_t data; - int64_t index; - struct { - int64_t input; - int64_t output; - } dump; - struct { - int64_t input; - int64_t output; - int64_t queue; - } compaction; +struct vy_scheduler_stat { + /** Number of bytes read by dump tasks. */ + int64_t dump_input; + /** Number of bytes written by dump tasks. */ + int64_t dump_output; + /** Number of bytes read by compaction tasks. */ + int64_t compaction_input; + /** Number of bytes written by compaction tasks. */ + int64_t compaction_output; }; static inline int diff --git a/test/vinyl/errinj_stat.result b/test/vinyl/errinj_stat.result index 9d7f64e9..fe379cef 100644 --- a/test/vinyl/errinj_stat.result +++ b/test/vinyl/errinj_stat.result @@ -50,7 +50,7 @@ i:stat().disk.compaction.queue -- none rows: 0 bytes: 0 ... -i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue +i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue --- - true ... @@ -71,7 +71,7 @@ i:stat().disk.compaction.queue -- 30 statements rows: 30 bytes: 411 ... -i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue +i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue --- - true ... @@ -85,7 +85,7 @@ i:stat().disk.compaction.queue -- 40 statements rows: 40 bytes: 548 ... -i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue +i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue --- - true ... @@ -99,7 +99,7 @@ i:stat().disk.compaction.queue -- 50 statements rows: 50 bytes: 685 ... -i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue +i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue --- - true ... @@ -113,7 +113,7 @@ i:stat().disk.compaction.queue -- 50 statements rows: 50 bytes: 685 ... 
-i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue +i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue --- - true ... diff --git a/test/vinyl/errinj_stat.test.lua b/test/vinyl/errinj_stat.test.lua index 6cd95cf9..3556f50a 100644 --- a/test/vinyl/errinj_stat.test.lua +++ b/test/vinyl/errinj_stat.test.lua @@ -22,21 +22,21 @@ i = s:create_index('pk', {run_count_per_level = 2}) function dump() for i = 1, 10 do s:replace{i} end box.snapshot() end dump() i:stat().disk.compaction.queue -- none -i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue +i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue errinj.set('ERRINJ_VY_COMPACTION_DELAY', true) dump() dump() i:stat().disk.compaction.queue -- 30 statements -i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue +i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue dump() i:stat().disk.compaction.queue -- 40 statements -i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue +i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue dump() i:stat().disk.compaction.queue -- 50 statements -i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue +i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue box.stat.reset() -- doesn't affect queue size i:stat().disk.compaction.queue -- 50 statements -i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue +i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue errinj.set('ERRINJ_VY_COMPACTION_DELAY', false) while i:stat().disk.compaction.count < 2 do fiber.sleep(0.01) end i:stat().disk.compaction.queue -- none diff --git a/test/vinyl/stat.result b/test/vinyl/stat.result index 68aea30b..16f01945 100644 --- a/test/vinyl/stat.result +++ b/test/vinyl/stat.result @@ 
-224,30 +224,29 @@ istat() ... gstat() --- -- disk: - dump: - input: 0 - output: 0 - compaction: - input: 0 - output: 0 - queue: 0 - data: 0 - index: 0 +- tx: + conflict: 0 + commit: 0 + rollback: 0 + statements: 0 + transactions: 0 + gap_locks: 0 + read_views: 0 memory: tuple_cache: 0 tx: 0 level0: 0 page_index: 0 bloom_filter: 0 - tx: - conflict: 0 - commit: 0 - rollback: 0 - statements: 0 - transactions: 0 - gap_locks: 0 - read_views: 0 + disk: + data: 0 + index: 0 + scheduler: + compaction_output: 0 + compaction_queue: 0 + dump_output: 0 + dump_input: 0 + compaction_input: 0 ... -- -- Index statistics. @@ -701,19 +700,19 @@ box.rollback() -- Global statistics. -- -- dump and compaction totals -gstat().disk.dump.input == istat().disk.dump.input.bytes +gstat().scheduler.dump_input == istat().disk.dump.input.bytes --- - true ... -gstat().disk.dump.output == istat().disk.dump.output.bytes +gstat().scheduler.dump_output == istat().disk.dump.output.bytes --- - true ... -gstat().disk.compaction.input == istat().disk.compaction.input.bytes +gstat().scheduler.compaction_input == istat().disk.compaction.input.bytes --- - true ... -gstat().disk.compaction.output == istat().disk.compaction.output.bytes +gstat().scheduler.compaction_output == istat().disk.compaction.output.bytes --- - true ... @@ -1074,30 +1073,29 @@ istat() ... gstat() --- -- disk: - dump: - input: 0 - output: 0 - compaction: - input: 0 - output: 0 - queue: 0 - data: 104300 - index: 1190 +- tx: + conflict: 0 + commit: 0 + rollback: 0 + statements: 0 + transactions: 0 + gap_locks: 0 + read_views: 0 memory: tuple_cache: 14313 tx: 0 level0: 262583 page_index: 1050 bloom_filter: 140 - tx: - conflict: 0 - commit: 0 - rollback: 0 - statements: 0 - transactions: 0 - gap_locks: 0 - read_views: 0 + disk: + data: 104300 + index: 1190 + scheduler: + compaction_output: 0 + compaction_queue: 0 + dump_output: 0 + dump_input: 0 + compaction_input: 0 ... 
s:drop() --- diff --git a/test/vinyl/stat.test.lua b/test/vinyl/stat.test.lua index 0173cf89..6708fcb9 100644 --- a/test/vinyl/stat.test.lua +++ b/test/vinyl/stat.test.lua @@ -205,10 +205,10 @@ box.rollback() -- -- dump and compaction totals -gstat().disk.dump.input == istat().disk.dump.input.bytes -gstat().disk.dump.output == istat().disk.dump.output.bytes -gstat().disk.compaction.input == istat().disk.compaction.input.bytes -gstat().disk.compaction.output == istat().disk.compaction.output.bytes +gstat().scheduler.dump_input == istat().disk.dump.input.bytes +gstat().scheduler.dump_output == istat().disk.dump.output.bytes +gstat().scheduler.compaction_input == istat().disk.compaction.input.bytes +gstat().scheduler.compaction_output == istat().disk.compaction.output.bytes -- use memory st = gstat()