From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH 8/8] vinyl: keep track of thread pool idle ratio Date: Tue, 4 Sep 2018 20:23:51 +0300 Message-Id: In-Reply-To: References: <20180904115404.el6kdswgitsnopgf@esperanza> In-Reply-To: References: To: kostja@tarantool.org Cc: tarantool-patches@freelists.org List-ID: To understand whether the disk is fully utilized or can still handle more compaction load and make right decisions regarding transaction throttling, we need a metric that would report how much time worker threads spent being idle. So this patch adds two new metrics to global statistics, disk.dump_idle_ratio and compact_idle_ratio, which show how much time dump threads and compaction threads were idle, respectively. The metrics are updated using the following formula: idle_time idle_ratio = -------------------------- dump_period * worker_count where idle_time is the total amount of time workers were idle between the last two dumps, dump_period is the time that passed between the last two dumps, worker_count is the number of workers in the pool. --- src/box/vinyl.c | 5 +++ src/box/vy_scheduler.c | 64 ++++++++++++++++++++++++++++++++++++ src/box/vy_scheduler.h | 11 +++++++ test/vinyl/errinj.result | 82 ++++++++++++++++++++++++++++++++++++++++++++++ test/vinyl/errinj.test.lua | 37 +++++++++++++++++++++ test/vinyl/info.result | 14 ++++---- test/vinyl/info.test.lua | 2 ++ 7 files changed, 209 insertions(+), 6 deletions(-) diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 1cf9ad16..f6b23cd6 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -302,15 +302,20 @@ static void vy_info_append_disk(struct vy_env *env, struct info_handler *h) { struct vy_lsm_env *lsm_env = &env->lsm_env; + struct vy_scheduler *scheduler = &env->scheduler; info_table_begin(h, "disk"); info_append_int(h, "data_files", lsm_env->data_file_count); info_append_int(h, "data_size", lsm_env->disk_data_size); info_append_int(h, "index_size", lsm_env->disk_index_size); info_append_int(h, "dump_total", lsm_env->dump_total); + info_append_double(h, "dump_idle_ratio", + scheduler->dump_pool.idle_ratio); info_append_int(h, "compact_total", lsm_env->compact_total); info_append_int(h, "compact_queue", lsm_env->compact_queue); info_append_int(h, "compact_debt", lsm_env->compact_debt); + info_append_double(h, "compact_idle_ratio", + scheduler->compact_pool.idle_ratio); info_table_end(h); } diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c index 5daaf4f1..178db2cc 100644 --- a/src/box/vy_scheduler.c +++ b/src/box/vy_scheduler.c @@ -97,6 +97,10 @@ struct vy_worker { struct vy_task *task; /** Link in vy_worker_pool::idle_workers. */ struct stailq_entry in_idle; + /** Time when this worker became idle. */ + double idle_start; + /** How much time this worker have been idle. */ + double idle_time; /** Route for sending deferred DELETEs back to tx. */ struct cmsg_hop deferred_delete_route[2]; }; @@ -355,6 +359,13 @@ vy_worker_pool_start(struct vy_worker_pool *pool) cpipe_create(&worker->worker_pipe, name); stailq_add_tail_entry(&pool->idle_workers, worker, in_idle); + /* + * Distribute accumulated idle time amongst workers + * as if they were running all the time. + */ + worker->idle_start = pool->last_idle_update; + worker->idle_time = pool->last_idle_time / pool->size; + struct cmsg_hop *route = worker->deferred_delete_route; route[0].f = vy_deferred_delete_batch_process_f; route[0].pipe = &worker->worker_pipe; @@ -384,6 +395,9 @@ vy_worker_pool_create(struct vy_worker_pool *pool, const char *name, int size) pool->size = size; pool->workers = NULL; stailq_create(&pool->idle_workers); + pool->idle_ratio = 1; + pool->last_idle_time = 0; + pool->last_idle_update = ev_monotonic_now(loop()); } static void @@ -412,6 +426,8 @@ vy_worker_pool_get(struct vy_worker_pool *pool) worker = stailq_shift_entry(&pool->idle_workers, struct vy_worker, in_idle); assert(worker->pool == pool); + worker->idle_time += ev_monotonic_now(loop()) - + worker->idle_start; } return worker; } @@ -425,6 +441,51 @@ vy_worker_pool_put(struct vy_worker *worker) { struct vy_worker_pool *pool = worker->pool; stailq_add_entry(&pool->idle_workers, worker, in_idle); + worker->idle_start = ev_monotonic_now(loop()); +} + +/** + * Return total time workers have spent idle. + */ +static double +vy_worker_pool_idle_time(struct vy_worker_pool *pool) +{ + double now = ev_monotonic_now(loop()); + + if (pool->workers == NULL) { + /* + * Workers haven't been started yet so naturally + * they all should be accounted as idle. + */ + return pool->last_idle_time + + (now - pool->last_idle_update) * pool->size; + } + + double idle_time = 0; + struct vy_worker *worker; + for (int i = 0; i < pool->size; i++) { + worker = &pool->workers[i]; + idle_time += worker->idle_time; + } + + stailq_foreach_entry(worker, &pool->idle_workers, in_idle) + idle_time += now - worker->idle_start; + + return idle_time; +} + +/** + * Update idle ratio of a worker pool. + */ +static void +vy_worker_pool_update_idle_ratio(struct vy_worker_pool *pool) +{ + double now = ev_monotonic_now(loop()); + double idle_time = vy_worker_pool_idle_time(pool); + pool->idle_ratio = (idle_time - pool->last_idle_time) / + (now - pool->last_idle_update) / pool->size; + pool->last_idle_time = idle_time; + pool->last_idle_update = now; } void @@ -657,6 +718,9 @@ vy_scheduler_complete_dump(struct vy_scheduler *scheduler) scheduler->dump_complete_cb(scheduler, min_generation - 1, dump_duration); fiber_cond_signal(&scheduler->dump_cond); + + vy_worker_pool_update_idle_ratio(&scheduler->dump_pool); + vy_worker_pool_update_idle_ratio(&scheduler->compact_pool); } int diff --git a/src/box/vy_scheduler.h b/src/box/vy_scheduler.h index 96ce721b..d6553cc8 100644 --- a/src/box/vy_scheduler.h +++ b/src/box/vy_scheduler.h @@ -65,6 +65,17 @@ struct vy_worker_pool { struct vy_worker *workers; /** List of workers that are currently idle. */ struct stailq idle_workers; + /** + * How much time worker threads were idle, relative to + * the total time (0 <= @idle_ratio <= 1). + * + * Updated on memory dump completion. + */ + double idle_ratio; + /** Time of the last @idle_ratio update. */ + double last_idle_update; + /** Total idle time at @last_idle_update. */ + double last_idle_time; }; struct vy_scheduler { diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result index 7b880030..35769e66 100644 --- a/test/vinyl/errinj.result +++ b/test/vinyl/errinj.result @@ -2247,3 +2247,85 @@ i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt s:drop() --- ... +-- +-- Check idle_ratio metric. +-- +dump_threads = math.max(1, math.ceil(box.cfg.vinyl_write_threads / 4)) +--- +... +compact_threads = box.cfg.vinyl_write_threads - dump_threads +--- +... +s = box.schema.space.create('test', {engine = 'vinyl'}) +--- +... +i = s:create_index('pk', {run_count_per_level = 2}) +--- +... +function dump() for i = 1, 10 do s:replace{i} end box.snapshot() end +--- +... +dump() +--- +... +fiber.sleep(0.1) +--- +... +errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.1) +--- +- ok +... +dump() +--- +... +errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0) +--- +- ok +... +-- one dump thread was busy half of the time +idle = box.stat.vinyl().disk.dump_idle_ratio +--- +... +expected = 1 - 1 / (2 * dump_threads) +--- +... +math.abs(idle - expected) < 0.1 or idle +--- +- true +... +-- all compaction threads were idle +box.stat.vinyl().disk.compact_idle_ratio -- 1 +--- +- 1 +... +errinj.set('ERRINJ_VY_COMPACTION_DELAY', true) +--- +- ok +... +dump() +--- +... +dump() +--- +... +errinj.set('ERRINJ_VY_COMPACTION_DELAY', false) +--- +- ok +... +-- one compaction thread was busy all the time +idle = box.stat.vinyl().disk.compact_idle_ratio +--- +... +expected = 1 - 1 / compact_threads +--- +... +math.abs(idle - expected) < 0.1 or idle +--- +- true +... +while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end +--- +... +s:drop() +--- +... diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua index 9037bfad..fc52dbee 100644 --- a/test/vinyl/errinj.test.lua +++ b/test/vinyl/errinj.test.lua @@ -884,3 +884,40 @@ i:stat().disk.compact.debt -- none i:stat().disk.compact.queue.bytes == box.stat.vinyl().disk.compact_queue i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt s:drop() + +-- +-- Check idle_ratio metric. +-- +dump_threads = math.max(1, math.ceil(box.cfg.vinyl_write_threads / 4)) +compact_threads = box.cfg.vinyl_write_threads - dump_threads + +s = box.schema.space.create('test', {engine = 'vinyl'}) +i = s:create_index('pk', {run_count_per_level = 2}) +function dump() for i = 1, 10 do s:replace{i} end box.snapshot() end + +dump() +fiber.sleep(0.1) +errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.1) +dump() +errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0) + +-- one dump thread was busy half of the time +idle = box.stat.vinyl().disk.dump_idle_ratio +expected = 1 - 1 / (2 * dump_threads) +math.abs(idle - expected) < 0.1 or idle + +-- all compaction threads were idle +box.stat.vinyl().disk.compact_idle_ratio -- 1 + +errinj.set('ERRINJ_VY_COMPACTION_DELAY', true) +dump() +dump() +errinj.set('ERRINJ_VY_COMPACTION_DELAY', false) + +-- one compaction thread was busy all the time +idle = box.stat.vinyl().disk.compact_idle_ratio +expected = 1 - 1 / compact_threads +math.abs(idle - expected) < 0.1 or idle + +while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end +s:drop() diff --git a/test/vinyl/info.result b/test/vinyl/info.result index 556f5eca..6dcdc90f 100644 --- a/test/vinyl/info.result +++ b/test/vinyl/info.result @@ -102,6 +102,8 @@ function gstat() st.quota.use_rate = nil st.quota.dump_bandwidth = nil st.quota.watermark = nil + st.disk.dump_idle_ratio = nil + st.disk.compact_idle_ratio = nil return st end; --- @@ -218,12 +220,12 @@ gstat() --- - disk: dump_total: 0 - data_size: 0 + index_size: 0 compact_debt: 0 - compact_queue: 0 data_files: 0 + compact_queue: 0 + data_size: 0 compact_total: 0 - index_size: 0 quota: limit: 134217728 used: 0 @@ -1039,12 +1041,12 @@ gstat() --- - disk: dump_total: 0 - data_size: 104300 + index_size: 1190 compact_debt: 0 - compact_queue: 0 data_files: 2 + compact_queue: 0 + data_size: 104300 compact_total: 0 - index_size: 1190 quota: limit: 134217728 used: 262583 diff --git a/test/vinyl/info.test.lua b/test/vinyl/info.test.lua index 919dde63..637aa323 100644 --- a/test/vinyl/info.test.lua +++ b/test/vinyl/info.test.lua @@ -84,6 +84,8 @@ function gstat() st.quota.use_rate = nil st.quota.dump_bandwidth = nil st.quota.watermark = nil + st.disk.dump_idle_ratio = nil + st.disk.compact_idle_ratio = nil return st end; -- 2.11.0