From: Vladimir Davydov <vdavydov.dev@gmail.com> To: kostja@tarantool.org Cc: tarantool-patches@freelists.org Subject: [PATCH 7/7] vinyl: keep track of disk idle time Date: Sun, 2 Sep 2018 23:19:00 +0300 [thread overview] Message-ID: <d0477df76ff786366c11c7f4030824f98aa90bb8.1535917763.git.vdavydov.dev@gmail.com> (raw) In-Reply-To: <cover.1535917763.git.vdavydov.dev@gmail.com> In-Reply-To: <cover.1535917763.git.vdavydov.dev@gmail.com> To understand whether the disk is fully utilized or can still handle more compaction load, and to make the right decisions regarding transaction throttling, we need a metric that reports how much time worker threads spent being idle. So this patch adds a new metric to global statistics, box.stat.vinyl().disk.idle_ratio. The metric is updated on each dump using the following formula: $\mathrm{idle\_ratio} = \dfrac{\mathrm{idle\_time}}{\mathrm{dump\_period} \times \mathrm{worker\_count}}$ where idle_time is the total amount of time workers were idle between the last two dumps, dump_period is the time that passed between the last two dumps, and worker_count is the number of workers. The value of the new metric always lies between 0 inclusive and 1 exclusive. The closer it is to 1, the more idle the worker threads (and hence the disk) are; the closer it is to 0, the busier they are. 
--- src/box/vinyl.c | 1 + src/box/vy_scheduler.c | 48 +++++++++++++++++++++++++++++++---- src/box/vy_scheduler.h | 12 +++++++++ test/vinyl/errinj.result | 62 ++++++++++++++++++++++++++++++++++++++++++++++ test/vinyl/errinj.test.lua | 22 ++++++++++++++++ test/vinyl/info.result | 1 + test/vinyl/info.test.lua | 1 + 7 files changed, 142 insertions(+), 5 deletions(-) diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 416c9824..e140f03c 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -311,6 +311,7 @@ vy_info_append_disk(struct vy_env *env, struct info_handler *h) info_append_int(h, "compact_total", lsm_env->compact_total); info_append_int(h, "compact_queue", lsm_env->compact_queue); info_append_int(h, "compact_debt", lsm_env->compact_debt); + info_append_double(h, "idle_ratio", env->scheduler.idle_ratio); info_table_end(h); } diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c index 580c3129..702f426c 100644 --- a/src/box/vy_scheduler.c +++ b/src/box/vy_scheduler.c @@ -96,6 +96,10 @@ struct vy_worker { struct vy_task *task; /** Link in vy_scheduler::idle_workers. */ struct stailq_entry in_idle; + /** Time when this worker became idle. */ + double idle_start; + /** How much time this worker have been idle. */ + double idle_time; /** Route for sending deferred DELETEs back to tx. 
*/ struct cmsg_hop deferred_delete_route[2]; }; @@ -346,6 +350,7 @@ vy_scheduler_start_workers(struct vy_scheduler *scheduler) if (scheduler->worker_pool == NULL) panic("failed to allocate vinyl worker pool"); + double now = ev_monotonic_now(loop()); for (int i = 0; i < scheduler->worker_pool_size; i++) { char name[FIBER_NAME_MAX]; snprintf(name, sizeof(name), "vinyl.writer.%d", i); @@ -355,6 +360,7 @@ vy_scheduler_start_workers(struct vy_scheduler *scheduler) cpipe_create(&worker->worker_pipe, name); stailq_add_tail_entry(&scheduler->idle_workers, worker, in_idle); + worker->idle_start = now; struct cmsg_hop *route = worker->deferred_delete_route; route[0].f = vy_deferred_delete_batch_process_f; @@ -407,6 +413,7 @@ vy_scheduler_create(struct vy_scheduler *scheduler, int write_threads, diag_create(&scheduler->diag); fiber_cond_create(&scheduler->dump_cond); + scheduler->dump_end = ev_monotonic_now(loop()); fiber_start(scheduler->scheduler_fiber, scheduler); } @@ -548,6 +555,27 @@ vy_scheduler_force_compaction(struct vy_scheduler *scheduler, } /** + * Return total time workers have spent idle. + */ +static double +vy_scheduler_get_idle_time(struct vy_scheduler *scheduler) +{ + double idle_time = 0; + double now = ev_monotonic_now(loop()); + + struct vy_worker *worker; + for (int i = 0; i < scheduler->worker_pool_size; i++) { + worker = &scheduler->worker_pool[i]; + idle_time += worker->idle_time; + } + + stailq_foreach_entry(worker, &scheduler->idle_workers, in_idle) + idle_time += now - worker->idle_start; + + return idle_time; +} + +/** * Check whether the current dump round is complete. * If it is, free memory and proceed to the next dump round. 
*/ @@ -585,7 +613,11 @@ vy_scheduler_complete_dump(struct vy_scheduler *scheduler) */ double now = ev_monotonic_now(loop()); double dump_duration = now - scheduler->dump_start; - scheduler->dump_start = now; + double idle_time = vy_scheduler_get_idle_time(scheduler); + scheduler->idle_ratio = (idle_time - scheduler->idle_time_at_dump) / + (now - scheduler->dump_end) / scheduler->worker_pool_size; + scheduler->idle_time_at_dump = idle_time; + scheduler->dump_start = scheduler->dump_end = now; scheduler->dump_generation = min_generation; scheduler->dump_complete_cb(scheduler, min_generation - 1, dump_duration); @@ -1900,7 +1932,9 @@ vy_scheduler_f(va_list va) while (scheduler->scheduler_fiber != NULL) { struct stailq processed_tasks; struct vy_task *task, *next; + struct vy_worker *worker; int tasks_failed = 0, tasks_done = 0; + double now = ev_monotonic_now(loop()); /* Get the list of processed tasks. */ stailq_create(&processed_tasks); @@ -1913,8 +1947,10 @@ vy_scheduler_f(va_list va) tasks_failed++; else tasks_done++; + worker = task->worker; stailq_add_entry(&scheduler->idle_workers, - task->worker, in_idle); + worker, in_idle); + worker->idle_start = now; vy_task_delete(task); scheduler->idle_worker_count++; assert(scheduler->idle_worker_count <= @@ -1951,11 +1987,13 @@ vy_scheduler_f(va_list va) /* Queue the task and notify workers if necessary. 
*/ assert(!stailq_empty(&scheduler->idle_workers)); - task->worker = stailq_shift_entry(&scheduler->idle_workers, - struct vy_worker, in_idle); + worker = stailq_shift_entry(&scheduler->idle_workers, + struct vy_worker, in_idle); + worker->idle_time += now - worker->idle_start; scheduler->idle_worker_count--; + task->worker = worker; cmsg_init(&task->cmsg, vy_task_execute_route); - cpipe_push(&task->worker->worker_pipe, &task->cmsg); + cpipe_push(&worker->worker_pipe, &task->cmsg); fiber_reschedule(); continue; diff --git a/src/box/vy_scheduler.h b/src/box/vy_scheduler.h index deefacd7..5524ecce 100644 --- a/src/box/vy_scheduler.h +++ b/src/box/vy_scheduler.h @@ -136,6 +136,18 @@ struct vy_scheduler { int dump_task_count; /** Time when the current dump round started. */ double dump_start; + /** Time when the last dump round ended. */ + double dump_end; + /** + * Total amount of time worker threads have been idle, + * taken at the time when the last dump round completed. + */ + double idle_time_at_dump; + /** + * How much time worker threads were idle between the last + * two dump, relative to the dump period. + */ + double idle_ratio; /** Signaled on dump round completion. */ struct fiber_cond dump_cond; /** diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result index 7b880030..bb5377b4 100644 --- a/test/vinyl/errinj.result +++ b/test/vinyl/errinj.result @@ -2244,6 +2244,68 @@ i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt --- - true ... +s:truncate() +--- +... +box.stat.reset() +--- +... +-- Check disk.idle_ratio statistic. +errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.01) +--- +- ok +... +start = fiber.time() +--- +... +dump() +--- +... +fiber.sleep(fiber.time() - start) +--- +... +dump() +--- +... +-- one worker is busy half of the time +expected = 1 - 1 / (2 * box.cfg.vinyl_write_threads) +--- +... +math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1 +--- +- true +... 
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', true) +--- +- ok +... +start = fiber.time() +--- +... +dump() +--- +... +fiber.sleep(fiber.time() - start) +--- +... +dump() +--- +... +-- one worker is busy all the time, plus one half of the time +expected = 1 - 3 / (2 * box.cfg.vinyl_write_threads) +--- +... +math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1 +--- +- true +... +errinj.set('ERRINJ_VY_COMPACTION_DELAY', false) +--- +- ok +... +while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end +--- +... s:drop() --- ... diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua index 9037bfad..835f4540 100644 --- a/test/vinyl/errinj.test.lua +++ b/test/vinyl/errinj.test.lua @@ -883,4 +883,26 @@ i:stat().disk.compact.queue -- none i:stat().disk.compact.debt -- none i:stat().disk.compact.queue.bytes == box.stat.vinyl().disk.compact_queue i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt +s:truncate() +box.stat.reset() + +-- Check disk.idle_ratio statistic. 
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.01) +start = fiber.time() +dump() +fiber.sleep(fiber.time() - start) +dump() +-- one worker is busy half of the time +expected = 1 - 1 / (2 * box.cfg.vinyl_write_threads) +math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1 +errinj.set('ERRINJ_VY_COMPACTION_DELAY', true) +start = fiber.time() +dump() +fiber.sleep(fiber.time() - start) +dump() +-- one worker is busy all the time, plus one half of the time +expected = 1 - 3 / (2 * box.cfg.vinyl_write_threads) +math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1 +errinj.set('ERRINJ_VY_COMPACTION_DELAY', false) +while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end s:drop() diff --git a/test/vinyl/info.result b/test/vinyl/info.result index 556f5eca..4340be91 100644 --- a/test/vinyl/info.result +++ b/test/vinyl/info.result @@ -102,6 +102,7 @@ function gstat() st.quota.use_rate = nil st.quota.dump_bandwidth = nil st.quota.watermark = nil + st.disk.idle_ratio = nil return st end; --- diff --git a/test/vinyl/info.test.lua b/test/vinyl/info.test.lua index 919dde63..fe070416 100644 --- a/test/vinyl/info.test.lua +++ b/test/vinyl/info.test.lua @@ -84,6 +84,7 @@ function gstat() st.quota.use_rate = nil st.quota.dump_bandwidth = nil st.quota.watermark = nil + st.disk.idle_ratio = nil return st end; -- 2.11.0
next prev parent reply other threads:[~2018-09-02 20:19 UTC|newest] Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-09-02 20:18 [PATCH 0/7] vinyl: improve stats for throttling Vladimir Davydov 2018-09-02 20:18 ` [PATCH 1/7] vinyl: fix accounting of secondary index cache statements Vladimir Davydov 2018-09-02 22:26 ` [tarantool-patches] " Konstantin Osipov 2018-09-02 20:18 ` [PATCH 2/7] vinyl: add global memory stats Vladimir Davydov 2018-09-02 22:27 ` [tarantool-patches] " Konstantin Osipov 2018-09-02 22:27 ` Konstantin Osipov 2018-09-03 8:10 ` Vladimir Davydov 2018-09-02 20:18 ` [PATCH 3/7] vinyl: add global disk stats Vladimir Davydov 2018-09-02 22:30 ` [tarantool-patches] " Konstantin Osipov 2018-09-02 20:18 ` [PATCH 4/7] vinyl: fix force compaction logic Vladimir Davydov 2018-09-02 20:18 ` [PATCH 5/7] vinyl: update compact priority usual way on range split/coalesce Vladimir Davydov 2018-09-02 20:18 ` [PATCH 6/7] vinyl: keep track of compaction queue length and debt Vladimir Davydov 2018-09-02 20:19 ` Vladimir Davydov [this message] 2018-09-04 11:54 ` [PATCH 7/7] vinyl: keep track of disk idle time Vladimir Davydov 2018-09-04 17:23 ` Vladimir Davydov 2018-09-04 17:23 ` [PATCH 1/8] vinyl: add helper to check whether dump is in progress Vladimir Davydov 2018-09-06 7:33 ` Konstantin Osipov 2018-09-04 17:23 ` [PATCH 2/8] vinyl: don't use mempool for allocating background tasks Vladimir Davydov 2018-09-06 7:33 ` Konstantin Osipov 2018-09-04 17:23 ` [PATCH 3/8] vinyl: factor out worker pool from scheduler struct Vladimir Davydov 2018-09-06 7:34 ` Konstantin Osipov 2018-09-04 17:23 ` [PATCH 4/8] vinyl: move worker allocation closer to task creation Vladimir Davydov 2018-09-06 7:35 ` Konstantin Osipov 2018-09-04 17:23 ` [PATCH 5/8] vinyl: use separate thread pools for dump and compaction tasks Vladimir Davydov 2018-09-06 7:37 ` Konstantin Osipov 2018-09-06 9:48 ` Vladimir Davydov 2018-09-06 10:32 ` Konstantin Osipov 2018-09-04 17:23 ` 
[PATCH 6/8] vinyl: zap vy_worker_pool::idle_worker_count Vladimir Davydov 2018-09-06 7:38 ` Konstantin Osipov 2018-09-04 17:23 ` [PATCH 7/8] vinyl: don't start scheduler fiber until local recovery is complete Vladimir Davydov 2018-09-06 7:39 ` Konstantin Osipov 2018-09-04 17:23 ` [PATCH 8/8] vinyl: keep track of thread pool idle ratio Vladimir Davydov 2018-09-06 7:49 ` Konstantin Osipov 2018-09-06 8:18 ` Vladimir Davydov 2018-09-06 10:26 ` Konstantin Osipov 2018-09-06 10:52 ` Vladimir Davydov 2018-09-06 10:57 ` Konstantin Osipov 2018-09-06 11:59 ` Vladimir Davydov 2018-09-09 11:41 ` [PATCH 0/7] vinyl: improve stats for throttling Vladimir Davydov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=d0477df76ff786366c11c7f4030824f98aa90bb8.1535917763.git.vdavydov.dev@gmail.com \ --to=vdavydov.dev@gmail.com \ --cc=kostja@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='Re: [PATCH 7/7] vinyl: keep track of disk idle time' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.