From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH 7/7] vinyl: keep track of disk idle time
Date: Sun, 2 Sep 2018 23:19:00 +0300 [thread overview]
Message-ID: <d0477df76ff786366c11c7f4030824f98aa90bb8.1535917763.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1535917763.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1535917763.git.vdavydov.dev@gmail.com>
To understand whether the disk is fully utilized or can still handle
more compaction load and make right decisions regarding transaction
throttling, we need a metric that would report how much time worker
threads spent being idle. So this patch adds a new metric to global
statistics, box.stat.vinyl().disk.idle_ratio. The metric is updated
on each dump using the following formula:
                        idle_time
  idle_ratio = --------------------------
               dump_period * worker_count
where idle_time is the total amount of time workers were idle between
the last two dumps, dump_period is the time that passed between the last
two dumps, worker_count is the number of workers.
The value of the new metric always lies between 0 inclusive and 1
exclusive. The closer it is to 1, the less busy the disk is.
---
src/box/vinyl.c | 1 +
src/box/vy_scheduler.c | 48 +++++++++++++++++++++++++++++++----
src/box/vy_scheduler.h | 12 +++++++++
test/vinyl/errinj.result | 62 ++++++++++++++++++++++++++++++++++++++++++++++
test/vinyl/errinj.test.lua | 22 ++++++++++++++++
test/vinyl/info.result | 1 +
test/vinyl/info.test.lua | 1 +
7 files changed, 142 insertions(+), 5 deletions(-)
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 416c9824..e140f03c 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -311,6 +311,7 @@ vy_info_append_disk(struct vy_env *env, struct info_handler *h)
info_append_int(h, "compact_total", lsm_env->compact_total);
info_append_int(h, "compact_queue", lsm_env->compact_queue);
info_append_int(h, "compact_debt", lsm_env->compact_debt);
+ info_append_double(h, "idle_ratio", env->scheduler.idle_ratio);
info_table_end(h);
}
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 580c3129..702f426c 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -96,6 +96,10 @@ struct vy_worker {
struct vy_task *task;
/** Link in vy_scheduler::idle_workers. */
struct stailq_entry in_idle;
+ /** Time when this worker became idle. */
+ double idle_start;
+ /** How much time this worker has been idle. */
+ double idle_time;
/** Route for sending deferred DELETEs back to tx. */
struct cmsg_hop deferred_delete_route[2];
};
@@ -346,6 +350,7 @@ vy_scheduler_start_workers(struct vy_scheduler *scheduler)
if (scheduler->worker_pool == NULL)
panic("failed to allocate vinyl worker pool");
+ double now = ev_monotonic_now(loop());
for (int i = 0; i < scheduler->worker_pool_size; i++) {
char name[FIBER_NAME_MAX];
snprintf(name, sizeof(name), "vinyl.writer.%d", i);
@@ -355,6 +360,7 @@ vy_scheduler_start_workers(struct vy_scheduler *scheduler)
cpipe_create(&worker->worker_pipe, name);
stailq_add_tail_entry(&scheduler->idle_workers,
worker, in_idle);
+ worker->idle_start = now;
struct cmsg_hop *route = worker->deferred_delete_route;
route[0].f = vy_deferred_delete_batch_process_f;
@@ -407,6 +413,7 @@ vy_scheduler_create(struct vy_scheduler *scheduler, int write_threads,
diag_create(&scheduler->diag);
fiber_cond_create(&scheduler->dump_cond);
+ scheduler->dump_end = ev_monotonic_now(loop());
fiber_start(scheduler->scheduler_fiber, scheduler);
}
@@ -548,6 +555,27 @@ vy_scheduler_force_compaction(struct vy_scheduler *scheduler,
}
/**
+ * Return total time workers have spent idle.
+ */
+static double
+vy_scheduler_get_idle_time(struct vy_scheduler *scheduler)
+{
+ double idle_time = 0;
+ double now = ev_monotonic_now(loop());
+
+ struct vy_worker *worker;
+ for (int i = 0; i < scheduler->worker_pool_size; i++) {
+ worker = &scheduler->worker_pool[i];
+ idle_time += worker->idle_time;
+ }
+
+ stailq_foreach_entry(worker, &scheduler->idle_workers, in_idle)
+ idle_time += now - worker->idle_start;
+
+ return idle_time;
+}
+
+/**
* Check whether the current dump round is complete.
* If it is, free memory and proceed to the next dump round.
*/
@@ -585,7 +613,11 @@ vy_scheduler_complete_dump(struct vy_scheduler *scheduler)
*/
double now = ev_monotonic_now(loop());
double dump_duration = now - scheduler->dump_start;
- scheduler->dump_start = now;
+ double idle_time = vy_scheduler_get_idle_time(scheduler);
+ scheduler->idle_ratio = (idle_time - scheduler->idle_time_at_dump) /
+ (now - scheduler->dump_end) / scheduler->worker_pool_size;
+ scheduler->idle_time_at_dump = idle_time;
+ scheduler->dump_start = scheduler->dump_end = now;
scheduler->dump_generation = min_generation;
scheduler->dump_complete_cb(scheduler,
min_generation - 1, dump_duration);
@@ -1900,7 +1932,9 @@ vy_scheduler_f(va_list va)
while (scheduler->scheduler_fiber != NULL) {
struct stailq processed_tasks;
struct vy_task *task, *next;
+ struct vy_worker *worker;
int tasks_failed = 0, tasks_done = 0;
+ double now = ev_monotonic_now(loop());
/* Get the list of processed tasks. */
stailq_create(&processed_tasks);
@@ -1913,8 +1947,10 @@ vy_scheduler_f(va_list va)
tasks_failed++;
else
tasks_done++;
+ worker = task->worker;
stailq_add_entry(&scheduler->idle_workers,
- task->worker, in_idle);
+ worker, in_idle);
+ worker->idle_start = now;
vy_task_delete(task);
scheduler->idle_worker_count++;
assert(scheduler->idle_worker_count <=
@@ -1951,11 +1987,13 @@ vy_scheduler_f(va_list va)
/* Queue the task and notify workers if necessary. */
assert(!stailq_empty(&scheduler->idle_workers));
- task->worker = stailq_shift_entry(&scheduler->idle_workers,
- struct vy_worker, in_idle);
+ worker = stailq_shift_entry(&scheduler->idle_workers,
+ struct vy_worker, in_idle);
+ worker->idle_time += now - worker->idle_start;
scheduler->idle_worker_count--;
+ task->worker = worker;
cmsg_init(&task->cmsg, vy_task_execute_route);
- cpipe_push(&task->worker->worker_pipe, &task->cmsg);
+ cpipe_push(&worker->worker_pipe, &task->cmsg);
fiber_reschedule();
continue;
diff --git a/src/box/vy_scheduler.h b/src/box/vy_scheduler.h
index deefacd7..5524ecce 100644
--- a/src/box/vy_scheduler.h
+++ b/src/box/vy_scheduler.h
@@ -136,6 +136,18 @@ struct vy_scheduler {
int dump_task_count;
/** Time when the current dump round started. */
double dump_start;
+ /** Time when the last dump round ended. */
+ double dump_end;
+ /**
+ * Total amount of time worker threads have been idle,
+ * taken at the time when the last dump round completed.
+ */
+ double idle_time_at_dump;
+ /**
+ * How much time worker threads were idle between the last
+ * two dumps, relative to the dump period.
+ */
+ double idle_ratio;
/** Signaled on dump round completion. */
struct fiber_cond dump_cond;
/**
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 7b880030..bb5377b4 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -2244,6 +2244,68 @@ i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt
---
- true
...
+s:truncate()
+---
+...
+box.stat.reset()
+---
+...
+-- Check disk.idle_ratio statistic.
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.01)
+---
+- ok
+...
+start = fiber.time()
+---
+...
+dump()
+---
+...
+fiber.sleep(fiber.time() - start)
+---
+...
+dump()
+---
+...
+-- one worker is busy half of the time
+expected = 1 - 1 / (2 * box.cfg.vinyl_write_threads)
+---
+...
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+---
+- true
+...
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', true)
+---
+- ok
+...
+start = fiber.time()
+---
+...
+dump()
+---
+...
+fiber.sleep(fiber.time() - start)
+---
+...
+dump()
+---
+...
+-- one worker is busy all the time, plus one half of the time
+expected = 1 - 3 / (2 * box.cfg.vinyl_write_threads)
+---
+...
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+---
+- true
+...
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', false)
+---
+- ok
+...
+while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end
+---
+...
s:drop()
---
...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 9037bfad..835f4540 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -883,4 +883,26 @@ i:stat().disk.compact.queue -- none
i:stat().disk.compact.debt -- none
i:stat().disk.compact.queue.bytes == box.stat.vinyl().disk.compact_queue
i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt
+s:truncate()
+box.stat.reset()
+
+-- Check disk.idle_ratio statistic.
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.01)
+start = fiber.time()
+dump()
+fiber.sleep(fiber.time() - start)
+dump()
+-- one worker is busy half of the time
+expected = 1 - 1 / (2 * box.cfg.vinyl_write_threads)
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', true)
+start = fiber.time()
+dump()
+fiber.sleep(fiber.time() - start)
+dump()
+-- one worker is busy all the time, plus one half of the time
+expected = 1 - 3 / (2 * box.cfg.vinyl_write_threads)
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', false)
+while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end
s:drop()
diff --git a/test/vinyl/info.result b/test/vinyl/info.result
index 556f5eca..4340be91 100644
--- a/test/vinyl/info.result
+++ b/test/vinyl/info.result
@@ -102,6 +102,7 @@ function gstat()
st.quota.use_rate = nil
st.quota.dump_bandwidth = nil
st.quota.watermark = nil
+ st.disk.idle_ratio = nil
return st
end;
---
diff --git a/test/vinyl/info.test.lua b/test/vinyl/info.test.lua
index 919dde63..fe070416 100644
--- a/test/vinyl/info.test.lua
+++ b/test/vinyl/info.test.lua
@@ -84,6 +84,7 @@ function gstat()
st.quota.use_rate = nil
st.quota.dump_bandwidth = nil
st.quota.watermark = nil
+ st.disk.idle_ratio = nil
return st
end;
--
2.11.0
next prev parent reply other threads:[~2018-09-02 20:19 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-09-02 20:18 [PATCH 0/7] vinyl: improve stats for throttling Vladimir Davydov
2018-09-02 20:18 ` [PATCH 1/7] vinyl: fix accounting of secondary index cache statements Vladimir Davydov
2018-09-02 22:26 ` [tarantool-patches] " Konstantin Osipov
2018-09-02 20:18 ` [PATCH 2/7] vinyl: add global memory stats Vladimir Davydov
2018-09-02 22:27 ` [tarantool-patches] " Konstantin Osipov
2018-09-02 22:27 ` Konstantin Osipov
2018-09-03 8:10 ` Vladimir Davydov
2018-09-02 20:18 ` [PATCH 3/7] vinyl: add global disk stats Vladimir Davydov
2018-09-02 22:30 ` [tarantool-patches] " Konstantin Osipov
2018-09-02 20:18 ` [PATCH 4/7] vinyl: fix force compaction logic Vladimir Davydov
2018-09-02 20:18 ` [PATCH 5/7] vinyl: update compact priority usual way on range split/coalesce Vladimir Davydov
2018-09-02 20:18 ` [PATCH 6/7] vinyl: keep track of compaction queue length and debt Vladimir Davydov
2018-09-02 20:19 ` Vladimir Davydov [this message]
2018-09-04 11:54 ` [PATCH 7/7] vinyl: keep track of disk idle time Vladimir Davydov
2018-09-04 17:23 ` Vladimir Davydov
2018-09-04 17:23 ` [PATCH 1/8] vinyl: add helper to check whether dump is in progress Vladimir Davydov
2018-09-06 7:33 ` Konstantin Osipov
2018-09-04 17:23 ` [PATCH 2/8] vinyl: don't use mempool for allocating background tasks Vladimir Davydov
2018-09-06 7:33 ` Konstantin Osipov
2018-09-04 17:23 ` [PATCH 3/8] vinyl: factor out worker pool from scheduler struct Vladimir Davydov
2018-09-06 7:34 ` Konstantin Osipov
2018-09-04 17:23 ` [PATCH 4/8] vinyl: move worker allocation closer to task creation Vladimir Davydov
2018-09-06 7:35 ` Konstantin Osipov
2018-09-04 17:23 ` [PATCH 5/8] vinyl: use separate thread pools for dump and compaction tasks Vladimir Davydov
2018-09-06 7:37 ` Konstantin Osipov
2018-09-06 9:48 ` Vladimir Davydov
2018-09-06 10:32 ` Konstantin Osipov
2018-09-04 17:23 ` [PATCH 6/8] vinyl: zap vy_worker_pool::idle_worker_count Vladimir Davydov
2018-09-06 7:38 ` Konstantin Osipov
2018-09-04 17:23 ` [PATCH 7/8] vinyl: don't start scheduler fiber until local recovery is complete Vladimir Davydov
2018-09-06 7:39 ` Konstantin Osipov
2018-09-04 17:23 ` [PATCH 8/8] vinyl: keep track of thread pool idle ratio Vladimir Davydov
2018-09-06 7:49 ` Konstantin Osipov
2018-09-06 8:18 ` Vladimir Davydov
2018-09-06 10:26 ` Konstantin Osipov
2018-09-06 10:52 ` Vladimir Davydov
2018-09-06 10:57 ` Konstantin Osipov
2018-09-06 11:59 ` Vladimir Davydov
2018-09-09 11:41 ` [PATCH 0/7] vinyl: improve stats for throttling Vladimir Davydov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=d0477df76ff786366c11c7f4030824f98aa90bb8.1535917763.git.vdavydov.dev@gmail.com \
--to=vdavydov.dev@gmail.com \
--cc=kostja@tarantool.org \
--cc=tarantool-patches@freelists.org \
--subject='Re: [PATCH 7/7] vinyl: keep track of disk idle time' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox