[PATCH 7/7] vinyl: keep track of disk idle time
Vladimir Davydov
vdavydov.dev at gmail.com
Sun Sep 2 23:19:00 MSK 2018
To understand whether the disk is fully utilized or can still handle
more compaction load, and to make the right decisions regarding transaction
throttling, we need a metric that would report how much time worker
threads spent being idle. So this patch adds a new metric to global
statistics, box.stat.vinyl().disk.idle_ratio. The metric is updated
on each dump using the following formula:
                        idle_time
  idle_ratio = --------------------------
               dump_period * worker_count
where idle_time is the total amount of time workers were idle between
the last two dumps, dump_period is the time that passed between the last
two dumps, worker_count is the number of workers.
The value of the new metric always lies between 0 inclusive and 1
exclusive. The closer it is to 1, the more idle (less busy) the disk is.
---
src/box/vinyl.c | 1 +
src/box/vy_scheduler.c | 48 +++++++++++++++++++++++++++++++----
src/box/vy_scheduler.h | 12 +++++++++
test/vinyl/errinj.result | 62 ++++++++++++++++++++++++++++++++++++++++++++++
test/vinyl/errinj.test.lua | 22 ++++++++++++++++
test/vinyl/info.result | 1 +
test/vinyl/info.test.lua | 1 +
7 files changed, 142 insertions(+), 5 deletions(-)
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 416c9824..e140f03c 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -311,6 +311,7 @@ vy_info_append_disk(struct vy_env *env, struct info_handler *h)
info_append_int(h, "compact_total", lsm_env->compact_total);
info_append_int(h, "compact_queue", lsm_env->compact_queue);
info_append_int(h, "compact_debt", lsm_env->compact_debt);
+ info_append_double(h, "idle_ratio", env->scheduler.idle_ratio);
info_table_end(h);
}
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 580c3129..702f426c 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -96,6 +96,10 @@ struct vy_worker {
struct vy_task *task;
/** Link in vy_scheduler::idle_workers. */
struct stailq_entry in_idle;
+ /** Time when this worker became idle. */
+ double idle_start;
+ /** How much time this worker has been idle. */
+ double idle_time;
/** Route for sending deferred DELETEs back to tx. */
struct cmsg_hop deferred_delete_route[2];
};
@@ -346,6 +350,7 @@ vy_scheduler_start_workers(struct vy_scheduler *scheduler)
if (scheduler->worker_pool == NULL)
panic("failed to allocate vinyl worker pool");
+ double now = ev_monotonic_now(loop());
for (int i = 0; i < scheduler->worker_pool_size; i++) {
char name[FIBER_NAME_MAX];
snprintf(name, sizeof(name), "vinyl.writer.%d", i);
@@ -355,6 +360,7 @@ vy_scheduler_start_workers(struct vy_scheduler *scheduler)
cpipe_create(&worker->worker_pipe, name);
stailq_add_tail_entry(&scheduler->idle_workers,
worker, in_idle);
+ worker->idle_start = now;
struct cmsg_hop *route = worker->deferred_delete_route;
route[0].f = vy_deferred_delete_batch_process_f;
@@ -407,6 +413,7 @@ vy_scheduler_create(struct vy_scheduler *scheduler, int write_threads,
diag_create(&scheduler->diag);
fiber_cond_create(&scheduler->dump_cond);
+ scheduler->dump_end = ev_monotonic_now(loop());
fiber_start(scheduler->scheduler_fiber, scheduler);
}
@@ -548,6 +555,27 @@ vy_scheduler_force_compaction(struct vy_scheduler *scheduler,
}
/**
+ * Return total time workers have spent idle.
+ */
+static double
+vy_scheduler_get_idle_time(struct vy_scheduler *scheduler)
+{
+ double idle_time = 0;
+ double now = ev_monotonic_now(loop());
+
+ struct vy_worker *worker;
+ for (int i = 0; i < scheduler->worker_pool_size; i++) {
+ worker = &scheduler->worker_pool[i];
+ idle_time += worker->idle_time;
+ }
+
+ stailq_foreach_entry(worker, &scheduler->idle_workers, in_idle)
+ idle_time += now - worker->idle_start;
+
+ return idle_time;
+}
+
+/**
* Check whether the current dump round is complete.
* If it is, free memory and proceed to the next dump round.
*/
@@ -585,7 +613,11 @@ vy_scheduler_complete_dump(struct vy_scheduler *scheduler)
*/
double now = ev_monotonic_now(loop());
double dump_duration = now - scheduler->dump_start;
- scheduler->dump_start = now;
+ double idle_time = vy_scheduler_get_idle_time(scheduler);
+ scheduler->idle_ratio = (idle_time - scheduler->idle_time_at_dump) /
+ (now - scheduler->dump_end) / scheduler->worker_pool_size;
+ scheduler->idle_time_at_dump = idle_time;
+ scheduler->dump_start = scheduler->dump_end = now;
scheduler->dump_generation = min_generation;
scheduler->dump_complete_cb(scheduler,
min_generation - 1, dump_duration);
@@ -1900,7 +1932,9 @@ vy_scheduler_f(va_list va)
while (scheduler->scheduler_fiber != NULL) {
struct stailq processed_tasks;
struct vy_task *task, *next;
+ struct vy_worker *worker;
int tasks_failed = 0, tasks_done = 0;
+ double now = ev_monotonic_now(loop());
/* Get the list of processed tasks. */
stailq_create(&processed_tasks);
@@ -1913,8 +1947,10 @@ vy_scheduler_f(va_list va)
tasks_failed++;
else
tasks_done++;
+ worker = task->worker;
stailq_add_entry(&scheduler->idle_workers,
- task->worker, in_idle);
+ worker, in_idle);
+ worker->idle_start = now;
vy_task_delete(task);
scheduler->idle_worker_count++;
assert(scheduler->idle_worker_count <=
@@ -1951,11 +1987,13 @@ vy_scheduler_f(va_list va)
/* Queue the task and notify workers if necessary. */
assert(!stailq_empty(&scheduler->idle_workers));
- task->worker = stailq_shift_entry(&scheduler->idle_workers,
- struct vy_worker, in_idle);
+ worker = stailq_shift_entry(&scheduler->idle_workers,
+ struct vy_worker, in_idle);
+ worker->idle_time += now - worker->idle_start;
scheduler->idle_worker_count--;
+ task->worker = worker;
cmsg_init(&task->cmsg, vy_task_execute_route);
- cpipe_push(&task->worker->worker_pipe, &task->cmsg);
+ cpipe_push(&worker->worker_pipe, &task->cmsg);
fiber_reschedule();
continue;
diff --git a/src/box/vy_scheduler.h b/src/box/vy_scheduler.h
index deefacd7..5524ecce 100644
--- a/src/box/vy_scheduler.h
+++ b/src/box/vy_scheduler.h
@@ -136,6 +136,18 @@ struct vy_scheduler {
int dump_task_count;
/** Time when the current dump round started. */
double dump_start;
+ /** Time when the last dump round ended. */
+ double dump_end;
+ /**
+ * Total amount of time worker threads have been idle,
+ * taken at the time when the last dump round completed.
+ */
+ double idle_time_at_dump;
+ /**
+ * How much time worker threads were idle between the last
+ * two dumps, relative to the dump period.
+ */
+ double idle_ratio;
/** Signaled on dump round completion. */
struct fiber_cond dump_cond;
/**
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 7b880030..bb5377b4 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -2244,6 +2244,68 @@ i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt
---
- true
...
+s:truncate()
+---
+...
+box.stat.reset()
+---
+...
+-- Check disk.idle_ratio statistic.
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.01)
+---
+- ok
+...
+start = fiber.time()
+---
+...
+dump()
+---
+...
+fiber.sleep(fiber.time() - start)
+---
+...
+dump()
+---
+...
+-- one worker is busy half of the time
+expected = 1 - 1 / (2 * box.cfg.vinyl_write_threads)
+---
+...
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+---
+- true
+...
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', true)
+---
+- ok
+...
+start = fiber.time()
+---
+...
+dump()
+---
+...
+fiber.sleep(fiber.time() - start)
+---
+...
+dump()
+---
+...
+-- one worker is busy all the time, plus one half of the time
+expected = 1 - 3 / (2 * box.cfg.vinyl_write_threads)
+---
+...
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+---
+- true
+...
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', false)
+---
+- ok
+...
+while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end
+---
+...
s:drop()
---
...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 9037bfad..835f4540 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -883,4 +883,26 @@ i:stat().disk.compact.queue -- none
i:stat().disk.compact.debt -- none
i:stat().disk.compact.queue.bytes == box.stat.vinyl().disk.compact_queue
i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt
+s:truncate()
+box.stat.reset()
+
+-- Check disk.idle_ratio statistic.
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.01)
+start = fiber.time()
+dump()
+fiber.sleep(fiber.time() - start)
+dump()
+-- one worker is busy half of the time
+expected = 1 - 1 / (2 * box.cfg.vinyl_write_threads)
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', true)
+start = fiber.time()
+dump()
+fiber.sleep(fiber.time() - start)
+dump()
+-- one worker is busy all the time, plus one half of the time
+expected = 1 - 3 / (2 * box.cfg.vinyl_write_threads)
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', false)
+while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end
s:drop()
diff --git a/test/vinyl/info.result b/test/vinyl/info.result
index 556f5eca..4340be91 100644
--- a/test/vinyl/info.result
+++ b/test/vinyl/info.result
@@ -102,6 +102,7 @@ function gstat()
st.quota.use_rate = nil
st.quota.dump_bandwidth = nil
st.quota.watermark = nil
+ st.disk.idle_ratio = nil
return st
end;
---
diff --git a/test/vinyl/info.test.lua b/test/vinyl/info.test.lua
index 919dde63..fe070416 100644
--- a/test/vinyl/info.test.lua
+++ b/test/vinyl/info.test.lua
@@ -84,6 +84,7 @@ function gstat()
st.quota.use_rate = nil
st.quota.dump_bandwidth = nil
st.quota.watermark = nil
+ st.disk.idle_ratio = nil
return st
end;
--
2.11.0
More information about the Tarantool-patches
mailing list