[PATCH 7/7] vinyl: keep track of disk idle time

Vladimir Davydov vdavydov.dev at gmail.com
Sun Sep 2 23:19:00 MSK 2018


To understand whether the disk is fully utilized or can still handle
more compaction load, and to make the right decisions regarding
transaction throttling, we need a metric that reports how much time
worker threads spend idle. So this patch adds a new metric to global
statistics, box.stat.vinyl().disk.idle_ratio. The metric is updated
on each dump using the following formula:

                       idle_time
  idle_ratio = --------------------------
               dump_period * worker_count

where idle_time is the total amount of time workers were idle between
the last two dumps, dump_period is the time that passed between the last
two dumps, worker_count is the number of workers.

The value of the new metric always lies between 0 inclusive and 1
exclusive. The closer it is to 1, the more idle (less busy) the disk is.
---
 src/box/vinyl.c            |  1 +
 src/box/vy_scheduler.c     | 48 +++++++++++++++++++++++++++++++----
 src/box/vy_scheduler.h     | 12 +++++++++
 test/vinyl/errinj.result   | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 test/vinyl/errinj.test.lua | 22 ++++++++++++++++
 test/vinyl/info.result     |  1 +
 test/vinyl/info.test.lua   |  1 +
 7 files changed, 142 insertions(+), 5 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 416c9824..e140f03c 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -311,6 +311,7 @@ vy_info_append_disk(struct vy_env *env, struct info_handler *h)
 	info_append_int(h, "compact_total", lsm_env->compact_total);
 	info_append_int(h, "compact_queue", lsm_env->compact_queue);
 	info_append_int(h, "compact_debt", lsm_env->compact_debt);
+	info_append_double(h, "idle_ratio", env->scheduler.idle_ratio);
 	info_table_end(h);
 }
 
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 580c3129..702f426c 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -96,6 +96,10 @@ struct vy_worker {
 	struct vy_task *task;
 	/** Link in vy_scheduler::idle_workers. */
 	struct stailq_entry in_idle;
+	/** Time when this worker became idle. */
+	double idle_start;
+	/** How much time this worker has been idle. */
+	double idle_time;
 	/** Route for sending deferred DELETEs back to tx. */
 	struct cmsg_hop deferred_delete_route[2];
 };
@@ -346,6 +350,7 @@ vy_scheduler_start_workers(struct vy_scheduler *scheduler)
 	if (scheduler->worker_pool == NULL)
 		panic("failed to allocate vinyl worker pool");
 
+	double now = ev_monotonic_now(loop());
 	for (int i = 0; i < scheduler->worker_pool_size; i++) {
 		char name[FIBER_NAME_MAX];
 		snprintf(name, sizeof(name), "vinyl.writer.%d", i);
@@ -355,6 +360,7 @@ vy_scheduler_start_workers(struct vy_scheduler *scheduler)
 		cpipe_create(&worker->worker_pipe, name);
 		stailq_add_tail_entry(&scheduler->idle_workers,
 				      worker, in_idle);
+		worker->idle_start = now;
 
 		struct cmsg_hop *route = worker->deferred_delete_route;
 		route[0].f = vy_deferred_delete_batch_process_f;
@@ -407,6 +413,7 @@ vy_scheduler_create(struct vy_scheduler *scheduler, int write_threads,
 
 	diag_create(&scheduler->diag);
 	fiber_cond_create(&scheduler->dump_cond);
+	scheduler->dump_end = ev_monotonic_now(loop());
 
 	fiber_start(scheduler->scheduler_fiber, scheduler);
 }
@@ -548,6 +555,27 @@ vy_scheduler_force_compaction(struct vy_scheduler *scheduler,
 }
 
 /**
+ * Return total time workers have spent idle.
+ */
+static double
+vy_scheduler_get_idle_time(struct vy_scheduler *scheduler)
+{
+	double idle_time = 0;
+	double now = ev_monotonic_now(loop());
+
+	struct vy_worker *worker;
+	for (int i = 0; i < scheduler->worker_pool_size; i++) {
+		worker = &scheduler->worker_pool[i];
+		idle_time += worker->idle_time;
+	}
+
+	stailq_foreach_entry(worker, &scheduler->idle_workers, in_idle)
+		idle_time += now - worker->idle_start;
+
+	return idle_time;
+}
+
+/**
  * Check whether the current dump round is complete.
  * If it is, free memory and proceed to the next dump round.
  */
@@ -585,7 +613,11 @@ vy_scheduler_complete_dump(struct vy_scheduler *scheduler)
 	 */
 	double now = ev_monotonic_now(loop());
 	double dump_duration = now - scheduler->dump_start;
-	scheduler->dump_start = now;
+	double idle_time = vy_scheduler_get_idle_time(scheduler);
+	scheduler->idle_ratio = (idle_time - scheduler->idle_time_at_dump) /
+		(now - scheduler->dump_end) / scheduler->worker_pool_size;
+	scheduler->idle_time_at_dump = idle_time;
+	scheduler->dump_start = scheduler->dump_end = now;
 	scheduler->dump_generation = min_generation;
 	scheduler->dump_complete_cb(scheduler,
 			min_generation - 1, dump_duration);
@@ -1900,7 +1932,9 @@ vy_scheduler_f(va_list va)
 	while (scheduler->scheduler_fiber != NULL) {
 		struct stailq processed_tasks;
 		struct vy_task *task, *next;
+		struct vy_worker *worker;
 		int tasks_failed = 0, tasks_done = 0;
+		double now = ev_monotonic_now(loop());
 
 		/* Get the list of processed tasks. */
 		stailq_create(&processed_tasks);
@@ -1913,8 +1947,10 @@ vy_scheduler_f(va_list va)
 				tasks_failed++;
 			else
 				tasks_done++;
+			worker = task->worker;
 			stailq_add_entry(&scheduler->idle_workers,
-					 task->worker, in_idle);
+					 worker, in_idle);
+			worker->idle_start = now;
 			vy_task_delete(task);
 			scheduler->idle_worker_count++;
 			assert(scheduler->idle_worker_count <=
@@ -1951,11 +1987,13 @@ vy_scheduler_f(va_list va)
 
 		/* Queue the task and notify workers if necessary. */
 		assert(!stailq_empty(&scheduler->idle_workers));
-		task->worker = stailq_shift_entry(&scheduler->idle_workers,
-						  struct vy_worker, in_idle);
+		worker = stailq_shift_entry(&scheduler->idle_workers,
+					    struct vy_worker, in_idle);
+		worker->idle_time += now - worker->idle_start;
 		scheduler->idle_worker_count--;
+		task->worker = worker;
 		cmsg_init(&task->cmsg, vy_task_execute_route);
-		cpipe_push(&task->worker->worker_pipe, &task->cmsg);
+		cpipe_push(&worker->worker_pipe, &task->cmsg);
 
 		fiber_reschedule();
 		continue;
diff --git a/src/box/vy_scheduler.h b/src/box/vy_scheduler.h
index deefacd7..5524ecce 100644
--- a/src/box/vy_scheduler.h
+++ b/src/box/vy_scheduler.h
@@ -136,6 +136,18 @@ struct vy_scheduler {
 	int dump_task_count;
 	/** Time when the current dump round started. */
 	double dump_start;
+	/** Time when the last dump round ended. */
+	double dump_end;
+	/**
+	 * Total amount of time worker threads have been idle,
+	 * taken at the time when the last dump round completed.
+	 */
+	double idle_time_at_dump;
+	/**
+	 * How much time worker threads were idle between the last
+	 * two dumps, relative to the dump period.
+	 */
+	double idle_ratio;
 	/** Signaled on dump round completion. */
 	struct fiber_cond dump_cond;
 	/**
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 7b880030..bb5377b4 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -2244,6 +2244,68 @@ i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt
 ---
 - true
 ...
+s:truncate()
+---
+...
+box.stat.reset()
+---
+...
+-- Check disk.idle_ratio statistic.
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.01)
+---
+- ok
+...
+start = fiber.time()
+---
+...
+dump()
+---
+...
+fiber.sleep(fiber.time() - start)
+---
+...
+dump()
+---
+...
+-- one worker is busy half of the time
+expected = 1 - 1 / (2 * box.cfg.vinyl_write_threads)
+---
+...
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+---
+- true
+...
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', true)
+---
+- ok
+...
+start = fiber.time()
+---
+...
+dump()
+---
+...
+fiber.sleep(fiber.time() - start)
+---
+...
+dump()
+---
+...
+-- one worker is busy all the time, plus one half of the time
+expected = 1 - 3 / (2 * box.cfg.vinyl_write_threads)
+---
+...
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+---
+- true
+...
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', false)
+---
+- ok
+...
+while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end
+---
+...
 s:drop()
 ---
 ...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 9037bfad..835f4540 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -883,4 +883,26 @@ i:stat().disk.compact.queue -- none
 i:stat().disk.compact.debt -- none
 i:stat().disk.compact.queue.bytes == box.stat.vinyl().disk.compact_queue
 i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt
+s:truncate()
+box.stat.reset()
+
+-- Check disk.idle_ratio statistic.
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.01)
+start = fiber.time()
+dump()
+fiber.sleep(fiber.time() - start)
+dump()
+-- one worker is busy half of the time
+expected = 1 - 1 / (2 * box.cfg.vinyl_write_threads)
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', true)
+start = fiber.time()
+dump()
+fiber.sleep(fiber.time() - start)
+dump()
+-- one worker is busy all the time, plus one half of the time
+expected = 1 - 3 / (2 * box.cfg.vinyl_write_threads)
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', false)
+while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end
 s:drop()
diff --git a/test/vinyl/info.result b/test/vinyl/info.result
index 556f5eca..4340be91 100644
--- a/test/vinyl/info.result
+++ b/test/vinyl/info.result
@@ -102,6 +102,7 @@ function gstat()
     st.quota.use_rate = nil
     st.quota.dump_bandwidth = nil
     st.quota.watermark = nil
+    st.disk.idle_ratio = nil
     return st
 end;
 ---
diff --git a/test/vinyl/info.test.lua b/test/vinyl/info.test.lua
index 919dde63..fe070416 100644
--- a/test/vinyl/info.test.lua
+++ b/test/vinyl/info.test.lua
@@ -84,6 +84,7 @@ function gstat()
     st.quota.use_rate = nil
     st.quota.dump_bandwidth = nil
     st.quota.watermark = nil
+    st.disk.idle_ratio = nil
     return st
 end;
 
-- 
2.11.0




More information about the Tarantool-patches mailing list