[PATCH 8/8] vinyl: keep track of thread pool idle ratio

Vladimir Davydov vdavydov.dev at gmail.com
Tue Sep 4 20:23:51 MSK 2018


To understand whether the disk is fully utilized or can still handle
more compaction load and make right decisions regarding transaction
throttling, we need a metric that would report how much time worker
threads spent being idle. So this patch adds two new metrics to global
statistics, disk.dump_idle_ratio and compact_idle_ratio, which show how
much time dump threads and compaction threads were idle, respectively.
The metrics are updated using the following formula:

                       idle_time
  idle_ratio = --------------------------
               dump_period * worker_count

where idle_time is the total amount of time workers were idle between
the last two dumps, dump_period is the time that passed between the last
two dumps, worker_count is the number of workers in the pool.
---
 src/box/vinyl.c            |  5 +++
 src/box/vy_scheduler.c     | 64 ++++++++++++++++++++++++++++++++++++
 src/box/vy_scheduler.h     | 11 +++++++
 test/vinyl/errinj.result   | 82 ++++++++++++++++++++++++++++++++++++++++++++++
 test/vinyl/errinj.test.lua | 37 +++++++++++++++++++++
 test/vinyl/info.result     | 14 ++++----
 test/vinyl/info.test.lua   |  2 ++
 7 files changed, 209 insertions(+), 6 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 1cf9ad16..f6b23cd6 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -302,15 +302,20 @@ static void
 vy_info_append_disk(struct vy_env *env, struct info_handler *h)
 {
 	struct vy_lsm_env *lsm_env = &env->lsm_env;
+	struct vy_scheduler *scheduler = &env->scheduler;
 
 	info_table_begin(h, "disk");
 	info_append_int(h, "data_files", lsm_env->data_file_count);
 	info_append_int(h, "data_size", lsm_env->disk_data_size);
 	info_append_int(h, "index_size", lsm_env->disk_index_size);
 	info_append_int(h, "dump_total", lsm_env->dump_total);
+	info_append_double(h, "dump_idle_ratio",
+			   scheduler->dump_pool.idle_ratio);
 	info_append_int(h, "compact_total", lsm_env->compact_total);
 	info_append_int(h, "compact_queue", lsm_env->compact_queue);
 	info_append_int(h, "compact_debt", lsm_env->compact_debt);
+	info_append_double(h, "compact_idle_ratio",
+			   scheduler->compact_pool.idle_ratio);
 	info_table_end(h);
 }
 
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 5daaf4f1..178db2cc 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -97,6 +97,10 @@ struct vy_worker {
 	struct vy_task *task;
 	/** Link in vy_worker_pool::idle_workers. */
 	struct stailq_entry in_idle;
+	/** Time when this worker became idle. */
+	double idle_start;
+	/** How much time this worker have been idle. */
+	double idle_time;
 	/** Route for sending deferred DELETEs back to tx. */
 	struct cmsg_hop deferred_delete_route[2];
 };
@@ -355,6 +359,13 @@ vy_worker_pool_start(struct vy_worker_pool *pool)
 		cpipe_create(&worker->worker_pipe, name);
 		stailq_add_tail_entry(&pool->idle_workers, worker, in_idle);
 
+		/*
+		 * Distribute accumulated idle time amongst workers
+		 * as if they were running all the time.
+		 */
+		worker->idle_start = pool->last_idle_update;
+		worker->idle_time = pool->last_idle_time / pool->size;
+
 		struct cmsg_hop *route = worker->deferred_delete_route;
 		route[0].f = vy_deferred_delete_batch_process_f;
 		route[0].pipe = &worker->worker_pipe;
@@ -384,6 +395,9 @@ vy_worker_pool_create(struct vy_worker_pool *pool, const char *name, int size)
 	pool->size = size;
 	pool->workers = NULL;
 	stailq_create(&pool->idle_workers);
+	pool->idle_ratio = 1;
+	pool->last_idle_time = 0;
+	pool->last_idle_update = ev_monotonic_now(loop());
 }
 
 static void
@@ -412,6 +426,8 @@ vy_worker_pool_get(struct vy_worker_pool *pool)
 		worker = stailq_shift_entry(&pool->idle_workers,
 					    struct vy_worker, in_idle);
 		assert(worker->pool == pool);
+		worker->idle_time += ev_monotonic_now(loop()) -
+						worker->idle_start;
 	}
 	return worker;
 }
@@ -425,6 +441,51 @@ vy_worker_pool_put(struct vy_worker *worker)
 {
 	struct vy_worker_pool *pool = worker->pool;
 	stailq_add_entry(&pool->idle_workers, worker, in_idle);
+	worker->idle_start = ev_monotonic_now(loop());
+}
+
+/**
+ * Return total time workers have spent idle.
+ */
+static double
+vy_worker_pool_idle_time(struct vy_worker_pool *pool)
+{
+	double now = ev_monotonic_now(loop());
+
+	if (pool->workers == NULL) {
+		/*
+		 * Workers haven't been started yet so naturally
+		 * they all should be accounted as idle.
+		 */
+		return pool->last_idle_time +
+			(now - pool->last_idle_update) * pool->size;
+	}
+
+	double idle_time = 0;
+	struct vy_worker *worker;
+	for (int i = 0; i < pool->size; i++) {
+		worker = &pool->workers[i];
+		idle_time += worker->idle_time;
+	}
+
+	stailq_foreach_entry(worker, &pool->idle_workers, in_idle)
+		idle_time += now - worker->idle_start;
+
+	return idle_time;
+}
+
+/**
+ * Update idle ratio of a worker pool.
+ */
+static void
+vy_worker_pool_update_idle_ratio(struct vy_worker_pool *pool)
+{
+	double now = ev_monotonic_now(loop());
+	double idle_time = vy_worker_pool_idle_time(pool);
+	pool->idle_ratio = (idle_time - pool->last_idle_time) /
+			   (now - pool->last_idle_update) / pool->size;
+	pool->last_idle_time = idle_time;
+	pool->last_idle_update = now;
 }
 
 void
@@ -657,6 +718,9 @@ vy_scheduler_complete_dump(struct vy_scheduler *scheduler)
 	scheduler->dump_complete_cb(scheduler,
 			min_generation - 1, dump_duration);
 	fiber_cond_signal(&scheduler->dump_cond);
+
+	vy_worker_pool_update_idle_ratio(&scheduler->dump_pool);
+	vy_worker_pool_update_idle_ratio(&scheduler->compact_pool);
 }
 
 int
diff --git a/src/box/vy_scheduler.h b/src/box/vy_scheduler.h
index 96ce721b..d6553cc8 100644
--- a/src/box/vy_scheduler.h
+++ b/src/box/vy_scheduler.h
@@ -65,6 +65,17 @@ struct vy_worker_pool {
 	struct vy_worker *workers;
 	/** List of workers that are currently idle. */
 	struct stailq idle_workers;
+	/**
+	 * How much time worker threads were idle, relative to
+	 * the total time (0 <= @idle_ratio <= 1).
+	 *
+	 * Updated on memory dump completion.
+	 */
+	double idle_ratio;
+	/** Time of the last @idle_ratio update. */
+	double last_idle_update;
+	/** Total idle time at @last_idle_update. */
+	double last_idle_time;
 };
 
 struct vy_scheduler {
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 7b880030..35769e66 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -2247,3 +2247,85 @@ i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt
 s:drop()
 ---
 ...
+--
+-- Check idle_ratio metric.
+--
+dump_threads = math.max(1, math.ceil(box.cfg.vinyl_write_threads / 4))
+---
+...
+compact_threads = box.cfg.vinyl_write_threads - dump_threads
+---
+...
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+i = s:create_index('pk', {run_count_per_level = 2})
+---
+...
+function dump() for i = 1, 10 do s:replace{i} end box.snapshot() end
+---
+...
+dump()
+---
+...
+fiber.sleep(0.1)
+---
+...
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.1)
+---
+- ok
+...
+dump()
+---
+...
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0)
+---
+- ok
+...
+-- one dump thread was busy half of the time
+idle = box.stat.vinyl().disk.dump_idle_ratio
+---
+...
+expected = 1 - 1 / (2 * dump_threads)
+---
+...
+math.abs(idle - expected) < 0.1 or idle
+---
+- true
+...
+-- all compaction threads were idle
+box.stat.vinyl().disk.compact_idle_ratio -- 1
+---
+- 1
+...
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', true)
+---
+- ok
+...
+dump()
+---
+...
+dump()
+---
+...
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', false)
+---
+- ok
+...
+-- one compaction thread was busy all the time
+idle = box.stat.vinyl().disk.compact_idle_ratio
+---
+...
+expected = 1 - 1 / compact_threads
+---
+...
+math.abs(idle - expected) < 0.1 or idle
+---
+- true
+...
+while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end
+---
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 9037bfad..fc52dbee 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -884,3 +884,40 @@ i:stat().disk.compact.debt -- none
 i:stat().disk.compact.queue.bytes == box.stat.vinyl().disk.compact_queue
 i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt
 s:drop()
+
+--
+-- Check idle_ratio metric.
+--
+dump_threads = math.max(1, math.ceil(box.cfg.vinyl_write_threads / 4))
+compact_threads = box.cfg.vinyl_write_threads - dump_threads
+
+s = box.schema.space.create('test', {engine = 'vinyl'})
+i = s:create_index('pk', {run_count_per_level = 2})
+function dump() for i = 1, 10 do s:replace{i} end box.snapshot() end
+
+dump()
+fiber.sleep(0.1)
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.1)
+dump()
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0)
+
+-- one dump thread was busy half of the time
+idle = box.stat.vinyl().disk.dump_idle_ratio
+expected = 1 - 1 / (2 * dump_threads)
+math.abs(idle - expected) < 0.1 or idle
+
+-- all compaction threads were idle
+box.stat.vinyl().disk.compact_idle_ratio -- 1
+
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', true)
+dump()
+dump()
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', false)
+
+-- one compaction thread was busy all the time
+idle = box.stat.vinyl().disk.compact_idle_ratio
+expected = 1 - 1 / compact_threads
+math.abs(idle - expected) < 0.1 or idle
+
+while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end
+s:drop()
diff --git a/test/vinyl/info.result b/test/vinyl/info.result
index 556f5eca..6dcdc90f 100644
--- a/test/vinyl/info.result
+++ b/test/vinyl/info.result
@@ -102,6 +102,8 @@ function gstat()
     st.quota.use_rate = nil
     st.quota.dump_bandwidth = nil
     st.quota.watermark = nil
+    st.disk.dump_idle_ratio = nil
+    st.disk.compact_idle_ratio = nil
     return st
 end;
 ---
@@ -218,12 +220,12 @@ gstat()
 ---
 - disk:
     dump_total: 0
-    data_size: 0
+    index_size: 0
     compact_debt: 0
-    compact_queue: 0
     data_files: 0
+    compact_queue: 0
+    data_size: 0
     compact_total: 0
-    index_size: 0
   quota:
     limit: 134217728
     used: 0
@@ -1039,12 +1041,12 @@ gstat()
 ---
 - disk:
     dump_total: 0
-    data_size: 104300
+    index_size: 1190
     compact_debt: 0
-    compact_queue: 0
     data_files: 2
+    compact_queue: 0
+    data_size: 104300
     compact_total: 0
-    index_size: 1190
   quota:
     limit: 134217728
     used: 262583
diff --git a/test/vinyl/info.test.lua b/test/vinyl/info.test.lua
index 919dde63..637aa323 100644
--- a/test/vinyl/info.test.lua
+++ b/test/vinyl/info.test.lua
@@ -84,6 +84,8 @@ function gstat()
     st.quota.use_rate = nil
     st.quota.dump_bandwidth = nil
     st.quota.watermark = nil
+    st.disk.dump_idle_ratio = nil
+    st.disk.compact_idle_ratio = nil
     return st
 end;
 
-- 
2.11.0




More information about the Tarantool-patches mailing list