[PATCH 8/8] vinyl: keep track of thread pool idle ratio
Vladimir Davydov
vdavydov.dev at gmail.com
Tue Sep 4 20:23:51 MSK 2018
To understand whether the disk is fully utilized or can still handle
more compaction load, and to make the right decisions regarding
transaction throttling, we need a metric that reports how much time
worker threads spend idle. So this patch adds two new metrics to global
statistics, disk.dump_idle_ratio and disk.compact_idle_ratio, which show
how much time dump threads and compaction threads were idle, respectively.
The metrics are updated using the following formula:
                         idle_time
    idle_ratio = --------------------------
                 dump_period * worker_count
where idle_time is the total amount of time workers were idle between
the last two dumps, dump_period is the time that passed between the last
two dumps, and worker_count is the number of workers in the pool.
---
src/box/vinyl.c | 5 +++
src/box/vy_scheduler.c | 64 ++++++++++++++++++++++++++++++++++++
src/box/vy_scheduler.h | 11 +++++++
test/vinyl/errinj.result | 82 ++++++++++++++++++++++++++++++++++++++++++++++
test/vinyl/errinj.test.lua | 37 +++++++++++++++++++++
test/vinyl/info.result | 14 ++++----
test/vinyl/info.test.lua | 2 ++
7 files changed, 209 insertions(+), 6 deletions(-)
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 1cf9ad16..f6b23cd6 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -302,15 +302,20 @@ static void
vy_info_append_disk(struct vy_env *env, struct info_handler *h)
{
struct vy_lsm_env *lsm_env = &env->lsm_env;
+ struct vy_scheduler *scheduler = &env->scheduler;
info_table_begin(h, "disk");
info_append_int(h, "data_files", lsm_env->data_file_count);
info_append_int(h, "data_size", lsm_env->disk_data_size);
info_append_int(h, "index_size", lsm_env->disk_index_size);
info_append_int(h, "dump_total", lsm_env->dump_total);
+ info_append_double(h, "dump_idle_ratio",
+ scheduler->dump_pool.idle_ratio);
info_append_int(h, "compact_total", lsm_env->compact_total);
info_append_int(h, "compact_queue", lsm_env->compact_queue);
info_append_int(h, "compact_debt", lsm_env->compact_debt);
+ info_append_double(h, "compact_idle_ratio",
+ scheduler->compact_pool.idle_ratio);
info_table_end(h);
}
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 5daaf4f1..178db2cc 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -97,6 +97,10 @@ struct vy_worker {
struct vy_task *task;
/** Link in vy_worker_pool::idle_workers. */
struct stailq_entry in_idle;
+ /** Time when this worker became idle. */
+ double idle_start;
+ /** How much time this worker has been idle. */
+ double idle_time;
/** Route for sending deferred DELETEs back to tx. */
struct cmsg_hop deferred_delete_route[2];
};
@@ -355,6 +359,13 @@ vy_worker_pool_start(struct vy_worker_pool *pool)
cpipe_create(&worker->worker_pipe, name);
stailq_add_tail_entry(&pool->idle_workers, worker, in_idle);
+ /*
+ * Distribute accumulated idle time amongst workers
+ * as if they were running all the time.
+ */
+ worker->idle_start = pool->last_idle_update;
+ worker->idle_time = pool->last_idle_time / pool->size;
+
struct cmsg_hop *route = worker->deferred_delete_route;
route[0].f = vy_deferred_delete_batch_process_f;
route[0].pipe = &worker->worker_pipe;
@@ -384,6 +395,9 @@ vy_worker_pool_create(struct vy_worker_pool *pool, const char *name, int size)
pool->size = size;
pool->workers = NULL;
stailq_create(&pool->idle_workers);
+ pool->idle_ratio = 1;
+ pool->last_idle_time = 0;
+ pool->last_idle_update = ev_monotonic_now(loop());
}
static void
@@ -412,6 +426,8 @@ vy_worker_pool_get(struct vy_worker_pool *pool)
worker = stailq_shift_entry(&pool->idle_workers,
struct vy_worker, in_idle);
assert(worker->pool == pool);
+ worker->idle_time += ev_monotonic_now(loop()) -
+ worker->idle_start;
}
return worker;
}
@@ -425,6 +441,51 @@ vy_worker_pool_put(struct vy_worker *worker)
{
struct vy_worker_pool *pool = worker->pool;
stailq_add_entry(&pool->idle_workers, worker, in_idle);
+ worker->idle_start = ev_monotonic_now(loop());
+}
+
+/**
+ * Return total time workers have spent idle.
+ */
+static double
+vy_worker_pool_idle_time(struct vy_worker_pool *pool)
+{
+ double now = ev_monotonic_now(loop());
+
+ if (pool->workers == NULL) {
+ /*
+ * Workers haven't been started yet so naturally
+ * they all should be accounted as idle.
+ */
+ return pool->last_idle_time +
+ (now - pool->last_idle_update) * pool->size;
+ }
+
+ double idle_time = 0;
+ struct vy_worker *worker;
+ for (int i = 0; i < pool->size; i++) {
+ worker = &pool->workers[i];
+ idle_time += worker->idle_time;
+ }
+
+ stailq_foreach_entry(worker, &pool->idle_workers, in_idle)
+ idle_time += now - worker->idle_start;
+
+ return idle_time;
+}
+
+/**
+ * Update idle ratio of a worker pool.
+ */
+static void
+vy_worker_pool_update_idle_ratio(struct vy_worker_pool *pool)
+{
+ double now = ev_monotonic_now(loop());
+ double idle_time = vy_worker_pool_idle_time(pool);
+ pool->idle_ratio = (idle_time - pool->last_idle_time) /
+ (now - pool->last_idle_update) / pool->size;
+ pool->last_idle_time = idle_time;
+ pool->last_idle_update = now;
}
void
@@ -657,6 +718,9 @@ vy_scheduler_complete_dump(struct vy_scheduler *scheduler)
scheduler->dump_complete_cb(scheduler,
min_generation - 1, dump_duration);
fiber_cond_signal(&scheduler->dump_cond);
+
+ vy_worker_pool_update_idle_ratio(&scheduler->dump_pool);
+ vy_worker_pool_update_idle_ratio(&scheduler->compact_pool);
}
int
diff --git a/src/box/vy_scheduler.h b/src/box/vy_scheduler.h
index 96ce721b..d6553cc8 100644
--- a/src/box/vy_scheduler.h
+++ b/src/box/vy_scheduler.h
@@ -65,6 +65,17 @@ struct vy_worker_pool {
struct vy_worker *workers;
/** List of workers that are currently idle. */
struct stailq idle_workers;
+ /**
+ * How much time worker threads were idle, relative to
+ * the total time (0 <= @idle_ratio <= 1).
+ *
+ * Updated on memory dump completion.
+ */
+ double idle_ratio;
+ /** Time of the last @idle_ratio update. */
+ double last_idle_update;
+ /** Total idle time at @last_idle_update. */
+ double last_idle_time;
};
struct vy_scheduler {
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 7b880030..35769e66 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -2247,3 +2247,85 @@ i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt
s:drop()
---
...
+--
+-- Check idle_ratio metric.
+--
+dump_threads = math.max(1, math.ceil(box.cfg.vinyl_write_threads / 4))
+---
+...
+compact_threads = box.cfg.vinyl_write_threads - dump_threads
+---
+...
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+i = s:create_index('pk', {run_count_per_level = 2})
+---
+...
+function dump() for i = 1, 10 do s:replace{i} end box.snapshot() end
+---
+...
+dump()
+---
+...
+fiber.sleep(0.1)
+---
+...
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.1)
+---
+- ok
+...
+dump()
+---
+...
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0)
+---
+- ok
+...
+-- one dump thread was busy half of the time
+idle = box.stat.vinyl().disk.dump_idle_ratio
+---
+...
+expected = 1 - 1 / (2 * dump_threads)
+---
+...
+math.abs(idle - expected) < 0.1 or idle
+---
+- true
+...
+-- all compaction threads were idle
+box.stat.vinyl().disk.compact_idle_ratio -- 1
+---
+- 1
+...
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', true)
+---
+- ok
+...
+dump()
+---
+...
+dump()
+---
+...
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', false)
+---
+- ok
+...
+-- one compaction thread was busy all the time
+idle = box.stat.vinyl().disk.compact_idle_ratio
+---
+...
+expected = 1 - 1 / compact_threads
+---
+...
+math.abs(idle - expected) < 0.1 or idle
+---
+- true
+...
+while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end
+---
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 9037bfad..fc52dbee 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -884,3 +884,40 @@ i:stat().disk.compact.debt -- none
i:stat().disk.compact.queue.bytes == box.stat.vinyl().disk.compact_queue
i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt
s:drop()
+
+--
+-- Check idle_ratio metric.
+--
+dump_threads = math.max(1, math.ceil(box.cfg.vinyl_write_threads / 4))
+compact_threads = box.cfg.vinyl_write_threads - dump_threads
+
+s = box.schema.space.create('test', {engine = 'vinyl'})
+i = s:create_index('pk', {run_count_per_level = 2})
+function dump() for i = 1, 10 do s:replace{i} end box.snapshot() end
+
+dump()
+fiber.sleep(0.1)
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.1)
+dump()
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0)
+
+-- one dump thread was busy half of the time
+idle = box.stat.vinyl().disk.dump_idle_ratio
+expected = 1 - 1 / (2 * dump_threads)
+math.abs(idle - expected) < 0.1 or idle
+
+-- all compaction threads were idle
+box.stat.vinyl().disk.compact_idle_ratio -- 1
+
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', true)
+dump()
+dump()
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', false)
+
+-- one compaction thread was busy all the time
+idle = box.stat.vinyl().disk.compact_idle_ratio
+expected = 1 - 1 / compact_threads
+math.abs(idle - expected) < 0.1 or idle
+
+while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end
+s:drop()
diff --git a/test/vinyl/info.result b/test/vinyl/info.result
index 556f5eca..6dcdc90f 100644
--- a/test/vinyl/info.result
+++ b/test/vinyl/info.result
@@ -102,6 +102,8 @@ function gstat()
st.quota.use_rate = nil
st.quota.dump_bandwidth = nil
st.quota.watermark = nil
+ st.disk.dump_idle_ratio = nil
+ st.disk.compact_idle_ratio = nil
return st
end;
---
@@ -218,12 +220,12 @@ gstat()
---
- disk:
dump_total: 0
- data_size: 0
+ index_size: 0
compact_debt: 0
- compact_queue: 0
data_files: 0
+ compact_queue: 0
+ data_size: 0
compact_total: 0
- index_size: 0
quota:
limit: 134217728
used: 0
@@ -1039,12 +1041,12 @@ gstat()
---
- disk:
dump_total: 0
- data_size: 104300
+ index_size: 1190
compact_debt: 0
- compact_queue: 0
data_files: 2
+ compact_queue: 0
+ data_size: 104300
compact_total: 0
- index_size: 1190
quota:
limit: 134217728
used: 262583
diff --git a/test/vinyl/info.test.lua b/test/vinyl/info.test.lua
index 919dde63..637aa323 100644
--- a/test/vinyl/info.test.lua
+++ b/test/vinyl/info.test.lua
@@ -84,6 +84,8 @@ function gstat()
st.quota.use_rate = nil
st.quota.dump_bandwidth = nil
st.quota.watermark = nil
+ st.disk.dump_idle_ratio = nil
+ st.disk.compact_idle_ratio = nil
return st
end;
--
2.11.0
More information about the Tarantool-patches
mailing list