Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH 7/7] vinyl: keep track of disk idle time
Date: Sun,  2 Sep 2018 23:19:00 +0300	[thread overview]
Message-ID: <d0477df76ff786366c11c7f4030824f98aa90bb8.1535917763.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1535917763.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1535917763.git.vdavydov.dev@gmail.com>

To understand whether the disk is fully utilized or can still handle
more compaction load, and to make the right decisions regarding
transaction throttling, we need a metric that reports how much time
worker threads spend idle. So this patch adds a new metric to global
statistics, box.stat.vinyl().disk.idle_ratio. The metric is updated
on each dump using the following formula:

                       idle_time
  idle_ratio = --------------------------
               dump_period * worker_count

where idle_time is the total amount of time workers were idle between
the last two dumps, dump_period is the time that passed between the last
two dumps, worker_count is the number of workers.

The value of the new metric always lies between 0 inclusive and 1
exclusive. The closer it is to 1, the more idle (less busy) the disk is.
---
 src/box/vinyl.c            |  1 +
 src/box/vy_scheduler.c     | 48 +++++++++++++++++++++++++++++++----
 src/box/vy_scheduler.h     | 12 +++++++++
 test/vinyl/errinj.result   | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 test/vinyl/errinj.test.lua | 22 ++++++++++++++++
 test/vinyl/info.result     |  1 +
 test/vinyl/info.test.lua   |  1 +
 7 files changed, 142 insertions(+), 5 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 416c9824..e140f03c 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -311,6 +311,7 @@ vy_info_append_disk(struct vy_env *env, struct info_handler *h)
 	info_append_int(h, "compact_total", lsm_env->compact_total);
 	info_append_int(h, "compact_queue", lsm_env->compact_queue);
 	info_append_int(h, "compact_debt", lsm_env->compact_debt);
+	info_append_double(h, "idle_ratio", env->scheduler.idle_ratio);
 	info_table_end(h);
 }
 
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 580c3129..702f426c 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -96,6 +96,10 @@ struct vy_worker {
 	struct vy_task *task;
 	/** Link in vy_scheduler::idle_workers. */
 	struct stailq_entry in_idle;
+	/** Time when this worker became idle. */
+	double idle_start;
+	/** How much time this worker has been idle. */
+	double idle_time;
 	/** Route for sending deferred DELETEs back to tx. */
 	struct cmsg_hop deferred_delete_route[2];
 };
@@ -346,6 +350,7 @@ vy_scheduler_start_workers(struct vy_scheduler *scheduler)
 	if (scheduler->worker_pool == NULL)
 		panic("failed to allocate vinyl worker pool");
 
+	double now = ev_monotonic_now(loop());
 	for (int i = 0; i < scheduler->worker_pool_size; i++) {
 		char name[FIBER_NAME_MAX];
 		snprintf(name, sizeof(name), "vinyl.writer.%d", i);
@@ -355,6 +360,7 @@ vy_scheduler_start_workers(struct vy_scheduler *scheduler)
 		cpipe_create(&worker->worker_pipe, name);
 		stailq_add_tail_entry(&scheduler->idle_workers,
 				      worker, in_idle);
+		worker->idle_start = now;
 
 		struct cmsg_hop *route = worker->deferred_delete_route;
 		route[0].f = vy_deferred_delete_batch_process_f;
@@ -407,6 +413,7 @@ vy_scheduler_create(struct vy_scheduler *scheduler, int write_threads,
 
 	diag_create(&scheduler->diag);
 	fiber_cond_create(&scheduler->dump_cond);
+	scheduler->dump_end = ev_monotonic_now(loop());
 
 	fiber_start(scheduler->scheduler_fiber, scheduler);
 }
@@ -548,6 +555,27 @@ vy_scheduler_force_compaction(struct vy_scheduler *scheduler,
 }
 
 /**
+ * Return total time workers have spent idle.
+ */
+static double
+vy_scheduler_get_idle_time(struct vy_scheduler *scheduler)
+{
+	double idle_time = 0;
+	double now = ev_monotonic_now(loop());
+
+	struct vy_worker *worker;
+	for (int i = 0; i < scheduler->worker_pool_size; i++) {
+		worker = &scheduler->worker_pool[i];
+		idle_time += worker->idle_time;
+	}
+
+	stailq_foreach_entry(worker, &scheduler->idle_workers, in_idle)
+		idle_time += now - worker->idle_start;
+
+	return idle_time;
+}
+
+/**
  * Check whether the current dump round is complete.
  * If it is, free memory and proceed to the next dump round.
  */
@@ -585,7 +613,11 @@ vy_scheduler_complete_dump(struct vy_scheduler *scheduler)
 	 */
 	double now = ev_monotonic_now(loop());
 	double dump_duration = now - scheduler->dump_start;
-	scheduler->dump_start = now;
+	double idle_time = vy_scheduler_get_idle_time(scheduler);
+	scheduler->idle_ratio = (idle_time - scheduler->idle_time_at_dump) /
+		(now - scheduler->dump_end) / scheduler->worker_pool_size;
+	scheduler->idle_time_at_dump = idle_time;
+	scheduler->dump_start = scheduler->dump_end = now;
 	scheduler->dump_generation = min_generation;
 	scheduler->dump_complete_cb(scheduler,
 			min_generation - 1, dump_duration);
@@ -1900,7 +1932,9 @@ vy_scheduler_f(va_list va)
 	while (scheduler->scheduler_fiber != NULL) {
 		struct stailq processed_tasks;
 		struct vy_task *task, *next;
+		struct vy_worker *worker;
 		int tasks_failed = 0, tasks_done = 0;
+		double now = ev_monotonic_now(loop());
 
 		/* Get the list of processed tasks. */
 		stailq_create(&processed_tasks);
@@ -1913,8 +1947,10 @@ vy_scheduler_f(va_list va)
 				tasks_failed++;
 			else
 				tasks_done++;
+			worker = task->worker;
 			stailq_add_entry(&scheduler->idle_workers,
-					 task->worker, in_idle);
+					 worker, in_idle);
+			worker->idle_start = now;
 			vy_task_delete(task);
 			scheduler->idle_worker_count++;
 			assert(scheduler->idle_worker_count <=
@@ -1951,11 +1987,13 @@ vy_scheduler_f(va_list va)
 
 		/* Queue the task and notify workers if necessary. */
 		assert(!stailq_empty(&scheduler->idle_workers));
-		task->worker = stailq_shift_entry(&scheduler->idle_workers,
-						  struct vy_worker, in_idle);
+		worker = stailq_shift_entry(&scheduler->idle_workers,
+					    struct vy_worker, in_idle);
+		worker->idle_time += now - worker->idle_start;
 		scheduler->idle_worker_count--;
+		task->worker = worker;
 		cmsg_init(&task->cmsg, vy_task_execute_route);
-		cpipe_push(&task->worker->worker_pipe, &task->cmsg);
+		cpipe_push(&worker->worker_pipe, &task->cmsg);
 
 		fiber_reschedule();
 		continue;
diff --git a/src/box/vy_scheduler.h b/src/box/vy_scheduler.h
index deefacd7..5524ecce 100644
--- a/src/box/vy_scheduler.h
+++ b/src/box/vy_scheduler.h
@@ -136,6 +136,18 @@ struct vy_scheduler {
 	int dump_task_count;
 	/** Time when the current dump round started. */
 	double dump_start;
+	/** Time when the last dump round ended. */
+	double dump_end;
+	/**
+	 * Total amount of time worker threads have been idle,
+	 * taken at the time when the last dump round completed.
+	 */
+	double idle_time_at_dump;
+	/**
+	 * How much time worker threads were idle between the last
+	 * two dumps, relative to the dump period.
+	 */
+	double idle_ratio;
 	/** Signaled on dump round completion. */
 	struct fiber_cond dump_cond;
 	/**
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 7b880030..bb5377b4 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -2244,6 +2244,68 @@ i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt
 ---
 - true
 ...
+s:truncate()
+---
+...
+box.stat.reset()
+---
+...
+-- Check disk.idle_ratio statistic.
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.01)
+---
+- ok
+...
+start = fiber.time()
+---
+...
+dump()
+---
+...
+fiber.sleep(fiber.time() - start)
+---
+...
+dump()
+---
+...
+-- one worker is busy half of the time
+expected = 1 - 1 / (2 * box.cfg.vinyl_write_threads)
+---
+...
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+---
+- true
+...
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', true)
+---
+- ok
+...
+start = fiber.time()
+---
+...
+dump()
+---
+...
+fiber.sleep(fiber.time() - start)
+---
+...
+dump()
+---
+...
+-- one worker is busy all the time, plus one half of the time
+expected = 1 - 3 / (2 * box.cfg.vinyl_write_threads)
+---
+...
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+---
+- true
+...
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', false)
+---
+- ok
+...
+while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end
+---
+...
 s:drop()
 ---
 ...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 9037bfad..835f4540 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -883,4 +883,26 @@ i:stat().disk.compact.queue -- none
 i:stat().disk.compact.debt -- none
 i:stat().disk.compact.queue.bytes == box.stat.vinyl().disk.compact_queue
 i:stat().disk.compact.debt.bytes == box.stat.vinyl().disk.compact_debt
+s:truncate()
+box.stat.reset()
+
+-- Check disk.idle_ratio statistic.
+errinj.set('ERRINJ_VY_RUN_WRITE_TIMEOUT', 0.01)
+start = fiber.time()
+dump()
+fiber.sleep(fiber.time() - start)
+dump()
+-- one worker is busy half of the time
+expected = 1 - 1 / (2 * box.cfg.vinyl_write_threads)
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', true)
+start = fiber.time()
+dump()
+fiber.sleep(fiber.time() - start)
+dump()
+-- one worker is busy all the time, plus one half of the time
+expected = 1 - 3 / (2 * box.cfg.vinyl_write_threads)
+math.abs(box.stat.vinyl().disk.idle_ratio - expected) < 0.1
+errinj.set('ERRINJ_VY_COMPACTION_DELAY', false)
+while i:stat().disk.compact.count < 1 do fiber.sleep(0.01) end
 s:drop()
diff --git a/test/vinyl/info.result b/test/vinyl/info.result
index 556f5eca..4340be91 100644
--- a/test/vinyl/info.result
+++ b/test/vinyl/info.result
@@ -102,6 +102,7 @@ function gstat()
     st.quota.use_rate = nil
     st.quota.dump_bandwidth = nil
     st.quota.watermark = nil
+    st.disk.idle_ratio = nil
     return st
 end;
 ---
diff --git a/test/vinyl/info.test.lua b/test/vinyl/info.test.lua
index 919dde63..fe070416 100644
--- a/test/vinyl/info.test.lua
+++ b/test/vinyl/info.test.lua
@@ -84,6 +84,7 @@ function gstat()
     st.quota.use_rate = nil
     st.quota.dump_bandwidth = nil
     st.quota.watermark = nil
+    st.disk.idle_ratio = nil
     return st
 end;
 
-- 
2.11.0

  parent reply	other threads:[~2018-09-02 20:19 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-02 20:18 [PATCH 0/7] vinyl: improve stats for throttling Vladimir Davydov
2018-09-02 20:18 ` [PATCH 1/7] vinyl: fix accounting of secondary index cache statements Vladimir Davydov
2018-09-02 22:26   ` [tarantool-patches] " Konstantin Osipov
2018-09-02 20:18 ` [PATCH 2/7] vinyl: add global memory stats Vladimir Davydov
2018-09-02 22:27   ` [tarantool-patches] " Konstantin Osipov
2018-09-02 22:27   ` Konstantin Osipov
2018-09-03  8:10     ` Vladimir Davydov
2018-09-02 20:18 ` [PATCH 3/7] vinyl: add global disk stats Vladimir Davydov
2018-09-02 22:30   ` [tarantool-patches] " Konstantin Osipov
2018-09-02 20:18 ` [PATCH 4/7] vinyl: fix force compaction logic Vladimir Davydov
2018-09-02 20:18 ` [PATCH 5/7] vinyl: update compact priority usual way on range split/coalesce Vladimir Davydov
2018-09-02 20:18 ` [PATCH 6/7] vinyl: keep track of compaction queue length and debt Vladimir Davydov
2018-09-02 20:19 ` Vladimir Davydov [this message]
2018-09-04 11:54   ` [PATCH 7/7] vinyl: keep track of disk idle time Vladimir Davydov
2018-09-04 17:23     ` Vladimir Davydov
2018-09-04 17:23       ` [PATCH 1/8] vinyl: add helper to check whether dump is in progress Vladimir Davydov
2018-09-06  7:33         ` Konstantin Osipov
2018-09-04 17:23       ` [PATCH 2/8] vinyl: don't use mempool for allocating background tasks Vladimir Davydov
2018-09-06  7:33         ` Konstantin Osipov
2018-09-04 17:23       ` [PATCH 3/8] vinyl: factor out worker pool from scheduler struct Vladimir Davydov
2018-09-06  7:34         ` Konstantin Osipov
2018-09-04 17:23       ` [PATCH 4/8] vinyl: move worker allocation closer to task creation Vladimir Davydov
2018-09-06  7:35         ` Konstantin Osipov
2018-09-04 17:23       ` [PATCH 5/8] vinyl: use separate thread pools for dump and compaction tasks Vladimir Davydov
2018-09-06  7:37         ` Konstantin Osipov
2018-09-06  9:48           ` Vladimir Davydov
2018-09-06 10:32             ` Konstantin Osipov
2018-09-04 17:23       ` [PATCH 6/8] vinyl: zap vy_worker_pool::idle_worker_count Vladimir Davydov
2018-09-06  7:38         ` Konstantin Osipov
2018-09-04 17:23       ` [PATCH 7/8] vinyl: don't start scheduler fiber until local recovery is complete Vladimir Davydov
2018-09-06  7:39         ` Konstantin Osipov
2018-09-04 17:23       ` [PATCH 8/8] vinyl: keep track of thread pool idle ratio Vladimir Davydov
2018-09-06  7:49         ` Konstantin Osipov
2018-09-06  8:18           ` Vladimir Davydov
2018-09-06 10:26             ` Konstantin Osipov
2018-09-06 10:52               ` Vladimir Davydov
2018-09-06 10:57                 ` Konstantin Osipov
2018-09-06 11:59                   ` Vladimir Davydov
2018-09-09 11:41 ` [PATCH 0/7] vinyl: improve stats for throttling Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d0477df76ff786366c11c7f4030824f98aa90bb8.1535917763.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH 7/7] vinyl: keep track of disk idle time' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox