[PATCH 10/12] vinyl: add task accounting to global scheduler statistics

Vladimir Davydov vdavydov.dev at gmail.com
Tue Jan 15 17:17:19 MSK 2019


This patch adds the following new fields to box.stat.vinyl():

  scheduler.tasks_inprogress - number of currently running tasks
  scheduler.tasks_completed - number of successfully completed tasks
  scheduler.tasks_failed - number of aborted tasks

tasks_failed can be useful for monitoring disk write errors while
tasks_inprogress and tasks_completed can shed light on worker thread
pool effeciency.
---
 src/box/vinyl.c                 |   3 +
 src/box/vy_scheduler.c          |  21 +++++--
 src/box/vy_stat.h               |   6 ++
 test/vinyl/errinj_stat.result   | 119 ++++++++++++++++++++++++++++++++++++++++
 test/vinyl/errinj_stat.test.lua |  36 ++++++++++++
 test/vinyl/stat.result          |  26 ++++++---
 6 files changed, 199 insertions(+), 12 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index e3478679..ea80d0ba 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -250,6 +250,9 @@ vy_info_append_scheduler(struct vy_env *env, struct info_handler *h)
 	struct vy_scheduler_stat *stat = &env->scheduler.stat;
 
 	info_table_begin(h, "scheduler");
+	info_append_int(h, "tasks_inprogress", stat->tasks_inprogress);
+	info_append_int(h, "tasks_completed", stat->tasks_completed);
+	info_append_int(h, "tasks_failed", stat->tasks_failed);
 	info_append_int(h, "dump_count", stat->dump_count);
 	info_append_int(h, "dump_input", stat->dump_input);
 	info_append_int(h, "dump_output", stat->dump_output);
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index e49cd211..d0344cdd 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -504,6 +504,8 @@ void
 vy_scheduler_reset_stat(struct vy_scheduler *scheduler)
 {
 	struct vy_scheduler_stat *stat = &scheduler->stat;
+	stat->tasks_completed = 0;
+	stat->tasks_failed = 0;
 	stat->dump_count = 0;
 	stat->dump_input = 0;
 	stat->dump_output = 0;
@@ -1965,15 +1967,18 @@ vy_schedule(struct vy_scheduler *scheduler, struct vy_task **ptask)
 	if (vy_scheduler_peek_dump(scheduler, ptask) != 0)
 		goto fail;
 	if (*ptask != NULL)
-		return 0;
+		goto found;
 
 	if (vy_scheduler_peek_compaction(scheduler, ptask) != 0)
 		goto fail;
 	if (*ptask != NULL)
-		return 0;
+		goto found;
 
 	/* no task to run */
 	return 0;
+found:
+	scheduler->stat.tasks_inprogress++;
+	return 0;
 fail:
 	assert(!diag_is_empty(diag_get()));
 	diag_move(diag_get(), &scheduler->diag);
@@ -1984,10 +1989,15 @@ fail:
 static int
 vy_task_complete(struct vy_task *task)
 {
+	struct vy_scheduler *scheduler = task->scheduler;
+
+	assert(scheduler->stat.tasks_inprogress > 0);
+	scheduler->stat.tasks_inprogress--;
+
 	if (task->lsm->is_dropped) {
 		if (task->ops->abort)
 			task->ops->abort(task);
-		return 0;
+		goto out;
 	}
 
 	struct diag *diag = &task->diag;
@@ -2006,11 +2016,14 @@ vy_task_complete(struct vy_task *task)
 		diag_move(diag_get(), diag);
 		goto fail;
 	}
+out:
+	scheduler->stat.tasks_completed++;
 	return 0;
 fail:
 	if (task->ops->abort)
 		task->ops->abort(task);
-	diag_move(diag, &task->scheduler->diag);
+	diag_move(diag, &scheduler->diag);
+	scheduler->stat.tasks_failed++;
 	return -1;
 }
 
diff --git a/src/box/vy_stat.h b/src/box/vy_stat.h
index b976db47..bd3b811c 100644
--- a/src/box/vy_stat.h
+++ b/src/box/vy_stat.h
@@ -212,6 +212,12 @@ struct vy_tx_stat {
  * disk compression.
  */
 struct vy_scheduler_stat {
+	/** Number of completed tasks. */
+	int32_t tasks_completed;
+	/** Number of failed tasks. */
+	int32_t tasks_failed;
+	/** Number of tasks in progress. */
+	int32_t tasks_inprogress;
 	/** Number of completed memory dumps. */
 	int32_t dump_count;
 	/** Number of bytes read by dump tasks. */
diff --git a/test/vinyl/errinj_stat.result b/test/vinyl/errinj_stat.result
index fe379cef..9989f734 100644
--- a/test/vinyl/errinj_stat.result
+++ b/test/vinyl/errinj_stat.result
@@ -134,6 +134,125 @@ i:stat().disk.compaction.queue -- none
 s:drop()
 ---
 ...
+--
+-- Check task statistics.
+--
+box.stat.reset()
+---
+...
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('pk')
+---
+...
+errinj.set('ERRINJ_VY_RUN_WRITE_DELAY', true)
+---
+- ok
+...
+s:replace{1}
+---
+- [1]
+...
+c = fiber.channel(1)
+---
+...
+_ = fiber.create(function() box.snapshot() c:put(true) end)
+---
+...
+fiber.sleep(0.01)
+---
+...
+stat = box.stat.vinyl().scheduler
+---
+...
+stat.tasks_inprogress > 0
+---
+- true
+...
+stat.tasks_completed == 0
+---
+- true
+...
+stat.tasks_failed == 0
+---
+- true
+...
+box.stat.reset() -- doesn't affect tasks_inprogress
+---
+...
+box.stat.vinyl().scheduler.tasks_inprogress > 0
+---
+- true
+...
+errinj.set('ERRINJ_VY_RUN_WRITE_DELAY', false)
+---
+- ok
+...
+c:get()
+---
+- true
+...
+stat = box.stat.vinyl().scheduler
+---
+...
+stat.tasks_inprogress == 0
+---
+- true
+...
+stat.tasks_completed == 1
+---
+- true
+...
+stat.tasks_failed == 0
+---
+- true
+...
+errinj.set('ERRINJ_VY_RUN_WRITE', true)
+---
+- ok
+...
+errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.01)
+---
+- ok
+...
+s:replace{2}
+---
+- [2]
+...
+box.snapshot()
+---
+- error: Error injection 'vinyl dump'
+...
+stat = box.stat.vinyl().scheduler
+---
+...
+stat.tasks_inprogress == 0
+---
+- true
+...
+stat.tasks_completed == 1
+---
+- true
+...
+stat.tasks_failed > 0
+---
+- true
+...
+errinj.set('ERRINJ_VY_RUN_WRITE', false)
+---
+- ok
+...
+errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
+---
+- ok
+...
+fiber.sleep(0.01)
+---
+...
+s:drop()
+---
+...
 test_run:cmd("clear filter")
 ---
 - true
diff --git a/test/vinyl/errinj_stat.test.lua b/test/vinyl/errinj_stat.test.lua
index 3556f50a..fd80b382 100644
--- a/test/vinyl/errinj_stat.test.lua
+++ b/test/vinyl/errinj_stat.test.lua
@@ -42,6 +42,42 @@ while i:stat().disk.compaction.count < 2 do fiber.sleep(0.01) end
 i:stat().disk.compaction.queue -- none
 s:drop()
 
+--
+-- Check task statistics.
+--
+box.stat.reset()
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk')
+errinj.set('ERRINJ_VY_RUN_WRITE_DELAY', true)
+s:replace{1}
+c = fiber.channel(1)
+_ = fiber.create(function() box.snapshot() c:put(true) end)
+fiber.sleep(0.01)
+stat = box.stat.vinyl().scheduler
+stat.tasks_inprogress > 0
+stat.tasks_completed == 0
+stat.tasks_failed == 0
+box.stat.reset() -- doesn't affect tasks_inprogress
+box.stat.vinyl().scheduler.tasks_inprogress > 0
+errinj.set('ERRINJ_VY_RUN_WRITE_DELAY', false)
+c:get()
+stat = box.stat.vinyl().scheduler
+stat.tasks_inprogress == 0
+stat.tasks_completed == 1
+stat.tasks_failed == 0
+errinj.set('ERRINJ_VY_RUN_WRITE', true)
+errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.01)
+s:replace{2}
+box.snapshot()
+stat = box.stat.vinyl().scheduler
+stat.tasks_inprogress == 0
+stat.tasks_completed == 1
+stat.tasks_failed > 0
+errinj.set('ERRINJ_VY_RUN_WRITE', false)
+errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
+fiber.sleep(0.01)
+s:drop()
+
 test_run:cmd("clear filter")
 test_run:cmd('switch default')
 test_run:cmd('stop server test')
diff --git a/test/vinyl/stat.result b/test/vinyl/stat.result
index 3050e43d..6b71d4c5 100644
--- a/test/vinyl/stat.result
+++ b/test/vinyl/stat.result
@@ -242,10 +242,13 @@ gstat()
     data: 0
     index: 0
   scheduler:
-    compaction_queue: 0
-    dump_count: 0
     compaction_output: 0
+    tasks_inprogress: 0
     dump_output: 0
+    tasks_completed: 0
+    dump_count: 0
+    tasks_failed: 0
+    compaction_queue: 0
     dump_input: 0
     compaction_input: 0
 ...
@@ -1092,10 +1095,13 @@ gstat()
     data: 104300
     index: 1190
   scheduler:
-    compaction_queue: 0
-    dump_count: 0
     compaction_output: 0
+    tasks_inprogress: 0
     dump_output: 0
+    tasks_completed: 0
+    dump_count: 0
+    tasks_failed: 0
+    compaction_queue: 0
     dump_input: 0
     compaction_input: 0
 ...
@@ -1124,8 +1130,9 @@ box.snapshot()
 ...
 stat_diff(gstat(), st, 'scheduler')
 ---
-- dump_count: 1
-  dump_input: 104200
+- dump_input: 104200
+  dump_count: 1
+  tasks_completed: 2
   dump_output: 103592
 ...
 for i = 1, 100, 10 do s:replace{i, i, string.rep('y', 1000)} end
@@ -1140,8 +1147,9 @@ box.snapshot()
 ...
 stat_diff(gstat(), st, 'scheduler')
 ---
-- dump_count: 1
-  dump_input: 10420
+- dump_input: 10420
+  dump_count: 1
+  tasks_completed: 2
   dump_output: 10371
 ...
 st = gstat()
@@ -1156,6 +1164,7 @@ while i1:stat().disk.compaction.count == 0 do fiber.sleep(0.01) end
 stat_diff(gstat(), st, 'scheduler')
 ---
 - compaction_input: 112188
+  tasks_completed: 1
   compaction_output: 101984
 ...
 st = gstat()
@@ -1170,6 +1179,7 @@ while i2:stat().disk.compaction.count == 0 do fiber.sleep(0.01) end
 stat_diff(gstat(), st, 'scheduler')
 ---
 - compaction_input: 1775
+  tasks_completed: 1
   compaction_output: 1608
 ...
 s:drop()
-- 
2.11.0




More information about the Tarantool-patches mailing list