From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH 10/12] vinyl: add task accounting to global scheduler statistics Date: Tue, 15 Jan 2019 17:17:19 +0300 Message-Id: <771c3685cb7ac4133645692dd57bd165e0510236.1547558871.git.vdavydov.dev@gmail.com> In-Reply-To: References: In-Reply-To: References: To: tarantool-patches@freelists.org List-ID: This patch adds the following new fields to box.stat.vinyl(): scheduler.tasks_inprogress - number of currently running tasks scheduler.tasks_completed - number of successfully completed tasks scheduler.tasks_failed - number of aborted tasks tasks_failed can be useful for monitoring disk write errors while tasks_inprogress and tasks_completed can shed light on worker thread pool effeciency. --- src/box/vinyl.c | 3 + src/box/vy_scheduler.c | 21 +++++-- src/box/vy_stat.h | 6 ++ test/vinyl/errinj_stat.result | 119 ++++++++++++++++++++++++++++++++++++++++ test/vinyl/errinj_stat.test.lua | 36 ++++++++++++ test/vinyl/stat.result | 26 ++++++--- 6 files changed, 199 insertions(+), 12 deletions(-) diff --git a/src/box/vinyl.c b/src/box/vinyl.c index e3478679..ea80d0ba 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -250,6 +250,9 @@ vy_info_append_scheduler(struct vy_env *env, struct info_handler *h) struct vy_scheduler_stat *stat = &env->scheduler.stat; info_table_begin(h, "scheduler"); + info_append_int(h, "tasks_inprogress", stat->tasks_inprogress); + info_append_int(h, "tasks_completed", stat->tasks_completed); + info_append_int(h, "tasks_failed", stat->tasks_failed); info_append_int(h, "dump_count", stat->dump_count); info_append_int(h, "dump_input", stat->dump_input); info_append_int(h, "dump_output", stat->dump_output); diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c index e49cd211..d0344cdd 100644 --- a/src/box/vy_scheduler.c +++ b/src/box/vy_scheduler.c @@ -504,6 +504,8 @@ void vy_scheduler_reset_stat(struct vy_scheduler *scheduler) { struct vy_scheduler_stat *stat = &scheduler->stat; + stat->tasks_completed = 0; + stat->tasks_failed = 0; stat->dump_count = 0; stat->dump_input = 0; stat->dump_output = 0; @@ -1965,15 +1967,18 @@ vy_schedule(struct vy_scheduler *scheduler, struct vy_task **ptask) if (vy_scheduler_peek_dump(scheduler, ptask) != 0) goto fail; if (*ptask != NULL) - return 0; + goto found; if (vy_scheduler_peek_compaction(scheduler, ptask) != 0) goto fail; if (*ptask != NULL) - return 0; + goto found; /* no task to run */ return 0; +found: + scheduler->stat.tasks_inprogress++; + return 0; fail: assert(!diag_is_empty(diag_get())); diag_move(diag_get(), &scheduler->diag); @@ -1984,10 +1989,15 @@ fail: static int vy_task_complete(struct vy_task *task) { + struct vy_scheduler *scheduler = task->scheduler; + + assert(scheduler->stat.tasks_inprogress > 0); + scheduler->stat.tasks_inprogress--; + if (task->lsm->is_dropped) { if (task->ops->abort) task->ops->abort(task); - return 0; + goto out; } struct diag *diag = &task->diag; @@ -2006,11 +2016,14 @@ vy_task_complete(struct vy_task *task) diag_move(diag_get(), diag); goto fail; } +out: + scheduler->stat.tasks_completed++; return 0; fail: if (task->ops->abort) task->ops->abort(task); - diag_move(diag, &task->scheduler->diag); + diag_move(diag, &scheduler->diag); + scheduler->stat.tasks_failed++; return -1; } diff --git a/src/box/vy_stat.h b/src/box/vy_stat.h index b976db47..bd3b811c 100644 --- a/src/box/vy_stat.h +++ b/src/box/vy_stat.h @@ -212,6 +212,12 @@ struct vy_tx_stat { * disk compression. */ struct vy_scheduler_stat { + /** Number of completed tasks. */ + int32_t tasks_completed; + /** Number of failed tasks. */ + int32_t tasks_failed; + /** Number of tasks in progress. */ + int32_t tasks_inprogress; /** Number of completed memory dumps. */ int32_t dump_count; /** Number of bytes read by dump tasks. */ diff --git a/test/vinyl/errinj_stat.result b/test/vinyl/errinj_stat.result index fe379cef..9989f734 100644 --- a/test/vinyl/errinj_stat.result +++ b/test/vinyl/errinj_stat.result @@ -134,6 +134,125 @@ i:stat().disk.compaction.queue -- none s:drop() --- ... +-- +-- Check task statistics. +-- +box.stat.reset() +--- +... +s = box.schema.space.create('test', {engine = 'vinyl'}) +--- +... +_ = s:create_index('pk') +--- +... +errinj.set('ERRINJ_VY_RUN_WRITE_DELAY', true) +--- +- ok +... +s:replace{1} +--- +- [1] +... +c = fiber.channel(1) +--- +... +_ = fiber.create(function() box.snapshot() c:put(true) end) +--- +... +fiber.sleep(0.01) +--- +... +stat = box.stat.vinyl().scheduler +--- +... +stat.tasks_inprogress > 0 +--- +- true +... +stat.tasks_completed == 0 +--- +- true +... +stat.tasks_failed == 0 +--- +- true +... +box.stat.reset() -- doesn't affect tasks_inprogress +--- +... +box.stat.vinyl().scheduler.tasks_inprogress > 0 +--- +- true +... +errinj.set('ERRINJ_VY_RUN_WRITE_DELAY', false) +--- +- ok +... +c:get() +--- +- true +... +stat = box.stat.vinyl().scheduler +--- +... +stat.tasks_inprogress == 0 +--- +- true +... +stat.tasks_completed == 1 +--- +- true +... +stat.tasks_failed == 0 +--- +- true +... +errinj.set('ERRINJ_VY_RUN_WRITE', true) +--- +- ok +... +errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.01) +--- +- ok +... +s:replace{2} +--- +- [2] +... +box.snapshot() +--- +- error: Error injection 'vinyl dump' +... +stat = box.stat.vinyl().scheduler +--- +... +stat.tasks_inprogress == 0 +--- +- true +... +stat.tasks_completed == 1 +--- +- true +... +stat.tasks_failed > 0 +--- +- true +... +errinj.set('ERRINJ_VY_RUN_WRITE', false) +--- +- ok +... +errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0) +--- +- ok +... +fiber.sleep(0.01) +--- +... +s:drop() +--- +... test_run:cmd("clear filter") --- - true diff --git a/test/vinyl/errinj_stat.test.lua b/test/vinyl/errinj_stat.test.lua index 3556f50a..fd80b382 100644 --- a/test/vinyl/errinj_stat.test.lua +++ b/test/vinyl/errinj_stat.test.lua @@ -42,6 +42,42 @@ while i:stat().disk.compaction.count < 2 do fiber.sleep(0.01) end i:stat().disk.compaction.queue -- none s:drop() +-- +-- Check task statistics. +-- +box.stat.reset() +s = box.schema.space.create('test', {engine = 'vinyl'}) +_ = s:create_index('pk') +errinj.set('ERRINJ_VY_RUN_WRITE_DELAY', true) +s:replace{1} +c = fiber.channel(1) +_ = fiber.create(function() box.snapshot() c:put(true) end) +fiber.sleep(0.01) +stat = box.stat.vinyl().scheduler +stat.tasks_inprogress > 0 +stat.tasks_completed == 0 +stat.tasks_failed == 0 +box.stat.reset() -- doesn't affect tasks_inprogress +box.stat.vinyl().scheduler.tasks_inprogress > 0 +errinj.set('ERRINJ_VY_RUN_WRITE_DELAY', false) +c:get() +stat = box.stat.vinyl().scheduler +stat.tasks_inprogress == 0 +stat.tasks_completed == 1 +stat.tasks_failed == 0 +errinj.set('ERRINJ_VY_RUN_WRITE', true) +errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.01) +s:replace{2} +box.snapshot() +stat = box.stat.vinyl().scheduler +stat.tasks_inprogress == 0 +stat.tasks_completed == 1 +stat.tasks_failed > 0 +errinj.set('ERRINJ_VY_RUN_WRITE', false) +errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0) +fiber.sleep(0.01) +s:drop() + test_run:cmd("clear filter") test_run:cmd('switch default') test_run:cmd('stop server test') diff --git a/test/vinyl/stat.result b/test/vinyl/stat.result index 3050e43d..6b71d4c5 100644 --- a/test/vinyl/stat.result +++ b/test/vinyl/stat.result @@ -242,10 +242,13 @@ gstat() data: 0 index: 0 scheduler: - compaction_queue: 0 - dump_count: 0 compaction_output: 0 + tasks_inprogress: 0 dump_output: 0 + tasks_completed: 0 + dump_count: 0 + tasks_failed: 0 + compaction_queue: 0 dump_input: 0 compaction_input: 0 ... @@ -1092,10 +1095,13 @@ gstat() data: 104300 index: 1190 scheduler: - compaction_queue: 0 - dump_count: 0 compaction_output: 0 + tasks_inprogress: 0 dump_output: 0 + tasks_completed: 0 + dump_count: 0 + tasks_failed: 0 + compaction_queue: 0 dump_input: 0 compaction_input: 0 ... @@ -1124,8 +1130,9 @@ box.snapshot() ... stat_diff(gstat(), st, 'scheduler') --- -- dump_count: 1 - dump_input: 104200 +- dump_input: 104200 + dump_count: 1 + tasks_completed: 2 dump_output: 103592 ... for i = 1, 100, 10 do s:replace{i, i, string.rep('y', 1000)} end @@ -1140,8 +1147,9 @@ box.snapshot() ... stat_diff(gstat(), st, 'scheduler') --- -- dump_count: 1 - dump_input: 10420 +- dump_input: 10420 + dump_count: 1 + tasks_completed: 2 dump_output: 10371 ... st = gstat() @@ -1156,6 +1164,7 @@ while i1:stat().disk.compaction.count == 0 do fiber.sleep(0.01) end stat_diff(gstat(), st, 'scheduler') --- - compaction_input: 112188 + tasks_completed: 1 compaction_output: 101984 ... st = gstat() @@ -1170,6 +1179,7 @@ while i2:stat().disk.compaction.count == 0 do fiber.sleep(0.01) end stat_diff(gstat(), st, 'scheduler') --- - compaction_input: 1775 + tasks_completed: 1 compaction_output: 1608 ... s:drop() -- 2.11.0