From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp54.i.mail.ru (smtp54.i.mail.ru [217.69.128.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 2A3854696C5 for ; Tue, 28 Apr 2020 04:03:47 +0300 (MSK) From: Nikita Pettik Date: Tue, 28 Apr 2020 04:03:43 +0300 Message-Id: <42d8197eeca37e6bfe297daf7c12d444486f1eee.1588035071.git.korablev@tarantool.org> In-Reply-To: References: In-Reply-To: References: Subject: [Tarantool-patches] [PATCH v2 2/2] vinyl: unthrottle scheduler on manual checkpoint List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org Cc: v.shpilevoy@tarantool.org Before this patch box.snapshot() bails out immediately if it sees that the scheduler is throttled due to errors. For instance: box.error.injection.set('ERRINJ_VY_RUN_WRITE', true) snapshot() -- fails due to ERRINJ_VY_RUN_WRITE box.error.injection.set('ERRINJ_VY_RUN_WRITE', false) snapshot() -- still fails despite the fact that injected error is unset As a result, one has to wait up to a minute to make a snapshot. The reason why throttling was introduced was to avoid flooding the log in case of repeating disk errors. What is more, to deal with schedule throttling in tests, we had to introduce a new error injection (ERRINJ_VY_SCHED_TIMEOUT). It reduces time duration during which the scheduler remains throttled, which is ugly and race prone. So, let's unthrottle scheduler when checkpoint process is launched via manual box.snapshot() invocation. Closes #3519 --- src/box/vinyl.c | 2 +- src/box/vy_scheduler.c | 21 ++++++++++++++------- src/box/vy_scheduler.h | 2 +- test/box/errinj.result | 8 -------- test/box/errinj.test.lua | 2 -- test/vinyl/errinj.result | 8 -------- test/vinyl/errinj.test.lua | 3 --- test/vinyl/errinj_stat.result | 8 -------- test/vinyl/errinj_stat.test.lua | 2 -- test/vinyl/errinj_vylog.result | 28 ---------------------------- test/vinyl/errinj_vylog.test.lua | 8 -------- 11 files changed, 16 insertions(+), 76 deletions(-) diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 2a01322f7..055e88f0f 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -2738,7 +2738,7 @@ vinyl_engine_begin_checkpoint(struct engine *engine, bool is_scheduled) */ if (lsregion_used(&env->mem_env.allocator) == 0) return 0; - if (vy_scheduler_begin_checkpoint(&env->scheduler) != 0) + if (vy_scheduler_begin_checkpoint(&env->scheduler, is_scheduled) != 0) return -1; return 0; } diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c index bf4c3fe58..cf58d5f60 100644 --- a/src/box/vy_scheduler.c +++ b/src/box/vy_scheduler.c @@ -681,22 +681,29 @@ vy_scheduler_complete_dump(struct vy_scheduler *scheduler) } int -vy_scheduler_begin_checkpoint(struct vy_scheduler *scheduler) +vy_scheduler_begin_checkpoint(struct vy_scheduler *scheduler, bool is_scheduled) { assert(!scheduler->checkpoint_in_progress); /* - * If the scheduler is throttled due to errors, do not wait + * If checkpoint is manually launched (via box.snapshot()) + * then ignore throttling and force dump process. Otherwise, + * if the scheduler is throttled due to errors, do not wait * until it wakes up as it may take quite a while. Instead * fail checkpoint immediately with the last error seen by * the scheduler. */ if (scheduler->is_throttled) { - struct error *e = diag_last_error(&scheduler->diag); - diag_set_error(diag_get(), e); - say_error("cannot checkpoint vinyl, " - "scheduler is throttled with: %s", e->errmsg); - return -1; + if (is_scheduled) { + struct error *e = diag_last_error(&scheduler->diag); + diag_set_error(diag_get(), e); + say_error("cannot checkpoint vinyl, " + "scheduler is throttled with: %s", e->errmsg); + return -1; + } + say_info("scheduler is unthrottled due to manual checkpoint " + "process"); + scheduler->is_throttled = false; } if (!vy_scheduler_dump_in_progress(scheduler)) { diff --git a/src/box/vy_scheduler.h b/src/box/vy_scheduler.h index bc953975e..42e7b2f2f 100644 --- a/src/box/vy_scheduler.h +++ b/src/box/vy_scheduler.h @@ -225,7 +225,7 @@ vy_scheduler_force_compaction(struct vy_scheduler *scheduler, * after that. */ int -vy_scheduler_begin_checkpoint(struct vy_scheduler *); +vy_scheduler_begin_checkpoint(struct vy_scheduler *, bool); /** * Wait for checkpoint. Please call vy_scheduler_end_checkpoint() diff --git a/test/box/errinj.result b/test/box/errinj.result index de877b708..e506e9d73 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -1585,10 +1585,6 @@ errinj.set('ERRINJ_VY_GC', true) --- - ok ... -errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.001) ---- -- ok -... errinj.set('ERRINJ_VY_RUN_FILE_RENAME', true) --- - ok @@ -1625,10 +1621,6 @@ errinj.set('ERRINJ_VY_INDEX_FILE_RENAME', false) --- - ok ... -errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0) ---- -- ok -... errinj.set('ERRINJ_VY_GC', false) --- - ok diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua index 5d8f4c635..bb76e0791 100644 --- a/test/box/errinj.test.lua +++ b/test/box/errinj.test.lua @@ -575,7 +575,6 @@ box.snapshot() errinj.set('ERRINJ_VY_LOG_FILE_RENAME', false) errinj.set('ERRINJ_VY_GC', true) -errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.001) errinj.set('ERRINJ_VY_RUN_FILE_RENAME', true) box.space.test:insert{1} @@ -590,7 +589,6 @@ box.space.test:insert{2} box.snapshot() -- error errinj.set('ERRINJ_VY_INDEX_FILE_RENAME', false) -errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0) errinj.set('ERRINJ_VY_GC', false) test_run:cmd('restart server default') diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result index 2bd701f70..bf49f4e46 100644 --- a/test/vinyl/errinj.result +++ b/test/vinyl/errinj.result @@ -10,10 +10,6 @@ fiber = require('fiber') errinj = box.error.injection --- ... -errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0.040) ---- -- ok -... -- -- Lost data in case of dump error -- @@ -361,10 +357,6 @@ box.snapshot(); s:drop() --- ... -errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0) ---- -- ok -... -- -- Check that upsert squash fiber does not crash if index or -- in-memory tree is gone. diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua index 750d3bfe8..2c8121364 100644 --- a/test/vinyl/errinj.test.lua +++ b/test/vinyl/errinj.test.lua @@ -2,7 +2,6 @@ test_run = require('test_run').new() fio = require('fio') fiber = require('fiber') errinj = box.error.injection -errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0.040) -- -- Lost data in case of dump error -- @@ -126,8 +125,6 @@ box.snapshot(); #s:select({1}) s:drop() -errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0) - -- -- Check that upsert squash fiber does not crash if index or -- in-memory tree is gone. diff --git a/test/vinyl/errinj_stat.result b/test/vinyl/errinj_stat.result index a8b5e0312..38c27a924 100644 --- a/test/vinyl/errinj_stat.result +++ b/test/vinyl/errinj_stat.result @@ -214,10 +214,6 @@ errinj.set('ERRINJ_VY_RUN_WRITE', true) --- - ok ... -errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.01) ---- -- ok -... s:replace{2} --- - [2] @@ -241,10 +237,6 @@ errinj.set('ERRINJ_VY_RUN_WRITE', false) --- - ok ... -errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0) ---- -- ok -... test_run:wait_cond(function() return box.stat.vinyl().scheduler.tasks_completed > 1 end) --- - true diff --git a/test/vinyl/errinj_stat.test.lua b/test/vinyl/errinj_stat.test.lua index f331d9c2d..0d1e4af8e 100644 --- a/test/vinyl/errinj_stat.test.lua +++ b/test/vinyl/errinj_stat.test.lua @@ -66,14 +66,12 @@ stat.tasks_inprogress == 0 stat.tasks_completed == 1 stat.tasks_failed == 0 errinj.set('ERRINJ_VY_RUN_WRITE', true) -errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.01) s:replace{2} box.snapshot() stat = box.stat.vinyl().scheduler stat.tasks_completed == 1 stat.tasks_failed > 0 errinj.set('ERRINJ_VY_RUN_WRITE', false) -errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0) test_run:wait_cond(function() return box.stat.vinyl().scheduler.tasks_completed > 1 end) box.snapshot() i:compact() diff --git a/test/vinyl/errinj_vylog.result b/test/vinyl/errinj_vylog.result index 8fb5deda4..b9ae9332e 100644 --- a/test/vinyl/errinj_vylog.result +++ b/test/vinyl/errinj_vylog.result @@ -52,13 +52,6 @@ _ = s:create_index('pk') _ = s:insert{1, 'x'} --- ... -SCHED_TIMEOUT = 0.05 ---- -... -box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT) ---- -- ok -... box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true); --- - ok @@ -71,13 +64,6 @@ box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false); --- - ok ... -fiber.sleep(2 * SCHED_TIMEOUT) ---- -... -box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0) ---- -- ok -... _ = s:insert{2, 'y'} --- ... @@ -161,24 +147,10 @@ s2.index.pk:drop() --- ... -- pending records must not be rolled back on error -SCHED_TIMEOUT = 0.05 ---- -... -box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT) ---- -- ok -... box.snapshot() -- error --- - error: Error injection 'vinyl log flush' ... -fiber.sleep(2 * SCHED_TIMEOUT) ---- -... -box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0) ---- -- ok -... box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false); --- - ok diff --git a/test/vinyl/errinj_vylog.test.lua b/test/vinyl/errinj_vylog.test.lua index 5ec58682e..4401f3015 100644 --- a/test/vinyl/errinj_vylog.test.lua +++ b/test/vinyl/errinj_vylog.test.lua @@ -27,15 +27,11 @@ s = box.schema.space.create('test', {engine = 'vinyl'}) _ = s:create_index('pk') _ = s:insert{1, 'x'} -SCHED_TIMEOUT = 0.05 -box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT) box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true); box.snapshot() box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false); -fiber.sleep(2 * SCHED_TIMEOUT) -box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0) _ = s:insert{2, 'y'} @@ -79,11 +75,7 @@ _ = s1:insert{3, 'c'} s2.index.pk:drop() -- pending records must not be rolled back on error -SCHED_TIMEOUT = 0.05 -box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT) box.snapshot() -- error -fiber.sleep(2 * SCHED_TIMEOUT) -box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0) box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false); -- 2.17.1