Tarantool development patches archive
 help / color / mirror / Atom feed
From: Nikita Pettik <korablev@tarantool.org>
To: tarantool-patches@dev.tarantool.org
Cc: v.shpilevoy@tarantool.org
Subject: [Tarantool-patches] [PATCH v2 2/2] vinyl: unthrottle scheduler on manual checkpoint
Date: Tue, 28 Apr 2020 04:03:43 +0300	[thread overview]
Message-ID: <42d8197eeca37e6bfe297daf7c12d444486f1eee.1588035071.git.korablev@tarantool.org> (raw)
In-Reply-To: <cover.1588035071.git.korablev@tarantool.org>
In-Reply-To: <cover.1588035071.git.korablev@tarantool.org>

Before this patch box.snapshot() bails out immediately if it sees
that the scheduler is throttled due to errors. For instance:

box.error.injection.set('ERRINJ_VY_RUN_WRITE', true)
snapshot() -- fails due to ERRINJ_VY_RUN_WRITE
box.error.injection.set('ERRINJ_VY_RUN_WRITE', false)
snapshot() -- still fails despite the fact that injected error is unset

As a result, one has to wait up to a minute to make a snapshot. The
reason why throttling was introduced was to avoid flooding the log
in case of repeating disk errors.
What is more, to deal with schedule throttling in tests, we had to
introduce a new error injection (ERRINJ_VY_SCHED_TIMEOUT). It reduces
time duration during which the scheduler remains throttled, which is
ugly and race prone.  So, let's unthrottle scheduler when checkpoint
process is launched via manual box.snapshot() invocation.

Closes #3519
---
 src/box/vinyl.c                  |  2 +-
 src/box/vy_scheduler.c           | 21 ++++++++++++++-------
 src/box/vy_scheduler.h           |  2 +-
 test/box/errinj.result           |  8 --------
 test/box/errinj.test.lua         |  2 --
 test/vinyl/errinj.result         |  8 --------
 test/vinyl/errinj.test.lua       |  3 ---
 test/vinyl/errinj_stat.result    |  8 --------
 test/vinyl/errinj_stat.test.lua  |  2 --
 test/vinyl/errinj_vylog.result   | 28 ----------------------------
 test/vinyl/errinj_vylog.test.lua |  8 --------
 11 files changed, 16 insertions(+), 76 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 2a01322f7..055e88f0f 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -2738,7 +2738,7 @@ vinyl_engine_begin_checkpoint(struct engine *engine, bool is_scheduled)
 	 */
 	if (lsregion_used(&env->mem_env.allocator) == 0)
 		return 0;
-	if (vy_scheduler_begin_checkpoint(&env->scheduler) != 0)
+	if (vy_scheduler_begin_checkpoint(&env->scheduler, is_scheduled) != 0)
 		return -1;
 	return 0;
 }
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index bf4c3fe58..cf58d5f60 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -681,22 +681,29 @@ vy_scheduler_complete_dump(struct vy_scheduler *scheduler)
 }
 
 int
-vy_scheduler_begin_checkpoint(struct vy_scheduler *scheduler)
+vy_scheduler_begin_checkpoint(struct vy_scheduler *scheduler, bool is_scheduled)
 {
 	assert(!scheduler->checkpoint_in_progress);
 
 	/*
-	 * If the scheduler is throttled due to errors, do not wait
+	 * If checkpoint is manually launched (via box.snapshot())
+	 * then ignore throttling and force dump process. Otherwise,
+	 * if the scheduler is throttled due to errors, do not wait
 	 * until it wakes up as it may take quite a while. Instead
 	 * fail checkpoint immediately with the last error seen by
 	 * the scheduler.
 	 */
 	if (scheduler->is_throttled) {
-		struct error *e = diag_last_error(&scheduler->diag);
-		diag_set_error(diag_get(), e);
-		say_error("cannot checkpoint vinyl, "
-			  "scheduler is throttled with: %s", e->errmsg);
-		return -1;
+		if (is_scheduled) {
+			struct error *e = diag_last_error(&scheduler->diag);
+			diag_set_error(diag_get(), e);
+			say_error("cannot checkpoint vinyl, "
+				  "scheduler is throttled with: %s", e->errmsg);
+			return -1;
+		}
+		say_info("scheduler is unthrottled due to manual checkpoint "
+			 "process");
+		scheduler->is_throttled = false;
 	}
 
 	if (!vy_scheduler_dump_in_progress(scheduler)) {
diff --git a/src/box/vy_scheduler.h b/src/box/vy_scheduler.h
index bc953975e..42e7b2f2f 100644
--- a/src/box/vy_scheduler.h
+++ b/src/box/vy_scheduler.h
@@ -225,7 +225,7 @@ vy_scheduler_force_compaction(struct vy_scheduler *scheduler,
  * after that.
  */
 int
-vy_scheduler_begin_checkpoint(struct vy_scheduler *);
+vy_scheduler_begin_checkpoint(struct vy_scheduler *, bool);
 
 /**
  * Wait for checkpoint. Please call vy_scheduler_end_checkpoint()
diff --git a/test/box/errinj.result b/test/box/errinj.result
index de877b708..e506e9d73 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -1585,10 +1585,6 @@ errinj.set('ERRINJ_VY_GC', true)
 ---
 - ok
 ...
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.001)
----
-- ok
-...
 errinj.set('ERRINJ_VY_RUN_FILE_RENAME', true)
 ---
 - ok
@@ -1625,10 +1621,6 @@ errinj.set('ERRINJ_VY_INDEX_FILE_RENAME', false)
 ---
 - ok
 ...
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
----
-- ok
-...
 errinj.set('ERRINJ_VY_GC', false)
 ---
 - ok
diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua
index 5d8f4c635..bb76e0791 100644
--- a/test/box/errinj.test.lua
+++ b/test/box/errinj.test.lua
@@ -575,7 +575,6 @@ box.snapshot()
 errinj.set('ERRINJ_VY_LOG_FILE_RENAME', false)
 
 errinj.set('ERRINJ_VY_GC', true)
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.001)
 
 errinj.set('ERRINJ_VY_RUN_FILE_RENAME', true)
 box.space.test:insert{1}
@@ -590,7 +589,6 @@ box.space.test:insert{2}
 box.snapshot() -- error
 errinj.set('ERRINJ_VY_INDEX_FILE_RENAME', false)
 
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
 errinj.set('ERRINJ_VY_GC', false)
 
 test_run:cmd('restart server default')
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 2bd701f70..bf49f4e46 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -10,10 +10,6 @@ fiber = require('fiber')
 errinj = box.error.injection
 ---
 ...
-errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0.040)
----
-- ok
-...
 --
 -- Lost data in case of dump error
 --
@@ -361,10 +357,6 @@ box.snapshot();
 s:drop()
 ---
 ...
-errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)
----
-- ok
-...
 --
 -- Check that upsert squash fiber does not crash if index or
 -- in-memory tree is gone.
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 750d3bfe8..2c8121364 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -2,7 +2,6 @@ test_run = require('test_run').new()
 fio = require('fio')
 fiber = require('fiber')
 errinj = box.error.injection
-errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0.040)
 --
 -- Lost data in case of dump error
 --
@@ -126,8 +125,6 @@ box.snapshot();
 #s:select({1})
 s:drop()
 
-errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)
-
 --
 -- Check that upsert squash fiber does not crash if index or
 -- in-memory tree is gone.
diff --git a/test/vinyl/errinj_stat.result b/test/vinyl/errinj_stat.result
index a8b5e0312..38c27a924 100644
--- a/test/vinyl/errinj_stat.result
+++ b/test/vinyl/errinj_stat.result
@@ -214,10 +214,6 @@ errinj.set('ERRINJ_VY_RUN_WRITE', true)
 ---
 - ok
 ...
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.01)
----
-- ok
-...
 s:replace{2}
 ---
 - [2]
@@ -241,10 +237,6 @@ errinj.set('ERRINJ_VY_RUN_WRITE', false)
 ---
 - ok
 ...
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
----
-- ok
-...
 test_run:wait_cond(function() return box.stat.vinyl().scheduler.tasks_completed > 1 end)
 ---
 - true
diff --git a/test/vinyl/errinj_stat.test.lua b/test/vinyl/errinj_stat.test.lua
index f331d9c2d..0d1e4af8e 100644
--- a/test/vinyl/errinj_stat.test.lua
+++ b/test/vinyl/errinj_stat.test.lua
@@ -66,14 +66,12 @@ stat.tasks_inprogress == 0
 stat.tasks_completed == 1
 stat.tasks_failed == 0
 errinj.set('ERRINJ_VY_RUN_WRITE', true)
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.01)
 s:replace{2}
 box.snapshot()
 stat = box.stat.vinyl().scheduler
 stat.tasks_completed == 1
 stat.tasks_failed > 0
 errinj.set('ERRINJ_VY_RUN_WRITE', false)
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
 test_run:wait_cond(function() return box.stat.vinyl().scheduler.tasks_completed > 1 end)
 box.snapshot()
 i:compact()
diff --git a/test/vinyl/errinj_vylog.result b/test/vinyl/errinj_vylog.result
index 8fb5deda4..b9ae9332e 100644
--- a/test/vinyl/errinj_vylog.result
+++ b/test/vinyl/errinj_vylog.result
@@ -52,13 +52,6 @@ _ = s:create_index('pk')
 _ = s:insert{1, 'x'}
 ---
 ...
-SCHED_TIMEOUT = 0.05
----
-...
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
----
-- ok
-...
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true);
 ---
 - ok
@@ -71,13 +64,6 @@ box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
 ---
 - ok
 ...
-fiber.sleep(2 * SCHED_TIMEOUT)
----
-...
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
----
-- ok
-...
 _ = s:insert{2, 'y'}
 ---
 ...
@@ -161,24 +147,10 @@ s2.index.pk:drop()
 ---
 ...
 -- pending records must not be rolled back on error
-SCHED_TIMEOUT = 0.05
----
-...
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
----
-- ok
-...
 box.snapshot() -- error
 ---
 - error: Error injection 'vinyl log flush'
 ...
-fiber.sleep(2 * SCHED_TIMEOUT)
----
-...
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
----
-- ok
-...
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
 ---
 - ok
diff --git a/test/vinyl/errinj_vylog.test.lua b/test/vinyl/errinj_vylog.test.lua
index 5ec58682e..4401f3015 100644
--- a/test/vinyl/errinj_vylog.test.lua
+++ b/test/vinyl/errinj_vylog.test.lua
@@ -27,15 +27,11 @@ s = box.schema.space.create('test', {engine = 'vinyl'})
 _ = s:create_index('pk')
 _ = s:insert{1, 'x'}
 
-SCHED_TIMEOUT = 0.05
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true);
 
 box.snapshot()
 
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
-fiber.sleep(2 * SCHED_TIMEOUT)
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
 
 _ = s:insert{2, 'y'}
 
@@ -79,11 +75,7 @@ _ = s1:insert{3, 'c'}
 s2.index.pk:drop()
 
 -- pending records must not be rolled back on error
-SCHED_TIMEOUT = 0.05
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
 box.snapshot() -- error
-fiber.sleep(2 * SCHED_TIMEOUT)
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
 
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
 
-- 
2.17.1

  parent reply	other threads:[~2020-04-28  1:03 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-28  1:03 [Tarantool-patches] [PATCH v2 0/2] Unthrottle vinyl " Nikita Pettik
2020-04-28  1:03 ` [Tarantool-patches] [PATCH v2 1/2] engine: add is_scheduled arg to engine->begin_checkpoint Nikita Pettik
2020-04-28  1:03 ` Nikita Pettik [this message]
2020-05-03 17:01 ` [Tarantool-patches] [PATCH v2 0/2] Unthrottle vinyl scheduler on manual checkpoint Vladislav Shpilevoy
2020-05-27 13:40   ` Nikita Pettik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=42d8197eeca37e6bfe297daf7c12d444486f1eee.1588035071.git.korablev@tarantool.org \
    --to=korablev@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v2 2/2] vinyl: unthrottle scheduler on manual checkpoint' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox