[Tarantool-patches] [PATCH v2 2/2] vinyl: unthrottle scheduler on manual checkpoint

Nikita Pettik korablev at tarantool.org
Tue Apr 28 04:03:43 MSK 2020


Before this patch box.snapshot() bails out immediately if it sees
that the scheduler is throttled due to errors. For instance:

box.error.injection.set('ERRINJ_VY_RUN_WRITE', true)
snapshot() -- fails due to ERRINJ_VY_RUN_WRITE
box.error.injection.set('ERRINJ_VY_RUN_WRITE', false)
snapshot() -- still fails despite the fact that injected error is unset

As a result, one has to wait up to a minute to make a snapshot. The
reason why throttling was introduced was to avoid flooding the log
in case of repeating disk errors.
What is more, to deal with schedule throttling in tests, we had to
introduce a new error injection (ERRINJ_VY_SCHED_TIMEOUT). It reduces
time duration during which the scheduler remains throttled, which is
ugly and race prone.  So, let's unthrottle scheduler when checkpoint
process is launched via manual box.snapshot() invocation.

Closes #3519
---
 src/box/vinyl.c                  |  2 +-
 src/box/vy_scheduler.c           | 21 ++++++++++++++-------
 src/box/vy_scheduler.h           |  2 +-
 test/box/errinj.result           |  8 --------
 test/box/errinj.test.lua         |  2 --
 test/vinyl/errinj.result         |  8 --------
 test/vinyl/errinj.test.lua       |  3 ---
 test/vinyl/errinj_stat.result    |  8 --------
 test/vinyl/errinj_stat.test.lua  |  2 --
 test/vinyl/errinj_vylog.result   | 28 ----------------------------
 test/vinyl/errinj_vylog.test.lua |  8 --------
 11 files changed, 16 insertions(+), 76 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 2a01322f7..055e88f0f 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -2738,7 +2738,7 @@ vinyl_engine_begin_checkpoint(struct engine *engine, bool is_scheduled)
 	 */
 	if (lsregion_used(&env->mem_env.allocator) == 0)
 		return 0;
-	if (vy_scheduler_begin_checkpoint(&env->scheduler) != 0)
+	if (vy_scheduler_begin_checkpoint(&env->scheduler, is_scheduled) != 0)
 		return -1;
 	return 0;
 }
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index bf4c3fe58..cf58d5f60 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -681,22 +681,29 @@ vy_scheduler_complete_dump(struct vy_scheduler *scheduler)
 }
 
 int
-vy_scheduler_begin_checkpoint(struct vy_scheduler *scheduler)
+vy_scheduler_begin_checkpoint(struct vy_scheduler *scheduler, bool is_scheduled)
 {
 	assert(!scheduler->checkpoint_in_progress);
 
 	/*
-	 * If the scheduler is throttled due to errors, do not wait
+	 * If checkpoint is manually launched (via box.snapshot())
+	 * then ignore throttling and force dump process. Otherwise,
+	 * if the scheduler is throttled due to errors, do not wait
 	 * until it wakes up as it may take quite a while. Instead
 	 * fail checkpoint immediately with the last error seen by
 	 * the scheduler.
 	 */
 	if (scheduler->is_throttled) {
-		struct error *e = diag_last_error(&scheduler->diag);
-		diag_set_error(diag_get(), e);
-		say_error("cannot checkpoint vinyl, "
-			  "scheduler is throttled with: %s", e->errmsg);
-		return -1;
+		if (is_scheduled) {
+			struct error *e = diag_last_error(&scheduler->diag);
+			diag_set_error(diag_get(), e);
+			say_error("cannot checkpoint vinyl, "
+				  "scheduler is throttled with: %s", e->errmsg);
+			return -1;
+		}
+		say_info("scheduler is unthrottled due to manual checkpoint "
+			 "process");
+		scheduler->is_throttled = false;
 	}
 
 	if (!vy_scheduler_dump_in_progress(scheduler)) {
diff --git a/src/box/vy_scheduler.h b/src/box/vy_scheduler.h
index bc953975e..42e7b2f2f 100644
--- a/src/box/vy_scheduler.h
+++ b/src/box/vy_scheduler.h
@@ -225,7 +225,7 @@ vy_scheduler_force_compaction(struct vy_scheduler *scheduler,
  * after that.
  */
 int
-vy_scheduler_begin_checkpoint(struct vy_scheduler *);
+vy_scheduler_begin_checkpoint(struct vy_scheduler *, bool);
 
 /**
  * Wait for checkpoint. Please call vy_scheduler_end_checkpoint()
diff --git a/test/box/errinj.result b/test/box/errinj.result
index de877b708..e506e9d73 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -1585,10 +1585,6 @@ errinj.set('ERRINJ_VY_GC', true)
 ---
 - ok
 ...
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.001)
----
-- ok
-...
 errinj.set('ERRINJ_VY_RUN_FILE_RENAME', true)
 ---
 - ok
@@ -1625,10 +1621,6 @@ errinj.set('ERRINJ_VY_INDEX_FILE_RENAME', false)
 ---
 - ok
 ...
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
----
-- ok
-...
 errinj.set('ERRINJ_VY_GC', false)
 ---
 - ok
diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua
index 5d8f4c635..bb76e0791 100644
--- a/test/box/errinj.test.lua
+++ b/test/box/errinj.test.lua
@@ -575,7 +575,6 @@ box.snapshot()
 errinj.set('ERRINJ_VY_LOG_FILE_RENAME', false)
 
 errinj.set('ERRINJ_VY_GC', true)
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.001)
 
 errinj.set('ERRINJ_VY_RUN_FILE_RENAME', true)
 box.space.test:insert{1}
@@ -590,7 +589,6 @@ box.space.test:insert{2}
 box.snapshot() -- error
 errinj.set('ERRINJ_VY_INDEX_FILE_RENAME', false)
 
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
 errinj.set('ERRINJ_VY_GC', false)
 
 test_run:cmd('restart server default')
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 2bd701f70..bf49f4e46 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -10,10 +10,6 @@ fiber = require('fiber')
 errinj = box.error.injection
 ---
 ...
-errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0.040)
----
-- ok
-...
 --
 -- Lost data in case of dump error
 --
@@ -361,10 +357,6 @@ box.snapshot();
 s:drop()
 ---
 ...
-errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)
----
-- ok
-...
 --
 -- Check that upsert squash fiber does not crash if index or
 -- in-memory tree is gone.
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index 750d3bfe8..2c8121364 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -2,7 +2,6 @@ test_run = require('test_run').new()
 fio = require('fio')
 fiber = require('fiber')
 errinj = box.error.injection
-errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0.040)
 --
 -- Lost data in case of dump error
 --
@@ -126,8 +125,6 @@ box.snapshot();
 #s:select({1})
 s:drop()
 
-errinj.set("ERRINJ_VY_SCHED_TIMEOUT", 0)
-
 --
 -- Check that upsert squash fiber does not crash if index or
 -- in-memory tree is gone.
diff --git a/test/vinyl/errinj_stat.result b/test/vinyl/errinj_stat.result
index a8b5e0312..38c27a924 100644
--- a/test/vinyl/errinj_stat.result
+++ b/test/vinyl/errinj_stat.result
@@ -214,10 +214,6 @@ errinj.set('ERRINJ_VY_RUN_WRITE', true)
 ---
 - ok
 ...
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.01)
----
-- ok
-...
 s:replace{2}
 ---
 - [2]
@@ -241,10 +237,6 @@ errinj.set('ERRINJ_VY_RUN_WRITE', false)
 ---
 - ok
 ...
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
----
-- ok
-...
 test_run:wait_cond(function() return box.stat.vinyl().scheduler.tasks_completed > 1 end)
 ---
 - true
diff --git a/test/vinyl/errinj_stat.test.lua b/test/vinyl/errinj_stat.test.lua
index f331d9c2d..0d1e4af8e 100644
--- a/test/vinyl/errinj_stat.test.lua
+++ b/test/vinyl/errinj_stat.test.lua
@@ -66,14 +66,12 @@ stat.tasks_inprogress == 0
 stat.tasks_completed == 1
 stat.tasks_failed == 0
 errinj.set('ERRINJ_VY_RUN_WRITE', true)
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0.01)
 s:replace{2}
 box.snapshot()
 stat = box.stat.vinyl().scheduler
 stat.tasks_completed == 1
 stat.tasks_failed > 0
 errinj.set('ERRINJ_VY_RUN_WRITE', false)
-errinj.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
 test_run:wait_cond(function() return box.stat.vinyl().scheduler.tasks_completed > 1 end)
 box.snapshot()
 i:compact()
diff --git a/test/vinyl/errinj_vylog.result b/test/vinyl/errinj_vylog.result
index 8fb5deda4..b9ae9332e 100644
--- a/test/vinyl/errinj_vylog.result
+++ b/test/vinyl/errinj_vylog.result
@@ -52,13 +52,6 @@ _ = s:create_index('pk')
 _ = s:insert{1, 'x'}
 ---
 ...
-SCHED_TIMEOUT = 0.05
----
-...
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
----
-- ok
-...
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true);
 ---
 - ok
@@ -71,13 +64,6 @@ box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
 ---
 - ok
 ...
-fiber.sleep(2 * SCHED_TIMEOUT)
----
-...
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
----
-- ok
-...
 _ = s:insert{2, 'y'}
 ---
 ...
@@ -161,24 +147,10 @@ s2.index.pk:drop()
 ---
 ...
 -- pending records must not be rolled back on error
-SCHED_TIMEOUT = 0.05
----
-...
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
----
-- ok
-...
 box.snapshot() -- error
 ---
 - error: Error injection 'vinyl log flush'
 ...
-fiber.sleep(2 * SCHED_TIMEOUT)
----
-...
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
----
-- ok
-...
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
 ---
 - ok
diff --git a/test/vinyl/errinj_vylog.test.lua b/test/vinyl/errinj_vylog.test.lua
index 5ec58682e..4401f3015 100644
--- a/test/vinyl/errinj_vylog.test.lua
+++ b/test/vinyl/errinj_vylog.test.lua
@@ -27,15 +27,11 @@ s = box.schema.space.create('test', {engine = 'vinyl'})
 _ = s:create_index('pk')
 _ = s:insert{1, 'x'}
 
-SCHED_TIMEOUT = 0.05
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', true);
 
 box.snapshot()
 
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
-fiber.sleep(2 * SCHED_TIMEOUT)
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
 
 _ = s:insert{2, 'y'}
 
@@ -79,11 +75,7 @@ _ = s1:insert{3, 'c'}
 s2.index.pk:drop()
 
 -- pending records must not be rolled back on error
-SCHED_TIMEOUT = 0.05
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', SCHED_TIMEOUT)
 box.snapshot() -- error
-fiber.sleep(2 * SCHED_TIMEOUT)
-box.error.injection.set('ERRINJ_VY_SCHED_TIMEOUT', 0)
 
 box.error.injection.set('ERRINJ_VY_LOG_FLUSH', false);
 
-- 
2.17.1



More information about the Tarantool-patches mailing list