[Tarantool-patches] [PATCH 2/2] vinyl: drop wasted runs in case range recovery fails

Nikita Pettik korablev at tarantool.org
Thu Apr 30 22:27:51 MSK 2020


If recovery process fails during range restoration, range itself is
deleted and recovery is assumed to be finished as failed (in case of
casual i.e. not forced recovery). During recovery of particular range,
runs to be restored are reffed twice: once when they are created at
vy_run_new() and once when they are attached to slice. This fact is
taken into consideration and after all ranges are recovered all runs of
lsm tree are unreffed so that slices own run resources. However, if
range recovery fails, it is dropped alongside with slices which in turn
results in unreffing runs - this is not accounted. In this case, once
again unreffing such runs would lead to their destruction. On the other
hand, iteration over runs may turn out to be unsafe, so we should use
rlist_foreach_entry_safe(). Moreover, we should explicitly unaccount
these runs calling vy_lsm_remove_run().

Closes #4805
---
 src/box/vy_lsm.c                              |  14 ++-
 src/box/vy_run.c                              |   4 +
 src/errinj.h                                  |   1 +
 test/box/errinj.result                        |   1 +
 test/vinyl/errinj_recovery.lua                |  10 ++
 .../gh-4805-open-run-err-recovery.result      | 101 ++++++++++++++++++
 .../gh-4805-open-run-err-recovery.test.lua    |  38 +++++++
 test/vinyl/suite.ini                          |   2 +-
 8 files changed, 167 insertions(+), 4 deletions(-)
 create mode 100644 test/vinyl/errinj_recovery.lua
 create mode 100644 test/vinyl/gh-4805-open-run-err-recovery.result
 create mode 100644 test/vinyl/gh-4805-open-run-err-recovery.test.lua

diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c
index 3d3f41b7a..81b011c69 100644
--- a/src/box/vy_lsm.c
+++ b/src/box/vy_lsm.c
@@ -604,9 +604,17 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery,
 	 * of each recovered run. We need to drop the extra
 	 * references once we are done.
 	 */
-	struct vy_run *run;
-	rlist_foreach_entry(run, &lsm->runs, in_lsm) {
-		assert(run->refs > 1);
+	struct vy_run *run, *next_run;
+	rlist_foreach_entry_safe(run, &lsm->runs, in_lsm, next_run) {
+		/*
+		 * In case vy_lsm_recover_range() failed, slices
+		 * are already deleted and runs are unreffed. So
+		 * we have nothing to do but finish run clean-up.
+		 */
+		if (run->refs == 1) {
+			assert(rc != 0);
+			vy_lsm_remove_run(lsm, run);
+		}
 		vy_run_unref(run);
 	}
 
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index bd8a7a430..eeaeb88b4 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -1676,6 +1676,10 @@ vy_run_recover(struct vy_run *run, const char *dir,
 			    space_id, iid, run->id, VY_FILE_INDEX);
 
 	struct xlog_cursor cursor;
+	ERROR_INJECT_DELAYED(ERRINJ_VY_RUN_OPEN, 2, {
+		diag_set(SystemError, "failed to open '%s' file", path);
+		goto fail;
+	});
 	if (xlog_cursor_open(&cursor, path))
 		goto fail;
 
diff --git a/src/errinj.h b/src/errinj.h
index 990f4921d..273458d59 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -127,6 +127,7 @@ struct errinj {
 	_(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_DYN_MODULE_COUNT, ERRINJ_INT, {.iparam = 0}) \
 	_(ERRINJ_INDEX_RESERVE, ERRINJ_BOOL, {.bparam = false})\
+	_(ERRINJ_VY_RUN_OPEN, ERRINJ_BOOL, {.bparam = false})\
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 8090deedc..785f6394b 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -70,6 +70,7 @@ evals
   - ERRINJ_VY_READ_PAGE_TIMEOUT: 0
   - ERRINJ_VY_RUN_DISCARD: false
   - ERRINJ_VY_RUN_FILE_RENAME: false
+  - ERRINJ_VY_RUN_OPEN: false
   - ERRINJ_VY_RUN_WRITE: false
   - ERRINJ_VY_RUN_WRITE_DELAY: false
   - ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT: 0
diff --git a/test/vinyl/errinj_recovery.lua b/test/vinyl/errinj_recovery.lua
new file mode 100644
index 000000000..925d12a85
--- /dev/null
+++ b/test/vinyl/errinj_recovery.lua
@@ -0,0 +1,10 @@
+#!/usr/bin/env tarantool
+
+box.error.injection.set('ERRINJ_VY_RUN_OPEN', true)
+assert(box.error.injection.get('ERRINJ_VY_RUN_OPEN'))
+
+box.cfg {
+    listen = os.getenv("LISTEN"),
+}
+
+require('console').listen(os.getenv('ADMIN'))
diff --git a/test/vinyl/gh-4805-open-run-err-recovery.result b/test/vinyl/gh-4805-open-run-err-recovery.result
new file mode 100644
index 000000000..31e0e7857
--- /dev/null
+++ b/test/vinyl/gh-4805-open-run-err-recovery.result
@@ -0,0 +1,101 @@
+-- test-run result file version 2
+fiber = require('fiber')
+ | ---
+ | ...
+test_run = require('test_run').new()
+ | ---
+ | ...
+
+test_run:cmd('create server err_recovery with script = "vinyl/errinj_recovery.lua"')
+ | ---
+ | - true
+ | ...
+test_run:cmd('start server err_recovery')
+ | ---
+ | - true
+ | ...
+test_run:cmd('switch err_recovery')
+ | ---
+ | - true
+ | ...
+
+s = box.schema.space.create('test', {engine = 'vinyl'})
+ | ---
+ | ...
+_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024})
+ | ---
+ | ...
+
+digest = require('digest')
+ | ---
+ | ...
+test_run:cmd("setopt delimiter ';'")
+ | ---
+ | - true
+ | ...
+function dump(big)
+    local step = big and 1 or 5
+    for i = 1, 20, step do
+        s:replace{i, digest.urandom(1000)}
+    end
+    box.snapshot()
+end;
+ | ---
+ | ...
+test_run:cmd("setopt delimiter ''");
+ | ---
+ | - true
+ | ...
+
+dump(true)
+ | ---
+ | ...
+dump()
+ | ---
+ | ...
+dump()
+ | ---
+ | ...
+
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+test_run:cmd('stop server err_recovery')
+ | ---
+ | - true
+ | ...
+test_run:cmd('start server err_recovery with crash_expected=True')
+ | ---
+ | - false
+ | ...
+
+fio = require('fio')
+ | ---
+ | ...
+fh = fio.open(fio.pathjoin(fio.cwd(), 'errinj_recovery.log'), {'O_RDONLY'})
+ | ---
+ | ...
+size = fh:seek(0, 'SEEK_END')
+ | ---
+ | ...
+fh:seek(-256, 'SEEK_END') ~= nil
+ | ---
+ | - true
+ | ...
+line = fh:read(256)
+ | ---
+ | ...
+fh:close()
+ | ---
+ | - true
+ | ...
+string.match(line, 'failed to open') ~= nil
+ | ---
+ | - true
+ | ...
+
+test_run:cmd('delete server err_recovery')
+ | ---
+ | - true
+ | ...
diff --git a/test/vinyl/gh-4805-open-run-err-recovery.test.lua b/test/vinyl/gh-4805-open-run-err-recovery.test.lua
new file mode 100644
index 000000000..8b5895d41
--- /dev/null
+++ b/test/vinyl/gh-4805-open-run-err-recovery.test.lua
@@ -0,0 +1,38 @@
+fiber = require('fiber')
+test_run = require('test_run').new()
+
+test_run:cmd('create server err_recovery with script = "vinyl/errinj_recovery.lua"')
+test_run:cmd('start server err_recovery')
+test_run:cmd('switch err_recovery')
+
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024})
+
+digest = require('digest')
+test_run:cmd("setopt delimiter ';'")
+function dump(big)
+    local step = big and 1 or 5
+    for i = 1, 20, step do
+        s:replace{i, digest.urandom(1000)}
+    end
+    box.snapshot()
+end;
+test_run:cmd("setopt delimiter ''");
+
+dump(true)
+dump()
+dump()
+
+test_run:cmd('switch default')
+test_run:cmd('stop server err_recovery')
+test_run:cmd('start server err_recovery with crash_expected=True')
+
+fio = require('fio')
+fh = fio.open(fio.pathjoin(fio.cwd(), 'errinj_recovery.log'), {'O_RDONLY'})
+size = fh:seek(0, 'SEEK_END')
+fh:seek(-256, 'SEEK_END') ~= nil
+line = fh:read(256)
+fh:close()
+string.match(line, 'failed to open') ~= nil
+
+test_run:cmd('delete server err_recovery')
diff --git a/test/vinyl/suite.ini b/test/vinyl/suite.ini
index 1417d7156..ce02eb83d 100644
--- a/test/vinyl/suite.ini
+++ b/test/vinyl/suite.ini
@@ -2,7 +2,7 @@
 core = tarantool
 description = vinyl integration tests
 script = vinyl.lua
-release_disabled = errinj.test.lua errinj_ddl.test.lua errinj_gc.test.lua errinj_stat.test.lua errinj_tx.test.lua errinj_vylog.test.lua partial_dump.test.lua quota_timeout.test.lua recovery_quota.test.lua replica_rejoin.test.lua
+release_disabled = errinj.test.lua errinj_ddl.test.lua errinj_gc.test.lua errinj_stat.test.lua errinj_tx.test.lua errinj_vylog.test.lua partial_dump.test.lua quota_timeout.test.lua recovery_quota.test.lua replica_rejoin.test.lua gh-4805-open-run-err-recovery.test.lua
 config = suite.cfg
 lua_libs = suite.lua stress.lua large.lua txn_proxy.lua ../box/lua/utils.lua
 use_unix_sockets = True
-- 
2.17.1



More information about the Tarantool-patches mailing list