Tarantool development patches archive
 help / color / mirror / Atom feed
From: Nikita Pettik <korablev@tarantool.org>
To: tarantool-patches@dev.tarantool.org
Cc: v.shpilevoy@tarantool.org
Subject: [Tarantool-patches] [PATCH 2/2] vinyl: drop wasted runs in case range recovery fails
Date: Thu, 30 Apr 2020 22:27:51 +0300	[thread overview]
Message-ID: <5012bd8eb07b5379eb70aa777402c7cd566a3b34.1588273848.git.korablev@tarantool.org> (raw)
In-Reply-To: <cover.1588273848.git.korablev@tarantool.org>
In-Reply-To: <cover.1588273848.git.korablev@tarantool.org>

If recovery process fails during range restoration, range itself is
deleted and recovery is assumed to be finished as failed (in case of
casual i.e. not forced recovery). During recovery of particular range,
runs to be restored are reffed twice: once when they are created at
vy_run_new() and once when they are attached to slice. This fact is
taken into consideration and after all ranges are recovered all runs of
lsm tree are unreffed so that slices own run resources. However, if
range recovery fails, it is dropped alongside with slices which in turn
results in unreffing runs - this is not accounted. In this case, once
again unreffing such runs would lead to their destruction. On the other
hand, iteration over runs may turn out to be unsafe, so we should use
rlist_foreach_entry_safe(). Moreover, we should explicitly unaccount
these runs calling vy_lsm_remove_run().

Closes #4805
---
 src/box/vy_lsm.c                              |  14 ++-
 src/box/vy_run.c                              |   4 +
 src/errinj.h                                  |   1 +
 test/box/errinj.result                        |   1 +
 test/vinyl/errinj_recovery.lua                |  10 ++
 .../gh-4805-open-run-err-recovery.result      | 101 ++++++++++++++++++
 .../gh-4805-open-run-err-recovery.test.lua    |  38 +++++++
 test/vinyl/suite.ini                          |   2 +-
 8 files changed, 167 insertions(+), 4 deletions(-)
 create mode 100644 test/vinyl/errinj_recovery.lua
 create mode 100644 test/vinyl/gh-4805-open-run-err-recovery.result
 create mode 100644 test/vinyl/gh-4805-open-run-err-recovery.test.lua

diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c
index 3d3f41b7a..81b011c69 100644
--- a/src/box/vy_lsm.c
+++ b/src/box/vy_lsm.c
@@ -604,9 +604,17 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery,
 	 * of each recovered run. We need to drop the extra
 	 * references once we are done.
 	 */
-	struct vy_run *run;
-	rlist_foreach_entry(run, &lsm->runs, in_lsm) {
-		assert(run->refs > 1);
+	struct vy_run *run, *next_run;
+	rlist_foreach_entry_safe(run, &lsm->runs, in_lsm, next_run) {
+		/*
+		 * In case vy_lsm_recover_range() failed, slices
+		 * are already deleted and runs are unreffed. So
+		 * we have nothing to do but finish run clean-up.
+		 */
+		if (run->refs == 1) {
+			assert(rc != 0);
+			vy_lsm_remove_run(lsm, run);
+		}
 		vy_run_unref(run);
 	}
 
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index bd8a7a430..eeaeb88b4 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -1676,6 +1676,10 @@ vy_run_recover(struct vy_run *run, const char *dir,
 			    space_id, iid, run->id, VY_FILE_INDEX);
 
 	struct xlog_cursor cursor;
+	ERROR_INJECT_DELAYED(ERRINJ_VY_RUN_OPEN, 2, {
+		diag_set(SystemError, "failed to open '%s' file", path);
+		goto fail;
+	});
 	if (xlog_cursor_open(&cursor, path))
 		goto fail;
 
diff --git a/src/errinj.h b/src/errinj.h
index 990f4921d..273458d59 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -127,6 +127,7 @@ struct errinj {
 	_(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_DYN_MODULE_COUNT, ERRINJ_INT, {.iparam = 0}) \
 	_(ERRINJ_INDEX_RESERVE, ERRINJ_BOOL, {.bparam = false})\
+	_(ERRINJ_VY_RUN_OPEN, ERRINJ_BOOL, {.bparam = false})\
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 8090deedc..785f6394b 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -70,6 +70,7 @@ evals
   - ERRINJ_VY_READ_PAGE_TIMEOUT: 0
   - ERRINJ_VY_RUN_DISCARD: false
   - ERRINJ_VY_RUN_FILE_RENAME: false
+  - ERRINJ_VY_RUN_OPEN: false
   - ERRINJ_VY_RUN_WRITE: false
   - ERRINJ_VY_RUN_WRITE_DELAY: false
   - ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT: 0
diff --git a/test/vinyl/errinj_recovery.lua b/test/vinyl/errinj_recovery.lua
new file mode 100644
index 000000000..925d12a85
--- /dev/null
+++ b/test/vinyl/errinj_recovery.lua
@@ -0,0 +1,10 @@
+#!/usr/bin/env tarantool
+
+box.error.injection.set('ERRINJ_VY_RUN_OPEN', true)
+assert(box.error.injection.get('ERRINJ_VY_RUN_OPEN'))
+
+box.cfg {
+    listen = os.getenv("LISTEN"),
+}
+
+require('console').listen(os.getenv('ADMIN'))
diff --git a/test/vinyl/gh-4805-open-run-err-recovery.result b/test/vinyl/gh-4805-open-run-err-recovery.result
new file mode 100644
index 000000000..31e0e7857
--- /dev/null
+++ b/test/vinyl/gh-4805-open-run-err-recovery.result
@@ -0,0 +1,101 @@
+-- test-run result file version 2
+fiber = require('fiber')
+ | ---
+ | ...
+test_run = require('test_run').new()
+ | ---
+ | ...
+
+test_run:cmd('create server err_recovery with script = "vinyl/errinj_recovery.lua"')
+ | ---
+ | - true
+ | ...
+test_run:cmd('start server err_recovery')
+ | ---
+ | - true
+ | ...
+test_run:cmd('switch err_recovery')
+ | ---
+ | - true
+ | ...
+
+s = box.schema.space.create('test', {engine = 'vinyl'})
+ | ---
+ | ...
+_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024})
+ | ---
+ | ...
+
+digest = require('digest')
+ | ---
+ | ...
+test_run:cmd("setopt delimiter ';'")
+ | ---
+ | - true
+ | ...
+function dump(big)
+    local step = big and 1 or 5
+    for i = 1, 20, step do
+        s:replace{i, digest.urandom(1000)}
+    end
+    box.snapshot()
+end;
+ | ---
+ | ...
+test_run:cmd("setopt delimiter ''");
+ | ---
+ | - true
+ | ...
+
+dump(true)
+ | ---
+ | ...
+dump()
+ | ---
+ | ...
+dump()
+ | ---
+ | ...
+
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+test_run:cmd('stop server err_recovery')
+ | ---
+ | - true
+ | ...
+test_run:cmd('start server err_recovery with crash_expected=True')
+ | ---
+ | - false
+ | ...
+
+fio = require('fio')
+ | ---
+ | ...
+fh = fio.open(fio.pathjoin(fio.cwd(), 'errinj_recovery.log'), {'O_RDONLY'})
+ | ---
+ | ...
+size = fh:seek(0, 'SEEK_END')
+ | ---
+ | ...
+fh:seek(-256, 'SEEK_END') ~= nil
+ | ---
+ | - true
+ | ...
+line = fh:read(256)
+ | ---
+ | ...
+fh:close()
+ | ---
+ | - true
+ | ...
+string.match(line, 'failed to open') ~= nil
+ | ---
+ | - true
+ | ...
+
+test_run:cmd('delete server err_recovery')
+ | ---
+ | - true
+ | ...
diff --git a/test/vinyl/gh-4805-open-run-err-recovery.test.lua b/test/vinyl/gh-4805-open-run-err-recovery.test.lua
new file mode 100644
index 000000000..8b5895d41
--- /dev/null
+++ b/test/vinyl/gh-4805-open-run-err-recovery.test.lua
@@ -0,0 +1,38 @@
+fiber = require('fiber')
+test_run = require('test_run').new()
+
+test_run:cmd('create server err_recovery with script = "vinyl/errinj_recovery.lua"')
+test_run:cmd('start server err_recovery')
+test_run:cmd('switch err_recovery')
+
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024})
+
+digest = require('digest')
+test_run:cmd("setopt delimiter ';'")
+function dump(big)
+    local step = big and 1 or 5
+    for i = 1, 20, step do
+        s:replace{i, digest.urandom(1000)}
+    end
+    box.snapshot()
+end;
+test_run:cmd("setopt delimiter ''");
+
+dump(true)
+dump()
+dump()
+
+test_run:cmd('switch default')
+test_run:cmd('stop server err_recovery')
+test_run:cmd('start server err_recovery with crash_expected=True')
+
+fio = require('fio')
+fh = fio.open(fio.pathjoin(fio.cwd(), 'errinj_recovery.log'), {'O_RDONLY'})
+size = fh:seek(0, 'SEEK_END')
+fh:seek(-256, 'SEEK_END') ~= nil
+line = fh:read(256)
+fh:close()
+string.match(line, 'failed to open') ~= nil
+
+test_run:cmd('delete server err_recovery')
diff --git a/test/vinyl/suite.ini b/test/vinyl/suite.ini
index 1417d7156..ce02eb83d 100644
--- a/test/vinyl/suite.ini
+++ b/test/vinyl/suite.ini
@@ -2,7 +2,7 @@
 core = tarantool
 description = vinyl integration tests
 script = vinyl.lua
-release_disabled = errinj.test.lua errinj_ddl.test.lua errinj_gc.test.lua errinj_stat.test.lua errinj_tx.test.lua errinj_vylog.test.lua partial_dump.test.lua quota_timeout.test.lua recovery_quota.test.lua replica_rejoin.test.lua
+release_disabled = errinj.test.lua errinj_ddl.test.lua errinj_gc.test.lua errinj_stat.test.lua errinj_tx.test.lua errinj_vylog.test.lua partial_dump.test.lua quota_timeout.test.lua recovery_quota.test.lua replica_rejoin.test.lua gh-4805-open-run-err-recovery.test.lua
 config = suite.cfg
 lua_libs = suite.lua stress.lua large.lua txn_proxy.lua ../box/lua/utils.lua
 use_unix_sockets = True
-- 
2.17.1

  parent reply	other threads:[~2020-04-30 19:27 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-30 19:27 [Tarantool-patches] [PATCH 0/2] Fix crash in case of lack of FDs during recovery Nikita Pettik
2020-04-30 19:27 ` [Tarantool-patches] [PATCH 1/2] errinj: introduce delayed injection Nikita Pettik
2020-04-30 20:15   ` Konstantin Osipov
2020-04-30 20:55     ` Nikita Pettik
2020-05-01 19:15       ` Konstantin Osipov
2020-05-03 19:20   ` Vladislav Shpilevoy
2020-05-07 13:50     ` Nikita Pettik
2020-05-07 21:47       ` Vladislav Shpilevoy
2020-05-07 22:41         ` Nikita Pettik
2020-04-30 19:27 ` Nikita Pettik [this message]
2020-05-03 19:21   ` [Tarantool-patches] [PATCH 2/2] vinyl: drop wasted runs in case range recovery fails Vladislav Shpilevoy
2020-05-07 13:02     ` Nikita Pettik
2020-05-07 14:16       ` Konstantin Osipov
2020-05-07 21:47         ` Vladislav Shpilevoy
2020-05-07 22:37           ` Nikita Pettik
2020-05-07 21:47       ` Vladislav Shpilevoy
2020-05-07 22:36         ` Nikita Pettik
2020-05-10 19:59           ` Vladislav Shpilevoy
2020-05-12  1:16             ` Nikita Pettik
2020-05-03 19:20 ` [Tarantool-patches] [PATCH 0/2] Fix crash in case of lack of FDs during recovery Vladislav Shpilevoy
2020-05-07 14:11   ` Nikita Pettik
2020-05-12 20:53 ` Vladislav Shpilevoy
2020-05-12 20:56   ` Vladislav Shpilevoy
2020-05-12 22:45     ` Nikita Pettik
2020-05-13 20:19       ` Vladislav Shpilevoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5012bd8eb07b5379eb70aa777402c7cd566a3b34.1588273848.git.korablev@tarantool.org \
    --to=korablev@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH 2/2] vinyl: drop wasted runs in case range recovery fails' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox