From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp38.i.mail.ru (smtp38.i.mail.ru [94.100.177.98]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id A167A4696C3 for ; Thu, 30 Apr 2020 22:27:54 +0300 (MSK) From: Nikita Pettik Date: Thu, 30 Apr 2020 22:27:51 +0300 Message-Id: <5012bd8eb07b5379eb70aa777402c7cd566a3b34.1588273848.git.korablev@tarantool.org> In-Reply-To: References: In-Reply-To: References: Subject: [Tarantool-patches] [PATCH 2/2] vinyl: drop wasted runs in case range recovery fails List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org Cc: v.shpilevoy@tarantool.org If recovery process fails during range restoration, range itself is deleted and recovery is assumed to be finished as failed (in case of casual i.e. not forced recovery). During recovery of particular range, runs to be restored are reffed twice: once when they are created at vy_run_new() and once when they are attached to slice. This fact is taken into consideration and after all ranges are recovered all runs of lsm tree are unreffed so that slices own run resources. However, if range recovery fails, it is dropped alongside with slices which in turn results in unreffing runs - this is not accounted. In this case, once again unreffing such runs would lead to their destruction. On the other hand, iteration over runs may turn out to be unsafe, so we should use rlist_foreach_entry_safe(). Moreover, we should explicitly unaccount these runs calling vy_lsm_remove_run(). Closes #4805 --- src/box/vy_lsm.c | 14 ++- src/box/vy_run.c | 4 + src/errinj.h | 1 + test/box/errinj.result | 1 + test/vinyl/errinj_recovery.lua | 10 ++ .../gh-4805-open-run-err-recovery.result | 101 ++++++++++++++++++ .../gh-4805-open-run-err-recovery.test.lua | 38 +++++++ test/vinyl/suite.ini | 2 +- 8 files changed, 167 insertions(+), 4 deletions(-) create mode 100644 test/vinyl/errinj_recovery.lua create mode 100644 test/vinyl/gh-4805-open-run-err-recovery.result create mode 100644 test/vinyl/gh-4805-open-run-err-recovery.test.lua diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c index 3d3f41b7a..81b011c69 100644 --- a/src/box/vy_lsm.c +++ b/src/box/vy_lsm.c @@ -604,9 +604,17 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery, * of each recovered run. We need to drop the extra * references once we are done. */ - struct vy_run *run; - rlist_foreach_entry(run, &lsm->runs, in_lsm) { - assert(run->refs > 1); + struct vy_run *run, *next_run; + rlist_foreach_entry_safe(run, &lsm->runs, in_lsm, next_run) { + /* + * In case vy_lsm_recover_range() failed, slices + * are already deleted and runs are unreffed. So + * we have nothing to do but finish run clean-up. + */ + if (run->refs == 1) { + assert(rc != 0); + vy_lsm_remove_run(lsm, run); + } vy_run_unref(run); } diff --git a/src/box/vy_run.c b/src/box/vy_run.c index bd8a7a430..eeaeb88b4 100644 --- a/src/box/vy_run.c +++ b/src/box/vy_run.c @@ -1676,6 +1676,10 @@ vy_run_recover(struct vy_run *run, const char *dir, space_id, iid, run->id, VY_FILE_INDEX); struct xlog_cursor cursor; + ERROR_INJECT_DELAYED(ERRINJ_VY_RUN_OPEN, 2, { + diag_set(SystemError, "failed to open '%s' file", path); + goto fail; + }); if (xlog_cursor_open(&cursor, path)) goto fail; diff --git a/src/errinj.h b/src/errinj.h index 990f4921d..273458d59 100644 --- a/src/errinj.h +++ b/src/errinj.h @@ -127,6 +127,7 @@ struct errinj { _(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_DYN_MODULE_COUNT, ERRINJ_INT, {.iparam = 0}) \ _(ERRINJ_INDEX_RESERVE, ERRINJ_BOOL, {.bparam = false})\ + _(ERRINJ_VY_RUN_OPEN, ERRINJ_BOOL, {.bparam = false})\ ENUM0(errinj_id, ERRINJ_LIST); extern struct errinj errinjs[]; diff --git a/test/box/errinj.result b/test/box/errinj.result index 8090deedc..785f6394b 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -70,6 +70,7 @@ evals - ERRINJ_VY_READ_PAGE_TIMEOUT: 0 - ERRINJ_VY_RUN_DISCARD: false - ERRINJ_VY_RUN_FILE_RENAME: false + - ERRINJ_VY_RUN_OPEN: false - ERRINJ_VY_RUN_WRITE: false - ERRINJ_VY_RUN_WRITE_DELAY: false - ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT: 0 diff --git a/test/vinyl/errinj_recovery.lua b/test/vinyl/errinj_recovery.lua new file mode 100644 index 000000000..925d12a85 --- /dev/null +++ b/test/vinyl/errinj_recovery.lua @@ -0,0 +1,10 @@ +#!/usr/bin/env tarantool + +box.error.injection.set('ERRINJ_VY_RUN_OPEN', true) +assert(box.error.injection.get('ERRINJ_VY_RUN_OPEN')) + +box.cfg { + listen = os.getenv("LISTEN"), +} + +require('console').listen(os.getenv('ADMIN')) diff --git a/test/vinyl/gh-4805-open-run-err-recovery.result b/test/vinyl/gh-4805-open-run-err-recovery.result new file mode 100644 index 000000000..31e0e7857 --- /dev/null +++ b/test/vinyl/gh-4805-open-run-err-recovery.result @@ -0,0 +1,101 @@ +-- test-run result file version 2 +fiber = require('fiber') + | --- + | ... +test_run = require('test_run').new() + | --- + | ... + +test_run:cmd('create server err_recovery with script = "vinyl/errinj_recovery.lua"') + | --- + | - true + | ... +test_run:cmd('start server err_recovery') + | --- + | - true + | ... +test_run:cmd('switch err_recovery') + | --- + | - true + | ... + +s = box.schema.space.create('test', {engine = 'vinyl'}) + | --- + | ... +_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024}) + | --- + | ... + +digest = require('digest') + | --- + | ... +test_run:cmd("setopt delimiter ';'") + | --- + | - true + | ... +function dump(big) + local step = big and 1 or 5 + for i = 1, 20, step do + s:replace{i, digest.urandom(1000)} + end + box.snapshot() +end; + | --- + | ... +test_run:cmd("setopt delimiter ''"); + | --- + | - true + | ... + +dump(true) + | --- + | ... +dump() + | --- + | ... +dump() + | --- + | ... + +test_run:cmd('switch default') + | --- + | - true + | ... +test_run:cmd('stop server err_recovery') + | --- + | - true + | ... +test_run:cmd('start server err_recovery with crash_expected=True') + | --- + | - false + | ... + +fio = require('fio') + | --- + | ... +fh = fio.open(fio.pathjoin(fio.cwd(), 'errinj_recovery.log'), {'O_RDONLY'}) + | --- + | ... +size = fh:seek(0, 'SEEK_END') + | --- + | ... +fh:seek(-256, 'SEEK_END') ~= nil + | --- + | - true + | ... +line = fh:read(256) + | --- + | ... +fh:close() + | --- + | - true + | ... +string.match(line, 'failed to open') ~= nil + | --- + | - true + | ... + +test_run:cmd('delete server err_recovery') + | --- + | - true + | ... diff --git a/test/vinyl/gh-4805-open-run-err-recovery.test.lua b/test/vinyl/gh-4805-open-run-err-recovery.test.lua new file mode 100644 index 000000000..8b5895d41 --- /dev/null +++ b/test/vinyl/gh-4805-open-run-err-recovery.test.lua @@ -0,0 +1,38 @@ +fiber = require('fiber') +test_run = require('test_run').new() + +test_run:cmd('create server err_recovery with script = "vinyl/errinj_recovery.lua"') +test_run:cmd('start server err_recovery') +test_run:cmd('switch err_recovery') + +s = box.schema.space.create('test', {engine = 'vinyl'}) +_ = s:create_index('pk', {run_count_per_level = 100, page_size = 128, range_size = 1024}) + +digest = require('digest') +test_run:cmd("setopt delimiter ';'") +function dump(big) + local step = big and 1 or 5 + for i = 1, 20, step do + s:replace{i, digest.urandom(1000)} + end + box.snapshot() +end; +test_run:cmd("setopt delimiter ''"); + +dump(true) +dump() +dump() + +test_run:cmd('switch default') +test_run:cmd('stop server err_recovery') +test_run:cmd('start server err_recovery with crash_expected=True') + +fio = require('fio') +fh = fio.open(fio.pathjoin(fio.cwd(), 'errinj_recovery.log'), {'O_RDONLY'}) +size = fh:seek(0, 'SEEK_END') +fh:seek(-256, 'SEEK_END') ~= nil +line = fh:read(256) +fh:close() +string.match(line, 'failed to open') ~= nil + +test_run:cmd('delete server err_recovery') diff --git a/test/vinyl/suite.ini b/test/vinyl/suite.ini index 1417d7156..ce02eb83d 100644 --- a/test/vinyl/suite.ini +++ b/test/vinyl/suite.ini @@ -2,7 +2,7 @@ core = tarantool description = vinyl integration tests script = vinyl.lua -release_disabled = errinj.test.lua errinj_ddl.test.lua errinj_gc.test.lua errinj_stat.test.lua errinj_tx.test.lua errinj_vylog.test.lua partial_dump.test.lua quota_timeout.test.lua recovery_quota.test.lua replica_rejoin.test.lua +release_disabled = errinj.test.lua errinj_ddl.test.lua errinj_gc.test.lua errinj_stat.test.lua errinj_tx.test.lua errinj_vylog.test.lua partial_dump.test.lua quota_timeout.test.lua recovery_quota.test.lua replica_rejoin.test.lua gh-4805-open-run-err-recovery.test.lua config = suite.cfg lua_libs = suite.lua stress.lua large.lua txn_proxy.lua ../box/lua/utils.lua use_unix_sockets = True -- 2.17.1