From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH v2 4/6] recovery: promote recovery clock even if the WAL is empty Date: Fri, 29 Jun 2018 19:48:31 +0300 Message-Id: <06b193fe34f0ca1d3af13ad21a011007b4ef1d97.1530287767.git.vdavydov.dev@gmail.com> In-Reply-To: References: In-Reply-To: References: To: kostja@tarantool.org Cc: tarantool-patches@freelists.org List-ID: Currently, if the last WAL in the directory happens to be corrupted or empty so that we don't recover anything from it, the recovery clock will be that of the last record of the previous WAL. If the previous WAL happens to have a gap at the end, the next WAL will be created between the last WAL (empty one) and the next to last (with a gap at the end), breaking the file order in the WAL directory. Therefore, we must promote the recovery clock even if we don't recover anything from a WAL. --- src/box/recovery.cc | 12 ++++++++++ test/xlog/panic_on_lsn_gap.result | 47 +++++++++++++++++++++++++++++++++++++ test/xlog/panic_on_lsn_gap.test.lua | 10 ++++++++ 3 files changed, 69 insertions(+) diff --git a/src/box/recovery.cc b/src/box/recovery.cc index 8ac89cc2..70eb7d74 100644 --- a/src/box/recovery.cc +++ b/src/box/recovery.cc @@ -183,6 +183,17 @@ recovery_open_log(struct recovery *r, const struct vclock *vclock) */ goto gap_error; } +out: + /* + * We must promote recovery clock even if we don't recover + * anything from the next WAL. Otherwise if the last WAL + * in the directory is corrupted or empty and the previous + * one has an LSN gap at the end (due to a write error), + * we will create the next WAL between two existing ones, + * thus breaking the file order. + */ + if (vclock_compare(&r->vclock, vclock) < 0) + vclock_copy(&r->vclock, vclock); return; gap_error: @@ -192,6 +203,7 @@ gap_error: /* Ignore missing WALs if force_recovery is set. 
*/ e->log(); say_warn("ignoring a gap in LSN"); + goto out; } void diff --git a/test/xlog/panic_on_lsn_gap.result b/test/xlog/panic_on_lsn_gap.result index c93fcdd6..d5064ce6 100644 --- a/test/xlog/panic_on_lsn_gap.result +++ b/test/xlog/panic_on_lsn_gap.result @@ -309,11 +309,58 @@ box.space._schema:replace{'key', 'test 5'} -- creates new WAL --- - ['key', 'test 5'] ... +box.error.injection.set("ERRINJ_WAL_WRITE_DISK", true) +--- +- ok +... +box.space._schema:replace{'key', 'test 6'} -- fails, makes gap +--- +- error: Failed to write to disk +... +box.snapshot() -- fails, rotates WAL +--- +- error: Error injection 'xlog write injection' +... +box.space._schema:replace{'key', 'test 6'} -- fails, creates empty WAL +--- +- error: Failed to write to disk +... +name = string.match(arg[0], "([^,]+)%.lua") +--- +... +require('fio').glob(name .. "/*.xlog") +--- +- - panic/00000000000000000000.xlog + - panic/00000000000000000001.xlog + - panic/00000000000000000012.xlog + - panic/00000000000000000022.xlog + - panic/00000000000000000025.xlog + - panic/00000000000000000027.xlog + - panic/00000000000000000029.xlog +... test_run:cmd("restart server panic") box.space._schema:select{'key'} --- - - ['key', 'test 5'] ... +-- Check that we don't create a WAL in the gap between the last two. +box.space._schema:replace{'key', 'test 6'} +--- +- ['key', 'test 6'] +... +name = string.match(arg[0], "([^,]+)%.lua") +--- +... +require('fio').glob(name .. "/*.xlog") +--- +- - panic/00000000000000000000.xlog + - panic/00000000000000000001.xlog + - panic/00000000000000000012.xlog + - panic/00000000000000000022.xlog + - panic/00000000000000000025.xlog + - panic/00000000000000000027.xlog + - panic/00000000000000000029.xlog +... 
test_run:cmd('switch default') --- - true diff --git a/test/xlog/panic_on_lsn_gap.test.lua b/test/xlog/panic_on_lsn_gap.test.lua index b1ede320..d72552d0 100644 --- a/test/xlog/panic_on_lsn_gap.test.lua +++ b/test/xlog/panic_on_lsn_gap.test.lua @@ -119,8 +119,18 @@ box.space._schema:replace{'key', 'test 5'} -- fails, makes gap box.snapshot() -- fails, rotates WAL box.error.injection.set("ERRINJ_WAL_WRITE_DISK", false) box.space._schema:replace{'key', 'test 5'} -- creates new WAL +box.error.injection.set("ERRINJ_WAL_WRITE_DISK", true) +box.space._schema:replace{'key', 'test 6'} -- fails, makes gap +box.snapshot() -- fails, rotates WAL +box.space._schema:replace{'key', 'test 6'} -- fails, creates empty WAL +name = string.match(arg[0], "([^,]+)%.lua") +require('fio').glob(name .. "/*.xlog") test_run:cmd("restart server panic") box.space._schema:select{'key'} +-- Check that we don't create a WAL in the gap between the last two. +box.space._schema:replace{'key', 'test 6'} +name = string.match(arg[0], "([^,]+)%.lua") +require('fio').glob(name .. "/*.xlog") test_run:cmd('switch default') test_run:cmd("stop server panic") test_run:cmd("cleanup server panic") -- 2.11.0