[PATCH v2 3/6] recovery: make LSN gap check more thorough

Vladimir Davydov vdavydov.dev at gmail.com
Fri Jun 29 19:48:30 MSK 2018


Currently, the lsn gap check is rather sloppy: when we open an xlog file
for recovery, we check that its vclock equals the vclock of the last
replayed row (see recover_remaining_wals), so if there were WAL write
errors at the end of an xlog file, we will report a false-positive gap
error (because wal doesn't rollback lsn counter). Let's use PrevVclock
xlog meta key introduced earlier to improve the check.
---
 src/box/recovery.cc                 | 62 +++++++++++++++++++++++++------------
 test/xlog/panic_on_lsn_gap.result   | 34 ++++++++++++++++++++
 test/xlog/panic_on_lsn_gap.test.lua | 13 ++++++++
 3 files changed, 90 insertions(+), 19 deletions(-)

diff --git a/src/box/recovery.cc b/src/box/recovery.cc
index 71f6bd8c..8ac89cc2 100644
--- a/src/box/recovery.cc
+++ b/src/box/recovery.cc
@@ -152,6 +152,48 @@ recovery_close_log(struct recovery *r)
 	trigger_run_xc(&r->on_close_log, NULL);
 }
 
+static void
+recovery_open_log(struct recovery *r, const struct vclock *vclock)
+{
+	XlogGapError *e;
+	struct xlog_meta meta = r->cursor.meta;
+	enum xlog_cursor_state state = r->cursor.state;
+
+	recovery_close_log(r);
+
+	xdir_open_cursor_xc(&r->wal_dir, vclock_sum(vclock), &r->cursor);
+
+	if (state == XLOG_CURSOR_UNINITIALIZED &&
+	    vclock_compare(vclock, &r->vclock) > 0) {
+		/*
+		 * This is the first WAL we are about to scan
+		 * and the best clock we could find is greater
+		 * or is incomparable with the initial recovery
+		 * position.
+		 */
+		goto gap_error;
+	}
+
+	if (state != XLOG_CURSOR_UNINITIALIZED &&
+	    r->cursor.meta.has_prev_vclock &&
+	    vclock_compare(&r->cursor.meta.prev_vclock, &meta.vclock) != 0) {
+		/*
+		 * WALs are missing between the last scanned WAL
+		 * and the next one.
+		 */
+		goto gap_error;
+	}
+	return;
+
+gap_error:
+	e = tnt_error(XlogGapError, &r->vclock, vclock);
+	if (!r->wal_dir.force_recovery)
+		throw e;
+	/* Ignore missing WALs if force_recovery is set. */
+	e->log();
+	say_warn("ignoring a gap in LSN");
+}
+
 void
 recovery_delete(struct recovery *r)
 {
@@ -277,25 +319,7 @@ recover_remaining_wals(struct recovery *r, struct xstream *stream,
 			continue;
 		}
 
-		if (vclock_compare(clock, &r->vclock) > 0) {
-			/**
-			 * The best clock we could find is
-			 * greater or is incomparable with the
-			 * current state of recovery.
-			 */
-			XlogGapError *e =
-				tnt_error(XlogGapError, &r->vclock, clock);
-
-			if (!r->wal_dir.force_recovery)
-				throw e;
-			e->log();
-			/* Ignore missing WALs */
-			say_warn("ignoring a gap in LSN");
-		}
-
-		recovery_close_log(r);
-
-		xdir_open_cursor_xc(&r->wal_dir, vclock_sum(clock), &r->cursor);
+		recovery_open_log(r, clock);
 
 		say_info("recover from `%s'", r->cursor.name);
 
diff --git a/test/xlog/panic_on_lsn_gap.result b/test/xlog/panic_on_lsn_gap.result
index 313850a6..c93fcdd6 100644
--- a/test/xlog/panic_on_lsn_gap.result
+++ b/test/xlog/panic_on_lsn_gap.result
@@ -280,6 +280,40 @@ box.space._schema:select{'key'}
 ---
 - - ['key', 'test 4']
 ...
+--
+-- Check that if there's an LSN gap between two WALs
+-- that appeared due to a disk error and no files is
+-- actually missing, we won't panic on recovery.
+--
+box.space._schema:replace{'key', 'test 4'} -- creates new WAL
+---
+- ['key', 'test 4']
+...
+box.error.injection.set("ERRINJ_WAL_WRITE_DISK", true)
+---
+- ok
+...
+box.space._schema:replace{'key', 'test 5'} -- fails, makes gap
+---
+- error: Failed to write to disk
+...
+box.snapshot() -- fails, rotates WAL
+---
+- error: Error injection 'xlog write injection'
+...
+box.error.injection.set("ERRINJ_WAL_WRITE_DISK", false)
+---
+- ok
+...
+box.space._schema:replace{'key', 'test 5'} -- creates new WAL
+---
+- ['key', 'test 5']
+...
+test_run:cmd("restart server panic")
+box.space._schema:select{'key'}
+---
+- - ['key', 'test 5']
+...
 test_run:cmd('switch default')
 ---
 - true
diff --git a/test/xlog/panic_on_lsn_gap.test.lua b/test/xlog/panic_on_lsn_gap.test.lua
index 248a3e63..b1ede320 100644
--- a/test/xlog/panic_on_lsn_gap.test.lua
+++ b/test/xlog/panic_on_lsn_gap.test.lua
@@ -108,6 +108,19 @@ require('fio').glob(name .. "/*.xlog")
 -- restart is ok
 test_run:cmd("restart server panic")
 box.space._schema:select{'key'}
+--
+-- Check that if there's an LSN gap between two WALs
+-- that appeared due to a disk error and no files is
+-- actually missing, we won't panic on recovery.
+--
+box.space._schema:replace{'key', 'test 4'} -- creates new WAL
+box.error.injection.set("ERRINJ_WAL_WRITE_DISK", true)
+box.space._schema:replace{'key', 'test 5'} -- fails, makes gap
+box.snapshot() -- fails, rotates WAL
+box.error.injection.set("ERRINJ_WAL_WRITE_DISK", false)
+box.space._schema:replace{'key', 'test 5'} -- creates new WAL
+test_run:cmd("restart server panic")
+box.space._schema:select{'key'}
 test_run:cmd('switch default')
 test_run:cmd("stop server panic")
 test_run:cmd("cleanup server panic")
-- 
2.11.0




More information about the Tarantool-patches mailing list