[PATCH] Fix force_recovery on empty xlog

Konstantin Belyavskiy k.belyavskiy at tarantool.org
Thu Jan 25 16:17:52 MSK 2018


This fix covers several related issues.
It fixes force_recovery behaviour on empty xlog files and on ones with a
corrupted header (#3026, #3076).
It also improves logging to suggest using force_recovery in such cases (#2905).
Add two tests:
1. Check behaviour on an empty xlog in the middle (exactly the same for one with a bad header)
2. Check behaviour on an empty xlog at the end, which gets removed (same for one with a bad header)

Closes #3026, #3076, #2905
---
branch: gh-3026-fix-force-recovery-on-empty-xlog

 src/box/recovery.cc                                | 27 ++++--
 src/box/recovery.h                                 |  1 +
 src/box/xlog.c                                     |  4 +-
 test/xlog/force_recovery.lua                       |  8 ++
 .../xlog/rm_last_corrupted_xlog_on_recovery.result | 76 +++++++++++++++++
 .../rm_last_corrupted_xlog_on_recovery.test.lua    | 27 ++++++
 test/xlog/skip_empty_xlog_on_recovery.result       | 96 ++++++++++++++++++++++
 test/xlog/skip_empty_xlog_on_recovery.test.lua     | 33 ++++++++
 8 files changed, 261 insertions(+), 11 deletions(-)
 create mode 100644 test/xlog/force_recovery.lua
 create mode 100644 test/xlog/rm_last_corrupted_xlog_on_recovery.result
 create mode 100644 test/xlog/rm_last_corrupted_xlog_on_recovery.test.lua
 create mode 100644 test/xlog/skip_empty_xlog_on_recovery.result
 create mode 100644 test/xlog/skip_empty_xlog_on_recovery.test.lua

diff --git a/src/box/recovery.cc b/src/box/recovery.cc
index 281ac1838..22d64d40b 100644
--- a/src/box/recovery.cc
+++ b/src/box/recovery.cc
@@ -332,18 +332,27 @@ recovery_finalize(struct recovery *r, struct xstream *stream)
 	/*
 	 * Check that the last xlog file has rows.
 	 */
-	if (vclockset_last(&r->wal_dir.index) != NULL &&
-	    vclock_sum(&r->vclock) ==
-	    vclock_sum(vclockset_last(&r->wal_dir.index))) {
-		/*
-		 * Delete the last empty xlog file.
-		 */
+	if (vclockset_last(&r->wal_dir.index) != NULL) {
 		char *name = xdir_format_filename(&r->wal_dir,
 						  vclock_sum(&r->vclock),
 						  NONE);
-		if (unlink(name) != 0) {
-			tnt_raise(SystemError, "%s: failed to unlink file",
-				  name);
+		/*
+		 * delete the last xlog file if one of the following conditions is true:
+		 *  - file has zero size
+		 *  - file has no valid records
+		 */
+		struct stat st;
+		if ((stat(name, &st) == 0 && st.st_size == 0) ||
+		    vclock_sum(&r->vclock) ==
+		    vclock_sum(vclockset_last(&r->wal_dir.index))) {
+			/*
+			 * Delete the last xlog file.
+			 */
+			say_info("delete corrupted xlog %s", name);
+			if (unlink(name) != 0) {
+				tnt_raise(SystemError, "%s: failed to unlink file",
+					  name);
+			}
 		}
 	}
 }
diff --git a/src/box/recovery.h b/src/box/recovery.h
index 562a1fbb5..7d3230e16 100644
--- a/src/box/recovery.h
+++ b/src/box/recovery.h
@@ -36,6 +36,7 @@
 #include "xlog.h"
 #include "vclock.h"
 #include "tt_uuid.h"
+#include <sys/stat.h>
 
 #if defined(__cplusplus)
 extern "C" {
diff --git a/src/box/xlog.c b/src/box/xlog.c
index 98b54d658..5d6100e45 100644
--- a/src/box/xlog.c
+++ b/src/box/xlog.c
@@ -553,7 +553,6 @@ xdir_scan(struct xdir *dir)
 				/** Skip a corrupted file */
 				error_log(e);
 				rc = 0;
-				goto exit;
 			}
 			i++;
 		} else {
@@ -1799,7 +1798,8 @@ xlog_cursor_openfd(struct xlog_cursor *i, int fd, const char *name)
 	if (rc == -1)
 		goto error;
 	if (rc > 0) {
-		diag_set(XlogError, "Unexpected end of file");
+		diag_set(XlogError, "Unexpected end of file %s, to fix "
+			 "try to run with 'force_recovery = true'", name);
 		goto error;
 	}
 	snprintf(i->name, PATH_MAX, "%s", name);
diff --git a/test/xlog/force_recovery.lua b/test/xlog/force_recovery.lua
new file mode 100644
index 000000000..ee429ba4c
--- /dev/null
+++ b/test/xlog/force_recovery.lua
@@ -0,0 +1,8 @@
+#!/usr/bin/env tarantool
+
+box.cfg {
+    listen = os.getenv("LISTEN"),
+    force_recovery = true
+}
+
+require('console').listen(os.getenv('ADMIN'))
diff --git a/test/xlog/rm_last_corrupted_xlog_on_recovery.result b/test/xlog/rm_last_corrupted_xlog_on_recovery.result
new file mode 100644
index 000000000..5091f577a
--- /dev/null
+++ b/test/xlog/rm_last_corrupted_xlog_on_recovery.result
@@ -0,0 +1,76 @@
+#!/usr/bin/env tarantool
+---
+...
+env = require('test_run')
+---
+...
+test_run = env.new()
+---
+...
+box.cfg{}
+---
+...
+test_run:cmd('create server test with script = "xlog/force_recovery.lua"')
+---
+- true
+...
+test_run:cmd("start server test")
+---
+- true
+...
+test_run:cmd("switch test")
+---
+- true
+...
+box.space._schema:replace({'test'})
+---
+- ['test']
+...
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd("stop server test")
+---
+- true
+...
+test_run:cmd("start server test")
+---
+- true
+...
+test_run:cmd("switch test")
+---
+- true
+...
+box.space._schema:replace({'tost'})
+---
+- ['tost']
+...
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd("stop server test")
+---
+- true
+...
+os.execute("rm force_recovery/00000000000000000001.xlog")
+---
+- 0
+...
+os.execute("touch force_recovery/00000000000000000001.xlog")
+---
+- 0
+...
+test_run:cmd("start server test")
+---
+- true
+...
+test_run:cmd("switch test")
+---
+- true
+...
+box.space._schema:replace({'lost'})
+---
+- ['lost']
+...
diff --git a/test/xlog/rm_last_corrupted_xlog_on_recovery.test.lua b/test/xlog/rm_last_corrupted_xlog_on_recovery.test.lua
new file mode 100644
index 000000000..b89e2fef8
--- /dev/null
+++ b/test/xlog/rm_last_corrupted_xlog_on_recovery.test.lua
@@ -0,0 +1,27 @@
+#!/usr/bin/env tarantool
+
+env = require('test_run')
+test_run = env.new()
+
+box.cfg{}
+
+test_run:cmd('create server test with script = "xlog/force_recovery.lua"')
+
+test_run:cmd("start server test")
+test_run:cmd("switch test")
+box.space._schema:replace({'test'})
+test_run:cmd("switch default")
+test_run:cmd("stop server test")
+
+test_run:cmd("start server test")
+test_run:cmd("switch test")
+box.space._schema:replace({'tost'})
+test_run:cmd("switch default")
+test_run:cmd("stop server test")
+
+os.execute("rm force_recovery/00000000000000000001.xlog")
+os.execute("touch force_recovery/00000000000000000001.xlog")
+
+test_run:cmd("start server test")
+test_run:cmd("switch test")
+box.space._schema:replace({'lost'})
diff --git a/test/xlog/skip_empty_xlog_on_recovery.result b/test/xlog/skip_empty_xlog_on_recovery.result
new file mode 100644
index 000000000..dd242b473
--- /dev/null
+++ b/test/xlog/skip_empty_xlog_on_recovery.result
@@ -0,0 +1,96 @@
+#!/usr/bin/env tarantool
+---
+...
+env = require('test_run')
+---
+...
+test_run = env.new()
+---
+...
+box.cfg{}
+---
+...
+test_run:cmd('create server test with script = "xlog/force_recovery.lua"')
+---
+- true
+...
+test_run:cmd("start server test")
+---
+- true
+...
+test_run:cmd("switch test")
+---
+- true
+...
+box.space._schema:replace({'test'})
+---
+- ['test']
+...
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd("stop server test")
+---
+- true
+...
+test_run:cmd("start server test")
+---
+- true
+...
+test_run:cmd("switch test")
+---
+- true
+...
+box.space._schema:replace({'tost'})
+---
+- ['tost']
+...
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd("stop server test")
+---
+- true
+...
+test_run:cmd("start server test")
+---
+- true
+...
+test_run:cmd("switch test")
+---
+- true
+...
+box.space._schema:replace({'lost'})
+---
+- ['lost']
+...
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:cmd("stop server test")
+---
+- true
+...
+os.execute("rm force_recovery/00000000000000000001.xlog")
+---
+- 0
+...
+os.execute("touch force_recovery/00000000000000000001.xlog")
+---
+- 0
+...
+test_run:cmd("start server test")
+---
+- true
+...
+test_run:cmd("switch test")
+---
+- true
+...
+box.space._schema:replace({'last'})
+---
+- ['last']
+...
diff --git a/test/xlog/skip_empty_xlog_on_recovery.test.lua b/test/xlog/skip_empty_xlog_on_recovery.test.lua
new file mode 100644
index 000000000..1c64bea86
--- /dev/null
+++ b/test/xlog/skip_empty_xlog_on_recovery.test.lua
@@ -0,0 +1,33 @@
+#!/usr/bin/env tarantool
+
+env = require('test_run')
+test_run = env.new()
+
+box.cfg{}
+
+test_run:cmd('create server test with script = "xlog/force_recovery.lua"')
+
+test_run:cmd("start server test")
+test_run:cmd("switch test")
+box.space._schema:replace({'test'})
+test_run:cmd("switch default")
+test_run:cmd("stop server test")
+
+test_run:cmd("start server test")
+test_run:cmd("switch test")
+box.space._schema:replace({'tost'})
+test_run:cmd("switch default")
+test_run:cmd("stop server test")
+
+test_run:cmd("start server test")
+test_run:cmd("switch test")
+box.space._schema:replace({'lost'})
+test_run:cmd("switch default")
+test_run:cmd("stop server test")
+
+os.execute("rm force_recovery/00000000000000000001.xlog")
+os.execute("touch force_recovery/00000000000000000001.xlog")
+
+test_run:cmd("start server test")
+test_run:cmd("switch test")
+box.space._schema:replace({'last'})
-- 
2.14.3 (Apple Git-98)




More information about the Tarantool-patches mailing list