[PATCH v2 5/6] wal: create empty xlog on shutdown

Vladimir Davydov vdavydov.dev at gmail.com
Fri Jun 29 19:48:32 MSK 2018


In order to determine whether we need to rebootstrap the instance on
startup, we need to know its vclock. To find it out, we plan to scan
the last xlog file before proceeding to local recovery, but this means
that if rebootstrap turns out to be unnecessary, we will scan the last
xlog twice, which is sub-optimal. To speed up startup, let's create a
new empty xlog before shutting down the server and reopen it after
restart.

Needed for #461
---
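Note for reviewers: below is a minimal, self-contained sketch of the
shutdown/startup flow this patch implements, not the patch code itself.
On shutdown an empty xlog named after the current vclock signature is
created; on the next startup that file is reopened if present, or
renamed to *.corrupted if it cannot be read. The helper names here
(make_xlog_path() and friends) are hypothetical simplifications and do
not exist in the tree, which uses the xdir/xlog API in src/box/wal.c.

/*
 * Hypothetical, simplified model of the flow added by this patch.
 * The real code lives in src/box/wal.c and uses the xdir/xlog API;
 * here an "xlog" is just a plain file named after the vclock
 * signature, e.g. 00000000000000000011.xlog.
 */
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Format the xlog path for a given vclock signature. */
static void
make_xlog_path(char *buf, size_t size, const char *dir, int64_t signature)
{
	snprintf(buf, size, "%s/%020lld.xlog", dir, (long long)signature);
}

/*
 * Shutdown side: create an empty xlog so that the next startup can
 * learn the instance vclock without rescanning the previous xlog.
 */
static int
create_empty_xlog(const char *dir, int64_t signature)
{
	char path[PATH_MAX];
	make_xlog_path(path, sizeof(path), dir, signature);
	FILE *f = fopen(path, "wx");		/* fail if it already exists */
	if (f == NULL)
		return errno == EEXIST ? 0 : -1;
	/* A real xlog would get a proper meta header here. */
	fclose(f);
	return 0;
}

/*
 * Startup side: reopen the last xlog for appending if it exists;
 * if it turns out to be unreadable, rename it to *.corrupted and
 * proceed as if it were absent.
 */
static int
reopen_last_xlog(const char *dir, int64_t signature)
{
	char path[PATH_MAX];
	make_xlog_path(path, sizeof(path), dir, signature);
	if (access(path, F_OK) != 0)
		return errno == ENOENT ? 0 : -1;	/* no xlog, nothing to do */
	FILE *f = fopen(path, "r+");
	if (f != NULL) {
		fclose(f);	/* the real code keeps it open for appending */
		return 0;
	}
	char bad[PATH_MAX + 16];
	snprintf(bad, sizeof(bad), "%s.corrupted", path);
	return rename(path, bad);
}

int
main(void)
{
	/* Simulate a shutdown at vclock signature 11 followed by a restart. */
	if (create_empty_xlog(".", 11) != 0 || reopen_last_xlog(".", 11) != 0)
		perror("xlog");
	return 0;
}
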
 src/box/recovery.cc                   | 23 ---------
 src/box/wal.c                         | 94 ++++++++++++++++++++++++++++++++++-
 test/replication/gc.result            | 23 +--------
 test/replication/gc.test.lua          |  8 +--
 test/replication/hot_standby.result   | 12 ++---
 test/replication/hot_standby.test.lua |  4 +-
 test/xlog-py/dup_key.result           | 20 ++------
 test/xlog-py/dup_key.test.py          | 29 ++++-------
 test/xlog/errinj.result               |  1 +
 test/xlog/panic_on_lsn_gap.result     | 64 ++++++++++++------------
 test/xlog/panic_on_lsn_gap.test.lua   | 10 ++--
 test/xlog/panic_on_wal_error.result   | 23 +--------
 test/xlog/panic_on_wal_error.test.lua |  9 +---
 13 files changed, 160 insertions(+), 160 deletions(-)

diff --git a/src/box/recovery.cc b/src/box/recovery.cc
index 70eb7d74..722b86c5 100644
--- a/src/box/recovery.cc
+++ b/src/box/recovery.cc
@@ -352,29 +352,6 @@ void
 recovery_finalize(struct recovery *r)
 {
 	recovery_close_log(r);
-
-	/*
-	 * Check if next xlog exists. If it's true this xlog is
-	 * corrupted and we should rename it (to avoid getting
-	 * problem on the next xlog write with the same name).
-	 * Possible reasons are:
-	 *  - last xlog has corrupted rows
-	 *  - last xlog has corrupted header
-	 *  - last xlog has zero size
-	 */
-	char *name = xdir_format_filename(&r->wal_dir,
-					  vclock_sum(&r->vclock),
-					  NONE);
-	if (access(name, F_OK) == 0) {
-		say_info("rename corrupted xlog %s", name);
-		char to[PATH_MAX];
-		snprintf(to, sizeof(to), "%s.corrupted", name);
-		if (rename(name, to) != 0) {
-			tnt_raise(SystemError,
-				  "%s: can't rename corrupted xlog",
-				  name);
-		}
-	}
 }
 
 
diff --git a/src/box/wal.c b/src/box/wal.c
index f6b0fa66..19c9138e 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -34,6 +34,8 @@
 #include "fiber.h"
 #include "fio.h"
 #include "errinj.h"
+#include "error.h"
+#include "exception.h"
 
 #include "xlog.h"
 #include "xrow.h"
@@ -310,6 +312,75 @@ wal_thread_start()
 	cpipe_set_max_input(&wal_thread.wal_pipe, IOV_MAX);
 }
 
+static int
+wal_open_f(struct cbus_call_msg *msg)
+{
+	(void)msg;
+	struct wal_writer *writer = &wal_writer_singleton;
+	const char *path = xdir_format_filename(&writer->wal_dir,
+				vclock_sum(&writer->vclock), NONE);
+	assert(!xlog_is_open(&writer->current_wal));
+	return xlog_open(&writer->current_wal, path);
+}
+
+/**
+ * Try to open the current WAL file for appending if it exists.
+ */
+static int
+wal_open(struct wal_writer *writer)
+{
+	const char *path = xdir_format_filename(&writer->wal_dir,
+				vclock_sum(&writer->vclock), NONE);
+	if (access(path, F_OK) != 0) {
+		if (errno == ENOENT) {
+			/* No WAL, nothing to do. */
+			return 0;
+		}
+		diag_set(SystemError, "failed to access %s", path);
+		return -1;
+	}
+
+	/*
+	 * The WAL file exists, try to open it.
+	 *
+	 * Note, an xlog object cannot be opened and used in
+	 * different threads (because it uses slab arena), so
+	 * we have to call xlog_open() on behalf of the WAL
+	 * thread.
+	 */
+	struct cbus_call_msg msg;
+	if (cbus_call(&wal_thread.wal_pipe, &wal_thread.tx_pipe, &msg,
+		      wal_open_f, NULL, TIMEOUT_INFINITY) == 0) {
+		/*
+		 * Success: we can now append to
+		 * the existing WAL file.
+		 */
+		return 0;
+	}
+	struct error *e = diag_last_error(diag_get());
+	if (!type_assignable(&type_XlogError, e->type)) {
+		/*
+		 * Out of memory or system error.
+		 * Nothing we can do.
+		 */
+		return -1;
+	}
+	diag_log();
+
+	/*
+	 * Looks like the WAL file is corrupted.
+	 * Rename it so that we can proceed.
+	 */
+	say_warn("renaming corrupted %s", path);
+	char new_path[PATH_MAX];
+	snprintf(new_path, sizeof(new_path), "%s.corrupted", path);
+	if (rename(path, new_path) != 0) {
+		diag_set(SystemError, "failed to rename %s", path);
+		return -1;
+	}
+	return 0;
+}
+
 /**
  * Initialize WAL writer.
  *
@@ -332,6 +403,9 @@ wal_init(enum wal_mode wal_mode, const char *wal_dirname,
 	if (xdir_scan(&writer->wal_dir))
 		return -1;
 
+	if (wal_open(writer) != 0)
+		return -1;
+
 	journal_set(&writer->base);
 	return 0;
 }
@@ -382,8 +456,7 @@ wal_checkpoint_f(struct cmsg *data)
 
 		xlog_close(&writer->current_wal, false);
 		/*
-		 * Avoid creating an empty xlog if this is the
-		 * last snapshot before shutdown.
+		 * The next WAL will be created on the first write.
 		 */
 	}
 	vclock_copy(msg->vclock, &writer->vclock);
@@ -703,6 +776,23 @@ wal_thread_f(va_list ap)
 
 	struct wal_writer *writer = &wal_writer_singleton;
 
+	/*
+	 * Create a new empty WAL on shutdown so that we don't
+	 * have to rescan the last WAL to find the instance vclock.
+	 * Don't create a WAL if the last one is empty.
+	 */
+	if (writer->wal_mode != WAL_NONE &&
+	    (!xlog_is_open(&writer->current_wal) ||
+	     vclock_compare(&writer->vclock,
+			    &writer->current_wal.meta.vclock) > 0)) {
+		struct xlog l;
+		if (xdir_create_xlog(&writer->wal_dir, &l,
+				     &writer->vclock) == 0)
+			xlog_close(&l, false);
+		else
+			diag_log();
+	}
+
 	if (xlog_is_open(&writer->current_wal))
 		xlog_close(&writer->current_wal, false);
 
diff --git a/test/replication/gc.result b/test/replication/gc.result
index 7d6644ae..47d91e85 100644
--- a/test/replication/gc.result
+++ b/test/replication/gc.result
@@ -209,35 +209,16 @@ test_run:cmd("switch replica")
 ---
 - true
 ...
--- Unblock the replica and make it fail to apply a row.
-box.info.replication[1].upstream.message == nil
----
-- true
-...
-box.error.injection.set("ERRINJ_WAL_WRITE", true)
----
-- ok
-...
+-- Unblock the replica and break replication.
 box.error.injection.set("ERRINJ_WAL_DELAY", false)
 ---
 - ok
 ...
-while box.info.replication[1].upstream.message == nil do fiber.sleep(0.01) end
----
-...
-box.info.replication[1].upstream.message
+box.cfg{replication = {}}
 ---
-- Failed to write to disk
-...
-test_run:cmd("switch default")
----
-- true
 ...
 -- Restart the replica to reestablish replication.
 test_run:cmd("restart server replica")
----
-- true
-...
 -- Wait for the replica to catch up.
 test_run:cmd("switch replica")
 ---
diff --git a/test/replication/gc.test.lua b/test/replication/gc.test.lua
index 3a680075..c45b1152 100644
--- a/test/replication/gc.test.lua
+++ b/test/replication/gc.test.lua
@@ -107,13 +107,9 @@ fiber.sleep(0.1) -- wait for master to relay data
 -- needed by the replica.
 #box.info.gc().checkpoints == 2 or box.info.gc()
 test_run:cmd("switch replica")
--- Unblock the replica and make it fail to apply a row.
-box.info.replication[1].upstream.message == nil
-box.error.injection.set("ERRINJ_WAL_WRITE", true)
+-- Unblock the replica and break replication.
 box.error.injection.set("ERRINJ_WAL_DELAY", false)
-while box.info.replication[1].upstream.message == nil do fiber.sleep(0.01) end
-box.info.replication[1].upstream.message
-test_run:cmd("switch default")
+box.cfg{replication = {}}
 -- Restart the replica to reestablish replication.
 test_run:cmd("restart server replica")
 -- Wait for the replica to catch up.
diff --git a/test/replication/hot_standby.result b/test/replication/hot_standby.result
index 66ede5b7..24be0a94 100644
--- a/test/replication/hot_standby.result
+++ b/test/replication/hot_standby.result
@@ -284,27 +284,27 @@ _select(11, 20)
   - [19, 'the tuple 19']
   - [20, 'the tuple 20']
 ...
-test_run:cmd("deploy server default")
+test_run:cmd("stop server hot_standby")
 ---
 - true
 ...
-test_run:cmd("start server default")
+test_run:cmd("cleanup server hot_standby")
 ---
 - true
 ...
-test_run:cmd("switch default")
+test_run:cmd("deploy server default")
 ---
 - true
 ...
-test_run:cmd("stop server hot_standby")
+test_run:cmd("start server default")
 ---
 - true
 ...
-test_run:cmd("stop server replica")
+test_run:cmd("switch default")
 ---
 - true
 ...
-test_run:cmd("cleanup server hot_standby")
+test_run:cmd("stop server replica")
 ---
 - true
 ...
diff --git a/test/replication/hot_standby.test.lua b/test/replication/hot_standby.test.lua
index 8a7c837e..adb3fb6f 100644
--- a/test/replication/hot_standby.test.lua
+++ b/test/replication/hot_standby.test.lua
@@ -109,10 +109,10 @@ test_run:cmd("switch replica")
 _wait_lsn(10)
 _select(11, 20)
 
+test_run:cmd("stop server hot_standby")
+test_run:cmd("cleanup server hot_standby")
 test_run:cmd("deploy server default")
 test_run:cmd("start server default")
 test_run:cmd("switch default")
-test_run:cmd("stop server hot_standby")
 test_run:cmd("stop server replica")
-test_run:cmd("cleanup server hot_standby")
 test_run:cmd("cleanup server replica")
diff --git a/test/xlog-py/dup_key.result b/test/xlog-py/dup_key.result
index 53ae7322..f387e8e8 100644
--- a/test/xlog-py/dup_key.result
+++ b/test/xlog-py/dup_key.result
@@ -4,6 +4,10 @@ space = box.schema.space.create('test')
 index = box.space.test:create_index('primary')
 ---
 ...
+box.snapshot()
+---
+- ok
+...
 box.space.test:insert{1, 'first tuple'}
 ---
 - [1, 'first tuple']
@@ -13,20 +17,6 @@ box.space.test:insert{2, 'second tuple'}
 - [2, 'second tuple']
 ...
 .xlog exists
-space = box.schema.space.create('test')
----
-...
-index = box.space.test:create_index('primary')
----
-...
-box.space.test:insert{1, 'first tuple'}
----
-- [1, 'first tuple']
-...
-box.space.test:delete{1}
----
-- [1, 'first tuple']
-...
 box.space.test:insert{1, 'third tuple'}
 ---
 - [1, 'third tuple']
@@ -35,7 +25,7 @@ box.space.test:insert{2, 'fourth tuple'}
 ---
 - [2, 'fourth tuple']
 ...
-.xlog exists
+.xlog does not exist
 check log line for 'Duplicate key'
 
 'Duplicate key' exists in server log
diff --git a/test/xlog-py/dup_key.test.py b/test/xlog-py/dup_key.test.py
index 058d9e3f..1c033da4 100644
--- a/test/xlog-py/dup_key.test.py
+++ b/test/xlog-py/dup_key.test.py
@@ -8,6 +8,11 @@ import yaml
 
 server.stop()
 server.deploy()
+
+server.admin("space = box.schema.space.create('test')")
+server.admin("index = box.space.test:create_index('primary')")
+server.admin("box.snapshot()")
+
 lsn = int(yaml.load(server.admin("box.info.lsn", silent=True))[0])
 filename = str(lsn).zfill(20) + ".xlog"
 vardir = os.path.join(server.vardir, server.name)
@@ -15,40 +20,26 @@ wal_old = os.path.join(vardir, "old_" + filename)
 wal = os.path.join(vardir, filename)
 
 # Create wal#1
-server.admin("space = box.schema.space.create('test')")
-server.admin("index = box.space.test:create_index('primary')")
 server.admin("box.space.test:insert{1, 'first tuple'}")
 server.admin("box.space.test:insert{2, 'second tuple'}")
 server.stop()
 
-# Save wal #1
+# Save wal#1
 if os.access(wal, os.F_OK):
     print ".xlog exists"
     os.rename(wal, wal_old)
 
-lsn += 4
-
-# Create another wal#1
-server.start()
-server.admin("space = box.schema.space.create('test')")
-server.admin("index = box.space.test:create_index('primary')")
-server.admin("box.space.test:insert{1, 'first tuple'}")
-server.admin("box.space.test:delete{1}")
-server.stop()
-
-# Create wal#2
+# Write wal#2
 server.start()
 server.admin("box.space.test:insert{1, 'third tuple'}")
 server.admin("box.space.test:insert{2, 'fourth tuple'}")
 server.stop()
 
-if os.access(wal, os.F_OK):
-    print ".xlog exists"
-    # Replace wal#1 with saved copy
-    os.unlink(wal)
+# Restore wal#1
+if not os.access(wal, os.F_OK):
+    print ".xlog does not exist"
     os.rename(wal_old, wal)
 
-
 server.start()
 line = 'Duplicate key'
 print "check log line for '%s'" % line
diff --git a/test/xlog/errinj.result b/test/xlog/errinj.result
index 76cbe754..262677f1 100644
--- a/test/xlog/errinj.result
+++ b/test/xlog/errinj.result
@@ -43,6 +43,7 @@ require('fio').glob(name .. "/*.xlog")
 ---
 - - xlog/00000000000000000000.xlog
   - xlog/00000000000000000001.xlog
+  - xlog/00000000000000000002.xlog
 ...
 test_run:cmd('restart server default with cleanup=1')
 -- gh-881 iproto request with wal IO error
diff --git a/test/xlog/panic_on_lsn_gap.result b/test/xlog/panic_on_lsn_gap.result
index d5064ce6..4dd1291f 100644
--- a/test/xlog/panic_on_lsn_gap.result
+++ b/test/xlog/panic_on_lsn_gap.result
@@ -83,8 +83,9 @@ t
   - Failed to write to disk
 ...
 --
--- Before restart: oops, our LSN is 11,
--- even though we didn't insert anything.
+-- Before restart: our LSN is 1, because
+-- LSN is promoted in tx only on successful
+-- WAL write.
 --
 name = string.match(arg[0], "([^,]+)%.lua")
 ---
@@ -99,13 +100,12 @@ require('fio').glob(name .. "/*.xlog")
 ...
 test_run:cmd("restart server panic")
 --
--- after restart: our LSN is the LSN of the
--- last *written* row, all the failed
--- rows are gone from lsn counter.
+-- After restart: our LSN is the LSN of the
+-- last empty WAL created on shutdown, i.e. 11.
 --
 box.info.vclock
 ---
-- {1: 1}
+- {1: 11}
 ...
 box.space._schema:select{'key'}
 ---
@@ -153,7 +153,7 @@ t
 ...
 box.info.vclock
 ---
-- {1: 1}
+- {1: 11}
 ...
 box.error.injection.set("ERRINJ_WAL_WRITE", false)
 ---
@@ -176,12 +176,12 @@ s:replace{'key', 'test 2'}
 --
 box.info.vclock
 ---
-- {1: 12}
+- {1: 22}
 ...
 test_run:cmd("restart server panic")
 box.info.vclock
 ---
-- {1: 12}
+- {1: 22}
 ...
 box.space._schema:select{'key'}
 ---
@@ -194,7 +194,8 @@ name = string.match(arg[0], "([^,]+)%.lua")
 require('fio').glob(name .. "/*.xlog")
 ---
 - - panic/00000000000000000000.xlog
-  - panic/00000000000000000001.xlog
+  - panic/00000000000000000011.xlog
+  - panic/00000000000000000022.xlog
 ...
 -- now insert 10 rows - so that the next
 -- row will need to switch the WAL
@@ -216,8 +217,8 @@ test_run:cmd("setopt delimiter ''");
 require('fio').glob(name .. "/*.xlog")
 ---
 - - panic/00000000000000000000.xlog
-  - panic/00000000000000000001.xlog
-  - panic/00000000000000000012.xlog
+  - panic/00000000000000000011.xlog
+  - panic/00000000000000000022.xlog
 ...
 box.error.injection.set("ERRINJ_WAL_WRITE", true)
 ---
@@ -229,14 +230,14 @@ box.space._schema:replace{"key", 'test 3'}
 ...
 box.info.vclock
 ---
-- {1: 22}
+- {1: 32}
 ...
 require('fio').glob(name .. "/*.xlog")
 ---
 - - panic/00000000000000000000.xlog
-  - panic/00000000000000000001.xlog
-  - panic/00000000000000000012.xlog
+  - panic/00000000000000000011.xlog
   - panic/00000000000000000022.xlog
+  - panic/00000000000000000032.xlog
 ...
 -- and the next one (just to be sure
 box.space._schema:replace{"key", 'test 3'}
@@ -245,14 +246,14 @@ box.space._schema:replace{"key", 'test 3'}
 ...
 box.info.vclock
 ---
-- {1: 22}
+- {1: 32}
 ...
 require('fio').glob(name .. "/*.xlog")
 ---
 - - panic/00000000000000000000.xlog
-  - panic/00000000000000000001.xlog
-  - panic/00000000000000000012.xlog
+  - panic/00000000000000000011.xlog
   - panic/00000000000000000022.xlog
+  - panic/00000000000000000032.xlog
 ...
 box.error.injection.set("ERRINJ_WAL_WRITE", false)
 ---
@@ -265,14 +266,14 @@ box.space._schema:replace{"key", 'test 4'}
 ...
 box.info.vclock
 ---
-- {1: 25}
+- {1: 35}
 ...
 require('fio').glob(name .. "/*.xlog")
 ---
 - - panic/00000000000000000000.xlog
-  - panic/00000000000000000001.xlog
-  - panic/00000000000000000012.xlog
+  - panic/00000000000000000011.xlog
   - panic/00000000000000000022.xlog
+  - panic/00000000000000000032.xlog
 ...
 -- restart is ok
 test_run:cmd("restart server panic")
@@ -331,12 +332,12 @@ name = string.match(arg[0], "([^,]+)%.lua")
 require('fio').glob(name .. "/*.xlog")
 ---
 - - panic/00000000000000000000.xlog
-  - panic/00000000000000000001.xlog
-  - panic/00000000000000000012.xlog
+  - panic/00000000000000000011.xlog
   - panic/00000000000000000022.xlog
-  - panic/00000000000000000025.xlog
-  - panic/00000000000000000027.xlog
-  - panic/00000000000000000029.xlog
+  - panic/00000000000000000032.xlog
+  - panic/00000000000000000035.xlog
+  - panic/00000000000000000037.xlog
+  - panic/00000000000000000039.xlog
 ...
 test_run:cmd("restart server panic")
 box.space._schema:select{'key'}
@@ -354,12 +355,13 @@ name = string.match(arg[0], "([^,]+)%.lua")
 require('fio').glob(name .. "/*.xlog")
 ---
 - - panic/00000000000000000000.xlog
-  - panic/00000000000000000001.xlog
-  - panic/00000000000000000012.xlog
+  - panic/00000000000000000011.xlog
   - panic/00000000000000000022.xlog
-  - panic/00000000000000000025.xlog
-  - panic/00000000000000000027.xlog
-  - panic/00000000000000000029.xlog
+  - panic/00000000000000000032.xlog
+  - panic/00000000000000000035.xlog
+  - panic/00000000000000000037.xlog
+  - panic/00000000000000000039.xlog
+  - panic/00000000000000000040.xlog
 ...
 test_run:cmd('switch default')
 ---
diff --git a/test/xlog/panic_on_lsn_gap.test.lua b/test/xlog/panic_on_lsn_gap.test.lua
index d72552d0..6221261a 100644
--- a/test/xlog/panic_on_lsn_gap.test.lua
+++ b/test/xlog/panic_on_lsn_gap.test.lua
@@ -34,17 +34,17 @@ end;
 test_run:cmd("setopt delimiter ''");
 t
 --
--- Before restart: oops, our LSN is 11,
--- even though we didn't insert anything.
+-- Before restart: our LSN is 1, because
+-- LSN is promoted in tx only on successful
+-- WAL write.
 --
 name = string.match(arg[0], "([^,]+)%.lua")
 box.info.vclock
 require('fio').glob(name .. "/*.xlog")
 test_run:cmd("restart server panic")
 --
--- after restart: our LSN is the LSN of the
--- last *written* row, all the failed
--- rows are gone from lsn counter.
+-- After restart: our LSN is the LSN of the
+-- last empty WAL created on shutdown, i.e. 11.
 --
 box.info.vclock
 box.space._schema:select{'key'}
diff --git a/test/xlog/panic_on_wal_error.result b/test/xlog/panic_on_wal_error.result
index 267b5340..345534ba 100644
--- a/test/xlog/panic_on_wal_error.result
+++ b/test/xlog/panic_on_wal_error.result
@@ -5,28 +5,7 @@ env = require('test_run')
 test_run = env.new()
 ---
 ...
-fio = require('fio')
----
-...
-glob = fio.pathjoin(box.cfg.wal_dir, '*.xlog')
----
-...
-for _, file in pairs(fio.glob(glob)) do fio.unlink(file) end
----
-...
-glob = fio.pathjoin(box.cfg.vinyl_dir, '*.vylog')
----
-...
-for _, file in pairs(fio.glob(glob)) do fio.unlink(file) end
----
-...
-glob = fio.pathjoin(box.cfg.memtx_dir, '*.snap')
----
-...
-for _, file in pairs(fio.glob(glob)) do fio.unlink(file) end
----
-...
-test_run:cmd("restart server default")
+test_run:cmd("restart server default with cleanup=True")
 box.schema.user.grant('guest', 'replication')
 ---
 ...
diff --git a/test/xlog/panic_on_wal_error.test.lua b/test/xlog/panic_on_wal_error.test.lua
index 4f598e33..29410cb2 100644
--- a/test/xlog/panic_on_wal_error.test.lua
+++ b/test/xlog/panic_on_wal_error.test.lua
@@ -2,14 +2,7 @@
 env = require('test_run')
 test_run = env.new()
 
-fio = require('fio')
-glob = fio.pathjoin(box.cfg.wal_dir, '*.xlog')
-for _, file in pairs(fio.glob(glob)) do fio.unlink(file) end
-glob = fio.pathjoin(box.cfg.vinyl_dir, '*.vylog')
-for _, file in pairs(fio.glob(glob)) do fio.unlink(file) end
-glob = fio.pathjoin(box.cfg.memtx_dir, '*.snap')
-for _, file in pairs(fio.glob(glob)) do fio.unlink(file) end
-test_run:cmd("restart server default")
+test_run:cmd("restart server default with cleanup=True")
 box.schema.user.grant('guest', 'replication')
 _ = box.schema.space.create('test')
 _ = box.space.test:create_index('pk')
-- 
2.11.0



