Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH 3/3] wal: create empty xlog on shutdown
Date: Fri, 15 Jun 2018 18:48:22 +0300	[thread overview]
Message-ID: <1499b12de12125f1258324b83e3fbb0e1d1d0587.1529075903.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1529075903.git.vdavydov.dev@gmail.com>

To determine whether we need to rebootstrap the instance on startup,
we need to know its vclock. To find it out, we scan the last xlog file
before proceeding to local recovery. This means that if rebootstrap is
not required, we scan the last xlog twice, which is sub-optimal. To
speed up this procedure, let's create a new empty xlog before shutting
down the server and reopen it after restart.
---
 src/box/recovery.cc                   | 23 ----------------
 src/box/wal.c                         | 50 +++++++++++++++++++++++++++++++++--
 test/replication/hot_standby.result   | 12 ++++-----
 test/replication/hot_standby.test.lua |  4 +--
 test/xlog-py/dup_key.result           | 20 ++++----------
 test/xlog-py/dup_key.test.py          | 29 +++++++-------------
 test/xlog/panic_on_lsn_gap.result     |  1 +
 test/xlog/panic_on_wal_error.result   | 23 +---------------
 test/xlog/panic_on_wal_error.test.lua |  9 +------
 9 files changed, 74 insertions(+), 97 deletions(-)

diff --git a/src/box/recovery.cc b/src/box/recovery.cc
index eb77476d..1f7a11e6 100644
--- a/src/box/recovery.cc
+++ b/src/box/recovery.cc
@@ -339,29 +339,6 @@ void
 recovery_finalize(struct recovery *r)
 {
 	recovery_close_log(r);
-
-	/*
-	 * Check if next xlog exists. If it's true this xlog is
-	 * corrupted and we should rename it (to avoid getting
-	 * problem on the next xlog write with the same name).
-	 * Possible reasons are:
-	 *  - last xlog has corrupted rows
-	 *  - last xlog has corrupted header
-	 *  - last xlog has zero size
-	 */
-	char *name = xdir_format_filename(&r->wal_dir,
-					  vclock_sum(&r->vclock),
-					  NONE);
-	if (access(name, F_OK) == 0) {
-		say_info("rename corrupted xlog %s", name);
-		char to[PATH_MAX];
-		snprintf(to, sizeof(to), "%s.corrupted", name);
-		if (rename(name, to) != 0) {
-			tnt_raise(SystemError,
-				  "%s: can't rename corrupted xlog",
-				  name);
-		}
-	}
 }
 
 
diff --git a/src/box/wal.c b/src/box/wal.c
index 1c6d2422..1456d3e7 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -310,6 +310,39 @@ wal_thread_start()
 	cpipe_set_max_input(&wal_thread.wal_pipe, IOV_MAX);
 }
 
+static int
+wal_init_f(struct cbus_call_msg *msg)
+{
+	(void)msg;
+
+	struct wal_writer *writer = &wal_writer_singleton;
+
+	/*
+	 * Check if the next WAL file already exists. If it does,
+	 * it must have been created on shutdown, try to reopen it.
+	 */
+	const char *path = xdir_format_filename(&writer->wal_dir,
+				vclock_sum(&writer->vclock), NONE);
+	if (access(path, F_OK) == 0) {
+		if (xlog_open(&writer->current_wal, path) == 0)
+			return 0;
+		/*
+		 * The WAL file seems to be corrupted. Rename it
+		 * so that we can proceed.
+		 */
+		say_info("rename corrupted %s", path);
+		char new_path[PATH_MAX];
+		snprintf(new_path, sizeof(new_path), "%s.corrupted", path);
+		if (rename(path, new_path) != 0) {
+			diag_set(SystemError,
+				 "%s: can't rename corrupted xlog", path);
+			diag_log();
+			return -1;
+		}
+	}
+	return 0;
+}
+
 /**
  * Initialize WAL writer.
  *
@@ -332,6 +365,11 @@ wal_init(enum wal_mode wal_mode, const char *wal_dirname,
 	if (xdir_scan(&writer->wal_dir))
 		return -1;
 
+	struct cbus_call_msg msg;
+	if (cbus_call(&wal_thread.wal_pipe, &wal_thread.tx_pipe, &msg,
+		      wal_init_f, NULL, TIMEOUT_INFINITY) != 0)
+		return -1;
+
 	journal_set(&writer->base);
 	return 0;
 }
@@ -382,8 +420,7 @@ wal_checkpoint_f(struct cmsg *data)
 
 		xlog_close(&writer->current_wal, false);
 		/*
-		 * Avoid creating an empty xlog if this is the
-		 * last snapshot before shutdown.
+		 * The next WAL will be created on first write.
 		 */
 	}
 	vclock_copy(msg->vclock, &writer->vclock);
@@ -712,6 +749,15 @@ wal_thread_f(va_list ap)
 	if (xlog_is_open(&writer->current_wal))
 		xlog_close(&writer->current_wal, false);
 
+	/*
+	 * Create a new empty WAL on shutdown so that we don't have
+	 * to rescan the last WAL to find the instance vclock.
+	 */
+	if (writer->wal_mode != WAL_NONE &&
+	    xdir_create_xlog(&writer->wal_dir, &writer->current_wal,
+			     &writer->vclock) == 0)
+		xlog_close(&writer->current_wal, false);
+
 	if (xlog_is_open(&vy_log_writer.xlog))
 		xlog_close(&vy_log_writer.xlog, false);
 
diff --git a/test/replication/hot_standby.result b/test/replication/hot_standby.result
index 66ede5b7..24be0a94 100644
--- a/test/replication/hot_standby.result
+++ b/test/replication/hot_standby.result
@@ -284,27 +284,27 @@ _select(11, 20)
   - [19, 'the tuple 19']
   - [20, 'the tuple 20']
 ...
-test_run:cmd("deploy server default")
+test_run:cmd("stop server hot_standby")
 ---
 - true
 ...
-test_run:cmd("start server default")
+test_run:cmd("cleanup server hot_standby")
 ---
 - true
 ...
-test_run:cmd("switch default")
+test_run:cmd("deploy server default")
 ---
 - true
 ...
-test_run:cmd("stop server hot_standby")
+test_run:cmd("start server default")
 ---
 - true
 ...
-test_run:cmd("stop server replica")
+test_run:cmd("switch default")
 ---
 - true
 ...
-test_run:cmd("cleanup server hot_standby")
+test_run:cmd("stop server replica")
 ---
 - true
 ...
diff --git a/test/replication/hot_standby.test.lua b/test/replication/hot_standby.test.lua
index 8a7c837e..adb3fb6f 100644
--- a/test/replication/hot_standby.test.lua
+++ b/test/replication/hot_standby.test.lua
@@ -109,10 +109,10 @@ test_run:cmd("switch replica")
 _wait_lsn(10)
 _select(11, 20)
 
+test_run:cmd("stop server hot_standby")
+test_run:cmd("cleanup server hot_standby")
 test_run:cmd("deploy server default")
 test_run:cmd("start server default")
 test_run:cmd("switch default")
-test_run:cmd("stop server hot_standby")
 test_run:cmd("stop server replica")
-test_run:cmd("cleanup server hot_standby")
 test_run:cmd("cleanup server replica")
diff --git a/test/xlog-py/dup_key.result b/test/xlog-py/dup_key.result
index 53ae7322..f387e8e8 100644
--- a/test/xlog-py/dup_key.result
+++ b/test/xlog-py/dup_key.result
@@ -4,6 +4,10 @@ space = box.schema.space.create('test')
 index = box.space.test:create_index('primary')
 ---
 ...
+box.snapshot()
+---
+- ok
+...
 box.space.test:insert{1, 'first tuple'}
 ---
 - [1, 'first tuple']
@@ -13,20 +17,6 @@ box.space.test:insert{2, 'second tuple'}
 - [2, 'second tuple']
 ...
 .xlog exists
-space = box.schema.space.create('test')
----
-...
-index = box.space.test:create_index('primary')
----
-...
-box.space.test:insert{1, 'first tuple'}
----
-- [1, 'first tuple']
-...
-box.space.test:delete{1}
----
-- [1, 'first tuple']
-...
 box.space.test:insert{1, 'third tuple'}
 ---
 - [1, 'third tuple']
@@ -35,7 +25,7 @@ box.space.test:insert{2, 'fourth tuple'}
 ---
 - [2, 'fourth tuple']
 ...
-.xlog exists
+.xlog does not exist
 check log line for 'Duplicate key'
 
 'Duplicate key' exists in server log
diff --git a/test/xlog-py/dup_key.test.py b/test/xlog-py/dup_key.test.py
index 058d9e3f..1c033da4 100644
--- a/test/xlog-py/dup_key.test.py
+++ b/test/xlog-py/dup_key.test.py
@@ -8,6 +8,11 @@ import yaml
 
 server.stop()
 server.deploy()
+
+server.admin("space = box.schema.space.create('test')")
+server.admin("index = box.space.test:create_index('primary')")
+server.admin("box.snapshot()")
+
 lsn = int(yaml.load(server.admin("box.info.lsn", silent=True))[0])
 filename = str(lsn).zfill(20) + ".xlog"
 vardir = os.path.join(server.vardir, server.name)
@@ -15,40 +20,26 @@ wal_old = os.path.join(vardir, "old_" + filename)
 wal = os.path.join(vardir, filename)
 
 # Create wal#1
-server.admin("space = box.schema.space.create('test')")
-server.admin("index = box.space.test:create_index('primary')")
 server.admin("box.space.test:insert{1, 'first tuple'}")
 server.admin("box.space.test:insert{2, 'second tuple'}")
 server.stop()
 
-# Save wal #1
+# Save wal#1
 if os.access(wal, os.F_OK):
     print ".xlog exists"
     os.rename(wal, wal_old)
 
-lsn += 4
-
-# Create another wal#1
-server.start()
-server.admin("space = box.schema.space.create('test')")
-server.admin("index = box.space.test:create_index('primary')")
-server.admin("box.space.test:insert{1, 'first tuple'}")
-server.admin("box.space.test:delete{1}")
-server.stop()
-
-# Create wal#2
+# Write wal#2
 server.start()
 server.admin("box.space.test:insert{1, 'third tuple'}")
 server.admin("box.space.test:insert{2, 'fourth tuple'}")
 server.stop()
 
-if os.access(wal, os.F_OK):
-    print ".xlog exists"
-    # Replace wal#1 with saved copy
-    os.unlink(wal)
+# Restore wal#1
+if not os.access(wal, os.F_OK):
+    print ".xlog does not exist"
     os.rename(wal_old, wal)
 
-
 server.start()
 line = 'Duplicate key'
 print "check log line for '%s'" % line
diff --git a/test/xlog/panic_on_lsn_gap.result b/test/xlog/panic_on_lsn_gap.result
index 731eec4e..d0978e40 100644
--- a/test/xlog/panic_on_lsn_gap.result
+++ b/test/xlog/panic_on_lsn_gap.result
@@ -188,6 +188,7 @@ require('fio').glob(name .. "/*.xlog")
 ---
 - - panic/00000000000000000000.xlog
   - panic/00000000000000000001.xlog
+  - panic/00000000000000000002.xlog
 ...
 -- now insert 10 rows - so that the next
 -- row will need to switch the WAL
diff --git a/test/xlog/panic_on_wal_error.result b/test/xlog/panic_on_wal_error.result
index 267b5340..345534ba 100644
--- a/test/xlog/panic_on_wal_error.result
+++ b/test/xlog/panic_on_wal_error.result
@@ -5,28 +5,7 @@ env = require('test_run')
 test_run = env.new()
 ---
 ...
-fio = require('fio')
----
-...
-glob = fio.pathjoin(box.cfg.wal_dir, '*.xlog')
----
-...
-for _, file in pairs(fio.glob(glob)) do fio.unlink(file) end
----
-...
-glob = fio.pathjoin(box.cfg.vinyl_dir, '*.vylog')
----
-...
-for _, file in pairs(fio.glob(glob)) do fio.unlink(file) end
----
-...
-glob = fio.pathjoin(box.cfg.memtx_dir, '*.snap')
----
-...
-for _, file in pairs(fio.glob(glob)) do fio.unlink(file) end
----
-...
-test_run:cmd("restart server default")
+test_run:cmd("restart server default with cleanup=True")
 box.schema.user.grant('guest', 'replication')
 ---
 ...
diff --git a/test/xlog/panic_on_wal_error.test.lua b/test/xlog/panic_on_wal_error.test.lua
index 4f598e33..29410cb2 100644
--- a/test/xlog/panic_on_wal_error.test.lua
+++ b/test/xlog/panic_on_wal_error.test.lua
@@ -2,14 +2,7 @@
 env = require('test_run')
 test_run = env.new()
 
-fio = require('fio')
-glob = fio.pathjoin(box.cfg.wal_dir, '*.xlog')
-for _, file in pairs(fio.glob(glob)) do fio.unlink(file) end
-glob = fio.pathjoin(box.cfg.vinyl_dir, '*.vylog')
-for _, file in pairs(fio.glob(glob)) do fio.unlink(file) end
-glob = fio.pathjoin(box.cfg.memtx_dir, '*.snap')
-for _, file in pairs(fio.glob(glob)) do fio.unlink(file) end
-test_run:cmd("restart server default")
+test_run:cmd("restart server default with cleanup=True")
 box.schema.user.grant('guest', 'replication')
 _ = box.schema.space.create('test')
 _ = box.space.test:create_index('pk')
-- 
2.11.0

  parent reply	other threads:[~2018-06-15 15:48 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-08 17:34 [PATCH v2 00/11] Replica rejoin Vladimir Davydov
2018-06-08 17:34 ` [PATCH v2 01/11] box: retrieve instance uuid before starting local recovery Vladimir Davydov
2018-06-08 17:51   ` Konstantin Osipov
2018-06-08 17:34 ` [PATCH v2 02/11] box: refactor hot standby recovery Vladimir Davydov
2018-06-08 17:34 ` [PATCH v2 03/11] box: retrieve end vclock before starting local recovery Vladimir Davydov
2018-06-14 12:58   ` Konstantin Osipov
2018-06-08 17:34 ` [PATCH v2 04/11] box: open the port " Vladimir Davydov
2018-06-13 20:43   ` Konstantin Osipov
2018-06-14  8:31     ` Vladimir Davydov
2018-06-14 12:59       ` Konstantin Osipov
2018-06-15 15:48         ` [PATCH 0/3] Speed up recovery in case rebootstrap is not needed Vladimir Davydov
2018-06-15 15:48           ` [PATCH 1/3] xlog: erase eof marker when reopening existing file for writing Vladimir Davydov
2018-06-27 17:09             ` Konstantin Osipov
2018-06-15 15:48           ` [PATCH 2/3] wal: rollback vclock on write failure Vladimir Davydov
2018-06-27 17:22             ` Konstantin Osipov
2018-06-15 15:48           ` Vladimir Davydov [this message]
2018-06-27 17:29             ` [PATCH 3/3] wal: create empty xlog on shutdown Konstantin Osipov
2018-06-08 17:34 ` [PATCH v2 05/11] box: connect to remote peers before starting local recovery Vladimir Davydov
2018-06-13 20:45   ` Konstantin Osipov
2018-06-14  8:34     ` Vladimir Davydov
2018-06-14 12:59       ` Konstantin Osipov
2018-06-08 17:34 ` [PATCH v2 06/11] box: factor out local recovery function Vladimir Davydov
2018-06-13 20:50   ` Konstantin Osipov
2018-06-08 17:34 ` [PATCH v2 07/11] applier: inquire oldest vclock on connect Vladimir Davydov
2018-06-13 20:51   ` Konstantin Osipov
2018-06-14  8:40     ` Vladimir Davydov
2018-06-08 17:34 ` [PATCH v2 08/11] replication: rebootstrap instance on startup if it fell behind Vladimir Davydov
2018-06-13 20:55   ` Konstantin Osipov
2018-06-14  8:58     ` Vladimir Davydov
2018-06-08 17:34 ` [PATCH v2 09/11] vinyl: simplify vylog recovery from backup Vladimir Davydov
2018-06-08 17:34 ` [PATCH v2 10/11] vinyl: pass flags to vy_recovery_new Vladimir Davydov
2018-06-13 20:56   ` Konstantin Osipov
2018-06-08 17:34 ` [PATCH v2 11/11] vinyl: implement rebootstrap support Vladimir Davydov
2018-06-10 12:02   ` Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1499b12de12125f1258324b83e3fbb0e1d1d0587.1529075903.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH 3/3] wal: create empty xlog on shutdown' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox