[PATCH v3] xlog: Remove inprogress files on start

Ilya Markov imarkov at tarantool.org
Tue Jun 19 16:14:11 MSK 2018


When tarantool crashes during writing to vylog, index, run or snapshot,
inprogress files remain. But garbage collector doesn't take into account
these files. So they remain until they are removed manually.

Fix this with adding deletion of such files to functions memtx_engine_end_recovery,
vinyl_engine_end_recovery and vy_run_remove_files.

Add 3 errinj which simulate the crash before renaming inprogress files.
Moved const string inprogress_suffix to xlog.h in order to reuse it in
vinyl run and index files removal.

Closes #3406
---
branch: https://github.com/tarantool/tarantool/tree/gh-3406-remove-inprogress-files
issue: https://github.com/tarantool/tarantool/issues/3406

Changes in v3:
- Delete abortion of end_recovery in case if inprgress file deletion fails.
  Replace it with diag_log.
- Move deletion of vylog.inprogress to vy_log_end_recovery().
- Move error new injections before relevant calls of xlog_rename().
- Change names and error messages of error injections.
- Removed copy&paste in error injection code and add comment where it's impossible.
- Changed signature of xdir_collect_inprogress function.
- Removed call xdir_collect_inprogress from vy_run_remove_files,
  replace it with implicit call of coio_unlink.
- Replace coio_unlink in xdir_collect_inprogress with unlink.
- Move const string inprogress_suffix to xlog.h.

 src/box/memtx_engine.c   |   2 +
 src/box/vy_log.c         |   7 +-
 src/box/vy_run.c         |  40 +++++++++--
 src/box/xlog.c           |  35 +++++++++-
 src/box/xlog.h           |  11 ++++
 src/errinj.h             |   3 +
 test/box/errinj.result   | 168 +++++++++++++++++++++++++++++++++++++++++++++++
 test/box/errinj.test.lua |  63 ++++++++++++++++++
 8 files changed, 321 insertions(+), 8 deletions(-)

diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index 9a9aff5..1566d26 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -303,6 +303,8 @@ memtx_engine_end_recovery(struct engine *engine)
 		if (space_foreach(memtx_build_secondary_keys, memtx) != 0)
 			return -1;
 	}
+	if (xdir_collect_inprogress(&memtx->snap_dir) < 0)
+		diag_log();
 	return 0;
 }

diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index 51e3753..cd343b9 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -852,6 +852,10 @@ vy_log_open(struct xlog *xlog)
 	    xlog_write_row(xlog, &row) < 0)
 		goto fail_close_xlog;

+	ERROR_INJECT(ERRINJ_VY_LOG_FILE_RENAME, {
+		diag_set(ClientError, ER_INJECTION, "vinyl log file rename");
+		return -1;
+	});
 	if (xlog_rename(xlog) < 0)
 		goto fail_close_xlog;

@@ -976,7 +980,8 @@ vy_log_end_recovery(void)
 			return -1;
 		}
 	}
-
+	if (xdir_collect_inprogress(&vy_log.dir) < 0)
+		diag_log();
 	vy_log.recovery = NULL;
 	return 0;
 }
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index 980bc4d..96736ba 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -2021,6 +2021,19 @@ vy_run_write_index(struct vy_run *run, const char *dirpath,
 			goto fail;
 	}

+	ERROR_INJECT(ERRINJ_VY_INDEX_FILE_RENAME, {
+		/*
+		 * Can't use here goto label fail,
+		 * because we don't want to unlink file in errinj and can't move
+		 * unlink before xlog_close call.
+		 */
+		region_truncate(region, mem_used);
+		xlog_tx_rollback(&index_xlog);
+		xlog_close(&index_xlog, false);
+		diag_set(ClientError, ER_INJECTION,
+			 "vinyl index file rename");
+		return -1;
+	});
 	if (xlog_tx_commit(&index_xlog) < 0 ||
 	    xlog_flush(&index_xlog) < 0 ||
 	    xlog_rename(&index_xlog) < 0)
@@ -2263,6 +2276,12 @@ vy_run_writer_commit(struct vy_run_writer *writer)
 	run->info.max_key = vy_key_dup(key);
 	if (run->info.max_key == NULL)
 		goto out;
+	ERROR_INJECT(ERRINJ_VY_RUN_FILE_RENAME, {
+		diag_set(ClientError, ER_INJECTION,
+			 "vinyl run file rename");
+		rc = -1;
+		goto out;
+	});

 	/* Sync data and link the file to the final name. */
 	if (xlog_sync(&writer->data_xlog) < 0 ||
@@ -2430,6 +2449,15 @@ close_err:
 	return -1;
 }

+static inline void
+remove_file(const char *path, int *ret) {
+	say_info("removing %s", path);
+	if (coio_unlink(path) < 0 && errno != ENOENT) {
+		say_syserror("error while removing %s", path);
+		*ret = -1;
+	}
+}
+
 int
 vy_run_remove_files(const char *dir, uint32_t space_id,
 		    uint32_t iid, int64_t run_id)
@@ -2439,14 +2467,14 @@ vy_run_remove_files(const char *dir, uint32_t space_id,
 				(long long)run_id); return -1;});
 	int ret = 0;
 	char path[PATH_MAX];
+	int total = 0;
 	for (int type = 0; type < vy_file_MAX; type++) {
-		vy_run_snprint_path(path, sizeof(path), dir,
+		total = vy_run_snprint_path(path, sizeof(path), dir,
 				    space_id, iid, run_id, type);
-		say_info("removing %s", path);
-		if (coio_unlink(path) < 0 && errno != ENOENT) {
-			say_syserror("error while removing %s", path);
-			ret = -1;
-		}
+		remove_file(path, &ret);
+		snprintf(path + total, sizeof(path) - total, "%s",
+			 inprogress_suffix);
+		remove_file(path, &ret);
 	}
 	return ret;
 }
diff --git a/src/box/xlog.c b/src/box/xlog.c
index 824ad11..7bd618e 100644
--- a/src/box/xlog.c
+++ b/src/box/xlog.c
@@ -58,7 +58,6 @@ typedef uint32_t log_magic_t;
 static const log_magic_t row_marker = mp_bswap_u32(0xd5ba0bab); /* host byte order */
 static const log_magic_t zrow_marker = mp_bswap_u32(0xd5ba0bba); /* host byte order */
 static const log_magic_t eof_marker = mp_bswap_u32(0xd510aded); /* host byte order */
-static const char inprogress_suffix[] = ".inprogress";

 enum {
 	/**
@@ -621,6 +620,40 @@ xdir_collect_garbage(struct xdir *dir, int64_t signature, bool use_coio)
 	return 0;
 }

+int
+xdir_collect_inprogress(struct xdir *xdir)
+{
+	const char *dirname = xdir->dirname;
+	DIR *dh = opendir(dirname);
+	if (dh == NULL) {
+		if (errno == ENOENT)
+			return 0;
+		diag_set(SystemError, "error reading directory '%s'",
+			 dirname);
+		return -1;
+	}
+
+	struct dirent *dent;
+	char path[PATH_MAX];
+	int total = snprintf(path, sizeof(path), "%s/", dirname);
+
+	while ((dent = readdir(dh)) != NULL) {
+		char *ext = strrchr(dent->d_name, '.');
+		if (ext == NULL || strcmp(ext, inprogress_suffix) != 0)
+			continue;
+		strcpy(path + total, dent->d_name);
+		say_info("removing %s", path);
+		int rc = unlink(path);
+		if (rc < 0) {
+			say_syserror("error while removing %s", path);
+			diag_set(SystemError, "failed to unlink file '%s'",
+				 path);
+			return -1;
+		}
+	}
+	return 0;
+}
+
 /* }}} */


diff --git a/src/box/xlog.h b/src/box/xlog.h
index 973910d..eba687f 100644
--- a/src/box/xlog.h
+++ b/src/box/xlog.h
@@ -70,6 +70,11 @@ enum xdir_type {
 enum log_suffix { NONE, INPROGRESS };

 /**
+ * Suffix added to path of inprogress files.
+ */
+static const char inprogress_suffix[] = ".inprogress";
+
+/**
  * A handle for a data directory with write ahead logs, snapshots,
  * vylogs.
  * Can be used to find the last log in the directory, scan
@@ -181,6 +186,12 @@ int
 xdir_collect_garbage(struct xdir *dir, int64_t signature, bool use_coio);

 /**
+ * Remove .inprogress files in specified directory.
+ */
+int
+xdir_collect_inprogress(struct xdir *xdir);
+
+/**
  * Return LSN and vclock (unless @vclock is NULL) of the newest
  * file in a directory or -1 if the directory is empty.
  */
diff --git a/src/errinj.h b/src/errinj.h
index 78514ac..7141af9 100644
--- a/src/errinj.h
+++ b/src/errinj.h
@@ -111,6 +111,9 @@ struct errinj {
 	_(ERRINJ_IPROTO_TX_DELAY, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_LOG_ROTATE, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_SNAP_COMMIT_DELAY, ERRINJ_BOOL, {.bparam = 0}) \
+	_(ERRINJ_VY_LOG_FILE_RENAME, ERRINJ_BOOL, {.bparam = false}) \
+	_(ERRINJ_VY_INDEX_FILE_RENAME, ERRINJ_BOOL, {.bparam = false}) \
+	_(ERRINJ_VY_RUN_FILE_RENAME, ERRINJ_BOOL, {.bparam = false}) \

 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 6ced172..d995a0f 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -4,6 +4,12 @@ errinj = box.error.injection
 net_box = require('net.box')
 ---
 ...
+fio = require("fio")
+---
+...
+fiber = require('fiber')
+---
+...
 space = box.schema.space.create('tweedledum')
 ---
 ...
@@ -40,8 +46,12 @@ errinj.info()
     state: false
   ERRINJ_WAL_IO:
     state: false
+  ERRINJ_VY_INDEX_FILE_RENAME:
+    state: false
   ERRINJ_TUPLE_ALLOC:
     state: false
+  ERRINJ_VY_RUN_FILE_RENAME:
+    state: false
   ERRINJ_VY_READ_PAGE:
     state: false
   ERRINJ_RELAY_REPORT_INTERVAL:
@@ -56,6 +66,8 @@ errinj.info()
     state: false
   ERRINJ_VY_LOG_FLUSH_DELAY:
     state: false
+  ERRINJ_VY_LOG_FILE_RENAME:
+    state: false
   ERRINJ_SNAP_COMMIT_DELAY:
     state: false
   ERRINJ_RELAY_FINAL_SLEEP:
@@ -693,6 +705,162 @@ errinj.set("ERRINJ_WAL_WRITE", false)
 space:drop()
 ---
 ...
+errinj.set("ERRINJ_VY_LOG_FILE_RENAME", true)
+---
+- ok
+...
+s = box.schema.space.create("vinyl_test", {engine='vinyl'})
+---
+...
+_ = s:create_index("primary")
+---
+...
+vinyl_vylog = string.format("%s/*.vylog.inprogress", box.cfg.vinyl_dir)
+---
+...
+#fio.glob(vinyl_vylog) > 0 -- true
+---
+- true
+...
+errinj.set("ERRINJ_SNAP_COMMIT_DELAY", true)
+---
+- ok
+...
+_ = fiber.create(function() box.snapshot() end)
+---
+...
+memtx_snap = string.format("%s/*.snap.inprogress", box.cfg.memtx_dir)
+---
+...
+while #fio.glob(memtx_snap) == 0 do fiber.sleep(0.001) end-- true
+---
+...
+#fio.glob(memtx_snap) > 0 --true
+---
+- true
+...
+test_run:cmd("restart server default")
+errinj = box.error.injection
+---
+...
+net_box = require('net.box')
+---
+...
+fio = require("fio")
+---
+...
+fiber = require('fiber')
+---
+...
+memtx_snap = string.format("%s/*.snap.inprogress", box.cfg.memtx_dir)
+---
+...
+#fio.glob(memtx_snap) == 0 -- true
+---
+- true
+...
+vinyl_vylog = string.format("%s/*.vylog.inprogress", box.cfg.vinyl_dir)
+---
+...
+#fio.glob(vinyl_vylog) == 0 --true
+---
+- true
+...
+s = box.space.vinyl_test
+---
+...
+vinyl_index = string.format("%s/%s/*/*.index.inprogress", box.cfg.vinyl_dir, s.id)
+---
+...
+errinj.set("ERRINJ_VY_INDEX_FILE_RENAME", true)
+---
+- ok
+...
+-- insert big tuples in order to cause compaction without box.snapshot.
+for i = 1, 10000 do s:insert{i, string.rep('a', 10000)} end
+---
+...
+while #fio.glob(vinyl_index) == 0 do fiber.sleep(0.001) end
+---
+...
+vinyl_run = string.format("%s/%s/*/*.run.inprogress", box.cfg.vinyl_dir, s.id)
+---
+...
+errinj.set("ERRINJ_VY_RUN_FILE_RENAME", true)
+---
+- ok
+...
+while #fio.glob(vinyl_run) == 0  do fiber.sleep(0.001) end
+---
+...
+#fio.glob(vinyl_run) > 0 -- true
+---
+- true
+...
+#fio.glob(vinyl_index) > 0 -- true
+---
+- true
+...
+test_run:cmd("restart server default")
+errinj = box.error.injection
+---
+...
+net_box = require('net.box')
+---
+...
+fio = require("fio")
+---
+...
+fiber = require('fiber')
+---
+...
+s = box.space.vinyl_test
+---
+...
+memtx_snap = string.format("%s/*.snap.inprogress", box.cfg.memtx_dir)
+---
+...
+vinyl_vylog = string.format("%s/*.vylog.inprogress", box.cfg.vinyl_dir)
+---
+...
+vinyl_run = string.format("%s/%s/*/*.run.inprogress", box.cfg.vinyl_dir, s.id)
+---
+...
+vinyl_index = string.format("%s/%s/*/*.index.inprogress", box.cfg.vinyl_dir, s.id)
+---
+...
+-- no inprogress files should be present in memtx(vinyl)_dir.
+#fio.glob(memtx_snap) == 0 -- true
+---
+- true
+...
+#fio.glob(vinyl_vylog) == 0 -- true
+---
+- true
+...
+#fio.glob(vinyl_index) == 0 -- true
+---
+- true
+...
+#fio.glob(vinyl_run) == 0 -- true
+---
+- true
+...
+box.space.vinyl_test:drop()
+---
+...
+run_dir = fio.pathjoin(box.cfg.vinyl_dir,"*/*/*")
+---
+...
+for _, file in ipairs(fio.glob(run_dir)) do fio.unlink(file) end
+---
+...
+vinyl_dir = fio.pathjoin(box.cfg.vinyl_dir,"*/")
+---
+...
+for _, dir in ipairs(fio.glob(vinyl_dir)) do fio.rmtree(dir) end
+---
+...
 --test space:bsize() in case of memory error
 utils = dofile('utils.lua')
 ---
diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua
index 3af1b74..756a8e7 100644
--- a/test/box/errinj.test.lua
+++ b/test/box/errinj.test.lua
@@ -1,5 +1,7 @@
 errinj = box.error.injection
 net_box = require('net.box')
+fio = require("fio")
+fiber = require('fiber')

 space = box.schema.space.create('tweedledum')
 index = space:create_index('primary', { type = 'hash' })
@@ -203,6 +205,67 @@ box.snapshot()
 errinj.set("ERRINJ_WAL_WRITE", false)
 space:drop()

+errinj.set("ERRINJ_VY_LOG_FILE_RENAME", true)
+s = box.schema.space.create("vinyl_test", {engine='vinyl'})
+_ = s:create_index("primary")
+vinyl_vylog = string.format("%s/*.vylog.inprogress", box.cfg.vinyl_dir)
+#fio.glob(vinyl_vylog) > 0 -- true
+
+errinj.set("ERRINJ_SNAP_COMMIT_DELAY", true)
+_ = fiber.create(function() box.snapshot() end)
+memtx_snap = string.format("%s/*.snap.inprogress", box.cfg.memtx_dir)
+while #fio.glob(memtx_snap) == 0 do fiber.sleep(0.001) end-- true
+#fio.glob(memtx_snap) > 0 --true
+
+test_run:cmd("restart server default")
+errinj = box.error.injection
+net_box = require('net.box')
+fio = require("fio")
+fiber = require('fiber')
+
+memtx_snap = string.format("%s/*.snap.inprogress", box.cfg.memtx_dir)
+#fio.glob(memtx_snap) == 0 -- true
+
+vinyl_vylog = string.format("%s/*.vylog.inprogress", box.cfg.vinyl_dir)
+#fio.glob(vinyl_vylog) == 0 --true
+
+s = box.space.vinyl_test
+vinyl_index = string.format("%s/%s/*/*.index.inprogress", box.cfg.vinyl_dir, s.id)
+errinj.set("ERRINJ_VY_INDEX_FILE_RENAME", true)
+-- insert big tuples in order to cause compaction without box.snapshot.
+for i = 1, 10000 do s:insert{i, string.rep('a', 10000)} end
+
+while #fio.glob(vinyl_index) == 0 do fiber.sleep(0.001) end
+
+vinyl_run = string.format("%s/%s/*/*.run.inprogress", box.cfg.vinyl_dir, s.id)
+errinj.set("ERRINJ_VY_RUN_FILE_RENAME", true)
+while #fio.glob(vinyl_run) == 0  do fiber.sleep(0.001) end
+#fio.glob(vinyl_run) > 0 -- true
+#fio.glob(vinyl_index) > 0 -- true
+
+test_run:cmd("restart server default")
+errinj = box.error.injection
+net_box = require('net.box')
+fio = require("fio")
+fiber = require('fiber')
+
+s = box.space.vinyl_test
+memtx_snap = string.format("%s/*.snap.inprogress", box.cfg.memtx_dir)
+vinyl_vylog = string.format("%s/*.vylog.inprogress", box.cfg.vinyl_dir)
+vinyl_run = string.format("%s/%s/*/*.run.inprogress", box.cfg.vinyl_dir, s.id)
+vinyl_index = string.format("%s/%s/*/*.index.inprogress", box.cfg.vinyl_dir, s.id)
+
+-- no inprogress files should be present in memtx(vinyl)_dir.
+#fio.glob(memtx_snap) == 0 -- true
+#fio.glob(vinyl_vylog) == 0 -- true
+#fio.glob(vinyl_index) == 0 -- true
+#fio.glob(vinyl_run) == 0 -- true
+
+box.space.vinyl_test:drop()
+run_dir = fio.pathjoin(box.cfg.vinyl_dir,"*/*/*")
+for _, file in ipairs(fio.glob(run_dir)) do fio.unlink(file) end
+vinyl_dir = fio.pathjoin(box.cfg.vinyl_dir,"*/")
+for _, dir in ipairs(fio.glob(vinyl_dir)) do fio.rmtree(dir) end
 --test space:bsize() in case of memory error
 utils = dofile('utils.lua')
 s = box.schema.space.create('space_bsize')
--
2.7.4




More information about the Tarantool-patches mailing list