[patches] Re: [PATCH] Fix force_recovery on empty xlog
Vladimir Davydov
vdavydov.dev at gmail.com
Wed Jan 31 19:09:37 MSK 2018
Looks OK to me.
On Wed, Jan 31, 2018 at 07:04:30PM +0300, Konstantin Belyavskiy wrote:
> From d0566bf2989958507e9646b9caef6cf0bd9ee708 Mon Sep 17 00:00:00 2001
> From: Konstantin Belyavskiy <k.belyavskiy at tarantool.org>
> Date: Thu, 25 Jan 2018 15:46:22 +0300
> Subject: [PATCH] Fix force_recovery on empty xlog
>
> * Fix force_recovery behaviour on empty xlog files and ones with corrupted header.
> * Add a test
>
> Closes #3026, #3076
> ---
> src/box/recovery.cc | 30 ++++++------
> src/box/xlog.c | 2 -
> test/xlog/force_recovery.lua | 8 ++++
> test/xlog/force_recovery.result | 98 +++++++++++++++++++++++++++++++++++++++
> test/xlog/force_recovery.test.lua | 47 +++++++++++++++++++
> 5 files changed, 170 insertions(+), 15 deletions(-)
> create mode 100644 test/xlog/force_recovery.lua
> create mode 100644 test/xlog/force_recovery.result
> create mode 100644 test/xlog/force_recovery.test.lua
>
> diff --git a/src/box/recovery.cc b/src/box/recovery.cc
> index 281ac1838..f8ec39717 100644
> --- a/src/box/recovery.cc
> +++ b/src/box/recovery.cc
> @@ -330,19 +330,24 @@ recovery_finalize(struct recovery *r, struct xstream *stream)
> recovery_close_log(r);
>
> /*
> - * Check that the last xlog file has rows.
> + * Check if next xlog exists. If it's true this xlog is
> + * corrupted and we should rename it (to avoid getting
> + * problem on the next xlog write with the same name).
> + * Possible reasons are:
> + * - last xlog has corrupted rows
> + * - last xlog has corrupted header
> + * - last xlog has zero size
> */
> - if (vclockset_last(&r->wal_dir.index) != NULL &&
> - vclock_sum(&r->vclock) ==
> - vclock_sum(vclockset_last(&r->wal_dir.index))) {
> - /*
> - * Delete the last empty xlog file.
> - */
> - char *name = xdir_format_filename(&r->wal_dir,
> - vclock_sum(&r->vclock),
> - NONE);
> - if (unlink(name) != 0) {
> - tnt_raise(SystemError, "%s: failed to unlink file",
> + char *name = xdir_format_filename(&r->wal_dir,
> + vclock_sum(&r->vclock),
> + NONE);
> + if (access(name, F_OK) == 0) {
> + say_info("rename corrupted xlog %s", name);
> + char to[PATH_MAX];
> + snprintf(to, sizeof(to), "%s.corrupted", name);
> + if (rename(name, to) != 0) {
> + tnt_raise(SystemError,
> + "%s: can't rename corrupted xlog",
> name);
> }
> }
> @@ -529,4 +534,3 @@ recovery_stop_local(struct recovery *r)
> }
>
> /* }}} */
> -
> diff --git a/src/box/xlog.c b/src/box/xlog.c
> index 98b54d658..4655d53fa 100644
> --- a/src/box/xlog.c
> +++ b/src/box/xlog.c
> @@ -552,8 +552,6 @@ xdir_scan(struct xdir *dir)
> goto exit;
> /** Skip a corrupted file */
> error_log(e);
> - rc = 0;
> - goto exit;
> }
> i++;
> } else {
> diff --git a/test/xlog/force_recovery.lua b/test/xlog/force_recovery.lua
> new file mode 100644
> index 000000000..ee429ba4c
> --- /dev/null
> +++ b/test/xlog/force_recovery.lua
> @@ -0,0 +1,8 @@
> +#!/usr/bin/env tarantool
> +
> +box.cfg {
> + listen = os.getenv("LISTEN"),
> + force_recovery = true
> +}
> +
> +require('console').listen(os.getenv('ADMIN'))
> diff --git a/test/xlog/force_recovery.result b/test/xlog/force_recovery.result
> new file mode 100644
> index 000000000..c0df20bad
> --- /dev/null
> +++ b/test/xlog/force_recovery.result
> @@ -0,0 +1,98 @@
> +#!/usr/bin/env tarantool
> +---
> +...
> +env = require('test_run')
> +---
> +...
> +test_run = env.new()
> +---
> +...
> +box.cfg{}
> +---
> +...
> +test_run:cmd('create server test with script = "xlog/force_recovery.lua"')
> +---
> +- true
> +...
> +test_run:cmd("start server test")
> +---
> +- true
> +...
> +test_run:cmd("switch test")
> +---
> +- true
> +...
> +box.space._schema:replace({'test'})
> +---
> +- ['test']
> +...
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'lost'})
> +---
> +- ['lost']
> +...
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'tost'})
> +---
> +- ['tost']
> +...
> +-- corrupted (empty) in the middle (old behavior: goto error on recovery)
> +fio = require('fio')
> +---
> +...
> +path = fio.pathjoin(box.cfg.wal_dir, string.format('%020d.xlog', box.info.lsn - 2))
> +---
> +...
> +fio.truncate(path)
> +---
> +- true
> +...
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'last'})
> +---
> +- ['last']
> +...
> +-- corrupted (empty), last
> +fio = require('fio')
> +---
> +...
> +path = fio.pathjoin(box.cfg.wal_dir, string.format('%020d.xlog', box.info.lsn - 1))
> +---
> +...
> +fio.truncate(path)
> +---
> +- true
> +...
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'test'})
> +---
> +- ['test']
> +...
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'tost'})
> +---
> +- ['tost']
> +...
> +-- corrupted header, last
> +fio = require('fio')
> +---
> +...
> +path = fio.pathjoin(box.cfg.wal_dir, string.format('%020d.xlog', box.info.lsn - 1))
> +---
> +...
> +f = fio.open(path, {'O_WRONLY'})
> +---
> +...
> +f:write('DEAD')
> +---
> +- true
> +...
> +f:close()
> +---
> +- true
> +...
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'post'})
> +---
> +- ['post']
> +...
> diff --git a/test/xlog/force_recovery.test.lua b/test/xlog/force_recovery.test.lua
> new file mode 100644
> index 000000000..ed51e320d
> --- /dev/null
> +++ b/test/xlog/force_recovery.test.lua
> @@ -0,0 +1,47 @@
> +#!/usr/bin/env tarantool
> +
> +env = require('test_run')
> +test_run = env.new()
> +
> +box.cfg{}
> +
> +test_run:cmd('create server test with script = "xlog/force_recovery.lua"')
> +
> +test_run:cmd("start server test")
> +test_run:cmd("switch test")
> +box.space._schema:replace({'test'})
> +
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'lost'})
> +
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'tost'})
> +
> +-- corrupted (empty) in the middle (old behavior: goto error on recovery)
> +fio = require('fio')
> +path = fio.pathjoin(box.cfg.wal_dir, string.format('%020d.xlog', box.info.lsn - 2))
> +fio.truncate(path)
> +
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'last'})
> +
> +-- corrupted (empty), last
> +fio = require('fio')
> +path = fio.pathjoin(box.cfg.wal_dir, string.format('%020d.xlog', box.info.lsn - 1))
> +fio.truncate(path)
> +
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'test'})
> +
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'tost'})
> +
> +-- corrupted header, last
> +fio = require('fio')
> +path = fio.pathjoin(box.cfg.wal_dir, string.format('%020d.xlog', box.info.lsn - 1))
> +f = fio.open(path, {'O_WRONLY'})
> +f:write('DEAD')
> +f:close()
> +
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'post'})
More information about the Tarantool-patches
mailing list