[patches] Re: [PATCH] Fix force_recovery on empty xlog

Vladimir Davydov vdavydov.dev at gmail.com
Wed Jan 31 19:09:37 MSK 2018


Looks OK to me.

On Wed, Jan 31, 2018 at 07:04:30PM +0300, Konstantin Belyavskiy wrote:

> From d0566bf2989958507e9646b9caef6cf0bd9ee708 Mon Sep 17 00:00:00 2001
> From: Konstantin Belyavskiy <k.belyavskiy at tarantool.org>
> Date: Thu, 25 Jan 2018 15:46:22 +0300
> Subject: [PATCH] Fix force_recovery on empty xlog
> 
> * Fix force_recovery behaviour on empty xlog files and ones with corrupted header.
> * Add a test
> 
> Closes #3026, #3076
> ---
>  src/box/recovery.cc               | 30 ++++++------
>  src/box/xlog.c                    |  2 -
>  test/xlog/force_recovery.lua      |  8 ++++
>  test/xlog/force_recovery.result   | 98 +++++++++++++++++++++++++++++++++++++++
>  test/xlog/force_recovery.test.lua | 47 +++++++++++++++++++
>  5 files changed, 170 insertions(+), 15 deletions(-)
>  create mode 100644 test/xlog/force_recovery.lua
>  create mode 100644 test/xlog/force_recovery.result
>  create mode 100644 test/xlog/force_recovery.test.lua
> 
> diff --git a/src/box/recovery.cc b/src/box/recovery.cc
> index 281ac1838..f8ec39717 100644
> --- a/src/box/recovery.cc
> +++ b/src/box/recovery.cc
> @@ -330,19 +330,24 @@ recovery_finalize(struct recovery *r, struct xstream *stream)
>  	recovery_close_log(r);
>  
>  	/*
> -	 * Check that the last xlog file has rows.
> +	 * Check if next xlog exists. If it's true this xlog is
> +	 * corrupted and we should rename it (to avoid getting
> +	 * problem on the next xlog write with the same name).
> +	 * Possible reasons are:
> +	 *  - last xlog has corrupted rows
> +	 *  - last xlog has corrupted header
> +	 *  - last xlog has zero size
>  	 */
> -	if (vclockset_last(&r->wal_dir.index) != NULL &&
> -	    vclock_sum(&r->vclock) ==
> -	    vclock_sum(vclockset_last(&r->wal_dir.index))) {
> -		/*
> -		 * Delete the last empty xlog file.
> -		 */
> -		char *name = xdir_format_filename(&r->wal_dir,
> -						  vclock_sum(&r->vclock),
> -						  NONE);
> -		if (unlink(name) != 0) {
> -			tnt_raise(SystemError, "%s: failed to unlink file",
> +	char *name = xdir_format_filename(&r->wal_dir,
> +					  vclock_sum(&r->vclock),
> +					  NONE);
> +	if (access(name, F_OK) == 0) {
> +		say_info("rename corrupted xlog %s", name);
> +		char to[PATH_MAX];
> +		snprintf(to, sizeof(to), "%s.corrupted", name);
> +		if (rename(name, to) != 0) {
> +			tnt_raise(SystemError,
> +				  "%s: can't rename corrupted xlog",
>  				  name);
>  		}
>  	}
> @@ -529,4 +534,3 @@ recovery_stop_local(struct recovery *r)
>  }
>  
>  /* }}} */
> -
> diff --git a/src/box/xlog.c b/src/box/xlog.c
> index 98b54d658..4655d53fa 100644
> --- a/src/box/xlog.c
> +++ b/src/box/xlog.c
> @@ -552,8 +552,6 @@ xdir_scan(struct xdir *dir)
>  					goto exit;
>  				/** Skip a corrupted file */
>  				error_log(e);
> -				rc = 0;
> -				goto exit;
>  			}
>  			i++;
>  		} else {
> diff --git a/test/xlog/force_recovery.lua b/test/xlog/force_recovery.lua
> new file mode 100644
> index 000000000..ee429ba4c
> --- /dev/null
> +++ b/test/xlog/force_recovery.lua
> @@ -0,0 +1,8 @@
> +#!/usr/bin/env tarantool
> +
> +box.cfg {
> +    listen = os.getenv("LISTEN"),
> +    force_recovery = true
> +}
> +
> +require('console').listen(os.getenv('ADMIN'))
> diff --git a/test/xlog/force_recovery.result b/test/xlog/force_recovery.result
> new file mode 100644
> index 000000000..c0df20bad
> --- /dev/null
> +++ b/test/xlog/force_recovery.result
> @@ -0,0 +1,98 @@
> +#!/usr/bin/env tarantool
> +---
> +...
> +env = require('test_run')
> +---
> +...
> +test_run = env.new()
> +---
> +...
> +box.cfg{}
> +---
> +...
> +test_run:cmd('create server test with script = "xlog/force_recovery.lua"')
> +---
> +- true
> +...
> +test_run:cmd("start server test")
> +---
> +- true
> +...
> +test_run:cmd("switch test")
> +---
> +- true
> +...
> +box.space._schema:replace({'test'})
> +---
> +- ['test']
> +...
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'lost'})
> +---
> +- ['lost']
> +...
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'tost'})
> +---
> +- ['tost']
> +...
> +-- corrupted (empty) in the middle (old behavior: goto error on recovery)
> +fio = require('fio')
> +---
> +...
> +path = fio.pathjoin(box.cfg.wal_dir, string.format('%020d.xlog', box.info.lsn - 2))
> +---
> +...
> +fio.truncate(path)
> +---
> +- true
> +...
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'last'})
> +---
> +- ['last']
> +...
> +-- corrupted (empty), last
> +fio = require('fio')
> +---
> +...
> +path = fio.pathjoin(box.cfg.wal_dir, string.format('%020d.xlog', box.info.lsn - 1))
> +---
> +...
> +fio.truncate(path)
> +---
> +- true
> +...
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'test'})
> +---
> +- ['test']
> +...
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'tost'})
> +---
> +- ['tost']
> +...
> +-- corrupted header, last
> +fio = require('fio')
> +---
> +...
> +path = fio.pathjoin(box.cfg.wal_dir, string.format('%020d.xlog', box.info.lsn - 1))
> +---
> +...
> +f = fio.open(path, {'O_WRONLY'})
> +---
> +...
> +f:write('DEAD')
> +---
> +- true
> +...
> +f:close()
> +---
> +- true
> +...
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'post'})
> +---
> +- ['post']
> +...
> diff --git a/test/xlog/force_recovery.test.lua b/test/xlog/force_recovery.test.lua
> new file mode 100644
> index 000000000..ed51e320d
> --- /dev/null
> +++ b/test/xlog/force_recovery.test.lua
> @@ -0,0 +1,47 @@
> +#!/usr/bin/env tarantool
> +
> +env = require('test_run')
> +test_run = env.new()
> +
> +box.cfg{}
> +
> +test_run:cmd('create server test with script = "xlog/force_recovery.lua"')
> +
> +test_run:cmd("start server test")
> +test_run:cmd("switch test")
> +box.space._schema:replace({'test'})
> +
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'lost'})
> +
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'tost'})
> +
> +-- corrupted (empty) in the middle (old behavior: goto error on recovery)
> +fio = require('fio')
> +path = fio.pathjoin(box.cfg.wal_dir, string.format('%020d.xlog', box.info.lsn - 2))
> +fio.truncate(path)
> +
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'last'})
> +
> +-- corrupted (empty), last
> +fio = require('fio')
> +path = fio.pathjoin(box.cfg.wal_dir, string.format('%020d.xlog', box.info.lsn - 1))
> +fio.truncate(path)
> +
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'test'})
> +
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'tost'})
> +
> +-- corrupted header, last
> +fio = require('fio')
> +path = fio.pathjoin(box.cfg.wal_dir, string.format('%020d.xlog', box.info.lsn - 1))
> +f = fio.open(path, {'O_WRONLY'})
> +f:write('DEAD')
> +f:close()
> +
> +test_run:cmd("restart server test")
> +box.space._schema:replace({'post'})



More information about the Tarantool-patches mailing list