Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH v5 1/2] vinyl: fix check vinyl_dir existence at bootstrap
@ 2020-08-17  5:29 Alexander V. Tikhonov
  2020-08-18 21:10 ` Alexander Turenko
  0 siblings, 1 reply; 3+ messages in thread
From: Alexander V. Tikhonov @ 2020-08-17  5:29 UTC (permalink / raw)
  To: Kirill Yukhin, Alexander Turenko; +Cc: tarantool-patches

During implementation of openSUSE got failed box-tap/cfg.test.lua test.
Found that when memtx_dir didn't exist and vinyl_dir existed and also
errno was set to ENOENT, box configuration succeeded, but it shouldn't.
Reason of this wrong behaviour was that not all of the failure paths in
xdir_scan() set errno, but the caller assumed it.

Debugging src/box/xlog.c found that all checks were correct, but at:

  src/box/vy_log.c:vy_log_bootstrap()
  src/box/vy_log.c:vy_log_begin_recovery()

the checks on of the errno on ENOENT blocked the negative return from:

  src/box/xlog.c:xdir_scan()

Found that errno was already set to ENOENT before the xdir_scan() call.
To fix the issue the errno could be clean before the call to xdir_scan,
because we are interesting in it only from xdir_scan function.

After discussions found that there were alternative better solution to
fix it. The fix with resetting errno was not good because xdir_scan()
was not system call in real and some internal routines could set it
to ENOENT itself, so it couldn't be controled from outside of function.

To be sure in behaviour of the changing errno decided to pass a flag to
xdir_scan() if the directory should exist.

Closes #4594
Needed for #4562

Co-authored-by: Alexander Turenko <alexander.turenko@tarantool.org>
---

Github: https://github.com/tarantool/tarantool/tree/avtikhon/gh-4562-suse-pack-full-ci
Issue: https://github.com/tarantool/tarantool/issues/4562

 src/box/memtx_engine.c                        |  2 +-
 src/box/recovery.cc                           |  4 +-
 src/box/vy_log.c                              |  4 +-
 src/box/wal.c                                 |  2 +-
 src/box/xlog.c                                |  4 +-
 src/box/xlog.h                                |  6 +--
 .../gh-4562-errno-at-xdir_scan.test.lua       | 47 +++++++++++++++++++
 7 files changed, 59 insertions(+), 10 deletions(-)
 create mode 100755 test/box-tap/gh-4562-errno-at-xdir_scan.test.lua

diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index dfd6fce6e..9f079a6b5 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -992,7 +992,7 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery,
 		    &xlog_opts_default);
 	memtx->snap_dir.force_recovery = force_recovery;
 
-	if (xdir_scan(&memtx->snap_dir) != 0)
+	if (xdir_scan(&memtx->snap_dir, true) != 0)
 		goto fail;
 
 	/*
diff --git a/src/box/recovery.cc b/src/box/recovery.cc
index d1a503cfc..cd33e7635 100644
--- a/src/box/recovery.cc
+++ b/src/box/recovery.cc
@@ -121,7 +121,7 @@ void
 recovery_scan(struct recovery *r, struct vclock *end_vclock,
 	      struct vclock *gc_vclock)
 {
-	xdir_scan_xc(&r->wal_dir);
+	xdir_scan_xc(&r->wal_dir, true);
 
 	if (xdir_last_vclock(&r->wal_dir, end_vclock) < 0 ||
 	    vclock_compare(end_vclock, &r->vclock) < 0) {
@@ -307,7 +307,7 @@ recover_remaining_wals(struct recovery *r, struct xstream *stream,
 	struct vclock *clock;
 
 	if (scan_dir)
-		xdir_scan_xc(&r->wal_dir);
+		xdir_scan_xc(&r->wal_dir, true);
 
 	if (xlog_cursor_is_open(&r->cursor)) {
 		/* If there's a WAL open, recover from it first. */
diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index 311985c72..da3c50e87 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -1014,7 +1014,7 @@ vy_log_rebootstrap(void)
 int
 vy_log_bootstrap(void)
 {
-	if (xdir_scan(&vy_log.dir) < 0 && errno != ENOENT)
+	if (xdir_scan(&vy_log.dir, false) < 0)
 		return -1;
 	if (xdir_last_vclock(&vy_log.dir, &vy_log.last_checkpoint) >= 0)
 		return vy_log_rebootstrap();
@@ -1036,7 +1036,7 @@ vy_log_begin_recovery(const struct vclock *vclock)
 	 * because vinyl might not be even in use. Complain only
 	 * on an attempt to write a vylog.
 	 */
-	if (xdir_scan(&vy_log.dir) < 0 && errno != ENOENT)
+	if (xdir_scan(&vy_log.dir, false) < 0)
 		return NULL;
 
 	if (xdir_last_vclock(&vy_log.dir, &vy_log.last_checkpoint) < 0) {
diff --git a/src/box/wal.c b/src/box/wal.c
index d8c92aa36..2b894d680 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -559,7 +559,7 @@ wal_enable(void)
 	 * existing WAL files. Required for garbage collection,
 	 * see wal_collect_garbage().
 	 */
-	if (xdir_scan(&writer->wal_dir))
+	if (xdir_scan(&writer->wal_dir, true))
 		return -1;
 
 	/* Open the most recent WAL file. */
diff --git a/src/box/xlog.c b/src/box/xlog.c
index 6ccd3d68d..74f761994 100644
--- a/src/box/xlog.c
+++ b/src/box/xlog.c
@@ -511,13 +511,15 @@ xdir_open_cursor(struct xdir *dir, int64_t signature,
  * @return nothing.
  */
 int
-xdir_scan(struct xdir *dir)
+xdir_scan(struct xdir *dir, bool is_dir_required)
 {
 	DIR *dh = opendir(dir->dirname);        /* log dir */
 	int64_t *signatures = NULL;             /* log file names */
 	size_t s_count = 0, s_capacity = 0;
 
 	if (dh == NULL) {
+		if (!is_dir_required && errno == ENOENT)
+			return 0;
 		diag_set(SystemError, "error reading directory '%s'",
 			  dir->dirname);
 		return -1;
diff --git a/src/box/xlog.h b/src/box/xlog.h
index 9ffce598b..3400eb75f 100644
--- a/src/box/xlog.h
+++ b/src/box/xlog.h
@@ -187,7 +187,7 @@ xdir_destroy(struct xdir *dir);
  * snapshot or scan through all logs.
  */
 int
-xdir_scan(struct xdir *dir);
+xdir_scan(struct xdir *dir, bool is_dir_required);
 
 /**
  * Check that a directory exists and is writable.
@@ -821,9 +821,9 @@ xdir_open_cursor(struct xdir *dir, int64_t signature,
 #include "exception.h"
 
 static inline void
-xdir_scan_xc(struct xdir *dir)
+xdir_scan_xc(struct xdir *dir, bool is_dir_required)
 {
-	if (xdir_scan(dir) == -1)
+	if (xdir_scan(dir, is_dir_required) == -1)
 		diag_raise();
 }
 
diff --git a/test/box-tap/gh-4562-errno-at-xdir_scan.test.lua b/test/box-tap/gh-4562-errno-at-xdir_scan.test.lua
new file mode 100755
index 000000000..cbf7b1f35
--- /dev/null
+++ b/test/box-tap/gh-4562-errno-at-xdir_scan.test.lua
@@ -0,0 +1,47 @@
+#!/usr/bin/env tarantool
+
+local tap = require('tap')
+local test = tap.test('cfg')
+local fio = require('fio')
+test:plan(1)
+
+local tarantool_bin = arg[-1]
+local PANIC = 256
+
+function run_script(code)
+    local dir = fio.tempdir()
+    local script_path = fio.pathjoin(dir, 'script.lua')
+    local script = fio.open(script_path, {'O_CREAT', 'O_WRONLY', 'O_APPEND'},
+        tonumber('0777', 8))
+    script:write(code)
+    script:write("\nos.exit(0)")
+    script:close()
+    local cmd = [[/bin/sh -c 'cd "%s" && "%s" ./script.lua 2> /dev/null']]
+    local res = os.execute(string.format(cmd, dir, tarantool_bin))
+    fio.rmtree(dir)
+    return res
+end
+
+--
+-- gh-4594: when memtx_dir is not exists, but vinyl_dir exists and
+-- errno is set to ENOENT, box configuration succeeds, however it
+-- should not
+--
+vinyl_dir = fio.tempdir()
+run_script(string.format([[
+box.cfg{vinyl_dir = '%s'}
+s = box.schema.space.create('test', {engine = 'vinyl'})
+s:create_index('pk')
+os.exit(0)
+]], vinyl_dir))
+code = string.format([[
+local errno = require('errno')
+errno(errno.ENOENT)
+box.cfg{vinyl_dir = '%s'}
+os.exit(0)
+]], vinyl_dir)
+test:is(run_script(code), PANIC, "bootstrap with ENOENT from non-empty vinyl_dir")
+fio.rmtree(vinyl_dir)
+
+test:check()
+os.exit(0)
-- 
2.17.1

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Tarantool-patches] [PATCH v5 1/2] vinyl: fix check vinyl_dir existence at bootstrap
  2020-08-17  5:29 [Tarantool-patches] [PATCH v5 1/2] vinyl: fix check vinyl_dir existence at bootstrap Alexander V. Tikhonov
@ 2020-08-18 21:10 ` Alexander Turenko
  2020-08-19  6:04   ` Alexander V. Tikhonov
  0 siblings, 1 reply; 3+ messages in thread
From: Alexander Turenko @ 2020-08-18 21:10 UTC (permalink / raw)
  To: Alexander V. Tikhonov; +Cc: tarantool-patches

Thanks for the update! Now we can discuss the message.

I left comments, where something confuses me in the message and the
test.

WBR, Alexander Turenko.

On Mon, Aug 17, 2020 at 08:29:14AM +0300, Alexander V. Tikhonov wrote:
> During implementation of openSUSE got failed box-tap/cfg.test.lua test.

Implementation of openSUSE... build?

> Found that when memtx_dir didn't exist and vinyl_dir existed and also
> errno was set to ENOENT, box configuration succeeded, but it shouldn't.
> Reason of this wrong behaviour was that not all of the failure paths in
> xdir_scan() set errno, but the caller assumed it.
> 
> Debugging src/box/xlog.c found that all checks were correct, but at:
> 
>   src/box/vy_log.c:vy_log_bootstrap()
>   src/box/vy_log.c:vy_log_begin_recovery()
> 
> the checks on of the errno on ENOENT blocked the negative return from:
> 
>   src/box/xlog.c:xdir_scan()

'blocked negative return' is not clear for me. I guess that you want to
express the following two points:

- A negative return value is not considered as an error when errno is
  set to ENOENT.
- The idea of this check is to handle the situation when vinyl_dir is
  not exists.

I would rephrase it.

> 
> Found that errno was already set to ENOENT before the xdir_scan() call.
> To fix the issue the errno could be clean before the call to xdir_scan,
> because we are interesting in it only from xdir_scan function.

Strictly speaking, the reason is different: because xdir_scan() may
return a negative value and leave errno unchanged.

> 
> After discussions found that there were alternative better solution to
> fix it. The fix with resetting errno was not good because xdir_scan()
> was not system call in real and some internal routines could set it
> to ENOENT itself, so it couldn't be controled from outside of function.

'internal routines could set it to ENOENT itself' -- I would say that
something that is not vinyl_dir existence check.

One more point: I don't see how it may happen except due to a race:

- a file name is obtained from readdir(),
- the file is deleted by another process,
- we open the file and fail with ENOENT.

And I'm not sure the race is possible. I want to say that I'm unable to
state that 'internal routines could set it to ENOENT': I don't know, in
fact. But the implementation can be changed in a future and there is no
guarantee that returning with -1 and ENOENT means only lack of
vinyl_dir.

I mean, I would highlight that the sentence does not assert that the
situation is possible with the current implementation. Just that we have
no corresponding guarantee.

Typo: was not system call -> is not a system call

Typo: controled -> controlled.

> 
> To be sure in behaviour of the changing errno decided to pass a flag to
> xdir_scan() if the directory should exist.

'behaviour of the changing errno' is vague. I guess the idea of the
sentence is that the variant with the flag is better, because (I guess)
it is more explicit and should be less fragile.

> 
> Closes #4594
> Needed for #4562
> 
> Co-authored-by: Alexander Turenko <alexander.turenko@tarantool.org>
> ---
> 
> Github: https://github.com/tarantool/tarantool/tree/avtikhon/gh-4562-suse-pack-full-ci
> Issue: https://github.com/tarantool/tarantool/issues/4562

> diff --git a/test/box-tap/gh-4562-errno-at-xdir_scan.test.lua b/test/box-tap/gh-4562-errno-at-xdir_scan.test.lua
> new file mode 100755
> index 000000000..cbf7b1f35
> --- /dev/null
> +++ b/test/box-tap/gh-4562-errno-at-xdir_scan.test.lua
> @@ -0,0 +1,47 @@
> +#!/usr/bin/env tarantool
> +
> +local tap = require('tap')
> +local test = tap.test('cfg')
> +local fio = require('fio')
> +test:plan(1)
> +
> +local tarantool_bin = arg[-1]
> +local PANIC = 256
> +
> +function run_script(code)

Nit: It is the rule of thumb to don't fill the global namespace with
fields (variables and functions) if you actually don't need it.

> +    local dir = fio.tempdir()
> +    local script_path = fio.pathjoin(dir, 'script.lua')
> +    local script = fio.open(script_path, {'O_CREAT', 'O_WRONLY', 'O_APPEND'},
> +        tonumber('0777', 8))
> +    script:write(code)
> +    script:write("\nos.exit(0)")
> +    script:close()
> +    local cmd = [[/bin/sh -c 'cd "%s" && "%s" ./script.lua 2> /dev/null']]
> +    local res = os.execute(string.format(cmd, dir, tarantool_bin))
> +    fio.rmtree(dir)
> +    return res
> +end
> +
> +--
> +-- gh-4594: when memtx_dir is not exists, but vinyl_dir exists and
> +-- errno is set to ENOENT, box configuration succeeds, however it
> +-- should not
> +--
> +vinyl_dir = fio.tempdir()

Same here, it would be good to use 'local' here.

> +run_script(string.format([[
> +box.cfg{vinyl_dir = '%s'}
> +s = box.schema.space.create('test', {engine = 'vinyl'})
> +s:create_index('pk')
> +os.exit(0)
> +]], vinyl_dir))
> +code = string.format([[

Same here.

> +local errno = require('errno')
> +errno(errno.ENOENT)
> +box.cfg{vinyl_dir = '%s'}
> +os.exit(0)
> +]], vinyl_dir)
> +test:is(run_script(code), PANIC, "bootstrap with ENOENT from non-empty vinyl_dir")
> +fio.rmtree(vinyl_dir)

I propose to clarify the test steps a bit:

 | vinyl_dir = <...>
 |
 | -- Fill vinyl_dir.
 | run_script(<...>)
 |
 | -- Verify the case described above.
 | local code = <...>
 | test:is(<...>)
 |
 | -- Remove vinyl_dir.
 | fio.rmtree(vinyl_dir)

> +
> +test:check()
> +os.exit(0)

I suggest to highlight a test status in the exit code (it follows the
Lua Style Guide proposal [1]):

 | os.exit(test:check() and 0 or 1)

[1]: https://github.com/tarantool/doc/issues/1004

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Tarantool-patches] [PATCH v5 1/2] vinyl: fix check vinyl_dir existence at bootstrap
  2020-08-18 21:10 ` Alexander Turenko
@ 2020-08-19  6:04   ` Alexander V. Tikhonov
  0 siblings, 0 replies; 3+ messages in thread
From: Alexander V. Tikhonov @ 2020-08-19  6:04 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: tarantool-patches

Hi Alexander, thanks for the review, please check my comments below.

On Wed, Aug 19, 2020 at 12:10:14AM +0300, Alexander Turenko wrote:
> Thanks for the update! Now we can discuss the message.
> 
> I left comments, where something confuses me in the message and the
> test.
> 
> WBR, Alexander Turenko.
> 
> On Mon, Aug 17, 2020 at 08:29:14AM +0300, Alexander V. Tikhonov wrote:
> > During implementation of openSUSE got failed box-tap/cfg.test.lua test.
> 
> Implementation of openSUSE... build?

Corrected.

> 
> > Found that when memtx_dir didn't exist and vinyl_dir existed and also
> > errno was set to ENOENT, box configuration succeeded, but it shouldn't.
> > Reason of this wrong behaviour was that not all of the failure paths in
> > xdir_scan() set errno, but the caller assumed it.
> > 
> > Debugging src/box/xlog.c found that all checks were correct, but at:
> > 
> >   src/box/vy_log.c:vy_log_bootstrap()
> >   src/box/vy_log.c:vy_log_begin_recovery()
> > 
> > the checks on of the errno on ENOENT blocked the negative return from:
> > 
> >   src/box/xlog.c:xdir_scan()
> 
> 'blocked negative return' is not clear for me. I guess that you want to
> express the following two points:
> 
> - A negative return value is not considered as an error when errno is
>   set to ENOENT.
> - The idea of this check is to handle the situation when vinyl_dir is
>   not exists.
> 
> I would rephrase it.
>

Right I've rewrote it an mentioned these points.

> > 
> > Found that errno was already set to ENOENT before the xdir_scan() call.
> > To fix the issue the errno could be clean before the call to xdir_scan,
> > because we are interesting in it only from xdir_scan function.
> 
> Strictly speaking, the reason is different: because xdir_scan() may
> return a negative value and leave errno unchanged.
>

Right, I've changed the comment to make it more clear.

> > 
> > After discussions found that there were alternative better solution to
> > fix it. The fix with resetting errno was not good because xdir_scan()
> > was not system call in real and some internal routines could set it
> > to ENOENT itself, so it couldn't be controled from outside of function.
> 
> 'internal routines could set it to ENOENT itself' -- I would say that
> something that is not vinyl_dir existence check.
> 
> One more point: I don't see how it may happen except due to a race:
> 
> - a file name is obtained from readdir(),
> - the file is deleted by another process,
> - we open the file and fail with ENOENT.
> 
> And I'm not sure the race is possible. I want to say that I'm unable to
> state that 'internal routines could set it to ENOENT': I don't know, in
> fact. But the implementation can be changed in a future and there is no
> guarantee that returning with -1 and ENOENT means only lack of
> vinyl_dir.
> 
> I mean, I would highlight that the sentence does not assert that the
> situation is possible with the current implementation. Just that we have
> no corresponding guarantee.
>

Right, the xdir_scan() is not system call and can be changed in the way
that can brake the check.

> Typo: was not system call -> is not a system call
> 
> Typo: controled -> controlled.
>

Corrected.

> > 
> > To be sure in behaviour of the changing errno decided to pass a flag to
> > xdir_scan() if the directory should exist.
> 
> 'behaviour of the changing errno' is vague. I guess the idea of the
> sentence is that the variant with the flag is better, because (I guess)
> it is more explicit and should be less fragile.
>

Here I've added more comments.

> > 
> > Closes #4594
> > Needed for #4562
> > 
> > Co-authored-by: Alexander Turenko <alexander.turenko@tarantool.org>
> > ---
> > 
> > Github: https://github.com/tarantool/tarantool/tree/avtikhon/gh-4562-suse-pack-full-ci
> > Issue: https://github.com/tarantool/tarantool/issues/4562
> 
> > diff --git a/test/box-tap/gh-4562-errno-at-xdir_scan.test.lua b/test/box-tap/gh-4562-errno-at-xdir_scan.test.lua
> > new file mode 100755
> > index 000000000..cbf7b1f35
> > --- /dev/null
> > +++ b/test/box-tap/gh-4562-errno-at-xdir_scan.test.lua
> > @@ -0,0 +1,47 @@
> > +#!/usr/bin/env tarantool
> > +
> > +local tap = require('tap')
> > +local test = tap.test('cfg')
> > +local fio = require('fio')
> > +test:plan(1)
> > +
> > +local tarantool_bin = arg[-1]
> > +local PANIC = 256
> > +
> > +function run_script(code)
> 
> Nit: It is the rule of thumb to don't fill the global namespace with
> fields (variables and functions) if you actually don't need it.
> 

Corrected.

> > +    local dir = fio.tempdir()
> > +    local script_path = fio.pathjoin(dir, 'script.lua')
> > +    local script = fio.open(script_path, {'O_CREAT', 'O_WRONLY', 'O_APPEND'},
> > +        tonumber('0777', 8))
> > +    script:write(code)
> > +    script:write("\nos.exit(0)")
> > +    script:close()
> > +    local cmd = [[/bin/sh -c 'cd "%s" && "%s" ./script.lua 2> /dev/null']]
> > +    local res = os.execute(string.format(cmd, dir, tarantool_bin))
> > +    fio.rmtree(dir)
> > +    return res
> > +end
> > +
> > +--
> > +-- gh-4594: when memtx_dir is not exists, but vinyl_dir exists and
> > +-- errno is set to ENOENT, box configuration succeeds, however it
> > +-- should not
> > +--
> > +vinyl_dir = fio.tempdir()
> 
> Same here, it would be good to use 'local' here.
> 

Corrected.

> > +run_script(string.format([[
> > +box.cfg{vinyl_dir = '%s'}
> > +s = box.schema.space.create('test', {engine = 'vinyl'})
> > +s:create_index('pk')
> > +os.exit(0)
> > +]], vinyl_dir))
> > +code = string.format([[
> 
> Same here.
> 

Corrected.

> > +local errno = require('errno')
> > +errno(errno.ENOENT)
> > +box.cfg{vinyl_dir = '%s'}
> > +os.exit(0)
> > +]], vinyl_dir)
> > +test:is(run_script(code), PANIC, "bootstrap with ENOENT from non-empty vinyl_dir")
> > +fio.rmtree(vinyl_dir)
> 
> I propose to clarify the test steps a bit:
> 

Corrected.

>  | vinyl_dir = <...>
>  |
>  | -- Fill vinyl_dir.
>  | run_script(<...>)
>  |
>  | -- Verify the case described above.
>  | local code = <...>
>  | test:is(<...>)
>  |
>  | -- Remove vinyl_dir.
>  | fio.rmtree(vinyl_dir)
> 
> > +
> > +test:check()
> > +os.exit(0)
> 
> I suggest to highlight a test status in the exit code (it follows the
> Lua Style Guide proposal [1]):
> 
>  | os.exit(test:check() and 0 or 1)
> 
> [1]: https://github.com/tarantool/doc/issues/1004

Corrected.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-08-19  6:04 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-17  5:29 [Tarantool-patches] [PATCH v5 1/2] vinyl: fix check vinyl_dir existence at bootstrap Alexander V. Tikhonov
2020-08-18 21:10 ` Alexander Turenko
2020-08-19  6:04   ` Alexander V. Tikhonov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox