* Re: [PATCH] test: fix long_row_timeout.test.lua failure in parallel mode
2019-03-25 17:52 [PATCH] test: fix long_row_timeout.test.lua failure in parallel mode Serge Petrenko
@ 2019-03-26 15:55 ` Vladimir Davydov
0 siblings, 0 replies; 2+ messages in thread
From: Vladimir Davydov @ 2019-03-26 15:55 UTC (permalink / raw)
To: Serge Petrenko; +Cc: avtikhon, tarantool-patches
On Mon, Mar 25, 2019 at 08:52:30PM +0300, Serge Petrenko wrote:
> The test used to write big rows (20 mb in size), so when run in parallel
> mode, it put high load on the disk and processor, which made appliers
> time out multiple times during read, and caused the test to fail
> occasionally.
> So, instead of writing huge rows in test, introduce a new error
> injection restricting sio from reading more than a couple of bytes per
> request. This ensures that the test is still relevant and makes it a lot
> more lightweight.
>
> Closes #4062
> ---
> https://github.com/tarantool/tarantool/tree/sp/gh-4062-early-timeout
> https://github.com/tarantool/tarantool/issues/4062
>
> src/lib/core/errinj.h | 1 +
> src/lib/core/sio.c | 10 +++++++++-
> test/box/errinj.result | 2 ++
> test/replication/long_row_timeout.result | 18 +++++-------------
> test/replication/long_row_timeout.test.lua | 13 +++++--------
> 5 files changed, 22 insertions(+), 22 deletions(-)
All tests using error injection should be disabled in release builds,
even if they pass. I fixed test/repliation/suite.ini accordingly.
>
> diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
> index 41783cc74..e4f06751d 100644
> --- a/src/lib/core/errinj.h
> +++ b/src/lib/core/errinj.h
> @@ -125,6 +125,7 @@ struct errinj {
> _(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \
> _(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \
> _(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \
> + _(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT, {.iparam = -1}) \
Too long name :)
I renamed it to SIO_READ_MAX
>
> ENUM0(errinj_id, ERRINJ_LIST);
> extern struct errinj errinjs[];
> diff --git a/src/lib/core/sio.c b/src/lib/core/sio.c
> index 8f25b8159..27ea1f0d1 100644
> --- a/src/lib/core/sio.c
> +++ b/src/lib/core/sio.c
> @@ -41,6 +41,7 @@
> #include "trivia/util.h"
> #include "exception.h"
> #include "uri/uri.h"
> +#include "errinj.h"
>
> const char *
> sio_socketname(int fd)
> @@ -222,7 +223,14 @@ sio_accept(int fd, struct sockaddr *addr, socklen_t *addrlen)
> ssize_t
> sio_read(int fd, void *buf, size_t count)
> {
> - ssize_t n = read(fd, buf, count);
> + struct errinj *inj = errinj(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT);
> + ssize_t n;
> + if (inj != NULL && inj->iparam >= 0) {
> + n = read(fd, buf, inj->iparam);
Even though it's just an error injection, we shouldn't allow writing
beyond the buffer so it should be MIN(count, inj->iparam).
Also, allowing 0 isn't very helpful - the application would just hang.
> + } else {
> + n = read(fd, buf, count);
> + }
> +
> if (n < 0 && !sio_wouldblock(errno)) {
> /*
> * Happens typically when the client closes
Committed the patch to the master branch with the following changes:
diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
index e4f06751..99891c5b 100644
--- a/src/lib/core/errinj.h
+++ b/src/lib/core/errinj.h
@@ -125,7 +125,7 @@ struct errinj {
_(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \
_(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \
_(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \
- _(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT, {.iparam = -1}) \
+ _(ERRINJ_SIO_READ_MAX, ERRINJ_INT, {.iparam = -1}) \
ENUM0(errinj_id, ERRINJ_LIST);
extern struct errinj errinjs[];
diff --git a/src/lib/core/sio.c b/src/lib/core/sio.c
index 27ea1f0d..ecd6d412 100644
--- a/src/lib/core/sio.c
+++ b/src/lib/core/sio.c
@@ -223,14 +223,11 @@ sio_accept(int fd, struct sockaddr *addr, socklen_t *addrlen)
ssize_t
sio_read(int fd, void *buf, size_t count)
{
- struct errinj *inj = errinj(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT);
- ssize_t n;
- if (inj != NULL && inj->iparam >= 0) {
- n = read(fd, buf, inj->iparam);
- } else {
- n = read(fd, buf, count);
- }
+ struct errinj *inj = errinj(ERRINJ_SIO_READ_MAX, ERRINJ_INT);
+ if (inj != NULL && inj->iparam > 0)
+ count = MIN(count, (size_t)inj->iparam);
+ ssize_t n = read(fd, buf, count);
if (n < 0 && !sio_wouldblock(errno)) {
/*
* Happens typically when the client closes
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 8734b128..2bc41ac5 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -42,7 +42,7 @@ errinj.info()
state: false
ERRINJ_PORT_DUMP:
state: false
- ERRINJ_SOCKET_MAX_READ_SIZE:
+ ERRINJ_RELAY_BREAK_LSN:
state: -1
ERRINJ_WAL_IO:
state: false
@@ -68,7 +68,7 @@ errinj.info()
state: 0
ERRINJ_XLOG_META:
state: false
- ERRINJ_RELAY_BREAK_LSN:
+ ERRINJ_SIO_READ_MAX:
state: -1
ERRINJ_VY_INDEX_FILE_RENAME:
state: false
diff --git a/test/replication/long_row_timeout.result b/test/replication/long_row_timeout.result
index 571db7d9..9284d9c2 100644
--- a/test/replication/long_row_timeout.result
+++ b/test/replication/long_row_timeout.result
@@ -31,7 +31,7 @@ test_run:cmd('switch replica')
---
- true
...
-box.error.injection.set("ERRINJ_SOCKET_MAX_READ_SIZE", 1)
+box.error.injection.set("ERRINJ_SIO_READ_MAX", 1)
---
- ok
...
diff --git a/test/replication/long_row_timeout.test.lua b/test/replication/long_row_timeout.test.lua
index 65def3ba..7fc52e3e 100644
--- a/test/replication/long_row_timeout.test.lua
+++ b/test/replication/long_row_timeout.test.lua
@@ -14,7 +14,7 @@ box.info.replication[2].downstream.status
-- make applier incapable of reading rows in one go, so that it
-- yields a couple of times.
test_run:cmd('switch replica')
-box.error.injection.set("ERRINJ_SOCKET_MAX_READ_SIZE", 1)
+box.error.injection.set("ERRINJ_SIO_READ_MAX", 1)
test_run:cmd('switch default')
s = box.schema.space.create('test')
_ = s:create_index('pk')
diff --git a/test/replication/suite.ini b/test/replication/suite.ini
index 6e9e3edd..dd5b0140 100644
--- a/test/replication/suite.ini
+++ b/test/replication/suite.ini
@@ -3,7 +3,7 @@ core = tarantool
script = master.lua
description = tarantool/box, replication
disabled = consistent.test.lua
-release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua
+release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua
config = suite.cfg
lua_libs = lua/fast_replica.lua lua/rlimit.lua
use_unix_sockets = True
^ permalink raw reply [flat|nested] 2+ messages in thread