[PATCH] test: fix long_row_timeout.test.lua failure in parallel mode

Vladimir Davydov vdavydov.dev at gmail.com
Tue Mar 26 18:55:16 MSK 2019


On Mon, Mar 25, 2019 at 08:52:30PM +0300, Serge Petrenko wrote:
> The test used to write big rows (20 mb in size), so when run in parallel
> mode, it put high load on the disk and processor, which made appliers
> time out multiple times during read, and caused the test to fail
> occasionally.
> So, instead of writing huge rows in test, introduce a new error
> injection restricting sio from reading more than a couple of bytes per
> request. This ensures that the test is still relevant and makes it a lot
> more lightweight.
> 
> Closes #4062
> ---
> https://github.com/tarantool/tarantool/tree/sp/gh-4062-early-timeout
> https://github.com/tarantool/tarantool/issues/4062
> 
>  src/lib/core/errinj.h                      |  1 +
>  src/lib/core/sio.c                         | 10 +++++++++-
>  test/box/errinj.result                     |  2 ++
>  test/replication/long_row_timeout.result   | 18 +++++-------------
>  test/replication/long_row_timeout.test.lua | 13 +++++--------
>  5 files changed, 22 insertions(+), 22 deletions(-)

All tests using error injection should be disabled in release builds,
even if they pass. I fixed test/repliation/suite.ini accordingly.

> 
> diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
> index 41783cc74..e4f06751d 100644
> --- a/src/lib/core/errinj.h
> +++ b/src/lib/core/errinj.h
> @@ -125,6 +125,7 @@ struct errinj {
>  	_(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \
>  	_(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \
>  	_(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \
> +	_(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT, {.iparam = -1}) \

Too long name :)

I renamed it to SIO_READ_MAX

>  
>  ENUM0(errinj_id, ERRINJ_LIST);
>  extern struct errinj errinjs[];
> diff --git a/src/lib/core/sio.c b/src/lib/core/sio.c
> index 8f25b8159..27ea1f0d1 100644
> --- a/src/lib/core/sio.c
> +++ b/src/lib/core/sio.c
> @@ -41,6 +41,7 @@
>  #include "trivia/util.h"
>  #include "exception.h"
>  #include "uri/uri.h"
> +#include "errinj.h"
>  
>  const char *
>  sio_socketname(int fd)
> @@ -222,7 +223,14 @@ sio_accept(int fd, struct sockaddr *addr, socklen_t *addrlen)
>  ssize_t
>  sio_read(int fd, void *buf, size_t count)
>  {
> -	ssize_t n = read(fd, buf, count);
> +	struct errinj *inj = errinj(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT);
> +	ssize_t n;
> +	if (inj != NULL && inj->iparam >= 0) {
> +		n = read(fd, buf, inj->iparam);

Even though it's just an error injection, we shouldn't allow writing
beyond the buffer so it should be MIN(count, inj->iparam).

Also, allowing 0 isn't very helpful - the application would just hang.

> +	} else {
> +		n = read(fd, buf, count);
> +	}
> +
>  	if (n < 0 && !sio_wouldblock(errno)) {
>  		/*
>  		 * Happens typically when the client closes

Committed the patch to the master branch with the following changes:

diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
index e4f06751..99891c5b 100644
--- a/src/lib/core/errinj.h
+++ b/src/lib/core/errinj.h
@@ -125,7 +125,7 @@ struct errinj {
 	_(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \
 	_(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \
-	_(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT, {.iparam = -1}) \
+	_(ERRINJ_SIO_READ_MAX, ERRINJ_INT, {.iparam = -1}) \
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/src/lib/core/sio.c b/src/lib/core/sio.c
index 27ea1f0d..ecd6d412 100644
--- a/src/lib/core/sio.c
+++ b/src/lib/core/sio.c
@@ -223,14 +223,11 @@ sio_accept(int fd, struct sockaddr *addr, socklen_t *addrlen)
 ssize_t
 sio_read(int fd, void *buf, size_t count)
 {
-	struct errinj *inj = errinj(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT);
-	ssize_t n;
-	if (inj != NULL && inj->iparam >= 0) {
-		n = read(fd, buf, inj->iparam);
-	} else {
-		n = read(fd, buf, count);
-	}
+	struct errinj *inj = errinj(ERRINJ_SIO_READ_MAX, ERRINJ_INT);
+	if (inj != NULL && inj->iparam > 0)
+		count = MIN(count, (size_t)inj->iparam);
 
+	ssize_t n = read(fd, buf, count);
 	if (n < 0 && !sio_wouldblock(errno)) {
 		/*
 		 * Happens typically when the client closes
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 8734b128..2bc41ac5 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -42,7 +42,7 @@ errinj.info()
     state: false
   ERRINJ_PORT_DUMP:
     state: false
-  ERRINJ_SOCKET_MAX_READ_SIZE:
+  ERRINJ_RELAY_BREAK_LSN:
     state: -1
   ERRINJ_WAL_IO:
     state: false
@@ -68,7 +68,7 @@ errinj.info()
     state: 0
   ERRINJ_XLOG_META:
     state: false
-  ERRINJ_RELAY_BREAK_LSN:
+  ERRINJ_SIO_READ_MAX:
     state: -1
   ERRINJ_VY_INDEX_FILE_RENAME:
     state: false
diff --git a/test/replication/long_row_timeout.result b/test/replication/long_row_timeout.result
index 571db7d9..9284d9c2 100644
--- a/test/replication/long_row_timeout.result
+++ b/test/replication/long_row_timeout.result
@@ -31,7 +31,7 @@ test_run:cmd('switch replica')
 ---
 - true
 ...
-box.error.injection.set("ERRINJ_SOCKET_MAX_READ_SIZE", 1)
+box.error.injection.set("ERRINJ_SIO_READ_MAX", 1)
 ---
 - ok
 ...
diff --git a/test/replication/long_row_timeout.test.lua b/test/replication/long_row_timeout.test.lua
index 65def3ba..7fc52e3e 100644
--- a/test/replication/long_row_timeout.test.lua
+++ b/test/replication/long_row_timeout.test.lua
@@ -14,7 +14,7 @@ box.info.replication[2].downstream.status
 -- make applier incapable of reading rows in one go, so that it
 -- yields a couple of times.
 test_run:cmd('switch replica')
-box.error.injection.set("ERRINJ_SOCKET_MAX_READ_SIZE", 1)
+box.error.injection.set("ERRINJ_SIO_READ_MAX", 1)
 test_run:cmd('switch default')
 s = box.schema.space.create('test')
 _ = s:create_index('pk')
diff --git a/test/replication/suite.ini b/test/replication/suite.ini
index 6e9e3edd..dd5b0140 100644
--- a/test/replication/suite.ini
+++ b/test/replication/suite.ini
@@ -3,7 +3,7 @@ core = tarantool
 script =  master.lua
 description = tarantool/box, replication
 disabled = consistent.test.lua
-release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua
+release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua
 config = suite.cfg
 lua_libs = lua/fast_replica.lua lua/rlimit.lua
 use_unix_sockets = True



More information about the Tarantool-patches mailing list