From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Tue, 26 Mar 2019 18:55:16 +0300 From: Vladimir Davydov Subject: Re: [PATCH] test: fix long_row_timeout.test.lua failure in parallel mode Message-ID: <20190326155516.4n3i5olwyacyqul6@esperanza> References: <20190325175230.29985-1-sergepetrenko@tarantool.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20190325175230.29985-1-sergepetrenko@tarantool.org> To: Serge Petrenko Cc: avtikhon@tarantool.org, tarantool-patches@freelists.org List-ID: On Mon, Mar 25, 2019 at 08:52:30PM +0300, Serge Petrenko wrote: > The test used to write big rows (20 mb in size), so when run in parallel > mode, it put high load on the disk and processor, which made appliers > time out multiple times during read, and caused the test to fail > occasionally. > So, instead of writing huge rows in test, introduce a new error > injection restricting sio from reading more than a couple of bytes per > request. This ensures that the test is still relevant and makes it a lot > more lightweight. > > Closes #4062 > --- > https://github.com/tarantool/tarantool/tree/sp/gh-4062-early-timeout > https://github.com/tarantool/tarantool/issues/4062 > > src/lib/core/errinj.h | 1 + > src/lib/core/sio.c | 10 +++++++++- > test/box/errinj.result | 2 ++ > test/replication/long_row_timeout.result | 18 +++++------------- > test/replication/long_row_timeout.test.lua | 13 +++++-------- > 5 files changed, 22 insertions(+), 22 deletions(-) All tests using error injection should be disabled in release builds, even if they pass. I fixed test/repliation/suite.ini accordingly. > > diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h > index 41783cc74..e4f06751d 100644 > --- a/src/lib/core/errinj.h > +++ b/src/lib/core/errinj.h > @@ -125,6 +125,7 @@ struct errinj { > _(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \ > _(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \ > _(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \ > + _(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT, {.iparam = -1}) \ Too long name :) I renamed it to SIO_READ_MAX > > ENUM0(errinj_id, ERRINJ_LIST); > extern struct errinj errinjs[]; > diff --git a/src/lib/core/sio.c b/src/lib/core/sio.c > index 8f25b8159..27ea1f0d1 100644 > --- a/src/lib/core/sio.c > +++ b/src/lib/core/sio.c > @@ -41,6 +41,7 @@ > #include "trivia/util.h" > #include "exception.h" > #include "uri/uri.h" > +#include "errinj.h" > > const char * > sio_socketname(int fd) > @@ -222,7 +223,14 @@ sio_accept(int fd, struct sockaddr *addr, socklen_t *addrlen) > ssize_t > sio_read(int fd, void *buf, size_t count) > { > - ssize_t n = read(fd, buf, count); > + struct errinj *inj = errinj(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT); > + ssize_t n; > + if (inj != NULL && inj->iparam >= 0) { > + n = read(fd, buf, inj->iparam); Even though it's just an error injection, we shouldn't allow writing beyond the buffer so it should be MIN(count, inj->iparam). Also, allowing 0 isn't very helpful - the application would just hang. > + } else { > + n = read(fd, buf, count); > + } > + > if (n < 0 && !sio_wouldblock(errno)) { > /* > * Happens typically when the client closes Committed the patch to the master branch with the following changes: diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h index e4f06751..99891c5b 100644 --- a/src/lib/core/errinj.h +++ b/src/lib/core/errinj.h @@ -125,7 +125,7 @@ struct errinj { _(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \ _(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \ - _(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT, {.iparam = -1}) \ + _(ERRINJ_SIO_READ_MAX, ERRINJ_INT, {.iparam = -1}) \ ENUM0(errinj_id, ERRINJ_LIST); extern struct errinj errinjs[]; diff --git a/src/lib/core/sio.c b/src/lib/core/sio.c index 27ea1f0d..ecd6d412 100644 --- a/src/lib/core/sio.c +++ b/src/lib/core/sio.c @@ -223,14 +223,11 @@ sio_accept(int fd, struct sockaddr *addr, socklen_t *addrlen) ssize_t sio_read(int fd, void *buf, size_t count) { - struct errinj *inj = errinj(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT); - ssize_t n; - if (inj != NULL && inj->iparam >= 0) { - n = read(fd, buf, inj->iparam); - } else { - n = read(fd, buf, count); - } + struct errinj *inj = errinj(ERRINJ_SIO_READ_MAX, ERRINJ_INT); + if (inj != NULL && inj->iparam > 0) + count = MIN(count, (size_t)inj->iparam); + ssize_t n = read(fd, buf, count); if (n < 0 && !sio_wouldblock(errno)) { /* * Happens typically when the client closes diff --git a/test/box/errinj.result b/test/box/errinj.result index 8734b128..2bc41ac5 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -42,7 +42,7 @@ errinj.info() state: false ERRINJ_PORT_DUMP: state: false - ERRINJ_SOCKET_MAX_READ_SIZE: + ERRINJ_RELAY_BREAK_LSN: state: -1 ERRINJ_WAL_IO: state: false @@ -68,7 +68,7 @@ errinj.info() state: 0 ERRINJ_XLOG_META: state: false - ERRINJ_RELAY_BREAK_LSN: + ERRINJ_SIO_READ_MAX: state: -1 ERRINJ_VY_INDEX_FILE_RENAME: state: false diff --git a/test/replication/long_row_timeout.result b/test/replication/long_row_timeout.result index 571db7d9..9284d9c2 100644 --- a/test/replication/long_row_timeout.result +++ b/test/replication/long_row_timeout.result @@ -31,7 +31,7 @@ test_run:cmd('switch replica') --- - true ... -box.error.injection.set("ERRINJ_SOCKET_MAX_READ_SIZE", 1) +box.error.injection.set("ERRINJ_SIO_READ_MAX", 1) --- - ok ... diff --git a/test/replication/long_row_timeout.test.lua b/test/replication/long_row_timeout.test.lua index 65def3ba..7fc52e3e 100644 --- a/test/replication/long_row_timeout.test.lua +++ b/test/replication/long_row_timeout.test.lua @@ -14,7 +14,7 @@ box.info.replication[2].downstream.status -- make applier incapable of reading rows in one go, so that it -- yields a couple of times. test_run:cmd('switch replica') -box.error.injection.set("ERRINJ_SOCKET_MAX_READ_SIZE", 1) +box.error.injection.set("ERRINJ_SIO_READ_MAX", 1) test_run:cmd('switch default') s = box.schema.space.create('test') _ = s:create_index('pk') diff --git a/test/replication/suite.ini b/test/replication/suite.ini index 6e9e3edd..dd5b0140 100644 --- a/test/replication/suite.ini +++ b/test/replication/suite.ini @@ -3,7 +3,7 @@ core = tarantool script = master.lua description = tarantool/box, replication disabled = consistent.test.lua -release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua +release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua config = suite.cfg lua_libs = lua/fast_replica.lua lua/rlimit.lua use_unix_sockets = True