From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Serge Petrenko Subject: [PATCH] test: fix long_row_timeout.test.lua failure in parallel mode Date: Mon, 25 Mar 2019 20:52:30 +0300 Message-Id: <20190325175230.29985-1-sergepetrenko@tarantool.org> To: vdavydov.dev@gmail.com Cc: avtikhon@tarantool.org, tarantool-patches@freelists.org, Serge Petrenko List-ID: The test used to write big rows (20 MB in size), so when run in parallel mode it put a high load on the disk and the CPU, which made appliers time out multiple times during reads and caused the test to fail occasionally. Instead of writing huge rows in the test, introduce a new error injection that prevents sio from reading more than a couple of bytes per request. This keeps the test relevant and makes it a lot more lightweight. Closes #4062 --- https://github.com/tarantool/tarantool/tree/sp/gh-4062-early-timeout https://github.com/tarantool/tarantool/issues/4062 src/lib/core/errinj.h | 1 + src/lib/core/sio.c | 10 +++++++++- test/box/errinj.result | 2 ++ test/replication/long_row_timeout.result | 18 +++++------------- test/replication/long_row_timeout.test.lua | 13 +++++-------- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h index 41783cc74..e4f06751d 100644 --- a/src/lib/core/errinj.h +++ b/src/lib/core/errinj.h @@ -125,6 +125,7 @@ struct errinj { _(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \ _(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \ + _(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT, {.iparam = -1}) \ ENUM0(errinj_id, ERRINJ_LIST); extern struct errinj errinjs[]; diff --git a/src/lib/core/sio.c b/src/lib/core/sio.c index 8f25b8159..27ea1f0d1 100644 --- a/src/lib/core/sio.c +++ b/src/lib/core/sio.c @@ -41,6 +41,7 @@ #include "trivia/util.h" #include "exception.h" #include "uri/uri.h" +#include "errinj.h" const char * sio_socketname(int fd) @@ 
-222,7 +223,14 @@ sio_accept(int fd, struct sockaddr *addr, socklen_t *addrlen) ssize_t sio_read(int fd, void *buf, size_t count) { - ssize_t n = read(fd, buf, count); + struct errinj *inj = errinj(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT); + ssize_t n; + if (inj != NULL && inj->iparam >= 0) { + n = read(fd, buf, inj->iparam); + } else { + n = read(fd, buf, count); + } + if (n < 0 && !sio_wouldblock(errno)) { /* * Happens typically when the client closes diff --git a/test/box/errinj.result b/test/box/errinj.result index 8e76b21b3..8734b1282 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -42,6 +42,8 @@ errinj.info() state: false ERRINJ_PORT_DUMP: state: false + ERRINJ_SOCKET_MAX_READ_SIZE: + state: -1 ERRINJ_WAL_IO: state: false ERRINJ_WAL_FALLOCATE: diff --git a/test/replication/long_row_timeout.result b/test/replication/long_row_timeout.result index 5b5a46d51..571db7d9e 100644 --- a/test/replication/long_row_timeout.result +++ b/test/replication/long_row_timeout.result @@ -25,32 +25,27 @@ box.info.replication[2].downstream.status --- - follow ... -default_memtx_max_tuple_size = box.cfg.memtx_max_tuple_size ---- -... +-- make applier incapable of reading rows in one go, so that it +-- yields a couple of times. test_run:cmd('switch replica') --- - true ... -box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024} +box.error.injection.set("ERRINJ_SOCKET_MAX_READ_SIZE", 1) --- +- ok ... test_run:cmd('switch default') --- - true ... -box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024} ---- -... --- insert some big rows which cannot be read in one go, so applier yields --- on read a couple of times. s = box.schema.space.create('test') --- ... _ = s:create_index('pk') --- ... -for i = 1,5 do box.space.test:replace{1, digest.urandom(20 * 1024 * 1024)} collectgarbage('collect') end +for i = 1,5 do box.space.test:replace{1, digest.urandom(1024)} collectgarbage('collect') end --- ... 
-- replication_disconnect_timeout is 4 * replication_timeout, check that @@ -100,9 +95,6 @@ test_run:cmd('delete server replica') test_run:cleanup_cluster() --- ... -box.cfg{memtx_max_tuple_size = default_memtx_max_tuple_size} ---- -... box.schema.user.revoke('guest', 'replication') --- ... diff --git a/test/replication/long_row_timeout.test.lua b/test/replication/long_row_timeout.test.lua index 6e1d38b11..65def3ba1 100644 --- a/test/replication/long_row_timeout.test.lua +++ b/test/replication/long_row_timeout.test.lua @@ -10,17 +10,15 @@ test_run:cmd('create server replica with rpl_master=default, script="replication test_run:cmd('start server replica') box.info.replication[2].downstream.status -default_memtx_max_tuple_size = box.cfg.memtx_max_tuple_size + +-- make applier incapable of reading rows in one go, so that it +-- yields a couple of times. test_run:cmd('switch replica') -box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024} +box.error.injection.set("ERRINJ_SOCKET_MAX_READ_SIZE", 1) test_run:cmd('switch default') -box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024} - --- insert some big rows which cannot be read in one go, so applier yields --- on read a couple of times. s = box.schema.space.create('test') _ = s:create_index('pk') -for i = 1,5 do box.space.test:replace{1, digest.urandom(20 * 1024 * 1024)} collectgarbage('collect') end +for i = 1,5 do box.space.test:replace{1, digest.urandom(1024)} collectgarbage('collect') end -- replication_disconnect_timeout is 4 * replication_timeout, check that -- replica doesn't time out too early. test_run:cmd('setopt delimiter ";"') @@ -42,7 +40,6 @@ test_run:cmd('stop server replica') test_run:cmd('cleanup server replica') test_run:cmd('delete server replica') test_run:cleanup_cluster() -box.cfg{memtx_max_tuple_size = default_memtx_max_tuple_size} box.schema.user.revoke('guest', 'replication') -- Rotate xlogs so as not to replicate the huge rows in -- 2.17.2 (Apple Git-113)