[PATCH] test: fix long_row_timeout.test.lua failure in parallel mode

Serge Petrenko sergepetrenko at tarantool.org
Mon Mar 25 20:52:30 MSK 2019


The test used to write big rows (20 MB in size), so when run in parallel
mode, it put a high load on the disk and processor, which made appliers
time out multiple times during read and caused the test to fail
occasionally.
So, instead of writing huge rows in the test, introduce a new error
injection, ERRINJ_SOCKET_MAX_READ_SIZE, restricting sio_read() from
reading more than a couple of bytes per request. This ensures that the
test is still relevant and makes it a lot more lightweight.

Closes #4062
---
https://github.com/tarantool/tarantool/tree/sp/gh-4062-early-timeout
https://github.com/tarantool/tarantool/issues/4062

 src/lib/core/errinj.h                      |  1 +
 src/lib/core/sio.c                         | 10 +++++++++-
 test/box/errinj.result                     |  2 ++
 test/replication/long_row_timeout.result   | 18 +++++-------------
 test/replication/long_row_timeout.test.lua | 13 +++++--------
 5 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
index 41783cc74..e4f06751d 100644
--- a/src/lib/core/errinj.h
+++ b/src/lib/core/errinj.h
@@ -125,6 +125,7 @@ struct errinj {
 	_(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \
 	_(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \
+	_(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT, {.iparam = -1}) \
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/src/lib/core/sio.c b/src/lib/core/sio.c
index 8f25b8159..27ea1f0d1 100644
--- a/src/lib/core/sio.c
+++ b/src/lib/core/sio.c
@@ -41,6 +41,7 @@
 #include "trivia/util.h"
 #include "exception.h"
 #include "uri/uri.h"
+#include "errinj.h"
 
 const char *
 sio_socketname(int fd)
@@ -222,7 +223,14 @@ sio_accept(int fd, struct sockaddr *addr, socklen_t *addrlen)
 ssize_t
 sio_read(int fd, void *buf, size_t count)
 {
-	ssize_t n = read(fd, buf, count);
+	struct errinj *inj = errinj(ERRINJ_SOCKET_MAX_READ_SIZE, ERRINJ_INT);
+	ssize_t n;
+	if (inj != NULL && inj->iparam >= 0) {
+		n = read(fd, buf, inj->iparam);
+	} else {
+		n = read(fd, buf, count);
+	}
+
 	if (n < 0 && !sio_wouldblock(errno)) {
 		/*
 		 * Happens typically when the client closes
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 8e76b21b3..8734b1282 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -42,6 +42,8 @@ errinj.info()
     state: false
   ERRINJ_PORT_DUMP:
     state: false
+  ERRINJ_SOCKET_MAX_READ_SIZE:
+    state: -1
   ERRINJ_WAL_IO:
     state: false
   ERRINJ_WAL_FALLOCATE:
diff --git a/test/replication/long_row_timeout.result b/test/replication/long_row_timeout.result
index 5b5a46d51..571db7d9e 100644
--- a/test/replication/long_row_timeout.result
+++ b/test/replication/long_row_timeout.result
@@ -25,32 +25,27 @@ box.info.replication[2].downstream.status
 ---
 - follow
 ...
-default_memtx_max_tuple_size = box.cfg.memtx_max_tuple_size
----
-...
+-- make applier incapable of reading rows in one go, so that it
+-- yields a couple of times.
 test_run:cmd('switch replica')
 ---
 - true
 ...
-box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024}
+box.error.injection.set("ERRINJ_SOCKET_MAX_READ_SIZE", 1)
 ---
+- ok
 ...
 test_run:cmd('switch default')
 ---
 - true
 ...
-box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024}
----
-...
--- insert some big rows which cannot be read in one go, so applier yields
--- on read a couple of times.
 s = box.schema.space.create('test')
 ---
 ...
 _ = s:create_index('pk')
 ---
 ...
-for i = 1,5 do box.space.test:replace{1, digest.urandom(20 * 1024 * 1024)} collectgarbage('collect') end
+for i = 1,5 do box.space.test:replace{1, digest.urandom(1024)} collectgarbage('collect') end
 ---
 ...
 -- replication_disconnect_timeout is 4 * replication_timeout, check that
@@ -100,9 +95,6 @@ test_run:cmd('delete server replica')
 test_run:cleanup_cluster()
 ---
 ...
-box.cfg{memtx_max_tuple_size = default_memtx_max_tuple_size}
----
-...
 box.schema.user.revoke('guest', 'replication')
 ---
 ...
diff --git a/test/replication/long_row_timeout.test.lua b/test/replication/long_row_timeout.test.lua
index 6e1d38b11..65def3ba1 100644
--- a/test/replication/long_row_timeout.test.lua
+++ b/test/replication/long_row_timeout.test.lua
@@ -10,17 +10,15 @@ test_run:cmd('create server replica with rpl_master=default, script="replication
 test_run:cmd('start server replica')
 box.info.replication[2].downstream.status
 
-default_memtx_max_tuple_size = box.cfg.memtx_max_tuple_size
+
+-- make applier incapable of reading rows in one go, so that it
+-- yields a couple of times.
 test_run:cmd('switch replica')
-box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024}
+box.error.injection.set("ERRINJ_SOCKET_MAX_READ_SIZE", 1)
 test_run:cmd('switch default')
-box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024}
-
--- insert some big rows which cannot be read in one go, so applier yields
--- on read a couple of times.
 s = box.schema.space.create('test')
 _ = s:create_index('pk')
-for i = 1,5 do box.space.test:replace{1, digest.urandom(20 * 1024 * 1024)} collectgarbage('collect') end
+for i = 1,5 do box.space.test:replace{1, digest.urandom(1024)} collectgarbage('collect') end
 -- replication_disconnect_timeout is 4 * replication_timeout, check that
 -- replica doesn't time out too early.
 test_run:cmd('setopt delimiter ";"')
@@ -42,7 +40,6 @@ test_run:cmd('stop server replica')
 test_run:cmd('cleanup server replica')
 test_run:cmd('delete server replica')
 test_run:cleanup_cluster()
-box.cfg{memtx_max_tuple_size = default_memtx_max_tuple_size}
 box.schema.user.revoke('guest', 'replication')
 
 -- Rotate xlogs so as not to replicate the huge rows in
-- 
2.17.2 (Apple Git-113)




More information about the Tarantool-patches mailing list