* [Tarantool-patches] [PATCH v4] test: fix flaky socket test
@ 2019-12-23 20:48 Ilya Kosarev
2019-12-25 1:29 ` Alexander Turenko
0 siblings, 1 reply; 6+ messages in thread
From: Ilya Kosarev @ 2019-12-23 20:48 UTC (permalink / raw)
To: tarantool-patches; +Cc: v.shpilevoy
socket.test had a number of flaky problems:
- socket readiness expectation
- race conditions on socket shutdown in emulation test cases
- UDP datagrams losses on mac os
- excessive random port searches
Now they are solved. Socket test is not fragile anymore.
Closes #4451, #4426, #4469
---
Branch: https://github.com/tarantool/tarantool/tree/i.kosarev/gh-4426-4451-fix-socket-test
Issues: https://github.com/tarantool/tarantool/issues/4426
https://github.com/tarantool/tarantool/issues/4451
https://github.com/tarantool/tarantool/issues/4469
Changes in v2:
- reconsidered socket readiness expectation
- reduced conditions waiting time
Changes in v3:
- reconsidered expectations to unify them
- simplified randomization
Changes in v4:
- left infinite timeouts alone
- wrapped UDP datagrams awaiting with wait_cond
- replaced manual port randomization with zero bind
- removed extra wait_cond wrappers
test/app/socket.result | 104 ++++++++++++++++++++++-----------------
test/app/socket.test.lua | 94 ++++++++++++++++++++---------------
test/app/suite.ini | 1 -
3 files changed, 114 insertions(+), 85 deletions(-)
diff --git a/test/app/socket.result b/test/app/socket.result
index fd299424c96..3c680125131 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -42,7 +42,7 @@ test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
---
- true
...
-WAIT_COND_TIME = 10
+WAIT_COND_TIME = 100
---
...
socket('PF_INET', 'SOCK_STREAM', 'tcp121222');
@@ -107,11 +107,11 @@ s:nonblock(true)
---
- true
...
-s:readable(.1)
+s:readable(WAIT_COND_TIME)
---
- true
...
-s:wait(.1)
+s:wait(WAIT_COND_TIME)
---
- RW
...
@@ -183,7 +183,7 @@ s:writable(0)
---
- true
...
-s:wait(.01)
+s:wait(WAIT_COND_TIME)
---
- RW
...
@@ -227,11 +227,11 @@ s:syswrite(ffi.cast('const char *', ping), #ping)
---
- 6
...
-s:readable(1)
+s:readable(WAIT_COND_TIME)
---
- true
...
-s:wait(.01)
+s:wait(WAIT_COND_TIME)
---
- RW
...
@@ -308,7 +308,7 @@ sc:nonblock(true)
---
- true
...
-sc:readable(.5)
+sc:readable(WAIT_COND_TIME)
---
- true
...
@@ -451,7 +451,7 @@ sc:sysconnect('127.0.0.1', s:name().port) or errno() == errno.EINPROGRESS
---
- true
...
-sc:writable(10)
+sc:writable(WAIT_COND_TIME)
---
- true
...
@@ -830,7 +830,7 @@ sc:sendto('127.0.0.1', s:name().port, 'Hello, world')
---
- 12
...
-s:readable(10)
+s:readable(WAIT_COND_TIME)
---
- true
...
@@ -842,7 +842,7 @@ sc:sendto('127.0.0.1', s:name().port, 'Hello, world, 2')
---
- 15
...
-s:readable(10)
+s:readable(WAIT_COND_TIME)
---
- true
...
@@ -898,7 +898,7 @@ sc:sendto('127.0.0.1', s:name().port, 'Hello, World!')
---
- 13
...
-s:readable(1)
+s:readable(WAIT_COND_TIME)
---
- true
...
@@ -913,7 +913,7 @@ s:sendto(from.host, from.port, 'Hello, hello!')
---
- 13
...
-sc:readable(1)
+sc:readable(WAIT_COND_TIME)
---
- true
...
@@ -1070,29 +1070,18 @@ s = nil
master = socket('PF_INET', 'SOCK_STREAM', 'tcp')
---
...
-master:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true)
+master:bind('127.0.0.1', 0)
---
- true
...
-port = 32768 + math.random(0, 32767)
+port = master:name().port
---
...
--- SO_REUSEADDR allows to bind to the same source addr:port twice,
--- so listen() can return EADDRINUSE and so we check it within
--- wait_cond().
-test_run:cmd("setopt delimiter ';'")
+master:listen()
---
- true
...
-test_run:wait_cond(function()
- local ok = master:bind('127.0.0.1', port)
- local ok = ok and master:listen()
- if not ok then
- port = 32768 + math.random(32768)
- return false, master:error()
- end
- return true
-end, WAIT_COND_TIME);
+test_run:cmd("setopt delimiter ';'")
---
- true
...
@@ -1822,8 +1811,14 @@ test_run:cmd("setopt delimiter ';'")
---
- true
...
+socket_opened = true
cfiber = fiber.create(function(sc, rch, wch)
- while sc:send(wch:get()) and rch:put(sc:receive("*l")) do end
+ while socket_opened do
+ sc:send(wch:get())
+ local data = sc:receive("*l")
+ if not socket_opened then sc:close() end
+ rch:put(data)
+ end
end, sc, rch, wch);
---
...
@@ -1936,6 +1931,9 @@ c:receive("*l")
---
-
...
+socket_opened = false
+---
+...
wch:put("Fu")
---
- true
@@ -1944,10 +1942,6 @@ c:send("354 Please type your message\n")
---
- 29
...
-sc:close()
----
-- 1
-...
c:receive("*l", "Line: ")
---
- null
@@ -2292,7 +2286,9 @@ sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
---
- 0
...
-received_message = receiving_socket:recv()
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv() \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2315,7 +2311,9 @@ sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
---
- 0
...
-received_message, from = receiving_socket:recvfrom()
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom() \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2392,7 +2390,9 @@ sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
---
- 0
...
-received_message = receiving_socket:recv(512)
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv(512) \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2415,7 +2415,9 @@ sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
---
- 0
...
-received_message, from = receiving_socket:recvfrom(512)
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom(512) \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2452,7 +2454,9 @@ sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
---
- 1025
...
-received_message = receiving_socket:recv()
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv() \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2484,7 +2488,9 @@ sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
---
- 1025
...
-received_message, from = receiving_socket:recvfrom()
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom() \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2523,7 +2529,9 @@ sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
---
- 1025
...
-received_message = receiving_socket:recv(512)
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv(512) \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2566,7 +2574,9 @@ sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
---
- 1025
...
-received_message, from = receiving_socket:recvfrom(512)
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom(512) \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2643,6 +2653,10 @@ sending_socket:sysconnect('127.0.0.1', listening_socket_port) or errno() == errn
---
- true
...
+listening_socket:readable(WAIT_COND_TIME)
+---
+- true
+...
receiving_socket = listening_socket:accept()
---
...
@@ -2651,7 +2665,9 @@ sending_socket:write(message)
- 513
...
-- case: recvfrom reads first 512 bytes from the message with tcp
-received_message, from = receiving_socket:recvfrom()
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom() \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2816,7 +2832,7 @@ test_run:cmd("clear filter")
---
- true
...
--- case: sicket receive inconsistent behavior
+-- case: socket receive inconsistent behavior
chan = fiber.channel()
---
...
@@ -2826,10 +2842,10 @@ counter = 0
fn = function(s) counter = 0; while true do s:write((tostring(counter)):rep(chan:get())); counter = counter + 1 end end
---
...
-srv = socket.tcp_server('0.0.0.0', 8888, fn)
+srv = socket.tcp_server('0.0.0.0', 0, fn)
---
...
-s = socket.connect('localhost', 8888)
+s = socket.connect('localhost', srv:name().port)
---
...
chan:put(5)
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index c72d41763f4..4ed6c0f25f7 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -13,7 +13,7 @@ env = require('test_run')
test_run = env.new()
test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
-WAIT_COND_TIME = 10
+WAIT_COND_TIME = 100
socket('PF_INET', 'SOCK_STREAM', 'tcp121222');
@@ -39,8 +39,8 @@ s:nonblock(false)
s:nonblock()
s:nonblock(true)
-s:readable(.1)
-s:wait(.1)
+s:readable(WAIT_COND_TIME)
+s:wait(WAIT_COND_TIME)
socket.iowait(s:fd(), 'RW')
socket.iowait(s:fd(), 3)
socket.iowait(s:fd(), 'R')
@@ -58,7 +58,7 @@ s:errno() > 0
s:error()
s:writable(.00000000000001)
s:writable(0)
-s:wait(.01)
+s:wait(WAIT_COND_TIME)
socket.iowait(nil, nil, -1)
socket.iowait(nil, nil, 0.0001)
socket.iowait(-1, nil, 0.0001)
@@ -75,8 +75,8 @@ ping = msgpack.encode(string.len(ping)) .. ping
-- test syswrite with char *
s:syswrite(ffi.cast('const char *', ping), #ping)
-s:readable(1)
-s:wait(.01)
+s:readable(WAIT_COND_TIME)
+s:wait(WAIT_COND_TIME)
pong = s:sysread()
string.len(pong)
@@ -100,7 +100,7 @@ sc = socket('PF_INET', 'SOCK_STREAM', 'tcp')
sc:nonblock(false)
sc:sysconnect('127.0.0.1', s:name().port)
sc:nonblock(true)
-sc:readable(.5)
+sc:readable(WAIT_COND_TIME)
sc:sysread()
string.match(tostring(sc), ', peer') ~= nil
#sevres
@@ -140,7 +140,7 @@ s:listen(128)
sc = socket('PF_INET', 'SOCK_STREAM', 'tcp')
sc:sysconnect('127.0.0.1', s:name().port) or errno() == errno.EINPROGRESS
-sc:writable(10)
+sc:writable(WAIT_COND_TIME)
sc:write('Hello, world')
sa, addr = s:accept()
@@ -266,11 +266,11 @@ s = socket('AF_INET', 'SOCK_DGRAM', 'udp')
s:bind('127.0.0.1', 0)
sc = socket('AF_INET', 'SOCK_DGRAM', 'udp')
sc:sendto('127.0.0.1', s:name().port, 'Hello, world')
-s:readable(10)
+s:readable(WAIT_COND_TIME)
s:recv()
sc:sendto('127.0.0.1', s:name().port, 'Hello, world, 2')
-s:readable(10)
+s:readable(WAIT_COND_TIME)
d, from = s:recvfrom()
from.port > 0
from.port = 'Random port'
@@ -286,11 +286,11 @@ sc = socket('AF_INET', 'SOCK_DGRAM', 'udp')
sc:nonblock(true)
sc:sendto('127.0.0.1', s:name().port)
sc:sendto('127.0.0.1', s:name().port, 'Hello, World!')
-s:readable(1)
+s:readable(WAIT_COND_TIME)
data, from = s:recvfrom(10)
data
s:sendto(from.host, from.port, 'Hello, hello!')
-sc:readable(1)
+sc:readable(WAIT_COND_TIME)
data_r, from_r = sc:recvfrom()
data_r
from_r.host
@@ -342,21 +342,10 @@ s = nil
-- random port
master = socket('PF_INET', 'SOCK_STREAM', 'tcp')
-master:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true)
-port = 32768 + math.random(0, 32767)
--- SO_REUSEADDR allows to bind to the same source addr:port twice,
--- so listen() can return EADDRINUSE and so we check it within
--- wait_cond().
+master:bind('127.0.0.1', 0)
+port = master:name().port
+master:listen()
test_run:cmd("setopt delimiter ';'")
-test_run:wait_cond(function()
- local ok = master:bind('127.0.0.1', port)
- local ok = ok and master:listen()
- if not ok then
- port = 32768 + math.random(32768)
- return false, master:error()
- end
- return true
-end, WAIT_COND_TIME);
function gh361()
local s = socket('PF_INET', 'SOCK_STREAM', 'tcp')
s:sysconnect('127.0.0.1', port)
@@ -619,8 +608,14 @@ s:settimeout(100500)
rch, wch = fiber.channel(1), fiber.channel(1)
sc = socket.connect(host, port)
test_run:cmd("setopt delimiter ';'")
+socket_opened = true
cfiber = fiber.create(function(sc, rch, wch)
- while sc:send(wch:get()) and rch:put(sc:receive("*l")) do end
+ while socket_opened do
+ sc:send(wch:get())
+ local data = sc:receive("*l")
+ if not socket_opened then sc:close() end
+ rch:put(data)
+ end
end, sc, rch, wch);
test_run:cmd("setopt delimiter ''");
@@ -651,9 +646,9 @@ rch:get()
wch:put("DATA\n")
c:receive(4)
c:receive("*l")
+socket_opened = false
wch:put("Fu")
c:send("354 Please type your message\n")
-sc:close()
c:receive("*l", "Line: ")
c:receive()
c:receive(10)
@@ -778,7 +773,9 @@ e == errno.EAGAIN -- expected true
-- case: recv, zero datagram
sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
-received_message = receiving_socket:recv()
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv() \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == '' -- expected true
received_message
@@ -786,7 +783,9 @@ e == 0 -- expected true
-- case: recvfrom, zero datagram
sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
-received_message, from = receiving_socket:recvfrom()
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom() \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == '' -- expected true
received_message
@@ -812,7 +811,9 @@ e == errno.EAGAIN -- expected true
-- case: recv, zero datagram, explicit size
sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
-received_message = receiving_socket:recv(512)
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv(512) \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == '' -- expected true
received_message
@@ -820,7 +821,9 @@ e == 0 -- expected true
-- case: recvfrom, zero datagram, explicit size
sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
-received_message, from = receiving_socket:recvfrom(512)
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom(512) \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == '' -- expected true
received_message
@@ -833,7 +836,9 @@ message = string.rep('x', message_len)
-- case: recv, non-zero length datagram, the buffer size should be evaluated
sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
-received_message = receiving_socket:recv()
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv() \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == message -- expected true
received_message:len()
@@ -844,7 +849,9 @@ e
-- case: recvfrom, non-zero length datagram, the buffer size should be
-- evaluated
sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
-received_message, from = receiving_socket:recvfrom()
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom() \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == message -- expected true
received_message:len()
@@ -856,7 +863,9 @@ e
-- case: recv truncates a datagram larger then the buffer of an explicit size
sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
-received_message = receiving_socket:recv(512)
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv(512) \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == message:sub(1, 512) -- expected true
received_message:len() == 512 -- expected true
@@ -872,7 +881,9 @@ message = string.rep('y', message_len)
-- case: recvfrom truncates a datagram larger then the buffer of an explicit size
sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
-received_message, from = receiving_socket:recvfrom(512)
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom(512) \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == message:sub(1, 512) -- expected true
received_message:len() == 512 -- expected true
@@ -896,11 +907,14 @@ listening_socket:listen()
listening_socket_port = listening_socket:name().port
sending_socket = socket('AF_INET', 'SOCK_STREAM', 'tcp')
sending_socket:sysconnect('127.0.0.1', listening_socket_port) or errno() == errno.EINPROGRESS
+listening_socket:readable(WAIT_COND_TIME)
receiving_socket = listening_socket:accept()
sending_socket:write(message)
-- case: recvfrom reads first 512 bytes from the message with tcp
-received_message, from = receiving_socket:recvfrom()
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom() \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == message:sub(1, 512) -- expected true
received_message:len() == 512 -- expected true
@@ -960,12 +974,12 @@ server:close()
test_run:cmd("clear filter")
--- case: sicket receive inconsistent behavior
+-- case: socket receive inconsistent behavior
chan = fiber.channel()
counter = 0
fn = function(s) counter = 0; while true do s:write((tostring(counter)):rep(chan:get())); counter = counter + 1 end end
-srv = socket.tcp_server('0.0.0.0', 8888, fn)
-s = socket.connect('localhost', 8888)
+srv = socket.tcp_server('0.0.0.0', 0, fn)
+s = socket.connect('localhost', srv:name().port)
chan:put(5)
chan:put(5)
s:receive(5)
diff --git a/test/app/suite.ini b/test/app/suite.ini
index 79432e29a76..dd802d98cf9 100644
--- a/test/app/suite.ini
+++ b/test/app/suite.ini
@@ -7,4 +7,3 @@ use_unix_sockets = True
use_unix_sockets_iproto = True
is_parallel = True
pretest_clean = True
-fragile = socket.test.lua ; gh-4426 gh-4451
--
2.17.1
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Tarantool-patches] [PATCH v4] test: fix flaky socket test
2019-12-23 20:48 [Tarantool-patches] [PATCH v4] test: fix flaky socket test Ilya Kosarev
@ 2019-12-25 1:29 ` Alexander Turenko
2019-12-25 21:06 ` Ilya Kosarev
2019-12-25 23:47 ` Alexander Turenko
0 siblings, 2 replies; 6+ messages in thread
From: Alexander Turenko @ 2019-12-25 1:29 UTC (permalink / raw)
To: Ilya Kosarev; +Cc: tarantool-patches, v.shpilevoy
Thanks for the patch!
I really appreciate the work in direction toward stable tests. Unstable
CI sometimes hides real problems, so actually it is direction to make
tarantool itself more stable.
All changes look correct. There are places however that we can polish a
bit. Please, consider my comments below.
How I tested it:
$ (cd test && ./test-run.py -j 30 --force $(yes app/socket | head -n 1000))
Observed the following fails:
Case 1.
[008] Test failed! Result content mismatch:
[008] --- app/socket.result Tue Dec 24 15:55:40 2019
[008] +++ app/socket.reject Tue Dec 24 15:56:24 2019
[008] @@ -1371,7 +1371,7 @@
[008] ...
[008] cnt
[008] ---
[008] -- 2
[008] +- 3
[008] ...
[008] client:write('hi')
[008] ---
Another instance is connected to our server. I found that it is
socket.connect() from 'case: socket receive inconsistent behavior' (and
this is the reason of case 3 below). The code the the following:
| srv = socket.tcp_server('0.0.0.0', 0, fn)
| s = socket.connect('localhost', srv:name().port)
localhost may be resolved into ::1 (ipv6) and 127.0.0.1 (ipv4). Linux
allows to bind the same port number on different addresses.
getaddrinfo() on Linux give ipv6 addr/port first (usually, depends
on settings) and if another process listens on ::1 on the same port as
we bound on ipv4, we'll connect to this process.
The change that eliminates the fail above for me:
@@ -979,7 +983,7 @@ chan = fiber.channel()
counter = 0
fn = function(s) counter = 0; while true do s:write((tostring(counter)):rep(chan:get())); counter = counter + 1 end end
srv = socket.tcp_server('0.0.0.0', 0, fn)
-s = socket.connect('localhost', srv:name().port)
+s = socket.connect('127.0.0.1', srv:name().port)
chan:put(5)
chan:put(5)
s:receive(5)
(CCed Maria as author of the test case.)
However it is possible that there are other similar cases. I suggest to
verify the test against them. Please, beware of changing negative cases
(when we expect that a timeout will occur): setting large timeout on
them would significantly increase the test run time (we'll always wait
100 seconds at the point).
BTW, why not to listen on 127.0.0.1 rather then on all interfaces
(0.0.0.0)?
BTW, unix sockets may help to don't bother with clashes of different
kinds.
----
[018] --- app/socket.result Tue Dec 24 15:58:20 2019
[018] +++ app/socket.reject Tue Dec 24 15:59:29 2019
[018] @@ -2822,7 +2822,7 @@
[018] ...
[018] client:read(1, 5) == ''
[018] ---
[018] -- true
[018] +- false
[018] ...
[018] server:close()
[018] ---
I printed client_.errno. It is 110 (ETIMEDOUT). It seems it worth to
replace 5 with WAIT_COND_TIME (or, as TIMEOUT as I proposed to name it
below). Maybe there are other such places.
----
Case 3.
Test hung! Result content mismatch:
--- app/socket.result Wed Dec 25 01:04:26 2019
+++ var/003_app/socket.result Wed Dec 25 01:05:22 2019
@@ -2857,34 +2857,3 @@
---
...
chan:put(5)
----
-- true
-...
-chan:put(5)
----
-- true
-...
-s:receive(5)
----
-- '00000'
-...
-chan:put(5)
----
-- true
-...
-s:settimeout(1)
----
-- 1
-...
-s:receive('*a')
----
-- '1111122222'
-...
-s:close()
----
-- 1
-...
-srv:close()
----
-- true
-...
Explained above (see case 1).
----
[017] --- app/socket.result Wed Dec 25 01:18:29 2019
[017] +++ app/socket.reject Wed Dec 25 01:19:30 2019
[017] @@ -40,7 +40,7 @@
[017] ...
[017] test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
[017] ---
[017] -- true
[017] +- Hello, world
It seems we connected to some of our socket server rather then test-run.
Looks like the same ipv4 / ipv6 problem.
After the following patch for test-run I don't see it anymore (don't
sure I made enough runs, however):
diff --git a/lib/test_suite.py b/lib/test_suite.py
index 84b99f4..0a137c3 100644
--- a/lib/test_suite.py
+++ b/lib/test_suite.py
@@ -197,7 +197,7 @@ class TestSuite:
def start_server(self, server):
# create inspector daemon for cluster tests
inspector = TarantoolInspector(
- 'localhost', server.inspector_port
+ '127.0.0.1', server.inspector_port
)
inspector.start()
# fixme: remove this string if we fix all legacy tests
diff --git a/test_run.lua b/test_run.lua
index e37b559..fc1b833 100644
--- a/test_run.lua
+++ b/test_run.lua
@@ -458,7 +458,7 @@ local inspector_methods = {
local function inspector_new(host, port)
local inspector = {}
- inspector.host = host or 'localhost'
+ inspector.host = host or '127.0.0.1'
inspector.port = port or tonumber(os.getenv('INSPECTOR'))
if inspector.port == nil then
error('Inspector not started')
----
On Mon, Dec 23, 2019 at 11:48:33PM +0300, Ilya Kosarev wrote:
> socket.test had a number of flaky problems:
> - socket readiness expectation
> - race conditions on socket shutdown in emulation test cases
> - UDP datagrams losses on mac os
> - excessive random port searches
> Now they are solved. Socket test is not fragile anymore.
>
> Closes #4451, #4426, #4469
> ---
> Branch: https://github.com/tarantool/tarantool/tree/i.kosarev/gh-4426-4451-fix-socket-test
> Issues: https://github.com/tarantool/tarantool/issues/4426
> https://github.com/tarantool/tarantool/issues/4451
> https://github.com/tarantool/tarantool/issues/4469
>
> Changes in v2:
> - reconsidered socket readiness expectation
> - reduced conditions waiting time
>
> Changes in v3:
> - reconsidered expectations to unify them
> - simplified randomization
>
> Changes in v4:
> - left infinite timeouts alone
> - wrapped UDP datagrams awaiting with wait_cond
That's my bad. As I see, other parts of the test follows the following
pattern for waiting UDP packets:
| s:readable(TIMEOUT)
| s:recv()
test_run:wait_cond() is correct solution too, but :readable() looks a
bit simpler. Anyway, let's use one that you like more.
(It is the only change I propose, which I didn't verify.)
> - replaced manual port randomization with zero bind
Nice idea!
> - removed extra wait_cond wrappers
>
> test/app/socket.result | 104 ++++++++++++++++++++++-----------------
> test/app/socket.test.lua | 94 ++++++++++++++++++++---------------
> test/app/suite.ini | 1 -
> 3 files changed, 114 insertions(+), 85 deletions(-)
> diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
> index c72d41763f4..4ed6c0f25f7 100644
> --- a/test/app/socket.test.lua
> +++ b/test/app/socket.test.lua
> @@ -13,7 +13,7 @@ env = require('test_run')
> test_run = env.new()
> test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
>
> -WAIT_COND_TIME = 10
> +WAIT_COND_TIME = 100
Nit: I would name it just TIMEOUT, since it is not only for
test_run:wait_cond() now.
> @@ -619,8 +608,14 @@ s:settimeout(100500)
> rch, wch = fiber.channel(1), fiber.channel(1)
> sc = socket.connect(host, port)
> test_run:cmd("setopt delimiter ';'")
> +socket_opened = true
> cfiber = fiber.create(function(sc, rch, wch)
> - while sc:send(wch:get()) and rch:put(sc:receive("*l")) do end
> + while socket_opened do
> + sc:send(wch:get())
> + local data = sc:receive("*l")
> + if not socket_opened then sc:close() end
> + rch:put(data)
> + end
I would use existing channel to signal the fiber to close the socket: it
looks simpler then using a channel and a variable both. Proposed diff:
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index 4ed6c0f25..0fe22709a 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -608,13 +608,16 @@ s:settimeout(100500)
rch, wch = fiber.channel(1), fiber.channel(1)
sc = socket.connect(host, port)
test_run:cmd("setopt delimiter ';'")
-socket_opened = true
cfiber = fiber.create(function(sc, rch, wch)
- while socket_opened do
- sc:send(wch:get())
- local data = sc:receive("*l")
- if not socket_opened then sc:close() end
- rch:put(data)
+ while true do
+ local outgoing_data = wch:get()
+ if outgoing_data == nil then
+ sc:close()
+ break
+ end
+ sc:send(outgoing_data)
+ local incoming_data = sc:receive("*l")
+ rch:put(incoming_data)
end
end, sc, rch, wch);
test_run:cmd("setopt delimiter ''");
@@ -646,9 +649,10 @@ rch:get()
wch:put("DATA\n")
c:receive(4)
c:receive("*l")
-socket_opened = false
+rch:get()
wch:put("Fu")
c:send("354 Please type your message\n")
+wch:put(nil) -- EOF
c:receive("*l", "Line: ")
c:receive()
c:receive(10)
> end, sc, rch, wch);
> test_run:cmd("setopt delimiter ''");
>
> @@ -651,9 +646,9 @@ rch:get()
> wch:put("DATA\n")
> c:receive(4)
> c:receive("*l")
> +socket_opened = false
> wch:put("Fu")
> c:send("354 Please type your message\n")
> -sc:close()
> c:receive("*l", "Line: ")
> c:receive()
> c:receive(10)
> @@ -778,7 +773,9 @@ e == errno.EAGAIN -- expected true
>
> -- case: recv, zero datagram
> sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
> -received_message = receiving_socket:recv()
> +received_message = test_run:wait_cond(function() \
> + return receiving_socket:recv() \
> +end, WAIT_COND_TIME)
Proposed to use :readable() (see above).
> --- case: sicket receive inconsistent behavior
> +-- case: socket receive inconsistent behavior
> chan = fiber.channel()
> counter = 0
> fn = function(s) counter = 0; while true do s:write((tostring(counter)):rep(chan:get())); counter = counter + 1 end end
> -srv = socket.tcp_server('0.0.0.0', 8888, fn)
> -s = socket.connect('localhost', 8888)
> +srv = socket.tcp_server('0.0.0.0', 0, fn)
> +s = socket.connect('localhost', srv:name().port)
Proposed to connect to 127.0.0.1 (see above).
> chan:put(5)
> chan:put(5)
> s:receive(5)
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Tarantool-patches] [PATCH v4] test: fix flaky socket test
2019-12-25 1:29 ` Alexander Turenko
@ 2019-12-25 21:06 ` Ilya Kosarev
2019-12-25 23:47 ` Alexander Turenko
1 sibling, 0 replies; 6+ messages in thread
From: Ilya Kosarev @ 2019-12-25 21:06 UTC (permalink / raw)
To: Alexander Turenko; +Cc: tarantool-patches, v.shpilevoy
[-- Attachment #1: Type: text/plain, Size: 10388 bytes --]
Hi!
Thanks for your review!
Sent v5 including all proposed enhancements.
>Среда, 25 декабря 2019, 4:29 +03:00 от Alexander Turenko <alexander.turenko@tarantool.org>:
>
>Thanks for the patch!
>
>I really appreciate the work in direction toward stable tests. Unstable
>CI sometimes hides real problems, so actually it is direction to make
>tarantool itself more stable.
>
>All changes look correct. There are places however that we can polish a
>bit. Please, consider my comments below.
>
>How I tested it:
>
>$ (cd test && ./test-run.py -j 30 --force $(yes app/socket | head -n 1000))
>
>Observed the following fails:
>
>Case 1.
>
>[008] Test failed! Result content mismatch:
>[008] --- app/socket.result Tue Dec 24 15:55:40 2019
>[008] +++ app/socket.reject Tue Dec 24 15:56:24 2019
>[008] @@ -1371,7 +1371,7 @@
>[008] ...
>[008] cnt
>[008] ---
>[008] -- 2
>[008] +- 3
>[008] ...
>[008] client:write('hi')
>[008] ---
>
>Another instance is connected to our server. I found that it is
>socket.connect() from 'case: socket receive inconsistent behavior' (and
>this is the reason of case 3 below). The code the the following:
>
> | srv = socket.tcp_server('0.0.0.0', 0, fn)
> | s = socket.connect('localhost', srv:name().port)
>
>localhost may be resolved into ::1 (ipv6) and 127.0.0.1 (ipv4). Linux
>allows to bind the same port number on different addresses.
>getaddrinfo() on Linux give ipv6 addr/port first (usually, depends
>on settings) and if another process listens on ::1 on the same port as
>we bound on ipv4, we'll connect to this process.
>
>The change that eliminates the fail above for me:
>
>@@ -979,7 +983,7 @@ chan = fiber.channel()
> counter = 0
> fn = function(s) counter = 0; while true do s:write((tostring(counter)):rep(chan:get())); counter = counter + 1 end end
> srv = socket.tcp_server('0.0.0.0', 0, fn)
>-s = socket.connect('localhost', srv:name().port)
>+s = socket.connect('127.0.0.1', srv:name().port)
> chan:put(5)
> chan:put(5)
> s:receive(5)
>
>(CCed Maria as author of the test case.)
>
>However it is possible that there are other similar cases. I suggest to
>verify the test against them. Please, beware of changing negative cases
>(when we expect that a timeout will occur): setting large timeout on
>them would significantly increase the test run time (we'll always wait
>100 seconds at the point).
>
>BTW, why not to listen on 127.0.0.1 rather then on all interfaces
>(0.0.0.0)?
>
>BTW, unix sockets may help to don't bother with clashes of different
>kinds.
>
>----
>
>[018] --- app/socket.result Tue Dec 24 15:58:20 2019
>[018] +++ app/socket.reject Tue Dec 24 15:59:29 2019
>[018] @@ -2822,7 +2822,7 @@
>[018] ...
>[018] client:read(1, 5) == ''
>[018] ---
>[018] -- true
>[018] +- false
>[018] ...
>[018] server:close()
>[018] ---
>
>I printed client_.errno. It is 110 (ETIMEDOUT). It seems it worth to
>replace 5 with WAIT_COND_TIME (or, as TIMEOUT as I proposed to name it
>below). Maybe there are other such places.
>
>----
>
>Case 3.
>
>Test hung! Result content mismatch:
>--- app/socket.result Wed Dec 25 01:04:26 2019
>+++ var/003_app/socket.result Wed Dec 25 01:05:22 2019
>@@ -2857,34 +2857,3 @@
> ---
> ...
> chan:put(5)
>----
>-- true
>-...
>-chan:put(5)
>----
>-- true
>-...
>-s:receive(5)
>----
>-- '00000'
>-...
>-chan:put(5)
>----
>-- true
>-...
>-s:settimeout(1)
>----
>-- 1
>-...
>-s:receive('*a')
>----
>-- '1111122222'
>-...
>-s:close()
>----
>-- 1
>-...
>-srv:close()
>----
>-- true
>-...
>
>Explained above (see case 1).
>
>----
>
>[017] --- app/socket.result Wed Dec 25 01:18:29 2019
>[017] +++ app/socket.reject Wed Dec 25 01:19:30 2019
>[017] @@ -40,7 +40,7 @@
>[017] ...
>[017] test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
>[017] ---
>[017] -- true
>[017] +- Hello, world
>
>It seems we connected to some of our socket server rather then test-run.
>Looks like the same ipv4 / ipv6 problem.
>
>After the following patch for test-run I don't see it anymore (don't
>sure I made enough runs, however):
>
>diff --git a/lib/test_suite.py b/lib/test_suite.py
>index 84b99f4..0a137c3 100644
>--- a/lib/test_suite.py
>+++ b/lib/test_suite.py
>@@ -197,7 +197,7 @@ class TestSuite:
> def start_server(self, server):
> # create inspector daemon for cluster tests
> inspector = TarantoolInspector(
>- 'localhost', server.inspector_port
>+ '127.0.0.1', server.inspector_port
> )
> inspector.start()
> # fixme: remove this string if we fix all legacy tests
>diff --git a/test_run.lua b/test_run.lua
>index e37b559..fc1b833 100644
>--- a/test_run.lua
>+++ b/test_run.lua
>@@ -458,7 +458,7 @@ local inspector_methods = {
> local function inspector_new(host, port)
> local inspector = {}
>
>- inspector.host = host or 'localhost'
>+ inspector.host = host or '127.0.0.1'
> inspector.port = port or tonumber(os.getenv('INSPECTOR'))
> if inspector.port == nil then
> error('Inspector not started')
>
>----
>
>On Mon, Dec 23, 2019 at 11:48:33PM +0300, Ilya Kosarev wrote:
>> socket.test had a number of flaky problems:
>> - socket readiness expectation
>> - race conditions on socket shutdown in emulation test cases
>> - UDP datagrams losses on mac os
>> - excessive random port searches
>> Now they are solved. Socket test is not fragile anymore.
>>
>> Closes #4451, #4426, #4469
>> ---
>> Branch: https://github.com/tarantool/tarantool/tree/i.kosarev/gh-4426-4451-fix-socket-test
>> Issues: https://github.com/tarantool/tarantool/issues/4426
>> https://github.com/tarantool/tarantool/issues/4451
>> https://github.com/tarantool/tarantool/issues/4469
>>
>> Changes in v2:
>> - reconsidered socket readiness expectation
>> - reduced conditions waiting time
>>
>> Changes in v3:
>> - reconsidered expectations to unify them
>> - simplified randomization
>>
>> Changes in v4:
>> - left infinite timeouts alone
>> - wrapped UDP datagrams awaiting with wait_cond
>
>That's my bad. As I see, other parts of the test follows the following
>pattern for waiting UDP packets:
>
> | s:readable(TIMEOUT)
> | s:recv()
>
>test_run:wait_cond() is correct solution too, but :readable() looks a
>bit simpler. Anyway, let's use one that you like more.
>
>(It is the only change I propose, which I didn't verify.)
>
>> - replaced manual port randomization with zero bind
>
>Nice idea!
>
>> - removed extra wait_cond wrappers
>>
>> test/app/socket.result | 104 ++++++++++++++++++++++-----------------
>> test/app/socket.test.lua | 94 ++++++++++++++++++++---------------
>> test/app/suite.ini | 1 -
>> 3 files changed, 114 insertions(+), 85 deletions(-)
>
>> diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
>> index c72d41763f4..4ed6c0f25f7 100644
>> --- a/test/app/socket.test.lua
>> +++ b/test/app/socket.test.lua
>> @@ -13,7 +13,7 @@ env = require('test_run')
>> test_run = env.new()
>> test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
>>
>> -WAIT_COND_TIME = 10
>> +WAIT_COND_TIME = 100
>
>Nit: I would name it just TIMEOUT, since it is not only for
>test_run:wait_cond() now.
>
>> @@ -619,8 +608,14 @@ s:settimeout(100500)
>> rch, wch = fiber.channel(1), fiber.channel(1)
>> sc = socket.connect(host, port)
>> test_run:cmd("setopt delimiter ';'")
>> +socket_opened = true
>> cfiber = fiber.create(function(sc, rch, wch)
>> - while sc:send(wch:get()) and rch:put(sc:receive("*l")) do end
>> + while socket_opened do
>> + sc:send(wch:get())
>> + local data = sc:receive("*l")
>> + if not socket_opened then sc:close() end
>> + rch:put(data)
>> + end
>
>I would use existing channel to signal the fiber to close the socket: it
>looks simpler then using a channel and a variable both. Proposed diff:
>
>diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
>index 4ed6c0f25..0fe22709a 100644
>--- a/test/app/socket.test.lua
>+++ b/test/app/socket.test.lua
>@@ -608,13 +608,16 @@ s:settimeout(100500)
> rch, wch = fiber.channel(1), fiber.channel(1)
> sc = socket.connect(host, port)
> test_run:cmd("setopt delimiter ';'")
>-socket_opened = true
> cfiber = fiber.create(function(sc, rch, wch)
>- while socket_opened do
>- sc:send(wch:get())
>- local data = sc:receive("*l")
>- if not socket_opened then sc:close() end
>- rch:put(data)
>+ while true do
>+ local outgoing_data = wch:get()
>+ if outgoing_data == nil then
>+ sc:close()
>+ break
>+ end
>+ sc:send(outgoing_data)
>+ local incoming_data = sc:receive("*l")
>+ rch:put(incoming_data)
> end
> end, sc, rch, wch);
> test_run:cmd("setopt delimiter ''");
>@@ -646,9 +649,10 @@ rch:get()
> wch:put("DATA\n")
> c:receive(4)
> c:receive("*l")
>-socket_opened = false
>+rch:get()
> wch:put("Fu")
> c:send("354 Please type your message\n")
>+wch:put(nil) -- EOF
> c:receive("*l", "Line: ")
> c:receive()
> c:receive(10)
>
>> end, sc, rch, wch);
>> test_run:cmd("setopt delimiter ''");
>>
>> @@ -651,9 +646,9 @@ rch:get()
>> wch:put("DATA\n")
>> c:receive(4)
>> c:receive("*l")
>> +socket_opened = false
>> wch:put("Fu")
>> c:send("354 Please type your message\n")
>> -sc:close()
>> c:receive("*l", "Line: ")
>> c:receive()
>> c:receive(10)
>
>> @@ -778,7 +773,9 @@ e == errno.EAGAIN -- expected true
>>
>> -- case: recv, zero datagram
>> sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
>> -received_message = receiving_socket:recv()
>> +received_message = test_run:wait_cond(function() \
>> + return receiving_socket:recv() \
>> +end, WAIT_COND_TIME)
>
>Proposed to use :readable() (see above).
>
>> --- case: sicket receive inconsistent behavior
>> +-- case: socket receive inconsistent behavior
>> chan = fiber.channel()
>> counter = 0
>> fn = function(s) counter = 0; while true do s:write((tostring(counter)):rep(chan:get())); counter = counter + 1 end end
>> -srv = socket.tcp_server('0.0.0.0', 8888, fn)
>> -s = socket.connect('localhost', 8888)
>> +srv = socket.tcp_server('0.0.0.0', 0, fn)
>> +s = socket.connect('localhost', srv:name().port)
>
>Proposed to connect to 127.0.0.1 (see above).
>
>> chan:put(5)
>> chan:put(5)
>> s:receive(5)
--
Ilya Kosarev
[-- Attachment #2: Type: text/html, Size: 13523 bytes --]
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Tarantool-patches] [PATCH v4] test: fix flaky socket test
2019-12-25 1:29 ` Alexander Turenko
2019-12-25 21:06 ` Ilya Kosarev
@ 2019-12-25 23:47 ` Alexander Turenko
1 sibling, 0 replies; 6+ messages in thread
From: Alexander Turenko @ 2019-12-25 23:47 UTC (permalink / raw)
To: Ilya Kosarev; +Cc: tarantool-patches, v.shpilevoy
> [017] --- app/socket.result Wed Dec 25 01:18:29 2019
> [017] +++ app/socket.reject Wed Dec 25 01:19:30 2019
> [017] @@ -40,7 +40,7 @@
> [017] ...
> [017] test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
> [017] ---
> [017] -- true
> [017] +- Hello, world
>
> It seems we connected to some of our socket server rather then test-run.
> Looks like the same ipv4 / ipv6 problem.
>
> After the following patch for test-run I don't see it anymore (don't
> sure I made enough runs, however):
>
> diff --git a/lib/test_suite.py b/lib/test_suite.py
> index 84b99f4..0a137c3 100644
> --- a/lib/test_suite.py
> +++ b/lib/test_suite.py
> @@ -197,7 +197,7 @@ class TestSuite:
> def start_server(self, server):
> # create inspector daemon for cluster tests
> inspector = TarantoolInspector(
> - 'localhost', server.inspector_port
> + '127.0.0.1', server.inspector_port
> )
> inspector.start()
> # fixme: remove this string if we fix all legacy tests
> diff --git a/test_run.lua b/test_run.lua
> index e37b559..fc1b833 100644
> --- a/test_run.lua
> +++ b/test_run.lua
> @@ -458,7 +458,7 @@ local inspector_methods = {
> local function inspector_new(host, port)
> local inspector = {}
>
> - inspector.host = host or 'localhost'
> + inspector.host = host or '127.0.0.1'
> inspector.port = port or tonumber(os.getenv('INSPECTOR'))
> if inspector.port == nil then
> error('Inspector not started')
Patch: https://github.com/tarantool/test-run/pull/197
WBR, Alexander Turenko.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Tarantool-patches] [PATCH v4] test: fix flaky socket test
2019-12-24 12:58 Ilya Kosarev
@ 2019-12-24 16:33 ` Vladislav Shpilevoy
0 siblings, 0 replies; 6+ messages in thread
From: Vladislav Shpilevoy @ 2019-12-24 16:33 UTC (permalink / raw)
To: Ilya Kosarev, tarantool-patches
Thanks for the fixes!
LGTM.
^ permalink raw reply [flat|nested] 6+ messages in thread
* [Tarantool-patches] [PATCH v4] test: fix flaky socket test
@ 2019-12-24 12:58 Ilya Kosarev
2019-12-24 16:33 ` Vladislav Shpilevoy
0 siblings, 1 reply; 6+ messages in thread
From: Ilya Kosarev @ 2019-12-24 12:58 UTC (permalink / raw)
To: tarantool-patches; +Cc: v.shpilevoy
socket.test had a number of flaky problems:
- socket readiness expectation
- race conditions on socket shutdown in emulation test cases
- UDP datagrams losses on mac os
- excessive random port searches
Now they are solved. Socket test is not fragile anymore.
Closes #4426
Closes #4451
Closes #4469
---
Branch: https://github.com/tarantool/tarantool/tree/i.kosarev/gh-4426-4451-fix-socket-test
Issues: https://github.com/tarantool/tarantool/issues/4426
https://github.com/tarantool/tarantool/issues/4451
https://github.com/tarantool/tarantool/issues/4469
Changes in v2:
- reconsidered socket readiness expectation
- reduced conditions waiting time
Changes in v3:
- reconsidered expectations to unify them
- simplified randomization
Changes in v4:
- left infinite timeouts alone
- wrapped UDP datagrams awaiting with wait_cond
- replaced manual port randomization with zero bind
- removed extra wait_cond wrappers
v4.1: Repushed with commit message rewording
test/app/socket.result | 104 ++++++++++++++++++++++-----------------
test/app/socket.test.lua | 94 ++++++++++++++++++++---------------
test/app/suite.ini | 1 -
3 files changed, 114 insertions(+), 85 deletions(-)
diff --git a/test/app/socket.result b/test/app/socket.result
index fd299424c96..3c680125131 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -42,7 +42,7 @@ test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
---
- true
...
-WAIT_COND_TIME = 10
+WAIT_COND_TIME = 100
---
...
socket('PF_INET', 'SOCK_STREAM', 'tcp121222');
@@ -107,11 +107,11 @@ s:nonblock(true)
---
- true
...
-s:readable(.1)
+s:readable(WAIT_COND_TIME)
---
- true
...
-s:wait(.1)
+s:wait(WAIT_COND_TIME)
---
- RW
...
@@ -183,7 +183,7 @@ s:writable(0)
---
- true
...
-s:wait(.01)
+s:wait(WAIT_COND_TIME)
---
- RW
...
@@ -227,11 +227,11 @@ s:syswrite(ffi.cast('const char *', ping), #ping)
---
- 6
...
-s:readable(1)
+s:readable(WAIT_COND_TIME)
---
- true
...
-s:wait(.01)
+s:wait(WAIT_COND_TIME)
---
- RW
...
@@ -308,7 +308,7 @@ sc:nonblock(true)
---
- true
...
-sc:readable(.5)
+sc:readable(WAIT_COND_TIME)
---
- true
...
@@ -451,7 +451,7 @@ sc:sysconnect('127.0.0.1', s:name().port) or errno() == errno.EINPROGRESS
---
- true
...
-sc:writable(10)
+sc:writable(WAIT_COND_TIME)
---
- true
...
@@ -830,7 +830,7 @@ sc:sendto('127.0.0.1', s:name().port, 'Hello, world')
---
- 12
...
-s:readable(10)
+s:readable(WAIT_COND_TIME)
---
- true
...
@@ -842,7 +842,7 @@ sc:sendto('127.0.0.1', s:name().port, 'Hello, world, 2')
---
- 15
...
-s:readable(10)
+s:readable(WAIT_COND_TIME)
---
- true
...
@@ -898,7 +898,7 @@ sc:sendto('127.0.0.1', s:name().port, 'Hello, World!')
---
- 13
...
-s:readable(1)
+s:readable(WAIT_COND_TIME)
---
- true
...
@@ -913,7 +913,7 @@ s:sendto(from.host, from.port, 'Hello, hello!')
---
- 13
...
-sc:readable(1)
+sc:readable(WAIT_COND_TIME)
---
- true
...
@@ -1070,29 +1070,18 @@ s = nil
master = socket('PF_INET', 'SOCK_STREAM', 'tcp')
---
...
-master:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true)
+master:bind('127.0.0.1', 0)
---
- true
...
-port = 32768 + math.random(0, 32767)
+port = master:name().port
---
...
--- SO_REUSEADDR allows to bind to the same source addr:port twice,
--- so listen() can return EADDRINUSE and so we check it within
--- wait_cond().
-test_run:cmd("setopt delimiter ';'")
+master:listen()
---
- true
...
-test_run:wait_cond(function()
- local ok = master:bind('127.0.0.1', port)
- local ok = ok and master:listen()
- if not ok then
- port = 32768 + math.random(32768)
- return false, master:error()
- end
- return true
-end, WAIT_COND_TIME);
+test_run:cmd("setopt delimiter ';'")
---
- true
...
@@ -1822,8 +1811,14 @@ test_run:cmd("setopt delimiter ';'")
---
- true
...
+socket_opened = true
cfiber = fiber.create(function(sc, rch, wch)
- while sc:send(wch:get()) and rch:put(sc:receive("*l")) do end
+ while socket_opened do
+ sc:send(wch:get())
+ local data = sc:receive("*l")
+ if not socket_opened then sc:close() end
+ rch:put(data)
+ end
end, sc, rch, wch);
---
...
@@ -1936,6 +1931,9 @@ c:receive("*l")
---
-
...
+socket_opened = false
+---
+...
wch:put("Fu")
---
- true
@@ -1944,10 +1942,6 @@ c:send("354 Please type your message\n")
---
- 29
...
-sc:close()
----
-- 1
-...
c:receive("*l", "Line: ")
---
- null
@@ -2292,7 +2286,9 @@ sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
---
- 0
...
-received_message = receiving_socket:recv()
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv() \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2315,7 +2311,9 @@ sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
---
- 0
...
-received_message, from = receiving_socket:recvfrom()
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom() \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2392,7 +2390,9 @@ sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
---
- 0
...
-received_message = receiving_socket:recv(512)
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv(512) \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2415,7 +2415,9 @@ sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
---
- 0
...
-received_message, from = receiving_socket:recvfrom(512)
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom(512) \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2452,7 +2454,9 @@ sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
---
- 1025
...
-received_message = receiving_socket:recv()
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv() \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2484,7 +2488,9 @@ sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
---
- 1025
...
-received_message, from = receiving_socket:recvfrom()
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom() \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2523,7 +2529,9 @@ sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
---
- 1025
...
-received_message = receiving_socket:recv(512)
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv(512) \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2566,7 +2574,9 @@ sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
---
- 1025
...
-received_message, from = receiving_socket:recvfrom(512)
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom(512) \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2643,6 +2653,10 @@ sending_socket:sysconnect('127.0.0.1', listening_socket_port) or errno() == errn
---
- true
...
+listening_socket:readable(WAIT_COND_TIME)
+---
+- true
+...
receiving_socket = listening_socket:accept()
---
...
@@ -2651,7 +2665,9 @@ sending_socket:write(message)
- 513
...
-- case: recvfrom reads first 512 bytes from the message with tcp
-received_message, from = receiving_socket:recvfrom()
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom() \
+end, WAIT_COND_TIME)
---
...
e = receiving_socket:errno()
@@ -2816,7 +2832,7 @@ test_run:cmd("clear filter")
---
- true
...
--- case: sicket receive inconsistent behavior
+-- case: socket receive inconsistent behavior
chan = fiber.channel()
---
...
@@ -2826,10 +2842,10 @@ counter = 0
fn = function(s) counter = 0; while true do s:write((tostring(counter)):rep(chan:get())); counter = counter + 1 end end
---
...
-srv = socket.tcp_server('0.0.0.0', 8888, fn)
+srv = socket.tcp_server('0.0.0.0', 0, fn)
---
...
-s = socket.connect('localhost', 8888)
+s = socket.connect('localhost', srv:name().port)
---
...
chan:put(5)
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index c72d41763f4..4ed6c0f25f7 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -13,7 +13,7 @@ env = require('test_run')
test_run = env.new()
test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
-WAIT_COND_TIME = 10
+WAIT_COND_TIME = 100
socket('PF_INET', 'SOCK_STREAM', 'tcp121222');
@@ -39,8 +39,8 @@ s:nonblock(false)
s:nonblock()
s:nonblock(true)
-s:readable(.1)
-s:wait(.1)
+s:readable(WAIT_COND_TIME)
+s:wait(WAIT_COND_TIME)
socket.iowait(s:fd(), 'RW')
socket.iowait(s:fd(), 3)
socket.iowait(s:fd(), 'R')
@@ -58,7 +58,7 @@ s:errno() > 0
s:error()
s:writable(.00000000000001)
s:writable(0)
-s:wait(.01)
+s:wait(WAIT_COND_TIME)
socket.iowait(nil, nil, -1)
socket.iowait(nil, nil, 0.0001)
socket.iowait(-1, nil, 0.0001)
@@ -75,8 +75,8 @@ ping = msgpack.encode(string.len(ping)) .. ping
-- test syswrite with char *
s:syswrite(ffi.cast('const char *', ping), #ping)
-s:readable(1)
-s:wait(.01)
+s:readable(WAIT_COND_TIME)
+s:wait(WAIT_COND_TIME)
pong = s:sysread()
string.len(pong)
@@ -100,7 +100,7 @@ sc = socket('PF_INET', 'SOCK_STREAM', 'tcp')
sc:nonblock(false)
sc:sysconnect('127.0.0.1', s:name().port)
sc:nonblock(true)
-sc:readable(.5)
+sc:readable(WAIT_COND_TIME)
sc:sysread()
string.match(tostring(sc), ', peer') ~= nil
#sevres
@@ -140,7 +140,7 @@ s:listen(128)
sc = socket('PF_INET', 'SOCK_STREAM', 'tcp')
sc:sysconnect('127.0.0.1', s:name().port) or errno() == errno.EINPROGRESS
-sc:writable(10)
+sc:writable(WAIT_COND_TIME)
sc:write('Hello, world')
sa, addr = s:accept()
@@ -266,11 +266,11 @@ s = socket('AF_INET', 'SOCK_DGRAM', 'udp')
s:bind('127.0.0.1', 0)
sc = socket('AF_INET', 'SOCK_DGRAM', 'udp')
sc:sendto('127.0.0.1', s:name().port, 'Hello, world')
-s:readable(10)
+s:readable(WAIT_COND_TIME)
s:recv()
sc:sendto('127.0.0.1', s:name().port, 'Hello, world, 2')
-s:readable(10)
+s:readable(WAIT_COND_TIME)
d, from = s:recvfrom()
from.port > 0
from.port = 'Random port'
@@ -286,11 +286,11 @@ sc = socket('AF_INET', 'SOCK_DGRAM', 'udp')
sc:nonblock(true)
sc:sendto('127.0.0.1', s:name().port)
sc:sendto('127.0.0.1', s:name().port, 'Hello, World!')
-s:readable(1)
+s:readable(WAIT_COND_TIME)
data, from = s:recvfrom(10)
data
s:sendto(from.host, from.port, 'Hello, hello!')
-sc:readable(1)
+sc:readable(WAIT_COND_TIME)
data_r, from_r = sc:recvfrom()
data_r
from_r.host
@@ -342,21 +342,10 @@ s = nil
-- random port
master = socket('PF_INET', 'SOCK_STREAM', 'tcp')
-master:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true)
-port = 32768 + math.random(0, 32767)
--- SO_REUSEADDR allows to bind to the same source addr:port twice,
--- so listen() can return EADDRINUSE and so we check it within
--- wait_cond().
+master:bind('127.0.0.1', 0)
+port = master:name().port
+master:listen()
test_run:cmd("setopt delimiter ';'")
-test_run:wait_cond(function()
- local ok = master:bind('127.0.0.1', port)
- local ok = ok and master:listen()
- if not ok then
- port = 32768 + math.random(32768)
- return false, master:error()
- end
- return true
-end, WAIT_COND_TIME);
function gh361()
local s = socket('PF_INET', 'SOCK_STREAM', 'tcp')
s:sysconnect('127.0.0.1', port)
@@ -619,8 +608,14 @@ s:settimeout(100500)
rch, wch = fiber.channel(1), fiber.channel(1)
sc = socket.connect(host, port)
test_run:cmd("setopt delimiter ';'")
+socket_opened = true
cfiber = fiber.create(function(sc, rch, wch)
- while sc:send(wch:get()) and rch:put(sc:receive("*l")) do end
+ while socket_opened do
+ sc:send(wch:get())
+ local data = sc:receive("*l")
+ if not socket_opened then sc:close() end
+ rch:put(data)
+ end
end, sc, rch, wch);
test_run:cmd("setopt delimiter ''");
@@ -651,9 +646,9 @@ rch:get()
wch:put("DATA\n")
c:receive(4)
c:receive("*l")
+socket_opened = false
wch:put("Fu")
c:send("354 Please type your message\n")
-sc:close()
c:receive("*l", "Line: ")
c:receive()
c:receive(10)
@@ -778,7 +773,9 @@ e == errno.EAGAIN -- expected true
-- case: recv, zero datagram
sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
-received_message = receiving_socket:recv()
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv() \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == '' -- expected true
received_message
@@ -786,7 +783,9 @@ e == 0 -- expected true
-- case: recvfrom, zero datagram
sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
-received_message, from = receiving_socket:recvfrom()
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom() \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == '' -- expected true
received_message
@@ -812,7 +811,9 @@ e == errno.EAGAIN -- expected true
-- case: recv, zero datagram, explicit size
sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
-received_message = receiving_socket:recv(512)
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv(512) \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == '' -- expected true
received_message
@@ -820,7 +821,9 @@ e == 0 -- expected true
-- case: recvfrom, zero datagram, explicit size
sendto_zero(sending_socket, '127.0.0.1', receiving_socket_port)
-received_message, from = receiving_socket:recvfrom(512)
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom(512) \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == '' -- expected true
received_message
@@ -833,7 +836,9 @@ message = string.rep('x', message_len)
-- case: recv, non-zero length datagram, the buffer size should be evaluated
sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
-received_message = receiving_socket:recv()
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv() \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == message -- expected true
received_message:len()
@@ -844,7 +849,9 @@ e
-- case: recvfrom, non-zero length datagram, the buffer size should be
-- evaluated
sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
-received_message, from = receiving_socket:recvfrom()
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom() \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == message -- expected true
received_message:len()
@@ -856,7 +863,9 @@ e
-- case: recv truncates a datagram larger then the buffer of an explicit size
sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
-received_message = receiving_socket:recv(512)
+received_message = test_run:wait_cond(function() \
+ return receiving_socket:recv(512) \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == message:sub(1, 512) -- expected true
received_message:len() == 512 -- expected true
@@ -872,7 +881,9 @@ message = string.rep('y', message_len)
-- case: recvfrom truncates a datagram larger then the buffer of an explicit size
sending_socket:sendto('127.0.0.1', receiving_socket_port, message)
-received_message, from = receiving_socket:recvfrom(512)
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom(512) \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == message:sub(1, 512) -- expected true
received_message:len() == 512 -- expected true
@@ -896,11 +907,14 @@ listening_socket:listen()
listening_socket_port = listening_socket:name().port
sending_socket = socket('AF_INET', 'SOCK_STREAM', 'tcp')
sending_socket:sysconnect('127.0.0.1', listening_socket_port) or errno() == errno.EINPROGRESS
+listening_socket:readable(WAIT_COND_TIME)
receiving_socket = listening_socket:accept()
sending_socket:write(message)
-- case: recvfrom reads first 512 bytes from the message with tcp
-received_message, from = receiving_socket:recvfrom()
+received_message, from = test_run:wait_cond(function() \
+ return receiving_socket:recvfrom() \
+end, WAIT_COND_TIME)
e = receiving_socket:errno()
received_message == message:sub(1, 512) -- expected true
received_message:len() == 512 -- expected true
@@ -960,12 +974,12 @@ server:close()
test_run:cmd("clear filter")
--- case: sicket receive inconsistent behavior
+-- case: socket receive inconsistent behavior
chan = fiber.channel()
counter = 0
fn = function(s) counter = 0; while true do s:write((tostring(counter)):rep(chan:get())); counter = counter + 1 end end
-srv = socket.tcp_server('0.0.0.0', 8888, fn)
-s = socket.connect('localhost', 8888)
+srv = socket.tcp_server('0.0.0.0', 0, fn)
+s = socket.connect('localhost', srv:name().port)
chan:put(5)
chan:put(5)
s:receive(5)
diff --git a/test/app/suite.ini b/test/app/suite.ini
index 79432e29a76..dd802d98cf9 100644
--- a/test/app/suite.ini
+++ b/test/app/suite.ini
@@ -7,4 +7,3 @@ use_unix_sockets = True
use_unix_sockets_iproto = True
is_parallel = True
pretest_clean = True
-fragile = socket.test.lua ; gh-4426 gh-4451
--
2.17.1
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2019-12-25 23:47 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-23 20:48 [Tarantool-patches] [PATCH v4] test: fix flaky socket test Ilya Kosarev
2019-12-25 1:29 ` Alexander Turenko
2019-12-25 21:06 ` Ilya Kosarev
2019-12-25 23:47 ` Alexander Turenko
2019-12-24 12:58 Ilya Kosarev
2019-12-24 16:33 ` Vladislav Shpilevoy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox