From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Mon, 29 Oct 2018 11:25:29 +0300 From: Alexander Turenko Subject: Re: [tarantool-patches] Re: [PATCH] test: fix unix socket conflict in socket.test.lua Message-ID: <20181029082528.ou2zooi33vcblk6b@tkn_work_nb> References: <671f12793def4a2c57a9672e851986877616a81f.1540433737.git.alexander.turenko@tarantool.org> <20181025094339.jb3652vrqnlvwpkm@esperanza> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <20181025094339.jb3652vrqnlvwpkm@esperanza> To: Vladimir Davydov Cc: Sergei Voronezhskii , tarantool-patches@freelists.org List-ID: On Thu, Oct 25, 2018 at 12:43:39PM +0300, Vladimir Davydov wrote: > On Thu, Oct 25, 2018 at 05:21:45AM +0300, Alexander Turenko wrote: > > It is needed to run the test in parallel on several test-run workers to > > investigate flaky failures of the test. > > I don't think I understand. Do you mean that this test doesn't fix the > test flakiness and is only needed for further investigation? > Yes (but now I have added some fixes that can eliminate some flaky failures). It is often convenient to run something like... ``` TEST_RUN_TESTS=$(for i in $(seq 1 100); do echo -n "app/ "; done) make test ``` ...to prove the suite is ready to run in parallel mode. > > diff --git a/test/app/socket.result b/test/app/socket.result > > index 2f002a37e..1a570b9fa 100644 > > --- a/test/app/socket.result > > +++ b/test/app/socket.result > > @@ -42,6 +42,29 @@ test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'") > > --- > > - true > > ... > > +test_run:cmd("push filter '(/tmp/tarantool-test-socket)-[0-9]+' to '\\1'") > > +--- > > +- true > > +... > > +-- /tmp/tarantool-test-socket-${TEST_RUN_WORKER_ID} > > +test_run:cmd("setopt delimiter ';'") > > +--- > > +- true > > +... > > +function get_temp_socket_path() > > + local base_path = '/tmp/tarantool-test-socket' > > + local worker_id = os.getenv('TEST_RUN_WORKER_ID') > > + if not worker_id then > > + return base_path > > + end > > + return ('%s-%s'):format(base_path, worker_id) > > +end; > > Why don't you simply create the file in the worker directory? Sure, I can. Rewritten the test. But socket.skipcond still uses /tmp/tarantool-test-socket-${WORKER_ID}, because its working directory is `test`. The whole new patch is below. commit 224fcdf2a5e8f84971253f1d585b94213d06a384 Author: Alexander Turenko Date: Thu Oct 25 04:06:46 2018 +0300 test: fix unix socket conflict in socket.test.lua Increased socket.readable / socket.wait timeouts. Rewritten port choosing: repeat bind+listen until succeeds, exclude incorrect port 65536 from the range. All these changes are needed to run the test in parallel on several test-run workers to investigate flaky failures of the test / of the suite. Some of these changes can also eliminate possible flaky failures. diff --git a/test/app/socket.result b/test/app/socket.result index 2f002a37e..1209ec218 100644 --- a/test/app/socket.result +++ b/test/app/socket.result @@ -42,6 +42,9 @@ test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'") --- - true ... +WAIT_COND_TIME = 10 +--- +... socket('PF_INET', 'SOCK_STREAM', 'tcp121222'); --- - null @@ -104,11 +107,11 @@ s:nonblock(true) --- - true ... -s:readable(.01) +s:readable(.1) --- - true ... -s:wait(.01) +s:wait(.1) --- - RW ... @@ -609,13 +612,16 @@ s:nonblock() --- - true ... -os.remove('/tmp/tarantool-test-socket') +path = 'tarantool-test-socket' +--- +... +os.remove(path) --- - null -- '/tmp/tarantool-test-socket: No such file or directory' +- 'tarantool-test-socket: No such file or directory' - 2 ... -s:bind('unix/', '/tmp/tarantool-test-socket') +s:bind('unix/', path) --- - true ... @@ -634,7 +640,7 @@ sc:nonblock(true) --- - true ... -sc:sysconnect('unix/', '/tmp/tarantool-test-socket') +sc:sysconnect('unix/', path) --- - true ... @@ -673,7 +679,7 @@ s:close() --- - true ... -_ = os.remove('/tmp/tarantool-test-socket') +_ = os.remove(path) --- ... test_run:cmd("setopt delimiter ';'") @@ -963,7 +969,7 @@ socket.tcp_connect('127.0.0.1', port), errno() == errno.ECONNREFUSED - true ... -- AF_UNIX -path = '/tmp/tarantool-test-socket' +path = 'tarantool-test-socket' --- ... _ = os.remove(path) @@ -1109,27 +1115,25 @@ master:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true) --- - true ... -port = 32768 + math.random(32768) ---- -... -attempt = 0 +port = 32768 + math.random(0, 32767) --- ... +-- SO_REUSEADDR allows to bind to the same source addr:port twice, +-- so listen() can return EADDRINUSE and so we check it within +-- wait_cond(). test_run:cmd("setopt delimiter ';'") --- - true ... -while attempt < 10 do - if not master:bind('127.0.0.1', port) then +test_run:wait_cond(function() + local ok = master:bind('127.0.0.1', port) + local ok = ok and master:listen() + if not ok then port = 32768 + math.random(32768) - attempt = attempt + 1 - else - break + return false, master:error() end -end; ---- -... -master:listen(); + return true +end, WAIT_COND_TIME); --- - true ... @@ -1161,7 +1165,7 @@ master:close() f = nil --- ... -path = '/tmp/tarantool-test-socket' +path = 'tarantool-test-socket' --- ... s = socket('PF_UNIX', 'SOCK_STREAM', 0) @@ -1599,7 +1603,7 @@ s = nil --- ... -- start AF_UNIX server with dead socket exists -path = '/tmp/tarantool-test-socket' +path = 'tarantool-test-socket' --- ... s = socket('AF_UNIX', 'SOCK_STREAM', 0) @@ -1691,9 +1695,7 @@ test_run:cmd("setopt delimiter ';'") err = nil; --- ... -path = '/tmp/tarantool-test-socket'; ---- -... +path = 'tarantool-test-socket' for i = 1, 10 do local server = socket.tcp_server('unix/', path, function() end) if not server then diff --git a/test/app/socket.skipcond b/test/app/socket.skipcond index 80f2117b5..c7d03a681 100644 --- a/test/app/socket.skipcond +++ b/test/app/socket.skipcond @@ -5,7 +5,8 @@ import os.path import socket import os -test_path = '/tmp/tarantool-test-socket' +worker_id = os.environ['TEST_RUN_WORKER_ID'] +test_path = '/tmp/tarantool-test-socket-' + worker_id if os.path.exists(test_path): os.remove(test_path) diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua index c0b001449..9901352b1 100644 --- a/test/app/socket.test.lua +++ b/test/app/socket.test.lua @@ -13,6 +13,8 @@ env = require('test_run') test_run = env.new() test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'") +WAIT_COND_TIME = 10 + socket('PF_INET', 'SOCK_STREAM', 'tcp121222'); s = socket('PF_INET', 'SOCK_STREAM', 'tcp') @@ -37,8 +39,8 @@ s:nonblock(false) s:nonblock() s:nonblock(true) -s:readable(.01) -s:wait(.01) +s:readable(.1) +s:wait(.1) socket.iowait(s:fd(), 'RW') socket.iowait(s:fd(), 3) socket.iowait(s:fd(), 'R') @@ -185,14 +187,15 @@ s ~= nil s:nonblock() s:nonblock(true) s:nonblock() -os.remove('/tmp/tarantool-test-socket') -s:bind('unix/', '/tmp/tarantool-test-socket') +path = 'tarantool-test-socket' +os.remove(path) +s:bind('unix/', path) sc ~= nil s:listen(1234) sc = socket('PF_UNIX', 'SOCK_STREAM', 0) sc:nonblock(true) -sc:sysconnect('unix/', '/tmp/tarantool-test-socket') +sc:sysconnect('unix/', path) sc:error() s:readable() @@ -205,7 +208,7 @@ sc:close() sa:close() s:close() -_ = os.remove('/tmp/tarantool-test-socket') +_ = os.remove(path) test_run:cmd("setopt delimiter ';'") function aexitst(ai, hostnames, port) @@ -306,7 +309,7 @@ s:close() socket.tcp_connect('127.0.0.1', port), errno() == errno.ECONNREFUSED -- AF_UNIX -path = '/tmp/tarantool-test-socket' +path = 'tarantool-test-socket' _ = os.remove(path) s = socket('AF_UNIX', 'SOCK_STREAM', 0) s:bind('unix/', path) @@ -357,18 +360,20 @@ s:error() -- random port master = socket('PF_INET', 'SOCK_STREAM', 'tcp') master:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true) -port = 32768 + math.random(32768) -attempt = 0 +port = 32768 + math.random(0, 32767) +-- SO_REUSEADDR allows to bind to the same source addr:port twice, +-- so listen() can return EADDRINUSE and so we check it within +-- wait_cond(). test_run:cmd("setopt delimiter ';'") -while attempt < 10 do - if not master:bind('127.0.0.1', port) then +test_run:wait_cond(function() + local ok = master:bind('127.0.0.1', port) + local ok = ok and master:listen() + if not ok then port = 32768 + math.random(32768) - attempt = attempt + 1 - else - break + return false, master:error() end -end; -master:listen(); + return true +end, WAIT_COND_TIME); function gh361() local s = socket('PF_INET', 'SOCK_STREAM', 'tcp') s:sysconnect('127.0.0.1', port) @@ -383,7 +388,7 @@ master:close() f = nil -path = '/tmp/tarantool-test-socket' +path = 'tarantool-test-socket' s = socket('PF_UNIX', 'SOCK_STREAM', 0) s:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true) s:error() @@ -531,7 +536,7 @@ yaml.decode(yaml.encode(s)).fd == s:fd() s = nil -- start AF_UNIX server with dead socket exists -path = '/tmp/tarantool-test-socket' +path = 'tarantool-test-socket' s = socket('AF_UNIX', 'SOCK_STREAM', 0) s:bind('unix/', path) s:close() @@ -569,7 +574,7 @@ f:cancel() -- and used by a newly started one. test_run:cmd("setopt delimiter ';'") err = nil; -path = '/tmp/tarantool-test-socket'; +path = 'tarantool-test-socket' for i = 1, 10 do local server = socket.tcp_server('unix/', path, function() end) if not server then