[tarantool-patches] Re: [PATCH] test: fix unix socket conflict in socket.test.lua

Alexander Turenko alexander.turenko at tarantool.org
Mon Oct 29 11:25:29 MSK 2018


On Thu, Oct 25, 2018 at 12:43:39PM +0300, Vladimir Davydov wrote:
> On Thu, Oct 25, 2018 at 05:21:45AM +0300, Alexander Turenko wrote:
> > It is needed to run the test in parallel on several test-run workers to
> > investigate flaky failures of the test.
> 
> I don't think I understand. Do you mean that this test doesn't fix the
> test flakiness and is only needed for further investigation?
> 

Yes (but now I have added some fixes that can eliminate some flaky
failures). It is often convenient to run something like...

```
TEST_RUN_TESTS=$(for i in $(seq 1 100); do echo -n "app/ "; done) make test
```

...to prove the suite is ready to run in parallel mode.

> > diff --git a/test/app/socket.result b/test/app/socket.result
> > index 2f002a37e..1a570b9fa 100644
> > --- a/test/app/socket.result
> > +++ b/test/app/socket.result
> > @@ -42,6 +42,29 @@ test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
> >  ---
> >  - true
> >  ...
> > +test_run:cmd("push filter '(/tmp/tarantool-test-socket)-[0-9]+' to '\\1'")
> > +---
> > +- true
> > +...
> > +-- /tmp/tarantool-test-socket-${TEST_RUN_WORKER_ID}
> > +test_run:cmd("setopt delimiter ';'")
> > +---
> > +- true
> > +...
> > +function get_temp_socket_path()
> > +    local base_path = '/tmp/tarantool-test-socket'
> > +    local worker_id = os.getenv('TEST_RUN_WORKER_ID')
> > +    if not worker_id then
> > +        return base_path
> > +    end
> > +    return ('%s-%s'):format(base_path, worker_id)
> > +end;
> 
> Why don't you simply create the file in the worker directory?

Sure, I can. Rewritten the test. But socket.skipcond still uses
/tmp/tarantool-test-socket-${WORKER_ID}, because its working directory
is `test`.

The whole new patch is below.

commit 224fcdf2a5e8f84971253f1d585b94213d06a384
Author: Alexander Turenko <alexander.turenko at tarantool.org>
Date:   Thu Oct 25 04:06:46 2018 +0300

    test: fix unix socket conflict in socket.test.lua
    
    Increased socket.readable / socket.wait timeouts.
    
    Rewritten port choosing: repeat bind+listen until succeeds, exclude
    incorrect port 65536 from the range.
    
    All these changes are needed to run the test in parallel on several
    test-run workers to investigate flaky failures of the test / of the
    suite. Some of these changes can also eliminate possible flaky failures.

diff --git a/test/app/socket.result b/test/app/socket.result
index 2f002a37e..1209ec218 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -42,6 +42,9 @@ test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
 ---
 - true
 ...
+WAIT_COND_TIME = 10
+---
+...
 socket('PF_INET', 'SOCK_STREAM', 'tcp121222');
 ---
 - null
@@ -104,11 +107,11 @@ s:nonblock(true)
 ---
 - true
 ...
-s:readable(.01)
+s:readable(.1)
 ---
 - true
 ...
-s:wait(.01)
+s:wait(.1)
 ---
 - RW
 ...
@@ -609,13 +612,16 @@ s:nonblock()
 ---
 - true
 ...
-os.remove('/tmp/tarantool-test-socket')
+path = 'tarantool-test-socket'
+---
+...
+os.remove(path)
 ---
 - null
-- '/tmp/tarantool-test-socket: No such file or directory'
+- 'tarantool-test-socket: No such file or directory'
 - 2
 ...
-s:bind('unix/', '/tmp/tarantool-test-socket')
+s:bind('unix/', path)
 ---
 - true
 ...
@@ -634,7 +640,7 @@ sc:nonblock(true)
 ---
 - true
 ...
-sc:sysconnect('unix/', '/tmp/tarantool-test-socket')
+sc:sysconnect('unix/', path)
 ---
 - true
 ...
@@ -673,7 +679,7 @@ s:close()
 ---
 - true
 ...
-_ = os.remove('/tmp/tarantool-test-socket')
+_ = os.remove(path)
 ---
 ...
 test_run:cmd("setopt delimiter ';'")
@@ -963,7 +969,7 @@ socket.tcp_connect('127.0.0.1', port), errno() == errno.ECONNREFUSED
 - true
 ...
 -- AF_UNIX
-path = '/tmp/tarantool-test-socket'
+path = 'tarantool-test-socket'
 ---
 ...
 _ = os.remove(path)
@@ -1109,27 +1115,25 @@ master:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true)
 ---
 - true
 ...
-port = 32768 + math.random(32768)
----
-...
-attempt = 0
+port = 32768 + math.random(0, 32767)
 ---
 ...
+-- SO_REUSEADDR allows to bind to the same source addr:port twice,
+-- so listen() can return EADDRINUSE and so we check it within
+-- wait_cond().
 test_run:cmd("setopt delimiter ';'")
 ---
 - true
 ...
-while attempt < 10 do
-    if not master:bind('127.0.0.1', port)  then
+test_run:wait_cond(function()
+    local ok = master:bind('127.0.0.1', port)
+    local ok = ok and master:listen()
+    if not ok then
         port = 32768 + math.random(32768)
-        attempt = attempt + 1
-    else
-        break
+        return false, master:error()
     end
-end;
----
-...
-master:listen();
+    return true
+end, WAIT_COND_TIME);
 ---
 - true
 ...
@@ -1161,7 +1165,7 @@ master:close()
 f = nil
 ---
 ...
-path = '/tmp/tarantool-test-socket'
+path = 'tarantool-test-socket'
 ---
 ...
 s = socket('PF_UNIX', 'SOCK_STREAM', 0)
@@ -1599,7 +1603,7 @@ s = nil
 ---
 ...
 -- start AF_UNIX server with dead socket exists
-path = '/tmp/tarantool-test-socket'
+path = 'tarantool-test-socket'
 ---
 ...
 s = socket('AF_UNIX', 'SOCK_STREAM', 0)
@@ -1691,9 +1695,7 @@ test_run:cmd("setopt delimiter ';'")
 err = nil;
 ---
 ...
-path = '/tmp/tarantool-test-socket';
----
-...
+path = 'tarantool-test-socket'
 for i = 1, 10 do
     local server = socket.tcp_server('unix/', path, function() end)
     if not server then
diff --git a/test/app/socket.skipcond b/test/app/socket.skipcond
index 80f2117b5..c7d03a681 100644
--- a/test/app/socket.skipcond
+++ b/test/app/socket.skipcond
@@ -5,7 +5,8 @@ import os.path
 import socket
 import os
 
-test_path = '/tmp/tarantool-test-socket'
+worker_id = os.environ['TEST_RUN_WORKER_ID']
+test_path = '/tmp/tarantool-test-socket-' + worker_id
 
 if os.path.exists(test_path):
     os.remove(test_path)
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index c0b001449..9901352b1 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -13,6 +13,8 @@ env = require('test_run')
 test_run = env.new()
 test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
 
+WAIT_COND_TIME = 10
+
 socket('PF_INET', 'SOCK_STREAM', 'tcp121222');
 
 s = socket('PF_INET', 'SOCK_STREAM', 'tcp')
@@ -37,8 +39,8 @@ s:nonblock(false)
 s:nonblock()
 s:nonblock(true)
 
-s:readable(.01)
-s:wait(.01)
+s:readable(.1)
+s:wait(.1)
 socket.iowait(s:fd(), 'RW')
 socket.iowait(s:fd(), 3)
 socket.iowait(s:fd(), 'R')
@@ -185,14 +187,15 @@ s ~= nil
 s:nonblock()
 s:nonblock(true)
 s:nonblock()
-os.remove('/tmp/tarantool-test-socket')
-s:bind('unix/', '/tmp/tarantool-test-socket')
+path = 'tarantool-test-socket'
+os.remove(path)
+s:bind('unix/', path)
 sc ~= nil
 s:listen(1234)
 
 sc = socket('PF_UNIX', 'SOCK_STREAM', 0)
 sc:nonblock(true)
-sc:sysconnect('unix/', '/tmp/tarantool-test-socket')
+sc:sysconnect('unix/', path)
 sc:error()
 
 s:readable()
@@ -205,7 +208,7 @@ sc:close()
 sa:close()
 s:close()
 
-_ = os.remove('/tmp/tarantool-test-socket')
+_ = os.remove(path)
 
 test_run:cmd("setopt delimiter ';'")
 function aexitst(ai, hostnames, port)
@@ -306,7 +309,7 @@ s:close()
 socket.tcp_connect('127.0.0.1', port), errno() == errno.ECONNREFUSED
 
 -- AF_UNIX
-path = '/tmp/tarantool-test-socket'
+path = 'tarantool-test-socket'
 _ = os.remove(path)
 s = socket('AF_UNIX', 'SOCK_STREAM', 0)
 s:bind('unix/', path)
@@ -357,18 +360,20 @@ s:error()
 -- random port
 master = socket('PF_INET', 'SOCK_STREAM', 'tcp')
 master:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true)
-port = 32768 + math.random(32768)
-attempt = 0
+port = 32768 + math.random(0, 32767)
+-- SO_REUSEADDR allows to bind to the same source addr:port twice,
+-- so listen() can return EADDRINUSE and so we check it within
+-- wait_cond().
 test_run:cmd("setopt delimiter ';'")
-while attempt < 10 do
-    if not master:bind('127.0.0.1', port)  then
+test_run:wait_cond(function()
+    local ok = master:bind('127.0.0.1', port)
+    local ok = ok and master:listen()
+    if not ok then
         port = 32768 + math.random(32768)
-        attempt = attempt + 1
-    else
-        break
+        return false, master:error()
     end
-end;
-master:listen();
+    return true
+end, WAIT_COND_TIME);
 function gh361()
     local s = socket('PF_INET', 'SOCK_STREAM', 'tcp')
     s:sysconnect('127.0.0.1', port)
@@ -383,7 +388,7 @@ master:close()
 f = nil
 
 
-path = '/tmp/tarantool-test-socket'
+path = 'tarantool-test-socket'
 s = socket('PF_UNIX', 'SOCK_STREAM', 0)
 s:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true)
 s:error()
@@ -531,7 +536,7 @@ yaml.decode(yaml.encode(s)).fd == s:fd()
 s = nil
 
 -- start AF_UNIX server with dead socket exists
-path = '/tmp/tarantool-test-socket'
+path = 'tarantool-test-socket'
 s = socket('AF_UNIX', 'SOCK_STREAM', 0)
 s:bind('unix/', path)
 s:close()
@@ -569,7 +574,7 @@ f:cancel()
 -- and used by a newly started one.
 test_run:cmd("setopt delimiter ';'")
 err = nil;
-path = '/tmp/tarantool-test-socket';
+path = 'tarantool-test-socket'
 for i = 1, 10 do
     local server = socket.tcp_server('unix/', path, function() end)
     if not server then



More information about the Tarantool-patches mailing list