[PATCH] test: fix unix socket conflict in socket.test.lua

Tarantool development patches archive
 help / color / mirror / Atom feed

* [PATCH] test: fix unix socket conflict in socket.test.lua
@ 2018-10-25  2:21 Alexander Turenko
  2018-10-25  9:43 ` Vladimir Davydov
  0 siblings, 1 reply; 6+ messages in thread
From: Alexander Turenko @ 2018-10-25  2:21 UTC (permalink / raw)
  To: Vladimir Davydov
  Cc: Alexander Turenko, Sergei Voronezhskii, tarantool-patches

It is needed to run the test in parallel on several test-run workers to
investigate flaky failures of the test.
---

issue: no
https://github.com/tarantool/tarantool/tree/Totktonada/socket-test-lua-reenterability

 test/app/socket.result   | 48 +++++++++++++++++++++++++++++++---------
 test/app/socket.skipcond |  3 ++-
 test/app/socket.test.lua | 31 +++++++++++++++++++-------
 3 files changed, 63 insertions(+), 19 deletions(-)

diff --git a/test/app/socket.result b/test/app/socket.result
index 2f002a37e..1a570b9fa 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -42,6 +42,29 @@ test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
 ---
 - true
 ...
+test_run:cmd("push filter '(/tmp/tarantool-test-socket)-[0-9]+' to '\\1'")
+---
+- true
+...
+-- /tmp/tarantool-test-socket-${TEST_RUN_WORKER_ID}
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function get_temp_socket_path()
+    local base_path = '/tmp/tarantool-test-socket'
+    local worker_id = os.getenv('TEST_RUN_WORKER_ID')
+    if not worker_id then
+        return base_path
+    end
+    return ('%s-%s'):format(base_path, worker_id)
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
 socket('PF_INET', 'SOCK_STREAM', 'tcp121222');
 ---
 - null
@@ -609,13 +632,16 @@ s:nonblock()
 ---
 - true
 ...
-os.remove('/tmp/tarantool-test-socket')
+path = get_temp_socket_path()
+---
+...
+os.remove(path)
 ---
 - null
 - '/tmp/tarantool-test-socket: No such file or directory'
 - 2
 ...
-s:bind('unix/', '/tmp/tarantool-test-socket')
+s:bind('unix/', path)
 ---
 - true
 ...
@@ -634,7 +660,7 @@ sc:nonblock(true)
 ---
 - true
 ...
-sc:sysconnect('unix/', '/tmp/tarantool-test-socket')
+sc:sysconnect('unix/', path)
 ---
 - true
 ...
@@ -673,7 +699,7 @@ s:close()
 ---
 - true
 ...
-_ = os.remove('/tmp/tarantool-test-socket')
+_ = os.remove(path)
 ---
 ...
 test_run:cmd("setopt delimiter ';'")
@@ -963,7 +989,7 @@ socket.tcp_connect('127.0.0.1', port), errno() == errno.ECONNREFUSED
 - true
 ...
 -- AF_UNIX
-path = '/tmp/tarantool-test-socket'
+path = get_temp_socket_path()
 ---
 ...
 _ = os.remove(path)
@@ -1161,7 +1187,7 @@ master:close()
 f = nil
 ---
 ...
-path = '/tmp/tarantool-test-socket'
+path = get_temp_socket_path()
 ---
 ...
 s = socket('PF_UNIX', 'SOCK_STREAM', 0)
@@ -1599,7 +1625,7 @@ s = nil
 ---
 ...
 -- start AF_UNIX server with dead socket exists
-path = '/tmp/tarantool-test-socket'
+path = get_temp_socket_path()
 ---
 ...
 s = socket('AF_UNIX', 'SOCK_STREAM', 0)
@@ -1691,9 +1717,7 @@ test_run:cmd("setopt delimiter ';'")
 err = nil;
 ---
 ...
-path = '/tmp/tarantool-test-socket';
----
-...
+path = get_temp_socket_path()
 for i = 1, 10 do
     local server = socket.tcp_server('unix/', path, function() end)
     if not server then
@@ -2806,6 +2830,10 @@ addr = server:name()
 client = socket.tcp_connect(addr.host, addr.port)
 ---
 ...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
 echo_fiber ~= nil
 ---
 - true
diff --git a/test/app/socket.skipcond b/test/app/socket.skipcond
index 80f2117b5..c7d03a681 100644
--- a/test/app/socket.skipcond
+++ b/test/app/socket.skipcond
@@ -5,7 +5,8 @@ import os.path
 import socket
 import os
 
-test_path = '/tmp/tarantool-test-socket'
+worker_id = os.environ['TEST_RUN_WORKER_ID']
+test_path = '/tmp/tarantool-test-socket-' + worker_id
 
 if os.path.exists(test_path):
     os.remove(test_path)
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index c0b001449..752fdc093 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -12,6 +12,19 @@ type(socket)
 env = require('test_run')
 test_run = env.new()
 test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
+test_run:cmd("push filter '(/tmp/tarantool-test-socket)-[0-9]+' to '\\1'")
+
+-- /tmp/tarantool-test-socket-${TEST_RUN_WORKER_ID}
+test_run:cmd("setopt delimiter ';'")
+function get_temp_socket_path()
+    local base_path = '/tmp/tarantool-test-socket'
+    local worker_id = os.getenv('TEST_RUN_WORKER_ID')
+    if not worker_id then
+        return base_path
+    end
+    return ('%s-%s'):format(base_path, worker_id)
+end;
+test_run:cmd("setopt delimiter ''");
 
 socket('PF_INET', 'SOCK_STREAM', 'tcp121222');
 
@@ -185,14 +198,15 @@ s ~= nil
 s:nonblock()
 s:nonblock(true)
 s:nonblock()
-os.remove('/tmp/tarantool-test-socket')
-s:bind('unix/', '/tmp/tarantool-test-socket')
+path = get_temp_socket_path()
+os.remove(path)
+s:bind('unix/', path)
 sc ~= nil
 s:listen(1234)
 
 sc = socket('PF_UNIX', 'SOCK_STREAM', 0)
 sc:nonblock(true)
-sc:sysconnect('unix/', '/tmp/tarantool-test-socket')
+sc:sysconnect('unix/', path)
 sc:error()
 
 s:readable()
@@ -205,7 +219,7 @@ sc:close()
 sa:close()
 s:close()
 
-_ = os.remove('/tmp/tarantool-test-socket')
+_ = os.remove(path)
 
 test_run:cmd("setopt delimiter ';'")
 function aexitst(ai, hostnames, port)
@@ -306,7 +320,7 @@ s:close()
 socket.tcp_connect('127.0.0.1', port), errno() == errno.ECONNREFUSED
 
 -- AF_UNIX
-path = '/tmp/tarantool-test-socket'
+path = get_temp_socket_path()
 _ = os.remove(path)
 s = socket('AF_UNIX', 'SOCK_STREAM', 0)
 s:bind('unix/', path)
@@ -383,7 +397,7 @@ master:close()
 f = nil
 
 
-path = '/tmp/tarantool-test-socket'
+path = get_temp_socket_path()
 s = socket('PF_UNIX', 'SOCK_STREAM', 0)
 s:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true)
 s:error()
@@ -531,7 +545,7 @@ yaml.decode(yaml.encode(s)).fd == s:fd()
 s = nil
 
 -- start AF_UNIX server with dead socket exists
-path = '/tmp/tarantool-test-socket'
+path = get_temp_socket_path()
 s = socket('AF_UNIX', 'SOCK_STREAM', 0)
 s:bind('unix/', path)
 s:close()
@@ -569,7 +583,7 @@ f:cancel()
 -- and used by a newly started one.
 test_run:cmd("setopt delimiter ';'")
 err = nil;
-path = '/tmp/tarantool-test-socket';
+path = get_temp_socket_path()
 for i = 1, 10 do
     local server = socket.tcp_server('unix/', path, function() end)
     if not server then
@@ -954,6 +968,7 @@ end, name = 'echoserv'});
 test_run:cmd("setopt delimiter ''");
 addr = server:name()
 client = socket.tcp_connect(addr.host, addr.port)
+test_run:cmd("setopt delimiter ''");
 echo_fiber ~= nil
 client:write('hello')
 client:read(5, 0.1) == 'hello'
-- 
2.19.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] test: fix unix socket conflict in socket.test.lua
  2018-10-25  2:21 [PATCH] test: fix unix socket conflict in socket.test.lua Alexander Turenko
@ 2018-10-25  9:43 ` Vladimir Davydov
  2018-10-29  8:25   ` [tarantool-patches] " Alexander Turenko
  0 siblings, 1 reply; 6+ messages in thread
From: Vladimir Davydov @ 2018-10-25  9:43 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: Sergei Voronezhskii, tarantool-patches

On Thu, Oct 25, 2018 at 05:21:45AM +0300, Alexander Turenko wrote:
> It is needed to run the test in parallel on several test-run workers to
> investigate flaky failures of the test.

I don't think I understand. Do you mean that this test doesn't fix the
test flakiness and is only needed for further investigation?

> diff --git a/test/app/socket.result b/test/app/socket.result
> index 2f002a37e..1a570b9fa 100644
> --- a/test/app/socket.result
> +++ b/test/app/socket.result
> @@ -42,6 +42,29 @@ test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
>  ---
>  - true
>  ...
> +test_run:cmd("push filter '(/tmp/tarantool-test-socket)-[0-9]+' to '\\1'")
> +---
> +- true
> +...
> +-- /tmp/tarantool-test-socket-${TEST_RUN_WORKER_ID}
> +test_run:cmd("setopt delimiter ';'")
> +---
> +- true
> +...
> +function get_temp_socket_path()
> +    local base_path = '/tmp/tarantool-test-socket'
> +    local worker_id = os.getenv('TEST_RUN_WORKER_ID')
> +    if not worker_id then
> +        return base_path
> +    end
> +    return ('%s-%s'):format(base_path, worker_id)
> +end;

Why don't you simply create the file in the worker directory?

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [tarantool-patches] Re: [PATCH] test: fix unix socket conflict in socket.test.lua
  2018-10-25  9:43 ` Vladimir Davydov
@ 2018-10-29  8:25   ` Alexander Turenko
  2018-10-29  9:32     ` Vladimir Davydov
  0 siblings, 1 reply; 6+ messages in thread
From: Alexander Turenko @ 2018-10-29  8:25 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: Sergei Voronezhskii, tarantool-patches

On Thu, Oct 25, 2018 at 12:43:39PM +0300, Vladimir Davydov wrote:
> On Thu, Oct 25, 2018 at 05:21:45AM +0300, Alexander Turenko wrote:
> > It is needed to run the test in parallel on several test-run workers to
> > investigate flaky failures of the test.
> 
> I don't think I understand. Do you mean that this test doesn't fix the
> test flakiness and is only needed for further investigation?
> 

Yes (but now I have added some fixes that can eliminate some flaky
failures). It is often convenient to run something like...

```
TEST_RUN_TESTS=$(for i in $(seq 1 100); do echo -n "app/ "; done) make test
```

...to prove the suite is ready to run in parallel mode.

> > diff --git a/test/app/socket.result b/test/app/socket.result
> > index 2f002a37e..1a570b9fa 100644
> > --- a/test/app/socket.result
> > +++ b/test/app/socket.result
> > @@ -42,6 +42,29 @@ test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
> >  ---
> >  - true
> >  ...
> > +test_run:cmd("push filter '(/tmp/tarantool-test-socket)-[0-9]+' to '\\1'")
> > +---
> > +- true
> > +...
> > +-- /tmp/tarantool-test-socket-${TEST_RUN_WORKER_ID}
> > +test_run:cmd("setopt delimiter ';'")
> > +---
> > +- true
> > +...
> > +function get_temp_socket_path()
> > +    local base_path = '/tmp/tarantool-test-socket'
> > +    local worker_id = os.getenv('TEST_RUN_WORKER_ID')
> > +    if not worker_id then
> > +        return base_path
> > +    end
> > +    return ('%s-%s'):format(base_path, worker_id)
> > +end;
> 
> Why don't you simply create the file in the worker directory?

Sure, I can. Rewritten the test. But socket.skipcond still uses
/tmp/tarantool-test-socket-${WORKER_ID}, because its working directory
is `test`.

The whole new patch is below.

commit 224fcdf2a5e8f84971253f1d585b94213d06a384
Author: Alexander Turenko <alexander.turenko@tarantool.org>
Date:   Thu Oct 25 04:06:46 2018 +0300

    test: fix unix socket conflict in socket.test.lua
    
    Increased socket.readable / socket.wait timeouts.
    
    Rewritten port choosing: repeat bind+listen until succeeds, exclude
    incorrect port 65536 from the range.
    
    All these changes are needed to run the test in parallel on several
    test-run workers to investigate flaky failures of the test / of the
    suite. Some of these changes can also eliminate possible flaky failures.

diff --git a/test/app/socket.result b/test/app/socket.result
index 2f002a37e..1209ec218 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -42,6 +42,9 @@ test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
 ---
 - true
 ...
+WAIT_COND_TIME = 10
+---
+...
 socket('PF_INET', 'SOCK_STREAM', 'tcp121222');
 ---
 - null
@@ -104,11 +107,11 @@ s:nonblock(true)
 ---
 - true
 ...
-s:readable(.01)
+s:readable(.1)
 ---
 - true
 ...
-s:wait(.01)
+s:wait(.1)
 ---
 - RW
 ...
@@ -609,13 +612,16 @@ s:nonblock()
 ---
 - true
 ...
-os.remove('/tmp/tarantool-test-socket')
+path = 'tarantool-test-socket'
+---
+...
+os.remove(path)
 ---
 - null
-- '/tmp/tarantool-test-socket: No such file or directory'
+- 'tarantool-test-socket: No such file or directory'
 - 2
 ...
-s:bind('unix/', '/tmp/tarantool-test-socket')
+s:bind('unix/', path)
 ---
 - true
 ...
@@ -634,7 +640,7 @@ sc:nonblock(true)
 ---
 - true
 ...
-sc:sysconnect('unix/', '/tmp/tarantool-test-socket')
+sc:sysconnect('unix/', path)
 ---
 - true
 ...
@@ -673,7 +679,7 @@ s:close()
 ---
 - true
 ...
-_ = os.remove('/tmp/tarantool-test-socket')
+_ = os.remove(path)
 ---
 ...
 test_run:cmd("setopt delimiter ';'")
@@ -963,7 +969,7 @@ socket.tcp_connect('127.0.0.1', port), errno() == errno.ECONNREFUSED
 - true
 ...
 -- AF_UNIX
-path = '/tmp/tarantool-test-socket'
+path = 'tarantool-test-socket'
 ---
 ...
 _ = os.remove(path)
@@ -1109,27 +1115,25 @@ master:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true)
 ---
 - true
 ...
-port = 32768 + math.random(32768)
----
-...
-attempt = 0
+port = 32768 + math.random(0, 32767)
 ---
 ...
+-- SO_REUSEADDR allows to bind to the same source addr:port twice,
+-- so listen() can return EADDRINUSE and so we check it within
+-- wait_cond().
 test_run:cmd("setopt delimiter ';'")
 ---
 - true
 ...
-while attempt < 10 do
-    if not master:bind('127.0.0.1', port)  then
+test_run:wait_cond(function()
+    local ok = master:bind('127.0.0.1', port)
+    local ok = ok and master:listen()
+    if not ok then
         port = 32768 + math.random(32768)
-        attempt = attempt + 1
-    else
-        break
+        return false, master:error()
     end
-end;
----
-...
-master:listen();
+    return true
+end, WAIT_COND_TIME);
 ---
 - true
 ...
@@ -1161,7 +1165,7 @@ master:close()
 f = nil
 ---
 ...
-path = '/tmp/tarantool-test-socket'
+path = 'tarantool-test-socket'
 ---
 ...
 s = socket('PF_UNIX', 'SOCK_STREAM', 0)
@@ -1599,7 +1603,7 @@ s = nil
 ---
 ...
 -- start AF_UNIX server with dead socket exists
-path = '/tmp/tarantool-test-socket'
+path = 'tarantool-test-socket'
 ---
 ...
 s = socket('AF_UNIX', 'SOCK_STREAM', 0)
@@ -1691,9 +1695,7 @@ test_run:cmd("setopt delimiter ';'")
 err = nil;
 ---
 ...
-path = '/tmp/tarantool-test-socket';
----
-...
+path = 'tarantool-test-socket'
 for i = 1, 10 do
     local server = socket.tcp_server('unix/', path, function() end)
     if not server then
diff --git a/test/app/socket.skipcond b/test/app/socket.skipcond
index 80f2117b5..c7d03a681 100644
--- a/test/app/socket.skipcond
+++ b/test/app/socket.skipcond
@@ -5,7 +5,8 @@ import os.path
 import socket
 import os
 
-test_path = '/tmp/tarantool-test-socket'
+worker_id = os.environ['TEST_RUN_WORKER_ID']
+test_path = '/tmp/tarantool-test-socket-' + worker_id
 
 if os.path.exists(test_path):
     os.remove(test_path)
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index c0b001449..9901352b1 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -13,6 +13,8 @@ env = require('test_run')
 test_run = env.new()
 test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
 
+WAIT_COND_TIME = 10
+
 socket('PF_INET', 'SOCK_STREAM', 'tcp121222');
 
 s = socket('PF_INET', 'SOCK_STREAM', 'tcp')
@@ -37,8 +39,8 @@ s:nonblock(false)
 s:nonblock()
 s:nonblock(true)
 
-s:readable(.01)
-s:wait(.01)
+s:readable(.1)
+s:wait(.1)
 socket.iowait(s:fd(), 'RW')
 socket.iowait(s:fd(), 3)
 socket.iowait(s:fd(), 'R')
@@ -185,14 +187,15 @@ s ~= nil
 s:nonblock()
 s:nonblock(true)
 s:nonblock()
-os.remove('/tmp/tarantool-test-socket')
-s:bind('unix/', '/tmp/tarantool-test-socket')
+path = 'tarantool-test-socket'
+os.remove(path)
+s:bind('unix/', path)
 sc ~= nil
 s:listen(1234)
 
 sc = socket('PF_UNIX', 'SOCK_STREAM', 0)
 sc:nonblock(true)
-sc:sysconnect('unix/', '/tmp/tarantool-test-socket')
+sc:sysconnect('unix/', path)
 sc:error()
 
 s:readable()
@@ -205,7 +208,7 @@ sc:close()
 sa:close()
 s:close()
 
-_ = os.remove('/tmp/tarantool-test-socket')
+_ = os.remove(path)
 
 test_run:cmd("setopt delimiter ';'")
 function aexitst(ai, hostnames, port)
@@ -306,7 +309,7 @@ s:close()
 socket.tcp_connect('127.0.0.1', port), errno() == errno.ECONNREFUSED
 
 -- AF_UNIX
-path = '/tmp/tarantool-test-socket'
+path = 'tarantool-test-socket'
 _ = os.remove(path)
 s = socket('AF_UNIX', 'SOCK_STREAM', 0)
 s:bind('unix/', path)
@@ -357,18 +360,20 @@ s:error()
 -- random port
 master = socket('PF_INET', 'SOCK_STREAM', 'tcp')
 master:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true)
-port = 32768 + math.random(32768)
-attempt = 0
+port = 32768 + math.random(0, 32767)
+-- SO_REUSEADDR allows to bind to the same source addr:port twice,
+-- so listen() can return EADDRINUSE and so we check it within
+-- wait_cond().
 test_run:cmd("setopt delimiter ';'")
-while attempt < 10 do
-    if not master:bind('127.0.0.1', port)  then
+test_run:wait_cond(function()
+    local ok = master:bind('127.0.0.1', port)
+    local ok = ok and master:listen()
+    if not ok then
         port = 32768 + math.random(32768)
-        attempt = attempt + 1
-    else
-        break
+        return false, master:error()
     end
-end;
-master:listen();
+    return true
+end, WAIT_COND_TIME);
 function gh361()
     local s = socket('PF_INET', 'SOCK_STREAM', 'tcp')
     s:sysconnect('127.0.0.1', port)
@@ -383,7 +388,7 @@ master:close()
 f = nil
 
 
-path = '/tmp/tarantool-test-socket'
+path = 'tarantool-test-socket'
 s = socket('PF_UNIX', 'SOCK_STREAM', 0)
 s:setsockopt('SOL_SOCKET', 'SO_REUSEADDR', true)
 s:error()
@@ -531,7 +536,7 @@ yaml.decode(yaml.encode(s)).fd == s:fd()
 s = nil
 
 -- start AF_UNIX server with dead socket exists
-path = '/tmp/tarantool-test-socket'
+path = 'tarantool-test-socket'
 s = socket('AF_UNIX', 'SOCK_STREAM', 0)
 s:bind('unix/', path)
 s:close()
@@ -569,7 +574,7 @@ f:cancel()
 -- and used by a newly started one.
 test_run:cmd("setopt delimiter ';'")
 err = nil;
-path = '/tmp/tarantool-test-socket';
+path = 'tarantool-test-socket'
 for i = 1, 10 do
     local server = socket.tcp_server('unix/', path, function() end)
     if not server then

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [tarantool-patches] Re: [PATCH] test: fix unix socket conflict in socket.test.lua
  2018-10-29  8:25   ` [tarantool-patches] " Alexander Turenko
@ 2018-10-29  9:32     ` Vladimir Davydov
  2018-10-29 11:29       ` Alexander Turenko
  0 siblings, 1 reply; 6+ messages in thread
From: Vladimir Davydov @ 2018-10-29  9:32 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: Sergei Voronezhskii, tarantool-patches

On Mon, Oct 29, 2018 at 11:25:29AM +0300, Alexander Turenko wrote:
> On Thu, Oct 25, 2018 at 12:43:39PM +0300, Vladimir Davydov wrote:
> > On Thu, Oct 25, 2018 at 05:21:45AM +0300, Alexander Turenko wrote:
> > > It is needed to run the test in parallel on several test-run workers to
> > > investigate flaky failures of the test.
> > 
> > I don't think I understand. Do you mean that this test doesn't fix the
> > test flakiness and is only needed for further investigation?
> > 
> 
> Yes (but now I have added some fixes that can eliminate some flaky
> failures). It is often convenient to run something like...
> 
> ```
> TEST_RUN_TESTS=$(for i in $(seq 1 100); do echo -n "app/ "; done) make test
> ```
> 
> ...to prove the suite is ready to run in parallel mode.
> 
> > > diff --git a/test/app/socket.result b/test/app/socket.result
> > > index 2f002a37e..1a570b9fa 100644
> > > --- a/test/app/socket.result
> > > +++ b/test/app/socket.result
> > > @@ -42,6 +42,29 @@ test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
> > >  ---
> > >  - true
> > >  ...
> > > +test_run:cmd("push filter '(/tmp/tarantool-test-socket)-[0-9]+' to '\\1'")
> > > +---
> > > +- true
> > > +...
> > > +-- /tmp/tarantool-test-socket-${TEST_RUN_WORKER_ID}
> > > +test_run:cmd("setopt delimiter ';'")
> > > +---
> > > +- true
> > > +...
> > > +function get_temp_socket_path()
> > > +    local base_path = '/tmp/tarantool-test-socket'
> > > +    local worker_id = os.getenv('TEST_RUN_WORKER_ID')
> > > +    if not worker_id then
> > > +        return base_path
> > > +    end
> > > +    return ('%s-%s'):format(base_path, worker_id)
> > > +end;
> > 
> > Why don't you simply create the file in the worker directory?
> 
> Sure, I can. Rewritten the test. But socket.skipcond still uses
> /tmp/tarantool-test-socket-${WORKER_ID}, because its working directory
> is `test`.
> 
> The whole new patch is below.
> 
> commit 224fcdf2a5e8f84971253f1d585b94213d06a384
> Author: Alexander Turenko <alexander.turenko@tarantool.org>
> Date:   Thu Oct 25 04:06:46 2018 +0300
> 
>     test: fix unix socket conflict in socket.test.lua
>     
>     Increased socket.readable / socket.wait timeouts.
>     
>     Rewritten port choosing: repeat bind+listen until succeeds, exclude
>     incorrect port 65536 from the range.
>     
>     All these changes are needed to run the test in parallel on several
>     test-run workers to investigate flaky failures of the test / of the
>     suite. Some of these changes can also eliminate possible flaky failures.

./test-run -j -1 app/socket.test.lua doesn't work with this patch:

} app/socket.test.lua
} Test.run() received the following error:
} Traceback (most recent call last):
}   File "/home/vlad/src/tarantool/test-run/lib/test.py", line 174, in run
}     execfile(self.skip_cond, dict(locals(), **server.__dict__))
}   File "app/socket.skipcond", line 8, in <module>
}     worker_id = os.environ['TEST_RUN_WORKER_ID']
}   File "/usr/lib/python2.7/UserDict.py", line 40, in __getitem__
}     raise KeyError(key)
} KeyError: 'TEST_RUN_WORKER_ID'

I think you shouldn't use TEST_RUN_WORKER_ID in skipcond after all.
Instead you should make a temporary file.

Also, I've seen the following failure once. Not sure if it's related:

} [001] app/socket.test.lua                                             [ fail ]
} [001]
} [001] Test failed! Result content mismatch:
} [001] --- app/socket.result     Mon Oct 29 12:06:39 2018
} [001] +++ socket.reject Mon Oct 29 12:13:34 2018
} [001] @@ -2810,7 +2810,7 @@
} [001]  ...
} [001]  echo_fiber ~= nil
} [001]  ---
} [001] -- true
} [001] +- false
} [001]  ...
} [001]  client:write('hello')
} [001]  ---
} [001]
} [001] Last 15 lines of Tarantool Log file [Instance "app"][/home/vlad/src/tarantool/test/var/001_app/app.log]:
} [001] 2018-10-29 12:13:34.188 [20123] main/144/server/unix/:tarantool-test-soc I> started
} [001] 2018-10-29 12:13:34.189 [20123] main/142/server/unix/:tarantool-test-soc utils.c:939 E> LuajitError: builtin/socket.lua:80: attempt to use closed socket
} [001] 2018-10-29 12:13:34.189 [20123] main/146/server/unix/:tarantool-test-soc I> started
} [001] 2018-10-29 12:13:34.190 [20123] main/144/server/unix/:tarantool-test-soc utils.c:939 E> LuajitError: builtin/socket.lua:80: attempt to use closed socket
} [001] 2018-10-29 12:13:34.190 [20123] main/148/server/unix/:tarantool-test-soc I> started
} [001] 2018-10-29 12:13:34.190 [20123] main/146/server/unix/:tarantool-test-soc utils.c:939 E> LuajitError: builtin/socket.lua:80: attempt to use closed socket
} [001] 2018-10-29 12:13:34.190 [20123] main/150/server/unix/:tarantool-test-soc I> started
} [001] 2018-10-29 12:13:34.191 [20123] main/148/server/unix/:tarantool-test-soc utils.c:939 E> LuajitError: builtin/socket.lua:80: attempt to use closed socket
} [001] 2018-10-29 12:13:34.191 [20123] main/152/server/unix/:tarantool-test-soc I> started
} [001] 2018-10-29 12:13:34.191 [20123] main/150/server/unix/:tarantool-test-soc utils.c:939 E> LuajitError: builtin/socket.lua:80: attempt to use closed socket
} [001] 2018-10-29 12:13:34.191 [20123] main/154/server/unix/:tarantool-test-soc I> started
} [001] 2018-10-29 12:13:34.192 [20123] main/152/server/unix/:tarantool-test-soc utils.c:939 E> LuajitError: builtin/socket.lua:80: attempt to use closed socket
} [001] 2018-10-29 12:13:34.192 [20123] main/154/server/unix/:tarantool-test-soc utils.c:939 E> LuajitError: builtin/socket.lua:80: attempt to use closed socket
} [001] 2018-10-29 12:13:34.364 [20123] main/161/echoserv/::1:0 I> started
} [001] 2018-10-29 12:13:34.417 [20123] main/161/echoserv/::1:0 utils.c:939 E> LuajitError: builtin/socket.lua:80: attempt to use closed socket
} [Main process] Got failed test; gently terminate all workers...
} [001] Worker "001_app" got failed test; stopping the server...
} ---------------------------------------------------------------------------------
} Statistics:
} * fail: 1
} Failed tasks:
} - [app/socket.test.lua, null]
} # logfile:        /home/vlad/src/tarantool/test/var/log/001_app.log
} # reproduce file: /home/vlad/src/tarantool/test/var/reproduce/001_app.list.yaml
} ---
} - [app/socket.test.lua, null]
} ...

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [tarantool-patches] Re: [PATCH] test: fix unix socket conflict in socket.test.lua
  2018-10-29  9:32     ` Vladimir Davydov
@ 2018-10-29 11:29       ` Alexander Turenko
  2018-10-29 16:09         ` Vladimir Davydov
  0 siblings, 1 reply; 6+ messages in thread
From: Alexander Turenko @ 2018-10-29 11:29 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: Sergei Voronezhskii, tarantool-patches

On Mon, Oct 29, 2018 at 12:32:29PM +0300, Vladimir Davydov wrote:
> On Mon, Oct 29, 2018 at 11:25:29AM +0300, Alexander Turenko wrote:
> > On Thu, Oct 25, 2018 at 12:43:39PM +0300, Vladimir Davydov wrote:
> > > On Thu, Oct 25, 2018 at 05:21:45AM +0300, Alexander Turenko wrote:
> > > > It is needed to run the test in parallel on several test-run workers to
> > > > investigate flaky failures of the test.
> > > 
> > > I don't think I understand. Do you mean that this test doesn't fix the
> > > test flakiness and is only needed for further investigation?
> > > 
> > 
> > Yes (but now I have added some fixes that can eliminate some flaky
> > failures). It is often convenient to run something like...
> > 
> > ```
> > TEST_RUN_TESTS=$(for i in $(seq 1 100); do echo -n "app/ "; done) make test
> > ```
> > 
> > ...to prove the suite is ready to run in parallel mode.
> > 
> > > > diff --git a/test/app/socket.result b/test/app/socket.result
> > > > index 2f002a37e..1a570b9fa 100644
> > > > --- a/test/app/socket.result
> > > > +++ b/test/app/socket.result
> > > > @@ -42,6 +42,29 @@ test_run:cmd("push filter '(error: .builtin/.*[.]lua):[0-9]+' to '\\1'")
> > > >  ---
> > > >  - true
> > > >  ...
> > > > +test_run:cmd("push filter '(/tmp/tarantool-test-socket)-[0-9]+' to '\\1'")
> > > > +---
> > > > +- true
> > > > +...
> > > > +-- /tmp/tarantool-test-socket-${TEST_RUN_WORKER_ID}
> > > > +test_run:cmd("setopt delimiter ';'")
> > > > +---
> > > > +- true
> > > > +...
> > > > +function get_temp_socket_path()
> > > > +    local base_path = '/tmp/tarantool-test-socket'
> > > > +    local worker_id = os.getenv('TEST_RUN_WORKER_ID')
> > > > +    if not worker_id then
> > > > +        return base_path
> > > > +    end
> > > > +    return ('%s-%s'):format(base_path, worker_id)
> > > > +end;
> > > 
> > > Why don't you simply create the file in the worker directory?
> > 
> > Sure, I can. Rewritten the test. But socket.skipcond still uses
> > /tmp/tarantool-test-socket-${WORKER_ID}, because its working directory
> > is `test`.
> > 
> > The whole new patch is below.
> > 
> > commit 224fcdf2a5e8f84971253f1d585b94213d06a384
> > Author: Alexander Turenko <alexander.turenko@tarantool.org>
> > Date:   Thu Oct 25 04:06:46 2018 +0300
> > 
> >     test: fix unix socket conflict in socket.test.lua
> >     
> >     Increased socket.readable / socket.wait timeouts.
> >     
> >     Rewritten port choosing: repeat bind+listen until succeeds, exclude
> >     incorrect port 65536 from the range.
> >     
> >     All these changes are needed to run the test in parallel on several
> >     test-run workers to investigate flaky failures of the test / of the
> >     suite. Some of these changes can also eliminate possible flaky failures.
> 
> ./test-run -j -1 app/socket.test.lua doesn't work with this patch:
> 
> } app/socket.test.lua
> } Test.run() received the following error:
> } Traceback (most recent call last):
> }   File "/home/vlad/src/tarantool/test-run/lib/test.py", line 174, in run
> }     execfile(self.skip_cond, dict(locals(), **server.__dict__))
> }   File "app/socket.skipcond", line 8, in <module>
> }     worker_id = os.environ['TEST_RUN_WORKER_ID']
> }   File "/usr/lib/python2.7/UserDict.py", line 40, in __getitem__
> }     raise KeyError(key)
> } KeyError: 'TEST_RUN_WORKER_ID'
> 
> I think you shouldn't use TEST_RUN_WORKER_ID in skipcond after all.
> Instead you should make a temporary file.

Thanks for catching it up. Fixed. See the patch at end of the email.

> Also, I've seen the following failure once. Not sure if it's related:
> 
> } [001] app/socket.test.lua                                             [ fail ]
> } [001]
> } [001] Test failed! Result content mismatch:
> } [001] --- app/socket.result     Mon Oct 29 12:06:39 2018
> } [001] +++ socket.reject Mon Oct 29 12:13:34 2018
> } [001] @@ -2810,7 +2810,7 @@
> } [001]  ...
> } [001]  echo_fiber ~= nil
> } [001]  ---
> } [001] -- true
> } [001] +- false
> } [001]  ...
> } [001]  client:write('hello')
> } [001]  ---
> } [001]

I've observed some fails too, but when run the test, say, 500 times on
16 workers. I think it is now stable enough to unblock the testing
scenario like `./test-run.py app/ app/ app/ app/ app/` at least on
Linux. (I've a way more frequent fails on Mac OS.)

Changes:

diff --git a/test/app/socket.skipcond b/test/app/socket.skipcond
index c7d03a681..89a293f81 100644
--- a/test/app/socket.skipcond
+++ b/test/app/socket.skipcond
@@ -4,12 +4,10 @@ import re
 import os.path
 import socket
 import os
+import tempfile
 
-worker_id = os.environ['TEST_RUN_WORKER_ID']
-test_path = '/tmp/tarantool-test-socket-' + worker_id
-
-if os.path.exists(test_path):
-    os.remove(test_path)
+test_dir = tempfile.mkdtemp(prefix='tarantool-test-socket')
+test_path = os.path.join(test_dir, 'socket')
 
 s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
 try:
@@ -21,3 +19,4 @@ s.close()
 
 if os.path.exists(test_path):
     os.remove(test_path)
+    os.rmdir(test_dir)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [tarantool-patches] Re: [PATCH] test: fix unix socket conflict in socket.test.lua
  2018-10-29 11:29       ` Alexander Turenko
@ 2018-10-29 16:09         ` Vladimir Davydov
  0 siblings, 0 replies; 6+ messages in thread
From: Vladimir Davydov @ 2018-10-29 16:09 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: Sergei Voronezhskii, tarantool-patches

Pushed to 1.10

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-10-29 16:09 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-25  2:21 [PATCH] test: fix unix socket conflict in socket.test.lua Alexander Turenko
2018-10-25  9:43 ` Vladimir Davydov
2018-10-29  8:25   ` [tarantool-patches] " Alexander Turenko
2018-10-29  9:32     ` Vladimir Davydov
2018-10-29 11:29       ` Alexander Turenko
2018-10-29 16:09         ` Vladimir Davydov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox