Tarantool development patches archive
 help / color / mirror / Atom feed
* [tarantool-patches] [PATCH] Fix lua socket polling in case of a spurious wakeup
@ 2018-08-02 15:44 Georgy Kirichenko
  2018-08-06 14:27 ` [tarantool-patches] " Vladislav Shpilevoy
  0 siblings, 1 reply; 3+ messages in thread
From: Georgy Kirichenko @ 2018-08-02 15:44 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Georgy Kirichenko

socket_writable/socket_readable handles socket.iowait spurious wakeup
until event is happened or timeout is exceeded.

Fixes #3344
---
Issue: https://github.com/tarantool/tarantool/issues/3344
Branch: https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-3344-socket-io-spurios-wakeup
src/lua/socket.lua       | 67 ++++++++++++++++++++------------------
 test/app/socket.result   | 70 ++++++++++++++++++++++++++++++++++++++++
 test/app/socket.test.lua | 29 +++++++++++++++++
 3 files changed, 135 insertions(+), 31 deletions(-)

diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index 06306eae2..9fcaee0e8 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -328,12 +328,18 @@ local function do_wait(self, what, timeout)
     self._errno = nil
     timeout = timeout or TIMEOUT_INFINITY
 
-    local res = internal.iowait(fd, what, timeout)
-    if res == 0 then
-        self._errno = boxerrno.ETIMEDOUT
-        return 0
+    local deadline = fiber.clock() + timeout
+    local events
+
+    while fiber.clock() <= deadline do
+        events = internal.iowait(fd, what, deadline - fiber.clock())
+        fiber.testcancel()
+        if events ~= 0 and events ~= '' then
+            return events
+        end
     end
-    return res
+    self._errno = boxerrno.ETIMEDOUT
+    return events
 end
 
 local function socket_readable(self, timeout)
@@ -662,7 +668,7 @@ local function read(self, limit, timeout, check, ...)
         return data
     end
 
-    while timeout > 0 do
+    while timeout >= 0 do
         local started = fiber.clock()
 
         assert(rbuf:size() < limit)
@@ -689,15 +695,9 @@ local function read(self, limit, timeout, check, ...)
             return nil
         end
 
-        if not socket_readable(self, timeout) then
-            return nil
-        end
-        if timeout <= 0 then
-            break
-        end
+        socket_readable(self, timeout)
         timeout = timeout - ( fiber.clock() - started )
     end
-    self._errno = boxerrno.ETIMEDOUT
     return nil
 end
 
@@ -736,7 +736,7 @@ local function socket_write(self, octets, timeout)
     end
 
     local started = fiber.clock()
-    while true do
+    while timeout >= 0 do
         local written = syswrite(self, p, e - p)
         if written == 0 then
             return p - s -- eof
@@ -750,11 +750,10 @@ local function socket_write(self, octets, timeout)
             return nil
         end
 
+        socket_writable(self, timeout)
         timeout = timeout - (fiber.clock() - started)
-        if timeout <= 0 or not socket_writable(self, timeout) then
-            break
-        end
     end
+    return p - s
 end
 
 local function socket_send(self, octets, flags)
@@ -935,16 +934,19 @@ local function socket_tcp_connect(s, address, port, timeout)
     -- Wait until the connection is established or ultimately fails.
     -- In either condition the socket becomes writable. To tell these
     -- conditions appart SO_ERROR must be consulted (man connect).
-    if socket_writable(s, timeout) then
-        s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
-    else
-        s._errno = boxerrno.ETIMEDOUT
-    end
-    if s._errno ~= 0 then
-        return nil
+    local deadline = timeout + fiber.clock()
+    while deadline - fiber.clock() >= 0 do
+        if socket_writable(s, deadline - fiber.clock()) then
+            s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
+            if s._errno == 0 then
+                return true
+            else
+                return nil
+            end
+        end
     end
-    -- Connected
-    return true
+    s._errno = boxerrno.ETIMEDOUT
+    return nil
 end
 
 local function tcp_connect(host, port, timeout)
@@ -1005,7 +1007,8 @@ end
 local function tcp_server_loop(server, s, addr)
     fiber.name(format("%s/%s:%s", server.name, addr.host, addr.port), {truncate = true})
     log.info("started")
-    while socket_readable(s) do
+    while true do
+        socket_readable(s)
         local sc, from = socket_accept(s)
         if sc == nil then
             local errno = s._errno
@@ -1325,7 +1328,8 @@ end
 local function lsocket_tcp_accept(self)
     check_socket(self)
     local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
-    repeat
+    while deadline - fiber.clock() >= 0 do
+        socket_readable(self, deadline - fiber.clock())
         local client = socket_accept(self)
         if client then
             setmetatable(client, lsocket_tcp_client_mt)
@@ -1335,7 +1339,7 @@ local function lsocket_tcp_accept(self)
         if not errno_is_transient[errno] then
             break
         end
-    until not socket_readable(self, deadline - fiber.clock())
+    end
     return nil, socket_error(self)
 end
 
@@ -1390,7 +1394,8 @@ local function lsocket_tcp_receive(self, pattern, prefix)
     elseif pattern == "*a" then
         local result = { prefix }
         local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
-        repeat
+        while deadline - fiber.clock() >= 0 do
+            socket_readable(self, deadline - fiber.clock())
             local data = socket_sysread(self)
             if data == nil then
                 if not errno_is_transient[self._errno] then
@@ -1401,7 +1406,7 @@ local function lsocket_tcp_receive(self, pattern, prefix)
             else
                 table.insert(result, data)
             end
-        until not socket_readable(self, deadline - fiber.clock())
+        end
         if #result == 1 then
             return nil, 'closed', table.concat(result)
         end
diff --git a/test/app/socket.result b/test/app/socket.result
index b4d4a9a78..d6f6762e0 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -2230,6 +2230,76 @@ s:close()
 ---
 - 1
 ...
+--gh-3344 connection should not fail is there is a spurious wakeup for io fiber
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+echo_fiber = nil
+server = socket.tcp_server('localhost', 0, { handler = function(s)
+    echo_fiber = fiber.self()
+    while true do
+        local b = s:read(1, 0.1)
+        if b ~= nil then
+            s:write(b)
+        end
+    end
+end, name = 'echoserv'});
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+addr = server:name()
+---
+...
+client = socket.tcp_connect(addr.host, addr.port)
+---
+...
+echo_fiber ~= nil
+---
+- true
+...
+client:write('hello')
+---
+- 5
+...
+client:read(5, 0.1) == 'hello'
+---
+- true
+...
+-- send spurious wakeup
+fiber.wakeup(echo_fiber)
+---
+...
+fiber.sleep(0)
+---
+...
+client:write('world')
+---
+- 5
+...
+client:read(5, 0.1) == 'world'
+---
+- true
+...
+-- cancel fiber
+fiber.cancel(echo_fiber)
+---
+...
+client:write('!')
+---
+- 1
+...
+client:read(1, 0.1) == ''
+---
+- true
+...
+server:close()
+---
+- true
+...
 test_run:cmd("clear filter")
 ---
 - true
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index f6539438b..4e44b7ac0 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -736,4 +736,33 @@ sc:connect(host, port)
 sc:close()
 s:close()
 
+--gh-3344 connection should not fail is there is a spurious wakeup for io fiber
+test_run:cmd("setopt delimiter ';'")
+echo_fiber = nil
+server = socket.tcp_server('localhost', 0, { handler = function(s)
+    echo_fiber = fiber.self()
+    while true do
+        local b = s:read(1, 0.1)
+        if b ~= nil then
+            s:write(b)
+        end
+    end
+end, name = 'echoserv'});
+test_run:cmd("setopt delimiter ''");
+addr = server:name()
+client = socket.tcp_connect(addr.host, addr.port)
+echo_fiber ~= nil
+client:write('hello')
+client:read(5, 0.1) == 'hello'
+-- send spurious wakeup
+fiber.wakeup(echo_fiber)
+fiber.sleep(0)
+client:write('world')
+client:read(5, 0.1) == 'world'
+-- cancel fiber
+fiber.cancel(echo_fiber)
+client:write('!')
+client:read(1, 0.1) == ''
+server:close()
+
 test_run:cmd("clear filter")
-- 
2.18.0

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [tarantool-patches] Re: [PATCH] Fix lua socket polling in case of a spurious wakeup
  2018-08-02 15:44 [tarantool-patches] [PATCH] Fix lua socket polling in case of a spurious wakeup Georgy Kirichenko
@ 2018-08-06 14:27 ` Vladislav Shpilevoy
  0 siblings, 0 replies; 3+ messages in thread
From: Vladislav Shpilevoy @ 2018-08-06 14:27 UTC (permalink / raw)
  To: tarantool-patches, Georgy Kirichenko

Hi! See 7 comments below. Last 6 of them I've fixed on the branch
in a separate commit. Please, look, squash and fix the first comment
yourself.

On 02/08/2018 18:44, Georgy Kirichenko wrote:
> socket_writable/socket_readable handles socket.iowait spurious wakeup
> until event is happened or timeout is exceeded.
> 
> Fixes #3344
> ---
> Issue: https://github.com/tarantool/tarantool/issues/3344
> Branch: https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-3344-socket-io-spurios-wakeup

1. The test constantly fails:

======================================================================================
WORKR TEST                                            PARAMS          RESULT
---------------------------------------------------------------------------------
[001] app/socket.test.lua                                             [ fail ]
[001]
[001] Test failed! Result content mismatch:
[001] --- app/socket.result	Mon Aug  6 16:36:23 2018
[001] +++ app/socket.reject	Mon Aug  6 17:06:23 2018
[001] @@ -2294,7 +2294,7 @@
[001]  ...
[001]  client:read(1, 0.1) == ''
[001]  ---
[001] -- true
[001] +- false
[001]  ...
[001]  server:close()
[001]  ---

I have tried to fix it and it looks like socket close event is not
triggered or checked in socket_read() function. Please, fix.

> src/lua/socket.lua       | 67 ++++++++++++++++++++------------------
>   test/app/socket.result   | 70 ++++++++++++++++++++++++++++++++++++++++
>   test/app/socket.test.lua | 29 +++++++++++++++++
>   3 files changed, 135 insertions(+), 31 deletions(-)
> 
> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
> index 06306eae2..9fcaee0e8 100644
> --- a/src/lua/socket.lua
> +++ b/src/lua/socket.lua
> @@ -328,12 +328,18 @@ local function do_wait(self, what, timeout)
>       self._errno = nil
>       timeout = timeout or TIMEOUT_INFINITY
>   
> -    local res = internal.iowait(fd, what, timeout)
> -    if res == 0 then
> -        self._errno = boxerrno.ETIMEDOUT
> -        return 0
> +    local deadline = fiber.clock() + timeout
> +    local events
> +
> +    while fiber.clock() <= deadline do
> +        events = internal.iowait(fd, what, deadline - fiber.clock())
> +        fiber.testcancel()
> +        if events ~= 0 and events ~= '' then> +            return events
> +        end
>       end
> -    return res
> +    self._errno = boxerrno.ETIMEDOUT
> +    return events

2. Is it possible that you have an event after timeout? I believe,
events here is always 0. So you can return 0, and put 'local events'
declaration inside the 'while' loop above.

>   end
>   
>   local function socket_readable(self, timeout)
> @@ -935,16 +934,19 @@ local function socket_tcp_connect(s, address, port, timeout)
>       -- Wait until the connection is established or ultimately fails.
>       -- In either condition the socket becomes writable. To tell these
>       -- conditions appart SO_ERROR must be consulted (man connect).
> -    if socket_writable(s, timeout) then
> -        s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
> -    else
> -        s._errno = boxerrno.ETIMEDOUT
> -    end
> -    if s._errno ~= 0 then
> -        return nil
> +    local deadline = timeout + fiber.clock()
> +    while deadline - fiber.clock() >= 0 do

3. This 'while' can be removed since the wait is done inside
socket_writable.

> +        if socket_writable(s, deadline - fiber.clock()) then
> +            s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
> +            if s._errno == 0 then
> +                return true
> +            else
> +                return nil
> +            end
> +        end
>       end
> -    -- Connected
> -    return true
> +    s._errno = boxerrno.ETIMEDOUT

4. Socket_writable already sets errno.

> +    return nil
>   end
>   
>   local function tcp_connect(host, port, timeout)
> @@ -1005,7 +1007,8 @@ end
>   local function tcp_server_loop(server, s, addr)
>       fiber.name(format("%s/%s:%s", server.name, addr.host, addr.port), {truncate = true})
>       log.info("started")
> -    while socket_readable(s) do
> +    while true do
> +        socket_readable(s)

5. If the socket is not writable you should not accept anything.
The previous version was right.

>           local sc, from = socket_accept(s)
>           if sc == nil then
>               local errno = s._errno
> @@ -1325,7 +1328,8 @@ end
>   local function lsocket_tcp_accept(self)
>       check_socket(self)
>       local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
> -    repeat
> +    while deadline - fiber.clock() >= 0 do
> +        socket_readable(self, deadline - fiber.clock())

6. The same. If the socket is not writable, here is nothing to
accept.

>           local client = socket_accept(self)
>           if client then
>               setmetatable(client, lsocket_tcp_client_mt)
> @@ -1390,7 +1394,8 @@ local function lsocket_tcp_receive(self, pattern, prefix)
>       elseif pattern == "*a" then
>           local result = { prefix }
>           local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
> -        repeat
> +        while deadline - fiber.clock() >= 0 do
> +            socket_readable(self, deadline - fiber.clock())

7. The same.

>               local data = socket_sysread(self)
>               if data == nil then
>                   if not errno_is_transient[self._errno] then

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [tarantool-patches] [PATCH] Fix lua socket polling in case of a spurious wakeup
@ 2018-07-18 16:46 Georgy Kirichenko
  0 siblings, 0 replies; 3+ messages in thread
From: Georgy Kirichenko @ 2018-07-18 16:46 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Georgy Kirichenko

socket_writable/socket_readable may return before timeout is exceeded
with the false status in cause of a spurious wakeup and this should not be
treated as an EOF or an error.

Fixed #3344
---
Issue: https://github.com/tarantool/tarantool/issues/3344
Branch:
https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-3344-socket-io-spurios-wakeup
 src/lua/socket.lua       | 57 ++++++++++++++++----------------
 test/app/socket.result   | 70 ++++++++++++++++++++++++++++++++++++++++
 test/app/socket.test.lua | 29 +++++++++++++++++
 3 files changed, 129 insertions(+), 27 deletions(-)

diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index 06306eae2..6ebcea055 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -337,11 +337,15 @@ local function do_wait(self, what, timeout)
 end
 
 local function socket_readable(self, timeout)
-    return do_wait(self, 1, timeout) ~= 0
+    local result = do_wait(self, 1, timeout) ~= 0
+    fiber.testcancel()
+    return result
 end
 
 local function socket_writable(self, timeout)
-    return do_wait(self, 2, timeout) ~= 0
+    local result = do_wait(self, 2, timeout) ~= 0
+    fiber.testcancel()
+    return result
 end
 
 local function socket_wait(self, timeout)
@@ -662,7 +666,7 @@ local function read(self, limit, timeout, check, ...)
         return data
     end
 
-    while timeout > 0 do
+    while timeout >= 0 do
         local started = fiber.clock()
 
         assert(rbuf:size() < limit)
@@ -689,12 +693,7 @@ local function read(self, limit, timeout, check, ...)
             return nil
         end
 
-        if not socket_readable(self, timeout) then
-            return nil
-        end
-        if timeout <= 0 then
-            break
-        end
+        socket_readable(self, timeout)
         timeout = timeout - ( fiber.clock() - started )
     end
     self._errno = boxerrno.ETIMEDOUT
@@ -736,7 +735,7 @@ local function socket_write(self, octets, timeout)
     end
 
     local started = fiber.clock()
-    while true do
+    while timeout >= 0 do
         local written = syswrite(self, p, e - p)
         if written == 0 then
             return p - s -- eof
@@ -750,10 +749,8 @@ local function socket_write(self, octets, timeout)
             return nil
         end
 
+        socket_writable(self, timeout)
         timeout = timeout - (fiber.clock() - started)
-        if timeout <= 0 or not socket_writable(self, timeout) then
-            break
-        end
     end
 end
 
@@ -935,16 +932,19 @@ local function socket_tcp_connect(s, address, port, timeout)
     -- Wait until the connection is established or ultimately fails.
     -- In either condition the socket becomes writable. To tell these
     -- conditions appart SO_ERROR must be consulted (man connect).
-    if socket_writable(s, timeout) then
-        s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
-    else
-        s._errno = boxerrno.ETIMEDOUT
-    end
-    if s._errno ~= 0 then
-        return nil
+    local deadline = timeout + fiber.clock()
+    while deadline - fiber.clock() >= 0 do
+        if socket_writable(s, deadline - fiber.clock()) then
+            s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
+            if s._errno == 0 then
+                return true
+            else
+                return nil
+            end
+        end
     end
-    -- Connected
-    return true
+    s._errno = boxerrno.ETIMEDOUT
+    return nil
 end
 
 local function tcp_connect(host, port, timeout)
@@ -1005,7 +1005,8 @@ end
 local function tcp_server_loop(server, s, addr)
     fiber.name(format("%s/%s:%s", server.name, addr.host, addr.port), {truncate = true})
     log.info("started")
-    while socket_readable(s) do
+    while true do
+        socket_readable(s)
         local sc, from = socket_accept(s)
         if sc == nil then
             local errno = s._errno
@@ -1325,7 +1326,8 @@ end
 local function lsocket_tcp_accept(self)
     check_socket(self)
     local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
-    repeat
+    while deadline - fiber.clock() >= 0 do
+        socket_readable(self, deadline - fiber.clock())
         local client = socket_accept(self)
         if client then
             setmetatable(client, lsocket_tcp_client_mt)
@@ -1335,7 +1337,7 @@ local function lsocket_tcp_accept(self)
         if not errno_is_transient[errno] then
             break
         end
-    until not socket_readable(self, deadline - fiber.clock())
+    end
     return nil, socket_error(self)
 end
 
@@ -1390,7 +1392,8 @@ local function lsocket_tcp_receive(self, pattern, prefix)
     elseif pattern == "*a" then
         local result = { prefix }
         local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
-        repeat
+        while deadline - fiber.clock() >= 0 do
+            socket_readable(self, deadline - fiber.clock())
             local data = socket_sysread(self)
             if data == nil then
                 if not errno_is_transient[self._errno] then
@@ -1401,7 +1404,7 @@ local function lsocket_tcp_receive(self, pattern, prefix)
             else
                 table.insert(result, data)
             end
-        until not socket_readable(self, deadline - fiber.clock())
+        end
         if #result == 1 then
             return nil, 'closed', table.concat(result)
         end
diff --git a/test/app/socket.result b/test/app/socket.result
index b4d4a9a78..d6f6762e0 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -2230,6 +2230,76 @@ s:close()
 ---
 - 1
 ...
+--gh-3344 connection should not fail is there is a spurious wakeup for io fiber
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+echo_fiber = nil
+server = socket.tcp_server('localhost', 0, { handler = function(s)
+    echo_fiber = fiber.self()
+    while true do
+        local b = s:read(1, 0.1)
+        if b ~= nil then
+            s:write(b)
+        end
+    end
+end, name = 'echoserv'});
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+addr = server:name()
+---
+...
+client = socket.tcp_connect(addr.host, addr.port)
+---
+...
+echo_fiber ~= nil
+---
+- true
+...
+client:write('hello')
+---
+- 5
+...
+client:read(5, 0.1) == 'hello'
+---
+- true
+...
+-- send spurious wakeup
+fiber.wakeup(echo_fiber)
+---
+...
+fiber.sleep(0)
+---
+...
+client:write('world')
+---
+- 5
+...
+client:read(5, 0.1) == 'world'
+---
+- true
+...
+-- cancel fiber
+fiber.cancel(echo_fiber)
+---
+...
+client:write('!')
+---
+- 1
+...
+client:read(1, 0.1) == ''
+---
+- true
+...
+server:close()
+---
+- true
+...
 test_run:cmd("clear filter")
 ---
 - true
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index f6539438b..4e44b7ac0 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -736,4 +736,33 @@ sc:connect(host, port)
 sc:close()
 s:close()
 
+--gh-3344 connection should not fail is there is a spurious wakeup for io fiber
+test_run:cmd("setopt delimiter ';'")
+echo_fiber = nil
+server = socket.tcp_server('localhost', 0, { handler = function(s)
+    echo_fiber = fiber.self()
+    while true do
+        local b = s:read(1, 0.1)
+        if b ~= nil then
+            s:write(b)
+        end
+    end
+end, name = 'echoserv'});
+test_run:cmd("setopt delimiter ''");
+addr = server:name()
+client = socket.tcp_connect(addr.host, addr.port)
+echo_fiber ~= nil
+client:write('hello')
+client:read(5, 0.1) == 'hello'
+-- send spurious wakeup
+fiber.wakeup(echo_fiber)
+fiber.sleep(0)
+client:write('world')
+client:read(5, 0.1) == 'world'
+-- cancel fiber
+fiber.cancel(echo_fiber)
+client:write('!')
+client:read(1, 0.1) == ''
+server:close()
+
 test_run:cmd("clear filter")
-- 
2.18.0

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2018-08-06 14:27 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-02 15:44 [tarantool-patches] [PATCH] Fix lua socket polling in case of a spurious wakeup Georgy Kirichenko
2018-08-06 14:27 ` [tarantool-patches] " Vladislav Shpilevoy
  -- strict thread matches above, loose matches on Subject: below --
2018-07-18 16:46 [tarantool-patches] " Georgy Kirichenko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox