[tarantool-patches] [PATCH v2] Fix lua socket polling in case of a spurious wakeup

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Oct 3 18:08:04 MSK 2018


Hi! Thanks for the fixes! LGTM. Vova, please, review or push.

On 24/09/2018 22:53, Georgy Kirichenko wrote:
> socket_writable/socket_readable handles socket.iowait spurious wakeup
> until event is happened or timeout is exceeded.
> 
> https://github.com/tarantool/tarantool/issues/3344
> https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-3344-socket-io-spurios-wakeup
> 
> Changes in v2:
>   - Fixes according to Vlad's review
> 
> Fixes: #3344
> ---
>   src/lua/socket.lua       | 52 ++++++++++++++-----------------
>   test/app/socket.result   | 66 ++++++++++++++++++++++++++++++++++++++++
>   test/app/socket.test.lua | 28 +++++++++++++++++
>   3 files changed, 117 insertions(+), 29 deletions(-)
> 
> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
> index 8f5e2926b..02c93381b 100644
> --- a/src/lua/socket.lua
> +++ b/src/lua/socket.lua
> @@ -332,12 +332,17 @@ local function do_wait(self, what, timeout)
>       self._errno = nil
>       timeout = timeout or TIMEOUT_INFINITY
>   
> -    local res = internal.iowait(fd, what, timeout)
> -    if res == 0 then
> -        self._errno = boxerrno.ETIMEDOUT
> -        return 0
> +    local deadline = fiber.clock() + timeout
> +
> +    while fiber.clock() <= deadline do
> +        local events = internal.iowait(fd, what, deadline - fiber.clock())
> +        fiber.testcancel()
> +        if events ~= 0 and events ~= '' then
> +            return events
> +        end
>       end
> -    return res
> +    self._errno = boxerrno.ETIMEDOUT
> +    return 0
>   end
>   
>   local function socket_readable(self, timeout)
> @@ -666,7 +671,7 @@ local function read(self, limit, timeout, check, ...)
>           return data
>       end
>   
> -    while timeout > 0 do
> +    while timeout >= 0 do
>           local started = fiber.clock()
>   
>           assert(rbuf:size() < limit)
> @@ -693,15 +698,9 @@ local function read(self, limit, timeout, check, ...)
>               return nil
>           end
>   
> -        if not socket_readable(self, timeout) then
> -            return nil
> -        end
> -        if timeout <= 0 then
> -            break
> -        end
> +        socket_readable(self, timeout)
>           timeout = timeout - ( fiber.clock() - started )
>       end
> -    self._errno = boxerrno.ETIMEDOUT
>       return nil
>   end
>   
> @@ -740,7 +739,7 @@ local function socket_write(self, octets, timeout)
>       end
>   
>       local started = fiber.clock()
> -    while true do
> +    while timeout >= 0 do
>           local written = syswrite(self, p, e - p)
>           if written == 0 then
>               return p - s -- eof
> @@ -754,11 +753,10 @@ local function socket_write(self, octets, timeout)
>               return nil
>           end
>   
> +        socket_writable(self, timeout)
>           timeout = timeout - (fiber.clock() - started)
> -        if timeout <= 0 or not socket_writable(self, timeout) then
> -            break
> -        end
>       end
> +    return p - s
>   end
>   
>   local function socket_send(self, octets, flags)
> @@ -997,16 +995,12 @@ local function socket_tcp_connect(s, address, port, timeout)
>       -- Wait until the connection is established or ultimately fails.
>       -- In either condition the socket becomes writable. To tell these
>       -- conditions appart SO_ERROR must be consulted (man connect).
> -    if socket_writable(s, timeout) then
> +    local deadline = timeout + fiber.clock()
> +    if socket_writable(s, deadline - fiber.clock()) then
>           s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
> -    else
> -        s._errno = boxerrno.ETIMEDOUT
> -    end
> -    if s._errno ~= 0 then
> -        return nil
> +        return s._errno == 0 or nil
>       end
> -    -- Connected
> -    return true
> +    return nil
>   end
>   
>   local function tcp_connect(host, port, timeout)
> @@ -1387,7 +1381,7 @@ end
>   local function lsocket_tcp_accept(self)
>       check_socket(self)
>       local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
> -    repeat
> +    while socket_readable(self, deadline - fiber.clock()) do
>           local client = socket_accept(self)
>           if client then
>               setmetatable(client, lsocket_tcp_client_mt)
> @@ -1397,7 +1391,7 @@ local function lsocket_tcp_accept(self)
>           if not errno_is_transient[errno] then
>               break
>           end
> -    until not socket_readable(self, deadline - fiber.clock())
> +    end
>       return nil, socket_error(self)
>   end
>   
> @@ -1452,7 +1446,7 @@ local function lsocket_tcp_receive(self, pattern, prefix)
>       elseif pattern == "*a" then
>           local result = { prefix }
>           local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
> -        repeat
> +        while socket_readable(self, deadline - fiber.clock()) do
>               local data = socket_sysread(self)
>               if data == nil then
>                   if not errno_is_transient[self._errno] then
> @@ -1463,7 +1457,7 @@ local function lsocket_tcp_receive(self, pattern, prefix)
>               else
>                   table.insert(result, data)
>               end
> -        until not socket_readable(self, deadline - fiber.clock())
> +        end
>           if #result == 1 then
>               return nil, 'closed', table.concat(result)
>           end
> diff --git a/test/app/socket.result b/test/app/socket.result
> index 729c1eb3f..2f002a37e 100644
> --- a/test/app/socket.result
> +++ b/test/app/socket.result
> @@ -2779,6 +2779,72 @@ listening_socket:close()
>   ---
>   - true
>   ...
> +--gh-3344 connection should not fail is there is a spurious wakeup for io fiber
> +test_run:cmd("setopt delimiter ';'")
> +---
> +- true
> +...
> +echo_fiber = nil
> +server = socket.tcp_server('localhost', 0, { handler = function(s)
> +    echo_fiber = fiber.self()
> +    while true do
> +        local b = s:read(1, 0.1)
> +        if b ~= nil then
> +            s:write(b)
> +        end
> +    end
> +end, name = 'echoserv'});
> +---
> +...
> +test_run:cmd("setopt delimiter ''");
> +---
> +- true
> +...
> +addr = server:name()
> +---
> +...
> +client = socket.tcp_connect(addr.host, addr.port)
> +---
> +...
> +echo_fiber ~= nil
> +---
> +- true
> +...
> +client:write('hello')
> +---
> +- 5
> +...
> +client:read(5, 0.1) == 'hello'
> +---
> +- true
> +...
> +-- send spurious wakeup
> +fiber.wakeup(echo_fiber)
> +---
> +...
> +fiber.sleep(0)
> +---
> +...
> +client:write('world')
> +---
> +- 5
> +...
> +client:read(5, 0.1) == 'world'
> +---
> +- true
> +...
> +-- cancel fiber
> +fiber.cancel(echo_fiber)
> +---
> +...
> +client:read(1, 0.1) == ''
> +---
> +- true
> +...
> +server:close()
> +---
> +- true
> +...
>   test_run:cmd("clear filter")
>   ---
>   - true
> diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
> index 9b896522b..c0b001449 100644
> --- a/test/app/socket.test.lua
> +++ b/test/app/socket.test.lua
> @@ -939,4 +939,32 @@ receiving_socket:close()
>   sending_socket:close()
>   listening_socket:close()
>   
> +--gh-3344 connection should not fail is there is a spurious wakeup for io fiber
> +test_run:cmd("setopt delimiter ';'")
> +echo_fiber = nil
> +server = socket.tcp_server('localhost', 0, { handler = function(s)
> +    echo_fiber = fiber.self()
> +    while true do
> +        local b = s:read(1, 0.1)
> +        if b ~= nil then
> +            s:write(b)
> +        end
> +    end
> +end, name = 'echoserv'});
> +test_run:cmd("setopt delimiter ''");
> +addr = server:name()
> +client = socket.tcp_connect(addr.host, addr.port)
> +echo_fiber ~= nil
> +client:write('hello')
> +client:read(5, 0.1) == 'hello'
> +-- send spurious wakeup
> +fiber.wakeup(echo_fiber)
> +fiber.sleep(0)
> +client:write('world')
> +client:read(5, 0.1) == 'world'
> +-- cancel fiber
> +fiber.cancel(echo_fiber)
> +client:read(1, 0.1) == ''
> +server:close()
> +
>   test_run:cmd("clear filter")
> 



More information about the Tarantool-patches mailing list