* [tarantool-patches] [PATCH] Fix lua socket polling in case of a spurious wakeup
@ 2018-08-02 15:44 Georgy Kirichenko
2018-08-06 14:27 ` [tarantool-patches] " Vladislav Shpilevoy
0 siblings, 1 reply; 3+ messages in thread
From: Georgy Kirichenko @ 2018-08-02 15:44 UTC (permalink / raw)
To: tarantool-patches; +Cc: Georgy Kirichenko
socket_writable/socket_readable handles socket.iowait spurious wakeup
until event is happened or timeout is exceeded.
Fixes #3344
---
Issue: https://github.com/tarantool/tarantool/issues/3344
Branch: https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-3344-socket-io-spurios-wakeup
src/lua/socket.lua | 67 ++++++++++++++++++++------------------
test/app/socket.result | 70 ++++++++++++++++++++++++++++++++++++++++
test/app/socket.test.lua | 29 +++++++++++++++++
3 files changed, 135 insertions(+), 31 deletions(-)
diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index 06306eae2..9fcaee0e8 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -328,12 +328,18 @@ local function do_wait(self, what, timeout)
self._errno = nil
timeout = timeout or TIMEOUT_INFINITY
- local res = internal.iowait(fd, what, timeout)
- if res == 0 then
- self._errno = boxerrno.ETIMEDOUT
- return 0
+ local deadline = fiber.clock() + timeout
+ local events
+
+ while fiber.clock() <= deadline do
+ events = internal.iowait(fd, what, deadline - fiber.clock())
+ fiber.testcancel()
+ if events ~= 0 and events ~= '' then
+ return events
+ end
end
- return res
+ self._errno = boxerrno.ETIMEDOUT
+ return events
end
local function socket_readable(self, timeout)
@@ -662,7 +668,7 @@ local function read(self, limit, timeout, check, ...)
return data
end
- while timeout > 0 do
+ while timeout >= 0 do
local started = fiber.clock()
assert(rbuf:size() < limit)
@@ -689,15 +695,9 @@ local function read(self, limit, timeout, check, ...)
return nil
end
- if not socket_readable(self, timeout) then
- return nil
- end
- if timeout <= 0 then
- break
- end
+ socket_readable(self, timeout)
timeout = timeout - ( fiber.clock() - started )
end
- self._errno = boxerrno.ETIMEDOUT
return nil
end
@@ -736,7 +736,7 @@ local function socket_write(self, octets, timeout)
end
local started = fiber.clock()
- while true do
+ while timeout >= 0 do
local written = syswrite(self, p, e - p)
if written == 0 then
return p - s -- eof
@@ -750,11 +750,10 @@ local function socket_write(self, octets, timeout)
return nil
end
+ socket_writable(self, timeout)
timeout = timeout - (fiber.clock() - started)
- if timeout <= 0 or not socket_writable(self, timeout) then
- break
- end
end
+ return p - s
end
local function socket_send(self, octets, flags)
@@ -935,16 +934,19 @@ local function socket_tcp_connect(s, address, port, timeout)
-- Wait until the connection is established or ultimately fails.
-- In either condition the socket becomes writable. To tell these
-- conditions appart SO_ERROR must be consulted (man connect).
- if socket_writable(s, timeout) then
- s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
- else
- s._errno = boxerrno.ETIMEDOUT
- end
- if s._errno ~= 0 then
- return nil
+ local deadline = timeout + fiber.clock()
+ while deadline - fiber.clock() >= 0 do
+ if socket_writable(s, deadline - fiber.clock()) then
+ s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
+ if s._errno == 0 then
+ return true
+ else
+ return nil
+ end
+ end
end
- -- Connected
- return true
+ s._errno = boxerrno.ETIMEDOUT
+ return nil
end
local function tcp_connect(host, port, timeout)
@@ -1005,7 +1007,8 @@ end
local function tcp_server_loop(server, s, addr)
fiber.name(format("%s/%s:%s", server.name, addr.host, addr.port), {truncate = true})
log.info("started")
- while socket_readable(s) do
+ while true do
+ socket_readable(s)
local sc, from = socket_accept(s)
if sc == nil then
local errno = s._errno
@@ -1325,7 +1328,8 @@ end
local function lsocket_tcp_accept(self)
check_socket(self)
local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
- repeat
+ while deadline - fiber.clock() >= 0 do
+ socket_readable(self, deadline - fiber.clock())
local client = socket_accept(self)
if client then
setmetatable(client, lsocket_tcp_client_mt)
@@ -1335,7 +1339,7 @@ local function lsocket_tcp_accept(self)
if not errno_is_transient[errno] then
break
end
- until not socket_readable(self, deadline - fiber.clock())
+ end
return nil, socket_error(self)
end
@@ -1390,7 +1394,8 @@ local function lsocket_tcp_receive(self, pattern, prefix)
elseif pattern == "*a" then
local result = { prefix }
local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
- repeat
+ while deadline - fiber.clock() >= 0 do
+ socket_readable(self, deadline - fiber.clock())
local data = socket_sysread(self)
if data == nil then
if not errno_is_transient[self._errno] then
@@ -1401,7 +1406,7 @@ local function lsocket_tcp_receive(self, pattern, prefix)
else
table.insert(result, data)
end
- until not socket_readable(self, deadline - fiber.clock())
+ end
if #result == 1 then
return nil, 'closed', table.concat(result)
end
diff --git a/test/app/socket.result b/test/app/socket.result
index b4d4a9a78..d6f6762e0 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -2230,6 +2230,76 @@ s:close()
---
- 1
...
+--gh-3344 connection should not fail is there is a spurious wakeup for io fiber
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+echo_fiber = nil
+server = socket.tcp_server('localhost', 0, { handler = function(s)
+ echo_fiber = fiber.self()
+ while true do
+ local b = s:read(1, 0.1)
+ if b ~= nil then
+ s:write(b)
+ end
+ end
+end, name = 'echoserv'});
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+addr = server:name()
+---
+...
+client = socket.tcp_connect(addr.host, addr.port)
+---
+...
+echo_fiber ~= nil
+---
+- true
+...
+client:write('hello')
+---
+- 5
+...
+client:read(5, 0.1) == 'hello'
+---
+- true
+...
+-- send spurious wakeup
+fiber.wakeup(echo_fiber)
+---
+...
+fiber.sleep(0)
+---
+...
+client:write('world')
+---
+- 5
+...
+client:read(5, 0.1) == 'world'
+---
+- true
+...
+-- cancel fiber
+fiber.cancel(echo_fiber)
+---
+...
+client:write('!')
+---
+- 1
+...
+client:read(1, 0.1) == ''
+---
+- true
+...
+server:close()
+---
+- true
+...
test_run:cmd("clear filter")
---
- true
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index f6539438b..4e44b7ac0 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -736,4 +736,33 @@ sc:connect(host, port)
sc:close()
s:close()
+--gh-3344 connection should not fail is there is a spurious wakeup for io fiber
+test_run:cmd("setopt delimiter ';'")
+echo_fiber = nil
+server = socket.tcp_server('localhost', 0, { handler = function(s)
+ echo_fiber = fiber.self()
+ while true do
+ local b = s:read(1, 0.1)
+ if b ~= nil then
+ s:write(b)
+ end
+ end
+end, name = 'echoserv'});
+test_run:cmd("setopt delimiter ''");
+addr = server:name()
+client = socket.tcp_connect(addr.host, addr.port)
+echo_fiber ~= nil
+client:write('hello')
+client:read(5, 0.1) == 'hello'
+-- send spurious wakeup
+fiber.wakeup(echo_fiber)
+fiber.sleep(0)
+client:write('world')
+client:read(5, 0.1) == 'world'
+-- cancel fiber
+fiber.cancel(echo_fiber)
+client:write('!')
+client:read(1, 0.1) == ''
+server:close()
+
test_run:cmd("clear filter")
--
2.18.0
^ permalink raw reply [flat|nested] 3+ messages in thread
* [tarantool-patches] Re: [PATCH] Fix lua socket polling in case of a spurious wakeup
2018-08-02 15:44 [tarantool-patches] [PATCH] Fix lua socket polling in case of a spurious wakeup Georgy Kirichenko
@ 2018-08-06 14:27 ` Vladislav Shpilevoy
0 siblings, 0 replies; 3+ messages in thread
From: Vladislav Shpilevoy @ 2018-08-06 14:27 UTC (permalink / raw)
To: tarantool-patches, Georgy Kirichenko
Hi! See 7 comments below. Last 6 of them I've fixed on the branch
in a separate commit. Please, look, squash and fix the first comment
yourself.
On 02/08/2018 18:44, Georgy Kirichenko wrote:
> socket_writable/socket_readable handles socket.iowait spurious wakeup
> until event is happened or timeout is exceeded.
>
> Fixes #3344
> ---
> Issue: https://github.com/tarantool/tarantool/issues/3344
> Branch: https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-3344-socket-io-spurios-wakeup
1. The test constantly fails:
======================================================================================
WORKR TEST PARAMS RESULT
---------------------------------------------------------------------------------
[001] app/socket.test.lua [ fail ]
[001]
[001] Test failed! Result content mismatch:
[001] --- app/socket.result Mon Aug 6 16:36:23 2018
[001] +++ app/socket.reject Mon Aug 6 17:06:23 2018
[001] @@ -2294,7 +2294,7 @@
[001] ...
[001] client:read(1, 0.1) == ''
[001] ---
[001] -- true
[001] +- false
[001] ...
[001] server:close()
[001] ---
I have tried to fix it and it looks like socket close event is not
triggered or checked in socket_read() function. Please, fix.
> src/lua/socket.lua | 67 ++++++++++++++++++++------------------
> test/app/socket.result | 70 ++++++++++++++++++++++++++++++++++++++++
> test/app/socket.test.lua | 29 +++++++++++++++++
> 3 files changed, 135 insertions(+), 31 deletions(-)
>
> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
> index 06306eae2..9fcaee0e8 100644
> --- a/src/lua/socket.lua
> +++ b/src/lua/socket.lua
> @@ -328,12 +328,18 @@ local function do_wait(self, what, timeout)
> self._errno = nil
> timeout = timeout or TIMEOUT_INFINITY
>
> - local res = internal.iowait(fd, what, timeout)
> - if res == 0 then
> - self._errno = boxerrno.ETIMEDOUT
> - return 0
> + local deadline = fiber.clock() + timeout
> + local events
> +
> + while fiber.clock() <= deadline do
> + events = internal.iowait(fd, what, deadline - fiber.clock())
> + fiber.testcancel()
> + if events ~= 0 and events ~= '' then> + return events
> + end
> end
> - return res
> + self._errno = boxerrno.ETIMEDOUT
> + return events
2. Is it possible that you have an event after timeout? I believe,
events here is always 0. So you can return 0, and put 'local events'
declaration inside the 'while' loop above.
> end
>
> local function socket_readable(self, timeout)
> @@ -935,16 +934,19 @@ local function socket_tcp_connect(s, address, port, timeout)
> -- Wait until the connection is established or ultimately fails.
> -- In either condition the socket becomes writable. To tell these
> -- conditions appart SO_ERROR must be consulted (man connect).
> - if socket_writable(s, timeout) then
> - s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
> - else
> - s._errno = boxerrno.ETIMEDOUT
> - end
> - if s._errno ~= 0 then
> - return nil
> + local deadline = timeout + fiber.clock()
> + while deadline - fiber.clock() >= 0 do
3. This 'while' can be removed since the wait is done inside
socket_writable.
> + if socket_writable(s, deadline - fiber.clock()) then
> + s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
> + if s._errno == 0 then
> + return true
> + else
> + return nil
> + end
> + end
> end
> - -- Connected
> - return true
> + s._errno = boxerrno.ETIMEDOUT
4. Socket_writable already sets errno.
> + return nil
> end
>
> local function tcp_connect(host, port, timeout)
> @@ -1005,7 +1007,8 @@ end
> local function tcp_server_loop(server, s, addr)
> fiber.name(format("%s/%s:%s", server.name, addr.host, addr.port), {truncate = true})
> log.info("started")
> - while socket_readable(s) do
> + while true do
> + socket_readable(s)
5. If the socket is not writable you should not accept anything.
The previous version was right.
> local sc, from = socket_accept(s)
> if sc == nil then
> local errno = s._errno
> @@ -1325,7 +1328,8 @@ end
> local function lsocket_tcp_accept(self)
> check_socket(self)
> local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
> - repeat
> + while deadline - fiber.clock() >= 0 do
> + socket_readable(self, deadline - fiber.clock())
6. The same. If the socket is not writable, here is nothing to
accept.
> local client = socket_accept(self)
> if client then
> setmetatable(client, lsocket_tcp_client_mt)
> @@ -1390,7 +1394,8 @@ local function lsocket_tcp_receive(self, pattern, prefix)
> elseif pattern == "*a" then
> local result = { prefix }
> local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
> - repeat
> + while deadline - fiber.clock() >= 0 do
> + socket_readable(self, deadline - fiber.clock())
7. The same.
> local data = socket_sysread(self)
> if data == nil then
> if not errno_is_transient[self._errno] then
^ permalink raw reply [flat|nested] 3+ messages in thread
* [tarantool-patches] Re: [PATCH] Fix lua socket polling in case of a spurious wakeup
2018-07-18 16:46 [tarantool-patches] " Georgy Kirichenko
@ 2018-07-18 17:06 ` Vladislav Shpilevoy
0 siblings, 0 replies; 3+ messages in thread
From: Vladislav Shpilevoy @ 2018-07-18 17:06 UTC (permalink / raw)
To: tarantool-patches, Georgy Kirichenko
Thanks for the fixes!
Please, use [PATCH v2] prefix when sending second
version of a patch.
See 2 comments below.
On 18/07/2018 19:46, Georgy Kirichenko wrote:
> socket_writable/socket_readable may return before timeout is exceeded
> with the false status in cause of a spurious wakeup and this should not be
> treated as an EOF or an error.
>
> Fixed #3344
> ---
> Issue: https://github.com/tarantool/tarantool/issues/3344
> Branch:
> https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-3344-socket-io-spurios-wakeup
> src/lua/socket.lua | 57 ++++++++++++++++----------------
> test/app/socket.result | 70 ++++++++++++++++++++++++++++++++++++++++
> test/app/socket.test.lua | 29 +++++++++++++++++
> 3 files changed, 129 insertions(+), 27 deletions(-)
>
> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
> index 06306eae2..6ebcea055 100644
> --- a/src/lua/socket.lua
> +++ b/src/lua/socket.lua
> @@ -337,11 +337,15 @@ local function do_wait(self, what, timeout)
> end
1. The function 'do_wait' now sets _errno to timeout
even if it was actually not timeout but spurious wakeup.
I think, we should not set any errors in such a case.
Actually, do_wait is called from only three functions,
so we can set _errno in them when the timeout really
occurs.
>
> local function socket_readable(self, timeout)
> - return do_wait(self, 1, timeout) ~= 0
> + local result = do_wait(self, 1, timeout) ~= 0
2. Function socket_readable now can return even if the
socket actually is not readable because of spurious
wakeup. It can lead to block on read() as I understand,
if we are trying to read non-readable thing. And breaks
logic of this function.
I think, that here we should check do_wait result to
actually contain 'R'. If not 'R' and the timeout still
is not depleted, we should call do_wait again.
> + fiber.testcancel()
> + return result
> end
>
> local function socket_writable(self, timeout)
> - return do_wait(self, 2, timeout) ~= 0
> + local result = do_wait(self, 2, timeout) ~= 0
> + fiber.testcancel()
> + return result
> end
>
> local function socket_wait(self, timeout)
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2018-08-06 14:27 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-02 15:44 [tarantool-patches] [PATCH] Fix lua socket polling in case of a spurious wakeup Georgy Kirichenko
2018-08-06 14:27 ` [tarantool-patches] " Vladislav Shpilevoy
-- strict thread matches above, loose matches on Subject: below --
2018-07-18 16:46 [tarantool-patches] " Georgy Kirichenko
2018-07-18 17:06 ` [tarantool-patches] " Vladislav Shpilevoy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox