* [tarantool-patches] [PATCH] Protect lua socket io against a spurious wakeup
@ 2018-07-18 11:43 Georgy Kirichenko
2018-07-18 12:25 ` [tarantool-patches] " Vladislav Shpilevoy
0 siblings, 1 reply; 3+ messages in thread
From: Georgy Kirichenko @ 2018-07-18 11:43 UTC (permalink / raw)
To: tarantool-patches; +Cc: Georgy Kirichenko
socket_readable/socket_writable might return until socket become a
requested state in case of a spurious wakeup. Socket functions are
refactored with considering that fact.
Old behavior leads to test failures.
---
Issue: https://github.com/tarantool/tarantool/issues/3344
Branch:
https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-3344-socket-io-spurios-wakeup
src/lua/socket.lua | 54 ++++++++++++++++++++++-----------------
test/box/net.box.result | 3 +++
test/box/net.box.test.lua | 2 ++
3 files changed, 35 insertions(+), 24 deletions(-)
diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index 06306eae2..0b22bb91c 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -689,12 +689,8 @@ local function read(self, limit, timeout, check, ...)
return nil
end
- if not socket_readable(self, timeout) then
- return nil
- end
- if timeout <= 0 then
- break
- end
+ socket_readable(self, timeout)
+ fiber.testcancel()
timeout = timeout - ( fiber.clock() - started )
end
self._errno = boxerrno.ETIMEDOUT
@@ -736,7 +732,7 @@ local function socket_write(self, octets, timeout)
end
local started = fiber.clock()
- while true do
+ while timeout > 0 do
local written = syswrite(self, p, e - p)
if written == 0 then
return p - s -- eof
@@ -750,10 +746,9 @@ local function socket_write(self, octets, timeout)
return nil
end
+ socket_writable(self, timeout)
+ fiber.testcancel()
timeout = timeout - (fiber.clock() - started)
- if timeout <= 0 or not socket_writable(self, timeout) then
- break
- end
end
end
@@ -935,16 +930,21 @@ local function socket_tcp_connect(s, address, port, timeout)
-- Wait until the connection is established or ultimately fails.
-- In either condition the socket becomes writable. To tell these
-- conditions appart SO_ERROR must be consulted (man connect).
- if socket_writable(s, timeout) then
- s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
- else
- s._errno = boxerrno.ETIMEDOUT
- end
- if s._errno ~= 0 then
- return nil
+ local deadline = timeout + fiber.clock()
+ while deadline - fiber.clock() > 0 do
+ if socket_writable(s, deadline - fiber.clock()) then
+ s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
+ if s._errno == 0 then
+ return true
+ else
+ return nil
+ end
+ end
+ -- timeout, spurious wakeup or cancel
+ fiber.testcancel()
end
- -- Connected
- return true
+ s._errno = boxerrno.ETIMEDOUT
+ return nil
end
local function tcp_connect(host, port, timeout)
@@ -1005,7 +1005,9 @@ end
local function tcp_server_loop(server, s, addr)
fiber.name(format("%s/%s:%s", server.name, addr.host, addr.port), {truncate = true})
log.info("started")
- while socket_readable(s) do
+ while true do
+ fiber.testcancel()
+ socket_readable(s)
local sc, from = socket_accept(s)
if sc == nil then
local errno = s._errno
@@ -1325,7 +1327,9 @@ end
local function lsocket_tcp_accept(self)
check_socket(self)
local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
- repeat
+ while deadline - fiber.clock() > 0 do
+ socket_readable(self, deadline - fiber.clock())
+ fiber.testcancel()
local client = socket_accept(self)
if client then
setmetatable(client, lsocket_tcp_client_mt)
@@ -1335,7 +1339,7 @@ local function lsocket_tcp_accept(self)
if not errno_is_transient[errno] then
break
end
- until not socket_readable(self, deadline - fiber.clock())
+ end
return nil, socket_error(self)
end
@@ -1390,7 +1394,9 @@ local function lsocket_tcp_receive(self, pattern, prefix)
elseif pattern == "*a" then
local result = { prefix }
local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
- repeat
+ while deadline - fiber.clock() > 0 do
+ socket_readable(self, deadline - fiber.clock())
+ fiber.testcancel()
local data = socket_sysread(self)
if data == nil then
if not errno_is_transient[self._errno] then
@@ -1401,7 +1407,7 @@ local function lsocket_tcp_receive(self, pattern, prefix)
else
table.insert(result, data)
end
- until not socket_readable(self, deadline - fiber.clock())
+ end
if #result == 1 then
return nil, 'closed', table.concat(result)
end
diff --git a/test/box/net.box.result b/test/box/net.box.result
index 21cff4a11..4bafabdcb 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -20,6 +20,9 @@ test_run:cmd("push filter ".."'\\.lua.*:[0-9]+: ' to '.lua...\"]:<line>: '")
---
- true
...
+fiber.wakeup(fiber.self())
+---
+...
test_run:cmd("setopt delimiter ';'")
---
- true
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index 14fb6ebbd..76d3a5837 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -6,6 +6,8 @@ env = require('test_run')
test_run = env.new()
test_run:cmd("push filter ".."'\\.lua.*:[0-9]+: ' to '.lua...\"]:<line>: '")
+fiber.wakeup(fiber.self())
+
test_run:cmd("setopt delimiter ';'")
function x_select(cn, space_id, index_id, iterator, offset, limit, key, opts)
return cn:_request('select', opts, space_id, index_id, iterator,
--
2.18.0
^ permalink raw reply [flat|nested] 3+ messages in thread
* [tarantool-patches] Re: [PATCH] Protect lua socket io against a spurious wakeup
2018-07-18 11:43 [tarantool-patches] [PATCH] Protect lua socket io against a spurious wakeup Georgy Kirichenko
@ 2018-07-18 12:25 ` Vladislav Shpilevoy
2018-07-18 16:47 ` Georgy Kirichenko
0 siblings, 1 reply; 3+ messages in thread
From: Vladislav Shpilevoy @ 2018-07-18 12:25 UTC (permalink / raw)
To: tarantool-patches, Georgy Kirichenko
Hello. Thanks for the patch! See 6 comments below.
On 18/07/2018 14:43, Georgy Kirichenko wrote:
> socket_readable/socket_writable might return until socket become a
> requested state in case of a spurious wakeup. Socket functions are
> refactored with considering that fact.
>
> Old behavior leads to test failures.
Are you sure, that the only reason is the tests? Please,
explain here briefly, why it is important to track spurious
wakeups and cancels.
> ---
> Issue: https://github.com/tarantool/tarantool/issues/3344
> Branch:
> https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-3344-socket-io-spurios-wakeup
> src/lua/socket.lua | 54 ++++++++++++++++++++++-----------------
> test/box/net.box.result | 3 +++
> test/box/net.box.test.lua | 2 ++
> 3 files changed, 35 insertions(+), 24 deletions(-)
>
> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
> index 06306eae2..0b22bb91c 100644
> --- a/src/lua/socket.lua
> +++ b/src/lua/socket.lua
> @@ -689,12 +689,8 @@ local function read(self, limit, timeout, check, ...)
> return nil
> end
>
> - if not socket_readable(self, timeout) then
> - return nil
> - end
> - if timeout <= 0 then
> - break
> - end
> + socket_readable(self, timeout)
> + fiber.testcancel()
1. I see that you put this testcancel after each wait in this
file. But why can not you just put it inside wait? This is
obvious code duplication.
> timeout = timeout - ( fiber.clock() - started )
> end
> self._errno = boxerrno.ETIMEDOUT
> @@ -736,7 +732,7 @@ local function socket_write(self, octets, timeout)
> end
>
> local started = fiber.clock()
> - while true do
> + while timeout > 0 do
2. Why? As I remember, Kostja said, that timeout 0 is ok and
means that we have only one attempt to do an action (like
write, read ...). And how is it linked with the patch?
> local written = syswrite(self, p, e - p)
> if written == 0 then
> return p - s -- eof
> @@ -935,16 +930,21 @@ local function socket_tcp_connect(s, address, port, timeout)
> -- Wait until the connection is established or ultimately fails.
> -- In either condition the socket becomes writable. To tell these
> -- conditions appart SO_ERROR must be consulted (man connect).
> - if socket_writable(s, timeout) then
> - s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
> - else
> - s._errno = boxerrno.ETIMEDOUT
> - end
> - if s._errno ~= 0 then
> - return nil
> + local deadline = timeout + fiber.clock()
> + while deadline - fiber.clock() > 0 do
> + if socket_writable(s, deadline - fiber.clock()) then
3. If the socket became writable, and the fiber was canceled,
then you will return true/nil with no testcancel.
> + s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR')
> + if s._errno == 0 then
> + return true
> + else
> + return nil
> + end
> + end
> + -- timeout, spurious wakeup or cancel
> + fiber.testcancel()
> end
> - -- Connected
> - return true
> + s._errno = boxerrno.ETIMEDOUT
> + return nil
> end
>
> local function tcp_connect(host, port, timeout)
> @@ -1005,7 +1005,9 @@ end
> local function tcp_server_loop(server, s, addr)
> fiber.name(format("%s/%s:%s", server.name, addr.host, addr.port), {truncate = true})
> log.info("started")
> - while socket_readable(s) do
> + while true do
> + fiber.testcancel()
> + socket_readable(s)
4. Why cancel() is before readable()?
> local sc, from = socket_accept(s)
> if sc == nil then
> local errno = s._errno
> @@ -1335,7 +1339,7 @@ local function lsocket_tcp_accept(self)
> if not errno_is_transient[errno] then
> break
> end
> - until not socket_readable(self, deadline - fiber.clock())
> + end
> return nil, socket_error(self)
> end
>
> @@ -1390,7 +1394,9 @@ local function lsocket_tcp_receive(self, pattern, prefix)
> elseif pattern == "*a" then
> local result = { prefix }
> local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY)
> - repeat
> + while deadline - fiber.clock() > 0 do
5. Same as 2. We allow 0 timeout.
> + socket_readable(self, deadline - fiber.clock())
> + fiber.testcancel()
> local data = socket_sysread(self)
> if data == nil then
> if not errno_is_transient[self._errno] then
> diff --git a/test/box/net.box.result b/test/box/net.box.result
> index 21cff4a11..4bafabdcb 100644
> --- a/test/box/net.box.result
> +++ b/test/box/net.box.result
> @@ -20,6 +20,9 @@ test_run:cmd("push filter ".."'\\.lua.*:[0-9]+: ' to '.lua...\"]:<line>: '")
> ---
> - true
> ...
> +fiber.wakeup(fiber.self())
6. No link to the issue. Both in the commit message and
before the test.
> +---
> +...
> test_run:cmd("setopt delimiter ';'")
> ---
> - true
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2018-07-18 16:47 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-18 11:43 [tarantool-patches] [PATCH] Protect lua socket io against a spurious wakeup Georgy Kirichenko
2018-07-18 12:25 ` [tarantool-patches] " Vladislav Shpilevoy
2018-07-18 16:47 ` Georgy Kirichenko
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox