From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Fri, 5 Oct 2018 15:54:00 +0300 From: Vladimir Davydov Subject: Re: [tarantool-patches] [PATCH v2] Fix lua socket polling in case of a spurious wakeup Message-ID: <20181005125400.vcj4mbbrfrfpr736@esperanza> References: MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: To: Georgy Kirichenko Cc: tarantool-patches@freelists.org List-ID: On Mon, Sep 24, 2018 at 10:53:04PM +0300, Georgy Kirichenko wrote: > socket_writable/socket_readable handles socket.iowait spurious wakeup > until event is happened or timeout is exceeded. > > https://github.com/tarantool/tarantool/issues/3344 > https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-3344-socket-io-spurios-wakeup > > Changes in v2: > - Fixes according to Vlad's review Nit: please put the links and the change log after ---, otherwise it looks like they are a part of the commit message. Also, according to our guideline, one should prefix the subject line with a subsystem name, when it makes sense. In your case it should be socket: fix polling in case of spurious wakeup > > Fixes: #3344 Nit: should be Closes #3344 > --- > src/lua/socket.lua | 52 ++++++++++++++----------------- > test/app/socket.result | 66 ++++++++++++++++++++++++++++++++++++++++ > test/app/socket.test.lua | 28 +++++++++++++++++ > 3 files changed, 117 insertions(+), 29 deletions(-) > > diff --git a/src/lua/socket.lua b/src/lua/socket.lua > index 8f5e2926b..02c93381b 100644 > --- a/src/lua/socket.lua > +++ b/src/lua/socket.lua > @@ -332,12 +332,17 @@ local function do_wait(self, what, timeout) > self._errno = nil > timeout = timeout or TIMEOUT_INFINITY > > - local res = internal.iowait(fd, what, timeout) > - if res == 0 then > - self._errno = boxerrno.ETIMEDOUT > - return 0 > + local deadline = fiber.clock() + timeout > + > + while fiber.clock() <= deadline do > + local events = internal.iowait(fd, what, deadline - fiber.clock()) > + fiber.testcancel() > + if events ~= 0 and events ~= '' then > + return events > + end > end > - return res > + self._errno = boxerrno.ETIMEDOUT > + return 0 > end > > local function socket_readable(self, timeout) > @@ -666,7 +671,7 @@ local function read(self, limit, timeout, check, ...) > return data > end > > - while timeout > 0 do > + while timeout >= 0 do > local started = fiber.clock() > > assert(rbuf:size() < limit) > @@ -693,15 +698,9 @@ local function read(self, limit, timeout, check, ...) > return nil > end > > - if not socket_readable(self, timeout) then > - return nil > - end > - if timeout <= 0 then > - break > - end > + socket_readable(self, timeout) > timeout = timeout - ( fiber.clock() - started ) > end > - self._errno = boxerrno.ETIMEDOUT > return nil I guess it's nitpicking, but calling socket_readable() without checking what it returns looks confusing to me :-( May be rewrite it with repeat local started = fiber.clock() ... timeout = timeout - (fiber.clock() - started) until not socket_readable(self, timeout) return nil ? > end > > @@ -740,7 +739,7 @@ local function socket_write(self, octets, timeout) > end > > local started = fiber.clock() > - while true do > + while timeout >= 0 do By the look at how 'timeout' is updated below, 'started' should be set inside the loop. Looks like a bug, which is worth fixing in the scope of this patch. > local written = syswrite(self, p, e - p) > if written == 0 then > return p - s -- eof > @@ -754,11 +753,10 @@ local function socket_write(self, octets, timeout) > return nil > end > > + socket_writable(self, timeout) > timeout = timeout - (fiber.clock() - started) > - if timeout <= 0 or not socket_writable(self, timeout) then > - break > - end Again, I'd rewrite this with repeat-until: repeat local started = fiber.clock() ... timeout = timeout - (fiber.clock() - started) until not socket_writable(self, timeout) return p > s and p - s or nil > end > + return p - s I think this function should return nil on timeout in case nothing was written. Why did you change that? > end > > local function socket_send(self, octets, flags) > @@ -997,16 +995,12 @@ local function socket_tcp_connect(s, address, port, timeout) > -- Wait until the connection is established or ultimately fails. > -- In either condition the socket becomes writable. To tell these > -- conditions appart SO_ERROR must be consulted (man connect). > - if socket_writable(s, timeout) then > + local deadline = timeout + fiber.clock() > + if socket_writable(s, deadline - fiber.clock()) then What's the point of using 'deadline' here? > s._errno = socket_getsockopt(s, 'SOL_SOCKET', 'SO_ERROR') > - else > - s._errno = boxerrno.ETIMEDOUT > - end > - if s._errno ~= 0 then > - return nil > + return s._errno == 0 or nil > end > - -- Connected > - return true > + return nil Again, nitpicking. I understand why you removed '_errno = ETIMEDOUT', but why change the rest? It looked pretty clean. > end > > local function tcp_connect(host, port, timeout) > @@ -1387,7 +1381,7 @@ end > local function lsocket_tcp_accept(self) > check_socket(self) > local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY) > - repeat > + while socket_readable(self, deadline - fiber.clock()) do > local client = socket_accept(self) > if client then > setmetatable(client, lsocket_tcp_client_mt) > @@ -1397,7 +1391,7 @@ local function lsocket_tcp_accept(self) > if not errno_is_transient[errno] then > break > end > - until not socket_readable(self, deadline - fiber.clock()) > + end Why did you replace repeat-until with while-do? Now, we will call iowait even if the socket is ready. That is, it was one syscall for the fast path (wait-less) before this change, now it's two. That said, this change looks rather controversial to me, and is definitely out of the scope of this patch. Let's please leave it as is. > return nil, socket_error(self) > end > > @@ -1452,7 +1446,7 @@ local function lsocket_tcp_receive(self, pattern, prefix) > elseif pattern == "*a" then > local result = { prefix } > local deadline = fiber.clock() + (self.timeout or TIMEOUT_INFINITY) > - repeat > + while socket_readable(self, deadline - fiber.clock()) do > local data = socket_sysread(self) > if data == nil then > if not errno_is_transient[self._errno] then > @@ -1463,7 +1457,7 @@ local function lsocket_tcp_receive(self, pattern, prefix) > else > table.insert(result, data) > end > - until not socket_readable(self, deadline - fiber.clock()) > + end Ditto. > if #result == 1 then > return nil, 'closed', table.concat(result) > end