[Tarantool-patches] [tarantool-patches] [server-dev] Re: Re: [PATCH v2 1/2] lua: return getaddrinfo() errors

Roman Khabibov roman.habibov at tarantool.org
Sat Dec 21 20:50:51 MSK 2019


Hi! Thanks for the review.

> On Dec 18, 2019, at 17:58, Alexander Turenko <alexander.turenko at tarantool.org> wrote:
> 
>>>> @@ -1360,9 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
>>>>    -- This function is broken by design
>>>>    local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>>>>    local timeout = deadline - fiber.clock()
>>>> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
>>>> -    if dns == nil or #dns == 0 then
>>>> -        self._errno = boxerrno.EINVAL
>>>> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
>>>> +    if dns == nil then
>>>> +        return nil, err
>>>> +    end
>>>> +    if #dns == 0 then
>>>> +        boxerrno(self._errno)
>>>>        return nil, socket_error(self)
>>>>    end
>>>>    for _, remote in ipairs(dns) do
>>> 
>>> EINVAL here is changed to self._errno, which is always `nil` at least
>>> for typical usage of lua socket: `s = socket.tcp()`, then `s:connect()`.
>>> 
>>> Let's keep the old behaviour (EINVAL) when getaddrinfo() returns zero
>>> amount of addresses.
>>> 
>>> RFC 2553 ('Basic Socket Interface Extensions for IPv6') states:
>>> 
>>> | Upon successful return a pointer to a linked list of one or more
>>> | addrinfo structures is returned through the final argument.
>>> 
>>> RFC 3493 ('Basic Socket Interface Extensions for IPv6') states the same,
>>> but more explicitly:
>>> 
>>> | Upon successful return of getaddrinfo(), the location to which res
>>> | points shall refer to a linked list of addrinfo structures, each of
>>> | which shall specify a socket address and information for use in
>>> | creating a socket with which to use that socket address.  The list
>>> | shall include at least one addrinfo structure.
>>> 
>>> So this situation should not occur. But anyway we should not miss to set
>>> a certain errno in the case at least for consistency with tcp_connect().
>>> 
>>> Also I think that we should set errno to EINVAL when getaddrinfo() fails
>>> to keep the code backward compatible: say, a user lean on this value
>>> when tcp_connect() / :connect() returns `nil`.
>> 
>> I understand so:
>> 
>> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
>> index a334ad45b..6c829a11c 100644
>> --- a/src/lua/socket.lua
>> +++ b/src/lua/socket.lua
>> @@ -995,7 +995,13 @@ local function getaddrinfo(host, port, timeout, opts)
>>         end
>> 
>>     end
>> -    return internal.getaddrinfo(host, port, timeout, ga_opts)
>> +
>> +    local ret, err = internal.getaddrinfo(host, port, timeout, ga_opts)
>> +    if ret == nil then
>> +        boxerrno(boxerrno.EINVAL)
>> +        return nil, err
>> +    end
>> +    return ret
>> end
>> 
>> -- tcp connector
>> @@ -1041,9 +1047,12 @@ local function tcp_connect(host, port, timeout)
>>     end
>>     local timeout = timeout or TIMEOUT_INFINITY
>>     local stop = fiber.clock() + timeout
>> -    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>> +    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>>         protocol = 'tcp' })
>> -    if dns == nil or #dns == 0 then
>> +    if dns == nil then
>> +        return nil, err
>> +    end
>> +    if #dns == 0 then
>>         boxerrno(boxerrno.EINVAL)
>>         return nil
>>     end
>> @@ -1360,8 +1369,12 @@ local function lsocket_tcp_connect(self, host, port)
>>     -- This function is broken by design
>>     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>>     local timeout = deadline - fiber.clock()
>> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
>> -    if dns == nil or #dns == 0 then
>> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
>> +    if dns == nil then
>> +        self._errno = boxerrno()
>> +        return nil, err
>> +    end
>> +    if #dns == 0 then
>>         self._errno = boxerrno.EINVAL
>>         return nil, socket_error(self)
>>     end
>> @@ -1547,9 +1560,12 @@ local function lsocket_connect(host, port)
>>     if host == nil or port == nil then
>>         error("Usage: luasocket.connect(host, port)")
>>     end
>> -    local s = tcp_connect(host, port)
>> +    local s, err = tcp_connect(host, port)
>>     if not s then
>> -        return nil, boxerrno.strerror()
>> +        if not err then
>> +            return nil, boxerrno.strerror()
>> +        end
>> +        return nil, err
>>     end
>>     setmetatable(s, lsocket_tcp_client_mt)
>>     return s
> 
> I meant that `self._errno = boxerrno.EINVAL` was changed to
> `boxerrno(self._errno)` in `#dns == 0` case and it does not more affect
> the second return value (an error message). I would return it back for
> `#dns == 0` case (only for it).
> 
> The motivation is just to don't change things that we don't asked to
> change.
> 
> I didn't mean changes in getaddrinfo() function itself.
@@ -1360,8 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
-    if dns == nil or #dns == 0 then
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
+    if dns == nil then
+        self._errno = boxerrno()
+        return nil, err
+    end
+    if #dns == 0 then
         self._errno = boxerrno.EINVAL
         return nil, socket_error(self)
     end

>>> Also I see that we don't document Lua Socket part of the API:
>>> https://www.tarantool.io/en/doc/1.10/reference/reference_lua/socket/
>> 
>> Is this the part of this patchset? Maybe, it would be better to do doc
>> request separately.
> 
> It was in context of your docbot request: there is no need to ask to
> update Lua Socket API docs if we does not provide it at all.
> 
> Lua Socket has its own API documentation, so it is maybe okay to don't
> document it again on our website.

commit 7a85a217cb97faf03c8c817fc2748749abd91e91
Author: Roman Khabibov <roman.habibov at tarantool.org>
Date:   Thu Nov 21 14:37:54 2019 +0300

    lua: return getaddrinfo() errors
    
    Add getaddrinfo() errors into the several fuctions of socket. Now
    getaddrinfo() can return a pair of values (nil and error message)
    in case of error.
    
    Closes #4138
    
    @TarantoolBot document
    Title: socket API changes
    
    * socket.getaddrinfo()
    
    Can return error message as second return value in case of
    internal getaddrinfo() error.
    
    Example:
    tarantool> socket.getaddrinfo('non_exists_hostname', 3301)
    ---
    - null
    - 'getaddrinfo: nodename nor servname provided, or not known'
    ...
    
    * socket.tcp_connect()
    
    Can return socket.getaddrinfo() error message as second return
    value.
    
    Example:
    tarantool> socket.tcp_connect('non_exists_hostname', 3301)
    ---
    - null
    - 'getaddrinfo: nodename nor servname provided, or not known'
    ...

diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index c2e1bb9c4..2fb3e6df6 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -150,9 +150,12 @@ local function next_id(id) return band(id + 1, 0x7FFFFFFF) end
 local function establish_connection(host, port, timeout)
     local timeout = timeout or DEFAULT_CONNECT_TIMEOUT
     local begin = fiber.clock()
-    local s = socket.tcp_connect(host, port, timeout)
+    local s, err = socket.tcp_connect(host, port, timeout)
     if not s then
-        return nil, errno.strerror(errno())
+        if not err then
+            return nil, errno.strerror(errno())
+        end
+        return nil, err
     end
     local msg = s:read({chunk = IPROTO_GREETING_SIZE},
                         timeout - (fiber.clock() - begin))
diff --git a/src/lua/socket.c b/src/lua/socket.c
index 130378caf..dbade2d27 100644
--- a/src/lua/socket.c
+++ b/src/lua/socket.c
@@ -774,6 +774,18 @@ lbox_getaddrinfo_result_wrapper(struct lua_State *L)
 	return 1;
 }
 
+/**
+ * Wrap coio_getaddrinfo() and call it. Push returned values onto
+ * @a L Lua stack.
+ *
+ * @param L Lua stack.
+ *
+ * @retval 1 Number of returned values by Lua function if
+ * coio_getaddrinfo() success.
+ * @retval 2 Number of returned values by Lua function if
+ * coio_getaddrinfo() is failed (first is nil, second is error
+ * message).
+ */
 static int
 lbox_socket_getaddrinfo(struct lua_State *L)
 {
@@ -816,7 +828,9 @@ lbox_socket_getaddrinfo(struct lua_State *L)
 
 	if (dns_res != 0) {
 		lua_pushnil(L);
-		return 1;
+		struct error *err = diag_get()->last;
+		lua_pushstring(L, err->errmsg);
+		return 2;
 	}
 
 	/* no results */
diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index a334ad45b..f9256ecdc 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -1041,9 +1041,12 @@ local function tcp_connect(host, port, timeout)
     end
     local timeout = timeout or TIMEOUT_INFINITY
     local stop = fiber.clock() + timeout
-    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
+    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
         protocol = 'tcp' })
-    if dns == nil or #dns == 0 then
+    if dns == nil then
+        return nil, err
+    end
+    if #dns == 0 then
         boxerrno(boxerrno.EINVAL)
         return nil
     end
@@ -1360,8 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
-    if dns == nil or #dns == 0 then
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
+    if dns == nil then
+        self._errno = boxerrno()
+        return nil, err
+    end
+    if #dns == 0 then
         self._errno = boxerrno.EINVAL
         return nil, socket_error(self)
     end
@@ -1547,9 +1554,12 @@ local function lsocket_connect(host, port)
     if host == nil or port == nil then
         error("Usage: luasocket.connect(host, port)")
     end
-    local s = tcp_connect(host, port)
+    local s, err = tcp_connect(host, port)
     if not s then
-        return nil, boxerrno.strerror()
+        if not err then
+            return nil, boxerrno.strerror()
+        end
+        return nil, err
     end
     setmetatable(s, lsocket_tcp_client_mt)
     return s
diff --git a/test/app/socket.result b/test/app/socket.result
index fd299424c..6e5bd7a35 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -941,10 +941,20 @@ sc:close()
 - true
 ...
 -- tcp_connect
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value from internal.getaddrinfo (usually on Mac OS, but
+-- theoretically it can happen on Linux too). Sometimes
+-- getaddrinfo() is timed out, sometimes connect. On Linux however
+-- getaddrinfo is fast enough to never give timeout error in
+-- the case. So, there are two sources of timeout errors that are
+-- reported differently. This difference has appeared after
+-- gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
 ---
-- null
+...
+s == nil
+---
+- true
 ...
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -1606,7 +1616,7 @@ fio.stat(path) == nil
 { socket.tcp_connect('abrakadabra#123') == nil, errno.strerror() }
 ---
 - - true
-  - Invalid argument
+  - Input/output error
 ...
 -- wrong options for getaddrinfo
 socket.getaddrinfo('host', 'port', { type = 'WRONG' }) == nil and errno() == errno.EINVAL
@@ -2816,6 +2826,48 @@ test_run:cmd("clear filter")
 ---
 - true
 ...
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+        return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
+s, err = socket.connect('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
+s, err = socket.tcp_connect('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
 -- case: sicket receive inconsistent behavior
 chan = fiber.channel()
 ---
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index c72d41763..7086e496b 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -300,8 +300,16 @@ sc:close()
 
 -- tcp_connect
 
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value from internal.getaddrinfo (usually on Mac OS, but
+-- theoretically it can happen on Linux too). Sometimes
+-- getaddrinfo() is timed out, sometimes connect. On Linux however
+-- getaddrinfo is fast enough to never give timeout error in
+-- the case. So, there are two sources of timeout errors that are
+-- reported differently. This difference has appeared after
+-- gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+s == nil
 
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -960,6 +968,27 @@ server:close()
 
 test_run:cmd("clear filter")
 
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+        return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+check_err(err)
+s, err = socket.connect('non_exists_hostname', 3301)
+check_err(err)
+s, err = socket.tcp_connect('non_exists_hostname', 3301)
+check_err(err)
+
 -- case: sicket receive inconsistent behavior
 chan = fiber.channel()
 counter = 0
diff --git a/test/box/net.box.result b/test/box/net.box.result
index e3dabf7d9..0635ca9e7 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -3927,6 +3927,34 @@ test_run:grep_log('default', '00000040:.*')
 ---
 - null
 ...
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s = remote.connect('non_exists_hostname:3301')
+---
+...
+check_err(s['error'])
+---
+- true
+...
 box.cfg{log_level=log_level}
 ---
 ...
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index 8e65ff470..9c9aee1ad 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -1582,4 +1582,21 @@ test_run:wait_log('default', '00000030:.*', nil, 10)
 -- we expect nothing below, so don't wait
 test_run:grep_log('default', '00000040:.*')
 
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s = remote.connect('non_exists_hostname:3301')
+check_err(s['error'])
+
 box.cfg{log_level=log_level}




More information about the Tarantool-patches mailing list