Tarantool development patches archive
 help / color / mirror / Atom feed
* [tarantool-patches] [PATCH] lua: return getaddrinfo() errors
@ 2019-06-15 16:58 Roman Khabibov
  2019-06-23 20:31 ` [tarantool-patches] " Alexander Turenko
  0 siblings, 1 reply; 20+ messages in thread
From: Roman Khabibov @ 2019-06-15 16:58 UTC (permalink / raw)
  To: tarantool-patches; +Cc: alexander.turenko

Before this patch, branch when coio_getaddrinfo() returns
getaddrinfo() errors has never reached. Add this errors into the
socket and thenet.box modules.

Closes #4138
---

Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-4138-getaddrinfo
Issue: https://github.com/tarantool/tarantool/issues/4138

 src/box/lua/net_box.lua   |  7 +++++--
 src/lib/core/coio_task.c  |  2 +-
 src/lua/socket.c          |  6 ++++++
 src/lua/socket.lua        | 35 ++++++++++++++++++++++-------------
 test/app/socket.result    | 31 ++++++++++++++++++++++++++++---
 test/app/socket.test.lua  | 19 ++++++++++++++++++-
 test/box/net.box.result   | 21 ++++++++++++++++++++-
 test/box/net.box.test.lua | 15 +++++++++++++++
 8 files changed, 115 insertions(+), 21 deletions(-)

diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index 251ad407a..4e3497ed9 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -142,9 +142,12 @@ local function next_id(id) return band(id + 1, 0x7FFFFFFF) end
 local function establish_connection(host, port, timeout)
     local timeout = timeout or DEFAULT_CONNECT_TIMEOUT
     local begin = fiber.clock()
-    local s = socket.tcp_connect(host, port, timeout)
+    local s, err = socket.tcp_connect(host, port, timeout)
     if not s then
-        return nil, errno.strerror(errno())
+        if not err then
+            return nil, errno.strerror(errno())
+        end
+        return nil, err
     end
     local msg = s:read({chunk = IPROTO_GREETING_SIZE},
                         timeout - (fiber.clock() - begin))
diff --git a/src/lib/core/coio_task.c b/src/lib/core/coio_task.c
index 908b336ed..83f669d05 100644
--- a/src/lib/core/coio_task.c
+++ b/src/lib/core/coio_task.c
@@ -413,7 +413,7 @@ coio_getaddrinfo(const char *host, const char *port,
 		return -1; /* timed out or cancelled */
 
 	/* Task finished */
-	if (task->rc < 0) {
+	if (task->rc != 0) {
 		/* getaddrinfo() failed */
 		errno = EIO;
 		diag_set(SystemError, "getaddrinfo: %s",
diff --git a/src/lua/socket.c b/src/lua/socket.c
index 130378caf..5a8469ddf 100644
--- a/src/lua/socket.c
+++ b/src/lua/socket.c
@@ -54,6 +54,7 @@
 #include <fiber.h>
 #include "lua/utils.h"
 #include "lua/fiber.h"
+#include "reflection.h"
 
 extern int coio_wait(int fd, int event, double timeout);
 
@@ -816,6 +817,11 @@ lbox_socket_getaddrinfo(struct lua_State *L)
 
 	if (dns_res != 0) {
 		lua_pushnil(L);
+		struct error *err = diag_get()->last;
+		if (strcmp(err->type->name, "SystemError") == 0) {
+			lua_pushstring(L, err->errmsg);
+			return 2;
+		}
 		return 1;
 	}
 
diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index 2dba0a8d2..f0b432925 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -1028,11 +1028,14 @@ local function tcp_connect(host, port, timeout)
     end
     local timeout = timeout or TIMEOUT_INFINITY
     local stop = fiber.clock() + timeout
-    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
+    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
         protocol = 'tcp' })
     if dns == nil or #dns == 0 then
-        boxerrno(boxerrno.EINVAL)
-        return nil
+        if not err then
+            boxerrno(boxerrno.EINVAL)
+            return nil
+        end
+        return nil, err
     end
     for i, remote in pairs(dns) do
         timeout = stop - fiber.clock()
@@ -1147,15 +1150,15 @@ end
 
 local function tcp_server_bind(host, port, prepare, timeout)
     timeout = timeout and tonumber(timeout) or TIMEOUT_INFINITY
-    local dns
+    local dns, err
     if host == 'unix/' then
         dns = {{host = host, port = port, family = 'AF_UNIX', protocol = 0,
             type = 'SOCK_STREAM' }}
     else
-        dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
+        dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
             flags = 'AI_PASSIVE'})
         if dns == nil then
-            return nil
+            return nil, err
         end
     end
 
@@ -1347,10 +1350,10 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
     if dns == nil or #dns == 0 then
-        self._errno = boxerrno.EINVAL
-        return nil, socket_error(self)
+         self._errno = boxerrno.EINVAL
+        return nil, err
     end
     for _, remote in ipairs(dns) do
         timeout = deadline - fiber.clock()
@@ -1534,9 +1537,12 @@ local function lsocket_connect(host, port)
     if host == nil or port == nil then
         error("Usage: luasocket.connect(host, port)")
     end
-    local s = tcp_connect(host, port)
+    local s, err = tcp_connect(host, port)
     if not s then
-        return nil, boxerrno.strerror()
+        if not err then
+            return nil, boxerrno.strerror()
+        end
+        return nil, err
     end
     setmetatable(s, lsocket_tcp_client_mt)
     return s
@@ -1547,9 +1553,12 @@ local function lsocket_bind(host, port, backlog)
         error("Usage: luasocket.bind(host, port [, backlog])")
     end
     local function prepare(s) return backlog end
-    local s = tcp_server_bind(host, port, prepare)
+    local s, err = tcp_server_bind(host, port, prepare)
     if not s then
-        return nil, boxerrno.strerror()
+        if not err then
+            return nil, boxerrno.strerror()
+        end
+        return nil, err
     end
     return setmetatable(s, lsocket_tcp_server_mt)
 end
diff --git a/test/app/socket.result b/test/app/socket.result
index 0d029039a..6a78b92a4 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -942,7 +942,7 @@ sc:close()
 ...
 -- tcp_connect
 -- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+socket.tcp_connect('127.0.0.1', 80, 0.00000000000001)
 ---
 - null
 ...
@@ -1664,7 +1664,7 @@ fio.stat(path) == nil
 { socket.tcp_connect('abrakadabra#123') == nil, errno.strerror() }
 ---
 - - true
-  - Invalid argument
+  - Input/output error
 ...
 -- wrong options for getaddrinfo
 socket.getaddrinfo('host', 'port', { type = 'WRONG' }) == nil and errno() == errno.EINVAL
@@ -2870,7 +2870,32 @@ server:close()
 ---
 - true
 ...
-test_run:cmd("clear filter")
+--gh-4138 Check getaddrinfo() error.
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+---
+...
+s, err = socket:connect('hostname:3301');
+---
+...
+check_err(err);
+---
+- true
+...
+s, err = socket:bind('hostname:3301');
+---
+...
+check_err(err);
 ---
 - true
 ...
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index dab168f90..140baf22f 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -301,7 +301,7 @@ sc:close()
 -- tcp_connect
 
 -- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+socket.tcp_connect('127.0.0.1', 80, 0.00000000000001)
 
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -982,4 +982,21 @@ fiber.cancel(echo_fiber)
 client:read(1, 5) == ''
 server:close()
 
+--gh-4138 Check getaddrinfo() error.
+
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+
+s, err = socket:connect('hostname:3301');
+check_err(err);
+s, err = socket:bind('hostname:3301');
+check_err(err);
+
 test_run:cmd("clear filter")
diff --git a/test/box/net.box.result b/test/box/net.box.result
index 474297af3..8a9b26411 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -3796,6 +3796,25 @@ test_run:grep_log('default', '00000040:.*')
 ---
 - null
 ...
-box.cfg{log_level=log_level}
+--gh-4138 Check getaddrinfo() error.
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
 ---
 ...
+s = remote.connect('hostname:3301');
+---
+...
+check_err(s['error']);
+---
+- true
+...
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index bea43002d..13ed4e200 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -1538,4 +1538,19 @@ test_run:grep_log('default', '00000020:.*')
 test_run:grep_log('default', '00000030:.*')
 test_run:grep_log('default', '00000040:.*')
 
+--gh-4138 Check getaddrinfo() error.
+test_run:cmd("setopt delimiter ';'")
+
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+
+s = remote.connect('hostname:3301');
+check_err(s['error']);
+
 box.cfg{log_level=log_level}
-- 
2.20.1 (Apple Git-117)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [tarantool-patches] Re: [PATCH] lua: return getaddrinfo() errors
  2019-06-15 16:58 [tarantool-patches] [PATCH] lua: return getaddrinfo() errors Roman Khabibov
@ 2019-06-23 20:31 ` Alexander Turenko
  2019-06-25 13:38   ` [tarantool-patches] Re: [PATCH v2 1/2] " Roman Khabibov
  0 siblings, 1 reply; 20+ messages in thread
From: Alexander Turenko @ 2019-06-23 20:31 UTC (permalink / raw)
  To: Roman Khabibov; +Cc: tarantool-patches

Re C part of the commit (src/lib/core/coio_task.c).

The origin of the (rc < 0) check is the following commit (marked
relevant phrase with !!):

commit ea1da04d5add51c308efb3fd2d71cdfabed8411c
Author: Roman Tsisyk <roman@tsisyk.com>
Date:   Mon Dec 5 20:28:59 2016 +0300

    Refactor coio_task

    * Add coio_task_create() and coio_task_destroy()
    * Rename coio_task() to coio_task_post()
    * Fix the core invariant of coio_task_post():
      - On timeout or when fiber was cancelled set diag, return -1 and
        guarantee that on_timeout will be called somewhen.
      - Otherwise don't touch diag, don't call on_timeout callback,
        return 0. Please check task->base.result and task->diag
        to get the original return code and diag from the callback.
    * Change the return value of coio_task_post() to "int".
    * Add diag to coio_getaddrinfo() and fix a possible bug in replication;
!!    ignore uncoventional getaddrinfo(3) error codes for now.
    * Fix buggy box.snapshot() tests.

    Needed for #1954

I don't find any reason to ignoring 'unconventional' errors.

getaddrinfo() returns negative values on Linux, however it is not so on
Mac OS. Values and corresponding messages can be checked like so:

$ gcc -Wall -Wextra -x c <(echo -e '#include <sys/types.h>\n#include <sys/socket.h>\n#include <netdb.h>\n#include <stdio.h>\nint main() { printf("%d\\n", EAI_NONAME); return 0; }') && ./a.out; rm a.out
$ gcc -Wall -Wextra -x c <(echo -e '#include <sys/types.h>\n#include <sys/socket.h>\n#include <netdb.h>\n#include <stdio.h>\nint main() { printf("%s\\n", gai_strerror(EAI_NONAME)); return 0; }') && ./a.out; rm a.out

The commit message should mention Roman's commit and cleanly state that
(rc < 0) assumption does not work on Mac OS.

Aside of that there is nothing bad in calling gai_strerror() with some
unusual value, it just return "Unknown error" (I checked both Linux and
Mac OS).

I would add relevant test case for coio_getaddrinfo() into
test/unit/coio.cc.

Consider comments re Lua part of the commit below.

> Before this patch, branch when coio_getaddrinfo() returns
> getaddrinfo() errors has never reached. Add this errors into the
> socket and thenet.box modules.

Typo: thenet.box -> net.box.

Aside that I would add more investigation results and contracts change
(see other comments in this email).

> 
> Closes #4138
> ---
> 
> Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-4138-getaddrinfo
> Issue: https://github.com/tarantool/tarantool/issues/4138

> diff --git a/src/lib/core/coio_task.c b/src/lib/core/coio_task.c
> index 908b336ed..83f669d05 100644
> --- a/src/lib/core/coio_task.c
> +++ b/src/lib/core/coio_task.c
> @@ -413,7 +413,7 @@ coio_getaddrinfo(const char *host, const char *port,
>  		return -1; /* timed out or cancelled */
>  
>  	/* Task finished */
> -	if (task->rc < 0) {
> +	if (task->rc != 0) {

See comments at the start of the email.

> diff --git a/src/lua/socket.c b/src/lua/socket.c
> index 130378caf..5a8469ddf 100644
> --- a/src/lua/socket.c
> +++ b/src/lua/socket.c
> @@ -54,6 +54,7 @@
>  #include <fiber.h>
>  #include "lua/utils.h"
>  #include "lua/fiber.h"
> +#include "reflection.h"

It is only needed to deference 'err->type', so I would remove it with
this check.

>  
>  extern int coio_wait(int fd, int event, double timeout);
>  
> @@ -816,6 +817,11 @@ lbox_socket_getaddrinfo(struct lua_State *L)
>  
>  	if (dns_res != 0) {
>  		lua_pushnil(L);
> +		struct error *err = diag_get()->last;
> +		if (strcmp(err->type->name, "SystemError") == 0) {

I don't think that we should check a type here. Why not report any error
as is?

After that the convention for lbox_socket_getaddrinfo() will be simple:
it returns a table of results when successul; `nil`, err_msg otherwise.

Now it can return just `nil` or `nil`, err_msg -- the contract is more
complex.

Lua's code will be simpler if we'll simplify this contract.

I suggest to add a comment (in free form, but other reviewers can
enforce specific format) to the function that shows it contract
explicitly, like so: return <...> at success, otherwise return <...>.

> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
> index 2dba0a8d2..f0b432925 100644
> --- a/src/lua/socket.lua
> +++ b/src/lua/socket.lua
> @@ -1028,11 +1028,14 @@ local function tcp_connect(host, port, timeout)
>      end
>      local timeout = timeout or TIMEOUT_INFINITY
>      local stop = fiber.clock() + timeout
> -    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
> +    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>          protocol = 'tcp' })
>      if dns == nil or #dns == 0 then
> -        boxerrno(boxerrno.EINVAL)
> -        return nil
> +        if not err then
> +            boxerrno(boxerrno.EINVAL)
> +            return nil
> +        end
> +        return nil, err

Here we change the contract of socket.getaddrinfo() -- the user visible
function. I would mention that in the commit message.

> diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
> index dab168f90..140baf22f 100644
> --- a/test/app/socket.test.lua
> +++ b/test/app/socket.test.lua
> @@ -301,7 +301,7 @@ sc:close()
>  -- tcp_connect
>  
>  -- test timeout
> -socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
> +socket.tcp_connect('127.0.0.1', 80, 0.00000000000001)

It seems this change is not related to your commit.

> +--gh-4138 Check getaddrinfo() error.
> +
> +test_run:cmd("setopt delimiter ';'")
> +function check_err(err)
> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> +       err == 'getaddrinfo: Name or service not known' then
> +            return true
> +    end
> +    return false
> +end;

Flush the delimiter to '' here.

Please, comment (outside of semicolon-delimiter block) that these
messages corresponds to the following codes:

* EAI_NONAME (Mac OS)
* EAI_SERVICE (Linux and Mac OS)
* EAI_NONAME (Linux)

It seems that you reveice either EAI_NONAME or EAI_SERVICE, not? Please,
elaborate. I guess it worth to see which certain error is returned by
getaddrinfo() here and check only for it (but consider difference
between Linux and Mac OS gai_strerror() messages if it is EAI_NONAME).

> +
> +s, err = socket:connect('hostname:3301');

I would use less generic hostname like 'non_exists_hostname'.

> diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
> index bea43002d..13ed4e200 100644
> --- a/test/box/net.box.test.lua
> +++ b/test/box/net.box.test.lua
> @@ -1538,4 +1538,19 @@ test_run:grep_log('default', '00000020:.*')
>  test_run:grep_log('default', '00000030:.*')
>  test_run:grep_log('default', '00000040:.*')
>  
> +--gh-4138 Check getaddrinfo() error.
> +test_run:cmd("setopt delimiter ';'")
> +
> +function check_err(err)
> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> +       err == 'getaddrinfo: Name or service not known' then
> +            return true
> +    end
> +    return false
> +end;

The same comments as above are applicable for this test case too.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [tarantool-patches] Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-06-23 20:31 ` [tarantool-patches] " Alexander Turenko
@ 2019-06-25 13:38   ` Roman Khabibov
  2019-07-09  8:04     ` Alexander Turenko
  0 siblings, 1 reply; 20+ messages in thread
From: Roman Khabibov @ 2019-06-25 13:38 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Alexander Turenko



> On Jun 23, 2019, at 11:31 PM, Alexander Turenko <alexander.turenko@tarantool.org> wrote:
> 
> Re C part of the commit (src/lib/core/coio_task.c).
> 
> The origin of the (rc < 0) check is the following commit (marked
> relevant phrase with !!):
> 
> commit ea1da04d5add51c308efb3fd2d71cdfabed8411c
> Author: Roman Tsisyk <roman@tsisyk.com>
> Date:   Mon Dec 5 20:28:59 2016 +0300
> 
>    Refactor coio_task
> 
>    * Add coio_task_create() and coio_task_destroy()
>    * Rename coio_task() to coio_task_post()
>    * Fix the core invariant of coio_task_post():
>      - On timeout or when fiber was cancelled set diag, return -1 and
>        guarantee that on_timeout will be called somewhen.
>      - Otherwise don't touch diag, don't call on_timeout callback,
>        return 0. Please check task->base.result and task->diag
>        to get the original return code and diag from the callback.
>    * Change the return value of coio_task_post() to "int".
>    * Add diag to coio_getaddrinfo() and fix a possible bug in replication;
> !!    ignore uncoventional getaddrinfo(3) error codes for now.
>    * Fix buggy box.snapshot() tests.
> 
>    Needed for #1954
> 
> I don't find any reason to ignoring 'unconventional' errors.
> 
> getaddrinfo() returns negative values on Linux, however it is not so on
> Mac OS. Values and corresponding messages can be checked like so:
> 
> $ gcc -Wall -Wextra -x c <(echo -e '#include <sys/types.h>\n#include <sys/socket.h>\n#include <netdb.h>\n#include <stdio.h>\nint main() { printf("%d\\n", EAI_NONAME); return 0; }') && ./a.out; rm a.out
> $ gcc -Wall -Wextra -x c <(echo -e '#include <sys/types.h>\n#include <sys/socket.h>\n#include <netdb.h>\n#include <stdio.h>\nint main() { printf("%s\\n", gai_strerror(EAI_NONAME)); return 0; }') && ./a.out; rm a.out
> 
> The commit message should mention Roman's commit and cleanly state that
> (rc < 0) assumption does not work on Mac OS.
> 
> Aside of that there is nothing bad in calling gai_strerror() with some
> unusual value, it just return "Unknown error" (I checked both Linux and
> Mac OS).
> 
> I would add relevant test case for coio_getaddrinfo() into
> test/unit/coio.cc.
@@ -72,7 +72,7 @@ static void
 test_getaddrinfo(void)
 {
 	header();
-	plan(1);
+	plan(3);
 	const char *host = "127.0.0.1";
 	const char *port = "3333";
 	struct addrinfo *i;
@@ -81,6 +81,11 @@ test_getaddrinfo(void)
 	is(rc, 0, "getaddrinfo");
 	freeaddrinfo(i);
 
+	/* gh-4138: Check getaddrinfo() error. */
+	isnt(coio_getaddrinfo("hostname", port, NULL, &i, 1), 0, "getaddrinfo error");
+	isnt(strstr(diag_get()->last->errmsg, "getaddrinfo"), NULL,
+	     "getaddrinfo error");
+
 	/*
 	 * gh-4209: 0 timeout should not be a special value and
 	 * detach a task. Before a fix it led to segfault

> Consider comments re Lua part of the commit below.
> 
>> Before this patch, branch when coio_getaddrinfo() returns
>> getaddrinfo() errors has never reached. Add this errors into the
>> socket and thenet.box modules.
> 
> Typo: thenet.box -> net.box.
Before this patch, branch when getaddrinfo() returns error codes
couldn't be reached on Mac OS, because they are greater than 0 on
Mac OS (checking for errors was rc < 0). Add this errors into the
socket and the net.box modules. Now getaddrinfo() can return a
pair of values (nil and error message) in case of error.

> 
> Aside that I would add more investigation results and contracts change
> (see other comments in this email).
> 
>> 
>> Closes #4138
>> ---
>> 
>> Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-4138-getaddrinfo
>> Issue: https://github.com/tarantool/tarantool/issues/4138
> 
>> diff --git a/src/lib/core/coio_task.c b/src/lib/core/coio_task.c
>> index 908b336ed..83f669d05 100644
>> --- a/src/lib/core/coio_task.c
>> +++ b/src/lib/core/coio_task.c
>> @@ -413,7 +413,7 @@ coio_getaddrinfo(const char *host, const char *port,
>> 		return -1; /* timed out or cancelled */
>> 
>> 	/* Task finished */
>> -	if (task->rc < 0) {
>> +	if (task->rc != 0) {
> 
> See comments at the start of the email.
> 
>> diff --git a/src/lua/socket.c b/src/lua/socket.c
>> index 130378caf..5a8469ddf 100644
>> --- a/src/lua/socket.c
>> +++ b/src/lua/socket.c
>> @@ -54,6 +54,7 @@
>> #include <fiber.h>
>> #include "lua/utils.h"
>> #include "lua/fiber.h"
>> +#include "reflection.h"
> 
> It is only needed to deference 'err->type', so I would remove it with
> this check.
@@ -816,7 +829,9 @@ lbox_socket_getaddrinfo(struct lua_State *L)
 
 	if (dns_res != 0) {
 		lua_pushnil(L);
-		return 1;
+		struct error *err = diag_get()->last;
+		lua_pushstring(L, err->errmsg);
+		return 2;
 	}

>> 
>> extern int coio_wait(int fd, int event, double timeout);
>> 
>> @@ -816,6 +817,11 @@ lbox_socket_getaddrinfo(struct lua_State *L)
>> 
>> 	if (dns_res != 0) {
>> 		lua_pushnil(L);
>> +		struct error *err = diag_get()->last;
>> +		if (strcmp(err->type->name, "SystemError") == 0) {
> 
> I don't think that we should check a type here. Why not report any error
> as is?
> 
> After that the convention for lbox_socket_getaddrinfo() will be simple:
> it returns a table of results when successul; `nil`, err_msg otherwise.
> 
> Now it can return just `nil` or `nil`, err_msg -- the contract is more
> complex.
> 
> Lua's code will be simpler if we'll simplify this contract.
> 
> I suggest to add a comment (in free form, but other reviewers can
> enforce specific format) to the function that shows it contract
> explicitly, like so: return <...> at success, otherwise return <…>.
@@ -774,6 +774,19 @@ lbox_getaddrinfo_result_wrapper(struct lua_State *L)
 	return 1;
 }
 
+/**
+ * Wrap coio_getaddrinfo() and call it. Push returned values onto
+ * @a L Lua stack.
+ *
+ * @param L Lua stack.
+ *
+ * @retval 1 Number of returned values by Lua function if
+ * coio_getaddrinfo() success.
+ * @retval 2 Number of returned values by Lua function if
+ * coio_getaddrinfo() is failed (fist is nil, second is error
+ * message).
+ * @retval One of luaT_error() codes.
+ */
 static int
 lbox_socket_getaddrinfo(struct lua_State *L)
 {

>> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
>> index 2dba0a8d2..f0b432925 100644
>> --- a/src/lua/socket.lua
>> +++ b/src/lua/socket.lua
>> @@ -1028,11 +1028,14 @@ local function tcp_connect(host, port, timeout)
>>     end
>>     local timeout = timeout or TIMEOUT_INFINITY
>>     local stop = fiber.clock() + timeout
>> -    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>> +    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>>         protocol = 'tcp' })
>>     if dns == nil or #dns == 0 then
>> -        boxerrno(boxerrno.EINVAL)
>> -        return nil
>> +        if not err then
>> +            boxerrno(boxerrno.EINVAL)
>> +            return nil
>> +        end
>> +        return nil, err
> 
> Here we change the contract of socket.getaddrinfo() -- the user visible
> function. I would mention that in the commit message.
Added.

>> diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
>> index dab168f90..140baf22f 100644
>> --- a/test/app/socket.test.lua
>> +++ b/test/app/socket.test.lua
>> @@ -301,7 +301,7 @@ sc:close()
>> -- tcp_connect
>> 
>> -- test timeout
>> -socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
>> +socket.tcp_connect('127.0.0.1', 80, 0.00000000000001)
> 
> It seems this change is not related to your commit.
Removed.

>> +--gh-4138 Check getaddrinfo() error.
>> +
>> +test_run:cmd("setopt delimiter ';'")
>> +function check_err(err)
>> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
>> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
>> +       err == 'getaddrinfo: Name or service not known' then
>> +            return true
>> +    end
>> +    return false
>> +end;
> 
> Flush the delimiter to '' here.
> 
> Please, comment (outside of semicolon-delimiter block) that these
> messages corresponds to the following codes:
> 
> * EAI_NONAME (Mac OS)
> * EAI_SERVICE (Linux and Mac OS)
> * EAI_NONAME (Linux)
> 
> It seems that you reveice either EAI_NONAME or EAI_SERVICE, not? Please,
> elaborate. I guess it worth to see which certain error is returned by
> getaddrinfo() here and check only for it (but consider difference
> between Linux and Mac OS gai_strerror() messages if it is EAI_NONAME).
> 
>> +
>> +s, err = socket:connect('hostname:3301');
> 
> I would use less generic hostname like 'non_exists_hostname’.
@@ -982,4 +982,14 @@ fiber.cancel(echo_fiber)
 client:read(1, 5) == ''
 server:close()
 
+-- gh-4138 Check getaddrinfo() error. Error code and error message
+-- returned by getaddrinfo() depends on system's gai_strerror()
+-- and compiler. So that there is no checking for certain error
+-- message.
+
+s, err = socket:connect('non_exists_hostname:3301')
+string.find(err, 'getaddrinfo') ~= nil
+s, err = socket:bind('non_exists_hostname:3301')
+string.find(err, 'getaddrinfo') ~= nil
+

>> diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
>> index bea43002d..13ed4e200 100644
>> --- a/test/box/net.box.test.lua
>> +++ b/test/box/net.box.test.lua
>> @@ -1538,4 +1538,19 @@ test_run:grep_log('default', '00000020:.*')
>> test_run:grep_log('default', '00000030:.*')
>> test_run:grep_log('default', '00000040:.*')
>> 
>> +--gh-4138 Check getaddrinfo() error.
>> +test_run:cmd("setopt delimiter ';'")
>> +
>> +function check_err(err)
>> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
>> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
>> +       err == 'getaddrinfo: Name or service not known' then
>> +            return true
>> +    end
>> +    return false
>> +end;
> 
> The same comments as above are applicable for this test case too.
@@ -1538,4 +1538,12 @@ test_run:grep_log('default', '00000020:.*')
 test_run:grep_log('default', '00000030:.*')
 test_run:grep_log('default', '00000040:.*')
 
+-- gh-4138 Check getaddrinfo() error. Error code and error message
+-- returned by getaddrinfo() depends on system's gai_strerror()
+-- and compiler. So that there is no checking for certain error
+-- message.
+
+s = remote.connect('non_exists_hostname:3301')
+string.find(s['error'], 'getaddrinfo') ~= nil
+
 box.cfg{log_level=log_level}

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [tarantool-patches] Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-06-25 13:38   ` [tarantool-patches] Re: [PATCH v2 1/2] " Roman Khabibov
@ 2019-07-09  8:04     ` Alexander Turenko
  2019-07-10  2:16       ` Roman Khabibov
  0 siblings, 1 reply; 20+ messages in thread
From: Alexander Turenko @ 2019-07-09  8:04 UTC (permalink / raw)
  To: Roman Khabibov; +Cc: tarantool-patches

> > The commit message should mention Roman's commit and cleanly state that
> > (rc < 0) assumption does not work on Mac OS.

This is not done.

> +	/* gh-4138: Check getaddrinfo() error. */
> +	isnt(coio_getaddrinfo("hostname", port, NULL, &i, 1), 0, "getaddrinfo error");

The line is longer then 80 symbols.

> +	isnt(strstr(diag_get()->last->errmsg, "getaddrinfo"), NULL,
> +	     "getaddrinfo error");

It would be good to have different case names.

The problem that stated in the issue is about improper error message. I don't
understand the test in this context: it does not check whether it is so. It is
applicable to lua tests too.

> + * Wrap coio_getaddrinfo() and call it. Push returned values onto
> + * @a L Lua stack.
> + *
> + * @param L Lua stack.
> + *
> + * @retval 1 Number of returned values by Lua function if
> + * coio_getaddrinfo() success.
> + * @retval 2 Number of returned values by Lua function if
> + * coio_getaddrinfo() is failed (fist is nil, second is error

Typo: fist -> first.

> + * message).
> + * @retval One of luaT_error() codes.

'luaT_error() code' is the strange phrase. I guess you want to say that
the function can raise a lua error. It is not a return value, because a
function does not return at all in the case (it is longjmp to be
precise).

> >> +s, err = socket:connect('hostname:3301');
> > 
> > I would use less generic hostname like 'non_exists_hostname’.

The comment is still actual for test/unit/coio.cc.

(I have copied parts of your patch below to comment them.)

> @@ -1347,10 +1350,10 @@ local function lsocket_tcp_connect(self, host, port)
>      -- This function is broken by design
>      local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>      local timeout = deadline - fiber.clock()
> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
>      if dns == nil or #dns == 0 then
> -        self._errno = boxerrno.EINVAL
> -        return nil, socket_error(self)
> +         self._errno = boxerrno.EINVAL

Broken indent.

> @@ -1534,9 +1537,12 @@ local function lsocket_connect(host, port)
>      if host == nil or port == nil then
>          error("Usage: luasocket.connect(host, port)")
>      end
> -    local s = tcp_connect(host, port)
> +    local s, err = tcp_connect(host, port)
>      if not s then
> -        return nil, boxerrno.strerror()
> +        if not err then

> +            return nil, boxerrno.strerror()
> +        end
> +        return nil, err

Why not always set errno to EINVAL and return err, nil in case on an error? It
seems the 'if' here is redundant.

Also applicable to similar changes in other lua functions.

>      for i, remote in pairs(dns) do
>          timeout = stop - fiber.clock()
>          if timeout <= 0 then
>              boxerrno(boxerrno.ETIMEDOUT)
> -            return nil
> +            return nil, 'timed out'
>          end
>          local s = socket_new(remote.family, remote.type, remote.protocol)
>          if s then

Do you really think this change is self-explanatory and it is obvious
why this is needed (and why this was not needed before)?

Yep, we discussed it voicely, but I'm not a last reader of your code.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [tarantool-patches] Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-07-09  8:04     ` Alexander Turenko
@ 2019-07-10  2:16       ` Roman Khabibov
  2019-07-23 12:39         ` Alexander Turenko
  0 siblings, 1 reply; 20+ messages in thread
From: Roman Khabibov @ 2019-07-10  2:16 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Alexander Turenko


> On Jul 9, 2019, at 11:04, Alexander Turenko <alexander.turenko@tarantool.org> wrote:
> 
>>> The commit message should mention Roman's commit and cleanly state that
>>> (rc < 0) assumption does not work on Mac OS.
> 
> This is not done.
    lua: return getaddrinfo() errors
    
    Before this patch, branch when getaddrinfo() returns error codes
    couldn't be reached on Mac OS, because they are greater than 0 on
    Mac OS (assumption "rc < 0" in commit ea1da04d5 is incorret for
    Mac OS). Add this errors into the socket and the net.box modules.
    Now getaddrinfo() can return a pair of values (nil and error
    message) in case of error.
    
    Closes #4138

>> +	/* gh-4138: Check getaddrinfo() error. */
>> +	isnt(coio_getaddrinfo("hostname", port, NULL, &i, 1), 0, "getaddrinfo error");
> 
> The line is longer then 80 symbols
>> +	isnt(strstr(diag_get()->last->errmsg, "getaddrinfo"), NULL,
>> +	     "getaddrinfo error");
> 
> It would be good to have different case names.
+	/* gh-4138: Check getaddrinfo() error. */
+	isnt(coio_getaddrinfo("non_exists_hostname", port, NULL, &i, 1), 0,
+	     "getaddrinfo error");
+	isnt(strstr(diag_get()->last->errmsg, "getaddrinfo"), NULL,
+	     "getaddrinfo error message");
+

> The problem that stated in the issue is about improper error message. I don't
> understand the test in this context: it does not check whether it is so. It is
> applicable to lua tests too.
>I would add relevant test case for coio_getaddrinfo() into
>test/unit/coio.cc.
I added this for this reason.
Why? Now, error message is proper, and I check it by grep:
+-- gh-4138 Check getaddrinfo() error. Error code and error message
+-- returned by getaddrinfo() depends on system's gai_strerror()
+-- and compiler. So that there is no checking for certain error
+-- message.
+
+s, err = socket:connect('non_exists_hostname:3301')
+string.find(err, 'getaddrinfo') ~= nil
+s, err = socket:bind('non_exists_hostname:3301')
+string.find(err, 'getaddrinfo') ~= nil
+
Same about net.box test.
>> + * Wrap coio_getaddrinfo() and call it. Push returned values onto
>> + * @a L Lua stack.
>> + *
>> + * @param L Lua stack.
>> + *
>> + * @retval 1 Number of returned values by Lua function if
>> + * coio_getaddrinfo() success.
>> + * @retval 2 Number of returned values by Lua function if
>> + * coio_getaddrinfo() is failed (fist is nil, second is error
> 
> Typo: fist -> first.
> 
>> + * message).
>> + * @retval One of luaT_error() codes.
> 
> 'luaT_error() code' is the strange phrase. I guess you want to say that
> the function can raise a lua error. It is not a return value, because a
> function does not return at all in the case (it is longjmp to be
> precise).
+/**
+ * Wrap coio_getaddrinfo() and call it. Push returned values onto
+ * @a L Lua stack. In case of error raise a Lua error.
+ *
+ * @param L Lua stack.
+ *
+ * @retval 1 Number of returned values by Lua function if
+ * coio_getaddrinfo() success.
+ * @retval 2 Number of returned values by Lua function if
+ * coio_getaddrinfo() is failed (first is nil, second is error
+ * message).
+ */

>>>> +s, err = socket:connect('hostname:3301');
>>> 
>>> I would use less generic hostname like 'non_exists_hostname’.
> 
> The comment is still actual for test/unit/coio.cc.
Done.

> (I have copied parts of your patch below to comment them.)
> 
>> @@ -1347,10 +1350,10 @@ local function lsocket_tcp_connect(self, host, port)
>>     -- This function is broken by design
>>     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>>     local timeout = deadline - fiber.clock()
>> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
>> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
>>     if dns == nil or #dns == 0 then
>> -        self._errno = boxerrno.EINVAL
>> -        return nil, socket_error(self)
>> +         self._errno = boxerrno.EINVAL
> 
> Broken indent.
@@ -1347,10 +1356,10 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
     if dns == nil or #dns == 0 then
         self._errno = boxerrno.EINVAL
-        return nil, socket_error(self)
+        return nil, err
     end
     for _, remote in ipairs(dns) do
         timeout = deadline - fiber.clock()

>> @@ -1534,9 +1537,12 @@ local function lsocket_connect(host, port)
>>     if host == nil or port == nil then
>>         error("Usage: luasocket.connect(host, port)")
>>     end
>> -    local s = tcp_connect(host, port)
>> +    local s, err = tcp_connect(host, port)
>>     if not s then
>> -        return nil, boxerrno.strerror()
>> +        if not err then
> 
>> +            return nil, boxerrno.strerror()
>> +        end
>> +        return nil, err
> 
> Why not always set errno to EINVAL and return err, nil in case on an error? It
> seems the 'if' here is redundant.
I think that if we remove this ‘if', the behaviour of these methods
(lsocket_connect(), lsocket_bind()) will be strange. In some cases error
message is present, in some cases it is not, because tcp_connect() can return
just nil without err.

> Also applicable to similar changes in other lua functions.
> 
>>     for i, remote in pairs(dns) do
>>         timeout = stop - fiber.clock()
>>         if timeout <= 0 then
>>             boxerrno(boxerrno.ETIMEDOUT)
>> -            return nil
>> +            return nil, 'timed out'
>>         end
>>         local s = socket_new(remote.family, remote.type, remote.protocol)
>>         if s then
> 
> Do you really think this change is self-explanatory and it is obvious
> why this is needed (and why this was not needed before)?
> 
> Yep, we discussed it voicely, but I'm not a last reader of your code.
-            return nil
+            -- Second arg added to do the behaviour of this
+            -- function symmetrical in case of timeout on both
+            -- Linux and Mac OS. On Mac OS error message 'timed
+            -- out' is thrown by getaddrinfo(). On Linux the
+            -- behaviour of getaddrinfo() in this case isn't same
+            -- and timeout is checked here.
+            return nil, 'timed out'

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-07-10  2:16       ` Roman Khabibov
@ 2019-07-23 12:39         ` Alexander Turenko
  2019-07-26 13:48           ` Alexander Turenko
  2019-08-05 13:36           ` Roman Khabibov
  0 siblings, 2 replies; 20+ messages in thread
From: Alexander Turenko @ 2019-07-23 12:39 UTC (permalink / raw)
  To: Roman Khabibov; +Cc: tarantool-patches, Vladimir Davydov

Please, split Mac OS specific fix (I think both coio_task.c and say.c
can be fixed in one commit) from OS independent Lua change (that just
pass received error to a user).

Returning either `nil` or `nil, err` still look strange for me. Please,
try to implement the following way: set an additional field in a socket
object (we have '_errno', let's add another one: say, '_error') with a
message from a diagnostics and return it from socket_error() if it
exists. Don't forget to flush it when update any of those two fields (I
guess an auxiliary function to set _errno and _error).

Don't sure how should we check whether a diag is set. Maybe flush it
before a call to C and then check whether it was set afterwards.

There are places where strerror() is called directly to return `nil,
err`. They should be replaced with socket_error().

I think it worth to introduce socket_clear_error() and
socket_set_error() in a separate commit w/o any logic change.

CCed Vladimir, because we discussed possible approaches with him.

WBR, Alexander Turenko.

> >>> The commit message should mention Roman's commit and cleanly state that
> >>> (rc < 0) assumption does not work on Mac OS.
> > 
> > This is not done.
>     lua: return getaddrinfo() errors
>     
>     Before this patch, branch when getaddrinfo() returns error codes
>     couldn't be reached on Mac OS, because they are greater than 0 on
>     Mac OS (assumption "rc < 0" in commit ea1da04d5 is incorret for
>     Mac OS). Add this errors into the socket and the net.box modules.
>     Now getaddrinfo() can return a pair of values (nil and error
>     message) in case of error.
>     
>     Closes #4138

Typo: incorret -> incorrect.

> > The problem that stated in the issue is about improper error message. I don't
> > understand the test in this context: it does not check whether it is so. It is
> > applicable to lua tests too.
> >I would add relevant test case for coio_getaddrinfo() into
> >test/unit/coio.cc.
> I added this for this reason.
> Why? Now, error message is proper, and I check it by grep:
> +-- gh-4138 Check getaddrinfo() error. Error code and error message
> +-- returned by getaddrinfo() depends on system's gai_strerror()
> +-- and compiler. So that there is no checking for certain error
> +-- message.
> +
> +s, err = socket:connect('non_exists_hostname:3301')
> +string.find(err, 'getaddrinfo') ~= nil
> +s, err = socket:bind('non_exists_hostname:3301')
> +string.find(err, 'getaddrinfo') ~= nil
> +
> Same about net.box test.


It does not depend on compiler, but on libc. Anyway, if there are two
variants of an error message, just check that it is one of them. I saw
this check in a previous verison of the patch (but it was checked
against three messages; don't know why).

> >> @@ -1534,9 +1537,12 @@ local function lsocket_connect(host, port)
> >>     if host == nil or port == nil then
> >>         error("Usage: luasocket.connect(host, port)")
> >>     end
> >> -    local s = tcp_connect(host, port)
> >> +    local s, err = tcp_connect(host, port)
> >>     if not s then
> >> -        return nil, boxerrno.strerror()
> >> +        if not err then
> > 
> >> +            return nil, boxerrno.strerror()
> >> +        end
> >> +        return nil, err
> > 
> > Why not always set errno to EINVAL and return err, nil in case on an error? It
> > seems the 'if' here is redundant.
> I think that if we remove this ‘if', the behaviour of these methods
> (lsocket_connect(), lsocket_bind()) will be strange. In some cases error
> message is present, in some cases it is not, because tcp_connect() can return
> just nil without err.

Hope this will become non-actual with the way proposed above.

> 
> > Also applicable to similar changes in other lua functions.
> > 
> >>     for i, remote in pairs(dns) do
> >>         timeout = stop - fiber.clock()
> >>         if timeout <= 0 then
> >>             boxerrno(boxerrno.ETIMEDOUT)
> >> -            return nil
> >> +            return nil, 'timed out'
> >>         end
> >>         local s = socket_new(remote.family, remote.type, remote.protocol)
> >>         if s then
> > 
> > Do you really think this change is self-explanatory and it is obvious
> > why this is needed (and why this was not needed before)?
> > 
> > Yep, we discussed it voicely, but I'm not a last reader of your code.
> -            return nil
> +            -- Second arg added to do the behaviour of this
> +            -- function symmetrical in case of timeout on both
> +            -- Linux and Mac OS. On Mac OS error message 'timed
> +            -- out' is thrown by getaddrinfo(). On Linux the
> +            -- behaviour of getaddrinfo() in this case isn't same
> +            -- and timeout is checked here.
> +            return nil, 'timed out'

It is unclear why the difference between OSes exists here. I don't sure,
but this can be flaky behaviour. We check timeout in coio_task and set a
TimedOut diag (with 'timed out' message) for address resolving timeout.
Here we (most likely) will get timeout if connect() takes a long time. I
guess your fix is for the same problem that Mergen already fixed in
79f876adee1e11961db7840381bea7a18fe18180 ('box: increase connection
timeout in "net.box.test.lua"'). If so this change is not needed
anymore.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-07-23 12:39         ` Alexander Turenko
@ 2019-07-26 13:48           ` Alexander Turenko
  2019-08-05 13:36           ` Roman Khabibov
  1 sibling, 0 replies; 20+ messages in thread
From: Alexander Turenko @ 2019-07-26 13:48 UTC (permalink / raw)
  To: Roman Khabibov; +Cc: tarantool-patches, Vladimir Davydov

On Tue, Jul 23, 2019 at 03:39:54PM +0300, Alexander Turenko wrote:
> Please, split Mac OS specific fix (I think both coio_task.c and say.c
> can be fixed in one commit) from OS independent Lua change (that just
> pass received error to a user).
> 
> Returning either `nil` or `nil, err` still look strange for me. Please,
> try to implement the following way: set an additional field in a socket
> object (we have '_errno', let's add another one: say, '_error') with a
> message from a diagnostics and return it from socket_error() if it
> exists. Don't forget to flush it when update any of those two fields (I
> guess an auxiliary function to set _errno and _error).
> 
> Don't sure how should we check whether a diag is set. Maybe flush it
> before a call to C and then check whether it was set afterwards.
> 
> There are places where strerror() is called directly to return `nil,
> err`. They should be replaced with socket_error().
> 
> I think it worth to introduce socket_clear_error() and
> socket_set_error() in a separate commit w/o any logic change.
> 
> CCed Vladimir, because we discussed possible approaches with him.
> 
> WBR, Alexander Turenko.

We look into the code with Roman together and now I understood that the
proposed way does not work: we have no a socket object in getaddrinfo()
Lua function to save _error into it.

We should return a text error message as retvals anyway.

There is the question about constructions like so:

local val, err = foo(<...>)
if val == nil then
    if err == nil then
        return boxerrno.strerror()
    else
        return err
    end
end

I think foo() should either always return `nil, string` at an error or
always set an errno. I propose to try to implement this way and compare
whther it will be simper.

WBR, Alexander Turenko.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [tarantool-patches] Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-07-23 12:39         ` Alexander Turenko
  2019-07-26 13:48           ` Alexander Turenko
@ 2019-08-05 13:36           ` Roman Khabibov
  2019-08-29  0:45             ` Alexander Turenko
  1 sibling, 1 reply; 20+ messages in thread
From: Roman Khabibov @ 2019-08-05 13:36 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Alexander Turenko

The problem that stated in the issue is about improper error message. I don't
>>> understand the test in this context: it does not check whether it is so. It is
>>> applicable to lua tests too.
>>> I would add relevant test case for coio_getaddrinfo() into
>>> test/unit/coio.cc.
>> I added this for this reason.
>> Why? Now, error message is proper, and I check it by grep:
>> +-- gh-4138 Check getaddrinfo() error. Error code and error message
>> +-- returned by getaddrinfo() depends on system's gai_strerror()
>> +-- and compiler. So that there is no checking for certain error
>> +-- message.
>> +
>> +s, err = socket:connect('non_exists_hostname:3301')
>> +string.find(err, 'getaddrinfo') ~= nil
>> +s, err = socket:bind('non_exists_hostname:3301')
>> +string.find(err, 'getaddrinfo') ~= nil
>> +
>> Same about net.box test.
> 
> 
> It does not depend on compiler, but on libc. Anyway, if there are two
> variants of an error message, just check that it is one of them. I saw
> this check in a previous verison of the patch (but it was checked
> against three messages; don't know why).
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror(). So that there is no checking for
+-- certain error message.
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s, err = socket:connect('non_exists_hostname:3301')
+check_err(err) == true
+
 test_run:cmd("clear filter”)

>> 
>>> Also applicable to similar changes in other lua functions.
>>> 
>>>>    for i, remote in pairs(dns) do
>>>>        timeout = stop - fiber.clock()
>>>>        if timeout <= 0 then
>>>>            boxerrno(boxerrno.ETIMEDOUT)
>>>> -            return nil
>>>> +            return nil, 'timed out'
>>>>        end
>>>>        local s = socket_new(remote.family, remote.type, remote.protocol)
>>>>        if s then
>>> 
>>> Do you really think this change is self-explanatory and it is obvious
>>> why this is needed (and why this was not needed before)?
>>> 
>>> Yep, we discussed it voicely, but I'm not a last reader of your code.
>> -            return nil
>> +            -- Second arg added to do the behaviour of this
>> +            -- function symmetrical in case of timeout on both
>> +            -- Linux and Mac OS. On Mac OS error message 'timed
>> +            -- out' is thrown by getaddrinfo(). On Linux the
>> +            -- behaviour of getaddrinfo() in this case isn't same
>> +            -- and timeout is checked here.
>> +            return nil, 'timed out'
> 
> It is unclear why the difference between OSes exists here. I don't sure,
> but this can be flaky behaviour. We check timeout in coio_task and set a
> TimedOut diag (with 'timed out' message) for address resolving timeout.
> Here we (most likely) will get timeout if connect() takes a long time. I
> guess your fix is for the same problem that Mergen already fixed in
> 79f876adee1e11961db7840381bea7a18fe18180 ('box: increase connection
> timeout in "net.box.test.lua"'). If so this change is not needed
> anymore.
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value on Mac OS from internal.getaddrinfo (check for
+-- timeout in coio_task and setting a TimedOut diag (with
+-- 'timed out' message) for address resolving timeout). On Linux,
+-- it is not occurred and time-out is checked in tcp_connect().
+-- This difference has appeared after gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+s == nil


commit 1e7d16ffa5c1908cac83ddee4e0f6cf42fce55e5
Author: Roman Khabibov <roman.habibov@tarantool.org>
Date:   Tue Jul 30 15:49:13 2019 +0300

    lua: return getaddrinfo() errors
    
    Add getaddrinfo() errors into the several fuctions of socket. Now
    getaddrinfo() can return a pair of values (nil and error message)
    in case of error.
    
    Closes #4138

diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index 31a8c16b7..beab3b0a7 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -150,9 +150,12 @@ local function next_id(id) return band(id + 1, 0x7FFFFFFF) end
 local function establish_connection(host, port, timeout)
     local timeout = timeout or DEFAULT_CONNECT_TIMEOUT
     local begin = fiber.clock()
-    local s = socket.tcp_connect(host, port, timeout)
+    local s, err = socket.tcp_connect(host, port, timeout)
     if not s then
-        return nil, errno.strerror(errno())
+        if not err then
+            return nil, errno.strerror(errno())
+        end
+        return nil, err
     end
     local msg = s:read({chunk = IPROTO_GREETING_SIZE},
                         timeout - (fiber.clock() - begin))
diff --git a/src/lua/socket.c b/src/lua/socket.c
index 130378caf..b4282335f 100644
--- a/src/lua/socket.c
+++ b/src/lua/socket.c
@@ -774,6 +774,18 @@ lbox_getaddrinfo_result_wrapper(struct lua_State *L)
 	return 1;
 }
 
+/**
+ * Wrap coio_getaddrinfo() and call it. Push returned values onto
+ * @a L Lua stack. Raise a Lua error in case of error.
+ *
+ * @param L Lua stack.
+ *
+ * @retval 1 Number of returned values by Lua function if
+ * coio_getaddrinfo() success.
+ * @retval 2 Number of returned values by Lua function if
+ * coio_getaddrinfo() is failed (first is nil, second is error
+ * message).
+ */
 static int
 lbox_socket_getaddrinfo(struct lua_State *L)
 {
@@ -816,7 +828,9 @@ lbox_socket_getaddrinfo(struct lua_State *L)
 
 	if (dns_res != 0) {
 		lua_pushnil(L);
-		return 1;
+		struct error *err = diag_get()->last;
+		lua_pushstring(L, err->errmsg);
+		return 2;
 	}
 
 	/* no results */
diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index a334ad45b..324c5fbb5 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -1041,9 +1041,12 @@ local function tcp_connect(host, port, timeout)
     end
     local timeout = timeout or TIMEOUT_INFINITY
     local stop = fiber.clock() + timeout
-    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
+    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
         protocol = 'tcp' })
-    if dns == nil or #dns == 0 then
+    if dns == nil then
+        return nil, err
+    end
+    if #dns == 0 then
         boxerrno(boxerrno.EINVAL)
         return nil
     end
@@ -1360,10 +1363,9 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
     if dns == nil or #dns == 0 then
-        self._errno = boxerrno.EINVAL
-        return nil, socket_error(self)
+        return nil, err
     end
     for _, remote in ipairs(dns) do
         timeout = deadline - fiber.clock()
@@ -1547,9 +1549,12 @@ local function lsocket_connect(host, port)
     if host == nil or port == nil then
         error("Usage: luasocket.connect(host, port)")
     end
-    local s = tcp_connect(host, port)
+    local s, err = tcp_connect(host, port)
     if not s then
-        return nil, boxerrno.strerror()
+        if not err then
+            return nil, boxerrno.strerror()
+        end
+        return nil, err
     end
     setmetatable(s, lsocket_tcp_client_mt)
     return s
diff --git a/test/app/socket.result b/test/app/socket.result
index 77eff7370..a361c378e 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -941,10 +941,18 @@ sc:close()
 - true
 ...
 -- tcp_connect
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value on Mac OS from internal.getaddrinfo (check for
+-- timeout in coio_task and setting a TimedOut diag (with
+-- 'timed out' message) for address resolving timeout). On Linux,
+-- it is not occurred and time-out is checked in tcp_connect().
+-- This difference has appeared after gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
 ---
-- null
+...
+s == nil
+---
+- true
 ...
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -1664,7 +1672,7 @@ fio.stat(path) == nil
 { socket.tcp_connect('abrakadabra#123') == nil, errno.strerror() }
 ---
 - - true
-  - Invalid argument
+  - Input/output error
 ...
 -- wrong options for getaddrinfo
 socket.getaddrinfo('host', 'port', { type = 'WRONG' }) == nil and errno() == errno.EINVAL
@@ -2870,6 +2878,35 @@ server:close()
 ---
 - true
 ...
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror(). So that there is no checking for
+-- certain error message.
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s, err = socket:connect('non_exists_hostname:3301')
+---
+...
+check_err(err) == true
+---
+- true
+...
 test_run:cmd("clear filter")
 ---
 - true
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index 7ae9a98aa..600eaf2a6 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -300,8 +300,14 @@ sc:close()
 
 -- tcp_connect
 
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value on Mac OS from internal.getaddrinfo (check for
+-- timeout in coio_task and setting a TimedOut diag (with
+-- 'timed out' message) for address resolving timeout). On Linux,
+-- it is not occurred and time-out is checked in tcp_connect().
+-- This difference has appeared after gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+s == nil
 
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -982,6 +988,24 @@ fiber.cancel(echo_fiber)
 client:read(1, 5) == ''
 server:close()
 
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror(). So that there is no checking for
+-- certain error message.
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s, err = socket:connect('non_exists_hostname:3301')
+check_err(err) == true
+
 test_run:cmd("clear filter")
 
 -- case: sicket receive inconsistent behavior
diff --git a/test/box/net.box.result b/test/box/net.box.result
index e3dabf7d9..2c0219c24 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -3927,6 +3927,35 @@ test_run:grep_log('default', '00000040:.*')
 ---
 - null
 ...
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror(). So that there is no checking for
+-- certain error message.
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s = remote.connect('non_exists_hostname:3301')
+---
+...
+check_err(s['error']) == true
+---
+- true
+...
 box.cfg{log_level=log_level}
 ---
 ...
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index 8e65ff470..a6d98da00 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -1582,4 +1582,22 @@ test_run:wait_log('default', '00000030:.*', nil, 10)
 -- we expect nothing below, so don't wait
 test_run:grep_log('default', '00000040:.*')
 
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror(). So that there is no checking for
+-- certain error message.
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s = remote.connect('non_exists_hostname:3301')
+check_err(s['error']) == true
+
 box.cfg{log_level=log_level}

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [tarantool-patches] Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-08-05 13:36           ` Roman Khabibov
@ 2019-08-29  0:45             ` Alexander Turenko
       [not found]               ` <868EAF2C-A491-46C9-AD37-7512D6CAB213@tarantool.org>
  0 siblings, 1 reply; 20+ messages in thread
From: Alexander Turenko @ 2019-08-29  0:45 UTC (permalink / raw)
  To: Roman Khabibov; +Cc: tarantool-patches

I'm okay here with the minimalistic approach when we only change those
functions that were requested to be changed.

I have several comments below.

WBR, Alexander Turenko.

On Mon, Aug 05, 2019 at 04:36:46PM +0300, Roman Khabibov wrote:
> The problem that stated in the issue is about improper error message. I don't
> >>> understand the test in this context: it does not check whether it is so. It is
> >>> applicable to lua tests too.
> >>> I would add relevant test case for coio_getaddrinfo() into
> >>> test/unit/coio.cc.
> >> I added this for this reason.
> >> Why? Now, error message is proper, and I check it by grep:
> >> +-- gh-4138 Check getaddrinfo() error. Error code and error message
> >> +-- returned by getaddrinfo() depends on system's gai_strerror()
> >> +-- and compiler. So that there is no checking for certain error
> >> +-- message.
> >> +
> >> +s, err = socket:connect('non_exists_hostname:3301')
> >> +string.find(err, 'getaddrinfo') ~= nil
> >> +s, err = socket:bind('non_exists_hostname:3301')
> >> +string.find(err, 'getaddrinfo') ~= nil
> >> +
> >> Same about net.box test.
> > 
> > 
> > It does not depend on compiler, but on libc. Anyway, if there are two
> > variants of an error message, just check that it is one of them. I saw
> > this check in a previous verison of the patch (but it was checked
> > against three messages; don't know why).
> +-- gh-4138 Check getaddrinfo() error from socket:connect() only.
> +-- Error code and error message returned by getaddrinfo() depends
> +-- on system's gai_strerror(). So that there is no checking for
> +-- certain error message.

It checks for certain error messages now.

> +test_run:cmd("setopt delimiter ';'")
> +function check_err(err)
> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> +       err == 'getaddrinfo: Name or service not known' then
> +            return true
> +    end
> +    return false
> +end;
> +test_run:cmd("setopt delimiter ''");
> +
> +s, err = socket:connect('non_exists_hostname:3301')
> +check_err(err) == true
> +
>  test_run:cmd("clear filter”)

It is better to verify it against ffi.C.gai_strerror(EAI_AGAIN)
ffi.C.gai_strerror(EAI_NONAME) and don't rely on the fact that those
messages don't changed across libc versions. The same for other tests.

> 
> >> 
> >>> Also applicable to similar changes in other lua functions.
> >>> 
> >>>>    for i, remote in pairs(dns) do
> >>>>        timeout = stop - fiber.clock()
> >>>>        if timeout <= 0 then
> >>>>            boxerrno(boxerrno.ETIMEDOUT)
> >>>> -            return nil
> >>>> +            return nil, 'timed out'
> >>>>        end
> >>>>        local s = socket_new(remote.family, remote.type, remote.protocol)
> >>>>        if s then
> >>> 
> >>> Do you really think this change is self-explanatory and it is obvious
> >>> why this is needed (and why this was not needed before)?
> >>> 
> >>> Yep, we discussed it voicely, but I'm not a last reader of your code.
> >> -            return nil
> >> +            -- Second arg added to do the behaviour of this
> >> +            -- function symmetrical in case of timeout on both
> >> +            -- Linux and Mac OS. On Mac OS error message 'timed
> >> +            -- out' is thrown by getaddrinfo(). On Linux the
> >> +            -- behaviour of getaddrinfo() in this case isn't same
> >> +            -- and timeout is checked here.
> >> +            return nil, 'timed out'
> > 
> > It is unclear why the difference between OSes exists here. I don't sure,
> > but this can be flaky behaviour. We check timeout in coio_task and set a
> > TimedOut diag (with 'timed out' message) for address resolving timeout.
> > Here we (most likely) will get timeout if connect() takes a long time. I
> > guess your fix is for the same problem that Mergen already fixed in
> > 79f876adee1e11961db7840381bea7a18fe18180 ('box: increase connection
> > timeout in "net.box.test.lua"'). If so this change is not needed
> > anymore.
> -socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
> +-- Test timeout. In this test, tcp_connect can return the second
> +-- output value on Mac OS from internal.getaddrinfo (check for
> +-- timeout in coio_task and setting a TimedOut diag (with
> +-- 'timed out' message) for address resolving timeout). On Linux,
> +-- it is not occurred and time-out is checked in tcp_connect().
> +-- This difference has appeared after gh-4138 patch.
> +s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
> +s == nil

I tested it: it is flaky on Mac OS: sometimes getaddrinfo is timed out,
sometimes connect. On Linux however getaddrinfo is fast enough to never
give timeout error in the case. But if we'll add a busy loop into
getaddrinfo_cb, then the situation becomes the same.

So I would not say that it is not appears on Linux: it is possible
theoretically. The root of the problem is not in OS differences, but
that there are two sources of timeout errors that are reported
differently. Please, describe the problem in OS agnostic way, because it
is not OS dependent by its nature.

I'm not against mention that **usually** Mac OS behaves in one way, but
Linux **usually** in another: you can mention it afterwards if you wish.

> diff --git a/src/lua/socket.c b/src/lua/socket.c
> index 130378caf..b4282335f 100644
> --- a/src/lua/socket.c
> +++ b/src/lua/socket.c
> @@ -774,6 +774,18 @@ lbox_getaddrinfo_result_wrapper(struct lua_State *L)
>  	return 1;
>  }
>  
> +/**
> + * Wrap coio_getaddrinfo() and call it. Push returned values onto
> + * @a L Lua stack. Raise a Lua error in case of error.

It does not raise an error usually. As I see (consider e04413b1d0) it
can raise an error only in case of 'not enough memory' Lua error. I
would ignore this and remove the sentence. You already described how
errors are reported (in retvals below), this is enough.

> + *
> + * @param L Lua stack.
> + *
> + * @retval 1 Number of returned values by Lua function if
> + * coio_getaddrinfo() success.
> + * @retval 2 Number of returned values by Lua function if
> + * coio_getaddrinfo() is failed (first is nil, second is error
> + * message).
> + */
>  static int
>  lbox_socket_getaddrinfo(struct lua_State *L)
>  {

> @@ -1360,10 +1363,9 @@ local function lsocket_tcp_connect(self, host, port)
>      -- This function is broken by design
>      local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>      local timeout = deadline - fiber.clock()
> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
>      if dns == nil or #dns == 0 then
> -        self._errno = boxerrno.EINVAL
> -        return nil, socket_error(self)
> +        return nil, err

If dns is nil, then we should set self._errno to boxerrno() value and
return err.

If #dns is 0, then we should set self._errno to boxerrno() value too and
return socket_error(self).

> +-- gh-4138 Check getaddrinfo() error from socket:connect() only.
> +-- Error code and error message returned by getaddrinfo() depends
> +-- on system's gai_strerror(). So that there is no checking for
> +-- certain error message.
> +test_run:cmd("setopt delimiter ';'")
> +---
> +- true
> +...
> +function check_err(err)
> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> +       err == 'getaddrinfo: Name or service not known' then
> +            return true
> +    end
> +    return false
> +end;
> +---
> +...
> +test_run:cmd("setopt delimiter ''");
> +---
> +- true
> +...
> +s, err = socket:connect('non_exists_hostname:3301')

socket.connect() should be called, the colon here is the mistake.

I think we should test both native and Lua Socket API, because both were
changed.

Should we ever change Lua Socket API? Please, verify it with a
documentation and an actual behaviour of the external module. We emulate
it, so we should be compatible.

I think it also worth to test socket.getaddrinfo() function.

> +---
> +...
> +check_err(err) == truea

== true is redundant.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [tarantool-patches] Re: [server-dev] Re: Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
       [not found]               ` <868EAF2C-A491-46C9-AD37-7512D6CAB213@tarantool.org>
@ 2019-09-06 13:45                 ` Alexander Turenko
  2019-09-10 12:54                   ` Roman Khabibov
  0 siblings, 1 reply; 20+ messages in thread
From: Alexander Turenko @ 2019-09-06 13:45 UTC (permalink / raw)
  To: Roman Khabibov; +Cc: tarantool-patches

> +-- gh-4138 Check getaddrinfo() error from socket:connect() only.
> +-- Error code and error message returned by getaddrinfo() depends
> +-- on system's gai_strerror().
> 
> >> +test_run:cmd("setopt delimiter ';'")
> >> +function check_err(err)
> >> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> >> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> >> +       err == 'getaddrinfo: Name or service not known' then
> >> +            return true
> >> +    end
> >> +    return false
> >> +end;
> >> +test_run:cmd("setopt delimiter ''");
> >> +
> >> +s, err = socket:connect('non_exists_hostname:3301')
> >> +check_err(err) == true
> >> +
> >> test_run:cmd("clear filter”)
> > 
> > It is better to verify it against ffi.C.gai_strerror(EAI_AGAIN)
> > ffi.C.gai_strerror(EAI_NONAME) and don't rely on the fact that those
> > messages don't changed across libc versions. The same for other tests.
> Don’t know how to export this macroses/enums to Lua. More, it is too
> complicated, because we need to covert cdata to Lua string.

We discussed it voicely with Roman. The plan was to add needed constants
to socket.c and expose into Lua as strings. We however don't expose
GAI_* error codes, just messages from gai_strerror(). So maybe adding
them just for tests isn't really worthful.

> >> +-- gh-4138 Check getaddrinfo() error from socket:connect() only.
> >> +-- Error code and error message returned by getaddrinfo() depends
> >> +-- on system's gai_strerror(). So that there is no checking for
> >> +-- certain error message.
> >> +test_run:cmd("setopt delimiter ';'")
> >> +---
> >> +- true
> >> +...
> >> +function check_err(err)
> >> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> >> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> >> +       err == 'getaddrinfo: Name or service not known' then
> >> +            return true
> >> +    end
> >> +    return false
> >> +end;
> >> +---
> >> +...
> >> +test_run:cmd("setopt delimiter ''");
> >> +---
> >> +- true
> >> +...
> >> +s, err = socket:connect('non_exists_hostname:3301')
> > 
> > socket.connect() should be called, the colon here is the mistake.

Please, look again to the code. One call is socket.connect(...), another
is socket:connect().

Just in case, I suggest to add a patch to your patchset to keep youself
from such mistakes:

 | diff --git a/src/lua/socket.lua b/src/lua/socket.lua
 | index e4815adbb..a1c13d4db 100644
 | --- a/src/lua/socket.lua
 | +++ b/src/lua/socket.lua
 | @@ -1554,7 +1554,8 @@ local function lsocket_tcp()
 |  end
 |  
 |  local function lsocket_connect(host, port)
 | -    if host == nil or port == nil then
 | +    if type(host) ~= 'string' or (type(port) ~= 'string' and
 | +            type(port) ~= 'number') then
 | 		 error("Usage: luasocket.connect(host, port)")
 | 	 end
 | 	 local s, err = tcp_connect(host, port)
 | @@ -1569,7 +1570,8 @@ local function lsocket_connect(host, port)
 |  end
 |  
 |  local function lsocket_bind(host, port, backlog)
 | -    if host == nil or port == nil then
 | +    if type(host) ~= 'string' or (type(port) ~= 'string' and
 | +            type(port) ~= 'number') then
 | 		 error("Usage: luasocket.bind(host, port [, backlog])")
 | 	 end
 | 	 local function prepare(s) return backlog end

> > I think we should test both native and Lua Socket API, because both were
> > changed.
> > 
> > Should we ever change Lua Socket API? Please, verify it with a
> > documentation and an actual behaviour of the external module. We emulate
> > it, so we should be compatible.

Are you look into this?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [tarantool-patches] Re: [server-dev] Re: Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-09-06 13:45                 ` [tarantool-patches] Re: [server-dev] " Alexander Turenko
@ 2019-09-10 12:54                   ` Roman Khabibov
  2019-11-01 14:39                     ` [Tarantool-patches] [server-dev] Re: [tarantool-patches] " Alexander Turenko
  0 siblings, 1 reply; 20+ messages in thread
From: Roman Khabibov @ 2019-09-10 12:54 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Alexander Turenko


>>>> +-- gh-4138 Check getaddrinfo() error from socket:connect() only.
>>>> +-- Error code and error message returned by getaddrinfo() depends
>>>> +-- on system's gai_strerror(). So that there is no checking for
>>>> +-- certain error message.
>>>> +test_run:cmd("setopt delimiter ';'")
>>>> +---
>>>> +- true
>>>> +...
>>>> +function check_err(err)
>>>> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
>>>> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
>>>> +       err == 'getaddrinfo: Name or service not known' then
>>>> +            return true
>>>> +    end
>>>> +    return false
>>>> +end;
>>>> +---
>>>> +...
>>>> +test_run:cmd("setopt delimiter ''");
>>>> +---
>>>> +- true
>>>> +...
>>>> +s, err = socket:connect('non_exists_hostname:3301')
>>> 
>>> socket.connect() should be called, the colon here is the mistake.
> 
> Please, look again to the code. One call is socket.connect(...), another
> is socket:connect().
> 
> Just in case, I suggest to add a patch to your patchset to keep youself
> from such mistakes:
> 
> | diff --git a/src/lua/socket.lua b/src/lua/socket.lua
> | index e4815adbb..a1c13d4db 100644
> | --- a/src/lua/socket.lua
> | +++ b/src/lua/socket.lua
> | @@ -1554,7 +1554,8 @@ local function lsocket_tcp()
> |  end
> |  
> |  local function lsocket_connect(host, port)
> | -    if host == nil or port == nil then
> | +    if type(host) ~= 'string' or (type(port) ~= 'string' and
> | +            type(port) ~= 'number') then
> | 		 error("Usage: luasocket.connect(host, port)")
> | 	 end
> | 	 local s, err = tcp_connect(host, port)
> | @@ -1569,7 +1570,8 @@ local function lsocket_connect(host, port)
> |  end
> |  
> |  local function lsocket_bind(host, port, backlog)
> | -    if host == nil or port == nil then
> | +    if type(host) ~= 'string' or (type(port) ~= 'string' and
> | +            type(port) ~= 'number') then
> | 		 error("Usage: luasocket.bind(host, port [, backlog])")
> | 	 end
> | 	 local function prepare(s) return backlog end
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+check_err(err)
+s, err = socket.connect('non_exists_hostname', 3301)
+check_err(err)
+

Also, I added suggested patch.
>>> I think we should test both native and Lua Socket API, because both were
>>> changed.
>>> 
>>> Should we ever change Lua Socket API? Please, verify it with a
>>> documentation and an actual behaviour of the external module. We emulate
>>> it, so we should be compatible.
> 
> Are you look into this?
http://w3.impa.br/~diego/software/luasocket/tcp.html#connect
master:connect(address, port)

Attempts to connect a master object to a remote host, transforming it into a client object. Client objects support methods send, receive, getsockname, getpeername, settimeout, and close.

Address can be an IP address or a host name. Port must be an integer number in the range [1..64K). 

In case of error, the method returns nil followed by a string describing the error. In case of success, the method returns 1.

I looked to this doc. It seems like our behaviour corresponds this API.

commit 9810ccd869183de068939cdce5a157ad35916d36
Author: Roman Khabibov <roman.habibov@tarantool.org>
Date:   Tue Jul 30 15:49:13 2019 +0300

    lua: return getaddrinfo() errors
    
    Add getaddrinfo() errors into the several fuctions of socket. Now
    getaddrinfo() can return a pair of values (nil and error message)
    in case of error.
    
    Closes #4138
    
    @TarantoolBot document
    Title: socket_object:connect
    
    Connect to a remote host using tcp_connect() within. If it is no
    error, return a socket object.

diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index 31a8c16b7..beab3b0a7 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -150,9 +150,12 @@ local function next_id(id) return band(id + 1, 0x7FFFFFFF) end
 local function establish_connection(host, port, timeout)
     local timeout = timeout or DEFAULT_CONNECT_TIMEOUT
     local begin = fiber.clock()
-    local s = socket.tcp_connect(host, port, timeout)
+    local s, err = socket.tcp_connect(host, port, timeout)
     if not s then
-        return nil, errno.strerror(errno())
+        if not err then
+            return nil, errno.strerror(errno())
+        end
+        return nil, err
     end
     local msg = s:read({chunk = IPROTO_GREETING_SIZE},
                         timeout - (fiber.clock() - begin))
diff --git a/src/lua/socket.c b/src/lua/socket.c
index 130378caf..dbade2d27 100644
--- a/src/lua/socket.c
+++ b/src/lua/socket.c
@@ -774,6 +774,18 @@ lbox_getaddrinfo_result_wrapper(struct lua_State *L)
 	return 1;
 }
 
+/**
+ * Wrap coio_getaddrinfo() and call it. Push returned values onto
+ * @a L Lua stack.
+ *
+ * @param L Lua stack.
+ *
+ * @retval 1 Number of returned values by Lua function if
+ * coio_getaddrinfo() success.
+ * @retval 2 Number of returned values by Lua function if
+ * coio_getaddrinfo() is failed (first is nil, second is error
+ * message).
+ */
 static int
 lbox_socket_getaddrinfo(struct lua_State *L)
 {
@@ -816,7 +828,9 @@ lbox_socket_getaddrinfo(struct lua_State *L)
 
 	if (dns_res != 0) {
 		lua_pushnil(L);
-		return 1;
+		struct error *err = diag_get()->last;
+		lua_pushstring(L, err->errmsg);
+		return 2;
 	}
 
 	/* no results */
diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index a334ad45b..e4815adbb 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -1041,9 +1041,12 @@ local function tcp_connect(host, port, timeout)
     end
     local timeout = timeout or TIMEOUT_INFINITY
     local stop = fiber.clock() + timeout
-    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
+    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
         protocol = 'tcp' })
-    if dns == nil or #dns == 0 then
+    if dns == nil then
+        return nil, err
+    end
+    if #dns == 0 then
         boxerrno(boxerrno.EINVAL)
         return nil
     end
@@ -1360,9 +1363,16 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
     if dns == nil or #dns == 0 then
-        self._errno = boxerrno.EINVAL
+        return nil, err
+    end
+    if dns == nil then
+        boxerrno(self._errno)
+        return nil, err
+    end
+    if #dns == 0 then
+        boxerrno(self._errno)
         return nil, socket_error(self)
     end
     for _, remote in ipairs(dns) do
@@ -1547,9 +1557,12 @@ local function lsocket_connect(host, port)
     if host == nil or port == nil then
         error("Usage: luasocket.connect(host, port)")
     end
-    local s = tcp_connect(host, port)
+    local s, err = tcp_connect(host, port)
     if not s then
-        return nil, boxerrno.strerror()
+        if not err then
+            return nil, boxerrno.strerror()
+        end
+        return nil, err
     end
     setmetatable(s, lsocket_tcp_client_mt)
     return s
diff --git a/test/app/socket.result b/test/app/socket.result
index fd299424c..87b1d0387 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -941,10 +941,20 @@ sc:close()
 - true
 ...
 -- tcp_connect
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value from internal.getaddrinfo (usually on Mac OS, but
+-- theoretically it can happen on Linux too). Sometimes
+-- getaddrinfo() is timed out, sometimes connect. On Linux however
+-- getaddrinfo is fast enough to never give timeout error in
+-- the case. So, there are two sources of timeout errors that are
+-- reported differently. This difference has appeared after
+-- gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
 ---
-- null
+...
+s == nil
+---
+- true
 ...
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -1606,7 +1616,7 @@ fio.stat(path) == nil
 { socket.tcp_connect('abrakadabra#123') == nil, errno.strerror() }
 ---
 - - true
-  - Invalid argument
+  - Input/output error
 ...
 -- wrong options for getaddrinfo
 socket.getaddrinfo('host', 'port', { type = 'WRONG' }) == nil and errno() == errno.EINVAL
@@ -2812,6 +2822,41 @@ server:close()
 ---
 - true
 ...
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
+s, err = socket.connect('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
 test_run:cmd("clear filter")
 ---
 - true
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index c72d41763..08cb7fc9e 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -300,8 +300,16 @@ sc:close()
 
 -- tcp_connect
 
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value from internal.getaddrinfo (usually on Mac OS, but
+-- theoretically it can happen on Linux too). Sometimes
+-- getaddrinfo() is timed out, sometimes connect. On Linux however
+-- getaddrinfo is fast enough to never give timeout error in
+-- the case. So, there are two sources of timeout errors that are
+-- reported differently. This difference has appeared after
+-- gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+s == nil
 
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -958,6 +966,25 @@ fiber.cancel(echo_fiber)
 client:read(1, 5) == ''
 server:close()
 
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+check_err(err)
+s, err = socket.connect('non_exists_hostname', 3301)
+check_err(err)
+
 test_run:cmd("clear filter")
 
 -- case: sicket receive inconsistent behavior
diff --git a/test/box/net.box.result b/test/box/net.box.result
index e3dabf7d9..0635ca9e7 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -3927,6 +3927,34 @@ test_run:grep_log('default', '00000040:.*')
 ---
 - null
 ...
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s = remote.connect('non_exists_hostname:3301')
+---
+...
+check_err(s['error'])
+---
+- true
+...
 box.cfg{log_level=log_level}
 ---
 ...
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index 8e65ff470..9c9aee1ad 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -1582,4 +1582,21 @@ test_run:wait_log('default', '00000030:.*', nil, 10)
 -- we expect nothing below, so don't wait
 test_run:grep_log('default', '00000040:.*')
 
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s = remote.connect('non_exists_hostname:3301')
+check_err(s['error'])
+
 box.cfg{log_level=log_level}

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Tarantool-patches] [server-dev] Re: [tarantool-patches] Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-09-10 12:54                   ` Roman Khabibov
@ 2019-11-01 14:39                     ` Alexander Turenko
  2019-11-21 17:27                       ` [Tarantool-patches] [tarantool-patches] [server-dev] " Roman Khabibov
  0 siblings, 1 reply; 20+ messages in thread
From: Alexander Turenko @ 2019-11-01 14:39 UTC (permalink / raw)
  To: Roman Khabibov; +Cc: tarantool-patches, tarantool-patches

> commit 9810ccd869183de068939cdce5a157ad35916d36
> Author: Roman Khabibov <roman.habibov@tarantool.org>
> Date:   Tue Jul 30 15:49:13 2019 +0300
> 
>     lua: return getaddrinfo() errors
>     
>     Add getaddrinfo() errors into the several fuctions of socket. Now
>     getaddrinfo() can return a pair of values (nil and error message)
>     in case of error.
>     
>     Closes #4138
>     
>     @TarantoolBot document
>     Title: socket_object:connect

`socket_object:connect` is not valid Lua syntax. It is better to write
it as socket_object:connect() or socket_object.connect.

net.box API also was changed.

>     
>     Connect to a remote host using tcp_connect() within. If it is no
>     error, return a socket object.

It is hard to understood w/o a context. Please, either describe API
changes formally or give an example that will show the API change.

> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
> index a334ad45b..e4815adbb 100644
> --- a/src/lua/socket.lua
> +++ b/src/lua/socket.lua
> @@ -1041,9 +1041,12 @@ local function tcp_connect(host, port, timeout)
>      end
>      local timeout = timeout or TIMEOUT_INFINITY
>      local stop = fiber.clock() + timeout
> -    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
> +    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>          protocol = 'tcp' })
> -    if dns == nil or #dns == 0 then
> +    if dns == nil then
> +        return nil, err
> +    end
> +    if #dns == 0 then
>          boxerrno(boxerrno.EINVAL)
>          return nil
>      end

Cited to link from below.

> @@ -1360,9 +1363,16 @@ local function lsocket_tcp_connect(self, host, port)
>      -- This function is broken by design
>      local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>      local timeout = deadline - fiber.clock()
> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
>      if dns == nil or #dns == 0 then
> -        self._errno = boxerrno.EINVAL
> +        return nil, err
> +    end
> +    if dns == nil then
> +        boxerrno(self._errno)
> +        return nil, err
> +    end
> +    if #dns == 0 then
> +        boxerrno(self._errno)
>          return nil, socket_error(self)
>      end

We'll never reach the last two if's bodies. However the similar fragment
above in tcp_connect() was changed in the right way.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Tarantool-patches] [tarantool-patches] [server-dev] Re: Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-11-01 14:39                     ` [Tarantool-patches] [server-dev] Re: [tarantool-patches] " Alexander Turenko
@ 2019-11-21 17:27                       ` Roman Khabibov
  2019-12-08 19:48                         ` Alexander Turenko
  0 siblings, 1 reply; 20+ messages in thread
From: Roman Khabibov @ 2019-11-21 17:27 UTC (permalink / raw)
  To: tarantool-patches


> On Nov 1, 2019, at 17:39, Alexander Turenko <alexander.turenko@tarantool.org> wrote:
> 
>> commit 9810ccd869183de068939cdce5a157ad35916d36
>> Author: Roman Khabibov <roman.habibov@tarantool.org>
>> Date:   Tue Jul 30 15:49:13 2019 +0300
>> 
>>    lua: return getaddrinfo() errors
>> 
>>    Add getaddrinfo() errors into the several fuctions of socket. Now
>>    getaddrinfo() can return a pair of values (nil and error message)
>>    in case of error.
>> 
>>    Closes #4138
>> 
>>    @TarantoolBot document
>>    Title: socket_object:connect
> 
> `socket_object:connect` is not valid Lua syntax. It is better to write
> it as socket_object:connect() or socket_object.connect.
> 
> net.box API also was changed.
net.box’s .connect() returns error as second return value. Now, it can return
tcp_connect()’s errors (getaddrinfo). Don’t think, we should note this in the
net.box doc.

>> 
>>    Connect to a remote host using tcp_connect() within. If it is no
>>    error, return a socket object.
> 
> It is hard to understood w/o a context. Please, either describe API
> changes formally or give an example that will show the API change.
In the commit message.

>> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
>> index a334ad45b..e4815adbb 100644
>> --- a/src/lua/socket.lua
>> +++ b/src/lua/socket.lua
>> @@ -1041,9 +1041,12 @@ local function tcp_connect(host, port, timeout)
>>     end
>>     local timeout = timeout or TIMEOUT_INFINITY
>>     local stop = fiber.clock() + timeout
>> -    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>> +    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>>         protocol = 'tcp' })
>> -    if dns == nil or #dns == 0 then
>> +    if dns == nil then
>> +        return nil, err
>> +    end
>> +    if #dns == 0 then
>>         boxerrno(boxerrno.EINVAL)
>>         return nil
>>     end
> 
> Cited to link from below.
> 
>> @@ -1360,9 +1363,16 @@ local function lsocket_tcp_connect(self, host, port)
>>     -- This function is broken by design
>>     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>>     local timeout = deadline - fiber.clock()
>> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
>> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
>>     if dns == nil or #dns == 0 then
>> -        self._errno = boxerrno.EINVAL
>> +        return nil, err
>> +    end
>> +    if dns == nil then
>> +        boxerrno(self._errno)
>> +        return nil, err
>> +    end
>> +    if #dns == 0 then
>> +        boxerrno(self._errno)
>>         return nil, socket_error(self)
>>     end
> 
> We'll never reach the last two if's bodies. However the similar fragment
> above in tcp_connect() was changed in the right way.
@@ -1360,9 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
-    if dns == nil or #dns == 0 then
-        self._errno = boxerrno.EINVAL
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
+    if dns == nil then
+        return nil, err
+    end
+    if #dns == 0 then
+        boxerrno(self._errno)
         return nil, socket_error(self)
     end
     for _, remote in ipairs(dns) do

commit b3a8af5c605db811deb40946cfbcf9dcd45ad75c
Author: Roman Khabibov <roman.habibov@tarantool.org>
Date:   Thu Nov 21 14:37:54 2019 +0300

    lua: return getaddrinfo() errors
    
    Add getaddrinfo() errors into the several fuctions of socket. Now
    getaddrinfo() can return a pair of values (nil and error message)
    in case of error.
    
    Closes #4138
    
    @TarantoolBot document
    Title: socket API changes
    
    * socket.getaddrinfo()
    
    Can return error message as second return value in case of
    internal getaddrinfo() error.
    
    Example:
    tarantool> socket.getaddrinfo('non_exists_hostname', 3301)
    ---
    - null
    - 'getaddrinfo: nodename nor servname provided, or not known'
    ...
    
    * socket.tcp_connect()
    
    Can return socket.getaddrinfo() error message as second return
    value.
    
    Example:
    tarantool> socket.tcp_connect('non_exists_hostname', 3301)
    ---
    - null
    - 'getaddrinfo: nodename nor servname provided, or not known'
    ...
    
    * socket_object:connect()
    
    Wrapper for the socket.tcp_connect() with arguments format
    checking. If it is no error, return a socket object. If it is not,
    return nil and error message (socket.tcp_connect() just returns
    nil in case of error except when it is an internal getaddrinfo()
    error).

diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index c2e1bb9c4..2fb3e6df6 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -150,9 +150,12 @@ local function next_id(id) return band(id + 1, 0x7FFFFFFF) end
 local function establish_connection(host, port, timeout)
     local timeout = timeout or DEFAULT_CONNECT_TIMEOUT
     local begin = fiber.clock()
-    local s = socket.tcp_connect(host, port, timeout)
+    local s, err = socket.tcp_connect(host, port, timeout)
     if not s then
-        return nil, errno.strerror(errno())
+        if not err then
+            return nil, errno.strerror(errno())
+        end
+        return nil, err
     end
     local msg = s:read({chunk = IPROTO_GREETING_SIZE},
                         timeout - (fiber.clock() - begin))
diff --git a/src/lua/socket.c b/src/lua/socket.c
index 130378caf..dbade2d27 100644
--- a/src/lua/socket.c
+++ b/src/lua/socket.c
@@ -774,6 +774,18 @@ lbox_getaddrinfo_result_wrapper(struct lua_State *L)
 	return 1;
 }
 
+/**
+ * Wrap coio_getaddrinfo() and call it. Push returned values onto
+ * @a L Lua stack.
+ *
+ * @param L Lua stack.
+ *
+ * @retval 1 Number of returned values by Lua function if
+ * coio_getaddrinfo() success.
+ * @retval 2 Number of returned values by Lua function if
+ * coio_getaddrinfo() is failed (first is nil, second is error
+ * message).
+ */
 static int
 lbox_socket_getaddrinfo(struct lua_State *L)
 {
@@ -816,7 +828,9 @@ lbox_socket_getaddrinfo(struct lua_State *L)
 
 	if (dns_res != 0) {
 		lua_pushnil(L);
-		return 1;
+		struct error *err = diag_get()->last;
+		lua_pushstring(L, err->errmsg);
+		return 2;
 	}
 
 	/* no results */
diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index 2a4c69f45..6b0017e14 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -1041,9 +1041,12 @@ local function tcp_connect(host, port, timeout)
     end
     local timeout = timeout or TIMEOUT_INFINITY
     local stop = fiber.clock() + timeout
-    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
+    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
         protocol = 'tcp' })
-    if dns == nil or #dns == 0 then
+    if dns == nil then
+        return nil, err
+    end
+    if #dns == 0 then
         boxerrno(boxerrno.EINVAL)
         return nil
     end
@@ -1360,9 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
-    if dns == nil or #dns == 0 then
-        self._errno = boxerrno.EINVAL
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
+    if dns == nil then
+        return nil, err
+    end
+    if #dns == 0 then
+        boxerrno(self._errno)
         return nil, socket_error(self)
     end
     for _, remote in ipairs(dns) do
@@ -1548,9 +1554,12 @@ local function lsocket_connect(host, port)
        type(port) ~= 'number') then
         error("Usage: luasocket.connect(host, port)")
     end
-    local s = tcp_connect(host, port)
+    local s, err = tcp_connect(host, port)
     if not s then
-        return nil, boxerrno.strerror()
+        if not err then
+            return nil, boxerrno.strerror()
+        end
+        return nil, err
     end
     setmetatable(s, lsocket_tcp_client_mt)
     return s
diff --git a/test/app/socket.result b/test/app/socket.result
index 6a72fb3cf..84426a88f 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -941,10 +941,20 @@ sc:close()
 - true
 ...
 -- tcp_connect
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value from internal.getaddrinfo (usually on Mac OS, but
+-- theoretically it can happen on Linux too). Sometimes
+-- getaddrinfo() is timed out, sometimes connect. On Linux however
+-- getaddrinfo is fast enough to never give timeout error in
+-- the case. So, there are two sources of timeout errors that are
+-- reported differently. This difference has appeared after
+-- gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
 ---
-- null
+...
+s == nil
+---
+- true
 ...
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -1606,7 +1616,7 @@ fio.stat(path) == nil
 { socket.tcp_connect('abrakadabra#123') == nil, errno.strerror() }
 ---
 - - true
-  - Invalid argument
+  - Input/output error
 ...
 -- wrong options for getaddrinfo
 socket.getaddrinfo('host', 'port', { type = 'WRONG' }) == nil and errno() == errno.EINVAL
@@ -2841,6 +2851,41 @@ test_run:cmd("clear filter")
 ---
 - true
 ...
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+        return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
+s, err = socket.connect('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
 -- case: sicket receive inconsistent behavior
 chan = fiber.channel()
 ---
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index 022cd4f40..9e66d6a8b 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -300,8 +300,16 @@ sc:close()
 
 -- tcp_connect
 
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value from internal.getaddrinfo (usually on Mac OS, but
+-- theoretically it can happen on Linux too). Sometimes
+-- getaddrinfo() is timed out, sometimes connect. On Linux however
+-- getaddrinfo is fast enough to never give timeout error in
+-- the case. So, there are two sources of timeout errors that are
+-- reported differently. This difference has appeared after
+-- gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+s == nil
 
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -968,6 +976,26 @@ socket.bind('127.0.0.1', 3301, '1')
 
 test_run:cmd("clear filter")
 
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+        return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+check_err(err)
+s, err = socket.connect('non_exists_hostname', 3301)
+check_err(err)
+
+
 -- case: sicket receive inconsistent behavior
 chan = fiber.channel()
 counter = 0
diff --git a/test/box/net.box.result b/test/box/net.box.result
index e3dabf7d9..0635ca9e7 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -3927,6 +3927,34 @@ test_run:grep_log('default', '00000040:.*')
 ---
 - null
 ...
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s = remote.connect('non_exists_hostname:3301')
+---
+...
+check_err(s['error'])
+---
+- true
+...
 box.cfg{log_level=log_level}
 ---
 ...
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index 8e65ff470..9c9aee1ad 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -1582,4 +1582,21 @@ test_run:wait_log('default', '00000030:.*', nil, 10)
 -- we expect nothing below, so don't wait
 test_run:grep_log('default', '00000040:.*')
 
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s = remote.connect('non_exists_hostname:3301')
+check_err(s['error'])
+
 box.cfg{log_level=log_level}

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Tarantool-patches] [tarantool-patches] [server-dev] Re: Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-11-21 17:27                       ` [Tarantool-patches] [tarantool-patches] [server-dev] " Roman Khabibov
@ 2019-12-08 19:48                         ` Alexander Turenko
  2019-12-10 16:25                           ` Roman Khabibov
  0 siblings, 1 reply; 20+ messages in thread
From: Alexander Turenko @ 2019-12-08 19:48 UTC (permalink / raw)
  To: Roman Khabibov; +Cc: tarantool-patches

I didn't find where to comment args validation patch, so I'll comment it
here:

It changes socket.connect() (part of Lua Socket API), but does not
change luasocket_master:connect() (part of Lua Socket API too) and
does not change socket.tcp_connect() (our native function).

The same: socket.bind() (Lua Socket API) was changed, but
luasocket_master:bind() (Lua Socket API) was not. Our native
socket.tcp_server() was not changed too.

That's all looks strange. Let's extract this patch and either drop it or
bring it to an acceptable state and send separately from this patchset.

----

> > `socket_object:connect` is not valid Lua syntax. It is better to write
> > it as socket_object:connect() or socket_object.connect.
> > 
> > net.box API also was changed.
>
> net.box’s .connect() returns error as second return value. Now, it can return
> tcp_connect()’s errors (getaddrinfo). Don’t think, we should note this in the
> net.box doc.

I see now, the API is not changed, only connection.error message.

> >>    Connect to a remote host using tcp_connect() within. If it is no
> >>    error, return a socket object.
> > 
> > It is hard to understood w/o a context. Please, either describe API
> > changes formally or give an example that will show the API change.
> In the commit message.

See the comment below.

> >> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
> >> index a334ad45b..e4815adbb 100644
> >> --- a/src/lua/socket.lua
> >> +++ b/src/lua/socket.lua
> >> @@ -1041,9 +1041,12 @@ local function tcp_connect(host, port, timeout)
> >>     end
> >>     local timeout = timeout or TIMEOUT_INFINITY
> >>     local stop = fiber.clock() + timeout
> >> -    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
> >> +    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
> >>         protocol = 'tcp' })
> >> -    if dns == nil or #dns == 0 then
> >> +    if dns == nil then
> >> +        return nil, err
> >> +    end
> >> +    if #dns == 0 then
> >>         boxerrno(boxerrno.EINVAL)
> >>         return nil
> >>     end
> > 
> > Cited to link from below.
> > 
> >> @@ -1360,9 +1363,16 @@ local function lsocket_tcp_connect(self, host, port)
> >>     -- This function is broken by design
> >>     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
> >>     local timeout = deadline - fiber.clock()
> >> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
> >> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
> >>     if dns == nil or #dns == 0 then
> >> -        self._errno = boxerrno.EINVAL
> >> +        return nil, err
> >> +    end
> >> +    if dns == nil then
> >> +        boxerrno(self._errno)
> >> +        return nil, err
> >> +    end
> >> +    if #dns == 0 then
> >> +        boxerrno(self._errno)
> >>         return nil, socket_error(self)
> >>     end
> > 
> > We'll never reach the last two if's bodies. However the similar fragment
> > above in tcp_connect() was changed in the right way.
>
> @@ -1360,9 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
>      -- This function is broken by design
>      local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>      local timeout = deadline - fiber.clock()
> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
> -    if dns == nil or #dns == 0 then
> -        self._errno = boxerrno.EINVAL
> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
> +    if dns == nil then
> +        return nil, err
> +    end
> +    if #dns == 0 then
> +        boxerrno(self._errno)
>          return nil, socket_error(self)
>      end
>      for _, remote in ipairs(dns) do

EINVAL here is changed to self._errno, which is always `nil` at least
for typical usage of lua socket: `s = socket.tcp()`, then `s:connect()`.

Let's keep the old behaviour (EINVAL) when getaddrinfo() returns zero
amount of addresses.

RFC 2553 ('Basic Socket Interface Extensions for IPv6') states:

 | Upon successful return a pointer to a linked list of one or more
 | addrinfo structures is returned through the final argument.

RFC 3493 ('Basic Socket Interface Extensions for IPv6') states the same,
but more explicitly:

 | Upon successful return of getaddrinfo(), the location to which res
 | points shall refer to a linked list of addrinfo structures, each of
 | which shall specify a socket address and information for use in
 | creating a socket with which to use that socket address.  The list
 | shall include at least one addrinfo structure.

So this situation should not occur. But anyway we should not miss to set
a certain errno in the case at least for consistency with tcp_connect().

Also I think that we should set errno to EINVAL when getaddrinfo() fails
to keep the code backward compatible: say, a user lean on this value
when tcp_connect() / :connect() returns `nil`.

> commit b3a8af5c605db811deb40946cfbcf9dcd45ad75c
> Author: Roman Khabibov <roman.habibov@tarantool.org>
> Date:   Thu Nov 21 14:37:54 2019 +0300
> 
>     lua: return getaddrinfo() errors
>     
>     Add getaddrinfo() errors into the several fuctions of socket. Now
>     getaddrinfo() can return a pair of values (nil and error message)
>     in case of error.
>     
>     Closes #4138
>     
>     @TarantoolBot document
>     Title: socket API changes
>     
>     * socket.getaddrinfo()
>     
>     Can return error message as second return value in case of
>     internal getaddrinfo() error.
>     
>     Example:
>     tarantool> socket.getaddrinfo('non_exists_hostname', 3301)
>     ---
>     - null
>     - 'getaddrinfo: nodename nor servname provided, or not known'
>     ...
>     
>     * socket.tcp_connect()
>     
>     Can return socket.getaddrinfo() error message as second return
>     value.
>     
>     Example:
>     tarantool> socket.tcp_connect('non_exists_hostname', 3301)
>     ---
>     - null
>     - 'getaddrinfo: nodename nor servname provided, or not known'
>     ...
>     
>     * socket_object:connect()
>     
>     Wrapper for the socket.tcp_connect() with arguments format
>     checking. If it is no error, return a socket object. If it is not,
>     return nil and error message (socket.tcp_connect() just returns
>     nil in case of error except when it is an internal getaddrinfo()
>     error).

socket_object:connect() is part of Lua Socket API, so I would name the
instance 'luasocket_master': this would follow Lua Socket terminology:
http://w3.impa.br/~diego/software/luasocket/tcp.html

We end with the situation that Lua Socket function checks its args,
but our one does not. This looks strange.

Describing this function whose primary reason is to verify arguments is
not correct too. This function exists for compatibility with Lua Socket
API.

The behaviour change here is that luasocket_master:connect() returns an
error from getaddrinfo() in the case rather than 'Invalid argument' as
it was before the commit. So the API is not changed and we can remove
this function from the docbot request (the same situation as in
net.box).

Also I see that we don't document Lua Socket part of the API:
https://www.tarantool.io/en/doc/1.10/reference/reference_lua/socket/

> diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
> index 022cd4f40..9e66d6a8b 100644
> --- a/test/app/socket.test.lua
> +++ b/test/app/socket.test.lua
> <...>
> @@ -968,6 +976,26 @@ socket.bind('127.0.0.1', 3301, '1')
>  
>  test_run:cmd("clear filter")
>  
> +-- gh-4138 Check getaddrinfo() error from socket:connect() only.
> +-- Error code and error message returned by getaddrinfo() depends
> +-- on system's gai_strerror().
> +test_run:cmd("setopt delimiter ';'")
> +function check_err(err)
> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> +       err == 'getaddrinfo: Name or service not known' then
> +        return true
> +    end
> +    return false
> +end;
> +test_run:cmd("setopt delimiter ''");
> +
> +s, err = socket.getaddrinfo('non_exists_hostname', 3301)
> +check_err(err)
> +s, err = socket.connect('non_exists_hostname', 3301)
> +check_err(err)

Okay, we have the test case for Lua Socket API's socket.connect(). Let's
add a case for our native socket.tcp_connect().

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Tarantool-patches] [tarantool-patches] [server-dev] Re: Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-12-08 19:48                         ` Alexander Turenko
@ 2019-12-10 16:25                           ` Roman Khabibov
  2019-12-18 14:58                             ` Alexander Turenko
  0 siblings, 1 reply; 20+ messages in thread
From: Roman Khabibov @ 2019-12-10 16:25 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: tarantool-patches



> On Dec 8, 2019, at 22:48, Alexander Turenko <alexander.turenko@tarantool.org> wrote:
> 
> I didn't find where to comment args validation patch, so I'll comment it
> here:
> 
> It changes socket.connect() (part of Lua Socket API), but does not
> change luasocket_master:connect() (part of Lua Socket API too) and
> does not change socket.tcp_connect() (our native function).
> 
> The same: socket.bind() (Lua Socket API) was changed, but
> luasocket_master:bind() (Lua Socket API) was not. Our native
> socket.tcp_server() was not changed too.
> 
> That's all looks strange. Let's extract this patch and either drop it or
> bring it to an acceptable state and send separately from this patchset.
Dropped.

> ----
> 
>>> `socket_object:connect` is not valid Lua syntax. It is better to write
>>> it as socket_object:connect() or socket_object.connect.
>>> 
>>> net.box API also was changed.
>> 
>> net.box’s .connect() returns error as second return value. Now, it can return
>> tcp_connect()’s errors (getaddrinfo). Don’t think, we should note this in the
>> net.box doc.
> 
> I see now, the API is not changed, only connection.error message.
> 
>>>>   Connect to a remote host using tcp_connect() within. If it is no
>>>>   error, return a socket object.
>>> 
>>> It is hard to understood w/o a context. Please, either describe API
>>> changes formally or give an example that will show the API change.
>> In the commit message.
> 
> See the comment below.
> 
>>>> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
>>>> index a334ad45b..e4815adbb 100644
>>>> --- a/src/lua/socket.lua
>>>> +++ b/src/lua/socket.lua
>>>> @@ -1041,9 +1041,12 @@ local function tcp_connect(host, port, timeout)
>>>>    end
>>>>    local timeout = timeout or TIMEOUT_INFINITY
>>>>    local stop = fiber.clock() + timeout
>>>> -    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>>>> +    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>>>>        protocol = 'tcp' })
>>>> -    if dns == nil or #dns == 0 then
>>>> +    if dns == nil then
>>>> +        return nil, err
>>>> +    end
>>>> +    if #dns == 0 then
>>>>        boxerrno(boxerrno.EINVAL)
>>>>        return nil
>>>>    end
>>> 
>>> Cited to link from below.
>>> 
>>>> @@ -1360,9 +1363,16 @@ local function lsocket_tcp_connect(self, host, port)
>>>>    -- This function is broken by design
>>>>    local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>>>>    local timeout = deadline - fiber.clock()
>>>> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
>>>> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
>>>>    if dns == nil or #dns == 0 then
>>>> -        self._errno = boxerrno.EINVAL
>>>> +        return nil, err
>>>> +    end
>>>> +    if dns == nil then
>>>> +        boxerrno(self._errno)
>>>> +        return nil, err
>>>> +    end
>>>> +    if #dns == 0 then
>>>> +        boxerrno(self._errno)
>>>>        return nil, socket_error(self)
>>>>    end
>>> 
>>> We'll never reach the last two if's bodies. However the similar fragment
>>> above in tcp_connect() was changed in the right way.
>> 
>> @@ -1360,9 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
>>     -- This function is broken by design
>>     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>>     local timeout = deadline - fiber.clock()
>> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
>> -    if dns == nil or #dns == 0 then
>> -        self._errno = boxerrno.EINVAL
>> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
>> +    if dns == nil then
>> +        return nil, err
>> +    end
>> +    if #dns == 0 then
>> +        boxerrno(self._errno)
>>         return nil, socket_error(self)
>>     end
>>     for _, remote in ipairs(dns) do
> 
> EINVAL here is changed to self._errno, which is always `nil` at least
> for typical usage of lua socket: `s = socket.tcp()`, then `s:connect()`.
> 
> Let's keep the old behaviour (EINVAL) when getaddrinfo() returns zero
> amount of addresses.
> 
> RFC 2553 ('Basic Socket Interface Extensions for IPv6') states:
> 
> | Upon successful return a pointer to a linked list of one or more
> | addrinfo structures is returned through the final argument.
> 
> RFC 3493 ('Basic Socket Interface Extensions for IPv6') states the same,
> but more explicitly:
> 
> | Upon successful return of getaddrinfo(), the location to which res
> | points shall refer to a linked list of addrinfo structures, each of
> | which shall specify a socket address and information for use in
> | creating a socket with which to use that socket address.  The list
> | shall include at least one addrinfo structure.
> 
> So this situation should not occur. But anyway we should not miss to set
> a certain errno in the case at least for consistency with tcp_connect().
> 
> Also I think that we should set errno to EINVAL when getaddrinfo() fails
> to keep the code backward compatible: say, a user lean on this value
> when tcp_connect() / :connect() returns `nil`.
I understand so:

diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index a334ad45b..6c829a11c 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -995,7 +995,13 @@ local function getaddrinfo(host, port, timeout, opts)
         end
 
     end
-    return internal.getaddrinfo(host, port, timeout, ga_opts)
+
+    local ret, err = internal.getaddrinfo(host, port, timeout, ga_opts)
+    if ret == nil then
+        boxerrno(boxerrno.EINVAL)
+        return nil, err
+    end
+    return ret
 end
 
 -- tcp connector
@@ -1041,9 +1047,12 @@ local function tcp_connect(host, port, timeout)
     end
     local timeout = timeout or TIMEOUT_INFINITY
     local stop = fiber.clock() + timeout
-    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
+    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
         protocol = 'tcp' })
-    if dns == nil or #dns == 0 then
+    if dns == nil then
+        return nil, err
+    end
+    if #dns == 0 then
         boxerrno(boxerrno.EINVAL)
         return nil
     end
@@ -1360,8 +1369,12 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
-    if dns == nil or #dns == 0 then
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
+    if dns == nil then
+        self._errno = boxerrno()
+        return nil, err
+    end
+    if #dns == 0 then
         self._errno = boxerrno.EINVAL
         return nil, socket_error(self)
     end
@@ -1547,9 +1560,12 @@ local function lsocket_connect(host, port)
     if host == nil or port == nil then
         error("Usage: luasocket.connect(host, port)")
     end
-    local s = tcp_connect(host, port)
+    local s, err = tcp_connect(host, port)
     if not s then
-        return nil, boxerrno.strerror()
+        if not err then
+            return nil, boxerrno.strerror()
+        end
+        return nil, err
     end
     setmetatable(s, lsocket_tcp_client_mt)
     return s

>> commit b3a8af5c605db811deb40946cfbcf9dcd45ad75c
>> Author: Roman Khabibov <roman.habibov@tarantool.org>
>> Date:   Thu Nov 21 14:37:54 2019 +0300
>> 
>>    lua: return getaddrinfo() errors
>> 
>>    Add getaddrinfo() errors into the several fuctions of socket. Now
>>    getaddrinfo() can return a pair of values (nil and error message)
>>    in case of error.
>> 
>>    Closes #4138
>> 
>>    @TarantoolBot document
>>    Title: socket API changes
>> 
>>    * socket.getaddrinfo()
>> 
>>    Can return error message as second return value in case of
>>    internal getaddrinfo() error.
>> 
>>    Example:
>>    tarantool> socket.getaddrinfo('non_exists_hostname', 3301)
>>    ---
>>    - null
>>    - 'getaddrinfo: nodename nor servname provided, or not known'
>>    ...
>> 
>>    * socket.tcp_connect()
>> 
>>    Can return socket.getaddrinfo() error message as second return
>>    value.
>> 
>>    Example:
>>    tarantool> socket.tcp_connect('non_exists_hostname', 3301)
>>    ---
>>    - null
>>    - 'getaddrinfo: nodename nor servname provided, or not known'
>>    ...
>> 
>>    * socket_object:connect()
>> 
>>    Wrapper for the socket.tcp_connect() with arguments format
>>    checking. If it is no error, return a socket object. If it is not,
>>    return nil and error message (socket.tcp_connect() just returns
>>    nil in case of error except when it is an internal getaddrinfo()
>>    error).
> 
> socket_object:connect() is part of Lua Socket API, so I would name the
> instance 'luasocket_master': this would follow Lua Socket terminology:
> http://w3.impa.br/~diego/software/luasocket/tcp.html
> 
> We end with the situation that Lua Socket function checks its args,
> but our one does not. This looks strange.
> 
> Describing this function whose primary reason is to verify arguments is
> not correct too. This function exists for compatibility with Lua Socket
> API.
> 
> The behaviour change here is that luasocket_master:connect() returns an
> error from getaddrinfo() in the case rather than 'Invalid argument' as
> it was before the commit. So the API is not changed and we can remove
> this function from the docbot request (the same situation as in
> net.box).
    lua: return getaddrinfo() errors
    
    Add getaddrinfo() errors into the several fuctions of socket. Now
    getaddrinfo() can return a pair of values (nil and error message)
    in case of error.
    
    Closes #4138
    
    @TarantoolBot document
    Title: socket API changes
    
    * socket.getaddrinfo()
    
    Can return error message as second return value in case of
    internal getaddrinfo() error.
    
    Example:
    tarantool> socket.getaddrinfo('non_exists_hostname', 3301)
    ---
    - null
    - 'getaddrinfo: nodename nor servname provided, or not known'
    ...
    
    * socket.tcp_connect()
    
    Can return socket.getaddrinfo() error message as second return
    value.
    
    Example:
    tarantool> socket.tcp_connect('non_exists_hostname', 3301)
    ---
    - null
    - 'getaddrinfo: nodename nor servname provided, or not known'
    …

> Also I see that we don't document Lua Socket part of the API:
> https://www.tarantool.io/en/doc/1.10/reference/reference_lua/socket/

Is this the part of this patchset? Maybe, it would be better to do doc
request separately.

>> diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
>> index 022cd4f40..9e66d6a8b 100644
>> --- a/test/app/socket.test.lua
>> +++ b/test/app/socket.test.lua
>> <...>
>> @@ -968,6 +976,26 @@ socket.bind('127.0.0.1', 3301, '1')
>> 
>> test_run:cmd("clear filter")
>> 
>> +-- gh-4138 Check getaddrinfo() error from socket:connect() only.
>> +-- Error code and error message returned by getaddrinfo() depends
>> +-- on system's gai_strerror().
>> +test_run:cmd("setopt delimiter ';'")
>> +function check_err(err)
>> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
>> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
>> +       err == 'getaddrinfo: Name or service not known' then
>> +        return true
>> +    end
>> +    return false
>> +end;
>> +test_run:cmd("setopt delimiter ''");
>> +
>> +s, err = socket.getaddrinfo('non_exists_hostname', 3301)
>> +check_err(err)
>> +s, err = socket.connect('non_exists_hostname', 3301)
>> +check_err(err)
> 
> Okay, we have the test case for Lua Socket API's socket.connect(). Let's
> add a case for our native socket.tcp_connect().
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+        return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+check_err(err)
+s, err = socket.connect('non_exists_hostname', 3301)
+check_err(err)
+s, err = socket.tcp_connect('non_exists_hostname', 3301)
+check_err(err)
+

commit d328fbf453248193d1a7c1406a3155f6b86b22c5
Author: Roman Khabibov <roman.habibov@tarantool.org>
Date:   Thu Nov 21 14:37:54 2019 +0300

    lua: return getaddrinfo() errors
    
    Add getaddrinfo() errors into the several fuctions of socket. Now
    getaddrinfo() can return a pair of values (nil and error message)
    in case of error.
    
    Closes #4138
    
    @TarantoolBot document
    Title: socket API changes
    
    * socket.getaddrinfo()
    
    Can return error message as second return value in case of
    internal getaddrinfo() error.
    
    Example:
    tarantool> socket.getaddrinfo('non_exists_hostname', 3301)
    ---
    - null
    - 'getaddrinfo: nodename nor servname provided, or not known'
    ...
    
    * socket.tcp_connect()
    
    Can return socket.getaddrinfo() error message as second return
    value.
    
    Example:
    tarantool> socket.tcp_connect('non_exists_hostname', 3301)
    ---
    - null
    - 'getaddrinfo: nodename nor servname provided, or not known'
    ...

diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index c2e1bb9c4..2fb3e6df6 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -150,9 +150,12 @@ local function next_id(id) return band(id + 1, 0x7FFFFFFF) end
 local function establish_connection(host, port, timeout)
     local timeout = timeout or DEFAULT_CONNECT_TIMEOUT
     local begin = fiber.clock()
-    local s = socket.tcp_connect(host, port, timeout)
+    local s, err = socket.tcp_connect(host, port, timeout)
     if not s then
-        return nil, errno.strerror(errno())
+        if not err then
+            return nil, errno.strerror(errno())
+        end
+        return nil, err
     end
     local msg = s:read({chunk = IPROTO_GREETING_SIZE},
                         timeout - (fiber.clock() - begin))
diff --git a/src/lua/socket.c b/src/lua/socket.c
index 130378caf..dbade2d27 100644
--- a/src/lua/socket.c
+++ b/src/lua/socket.c
@@ -774,6 +774,18 @@ lbox_getaddrinfo_result_wrapper(struct lua_State *L)
 	return 1;
 }
 
+/**
+ * Wrap coio_getaddrinfo() and call it. Push returned values onto
+ * @a L Lua stack.
+ *
+ * @param L Lua stack.
+ *
+ * @retval 1 Number of returned values by Lua function if
+ * coio_getaddrinfo() success.
+ * @retval 2 Number of returned values by Lua function if
+ * coio_getaddrinfo() is failed (first is nil, second is error
+ * message).
+ */
 static int
 lbox_socket_getaddrinfo(struct lua_State *L)
 {
@@ -816,7 +828,9 @@ lbox_socket_getaddrinfo(struct lua_State *L)
 
 	if (dns_res != 0) {
 		lua_pushnil(L);
-		return 1;
+		struct error *err = diag_get()->last;
+		lua_pushstring(L, err->errmsg);
+		return 2;
 	}
 
 	/* no results */
diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index a334ad45b..6c829a11c 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -995,7 +995,13 @@ local function getaddrinfo(host, port, timeout, opts)
         end
 
     end
-    return internal.getaddrinfo(host, port, timeout, ga_opts)
+
+    local ret, err = internal.getaddrinfo(host, port, timeout, ga_opts)
+    if ret == nil then
+        boxerrno(boxerrno.EINVAL)
+        return nil, err
+    end
+    return ret
 end
 
 -- tcp connector
@@ -1041,9 +1047,12 @@ local function tcp_connect(host, port, timeout)
     end
     local timeout = timeout or TIMEOUT_INFINITY
     local stop = fiber.clock() + timeout
-    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
+    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
         protocol = 'tcp' })
-    if dns == nil or #dns == 0 then
+    if dns == nil then
+        return nil, err
+    end
+    if #dns == 0 then
         boxerrno(boxerrno.EINVAL)
         return nil
     end
@@ -1360,8 +1369,12 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
-    if dns == nil or #dns == 0 then
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
+    if dns == nil then
+        self._errno = boxerrno()
+        return nil, err
+    end
+    if #dns == 0 then
         self._errno = boxerrno.EINVAL
         return nil, socket_error(self)
     end
@@ -1547,9 +1560,12 @@ local function lsocket_connect(host, port)
     if host == nil or port == nil then
         error("Usage: luasocket.connect(host, port)")
     end
-    local s = tcp_connect(host, port)
+    local s, err = tcp_connect(host, port)
     if not s then
-        return nil, boxerrno.strerror()
+        if not err then
+            return nil, boxerrno.strerror()
+        end
+        return nil, err
     end
     setmetatable(s, lsocket_tcp_client_mt)
     return s
diff --git a/test/app/socket.result b/test/app/socket.result
index fd299424c..a9fe840be 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -941,10 +941,20 @@ sc:close()
 - true
 ...
 -- tcp_connect
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value from internal.getaddrinfo (usually on Mac OS, but
+-- theoretically it can happen on Linux too). Sometimes
+-- getaddrinfo() is timed out, sometimes connect. On Linux however
+-- getaddrinfo is fast enough to never give timeout error in
+-- the case. So, there are two sources of timeout errors that are
+-- reported differently. This difference has appeared after
+-- gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
 ---
-- null
+...
+s == nil
+---
+- true
 ...
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -2816,6 +2826,48 @@ test_run:cmd("clear filter")
 ---
 - true
 ...
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+        return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
+s, err = socket.connect('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
+s, err = socket.tcp_connect('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
 -- case: sicket receive inconsistent behavior
 chan = fiber.channel()
 ---
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index c72d41763..7086e496b 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -300,8 +300,16 @@ sc:close()
 
 -- tcp_connect
 
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value from internal.getaddrinfo (usually on Mac OS, but
+-- theoretically it can happen on Linux too). Sometimes
+-- getaddrinfo() is timed out, sometimes connect. On Linux however
+-- getaddrinfo is fast enough to never give timeout error in
+-- the case. So, there are two sources of timeout errors that are
+-- reported differently. This difference has appeared after
+-- gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+s == nil
 
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -960,6 +968,27 @@ server:close()
 
 test_run:cmd("clear filter")
 
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+        return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+check_err(err)
+s, err = socket.connect('non_exists_hostname', 3301)
+check_err(err)
+s, err = socket.tcp_connect('non_exists_hostname', 3301)
+check_err(err)
+
 -- case: sicket receive inconsistent behavior
 chan = fiber.channel()
 counter = 0
diff --git a/test/box/net.box.result b/test/box/net.box.result
index e3dabf7d9..0635ca9e7 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -3927,6 +3927,34 @@ test_run:grep_log('default', '00000040:.*')
 ---
 - null
 ...
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s = remote.connect('non_exists_hostname:3301')
+---
+...
+check_err(s['error'])
+---
+- true
+...
 box.cfg{log_level=log_level}
 ---
 ...
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index 8e65ff470..9c9aee1ad 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -1582,4 +1582,21 @@ test_run:wait_log('default', '00000030:.*', nil, 10)
 -- we expect nothing below, so don't wait
 test_run:grep_log('default', '00000040:.*')
 
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s = remote.connect('non_exists_hostname:3301')
+check_err(s['error'])
+
 box.cfg{log_level=log_level}

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Tarantool-patches] [tarantool-patches] [server-dev] Re: Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-12-10 16:25                           ` Roman Khabibov
@ 2019-12-18 14:58                             ` Alexander Turenko
  2019-12-21 17:50                               ` Roman Khabibov
  0 siblings, 1 reply; 20+ messages in thread
From: Alexander Turenko @ 2019-12-18 14:58 UTC (permalink / raw)
  To: Roman Khabibov; +Cc: tarantool-patches

> >> @@ -1360,9 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
> >>     -- This function is broken by design
> >>     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
> >>     local timeout = deadline - fiber.clock()
> >> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
> >> -    if dns == nil or #dns == 0 then
> >> -        self._errno = boxerrno.EINVAL
> >> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
> >> +    if dns == nil then
> >> +        return nil, err
> >> +    end
> >> +    if #dns == 0 then
> >> +        boxerrno(self._errno)
> >>         return nil, socket_error(self)
> >>     end
> >>     for _, remote in ipairs(dns) do
> > 
> > EINVAL here is changed to self._errno, which is always `nil` at least
> > for typical usage of lua socket: `s = socket.tcp()`, then `s:connect()`.
> > 
> > Let's keep the old behaviour (EINVAL) when getaddrinfo() returns zero
> > amount of addresses.
> > 
> > RFC 2553 ('Basic Socket Interface Extensions for IPv6') states:
> > 
> > | Upon successful return a pointer to a linked list of one or more
> > | addrinfo structures is returned through the final argument.
> > 
> > RFC 3493 ('Basic Socket Interface Extensions for IPv6') states the same,
> > but more explicitly:
> > 
> > | Upon successful return of getaddrinfo(), the location to which res
> > | points shall refer to a linked list of addrinfo structures, each of
> > | which shall specify a socket address and information for use in
> > | creating a socket with which to use that socket address.  The list
> > | shall include at least one addrinfo structure.
> > 
> > So this situation should not occur. But anyway we should not miss to set
> > a certain errno in the case at least for consistency with tcp_connect().
> > 
> > Also I think that we should set errno to EINVAL when getaddrinfo() fails
> > to keep the code backward compatible: say, a user lean on this value
> > when tcp_connect() / :connect() returns `nil`.
>
> I understand so:
> 
> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
> index a334ad45b..6c829a11c 100644
> --- a/src/lua/socket.lua
> +++ b/src/lua/socket.lua
> @@ -995,7 +995,13 @@ local function getaddrinfo(host, port, timeout, opts)
>          end
>  
>      end
> -    return internal.getaddrinfo(host, port, timeout, ga_opts)
> +
> +    local ret, err = internal.getaddrinfo(host, port, timeout, ga_opts)
> +    if ret == nil then
> +        boxerrno(boxerrno.EINVAL)
> +        return nil, err
> +    end
> +    return ret
>  end
>  
>  -- tcp connector
> @@ -1041,9 +1047,12 @@ local function tcp_connect(host, port, timeout)
>      end
>      local timeout = timeout or TIMEOUT_INFINITY
>      local stop = fiber.clock() + timeout
> -    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
> +    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>          protocol = 'tcp' })
> -    if dns == nil or #dns == 0 then
> +    if dns == nil then
> +        return nil, err
> +    end
> +    if #dns == 0 then
>          boxerrno(boxerrno.EINVAL)
>          return nil
>      end
> @@ -1360,8 +1369,12 @@ local function lsocket_tcp_connect(self, host, port)
>      -- This function is broken by design
>      local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>      local timeout = deadline - fiber.clock()
> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
> -    if dns == nil or #dns == 0 then
> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
> +    if dns == nil then
> +        self._errno = boxerrno()
> +        return nil, err
> +    end
> +    if #dns == 0 then
>          self._errno = boxerrno.EINVAL
>          return nil, socket_error(self)
>      end
> @@ -1547,9 +1560,12 @@ local function lsocket_connect(host, port)
>      if host == nil or port == nil then
>          error("Usage: luasocket.connect(host, port)")
>      end
> -    local s = tcp_connect(host, port)
> +    local s, err = tcp_connect(host, port)
>      if not s then
> -        return nil, boxerrno.strerror()
> +        if not err then
> +            return nil, boxerrno.strerror()
> +        end
> +        return nil, err
>      end
>      setmetatable(s, lsocket_tcp_client_mt)
>      return s

I meant that `self._errno = boxerrno.EINVAL` was changed to
`boxerrno(self._errno)` in `#dns == 0` case and it does not more affect
the second return value (an error message). I would return it back for
`#dns == 0` case (only for it).

The motivation is just to don't change things that we don't asked to
change.

I didn't mean changes in getaddrinfo() function itself.

> > Also I see that we don't document Lua Socket part of the API:
> > https://www.tarantool.io/en/doc/1.10/reference/reference_lua/socket/
> 
> Is this the part of this patchset? Maybe, it would be better to do doc
> request separately.

It was in context of your docbot request: there is no need to ask to
update Lua Socket API docs if we does not provide it at all.

Lua Socket has its own API documentation, so it is maybe okay to don't
document it again on our website.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Tarantool-patches] [tarantool-patches] [server-dev] Re: Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-12-18 14:58                             ` Alexander Turenko
@ 2019-12-21 17:50                               ` Roman Khabibov
  2019-12-23 13:36                                 ` Alexander Turenko
  0 siblings, 1 reply; 20+ messages in thread
From: Roman Khabibov @ 2019-12-21 17:50 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: tarantool-patches

Hi! Thanks for the review.

> On Dec 18, 2019, at 17:58, Alexander Turenko <alexander.turenko@tarantool.org> wrote:
> 
>>>> @@ -1360,9 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
>>>>    -- This function is broken by design
>>>>    local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>>>>    local timeout = deadline - fiber.clock()
>>>> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
>>>> -    if dns == nil or #dns == 0 then
>>>> -        self._errno = boxerrno.EINVAL
>>>> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
>>>> +    if dns == nil then
>>>> +        return nil, err
>>>> +    end
>>>> +    if #dns == 0 then
>>>> +        boxerrno(self._errno)
>>>>        return nil, socket_error(self)
>>>>    end
>>>>    for _, remote in ipairs(dns) do
>>> 
>>> EINVAL here is changed to self._errno, which is always `nil` at least
>>> for typical usage of lua socket: `s = socket.tcp()`, then `s:connect()`.
>>> 
>>> Let's keep the old behaviour (EINVAL) when getaddrinfo() returns zero
>>> amount of addresses.
>>> 
>>> RFC 2553 ('Basic Socket Interface Extensions for IPv6') states:
>>> 
>>> | Upon successful return a pointer to a linked list of one or more
>>> | addrinfo structures is returned through the final argument.
>>> 
>>> RFC 3493 ('Basic Socket Interface Extensions for IPv6') states the same,
>>> but more explicitly:
>>> 
>>> | Upon successful return of getaddrinfo(), the location to which res
>>> | points shall refer to a linked list of addrinfo structures, each of
>>> | which shall specify a socket address and information for use in
>>> | creating a socket with which to use that socket address.  The list
>>> | shall include at least one addrinfo structure.
>>> 
>>> So this situation should not occur. But anyway we should not miss to set
>>> a certain errno in the case at least for consistency with tcp_connect().
>>> 
>>> Also I think that we should set errno to EINVAL when getaddrinfo() fails
>>> to keep the code backward compatible: say, a user lean on this value
>>> when tcp_connect() / :connect() returns `nil`.
>> 
>> I understand so:
>> 
>> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
>> index a334ad45b..6c829a11c 100644
>> --- a/src/lua/socket.lua
>> +++ b/src/lua/socket.lua
>> @@ -995,7 +995,13 @@ local function getaddrinfo(host, port, timeout, opts)
>>         end
>> 
>>     end
>> -    return internal.getaddrinfo(host, port, timeout, ga_opts)
>> +
>> +    local ret, err = internal.getaddrinfo(host, port, timeout, ga_opts)
>> +    if ret == nil then
>> +        boxerrno(boxerrno.EINVAL)
>> +        return nil, err
>> +    end
>> +    return ret
>> end
>> 
>> -- tcp connector
>> @@ -1041,9 +1047,12 @@ local function tcp_connect(host, port, timeout)
>>     end
>>     local timeout = timeout or TIMEOUT_INFINITY
>>     local stop = fiber.clock() + timeout
>> -    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>> +    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>>         protocol = 'tcp' })
>> -    if dns == nil or #dns == 0 then
>> +    if dns == nil then
>> +        return nil, err
>> +    end
>> +    if #dns == 0 then
>>         boxerrno(boxerrno.EINVAL)
>>         return nil
>>     end
>> @@ -1360,8 +1369,12 @@ local function lsocket_tcp_connect(self, host, port)
>>     -- This function is broken by design
>>     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>>     local timeout = deadline - fiber.clock()
>> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
>> -    if dns == nil or #dns == 0 then
>> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
>> +    if dns == nil then
>> +        self._errno = boxerrno()
>> +        return nil, err
>> +    end
>> +    if #dns == 0 then
>>         self._errno = boxerrno.EINVAL
>>         return nil, socket_error(self)
>>     end
>> @@ -1547,9 +1560,12 @@ local function lsocket_connect(host, port)
>>     if host == nil or port == nil then
>>         error("Usage: luasocket.connect(host, port)")
>>     end
>> -    local s = tcp_connect(host, port)
>> +    local s, err = tcp_connect(host, port)
>>     if not s then
>> -        return nil, boxerrno.strerror()
>> +        if not err then
>> +            return nil, boxerrno.strerror()
>> +        end
>> +        return nil, err
>>     end
>>     setmetatable(s, lsocket_tcp_client_mt)
>>     return s
> 
> I meant that `self._errno = boxerrno.EINVAL` was changed to
> `boxerrno(self._errno)` in `#dns == 0` case and it does not more affect
> the second return value (an error message). I would return it back for
> `#dns == 0` case (only for it).
> 
> The motivation is just to don't change things that we don't asked to
> change.
> 
> I didn't mean changes in getaddrinfo() function itself.
@@ -1360,8 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
-    if dns == nil or #dns == 0 then
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
+    if dns == nil then
+        self._errno = boxerrno()
+        return nil, err
+    end
+    if #dns == 0 then
         self._errno = boxerrno.EINVAL
         return nil, socket_error(self)
     end

>>> Also I see that we don't document Lua Socket part of the API:
>>> https://www.tarantool.io/en/doc/1.10/reference/reference_lua/socket/
>> 
>> Is this the part of this patchset? Maybe, it would be better to do doc
>> request separately.
> 
> It was in context of your docbot request: there is no need to ask to
> update Lua Socket API docs if we does not provide it at all.
> 
> Lua Socket has its own API documentation, so it is maybe okay to don't
> document it again on our website.

commit 7a85a217cb97faf03c8c817fc2748749abd91e91
Author: Roman Khabibov <roman.habibov@tarantool.org>
Date:   Thu Nov 21 14:37:54 2019 +0300

    lua: return getaddrinfo() errors
    
    Add getaddrinfo() errors into the several fuctions of socket. Now
    getaddrinfo() can return a pair of values (nil and error message)
    in case of error.
    
    Closes #4138
    
    @TarantoolBot document
    Title: socket API changes
    
    * socket.getaddrinfo()
    
    Can return error message as second return value in case of
    internal getaddrinfo() error.
    
    Example:
    tarantool> socket.getaddrinfo('non_exists_hostname', 3301)
    ---
    - null
    - 'getaddrinfo: nodename nor servname provided, or not known'
    ...
    
    * socket.tcp_connect()
    
    Can return socket.getaddrinfo() error message as second return
    value.
    
    Example:
    tarantool> socket.tcp_connect('non_exists_hostname', 3301)
    ---
    - null
    - 'getaddrinfo: nodename nor servname provided, or not known'
    ...

diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index c2e1bb9c4..2fb3e6df6 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -150,9 +150,12 @@ local function next_id(id) return band(id + 1, 0x7FFFFFFF) end
 local function establish_connection(host, port, timeout)
     local timeout = timeout or DEFAULT_CONNECT_TIMEOUT
     local begin = fiber.clock()
-    local s = socket.tcp_connect(host, port, timeout)
+    local s, err = socket.tcp_connect(host, port, timeout)
     if not s then
-        return nil, errno.strerror(errno())
+        if not err then
+            return nil, errno.strerror(errno())
+        end
+        return nil, err
     end
     local msg = s:read({chunk = IPROTO_GREETING_SIZE},
                         timeout - (fiber.clock() - begin))
diff --git a/src/lua/socket.c b/src/lua/socket.c
index 130378caf..dbade2d27 100644
--- a/src/lua/socket.c
+++ b/src/lua/socket.c
@@ -774,6 +774,18 @@ lbox_getaddrinfo_result_wrapper(struct lua_State *L)
 	return 1;
 }
 
+/**
+ * Wrap coio_getaddrinfo() and call it. Push returned values onto
+ * @a L Lua stack.
+ *
+ * @param L Lua stack.
+ *
+ * @retval 1 Number of returned values by Lua function if
+ * coio_getaddrinfo() success.
+ * @retval 2 Number of returned values by Lua function if
+ * coio_getaddrinfo() is failed (first is nil, second is error
+ * message).
+ */
 static int
 lbox_socket_getaddrinfo(struct lua_State *L)
 {
@@ -816,7 +828,9 @@ lbox_socket_getaddrinfo(struct lua_State *L)
 
 	if (dns_res != 0) {
 		lua_pushnil(L);
-		return 1;
+		struct error *err = diag_get()->last;
+		lua_pushstring(L, err->errmsg);
+		return 2;
 	}
 
 	/* no results */
diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index a334ad45b..f9256ecdc 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -1041,9 +1041,12 @@ local function tcp_connect(host, port, timeout)
     end
     local timeout = timeout or TIMEOUT_INFINITY
     local stop = fiber.clock() + timeout
-    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
+    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
         protocol = 'tcp' })
-    if dns == nil or #dns == 0 then
+    if dns == nil then
+        return nil, err
+    end
+    if #dns == 0 then
         boxerrno(boxerrno.EINVAL)
         return nil
     end
@@ -1360,8 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
-    if dns == nil or #dns == 0 then
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
+    if dns == nil then
+        self._errno = boxerrno()
+        return nil, err
+    end
+    if #dns == 0 then
         self._errno = boxerrno.EINVAL
         return nil, socket_error(self)
     end
@@ -1547,9 +1554,12 @@ local function lsocket_connect(host, port)
     if host == nil or port == nil then
         error("Usage: luasocket.connect(host, port)")
     end
-    local s = tcp_connect(host, port)
+    local s, err = tcp_connect(host, port)
     if not s then
-        return nil, boxerrno.strerror()
+        if not err then
+            return nil, boxerrno.strerror()
+        end
+        return nil, err
     end
     setmetatable(s, lsocket_tcp_client_mt)
     return s
diff --git a/test/app/socket.result b/test/app/socket.result
index fd299424c..6e5bd7a35 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -941,10 +941,20 @@ sc:close()
 - true
 ...
 -- tcp_connect
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value from internal.getaddrinfo (usually on Mac OS, but
+-- theoretically it can happen on Linux too). Sometimes
+-- getaddrinfo() is timed out, sometimes connect. On Linux however
+-- getaddrinfo is fast enough to never give timeout error in
+-- the case. So, there are two sources of timeout errors that are
+-- reported differently. This difference has appeared after
+-- gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
 ---
-- null
+...
+s == nil
+---
+- true
 ...
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -1606,7 +1616,7 @@ fio.stat(path) == nil
 { socket.tcp_connect('abrakadabra#123') == nil, errno.strerror() }
 ---
 - - true
-  - Invalid argument
+  - Input/output error
 ...
 -- wrong options for getaddrinfo
 socket.getaddrinfo('host', 'port', { type = 'WRONG' }) == nil and errno() == errno.EINVAL
@@ -2816,6 +2826,48 @@ test_run:cmd("clear filter")
 ---
 - true
 ...
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+        return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
+s, err = socket.connect('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
+s, err = socket.tcp_connect('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
 -- case: sicket receive inconsistent behavior
 chan = fiber.channel()
 ---
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index c72d41763..7086e496b 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -300,8 +300,16 @@ sc:close()
 
 -- tcp_connect
 
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value from internal.getaddrinfo (usually on Mac OS, but
+-- theoretically it can happen on Linux too). Sometimes
+-- getaddrinfo() is timed out, sometimes connect. On Linux however
+-- getaddrinfo is fast enough to never give timeout error in
+-- the case. So, there are two sources of timeout errors that are
+-- reported differently. This difference has appeared after
+-- gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+s == nil
 
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -960,6 +968,27 @@ server:close()
 
 test_run:cmd("clear filter")
 
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+        return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+check_err(err)
+s, err = socket.connect('non_exists_hostname', 3301)
+check_err(err)
+s, err = socket.tcp_connect('non_exists_hostname', 3301)
+check_err(err)
+
 -- case: sicket receive inconsistent behavior
 chan = fiber.channel()
 counter = 0
diff --git a/test/box/net.box.result b/test/box/net.box.result
index e3dabf7d9..0635ca9e7 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -3927,6 +3927,34 @@ test_run:grep_log('default', '00000040:.*')
 ---
 - null
 ...
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s = remote.connect('non_exists_hostname:3301')
+---
+...
+check_err(s['error'])
+---
+- true
+...
 box.cfg{log_level=log_level}
 ---
 ...
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index 8e65ff470..9c9aee1ad 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -1582,4 +1582,21 @@ test_run:wait_log('default', '00000030:.*', nil, 10)
 -- we expect nothing below, so don't wait
 test_run:grep_log('default', '00000040:.*')
 
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' then
+            return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s = remote.connect('non_exists_hostname:3301')
+check_err(s['error'])
+
 box.cfg{log_level=log_level}

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Tarantool-patches] [tarantool-patches] [server-dev] Re: Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-12-21 17:50                               ` Roman Khabibov
@ 2019-12-23 13:36                                 ` Alexander Turenko
  2019-12-26 17:29                                   ` Roman Khabibov
  0 siblings, 1 reply; 20+ messages in thread
From: Alexander Turenko @ 2019-12-23 13:36 UTC (permalink / raw)
  To: Roman Khabibov; +Cc: tarantool-patches

Comments below are trivial. Let's fix and proceed with the next reviewer
(Sergey O.). Please, resend the patchset for him.

I still think you should run CI on all targets (push to ...-full-ci
branch).

LGTM except minor comments below.

WBR, Alexander Turenko.

> @@ -1360,8 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
>      -- This function is broken by design
>      local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>      local timeout = deadline - fiber.clock()
> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
> -    if dns == nil or #dns == 0 then
> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
> +    if dns == nil then
> +        self._errno = boxerrno()

Master's socket _errno was set to EINVAL before this patch. Let's keep
this behaviour, but change the 2nd return value.

I mean:

 | if dns == nil then
 |     self._errno = boxerrno.EINVAL
 |     return nil, err
 | end
 | if #dns == 0 then
 |     self._errno = boxerrno.EINVAL
 |     return nil, socket_error(self)
 | end

> +        return nil, err
> +    end
> +    if #dns == 0 then
>          self._errno = boxerrno.EINVAL
>          return nil, socket_error(self)
>      end

> diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
> index c72d41763..7086e496b 100644
> --- a/test/app/socket.test.lua
> +++ b/test/app/socket.test.lua
> @@ -960,6 +968,27 @@ server:close()
>  
>  test_run:cmd("clear filter")
>  
> +-- gh-4138 Check getaddrinfo() error from socket:connect() only.
> +-- Error code and error message returned by getaddrinfo() depends
> +-- on system's gai_strerror().
> +test_run:cmd("setopt delimiter ';'")
> +function check_err(err)
> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> +       err == 'getaddrinfo: Name or service not known' then
> +        return true
> +    end
> +    return false
> +end;

You added the following error message to coio_getaddrinfo() unit test in
the first commit:

 | const char *exp_errmsg_4 = "getaddrinfo: hostname nor servname provided"
 |         ", or not known";

So it worth to add it here too. We'll enable the test of FreeBSD sooner
or later.

Also I got EAI_AGAIN on my system now (maybe something was changed on
DNS server side). Let's add it too (for both commits).

> diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
> index 8e65ff470..9c9aee1ad 100644
> --- a/test/box/net.box.test.lua
> +++ b/test/box/net.box.test.lua
> @@ -1582,4 +1582,21 @@ test_run:wait_log('default', '00000030:.*', nil, 10)
>  -- we expect nothing below, so don't wait
>  test_run:grep_log('default', '00000040:.*')
>  
> +-- gh-4138 Check getaddrinfo() error from connect() only. Error
> +-- code and error message returned by getaddrinfo() depends on
> +-- system's gai_strerror().
> +test_run:cmd("setopt delimiter ';'")
> +function check_err(err)
> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> +       err == 'getaddrinfo: Name or service not known' then
> +            return true
> +    end
> +    return false
> +end;
> +test_run:cmd("setopt delimiter ''");

Same here.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Tarantool-patches] [tarantool-patches] [server-dev] Re: Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-12-23 13:36                                 ` Alexander Turenko
@ 2019-12-26 17:29                                   ` Roman Khabibov
  2020-02-18 13:55                                     ` [Tarantool-patches] Fwd: " Roman Khabibov
  0 siblings, 1 reply; 20+ messages in thread
From: Roman Khabibov @ 2019-12-26 17:29 UTC (permalink / raw)
  To: Sergey Ostanevich; +Cc: tarantool-patches

Hi! Alexander, thanks for the review. Sergos, can you, please, do a second review?

https://github.com/tarantool/tarantool/tree/romanhabibov/gh-4138-getaddrinfo-full-ci

> On Dec 23, 2019, at 16:36, Alexander Turenko <alexander.turenko@tarantool.org> wrote:
> 
> Comments below are trivial. Let's fix and proceed with the next reviewer
> (Sergey O.). Please, resend the patchset for him.
> 
> I still think you should run CI on all targets (push to ...-full-ci
> branch).
> 
> LGTM except minor comments below.
> 
> WBR, Alexander Turenko.
> 
>> @@ -1360,8 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
>>     -- This function is broken by design
>>     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>>     local timeout = deadline - fiber.clock()
>> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
>> -    if dns == nil or #dns == 0 then
>> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
>> +    if dns == nil then
>> +        self._errno = boxerrno()
> 
> Master's socket _errno was set to EINVAL before this patch. Let's keep
> this behaviour, but change the 2nd return value.
> 
> I mean:
> 
> | if dns == nil then
> |     self._errno = boxerrno.EINVAL
> |     return nil, err
> | end
> | if #dns == 0 then
> |     self._errno = boxerrno.EINVAL
> |     return nil, socket_error(self)
> | end
@@ -1360,8 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
-    if dns == nil or #dns == 0 then
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
+    if dns == nil then
+        self._errno = boxerrno.EINVAL
+        return nil, err
+    end
+    if #dns == 0 then
         self._errno = boxerrno.EINVAL
         return nil, socket_error(self)
     end

>> +        return nil, err
>> +    end
>> +    if #dns == 0 then
>>         self._errno = boxerrno.EINVAL
>>         return nil, socket_error(self)
>>     end
> 
>> diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
>> index c72d41763..7086e496b 100644
>> --- a/test/app/socket.test.lua
>> +++ b/test/app/socket.test.lua
>> @@ -960,6 +968,27 @@ server:close()
>> 
>> test_run:cmd("clear filter")
>> 
>> +-- gh-4138 Check getaddrinfo() error from socket:connect() only.
>> +-- Error code and error message returned by getaddrinfo() depends
>> +-- on system's gai_strerror().
>> +test_run:cmd("setopt delimiter ';'")
>> +function check_err(err)
>> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
>> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
>> +       err == 'getaddrinfo: Name or service not known' then
>> +        return true
>> +    end
>> +    return false
>> +end;
> 
> You added the following error message to coio_getaddrinfo() unit test in
> the first commit:
> 
> | const char *exp_errmsg_4 = "getaddrinfo: hostname nor servname provided"
> |         ", or not known";
> 
> So it worth to add it here too. We'll enable the test of FreeBSD sooner
> or later.
> 
> Also I got EAI_AGAIN on my system now (maybe something was changed on
> DNS server side). Let's add it too (for both commits).
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' or
+       err == 'getaddrinfo: hostname nor servname provided, or not known' or
+       err == 'getaddrinfo: temporary failure in name resolution' then
+        return true
+    end
+    return false
+end;

>> diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
>> index 8e65ff470..9c9aee1ad 100644
>> --- a/test/box/net.box.test.lua
>> +++ b/test/box/net.box.test.lua
>> @@ -1582,4 +1582,21 @@ test_run:wait_log('default', '00000030:.*', nil, 10)
>> -- we expect nothing below, so don't wait
>> test_run:grep_log('default', '00000040:.*')
>> 
>> +-- gh-4138 Check getaddrinfo() error from connect() only. Error
>> +-- code and error message returned by getaddrinfo() depends on
>> +-- system's gai_strerror().
>> +test_run:cmd("setopt delimiter ';'")
>> +function check_err(err)
>> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
>> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
>> +       err == 'getaddrinfo: Name or service not known' then
>> +            return true
>> +    end
>> +    return false
>> +end;
>> +test_run:cmd("setopt delimiter ''");
> 
> Same here.
Done.


commit 22937d2fce2d127a762a32e392d2c4d525c22c0b
Author: Roman Khabibov <roman.habibov@tarantool.org>
Date:   Thu Nov 21 14:37:54 2019 +0300

    lua: return getaddrinfo() errors
    
    Add getaddrinfo() errors into the several fuctions of socket. Now
    getaddrinfo() can return a pair of values (nil and error message)
    in case of error.
    
    Closes #4138
    
    @TarantoolBot document
    Title: socket API changes
    
    * socket.getaddrinfo()
    
    Can return error message as second return value in case of
    internal getaddrinfo() error.
    
    Example:
    tarantool> socket.getaddrinfo('non_exists_hostname', 3301)
    ---
    - null
    - 'getaddrinfo: nodename nor servname provided, or not known'
    ...
    
    * socket.tcp_connect()
    
    Can return socket.getaddrinfo() error message as second return
    value.
    
    Example:
    tarantool> socket.tcp_connect('non_exists_hostname', 3301)
    ---
    - null
    - 'getaddrinfo: nodename nor servname provided, or not known'
    ...

diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index c2e1bb9c4..2fb3e6df6 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -150,9 +150,12 @@ local function next_id(id) return band(id + 1, 0x7FFFFFFF) end
 local function establish_connection(host, port, timeout)
     local timeout = timeout or DEFAULT_CONNECT_TIMEOUT
     local begin = fiber.clock()
-    local s = socket.tcp_connect(host, port, timeout)
+    local s, err = socket.tcp_connect(host, port, timeout)
     if not s then
-        return nil, errno.strerror(errno())
+        if not err then
+            return nil, errno.strerror(errno())
+        end
+        return nil, err
     end
     local msg = s:read({chunk = IPROTO_GREETING_SIZE},
                         timeout - (fiber.clock() - begin))
diff --git a/src/lua/socket.c b/src/lua/socket.c
index 130378caf..dbade2d27 100644
--- a/src/lua/socket.c
+++ b/src/lua/socket.c
@@ -774,6 +774,18 @@ lbox_getaddrinfo_result_wrapper(struct lua_State *L)
 	return 1;
 }
 
+/**
+ * Wrap coio_getaddrinfo() and call it. Push returned values onto
+ * @a L Lua stack.
+ *
+ * @param L Lua stack.
+ *
+ * @retval 1 Number of returned values by Lua function if
+ * coio_getaddrinfo() success.
+ * @retval 2 Number of returned values by Lua function if
+ * coio_getaddrinfo() is failed (first is nil, second is error
+ * message).
+ */
 static int
 lbox_socket_getaddrinfo(struct lua_State *L)
 {
@@ -816,7 +828,9 @@ lbox_socket_getaddrinfo(struct lua_State *L)
 
 	if (dns_res != 0) {
 		lua_pushnil(L);
-		return 1;
+		struct error *err = diag_get()->last;
+		lua_pushstring(L, err->errmsg);
+		return 2;
 	}
 
 	/* no results */
diff --git a/src/lua/socket.lua b/src/lua/socket.lua
index a334ad45b..ce6dab301 100644
--- a/src/lua/socket.lua
+++ b/src/lua/socket.lua
@@ -1041,9 +1041,12 @@ local function tcp_connect(host, port, timeout)
     end
     local timeout = timeout or TIMEOUT_INFINITY
     local stop = fiber.clock() + timeout
-    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
+    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
         protocol = 'tcp' })
-    if dns == nil or #dns == 0 then
+    if dns == nil then
+        return nil, err
+    end
+    if #dns == 0 then
         boxerrno(boxerrno.EINVAL)
         return nil
     end
@@ -1360,8 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
     -- This function is broken by design
     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
     local timeout = deadline - fiber.clock()
-    local dns = getaddrinfo(host, port, timeout, ga_opts)
-    if dns == nil or #dns == 0 then
+    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
+    if dns == nil then
+        self._errno = boxerrno.EINVAL
+        return nil, err
+    end
+    if #dns == 0 then
         self._errno = boxerrno.EINVAL
         return nil, socket_error(self)
     end
@@ -1547,9 +1554,12 @@ local function lsocket_connect(host, port)
     if host == nil or port == nil then
         error("Usage: luasocket.connect(host, port)")
     end
-    local s = tcp_connect(host, port)
+    local s, err = tcp_connect(host, port)
     if not s then
-        return nil, boxerrno.strerror()
+        if not err then
+            return nil, boxerrno.strerror()
+        end
+        return nil, err
     end
     setmetatable(s, lsocket_tcp_client_mt)
     return s
diff --git a/test/app/socket.result b/test/app/socket.result
index fd299424c..aca189f4c 100644
--- a/test/app/socket.result
+++ b/test/app/socket.result
@@ -941,10 +941,20 @@ sc:close()
 - true
 ...
 -- tcp_connect
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value from internal.getaddrinfo (usually on Mac OS, but
+-- theoretically it can happen on Linux too). Sometimes
+-- getaddrinfo() is timed out, sometimes connect. On Linux however
+-- getaddrinfo is fast enough to never give timeout error in
+-- the case. So, there are two sources of timeout errors that are
+-- reported differently. This difference has appeared after
+-- gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
 ---
-- null
+...
+s == nil
+---
+- true
 ...
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -1606,7 +1616,7 @@ fio.stat(path) == nil
 { socket.tcp_connect('abrakadabra#123') == nil, errno.strerror() }
 ---
 - - true
-  - Invalid argument
+  - Input/output error
 ...
 -- wrong options for getaddrinfo
 socket.getaddrinfo('host', 'port', { type = 'WRONG' }) == nil and errno() == errno.EINVAL
@@ -2816,6 +2826,50 @@ test_run:cmd("clear filter")
 ---
 - true
 ...
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' or
+       err == 'getaddrinfo: hostname nor servname provided, or not known' or
+       err == 'getaddrinfo: temporary failure in name resolution' then
+        return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
+s, err = socket.connect('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
+s, err = socket.tcp_connect('non_exists_hostname', 3301)
+---
+...
+check_err(err)
+---
+- true
+...
 -- case: sicket receive inconsistent behavior
 chan = fiber.channel()
 ---
diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
index c72d41763..223935e66 100644
--- a/test/app/socket.test.lua
+++ b/test/app/socket.test.lua
@@ -300,8 +300,16 @@ sc:close()
 
 -- tcp_connect
 
--- test timeout
-socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+-- Test timeout. In this test, tcp_connect can return the second
+-- output value from internal.getaddrinfo (usually on Mac OS, but
+-- theoretically it can happen on Linux too). Sometimes
+-- getaddrinfo() is timed out, sometimes connect. On Linux however
+-- getaddrinfo is fast enough to never give timeout error in
+-- the case. So, there are two sources of timeout errors that are
+-- reported differently. This difference has appeared after
+-- gh-4138 patch.
+s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
+s == nil
 
 -- AF_INET
 s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
@@ -960,6 +968,29 @@ server:close()
 
 test_run:cmd("clear filter")
 
+-- gh-4138 Check getaddrinfo() error from socket:connect() only.
+-- Error code and error message returned by getaddrinfo() depends
+-- on system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' or
+       err == 'getaddrinfo: hostname nor servname provided, or not known' or
+       err == 'getaddrinfo: temporary failure in name resolution' then
+        return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s, err = socket.getaddrinfo('non_exists_hostname', 3301)
+check_err(err)
+s, err = socket.connect('non_exists_hostname', 3301)
+check_err(err)
+s, err = socket.tcp_connect('non_exists_hostname', 3301)
+check_err(err)
+
 -- case: sicket receive inconsistent behavior
 chan = fiber.channel()
 counter = 0
diff --git a/test/box/net.box.result b/test/box/net.box.result
index e3dabf7d9..11914dd68 100644
--- a/test/box/net.box.result
+++ b/test/box/net.box.result
@@ -3927,6 +3927,36 @@ test_run:grep_log('default', '00000040:.*')
 ---
 - null
 ...
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' or
+       err == 'getaddrinfo: hostname nor servname provided, or not known' or
+       err == 'getaddrinfo: temporary failure in name resolution' then
+            return true
+    end
+    return false
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+s = remote.connect('non_exists_hostname:3301')
+---
+...
+check_err(s['error'])
+---
+- true
+...
 box.cfg{log_level=log_level}
 ---
 ...
diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
index 8e65ff470..79707eaa4 100644
--- a/test/box/net.box.test.lua
+++ b/test/box/net.box.test.lua
@@ -1582,4 +1582,23 @@ test_run:wait_log('default', '00000030:.*', nil, 10)
 -- we expect nothing below, so don't wait
 test_run:grep_log('default', '00000040:.*')
 
+-- gh-4138 Check getaddrinfo() error from connect() only. Error
+-- code and error message returned by getaddrinfo() depends on
+-- system's gai_strerror().
+test_run:cmd("setopt delimiter ';'")
+function check_err(err)
+    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
+       err == 'getaddrinfo: Servname not supported for ai_socktype' or
+       err == 'getaddrinfo: Name or service not known' or
+       err == 'getaddrinfo: hostname nor servname provided, or not known' or
+       err == 'getaddrinfo: temporary failure in name resolution' then
+            return true
+    end
+    return false
+end;
+test_run:cmd("setopt delimiter ''");
+
+s = remote.connect('non_exists_hostname:3301')
+check_err(s['error'])
+
 box.cfg{log_level=log_level}

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Tarantool-patches] Fwd: [tarantool-patches] [server-dev] Re: Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
  2019-12-26 17:29                                   ` Roman Khabibov
@ 2020-02-18 13:55                                     ` Roman Khabibov
  0 siblings, 0 replies; 20+ messages in thread
From: Roman Khabibov @ 2020-02-18 13:55 UTC (permalink / raw)
  To: Sergey Ostanevich; +Cc: tarantool-patches

[-- Attachment #1: Type: text/plain, Size: 17424 bytes --]



> Begin forwarded message:
> 
> From: Roman Khabibov <roman.habibov@tarantool.org>
> Subject: Re: [tarantool-patches] [server-dev] Re: Re: [PATCH v2 1/2] lua: return getaddrinfo() errors
> Date: December 26, 2019 at 20:29:16 GMT+3
> To: Sergey Ostanevich <sergos@tarantool.org>
> Cc: tarantool-patches@dev.tarantool.org, Alexander Turenko <alexander.turenko@tarantool.org>
> 
> Hi! Alexander, thanks for the review. Sergos, can you, please, do a second review?
> 
> https://github.com/tarantool/tarantool/tree/romanhabibov/gh-4138-getaddrinfo-full-ci <https://github.com/tarantool/tarantool/tree/romanhabibov/gh-4138-getaddrinfo-full-ci>
> 
>> On Dec 23, 2019, at 16:36, Alexander Turenko <alexander.turenko@tarantool.org> wrote:
>> 
>> Comments below are trivial. Let's fix and proceed with the next reviewer
>> (Sergey O.). Please, resend the patchset for him.
>> 
>> I still think you should run CI on all targets (push to ...-full-ci
>> branch).
>> 
>> LGTM except minor comments below.
>> 
>> WBR, Alexander Turenko.
>> 
>>> @@ -1360,8 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
>>>    -- This function is broken by design
>>>    local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>>>    local timeout = deadline - fiber.clock()
>>> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
>>> -    if dns == nil or #dns == 0 then
>>> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
>>> +    if dns == nil then
>>> +        self._errno = boxerrno()
>> 
>> Master's socket _errno was set to EINVAL before this patch. Let's keep
>> this behaviour, but change the 2nd return value.
>> 
>> I mean:
>> 
>> | if dns == nil then
>> |     self._errno = boxerrno.EINVAL
>> |     return nil, err
>> | end
>> | if #dns == 0 then
>> |     self._errno = boxerrno.EINVAL
>> |     return nil, socket_error(self)
>> | end
> @@ -1360,8 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
>     -- This function is broken by design
>     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>     local timeout = deadline - fiber.clock()
> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
> -    if dns == nil or #dns == 0 then
> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
> +    if dns == nil then
> +        self._errno = boxerrno.EINVAL
> +        return nil, err
> +    end
> +    if #dns == 0 then
>         self._errno = boxerrno.EINVAL
>         return nil, socket_error(self)
>     end
> 
>>> +        return nil, err
>>> +    end
>>> +    if #dns == 0 then
>>>        self._errno = boxerrno.EINVAL
>>>        return nil, socket_error(self)
>>>    end
>> 
>>> diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
>>> index c72d41763..7086e496b 100644
>>> --- a/test/app/socket.test.lua
>>> +++ b/test/app/socket.test.lua
>>> @@ -960,6 +968,27 @@ server:close()
>>> 
>>> test_run:cmd("clear filter")
>>> 
>>> +-- gh-4138 Check getaddrinfo() error from socket:connect() only.
>>> +-- Error code and error message returned by getaddrinfo() depends
>>> +-- on system's gai_strerror().
>>> +test_run:cmd("setopt delimiter ';'")
>>> +function check_err(err)
>>> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
>>> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
>>> +       err == 'getaddrinfo: Name or service not known' then
>>> +        return true
>>> +    end
>>> +    return false
>>> +end;
>> 
>> You added the following error message to coio_getaddrinfo() unit test in
>> the first commit:
>> 
>> | const char *exp_errmsg_4 = "getaddrinfo: hostname nor servname provided"
>> |         ", or not known";
>> 
>> So it worth to add it here too. We'll enable the test of FreeBSD sooner
>> or later.
>> 
>> Also I got EAI_AGAIN on my system now (maybe something was changed on
>> DNS server side). Let's add it too (for both commits).
> +function check_err(err)
> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> +       err == 'getaddrinfo: Name or service not known' or
> +       err == 'getaddrinfo: hostname nor servname provided, or not known' or
> +       err == 'getaddrinfo: temporary failure in name resolution' then
> +        return true
> +    end
> +    return false
> +end;
> 
>>> diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
>>> index 8e65ff470..9c9aee1ad 100644
>>> --- a/test/box/net.box.test.lua
>>> +++ b/test/box/net.box.test.lua
>>> @@ -1582,4 +1582,21 @@ test_run:wait_log('default', '00000030:.*', nil, 10)
>>> -- we expect nothing below, so don't wait
>>> test_run:grep_log('default', '00000040:.*')
>>> 
>>> +-- gh-4138 Check getaddrinfo() error from connect() only. Error
>>> +-- code and error message returned by getaddrinfo() depends on
>>> +-- system's gai_strerror().
>>> +test_run:cmd("setopt delimiter ';'")
>>> +function check_err(err)
>>> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
>>> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
>>> +       err == 'getaddrinfo: Name or service not known' then
>>> +            return true
>>> +    end
>>> +    return false
>>> +end;
>>> +test_run:cmd("setopt delimiter ''");
>> 
>> Same here.
> Done.
> 
> 
> commit 22937d2fce2d127a762a32e392d2c4d525c22c0b
> Author: Roman Khabibov <roman.habibov@tarantool.org <mailto:roman.habibov@tarantool.org>>
> Date:   Thu Nov 21 14:37:54 2019 +0300
> 
>    lua: return getaddrinfo() errors
> 
>    Add getaddrinfo() errors into the several fuctions of socket. Now
>    getaddrinfo() can return a pair of values (nil and error message)
>    in case of error.
> 
>    Closes #4138
> 
>    @TarantoolBot document
>    Title: socket API changes
> 
>    * socket.getaddrinfo()
> 
>    Can return error message as second return value in case of
>    internal getaddrinfo() error.
> 
>    Example:
>    tarantool> socket.getaddrinfo('non_exists_hostname', 3301)
>    ---
>    - null
>    - 'getaddrinfo: nodename nor servname provided, or not known'
>    ...
> 
>    * socket.tcp_connect()
> 
>    Can return socket.getaddrinfo() error message as second return
>    value.
> 
>    Example:
>    tarantool> socket.tcp_connect('non_exists_hostname', 3301)
>    ---
>    - null
>    - 'getaddrinfo: nodename nor servname provided, or not known'
>    ...
> 
> diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
> index c2e1bb9c4..2fb3e6df6 100644
> --- a/src/box/lua/net_box.lua
> +++ b/src/box/lua/net_box.lua
> @@ -150,9 +150,12 @@ local function next_id(id) return band(id + 1, 0x7FFFFFFF) end
> local function establish_connection(host, port, timeout)
>     local timeout = timeout or DEFAULT_CONNECT_TIMEOUT
>     local begin = fiber.clock()
> -    local s = socket.tcp_connect(host, port, timeout)
> +    local s, err = socket.tcp_connect(host, port, timeout)
>     if not s then
> -        return nil, errno.strerror(errno())
> +        if not err then
> +            return nil, errno.strerror(errno())
> +        end
> +        return nil, err
>     end
>     local msg = s:read({chunk = IPROTO_GREETING_SIZE},
>                         timeout - (fiber.clock() - begin))
> diff --git a/src/lua/socket.c b/src/lua/socket.c
> index 130378caf..dbade2d27 100644
> --- a/src/lua/socket.c
> +++ b/src/lua/socket.c
> @@ -774,6 +774,18 @@ lbox_getaddrinfo_result_wrapper(struct lua_State *L)
> 	return 1;
> }
> 
> +/**
> + * Wrap coio_getaddrinfo() and call it. Push returned values onto
> + * @a L Lua stack.
> + *
> + * @param L Lua stack.
> + *
> + * @retval 1 Number of returned values by Lua function if
> + * coio_getaddrinfo() success.
> + * @retval 2 Number of returned values by Lua function if
> + * coio_getaddrinfo() is failed (first is nil, second is error
> + * message).
> + */
> static int
> lbox_socket_getaddrinfo(struct lua_State *L)
> {
> @@ -816,7 +828,9 @@ lbox_socket_getaddrinfo(struct lua_State *L)
> 
> 	if (dns_res != 0) {
> 		lua_pushnil(L);
> -		return 1;
> +		struct error *err = diag_get()->last;
> +		lua_pushstring(L, err->errmsg);
> +		return 2;
> 	}
> 
> 	/* no results */
> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
> index a334ad45b..ce6dab301 100644
> --- a/src/lua/socket.lua
> +++ b/src/lua/socket.lua
> @@ -1041,9 +1041,12 @@ local function tcp_connect(host, port, timeout)
>     end
>     local timeout = timeout or TIMEOUT_INFINITY
>     local stop = fiber.clock() + timeout
> -    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
> +    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>         protocol = 'tcp' })
> -    if dns == nil or #dns == 0 then
> +    if dns == nil then
> +        return nil, err
> +    end
> +    if #dns == 0 then
>         boxerrno(boxerrno.EINVAL)
>         return nil
>     end
> @@ -1360,8 +1363,12 @@ local function lsocket_tcp_connect(self, host, port)
>     -- This function is broken by design
>     local ga_opts = { family = 'AF_INET', type = 'SOCK_STREAM' }
>     local timeout = deadline - fiber.clock()
> -    local dns = getaddrinfo(host, port, timeout, ga_opts)
> -    if dns == nil or #dns == 0 then
> +    local dns, err = getaddrinfo(host, port, timeout, ga_opts)
> +    if dns == nil then
> +        self._errno = boxerrno.EINVAL
> +        return nil, err
> +    end
> +    if #dns == 0 then
>         self._errno = boxerrno.EINVAL
>         return nil, socket_error(self)
>     end
> @@ -1547,9 +1554,12 @@ local function lsocket_connect(host, port)
>     if host == nil or port == nil then
>         error("Usage: luasocket.connect(host, port)")
>     end
> -    local s = tcp_connect(host, port)
> +    local s, err = tcp_connect(host, port)
>     if not s then
> -        return nil, boxerrno.strerror()
> +        if not err then
> +            return nil, boxerrno.strerror()
> +        end
> +        return nil, err
>     end
>     setmetatable(s, lsocket_tcp_client_mt)
>     return s
> diff --git a/test/app/socket.result b/test/app/socket.result
> index fd299424c..aca189f4c 100644
> --- a/test/app/socket.result
> +++ b/test/app/socket.result
> @@ -941,10 +941,20 @@ sc:close()
> - true
> ...
> -- tcp_connect
> --- test timeout
> -socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
> +-- Test timeout. In this test, tcp_connect can return the second
> +-- output value from internal.getaddrinfo (usually on Mac OS, but
> +-- theoretically it can happen on Linux too). Sometimes
> +-- getaddrinfo() is timed out, sometimes connect. On Linux however
> +-- getaddrinfo is fast enough to never give timeout error in
> +-- the case. So, there are two sources of timeout errors that are
> +-- reported differently. This difference has appeared after
> +-- gh-4138 patch.
> +s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
> ---
> -- null
> +...
> +s == nil
> +---
> +- true
> ...
> -- AF_INET
> s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
> @@ -1606,7 +1616,7 @@ fio.stat(path) == nil
> { socket.tcp_connect('abrakadabra#123') == nil, errno.strerror() }
> ---
> - - true
> -  - Invalid argument
> +  - Input/output error
> ...
> -- wrong options for getaddrinfo
> socket.getaddrinfo('host', 'port', { type = 'WRONG' }) == nil and errno() == errno.EINVAL
> @@ -2816,6 +2826,50 @@ test_run:cmd("clear filter")
> ---
> - true
> ...
> +-- gh-4138 Check getaddrinfo() error from socket:connect() only.
> +-- Error code and error message returned by getaddrinfo() depends
> +-- on system's gai_strerror().
> +test_run:cmd("setopt delimiter ';'")
> +---
> +- true
> +...
> +function check_err(err)
> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> +       err == 'getaddrinfo: Name or service not known' or
> +       err == 'getaddrinfo: hostname nor servname provided, or not known' or
> +       err == 'getaddrinfo: temporary failure in name resolution' then
> +        return true
> +    end
> +    return false
> +end;
> +---
> +...
> +test_run:cmd("setopt delimiter ''");
> +---
> +- true
> +...
> +s, err = socket.getaddrinfo('non_exists_hostname', 3301)
> +---
> +...
> +check_err(err)
> +---
> +- true
> +...
> +s, err = socket.connect('non_exists_hostname', 3301)
> +---
> +...
> +check_err(err)
> +---
> +- true
> +...
> +s, err = socket.tcp_connect('non_exists_hostname', 3301)
> +---
> +...
> +check_err(err)
> +---
> +- true
> +...
> -- case: sicket receive inconsistent behavior
> chan = fiber.channel()
> ---
> diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
> index c72d41763..223935e66 100644
> --- a/test/app/socket.test.lua
> +++ b/test/app/socket.test.lua
> @@ -300,8 +300,16 @@ sc:close()
> 
> -- tcp_connect
> 
> --- test timeout
> -socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
> +-- Test timeout. In this test, tcp_connect can return the second
> +-- output value from internal.getaddrinfo (usually on Mac OS, but
> +-- theoretically it can happen on Linux too). Sometimes
> +-- getaddrinfo() is timed out, sometimes connect. On Linux however
> +-- getaddrinfo is fast enough to never give timeout error in
> +-- the case. So, there are two sources of timeout errors that are
> +-- reported differently. This difference has appeared after
> +-- gh-4138 patch.
> +s, err = socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
> +s == nil
> 
> -- AF_INET
> s = socket('AF_INET', 'SOCK_STREAM', 'tcp')
> @@ -960,6 +968,29 @@ server:close()
> 
> test_run:cmd("clear filter")
> 
> +-- gh-4138 Check getaddrinfo() error from socket:connect() only.
> +-- Error code and error message returned by getaddrinfo() depends
> +-- on system's gai_strerror().
> +test_run:cmd("setopt delimiter ';'")
> +function check_err(err)
> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> +       err == 'getaddrinfo: Name or service not known' or
> +       err == 'getaddrinfo: hostname nor servname provided, or not known' or
> +       err == 'getaddrinfo: temporary failure in name resolution' then
> +        return true
> +    end
> +    return false
> +end;
> +test_run:cmd("setopt delimiter ''");
> +
> +s, err = socket.getaddrinfo('non_exists_hostname', 3301)
> +check_err(err)
> +s, err = socket.connect('non_exists_hostname', 3301)
> +check_err(err)
> +s, err = socket.tcp_connect('non_exists_hostname', 3301)
> +check_err(err)
> +
> -- case: sicket receive inconsistent behavior
> chan = fiber.channel()
> counter = 0
> diff --git a/test/box/net.box.result b/test/box/net.box.result
> index e3dabf7d9..11914dd68 100644
> --- a/test/box/net.box.result
> +++ b/test/box/net.box.result
> @@ -3927,6 +3927,36 @@ test_run:grep_log('default', '00000040:.*')
> ---
> - null
> ...
> +-- gh-4138 Check getaddrinfo() error from connect() only. Error
> +-- code and error message returned by getaddrinfo() depends on
> +-- system's gai_strerror().
> +test_run:cmd("setopt delimiter ';'")
> +---
> +- true
> +...
> +function check_err(err)
> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> +       err == 'getaddrinfo: Name or service not known' or
> +       err == 'getaddrinfo: hostname nor servname provided, or not known' or
> +       err == 'getaddrinfo: temporary failure in name resolution' then
> +            return true
> +    end
> +    return false
> +end;
> +---
> +...
> +test_run:cmd("setopt delimiter ''");
> +---
> +- true
> +...
> +s = remote.connect('non_exists_hostname:3301')
> +---
> +...
> +check_err(s['error'])
> +---
> +- true
> +...
> box.cfg{log_level=log_level}
> ---
> ...
> diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
> index 8e65ff470..79707eaa4 100644
> --- a/test/box/net.box.test.lua
> +++ b/test/box/net.box.test.lua
> @@ -1582,4 +1582,23 @@ test_run:wait_log('default', '00000030:.*', nil, 10)
> -- we expect nothing below, so don't wait
> test_run:grep_log('default', '00000040:.*')
> 
> +-- gh-4138 Check getaddrinfo() error from connect() only. Error
> +-- code and error message returned by getaddrinfo() depends on
> +-- system's gai_strerror().
> +test_run:cmd("setopt delimiter ';'")
> +function check_err(err)
> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
> +       err == 'getaddrinfo: Name or service not known' or
> +       err == 'getaddrinfo: hostname nor servname provided, or not known' or
> +       err == 'getaddrinfo: temporary failure in name resolution' then
> +            return true
> +    end
> +    return false
> +end;
> +test_run:cmd("setopt delimiter ''");
> +
> +s = remote.connect('non_exists_hostname:3301')
> +check_err(s['error'])
> +
> box.cfg{log_level=log_level}


[-- Attachment #2: Type: text/html, Size: 301598 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2020-02-18 13:55 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-15 16:58 [tarantool-patches] [PATCH] lua: return getaddrinfo() errors Roman Khabibov
2019-06-23 20:31 ` [tarantool-patches] " Alexander Turenko
2019-06-25 13:38   ` [tarantool-patches] Re: [PATCH v2 1/2] " Roman Khabibov
2019-07-09  8:04     ` Alexander Turenko
2019-07-10  2:16       ` Roman Khabibov
2019-07-23 12:39         ` Alexander Turenko
2019-07-26 13:48           ` Alexander Turenko
2019-08-05 13:36           ` Roman Khabibov
2019-08-29  0:45             ` Alexander Turenko
     [not found]               ` <868EAF2C-A491-46C9-AD37-7512D6CAB213@tarantool.org>
2019-09-06 13:45                 ` [tarantool-patches] Re: [server-dev] " Alexander Turenko
2019-09-10 12:54                   ` Roman Khabibov
2019-11-01 14:39                     ` [Tarantool-patches] [server-dev] Re: [tarantool-patches] " Alexander Turenko
2019-11-21 17:27                       ` [Tarantool-patches] [tarantool-patches] [server-dev] " Roman Khabibov
2019-12-08 19:48                         ` Alexander Turenko
2019-12-10 16:25                           ` Roman Khabibov
2019-12-18 14:58                             ` Alexander Turenko
2019-12-21 17:50                               ` Roman Khabibov
2019-12-23 13:36                                 ` Alexander Turenko
2019-12-26 17:29                                   ` Roman Khabibov
2020-02-18 13:55                                     ` [Tarantool-patches] Fwd: " Roman Khabibov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox