[tarantool-patches] Re: [PATCH v2 1/2] lua: return getaddrinfo() errors

Roman Khabibov roman.habibov at tarantool.org
Tue Jun 25 16:38:34 MSK 2019



> On Jun 23, 2019, at 11:31 PM, Alexander Turenko <alexander.turenko at tarantool.org> wrote:
> 
> Re C part of the commit (src/lib/core/coio_task.c).
> 
> The origin of the (rc < 0) check is the following commit (marked
> relevant phrase with !!):
> 
> commit ea1da04d5add51c308efb3fd2d71cdfabed8411c
> Author: Roman Tsisyk <roman at tsisyk.com>
> Date:   Mon Dec 5 20:28:59 2016 +0300
> 
>    Refactor coio_task
> 
>    * Add coio_task_create() and coio_task_destroy()
>    * Rename coio_task() to coio_task_post()
>    * Fix the core invariant of coio_task_post():
>      - On timeout or when fiber was cancelled set diag, return -1 and
>        guarantee that on_timeout will be called somewhen.
>      - Otherwise don't touch diag, don't call on_timeout callback,
>        return 0. Please check task->base.result and task->diag
>        to get the original return code and diag from the callback.
>    * Change the return value of coio_task_post() to "int".
>    * Add diag to coio_getaddrinfo() and fix a possible bug in replication;
> !!    ignore uncoventional getaddrinfo(3) error codes for now.
>    * Fix buggy box.snapshot() tests.
> 
>    Needed for #1954
> 
> I don't find any reason to ignoring 'unconventional' errors.
> 
> getaddrinfo() returns negative values on Linux, however it is not so on
> Mac OS. Values and corresponding messages can be checked like so:
> 
> $ gcc -Wall -Wextra -x c <(echo -e '#include <sys/types.h>\n#include <sys/socket.h>\n#include <netdb.h>\n#include <stdio.h>\nint main() { printf("%d\\n", EAI_NONAME); return 0; }') && ./a.out; rm a.out
> $ gcc -Wall -Wextra -x c <(echo -e '#include <sys/types.h>\n#include <sys/socket.h>\n#include <netdb.h>\n#include <stdio.h>\nint main() { printf("%s\\n", gai_strerror(EAI_NONAME)); return 0; }') && ./a.out; rm a.out
> 
> The commit message should mention Roman's commit and cleanly state that
> (rc < 0) assumption does not work on Mac OS.
> 
> Aside of that there is nothing bad in calling gai_strerror() with some
> unusual value, it just return "Unknown error" (I checked both Linux and
> Mac OS).
> 
> I would add relevant test case for coio_getaddrinfo() into
> test/unit/coio.cc.
@@ -72,7 +72,7 @@ static void
 test_getaddrinfo(void)
 {
 	header();
-	plan(1);
+	plan(3);
 	const char *host = "127.0.0.1";
 	const char *port = "3333";
 	struct addrinfo *i;
@@ -81,6 +81,11 @@ test_getaddrinfo(void)
 	is(rc, 0, "getaddrinfo");
 	freeaddrinfo(i);
 
+	/* gh-4138: Check getaddrinfo() error. */
+	isnt(coio_getaddrinfo("hostname", port, NULL, &i, 1), 0, "getaddrinfo error");
+	isnt(strstr(diag_get()->last->errmsg, "getaddrinfo"), NULL,
+	     "getaddrinfo error");
+
 	/*
 	 * gh-4209: 0 timeout should not be a special value and
 	 * detach a task. Before a fix it led to segfault

> Consider comments re Lua part of the commit below.
> 
>> Before this patch, branch when coio_getaddrinfo() returns
>> getaddrinfo() errors has never reached. Add this errors into the
>> socket and thenet.box modules.
> 
> Typo: thenet.box -> net.box.
Before this patch, branch when getaddrinfo() returns error codes
couldn't be reached on Mac OS, because they are greater than 0 on
Mac OS (checking for errors was rc < 0). Add this errors into the
socket and the net.box modules. Now getaddrinfo() can return a
pair of values (nil and error message) in case of error.

> 
> Aside that I would add more investigation results and contracts change
> (see other comments in this email).
> 
>> 
>> Closes #4138
>> ---
>> 
>> Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-4138-getaddrinfo
>> Issue: https://github.com/tarantool/tarantool/issues/4138
> 
>> diff --git a/src/lib/core/coio_task.c b/src/lib/core/coio_task.c
>> index 908b336ed..83f669d05 100644
>> --- a/src/lib/core/coio_task.c
>> +++ b/src/lib/core/coio_task.c
>> @@ -413,7 +413,7 @@ coio_getaddrinfo(const char *host, const char *port,
>> 		return -1; /* timed out or cancelled */
>> 
>> 	/* Task finished */
>> -	if (task->rc < 0) {
>> +	if (task->rc != 0) {
> 
> See comments at the start of the email.
> 
>> diff --git a/src/lua/socket.c b/src/lua/socket.c
>> index 130378caf..5a8469ddf 100644
>> --- a/src/lua/socket.c
>> +++ b/src/lua/socket.c
>> @@ -54,6 +54,7 @@
>> #include <fiber.h>
>> #include "lua/utils.h"
>> #include "lua/fiber.h"
>> +#include "reflection.h"
> 
> It is only needed to deference 'err->type', so I would remove it with
> this check.
@@ -816,7 +829,9 @@ lbox_socket_getaddrinfo(struct lua_State *L)
 
 	if (dns_res != 0) {
 		lua_pushnil(L);
-		return 1;
+		struct error *err = diag_get()->last;
+		lua_pushstring(L, err->errmsg);
+		return 2;
 	}

>> 
>> extern int coio_wait(int fd, int event, double timeout);
>> 
>> @@ -816,6 +817,11 @@ lbox_socket_getaddrinfo(struct lua_State *L)
>> 
>> 	if (dns_res != 0) {
>> 		lua_pushnil(L);
>> +		struct error *err = diag_get()->last;
>> +		if (strcmp(err->type->name, "SystemError") == 0) {
> 
> I don't think that we should check a type here. Why not report any error
> as is?
> 
> After that the convention for lbox_socket_getaddrinfo() will be simple:
> it returns a table of results when successul; `nil`, err_msg otherwise.
> 
> Now it can return just `nil` or `nil`, err_msg -- the contract is more
> complex.
> 
> Lua's code will be simpler if we'll simplify this contract.
> 
> I suggest to add a comment (in free form, but other reviewers can
> enforce specific format) to the function that shows it contract
> explicitly, like so: return <...> at success, otherwise return <…>.
@@ -774,6 +774,19 @@ lbox_getaddrinfo_result_wrapper(struct lua_State *L)
 	return 1;
 }
 
+/**
+ * Wrap coio_getaddrinfo() and call it. Push returned values onto
+ * @a L Lua stack.
+ *
+ * @param L Lua stack.
+ *
+ * @retval 1 Number of returned values by Lua function if
+ * coio_getaddrinfo() success.
+ * @retval 2 Number of returned values by Lua function if
+ * coio_getaddrinfo() is failed (fist is nil, second is error
+ * message).
+ * @retval One of luaT_error() codes.
+ */
 static int
 lbox_socket_getaddrinfo(struct lua_State *L)
 {

>> diff --git a/src/lua/socket.lua b/src/lua/socket.lua
>> index 2dba0a8d2..f0b432925 100644
>> --- a/src/lua/socket.lua
>> +++ b/src/lua/socket.lua
>> @@ -1028,11 +1028,14 @@ local function tcp_connect(host, port, timeout)
>>     end
>>     local timeout = timeout or TIMEOUT_INFINITY
>>     local stop = fiber.clock() + timeout
>> -    local dns = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>> +    local dns, err = getaddrinfo(host, port, timeout, { type = 'SOCK_STREAM',
>>         protocol = 'tcp' })
>>     if dns == nil or #dns == 0 then
>> -        boxerrno(boxerrno.EINVAL)
>> -        return nil
>> +        if not err then
>> +            boxerrno(boxerrno.EINVAL)
>> +            return nil
>> +        end
>> +        return nil, err
> 
> Here we change the contract of socket.getaddrinfo() -- the user visible
> function. I would mention that in the commit message.
Added.

>> diff --git a/test/app/socket.test.lua b/test/app/socket.test.lua
>> index dab168f90..140baf22f 100644
>> --- a/test/app/socket.test.lua
>> +++ b/test/app/socket.test.lua
>> @@ -301,7 +301,7 @@ sc:close()
>> -- tcp_connect
>> 
>> -- test timeout
>> -socket.tcp_connect('127.0.0.1', 80, 0.00000000001)
>> +socket.tcp_connect('127.0.0.1', 80, 0.00000000000001)
> 
> It seems this change is not related to your commit.
Removed.

>> +--gh-4138 Check getaddrinfo() error.
>> +
>> +test_run:cmd("setopt delimiter ';'")
>> +function check_err(err)
>> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
>> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
>> +       err == 'getaddrinfo: Name or service not known' then
>> +            return true
>> +    end
>> +    return false
>> +end;
> 
> Flush the delimiter to '' here.
> 
> Please, comment (outside of semicolon-delimiter block) that these
> messages corresponds to the following codes:
> 
> * EAI_NONAME (Mac OS)
> * EAI_SERVICE (Linux and Mac OS)
> * EAI_NONAME (Linux)
> 
> It seems that you reveice either EAI_NONAME or EAI_SERVICE, not? Please,
> elaborate. I guess it worth to see which certain error is returned by
> getaddrinfo() here and check only for it (but consider difference
> between Linux and Mac OS gai_strerror() messages if it is EAI_NONAME).
> 
>> +
>> +s, err = socket:connect('hostname:3301');
> 
> I would use less generic hostname like 'non_exists_hostname’.
@@ -982,4 +982,14 @@ fiber.cancel(echo_fiber)
 client:read(1, 5) == ''
 server:close()
 
+-- gh-4138 Check getaddrinfo() error. Error code and error message
+-- returned by getaddrinfo() depends on system's gai_strerror()
+-- and compiler. So that there is no checking for certain error
+-- message.
+
+s, err = socket:connect('non_exists_hostname:3301')
+string.find(err, 'getaddrinfo') ~= nil
+s, err = socket:bind('non_exists_hostname:3301')
+string.find(err, 'getaddrinfo') ~= nil
+

>> diff --git a/test/box/net.box.test.lua b/test/box/net.box.test.lua
>> index bea43002d..13ed4e200 100644
>> --- a/test/box/net.box.test.lua
>> +++ b/test/box/net.box.test.lua
>> @@ -1538,4 +1538,19 @@ test_run:grep_log('default', '00000020:.*')
>> test_run:grep_log('default', '00000030:.*')
>> test_run:grep_log('default', '00000040:.*')
>> 
>> +--gh-4138 Check getaddrinfo() error.
>> +test_run:cmd("setopt delimiter ';'")
>> +
>> +function check_err(err)
>> +    if err == 'getaddrinfo: nodename nor servname provided, or not known' or
>> +       err == 'getaddrinfo: Servname not supported for ai_socktype' or
>> +       err == 'getaddrinfo: Name or service not known' then
>> +            return true
>> +    end
>> +    return false
>> +end;
> 
> The same comments as above are applicable for this test case too.
@@ -1538,4 +1538,12 @@ test_run:grep_log('default', '00000020:.*')
 test_run:grep_log('default', '00000030:.*')
 test_run:grep_log('default', '00000040:.*')
 
+-- gh-4138 Check getaddrinfo() error. Error code and error message
+-- returned by getaddrinfo() depends on system's gai_strerror()
+-- and compiler. So that there is no checking for certain error
+-- message.
+
+s = remote.connect('non_exists_hostname:3301')
+string.find(s['error'], 'getaddrinfo') ~= nil
+
 box.cfg{log_level=log_level}



More information about the Tarantool-patches mailing list