[Tarantool-patches] [PATCH v1] test: flaky box/net.box_wait_connected_gh-3856

Alexander Turenko alexander.turenko at tarantool.org
Thu Jun 18 21:25:13 MSK 2020


The reason for the failure is that getaddrinfo() returns EAI_SERVICE for
an incorrect TCP/IP port on FreeBSD, but truncates it modulo 65536 on
Linux/glibc. You can check it yourself:

 | /* cc getaddrinfo.c -o getaddrinfo */
 |
 | #include <sys/types.h>
 | #include <sys/socket.h>
 | #include <netdb.h>
 | #include <netinet/in.h>
 | #include <stdio.h>
 | #include <stdlib.h>
 | #include <string.h>
 | #include <errno.h>
 |
 | const char *
 | family_str(int family)
 | {
 | 	if (family == AF_INET)
 | 		return "AF_INET";
 | 	if (family == AF_INET6)
 | 		return "AF_INET6";
 | 	return "?";
 | }
 |
 | const char *
 | socktype_str(int socktype)
 | {
 | 	if (socktype == SOCK_DGRAM)
 | 		return "SOCK_DGRAM";
 | 	if (socktype == SOCK_STREAM)
 | 		return "SOCK_STREAM";
 | 	if (socktype == SOCK_RAW)
 | 		return "SOCK_RAW";
 | 	return "?";
 | }
 |
 | const char *
 | protocol_str(int protocol)
 | {
 | 	if (protocol == IPPROTO_TCP)
 | 		return "IPPROTO_TCP";
 | 	if (protocol == IPPROTO_UDP)
 | 		return "IPPROTO_UDP";
 | 	return "?";
 | }
 |
 | int
 | main(int argc, char **argv)
 | {
 | 	static char host[1024];
 | 	static char serv[1024];
 |
 | 	struct addrinfo hints;
 | 	memset(&hints, (char) 0, sizeof(hints));
 | 	hints.ai_family = AF_UNSPEC;
 | 	hints.ai_socktype = SOCK_STREAM;
 |
 | 	if (argc != 3) {
 | 		fprintf(stderr, "Usage: %s host port\n", argv[0]);
 | 		return 1;
 | 	}
 |
 | 	struct addrinfo *addrs;
 | 	int rc = getaddrinfo(argv[1], argv[2], &hints, &addrs);
 | 	if (rc != 0) {
 | 		fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rc));
 | 		exit(1);
 | 	}
 |
 | 	int flags = NI_NUMERICHOST | NI_NUMERICSERV;
 | 	struct addrinfo *addr;
 | 	for (addr = addrs; addr != NULL; addr = addr->ai_next) {
 | 		int rc = getnameinfo(addr->ai_addr, addr->ai_addrlen,
 | 				     host, 1024, serv, 1024, flags);
 | 		if (rc != 0) {
 | 			fprintf(stderr, "getnameinfo error\n");
 | 			exit(1);
 | 		}
 | 		printf("----\n");
 | 		printf("family: %s\n", family_str(addr->ai_family));
 | 		printf("socktype: %s\n", socktype_str(addr->ai_socktype));
 | 		printf("protocol: %s\n", protocol_str(addr->ai_protocol));
 | 		printf("host: %s\n", host);
 | 		printf("serv: %s\n", serv);
 |
 | #if 0
 | 		printf("Connecting...\n");
 | 		int fd = socket(addr->ai_family, addr->ai_socktype, 0);
 | 		if (connect(fd, addr->ai_addr, addr->ai_addrlen) == -1) {
 | 			fprintf(stderr, "connect errno: %d\n", errno);
 | 			perror("connect");
 | 		} else {
 | 			printf("connected successfully\n");
 | 		}
 | #endif
 | 	}
 |
 | 	freeaddrinfo(addrs);
 |
 | 	return 0;
 | }

(Linux/glibc) $ ./getaddrinfo 8.8.8.8 123456
----
family: AF_INET
socktype: SOCK_STREAM
protocol: IPPROTO_TCP
host: 8.8.8.8
serv: 57920

(FreeBSD) $ ./getaddrinfo 8.8.8.8 123456
getaddrinfo: Service was not recognized for socket type

So the obvious fix would be to change 123456 to something less than or
equal to 65535. Say, 1234.

> diff --git a/test/box/net.box_wait_connected_gh-3856.test.lua b/test/box/net.box_wait_connected_gh-3856.test.lua
> index 29e997fb5..d9fa80f3f 100644
> --- a/test/box/net.box_wait_connected_gh-3856.test.lua
> +++ b/test/box/net.box_wait_connected_gh-3856.test.lua
> @@ -1,8 +1,12 @@
>  net = require('net.box')
> +test_run = require('test_run').new()
>  
>  --
>  -- gh-3856: wait_connected = false is ignored.
> +-- Test uses Google DNS IP for testing:
> +-- https://developers.google.com/speed/public-dns/docs/using
>  --
>  c = net.connect('8.8.8.8:123456', {wait_connected = false})
> +test_run:wait_cond(function() return c.state == 'initial' end)
>  c
>  c:close()

This patch should not work, and it does not. I checked it with the
following command on a FreeBSD virtual machine:

$ ( cd test && ./test-run.py -j 20 `yes box/net.box_wait_connected_gh-3856.test.lua | head -n 1000` )

The 123456 -> 1234 change, however, passes.

The test still depends on the order in which fibers are scheduled
(net_box.connect() creates a separate fiber for connecting in the
background using fiber.create(), which yields). It is unlikely that our
fiber will not get execution time during the connection attempt, so it
is more of a formality.

But we can decrease the probability of this situation even further if we
grab all connection fields right when net_box.connect() returns, not
after a yield in the console (which happens while waiting for the next
command from test-run).

Consider this way:

 | $ cat test/box/net.box_wait_connected_gh-3856.test.lua
 | net = require('net.box')
 |
 | --
 | -- gh-3856: wait_connected = false is ignored.
 | --
 | do                                                            \
 |     c = net.connect('8.8.8.8:1234', {wait_connected = false}) \
 |     return c.state                                            \
 | end
 | c:close()

CCed Vlad as the author of the test case.

WBR, Alexander Turenko.


More information about the Tarantool-patches mailing list