[Tarantool-patches] [PATCH v1] test: flaky box/net.box_wait_connected_gh-3856
Alexander Turenko
alexander.turenko at tarantool.org
Thu Jun 18 21:25:13 MSK 2020
The reason for the failure is that getaddrinfo() returns EAI_SERVICE for
an out-of-range TCP/IP port on FreeBSD, but reduces it modulo 65536 on
Linux/glibc. You can check it yourself:
| /* cc getaddrinfo.c -o getaddrinfo */
|
| #include <sys/types.h>
| #include <sys/socket.h>
| #include <netdb.h>
| #include <netinet/in.h>
| #include <stdio.h>
| #include <stdlib.h>
| #include <string.h>
| #include <errno.h>
|
| const char *
| family_str(int family)
| {
| if (family == AF_INET)
| return "AF_INET";
| if (family == AF_INET6)
| return "AF_INET6";
| return "?";
| }
|
| const char *
| socktype_str(int socktype)
| {
| if (socktype == SOCK_DGRAM)
| return "SOCK_DGRAM";
| if (socktype == SOCK_STREAM)
| return "SOCK_STREAM";
| if (socktype == SOCK_RAW)
| return "SOCK_RAW";
| return "?";
| }
|
| const char *
| protocol_str(int protocol)
| {
| if (protocol == IPPROTO_TCP)
| return "IPPROTO_TCP";
| if (protocol == IPPROTO_UDP)
| return "IPPROTO_UDP";
| return "?";
| }
|
| int
| main(int argc, char **argv)
| {
| static char host[1024];
| static char serv[1024];
|
| struct addrinfo hints;
| memset(&hints, (char) 0, sizeof(hints));
| hints.ai_family = AF_UNSPEC;
| hints.ai_socktype = SOCK_STREAM;
|
| if (argc != 3) {
| fprintf(stderr, "Usage: %s host port\n", argv[0]);
| return 1;
| }
|
| struct addrinfo *addrs;
| int rc = getaddrinfo(argv[1], argv[2], &hints, &addrs);
| if (rc != 0) {
| fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rc));
| exit(1);
| }
|
| int flags = NI_NUMERICHOST | NI_NUMERICSERV;
| struct addrinfo *addr;
| for (addr = addrs; addr != NULL; addr = addr->ai_next) {
| int rc = getnameinfo(addr->ai_addr, addr->ai_addrlen,
| host, 1024, serv, 1024, flags);
| if (rc != 0) {
| fprintf(stderr, "getnameinfo error\n");
| exit(1);
| }
| printf("----\n");
| printf("family: %s\n", family_str(addr->ai_family));
| printf("socktype: %s\n", socktype_str(addr->ai_socktype));
| printf("protocol: %s\n", protocol_str(addr->ai_protocol));
| printf("host: %s\n", host);
| printf("serv: %s\n", serv);
|
| #if 0
| printf("Connecting...\n");
| int fd = socket(addr->ai_family, addr->ai_socktype, 0);
| if (connect(fd, addr->ai_addr, addr->ai_addrlen) == -1) {
| fprintf(stderr, "connect errno: %d\n", errno);
| perror("connect");
| } else {
| printf("connected successfully\n");
| }
| #endif
| }
|
| freeaddrinfo(addrs);
|
| return 0;
| }
(Linux/glibc) $ ./getaddrinfo 8.8.8.8 123456
----
family: AF_INET
socktype: SOCK_STREAM
protocol: IPPROTO_TCP
host: 8.8.8.8
serv: 57920
(FreeBSD) $ ./getaddrinfo 8.8.8.8 123456
getaddrinfo: Service was not recognized for socket type
So the obvious fix would be to change 123456 to something less than or
equal to 65535. Say, 1234.
> diff --git a/test/box/net.box_wait_connected_gh-3856.test.lua b/test/box/net.box_wait_connected_gh-3856.test.lua
> index 29e997fb5..d9fa80f3f 100644
> --- a/test/box/net.box_wait_connected_gh-3856.test.lua
> +++ b/test/box/net.box_wait_connected_gh-3856.test.lua
> @@ -1,8 +1,12 @@
> net = require('net.box')
> +test_run = require('test_run').new()
>
> --
> -- gh-3856: wait_connected = false is ignored.
> +-- Test uses Google DNS IP for testing:
> +-- https://developers.google.com/speed/public-dns/docs/using
> --
> c = net.connect('8.8.8.8:123456', {wait_connected = false})
> +test_run:wait_cond(function() return c.state == 'initial' end)
> c
> c:close()
It should not work and does not. I checked it with the following command
on a FreeBSD virtual machine:
$ ( cd test && ./test-run.py -j 20 `yes box/net.box_wait_connected_gh-3856.test.lua | head -n 1000` )
The 123456 -> 1234 change, however, passes.
The test still depends on the order in which fibers are scheduled
(net_box.connect() creates a separate fiber for connecting in the
background using fiber.create(), which yields). It is unlikely that our
fiber will not get execution time during the connection attempt, so this
is more of a formality.
But we can decrease the probability of this situation even more if we
grab all connection fields right when net_box.connect() returns, not
after a yield in the console (which happens while waiting for the next
command from test-run).
Consider doing it this way:
| $ cat test/box/net.box_wait_connected_gh-3856.test.lua
| net = require('net.box')
|
| --
| -- gh-3856: wait_connected = false is ignored.
| --
| do \
| c = net.connect('8.8.8.8:1234', {wait_connected = false}) \
| return c.state \
| end
| c:close()
CCed Vlad as the author of the test case.
WBR, Alexander Turenko.
More information about the Tarantool-patches
mailing list