From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Content-Type: text/plain; charset=utf-8 Mime-Version: 1.0 (Mac OS X Mail 12.2 \(3445.102.3\)) Subject: Re: [tarantool-patches] [PATCH] evio: fix timeout calculations From: Serge Petrenko In-Reply-To: <20190314151641.26876-1-sergepetrenko@tarantool.org> Date: Thu, 14 Mar 2019 18:57:12 +0300 Content-Transfer-Encoding: quoted-printable Message-Id: References: <20190314151641.26876-1-sergepetrenko@tarantool.org> To: Vladimir Davydov Cc: tarantool-patches@freelists.org List-ID: > 14 =D0=BC=D0=B0=D1=80=D1=82=D0=B0 2019 =D0=B3., =D0=B2 18:16, Serge = Petrenko =D0=BD=D0=B0=D0=BF=D0=B8=D1=81=D0=B0= =D0=BB(=D0=B0): >=20 > The function evio_timeout_update() failed to update the starting time > point, which lead to timeouts happening much faster than they should = if > there were consecutive calls to the function. > This lead, for example, to applier timing out while reading a several > megabyte-size row in 0.2 seconds even if replication_timeout was set = to > 15 seconds. >=20 > Closes #4042 > --- > https://github.com/tarantool/tarantool/tree/sp/gh-4042-applier-timeout > https://github.com/tarantool/tarantool/issues/4042 >=20 > src/box/xrow_io.cc | 4 +- > src/lib/core/coio.cc | 18 ++-- > src/lib/core/coio.h | 2 +- > src/lib/core/evio.h | 5 +- > test/replication/long_row_timeout.result | 98 ++++++++++++++++++++++ > test/replication/long_row_timeout.test.lua | 43 ++++++++++ > test/replication/replica_big.lua | 12 +++ > test/replication/suite.cfg | 1 + > 8 files changed, 169 insertions(+), 14 deletions(-) > create mode 100644 test/replication/long_row_timeout.result > create mode 100644 test/replication/long_row_timeout.test.lua > create mode 100644 test/replication/replica_big.lua >=20 > diff --git a/src/box/xrow_io.cc b/src/box/xrow_io.cc > index 4b6e68bc1..48707982b 100644 > --- a/src/box/xrow_io.cc > +++ b/src/box/xrow_io.cc > @@ -71,7 +71,7 @@ coio_read_xrow_timeout_xc(struct ev_io *coio, struct = ibuf *in, > /* Read fixed header */ > if (ibuf_used(in) < 1) > coio_breadn_timeout(coio, in, 1, delay); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); >=20 > /* Read length */ > if (mp_typeof(*in->rpos) !=3D MP_UINT) { > @@ -81,7 +81,7 @@ coio_read_xrow_timeout_xc(struct ev_io *coio, struct = ibuf *in, > ssize_t to_read =3D mp_check_uint(in->rpos, in->wpos); > if (to_read > 0) > coio_breadn_timeout(coio, in, to_read, delay); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); >=20 > uint32_t len =3D mp_decode_uint((const char **) &in->rpos); >=20 > diff --git a/src/lib/core/coio.cc b/src/lib/core/coio.cc > index 6b94cf2a6..e88d724d5 100644 > --- a/src/lib/core/coio.cc > +++ b/src/lib/core/coio.cc > @@ -210,7 +210,7 @@ coio_connect_timeout(struct ev_io *coio, struct = uri *uri, struct sockaddr *addr, > if (!uri->host_hint) freeaddrinfo(ai); > else free(ai_local.ai_addr); > }); > - evio_timeout_update(loop(), start, &delay); > + evio_timeout_update(loop(), &start, &delay); >=20 > coio_timeout_init(&start, &delay, timeout); > assert(! evio_has_fd(coio)); > @@ -232,7 +232,7 @@ coio_connect_timeout(struct ev_io *coio, struct = uri *uri, struct sockaddr *addr, > } > ai =3D ai->ai_next; > ev_now_update(loop); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } >=20 > tnt_raise(SocketError, sio_socketname(coio->fd), "connection = failed"); > @@ -278,7 +278,7 @@ coio_accept(struct ev_io *coio, struct sockaddr = *addr, > fiber_testcancel(); > if (is_timedout) > tnt_raise(TimedOut); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } > } >=20 > @@ -338,7 +338,7 @@ coio_read_ahead_timeout(struct ev_io *coio, void = *buf, size_t sz, > fiber_testcancel(); > if (is_timedout) > tnt_raise(TimedOut); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } > } >=20 > @@ -433,7 +433,7 @@ coio_write_timeout(struct ev_io *coio, const void = *buf, size_t sz, >=20 > if (is_timedout) > tnt_raise(TimedOut); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } > } >=20 > @@ -499,7 +499,7 @@ coio_writev_timeout(struct ev_io *coio, struct = iovec *iov, int iovcnt, >=20 > if (is_timedout) > tnt_raise(TimedOut); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } > return total; > } > @@ -545,7 +545,7 @@ coio_sendto_timeout(struct ev_io *coio, const void = *buf, size_t sz, int flags, > fiber_testcancel(); > if (is_timedout) > tnt_raise(TimedOut); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } > } >=20 > @@ -590,7 +590,7 @@ coio_recvfrom_timeout(struct ev_io *coio, void = *buf, size_t sz, int flags, > fiber_testcancel(); > if (is_timedout) > tnt_raise(TimedOut); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } > } >=20 > @@ -759,7 +759,7 @@ coio_write_fd_timeout(int fd, const void *data, = size_t size, ev_tstamp timeout) > } > if (errno =3D=3D EAGAIN || errno =3D=3D EWOULDBLOCK) { > coio_wait(fd, COIO_WRITE, delay); > - evio_timeout_update(loop, start, &delay); > + evio_timeout_update(loop, &start, &delay); > } else if (errno !=3D EINTR) { > diag_set(SocketError, sio_socketname(fd), = "write"); > return -1; > diff --git a/src/lib/core/coio.h b/src/lib/core/coio.h > index 104abf720..6a2337689 100644 > --- a/src/lib/core/coio.h > +++ b/src/lib/core/coio.h > @@ -88,7 +88,7 @@ coio_timeout_init(ev_tstamp *start, ev_tstamp = *delay, > } >=20 > static inline void > -coio_timeout_update(ev_tstamp start, ev_tstamp *delay) > +coio_timeout_update(ev_tstamp *start, ev_tstamp *delay) > { > return evio_timeout_update(loop(), start, delay); > } > diff --git a/src/lib/core/evio.h b/src/lib/core/evio.h > index bd48235bc..c86be9e6a 100644 > --- a/src/lib/core/evio.h > +++ b/src/lib/core/evio.h > @@ -148,9 +148,10 @@ evio_timeout_init(ev_loop *loop, ev_tstamp = *start, ev_tstamp *delay, > } >=20 > static inline void > -evio_timeout_update(ev_loop *loop, ev_tstamp start, ev_tstamp *delay) > +evio_timeout_update(ev_loop *loop, ev_tstamp *start, ev_tstamp = *delay) > { > - ev_tstamp elapsed =3D ev_monotonic_now(loop) - start; > + ev_tstamp elapsed =3D ev_monotonic_now(loop) - *start; > + *start +=3D elapsed; > *delay =3D (elapsed >=3D *delay) ? 0 : *delay - elapsed; > } >=20 > diff --git a/test/replication/long_row_timeout.result = b/test/replication/long_row_timeout.result > new file mode 100644 > index 000000000..690fdcbb4 > --- /dev/null > +++ b/test/replication/long_row_timeout.result > @@ -0,0 +1,98 @@ > +fiber =3D require('fiber') > +--- > +... > +digest =3D require('digest') > +--- > +... > +test_run =3D require('test_run').new() > +--- > +... > +-- > +-- gh-4042 applier read times out too fast when reading large rows. > +-- > +box.schema.user.grant('guest', 'replication') > +--- > +... > +test_run:cmd('create server replica with rpl_master=3Ddefault, = script=3D"replication/replica_big.lua"') > +--- > +- true > +... > +test_run:cmd('start server replica') > +--- > +- true > +... > +box.info.replication[2].downstream.status > +--- > +- follow > +... > +tup_sz =3D box.cfg.memtx_max_tuple_size > +--- > +... > +box.cfg{memtx_max_tuple_size =3D 21 * 1024 * 1024, memtx_memory =3D = 1024 * 1024 * 1024} > +--- > +... > +-- insert some big rows which cannot be read in one go, so applier = yields > +-- on read a couple of times. > +s =3D box.schema.space.create('test') > +--- > +... > +_ =3D s:create_index('pk') > +--- > +... > +for i =3D 1,5 do box.space.test:insert{i, = require('digest').urandom(20 * 1024 * 1024)} end > +--- > +... > +-- replication_disconnect_timeout is 4 * replication_timeout, check = that > +-- replica doesn't time out too early. > +test_run:cmd('setopt delimiter ";"') > +--- > +- true > +... > +ok =3D true; > +--- > +... > +start =3D fiber.time(); > +--- > +... > +while fiber.time() - start < 3 * box.cfg.replication_timeout do > + if box.info.replication[2].downstream.status ~=3D 'follow' then > + ok =3D false > + break > + end > + fiber.sleep(0.001) > +end; > +--- > +... > +test_run:cmd('setopt delimiter ""'); > +--- > +- true > +... > +ok > +--- > +- true > +... > +s:drop() > +--- > +... > +test_run:cmd('stop server replica') > +--- > +- true > +... > +test_run:cmd('cleanup server replica') > +--- > +- true > +... > +test_run:cmd('delete server replica') > +--- > +- true > +... > +test_run:cleanup_cluster() > +--- > +... > +-- cannot decrease memtx_memory to its original value. > +box.cfg{memtx_max_tuple_size =3D tup_sz} > +--- > +... > +box.schema.user.revoke('guest', 'replication') > +--- > +... > diff --git a/test/replication/long_row_timeout.test.lua = b/test/replication/long_row_timeout.test.lua > new file mode 100644 > index 000000000..21a522018 > --- /dev/null > +++ b/test/replication/long_row_timeout.test.lua > @@ -0,0 +1,43 @@ > +fiber =3D require('fiber') > +digest =3D require('digest') > +test_run =3D require('test_run').new() > + > +-- > +-- gh-4042 applier read times out too fast when reading large rows. > +-- > +box.schema.user.grant('guest', 'replication') > +test_run:cmd('create server replica with rpl_master=3Ddefault, = script=3D"replication/replica_big.lua"') > +test_run:cmd('start server replica') > +box.info.replication[2].downstream.status > +tup_sz =3D box.cfg.memtx_max_tuple_size > +box.cfg{memtx_max_tuple_size =3D 21 * 1024 * 1024, memtx_memory =3D = 1024 * 1024 * 1024} > + > +-- insert some big rows which cannot be read in one go, so applier = yields > +-- on read a couple of times. > +s =3D box.schema.space.create('test') > +_ =3D s:create_index('pk') > +for i =3D 1,5 do box.space.test:insert{i, = require('digest').urandom(20 * 1024 * 1024)} end > +-- replication_disconnect_timeout is 4 * replication_timeout, check = that > +-- replica doesn't time out too early. > +test_run:cmd('setopt delimiter ";"') > +ok =3D true; > +start =3D fiber.time(); > +while fiber.time() - start < 3 * box.cfg.replication_timeout do > + if box.info.replication[2].downstream.status ~=3D 'follow' then > + ok =3D false > + break > + end > + fiber.sleep(0.001) > +end; > +test_run:cmd('setopt delimiter ""'); > + > +ok > + > +s:drop() > +test_run:cmd('stop server replica') > +test_run:cmd('cleanup server replica') > +test_run:cmd('delete server replica') > +test_run:cleanup_cluster() > +-- cannot decrease memtx_memory to its original value. > +box.cfg{memtx_max_tuple_size =3D tup_sz} > +box.schema.user.revoke('guest', 'replication') > diff --git a/test/replication/replica_big.lua = b/test/replication/replica_big.lua > new file mode 100644 > index 000000000..b768420fd > --- /dev/null > +++ b/test/replication/replica_big.lua > @@ -0,0 +1,12 @@ > +#!/usr/bin/env tarantool > + > +box.cfg({ > + listen =3D os.getenv("LISTEN"), > + replication =3D os.getenv("MASTER"), > + memtx_memory =3D 1024 * 1024 * 1024, > + memtx_max_tuple_size =3D 21 * 1024 * 1024, > + replication_timeout =3D 0.1, > + replication_connect_timeout =3D 0.5, > +}) > + > +require('console').listen(os.getenv('ADMIN')) > diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg > index 5e8809731..91e884ece 100644 > --- a/test/replication/suite.cfg > +++ b/test/replication/suite.cfg > @@ -9,6 +9,7 @@ > "wal_rw_stress.test.lua": {}, > "force_recovery.test.lua": {}, > "on_schema_init.test.lua": {}, > + "long_row_timeout.test.lua": {}, > "*": { > "memtx": {"engine": "memtx"}, > "vinyl": {"engine": "vinyl"} > --=20 > 2.17.2 (Apple Git-113) >=20 >=20 Rewrote the test a little to use less memtx_memory in main testing = instance. Here=E2=80=99s the diff. diff --git a/test/replication/long_row_timeout.result = b/test/replication/long_row_timeout.result index 690fdcbb4..3f93f1913 100644 --- a/test/replication/long_row_timeout.result +++ b/test/replication/long_row_timeout.result @@ -28,7 +28,7 @@ box.info.replication[2].downstream.status tup_sz =3D box.cfg.memtx_max_tuple_size --- ... -box.cfg{memtx_max_tuple_size =3D 21 * 1024 * 1024, memtx_memory =3D = 1024 * 1024 * 1024} +box.cfg{memtx_max_tuple_size =3D 21 * 1024 * 1024} --- ... -- insert some big rows which cannot be read in one go, so applier = yields @@ -39,7 +39,7 @@ s =3D box.schema.space.create('test') _ =3D s:create_index('pk') --- ... -for i =3D 1,5 do box.space.test:insert{i, require('digest').urandom(20 = * 1024 * 1024)} end +for i =3D 1,5 do box.space.test:replace{1, digest.urandom(20 * 1024 * = 1024)} collectgarbage('collect') end --- ... -- replication_disconnect_timeout is 4 * replication_timeout, check = that @@ -89,7 +89,6 @@ test_run:cmd('delete server replica') test_run:cleanup_cluster() --- ... --- cannot decrease memtx_memory to its original value. box.cfg{memtx_max_tuple_size =3D tup_sz} --- ... diff --git a/test/replication/long_row_timeout.test.lua = b/test/replication/long_row_timeout.test.lua index 21a522018..3993f1657 100644 --- a/test/replication/long_row_timeout.test.lua +++ b/test/replication/long_row_timeout.test.lua @@ -10,13 +10,13 @@ test_run:cmd('create server replica with = rpl_master=3Ddefault, script=3D"replication test_run:cmd('start server replica') box.info.replication[2].downstream.status tup_sz =3D box.cfg.memtx_max_tuple_size -box.cfg{memtx_max_tuple_size =3D 21 * 1024 * 1024, memtx_memory =3D = 1024 * 1024 * 1024} +box.cfg{memtx_max_tuple_size =3D 21 * 1024 * 1024} =20 -- insert some big rows which cannot be read in one go, so applier = yields -- on read a couple of times. s =3D box.schema.space.create('test') _ =3D s:create_index('pk') -for i =3D 1,5 do box.space.test:insert{i, require('digest').urandom(20 = * 1024 * 1024)} end +for i =3D 1,5 do box.space.test:replace{1, digest.urandom(20 * 1024 * = 1024)} collectgarbage('collect') end -- replication_disconnect_timeout is 4 * replication_timeout, check = that -- replica doesn't time out too early. test_run:cmd('setopt delimiter ";"') @@ -38,6 +38,5 @@ test_run:cmd('stop server replica') test_run:cmd('cleanup server replica') test_run:cmd('delete server replica') test_run:cleanup_cluster() --- cannot decrease memtx_memory to its original value. box.cfg{memtx_max_tuple_size =3D tup_sz} box.schema.user.revoke('guest', 'replication')=