From: Serge Petrenko <sergepetrenko@tarantool.org> To: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: tarantool-patches@freelists.org Subject: Re: [tarantool-patches] [PATCH] evio: fix timeout calculations Date: Thu, 14 Mar 2019 18:57:12 +0300 [thread overview] Message-ID: <D50D4CB9-4CDC-4483-85D7-CC0C9C9D3261@tarantool.org> (raw) In-Reply-To: <20190314151641.26876-1-sergepetrenko@tarantool.org> > 14 марта 2019 г., в 18:16, Serge Petrenko <sergepetrenko@tarantool.org> написал(а): > > The function evio_timeout_update() failed to update the starting time > point, which lead to timeouts happening much faster than they should if > there were consecutive calls to the function. > This lead, for example, to applier timing out while reading a several > megabyte-size row in 0.2 seconds even if replication_timeout was set to > 15 seconds. > > Closes #4042 > --- > https://github.com/tarantool/tarantool/tree/sp/gh-4042-applier-timeout > https://github.com/tarantool/tarantool/issues/4042 > > src/box/xrow_io.cc | 4 +- > src/lib/core/coio.cc | 18 ++-- > src/lib/core/coio.h | 2 +- > src/lib/core/evio.h | 5 +- > test/replication/long_row_timeout.result | 98 ++++++++++++++++++++++ > test/replication/long_row_timeout.test.lua | 43 ++++++++++ > test/replication/replica_big.lua | 12 +++ > test/replication/suite.cfg | 1 + > 8 files changed, 169 insertions(+), 14 deletions(-) > create mode 100644 test/replication/long_row_timeout.result > create mode 100644 test/replication/long_row_timeout.test.lua > create mode 100644 test/replication/replica_big.lua > > diff --git a/src/box/xrow_io.cc b/src/box/xrow_io.cc > index 4b6e68bc1..48707982b 100644 > --- a/src/box/xrow_io.cc > +++ b/src/box/xrow_io.cc > @@ -71,7 +71,7 @@ coio_read_xrow_timeout_xc(struct ev_io *coio, struct ibuf *in, > /* Read fixed header */ > if (ibuf_used(in) < 1) > coio_breadn_timeout(coio, in, 1, delay); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > > /* Read length */ > if (mp_typeof(*in->rpos) != MP_UINT) { > @@ -81,7 +81,7 @@ coio_read_xrow_timeout_xc(struct ev_io *coio, struct ibuf *in, > ssize_t to_read = mp_check_uint(in->rpos, in->wpos); > if (to_read > 0) > coio_breadn_timeout(coio, in, to_read, delay); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > > uint32_t len = mp_decode_uint((const char **) &in->rpos); > > diff --git a/src/lib/core/coio.cc b/src/lib/core/coio.cc > index 6b94cf2a6..e88d724d5 100644 > --- a/src/lib/core/coio.cc > +++ b/src/lib/core/coio.cc > @@ -210,7 +210,7 @@ coio_connect_timeout(struct ev_io *coio, struct uri *uri, struct sockaddr *addr, > if (!uri->host_hint) freeaddrinfo(ai); > else free(ai_local.ai_addr); > }); > - evio_timeout_update(loop(), start, &delay); > + evio_timeout_update(loop(), &start, &delay); > > coio_timeout_init(&start, &delay, timeout); > assert(! evio_has_fd(coio)); > @@ -232,7 +232,7 @@ coio_connect_timeout(struct ev_io *coio, struct uri *uri, struct sockaddr *addr, > } > ai = ai->ai_next; > ev_now_update(loop); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } > > tnt_raise(SocketError, sio_socketname(coio->fd), "connection failed"); > @@ -278,7 +278,7 @@ coio_accept(struct ev_io *coio, struct sockaddr *addr, > fiber_testcancel(); > if (is_timedout) > tnt_raise(TimedOut); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } > } > > @@ -338,7 +338,7 @@ coio_read_ahead_timeout(struct ev_io *coio, void *buf, size_t sz, > fiber_testcancel(); > if (is_timedout) > tnt_raise(TimedOut); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } > } > > @@ -433,7 +433,7 @@ coio_write_timeout(struct ev_io *coio, const void *buf, size_t sz, > > if (is_timedout) > tnt_raise(TimedOut); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } > } > > @@ -499,7 +499,7 @@ coio_writev_timeout(struct ev_io *coio, struct iovec *iov, int iovcnt, > > if (is_timedout) > tnt_raise(TimedOut); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } > return total; > } > @@ -545,7 +545,7 @@ coio_sendto_timeout(struct ev_io *coio, const void *buf, size_t sz, int flags, > fiber_testcancel(); > if (is_timedout) > tnt_raise(TimedOut); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } > } > > @@ -590,7 +590,7 @@ coio_recvfrom_timeout(struct ev_io *coio, void *buf, size_t sz, int flags, > fiber_testcancel(); > if (is_timedout) > tnt_raise(TimedOut); > - coio_timeout_update(start, &delay); > + coio_timeout_update(&start, &delay); > } > } > > @@ -759,7 +759,7 @@ coio_write_fd_timeout(int fd, const void *data, size_t size, ev_tstamp timeout) > } > if (errno == EAGAIN || errno == EWOULDBLOCK) { > coio_wait(fd, COIO_WRITE, delay); > - evio_timeout_update(loop, start, &delay); > + evio_timeout_update(loop, &start, &delay); > } else if (errno != EINTR) { > diag_set(SocketError, sio_socketname(fd), "write"); > return -1; > diff --git a/src/lib/core/coio.h b/src/lib/core/coio.h > index 104abf720..6a2337689 100644 > --- a/src/lib/core/coio.h > +++ b/src/lib/core/coio.h > @@ -88,7 +88,7 @@ coio_timeout_init(ev_tstamp *start, ev_tstamp *delay, > } > > static inline void > -coio_timeout_update(ev_tstamp start, ev_tstamp *delay) > +coio_timeout_update(ev_tstamp *start, ev_tstamp *delay) > { > return evio_timeout_update(loop(), start, delay); > } > diff --git a/src/lib/core/evio.h b/src/lib/core/evio.h > index bd48235bc..c86be9e6a 100644 > --- a/src/lib/core/evio.h > +++ b/src/lib/core/evio.h > @@ -148,9 +148,10 @@ evio_timeout_init(ev_loop *loop, ev_tstamp *start, ev_tstamp *delay, > } > > static inline void > -evio_timeout_update(ev_loop *loop, ev_tstamp start, ev_tstamp *delay) > +evio_timeout_update(ev_loop *loop, ev_tstamp *start, ev_tstamp *delay) > { > - ev_tstamp elapsed = ev_monotonic_now(loop) - start; > + ev_tstamp elapsed = ev_monotonic_now(loop) - *start; > + *start += elapsed; > *delay = (elapsed >= *delay) ? 0 : *delay - elapsed; > } > > diff --git a/test/replication/long_row_timeout.result b/test/replication/long_row_timeout.result > new file mode 100644 > index 000000000..690fdcbb4 > --- /dev/null > +++ b/test/replication/long_row_timeout.result > @@ -0,0 +1,98 @@ > +fiber = require('fiber') > +--- > +... > +digest = require('digest') > +--- > +... > +test_run = require('test_run').new() > +--- > +... > +-- > +-- gh-4042 applier read times out too fast when reading large rows. > +-- > +box.schema.user.grant('guest', 'replication') > +--- > +... > +test_run:cmd('create server replica with rpl_master=default, script="replication/replica_big.lua"') > +--- > +- true > +... > +test_run:cmd('start server replica') > +--- > +- true > +... > +box.info.replication[2].downstream.status > +--- > +- follow > +... > +tup_sz = box.cfg.memtx_max_tuple_size > +--- > +... > +box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024, memtx_memory = 1024 * 1024 * 1024} > +--- > +... > +-- insert some big rows which cannot be read in one go, so applier yields > +-- on read a couple of times. > +s = box.schema.space.create('test') > +--- > +... > +_ = s:create_index('pk') > +--- > +... > +for i = 1,5 do box.space.test:insert{i, require('digest').urandom(20 * 1024 * 1024)} end > +--- > +... > +-- replication_disconnect_timeout is 4 * replication_timeout, check that > +-- replica doesn't time out too early. > +test_run:cmd('setopt delimiter ";"') > +--- > +- true > +... > +ok = true; > +--- > +... > +start = fiber.time(); > +--- > +... > +while fiber.time() - start < 3 * box.cfg.replication_timeout do > + if box.info.replication[2].downstream.status ~= 'follow' then > + ok = false > + break > + end > + fiber.sleep(0.001) > +end; > +--- > +... > +test_run:cmd('setopt delimiter ""'); > +--- > +- true > +... > +ok > +--- > +- true > +... > +s:drop() > +--- > +... > +test_run:cmd('stop server replica') > +--- > +- true > +... > +test_run:cmd('cleanup server replica') > +--- > +- true > +... > +test_run:cmd('delete server replica') > +--- > +- true > +... > +test_run:cleanup_cluster() > +--- > +... > +-- cannot decrease memtx_memory to its original value. > +box.cfg{memtx_max_tuple_size = tup_sz} > +--- > +... > +box.schema.user.revoke('guest', 'replication') > +--- > +... > diff --git a/test/replication/long_row_timeout.test.lua b/test/replication/long_row_timeout.test.lua > new file mode 100644 > index 000000000..21a522018 > --- /dev/null > +++ b/test/replication/long_row_timeout.test.lua > @@ -0,0 +1,43 @@ > +fiber = require('fiber') > +digest = require('digest') > +test_run = require('test_run').new() > + > +-- > +-- gh-4042 applier read times out too fast when reading large rows. > +-- > +box.schema.user.grant('guest', 'replication') > +test_run:cmd('create server replica with rpl_master=default, script="replication/replica_big.lua"') > +test_run:cmd('start server replica') > +box.info.replication[2].downstream.status > +tup_sz = box.cfg.memtx_max_tuple_size > +box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024, memtx_memory = 1024 * 1024 * 1024} > + > +-- insert some big rows which cannot be read in one go, so applier yields > +-- on read a couple of times. > +s = box.schema.space.create('test') > +_ = s:create_index('pk') > +for i = 1,5 do box.space.test:insert{i, require('digest').urandom(20 * 1024 * 1024)} end > +-- replication_disconnect_timeout is 4 * replication_timeout, check that > +-- replica doesn't time out too early. > +test_run:cmd('setopt delimiter ";"') > +ok = true; > +start = fiber.time(); > +while fiber.time() - start < 3 * box.cfg.replication_timeout do > + if box.info.replication[2].downstream.status ~= 'follow' then > + ok = false > + break > + end > + fiber.sleep(0.001) > +end; > +test_run:cmd('setopt delimiter ""'); > + > +ok > + > +s:drop() > +test_run:cmd('stop server replica') > +test_run:cmd('cleanup server replica') > +test_run:cmd('delete server replica') > +test_run:cleanup_cluster() > +-- cannot decrease memtx_memory to its original value. > +box.cfg{memtx_max_tuple_size = tup_sz} > +box.schema.user.revoke('guest', 'replication') > diff --git a/test/replication/replica_big.lua b/test/replication/replica_big.lua > new file mode 100644 > index 000000000..b768420fd > --- /dev/null > +++ b/test/replication/replica_big.lua > @@ -0,0 +1,12 @@ > +#!/usr/bin/env tarantool > + > +box.cfg({ > + listen = os.getenv("LISTEN"), > + replication = os.getenv("MASTER"), > + memtx_memory = 1024 * 1024 * 1024, > + memtx_max_tuple_size = 21 * 1024 * 1024, > + replication_timeout = 0.1, > + replication_connect_timeout = 0.5, > +}) > + > +require('console').listen(os.getenv('ADMIN')) > diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg > index 5e8809731..91e884ece 100644 > --- a/test/replication/suite.cfg > +++ b/test/replication/suite.cfg > @@ -9,6 +9,7 @@ > "wal_rw_stress.test.lua": {}, > "force_recovery.test.lua": {}, > "on_schema_init.test.lua": {}, > + "long_row_timeout.test.lua": {}, > "*": { > "memtx": {"engine": "memtx"}, > "vinyl": {"engine": "vinyl"} > -- > 2.17.2 (Apple Git-113) > > Rewrote the test a little to use less memtx_memory in main testing instance. Here’s the diff. diff --git a/test/replication/long_row_timeout.result b/test/replication/long_row_timeout.result index 690fdcbb4..3f93f1913 100644 --- a/test/replication/long_row_timeout.result +++ b/test/replication/long_row_timeout.result @@ -28,7 +28,7 @@ box.info.replication[2].downstream.status tup_sz = box.cfg.memtx_max_tuple_size --- ... -box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024, memtx_memory = 1024 * 1024 * 1024} +box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024} --- ... -- insert some big rows which cannot be read in one go, so applier yields @@ -39,7 +39,7 @@ s = box.schema.space.create('test') _ = s:create_index('pk') --- ... -for i = 1,5 do box.space.test:insert{i, require('digest').urandom(20 * 1024 * 1024)} end +for i = 1,5 do box.space.test:replace{1, digest.urandom(20 * 1024 * 1024)} collectgarbage('collect') end --- ... -- replication_disconnect_timeout is 4 * replication_timeout, check that @@ -89,7 +89,6 @@ test_run:cmd('delete server replica') test_run:cleanup_cluster() --- ... --- cannot decrease memtx_memory to its original value. box.cfg{memtx_max_tuple_size = tup_sz} --- ... diff --git a/test/replication/long_row_timeout.test.lua b/test/replication/long_row_timeout.test.lua index 21a522018..3993f1657 100644 --- a/test/replication/long_row_timeout.test.lua +++ b/test/replication/long_row_timeout.test.lua @@ -10,13 +10,13 @@ test_run:cmd('create server replica with rpl_master=default, script="replication test_run:cmd('start server replica') box.info.replication[2].downstream.status tup_sz = box.cfg.memtx_max_tuple_size -box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024, memtx_memory = 1024 * 1024 * 1024} +box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024} -- insert some big rows which cannot be read in one go, so applier yields -- on read a couple of times. s = box.schema.space.create('test') _ = s:create_index('pk') -for i = 1,5 do box.space.test:insert{i, require('digest').urandom(20 * 1024 * 1024)} end +for i = 1,5 do box.space.test:replace{1, digest.urandom(20 * 1024 * 1024)} collectgarbage('collect') end -- replication_disconnect_timeout is 4 * replication_timeout, check that -- replica doesn't time out too early. test_run:cmd('setopt delimiter ";"') @@ -38,6 +38,5 @@ test_run:cmd('stop server replica') test_run:cmd('cleanup server replica') test_run:cmd('delete server replica') test_run:cleanup_cluster() --- cannot decrease memtx_memory to its original value. box.cfg{memtx_max_tuple_size = tup_sz} box.schema.user.revoke('guest', 'replication')
next prev parent reply other threads:[~2019-03-14 15:57 UTC|newest] Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-03-14 15:16 Serge Petrenko 2019-03-14 15:57 ` Serge Petrenko [this message] 2019-03-14 16:43 ` [tarantool-patches] " Vladimir Davydov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=D50D4CB9-4CDC-4483-85D7-CC0C9C9D3261@tarantool.org \ --to=sergepetrenko@tarantool.org \ --cc=tarantool-patches@freelists.org \ --cc=vdavydov.dev@gmail.com \ --subject='Re: [tarantool-patches] [PATCH] evio: fix timeout calculations' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox