From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH 18/18] vinyl: throttle tx rate if dump does not catch up Date: Thu, 16 Aug 2018 19:12:12 +0300 Message-Id: In-Reply-To: References: In-Reply-To: References: To: kostja@tarantool.org Cc: tarantool-patches@freelists.org List-ID: If the rate at which transactions are ready to write to the database is greater than the dump bandwidth, memory will get depleted before the last dump is complete and all newer transactions will have to wait until the dump has been completed, which may take seconds or even minutes: 2018-08-16 15:45:11.739 [30874] main/1100/main vy_quota.c:291 W> waited for 555 bytes of vinyl memory quota for too long: 15.750 sec This patch set implements transaction throttling that is supposed to help avoid unpredictably long stalls. Now once a dump is started, transaction write rate will be limited so that the hard limit cannot get exceeded before the dump is complete. This is how it looks in the log: 2018-08-16 16:01:09.412 [489] main/445/main I> dumping 134217901 bytes, expected rate 6.0 MB/s, ETA 21.3 s, recent write rate 10.5 MB/s 2018-08-16 16:01:09.447 [489] main/103/vinyl.scheduler I> 513/0: dump started 2018-08-16 16:01:09.447 [489] vinyl.writer.0/103/task I> writing `./513/0/00000000000000000004.run' 2018-08-16 16:01:09.468 [489] main I> throttling enabled, max write rate 6.0 MB/s 2018-08-16 16:01:30.004 [489] vinyl.writer.0/103/task I> writing `./513/0/00000000000000000004.index' 2018-08-16 16:01:30.094 [489] main/103/vinyl.scheduler I> 513/0: dump completed 2018-08-16 16:01:30.095 [489] main/103/vinyl.scheduler I> dumped 134216236 bytes in 20.7 s, rate 6.2 MB/s 2018-08-16 16:01:30.167 [489] main I> throttling disabled Closes #1862 --- src/box/vy_quota.c | 41 ++++++++++++++++++- src/box/vy_quota.h | 13 ++++++ test/vinyl/suite.ini | 2 +- test/vinyl/throttle.result | 95 ++++++++++++++++++++++++++++++++++++++++++++ test/vinyl/throttle.test.lua | 47 
++++++++++++++++++++++ 5 files changed, 195 insertions(+), 3 deletions(-) create mode 100644 test/vinyl/throttle.result create mode 100644 test/vinyl/throttle.test.lua diff --git a/src/box/vy_quota.c b/src/box/vy_quota.c index 471a8bd0..85e18d4b 100644 --- a/src/box/vy_quota.c +++ b/src/box/vy_quota.c @@ -119,7 +119,7 @@ enum { static inline void vy_quota_signal(struct vy_quota *q) { - if (q->used < q->limit) + if (q->used < q->limit && q->use_curr < q->use_max) fiber_cond_signal(&q->cond); } @@ -133,6 +133,8 @@ vy_quota_check_watermark(struct vy_quota *q) if (!q->dump_in_progress && q->used >= q->watermark && q->quota_exceeded_cb(q)) { q->dump_in_progress = true; + q->dump_size = q->used; + q->dump_start = ev_monotonic_now(loop()); say_info("dumping %zu bytes, expected rate %.1f MB/s, " "ETA %.1f s, recent write rate %.1f MB/s", q->used, (double)q->dump_bw / 1024 / 1024, @@ -148,6 +150,7 @@ vy_quota_timer_cb(ev_loop *loop, ev_timer *timer, int events) (void)events; struct vy_quota *q = timer->data; + double now = ev_monotonic_now(loop()); /* * Update the quota use rate with the new measurement. @@ -159,6 +162,34 @@ vy_quota_timer_cb(ev_loop *loop, ev_timer *timer, int events) q->use_curr = 0; /* + * To avoid unpredictably long stalls, we must limit + * the write rate when a dump is in progress so that + * we don't hit the hard limit before the dump has + * completed, i.e. + * + * left_to_use left_to_dump + * ----------- <= ------------ + * use_rate dump_rate + */ + if (q->dump_in_progress) { + size_t dumped = q->dump_bw * (now - q->dump_start); + size_t left_to_dump = (dumped < q->dump_size ? + q->dump_size - dumped : 0); + size_t left_to_use = (q->used < q->limit ? 
+ q->limit - q->used : 0); + double max_use_rate = (left_to_use * q->dump_bw / + (left_to_dump + 1)); + if (q->use_max == SIZE_MAX) + say_info("throttling enabled, max write rate " + "%.1f MB/s", max_use_rate / 1024 / 1024); + q->use_max = VY_QUOTA_UPDATE_INTERVAL * max_use_rate; + } else { + if (q->use_max < SIZE_MAX) + say_info("throttling disabled"); + q->use_max = SIZE_MAX; + } + + /* * Update the quota watermark and trigger memory dump * if the watermark is exceeded. * @@ -172,6 +203,9 @@ vy_quota_timer_cb(ev_loop *loop, ev_timer *timer, int events) q->watermark = MIN(q->limit * VY_QUOTA_WATERMARK_MAX / 100, q->watermark); vy_quota_check_watermark(q); + + /* Wake up the next throttled fiber in the line. */ + vy_quota_signal(q); } int @@ -201,11 +235,14 @@ vy_quota_create(struct vy_quota *q, vy_quota_exceeded_f quota_exceeded_cb) q->watermark = SIZE_MAX; q->used = 0; q->use_curr = 0; + q->use_max = SIZE_MAX; q->use_rate = 0; q->too_long_threshold = TIMEOUT_INFINITY; q->dump_bw = VY_DEFAULT_DUMP_BANDWIDTH; q->quota_exceeded_cb = quota_exceeded_cb; q->dump_in_progress = false; + q->dump_size = 0; + q->dump_start = 0; fiber_cond_create(&q->cond); ev_timer_init(&q->timer, vy_quota_timer_cb, 0, VY_QUOTA_UPDATE_INTERVAL); @@ -277,7 +314,7 @@ vy_quota_try_use(struct vy_quota *q, size_t size, double timeout) double deadline = start_time + timeout; q->used += size; - while (q->used > q->limit) { + while (q->used > q->limit || q->use_curr >= q->use_max) { vy_quota_check_watermark(q); q->used -= size; int rc = fiber_cond_wait_deadline(&q->cond, deadline); diff --git a/src/box/vy_quota.h b/src/box/vy_quota.h index cb681386..ef2fb6cb 100644 --- a/src/box/vy_quota.h +++ b/src/box/vy_quota.h @@ -86,6 +86,13 @@ struct vy_quota { * true, but vy_quota_dump() hasn't been called yet. */ bool dump_in_progress; + /** + * Memory usage at the time when the last dump was started + * (memory dump size). + */ + size_t dump_size; + /** Time when the last dump was started. 
*/ + double dump_start; /** Timer for updating quota watermark. */ ev_timer timer; /** @@ -94,6 +101,12 @@ struct vy_quota { */ size_t use_curr; /** + * Maximal amount of quota that can be used between timer + * callback invocations. It is set to such a value so that + * the quota use rate never exceeds the dump bandwidth. + */ + size_t use_max; + /** * Quota use rate, in bytes per second. * Calculated as exponentially weighted * moving average of use_curr. diff --git a/test/vinyl/suite.ini b/test/vinyl/suite.ini index b9dae380..785bc63d 100644 --- a/test/vinyl/suite.ini +++ b/test/vinyl/suite.ini @@ -6,5 +6,5 @@ release_disabled = errinj.test.lua errinj_gc.test.lua errinj_vylog.test.lua part config = suite.cfg lua_libs = suite.lua stress.lua large.lua txn_proxy.lua ../box/lua/utils.lua use_unix_sockets = True -long_run = stress.test.lua large.test.lua write_iterator_rand.test.lua dump_stress.test.lua select_consistency.test.lua +long_run = stress.test.lua large.test.lua write_iterator_rand.test.lua dump_stress.test.lua select_consistency.test.lua throttle.test.lua is_parallel = False diff --git a/test/vinyl/throttle.result b/test/vinyl/throttle.result new file mode 100644 index 00000000..5634c40b --- /dev/null +++ b/test/vinyl/throttle.result @@ -0,0 +1,95 @@ +test_run = require('test_run').new() +--- +... +test_run:cmd("create server test with script='vinyl/low_quota.lua'") +--- +- true +... +test_run:cmd(string.format("start server test with args='%d'", 32 * 1024 * 1024)) +--- +- true +... +test_run:cmd('switch test') +--- +- true +... +fiber = require('fiber') +--- +... +digest = require('digest') +--- +... +box.cfg{snap_io_rate_limit = 4} +--- +... +FIBER_COUNT = 5 +--- +... +TUPLE_SIZE = 1000 +--- +... +TX_TUPLE_COUNT = 10 +--- +... +TX_SIZE = TUPLE_SIZE * TX_TUPLE_COUNT +--- +... +TX_COUNT = math.ceil(box.cfg.vinyl_memory / (TX_SIZE * FIBER_COUNT)) +--- +... +s = box.schema.space.create('test', {engine = 'vinyl'}) +--- +... 
+_ = s:create_index('primary', {parts = {1, 'unsigned', 2, 'unsigned', 3, 'unsigned'}}) +--- +... +latency = 0 +--- +... +c = fiber.channel(FIBER_COUNT) +--- +... +test_run:cmd("setopt delimiter ';'") +--- +- true +... +for i = 1, FIBER_COUNT do + fiber.create(function() + for j = 1, TX_COUNT do + local t1 = fiber.time() + box.begin() + for k = 1, TX_TUPLE_COUNT do + s:replace{i, j, k, digest.urandom(TUPLE_SIZE)} + end + box.commit() + local t2 = fiber.time() + latency = math.max(latency, t2 - t1) + end + c:put(true) + end) +end; +--- +... +test_run:cmd("setopt delimiter ''"); +--- +- true +... +for i = 1, FIBER_COUNT do c:get() end +--- +... +latency < 0.5 or latency +--- +- true +... +test_run:cmd('switch default') +--- +- true +... +test_run:cmd("stop server test") +--- +- true +... +test_run:cmd("cleanup server test") +--- +- true +... diff --git a/test/vinyl/throttle.test.lua b/test/vinyl/throttle.test.lua new file mode 100644 index 00000000..4efbbec7 --- /dev/null +++ b/test/vinyl/throttle.test.lua @@ -0,0 +1,47 @@ +test_run = require('test_run').new() +test_run:cmd("create server test with script='vinyl/low_quota.lua'") +test_run:cmd(string.format("start server test with args='%d'", 32 * 1024 * 1024)) +test_run:cmd('switch test') + +fiber = require('fiber') +digest = require('digest') + +box.cfg{snap_io_rate_limit = 4} + +FIBER_COUNT = 5 +TUPLE_SIZE = 1000 +TX_TUPLE_COUNT = 10 +TX_SIZE = TUPLE_SIZE * TX_TUPLE_COUNT +TX_COUNT = math.ceil(box.cfg.vinyl_memory / (TX_SIZE * FIBER_COUNT)) + +s = box.schema.space.create('test', {engine = 'vinyl'}) +_ = s:create_index('primary', {parts = {1, 'unsigned', 2, 'unsigned', 3, 'unsigned'}}) + +latency = 0 +c = fiber.channel(FIBER_COUNT) + +test_run:cmd("setopt delimiter ';'") +for i = 1, FIBER_COUNT do + fiber.create(function() + for j = 1, TX_COUNT do + local t1 = fiber.time() + box.begin() + for k = 1, TX_TUPLE_COUNT do + s:replace{i, j, k, digest.urandom(TUPLE_SIZE)} + end + box.commit() + local t2 = fiber.time() + 
latency = math.max(latency, t2 - t1) + end + c:put(true) + end) +end; +test_run:cmd("setopt delimiter ''"); + +for i = 1, FIBER_COUNT do c:get() end + +latency < 0.5 or latency + +test_run:cmd('switch default') +test_run:cmd("stop server test") +test_run:cmd("cleanup server test") -- 2.11.0