From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH 03/18] vinyl: move quota related methods and variables from vy_env to vy_quota Date: Thu, 16 Aug 2018 19:11:57 +0300 Message-Id: <4b1670e2b548c34d4d19dfebea53b5405ef4892e.1534432819.git.vdavydov.dev@gmail.com> In-Reply-To: References: In-Reply-To: References: To: kostja@tarantool.org Cc: tarantool-patches@freelists.org List-ID: Watermark calculation is a private business of vy_quota. Let's move related stuff from vy_env to vy_quota. This will also make it easier to implement throttling opaque to the caller. --- src/box/vinyl.c | 145 ++++------------------------------------------------- src/box/vy_quota.c | 119 +++++++++++++++++++++++++++++++++++++++---- src/box/vy_quota.h | 45 +++++++++++++---- 3 files changed, 156 insertions(+), 153 deletions(-) diff --git a/src/box/vinyl.c b/src/box/vinyl.c index d0e822bf..a07f661f 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -45,7 +45,6 @@ #include "vy_scheduler.h" #include "vy_stat.h" -#include #include #include #include @@ -105,31 +104,6 @@ struct vy_env { struct mempool iterator_pool; /** Memory quota */ struct vy_quota quota; - /** Timer for updating quota watermark. */ - ev_timer quota_timer; - /** - * Amount of quota used since the last - * invocation of the quota timer callback. - */ - size_t quota_use_curr; - /** - * Quota use rate, in bytes per second. - * Calculated as exponentially weighted - * moving average of quota_use_curr. - */ - size_t quota_use_rate; - /** - * Dump bandwidth is needed for calculating the quota watermark. - * The higher the bandwidth, the later we can start dumping w/o - * suffering from transaction throttling. So we want to be very - * conservative about estimating the bandwidth. - * - * To make sure we don't overestimate it, we maintain a - * histogram of all observed measurements and assume the - * bandwidth to be equal to the 10th percentile, i.e. the - * best result among 10% worst measurements. - */ - struct histogram *dump_bw; /** Common LSM tree environment. */ struct vy_lsm_env lsm_env; /** Environment for cache subsystem */ @@ -169,26 +143,6 @@ struct vy_env { bool force_recovery; }; -enum { - /** - * Time interval between successive updates of - * quota watermark and use rate, in seconds. - */ - VY_QUOTA_UPDATE_INTERVAL = 1, - /** - * Period of time over which the quota use rate - * is averaged, in seconds. - */ - VY_QUOTA_RATE_AVG_PERIOD = 5, -}; - -static inline int64_t -vy_dump_bandwidth(struct vy_env *env) -{ - /* See comment to vy_env::dump_bw. */ - return histogram_percentile(env->dump_bw, 10); -} - struct vinyl_engine { struct engine base; /** Vinyl environment. */ @@ -300,8 +254,8 @@ vy_info_append_quota(struct vy_env *env, struct info_handler *h) info_append_int(h, "used", q->used); info_append_int(h, "limit", q->limit); info_append_int(h, "watermark", q->watermark); - info_append_int(h, "use_rate", env->quota_use_rate); - info_append_int(h, "dump_bandwidth", vy_dump_bandwidth(env)); + info_append_int(h, "use_rate", q->use_rate); + info_append_int(h, "dump_bandwidth", vy_quota_dump_bandwidth(q)); info_table_end(h); } @@ -2340,14 +2294,9 @@ vinyl_engine_prepare(struct engine *engine, struct txn *txn) size_t mem_used_after = lsregion_used(&env->mem_env.allocator); assert(mem_used_after >= mem_used_before); - size_t write_size = mem_used_after - mem_used_before; - vy_quota_commit_use(&env->quota, tx->write_size, write_size); - - if (rc != 0) - return -1; - - env->quota_use_curr += write_size; - return 0; + vy_quota_commit_use(&env->quota, tx->write_size, + mem_used_after - mem_used_before); + return rc; } static void @@ -2418,41 +2367,6 @@ vinyl_engine_rollback_statement(struct engine *engine, struct txn *txn, /** {{{ Environment */ static void -vy_env_quota_timer_cb(ev_loop *loop, ev_timer *timer, int events) -{ - (void)loop; - (void)events; - - struct vy_env *e = timer->data; - - /* - * Update the quota use rate with the new measurement. - */ - const double weight = 1 - exp(-VY_QUOTA_UPDATE_INTERVAL / - (double)VY_QUOTA_RATE_AVG_PERIOD); - e->quota_use_rate = (1 - weight) * e->quota_use_rate + - weight * e->quota_use_curr / VY_QUOTA_UPDATE_INTERVAL; - e->quota_use_curr = 0; - - /* - * Due to log structured nature of the lsregion allocator, - * which is used for allocating statements, we cannot free - * memory in chunks, only all at once. Therefore we should - * configure the watermark so that by the time we hit the - * limit, all memory have been dumped, i.e. - * - * limit - watermark watermark - * ----------------- = -------------- - * quota_use_rate dump_bandwidth - */ - int64_t dump_bandwidth = vy_dump_bandwidth(e); - size_t watermark = ((double)e->quota.limit * dump_bandwidth / - (dump_bandwidth + e->quota_use_rate + 1)); - - vy_quota_set_watermark(&e->quota, watermark); -} - -static void vy_env_quota_exceeded_cb(struct vy_quota *quota) { struct vy_env *env = container_of(quota, struct vy_env, quota); @@ -2498,14 +2412,9 @@ vy_env_dump_complete_cb(struct vy_scheduler *scheduler, size_t mem_used_after = lsregion_used(allocator); assert(mem_used_after <= mem_used_before); size_t mem_dumped = mem_used_before - mem_used_after; - vy_quota_dump(quota, mem_dumped); + vy_quota_dump(quota, mem_dumped, dump_duration); say_info("dumped %zu bytes in %.1f sec", mem_dumped, dump_duration); - - /* Account dump bandwidth. */ - if (dump_duration > 0) - histogram_collect(env->dump_bw, - mem_dumped / dump_duration); } static struct vy_squash_queue * @@ -2520,21 +2429,6 @@ static struct vy_env * vy_env_new(const char *path, size_t memory, int read_threads, int write_threads, bool force_recovery) { - enum { KB = 1000, MB = 1000 * 1000 }; - static int64_t dump_bandwidth_buckets[] = { - 100 * KB, 200 * KB, 300 * KB, 400 * KB, 500 * KB, - 1 * MB, 2 * MB, 3 * MB, 4 * MB, 5 * MB, - 10 * MB, 20 * MB, 30 * MB, 40 * MB, 50 * MB, - 60 * MB, 70 * MB, 80 * MB, 90 * MB, 100 * MB, - 110 * MB, 120 * MB, 130 * MB, 140 * MB, 150 * MB, - 160 * MB, 170 * MB, 180 * MB, 190 * MB, 200 * MB, - 220 * MB, 240 * MB, 260 * MB, 280 * MB, 300 * MB, - 320 * MB, 340 * MB, 360 * MB, 380 * MB, 400 * MB, - 450 * MB, 500 * MB, 550 * MB, 600 * MB, 650 * MB, - 700 * MB, 750 * MB, 800 * MB, 850 * MB, 900 * MB, - 950 * MB, 1000 * MB, - }; - struct vy_env *e = malloc(sizeof(*e)); if (unlikely(e == NULL)) { diag_set(OutOfMemory, sizeof(*e), "malloc", "struct vy_env"); @@ -2554,19 +2448,6 @@ vy_env_new(const char *path, size_t memory, goto error_path; } - e->dump_bw = histogram_new(dump_bandwidth_buckets, - lengthof(dump_bandwidth_buckets)); - if (e->dump_bw == NULL) { - diag_set(OutOfMemory, 0, "histogram_new", - "dump bandwidth histogram"); - goto error_dump_bw; - } - /* - * Until we dump anything, assume bandwidth to be 10 MB/s, - * which should be fine for initial guess. - */ - histogram_collect(e->dump_bw, 10 * MB); - e->xm = tx_manager_new(); if (e->xm == NULL) goto error_xm; @@ -2584,18 +2465,18 @@ vy_env_new(const char *path, size_t memory, vy_squash_schedule, e) != 0) goto error_lsm_env; + if (vy_quota_create(&e->quota, vy_env_quota_exceeded_cb) != 0) + goto error_quota; + struct slab_cache *slab_cache = cord_slab_cache(); mempool_create(&e->iterator_pool, slab_cache, sizeof(struct vinyl_iterator)); - vy_quota_create(&e->quota, vy_env_quota_exceeded_cb); - ev_timer_init(&e->quota_timer, vy_env_quota_timer_cb, 0, - VY_QUOTA_UPDATE_INTERVAL); - e->quota_timer.data = e; - ev_timer_start(loop(), &e->quota_timer); vy_cache_env_create(&e->cache_env, slab_cache); vy_run_env_create(&e->run_env); vy_log_init(e->path); return e; +error_quota: + vy_lsm_env_destroy(&e->lsm_env); error_lsm_env: vy_mem_env_destroy(&e->mem_env); vy_scheduler_destroy(&e->scheduler); @@ -2603,8 +2484,6 @@ error_lsm_env: error_squash_queue: tx_manager_delete(e->xm); error_xm: - histogram_delete(e->dump_bw); -error_dump_bw: free(e->path); error_path: free(e); @@ -2614,12 +2493,10 @@ error_path: static void vy_env_delete(struct vy_env *e) { - ev_timer_stop(loop(), &e->quota_timer); vy_scheduler_destroy(&e->scheduler); vy_squash_queue_delete(e->squash_queue); tx_manager_delete(e->xm); free(e->path); - histogram_delete(e->dump_bw); mempool_destroy(&e->iterator_pool); vy_run_env_destroy(&e->run_env); vy_lsm_env_destroy(&e->lsm_env); diff --git a/src/box/vy_quota.c b/src/box/vy_quota.c index 6e93d652..c8177c69 100644 --- a/src/box/vy_quota.c +++ b/src/box/vy_quota.c @@ -32,30 +32,127 @@ #include #include +#include +#include #include +#include "diag.h" #include "fiber.h" #include "fiber_cond.h" #include "say.h" +#include "histogram.h" +#include "trivia/util.h" -void +enum { + /** + * Time interval between successive updates of + * quota watermark and use rate, in seconds. + */ + VY_QUOTA_UPDATE_INTERVAL = 1, + /** + * Period of time over which the quota use rate + * is averaged, in seconds. + */ + VY_QUOTA_RATE_AVG_PERIOD = 5, +}; + +static void +vy_quota_timer_cb(ev_loop *loop, ev_timer *timer, int events) +{ + (void)loop; + (void)events; + + struct vy_quota *q = timer->data; + + /* + * Update the quota use rate with the new measurement. + */ + const double weight = 1 - exp(-VY_QUOTA_UPDATE_INTERVAL / + (double)VY_QUOTA_RATE_AVG_PERIOD); + q->use_rate = (1 - weight) * q->use_rate + + weight * q->use_curr / VY_QUOTA_UPDATE_INTERVAL; + q->use_curr = 0; + + /* + * Due to log structured nature of the lsregion allocator, + * which is used for allocating statements, we cannot free + * memory in chunks, only all at once. Therefore we should + * configure the watermark so that by the time we hit the + * limit, all memory have been dumped, i.e. + * + * limit - watermark watermark + * ----------------- = -------------- + * use_rate dump_bandwidth + */ + size_t dump_bandwidth = vy_quota_dump_bandwidth(q); + q->watermark = ((double)q->limit * dump_bandwidth / + (dump_bandwidth + q->use_rate + 1)); + if (q->used >= q->watermark) + q->quota_exceeded_cb(q); +} + +int vy_quota_create(struct vy_quota *q, vy_quota_exceeded_f quota_exceeded_cb) { + enum { KB = 1000, MB = 1000 * 1000 }; + static int64_t dump_bandwidth_buckets[] = { + 100 * KB, 200 * KB, 300 * KB, 400 * KB, 500 * KB, + 1 * MB, 2 * MB, 3 * MB, 4 * MB, 5 * MB, + 10 * MB, 20 * MB, 30 * MB, 40 * MB, 50 * MB, + 60 * MB, 70 * MB, 80 * MB, 90 * MB, 100 * MB, + 110 * MB, 120 * MB, 130 * MB, 140 * MB, 150 * MB, + 160 * MB, 170 * MB, 180 * MB, 190 * MB, 200 * MB, + 220 * MB, 240 * MB, 260 * MB, 280 * MB, 300 * MB, + 320 * MB, 340 * MB, 360 * MB, 380 * MB, 400 * MB, + 450 * MB, 500 * MB, 550 * MB, 600 * MB, 650 * MB, + 700 * MB, 750 * MB, 800 * MB, 850 * MB, 900 * MB, + 950 * MB, 1000 * MB, + }; + + q->dump_bw = histogram_new(dump_bandwidth_buckets, + lengthof(dump_bandwidth_buckets)); + if (q->dump_bw == NULL) { + diag_set(OutOfMemory, 0, "histogram_new", + "dump bandwidth histogram"); + return -1; + } + /* + * Until we dump anything, assume bandwidth to be 10 MB/s, + * which should be fine for initial guess. + */ + histogram_collect(q->dump_bw, 10 * MB); + q->limit = SIZE_MAX; q->watermark = SIZE_MAX; q->used = 0; + q->use_curr = 0; + q->use_rate = 0; q->too_long_threshold = TIMEOUT_INFINITY; q->quota_exceeded_cb = quota_exceeded_cb; fiber_cond_create(&q->cond); + ev_timer_init(&q->timer, vy_quota_timer_cb, 0, + VY_QUOTA_UPDATE_INTERVAL); + q->timer.data = q; + ev_timer_start(loop(), &q->timer); + return 0; } void vy_quota_destroy(struct vy_quota *q) { + ev_timer_stop(loop(), &q->timer); + histogram_delete(q->dump_bw); fiber_cond_broadcast(&q->cond); fiber_cond_destroy(&q->cond); } +size_t +vy_quota_dump_bandwidth(struct vy_quota *q) +{ + /* See comment to vy_quota::dump_bw. */ + return histogram_percentile(q->dump_bw, 10); +} + void vy_quota_set_limit(struct vy_quota *q, size_t limit) { @@ -66,27 +163,24 @@ vy_quota_set_limit(struct vy_quota *q, size_t limit) } void -vy_quota_set_watermark(struct vy_quota *q, size_t watermark) -{ - q->watermark = watermark; - if (q->used >= watermark) - q->quota_exceeded_cb(q); -} - -void vy_quota_force_use(struct vy_quota *q, size_t size) { q->used += size; + q->use_curr += size; if (q->used >= q->watermark) q->quota_exceeded_cb(q); } void -vy_quota_dump(struct vy_quota *q, size_t size) +vy_quota_dump(struct vy_quota *q, size_t size, double duration) { assert(q->used >= size); q->used -= size; fiber_cond_broadcast(&q->cond); + + /* Account dump bandwidth. */ + if (duration > 0) + histogram_collect(q->dump_bw, size / duration); } int @@ -107,6 +201,7 @@ vy_quota_try_use(struct vy_quota *q, size_t size, double timeout) if (q->used + size > q->limit) return -1; q->used += size; + q->use_curr += size; if (q->used >= q->watermark) q->quota_exceeded_cb(q); return 0; @@ -119,6 +214,10 @@ vy_quota_commit_use(struct vy_quota *q, size_t reserved, size_t used) size_t excess = reserved - used; assert(q->used >= excess); q->used -= excess; + if (q->use_curr >= excess) + q->use_curr -= excess; + else /* was reset by timeout */ + q->use_curr = 0; fiber_cond_broadcast(&q->cond); } if (reserved < used) diff --git a/src/box/vy_quota.h b/src/box/vy_quota.h index cf70b1ab..3a7a24e7 100644 --- a/src/box/vy_quota.h +++ b/src/box/vy_quota.h @@ -32,6 +32,7 @@ */ #include +#include #include "fiber_cond.h" #if defined(__cplusplus) @@ -39,6 +40,7 @@ extern "C" { #endif /* defined(__cplusplus) */ struct vy_quota; +struct histogram; typedef void (*vy_quota_exceeded_f)(struct vy_quota *quota); @@ -76,14 +78,43 @@ struct vy_quota { * It is supposed to trigger memory reclaim. */ vy_quota_exceeded_f quota_exceeded_cb; + /** Timer for updating quota watermark. */ + ev_timer timer; + /** + * Amount of quota used since the last + * invocation of the quota timer callback. + */ + size_t use_curr; + /** + * Quota use rate, in bytes per second. + * Calculated as exponentially weighted + * moving average of use_curr. + */ + size_t use_rate; + /** + * Dump bandwidth is needed for calculating the quota watermark. + * The higher the bandwidth, the later we can start dumping w/o + * suffering from transaction throttling. So we want to be very + * conservative about estimating the bandwidth. + * + * To make sure we don't overestimate it, we maintain a + * histogram of all observed measurements and assume the + * bandwidth to be equal to the 10th percentile, i.e. the + * best result among 10% worst measurements. + */ + struct histogram *dump_bw; }; -void +int vy_quota_create(struct vy_quota *q, vy_quota_exceeded_f quota_exceeded_cb); void vy_quota_destroy(struct vy_quota *q); +/** Return quota dump bandwidth. */ +size_t +vy_quota_dump_bandwidth(struct vy_quota *q); + /** * Set memory limit. If current memory usage exceeds * the new limit, invoke the callback. @@ -92,13 +123,6 @@ void vy_quota_set_limit(struct vy_quota *q, size_t limit); /** - * Set memory watermark. If current memory usage exceeds - * the new watermark, invoke the callback. - */ -void -vy_quota_set_watermark(struct vy_quota *q, size_t watermark); - -/** * Consume @size bytes of memory. In contrast to vy_quota_try_use() * this function does not throttle the caller. */ @@ -108,9 +132,12 @@ vy_quota_force_use(struct vy_quota *q, size_t size); /** * Function called on dump completion to release quota after * freeing memory. + * + * @size: size of dumped memory. + * @duration: how long memory dump took. */ void -vy_quota_dump(struct vy_quota *q, size_t size); +vy_quota_dump(struct vy_quota *q, size_t size, double duration); /** * Try to consume @size bytes of memory, throttle the caller -- 2.11.0