[PATCH 03/18] vinyl: move quota related methods and variables from vy_env to vy_quota

Vladimir Davydov vdavydov.dev at gmail.com
Thu Aug 16 19:11:57 MSK 2018


Watermark calculation is an internal concern of vy_quota. Let's move
the related code from vy_env to vy_quota. This will also make it easier
to implement throttling that is opaque to the caller.
---
 src/box/vinyl.c    | 145 ++++-------------------------------------------------
 src/box/vy_quota.c | 119 +++++++++++++++++++++++++++++++++++++++----
 src/box/vy_quota.h |  45 +++++++++++++----
 3 files changed, 156 insertions(+), 153 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index d0e822bf..a07f661f 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -45,7 +45,6 @@
 #include "vy_scheduler.h"
 #include "vy_stat.h"
 
-#include <math.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
@@ -105,31 +104,6 @@ struct vy_env {
 	struct mempool iterator_pool;
 	/** Memory quota */
 	struct vy_quota     quota;
-	/** Timer for updating quota watermark. */
-	ev_timer            quota_timer;
-	/**
-	 * Amount of quota used since the last
-	 * invocation of the quota timer callback.
-	 */
-	size_t quota_use_curr;
-	/**
-	 * Quota use rate, in bytes per second.
-	 * Calculated as exponentially weighted
-	 * moving average of quota_use_curr.
-	 */
-	size_t quota_use_rate;
-	/**
-	 * Dump bandwidth is needed for calculating the quota watermark.
-	 * The higher the bandwidth, the later we can start dumping w/o
-	 * suffering from transaction throttling. So we want to be very
-	 * conservative about estimating the bandwidth.
-	 *
-	 * To make sure we don't overestimate it, we maintain a
-	 * histogram of all observed measurements and assume the
-	 * bandwidth to be equal to the 10th percentile, i.e. the
-	 * best result among 10% worst measurements.
-	 */
-	struct histogram *dump_bw;
 	/** Common LSM tree environment. */
 	struct vy_lsm_env lsm_env;
 	/** Environment for cache subsystem */
@@ -169,26 +143,6 @@ struct vy_env {
 	bool force_recovery;
 };
 
-enum {
-	/**
-	 * Time interval between successive updates of
-	 * quota watermark and use rate, in seconds.
-	 */
-	VY_QUOTA_UPDATE_INTERVAL = 1,
-	/**
-	 * Period of time over which the quota use rate
-	 * is averaged, in seconds.
-	 */
-	VY_QUOTA_RATE_AVG_PERIOD = 5,
-};
-
-static inline int64_t
-vy_dump_bandwidth(struct vy_env *env)
-{
-	/* See comment to vy_env::dump_bw. */
-	return histogram_percentile(env->dump_bw, 10);
-}
-
 struct vinyl_engine {
 	struct engine base;
 	/** Vinyl environment. */
@@ -300,8 +254,8 @@ vy_info_append_quota(struct vy_env *env, struct info_handler *h)
 	info_append_int(h, "used", q->used);
 	info_append_int(h, "limit", q->limit);
 	info_append_int(h, "watermark", q->watermark);
-	info_append_int(h, "use_rate", env->quota_use_rate);
-	info_append_int(h, "dump_bandwidth", vy_dump_bandwidth(env));
+	info_append_int(h, "use_rate", q->use_rate);
+	info_append_int(h, "dump_bandwidth", vy_quota_dump_bandwidth(q));
 	info_table_end(h);
 }
 
@@ -2340,14 +2294,9 @@ vinyl_engine_prepare(struct engine *engine, struct txn *txn)
 
 	size_t mem_used_after = lsregion_used(&env->mem_env.allocator);
 	assert(mem_used_after >= mem_used_before);
-	size_t write_size = mem_used_after - mem_used_before;
-	vy_quota_commit_use(&env->quota, tx->write_size, write_size);
-
-	if (rc != 0)
-		return -1;
-
-	env->quota_use_curr += write_size;
-	return 0;
+	vy_quota_commit_use(&env->quota, tx->write_size,
+			    mem_used_after - mem_used_before);
+	return rc;
 }
 
 static void
@@ -2418,41 +2367,6 @@ vinyl_engine_rollback_statement(struct engine *engine, struct txn *txn,
 /** {{{ Environment */
 
 static void
-vy_env_quota_timer_cb(ev_loop *loop, ev_timer *timer, int events)
-{
-	(void)loop;
-	(void)events;
-
-	struct vy_env *e = timer->data;
-
-	/*
-	 * Update the quota use rate with the new measurement.
-	 */
-	const double weight = 1 - exp(-VY_QUOTA_UPDATE_INTERVAL /
-				      (double)VY_QUOTA_RATE_AVG_PERIOD);
-	e->quota_use_rate = (1 - weight) * e->quota_use_rate +
-		weight * e->quota_use_curr / VY_QUOTA_UPDATE_INTERVAL;
-	e->quota_use_curr = 0;
-
-	/*
-	 * Due to log structured nature of the lsregion allocator,
-	 * which is used for allocating statements, we cannot free
-	 * memory in chunks, only all at once. Therefore we should
-	 * configure the watermark so that by the time we hit the
-	 * limit, all memory have been dumped, i.e.
-	 *
-	 *   limit - watermark      watermark
-	 *   ----------------- = --------------
-	 *     quota_use_rate    dump_bandwidth
-	 */
-	int64_t dump_bandwidth = vy_dump_bandwidth(e);
-	size_t watermark = ((double)e->quota.limit * dump_bandwidth /
-			    (dump_bandwidth + e->quota_use_rate + 1));
-
-	vy_quota_set_watermark(&e->quota, watermark);
-}
-
-static void
 vy_env_quota_exceeded_cb(struct vy_quota *quota)
 {
 	struct vy_env *env = container_of(quota, struct vy_env, quota);
@@ -2498,14 +2412,9 @@ vy_env_dump_complete_cb(struct vy_scheduler *scheduler,
 	size_t mem_used_after = lsregion_used(allocator);
 	assert(mem_used_after <= mem_used_before);
 	size_t mem_dumped = mem_used_before - mem_used_after;
-	vy_quota_dump(quota, mem_dumped);
+	vy_quota_dump(quota, mem_dumped, dump_duration);
 
 	say_info("dumped %zu bytes in %.1f sec", mem_dumped, dump_duration);
-
-	/* Account dump bandwidth. */
-	if (dump_duration > 0)
-		histogram_collect(env->dump_bw,
-				  mem_dumped / dump_duration);
 }
 
 static struct vy_squash_queue *
@@ -2520,21 +2429,6 @@ static struct vy_env *
 vy_env_new(const char *path, size_t memory,
 	   int read_threads, int write_threads, bool force_recovery)
 {
-	enum { KB = 1000, MB = 1000 * 1000 };
-	static int64_t dump_bandwidth_buckets[] = {
-		100 * KB, 200 * KB, 300 * KB, 400 * KB, 500 * KB,
-		  1 * MB,   2 * MB,   3 * MB,   4 * MB,   5 * MB,
-		 10 * MB,  20 * MB,  30 * MB,  40 * MB,  50 * MB,
-		 60 * MB,  70 * MB,  80 * MB,  90 * MB, 100 * MB,
-		110 * MB, 120 * MB, 130 * MB, 140 * MB, 150 * MB,
-		160 * MB, 170 * MB, 180 * MB, 190 * MB, 200 * MB,
-		220 * MB, 240 * MB, 260 * MB, 280 * MB, 300 * MB,
-		320 * MB, 340 * MB, 360 * MB, 380 * MB, 400 * MB,
-		450 * MB, 500 * MB, 550 * MB, 600 * MB, 650 * MB,
-		700 * MB, 750 * MB, 800 * MB, 850 * MB, 900 * MB,
-		950 * MB, 1000 * MB,
-	};
-
 	struct vy_env *e = malloc(sizeof(*e));
 	if (unlikely(e == NULL)) {
 		diag_set(OutOfMemory, sizeof(*e), "malloc", "struct vy_env");
@@ -2554,19 +2448,6 @@ vy_env_new(const char *path, size_t memory,
 		goto error_path;
 	}
 
-	e->dump_bw = histogram_new(dump_bandwidth_buckets,
-				   lengthof(dump_bandwidth_buckets));
-	if (e->dump_bw == NULL) {
-		diag_set(OutOfMemory, 0, "histogram_new",
-			 "dump bandwidth histogram");
-		goto error_dump_bw;
-	}
-	/*
-	 * Until we dump anything, assume bandwidth to be 10 MB/s,
-	 * which should be fine for initial guess.
-	 */
-	histogram_collect(e->dump_bw, 10 * MB);
-
 	e->xm = tx_manager_new();
 	if (e->xm == NULL)
 		goto error_xm;
@@ -2584,18 +2465,18 @@ vy_env_new(const char *path, size_t memory,
 			      vy_squash_schedule, e) != 0)
 		goto error_lsm_env;
 
+	if (vy_quota_create(&e->quota, vy_env_quota_exceeded_cb) != 0)
+		goto error_quota;
+
 	struct slab_cache *slab_cache = cord_slab_cache();
 	mempool_create(&e->iterator_pool, slab_cache,
 	               sizeof(struct vinyl_iterator));
-	vy_quota_create(&e->quota, vy_env_quota_exceeded_cb);
-	ev_timer_init(&e->quota_timer, vy_env_quota_timer_cb, 0,
-		      VY_QUOTA_UPDATE_INTERVAL);
-	e->quota_timer.data = e;
-	ev_timer_start(loop(), &e->quota_timer);
 	vy_cache_env_create(&e->cache_env, slab_cache);
 	vy_run_env_create(&e->run_env);
 	vy_log_init(e->path);
 	return e;
+error_quota:
+	vy_lsm_env_destroy(&e->lsm_env);
 error_lsm_env:
 	vy_mem_env_destroy(&e->mem_env);
 	vy_scheduler_destroy(&e->scheduler);
@@ -2603,8 +2484,6 @@ error_lsm_env:
 error_squash_queue:
 	tx_manager_delete(e->xm);
 error_xm:
-	histogram_delete(e->dump_bw);
-error_dump_bw:
 	free(e->path);
 error_path:
 	free(e);
@@ -2614,12 +2493,10 @@ error_path:
 static void
 vy_env_delete(struct vy_env *e)
 {
-	ev_timer_stop(loop(), &e->quota_timer);
 	vy_scheduler_destroy(&e->scheduler);
 	vy_squash_queue_delete(e->squash_queue);
 	tx_manager_delete(e->xm);
 	free(e->path);
-	histogram_delete(e->dump_bw);
 	mempool_destroy(&e->iterator_pool);
 	vy_run_env_destroy(&e->run_env);
 	vy_lsm_env_destroy(&e->lsm_env);
diff --git a/src/box/vy_quota.c b/src/box/vy_quota.c
index 6e93d652..c8177c69 100644
--- a/src/box/vy_quota.c
+++ b/src/box/vy_quota.c
@@ -32,30 +32,127 @@
 
 #include <assert.h>
 #include <stddef.h>
+#include <stdint.h>
+#include <math.h>
 #include <tarantool_ev.h>
 
+#include "diag.h"
 #include "fiber.h"
 #include "fiber_cond.h"
 #include "say.h"
+#include "histogram.h"
+#include "trivia/util.h"
 
-void
+enum {
+	/**
+	 * Time interval between successive updates of
+	 * quota watermark and use rate, in seconds.
+	 */
+	VY_QUOTA_UPDATE_INTERVAL = 1,
+	/**
+	 * Period of time over which the quota use rate
+	 * is averaged, in seconds.
+	 */
+	VY_QUOTA_RATE_AVG_PERIOD = 5,
+};
+
+static void
+vy_quota_timer_cb(ev_loop *loop, ev_timer *timer, int events)
+{
+	(void)loop;
+	(void)events;
+
+	struct vy_quota *q = timer->data;
+
+	/*
+	 * Update the quota use rate with the new measurement.
+	 */
+	const double weight = 1 - exp(-VY_QUOTA_UPDATE_INTERVAL /
+				      (double)VY_QUOTA_RATE_AVG_PERIOD);
+	q->use_rate = (1 - weight) * q->use_rate +
+		weight * q->use_curr / VY_QUOTA_UPDATE_INTERVAL;
+	q->use_curr = 0;
+
+	/*
+	 * Due to log structured nature of the lsregion allocator,
+	 * which is used for allocating statements, we cannot free
+	 * memory in chunks, only all at once. Therefore we should
+	 * configure the watermark so that by the time we hit the
+	 * limit, all memory has been dumped, i.e.
+	 *
+	 *   limit - watermark      watermark
+	 *   ----------------- = --------------
+	 *        use_rate       dump_bandwidth
+	 */
+	size_t dump_bandwidth = vy_quota_dump_bandwidth(q);
+	q->watermark = ((double)q->limit * dump_bandwidth /
+			(dump_bandwidth + q->use_rate + 1));
+	if (q->used >= q->watermark)
+		q->quota_exceeded_cb(q);
+}
+
+int
 vy_quota_create(struct vy_quota *q, vy_quota_exceeded_f quota_exceeded_cb)
 {
+	enum { KB = 1000, MB = 1000 * 1000 };
+	static int64_t dump_bandwidth_buckets[] = {
+		100 * KB, 200 * KB, 300 * KB, 400 * KB, 500 * KB,
+		  1 * MB,   2 * MB,   3 * MB,   4 * MB,   5 * MB,
+		 10 * MB,  20 * MB,  30 * MB,  40 * MB,  50 * MB,
+		 60 * MB,  70 * MB,  80 * MB,  90 * MB, 100 * MB,
+		110 * MB, 120 * MB, 130 * MB, 140 * MB, 150 * MB,
+		160 * MB, 170 * MB, 180 * MB, 190 * MB, 200 * MB,
+		220 * MB, 240 * MB, 260 * MB, 280 * MB, 300 * MB,
+		320 * MB, 340 * MB, 360 * MB, 380 * MB, 400 * MB,
+		450 * MB, 500 * MB, 550 * MB, 600 * MB, 650 * MB,
+		700 * MB, 750 * MB, 800 * MB, 850 * MB, 900 * MB,
+		950 * MB, 1000 * MB,
+	};
+
+	q->dump_bw = histogram_new(dump_bandwidth_buckets,
+				   lengthof(dump_bandwidth_buckets));
+	if (q->dump_bw == NULL) {
+		diag_set(OutOfMemory, 0, "histogram_new",
+			 "dump bandwidth histogram");
+		return -1;
+	}
+	/*
+	 * Until we dump anything, assume bandwidth to be 10 MB/s,
+	 * which should be fine for initial guess.
+	 */
+	histogram_collect(q->dump_bw, 10 * MB);
+
 	q->limit = SIZE_MAX;
 	q->watermark = SIZE_MAX;
 	q->used = 0;
+	q->use_curr = 0;
+	q->use_rate = 0;
 	q->too_long_threshold = TIMEOUT_INFINITY;
 	q->quota_exceeded_cb = quota_exceeded_cb;
 	fiber_cond_create(&q->cond);
+	ev_timer_init(&q->timer, vy_quota_timer_cb, 0,
+		      VY_QUOTA_UPDATE_INTERVAL);
+	q->timer.data = q;
+	ev_timer_start(loop(), &q->timer);
+	return 0;
 }
 
 void
 vy_quota_destroy(struct vy_quota *q)
 {
+	ev_timer_stop(loop(), &q->timer);
+	histogram_delete(q->dump_bw);
 	fiber_cond_broadcast(&q->cond);
 	fiber_cond_destroy(&q->cond);
 }
 
+size_t
+vy_quota_dump_bandwidth(struct vy_quota *q)
+{
+	/* See comment to vy_quota::dump_bw. */
+	return histogram_percentile(q->dump_bw, 10);
+}
+
 void
 vy_quota_set_limit(struct vy_quota *q, size_t limit)
 {
@@ -66,27 +163,24 @@ vy_quota_set_limit(struct vy_quota *q, size_t limit)
 }
 
 void
-vy_quota_set_watermark(struct vy_quota *q, size_t watermark)
-{
-	q->watermark = watermark;
-	if (q->used >= watermark)
-		q->quota_exceeded_cb(q);
-}
-
-void
 vy_quota_force_use(struct vy_quota *q, size_t size)
 {
 	q->used += size;
+	q->use_curr += size;
 	if (q->used >= q->watermark)
 		q->quota_exceeded_cb(q);
 }
 
 void
-vy_quota_dump(struct vy_quota *q, size_t size)
+vy_quota_dump(struct vy_quota *q, size_t size, double duration)
 {
 	assert(q->used >= size);
 	q->used -= size;
 	fiber_cond_broadcast(&q->cond);
+
+	/* Account dump bandwidth. */
+	if (duration > 0)
+		histogram_collect(q->dump_bw, size / duration);
 }
 
 int
@@ -107,6 +201,7 @@ vy_quota_try_use(struct vy_quota *q, size_t size, double timeout)
 	if (q->used + size > q->limit)
 		return -1;
 	q->used += size;
+	q->use_curr += size;
 	if (q->used >= q->watermark)
 		q->quota_exceeded_cb(q);
 	return 0;
@@ -119,6 +214,10 @@ vy_quota_commit_use(struct vy_quota *q, size_t reserved, size_t used)
 		size_t excess = reserved - used;
 		assert(q->used >= excess);
 		q->used -= excess;
+		if (q->use_curr >= excess)
+			q->use_curr -= excess;
+		else /* was reset by the timer callback */
+			q->use_curr = 0;
 		fiber_cond_broadcast(&q->cond);
 	}
 	if (reserved < used)
diff --git a/src/box/vy_quota.h b/src/box/vy_quota.h
index cf70b1ab..3a7a24e7 100644
--- a/src/box/vy_quota.h
+++ b/src/box/vy_quota.h
@@ -32,6 +32,7 @@
  */
 
 #include <stddef.h>
+#include <tarantool_ev.h>
 #include "fiber_cond.h"
 
 #if defined(__cplusplus)
@@ -39,6 +40,7 @@ extern "C" {
 #endif /* defined(__cplusplus) */
 
 struct vy_quota;
+struct histogram;
 
 typedef void
 (*vy_quota_exceeded_f)(struct vy_quota *quota);
@@ -76,14 +78,43 @@ struct vy_quota {
 	 * It is supposed to trigger memory reclaim.
 	 */
 	vy_quota_exceeded_f quota_exceeded_cb;
+	/** Timer for updating quota watermark. */
+	ev_timer timer;
+	/**
+	 * Amount of quota used since the last
+	 * invocation of the quota timer callback.
+	 */
+	size_t use_curr;
+	/**
+	 * Quota use rate, in bytes per second.
+	 * Calculated as exponentially weighted
+	 * moving average of use_curr.
+	 */
+	size_t use_rate;
+	/**
+	 * Dump bandwidth is needed for calculating the quota watermark.
+	 * The higher the bandwidth, the later we can start dumping w/o
+	 * suffering from transaction throttling. So we want to be very
+	 * conservative about estimating the bandwidth.
+	 *
+	 * To make sure we don't overestimate it, we maintain a
+	 * histogram of all observed measurements and assume the
+	 * bandwidth to be equal to the 10th percentile, i.e. the
+	 * best result among 10% worst measurements.
+	 */
+	struct histogram *dump_bw;
 };
 
-void
+int
 vy_quota_create(struct vy_quota *q, vy_quota_exceeded_f quota_exceeded_cb);
 
 void
 vy_quota_destroy(struct vy_quota *q);
 
+/** Return quota dump bandwidth. */
+size_t
+vy_quota_dump_bandwidth(struct vy_quota *q);
+
 /**
  * Set memory limit. If current memory usage exceeds
  * the new limit, invoke the callback.
@@ -92,13 +123,6 @@ void
 vy_quota_set_limit(struct vy_quota *q, size_t limit);
 
 /**
- * Set memory watermark. If current memory usage exceeds
- * the new watermark, invoke the callback.
- */
-void
-vy_quota_set_watermark(struct vy_quota *q, size_t watermark);
-
-/**
  * Consume @size bytes of memory. In contrast to vy_quota_try_use()
  * this function does not throttle the caller.
  */
@@ -108,9 +132,12 @@ vy_quota_force_use(struct vy_quota *q, size_t size);
 /**
  * Function called on dump completion to release quota after
  * freeing memory.
+ *
+ * @size: size of dumped memory, in bytes.
+ * @duration: how long the memory dump took, in seconds.
  */
 void
-vy_quota_dump(struct vy_quota *q, size_t size);
+vy_quota_dump(struct vy_quota *q, size_t size, double duration);
 
 /**
  * Try to consume @size bytes of memory, throttle the caller
-- 
2.11.0




More information about the Tarantool-patches mailing list