[PATCH 07/12] vinyl: move global dump/compaction statistics to scheduler

Vladimir Davydov vdavydov.dev at gmail.com
Wed Jan 16 19:36:50 MSK 2019


On Tue, Jan 15, 2019 at 05:17:16PM +0300, Vladimir Davydov wrote:
> +/**
> + * Account an LSM tree to the compaction queue stats.
> + * Called after updating LSM tree compaction priority.
> + */
> +static void
> +vy_scheduler_acct_lsm(struct vy_scheduler *scheduler, struct vy_lsm *lsm)
> +{
> +	if (lsm->is_dropped)
> +		return;
> +	scheduler->stat.compaction_queue += lsm->stat.disk.compaction.queue.bytes;
> +}
> +
> +/**
> + * Unaccount an LSM tree from the compaction queue stats.
> + * Called before updating LSM tree compaction priority.
> + */
> +static void
> +vy_scheduler_unacct_lsm(struct vy_scheduler *scheduler, struct vy_lsm *lsm)
> +{
> +	if (lsm->is_dropped)
> +		return;
> +	scheduler->stat.compaction_queue -= lsm->stat.disk.compaction.queue.bytes;
> +}

> @@ -1636,8 +1682,11 @@ vy_task_compaction_new(struct vy_scheduler *scheduler, struct vy_worker *worker,
>  	range = container_of(range_node, struct vy_range, heap_node);
>  	assert(range->compaction_priority > 1);
>  
> -	if (vy_lsm_split_range(lsm, range) ||
> -	    vy_lsm_coalesce_range(lsm, range)) {
> +	vy_scheduler_unacct_lsm(scheduler, lsm);
> +	bool lsm_changed = (vy_lsm_split_range(lsm, range) ||
> +			    vy_lsm_coalesce_range(lsm, range));
> +	vy_scheduler_acct_lsm(scheduler, lsm);
> +	if (lsm_changed) {
>  		vy_scheduler_update_lsm(scheduler, lsm);
>  		return 0;
>  	}

Come to think of it, moving compaction_queue from vy_lsm_env to
vy_scheduler isn't worth the complexity it introduces. Let's leave
it in vy_lsm_env along with disk_data_size, disk_index_size, and
last_level_size, but report it in the 'scheduler' section of
box.stat.vinyl(). The new version of the patch goes right below.
It doesn't affect the rest of the series.
---

From 1d23a1b10ac99fa197c443aa1f621535549b1f42 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov.dev at gmail.com>
Date: Mon, 14 Jan 2019 16:49:42 +0300
Subject: [PATCH] vinyl: move global dump/compaction statistics to scheduler

Although it's convenient to maintain dump/compaction input/output
metrics in vy_lsm_env, semantically it's incorrect, as those metrics
characterize the scheduler, not the LSM environment. Also, we can't
easily extend those stats with e.g. the number of completed dumps or
the number of tasks in progress, because those are only known to the
scheduler.

That said, let's introduce a 'scheduler' section in box.stat.vinyl() and
move dump/compaction stats from 'disk' to the new section. Let's also
move the stats accounting from vy_lsm.c to vy_scheduler.c. The 'disk'
section now stores only the size of data and index on disk and no
cumulative statistics, which makes it similar to the 'memory' section.

Note, this patch flattens the stats (disk.compaction.input is moved to
scheduler.compaction_input and so forth), because all other global stats
are reported without using nested tables.

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 5965700b..a89a1c27 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -245,6 +245,21 @@ static struct trigger on_replace_vinyl_deferred_delete;
 /** {{{ Introspection */
 
 static void
+vy_info_append_scheduler(struct vy_env *env, struct info_handler *h)
+{
+	struct vy_scheduler_stat *stat = &env->scheduler.stat;
+
+	info_table_begin(h, "scheduler");
+	info_append_int(h, "dump_input", stat->dump_input);
+	info_append_int(h, "dump_output", stat->dump_output);
+	info_append_int(h, "compaction_input", stat->compaction_input);
+	info_append_int(h, "compaction_output", stat->compaction_output);
+	info_append_int(h, "compaction_queue",
+			env->lsm_env.compaction_queue_size);
+	info_table_end(h); /* scheduler */
+}
+
+static void
 vy_info_append_regulator(struct vy_env *env, struct info_handler *h)
 {
 	struct vy_regulator *r = &env->regulator;
@@ -295,24 +310,9 @@ vy_info_append_memory(struct vy_env *env, struct info_handler *h)
 static void
 vy_info_append_disk(struct vy_env *env, struct info_handler *h)
 {
-	struct vy_disk_stat *stat = &env->lsm_env.disk_stat;
-
 	info_table_begin(h, "disk");
-
-	info_append_int(h, "data", stat->data);
-	info_append_int(h, "index", stat->index);
-
-	info_table_begin(h, "dump");
-	info_append_int(h, "input", stat->dump.input);
-	info_append_int(h, "output", stat->dump.output);
-	info_table_end(h); /* dump */
-
-	info_table_begin(h, "compaction");
-	info_append_int(h, "input", stat->compaction.input);
-	info_append_int(h, "output", stat->compaction.output);
-	info_append_int(h, "queue", stat->compaction.queue);
-	info_table_end(h); /* compaction */
-
+	info_append_int(h, "data", env->lsm_env.disk_data_size);
+	info_append_int(h, "index", env->lsm_env.disk_index_size);
 	info_table_end(h); /* disk */
 }
 
@@ -325,6 +325,7 @@ vinyl_engine_stat(struct vinyl_engine *vinyl, struct info_handler *h)
 	vy_info_append_tx(env, h);
 	vy_info_append_memory(env, h);
 	vy_info_append_disk(env, h);
+	vy_info_append_scheduler(env, h);
 	vy_info_append_regulator(env, h);
 	info_end(h);
 }
@@ -514,11 +515,7 @@ vinyl_engine_reset_stat(struct engine *engine)
 	struct tx_manager *xm = env->xm;
 	memset(&xm->stat, 0, sizeof(xm->stat));
 
-	struct vy_disk_stat *disk_stat = &env->lsm_env.disk_stat;
-	disk_stat->dump.input = 0;
-	disk_stat->dump.output = 0;
-	disk_stat->compaction.input = 0;
-	disk_stat->compaction.output = 0;
+	vy_scheduler_reset_stat(&env->scheduler);
 }
 
 /** }}} Introspection */
diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c
index abadab5c..2aafe354 100644
--- a/src/box/vy_lsm.c
+++ b/src/box/vy_lsm.c
@@ -239,7 +239,7 @@ vy_lsm_delete(struct vy_lsm *lsm)
 	assert(lsm->env->lsm_count > 0);
 
 	lsm->env->lsm_count--;
-	lsm->env->disk_stat.compaction.queue -=
+	lsm->env->compaction_queue_size -=
 			lsm->stat.disk.compaction.queue.bytes;
 
 	if (lsm->pk != NULL)
@@ -691,11 +691,11 @@ vy_lsm_add_run(struct vy_lsm *lsm, struct vy_run *run)
 
 	/* Data size is consistent with space.bsize. */
 	if (lsm->index_id == 0)
-		env->disk_stat.data += run->count.bytes;
+		env->disk_data_size += run->count.bytes;
 	/* Index size is consistent with index.bsize. */
-	env->disk_stat.index += bloom_size + page_index_size;
+	env->disk_index_size += bloom_size + page_index_size;
 	if (lsm->index_id > 0)
-		env->disk_stat.index += run->count.bytes;
+		env->disk_index_size += run->count.bytes;
 }
 
 void
@@ -720,11 +720,11 @@ vy_lsm_remove_run(struct vy_lsm *lsm, struct vy_run *run)
 
 	/* Data size is consistent with space.bsize. */
 	if (lsm->index_id == 0)
-		env->disk_stat.data -= run->count.bytes;
+		env->disk_data_size -= run->count.bytes;
 	/* Index size is consistent with index.bsize. */
-	env->disk_stat.index -= bloom_size + page_index_size;
+	env->disk_index_size -= bloom_size + page_index_size;
 	if (lsm->index_id > 0)
-		env->disk_stat.index -= run->count.bytes;
+		env->disk_index_size -= run->count.bytes;
 }
 
 void
@@ -751,7 +751,7 @@ vy_lsm_acct_range(struct vy_lsm *lsm, struct vy_range *range)
 	histogram_collect(lsm->run_hist, range->slice_count);
 	vy_disk_stmt_counter_add(&lsm->stat.disk.compaction.queue,
 				 &range->compaction_queue);
-	lsm->env->disk_stat.compaction.queue += range->compaction_queue.bytes;
+	lsm->env->compaction_queue_size += range->compaction_queue.bytes;
 }
 
 void
@@ -760,7 +760,7 @@ vy_lsm_unacct_range(struct vy_lsm *lsm, struct vy_range *range)
 	histogram_discard(lsm->run_hist, range->slice_count);
 	vy_disk_stmt_counter_sub(&lsm->stat.disk.compaction.queue,
 				 &range->compaction_queue);
-	lsm->env->disk_stat.compaction.queue -= range->compaction_queue.bytes;
+	lsm->env->compaction_queue_size -= range->compaction_queue.bytes;
 }
 
 void
@@ -771,9 +771,6 @@ vy_lsm_acct_dump(struct vy_lsm *lsm,
 	lsm->stat.disk.dump.count++;
 	vy_stmt_counter_add(&lsm->stat.disk.dump.input, input);
 	vy_disk_stmt_counter_add(&lsm->stat.disk.dump.output, output);
-
-	lsm->env->disk_stat.dump.input += input->bytes;
-	lsm->env->disk_stat.dump.output += output->bytes;
 }
 
 void
@@ -784,9 +781,6 @@ vy_lsm_acct_compaction(struct vy_lsm *lsm,
 	lsm->stat.disk.compaction.count++;
 	vy_disk_stmt_counter_add(&lsm->stat.disk.compaction.input, input);
 	vy_disk_stmt_counter_add(&lsm->stat.disk.compaction.output, output);
-
-	lsm->env->disk_stat.compaction.input += input->bytes;
-	lsm->env->disk_stat.compaction.output += output->bytes;
 }
 
 int
diff --git a/src/box/vy_lsm.h b/src/box/vy_lsm.h
index e7487995..6b51c5c7 100644
--- a/src/box/vy_lsm.h
+++ b/src/box/vy_lsm.h
@@ -91,8 +91,27 @@ struct vy_lsm_env {
 	size_t bloom_size;
 	/** Size of memory used for page index. */
 	size_t page_index_size;
-	/** Global disk statistics. */
-	struct vy_disk_stat disk_stat;
+	/**
+	 * Size of disk space used for storing data of all spaces,
+	 * in bytes, without taking into account disk compression.
+	 * By 'data' we mean statements stored in primary indexes
+	 * only, which is consistent with space.bsize().
+	 */
+	int64_t disk_data_size;
+	/**
+	 * Size of disk space used for indexing data in all spaces,
+	 * in bytes, without taking into account disk compression.
+	 * This consists of page indexes and bloom filters, which
+	 * are stored in .index files, as well as the total size of
+	 * statements stored in secondary index .run files, which
+	 * is consistent with index.bsize().
+	 */
+	int64_t disk_index_size;
+	/**
+	 * Size of data of all spaces that need to be compacted,
+	 * in bytes, without taking into account disk compression.
+	 */
+	int64_t compaction_queue_size;
 	/** Memory pool for vy_history_node allocations. */
 	struct mempool history_node_pool;
 };
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index f431eb24..f65f4b14 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -501,6 +501,16 @@ vy_scheduler_destroy(struct vy_scheduler *scheduler)
 }
 
 void
+vy_scheduler_reset_stat(struct vy_scheduler *scheduler)
+{
+	struct vy_scheduler_stat *stat = &scheduler->stat;
+	stat->dump_input = 0;
+	stat->dump_output = 0;
+	stat->compaction_input = 0;
+	stat->compaction_output = 0;
+}
+
+void
 vy_scheduler_add_lsm(struct vy_scheduler *scheduler, struct vy_lsm *lsm)
 {
 	assert(!lsm->is_dropped);
@@ -1227,6 +1237,8 @@ delete_mems:
 	}
 	lsm->dump_lsn = MAX(lsm->dump_lsn, dump_lsn);
 	vy_lsm_acct_dump(lsm, &dump_input, &dump_output);
+	scheduler->stat.dump_input += dump_input.bytes;
+	scheduler->stat.dump_output += dump_output.bytes;
 
 	/* The iterator has been cleaned up in a worker thread. */
 	task->wi->iface->close(task->wi);
@@ -1564,6 +1576,8 @@ vy_task_compaction_complete(struct vy_task *task)
 	vy_range_update_compaction_priority(range, &lsm->opts);
 	vy_lsm_acct_range(lsm, range);
 	vy_lsm_acct_compaction(lsm, &compaction_input, &compaction_output);
+	scheduler->stat.compaction_input += compaction_input.bytes;
+	scheduler->stat.compaction_output += compaction_output.bytes;
 
 	/*
 	 * Unaccount unused runs and delete compacted slices.
diff --git a/src/box/vy_scheduler.h b/src/box/vy_scheduler.h
index 5b09f964..2d4352d7 100644
--- a/src/box/vy_scheduler.h
+++ b/src/box/vy_scheduler.h
@@ -41,6 +41,7 @@
 #define HEAP_FORWARD_DECLARATION
 #include "salad/heap.h"
 #include "salad/stailq.h"
+#include "vy_stat.h"
 
 #if defined(__cplusplus)
 extern "C" {
@@ -139,6 +140,8 @@ struct vy_scheduler {
 	double dump_start;
 	/** Signaled on dump round completion. */
 	struct fiber_cond dump_cond;
+	/** Scheduler statistics. */
+	struct vy_scheduler_stat stat;
 	/**
 	 * Function called by the scheduler upon dump round
 	 * completion. It is supposed to free memory released
@@ -184,6 +187,12 @@ void
 vy_scheduler_destroy(struct vy_scheduler *scheduler);
 
 /**
+ * Reset scheduler statistics (called by box.stat.reset).
+ */
+void
+vy_scheduler_reset_stat(struct vy_scheduler *scheduler);
+
+/**
  * Add an LSM tree to scheduler dump/compaction queues.
  */
 void
diff --git a/src/box/vy_stat.h b/src/box/vy_stat.h
index 7ed55ff5..62ef2b75 100644
--- a/src/box/vy_stat.h
+++ b/src/box/vy_stat.h
@@ -206,23 +206,20 @@ struct vy_tx_stat {
 };
 
 /**
- * Global disk statistics.
+ * Scheduler statistics.
  *
- * Fields correspond to those of per LSM tree statistics.
- * All counters are given in bytes, uncompressed.
+ * All byte counters are given without taking into account
+ * disk compression.
  */
-struct vy_disk_stat {
-	int64_t data;
-	int64_t index;
-	struct {
-		int64_t input;
-		int64_t output;
-	} dump;
-	struct {
-		int64_t input;
-		int64_t output;
-		int64_t queue;
-	} compaction;
+struct vy_scheduler_stat {
+	/** Number of bytes read by dump tasks. */
+	int64_t dump_input;
+	/** Number of bytes written by dump tasks. */
+	int64_t dump_output;
+	/** Number of bytes read by compaction tasks. */
+	int64_t compaction_input;
+	/** Number of bytes written by compaction tasks. */
+	int64_t compaction_output;
 };
 
 static inline int
diff --git a/test/vinyl/errinj_stat.result b/test/vinyl/errinj_stat.result
index 9d7f64e9..fe379cef 100644
--- a/test/vinyl/errinj_stat.result
+++ b/test/vinyl/errinj_stat.result
@@ -50,7 +50,7 @@ i:stat().disk.compaction.queue -- none
   rows: 0
   bytes: 0
 ...
-i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue
+i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue
 ---
 - true
 ...
@@ -71,7 +71,7 @@ i:stat().disk.compaction.queue -- 30 statements
   rows: 30
   bytes: 411
 ...
-i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue
+i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue
 ---
 - true
 ...
@@ -85,7 +85,7 @@ i:stat().disk.compaction.queue -- 40 statements
   rows: 40
   bytes: 548
 ...
-i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue
+i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue
 ---
 - true
 ...
@@ -99,7 +99,7 @@ i:stat().disk.compaction.queue -- 50 statements
   rows: 50
   bytes: 685
 ...
-i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue
+i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue
 ---
 - true
 ...
@@ -113,7 +113,7 @@ i:stat().disk.compaction.queue -- 50 statements
   rows: 50
   bytes: 685
 ...
-i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue
+i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue
 ---
 - true
 ...
diff --git a/test/vinyl/errinj_stat.test.lua b/test/vinyl/errinj_stat.test.lua
index 6cd95cf9..3556f50a 100644
--- a/test/vinyl/errinj_stat.test.lua
+++ b/test/vinyl/errinj_stat.test.lua
@@ -22,21 +22,21 @@ i = s:create_index('pk', {run_count_per_level = 2})
 function dump() for i = 1, 10 do s:replace{i} end box.snapshot() end
 dump()
 i:stat().disk.compaction.queue -- none
-i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue
+i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue
 errinj.set('ERRINJ_VY_COMPACTION_DELAY', true)
 dump()
 dump()
 i:stat().disk.compaction.queue -- 30 statements
-i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue
+i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue
 dump()
 i:stat().disk.compaction.queue -- 40 statements
-i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue
+i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue
 dump()
 i:stat().disk.compaction.queue -- 50 statements
-i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue
+i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue
 box.stat.reset() -- doesn't affect queue size
 i:stat().disk.compaction.queue -- 50 statements
-i:stat().disk.compaction.queue.bytes == box.stat.vinyl().disk.compaction.queue
+i:stat().disk.compaction.queue.bytes == box.stat.vinyl().scheduler.compaction_queue
 errinj.set('ERRINJ_VY_COMPACTION_DELAY', false)
 while i:stat().disk.compaction.count < 2 do fiber.sleep(0.01) end
 i:stat().disk.compaction.queue -- none
diff --git a/test/vinyl/stat.result b/test/vinyl/stat.result
index 68aea30b..16f01945 100644
--- a/test/vinyl/stat.result
+++ b/test/vinyl/stat.result
@@ -224,30 +224,29 @@ istat()
 ...
 gstat()
 ---
-- disk:
-    dump:
-      input: 0
-      output: 0
-    compaction:
-      input: 0
-      output: 0
-      queue: 0
-    data: 0
-    index: 0
+- tx:
+    conflict: 0
+    commit: 0
+    rollback: 0
+    statements: 0
+    transactions: 0
+    gap_locks: 0
+    read_views: 0
   memory:
     tuple_cache: 0
     tx: 0
     level0: 0
     page_index: 0
     bloom_filter: 0
-  tx:
-    conflict: 0
-    commit: 0
-    rollback: 0
-    statements: 0
-    transactions: 0
-    gap_locks: 0
-    read_views: 0
+  disk:
+    data: 0
+    index: 0
+  scheduler:
+    compaction_output: 0
+    compaction_queue: 0
+    dump_output: 0
+    dump_input: 0
+    compaction_input: 0
 ...
 --
 -- Index statistics.
@@ -701,19 +700,19 @@ box.rollback()
 -- Global statistics.
 --
 -- dump and compaction totals
-gstat().disk.dump.input == istat().disk.dump.input.bytes
+gstat().scheduler.dump_input == istat().disk.dump.input.bytes
 ---
 - true
 ...
-gstat().disk.dump.output == istat().disk.dump.output.bytes
+gstat().scheduler.dump_output == istat().disk.dump.output.bytes
 ---
 - true
 ...
-gstat().disk.compaction.input == istat().disk.compaction.input.bytes
+gstat().scheduler.compaction_input == istat().disk.compaction.input.bytes
 ---
 - true
 ...
-gstat().disk.compaction.output == istat().disk.compaction.output.bytes
+gstat().scheduler.compaction_output == istat().disk.compaction.output.bytes
 ---
 - true
 ...
@@ -1074,30 +1073,29 @@ istat()
 ...
 gstat()
 ---
-- disk:
-    dump:
-      input: 0
-      output: 0
-    compaction:
-      input: 0
-      output: 0
-      queue: 0
-    data: 104300
-    index: 1190
+- tx:
+    conflict: 0
+    commit: 0
+    rollback: 0
+    statements: 0
+    transactions: 0
+    gap_locks: 0
+    read_views: 0
   memory:
     tuple_cache: 14313
     tx: 0
     level0: 262583
     page_index: 1050
     bloom_filter: 140
-  tx:
-    conflict: 0
-    commit: 0
-    rollback: 0
-    statements: 0
-    transactions: 0
-    gap_locks: 0
-    read_views: 0
+  disk:
+    data: 104300
+    index: 1190
+  scheduler:
+    compaction_output: 0
+    compaction_queue: 0
+    dump_output: 0
+    dump_input: 0
+    compaction_input: 0
 ...
 s:drop()
 ---
diff --git a/test/vinyl/stat.test.lua b/test/vinyl/stat.test.lua
index 0173cf89..6708fcb9 100644
--- a/test/vinyl/stat.test.lua
+++ b/test/vinyl/stat.test.lua
@@ -205,10 +205,10 @@ box.rollback()
 --
 
 -- dump and compaction totals
-gstat().disk.dump.input == istat().disk.dump.input.bytes
-gstat().disk.dump.output == istat().disk.dump.output.bytes
-gstat().disk.compaction.input == istat().disk.compaction.input.bytes
-gstat().disk.compaction.output == istat().disk.compaction.output.bytes
+gstat().scheduler.dump_input == istat().disk.dump.input.bytes
+gstat().scheduler.dump_output == istat().disk.dump.output.bytes
+gstat().scheduler.compaction_input == istat().disk.compaction.input.bytes
+gstat().scheduler.compaction_output == istat().disk.compaction.output.bytes
 
 -- use memory
 st = gstat()



More information about the Tarantool-patches mailing list