[PATCH 5/9] vinyl: keep track of dumps per compaction for each LSM tree

Vladimir Davydov vdavydov.dev at gmail.com
Mon Jan 21 00:17:04 MSK 2019


This patch adds a dumps_per_compaction metric to per-index statistics.
It shows the number of dumps it takes to trigger a major compaction of
a range in a given LSM tree. We need it to automatically choose the
optimal number of ranges so as to smooth out the load generated by
range compaction.

To calculate this metric, we assign a dump_count to each run. It shows
how many dumps it took to create the run. If a run was created by a
memory dump, its dump_count is set to 1. If a run was created by a
minor compaction, it is set to the sum of the dump counts of the
compacted runs. If a run was created by a major compaction, it is set
to the sum of the dump counts of the compacted runs minus the dump
count of the last-level run. The dump_count is stored in vylog.
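
For instance (a made-up scenario): suppose a range contains three runs
with dump counts 1, 1 and 4, the last one being the last-level run left
by the previous major compaction. A minor compaction of the two newest
runs produces a run with dump_count = 1 + 1 = 2, while a major
compaction of all three produces a run with dump_count =
(1 + 1 + 4) - 4 = 2, i.e. exactly the number of dumps accumulated since
the previous major compaction.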

This allows us to estimate the number of dumps it takes to trigger
compaction of a range as the dump_count of the last-level run stored in
the range. Finally, we report dumps_per_compaction of an LSM tree as
the minimal dumps_per_compaction among all ranges constituting the
tree. To achieve that, we maintain, for each LSM tree, a heap of ranges
ordered by dumps_per_compaction.
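
The new statistic appears in index:stat() output. A quick usage sketch
(assuming a vinyl space 'test' with a 'primary' index, as in the test
added below):

    box.space.test.index.primary:stat().dumps_per_compaction -- min over all ranges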

Needed for #3944
---
 src/box/vinyl.c          |   2 +
 src/box/vy_log.c         |  26 ++++++++-
 src/box/vy_log.h         |  10 +++-
 src/box/vy_lsm.c         |  23 ++++++++
 src/box/vy_lsm.h         |   6 +++
 src/box/vy_range.c       |  13 +++++
 src/box/vy_range.h       |  32 ++++++++++++
 src/box/vy_run.h         |  15 ++++++
 src/box/vy_scheduler.c   |  14 ++++-
 test/vinyl/layout.result |   8 +--
 test/vinyl/stat.result   | 133 ++++++++++++++++++++++++++++++++++++++++++++---
 test/vinyl/stat.test.lua |  57 ++++++++++++++++++++
 12 files changed, 323 insertions(+), 16 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index d6117f44..dc4fc830 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -463,6 +463,8 @@ vinyl_index_stat(struct index *index, struct info_handler *h)
 	info_append_int(h, "run_avg", lsm->run_count / lsm->range_count);
 	histogram_snprint(buf, sizeof(buf), lsm->run_hist);
 	info_append_str(h, "run_histogram", buf);
+	info_append_int(h, "dumps_per_compaction",
+			vy_lsm_dumps_per_compaction(lsm));
 
 	info_end(h);
 }
diff --git a/src/box/vy_log.c b/src/box/vy_log.c
index d3fa0c7a..d7cf4996 100644
--- a/src/box/vy_log.c
+++ b/src/box/vy_log.c
@@ -84,6 +84,7 @@ enum vy_log_key {
 	VY_LOG_KEY_MODIFY_LSN		= 13,
 	VY_LOG_KEY_DROP_LSN		= 14,
 	VY_LOG_KEY_GROUP_ID		= 15,
+	VY_LOG_KEY_DUMP_COUNT		= 16,
 };
 
 /** vy_log_key -> human readable name. */
@@ -104,6 +105,7 @@ static const char *vy_log_key_name[] = {
 	[VY_LOG_KEY_MODIFY_LSN]		= "modify_lsn",
 	[VY_LOG_KEY_DROP_LSN]		= "drop_lsn",
 	[VY_LOG_KEY_GROUP_ID]		= "group_id",
+	[VY_LOG_KEY_DUMP_COUNT]		= "dump_count",
 };
 
 /** vy_log_type -> human readable name. */
@@ -285,6 +287,10 @@ vy_log_record_snprint(char *buf, int size, const struct vy_log_record *record)
 		SNPRINT(total, snprintf, buf, size, "%s=%"PRIi64", ",
 			vy_log_key_name[VY_LOG_KEY_GC_LSN],
 			record->gc_lsn);
+	if (record->dump_count > 0)
+		SNPRINT(total, snprintf, buf, size, "%s=%"PRIu32", ",
+			vy_log_key_name[VY_LOG_KEY_DUMP_COUNT],
+			record->dump_count);
 	SNPRINT(total, snprintf, buf, size, "}");
 	return total;
 }
@@ -411,6 +417,11 @@ vy_log_record_encode(const struct vy_log_record *record,
 		size += mp_sizeof_uint(record->gc_lsn);
 		n_keys++;
 	}
+	if (record->dump_count > 0) {
+		size += mp_sizeof_uint(VY_LOG_KEY_DUMP_COUNT);
+		size += mp_sizeof_uint(record->dump_count);
+		n_keys++;
+	}
 	size += mp_sizeof_map(n_keys);
 
 	/*
@@ -493,6 +504,10 @@ vy_log_record_encode(const struct vy_log_record *record,
 		pos = mp_encode_uint(pos, VY_LOG_KEY_GC_LSN);
 		pos = mp_encode_uint(pos, record->gc_lsn);
 	}
+	if (record->dump_count > 0) {
+		pos = mp_encode_uint(pos, VY_LOG_KEY_DUMP_COUNT);
+		pos = mp_encode_uint(pos, record->dump_count);
+	}
 	assert(pos == tuple + size);
 
 	/*
@@ -620,6 +635,9 @@ vy_log_record_decode(struct vy_log_record *record,
 		case VY_LOG_KEY_GC_LSN:
 			record->gc_lsn = mp_decode_uint(&pos);
 			break;
+		case VY_LOG_KEY_DUMP_COUNT:
+			record->dump_count = mp_decode_uint(&pos);
+			break;
 		default:
 			mp_next(&pos); /* unknown key, ignore */
 			break;
@@ -1558,6 +1576,7 @@ vy_recovery_do_create_run(struct vy_recovery *recovery, int64_t run_id)
 	run->id = run_id;
 	run->dump_lsn = -1;
 	run->gc_lsn = -1;
+	run->dump_count = 0;
 	run->is_incomplete = false;
 	run->is_dropped = false;
 	run->data = NULL;
@@ -1612,7 +1631,7 @@ vy_recovery_prepare_run(struct vy_recovery *recovery, int64_t lsm_id,
  */
 static int
 vy_recovery_create_run(struct vy_recovery *recovery, int64_t lsm_id,
-		       int64_t run_id, int64_t dump_lsn)
+		       int64_t run_id, int64_t dump_lsn, uint32_t dump_count)
 {
 	struct vy_lsm_recovery_info *lsm;
 	lsm = vy_recovery_lookup_lsm(recovery, lsm_id);
@@ -1637,6 +1656,7 @@ vy_recovery_create_run(struct vy_recovery *recovery, int64_t lsm_id,
 			return -1;
 	}
 	run->dump_lsn = dump_lsn;
+	run->dump_count = dump_count;
 	run->is_incomplete = false;
 	rlist_move_entry(&lsm->runs, run, in_lsm);
 	return 0;
@@ -1998,7 +2018,8 @@ vy_recovery_process_record(struct vy_recovery *recovery,
 		break;
 	case VY_LOG_CREATE_RUN:
 		rc = vy_recovery_create_run(recovery, record->lsm_id,
-					    record->run_id, record->dump_lsn);
+					    record->run_id, record->dump_lsn,
+					    record->dump_count);
 		break;
 	case VY_LOG_DROP_RUN:
 		rc = vy_recovery_drop_run(recovery, record->run_id,
@@ -2348,6 +2369,7 @@ vy_log_append_lsm(struct xlog *xlog, struct vy_lsm_recovery_info *lsm)
 		} else {
 			record.type = VY_LOG_CREATE_RUN;
 			record.dump_lsn = run->dump_lsn;
+			record.dump_count = run->dump_count;
 		}
 		record.lsm_id = lsm->id;
 		record.run_id = run->id;
diff --git a/src/box/vy_log.h b/src/box/vy_log.h
index 70e25245..ee38c193 100644
--- a/src/box/vy_log.h
+++ b/src/box/vy_log.h
@@ -96,7 +96,7 @@ enum vy_log_record_type {
 	VY_LOG_PREPARE_RUN		= 4,
 	/**
 	 * Commit a vinyl run file creation.
-	 * Requires vy_log_record::lsm_id, run_id, dump_lsn.
+	 * Requires vy_log_record::lsm_id, run_id, dump_lsn, dump_count.
 	 *
 	 * Written after a run file was successfully created.
 	 */
@@ -271,6 +271,8 @@ struct vy_log_record {
 	 * that uses this run.
 	 */
 	int64_t gc_lsn;
+	/** For runs: number of dumps it took to create the run. */
+	uint32_t dump_count;
 	/** Link in vy_log::tx. */
 	struct stailq_entry in_tx;
 };
@@ -389,6 +391,8 @@ struct vy_run_recovery_info {
 	 * that uses this run.
 	 */
 	int64_t gc_lsn;
+	/** Number of dumps it took to create the run. */
+	uint32_t dump_count;
 	/**
 	 * True if the run was not committed (there's
 	 * VY_LOG_PREPARE_RUN, but no VY_LOG_CREATE_RUN).
@@ -710,7 +714,8 @@ vy_log_prepare_run(int64_t lsm_id, int64_t run_id)
 
 /** Helper to log a vinyl run creation. */
 static inline void
-vy_log_create_run(int64_t lsm_id, int64_t run_id, int64_t dump_lsn)
+vy_log_create_run(int64_t lsm_id, int64_t run_id,
+		  int64_t dump_lsn, uint32_t dump_count)
 {
 	struct vy_log_record record;
 	vy_log_record_init(&record);
@@ -718,6 +723,7 @@ vy_log_create_run(int64_t lsm_id, int64_t run_id, int64_t dump_lsn)
 	record.lsm_id = lsm_id;
 	record.run_id = run_id;
 	record.dump_lsn = dump_lsn;
+	record.dump_count = dump_count;
 	vy_log_write(&record);
 }
 
diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c
index 851785ee..6ec86c22 100644
--- a/src/box/vy_lsm.c
+++ b/src/box/vy_lsm.c
@@ -181,6 +181,7 @@ vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_cache_env *cache_env,
 	rlist_create(&lsm->sealed);
 	vy_range_tree_new(lsm->tree);
 	vy_max_compaction_priority_create(&lsm->max_compaction_priority);
+	vy_min_dumps_per_compaction_create(&lsm->min_dumps_per_compaction);
 	rlist_create(&lsm->runs);
 	lsm->pk = pk;
 	if (pk != NULL)
@@ -258,6 +259,7 @@ vy_lsm_delete(struct vy_lsm *lsm)
 
 	vy_range_tree_iter(lsm->tree, NULL, vy_range_tree_free_cb, NULL);
 	vy_max_compaction_priority_destroy(&lsm->max_compaction_priority);
+	vy_min_dumps_per_compaction_destroy(&lsm->min_dumps_per_compaction);
 	tuple_format_unref(lsm->disk_format);
 	key_def_delete(lsm->cmp_def);
 	key_def_delete(lsm->key_def);
@@ -351,6 +353,7 @@ vy_lsm_recover_run(struct vy_lsm *lsm, struct vy_run_recovery_info *run_info,
 		return NULL;
 
 	run->dump_lsn = run_info->dump_lsn;
+	run->dump_count = run_info->dump_count;
 	if (vy_run_recover(run, lsm->env->path,
 			   lsm->space_id, lsm->index_id) != 0 &&
 	    (!force_recovery ||
@@ -636,6 +639,7 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery,
 					    (long long)range->id));
 			return -1;
 		}
+		vy_range_update_dumps_per_compaction(range);
 		vy_lsm_acct_range(lsm, range);
 	}
 	if (prev == NULL) {
@@ -651,6 +655,7 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery,
 				    (long long)prev->id));
 		return -1;
 	}
+	vy_min_dumps_per_compaction_update_all(&lsm->min_dumps_per_compaction);
 	return 0;
 }
 
@@ -674,6 +679,18 @@ vy_lsm_compaction_priority(struct vy_lsm *lsm)
 	return range->compaction_priority;
 }
 
+int
+vy_lsm_dumps_per_compaction(struct vy_lsm *lsm)
+{
+	struct heap_node *node;
+	node = vy_min_dumps_per_compaction_top(&lsm->min_dumps_per_compaction);
+	if (node == NULL)
+		return 0;
+	struct vy_range *range = container_of(node, struct vy_range,
+					      dumps_per_compaction_node);
+	return range->dumps_per_compaction;
+}
+
 void
 vy_lsm_add_run(struct vy_lsm *lsm, struct vy_run *run)
 {
@@ -737,6 +754,8 @@ vy_lsm_add_range(struct vy_lsm *lsm, struct vy_range *range)
 	assert(range->compaction_priority_node.pos == UINT32_MAX);
 	vy_max_compaction_priority_insert(&lsm->max_compaction_priority,
 					  &range->compaction_priority_node);
+	vy_min_dumps_per_compaction_insert(&lsm->min_dumps_per_compaction,
+					   &range->dumps_per_compaction_node);
 	vy_range_tree_insert(lsm->tree, range);
 	lsm->range_count++;
 }
@@ -747,6 +766,8 @@ vy_lsm_remove_range(struct vy_lsm *lsm, struct vy_range *range)
 	assert(range->compaction_priority_node.pos != UINT32_MAX);
 	vy_max_compaction_priority_delete(&lsm->max_compaction_priority,
 					  &range->compaction_priority_node);
+	vy_min_dumps_per_compaction_delete(&lsm->min_dumps_per_compaction,
+					  &range->dumps_per_compaction_node);
 	vy_range_tree_remove(lsm->tree, range);
 	lsm->range_count--;
 }
@@ -1080,6 +1101,7 @@ vy_lsm_split_range(struct vy_lsm *lsm, struct vy_range *range)
 		}
 		part->needs_compaction = range->needs_compaction;
 		vy_range_update_compaction_priority(part, &lsm->opts);
+		vy_range_update_dumps_per_compaction(part);
 	}
 
 	/*
@@ -1197,6 +1219,7 @@ vy_lsm_coalesce_range(struct vy_lsm *lsm, struct vy_range *range)
 	 * as it fits the configured LSM tree shape.
 	 */
 	vy_range_update_compaction_priority(result, &lsm->opts);
+	vy_range_update_dumps_per_compaction(result);
 	vy_lsm_acct_range(lsm, result);
 	vy_lsm_add_range(lsm, result);
 	lsm->range_tree_version++;
diff --git a/src/box/vy_lsm.h b/src/box/vy_lsm.h
index a1d872e9..4df9d19a 100644
--- a/src/box/vy_lsm.h
+++ b/src/box/vy_lsm.h
@@ -253,6 +253,8 @@ struct vy_lsm {
 	int range_count;
 	/** Heap of ranges, prioritized by compaction_priority. */
 	heap_t max_compaction_priority;
+	/** Heap of ranges, prioritized by dumps_per_compaction. */
+	heap_t min_dumps_per_compaction;
 	/**
 	 * List of all runs created for this LSM tree,
 	 * linked by vy_run->in_lsm.
@@ -438,6 +440,10 @@ vy_lsm_generation(struct vy_lsm *lsm);
 int
 vy_lsm_compaction_priority(struct vy_lsm *lsm);
 
+/** Return min dumps_per_compaction among ranges of an LSM tree. */
+int
+vy_lsm_dumps_per_compaction(struct vy_lsm *lsm);
+
 /** Add a run to the list of runs of an LSM tree. */
 void
 vy_lsm_add_run(struct vy_lsm *lsm, struct vy_run *run);
diff --git a/src/box/vy_range.c b/src/box/vy_range.c
index a2cb4558..7cb1b4ba 100644
--- a/src/box/vy_range.c
+++ b/src/box/vy_range.c
@@ -198,6 +198,7 @@ vy_range_new(int64_t id, struct tuple *begin, struct tuple *end,
 	range->cmp_def = cmp_def;
 	rlist_create(&range->slices);
 	range->compaction_priority_node.pos = UINT32_MAX;
+	range->dumps_per_compaction_node.pos = UINT32_MAX;
 	return range;
 }
 
@@ -391,6 +392,18 @@ vy_range_update_compaction_priority(struct vy_range *range,
 	}
 }
 
+void
+vy_range_update_dumps_per_compaction(struct vy_range *range)
+{
+	if (!rlist_empty(&range->slices)) {
+		struct vy_slice *slice = rlist_last_entry(&range->slices,
+						struct vy_slice, in_range);
+		range->dumps_per_compaction = slice->run->dump_count;
+	} else {
+		range->dumps_per_compaction = 0;
+	}
+}
+
 /**
  * Return true and set split_key accordingly if the range needs to be
  * split in two.
diff --git a/src/box/vy_range.h b/src/box/vy_range.h
index 7c0a16e2..f19c2c6b 100644
--- a/src/box/vy_range.h
+++ b/src/box/vy_range.h
@@ -121,6 +121,13 @@ struct vy_range {
 	bool needs_compaction;
 	/** Number of times the range was compacted. */
 	int n_compactions;
+	/**
+	 * Number of dumps it takes to trigger major compaction in
+	 * this range, see vy_run::dump_count for more details.
+	 */
+	int dumps_per_compaction;
+	/** Link in vy_lsm->min_dumps_per_compaction. */
+	struct heap_node dumps_per_compaction_node;
 	/** Link in vy_lsm->tree. */
 	rb_node(struct vy_range) tree_node;
 	/**
@@ -149,6 +156,25 @@ vy_max_compaction_priority_less(struct heap_node *a, struct heap_node *b)
 #undef HEAP_LESS
 #undef HEAP_NAME
 
+/**
+ * Heap of all ranges of the same LSM tree, prioritized by
+ * vy_range->dumps_per_compaction.
+ */
+#define HEAP_NAME vy_min_dumps_per_compaction
+static inline bool
+vy_min_dumps_per_compaction_less(struct heap_node *a, struct heap_node *b)
+{
+	struct vy_range *r1 = container_of(a, struct vy_range,
+					   dumps_per_compaction_node);
+	struct vy_range *r2 = container_of(b, struct vy_range,
+					   dumps_per_compaction_node);
+	return r1->dumps_per_compaction < r2->dumps_per_compaction;
+}
+#define HEAP_LESS(h, l, r) vy_min_dumps_per_compaction_less(l, r)
+#include "salad/heap.h"
+#undef HEAP_LESS
+#undef HEAP_NAME
+
 /** Return true if a task is scheduled for a given range. */
 static inline bool
 vy_range_is_scheduled(struct vy_range *range)
@@ -245,6 +271,12 @@ vy_range_update_compaction_priority(struct vy_range *range,
 				    const struct index_opts *opts);
 
 /**
+ * Update the value of range->dumps_per_compaction.
+ */
+void
+vy_range_update_dumps_per_compaction(struct vy_range *range);
+
+/**
  * Check if a range needs to be split in two.
  *
  * @param range             The range.
diff --git a/src/box/vy_run.h b/src/box/vy_run.h
index 990daffa..28fd6a50 100644
--- a/src/box/vy_run.h
+++ b/src/box/vy_run.h
@@ -130,6 +130,21 @@ struct vy_run {
 	/** Max LSN stored on disk. */
 	int64_t dump_lsn;
 	/**
+	 * Number of dumps it took to create this run.
+	 *
+	 * If the run was produced by a memory dump, it is 1.
+	 * If the run was produced by a minor compaction, it
+	 * is the sum of dump counts of compacted runs.
+	 * If the run was produced by a major compaction, it
+	 * is the sum of dump counts of compacted runs
+	 * minus the dump count of the last (greatest) run.
+	 *
+	 * This way, by looking at the last level run in an LSM
+	 * tree, we can tell how many dumps it took to compact
+	 * it last time.
+	 */
+	uint32_t dump_count;
+	/**
 	 * Run reference counter, the run is deleted once it hits 0.
 	 * A new run is created with the reference counter set to 1.
 	 * A run is referenced by each slice created for it and each
diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c
index 16ecafed..f14a199b 100644
--- a/src/box/vy_scheduler.c
+++ b/src/box/vy_scheduler.c
@@ -1193,7 +1193,7 @@ vy_task_dump_complete(struct vy_task *task)
 	 * Log change in metadata.
 	 */
 	vy_log_tx_begin();
-	vy_log_create_run(lsm->id, new_run->id, dump_lsn);
+	vy_log_create_run(lsm->id, new_run->id, dump_lsn, new_run->dump_count);
 	for (range = begin_range, i = 0; range != end_range;
 	     range = vy_range_tree_next(lsm->tree, range), i++) {
 		assert(i < lsm->range_count);
@@ -1226,9 +1226,11 @@ vy_task_dump_complete(struct vy_task *task)
 		vy_lsm_unacct_range(lsm, range);
 		vy_range_add_slice(range, slice);
 		vy_range_update_compaction_priority(range, &lsm->opts);
+		vy_range_update_dumps_per_compaction(range);
 		vy_lsm_acct_range(lsm, range);
 	}
 	vy_max_compaction_priority_update_all(&lsm->max_compaction_priority);
+	vy_min_dumps_per_compaction_update_all(&lsm->min_dumps_per_compaction);
 	free(new_slices);
 
 delete_mems:
@@ -1396,6 +1398,7 @@ vy_task_dump_new(struct vy_scheduler *scheduler, struct vy_worker *worker,
 	if (new_run == NULL)
 		goto err_run;
 
+	new_run->dump_count = 1;
 	new_run->dump_lsn = dump_lsn;
 
 	/*
@@ -1528,7 +1531,8 @@ vy_task_compaction_complete(struct vy_task *task)
 	rlist_foreach_entry(run, &unused_runs, in_unused)
 		vy_log_drop_run(run->id, gc_lsn);
 	if (new_slice != NULL) {
-		vy_log_create_run(lsm->id, new_run->id, new_run->dump_lsn);
+		vy_log_create_run(lsm->id, new_run->id, new_run->dump_lsn,
+				  new_run->dump_count);
 		vy_log_insert_slice(range->id, new_run->id, new_slice->id,
 				    tuple_data_or_null(new_slice->begin),
 				    tuple_data_or_null(new_slice->end));
@@ -1589,6 +1593,7 @@ vy_task_compaction_complete(struct vy_task *task)
 	}
 	range->n_compactions++;
 	vy_range_update_compaction_priority(range, &lsm->opts);
+	vy_range_update_dumps_per_compaction(range);
 	vy_lsm_acct_range(lsm, range);
 	vy_lsm_acct_compaction(lsm, compaction_time,
 			       &compaction_input, &compaction_output);
@@ -1613,6 +1618,8 @@ vy_task_compaction_complete(struct vy_task *task)
 	assert(range->compaction_priority_node.pos == UINT32_MAX);
 	vy_max_compaction_priority_insert(&lsm->max_compaction_priority,
 					  &range->compaction_priority_node);
+	vy_min_dumps_per_compaction_update(&lsm->min_dumps_per_compaction,
+					   &range->dumps_per_compaction_node);
 	vy_scheduler_update_lsm(scheduler, lsm);
 
 	say_info("%s: completed compacting range %s",
@@ -1701,6 +1708,7 @@ vy_task_compaction_new(struct vy_scheduler *scheduler, struct vy_worker *worker,
 			goto err_wi_sub;
 		new_run->dump_lsn = MAX(new_run->dump_lsn,
 					slice->run->dump_lsn);
+		new_run->dump_count += slice->run->dump_count;
 		/* Remember the slices we are compacting. */
 		if (task->first_slice == NULL)
 			task->first_slice = slice;
@@ -1709,6 +1717,8 @@ vy_task_compaction_new(struct vy_scheduler *scheduler, struct vy_worker *worker,
 		if (--n == 0)
 			break;
 	}
+	if (range->compaction_priority == range->slice_count)
+		new_run->dump_count -= slice->run->dump_count;
 	assert(n == 0);
 	assert(new_run->dump_lsn >= 0);
 
diff --git a/test/vinyl/layout.result b/test/vinyl/layout.result
index 14201c5d..0e9d7260 100644
--- a/test/vinyl/layout.result
+++ b/test/vinyl/layout.result
@@ -141,7 +141,7 @@ result
       - HEADER:
           type: INSERT
         BODY:
-          tuple: [5, {2: 8, 9: 11}]
+          tuple: [5, {2: 8, 16: 1, 9: 11}]
       - HEADER:
           type: INSERT
         BODY:
@@ -166,7 +166,7 @@ result
       - HEADER:
           type: INSERT
         BODY:
-          tuple: [5, {0: 2, 2: 6, 9: 11}]
+          tuple: [5, {0: 2, 2: 6, 16: 1, 9: 11}]
       - HEADER:
           type: INSERT
         BODY:
@@ -206,7 +206,7 @@ result
           timestamp: <timestamp>
           type: INSERT
         BODY:
-          tuple: [5, {0: 2, 2: 10, 9: 14}]
+          tuple: [5, {0: 2, 2: 10, 16: 1, 9: 14}]
       - HEADER:
           timestamp: <timestamp>
           type: INSERT
@@ -226,7 +226,7 @@ result
           timestamp: <timestamp>
           type: INSERT
         BODY:
-          tuple: [5, {2: 12, 9: 14}]
+          tuple: [5, {2: 12, 16: 1, 9: 14}]
       - HEADER:
           timestamp: <timestamp>
           type: INSERT
diff --git a/test/vinyl/stat.result b/test/vinyl/stat.result
index 419d3e6c..b0b569ab 100644
--- a/test/vinyl/stat.result
+++ b/test/vinyl/stat.result
@@ -129,7 +129,8 @@ test_run:cmd("setopt delimiter ''");
 -- initially stats are empty
 istat()
 ---
-- rows: 0
+- dumps_per_compaction: 0
+  rows: 0
   run_avg: 0
   bytes: 0
   upsert:
@@ -294,10 +295,7 @@ wait(istat, st, 'disk.dump.count', 1)
 ...
 stat_diff(istat(), st)
 ---
-- rows: 25
-  run_avg: 1
-  run_count: 1
-  disk:
+- disk:
     last_level:
       bytes: 26049
       pages: 7
@@ -321,6 +319,10 @@ stat_diff(istat(), st)
     pages: 7
     bytes_compressed: <bytes_compressed>
     bloom_size: 70
+  rows: 25
+  run_avg: 1
+  run_count: 1
+  dumps_per_compaction: 1
   bytes: 26049
   put:
     rows: 25
@@ -998,7 +1000,8 @@ box.stat.reset()
 ...
 istat()
 ---
-- rows: 306
+- dumps_per_compaction: 1
+  rows: 306
   run_avg: 1
   bytes: 317731
   upsert:
@@ -1732,6 +1735,124 @@ box.stat.vinyl().disk.data_compacted
 ---
 - 0
 ...
+--
+-- Number of dumps needed to trigger major compaction in
+-- an LSM tree range.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+i = s:create_index('primary', {page_size = 128, range_size = 8192, run_count_per_level = 2, run_size_ratio = 5})
+---
+...
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function dump(a, b)
+    for i = a, b do
+        s:replace{i, digest.urandom(100)}
+    end
+    box.snapshot()
+end;
+---
+...
+function wait_compaction(count)
+    test_run:wait_cond(function()
+        return i:stat().disk.compaction.count == count
+    end, 10)
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+dump(1, 100)
+---
+...
+i:stat().dumps_per_compaction -- 1
+---
+- 1
+...
+dump(1, 100) -- compaction
+---
+...
+dump(1, 100) -- split + compaction
+---
+...
+wait_compaction(3)
+---
+...
+i:stat().range_count -- 2
+---
+- 2
+...
+i:stat().dumps_per_compaction -- 1
+---
+- 1
+...
+dump(1, 20)
+---
+...
+dump(1, 10) -- compaction in range 1
+---
+...
+wait_compaction(4)
+---
+...
+i:stat().dumps_per_compaction -- 1
+---
+- 1
+...
+dump(80, 100)
+---
+...
+dump(90, 100) -- compaction in range 2
+---
+...
+wait_compaction(5)
+---
+...
+i:stat().dumps_per_compaction -- 2
+---
+- 2
+...
+test_run:cmd('restart server test')
+fiber = require('fiber')
+---
+...
+digest = require('digest')
+---
+...
+s = box.space.test
+---
+...
+i = s.index.primary
+---
+...
+i:stat().dumps_per_compaction -- 2
+---
+- 2
+...
+for i = 1, 100 do s:replace{i, digest.urandom(100)} end
+---
+...
+box.snapshot()
+---
+- ok
+...
+test_run:wait_cond(function() return i:stat().disk.compaction.count == 2 end, 10)
+---
+- true
+...
+i:stat().dumps_per_compaction -- 1
+---
+- 1
+...
+s:drop()
+---
+...
 test_run:cmd('switch default')
 ---
 - true
diff --git a/test/vinyl/stat.test.lua b/test/vinyl/stat.test.lua
index 4a955682..73729f49 100644
--- a/test/vinyl/stat.test.lua
+++ b/test/vinyl/stat.test.lua
@@ -528,6 +528,63 @@ s:drop()
 
 box.stat.vinyl().disk.data_compacted
 
+--
+-- Number of dumps needed to trigger major compaction in
+-- an LSM tree range.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+i = s:create_index('primary', {page_size = 128, range_size = 8192, run_count_per_level = 2, run_size_ratio = 5})
+
+test_run:cmd("setopt delimiter ';'")
+function dump(a, b)
+    for i = a, b do
+        s:replace{i, digest.urandom(100)}
+    end
+    box.snapshot()
+end;
+function wait_compaction(count)
+    test_run:wait_cond(function()
+        return i:stat().disk.compaction.count == count
+    end, 10)
+end;
+test_run:cmd("setopt delimiter ''");
+
+dump(1, 100)
+i:stat().dumps_per_compaction -- 1
+
+dump(1, 100) -- compaction
+dump(1, 100) -- split + compaction
+wait_compaction(3)
+i:stat().range_count -- 2
+i:stat().dumps_per_compaction -- 1
+
+dump(1, 20)
+dump(1, 10) -- compaction in range 1
+wait_compaction(4)
+i:stat().dumps_per_compaction -- 1
+
+dump(80, 100)
+dump(90, 100) -- compaction in range 2
+wait_compaction(5)
+i:stat().dumps_per_compaction -- 2
+
+test_run:cmd('restart server test')
+
+fiber = require('fiber')
+digest = require('digest')
+
+s = box.space.test
+i = s.index.primary
+
+i:stat().dumps_per_compaction -- 2
+for i = 1, 100 do s:replace{i, digest.urandom(100)} end
+box.snapshot()
+test_run:wait_cond(function() return i:stat().disk.compaction.count == 2 end, 10)
+
+i:stat().dumps_per_compaction -- 1
+
+s:drop()
+
 test_run:cmd('switch default')
 test_run:cmd('stop server test')
 test_run:cmd('cleanup server test')
-- 
2.11.0



