[PATCH v2 1/8] vinyl: use uncompressed run size for range split/coalesce/compaction

Vladimir Davydov vdavydov.dev at gmail.com
Thu Jan 24 20:12:37 MSK 2019


Historically, when considering splitting or coalescing a range or
updating compaction priority, we have used the sizes of compressed runs
(see bytes_compressed). This makes the algorithms dependent on whether
compression is used or not and how effective it is, which is weird,
because compression is a way of storing data on disk - it shouldn't
affect the way data is partitioned. E.g. if we turned off compression
at the first LSM tree level — which would make sense, since that level
is relatively small — we would thereby inadvertently change the
behavior of the compaction algorithm.

With that in mind, let's use uncompressed run sizes when considering
range tree transformations.
---
 src/box/vy_range.c                  | 12 ++++++------
 test/vinyl/deferred_delete.result   | 21 +++++++++++++++------
 test/vinyl/deferred_delete.test.lua | 11 +++++++----
 3 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/src/box/vy_range.c b/src/box/vy_range.c
index f649aff7..87c4c6b9 100644
--- a/src/box/vy_range.c
+++ b/src/box/vy_range.c
@@ -329,7 +329,7 @@ vy_range_update_compaction_priority(struct vy_range *range,
 
 	struct vy_slice *slice;
 	rlist_foreach_entry(slice, &range->slices, in_range) {
-		uint64_t size = slice->count.bytes_compressed;
+		uint64_t size = slice->count.bytes;
 		/*
 		 * The size of the first level is defined by
 		 * the size of the most recent run.
@@ -377,7 +377,7 @@ vy_range_update_compaction_priority(struct vy_range *range,
 			 */
 			range->compaction_priority = total_run_count;
 			range->compaction_queue = total_stmt_count;
-			est_new_run_size = total_stmt_count.bytes_compressed;
+			est_new_run_size = total_stmt_count.bytes;
 		}
 	}
 
@@ -419,7 +419,7 @@ vy_range_needs_split(struct vy_range *range, const struct index_opts *opts,
 	slice = rlist_last_entry(&range->slices, struct vy_slice, in_range);
 
 	/* The range is too small to be split. */
-	if (slice->count.bytes_compressed < opts->range_size * 4 / 3)
+	if (slice->count.bytes < opts->range_size * 4 / 3)
 		return false;
 
 	/* Find the median key in the oldest run (approximately). */
@@ -481,7 +481,7 @@ vy_range_needs_coalesce(struct vy_range *range, vy_range_tree_t *tree,
 	struct vy_range *it;
 
 	/* Size of the coalesced range. */
-	uint64_t total_size = range->count.bytes_compressed;
+	uint64_t total_size = range->count.bytes;
 	/* Coalesce ranges until total_size > max_size. */
 	uint64_t max_size = opts->range_size / 2;
 
@@ -496,7 +496,7 @@ vy_range_needs_coalesce(struct vy_range *range, vy_range_tree_t *tree,
 	for (it = vy_range_tree_next(tree, range);
 	     it != NULL && !vy_range_is_scheduled(it);
 	     it = vy_range_tree_next(tree, it)) {
-		uint64_t size = it->count.bytes_compressed;
+		uint64_t size = it->count.bytes;
 		if (total_size + size > max_size)
 			break;
 		total_size += size;
@@ -505,7 +505,7 @@ vy_range_needs_coalesce(struct vy_range *range, vy_range_tree_t *tree,
 	for (it = vy_range_tree_prev(tree, range);
 	     it != NULL && !vy_range_is_scheduled(it);
 	     it = vy_range_tree_prev(tree, it)) {
-		uint64_t size = it->count.bytes_compressed;
+		uint64_t size = it->count.bytes;
 		if (total_size + size > max_size)
 			break;
 		total_size += size;
diff --git a/test/vinyl/deferred_delete.result b/test/vinyl/deferred_delete.result
index 29945f8d..61f81ce2 100644
--- a/test/vinyl/deferred_delete.result
+++ b/test/vinyl/deferred_delete.result
@@ -668,16 +668,13 @@ test_run:cmd("switch test")
 fiber = require('fiber')
 ---
 ...
-digest = require('digest')
----
-...
 s = box.schema.space.create('test', {engine = 'vinyl'})
 ---
 ...
-pk = s:create_index('pk', {run_count_per_level = 10})
+pk = s:create_index('pk', {run_count_per_level = 10, run_size_ratio = 2})
 ---
 ...
-sk = s:create_index('sk', {run_count_per_level = 10, parts = {2, 'unsigned', 3, 'string'}, unique = false})
+sk = s:create_index('sk', {run_count_per_level = 10, run_size_ratio = 2, parts = {2, 'unsigned', 3, 'string'}, unique = false})
 ---
 ...
 -- Write a run big enough to prevent major compaction from kicking in
@@ -685,13 +682,25 @@ sk = s:create_index('sk', {run_count_per_level = 10, parts = {2, 'unsigned', 3,
 dummy_rows = 100
 ---
 ...
-for i = 1, dummy_rows do s:replace{i + 1000, i + 1000, digest.urandom(100)} end
+pad = string.rep('z', 50 * 1024)
+---
+...
+for i = 1, dummy_rows do s:replace{i + 1000, i + 1000, pad} end
 ---
 ...
 box.snapshot()
 ---
 - ok
 ...
+pk:compact()
+---
+...
+sk:compact()
+---
+...
+while box.stat.vinyl().scheduler.compaction_queue > 0 do fiber.sleep(0.001) end
+---
+...
 pad = string.rep('x', 10 * 1024)
 ---
 ...
diff --git a/test/vinyl/deferred_delete.test.lua b/test/vinyl/deferred_delete.test.lua
index d38802da..93b5b358 100644
--- a/test/vinyl/deferred_delete.test.lua
+++ b/test/vinyl/deferred_delete.test.lua
@@ -252,17 +252,20 @@ test_run:cmd("start server test with args='1048576'")
 test_run:cmd("switch test")
 
 fiber = require('fiber')
-digest = require('digest')
 
 s = box.schema.space.create('test', {engine = 'vinyl'})
-pk = s:create_index('pk', {run_count_per_level = 10})
-sk = s:create_index('sk', {run_count_per_level = 10, parts = {2, 'unsigned', 3, 'string'}, unique = false})
+pk = s:create_index('pk', {run_count_per_level = 10, run_size_ratio = 2})
+sk = s:create_index('sk', {run_count_per_level = 10, run_size_ratio = 2, parts = {2, 'unsigned', 3, 'string'}, unique = false})
 
 -- Write a run big enough to prevent major compaction from kicking in
 -- (run_count_per_level is ignored on the last level - see gh-3657).
 dummy_rows = 100
-for i = 1, dummy_rows do s:replace{i + 1000, i + 1000, digest.urandom(100)} end
+pad = string.rep('z', 50 * 1024)
+for i = 1, dummy_rows do s:replace{i + 1000, i + 1000, pad} end
 box.snapshot()
+pk:compact()
+sk:compact()
+while box.stat.vinyl().scheduler.compaction_queue > 0 do fiber.sleep(0.001) end
 
 pad = string.rep('x', 10 * 1024)
 for i = 1, 120 do s:replace{i, i, pad} end
-- 
2.11.0




More information about the Tarantool-patches mailing list