[PATCH 2/2] vinyl: allow to disable bloom filter for index

Vladimir Davydov vdavydov.dev at gmail.com
Sun Feb 18 19:05:57 MSK 2018


Not all workloads need bloom filters enabled for all indexes. Let's
allow disabling them on a per-index basis by setting bloom_fpr to 1.
This saves memory when bloom filters are not used.

Closes #3138
---
 src/box/vy_index.c        |  8 ++++----
 src/box/vy_run.c          | 48 ++++++++++++++++++++++++++++++-----------------
 src/box/vy_run.h          |  6 ++++++
 test/vinyl/bloom.result   | 38 +++++++++++++++++++++++++++++++++++--
 test/vinyl/bloom.test.lua | 16 ++++++++++++++--
 5 files changed, 91 insertions(+), 25 deletions(-)

diff --git a/src/box/vy_index.c b/src/box/vy_index.c
index 0596c4ed..3e8dadc6 100644
--- a/src/box/vy_index.c
+++ b/src/box/vy_index.c
@@ -702,10 +702,10 @@ vy_index_add_run(struct vy_index *index, struct vy_run *run)
 	index->run_count++;
 	vy_disk_stmt_counter_add(&index->stat.disk.count, &run->count);
 
-	index->bloom_size += bloom_store_size(&run->info.bloom);
+	index->bloom_size += vy_run_bloom_size(run);
 	index->page_index_size += run->page_index_size;
 
-	index->env->bloom_size += bloom_store_size(&run->info.bloom);
+	index->env->bloom_size += vy_run_bloom_size(run);
 	index->env->page_index_size += run->page_index_size;
 }
 
@@ -718,10 +718,10 @@ vy_index_remove_run(struct vy_index *index, struct vy_run *run)
 	index->run_count--;
 	vy_disk_stmt_counter_sub(&index->stat.disk.count, &run->count);
 
-	index->bloom_size -= bloom_store_size(&run->info.bloom);
+	index->bloom_size -= vy_run_bloom_size(run);
 	index->page_index_size -= run->page_index_size;
 
-	index->env->bloom_size -= bloom_store_size(&run->info.bloom);
+	index->env->bloom_size -= vy_run_bloom_size(run);
 	index->env->page_index_size -= run->page_index_size;
 }
 
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index ccbb0ca1..c7dd8f26 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -2030,7 +2030,8 @@ vy_run_write_page(struct vy_run *run, struct xlog *data_xlog,
 				     cmp_def, is_primary) != 0)
 			goto error_rollback;
 
-		bloom_spectrum_add(bs, tuple_hash(*curr_stmt, key_def));
+		if (bs != NULL)
+			bloom_spectrum_add(bs, tuple_hash(*curr_stmt, key_def));
 
 		int64_t lsn = vy_stmt_lsn(*curr_stmt);
 		run->info.min_lsn = MIN(run->info.min_lsn, lsn);
@@ -2139,8 +2140,9 @@ vy_run_write_data(struct vy_run *run, const char *dirpath,
 		goto done;
 
 	struct bloom_spectrum bs;
-	if (bloom_spectrum_create(&bs, max_output_count,
-				  bloom_fpr, runtime.quota) != 0) {
+	bool has_bloom = bloom_fpr < 1;
+	if (has_bloom && bloom_spectrum_create(&bs, max_output_count,
+					bloom_fpr, runtime.quota) != 0) {
 		diag_set(OutOfMemory, 0,
 			 "bloom_spectrum_create", "bloom_spectrum");
 		goto err;
@@ -2167,8 +2169,8 @@ vy_run_write_data(struct vy_run *run, const char *dirpath,
 	uint32_t page_info_capacity = 0;
 	int rc;
 	do {
-		rc = vy_run_write_page(run, &data_xlog, wi, &stmt,
-				       page_size, &bs, cmp_def, key_def,
+		rc = vy_run_write_page(run, &data_xlog, wi, &stmt, page_size,
+				       has_bloom ? &bs : NULL, cmp_def, key_def,
 				       iid == 0, &page_info_capacity);
 		if (rc < 0)
 			goto err_close_xlog;
@@ -2184,9 +2186,11 @@ vy_run_write_data(struct vy_run *run, const char *dirpath,
 	xlog_close(&data_xlog, true);
 	fiber_gc();
 
-	bloom_spectrum_choose(&bs, &run->info.bloom);
-	run->info.has_bloom = true;
-	bloom_spectrum_destroy(&bs, runtime.quota);
+	if (has_bloom) {
+		bloom_spectrum_choose(&bs, &run->info.bloom);
+		run->info.has_bloom = true;
+		bloom_spectrum_destroy(&bs, runtime.quota);
+	}
 done:
 	wi->iface->stop(wi);
 	return 0;
@@ -2195,7 +2199,8 @@ err_close_xlog:
 	xlog_close(&data_xlog, false);
 	fiber_gc();
 err_free_bloom:
-	bloom_spectrum_destroy(&bs, runtime.quota);
+	if (has_bloom)
+		bloom_spectrum_destroy(&bs, runtime.quota);
 err:
 	wi->iface->stop(wi);
 	return -1;
@@ -2334,8 +2339,11 @@ vy_run_info_encode(const struct vy_run_info *run_info,
 	mp_next(&tmp);
 	size_t max_key_size = tmp - run_info->max_key;
 
-	assert(run_info->has_bloom);
-	size_t size = mp_sizeof_map(6);
+	uint32_t key_count = 5;
+	if (run_info->has_bloom)
+		key_count++;
+
+	size_t size = mp_sizeof_map(key_count);
 	size += mp_sizeof_uint(VY_RUN_INFO_MIN_KEY) + min_key_size;
 	size += mp_sizeof_uint(VY_RUN_INFO_MAX_KEY) + max_key_size;
 	size += mp_sizeof_uint(VY_RUN_INFO_MIN_LSN) +
@@ -2344,8 +2352,9 @@ vy_run_info_encode(const struct vy_run_info *run_info,
 		mp_sizeof_uint(run_info->max_lsn);
 	size += mp_sizeof_uint(VY_RUN_INFO_PAGE_COUNT) +
 		mp_sizeof_uint(run_info->page_count);
-	size += mp_sizeof_uint(VY_RUN_INFO_BLOOM) +
-		vy_run_bloom_encode_size(&run_info->bloom);
+	if (run_info->has_bloom)
+		size += mp_sizeof_uint(VY_RUN_INFO_BLOOM) +
+			vy_run_bloom_encode_size(&run_info->bloom);
 
 	char *pos = region_alloc(&fiber()->gc, size);
 	if (pos == NULL) {
@@ -2355,7 +2364,7 @@ vy_run_info_encode(const struct vy_run_info *run_info,
 	memset(xrow, 0, sizeof(*xrow));
 	xrow->body->iov_base = pos;
 	/* encode values */
-	pos = mp_encode_map(pos, 6);
+	pos = mp_encode_map(pos, key_count);
 	pos = mp_encode_uint(pos, VY_RUN_INFO_MIN_KEY);
 	memcpy(pos, run_info->min_key, min_key_size);
 	pos += min_key_size;
@@ -2368,8 +2377,10 @@ vy_run_info_encode(const struct vy_run_info *run_info,
 	pos = mp_encode_uint(pos, run_info->max_lsn);
 	pos = mp_encode_uint(pos, VY_RUN_INFO_PAGE_COUNT);
 	pos = mp_encode_uint(pos, run_info->page_count);
-	pos = mp_encode_uint(pos, VY_RUN_INFO_BLOOM);
-	pos = vy_run_bloom_encode(&run_info->bloom, pos);
+	if (run_info->has_bloom) {
+		pos = mp_encode_uint(pos, VY_RUN_INFO_BLOOM);
+		pos = vy_run_bloom_encode(&run_info->bloom, pos);
+	}
 	xrow->body->iov_len = (void *)pos - xrow->body->iov_base;
 	xrow->bodycnt = 1;
 	xrow->type = VY_INDEX_RUN_INFO;
@@ -2560,6 +2571,9 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir,
 	}
 	run->info.max_lsn = max_lsn;
 	run->info.min_lsn = min_lsn;
+
+	if (opts->bloom_fpr >= 1)
+		goto done;
 	if (xlog_cursor_reset(&cursor) != 0)
 		goto close_err;
 	if (bloom_create(&run->info.bloom, run_row_count,
@@ -2580,7 +2594,7 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir,
 		bloom_add(&run->info.bloom, tuple_hash(tuple, key_def));
 	}
 	run->info.has_bloom = true;
-
+done:
 	region_truncate(region, mem_used);
 	run->fd = cursor.fd;
 	xlog_cursor_close(&cursor, true);
diff --git a/src/box/vy_run.h b/src/box/vy_run.h
index cf0569c2..ab980f89 100644
--- a/src/box/vy_run.h
+++ b/src/box/vy_run.h
@@ -301,6 +301,12 @@ vy_run_env_destroy(struct vy_run_env *env);
 void
 vy_run_env_enable_coio(struct vy_run_env *env, int threads);
 
+static inline size_t
+vy_run_bloom_size(struct vy_run *run)
+{
+	return run->info.has_bloom ? bloom_store_size(&run->info.bloom) : 0;
+}
+
 static inline struct vy_page_info *
 vy_run_page_info(struct vy_run *run, uint32_t pos)
 {
diff --git a/test/vinyl/bloom.result b/test/vinyl/bloom.result
index 9d62b602..3c5b503d 100644
--- a/test/vinyl/bloom.result
+++ b/test/vinyl/bloom.result
@@ -1,7 +1,41 @@
-#!/usr/bin/env tarantool
+test_run = require('test_run').new()
 ---
 ...
-test_run = require('test_run').new()
+--
+-- Setting bloom_fpr to 1 disables bloom filter.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('pk', {bloom_fpr = 1})
+---
+...
+for i = 1, 10, 2 do s:insert{i} end
+---
+...
+box.snapshot()
+---
+- ok
+...
+for i = 1, 10 do s:get{i} end
+---
+...
+stat = s.index.pk:info()
+---
+...
+stat.disk.bloom_size -- 0
+---
+- 0
+...
+stat.disk.iterator.bloom.hit -- 0
+---
+- 0
+...
+stat.disk.iterator.bloom.miss -- 0
+---
+- 0
+...
+s:drop()
 ---
 ...
 s = box.schema.space.create('test', {engine = 'vinyl'})
diff --git a/test/vinyl/bloom.test.lua b/test/vinyl/bloom.test.lua
index 2f7b68f9..5fecfde1 100644
--- a/test/vinyl/bloom.test.lua
+++ b/test/vinyl/bloom.test.lua
@@ -1,7 +1,19 @@
-#!/usr/bin/env tarantool
-
 test_run = require('test_run').new()
 
+--
+-- Setting bloom_fpr to 1 disables bloom filter.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk', {bloom_fpr = 1})
+for i = 1, 10, 2 do s:insert{i} end
+box.snapshot()
+for i = 1, 10 do s:get{i} end
+stat = s.index.pk:info()
+stat.disk.bloom_size -- 0
+stat.disk.iterator.bloom.hit -- 0
+stat.disk.iterator.bloom.miss -- 0
+s:drop()
+
 s = box.schema.space.create('test', {engine = 'vinyl'})
 _ = s:create_index('pk')
 
-- 
2.11.0




More information about the Tarantool-patches mailing list