[PATCH 2/2] vinyl: allow to disable bloom filter for index
Vladimir Davydov
vdavydov.dev at gmail.com
Sun Feb 18 19:05:57 MSK 2018
Not all workloads need bloom filters enabled for all indexes. Let's
allow disabling them on a per-index basis by setting bloom_fpr to 1.
This saves some memory when bloom filters are unused.
Closes #3138
---
src/box/vy_index.c | 8 ++++----
src/box/vy_run.c | 48 ++++++++++++++++++++++++++++++-----------------
src/box/vy_run.h | 6 ++++++
test/vinyl/bloom.result | 38 +++++++++++++++++++++++++++++++++++--
test/vinyl/bloom.test.lua | 16 ++++++++++++++--
5 files changed, 91 insertions(+), 25 deletions(-)
diff --git a/src/box/vy_index.c b/src/box/vy_index.c
index 0596c4ed..3e8dadc6 100644
--- a/src/box/vy_index.c
+++ b/src/box/vy_index.c
@@ -702,10 +702,10 @@ vy_index_add_run(struct vy_index *index, struct vy_run *run)
index->run_count++;
vy_disk_stmt_counter_add(&index->stat.disk.count, &run->count);
- index->bloom_size += bloom_store_size(&run->info.bloom);
+ index->bloom_size += vy_run_bloom_size(run);
index->page_index_size += run->page_index_size;
- index->env->bloom_size += bloom_store_size(&run->info.bloom);
+ index->env->bloom_size += vy_run_bloom_size(run);
index->env->page_index_size += run->page_index_size;
}
@@ -718,10 +718,10 @@ vy_index_remove_run(struct vy_index *index, struct vy_run *run)
index->run_count--;
vy_disk_stmt_counter_sub(&index->stat.disk.count, &run->count);
- index->bloom_size -= bloom_store_size(&run->info.bloom);
+ index->bloom_size -= vy_run_bloom_size(run);
index->page_index_size -= run->page_index_size;
- index->env->bloom_size -= bloom_store_size(&run->info.bloom);
+ index->env->bloom_size -= vy_run_bloom_size(run);
index->env->page_index_size -= run->page_index_size;
}
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index ccbb0ca1..c7dd8f26 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -2030,7 +2030,8 @@ vy_run_write_page(struct vy_run *run, struct xlog *data_xlog,
cmp_def, is_primary) != 0)
goto error_rollback;
- bloom_spectrum_add(bs, tuple_hash(*curr_stmt, key_def));
+ if (bs != NULL)
+ bloom_spectrum_add(bs, tuple_hash(*curr_stmt, key_def));
int64_t lsn = vy_stmt_lsn(*curr_stmt);
run->info.min_lsn = MIN(run->info.min_lsn, lsn);
@@ -2139,8 +2140,9 @@ vy_run_write_data(struct vy_run *run, const char *dirpath,
goto done;
struct bloom_spectrum bs;
- if (bloom_spectrum_create(&bs, max_output_count,
- bloom_fpr, runtime.quota) != 0) {
+ bool has_bloom = bloom_fpr < 1;
+ if (has_bloom && bloom_spectrum_create(&bs, max_output_count,
+ bloom_fpr, runtime.quota) != 0) {
diag_set(OutOfMemory, 0,
"bloom_spectrum_create", "bloom_spectrum");
goto err;
@@ -2167,8 +2169,8 @@ vy_run_write_data(struct vy_run *run, const char *dirpath,
uint32_t page_info_capacity = 0;
int rc;
do {
- rc = vy_run_write_page(run, &data_xlog, wi, &stmt,
- page_size, &bs, cmp_def, key_def,
+ rc = vy_run_write_page(run, &data_xlog, wi, &stmt, page_size,
+ has_bloom ? &bs : NULL, cmp_def, key_def,
iid == 0, &page_info_capacity);
if (rc < 0)
goto err_close_xlog;
@@ -2184,9 +2186,11 @@ vy_run_write_data(struct vy_run *run, const char *dirpath,
xlog_close(&data_xlog, true);
fiber_gc();
- bloom_spectrum_choose(&bs, &run->info.bloom);
- run->info.has_bloom = true;
- bloom_spectrum_destroy(&bs, runtime.quota);
+ if (has_bloom) {
+ bloom_spectrum_choose(&bs, &run->info.bloom);
+ run->info.has_bloom = true;
+ bloom_spectrum_destroy(&bs, runtime.quota);
+ }
done:
wi->iface->stop(wi);
return 0;
@@ -2195,7 +2199,8 @@ err_close_xlog:
xlog_close(&data_xlog, false);
fiber_gc();
err_free_bloom:
- bloom_spectrum_destroy(&bs, runtime.quota);
+ if (has_bloom)
+ bloom_spectrum_destroy(&bs, runtime.quota);
err:
wi->iface->stop(wi);
return -1;
@@ -2334,8 +2339,11 @@ vy_run_info_encode(const struct vy_run_info *run_info,
mp_next(&tmp);
size_t max_key_size = tmp - run_info->max_key;
- assert(run_info->has_bloom);
- size_t size = mp_sizeof_map(6);
+ uint32_t key_count = 5;
+ if (run_info->has_bloom)
+ key_count++;
+
+ size_t size = mp_sizeof_map(key_count);
size += mp_sizeof_uint(VY_RUN_INFO_MIN_KEY) + min_key_size;
size += mp_sizeof_uint(VY_RUN_INFO_MAX_KEY) + max_key_size;
size += mp_sizeof_uint(VY_RUN_INFO_MIN_LSN) +
@@ -2344,8 +2352,9 @@ vy_run_info_encode(const struct vy_run_info *run_info,
mp_sizeof_uint(run_info->max_lsn);
size += mp_sizeof_uint(VY_RUN_INFO_PAGE_COUNT) +
mp_sizeof_uint(run_info->page_count);
- size += mp_sizeof_uint(VY_RUN_INFO_BLOOM) +
- vy_run_bloom_encode_size(&run_info->bloom);
+ if (run_info->has_bloom)
+ size += mp_sizeof_uint(VY_RUN_INFO_BLOOM) +
+ vy_run_bloom_encode_size(&run_info->bloom);
char *pos = region_alloc(&fiber()->gc, size);
if (pos == NULL) {
@@ -2355,7 +2364,7 @@ vy_run_info_encode(const struct vy_run_info *run_info,
memset(xrow, 0, sizeof(*xrow));
xrow->body->iov_base = pos;
/* encode values */
- pos = mp_encode_map(pos, 6);
+ pos = mp_encode_map(pos, key_count);
pos = mp_encode_uint(pos, VY_RUN_INFO_MIN_KEY);
memcpy(pos, run_info->min_key, min_key_size);
pos += min_key_size;
@@ -2368,8 +2377,10 @@ vy_run_info_encode(const struct vy_run_info *run_info,
pos = mp_encode_uint(pos, run_info->max_lsn);
pos = mp_encode_uint(pos, VY_RUN_INFO_PAGE_COUNT);
pos = mp_encode_uint(pos, run_info->page_count);
- pos = mp_encode_uint(pos, VY_RUN_INFO_BLOOM);
- pos = vy_run_bloom_encode(&run_info->bloom, pos);
+ if (run_info->has_bloom) {
+ pos = mp_encode_uint(pos, VY_RUN_INFO_BLOOM);
+ pos = vy_run_bloom_encode(&run_info->bloom, pos);
+ }
xrow->body->iov_len = (void *)pos - xrow->body->iov_base;
xrow->bodycnt = 1;
xrow->type = VY_INDEX_RUN_INFO;
@@ -2560,6 +2571,9 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir,
}
run->info.max_lsn = max_lsn;
run->info.min_lsn = min_lsn;
+
+ if (opts->bloom_fpr >= 1)
+ goto done;
if (xlog_cursor_reset(&cursor) != 0)
goto close_err;
if (bloom_create(&run->info.bloom, run_row_count,
@@ -2580,7 +2594,7 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir,
bloom_add(&run->info.bloom, tuple_hash(tuple, key_def));
}
run->info.has_bloom = true;
-
+done:
region_truncate(region, mem_used);
run->fd = cursor.fd;
xlog_cursor_close(&cursor, true);
diff --git a/src/box/vy_run.h b/src/box/vy_run.h
index cf0569c2..ab980f89 100644
--- a/src/box/vy_run.h
+++ b/src/box/vy_run.h
@@ -301,6 +301,12 @@ vy_run_env_destroy(struct vy_run_env *env);
void
vy_run_env_enable_coio(struct vy_run_env *env, int threads);
+static inline size_t
+vy_run_bloom_size(struct vy_run *run)
+{
+ return run->info.has_bloom ? bloom_store_size(&run->info.bloom) : 0;
+}
+
static inline struct vy_page_info *
vy_run_page_info(struct vy_run *run, uint32_t pos)
{
diff --git a/test/vinyl/bloom.result b/test/vinyl/bloom.result
index 9d62b602..3c5b503d 100644
--- a/test/vinyl/bloom.result
+++ b/test/vinyl/bloom.result
@@ -1,7 +1,41 @@
-#!/usr/bin/env tarantool
+test_run = require('test_run').new()
---
...
-test_run = require('test_run').new()
+--
+-- Setting bloom_fpr to 1 disables bloom filter.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('pk', {bloom_fpr = 1})
+---
+...
+for i = 1, 10, 2 do s:insert{i} end
+---
+...
+box.snapshot()
+---
+- ok
+...
+for i = 1, 10 do s:get{i} end
+---
+...
+stat = s.index.pk:info()
+---
+...
+stat.disk.bloom_size -- 0
+---
+- 0
+...
+stat.disk.iterator.bloom.hit -- 0
+---
+- 0
+...
+stat.disk.iterator.bloom.miss -- 0
+---
+- 0
+...
+s:drop()
---
...
s = box.schema.space.create('test', {engine = 'vinyl'})
diff --git a/test/vinyl/bloom.test.lua b/test/vinyl/bloom.test.lua
index 2f7b68f9..5fecfde1 100644
--- a/test/vinyl/bloom.test.lua
+++ b/test/vinyl/bloom.test.lua
@@ -1,7 +1,19 @@
-#!/usr/bin/env tarantool
-
test_run = require('test_run').new()
+--
+-- Setting bloom_fpr to 1 disables bloom filter.
+--
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('pk', {bloom_fpr = 1})
+for i = 1, 10, 2 do s:insert{i} end
+box.snapshot()
+for i = 1, 10 do s:get{i} end
+stat = s.index.pk:info()
+stat.disk.bloom_size -- 0
+stat.disk.iterator.bloom.hit -- 0
+stat.disk.iterator.bloom.miss -- 0
+s:drop()
+
s = box.schema.space.create('test', {engine = 'vinyl'})
_ = s:create_index('pk')
--
2.11.0
More information about the Tarantool-patches
mailing list