[PATCH v2 07/14] vinyl: add helpers to add/check statement with bloom
Vladimir Davydov
vdavydov.dev at gmail.com
Wed Mar 13 11:52:53 MSK 2019
A Vinyl statement may be either a key or a tuple. We must use different
functions for the two kinds when working with a bloom filter. Let's
introduce helpers incorporating that logic.
Notes:
- Currently, we never add keys to bloom filters, but after the next
patch we will, so this patch adds tuple_bloom_builder_add_key helper.
- According to the function protocol, tuple_bloom_builder_add may fail
with out-of-memory, but we never checked that. Fix that while we are
at it.
---
src/box/tuple_bloom.c | 23 +++++++++++++++++++++++
src/box/tuple_bloom.h | 15 +++++++++++++++
src/box/vy_run.c | 37 ++++++++++++++-----------------------
src/box/vy_stmt.c | 29 +++++++++++++++++++++++++++++
src/box/vy_stmt.h | 18 ++++++++++++++++++
5 files changed, 99 insertions(+), 23 deletions(-)
diff --git a/src/box/tuple_bloom.c b/src/box/tuple_bloom.c
index 367b1faa..8d918065 100644
--- a/src/box/tuple_bloom.c
+++ b/src/box/tuple_bloom.c
@@ -123,6 +123,29 @@ tuple_bloom_builder_add(struct tuple_bloom_builder *builder,
return 0;
}
+int
+tuple_bloom_builder_add_key(struct tuple_bloom_builder *builder,
+ const char *key, uint32_t part_count,
+ struct key_def *key_def)
+{
+ (void)part_count;
+ assert(part_count >= key_def->part_count);
+ assert(builder->part_count == key_def->part_count);
+
+ uint32_t h = HASH_SEED;
+ uint32_t carry = 0;
+ uint32_t total_size = 0;
+
+ for (uint32_t i = 0; i < key_def->part_count; i++) {
+ total_size += tuple_hash_field(&h, &carry, &key,
+ key_def->parts[i].coll);
+ uint32_t hash = PMurHash32_Result(h, carry, total_size);
+ if (tuple_hash_array_add(&builder->parts[i], hash) != 0)
+ return -1;
+ }
+ return 0;
+}
+
struct tuple_bloom *
tuple_bloom_new(struct tuple_bloom_builder *builder, double fpr)
{
diff --git a/src/box/tuple_bloom.h b/src/box/tuple_bloom.h
index 8380801e..f653c7ed 100644
--- a/src/box/tuple_bloom.h
+++ b/src/box/tuple_bloom.h
@@ -121,6 +121,21 @@ tuple_bloom_builder_add(struct tuple_bloom_builder *builder,
const struct tuple *tuple, struct key_def *key_def);
/**
+ * Add a key hash to a tuple bloom filter builder.
+ * @param builder - bloom filter builder
+ * @param key - key to add
+ * @param part_count - number of parts in the key
+ * @param key_def - key definition
+ * @param hashed_parts - number of key parts that have already
+ * been added to the builder
+ * @return 0 on success, -1 on OOM
+ */
+int
+tuple_bloom_builder_add_key(struct tuple_bloom_builder *builder,
+ const char *key, uint32_t part_count,
+ struct key_def *key_def);
+
+/**
* Create a new tuple bloom filter.
* @param builder - bloom filter builder
* @param fpr - desired false positive rate
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index e2123fe7..57f62b72 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -1248,22 +1248,11 @@ vy_run_iterator_do_seek(struct vy_run_iterator *itr,
struct tuple_bloom *bloom = run->info.bloom;
struct key_def *key_def = itr->key_def;
- if (iterator_type == ITER_EQ && bloom != NULL) {
- bool need_lookup;
- if (vy_stmt_is_key(key)) {
- const char *data = tuple_data(key);
- uint32_t part_count = mp_decode_array(&data);
- need_lookup = tuple_bloom_maybe_has_key(bloom, data,
- part_count, key_def);
- } else {
- need_lookup = tuple_bloom_maybe_has(bloom, key,
- key_def);
- }
- if (!need_lookup) {
- itr->search_ended = true;
- itr->stat->bloom_hit++;
- return 0;
- }
+ if (iterator_type == ITER_EQ && bloom != NULL &&
+ !vy_stmt_bloom_maybe_has(bloom, key, key_def)) {
+ itr->search_ended = true;
+ itr->stat->bloom_hit++;
+ return 0;
}
itr->stat->lookup++;
@@ -2177,10 +2166,10 @@ vy_run_writer_start_page(struct vy_run_writer *writer,
static int
vy_run_writer_write_to_page(struct vy_run_writer *writer, struct tuple *stmt)
{
- if (writer->bloom != NULL) {
- tuple_bloom_builder_add(writer->bloom, stmt,
- writer->key_def);
- }
+ if (writer->bloom != NULL &&
+ vy_stmt_bloom_builder_add(writer->bloom, stmt,
+ writer->key_def) != 0)
+ return -1;
if (writer->last_stmt != NULL)
vy_stmt_unref_if_possible(writer->last_stmt);
writer->last_stmt = stmt;
@@ -2405,9 +2394,11 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir,
format, iid == 0);
if (tuple == NULL)
goto close_err;
- if (bloom_builder != NULL) {
- tuple_bloom_builder_add(bloom_builder, tuple,
- key_def);
+ if (bloom_builder != NULL &&
+ vy_stmt_bloom_builder_add(bloom_builder, tuple,
+ key_def) != 0) {
+ tuple_unref(tuple);
+ goto close_err;
}
key = tuple_extract_key(tuple, cmp_def, NULL);
if (prev_tuple != NULL)
diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
index 954ec276..229f8b41 100644
--- a/src/box/vy_stmt.c
+++ b/src/box/vy_stmt.c
@@ -41,6 +41,7 @@
#include <small/lsregion.h>
#include "error.h"
+#include "tuple_bloom.h"
#include "tuple_format.h"
#include "xrow.h"
#include "fiber.h"
@@ -663,6 +664,34 @@ vy_stmt_extract_key_raw(const char *data, const char *data_end,
return key;
}
+int
+vy_stmt_bloom_builder_add(struct tuple_bloom_builder *builder,
+ const struct tuple *stmt, struct key_def *key_def)
+{
+ if (vy_stmt_is_key(stmt)) {
+ const char *data = tuple_data(stmt);
+ uint32_t part_count = mp_decode_array(&data);
+ return tuple_bloom_builder_add_key(builder, data,
+ part_count, key_def);
+ } else {
+ return tuple_bloom_builder_add(builder, stmt, key_def);
+ }
+}
+
+bool
+vy_stmt_bloom_maybe_has(const struct tuple_bloom *bloom,
+ const struct tuple *stmt, struct key_def *key_def)
+{
+ if (vy_stmt_is_key(stmt)) {
+ const char *data = tuple_data(stmt);
+ uint32_t part_count = mp_decode_array(&data);
+ return tuple_bloom_maybe_has_key(bloom, data,
+ part_count, key_def);
+ } else {
+ return tuple_bloom_maybe_has(bloom, stmt, key_def);
+ }
+}
+
/**
* Encode the given statement meta data in a request.
* Returns 0 on success, -1 on memory allocation error.
diff --git a/src/box/vy_stmt.h b/src/box/vy_stmt.h
index 7c9b3677..7b93883d 100644
--- a/src/box/vy_stmt.h
+++ b/src/box/vy_stmt.h
@@ -50,6 +50,8 @@ struct xrow_header;
struct region;
struct tuple_format;
struct tuple_dictionary;
+struct tuple_bloom;
+struct tuple_bloom_builder;
struct iovec;
#define MAX_LSN (INT64_MAX / 2)
@@ -595,6 +597,22 @@ vy_stmt_extract_key_raw(const char *data, const char *data_end,
struct tuple_format *format);
/**
+ * Add a statement hash to a bloom filter builder.
+ * See tuple_bloom_builder_add() for more details.
+ */
+int
+vy_stmt_bloom_builder_add(struct tuple_bloom_builder *builder,
+ const struct tuple *stmt, struct key_def *key_def);
+
+/**
+ * Check if a statement hash is present in a bloom filter.
+ * See tuple_bloom_maybe_has() for more details.
+ */
+bool
+vy_stmt_bloom_maybe_has(const struct tuple_bloom *bloom,
+ const struct tuple *stmt, struct key_def *key_def);
+
+/**
* Encode vy_stmt for a primary key as xrow_header
*
* @param value statement to encode
--
2.11.0
More information about the Tarantool-patches
mailing list