[PATCH v2 07/14] vinyl: add helpers to add/check statement with bloom

Vladimir Davydov vdavydov.dev at gmail.com
Wed Mar 13 11:52:53 MSK 2019


A Vinyl statement may be either a key or a tuple. We must use different
functions for the two kinds when working with a bloom filter. Let's
introduce helpers incorporating that logic.

Notes:
 - Currently, we never add keys to bloom filters, but after the next
   patch we will, so this patch adds tuple_bloom_builder_add_key helper.
 - According to the function protocol, tuple_bloom_builder_add may fail
   with out-of-memory, but we never checked that. Fix that while we are
   at it.
---
 src/box/tuple_bloom.c | 23 +++++++++++++++++++++++
 src/box/tuple_bloom.h | 15 +++++++++++++++
 src/box/vy_run.c      | 37 ++++++++++++++-----------------------
 src/box/vy_stmt.c     | 29 +++++++++++++++++++++++++++++
 src/box/vy_stmt.h     | 18 ++++++++++++++++++
 5 files changed, 99 insertions(+), 23 deletions(-)

diff --git a/src/box/tuple_bloom.c b/src/box/tuple_bloom.c
index 367b1faa..8d918065 100644
--- a/src/box/tuple_bloom.c
+++ b/src/box/tuple_bloom.c
@@ -123,6 +123,29 @@ tuple_bloom_builder_add(struct tuple_bloom_builder *builder,
 	return 0;
 }
 
+int
+tuple_bloom_builder_add_key(struct tuple_bloom_builder *builder,
+			    const char *key, uint32_t part_count,
+			    struct key_def *key_def)
+{
+	(void)part_count;
+	assert(part_count >= key_def->part_count);
+	assert(builder->part_count == key_def->part_count);
+
+	uint32_t h = HASH_SEED;
+	uint32_t carry = 0;
+	uint32_t total_size = 0;
+
+	for (uint32_t i = 0; i < key_def->part_count; i++) {
+		total_size += tuple_hash_field(&h, &carry, &key,
+					       key_def->parts[i].coll);
+		uint32_t hash = PMurHash32_Result(h, carry, total_size);
+		if (tuple_hash_array_add(&builder->parts[i], hash) != 0)
+			return -1;
+	}
+	return 0;
+}
+
 struct tuple_bloom *
 tuple_bloom_new(struct tuple_bloom_builder *builder, double fpr)
 {
diff --git a/src/box/tuple_bloom.h b/src/box/tuple_bloom.h
index 8380801e..f653c7ed 100644
--- a/src/box/tuple_bloom.h
+++ b/src/box/tuple_bloom.h
@@ -121,6 +121,21 @@ tuple_bloom_builder_add(struct tuple_bloom_builder *builder,
 			const struct tuple *tuple, struct key_def *key_def);
 
 /**
+ * Add a key hash to a tuple bloom filter builder.
+ * @param builder - bloom filter builder
+ * @param key - key to add
+ * @param part_count - number of parts in the key
+ * @param key_def - key definition
+ * @param hashed_parts - number of key parts that have already
+ *  been added to the builder
+ * @return 0 on success, -1 on OOM
+ */
+int
+tuple_bloom_builder_add_key(struct tuple_bloom_builder *builder,
+			    const char *key, uint32_t part_count,
+			    struct key_def *key_def);
+
+/**
  * Create a new tuple bloom filter.
  * @param builder - bloom filter builder
  * @param fpr - desired false positive rate
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index e2123fe7..57f62b72 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -1248,22 +1248,11 @@ vy_run_iterator_do_seek(struct vy_run_iterator *itr,
 
 	struct tuple_bloom *bloom = run->info.bloom;
 	struct key_def *key_def = itr->key_def;
-	if (iterator_type == ITER_EQ && bloom != NULL) {
-		bool need_lookup;
-		if (vy_stmt_is_key(key)) {
-			const char *data = tuple_data(key);
-			uint32_t part_count = mp_decode_array(&data);
-			need_lookup = tuple_bloom_maybe_has_key(bloom, data,
-							part_count, key_def);
-		} else {
-			need_lookup = tuple_bloom_maybe_has(bloom, key,
-							    key_def);
-		}
-		if (!need_lookup) {
-			itr->search_ended = true;
-			itr->stat->bloom_hit++;
-			return 0;
-		}
+	if (iterator_type == ITER_EQ && bloom != NULL &&
+	    !vy_stmt_bloom_maybe_has(bloom, key, key_def)) {
+		itr->search_ended = true;
+		itr->stat->bloom_hit++;
+		return 0;
 	}
 
 	itr->stat->lookup++;
@@ -2177,10 +2166,10 @@ vy_run_writer_start_page(struct vy_run_writer *writer,
 static int
 vy_run_writer_write_to_page(struct vy_run_writer *writer, struct tuple *stmt)
 {
-	if (writer->bloom != NULL) {
-		tuple_bloom_builder_add(writer->bloom, stmt,
-					writer->key_def);
-	}
+	if (writer->bloom != NULL &&
+	    vy_stmt_bloom_builder_add(writer->bloom, stmt,
+				      writer->key_def) != 0)
+		return -1;
 	if (writer->last_stmt != NULL)
 		vy_stmt_unref_if_possible(writer->last_stmt);
 	writer->last_stmt = stmt;
@@ -2405,9 +2394,11 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir,
 							     format, iid == 0);
 			if (tuple == NULL)
 				goto close_err;
-			if (bloom_builder != NULL) {
-				tuple_bloom_builder_add(bloom_builder, tuple,
-							key_def);
+			if (bloom_builder != NULL &&
+			    vy_stmt_bloom_builder_add(bloom_builder, tuple,
+						      key_def) != 0) {
+				tuple_unref(tuple);
+				goto close_err;
 			}
 			key = tuple_extract_key(tuple, cmp_def, NULL);
 			if (prev_tuple != NULL)
diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
index 954ec276..229f8b41 100644
--- a/src/box/vy_stmt.c
+++ b/src/box/vy_stmt.c
@@ -41,6 +41,7 @@
 #include <small/lsregion.h>
 
 #include "error.h"
+#include "tuple_bloom.h"
 #include "tuple_format.h"
 #include "xrow.h"
 #include "fiber.h"
@@ -663,6 +664,34 @@ vy_stmt_extract_key_raw(const char *data, const char *data_end,
 	return key;
 }
 
+int
+vy_stmt_bloom_builder_add(struct tuple_bloom_builder *builder,
+			  const struct tuple *stmt, struct key_def *key_def)
+{
+	if (vy_stmt_is_key(stmt)) {
+		const char *data = tuple_data(stmt);
+		uint32_t part_count = mp_decode_array(&data);
+		return tuple_bloom_builder_add_key(builder, data,
+						   part_count, key_def);
+	} else {
+		return tuple_bloom_builder_add(builder, stmt, key_def);
+	}
+}
+
+bool
+vy_stmt_bloom_maybe_has(const struct tuple_bloom *bloom,
+			const struct tuple *stmt, struct key_def *key_def)
+{
+	if (vy_stmt_is_key(stmt)) {
+		const char *data = tuple_data(stmt);
+		uint32_t part_count = mp_decode_array(&data);
+		return tuple_bloom_maybe_has_key(bloom, data,
+						 part_count, key_def);
+	} else {
+		return tuple_bloom_maybe_has(bloom, stmt, key_def);
+	}
+}
+
 /**
  * Encode the given statement meta data in a request.
  * Returns 0 on success, -1 on memory allocation error.
diff --git a/src/box/vy_stmt.h b/src/box/vy_stmt.h
index 7c9b3677..7b93883d 100644
--- a/src/box/vy_stmt.h
+++ b/src/box/vy_stmt.h
@@ -50,6 +50,8 @@ struct xrow_header;
 struct region;
 struct tuple_format;
 struct tuple_dictionary;
+struct tuple_bloom;
+struct tuple_bloom_builder;
 struct iovec;
 
 #define MAX_LSN (INT64_MAX / 2)
@@ -595,6 +597,22 @@ vy_stmt_extract_key_raw(const char *data, const char *data_end,
 			struct tuple_format *format);
 
 /**
+ * Add a statement hash to a bloom filter builder.
+ * See tuple_bloom_builder_add() for more details.
+ */
+int
+vy_stmt_bloom_builder_add(struct tuple_bloom_builder *builder,
+			  const struct tuple *stmt, struct key_def *key_def);
+
+/**
+ * Check if a statement hash is present in a bloom filter.
+ * See tuple_bloom_maybe_has() for more details.
+ */
+bool
+vy_stmt_bloom_maybe_has(const struct tuple_bloom *bloom,
+			const struct tuple *stmt, struct key_def *key_def);
+
+/**
  * Encode vy_stmt for a primary key as xrow_header
  *
  * @param value statement to encode
-- 
2.11.0




More information about the Tarantool-patches mailing list