From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH v2 07/14] vinyl: add helpers to add/check statement with bloom
Date: Wed, 13 Mar 2019 11:52:53 +0300 [thread overview]
Message-ID: <2e56f4fe2f0a7f76556b122e3c4a357511144bb1.1552464666.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1552464666.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1552464666.git.vdavydov.dev@gmail.com>
A Vinyl statement may be either a key or a tuple. We must use different
functions for the two kinds when working with a bloom filter. Let's
introduce helpers incorporating that logic.
Notes:
- Currently, we never add keys to bloom filters, but after the next
patch we will, so this patch adds tuple_bloom_builder_add_key helper.
- According to the function protocol, tuple_bloom_builder_add may fail
with out-of-memory, but we never checked that. Fix that while we are
at it.
---
src/box/tuple_bloom.c | 23 +++++++++++++++++++++++
src/box/tuple_bloom.h | 15 +++++++++++++++
src/box/vy_run.c | 37 ++++++++++++++-----------------------
src/box/vy_stmt.c | 29 +++++++++++++++++++++++++++++
src/box/vy_stmt.h | 18 ++++++++++++++++++
5 files changed, 99 insertions(+), 23 deletions(-)
diff --git a/src/box/tuple_bloom.c b/src/box/tuple_bloom.c
index 367b1faa..8d918065 100644
--- a/src/box/tuple_bloom.c
+++ b/src/box/tuple_bloom.c
@@ -123,6 +123,29 @@ tuple_bloom_builder_add(struct tuple_bloom_builder *builder,
return 0;
}
+int
+tuple_bloom_builder_add_key(struct tuple_bloom_builder *builder,
+ const char *key, uint32_t part_count,
+ struct key_def *key_def)
+{
+ (void)part_count;
+ assert(part_count >= key_def->part_count);
+ assert(builder->part_count == key_def->part_count);
+
+ uint32_t h = HASH_SEED;
+ uint32_t carry = 0;
+ uint32_t total_size = 0;
+
+ for (uint32_t i = 0; i < key_def->part_count; i++) {
+ total_size += tuple_hash_field(&h, &carry, &key,
+ key_def->parts[i].coll);
+ uint32_t hash = PMurHash32_Result(h, carry, total_size);
+ if (tuple_hash_array_add(&builder->parts[i], hash) != 0)
+ return -1;
+ }
+ return 0;
+}
+
struct tuple_bloom *
tuple_bloom_new(struct tuple_bloom_builder *builder, double fpr)
{
diff --git a/src/box/tuple_bloom.h b/src/box/tuple_bloom.h
index 8380801e..f653c7ed 100644
--- a/src/box/tuple_bloom.h
+++ b/src/box/tuple_bloom.h
@@ -121,6 +121,21 @@ tuple_bloom_builder_add(struct tuple_bloom_builder *builder,
const struct tuple *tuple, struct key_def *key_def);
/**
+ * Add a key hash to a tuple bloom filter builder.
+ * @param builder - bloom filter builder
+ * @param key - key to add
+ * @param part_count - number of parts in the key
+ * @param key_def - key definition
+ * @param hashed_parts - number of key parts that have already
+ * been added to the builder
+ * @return 0 on success, -1 on OOM
+ */
+int
+tuple_bloom_builder_add_key(struct tuple_bloom_builder *builder,
+ const char *key, uint32_t part_count,
+ struct key_def *key_def);
+
+/**
* Create a new tuple bloom filter.
* @param builder - bloom filter builder
* @param fpr - desired false positive rate
diff --git a/src/box/vy_run.c b/src/box/vy_run.c
index e2123fe7..57f62b72 100644
--- a/src/box/vy_run.c
+++ b/src/box/vy_run.c
@@ -1248,22 +1248,11 @@ vy_run_iterator_do_seek(struct vy_run_iterator *itr,
struct tuple_bloom *bloom = run->info.bloom;
struct key_def *key_def = itr->key_def;
- if (iterator_type == ITER_EQ && bloom != NULL) {
- bool need_lookup;
- if (vy_stmt_is_key(key)) {
- const char *data = tuple_data(key);
- uint32_t part_count = mp_decode_array(&data);
- need_lookup = tuple_bloom_maybe_has_key(bloom, data,
- part_count, key_def);
- } else {
- need_lookup = tuple_bloom_maybe_has(bloom, key,
- key_def);
- }
- if (!need_lookup) {
- itr->search_ended = true;
- itr->stat->bloom_hit++;
- return 0;
- }
+ if (iterator_type == ITER_EQ && bloom != NULL &&
+ !vy_stmt_bloom_maybe_has(bloom, key, key_def)) {
+ itr->search_ended = true;
+ itr->stat->bloom_hit++;
+ return 0;
}
itr->stat->lookup++;
@@ -2177,10 +2166,10 @@ vy_run_writer_start_page(struct vy_run_writer *writer,
static int
vy_run_writer_write_to_page(struct vy_run_writer *writer, struct tuple *stmt)
{
- if (writer->bloom != NULL) {
- tuple_bloom_builder_add(writer->bloom, stmt,
- writer->key_def);
- }
+ if (writer->bloom != NULL &&
+ vy_stmt_bloom_builder_add(writer->bloom, stmt,
+ writer->key_def) != 0)
+ return -1;
if (writer->last_stmt != NULL)
vy_stmt_unref_if_possible(writer->last_stmt);
writer->last_stmt = stmt;
@@ -2405,9 +2394,11 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir,
format, iid == 0);
if (tuple == NULL)
goto close_err;
- if (bloom_builder != NULL) {
- tuple_bloom_builder_add(bloom_builder, tuple,
- key_def);
+ if (bloom_builder != NULL &&
+ vy_stmt_bloom_builder_add(bloom_builder, tuple,
+ key_def) != 0) {
+ tuple_unref(tuple);
+ goto close_err;
}
key = tuple_extract_key(tuple, cmp_def, NULL);
if (prev_tuple != NULL)
diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
index 954ec276..229f8b41 100644
--- a/src/box/vy_stmt.c
+++ b/src/box/vy_stmt.c
@@ -41,6 +41,7 @@
#include <small/lsregion.h>
#include "error.h"
+#include "tuple_bloom.h"
#include "tuple_format.h"
#include "xrow.h"
#include "fiber.h"
@@ -663,6 +664,34 @@ vy_stmt_extract_key_raw(const char *data, const char *data_end,
return key;
}
+int
+vy_stmt_bloom_builder_add(struct tuple_bloom_builder *builder,
+ const struct tuple *stmt, struct key_def *key_def)
+{
+ if (vy_stmt_is_key(stmt)) {
+ const char *data = tuple_data(stmt);
+ uint32_t part_count = mp_decode_array(&data);
+ return tuple_bloom_builder_add_key(builder, data,
+ part_count, key_def);
+ } else {
+ return tuple_bloom_builder_add(builder, stmt, key_def);
+ }
+}
+
+bool
+vy_stmt_bloom_maybe_has(const struct tuple_bloom *bloom,
+ const struct tuple *stmt, struct key_def *key_def)
+{
+ if (vy_stmt_is_key(stmt)) {
+ const char *data = tuple_data(stmt);
+ uint32_t part_count = mp_decode_array(&data);
+ return tuple_bloom_maybe_has_key(bloom, data,
+ part_count, key_def);
+ } else {
+ return tuple_bloom_maybe_has(bloom, stmt, key_def);
+ }
+}
+
/**
* Encode the given statement meta data in a request.
* Returns 0 on success, -1 on memory allocation error.
diff --git a/src/box/vy_stmt.h b/src/box/vy_stmt.h
index 7c9b3677..7b93883d 100644
--- a/src/box/vy_stmt.h
+++ b/src/box/vy_stmt.h
@@ -50,6 +50,8 @@ struct xrow_header;
struct region;
struct tuple_format;
struct tuple_dictionary;
+struct tuple_bloom;
+struct tuple_bloom_builder;
struct iovec;
#define MAX_LSN (INT64_MAX / 2)
@@ -595,6 +597,22 @@ vy_stmt_extract_key_raw(const char *data, const char *data_end,
struct tuple_format *format);
/**
+ * Add a statement hash to a bloom filter builder.
+ * See tuple_bloom_builder_add() for more details.
+ */
+int
+vy_stmt_bloom_builder_add(struct tuple_bloom_builder *builder,
+ const struct tuple *stmt, struct key_def *key_def);
+
+/**
+ * Check if a statement hash is present in a bloom filter.
+ * See tuple_bloom_maybe_has() for more details.
+ */
+bool
+vy_stmt_bloom_maybe_has(const struct tuple_bloom *bloom,
+ const struct tuple *stmt, struct key_def *key_def);
+
+/**
* Encode vy_stmt for a primary key as xrow_header
*
* @param value statement to encode
--
2.11.0
next prev parent reply other threads:[~2019-03-13 8:52 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-03-13 8:52 [PATCH v2 00/14] vinyl: do not fill secondary tuples with nulls Vladimir Davydov
2019-03-13 8:52 ` [PATCH v2 01/14] vinyl: remove optimized comparators Vladimir Davydov
2019-03-13 8:55 ` Konstantin Osipov
2019-03-13 8:52 ` [PATCH v2 02/14] vinyl: introduce statement environment Vladimir Davydov
2019-03-13 8:58 ` Konstantin Osipov
2019-03-13 9:19 ` Vladimir Davydov
2019-03-13 8:52 ` [PATCH v2 03/14] vinyl: rename key stmt construction routine Vladimir Davydov
2019-03-13 8:59 ` Konstantin Osipov
2019-03-13 8:52 ` [PATCH v2 04/14] vinyl: don't use IPROTO_SELECT type for key statements Vladimir Davydov
2019-03-13 9:00 ` Konstantin Osipov
2019-03-13 8:52 ` [PATCH v2 05/14] bloom: do not use tuple_common_key_parts when constructing tuple bloom Vladimir Davydov
2019-03-13 11:52 ` Konstantin Osipov
2019-03-13 8:52 ` [PATCH v2 06/14] bloom: factor out helper to add tuple hash to bloom builder Vladimir Davydov
2019-03-13 11:52 ` Konstantin Osipov
2019-03-13 8:52 ` Vladimir Davydov [this message]
2019-03-13 11:59 ` [PATCH v2 07/14] vinyl: add helpers to add/check statement with bloom Konstantin Osipov
2019-03-13 12:25 ` Vladimir Davydov
2019-03-13 8:52 ` [PATCH v2 08/14] vinyl: do not pass format to vy_apply_upsert Vladimir Davydov
2019-03-13 12:00 ` Konstantin Osipov
2019-03-13 8:52 ` [PATCH v2 09/14] vinyl: clean up write iterator source destruction Vladimir Davydov
2019-03-13 12:05 ` Konstantin Osipov
2019-03-13 8:52 ` [PATCH v2 10/14] vinyl: zap vy_write_iterator->format Vladimir Davydov
2019-03-13 12:06 ` Konstantin Osipov
2019-03-13 8:52 ` [PATCH v2 11/14] vinyl: do not fill secondary tuples with nulls when decoded Vladimir Davydov
2019-03-13 12:25 ` Konstantin Osipov
2019-03-13 12:45 ` Vladimir Davydov
2019-03-13 12:56 ` Konstantin Osipov
2019-03-13 8:52 ` [PATCH v2 12/14] vinyl: zap vy_stmt_new_surrogate_from_key Vladimir Davydov
2019-03-13 12:27 ` Konstantin Osipov
2019-03-13 8:52 ` [PATCH v2 13/14] vinyl: don't use vy_stmt_new_surrogate_delete if not necessary Vladimir Davydov
2019-03-13 12:28 ` Konstantin Osipov
2019-03-13 8:53 ` [PATCH v2 14/14] tuple_format: zap min_tuple_size Vladimir Davydov
2019-03-13 12:28 ` Konstantin Osipov
2019-03-13 15:54 ` [PATCH v2 00/14] vinyl: do not fill secondary tuples with nulls Vladimir Davydov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2e56f4fe2f0a7f76556b122e3c4a357511144bb1.1552464666.git.vdavydov.dev@gmail.com \
--to=vdavydov.dev@gmail.com \
--cc=kostja@tarantool.org \
--cc=tarantool-patches@freelists.org \
--subject='Re: [PATCH v2 07/14] vinyl: add helpers to add/check statement with bloom' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox