From: Vladimir Davydov <vdavydov.dev@gmail.com> To: kostja@tarantool.org Cc: tarantool-patches@freelists.org Subject: [PATCH v2 07/14] vinyl: add helpers to add/check statement with bloom Date: Wed, 13 Mar 2019 11:52:53 +0300 [thread overview] Message-ID: <2e56f4fe2f0a7f76556b122e3c4a357511144bb1.1552464666.git.vdavydov.dev@gmail.com> (raw) In-Reply-To: <cover.1552464666.git.vdavydov.dev@gmail.com> In-Reply-To: <cover.1552464666.git.vdavydov.dev@gmail.com> A Vinyl statement may be either a key or a tuple. We must use different functions for the two kinds when working with a bloom filter. Let's introduce helpers incorporating that logic. Notes: - Currently, we never add keys to bloom filters, but after the next patch we will, so this patch adds tuple_bloom_builder_add_key helper. - According to the function protocol, tuple_bloom_builder_add may fail with out-of-memory, but we never checked that. Fix that while we are at it. --- src/box/tuple_bloom.c | 23 +++++++++++++++++++++++ src/box/tuple_bloom.h | 15 +++++++++++++++ src/box/vy_run.c | 37 ++++++++++++++----------------------- src/box/vy_stmt.c | 29 +++++++++++++++++++++++++++++ src/box/vy_stmt.h | 18 ++++++++++++++++++ 5 files changed, 99 insertions(+), 23 deletions(-) diff --git a/src/box/tuple_bloom.c b/src/box/tuple_bloom.c index 367b1faa..8d918065 100644 --- a/src/box/tuple_bloom.c +++ b/src/box/tuple_bloom.c @@ -123,6 +123,29 @@ tuple_bloom_builder_add(struct tuple_bloom_builder *builder, return 0; } +int +tuple_bloom_builder_add_key(struct tuple_bloom_builder *builder, + const char *key, uint32_t part_count, + struct key_def *key_def) +{ + (void)part_count; + assert(part_count >= key_def->part_count); + assert(builder->part_count == key_def->part_count); + + uint32_t h = HASH_SEED; + uint32_t carry = 0; + uint32_t total_size = 0; + + for (uint32_t i = 0; i < key_def->part_count; i++) { + total_size += tuple_hash_field(&h, &carry, &key, + key_def->parts[i].coll); + uint32_t hash = PMurHash32_Result(h, carry, total_size); + if (tuple_hash_array_add(&builder->parts[i], hash) != 0) + return -1; + } + return 0; +} + struct tuple_bloom * tuple_bloom_new(struct tuple_bloom_builder *builder, double fpr) { diff --git a/src/box/tuple_bloom.h b/src/box/tuple_bloom.h index 8380801e..f653c7ed 100644 --- a/src/box/tuple_bloom.h +++ b/src/box/tuple_bloom.h @@ -121,6 +121,21 @@ tuple_bloom_builder_add(struct tuple_bloom_builder *builder, const struct tuple *tuple, struct key_def *key_def); /** + * Add a key hash to a tuple bloom filter builder. + * @param builder - bloom filter builder + * @param key - key to add + * @param part_count - number of parts in the key + * @param key_def - key definition + * @param hashed_parts - number of key parts that have already + * been added to the builder + * @return 0 on success, -1 on OOM + */ +int +tuple_bloom_builder_add_key(struct tuple_bloom_builder *builder, + const char *key, uint32_t part_count, + struct key_def *key_def); + +/** * Create a new tuple bloom filter. * @param builder - bloom filter builder * @param fpr - desired false positive rate diff --git a/src/box/vy_run.c b/src/box/vy_run.c index e2123fe7..57f62b72 100644 --- a/src/box/vy_run.c +++ b/src/box/vy_run.c @@ -1248,22 +1248,11 @@ vy_run_iterator_do_seek(struct vy_run_iterator *itr, struct tuple_bloom *bloom = run->info.bloom; struct key_def *key_def = itr->key_def; - if (iterator_type == ITER_EQ && bloom != NULL) { - bool need_lookup; - if (vy_stmt_is_key(key)) { - const char *data = tuple_data(key); - uint32_t part_count = mp_decode_array(&data); - need_lookup = tuple_bloom_maybe_has_key(bloom, data, - part_count, key_def); - } else { - need_lookup = tuple_bloom_maybe_has(bloom, key, - key_def); - } - if (!need_lookup) { - itr->search_ended = true; - itr->stat->bloom_hit++; - return 0; - } + if (iterator_type == ITER_EQ && bloom != NULL && + !vy_stmt_bloom_maybe_has(bloom, key, key_def)) { + itr->search_ended = true; + itr->stat->bloom_hit++; + return 0; } itr->stat->lookup++; @@ -2177,10 +2166,10 @@ vy_run_writer_start_page(struct vy_run_writer *writer, static int vy_run_writer_write_to_page(struct vy_run_writer *writer, struct tuple *stmt) { - if (writer->bloom != NULL) { - tuple_bloom_builder_add(writer->bloom, stmt, - writer->key_def); - } + if (writer->bloom != NULL && + vy_stmt_bloom_builder_add(writer->bloom, stmt, + writer->key_def) != 0) + return -1; if (writer->last_stmt != NULL) vy_stmt_unref_if_possible(writer->last_stmt); writer->last_stmt = stmt; @@ -2405,9 +2394,11 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir, format, iid == 0); if (tuple == NULL) goto close_err; - if (bloom_builder != NULL) { - tuple_bloom_builder_add(bloom_builder, tuple, - key_def); + if (bloom_builder != NULL && + vy_stmt_bloom_builder_add(bloom_builder, tuple, + key_def) != 0) { + tuple_unref(tuple); + goto close_err; } key = tuple_extract_key(tuple, cmp_def, NULL); if (prev_tuple != NULL) diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c index 954ec276..229f8b41 100644 --- a/src/box/vy_stmt.c +++ b/src/box/vy_stmt.c @@ -41,6 +41,7 @@ #include <small/lsregion.h> #include "error.h" +#include "tuple_bloom.h" #include "tuple_format.h" #include "xrow.h" #include "fiber.h" @@ -663,6 +664,34 @@ vy_stmt_extract_key_raw(const char *data, const char *data_end, return key; } +int +vy_stmt_bloom_builder_add(struct tuple_bloom_builder *builder, + const struct tuple *stmt, struct key_def *key_def) +{ + if (vy_stmt_is_key(stmt)) { + const char *data = tuple_data(stmt); + uint32_t part_count = mp_decode_array(&data); + return tuple_bloom_builder_add_key(builder, data, + part_count, key_def); + } else { + return tuple_bloom_builder_add(builder, stmt, key_def); + } +} + +bool +vy_stmt_bloom_maybe_has(const struct tuple_bloom *bloom, + const struct tuple *stmt, struct key_def *key_def) +{ + if (vy_stmt_is_key(stmt)) { + const char *data = tuple_data(stmt); + uint32_t part_count = mp_decode_array(&data); + return tuple_bloom_maybe_has_key(bloom, data, + part_count, key_def); + } else { + return tuple_bloom_maybe_has(bloom, stmt, key_def); + } +} + /** * Encode the given statement meta data in a request. * Returns 0 on success, -1 on memory allocation error. diff --git a/src/box/vy_stmt.h b/src/box/vy_stmt.h index 7c9b3677..7b93883d 100644 --- a/src/box/vy_stmt.h +++ b/src/box/vy_stmt.h @@ -50,6 +50,8 @@ struct xrow_header; struct region; struct tuple_format; struct tuple_dictionary; +struct tuple_bloom; +struct tuple_bloom_builder; struct iovec; #define MAX_LSN (INT64_MAX / 2) @@ -595,6 +597,22 @@ vy_stmt_extract_key_raw(const char *data, const char *data_end, struct tuple_format *format); /** + * Add a statement hash to a bloom filter builder. + * See tuple_bloom_builder_add() for more details. + */ +int +vy_stmt_bloom_builder_add(struct tuple_bloom_builder *builder, + const struct tuple *stmt, struct key_def *key_def); + +/** + * Check if a statement hash is present in a bloom filter. + * See tuple_bloom_maybe_has() for more details. + */ +bool +vy_stmt_bloom_maybe_has(const struct tuple_bloom *bloom, + const struct tuple *stmt, struct key_def *key_def); + +/** * Encode vy_stmt for a primary key as xrow_header * * @param value statement to encode -- 2.11.0
next prev parent reply other threads:[~2019-03-13 8:52 UTC|newest] Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-03-13 8:52 [PATCH v2 00/14] vinyl: do not fill secondary tuples with nulls Vladimir Davydov 2019-03-13 8:52 ` [PATCH v2 01/14] vinyl: remove optimized comparators Vladimir Davydov 2019-03-13 8:55 ` Konstantin Osipov 2019-03-13 8:52 ` [PATCH v2 02/14] vinyl: introduce statement environment Vladimir Davydov 2019-03-13 8:58 ` Konstantin Osipov 2019-03-13 9:19 ` Vladimir Davydov 2019-03-13 8:52 ` [PATCH v2 03/14] vinyl: rename key stmt construction routine Vladimir Davydov 2019-03-13 8:59 ` Konstantin Osipov 2019-03-13 8:52 ` [PATCH v2 04/14] vinyl: don't use IPROTO_SELECT type for key statements Vladimir Davydov 2019-03-13 9:00 ` Konstantin Osipov 2019-03-13 8:52 ` [PATCH v2 05/14] bloom: do not use tuple_common_key_parts when constructing tuple bloom Vladimir Davydov 2019-03-13 11:52 ` Konstantin Osipov 2019-03-13 8:52 ` [PATCH v2 06/14] bloom: factor out helper to add tuple hash to bloom builder Vladimir Davydov 2019-03-13 11:52 ` Konstantin Osipov 2019-03-13 8:52 ` Vladimir Davydov [this message] 2019-03-13 11:59 ` [PATCH v2 07/14] vinyl: add helpers to add/check statement with bloom Konstantin Osipov 2019-03-13 12:25 ` Vladimir Davydov 2019-03-13 8:52 ` [PATCH v2 08/14] vinyl: do not pass format to vy_apply_upsert Vladimir Davydov 2019-03-13 12:00 ` Konstantin Osipov 2019-03-13 8:52 ` [PATCH v2 09/14] vinyl: clean up write iterator source destruction Vladimir Davydov 2019-03-13 12:05 ` Konstantin Osipov 2019-03-13 8:52 ` [PATCH v2 10/14] vinyl: zap vy_write_iterator->format Vladimir Davydov 2019-03-13 12:06 ` Konstantin Osipov 2019-03-13 8:52 ` [PATCH v2 11/14] vinyl: do not fill secondary tuples with nulls when decoded Vladimir Davydov 2019-03-13 12:25 ` Konstantin Osipov 2019-03-13 12:45 ` Vladimir Davydov 2019-03-13 12:56 ` Konstantin Osipov 2019-03-13 8:52 ` [PATCH v2 12/14] vinyl: zap vy_stmt_new_surrogate_from_key Vladimir Davydov 2019-03-13 12:27 ` Konstantin Osipov 2019-03-13 8:52 ` [PATCH v2 13/14] vinyl: don't use vy_stmt_new_surrogate_delete if not necessary Vladimir Davydov 2019-03-13 12:28 ` Konstantin Osipov 2019-03-13 8:53 ` [PATCH v2 14/14] tuple_format: zap min_tuple_size Vladimir Davydov 2019-03-13 12:28 ` Konstantin Osipov 2019-03-13 15:54 ` [PATCH v2 00/14] vinyl: do not fill secondary tuples with nulls Vladimir Davydov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=2e56f4fe2f0a7f76556b122e3c4a357511144bb1.1552464666.git.vdavydov.dev@gmail.com \ --to=vdavydov.dev@gmail.com \ --cc=kostja@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='Re: [PATCH v2 07/14] vinyl: add helpers to add/check statement with bloom' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox