From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH 2/3] vinyl: move unique check optimization setup to the space level Date: Tue, 23 Jul 2019 18:49:27 +0300 Message-Id: <64b85c269c8153167e2aee302fecfab79ef58d43.1563895921.git.vdavydov.dev@gmail.com> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org List-ID: If any sub-set of fields indexed by a unique index is indexed by another unique index, we can skip the uniqueness check for it. We use this to optimize out unnecessary uniqueness checks in vinyl, where they may be pretty costly, especially if the bloom filter doesn't reflect them. Currently, whether to check if an index is unique or not is determined in vinyl space constructor, which sets vy_lsm::check_is_unique flag for the space indexes. This looks ugly, because this means that no other engine can make use of this optimization without duplicating the code setting up the flags. True, no other engine needs it now, but still it doesn't feel right. Besides, the check_is_unique flag isn't actually an index property - it's rather a property of a space so storing it in vy_lsm looks wrong. Because of that we have to update the flag when an index is reassigned to another space by MoveIndex DDL operation, see vinyl_space_swap_index(). So let's store the flags indicating whether a uniqueness check is required for a particular index in a bitmap in the space struct and set it up in the generic space constructor, space_create(). --- src/box/alter.cc | 20 +++++++++---- src/box/memtx_space.c | 5 +++- src/box/space.c | 38 +++++++++++++++++++++++- src/box/space.h | 51 +++++++++++++++++++++++++------- src/box/vinyl.c | 68 ++++++++++++++++++------------------------- src/box/vy_lsm.c | 1 - src/box/vy_lsm.h | 8 ----- 7 files changed, 124 insertions(+), 67 deletions(-) diff --git a/src/box/alter.cc b/src/box/alter.cc index f98a77a5..8668688c 100644 --- a/src/box/alter.cc +++ b/src/box/alter.cc @@ -733,6 +733,17 @@ alter_space_delete(struct alter_space *alter) space_def_delete(alter->space_def); } +/** A trivial wrapper around space_build_index(). */ +static void +alter_space_build_index(struct space *src_space, struct space *new_space, + struct index *new_index) +{ + bool check_unique = space_needs_check_unique(new_space, + new_index->def->iid); + space_build_index_xc(src_space, new_index, new_space->format, + check_unique); +} + AlterSpaceOp::AlterSpaceOp(struct alter_space *alter) { /* Add to the tail: operations must be processed in order. */ @@ -1280,8 +1291,7 @@ CreateIndex::prepare(struct alter_space *alter) space_add_primary_key_xc(alter->new_space); return; } - space_build_index_xc(alter->old_space, new_index, - alter->new_space->format); + alter_space_build_index(alter->old_space, alter->new_space, new_index); } void @@ -1346,8 +1356,7 @@ RebuildIndex::prepare(struct alter_space *alter) /* Get the new index and build it. */ new_index = space_index(alter->new_space, new_index_def->iid); assert(new_index != NULL); - space_build_index_xc(alter->old_space, new_index, - alter->new_space->format); + alter_space_build_index(alter->old_space, alter->new_space, new_index); } void @@ -1413,8 +1422,7 @@ TruncateIndex::prepare(struct alter_space *alter) * callback to load indexes during local recovery. */ assert(new_index != NULL); - space_build_index_xc(alter->new_space, new_index, - alter->new_space->format); + alter_space_build_index(alter->new_space, alter->new_space, new_index); } void diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c index a8e7b808..f6cc943f 100644 --- a/src/box/memtx_space.c +++ b/src/box/memtx_space.c @@ -1008,8 +1008,11 @@ memtx_build_on_replace(struct trigger *trigger, void *event) static int memtx_space_build_index(struct space *src_space, struct index *new_index, - struct tuple_format *new_format) + struct tuple_format *new_format, bool check_unique) { + /* In memtx unique check comes for free so we never skip it. */ + (void)check_unique; + struct txn *txn = in_txn(); /** * If it's a secondary key, and we're not building them diff --git a/src/box/space.c b/src/box/space.c index e7babb2a..f520b307 100644 --- a/src/box/space.c +++ b/src/box/space.c @@ -32,6 +32,7 @@ #include #include #include +#include "bit/bit.h" #include "tuple_format.h" #include "trigger.h" #include "user.h" @@ -156,16 +157,48 @@ space_create(struct space *space, struct engine *engine, goto fail; } space->index = space->index_map + index_id_max + 1; + space->unique_index_bitmap = calloc(bitmap_size(index_id_max + 1), 1); + if (space->unique_index_bitmap == NULL) { + diag_set(OutOfMemory, bitmap_size(index_id_max + 1), + "malloc", "unique_index_bitmap"); + goto fail; + } rlist_foreach_entry(index_def, key_list, link) { struct index *index = space_create_index(space, index_def); if (index == NULL) goto fail_free_indexes; space->index_map[index_def->iid] = index; + if (index_def->opts.is_unique) + bit_set(space->unique_index_bitmap, index_def->iid); } space_fill_index_map(space); rlist_create(&space->parent_fk_constraint); rlist_create(&space->child_fk_constraint); rlist_create(&space->ck_constraint); + + /* + * Check if there are unique indexes that are contained + * by other unique indexes. For them, we can skip check + * for duplicates on INSERT. Prefer indexes with higher + * ids for uniqueness check optimization as they are + * likelier to have a "colder" cache. + */ + for (int i = space->index_count - 1; i >= 0; i--) { + struct index *index = space->index[i]; + if (!bit_test(space->unique_index_bitmap, index->def->iid)) + continue; + for (int j = 0; j < (int)space->index_count; j++) { + struct index *other = space->index[j]; + if (i != j && bit_test(space->unique_index_bitmap, + other->def->iid) && + key_def_contains(index->def->key_def, + other->def->key_def)) { + bit_clear(space->unique_index_bitmap, + index->def->iid); + break; + } + } + } return 0; fail_free_indexes: @@ -176,6 +209,7 @@ fail_free_indexes: } fail: free(space->index_map); + free(space->unique_index_bitmap); if (space->def != NULL) space_def_delete(space->def); if (space->format != NULL) @@ -214,6 +248,7 @@ space_delete(struct space *space) index_delete(index); } free(space->index_map); + free(space->unique_index_bitmap); if (space->format != NULL) tuple_format_unref(space->format); trigger_destroy(&space->before_replace); @@ -635,11 +670,12 @@ generic_space_check_format(struct space *space, struct tuple_format *format) int generic_space_build_index(struct space *src_space, struct index *new_index, - struct tuple_format *new_format) + struct tuple_format *new_format, bool check_unique) { (void)src_space; (void)new_index; (void)new_format; + (void)check_unique; return 0; } diff --git a/src/box/space.h b/src/box/space.h index 88db4ec5..d44c9fdc 100644 --- a/src/box/space.h +++ b/src/box/space.h @@ -33,6 +33,7 @@ #include "user_def.h" #include "space_def.h" #include "small/rlist.h" +#include "bit/bit.h" #include "engine.h" #include "index.h" #include "error.h" @@ -112,14 +113,20 @@ struct space_vtab { * supposed to assure that all tuples conform to the new * format. * - * @param src_space space to use as build source - * @param new_index index to build - * @param new_format format for validating tuples - * @retval 0 success - * @retval -1 build failed + * @param src_space space to use as build source + * @param new_index index to build + * @param new_format format for validating tuples + * @param check_unique if this flag is set the build procedure + * must check the uniqueness constraint of + * the new index, otherwise the check may + * be optimized out even if the index is + * marked as unique + * + * @retval 0 success + * @retval -1 build failed */ int (*build_index)(struct space *src_space, struct index *new_index, - struct tuple_format *new_format); + struct tuple_format *new_format, bool check_unique); /** * Exchange two index objects in two spaces. Used * to update a space with a newly built index, while @@ -195,6 +202,15 @@ struct space { * of index id. */ struct index **index; + /** + * If bit i is set, the unique constraint of index i must + * be checked before inserting a tuple into this space. + * Note, it isn't quite the same as index_opts::is_unique, + * as we don't need to check the unique constraint of + * a unique index in case the uniqueness of the indexed + * fields is guaranteed by another unique index. + */ + void *unique_index_bitmap; /** * List of check constraints linked with * ck_constraint::link. @@ -265,6 +281,17 @@ space_index(struct space *space, uint32_t id) return NULL; } +/** + * Return true if the unique constraint must be checked for + * the index with the given id before inserting a tuple into + * the space. + */ +static inline bool +space_needs_check_unique(struct space *space, uint32_t index_id) +{ + return bit_test(space->unique_index_bitmap, index_id); +} + /** * Return key_def of the index identified by id or NULL * if there is no such index. @@ -403,9 +430,10 @@ space_drop_primary_key(struct space *space) static inline int space_build_index(struct space *src_space, struct index *new_index, - struct tuple_format *new_format) + struct tuple_format *new_format, bool check_unique) { - return src_space->vtab->build_index(src_space, new_index, new_format); + return src_space->vtab->build_index(src_space, new_index, + new_format, check_unique); } static inline void @@ -490,7 +518,7 @@ int generic_space_add_primary_key(struct space *space); void generic_space_drop_primary_key(struct space *space); int generic_space_check_format(struct space *, struct tuple_format *); int generic_space_build_index(struct space *, struct index *, - struct tuple_format *); + struct tuple_format *, bool); int generic_space_prepare_alter(struct space *, struct space *); void generic_space_invalidate(struct space *); @@ -588,9 +616,10 @@ space_check_format_xc(struct space *space, struct tuple_format *format) static inline void space_build_index_xc(struct space *src_space, struct index *new_index, - struct tuple_format *new_format) + struct tuple_format *new_format, bool check_unique) { - if (space_build_index(src_space, new_index, new_format) != 0) + if (space_build_index(src_space, new_index, + new_format, check_unique) != 0) diag_raise(); } diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 4ac54138..d2a02a21 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -647,27 +647,6 @@ vinyl_engine_create_space(struct engine *engine, struct space_def *def, /* Format is now referenced by the space. */ tuple_format_unref(format); - - /* - * Check if there are unique indexes that are contained - * by other unique indexes. For them, we can skip check - * for duplicates on INSERT. Prefer indexes with higher - * ids for uniqueness check optimization as they are - * likelier to have a "colder" cache. - */ - for (int i = space->index_count - 1; i >= 0; i--) { - struct vy_lsm *lsm = vy_lsm(space->index[i]); - if (!lsm->check_is_unique) - continue; - for (int j = 0; j < (int)space->index_count; j++) { - struct vy_lsm *other = vy_lsm(space->index[j]); - if (other != lsm && other->check_is_unique && - key_def_contains(lsm->key_def, other->key_def)) { - lsm->check_is_unique = false; - break; - } - } - } return space; } @@ -1206,7 +1185,6 @@ vinyl_space_swap_index(struct space *old_space, struct space *new_space, old_index_id, new_index_id); SWAP(old_lsm, new_lsm); - SWAP(old_lsm->check_is_unique, new_lsm->check_is_unique); SWAP(old_lsm->mem_format, new_lsm->mem_format); SWAP(old_lsm->disk_format, new_lsm->disk_format); @@ -1564,10 +1542,9 @@ vy_check_is_unique_primary(struct vy_tx *tx, const struct vy_read_view **rv, struct vy_lsm *lsm, struct tuple *stmt) { assert(lsm->index_id == 0); + assert(lsm->opts.is_unique); assert(vy_stmt_type(stmt) == IPROTO_INSERT); - if (!lsm->check_is_unique) - return 0; struct tuple *found; if (vy_get(lsm, tx, rv, stmt, &found)) return -1; @@ -1610,7 +1587,7 @@ vy_check_is_unique_secondary_one(struct vy_tx *tx, const struct vy_read_view **r * without modifying the secondary key and so there's * actually no conflict. For INSERT this can only happen * if we optimized out the primary index uniqueness check - * (see vy_lsm::check_is_unique), in which case we must + * (see space_needs_check_unique()), in which case we must * fail here. */ if (found != NULL && vy_stmt_type(stmt) == IPROTO_REPLACE && @@ -1646,8 +1623,7 @@ vy_check_is_unique_secondary(struct vy_tx *tx, const struct vy_read_view **rv, const char *space_name, const char *index_name, struct vy_lsm *lsm, struct tuple *stmt) { - if (!lsm->check_is_unique) - return 0; + assert(lsm->opts.is_unique); if (!lsm->cmp_def->is_multikey) { return vy_check_is_unique_secondary_one(tx, rv, space_name, index_name, lsm, stmt, @@ -1699,7 +1675,8 @@ vy_check_is_unique(struct vy_env *env, struct vy_tx *tx, * if this is INSERT, because REPLACE will silently overwrite * the existing tuple, if any. */ - if (vy_stmt_type(stmt) == IPROTO_INSERT) { + if (space_needs_check_unique(space, 0) && + vy_stmt_type(stmt) == IPROTO_INSERT) { struct vy_lsm *lsm = vy_lsm(space->index[0]); if (vy_check_is_unique_primary(tx, rv, space_name(space), index_name_by_id(space, 0), @@ -1713,6 +1690,8 @@ vy_check_is_unique(struct vy_env *env, struct vy_tx *tx, */ for (uint32_t i = 1; i < space->index_count; i++) { struct vy_lsm *lsm = vy_lsm(space->index[i]); + if (!space_needs_check_unique(space, lsm->index_id)) + continue; if (key_update_can_be_skipped(lsm->key_def->column_mask, column_mask)) continue; @@ -4028,6 +4007,11 @@ struct vy_build_ctx { struct vy_lsm *lsm; /** Format to check new tuples against. */ struct tuple_format *format; + /** + * Check the unique constraint of the new index + * if this flag is set. + */ + bool check_unique; /** * Names of the altered space and the new index. * Used for error reporting. @@ -4063,7 +4047,7 @@ vy_build_on_replace(struct trigger *trigger, void *event) goto err; /* Check key uniqueness if necessary. */ - if (stmt->new_tuple != NULL && + if (ctx->check_unique && stmt->new_tuple != NULL && vy_check_is_unique_secondary(tx, vy_tx_read_view(tx), ctx->space_name, ctx->index_name, lsm, stmt->new_tuple) != 0) @@ -4138,7 +4122,8 @@ vy_build_insert_stmt(struct vy_lsm *lsm, struct vy_mem *mem, static int vy_build_insert_tuple(struct vy_env *env, struct vy_lsm *lsm, const char *space_name, const char *index_name, - struct tuple_format *new_format, struct tuple *tuple) + struct tuple_format *new_format, bool check_unique, + struct tuple *tuple) { int rc; struct vy_mem *mem = lsm->mem; @@ -4175,13 +4160,16 @@ vy_build_insert_tuple(struct vy_env *env, struct vy_lsm *lsm, * uniqueness check helper is sensitive to the statement * type, we must not use the original tuple for the check. */ - vy_mem_pin(mem); - rc = vy_check_is_unique_secondary(NULL, &env->xm->p_committed_read_view, - space_name, index_name, lsm, stmt); - vy_mem_unpin(mem); - if (rc != 0) { - tuple_unref(stmt); - return -1; + if (check_unique) { + vy_mem_pin(mem); + rc = vy_check_is_unique_secondary(NULL, + &env->xm->p_committed_read_view, + space_name, index_name, lsm, stmt); + vy_mem_unpin(mem); + if (rc != 0) { + tuple_unref(stmt); + return -1; + } } /* Insert the new tuple into the in-memory index. */ @@ -4329,7 +4317,7 @@ vy_build_recover(struct vy_env *env, struct vy_lsm *lsm, struct vy_lsm *pk) static int vinyl_space_build_index(struct space *src_space, struct index *new_index, - struct tuple_format *new_format) + struct tuple_format *new_format, bool check_unique) { struct vy_env *env = vy_env(src_space->engine); struct vy_lsm *pk = vy_lsm(src_space->index[0]); @@ -4398,6 +4386,7 @@ vinyl_space_build_index(struct space *src_space, struct index *new_index, struct vy_build_ctx ctx; ctx.lsm = new_lsm; ctx.format = new_format; + ctx.check_unique = check_unique; ctx.space_name = space_name(src_space); ctx.index_name = new_index->def->name; ctx.is_failed = false; @@ -4441,7 +4430,8 @@ vinyl_space_build_index(struct space *src_space, struct index *new_index, rc = vy_build_insert_tuple(env, new_lsm, space_name(src_space), new_index->def->name, - new_format, tuple); + new_format, check_unique, + tuple); if (rc != 0) break; } diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c index 48590521..c67db87a 100644 --- a/src/box/vy_lsm.c +++ b/src/box/vy_lsm.c @@ -207,7 +207,6 @@ vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_cache_env *cache_env, lsm->index_id = index_def->iid; lsm->group_id = group_id; lsm->opts = index_def->opts; - lsm->check_is_unique = lsm->opts.is_unique; vy_lsm_read_set_new(&lsm->read_set); lsm_env->lsm_count++; diff --git a/src/box/vy_lsm.h b/src/box/vy_lsm.h index 0a8e9100..d9e4b582 100644 --- a/src/box/vy_lsm.h +++ b/src/box/vy_lsm.h @@ -206,14 +206,6 @@ struct vy_lsm { * to a primary index. */ struct key_def *pk_in_cmp_def; - /** - * If the following flag is set, the index this LSM tree - * is created for is unique and it must be checked for - * duplicates on INSERT. Otherwise, the check can be skipped, - * either because the index is not unique or it is a part - * of another unique index. - */ - bool check_is_unique; /** * Tuple format for tuples of this LSM tree created when * reading pages from disk. -- 2.20.1