[PATCH] vinyl: skip needless checks for duplicates on INSERT
Vladimir Davydov
vdavydov.dev at gmail.com
Sat Feb 17 20:58:15 MSK 2018
If a unique index includes all parts of another unique index, we can
skip the check for duplicates for it on INSERT. Let's mark all such
indexes with a special flag on CREATE/ALTER and optimize out the check
if the flag is set. If there are two indexes that index the same set
of fields, check uniqueness for the one with a lower id, because it
is likelier to have have a "warmer" cache.
Closes #3154
---
https://github.com/tarantool/tarantool/tree/gh-3154-vy-skip-needless-unique-check
src/box/key_def.cc | 12 +++++++++
src/box/key_def.h | 9 +++++++
src/box/vinyl.c | 28 +++++++++++++++++++--
src/box/vy_index.c | 1 +
src/box/vy_index.h | 7 ++++++
test/vinyl/misc.result | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
test/vinyl/misc.test.lua | 25 +++++++++++++++++++
7 files changed, 143 insertions(+), 2 deletions(-)
diff --git a/src/box/key_def.cc b/src/box/key_def.cc
index f7f6c753..b4da3aed 100644
--- a/src/box/key_def.cc
+++ b/src/box/key_def.cc
@@ -481,6 +481,18 @@ key_def_find(const struct key_def *key_def, uint32_t fieldno)
return NULL;
}
+bool
+key_def_contains(const struct key_def *first, const struct key_def *second)
+{
+ const struct key_part *part = second->parts;
+ const struct key_part *end = part + second->part_count;
+ for (; part != end; part++) {
+ if (key_def_find(first, part->fieldno) == NULL)
+ return false;
+ }
+ return true;
+}
+
struct key_def *
key_def_merge(const struct key_def *first, const struct key_def *second)
{
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 201863f5..5982f5a4 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -261,6 +261,15 @@ const struct key_part *
key_def_find(const struct key_def *key_def, uint32_t fieldno);
/**
+ * Check if key definition @a first contains all parts of
+ * key definition @a second.
+ * @retval true if @a first is a superset of @a second
+ * @retval false otherwise
+ */
+bool
+key_def_contains(const struct key_def *first, const struct key_def *second);
+
+/**
* Allocate a new key_def with a set union of key parts from
* first and second key defs. Parts of the new key_def consist
* of the first key_def's parts and those parts of the second
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 03ad2bf4..a433b859 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -1110,6 +1110,7 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space)
/* Set possibly changed opts. */
pk->opts = new_index_def->opts;
+ pk->need_check_unique = true;
/* Set new formats. */
tuple_format_unref(pk->disk_format);
@@ -1133,6 +1134,7 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space)
index->pk = pk;
new_index_def = space_index_def(new_space, i);
index->opts = new_index_def->opts;
+ index->need_check_unique = index->opts.is_unique;
tuple_format_unref(index->mem_format_with_colmask);
tuple_format_unref(index->mem_format);
tuple_format_unref(index->upsert_format);
@@ -1145,6 +1147,27 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space)
tuple_format_ref(index->upsert_format);
vy_index_validate_formats(index);
}
+
+ /*
+ * Check if there are unique indexes that are contained
+ * by other unique indexes. For them, we can skip check
+ * for duplicates on INSERT. Prefer indexes with higher
+ * ids for uniqueness check optimization as they are
+ * likelier to have a "colder" cache.
+ */
+ for (int i = new_space->index_count - 1; i >= 0; i--) {
+ struct vy_index *index = vy_index(new_space->index[i]);
+ if (!index->need_check_unique)
+ continue;
+ for (int j = 0; j < (int)new_space->index_count; j++) {
+ struct vy_index *other = vy_index(new_space->index[j]);
+ if (other != index && other->need_check_unique &&
+ key_def_contains(index->key_def, other->key_def)) {
+ index->need_check_unique = false;
+ break;
+ }
+ }
+ }
return;
fail:
/* FIXME: space_vtab::commit_alter() must not fail. */
@@ -1382,7 +1405,8 @@ vy_insert_primary(struct vy_env *env, struct vy_tx *tx, struct space *space,
* A primary index is always unique and the new tuple must not
* conflict with existing tuples.
*/
- if (vy_check_dup_key(env, tx, space, pk, stmt) != 0)
+ if (pk->need_check_unique &&
+ vy_check_dup_key(env, tx, space, pk, stmt) != 0)
return -1;
return vy_tx_set(tx, pk, stmt);
}
@@ -1411,7 +1435,7 @@ vy_insert_secondary(struct vy_env *env, struct vy_tx *tx, struct space *space,
* conflict with existing tuples. If the index is not
* unique a conflict is impossible.
*/
- if (index->opts.is_unique &&
+ if (index->need_check_unique &&
!key_update_can_be_skipped(index->key_def->column_mask,
vy_stmt_column_mask(stmt)) &&
(!index->key_def->is_nullable ||
diff --git a/src/box/vy_index.c b/src/box/vy_index.c
index 0596c4ed..d4b7d52d 100644
--- a/src/box/vy_index.c
+++ b/src/box/vy_index.c
@@ -239,6 +239,7 @@ vy_index_new(struct vy_index_env *index_env, struct vy_cache_env *cache_env,
index->space_id = index_def->space_id;
index->id = index_def->iid;
index->opts = index_def->opts;
+ index->need_check_unique = index->opts.is_unique;
vy_index_read_set_new(&index->read_set);
index_env->index_count++;
diff --git a/src/box/vy_index.h b/src/box/vy_index.h
index 47fa335f..06898126 100644
--- a/src/box/vy_index.h
+++ b/src/box/vy_index.h
@@ -159,6 +159,13 @@ struct vy_index {
/** Key definition passed by the user. */
struct key_def *key_def;
/**
+ * If the following flag is set the index is unique and
+ * it must be checked for duplicates on INSERT. Otherwise,
+ * the check can be skipped, either because this index
+ * is not unique or it is a part of another unique index.
+ */
+ bool need_check_unique;
+ /**
* Tuple format for tuples of this index created when
* reading pages from disk.
* Is distinct from mem_format only for secondary keys,
diff --git a/test/vinyl/misc.result b/test/vinyl/misc.result
index a8db51e5..8c4bbbb4 100644
--- a/test/vinyl/misc.result
+++ b/test/vinyl/misc.result
@@ -112,3 +112,66 @@ tx2.rollback - tx1.rollback -- 1
s:drop()
---
...
+--
+-- gh-3158: check of duplicates is skipped if the index
+-- is contained by another unique index which is checked.
+--
+s = box.schema.create_space('test', {engine = 'vinyl'})
+---
+...
+i1 = s:create_index('i1', {unique = true, parts = {1, 'unsigned', 2, 'unsigned'}})
+---
+...
+i2 = s:create_index('i2', {unique = true, parts = {2, 'unsigned', 1, 'unsigned'}})
+---
+...
+i3 = s:create_index('i3', {unique = true, parts = {3, 'unsigned', 4, 'unsigned', 5, 'unsigned'}})
+---
+...
+i4 = s:create_index('i4', {unique = true, parts = {5, 'unsigned', 4, 'unsigned'}})
+---
+...
+i5 = s:create_index('i5', {unique = true, parts = {4, 'unsigned', 5, 'unsigned', 1, 'unsigned'}})
+---
+...
+i6 = s:create_index('i6', {unique = true, parts = {4, 'unsigned', 6, 'unsigned', 5, 'unsigned'}})
+---
+...
+i7 = s:create_index('i7', {unique = true, parts = {6, 'unsigned'}})
+---
+...
+s:insert{1, 1, 1, 1, 1, 1}
+---
+- [1, 1, 1, 1, 1, 1]
+...
+i1:info().lookup -- 1
+---
+- 1
+...
+i2:info().lookup -- 0
+---
+- 0
+...
+i3:info().lookup -- 0
+---
+- 0
+...
+i4:info().lookup -- 1
+---
+- 1
+...
+i5:info().lookup -- 0
+---
+- 0
+...
+i6:info().lookup -- 0
+---
+- 0
+...
+i7:info().lookup -- 1
+---
+- 1
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/misc.test.lua b/test/vinyl/misc.test.lua
index 8b76a66b..93dfb4bf 100644
--- a/test/vinyl/misc.test.lua
+++ b/test/vinyl/misc.test.lua
@@ -48,3 +48,28 @@ tx2 = box.info.vinyl().tx
tx2.commit - tx1.commit -- 0
tx2.rollback - tx1.rollback -- 1
s:drop()
+
+--
+-- gh-3158: check of duplicates is skipped if the index
+-- is contained by another unique index which is checked.
+--
+s = box.schema.create_space('test', {engine = 'vinyl'})
+i1 = s:create_index('i1', {unique = true, parts = {1, 'unsigned', 2, 'unsigned'}})
+i2 = s:create_index('i2', {unique = true, parts = {2, 'unsigned', 1, 'unsigned'}})
+i3 = s:create_index('i3', {unique = true, parts = {3, 'unsigned', 4, 'unsigned', 5, 'unsigned'}})
+i4 = s:create_index('i4', {unique = true, parts = {5, 'unsigned', 4, 'unsigned'}})
+i5 = s:create_index('i5', {unique = true, parts = {4, 'unsigned', 5, 'unsigned', 1, 'unsigned'}})
+i6 = s:create_index('i6', {unique = true, parts = {4, 'unsigned', 6, 'unsigned', 5, 'unsigned'}})
+i7 = s:create_index('i7', {unique = true, parts = {6, 'unsigned'}})
+
+s:insert{1, 1, 1, 1, 1, 1}
+
+i1:info().lookup -- 1
+i2:info().lookup -- 0
+i3:info().lookup -- 0
+i4:info().lookup -- 1
+i5:info().lookup -- 0
+i6:info().lookup -- 0
+i7:info().lookup -- 1
+
+s:drop()
--
2.11.0
More information about the Tarantool-patches
mailing list