[PATCH] vinyl: skip needless checks for duplicates on INSERT

Sat Feb 17 20:58:15 MSK 2018

If a unique index includes all parts of another unique index, we can
skip the check for duplicates for it on INSERT. Let's mark all such
indexes with a special flag on CREATE/ALTER and optimize out the check
if the flag is set. If there are two indexes that index the same set
of fields, check uniqueness for the one with a lower id, because it
is likelier to have have a "warmer" cache.

Closes #3154
---
https://github.com/tarantool/tarantool/tree/gh-3154-vy-skip-needless-unique-check

 src/box/key_def.cc       | 12 +++++++++
 src/box/key_def.h        |  9 +++++++
 src/box/vinyl.c          | 28 +++++++++++++++++++--
 src/box/vy_index.c       |  1 +
 src/box/vy_index.h       |  7 ++++++
 test/vinyl/misc.result   | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
 test/vinyl/misc.test.lua | 25 +++++++++++++++++++
 7 files changed, 143 insertions(+), 2 deletions(-)

diff --git a/src/box/key_def.cc b/src/box/key_def.cc
index f7f6c753..b4da3aed 100644
--- a/src/box/key_def.cc
+++ b/src/box/key_def.cc
@@ -481,6 +481,18 @@ key_def_find(const struct key_def *key_def, uint32_t fieldno)
 	return NULL;
 }
 
+bool
+key_def_contains(const struct key_def *first, const struct key_def *second)
+{
+	const struct key_part *part = second->parts;
+	const struct key_part *end = part + second->part_count;
+	for (; part != end; part++) {
+		if (key_def_find(first, part->fieldno) == NULL)
+			return false;
+	}
+	return true;
+}
+
 struct key_def *
 key_def_merge(const struct key_def *first, const struct key_def *second)
 {
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 201863f5..5982f5a4 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -261,6 +261,15 @@ const struct key_part *
 key_def_find(const struct key_def *key_def, uint32_t fieldno);
 
 /**
+ * Check if key definition @a first contains all parts of
+ * key definition @a second.
+ * @retval true if @a first is a superset of @a second
+ * @retval false otherwise
+ */
+bool
+key_def_contains(const struct key_def *first, const struct key_def *second);
+
+/**
  * Allocate a new key_def with a set union of key parts from
  * first and second key defs. Parts of the new key_def consist
  * of the first key_def's parts and those parts of the second
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 03ad2bf4..a433b859 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -1110,6 +1110,7 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space)
 
 	/* Set possibly changed opts. */
 	pk->opts = new_index_def->opts;
+	pk->need_check_unique = true;
 
 	/* Set new formats. */
 	tuple_format_unref(pk->disk_format);
@@ -1133,6 +1134,7 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space)
 		index->pk = pk;
 		new_index_def = space_index_def(new_space, i);
 		index->opts = new_index_def->opts;
+		index->need_check_unique = index->opts.is_unique;
 		tuple_format_unref(index->mem_format_with_colmask);
 		tuple_format_unref(index->mem_format);
 		tuple_format_unref(index->upsert_format);
@@ -1145,6 +1147,27 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space)
 		tuple_format_ref(index->upsert_format);
 		vy_index_validate_formats(index);
 	}
+
+	/*
+	 * Check if there are unique indexes that are contained
+	 * by other unique indexes. For them, we can skip check
+	 * for duplicates on INSERT. Prefer indexes with higher
+	 * ids for uniqueness check optimization as they are
+	 * likelier to have a "colder" cache.
+	 */
+	for (int i = new_space->index_count - 1; i >= 0; i--) {
+		struct vy_index *index = vy_index(new_space->index[i]);
+		if (!index->need_check_unique)
+			continue;
+		for (int j = 0; j < (int)new_space->index_count; j++) {
+			struct vy_index *other = vy_index(new_space->index[j]);
+			if (other != index && other->need_check_unique &&
+			    key_def_contains(index->key_def, other->key_def)) {
+				index->need_check_unique = false;
+				break;
+			}
+		}
+	}
 	return;
 fail:
 	/* FIXME: space_vtab::commit_alter() must not fail. */
@@ -1382,7 +1405,8 @@ vy_insert_primary(struct vy_env *env, struct vy_tx *tx, struct space *space,
 	 * A primary index is always unique and the new tuple must not
 	 * conflict with existing tuples.
 	 */
-	if (vy_check_dup_key(env, tx, space, pk, stmt) != 0)
+	if (pk->need_check_unique &&
+	    vy_check_dup_key(env, tx, space, pk, stmt) != 0)
 		return -1;
 	return vy_tx_set(tx, pk, stmt);
 }
@@ -1411,7 +1435,7 @@ vy_insert_secondary(struct vy_env *env, struct vy_tx *tx, struct space *space,
 	 * conflict with existing tuples. If the index is not
 	 * unique a conflict is impossible.
 	 */
-	if (index->opts.is_unique &&
+	if (index->need_check_unique &&
 	    !key_update_can_be_skipped(index->key_def->column_mask,
 				       vy_stmt_column_mask(stmt)) &&
 	    (!index->key_def->is_nullable ||
diff --git a/src/box/vy_index.c b/src/box/vy_index.c
index 0596c4ed..d4b7d52d 100644
--- a/src/box/vy_index.c
+++ b/src/box/vy_index.c
@@ -239,6 +239,7 @@ vy_index_new(struct vy_index_env *index_env, struct vy_cache_env *cache_env,
 	index->space_id = index_def->space_id;
 	index->id = index_def->iid;
 	index->opts = index_def->opts;
+	index->need_check_unique = index->opts.is_unique;
 	vy_index_read_set_new(&index->read_set);
 
 	index_env->index_count++;
diff --git a/src/box/vy_index.h b/src/box/vy_index.h
index 47fa335f..06898126 100644
--- a/src/box/vy_index.h
+++ b/src/box/vy_index.h
@@ -159,6 +159,13 @@ struct vy_index {
 	/** Key definition passed by the user. */
 	struct key_def *key_def;
 	/**
+	 * If the following flag is set the index is unique and
+	 * it must be checked for duplicates on INSERT. Otherwise,
+	 * the check can be skipped, either because this index
+	 * is not unique or it is a part of another unique index.
+	 */
+	bool need_check_unique;
+	/**
 	 * Tuple format for tuples of this index created when
 	 * reading pages from disk.
 	 * Is distinct from mem_format only for secondary keys,
diff --git a/test/vinyl/misc.result b/test/vinyl/misc.result
index a8db51e5..8c4bbbb4 100644
--- a/test/vinyl/misc.result
+++ b/test/vinyl/misc.result
@@ -112,3 +112,66 @@ tx2.rollback - tx1.rollback -- 1
 s:drop()
 ---
 ...
+--
+-- gh-3158: check of duplicates is skipped if the index
+-- is contained by another unique index which is checked.
+--
+s = box.schema.create_space('test', {engine = 'vinyl'})
+---
+...
+i1 = s:create_index('i1', {unique = true, parts = {1, 'unsigned', 2, 'unsigned'}})
+---
+...
+i2 = s:create_index('i2', {unique = true, parts = {2, 'unsigned', 1, 'unsigned'}})
+---
+...
+i3 = s:create_index('i3', {unique = true, parts = {3, 'unsigned', 4, 'unsigned', 5, 'unsigned'}})
+---
+...
+i4 = s:create_index('i4', {unique = true, parts = {5, 'unsigned', 4, 'unsigned'}})
+---
+...
+i5 = s:create_index('i5', {unique = true, parts = {4, 'unsigned', 5, 'unsigned', 1, 'unsigned'}})
+---
+...
+i6 = s:create_index('i6', {unique = true, parts = {4, 'unsigned', 6, 'unsigned', 5, 'unsigned'}})
+---
+...
+i7 = s:create_index('i7', {unique = true, parts = {6, 'unsigned'}})
+---
+...
+s:insert{1, 1, 1, 1, 1, 1}
+---
+- [1, 1, 1, 1, 1, 1]
+...
+i1:info().lookup -- 1
+---
+- 1
+...
+i2:info().lookup -- 0
+---
+- 0
+...
+i3:info().lookup -- 0
+---
+- 0
+...
+i4:info().lookup -- 1
+---
+- 1
+...
+i5:info().lookup -- 0
+---
+- 0
+...
+i6:info().lookup -- 0
+---
+- 0
+...
+i7:info().lookup -- 1
+---
+- 1
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/misc.test.lua b/test/vinyl/misc.test.lua
index 8b76a66b..93dfb4bf 100644
--- a/test/vinyl/misc.test.lua
+++ b/test/vinyl/misc.test.lua
@@ -48,3 +48,28 @@ tx2 = box.info.vinyl().tx
 tx2.commit - tx1.commit -- 0
 tx2.rollback - tx1.rollback -- 1
 s:drop()
+
+--
+-- gh-3158: check of duplicates is skipped if the index
+-- is contained by another unique index which is checked.
+--
+s = box.schema.create_space('test', {engine = 'vinyl'})
+i1 = s:create_index('i1', {unique = true, parts = {1, 'unsigned', 2, 'unsigned'}})
+i2 = s:create_index('i2', {unique = true, parts = {2, 'unsigned', 1, 'unsigned'}})
+i3 = s:create_index('i3', {unique = true, parts = {3, 'unsigned', 4, 'unsigned', 5, 'unsigned'}})
+i4 = s:create_index('i4', {unique = true, parts = {5, 'unsigned', 4, 'unsigned'}})
+i5 = s:create_index('i5', {unique = true, parts = {4, 'unsigned', 5, 'unsigned', 1, 'unsigned'}})
+i6 = s:create_index('i6', {unique = true, parts = {4, 'unsigned', 6, 'unsigned', 5, 'unsigned'}})
+i7 = s:create_index('i7', {unique = true, parts = {6, 'unsigned'}})
+
+s:insert{1, 1, 1, 1, 1, 1}
+
+i1:info().lookup -- 1
+i2:info().lookup -- 0
+i3:info().lookup -- 0
+i4:info().lookup -- 1
+i5:info().lookup -- 0
+i6:info().lookup -- 0
+i7:info().lookup -- 1
+
+s:drop()
-- 
2.11.0