[tarantool-patches] [PATCH v2] sql: Duplicate key error for a non-unique index

Tarantool development patches archive
 help / color / mirror / Atom feed

* [tarantool-patches] [PATCH v2] sql: Duplicate key error for a non-unique index
@ 2019-02-20 10:06 Stanislav Zudin
  2019-02-26 13:26 ` Vladimir Davydov
  0 siblings, 1 reply; 6+ messages in thread
From: Stanislav Zudin @ 2019-02-20 10:06 UTC (permalink / raw)
  To: tarantool-patches, kostja; +Cc: Stanislav Zudin

Adds collation analysis into creating of a composite key for
index tuples.
The keys of secondary index consist of parts defined for index itself
combined with parts defined for primary key.
The duplicate parts are ignored. But the search of duplicates didn't
take the collation into consideration.
If non-unique secondary index contained primary key columns their
parts from the primary key were omitted. This fact caused an issue.

Closes #3537
---
Branch: https://github.com/tarantool/tarantool/tree/stanztt/gh-3537-nonunique-index-dup-key-error
Issue: https://github.com/tarantool/tarantool/issues/3537

 src/box/key_def.c           |  51 +++++-
 src/coll.c                  |   1 +
 src/coll.h                  |   2 +
 test/sql/collation.result   | 330 ++++++++++++++++++++++++++++++++++++
 test/sql/collation.test.lua | 123 ++++++++++++++
 5 files changed, 500 insertions(+), 7 deletions(-)

diff --git a/src/box/key_def.c b/src/box/key_def.c
index 9411ade39..9814d6df0 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -37,6 +37,7 @@
 #include "schema_def.h"
 #include "coll_id_cache.h"
 #include "small/region.h"
+#include "coll.h"
 
 const char *sort_order_strs[] = { "asc", "desc", "undef" };
 
@@ -596,12 +597,11 @@ key_def_find_by_fieldno(const struct key_def *key_def, uint32_t fieldno)
 	return key_def_find(key_def, &part);
 }
 
-const struct key_part *
-key_def_find(const struct key_def *key_def, const struct key_part *to_find)
+static const struct key_part *
+key_def_find_next(const struct key_part *part, const struct key_part *end,
+		  const struct key_part *to_find)
 {
-	const struct key_part *part = key_def->parts;
-	const struct key_part *end = part + key_def->part_count;
-	for (; part != end; part++) {
+	for(; part != end; part++) {
 		if (part->fieldno == to_find->fieldno &&
 		    json_path_cmp(part->path, part->path_len,
 				  to_find->path, to_find->path_len,
@@ -611,6 +611,43 @@ key_def_find(const struct key_def *key_def, const struct key_part *to_find)
 	return NULL;
 }
 
+static bool
+key_def_need_merge(const struct key_def *sec_key_def,
+		   const struct key_part *pk_key_part)
+{
+	const struct key_part* end = sec_key_def->parts +
+				     sec_key_def->part_count;
+	const struct key_part* part = key_def_find_next(sec_key_def->parts,
+							end,
+							pk_key_part);
+	if (part == NULL)
+		return true;
+
+	/* The duplicate key_part is found,
+	 * compare collation */
+	if (part->coll == pk_key_part->coll)
+		return false;
+
+	if (part->coll == NULL ||
+	    part->coll->strength == COLL_ICU_STRENGTH_DEFAULT) {
+		return false;
+		/* If collation of the sec. key part
+		 * is binary then the sec. key
+		 * doesn't require a composite key.
+		 * */
+	} else
+		return true;
+}
+
+const struct key_part *
+key_def_find(const struct key_def *key_def, const struct key_part *to_find)
+{
+	/* find the first match */
+	return key_def_find_next(key_def->parts,
+				 key_def->parts + key_def->part_count,
+				 to_find);
+}
+
 bool
 key_def_contains(const struct key_def *first, const struct key_def *second)
 {
@@ -639,7 +676,7 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
 	part = second->parts;
 	end = part + second->part_count;
 	for (; part != end; part++) {
-		if (key_def_find(first, part) != NULL)
+		if (!key_def_need_merge(first, part))
 			--new_part_count;
 		else
 			sz += part->path_len;
@@ -677,7 +714,7 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
 	part = second->parts;
 	end = part + second->part_count;
 	for (; part != end; part++) {
-		if (key_def_find(first, part) != NULL)
+		if (!key_def_need_merge(first, part))
 			continue;
 		key_def_set_part(new_def, pos++, part->fieldno, part->type,
 				 part->nullable_action, part->coll,
diff --git a/src/coll.c b/src/coll.c
index 6d9c44dbf..8f9dcbac2 100644
--- a/src/coll.c
+++ b/src/coll.c
@@ -320,6 +320,7 @@ coll_new(const struct coll_def *def)
 	}
 	memcpy((char *) coll->fingerprint, fingerprint, fingerprint_len + 1);
 	coll->refs = 1;
+	coll->strength = def->icu.strength;
 	coll->type = def->type;
 	switch (coll->type) {
 	case COLL_TYPE_ICU:
diff --git a/src/coll.h b/src/coll.h
index 9c725712a..2412f8032 100644
--- a/src/coll.h
+++ b/src/coll.h
@@ -56,6 +56,8 @@ struct UCollator;
 struct coll {
 	/** Collation type. */
 	enum coll_type type;
+	/** Strength ICU settings */
+	enum coll_icu_strength strength;
 	/** ICU collation specific data. */
 	struct UCollator *collator;
 	/** String comparator. */
diff --git a/test/sql/collation.result b/test/sql/collation.result
index 5721ef854..6cccbc84b 100644
--- a/test/sql/collation.result
+++ b/test/sql/collation.result
@@ -325,3 +325,333 @@ box.sql.execute("DROP TABLE t1;")
 box.sql.execute("DROP TABLE t0;")
 ---
 ...
+-- gh-3537 Duplicate key error for an index that is not unique
+-- pk - default, sc - unicode_ci
+box.sql.execute('CREATE TABLE t3 (s1 CHAR(5) PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3 ON t3 (s1 collate "unicode_ci");')
+---
+...
+box.sql.execute("INSERT INTO t3 VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3 VALUES ('A');")
+---
+...
+box.sql.execute("SELECT * FROM t3;")
+---
+- - ['A']
+  - ['a']
+...
+box.sql.execute("DROP TABLE t3;")
+---
+...
+-- pk - binary, sc - unicode
+box.sql.execute('CREATE TABLE t3b (s1 CHAR(5) collate "binary" PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3b ON t3b (s1 collate "unicode");')
+---
+...
+box.sql.execute("INSERT INTO t3b VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3b VALUES ('A');")
+---
+...
+box.sql.execute("SELECT * FROM t3b;")
+---
+- - ['A']
+  - ['a']
+...
+box.sql.execute("DROP TABLE t3b;")
+---
+...
+-- pk - binary, sc - unicode (make dup)
+box.sql.execute('CREATE TABLE t3b (s1 CHAR(5) collate "binary" PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3b ON t3b (s1 collate "unicode");')
+---
+...
+box.sql.execute("INSERT INTO t3b VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3b VALUES ('A');")
+---
+...
+box.sql.execute("INSERT INTO t3b VALUES ('a');")
+---
+- error: Duplicate key exists in unique index 'pk_unnamed_T3B_1' in space 'T3B'
+...
+box.sql.execute("SELECT * FROM t3b;")
+---
+- - ['A']
+  - ['a']
+...
+box.sql.execute("DROP TABLE t3b;")
+---
+...
+-- pk - unicode, sc - binary
+box.sql.execute('CREATE TABLE t3c (s1 CHAR(5) collate "unicode" PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3c ON t3c (s1 collate "binary");')
+---
+...
+box.sql.execute("INSERT INTO t3c VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3c VALUES ('A');")
+---
+...
+box.sql.execute("SELECT * FROM t3c;")
+---
+- - ['a']
+  - ['A']
+...
+box.sql.execute("DROP TABLE t3c;")
+---
+...
+-- pk - unicode, sc - binary (make dup)
+box.sql.execute('CREATE TABLE t3c (s1 CHAR(5) collate "unicode" PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3c ON t3c (s1 collate "binary");')
+---
+...
+box.sql.execute("INSERT INTO t3c VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3c VALUES ('A');")
+---
+...
+box.sql.execute("INSERT INTO t3c VALUES ('a');")
+---
+- error: Duplicate key exists in unique index 'pk_unnamed_T3C_1' in space 'T3C'
+...
+box.sql.execute("SELECT * FROM t3c;")
+---
+- - ['a']
+  - ['A']
+...
+box.sql.execute("DROP TABLE t3c;")
+---
+...
+-- pk - binary, sc - unicode_ci
+box.sql.execute('CREATE TABLE t3d (s1 CHAR(5) collate "binary" PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3d ON t3d (s1 collate "unicode_ci");')
+---
+...
+box.sql.execute("INSERT INTO t3d VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3d VALUES ('A');")
+---
+...
+box.sql.execute("SELECT * FROM t3d;")
+---
+- - ['A']
+  - ['a']
+...
+box.sql.execute("DROP TABLE t3d;")
+---
+...
+-- pk - binary, sc - unicode_ci (make dup)
+box.sql.execute('CREATE TABLE t3d (s1 CHAR(5) collate "binary" PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3d ON t3d (s1 collate "unicode_ci");')
+---
+...
+box.sql.execute("INSERT INTO t3d VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3d VALUES ('A');")
+---
+...
+box.sql.execute("INSERT INTO t3d VALUES ('a');")
+---
+- error: Duplicate key exists in unique index 'pk_unnamed_T3D_1' in space 'T3D'
+...
+box.sql.execute("SELECT * FROM t3d;")
+---
+- - ['A']
+  - ['a']
+...
+box.sql.execute("DROP TABLE t3d;")
+---
+...
+-- pk - unicode_ci, sc - binary (should fail)
+box.sql.execute('CREATE TABLE t3e (s1 CHAR(5) collate "unicode_ci" PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3e ON t3e (s1 collate "binary");')
+---
+...
+box.sql.execute("INSERT INTO t3e VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3e VALUES ('A');")
+---
+- error: Duplicate key exists in unique index 'pk_unnamed_T3E_1' in space 'T3E'
+...
+box.sql.execute("SELECT * FROM t3e;")
+---
+- - ['a']
+...
+box.sql.execute("DROP TABLE t3e;")
+---
+...
+-- pk - unicode, sc - unicode_ci
+box.sql.execute('CREATE TABLE t3f (s1 CHAR(5) collate "unicode" PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3f ON t3f (s1 collate "unicode_ci");')
+---
+...
+box.sql.execute("INSERT INTO t3f VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3f VALUES ('A');")
+---
+...
+box.sql.execute("SELECT * FROM t3f;")
+---
+- - ['a']
+  - ['A']
+...
+box.sql.execute("DROP TABLE t3f;")
+---
+...
+-- pk - unicode, sc - unicode_ci (make dup)
+box.sql.execute('CREATE TABLE t3f (s1 CHAR(5) collate "unicode" PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3f ON t3f (s1 collate "unicode_ci");')
+---
+...
+box.sql.execute("INSERT INTO t3f VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3f VALUES ('A');")
+---
+...
+box.sql.execute("INSERT INTO t3f VALUES ('a');")
+---
+- error: Duplicate key exists in unique index 'pk_unnamed_T3F_1' in space 'T3F'
+...
+box.sql.execute("SELECT * FROM t3f;")
+---
+- - ['a']
+  - ['A']
+...
+box.sql.execute("DROP TABLE t3f;")
+---
+...
+-- pk - unicode_ci, sc - unicode (should fail)
+box.sql.execute('CREATE TABLE t3g (s1 CHAR(5) collate "unicode_ci" PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3g ON t3g (s1 collate "unicode");')
+---
+...
+box.sql.execute("INSERT INTO t3g VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3g VALUES ('A');")
+---
+- error: Duplicate key exists in unique index 'pk_unnamed_T3G_1' in space 'T3G'
+...
+box.sql.execute("SELECT * FROM t3g;")
+---
+- - ['a']
+...
+box.sql.execute("DROP TABLE t3g;")
+---
+...
+-- pk - default, sc - multipart
+box.sql.execute('CREATE TABLE qms1 (w CHAR(5) PRIMARY KEY, n CHAR(5), q CHAR(5), s INTEGER);')
+---
+...
+box.sql.execute('CREATE INDEX iqms1 ON qms1 (w collate "unicode_ci", n);')
+---
+...
+box.sql.execute("INSERT INTO qms1 VALUES ('www', 'nnn', 'qqq', 1);")
+---
+...
+box.sql.execute("INSERT INTO qms1 VALUES ('WWW', 'nnn', 'qqq', 2);")
+---
+...
+box.sql.execute("SELECT * FROM qms1;")
+---
+- - ['WWW', 'nnn', 'qqq', 2]
+  - ['www', 'nnn', 'qqq', 1]
+...
+box.sql.execute("DROP TABLE qms1;")
+---
+...
+box.sql.execute('CREATE TABLE qms2 (w CHAR(5) PRIMARY KEY, n CHAR(5), q CHAR(5), s INTEGER);')
+---
+...
+box.sql.execute('CREATE INDEX iqms2 ON qms2 (w collate "unicode", n);')
+---
+...
+box.sql.execute("INSERT INTO qms2 VALUES ('www', 'nnn', 'qqq', 1);")
+---
+...
+box.sql.execute("INSERT INTO qms2 VALUES ('WWW', 'nnn', 'qqq', 2);")
+---
+...
+box.sql.execute("SELECT * FROM qms2;")
+---
+- - ['WWW', 'nnn', 'qqq', 2]
+  - ['www', 'nnn', 'qqq', 1]
+...
+box.sql.execute("DROP TABLE qms2;")
+---
+...
+-- pk - multipart, sc overlaps with pk
+box.sql.execute('CREATE TABLE qms3 (w CHAR(5), n CHAR(5), q CHAR(5), s INTEGER, CONSTRAINT pk_qms3 PRIMARY KEY(w, n, q));')
+---
+...
+box.sql.execute('CREATE INDEX iqms3 ON qms3 (w collate "unicode_ci", s);')
+---
+...
+box.sql.execute("INSERT INTO qms3 VALUES ('www', 'nnn', 'qqq', 1);")
+---
+...
+box.sql.execute("INSERT INTO qms3 VALUES ('WWW', 'nnn', 'qqq', 2);")
+---
+...
+box.sql.execute("SELECT * FROM qms3;")
+---
+- - ['WWW', 'nnn', 'qqq', 2]
+  - ['www', 'nnn', 'qqq', 1]
+...
+box.sql.execute("DROP TABLE qms3;")
+---
+...
+box.sql.execute('CREATE TABLE qms4 (w CHAR(5), n CHAR(5), q CHAR(5), s INTEGER, CONSTRAINT pk_qms4 PRIMARY KEY(w, n, q));')
+---
+...
+box.sql.execute('CREATE INDEX iqms4 ON qms4 (w collate "unicode", s);')
+---
+...
+box.sql.execute("INSERT INTO qms4 VALUES ('www', 'nnn', 'qqq', 1);")
+---
+...
+box.sql.execute("INSERT INTO qms4 VALUES ('WWW', 'nnn', 'qqq', 2);")
+---
+...
+box.sql.execute("SELECT * FROM qms4;")
+---
+- - ['WWW', 'nnn', 'qqq', 2]
+  - ['www', 'nnn', 'qqq', 1]
+...
+box.sql.execute("DROP TABLE qms4;")
+---
+...
diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua
index 4c649a444..075ec7ee4 100644
--- a/test/sql/collation.test.lua
+++ b/test/sql/collation.test.lua
@@ -126,3 +126,126 @@ box.sql.execute("SELECT * FROM t1;")
 box.sql.execute("SELECT s1 FROM t0;")
 box.sql.execute("DROP TABLE t1;")
 box.sql.execute("DROP TABLE t0;")
+
+-- gh-3537 Duplicate key error for an index that is not unique
+-- pk - default, sc - unicode_ci
+box.sql.execute('CREATE TABLE t3 (s1 CHAR(5) PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3 ON t3 (s1 collate "unicode_ci");')
+box.sql.execute("INSERT INTO t3 VALUES ('a');")
+box.sql.execute("INSERT INTO t3 VALUES ('A');")
+box.sql.execute("SELECT * FROM t3;")
+box.sql.execute("DROP TABLE t3;")
+
+-- pk - binary, sc - unicode
+box.sql.execute('CREATE TABLE t3b (s1 CHAR(5) collate "binary" PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3b ON t3b (s1 collate "unicode");')
+box.sql.execute("INSERT INTO t3b VALUES ('a');")
+box.sql.execute("INSERT INTO t3b VALUES ('A');")
+box.sql.execute("SELECT * FROM t3b;")
+box.sql.execute("DROP TABLE t3b;")
+
+-- pk - binary, sc - unicode (make dup)
+box.sql.execute('CREATE TABLE t3b (s1 CHAR(5) collate "binary" PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3b ON t3b (s1 collate "unicode");')
+box.sql.execute("INSERT INTO t3b VALUES ('a');")
+box.sql.execute("INSERT INTO t3b VALUES ('A');")
+box.sql.execute("INSERT INTO t3b VALUES ('a');")
+box.sql.execute("SELECT * FROM t3b;")
+box.sql.execute("DROP TABLE t3b;")
+
+-- pk - unicode, sc - binary
+box.sql.execute('CREATE TABLE t3c (s1 CHAR(5) collate "unicode" PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3c ON t3c (s1 collate "binary");')
+box.sql.execute("INSERT INTO t3c VALUES ('a');")
+box.sql.execute("INSERT INTO t3c VALUES ('A');")
+box.sql.execute("SELECT * FROM t3c;")
+box.sql.execute("DROP TABLE t3c;")
+
+-- pk - unicode, sc - binary (make dup)
+box.sql.execute('CREATE TABLE t3c (s1 CHAR(5) collate "unicode" PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3c ON t3c (s1 collate "binary");')
+box.sql.execute("INSERT INTO t3c VALUES ('a');")
+box.sql.execute("INSERT INTO t3c VALUES ('A');")
+box.sql.execute("INSERT INTO t3c VALUES ('a');")
+box.sql.execute("SELECT * FROM t3c;")
+box.sql.execute("DROP TABLE t3c;")
+
+-- pk - binary, sc - unicode_ci
+box.sql.execute('CREATE TABLE t3d (s1 CHAR(5) collate "binary" PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3d ON t3d (s1 collate "unicode_ci");')
+box.sql.execute("INSERT INTO t3d VALUES ('a');")
+box.sql.execute("INSERT INTO t3d VALUES ('A');")
+box.sql.execute("SELECT * FROM t3d;")
+box.sql.execute("DROP TABLE t3d;")
+
+-- pk - binary, sc - unicode_ci (make dup)
+box.sql.execute('CREATE TABLE t3d (s1 CHAR(5) collate "binary" PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3d ON t3d (s1 collate "unicode_ci");')
+box.sql.execute("INSERT INTO t3d VALUES ('a');")
+box.sql.execute("INSERT INTO t3d VALUES ('A');")
+box.sql.execute("INSERT INTO t3d VALUES ('a');")
+box.sql.execute("SELECT * FROM t3d;")
+box.sql.execute("DROP TABLE t3d;")
+
+-- pk - unicode_ci, sc - binary (should fail)
+box.sql.execute('CREATE TABLE t3e (s1 CHAR(5) collate "unicode_ci" PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3e ON t3e (s1 collate "binary");')
+box.sql.execute("INSERT INTO t3e VALUES ('a');")
+box.sql.execute("INSERT INTO t3e VALUES ('A');")
+box.sql.execute("SELECT * FROM t3e;")
+box.sql.execute("DROP TABLE t3e;")
+
+-- pk - unicode, sc - unicode_ci
+box.sql.execute('CREATE TABLE t3f (s1 CHAR(5) collate "unicode" PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3f ON t3f (s1 collate "unicode_ci");')
+box.sql.execute("INSERT INTO t3f VALUES ('a');")
+box.sql.execute("INSERT INTO t3f VALUES ('A');")
+box.sql.execute("SELECT * FROM t3f;")
+box.sql.execute("DROP TABLE t3f;")
+
+-- pk - unicode, sc - unicode_ci (make dup)
+box.sql.execute('CREATE TABLE t3f (s1 CHAR(5) collate "unicode" PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3f ON t3f (s1 collate "unicode_ci");')
+box.sql.execute("INSERT INTO t3f VALUES ('a');")
+box.sql.execute("INSERT INTO t3f VALUES ('A');")
+box.sql.execute("INSERT INTO t3f VALUES ('a');")
+box.sql.execute("SELECT * FROM t3f;")
+box.sql.execute("DROP TABLE t3f;")
+
+-- pk - unicode_ci, sc - unicode (should fail)
+box.sql.execute('CREATE TABLE t3g (s1 CHAR(5) collate "unicode_ci" PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3g ON t3g (s1 collate "unicode");')
+box.sql.execute("INSERT INTO t3g VALUES ('a');")
+box.sql.execute("INSERT INTO t3g VALUES ('A');")
+box.sql.execute("SELECT * FROM t3g;")
+box.sql.execute("DROP TABLE t3g;")
+
+-- pk - default, sc - multipart
+box.sql.execute('CREATE TABLE qms1 (w CHAR(5) PRIMARY KEY, n CHAR(5), q CHAR(5), s INTEGER);')
+box.sql.execute('CREATE INDEX iqms1 ON qms1 (w collate "unicode_ci", n);')
+box.sql.execute("INSERT INTO qms1 VALUES ('www', 'nnn', 'qqq', 1);")
+box.sql.execute("INSERT INTO qms1 VALUES ('WWW', 'nnn', 'qqq', 2);")
+box.sql.execute("SELECT * FROM qms1;")
+box.sql.execute("DROP TABLE qms1;")
+
+box.sql.execute('CREATE TABLE qms2 (w CHAR(5) PRIMARY KEY, n CHAR(5), q CHAR(5), s INTEGER);')
+box.sql.execute('CREATE INDEX iqms2 ON qms2 (w collate "unicode", n);')
+box.sql.execute("INSERT INTO qms2 VALUES ('www', 'nnn', 'qqq', 1);")
+box.sql.execute("INSERT INTO qms2 VALUES ('WWW', 'nnn', 'qqq', 2);")
+box.sql.execute("SELECT * FROM qms2;")
+box.sql.execute("DROP TABLE qms2;")
+
+-- pk - multipart, sc overlaps with pk
+box.sql.execute('CREATE TABLE qms3 (w CHAR(5), n CHAR(5), q CHAR(5), s INTEGER, CONSTRAINT pk_qms3 PRIMARY KEY(w, n, q));')
+box.sql.execute('CREATE INDEX iqms3 ON qms3 (w collate "unicode_ci", s);')
+box.sql.execute("INSERT INTO qms3 VALUES ('www', 'nnn', 'qqq', 1);")
+box.sql.execute("INSERT INTO qms3 VALUES ('WWW', 'nnn', 'qqq', 2);")
+box.sql.execute("SELECT * FROM qms3;")
+box.sql.execute("DROP TABLE qms3;")
+
+box.sql.execute('CREATE TABLE qms4 (w CHAR(5), n CHAR(5), q CHAR(5), s INTEGER, CONSTRAINT pk_qms4 PRIMARY KEY(w, n, q));')
+box.sql.execute('CREATE INDEX iqms4 ON qms4 (w collate "unicode", s);')
+box.sql.execute("INSERT INTO qms4 VALUES ('www', 'nnn', 'qqq', 1);")
+box.sql.execute("INSERT INTO qms4 VALUES ('WWW', 'nnn', 'qqq', 2);")
+box.sql.execute("SELECT * FROM qms4;")
+box.sql.execute("DROP TABLE qms4;")
-- 
2.17.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [tarantool-patches] [PATCH v2] sql: Duplicate key error for a non-unique index
  2019-02-20 10:06 [tarantool-patches] [PATCH v2] sql: Duplicate key error for a non-unique index Stanislav Zudin
@ 2019-02-26 13:26 ` Vladimir Davydov
  2019-02-28 14:17   ` [tarantool-patches] " Stanislav Zudin
  0 siblings, 1 reply; 6+ messages in thread
From: Vladimir Davydov @ 2019-02-26 13:26 UTC (permalink / raw)
  To: Stanislav Zudin; +Cc: tarantool-patches, kostja

On Wed, Feb 20, 2019 at 01:06:10PM +0300, Stanislav Zudin wrote:
> Adds collation analysis into creating of a composite key for
> index tuples.
> The keys of secondary index consist of parts defined for index itself
> combined with parts defined for primary key.
> The duplicate parts are ignored. But the search of duplicates didn't
> take the collation into consideration.
> If non-unique secondary index contained primary key columns their
> parts from the primary key were omitted. This fact caused an issue.
> 
> Closes #3537
> ---
> Branch: https://github.com/tarantool/tarantool/tree/stanztt/gh-3537-nonunique-index-dup-key-error
> Issue: https://github.com/tarantool/tarantool/issues/3537

Whenever you resubmit a patch, please write a brief change log here.
For more details see

https://tarantool.io/en/doc/1.10/dev_guide/developer_guidelines/#how-to-submit-a-patch-for-review

> 
>  src/box/key_def.c           |  51 +++++-
>  src/coll.c                  |   1 +
>  src/coll.h                  |   2 +
>  test/sql/collation.result   | 330 ++++++++++++++++++++++++++++++++++++
>  test/sql/collation.test.lua | 123 ++++++++++++++
>  5 files changed, 500 insertions(+), 7 deletions(-)
> 
> diff --git a/src/box/key_def.c b/src/box/key_def.c
> index 9411ade39..9814d6df0 100644
> --- a/src/box/key_def.c
> +++ b/src/box/key_def.c
> @@ -37,6 +37,7 @@
>  #include "schema_def.h"
>  #include "coll_id_cache.h"
>  #include "small/region.h"
> +#include "coll.h"
>  
>  const char *sort_order_strs[] = { "asc", "desc", "undef" };
>  
> @@ -596,12 +597,11 @@ key_def_find_by_fieldno(const struct key_def *key_def, uint32_t fieldno)
>  	return key_def_find(key_def, &part);
>  }
>  
> -const struct key_part *
> -key_def_find(const struct key_def *key_def, const struct key_part *to_find)
> +static const struct key_part *
> +key_def_find_next(const struct key_part *part, const struct key_part *end,
> +		  const struct key_part *to_find)
>  {
> -	const struct key_part *part = key_def->parts;
> -	const struct key_part *end = part + key_def->part_count;
> -	for (; part != end; part++) {
> +	for(; part != end; part++) {
>  		if (part->fieldno == to_find->fieldno &&
>  		    json_path_cmp(part->path, part->path_len,
>  				  to_find->path, to_find->path_len,
> @@ -611,6 +611,43 @@ key_def_find(const struct key_def *key_def, const struct key_part *to_find)
>  	return NULL;
>  }
>  
> +static bool
> +key_def_need_merge(const struct key_def *sec_key_def,
> +		   const struct key_part *pk_key_part)

Please don't use secondary/primary in argument names/comments,
because key_def_merge doesn't. This creates inconsistency leading
to confusion. Let's just assume that this function checks whether
an arbitrary key part can be merged into an arbitrary key def.

> +{
> +	const struct key_part* end = sec_key_def->parts +

Nit: we always put * closer to the variable name, not type. Please fix.

> +				     sec_key_def->part_count;
> +	const struct key_part* part = key_def_find_next(sec_key_def->parts,
> +							end,
> +							pk_key_part);
> +	if (part == NULL)
> +		return true;

The function name sounds like it should return true if the given key
part needs to be merged into the given key def, but actually it return
false in this case. Also, IMO key_def_can_merge would be a better name.

> +
> +	/* The duplicate key_part is found,
> +	 * compare collation */

Nit: we always format multline comments like

/*
 * This is a very long
 * comment.
 */

Please fix.

> +	if (part->coll == pk_key_part->coll)
> +		return false;
> +
> +	if (part->coll == NULL ||
> +	    part->coll->strength == COLL_ICU_STRENGTH_DEFAULT) {
> +		return false;
> +		/* If collation of the sec. key part

'sec' doesn't look like a common abbreviation for secondary.

> +		 * is binary then the sec. key
> +		 * doesn't require a composite key.
> +		 * */

This comment is pretty much useless, because it follows directly from
the code surrounding it. Please explain *why* you're doing it rather
than *what* you're doing.

Perhaps, it's worth encapsulating this logic in a helper function
defined in coll.c. Something like coll_is_compatible.

> +	} else
> +		return true;
> +}
> +
> +const struct key_part *
> +key_def_find(const struct key_def *key_def, const struct key_part *to_find)
> +{
> +	/* find the first match */
> +	return key_def_find_next(key_def->parts,
> +				 key_def->parts + key_def->part_count,
> +				 to_find);

Why did you factor key_def_find_next out of key_def_find? AFAIU you
could as well use key_def_find directly in key_def_need_merge, no?

> +}
> +
>  bool
>  key_def_contains(const struct key_def *first, const struct key_def *second)
>  {
> @@ -639,7 +676,7 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
>  	part = second->parts;
>  	end = part + second->part_count;
>  	for (; part != end; part++) {
> -		if (key_def_find(first, part) != NULL)
> +		if (!key_def_need_merge(first, part))
>  			--new_part_count;
>  		else
>  			sz += part->path_len;
> @@ -677,7 +714,7 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
>  	part = second->parts;
>  	end = part + second->part_count;
>  	for (; part != end; part++) {
> -		if (key_def_find(first, part) != NULL)
> +		if (!key_def_need_merge(first, part))
>  			continue;
>  		key_def_set_part(new_def, pos++, part->fieldno, part->type,
>  				 part->nullable_action, part->coll,
> diff --git a/src/coll.c b/src/coll.c
> index 6d9c44dbf..8f9dcbac2 100644
> --- a/src/coll.c
> +++ b/src/coll.c
> @@ -320,6 +320,7 @@ coll_new(const struct coll_def *def)
>  	}
>  	memcpy((char *) coll->fingerprint, fingerprint, fingerprint_len + 1);
>  	coll->refs = 1;
> +	coll->strength = def->icu.strength;
>  	coll->type = def->type;
>  	switch (coll->type) {
>  	case COLL_TYPE_ICU:
> diff --git a/src/coll.h b/src/coll.h
> index 9c725712a..2412f8032 100644
> --- a/src/coll.h
> +++ b/src/coll.h
> @@ -56,6 +56,8 @@ struct UCollator;
>  struct coll {
>  	/** Collation type. */
>  	enum coll_type type;
> +	/** Strength ICU settings */
> +	enum coll_icu_strength strength;

Do we really need it to add strengh here? Can't we extract it from the
collator?

>  	/** ICU collation specific data. */
>  	struct UCollator *collator;
>  	/** String comparator. */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2] sql: Duplicate key error for a non-unique index
  2019-02-26 13:26 ` Vladimir Davydov
@ 2019-02-28 14:17   ` Stanislav Zudin
  2019-02-28 16:19     ` Vladimir Davydov
  0 siblings, 1 reply; 6+ messages in thread
From: Stanislav Zudin @ 2019-02-28 14:17 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Vladimir Davydov

The recent patch includes the suggested changes, see the comments inline.

On 26.02.2019 16:26, Vladimir Davydov wrote:
> On Wed, Feb 20, 2019 at 01:06:10PM +0300, Stanislav Zudin wrote:
>> Adds collation analysis into creating of a composite key for
>> index tuples.
>> The keys of secondary index consist of parts defined for index itself
>> combined with parts defined for primary key.
>> The duplicate parts are ignored. But the search of duplicates didn't
>> take the collation into consideration.
>> If non-unique secondary index contained primary key columns their
>> parts from the primary key were omitted. This fact caused an issue.
>>
>> Closes #3537
>> ---
>> Branch:https://github.com/tarantool/tarantool/tree/stanztt/gh-3537-nonunique-index-dup-key-error
>> Issue:https://github.com/tarantool/tarantool/issues/3537
> Whenever you resubmit a patch, please write a brief change log here.
> For more details see
>
> https://tarantool.io/en/doc/1.10/dev_guide/developer_guidelines/#how-to-submit-a-patch-for-review
>
>>   src/box/key_def.c           |  51 +++++-
>>   src/coll.c                  |   1 +
>>   src/coll.h                  |   2 +
>>   test/sql/collation.result   | 330 ++++++++++++++++++++++++++++++++++++
>>   test/sql/collation.test.lua | 123 ++++++++++++++
>>   5 files changed, 500 insertions(+), 7 deletions(-)
>>
>> diff --git a/src/box/key_def.c b/src/box/key_def.c
>> index 9411ade39..9814d6df0 100644
>> --- a/src/box/key_def.c
>> +++ b/src/box/key_def.c
>> @@ -37,6 +37,7 @@
>>   #include "schema_def.h"
>>   #include "coll_id_cache.h"
>>   #include "small/region.h"
>> +#include "coll.h"
>>   
>>   const char *sort_order_strs[] = { "asc", "desc", "undef" };
>>   
>> @@ -596,12 +597,11 @@ key_def_find_by_fieldno(const struct key_def *key_def, uint32_t fieldno)
>>   	return key_def_find(key_def, &part);
>>   }
>>   
>> -const struct key_part *
>> -key_def_find(const struct key_def *key_def, const struct key_part *to_find)
>> +static const struct key_part *
>> +key_def_find_next(const struct key_part *part, const struct key_part *end,
>> +		  const struct key_part *to_find)
>>   {
>> -	const struct key_part *part = key_def->parts;
>> -	const struct key_part *end = part + key_def->part_count;
>> -	for (; part != end; part++) {
>> +	for(; part != end; part++) {
>>   		if (part->fieldno == to_find->fieldno &&
>>   		    json_path_cmp(part->path, part->path_len,
>>   				  to_find->path, to_find->path_len,
>> @@ -611,6 +611,43 @@ key_def_find(const struct key_def *key_def, const struct key_part *to_find)
>>   	return NULL;
>>   }
>>   
>> +static bool
>> +key_def_need_merge(const struct key_def *sec_key_def,
>> +		   const struct key_part *pk_key_part)
> Please don't use secondary/primary in argument names/comments,
> because key_def_merge doesn't. This creates inconsistency leading
> to confusion. Let's just assume that this function checks whether
> an arbitrary key part can be merged into an arbitrary key def.
fixed.
>> +{
>> +	const struct key_part* end = sec_key_def->parts +
> Nit: we always put * closer to the variable name, not type. Please fix.
>
>> +				     sec_key_def->part_count;
>> +	const struct key_part* part = key_def_find_next(sec_key_def->parts,
>> +							end,
>> +							pk_key_part);
>> +	if (part == NULL)
>> +		return true;
> The function name sounds like it should return true if the given key
> part needs to be merged into the given key def, but actually it return
> false in this case. Also, IMO key_def_can_merge would be a better name.
ok
>> +
>> +	/* The duplicate key_part is found,
>> +	 * compare collation */
> Nit: we always format multline comments like
>
> /*
>   * This is a very long
>   * comment.
>   */
>
> Please fix.
>
>> +	if (part->coll == pk_key_part->coll)
>> +		return false;
>> +
>> +	if (part->coll == NULL ||
>> +	    part->coll->strength == COLL_ICU_STRENGTH_DEFAULT) {
>> +		return false;
>> +		/* If collation of the sec. key part
> 'sec' doesn't look like a common abbreviation for secondary.
>
>> +		 * is binary then the sec. key
>> +		 * doesn't require a composite key.
>> +		 * */
> This comment is pretty much useless, because it follows directly from
> the code surrounding it. Please explain *why* you're doing it rather
> than *what* you're doing.
i've removed this comment.
> Perhaps, it's worth encapsulating this logic in a helper function
> defined in coll.c. Something like coll_is_compatible.
This case is not a general one, so it doesn't deserve to be implemented 
as a generic helper function.
>> +	} else
>> +		return true;
>> +}
>> +
>> +const struct key_part *
>> +key_def_find(const struct key_def *key_def, const struct key_part *to_find)
>> +{
>> +	/* find the first match */
>> +	return key_def_find_next(key_def->parts,
>> +				 key_def->parts + key_def->part_count,
>> +				 to_find);
> Why did you factor key_def_find_next out of key_def_find? AFAIU you
> could as well use key_def_find directly in key_def_need_merge, no?
The current functions are result of several consecutive// improvements 
included previous review.
Fixed.
>> +}
>> +
>>   bool
>>   key_def_contains(const struct key_def *first, const struct key_def *second)
>>   {
>> @@ -639,7 +676,7 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
>>   	part = second->parts;
>>   	end = part + second->part_count;
>>   	for (; part != end; part++) {
>> -		if (key_def_find(first, part) != NULL)
>> +		if (!key_def_need_merge(first, part))
>>   			--new_part_count;
>>   		else
>>   			sz += part->path_len;
>> @@ -677,7 +714,7 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
>>   	part = second->parts;
>>   	end = part + second->part_count;
>>   	for (; part != end; part++) {
>> -		if (key_def_find(first, part) != NULL)
>> +		if (!key_def_need_merge(first, part))
>>   			continue;
>>   		key_def_set_part(new_def, pos++, part->fieldno, part->type,
>>   				 part->nullable_action, part->coll,
>> diff --git a/src/coll.c b/src/coll.c
>> index 6d9c44dbf..8f9dcbac2 100644
>> --- a/src/coll.c
>> +++ b/src/coll.c
>> @@ -320,6 +320,7 @@ coll_new(const struct coll_def *def)
>>   	}
>>   	memcpy((char *) coll->fingerprint, fingerprint, fingerprint_len + 1);
>>   	coll->refs = 1;
>> +	coll->strength = def->icu.strength;
>>   	coll->type = def->type;
>>   	switch (coll->type) {
>>   	case COLL_TYPE_ICU:
>> diff --git a/src/coll.h b/src/coll.h
>> index 9c725712a..2412f8032 100644
>> --- a/src/coll.h
>> +++ b/src/coll.h
>> @@ -56,6 +56,8 @@ struct UCollator;
>>   struct coll {
>>   	/** Collation type. */
>>   	enum coll_type type;
>> +	/** Strength ICU settings */
>> +	enum coll_icu_strength strength;
> Do we really need it to add strengh here? Can't we extract it from the
> collator?

implemented a function to retrieve the collation strength.

>>   	/** ICU collation specific data. */
>>   	struct UCollator *collator;
>>   	/** String comparator. */

The updated patch is below:

---
Branch: 
https://github.com/tarantool/tarantool/tree/stanztt/gh-3537-nonunique-index-dup-key-error
Issue: https://github.com/tarantool/tarantool/issues/3537

  src/CMakeLists.txt          |   3 +-
  src/box/key_def.c           |  29 +++-
  src/coll.c                  |  38 +++++
  src/coll.h                  |   4 +
  test/sql/collation.result   | 330 ++++++++++++++++++++++++++++++++++++
  test/sql/collation.test.lua | 123 ++++++++++++++
  6 files changed, 524 insertions(+), 3 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 04de5ad04..7346e7ba7 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -125,7 +125,8 @@ target_link_libraries(core
      ${LIBEIO_LIBRARIES}
      ${LIBCORO_LIBRARIES}
      ${MSGPUCK_LIBRARIES}
-    ${generic_libraries})
+    ${generic_libraries}
+    ${ICU_LIBRARIES})

  add_library(stat STATIC rmean.c latency.c histogram.c)
  target_link_libraries(stat core)
diff --git a/src/box/key_def.c b/src/box/key_def.c
index 9411ade39..61b0258b2 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -37,6 +37,7 @@
  #include "schema_def.h"
  #include "coll_id_cache.h"
  #include "small/region.h"
+#include "coll.h"

  const char *sort_order_strs[] = { "asc", "desc", "undef" };

@@ -611,6 +612,30 @@ key_def_find(const struct key_def *key_def, const 
struct key_part *to_find)
      return NULL;
  }

+static bool
+key_def_can_merge(const struct key_def *first_key_def,
+    const struct key_part *second_key_part)
+{
+    const struct key_part *part = key_def_find(first_key_def,
+                           second_key_part);
+    if (part == NULL)
+        return true;
+
+    /*
+     * The duplicate key_part is found,
+     * compare collation
+     */
+    if (part->coll == second_key_part->coll)
+        return false;
+
+    if (part->coll == NULL ||
+        /*part->coll->strength == COLL_ICU_STRENGTH_DEFAULT)*/
+        coll_get_strength(part->coll) == COLL_ICU_STRENGTH_DEFAULT)
+        return false;
+    else
+        return true;
+}
+
  bool
  key_def_contains(const struct key_def *first, const struct key_def 
*second)
  {
@@ -639,7 +664,7 @@ key_def_merge(const struct key_def *first, const 
struct key_def *second)
      part = second->parts;
      end = part + second->part_count;
      for (; part != end; part++) {
-        if (key_def_find(first, part) != NULL)
+        if (!key_def_can_merge(first, part))
              --new_part_count;
          else
              sz += part->path_len;
@@ -677,7 +702,7 @@ key_def_merge(const struct key_def *first, const 
struct key_def *second)
      part = second->parts;
      end = part + second->part_count;
      for (; part != end; part++) {
-        if (key_def_find(first, part) != NULL)
+        if (!key_def_can_merge(first, part))
              continue;
          key_def_set_part(new_def, pos++, part->fieldno, part->type,
                   part->nullable_action, part->coll,
diff --git a/src/coll.c b/src/coll.c
index 6d9c44dbf..7d7267d8a 100644
--- a/src/coll.c
+++ b/src/coll.c
@@ -295,6 +295,44 @@ coll_def_snfingerprint(char *buffer, int size, 
const struct coll_def *def)
      return total;
  }

+static enum coll_icu_strength
+cast_coll_strength(UCollationStrength s)
+{
+    enum coll_icu_strength res = COLL_ICU_STRENGTH_DEFAULT;
+
+    switch(s) {
+    case UCOL_PRIMARY:
+        res = COLL_ICU_STRENGTH_PRIMARY;
+        break;
+    case UCOL_SECONDARY:
+        res = COLL_ICU_STRENGTH_SECONDARY;
+        break;
+    case UCOL_TERTIARY:
+        res = COLL_ICU_STRENGTH_TERTIARY;
+        break;
+    case UCOL_QUATERNARY:
+        res = COLL_ICU_STRENGTH_QUATERNARY;
+        break;
+    case UCOL_IDENTICAL:
+        res = COLL_ICU_STRENGTH_IDENTICAL;
+        break;
+    default:
+        break;
+    }
+
+    return res;
+}
+
+/*enum coll_icu_strength*/
+enum coll_icu_strength
+coll_get_strength(const struct coll *coll)
+{
+    if (coll->collator == NULL)
+        return COLL_ICU_STRENGTH_DEFAULT;
+    else
+        return cast_coll_strength(ucol_getStrength(coll->collator));
+}
+
  struct coll *
  coll_new(const struct coll_def *def)
  {
diff --git a/src/coll.h b/src/coll.h
index 9c725712a..8f65b73dd 100644
--- a/src/coll.h
+++ b/src/coll.h
@@ -70,6 +70,10 @@ struct coll {
      const char fingerprint[0];
  };

+/** Retrieve strength of the given collation */
+enum coll_icu_strength
+coll_get_strength(const struct coll *coll);
+
  /**
   * Create a collation by definition. Can return an existing
   * collation object, if a one with the same fingerprint was
diff --git a/test/sql/collation.result b/test/sql/collation.result
index 5721ef854..6cccbc84b 100644
--- a/test/sql/collation.result
+++ b/test/sql/collation.result
@@ -325,3 +325,333 @@ box.sql.execute("DROP TABLE t1;")
  box.sql.execute("DROP TABLE t0;")
  ---
  ...
+-- gh-3537 Duplicate key error for an index that is not unique
+-- pk - default, sc - unicode_ci
+box.sql.execute('CREATE TABLE t3 (s1 CHAR(5) PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3 ON t3 (s1 collate "unicode_ci");')
+---
+...
+box.sql.execute("INSERT INTO t3 VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3 VALUES ('A');")
+---
+...
+box.sql.execute("SELECT * FROM t3;")
+---
+- - ['A']
+  - ['a']
+...
+box.sql.execute("DROP TABLE t3;")
+---
+...
+-- pk - binary, sc - unicode
+box.sql.execute('CREATE TABLE t3b (s1 CHAR(5) collate "binary" PRIMARY 
KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3b ON t3b (s1 collate "unicode");')
+---
+...
+box.sql.execute("INSERT INTO t3b VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3b VALUES ('A');")
+---
+...
+box.sql.execute("SELECT * FROM t3b;")
+---
+- - ['A']
+  - ['a']
+...
+box.sql.execute("DROP TABLE t3b;")
+---
+...
+-- pk - binary, sc - unicode (make dup)
+box.sql.execute('CREATE TABLE t3b (s1 CHAR(5) collate "binary" PRIMARY 
KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3b ON t3b (s1 collate "unicode");')
+---
+...
+box.sql.execute("INSERT INTO t3b VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3b VALUES ('A');")
+---
+...
+box.sql.execute("INSERT INTO t3b VALUES ('a');")
+---
+- error: Duplicate key exists in unique index 'pk_unnamed_T3B_1' in 
space 'T3B'
+...
+box.sql.execute("SELECT * FROM t3b;")
+---
+- - ['A']
+  - ['a']
+...
+box.sql.execute("DROP TABLE t3b;")
+---
+...
+-- pk - unicode, sc - binary
+box.sql.execute('CREATE TABLE t3c (s1 CHAR(5) collate "unicode" PRIMARY 
KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3c ON t3c (s1 collate "binary");')
+---
+...
+box.sql.execute("INSERT INTO t3c VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3c VALUES ('A');")
+---
+...
+box.sql.execute("SELECT * FROM t3c;")
+---
+- - ['a']
+  - ['A']
+...
+box.sql.execute("DROP TABLE t3c;")
+---
+...
+-- pk - unicode, sc - binary (make dup)
+box.sql.execute('CREATE TABLE t3c (s1 CHAR(5) collate "unicode" PRIMARY 
KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3c ON t3c (s1 collate "binary");')
+---
+...
+box.sql.execute("INSERT INTO t3c VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3c VALUES ('A');")
+---
+...
+box.sql.execute("INSERT INTO t3c VALUES ('a');")
+---
+- error: Duplicate key exists in unique index 'pk_unnamed_T3C_1' in 
space 'T3C'
+...
+box.sql.execute("SELECT * FROM t3c;")
+---
+- - ['a']
+  - ['A']
+...
+box.sql.execute("DROP TABLE t3c;")
+---
+...
+-- pk - binary, sc - unicode_ci
+box.sql.execute('CREATE TABLE t3d (s1 CHAR(5) collate "binary" PRIMARY 
KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3d ON t3d (s1 collate "unicode_ci");')
+---
+...
+box.sql.execute("INSERT INTO t3d VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3d VALUES ('A');")
+---
+...
+box.sql.execute("SELECT * FROM t3d;")
+---
+- - ['A']
+  - ['a']
+...
+box.sql.execute("DROP TABLE t3d;")
+---
+...
+-- pk - binary, sc - unicode_ci (make dup)
+box.sql.execute('CREATE TABLE t3d (s1 CHAR(5) collate "binary" PRIMARY 
KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3d ON t3d (s1 collate "unicode_ci");')
+---
+...
+box.sql.execute("INSERT INTO t3d VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3d VALUES ('A');")
+---
+...
+box.sql.execute("INSERT INTO t3d VALUES ('a');")
+---
+- error: Duplicate key exists in unique index 'pk_unnamed_T3D_1' in 
space 'T3D'
+...
+box.sql.execute("SELECT * FROM t3d;")
+---
+- - ['A']
+  - ['a']
+...
+box.sql.execute("DROP TABLE t3d;")
+---
+...
+-- pk - unicode_ci, sc - binary (should fail)
+box.sql.execute('CREATE TABLE t3e (s1 CHAR(5) collate "unicode_ci" 
PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3e ON t3e (s1 collate "binary");')
+---
+...
+box.sql.execute("INSERT INTO t3e VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3e VALUES ('A');")
+---
+- error: Duplicate key exists in unique index 'pk_unnamed_T3E_1' in 
space 'T3E'
+...
+box.sql.execute("SELECT * FROM t3e;")
+---
+- - ['a']
+...
+box.sql.execute("DROP TABLE t3e;")
+---
+...
+-- pk - unicode, sc - unicode_ci
+box.sql.execute('CREATE TABLE t3f (s1 CHAR(5) collate "unicode" PRIMARY 
KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3f ON t3f (s1 collate "unicode_ci");')
+---
+...
+box.sql.execute("INSERT INTO t3f VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3f VALUES ('A');")
+---
+...
+box.sql.execute("SELECT * FROM t3f;")
+---
+- - ['a']
+  - ['A']
+...
+box.sql.execute("DROP TABLE t3f;")
+---
+...
+-- pk - unicode, sc - unicode_ci (make dup)
+box.sql.execute('CREATE TABLE t3f (s1 CHAR(5) collate "unicode" PRIMARY 
KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3f ON t3f (s1 collate "unicode_ci");')
+---
+...
+box.sql.execute("INSERT INTO t3f VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3f VALUES ('A');")
+---
+...
+box.sql.execute("INSERT INTO t3f VALUES ('a');")
+---
+- error: Duplicate key exists in unique index 'pk_unnamed_T3F_1' in 
space 'T3F'
+...
+box.sql.execute("SELECT * FROM t3f;")
+---
+- - ['a']
+  - ['A']
+...
+box.sql.execute("DROP TABLE t3f;")
+---
+...
+-- pk - unicode_ci, sc - unicode (should fail)
+box.sql.execute('CREATE TABLE t3g (s1 CHAR(5) collate "unicode_ci" 
PRIMARY KEY);')
+---
+...
+box.sql.execute('CREATE INDEX i3g ON t3g (s1 collate "unicode");')
+---
+...
+box.sql.execute("INSERT INTO t3g VALUES ('a');")
+---
+...
+box.sql.execute("INSERT INTO t3g VALUES ('A');")
+---
+- error: Duplicate key exists in unique index 'pk_unnamed_T3G_1' in 
space 'T3G'
+...
+box.sql.execute("SELECT * FROM t3g;")
+---
+- - ['a']
+...
+box.sql.execute("DROP TABLE t3g;")
+---
+...
+-- pk - default, sc - multipart
+box.sql.execute('CREATE TABLE qms1 (w CHAR(5) PRIMARY KEY, n CHAR(5), q 
CHAR(5), s INTEGER);')
+---
+...
+box.sql.execute('CREATE INDEX iqms1 ON qms1 (w collate "unicode_ci", n);')
+---
+...
+box.sql.execute("INSERT INTO qms1 VALUES ('www', 'nnn', 'qqq', 1);")
+---
+...
+box.sql.execute("INSERT INTO qms1 VALUES ('WWW', 'nnn', 'qqq', 2);")
+---
+...
+box.sql.execute("SELECT * FROM qms1;")
+---
+- - ['WWW', 'nnn', 'qqq', 2]
+  - ['www', 'nnn', 'qqq', 1]
+...
+box.sql.execute("DROP TABLE qms1;")
+---
+...
+box.sql.execute('CREATE TABLE qms2 (w CHAR(5) PRIMARY KEY, n CHAR(5), q 
CHAR(5), s INTEGER);')
+---
+...
+box.sql.execute('CREATE INDEX iqms2 ON qms2 (w collate "unicode", n);')
+---
+...
+box.sql.execute("INSERT INTO qms2 VALUES ('www', 'nnn', 'qqq', 1);")
+---
+...
+box.sql.execute("INSERT INTO qms2 VALUES ('WWW', 'nnn', 'qqq', 2);")
+---
+...
+box.sql.execute("SELECT * FROM qms2;")
+---
+- - ['WWW', 'nnn', 'qqq', 2]
+  - ['www', 'nnn', 'qqq', 1]
+...
+box.sql.execute("DROP TABLE qms2;")
+---
+...
+-- pk - multipart, sc overlaps with pk
+box.sql.execute('CREATE TABLE qms3 (w CHAR(5), n CHAR(5), q CHAR(5), s 
INTEGER, CONSTRAINT pk_qms3 PRIMARY KEY(w, n, q));')
+---
+...
+box.sql.execute('CREATE INDEX iqms3 ON qms3 (w collate "unicode_ci", s);')
+---
+...
+box.sql.execute("INSERT INTO qms3 VALUES ('www', 'nnn', 'qqq', 1);")
+---
+...
+box.sql.execute("INSERT INTO qms3 VALUES ('WWW', 'nnn', 'qqq', 2);")
+---
+...
+box.sql.execute("SELECT * FROM qms3;")
+---
+- - ['WWW', 'nnn', 'qqq', 2]
+  - ['www', 'nnn', 'qqq', 1]
+...
+box.sql.execute("DROP TABLE qms3;")
+---
+...
+box.sql.execute('CREATE TABLE qms4 (w CHAR(5), n CHAR(5), q CHAR(5), s 
INTEGER, CONSTRAINT pk_qms4 PRIMARY KEY(w, n, q));')
+---
+...
+box.sql.execute('CREATE INDEX iqms4 ON qms4 (w collate "unicode", s);')
+---
+...
+box.sql.execute("INSERT INTO qms4 VALUES ('www', 'nnn', 'qqq', 1);")
+---
+...
+box.sql.execute("INSERT INTO qms4 VALUES ('WWW', 'nnn', 'qqq', 2);")
+---
+...
+box.sql.execute("SELECT * FROM qms4;")
+---
+- - ['WWW', 'nnn', 'qqq', 2]
+  - ['www', 'nnn', 'qqq', 1]
+...
+box.sql.execute("DROP TABLE qms4;")
+---
+...
diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua
index 4c649a444..075ec7ee4 100644
--- a/test/sql/collation.test.lua
+++ b/test/sql/collation.test.lua
@@ -126,3 +126,126 @@ box.sql.execute("SELECT * FROM t1;")
  box.sql.execute("SELECT s1 FROM t0;")
  box.sql.execute("DROP TABLE t1;")
  box.sql.execute("DROP TABLE t0;")
+
+-- gh-3537 Duplicate key error for an index that is not unique
+-- pk - default, sc - unicode_ci
+box.sql.execute('CREATE TABLE t3 (s1 CHAR(5) PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3 ON t3 (s1 collate "unicode_ci");')
+box.sql.execute("INSERT INTO t3 VALUES ('a');")
+box.sql.execute("INSERT INTO t3 VALUES ('A');")
+box.sql.execute("SELECT * FROM t3;")
+box.sql.execute("DROP TABLE t3;")
+
+-- pk - binary, sc - unicode
+box.sql.execute('CREATE TABLE t3b (s1 CHAR(5) collate "binary" PRIMARY 
KEY);')
+box.sql.execute('CREATE INDEX i3b ON t3b (s1 collate "unicode");')
+box.sql.execute("INSERT INTO t3b VALUES ('a');")
+box.sql.execute("INSERT INTO t3b VALUES ('A');")
+box.sql.execute("SELECT * FROM t3b;")
+box.sql.execute("DROP TABLE t3b;")
+
+-- pk - binary, sc - unicode (make dup)
+box.sql.execute('CREATE TABLE t3b (s1 CHAR(5) collate "binary" PRIMARY 
KEY);')
+box.sql.execute('CREATE INDEX i3b ON t3b (s1 collate "unicode");')
+box.sql.execute("INSERT INTO t3b VALUES ('a');")
+box.sql.execute("INSERT INTO t3b VALUES ('A');")
+box.sql.execute("INSERT INTO t3b VALUES ('a');")
+box.sql.execute("SELECT * FROM t3b;")
+box.sql.execute("DROP TABLE t3b;")
+
+-- pk - unicode, sc - binary
+box.sql.execute('CREATE TABLE t3c (s1 CHAR(5) collate "unicode" PRIMARY 
KEY);')
+box.sql.execute('CREATE INDEX i3c ON t3c (s1 collate "binary");')
+box.sql.execute("INSERT INTO t3c VALUES ('a');")
+box.sql.execute("INSERT INTO t3c VALUES ('A');")
+box.sql.execute("SELECT * FROM t3c;")
+box.sql.execute("DROP TABLE t3c;")
+
+-- pk - unicode, sc - binary (make dup)
+box.sql.execute('CREATE TABLE t3c (s1 CHAR(5) collate "unicode" PRIMARY 
KEY);')
+box.sql.execute('CREATE INDEX i3c ON t3c (s1 collate "binary");')
+box.sql.execute("INSERT INTO t3c VALUES ('a');")
+box.sql.execute("INSERT INTO t3c VALUES ('A');")
+box.sql.execute("INSERT INTO t3c VALUES ('a');")
+box.sql.execute("SELECT * FROM t3c;")
+box.sql.execute("DROP TABLE t3c;")
+
+-- pk - binary, sc - unicode_ci
+box.sql.execute('CREATE TABLE t3d (s1 CHAR(5) collate "binary" PRIMARY 
KEY);')
+box.sql.execute('CREATE INDEX i3d ON t3d (s1 collate "unicode_ci");')
+box.sql.execute("INSERT INTO t3d VALUES ('a');")
+box.sql.execute("INSERT INTO t3d VALUES ('A');")
+box.sql.execute("SELECT * FROM t3d;")
+box.sql.execute("DROP TABLE t3d;")
+
+-- pk - binary, sc - unicode_ci (make dup)
+box.sql.execute('CREATE TABLE t3d (s1 CHAR(5) collate "binary" PRIMARY 
KEY);')
+box.sql.execute('CREATE INDEX i3d ON t3d (s1 collate "unicode_ci");')
+box.sql.execute("INSERT INTO t3d VALUES ('a');")
+box.sql.execute("INSERT INTO t3d VALUES ('A');")
+box.sql.execute("INSERT INTO t3d VALUES ('a');")
+box.sql.execute("SELECT * FROM t3d;")
+box.sql.execute("DROP TABLE t3d;")
+
+-- pk - unicode_ci, sc - binary (should fail)
+box.sql.execute('CREATE TABLE t3e (s1 CHAR(5) collate "unicode_ci" 
PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3e ON t3e (s1 collate "binary");')
+box.sql.execute("INSERT INTO t3e VALUES ('a');")
+box.sql.execute("INSERT INTO t3e VALUES ('A');")
+box.sql.execute("SELECT * FROM t3e;")
+box.sql.execute("DROP TABLE t3e;")
+
+-- pk - unicode, sc - unicode_ci
+box.sql.execute('CREATE TABLE t3f (s1 CHAR(5) collate "unicode" PRIMARY 
KEY);')
+box.sql.execute('CREATE INDEX i3f ON t3f (s1 collate "unicode_ci");')
+box.sql.execute("INSERT INTO t3f VALUES ('a');")
+box.sql.execute("INSERT INTO t3f VALUES ('A');")
+box.sql.execute("SELECT * FROM t3f;")
+box.sql.execute("DROP TABLE t3f;")
+
+-- pk - unicode, sc - unicode_ci (make dup)
+box.sql.execute('CREATE TABLE t3f (s1 CHAR(5) collate "unicode" PRIMARY 
KEY);')
+box.sql.execute('CREATE INDEX i3f ON t3f (s1 collate "unicode_ci");')
+box.sql.execute("INSERT INTO t3f VALUES ('a');")
+box.sql.execute("INSERT INTO t3f VALUES ('A');")
+box.sql.execute("INSERT INTO t3f VALUES ('a');")
+box.sql.execute("SELECT * FROM t3f;")
+box.sql.execute("DROP TABLE t3f;")
+
+-- pk - unicode_ci, sc - unicode (should fail)
+box.sql.execute('CREATE TABLE t3g (s1 CHAR(5) collate "unicode_ci" 
PRIMARY KEY);')
+box.sql.execute('CREATE INDEX i3g ON t3g (s1 collate "unicode");')
+box.sql.execute("INSERT INTO t3g VALUES ('a');")
+box.sql.execute("INSERT INTO t3g VALUES ('A');")
+box.sql.execute("SELECT * FROM t3g;")
+box.sql.execute("DROP TABLE t3g;")
+
+-- pk - default, sc - multipart
+box.sql.execute('CREATE TABLE qms1 (w CHAR(5) PRIMARY KEY, n CHAR(5), q 
CHAR(5), s INTEGER);')
+box.sql.execute('CREATE INDEX iqms1 ON qms1 (w collate "unicode_ci", n);')
+box.sql.execute("INSERT INTO qms1 VALUES ('www', 'nnn', 'qqq', 1);")
+box.sql.execute("INSERT INTO qms1 VALUES ('WWW', 'nnn', 'qqq', 2);")
+box.sql.execute("SELECT * FROM qms1;")
+box.sql.execute("DROP TABLE qms1;")
+
+box.sql.execute('CREATE TABLE qms2 (w CHAR(5) PRIMARY KEY, n CHAR(5), q 
CHAR(5), s INTEGER);')
+box.sql.execute('CREATE INDEX iqms2 ON qms2 (w collate "unicode", n);')
+box.sql.execute("INSERT INTO qms2 VALUES ('www', 'nnn', 'qqq', 1);")
+box.sql.execute("INSERT INTO qms2 VALUES ('WWW', 'nnn', 'qqq', 2);")
+box.sql.execute("SELECT * FROM qms2;")
+box.sql.execute("DROP TABLE qms2;")
+
+-- pk - multipart, sc overlaps with pk
+box.sql.execute('CREATE TABLE qms3 (w CHAR(5), n CHAR(5), q CHAR(5), s 
INTEGER, CONSTRAINT pk_qms3 PRIMARY KEY(w, n, q));')
+box.sql.execute('CREATE INDEX iqms3 ON qms3 (w collate "unicode_ci", s);')
+box.sql.execute("INSERT INTO qms3 VALUES ('www', 'nnn', 'qqq', 1);")
+box.sql.execute("INSERT INTO qms3 VALUES ('WWW', 'nnn', 'qqq', 2);")
+box.sql.execute("SELECT * FROM qms3;")
+box.sql.execute("DROP TABLE qms3;")
+
+box.sql.execute('CREATE TABLE qms4 (w CHAR(5), n CHAR(5), q CHAR(5), s 
INTEGER, CONSTRAINT pk_qms4 PRIMARY KEY(w, n, q));')
+box.sql.execute('CREATE INDEX iqms4 ON qms4 (w collate "unicode", s);')
+box.sql.execute("INSERT INTO qms4 VALUES ('www', 'nnn', 'qqq', 1);")
+box.sql.execute("INSERT INTO qms4 VALUES ('WWW', 'nnn', 'qqq', 2);")
+box.sql.execute("SELECT * FROM qms4;")
+box.sql.execute("DROP TABLE qms4;")
-- 
2.17.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2] sql: Duplicate key error for a non-unique index
  2019-02-28 14:17   ` [tarantool-patches] " Stanislav Zudin
@ 2019-02-28 16:19     ` Vladimir Davydov
  2019-03-01 11:46       ` Stanislav Zudin
  0 siblings, 1 reply; 6+ messages in thread
From: Vladimir Davydov @ 2019-02-28 16:19 UTC (permalink / raw)
  To: Stanislav Zudin; +Cc: tarantool-patches, Konstantin Osipov

[ Cc += Kostja - AFAIR he had something against using STRENGTH in
  collation comparison, see my comment inline ]

On Thu, Feb 28, 2019 at 05:17:13PM +0300, Stanislav Zudin wrote:
> > > +static bool
> > > +key_def_need_merge(const struct key_def *sec_key_def,
> > > +		   const struct key_part *pk_key_part)
> > > +{
> > > +	const struct key_part* end = sec_key_def->parts +
> > > +				     sec_key_def->part_count;
> > > +	const struct key_part* part = key_def_find_next(sec_key_def->parts,
> > > +							end,
> > > +							pk_key_part);
> > > +	if (part == NULL)
> > > +		return true;
> > > +
> > > +	/* The duplicate key_part is found,
> > > +	 * compare collation */
> > > +	if (part->coll == pk_key_part->coll)
> > > +		return false;
> > > +
> > > +	if (part->coll == NULL ||
> > > +	    part->coll->strength == COLL_ICU_STRENGTH_DEFAULT) {
> > > +		return false;
> > > +		/* If collation of the sec. key part
> > > +		 * is binary then the sec. key
> > > +		 * doesn't require a composite key.
> > > +		 * */
> >
> > Perhaps, it's worth encapsulating this logic in a helper function
> > defined in coll.c. Something like coll_is_compatible.
>
> This case is not a general one, so it doesn't deserve to be implemented as a
> generic helper function.

Well, coll isn't an independent library. IMO it'd be perfectly okay to
add a helper there, which is only relevant to Tarantool internals.

> The updated patch is below:
> 
> ---
> Branch: https://github.com/tarantool/tarantool/tree/stanztt/gh-3537-nonunique-index-dup-key-error
> Issue: https://github.com/tarantool/tarantool/issues/3537
> 
>  src/CMakeLists.txt          |   3 +-
>  src/box/key_def.c           |  29 +++-
>  src/coll.c                  |  38 +++++
>  src/coll.h                  |   4 +
>  test/sql/collation.result   | 330 ++++++++++++++++++++++++++++++++++++
>  test/sql/collation.test.lua | 123 ++++++++++++++
>  6 files changed, 524 insertions(+), 3 deletions(-)
> 
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
> index 04de5ad04..7346e7ba7 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -125,7 +125,8 @@ target_link_libraries(core
>      ${LIBEIO_LIBRARIES}
>      ${LIBCORO_LIBRARIES}
>      ${MSGPUCK_LIBRARIES}
> -    ${generic_libraries})
> +    ${generic_libraries}
> +    ${ICU_LIBRARIES})
> 
>  add_library(stat STATIC rmean.c latency.c histogram.c)
>  target_link_libraries(stat core)
> diff --git a/src/box/key_def.c b/src/box/key_def.c
> index 9411ade39..61b0258b2 100644
> --- a/src/box/key_def.c
> +++ b/src/box/key_def.c
> @@ -37,6 +37,7 @@
>  #include "schema_def.h"
>  #include "coll_id_cache.h"
>  #include "small/region.h"
> +#include "coll.h"
> 
>  const char *sort_order_strs[] = { "asc", "desc", "undef" };
> 
> @@ -611,6 +612,30 @@ key_def_find(const struct key_def *key_def, const
> struct key_part *to_find)
>      return NULL;
>  }
> 
> +static bool
> +key_def_can_merge(const struct key_def *first_key_def,
> +    const struct key_part *second_key_part)

Your MUA mangles patches. Please fix it - it makes the patches difficult
to review.

> +{
> +    const struct key_part *part = key_def_find(first_key_def,
> +                           second_key_part);
> +    if (part == NULL)
> +        return true;

I asked you to return false in this case, because this means we can't
merge the given key part into the given key def.

> +
> +    /*
> +     * The duplicate key_part is found,
> +     * compare collation
> +     */
> +    if (part->coll == second_key_part->coll)
> +        return false;
> +
> +    if (part->coll == NULL ||
> +        /*part->coll->strength == COLL_ICU_STRENGTH_DEFAULT)*/

Stray hunk, pleaase remove.

> +        coll_get_strength(part->coll) == COLL_ICU_STRENGTH_DEFAULT)

If my memory doesn't fail, on our last daily meeting, Kostja mentioned
that using STRENGTH here isn't quite correct...

> +        return false;
> +    else
> +        return true;
> +}
> +
>  bool
>  key_def_contains(const struct key_def *first, const struct key_def *second)
>  {
> @@ -639,7 +664,7 @@ key_def_merge(const struct key_def *first, const struct
> key_def *second)
>      part = second->parts;
>      end = part + second->part_count;
>      for (; part != end; part++) {
> -        if (key_def_find(first, part) != NULL)
> +        if (!key_def_can_merge(first, part))
>              --new_part_count;
>          else
>              sz += part->path_len;
> @@ -677,7 +702,7 @@ key_def_merge(const struct key_def *first, const struct
> key_def *second)
>      part = second->parts;
>      end = part + second->part_count;
>      for (; part != end; part++) {
> -        if (key_def_find(first, part) != NULL)
> +        if (!key_def_can_merge(first, part))
>              continue;
>          key_def_set_part(new_def, pos++, part->fieldno, part->type,
>                   part->nullable_action, part->coll,
> diff --git a/src/coll.c b/src/coll.c
> index 6d9c44dbf..7d7267d8a 100644
> --- a/src/coll.c
> +++ b/src/coll.c
> @@ -295,6 +295,44 @@ coll_def_snfingerprint(char *buffer, int size, const
> struct coll_def *def)
>      return total;
>  }
> 
> +static enum coll_icu_strength
> +cast_coll_strength(UCollationStrength s)
> +{
> +    enum coll_icu_strength res = COLL_ICU_STRENGTH_DEFAULT;
> +
> +    switch(s) {
> +    case UCOL_PRIMARY:
> +        res = COLL_ICU_STRENGTH_PRIMARY;
> +        break;
> +    case UCOL_SECONDARY:
> +        res = COLL_ICU_STRENGTH_SECONDARY;
> +        break;
> +    case UCOL_TERTIARY:
> +        res = COLL_ICU_STRENGTH_TERTIARY;
> +        break;
> +    case UCOL_QUATERNARY:
> +        res = COLL_ICU_STRENGTH_QUATERNARY;
> +        break;
> +    case UCOL_IDENTICAL:
> +        res = COLL_ICU_STRENGTH_IDENTICAL;
> +        break;
> +    default:
> +        break;

Ouch. This function looks cumbersome. I'd really prefer if you rather
factored out the whole collation check into a helper function defined
in coll.c.

> +    }
> +
> +    return res;
> +}
> +
> +/*enum coll_icu_strength*/
> +enum coll_icu_strength
> +coll_get_strength(const struct coll *coll)
> +{
> +    if (coll->collator == NULL)
> +        return COLL_ICU_STRENGTH_DEFAULT;
> +    else
> +        return cast_coll_strength(ucol_getStrength(coll->collator));
> +}
> +
>  struct coll *
>  coll_new(const struct coll_def *def)
>  {

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2] sql: Duplicate key error for a non-unique index
  2019-02-28 16:19     ` Vladimir Davydov
@ 2019-03-01 11:46       ` Stanislav Zudin
  2019-03-04 17:53         ` Vladimir Davydov
  0 siblings, 1 reply; 6+ messages in thread
From: Stanislav Zudin @ 2019-03-01 11:46 UTC (permalink / raw)
  To: tarantool-patches, Vladimir Davydov; +Cc: Konstantin Osipov

The recent patch contains the following changes:
1. collation compatibility analysis was extracted to a single helper 
function.


On 28.02.2019 19:19, Vladimir Davydov wrote:
> [ Cc += Kostja - AFAIR he had something against using STRENGTH in
>    collation comparison, see my comment inline ]
> 
> On Thu, Feb 28, 2019 at 05:17:13PM +0300, Stanislav Zudin wrote:
>>>> +static bool
>>>> +key_def_need_merge(const struct key_def *sec_key_def,
>>>> +		   const struct key_part *pk_key_part)
>>>> +{
>>>> +	const struct key_part* end = sec_key_def->parts +
>>>> +				     sec_key_def->part_count;
>>>> +	const struct key_part* part = key_def_find_next(sec_key_def->parts,
>>>> +							end,
>>>> +							pk_key_part);
>>>> +	if (part == NULL)
>>>> +		return true;
>>>> +
>>>> +	/* The duplicate key_part is found,
>>>> +	 * compare collation */
>>>> +	if (part->coll == pk_key_part->coll)
>>>> +		return false;
>>>> +
>>>> +	if (part->coll == NULL ||
>>>> +	    part->coll->strength == COLL_ICU_STRENGTH_DEFAULT) {
>>>> +		return false;
>>>> +		/* If collation of the sec. key part
>>>> +		 * is binary then the sec. key
>>>> +		 * doesn't require a composite key.
>>>> +		 * */
>>>
>>> Perhaps, it's worth encapsulating this logic in a helper function
>>> defined in coll.c. Something like coll_is_compatible.
>>
>> This case is not a general one, so it doesn't deserve to be implemented as a
>> generic helper function.
> 
> Well, coll isn't an independent library. IMO it'd be perfectly okay to
> add a helper there, which is only relevant to Tarantool internals.

Moved this code to a separate function.

> 
>> The updated patch is below:
>>
>> ---
>> Branch: https://github.com/tarantool/tarantool/tree/stanztt/gh-3537-nonunique-index-dup-key-error
>> Issue: https://github.com/tarantool/tarantool/issues/3537
>>
>>   src/CMakeLists.txt          |   3 +-
>>   src/box/key_def.c           |  29 +++-
>>   src/coll.c                  |  38 +++++
>>   src/coll.h                  |   4 +
>>   test/sql/collation.result   | 330 ++++++++++++++++++++++++++++++++++++
>>   test/sql/collation.test.lua | 123 ++++++++++++++
>>   6 files changed, 524 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
>> index 04de5ad04..7346e7ba7 100644
>> --- a/src/CMakeLists.txt
>> +++ b/src/CMakeLists.txt
>> @@ -125,7 +125,8 @@ target_link_libraries(core
>>       ${LIBEIO_LIBRARIES}
>>       ${LIBCORO_LIBRARIES}
>>       ${MSGPUCK_LIBRARIES}
>> -    ${generic_libraries})
>> +    ${generic_libraries}
>> +    ${ICU_LIBRARIES})
>>
>>   add_library(stat STATIC rmean.c latency.c histogram.c)
>>   target_link_libraries(stat core)
>> diff --git a/src/box/key_def.c b/src/box/key_def.c
>> index 9411ade39..61b0258b2 100644
>> --- a/src/box/key_def.c
>> +++ b/src/box/key_def.c
>> @@ -37,6 +37,7 @@
>>   #include "schema_def.h"
>>   #include "coll_id_cache.h"
>>   #include "small/region.h"
>> +#include "coll.h"
>>
>>   const char *sort_order_strs[] = { "asc", "desc", "undef" };
>>
>> @@ -611,6 +612,30 @@ key_def_find(const struct key_def *key_def, const
>> struct key_part *to_find)
>>       return NULL;
>>   }
>>
>> +static bool
>> +key_def_can_merge(const struct key_def *first_key_def,
>> +    const struct key_part *second_key_part)
> 
> Your MUA mangles patches. Please fix it - it makes the patches difficult
> to review.
> 
>> +{
>> +    const struct key_part *part = key_def_find(first_key_def,
>> +                           second_key_part);
>> +    if (part == NULL)
>> +        return true;
> 
> I asked you to return false in this case, because this means we can't
> merge the given key part into the given key def.

Just the opposite. (part == NULL) means that there is no duplicates and
we must include the second_key_part into the composite key.

> 
>> +
>> +    /*
>> +     * The duplicate key_part is found,
>> +     * compare collation
>> +     */
>> +    if (part->coll == second_key_part->coll)
>> +        return false;
>> +
>> +    if (part->coll == NULL ||
>> +        /*part->coll->strength == COLL_ICU_STRENGTH_DEFAULT)*/
> 
> Stray hunk, pleaase remove.

ooops. Fixed.

> 
>> +        coll_get_strength(part->coll) == COLL_ICU_STRENGTH_DEFAULT)
> 
> If my memory doesn't fail, on our last daily meeting, Kostja mentioned
> that using STRENGTH here isn't quite correct...

strength is the only metric we can evaluate. We can't compare e.g. case 
sensitivity vs accent sensitivity, in such cases we consider them
incompatible.
For the case in question we have to decide whether we append the 
key_part from the primary key or not. If the second key_part has binary 
or case sensitive collation then we shouldn't, since the second key_def 
is enough unique.

> 
>> +        return false;
>> +    else
>> +        return true;
>> +}
>> +
>>   bool
>>   key_def_contains(const struct key_def *first, const struct key_def *second)
>>   {
>> @@ -639,7 +664,7 @@ key_def_merge(const struct key_def *first, const struct
>> key_def *second)
>>       part = second->parts;
>>       end = part + second->part_count;
>>       for (; part != end; part++) {
>> -        if (key_def_find(first, part) != NULL)
>> +        if (!key_def_can_merge(first, part))
>>               --new_part_count;
>>           else
>>               sz += part->path_len;
>> @@ -677,7 +702,7 @@ key_def_merge(const struct key_def *first, const struct
>> key_def *second)
>>       part = second->parts;
>>       end = part + second->part_count;
>>       for (; part != end; part++) {
>> -        if (key_def_find(first, part) != NULL)
>> +        if (!key_def_can_merge(first, part))
>>               continue;
>>           key_def_set_part(new_def, pos++, part->fieldno, part->type,
>>                    part->nullable_action, part->coll,
>> diff --git a/src/coll.c b/src/coll.c
>> index 6d9c44dbf..7d7267d8a 100644
>> --- a/src/coll.c
>> +++ b/src/coll.c
>> @@ -295,6 +295,44 @@ coll_def_snfingerprint(char *buffer, int size, const
>> struct coll_def *def)
>>       return total;
>>   }
>>
>> +static enum coll_icu_strength
>> +cast_coll_strength(UCollationStrength s)
>> +{
>> +    enum coll_icu_strength res = COLL_ICU_STRENGTH_DEFAULT;
>> +
>> +    switch(s) {
>> +    case UCOL_PRIMARY:
>> +        res = COLL_ICU_STRENGTH_PRIMARY;
>> +        break;
>> +    case UCOL_SECONDARY:
>> +        res = COLL_ICU_STRENGTH_SECONDARY;
>> +        break;
>> +    case UCOL_TERTIARY:
>> +        res = COLL_ICU_STRENGTH_TERTIARY;
>> +        break;
>> +    case UCOL_QUATERNARY:
>> +        res = COLL_ICU_STRENGTH_QUATERNARY;
>> +        break;
>> +    case UCOL_IDENTICAL:
>> +        res = COLL_ICU_STRENGTH_IDENTICAL;
>> +        break;
>> +    default:
>> +        break;
> 
> Ouch. This function looks cumbersome. I'd really prefer if you rather
> factored out the whole collation check into a helper function defined
> in coll.c.
> 

Done.

>> +    }
>> +
>> +    return res;
>> +}
>> +
>> +/*enum coll_icu_strength*/
>> +enum coll_icu_strength
>> +coll_get_strength(const struct coll *coll)
>> +{
>> +    if (coll->collator == NULL)
>> +        return COLL_ICU_STRENGTH_DEFAULT;
>> +    else
>> +        return cast_coll_strength(ucol_getStrength(coll->collator));
>> +}
>> +
>>   struct coll *
>>   coll_new(const struct coll_def *def)
>>   {
> 

Please find the recent patch below:

---
  src/box/key_def.c | 21 ++++++++-------------
  src/coll.c        | 44 ++++++++++----------------------------------
  src/coll.h        | 10 +++++++---
  3 files changed, 25 insertions(+), 50 deletions(-)

diff --git a/src/box/key_def.c b/src/box/key_def.c
index 61b0258b2..2b5fbbfcc 100644
--- a/src/box/key_def.c
+++ b/src/box/key_def.c
@@ -618,22 +618,17 @@ key_def_can_merge(const struct key_def *first_key_def,
  {
  	const struct key_part *part = key_def_find(first_key_def,
  						   second_key_part);
+	/*
+	 * The key_parts can be merged if
+	 * both key_defs have no duplicated
+	 * key_parts.
+	 * Or there are duplicated key_parts
+	 * but their collations are incompatible.
+	 * */
  	if (part == NULL)
  		return true;

-	/*
-	 * The duplicate key_part is found,
-	 * compare collation
-	 */
-	if (part->coll == second_key_part->coll)
-		return false;
-
-	if (part->coll == NULL ||
-		/*part->coll->strength == COLL_ICU_STRENGTH_DEFAULT)*/
-		coll_get_strength(part->coll) == COLL_ICU_STRENGTH_DEFAULT)
-		return false;
-	else
-		return true;
+	return !coll_is_compatible(part->coll, second_key_part->coll);
  }

  bool
diff --git a/src/coll.c b/src/coll.c
index 7d7267d8a..4a1344fa1 100644
--- a/src/coll.c
+++ b/src/coll.c
@@ -295,42 +295,18 @@ coll_def_snfingerprint(char *buffer, int size, 
const struct coll_def *def)
  	return total;
  }

-static enum coll_icu_strength
-cast_coll_strength(UCollationStrength s)
+bool
+coll_is_compatible(const struct coll *first, const struct coll* second)
  {
-	enum coll_icu_strength res = COLL_ICU_STRENGTH_DEFAULT;
-
-	switch(s) {
-	case UCOL_PRIMARY:
-		res = COLL_ICU_STRENGTH_PRIMARY;
-		break;
-	case UCOL_SECONDARY:
-		res = COLL_ICU_STRENGTH_SECONDARY;
-		break;
-	case UCOL_TERTIARY:
-		res = COLL_ICU_STRENGTH_TERTIARY;
-		break;
-	case UCOL_QUATERNARY:
-		res = COLL_ICU_STRENGTH_QUATERNARY;
-		break;
-	case UCOL_IDENTICAL:
-		res = COLL_ICU_STRENGTH_IDENTICAL;
-		break;
-	default:
-		break;
-	}
-
-	return res;
-}
-
-/*enum coll_icu_strength*/
-enum coll_icu_strength
-coll_get_strength(const struct coll *coll)
-{
-	if (coll->collator == NULL)
-		return COLL_ICU_STRENGTH_DEFAULT;
+	if (first == second)
+		return true;
+	if (first == NULL ||
+	    first->collator == NULL ||
+	    ucol_getStrength(first->collator) == UCOL_DEFAULT)
+		return true;
  	else
-		return cast_coll_strength(ucol_getStrength(coll->collator));
+		return false;
+
  }

  struct coll *
diff --git a/src/coll.h b/src/coll.h
index 8f65b73dd..e3c6447b2 100644
--- a/src/coll.h
+++ b/src/coll.h
@@ -34,6 +34,7 @@
  #include "coll_def.h"
  #include <stddef.h>
  #include <stdint.h>
+#include <stdbool.h>

  #if defined(__cplusplus)
  extern "C" {
@@ -70,9 +71,12 @@ struct coll {
  	const char fingerprint[0];
  };

-/** Retrieve strength of the given collation */
-enum coll_icu_strength
-coll_get_strength(const struct coll *coll);
+/**
+ * Check whether the first collation is compatible
+ * with the second one.
+ * */
+bool
+coll_is_compatible(const struct coll *first, const struct coll* second);

  /**
   * Create a collation by definition. Can return an existing
-- 
2.17.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2] sql: Duplicate key error for a non-unique index
  2019-03-01 11:46       ` Stanislav Zudin
@ 2019-03-04 17:53         ` Vladimir Davydov
  0 siblings, 0 replies; 6+ messages in thread
From: Vladimir Davydov @ 2019-03-04 17:53 UTC (permalink / raw)
  To: Stanislav Zudin; +Cc: tarantool-patches, Konstantin Osipov

On Fri, Mar 01, 2019 at 02:46:45PM +0300, Stanislav Zudin wrote:
> +/**
> + * Check whether the first collation is compatible
> + * with the second one.
> + * */
> +bool
> +coll_is_compatible(const struct coll *first, const struct coll* second);

I renamed coll_is_compatible to coll_can_merge (so that it matches
key_def_can_merge), because compatibility is typically a symmetric
relationship (if a is compatible with b, then b is compatible with a)
while this function is asymmetric. I also added some comments explaining
the logic behind the decisions made by coll_is_compatible and pushed the
patch to 2.1.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-03-04 17:53 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-20 10:06 [tarantool-patches] [PATCH v2] sql: Duplicate key error for a non-unique index Stanislav Zudin
2019-02-26 13:26 ` Vladimir Davydov
2019-02-28 14:17   ` [tarantool-patches] " Stanislav Zudin
2019-02-28 16:19     ` Vladimir Davydov
2019-03-01 11:46       ` Stanislav Zudin
2019-03-04 17:53         ` Vladimir Davydov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox