[tarantool-patches] [PATCH 2/2] sql: compute resulting collation for concatenation

Nikita Pettik korablev at tarantool.org
Wed Jan 16 16:13:20 MSK 2019


According to ANSI, result of concatenation operation should derive
collation sequence from its operands. Now it is not true: result is
always comes with no ("none") collation.

In a nutshell*, rules are quite simple:

a) If some data type  has an explicit collation EC1, then every data
type that has an explicit collation shall have declared type collation
that is EC1.  The collation derivation is explicit and the collation is
EC1.

b) If every data type has an implicit collation, then:

 - If every data type has the same declared type collation IC1, then
   the collation derivation is implicit and the collation is IC1.

 - Otherwise, the collation derivation is none.

c) Otherwise, the collation derivation is none.

*Read complete statement at 9.5 Result of data type combinations

Closes #3937
---
 src/box/sql/expr.c          |  47 +++++++++++++++++++-
 test/sql/collation.result   | 102 ++++++++++++++++++++++++++++++++++++++++++++
 test/sql/collation.test.lua |  46 ++++++++++++++++++++
 3 files changed, 193 insertions(+), 2 deletions(-)

diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index f8819f779..e6f536757 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -221,6 +221,45 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id)
 			}
 			break;
 		}
+		if (op == TK_CONCAT) {
+			/*
+			 * Despite the fact that procedure below
+			 * is very similar to collation_check_compatability(),
+			 * it is slightly different: when both
+			 * operands have different implicit collations,
+			 * derived collation should be "none",
+			 * i.e. no collation is used at all
+			 * (instead of raising error).
+			 */
+			bool is_lhs_forced;
+			uint32_t lhs_coll_id;
+			if (sql_expr_coll(parse, p->pLeft, &is_lhs_forced,
+					  &lhs_coll_id) != 0)
+				return -1;
+			bool is_rhs_forced;
+			uint32_t rhs_coll_id;
+			if (sql_expr_coll(parse, p->pRight, &is_rhs_forced,
+					  &rhs_coll_id) != 0)
+				return -1;
+			if (is_lhs_forced && is_rhs_forced) {
+				if (lhs_coll_id != rhs_coll_id)
+					return -1;
+			}
+			if (is_lhs_forced) {
+				*coll_id = lhs_coll_id;
+				*is_explicit_coll = true;
+				return 0;
+			}
+			if (is_rhs_forced) {
+				*coll_id = rhs_coll_id;
+				*is_explicit_coll = true;
+				return 0;
+			}
+			if (rhs_coll_id != lhs_coll_id)
+				return 0;
+			*coll_id = lhs_coll_id;
+			return 0;
+		}
 		if (p->flags & EP_Collate) {
 			if (p->pLeft && (p->pLeft->flags & EP_Collate) != 0) {
 				p = p->pLeft;
@@ -384,10 +423,14 @@ sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right)
 	bool is_rhs_forced;
 	uint32_t lhs_coll_id;
 	uint32_t rhs_coll_id;
-	if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0)
+	if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0) {
+		diag_set(ClientError, ER_ILLEGAL_COLLATION_MIX);
 		goto err;
-	if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0)
+	}
+	if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0) {
+		diag_set(ClientError, ER_ILLEGAL_COLLATION_MIX);
 		goto err;
+	}
 	uint32_t coll_id;
 	if (collations_check_compatibility(lhs_coll_id, is_lhs_forced,
 					   rhs_coll_id, is_rhs_forced,
diff --git a/test/sql/collation.result b/test/sql/collation.result
index c69510fe7..a13738f49 100644
--- a/test/sql/collation.result
+++ b/test/sql/collation.result
@@ -325,3 +325,105 @@ box.sql.execute("DROP TABLE t1;")
 box.sql.execute("DROP TABLE t0;")
 ---
 ...
+-- gh-3937: result of concatination has derived collation.
+--
+box.sql.execute("CREATE TABLE t4a(a TEXT COLLATE \"unicode\", b TEXT COLLATE \"unicode_ci\", c INT PRIMARY KEY);")
+---
+...
+box.sql.execute("INSERT INTO t4a VALUES('ABC','abc',1);")
+---
+...
+box.sql.execute("INSERT INTO t4a VALUES('ghi','ghi',3);")
+---
+...
+-- Only LHS of concatenation has implicitly set collation.
+-- Hence, no collation is used.
+--
+box.sql.execute("SELECT c FROM t4a WHERE (a||'') = b;")
+---
+- - [1]
+  - [3]
+...
+-- BINARY collation is forced for comparison operator as
+-- a derivation from concatenation.
+--
+box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'') = b;")
+---
+- - [3]
+...
+-- Both operands of concatenation have explicit but different
+-- collations.
+--
+box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'' COLLATE \"unicode_ci\") = b;")
+---
+- error: Illegal mix of collations
+...
+box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'') = b COLLATE \"unicode\";")
+---
+- error: Illegal mix of collations
+...
+-- No collation is used since LHS and RHS of concatenation
+-- operator have different implicit collations.
+--
+box.sql.execute("SELECT c FROM t4a WHERE (a||'')=(b||'');")
+---
+- - [3]
+...
+box.sql.execute("SELECT c FROM t4a WHERE (a||b)=(b||a);")
+---
+- - [3]
+...
+box.sql.execute("CREATE TABLE t4b(a TEXT COLLATE \"unicode_ci\", b TEXT COLLATE \"unicode_ci\", c INT PRIMARY KEY);")
+---
+...
+box.sql.execute("INSERT INTO t4b VALUES('BCD','bcd',1);")
+---
+...
+box.sql.execute("INSERT INTO t4b VALUES('ghi','ghi',3);")
+---
+...
+-- Operands have the same implicit collation, so it is derived.
+--
+box.sql.execute("SELECT c FROM t4a WHERE (a||b)=(b||a);")
+---
+- - [3]
+...
+-- Couple of other possible combinations.
+--
+box.sql.execute("SELECT c FROM t4a WHERE (a||b COLLATE \"binary\")=(b||a);")
+---
+- - [3]
+...
+box.sql.execute("SELECT c FROM t4a WHERE (a||b COLLATE \"binary\")=(b COLLATE \"unicode_ci\"||a);")
+---
+- error: Illegal mix of collations
+...
+box.sql.execute("INSERT INTO t4b VALUES('abc', 'xxx', 2);")
+---
+...
+box.sql.execute("INSERT INTO t4b VALUES('gHz', 'xxx', 4);")
+---
+...
+-- Results are sorted with case-insensitive order.
+-- Otherwise capital latters come first.
+--
+box.sql.execute("SELECT a FROM t4b ORDER BY a COLLATE \"unicode_ci\" || ''")
+---
+- - ['abc']
+  - ['BCD']
+  - ['ghi']
+  - ['gHz']
+...
+box.sql.execute("SELECT a FROM t4b ORDER BY a || b")
+---
+- - ['abc']
+  - ['BCD']
+  - ['ghi']
+  - ['gHz']
+...
+box.space.T4A:drop()
+---
+...
+box.space.T4B:drop()
+---
+...
diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua
index 4ad2d5e50..8f1d57502 100644
--- a/test/sql/collation.test.lua
+++ b/test/sql/collation.test.lua
@@ -126,3 +126,49 @@ box.sql.execute("SELECT * FROM t1;")
 box.sql.execute("SELECT * FROM t0;")
 box.sql.execute("DROP TABLE t1;")
 box.sql.execute("DROP TABLE t0;")
+
+-- gh-3937: result of concatination has derived collation.
+--
+box.sql.execute("CREATE TABLE t4a(a TEXT COLLATE \"unicode\", b TEXT COLLATE \"unicode_ci\", c INT PRIMARY KEY);")
+box.sql.execute("INSERT INTO t4a VALUES('ABC','abc',1);")
+box.sql.execute("INSERT INTO t4a VALUES('ghi','ghi',3);")
+-- Only LHS of concatenation has implicitly set collation.
+-- Hence, no collation is used.
+--
+box.sql.execute("SELECT c FROM t4a WHERE (a||'') = b;")
+-- BINARY collation is forced for comparison operator as
+-- a derivation from concatenation.
+--
+box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'') = b;")
+-- Both operands of concatenation have explicit but different
+-- collations.
+--
+box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'' COLLATE \"unicode_ci\") = b;")
+box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'') = b COLLATE \"unicode\";")
+-- No collation is used since LHS and RHS of concatenation
+-- operator have different implicit collations.
+--
+box.sql.execute("SELECT c FROM t4a WHERE (a||'')=(b||'');")
+box.sql.execute("SELECT c FROM t4a WHERE (a||b)=(b||a);")
+
+box.sql.execute("CREATE TABLE t4b(a TEXT COLLATE \"unicode_ci\", b TEXT COLLATE \"unicode_ci\", c INT PRIMARY KEY);")
+box.sql.execute("INSERT INTO t4b VALUES('BCD','bcd',1);")
+box.sql.execute("INSERT INTO t4b VALUES('ghi','ghi',3);")
+-- Operands have the same implicit collation, so it is derived.
+--
+box.sql.execute("SELECT c FROM t4a WHERE (a||b)=(b||a);")
+-- Couple of other possible combinations.
+--
+box.sql.execute("SELECT c FROM t4a WHERE (a||b COLLATE \"binary\")=(b||a);")
+box.sql.execute("SELECT c FROM t4a WHERE (a||b COLLATE \"binary\")=(b COLLATE \"unicode_ci\"||a);")
+
+box.sql.execute("INSERT INTO t4b VALUES('abc', 'xxx', 2);")
+box.sql.execute("INSERT INTO t4b VALUES('gHz', 'xxx', 4);")
+-- Results are sorted with case-insensitive order.
+-- Otherwise capital latters come first.
+--
+box.sql.execute("SELECT a FROM t4b ORDER BY a COLLATE \"unicode_ci\" || ''")
+box.sql.execute("SELECT a FROM t4b ORDER BY a || b")
+
+box.space.T4A:drop()
+box.space.T4B:drop()
-- 
2.15.1





More information about the Tarantool-patches mailing list