From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 938E724944 for ; Wed, 16 Jan 2019 08:13:26 -0500 (EST) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id i26nkYj88_xb for ; Wed, 16 Jan 2019 08:13:25 -0500 (EST) Received: from smtpng2.m.smailru.net (smtpng2.m.smailru.net [94.100.179.3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 47B8C246F6 for ; Wed, 16 Jan 2019 08:13:25 -0500 (EST) From: Nikita Pettik Subject: [tarantool-patches] [PATCH 2/2] sql: compute resulting collation for concatenation Date: Wed, 16 Jan 2019 16:13:20 +0300 Message-Id: <652a9e6a4514a03ef93133961b09c2f5d45721d8.1547644180.git.korablev@tarantool.org> In-Reply-To: References: In-Reply-To: References: Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: v.shpilevoy@tarantool.org, Nikita Pettik According to ANSI, result of concatenation operation should derive collation sequence from its operands. Now it is not true: result is always comes with no ("none") collation. In a nutshell*, rules are quite simple: a) If some data type has an explicit collation EC1, then every data type that has an explicit collation shall have declared type collation that is EC1. The collation derivation is explicit and the collation is EC1. b) If every data type has an implicit collation, then: - If every data type has the same declared type collation IC1, then the collation derivation is implicit and the collation is IC1. - Otherwise, the collation derivation is none. c) Otherwise, the collation derivation is none. *Read complete statement at 9.5 Result of data type combinations Closes #3937 --- src/box/sql/expr.c | 47 +++++++++++++++++++- test/sql/collation.result | 102 ++++++++++++++++++++++++++++++++++++++++++++ test/sql/collation.test.lua | 46 ++++++++++++++++++++ 3 files changed, 193 insertions(+), 2 deletions(-) diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c index f8819f779..e6f536757 100644 --- a/src/box/sql/expr.c +++ b/src/box/sql/expr.c @@ -221,6 +221,45 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id) } break; } + if (op == TK_CONCAT) { + /* + * Despite the fact that procedure below + * is very similar to collation_check_compatability(), + * it is slightly different: when both + * operands have different implicit collations, + * derived collation should be "none", + * i.e. no collation is used at all + * (instead of raising error). + */ + bool is_lhs_forced; + uint32_t lhs_coll_id; + if (sql_expr_coll(parse, p->pLeft, &is_lhs_forced, + &lhs_coll_id) != 0) + return -1; + bool is_rhs_forced; + uint32_t rhs_coll_id; + if (sql_expr_coll(parse, p->pRight, &is_rhs_forced, + &rhs_coll_id) != 0) + return -1; + if (is_lhs_forced && is_rhs_forced) { + if (lhs_coll_id != rhs_coll_id) + return -1; + } + if (is_lhs_forced) { + *coll_id = lhs_coll_id; + *is_explicit_coll = true; + return 0; + } + if (is_rhs_forced) { + *coll_id = rhs_coll_id; + *is_explicit_coll = true; + return 0; + } + if (rhs_coll_id != lhs_coll_id) + return 0; + *coll_id = lhs_coll_id; + return 0; + } if (p->flags & EP_Collate) { if (p->pLeft && (p->pLeft->flags & EP_Collate) != 0) { p = p->pLeft; @@ -384,10 +423,14 @@ sql_binary_compare_coll_seq(Parse *parser, Expr *left, Expr *right) bool is_rhs_forced; uint32_t lhs_coll_id; uint32_t rhs_coll_id; - if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0) + if (sql_expr_coll(parser, left, &is_lhs_forced, &lhs_coll_id) != 0) { + diag_set(ClientError, ER_ILLEGAL_COLLATION_MIX); goto err; - if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0) + } + if (sql_expr_coll(parser, right, &is_rhs_forced, &rhs_coll_id) != 0) { + diag_set(ClientError, ER_ILLEGAL_COLLATION_MIX); goto err; + } uint32_t coll_id; if (collations_check_compatibility(lhs_coll_id, is_lhs_forced, rhs_coll_id, is_rhs_forced, diff --git a/test/sql/collation.result b/test/sql/collation.result index c69510fe7..a13738f49 100644 --- a/test/sql/collation.result +++ b/test/sql/collation.result @@ -325,3 +325,105 @@ box.sql.execute("DROP TABLE t1;") box.sql.execute("DROP TABLE t0;") --- ... +-- gh-3937: result of concatination has derived collation. +-- +box.sql.execute("CREATE TABLE t4a(a TEXT COLLATE \"unicode\", b TEXT COLLATE \"unicode_ci\", c INT PRIMARY KEY);") +--- +... +box.sql.execute("INSERT INTO t4a VALUES('ABC','abc',1);") +--- +... +box.sql.execute("INSERT INTO t4a VALUES('ghi','ghi',3);") +--- +... +-- Only LHS of concatenation has implicitly set collation. +-- Hence, no collation is used. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||'') = b;") +--- +- - [1] + - [3] +... +-- BINARY collation is forced for comparison operator as +-- a derivation from concatenation. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'') = b;") +--- +- - [3] +... +-- Both operands of concatenation have explicit but different +-- collations. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'' COLLATE \"unicode_ci\") = b;") +--- +- error: Illegal mix of collations +... +box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'') = b COLLATE \"unicode\";") +--- +- error: Illegal mix of collations +... +-- No collation is used since LHS and RHS of concatenation +-- operator have different implicit collations. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||'')=(b||'');") +--- +- - [3] +... +box.sql.execute("SELECT c FROM t4a WHERE (a||b)=(b||a);") +--- +- - [3] +... +box.sql.execute("CREATE TABLE t4b(a TEXT COLLATE \"unicode_ci\", b TEXT COLLATE \"unicode_ci\", c INT PRIMARY KEY);") +--- +... +box.sql.execute("INSERT INTO t4b VALUES('BCD','bcd',1);") +--- +... +box.sql.execute("INSERT INTO t4b VALUES('ghi','ghi',3);") +--- +... +-- Operands have the same implicit collation, so it is derived. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||b)=(b||a);") +--- +- - [3] +... +-- Couple of other possible combinations. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||b COLLATE \"binary\")=(b||a);") +--- +- - [3] +... +box.sql.execute("SELECT c FROM t4a WHERE (a||b COLLATE \"binary\")=(b COLLATE \"unicode_ci\"||a);") +--- +- error: Illegal mix of collations +... +box.sql.execute("INSERT INTO t4b VALUES('abc', 'xxx', 2);") +--- +... +box.sql.execute("INSERT INTO t4b VALUES('gHz', 'xxx', 4);") +--- +... +-- Results are sorted with case-insensitive order. +-- Otherwise capital latters come first. +-- +box.sql.execute("SELECT a FROM t4b ORDER BY a COLLATE \"unicode_ci\" || ''") +--- +- - ['abc'] + - ['BCD'] + - ['ghi'] + - ['gHz'] +... +box.sql.execute("SELECT a FROM t4b ORDER BY a || b") +--- +- - ['abc'] + - ['BCD'] + - ['ghi'] + - ['gHz'] +... +box.space.T4A:drop() +--- +... +box.space.T4B:drop() +--- +... diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua index 4ad2d5e50..8f1d57502 100644 --- a/test/sql/collation.test.lua +++ b/test/sql/collation.test.lua @@ -126,3 +126,49 @@ box.sql.execute("SELECT * FROM t1;") box.sql.execute("SELECT * FROM t0;") box.sql.execute("DROP TABLE t1;") box.sql.execute("DROP TABLE t0;") + +-- gh-3937: result of concatination has derived collation. +-- +box.sql.execute("CREATE TABLE t4a(a TEXT COLLATE \"unicode\", b TEXT COLLATE \"unicode_ci\", c INT PRIMARY KEY);") +box.sql.execute("INSERT INTO t4a VALUES('ABC','abc',1);") +box.sql.execute("INSERT INTO t4a VALUES('ghi','ghi',3);") +-- Only LHS of concatenation has implicitly set collation. +-- Hence, no collation is used. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||'') = b;") +-- BINARY collation is forced for comparison operator as +-- a derivation from concatenation. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'') = b;") +-- Both operands of concatenation have explicit but different +-- collations. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'' COLLATE \"unicode_ci\") = b;") +box.sql.execute("SELECT c FROM t4a WHERE (a COLLATE \"binary\"||'') = b COLLATE \"unicode\";") +-- No collation is used since LHS and RHS of concatenation +-- operator have different implicit collations. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||'')=(b||'');") +box.sql.execute("SELECT c FROM t4a WHERE (a||b)=(b||a);") + +box.sql.execute("CREATE TABLE t4b(a TEXT COLLATE \"unicode_ci\", b TEXT COLLATE \"unicode_ci\", c INT PRIMARY KEY);") +box.sql.execute("INSERT INTO t4b VALUES('BCD','bcd',1);") +box.sql.execute("INSERT INTO t4b VALUES('ghi','ghi',3);") +-- Operands have the same implicit collation, so it is derived. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||b)=(b||a);") +-- Couple of other possible combinations. +-- +box.sql.execute("SELECT c FROM t4a WHERE (a||b COLLATE \"binary\")=(b||a);") +box.sql.execute("SELECT c FROM t4a WHERE (a||b COLLATE \"binary\")=(b COLLATE \"unicode_ci\"||a);") + +box.sql.execute("INSERT INTO t4b VALUES('abc', 'xxx', 2);") +box.sql.execute("INSERT INTO t4b VALUES('gHz', 'xxx', 4);") +-- Results are sorted with case-insensitive order. +-- Otherwise capital latters come first. +-- +box.sql.execute("SELECT a FROM t4b ORDER BY a COLLATE \"unicode_ci\" || ''") +box.sql.execute("SELECT a FROM t4b ORDER BY a || b") + +box.space.T4A:drop() +box.space.T4B:drop() -- 2.15.1