[tarantool-patches] Re: [PATCH 2/2] sql: allow <COLLATE> only for string-like args

Roman Khabibov roman.habibov at tarantool.org
Fri Jun 7 18:01:59 MSK 2019


Hi! Thanks for the review.

> On Jun 4, 2019, at 10:49 PM, Vladislav Shpilevoy <v.shpilevoy at tarantool.org> wrote:
> 
> Hi!
> 
> tarantool> box.execute('SELECT TRUE AND TRUE COLLATE "binary";')
> ---
> - metadata:
>  - name: TRUE AND TRUE COLLATE "binary"
>    type: boolean
>  rows:
>  - [true]
> ...
> 
> Your previous solution was correct, you should check collation
> in CodeTemp. But somewhy you decided to remove it and keep only
> in 'TK_NOT' and 'TK_COLLATE'. Obviously, you can't add the check
> to each 'case' in that 'switch', and you need to check the collation
> for every token.
@ -4438,6 +4475,8 @@ sqlExprCodeAtInit(Parse * pParse,	/* Parsing context */
 int
 sqlExprCodeTemp(Parse * pParse, Expr * pExpr, int *pReg)
 {
+	if (pExpr->op == TK_COLLATE && check_collate_arg(pParse, pExpr) != 0)
+		return 0;
 	int r2;
 	pExpr = sqlExprSkipCollate(pExpr);
 	if (ConstFactorOk(pParse)

> Besides, Nikita, please, help us with the next comment.
> On 04/06/2019 17:27, Roman Khabibov wrote:
>> Hello, thanks for the review.
>> 
>>> On Jun 2, 2019, at 8:09 PM, Vladislav Shpilevoy <v.shpilevoy at tarantool.org> wrote:
>>> 
>>> Thanks for the fixes! See 8 comments below.
>>> 
>>>>>> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
>>>>>> index ba7eea59d..29e3386fa 100644
>>>>>> --- a/src/box/sql/expr.c
>>>>>> +++ b/src/box/sql/expr.c
>>>>>> @@ -4215,6 +4215,22 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
>>>>>> 		}
>>>>>> 	case TK_SPAN:
>>>>>> 	case TK_COLLATE:{
>>>>>> +			enum field_type type;
>>>>>> +			struct Expr *left = pExpr->pLeft;
>>>>>> +			if (left->op == TK_COLUMN) {
>>>>>> +				int col_num = left->iColumn;
>>>>>> +				type = left->space_def->fields[col_num].type;
>>>>>> +			} else
>>>>>> +				type = left->type;
>>>>>> +			if (left->op != TK_CONCAT &&
>>>>> 
>>>>> Why do you check for TK_CONCAT? Its type should be FIELD_TYPE_STRING.
>>>> Concatenations are rejected without this check.
>>> 
>>> 1. It is not a good answer, when you can't explain, why you need
>>> a line of code, but a program does not work without it. Please, investigate
>>> and explain, why do you check for both 'type == STRING' and 'op == CONCAT'.
>>> If an op is CONCAT, then type is already STRING, so the additional check
>>> for 'op' should not be necessary.
>> @@ -1060,6 +1094,8 @@ sqlPExpr(Parse * pParse,	/* Parsing context */
>> 		if (p) {
>> 			memset(p, 0, sizeof(Expr));
>> 			p->op = op & TKFLG_MASK;
>> +			if (op == TK_CONCAT)
>> +				p->type = FIELD_TYPE_STRING;
>> 			p->iAgg = -1;
>> 		}
>> 		sqlExprAttachSubtrees(pParse->db, p, pLeft, pRight);
>> 
> 
> I am 100% sure, that it is not a place for types setting. This
> function, and its comment says, only allocates. For complex types
> we have sql_expr_type(), we can use Expr.type for basic types
> located in tree lists only.
> 
> Nikita knows more about typing, he can help better.
@@ -4215,6 +4249,9 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
 		}
 	case TK_SPAN:
 	case TK_COLLATE:{
+			pExpr->pLeft->type = sql_expr_type(pExpr->pLeft);
+			if (check_collate_arg(pParse, pExpr) != 0)
+				break;
 			return sqlExprCodeTarget(pParse, pExpr->pLeft,
 						     target);
 		}

commit 1cceb4ffa3a8b5c513ea742bb243ae963bc78ea9
Author: Roman Khabibov <roman.habibov at tarantool.org>
Date:   Mon May 6 14:30:21 2019 +0300

    sql: allow <COLLATE> only for string-like args
    
    Before this patch, user could use COLLATE with non-string-like
    literals, columns or subquery results. Disallow such usage.
    
    Closes #3804

diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index ba7eea59d..bc6c1dcf3 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -197,6 +197,40 @@ sqlExprSkipCollate(Expr * pExpr)
 	return pExpr;
 }
 
+/*
+ * Check that left node of @a expr with the collation in the root
+ * can be used with <COLLATE>. If it is not, leave an error
+ * message in pParse.
+ *
+ * @param parse Parser context.
+ * @param expr Expression for checking.
+ *
+ * @retval 0 on success.
+ * @retval -1 on error.
+ */
+static int
+check_collate_arg(struct Parse *parse, struct Expr *expr)
+{
+	struct Expr *left = expr->pLeft;
+	while (left->op == TK_COLLATE)
+		left = left->pLeft;
+	enum field_type type;
+	if (left->op == TK_COLUMN) {
+		int col_num = left->iColumn;
+		type = left->space_def->fields[col_num].type;
+	} else {
+		type = left->type;
+	}
+	if (type != FIELD_TYPE_STRING && type != FIELD_TYPE_SCALAR) {
+		diag_set(ClientError, ER_SQL_PARSER_GENERIC,
+			 "COLLATE can't be used with "
+			 "non-string arguments");
+		parse->is_aborted = true;
+		return -1;
+	}
+	return 0;
+}
+
 int
 sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id,
 	      struct coll **coll)
@@ -4215,6 +4249,9 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
 		}
 	case TK_SPAN:
 	case TK_COLLATE:{
+			pExpr->pLeft->type = sql_expr_type(pExpr->pLeft);
+			if (check_collate_arg(pParse, pExpr) != 0)
+				break;
 			return sqlExprCodeTarget(pParse, pExpr->pLeft,
 						     target);
 		}
@@ -4438,6 +4475,8 @@ sqlExprCodeAtInit(Parse * pParse,	/* Parsing context */
 int
 sqlExprCodeTemp(Parse * pParse, Expr * pExpr, int *pReg)
 {
+	if (pExpr->op == TK_COLLATE && check_collate_arg(pParse, pExpr) != 0)
+		return 0;
 	int r2;
 	pExpr = sqlExprSkipCollate(pExpr);
 	if (ConstFactorOk(pParse)
diff --git a/test/sql-tap/collation.test.lua b/test/sql-tap/collation.test.lua
index 9d0076e1d..a15cf0756 100755
--- a/test/sql-tap/collation.test.lua
+++ b/test/sql-tap/collation.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(177)
+test:plan(191)
 
 local prefix = "collation-"
 
@@ -546,4 +546,77 @@ test:do_execsql_test(
     [[ SELECT s COLLATE "unicode_ci" FROM a ORDER BY s COLLATE "unicode" ]],
     {"b","B"})
 
+-- gh-3805 Check COLLATE passing with string-like args only.
+
+test:do_execsql_test(
+    "collation-2.7.0",
+    [[ CREATE TABLE test1 (one INT PRIMARY KEY, two INT) ]],
+    {})
+
+test:do_catchsql_test(
+    "collation-2.7.1",
+    'SELECT one COLLATE BINARY FROM test1',
+    {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+    "collation-2.7.2",
+    'SELECT one COLLATE "unicode_ci" FROM test1',
+    {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+    "collation-2.7.3",
+    'SELECT two COLLATE BINARY FROM test1',
+    {1, "COLLATE can't be used with non-string arguments"})
+
+
+test:do_catchsql_test(
+    "collation-2.7.4",
+    'SELECT (one + two) COLLATE BINARY FROM test1',
+    {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+    "collation-2.7.5",
+    'SELECT (SELECT one FROM test1) COLLATE BINARY',
+    {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_execsql_test(
+    "collation-2.7.6",
+    'SELECT TRIM(\'A\') COLLATE BINARY',
+    {"A"})
+
+test:do_catchsql_test(
+    "collation-2.7.7",
+    'SELECT RANDOM() COLLATE BINARY',
+    {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+    "collation-2.7.8",
+    'SELECT LENGTH(\'A\') COLLATE BINARY',
+    {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+    "collation-2.7.9",
+    'SELECT TRUE COLLATE \"unicode\"',
+    {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+    "collation-2.7.10",
+    'SELECT NOT TRUE COLLATE \"unicode\"',
+    {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+    "collation-2.7.11",
+    'SELECT TRUE AND TRUE COLLATE \"unicode\"',
+    {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+    "collation-2.7.12",
+    'SELECT 1 | 1 COLLATE \"unicode\"',
+    {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+    "collation-2.7.14",
+    'SELECT +\'str\' COLLATE \"unicode\"',
+    {0,{"str"}})
+
 test:finish_test()
diff --git a/test/sql-tap/distinct.test.lua b/test/sql-tap/distinct.test.lua
index e6970084e..ae35a0db5 100755
--- a/test/sql-tap/distinct.test.lua
+++ b/test/sql-tap/distinct.test.lua
@@ -121,12 +121,12 @@ local data = {
     {"12.1", 0, "SELECT DISTINCT a, d FROM t1"},
     {"12.2", 0, "SELECT DISTINCT a, d FROM t4"},
     {"13.1", 0, "SELECT DISTINCT a, b, c COLLATE \"unicode_ci\" FROM t1"},
-    {"13.2", 0, "SELECT DISTINCT a, b, c COLLATE \"unicode_ci\" FROM t4"},
+    {"13.2", 1, "SELECT DISTINCT a, b, c FROM t4"},
     {"14.1", 0, "SELECT DISTINCT a, d COLLATE \"unicode_ci\" FROM t1"},
     {"14.2", 1, "SELECT DISTINCT a, d COLLATE \"unicode_ci\" FROM t4"},
     {"15 ", 0, "SELECT DISTINCT a, d COLLATE \"binary\" FROM t1"},
     {"16.1", 0, "SELECT DISTINCT a, b, c COLLATE \"binary\" FROM t1"},
-    {"16.2", 0, "SELECT DISTINCT a, b, c COLLATE \"binary\" FROM t4"},
+    {"16.2", 1, "SELECT DISTINCT a, b, c FROM t4"},
     {"17",  0,   --{ \/* Technically, it would be possible to detect that DISTINCT\n            ** is a no-op in cases like the following. But sql does not\n            ** do so. *\/\n
     "SELECT DISTINCT t1.id FROM t1, t2 WHERE t1.id=t2.x" },
     {"18 ", 1, "SELECT DISTINCT c1, c2 FROM t3"},
@@ -173,7 +173,7 @@ data = {
     {"a, b, c FROM t1", {"btree"}, {"A", "B", "C", "a", "b", "c"}},
     {"a, b, c FROM t1 ORDER BY a, b, c", {"btree"}, {"A", "B", "C", "a", "b", "c"}},
     {"b FROM t1 WHERE a = 'a'", {}, {"b"}},
-    {"b FROM t1 ORDER BY +b COLLATE \"binary\"", {"btree", "btree"}, {"B", "b"}},
+    {"b FROM t1 ORDER BY b COLLATE \"binary\"", {"btree", "btree"}, {"B", "b"}},
     {"a FROM t1", {}, {"A", "a"}},
     {"b COLLATE \"unicode_ci\" FROM t1", {}, {"b"}},
     {"b COLLATE \"unicode_ci\" FROM t1 ORDER BY b COLLATE \"unicode_ci\"", {}, {"b"}},
diff --git a/test/sql-tap/identifier_case.test.lua b/test/sql-tap/identifier_case.test.lua
index 4db729f11..65ed9aea1 100755
--- a/test/sql-tap/identifier_case.test.lua
+++ b/test/sql-tap/identifier_case.test.lua
@@ -166,7 +166,7 @@ test:do_execsql_test(
 
 test:do_execsql_test(
     test_prefix.."4.1",
-    string.format([[select * from table1 order by a collate "unicode_ci"]]),
+    string.format([[select * from table1 order by a]]),
     {}
 )
 
diff --git a/test/sql-tap/in3.test.lua b/test/sql-tap/in3.test.lua
index 1ca6a6446..1fdee16b7 100755
--- a/test/sql-tap/in3.test.lua
+++ b/test/sql-tap/in3.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(27)
+test:plan(25)
 
 --!./tcltestrunner.lua
 -- 2007 November 29
@@ -186,33 +186,6 @@ test:do_test(
         -- </in3-1.13>
     })
 
-
-
--- The first of these queries has to use the temp-table, because the 
--- collation sequence used for the index on "t1.a" does not match the
--- collation sequence used by the "IN" comparison. The second does not
--- require a temp-table, because the collation sequences match.
---
-test:do_test(
-    "in3-1.14",
-    function()
-        return exec_neph(" SELECT a FROM t1 WHERE a COLLATE \"unicode_ci\" IN (SELECT a FROM t1) ")
-    end, {
-        -- <in3-1.14>
-        1, 1, 3, 5
-        -- </in3-1.14>
-    })
-
-test:do_test(
-    "in3-1.15",
-    function()
-        return exec_neph(" SELECT a FROM t1 WHERE a COLLATE \"binary\" IN (SELECT a FROM t1) ")
-    end, {
-        -- <in3-1.15>
-        1, 1, 3, 5
-        -- </in3-1.15>
-    })
-
 -- Neither of these queries require a temp-table. The collation sequence
 -- makes no difference when using a rowid.
 --
diff --git a/test/sql-tap/resolver01.test.lua b/test/sql-tap/resolver01.test.lua
index 9fcf0c7c0..315c892ac 100755
--- a/test/sql-tap/resolver01.test.lua
+++ b/test/sql-tap/resolver01.test.lua
@@ -106,7 +106,7 @@ test:do_catchsql_test(
 test:do_catchsql_test(
     "resolver01-2.1",
     [[
-        SELECT 2 AS y FROM t1, t2 ORDER BY y COLLATE "unicode_ci";
+        SELECT 2 AS y FROM t1, t2 ORDER BY y;
     ]], {
         -- <resolver01-2.1>
         0, {2}
@@ -116,7 +116,7 @@ test:do_catchsql_test(
 test:do_catchsql_test(
     "resolver01-2.2",
     [[
-        SELECT 2 AS yy FROM t1, t2 ORDER BY y COLLATE "unicode_ci";
+        SELECT 2 AS yy FROM t1, t2 ORDER BY y;
     ]], {
         -- <resolver01-2.2>
         1, "ambiguous column name: Y"
@@ -126,7 +126,7 @@ test:do_catchsql_test(
 test:do_catchsql_test(
     "resolver01-2.3",
     [[
-        SELECT x AS y FROM t3 ORDER BY y COLLATE "unicode_ci";
+        SELECT x AS y FROM t3 ORDER BY y;
     ]], {
         -- <resolver01-2.3>
         0, {11, 33}
@@ -136,7 +136,7 @@ test:do_catchsql_test(
 test:do_catchsql_test(
     "resolver01-2.4",
     [[
-        SELECT x AS yy FROM t3 ORDER BY y COLLATE "unicode_ci";
+        SELECT x AS yy FROM t3 ORDER BY y;
     ]], {
         -- <resolver01-2.4>
         0, {33, 11}
@@ -146,7 +146,7 @@ test:do_catchsql_test(
 test:do_catchsql_test(
     "resolver01-2.5",
     [[
-        SELECT x AS yy FROM t3 ORDER BY yy COLLATE "unicode_ci";
+        SELECT x AS yy FROM t3 ORDER BY yy;
     ]], {
         -- <resolver01-2.5>
         0, {11, 33}
@@ -156,7 +156,7 @@ test:do_catchsql_test(
 test:do_catchsql_test(
     "resolver01-2.6",
     [[
-        SELECT x AS yy FROM t3 ORDER BY 1 COLLATE "unicode_ci";
+        SELECT x AS yy FROM t3 ORDER BY 1;
     ]], {
         -- <resolver01-2.6>
         0, {11, 33}
diff --git a/test/sql-tap/select1.test.lua b/test/sql-tap/select1.test.lua
index 6beeb34cb..6811f7dcb 100755
--- a/test/sql-tap/select1.test.lua
+++ b/test/sql-tap/select1.test.lua
@@ -1672,7 +1672,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "select1-10.7",
     [[
-        SELECT f1 COLLATE "unicode_ci" AS x FROM test1 ORDER BY x
+        SELECT f1 AS x FROM test1 ORDER BY x
     ]], {
         -- <select1-10.7>
         11, 33
diff --git a/test/sql-tap/tkt-b75a9ca6b0.test.lua b/test/sql-tap/tkt-b75a9ca6b0.test.lua
index ea684a73d..6740cd03a 100755
--- a/test/sql-tap/tkt-b75a9ca6b0.test.lua
+++ b/test/sql-tap/tkt-b75a9ca6b0.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(22)
+test:plan(20)
 
 --!./tcltestrunner.lua
 -- 2014-04-21
@@ -57,7 +57,6 @@ local eqps = {
     {"SELECT x,y FROM t1 GROUP BY x, y ORDER BY x, y DESC", {1, 3, 2, 2, 3, 1}, {idxscan, sort}},
     {"SELECT x,y FROM t1 GROUP BY x, y ORDER BY x DESC, y DESC", {3, 1, 2, 2, 1, 3}, {idxscan, sort}},
     {"SELECT x,y FROM t1 GROUP BY x, y ORDER BY x ASC, y ASC", {1, 3, 2, 2, 3, 1}, {idxscan}},
-    {"SELECT x,y FROM t1 GROUP BY x, y ORDER BY x COLLATE \"unicode_ci\", y", {1, 3, 2, 2, 3, 1}, {idxscan, sort}},
 }
 for tn, val in ipairs(eqps) do
     local q = val[1]
diff --git a/test/sql-tap/with1.test.lua b/test/sql-tap/with1.test.lua
index ec45e5e76..6985c589e 100755
--- a/test/sql-tap/with1.test.lua
+++ b/test/sql-tap/with1.test.lua
@@ -1043,7 +1043,7 @@ test:do_catchsql_test(13.3, [[
 -- 2015-04-12
 --
 test:do_execsql_test(14.1, [[
-  WITH x AS (SELECT * FROM t) SELECT 0 EXCEPT SELECT 0 ORDER BY 1 COLLATE "binary";
+  WITH x AS (SELECT * FROM t) SELECT 0 EXCEPT SELECT 0 ORDER BY 1;
 ]], {
   -- <14.1>
   






More information about the Tarantool-patches mailing list