[tarantool-patches] Re: [PATCH 2/2] sql: allow <COLLATE> only for string-like args
Roman Khabibov
roman.habibov at tarantool.org
Tue Jun 4 17:27:20 MSK 2019
Hello, thanks for the review.
> On Jun 2, 2019, at 8:09 PM, Vladislav Shpilevoy <v.shpilevoy at tarantool.org> wrote:
>
> Thanks for the fixes! See 8 comments below.
>
>>>> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
>>>> index ba7eea59d..29e3386fa 100644
>>>> --- a/src/box/sql/expr.c
>>>> +++ b/src/box/sql/expr.c
>>>> @@ -4215,6 +4215,22 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
>>>> }
>>>> case TK_SPAN:
>>>> case TK_COLLATE:{
>>>> + enum field_type type;
>>>> + struct Expr *left = pExpr->pLeft;
>>>> + if (left->op == TK_COLUMN) {
>>>> + int col_num = left->iColumn;
>>>> + type = left->space_def->fields[col_num].type;
>>>> + } else
>>>> + type = left->type;
>>>> + if (left->op != TK_CONCAT &&
>>>
>>> Why do you check for TK_CONCAT? Its type should be FIELD_TYPE_STRING.
>> Concatenations are rejected without this check.
>
> 1. It is not a good answer, when you can't explain, why you need
> a line of code, but a program does not work without it. Please, investigate
> and explain, why do you check for both 'type == STRING' and 'op == CONCAT'.
> If an op is CONCAT, then type is already STRING, so the additional check
> for 'op' should not be necessary.
@@ -1060,6 +1094,8 @@ sqlPExpr(Parse * pParse, /* Parsing context */
if (p) {
memset(p, 0, sizeof(Expr));
p->op = op & TKFLG_MASK;
+ if (op == TK_CONCAT)
+ p->type = FIELD_TYPE_STRING;
p->iAgg = -1;
}
sqlExprAttachSubtrees(pParse->db, p, pLeft, pRight);
>>
>> commit 5feaa9330f2c642fc16b479383b1c5cac96c28cc
>> Author: Roman Khabibov <roman.habibov at tarantool.org>
>> Date: Mon May 6 14:30:21 2019 +0300
>>
>> sql: allow <COLLATE> only for string-like args
>>
>> Before this patch, user could use COLLATE with non-string-like
>> literals, columns or subquery results. Disallow such usage.
>>
>> Closes #3804
>>
>> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
>> index ba7eea59d..e558cbb18 100644
>> --- a/src/box/sql/expr.c
>> +++ b/src/box/sql/expr.c
>> @@ -197,6 +197,40 @@ sqlExprSkipCollate(Expr * pExpr)
>> return pExpr;
>> }
>>
>> +/*
>> + * Check that left node of @expr with the collation in the root
>
> 2. '@expr' is an invalid Doxygen command. If you want to reference
> a parameter, use <@a parameter_name>. See Doxygen documentation.
>
>> + * can be used with <COLLATE>. If it is not, leave an error
>> + * message in pParse.
>
> 3. There is no argument 'pParse'.
>
>> + *
>> + * @param pParse Parser context.
>> + * @param expr Expression for checking.
>> + *
>> + * @retval 0 on success.
>> + * @retval -1 on error.
>> + */
>> +static int
>> +check_collate_arg(Parse *parse, Expr *expr)
>
> 4. We use 'struct' prefix in all new SQL code.
>
>> +{
>> + struct Expr *left = expr->pLeft;
>> + while (left->op == TK_COLLATE)
>> + left = left->pLeft;
>> + enum field_type type;
>> + if (left->op == TK_COLUMN) {
>> + int col_num = left->iColumn;
>> + type = left->space_def->fields[col_num].type;
>> + } else
>> + type = left->type;
>
> 5. We always either use '{}' for both 'if-else' parts,
> or do not use it at all.
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index ba7eea59d..d5bc733c3 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -197,6 +197,40 @@ sqlExprSkipCollate(Expr * pExpr)
return pExpr;
}
+/*
+ * Check that left node of @a expr with the collation in the root
+ * can be used with <COLLATE>. If it is not, leave an error
+ * message in pParse.
+ *
+ * @param parse Parser context.
+ * @param expr Expression for checking.
+ *
+ * @retval 0 on success.
+ * @retval -1 on error.
+ */
+static int
+check_collate_arg(struct Parse *parse, struct Expr *expr)
+{
+ struct Expr *left = expr->pLeft;
+ while (left->op == TK_COLLATE)
+ left = left->pLeft;
+ enum field_type type;
+ if (left->op == TK_COLUMN) {
+ int col_num = left->iColumn;
+ type = left->space_def->fields[col_num].type;
+ } else {
+ type = left->type;
+ }
+ if (type != FIELD_TYPE_STRING && type != FIELD_TYPE_SCALAR) {
+ diag_set(ClientError, ER_SQL_PARSER_GENERIC,
+ "COLLATE can't be used with "
+ "non-string arguments");
+ parse->is_aborted = true;
+ return -1;
+ }
+ return 0;
+}
+
>> + if (left->op != TK_CONCAT && type != FIELD_TYPE_STRING &&
>> + type != FIELD_TYPE_SCALAR) {
>> + diag_set(ClientError, ER_SQL_PARSER_GENERIC,
>> + "COLLATE can't be used with "
>> + "non-string arguments");
>> + parse->is_aborted = true;
>> + return -1;
>> + }
>> + return 0;
>> @@ -3935,6 +3969,9 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
>> testcase(op == TK_BITNOT);
>> assert(TK_NOT == OP_Not);
>> testcase(op == TK_NOT);
>> + if (pExpr->pLeft->op == TK_COLLATE &&
>> + check_collate_arg(pParse, pExpr->pLeft) != 0)
>> + break;
>> r1 = sqlExprCodeTemp(pParse, pExpr->pLeft,
>> ®Free1);
>
> 6. Why do you check for COLLATE before CodeTemp()? This function
> does exactly the same check.
Forgot to remove it.
>> testcase(regFree1 == 0);
>> diff --git a/test/sql-tap/e_select1.test.lua b/test/sql-tap/e_select1.test.lua
>> index c4724e636..566f2e723 100755
>> --- a/test/sql-tap/e_select1.test.lua
>> +++ b/test/sql-tap/e_select1.test.lua
>> @@ -1938,7 +1938,7 @@ test:do_execsql_test(
>> test:do_execsql_test(
>> "e_select-8.9.2",
>> [[
>> - SELECT x COLLATE "binary" FROM d4 ORDER BY 1 COLLATE "unicode_ci"
>> + SELECT x COLLATE "unicode_ci" FROM d4 ORDER BY 1
>
> 7. Why? The old test should pass as well. '1' here is not a value, but an
> index of a selected column.
Removed.
>> ]], {
>> -- <e_select-8.9.2>
>> "abc", "DEF", "ghi", "JKL"
>> diff --git a/test/sql-tap/selectE.test.lua b/test/sql-tap/selectE.test.lua
>> index 32c3d6a06..16075d38e 100755
>> --- a/test/sql-tap/selectE.test.lua
>> +++ b/test/sql-tap/selectE.test.lua
>> @@ -57,8 +57,7 @@ test:do_test(
>> CREATE TABLE t3(a TEXT primary key);
>> INSERT INTO t3 VALUES('def'),('jkl');
>>
>> - SELECT a FROM t1 EXCEPT SELECT a FROM t2
>> - ORDER BY a COLLATE "unicode_ci";
>> + SELECT a FROM t1 EXCEPT SELECT a FROM t2 ORDER BY a COLLATE "unicode_ci";
>> ]]
>> end, {
>> -- <selectE-1.0>
>> @@ -70,8 +69,7 @@ test:do_test(
>> "selectE-1.1",
>> function()
>> return test:execsql [[
>> - SELECT a FROM t2 EXCEPT SELECT a FROM t3
>> - ORDER BY a COLLATE "unicode_ci";
>> + SELECT a FROM t2 EXCEPT SELECT a FROM t3 ORDER BY a COLLATE "unicode_ci";
>> ]]
>> end, {
>> -- <selectE-1.1>
>> @@ -83,8 +81,7 @@ test:do_test(
>> "selectE-1.2",
>> function()
>> return test:execsql [[
>> - SELECT a FROM t2 EXCEPT SELECT a FROM t3
>> - ORDER BY a COLLATE "binary";
>> + SELECT a FROM t2 EXCEPT SELECT a FROM t3 ORDER BY a COLLATE "binary";
>> ]]
>> end, {
>> -- <selectE-1.2>
>> @@ -96,8 +93,7 @@ test:do_test(
>> "selectE-1.3",
>> function()
>> return test:execsql [[
>> - SELECT a FROM t2 EXCEPT SELECT a FROM t3
>> - ORDER BY a;
>> + SELECT a FROM t2 EXCEPT SELECT a FROM t3 ORDER BY a;
>> ]]
>> end, {
>> -- <selectE-1.3>
>> @@ -113,8 +109,7 @@ test:do_test(
>> DELETE FROM t3;
>> INSERT INTO t2 VALUES('ABC'),('def'),('GHI'),('jkl');
>> INSERT INTO t3 SELECT lower(a) FROM t2;
>> - SELECT a COLLATE "unicode_ci" FROM t2 EXCEPT SELECT a FROM t3
>> - ORDER BY 1
>> + SELECT a COLLATE "unicode_ci" FROM t2 EXCEPT SELECT a FROM t3 ORDER BY 1
>> ]]
>> end, {
>
> 8. Why do you need changes in this file? You just made multiline requests
> one line.
Removed.
commit 879f3c6716a7414247ff4902bdc9117531c47ef6
Author: Roman Khabibov <roman.habibov at tarantool.org>
Date: Mon May 6 14:30:21 2019 +0300
sql: allow <COLLATE> only for string-like args
Before this patch, user could use COLLATE with non-string-like
literals, columns or subquery results. Disallow such usage.
Closes #3804
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index ba7eea59d..d5bc733c3 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -197,6 +197,40 @@ sqlExprSkipCollate(Expr * pExpr)
return pExpr;
}
+/*
+ * Check that left node of @a expr with the collation in the root
+ * can be used with <COLLATE>. If it is not, leave an error
+ * message in pParse.
+ *
+ * @param parse Parser context.
+ * @param expr Expression for checking.
+ *
+ * @retval 0 on success.
+ * @retval -1 on error.
+ */
+static int
+check_collate_arg(struct Parse *parse, struct Expr *expr)
+{
+ struct Expr *left = expr->pLeft;
+ while (left->op == TK_COLLATE)
+ left = left->pLeft;
+ enum field_type type;
+ if (left->op == TK_COLUMN) {
+ int col_num = left->iColumn;
+ type = left->space_def->fields[col_num].type;
+ } else {
+ type = left->type;
+ }
+ if (type != FIELD_TYPE_STRING && type != FIELD_TYPE_SCALAR) {
+ diag_set(ClientError, ER_SQL_PARSER_GENERIC,
+ "COLLATE can't be used with "
+ "non-string arguments");
+ parse->is_aborted = true;
+ return -1;
+ }
+ return 0;
+}
+
int
sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id,
struct coll **coll)
@@ -1060,6 +1094,8 @@ sqlPExpr(Parse * pParse, /* Parsing context */
if (p) {
memset(p, 0, sizeof(Expr));
p->op = op & TKFLG_MASK;
+ if (op == TK_CONCAT)
+ p->type = FIELD_TYPE_STRING;
p->iAgg = -1;
}
sqlExprAttachSubtrees(pParse->db, p, pLeft, pRight);
@@ -3935,6 +3971,9 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
testcase(op == TK_BITNOT);
assert(TK_NOT == OP_Not);
testcase(op == TK_NOT);
+ if (pExpr->pLeft->op == TK_COLLATE &&
+ check_collate_arg(pParse, pExpr->pLeft) != 0)
+ break;
r1 = sqlExprCodeTemp(pParse, pExpr->pLeft,
®Free1);
testcase(regFree1 == 0);
@@ -4215,6 +4254,8 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
}
case TK_SPAN:
case TK_COLLATE:{
+ if (check_collate_arg(pParse, pExpr) != 0)
+ break;
return sqlExprCodeTarget(pParse, pExpr->pLeft,
target);
}
diff --git a/test/sql-tap/collation.test.lua b/test/sql-tap/collation.test.lua
index 9d0076e1d..dc2a0b8ef 100755
--- a/test/sql-tap/collation.test.lua
+++ b/test/sql-tap/collation.test.lua
@@ -1,6 +1,6 @@
#!/usr/bin/env tarantool
test = require("sqltester")
-test:plan(177)
+test:plan(188)
local prefix = "collation-"
@@ -546,4 +546,63 @@ test:do_execsql_test(
[[ SELECT s COLLATE "unicode_ci" FROM a ORDER BY s COLLATE "unicode" ]],
{"b","B"})
+-- gh-3805 Check COLLATE passing with string-like args only.
+
+test:do_execsql_test(
+ "collation-2.7.0",
+ [[ CREATE TABLE test1 (one INT PRIMARY KEY, two INT) ]],
+ {})
+
+test:do_catchsql_test(
+ "collation-2.7.1",
+ 'SELECT one COLLATE BINARY FROM test1',
+ {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+ "collation-2.7.2",
+ 'SELECT one COLLATE "unicode_ci" FROM test1',
+ {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+ "collation-2.7.3",
+ 'SELECT two COLLATE BINARY FROM test1',
+ {1, "COLLATE can't be used with non-string arguments"})
+
+
+test:do_catchsql_test(
+ "collation-2.7.4",
+ 'SELECT (one + two) COLLATE BINARY FROM test1',
+ {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+ "collation-2.7.5",
+ 'SELECT (SELECT one FROM test1) COLLATE BINARY',
+ {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_execsql_test(
+ "collation-2.7.6",
+ 'SELECT TRIM(\'A\') COLLATE BINARY',
+ {"A"})
+
+test:do_catchsql_test(
+ "collation-2.7.7",
+ 'SELECT RANDOM() COLLATE BINARY',
+ {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+ "collation-2.7.8",
+ 'SELECT LENGTH(\'A\') COLLATE BINARY',
+ {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+ "collation-2.7.9",
+ 'SELECT TRUE COLLATE \"unicode\"',
+ {1, "COLLATE can't be used with non-string arguments"})
+
+test:do_catchsql_test(
+ "collation-2.7.10",
+ 'SELECT NOT TRUE COLLATE \"unicode\"',
+ {1, "COLLATE can't be used with non-string arguments"})
+
+
test:finish_test()
diff --git a/test/sql-tap/distinct.test.lua b/test/sql-tap/distinct.test.lua
index e6970084e..ae35a0db5 100755
--- a/test/sql-tap/distinct.test.lua
+++ b/test/sql-tap/distinct.test.lua
@@ -121,12 +121,12 @@ local data = {
{"12.1", 0, "SELECT DISTINCT a, d FROM t1"},
{"12.2", 0, "SELECT DISTINCT a, d FROM t4"},
{"13.1", 0, "SELECT DISTINCT a, b, c COLLATE \"unicode_ci\" FROM t1"},
- {"13.2", 0, "SELECT DISTINCT a, b, c COLLATE \"unicode_ci\" FROM t4"},
+ {"13.2", 1, "SELECT DISTINCT a, b, c FROM t4"},
{"14.1", 0, "SELECT DISTINCT a, d COLLATE \"unicode_ci\" FROM t1"},
{"14.2", 1, "SELECT DISTINCT a, d COLLATE \"unicode_ci\" FROM t4"},
{"15 ", 0, "SELECT DISTINCT a, d COLLATE \"binary\" FROM t1"},
{"16.1", 0, "SELECT DISTINCT a, b, c COLLATE \"binary\" FROM t1"},
- {"16.2", 0, "SELECT DISTINCT a, b, c COLLATE \"binary\" FROM t4"},
+ {"16.2", 1, "SELECT DISTINCT a, b, c FROM t4"},
{"17", 0, --{ \/* Technically, it would be possible to detect that DISTINCT\n ** is a no-op in cases like the following. But sql does not\n ** do so. *\/\n
"SELECT DISTINCT t1.id FROM t1, t2 WHERE t1.id=t2.x" },
{"18 ", 1, "SELECT DISTINCT c1, c2 FROM t3"},
@@ -173,7 +173,7 @@ data = {
{"a, b, c FROM t1", {"btree"}, {"A", "B", "C", "a", "b", "c"}},
{"a, b, c FROM t1 ORDER BY a, b, c", {"btree"}, {"A", "B", "C", "a", "b", "c"}},
{"b FROM t1 WHERE a = 'a'", {}, {"b"}},
- {"b FROM t1 ORDER BY +b COLLATE \"binary\"", {"btree", "btree"}, {"B", "b"}},
+ {"b FROM t1 ORDER BY b COLLATE \"binary\"", {"btree", "btree"}, {"B", "b"}},
{"a FROM t1", {}, {"A", "a"}},
{"b COLLATE \"unicode_ci\" FROM t1", {}, {"b"}},
{"b COLLATE \"unicode_ci\" FROM t1 ORDER BY b COLLATE \"unicode_ci\"", {}, {"b"}},
diff --git a/test/sql-tap/identifier_case.test.lua b/test/sql-tap/identifier_case.test.lua
index 4db729f11..65ed9aea1 100755
--- a/test/sql-tap/identifier_case.test.lua
+++ b/test/sql-tap/identifier_case.test.lua
@@ -166,7 +166,7 @@ test:do_execsql_test(
test:do_execsql_test(
test_prefix.."4.1",
- string.format([[select * from table1 order by a collate "unicode_ci"]]),
+ string.format([[select * from table1 order by a]]),
{}
)
diff --git a/test/sql-tap/in3.test.lua b/test/sql-tap/in3.test.lua
index 1ca6a6446..1fdee16b7 100755
--- a/test/sql-tap/in3.test.lua
+++ b/test/sql-tap/in3.test.lua
@@ -1,6 +1,6 @@
#!/usr/bin/env tarantool
test = require("sqltester")
-test:plan(27)
+test:plan(25)
--!./tcltestrunner.lua
-- 2007 November 29
@@ -186,33 +186,6 @@ test:do_test(
-- </in3-1.13>
})
-
-
--- The first of these queries has to use the temp-table, because the
--- collation sequence used for the index on "t1.a" does not match the
--- collation sequence used by the "IN" comparison. The second does not
--- require a temp-table, because the collation sequences match.
---
-test:do_test(
- "in3-1.14",
- function()
- return exec_neph(" SELECT a FROM t1 WHERE a COLLATE \"unicode_ci\" IN (SELECT a FROM t1) ")
- end, {
- -- <in3-1.14>
- 1, 1, 3, 5
- -- </in3-1.14>
- })
-
-test:do_test(
- "in3-1.15",
- function()
- return exec_neph(" SELECT a FROM t1 WHERE a COLLATE \"binary\" IN (SELECT a FROM t1) ")
- end, {
- -- <in3-1.15>
- 1, 1, 3, 5
- -- </in3-1.15>
- })
-
-- Neither of these queries require a temp-table. The collation sequence
-- makes no difference when using a rowid.
--
diff --git a/test/sql-tap/resolver01.test.lua b/test/sql-tap/resolver01.test.lua
index 9fcf0c7c0..315c892ac 100755
--- a/test/sql-tap/resolver01.test.lua
+++ b/test/sql-tap/resolver01.test.lua
@@ -106,7 +106,7 @@ test:do_catchsql_test(
test:do_catchsql_test(
"resolver01-2.1",
[[
- SELECT 2 AS y FROM t1, t2 ORDER BY y COLLATE "unicode_ci";
+ SELECT 2 AS y FROM t1, t2 ORDER BY y;
]], {
-- <resolver01-2.1>
0, {2}
@@ -116,7 +116,7 @@ test:do_catchsql_test(
test:do_catchsql_test(
"resolver01-2.2",
[[
- SELECT 2 AS yy FROM t1, t2 ORDER BY y COLLATE "unicode_ci";
+ SELECT 2 AS yy FROM t1, t2 ORDER BY y;
]], {
-- <resolver01-2.2>
1, "ambiguous column name: Y"
@@ -126,7 +126,7 @@ test:do_catchsql_test(
test:do_catchsql_test(
"resolver01-2.3",
[[
- SELECT x AS y FROM t3 ORDER BY y COLLATE "unicode_ci";
+ SELECT x AS y FROM t3 ORDER BY y;
]], {
-- <resolver01-2.3>
0, {11, 33}
@@ -136,7 +136,7 @@ test:do_catchsql_test(
test:do_catchsql_test(
"resolver01-2.4",
[[
- SELECT x AS yy FROM t3 ORDER BY y COLLATE "unicode_ci";
+ SELECT x AS yy FROM t3 ORDER BY y;
]], {
-- <resolver01-2.4>
0, {33, 11}
@@ -146,7 +146,7 @@ test:do_catchsql_test(
test:do_catchsql_test(
"resolver01-2.5",
[[
- SELECT x AS yy FROM t3 ORDER BY yy COLLATE "unicode_ci";
+ SELECT x AS yy FROM t3 ORDER BY yy;
]], {
-- <resolver01-2.5>
0, {11, 33}
@@ -156,7 +156,7 @@ test:do_catchsql_test(
test:do_catchsql_test(
"resolver01-2.6",
[[
- SELECT x AS yy FROM t3 ORDER BY 1 COLLATE "unicode_ci";
+ SELECT x AS yy FROM t3 ORDER BY 1;
]], {
-- <resolver01-2.6>
0, {11, 33}
diff --git a/test/sql-tap/select1.test.lua b/test/sql-tap/select1.test.lua
index 6beeb34cb..6811f7dcb 100755
--- a/test/sql-tap/select1.test.lua
+++ b/test/sql-tap/select1.test.lua
@@ -1672,7 +1672,7 @@ test:do_execsql_test(
test:do_execsql_test(
"select1-10.7",
[[
- SELECT f1 COLLATE "unicode_ci" AS x FROM test1 ORDER BY x
+ SELECT f1 AS x FROM test1 ORDER BY x
]], {
-- <select1-10.7>
11, 33
diff --git a/test/sql-tap/with1.test.lua b/test/sql-tap/with1.test.lua
index ec45e5e76..6985c589e 100755
--- a/test/sql-tap/with1.test.lua
+++ b/test/sql-tap/with1.test.lua
@@ -1043,7 +1043,7 @@ test:do_catchsql_test(13.3, [[
-- 2015-04-12
--
test:do_execsql_test(14.1, [[
- WITH x AS (SELECT * FROM t) SELECT 0 EXCEPT SELECT 0 ORDER BY 1 COLLATE "binary";
+ WITH x AS (SELECT * FROM t) SELECT 0 EXCEPT SELECT 0 ORDER BY 1;
]], {
-- <14.1>
More information about the Tarantool-patches
mailing list