[tarantool-patches] Re: [PATCH 1/2] sql: add better collation determination in functions
i.koptelov
ivan.koptelov at tarantool.org
Wed Mar 27 16:38:47 MSK 2019
> On 25 Mar 2019, at 22:26, n.pettik <korablev at tarantool.org> wrote:
>
>
>
>> On 20 Mar 2019, at 14:11, Ivan Koptelov <ivan.koptelov at tarantool.org> wrote:
>>
>> Before the patch determination of collation in SQL functions
>> (used only in instr) was too narrow: the arguments were scanned
>> from left to right, till the argument with collation was
>> encountered, then its collation was used.
>> Now every arguments collation is considered. The right collation
>> which would be used in function is determined using ANSI
>> compatibility rules ("type combination" rules).
>>
>> Note: currently only instr() a.k.a position() uses mechanism
>> described above, other functions (except aggregate) simply
>> ignores collations.
>
> That’s not true: I see that min-max aggregates also feature this flag:
>
> FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR),
>
> Fourth param indicates whether SQL_FUNC_NEEDCOLL is set or not.
Oh, sorry for that. Fixed commit message.
>> ---
>> src/box/sql/expr.c | 69 ++++++++++++++++++++++++++++++++++++++++----
>> src/box/sql/sqlInt.h | 8 ++++-
>> 2 files changed, 70 insertions(+), 7 deletions(-)
>>
>> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
>> index a2c70935e..2f48d90c6 100644
>> --- a/src/box/sql/expr.c
>> +++ b/src/box/sql/expr.c
>> @@ -4093,16 +4093,73 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
>> testcase(i == 31);
>> constMask |= MASKBIT32(i);
>> }
>> - if ((pDef->funcFlags & SQL_FUNC_NEEDCOLL) !=
>> - 0 && coll == NULL) {
>> - bool unused;
>> - uint32_t id;
>> + }
>> + /*
>> + * Function arguments may have different
>> + * collations. The following code
>> + * checks if they are compatible and
>> + * finds the collation to be used. This
>> + * is done using ANSI rules from
>> + * collations_check_compatibility().
>> + */
>> + if ((pDef->funcFlags & SQL_FUNC_NEEDCOLL) != 0 &&
>> + coll == NULL) {
>> + struct coll *unused = NULL;
>> + uint32_t curr_id = COLL_NONE;
>> + bool is_curr_forced = false;
>> +
>> + uint32_t temp_id = COLL_NONE;
>> + bool is_temp_forced = false;
>> +
>> + uint32_t lhs_id = COLL_NONE;
>> + bool is_lhs_forced = false;
>> +
>> + uint32_t rhs_id = COLL_NONE;
>> + bool is_rhs_forced = false;
>> +
>> + for (int i = 0; i < nFarg; i++) {
>> if (sql_expr_coll(pParse,
>> pFarg->a[i].pExpr,
>> - &unused, &id,
>> - &coll) != 0)
>> + &is_lhs_forced,
>> + &lhs_id,
>> + &unused) != 0)
>> return 0;
>> +
>> + for (int j = i + 1; j < nFarg; j++) {
>> + if (sql_expr_coll(pParse,
>> + pFarg->a[j].pExpr,
>> + &is_rhs_forced,
>> + &rhs_id,
>> + &unused) != 0)
>> + return 0;
>
> Seems like you need only one pass saving resulting collation.
> Resulting collation shouldn’t depend on way of passing through
> arguments. And second call of collations_check_copatiility() is
> redundant as well. Now you are using 2n^2 calls of this function,
> but n is enough: you compare collation of first argument with
> second one and save result in tmp. Then, compare tmp with
> third argument etc.
Thank you for noticing, fixed now:
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 8c1889d8a..34abb9665 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -4102,65 +4102,34 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
* is done using ANSI rules from
* collations_check_compatibility().
*/
- if (nFarg == 1) {
- bool unused;
- uint32_t id;
- if (sql_expr_coll(pParse,
- pFarg->a[0].pExpr, &unused,
- &id, &coll) != 0)
- return 0;
- }
if ((pDef->funcFlags & SQL_FUNC_NEEDCOLL) != 0 &&
- coll == NULL && nFarg > 1) {
+ coll == NULL) {
struct coll *unused = NULL;
uint32_t curr_id = COLL_NONE;
bool is_curr_forced = false;
- uint32_t temp_id = COLL_NONE;
- bool is_temp_forced = false;
-
- uint32_t lhs_id = COLL_NONE;
- bool is_lhs_forced = false;
+ uint32_t next_id = COLL_NONE;
+ bool is_next_forced = false;
- uint32_t rhs_id = COLL_NONE;
- bool is_rhs_forced = false;
+ if (sql_expr_coll(pParse, pFarg->a[0].pExpr,
+ &is_curr_forced, &curr_id,
+ &unused) != 0)
+ return 0;
- for (int i = 0; i < nFarg; i++) {
+ for (int j = 1; j < nFarg; j++) {
if (sql_expr_coll(pParse,
- pFarg->a[i].pExpr,
- &is_lhs_forced,
- &lhs_id,
+ pFarg->a[j].pExpr,
+ &is_next_forced,
+ &next_id,
&unused) != 0)
return 0;
- for (int j = i + 1; j < nFarg; j++) {
- if (sql_expr_coll(pParse,
- pFarg->a[j].pExpr,
- &is_rhs_forced,
- &rhs_id,
- &unused) != 0)
- return 0;
-
- if (collations_check_compatibility(
- lhs_id, is_lhs_forced,
- rhs_id, is_rhs_forced,
- &temp_id) != 0) {
- pParse->is_aborted = true;
- return 0;
- }
-
- is_temp_forced = (temp_id ==
- lhs_id) ?
- is_lhs_forced :
- is_rhs_forced;
-
- if (collations_check_compatibility(
- curr_id, is_curr_forced,
- temp_id, is_temp_forced,
- &curr_id) != 0) {
- pParse->is_aborted = true;
- return 0;
- }
+ if (collations_check_compatibility(
+ curr_id, is_curr_forced,
+ next_id, is_next_forced,
+ &curr_id) != 0) {
+ pParse->is_aborted = true;
+ return 0;
}
}
coll = coll_by_id(curr_id)->coll;
>
>> +
>> + if (collations_check_compatibility(
>> + lhs_id, is_lhs_forced,
>> + rhs_id, is_rhs_forced,
>> + &temp_id) != 0) {
>> + pParse->rc =
>> + SQL_TARANTOOL_ERROR;
>> + pParse->nErr++;
>> + return 0;
>> + }
>> +
>> + is_temp_forced = (temp_id ==
>> + lhs_id) ?
>> + is_lhs_forced :
>> + is_rhs_forced;
>> +
>> + if (collations_check_compatibility(
>> + curr_id, is_curr_forced,
>> + temp_id, is_temp_forced,
>> + &curr_id) != 0) {
>> + pParse->rc =
>> + SQL_TARANTOOL_ERROR;
>> + pParse->nErr++;
>> + return 0;
>> + }
>> + }
>> }
>> + coll = coll_by_id(curr_id)->coll;
>> }
>> if (pFarg) {
>> if (constMask) {
>> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
>> index 8967ea3e0..47ee474bb 100644
>> --- a/src/box/sql/sqlInt.h
>> +++ b/src/box/sql/sqlInt.h
>> @@ -1660,7 +1660,13 @@ struct FuncDestructor {
>> #define SQL_FUNC_LIKE 0x0004 /* Candidate for the LIKE optimization */
>> #define SQL_FUNC_CASE 0x0008 /* Case-sensitive LIKE-type function */
>> #define SQL_FUNC_EPHEM 0x0010 /* Ephemeral. Delete with VDBE */
>> -#define SQL_FUNC_NEEDCOLL 0x0020 /* sqlGetFuncCollSeq() might be called */
>> +#define SQL_FUNC_NEEDCOLL 0x0020 /* sqlGetFuncCollSeq() might be called.
>> + * The flag is set when the collation
>> + * of function arguments should be
>> + * determined, using rules in
>> + * collations_check_compatibility()
>> + * function.
>> + */
>> #define SQL_FUNC_LENGTH 0x0040 /* Built-in length() function */
>> #define SQL_FUNC_TYPEOF 0x0080 /* Built-in typeof() function */
>> #define SQL_FUNC_COUNT 0x0100 /* Built-in count(*) aggregate */
>> --
>
> Please, provide basic test cases involving one or more built-in
> functions and incompatible arguments (at least min-max funcs use it).
> Moreover, this flag can’t be set for user-defined functions, which is pretty sad.
Add a few tests:
diff --git a/test/sql-tap/func5.test.lua b/test/sql-tap/func5.test.lua
index 6605a2ba1..4282fdac8 100755
--- a/test/sql-tap/func5.test.lua
+++ b/test/sql-tap/func5.test.lua
@@ -1,6 +1,6 @@
#!/usr/bin/env tarantool
test = require("sqltester")
-test:plan(5)
+test:plan(9)
--!./tcltestrunner.lua
-- 2010 August 27
@@ -98,5 +98,59 @@ test:do_execsql_test(
-- </func5-2.2>
})
+-- The following tests ensures that max() and min() functions
+-- raise error if argument's collations are incompatible.
+
+test:do_catchsql_test(
+ "func-5-3.1",
+ [[
+ SELECT max('a' COLLATE "unicode", 'A' COLLATE "unicode_ci");
+ ]],
+ {
+ -- <func5-3.1>
+ 1, "Illegal mix of collations"
+ -- </func5-3.1>
+ }
+)
+
+test:do_catchsql_test(
+ "func-5-3.2",
+ [[
+ CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode");
+ CREATE TABLE test2 (s2 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+ INSERT INTO test1 VALUES ('a');
+ INSERT INTO test2 VALUES ('a');
+ SELECT max(s1, s2) FROM test1 JOIN test2;
+ ]],
+ {
+ -- <func5-3.2>
+ 1, "Illegal mix of collations"
+ -- </func5-3.2>
+ }
+)
+
+test:do_catchsql_test(
+ "func-5-3.3",
+ [[
+ SELECT min('a' COLLATE "unicode", 'A' COLLATE "unicode_ci");
+ ]],
+ {
+ -- <func5-3.3>
+ 1, "Illegal mix of collations"
+ -- </func5-3.3>
+ }
+)
+
+test:do_catchsql_test(
+ "func-5-3.4",
+ [[
+ SELECT min(s1, s2) FROM test1 JOIN test2;
+ ]],
+ {
+ -- <func5-3.4>
+ 1, "Illegal mix of collations"
+ -- </func5-3.4>
+ }
+)
test:finish_test()
More information about the Tarantool-patches
mailing list