[tarantool-patches] Re: [PATCH 1/2] sql: add better collation determination in functions

i.koptelov ivan.koptelov at tarantool.org
Wed Mar 27 16:38:47 MSK 2019


> On 25 Mar 2019, at 22:26, n.pettik <korablev at tarantool.org> wrote:
> 
> 
> 
>> On 20 Mar 2019, at 14:11, Ivan Koptelov <ivan.koptelov at tarantool.org> wrote:
>> 
>> Before the patch determination of collation in SQL functions
>> (used only in instr) was too narrow: the arguments were scanned
>> from left to right, till the argument with collation was
>> encountered, then its collation was used.
>> Now every arguments collation is considered. The right collation
>> which would be used in function is determined using ANSI
>> compatibility rules ("type combination" rules).
>> 
>> Note: currently only instr() a.k.a position() uses mechanism
>> described above, other functions (except aggregate) simply
>> ignores collations.
> 
> That’s not true: I see that min-max aggregates also feature this flag:
> 
> FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR),
> 
> Fourth param indicates whether SQL_FUNC_NEEDCOLL is set or not.
Oh, sorry for that. Fixed commit message.

>> ---
>> src/box/sql/expr.c   | 69 ++++++++++++++++++++++++++++++++++++++++----
>> src/box/sql/sqlInt.h |  8 ++++-
>> 2 files changed, 70 insertions(+), 7 deletions(-)
>> 
>> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
>> index a2c70935e..2f48d90c6 100644
>> --- a/src/box/sql/expr.c
>> +++ b/src/box/sql/expr.c
>> @@ -4093,16 +4093,73 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
>> 					testcase(i == 31);
>> 					constMask |= MASKBIT32(i);
>> 				}
>> -				if ((pDef->funcFlags & SQL_FUNC_NEEDCOLL) !=
>> -				    0 && coll == NULL) {
>> -					bool unused;
>> -					uint32_t id;
>> +			}
>> +			/*
>> +			 * Function arguments may have different
>> +			 * collations. The following code
>> +			 * checks if they are compatible and
>> +			 * finds the collation to be used. This
>> +			 * is done using ANSI rules from
>> +			 * collations_check_compatibility().
>> +			 */
>> +			if ((pDef->funcFlags & SQL_FUNC_NEEDCOLL) != 0 &&
>> +			     coll == NULL) {
>> +				struct coll *unused = NULL;
>> +				uint32_t curr_id = COLL_NONE;
>> +				bool is_curr_forced = false;
>> +
>> +				uint32_t temp_id = COLL_NONE;
>> +				bool is_temp_forced = false;
>> +
>> +				uint32_t lhs_id = COLL_NONE;
>> +				bool is_lhs_forced = false;
>> +
>> +				uint32_t rhs_id = COLL_NONE;
>> +				bool is_rhs_forced = false;
>> +
>> +				for (int i = 0; i < nFarg; i++) {
>> 					if (sql_expr_coll(pParse,
>> 							  pFarg->a[i].pExpr,
>> -							  &unused, &id,
>> -							  &coll) != 0)
>> +							  &is_lhs_forced,
>> +							  &lhs_id,
>> +							  &unused) != 0)
>> 						return 0;
>> +
>> +					for (int j = i + 1; j < nFarg; j++) {
>> +						if (sql_expr_coll(pParse,
>> +								  pFarg->a[j].pExpr,
>> +								  &is_rhs_forced,
>> +								  &rhs_id,
>> +								  &unused) != 0)
>> +							return 0;
> 
> Seems like you need only one pass saving resulting collation.
> Resulting collation shouldn’t depend on way of passing through
> arguments. And second call of collations_check_copatiility() is
> redundant as well. Now you are using 2n^2 calls of this function,
> but n is enough: you compare collation of first argument with
> second one and save result in tmp. Then, compare tmp with
> third argument etc.
Thank you for noticing, fixed now:
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 8c1889d8a..34abb9665 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -4102,65 +4102,34 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
 			 * is done using ANSI rules from
 			 * collations_check_compatibility().
 			 */
-			if (nFarg == 1) {
-				bool unused;
-				uint32_t id;
-				if (sql_expr_coll(pParse,
-						  pFarg->a[0].pExpr, &unused,
-						  &id, &coll) != 0)
-					return 0;
-			}
 			if ((pDef->funcFlags & SQL_FUNC_NEEDCOLL) != 0 &&
-			     coll == NULL && nFarg > 1) {
+			     coll == NULL) {
 				struct coll *unused = NULL;
 				uint32_t curr_id = COLL_NONE;
 				bool is_curr_forced = false;
 
-				uint32_t temp_id = COLL_NONE;
-				bool is_temp_forced = false;
-
-				uint32_t lhs_id = COLL_NONE;
-				bool is_lhs_forced = false;
+				uint32_t next_id = COLL_NONE;
+				bool is_next_forced = false;
 
-				uint32_t rhs_id = COLL_NONE;
-				bool is_rhs_forced = false;
+				if (sql_expr_coll(pParse, pFarg->a[0].pExpr,
+						  &is_curr_forced, &curr_id,
+						  &unused) != 0)
+					return 0;
 
-				for (int i = 0; i < nFarg; i++) {
+				for (int j = 1; j < nFarg; j++) {
 					if (sql_expr_coll(pParse,
-							  pFarg->a[i].pExpr,
-							  &is_lhs_forced,
-							  &lhs_id,
+							  pFarg->a[j].pExpr,
+							  &is_next_forced,
+							  &next_id,
 							  &unused) != 0)
 						return 0;
 
-					for (int j = i + 1; j < nFarg; j++) {
-						if (sql_expr_coll(pParse,
-								  pFarg->a[j].pExpr,
-								  &is_rhs_forced,
-								  &rhs_id,
-								  &unused) != 0)
-							return 0;
-
-						if (collations_check_compatibility(
-							lhs_id, is_lhs_forced,
-							rhs_id, is_rhs_forced,
-							&temp_id) != 0) {
-							pParse->is_aborted = true;
-							return 0;
-						}
-
-						is_temp_forced = (temp_id ==
-								  lhs_id) ?
-								 is_lhs_forced :
-								 is_rhs_forced;
-
-						if (collations_check_compatibility(
-							curr_id, is_curr_forced,
-							temp_id, is_temp_forced,
-							&curr_id) != 0) {
-							pParse->is_aborted = true;
-							return 0;
-						}
+					if (collations_check_compatibility(
+						curr_id, is_curr_forced,
+						next_id, is_next_forced,
+						&curr_id) != 0) {
+						pParse->is_aborted = true;
+						return 0;
 					}
 				}
 				coll = coll_by_id(curr_id)->coll;
> 
>> +
>> +						if (collations_check_compatibility(
>> +							lhs_id, is_lhs_forced,
>> +							rhs_id, is_rhs_forced,
>> +							&temp_id) != 0) {
>> +							pParse->rc =
>> +								SQL_TARANTOOL_ERROR;
>> +							pParse->nErr++;
>> +							return 0;
>> +						}
>> +
>> +						is_temp_forced = (temp_id ==
>> +								  lhs_id) ?
>> +								 is_lhs_forced :
>> +								 is_rhs_forced;
>> +
>> +						if (collations_check_compatibility(
>> +							curr_id, is_curr_forced,
>> +							temp_id, is_temp_forced,
>> +							&curr_id) != 0) {
>> +							pParse->rc =
>> +								SQL_TARANTOOL_ERROR;
>> +							pParse->nErr++;
>> +							return 0;
>> +						}
>> +					}
>> 				}
>> +				coll = coll_by_id(curr_id)->coll;
>> 			}
>> 			if (pFarg) {
>> 				if (constMask) {
>> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
>> index 8967ea3e0..47ee474bb 100644
>> --- a/src/box/sql/sqlInt.h
>> +++ b/src/box/sql/sqlInt.h
>> @@ -1660,7 +1660,13 @@ struct FuncDestructor {
>> #define SQL_FUNC_LIKE     0x0004	/* Candidate for the LIKE optimization */
>> #define SQL_FUNC_CASE     0x0008	/* Case-sensitive LIKE-type function */
>> #define SQL_FUNC_EPHEM    0x0010	/* Ephemeral.  Delete with VDBE */
>> -#define SQL_FUNC_NEEDCOLL 0x0020	/* sqlGetFuncCollSeq() might be called */
>> +#define SQL_FUNC_NEEDCOLL 0x0020	/* sqlGetFuncCollSeq() might be called.
>> +					 * The flag is set when the collation
>> +					 * of function arguments should be
>> +					 * determined, using rules in
>> +					 * collations_check_compatibility()
>> +					 * function.
>> +					 */
>> #define SQL_FUNC_LENGTH   0x0040	/* Built-in length() function */
>> #define SQL_FUNC_TYPEOF   0x0080	/* Built-in typeof() function */
>> #define SQL_FUNC_COUNT    0x0100	/* Built-in count(*) aggregate */
>> -- 
> 
> Please, provide basic test cases involving one or more built-in
> functions and incompatible arguments (at least min-max funcs use it).
> Moreover, this flag can’t be set for user-defined functions, which is pretty sad.
Add a few tests:
diff --git a/test/sql-tap/func5.test.lua b/test/sql-tap/func5.test.lua
index 6605a2ba1..4282fdac8 100755
--- a/test/sql-tap/func5.test.lua
+++ b/test/sql-tap/func5.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(5)
+test:plan(9)
 
 --!./tcltestrunner.lua
 -- 2010 August 27
@@ -98,5 +98,59 @@ test:do_execsql_test(
         -- </func5-2.2>
     })
 
+-- The following tests ensures that max() and min() functions
+-- raise error if argument's collations are incompatible.
+
+test:do_catchsql_test(
+    "func-5-3.1",
+    [[
+        SELECT max('a' COLLATE "unicode", 'A' COLLATE "unicode_ci");
+    ]],
+    {
+        -- <func5-3.1>
+        1, "Illegal mix of collations"
+        -- </func5-3.1>
+    }
+)
+
+test:do_catchsql_test(
+    "func-5-3.2",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode");
+        CREATE TABLE test2 (s2 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES ('a');
+        INSERT INTO test2 VALUES ('a');
+        SELECT max(s1, s2) FROM test1 JOIN test2;
+    ]],
+    {
+        -- <func5-3.2>
+        1, "Illegal mix of collations"
+        -- </func5-3.2>
+    }
+)
+
+test:do_catchsql_test(
+    "func-5-3.3",
+    [[
+        SELECT min('a' COLLATE "unicode", 'A' COLLATE "unicode_ci");
+    ]],
+    {
+        -- <func5-3.3>
+        1, "Illegal mix of collations"
+        -- </func5-3.3>
+    }
+)
+
+test:do_catchsql_test(
+    "func-5-3.4",
+    [[
+        SELECT min(s1, s2) FROM test1 JOIN test2;
+    ]],
+    {
+        -- <func5-3.4>
+        1, "Illegal mix of collations"
+        -- </func5-3.4>
+    }
+)
 
 test:finish_test()



More information about the Tarantool-patches mailing list