[tarantool-patches] [PATCH 1/2] sql: derive collation for built-in functions

Nikita Pettik korablev at tarantool.org
Thu Feb 21 21:01:34 MSK 2019


Functions such as trim(), substr() etc should return result with
collation derived from their arguments. So, lets add flag indicating
that collation of first argument must be applied to function's result to
SQL function definition. Using this flag, we can derive appropriate
collation in sql_expr_coll().

Part of #3932
---
 src/box/sql/analyze.c       |  6 +++---
 src/box/sql/expr.c          | 23 +++++++++++++++++++++++
 src/box/sql/func.c          | 22 +++++++++++-----------
 src/box/sql/sqlInt.h        | 31 +++++++++++++++++++++++--------
 test/sql/collation.result   | 28 ++++++++++++++++++++++++++++
 test/sql/collation.test.lua | 11 +++++++++++
 6 files changed, 99 insertions(+), 22 deletions(-)

diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c
index e0f2724a5..82db23200 100644
--- a/src/box/sql/analyze.c
+++ b/src/box/sql/analyze.c
@@ -359,7 +359,7 @@ static const FuncDef statInitFuncdef = {
 	0,			/* xFinalize */
 	"stat_init",		/* zName */
 	{0},
-	0
+	0, false
 };
 
 /*
@@ -615,7 +615,7 @@ static const FuncDef statPushFuncdef = {
 	0,			/* xFinalize */
 	"stat_push",		/* zName */
 	{0},
-	0
+	0, false
 };
 
 #define STAT_GET_STAT1 0	/* "stat" column of stat1 table */
@@ -743,7 +743,7 @@ static const FuncDef statGetFuncdef = {
 	0,			/* xFinalize */
 	"stat_get",		/* zName */
 	{0},
-	0
+	0, false
 };
 
 static void
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 35145cef6..3963c627a 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -226,6 +226,29 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id)
 			}
 			break;
 		}
+		if (op == TK_FUNCTION) {
+			uint32_t arg_count = p->x.pList == NULL ? 0 :
+					     p->x.pList->nExpr;
+			struct FuncDef *func = sqlFindFunction(parse->db,
+							       p->u.zToken,
+							       arg_count, 0);
+			if (func == NULL)
+				break;
+			if (func->is_coll_derived) {
+				/*
+				 * Now we use quite straightforward
+				 * approach assuming that resulting
+				 * collation is derived from first
+				 * argument. It is true at least for
+				 * built-in functions: trim, upper,
+				 * lower, replace, substr.
+				 */
+				assert(func->ret_type == FIELD_TYPE_STRING);
+				p = p->x.pList->a->pExpr;
+				continue;
+			}
+			break;
+		}
 		if (p->flags & EP_Collate) {
 			if (p->pLeft && (p->pLeft->flags & EP_Collate) != 0) {
 				p = p->pLeft;
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 83e0d12a1..532b38e4f 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -1727,12 +1727,12 @@ sqlRegisterBuiltinFunctions(void)
 			  FIELD_TYPE_INTEGER),
 		FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY,
 			  FIELD_TYPE_INTEGER),
-		FUNCTION(ltrim, 1, 1, 0, trimFunc, FIELD_TYPE_STRING),
-		FUNCTION(ltrim, 2, 1, 0, trimFunc, FIELD_TYPE_STRING),
-		FUNCTION(rtrim, 1, 2, 0, trimFunc, FIELD_TYPE_STRING),
-		FUNCTION(rtrim, 2, 2, 0, trimFunc, FIELD_TYPE_STRING),
-		FUNCTION(trim, 1, 3, 0, trimFunc, FIELD_TYPE_STRING),
-		FUNCTION(trim, 2, 3, 0, trimFunc, FIELD_TYPE_STRING),
+		FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc),
+		FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc),
+		FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc),
+		FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc),
+		FUNCTION_COLL(trim, 1, 3, 0, trimFunc),
+		FUNCTION_COLL(trim, 2, 3, 0, trimFunc),
 		FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR),
 		FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR),
 		AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize,
@@ -1754,8 +1754,8 @@ sqlRegisterBuiltinFunctions(void)
 		FUNCTION(round, 1, 0, 0, roundFunc, FIELD_TYPE_INTEGER),
 		FUNCTION(round, 2, 0, 0, roundFunc, FIELD_TYPE_INTEGER),
 #endif
-		FUNCTION(upper, 1, 0, 1, UpperICUFunc, FIELD_TYPE_STRING),
-		FUNCTION(lower, 1, 0, 1, LowerICUFunc, FIELD_TYPE_STRING),
+		FUNCTION_COLL(upper, 1, 0, 1, UpperICUFunc),
+		FUNCTION_COLL(lower, 1, 0, 1, LowerICUFunc),
 		FUNCTION(hex, 1, 0, 0, hexFunc, FIELD_TYPE_STRING),
 		FUNCTION2(ifnull, 2, 0, 0, noopFunc, SQL_FUNC_COALESCE,
 			  FIELD_TYPE_INTEGER),
@@ -1765,10 +1765,10 @@ sqlRegisterBuiltinFunctions(void)
 		FUNCTION(version, 0, 0, 0, sql_func_version, FIELD_TYPE_STRING),
 		FUNCTION(quote, 1, 0, 0, quoteFunc, FIELD_TYPE_STRING),
 		VFUNCTION(row_count, 0, 0, 0, sql_row_count, FIELD_TYPE_INTEGER),
-		FUNCTION(replace, 3, 0, 0, replaceFunc, FIELD_TYPE_STRING),
+		FUNCTION_COLL(replace, 3, 0, 0, replaceFunc),
 		FUNCTION(zeroblob, 1, 0, 0, zeroblobFunc, FIELD_TYPE_SCALAR),
-		FUNCTION(substr, 2, 0, 0, substrFunc, FIELD_TYPE_STRING),
-		FUNCTION(substr, 3, 0, 0, substrFunc, FIELD_TYPE_STRING),
+		FUNCTION_COLL(substr, 2, 0, 0, substrFunc),
+		FUNCTION_COLL(substr, 3, 0, 0, substrFunc),
 		AGGREGATE(sum, 1, 0, 0, sumStep, sumFinalize, FIELD_TYPE_NUMBER),
 		AGGREGATE(total, 1, 0, 0, sumStep, totalFinalize, FIELD_TYPE_NUMBER),
 		AGGREGATE(avg, 1, 0, 0, sumStep, avgFinalize, FIELD_TYPE_NUMBER),
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 2830ab639..5fb7285d8 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -1633,6 +1633,13 @@ struct FuncDef {
 	} u;
 	/* Return type. */
 	enum field_type ret_type;
+	/**
+	 * If function returns string, it may require collation
+	 * to be applied on its result. For instance, result of
+	 * substr() built-in function must have the same collation
+	 * as its first argument.
+	 */
+	bool is_coll_derived;
 };
 
 /*
@@ -1693,6 +1700,11 @@ struct FuncDestructor {
  *     as the user-data (sql_user_data()) for the function. If
  *     argument bNC is true, then the sql_FUNC_NEEDCOLL flag is set.
  *
+ *   FUNCTION_COLL
+ *     Like FUNCTION except it assumes that function returns
+ *     STRING which collation should be derived from first
+ *     argument (trim, substr etc).
+ *
  *   VFUNCTION(zName, nArg, iArg, bNC, xFunc)
  *     Like FUNCTION except it omits the sql_FUNC_CONSTANT flag.
  *
@@ -1717,28 +1729,31 @@ struct FuncDestructor {
  */
 #define FUNCTION(zName, nArg, iArg, bNC, xFunc, type) \
   {nArg, SQL_FUNC_CONSTANT|(bNC*SQL_FUNC_NEEDCOLL), \
-   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type }
+   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type, false}
+#define FUNCTION_COLL(zName, nArg, iArg, bNC, xFunc) \
+  {nArg, SQL_FUNC_CONSTANT|(bNC*SQL_FUNC_NEEDCOLL), \
+   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, FIELD_TYPE_STRING, true}
 #define VFUNCTION(zName, nArg, iArg, bNC, xFunc, type) \
   {nArg, (bNC*SQL_FUNC_NEEDCOLL), \
-   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type }
+   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type, false }
 #define DFUNCTION(zName, nArg, iArg, bNC, xFunc, type) \
   {nArg, SQL_FUNC_SLOCHNG|(bNC*SQL_FUNC_NEEDCOLL), \
-   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type }
+   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type, false }
 #define FUNCTION2(zName, nArg, iArg, bNC, xFunc, extraFlags, type) \
   {nArg,SQL_FUNC_CONSTANT|(bNC*SQL_FUNC_NEEDCOLL)|extraFlags,\
-   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type }
+   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type, false }
 #define STR_FUNCTION(zName, nArg, pArg, bNC, xFunc) \
   {nArg, SQL_FUNC_SLOCHNG|(bNC*SQL_FUNC_NEEDCOLL), \
-   pArg, 0, xFunc, 0, #zName, {SQL_AFF_STRING, {0}}}
+   pArg, 0, xFunc, 0, #zName, {SQL_AFF_STRING, {0}}, false}
 #define LIKEFUNC(zName, nArg, arg, flags, type) \
   {nArg, SQL_FUNC_CONSTANT|flags, \
-   (void *)(SQL_INT_TO_PTR(arg)), 0, likeFunc, 0, #zName, {0}, type }
+   (void *)(SQL_INT_TO_PTR(arg)), 0, likeFunc, 0, #zName, {0}, type, false }
 #define AGGREGATE(zName, nArg, arg, nc, xStep, xFinal, type) \
   {nArg, (nc*SQL_FUNC_NEEDCOLL), \
-   SQL_INT_TO_PTR(arg), 0, xStep,xFinal,#zName, {0}, type}
+   SQL_INT_TO_PTR(arg), 0, xStep,xFinal,#zName, {0}, type, false}
 #define AGGREGATE2(zName, nArg, arg, nc, xStep, xFinal, extraFlags, type) \
   {nArg, (nc*SQL_FUNC_NEEDCOLL)|extraFlags, \
-   SQL_INT_TO_PTR(arg), 0, xStep,xFinal,#zName, {0}, type}
+   SQL_INT_TO_PTR(arg), 0, xStep,xFinal,#zName, {0}, type, false}
 
 /*
  * All current savepoints are stored in a linked list starting at
diff --git a/test/sql/collation.result b/test/sql/collation.result
index 5721ef854..439dc51c1 100644
--- a/test/sql/collation.result
+++ b/test/sql/collation.result
@@ -325,3 +325,31 @@ box.sql.execute("DROP TABLE t1;")
 box.sql.execute("DROP TABLE t0;")
 ---
 ...
+-- gh-3932: make sure set build-in functions derive collation
+-- from their arguments.
+--
+box.sql.execute("CREATE TABLE jj (s1 INT PRIMARY KEY, s2 CHAR(3) COLLATE \"unicode_ci\");")
+---
+...
+box.sql.execute("INSERT INTO jj VALUES (1,'A'), (2,'a')")
+---
+...
+box.sql.execute("SELECT DISTINCT trim(s2) FROM jj;")
+---
+- - ['A']
+...
+box.sql.execute("INSERT INTO jj VALUES (3, 'aS'), (4, 'AS');")
+---
+...
+box.sql.execute("SELECT DISTINCT replace(s2, 'S', 's') FROM jj;")
+---
+- - ['A']
+  - ['as']
+...
+box.sql.execute("SELECT DISTINCT substr(s2, 1, 1) FROM jj;")
+---
+- - ['A']
+...
+box.space.JJ:drop()
+---
+...
diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua
index 4c649a444..d4acbf9f1 100644
--- a/test/sql/collation.test.lua
+++ b/test/sql/collation.test.lua
@@ -126,3 +126,14 @@ box.sql.execute("SELECT * FROM t1;")
 box.sql.execute("SELECT s1 FROM t0;")
 box.sql.execute("DROP TABLE t1;")
 box.sql.execute("DROP TABLE t0;")
+
+-- gh-3932: make sure set build-in functions derive collation
+-- from their arguments.
+--
+box.sql.execute("CREATE TABLE jj (s1 INT PRIMARY KEY, s2 CHAR(3) COLLATE \"unicode_ci\");")
+box.sql.execute("INSERT INTO jj VALUES (1,'A'), (2,'a')")
+box.sql.execute("SELECT DISTINCT trim(s2) FROM jj;")
+box.sql.execute("INSERT INTO jj VALUES (3, 'aS'), (4, 'AS');")
+box.sql.execute("SELECT DISTINCT replace(s2, 'S', 's') FROM jj;")
+box.sql.execute("SELECT DISTINCT substr(s2, 1, 1) FROM jj;")
+box.space.JJ:drop()
-- 
2.15.1





More information about the Tarantool-patches mailing list