Tarantool development patches archive
 help / color / mirror / Atom feed
From: Nikita Pettik <korablev@tarantool.org>
To: tarantool-patches@freelists.org
Cc: v.shpilevoy@tarantool.org, Nikita Pettik <korablev@tarantool.org>
Subject: [tarantool-patches] [PATCH 1/2] sql: derive collation for built-in functions
Date: Thu, 21 Feb 2019 21:01:34 +0300	[thread overview]
Message-ID: <f1fd4382738c37594b1f7ece5e153237f08e5be1.1550768589.git.korablev@tarantool.org> (raw)
In-Reply-To: <cover.1550768589.git.korablev@tarantool.org>
In-Reply-To: <cover.1550768589.git.korablev@tarantool.org>

Functions such as trim(), substr() etc should return result with
collation derived from their arguments. So, lets add flag indicating
that collation of first argument must be applied to function's result to
SQL function definition. Using this flag, we can derive appropriate
collation in sql_expr_coll().

Part of #3932
---
 src/box/sql/analyze.c       |  6 +++---
 src/box/sql/expr.c          | 23 +++++++++++++++++++++++
 src/box/sql/func.c          | 22 +++++++++++-----------
 src/box/sql/sqlInt.h        | 31 +++++++++++++++++++++++--------
 test/sql/collation.result   | 28 ++++++++++++++++++++++++++++
 test/sql/collation.test.lua | 11 +++++++++++
 6 files changed, 99 insertions(+), 22 deletions(-)

diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c
index e0f2724a5..82db23200 100644
--- a/src/box/sql/analyze.c
+++ b/src/box/sql/analyze.c
@@ -359,7 +359,7 @@ static const FuncDef statInitFuncdef = {
 	0,			/* xFinalize */
 	"stat_init",		/* zName */
 	{0},
-	0
+	0, false
 };
 
 /*
@@ -615,7 +615,7 @@ static const FuncDef statPushFuncdef = {
 	0,			/* xFinalize */
 	"stat_push",		/* zName */
 	{0},
-	0
+	0, false
 };
 
 #define STAT_GET_STAT1 0	/* "stat" column of stat1 table */
@@ -743,7 +743,7 @@ static const FuncDef statGetFuncdef = {
 	0,			/* xFinalize */
 	"stat_get",		/* zName */
 	{0},
-	0
+	0, false
 };
 
 static void
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 35145cef6..3963c627a 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -226,6 +226,29 @@ sql_expr_coll(Parse *parse, Expr *p, bool *is_explicit_coll, uint32_t *coll_id)
 			}
 			break;
 		}
+		if (op == TK_FUNCTION) {
+			uint32_t arg_count = p->x.pList == NULL ? 0 :
+					     p->x.pList->nExpr;
+			struct FuncDef *func = sqlFindFunction(parse->db,
+							       p->u.zToken,
+							       arg_count, 0);
+			if (func == NULL)
+				break;
+			if (func->is_coll_derived) {
+				/*
+				 * Now we use quite straightforward
+				 * approach assuming that resulting
+				 * collation is derived from first
+				 * argument. It is true at least for
+				 * built-in functions: trim, upper,
+				 * lower, replace, substr.
+				 */
+				assert(func->ret_type == FIELD_TYPE_STRING);
+				p = p->x.pList->a->pExpr;
+				continue;
+			}
+			break;
+		}
 		if (p->flags & EP_Collate) {
 			if (p->pLeft && (p->pLeft->flags & EP_Collate) != 0) {
 				p = p->pLeft;
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 83e0d12a1..532b38e4f 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -1727,12 +1727,12 @@ sqlRegisterBuiltinFunctions(void)
 			  FIELD_TYPE_INTEGER),
 		FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY,
 			  FIELD_TYPE_INTEGER),
-		FUNCTION(ltrim, 1, 1, 0, trimFunc, FIELD_TYPE_STRING),
-		FUNCTION(ltrim, 2, 1, 0, trimFunc, FIELD_TYPE_STRING),
-		FUNCTION(rtrim, 1, 2, 0, trimFunc, FIELD_TYPE_STRING),
-		FUNCTION(rtrim, 2, 2, 0, trimFunc, FIELD_TYPE_STRING),
-		FUNCTION(trim, 1, 3, 0, trimFunc, FIELD_TYPE_STRING),
-		FUNCTION(trim, 2, 3, 0, trimFunc, FIELD_TYPE_STRING),
+		FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc),
+		FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc),
+		FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc),
+		FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc),
+		FUNCTION_COLL(trim, 1, 3, 0, trimFunc),
+		FUNCTION_COLL(trim, 2, 3, 0, trimFunc),
 		FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR),
 		FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR),
 		AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize,
@@ -1754,8 +1754,8 @@ sqlRegisterBuiltinFunctions(void)
 		FUNCTION(round, 1, 0, 0, roundFunc, FIELD_TYPE_INTEGER),
 		FUNCTION(round, 2, 0, 0, roundFunc, FIELD_TYPE_INTEGER),
 #endif
-		FUNCTION(upper, 1, 0, 1, UpperICUFunc, FIELD_TYPE_STRING),
-		FUNCTION(lower, 1, 0, 1, LowerICUFunc, FIELD_TYPE_STRING),
+		FUNCTION_COLL(upper, 1, 0, 1, UpperICUFunc),
+		FUNCTION_COLL(lower, 1, 0, 1, LowerICUFunc),
 		FUNCTION(hex, 1, 0, 0, hexFunc, FIELD_TYPE_STRING),
 		FUNCTION2(ifnull, 2, 0, 0, noopFunc, SQL_FUNC_COALESCE,
 			  FIELD_TYPE_INTEGER),
@@ -1765,10 +1765,10 @@ sqlRegisterBuiltinFunctions(void)
 		FUNCTION(version, 0, 0, 0, sql_func_version, FIELD_TYPE_STRING),
 		FUNCTION(quote, 1, 0, 0, quoteFunc, FIELD_TYPE_STRING),
 		VFUNCTION(row_count, 0, 0, 0, sql_row_count, FIELD_TYPE_INTEGER),
-		FUNCTION(replace, 3, 0, 0, replaceFunc, FIELD_TYPE_STRING),
+		FUNCTION_COLL(replace, 3, 0, 0, replaceFunc),
 		FUNCTION(zeroblob, 1, 0, 0, zeroblobFunc, FIELD_TYPE_SCALAR),
-		FUNCTION(substr, 2, 0, 0, substrFunc, FIELD_TYPE_STRING),
-		FUNCTION(substr, 3, 0, 0, substrFunc, FIELD_TYPE_STRING),
+		FUNCTION_COLL(substr, 2, 0, 0, substrFunc),
+		FUNCTION_COLL(substr, 3, 0, 0, substrFunc),
 		AGGREGATE(sum, 1, 0, 0, sumStep, sumFinalize, FIELD_TYPE_NUMBER),
 		AGGREGATE(total, 1, 0, 0, sumStep, totalFinalize, FIELD_TYPE_NUMBER),
 		AGGREGATE(avg, 1, 0, 0, sumStep, avgFinalize, FIELD_TYPE_NUMBER),
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 2830ab639..5fb7285d8 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -1633,6 +1633,13 @@ struct FuncDef {
 	} u;
 	/* Return type. */
 	enum field_type ret_type;
+	/**
+	 * If function returns string, it may require collation
+	 * to be applied on its result. For instance, result of
+	 * substr() built-in function must have the same collation
+	 * as its first argument.
+	 */
+	bool is_coll_derived;
 };
 
 /*
@@ -1693,6 +1700,11 @@ struct FuncDestructor {
  *     as the user-data (sql_user_data()) for the function. If
  *     argument bNC is true, then the sql_FUNC_NEEDCOLL flag is set.
  *
+ *   FUNCTION_COLL
+ *     Like FUNCTION except it assumes that function returns
+ *     STRING which collation should be derived from first
+ *     argument (trim, substr etc).
+ *
  *   VFUNCTION(zName, nArg, iArg, bNC, xFunc)
  *     Like FUNCTION except it omits the sql_FUNC_CONSTANT flag.
  *
@@ -1717,28 +1729,31 @@ struct FuncDestructor {
  */
 #define FUNCTION(zName, nArg, iArg, bNC, xFunc, type) \
   {nArg, SQL_FUNC_CONSTANT|(bNC*SQL_FUNC_NEEDCOLL), \
-   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type }
+   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type, false}
+#define FUNCTION_COLL(zName, nArg, iArg, bNC, xFunc) \
+  {nArg, SQL_FUNC_CONSTANT|(bNC*SQL_FUNC_NEEDCOLL), \
+   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, FIELD_TYPE_STRING, true}
 #define VFUNCTION(zName, nArg, iArg, bNC, xFunc, type) \
   {nArg, (bNC*SQL_FUNC_NEEDCOLL), \
-   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type }
+   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type, false }
 #define DFUNCTION(zName, nArg, iArg, bNC, xFunc, type) \
   {nArg, SQL_FUNC_SLOCHNG|(bNC*SQL_FUNC_NEEDCOLL), \
-   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type }
+   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type, false }
 #define FUNCTION2(zName, nArg, iArg, bNC, xFunc, extraFlags, type) \
   {nArg,SQL_FUNC_CONSTANT|(bNC*SQL_FUNC_NEEDCOLL)|extraFlags,\
-   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type }
+   SQL_INT_TO_PTR(iArg), 0, xFunc, 0, #zName, {0}, type, false }
 #define STR_FUNCTION(zName, nArg, pArg, bNC, xFunc) \
   {nArg, SQL_FUNC_SLOCHNG|(bNC*SQL_FUNC_NEEDCOLL), \
-   pArg, 0, xFunc, 0, #zName, {SQL_AFF_STRING, {0}}}
+   pArg, 0, xFunc, 0, #zName, {SQL_AFF_STRING, {0}}, false}
 #define LIKEFUNC(zName, nArg, arg, flags, type) \
   {nArg, SQL_FUNC_CONSTANT|flags, \
-   (void *)(SQL_INT_TO_PTR(arg)), 0, likeFunc, 0, #zName, {0}, type }
+   (void *)(SQL_INT_TO_PTR(arg)), 0, likeFunc, 0, #zName, {0}, type, false }
 #define AGGREGATE(zName, nArg, arg, nc, xStep, xFinal, type) \
   {nArg, (nc*SQL_FUNC_NEEDCOLL), \
-   SQL_INT_TO_PTR(arg), 0, xStep,xFinal,#zName, {0}, type}
+   SQL_INT_TO_PTR(arg), 0, xStep,xFinal,#zName, {0}, type, false}
 #define AGGREGATE2(zName, nArg, arg, nc, xStep, xFinal, extraFlags, type) \
   {nArg, (nc*SQL_FUNC_NEEDCOLL)|extraFlags, \
-   SQL_INT_TO_PTR(arg), 0, xStep,xFinal,#zName, {0}, type}
+   SQL_INT_TO_PTR(arg), 0, xStep,xFinal,#zName, {0}, type, false}
 
 /*
  * All current savepoints are stored in a linked list starting at
diff --git a/test/sql/collation.result b/test/sql/collation.result
index 5721ef854..439dc51c1 100644
--- a/test/sql/collation.result
+++ b/test/sql/collation.result
@@ -325,3 +325,31 @@ box.sql.execute("DROP TABLE t1;")
 box.sql.execute("DROP TABLE t0;")
 ---
 ...
+-- gh-3932: make sure set build-in functions derive collation
+-- from their arguments.
+--
+box.sql.execute("CREATE TABLE jj (s1 INT PRIMARY KEY, s2 CHAR(3) COLLATE \"unicode_ci\");")
+---
+...
+box.sql.execute("INSERT INTO jj VALUES (1,'A'), (2,'a')")
+---
+...
+box.sql.execute("SELECT DISTINCT trim(s2) FROM jj;")
+---
+- - ['A']
+...
+box.sql.execute("INSERT INTO jj VALUES (3, 'aS'), (4, 'AS');")
+---
+...
+box.sql.execute("SELECT DISTINCT replace(s2, 'S', 's') FROM jj;")
+---
+- - ['A']
+  - ['as']
+...
+box.sql.execute("SELECT DISTINCT substr(s2, 1, 1) FROM jj;")
+---
+- - ['A']
+...
+box.space.JJ:drop()
+---
+...
diff --git a/test/sql/collation.test.lua b/test/sql/collation.test.lua
index 4c649a444..d4acbf9f1 100644
--- a/test/sql/collation.test.lua
+++ b/test/sql/collation.test.lua
@@ -126,3 +126,14 @@ box.sql.execute("SELECT * FROM t1;")
 box.sql.execute("SELECT s1 FROM t0;")
 box.sql.execute("DROP TABLE t1;")
 box.sql.execute("DROP TABLE t0;")
+
+-- gh-3932: make sure set build-in functions derive collation
+-- from their arguments.
+--
+box.sql.execute("CREATE TABLE jj (s1 INT PRIMARY KEY, s2 CHAR(3) COLLATE \"unicode_ci\");")
+box.sql.execute("INSERT INTO jj VALUES (1,'A'), (2,'a')")
+box.sql.execute("SELECT DISTINCT trim(s2) FROM jj;")
+box.sql.execute("INSERT INTO jj VALUES (3, 'aS'), (4, 'AS');")
+box.sql.execute("SELECT DISTINCT replace(s2, 'S', 's') FROM jj;")
+box.sql.execute("SELECT DISTINCT substr(s2, 1, 1) FROM jj;")
+box.space.JJ:drop()
-- 
2.15.1

  reply	other threads:[~2019-02-21 18:01 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-21 18:01 [tarantool-patches] [PATCH 0/2] Add collation to built-in funcs and fix HAVING clause with aggregate Nikita Pettik
2019-02-21 18:01 ` Nikita Pettik [this message]
2019-02-25 12:58   ` [tarantool-patches] Re: [PATCH 1/2] sql: derive collation for built-in functions Vladislav Shpilevoy
2019-02-25 18:32     ` n.pettik
2019-03-07 14:40       ` Vladislav Shpilevoy
2019-03-11  8:04         ` Konstantin Osipov
2019-02-21 18:01 ` [tarantool-patches] [PATCH 2/2] sql: fix code generation for aggregate in HAVING clause Nikita Pettik
2019-02-25 12:58   ` [tarantool-patches] " Vladislav Shpilevoy
2019-02-25 18:33     ` n.pettik
2019-03-04 12:14       ` n.pettik
2019-03-04 12:52         ` Vladislav Shpilevoy
2019-03-07 14:40 ` [tarantool-patches] Re: [PATCH 0/2] Add collation to built-in funcs and fix HAVING clause with aggregate Vladislav Shpilevoy
2019-03-11 15:49 ` Kirill Yukhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f1fd4382738c37594b1f7ece5e153237f08e5be1.1550768589.git.korablev@tarantool.org \
    --to=korablev@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [tarantool-patches] [PATCH 1/2] sql: derive collation for built-in functions' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox