[tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature

Roman Khabibov roman.habibov at tarantool.org
Tue Apr 16 03:14:23 MSK 2019


Hi! Thanks for the review.

> On Apr 14, 2019, at 9:01 PM, Vladislav Shpilevoy <v.shpilevoy at tarantool.org> wrote:
> 
> Hi! Thanks for the patch! See 20 comments below.
> 
> 1. Please, do all the issues on the master branch.
> We cherry-pick on other branches from the master.
> 
> 2. Use the newest version of the branch. Your is outdated
> on more than 2 weeks.
Rebased on 2.1.

> On 11/04/2019 20:33, Roman Khabibov wrote:
>> According to the ANSI standart, ltrim, rtrim and trim should
> 
> 3. Use a spell checker. Sublime text editor has it too.
> 
> standart -> standard
> 
>> be merged into one unified TRIM() function. The specialization of
>> trimming (left, right or both and trimming charcters) determined
> 
> charcters -> characters
    sql: modify TRIM() function signature
    
    According to the ANSI standard, ltrim, rtrim and trim should
    be merged into one unified TRIM() function. The specialization of
    trimming (left, right or both and trimming characters) determined
    in arguments of this function.
    
    Closes #3879

>> in arguments of this function.
>> 
>> Closes #3879
>> ---
>> Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3879-trim
>> Issue: https://github.com/tarantool/tarantool/issues/3879
>> 
>> extra/mkkeywordhash.c         |   5 ++
>> src/box/sql/func.c            |  46 ++++++++------
>> src/box/sql/global.c          |   6 +-
>> src/box/sql/parse.y           |  48 +++++++++++++++
>> test/sql-tap/badutf1.test.lua |  14 ++---
>> test/sql-tap/func.test.lua    | 111 ++++++++++++++++++++++------------
>> test/sql-tap/with1.test.lua   |   2 +-
>> 7 files changed, 165 insertions(+), 67 deletions(-)
>> 
>> diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c
>> index be7bd5545..94a768323 100644
>> --- a/extra/mkkeywordhash.c
>> +++ b/extra/mkkeywordhash.c
>> @@ -91,6 +91,7 @@ struct Keyword {
>> #  define CTE        0x00040000
>> #endif
>> #  define RESERVED   0x00000001
>> +#  define FUNCTION   0x00080000
> 
> 4. These fields are stored in struct Keyword.mask field, which
> is never used for anything more complex than 'mask == 0' check.
> And I do not see a reason why do you need here anything but
> 'ALWAYS' constant. Also, see the issue:
> 
> https://github.com/tarantool/tarantool/issues/4155
diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c
index be7bd5545..76e3265e7 100644
--- a/extra/mkkeywordhash.c
+++ b/extra/mkkeywordhash.c
@@ -278,6 +278,10 @@ static Keyword aKeywordTable[] = {
   { "WHILE",                  "TK_STANDARD",    RESERVED,         true  },
   { "TEXT",                   "TK_TEXT",        RESERVED,         true  },
   { "TRUNCATE",               "TK_TRUNCATE",    ALWAYS,           true  },
+  { "TRIM",                   "TK_TRIM",        ALWAYS,           true  },
+  { "LEADING",                "TK_LEADING",     ALWAYS,           true  },
+  { "TRAILING",               "TK_TRAILING",    ALWAYS,           true  },
+  { "BOTH",                   "TK_BOTH",        ALWAYS,           true  },
 };

> 8. Please, create a enum with normal names for these constants.
+enum trim_specification {
+	LEADING = 1,
+	TRAILING = 2,
+	BOTH = 3
+};

> 12. Pointers should be compared with NULL, not 0.
+	if (sql_value_type(argv[0]) == SQL_NULL) {
+		return;
+	}
+	if ((input_str = sql_value_text(argv[0])) == NULL) {
+		return;
+	}


> 
>> 		return;
>> 	} else {
>> 		const unsigned char *z = zCharSet;
>> -		int trim_set_sz = sql_value_bytes(argv[1]);
>> +		int trim_set_sz = sql_value_bytes(argv[source_index - 1]);
>> 		/*
>> 		* Count the number of UTF-8 characters passing
>> 		* through the entire char set, but not up
>> @@ -1272,8 +1288,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv)
>> 		}
>> 	}
>> 	if (nChar > 0) {
>> -		flags = SQL_PTR_TO_INT(sql_user_data(context));
>> -		if (flags & 1) {
>> +		if (trim_side & 1) {
> 
> 13. When checking flags, use (flag & ...) != 0 instead of an
> implicit conversion. In other places too.
+		if ((flags & 1) != 0) {
+		if ((flags & 2) != 0) {

> 
>> 			while (nIn > 0) {
>> 				int len = 0;
>> 				for (i = 0; i < nChar; i++) {
>> @@ -1288,7 +1303,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv)
>> 				nIn -= len;
>> 			}
>> 		}
>> -		if (flags & 2) {
>> +		if (trim_side & 2) {
>> 			while (nIn > 0) {
>> 				int len = 0;
>> 				for (i = 0; i < nChar; i++) {
>> @@ -1738,12 +1753,9 @@ sqlRegisterBuiltinFunctions(void)
>> 			  FIELD_TYPE_INTEGER),
>> 		FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY,
>> 			  FIELD_TYPE_INTEGER),
>> -		FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc),
>> -		FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc),
>> -		FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc),
>> -		FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc),
>> 		FUNCTION_COLL(trim, 1, 3, 0, trimFunc),
>> 		FUNCTION_COLL(trim, 2, 3, 0, trimFunc),
>> +		FUNCTION_COLL(trim, 3, 3, 0, trimFunc),
> 
> 14. Better write three trim functions taking different number of
> args, converting them to normal types, and calling the single
> trim function. Instead of making a pile of 'if's about argc inside
> the current implementation.
Done. But now I have dublicated pieces of code:

+	const unsigned char *input_str;
+	assert(argc == 1);
+	(void) argc;
+
+	if (sql_value_type(argv[0]) == SQL_NULL) {
+		return;
+	}
+	if ((input_str = sql_value_text(argv[0])) == NULL) {
+		return;
+	}
+
+	int input_str_sz = sql_value_bytes(argv[0]);
+	assert(input_str == sql_value_text(argv[0]));

> 
>> 		FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR),
>> 		FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR),
>> 		AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize,> diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
>> index d2614d9b0..53e5fd932 100644
>> --- a/src/box/sql/parse.y
>> +++ b/src/box/sql/parse.y
>> @@ -937,6 +937,54 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). {
>>   sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0);
>> }
>> %endif  SQL_OMIT_CAST
>> +
>> +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). {
>> +  A.pExpr = sqlExprFunction(pParse, Y, &X);
>> +  spanSet(&A, &X, &E);
>> +}
>> +
>> +%type trim_operands {ExprList*}
> 
> 15. In new code we use 'struct' before each struct
> type, and we put a whitespace before '*'. The same in
> other places.
Done.


>> +    A = sql_expr_list_append(pParse->db, A, Y);
>> +  }
>> +}
>> +
>> +from_clause(A) ::= trim_set(Y) FROM. {
>> +  A = sql_expr_list_append(pParse->db, NULL, Y);
>> +}
>> +
>> +%type trim_set {Expr*}
>> +%destructor trim_set {sql_expr_delete(pParse->db, $$, false);}
>> +
>> +trim_set(A) ::= . {A = 0;}
> 
> 17. The same. Assign NULL, not 0.
+trim_character(A) ::= . { A = NULL; }

> 
>> +trim_set(A) ::= expr(X). {A = X.pExpr;}
>> +
>> +%type trim_specification {int}
>> +
>> +trim_specification(A) ::= LEADING.  {A = 1;}
>> +trim_specification(A) ::= TRAILING. {A = 2;}
>> +trim_specification(A) ::= BOTH.     {A = 3;}
> 
> 18. Why is the grammar so complex? In the standard its
> definition takes 12 lines.
Because I haven’t found another ways to implement grammar, that will be able to be
compiled.
> In your grammar you've allowed this: TRIM(FROM str).
> But it is prohibited by the standard, and leads to an
> assertion:
Now I prohibit that.
> 19. I've refactored the grammar a bit, but it can't be compiled. My
> diff is below. Probably it can help.
+test:do_catchsql_test(
+    "func-22.38",
+    [[
+        SELECT TRIM(FROM 'xyxzy');
+    ]], {
+        -- <func-22.38>
+        1, "Syntax error near 'FROM'"
+        -- </func-22.38>
+    })
+

> 20. 3879 was not about 'BOTH' only. Please, describe the
> issue in more details, and test the whole grammar. As the
> assertion fail above shows, you didn’t.
+-- gh-3879 Check new TRIM() grammar, particularly BOTH keyword and FROM without
+-- any agrs before. LEADING and TRAILING keywords is checked above.

commit f8b3475d9c5f4f479c2ee1709c78a16e1f02aec9
Author: Roman Khabibov <roman.habibov at tarantool.org>
Date:   Thu Mar 28 14:01:33 2019 +0300

    sql: modify TRIM() function signature
    
    According to the ANSI standard, ltrim, rtrim and trim should
    be merged into one unified TRIM() function. The specialization of
    trimming (left, right or both and trimming characters) determined
    in arguments of this function.
    
    Closes #3879

diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c
index be7bd5545..76e3265e7 100644
--- a/extra/mkkeywordhash.c
+++ b/extra/mkkeywordhash.c
@@ -278,6 +278,10 @@ static Keyword aKeywordTable[] = {
   { "WHILE",                  "TK_STANDARD",    RESERVED,         true  },
   { "TEXT",                   "TK_TEXT",        RESERVED,         true  },
   { "TRUNCATE",               "TK_TRUNCATE",    ALWAYS,           true  },
+  { "TRIM",                   "TK_TRIM",        ALWAYS,           true  },
+  { "LEADING",                "TK_LEADING",     ALWAYS,           true  },
+  { "TRAILING",               "TK_TRAILING",    ALWAYS,           true  },
+  { "BOTH",                   "TK_BOTH",        ALWAYS,           true  },
 };
 
 /* Number of keywords */
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index abeecefa1..bf7e7a652 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -1286,108 +1286,223 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
 	sql_result_text(context, (char *)zOut, j, sql_free);
 }
 
-/*
- * Implementation of the TRIM(), LTRIM(), and RTRIM() functions.
- * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both.
+enum trim_specification {
+	LEADING = 1,
+	TRAILING = 2,
+	BOTH = 3
+};
+
+/**
+ * Remove chars included into @a collation from @a input_str.
+ * @param context SQL context.
+ * @param flags Trim specification: left, right or both.
+ * @param collation Character set.
+ * @param coll_sz Character set size in bytes.
+ * @param input_str Input string for trimming.
+ * @param input_str_sz Input string size in bytes.
  */
 static void
-trimFunc(sql_context * context, int argc, sql_value ** argv)
+trim_procedure(sql_context * context, enum trim_specification flags,
+	     const unsigned char *collation, int coll_sz,
+	     const unsigned char *input_str, int input_str_sz)
 {
-	const unsigned char *zIn;	/* Input string */
-	const unsigned char *zCharSet;	/* Set of characters to trim */
-	int nIn;		/* Number of bytes in input */
-	int flags;		/* 1: trimleft  2: trimright  3: trim */
-	int i;			/* Loop counter */
-	unsigned char *aLen = 0;	/* Length of each character in zCharSet */
-	unsigned char **azChar = 0;	/* Individual characters in zCharSet */
-	int nChar;		/* Number of characters in zCharSet */
+	int i;
+	/* 
+	 * Length of each character in collation.
+	 */
+	unsigned char *aLen = 0;
+	/*
+	 * Individual characters in collation.
+	 */
+	unsigned char **azChar = 0;
+	/* 
+	 * Number of characters in collation.
+	 */
+	int nChar;
 
-	if (sql_value_type(argv[0]) == SQL_NULL) {
-		return;
-	}
-	zIn = sql_value_text(argv[0]);
-	if (zIn == 0)
-		return;
-	nIn = sql_value_bytes(argv[0]);
-	assert(zIn == sql_value_text(argv[0]));
-	if (argc == 1) {
-		static const unsigned char lenOne[] = { 1 };
-		static unsigned char *const azOne[] = { (u8 *) " " };
-		nChar = 1;
-		aLen = (u8 *) lenOne;
-		azChar = (unsigned char **)azOne;
-		zCharSet = 0;
-	} else if ((zCharSet = sql_value_text(argv[1])) == 0) {
-		return;
-	} else {
-		const unsigned char *z = zCharSet;
-		int trim_set_sz = sql_value_bytes(argv[1]);
-		/*
-		* Count the number of UTF-8 characters passing
-		* through the entire char set, but not up
-		* to the '\0' or X'00' character. This allows
-		* to handle trimming set containing such
-		* characters.
-		*/
-		nChar = sql_utf8_char_count(z, trim_set_sz);
-		if (nChar > 0) {
-			azChar =
-			    contextMalloc(context,
-					  ((i64) nChar) * (sizeof(char *) + 1));
-			if (azChar == 0) {
-				return;
-			}
-			aLen = (unsigned char *)&azChar[nChar];
-			z = zCharSet;
-			i = 0;
-			nChar = 0;
-			int handled_bytes_cnt = trim_set_sz;
-			while(handled_bytes_cnt > 0) {
-				azChar[nChar] = (unsigned char *)(z + i);
-				SQL_UTF8_FWD_1(z, i, trim_set_sz);
-				aLen[nChar] = (u8) (z + i - azChar[nChar]);
-				handled_bytes_cnt -= aLen[nChar];
-				nChar++;
-			}
+	const unsigned char *z = collation;
+	/*
+	 * Count the number of UTF-8 characters passing
+	 * through the entire char set, but not up
+	 * to the '\0' or X'00' character. This allows
+	 * to handle trimming set containing such
+	 * characters.
+	 */
+	nChar = sql_utf8_char_count(z, coll_sz);
+	if (nChar > 0) {
+		azChar =
+		    contextMalloc(context,
+				  ((i64) nChar) * (sizeof(char *) + 1));
+		if (azChar == 0) {
+			return;
+		}
+		aLen = (unsigned char *)&azChar[nChar];
+		z = collation;
+		i = 0;
+		nChar = 0;
+		int handled_bytes_cnt = coll_sz;
+		while(handled_bytes_cnt > 0) {
+			azChar[nChar] = (unsigned char *)(z + i);
+			SQL_UTF8_FWD_1(z, i, coll_sz);
+			aLen[nChar] = (u8) (z + i - azChar[nChar]);
+			handled_bytes_cnt -= aLen[nChar];
+			nChar++;
 		}
 	}
 	if (nChar > 0) {
-		flags = SQL_PTR_TO_INT(sql_user_data(context));
-		if (flags & 1) {
-			while (nIn > 0) {
+		if ((flags & 1) != 0) {
+			while (input_str_sz > 0) {
 				int len = 0;
 				for (i = 0; i < nChar; i++) {
 					len = aLen[i];
-					if (len <= nIn
-					    && memcmp(zIn, azChar[i], len) == 0)
+					if (len <= input_str_sz
+					    && memcmp(input_str,
+						      azChar[i], len) == 0)
 						break;
 				}
 				if (i >= nChar)
 					break;
-				zIn += len;
-				nIn -= len;
+				input_str += len;
+				input_str_sz -= len;
 			}
 		}
-		if (flags & 2) {
-			while (nIn > 0) {
+		if ((flags & 2) != 0) {
+			while (input_str_sz > 0) {
 				int len = 0;
 				for (i = 0; i < nChar; i++) {
 					len = aLen[i];
-					if (len <= nIn
-					    && memcmp(&zIn[nIn - len],
+					if (len <= input_str_sz
+					    && memcmp(&input_str[input_str_sz - len],
 						      azChar[i], len) == 0)
 						break;
 				}
 				if (i >= nChar)
 					break;
-				nIn -= len;
+				input_str_sz -= len;
 			}
 		}
-		if (zCharSet) {
+		if (collation) {
 			sql_free(azChar);
 		}
 	}
-	sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT);
+	sql_result_text(context, (char *)input_str,input_str_sz,
+			SQL_TRANSIENT);
+}
+
+/**
+ * Normalize args from @a argv input array when it has one arg only.
+ * 
+ * Case: TRIM(<str>)
+ * Call trimming procedure with BOTH as the flags and " " as the collation.
+ *
+ * @param context SQL context.
+ * @param argc Number of args.
+ * @param argv Args array.
+ */
+static void
+trim_func_one_arg(sql_context * context, int argc, sql_value **argv)
+{
+	const unsigned char *input_str;
+	assert(argc == 1);
+	(void) argc;
+
+	if (sql_value_type(argv[0]) == SQL_NULL) {
+		return;
+	}
+	if ((input_str = sql_value_text(argv[0])) == NULL) {
+		return;
+	}
+
+	int input_str_sz = sql_value_bytes(argv[0]);
+	assert(input_str == sql_value_text(argv[0]));
+
+	trim_procedure(context, BOTH, (const unsigned char *) " ",
+		       1, input_str, input_str_sz);
+}
+
+/**
+ * Normalize args from @a argv input array when it has two args.
+ * 
+ * Case: TRIM(<trim_collation> FROM <str>)
+ * If user has specified <trim_collation> only, call trimming procedure with
+ * BOTH as the flags and that collation.
+ *
+ * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>)
+ * If user has specified side keyword only, call trimming procedure
+ * with the specified side and " " as the collation.
+ *
+ * @param context SQL context.
+ * @param argc Number of args.
+ * @param argv Args array.
+ */
+static void
+trim_func_two_args(sql_context * context, int argc, sql_value **argv)
+{
+	const unsigned char *input_str;
+	assert(argc == 2);
+	(void) argc;
+
+	if (sql_value_type(argv[1]) == SQL_NULL) {
+		return;
+	}
+	if ((input_str = sql_value_text(argv[1])) == NULL) {
+		return;
+	}
+
+	int input_str_sz = sql_value_bytes(argv[1]);
+	assert(input_str == sql_value_text(argv[1]));
+
+	const unsigned char *collation;
+	if (sql_value_type(argv[0]) == SQL_INTEGER) {
+		trim_procedure(context, sql_value_int(argv[0]),
+			       (const unsigned char *) " ", 1,
+			       input_str, input_str_sz);
+	} else if ((collation = sql_value_text(argv[0])) == NULL) {
+		return;
+	} else {
+		int coll_sz = sql_value_bytes(argv[0]);
+		trim_procedure(context, BOTH, collation, coll_sz, input_str,
+			       input_str_sz);
+	}
+}
+
+/**
+ * Normalize args from @a argv input array when it has three args.
+ *
+ * Case: TRIM(LEADING/TRAILING/BOTH <trim_collation> FROM <str>)
+ * User has specified side keyword and <trim_collation>, call trimming
+ * procedure with that args.
+ *
+ * @param context SQL context.
+ * @param argc Number of args.
+ * @param argv Args array.
+ */
+static void
+trim_func_three_args(sql_context * context, int argc, sql_value **argv)
+{
+	const unsigned char *input_str;
+	assert(argc == 3);
+	(void) argc;
+
+	if (sql_value_type(argv[2]) == SQL_NULL) {
+		return;
+	}
+	if ((input_str = sql_value_text(argv[2])) == NULL) {
+		return;
+	}
+
+	int input_str_sz = sql_value_bytes(argv[2]);
+	assert(input_str == sql_value_text(argv[2]));
+
+	const unsigned char *collation;
+	assert(sql_value_type(argv[0]) == SQL_INTEGER);
+	if ((collation = sql_value_text(argv[1])) != 0) {
+		int coll_sz = sql_value_bytes(argv[1]);
+		trim_procedure(context, sql_value_int(argv[0]), collation,
+			       coll_sz, input_str, input_str_sz);
+	} else {
+		return;
+	}
 }
 
 #ifdef SQL_ENABLE_UNKNOWN_SQL_FUNCTION
@@ -1818,12 +1933,9 @@ sqlRegisterBuiltinFunctions(void)
 			  FIELD_TYPE_INTEGER),
 		FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY,
 			  FIELD_TYPE_INTEGER),
-		FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc),
-		FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc),
-		FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc),
-		FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc),
-		FUNCTION_COLL(trim, 1, 3, 0, trimFunc),
-		FUNCTION_COLL(trim, 2, 3, 0, trimFunc),
+		FUNCTION_COLL(trim, 1, 3, 0, trim_func_one_arg),
+		FUNCTION_COLL(trim, 2, 3, 0, trim_func_two_args),
+		FUNCTION_COLL(trim, 3, 3, 0, trim_func_three_args),
 		FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR),
 		FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR),
 		AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize,
diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index 099daf512..985d33605 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -1032,6 +1032,51 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). {
   sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0);
 }
 %endif  SQL_OMIT_CAST
+
+expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). {
+    A.pExpr = sqlExprFunction(pParse, Y, &X);
+    spanSet(&A, &X, &E);
+  }
+
+%type trim_operands {struct ExprList *}
+%destructor trim_operands { sql_expr_list_delete(pParse->db, $$); }
+
+trim_operands(A) ::= trim_from_clause(F) expr(Y). {
+    A = sql_expr_list_append(pParse->db, F, Y.pExpr);
+}
+
+trim_operands(A) ::= expr(Y). {
+    A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
+}
+
+%type trim_from_clause {struct ExprList *}
+%destructor trim_from_clause { sql_expr_list_delete(pParse->db, $$); }
+
+trim_from_clause(A) ::= expr(Y) FROM. {
+    A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
+}
+
+trim_from_clause(A) ::= trim_specification(N) trim_character(Y) FROM. {
+  struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER,
+                                         &sqlIntTokens[N]);
+  A = sql_expr_list_append(pParse->db, NULL, p);
+  if (Y != NULL) {
+    A = sql_expr_list_append(pParse->db, A, Y);
+  }
+}
+
+%type trim_character {struct Expr *}
+%destructor trim_character {sql_expr_delete(pParse->db, $$, false);}
+
+trim_character(A) ::= . { A = NULL; }
+trim_character(A) ::= expr(X). { A = X.pExpr; }
+
+%type trim_specification {int}
+
+trim_specification(A) ::= LEADING.  {A = 1;}
+trim_specification(A) ::= TRAILING. {A = 2;}
+trim_specification(A) ::= BOTH.     {A = 3;}
+
 expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). {
   if( Y && Y->nExpr>pParse->db->aLimit[SQL_LIMIT_FUNCTION_ARG] ){
     const char *err =
diff --git a/src/box/sql/parse_def.c b/src/box/sql/parse_def.c
index 49c76a326..aa1323cb2 100644
--- a/src/box/sql/parse_def.c
+++ b/src/box/sql/parse_def.c
@@ -34,7 +34,9 @@
 
 const struct Token sqlIntTokens[] = {
 	{"0", 1, false},
-	{"1", 1, false}
+	{"1", 1, false},
+	{"2", 1, false},
+	{"3", 1, false},
 };
 
 void
diff --git a/src/box/sql/parse_def.h b/src/box/sql/parse_def.h
index a1af2bacd..5899a7e4e 100644
--- a/src/box/sql/parse_def.h
+++ b/src/box/sql/parse_def.h
@@ -87,7 +87,7 @@ struct Token {
 	bool isReserved;
 };
 
-/** Constant tokens for values 0 and 1. */
+/** Constant tokens for integer values. */
 extern const struct Token sqlIntTokens[];
 
 /** Generate a Token object from a string. */
diff --git a/test/sql-tap/badutf1.test.lua b/test/sql-tap/badutf1.test.lua
index d104efaa9..d32bafae0 100755
--- a/test/sql-tap/badutf1.test.lua
+++ b/test/sql-tap/badutf1.test.lua
@@ -302,7 +302,7 @@ test:do_test(
 test:do_test(
     "badutf-4.1",
     function()
-        return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x")
+        return test:execsql2("SELECT hex(trim('\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x")
     end, {
         -- <badutf-4.1>
         "X", "F0"
@@ -312,7 +312,7 @@ test:do_test(
 test:do_test(
     "badutf-4.2",
     function()
-        return test:execsql2("SELECT hex(ltrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x")
+        return test:execsql2("SELECT hex(trim(LEADING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x")
     end, {
         -- <badutf-4.2>
         "X", "F0808080FF"
@@ -322,7 +322,7 @@ test:do_test(
 test:do_test(
     "badutf-4.3",
     function()
-        return test:execsql2("SELECT hex(rtrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x")
+        return test:execsql2("SELECT hex(trim(TRAILING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x")
     end, {
         -- <badutf-4.3>
         "X", "808080F0"
@@ -332,7 +332,7 @@ test:do_test(
 test:do_test(
     "badutf-4.4",
     function()
-        return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x")
+        return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x")
     end, {
         -- <badutf-4.4>
         "X", "808080F0808080FF"
@@ -342,7 +342,7 @@ test:do_test(
 test:do_test(
     "badutf-4.5",
     function()
-        return test:execsql2("SELECT hex(trim('\xff\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x")
+        return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\xff\x80\x80\xf0\x80\x80\x80\xff')) AS x")
     end, {
         -- <badutf-4.5>
         "X", "80F0808080FF"
@@ -352,7 +352,7 @@ test:do_test(
 test:do_test(
     "badutf-4.6",
     function()
-        return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x")
+        return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x")
     end, {
         -- <badutf-4.6>
         "X", "F0808080FF"
@@ -362,7 +362,7 @@ test:do_test(
 test:do_test(
     "badutf-4.7",
     function()
-        return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80\x80')) AS x")
+        return test:execsql2("SELECT hex(trim('\xff\x80\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x")
     end, {
         -- <badutf-4.7>
         "X", "FF80F0808080FF"
diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
index 251cc3534..8fe04fab1 100755
--- a/test/sql-tap/func.test.lua
+++ b/test/sql-tap/func.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(14586)
+test:plan(14590)
 
 --!./tcltestrunner.lua
 -- 2001 September 15
@@ -1912,37 +1912,37 @@ test:do_test(
 test:do_catchsql_test(
     "func-22.1",
     [[
-        SELECT trim(1,2,3)
+        SELECT TRIM(1,2,3)
     ]], {
         -- <func-22.1>
-        1, "wrong number of arguments to function TRIM()"
+        1, "Syntax error near ','"
         -- </func-22.1>
     })
 
 test:do_catchsql_test(
     "func-22.2",
     [[
-        SELECT ltrim(1,2,3)
+        SELECT LTRIM(1,2,3)
     ]], {
         -- <func-22.2>
-        1, "wrong number of arguments to function LTRIM()"
+        1, "Function 'LTRIM' does not exist"
         -- </func-22.2>
     })
 
 test:do_catchsql_test(
     "func-22.3",
     [[
-        SELECT rtrim(1,2,3)
+        SELECT RTRIM(1,2,3)
     ]], {
         -- <func-22.3>
-        1, "wrong number of arguments to function RTRIM()"
+        1, "Function 'RTRIM' does not exist"
         -- </func-22.3>
     })
 
 test:do_execsql_test(
     "func-22.4",
     [[
-        SELECT trim('  hi  ');
+        SELECT TRIM('  hi  ');
     ]], {
         -- <func-22.4>
         "hi"
@@ -1952,7 +1952,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.5",
     [[
-        SELECT ltrim('  hi  ');
+        SELECT TRIM(LEADING FROM '  hi  ');
     ]], {
         -- <func-22.5>
         "hi  "
@@ -1962,7 +1962,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.6",
     [[
-        SELECT rtrim('  hi  ');
+        SELECT TRIM(TRAILING FROM '  hi  ');
     ]], {
         -- <func-22.6>
         "  hi"
@@ -1972,7 +1972,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.7",
     [[
-        SELECT trim('  hi  ','xyz');
+        SELECT TRIM('xyz' FROM '  hi  ');
     ]], {
         -- <func-22.7>
         "  hi  "
@@ -1982,7 +1982,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.8",
     [[
-        SELECT ltrim('  hi  ','xyz');
+        SELECT TRIM(LEADING 'xyz' FROM '  hi  ');
     ]], {
         -- <func-22.8>
         "  hi  "
@@ -1992,7 +1992,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.9",
     [[
-        SELECT rtrim('  hi  ','xyz');
+        SELECT TRIM(TRAILING 'xyz' FROM '  hi  ');
     ]], {
         -- <func-22.9>
         "  hi  "
@@ -2002,7 +2002,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.10",
     [[
-        SELECT trim('xyxzy  hi  zzzy','xyz');
+        SELECT TRIM('xyz' FROM 'xyxzy  hi  zzzy');
     ]], {
         -- <func-22.10>
         "  hi  "
@@ -2012,7 +2012,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.11",
     [[
-        SELECT ltrim('xyxzy  hi  zzzy','xyz');
+        SELECT TRIM(LEADING 'xyz' FROM 'xyxzy  hi  zzzy');
     ]], {
         -- <func-22.11>
         "  hi  zzzy"
@@ -2022,7 +2022,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.12",
     [[
-        SELECT rtrim('xyxzy  hi  zzzy','xyz');
+        SELECT TRIM(TRAILING 'xyz' FROM 'xyxzy  hi  zzzy');
     ]], {
         -- <func-22.12>
         "xyxzy  hi  "
@@ -2032,7 +2032,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.13",
     [[
-        SELECT trim('  hi  ','');
+        SELECT TRIM('' FROM '  hi  ');
     ]], {
         -- <func-22.13>
         "  hi  "
@@ -2043,7 +2043,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.14",
     [[
-        SELECT hex(trim(x'c280e1bfbff48fbfbf6869',x'6162e1bfbfc280'))
+        SELECT hex(TRIM(x'6162e1bfbfc280' FROM x'c280e1bfbff48fbfbf6869'))
     ]], {
         -- <func-22.14>
         "F48FBFBF6869"
@@ -2052,8 +2052,8 @@ test:do_execsql_test(
 
 test:do_execsql_test(
     "func-22.15",
-    [[SELECT hex(trim(x'6869c280e1bfbff48fbfbf61',
-                         x'6162e1bfbfc280f48fbfbf'))]], {
+    [[SELECT hex(TRIM(x'6162e1bfbfc280f48fbfbf'
+                      FROM x'6869c280e1bfbff48fbfbf61'))]], {
         -- <func-22.15>
         "6869"
         -- </func-22.15>
@@ -2062,7 +2062,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.16",
     [[
-        SELECT hex(trim(x'ceb1ceb2ceb3',x'ceb1'));
+        SELECT hex(TRIM(x'ceb1' FROM x'ceb1ceb2ceb3'));
     ]], {
         -- <func-22.16>
         "CEB2CEB3"
@@ -2073,7 +2073,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.20",
     [[
-        SELECT typeof(trim(NULL));
+        SELECT typeof(TRIM(NULL));
     ]], {
         -- <func-22.20>
         "null"
@@ -2083,7 +2083,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.21",
     [[
-        SELECT typeof(trim(NULL,'xyz'));
+        SELECT typeof(TRIM('xyz' FROM NULL));
     ]], {
         -- <func-22.21>
         "null"
@@ -2093,7 +2093,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.22",
     [[
-        SELECT typeof(trim('hello',NULL));
+        SELECT typeof(TRIM(NULL FROM 'hello'));
     ]], {
         -- <func-22.22>
         "null"
@@ -2105,7 +2105,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.23",
     [[
-        SELECT TRIM(X'004100', X'00');
+        SELECT TRIM(X'00' FROM X'004100');
     ]], {
         -- <func-22.23>
         "A"
@@ -2115,7 +2115,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.24",
     [[
-        SELECT TRIM(X'004100', X'0000');
+        SELECT TRIM(X'0000' FROM X'004100');
     ]], {
         -- <func-22.24>
         "A"
@@ -2125,7 +2125,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.25",
     [[
-        SELECT TRIM(X'004100', X'0042');
+        SELECT TRIM(X'0042' FROM X'004100');
     ]], {
         -- <func-22.25>
         "A"
@@ -2135,7 +2135,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.26",
     [[
-        SELECT TRIM(X'00004100420000', X'00');
+        SELECT TRIM(X'00' FROM X'00004100420000');
     ]], {
         -- <func-22.26>
         "A\0B"
@@ -2145,7 +2145,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.27",
     [[
-        SELECT LTRIM(X'004100', X'00');
+        SELECT TRIM(LEADING X'00' FROM X'004100');
     ]], {
         -- <func-22.27>
         "A\0"
@@ -2155,7 +2155,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.28",
     [[
-        SELECT LTRIM(X'004100', X'0000');
+        SELECT TRIM(LEADING X'0000' FROM X'004100');
     ]], {
         -- <func-22.28>
         "A\0"
@@ -2165,7 +2165,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.29",
     [[
-        SELECT LTRIM(X'004100', X'0042');
+        SELECT TRIM(LEADING X'0042' FROM X'004100');
     ]], {
         -- <func-22.29>
         "A\0"
@@ -2175,7 +2175,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.30",
     [[
-        SELECT LTRIM(X'00004100420000', X'00');
+        SELECT TRIM(LEADING X'00' FROM X'00004100420000');
     ]], {
         -- <func-22.30>
         "A\0B\0\0"
@@ -2185,7 +2185,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.31",
     [[
-        SELECT RTRIM(X'004100', X'00');
+        SELECT TRIM(TRAILING X'00' FROM X'004100');
     ]], {
         -- <func-22.31>
         "\0A"
@@ -2195,7 +2195,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.32",
     [[
-        SELECT RTRIM(X'004100', X'0000');
+        SELECT TRIM(TRAILING X'0000' FROM X'004100');
     ]], {
         -- <func-22.32>
         "\0A"
@@ -2205,7 +2205,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.33",
     [[
-        SELECT RTRIM(X'004100', X'0042');
+        SELECT TRIM(TRAILING X'0042' FROM X'004100');
     ]], {
         -- <func-22.33>
         "\0A"
@@ -2215,13 +2215,55 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func-22.34",
     [[
-        SELECT RTRIM(X'00004100420000', X'00');
+        SELECT TRIM(TRAILING X'00' FROM X'00004100420000');
     ]], {
         -- <func-22.34>
         "\0\0A\0B"
         -- </func-22.34>
     })
 
+-- gh-3879 Check new TRIM() grammar, particularly BOTH keyword and FROM without
+-- any agrs before. LEADING and TRAILING keywords is checked above.
+
+test:do_execsql_test(
+    "func-22.35",
+    [[
+        SELECT TRIM(BOTH FROM '  hi  ');
+    ]], {
+        -- <func-22.35>
+        "hi"
+        -- </func-22.35>
+    })
+test:do_execsql_test(
+    "func-22.36",
+    [[
+        SELECT TRIM(BOTH 'xyz' FROM '  hi  ');
+    ]], {
+        -- <func-22.36>
+        "  hi  "
+        -- </func-22.36>
+    })
+
+test:do_execsql_test(
+    "func-22.37",
+    [[
+        SELECT TRIM(BOTH 'xyz' FROM 'xyxzy  hi  zzzy');
+    ]], {
+        -- <func-22.37>
+        "  hi  "
+        -- </func-22.37>
+    })
+
+test:do_catchsql_test(
+    "func-22.38",
+    [[
+        SELECT TRIM(FROM 'xyxzy');
+    ]], {
+        -- <func-22.38>
+        1, "Syntax error near 'FROM'"
+        -- </func-22.38>
+    })
+
 -- This is to test the deprecated sql_aggregate_count() API.
 --
 --test:do_test(
@@ -2838,16 +2880,16 @@ test:do_execsql_test(
     "SELECT TRIM(CHAR(32,00,32,00,32));",
     {string.char(00,32,00)})
 
--- LTRIM
+-- LEFT TRIM
 test:do_execsql_test(
     "func-70",
-    "SELECT LTRIM(CHAR(32,00,32,00,32));",
+    "SELECT TRIM(LEADING FROM CHAR(32,00,32,00,32));",
     {string.char(00,32,00,32)})
 
--- RTRIM
+-- RIGHT TRIM
 test:do_execsql_test(
     "func-71",
-    "SELECT RTRIM(CHAR(32,00,32,00,32));",
+    "SELECT TRIM(TRAILING FROM CHAR(32,00,32,00,32));",
     {string.char(32,00,32,00)})
 
 -- GROUP_CONCAT
diff --git a/test/sql-tap/with1.test.lua b/test/sql-tap/with1.test.lua
index 495aa4ee4..19953e434 100755
--- a/test/sql-tap/with1.test.lua
+++ b/test/sql-tap/with1.test.lua
@@ -550,7 +550,7 @@ test:do_execsql_test("8.1-mandelbrot", [[
       SELECT group_concat( substr(' .+*#', 1+min(iter/7,4), 1), '') 
       FROM m2 GROUP BY cy
     )
-  SELECT group_concat(rtrim(t),x'0a') FROM a;
+  SELECT group_concat(trim(TRAILING FROM t),x'0a') FROM a;
 ]], {
   -- <8.1-mandelbrot>
   [[                                    ....#







More information about the Tarantool-patches mailing list