[tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sun Apr 14 21:01:14 MSK 2019


Hi! Thanks for the patch! See 20 comments below.

1. Please, do all the issues on the master branch.
We cherry-pick on other branches from the master.

2. Use the newest version of the branch. Your is outdated
on more than 2 weeks.

On 11/04/2019 20:33, Roman Khabibov wrote:
> According to the ANSI standart, ltrim, rtrim and trim should

3. Use a spell checker. Sublime text editor has it too.

standart -> standard

> be merged into one unified TRIM() function. The specialization of
> trimming (left, right or both and trimming charcters) determined

charcters -> characters

> in arguments of this function.
> 
> Closes #3879
> ---
> Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3879-trim
> Issue: https://github.com/tarantool/tarantool/issues/3879
> 
>  extra/mkkeywordhash.c         |   5 ++
>  src/box/sql/func.c            |  46 ++++++++------
>  src/box/sql/global.c          |   6 +-
>  src/box/sql/parse.y           |  48 +++++++++++++++
>  test/sql-tap/badutf1.test.lua |  14 ++---
>  test/sql-tap/func.test.lua    | 111 ++++++++++++++++++++++------------
>  test/sql-tap/with1.test.lua   |   2 +-
>  7 files changed, 165 insertions(+), 67 deletions(-)
> 
> diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c
> index be7bd5545..94a768323 100644
> --- a/extra/mkkeywordhash.c
> +++ b/extra/mkkeywordhash.c
> @@ -91,6 +91,7 @@ struct Keyword {
>  #  define CTE        0x00040000
>  #endif
>  #  define RESERVED   0x00000001
> +#  define FUNCTION   0x00080000

4. These fields are stored in struct Keyword.mask field, which
is never used for anything more complex than 'mask == 0' check.
And I do not see a reason why do you need here anything but
'ALWAYS' constant. Also, see the issue:

https://github.com/tarantool/tarantool/issues/4155

> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index a750e52a1..07d3cd25d 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -1207,8 +1207,7 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
>  }
>  
>  /*
> - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions.
> - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both.
> + * Implementation of the TRIM() function.

5. Such a comment is useless. Please, write a normal comment. Especially
on which arguments this function expects, of which type, and what are
possible combinations of the arguments.

>   */
>  static void
>  trimFunc(sql_context * context, int argc, sql_value ** argv)
> @@ -1216,32 +1215,49 @@ trimFunc(sql_context * context, int argc, sql_value ** argv)
>  	const unsigned char *zIn;	/* Input string */
>  	const unsigned char *zCharSet;	/* Set of characters to trim */
>  	int nIn;		/* Number of bytes in input */
> -	int flags;		/* 1: trimleft  2: trimright  3: trim */
>  	int i;			/* Loop counter */
>  	unsigned char *aLen = 0;	/* Length of each character in zCharSet */
>  	unsigned char **azChar = 0;	/* Individual characters in zCharSet */
>  	int nChar;		/* Number of characters in zCharSet */
> +	/* The index of trim source in the argv array.*/
> +	int source_index = argc - 1;

6. Why could not you leave trim source in argv[0]? Why have you
moved it to the end of the arg list? It causes most of the diff
in that function.

> +	/* True if character set has been passed, false if has't been. */
> +	bool set = true;

7. Just give this variable a normal name, and you will not
need this comment. Also, we usually use a term 'collation' instead
of 'character set'.

> +	/* 1: if it's left side.
> +	 * 2: if it's right side.
> +	 * 3: if it's both sides. */
> +	int trim_side = 3;

8. Please, create a enum with normal names for these constants.

9. We do not use these comment style. Please, put first '/*' and
final '*/' on dedicated lines.

> +
> +	/* If we have 2 agrs, the first can be trimiing side or character set.
> +	 * If we have 3 agrs, the first can be triiming side only, i.e. number. */

10. Two errors in one word: 'trimiing', 'triiming'. And it should be
written on the function itself, not inside it.

> +	if (argc == 2 && sql_value_type(argv[0]) == SQL_INTEGER) {
> +		trim_side = sql_value_int(argv[0]);
> +		set = false;
> +	} else if (argc == 3) {
> +		trim_side = sql_value_int(argv[0]);
> +	}
>  
> -	if (sql_value_type(argv[0]) == SQL_NULL) {
> +	if (sql_value_type(argv[source_index]) == SQL_NULL) {
>  		return;
>  	}
> -	zIn = sql_value_text(argv[0]);
> +
> +	zIn = sql_value_text(argv[source_index]);
>  	if (zIn == 0)
>  		return;
> -	nIn = sql_value_bytes(argv[0]);
> -	assert(zIn == sql_value_text(argv[0]));
> -	if (argc == 1) {
> +	nIn = sql_value_bytes(argv[source_index]);
> +	assert(zIn == sql_value_text(argv[source_index]));
> +	if (source_index == 0 || set == false ) {

11. For boolean variables we use '!' to check if they are false.

>  		static const unsigned char lenOne[] = { 1 };
>  		static unsigned char *const azOne[] = { (u8 *) " " };
>  		nChar = 1;
>  		aLen = (u8 *) lenOne;
>  		azChar = (unsigned char **)azOne;
>  		zCharSet = 0;
> -	} else if ((zCharSet = sql_value_text(argv[1])) == 0) {
> +	} else if ((zCharSet = sql_value_text(argv[source_index - 1])) == 0) {

12. Pointers should be compared with NULL, not 0.

>  		return;
>  	} else {
>  		const unsigned char *z = zCharSet;
> -		int trim_set_sz = sql_value_bytes(argv[1]);
> +		int trim_set_sz = sql_value_bytes(argv[source_index - 1]);
>  		/*
>  		* Count the number of UTF-8 characters passing
>  		* through the entire char set, but not up
> @@ -1272,8 +1288,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv)
>  		}
>  	}
>  	if (nChar > 0) {
> -		flags = SQL_PTR_TO_INT(sql_user_data(context));
> -		if (flags & 1) {
> +		if (trim_side & 1) {

13. When checking flags, use (flag & ...) != 0 instead of an
implicit conversion. In other places too.

>  			while (nIn > 0) {
>  				int len = 0;
>  				for (i = 0; i < nChar; i++) {
> @@ -1288,7 +1303,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv)
>  				nIn -= len;
>  			}
>  		}
> -		if (flags & 2) {
> +		if (trim_side & 2) {
>  			while (nIn > 0) {
>  				int len = 0;
>  				for (i = 0; i < nChar; i++) {
> @@ -1738,12 +1753,9 @@ sqlRegisterBuiltinFunctions(void)
>  			  FIELD_TYPE_INTEGER),
>  		FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY,
>  			  FIELD_TYPE_INTEGER),
> -		FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc),
> -		FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc),
> -		FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc),
> -		FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc),
>  		FUNCTION_COLL(trim, 1, 3, 0, trimFunc),
>  		FUNCTION_COLL(trim, 2, 3, 0, trimFunc),
> +		FUNCTION_COLL(trim, 3, 3, 0, trimFunc),

14. Better write three trim functions taking different number of
args, converting them to normal types, and calling the single
trim function. Instead of making a pile of 'if's about argc inside
the current implementation.

>  		FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR),
>  		FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR),
>  		AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize,> diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
> index d2614d9b0..53e5fd932 100644
> --- a/src/box/sql/parse.y
> +++ b/src/box/sql/parse.y
> @@ -937,6 +937,54 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). {
>    sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0);
>  }
>  %endif  SQL_OMIT_CAST
> +
> +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). {
> +  A.pExpr = sqlExprFunction(pParse, Y, &X);
> +  spanSet(&A, &X, &E);
> +}
> +
> +%type trim_operands {ExprList*}

15. In new code we use 'struct' before each struct
type, and we put a whitespace before '*'. The same in
other places.

> +%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);}
> +
> +trim_operands(A) ::= from_clause(F) trim_source(Y). {
> +  A = sql_expr_list_append(pParse->db, F, Y);
> +}
> +trim_operands(A) ::= trim_source(Y). {
> +  A = sql_expr_list_append(pParse->db, NULL, Y);
> +}
> +
> +%type trim_source {Expr*}
> +%destructor trim_source {sql_expr_delete(pParse->db, $$, false);}
> +
> +trim_source(A) ::= expr(X). {A = X.pExpr;}
> +
> +%type from_clause {ExprList*}
> +%destructor from_clause { sql_expr_list_delete(pParse->db, $$); }
> +
> +from_clause(A) ::= trim_specification(N) trim_set(Y) FROM. {
> +  struct Expr* p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1);
> +  A = sql_expr_list_append(pParse->db, NULL, p);
> +  if (Y != 0) {

16. Please, compare with NULL, not 0.

> +    A = sql_expr_list_append(pParse->db, A, Y);
> +  }
> +}
> +
> +from_clause(A) ::= trim_set(Y) FROM. {
> +  A = sql_expr_list_append(pParse->db, NULL, Y);
> +}
> +
> +%type trim_set {Expr*}
> +%destructor trim_set {sql_expr_delete(pParse->db, $$, false);}
> +
> +trim_set(A) ::= . {A = 0;}

17. The same. Assign NULL, not 0.

> +trim_set(A) ::= expr(X). {A = X.pExpr;}
> +
> +%type trim_specification {int}
> +
> +trim_specification(A) ::= LEADING.  {A = 1;}
> +trim_specification(A) ::= TRAILING. {A = 2;}
> +trim_specification(A) ::= BOTH.     {A = 3;}

18. Why is the grammar so complex? In the standard its
definition takes 12 lines.

In your grammar you've allowed this: TRIM(FROM str).
But it is prohibited by the standard, and leads to an
assertion:

tarantool> box.sql.execute('SELECT TRIM(FROM "abc");')
Assertion failed: (pExpr != 0), function sqlExprListFlags, file src/box/sql/expr.c, line 1964.
Process 38832 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = signal SIGABRT
    frame #0: 0x00007fff7aefb23e libsystem_kernel.dylib`__pthread_kill + 10
libsystem_kernel.dylib`__pthread_kill:
->  0x7fff7aefb23e <+10>: jae    0x7fff7aefb248            ; <+20>
    0x7fff7aefb240 <+12>: movq   %rax, %rdi
    0x7fff7aefb243 <+15>: jmp    0x7fff7aef53b7            ; cerror_nocancel
    0x7fff7aefb248 <+20>: retq  

19. I've refactored the grammar a bit, but it can't be compiled. My
diff is below. Probably it can help.

============================================================================

diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index 53e5fd932..42b754cd6 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -938,46 +938,34 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). {
 }
 %endif  SQL_OMIT_CAST
 
-expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). {
-  A.pExpr = sqlExprFunction(pParse, Y, &X);
+expr(A) ::= TRIM(X) LP trim_from_clause(F) expr(Y) RP(E). {
+  struct Expr *argv = sql_expr_list_append(pParse->db, F, Y);
+  A.pExpr = sqlExprFunction(pParse, argv, &X);
   spanSet(&A, &X, &E);
 }
 
-%type trim_operands {ExprList*}
-%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);}
-
-trim_operands(A) ::= from_clause(F) trim_source(Y). {
-  A = sql_expr_list_append(pParse->db, F, Y);
-}
-trim_operands(A) ::= trim_source(Y). {
-  A = sql_expr_list_append(pParse->db, NULL, Y);
-}
+%type trim_from_clause {struct ExprList *}
+%destructor trim_from_clause { sql_expr_list_delete(pParse->db, $$); }
 
-%type trim_source {Expr*}
-%destructor trim_source {sql_expr_delete(pParse->db, $$, false);}
-
-trim_source(A) ::= expr(X). {A = X.pExpr;}
-
-%type from_clause {ExprList*}
-%destructor from_clause { sql_expr_list_delete(pParse->db, $$); }
-
-from_clause(A) ::= trim_specification(N) trim_set(Y) FROM. {
-  struct Expr* p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1);
+trim_from_clause(A) ::= trim_specification(N) trim_character(Y) FROM. {
+  struct Expr *p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1);
   A = sql_expr_list_append(pParse->db, NULL, p);
-  if (Y != 0) {
+  if (Y != NULL) {
     A = sql_expr_list_append(pParse->db, A, Y);
   }
 }
 
-from_clause(A) ::= trim_set(Y) FROM. {
+trim_from_clause(A) ::= trim_character(Y) FROM. {
   A = sql_expr_list_append(pParse->db, NULL, Y);
 }
 
-%type trim_set {Expr*}
-%destructor trim_set {sql_expr_delete(pParse->db, $$, false);}
+trim_from_clause(A) ::= . { A = NULL; }
+
+%type trim_character {struct Expr *}
+%destructor trim_character {sql_expr_delete(pParse->db, $$, false);}
 
-trim_set(A) ::= . {A = 0;}
-trim_set(A) ::= expr(X). {A = X.pExpr;}
+trim_character(A) ::= . { A = NULL; }
+trim_character(A) ::= expr(X). { A = X.pExpr; }
 
 %type trim_specification {int}
 
============================================================================

> diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
> index 889fc5867..d9c96c5bd 100755
> --- a/test/sql-tap/func.test.lua
> +++ b/test/sql-tap/func.test.lua
> @@ -2215,13 +2215,44 @@ test:do_execsql_test(
>  test:do_execsql_test(
>      "func-22.34",
>      [[
> -        SELECT RTRIM(X'00004100420000', X'00');
> +        SELECT TRIM(TRAILING X'00' FROM X'00004100420000');
>      ]], {
>          -- <func-22.34>
>          "\0\0A\0B"
>          -- </func-22.34>
>      })
>  
> +-- gh-3879 Check BOTH.

20. 3879 was not about 'BOTH' only. Please, describe the
issue in more details, and test the whole grammar. As the
assertion fail above shows, you didn't.

> +
> +test:do_execsql_test(
> +    "func-22.35",
> +    [[
> +        SELECT TRIM(BOTH FROM '  hi  ');
> +    ]], {
> +        -- <func-22.35>
> +        "hi"
> +        -- </func-22.35>
> +    })
> +test:do_execsql_test(
> +    "func-22.36",
> +    [[
> +        SELECT TRIM(BOTH 'xyz' FROM '  hi  ');
> +    ]], {
> +        -- <func-22.36>
> +        "  hi  "
> +        -- </func-22.36>
> +    })
> +
> +test:do_execsql_test(
> +    "func-22.37",
> +    [[
> +        SELECT TRIM(BOTH 'xyz' FROM 'xyxzy  hi  zzzy');
> +    ]], {
> +        -- <func-22.37>
> +        "  hi  "
> +        -- </func-22.37>
> +    })
> +




More information about the Tarantool-patches mailing list