[tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Sun Apr 14 21:01:14 MSK 2019
Hi! Thanks for the patch! See 20 comments below.
1. Please, do all the issues on the master branch.
We cherry-pick on other branches from the master.
2. Use the newest version of the branch. Your is outdated
on more than 2 weeks.
On 11/04/2019 20:33, Roman Khabibov wrote:
> According to the ANSI standart, ltrim, rtrim and trim should
3. Use a spell checker. Sublime text editor has it too.
standart -> standard
> be merged into one unified TRIM() function. The specialization of
> trimming (left, right or both and trimming charcters) determined
charcters -> characters
> in arguments of this function.
>
> Closes #3879
> ---
> Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3879-trim
> Issue: https://github.com/tarantool/tarantool/issues/3879
>
> extra/mkkeywordhash.c | 5 ++
> src/box/sql/func.c | 46 ++++++++------
> src/box/sql/global.c | 6 +-
> src/box/sql/parse.y | 48 +++++++++++++++
> test/sql-tap/badutf1.test.lua | 14 ++---
> test/sql-tap/func.test.lua | 111 ++++++++++++++++++++++------------
> test/sql-tap/with1.test.lua | 2 +-
> 7 files changed, 165 insertions(+), 67 deletions(-)
>
> diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c
> index be7bd5545..94a768323 100644
> --- a/extra/mkkeywordhash.c
> +++ b/extra/mkkeywordhash.c
> @@ -91,6 +91,7 @@ struct Keyword {
> # define CTE 0x00040000
> #endif
> # define RESERVED 0x00000001
> +# define FUNCTION 0x00080000
4. These fields are stored in struct Keyword.mask field, which
is never used for anything more complex than 'mask == 0' check.
And I do not see a reason why do you need here anything but
'ALWAYS' constant. Also, see the issue:
https://github.com/tarantool/tarantool/issues/4155
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index a750e52a1..07d3cd25d 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -1207,8 +1207,7 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
> }
>
> /*
> - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions.
> - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both.
> + * Implementation of the TRIM() function.
5. Such a comment is useless. Please, write a normal comment. Especially
on which arguments this function expects, of which type, and what are
possible combinations of the arguments.
> */
> static void
> trimFunc(sql_context * context, int argc, sql_value ** argv)
> @@ -1216,32 +1215,49 @@ trimFunc(sql_context * context, int argc, sql_value ** argv)
> const unsigned char *zIn; /* Input string */
> const unsigned char *zCharSet; /* Set of characters to trim */
> int nIn; /* Number of bytes in input */
> - int flags; /* 1: trimleft 2: trimright 3: trim */
> int i; /* Loop counter */
> unsigned char *aLen = 0; /* Length of each character in zCharSet */
> unsigned char **azChar = 0; /* Individual characters in zCharSet */
> int nChar; /* Number of characters in zCharSet */
> + /* The index of trim source in the argv array.*/
> + int source_index = argc - 1;
6. Why could not you leave trim source in argv[0]? Why have you
moved it to the end of the arg list? It causes most of the diff
in that function.
> + /* True if character set has been passed, false if has't been. */
> + bool set = true;
7. Just give this variable a normal name, and you will not
need this comment. Also, we usually use a term 'collation' instead
of 'character set'.
> + /* 1: if it's left side.
> + * 2: if it's right side.
> + * 3: if it's both sides. */
> + int trim_side = 3;
8. Please, create a enum with normal names for these constants.
9. We do not use these comment style. Please, put first '/*' and
final '*/' on dedicated lines.
> +
> + /* If we have 2 agrs, the first can be trimiing side or character set.
> + * If we have 3 agrs, the first can be triiming side only, i.e. number. */
10. Two errors in one word: 'trimiing', 'triiming'. And it should be
written on the function itself, not inside it.
> + if (argc == 2 && sql_value_type(argv[0]) == SQL_INTEGER) {
> + trim_side = sql_value_int(argv[0]);
> + set = false;
> + } else if (argc == 3) {
> + trim_side = sql_value_int(argv[0]);
> + }
>
> - if (sql_value_type(argv[0]) == SQL_NULL) {
> + if (sql_value_type(argv[source_index]) == SQL_NULL) {
> return;
> }
> - zIn = sql_value_text(argv[0]);
> +
> + zIn = sql_value_text(argv[source_index]);
> if (zIn == 0)
> return;
> - nIn = sql_value_bytes(argv[0]);
> - assert(zIn == sql_value_text(argv[0]));
> - if (argc == 1) {
> + nIn = sql_value_bytes(argv[source_index]);
> + assert(zIn == sql_value_text(argv[source_index]));
> + if (source_index == 0 || set == false ) {
11. For boolean variables we use '!' to check if they are false.
> static const unsigned char lenOne[] = { 1 };
> static unsigned char *const azOne[] = { (u8 *) " " };
> nChar = 1;
> aLen = (u8 *) lenOne;
> azChar = (unsigned char **)azOne;
> zCharSet = 0;
> - } else if ((zCharSet = sql_value_text(argv[1])) == 0) {
> + } else if ((zCharSet = sql_value_text(argv[source_index - 1])) == 0) {
12. Pointers should be compared with NULL, not 0.
> return;
> } else {
> const unsigned char *z = zCharSet;
> - int trim_set_sz = sql_value_bytes(argv[1]);
> + int trim_set_sz = sql_value_bytes(argv[source_index - 1]);
> /*
> * Count the number of UTF-8 characters passing
> * through the entire char set, but not up
> @@ -1272,8 +1288,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv)
> }
> }
> if (nChar > 0) {
> - flags = SQL_PTR_TO_INT(sql_user_data(context));
> - if (flags & 1) {
> + if (trim_side & 1) {
13. When checking flags, use (flag & ...) != 0 instead of an
implicit conversion. In other places too.
> while (nIn > 0) {
> int len = 0;
> for (i = 0; i < nChar; i++) {
> @@ -1288,7 +1303,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv)
> nIn -= len;
> }
> }
> - if (flags & 2) {
> + if (trim_side & 2) {
> while (nIn > 0) {
> int len = 0;
> for (i = 0; i < nChar; i++) {
> @@ -1738,12 +1753,9 @@ sqlRegisterBuiltinFunctions(void)
> FIELD_TYPE_INTEGER),
> FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY,
> FIELD_TYPE_INTEGER),
> - FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc),
> - FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc),
> - FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc),
> - FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc),
> FUNCTION_COLL(trim, 1, 3, 0, trimFunc),
> FUNCTION_COLL(trim, 2, 3, 0, trimFunc),
> + FUNCTION_COLL(trim, 3, 3, 0, trimFunc),
14. Better write three trim functions taking different number of
args, converting them to normal types, and calling the single
trim function. Instead of making a pile of 'if's about argc inside
the current implementation.
> FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR),
> FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR),
> AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize,> diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
> index d2614d9b0..53e5fd932 100644
> --- a/src/box/sql/parse.y
> +++ b/src/box/sql/parse.y
> @@ -937,6 +937,54 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). {
> sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0);
> }
> %endif SQL_OMIT_CAST
> +
> +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). {
> + A.pExpr = sqlExprFunction(pParse, Y, &X);
> + spanSet(&A, &X, &E);
> +}
> +
> +%type trim_operands {ExprList*}
15. In new code we use 'struct' before each struct
type, and we put a whitespace before '*'. The same in
other places.
> +%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);}
> +
> +trim_operands(A) ::= from_clause(F) trim_source(Y). {
> + A = sql_expr_list_append(pParse->db, F, Y);
> +}
> +trim_operands(A) ::= trim_source(Y). {
> + A = sql_expr_list_append(pParse->db, NULL, Y);
> +}
> +
> +%type trim_source {Expr*}
> +%destructor trim_source {sql_expr_delete(pParse->db, $$, false);}
> +
> +trim_source(A) ::= expr(X). {A = X.pExpr;}
> +
> +%type from_clause {ExprList*}
> +%destructor from_clause { sql_expr_list_delete(pParse->db, $$); }
> +
> +from_clause(A) ::= trim_specification(N) trim_set(Y) FROM. {
> + struct Expr* p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1);
> + A = sql_expr_list_append(pParse->db, NULL, p);
> + if (Y != 0) {
16. Please, compare with NULL, not 0.
> + A = sql_expr_list_append(pParse->db, A, Y);
> + }
> +}
> +
> +from_clause(A) ::= trim_set(Y) FROM. {
> + A = sql_expr_list_append(pParse->db, NULL, Y);
> +}
> +
> +%type trim_set {Expr*}
> +%destructor trim_set {sql_expr_delete(pParse->db, $$, false);}
> +
> +trim_set(A) ::= . {A = 0;}
17. The same. Assign NULL, not 0.
> +trim_set(A) ::= expr(X). {A = X.pExpr;}
> +
> +%type trim_specification {int}
> +
> +trim_specification(A) ::= LEADING. {A = 1;}
> +trim_specification(A) ::= TRAILING. {A = 2;}
> +trim_specification(A) ::= BOTH. {A = 3;}
18. Why is the grammar so complex? In the standard its
definition takes 12 lines.
In your grammar you've allowed this: TRIM(FROM str).
But it is prohibited by the standard, and leads to an
assertion:
tarantool> box.sql.execute('SELECT TRIM(FROM "abc");')
Assertion failed: (pExpr != 0), function sqlExprListFlags, file src/box/sql/expr.c, line 1964.
Process 38832 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = signal SIGABRT
frame #0: 0x00007fff7aefb23e libsystem_kernel.dylib`__pthread_kill + 10
libsystem_kernel.dylib`__pthread_kill:
-> 0x7fff7aefb23e <+10>: jae 0x7fff7aefb248 ; <+20>
0x7fff7aefb240 <+12>: movq %rax, %rdi
0x7fff7aefb243 <+15>: jmp 0x7fff7aef53b7 ; cerror_nocancel
0x7fff7aefb248 <+20>: retq
19. I've refactored the grammar a bit, but it can't be compiled. My
diff is below. Probably it can help.
============================================================================
diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index 53e5fd932..42b754cd6 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -938,46 +938,34 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). {
}
%endif SQL_OMIT_CAST
-expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). {
- A.pExpr = sqlExprFunction(pParse, Y, &X);
+expr(A) ::= TRIM(X) LP trim_from_clause(F) expr(Y) RP(E). {
+ struct Expr *argv = sql_expr_list_append(pParse->db, F, Y);
+ A.pExpr = sqlExprFunction(pParse, argv, &X);
spanSet(&A, &X, &E);
}
-%type trim_operands {ExprList*}
-%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);}
-
-trim_operands(A) ::= from_clause(F) trim_source(Y). {
- A = sql_expr_list_append(pParse->db, F, Y);
-}
-trim_operands(A) ::= trim_source(Y). {
- A = sql_expr_list_append(pParse->db, NULL, Y);
-}
+%type trim_from_clause {struct ExprList *}
+%destructor trim_from_clause { sql_expr_list_delete(pParse->db, $$); }
-%type trim_source {Expr*}
-%destructor trim_source {sql_expr_delete(pParse->db, $$, false);}
-
-trim_source(A) ::= expr(X). {A = X.pExpr;}
-
-%type from_clause {ExprList*}
-%destructor from_clause { sql_expr_list_delete(pParse->db, $$); }
-
-from_clause(A) ::= trim_specification(N) trim_set(Y) FROM. {
- struct Expr* p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1);
+trim_from_clause(A) ::= trim_specification(N) trim_character(Y) FROM. {
+ struct Expr *p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1);
A = sql_expr_list_append(pParse->db, NULL, p);
- if (Y != 0) {
+ if (Y != NULL) {
A = sql_expr_list_append(pParse->db, A, Y);
}
}
-from_clause(A) ::= trim_set(Y) FROM. {
+trim_from_clause(A) ::= trim_character(Y) FROM. {
A = sql_expr_list_append(pParse->db, NULL, Y);
}
-%type trim_set {Expr*}
-%destructor trim_set {sql_expr_delete(pParse->db, $$, false);}
+trim_from_clause(A) ::= . { A = NULL; }
+
+%type trim_character {struct Expr *}
+%destructor trim_character {sql_expr_delete(pParse->db, $$, false);}
-trim_set(A) ::= . {A = 0;}
-trim_set(A) ::= expr(X). {A = X.pExpr;}
+trim_character(A) ::= . { A = NULL; }
+trim_character(A) ::= expr(X). { A = X.pExpr; }
%type trim_specification {int}
============================================================================
> diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
> index 889fc5867..d9c96c5bd 100755
> --- a/test/sql-tap/func.test.lua
> +++ b/test/sql-tap/func.test.lua
> @@ -2215,13 +2215,44 @@ test:do_execsql_test(
> test:do_execsql_test(
> "func-22.34",
> [[
> - SELECT RTRIM(X'00004100420000', X'00');
> + SELECT TRIM(TRAILING X'00' FROM X'00004100420000');
> ]], {
> -- <func-22.34>
> "\0\0A\0B"
> -- </func-22.34>
> })
>
> +-- gh-3879 Check BOTH.
20. 3879 was not about 'BOTH' only. Please, describe the
issue in more details, and test the whole grammar. As the
assertion fail above shows, you didn't.
> +
> +test:do_execsql_test(
> + "func-22.35",
> + [[
> + SELECT TRIM(BOTH FROM ' hi ');
> + ]], {
> + -- <func-22.35>
> + "hi"
> + -- </func-22.35>
> + })
> +test:do_execsql_test(
> + "func-22.36",
> + [[
> + SELECT TRIM(BOTH 'xyz' FROM ' hi ');
> + ]], {
> + -- <func-22.36>
> + " hi "
> + -- </func-22.36>
> + })
> +
> +test:do_execsql_test(
> + "func-22.37",
> + [[
> + SELECT TRIM(BOTH 'xyz' FROM 'xyxzy hi zzzy');
> + ]], {
> + -- <func-22.37>
> + " hi "
> + -- </func-22.37>
> + })
> +
More information about the Tarantool-patches
mailing list