* [tarantool-patches] [PATCH] sql: modify TRIM() function signature @ 2019-04-11 17:33 Roman Khabibov 2019-04-14 18:01 ` [tarantool-patches] " Vladislav Shpilevoy 2019-04-23 10:21 ` Kirill Yukhin 0 siblings, 2 replies; 14+ messages in thread From: Roman Khabibov @ 2019-04-11 17:33 UTC (permalink / raw) To: tarantool-patches; +Cc: v.shpilevoy According to the ANSI standart, ltrim, rtrim and trim should be merged into one unified TRIM() function. The specialization of trimming (left, right or both and trimming charcters) determined in arguments of this function. Closes #3879 --- Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3879-trim Issue: https://github.com/tarantool/tarantool/issues/3879 extra/mkkeywordhash.c | 5 ++ src/box/sql/func.c | 46 ++++++++------ src/box/sql/global.c | 6 +- src/box/sql/parse.y | 48 +++++++++++++++ test/sql-tap/badutf1.test.lua | 14 ++--- test/sql-tap/func.test.lua | 111 ++++++++++++++++++++++------------ test/sql-tap/with1.test.lua | 2 +- 7 files changed, 165 insertions(+), 67 deletions(-) diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c index be7bd5545..94a768323 100644 --- a/extra/mkkeywordhash.c +++ b/extra/mkkeywordhash.c @@ -91,6 +91,7 @@ struct Keyword { # define CTE 0x00040000 #endif # define RESERVED 0x00000001 +# define FUNCTION 0x00080000 /* ** These are the keywords */ @@ -202,6 +203,7 @@ static Keyword aKeywordTable[] = { { "TO", "TK_TO", ALWAYS, true }, { "TRANSACTION", "TK_TRANSACTION", ALWAYS, true }, { "TRIGGER", "TK_TRIGGER", TRIGGER, true }, + { "TRIM", "TK_TRIM", FUNCTION, true }, { "UNION", "TK_UNION", COMPOUND, true }, { "UNIQUE", "TK_UNIQUE", ALWAYS, true }, { "UPDATE", "TK_UPDATE", ALWAYS, true }, @@ -278,6 +280,9 @@ static Keyword aKeywordTable[] = { { "WHILE", "TK_STANDARD", RESERVED, true }, { "TEXT", "TK_TEXT", RESERVED, true }, { "TRUNCATE", "TK_TRUNCATE", ALWAYS, true }, + { "LEADING", "TK_LEADING", ALWAYS, true }, + { "TRAILING", "TK_TRAILING", ALWAYS, true }, + { "BOTH", 
"TK_BOTH", ALWAYS, true }, }; /* Number of keywords */ diff --git a/src/box/sql/func.c b/src/box/sql/func.c index a750e52a1..07d3cd25d 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1207,8 +1207,7 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) } /* - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. + * Implementation of the TRIM() function. */ static void trimFunc(sql_context * context, int argc, sql_value ** argv) @@ -1216,32 +1215,49 @@ trimFunc(sql_context * context, int argc, sql_value ** argv) const unsigned char *zIn; /* Input string */ const unsigned char *zCharSet; /* Set of characters to trim */ int nIn; /* Number of bytes in input */ - int flags; /* 1: trimleft 2: trimright 3: trim */ int i; /* Loop counter */ unsigned char *aLen = 0; /* Length of each character in zCharSet */ unsigned char **azChar = 0; /* Individual characters in zCharSet */ int nChar; /* Number of characters in zCharSet */ + /* The index of trim source in the argv array.*/ + int source_index = argc - 1; + /* True if character set has been passed, false if has't been. */ + bool set = true; + /* 1: if it's left side. + * 2: if it's right side. + * 3: if it's both sides. */ + int trim_side = 3; + + /* If we have 2 agrs, the first can be trimiing side or character set. + * If we have 3 agrs, the first can be triiming side only, i.e. number. 
*/ + if (argc == 2 && sql_value_type(argv[0]) == SQL_INTEGER) { + trim_side = sql_value_int(argv[0]); + set = false; + } else if (argc == 3) { + trim_side = sql_value_int(argv[0]); + } - if (sql_value_type(argv[0]) == SQL_NULL) { + if (sql_value_type(argv[source_index]) == SQL_NULL) { return; } - zIn = sql_value_text(argv[0]); + + zIn = sql_value_text(argv[source_index]); if (zIn == 0) return; - nIn = sql_value_bytes(argv[0]); - assert(zIn == sql_value_text(argv[0])); - if (argc == 1) { + nIn = sql_value_bytes(argv[source_index]); + assert(zIn == sql_value_text(argv[source_index])); + if (source_index == 0 || set == false ) { static const unsigned char lenOne[] = { 1 }; static unsigned char *const azOne[] = { (u8 *) " " }; nChar = 1; aLen = (u8 *) lenOne; azChar = (unsigned char **)azOne; zCharSet = 0; - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { + } else if ((zCharSet = sql_value_text(argv[source_index - 1])) == 0) { return; } else { const unsigned char *z = zCharSet; - int trim_set_sz = sql_value_bytes(argv[1]); + int trim_set_sz = sql_value_bytes(argv[source_index - 1]); /* * Count the number of UTF-8 characters passing * through the entire char set, but not up @@ -1272,8 +1288,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv) } } if (nChar > 0) { - flags = SQL_PTR_TO_INT(sql_user_data(context)); - if (flags & 1) { + if (trim_side & 1) { while (nIn > 0) { int len = 0; for (i = 0; i < nChar; i++) { @@ -1288,7 +1303,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv) nIn -= len; } } - if (flags & 2) { + if (trim_side & 2) { while (nIn > 0) { int len = 0; for (i = 0; i < nChar; i++) { @@ -1738,12 +1753,9 @@ sqlRegisterBuiltinFunctions(void) FIELD_TYPE_INTEGER), FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY, FIELD_TYPE_INTEGER), - FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc), - FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc), - FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc), - FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc), 
FUNCTION_COLL(trim, 1, 3, 0, trimFunc), FUNCTION_COLL(trim, 2, 3, 0, trimFunc), + FUNCTION_COLL(trim, 3, 3, 0, trimFunc), FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR), FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR), AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize, diff --git a/src/box/sql/global.c b/src/box/sql/global.c index 95ad71c38..fccf74100 100644 --- a/src/box/sql/global.c +++ b/src/box/sql/global.c @@ -223,11 +223,13 @@ SQL_WSD struct sqlConfig sqlConfig = { FuncDefHash sqlBuiltinFunctions; /* - * Constant tokens for values 0 and 1. + * Constant tokens for necessary integer values. */ const Token sqlIntTokens[] = { {"0", 1, false}, - {"1", 1, false} + {"1", 1, false}, + {"2", 1, false}, + {"3", 1, false} }; /* diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index d2614d9b0..53e5fd932 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -937,6 +937,54 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). { sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0); } %endif SQL_OMIT_CAST + +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). { + A.pExpr = sqlExprFunction(pParse, Y, &X); + spanSet(&A, &X, &E); +} + +%type trim_operands {ExprList*} +%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);} + +trim_operands(A) ::= from_clause(F) trim_source(Y). { + A = sql_expr_list_append(pParse->db, F, Y); +} +trim_operands(A) ::= trim_source(Y). { + A = sql_expr_list_append(pParse->db, NULL, Y); +} + +%type trim_source {Expr*} +%destructor trim_source {sql_expr_delete(pParse->db, $$, false);} + +trim_source(A) ::= expr(X). {A = X.pExpr;} + +%type from_clause {ExprList*} +%destructor from_clause { sql_expr_list_delete(pParse->db, $$); } + +from_clause(A) ::= trim_specification(N) trim_set(Y) FROM. { + struct Expr* p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1); + A = sql_expr_list_append(pParse->db, NULL, p); + if (Y != 0) { + A = sql_expr_list_append(pParse->db, A, Y); + } +} + +from_clause(A) ::= trim_set(Y) FROM. 
{ + A = sql_expr_list_append(pParse->db, NULL, Y); +} + +%type trim_set {Expr*} +%destructor trim_set {sql_expr_delete(pParse->db, $$, false);} + +trim_set(A) ::= . {A = 0;} +trim_set(A) ::= expr(X). {A = X.pExpr;} + +%type trim_specification {int} + +trim_specification(A) ::= LEADING. {A = 1;} +trim_specification(A) ::= TRAILING. {A = 2;} +trim_specification(A) ::= BOTH. {A = 3;} + expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). { if( Y && Y->nExpr>pParse->db->aLimit[SQL_LIMIT_FUNCTION_ARG] ){ const char *err = diff --git a/test/sql-tap/badutf1.test.lua b/test/sql-tap/badutf1.test.lua index d104efaa9..d32bafae0 100755 --- a/test/sql-tap/badutf1.test.lua +++ b/test/sql-tap/badutf1.test.lua @@ -302,7 +302,7 @@ test:do_test( test:do_test( "badutf-4.1", function() - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(trim('\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.1> "X", "F0" @@ -312,7 +312,7 @@ test:do_test( test:do_test( "badutf-4.2", function() - return test:execsql2("SELECT hex(ltrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(trim(LEADING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.2> "X", "F0808080FF" @@ -322,7 +322,7 @@ test:do_test( test:do_test( "badutf-4.3", function() - return test:execsql2("SELECT hex(rtrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(trim(TRAILING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.3> "X", "808080F0" @@ -332,7 +332,7 @@ test:do_test( test:do_test( "badutf-4.4", function() - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.4> "X", "808080F0808080FF" @@ -342,7 +342,7 @@ test:do_test( 
test:do_test( "badutf-4.5", function() - return test:execsql2("SELECT hex(trim('\xff\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\xff\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.5> "X", "80F0808080FF" @@ -352,7 +352,7 @@ test:do_test( test:do_test( "badutf-4.6", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.6> "X", "F0808080FF" @@ -362,7 +362,7 @@ test:do_test( test:do_test( "badutf-4.7", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.7> "X", "FF80F0808080FF" diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua index 889fc5867..d9c96c5bd 100755 --- a/test/sql-tap/func.test.lua +++ b/test/sql-tap/func.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test = require("sqltester") -test:plan(14586) +test:plan(14589) --!./tcltestrunner.lua -- 2001 September 15 @@ -1912,37 +1912,37 @@ test:do_test( test:do_catchsql_test( "func-22.1", [[ - SELECT trim(1,2,3) + SELECT TRIM(1,2,3) ]], { -- <func-22.1> - 1, "wrong number of arguments to function TRIM()" + 1, "Syntax error near ','" -- </func-22.1> }) test:do_catchsql_test( "func-22.2", [[ - SELECT ltrim(1,2,3) + SELECT LTRIM(1,2,3) ]], { -- <func-22.2> - 1, "wrong number of arguments to function LTRIM()" + 1, "Function 'LTRIM' does not exist" -- </func-22.2> }) test:do_catchsql_test( "func-22.3", [[ - SELECT rtrim(1,2,3) + SELECT RTRIM(1,2,3) ]], { -- <func-22.3> - 1, "wrong number of arguments to function RTRIM()" + 1, "Function 'RTRIM' does not exist" -- </func-22.3> }) test:do_execsql_test( "func-22.4", [[ - SELECT trim(' hi '); + SELECT TRIM(' hi '); ]], { -- <func-22.4> "hi" @@ -1952,7 
+1952,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.5", [[ - SELECT ltrim(' hi '); + SELECT TRIM(LEADING FROM ' hi '); ]], { -- <func-22.5> "hi " @@ -1962,7 +1962,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.6", [[ - SELECT rtrim(' hi '); + SELECT TRIM(TRAILING FROM ' hi '); ]], { -- <func-22.6> " hi" @@ -1972,7 +1972,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.7", [[ - SELECT trim(' hi ','xyz'); + SELECT TRIM('xyz' FROM ' hi '); ]], { -- <func-22.7> " hi " @@ -1982,7 +1982,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.8", [[ - SELECT ltrim(' hi ','xyz'); + SELECT TRIM(LEADING 'xyz' FROM ' hi '); ]], { -- <func-22.8> " hi " @@ -1992,7 +1992,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.9", [[ - SELECT rtrim(' hi ','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM ' hi '); ]], { -- <func-22.9> " hi " @@ -2002,7 +2002,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.10", [[ - SELECT trim('xyxzy hi zzzy','xyz'); + SELECT TRIM('xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.10> " hi " @@ -2012,7 +2012,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.11", [[ - SELECT ltrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(LEADING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.11> " hi zzzy" @@ -2022,7 +2022,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.12", [[ - SELECT rtrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.12> "xyxzy hi " @@ -2032,7 +2032,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.13", [[ - SELECT trim(' hi ',''); + SELECT TRIM('' FROM ' hi '); ]], { -- <func-22.13> " hi " @@ -2043,7 +2043,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.14", [[ - SELECT hex(trim(x'c280e1bfbff48fbfbf6869',x'6162e1bfbfc280')) + SELECT hex(TRIM(x'6162e1bfbfc280' FROM x'c280e1bfbff48fbfbf6869')) ]], { -- <func-22.14> "F48FBFBF6869" @@ -2052,8 +2052,8 @@ test:do_execsql_test( test:do_execsql_test( "func-22.15", - [[SELECT 
hex(trim(x'6869c280e1bfbff48fbfbf61', - x'6162e1bfbfc280f48fbfbf'))]], { + [[SELECT hex(TRIM(x'6162e1bfbfc280f48fbfbf' + FROM x'6869c280e1bfbff48fbfbf61'))]], { -- <func-22.15> "6869" -- </func-22.15> @@ -2062,7 +2062,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.16", [[ - SELECT hex(trim(x'ceb1ceb2ceb3',x'ceb1')); + SELECT hex(TRIM(x'ceb1' FROM x'ceb1ceb2ceb3')); ]], { -- <func-22.16> "CEB2CEB3" @@ -2073,7 +2073,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.20", [[ - SELECT typeof(trim(NULL)); + SELECT typeof(TRIM(NULL)); ]], { -- <func-22.20> "null" @@ -2083,7 +2083,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.21", [[ - SELECT typeof(trim(NULL,'xyz')); + SELECT typeof(TRIM('xyz' FROM NULL)); ]], { -- <func-22.21> "null" @@ -2093,7 +2093,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.22", [[ - SELECT typeof(trim('hello',NULL)); + SELECT typeof(TRIM(NULL FROM 'hello')); ]], { -- <func-22.22> "null" @@ -2105,7 +2105,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.23", [[ - SELECT TRIM(X'004100', X'00'); + SELECT TRIM(X'00' FROM X'004100'); ]], { -- <func-22.23> "A" @@ -2115,7 +2115,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.24", [[ - SELECT TRIM(X'004100', X'0000'); + SELECT TRIM(X'0000' FROM X'004100'); ]], { -- <func-22.24> "A" @@ -2125,7 +2125,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.25", [[ - SELECT TRIM(X'004100', X'0042'); + SELECT TRIM(X'0042' FROM X'004100'); ]], { -- <func-22.25> "A" @@ -2135,7 +2135,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.26", [[ - SELECT TRIM(X'00004100420000', X'00'); + SELECT TRIM(X'00' FROM X'00004100420000'); ]], { -- <func-22.26> "A\0B" @@ -2145,7 +2145,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.27", [[ - SELECT LTRIM(X'004100', X'00'); + SELECT TRIM(LEADING X'00' FROM X'004100'); ]], { -- <func-22.27> "A\0" @@ -2155,7 +2155,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.28", [[ - SELECT 
LTRIM(X'004100', X'0000'); + SELECT TRIM(LEADING X'0000' FROM X'004100'); ]], { -- <func-22.28> "A\0" @@ -2165,7 +2165,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.29", [[ - SELECT LTRIM(X'004100', X'0042'); + SELECT TRIM(LEADING X'0042' FROM X'004100'); ]], { -- <func-22.29> "A\0" @@ -2175,7 +2175,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.30", [[ - SELECT LTRIM(X'00004100420000', X'00'); + SELECT TRIM(LEADING X'00' FROM X'00004100420000'); ]], { -- <func-22.30> "A\0B\0\0" @@ -2185,7 +2185,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.31", [[ - SELECT RTRIM(X'004100', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'004100'); ]], { -- <func-22.31> "\0A" @@ -2195,7 +2195,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.32", [[ - SELECT RTRIM(X'004100', X'0000'); + SELECT TRIM(TRAILING X'0000' FROM X'004100'); ]], { -- <func-22.32> "\0A" @@ -2205,7 +2205,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.33", [[ - SELECT RTRIM(X'004100', X'0042'); + SELECT TRIM(TRAILING X'0042' FROM X'004100'); ]], { -- <func-22.33> "\0A" @@ -2215,13 +2215,44 @@ test:do_execsql_test( test:do_execsql_test( "func-22.34", [[ - SELECT RTRIM(X'00004100420000', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'00004100420000'); ]], { -- <func-22.34> "\0\0A\0B" -- </func-22.34> }) +-- gh-3879 Check BOTH. + +test:do_execsql_test( + "func-22.35", + [[ + SELECT TRIM(BOTH FROM ' hi '); + ]], { + -- <func-22.35> + "hi" + -- </func-22.35> + }) +test:do_execsql_test( + "func-22.36", + [[ + SELECT TRIM(BOTH 'xyz' FROM ' hi '); + ]], { + -- <func-22.36> + " hi " + -- </func-22.36> + }) + +test:do_execsql_test( + "func-22.37", + [[ + SELECT TRIM(BOTH 'xyz' FROM 'xyxzy hi zzzy'); + ]], { + -- <func-22.37> + " hi " + -- </func-22.37> + }) + -- This is to test the deprecated sql_aggregate_count() API. 
-- --test:do_test( @@ -2838,16 +2869,16 @@ test:do_execsql_test( "SELECT TRIM(CHAR(32,00,32,00,32));", {string.char(00,32,00)}) --- LTRIM +-- LEFT TRIM test:do_execsql_test( "func-70", - "SELECT LTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(LEADING FROM CHAR(32,00,32,00,32));", {string.char(00,32,00,32)}) --- RTRIM +-- RIGHT TRIM test:do_execsql_test( "func-71", - "SELECT RTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(TRAILING FROM CHAR(32,00,32,00,32));", {string.char(32,00,32,00)}) -- GROUP_CONCAT diff --git a/test/sql-tap/with1.test.lua b/test/sql-tap/with1.test.lua index f1a169963..5f26f1141 100755 --- a/test/sql-tap/with1.test.lua +++ b/test/sql-tap/with1.test.lua @@ -550,7 +550,7 @@ test:do_execsql_test("8.1-mandelbrot", [[ SELECT group_concat( substr(' .+*#', 1+min(iter/7,4), 1), '') FROM m2 GROUP BY cy ) - SELECT group_concat(rtrim(t),x'0a') FROM a; + SELECT group_concat(trim(TRAILING FROM t),x'0a') FROM a; ]], { -- <8.1-mandelbrot> [[ ....# -- 2.20.1 (Apple Git-117) ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature 2019-04-11 17:33 [tarantool-patches] [PATCH] sql: modify TRIM() function signature Roman Khabibov @ 2019-04-14 18:01 ` Vladislav Shpilevoy 2019-04-16 0:14 ` Roman Khabibov 2019-04-23 10:21 ` Kirill Yukhin 1 sibling, 1 reply; 14+ messages in thread From: Vladislav Shpilevoy @ 2019-04-14 18:01 UTC (permalink / raw) To: tarantool-patches, Roman Khabibov Hi! Thanks for the patch! See 20 comments below. 1. Please, do all the issues on the master branch. We cherry-pick to other branches from master. 2. Use the newest version of the branch. Yours is outdated by more than 2 weeks. On 11/04/2019 20:33, Roman Khabibov wrote: > According to the ANSI standart, ltrim, rtrim and trim should 3. Use a spell checker. The Sublime Text editor has one too. standart -> standard > be merged into one unified TRIM() function. The specialization of > trimming (left, right or both and trimming charcters) determined charcters -> characters > in arguments of this function. > > Closes #3879 > --- > Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3879-trim > Issue: https://github.com/tarantool/tarantool/issues/3879 > > extra/mkkeywordhash.c | 5 ++ > src/box/sql/func.c | 46 ++++++++------ > src/box/sql/global.c | 6 +- > src/box/sql/parse.y | 48 +++++++++++++++ > test/sql-tap/badutf1.test.lua | 14 ++--- > test/sql-tap/func.test.lua | 111 ++++++++++++++++++++++------------ > test/sql-tap/with1.test.lua | 2 +- > 7 files changed, 165 insertions(+), 67 deletions(-) > > diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c > index be7bd5545..94a768323 100644 > --- a/extra/mkkeywordhash.c > +++ b/extra/mkkeywordhash.c > @@ -91,6 +91,7 @@ struct Keyword { > # define CTE 0x00040000 > #endif > # define RESERVED 0x00000001 > +# define FUNCTION 0x00080000 4. These fields are stored in the struct Keyword.mask field, which is never used for anything more complex than a 'mask == 0' check. 
And I do not see a reason why do you need here anything but 'ALWAYS' constant. Also, see the issue: https://github.com/tarantool/tarantool/issues/4155 > diff --git a/src/box/sql/func.c b/src/box/sql/func.c > index a750e52a1..07d3cd25d 100644 > --- a/src/box/sql/func.c > +++ b/src/box/sql/func.c > @@ -1207,8 +1207,7 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) > } > > /* > - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. > - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. > + * Implementation of the TRIM() function. 5. Such a comment is useless. Please, write a normal comment. Especially on which arguments this function expects, of which type, and what are possible combinations of the arguments. > */ > static void > trimFunc(sql_context * context, int argc, sql_value ** argv) > @@ -1216,32 +1215,49 @@ trimFunc(sql_context * context, int argc, sql_value ** argv) > const unsigned char *zIn; /* Input string */ > const unsigned char *zCharSet; /* Set of characters to trim */ > int nIn; /* Number of bytes in input */ > - int flags; /* 1: trimleft 2: trimright 3: trim */ > int i; /* Loop counter */ > unsigned char *aLen = 0; /* Length of each character in zCharSet */ > unsigned char **azChar = 0; /* Individual characters in zCharSet */ > int nChar; /* Number of characters in zCharSet */ > + /* The index of trim source in the argv array.*/ > + int source_index = argc - 1; 6. Why could not you leave trim source in argv[0]? Why have you moved it to the end of the arg list? It causes most of the diff in that function. > + /* True if character set has been passed, false if has't been. */ > + bool set = true; 7. Just give this variable a normal name, and you will not need this comment. Also, we usually use a term 'collation' instead of 'character set'. > + /* 1: if it's left side. > + * 2: if it's right side. > + * 3: if it's both sides. */ > + int trim_side = 3; 8. 
Please, create an enum with normal names for these constants. 9. We do not use this comment style. Please, put the first '/*' and the final '*/' on dedicated lines. > + > + /* If we have 2 agrs, the first can be trimiing side or character set. > + * If we have 3 agrs, the first can be triiming side only, i.e. number. */ 10. Two errors in one word: 'trimiing', 'triiming'. And it should be written on the function itself, not inside it. > + if (argc == 2 && sql_value_type(argv[0]) == SQL_INTEGER) { > + trim_side = sql_value_int(argv[0]); > + set = false; > + } else if (argc == 3) { > + trim_side = sql_value_int(argv[0]); > + } > > - if (sql_value_type(argv[0]) == SQL_NULL) { > + if (sql_value_type(argv[source_index]) == SQL_NULL) { > return; > } > - zIn = sql_value_text(argv[0]); > + > + zIn = sql_value_text(argv[source_index]); > if (zIn == 0) > return; > - nIn = sql_value_bytes(argv[0]); > - assert(zIn == sql_value_text(argv[0])); > - if (argc == 1) { > + nIn = sql_value_bytes(argv[source_index]); > + assert(zIn == sql_value_text(argv[source_index])); > + if (source_index == 0 || set == false ) { 11. For boolean variables we use '!' to check if they are false. > static const unsigned char lenOne[] = { 1 }; > static unsigned char *const azOne[] = { (u8 *) " " }; > nChar = 1; > aLen = (u8 *) lenOne; > azChar = (unsigned char **)azOne; > zCharSet = 0; > - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { > + } else if ((zCharSet = sql_value_text(argv[source_index - 1])) == 0) { 12. Pointers should be compared with NULL, not 0. 
> return; > } else { > const unsigned char *z = zCharSet; > - int trim_set_sz = sql_value_bytes(argv[1]); > + int trim_set_sz = sql_value_bytes(argv[source_index - 1]); > /* > * Count the number of UTF-8 characters passing > * through the entire char set, but not up > @@ -1272,8 +1288,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv) > } > } > if (nChar > 0) { > - flags = SQL_PTR_TO_INT(sql_user_data(context)); > - if (flags & 1) { > + if (trim_side & 1) { 13. When checking flags, use (flag & ...) != 0 instead of an implicit conversion. In other places too. > while (nIn > 0) { > int len = 0; > for (i = 0; i < nChar; i++) { > @@ -1288,7 +1303,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv) > nIn -= len; > } > } > - if (flags & 2) { > + if (trim_side & 2) { > while (nIn > 0) { > int len = 0; > for (i = 0; i < nChar; i++) { > @@ -1738,12 +1753,9 @@ sqlRegisterBuiltinFunctions(void) > FIELD_TYPE_INTEGER), > FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY, > FIELD_TYPE_INTEGER), > - FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc), > - FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc), > - FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc), > - FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc), > FUNCTION_COLL(trim, 1, 3, 0, trimFunc), > FUNCTION_COLL(trim, 2, 3, 0, trimFunc), > + FUNCTION_COLL(trim, 3, 3, 0, trimFunc), 14. Better write three trim functions taking different number of args, converting them to normal types, and calling the single trim function. Instead of making a pile of 'if's about argc inside the current implementation. > FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR), > FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR), > AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize,> diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y > index d2614d9b0..53e5fd932 100644 > --- a/src/box/sql/parse.y > +++ b/src/box/sql/parse.y > @@ -937,6 +937,54 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). 
{ > sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0); > } > %endif SQL_OMIT_CAST > + > +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). { > + A.pExpr = sqlExprFunction(pParse, Y, &X); > + spanSet(&A, &X, &E); > +} > + > +%type trim_operands {ExprList*} 15. In new code we use 'struct' before each struct type, and we put a whitespace before '*'. The same in other places. > +%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);} > + > +trim_operands(A) ::= from_clause(F) trim_source(Y). { > + A = sql_expr_list_append(pParse->db, F, Y); > +} > +trim_operands(A) ::= trim_source(Y). { > + A = sql_expr_list_append(pParse->db, NULL, Y); > +} > + > +%type trim_source {Expr*} > +%destructor trim_source {sql_expr_delete(pParse->db, $$, false);} > + > +trim_source(A) ::= expr(X). {A = X.pExpr;} > + > +%type from_clause {ExprList*} > +%destructor from_clause { sql_expr_list_delete(pParse->db, $$); } > + > +from_clause(A) ::= trim_specification(N) trim_set(Y) FROM. { > + struct Expr* p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1); > + A = sql_expr_list_append(pParse->db, NULL, p); > + if (Y != 0) { 16. Please, compare with NULL, not 0. > + A = sql_expr_list_append(pParse->db, A, Y); > + } > +} > + > +from_clause(A) ::= trim_set(Y) FROM. { > + A = sql_expr_list_append(pParse->db, NULL, Y); > +} > + > +%type trim_set {Expr*} > +%destructor trim_set {sql_expr_delete(pParse->db, $$, false);} > + > +trim_set(A) ::= . {A = 0;} 17. The same. Assign NULL, not 0. > +trim_set(A) ::= expr(X). {A = X.pExpr;} > + > +%type trim_specification {int} > + > +trim_specification(A) ::= LEADING. {A = 1;} > +trim_specification(A) ::= TRAILING. {A = 2;} > +trim_specification(A) ::= BOTH. {A = 3;} 18. Why is the grammar so complex? In the standard its definition takes 12 lines. In your grammar you've allowed this: TRIM(FROM str). 
But it is prohibited by the standard, and leads to an assertion: tarantool> box.sql.execute('SELECT TRIM(FROM "abc");') Assertion failed: (pExpr != 0), function sqlExprListFlags, file src/box/sql/expr.c, line 1964. Process 38832 stopped * thread #1, queue = 'com.apple.main-thread', stop reason = signal SIGABRT frame #0: 0x00007fff7aefb23e libsystem_kernel.dylib`__pthread_kill + 10 libsystem_kernel.dylib`__pthread_kill: -> 0x7fff7aefb23e <+10>: jae 0x7fff7aefb248 ; <+20> 0x7fff7aefb240 <+12>: movq %rax, %rdi 0x7fff7aefb243 <+15>: jmp 0x7fff7aef53b7 ; cerror_nocancel 0x7fff7aefb248 <+20>: retq 19. I've refactored the grammar a bit, but it can't be compiled. My diff is below. Probably it can help. ============================================================================ diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index 53e5fd932..42b754cd6 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -938,46 +938,34 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). { } %endif SQL_OMIT_CAST -expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). { - A.pExpr = sqlExprFunction(pParse, Y, &X); +expr(A) ::= TRIM(X) LP trim_from_clause(F) expr(Y) RP(E). { + struct Expr *argv = sql_expr_list_append(pParse->db, F, Y); + A.pExpr = sqlExprFunction(pParse, argv, &X); spanSet(&A, &X, &E); } -%type trim_operands {ExprList*} -%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);} - -trim_operands(A) ::= from_clause(F) trim_source(Y). { - A = sql_expr_list_append(pParse->db, F, Y); -} -trim_operands(A) ::= trim_source(Y). { - A = sql_expr_list_append(pParse->db, NULL, Y); -} +%type trim_from_clause {struct ExprList *} +%destructor trim_from_clause { sql_expr_list_delete(pParse->db, $$); } -%type trim_source {Expr*} -%destructor trim_source {sql_expr_delete(pParse->db, $$, false);} - -trim_source(A) ::= expr(X). 
{A = X.pExpr;} - -%type from_clause {ExprList*} -%destructor from_clause { sql_expr_list_delete(pParse->db, $$); } - -from_clause(A) ::= trim_specification(N) trim_set(Y) FROM. { - struct Expr* p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1); +trim_from_clause(A) ::= trim_specification(N) trim_character(Y) FROM. { + struct Expr *p = sqlExprAlloc(pParse->db, TK_INTEGER, &sqlIntTokens[N], 1); A = sql_expr_list_append(pParse->db, NULL, p); - if (Y != 0) { + if (Y != NULL) { A = sql_expr_list_append(pParse->db, A, Y); } } -from_clause(A) ::= trim_set(Y) FROM. { +trim_from_clause(A) ::= trim_character(Y) FROM. { A = sql_expr_list_append(pParse->db, NULL, Y); } -%type trim_set {Expr*} -%destructor trim_set {sql_expr_delete(pParse->db, $$, false);} +trim_from_clause(A) ::= . { A = NULL; } + +%type trim_character {struct Expr *} +%destructor trim_character {sql_expr_delete(pParse->db, $$, false);} -trim_set(A) ::= . {A = 0;} -trim_set(A) ::= expr(X). {A = X.pExpr;} +trim_character(A) ::= . { A = NULL; } +trim_character(A) ::= expr(X). { A = X.pExpr; } %type trim_specification {int} ============================================================================ > diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua > index 889fc5867..d9c96c5bd 100755 > --- a/test/sql-tap/func.test.lua > +++ b/test/sql-tap/func.test.lua > @@ -2215,13 +2215,44 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.34", > [[ > - SELECT RTRIM(X'00004100420000', X'00'); > + SELECT TRIM(TRAILING X'00' FROM X'00004100420000'); > ]], { > -- <func-22.34> > "\0\0A\0B" > -- </func-22.34> > }) > > +-- gh-3879 Check BOTH. 20. 3879 was not only about 'BOTH'. Please, describe the issue in more detail, and test the whole grammar. As the assertion failure above shows, you didn't. 
> + > +test:do_execsql_test( > + "func-22.35", > + [[ > + SELECT TRIM(BOTH FROM ' hi '); > + ]], { > + -- <func-22.35> > + "hi" > + -- </func-22.35> > + }) > +test:do_execsql_test( > + "func-22.36", > + [[ > + SELECT TRIM(BOTH 'xyz' FROM ' hi '); > + ]], { > + -- <func-22.36> > + " hi " > + -- </func-22.36> > + }) > + > +test:do_execsql_test( > + "func-22.37", > + [[ > + SELECT TRIM(BOTH 'xyz' FROM 'xyxzy hi zzzy'); > + ]], { > + -- <func-22.37> > + " hi " > + -- </func-22.37> > + }) > + ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature 2019-04-14 18:01 ` [tarantool-patches] " Vladislav Shpilevoy @ 2019-04-16 0:14 ` Roman Khabibov 2019-04-16 17:14 ` Vladislav Shpilevoy 0 siblings, 1 reply; 14+ messages in thread From: Roman Khabibov @ 2019-04-16 0:14 UTC (permalink / raw) To: tarantool-patches; +Cc: Vladislav Shpilevoy Hi! Thanks for the review. > On Apr 14, 2019, at 9:01 PM, Vladislav Shpilevoy <v.shpilevoy@tarantool.org> wrote: > > Hi! Thanks for the patch! See 20 comments below. > > 1. Please, do all the issues on the master branch. > We cherry-pick on other branches from the master. > > 2. Use the newest version of the branch. Your is outdated > on more than 2 weeks. Rebased on 2.1. > On 11/04/2019 20:33, Roman Khabibov wrote: >> According to the ANSI standart, ltrim, rtrim and trim should > > 3. Use a spell checker. Sublime text editor has it too. > > standart -> standard > >> be merged into one unified TRIM() function. The specialization of >> trimming (left, right or both and trimming charcters) determined > > charcters -> characters sql: modify TRIM() function signature According to the ANSI standard, ltrim, rtrim and trim should be merged into one unified TRIM() function. The specialization of trimming (left, right or both and trimming characters) determined in arguments of this function. Closes #3879 >> in arguments of this function. 
>> >> Closes #3879 >> --- >> Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3879-trim >> Issue: https://github.com/tarantool/tarantool/issues/3879 >> >> extra/mkkeywordhash.c | 5 ++ >> src/box/sql/func.c | 46 ++++++++------ >> src/box/sql/global.c | 6 +- >> src/box/sql/parse.y | 48 +++++++++++++++ >> test/sql-tap/badutf1.test.lua | 14 ++--- >> test/sql-tap/func.test.lua | 111 ++++++++++++++++++++++------------ >> test/sql-tap/with1.test.lua | 2 +- >> 7 files changed, 165 insertions(+), 67 deletions(-) >> >> diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c >> index be7bd5545..94a768323 100644 >> --- a/extra/mkkeywordhash.c >> +++ b/extra/mkkeywordhash.c >> @@ -91,6 +91,7 @@ struct Keyword { >> # define CTE 0x00040000 >> #endif >> # define RESERVED 0x00000001 >> +# define FUNCTION 0x00080000 > > 4. These fields are stored in struct Keyword.mask field, which > is never used for anything more complex than 'mask == 0' check. > And I do not see a reason why do you need here anything but > 'ALWAYS' constant. Also, see the issue: > > https://github.com/tarantool/tarantool/issues/4155 diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c index be7bd5545..76e3265e7 100644 --- a/extra/mkkeywordhash.c +++ b/extra/mkkeywordhash.c @@ -278,6 +278,10 @@ static Keyword aKeywordTable[] = { { "WHILE", "TK_STANDARD", RESERVED, true }, { "TEXT", "TK_TEXT", RESERVED, true }, { "TRUNCATE", "TK_TRUNCATE", ALWAYS, true }, + { "TRIM", "TK_TRIM", ALWAYS, true }, + { "LEADING", "TK_LEADING", ALWAYS, true }, + { "TRAILING", "TK_TRAILING", ALWAYS, true }, + { "BOTH", "TK_BOTH", ALWAYS, true }, }; > 8. Please, create a enum with normal names for these constants. +enum trim_specification { + LEADING = 1, + TRAILING = 2, + BOTH = 3 +}; > 12. Pointers should be compared with NULL, not 0. 
+ if (sql_value_type(argv[0]) == SQL_NULL) { + return; + } + if ((input_str = sql_value_text(argv[0])) == NULL) { + return; + } > >> return; >> } else { >> const unsigned char *z = zCharSet; >> - int trim_set_sz = sql_value_bytes(argv[1]); >> + int trim_set_sz = sql_value_bytes(argv[source_index - 1]); >> /* >> * Count the number of UTF-8 characters passing >> * through the entire char set, but not up >> @@ -1272,8 +1288,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv) >> } >> } >> if (nChar > 0) { >> - flags = SQL_PTR_TO_INT(sql_user_data(context)); >> - if (flags & 1) { >> + if (trim_side & 1) { > > 13. When checking flags, use (flag & ...) != 0 instead of an > implicit conversion. In other places too. + if ((flags & 1) != 0) { + if ((flags & 2) != 0) { > >> while (nIn > 0) { >> int len = 0; >> for (i = 0; i < nChar; i++) { >> @@ -1288,7 +1303,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv) >> nIn -= len; >> } >> } >> - if (flags & 2) { >> + if (trim_side & 2) { >> while (nIn > 0) { >> int len = 0; >> for (i = 0; i < nChar; i++) { >> @@ -1738,12 +1753,9 @@ sqlRegisterBuiltinFunctions(void) >> FIELD_TYPE_INTEGER), >> FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY, >> FIELD_TYPE_INTEGER), >> - FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc), >> - FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc), >> - FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc), >> - FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc), >> FUNCTION_COLL(trim, 1, 3, 0, trimFunc), >> FUNCTION_COLL(trim, 2, 3, 0, trimFunc), >> + FUNCTION_COLL(trim, 3, 3, 0, trimFunc), > > 14. Better write three trim functions taking different number of > args, converting them to normal types, and calling the single > trim function. Instead of making a pile of 'if's about argc inside > the current implementation. Done. 
But now I have duplicated pieces of code: + const unsigned char *input_str; + assert(argc == 1); + (void) argc; + + if (sql_value_type(argv[0]) == SQL_NULL) { + return; + } + if ((input_str = sql_value_text(argv[0])) == NULL) { + return; + } + + int input_str_sz = sql_value_bytes(argv[0]); + assert(input_str == sql_value_text(argv[0])); > >> FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR), >> FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR), >> AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize,> diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y >> index d2614d9b0..53e5fd932 100644 >> --- a/src/box/sql/parse.y >> +++ b/src/box/sql/parse.y >> @@ -937,6 +937,54 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). { >> sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0); >> } >> %endif SQL_OMIT_CAST >> + >> +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). { >> + A.pExpr = sqlExprFunction(pParse, Y, &X); >> + spanSet(&A, &X, &E); >> +} >> + >> +%type trim_operands {ExprList*} > > 15. In new code we use 'struct' before each struct > type, and we put a whitespace before '*'. The same in > other places. Done. >> + A = sql_expr_list_append(pParse->db, A, Y); >> + } >> +} >> + >> +from_clause(A) ::= trim_set(Y) FROM. { >> + A = sql_expr_list_append(pParse->db, NULL, Y); >> +} >> + >> +%type trim_set {Expr*} >> +%destructor trim_set {sql_expr_delete(pParse->db, $$, false);} >> + >> +trim_set(A) ::= . {A = 0;} > > 17. The same. Assign NULL, not 0. +trim_character(A) ::= . { A = NULL; } > >> +trim_set(A) ::= expr(X). {A = X.pExpr;} >> + >> +%type trim_specification {int} >> + >> +trim_specification(A) ::= LEADING. {A = 1;} >> +trim_specification(A) ::= TRAILING. {A = 2;} >> +trim_specification(A) ::= BOTH. {A = 3;} > > 18. Why is the grammar so complex? In the standard its > definition takes 12 lines. Because I haven’t found another way to implement the grammar that can be compiled. > In your grammar you've allowed this: TRIM(FROM str). 
> But it is prohibited by the standard, and leads to an > assertion: Now I prohibit that. > 19. I've refactored the grammar a bit, but it can't be compiled. My > diff is below. Probably it can help. +test:do_catchsql_test( + "func-22.38", + [[ + SELECT TRIM(FROM 'xyxzy'); + ]], { + -- <func-22.38> + 1, "Syntax error near 'FROM'" + -- </func-22.38> + }) + > 20. 3879 was not about 'BOTH' only. Please, describe the > issue in more details, and test the whole grammar. As the > assertion fail above shows, you didn’t. +-- gh-3879 Check new TRIM() grammar, particularly BOTH keyword and FROM without +-- any agrs before. LEADING and TRAILING keywords is checked above. commit f8b3475d9c5f4f479c2ee1709c78a16e1f02aec9 Author: Roman Khabibov <roman.habibov@tarantool.org> Date: Thu Mar 28 14:01:33 2019 +0300 sql: modify TRIM() function signature According to the ANSI standard, ltrim, rtrim and trim should be merged into one unified TRIM() function. The specialization of trimming (left, right or both and trimming characters) determined in arguments of this function. Closes #3879 diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c index be7bd5545..76e3265e7 100644 --- a/extra/mkkeywordhash.c +++ b/extra/mkkeywordhash.c @@ -278,6 +278,10 @@ static Keyword aKeywordTable[] = { { "WHILE", "TK_STANDARD", RESERVED, true }, { "TEXT", "TK_TEXT", RESERVED, true }, { "TRUNCATE", "TK_TRUNCATE", ALWAYS, true }, + { "TRIM", "TK_TRIM", ALWAYS, true }, + { "LEADING", "TK_LEADING", ALWAYS, true }, + { "TRAILING", "TK_TRAILING", ALWAYS, true }, + { "BOTH", "TK_BOTH", ALWAYS, true }, }; /* Number of keywords */ diff --git a/src/box/sql/func.c b/src/box/sql/func.c index abeecefa1..bf7e7a652 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1286,108 +1286,223 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) sql_result_text(context, (char *)zOut, j, sql_free); } -/* - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. 
- * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. +enum trim_specification { + LEADING = 1, + TRAILING = 2, + BOTH = 3 +}; + +/** + * Remove chars included into @a collation from @a input_str. + * @param context SQL context. + * @param flags Trim specification: left, right or both. + * @param collation Character set. + * @param coll_sz Character set size in bytes. + * @param input_str Input string for trimming. + * @param input_str_sz Input string size in bytes. */ static void -trimFunc(sql_context * context, int argc, sql_value ** argv) +trim_procedure(sql_context * context, enum trim_specification flags, + const unsigned char *collation, int coll_sz, + const unsigned char *input_str, int input_str_sz) { - const unsigned char *zIn; /* Input string */ - const unsigned char *zCharSet; /* Set of characters to trim */ - int nIn; /* Number of bytes in input */ - int flags; /* 1: trimleft 2: trimright 3: trim */ - int i; /* Loop counter */ - unsigned char *aLen = 0; /* Length of each character in zCharSet */ - unsigned char **azChar = 0; /* Individual characters in zCharSet */ - int nChar; /* Number of characters in zCharSet */ + int i; + /* + * Length of each character in collation. + */ + unsigned char *aLen = 0; + /* + * Individual characters in collation. + */ + unsigned char **azChar = 0; + /* + * Number of characters in collation. 
+ */ + int nChar; - if (sql_value_type(argv[0]) == SQL_NULL) { - return; - } - zIn = sql_value_text(argv[0]); - if (zIn == 0) - return; - nIn = sql_value_bytes(argv[0]); - assert(zIn == sql_value_text(argv[0])); - if (argc == 1) { - static const unsigned char lenOne[] = { 1 }; - static unsigned char *const azOne[] = { (u8 *) " " }; - nChar = 1; - aLen = (u8 *) lenOne; - azChar = (unsigned char **)azOne; - zCharSet = 0; - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { - return; - } else { - const unsigned char *z = zCharSet; - int trim_set_sz = sql_value_bytes(argv[1]); - /* - * Count the number of UTF-8 characters passing - * through the entire char set, but not up - * to the '\0' or X'00' character. This allows - * to handle trimming set containing such - * characters. - */ - nChar = sql_utf8_char_count(z, trim_set_sz); - if (nChar > 0) { - azChar = - contextMalloc(context, - ((i64) nChar) * (sizeof(char *) + 1)); - if (azChar == 0) { - return; - } - aLen = (unsigned char *)&azChar[nChar]; - z = zCharSet; - i = 0; - nChar = 0; - int handled_bytes_cnt = trim_set_sz; - while(handled_bytes_cnt > 0) { - azChar[nChar] = (unsigned char *)(z + i); - SQL_UTF8_FWD_1(z, i, trim_set_sz); - aLen[nChar] = (u8) (z + i - azChar[nChar]); - handled_bytes_cnt -= aLen[nChar]; - nChar++; - } + const unsigned char *z = collation; + /* + * Count the number of UTF-8 characters passing + * through the entire char set, but not up + * to the '\0' or X'00' character. This allows + * to handle trimming set containing such + * characters. 
+ */ + nChar = sql_utf8_char_count(z, coll_sz); + if (nChar > 0) { + azChar = + contextMalloc(context, + ((i64) nChar) * (sizeof(char *) + 1)); + if (azChar == 0) { + return; + } + aLen = (unsigned char *)&azChar[nChar]; + z = collation; + i = 0; + nChar = 0; + int handled_bytes_cnt = coll_sz; + while(handled_bytes_cnt > 0) { + azChar[nChar] = (unsigned char *)(z + i); + SQL_UTF8_FWD_1(z, i, coll_sz); + aLen[nChar] = (u8) (z + i - azChar[nChar]); + handled_bytes_cnt -= aLen[nChar]; + nChar++; } } if (nChar > 0) { - flags = SQL_PTR_TO_INT(sql_user_data(context)); - if (flags & 1) { - while (nIn > 0) { + if ((flags & 1) != 0) { + while (input_str_sz > 0) { int len = 0; for (i = 0; i < nChar; i++) { len = aLen[i]; - if (len <= nIn - && memcmp(zIn, azChar[i], len) == 0) + if (len <= input_str_sz + && memcmp(input_str, + azChar[i], len) == 0) break; } if (i >= nChar) break; - zIn += len; - nIn -= len; + input_str += len; + input_str_sz -= len; } } - if (flags & 2) { - while (nIn > 0) { + if ((flags & 2) != 0) { + while (input_str_sz > 0) { int len = 0; for (i = 0; i < nChar; i++) { len = aLen[i]; - if (len <= nIn - && memcmp(&zIn[nIn - len], + if (len <= input_str_sz + && memcmp(&input_str[input_str_sz - len], azChar[i], len) == 0) break; } if (i >= nChar) break; - nIn -= len; + input_str_sz -= len; } } - if (zCharSet) { + if (collation) { sql_free(azChar); } } - sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT); + sql_result_text(context, (char *)input_str,input_str_sz, + SQL_TRANSIENT); +} + +/** + * Normalize args from @a argv input array when it has one arg only. + * + * Case: TRIM(<str>) + * Call trimming procedure with BOTH as the flags and " " as the collation. + * + * @param context SQL context. + * @param argc Number of args. + * @param argv Args array. 
+ */ +static void +trim_func_one_arg(sql_context * context, int argc, sql_value **argv) +{ + const unsigned char *input_str; + assert(argc == 1); + (void) argc; + + if (sql_value_type(argv[0]) == SQL_NULL) { + return; + } + if ((input_str = sql_value_text(argv[0])) == NULL) { + return; + } + + int input_str_sz = sql_value_bytes(argv[0]); + assert(input_str == sql_value_text(argv[0])); + + trim_procedure(context, BOTH, (const unsigned char *) " ", + 1, input_str, input_str_sz); +} + +/** + * Normalize args from @a argv input array when it has two args. + * + * Case: TRIM(<trim_collation> FROM <str>) + * If user has specified <trim_collation> only, call trimming procedure with + * BOTH as the flags and that collation. + * + * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>) + * If user has specified side keyword only, call trimming procedure + * with the specified side and " " as the collation. + * + * @param context SQL context. + * @param argc Number of args. + * @param argv Args array. + */ +static void +trim_func_two_args(sql_context * context, int argc, sql_value **argv) +{ + const unsigned char *input_str; + assert(argc == 2); + (void) argc; + + if (sql_value_type(argv[1]) == SQL_NULL) { + return; + } + if ((input_str = sql_value_text(argv[1])) == NULL) { + return; + } + + int input_str_sz = sql_value_bytes(argv[1]); + assert(input_str == sql_value_text(argv[1])); + + const unsigned char *collation; + if (sql_value_type(argv[0]) == SQL_INTEGER) { + trim_procedure(context, sql_value_int(argv[0]), + (const unsigned char *) " ", 1, + input_str, input_str_sz); + } else if ((collation = sql_value_text(argv[0])) == NULL) { + return; + } else { + int coll_sz = sql_value_bytes(argv[0]); + trim_procedure(context, BOTH, collation, coll_sz, input_str, + input_str_sz); + } +} + +/** + * Normalize args from @a argv input array when it has three args. 
+ * + * Case: TRIM(LEADING/TRAILING/BOTH <trim_collation> FROM <str>) + * User has specified side keyword and <trim_collation>, call trimming + * procedure with that args. + * + * @param context SQL context. + * @param argc Number of args. + * @param argv Args array. + */ +static void +trim_func_three_args(sql_context * context, int argc, sql_value **argv) +{ + const unsigned char *input_str; + assert(argc == 3); + (void) argc; + + if (sql_value_type(argv[2]) == SQL_NULL) { + return; + } + if ((input_str = sql_value_text(argv[2])) == NULL) { + return; + } + + int input_str_sz = sql_value_bytes(argv[2]); + assert(input_str == sql_value_text(argv[2])); + + const unsigned char *collation; + assert(sql_value_type(argv[0]) == SQL_INTEGER); + if ((collation = sql_value_text(argv[1])) != 0) { + int coll_sz = sql_value_bytes(argv[1]); + trim_procedure(context, sql_value_int(argv[0]), collation, + coll_sz, input_str, input_str_sz); + } else { + return; + } } #ifdef SQL_ENABLE_UNKNOWN_SQL_FUNCTION @@ -1818,12 +1933,9 @@ sqlRegisterBuiltinFunctions(void) FIELD_TYPE_INTEGER), FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY, FIELD_TYPE_INTEGER), - FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc), - FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc), - FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc), - FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc), - FUNCTION_COLL(trim, 1, 3, 0, trimFunc), - FUNCTION_COLL(trim, 2, 3, 0, trimFunc), + FUNCTION_COLL(trim, 1, 3, 0, trim_func_one_arg), + FUNCTION_COLL(trim, 2, 3, 0, trim_func_two_args), + FUNCTION_COLL(trim, 3, 3, 0, trim_func_three_args), FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR), FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR), AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize, diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index 099daf512..985d33605 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -1032,6 +1032,51 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). 
{ sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0); } %endif SQL_OMIT_CAST + +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). { + A.pExpr = sqlExprFunction(pParse, Y, &X); + spanSet(&A, &X, &E); + } + +%type trim_operands {struct ExprList *} +%destructor trim_operands { sql_expr_list_delete(pParse->db, $$); } + +trim_operands(A) ::= trim_from_clause(F) expr(Y). { + A = sql_expr_list_append(pParse->db, F, Y.pExpr); +} + +trim_operands(A) ::= expr(Y). { + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); +} + +%type trim_from_clause {struct ExprList *} +%destructor trim_from_clause { sql_expr_list_delete(pParse->db, $$); } + +trim_from_clause(A) ::= expr(Y) FROM. { + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); +} + +trim_from_clause(A) ::= trim_specification(N) trim_character(Y) FROM. { + struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER, + &sqlIntTokens[N]); + A = sql_expr_list_append(pParse->db, NULL, p); + if (Y != NULL) { + A = sql_expr_list_append(pParse->db, A, Y); + } +} + +%type trim_character {struct Expr *} +%destructor trim_character {sql_expr_delete(pParse->db, $$, false);} + +trim_character(A) ::= . { A = NULL; } +trim_character(A) ::= expr(X). { A = X.pExpr; } + +%type trim_specification {int} + +trim_specification(A) ::= LEADING. {A = 1;} +trim_specification(A) ::= TRAILING. {A = 2;} +trim_specification(A) ::= BOTH. {A = 3;} + expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). 
{ if( Y && Y->nExpr>pParse->db->aLimit[SQL_LIMIT_FUNCTION_ARG] ){ const char *err = diff --git a/src/box/sql/parse_def.c b/src/box/sql/parse_def.c index 49c76a326..aa1323cb2 100644 --- a/src/box/sql/parse_def.c +++ b/src/box/sql/parse_def.c @@ -34,7 +34,9 @@ const struct Token sqlIntTokens[] = { {"0", 1, false}, - {"1", 1, false} + {"1", 1, false}, + {"2", 1, false}, + {"3", 1, false}, }; void diff --git a/src/box/sql/parse_def.h b/src/box/sql/parse_def.h index a1af2bacd..5899a7e4e 100644 --- a/src/box/sql/parse_def.h +++ b/src/box/sql/parse_def.h @@ -87,7 +87,7 @@ struct Token { bool isReserved; }; -/** Constant tokens for values 0 and 1. */ +/** Constant tokens for integer values. */ extern const struct Token sqlIntTokens[]; /** Generate a Token object from a string. */ diff --git a/test/sql-tap/badutf1.test.lua b/test/sql-tap/badutf1.test.lua index d104efaa9..d32bafae0 100755 --- a/test/sql-tap/badutf1.test.lua +++ b/test/sql-tap/badutf1.test.lua @@ -302,7 +302,7 @@ test:do_test( test:do_test( "badutf-4.1", function() - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(trim('\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.1> "X", "F0" @@ -312,7 +312,7 @@ test:do_test( test:do_test( "badutf-4.2", function() - return test:execsql2("SELECT hex(ltrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(trim(LEADING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.2> "X", "F0808080FF" @@ -322,7 +322,7 @@ test:do_test( test:do_test( "badutf-4.3", function() - return test:execsql2("SELECT hex(rtrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(trim(TRAILING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.3> "X", "808080F0" @@ -332,7 +332,7 @@ test:do_test( test:do_test( "badutf-4.4", function() - return 
test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.4> "X", "808080F0808080FF" @@ -342,7 +342,7 @@ test:do_test( test:do_test( "badutf-4.5", function() - return test:execsql2("SELECT hex(trim('\xff\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\xff\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.5> "X", "80F0808080FF" @@ -352,7 +352,7 @@ test:do_test( test:do_test( "badutf-4.6", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.6> "X", "F0808080FF" @@ -362,7 +362,7 @@ test:do_test( test:do_test( "badutf-4.7", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.7> "X", "FF80F0808080FF" diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua index 251cc3534..8fe04fab1 100755 --- a/test/sql-tap/func.test.lua +++ b/test/sql-tap/func.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test = require("sqltester") -test:plan(14586) +test:plan(14590) --!./tcltestrunner.lua -- 2001 September 15 @@ -1912,37 +1912,37 @@ test:do_test( test:do_catchsql_test( "func-22.1", [[ - SELECT trim(1,2,3) + SELECT TRIM(1,2,3) ]], { -- <func-22.1> - 1, "wrong number of arguments to function TRIM()" + 1, "Syntax error near ','" -- </func-22.1> }) test:do_catchsql_test( "func-22.2", [[ - SELECT ltrim(1,2,3) + SELECT LTRIM(1,2,3) ]], { -- <func-22.2> - 1, "wrong number of arguments to function LTRIM()" + 1, "Function 'LTRIM' does not exist" -- </func-22.2> }) test:do_catchsql_test( "func-22.3", [[ - SELECT rtrim(1,2,3) + 
SELECT RTRIM(1,2,3) ]], { -- <func-22.3> - 1, "wrong number of arguments to function RTRIM()" + 1, "Function 'RTRIM' does not exist" -- </func-22.3> }) test:do_execsql_test( "func-22.4", [[ - SELECT trim(' hi '); + SELECT TRIM(' hi '); ]], { -- <func-22.4> "hi" @@ -1952,7 +1952,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.5", [[ - SELECT ltrim(' hi '); + SELECT TRIM(LEADING FROM ' hi '); ]], { -- <func-22.5> "hi " @@ -1962,7 +1962,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.6", [[ - SELECT rtrim(' hi '); + SELECT TRIM(TRAILING FROM ' hi '); ]], { -- <func-22.6> " hi" @@ -1972,7 +1972,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.7", [[ - SELECT trim(' hi ','xyz'); + SELECT TRIM('xyz' FROM ' hi '); ]], { -- <func-22.7> " hi " @@ -1982,7 +1982,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.8", [[ - SELECT ltrim(' hi ','xyz'); + SELECT TRIM(LEADING 'xyz' FROM ' hi '); ]], { -- <func-22.8> " hi " @@ -1992,7 +1992,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.9", [[ - SELECT rtrim(' hi ','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM ' hi '); ]], { -- <func-22.9> " hi " @@ -2002,7 +2002,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.10", [[ - SELECT trim('xyxzy hi zzzy','xyz'); + SELECT TRIM('xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.10> " hi " @@ -2012,7 +2012,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.11", [[ - SELECT ltrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(LEADING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.11> " hi zzzy" @@ -2022,7 +2022,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.12", [[ - SELECT rtrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.12> "xyxzy hi " @@ -2032,7 +2032,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.13", [[ - SELECT trim(' hi ',''); + SELECT TRIM('' FROM ' hi '); ]], { -- <func-22.13> " hi " @@ -2043,7 +2043,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.14", [[ - 
SELECT hex(trim(x'c280e1bfbff48fbfbf6869',x'6162e1bfbfc280')) + SELECT hex(TRIM(x'6162e1bfbfc280' FROM x'c280e1bfbff48fbfbf6869')) ]], { -- <func-22.14> "F48FBFBF6869" @@ -2052,8 +2052,8 @@ test:do_execsql_test( test:do_execsql_test( "func-22.15", - [[SELECT hex(trim(x'6869c280e1bfbff48fbfbf61', - x'6162e1bfbfc280f48fbfbf'))]], { + [[SELECT hex(TRIM(x'6162e1bfbfc280f48fbfbf' + FROM x'6869c280e1bfbff48fbfbf61'))]], { -- <func-22.15> "6869" -- </func-22.15> @@ -2062,7 +2062,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.16", [[ - SELECT hex(trim(x'ceb1ceb2ceb3',x'ceb1')); + SELECT hex(TRIM(x'ceb1' FROM x'ceb1ceb2ceb3')); ]], { -- <func-22.16> "CEB2CEB3" @@ -2073,7 +2073,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.20", [[ - SELECT typeof(trim(NULL)); + SELECT typeof(TRIM(NULL)); ]], { -- <func-22.20> "null" @@ -2083,7 +2083,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.21", [[ - SELECT typeof(trim(NULL,'xyz')); + SELECT typeof(TRIM('xyz' FROM NULL)); ]], { -- <func-22.21> "null" @@ -2093,7 +2093,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.22", [[ - SELECT typeof(trim('hello',NULL)); + SELECT typeof(TRIM(NULL FROM 'hello')); ]], { -- <func-22.22> "null" @@ -2105,7 +2105,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.23", [[ - SELECT TRIM(X'004100', X'00'); + SELECT TRIM(X'00' FROM X'004100'); ]], { -- <func-22.23> "A" @@ -2115,7 +2115,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.24", [[ - SELECT TRIM(X'004100', X'0000'); + SELECT TRIM(X'0000' FROM X'004100'); ]], { -- <func-22.24> "A" @@ -2125,7 +2125,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.25", [[ - SELECT TRIM(X'004100', X'0042'); + SELECT TRIM(X'0042' FROM X'004100'); ]], { -- <func-22.25> "A" @@ -2135,7 +2135,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.26", [[ - SELECT TRIM(X'00004100420000', X'00'); + SELECT TRIM(X'00' FROM X'00004100420000'); ]], { -- <func-22.26> "A\0B" @@ -2145,7 +2145,7 @@ 
test:do_execsql_test( test:do_execsql_test( "func-22.27", [[ - SELECT LTRIM(X'004100', X'00'); + SELECT TRIM(LEADING X'00' FROM X'004100'); ]], { -- <func-22.27> "A\0" @@ -2155,7 +2155,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.28", [[ - SELECT LTRIM(X'004100', X'0000'); + SELECT TRIM(LEADING X'0000' FROM X'004100'); ]], { -- <func-22.28> "A\0" @@ -2165,7 +2165,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.29", [[ - SELECT LTRIM(X'004100', X'0042'); + SELECT TRIM(LEADING X'0042' FROM X'004100'); ]], { -- <func-22.29> "A\0" @@ -2175,7 +2175,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.30", [[ - SELECT LTRIM(X'00004100420000', X'00'); + SELECT TRIM(LEADING X'00' FROM X'00004100420000'); ]], { -- <func-22.30> "A\0B\0\0" @@ -2185,7 +2185,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.31", [[ - SELECT RTRIM(X'004100', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'004100'); ]], { -- <func-22.31> "\0A" @@ -2195,7 +2195,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.32", [[ - SELECT RTRIM(X'004100', X'0000'); + SELECT TRIM(TRAILING X'0000' FROM X'004100'); ]], { -- <func-22.32> "\0A" @@ -2205,7 +2205,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.33", [[ - SELECT RTRIM(X'004100', X'0042'); + SELECT TRIM(TRAILING X'0042' FROM X'004100'); ]], { -- <func-22.33> "\0A" @@ -2215,13 +2215,55 @@ test:do_execsql_test( test:do_execsql_test( "func-22.34", [[ - SELECT RTRIM(X'00004100420000', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'00004100420000'); ]], { -- <func-22.34> "\0\0A\0B" -- </func-22.34> }) +-- gh-3879 Check new TRIM() grammar, particularly BOTH keyword and FROM without +-- any agrs before. LEADING and TRAILING keywords is checked above. 
+ +test:do_execsql_test( + "func-22.35", + [[ + SELECT TRIM(BOTH FROM ' hi '); + ]], { + -- <func-22.35> + "hi" + -- </func-22.35> + }) +test:do_execsql_test( + "func-22.36", + [[ + SELECT TRIM(BOTH 'xyz' FROM ' hi '); + ]], { + -- <func-22.36> + " hi " + -- </func-22.36> + }) + +test:do_execsql_test( + "func-22.37", + [[ + SELECT TRIM(BOTH 'xyz' FROM 'xyxzy hi zzzy'); + ]], { + -- <func-22.37> + " hi " + -- </func-22.37> + }) + +test:do_catchsql_test( + "func-22.38", + [[ + SELECT TRIM(FROM 'xyxzy'); + ]], { + -- <func-22.38> + 1, "Syntax error near 'FROM'" + -- </func-22.38> + }) + -- This is to test the deprecated sql_aggregate_count() API. -- --test:do_test( @@ -2838,16 +2880,16 @@ test:do_execsql_test( "SELECT TRIM(CHAR(32,00,32,00,32));", {string.char(00,32,00)}) --- LTRIM +-- LEFT TRIM test:do_execsql_test( "func-70", - "SELECT LTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(LEADING FROM CHAR(32,00,32,00,32));", {string.char(00,32,00,32)}) --- RTRIM +-- RIGHT TRIM test:do_execsql_test( "func-71", - "SELECT RTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(TRAILING FROM CHAR(32,00,32,00,32));", {string.char(32,00,32,00)}) -- GROUP_CONCAT diff --git a/test/sql-tap/with1.test.lua b/test/sql-tap/with1.test.lua index 495aa4ee4..19953e434 100755 --- a/test/sql-tap/with1.test.lua +++ b/test/sql-tap/with1.test.lua @@ -550,7 +550,7 @@ test:do_execsql_test("8.1-mandelbrot", [[ SELECT group_concat( substr(' .+*#', 1+min(iter/7,4), 1), '') FROM m2 GROUP BY cy ) - SELECT group_concat(rtrim(t),x'0a') FROM a; + SELECT group_concat(trim(TRAILING FROM t),x'0a') FROM a; ]], { -- <8.1-mandelbrot> [[ ....# ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature 2019-04-16 0:14 ` Roman Khabibov @ 2019-04-16 17:14 ` Vladislav Shpilevoy 2019-04-18 17:11 ` Roman Khabibov 0 siblings, 1 reply; 14+ messages in thread From: Vladislav Shpilevoy @ 2019-04-16 17:14 UTC (permalink / raw) To: Roman Khabibov, tarantool-patches Hi! Thanks for the fixes! Much better now, seriously, but see 21 comments below. >> 8. Please, create a enum with normal names for these constants. > +enum trim_specification { > + LEADING = 1, > + TRAILING = 2, > + BOTH = 3 1. These values are used as a bitmask in the TRIM function implementation. I expected that you would account for it. BOTH should be a bit combination of LEADING and TRAILING. Also, in such a case it should be 'trim_side_mask' enum, not just 'trim_specification' - what does it specify? In addition, we have a strict policy of naming enum values, because they are visible in the whole namespace. We do not have C++ namespaces. C-way of namespacing is prefixing all functions and constants with a certain name. It means, that the values should be prefixed with uppercased enum name (or its part, when it is too long). Here I would use just 'TRIM_' prefix. Finally, add a comment to that enum. 2. Ok, you added enum, but you do not use it at all anywhere. What is the point of such an enum? You still use constants in both parse.y and trim_procedure. Please, do a self-review. It is easy to find such places by yourself just by diligently scanning the diff a couple of times before sending. 
>> >>> return; >>> } else { >>> const unsigned char *z = zCharSet; >>> - int trim_set_sz = sql_value_bytes(argv[1]); >>> + int trim_set_sz = sql_value_bytes(argv[source_index - 1]); >>> /* >>> * Count the number of UTF-8 characters passing >>> * through the entire char set, but not up >>> @@ -1272,8 +1288,7 @@ trimFunc(sql_context * context, int argc, sql_value ** argv) >>> } >>> } >>> if (nChar > 0) { >>> - flags = SQL_PTR_TO_INT(sql_user_data(context)); >>> - if (flags & 1) { >>> + if (trim_side & 1) { >> >> 13. When checking flags, use (flag & ...) != 0 instead of an >> implicit conversion. In other places too. > + if ((flags & 1) != 0) { > + if ((flags & 2) != 0) { 3. Use enum bitmask values instead of 1 and 2. (flags & TRIM_TRAILING) != 0 (flags & TRIM_LEADING) != 0 >> >> 14. Better write three trim functions taking different number of >> args, converting them to normal types, and calling the single >> trim function. Instead of making a pile of 'if's about argc inside >> the current implementation. > Done. But now I have dublicated pieces of code: 4. Then do not duplicate and extract it into another function. It is one of your tasks as a programmer to reduce code duplication. You should not be a silent text-editor into which I insert my own code and ideas via the mailing list. Probably after fixing my next comments the code duplication will be minor or will even disappear. > diff --git a/src/box/sql/func.c b/src/box/sql/func.c > index abeecefa1..bf7e7a652 100644 > --- a/src/box/sql/func.c > +++ b/src/box/sql/func.c > @@ -1286,108 +1286,223 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) > sql_result_text(context, (char *)zOut, j, sql_free); > } > > -/* > - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. > - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. 
> +enum trim_specification { > + LEADING = 1, > + TRAILING = 2, > + BOTH = 3 > +}; > + > +/** > + * Remove chars included into @a collation from @a input_str. > + * @param context SQL context. > + * @param flags Trim specification: left, right or both. > + * @param collation Character set. > + * @param coll_sz Character set size in bytes. > + * @param input_str Input string for trimming. > + * @param input_str_sz Input string size in bytes. > */ > static void > -trimFunc(sql_context * context, int argc, sql_value ** argv) > +trim_procedure(sql_context * context, enum trim_specification flags, > + const unsigned char *collation, int coll_sz, > + const unsigned char *input_str, int input_str_sz) 5. Broken alignment. 6. Why do you really need 'unsigned char'? I do not see any arithmetical operations here. Only assignments. > { > - const unsigned char *zIn; /* Input string */ > - const unsigned char *zCharSet; /* Set of characters to trim */ > - int nIn; /* Number of bytes in input */ > - int flags; /* 1: trimleft 2: trimright 3: trim */ > - int i; /* Loop counter */ > - unsigned char *aLen = 0; /* Length of each character in zCharSet */ > - unsigned char **azChar = 0; /* Individual characters in zCharSet */ > - int nChar; /* Number of characters in zCharSet */ > + int i; > + /* 7. Trailing whitespaces here and below. As I know, git highlights them with red color, which means, that you haven't reviewed that patch before sending. Please, do it next time. Also, you can avoid automatic trailing whitespaces if install one of the comment packages for Sublime. > + * Length of each character in collation. 8. Ok, now I see what did you mean as 'character set' in the previous version. Sorry, in such a case it is not collation of course, and it is strange, that you blindly renamed it without any opposition. It is ok to argue with me. > + */ > + unsigned char *aLen = 0; 9. Please, do not use camel code style for new code. We never use it in Tarantool. Use normal names. 
> + /* > + * Individual characters in collation. > + */ > + unsigned char **azChar = 0; > + /* > + * Number of characters in collation. > + */ > + int nChar; > > + const unsigned char *z = collation; > + /* > + * Count the number of UTF-8 characters passing > + * through the entire char set, but not up > + * to the '\0' or X'00' character. This allows > + * to handle trimming set containing such > + * characters. 10. The comment's indentation is reduced and the text can be realligned with less number of lines. > + */ > + nChar = sql_utf8_char_count(z, coll_sz); 11. It is not C89. You do not need to declare all the variables at the beginning of function before their usage. > +/** > + * Normalize args from @a argv input array when it has one arg only. 12. Out of 66. In some other places below too. Sublime has facilities to show 66 and 80 borders, google by the phrase 'sublime rulers'. Please, use them. > + * > + * Case: TRIM(<str>) > + * Call trimming procedure with BOTH as the flags and " " as the collation. > + * > + * @param context SQL context. > + * @param argc Number of args. > + * @param argv Args array. 13. Comments on such simple args are useless and on the other hand there is nothing more to say. We often omit @param/@retval section in such a case, and I think it is applicable here. I mean, that everything above first @param is ok, but below is not necessary. You can keep it if you want, up to you. > + */ > +static void > +trim_func_one_arg(sql_context * context, int argc, sql_value **argv) > +{ > + const unsigned char *input_str; > + assert(argc == 1); > + (void) argc; > + > + if (sql_value_type(argv[0]) == SQL_NULL) { > + return; > + } 14. We do not use curly braces when 'if' or 'for' body consists of one line. What is more, you do not need this check at all, because sql_value_text returns NULL, when value is NULL as well. The same in other helper functions. 
> + if ((input_str = sql_value_text(argv[0])) == NULL) { > + return; > + }> + > + int input_str_sz = sql_value_bytes(argv[0]); > + assert(input_str == sql_value_text(argv[0])); 15. What is a point of that assertion? You assigned input_str to this value literally 5 lines above. > + > + trim_procedure(context, BOTH, (const unsigned char *) " ", > + 1, input_str, input_str_sz); > +} > + > +/** > + * Normalize args from @a argv input array when it has two args. > + * > + * Case: TRIM(<trim_collation> FROM <str>) > + * If user has specified <trim_collation> only, call trimming procedure with > + * BOTH as the flags and that collation. > + * > + * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>) > + * If user has specified side keyword only, call trimming procedure > + * with the specified side and " " as the collation. > + * > + * @param context SQL context. > + * @param argc Number of args. > + * @param argv Args array. > + */ > +static void > +trim_func_two_args(sql_context * context, int argc, sql_value **argv) > +{ > + const unsigned char *input_str; > + assert(argc == 2); > + (void) argc; > + > + if (sql_value_type(argv[1]) == SQL_NULL) { > + return; > + } > + if ((input_str = sql_value_text(argv[1])) == NULL) { > + return; > + } > + > + int input_str_sz = sql_value_bytes(argv[1]); > + assert(input_str == sql_value_text(argv[1])); > + > + const unsigned char *collation; > + if (sql_value_type(argv[0]) == SQL_INTEGER) { > + trim_procedure(context, sql_value_int(argv[0]), > + (const unsigned char *) " ", 1, > + input_str, input_str_sz); > + } else if ((collation = sql_value_text(argv[0])) == NULL) { > + return; > + } else { > + int coll_sz = sql_value_bytes(argv[0]); > + trim_procedure(context, BOTH, collation, coll_sz, input_str, > + input_str_sz); > + } > +} > + > +/** > + * Normalize args from @a argv input array when it has three args. 
> + * > + * Case: TRIM(LEADING/TRAILING/BOTH <trim_collation> FROM <str>) > + * User has specified side keyword and <trim_collation>, call trimming > + * procedure with that args. > + * > + * @param context SQL context. > + * @param argc Number of args. > + * @param argv Args array. > + */ > +static void > +trim_func_three_args(sql_context * context, int argc, sql_value **argv) > +{ > + const unsigned char *input_str; > + assert(argc == 3); > + (void) argc; > + > + if (sql_value_type(argv[2]) == SQL_NULL) { > + return; > + } > + if ((input_str = sql_value_text(argv[2])) == NULL) { > + return; > + } > + > + int input_str_sz = sql_value_bytes(argv[2]); > + assert(input_str == sql_value_text(argv[2])); > + > + const unsigned char *collation; > + assert(sql_value_type(argv[0]) == SQL_INTEGER); > + if ((collation = sql_value_text(argv[1])) != 0) { 16. As I said in the previous review, and in reviews to other patches - use NULL to check if a pointer is NULL. When a code hunk is tall, and someone sees code like variable = func() if (variable != 0) .... they could think that the variable is integer. It is confusing (variable can be declared somewhere above and the one does not see its type). > + int coll_sz = sql_value_bytes(argv[1]); > + trim_procedure(context, sql_value_int(argv[0]), collation, > + coll_sz, input_str, input_str_sz); > + } else { > + return; 17. What is a point of this last return? Even without 'else' the compiler inserts implicit 'ret' instruction at the end of 'void' function. > + } > } > > #ifdef SQL_ENABLE_UNKNOWN_SQL_FUNCTION > diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y > index 099daf512..985d33605 100644 > --- a/src/box/sql/parse.y > +++ b/src/box/sql/parse.y > @@ -1032,6 +1032,51 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). { > sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0); > } > %endif SQL_OMIT_CAST > + > +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). 
{ > + A.pExpr = sqlExprFunction(pParse, Y, &X); > + spanSet(&A, &X, &E); > + } > + > +%type trim_operands {struct ExprList *} > +%destructor trim_operands { sql_expr_list_delete(pParse->db, $$); } > + > +trim_operands(A) ::= trim_from_clause(F) expr(Y). { > + A = sql_expr_list_append(pParse->db, F, Y.pExpr); > +} > + > +trim_operands(A) ::= expr(Y). { > + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); > +} > + > +%type trim_from_clause {struct ExprList *} > +%destructor trim_from_clause { sql_expr_list_delete(pParse->db, $$); } > + > +trim_from_clause(A) ::= expr(Y) FROM. { > + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); > +} > + > +trim_from_clause(A) ::= trim_specification(N) trim_character(Y) FROM. { 18. I understand, why you did not use trim_character rule above, but someone looking at this code first time and not seen our discussion will not understand. I would add a comment about it. > + struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER, > + &sqlIntTokens[N]); > + A = sql_expr_list_append(pParse->db, NULL, p); > + if (Y != NULL) { > + A = sql_expr_list_append(pParse->db, A, Y); > + } > +} > + > +%type trim_character {struct Expr *} > +%destructor trim_character {sql_expr_delete(pParse->db, $$, false);} > + > +trim_character(A) ::= . { A = NULL; } > +trim_character(A) ::= expr(X). { A = X.pExpr; } 19. Exactly the same rule already exists: case_operand. I think, it is worth merging them into one rule like expr_optional(A) ::= . { A = NULL; } expr_optional(A) ::= expr(X). { A = X.pExpr; } And using in both places. > + > +%type trim_specification {int} > + > +trim_specification(A) ::= LEADING. {A = 1;} > +trim_specification(A) ::= TRAILING. {A = 2;} > +trim_specification(A) ::= BOTH. 
{A = 3;} > + > diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua > index 251cc3534..8fe04fab1 100755 > --- a/test/sql-tap/func.test.lua > +++ b/test/sql-tap/func.test.lua > @@ -1,6 +1,6 @@ > #!/usr/bin/env tarantool > test = require("sqltester") > -test:plan(14586) > +test:plan(14590) > > --!./tcltestrunner.lua > -- 2001 September 15 > @@ -1912,37 +1912,37 @@ test:do_test( > test:do_catchsql_test( > "func-22.1", > [[ > - SELECT trim(1,2,3) > + SELECT TRIM(1,2,3) 20. Why? I thought that all identifiers are normalized anyway, including function names, and you do not need to uppercase everything manually. The same about the test func-22.4, func-22.20. > ]], { > -- <func-22.1> > - 1, "wrong number of arguments to function TRIM()" > + 1, "Syntax error near ','" > -- </func-22.1> > }) > @@ -2215,13 +2215,55 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.34", > [[ > - SELECT RTRIM(X'00004100420000', X'00'); > + SELECT TRIM(TRAILING X'00' FROM X'00004100420000'); > ]], { > -- <func-22.34> > "\0\0A\0B" > -- </func-22.34> > }) > > +-- gh-3879 Check new TRIM() grammar, particularly BOTH keyword and FROM without > +-- any agrs before. LEADING and TRAILING keywords is checked above. 21. Out of 66. ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature 2019-04-16 17:14 ` Vladislav Shpilevoy @ 2019-04-18 17:11 ` Roman Khabibov 2019-04-19 12:49 ` Vladislav Shpilevoy 0 siblings, 1 reply; 14+ messages in thread From: Roman Khabibov @ 2019-04-18 17:11 UTC (permalink / raw) To: tarantool-patches; +Cc: Vladislav Shpilevoy Hi! Thanks for the review. >>> 8. Please, create a enum with normal names for these constants. >> +enum trim_specification { >> + LEADING = 1, >> + TRAILING = 2, >> + BOTH = 3 > > 1. These values are used as a bitmask in the TRIM function > implementation. I expected that you would account it. BOTH > should be a bit combination of LEADING and TRAILING. > > Also, in such a case it should be 'trim_side_mask' enum, > not just 'trim_specification' - what does it specify. > > In addition, we have a strict policy of naming enum values, > because they are visible in the whole namespace. We do not > have C++ namespaces. C-way of namespacing is prefixing all > functions and constants with a certain name. It means, that > the values should be prefixed with uppercased enum name > (or its part, when it is too long). Here I would use just > 'TRIM_' prefix. > > Finally, add a comment to that enum. I accounted that, but I didn’t think that it should be implemented that way. diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h index b322602dc..d5a3e15c1 100644 --- a/src/box/sql/sqlInt.h +++ b/src/box/sql/sqlInt.h @@ -1680,6 +1680,17 @@ struct FuncDestructor { * single query - might change over time */ +/* + * Trim side mask components. TRIM_LEADING means to trim left side + * only. TRIM_TRAILING is to trim right side only. TRIM_BOTH is to + * trim both sides. + */ +enum trim_side_mask { + TRIM_LEADING = 1, + TRIM_TRAILING = 2, + TRIM_BOTH = TRIM_LEADING | TRIM_TRAILING +}; > 2. Ok, you added enum, but you do not use it at all > anywhere. What is a point of such enum? You still use > constants in both parse.y and trim_procedure. 
> > Please, do a self-review. In is easy to find such > places by yourself just diligently scanning the diff > couple of times before a send. +trim_specification(A) ::= LEADING. {A = TRIM_LEADING;} +trim_specification(A) ::= TRAILING. {A = TRIM_TRAILING;} +trim_specification(A) ::= BOTH. {A = TRIM_BOTH;} - flags = SQL_PTR_TO_INT(sql_user_data(context)); - if (flags & 1) { - while (nIn > 0) { + if (char_cnt > 0) { + if ((flags & TRIM_LEADING) != 0) { - if (flags & 2) { - while (nIn > 0) { + if ((flags & TRIM_TRAILING) != 0) { >>> 14. Better write three trim functions taking different number of >>> args, converting them to normal types, and calling the single >>> trim function. Instead of making a pile of 'if's about argc inside >>> the current implementation. >> Done. But now I have dublicated pieces of code: > > 4. Then do not duplicate and extract it into another function. It is > one of your tasks as a programmer to reduce code duplication. You should > not be a silent text-editor into which I insert my own code and ideas > via the mailing list. > > Probably after fixing my next comments the code duplication will be > minor or will even disappear. Yes. Now only six lines are duplicated. I don’t think that it requires a separate function. + const unsigned char *input_str; + assert(argc == 1); + (void) argc; + + if ((input_str = sql_value_text(argv[0])) == NULL) + return; + int input_str_sz = sql_value_bytes(argv[0]); > >> diff --git a/src/box/sql/func.c b/src/box/sql/func.c >> index abeecefa1..bf7e7a652 100644 >> --- a/src/box/sql/func.c >> +++ b/src/box/sql/func.c >> @@ -1286,108 +1286,223 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) >> sql_result_text(context, (char *)zOut, j, sql_free); >> } >> >> -/* >> - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. >> - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both.
>> +enum trim_specification { >> + LEADING = 1, >> + TRAILING = 2, >> + BOTH = 3 >> +}; >> + >> +/** >> + * Remove chars included into @a collation from @a input_str. >> + * @param context SQL context. >> + * @param flags Trim specification: left, right or both. >> + * @param collation Character set. >> + * @param coll_sz Character set size in bytes. >> + * @param input_str Input string for trimming. >> + * @param input_str_sz Input string size in bytes. >> */ >> static void >> -trimFunc(sql_context * context, int argc, sql_value ** argv) >> +trim_procedure(sql_context * context, enum trim_specification flags, >> + const unsigned char *collation, int coll_sz, >> + const unsigned char *input_str, int input_str_sz) > > 5. Broken alignment. Fixed. +trim_procedure(sql_context * context, enum trim_side_mask flags, + const unsigned char *trim_set, int trim_set_sz, + const unsigned char *input_str, int input_str_sz) > 6. Why do you really need 'unsigned char'? I do not see any > arithmetical operations here. Only assignments. > >> { >> - const unsigned char *zIn; /* Input string */ >> - const unsigned char *zCharSet; /* Set of characters to trim */ >> - int nIn; /* Number of bytes in input */ >> - int flags; /* 1: trimleft 2: trimright 3: trim */ >> - int i; /* Loop counter */ >> - unsigned char *aLen = 0; /* Length of each character in zCharSet */ >> - unsigned char **azChar = 0; /* Individual characters in zCharSet */ >> - int nChar; /* Number of characters in zCharSet */ >> + int i; >> + /* I need it, because of "sql_utf8_char_count(const unsigned char *str, int byte_len);” or "const unsigned char *sql_value_text(sql_value *);" > 7. Trailing whitespaces here and below. As I know, git highlights them > with red color, which means, that you haven't reviewed that patch > before sending. Please, do it next time. Also, you can avoid automatic > trailing whitespaces if install one of the comment packages for Sublime. > >> + * Length of each character in collation. Removed. 
> 8. Ok, now I see what did you mean as 'character set' in the > previous version. Sorry, in such a case it is not collation of > course, and it is strange, that you blindly renamed it without any > opposition. It is ok to argue with me. Now I name it “trim_set”. +trim_procedure(sql_context * context, enum trim_side_mask flags, + const unsigned char *trim_set, int trim_set_sz, + const unsigned char *input_str, int input_str_sz) >> + */ >> + unsigned char *aLen = 0; > > 9. Please, do not use camel code style for new code. We > never use it in Tarantool. Use normal names. + /* + * Length of each character in the character set. + */ + char unsigned *char_len = 0; + /* + * Individual characters in the character set. + */ + char unsigned **ind_chars = 0; + /* + * Number of characters in the character set. + */ + int char_cnt; >> + /* >> + * Individual characters in collation. >> + */ >> + unsigned char **azChar = 0; >> + /* >> + * Number of characters in collation. >> + */ >> + int nChar; >> >> + const unsigned char *z = collation; >> + /* >> + * Count the number of UTF-8 characters passing >> + * through the entire char set, but not up >> + * to the '\0' or X'00' character. This allows >> + * to handle trimming set containing such >> + * characters. > > 10. The comment's indentation is reduced and the text can be > realligned with less number of lines. + int i; + /* Length of each character in the character set. */ + char unsigned *char_len = 0; + /* Individual characters in the character set. */ + char unsigned **ind_chars = 0; + /* Number of characters in the character set. */ + int char_cnt; + + const unsigned char *z = trim_set; + /* + * Count the number of UTF-8 characters passing through + * the entire char set, but not up to the '\0' or X'00' + * character. This allows to handle trimming set + * containing such characters. + */ >> + */ >> + nChar = sql_utf8_char_count(z, coll_sz); > > 11. It is not C89. 
You do not need to declare all the variables > at the beginning of function before their usage. + int char_cnt = sql_utf8_char_count(z, trim_set_sz); >> + * >> + * Case: TRIM(<str>) >> + * Call trimming procedure with BOTH as the flags and " " as the collation. >> + * >> + * @param context SQL context. >> + * @param argc Number of args. >> + * @param argv Args array. > > 13. Comments on such simple args are useless and on the other hand there > is nothing more to say. We often omit @param/@retval section in such a > case, and I think it is applicable here. > > I mean, that everything above first @param is ok, but below is not > necessary. You can keep it if you want, up to you. +/** + * Normalize args from @a argv input array when it has one arg + * only. + * + * Case: TRIM(<str>) + * Call trimming procedure with TRIM_BOTH as the flags and " " as + * the trimming set. + * + * @param context SQL context. + */ +static void +trim_func_one_arg(sql_context * context, int argc, sql_value **argv) >> + */ >> +static void >> +trim_func_one_arg(sql_context * context, int argc, sql_value **argv) >> +{ >> + const unsigned char *input_str; >> + assert(argc == 1); >> + (void) argc; >> + >> + if (sql_value_type(argv[0]) == SQL_NULL) { >> + return; >> + } > > 14. We do not use curly braces when 'if' or 'for' body > consists of one line. What is more, you do not need > this check at all, because sql_value_text returns NULL, > when value is NULL as well. The same in other helper > functions. - if (sql_value_type(argv[0]) == SQL_NULL) { - return; >> + if ((input_str = sql_value_text(argv[0])) == NULL) { >> + return; >> + }> + >> + int input_str_sz = sql_value_bytes(argv[0]); >> + assert(input_str == sql_value_text(argv[0])); > > 15. What is a point of that assertion? You assigned input_str > to this value literally 5 lines above. 
- assert(zIn == sql_value_text(argv[0])); >> + >> + trim_procedure(context, BOTH, (const unsigned char *) " ", >> + 1, input_str, input_str_sz); >> +} >> + >> +/** >> + * Normalize args from @a argv input array when it has two args. >> + * >> + * Case: TRIM(<trim_collation> FROM <str>) >> + * If user has specified <trim_collation> only, call trimming procedure with >> + * BOTH as the flags and that collation. >> + * >> + * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>) >> + * If user has specified side keyword only, call trimming procedure >> + * with the specified side and " " as the collation. >> + * >> + * @param context SQL context. >> + * @param argc Number of args. >> + * @param argv Args array. >> + */ >> +static void >> +trim_func_two_args(sql_context * context, int argc, sql_value **argv) >> +{ >> + const unsigned char *input_str; >> + assert(argc == 2); >> + (void) argc; >> + >> + if (sql_value_type(argv[1]) == SQL_NULL) { >> + return; >> + } >> + if ((input_str = sql_value_text(argv[1])) == NULL) { >> + return; >> + } >> + >> + int input_str_sz = sql_value_bytes(argv[1]); >> + assert(input_str == sql_value_text(argv[1])); >> + >> + const unsigned char *collation; >> + if (sql_value_type(argv[0]) == SQL_INTEGER) { >> + trim_procedure(context, sql_value_int(argv[0]), >> + (const unsigned char *) " ", 1, >> + input_str, input_str_sz); >> + } else if ((collation = sql_value_text(argv[0])) == NULL) { >> + return; >> + } else { >> + int coll_sz = sql_value_bytes(argv[0]); >> + trim_procedure(context, BOTH, collation, coll_sz, input_str, >> + input_str_sz); >> + } >> +} >> + >> +/** >> + * Normalize args from @a argv input array when it has three args. >> + * >> + * Case: TRIM(LEADING/TRAILING/BOTH <trim_collation> FROM <str>) >> + * User has specified side keyword and <trim_collation>, call trimming >> + * procedure with that args. >> + * >> + * @param context SQL context. >> + * @param argc Number of args. >> + * @param argv Args array. 
>> + */ >> +static void >> +trim_func_three_args(sql_context * context, int argc, sql_value **argv) >> +{ >> + const unsigned char *input_str; >> + assert(argc == 3); >> + (void) argc; >> + >> + if (sql_value_type(argv[2]) == SQL_NULL) { >> + return; >> + } >> + if ((input_str = sql_value_text(argv[2])) == NULL) { >> + return; >> + } >> + >> + int input_str_sz = sql_value_bytes(argv[2]); >> + assert(input_str == sql_value_text(argv[2])); >> + >> + const unsigned char *collation; >> + assert(sql_value_type(argv[0]) == SQL_INTEGER); >> + if ((collation = sql_value_text(argv[1])) != 0) { > > 16. As I said in the previous review, and in reviews to > other patches - use NULL to check if a pointer is NULL. > > When a code hunk is tall, and someone sees code like > > variable = func() > if (variable != 0) > .... > > they could think that the variable is integer. It is > confusing (variable can be declared somewhere above > and the one does not see its type). + const char unsigned *trim_set; + if ((trim_set = sql_value_text(argv[1])) != NULL) { >> + } >> } >> >> #ifdef SQL_ENABLE_UNKNOWN_SQL_FUNCTION >> diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y >> index 099daf512..985d33605 100644 >> --- a/src/box/sql/parse.y >> +++ b/src/box/sql/parse.y >> @@ -1032,6 +1032,51 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). { >> sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0); >> } >> %endif SQL_OMIT_CAST >> + >> +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). { >> + A.pExpr = sqlExprFunction(pParse, Y, &X); >> + spanSet(&A, &X, &E); >> + } >> + >> +%type trim_operands {struct ExprList *} >> +%destructor trim_operands { sql_expr_list_delete(pParse->db, $$); } >> + >> +trim_operands(A) ::= trim_from_clause(F) expr(Y). { >> + A = sql_expr_list_append(pParse->db, F, Y.pExpr); >> +} >> + >> +trim_operands(A) ::= expr(Y). 
{ >> + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); >> +} >> + >> +%type trim_from_clause {struct ExprList *} >> +%destructor trim_from_clause { sql_expr_list_delete(pParse->db, $$); } >> + >> +trim_from_clause(A) ::= expr(Y) FROM. { >> + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); >> +} >> + >> +trim_from_clause(A) ::= trim_specification(N) trim_character(Y) FROM. { > > 18. I understand, why you did not use trim_character rule above, > but someone looking at this code first time and not seen our > discussion will not understand. I would add a comment about it. +/* + * The following two rules cover three cases of keyword + * (LEADING/TRAILING/BOTH) and <trim_character_set> combination. + * The case when both of them are absent is disallowed. + */ >> + struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER, >> + &sqlIntTokens[N]); >> + A = sql_expr_list_append(pParse->db, NULL, p); >> + if (Y != NULL) { >> + A = sql_expr_list_append(pParse->db, A, Y); >> + } >> +} >> + >> +%type trim_character {struct Expr *} >> +%destructor trim_character {sql_expr_delete(pParse->db, $$, false);} >> + >> +trim_character(A) ::= . { A = NULL; } >> +trim_character(A) ::= expr(X). { A = X.pExpr; } > > 19. Exactly the same rule already exists: case_operand. I think, it > is worth merging them into one rule like > > expr_optional(A) ::= . { A = NULL; } > expr_optional(A) ::= expr(X). { A = X.pExpr; } > > And using in both places. +%type expr_optional {struct Expr *} +%destructor expr_optional {sql_expr_delete(pParse->db, $$, false);} + +expr_optional(A) ::= . { A = NULL; } +expr_optional(A) ::= expr(X). { A = X.pExpr; } -%type case_operand {Expr*} -%destructor case_operand {sql_expr_delete(pParse->db, $$, false);} -case_operand(A) ::= expr(X). {A = X.pExpr; /*A-overwrites-X*/} -case_operand(A) ::= . {A = 0;} >> + >> +%type trim_specification {int} >> + >> +trim_specification(A) ::= LEADING. {A = 1;} >> +trim_specification(A) ::= TRAILING. 
{A = 2;} >> +trim_specification(A) ::= BOTH. {A = 3;} >> + >> diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua >> index 251cc3534..8fe04fab1 100755 >> --- a/test/sql-tap/func.test.lua >> +++ b/test/sql-tap/func.test.lua >> @@ -1,6 +1,6 @@ >> #!/usr/bin/env tarantool >> test = require("sqltester") >> -test:plan(14586) >> +test:plan(14590) >> >> --!./tcltestrunner.lua >> -- 2001 September 15 >> @@ -1912,37 +1912,37 @@ test:do_test( >> test:do_catchsql_test( >> "func-22.1", >> [[ >> - SELECT trim(1,2,3) >> + SELECT TRIM(1,2,3) > > 20. Why? I thought that all identifiers are normalized > anyway, including function names, and you do not need > to uppercase everything manually. The same about the > test func-22.4, func-22.20. For consistency. >> ]], { >> -- <func-22.1> >> - 1, "wrong number of arguments to function TRIM()" >> + 1, "Syntax error near ','" >> -- </func-22.1> >> }) >> @@ -2215,13 +2215,55 @@ test:do_execsql_test( >> test:do_execsql_test( >> "func-22.34", >> [[ >> - SELECT RTRIM(X'00004100420000', X'00'); >> + SELECT TRIM(TRAILING X'00' FROM X'00004100420000'); >> ]], { >> -- <func-22.34> >> "\0\0A\0B" >> -- </func-22.34> >> }) >> >> +-- gh-3879 Check new TRIM() grammar, particularly BOTH keyword and FROM without >> +-- any agrs before. LEADING and TRAILING keywords is checked above. > > 21. Out of 66. +-- gh-3879 Check new TRIM() grammar, particularly BOTH keyword and +-- FROM without any agrs before. LEADING and TRAILING keywords is +-- checked above. commit 37d84a94298da72f344c6d503cb794fe8dd1e9d7 Author: Roman Khabibov <roman.habibov@tarantool.org> Date: Thu Mar 28 14:01:33 2019 +0300 sql: modify TRIM() function signature According to the ANSI standard, ltrim, rtrim and trim should be merged into one unified TRIM() function. The specialization of trimming (left, right or both and trimming characters) determined in arguments of this function. 
Closes #3879 diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c index be7bd5545..76e3265e7 100644 --- a/extra/mkkeywordhash.c +++ b/extra/mkkeywordhash.c @@ -278,6 +278,10 @@ static Keyword aKeywordTable[] = { { "WHILE", "TK_STANDARD", RESERVED, true }, { "TEXT", "TK_TEXT", RESERVED, true }, { "TRUNCATE", "TK_TRUNCATE", ALWAYS, true }, + { "TRIM", "TK_TRIM", ALWAYS, true }, + { "LEADING", "TK_LEADING", ALWAYS, true }, + { "TRAILING", "TK_TRAILING", ALWAYS, true }, + { "BOTH", "TK_BOTH", ALWAYS, true }, }; /* Number of keywords */ diff --git a/src/box/sql/func.c b/src/box/sql/func.c index abeecefa1..ac52ddda2 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1286,108 +1286,183 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) sql_result_text(context, (char *)zOut, j, sql_free); } -/* - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. +/** + * Remove chars included into @a trimming set from @a input_str. + * @param context SQL context. + * @param flags Trim specification: left, right or both. + * @param trim_set The set of characters for trimming. + * @param trim_set_sz Character set size in bytes. + * @param input_str Input string for trimming. + * @param input_str_sz Input string size in bytes. 
*/ static void -trimFunc(sql_context * context, int argc, sql_value ** argv) +trim_procedure(sql_context * context, enum trim_side_mask flags, + const unsigned char *trim_set, int trim_set_sz, + const unsigned char *input_str, int input_str_sz) { - const unsigned char *zIn; /* Input string */ - const unsigned char *zCharSet; /* Set of characters to trim */ - int nIn; /* Number of bytes in input */ - int flags; /* 1: trimleft 2: trimright 3: trim */ - int i; /* Loop counter */ - unsigned char *aLen = 0; /* Length of each character in zCharSet */ - unsigned char **azChar = 0; /* Individual characters in zCharSet */ - int nChar; /* Number of characters in zCharSet */ + int i; + /* Length of each character in the character set. */ + char unsigned *char_len = 0; + /* Individual characters in the character set. */ + char unsigned **ind_chars = 0; - if (sql_value_type(argv[0]) == SQL_NULL) { - return; - } - zIn = sql_value_text(argv[0]); - if (zIn == 0) - return; - nIn = sql_value_bytes(argv[0]); - assert(zIn == sql_value_text(argv[0])); - if (argc == 1) { - static const unsigned char lenOne[] = { 1 }; - static unsigned char *const azOne[] = { (u8 *) " " }; - nChar = 1; - aLen = (u8 *) lenOne; - azChar = (unsigned char **)azOne; - zCharSet = 0; - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { - return; - } else { - const unsigned char *z = zCharSet; - int trim_set_sz = sql_value_bytes(argv[1]); - /* - * Count the number of UTF-8 characters passing - * through the entire char set, but not up - * to the '\0' or X'00' character. This allows - * to handle trimming set containing such - * characters. 
- */ - nChar = sql_utf8_char_count(z, trim_set_sz); - if (nChar > 0) { - azChar = - contextMalloc(context, - ((i64) nChar) * (sizeof(char *) + 1)); - if (azChar == 0) { - return; - } - aLen = (unsigned char *)&azChar[nChar]; - z = zCharSet; - i = 0; - nChar = 0; - int handled_bytes_cnt = trim_set_sz; - while(handled_bytes_cnt > 0) { - azChar[nChar] = (unsigned char *)(z + i); - SQL_UTF8_FWD_1(z, i, trim_set_sz); - aLen[nChar] = (u8) (z + i - azChar[nChar]); - handled_bytes_cnt -= aLen[nChar]; - nChar++; - } + const unsigned char *z = trim_set; + /* + * Count the number of UTF-8 characters passing through + * the entire char set, but not up to the '\0' or X'00' + * character. This allows to handle trimming set + * containing such characters. + */ + int char_cnt = sql_utf8_char_count(z, trim_set_sz); + if (char_cnt > 0) { + ind_chars = + contextMalloc(context, + ((i64) char_cnt) * + (sizeof(unsigned char *) + 1)); + if (ind_chars == 0) + return; + char_len = (unsigned char *)&ind_chars[char_cnt]; + z = trim_set; + i = 0; + char_cnt = 0; + int handled_bytes_cnt = trim_set_sz; + while(handled_bytes_cnt > 0) { + ind_chars[char_cnt] = (unsigned char *)(z + i); + SQL_UTF8_FWD_1(z, i, trim_set_sz); + char_len[char_cnt] = (u8) (z + i - ind_chars[char_cnt]); + handled_bytes_cnt -= char_len[char_cnt]; + char_cnt++; } } - if (nChar > 0) { - flags = SQL_PTR_TO_INT(sql_user_data(context)); - if (flags & 1) { - while (nIn > 0) { + if (char_cnt > 0) { + if ((flags & TRIM_LEADING) != 0) { + while (input_str_sz > 0) { int len = 0; - for (i = 0; i < nChar; i++) { - len = aLen[i]; - if (len <= nIn - && memcmp(zIn, azChar[i], len) == 0) + for (i = 0; i < char_cnt; i++) { + len = char_len[i]; + if (len <= input_str_sz + && memcmp(input_str, + ind_chars[i], len) == 0) break; } - if (i >= nChar) + if (i >= char_cnt) break; - zIn += len; - nIn -= len; + input_str += len; + input_str_sz -= len; } } - if (flags & 2) { - while (nIn > 0) { + if ((flags & TRIM_TRAILING) != 0) { + while 
(input_str_sz > 0) { int len = 0; - for (i = 0; i < nChar; i++) { - len = aLen[i]; - if (len <= nIn - && memcmp(&zIn[nIn - len], - azChar[i], len) == 0) + for (i = 0; i < char_cnt; i++) { + len = char_len[i]; + if (len <= input_str_sz + && memcmp(&input_str[input_str_sz - len], + ind_chars[i], len) == 0) break; } - if (i >= nChar) + if (i >= char_cnt) break; - nIn -= len; + input_str_sz -= len; } } - if (zCharSet) { - sql_free(azChar); - } + if (trim_set_sz != 0) + sql_free(ind_chars); + } + sql_result_text(context, (char *)input_str, input_str_sz, + SQL_TRANSIENT); +} + +/** + * Normalize args from @a argv input array when it has one arg + * only. + * + * Case: TRIM(<str>) + * Call trimming procedure with TRIM_BOTH as the flags and " " as + * the trimming set. + * + * @param context SQL context. + */ +static void +trim_func_one_arg(sql_context * context, int argc, sql_value **argv) +{ + const unsigned char *input_str; + assert(argc == 1); + (void) argc; + + if ((input_str = sql_value_text(argv[0])) == NULL) + return; + int input_str_sz = sql_value_bytes(argv[0]); + + trim_procedure(context, TRIM_BOTH, (const unsigned char *) " ", + 1, input_str, input_str_sz); +} + +/** + * Normalize args from @a argv input array when it has two args. + * + * Case: TRIM(<character_set> FROM <str>) + * If user has specified <character_set> only, call trimming + * procedure with TRIM_BOTH as the flags and that trimming set. + * + * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>) + * If user has specified side keyword only, then call trimming + * procedure with the specified side and " " as the trimming set. + * + * @param context SQL context. 
+ */ +static void +trim_func_two_args(sql_context * context, int argc, sql_value **argv) +{ + const unsigned char *input_str; + assert(argc == 2); + (void) argc; + + if ((input_str = sql_value_text(argv[1])) == NULL) + return; + int input_str_sz = sql_value_bytes(argv[1]); + + const char unsigned *trim_set; + if (sql_value_type(argv[0]) == SQL_INTEGER) { + trim_procedure(context, sql_value_int(argv[0]), + (const unsigned char *) " ", 1, + input_str, input_str_sz); + } else if ((trim_set = sql_value_text(argv[0])) == NULL) { + return; + } else { + int trim_set_sz = sql_value_bytes(argv[0]); + trim_procedure(context, TRIM_BOTH, trim_set, trim_set_sz, + input_str, input_str_sz); + } +} + +/** + * Normalize args from @a argv input array when it has three args. + * + * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>) + * If user has specified side keyword and <character_set>, then + * call trimming procedure with that args. + * + * @param context SQL context. + */ +static void +trim_func_three_args(sql_context * context, int argc, sql_value **argv) +{ + const unsigned char *input_str; + assert(argc == 3); + assert(sql_value_type(argv[0]) == SQL_INTEGER); + (void) argc; + + if ((input_str = sql_value_text(argv[2])) == NULL) + return; + int input_str_sz = sql_value_bytes(argv[2]); + + const char unsigned *trim_set; + if ((trim_set = sql_value_text(argv[1])) != NULL) { + int trim_set_sz = sql_value_bytes(argv[1]); + trim_procedure(context, sql_value_int(argv[0]), trim_set, + trim_set_sz, input_str, input_str_sz); } - sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT); } #ifdef SQL_ENABLE_UNKNOWN_SQL_FUNCTION @@ -1818,12 +1893,9 @@ sqlRegisterBuiltinFunctions(void) FIELD_TYPE_INTEGER), FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY, FIELD_TYPE_INTEGER), - FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc), - FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc), - FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc), - FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc), - FUNCTION_COLL(trim, 1, 
3, 0, trimFunc), - FUNCTION_COLL(trim, 2, 3, 0, trimFunc), + FUNCTION_COLL(trim, 1, 3, 0, trim_func_one_arg), + FUNCTION_COLL(trim, 2, 3, 0, trim_func_two_args), + FUNCTION_COLL(trim, 3, 3, 0, trim_func_three_args), FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR), FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR), AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize, diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index 099daf512..b49638d44 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -1032,6 +1032,56 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). { sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0); } %endif SQL_OMIT_CAST + +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). { + A.pExpr = sqlExprFunction(pParse, Y, &X); + spanSet(&A, &X, &E); + } + +%type trim_operands {struct ExprList *} +%destructor trim_operands { sql_expr_list_delete(pParse->db, $$); } + +trim_operands(A) ::= trim_from_clause(F) expr(Y). { + A = sql_expr_list_append(pParse->db, F, Y.pExpr); +} + +trim_operands(A) ::= expr(Y). { + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); +} + +%type trim_from_clause {struct ExprList *} +%destructor trim_from_clause { sql_expr_list_delete(pParse->db, $$); } + +/* + * The following two rules cover three cases of keyword + * (LEADING/TRAILING/BOTH) and <trim_character_set> combination. + * The case when both of them are absent is disallowed. + */ +trim_from_clause(A) ::= expr(Y) FROM. { + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); +} + +trim_from_clause(A) ::= trim_specification(N) expr_optional(Y) FROM. { + struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER, + &sqlIntTokens[N]); + A = sql_expr_list_append(pParse->db, NULL, p); + if (Y != NULL) { + A = sql_expr_list_append(pParse->db, A, Y); + } +} + +%type expr_optional {struct Expr *} +%destructor expr_optional {sql_expr_delete(pParse->db, $$, false);} + +expr_optional(A) ::= . { A = NULL; } +expr_optional(A) ::= expr(X). 
{ A = X.pExpr; } + +%type trim_specification {int} + +trim_specification(A) ::= LEADING. {A = TRIM_LEADING;} +trim_specification(A) ::= TRAILING. {A = TRIM_TRAILING;} +trim_specification(A) ::= BOTH. {A = TRIM_BOTH;} + expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). { if( Y && Y->nExpr>pParse->db->aLimit[SQL_LIMIT_FUNCTION_ARG] ){ const char *err = @@ -1294,7 +1344,7 @@ expr(A) ::= EXISTS(B) LP select(Y) RP(E). { } /* CASE expressions */ -expr(A) ::= CASE(C) case_operand(X) case_exprlist(Y) case_else(Z) END(E). { +expr(A) ::= CASE(C) expr_optional(X) case_exprlist(Y) case_else(Z) END(E). { spanSet(&A,&C,&E); /*A-overwrites-C*/ A.pExpr = sqlPExpr(pParse, TK_CASE, X, 0); if( A.pExpr ){ @@ -1319,10 +1369,6 @@ case_exprlist(A) ::= WHEN expr(Y) THEN expr(Z). { %destructor case_else {sql_expr_delete(pParse->db, $$, false);} case_else(A) ::= ELSE expr(X). {A = X.pExpr;} case_else(A) ::= . {A = 0;} -%type case_operand {Expr*} -%destructor case_operand {sql_expr_delete(pParse->db, $$, false);} -case_operand(A) ::= expr(X). {A = X.pExpr; /*A-overwrites-X*/} -case_operand(A) ::= . {A = 0;} %type exprlist {ExprList*} %destructor exprlist {sql_expr_list_delete(pParse->db, $$);} diff --git a/src/box/sql/parse_def.c b/src/box/sql/parse_def.c index 49c76a326..aa1323cb2 100644 --- a/src/box/sql/parse_def.c +++ b/src/box/sql/parse_def.c @@ -34,7 +34,9 @@ const struct Token sqlIntTokens[] = { {"0", 1, false}, - {"1", 1, false} + {"1", 1, false}, + {"2", 1, false}, + {"3", 1, false}, }; void diff --git a/src/box/sql/parse_def.h b/src/box/sql/parse_def.h index a1af2bacd..5899a7e4e 100644 --- a/src/box/sql/parse_def.h +++ b/src/box/sql/parse_def.h @@ -87,7 +87,7 @@ struct Token { bool isReserved; }; -/** Constant tokens for values 0 and 1. */ +/** Constant tokens for integer values. */ extern const struct Token sqlIntTokens[]; /** Generate a Token object from a string. 
*/ diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h index b322602dc..d5a3e15c1 100644 --- a/src/box/sql/sqlInt.h +++ b/src/box/sql/sqlInt.h @@ -1680,6 +1680,17 @@ struct FuncDestructor { * single query - might change over time */ +/* + * Trim side mask components. TRIM_LEADING means to trim left side + * only. TRIM_TRAILING is to trim right side only. TRIM_BOTH is to + * trim both sides. + */ +enum trim_side_mask { + TRIM_LEADING = 1, + TRIM_TRAILING = 2, + TRIM_BOTH = TRIM_LEADING | TRIM_TRAILING +}; + /* * The following three macros, FUNCTION(), LIKEFUNC() and AGGREGATE() are * used to create the initializers for the FuncDef structures. diff --git a/test/sql-tap/badutf1.test.lua b/test/sql-tap/badutf1.test.lua index d104efaa9..d32bafae0 100755 --- a/test/sql-tap/badutf1.test.lua +++ b/test/sql-tap/badutf1.test.lua @@ -302,7 +302,7 @@ test:do_test( test:do_test( "badutf-4.1", function() - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(trim('\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.1> "X", "F0" @@ -312,7 +312,7 @@ test:do_test( test:do_test( "badutf-4.2", function() - return test:execsql2("SELECT hex(ltrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(trim(LEADING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.2> "X", "F0808080FF" @@ -322,7 +322,7 @@ test:do_test( test:do_test( "badutf-4.3", function() - return test:execsql2("SELECT hex(rtrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(trim(TRAILING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.3> "X", "808080F0" @@ -332,7 +332,7 @@ test:do_test( test:do_test( "badutf-4.4", function() - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80' FROM 
'\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.4> "X", "808080F0808080FF" @@ -342,7 +342,7 @@ test:do_test( test:do_test( "badutf-4.5", function() - return test:execsql2("SELECT hex(trim('\xff\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\xff\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.5> "X", "80F0808080FF" @@ -352,7 +352,7 @@ test:do_test( test:do_test( "badutf-4.6", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.6> "X", "F0808080FF" @@ -362,7 +362,7 @@ test:do_test( test:do_test( "badutf-4.7", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80\x80')) AS x") + return test:execsql2("SELECT hex(trim('\xff\x80\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.7> "X", "FF80F0808080FF" diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua index 251cc3534..165eafb6d 100755 --- a/test/sql-tap/func.test.lua +++ b/test/sql-tap/func.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test = require("sqltester") -test:plan(14586) +test:plan(14590) --!./tcltestrunner.lua -- 2001 September 15 @@ -1912,37 +1912,37 @@ test:do_test( test:do_catchsql_test( "func-22.1", [[ - SELECT trim(1,2,3) + SELECT TRIM(1,2,3) ]], { -- <func-22.1> - 1, "wrong number of arguments to function TRIM()" + 1, "Syntax error near ','" -- </func-22.1> }) test:do_catchsql_test( "func-22.2", [[ - SELECT ltrim(1,2,3) + SELECT LTRIM(1,2,3) ]], { -- <func-22.2> - 1, "wrong number of arguments to function LTRIM()" + 1, "Function 'LTRIM' does not exist" -- </func-22.2> }) test:do_catchsql_test( "func-22.3", [[ - SELECT rtrim(1,2,3) + SELECT RTRIM(1,2,3) ]], { -- <func-22.3> - 1, "wrong number of arguments to function RTRIM()" + 1, "Function 'RTRIM' does not exist" -- 
</func-22.3> }) test:do_execsql_test( "func-22.4", [[ - SELECT trim(' hi '); + SELECT TRIM(' hi '); ]], { -- <func-22.4> "hi" @@ -1952,7 +1952,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.5", [[ - SELECT ltrim(' hi '); + SELECT TRIM(LEADING FROM ' hi '); ]], { -- <func-22.5> "hi " @@ -1962,7 +1962,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.6", [[ - SELECT rtrim(' hi '); + SELECT TRIM(TRAILING FROM ' hi '); ]], { -- <func-22.6> " hi" @@ -1972,7 +1972,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.7", [[ - SELECT trim(' hi ','xyz'); + SELECT TRIM('xyz' FROM ' hi '); ]], { -- <func-22.7> " hi " @@ -1982,7 +1982,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.8", [[ - SELECT ltrim(' hi ','xyz'); + SELECT TRIM(LEADING 'xyz' FROM ' hi '); ]], { -- <func-22.8> " hi " @@ -1992,7 +1992,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.9", [[ - SELECT rtrim(' hi ','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM ' hi '); ]], { -- <func-22.9> " hi " @@ -2002,7 +2002,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.10", [[ - SELECT trim('xyxzy hi zzzy','xyz'); + SELECT TRIM('xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.10> " hi " @@ -2012,7 +2012,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.11", [[ - SELECT ltrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(LEADING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.11> " hi zzzy" @@ -2022,7 +2022,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.12", [[ - SELECT rtrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.12> "xyxzy hi " @@ -2032,7 +2032,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.13", [[ - SELECT trim(' hi ',''); + SELECT TRIM('' FROM ' hi '); ]], { -- <func-22.13> " hi " @@ -2043,7 +2043,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.14", [[ - SELECT hex(trim(x'c280e1bfbff48fbfbf6869',x'6162e1bfbfc280')) + SELECT hex(TRIM(x'6162e1bfbfc280' FROM x'c280e1bfbff48fbfbf6869')) ]], 
{ -- <func-22.14> "F48FBFBF6869" @@ -2052,8 +2052,8 @@ test:do_execsql_test( test:do_execsql_test( "func-22.15", - [[SELECT hex(trim(x'6869c280e1bfbff48fbfbf61', - x'6162e1bfbfc280f48fbfbf'))]], { + [[SELECT hex(TRIM(x'6162e1bfbfc280f48fbfbf' + FROM x'6869c280e1bfbff48fbfbf61'))]], { -- <func-22.15> "6869" -- </func-22.15> @@ -2062,7 +2062,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.16", [[ - SELECT hex(trim(x'ceb1ceb2ceb3',x'ceb1')); + SELECT hex(TRIM(x'ceb1' FROM x'ceb1ceb2ceb3')); ]], { -- <func-22.16> "CEB2CEB3" @@ -2073,7 +2073,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.20", [[ - SELECT typeof(trim(NULL)); + SELECT typeof(TRIM(NULL)); ]], { -- <func-22.20> "null" @@ -2083,7 +2083,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.21", [[ - SELECT typeof(trim(NULL,'xyz')); + SELECT typeof(TRIM('xyz' FROM NULL)); ]], { -- <func-22.21> "null" @@ -2093,7 +2093,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.22", [[ - SELECT typeof(trim('hello',NULL)); + SELECT typeof(TRIM(NULL FROM 'hello')); ]], { -- <func-22.22> "null" @@ -2105,7 +2105,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.23", [[ - SELECT TRIM(X'004100', X'00'); + SELECT TRIM(X'00' FROM X'004100'); ]], { -- <func-22.23> "A" @@ -2115,7 +2115,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.24", [[ - SELECT TRIM(X'004100', X'0000'); + SELECT TRIM(X'0000' FROM X'004100'); ]], { -- <func-22.24> "A" @@ -2125,7 +2125,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.25", [[ - SELECT TRIM(X'004100', X'0042'); + SELECT TRIM(X'0042' FROM X'004100'); ]], { -- <func-22.25> "A" @@ -2135,7 +2135,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.26", [[ - SELECT TRIM(X'00004100420000', X'00'); + SELECT TRIM(X'00' FROM X'00004100420000'); ]], { -- <func-22.26> "A\0B" @@ -2145,7 +2145,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.27", [[ - SELECT LTRIM(X'004100', X'00'); + SELECT TRIM(LEADING X'00' FROM X'004100'); ]], { -- 
<func-22.27> "A\0" @@ -2155,7 +2155,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.28", [[ - SELECT LTRIM(X'004100', X'0000'); + SELECT TRIM(LEADING X'0000' FROM X'004100'); ]], { -- <func-22.28> "A\0" @@ -2165,7 +2165,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.29", [[ - SELECT LTRIM(X'004100', X'0042'); + SELECT TRIM(LEADING X'0042' FROM X'004100'); ]], { -- <func-22.29> "A\0" @@ -2175,7 +2175,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.30", [[ - SELECT LTRIM(X'00004100420000', X'00'); + SELECT TRIM(LEADING X'00' FROM X'00004100420000'); ]], { -- <func-22.30> "A\0B\0\0" @@ -2185,7 +2185,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.31", [[ - SELECT RTRIM(X'004100', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'004100'); ]], { -- <func-22.31> "\0A" @@ -2195,7 +2195,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.32", [[ - SELECT RTRIM(X'004100', X'0000'); + SELECT TRIM(TRAILING X'0000' FROM X'004100'); ]], { -- <func-22.32> "\0A" @@ -2205,7 +2205,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.33", [[ - SELECT RTRIM(X'004100', X'0042'); + SELECT TRIM(TRAILING X'0042' FROM X'004100'); ]], { -- <func-22.33> "\0A" @@ -2215,13 +2215,56 @@ test:do_execsql_test( test:do_execsql_test( "func-22.34", [[ - SELECT RTRIM(X'00004100420000', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'00004100420000'); ]], { -- <func-22.34> "\0\0A\0B" -- </func-22.34> }) +-- gh-3879 Check new TRIM() grammar, particularly BOTH keyword and +-- FROM without any agrs before. LEADING and TRAILING keywords is +-- checked above. 
+ +test:do_execsql_test( + "func-22.35", + [[ + SELECT TRIM(BOTH FROM ' hi '); + ]], { + -- <func-22.35> + "hi" + -- </func-22.35> + }) +test:do_execsql_test( + "func-22.36", + [[ + SELECT TRIM(BOTH 'xyz' FROM ' hi '); + ]], { + -- <func-22.36> + " hi " + -- </func-22.36> + }) + +test:do_execsql_test( + "func-22.37", + [[ + SELECT TRIM(BOTH 'xyz' FROM 'xyxzy hi zzzy'); + ]], { + -- <func-22.37> + " hi " + -- </func-22.37> + }) + +test:do_catchsql_test( + "func-22.38", + [[ + SELECT TRIM(FROM 'xyxzy'); + ]], { + -- <func-22.38> + 1, "Syntax error near 'FROM'" + -- </func-22.38> + }) + -- This is to test the deprecated sql_aggregate_count() API. -- --test:do_test( @@ -2838,16 +2881,16 @@ test:do_execsql_test( "SELECT TRIM(CHAR(32,00,32,00,32));", {string.char(00,32,00)}) --- LTRIM +-- LEFT TRIM test:do_execsql_test( "func-70", - "SELECT LTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(LEADING FROM CHAR(32,00,32,00,32));", {string.char(00,32,00,32)}) --- RTRIM +-- RIGHT TRIM test:do_execsql_test( "func-71", - "SELECT RTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(TRAILING FROM CHAR(32,00,32,00,32));", {string.char(32,00,32,00)}) -- GROUP_CONCAT diff --git a/test/sql-tap/with1.test.lua b/test/sql-tap/with1.test.lua index 495aa4ee4..19953e434 100755 --- a/test/sql-tap/with1.test.lua +++ b/test/sql-tap/with1.test.lua @@ -550,7 +550,7 @@ test:do_execsql_test("8.1-mandelbrot", [[ SELECT group_concat( substr(' .+*#', 1+min(iter/7,4), 1), '') FROM m2 GROUP BY cy ) - SELECT group_concat(rtrim(t),x'0a') FROM a; + SELECT group_concat(trim(TRAILING FROM t),x'0a') FROM a; ]], { -- <8.1-mandelbrot> [[ ....# ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature 2019-04-18 17:11 ` Roman Khabibov @ 2019-04-19 12:49 ` Vladislav Shpilevoy 2019-04-20 0:48 ` Roman Khabibov 0 siblings, 1 reply; 14+ messages in thread From: Vladislav Shpilevoy @ 2019-04-19 12:49 UTC (permalink / raw) To: Roman Khabibov, tarantool-patches Hi! Thanks for the fixes, much better already, almost done! But see 16 comments below. >>> diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua >>> index 251cc3534..8fe04fab1 100755 >>> --- a/test/sql-tap/func.test.lua >>> +++ b/test/sql-tap/func.test.lua >>> @@ -1,6 +1,6 @@ >>> #!/usr/bin/env tarantool >>> test = require("sqltester") >>> -test:plan(14586) >>> +test:plan(14590) >>> >>> --!./tcltestrunner.lua >>> -- 2001 September 15 >>> @@ -1912,37 +1912,37 @@ test:do_test( >>> test:do_catchsql_test( >>> "func-22.1", >>> [[ >>> - SELECT trim(1,2,3) >>> + SELECT TRIM(1,2,3) >> >> 20. Why? I thought that all identifiers are normalized >> anyway, including function names, and you do not need >> to uppercase everything manually. The same about the >> test func-22.4, func-22.20. > For consistency. 1. Please, do not make dubious changes not required by the patch, wiping the git history, and padding the diff out. Keep the old version. In other places, where the only change was uppercasing, too, please. > sql: modify TRIM() function signature > > According to the ANSI standard, ltrim, rtrim and trim should > be merged into one unified TRIM() function. The specialization of > trimming (left, right or both and trimming characters) determined > in arguments of this function. 
> > Closes #3879 > > diff --git a/src/box/sql/func.c b/src/box/sql/func.c > index abeecefa1..ac52ddda2 100644 > --- a/src/box/sql/func.c > +++ b/src/box/sql/func.c > @@ -1286,108 +1286,183 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) > sql_result_text(context, (char *)zOut, j, sql_free); > } > > -/* > - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. > - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. > +/** > + * Remove chars included into @a trimming set from @a input_str. 2. There is no parameter named 'trimming'. According to doxygen documentation, @a takes one word as an argument and it is usually the function argument's name. http://www.doxygen.nl/manual/commands.html#cmda Please, prune old parameter names from the comments. > + * @param context SQL context. > + * @param flags Trim specification: left, right or both. > + * @param trim_set The set of characters for trimming. > + * @param trim_set_sz Character set size in bytes. > + * @param input_str Input string for trimming. > + * @param input_str_sz Input string size in bytes. > */ > static void > -trimFunc(sql_context * context, int argc, sql_value ** argv) > +trim_procedure(sql_context * context, enum trim_side_mask flags, > + const unsigned char *trim_set, int trim_set_sz, > + const unsigned char *input_str, int input_str_sz) 3. Now I started looking into this function more attentively and I see, that you changed this function in 100%, which means that you should use Tarantool code style, not SQLite. See some concrete points below. 
> { > - const unsigned char *zIn; /* Input string */ > - const unsigned char *zCharSet; /* Set of characters to trim */ > - int nIn; /* Number of bytes in input */ > - int flags; /* 1: trimleft 2: trimright 3: trim */ > - int i; /* Loop counter */ > - unsigned char *aLen = 0; /* Length of each character in zCharSet */ > - unsigned char **azChar = 0; /* Individual characters in zCharSet */ > - int nChar; /* Number of characters in zCharSet */ > + int i; > + /* Length of each character in the character set. */ > + char unsigned *char_len = 0; 4. You again ignored my comment about NULL. Please, find all other places and fix it finally. I said it already 1000 times in 1000 reviews - we do not use 0 for pointers. It is a simple rule. Just follow it. Write it down somewhere in a list of code style rules and check them all before sending a patch. Seeing how many my comments you repeatedly ignore, I think that probably you should reconsider the way how you do self-reviews. If you do it via just looking a couple of seconds at the code in the text editor, then it is definitely a bad way. First of all, use 'git diff/show' in console to look only at the patch changes, not at the entire files and functions. If you do not like console, and it is ok, then you can use Sublime Merge desktop program or Sublime Git package for the editor. When you look at the diff only, it is much simpler to notice such violations and even bugs. 5. In our code style we do not use 'char' to represent numbers, we use 'uint8_t' or 'int8_t' when we want to use one-byte numbers. It is the same as 'char'/'unsigned char', but looks shorter and it becomes obvious that these values are used as numbers, not text. Firstly I thought that char_len was an array of characters, but it emerged being an array of symbol sizes. In the summary, I suggest to use 'uint8_t *' for char_len array. > + /* Individual characters in the character set. */ > + char unsigned **ind_chars = 0; 6. 
If you declare it as 'const char unsigned **', then you can remove unnecessary type cast from line 1330. 7. Normally, we do not reorder 'unsigned' and 'char/int/long'. char unsigned -> unsigned char > > - if (sql_value_type(argv[0]) == SQL_NULL) { > - return; > - } > - zIn = sql_value_text(argv[0]); > - if (zIn == 0) > - return; > - nIn = sql_value_bytes(argv[0]); > - assert(zIn == sql_value_text(argv[0])); > - if (argc == 1) { > - static const unsigned char lenOne[] = { 1 }; > - static unsigned char *const azOne[] = { (u8 *) " " }; > - nChar = 1; > - aLen = (u8 *) lenOne; > - azChar = (unsigned char **)azOne; > - zCharSet = 0; > - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { > - return; > - } else { > - const unsigned char *z = zCharSet; > - int trim_set_sz = sql_value_bytes(argv[1]); > - /* > - * Count the number of UTF-8 characters passing > - * through the entire char set, but not up > - * to the '\0' or X'00' character. This allows > - * to handle trimming set containing such > - * characters. > - */ > - nChar = sql_utf8_char_count(z, trim_set_sz); > - if (nChar > 0) { > - azChar = > - contextMalloc(context, > - ((i64) nChar) * (sizeof(char *) + 1)); > - if (azChar == 0) { > - return; > - } > - aLen = (unsigned char *)&azChar[nChar]; > - z = zCharSet; > - i = 0; > - nChar = 0; > - int handled_bytes_cnt = trim_set_sz; > - while(handled_bytes_cnt > 0) { > - azChar[nChar] = (unsigned char *)(z + i); > - SQL_UTF8_FWD_1(z, i, trim_set_sz); > - aLen[nChar] = (u8) (z + i - azChar[nChar]); > - handled_bytes_cnt -= aLen[nChar]; > - nChar++; > - } > + const unsigned char *z = trim_set; > + /* > + * Count the number of UTF-8 characters passing through > + * the entire char set, but not up to the '\0' or X'00' > + * character. This allows to handle trimming set > + * containing such characters. > + */ > + int char_cnt = sql_utf8_char_count(z, trim_set_sz); > + if (char_cnt > 0) { > + ind_chars = > + contextMalloc(context, > + ((i64) char_cnt) * 8. 
Why do you need that cast to 'i64'? Anyway, you access that memory by 'int' indexes in the next lines. Please, remove it. > + (sizeof(unsigned char *) + 1)); > + if (ind_chars == 0) > + return; > + char_len = (unsigned char *)&ind_chars[char_cnt]; > + z = trim_set; > + i = 0; > + char_cnt = 0; > + int handled_bytes_cnt = trim_set_sz; > + while(handled_bytes_cnt > 0) { > + ind_chars[char_cnt] = (unsigned char *)(z + i); > + SQL_UTF8_FWD_1(z, i, trim_set_sz); > + char_len[char_cnt] = (u8) (z + i - ind_chars[char_cnt]); 9. Why do you need that cast to 'u8'? 'u8' == 'unsigned char', and the type of that expression is already 'unsigned char'. > + handled_bytes_cnt -= char_len[char_cnt]; > + char_cnt++; > } > } > - if (nChar > 0) { > - flags = SQL_PTR_TO_INT(sql_user_data(context)); > - if (flags & 1) { > - while (nIn > 0) { > + if (char_cnt > 0) { 10. The indentation of the next 33 lines is huge and they are followed by just one 2-line function call. Just do 'goto result' here if char_cnt == 0 and reduce the indentation. The same can be done at line 1317 in order to reduce the indentation of the next 17 lines.
> + if ((flags & TRIM_LEADING) != 0) { > + while (input_str_sz > 0) { > int len = 0; > - for (i = 0; i < nChar; i++) { > - len = aLen[i]; > - if (len <= nIn > - && memcmp(zIn, azChar[i], len) == 0) > + for (i = 0; i < char_cnt; i++) { > + len = char_len[i]; > + if (len <= input_str_sz > + && memcmp(input_str, > + ind_chars[i], len) == 0) > break; > } > - if (i >= nChar) > + if (i >= char_cnt) > break; > - zIn += len; > - nIn -= len; > + input_str += len; > + input_str_sz -= len; > } > } > - if (flags & 2) { > - while (nIn > 0) { > + if ((flags & TRIM_TRAILING) != 0) { > + while (input_str_sz > 0) { > int len = 0; > - for (i = 0; i < nChar; i++) { > - len = aLen[i]; > - if (len <= nIn > - && memcmp(&zIn[nIn - len], > - azChar[i], len) == 0) > + for (i = 0; i < char_cnt; i++) { > + len = char_len[i]; > + if (len <= input_str_sz > + && memcmp(&input_str[input_str_sz - len], > + ind_chars[i], len) == 0) 11. Out of 80. And you saw that in your editor, even without 'git diff' and console, because you have 80-rulers. So why did you decide not to fix it? > break; > } > - if (i >= nChar) > + if (i >= char_cnt) > break; > - nIn -= len; > + input_str_sz -= len; > } > } > - if (zCharSet) { > - sql_free(azChar); > - } > + if (trim_set_sz != 0) > + sql_free(ind_chars); > + } > + sql_result_text(context, (char *)input_str, input_str_sz, > + SQL_TRANSIENT); > +} > + > +/** > + * Normalize args from @a argv input array when it has one arg > + * only. > + * > + * Case: TRIM(<str>) > + * Call trimming procedure with TRIM_BOTH as the flags and " " as > + * the trimming set. > + * > + * @param context SQL context. 12. As I said in the previous reviews, we either omit doxygen formal style completely, or use it correctly. If you want to use doxygen, please, describe all the 3 parameters. If you do not want, then omit @param/@retval section. The same for other places. > + */ > +static void > +trim_func_one_arg(sql_context * context, int argc, sql_value **argv) 13. 
In new code we use the explicit 'struct' keyword for struct types - sql_context and sql_value. Also, we do not put whitespace after '*' when declaring a pointer type value. The same for other places. > +/** > + * Normalize args from @a argv input array when it has two args. > + * > + * Case: TRIM(<character_set> FROM <str>) > + * If user has specified <character_set> only, call trimming > + * procedure with TRIM_BOTH as the flags and that trimming set. > + * > + * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>) > + * If user has specified side keyword only, then call trimming > + * procedure with the specified side and " " as the trimming set. > + * > + * @param context SQL context. > + */ > +static void > +trim_func_two_args(sql_context * context, int argc, sql_value **argv) > +{ > + const unsigned char *input_str; > + assert(argc == 2); > + (void) argc; > + > + if ((input_str = sql_value_text(argv[1])) == NULL) > + return; > + int input_str_sz = sql_value_bytes(argv[1]); > + > + const char unsigned *trim_set; > + if (sql_value_type(argv[0]) == SQL_INTEGER) { > + trim_procedure(context, sql_value_int(argv[0]), > + (const unsigned char *) " ", 1, > + input_str, input_str_sz); > + } else if ((trim_set = sql_value_text(argv[0])) == NULL) { > + return; > + } else { 14. Please, apply. @@ -1427,9 +1427,7 @@ trim_func_two_args(sql_context * context, int argc, sql_value **argv) trim_procedure(context, sql_value_int(argv[0]), (const unsigned char *) " ", 1, input_str, input_str_sz); - } else if ((trim_set = sql_value_text(argv[0])) == NULL) { - return; - } else { + } else if ((trim_set = sql_value_text(argv[0])) != NULL) { int trim_set_sz = sql_value_bytes(argv[0]); trim_procedure(context, TRIM_BOTH, trim_set, trim_set_sz, input_str, input_str_sz); > + int trim_set_sz = sql_value_bytes(argv[0]); > + trim_procedure(context, TRIM_BOTH, trim_set, trim_set_sz, > + input_str, input_str_sz); > + } > +} > + > +/** > + * Normalize args from @a argv input array when it has three args.
> + * > + * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>) > + * If user has specified side keyword and <character_set>, then > + * call trimming procedure with that args. > + * > + * @param context SQL context. > + */ > +static void > +trim_func_three_args(sql_context * context, int argc, sql_value **argv) > +{ > + const unsigned char *input_str; > + assert(argc == 3); > + assert(sql_value_type(argv[0]) == SQL_INTEGER); > + (void) argc; > + > + if ((input_str = sql_value_text(argv[2])) == NULL) > + return; > + int input_str_sz = sql_value_bytes(argv[2]); > + > + const char unsigned *trim_set; > + if ((trim_set = sql_value_text(argv[1])) != NULL) { > + int trim_set_sz = sql_value_bytes(argv[1]); > + trim_procedure(context, sql_value_int(argv[0]), trim_set, > + trim_set_sz, input_str, input_str_sz); > } > - sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT); 15. Please, apply. @@ -1448,21 +1446,18 @@ trim_func_two_args(sql_context * context, int argc, sql_value **argv) static void trim_func_three_args(sql_context * context, int argc, sql_value **argv) { - const unsigned char *input_str; + const unsigned char *input_str, *trim_set; assert(argc == 3); assert(sql_value_type(argv[0]) == SQL_INTEGER); (void) argc; - if ((input_str = sql_value_text(argv[2])) == NULL) + if ((input_str = sql_value_text(argv[2])) == NULL || + (trim_set = sql_value_text(argv[1])) == NULL) return; int input_str_sz = sql_value_bytes(argv[2]); - - const char unsigned *trim_set; - if ((trim_set = sql_value_text(argv[1])) != NULL) { - int trim_set_sz = sql_value_bytes(argv[1]); - trim_procedure(context, sql_value_int(argv[0]), trim_set, - trim_set_sz, input_str, input_str_sz); - } + int trim_set_sz = sql_value_bytes(argv[1]); + trim_procedure(context, sql_value_int(argv[0]), trim_set, trim_set_sz, + input_str, input_str_sz); } > diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y > index 099daf512..b49638d44 100644 > --- a/src/box/sql/parse.y > +++ b/src/box/sql/parse.y > + 
> +%type expr_optional {struct Expr *} > +%destructor expr_optional {sql_expr_delete(pParse->db, $$, false);} > + > +expr_optional(A) ::= . { A = NULL; } > +expr_optional(A) ::= expr(X). { A = X.pExpr; } > + > +%type trim_specification {int} 16. It is not int - it is enum trim_side_mask. ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature 2019-04-19 12:49 ` Vladislav Shpilevoy @ 2019-04-20 0:48 ` Roman Khabibov 2019-04-21 19:36 ` Vladislav Shpilevoy 0 siblings, 1 reply; 14+ messages in thread From: Roman Khabibov @ 2019-04-20 0:48 UTC (permalink / raw) To: tarantool-patches; +Cc: Vladislav Shpilevoy Hi! Thanks for the review. > On Apr 19, 2019, at 3:49 PM, Vladislav Shpilevoy <v.shpilevoy@tarantool.org> wrote: > > Hi! Thanks for the fixes, much better already, > almost done! But see 16 comments below. > >>>> diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua >>>> index 251cc3534..8fe04fab1 100755 >>>> --- a/test/sql-tap/func.test.lua >>>> +++ b/test/sql-tap/func.test.lua >>>> @@ -1,6 +1,6 @@ >>>> #!/usr/bin/env tarantool >>>> test = require("sqltester") >>>> -test:plan(14586) >>>> +test:plan(14590) >>>> >>>> --!./tcltestrunner.lua >>>> -- 2001 September 15 >>>> @@ -1912,37 +1912,37 @@ test:do_test( >>>> test:do_catchsql_test( >>>> "func-22.1", >>>> [[ >>>> - SELECT trim(1,2,3) >>>> + SELECT TRIM(1,2,3) >>> >>> 20. Why? I thought that all identifiers are normalized >>> anyway, including function names, and you do not need >>> to uppercase everything manually. The same about the >>> test func-22.4, func-22.20. >> For consistency. > > 1. Please, do not make dubious changes not required by > the patch, wiping the git history, and padding the diff > out. Keep the old version. In other places, where the > only change was uppercasing, too, please. Understood. > >> sql: modify TRIM() function signature >> >> According to the ANSI standard, ltrim, rtrim and trim should >> be merged into one unified TRIM() function. The specialization of >> trimming (left, right or both and trimming characters) determined >> in arguments of this function. 
>> >> Closes #3879 >> >> diff --git a/src/box/sql/func.c b/src/box/sql/func.c >> index abeecefa1..ac52ddda2 100644 >> --- a/src/box/sql/func.c >> +++ b/src/box/sql/func.c >> @@ -1286,108 +1286,183 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) >> sql_result_text(context, (char *)zOut, j, sql_free); >> } >> >> -/* >> - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. >> - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. >> +/** >> + * Remove chars included into @a trimming set from @a input_str. > > 2. There is no parameter named 'trimming'. According to > doxygen documentation, @a takes one word as an argument > and it is usually the function argument's name. > http://www.doxygen.nl/manual/commands.html#cmda > > Please, prune old parameter names from the comments. +/** + * Remove characters included in @a trim_set from @a input_str + * until encounter a character that doesn't belong to @a trim_set. + * Remove from the side specified by @a flags. + * @param context SQL context. + * @param flags Trim specification: left, right or both. + * @param trim_set The set of characters for trimming. + * @param trim_set_sz Character set size in bytes. + * @param input_str Input string for trimming. + * @param input_str_sz Input string size in bytes. */ >> { >> - const unsigned char *zIn; /* Input string */ >> - const unsigned char *zCharSet; /* Set of characters to trim */ >> - int nIn; /* Number of bytes in input */ >> - int flags; /* 1: trimleft 2: trimright 3: trim */ >> - int i; /* Loop counter */ >> - unsigned char *aLen = 0; /* Length of each character in zCharSet */ >> - unsigned char **azChar = 0; /* Individual characters in zCharSet */ >> - int nChar; /* Number of characters in zCharSet */ >> + int i; >> + /* Length of each character in the character set. */ >> + char unsigned *char_len = 0; > > 4. You again ignored my comment about NULL. Please, find all other > places and fix it finally. 
I said it already 1000 times in 1000 > reviews - we do not use 0 for pointers. It is a simple rule. Just > follow it. Write it down somewhere in a list of code style rules > and check them all before sending a patch. > > Seeing how many my comments you repeatedly ignore, I think that > probably you should reconsider the way how you do self-reviews. If > you do it via just looking a couple of seconds at the code in the > text editor, then it is definitely a bad way. > > First of all, use 'git diff/show' in console to look only at the > patch changes, not at the entire files and functions. If you do not > like console, and it is ok, then you can use Sublime Merge > desktop program or Sublime Git package for the editor. When you > look at the diff only, it is much simpler to notice such violations > and even bugs. + int char_cnt = sql_utf8_char_count(z, trim_set_sz); + if (char_cnt == 0) + unsigned char **ind_chars = + contextMalloc(context, + char_cnt * (sizeof(unsigned char *) + 1)); + if (ind_chars == NULL) + int i = 0; + char_cnt = 0; + if ((input_str = sql_value_text(argv[0])) == NULL) + return; + if ((input_str = sql_value_text(argv[1])) == NULL) + return; + } else if ((trim_set = sql_value_text(argv[0])) != NULL) { + int trim_set_sz = sql_value_bytes(argv[0]); + if ((input_str = sql_value_text(argv[2])) == NULL || + (trim_set = sql_value_text(argv[1])) == NULL) + return; > 5. In our code style we do not use 'char' to represent numbers, we > use 'uint8_t' or 'int8_t' when we want to use one-byte numbers. It > is the same as 'char'/'unsigned char', but looks shorter and it > becomes obvious that these values are used as numbers, not text. > Firstly I thought that char_len was an array of characters, but > it emerged being an array of symbol sizes. In the summary, I > suggest to use 'uint8_t *' for char_len array. + uint8_t *char_len = (uint8_t *)&ind_chars[char_cnt]; >> + /* Individual characters in the character set. */ >> + char unsigned **ind_chars = 0; > > 6. 
If you declare it as 'const char unsigned **', then you > can remove unnecessary type cast from line 1330. If you meant the following line "ind_chars[char_cnt] = (unsigned char *)(z + i);", then it doesn’t compile without the cast, because of the error "assigning to 'unsigned char *' from 'const unsigned char *'". > 7. Normally, we do not reorder 'unsigned' and 'char/int/long'. > > char unsigned -> unsigned char Sorry. Didn’t notice. >> >> - if (sql_value_type(argv[0]) == SQL_NULL) { >> - return; >> - } >> - zIn = sql_value_text(argv[0]); >> - if (zIn == 0) >> - return; >> - nIn = sql_value_bytes(argv[0]); >> - assert(zIn == sql_value_text(argv[0])); >> - if (argc == 1) { >> - static const unsigned char lenOne[] = { 1 }; >> - static unsigned char *const azOne[] = { (u8 *) " " }; >> - nChar = 1; >> - aLen = (u8 *) lenOne; >> - azChar = (unsigned char **)azOne; >> - zCharSet = 0; >> - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { >> - return; >> - } else { >> - const unsigned char *z = zCharSet; >> - int trim_set_sz = sql_value_bytes(argv[1]); >> - /* >> - * Count the number of UTF-8 characters passing >> - * through the entire char set, but not up >> - * to the '\0' or X'00' character. This allows >> - * to handle trimming set containing such >> - * characters.
>> - */ >> - nChar = sql_utf8_char_count(z, trim_set_sz); >> - if (nChar > 0) { >> - azChar = >> - contextMalloc(context, >> - ((i64) nChar) * (sizeof(char *) + 1)); >> - if (azChar == 0) { >> - return; >> - } >> - aLen = (unsigned char *)&azChar[nChar]; >> - z = zCharSet; >> - i = 0; >> - nChar = 0; >> - int handled_bytes_cnt = trim_set_sz; >> - while(handled_bytes_cnt > 0) { >> - azChar[nChar] = (unsigned char *)(z + i); >> - SQL_UTF8_FWD_1(z, i, trim_set_sz); >> - aLen[nChar] = (u8) (z + i - azChar[nChar]); >> - handled_bytes_cnt -= aLen[nChar]; >> - nChar++; >> - } >> + const unsigned char *z = trim_set; >> + /* >> + * Count the number of UTF-8 characters passing through >> + * the entire char set, but not up to the '\0' or X'00' >> + * character. This allows to handle trimming set >> + * containing such characters. >> + */ >> + int char_cnt = sql_utf8_char_count(z, trim_set_sz); >> + if (char_cnt > 0) { >> + ind_chars = >> + contextMalloc(context, >> + ((i64) char_cnt) * > 8. Why do you need that cast to 'i64'? Anyway you access that memory by > 'int' indexes in the next lines. Please, remove it. + unsigned char **ind_chars = + contextMalloc(context, + char_cnt * (sizeof(unsigned char *) + 1)); > >> + (sizeof(unsigned char *) + 1)); >> + if (ind_chars == 0) >> + return; >> + char_len = (unsigned char *)&ind_chars[char_cnt]; >> + z = trim_set; >> + i = 0; >> + char_cnt = 0; >> + int handled_bytes_cnt = trim_set_sz; >> + while(handled_bytes_cnt > 0) { >> + ind_chars[char_cnt] = (unsigned char *)(z + i); >> + SQL_UTF8_FWD_1(z, i, trim_set_sz); >> + char_len[char_cnt] = (u8) (z + i - ind_chars[char_cnt]); > 9. Why do you need that cast to 'u8'? 'u8' == 'unsigned char', and the > type of that expression is already 'unsigned char’. 
+ char_len[char_cnt] = z + i - ind_chars[char_cnt]; >> + handled_bytes_cnt -= char_len[char_cnt]; >> + char_cnt++; >> } >> } >> - if (nChar > 0) { >> - flags = SQL_PTR_TO_INT(sql_user_data(context)); >> - if (flags & 1) { >> - while (nIn > 0) { >> + if (char_cnt > 0) { > 10. Indentation next 33 lines is huge and they are followed by > just one 2-line function call. Just do 'goto result' here if > char_cnt == 0 and reduce the indentation. The same can be done at > line 1317 in order to reduce indentation of next 17 lines. + if (char_cnt == 0) + goto result; + /* Individual characters in the character set. */ + unsigned char **ind_chars = + contextMalloc(context, + char_cnt * (sizeof(unsigned char *) + 1)); + if (ind_chars == NULL) return; + if (char_cnt == 0) + goto result; + if ((flags & TRIM_LEADING) != 0) { + while (input_str_sz > 0) { I have never used this operator before. >> + if ((flags & TRIM_LEADING) != 0) { >> + while (input_str_sz > 0) { >> int len = 0; >> - for (i = 0; i < nChar; i++) { >> - len = aLen[i]; >> - if (len <= nIn >> - && memcmp(zIn, azChar[i], len) == 0) >> + for (i = 0; i < char_cnt; i++) { >> + len = char_len[i]; >> + if (len <= input_str_sz >> + && memcmp(input_str, >> + ind_chars[i], len) == 0) >> break; >> } >> - if (i >= nChar) >> + if (i >= char_cnt) >> break; >> - zIn += len; >> - nIn -= len; >> + input_str += len; >> + input_str_sz -= len; >> } >> } >> - if (flags & 2) { >> - while (nIn > 0) { >> + if ((flags & TRIM_TRAILING) != 0) { >> + while (input_str_sz > 0) { >> int len = 0; >> - for (i = 0; i < nChar; i++) { >> - len = aLen[i]; >> - if (len <= nIn >> - && memcmp(&zIn[nIn - len], >> - azChar[i], len) == 0) >> + for (i = 0; i < char_cnt; i++) { >> + len = char_len[i]; >> + if (len <= input_str_sz >> + && memcmp(&input_str[input_str_sz - len], >> + ind_chars[i], len) == 0) > > 11. Out of 80. And you saw that in your editor, even without > 'git diff' and console, because you have 80-rulers. So why did > you decide not to fix it? 
I have often seen similar cases in Tarantool’s code, where a few characters run past the 80-column limit. In my case, I just didn’t know how to fix that. >> break; >> } >> - if (i >= nChar) >> + if (i >= char_cnt) >> break; >> - nIn -= len; >> + input_str_sz -= len; >> } >> } >> - if (zCharSet) { >> - sql_free(azChar); >> - } >> + if (trim_set_sz != 0) >> + sql_free(ind_chars); >> + } >> + sql_result_text(context, (char *)input_str, input_str_sz, >> + SQL_TRANSIENT); >> +} >> + >> +/** >> + * Normalize args from @a argv input array when it has one arg >> + * only. >> + * >> + * Case: TRIM(<str>) >> + * Call trimming procedure with TRIM_BOTH as the flags and " " as >> + * the trimming set. >> + * >> + * @param context SQL context. > > 12. As I said in the previous reviews, we either omit doxygen > formal style completely, or use it correctly. If you want to use > doxygen, please, describe all the 3 parameters. If you do not > want, then omit @param/@retval section. The same for other places. +/** + * Normalize args from @a argv input array when it has one arg + * only. + * + * Case: TRIM(<str>) + * Call trimming procedure with TRIM_BOTH as the flags and " " as + * the trimming set. + */ +static void +trim_func_one_arg(struct sql_context *context, int argc, sql_value **argv) +/** + * Normalize args from @a argv input array when it has two args. + * + * Case: TRIM(<character_set> FROM <str>) + * If user has specified <character_set> only, call trimming + * procedure with TRIM_BOTH as the flags and that trimming set. + * + * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>) + * If user has specified side keyword only, then call trimming + * procedure with the specified side and " " as the trimming set. + */ +static void +trim_func_two_args(struct sql_context *context, int argc, sql_value **argv) +/** + * Normalize args from @a argv input array when it has three args.
+ * + * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>) + * If user has specified side keyword and <character_set>, then + * call trimming procedure with that args. + */ +static void +trim_func_three_args(struct sql_context *context, int argc, sql_value **argv) >> + */ >> +static void >> +trim_func_one_arg(sql_context * context, int argc, sql_value **argv) > > 13. In new code we use explicit 'struct' keyword for struct > types - sql_context and sql_value. Also, we do not put whitepaces > after '*' when declare a pointer type value. The same for other places. +trim_procedure(struct sql_context *context, enum trim_side_mask flags, + const unsigned char *trim_set, int trim_set_sz, + const unsigned char *input_str, int input_str_sz) +static void +trim_func_one_arg(struct sql_context *context, int argc, sql_value **argv) +static void +trim_func_two_args(struct sql_context *context, int argc, sql_value **argv) +static void +trim_func_three_args(struct sql_context *context, int argc, sql_value **argv) >> +/** >> + * Normalize args from @a argv input array when it has two args. >> + * >> + * Case: TRIM(<character_set> FROM <str>) >> + * If user has specified <character_set> only, call trimming >> + * procedure with TRIM_BOTH as the flags and that trimming set. >> + * >> + * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>) >> + * If user has specified side keyword only, then call trimming >> + * procedure with the specified side and " " as the trimming set. >> + * >> + * @param context SQL context. 
>> + */ >> +static void >> +trim_func_two_args(sql_context * context, int argc, sql_value **argv) >> +{ >> + const unsigned char *input_str; >> + assert(argc == 2); >> + (void) argc; >> + >> + if ((input_str = sql_value_text(argv[1])) == NULL) >> + return; >> + int input_str_sz = sql_value_bytes(argv[1]); >> + >> + const char unsigned *trim_set; >> + if (sql_value_type(argv[0]) == SQL_INTEGER) { >> + trim_procedure(context, sql_value_int(argv[0]), >> + (const unsigned char *) " ", 1, >> + input_str, input_str_sz); >> + } else if ((trim_set = sql_value_text(argv[0])) == NULL) { >> + return; >> + } else { > > 14. Please, apply. > > @@ -1427,9 +1427,7 @@ trim_func_two_args(sql_context * context, int argc, sql_value **argv) > trim_procedure(context, sql_value_int(argv[0]), > (const unsigned char *) " ", 1, > input_str, input_str_sz); > - } else if ((trim_set = sql_value_text(argv[0])) == NULL) { > - return; > - } else { > + } else if ((trim_set = sql_value_text(argv[0])) != NULL) { > int trim_set_sz = sql_value_bytes(argv[0]); > trim_procedure(context, TRIM_BOTH, trim_set, trim_set_sz, > input_str, input_str_sz); +static void +trim_func_two_args(struct sql_context *context, int argc, sql_value **argv) +{ + assert(argc == 2); + (void) argc; + + const unsigned char *input_str; + if ((input_str = sql_value_text(argv[1])) == NULL) + return; + + int input_str_sz = sql_value_bytes(argv[1]); + const unsigned char *trim_set; + if (sql_value_type(argv[0]) == SQL_INTEGER) { + trim_procedure(context, sql_value_int(argv[0]), + (const unsigned char *) " ", 1, + input_str, input_str_sz); + } else if ((trim_set = sql_value_text(argv[0])) != NULL) { + int trim_set_sz = sql_value_bytes(argv[0]); + trim_procedure(context, TRIM_BOTH, trim_set, trim_set_sz, + input_str, input_str_sz); + } +} >> + int trim_set_sz = sql_value_bytes(argv[0]); >> + trim_procedure(context, TRIM_BOTH, trim_set, trim_set_sz, >> + input_str, input_str_sz); >> + } >> +} >> + >> +/** >> + * Normalize args from @a 
argv input array when it has three args. >> + * >> + * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>) >> + * If user has specified side keyword and <character_set>, then >> + * call trimming procedure with that args. >> + * >> + * @param context SQL context. >> + */ >> +static void >> +trim_func_three_args(sql_context * context, int argc, sql_value **argv) >> +{ >> + const unsigned char *input_str; >> + assert(argc == 3); >> + assert(sql_value_type(argv[0]) == SQL_INTEGER); >> + (void) argc; >> + >> + if ((input_str = sql_value_text(argv[2])) == NULL) >> + return; >> + int input_str_sz = sql_value_bytes(argv[2]); >> + >> + const char unsigned *trim_set; >> + if ((trim_set = sql_value_text(argv[1])) != NULL) { >> + int trim_set_sz = sql_value_bytes(argv[1]); >> + trim_procedure(context, sql_value_int(argv[0]), trim_set, >> + trim_set_sz, input_str, input_str_sz); >> } >> - sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT); > > 15. Please, apply. > > @@ -1448,21 +1446,18 @@ trim_func_two_args(sql_context * context, int argc, sql_value **argv) > static void > trim_func_three_args(sql_context * context, int argc, sql_value **argv) > { > - const unsigned char *input_str; > + const unsigned char *input_str, *trim_set; > assert(argc == 3); > assert(sql_value_type(argv[0]) == SQL_INTEGER); > (void) argc; > > - if ((input_str = sql_value_text(argv[2])) == NULL) > + if ((input_str = sql_value_text(argv[2])) == NULL || > + (trim_set = sql_value_text(argv[1])) == NULL) > return; > int input_str_sz = sql_value_bytes(argv[2]); > - > - const char unsigned *trim_set; > - if ((trim_set = sql_value_text(argv[1])) != NULL) { > - int trim_set_sz = sql_value_bytes(argv[1]); > - trim_procedure(context, sql_value_int(argv[0]), trim_set, > - trim_set_sz, input_str, input_str_sz); > - } > + int trim_set_sz = sql_value_bytes(argv[1]); > + trim_procedure(context, sql_value_int(argv[0]), trim_set, trim_set_sz, > + input_str, input_str_sz); > } +static void 
+trim_func_three_args(struct sql_context *context, int argc, sql_value **argv) +{ + assert(argc == 3); + (void) argc; + + assert(sql_value_type(argv[0]) == SQL_INTEGER); + const unsigned char *input_str, *trim_set; + if ((input_str = sql_value_text(argv[2])) == NULL || + (trim_set = sql_value_text(argv[1])) == NULL) + return; + + int trim_set_sz = sql_value_bytes(argv[1]); + int input_str_sz = sql_value_bytes(argv[2]); + trim_procedure(context, sql_value_int(argv[0]), trim_set, trim_set_sz, + input_str, input_str_sz); } >> diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y >> index 099daf512..b49638d44 100644 >> --- a/src/box/sql/parse.y >> +++ b/src/box/sql/parse.y >> + >> +%type expr_optional {struct Expr *} >> +%destructor expr_optional {sql_expr_delete(pParse->db, $$, false);} >> + >> +expr_optional(A) ::= . { A = NULL; } >> +expr_optional(A) ::= expr(X). { A = X.pExpr; } >> + >> +%type trim_specification {int} > > 16. It is not int - it is enum trim_side_mask. +%type trim_specification {enum trim_side_mask} commit 9ae7a84bc59ded41302bde1dca0d0f82d540b960 Author: Roman Khabibov <roman.habibov@tarantool.org> Date: Thu Mar 28 14:01:33 2019 +0300 sql: modify TRIM() function signature According to the ANSI standard, ltrim, rtrim and trim should be merged into one unified TRIM() function. The specialization of trimming (left, right or both and trimming characters) determined in arguments of this function. 
Closes #3879 diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c index be7bd5545..76e3265e7 100644 --- a/extra/mkkeywordhash.c +++ b/extra/mkkeywordhash.c @@ -278,6 +278,10 @@ static Keyword aKeywordTable[] = { { "WHILE", "TK_STANDARD", RESERVED, true }, { "TEXT", "TK_TEXT", RESERVED, true }, { "TRUNCATE", "TK_TRUNCATE", ALWAYS, true }, + { "TRIM", "TK_TRIM", ALWAYS, true }, + { "LEADING", "TK_LEADING", ALWAYS, true }, + { "TRAILING", "TK_TRAILING", ALWAYS, true }, + { "BOTH", "TK_BOTH", ALWAYS, true }, }; /* Number of keywords */ diff --git a/src/box/sql/func.c b/src/box/sql/func.c index abeecefa1..6f2a5e3f6 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1286,108 +1286,173 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) sql_result_text(context, (char *)zOut, j, sql_free); } -/* - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. +/** + * Remove characters included in @a trim_set from @a input_str + * until encounter a character that doesn't belong to @a trim_set. + * Remove from the side specified by @a flags. + * @param context SQL context. + * @param flags Trim specification: left, right or both. + * @param trim_set The set of characters for trimming. + * @param trim_set_sz Character set size in bytes. + * @param input_str Input string for trimming. + * @param input_str_sz Input string size in bytes. 
*/ static void -trimFunc(sql_context * context, int argc, sql_value ** argv) +trim_procedure(struct sql_context *context, enum trim_side_mask flags, + const unsigned char *trim_set, int trim_set_sz, + const unsigned char *input_str, int input_str_sz) { - const unsigned char *zIn; /* Input string */ - const unsigned char *zCharSet; /* Set of characters to trim */ - int nIn; /* Number of bytes in input */ - int flags; /* 1: trimleft 2: trimright 3: trim */ - int i; /* Loop counter */ - unsigned char *aLen = 0; /* Length of each character in zCharSet */ - unsigned char **azChar = 0; /* Individual characters in zCharSet */ - int nChar; /* Number of characters in zCharSet */ - - if (sql_value_type(argv[0]) == SQL_NULL) { + const unsigned char *z = trim_set; + /* + * Count the number of UTF-8 characters passing through + * the entire char set, but not up to the '\0' or X'00' + * character. This allows to handle trimming set + * containing such characters. + */ + int char_cnt = sql_utf8_char_count(z, trim_set_sz); + if (char_cnt == 0) + goto result; + /* Individual characters in the character set. */ + unsigned char **ind_chars = + contextMalloc(context, + char_cnt * (sizeof(unsigned char *) + 1)); + if (ind_chars == NULL) return; + /* Length of each character in the character set. 
*/ + uint8_t *char_len = (uint8_t *)&ind_chars[char_cnt]; + z = trim_set; + int i = 0; + char_cnt = 0; + int handled_bytes_cnt = trim_set_sz; + while(handled_bytes_cnt > 0) { + ind_chars[char_cnt] = (unsigned char *)(z + i); + SQL_UTF8_FWD_1(z, i, trim_set_sz); + char_len[char_cnt] = z + i - ind_chars[char_cnt]; + handled_bytes_cnt -= char_len[char_cnt]; + char_cnt++; } - zIn = sql_value_text(argv[0]); - if (zIn == 0) - return; - nIn = sql_value_bytes(argv[0]); - assert(zIn == sql_value_text(argv[0])); - if (argc == 1) { - static const unsigned char lenOne[] = { 1 }; - static unsigned char *const azOne[] = { (u8 *) " " }; - nChar = 1; - aLen = (u8 *) lenOne; - azChar = (unsigned char **)azOne; - zCharSet = 0; - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { - return; - } else { - const unsigned char *z = zCharSet; - int trim_set_sz = sql_value_bytes(argv[1]); - /* - * Count the number of UTF-8 characters passing - * through the entire char set, but not up - * to the '\0' or X'00' character. This allows - * to handle trimming set containing such - * characters. 
- */ - nChar = sql_utf8_char_count(z, trim_set_sz); - if (nChar > 0) { - azChar = - contextMalloc(context, - ((i64) nChar) * (sizeof(char *) + 1)); - if (azChar == 0) { - return; - } - aLen = (unsigned char *)&azChar[nChar]; - z = zCharSet; - i = 0; - nChar = 0; - int handled_bytes_cnt = trim_set_sz; - while(handled_bytes_cnt > 0) { - azChar[nChar] = (unsigned char *)(z + i); - SQL_UTF8_FWD_1(z, i, trim_set_sz); - aLen[nChar] = (u8) (z + i - azChar[nChar]); - handled_bytes_cnt -= aLen[nChar]; - nChar++; - } - } - } - if (nChar > 0) { - flags = SQL_PTR_TO_INT(sql_user_data(context)); - if (flags & 1) { - while (nIn > 0) { - int len = 0; - for (i = 0; i < nChar; i++) { - len = aLen[i]; - if (len <= nIn - && memcmp(zIn, azChar[i], len) == 0) - break; - } - if (i >= nChar) + + if (char_cnt == 0) + goto result; + if ((flags & TRIM_LEADING) != 0) { + while (input_str_sz > 0) { + int len = 0; + for (i = 0; i < char_cnt; i++) { + len = char_len[i]; + if (len <= input_str_sz + && memcmp(input_str, + ind_chars[i], len) == 0) break; - zIn += len; - nIn -= len; } + if (i >= char_cnt) + break; + input_str += len; + input_str_sz -= len; } - if (flags & 2) { - while (nIn > 0) { - int len = 0; - for (i = 0; i < nChar; i++) { - len = aLen[i]; - if (len <= nIn - && memcmp(&zIn[nIn - len], - azChar[i], len) == 0) - break; - } - if (i >= nChar) + } + if ((flags & TRIM_TRAILING) != 0) { + while (input_str_sz > 0) { + int len = 0; + for (i = 0; i < char_cnt; i++) { + len = char_len[i]; + if (len <= input_str_sz + && memcmp(&input_str[input_str_sz - len], + ind_chars[i], len) == 0) break; - nIn -= len; } - } - if (zCharSet) { - sql_free(azChar); + if (i >= char_cnt) + break; + input_str_sz -= len; } } - sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT); + + if (trim_set_sz != 0) + sql_free(ind_chars); + + result: sql_result_text(context, (char *)input_str, input_str_sz, + SQL_TRANSIENT); +} + +/** + * Normalize args from @a argv input array when it has one arg + * only. 
+ * + * Case: TRIM(<str>) + * Call trimming procedure with TRIM_BOTH as the flags and " " as + * the trimming set. + */ +static void +trim_func_one_arg(struct sql_context *context, int argc, sql_value **argv) +{ + assert(argc == 1); + (void) argc; + + const unsigned char *input_str; + if ((input_str = sql_value_text(argv[0])) == NULL) + return; + + int input_str_sz = sql_value_bytes(argv[0]); + trim_procedure(context, TRIM_BOTH, (const unsigned char *) " ", + 1, input_str, input_str_sz); +} + +/** + * Normalize args from @a argv input array when it has two args. + * + * Case: TRIM(<character_set> FROM <str>) + * If user has specified <character_set> only, call trimming + * procedure with TRIM_BOTH as the flags and that trimming set. + * + * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>) + * If user has specified side keyword only, then call trimming + * procedure with the specified side and " " as the trimming set. + */ +static void +trim_func_two_args(struct sql_context *context, int argc, sql_value **argv) +{ + assert(argc == 2); + (void) argc; + + const unsigned char *input_str; + if ((input_str = sql_value_text(argv[1])) == NULL) + return; + + int input_str_sz = sql_value_bytes(argv[1]); + const unsigned char *trim_set; + if (sql_value_type(argv[0]) == SQL_INTEGER) { + trim_procedure(context, sql_value_int(argv[0]), + (const unsigned char *) " ", 1, + input_str, input_str_sz); + } else if ((trim_set = sql_value_text(argv[0])) != NULL) { + int trim_set_sz = sql_value_bytes(argv[0]); + trim_procedure(context, TRIM_BOTH, trim_set, trim_set_sz, + input_str, input_str_sz); + } +} + +/** + * Normalize args from @a argv input array when it has three args. + * + * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>) + * If user has specified side keyword and <character_set>, then + * call trimming procedure with that args. 
+ */ +static void +trim_func_three_args(struct sql_context *context, int argc, sql_value **argv) +{ + assert(argc == 3); + (void) argc; + + assert(sql_value_type(argv[0]) == SQL_INTEGER); + const unsigned char *input_str, *trim_set; + if ((input_str = sql_value_text(argv[2])) == NULL || + (trim_set = sql_value_text(argv[1])) == NULL) + return; + + int trim_set_sz = sql_value_bytes(argv[1]); + int input_str_sz = sql_value_bytes(argv[2]); + trim_procedure(context, sql_value_int(argv[0]), trim_set, trim_set_sz, + input_str, input_str_sz); } #ifdef SQL_ENABLE_UNKNOWN_SQL_FUNCTION @@ -1818,12 +1883,9 @@ sqlRegisterBuiltinFunctions(void) FIELD_TYPE_INTEGER), FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY, FIELD_TYPE_INTEGER), - FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc), - FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc), - FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc), - FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc), - FUNCTION_COLL(trim, 1, 3, 0, trimFunc), - FUNCTION_COLL(trim, 2, 3, 0, trimFunc), + FUNCTION_COLL(trim, 1, 3, 0, trim_func_one_arg), + FUNCTION_COLL(trim, 2, 3, 0, trim_func_two_args), + FUNCTION_COLL(trim, 3, 3, 0, trim_func_three_args), FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR), FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR), AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize, diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index 099daf512..a56ce7a10 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -1032,6 +1032,55 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). { sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0); } %endif SQL_OMIT_CAST + +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). { + A.pExpr = sqlExprFunction(pParse, Y, &X); + spanSet(&A, &X, &E); +} + +%type trim_operands {struct ExprList *} +%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);} + +trim_operands(A) ::= trim_from_clause(F) expr(Y). { + A = sql_expr_list_append(pParse->db, F, Y.pExpr); +} + +trim_operands(A) ::= expr(Y). 
{ + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); +} + +%type trim_from_clause {struct ExprList *} +%destructor trim_from_clause {sql_expr_list_delete(pParse->db, $$);} + +/* + * The following two rules cover three cases of keyword + * (LEADING/TRAILING/BOTH) and <trim_character_set> combination. + * The case when both of them are absent is disallowed. + */ +trim_from_clause(A) ::= expr(Y) FROM. { + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); +} + +trim_from_clause(A) ::= trim_specification(N) expr_optional(Y) FROM. { + struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER, + &sqlIntTokens[N]); + A = sql_expr_list_append(pParse->db, NULL, p); + if (Y != NULL) + A = sql_expr_list_append(pParse->db, A, Y); +} + +%type expr_optional {struct Expr *} +%destructor expr_optional {sql_expr_delete(pParse->db, $$, false);} + +expr_optional(A) ::= . { A = NULL; } +expr_optional(A) ::= expr(X). { A = X.pExpr; } + +%type trim_specification {enum trim_side_mask} + +trim_specification(A) ::= LEADING. { A = TRIM_LEADING; } +trim_specification(A) ::= TRAILING. { A = TRIM_TRAILING; } +trim_specification(A) ::= BOTH. { A = TRIM_BOTH; } + expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). { if( Y && Y->nExpr>pParse->db->aLimit[SQL_LIMIT_FUNCTION_ARG] ){ const char *err = @@ -1294,7 +1343,7 @@ expr(A) ::= EXISTS(B) LP select(Y) RP(E). { } /* CASE expressions */ -expr(A) ::= CASE(C) case_operand(X) case_exprlist(Y) case_else(Z) END(E). { +expr(A) ::= CASE(C) expr_optional(X) case_exprlist(Y) case_else(Z) END(E). { spanSet(&A,&C,&E); /*A-overwrites-C*/ A.pExpr = sqlPExpr(pParse, TK_CASE, X, 0); if( A.pExpr ){ @@ -1319,10 +1368,6 @@ case_exprlist(A) ::= WHEN expr(Y) THEN expr(Z). { %destructor case_else {sql_expr_delete(pParse->db, $$, false);} case_else(A) ::= ELSE expr(X). {A = X.pExpr;} case_else(A) ::= . {A = 0;} -%type case_operand {Expr*} -%destructor case_operand {sql_expr_delete(pParse->db, $$, false);} -case_operand(A) ::= expr(X). 
{A = X.pExpr; /*A-overwrites-X*/} -case_operand(A) ::= . {A = 0;} %type exprlist {ExprList*} %destructor exprlist {sql_expr_list_delete(pParse->db, $$);} diff --git a/src/box/sql/parse_def.c b/src/box/sql/parse_def.c index 49c76a326..aa1323cb2 100644 --- a/src/box/sql/parse_def.c +++ b/src/box/sql/parse_def.c @@ -34,7 +34,9 @@ const struct Token sqlIntTokens[] = { {"0", 1, false}, - {"1", 1, false} + {"1", 1, false}, + {"2", 1, false}, + {"3", 1, false}, }; void diff --git a/src/box/sql/parse_def.h b/src/box/sql/parse_def.h index a1af2bacd..5899a7e4e 100644 --- a/src/box/sql/parse_def.h +++ b/src/box/sql/parse_def.h @@ -87,7 +87,7 @@ struct Token { bool isReserved; }; -/** Constant tokens for values 0 and 1. */ +/** Constant tokens for integer values. */ extern const struct Token sqlIntTokens[]; /** Generate a Token object from a string. */ diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h index b322602dc..d5a3e15c1 100644 --- a/src/box/sql/sqlInt.h +++ b/src/box/sql/sqlInt.h @@ -1680,6 +1680,17 @@ struct FuncDestructor { * single query - might change over time */ +/* + * Trim side mask components. TRIM_LEADING means to trim left side + * only. TRIM_TRAILING is to trim right side only. TRIM_BOTH is to + * trim both sides. + */ +enum trim_side_mask { + TRIM_LEADING = 1, + TRIM_TRAILING = 2, + TRIM_BOTH = TRIM_LEADING | TRIM_TRAILING +}; + /* * The following three macros, FUNCTION(), LIKEFUNC() and AGGREGATE() are * used to create the initializers for the FuncDef structures. 
diff --git a/test/sql-tap/badutf1.test.lua b/test/sql-tap/badutf1.test.lua index d104efaa9..9079dfe25 100755 --- a/test/sql-tap/badutf1.test.lua +++ b/test/sql-tap/badutf1.test.lua @@ -302,7 +302,7 @@ test:do_test( test:do_test( "badutf-4.1", function() - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(TRIM('\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.1> "X", "F0" @@ -312,7 +312,7 @@ test:do_test( test:do_test( "badutf-4.2", function() - return test:execsql2("SELECT hex(ltrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(TRIM(LEADING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.2> "X", "F0808080FF" @@ -322,7 +322,7 @@ test:do_test( test:do_test( "badutf-4.3", function() - return test:execsql2("SELECT hex(rtrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(TRIM(TRAILING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.3> "X", "808080F0" @@ -332,7 +332,7 @@ test:do_test( test:do_test( "badutf-4.4", function() - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(TRIM('\xff\x80' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.4> "X", "808080F0808080FF" @@ -342,7 +342,7 @@ test:do_test( test:do_test( "badutf-4.5", function() - return test:execsql2("SELECT hex(trim('\xff\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(TRIM('\xff\x80' FROM '\xff\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.5> "X", "80F0808080FF" @@ -352,7 +352,7 @@ test:do_test( test:do_test( "badutf-4.6", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(TRIM('\xff\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS 
x") end, { -- <badutf-4.6> "X", "F0808080FF" @@ -362,7 +362,7 @@ test:do_test( test:do_test( "badutf-4.7", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80\x80')) AS x") + return test:execsql2("SELECT hex(TRIM('\xff\x80\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.7> "X", "FF80F0808080FF" diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua index 251cc3534..fe9a98191 100755 --- a/test/sql-tap/func.test.lua +++ b/test/sql-tap/func.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test = require("sqltester") -test:plan(14586) +test:plan(14590) --!./tcltestrunner.lua -- 2001 September 15 @@ -1915,7 +1915,7 @@ test:do_catchsql_test( SELECT trim(1,2,3) ]], { -- <func-22.1> - 1, "wrong number of arguments to function TRIM()" + 1, "Syntax error near ','" -- </func-22.1> }) @@ -1925,7 +1925,7 @@ test:do_catchsql_test( SELECT ltrim(1,2,3) ]], { -- <func-22.2> - 1, "wrong number of arguments to function LTRIM()" + 1, "Function 'LTRIM' does not exist" -- </func-22.2> }) @@ -1935,7 +1935,7 @@ test:do_catchsql_test( SELECT rtrim(1,2,3) ]], { -- <func-22.3> - 1, "wrong number of arguments to function RTRIM()" + 1, "Function 'RTRIM' does not exist" -- </func-22.3> }) @@ -1952,7 +1952,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.5", [[ - SELECT ltrim(' hi '); + SELECT TRIM(LEADING FROM ' hi '); ]], { -- <func-22.5> "hi " @@ -1962,7 +1962,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.6", [[ - SELECT rtrim(' hi '); + SELECT TRIM(TRAILING FROM ' hi '); ]], { -- <func-22.6> " hi" @@ -1972,7 +1972,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.7", [[ - SELECT trim(' hi ','xyz'); + SELECT TRIM('xyz' FROM ' hi '); ]], { -- <func-22.7> " hi " @@ -1982,7 +1982,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.8", [[ - SELECT ltrim(' hi ','xyz'); + SELECT TRIM(LEADING 'xyz' FROM ' hi '); ]], { -- <func-22.8> " hi " @@ -1992,7 +1992,7 @@ test:do_execsql_test( 
test:do_execsql_test( "func-22.9", [[ - SELECT rtrim(' hi ','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM ' hi '); ]], { -- <func-22.9> " hi " @@ -2002,7 +2002,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.10", [[ - SELECT trim('xyxzy hi zzzy','xyz'); + SELECT TRIM('xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.10> " hi " @@ -2012,7 +2012,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.11", [[ - SELECT ltrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(LEADING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.11> " hi zzzy" @@ -2022,7 +2022,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.12", [[ - SELECT rtrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.12> "xyxzy hi " @@ -2032,7 +2032,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.13", [[ - SELECT trim(' hi ',''); + SELECT TRIM('' FROM ' hi '); ]], { -- <func-22.13> " hi " @@ -2043,7 +2043,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.14", [[ - SELECT hex(trim(x'c280e1bfbff48fbfbf6869',x'6162e1bfbfc280')) + SELECT hex(TRIM(x'6162e1bfbfc280' FROM x'c280e1bfbff48fbfbf6869')) ]], { -- <func-22.14> "F48FBFBF6869" @@ -2052,8 +2052,8 @@ test:do_execsql_test( test:do_execsql_test( "func-22.15", - [[SELECT hex(trim(x'6869c280e1bfbff48fbfbf61', - x'6162e1bfbfc280f48fbfbf'))]], { + [[SELECT hex(TRIM(x'6162e1bfbfc280f48fbfbf' + FROM x'6869c280e1bfbff48fbfbf61'))]], { -- <func-22.15> "6869" -- </func-22.15> @@ -2062,7 +2062,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.16", [[ - SELECT hex(trim(x'ceb1ceb2ceb3',x'ceb1')); + SELECT hex(TRIM(x'ceb1' FROM x'ceb1ceb2ceb3')); ]], { -- <func-22.16> "CEB2CEB3" @@ -2083,7 +2083,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.21", [[ - SELECT typeof(trim(NULL,'xyz')); + SELECT typeof(TRIM('xyz' FROM NULL)); ]], { -- <func-22.21> "null" @@ -2093,7 +2093,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.22", [[ - SELECT typeof(trim('hello',NULL)); + SELECT 
typeof(TRIM(NULL FROM 'hello')); ]], { -- <func-22.22> "null" @@ -2105,7 +2105,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.23", [[ - SELECT TRIM(X'004100', X'00'); + SELECT TRIM(X'00' FROM X'004100'); ]], { -- <func-22.23> "A" @@ -2115,7 +2115,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.24", [[ - SELECT TRIM(X'004100', X'0000'); + SELECT TRIM(X'0000' FROM X'004100'); ]], { -- <func-22.24> "A" @@ -2125,7 +2125,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.25", [[ - SELECT TRIM(X'004100', X'0042'); + SELECT TRIM(X'0042' FROM X'004100'); ]], { -- <func-22.25> "A" @@ -2135,7 +2135,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.26", [[ - SELECT TRIM(X'00004100420000', X'00'); + SELECT TRIM(X'00' FROM X'00004100420000'); ]], { -- <func-22.26> "A\0B" @@ -2145,7 +2145,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.27", [[ - SELECT LTRIM(X'004100', X'00'); + SELECT TRIM(LEADING X'00' FROM X'004100'); ]], { -- <func-22.27> "A\0" @@ -2155,7 +2155,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.28", [[ - SELECT LTRIM(X'004100', X'0000'); + SELECT TRIM(LEADING X'0000' FROM X'004100'); ]], { -- <func-22.28> "A\0" @@ -2165,7 +2165,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.29", [[ - SELECT LTRIM(X'004100', X'0042'); + SELECT TRIM(LEADING X'0042' FROM X'004100'); ]], { -- <func-22.29> "A\0" @@ -2175,7 +2175,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.30", [[ - SELECT LTRIM(X'00004100420000', X'00'); + SELECT TRIM(LEADING X'00' FROM X'00004100420000'); ]], { -- <func-22.30> "A\0B\0\0" @@ -2185,7 +2185,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.31", [[ - SELECT RTRIM(X'004100', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'004100'); ]], { -- <func-22.31> "\0A" @@ -2195,7 +2195,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.32", [[ - SELECT RTRIM(X'004100', X'0000'); + SELECT TRIM(TRAILING X'0000' FROM X'004100'); ]], { -- <func-22.32> "\0A" @@ -2205,7 +2205,7 
@@ test:do_execsql_test( test:do_execsql_test( "func-22.33", [[ - SELECT RTRIM(X'004100', X'0042'); + SELECT TRIM(TRAILING X'0042' FROM X'004100'); ]], { -- <func-22.33> "\0A" @@ -2215,13 +2215,56 @@ test:do_execsql_test( test:do_execsql_test( "func-22.34", [[ - SELECT RTRIM(X'00004100420000', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'00004100420000'); ]], { -- <func-22.34> "\0\0A\0B" -- </func-22.34> }) +-- gh-3879 Check new TRIM() grammar, particularly BOTH keyword and +-- FROM without any agrs before. LEADING and TRAILING keywords is +-- checked above. + +test:do_execsql_test( + "func-22.35", + [[ + SELECT TRIM(BOTH FROM ' hi '); + ]], { + -- <func-22.35> + "hi" + -- </func-22.35> + }) +test:do_execsql_test( + "func-22.36", + [[ + SELECT TRIM(BOTH 'xyz' FROM ' hi '); + ]], { + -- <func-22.36> + " hi " + -- </func-22.36> + }) + +test:do_execsql_test( + "func-22.37", + [[ + SELECT TRIM(BOTH 'xyz' FROM 'xyxzy hi zzzy'); + ]], { + -- <func-22.37> + " hi " + -- </func-22.37> + }) + +test:do_catchsql_test( + "func-22.38", + [[ + SELECT TRIM(FROM 'xyxzy'); + ]], { + -- <func-22.38> + 1, "Syntax error near 'FROM'" + -- </func-22.38> + }) + -- This is to test the deprecated sql_aggregate_count() API. 
-- --test:do_test( @@ -2838,16 +2881,16 @@ test:do_execsql_test( "SELECT TRIM(CHAR(32,00,32,00,32));", {string.char(00,32,00)}) --- LTRIM +-- LEFT TRIM test:do_execsql_test( "func-70", - "SELECT LTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(LEADING FROM CHAR(32,00,32,00,32));", {string.char(00,32,00,32)}) --- RTRIM +-- RIGHT TRIM test:do_execsql_test( "func-71", - "SELECT RTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(TRAILING FROM CHAR(32,00,32,00,32));", {string.char(32,00,32,00)}) -- GROUP_CONCAT diff --git a/test/sql-tap/with1.test.lua b/test/sql-tap/with1.test.lua index 495aa4ee4..ec45e5e76 100755 --- a/test/sql-tap/with1.test.lua +++ b/test/sql-tap/with1.test.lua @@ -550,7 +550,7 @@ test:do_execsql_test("8.1-mandelbrot", [[ SELECT group_concat( substr(' .+*#', 1+min(iter/7,4), 1), '') FROM m2 GROUP BY cy ) - SELECT group_concat(rtrim(t),x'0a') FROM a; + SELECT group_concat(TRIM(TRAILING FROM t),x'0a') FROM a; ]], { -- <8.1-mandelbrot> [[ ....# ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature 2019-04-20 0:48 ` Roman Khabibov @ 2019-04-21 19:36 ` Vladislav Shpilevoy 2019-04-22 10:43 ` Vladislav Shpilevoy 2019-04-22 16:45 ` Roman Khabibov 0 siblings, 2 replies; 14+ messages in thread From: Vladislav Shpilevoy @ 2019-04-21 19:36 UTC (permalink / raw) To: Roman Khabibov, tarantool-patches Hi! Thanks for the patch! See 8 comments below. >> 4. You again ignored my comment about NULL. Please, find all other >> places and fix it finally. I said it already 1000 times in 1000 >> reviews - we do not use 0 for pointers. It is a simple rule. Just >> follow it. Write it down somewhere in a list of code style rules >> and check them all before sending a patch. >> >> Seeing how many my comments you repeatedly ignore, I think that >> probably you should reconsider the way how you do self-reviews. If >> you do it via just looking a couple of seconds at the code in the >> text editor, then it is definitely a bad way. >> >> First of all, use 'git diff/show' in console to look only at the >> patch changes, not at the entire files and functions. If you do not >> like console, and it is ok, then you can use Sublime Merge >> desktop program or Sublime Git package for the editor. When you >> look at the diff only, it is much simpler to notice such violations >> and even bugs. > + int char_cnt = sql_utf8_char_count(z, trim_set_sz); > + if (char_cnt == 0) > > + unsigned char **ind_chars = > + contextMalloc(context, > + char_cnt * (sizeof(unsigned char *) + 1)); > + if (ind_chars == NULL) > > + int i = 0; > + char_cnt = 0; > > + if ((input_str = sql_value_text(argv[0])) == NULL) > + return; > > + if ((input_str = sql_value_text(argv[1])) == NULL) > + return; > > + } else if ((trim_set = sql_value_text(argv[0])) != NULL) { > + int trim_set_sz = sql_value_bytes(argv[0]); > > + if ((input_str = sql_value_text(argv[2])) == NULL || > + (trim_set = sql_value_text(argv[1])) == NULL) > + return; 1. 
Sorry, but I do not understand this bunch of '+' and random lines above. What is it? It is not a diff - I do not see function names, '-', line numbers. Please, next time provide *full* normal diff obtained by 'git --no-pager diff' command. The same about all other 'diff's below. > >> 5. In our code style we do not use 'char' to represent numbers, we >> use 'uint8_t' or 'int8_t' when we want to use one-byte numbers. It >> is the same as 'char'/'unsigned char', but looks shorter and it >> becomes obvious that these values are used as numbers, not text. >> Firstly I thought that char_len was an array of characters, but >> it emerged being an array of symbol sizes. In the summary, I >> suggest to use 'uint8_t *' for char_len array. > + uint8_t *char_len = (uint8_t *)&ind_chars[char_cnt]; > >>> + /* Individual characters in the character set. */ >>> + char unsigned **ind_chars = 0; >> >> 6. If you declare it as 'const char unsigned **', then you >> can remove unnecessary type cast from line 1330. > If you meant the following line "ind_chars[char_cnt] = (unsigned char *)(z + i);” > then it isn’t compiled without the cast, because of assigning to "'unsigned char *’ > from 'const unsigned char *’”. 2. No, I meant literally the same what I said - if you declare 'ind_chars' as 'const unsigned char **' instead of 'unsigned char **', then you do not need this cast. It is a standard. Doubtfully it depends on compiler, and the diff below must work everywhere. Please, check it out. ========================================================================== diff --git a/src/box/sql/func.c b/src/box/sql/func.c index 6f2a5e3f6..9bd41335e 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1313,7 +1313,7 @@ trim_procedure(struct sql_context *context, enum trim_side_mask flags, if (char_cnt == 0) goto result; /* Individual characters in the character set. 
*/ - unsigned char **ind_chars = + const unsigned char **ind_chars = contextMalloc(context, char_cnt * (sizeof(unsigned char *) + 1)); if (ind_chars == NULL) @@ -1325,7 +1325,7 @@ trim_procedure(struct sql_context *context, enum trim_side_mask flags, char_cnt = 0; int handled_bytes_cnt = trim_set_sz; while(handled_bytes_cnt > 0) { - ind_chars[char_cnt] = (unsigned char *)(z + i); + ind_chars[char_cnt] = z + i; SQL_UTF8_FWD_1(z, i, trim_set_sz); char_len[char_cnt] = z + i - ind_chars[char_cnt]; handled_bytes_cnt -= char_len[char_cnt]; ========================================================================== >>> + if ((flags & TRIM_LEADING) != 0) { >>> + while (input_str_sz > 0) { >>> int len = 0; >>> - for (i = 0; i < nChar; i++) { >>> - len = aLen[i]; >>> - if (len <= nIn >>> - && memcmp(zIn, azChar[i], len) == 0) >>> + for (i = 0; i < char_cnt; i++) { >>> + len = char_len[i]; >>> + if (len <= input_str_sz >>> + && memcmp(input_str, >>> + ind_chars[i], len) == 0) >>> break; >>> } >>> - if (i >= nChar) >>> + if (i >= char_cnt) >>> break; >>> - zIn += len; >>> - nIn -= len; >>> + input_str += len; >>> + input_str_sz -= len; >>> } >>> } >>> - if (flags & 2) { >>> - while (nIn > 0) { >>> + if ((flags & TRIM_TRAILING) != 0) { >>> + while (input_str_sz > 0) { >>> int len = 0; >>> - for (i = 0; i < nChar; i++) { >>> - len = aLen[i]; >>> - if (len <= nIn >>> - && memcmp(&zIn[nIn - len], >>> - azChar[i], len) == 0) >>> + for (i = 0; i < char_cnt; i++) { >>> + len = char_len[i]; >>> + if (len <= input_str_sz >>> + && memcmp(&input_str[input_str_sz - len], >>> + ind_chars[i], len) == 0) >> >> 11. Out of 80. And you saw that in your editor, even without >> 'git diff' and console, because you have 80-rulers. So why did >> you decide not to fix it? > I often saw same instances in the Tarantool’s code, when few characters is out of 80. 3. Where? 
If there are such places, then they violate our code style, and usually are pushed by those who do not care about it, but on the other hand sadly have rights to push without reviews directly into the master. > In my case, I just didn’t know how to fix that. 4. There are basically 3 ways to fix, always: 1) split into smaller functions; 2) add 'goto's; 3) reorganize the code so as to reduce number of checks. They are repetitive or unnecessary quite often. In this concrete case I think 'goto's are ok. >>> + */ >>> +static void >>> +trim_func_one_arg(sql_context * context, int argc, sql_value **argv) >> >> 13. In new code we use explicit 'struct' keyword for struct >> types - sql_context and sql_value. Also, we do not put whitespaces 5. Note, I said 'sql_value' too, and look at your code below. What have you missed? >> after '*' when declaring a pointer type value. The same for other places. > +trim_procedure(struct sql_context *context, enum trim_side_mask flags, > + const unsigned char *trim_set, int trim_set_sz, > + const unsigned char *input_str, int input_str_sz) > > +static void > +trim_func_one_arg(struct sql_context *context, int argc, sql_value **argv) > > +static void > +trim_func_two_args(struct sql_context *context, int argc, sql_value **argv) > > +static void > +trim_func_three_args(struct sql_context *context, int argc, sql_value **argv) > > diff --git a/src/box/sql/func.c b/src/box/sql/func.c > index abeecefa1..6f2a5e3f6 100644 > --- a/src/box/sql/func.c > +++ b/src/box/sql/func.c > @@ -1286,108 +1286,173 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) > sql_result_text(context, (char *)zOut, j, sql_free); > } > > -/* > - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. > - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. > +/** > + * Remove characters included in @a trim_set from @a input_str > + * until encounter a character that doesn't belong to @a trim_set.
> + * Remove from the side specified by @a flags. > + * @param context SQL context. > + * @param flags Trim specification: left, right or both. > + * @param trim_set The set of characters for trimming. > + * @param trim_set_sz Character set size in bytes. > + * @param input_str Input string for trimming. > + * @param input_str_sz Input string size in bytes. > */ > static void > -trimFunc(sql_context * context, int argc, sql_value ** argv) > +trim_procedure(struct sql_context *context, enum trim_side_mask flags, > + const unsigned char *trim_set, int trim_set_sz, > + const unsigned char *input_str, int input_str_sz) > { > - const unsigned char *zIn; /* Input string */ > - const unsigned char *zCharSet; /* Set of characters to trim */ > - int nIn; /* Number of bytes in input */ > - int flags; /* 1: trimleft 2: trimright 3: trim */ > - int i; /* Loop counter */ > - unsigned char *aLen = 0; /* Length of each character in zCharSet */ > - unsigned char **azChar = 0; /* Individual characters in zCharSet */ > - int nChar; /* Number of characters in zCharSet */ > - > - if (sql_value_type(argv[0]) == SQL_NULL) { > + const unsigned char *z = trim_set; > + /* > + * Count the number of UTF-8 characters passing through > + * the entire char set, but not up to the '\0' or X'00' > + * character. This allows to handle trimming set > + * containing such characters. > + */ > + int char_cnt = sql_utf8_char_count(z, trim_set_sz); > + if (char_cnt == 0) > + goto result; > + /* Individual characters in the character set. */ > + unsigned char **ind_chars = > + contextMalloc(context, > + char_cnt * (sizeof(unsigned char *) + 1)); > + if (ind_chars == NULL) > return; > + /* Length of each character in the character set. 
*/ > + uint8_t *char_len = (uint8_t *)&ind_chars[char_cnt]; > + z = trim_set; > + int i = 0; > + char_cnt = 0; > + int handled_bytes_cnt = trim_set_sz; > + while(handled_bytes_cnt > 0) { > + ind_chars[char_cnt] = (unsigned char *)(z + i); > + SQL_UTF8_FWD_1(z, i, trim_set_sz); > + char_len[char_cnt] = z + i - ind_chars[char_cnt]; > + handled_bytes_cnt -= char_len[char_cnt]; > + char_cnt++; > } > - zIn = sql_value_text(argv[0]); > - if (zIn == 0) > - return; > - nIn = sql_value_bytes(argv[0]); > - assert(zIn == sql_value_text(argv[0])); > - if (argc == 1) { > - static const unsigned char lenOne[] = { 1 }; > - static unsigned char *const azOne[] = { (u8 *) " " }; > - nChar = 1; > - aLen = (u8 *) lenOne; > - azChar = (unsigned char **)azOne; > - zCharSet = 0; > - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { > - return; > - } else { > - const unsigned char *z = zCharSet; > - int trim_set_sz = sql_value_bytes(argv[1]); > - /* > - * Count the number of UTF-8 characters passing > - * through the entire char set, but not up > - * to the '\0' or X'00' character. This allows > - * to handle trimming set containing such > - * characters. 
> - */ > - nChar = sql_utf8_char_count(z, trim_set_sz); > - if (nChar > 0) { > - azChar = > - contextMalloc(context, > - ((i64) nChar) * (sizeof(char *) + 1)); > - if (azChar == 0) { > - return; > - } > - aLen = (unsigned char *)&azChar[nChar]; > - z = zCharSet; > - i = 0; > - nChar = 0; > - int handled_bytes_cnt = trim_set_sz; > - while(handled_bytes_cnt > 0) { > - azChar[nChar] = (unsigned char *)(z + i); > - SQL_UTF8_FWD_1(z, i, trim_set_sz); > - aLen[nChar] = (u8) (z + i - azChar[nChar]); > - handled_bytes_cnt -= aLen[nChar]; > - nChar++; > - } > - } > - } > - if (nChar > 0) { > - flags = SQL_PTR_TO_INT(sql_user_data(context)); > - if (flags & 1) { > - while (nIn > 0) { > - int len = 0; > - for (i = 0; i < nChar; i++) { > - len = aLen[i]; > - if (len <= nIn > - && memcmp(zIn, azChar[i], len) == 0) > - break; > - } > - if (i >= nChar) > + > + if (char_cnt == 0) > + goto result; 6. How is it possible, that first time sql_utf8_char_count() returned not 0, but now we got 0 on the same string using the same methods (SQL_UTF8_FWD_1)? 
> + if ((flags & TRIM_LEADING) != 0) { > + while (input_str_sz > 0) { > + int len = 0; > + for (i = 0; i < char_cnt; i++) { > + len = char_len[i]; > + if (len <= input_str_sz > + && memcmp(input_str, > + ind_chars[i], len) == 0) > break; > - zIn += len; > - nIn -= len; > } > + if (i >= char_cnt) > + break; > + input_str += len; > + input_str_sz -= len; > } > - if (flags & 2) { > - while (nIn > 0) { > - int len = 0; > - for (i = 0; i < nChar; i++) { > - len = aLen[i]; > - if (len <= nIn > - && memcmp(&zIn[nIn - len], > - azChar[i], len) == 0) > - break; > - } > - if (i >= nChar) > + } > + if ((flags & TRIM_TRAILING) != 0) { > + while (input_str_sz > 0) { > + int len = 0; > + for (i = 0; i < char_cnt; i++) { > + len = char_len[i]; > + if (len <= input_str_sz > + && memcmp(&input_str[input_str_sz - len], > + ind_chars[i], len) == 0) > break; > - nIn -= len; > } > - } > - if (zCharSet) { > - sql_free(azChar); > + if (i >= char_cnt) > + break; > + input_str_sz -= len; > } > } > - sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT); > + > + if (trim_set_sz != 0) > + sql_free(ind_chars); > + > + result: sql_result_text(context, (char *)input_str, input_str_sz, > + SQL_TRANSIENT); 7. We never declare labels like that. Please, look at other places where and how we declare labels. Also it is stated that we use Linux Kernel code style, as I remember, and it means, that you can lookup doubtful places on sites like this: https://www.kernel.org/doc/html/v4.10/process/coding-style.html#centralized-exiting-of-functions 8. Now the function looks much more clean, and it is easy to see that its performance for the most common case is strongly down. Before your patch when there was no 'trim character set' argument, whitespaces where used by default, but they were not allocated on new memory via contextMalloc() - they were declared on stack as an optimization. 
Please, refactor that function so that it does not allocate anything and instead takes an already prepared trim char set and trim char lengths. These arguments should only be allocated by trim_func_two_args() and trim_func_three_args(), and only on demand, when a non-default trim set is passed. And obviously, you should not do it twice in both places. Create a special function for that which takes 'const unsigned char *trim_set', and returns the prepared arguments. In addition, I doubt if we really need 'char **' of individual characters. In fact, it is 1) bad for performance because of double pointer dereference; 2) its elements ('char *' is element of 'char **') are bigger than the biggest possible character - 8 bytes; 3) it is not needed at all - having 'char *trim_set' and 'uint8_t *char_lens' is enough to iterate char by char through 'trim_set'. I took the liberty of implementing this and got this diff: ========================================================================== diff --git a/src/box/sql/func.c b/src/box/sql/func.c index 6f2a5e3f6..5a4417c73 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1302,46 +1302,35 @@ trim_procedure(struct sql_context *context, enum trim_side_mask flags, const unsigned char *trim_set, int trim_set_sz, const unsigned char *input_str, int input_str_sz) { - const unsigned char *z = trim_set; /* * Count the number of UTF-8 characters passing through * the entire char set, but not up to the '\0' or X'00' * character. This allows to handle trimming set * containing such characters. */ - int char_cnt = sql_utf8_char_count(z, trim_set_sz); + int char_cnt = sql_utf8_char_count(trim_set, trim_set_sz); if (char_cnt == 0) goto result; - /* Individual characters in the character set. */ - unsigned char **ind_chars = - contextMalloc(context, - char_cnt * (sizeof(unsigned char *) + 1)); - if (ind_chars == NULL) - return; /* Length of each character in the character set. 
*/ - uint8_t *char_len = (uint8_t *)&ind_chars[char_cnt]; - z = trim_set; - int i = 0; - char_cnt = 0; - int handled_bytes_cnt = trim_set_sz; - while(handled_bytes_cnt > 0) { - ind_chars[char_cnt] = (unsigned char *)(z + i); + uint8_t *char_len = (uint8_t *)contextMalloc(context, char_cnt); + if (char_len == NULL) + return; + const unsigned char *z = trim_set; + int i = 0, j = 0; + while(j < char_cnt) { + int old_i = i; SQL_UTF8_FWD_1(z, i, trim_set_sz); - char_len[char_cnt] = z + i - ind_chars[char_cnt]; - handled_bytes_cnt -= char_len[char_cnt]; - char_cnt++; + char_len[j++] = i - old_i; } - if (char_cnt == 0) - goto result; if ((flags & TRIM_LEADING) != 0) { while (input_str_sz > 0) { int len = 0; - for (i = 0; i < char_cnt; i++) { + z = trim_set; + for (i = 0; i < char_cnt; ++i, z += len) { len = char_len[i]; if (len <= input_str_sz - && memcmp(input_str, - ind_chars[i], len) == 0) + && memcmp(input_str, z, len) == 0) break; } if (i >= char_cnt) @@ -1353,11 +1342,12 @@ trim_procedure(struct sql_context *context, enum trim_side_mask flags, if ((flags & TRIM_TRAILING) != 0) { while (input_str_sz > 0) { int len = 0; - for (i = 0; i < char_cnt; i++) { + z = trim_set; + for (i = 0; i < char_cnt; ++i, z += len) { len = char_len[i]; if (len <= input_str_sz && memcmp(&input_str[input_str_sz - len], - ind_chars[i], len) == 0) + z, len) == 0) break; } if (i >= char_cnt) @@ -1365,9 +1355,7 @@ trim_procedure(struct sql_context *context, enum trim_side_mask flags, input_str_sz -= len; } } - - if (trim_set_sz != 0) - sql_free(ind_chars); + sql_free(char_len); result: sql_result_text(context, (char *)input_str, input_str_sz, SQL_TRANSIENT); ========================================================================== It passes the tests, which proves that the 'ind_chars' array is unnecessary. ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature 2019-04-21 19:36 ` Vladislav Shpilevoy @ 2019-04-22 10:43 ` Vladislav Shpilevoy 2019-04-22 16:45 ` Roman Khabibov 1 sibling, 0 replies; 14+ messages in thread From: Vladislav Shpilevoy @ 2019-04-22 10:43 UTC (permalink / raw) To: Roman Khabibov, tarantool-patches >>>> + */ >>>> +static void >>>> +trim_func_one_arg(sql_context * context, int argc, sql_value **argv) >>> >>> 13. In new code we use explicit 'struct' keyword for struct >>> types - sql_context and sql_value. Also, we do not put whitespaces > > 5. Note, I said 'sql_value' too, and look at your code below. What > have you missed? Sorry, now I see that sql_value is a typedef and cannot be prefixed with 'struct'. ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature 2019-04-21 19:36 ` Vladislav Shpilevoy 2019-04-22 10:43 ` Vladislav Shpilevoy @ 2019-04-22 16:45 ` Roman Khabibov 2019-04-22 18:22 ` Vladislav Shpilevoy 1 sibling, 1 reply; 14+ messages in thread From: Roman Khabibov @ 2019-04-22 16:45 UTC (permalink / raw) To: tarantool-patches; +Cc: Vladislav Shpilevoy Hi! Thanks for the review. > On Apr 21, 2019, at 10:36 PM, Vladislav Shpilevoy <v.shpilevoy@tarantool.org> wrote: > > Hi! Thanks for the patch! See 8 comments below. > >>> 4. You again ignored my comment about NULL. Please, find all other >>> places and fix it finally. I said it already 1000 times in 1000 >>> reviews - we do not use 0 for pointers. It is a simple rule. Just >>> follow it. Write it down somewhere in a list of code style rules >>> and check them all before sending a patch. >>> >>> Seeing how many my comments you repeatedly ignore, I think that >>> probably you should reconsider the way how you do self-reviews. If >>> you do it via just looking a couple of seconds at the code in the >>> text editor, then it is definitely a bad way. >>> >>> First of all, use 'git diff/show' in console to look only at the >>> patch changes, not at the entire files and functions. If you do not >>> like console, and it is ok, then you can use Sublime Merge >>> desktop program or Sublime Git package for the editor. When you >>> look at the diff only, it is much simpler to notice such violations >>> and even bugs. 
>> + int char_cnt = sql_utf8_char_count(z, trim_set_sz); >> + if (char_cnt == 0) >> >> + unsigned char **ind_chars = >> + contextMalloc(context, >> + char_cnt * (sizeof(unsigned char *) + 1)); >> + if (ind_chars == NULL) >> >> + int i = 0; >> + char_cnt = 0; >> >> + if ((input_str = sql_value_text(argv[0])) == NULL) >> + return; >> >> + if ((input_str = sql_value_text(argv[1])) == NULL) >> + return; >> >> + } else if ((trim_set = sql_value_text(argv[0])) != NULL) { >> + int trim_set_sz = sql_value_bytes(argv[0]); >> >> + if ((input_str = sql_value_text(argv[2])) == NULL || >> + (trim_set = sql_value_text(argv[1])) == NULL) >> + return; > > 1. Sorry, but I do not understand this bunch of '+' and random lines > above. What is it? It is not a diff - I do not see function names, > '-', line numbers. Please, next time provide *full* normal diff > obtained by 'git --no-pager diff' command. > > The same about all other 'diff's below. Understood. >> >>> 5. In our code style we do not use 'char' to represent numbers, we >>> use 'uint8_t' or 'int8_t' when we want to use one-byte numbers. It >>> is the same as 'char'/'unsigned char', but looks shorter and it >>> becomes obvious that these values are used as numbers, not text. >>> Firstly I thought that char_len was an array of characters, but >>> it emerged being an array of symbol sizes. In the summary, I >>> suggest to use 'uint8_t *' for char_len array. >> + uint8_t *char_len = (uint8_t *)&ind_chars[char_cnt]; >> >>>> + /* Individual characters in the character set. */ >>>> + char unsigned **ind_chars = 0; >>> >>> 6. If you declare it as 'const char unsigned **', then you >>> can remove unnecessary type cast from line 1330. > >> If you meant the following line "ind_chars[char_cnt] = (unsigned char *)(z + i);” >> then it isn’t compiled without the cast, because of assigning to "'unsigned char *’ >> from 'const unsigned char *’”. > > In this concrete case I think 'goto's are ok. 
> >>>> + */ >>>> +static void >>>> +trim_func_one_arg(sql_context * context, int argc, sql_value **argv) >>> >>> 13. In new code we use explicit 'struct' keyword for struct >>> types - sql_context and sql_value. Also, we do not put whitepaces > 6. How is it possible, that first time sql_utf8_char_count() > returned not 0, but now we got 0 on the same string using the > same methods (SQL_UTF8_FWD_1)? Has it disappeared with your diff? > > 7. We never declare labels like that. Please, look at other places > where and how we declare labels. Also it is stated that we use > Linux Kernel code style, as I remember, and it means, that you > can lookup doubtful places on sites like this: > https://www.kernel.org/doc/html/v4.10/process/coding-style.html#centralized-exiting-of-functions diff --git a/src/box/sql/func.c b/src/box/sql/func.c index abeecefa1..0ec359425 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1286,108 +1286,198 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) sql_result_text(context, (char *)zOut, j, sql_free); } -/* - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. +/** + * Remove characters included in @a trim_set from @a input_str + * until encounter a character that doesn't belong to @a trim_set. + * Remove from the side specified by @a flags. + * @param context SQL context. + * @param flags Trim specification: left, right or both. + * @param trim_set The set of characters for trimming. + * @param char_len Lengths of each UTF-8 character in @a trim_set. + * @param char_cnt A number of UTF-8 characters in @a trim_set. + * @param input_str Input string for trimming. + * @param input_str_sz Input string size in bytes. 
*/ static void -trimFunc(sql_context * context, int argc, sql_value ** argv) +trim_procedure(struct sql_context *context, enum trim_side_mask flags, + const unsigned char *trim_set, uint8_t *char_len, int char_cnt, + const unsigned char *input_str, int input_str_sz) { - const unsigned char *zIn; /* Input string */ - const unsigned char *zCharSet; /* Set of characters to trim */ - int nIn; /* Number of bytes in input */ - int flags; /* 1: trimleft 2: trimright 3: trim */ - int i; /* Loop counter */ - unsigned char *aLen = 0; /* Length of each character in zCharSet */ - unsigned char **azChar = 0; /* Individual characters in zCharSet */ - int nChar; /* Number of characters in zCharSet */ - - if (sql_value_type(argv[0]) == SQL_NULL) { - return; - } - zIn = sql_value_text(argv[0]); - if (zIn == 0) - return; - nIn = sql_value_bytes(argv[0]); - assert(zIn == sql_value_text(argv[0])); - if (argc == 1) { - static const unsigned char lenOne[] = { 1 }; - static unsigned char *const azOne[] = { (u8 *) " " }; - nChar = 1; - aLen = (u8 *) lenOne; - azChar = (unsigned char **)azOne; - zCharSet = 0; - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { - return; - } else { - const unsigned char *z = zCharSet; - int trim_set_sz = sql_value_bytes(argv[1]); - /* - * Count the number of UTF-8 characters passing - * through the entire char set, but not up - * to the '\0' or X'00' character. This allows - * to handle trimming set containing such - * characters. 
- */ - nChar = sql_utf8_char_count(z, trim_set_sz); - if (nChar > 0) { - azChar = - contextMalloc(context, - ((i64) nChar) * (sizeof(char *) + 1)); - if (azChar == 0) { - return; - } - aLen = (unsigned char *)&azChar[nChar]; - z = zCharSet; - i = 0; - nChar = 0; - int handled_bytes_cnt = trim_set_sz; - while(handled_bytes_cnt > 0) { - azChar[nChar] = (unsigned char *)(z + i); - SQL_UTF8_FWD_1(z, i, trim_set_sz); - aLen[nChar] = (u8) (z + i - azChar[nChar]); - handled_bytes_cnt -= aLen[nChar]; - nChar++; - } - } - } - if (nChar > 0) { - flags = SQL_PTR_TO_INT(sql_user_data(context)); - if (flags & 1) { - while (nIn > 0) { - int len = 0; - for (i = 0; i < nChar; i++) { - len = aLen[i]; - if (len <= nIn - && memcmp(zIn, azChar[i], len) == 0) - break; - } - if (i >= nChar) + if (char_cnt == 0) + goto finish; + int i = 0; + const unsigned char *z = NULL; + if ((flags & TRIM_LEADING) != 0) { + while (input_str_sz > 0) { + int len = 0; + z = trim_set; + for (i = 0; i < char_cnt; ++i, z += len) { + len = char_len[i]; + if (len <= input_str_sz + && memcmp(input_str, z, len) == 0) break; - zIn += len; - nIn -= len; } + if (i >= char_cnt) + break; + input_str += len; + input_str_sz -= len; } - if (flags & 2) { - while (nIn > 0) { - int len = 0; - for (i = 0; i < nChar; i++) { - len = aLen[i]; - if (len <= nIn - && memcmp(&zIn[nIn - len], - azChar[i], len) == 0) - break; - } - if (i >= nChar) + } + if ((flags & TRIM_TRAILING) != 0) { + while (input_str_sz > 0) { + int len = 0; + z = trim_set; + for (i = 0; i < char_cnt; ++i, z += len) { + len = char_len[i]; + if (len <= input_str_sz + && memcmp(&input_str[input_str_sz - len], + z, len) == 0) break; - nIn -= len; } + if (i >= char_cnt) + break; + input_str_sz -= len; } - if (zCharSet) { - sql_free(azChar); - } } - sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT); + +finish: + sql_result_text(context, (char *)input_str, input_str_sz, + SQL_TRANSIENT); +} > > 8. 
Now the function looks much cleaner, and it is easy to see that > its performance for the most common case has degraded significantly. Before > your patch when there was no 'trim character set' argument, whitespaces > were used by default, but they were not allocated on new memory via > contextMalloc() - they were declared on stack as an optimization. Please, > refactor that function so that it does not allocate anything and takes > already somehow obtained trim char set and trim char lengths. > > These arguments should only be allocated by trim_func_two_args() and > trim_func_three_args(), and only on demand, when not-default trim set > is passed. And obviously, you should not do it twice in both > places. Create a special function for that which takes > 'const unsigned char *trim_set', and returns the prepared arguments. +/** + * Prepare arguments for trimming procedure. Allocate memory for + * @a char_len (array of lengths each character in @a trim_set) + * and fill it. + * + * @param context SQL context. + * @param trim_set The set of characters for trimming. + * @param[out] char_len Lengths of each character in @ trim_set. + * @retval >=0 A number of UTF-8 characters in @a trim_set. + * @retval -1 Memory allocation error. + */ +static int +prepare_char_len(struct sql_context *context, const unsigned char *trim_set, + int trim_set_sz, uint8_t **char_len) +{ + /* + * Count the number of UTF-8 characters passing through + * the entire char set, but not up to the '\0' or X'00' + * character. This allows to handle trimming set + * containing such characters. 
+ */ + int char_cnt = sql_utf8_char_count(trim_set, trim_set_sz); + if (char_cnt == 0) + return 0; + + if ((*char_len = (uint8_t *)contextMalloc(context, char_cnt)) == NULL) + return -1; + + const unsigned char *z = trim_set; + int i = 0, j = 0; + while(j < char_cnt) { + int old_i = i; + SQL_UTF8_FWD_1(z, i, trim_set_sz); + (*char_len)[j++] = i - old_i; + } + + return char_cnt; +} commit c35e8fd261ca6ab41fbe5e48935923314bef014a Author: Roman Khabibov <roman.habibov@tarantool.org> Date: Thu Mar 28 14:01:33 2019 +0300 sql: modify TRIM() function signature According to the ANSI standard, ltrim, rtrim and trim should be merged into one unified TRIM() function. The specialization of trimming (left, right or both and trimming characters) determined in arguments of this function. Closes #3879 diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c index be7bd5545..76e3265e7 100644 --- a/extra/mkkeywordhash.c +++ b/extra/mkkeywordhash.c @@ -278,6 +278,10 @@ static Keyword aKeywordTable[] = { { "WHILE", "TK_STANDARD", RESERVED, true }, { "TEXT", "TK_TEXT", RESERVED, true }, { "TRUNCATE", "TK_TRUNCATE", ALWAYS, true }, + { "TRIM", "TK_TRIM", ALWAYS, true }, + { "LEADING", "TK_LEADING", ALWAYS, true }, + { "TRAILING", "TK_TRAILING", ALWAYS, true }, + { "BOTH", "TK_BOTH", ALWAYS, true }, }; /* Number of keywords */ diff --git a/src/box/sql/func.c b/src/box/sql/func.c index abeecefa1..0ec359425 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1286,108 +1286,198 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) sql_result_text(context, (char *)zOut, j, sql_free); } -/* - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. +/** + * Remove characters included in @a trim_set from @a input_str + * until encounter a character that doesn't belong to @a trim_set. + * Remove from the side specified by @a flags. + * @param context SQL context. 
+ * @param flags Trim specification: left, right or both. + * @param trim_set The set of characters for trimming. + * @param char_len Lengths of each UTF-8 character in @a trim_set. + * @param char_cnt A number of UTF-8 characters in @a trim_set. + * @param input_str Input string for trimming. + * @param input_str_sz Input string size in bytes. */ static void -trimFunc(sql_context * context, int argc, sql_value ** argv) +trim_procedure(struct sql_context *context, enum trim_side_mask flags, + const unsigned char *trim_set, uint8_t *char_len, int char_cnt, + const unsigned char *input_str, int input_str_sz) { - const unsigned char *zIn; /* Input string */ - const unsigned char *zCharSet; /* Set of characters to trim */ - int nIn; /* Number of bytes in input */ - int flags; /* 1: trimleft 2: trimright 3: trim */ - int i; /* Loop counter */ - unsigned char *aLen = 0; /* Length of each character in zCharSet */ - unsigned char **azChar = 0; /* Individual characters in zCharSet */ - int nChar; /* Number of characters in zCharSet */ - - if (sql_value_type(argv[0]) == SQL_NULL) { - return; - } - zIn = sql_value_text(argv[0]); - if (zIn == 0) - return; - nIn = sql_value_bytes(argv[0]); - assert(zIn == sql_value_text(argv[0])); - if (argc == 1) { - static const unsigned char lenOne[] = { 1 }; - static unsigned char *const azOne[] = { (u8 *) " " }; - nChar = 1; - aLen = (u8 *) lenOne; - azChar = (unsigned char **)azOne; - zCharSet = 0; - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { - return; - } else { - const unsigned char *z = zCharSet; - int trim_set_sz = sql_value_bytes(argv[1]); - /* - * Count the number of UTF-8 characters passing - * through the entire char set, but not up - * to the '\0' or X'00' character. This allows - * to handle trimming set containing such - * characters. 
- */ - nChar = sql_utf8_char_count(z, trim_set_sz); - if (nChar > 0) { - azChar = - contextMalloc(context, - ((i64) nChar) * (sizeof(char *) + 1)); - if (azChar == 0) { - return; - } - aLen = (unsigned char *)&azChar[nChar]; - z = zCharSet; - i = 0; - nChar = 0; - int handled_bytes_cnt = trim_set_sz; - while(handled_bytes_cnt > 0) { - azChar[nChar] = (unsigned char *)(z + i); - SQL_UTF8_FWD_1(z, i, trim_set_sz); - aLen[nChar] = (u8) (z + i - azChar[nChar]); - handled_bytes_cnt -= aLen[nChar]; - nChar++; - } - } - } - if (nChar > 0) { - flags = SQL_PTR_TO_INT(sql_user_data(context)); - if (flags & 1) { - while (nIn > 0) { - int len = 0; - for (i = 0; i < nChar; i++) { - len = aLen[i]; - if (len <= nIn - && memcmp(zIn, azChar[i], len) == 0) - break; - } - if (i >= nChar) + if (char_cnt == 0) + goto finish; + int i = 0; + const unsigned char *z = NULL; + if ((flags & TRIM_LEADING) != 0) { + while (input_str_sz > 0) { + int len = 0; + z = trim_set; + for (i = 0; i < char_cnt; ++i, z += len) { + len = char_len[i]; + if (len <= input_str_sz + && memcmp(input_str, z, len) == 0) break; - zIn += len; - nIn -= len; } + if (i >= char_cnt) + break; + input_str += len; + input_str_sz -= len; } - if (flags & 2) { - while (nIn > 0) { - int len = 0; - for (i = 0; i < nChar; i++) { - len = aLen[i]; - if (len <= nIn - && memcmp(&zIn[nIn - len], - azChar[i], len) == 0) - break; - } - if (i >= nChar) + } + if ((flags & TRIM_TRAILING) != 0) { + while (input_str_sz > 0) { + int len = 0; + z = trim_set; + for (i = 0; i < char_cnt; ++i, z += len) { + len = char_len[i]; + if (len <= input_str_sz + && memcmp(&input_str[input_str_sz - len], + z, len) == 0) break; - nIn -= len; } + if (i >= char_cnt) + break; + input_str_sz -= len; } - if (zCharSet) { - sql_free(azChar); - } } - sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT); + +finish: + sql_result_text(context, (char *)input_str, input_str_sz, + SQL_TRANSIENT); +} + +/** + * Prepare arguments for trimming procedure. 
Allocate memory for + * @a char_len (array of lengths each character in @a trim_set) + * and fill it. + * + * @param context SQL context. + * @param trim_set The set of characters for trimming. + * @param[out] char_len Lengths of each character in @ trim_set. + * @retval >=0 A number of UTF-8 characters in @a trim_set. + * @retval -1 Memory allocation error. + */ +static int +prepare_char_len(struct sql_context *context, const unsigned char *trim_set, + int trim_set_sz, uint8_t **char_len) +{ + /* + * Count the number of UTF-8 characters passing through + * the entire char set, but not up to the '\0' or X'00' + * character. This allows to handle trimming set + * containing such characters. + */ + int char_cnt = sql_utf8_char_count(trim_set, trim_set_sz); + if (char_cnt == 0) + return 0; + + if ((*char_len = (uint8_t *)contextMalloc(context, char_cnt)) == NULL) + return -1; + + const unsigned char *z = trim_set; + int i = 0, j = 0; + while(j < char_cnt) { + int old_i = i; + SQL_UTF8_FWD_1(z, i, trim_set_sz); + (*char_len)[j++] = i - old_i; + } + + return char_cnt; +} + +/** + * Normalize args from @a argv input array when it has one arg + * only. + * + * Case: TRIM(<str>) + * Call trimming procedure with TRIM_BOTH as the flags and " " as + * the trimming set. + */ +static void +trim_func_one_arg(struct sql_context *context, int argc, sql_value **argv) +{ + assert(argc == 1); + (void) argc; + + const unsigned char *input_str; + if ((input_str = sql_value_text(argv[0])) == NULL) + return; + + int input_str_sz = sql_value_bytes(argv[0]); + static uint8_t len_one[] = { 1 }; + trim_procedure(context, TRIM_BOTH, (const unsigned char *) " ", + len_one, 1, input_str, input_str_sz); +} + +/** + * Normalize args from @a argv input array when it has two args. + * + * Case: TRIM(<character_set> FROM <str>) + * If user has specified <character_set> only, call trimming + * procedure with TRIM_BOTH as the flags and that trimming set. 
+ * + * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>) + * If user has specified side keyword only, then call trimming + * procedure with the specified side and " " as the trimming set. + */ +static void +trim_func_two_args(struct sql_context *context, int argc, sql_value **argv) +{ + assert(argc == 2); + (void) argc; + + const unsigned char *input_str; + if ((input_str = sql_value_text(argv[1])) == NULL) + return; + + int input_str_sz = sql_value_bytes(argv[1]); + const unsigned char *trim_set; + if (sql_value_type(argv[0]) == SQL_INTEGER) { + static uint8_t len_one[] = { 1 }; + trim_procedure(context, sql_value_int(argv[0]), + (const unsigned char *) " ", len_one, 1, + input_str, input_str_sz); + } else if ((trim_set = sql_value_text(argv[0])) != NULL) { + int trim_set_sz = sql_value_bytes(argv[0]); + uint8_t *char_len = NULL; + int char_cnt = 0; + if ((char_cnt = prepare_char_len(context, trim_set, trim_set_sz, + &char_len)) == -1) + return; + trim_procedure(context, TRIM_BOTH, trim_set, char_len, char_cnt, + input_str, input_str_sz); + sql_free(char_len); + } +} + +/** + * Normalize args from @a argv input array when it has three args. + * + * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>) + * If user has specified side keyword and <character_set>, then + * call trimming procedure with that args. 
+ */ +static void +trim_func_three_args(struct sql_context *context, int argc, sql_value **argv) +{ + assert(argc == 3); + (void) argc; + + assert(sql_value_type(argv[0]) == SQL_INTEGER); + const unsigned char *input_str, *trim_set; + if ((input_str = sql_value_text(argv[2])) == NULL || + (trim_set = sql_value_text(argv[1])) == NULL) + return; + + int trim_set_sz = sql_value_bytes(argv[1]); + int input_str_sz = sql_value_bytes(argv[2]); + uint8_t *char_len = NULL; + int char_cnt = 0; + if ((char_cnt = prepare_char_len(context, trim_set, trim_set_sz, + &char_len)) == -1) + return; + trim_procedure(context, sql_value_int(argv[0]), trim_set, char_len, + char_cnt, input_str, input_str_sz); + sql_free(char_len); } #ifdef SQL_ENABLE_UNKNOWN_SQL_FUNCTION @@ -1818,12 +1908,9 @@ sqlRegisterBuiltinFunctions(void) FIELD_TYPE_INTEGER), FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY, FIELD_TYPE_INTEGER), - FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc), - FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc), - FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc), - FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc), - FUNCTION_COLL(trim, 1, 3, 0, trimFunc), - FUNCTION_COLL(trim, 2, 3, 0, trimFunc), + FUNCTION_COLL(trim, 1, 3, 0, trim_func_one_arg), + FUNCTION_COLL(trim, 2, 3, 0, trim_func_two_args), + FUNCTION_COLL(trim, 3, 3, 0, trim_func_three_args), FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR), FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR), AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize, diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index 099daf512..a56ce7a10 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -1032,6 +1032,55 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). { sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0); } %endif SQL_OMIT_CAST + +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). 
{ + A.pExpr = sqlExprFunction(pParse, Y, &X); + spanSet(&A, &X, &E); +} + +%type trim_operands {struct ExprList *} +%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);} + +trim_operands(A) ::= trim_from_clause(F) expr(Y). { + A = sql_expr_list_append(pParse->db, F, Y.pExpr); +} + +trim_operands(A) ::= expr(Y). { + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); +} + +%type trim_from_clause {struct ExprList *} +%destructor trim_from_clause {sql_expr_list_delete(pParse->db, $$);} + +/* + * The following two rules cover three cases of keyword + * (LEADING/TRAILING/BOTH) and <trim_character_set> combination. + * The case when both of them are absent is disallowed. + */ +trim_from_clause(A) ::= expr(Y) FROM. { + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); +} + +trim_from_clause(A) ::= trim_specification(N) expr_optional(Y) FROM. { + struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER, + &sqlIntTokens[N]); + A = sql_expr_list_append(pParse->db, NULL, p); + if (Y != NULL) + A = sql_expr_list_append(pParse->db, A, Y); +} + +%type expr_optional {struct Expr *} +%destructor expr_optional {sql_expr_delete(pParse->db, $$, false);} + +expr_optional(A) ::= . { A = NULL; } +expr_optional(A) ::= expr(X). { A = X.pExpr; } + +%type trim_specification {enum trim_side_mask} + +trim_specification(A) ::= LEADING. { A = TRIM_LEADING; } +trim_specification(A) ::= TRAILING. { A = TRIM_TRAILING; } +trim_specification(A) ::= BOTH. { A = TRIM_BOTH; } + expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). { if( Y && Y->nExpr>pParse->db->aLimit[SQL_LIMIT_FUNCTION_ARG] ){ const char *err = @@ -1294,7 +1343,7 @@ expr(A) ::= EXISTS(B) LP select(Y) RP(E). { } /* CASE expressions */ -expr(A) ::= CASE(C) case_operand(X) case_exprlist(Y) case_else(Z) END(E). { +expr(A) ::= CASE(C) expr_optional(X) case_exprlist(Y) case_else(Z) END(E). 
{ spanSet(&A,&C,&E); /*A-overwrites-C*/ A.pExpr = sqlPExpr(pParse, TK_CASE, X, 0); if( A.pExpr ){ @@ -1319,10 +1368,6 @@ case_exprlist(A) ::= WHEN expr(Y) THEN expr(Z). { %destructor case_else {sql_expr_delete(pParse->db, $$, false);} case_else(A) ::= ELSE expr(X). {A = X.pExpr;} case_else(A) ::= . {A = 0;} -%type case_operand {Expr*} -%destructor case_operand {sql_expr_delete(pParse->db, $$, false);} -case_operand(A) ::= expr(X). {A = X.pExpr; /*A-overwrites-X*/} -case_operand(A) ::= . {A = 0;} %type exprlist {ExprList*} %destructor exprlist {sql_expr_list_delete(pParse->db, $$);} diff --git a/src/box/sql/parse_def.c b/src/box/sql/parse_def.c index 49c76a326..aa1323cb2 100644 --- a/src/box/sql/parse_def.c +++ b/src/box/sql/parse_def.c @@ -34,7 +34,9 @@ const struct Token sqlIntTokens[] = { {"0", 1, false}, - {"1", 1, false} + {"1", 1, false}, + {"2", 1, false}, + {"3", 1, false}, }; void diff --git a/src/box/sql/parse_def.h b/src/box/sql/parse_def.h index a1af2bacd..5899a7e4e 100644 --- a/src/box/sql/parse_def.h +++ b/src/box/sql/parse_def.h @@ -87,7 +87,7 @@ struct Token { bool isReserved; }; -/** Constant tokens for values 0 and 1. */ +/** Constant tokens for integer values. */ extern const struct Token sqlIntTokens[]; /** Generate a Token object from a string. */ diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h index b322602dc..d5a3e15c1 100644 --- a/src/box/sql/sqlInt.h +++ b/src/box/sql/sqlInt.h @@ -1680,6 +1680,17 @@ struct FuncDestructor { * single query - might change over time */ +/* + * Trim side mask components. TRIM_LEADING means to trim left side + * only. TRIM_TRAILING is to trim right side only. TRIM_BOTH is to + * trim both sides. + */ +enum trim_side_mask { + TRIM_LEADING = 1, + TRIM_TRAILING = 2, + TRIM_BOTH = TRIM_LEADING | TRIM_TRAILING +}; + /* * The following three macros, FUNCTION(), LIKEFUNC() and AGGREGATE() are * used to create the initializers for the FuncDef structures. 
diff --git a/test/sql-tap/badutf1.test.lua b/test/sql-tap/badutf1.test.lua index d104efaa9..9079dfe25 100755 --- a/test/sql-tap/badutf1.test.lua +++ b/test/sql-tap/badutf1.test.lua @@ -302,7 +302,7 @@ test:do_test( test:do_test( "badutf-4.1", function() - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(TRIM('\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.1> "X", "F0" @@ -312,7 +312,7 @@ test:do_test( test:do_test( "badutf-4.2", function() - return test:execsql2("SELECT hex(ltrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(TRIM(LEADING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.2> "X", "F0808080FF" @@ -322,7 +322,7 @@ test:do_test( test:do_test( "badutf-4.3", function() - return test:execsql2("SELECT hex(rtrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(TRIM(TRAILING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.3> "X", "808080F0" @@ -332,7 +332,7 @@ test:do_test( test:do_test( "badutf-4.4", function() - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(TRIM('\xff\x80' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.4> "X", "808080F0808080FF" @@ -342,7 +342,7 @@ test:do_test( test:do_test( "badutf-4.5", function() - return test:execsql2("SELECT hex(trim('\xff\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(TRIM('\xff\x80' FROM '\xff\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.5> "X", "80F0808080FF" @@ -352,7 +352,7 @@ test:do_test( test:do_test( "badutf-4.6", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(TRIM('\xff\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS 
x") end, { -- <badutf-4.6> "X", "F0808080FF" @@ -362,7 +362,7 @@ test:do_test( test:do_test( "badutf-4.7", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80\x80')) AS x") + return test:execsql2("SELECT hex(TRIM('\xff\x80\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.7> "X", "FF80F0808080FF" diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua index 251cc3534..fe9a98191 100755 --- a/test/sql-tap/func.test.lua +++ b/test/sql-tap/func.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test = require("sqltester") -test:plan(14586) +test:plan(14590) --!./tcltestrunner.lua -- 2001 September 15 @@ -1915,7 +1915,7 @@ test:do_catchsql_test( SELECT trim(1,2,3) ]], { -- <func-22.1> - 1, "wrong number of arguments to function TRIM()" + 1, "Syntax error near ','" -- </func-22.1> }) @@ -1925,7 +1925,7 @@ test:do_catchsql_test( SELECT ltrim(1,2,3) ]], { -- <func-22.2> - 1, "wrong number of arguments to function LTRIM()" + 1, "Function 'LTRIM' does not exist" -- </func-22.2> }) @@ -1935,7 +1935,7 @@ test:do_catchsql_test( SELECT rtrim(1,2,3) ]], { -- <func-22.3> - 1, "wrong number of arguments to function RTRIM()" + 1, "Function 'RTRIM' does not exist" -- </func-22.3> }) @@ -1952,7 +1952,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.5", [[ - SELECT ltrim(' hi '); + SELECT TRIM(LEADING FROM ' hi '); ]], { -- <func-22.5> "hi " @@ -1962,7 +1962,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.6", [[ - SELECT rtrim(' hi '); + SELECT TRIM(TRAILING FROM ' hi '); ]], { -- <func-22.6> " hi" @@ -1972,7 +1972,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.7", [[ - SELECT trim(' hi ','xyz'); + SELECT TRIM('xyz' FROM ' hi '); ]], { -- <func-22.7> " hi " @@ -1982,7 +1982,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.8", [[ - SELECT ltrim(' hi ','xyz'); + SELECT TRIM(LEADING 'xyz' FROM ' hi '); ]], { -- <func-22.8> " hi " @@ -1992,7 +1992,7 @@ test:do_execsql_test( 
test:do_execsql_test( "func-22.9", [[ - SELECT rtrim(' hi ','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM ' hi '); ]], { -- <func-22.9> " hi " @@ -2002,7 +2002,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.10", [[ - SELECT trim('xyxzy hi zzzy','xyz'); + SELECT TRIM('xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.10> " hi " @@ -2012,7 +2012,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.11", [[ - SELECT ltrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(LEADING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.11> " hi zzzy" @@ -2022,7 +2022,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.12", [[ - SELECT rtrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.12> "xyxzy hi " @@ -2032,7 +2032,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.13", [[ - SELECT trim(' hi ',''); + SELECT TRIM('' FROM ' hi '); ]], { -- <func-22.13> " hi " @@ -2043,7 +2043,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.14", [[ - SELECT hex(trim(x'c280e1bfbff48fbfbf6869',x'6162e1bfbfc280')) + SELECT hex(TRIM(x'6162e1bfbfc280' FROM x'c280e1bfbff48fbfbf6869')) ]], { -- <func-22.14> "F48FBFBF6869" @@ -2052,8 +2052,8 @@ test:do_execsql_test( test:do_execsql_test( "func-22.15", - [[SELECT hex(trim(x'6869c280e1bfbff48fbfbf61', - x'6162e1bfbfc280f48fbfbf'))]], { + [[SELECT hex(TRIM(x'6162e1bfbfc280f48fbfbf' + FROM x'6869c280e1bfbff48fbfbf61'))]], { -- <func-22.15> "6869" -- </func-22.15> @@ -2062,7 +2062,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.16", [[ - SELECT hex(trim(x'ceb1ceb2ceb3',x'ceb1')); + SELECT hex(TRIM(x'ceb1' FROM x'ceb1ceb2ceb3')); ]], { -- <func-22.16> "CEB2CEB3" @@ -2083,7 +2083,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.21", [[ - SELECT typeof(trim(NULL,'xyz')); + SELECT typeof(TRIM('xyz' FROM NULL)); ]], { -- <func-22.21> "null" @@ -2093,7 +2093,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.22", [[ - SELECT typeof(trim('hello',NULL)); + SELECT 
typeof(TRIM(NULL FROM 'hello')); ]], { -- <func-22.22> "null" @@ -2105,7 +2105,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.23", [[ - SELECT TRIM(X'004100', X'00'); + SELECT TRIM(X'00' FROM X'004100'); ]], { -- <func-22.23> "A" @@ -2115,7 +2115,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.24", [[ - SELECT TRIM(X'004100', X'0000'); + SELECT TRIM(X'0000' FROM X'004100'); ]], { -- <func-22.24> "A" @@ -2125,7 +2125,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.25", [[ - SELECT TRIM(X'004100', X'0042'); + SELECT TRIM(X'0042' FROM X'004100'); ]], { -- <func-22.25> "A" @@ -2135,7 +2135,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.26", [[ - SELECT TRIM(X'00004100420000', X'00'); + SELECT TRIM(X'00' FROM X'00004100420000'); ]], { -- <func-22.26> "A\0B" @@ -2145,7 +2145,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.27", [[ - SELECT LTRIM(X'004100', X'00'); + SELECT TRIM(LEADING X'00' FROM X'004100'); ]], { -- <func-22.27> "A\0" @@ -2155,7 +2155,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.28", [[ - SELECT LTRIM(X'004100', X'0000'); + SELECT TRIM(LEADING X'0000' FROM X'004100'); ]], { -- <func-22.28> "A\0" @@ -2165,7 +2165,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.29", [[ - SELECT LTRIM(X'004100', X'0042'); + SELECT TRIM(LEADING X'0042' FROM X'004100'); ]], { -- <func-22.29> "A\0" @@ -2175,7 +2175,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.30", [[ - SELECT LTRIM(X'00004100420000', X'00'); + SELECT TRIM(LEADING X'00' FROM X'00004100420000'); ]], { -- <func-22.30> "A\0B\0\0" @@ -2185,7 +2185,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.31", [[ - SELECT RTRIM(X'004100', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'004100'); ]], { -- <func-22.31> "\0A" @@ -2195,7 +2195,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.32", [[ - SELECT RTRIM(X'004100', X'0000'); + SELECT TRIM(TRAILING X'0000' FROM X'004100'); ]], { -- <func-22.32> "\0A" @@ -2205,7 +2205,7 
@@ test:do_execsql_test( test:do_execsql_test( "func-22.33", [[ - SELECT RTRIM(X'004100', X'0042'); + SELECT TRIM(TRAILING X'0042' FROM X'004100'); ]], { -- <func-22.33> "\0A" @@ -2215,13 +2215,56 @@ test:do_execsql_test( test:do_execsql_test( "func-22.34", [[ - SELECT RTRIM(X'00004100420000', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'00004100420000'); ]], { -- <func-22.34> "\0\0A\0B" -- </func-22.34> }) +-- gh-3879 Check new TRIM() grammar, particularly BOTH keyword and +-- FROM without any agrs before. LEADING and TRAILING keywords is +-- checked above. + +test:do_execsql_test( + "func-22.35", + [[ + SELECT TRIM(BOTH FROM ' hi '); + ]], { + -- <func-22.35> + "hi" + -- </func-22.35> + }) +test:do_execsql_test( + "func-22.36", + [[ + SELECT TRIM(BOTH 'xyz' FROM ' hi '); + ]], { + -- <func-22.36> + " hi " + -- </func-22.36> + }) + +test:do_execsql_test( + "func-22.37", + [[ + SELECT TRIM(BOTH 'xyz' FROM 'xyxzy hi zzzy'); + ]], { + -- <func-22.37> + " hi " + -- </func-22.37> + }) + +test:do_catchsql_test( + "func-22.38", + [[ + SELECT TRIM(FROM 'xyxzy'); + ]], { + -- <func-22.38> + 1, "Syntax error near 'FROM'" + -- </func-22.38> + }) + -- This is to test the deprecated sql_aggregate_count() API. 
-- --test:do_test( @@ -2838,16 +2881,16 @@ test:do_execsql_test( "SELECT TRIM(CHAR(32,00,32,00,32));", {string.char(00,32,00)}) --- LTRIM +-- LEFT TRIM test:do_execsql_test( "func-70", - "SELECT LTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(LEADING FROM CHAR(32,00,32,00,32));", {string.char(00,32,00,32)}) --- RTRIM +-- RIGHT TRIM test:do_execsql_test( "func-71", - "SELECT RTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(TRAILING FROM CHAR(32,00,32,00,32));", {string.char(32,00,32,00)}) -- GROUP_CONCAT diff --git a/test/sql-tap/with1.test.lua b/test/sql-tap/with1.test.lua index 495aa4ee4..ec45e5e76 100755 --- a/test/sql-tap/with1.test.lua +++ b/test/sql-tap/with1.test.lua @@ -550,7 +550,7 @@ test:do_execsql_test("8.1-mandelbrot", [[ SELECT group_concat( substr(' .+*#', 1+min(iter/7,4), 1), '') FROM m2 GROUP BY cy ) - SELECT group_concat(rtrim(t),x'0a') FROM a; + SELECT group_concat(TRIM(TRAILING FROM t),x'0a') FROM a; ]], { -- <8.1-mandelbrot> [[ ....# ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature 2019-04-22 16:45 ` Roman Khabibov @ 2019-04-22 18:22 ` Vladislav Shpilevoy 2019-04-23 1:04 ` Roman Khabibov 0 siblings, 1 reply; 14+ messages in thread From: Vladislav Shpilevoy @ 2019-04-22 18:22 UTC (permalink / raw) To: Roman Khabibov, tarantool-patches Hi! Thanks for the fixes! I've applied my fixes and pushed on top of the branch. Please, look at them and either squash, or let's discuss where you do not agree. Otherwise it will LGTM. > diff --git a/src/box/sql/func.c b/src/box/sql/func.c > index abeecefa1..0ec359425 100644 > --- a/src/box/sql/func.c > +++ b/src/box/sql/func.c > @@ -1286,108 +1286,198 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) > sql_result_text(context, (char *)zOut, j, sql_free); > } > > -/* > - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. > - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. > +/** > + * Remove characters included in @a trim_set from @a input_str > + * until encounter a character that doesn't belong to @a trim_set. > + * Remove from the side specified by @a flags. > + * @param context SQL context. > + * @param flags Trim specification: left, right or both. > + * @param trim_set The set of characters for trimming. > + * @param char_len Lengths of each UTF-8 character in @a trim_set. > + * @param char_cnt A number of UTF-8 characters in @a trim_set. > + * @param input_str Input string for trimming. > + * @param input_str_sz Input string size in bytes. > */ > static void > -trimFunc(sql_context * context, int argc, sql_value ** argv) > +trim_procedure(struct sql_context *context, enum trim_side_mask flags, > + const unsigned char *trim_set, uint8_t *char_len, int char_cnt, 1. Added 'const' to char_len. 
> + const unsigned char *input_str, int input_str_sz) > { > - const unsigned char *zIn; /* Input string */ > - const unsigned char *zCharSet; /* Set of characters to trim */ > - int nIn; /* Number of bytes in input */ > - int flags; /* 1: trimleft 2: trimright 3: trim */ > - int i; /* Loop counter */ > - unsigned char *aLen = 0; /* Length of each character in zCharSet */ > - unsigned char **azChar = 0; /* Individual characters in zCharSet */ > - int nChar; /* Number of characters in zCharSet */ > - > - if (sql_value_type(argv[0]) == SQL_NULL) { > - return; > - } > - zIn = sql_value_text(argv[0]); > - if (zIn == 0) > - return; > - nIn = sql_value_bytes(argv[0]); > - assert(zIn == sql_value_text(argv[0])); > - if (argc == 1) { > - static const unsigned char lenOne[] = { 1 }; > - static unsigned char *const azOne[] = { (u8 *) " " }; > - nChar = 1; > - aLen = (u8 *) lenOne; > - azChar = (unsigned char **)azOne; > - zCharSet = 0; > - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { > - return; > - } else { > - const unsigned char *z = zCharSet; > - int trim_set_sz = sql_value_bytes(argv[1]); > - /* > - * Count the number of UTF-8 characters passing > - * through the entire char set, but not up > - * to the '\0' or X'00' character. This allows > - * to handle trimming set containing such > - * characters. 
> - */ > - nChar = sql_utf8_char_count(z, trim_set_sz); > - if (nChar > 0) { > - azChar = > - contextMalloc(context, > - ((i64) nChar) * (sizeof(char *) + 1)); > - if (azChar == 0) { > - return; > - } > - aLen = (unsigned char *)&azChar[nChar]; > - z = zCharSet; > - i = 0; > - nChar = 0; > - int handled_bytes_cnt = trim_set_sz; > - while(handled_bytes_cnt > 0) { > - azChar[nChar] = (unsigned char *)(z + i); > - SQL_UTF8_FWD_1(z, i, trim_set_sz); > - aLen[nChar] = (u8) (z + i - azChar[nChar]); > - handled_bytes_cnt -= aLen[nChar]; > - nChar++; > - } > - } > - } > - if (nChar > 0) { > - flags = SQL_PTR_TO_INT(sql_user_data(context)); > - if (flags & 1) { > - while (nIn > 0) { > - int len = 0; > - for (i = 0; i < nChar; i++) { > - len = aLen[i]; > - if (len <= nIn > - && memcmp(zIn, azChar[i], len) == 0) > - break; > - } > - if (i >= nChar) > + if (char_cnt == 0) > + goto finish; > + int i = 0; > + const unsigned char *z = NULL; 2. These values do not need initialization. They are assigned before each usage. 
> + if ((flags & TRIM_LEADING) != 0) { > + while (input_str_sz > 0) { > + int len = 0; > + z = trim_set; > + for (i = 0; i < char_cnt; ++i, z += len) { > + len = char_len[i]; > + if (len <= input_str_sz > + && memcmp(input_str, z, len) == 0) > break; > - zIn += len; > - nIn -= len; > } > + if (i >= char_cnt) > + break; > + input_str += len; > + input_str_sz -= len; > } > - if (flags & 2) { > - while (nIn > 0) { > - int len = 0; > - for (i = 0; i < nChar; i++) { > - len = aLen[i]; > - if (len <= nIn > - && memcmp(&zIn[nIn - len], > - azChar[i], len) == 0) > - break; > - } > - if (i >= nChar) > + } > + if ((flags & TRIM_TRAILING) != 0) { > + while (input_str_sz > 0) { > + int len = 0; > + z = trim_set; > + for (i = 0; i < char_cnt; ++i, z += len) { > + len = char_len[i]; > + if (len <= input_str_sz > + && memcmp(&input_str[input_str_sz - len], > + z, len) == 0) > break; > - nIn -= len; > } > + if (i >= char_cnt) > + break; > + input_str_sz -= len; > } > - if (zCharSet) { > - sql_free(azChar); > - } > } > - sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT); > + > +finish: > + sql_result_text(context, (char *)input_str, input_str_sz, > + SQL_TRANSIENT); 3. The indentation has been broken. > +} > + > +/** > + * Prepare arguments for trimming procedure. Allocate memory for > + * @a char_len (array of lengths each character in @a trim_set) > + * and fill it. > + * > + * @param context SQL context. > + * @param trim_set The set of characters for trimming. > + * @param[out] char_len Lengths of each character in @ trim_set. > + * @retval >=0 A number of UTF-8 characters in @a trim_set. > + * @retval -1 Memory allocation error. > + */ > +static int > +prepare_char_len(struct sql_context *context, const unsigned char *trim_set, > + int trim_set_sz, uint8_t **char_len) 4. Usually when we write somehow linked functions, we prefix them with a certain prefix. Here I would use 'trim_'. 
> +{ > + /* > + * Count the number of UTF-8 characters passing through > + * the entire char set, but not up to the '\0' or X'00' > + * character. This allows to handle trimming set > + * containing such characters. > + */ > + int char_cnt = sql_utf8_char_count(trim_set, trim_set_sz); > + if (char_cnt == 0) > + return 0; > + > + if ((*char_len = (uint8_t *)contextMalloc(context, char_cnt)) == NULL) > + return -1; > + > + const unsigned char *z = trim_set; 5. You do not need initialization here. Just use 'trim_set' right below. > + int i = 0, j = 0; > + while(j < char_cnt) { > + int old_i = i; > + SQL_UTF8_FWD_1(z, i, trim_set_sz); > + (*char_len)[j++] = i - old_i; > + } > + > + return char_cnt; > +} > + > +/** > + * Normalize args from @a argv input array when it has one arg > + * only. > + * > + * Case: TRIM(<str>) > + * Call trimming procedure with TRIM_BOTH as the flags and " " as > + * the trimming set. > + */ > +static void > +trim_func_one_arg(struct sql_context *context, int argc, sql_value **argv) > +{ > + assert(argc == 1); > + (void) argc; > + > + const unsigned char *input_str; > + if ((input_str = sql_value_text(argv[0])) == NULL) > + return; > + > + int input_str_sz = sql_value_bytes(argv[0]); > + static uint8_t len_one[] = { 1 }; 6. 'static' is overkill here. It is enough to just declare a variable 'uint8_t len = 1;' and pass it by pointer into the trim_procedure. > + trim_procedure(context, TRIM_BOTH, (const unsigned char *) " ", > + len_one, 1, input_str, input_str_sz); > +} > + > +/** > + * Normalize args from @a argv input array when it has two args. > + * > + * Case: TRIM(<character_set> FROM <str>) > + * If user has specified <character_set> only, call trimming > + * procedure with TRIM_BOTH as the flags and that trimming set. > + * > + * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>) > + * If user has specified side keyword only, then call trimming > + * procedure with the specified side and " " as the trimming set. 
> + */ > +static void > +trim_func_two_args(struct sql_context *context, int argc, sql_value **argv) > +{ > + assert(argc == 2); > + (void) argc; > + > + const unsigned char *input_str; > + if ((input_str = sql_value_text(argv[1])) == NULL) > + return; > + > + int input_str_sz = sql_value_bytes(argv[1]); > + const unsigned char *trim_set; > + if (sql_value_type(argv[0]) == SQL_INTEGER) { > + static uint8_t len_one[] = { 1 }; 7. The same. > + trim_procedure(context, sql_value_int(argv[0]), > + (const unsigned char *) " ", len_one, 1, > + input_str, input_str_sz); > + } else if ((trim_set = sql_value_text(argv[0])) != NULL) { > + int trim_set_sz = sql_value_bytes(argv[0]); > + uint8_t *char_len = NULL; > + int char_cnt = 0; > + if ((char_cnt = prepare_char_len(context, trim_set, trim_set_sz, > + &char_len)) == -1) 8. I would understand if char_cnt was declared somewhere above, or assignment inside 'if' saved some code lines, like it does with 'input_str' above, but here it doesn't. Normally we do not assign in 'if' at all, it is SQLite legacy. > + return; > + trim_procedure(context, TRIM_BOTH, trim_set, char_len, char_cnt, > + input_str, input_str_sz); > + sql_free(char_len); > + } > +} > + > +/** > + * Normalize args from @a argv input array when it has three args. > + * > + * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>) > + * If user has specified side keyword and <character_set>, then > + * call trimming procedure with that args. 
> + */ > +static void > +trim_func_three_args(struct sql_context *context, int argc, sql_value **argv) > +{ > + assert(argc == 3); > + (void) argc; > + > + assert(sql_value_type(argv[0]) == SQL_INTEGER); > + const unsigned char *input_str, *trim_set; > + if ((input_str = sql_value_text(argv[2])) == NULL || > + (trim_set = sql_value_text(argv[1])) == NULL) > + return; > + > + int trim_set_sz = sql_value_bytes(argv[1]); > + int input_str_sz = sql_value_bytes(argv[2]); > + uint8_t *char_len = NULL; > + int char_cnt = 0; > + if ((char_cnt = prepare_char_len(context, trim_set, trim_set_sz, > + &char_len)) == -1) > + return; 9. The same. > + trim_procedure(context, sql_value_int(argv[0]), trim_set, char_len, > + char_cnt, input_str, input_str_sz); > + sql_free(char_len); > } > Fixes on the branch: ======================================================================== diff --git a/src/box/sql/func.c b/src/box/sql/func.c index 0ec359425..2bbb2ad4a 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1300,16 +1300,15 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) */ static void trim_procedure(struct sql_context *context, enum trim_side_mask flags, - const unsigned char *trim_set, uint8_t *char_len, int char_cnt, - const unsigned char *input_str, int input_str_sz) + const unsigned char *trim_set, const uint8_t *char_len, + int char_cnt, const unsigned char *input_str, int input_str_sz) { if (char_cnt == 0) goto finish; - int i = 0; - const unsigned char *z = NULL; + int i, len; + const unsigned char *z; if ((flags & TRIM_LEADING) != 0) { while (input_str_sz > 0) { - int len = 0; z = trim_set; for (i = 0; i < char_cnt; ++i, z += len) { len = char_len[i]; @@ -1325,7 +1324,6 @@ trim_procedure(struct sql_context *context, enum trim_side_mask flags, } if ((flags & TRIM_TRAILING) != 0) { while (input_str_sz > 0) { - int len = 0; z = trim_set; for (i = 0; i < char_cnt; ++i, z += len) { len = char_len[i]; @@ -1339,10 +1337,9 @@ trim_procedure(struct 
sql_context *context, enum trim_side_mask flags, input_str_sz -= len; } } - finish: sql_result_text(context, (char *)input_str, input_str_sz, - SQL_TRANSIENT); + SQL_TRANSIENT); } /** @@ -1357,8 +1354,9 @@ finish: * @retval -1 Memory allocation error. */ static int -prepare_char_len(struct sql_context *context, const unsigned char *trim_set, - int trim_set_sz, uint8_t **char_len) +trim_prepare_char_len(struct sql_context *context, + const unsigned char *trim_set, int trim_set_sz, + uint8_t **char_len) { /* * Count the number of UTF-8 characters passing through @@ -1367,17 +1365,18 @@ prepare_char_len(struct sql_context *context, const unsigned char *trim_set, * containing such characters. */ int char_cnt = sql_utf8_char_count(trim_set, trim_set_sz); - if (char_cnt == 0) + if (char_cnt == 0) { + *char_len = NULL; return 0; + } if ((*char_len = (uint8_t *)contextMalloc(context, char_cnt)) == NULL) return -1; - const unsigned char *z = trim_set; int i = 0, j = 0; while(j < char_cnt) { int old_i = i; - SQL_UTF8_FWD_1(z, i, trim_set_sz); + SQL_UTF8_FWD_1(trim_set, i, trim_set_sz); (*char_len)[j++] = i - old_i; } @@ -1403,9 +1402,9 @@ trim_func_one_arg(struct sql_context *context, int argc, sql_value **argv) return; int input_str_sz = sql_value_bytes(argv[0]); - static uint8_t len_one[] = { 1 }; + uint8_t len_one = 1; trim_procedure(context, TRIM_BOTH, (const unsigned char *) " ", - len_one, 1, input_str, input_str_sz); + &len_one, 1, input_str, input_str_sz); } /** @@ -1425,23 +1424,22 @@ trim_func_two_args(struct sql_context *context, int argc, sql_value **argv) assert(argc == 2); (void) argc; - const unsigned char *input_str; + const unsigned char *input_str, *trim_set; if ((input_str = sql_value_text(argv[1])) == NULL) return; int input_str_sz = sql_value_bytes(argv[1]); - const unsigned char *trim_set; if (sql_value_type(argv[0]) == SQL_INTEGER) { - static uint8_t len_one[] = { 1 }; + uint8_t len_one = 1; trim_procedure(context, sql_value_int(argv[0]), - (const 
unsigned char *) " ", len_one, 1, + (const unsigned char *) " ", &len_one, 1, input_str, input_str_sz); } else if ((trim_set = sql_value_text(argv[0])) != NULL) { int trim_set_sz = sql_value_bytes(argv[0]); - uint8_t *char_len = NULL; - int char_cnt = 0; - if ((char_cnt = prepare_char_len(context, trim_set, trim_set_sz, - &char_len)) == -1) + uint8_t *char_len; + int char_cnt = trim_prepare_char_len(context, trim_set, + trim_set_sz, &char_len); + if (char_cnt == -1) return; trim_procedure(context, TRIM_BOTH, trim_set, char_len, char_cnt, input_str, input_str_sz); @@ -1470,10 +1468,10 @@ trim_func_three_args(struct sql_context *context, int argc, sql_value **argv) int trim_set_sz = sql_value_bytes(argv[1]); int input_str_sz = sql_value_bytes(argv[2]); - uint8_t *char_len = NULL; - int char_cnt = 0; - if ((char_cnt = prepare_char_len(context, trim_set, trim_set_sz, - &char_len)) == -1) + uint8_t *char_len; + int char_cnt = trim_prepare_char_len(context, trim_set, trim_set_sz, + &char_len); + if (char_cnt == -1) return; trim_procedure(context, sql_value_int(argv[0]), trim_set, char_len, char_cnt, input_str, input_str_sz); ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature 2019-04-22 18:22 ` Vladislav Shpilevoy @ 2019-04-23 1:04 ` Roman Khabibov 2019-04-23 8:59 ` Vladislav Shpilevoy 0 siblings, 1 reply; 14+ messages in thread From: Roman Khabibov @ 2019-04-23 1:04 UTC (permalink / raw) To: tarantool-patches; +Cc: Vladislav Shpilevoy Hello! Of course, I agree with you. > On Apr 22, 2019, at 9:22 PM, Vladislav Shpilevoy <v.shpilevoy@tarantool.org> wrote: > > Hi! Thanks for the fixes! I've applied my > fixes and pushed on top of the branch. Please, > look at them and either squash, or lets discuss > where you do not agree. Otherwise it will LGTM. commit 368f588a6200653adebf20372e1e64c0fae8b9f3 Author: Roman Khabibov <roman.habibov@tarantool.org> Date: Thu Mar 28 14:01:33 2019 +0300 sql: modify TRIM() function signature According to the ANSI standard, ltrim, rtrim and trim should be merged into one unified TRIM() function. The specialization of trimming (left, right or both and trimming characters) is determined by the arguments of this function. Closes #3879 @TarantoolBot document Title: TRIM() function Modify signature of SQL function TRIM(). This function removes characters included in <trim character> (binary) string from <trim source> (binary) string until it encounters a character that doesn't belong to <trim character>. Removal occurs on the side specified by <trim specification>. The syntax is now as follows: TRIM([ [ <trim specification> ] [ <trim character> ] FROM ] <trim source>). <trim specification> can be one of the following keywords: LEADING, TRAILING or BOTH. <trim character> is the set of trimming characters. <trim source> is the string that will be trimmed. If FROM is specified, then: 1) Either <trim specification> or <trim character> or both shall be specified. 2) If <trim specification> is not specified, then BOTH is implicit. 3) If <trim character> is not specified, then ' ' is implicit.
diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c index be7bd5545..76e3265e7 100644 --- a/extra/mkkeywordhash.c +++ b/extra/mkkeywordhash.c @@ -278,6 +278,10 @@ static Keyword aKeywordTable[] = { { "WHILE", "TK_STANDARD", RESERVED, true }, { "TEXT", "TK_TEXT", RESERVED, true }, { "TRUNCATE", "TK_TRUNCATE", ALWAYS, true }, + { "TRIM", "TK_TRIM", ALWAYS, true }, + { "LEADING", "TK_LEADING", ALWAYS, true }, + { "TRAILING", "TK_TRAILING", ALWAYS, true }, + { "BOTH", "TK_BOTH", ALWAYS, true }, }; /* Number of keywords */ diff --git a/src/box/sql/func.c b/src/box/sql/func.c index abeecefa1..2bbb2ad4a 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1286,108 +1286,196 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) sql_result_text(context, (char *)zOut, j, sql_free); } -/* - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. +/** + * Remove characters included in @a trim_set from @a input_str + * until encounter a character that doesn't belong to @a trim_set. + * Remove from the side specified by @a flags. + * @param context SQL context. + * @param flags Trim specification: left, right or both. + * @param trim_set The set of characters for trimming. + * @param char_len Lengths of each UTF-8 character in @a trim_set. + * @param char_cnt A number of UTF-8 characters in @a trim_set. + * @param input_str Input string for trimming. + * @param input_str_sz Input string size in bytes. 
*/ static void -trimFunc(sql_context * context, int argc, sql_value ** argv) +trim_procedure(struct sql_context *context, enum trim_side_mask flags, + const unsigned char *trim_set, const uint8_t *char_len, + int char_cnt, const unsigned char *input_str, int input_str_sz) { - const unsigned char *zIn; /* Input string */ - const unsigned char *zCharSet; /* Set of characters to trim */ - int nIn; /* Number of bytes in input */ - int flags; /* 1: trimleft 2: trimright 3: trim */ - int i; /* Loop counter */ - unsigned char *aLen = 0; /* Length of each character in zCharSet */ - unsigned char **azChar = 0; /* Individual characters in zCharSet */ - int nChar; /* Number of characters in zCharSet */ - - if (sql_value_type(argv[0]) == SQL_NULL) { - return; - } - zIn = sql_value_text(argv[0]); - if (zIn == 0) - return; - nIn = sql_value_bytes(argv[0]); - assert(zIn == sql_value_text(argv[0])); - if (argc == 1) { - static const unsigned char lenOne[] = { 1 }; - static unsigned char *const azOne[] = { (u8 *) " " }; - nChar = 1; - aLen = (u8 *) lenOne; - azChar = (unsigned char **)azOne; - zCharSet = 0; - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { - return; - } else { - const unsigned char *z = zCharSet; - int trim_set_sz = sql_value_bytes(argv[1]); - /* - * Count the number of UTF-8 characters passing - * through the entire char set, but not up - * to the '\0' or X'00' character. This allows - * to handle trimming set containing such - * characters. 
- */ - nChar = sql_utf8_char_count(z, trim_set_sz); - if (nChar > 0) { - azChar = - contextMalloc(context, - ((i64) nChar) * (sizeof(char *) + 1)); - if (azChar == 0) { - return; - } - aLen = (unsigned char *)&azChar[nChar]; - z = zCharSet; - i = 0; - nChar = 0; - int handled_bytes_cnt = trim_set_sz; - while(handled_bytes_cnt > 0) { - azChar[nChar] = (unsigned char *)(z + i); - SQL_UTF8_FWD_1(z, i, trim_set_sz); - aLen[nChar] = (u8) (z + i - azChar[nChar]); - handled_bytes_cnt -= aLen[nChar]; - nChar++; - } - } - } - if (nChar > 0) { - flags = SQL_PTR_TO_INT(sql_user_data(context)); - if (flags & 1) { - while (nIn > 0) { - int len = 0; - for (i = 0; i < nChar; i++) { - len = aLen[i]; - if (len <= nIn - && memcmp(zIn, azChar[i], len) == 0) - break; - } - if (i >= nChar) + if (char_cnt == 0) + goto finish; + int i, len; + const unsigned char *z; + if ((flags & TRIM_LEADING) != 0) { + while (input_str_sz > 0) { + z = trim_set; + for (i = 0; i < char_cnt; ++i, z += len) { + len = char_len[i]; + if (len <= input_str_sz + && memcmp(input_str, z, len) == 0) break; - zIn += len; - nIn -= len; } + if (i >= char_cnt) + break; + input_str += len; + input_str_sz -= len; } - if (flags & 2) { - while (nIn > 0) { - int len = 0; - for (i = 0; i < nChar; i++) { - len = aLen[i]; - if (len <= nIn - && memcmp(&zIn[nIn - len], - azChar[i], len) == 0) - break; - } - if (i >= nChar) + } + if ((flags & TRIM_TRAILING) != 0) { + while (input_str_sz > 0) { + z = trim_set; + for (i = 0; i < char_cnt; ++i, z += len) { + len = char_len[i]; + if (len <= input_str_sz + && memcmp(&input_str[input_str_sz - len], + z, len) == 0) break; - nIn -= len; } + if (i >= char_cnt) + break; + input_str_sz -= len; } - if (zCharSet) { - sql_free(azChar); - } } - sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT); +finish: + sql_result_text(context, (char *)input_str, input_str_sz, + SQL_TRANSIENT); +} + +/** + * Prepare arguments for trimming procedure. 
Allocate memory for + * @a char_len (array of lengths each character in @a trim_set) + * and fill it. + * + * @param context SQL context. + * @param trim_set The set of characters for trimming. + * @param[out] char_len Lengths of each character in @ trim_set. + * @retval >=0 A number of UTF-8 characters in @a trim_set. + * @retval -1 Memory allocation error. + */ +static int +trim_prepare_char_len(struct sql_context *context, + const unsigned char *trim_set, int trim_set_sz, + uint8_t **char_len) +{ + /* + * Count the number of UTF-8 characters passing through + * the entire char set, but not up to the '\0' or X'00' + * character. This allows to handle trimming set + * containing such characters. + */ + int char_cnt = sql_utf8_char_count(trim_set, trim_set_sz); + if (char_cnt == 0) { + *char_len = NULL; + return 0; + } + + if ((*char_len = (uint8_t *)contextMalloc(context, char_cnt)) == NULL) + return -1; + + int i = 0, j = 0; + while(j < char_cnt) { + int old_i = i; + SQL_UTF8_FWD_1(trim_set, i, trim_set_sz); + (*char_len)[j++] = i - old_i; + } + + return char_cnt; +} + +/** + * Normalize args from @a argv input array when it has one arg + * only. + * + * Case: TRIM(<str>) + * Call trimming procedure with TRIM_BOTH as the flags and " " as + * the trimming set. + */ +static void +trim_func_one_arg(struct sql_context *context, int argc, sql_value **argv) +{ + assert(argc == 1); + (void) argc; + + const unsigned char *input_str; + if ((input_str = sql_value_text(argv[0])) == NULL) + return; + + int input_str_sz = sql_value_bytes(argv[0]); + uint8_t len_one = 1; + trim_procedure(context, TRIM_BOTH, (const unsigned char *) " ", + &len_one, 1, input_str, input_str_sz); +} + +/** + * Normalize args from @a argv input array when it has two args. + * + * Case: TRIM(<character_set> FROM <str>) + * If user has specified <character_set> only, call trimming + * procedure with TRIM_BOTH as the flags and that trimming set. 
+ * + * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>) + * If user has specified side keyword only, then call trimming + * procedure with the specified side and " " as the trimming set. + */ +static void +trim_func_two_args(struct sql_context *context, int argc, sql_value **argv) +{ + assert(argc == 2); + (void) argc; + + const unsigned char *input_str, *trim_set; + if ((input_str = sql_value_text(argv[1])) == NULL) + return; + + int input_str_sz = sql_value_bytes(argv[1]); + if (sql_value_type(argv[0]) == SQL_INTEGER) { + uint8_t len_one = 1; + trim_procedure(context, sql_value_int(argv[0]), + (const unsigned char *) " ", &len_one, 1, + input_str, input_str_sz); + } else if ((trim_set = sql_value_text(argv[0])) != NULL) { + int trim_set_sz = sql_value_bytes(argv[0]); + uint8_t *char_len; + int char_cnt = trim_prepare_char_len(context, trim_set, + trim_set_sz, &char_len); + if (char_cnt == -1) + return; + trim_procedure(context, TRIM_BOTH, trim_set, char_len, char_cnt, + input_str, input_str_sz); + sql_free(char_len); + } +} + +/** + * Normalize args from @a argv input array when it has three args. + * + * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>) + * If user has specified side keyword and <character_set>, then + * call trimming procedure with that args. 
+ */ +static void +trim_func_three_args(struct sql_context *context, int argc, sql_value **argv) +{ + assert(argc == 3); + (void) argc; + + assert(sql_value_type(argv[0]) == SQL_INTEGER); + const unsigned char *input_str, *trim_set; + if ((input_str = sql_value_text(argv[2])) == NULL || + (trim_set = sql_value_text(argv[1])) == NULL) + return; + + int trim_set_sz = sql_value_bytes(argv[1]); + int input_str_sz = sql_value_bytes(argv[2]); + uint8_t *char_len; + int char_cnt = trim_prepare_char_len(context, trim_set, trim_set_sz, + &char_len); + if (char_cnt == -1) + return; + trim_procedure(context, sql_value_int(argv[0]), trim_set, char_len, + char_cnt, input_str, input_str_sz); + sql_free(char_len); } #ifdef SQL_ENABLE_UNKNOWN_SQL_FUNCTION @@ -1818,12 +1906,9 @@ sqlRegisterBuiltinFunctions(void) FIELD_TYPE_INTEGER), FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY, FIELD_TYPE_INTEGER), - FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc), - FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc), - FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc), - FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc), - FUNCTION_COLL(trim, 1, 3, 0, trimFunc), - FUNCTION_COLL(trim, 2, 3, 0, trimFunc), + FUNCTION_COLL(trim, 1, 3, 0, trim_func_one_arg), + FUNCTION_COLL(trim, 2, 3, 0, trim_func_two_args), + FUNCTION_COLL(trim, 3, 3, 0, trim_func_three_args), FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR), FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR), AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize, diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index 099daf512..a56ce7a10 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -1032,6 +1032,55 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). { sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0); } %endif SQL_OMIT_CAST + +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). 
{ + A.pExpr = sqlExprFunction(pParse, Y, &X); + spanSet(&A, &X, &E); +} + +%type trim_operands {struct ExprList *} +%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);} + +trim_operands(A) ::= trim_from_clause(F) expr(Y). { + A = sql_expr_list_append(pParse->db, F, Y.pExpr); +} + +trim_operands(A) ::= expr(Y). { + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); +} + +%type trim_from_clause {struct ExprList *} +%destructor trim_from_clause {sql_expr_list_delete(pParse->db, $$);} + +/* + * The following two rules cover three cases of keyword + * (LEADING/TRAILING/BOTH) and <trim_character_set> combination. + * The case when both of them are absent is disallowed. + */ +trim_from_clause(A) ::= expr(Y) FROM. { + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); +} + +trim_from_clause(A) ::= trim_specification(N) expr_optional(Y) FROM. { + struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER, + &sqlIntTokens[N]); + A = sql_expr_list_append(pParse->db, NULL, p); + if (Y != NULL) + A = sql_expr_list_append(pParse->db, A, Y); +} + +%type expr_optional {struct Expr *} +%destructor expr_optional {sql_expr_delete(pParse->db, $$, false);} + +expr_optional(A) ::= . { A = NULL; } +expr_optional(A) ::= expr(X). { A = X.pExpr; } + +%type trim_specification {enum trim_side_mask} + +trim_specification(A) ::= LEADING. { A = TRIM_LEADING; } +trim_specification(A) ::= TRAILING. { A = TRIM_TRAILING; } +trim_specification(A) ::= BOTH. { A = TRIM_BOTH; } + expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). { if( Y && Y->nExpr>pParse->db->aLimit[SQL_LIMIT_FUNCTION_ARG] ){ const char *err = @@ -1294,7 +1343,7 @@ expr(A) ::= EXISTS(B) LP select(Y) RP(E). { } /* CASE expressions */ -expr(A) ::= CASE(C) case_operand(X) case_exprlist(Y) case_else(Z) END(E). { +expr(A) ::= CASE(C) expr_optional(X) case_exprlist(Y) case_else(Z) END(E). 
{ spanSet(&A,&C,&E); /*A-overwrites-C*/ A.pExpr = sqlPExpr(pParse, TK_CASE, X, 0); if( A.pExpr ){ @@ -1319,10 +1368,6 @@ case_exprlist(A) ::= WHEN expr(Y) THEN expr(Z). { %destructor case_else {sql_expr_delete(pParse->db, $$, false);} case_else(A) ::= ELSE expr(X). {A = X.pExpr;} case_else(A) ::= . {A = 0;} -%type case_operand {Expr*} -%destructor case_operand {sql_expr_delete(pParse->db, $$, false);} -case_operand(A) ::= expr(X). {A = X.pExpr; /*A-overwrites-X*/} -case_operand(A) ::= . {A = 0;} %type exprlist {ExprList*} %destructor exprlist {sql_expr_list_delete(pParse->db, $$);} diff --git a/src/box/sql/parse_def.c b/src/box/sql/parse_def.c index 49c76a326..aa1323cb2 100644 --- a/src/box/sql/parse_def.c +++ b/src/box/sql/parse_def.c @@ -34,7 +34,9 @@ const struct Token sqlIntTokens[] = { {"0", 1, false}, - {"1", 1, false} + {"1", 1, false}, + {"2", 1, false}, + {"3", 1, false}, }; void diff --git a/src/box/sql/parse_def.h b/src/box/sql/parse_def.h index a1af2bacd..5899a7e4e 100644 --- a/src/box/sql/parse_def.h +++ b/src/box/sql/parse_def.h @@ -87,7 +87,7 @@ struct Token { bool isReserved; }; -/** Constant tokens for values 0 and 1. */ +/** Constant tokens for integer values. */ extern const struct Token sqlIntTokens[]; /** Generate a Token object from a string. */ diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h index b322602dc..d5a3e15c1 100644 --- a/src/box/sql/sqlInt.h +++ b/src/box/sql/sqlInt.h @@ -1680,6 +1680,17 @@ struct FuncDestructor { * single query - might change over time */ +/* + * Trim side mask components. TRIM_LEADING means to trim left side + * only. TRIM_TRAILING is to trim right side only. TRIM_BOTH is to + * trim both sides. + */ +enum trim_side_mask { + TRIM_LEADING = 1, + TRIM_TRAILING = 2, + TRIM_BOTH = TRIM_LEADING | TRIM_TRAILING +}; + /* * The following three macros, FUNCTION(), LIKEFUNC() and AGGREGATE() are * used to create the initializers for the FuncDef structures. 
diff --git a/test/sql-tap/badutf1.test.lua b/test/sql-tap/badutf1.test.lua index d104efaa9..9079dfe25 100755 --- a/test/sql-tap/badutf1.test.lua +++ b/test/sql-tap/badutf1.test.lua @@ -302,7 +302,7 @@ test:do_test( test:do_test( "badutf-4.1", function() - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(TRIM('\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.1> "X", "F0" @@ -312,7 +312,7 @@ test:do_test( test:do_test( "badutf-4.2", function() - return test:execsql2("SELECT hex(ltrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(TRIM(LEADING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.2> "X", "F0808080FF" @@ -322,7 +322,7 @@ test:do_test( test:do_test( "badutf-4.3", function() - return test:execsql2("SELECT hex(rtrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") + return test:execsql2("SELECT hex(TRIM(TRAILING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.3> "X", "808080F0" @@ -332,7 +332,7 @@ test:do_test( test:do_test( "badutf-4.4", function() - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(TRIM('\xff\x80' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.4> "X", "808080F0808080FF" @@ -342,7 +342,7 @@ test:do_test( test:do_test( "badutf-4.5", function() - return test:execsql2("SELECT hex(trim('\xff\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(TRIM('\xff\x80' FROM '\xff\x80\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.5> "X", "80F0808080FF" @@ -352,7 +352,7 @@ test:do_test( test:do_test( "badutf-4.6", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") + return test:execsql2("SELECT hex(TRIM('\xff\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS 
x") end, { -- <badutf-4.6> "X", "F0808080FF" @@ -362,7 +362,7 @@ test:do_test( test:do_test( "badutf-4.7", function() - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80\x80')) AS x") + return test:execsql2("SELECT hex(TRIM('\xff\x80\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x") end, { -- <badutf-4.7> "X", "FF80F0808080FF" diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua index 251cc3534..fe9a98191 100755 --- a/test/sql-tap/func.test.lua +++ b/test/sql-tap/func.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test = require("sqltester") -test:plan(14586) +test:plan(14590) --!./tcltestrunner.lua -- 2001 September 15 @@ -1915,7 +1915,7 @@ test:do_catchsql_test( SELECT trim(1,2,3) ]], { -- <func-22.1> - 1, "wrong number of arguments to function TRIM()" + 1, "Syntax error near ','" -- </func-22.1> }) @@ -1925,7 +1925,7 @@ test:do_catchsql_test( SELECT ltrim(1,2,3) ]], { -- <func-22.2> - 1, "wrong number of arguments to function LTRIM()" + 1, "Function 'LTRIM' does not exist" -- </func-22.2> }) @@ -1935,7 +1935,7 @@ test:do_catchsql_test( SELECT rtrim(1,2,3) ]], { -- <func-22.3> - 1, "wrong number of arguments to function RTRIM()" + 1, "Function 'RTRIM' does not exist" -- </func-22.3> }) @@ -1952,7 +1952,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.5", [[ - SELECT ltrim(' hi '); + SELECT TRIM(LEADING FROM ' hi '); ]], { -- <func-22.5> "hi " @@ -1962,7 +1962,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.6", [[ - SELECT rtrim(' hi '); + SELECT TRIM(TRAILING FROM ' hi '); ]], { -- <func-22.6> " hi" @@ -1972,7 +1972,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.7", [[ - SELECT trim(' hi ','xyz'); + SELECT TRIM('xyz' FROM ' hi '); ]], { -- <func-22.7> " hi " @@ -1982,7 +1982,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.8", [[ - SELECT ltrim(' hi ','xyz'); + SELECT TRIM(LEADING 'xyz' FROM ' hi '); ]], { -- <func-22.8> " hi " @@ -1992,7 +1992,7 @@ test:do_execsql_test( 
test:do_execsql_test( "func-22.9", [[ - SELECT rtrim(' hi ','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM ' hi '); ]], { -- <func-22.9> " hi " @@ -2002,7 +2002,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.10", [[ - SELECT trim('xyxzy hi zzzy','xyz'); + SELECT TRIM('xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.10> " hi " @@ -2012,7 +2012,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.11", [[ - SELECT ltrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(LEADING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.11> " hi zzzy" @@ -2022,7 +2022,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.12", [[ - SELECT rtrim('xyxzy hi zzzy','xyz'); + SELECT TRIM(TRAILING 'xyz' FROM 'xyxzy hi zzzy'); ]], { -- <func-22.12> "xyxzy hi " @@ -2032,7 +2032,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.13", [[ - SELECT trim(' hi ',''); + SELECT TRIM('' FROM ' hi '); ]], { -- <func-22.13> " hi " @@ -2043,7 +2043,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.14", [[ - SELECT hex(trim(x'c280e1bfbff48fbfbf6869',x'6162e1bfbfc280')) + SELECT hex(TRIM(x'6162e1bfbfc280' FROM x'c280e1bfbff48fbfbf6869')) ]], { -- <func-22.14> "F48FBFBF6869" @@ -2052,8 +2052,8 @@ test:do_execsql_test( test:do_execsql_test( "func-22.15", - [[SELECT hex(trim(x'6869c280e1bfbff48fbfbf61', - x'6162e1bfbfc280f48fbfbf'))]], { + [[SELECT hex(TRIM(x'6162e1bfbfc280f48fbfbf' + FROM x'6869c280e1bfbff48fbfbf61'))]], { -- <func-22.15> "6869" -- </func-22.15> @@ -2062,7 +2062,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.16", [[ - SELECT hex(trim(x'ceb1ceb2ceb3',x'ceb1')); + SELECT hex(TRIM(x'ceb1' FROM x'ceb1ceb2ceb3')); ]], { -- <func-22.16> "CEB2CEB3" @@ -2083,7 +2083,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.21", [[ - SELECT typeof(trim(NULL,'xyz')); + SELECT typeof(TRIM('xyz' FROM NULL)); ]], { -- <func-22.21> "null" @@ -2093,7 +2093,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.22", [[ - SELECT typeof(trim('hello',NULL)); + SELECT 
typeof(TRIM(NULL FROM 'hello')); ]], { -- <func-22.22> "null" @@ -2105,7 +2105,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.23", [[ - SELECT TRIM(X'004100', X'00'); + SELECT TRIM(X'00' FROM X'004100'); ]], { -- <func-22.23> "A" @@ -2115,7 +2115,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.24", [[ - SELECT TRIM(X'004100', X'0000'); + SELECT TRIM(X'0000' FROM X'004100'); ]], { -- <func-22.24> "A" @@ -2125,7 +2125,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.25", [[ - SELECT TRIM(X'004100', X'0042'); + SELECT TRIM(X'0042' FROM X'004100'); ]], { -- <func-22.25> "A" @@ -2135,7 +2135,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.26", [[ - SELECT TRIM(X'00004100420000', X'00'); + SELECT TRIM(X'00' FROM X'00004100420000'); ]], { -- <func-22.26> "A\0B" @@ -2145,7 +2145,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.27", [[ - SELECT LTRIM(X'004100', X'00'); + SELECT TRIM(LEADING X'00' FROM X'004100'); ]], { -- <func-22.27> "A\0" @@ -2155,7 +2155,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.28", [[ - SELECT LTRIM(X'004100', X'0000'); + SELECT TRIM(LEADING X'0000' FROM X'004100'); ]], { -- <func-22.28> "A\0" @@ -2165,7 +2165,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.29", [[ - SELECT LTRIM(X'004100', X'0042'); + SELECT TRIM(LEADING X'0042' FROM X'004100'); ]], { -- <func-22.29> "A\0" @@ -2175,7 +2175,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.30", [[ - SELECT LTRIM(X'00004100420000', X'00'); + SELECT TRIM(LEADING X'00' FROM X'00004100420000'); ]], { -- <func-22.30> "A\0B\0\0" @@ -2185,7 +2185,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.31", [[ - SELECT RTRIM(X'004100', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'004100'); ]], { -- <func-22.31> "\0A" @@ -2195,7 +2195,7 @@ test:do_execsql_test( test:do_execsql_test( "func-22.32", [[ - SELECT RTRIM(X'004100', X'0000'); + SELECT TRIM(TRAILING X'0000' FROM X'004100'); ]], { -- <func-22.32> "\0A" @@ -2205,7 +2205,7 
@@ test:do_execsql_test( test:do_execsql_test( "func-22.33", [[ - SELECT RTRIM(X'004100', X'0042'); + SELECT TRIM(TRAILING X'0042' FROM X'004100'); ]], { -- <func-22.33> "\0A" @@ -2215,13 +2215,56 @@ test:do_execsql_test( test:do_execsql_test( "func-22.34", [[ - SELECT RTRIM(X'00004100420000', X'00'); + SELECT TRIM(TRAILING X'00' FROM X'00004100420000'); ]], { -- <func-22.34> "\0\0A\0B" -- </func-22.34> }) +-- gh-3879 Check new TRIM() grammar, particularly BOTH keyword and +-- FROM without any agrs before. LEADING and TRAILING keywords is +-- checked above. + +test:do_execsql_test( + "func-22.35", + [[ + SELECT TRIM(BOTH FROM ' hi '); + ]], { + -- <func-22.35> + "hi" + -- </func-22.35> + }) +test:do_execsql_test( + "func-22.36", + [[ + SELECT TRIM(BOTH 'xyz' FROM ' hi '); + ]], { + -- <func-22.36> + " hi " + -- </func-22.36> + }) + +test:do_execsql_test( + "func-22.37", + [[ + SELECT TRIM(BOTH 'xyz' FROM 'xyxzy hi zzzy'); + ]], { + -- <func-22.37> + " hi " + -- </func-22.37> + }) + +test:do_catchsql_test( + "func-22.38", + [[ + SELECT TRIM(FROM 'xyxzy'); + ]], { + -- <func-22.38> + 1, "Syntax error near 'FROM'" + -- </func-22.38> + }) + -- This is to test the deprecated sql_aggregate_count() API. 
-- --test:do_test( @@ -2838,16 +2881,16 @@ test:do_execsql_test( "SELECT TRIM(CHAR(32,00,32,00,32));", {string.char(00,32,00)}) --- LTRIM +-- LEFT TRIM test:do_execsql_test( "func-70", - "SELECT LTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(LEADING FROM CHAR(32,00,32,00,32));", {string.char(00,32,00,32)}) --- RTRIM +-- RIGHT TRIM test:do_execsql_test( "func-71", - "SELECT RTRIM(CHAR(32,00,32,00,32));", + "SELECT TRIM(TRAILING FROM CHAR(32,00,32,00,32));", {string.char(32,00,32,00)}) -- GROUP_CONCAT diff --git a/test/sql-tap/with1.test.lua b/test/sql-tap/with1.test.lua index 495aa4ee4..ec45e5e76 100755 --- a/test/sql-tap/with1.test.lua +++ b/test/sql-tap/with1.test.lua @@ -550,7 +550,7 @@ test:do_execsql_test("8.1-mandelbrot", [[ SELECT group_concat( substr(' .+*#', 1+min(iter/7,4), 1), '') FROM m2 GROUP BY cy ) - SELECT group_concat(rtrim(t),x'0a') FROM a; + SELECT group_concat(TRIM(TRAILING FROM t),x'0a') FROM a; ]], { -- <8.1-mandelbrot> [[ ....# ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature 2019-04-23 1:04 ` Roman Khabibov @ 2019-04-23 8:59 ` Vladislav Shpilevoy 0 siblings, 0 replies; 14+ messages in thread From: Vladislav Shpilevoy @ 2019-04-23 8:59 UTC (permalink / raw) To: Roman Khabibov, tarantool-patches, Kirill Yukhin LGTM. On 23/04/2019 04:04, Roman Khabibov wrote: > Hello! Of course, I agree with you. > >> On Apr 22, 2019, at 9:22 PM, Vladislav Shpilevoy <v.shpilevoy@tarantool.org> wrote: >> >> Hi! Thanks for the fixes! I've applied my >> fixes and pushed on top of the branch. Please, >> look at them and either squash, or let's discuss >> where you do not agree. Otherwise it will LGTM. > > commit 368f588a6200653adebf20372e1e64c0fae8b9f3 > Author: Roman Khabibov <roman.habibov@tarantool.org> > Date: Thu Mar 28 14:01:33 2019 +0300 > > sql: modify TRIM() function signature > > According to the ANSI standard, ltrim, rtrim and trim should > be merged into one unified TRIM() function. The specialization of > trimming (left, right or both and trimming characters) is determined > in arguments of this function. > > Closes #3879 > > @TarantoolBot document > Title: TRIM() function > > Modify signature of SQL function TRIM(). This function removes > characters included in <trim character> (binary) string from > <trim source> (binary) string until it encounters a character that doesn't > belong to <trim character>. Removal occurs on the side specified by > <trim specification>. The syntax is now as follows: > TRIM([ [ <trim specification> ] [ <trim character> ] FROM ] <trim source>). > > <trim specification> can be one of the following keywords: LEADING, > TRAILING and BOTH. > <trim character> is the set of trimming characters. > <trim source> is the string that will be trimmed. > If FROM is specified, then: > 1) Either <trim specification> or <trim character> or both shall be > specified. > 2) If <trim specification> is not specified, then BOTH is implicit.
> 3) If <trim character> is not specified, then ' ' is implicit. > > diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c > index be7bd5545..76e3265e7 100644 > --- a/extra/mkkeywordhash.c > +++ b/extra/mkkeywordhash.c > @@ -278,6 +278,10 @@ static Keyword aKeywordTable[] = { > { "WHILE", "TK_STANDARD", RESERVED, true }, > { "TEXT", "TK_TEXT", RESERVED, true }, > { "TRUNCATE", "TK_TRUNCATE", ALWAYS, true }, > + { "TRIM", "TK_TRIM", ALWAYS, true }, > + { "LEADING", "TK_LEADING", ALWAYS, true }, > + { "TRAILING", "TK_TRAILING", ALWAYS, true }, > + { "BOTH", "TK_BOTH", ALWAYS, true }, > }; > > /* Number of keywords */ > diff --git a/src/box/sql/func.c b/src/box/sql/func.c > index abeecefa1..2bbb2ad4a 100644 > --- a/src/box/sql/func.c > +++ b/src/box/sql/func.c > @@ -1286,108 +1286,196 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) > sql_result_text(context, (char *)zOut, j, sql_free); > } > > -/* > - * Implementation of the TRIM(), LTRIM(), and RTRIM() functions. > - * The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both. > +/** > + * Remove characters included in @a trim_set from @a input_str > + * until encounter a character that doesn't belong to @a trim_set. > + * Remove from the side specified by @a flags. > + * @param context SQL context. > + * @param flags Trim specification: left, right or both. > + * @param trim_set The set of characters for trimming. > + * @param char_len Lengths of each UTF-8 character in @a trim_set. > + * @param char_cnt A number of UTF-8 characters in @a trim_set. > + * @param input_str Input string for trimming. > + * @param input_str_sz Input string size in bytes. 
> */ > static void > -trimFunc(sql_context * context, int argc, sql_value ** argv) > +trim_procedure(struct sql_context *context, enum trim_side_mask flags, > + const unsigned char *trim_set, const uint8_t *char_len, > + int char_cnt, const unsigned char *input_str, int input_str_sz) > { > - const unsigned char *zIn; /* Input string */ > - const unsigned char *zCharSet; /* Set of characters to trim */ > - int nIn; /* Number of bytes in input */ > - int flags; /* 1: trimleft 2: trimright 3: trim */ > - int i; /* Loop counter */ > - unsigned char *aLen = 0; /* Length of each character in zCharSet */ > - unsigned char **azChar = 0; /* Individual characters in zCharSet */ > - int nChar; /* Number of characters in zCharSet */ > - > - if (sql_value_type(argv[0]) == SQL_NULL) { > - return; > - } > - zIn = sql_value_text(argv[0]); > - if (zIn == 0) > - return; > - nIn = sql_value_bytes(argv[0]); > - assert(zIn == sql_value_text(argv[0])); > - if (argc == 1) { > - static const unsigned char lenOne[] = { 1 }; > - static unsigned char *const azOne[] = { (u8 *) " " }; > - nChar = 1; > - aLen = (u8 *) lenOne; > - azChar = (unsigned char **)azOne; > - zCharSet = 0; > - } else if ((zCharSet = sql_value_text(argv[1])) == 0) { > - return; > - } else { > - const unsigned char *z = zCharSet; > - int trim_set_sz = sql_value_bytes(argv[1]); > - /* > - * Count the number of UTF-8 characters passing > - * through the entire char set, but not up > - * to the '\0' or X'00' character. This allows > - * to handle trimming set containing such > - * characters. 
> - */ > - nChar = sql_utf8_char_count(z, trim_set_sz); > - if (nChar > 0) { > - azChar = > - contextMalloc(context, > - ((i64) nChar) * (sizeof(char *) + 1)); > - if (azChar == 0) { > - return; > - } > - aLen = (unsigned char *)&azChar[nChar]; > - z = zCharSet; > - i = 0; > - nChar = 0; > - int handled_bytes_cnt = trim_set_sz; > - while(handled_bytes_cnt > 0) { > - azChar[nChar] = (unsigned char *)(z + i); > - SQL_UTF8_FWD_1(z, i, trim_set_sz); > - aLen[nChar] = (u8) (z + i - azChar[nChar]); > - handled_bytes_cnt -= aLen[nChar]; > - nChar++; > - } > - } > - } > - if (nChar > 0) { > - flags = SQL_PTR_TO_INT(sql_user_data(context)); > - if (flags & 1) { > - while (nIn > 0) { > - int len = 0; > - for (i = 0; i < nChar; i++) { > - len = aLen[i]; > - if (len <= nIn > - && memcmp(zIn, azChar[i], len) == 0) > - break; > - } > - if (i >= nChar) > + if (char_cnt == 0) > + goto finish; > + int i, len; > + const unsigned char *z; > + if ((flags & TRIM_LEADING) != 0) { > + while (input_str_sz > 0) { > + z = trim_set; > + for (i = 0; i < char_cnt; ++i, z += len) { > + len = char_len[i]; > + if (len <= input_str_sz > + && memcmp(input_str, z, len) == 0) > break; > - zIn += len; > - nIn -= len; > } > + if (i >= char_cnt) > + break; > + input_str += len; > + input_str_sz -= len; > } > - if (flags & 2) { > - while (nIn > 0) { > - int len = 0; > - for (i = 0; i < nChar; i++) { > - len = aLen[i]; > - if (len <= nIn > - && memcmp(&zIn[nIn - len], > - azChar[i], len) == 0) > - break; > - } > - if (i >= nChar) > + } > + if ((flags & TRIM_TRAILING) != 0) { > + while (input_str_sz > 0) { > + z = trim_set; > + for (i = 0; i < char_cnt; ++i, z += len) { > + len = char_len[i]; > + if (len <= input_str_sz > + && memcmp(&input_str[input_str_sz - len], > + z, len) == 0) > break; > - nIn -= len; > } > + if (i >= char_cnt) > + break; > + input_str_sz -= len; > } > - if (zCharSet) { > - sql_free(azChar); > - } > } > - sql_result_text(context, (char *)zIn, nIn, SQL_TRANSIENT); > +finish: > + 
sql_result_text(context, (char *)input_str, input_str_sz, > + SQL_TRANSIENT); > +} > + > +/** > + * Prepare arguments for trimming procedure. Allocate memory for > + * @a char_len (array of lengths each character in @a trim_set) > + * and fill it. > + * > + * @param context SQL context. > + * @param trim_set The set of characters for trimming. > + * @param[out] char_len Lengths of each character in @ trim_set. > + * @retval >=0 A number of UTF-8 characters in @a trim_set. > + * @retval -1 Memory allocation error. > + */ > +static int > +trim_prepare_char_len(struct sql_context *context, > + const unsigned char *trim_set, int trim_set_sz, > + uint8_t **char_len) > +{ > + /* > + * Count the number of UTF-8 characters passing through > + * the entire char set, but not up to the '\0' or X'00' > + * character. This allows to handle trimming set > + * containing such characters. > + */ > + int char_cnt = sql_utf8_char_count(trim_set, trim_set_sz); > + if (char_cnt == 0) { > + *char_len = NULL; > + return 0; > + } > + > + if ((*char_len = (uint8_t *)contextMalloc(context, char_cnt)) == NULL) > + return -1; > + > + int i = 0, j = 0; > + while(j < char_cnt) { > + int old_i = i; > + SQL_UTF8_FWD_1(trim_set, i, trim_set_sz); > + (*char_len)[j++] = i - old_i; > + } > + > + return char_cnt; > +} > + > +/** > + * Normalize args from @a argv input array when it has one arg > + * only. > + * > + * Case: TRIM(<str>) > + * Call trimming procedure with TRIM_BOTH as the flags and " " as > + * the trimming set. 
> + */ > +static void > +trim_func_one_arg(struct sql_context *context, int argc, sql_value **argv) > +{ > + assert(argc == 1); > + (void) argc; > + > + const unsigned char *input_str; > + if ((input_str = sql_value_text(argv[0])) == NULL) > + return; > + > + int input_str_sz = sql_value_bytes(argv[0]); > + uint8_t len_one = 1; > + trim_procedure(context, TRIM_BOTH, (const unsigned char *) " ", > + &len_one, 1, input_str, input_str_sz); > +} > + > +/** > + * Normalize args from @a argv input array when it has two args. > + * > + * Case: TRIM(<character_set> FROM <str>) > + * If user has specified <character_set> only, call trimming > + * procedure with TRIM_BOTH as the flags and that trimming set. > + * > + * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>) > + * If user has specified side keyword only, then call trimming > + * procedure with the specified side and " " as the trimming set. > + */ > +static void > +trim_func_two_args(struct sql_context *context, int argc, sql_value **argv) > +{ > + assert(argc == 2); > + (void) argc; > + > + const unsigned char *input_str, *trim_set; > + if ((input_str = sql_value_text(argv[1])) == NULL) > + return; > + > + int input_str_sz = sql_value_bytes(argv[1]); > + if (sql_value_type(argv[0]) == SQL_INTEGER) { > + uint8_t len_one = 1; > + trim_procedure(context, sql_value_int(argv[0]), > + (const unsigned char *) " ", &len_one, 1, > + input_str, input_str_sz); > + } else if ((trim_set = sql_value_text(argv[0])) != NULL) { > + int trim_set_sz = sql_value_bytes(argv[0]); > + uint8_t *char_len; > + int char_cnt = trim_prepare_char_len(context, trim_set, > + trim_set_sz, &char_len); > + if (char_cnt == -1) > + return; > + trim_procedure(context, TRIM_BOTH, trim_set, char_len, char_cnt, > + input_str, input_str_sz); > + sql_free(char_len); > + } > +} > + > +/** > + * Normalize args from @a argv input array when it has three args. 
> + * > + * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>) > + * If user has specified side keyword and <character_set>, then > + * call trimming procedure with that args. > + */ > +static void > +trim_func_three_args(struct sql_context *context, int argc, sql_value **argv) > +{ > + assert(argc == 3); > + (void) argc; > + > + assert(sql_value_type(argv[0]) == SQL_INTEGER); > + const unsigned char *input_str, *trim_set; > + if ((input_str = sql_value_text(argv[2])) == NULL || > + (trim_set = sql_value_text(argv[1])) == NULL) > + return; > + > + int trim_set_sz = sql_value_bytes(argv[1]); > + int input_str_sz = sql_value_bytes(argv[2]); > + uint8_t *char_len; > + int char_cnt = trim_prepare_char_len(context, trim_set, trim_set_sz, > + &char_len); > + if (char_cnt == -1) > + return; > + trim_procedure(context, sql_value_int(argv[0]), trim_set, char_len, > + char_cnt, input_str, input_str_sz); > + sql_free(char_len); > } > > #ifdef SQL_ENABLE_UNKNOWN_SQL_FUNCTION > @@ -1818,12 +1906,9 @@ sqlRegisterBuiltinFunctions(void) > FIELD_TYPE_INTEGER), > FUNCTION2(likely, 1, 0, 0, noopFunc, SQL_FUNC_UNLIKELY, > FIELD_TYPE_INTEGER), > - FUNCTION_COLL(ltrim, 1, 1, 0, trimFunc), > - FUNCTION_COLL(ltrim, 2, 1, 0, trimFunc), > - FUNCTION_COLL(rtrim, 1, 2, 0, trimFunc), > - FUNCTION_COLL(rtrim, 2, 2, 0, trimFunc), > - FUNCTION_COLL(trim, 1, 3, 0, trimFunc), > - FUNCTION_COLL(trim, 2, 3, 0, trimFunc), > + FUNCTION_COLL(trim, 1, 3, 0, trim_func_one_arg), > + FUNCTION_COLL(trim, 2, 3, 0, trim_func_two_args), > + FUNCTION_COLL(trim, 3, 3, 0, trim_func_three_args), > FUNCTION(min, -1, 0, 1, minmaxFunc, FIELD_TYPE_SCALAR), > FUNCTION(min, 0, 0, 1, 0, FIELD_TYPE_SCALAR), > AGGREGATE2(min, 1, 0, 1, minmaxStep, minMaxFinalize, > diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y > index 099daf512..a56ce7a10 100644 > --- a/src/box/sql/parse.y > +++ b/src/box/sql/parse.y > @@ -1032,6 +1032,55 @@ expr(A) ::= CAST(X) LP expr(E) AS typedef(T) RP(Y). 
{ > sqlExprAttachSubtrees(pParse->db, A.pExpr, E.pExpr, 0); > } > %endif SQL_OMIT_CAST > + > +expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). { > + A.pExpr = sqlExprFunction(pParse, Y, &X); > + spanSet(&A, &X, &E); > +} > + > +%type trim_operands {struct ExprList *} > +%destructor trim_operands {sql_expr_list_delete(pParse->db, $$);} > + > +trim_operands(A) ::= trim_from_clause(F) expr(Y). { > + A = sql_expr_list_append(pParse->db, F, Y.pExpr); > +} > + > +trim_operands(A) ::= expr(Y). { > + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); > +} > + > +%type trim_from_clause {struct ExprList *} > +%destructor trim_from_clause {sql_expr_list_delete(pParse->db, $$);} > + > +/* > + * The following two rules cover three cases of keyword > + * (LEADING/TRAILING/BOTH) and <trim_character_set> combination. > + * The case when both of them are absent is disallowed. > + */ > +trim_from_clause(A) ::= expr(Y) FROM. { > + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); > +} > + > +trim_from_clause(A) ::= trim_specification(N) expr_optional(Y) FROM. { > + struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER, > + &sqlIntTokens[N]); > + A = sql_expr_list_append(pParse->db, NULL, p); > + if (Y != NULL) > + A = sql_expr_list_append(pParse->db, A, Y); > +} > + > +%type expr_optional {struct Expr *} > +%destructor expr_optional {sql_expr_delete(pParse->db, $$, false);} > + > +expr_optional(A) ::= . { A = NULL; } > +expr_optional(A) ::= expr(X). { A = X.pExpr; } > + > +%type trim_specification {enum trim_side_mask} > + > +trim_specification(A) ::= LEADING. { A = TRIM_LEADING; } > +trim_specification(A) ::= TRAILING. { A = TRIM_TRAILING; } > +trim_specification(A) ::= BOTH. { A = TRIM_BOTH; } > + > expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). { > if( Y && Y->nExpr>pParse->db->aLimit[SQL_LIMIT_FUNCTION_ARG] ){ > const char *err = > @@ -1294,7 +1343,7 @@ expr(A) ::= EXISTS(B) LP select(Y) RP(E). 
{ > } > > /* CASE expressions */ > -expr(A) ::= CASE(C) case_operand(X) case_exprlist(Y) case_else(Z) END(E). { > +expr(A) ::= CASE(C) expr_optional(X) case_exprlist(Y) case_else(Z) END(E). { > spanSet(&A,&C,&E); /*A-overwrites-C*/ > A.pExpr = sqlPExpr(pParse, TK_CASE, X, 0); > if( A.pExpr ){ > @@ -1319,10 +1368,6 @@ case_exprlist(A) ::= WHEN expr(Y) THEN expr(Z). { > %destructor case_else {sql_expr_delete(pParse->db, $$, false);} > case_else(A) ::= ELSE expr(X). {A = X.pExpr;} > case_else(A) ::= . {A = 0;} > -%type case_operand {Expr*} > -%destructor case_operand {sql_expr_delete(pParse->db, $$, false);} > -case_operand(A) ::= expr(X). {A = X.pExpr; /*A-overwrites-X*/} > -case_operand(A) ::= . {A = 0;} > > %type exprlist {ExprList*} > %destructor exprlist {sql_expr_list_delete(pParse->db, $$);} > diff --git a/src/box/sql/parse_def.c b/src/box/sql/parse_def.c > index 49c76a326..aa1323cb2 100644 > --- a/src/box/sql/parse_def.c > +++ b/src/box/sql/parse_def.c > @@ -34,7 +34,9 @@ > > const struct Token sqlIntTokens[] = { > {"0", 1, false}, > - {"1", 1, false} > + {"1", 1, false}, > + {"2", 1, false}, > + {"3", 1, false}, > }; > > void > diff --git a/src/box/sql/parse_def.h b/src/box/sql/parse_def.h > index a1af2bacd..5899a7e4e 100644 > --- a/src/box/sql/parse_def.h > +++ b/src/box/sql/parse_def.h > @@ -87,7 +87,7 @@ struct Token { > bool isReserved; > }; > > -/** Constant tokens for values 0 and 1. */ > +/** Constant tokens for integer values. */ > extern const struct Token sqlIntTokens[]; > > /** Generate a Token object from a string. */ > diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h > index b322602dc..d5a3e15c1 100644 > --- a/src/box/sql/sqlInt.h > +++ b/src/box/sql/sqlInt.h > @@ -1680,6 +1680,17 @@ struct FuncDestructor { > * single query - might change over time > */ > > +/* > + * Trim side mask components. TRIM_LEADING means to trim left side > + * only. TRIM_TRAILING is to trim right side only. TRIM_BOTH is to > + * trim both sides. 
> + */ > +enum trim_side_mask { > + TRIM_LEADING = 1, > + TRIM_TRAILING = 2, > + TRIM_BOTH = TRIM_LEADING | TRIM_TRAILING > +}; > + > /* > * The following three macros, FUNCTION(), LIKEFUNC() and AGGREGATE() are > * used to create the initializers for the FuncDef structures. > diff --git a/test/sql-tap/badutf1.test.lua b/test/sql-tap/badutf1.test.lua > index d104efaa9..9079dfe25 100755 > --- a/test/sql-tap/badutf1.test.lua > +++ b/test/sql-tap/badutf1.test.lua > @@ -302,7 +302,7 @@ test:do_test( > test:do_test( > "badutf-4.1", > function() > - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") > + return test:execsql2("SELECT hex(TRIM('\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") > end, { > -- <badutf-4.1> > "X", "F0" > @@ -312,7 +312,7 @@ test:do_test( > test:do_test( > "badutf-4.2", > function() > - return test:execsql2("SELECT hex(ltrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") > + return test:execsql2("SELECT hex(TRIM(LEADING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") > end, { > -- <badutf-4.2> > "X", "F0808080FF" > @@ -322,7 +322,7 @@ test:do_test( > test:do_test( > "badutf-4.3", > function() > - return test:execsql2("SELECT hex(rtrim('\x80\x80\x80\xf0\x80\x80\x80\xff','\x80\xff')) AS x") > + return test:execsql2("SELECT hex(TRIM(TRAILING '\x80\xff' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") > end, { > -- <badutf-4.3> > "X", "808080F0" > @@ -332,7 +332,7 @@ test:do_test( > test:do_test( > "badutf-4.4", > function() > - return test:execsql2("SELECT hex(trim('\x80\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") > + return test:execsql2("SELECT hex(TRIM('\xff\x80' FROM '\x80\x80\x80\xf0\x80\x80\x80\xff')) AS x") > end, { > -- <badutf-4.4> > "X", "808080F0808080FF" > @@ -342,7 +342,7 @@ test:do_test( > test:do_test( > "badutf-4.5", > function() > - return test:execsql2("SELECT hex(trim('\xff\x80\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") > + return test:execsql2("SELECT 
hex(TRIM('\xff\x80' FROM '\xff\x80\x80\xf0\x80\x80\x80\xff')) AS x") > end, { > -- <badutf-4.5> > "X", "80F0808080FF" > @@ -352,7 +352,7 @@ test:do_test( > test:do_test( > "badutf-4.6", > function() > - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80')) AS x") > + return test:execsql2("SELECT hex(TRIM('\xff\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x") > end, { > -- <badutf-4.6> > "X", "F0808080FF" > @@ -362,7 +362,7 @@ test:do_test( > test:do_test( > "badutf-4.7", > function() > - return test:execsql2("SELECT hex(trim('\xff\x80\xf0\x80\x80\x80\xff','\xff\x80\x80')) AS x") > + return test:execsql2("SELECT hex(TRIM('\xff\x80\x80' FROM '\xff\x80\xf0\x80\x80\x80\xff')) AS x") > end, { > -- <badutf-4.7> > "X", "FF80F0808080FF" > diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua > index 251cc3534..fe9a98191 100755 > --- a/test/sql-tap/func.test.lua > +++ b/test/sql-tap/func.test.lua > @@ -1,6 +1,6 @@ > #!/usr/bin/env tarantool > test = require("sqltester") > -test:plan(14586) > +test:plan(14590) > > --!./tcltestrunner.lua > -- 2001 September 15 > @@ -1915,7 +1915,7 @@ test:do_catchsql_test( > SELECT trim(1,2,3) > ]], { > -- <func-22.1> > - 1, "wrong number of arguments to function TRIM()" > + 1, "Syntax error near ','" > -- </func-22.1> > }) > > @@ -1925,7 +1925,7 @@ test:do_catchsql_test( > SELECT ltrim(1,2,3) > ]], { > -- <func-22.2> > - 1, "wrong number of arguments to function LTRIM()" > + 1, "Function 'LTRIM' does not exist" > -- </func-22.2> > }) > > @@ -1935,7 +1935,7 @@ test:do_catchsql_test( > SELECT rtrim(1,2,3) > ]], { > -- <func-22.3> > - 1, "wrong number of arguments to function RTRIM()" > + 1, "Function 'RTRIM' does not exist" > -- </func-22.3> > }) > > @@ -1952,7 +1952,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.5", > [[ > - SELECT ltrim(' hi '); > + SELECT TRIM(LEADING FROM ' hi '); > ]], { > -- <func-22.5> > "hi " > @@ -1962,7 +1962,7 @@ test:do_execsql_test( > test:do_execsql_test( 
> "func-22.6", > [[ > - SELECT rtrim(' hi '); > + SELECT TRIM(TRAILING FROM ' hi '); > ]], { > -- <func-22.6> > " hi" > @@ -1972,7 +1972,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.7", > [[ > - SELECT trim(' hi ','xyz'); > + SELECT TRIM('xyz' FROM ' hi '); > ]], { > -- <func-22.7> > " hi " > @@ -1982,7 +1982,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.8", > [[ > - SELECT ltrim(' hi ','xyz'); > + SELECT TRIM(LEADING 'xyz' FROM ' hi '); > ]], { > -- <func-22.8> > " hi " > @@ -1992,7 +1992,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.9", > [[ > - SELECT rtrim(' hi ','xyz'); > + SELECT TRIM(TRAILING 'xyz' FROM ' hi '); > ]], { > -- <func-22.9> > " hi " > @@ -2002,7 +2002,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.10", > [[ > - SELECT trim('xyxzy hi zzzy','xyz'); > + SELECT TRIM('xyz' FROM 'xyxzy hi zzzy'); > ]], { > -- <func-22.10> > " hi " > @@ -2012,7 +2012,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.11", > [[ > - SELECT ltrim('xyxzy hi zzzy','xyz'); > + SELECT TRIM(LEADING 'xyz' FROM 'xyxzy hi zzzy'); > ]], { > -- <func-22.11> > " hi zzzy" > @@ -2022,7 +2022,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.12", > [[ > - SELECT rtrim('xyxzy hi zzzy','xyz'); > + SELECT TRIM(TRAILING 'xyz' FROM 'xyxzy hi zzzy'); > ]], { > -- <func-22.12> > "xyxzy hi " > @@ -2032,7 +2032,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.13", > [[ > - SELECT trim(' hi ',''); > + SELECT TRIM('' FROM ' hi '); > ]], { > -- <func-22.13> > " hi " > @@ -2043,7 +2043,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.14", > [[ > - SELECT hex(trim(x'c280e1bfbff48fbfbf6869',x'6162e1bfbfc280')) > + SELECT hex(TRIM(x'6162e1bfbfc280' FROM x'c280e1bfbff48fbfbf6869')) > ]], { > -- <func-22.14> > "F48FBFBF6869" > @@ -2052,8 +2052,8 @@ test:do_execsql_test( > > test:do_execsql_test( > "func-22.15", > - [[SELECT hex(trim(x'6869c280e1bfbff48fbfbf61', > - 
x'6162e1bfbfc280f48fbfbf'))]], { > + [[SELECT hex(TRIM(x'6162e1bfbfc280f48fbfbf' > + FROM x'6869c280e1bfbff48fbfbf61'))]], { > -- <func-22.15> > "6869" > -- </func-22.15> > @@ -2062,7 +2062,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.16", > [[ > - SELECT hex(trim(x'ceb1ceb2ceb3',x'ceb1')); > + SELECT hex(TRIM(x'ceb1' FROM x'ceb1ceb2ceb3')); > ]], { > -- <func-22.16> > "CEB2CEB3" > @@ -2083,7 +2083,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.21", > [[ > - SELECT typeof(trim(NULL,'xyz')); > + SELECT typeof(TRIM('xyz' FROM NULL)); > ]], { > -- <func-22.21> > "null" > @@ -2093,7 +2093,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.22", > [[ > - SELECT typeof(trim('hello',NULL)); > + SELECT typeof(TRIM(NULL FROM 'hello')); > ]], { > -- <func-22.22> > "null" > @@ -2105,7 +2105,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.23", > [[ > - SELECT TRIM(X'004100', X'00'); > + SELECT TRIM(X'00' FROM X'004100'); > ]], { > -- <func-22.23> > "A" > @@ -2115,7 +2115,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.24", > [[ > - SELECT TRIM(X'004100', X'0000'); > + SELECT TRIM(X'0000' FROM X'004100'); > ]], { > -- <func-22.24> > "A" > @@ -2125,7 +2125,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.25", > [[ > - SELECT TRIM(X'004100', X'0042'); > + SELECT TRIM(X'0042' FROM X'004100'); > ]], { > -- <func-22.25> > "A" > @@ -2135,7 +2135,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.26", > [[ > - SELECT TRIM(X'00004100420000', X'00'); > + SELECT TRIM(X'00' FROM X'00004100420000'); > ]], { > -- <func-22.26> > "A\0B" > @@ -2145,7 +2145,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.27", > [[ > - SELECT LTRIM(X'004100', X'00'); > + SELECT TRIM(LEADING X'00' FROM X'004100'); > ]], { > -- <func-22.27> > "A\0" > @@ -2155,7 +2155,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.28", > [[ > - SELECT LTRIM(X'004100', X'0000'); > + SELECT TRIM(LEADING X'0000' 
FROM X'004100'); > ]], { > -- <func-22.28> > "A\0" > @@ -2165,7 +2165,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.29", > [[ > - SELECT LTRIM(X'004100', X'0042'); > + SELECT TRIM(LEADING X'0042' FROM X'004100'); > ]], { > -- <func-22.29> > "A\0" > @@ -2175,7 +2175,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.30", > [[ > - SELECT LTRIM(X'00004100420000', X'00'); > + SELECT TRIM(LEADING X'00' FROM X'00004100420000'); > ]], { > -- <func-22.30> > "A\0B\0\0" > @@ -2185,7 +2185,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.31", > [[ > - SELECT RTRIM(X'004100', X'00'); > + SELECT TRIM(TRAILING X'00' FROM X'004100'); > ]], { > -- <func-22.31> > "\0A" > @@ -2195,7 +2195,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.32", > [[ > - SELECT RTRIM(X'004100', X'0000'); > + SELECT TRIM(TRAILING X'0000' FROM X'004100'); > ]], { > -- <func-22.32> > "\0A" > @@ -2205,7 +2205,7 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.33", > [[ > - SELECT RTRIM(X'004100', X'0042'); > + SELECT TRIM(TRAILING X'0042' FROM X'004100'); > ]], { > -- <func-22.33> > "\0A" > @@ -2215,13 +2215,56 @@ test:do_execsql_test( > test:do_execsql_test( > "func-22.34", > [[ > - SELECT RTRIM(X'00004100420000', X'00'); > + SELECT TRIM(TRAILING X'00' FROM X'00004100420000'); > ]], { > -- <func-22.34> > "\0\0A\0B" > -- </func-22.34> > }) > > +-- gh-3879 Check new TRIM() grammar, particularly BOTH keyword and > +-- FROM without any agrs before. LEADING and TRAILING keywords is > +-- checked above. 
> + > +test:do_execsql_test( > + "func-22.35", > + [[ > + SELECT TRIM(BOTH FROM ' hi '); > + ]], { > + -- <func-22.35> > + "hi" > + -- </func-22.35> > + }) > +test:do_execsql_test( > + "func-22.36", > + [[ > + SELECT TRIM(BOTH 'xyz' FROM ' hi '); > + ]], { > + -- <func-22.36> > + " hi " > + -- </func-22.36> > + }) > + > +test:do_execsql_test( > + "func-22.37", > + [[ > + SELECT TRIM(BOTH 'xyz' FROM 'xyxzy hi zzzy'); > + ]], { > + -- <func-22.37> > + " hi " > + -- </func-22.37> > + }) > + > +test:do_catchsql_test( > + "func-22.38", > + [[ > + SELECT TRIM(FROM 'xyxzy'); > + ]], { > + -- <func-22.38> > + 1, "Syntax error near 'FROM'" > + -- </func-22.38> > + }) > + > -- This is to test the deprecated sql_aggregate_count() API. > -- > --test:do_test( > @@ -2838,16 +2881,16 @@ test:do_execsql_test( > "SELECT TRIM(CHAR(32,00,32,00,32));", > {string.char(00,32,00)}) > > --- LTRIM > +-- LEFT TRIM > test:do_execsql_test( > "func-70", > - "SELECT LTRIM(CHAR(32,00,32,00,32));", > + "SELECT TRIM(LEADING FROM CHAR(32,00,32,00,32));", > {string.char(00,32,00,32)}) > > --- RTRIM > +-- RIGHT TRIM > test:do_execsql_test( > "func-71", > - "SELECT RTRIM(CHAR(32,00,32,00,32));", > + "SELECT TRIM(TRAILING FROM CHAR(32,00,32,00,32));", > {string.char(32,00,32,00)}) > > -- GROUP_CONCAT > diff --git a/test/sql-tap/with1.test.lua b/test/sql-tap/with1.test.lua > index 495aa4ee4..ec45e5e76 100755 > --- a/test/sql-tap/with1.test.lua > +++ b/test/sql-tap/with1.test.lua > @@ -550,7 +550,7 @@ test:do_execsql_test("8.1-mandelbrot", [[ > SELECT group_concat( substr(' .+*#', 1+min(iter/7,4), 1), '') > FROM m2 GROUP BY cy > ) > - SELECT group_concat(rtrim(t),x'0a') FROM a; > + SELECT group_concat(TRIM(TRAILING FROM t),x'0a') FROM a; > ]], { > -- <8.1-mandelbrot> > [[ ....# > > ^ permalink raw reply [flat|nested] 14+ messages in thread
* [tarantool-patches] Re: [PATCH] sql: modify TRIM() function signature 2019-04-11 17:33 [tarantool-patches] [PATCH] sql: modify TRIM() function signature Roman Khabibov 2019-04-14 18:01 ` [tarantool-patches] " Vladislav Shpilevoy @ 2019-04-23 10:21 ` Kirill Yukhin 1 sibling, 0 replies; 14+ messages in thread From: Kirill Yukhin @ 2019-04-23 10:21 UTC (permalink / raw) To: tarantool-patches; +Cc: v.shpilevoy Hello, On 11 Apr 20:33, Roman Khabibov wrote: > According to the ANSI standard, ltrim, rtrim and trim should > be merged into one unified TRIM() function. The specialization of > trimming (left, right or both and trimming characters) is determined > in arguments of this function. > > Closes #3879 > --- > Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3879-trim > Issue: https://github.com/tarantool/tarantool/issues/3879 I've checked your patch into master branch. -- Regards, Kirill Yukhin ^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2019-04-23 10:21 UTC | newest] Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2019-04-11 17:33 [tarantool-patches] [PATCH] sql: modify TRIM() function signature Roman Khabibov 2019-04-14 18:01 ` [tarantool-patches] " Vladislav Shpilevoy 2019-04-16 0:14 ` Roman Khabibov 2019-04-16 17:14 ` Vladislav Shpilevoy 2019-04-18 17:11 ` Roman Khabibov 2019-04-19 12:49 ` Vladislav Shpilevoy 2019-04-20 0:48 ` Roman Khabibov 2019-04-21 19:36 ` Vladislav Shpilevoy 2019-04-22 10:43 ` Vladislav Shpilevoy 2019-04-22 16:45 ` Roman Khabibov 2019-04-22 18:22 ` Vladislav Shpilevoy 2019-04-23 1:04 ` Roman Khabibov 2019-04-23 8:59 ` Vladislav Shpilevoy 2019-04-23 10:21 ` Kirill Yukhin
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox