From: Mergen Imeev via Tarantool-patches <tarantool-patches@dev.tarantool.org> To: tsafin@tarantool.org Cc: tarantool-patches@dev.tarantool.org Subject: [Tarantool-patches] [PATCH v1 1/1] sql: modify signature of TRIM() Date: Tue, 17 Aug 2021 15:50:11 +0300 [thread overview] Message-ID: <22dc57ac05b46f508f574049ba4057a7bb2f9273.1629204538.git.imeevma@gmail.com> (raw) This patch changes the signature of SQL built-in function TRIM(). This gives us an easier way to check the types of the arguments to this function. Additionally, these changes fix a bug where using TRIM with the BOTH, LEADING, or TRAILING keywords would result in a loss of a collation. Needed for #6105 Closes #6299 --- https://github.com/tarantool/tarantool/issues/6299 https://github.com/tarantool/tarantool/tree/imeevma/gh-6299-fix-trim-signature src/box/sql/func.c | 80 ++++++------------- src/box/sql/parse.y | 36 +++++---- .../gh-6299-lost-collation-on-trim.test.lua | 47 +++++++++++ 3 files changed, 90 insertions(+), 73 deletions(-) create mode 100755 test/sql-tap/gh-6299-lost-collation-on-trim.test.lua diff --git a/src/box/sql/func.c b/src/box/sql/func.c index 1622104d3..c73061e33 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1481,37 +1481,11 @@ trim_prepare_char_len(struct sql_context *context, } /** - * Normalize args from @a argv input array when it has one arg - * only. + * Normalize args from @a argv input array when it has two args. * * Case: TRIM(<str>) * Call trimming procedure with TRIM_BOTH as the flags and " " as * the trimming set. - */ -static void -trim_func_one_arg(struct sql_context *context, sql_value *arg) -{ - /* In case of VARBINARY type default trim octet is X'00'. 
*/ - const unsigned char *default_trim; - if (mem_is_null(arg)) - return; - if (mem_is_bin(arg)) - default_trim = (const unsigned char *) "\0"; - else - default_trim = (const unsigned char *) " "; - const unsigned char *input_str = mem_as_ustr(arg); - int input_str_sz = mem_len_unsafe(arg); - uint8_t trim_char_len[1] = { 1 }; - trim_procedure(context, TRIM_BOTH, default_trim, trim_char_len, 1, - input_str, input_str_sz); -} - -/** - * Normalize args from @a argv input array when it has two args. - * - * Case: TRIM(<character_set> FROM <str>) - * If user has specified <character_set> only, call trimming - * procedure with TRIM_BOTH as the flags and that trimming set. * * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>) * If user has specified side keyword only, then call trimming @@ -1521,32 +1495,29 @@ static void trim_func_two_args(struct sql_context *context, sql_value *arg1, sql_value *arg2) { - const unsigned char *input_str, *trim_set; - if ((input_str = mem_as_ustr(arg2)) == NULL) + const unsigned char *trim_set; + if (mem_is_bin(arg1)) + trim_set = (const unsigned char *)"\0"; + else + trim_set = (const unsigned char *)" "; + const unsigned char *input_str; + if ((input_str = mem_as_ustr(arg1)) == NULL) return; - int input_str_sz = mem_len_unsafe(arg2); - if (sql_value_type(arg1) == MP_INT || sql_value_type(arg1) == MP_UINT) { - uint8_t len_one = 1; - trim_procedure(context, mem_get_int_unsafe(arg1), - (const unsigned char *) " ", &len_one, 1, - input_str, input_str_sz); - } else if ((trim_set = mem_as_ustr(arg1)) != NULL) { - int trim_set_sz = mem_len_unsafe(arg1); - uint8_t *char_len; - int char_cnt = trim_prepare_char_len(context, trim_set, - trim_set_sz, &char_len); - if (char_cnt == -1) - return; - trim_procedure(context, TRIM_BOTH, trim_set, char_len, char_cnt, - input_str, input_str_sz); - sql_free(char_len); - } + int input_str_sz = mem_len_unsafe(arg1); + assert(arg2->type == MEM_TYPE_UINT); + uint8_t len_one = 1; + trim_procedure(context, arg2->u.u, 
trim_set, + &len_one, 1, input_str, input_str_sz); } /** * Normalize args from @a argv input array when it has three args. * + * Case: TRIM(<character_set> FROM <str>) + * If user has specified <character_set> only, call trimming procedure with + * TRIM_BOTH as the flags and that trimming set. + * * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>) * If user has specified side keyword and <character_set>, then * call trimming procedure with that args. @@ -1555,20 +1526,20 @@ static void trim_func_three_args(struct sql_context *context, sql_value *arg1, sql_value *arg2, sql_value *arg3) { - assert(sql_value_type(arg1) == MP_INT || sql_value_type(arg1) == MP_UINT); + assert(arg2->type == MEM_TYPE_UINT); const unsigned char *input_str, *trim_set; - if ((input_str = mem_as_ustr(arg3)) == NULL || - (trim_set = mem_as_ustr(arg2)) == NULL) + if ((input_str = mem_as_ustr(arg1)) == NULL || + (trim_set = mem_as_ustr(arg3)) == NULL) return; - int trim_set_sz = mem_len_unsafe(arg2); - int input_str_sz = mem_len_unsafe(arg3); + int trim_set_sz = mem_len_unsafe(arg3); + int input_str_sz = mem_len_unsafe(arg1); uint8_t *char_len; int char_cnt = trim_prepare_char_len(context, trim_set, trim_set_sz, &char_len); if (char_cnt == -1) return; - trim_procedure(context, mem_get_int_unsafe(arg1), trim_set, char_len, + trim_procedure(context, arg2->u.u, trim_set, char_len, char_cnt, input_str, input_str_sz); sql_free(char_len); } @@ -1584,9 +1555,6 @@ static void trim_func(struct sql_context *context, int argc, sql_value **argv) { switch (argc) { - case 1: - trim_func_one_arg(context, argv[0]); - break; case 2: trim_func_two_args(context, argv[0], argv[1]); break; @@ -1595,7 +1563,7 @@ trim_func(struct sql_context *context, int argc, sql_value **argv) break; default: diag_set(ClientError, ER_FUNC_WRONG_ARG_COUNT, "TRIM", - "1 or 2 or 3", argc); + "2 or 3", argc); context->is_aborted = true; } } diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index bd041e862..d06f45fd9 
100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -1132,32 +1132,34 @@ expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). { %type trim_operands {struct ExprList *} %destructor trim_operands {sql_expr_list_delete(pParse->db, $$);} -trim_operands(A) ::= trim_from_clause(F) expr(Y). { - A = sql_expr_list_append(pParse->db, F, Y.pExpr); +trim_operands(A) ::= trim_specification(N) expr(Z) FROM expr(Y). { + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); + struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER, + &sqlIntTokens[N]); + A = sql_expr_list_append(pParse->db, A, p); + A = sql_expr_list_append(pParse->db, A, Z.pExpr); } -trim_operands(A) ::= expr(Y). { +trim_operands(A) ::= trim_specification(N) FROM expr(Y). { A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); + struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER, + &sqlIntTokens[N]); + A = sql_expr_list_append(pParse->db, A, p); } -%type trim_from_clause {struct ExprList *} -%destructor trim_from_clause {sql_expr_list_delete(pParse->db, $$);} - -/* - * The following two rules cover three cases of keyword - * (LEADING/TRAILING/BOTH) and <trim_character_set> combination. - * The case when both of them are absent is disallowed. - */ -trim_from_clause(A) ::= expr(Y) FROM. { +trim_operands(A) ::= expr(Z) FROM expr(Y). { A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); + struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER, + &sqlIntTokens[TRIM_BOTH]); + A = sql_expr_list_append(pParse->db, A, p); + A = sql_expr_list_append(pParse->db, A, Z.pExpr); } -trim_from_clause(A) ::= trim_specification(N) expr_optional(Y) FROM. { +trim_operands(A) ::= expr(Y). 
{ + A = sql_expr_list_append(pParse->db, NULL, Y.pExpr); struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER, - &sqlIntTokens[N]); - A = sql_expr_list_append(pParse->db, NULL, p); - if (Y != NULL) - A = sql_expr_list_append(pParse->db, A, Y); + &sqlIntTokens[TRIM_BOTH]); + A = sql_expr_list_append(pParse->db, A, p); } %type expr_optional {struct Expr *} diff --git a/test/sql-tap/gh-6299-lost-collation-on-trim.test.lua b/test/sql-tap/gh-6299-lost-collation-on-trim.test.lua new file mode 100755 index 000000000..1799da839 --- /dev/null +++ b/test/sql-tap/gh-6299-lost-collation-on-trim.test.lua @@ -0,0 +1,47 @@ +#!/usr/bin/env tarantool +local test = require("sqltester") +test:plan(4) + +-- +-- Make sure that collation is not lost when TRIM called with BOTH, LEADING, or +-- TRAILING keywords specified. +-- + +test:execsql[[ + CREATE TABLE t (i INT PRIMARY KEY, s STRING COLLATE "unicode_ci"); + INSERT INTO t VALUES (1,'A'), (2,'a'); +]] + +test:do_execsql_test( + "gh-6299-2", + [[ + SELECT DISTINCT trim(LEADING FROM s) FROM t; + ]], { + 'A' + }) + +test:do_execsql_test( + "gh-6299-3", + [[ + SELECT DISTINCT trim(TRAILING FROM s) FROM t; + ]], { + 'A' + }) + +test:do_execsql_test( + "gh-6299-4", + [[ + SELECT DISTINCT trim(BOTH FROM s) FROM t; + ]], { + 'A' + }) + +test:do_execsql_test( + "gh-6299-1", + [[ + SELECT DISTINCT trim(s) FROM t; + ]], { + 'A' + }) + +test:finish_test() -- 2.25.1
reply other threads:[~2021-08-17 12:50 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=22dc57ac05b46f508f574049ba4057a7bb2f9273.1629204538.git.imeevma@gmail.com \ --to=tarantool-patches@dev.tarantool.org \ --cc=imeevma@tarantool.org \ --cc=tsafin@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH v1 1/1] sql: modify signature of TRIM()' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.