Tarantool development patches archive
 help / color / mirror / Atom feed
From: Mergen Imeev via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: kyukhin@tarantool.org, v.ioffe@tarantool.org
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH v1 1/9] sql: modify signature of TRIM()
Date: Thu, 19 Aug 2021 15:02:56 +0300	[thread overview]
Message-ID: <5ad7461c72af3b9b814fc2dba3d9ab085eca5e1e.1629374448.git.imeevma@gmail.com> (raw)
In-Reply-To: <cover.1629374448.git.imeevma@gmail.com>

This patch changes the signature of SQL built-in function TRIM(). This
gives us an easier way to check the types of the arguments to this
function. Additionally, these changes fix a bug where using TRIM with
the BOTH, LEADING, or TRAILING keywords would result in a loss of a
collation.

Needed for #6105
Closes #6299
---
 src/box/sql/func.c                            | 80 ++++++-------------
 src/box/sql/parse.y                           | 36 +++++----
 .../gh-6299-lost-collation-on-trim.test.lua   | 47 +++++++++++
 3 files changed, 90 insertions(+), 73 deletions(-)
 create mode 100755 test/sql-tap/gh-6299-lost-collation-on-trim.test.lua

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 8cdb370d1..90359a23a 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -1467,37 +1467,11 @@ trim_prepare_char_len(struct sql_context *context,
 }
 
 /**
- * Normalize args from @a argv input array when it has one arg
- * only.
+ * Normalize args from @a argv input array when it has two args.
  *
  * Case: TRIM(<str>)
  * Call trimming procedure with TRIM_BOTH as the flags and " " as
  * the trimming set.
- */
-static void
-trim_func_one_arg(struct sql_context *context, sql_value *arg)
-{
-	/* In case of VARBINARY type default trim octet is X'00'. */
-	const unsigned char *default_trim;
-	if (mem_is_null(arg))
-		return;
-	if (mem_is_bin(arg))
-		default_trim = (const unsigned char *) "\0";
-	else
-		default_trim = (const unsigned char *) " ";
-	const unsigned char *input_str = mem_as_ustr(arg);
-	int input_str_sz = mem_len_unsafe(arg);
-	uint8_t trim_char_len[1] = { 1 };
-	trim_procedure(context, TRIM_BOTH, default_trim, trim_char_len, 1,
-		       input_str, input_str_sz);
-}
-
-/**
- * Normalize args from @a argv input array when it has two args.
- *
- * Case: TRIM(<character_set> FROM <str>)
- * If user has specified <character_set> only, call trimming
- * procedure with TRIM_BOTH as the flags and that trimming set.
  *
  * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>)
  * If user has specified side keyword only, then call trimming
@@ -1507,32 +1481,29 @@ static void
 trim_func_two_args(struct sql_context *context, sql_value *arg1,
 		   sql_value *arg2)
 {
-	const unsigned char *input_str, *trim_set;
-	if ((input_str = mem_as_ustr(arg2)) == NULL)
+	const unsigned char *trim_set;
+	if (mem_is_bin(arg1))
+		trim_set = (const unsigned char *)"\0";
+	else
+		trim_set = (const unsigned char *)" ";
+	const unsigned char *input_str;
+	if ((input_str = mem_as_ustr(arg1)) == NULL)
 		return;
 
-	int input_str_sz = mem_len_unsafe(arg2);
-	if (sql_value_type(arg1) == MP_INT || sql_value_type(arg1) == MP_UINT) {
-		uint8_t len_one = 1;
-		trim_procedure(context, mem_get_int_unsafe(arg1),
-			       (const unsigned char *) " ", &len_one, 1,
-			       input_str, input_str_sz);
-	} else if ((trim_set = mem_as_ustr(arg1)) != NULL) {
-		int trim_set_sz = mem_len_unsafe(arg1);
-		uint8_t *char_len;
-		int char_cnt = trim_prepare_char_len(context, trim_set,
-						     trim_set_sz, &char_len);
-		if (char_cnt == -1)
-			return;
-		trim_procedure(context, TRIM_BOTH, trim_set, char_len, char_cnt,
-			       input_str, input_str_sz);
-		sql_free(char_len);
-	}
+	int input_str_sz = mem_len_unsafe(arg1);
+	assert(arg2->type == MEM_TYPE_UINT);
+	uint8_t len_one = 1;
+	trim_procedure(context, arg2->u.u, trim_set,
+		       &len_one, 1, input_str, input_str_sz);
 }
 
 /**
  * Normalize args from @a argv input array when it has three args.
  *
+ * Case: TRIM(<character_set> FROM <str>)
+ * If user has specified <character_set> only, call trimming procedure with
+ * TRIM_BOTH as the flags and that trimming set.
+ *
  * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>)
  * If user has specified side keyword and <character_set>, then
  * call trimming procedure with that args.
@@ -1541,20 +1512,20 @@ static void
 trim_func_three_args(struct sql_context *context, sql_value *arg1,
 		     sql_value *arg2, sql_value *arg3)
 {
-	assert(sql_value_type(arg1) == MP_INT || sql_value_type(arg1) == MP_UINT);
+	assert(arg2->type == MEM_TYPE_UINT);
 	const unsigned char *input_str, *trim_set;
-	if ((input_str = mem_as_ustr(arg3)) == NULL ||
-	    (trim_set = mem_as_ustr(arg2)) == NULL)
+	if ((input_str = mem_as_ustr(arg1)) == NULL ||
+	    (trim_set = mem_as_ustr(arg3)) == NULL)
 		return;
 
-	int trim_set_sz = mem_len_unsafe(arg2);
-	int input_str_sz = mem_len_unsafe(arg3);
+	int trim_set_sz = mem_len_unsafe(arg3);
+	int input_str_sz = mem_len_unsafe(arg1);
 	uint8_t *char_len;
 	int char_cnt = trim_prepare_char_len(context, trim_set, trim_set_sz,
 					     &char_len);
 	if (char_cnt == -1)
 		return;
-	trim_procedure(context, mem_get_int_unsafe(arg1), trim_set, char_len,
+	trim_procedure(context, arg2->u.u, trim_set, char_len,
 		       char_cnt, input_str, input_str_sz);
 	sql_free(char_len);
 }
@@ -1570,9 +1541,6 @@ static void
 trim_func(struct sql_context *context, int argc, sql_value **argv)
 {
 	switch (argc) {
-	case 1:
-		trim_func_one_arg(context, argv[0]);
-		break;
 	case 2:
 		trim_func_two_args(context, argv[0], argv[1]);
 		break;
@@ -1581,7 +1549,7 @@ trim_func(struct sql_context *context, int argc, sql_value **argv)
 		break;
 	default:
 		diag_set(ClientError, ER_FUNC_WRONG_ARG_COUNT, "TRIM",
-			 "1 or 2 or 3", argc);
+			"2 or 3", argc);
 		context->is_aborted = true;
 	}
 }
diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index 436c98cd9..337df4916 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -1132,32 +1132,34 @@ expr(A) ::= TRIM(X) LP trim_operands(Y) RP(E). {
 %type trim_operands {struct ExprList *}
 %destructor trim_operands {sql_expr_list_delete(pParse->db, $$);}
 
-trim_operands(A) ::= trim_from_clause(F) expr(Y). {
-  A = sql_expr_list_append(pParse->db, F, Y.pExpr);
+trim_operands(A) ::= trim_specification(N) expr(Z) FROM expr(Y). {
+  A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
+  struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER,
+                                         &sqlIntTokens[N]);
+  A = sql_expr_list_append(pParse->db, A, p);
+  A = sql_expr_list_append(pParse->db, A, Z.pExpr);
 }
 
-trim_operands(A) ::= expr(Y). {
+trim_operands(A) ::= trim_specification(N) FROM expr(Y). {
   A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
+  struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER,
+                                         &sqlIntTokens[N]);
+  A = sql_expr_list_append(pParse->db, A, p);
 }
 
-%type trim_from_clause {struct ExprList *}
-%destructor trim_from_clause {sql_expr_list_delete(pParse->db, $$);}
-
-/*
- * The following two rules cover three cases of keyword
- * (LEADING/TRAILING/BOTH) and <trim_character_set> combination.
- * The case when both of them are absent is disallowed.
- */
-trim_from_clause(A) ::= expr(Y) FROM. {
+trim_operands(A) ::= expr(Z) FROM expr(Y). {
   A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
+  struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER,
+                                         &sqlIntTokens[TRIM_BOTH]);
+  A = sql_expr_list_append(pParse->db, A, p);
+  A = sql_expr_list_append(pParse->db, A, Z.pExpr);
 }
 
-trim_from_clause(A) ::= trim_specification(N) expr_optional(Y) FROM. {
+trim_operands(A) ::= expr(Y). {
+  A = sql_expr_list_append(pParse->db, NULL, Y.pExpr);
   struct Expr *p = sql_expr_new_dequoted(pParse->db, TK_INTEGER,
-                                         &sqlIntTokens[N]);
-  A = sql_expr_list_append(pParse->db, NULL, p);
-  if (Y != NULL)
-    A = sql_expr_list_append(pParse->db, A, Y);
+                                         &sqlIntTokens[TRIM_BOTH]);
+  A = sql_expr_list_append(pParse->db, A, p);
 }
 
 %type expr_optional {struct Expr *}
diff --git a/test/sql-tap/gh-6299-lost-collation-on-trim.test.lua b/test/sql-tap/gh-6299-lost-collation-on-trim.test.lua
new file mode 100755
index 000000000..1799da839
--- /dev/null
+++ b/test/sql-tap/gh-6299-lost-collation-on-trim.test.lua
@@ -0,0 +1,47 @@
+#!/usr/bin/env tarantool
+local test = require("sqltester")
+test:plan(4)
+
+--
+-- Make sure that collation is not lost when TRIM called with BOTH, LEADING, or
+-- TRAILING keywords specified.
+--
+
+test:execsql[[
+    CREATE TABLE t (i INT PRIMARY KEY, s STRING COLLATE "unicode_ci");
+    INSERT INTO t VALUES (1,'A'), (2,'a');
+]]
+
+test:do_execsql_test(
+    "gh-6299-2",
+    [[
+        SELECT DISTINCT trim(LEADING FROM s) FROM t;
+    ]], {
+        'A'
+    })
+
+test:do_execsql_test(
+    "gh-6299-3",
+    [[
+        SELECT DISTINCT trim(TRAILING FROM s) FROM t;
+    ]], {
+        'A'
+    })
+
+test:do_execsql_test(
+    "gh-6299-4",
+    [[
+        SELECT DISTINCT trim(BOTH FROM s) FROM t;
+    ]], {
+        'A'
+    })
+
+test:do_execsql_test(
+    "gh-6299-1",
+    [[
+        SELECT DISTINCT trim(s) FROM t;
+    ]], {
+        'A'
+    })
+
+test:finish_test()
-- 
2.25.1


  reply	other threads:[~2021-08-19 12:03 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-19 12:02 [Tarantool-patches] [PATCH v1 0/9] Check types of SQL built-in functions arguments Mergen Imeev via Tarantool-patches
2021-08-19 12:02 ` Mergen Imeev via Tarantool-patches [this message]
2021-08-19 12:02 ` [Tarantool-patches] [PATCH v1 2/9] sql: rework SQL built-in functions hash table Mergen Imeev via Tarantool-patches
2021-08-19 12:03 ` [Tarantool-patches] [PATCH v1 3/9] sql: check number of arguments during parsing Mergen Imeev via Tarantool-patches
2021-08-19 12:03 ` [Tarantool-patches] [PATCH v1 4/9] sql: static type check for SQL built-in functions Mergen Imeev via Tarantool-patches
2021-08-19 12:03 ` [Tarantool-patches] [PATCH v1 5/9] sql: runtime " Mergen Imeev via Tarantool-patches
2021-08-19 12:03 ` [Tarantool-patches] [PATCH v1 6/9] sql: enable types checking for some functions Mergen Imeev via Tarantool-patches
2021-08-19 12:03 ` [Tarantool-patches] [PATCH v1 7/9] sql: fix result type of min() and max() functions Mergen Imeev via Tarantool-patches
2021-08-19 12:03 ` [Tarantool-patches] [PATCH v1 8/9] sql: check argument types of sum(), avg(), total() Mergen Imeev via Tarantool-patches
2021-08-19 12:03 ` [Tarantool-patches] [PATCH v1 9/9] sql: arguments check for string value functions Mergen Imeev via Tarantool-patches
2021-08-19 12:26 ` [Tarantool-patches] [PATCH v1 0/9] Check types of SQL built-in functions arguments Kirill Yukhin via Tarantool-patches
2021-08-19 15:50   ` Vitaliia Ioffe via Tarantool-patches
2021-08-19 16:16 ` Kirill Yukhin via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5ad7461c72af3b9b814fc2dba3d9ab085eca5e1e.1629374448.git.imeevma@gmail.com \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=imeevma@tarantool.org \
    --cc=kyukhin@tarantool.org \
    --cc=v.ioffe@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v1 1/9] sql: modify signature of TRIM()' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox