From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id B063025038 for ; Mon, 4 Feb 2019 07:34:48 -0500 (EST) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id yHg65R-iNKkv for ; Mon, 4 Feb 2019 07:34:48 -0500 (EST) Received: from smtpng3.m.smailru.net (smtpng3.m.smailru.net [94.100.177.149]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 11FCA25012 for ; Mon, 4 Feb 2019 07:34:47 -0500 (EST) Subject: [tarantool-patches] Re: [PATCH] sql: LIKE/LENGTH process '\0' References: <15e143f4-3ea7-c7d6-d8ac-8a0e20b76449@tarantool.org> <1560FF96-FECD-4368-8AF8-F8F2AE7696E3@tarantool.org> From: Ivan Koptelov Message-ID: Date: Mon, 4 Feb 2019 15:34:45 +0300 MIME-Version: 1.0 In-Reply-To: <1560FF96-FECD-4368-8AF8-F8F2AE7696E3@tarantool.org> Content-Type: text/html; charset="utf-8" Content-Language: en-GB Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: "n.pettik" , tarantool-patches@freelists.org Thank you for the review.

On 29/01/2019 19:35, n.pettik wrote:
Fixes LIKE and LENGTH functions. '\0' now treated as
Nit: is treated.
Fixed.

      
a usual symbol. Strings with '\0' are now processed
entirely. Consider examples:

LENGTH(CHAR(65,00,65)) == 3
LIKE(CHAR(65,00,65), CHAR(65,00,66)) == False
Also, I see that smth wrong with text in this mail again
I hope now the mail text is ok.

      
Closes #3542
Please, check other functions working with strings.
For instance, now I see that replace ignores \0 as well:

tarantool> box.sql.execute("select replace(CHAR(65,00,66), char(0), char(1))")
---
- - ["A\0B"]
...
Ok, done. I've made some extra changes to make REPLACE, GROUP_CONCAT and SUBSTR
work as expected. Also tests on all functions which may work with strings are added.


      
@@ -150,9 +150,13 @@ lengthFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
 			const unsigned char *z = sqlite3_value_text(argv[0]);
 			if (z == 0)
 				return;
+
Nit: don’t abuse empty lines.
Sorry, fixed now.

      
 			len = 0;
-			while (*z) {
+			size_t byte_len = sqlite3_value_bytes(argv[0]);
+			const unsigned char *prev_z;
+			for (size_t cnt = 0; cnt < byte_len; cnt += (z - prev_z)) {
Out of 80.
Fixed.

      
diff --git a/test/sql-tap/gh-3542-like-len-null-term.test.lua b/test/sql-tap/gh-3542-like-len-null-term.test.lua
new file mode 100755
index 000000000..e9ea9ea30
--- /dev/null
+++ b/test/sql-tap/gh-3542-like-len-null-term.test.lua
@@ -0,0 +1,97 @@
+#!/usr/bin/env tarantool
+test = require("sqltester")
+test:plan(14)
+
+-- gh-3542 - LIKE/LENGTH do not scan if '\0' is encountered.
+-- This test ensures that LIKE and LENGTH functions does NOT stop
+-- string processing if '\0' is encountered.
I’d rather put these tests to sql-tap/func.test.lua

Ok, done.


Here are the changes.
--
 src/box/sql/func.c         |  88 +++++++++++++-----
 src/box/sql/vdbeInt.h      |   2 +-
 test/sql-tap/func.test.lua | 220 ++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 284 insertions(+), 26 deletions(-)

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index e46b162d9..2978af983 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -128,6 +128,30 @@ typeofFunc(sqlite3_context * context, int NotUsed, sqlite3_value ** argv)
 	sqlite3_result_text(context, z, -1, SQLITE_STATIC);
 }
 
+/**
+ * Return number of chars in the given string.
+ *
+ * Number of chars != byte size of string because some characters
+ * are encoded with more than one byte. Also note that all
+ * characters from 'str' to 'str + byte_len' would be counted,
+ * even if there is a '\0' somewhere between them.
+ * @param str String to be counted.
+ * @param byte_len Byte length of given string.
+ * @return
+ */
+static int
+count_chars(const unsigned char *str, size_t byte_len)
+{
+	int n_chars = 0;
+	const unsigned char *prev_z;
+	for (size_t cnt = 0; cnt < byte_len; cnt += (str - prev_z)) {
+		n_chars++;
+		prev_z = str;
+		SQLITE_SKIP_UTF8(str);
+	}
+	return n_chars;
+}
+
 /*
  * Implementation of the length() function
  */
@@ -150,11 +174,7 @@ lengthFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
 			const unsigned char *z = sqlite3_value_text(argv[0]);
 			if (z == 0)
 				return;
-			len = 0;
-			while (*z) {
-				len++;
-				SQLITE_SKIP_UTF8(z);
-			}
+			len = count_chars(z, sqlite3_value_bytes(argv[0]));
 			sqlite3_result_int(context, len);
 			break;
 		}
@@ -340,11 +360,8 @@ substrFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
 		if (z == 0)
 			return;
 		len = 0;
-		if (p1 < 0) {
-			for (z2 = z; *z2; len++) {
-				SQLITE_SKIP_UTF8(z2);
-			}
-		}
+		if (p1 < 0)
+			len = count_chars(z, sqlite3_value_bytes(argv[0]));
 	}
 #ifdef SQLITE_SUBSTR_COMPATIBILITY
 	/* If SUBSTR_COMPATIBILITY is defined then substr(X,0,N) work the same as
@@ -388,12 +405,21 @@ substrFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
 	}
 	assert(p1 >= 0 && p2 >= 0);
 	if (p0type != SQLITE_BLOB) {
-		while (*z && p1) {
+		/*
+		 * In the code below 'cnt' and 'n_chars' is
+		 * used because '\0' is not supposed to be
+		 * end-of-string symbol.
+		 */
+		int n_chars = count_chars(z, sqlite3_value_bytes(argv[0]));
+		int cnt = 0;
+		while (cnt < n_chars && p1) {
 			SQLITE_SKIP_UTF8(z);
+			cnt++;
 			p1--;
 		}
-		for (z2 = z; *z2 && p2; p2--) {
+		for (z2 = z; cnt < n_chars && p2; p2--) {
 			SQLITE_SKIP_UTF8(z2);
+			cnt++;
 		}
 		sqlite3_result_text64(context, (char *)z, z2 - z,
 				      SQLITE_TRANSIENT);
@@ -620,8 +646,14 @@ enum pattern_match_status {
  * This routine is usually quick, but can be N**2 in the worst
  * case.
  *
+ * 'pattern_end' and 'string_end' params are used to determine
+ * the end of strings, because '\0' is not supposed to be
+ * end-of-string signal.
+ *
  * @param pattern String containing comparison pattern.
  * @param string String being compared.
+ * @param pattern_end Ptr to pattern last symbol.
+ * @param string_end Ptr to string last symbol.
  * @param is_like_ci true if LIKE is case insensitive.
  * @param match_other The escape char for LIKE.
  *
@@ -630,6 +662,8 @@ enum pattern_match_status {
 static int
 sql_utf8_pattern_compare(const char *pattern,
 			 const char *string,
+			 const char *pattern_end,
+			 const char *string_end,
 			 const int is_like_ci,
 			 UChar32 match_other)
 {
@@ -637,8 +671,6 @@ sql_utf8_pattern_compare(const char *pattern,
 	UChar32 c, c2;
 	/* One past the last escaped input char. */
 	const char *zEscaped = 0;
-	const char *pattern_end = pattern + strlen(pattern);
-	const char *string_end = string + strlen(string);
 	UErrorCode status = U_ZERO_ERROR;
 
 	while (pattern < pattern_end) {
@@ -721,6 +753,8 @@ sql_utf8_pattern_compare(const char *pattern,
 				}
 				bMatch = sql_utf8_pattern_compare(pattern,
 								  string,
+								  pattern_end,
+								  string_end,
 								  is_like_ci,
 								  match_other);
 				if (bMatch != NO_MATCH)
@@ -768,7 +802,9 @@ sql_utf8_pattern_compare(const char *pattern,
 int
 sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc)
 {
-	return sql_utf8_pattern_compare(zPattern, zStr, 0, esc);
+	return sql_utf8_pattern_compare(zPattern, zStr,
+		                        zPattern + strlen(zPattern),
+		                        zStr + strlen(zStr), 0, esc);
 }
 
 /**
@@ -778,7 +814,9 @@ sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc)
 int
 sql_strlike_ci(const char *zPattern, const char *zStr, unsigned int esc)
 {
-	return sql_utf8_pattern_compare(zPattern, zStr, 1, esc);
+	return sql_utf8_pattern_compare(zPattern, zStr,
+		                        zPattern + strlen(zPattern),
+		                        zStr + strlen(zStr), 1, esc);
 }
 
 /**
@@ -802,7 +840,6 @@ int sqlite3_like_count = 0;
 static void
 likeFunc(sqlite3_context *context, int argc, sqlite3_value **argv)
 {
-	const char *zA, *zB;
 	u32 escape = SQL_END_OF_STRING;
 	int nPat;
 	sqlite3 *db = sqlite3_context_db_handle(context);
@@ -818,8 +855,10 @@ likeFunc(sqlite3_context *context, int argc, sqlite3_value **argv)
 		return;
 	}
 #endif
-	zB = (const char *) sqlite3_value_text(argv[0]);
-	zA = (const char *) sqlite3_value_text(argv[1]);
+	const char *zB = (const char *) sqlite3_value_text(argv[0]);
+	const char *zA = (const char *) sqlite3_value_text(argv[1]);
+	const char *zB_end = zB + sqlite3_value_bytes(argv[0]);
+	const char *zA_end = zA + sqlite3_value_bytes(argv[1]);
 
 	/*
 	 * Limit the length of the LIKE pattern to avoid problems
@@ -860,7 +899,8 @@ likeFunc(sqlite3_context *context, int argc, sqlite3_value **argv)
 	sqlite3_like_count++;
 #endif
 	int res;
-	res = sql_utf8_pattern_compare(zB, zA, is_like_ci, escape);
+	res = sql_utf8_pattern_compare(zB, zA, zB_end, zA_end,
+				       is_like_ci, escape);
 	if (res == INVALID_PATTERN) {
 		const char *const err_msg =
 			"LIKE pattern can only contain UTF-8 characters";
@@ -1135,12 +1175,12 @@ replaceFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
 		       || sqlite3_context_db_handle(context)->mallocFailed);
 		return;
 	}
-	if (zPattern[0] == 0) {
+	nPattern = sqlite3_value_bytes(argv[1]);
+	if (nPattern == 0) {
 		assert(sqlite3_value_type(argv[1]) != SQLITE_NULL);
 		sqlite3_result_value(context, argv[0]);
 		return;
 	}
-	nPattern = sqlite3_value_bytes(argv[1]);
 	assert(zPattern == sqlite3_value_text(argv[1]));	/* No encoding change */
 	zRep = sqlite3_value_text(argv[2]);
 	if (zRep == 0)
@@ -1595,8 +1635,8 @@ groupConcatFinalize(sqlite3_context * context)
 			sqlite3_result_error_nomem(context);
 		} else {
 			sqlite3_result_text(context,
-					    sqlite3StrAccumFinish(pAccum), -1,
-					    sqlite3_free);
+					    sqlite3StrAccumFinish(pAccum),
+					    pAccum->nChar, sqlite3_free);
 		}
 	}
 }
diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h
index f8dae7920..4ff48d27d 100644
--- a/src/box/sql/vdbeInt.h
+++ b/src/box/sql/vdbeInt.h
@@ -192,7 +192,7 @@ struct Mem {
 	u32 flags;		/* Some combination of MEM_Null, MEM_Str, MEM_Dyn, etc. */
 	/** Subtype for this value. */
 	enum sql_subtype subtype;
-	int n;			/* Number of characters in string value, excluding '\0' */
+	int n;			/* size (in bytes) of string value, excluding trailing '\0' */
 	char *z;		/* String or BLOB value */
 	/* ShallowCopy only needs to copy the information above */
 	char *zMalloc;		/* Space to hold MEM_Str or MEM_Blob if szMalloc>0 */
diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
index b7de1d955..8c712bd5e 100755
--- a/test/sql-tap/func.test.lua
+++ b/test/sql-tap/func.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(14547)
+test:plan(14586)
 
 --!./tcltestrunner.lua
 -- 2001 September 15
@@ -2664,4 +2664,222 @@ test:do_execsql_test(
     [[VALUES (LENGTH(RANDOMBLOB(0)))]],
     {""})
 
+-- gh-3542
+-- In SQL '\0' is NOT a end-of-string signal. Tests below ensures
+-- that all functions which work with strings behave this way.
+
+-- LENGTH
+test:do_execsql_test(
+    "func-37",
+    "SELECT LENGTH(CHAR(00));",
+    {1})
+
+test:do_execsql_test(
+    "func-38",
+    "SELECT LENGTH(CHAR(00, 65));",
+    {2})
+
+test:do_execsql_test(
+    "func-39",
+    "SELECT LENGTH(CHAR(00, 65, 00));",
+    {3})
+
+test:do_execsql_test(
+    "func-40",
+    "SELECT LENGTH(CHAR(00, 65, 00, 65));",
+    {4})
+
+test:do_execsql_test(
+    "func-41",
+    "SELECT LENGTH(CHAR(65, 00, 65, 00, 65));",
+    {5})
+
+test:do_execsql_test(
+    "func-42",
+    "SELECT LENGTH('ሴ' || CHAR(00) || 'ሴ');",
+    {3})
+
+-- LIKE (exact match)
+test:do_execsql_test(
+    "func-43",
+    "SELECT CHAR(00) LIKE CHAR(00);",
+    {1})
+
+test:do_execsql_test(
+    "func-44",
+    "SELECT CHAR(00) LIKE CHAR(65);",
+    {0})
+
+test:do_execsql_test(
+    "func-45",
+    "SELECT CHAR(00,65) LIKE CHAR(00,65);",
+    {1})
+
+test:do_execsql_test(
+    "func-46",
+    "SELECT CHAR(00,65) LIKE CHAR(00,66);",
+    {0})
+
+test:do_execsql_test(
+    "func-47",
+    "SELECT CHAR(00,65, 00) LIKE CHAR(00,65,00);",
+    {1})
+
+test:do_execsql_test(
+    "func-48",
+    "SELECT CHAR(00,65,00) LIKE CHAR(00,65);",
+    {0})
+
+test:do_execsql_test(
+    "func-49",
+    "SELECT CHAR(65,00,65,00,65) LIKE CHAR(65,00,65,00,65);",
+    {1})
+
+test:do_execsql_test(
+    "func-50",
+    "SELECT CHAR(65,00,65,00,65) LIKE CHAR(65,00,65,00,66);",
+    {0})
+
+-- LIKE ('%' and '_')
+test:do_execsql_test(
+    "func-51",
+    "SELECT CHAR(00) LIKE '_';",
+    {1})
+
+test:do_execsql_test(
+    "func-52",
+    "SELECT CHAR(00) LIKE '__';",
+    {0})
+
+test:do_execsql_test(
+    "func-53",
+    "SELECT CHAR(00,65) LIKE '__';",
+    {1})
+
+test:do_execsql_test(
+    "func-54",
+    "SELECT CHAR(00,65) LIKE '_A';",
+    {1})
+
+test:do_execsql_test(
+    "func-55",
+    "SELECT CHAR(00,65,00) LIKE '%';",
+    {1})
+
+test:do_execsql_test(
+    "func-56",
+    "SELECT CHAR(00,65,00) LIKE '%A';",
+    {0})
+
+test:do_execsql_test(
+    "func-57",
+    "SELECT CHAR(65,00,65,00,65) LIKE '%A';",
+    {1})
+
+test:do_execsql_test(
+    "func-58",
+    "SELECT CHAR(65,00,65,00,65) LIKE '%B';",
+    {0})
+
+-- LIKE (ESCAPE symbols)
+test:do_execsql_test(
+    "func-59",
+    "SELECT CHAR(00) || '_' LIKE '_#_' ESCAPE '#';",
+    {1})
+
+test:do_execsql_test(
+    "func-60",
+    "SELECT CHAR(00) || '_' LIKE '_#%' ESCAPE '#';",
+    {0})
+
+-- REPLACE
+test:do_execsql_test(
+    "func-62",
+    "SELECT REPLACE(CHAR(00,65,00,65), CHAR(00), CHAR(65)) LIKE 'AAAA';",
+    {1})
+
+test:do_execsql_test(
+    "func-63",
+    "SELECT REPLACE(CHAR(00,65,00,65), CHAR(65), CHAR(00)) \
+    LIKE CHAR(00,00,00,00);",
+    {1})
+
+-- SUBSTR
+test:do_execsql_test(
+    "func-64",
+    "SELECT SUBSTR(CHAR(65,00,66,67), 3, 2) LIKE CHAR(66, 67);",
+    {1})
+
+test:do_execsql_test(
+    "func-65",
+    "SELECT SUBSTR(CHAR(00,00,00,65), 1, 4) LIKE CHAR(00,00,00,65);",
+    {1})
+
+-- UPPER
+test:do_execsql_test(
+    "func-66",
+    "SELECT UPPER(CHAR(00,97,00,98,00)) LIKE CHAR(00,65,00,66,00);",
+    {1})
+
+-- LOWER
+test:do_execsql_test(
+    "func-67",
+    "SELECT LOWER(CHAR(00,65,00,66,00)) LIKE CHAR(00,97,00,98,00);",
+    {1})
+
+-- HEX
+test:do_execsql_test(
+    "func-68",
+    "SELECT HEX(CHAR(00,65,00,65,00)) LIKE ('0041004100');",
+    {1})
+
+-- TRIM
+test:do_execsql_test(
+    "func-69",
+    "SELECT TRIM(CHAR(32,00,32,00,32)) LIKE (CHAR(00,32,00));",
+    {1})
+
+-- LTRIM
+test:do_execsql_test(
+    "func-70",
+    "SELECT LTRIM(CHAR(32,00,32,00,32)) LIKE (CHAR(00,32,00,32));",
+    {1})
+
+-- RTRIM
+test:do_execsql_test(
+    "func-71",
+    "SELECT RTRIM(CHAR(32,00,32,00,32)) LIKE (CHAR(32,00,32,00));",
+    {1})
+
+-- GROUP_CONCAT
+test:do_execsql_test(
+    "func-72",
+    "CREATE TABLE t100(a INT PRIMARY KEY, b CHAR); \
+     INSERT INTO t100 VALUES (1, CHAR(00)); \
+     INSERT INTO t100 VALUES (2, CHAR(65, 00, 65)); \
+     INSERT INTO t100 VALUES (3, CHAR(00)); \
+     SELECT GROUP_CONCAT(b, '') FROM t100;",
+    {string.char(00,65,00,65,00)})
+
+-- INSTR
+test:do_execsql_test(
+    "func-73",
+    "SELECT INSTR(CHAR(00,65,00,66,00), CHAR(65));",
+    {2})
+
+test:do_execsql_test(
+    "func-74",
+    "SELECT INSTR(CHAR(00,65,00,66,00), CHAR(66));",
+    {4})
+
+test:do_execsql_test(
+    "func-75",
+    "SELECT INSTR(CHAR(00,65,00,66,00), CHAR(00));",
+    {1})
+
+test:do_execsql_test(
+    "func-76",
+    "SELECT INSTR(CHAR(65,66), CHAR(00));",
+    {0})
+
 test:finish_test()
--