Thank you for the review.
On 29/01/2019 19:35, n.pettik wrote:
Fixes LIKE and LENGTH functions. '\0' now treated as
Nit: is treated.
Fixed.
a usual symbol. Strings with '\0' are now processed
entirely. Consider examples:
LENGTH(CHAR(65,00,65)) == 3
LIKE(CHAR(65,00,65), CHAR(65,00,66)) == False
Also, I see that smth wrong with text in this mail again
I hope now the mail text is ok.
Closes #3542
Please, check other functions working with strings.
For instance, now I see that replace ignores \0 as well:
tarantool> box.sql.execute("select replace(CHAR(65,00,66), char(0), char(1))")
---
- - ["A\0B"]
...
Ok, done. I've made some extra changes to make REPLACE, GROUP_CONCAT
and SUBSTR
work as expected. Also tests on all functions which may work with
strings are added.
@@ -150,9 +150,13 @@ lengthFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
const unsigned char *z = sqlite3_value_text(argv[0]);
if (z == 0)
return;
+
Nit: don’t abuse empty lines.
Sorry, fixed now.
len = 0;
- while (*z) {
+ size_t byte_len = sqlite3_value_bytes(argv[0]);
+ const unsigned char *prev_z;
+ for (size_t cnt = 0; cnt < byte_len; cnt += (z - prev_z)) {
Out of 80.
Fixed.
diff --git a/test/sql-tap/gh-3542-like-len-null-term.test.lua b/test/sql-tap/gh-3542-like-len-null-term.test.lua
new file mode 100755
index 000000000..e9ea9ea30
--- /dev/null
+++ b/test/sql-tap/gh-3542-like-len-null-term.test.lua
@@ -0,0 +1,97 @@
+#!/usr/bin/env tarantool
+test = require("sqltester")
+test:plan(14)
+
+-- gh-3542 - LIKE/LENGTH do not scan if '\0' is encountered.
+-- This test ensures that LIKE and LENGTH functions does NOT stop
+-- string processing if '\0' is encountered.
I’d rather put these tests to sql-tap/func.test.lua
Ok, done.
Here are the changes.
--
src/box/sql/func.c | 88 +++++++++++++-----
src/box/sql/vdbeInt.h | 2 +-
test/sql-tap/func.test.lua | 220 ++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 284 insertions(+), 26 deletions(-)
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index e46b162d9..2978af983 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -128,6 +128,30 @@ typeofFunc(sqlite3_context * context, int NotUsed, sqlite3_value ** argv)
sqlite3_result_text(context, z, -1, SQLITE_STATIC);
}
+/**
+ * Return number of chars in the given string.
+ *
+ * Number of chars != byte size of string because some characters
+ * are encoded with more than one byte. Also note that all
+ * characters from 'str' to 'str + byte_len' would be counted,
+ * even if there is a '\0' somewhere between them.
+ * @param str String to be counted.
+ * @param byte_len Byte length of given string.
+ * @return
+ */
+static int
+count_chars(const unsigned char *str, size_t byte_len)
+{
+ int n_chars = 0;
+ const unsigned char *prev_z;
+ for (size_t cnt = 0; cnt < byte_len; cnt += (str - prev_z)) {
+ n_chars++;
+ prev_z = str;
+ SQLITE_SKIP_UTF8(str);
+ }
+ return n_chars;
+}
+
/*
* Implementation of the length() function
*/
@@ -150,11 +174,7 @@ lengthFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
const unsigned char *z = sqlite3_value_text(argv[0]);
if (z == 0)
return;
- len = 0;
- while (*z) {
- len++;
- SQLITE_SKIP_UTF8(z);
- }
+ len = count_chars(z, sqlite3_value_bytes(argv[0]));
sqlite3_result_int(context, len);
break;
}
@@ -340,11 +360,8 @@ substrFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
if (z == 0)
return;
len = 0;
- if (p1 < 0) {
- for (z2 = z; *z2; len++) {
- SQLITE_SKIP_UTF8(z2);
- }
- }
+ if (p1 < 0)
+ len = count_chars(z, sqlite3_value_bytes(argv[0]));
}
#ifdef SQLITE_SUBSTR_COMPATIBILITY
/* If SUBSTR_COMPATIBILITY is defined then substr(X,0,N) work the same as
@@ -388,12 +405,21 @@ substrFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
}
assert(p1 >= 0 && p2 >= 0);
if (p0type != SQLITE_BLOB) {
- while (*z && p1) {
+ /*
+ * In the code below 'cnt' and 'n_chars' is
+ * used because '\0' is not supposed to be
+ * end-of-string symbol.
+ */
+ int n_chars = count_chars(z, sqlite3_value_bytes(argv[0]));
+ int cnt = 0;
+ while (cnt < n_chars && p1) {
SQLITE_SKIP_UTF8(z);
+ cnt++;
p1--;
}
- for (z2 = z; *z2 && p2; p2--) {
+ for (z2 = z; cnt < n_chars && p2; p2--) {
SQLITE_SKIP_UTF8(z2);
+ cnt++;
}
sqlite3_result_text64(context, (char *)z, z2 - z,
SQLITE_TRANSIENT);
@@ -620,8 +646,14 @@ enum pattern_match_status {
* This routine is usually quick, but can be N**2 in the worst
* case.
*
+ * 'pattern_end' and 'string_end' params are used to determine
+ * the end of strings, because '\0' is not supposed to be
+ * end-of-string signal.
+ *
* @param pattern String containing comparison pattern.
* @param string String being compared.
+ * @param pattern_end Ptr to pattern last symbol.
+ * @param string_end Ptr to string last symbol.
* @param is_like_ci true if LIKE is case insensitive.
* @param match_other The escape char for LIKE.
*
@@ -630,6 +662,8 @@ enum pattern_match_status {
static int
sql_utf8_pattern_compare(const char *pattern,
const char *string,
+ const char *pattern_end,
+ const char *string_end,
const int is_like_ci,
UChar32 match_other)
{
@@ -637,8 +671,6 @@ sql_utf8_pattern_compare(const char *pattern,
UChar32 c, c2;
/* One past the last escaped input char. */
const char *zEscaped = 0;
- const char *pattern_end = pattern + strlen(pattern);
- const char *string_end = string + strlen(string);
UErrorCode status = U_ZERO_ERROR;
while (pattern < pattern_end) {
@@ -721,6 +753,8 @@ sql_utf8_pattern_compare(const char *pattern,
}
bMatch = sql_utf8_pattern_compare(pattern,
string,
+ pattern_end,
+ string_end,
is_like_ci,
match_other);
if (bMatch != NO_MATCH)
@@ -768,7 +802,9 @@ sql_utf8_pattern_compare(const char *pattern,
int
sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc)
{
- return sql_utf8_pattern_compare(zPattern, zStr, 0, esc);
+ return sql_utf8_pattern_compare(zPattern, zStr,
+ zPattern + strlen(zPattern),
+ zStr + strlen(zStr), 0, esc);
}
/**
@@ -778,7 +814,9 @@ sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc)
int
sql_strlike_ci(const char *zPattern, const char *zStr, unsigned int esc)
{
- return sql_utf8_pattern_compare(zPattern, zStr, 1, esc);
+ return sql_utf8_pattern_compare(zPattern, zStr,
+ zPattern + strlen(zPattern),
+ zStr + strlen(zStr), 1, esc);
}
/**
@@ -802,7 +840,6 @@ int sqlite3_like_count = 0;
static void
likeFunc(sqlite3_context *context, int argc, sqlite3_value **argv)
{
- const char *zA, *zB;
u32 escape = SQL_END_OF_STRING;
int nPat;
sqlite3 *db = sqlite3_context_db_handle(context);
@@ -818,8 +855,10 @@ likeFunc(sqlite3_context *context, int argc, sqlite3_value **argv)
return;
}
#endif
- zB = (const char *) sqlite3_value_text(argv[0]);
- zA = (const char *) sqlite3_value_text(argv[1]);
+ const char *zB = (const char *) sqlite3_value_text(argv[0]);
+ const char *zA = (const char *) sqlite3_value_text(argv[1]);
+ const char *zB_end = zB + sqlite3_value_bytes(argv[0]);
+ const char *zA_end = zA + sqlite3_value_bytes(argv[1]);
/*
* Limit the length of the LIKE pattern to avoid problems
@@ -860,7 +899,8 @@ likeFunc(sqlite3_context *context, int argc, sqlite3_value **argv)
sqlite3_like_count++;
#endif
int res;
- res = sql_utf8_pattern_compare(zB, zA, is_like_ci, escape);
+ res = sql_utf8_pattern_compare(zB, zA, zB_end, zA_end,
+ is_like_ci, escape);
if (res == INVALID_PATTERN) {
const char *const err_msg =
"LIKE pattern can only contain UTF-8 characters";
@@ -1135,12 +1175,12 @@ replaceFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
|| sqlite3_context_db_handle(context)->mallocFailed);
return;
}
- if (zPattern[0] == 0) {
+ nPattern = sqlite3_value_bytes(argv[1]);
+ if (nPattern == 0) {
assert(sqlite3_value_type(argv[1]) != SQLITE_NULL);
sqlite3_result_value(context, argv[0]);
return;
}
- nPattern = sqlite3_value_bytes(argv[1]);
assert(zPattern == sqlite3_value_text(argv[1])); /* No encoding change */
zRep = sqlite3_value_text(argv[2]);
if (zRep == 0)
@@ -1595,8 +1635,8 @@ groupConcatFinalize(sqlite3_context * context)
sqlite3_result_error_nomem(context);
} else {
sqlite3_result_text(context,
- sqlite3StrAccumFinish(pAccum), -1,
- sqlite3_free);
+ sqlite3StrAccumFinish(pAccum),
+ pAccum->nChar, sqlite3_free);
}
}
}
diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h
index f8dae7920..4ff48d27d 100644
--- a/src/box/sql/vdbeInt.h
+++ b/src/box/sql/vdbeInt.h
@@ -192,7 +192,7 @@ struct Mem {
u32 flags; /* Some combination of MEM_Null, MEM_Str, MEM_Dyn, etc. */
/** Subtype for this value. */
enum sql_subtype subtype;
- int n; /* Number of characters in string value, excluding '\0' */
+ int n; /* size (in bytes) of string value, excluding trailing '\0' */
char *z; /* String or BLOB value */
/* ShallowCopy only needs to copy the information above */
char *zMalloc; /* Space to hold MEM_Str or MEM_Blob if szMalloc>0 */
diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
index b7de1d955..8c712bd5e 100755
--- a/test/sql-tap/func.test.lua
+++ b/test/sql-tap/func.test.lua
@@ -1,6 +1,6 @@
#!/usr/bin/env tarantool
test = require("sqltester")
-test:plan(14547)
+test:plan(14586)
--!./tcltestrunner.lua
-- 2001 September 15
@@ -2664,4 +2664,222 @@ test:do_execsql_test(
[[VALUES (LENGTH(RANDOMBLOB(0)))]],
{""})
+-- gh-3542
+-- In SQL '\0' is NOT a end-of-string signal. Tests below ensures
+-- that all functions which work with strings behave this way.
+
+-- LENGTH
+test:do_execsql_test(
+ "func-37",
+ "SELECT LENGTH(CHAR(00));",
+ {1})
+
+test:do_execsql_test(
+ "func-38",
+ "SELECT LENGTH(CHAR(00, 65));",
+ {2})
+
+test:do_execsql_test(
+ "func-39",
+ "SELECT LENGTH(CHAR(00, 65, 00));",
+ {3})
+
+test:do_execsql_test(
+ "func-40",
+ "SELECT LENGTH(CHAR(00, 65, 00, 65));",
+ {4})
+
+test:do_execsql_test(
+ "func-41",
+ "SELECT LENGTH(CHAR(65, 00, 65, 00, 65));",
+ {5})
+
+test:do_execsql_test(
+ "func-42",
+ "SELECT LENGTH('ሴ' || CHAR(00) || 'ሴ');",
+ {3})
+
+-- LIKE (exact match)
+test:do_execsql_test(
+ "func-43",
+ "SELECT CHAR(00) LIKE CHAR(00);",
+ {1})
+
+test:do_execsql_test(
+ "func-44",
+ "SELECT CHAR(00) LIKE CHAR(65);",
+ {0})
+
+test:do_execsql_test(
+ "func-45",
+ "SELECT CHAR(00,65) LIKE CHAR(00,65);",
+ {1})
+
+test:do_execsql_test(
+ "func-46",
+ "SELECT CHAR(00,65) LIKE CHAR(00,66);",
+ {0})
+
+test:do_execsql_test(
+ "func-47",
+ "SELECT CHAR(00,65, 00) LIKE CHAR(00,65,00);",
+ {1})
+
+test:do_execsql_test(
+ "func-48",
+ "SELECT CHAR(00,65,00) LIKE CHAR(00,65);",
+ {0})
+
+test:do_execsql_test(
+ "func-49",
+ "SELECT CHAR(65,00,65,00,65) LIKE CHAR(65,00,65,00,65);",
+ {1})
+
+test:do_execsql_test(
+ "func-50",
+ "SELECT CHAR(65,00,65,00,65) LIKE CHAR(65,00,65,00,66);",
+ {0})
+
+-- LIKE ('%' and '_')
+test:do_execsql_test(
+ "func-51",
+ "SELECT CHAR(00) LIKE '_';",
+ {1})
+
+test:do_execsql_test(
+ "func-52",
+ "SELECT CHAR(00) LIKE '__';",
+ {0})
+
+test:do_execsql_test(
+ "func-53",
+ "SELECT CHAR(00,65) LIKE '__';",
+ {1})
+
+test:do_execsql_test(
+ "func-54",
+ "SELECT CHAR(00,65) LIKE '_A';",
+ {1})
+
+test:do_execsql_test(
+ "func-55",
+ "SELECT CHAR(00,65,00) LIKE '%';",
+ {1})
+
+test:do_execsql_test(
+ "func-56",
+ "SELECT CHAR(00,65,00) LIKE '%A';",
+ {0})
+
+test:do_execsql_test(
+ "func-57",
+ "SELECT CHAR(65,00,65,00,65) LIKE '%A';",
+ {1})
+
+test:do_execsql_test(
+ "func-58",
+ "SELECT CHAR(65,00,65,00,65) LIKE '%B';",
+ {0})
+
+-- LIKE (ESCAPE symbols)
+test:do_execsql_test(
+ "func-59",
+ "SELECT CHAR(00) || '_' LIKE '_#_' ESCAPE '#';",
+ {1})
+
+test:do_execsql_test(
+ "func-60",
+ "SELECT CHAR(00) || '_' LIKE '_#%' ESCAPE '#';",
+ {0})
+
+-- REPLACE
+test:do_execsql_test(
+ "func-62",
+ "SELECT REPLACE(CHAR(00,65,00,65), CHAR(00), CHAR(65)) LIKE 'AAAA';",
+ {1})
+
+test:do_execsql_test(
+ "func-63",
+ "SELECT REPLACE(CHAR(00,65,00,65), CHAR(65), CHAR(00)) \
+ LIKE CHAR(00,00,00,00);",
+ {1})
+
+-- SUBSTR
+test:do_execsql_test(
+ "func-64",
+ "SELECT SUBSTR(CHAR(65,00,66,67), 3, 2) LIKE CHAR(66, 67);",
+ {1})
+
+test:do_execsql_test(
+ "func-65",
+ "SELECT SUBSTR(CHAR(00,00,00,65), 1, 4) LIKE CHAR(00,00,00,65);",
+ {1})
+
+-- UPPER
+test:do_execsql_test(
+ "func-66",
+ "SELECT UPPER(CHAR(00,97,00,98,00)) LIKE CHAR(00,65,00,66,00);",
+ {1})
+
+-- LOWER
+test:do_execsql_test(
+ "func-67",
+ "SELECT LOWER(CHAR(00,65,00,66,00)) LIKE CHAR(00,97,00,98,00);",
+ {1})
+
+-- HEX
+test:do_execsql_test(
+ "func-68",
+ "SELECT HEX(CHAR(00,65,00,65,00)) LIKE ('0041004100');",
+ {1})
+
+-- TRIM
+test:do_execsql_test(
+ "func-69",
+ "SELECT TRIM(CHAR(32,00,32,00,32)) LIKE (CHAR(00,32,00));",
+ {1})
+
+-- LTRIM
+test:do_execsql_test(
+ "func-70",
+ "SELECT LTRIM(CHAR(32,00,32,00,32)) LIKE (CHAR(00,32,00,32));",
+ {1})
+
+-- RTRIM
+test:do_execsql_test(
+ "func-71",
+ "SELECT RTRIM(CHAR(32,00,32,00,32)) LIKE (CHAR(32,00,32,00));",
+ {1})
+
+-- GROUP_CONCAT
+test:do_execsql_test(
+ "func-72",
+ "CREATE TABLE t100(a INT PRIMARY KEY, b CHAR); \
+ INSERT INTO t100 VALUES (1, CHAR(00)); \
+ INSERT INTO t100 VALUES (2, CHAR(65, 00, 65)); \
+ INSERT INTO t100 VALUES (3, CHAR(00)); \
+ SELECT GROUP_CONCAT(b, '') FROM t100;",
+ {string.char(00,65,00,65,00)})
+
+-- INSTR
+test:do_execsql_test(
+ "func-73",
+ "SELECT INSTR(CHAR(00,65,00,66,00), CHAR(65));",
+ {2})
+
+test:do_execsql_test(
+ "func-74",
+ "SELECT INSTR(CHAR(00,65,00,66,00), CHAR(66));",
+ {4})
+
+test:do_execsql_test(
+ "func-75",
+ "SELECT INSTR(CHAR(00,65,00,66,00), CHAR(00));",
+ {1})
+
+test:do_execsql_test(
+ "func-76",
+ "SELECT INSTR(CHAR(65,66), CHAR(00));",
+ {0})
+
test:finish_test()
--