<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body text="#000000" bgcolor="#FFFFFF">
<pre>Fix the LIKE and LENGTH functions: '\0' is now treated as
an ordinary character, so strings containing '\0' are
processed in full instead of being cut off at the first '\0'. Examples:
LENGTH(CHAR(65,00,65)) == 3
LIKE(CHAR(65,00,65), CHAR(65,00,66)) == False
Closes #3542
---
Branch: <a class="moz-txt-link-freetext" href="https://github.com/tarantool/tarantool/tree/sudobobo/gh-3542-LIKE/LEN-null-term">https://github.com/tarantool/tarantool/tree/sudobobo/gh-3542-LIKE/LEN-null-term</a>
Issue: <a class="moz-txt-link-freetext" href="https://github.com/tarantool/tarantool/issues/3542">https://github.com/tarantool/tarantool/issues/3542</a>
src/box/sql/func.c | 32 +++++---
src/box/sql/vdbeInt.h | 2 +-
test/sql-tap/gh-3542-like-len-null-term.test.lua | 97 ++++++++++++++++++++++++
3 files changed, 121 insertions(+), 10 deletions(-)
create mode 100755 test/sql-tap/gh-3542-like-len-null-term.test.lua
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index e46b162d9..a820dc542 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -150,9 +150,13 @@ lengthFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
const unsigned char *z = sqlite3_value_text(argv[0]);
if (z == 0)
return;
+
len = 0;
- while (*z) {
+ size_t byte_len = sqlite3_value_bytes(argv[0]);
+ const unsigned char *prev_z;
+ for (size_t cnt = 0; cnt < byte_len; cnt += (z - prev_z)) {
len++;
+ prev_z = z;
SQLITE_SKIP_UTF8(z);
}
sqlite3_result_int(context, len);
@@ -622,6 +626,8 @@ enum pattern_match_status {
*
* @param pattern String containing comparison pattern.
* @param string String being compared.
+ * @param pattern_end Pointer one past the last byte of pattern.
+ * @param string_end Pointer one past the last byte of string.
* @param is_like_ci true if LIKE is case insensitive.
* @param match_other The escape char for LIKE.
*
@@ -630,6 +636,8 @@ enum pattern_match_status {
static int
sql_utf8_pattern_compare(const char *pattern,
const char *string,
+ const char *pattern_end,
+ const char *string_end,
const int is_like_ci,
UChar32 match_other)
{
@@ -637,8 +645,6 @@ sql_utf8_pattern_compare(const char *pattern,
UChar32 c, c2;
/* One past the last escaped input char. */
const char *zEscaped = 0;
- const char *pattern_end = pattern + strlen(pattern);
- const char *string_end = string + strlen(string);
UErrorCode status = U_ZERO_ERROR;
while (pattern < pattern_end) {
@@ -721,6 +727,8 @@ sql_utf8_pattern_compare(const char *pattern,
}
bMatch = sql_utf8_pattern_compare(pattern,
string,
+ pattern_end,
+ string_end,
is_like_ci,
match_other);
if (bMatch != NO_MATCH)
@@ -768,7 +776,9 @@ sql_utf8_pattern_compare(const char *pattern,
int
sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc)
{
- return sql_utf8_pattern_compare(zPattern, zStr, 0, esc);
+ return sql_utf8_pattern_compare(zPattern, zStr,
+ zPattern + strlen(zPattern),
+ zStr + strlen(zStr), 0, esc);
}
/**
@@ -778,7 +788,9 @@ sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc)
int
sql_strlike_ci(const char *zPattern, const char *zStr, unsigned int esc)
{
- return sql_utf8_pattern_compare(zPattern, zStr, 1, esc);
+ return sql_utf8_pattern_compare(zPattern, zStr,
+ zPattern + strlen(zPattern),
+ zStr + strlen(zStr), 1, esc);
}
/**
@@ -802,7 +814,6 @@ int sqlite3_like_count = 0;
static void
likeFunc(sqlite3_context *context, int argc, sqlite3_value **argv)
{
- const char *zA, *zB;
u32 escape = SQL_END_OF_STRING;
int nPat;
sqlite3 *db = sqlite3_context_db_handle(context);
@@ -818,8 +829,10 @@ likeFunc(sqlite3_context *context, int argc, sqlite3_value **argv)
return;
}
#endif
- zB = (const char *) sqlite3_value_text(argv[0]);
- zA = (const char *) sqlite3_value_text(argv[1]);
+ const char *zB = (const char *) sqlite3_value_text(argv[0]);
+ const char *zA = (const char *) sqlite3_value_text(argv[1]);
+ const char *zB_end = zB + sqlite3_value_bytes(argv[0]);
+ const char *zA_end = zA + sqlite3_value_bytes(argv[1]);
/*
* Limit the length of the LIKE pattern to avoid problems
@@ -860,7 +873,8 @@ likeFunc(sqlite3_context *context, int argc, sqlite3_value **argv)
sqlite3_like_count++;
#endif
int res;
- res = sql_utf8_pattern_compare(zB, zA, is_like_ci, escape);
+ res = sql_utf8_pattern_compare(zB, zA, zB_end, zA_end,
+ is_like_ci, escape);
if (res == INVALID_PATTERN) {
const char *const err_msg =
"LIKE pattern can only contain UTF-8 characters";
diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h
index f8dae7920..4ff48d27d 100644
--- a/src/box/sql/vdbeInt.h
+++ b/src/box/sql/vdbeInt.h
@@ -192,7 +192,7 @@ struct Mem {
u32 flags; /* Some combination of MEM_Null, MEM_Str, MEM_Dyn, etc. */
/** Subtype for this value. */
enum sql_subtype subtype;
- int n; /* Number of characters in string value, excluding '\0' */
+ int n; /* size (in bytes) of string value, excluding trailing '\0' */
char *z; /* String or BLOB value */
/* ShallowCopy only needs to copy the information above */
char *zMalloc; /* Space to hold MEM_Str or MEM_Blob if szMalloc>0 */
diff --git a/test/sql-tap/gh-3542-like-len-null-term.test.lua b/test/sql-tap/gh-3542-like-len-null-term.test.lua
new file mode 100755
index 000000000..e9ea9ea30
--- /dev/null
+++ b/test/sql-tap/gh-3542-like-len-null-term.test.lua
@@ -0,0 +1,97 @@
+#!/usr/bin/env tarantool
+test = require("sqltester")
+test:plan(14)
+
+-- gh-3542 - LIKE/LENGTH stopped scanning at the first '\0'.
+-- This test ensures that the LIKE and LENGTH functions do NOT
+-- stop processing a string when '\0' is encountered.
+
+test_cases = {
+ -- Simple cases with '\0' and ASCII symbols.
+ -- <like-len-null-term-1>
+ {
+ "VALUES(LENGTH(CHAR(00)));", 1
+ },
+
+ -- <like-len-null-term-2>
+ {
+ "VALUES(LENGTH(CHAR(00, 65)));", 2
+ },
+
+ -- <like-len-null-term-3>
+ {
+ "VALUES(LENGTH(CHAR(65, 65, 00, 65, 00, 65)));", 6
+ },
+
+ -- <like-len-null-term-4>
+ {
+ "SELECT CHAR(65, 66, 00, 65) LIKE CHAR(65, 66, 00, 66);", 0
+ },
+
+ -- <like-len-null-term-5>
+ {
+ "SELECT CHAR(65, 66, 00, 65) LIKE CHAR(65, 66, 00, 65);", 1
+ },
+
+ -- Cases with UTF symbols.
+ -- <like-len-null-term-6>
+ {
+ "VALUES(LENGTH('¢' || CHAR(00) || 'ሴ'));", 3
+ },
+
+ -- <like-len-null-term-7>
+ {
+ "SELECT CHAR(65, 66, 00, 65) LIKE CHAR(65, 66, 00, 65) || '%';", 1
+ },
+
+ -- <like-len-null-term-8>
+ {
+ "SELECT CHAR(65, 66, 00, 65, 00) LIKE CHAR(65, 66, 00, 65) || '%';", 1
+ },
+
+ -- <like-len-null-term-9>
+ {
+ "SELECT CHAR(65, 66, 00, 65, 00) LIKE CHAR(65, 66, 00, 65) || '_';", 1
+ },
+
+ -- <like-len-null-term-10>
+ {
+ "SELECT CHAR(65, 66, 00, 65, 00) LIKE CHAR(00) || '%';", 0
+ },
+
+ -- <like-len-null-term-11>
+ {
+ "SELECT CHAR(65, 66, 00, 65, 00) LIKE '%' || CHAR(00) || '%';", 1
+ },
+
+ -- With ESCAPE symbols.
+ -- <like-len-null-term-12>
+ {
+ "SELECT CHAR(65, 66, 00, 65, 00) || '_' || CHAR(00) \
+ LIKE '%' || CHAR(00) || '#_%' \
+ ESCAPE '#';", 1
+ },
+
+ -- <like-len-null-term-13>
+ {
+ "SELECT CHAR(65, 66, 00, 65, 00) || 'a' \
+ LIKE '%' || CHAR(00) || '_' \
+ ESCAPE '#';", 1
+ },
+
+ -- <like-len-null-term-14>
+ {
+ "SELECT CHAR(65, 66, 00, 65, 00) || 'a' \
+ LIKE '%' || CHAR(00) || '#_' \
+ ESCAPE '#';", 0
+ },
+}
+
+for i, case in ipairs(test_cases) do
+ local name = "like-len-null-term-" .. tostring(i)
+ local code = case[1]
+ local res = case[2]
+ test:do_execsql_test(name, code, { res })
+end
+
+test:finish_test()
--
2.14.3 (Apple Git-98)
</pre>
</body>
</html>