Fix the LIKE and LENGTH functions: '\0' is now treated as an ordinary character, so strings containing '\0' are processed in full instead of being cut off at the first '\0'. Examples: LENGTH(CHAR(65,00,65)) == 3; LIKE(CHAR(65,00,65), CHAR(65,00,66)) == False. Closes #3542 --- Branch: https://github.com/tarantool/tarantool/tree/sudobobo/gh-3542-LIKE/LEN-null-term Issue: https://github.com/tarantool/tarantool/issues/3542 src/box/sql/func.c | 32 +++++--- src/box/sql/vdbeInt.h | 2 +- test/sql-tap/gh-3542-like-len-null-term.test.lua | 97 ++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 10 deletions(-) create mode 100755 test/sql-tap/gh-3542-like-len-null-term.test.lua diff --git a/src/box/sql/func.c b/src/box/sql/func.c index e46b162d9..a820dc542 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -150,9 +150,13 @@ lengthFunc(sqlite3_context * context, int argc, sqlite3_value ** argv) const unsigned char *z = sqlite3_value_text(argv[0]); if (z == 0) return; + len = 0; - while (*z) { + size_t byte_len = sqlite3_value_bytes(argv[0]); + const unsigned char *prev_z; + for (size_t cnt = 0; cnt < byte_len; cnt += (z - prev_z)) { len++; + prev_z = z; SQLITE_SKIP_UTF8(z); } sqlite3_result_int(context, len); @@ -622,6 +626,8 @@ enum pattern_match_status { * * @param pattern String containing comparison pattern. * @param string String being compared. + * @param pattern_end Ptr to pattern last symbol. + * @param string_end Ptr to string last symbol. * @param is_like_ci true if LIKE is case insensitive. * @param match_other The escape char for LIKE. * @@ -630,6 +636,8 @@ enum pattern_match_status { static int sql_utf8_pattern_compare(const char *pattern, const char *string, + const char *pattern_end, + const char *string_end, const int is_like_ci, UChar32 match_other) { @@ -637,8 +645,6 @@ sql_utf8_pattern_compare(const char *pattern, UChar32 c, c2; /* One past the last escaped input char. 
*/ const char *zEscaped = 0; - const char *pattern_end = pattern + strlen(pattern); - const char *string_end = string + strlen(string); UErrorCode status = U_ZERO_ERROR; while (pattern < pattern_end) { @@ -721,6 +727,8 @@ sql_utf8_pattern_compare(const char *pattern, } bMatch = sql_utf8_pattern_compare(pattern, string, + pattern_end, + string_end, is_like_ci, match_other); if (bMatch != NO_MATCH) @@ -768,7 +776,9 @@ sql_utf8_pattern_compare(const char *pattern, int sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc) { - return sql_utf8_pattern_compare(zPattern, zStr, 0, esc); + return sql_utf8_pattern_compare(zPattern, zStr, + zPattern + strlen(zPattern), + zStr + strlen(zStr), 0, esc); } /** @@ -778,7 +788,9 @@ sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc) int sql_strlike_ci(const char *zPattern, const char *zStr, unsigned int esc) { - return sql_utf8_pattern_compare(zPattern, zStr, 1, esc); + return sql_utf8_pattern_compare(zPattern, zStr, + zPattern + strlen(zPattern), + zStr + strlen(zStr), 1, esc); } /** @@ -802,7 +814,6 @@ int sqlite3_like_count = 0; static void likeFunc(sqlite3_context *context, int argc, sqlite3_value **argv) { - const char *zA, *zB; u32 escape = SQL_END_OF_STRING; int nPat; sqlite3 *db = sqlite3_context_db_handle(context); @@ -818,8 +829,10 @@ likeFunc(sqlite3_context *context, int argc, sqlite3_value **argv) return; } #endif - zB = (const char *) sqlite3_value_text(argv[0]); - zA = (const char *) sqlite3_value_text(argv[1]); + const char *zB = (const char *) sqlite3_value_text(argv[0]); + const char *zA = (const char *) sqlite3_value_text(argv[1]); + const char *zB_end = zB + sqlite3_value_bytes(argv[0]); + const char *zA_end = zA + sqlite3_value_bytes(argv[1]); /* * Limit the length of the LIKE pattern to avoid problems @@ -860,7 +873,8 @@ likeFunc(sqlite3_context *context, int argc, sqlite3_value **argv) sqlite3_like_count++; #endif int res; - res = sql_utf8_pattern_compare(zB, zA, 
is_like_ci, escape); + res = sql_utf8_pattern_compare(zB, zA, zB_end, zA_end, + is_like_ci, escape); if (res == INVALID_PATTERN) { const char *const err_msg = "LIKE pattern can only contain UTF-8 characters"; diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h index f8dae7920..4ff48d27d 100644 --- a/src/box/sql/vdbeInt.h +++ b/src/box/sql/vdbeInt.h @@ -192,7 +192,7 @@ struct Mem { u32 flags; /* Some combination of MEM_Null, MEM_Str, MEM_Dyn, etc. */ /** Subtype for this value. */ enum sql_subtype subtype; - int n; /* Number of characters in string value, excluding '\0' */ + int n; /* size (in bytes) of string value, excluding trailing '\0' */ char *z; /* String or BLOB value */ /* ShallowCopy only needs to copy the information above */ char *zMalloc; /* Space to hold MEM_Str or MEM_Blob if szMalloc>0 */ diff --git a/test/sql-tap/gh-3542-like-len-null-term.test.lua b/test/sql-tap/gh-3542-like-len-null-term.test.lua new file mode 100755 index 000000000..e9ea9ea30 --- /dev/null +++ b/test/sql-tap/gh-3542-like-len-null-term.test.lua @@ -0,0 +1,97 @@ +#!/usr/bin/env tarantool +test = require("sqltester") +test:plan(14) + +-- gh-3542 - LIKE/LENGTH do not scan if '\0' is encountered. +-- This test ensures that LIKE and LENGTH functions does NOT stop +-- string processing if '\0' is encountered. + +test_cases = { + -- Simple cases with '\0' and ASCII symbols. + -- <like-len-null-term-1> + { + "VALUES(LENGTH(CHAR(00)));", 1 + }, + + -- <like-len-null-term-2> + { + "VALUES(LENGTH(CHAR(00, 65)));", 2 + }, + + -- <like-len-null-term-3> + { + "VALUES(LENGTH(CHAR(65, 65, 00, 65, 00, 65)));", 6 + }, + + -- <like-len-null-term-4> + { + "SELECT CHAR(65, 66, 00, 65) LIKE CHAR(65, 66, 00, 66);", 0 + }, + + -- <like-len-null-term-5> + { + "SELECT CHAR(65, 66, 00, 65) LIKE CHAR(65, 66, 00, 65);", 1 + }, + + -- Cases with UTF symbols. 
+ -- <like-len-null-term-6> + { + "VALUES(LENGTH('¢' || CHAR(00) || 'ሴ'));", 3 + }, + + -- <like-len-null-term-7> + { + "SELECT CHAR(65, 66, 00, 65) LIKE CHAR(65, 66, 00, 65) || '%';", 1 + }, + + -- <like-len-null-term-8> + { + "SELECT CHAR(65, 66, 00, 65, 00) LIKE CHAR(65, 66, 00, 65) || '%';", 1 + }, + + -- <like-len-null-term-9> + { + "SELECT CHAR(65, 66, 00, 65, 00) LIKE CHAR(65, 66, 00, 65) || '_';", 1 + }, + + -- <like-len-null-term-10> + { + "SELECT CHAR(65, 66, 00, 65, 00) LIKE CHAR(00) || '%';", 0 + }, + + -- <like-len-null-term-11> + { + "SELECT CHAR(65, 66, 00, 65, 00) LIKE '%' || CHAR(00) || '%';", 1 + }, + + -- With ESCAPE symbols. + -- <like-len-null-term-12> + { + "SELECT CHAR(65, 66, 00, 65, 00) || '_' || CHAR(00) \ + LIKE '%' || CHAR(00) || '#_%' \ + ESCAPE '#';", 1 + }, + + -- <like-len-null-term-13> + { + "SELECT CHAR(65, 66, 00, 65, 00) || 'a' \ + LIKE '%' || CHAR(00) || '_' \ + ESCAPE '#';", 1 + }, + + -- <like-len-null-term-14> + { + "SELECT CHAR(65, 66, 00, 65, 00) || 'a' \ + LIKE '%' || CHAR(00) || '#_' \ + ESCAPE '#';", 0 + }, +} + +for i, case in ipairs(test_cases) do + local name = "like-len-null-term-" .. tostring(i) + local code = case[1] + local res = case[2] + test:do_execsql_test(name, code, { res }) +end + +test:finish_test() -- 2.14.3 (Apple Git-98)