From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id DC5DF276FC for ; Wed, 1 Aug 2018 06:51:24 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id fDtCwn6nU4IT for ; Wed, 1 Aug 2018 06:51:24 -0400 (EDT) Received: from mail-lf1-f65.google.com (mail-lf1-f65.google.com [209.85.167.65]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 257D4276DF for ; Wed, 1 Aug 2018 06:51:24 -0400 (EDT) Received: by mail-lf1-f65.google.com with SMTP id f18-v6so12994012lfc.2 for ; Wed, 01 Aug 2018 03:51:23 -0700 (PDT) MIME-Version: 1.0 References: <20180718024314.be245cmsgklxuvnk@tkn_work_nb> <20180727130601.b2oby7dleapd5upg@tkn_work_nb> <20180727202219.ikwbax7tysfnmgr4@tkn_work_nb> <20180731134705.3pij4hwyyirhiwr7@tkn_work_nb> In-Reply-To: From: Nikita Tatunov Date: Wed, 1 Aug 2018 13:51:10 +0300 Message-ID: Subject: [tarantool-patches] Re: [PATCH] sql: LIKE & GLOB pattern comparison issue Content-Type: multipart/alternative; boundary="000000000000ab5f3a05725d790a" Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: Alexander Turenko Cc: avkhatskevich@tarantool.org, tarantool-patches@freelists.org --000000000000ab5f3a05725d790a Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable diff --git a/src/box/sql/func.c b/src/box/sql/func.c index c06e3bd..7f93ef6 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -617,13 +617,17 @@ struct compareInfo { u8 noCase; /* true to 
ignore case differences */ }; -/* - * For LIKE and GLOB matching on EBCDIC machines, assume that every - * character is exactly one byte in size. Also, provde the Utf8Read() - * macro for fast reading of the next character in the common case where - * the next character is ASCII. +/** + * Providing there are symbols in string s this + * macro returns UTF-8 code of character and + * promotes pointer to the next symbol in the string. + * Otherwise return code is SQL_END_OF_STRING. */ -#define Utf8Read(s, e) ucnv_getNextUChar(pUtf8conv, &s, e, &status) +#define Utf8Read(s, e) (((s) < (e)) ? \ + ucnv_getNextUChar(pUtf8conv, &(s), (e), &(status)) : 0) + +#define SQL_END_OF_STRING 0 +#define SQL_INVALID_UTF8_SYMBOL 0xfffd static const struct compareInfo globInfo =3D { '*', '?', '[', 0 }; @@ -638,19 +642,16 @@ static const struct compareInfo likeInfoNorm =3D { '%= ', '_', 0, 1 }; static const struct compareInfo likeInfoAlt =3D { '%', '_', 0, 0 }; /* - * Possible error returns from patternMatch() + * Possible error returns from sql_utf8_pattern_compare() */ #define SQLITE_MATCH 0 #define SQLITE_NOMATCH 1 #define SQLITE_NOWILDCARDMATCH 2 +#define SQL_PROHIBITED_PATTERN 3 -/* - * Compare two UTF-8 strings for equality where the first string is - * a GLOB or LIKE expression. Return values: - * - * SQLITE_MATCH: Match - * SQLITE_NOMATCH: No match - * SQLITE_NOWILDCARDMATCH: No match in spite of having * or % wildcards. +/** + * Compare two UTF-8 strings for equality where the first string + * is a GLOB or LIKE expression. * * Globbing rules: * @@ -663,92 +664,136 @@ static const struct compareInfo likeInfoAlt =3D { '%= ', '_', 0, 0 }; * * [^...] Matches one character not in the enclosed list. * - * With the [...] and [^...] matching, a ']' character can be included - * in the list by making it the first character after '[' or '^'. A - * range of characters can be specified using '-'. Example: - * "[a-z]" matches any single lower-case letter. 
To match a '-', make - * it the last character in the list. + * With the [...] and [^...] matching, a ']' character can be + * included in the list by making it the first character after + * '[' or '^'. A range of characters can be specified using '-'. + * Example: "[a-z]" matches any single lower-case letter. + * To match a '-', make it the last character in the list. * * Like matching rules: * - * '%' Matches any sequence of zero or more characters + * '%' Matches any sequence of zero or more characters. * - ** '_' Matches any one character + ** '_' Matches any one character. * * Ec Where E is the "esc" character and c is any other - * character, including '%', '_', and esc, match exactly c. + * character, including '%', '_', and esc, match + * exactly c. * * The comments within this routine usually assume glob matching. * - * This routine is usually quick, but can be N**2 in the worst case. + * This routine is usually quick, but can be N**2 in the worst + * case. + * + * @param pattern String containing comparison pattern. + * @param string String being compared. + * @param compareInfo Information about how to compare. + * @param matchOther The escape char (LIKE) or '[' (GLOB). + * + * @retval SQLITE_MATCH: Match. + * SQLITE_NOMATCH: No match. + * SQLITE_NOWILDCARDMATCH: No match in spite of having * + * or % wildcards. + * SQL_PROHIBITED_PATTERN: Pattern contains invalid + * symbol. */ static int -patternCompare(const char * pattern, /* The glob pattern */ - const char * string, /* The string to compare against the glob */ - const struct compareInfo *pInfo, /* Information about how to do the compare */ - UChar32 matchOther /* The escape char (LIKE) or '[' (GLOB) */ - ) +sql_utf8_pattern_compare(const char * pattern, + const char * string, + const struct compareInfo *pInfo, + UChar32 matchOther) { - UChar32 c, c2; /* Next pattern and input string chars */ - UChar32 matchOne =3D pInfo->matchOne; /* "?" 
or "_" */ - UChar32 matchAll =3D pInfo->matchAll; /* "*" or "%" */ - UChar32 noCase =3D pInfo->noCase; /* True if uppercase=3D=3Dlowercase */ - const char *zEscaped =3D 0; /* One past the last escaped input char */ + /* Next pattern and input string chars */ + UChar32 c, c2; + /* "?" or "_" */ + UChar32 matchOne =3D pInfo->matchOne; + /* "*" or "%" */ + UChar32 matchAll =3D pInfo->matchAll; + /* True if uppercase=3D=3Dlowercase */ + UChar32 noCase =3D pInfo->noCase; + /* One past the last escaped input char */ + const char *zEscaped =3D 0; const char * pattern_end =3D pattern + strlen(pattern); const char * string_end =3D string + strlen(string); UErrorCode status =3D U_ZERO_ERROR; - while (pattern < pattern_end){ - c =3D Utf8Read(pattern, pattern_end); + while ((c =3D Utf8Read(pattern, pattern_end)) !=3D SQL_END_OF_STRING) { + if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; if (c =3D=3D matchAll) { /* Match "*" */ - /* Skip over multiple "*" characters in the pattern. If there - * are also "?" characters, skip those as well, but consume a - * single character of the input string for each "?" skipped + /* Skip over multiple "*" characters in + * the pattern. If there are also "?" + * characters, skip those as well, but + * consume a single character of the + * input string for each "?" skipped. 
*/ - while (pattern < pattern_end){ - c =3D Utf8Read(pattern, pattern_end); + while ((c =3D Utf8Read(pattern, pattern_end)) !=3D + SQL_END_OF_STRING) { + if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; if (c !=3D matchAll && c !=3D matchOne) break; - if (c =3D=3D matchOne - && Utf8Read(string, string_end) =3D=3D 0) { + if (c =3D=3D matchOne && + (c2 =3D Utf8Read(string, string_end)) =3D=3D + SQL_END_OF_STRING) return SQLITE_NOWILDCARDMATCH; - } + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQLITE_NOMATCH; } - /* "*" at the end of the pattern matches */ - if (pattern =3D=3D pattern_end) + /* + * "*" at the end of the pattern matches. + */ + if (c =3D=3D SQL_END_OF_STRING) { + while ((c2 =3D Utf8Read(string, string_end)) !=3D + SQL_END_OF_STRING) + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQLITE_NOMATCH; return SQLITE_MATCH; + } if (c =3D=3D matchOther) { if (pInfo->matchSet =3D=3D 0) { c =3D Utf8Read(pattern, pattern_end); - if (c =3D=3D 0) + if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; + if (c =3D=3D SQL_END_OF_STRING) return SQLITE_NOWILDCARDMATCH; } else { - /* "[...]" immediately follows the "*". We have to do a slow - * recursive search in this case, but it is an unusual case. + /* "[...]" immediately + * follows the "*". We + * have to do a slow + * recursive search in + * this case, but it is + * an unusual case. 
*/ - assert(matchOther < 0x80); /* '[' is a single-byte character */ + assert(matchOther < 0x80); while (string < string_end) { int bMatch =3D - patternCompare(&pattern[-1], - string, - pInfo, - matchOther); + sql_utf8_pattern_compare( + &pattern[-1], + string, + pInfo, + matchOther); if (bMatch !=3D SQLITE_NOMATCH) return bMatch; - Utf8Read(string, string_end); + c =3D Utf8Read(string, string_end); + if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQLITE_NOMATCH; } return SQLITE_NOWILDCARDMATCH; } } - /* At this point variable c contains the first character of the - * pattern string past the "*". Search in the input string for the - * first matching character and recursively continue the match from - * that point. + /* At this point variable c contains the + * first character of the pattern string + * past the "*". Search in the input + * string for the first matching + * character and recursively continue the + * match from that point. * - * For a case-insensitive search, set variable cx to be the same as - * c but in the other case and search the input string for either - * c or cx. + * For a case-insensitive search, set + * variable cx to be the same as c but in + * the other case and search the input + * string for either c or cx. */ int bMatch; @@ -756,14 +801,18 @@ patternCompare(const char * pattern, /* The glob pattern */ c =3D u_tolower(c); while (string < string_end){ /** - * This loop could have been implemented - * without if converting c2 to lower case - * (by holding c_upper and c_lower), however - * it is implemented this way because lower - * works better with German and Turkish - * languages. + * This loop could have been + * implemented without if + * converting c2 to lower case + * by holding c_upper and + * c_lower,however it is + * implemented this way because + * lower works better with German + * and Turkish languages. 
*/ c2 =3D Utf8Read(string, string_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQLITE_NOMATCH; if (!noCase) { if (c2 !=3D c) continue; @@ -771,9 +820,10 @@ patternCompare(const char * pattern, /* The glob pattern */ if (c2 !=3D c && u_tolower(c2) !=3D c) continue; } - bMatch =3D - patternCompare(pattern, string, - pInfo, matchOther); + bMatch =3D sql_utf8_pattern_compare(pattern, + string, + pInfo, + matchOther); if (bMatch !=3D SQLITE_NOMATCH) return bMatch; } @@ -782,7 +832,9 @@ patternCompare(const char * pattern, /* The glob pattern */ if (c =3D=3D matchOther) { if (pInfo->matchSet =3D=3D 0) { c =3D Utf8Read(pattern, pattern_end); - if (c =3D=3D 0) + if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; + if (c =3D=3D SQL_END_OF_STRING) return SQLITE_NOMATCH; zEscaped =3D pattern; } else { @@ -790,23 +842,33 @@ patternCompare(const char * pattern, /* The glob pattern */ int seen =3D 0; int invert =3D 0; c =3D Utf8Read(string, string_end); + if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQLITE_NOMATCH; if (string =3D=3D string_end) return SQLITE_NOMATCH; c2 =3D Utf8Read(pattern, pattern_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; if (c2 =3D=3D '^') { invert =3D 1; c2 =3D Utf8Read(pattern, pattern_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; } if (c2 =3D=3D ']') { if (c =3D=3D ']') seen =3D 1; c2 =3D Utf8Read(pattern, pattern_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; } - while (c2 && c2 !=3D ']') { + while (c2 !=3D SQL_END_OF_STRING && c2 !=3D ']') { if (c2 =3D=3D '-' && pattern[0] !=3D ']' && pattern < pattern_end && prior_c > 0) { c2 =3D Utf8Read(pattern, pattern_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; if (c >=3D prior_c && c <=3D c2) seen =3D 1; prior_c =3D 0; @@ -817,29 +879,36 @@ patternCompare(const char * pattern, /* The glob pattern */ prior_c =3D c2; } c2 =3D Utf8Read(pattern, 
pattern_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; } - if (pattern =3D=3D pattern_end || (seen ^ invert) =3D=3D 0) { + if (pattern =3D=3D pattern_end || + (seen ^ invert) =3D=3D 0) { return SQLITE_NOMATCH; } continue; } } c2 =3D Utf8Read(string, string_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQLITE_NOMATCH; if (c =3D=3D c2) continue; if (noCase){ /** - * Small optimisation. Reduce number of calls - * to u_tolower function. - * SQL standards suggest use to_upper for symbol - * normalisation. However, using to_lower allows to - * respect Turkish '=C4=B0' in default locale. + * Small optimisation. Reduce number of + * calls to u_tolower function. SQL + * standards suggest use to_upper for + * symbol normalisation. However, using + * to_lower allows to respect Turkish '=C4=B0' + * in default locale. */ if (u_tolower(c) =3D=3D c2 || c =3D=3D u_tolower(c2)) continue; } - if (c =3D=3D matchOne && pattern !=3D zEscaped && c2 !=3D 0) + if (c =3D=3D matchOne && pattern !=3D zEscaped && + c2 !=3D SQL_END_OF_STRING) continue; return SQLITE_NOMATCH; } @@ -853,8 +922,7 @@ patternCompare(const char * pattern, /* The glob pattern */ int sqlite3_strglob(const char *zGlobPattern, const char *zString) { - return patternCompare(zGlobPattern, zString, &globInfo, - '['); + return sql_utf8_pattern_compare(zGlobPattern, zString, &globInfo, '['); } /* @@ -864,7 +932,7 @@ sqlite3_strglob(const char *zGlobPattern, const char *zString) int sqlite3_strlike(const char *zPattern, const char *zStr, unsigned int esc) { - return patternCompare(zPattern, zStr, &likeInfoNorm, esc); + return sql_utf8_pattern_compare(zPattern, zStr, &likeInfoNorm, esc); } /* @@ -910,8 +978,9 @@ likeFunc(sqlite3_context * context, int argc, sqlite3_value ** argv) zB =3D (const char *) sqlite3_value_text(argv[0]); zA =3D (const char *) sqlite3_value_text(argv[1]); - /* Limit the length of the LIKE or GLOB pattern to avoid problems - * of deep recursion and N*N behavior 
in patternCompare(). + /* Limit the length of the LIKE or GLOB pattern to avoid + * problems of deep recursion and N*N behavior in + * sql_utf8_pattern_compare(). */ nPat =3D sqlite3_value_bytes(argv[0]); testcase(nPat =3D=3D db->aLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]); @@ -947,7 +1016,12 @@ likeFunc(sqlite3_context * context, int argc, sqlite3_value ** argv) sqlite3_like_count++; #endif int res; - res =3D patternCompare(zB, zA, pInfo, escape); + res =3D sql_utf8_pattern_compare(zB, zA, pInfo, escape); + if (res =3D=3D SQL_PROHIBITED_PATTERN) { + sqlite3_result_error(context, "LIKE or GLOB pattern can only" + " contain UTF-8 characters", -1); + return; + } sqlite3_result_int(context, res =3D=3D SQLITE_MATCH); } diff --git a/test-run b/test-run index 77e9327..95562e9 160000 --- a/test-run +++ b/test-run @@ -1 +1 @@ -Subproject commit 77e93279210f8c5c1fd0ed03416fa19a184f0b6d +Subproject commit 95562e95401fef4e0b755ab0bb430974b5d1a29a diff --git a/test/sql-tap/e_expr.test.lua b/test/sql-tap/e_expr.test.lua index 13d3a96..9780d2c 100755 --- a/test/sql-tap/e_expr.test.lua +++ b/test/sql-tap/e_expr.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test =3D require("sqltester") -test:plan(12431) +test:plan(10665) --!./tcltestrunner.lua -- 2010 July 16 @@ -77,8 +77,10 @@ local operations =3D { {"<>", "ne1"}, {"!=3D", "ne2"}, {"IS", "is"}, - {"LIKE", "like"}, - {"GLOB", "glob"}, +-- NOTE: This test needs refactoring after deletion of GLOB & +-- type restrictions for LIKE. (See #3572) +-- {"LIKE", "like"}, +-- {"GLOB", "glob"}, {"AND", "and"}, {"OR", "or"}, {"MATCH", "match"}, @@ -96,7 +98,12 @@ operations =3D { {"+", "-"}, {"<<", ">>", "&", "|"}, {"<", "<=3D", ">", ">=3D"}, - {"=3D", "=3D=3D", "!=3D", "<>", "LIKE", "GLOB"}, --"MATCH", "REGEXP"}, +-- NOTE: This test needs refactoring after deletion of GLOB & +-- type restrictions for LIKE. 
(See #3572) +-- Another NOTE: MATCH & REGEXP aren't supported in Tarantool & +-- are waiting for their hour, don't confuse them +-- being commented with ticket above. + {"=3D", "=3D=3D", "!=3D", "<>"}, --"LIKE", "GLOB"}, --"MATCH", "REGEXP= "}, {"AND"}, {"OR"}, } @@ -475,6 +482,7 @@ for _, op in ipairs(oplist) do end end end + --------------------------------------------------------------------------= - -- Test the IS and IS NOT operators. -- diff --git a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua new file mode 100755 index 0000000..2a787f2 --- /dev/null +++ b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua @@ -0,0 +1,213 @@ +#!/usr/bin/env tarantool +test =3D require("sqltester") +test:plan(128) + +local prefix =3D "like-test-" + +-- Unicode byte sequences. +local valid_testcases =3D { + '\x01', + '\x09', + '\x1F', + '\x7F', + '\xC2\x80', + '\xC2\x90', + '\xC2\x9F', + '\xE2\x80\xA8', + '\x20\x0B', + '\xE2\x80\xA9', +} + +-- Non-Unicode byte sequences. 
+local invalid_testcases =3D { + '\xE2\x80', + '\xFE\xFF', + '\xC2', + '\xED\xB0\x80', + '\xD0', +} + +local like_test_cases =3D +{ + {"1.1", + "SELECT 'AB' LIKE '_B';", + {0, {1}} }, + {"1.2", + "SELECT 'CD' LIKE '_B';", + {0, {0}} }, + {"1.3", + "SELECT '' LIKE '_B';", + {0, {0}} }, + {"1.4", + "SELECT 'AB' LIKE '%B';", + {0, {1}} }, + {"1.5", + "SELECT 'CD' LIKE '%B';", + {0, {0}} }, + {"1.6", + "SELECT '' LIKE '%B';", + {0, {0}} }, + {"1.7", + "SELECT 'AB' LIKE 'A__';", + {0, {0}} }, + {"1.8", + "SELECT 'CD' LIKE 'A__';", + {0, {0}} }, + {"1.9", + "SELECT '' LIKE 'A__';", + {0, {0}} }, + {"1.10", + "SELECT 'AB' LIKE 'A_';", + {0, {1}} }, + {"1.11", + "SELECT 'CD' LIKE 'A_';", + {0, {0}} }, + {"1.12", + "SELECT '' LIKE 'A_';", + {0, {0}} }, + {"1.13", + "SELECT 'AB' LIKE 'A';", + {0, {0}} }, + {"1.14", + "SELECT 'CD' LIKE 'A';", + {0, {0}} }, + {"1.15", + "SELECT '' LIKE 'A';", + {0, {0}} }, + {"1.16", + "SELECT 'AB' LIKE '_';", + {0, {0}} }, + {"1.17", + "SELECT 'CD' LIKE '_';", + {0, {0}} }, + {"1.18", + "SELECT '' LIKE '_';", + {0, {0}} }, + {"1.19", + "SELECT 'AB' LIKE '__';", + {0, {1}} }, + {"1.20", + "SELECT 'CD' LIKE '__';", + {0, {1}} }, + {"1.21", + "SELECT '' LIKE '__';", + {0, {0}} }, + {"1.22", + "SELECT 'AB' LIKE '%A';", + {0, {0}} }, + {"1.23", + "SELECT 'AB' LIKE '%C';", + {0, {0}} }, + {"1.24", + "SELECT 'ab' LIKE '%df';", + {0, {0}} }, + {"1.25", + "SELECT 'abCDF' LIKE '%df';", + {0, {1}} }, + {"1.26", + "SELECT 'CDF' LIKE '%df';", + {0, {1}} }, + {"1.27", + "SELECT 'ab' LIKE 'a_';", + {0, {1}} }, + {"1.28", + "SELECT 'abCDF' LIKE 'a_';", + {0, {0}} }, + {"1.29", + "SELECT 'CDF' LIKE 'a_';", + {0, {0}} }, + {"1.30", + "SELECT 'ab' LIKE 'ab%';", + {0, {1}} }, + {"1.31", + "SELECT 'abCDF' LIKE 'ab%';", + {0, {1}} }, + {"1.32", + "SELECT 'CDF' LIKE 'ab%';", + {0, {0}} }, + {"1.33", + "SELECT 'ab' LIKE 'abC%';", + {0, {0}} }, + {"1.34", + "SELECT 'abCDF' LIKE 'abC%';", + {0, {1}} }, + {"1.35", + "SELECT 'CDF' LIKE 'abC%';", + {0, {0}} }, + {"1.36", 
+ "SELECT 'ab' LIKE 'a_%';", + {0, {1}} }, + {"1.37", + "SELECT 'abCDF' LIKE 'a_%';", + {0, {1}} }, + {"1.38", + "SELECT 'CDF' LIKE 'a_%';", + {0, {0}} }, +} + +test:do_catchsql_set_test(like_test_cases, prefix) + +-- Invalid testcases. +for i, tested_string in ipairs(invalid_testcases) do + + -- We should raise an error in case + -- pattern contains invalid characters. + + local test_name =3D prefix .. "2." .. tostring(i) + local test_itself =3D "SELECT 'abc' LIKE 'ab" .. tested_string .. "';" + test:do_catchsql_test(test_name, test_itself, + {1, "LIKE or GLOB pattern can only contain UTF-8 characters"}) + + test_name =3D prefix .. "3." .. tostring(i) + test_itself =3D "SELECT 'abc' LIKE 'abc" .. tested_string .. "';" + test:do_catchsql_test(test_name, test_itself, + {1, "LIKE or GLOB pattern can only contain UTF-8 characters"}) + + test_name =3D prefix .. "4." .. tostring(i) + test_itself =3D "SELECT 'abc' LIKE 'ab" .. tested_string .. "c';" + test:do_catchsql_test(test_name, test_itself, + {1, "LIKE or GLOB pattern can only contain UTF-8 characters"}) + + -- Just skipping if row value predicand contains invalid character. + + test_name =3D prefix .. "5." .. tostring(i) + test_itself =3D "SELECT 'ab" .. tested_string .. "' LIKE 'abc';" + test:do_execsql_test(test_name, test_itself, {0}) + + test_name =3D prefix .. "6." .. tostring(i) + test_itself =3D "SELECT 'abc" .. tested_string .. "' LIKE 'abc';" + test:do_execsql_test(test_name, test_itself, {0}) + + test_name =3D prefix .. "7." .. tostring(i) + test_itself =3D "SELECT 'ab" .. tested_string .. "c' LIKE 'abc';" + test:do_execsql_test(test_name, test_itself, {0}) +end + +-- Valid testcases. +for i, tested_string in ipairs(valid_testcases) do + test_name =3D prefix .. "8." .. tostring(i) + local test_itself =3D "SELECT 'abc' LIKE 'ab" .. tested_string .. "';" + test:do_execsql_test(test_name, test_itself, {0}) + + test_name =3D prefix .. "9." .. tostring(i) + test_itself =3D "SELECT 'abc' LIKE 'abc" .. 
tested_string .. "';" + test:do_execsql_test(test_name, test_itself, {0}) + + test_name =3D prefix .. "10." .. tostring(i) + test_itself =3D "SELECT 'abc' LIKE 'ab" .. tested_string .. "c';" + test:do_execsql_test(test_name, test_itself, {0}) + + test_name =3D prefix .. "11." .. tostring(i) + test_itself =3D "SELECT 'ab" .. tested_string .. "' LIKE 'abc';" + test:do_execsql_test(test_name, test_itself, {0}) + + test_name =3D prefix .. "12." .. tostring(i) + test_itself =3D "SELECT 'abc" .. tested_string .. "' LIKE 'abc';" + test:do_execsql_test(test_name, test_itself, {0}) + + test_name =3D prefix .. "13." .. tostring(i) + test_itself =3D "SELECT 'ab" .. tested_string .. "c' LIKE 'abc';" + test:do_execsql_test(test_name, test_itself, {0}) +end + +test:finish_test() --000000000000ab5f3a05725d790a Content-Type: text/html; charset="UTF-8" Content-Transfer-Encoding: quoted-printable
diff --git a/src/box/sql/func.c b/src/box/sql/func.c<= /div>
index c06e3bd..7f93ef6 100644
--- a/src/box/sql/func.c<= /div>
+++ b/src/box/sql/func.c
@@ -617,13 +617,17 @@ struct c= ompareInfo {
=C2=A0 u8 noC= ase; /* true to ignore case differe= nces */
=C2=A0};
=C2=A0
-/*
- * For= LIKE and GLOB matching on EBCDIC machines, assume that every
- *= character is exactly one byte in size.=C2=A0 Also, provde the Utf8Read()
- * macro for fast reading of the next character in the common cas= e where
- * the next character is ASCII.
+/**
+ * Providing there are symbols in string s this
+ * macro retur= ns UTF-8 code of character and
+ * promotes pointer to the next s= ymbol in the string.
+ * Otherwise return code is SQL_END_OF_STRI= NG.
=C2=A0 */
-#define Utf8Read(s, e)=C2=A0 =C2=A0 ucnv= _getNextUChar(pUtf8conv, &s, e, &status)
+#define Utf8Rea= d(s, e) (((s) < (e)) ? \
+ ucnv_getNextUChar(pUtf8conv, &(s), (e), &(status)) : 0)
+
+#define SQL_END_OF_STRING=C2=A0 =C2=A0 =C2=A0 =C2=A0 0
+#define SQL_INVALID_UTF8_SYMBOL=C2=A0 0xfffd
=C2=A0
<= div>=C2=A0static const struct compareInfo globInfo =3D { '*', '= ?', '[', 0 };
=C2=A0
@@ -638,19 +642,16 @@ = static const struct compareInfo likeInfoNorm =3D { '%', '_'= , 0, 1 };
=C2=A0static const struct compareInfo likeInfoAlt =3D {= '%', '_', 0, 0 };
=C2=A0
=C2=A0/*
- * Possible error returns from patternMatch()
+ * Possible= error returns from sql_utf8_pattern_compare()
=C2=A0 */
=C2=A0#define SQLITE_MATCH=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A00
=C2=A0#define SQLITE_NOMATCH=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A01
=C2=A0#define SQLITE_NOWILDCARDMATCH=C2=A0 =C2=A02
+#define SQL_PROHIBITED_PATTERN=C2=A0 =C2=A03
=C2=A0
-/*
- * Compare two UTF-8 strings for equality where the fi= rst string is
- * a GLOB or LIKE expression.=C2=A0 Return values:=
- *
- *=C2=A0 =C2=A0 SQLITE_MATCH:=C2=A0 =C2=A0 =C2=A0= =C2=A0 =C2=A0 =C2=A0 Match
- *=C2=A0 =C2=A0 SQLITE_NOMATCH:=C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 No match
- *=C2=A0 =C2=A0 SQLITE_= NOWILDCARDMATCH:=C2=A0 No match in spite of having * or % wildcards.
<= div>+/**
+ * Compare two UTF-8 strings for equality where the fir= st string
+ * is a GLOB or LIKE expression.
=C2=A0 *
=C2=A0 * Globbing rules:
=C2=A0 *
@@ -663,92 +6= 64,136 @@ static const struct compareInfo likeInfoAlt =3D { '%', &#= 39;_', 0, 0 };
=C2=A0 *
=C2=A0 *=C2=A0 =C2=A0 =C2= =A0[^...]=C2=A0 =C2=A0 =C2=A0Matches one character not in the enclosed list= .
=C2=A0 *
- * With the [...] and [^...] matching, a &#= 39;]' character can be included
- * in the list by making it = the first character after '[' or '^'.=C2=A0 A
- *= range of characters can be specified using '-'.=C2=A0 Example:
- * "[a-z]" matches any single lower-case letter.=C2=A0 To= match a '-', make
- * it the last character in the list.=
+ * With the [...] and [^...] matching, a ']' character = can be
+ * included in the list by making it the first character = after
+ * '[' or '^'. A range of characters can b= e specified using '-'.
+ * Example: "[a-z]" mat= ches any single lower-case letter.
+ * To match a '-', ma= ke it the last character in the list.
=C2=A0 *
=C2=A0 *= Like matching rules:
=C2=A0 *
- *=C2=A0 =C2=A0 =C2=A0 = '%'=C2=A0 =C2=A0 =C2=A0 =C2=A0Matches any sequence of zero or more = characters
+ *=C2=A0 =C2=A0 =C2=A0 '%'=C2=A0 =C2=A0 =C2= =A0 =C2=A0Matches any sequence of zero or more characters.
=C2=A0= *
- **=C2=A0 =C2=A0 =C2=A0'_'=C2=A0 =C2=A0 =C2=A0 =C2=A0= Matches any one character
+ **=C2=A0 =C2=A0 =C2=A0'_'=C2= =A0 =C2=A0 =C2=A0 =C2=A0Matches any one character.
=C2=A0 *
=
=C2=A0 *=C2=A0 =C2=A0 =C2=A0 Ec=C2=A0 =C2=A0 =C2=A0 =C2=A0 Where E is = the "esc" character and c is any other
- *=C2=A0 =C2=A0= =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 character, including '%'= , '_', and esc, match exactly c.
+ *=C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 character, including '%', '_= ', and esc, match
+ *=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 exactly c.
=C2=A0 *
=C2=A0 * The comm= ents within this routine usually assume glob matching.
=C2=A0 *
- * This routine is usually quick, but can be N**2 in the worst ca= se.
+ * This routine is usually quick, but can be N**2 in the wor= st
+ * case.
+ *
+ * @param pattern String co= ntaining comparison pattern.
+ * @param string String being compa= red.
+ * @param compareInfo Information about how to compare.
+ * @param matchOther The escape char (LIKE) or '[' (GLOB).<= /div>
+ *
+ * @retval SQLITE_MATCH:=C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 Match.
+ * =C2=A0 =C2=A0SQLITE_NOMATCH:=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 No match.=
+ * =C2=A0 =C2=A0SQLITE_N= OWILDCARDMATCH:=C2=A0 No match in spite of having *
+ * =C2=A0 =C2=A0 or % wildcards.
+ = * =C2=A0 =C2=A0SQL_PROHIBITED_PATTER= N:=C2=A0 Pattern contains invalid
+ * =C2=A0 =C2=A0 symbol.
=C2=A0 */
=C2=A0st= atic int
-patternCompare(const char * pattern, /* The glob pattern */
- =C2=A0 =C2=A0 =C2=A0 =C2=A0const char * string, /* The string to compare against the glob *= /
- =C2=A0 =C2=A0 =C2=A0 = =C2=A0const struct compareInfo *pInfo, /* Information about how to do the compare */
- =C2=A0 =C2=A0 =C2=A0 =C2=A0UChar32 matchOther /* The escape char (LIKE) or '['= (GLOB) */
-=C2=A0 =C2=A0 )
+sql_utf8_pattern_compare(c= onst char * pattern,
+ = const char * string,
+ = const struct compareInfo *pInfo,
+ UChar32 matchOther)
=C2=A0{
- UChar32 c, c2; <= /span>/* Next pattern and input string chars */
- UChar32 matchOne =3D pInfo->matchOne; /* "?" or "_" */
- UChar32 matchAll =3D pInfo->ma= tchAll; /* "*" or "%&= quot; */
- UChar32 noCase = =3D pInfo->noCase; /* True if upp= ercase=3D=3Dlowercase */
- const char *zEscaped =3D 0; /* One = past the last escaped input char */
+ /* Next pattern and input string chars */
+ UChar32 c, c2;
+ /* "?" or "_" */
+ UChar32 matchOne =3D pInfo->matchOne;<= /div>
+ /* "*" or &quo= t;%" */
+ UChar32 mat= chAll =3D pInfo->matchAll;
+ <= /span>/* True if uppercase=3D=3Dlowercase */
+ UChar32 noCase =3D pInfo->noCase;
+ /* One past the last escaped input char *= /
+ const char *zEscaped = =3D 0;
=C2=A0 const char *= pattern_end =3D pattern + strlen(pattern);
=C2=A0 const char * string_end =3D string + strlen(string= );
=C2=A0 UErrorCode statu= s =3D U_ZERO_ERROR;
=C2=A0
- while (pattern < pattern_end){
- c =3D Utf8Read(pattern, pattern_end);
+ while ((c =3D Utf8Read(pattern, patte= rn_end)) !=3D SQL_END_OF_STRING) {
+ if (c =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQL_PROHIBITED_PATTERN;
=C2=A0= if (c =3D=3D matchAll) { /* Match "*" */
- /* Skip over multiple "*" chara= cters in the pattern.=C2=A0 If there
- * are also "?" characters, skip those as well, bu= t consume a
- * single = character of the input string for each "?" skipped
+ /* Skip over multiple "*" = characters in
+ * the p= attern. If there are also "?"
+ * characters, skip those as well, but
+ * consume a single character of the
+ * input string for each = "?" skipped.
=C2=A0 <= /span> */
- while (patte= rn < pattern_end){
- c =3D Utf8Read(pattern, pattern_end);
+ while ((c =3D Utf8Read(pattern, pattern_end)) !=3D
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0SQ= L_END_OF_STRING) {
+ if= (c =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQL_PROHIBITED_PATTERN;
=C2=A0 if (c !=3D matchAll && c !=3D match= One)
=C2=A0 break;
- if (c =3D=3D matchOne
- =C2=A0 =C2=A0 &&= ; Utf8Read(string, string_end) =3D=3D 0) {
+ if (c =3D=3D matchOne &&
+ =C2=A0 =C2=A0 (c2 =3D Utf8Read(string, st= ring_end)) =3D=3D
+ =C2= =A0 =C2=A0 SQL_END_OF_STRING)
=C2=A0 return SQLITE_NOWILDCARDMATCH;
- }
+ <= /span>if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQLITE_NOMATCH;
=C2=A0 }
- /* "*" at the end of the pattern matches */
= - if (pattern =3D=3D pattern_end)<= /div>
+ /*
+ * "*" at the end of the patter= n matches.
+ */
+ if (c =3D=3D SQL_END_OF_STRIN= G) {
+ while ((c2 =3D U= tf8Read(string, string_end)) !=3D
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0SQL_END_OF_STRING)
+ if (c2 =3D=3D SQL_INVALID_UTF8_SYMBO= L)
+ return SQLITE_NO= MATCH;
=C2=A0 return SQ= LITE_MATCH;
+ }
=C2=A0 if (c =3D=3D matchOther) = {
=C2=A0 if (pInfo->= matchSet =3D=3D 0) {
=C2=A0 <= /span>c =3D Utf8Read(pattern, pattern_end);
- if (c =3D=3D 0)
+ if (c =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQL_PROHIBITED_PATTERN;
=
+ if (c =3D=3D SQL_END_OF_S= TRING)
=C2=A0 return = SQLITE_NOWILDCARDMATCH;
=C2=A0 = } else {
- /* = "[...]" immediately follows the "*".=C2=A0 We have to d= o a slow
- * recursiv= e search in this case, but it is an unusual case.
+ /* "[...]" immediately
+<= span style=3D"white-space:pre"> * follows the "*". We=
+ * have to do a slo= w
+ * recursive searc= h in
+ * this case, b= ut it is
+ * an unusu= al case.
=C2=A0 */
- assert(matchOther <= 0x80); /* '[' is a single-b= yte character */
+ ass= ert(matchOther < 0x80);
=C2=A0= while (string < string_end) {
=C2=A0 int bMatch =3D
- =C2=A0 =C2=A0 patternCompare(&pattern[-1],
- =C2=A0 =C2=A0strin= g,
- =C2=A0 =C2=A0p= Info,
- =C2=A0 =C2= =A0matchOther);
+ =C2= =A0 =C2=A0 sql_utf8_pattern_compare(
+ &pattern[-1],
+ string,
+ = pInfo,
+ matchOther);
=C2=A0 if (bMatch !=3D SQLITE_NOMATCH)
=C2=A0 return bMatch;
- Utf8Read(string, string_end);
+ c =3D Utf8Read(string, string_end);
+<= span style=3D"white-space:pre"> if (c =3D=3D SQL_INVALID_UTF8_S= YMBOL)
+ return SQLI= TE_NOMATCH;
=C2=A0 }
=C2=A0 return SQLITE_NO= WILDCARDMATCH;
=C2=A0 }=
=C2=A0 }
=C2= =A0
- /* At this point v= ariable c contains the first character of the
- * pattern string past the "*".=C2=A0 Sea= rch in the input string for the
-= * first matching character and recursively continue the match fr= om
- * that point.
+ /* At this point variable = c contains the
+ * firs= t character of the pattern string
+ * past the "*". Search in the input
+ * string for the first matching
=
+ * character and recursivel= y continue the
+ * matc= h from that point.
=C2=A0 *
- * For a case-ins= ensitive search, set variable cx to be the same as
- * c but in the other case and search the in= put string for either
- = * c or cx.
+ * For a c= ase-insensitive search, set
+ <= /span> * variable cx to be the same as c but in
+ * the other case and search the input
= + * string for either c or cx.
=C2=A0 */
=C2= =A0
=C2=A0 int bMatch;
@@ -756,14 +801,18 @@ patternCompare(const char * pattern, /* The glob pattern */
=C2=A0 c =3D u_tolower(c);
=C2=A0= while (string < string_end){
=C2=A0 /**
- * This loop could have been imple= mented
- * without if = converting c2 to lower case
- = * (by holding c_upper and c_lower), however
- * it is implemented this way because lower=
- * works better with= German and Turkish
- = * languages.
+ * This loop could have been
+ * implemented without if
+ * converting c2 to lower case
+ * by holding c_upper and
+ * c_lower, however it is
+ * implemented this way because
+ * lower works better with German
+ * and Turkish languages.
=C2=A0 */
=C2= =A0 c2 =3D Utf8Read(string, strin= g_end);
+ if (c2 =3D=3D= SQL_INVALID_UTF8_SYMBOL)
+ <= /span>return SQLITE_NOMATCH;
=C2=A0 if (!noCase) {
=C2=A0 if (c2 !=3D c)
=C2=A0 continue;
@@ -771,9 +820,10 @@ patternCompare(const= char * pattern, /* The glob pattern= */
=C2=A0 if (c2 !=3D= c && u_tolower(c2) !=3D c)
=C2=A0 continue;
=C2=A0 }
- bMa= tch =3D
- =C2=A0 =C2=A0= patternCompare(pattern, string,
- =C2=A0 =C2=A0pInfo, matchOther);
+ bMatch =3D sql_utf8_pattern_compare(pattern,
+ =C2=A0 string,
<= div>+ =C2=A0 pInfo,
+ =C2=A0 matchOther);
<= div>=C2=A0 if (bMatch !=3D SQLITE= _NOMATCH)
=C2=A0 retur= n bMatch;
=C2=A0 }
=
@@ -782,7 +832,9 @@ patternCompare(const char * pattern, /* The glob pattern */
=C2=A0 if (c =3D=3D matchOther) {
=C2=A0<= span style=3D"white-space:pre"> if (pInfo->matchSet =3D=3D 0) {=
=C2=A0 c =3D Utf8Read(= pattern, pattern_end);
- if (c =3D=3D 0)
+ if = (c =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQL_PROHIBITED_PATTERN;
+ if (c =3D=3D SQL_END_OF_STRING)
=C2=A0 return SQLITE_NOMATCH;
= =C2=A0 zEscaped =3D pattern;
=C2=A0 } else {
@@= -790,23 +842,33 @@ patternCompare(const char * pattern, /* The glob pattern */
=C2=A0 int seen =3D 0;
=C2=A0 int invert =3D 0;
=C2=A0 c =3D Utf8Read(string, string_end);
+ if (c =3D=3D SQL_INVALID_UTF= 8_SYMBOL)
+ return SQL= ITE_NOMATCH;
=C2=A0 if = (string =3D=3D string_end)
=C2=A0= return SQLITE_NOMATCH;
=C2=A0 c2 =3D Utf8Read(pattern, pattern_end);
+ if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL)=
+ return SQL_PROHIBIT= ED_PATTERN;
=C2=A0 if (= c2 =3D=3D '^') {
=C2=A0 = invert =3D 1;
=C2=A0 = c2 =3D Utf8Read(pattern, pattern_end);
+ if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQL_PROHIBITED_PATTE= RN;
=C2=A0 }
= =C2=A0 if (c2 =3D=3D ']')= {
=C2=A0 if (c =3D=3D= ']')
=C2=A0 = seen =3D 1;
=C2=A0 c2 = =3D Utf8Read(pattern, pattern_end);
+ if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQL_PROHIBITED_PATTERN;
=C2=A0 }
- while (c2 && c2 !=3D ']') {=
+ while (c2 !=3D SQL_E= ND_OF_STRING && c2 !=3D ']') {
=C2=A0 if (c2 =3D=3D '-' && patte= rn[0] !=3D ']'
=C2=A0 = =C2=A0 =C2=A0 && pattern < pattern_end
=C2=A0<= span style=3D"white-space:pre"> =C2=A0 =C2=A0 && prior_c= > 0) {
=C2=A0 c2 = =3D Utf8Read(pattern, pattern_end);
+ if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQL_PROHIBITED_PATTERN;
<= div>=C2=A0 if (c >=3D prior_= c && c <=3D c2)
=C2=A0= seen =3D 1;
=C2=A0 = prior_c =3D 0;
@@ -817,29 +879,36 @@ patternCompare(c= onst char * pattern, /* The glob pat= tern */
=C2=A0 prior_= c =3D c2;
=C2=A0 }
=C2=A0 c2 =3D Utf8Read(pa= ttern, pattern_end);
+ if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQL_PROHIBITED_PATTERN;
=C2=A0 }
- if (pattern =3D=3D pattern_end || (seen ^ invert) =3D=3D 0= ) {
+ if (pattern =3D= =3D pattern_end ||
+ = =C2=A0 =C2=A0 (seen ^ invert) =3D=3D 0) {
=C2=A0 return SQLITE_NOMATCH;
=C2=A0 }
=C2=A0 continue;
=C2=A0 }
=C2=A0 }
=C2=A0 c2 =3D Utf8Read(str= ing, string_end);
+ if (c= 2 =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQLITE_NOMATCH;
=C2=A0 if (c =3D=3D c2)
=C2=A0 continue;
=C2=A0= if (noCase){
=C2=A0 /**
- * Small opti= misation. Reduce number of calls
- * to u_tolower function.
- * SQL standards suggest use to_upper for symbol
-= * normalisation. However, using = to_lower allows to
- * = respect Turkish '=C4=B0' in default locale.
+ * Small optimisation. Reduce number of
+ * calls to u_tolower function. SQL
+ * standards suggest use to_upper for
+ * symbol normalisation. However, using
+ * to_lower allows to respect Turkish 'İ'
+ * in default locale.
=C2=A0 */
= =C2=A0 if (u_tolower(c) =3D=3D c2 = ||
=C2=A0 =C2=A0 =C2=A0 = c =3D=3D u_tolower(c2))
=C2=A0 = continue;
=C2=A0 = }
- if (c =3D=3D matchOne= && pattern !=3D zEscaped && c2 !=3D 0)
+ if (c =3D=3D matchOne && pattern != =3D zEscaped &&
+ =C2=A0 =C2=A0 c2 !=3D SQL_END_OF_STRING)
=C2=A0 continue;
=C2=A0 return SQLITE_NOMATCH;
=C2=A0 }
@@ -853,8 +922,7 @@ patternCompare(const c= har * pattern, /* The glob pattern *= /
=C2=A0int
=C2=A0sqlite3_strglob(const char *zGlobPatt= ern, const char *zString)
=C2=A0{
- return patternCompare(zGlobPattern, zString, &globI= nfo,
- =C2=A0 =C2=A0 =C2= =A0 '[');
+ return= sql_utf8_pattern_compare(zGlobPattern, zString, &globInfo, '['= );
=C2=A0}
=C2=A0
=C2=A0/*
@@ -864,= 7 +932,7 @@ sqlite3_strglob(const char *zGlobPattern, const char *zString)<= /div>
=C2=A0int
=C2=A0sqlite3_strlike(const char *zPattern, c= onst char *zStr, unsigned int esc)
=C2=A0{
- return patternCompare(zPattern, zStr, &lik= eInfoNorm, esc);
+ return = sql_utf8_pattern_compare(zPattern, zStr, &likeInfoNorm, esc);
=C2=A0}
=C2=A0
=C2=A0/*
@@ -910,8 +978,9 @@ = likeFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
<= div>=C2=A0 zB =3D (const char *) sql= ite3_value_text(argv[0]);
=C2=A0 = zA =3D (const char *) sqlite3_value_text(argv[1]);
=C2=A0<= /div>
- /* Limit the length of t= he LIKE or GLOB pattern to avoid problems
- * of deep recursion and N*N behavior in patternCompare()= .
+ /* Limit the length of the LIKE or GLOB pattern to avoid
+ * problems of deep recursion and N*N behavior in
+ * sql_utf8_pattern_compare().
=C2=A0 */
=C2=A0 nPat =3D sqlite3_value_bytes(argv[0]);
=C2=A0 testcase(nPat =3D=3D = db->aLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]);
@@ -947,7 +1016= ,12 @@ likeFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)=
=C2=A0 sqlite3_like_count= ++;
=C2=A0#endif
=C2=A0= int res;
- res =3D= patternCompare(zB, zA, pInfo, escape);
+ res =3D sql_utf8_pattern_compare(zB, zA, pInfo, escape);
+ if (res =3D=3D SQL_PROHIBI= TED_PATTERN) {
+ sqlite3_= result_error(context, "LIKE or GLOB pattern can only"
+= =C2=A0 =C2=A0 =C2=A0" conta= in UTF-8 characters", -1);
+= return;
+ }
=
=C2=A0 sqlite3_result_int(conte= xt, res =3D=3D SQLITE_MATCH);
=C2=A0}
=C2=A0
= diff --git a/test-run b/test-run
index 77e9327..95562e9 160000
--- a/test-run
+++ b/test-run
@@ -1 +1 @@
=
-Subproject commit 77e93279210f8c5c1fd0ed03416fa19a184f0b6d
= +Subproject commit 95562e95401fef4e0b755ab0bb430974b5d1a29a
diff = --git a/test/sql-tap/e_expr.test.lua b/test/sql-tap/e_expr.test.lua
index 13d3a96..9780d2c 100755
--- a/test/sql-tap/e_expr.test.l= ua
+++ b/test/sql-tap/e_expr.test.lua
@@ -1,6 +1,6 @@
=C2=A0#!/usr/bin/env tarantool
=C2=A0test =3D require(&q= uot;sqltester")
-test:plan(12431)
+test:plan(10665= )
=C2=A0
=C2=A0--!./tcltestrunner.lua
=C2=A0-= - 2010 July 16
@@ -77,8 +77,10 @@ local operations =3D {
=C2=A0 =C2=A0 =C2=A0{"<>", "ne1"},
= =C2=A0 =C2=A0 =C2=A0{"!=3D", "ne2"},
=C2=A0 = =C2=A0 =C2=A0{"IS", "is"},
-=C2=A0 =C2=A0 {&q= uot;LIKE", "like"},
-=C2=A0 =C2=A0 {"GLOB&quo= t;, "glob"},
+-- NOTE: This test needs refactoring after deletion of GLOB &
+-- type restrictions for LIKE. (See #3572)
+--=C2=A0 =C2=A0 {&qu= ot;LIKE", "like"},
+--=C2=A0 =C2=A0 {"GLOB&qu= ot;, "glob"},
=C2=A0 =C2=A0 =C2=A0{"AND", &qu= ot;and"},
=C2=A0 =C2=A0 =C2=A0{"OR", "or"= ;},
=C2=A0 =C2=A0 =C2=A0{"MATCH", "match"},
@@ -96,7 +98,12 @@ operations =3D {
=C2=A0 =C2=A0 =C2=A0= {"+", "-"},
=C2=A0 =C2=A0 =C2=A0{"<&l= t;", ">>", "&", "|"},
=C2=A0 =C2=A0 =C2=A0{"<", "<=3D", ">&qu= ot;, ">=3D"},
-=C2=A0 =C2=A0 {"=3D", "= ;=3D=3D", "!=3D", "<>", "LIKE", &q= uot;GLOB"}, --"MATCH", "REGEXP"},
+-- NOTE: This test needs refactoring after deletion of GLOB &
+--  type restrictions for LIKE. (See #3572)
+-- Another NOTE: MATCH & REGEXP aren't supported in Tarantool &
+--  are waiting for their hour, don't confuse them
+--  being commented with ticket above.
+=C2=A0 =C2=A0 {"=3D", "=3D=3D", "!=3D", &qu= ot;<>"}, --"LIKE", "GLOB"}, --"MATCH&qu= ot;, "REGEXP"},
=C2=A0 =C2=A0 =C2=A0{"AND"},<= /div>
=C2=A0 =C2=A0 =C2=A0{"OR"},
=C2=A0}
@@ -475,6 +482,7 @@ for _, op in ipairs(oplist) do
=C2=A0 =C2=A0= =C2=A0 =C2=A0 =C2=A0end
=C2=A0 =C2=A0 =C2=A0end
=C2=A0= end
+
=C2=A0-------------------------------------------= --------------------------------
=C2=A0-- Test the IS and IS NOT = operators.
=C2=A0--
diff --git a/test/sql-tap/gh-3251-s= tring-pattern-comparison.test.lua b/test/sql-tap/gh-3251-string-pattern-com= parison.test.lua
new file mode 100755
index 0000000..2a= 787f2
--- /dev/null
+++ b/test/sql-tap/gh-3251-string-p= attern-comparison.test.lua
@@ -0,0 +1,213 @@
+#!/usr/bi= n/env tarantool
+test =3D require("sqltester")
+test:plan(128)
+
+local prefix =3D "like-test-&= quot;
+
+-- Unicode byte sequences.
+local va= lid_testcases =3D {
+=C2=A0 =C2=A0 '\x01',
+=C2= =A0 =C2=A0 '\x09',
+=C2=A0 =C2=A0 '\x1F',
+=C2=A0 =C2=A0 '\x7F',
+=C2=A0 =C2=A0 '\xC2\x80= 9;,
+=C2=A0 =C2=A0 '\xC2\x90',
+=C2=A0 =C2=A0 &= #39;\xC2\x9F',
+=C2=A0 =C2=A0 '\xE2\x80\xA8',
+=C2=A0 =C2=A0 '\x20\x0B',
+=C2=A0 =C2=A0 '\xE2\x8= 0\xA9',
+}
+
+-- Non-Unicode byte sequenc= es.
+local invalid_testcases =3D {
+=C2=A0 =C2=A0 '= \xE2\x80',
+=C2=A0 =C2=A0 '\xFE\xFF',
+=C2= =A0 =C2=A0 '\xC2',
+=C2=A0 =C2=A0 '\xED\xB0\x80',=
+=C2=A0 =C2=A0 '\xD0',
+}
+
+local like_test_cases =3D
+{
+=C2=A0 =C2=A0 {"1= .1",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT 'AB' = LIKE '_B';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {1}} }= ,
+=C2=A0 =C2=A0 {"1.2",
+=C2=A0 =C2=A0 =C2= =A0 =C2=A0 "SELECT 'CD' LIKE '_B';",
+= =C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2=A0 =C2=A0 {"1.3= ",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT '' LIKE= '_B';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2=A0 =C2=A0 {"1.4",
+=C2=A0 =C2=A0 =C2=A0 = =C2=A0 "SELECT 'AB' LIKE '%B';",
+=C2= =A0 =C2=A0 =C2=A0 =C2=A0 {0, {1}} },
+=C2=A0 =C2=A0 {"1.5&qu= ot;,
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT 'CD' LIKE = '%B';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2=A0 =C2=A0 {"1.6",
+=C2=A0 =C2=A0 =C2=A0 =C2= =A0 "SELECT '' LIKE '%B';",
+=C2=A0 =C2= =A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2=A0 =C2=A0 {"1.7",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT 'AB' LIKE 'A_= _';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2=A0 =C2=A0 {"1.8",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 &q= uot;SELECT 'CD' LIKE 'A__';",
+=C2=A0 =C2=A0= =C2=A0 =C2=A0 {0, {0}} },
+=C2=A0 =C2=A0 {"1.9",
=
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT '' LIKE 'A__'= ;;",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2= =A0 =C2=A0 {"1.10",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "= SELECT 'AB' LIKE 'A_';",
+=C2=A0 =C2=A0 =C2= =A0 =C2=A0 {0, {1}} },
+=C2=A0 =C2=A0 {"1.11",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT 'CD' LIKE 'A_';= ",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2= =A0 =C2=A0 {"1.12",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "= SELECT '' LIKE 'A_';",
+=C2=A0 =C2=A0 =C2=A0= =C2=A0 {0, {0}} },
+=C2=A0 =C2=A0 {"1.13",
+= =C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT 'AB' LIKE 'A';&quo= t;,
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2=A0 = =C2=A0 {"1.14",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELE= CT 'CD' LIKE 'A';",
+=C2=A0 =C2=A0 =C2=A0 = =C2=A0 {0, {0}} },
+=C2=A0 =C2=A0 {"1.15",
+= =C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT '' LIKE 'A';"= ,
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2=A0 =C2= =A0 {"1.16",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT = 'AB' LIKE '_';",
+=C2=A0 =C2=A0 =C2=A0 =C2= =A0 {0, {0}} },
+=C2=A0 =C2=A0 {"1.17",
+=C2= =A0 =C2=A0 =C2=A0 =C2=A0 "SELECT 'CD' LIKE '_';",=
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2=A0 =C2= =A0 {"1.18",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT = '' LIKE '_';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 = {0, {0}} },
+=C2=A0 =C2=A0 {"1.19",
+=C2=A0 = =C2=A0 =C2=A0 =C2=A0 "SELECT 'AB' LIKE '__';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {1}} },
+=C2=A0 =C2=A0 {= "1.20",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT '= CD' LIKE '__';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0= , {1}} },
+=C2=A0 =C2=A0 {"1.21",
+=C2=A0 =C2= =A0 =C2=A0 =C2=A0 "SELECT '' LIKE '__';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2=A0 =C2=A0 {"= ;1.22",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT 'AB= 9; LIKE '%A';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}= } },
+=C2=A0 =C2=A0 {"1.23",
+=C2=A0 =C2=A0 = =C2=A0 =C2=A0 "SELECT 'AB' LIKE '%C';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2=A0 =C2=A0 {"1= .24",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT 'ab'= LIKE '%df';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}}= },
+=C2=A0 =C2=A0 {"1.25",
+=C2=A0 =C2=A0 = =C2=A0 =C2=A0 "SELECT 'abCDF' LIKE '%df';",
=
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {1}} },
+=C2=A0 =C2=A0 {&qu= ot;1.26",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT 'CDF= ' LIKE '%df';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0,= {1}} },
+=C2=A0 =C2=A0 {"1.27",
+=C2=A0 =C2= =A0 =C2=A0 =C2=A0 "SELECT 'ab' LIKE 'a_';",
=
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {1}} },
+=C2=A0 =C2=A0 {&qu= ot;1.28",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT 'abC= DF' LIKE 'a_';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0= , {0}} },
+=C2=A0 =C2=A0 {"1.29",
+=C2=A0 =C2= =A0 =C2=A0 =C2=A0 "SELECT 'CDF' LIKE 'a_';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2=A0 =C2=A0 {&q= uot;1.30",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT 'ab= ' LIKE 'ab%';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0,= {1}} },
+=C2=A0 =C2=A0 {"1.31",
+=C2=A0 =C2= =A0 =C2=A0 =C2=A0 "SELECT 'abCDF' LIKE 'ab%';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {1}} },
+=C2=A0 =C2=A0 = {"1.32",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT '= ;CDF' LIKE 'ab%';",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0= {0, {0}} },
+=C2=A0 =C2=A0 {"1.33",
+=C2=A0 = =C2=A0 =C2=A0 =C2=A0 "SELECT 'ab' LIKE 'abC%';",<= /div>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2=A0 =C2=A0= {"1.34",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT = 9;abCDF' LIKE 'abC%';",
+=C2=A0 =C2=A0 =C2=A0 = =C2=A0 {0, {1}} },
+=C2=A0 =C2=A0 {"1.35",
+= =C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT 'CDF' LIKE 'abC%';= ",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {0}} },
+=C2= =A0 =C2=A0 {"1.36",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "= SELECT 'ab' LIKE 'a_%';",
+=C2=A0 =C2=A0 =C2= =A0 =C2=A0 {0, {1}} },
+=C2=A0 =C2=A0 {"1.37",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 "SELECT 'abCDF' LIKE 'a_%&#= 39;;",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 {0, {1}} },
+= =C2=A0 =C2=A0 {"1.38",
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 &qu= ot;SELECT 'CDF' LIKE 'a_%';",
+=C2=A0 =C2=A0= =C2=A0 =C2=A0 {0, {0}} },
+}
+
+test:do_catc= hsql_set_test(like_test_cases, prefix)
+
+-- Invalid te= stcases.
+for i, tested_string in ipairs(invalid_testcases) do
+
+=C2=A0 =C2=A0 -- We should raise an error in case
+=C2=A0 =C2=A0 -- pattern contains invalid characters.
+
+=C2=A0 =C2=A0 local test_name =3D prefix .. "2." .. tost= ring(i)
+=C2=A0 =C2=A0 local test_itself =3D "SELECT 'ab= c' LIKE 'ab" .. tested_string .. "';"
= +=C2=A0 =C2=A0 test:do_catchsql_test(test_name, test_itself,
+=C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 {1, "LIKE or GLOB pattern can only contain UTF-8 charact= ers"})
+
+=C2=A0 =C2=A0 test_name =3D prefix .. &q= uot;3." .. tostring(i)
+=C2=A0 =C2=A0 test_itself =3D "= SELECT 'abc' LIKE 'abc" .. tested_string .. "';&q= uot;
+=C2=A0 =C2=A0 test:do_catchsql_test(test_name, test_itself,=
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0 {1, "LIKE or GLOB pattern can only contain= UTF-8 characters"})
+
+=C2=A0 =C2=A0 test_name = =3D prefix .. "4." .. tostring(i)
+=C2=A0 =C2=A0 test_i= tself =3D "SELECT 'abc' LIKE 'ab" .. tested_string ..= "c';"
+=C2=A0 =C2=A0 test:do_catchsql_test(test_na= me, test_itself,
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 {1, "LIKE or GLOB patter= n can only contain UTF-8 characters"})
+
+=C2=A0 = =C2=A0 -- Just skipping if row value predicand contains invalid character.<= /div>
+
+=C2=A0 =C2=A0 test_name =3D prefix .. "5."= .. tostring(i)
+=C2=A0 =C2=A0 test_itself =3D "SELECT '= ab" .. tested_string .. "' LIKE 'abc';"
+=C2=A0 =C2=A0 test:do_execsql_test(test_name, test_itself, {0})
+
+=C2=A0 =C2=A0 test_name =3D prefix .. "6." .. tost= ring(i)
+=C2=A0 =C2=A0 test_itself =3D "SELECT 'abc"= ; .. tested_string .. "' LIKE 'abc';"
+=C2= =A0 =C2=A0 test:do_execsql_test(test_name, test_itself, {0})
+
+=C2=A0 =C2=A0 test_name =3D prefix .. "7." .. tostring(i= )
+=C2=A0 =C2=A0 test_itself =3D "SELECT 'ab" .. te= sted_string .. "c' LIKE 'abc';"
+=C2=A0 =C2= =A0 test:do_execsql_test(test_name, test_itself, {0})
+end
<= div>+
+-- Valid testcases.
+for i, tested_string in ipa= irs(valid_testcases) do
+=C2=A0 =C2=A0 test_name =3D prefix .. &q= uot;8." .. tostring(i)
+=C2=A0 =C2=A0 local test_itself =3D = "SELECT 'abc' LIKE 'ab" .. tested_string .. "= 9;;"
+=C2=A0 =C2=A0 test:do_execsql_test(test_name, test_its= elf, {0})
+
+=C2=A0 =C2=A0 test_name =3D prefix .. &quo= t;9." .. tostring(i)
+=C2=A0 =C2=A0 test_itself =3D "SE= LECT 'abc' LIKE 'abc" .. tested_string .. "';&quo= t;
+=C2=A0 =C2=A0 test:do_execsql_test(test_name, test_itself, {0= })
+
+=C2=A0 =C2=A0 test_name =3D prefix .. "10.&q= uot; .. tostring(i)
+=C2=A0 =C2=A0 test_itself =3D "SELECT &= #39;abc' LIKE 'ab" .. tested_string .. "c';"
+=C2=A0 =C2=A0 test:do_execsql_test(test_name, test_itself, {0})
+
+=C2=A0 =C2=A0 te= st_name =3D prefix .. "11." .. tostring(i)
+=C2=A0 =C2= =A0 test_itself =3D "SELECT 'ab" .. tested_string .. "&#= 39; LIKE 'abc';"
+=C2=A0 =C2=A0 test:do_execsql_test= (test_name, test_itself, {0})
<= div>+
+=C2=A0 =C2=A0 test_name =3D prefix .. "12." .. t= ostring(i)
+=C2=A0 =C2=A0 test_itself =3D "SELECT 'abc&q= uot; .. tested_string .. "' LIKE 'abc';"
+= =C2=A0 =C2=A0 test:do_execsql_test(test_name, test_itself, {0})
+=
+=C2=A0 =C2=A0 test_name =3D prefix .. "13." .. tostri= ng(i)
+=C2=A0 =C2=A0 test_itself =3D "SELECT 'ab" .= . tested_string .. "c' LIKE 'abc';"
+=C2=A0= =C2=A0 test:do_execsql_test(test_name, test_itself, {0})
+end
+
+test:finish_test()

--000000000000ab5f3a05725d790a--