From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id E5F9F27815 for ; Fri, 27 Jul 2018 07:28:35 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id 195MGAZiEzej for ; Fri, 27 Jul 2018 07:28:35 -0400 (EDT) Received: from mail-lj1-f174.google.com (mail-lj1-f174.google.com [209.85.208.174]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 196BA277EF for ; Fri, 27 Jul 2018 07:28:34 -0400 (EDT) Received: by mail-lj1-f174.google.com with SMTP id w16-v6so554621ljh.12 for ; Fri, 27 Jul 2018 04:28:34 -0700 (PDT) MIME-Version: 1.0 References: <1530190036-10105-1-git-send-email-hollow653@gmail.com> <20180718024314.be245cmsgklxuvnk@tkn_work_nb> In-Reply-To: From: Nikita Tatunov Date: Fri, 27 Jul 2018 14:28:21 +0300 Message-ID: Subject: [tarantool-patches] Re: [PATCH] sql: LIKE & GLOB pattern comparison issue Content-Type: multipart/alternative; boundary="0000000000007398070571f9696a" Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: avkhatskevich@tarantool.org Cc: Alexander Turenko , tarantool-patches@freelists.org --0000000000007398070571f9696a Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable Hello, Alex! Thanks for the review! =D1=87=D1=82, 19 =D0=B8=D1=8E=D0=BB. 2018 =D0=B3. =D0=B2 14:56, Alex Khatsk= evich : > Hi. > > I have some questions related to the func.c file, however before that I > would ask you to fix tests. > > General ideas: > 1. Those tests are regresson tests (it just tests that problem will not > appear in the future). > We name those tests in the following manear: > gh-XXXX-short-description.test.lua > Done. > 2. The thing you test is not related to a table and other columns. > Please, convert the tests to the next format: {[[select '' like > '_B';]], {1}]]}. > To make it more readable, you can do it like `like_testcases` in > `sql-tap/collation.test.lua`. > Done. > 3. There is two extra things that should be tested: > 1. When string or pattern ends with incorrect unicode symbol (e.g. > half of the whole unicode symbol) > 2. String or pattern contains incorrect unicode symbol. > > Refactored test with this idea taken into account. Now comparing function is only supposed to work with TEXT types, which led to the part of #3572 propositions. Also added error output in case pattern contains invalid symbol & now if string contains invalid symbol it can't be matched with whatever pattern. Some minor fixes to patternCompare function as well. Here's diff: diff --git a/src/box/sql/func.c b/src/box/sql/func.c index c06e3bd..5b53076 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -617,13 +617,17 @@ struct compareInfo { u8 noCase; /* true to ignore case differences */ }; -/* - * For LIKE and GLOB matching on EBCDIC machines, assume that every - * character is exactly one byte in size. Also, provde the Utf8Read() - * macro for fast reading of the next character in the common case where - * the next character is ASCII. +/** + * Providing there are symbols in string s this + * macro returns UTF-8 code of character and + * promotes pointer to the next symbol in the string. + * Otherwise return code is SQL_END_OF_STRING. */ -#define Utf8Read(s, e) ucnv_getNextUChar(pUtf8conv, &s, e, &status) +#define Utf8Read(s, e) (((s) < (e)) ?\ + ucnv_getNextUChar(pUtf8conv, &(s), (e), &(status)) : 0) + +#define SQL_END_OF_STRING 0 +#define SQL_INVALID_UTF8_SYMBOL 0xfffd static const struct compareInfo globInfo =3D { '*', '?', '[', 0 }; @@ -643,51 +647,61 @@ static const struct compareInfo likeInfoAlt =3D { '%'= , '_', 0, 0 }; #define SQLITE_MATCH 0 #define SQLITE_NOMATCH 1 #define SQLITE_NOWILDCARDMATCH 2 +#define SQL_PROHIBITED_PATTERN 3 -/* - * Compare two UTF-8 strings for equality where the first string is - * a GLOB or LIKE expression. Return values: - * - * SQLITE_MATCH: Match - * SQLITE_NOMATCH: No match - * SQLITE_NOWILDCARDMATCH: No match in spite of having * or % wildcards. +/** + * Compare two UTF-8 strings for equality where the first string + * is a GLOB or LIKE expression. * * Globbing rules: * - * '*' Matches any sequence of zero or more characters. + * '*' Matches any sequence of zero or more characters. * - * '?' Matches exactly one character. + * '?' Matches exactly one character. * - * [...] Matches one character from the enclosed list of - * characters. + * [...] Matches one character from the enclosed list of + * characters. * - * [^...] Matches one character not in the enclosed list. + * [^...] Matches one character not in the enclosed list. * - * With the [...] and [^...] matching, a ']' character can be included - * in the list by making it the first character after '[' or '^'. A - * range of characters can be specified using '-'. Example: - * "[a-z]" matches any single lower-case letter. To match a '-', make - * it the last character in the list. + * With the [...] and [^...] matching, a ']' character can be + * included in the list by making it the first character after + * '[' or '^'. A range of characters can be specified using '-'. + * Example: "[a-z]" matches any single lower-case letter. + * To match a '-', make it the last character in the list. * * Like matching rules: * - * '%' Matches any sequence of zero or more characters + * '%' Matches any sequence of zero or more characters. * - ** '_' Matches any one character + ** '_' Matches any one character. * * Ec Where E is the "esc" character and c is any other - * character, including '%', '_', and esc, match exactly c. + * character, including '%', '_', and esc, match + * exactly c. * * The comments within this routine usually assume glob matching. * - * This routine is usually quick, but can be N**2 in the worst case. + * This routine is usually quick, but can be N**2 in the worst + * case. + * + * @param pattern String containing comparison pattern. + * @param string String being compared. + * @param compareInfo Information about how to compare. + * @param matchOther The escape char (LIKE) or '[' (GLOB). + * + * @retval SQLITE_MATCH: Match. + * SQLITE_NOMATCH: No match. + * SQLITE_NOWILDCARDMATCH: No match in spite of having * + * or % wildcards. + * SQL_PROHIBITED_PATTERN Pattern contains invalid + * symbol. */ static int -patternCompare(const char * pattern, /* The glob pattern */ - const char * string, /* The string to compare against the glob */ - const struct compareInfo *pInfo, /* Information about how to do the compare */ - UChar32 matchOther /* The escape char (LIKE) or '[' (GLOB) */ - ) +sql_utf8_pattern_compare(const char * pattern, + const char * string, + const struct compareInfo *pInfo, + UChar32 matchOther) { UChar32 c, c2; /* Next pattern and input string chars */ UChar32 matchOne =3D pInfo->matchOne; /* "?" or "_" */ @@ -698,29 +712,41 @@ patternCompare(const char * pattern, /* The glob pattern */ const char * string_end =3D string + strlen(string); UErrorCode status =3D U_ZERO_ERROR; - while (pattern < pattern_end){ - c =3D Utf8Read(pattern, pattern_end); + while ((c =3D Utf8Read(pattern, pattern_end)) !=3D SQL_END_OF_STRING) { + if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; if (c =3D=3D matchAll) { /* Match "*" */ /* Skip over multiple "*" characters in the pattern. If there * are also "?" characters, skip those as well, but consume a * single character of the input string for each "?" skipped */ - while (pattern < pattern_end){ - c =3D Utf8Read(pattern, pattern_end); + while ((c =3D Utf8Read(pattern, pattern_end)) !=3D + SQL_END_OF_STRING) { + if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; if (c !=3D matchAll && c !=3D matchOne) break; - if (c =3D=3D matchOne - && Utf8Read(string, string_end) =3D=3D 0) { + if (c =3D=3D matchOne && + (c2 =3D Utf8Read(string, string_end)) =3D=3D + SQL_END_OF_STRING) return SQLITE_NOWILDCARDMATCH; - } + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQLITE_NOMATCH; } /* "*" at the end of the pattern matches */ - if (pattern =3D=3D pattern_end) + if (c =3D=3D SQL_END_OF_STRING) { + while ((c2 =3D Utf8Read(string, string_end)) !=3D + SQL_END_OF_STRING) + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQLITE_NOMATCH; return SQLITE_MATCH; + } if (c =3D=3D matchOther) { if (pInfo->matchSet =3D=3D 0) { c =3D Utf8Read(pattern, pattern_end); - if (c =3D=3D 0) + if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; + if (c =3D=3D SQL_END_OF_STRING) return SQLITE_NOWILDCARDMATCH; } else { /* "[...]" immediately follows the "*". We have to do a slow @@ -729,13 +755,16 @@ patternCompare(const char * pattern, /* The glob pattern */ assert(matchOther < 0x80); /* '[' is a single-byte character */ while (string < string_end) { int bMatch =3D - patternCompare(&pattern[-1], - string, - pInfo, - matchOther); + sql_utf8_pattern_compare( + &pattern[-1], + string, + pInfo, + matchOther); if (bMatch !=3D SQLITE_NOMATCH) return bMatch; - Utf8Read(string, string_end); + c =3D Utf8Read(string, string_end); + if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQLITE_NOMATCH; } return SQLITE_NOWILDCARDMATCH; } @@ -764,6 +793,8 @@ patternCompare(const char * pattern, /* The glob pattern */ * languages. */ c2 =3D Utf8Read(string, string_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQLITE_NOMATCH; if (!noCase) { if (c2 !=3D c) continue; @@ -771,9 +802,10 @@ patternCompare(const char * pattern, /* The glob pattern */ if (c2 !=3D c && u_tolower(c2) !=3D c) continue; } - bMatch =3D - patternCompare(pattern, string, - pInfo, matchOther); + bMatch =3D sql_utf8_pattern_compare(pattern, + string, + pInfo, + matchOther); if (bMatch !=3D SQLITE_NOMATCH) return bMatch; } @@ -782,7 +814,9 @@ patternCompare(const char * pattern, /* The glob pattern */ if (c =3D=3D matchOther) { if (pInfo->matchSet =3D=3D 0) { c =3D Utf8Read(pattern, pattern_end); - if (c =3D=3D 0) + if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; + if (c =3D=3D SQL_END_OF_STRING) return SQLITE_NOMATCH; zEscaped =3D pattern; } else { @@ -790,23 +824,33 @@ patternCompare(const char * pattern, /* The glob pattern */ int seen =3D 0; int invert =3D 0; c =3D Utf8Read(string, string_end); + if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQLITE_NOMATCH; if (string =3D=3D string_end) return SQLITE_NOMATCH; c2 =3D Utf8Read(pattern, pattern_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; if (c2 =3D=3D '^') { invert =3D 1; c2 =3D Utf8Read(pattern, pattern_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; } if (c2 =3D=3D ']') { if (c =3D=3D ']') seen =3D 1; c2 =3D Utf8Read(pattern, pattern_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; } - while (c2 && c2 !=3D ']') { + while (c2 !=3D SQL_END_OF_STRING && c2 !=3D ']') { if (c2 =3D=3D '-' && pattern[0] !=3D ']' && pattern < pattern_end && prior_c > 0) { c2 =3D Utf8Read(pattern, pattern_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; if (c >=3D prior_c && c <=3D c2) seen =3D 1; prior_c =3D 0; @@ -817,14 +861,19 @@ patternCompare(const char * pattern, /* The glob pattern */ prior_c =3D c2; } c2 =3D Utf8Read(pattern, pattern_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQL_PROHIBITED_PATTERN; } - if (pattern =3D=3D pattern_end || (seen ^ invert) =3D=3D 0) { + if (pattern =3D=3D pattern_end || + (seen ^ invert) =3D=3D 0) { return SQLITE_NOMATCH; } continue; } } c2 =3D Utf8Read(string, string_end); + if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) + return SQLITE_NOMATCH; if (c =3D=3D c2) continue; if (noCase){ @@ -839,7 +888,8 @@ patternCompare(const char * pattern, /* The glob pattern */ c =3D=3D u_tolower(c2)) continue; } - if (c =3D=3D matchOne && pattern !=3D zEscaped && c2 !=3D 0) + if (c =3D=3D matchOne && pattern !=3D zEscaped && + c2 !=3D SQL_END_OF_STRING) continue; return SQLITE_NOMATCH; } @@ -853,8 +903,7 @@ patternCompare(const char * pattern, /* The glob pattern */ int sqlite3_strglob(const char *zGlobPattern, const char *zString) { - return patternCompare(zGlobPattern, zString, &globInfo, - '['); + return sql_utf8_pattern_compare(zGlobPattern, zString, &globInfo, '['); } /* @@ -864,7 +913,7 @@ sqlite3_strglob(const char *zGlobPattern, const char *zString) int sqlite3_strlike(const char *zPattern, const char *zStr, unsigned int esc) { - return patternCompare(zPattern, zStr, &likeInfoNorm, esc); + return sql_utf8_pattern_compare(zPattern, zStr, &likeInfoNorm, esc); } /* @@ -910,8 +959,9 @@ likeFunc(sqlite3_context * context, int argc, sqlite3_value ** argv) zB =3D (const char *) sqlite3_value_text(argv[0]); zA =3D (const char *) sqlite3_value_text(argv[1]); - /* Limit the length of the LIKE or GLOB pattern to avoid problems - * of deep recursion and N*N behavior in patternCompare(). + /* Limit the length of the LIKE or GLOB pattern to avoid + * problems of deep recursion and N*N behavior in + * sql_utf8_pattern_compare(). */ nPat =3D sqlite3_value_bytes(argv[0]); testcase(nPat =3D=3D db->aLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]); @@ -947,7 +997,12 @@ likeFunc(sqlite3_context * context, int argc, sqlite3_value ** argv) sqlite3_like_count++; #endif int res; - res =3D patternCompare(zB, zA, pInfo, escape); + res =3D sql_utf8_pattern_compare(zB, zA, pInfo, escape); + if (res =3D=3D SQL_PROHIBITED_PATTERN) { + sqlite3_result_error(context, "LIKE or GLOB pattern can only" + " contain UTF-8 characters", -1); + return; + } sqlite3_result_int(context, res =3D=3D SQLITE_MATCH); } diff --git a/test/sql-tap/e_expr.test.lua b/test/sql-tap/e_expr.test.lua index 13d3a96..051210a 100755 --- a/test/sql-tap/e_expr.test.lua +++ b/test/sql-tap/e_expr.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test =3D require("sqltester") -test:plan(12431) +test:plan(10665) --!./tcltestrunner.lua -- 2010 July 16 @@ -77,8 +77,10 @@ local operations =3D { {"<>", "ne1"}, {"!=3D", "ne2"}, {"IS", "is"}, - {"LIKE", "like"}, - {"GLOB", "glob"}, +-- NOTE: This test needs refactoring after deletion of GLOB & +-- type restrictions for LIKE. +-- {"LIKE", "like"}, +-- {"GLOB", "glob"}, {"AND", "and"}, {"OR", "or"}, {"MATCH", "match"}, @@ -96,7 +98,7 @@ operations =3D { {"+", "-"}, {"<<", ">>", "&", "|"}, {"<", "<=3D", ">", ">=3D"}, - {"=3D", "=3D=3D", "!=3D", "<>", "LIKE", "GLOB"}, --"MATCH", "REGEXP"}, + {"=3D", "=3D=3D", "!=3D", "<>"}, --"LIKE", "GLOB"}, "MATCH", "REGEXP"}= , {"AND"}, {"OR"}, } @@ -475,6 +477,7 @@ for _, op in ipairs(oplist) do end end end + --------------------------------------------------------------------------= - -- Test the IS and IS NOT operators. -- diff --git a/test/sql-tap/gh-3251-string-pattern-comparison.lua b/test/sql-tap/gh-3251-string-pattern-comparison.lua new file mode 100755 index 0000000..0202efc --- /dev/null +++ b/test/sql-tap/gh-3251-string-pattern-comparison.lua @@ -0,0 +1,238 @@ +#!/usr/bin/env tarantool +test =3D require("sqltester") +test:plan(106) + +local prefix =3D "like-test-" + +-- Unciode byte sequences. + +local valid_testcases =3D { + '\x01', + '\x09', + '\x1F', + '\x7F', + '\xC2\x80', + '\xC2\x90', + '\xC2\x9F', + '\xE2\x80\xA8', + '\x20\x0B', + '\xE2\x80\xA9', +} + +-- Non-Unicode byte sequences. +local invalid_testcases =3D { + '\xE2\x80', + '\xFE\xFF', + '\xC2', + '\xED\xB0\x80', + '\xD0', +} + +local like_test_cases =3D +{ + {"1.1", + [[ + CREATE TABLE t2 (column1 INTEGER, + column2 VARCHAR(100), + column3 BLOB, + column4 FLOAT, + PRIMARY KEY (column1, column2)); + INSERT INTO t2 VALUES (1, 'AB', X'4142', 5.5); + INSERT INTO t2 VALUES (1, 'CD', X'2020', 1E4); + INSERT INTO t2 VALUES (2, 'AB', X'2020', 12.34567); + INSERT INTO t2 VALUES (-1000, '', X'', 0.0); + CREATE TABLE t1 (a INT PRIMARY KEY, str VARCHAR(100)); + INSERT INTO t1 VALUES (1, 'ab'); + INSERT INTO t1 VALUES (2, 'abCDF'); + INSERT INTO t1 VALUES (3, 'CDF'); + CREATE TABLE t (s1 CHAR(2) PRIMARY KEY, s2 CHAR(2)); + INSERT INTO t VALUES ('AB', 'AB'); + ]], {0}}, + {"1.2", + [[ + SELECT column1, column2, column1 * column4 FROM + t2 WHERE column2 LIKE '_B'; + ]], + {0, {1, 'AB', 5.5, 2, 'AB', 24.69134}} }, + {"1.3", + "SELECT column1, column2 FROM t2 WHERE column2 LIKE '%B';", + {0, {1, 'AB', 2, 'AB'}} }, + {"1.4", + "SELECT column1, column2 FROM t2 WHERE column2 LIKE 'A__';", + {0, {}} }, + {"1.5", + "SELECT column1, column2 FROM t2 WHERE column2 LIKE 'A_';", + {0, {1, 'AB', 2, 'AB'}} }, + {"1.6", + "SELECT column1, column2 FROM t2 WHERE column2 LIKE 'A';", + {0, {}} }, + {"1.7", + "SELECT column1, column2 FROM t2 WHERE column2 LIKE '_';", + {0, {}} }, + {"1.8", + "SELECT * FROM t WHERE s1 LIKE '%A';", + {0, {}} }, + {"1.9", + "SELECT * FROM t WHERE s1 LIKE '%C';", + {0, {}} }, + {"1.10", + "SELECT * FROM t1 WHERE str LIKE '%df';", + {0, {2, 'abCDF', 3, 'CDF'}} }, + {"1.11", + "SELECT * FROM t1 WHERE str LIKE 'a_';", + {0, {1, 'ab'}} }, + {"1.12", + "SELECT column1, column2 FROM t2 WHERE column2 LIKE '__';", + {0, {1, 'AB', 1, 'CD', 2, 'AB'}} }, + {"1.13", + "SELECT str FROM t1 WHERE str LIKE 'ab%';", + {0, {'ab', 'abCDF'}} }, + {"1.14", + "SELECT str FROM t1 WHERE str LIKE 'abC%';", + {0, {'abCDF'}} }, + {"1.15", + "SELECT str FROM t1 WHERE str LIKE 'a_%';", + {0, {'ab', 'abCDF'}} }, + {"1.16", + [[ + DROP TABLE t1; + DROP TABLE t2; + DROP TABLE t; + ]], {0}}, +} + +test:do_catchsql_set_test(like_test_cases, prefix) + +-- Invalid testcases. + +for i, tested_string in ipairs(invalid_testcases) do + local test_name =3D prefix .. "2." .. tostring(i) + local test_itself =3D "SELECT 'abc' LIKE 'ab" .. tested_string .. "';" + +-- We should raise an error if pattern contains invalid characters. + + test:do_catchsql_test( + test_name, + test_itself, { + -- + 1, "LIKE or GLOB pattern can only contain UTF-8 characters" + -- + }) + + test_name =3D prefix .. "3." .. tostring(i) + test_itself =3D "SELECT 'abc' LIKE 'abc" .. tested_string .. "';" + test:do_catchsql_test( + test_name, + test_itself, { + -- + 1, "LIKE or GLOB pattern can only contain UTF-8 characters" + -- + }) + + test_name =3D prefix .. "4." .. tostring(i) + test_itself =3D "SELECT 'abc' LIKE 'ab" .. tested_string .. "c';" + test:do_catchsql_test( + test_name, + test_itself, { + -- + 1, "LIKE or GLOB pattern can only contain UTF-8 characters" + -- + }) + +-- Just skipping if row value predicand contains invalid character. + + test_name =3D prefix .. "5." .. tostring(i) + test_itself =3D "SELECT 'ab" .. tested_string .. "' LIKE 'abc';" + test:do_execsql_test( + test_name, + test_itself, { + -- + 0 + -- + }) + + test_name =3D prefix .. "6." .. tostring(i) + test_itself =3D "SELECT 'abc" .. tested_string .. "' LIKE 'abc';" + test:do_execsql_test( + test_name, + test_itself, { + -- + 0 + -- + }) + + test_name =3D prefix .. "7." .. tostring(i) + test_itself =3D "SELECT 'ab" .. tested_string .. "c' LIKE 'abc';" + test:do_execsql_test( + test_name, + test_itself, { + -- + 0 + -- + }) +end + +-- Valid testcases. + +for i, tested_string in ipairs(valid_testcases) do + test_name =3D prefix .. "8." .. tostring(i) + local test_itself =3D "SELECT 'abc' LIKE 'ab" .. tested_string .. "';" + test:do_execsql_test( + test_name, + test_itself, { + -- + 0 + -- + }) + + test_name =3D prefix .. "9." .. tostring(i) + test_itself =3D "SELECT 'abc' LIKE 'abc" .. tested_string .. "';" + test:do_execsql_test( + test_name, + test_itself, { + -- + 0 + -- + }) + + test_name =3D prefix .. "10." .. tostring(i) + test_itself =3D "SELECT 'abc' LIKE 'ab" .. tested_string .. "c';" + test:do_execsql_test( + test_name, + test_itself, { + -- + 0 + -- + }) + test_name =3D prefix .. "11." .. tostring(i) + test_itself =3D "SELECT 'ab" .. tested_string .. "' LIKE 'abc';" + test:do_execsql_test( + test_name, + test_itself, { + -- + 0 + -- + }) + + test_name =3D prefix .. "12." .. tostring(i) + test_itself =3D "SELECT 'abc" .. tested_string .. "' LIKE 'abc';" + test:do_execsql_test( + test_name, + test_itself, { + -- + 0 + -- + }) + + test_name =3D prefix .. "13." .. tostring(i) + test_itself =3D "SELECT 'ab" .. tested_string .. "c' LIKE 'abc';" + test:do_execsql_test( + test_name, + test_itself, { + -- + 0 + -- + }) +end + +test:finish_test() --0000000000007398070571f9696a Content-Type: text/html; charset="UTF-8" Content-Transfer-Encoding: quoted-printable
Hello, Alex!
Thanks for the review!

=D1=87=D1=82, 19 =D0=B8=D1=8E=D0=BB= . 2018 =D0=B3. =D0=B2 14:56, Alex Khatskevich <avkhatskevich@tarantool.org>:
=20 =20 =20

Hi.

I have some questions related to the func.c file, however before that I would ask you to fix tests.

General ideas:

1. Those tests are regresson tests (it just tests that problem will not appear in the future).
=C2=A0=C2=A0=C2=A0 We name those tests in the following manear: gh-XXXX-short-description.test.lua

Done.
=C2=A0
2. The thing you test is not related to a table and other columns.
=C2=A0=C2=A0=C2=A0 Please, convert the tests to the next format: {[[sel= ect '' like '_B';]], {1}]]}.
=C2=A0=C2=A0=C2=A0 To make it more readable, you can do it like `like_t= estcases` in `sql-tap/collation.test.lua`.

Done.
=C2=A0
3. There is two extra things that should be tested:
=C2=A0=C2=A0=C2=A0 1. When string or pattern ends with incorrect unicod= e symbol (e.g. half of the whole unicode symbol)
=C2=A0=C2=A0=C2=A0 2. String or pattern contains incorrect unicode symb= ol.


Refactored test with this id= ea taken into account.
=C2=A0
Now comparing function is= only supposed to work with TEXT types,
which led to the part of = #3572 propositions.

Also added error output in cas= e pattern contains invalid symbol &
now if string contains in= valid symbol it can't be matched with whatever
pattern. Some = minor fixes to patternCompare function as well.

He= re's diff:

diff --git a/src/box/sql/func.c b/src= /box/sql/func.c
index c06e3bd..5b53076 100644
--- a/src= /box/sql/func.c
+++ b/src/box/sql/func.c
@@ -617,13 +61= 7,17 @@ struct compareInfo {
=C2=A0 u8 noCase; /* true to ign= ore case differences */
=C2=A0};
=C2=A0
-/*
- * For LIKE and GLOB matching on EBCDIC machines, assume that eve= ry
- * character is exactly one byte in size.=C2=A0 Also, provde = the Utf8Read()
- * macro for fast reading of the next character i= n the common case where
- * the next character is ASCII.
+/**
+ * Providing there are symbols in string s this
+ * macro returns UTF-8 code of character and
+ * promotes point= er to the next symbol in the string.
+ * Otherwise return code is= SQL_END_OF_STRING.
=C2=A0 */
-#define Utf8Read(s, e)= =C2=A0 =C2=A0 ucnv_getNextUChar(pUtf8conv, &s, e, &status)
+#define Utf8Read(s, e) (((s) < (e)) ?\
+ ucnv_getNextUChar(pUtf8conv, &(s), (e), &(stat= us)) : 0)
+
+#define SQL_END_OF_STRING=C2=A0 =C2=A0 =C2= =A0 =C2=A0 0
+#define SQL_INVALID_UTF8_SYMBOL=C2=A0 0xfffd
<= div>=C2=A0
=C2=A0static const struct compareInfo globInfo =3D { &= #39;*', '?', '[', 0 };
=C2=A0
@@ -6= 43,51 +647,61 @@ static const struct compareInfo likeInfoAlt =3D { '%&#= 39;, '_', 0, 0 };
=C2=A0#define SQLITE_MATCH=C2=A0 =C2=A0= =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A00
=C2=A0#define SQLITE_NOMATCH= =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A01
=C2=A0#define SQLITE_N= OWILDCARDMATCH=C2=A0 =C2=A02
+#define SQL_PROHIBITED_PATTERN=C2= =A0 =C2=A03
=C2=A0
-/*
- * Compare two UTF-8 = strings for equality where the first string is
- * a GLOB or LIKE= expression.=C2=A0 Return values:
- *
- *=C2=A0 =C2=A0 = SQLITE_MATCH:=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 Match
- *= =C2=A0 =C2=A0 SQLITE_NOMATCH:=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 No match
- *=C2=A0 =C2=A0 SQLITE_NOWILDCARDMATCH:=C2=A0 No match in spite of= having * or % wildcards.
+/**
+ * Compare two UTF-8 st= rings for equality where the first string
+ * is a GLOB or LIKE e= xpression.
=C2=A0 *
=C2=A0 * Globbing rules:
= =C2=A0 *
- *=C2=A0 =C2=A0 =C2=A0 '*'=C2=A0 =C2=A0 =C2=A0 = =C2=A0Matches any sequence of zero or more characters.
+ *=C2=A0 = =C2=A0 =C2=A0 '*'=C2=A0 =C2=A0 =C2=A0 Matches any sequence of zero = or more characters.
=C2=A0 *
- *=C2=A0 =C2=A0 =C2=A0 &#= 39;?'=C2=A0 =C2=A0 =C2=A0 =C2=A0Matches exactly one character.
+ *=C2=A0 =C2=A0 =C2=A0 '?'=C2=A0 =C2=A0 =C2=A0 Matches exactly o= ne character.
=C2=A0 *
- *=C2=A0 =C2=A0 =C2=A0[...]=C2= =A0 =C2=A0 =C2=A0 Matches one character from the enclosed list of
- *=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 characters.
+ *=C2=A0 =C2=A0 =C2=A0[...]=C2=A0 =C2=A0 =C2=A0Matches one characte= r from the enclosed list of
+ *=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 =C2=A0 =C2=A0characters.
=C2=A0 *
- *=C2=A0 =C2= =A0 =C2=A0[^...]=C2=A0 =C2=A0 =C2=A0Matches one character not in the enclos= ed list.
+ *=C2=A0 =C2=A0 =C2=A0[^...]=C2=A0 =C2=A0 Matches one c= haracter not in the enclosed list.
=C2=A0 *
- * With th= e [...] and [^...] matching, a ']' character can be included
<= div>- * in the list by making it the first character after '[' or &= #39;^'.=C2=A0 A
- * range of characters can be specified usin= g '-'.=C2=A0 Example:
- * "[a-z]" matches any s= ingle lower-case letter.=C2=A0 To match a '-', make
- * i= t the last character in the list.
+ * With the [...] and [^...] m= atching, a ']' character can be
+ * included in the list = by making it the first character after
+ * '[' or '^&= #39;. A range of characters can be specified using '-'.
+= * Example: "[a-z]" matches any single lower-case letter.
+ * To match a '-', make it the last character in the list.
=C2=A0 *
=C2=A0 * Like matching rules:
=C2=A0 *
- *=C2=A0 =C2=A0 =C2=A0 '%'=C2=A0 =C2=A0 =C2=A0 =C2=A0Matc= hes any sequence of zero or more characters
+ *=C2=A0 =C2=A0 =C2= =A0 '%'=C2=A0 =C2=A0 =C2=A0 =C2=A0Matches any sequence of zero or m= ore characters.
=C2=A0 *
- **=C2=A0 =C2=A0 =C2=A0'_= '=C2=A0 =C2=A0 =C2=A0 =C2=A0Matches any one character
+ **=C2= =A0 =C2=A0 =C2=A0'_'=C2=A0 =C2=A0 =C2=A0 =C2=A0Matches any one char= acter.
=C2=A0 *
=C2=A0 *=C2=A0 =C2=A0 =C2=A0 Ec=C2=A0 = =C2=A0 =C2=A0 =C2=A0 Where E is the "esc" character and c is any = other
- *=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = character, including '%', '_', and esc, match exactly c.
+ *=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 characte= r, including '%', '_', and esc, match
+ *=C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 exactly c.
=C2= =A0 *
=C2=A0 * The comments within this routine usually assume gl= ob matching.
=C2=A0 *
- * This routine is usually quick= , but can be N**2 in the worst case.
+ * This routine is usually = quick, but can be N**2 in the worst
+ * case.
+ *
=
+ * @param pattern String containing comparison pattern.
+ *= @param string String being compared.
+ * @param compareInfo Info= rmation about how to compare.
+ * @param matchOther The escape ch= ar (LIKE) or '[' (GLOB).
+ *
+ * @retval SQLITE= _MATCH:=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 Match.
+ * =C2=A0 =C2=A0SQLITE_NOMATCH:=C2=A0 =C2=A0= =C2=A0 =C2=A0 =C2=A0 No match.
+ * =C2=A0 =C2=A0SQLITE_NOWILDCARDMATCH:=C2=A0 No match in spite of h= aving *
+ * =C2=A0 =C2= =A0 or % wildcards.
+ * = =C2=A0 =C2=A0SQL_PROHIBITED_PATTERN=C2=A0 =C2=A0Pattern contains invalid
+ * =C2=A0 =C2=A0 symbol.=
=C2=A0 */
=C2=A0static int
-patternCompare(c= onst char * pattern, /* The glob pat= tern */
- =C2=A0 =C2=A0 = =C2=A0 =C2=A0const char * string, /*= The string to compare against the glob */
- =C2=A0 =C2=A0 =C2=A0 =C2=A0const struct compareInfo *pIn= fo, /* Information about how to do t= he compare */
- =C2=A0 =C2= =A0 =C2=A0 =C2=A0UChar32 matchOther = /* The escape char (LIKE) or '[' (GLOB) */
-=C2=A0 =C2=A0= )
+sql_utf8_pattern_compare(const char * pattern,
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0const char = * string,
+ =C2=A0 =C2=A0 = =C2=A0 =C2=A0const struct compareInfo *pInfo,
+ =C2=A0 =C2=A0 =C2=A0 =C2=A0UChar32 matchOther)
<= div>=C2=A0{
=C2=A0 UChar32= c, c2; /* Next pattern and input s= tring chars */
=C2=A0 UCha= r32 matchOne =3D pInfo->matchOne; /* "?" or "_" */
@@ -698,29 +712,41 @@ patte= rnCompare(const char * pattern, /* T= he glob pattern */
=C2=A0 = const char * string_end =3D string + strlen(string);
=C2=A0 UErrorCode status =3D U_ZERO_ERROR;
=
=C2=A0
- while (patte= rn < pattern_end){
- c= =3D Utf8Read(pattern, pattern_end);
+ while ((c =3D Utf8Read(pattern, pattern_end)) !=3D SQL_END_OF_= STRING) {
+ if (c =3D=3D = SQL_INVALID_UTF8_SYMBOL)
+ return SQL_PROHIBITED_PATTERN;
=C2=A0 if (c =3D=3D matchAll) { /* Match "*" */
=C2=A0 /* Skip over multiple "*" characters in the pattern= .=C2=A0 If there
=C2=A0 = * are also "?" characters, skip those as well, but consume a
=C2=A0 * single character = of the input string for each "?" skipped
=C2=A0 */
- while (pattern < pattern_end){
- c =3D Utf8Read(pattern, pattern_end);
+ while ((c =3D Utf8Read(pattern= , pattern_end)) !=3D
+ = =C2=A0 =C2=A0 =C2=A0 =C2=A0SQL_END_OF_STRING) {
+ if (c =3D=3D SQL_INVALID_UTF8_SYMBOL)
= + return SQL_PROHIBITED_PATTERN;=
=C2=A0 if (c !=3D matc= hAll && c !=3D matchOne)
=C2=A0 break;
- if (c =3D=3D matchOne
- =C2=A0 =C2=A0 && Utf8Read(string, string_end) =3D=3D 0) {
+ if (c =3D=3D matchOne &am= p;&
+ =C2=A0 =C2=A0= (c2 =3D Utf8Read(string, string_end)) =3D=3D
+ =C2=A0 =C2=A0 SQL_END_OF_STRING)
=C2=A0<= span style=3D"white-space:pre"> return SQLITE_NOWILDCARDMATCH;
- }
+ if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQLITE_NOMATCH;=
=C2=A0 }
=C2= =A0 /* "*" at the end of= the pattern matches */
- if (pattern =3D=3D pattern_end)
+ if (c =3D=3D SQL_END_OF_STRING) {
+ while ((c2 =3D Utf8Read(string, string_end)) !=3D<= /div>
+ =C2=A0 =C2=A0 =C2=A0 = =C2=A0SQL_END_OF_STRING)
+ if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQLITE_NOMATCH;
=C2=A0 return SQLITE_MATCH;
+ }
=C2=A0 if (c =3D=3D matchOther) {
=C2=A0 if (pInfo->matchSet =3D=3D 0) {
=C2= =A0 c =3D Utf8Read(pattern, patt= ern_end);
- if (c =3D= =3D 0)
+ if (c =3D=3D = SQL_INVALID_UTF8_SYMBOL)
+ <= /span>return SQL_PROHIBITED_PATTERN;
+ if (c =3D=3D SQL_END_OF_STRING)
=C2=A0 return SQLITE_NOWILDCARDMATCH;
=C2=A0 } else {
=C2=A0= /* "[...]" immediatel= y follows the "*".=C2=A0 We have to do a slow
@@ -729,1= 3 +755,16 @@ patternCompare(const char * pattern, /* The glob pattern */
=C2=A0 assert(matchOther < 0x80); /* '[' is a single-byte character */
=C2= =A0 while (string < string_en= d) {
=C2=A0 int bMatc= h =3D
- =C2=A0 =C2=A0= patternCompare(&pattern[-1],
- =C2=A0 =C2=A0string,
- =C2=A0 =C2=A0pInfo,
- =C2=A0 =C2=A0matchOther);
+ =C2=A0 =C2=A0 sql_utf8_pattern_compare(
+ &pattern[-1],
+ string,
+<= span style=3D"white-space:pre"> pInfo,
+ matchOther);
=C2=A0 if (bMatch !=3D SQLITE_NOMATCH)
=C2=A0 return bMatch;
<= div>- Utf8Read(string, string_e= nd);
+ c =3D Utf8Read= (string, string_end);
+ if (c =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQLITE_NOMATCH;
=C2=A0 }
=C2=A0 return SQLITE_NOWILDCARDMATCH;
=C2=A0 }
@@ -764,6 +793,8 @@ patternC= ompare(const char * pattern, /* The = glob pattern */
=C2=A0 = * languages.
=C2=A0 *= /
=C2=A0 c2 =3D Utf8Rea= d(string, string_end);
+ if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQLITE_NOMATCH;
=C2=A0 if (!noCase) {
=C2=A0 if (c2 !=3D c)
=C2=A0 continue;
@@ -771,9 +802,10 @@ patte= rnCompare(const char * pattern, /* T= he glob pattern */
=C2=A0 if (c2 !=3D c && u_tolower(c2) !=3D c)
=C2=A0 continue;
=C2=A0 }
- bMatch =3D
- =C2=A0 =C2=A0 patternCompare(pattern, string,
- =C2=A0 =C2=A0pInfo, matchOther);
+<= span style=3D"white-space:pre"> bMatch =3D sql_utf8_pattern_compa= re(pattern,
+ =C2= =A0 string,
+ =C2= =A0 pInfo,
+ =C2=A0= matchOther);
=C2=A0 if= (bMatch !=3D SQLITE_NOMATCH)
=C2=A0 return bMatch;
=C2=A0 }
@@ -782,7 +814,9 @@ patternCompare(const char * pa= ttern, /* The glob pattern */
<= div>=C2=A0 if (c =3D=3D matchOther)= {
=C2=A0 if (pInfo->= matchSet =3D=3D 0) {
=C2=A0 c =3D Utf8Read(pattern, pattern_end);
- if (c =3D=3D 0)
+ if (c =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQL_PROHIBITED_PATTERN;
+ if (c =3D=3D SQL_END_OF_STRING= )
=C2=A0 return SQLITE= _NOMATCH;
=C2=A0 zEscap= ed =3D pattern;
=C2=A0 }= else {
@@ -790,23 +824,33 @@ patternCompare(const char * pattern= , /* The glob pattern */
= =C2=A0 int seen =3D 0;
= =C2=A0 int invert =3D 0;
=C2=A0 c =3D Utf8Read(string, s= tring_end);
+ if (c =3D= =3D SQL_INVALID_UTF8_SYMBOL)
+ = return SQLITE_NOMATCH;
=C2=A0 if (string =3D=3D string_end)
=C2=A0 return SQLITE_NOMATCH;
=C2=A0 c2 =3D Utf8Read(pattern, pattern_en= d);
+ if (c2 =3D=3D SQL= _INVALID_UTF8_SYMBOL)
+ return SQL_PROHIBITED_PATTERN;
=C2=A0 if (c2 =3D=3D '^') {
=C2=A0 invert =3D 1;
=C2=A0 c2 =3D Utf8Read(pattern, pattern_end);
+ if (c2 =3D=3D SQL_INVAL= ID_UTF8_SYMBOL)
+ ret= urn SQL_PROHIBITED_PATTERN;
=C2=A0 }
=C2=A0 if= (c2 =3D=3D ']') {
=C2=A0= if (c =3D=3D ']')
=C2=A0 seen =3D 1;
=C2=A0 c2 =3D Utf8Read(pattern, pattern_end);
+ if (c2 =3D=3D SQL_INVALID_UTF8_SYMB= OL)
+ return SQL_PROH= IBITED_PATTERN;
=C2=A0 = }
- while (c2 &&= ; c2 !=3D ']') {
+ while (c2 !=3D SQL_END_OF_STRING && c2 !=3D ']') {
=C2=A0 if (c2 =3D=3D '= -' && pattern[0] !=3D ']'
=C2=A0 =C2=A0 =C2=A0 && pattern < patt= ern_end
=C2=A0 =C2=A0 = =C2=A0 && prior_c > 0) {
=C2=A0 c2 =3D Utf8Read(pattern, pattern_end);
+ if (c2 =3D=3D SQL_INVALID_UTF8_SY= MBOL)
+ return SQL_P= ROHIBITED_PATTERN;
=C2=A0 if (c >=3D prior_c && c <=3D c2)
=C2=A0 seen =3D 1;
=C2=A0 prior_c =3D 0;
@@ -817,14 +8= 61,19 @@ patternCompare(const char * pattern, /* The glob pattern */
=C2=A0 prior_c =3D c2;
=C2=A0 }
=C2=A0 = c2 =3D Utf8Read(pattern, pattern_end);
+ if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL)
= + return SQL_PROHIBITED_PATTERN= ;
=C2=A0 }
-<= span style=3D"white-space:pre"> if (pattern =3D=3D pattern_end ||= (seen ^ invert) =3D=3D 0) {
+ = if (pattern =3D=3D pattern_end ||
+ =C2=A0 =C2=A0 (seen ^ invert) =3D=3D 0) {
= =C2=A0 return SQLITE_NOMATCH;
=C2=A0 }
=C2=A0= continue;
=C2=A0 }
=C2=A0 }
=C2=A0 c2 =3D Utf8Read(string, string_end);
+ if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL)
+ return SQLITE_NOMATCH;
=C2=A0<= span style=3D"white-space:pre"> if (c =3D=3D c2)
=C2=A0 continue;
=C2=A0 if (noCase){
@@ -839,7 +888,8 @@ = patternCompare(const char * pattern, /* The glob pattern */
=C2=A0 = =C2=A0 =C2=A0 c =3D=3D u_tolower(c2))
=C2=A0 continue;
=C2=A0 }
- = if (c =3D=3D matchOne && pattern !=3D zEscaped && c2 !=3D 0= )
+ if (c =3D=3D matchOne= && pattern !=3D zEscaped &&
+ =C2=A0 =C2=A0 c2 !=3D SQL_END_OF_STRING)
= =C2=A0 continue;
=C2=A0<= span style=3D"white-space:pre"> return SQLITE_NOMATCH;
= =C2=A0 }
@@ -853,8 +903,7 = @@ patternCompare(const char * pattern, /* The glob pattern */
=C2=A0int
=C2=A0sqlite3_strg= lob(const char *zGlobPattern, const char *zString)
=C2=A0{
<= div>- return patternCompare(zGlobPat= tern, zString, &globInfo,
- = =C2=A0 =C2=A0 =C2=A0 '[');
+ return sql_utf8_pattern_compare(zGlobPattern, zString, &= amp;globInfo, '[');
=C2=A0}
=C2=A0
= =C2=A0/*
@@ -864,7 +913,7 @@ sqlite3_strglob(const char *zGlobPat= tern, const char *zString)
=C2=A0int
=C2=A0sqlite3_strl= ike(const char *zPattern, const char *zStr, unsigned int esc)
=C2= =A0{
- return patternCompa= re(zPattern, zStr, &likeInfoNorm, esc);
+ return sql_utf8_pattern_compare(zPattern, zStr, &li= keInfoNorm, esc);
=C2=A0}
=C2=A0
=C2=A0/*
@@ -910,8 +959,9 @@ likeFunc(sqlite3_context * context, int argc, sq= lite3_value ** argv)
=C2=A0 zB =3D (const char *) sqlite3_value_text(argv[0]);
=C2=A0 zA =3D (const char *) sqlite3_value_text(= argv[1]);
=C2=A0
- /* Limit the length of the LIKE or GLOB pattern to avoid problems
<= div>- * of deep recursion and N*N b= ehavior in patternCompare().
+ /* Limit the length of the LIKE or GLOB pattern to avoid
+ * problems of deep recursion and N*N= behavior in
+ * sql_utf8= _pattern_compare().
=C2=A0 */
=C2=A0 nPat =3D sqlit= e3_value_bytes(argv[0]);
=C2=A0 <= /span>testcase(nPat =3D=3D db->aLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH])= ;
@@ -947,7 +997,12 @@ likeFunc(sqlite3_context * context, int ar= gc, sqlite3_value ** argv)
=C2=A0= sqlite3_like_count++;
=C2=A0#endif
=C2=A0 int res;
- res =3D patternCompare(zB, zA, pInfo, escape);
+ res =3D sql_utf8_pattern_compare(= zB, zA, pInfo, escape);
+ = if (res =3D=3D SQL_PROHIBITED_PATTERN) {
+ sqlite3_result_error(context, "LIKE or GLOB pattern = can only"
+ =C2=A0= =C2=A0 =C2=A0" contain UTF-8 characters", -1);
+ return;
+ }
=C2=A0 sqlite3_result_int(context, res =3D=3D SQLITE_MATCH);
=C2=A0}
=C2=A0
diff --git a/test/sql-tap/e_expr.test.lua b/test/= sql-tap/e_expr.test.lua
index 13d3a96..051210a 100755
-= -- a/test/sql-tap/e_expr.test.lua
+++ b/test/sql-tap/e_expr.test.= lua
@@ -1,6 +1,6 @@
=C2=A0#!/usr/bin/env tarantool
=C2=A0test =3D require("sqltester")
-test:plan(12= 431)
+test:plan(10665)
=C2=A0
=C2=A0--!./tclt= estrunner.lua
=C2=A0-- 2010 July 16
@@ -77,8 +77,10 @@ = local operations =3D {
=C2=A0 =C2=A0 =C2=A0{"<>",= "ne1"},
=C2=A0 =C2=A0 =C2=A0{"!=3D", "n= e2"},
=C2=A0 =C2=A0 =C2=A0{"IS", "is"},<= /div>
-=C2=A0 =C2=A0 {"LIKE", "like"},
-= =C2=A0 =C2=A0 {"GLOB", "glob"},
+-- NOTE: Thi= s test needs refactoring after deletion of GLOB &
+-- type restrictions for LIKE.
+--= =C2=A0 =C2=A0 {"LIKE", "like"},
+--=C2=A0 =C2= =A0 {"GLOB", "glob"},
=C2=A0 =C2=A0 =C2=A0{&q= uot;AND", "and"},
=C2=A0 =C2=A0 =C2=A0{"OR&qu= ot;, "or"},
=C2=A0 =C2=A0 =C2=A0{"MATCH", &qu= ot;match"},
@@ -96,7 +98,7 @@ operations =3D {
=C2= =A0 =C2=A0 =C2=A0{"+", "-"},
=C2=A0 =C2=A0 = =C2=A0{"<<", ">>", "&", "= ;|"},
=C2=A0 =C2=A0 =C2=A0{"<", "<=3D&q= uot;, ">", ">=3D"},
-=C2=A0 =C2=A0 {&qu= ot;=3D", "=3D=3D", "!=3D", "<>", &= quot;LIKE", "GLOB"}, --"MATCH", "REGEXP"= },
+=C2=A0 =C2=A0 {"=3D", "=3D=3D", "!= =3D", "<>"}, --"LIKE", "GLOB"}, &q= uot;MATCH", "REGEXP"},
=C2=A0 =C2=A0 =C2=A0{"= AND"},
=C2=A0 =C2=A0 =C2=A0{"OR"},
=C2= =A0}
@@ -475,6 +477,7 @@ for _, op in ipairs(oplist) do
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0end
=C2=A0 =C2=A0 =C2=A0end
=C2=A0end
+
=C2=A0-----------------------------= ----------------------------------------------
=C2=A0-- Test the = IS and IS NOT operators.
=C2=A0--
diff --git a/test/sql= -tap/gh-3251-string-pattern-comparison.lua b/test/sql-tap/gh-3251-string-pa= ttern-comparison.lua
new file mode 100755
index 0000000= ..0202efc
--- /dev/null
+++ b/test/sql-tap/gh-3251-stri= ng-pattern-comparison.lua
@@ -0,0 +1,238 @@
+#!/usr/bin= /env tarantool
+test =3D require("sqltester")
+test:plan(106)
+
+local prefix =3D "like-test-&q= uot;
+
+-- Unciode byte sequences.
+
+local valid_testcases =3D {
+ = '\x01',
+ &= #39;\x09',
+ '\x1F= ',
+ '\x7F',
+ '\xC2\x80',
=
+ '\xC2\x90',
+ '\xC2\x9F',
+ '\xE2\x80\xA8',
+ '\x20\x0B',
+ '\xE2\x80\xA9',
+}
+
+-- Non-Unicode byte sequences.
+local invalid_= testcases =3D {
+ '\xE= 2\x80',
+ '\xFE\xF= F',
+ '\xC2',<= /div>
+ '\xED\xB0\x80',<= /div>
+ '\xD0',
+}
+
+local like_test_cases =3D
+{
+ {"1.1",
+ [[
+ CREATE TABLE t2 (column1 INTEGER,
+ =C2=A0 =C2=A0 =C2=A0column2 VARCHAR(100),
+ =C2=A0 =C2=A0 =C2=A0co= lumn3 BLOB,
+ =C2=A0 = =C2=A0 =C2=A0column4 FLOAT,
+ = =C2=A0 =C2=A0 =C2=A0PRIMARY KEY (column1, column2));
+ INSERT INTO t2 VALUES (1, 'AB'= , X'4142', 5.5);
+ INSERT INTO t2 VALUES (1, 'CD', X'2020', 1E4);
= + INSERT INTO t2 VALUES (2, 'AB= ', X'2020', 12.34567);
+ INSERT INTO t2 VALUES (-1000, '', X'', 0.0);
+ CREATE TABLE t1 (a INT PR= IMARY KEY, str VARCHAR(100));
+ = INSERT INTO t1 VALUES (1, 'ab');
+ INSERT INTO t1 VALUES (2, 'abCDF');
<= div>+ INSERT INTO t1 VALUES (3, = 9;CDF');
+ CREATE TAB= LE t (s1 CHAR(2) PRIMARY KEY, s2 CHAR(2));
+ INSERT INTO t VALUES ('AB', 'AB');
+ ]], {0}},
+ {"1.2",
+ [[
+= SELECT column1, column2, column1 * column4 FROM
+ t2 WHERE column2 LIKE '_B';
+ ]],
+ {0, {1, 'AB', 5.5, 2, 'AB', 2= 4.69134}} },
+ {"1.3&= quot;,
+ "SELECT col= umn1, column2 FROM t2 WHERE column2 LIKE '%B';",
+ {0, {1, 'AB', 2, 'AB'= ;}} },
+ {"1.4",=
+ "SELECT column1, = column2 FROM t2 WHERE column2 LIKE 'A__';",
+ {0, {}} },
+ {"1.5",
+ "SELECT column1, column2 FROM t2 WHERE column2 LIKE &#= 39;A_';",
+ {0, = {1, 'AB', 2, 'AB'}} },
+ {"1.6",
+ "SELECT column1, column2 FROM t2 WHERE column2 LIKE 'A&#= 39;;",
+ {0, {}} },<= /div>
+ {"1.7",
<= div>+ "SELECT column1, column2= FROM t2 WHERE column2 LIKE '_';",
+ {0, {}} },
+ {"1.8",
+ = "SELECT * FROM t WHERE s1 LIKE '%A';",
+= {0, {}} },
+ {"1.9",
+ "SELECT * FROM t WHERE s1 LIKE '%C';&q= uot;,
+ {0, {}} },
<= div>+ {"1.10",
+= "SELECT * FROM t1 WHERE str L= IKE '%df';",
+ {0, {2, 'abCDF', 3, 'CDF'}} },
+ {"1.11",
+ "SELECT * FROM t1 WHERE str LIKE 'a_';&q= uot;,
+ {0, {1, 'ab&#= 39;}} },
+ {"1.12&quo= t;,
+ "SELECT column= 1, column2 FROM t2 WHERE column2 LIKE '__';",
+ {0, {1, 'AB', 1, 'CD', = 2, 'AB'}} },
+ {&q= uot;1.13",
+ "S= ELECT str FROM t1 WHERE str LIKE 'ab%';",
+ {0, {'ab', 'abCDF'}} },
+ {"1.14",
+ "SELECT str FROM t1 WHERE = str LIKE 'abC%';",
+= {0, {'abCDF'}} },
+ {"1.15",
+ = "SELECT str FROM t1 WHERE str LIKE 'a_%';",
<= div>+ {0, {'ab', 'abCDF= '}} },
+ {"1.16&q= uot;,
+ [[
+ DROP TABLE t1;
+ DROP TABLE t2;
+ DROP TABLE t;
+ ]], {0}},
+}
+
+test:do_catchsql_= set_test(like_test_cases, prefix)
+
+-- Invalid testcas= es.
+
+for i, tested_string in ipairs(invalid_testcases= ) do
+ local test_name =3D= prefix .. "2." .. tostring(i)
+ local test_itself =3D "SELECT 'abc' LIKE '= ;ab" .. tested_string .. "';"
+
+-- = We should raise an error if pattern contains invalid characters.
= +
+ test:do_catchsql_test(
+ test_name,
+ <= /span>test_itself, {
+ -= - <test_name>
+ 1,= "LIKE or GLOB pattern can only contain UTF-8 characters"
+ -- <test_name>
+ })
+
+ test_name =3D prefix .. "3." .= . tostring(i)
+ test_itsel= f =3D "SELECT 'abc' LIKE 'abc" .. tested_string .. &q= uot;';"
+ test:do= _catchsql_test(
+ test_na= me,
+ test_itself, {
+ -- <test_name>
=
+ 1, "LIKE or GLOB patte= rn can only contain UTF-8 characters"
+ -- <test_name>
+ })
+
+ test_name =3D prefix .. "4." .. tostring(i)
+= test_itself =3D "SELECT 'a= bc' LIKE 'ab" .. tested_string .. "c';"
+ test:do_catchsql_test(
+ test_name,
+ test_itself, {
+ -- <test_name>
+ 1, "LIKE or GLOB pattern can only contain UTF-8= characters"
+ -- &= lt;test_name>
+ })
+
+-- Just skipping if row value predicand contains invali= d character.
+
+ test_name =3D prefix .. "5." .. tostring(i)
+ test_itself =3D "SELECT 'ab" .= . tested_string .. "' LIKE 'abc';"
+ test:do_execsql_test(
+ test_name,
+ test_itself, {
+ -- <test_name>
+ 0
+ -- <t= est_name>
+ })
+
+ test_name =3D prefi= x .. "6." .. tostring(i)
+ test_itself =3D "SELECT 'abc" .. tested_string .. = "' LIKE 'abc';"
+ test:do_execsql_test(
+ test_name,
+ test_itself, {
+ -- <= ;test_name>
+ 0
=
+ -- <test_name>
<= div>+ })
+
+ test_name =3D prefix .. "7."= .. tostring(i)
+ test_its= elf =3D "SELECT 'ab" .. tested_string .. "c' LIKE &#= 39;abc';"
+ test:= do_execsql_test(
+ test_n= ame,
+ test_itself, {
+ -- <test_name>
+ 0
+ -- <test_name>
+ })
+end
+
+-- V= alid testcases.
+
+for i, tested_string in ipairs(valid= _testcases) do
+ test_name= =3D prefix .. "8." .. tostring(i)
+ local test_itself =3D "SELECT 'abc' LIKE = 'ab" .. tested_string .. "';"
+ test:do_execsql_test(
+ test_name,
+ test_itself, {
+ -- <test_name>
+= 0
+ -- <tes= t_name>
+ })
+
+ test_name =3D prefix = .. "9." .. tostring(i)
+ test_itself =3D "SELECT 'abc' LIKE 'abc" .. = tested_string .. "';"
+ test:do_execsql_test(
+ test_name,
+ t= est_itself, {
+ -- <t= est_name>
+ 0
+ -- <test_name>
+ })
+
+ test_name =3D prefix .. "10." = .. tostring(i)
+ test_itse= lf =3D "SELECT 'abc' LIKE 'ab" .. tested_string .. &q= uot;c';"
+ test:d= o_execsql_test(
+ test_na= me,
+ test_itself, {
+ -- <test_name>
=
+ 0
+ -- <test_name>
+ })
+ test_name =3D prefix .. "11." .. tostring(i)
+ test_itself =3D "SELECT 'ab&qu= ot; .. tested_string .. "' LIKE 'abc';"
+ test:do_execsql_test(
+ test_name,
+ test_itself, {
+ -- <test_name>
+ 0
+ -- = <test_name>
+ })
+
+ test_name =3D = prefix .. "12." .. tostring(i)
+ test_itself =3D "SELECT 'abc" .. tested_stri= ng .. "' LIKE 'abc';"
+ test:do_execsql_test(
+ test_name,
+ = test_itself, {
+ = -- <test_name>
+ 0=
+ -- <test_name><= /div>
+ })
+
+ test_name =3D prefix .. "13= ." .. tostring(i)
+ t= est_itself =3D "SELECT 'ab" .. tested_string .. "c' = LIKE 'abc';"
+ test:do_execsql_test(
+ test_name,
+ test_itself= , {
+ -- <test_name&g= t;
+ 0
+ -- <test_name>
+ })
+end
+
+= test:finish_test()

--0000000000007398070571f9696a--