From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id AA70B2EFDE for ; Wed, 14 Nov 2018 09:16:59 -0500 (EST) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id 9e1v38BeZr4l for ; Wed, 14 Nov 2018 09:16:59 -0500 (EST) Received: from smtp49.i.mail.ru (smtp49.i.mail.ru [94.100.177.109]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id C2CA62EFBF for ; Wed, 14 Nov 2018 09:16:58 -0500 (EST) Content-Type: text/plain; charset=utf-8 Mime-Version: 1.0 (Mac OS X Mail 12.0 \(3445.100.39\)) Subject: [tarantool-patches] Re: [PATCH 1/2] sql: LIKE & GLOB pattern comparison issue From: "n.pettik" In-Reply-To: <20181101103045.fmhy3y6l342wojd6@tkn_work_nb> Date: Wed, 14 Nov 2018 17:16:49 +0300 Content-Transfer-Encoding: quoted-printable Message-Id: <8C50679E-91D6-436B-BB27-39A45A14106D@tarantool.org> References: <87897608-173E-45EB-80A1-8B249706D8A1@tarantool.org> <6a1352e9-425c-d656-1bec-bb04d9f0fee6@tarantool.org> <58B407E2-AF5D-4531-A9FF-9DC57CE0070B@tarantool.org> <860a125b-19f3-3bf1-8705-25156ff508ab@tarantool.org> <45338A27-C589-4330-B206-A4E379A4DE75@tarantool.org> <20181021035140.avx6d3rokx5ta6hi@tkn_work_nb> <6740948F-6C40-4C0F-B237-7C3573225FBC@tarantool.org> <20181029130123.f254chdxxuwi6c4w@tkn_work_nb> <3D4337BA-F528-425C-B352-C195C20DA282@tarantool.org> <20181101103045.fmhy3y6l342wojd6@tkn_work_nb> Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: Nikita Tatunov , 
Alexander Turenko Hello, guys. I suggest the following diff. It doesn’t involve any functional changes. Obviously, since the bug has been fixed, the patch LGTM. It is up to you whether to discard my fixes or apply them. I pushed the fixes to a separate branch, since I had to resolve rebase conflicts after adding my fixes. np/gh-3251-where-like-hangs diff --git a/src/box/sql/func.c b/src/box/sql/func.c index 6632c5983..e01519aa9 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -650,12 +650,16 @@ static const struct compareInfo likeInfoNorm =3D { = '%', '_', 0, 1 }; static const struct compareInfo likeInfoAlt =3D { '%', '_', 0, 0 }; =20 /** - * Possible error returns from sql_utf8_pattern_compare(). + * Returns codes from sql_utf8_pattern_compare(). */ -#define SQL_MATCH 0 -#define SQL_NOMATCH 1 -#define SQL_NOWILDCARDMATCH 2 -#define SQL_INVALID_PATTERN 3 +enum pattern_match_status { + MATCH =3D 0, + NO_MATCH =3D 1, + /** No match in spite of having * or % wildcards. */ + NO_WILDCARD_MATCH =3D 2, + /** Pattern contains invalid UTF-8 symbol. */ + INVALID_PATTERN =3D 3 +}; =20 /** * Compare two UTF-8 strings for equality where the first string @@ -699,12 +703,7 @@ static const struct compareInfo likeInfoAlt =3D { = '%', '_', 0, 0 }; * @param compareInfo Information about how to compare. * @param matchOther The escape char (LIKE) or '[' (GLOB). * - * @retval SQL_MATCH: Match. - * SQL_NOMATCH: No match. - * SQL_NOWILDCARDMATCH: No match in spite of having * - * or % wildcards. - * SQL_INVALID_PATTERN: Pattern contains invalid - * symbol. + * @retval One of pattern_match_status values. 
*/ static int sql_utf8_pattern_compare(const char *pattern, @@ -729,10 +728,11 @@ sql_utf8_pattern_compare(const char *pattern, while (pattern < pattern_end) { c =3D Utf8Read(pattern, pattern_end); if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) - return SQL_INVALID_PATTERN; - if (c =3D=3D matchAll) { /* Match "*" */ + return INVALID_PATTERN; + if (c =3D=3D matchAll) { /* - * Skip over multiple "*" characters in + * Match *: + * skip over multiple "*" characters in * the pattern. If there are also "?" * characters, skip those as well, but * consume a single character of the @@ -741,29 +741,28 @@ sql_utf8_pattern_compare(const char *pattern, while ((c =3D Utf8Read(pattern, pattern_end)) !=3D= SQL_END_OF_STRING) { if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) - return SQL_INVALID_PATTERN; + return INVALID_PATTERN; if (c !=3D matchAll && c !=3D matchOne) break; if (c =3D=3D matchOne && (c2 =3D Utf8Read(string, = string_end)) =3D=3D SQL_END_OF_STRING) - return SQL_NOWILDCARDMATCH; + return NO_WILDCARD_MATCH; if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) - return SQL_NOMATCH; + return NO_MATCH; } /* * "*" at the end of the pattern matches. */ - if (c =3D=3D SQL_END_OF_STRING) { - return SQL_MATCH; - } + if (c =3D=3D SQL_END_OF_STRING) + return MATCH; if (c =3D=3D matchOther) { if (pInfo->matchSet =3D=3D 0) { c =3D Utf8Read(pattern, = pattern_end); if (c =3D=3D = SQL_INVALID_UTF8_SYMBOL) - return = SQL_INVALID_PATTERN; + return INVALID_PATTERN; if (c =3D=3D SQL_END_OF_STRING) - return = SQL_NOWILDCARDMATCH; + return = NO_WILDCARD_MATCH; } else { /* "[...]" immediately * follows the "*". 
We @@ -785,13 +784,13 @@ sql_utf8_pattern_compare(const char *pattern, string, pInfo, = matchOther); - if (bMatch !=3D = SQL_NOMATCH) + if (bMatch !=3D = NO_MATCH) return bMatch; c =3D Utf8Read(string, = string_end); if (c =3D=3D = SQL_INVALID_UTF8_SYMBOL) - return = SQL_NOMATCH; + return NO_MATCH; } - return SQL_NOWILDCARDMATCH; + return NO_WILDCARD_MATCH; } } =20 @@ -825,7 +824,7 @@ sql_utf8_pattern_compare(const char *pattern, */ c2 =3D Utf8Read(string, string_end); if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) - return SQL_NOMATCH; + return NO_MATCH; if (!noCase) { if (c2 !=3D c) continue; @@ -837,18 +836,18 @@ sql_utf8_pattern_compare(const char *pattern, = string, pInfo, = matchOther); - if (bMatch !=3D SQL_NOMATCH) + if (bMatch !=3D NO_MATCH) return bMatch; } - return SQL_NOWILDCARDMATCH; + return NO_WILDCARD_MATCH; } if (c =3D=3D matchOther) { if (pInfo->matchSet =3D=3D 0) { c =3D Utf8Read(pattern, pattern_end); if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) - return SQL_INVALID_PATTERN; + return INVALID_PATTERN; if (c =3D=3D SQL_END_OF_STRING) - return SQL_NOMATCH; + return NO_MATCH; zEscaped =3D pattern; } else { UChar32 prior_c =3D 0; @@ -856,24 +855,24 @@ sql_utf8_pattern_compare(const char *pattern, int invert =3D 0; c =3D Utf8Read(string, string_end); if (c =3D=3D SQL_INVALID_UTF8_SYMBOL) - return SQL_NOMATCH; + return NO_MATCH; if (string =3D=3D string_end) - return SQL_NOMATCH; + return NO_MATCH; c2 =3D Utf8Read(pattern, pattern_end); if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) - return SQL_INVALID_PATTERN; + return INVALID_PATTERN; if (c2 =3D=3D '^') { invert =3D 1; c2 =3D Utf8Read(pattern, = pattern_end); if (c2 =3D=3D = SQL_INVALID_UTF8_SYMBOL) - return = SQL_INVALID_PATTERN; + return INVALID_PATTERN; } if (c2 =3D=3D ']') { if (c =3D=3D ']') seen =3D 1; c2 =3D Utf8Read(pattern, = pattern_end); if (c2 =3D=3D = SQL_INVALID_UTF8_SYMBOL) - return = SQL_INVALID_PATTERN; + return INVALID_PATTERN; } while (c2 !=3D SQL_END_OF_STRING && c2 = !=3D ']') { if (c2 =3D=3D '-' && 
pattern[0] = !=3D ']' @@ -881,30 +880,28 @@ sql_utf8_pattern_compare(const char *pattern, && prior_c > 0) { c2 =3D Utf8Read(pattern, = pattern_end); if (c2 =3D=3D = SQL_INVALID_UTF8_SYMBOL) - return = SQL_INVALID_PATTERN; + return = INVALID_PATTERN; if (c >=3D prior_c && c = <=3D c2) seen =3D 1; prior_c =3D 0; } else { - if (c =3D=3D c2) { + if (c =3D=3D c2) seen =3D 1; - } prior_c =3D c2; } c2 =3D Utf8Read(pattern, = pattern_end); if (c2 =3D=3D = SQL_INVALID_UTF8_SYMBOL) - return = SQL_INVALID_PATTERN; + return INVALID_PATTERN; } if (pattern =3D=3D pattern_end || - (seen ^ invert) =3D=3D 0) { - return SQL_NOMATCH; - } + (seen ^ invert) =3D=3D 0) + return NO_MATCH; continue; } } c2 =3D Utf8Read(string, string_end); if (c2 =3D=3D SQL_INVALID_UTF8_SYMBOL) - return SQL_NOMATCH; + return NO_MATCH; if (c =3D=3D c2) continue; if (noCase){ @@ -916,16 +913,15 @@ sql_utf8_pattern_compare(const char *pattern, * to_lower allows to respect Turkish '=C4=B0' * in default locale. */ - if (u_tolower(c) =3D=3D c2 || - c =3D=3D u_tolower(c2)) + if (u_tolower(c) =3D=3D c2 || c =3D=3D = u_tolower(c2)) continue; } if (c =3D=3D matchOne && pattern !=3D zEscaped && c2 !=3D SQL_END_OF_STRING) continue; - return SQL_NOMATCH; + return NO_MATCH; } - return string =3D=3D string_end ? SQL_MATCH : SQL_NOMATCH; + return string =3D=3D string_end ? 
MATCH : NO_MATCH; } =20 /* @@ -1030,12 +1026,12 @@ likeFunc(sqlite3_context * context, int argc, = sqlite3_value ** argv) #endif int res; res =3D sql_utf8_pattern_compare(zB, zA, pInfo, escape); - if (res =3D=3D SQL_INVALID_PATTERN) { + if (res =3D=3D INVALID_PATTERN) { sqlite3_result_error(context, "LIKE or GLOB pattern can = only" " contain UTF-8 characters", -1); return; } - sqlite3_result_int(context, res =3D=3D SQL_MATCH); + sqlite3_result_int(context, res =3D=3D MATCH); } =20 /* diff --git a/test/sql-tap/e_expr.test.lua b/test/sql-tap/e_expr.test.lua index 3697b7d7f..682771f36 100755 --- a/test/sql-tap/e_expr.test.lua +++ b/test/sql-tap/e_expr.test.lua @@ -99,10 +99,10 @@ operations =3D { {"<<", ">>", "&", "|"}, {"<", "<=3D", ">", ">=3D"}, -- NOTE: This test needs refactoring after deletion of GLOB & --- type restrictions for LIKE. (See #3572) --- Another NOTE: MATCH & REGEXP aren't supported in Tarantool & --- are waiting for their hour, don't confuse them --- being commented with ticket above. +-- type restrictions for LIKE. (See #3572) +-- Also, MATCH & REGEXP aren't supported in Tarantool & +-- are waiting for their hour, don't confuse them +-- being commented with ticket above. {"=3D", "=3D=3D", "!=3D", "<>"}, --"LIKE", "GLOB"}, --"MATCH", = "REGEXP"}, {"AND"}, {"OR"}, @@ -493,7 +493,6 @@ for _, op in ipairs(oplist) do end end end - = --------------------------------------------------------------------------= - -- Test the IS and IS NOT operators. -- diff --git a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua = b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua index c2a2a4d92..612b8183a 100755 --- a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua +++ b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua @@ -133,7 +133,7 @@ local invalid_testcases =3D { '\xD0', } =20 --- Invalid testcases. +-- Invalid unicode symbols. 
for i, tested_string in ipairs(invalid_testcases) do =20 -- We should raise an error in case @@ -183,7 +183,7 @@ local valid_testcases =3D { '\xE2\x80\xA9', } =20 --- Valid testcases. +-- Valid unicode symbols. for i, tested_string in ipairs(valid_testcases) do local test_name =3D prefix .. "8." .. tostring(i) local test_itself =3D "SELECT 'abc' LIKE 'ab" .. tested_string .. = "';"=