From: "n.pettik" <korablev@tarantool.org> To: tarantool-patches@freelists.org Cc: Nikita Tatunov <n.tatunov@tarantool.org>, Alexander Turenko <alexander.turenko@tarantool.org> Subject: [tarantool-patches] Re: [PATCH 1/2] sql: LIKE & GLOB pattern comparison issue Date: Wed, 14 Nov 2018 17:16:49 +0300 [thread overview] Message-ID: <8C50679E-91D6-436B-BB27-39A45A14106D@tarantool.org> (raw) In-Reply-To: <20181101103045.fmhy3y6l342wojd6@tkn_work_nb> Hello, guys. I suggest following diff. It doesn’t involve any functional changes. Obviously, as far as bug has been fixed, patch LGTM. It is up to you whether discard my fixes or apply them. I pushed fixes to the separate branch, since I had to solve rebase conflicts after adding my fixes. np/gh-3251-where-like-hangs diff --git a/src/box/sql/func.c b/src/box/sql/func.c index 6632c5983..e01519aa9 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -650,12 +650,16 @@ static const struct compareInfo likeInfoNorm = { '%', '_', 0, 1 }; static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 }; /** - * Possible error returns from sql_utf8_pattern_compare(). + * Returns codes from sql_utf8_pattern_compare(). */ -#define SQL_MATCH 0 -#define SQL_NOMATCH 1 -#define SQL_NOWILDCARDMATCH 2 -#define SQL_INVALID_PATTERN 3 +enum pattern_match_status { + MATCH = 0, + NO_MATCH = 1, + /** No match in spite of having * or % wildcards. */ + NO_WILDCARD_MATCH = 2, + /** Pattern contains invalid UTF-8 symbol. */ + INVALID_PATTERN = 3 +}; /** * Compare two UTF-8 strings for equality where the first string @@ -699,12 +703,7 @@ static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 }; * @param compareInfo Information about how to compare. * @param matchOther The escape char (LIKE) or '[' (GLOB). * - * @retval SQL_MATCH: Match. - * SQL_NOMATCH: No match. - * SQL_NOWILDCARDMATCH: No match in spite of having * - * or % wildcards. - * SQL_INVALID_PATTERN: Pattern contains invalid - * symbol. + * @retval One of pattern_match_status values. */ static int sql_utf8_pattern_compare(const char *pattern, @@ -729,10 +728,11 @@ sql_utf8_pattern_compare(const char *pattern, while (pattern < pattern_end) { c = Utf8Read(pattern, pattern_end); if (c == SQL_INVALID_UTF8_SYMBOL) - return SQL_INVALID_PATTERN; - if (c == matchAll) { /* Match "*" */ + return INVALID_PATTERN; + if (c == matchAll) { /* - * Skip over multiple "*" characters in + * Match *: + * skip over multiple "*" characters in * the pattern. If there are also "?" * characters, skip those as well, but * consume a single character of the @@ -741,29 +741,28 @@ sql_utf8_pattern_compare(const char *pattern, while ((c = Utf8Read(pattern, pattern_end)) != SQL_END_OF_STRING) { if (c == SQL_INVALID_UTF8_SYMBOL) - return SQL_INVALID_PATTERN; + return INVALID_PATTERN; if (c != matchAll && c != matchOne) break; if (c == matchOne && (c2 = Utf8Read(string, string_end)) == SQL_END_OF_STRING) - return SQL_NOWILDCARDMATCH; + return NO_WILDCARD_MATCH; if (c2 == SQL_INVALID_UTF8_SYMBOL) - return SQL_NOMATCH; + return NO_MATCH; } /* * "*" at the end of the pattern matches. */ - if (c == SQL_END_OF_STRING) { - return SQL_MATCH; - } + if (c == SQL_END_OF_STRING) + return MATCH; if (c == matchOther) { if (pInfo->matchSet == 0) { c = Utf8Read(pattern, pattern_end); if (c == SQL_INVALID_UTF8_SYMBOL) - return SQL_INVALID_PATTERN; + return INVALID_PATTERN; if (c == SQL_END_OF_STRING) - return SQL_NOWILDCARDMATCH; + return NO_WILDCARD_MATCH; } else { /* "[...]" immediately * follows the "*". We @@ -785,13 +784,13 @@ sql_utf8_pattern_compare(const char *pattern, string, pInfo, matchOther); - if (bMatch != SQL_NOMATCH) + if (bMatch != NO_MATCH) return bMatch; c = Utf8Read(string, string_end); if (c == SQL_INVALID_UTF8_SYMBOL) - return SQL_NOMATCH; + return NO_MATCH; } - return SQL_NOWILDCARDMATCH; + return NO_WILDCARD_MATCH; } } @@ -825,7 +824,7 @@ sql_utf8_pattern_compare(const char *pattern, */ c2 = Utf8Read(string, string_end); if (c2 == SQL_INVALID_UTF8_SYMBOL) - return SQL_NOMATCH; + return NO_MATCH; if (!noCase) { if (c2 != c) continue; @@ -837,18 +836,18 @@ sql_utf8_pattern_compare(const char *pattern, string, pInfo, matchOther); - if (bMatch != SQL_NOMATCH) + if (bMatch != NO_MATCH) return bMatch; } - return SQL_NOWILDCARDMATCH; + return NO_WILDCARD_MATCH; } if (c == matchOther) { if (pInfo->matchSet == 0) { c = Utf8Read(pattern, pattern_end); if (c == SQL_INVALID_UTF8_SYMBOL) - return SQL_INVALID_PATTERN; + return INVALID_PATTERN; if (c == SQL_END_OF_STRING) - return SQL_NOMATCH; + return NO_MATCH; zEscaped = pattern; } else { UChar32 prior_c = 0; @@ -856,24 +855,24 @@ sql_utf8_pattern_compare(const char *pattern, int invert = 0; c = Utf8Read(string, string_end); if (c == SQL_INVALID_UTF8_SYMBOL) - return SQL_NOMATCH; + return NO_MATCH; if (string == string_end) - return SQL_NOMATCH; + return NO_MATCH; c2 = Utf8Read(pattern, pattern_end); if (c2 == SQL_INVALID_UTF8_SYMBOL) - return SQL_INVALID_PATTERN; + return INVALID_PATTERN; if (c2 == '^') { invert = 1; c2 = Utf8Read(pattern, pattern_end); if (c2 == SQL_INVALID_UTF8_SYMBOL) - return SQL_INVALID_PATTERN; + return INVALID_PATTERN; } if (c2 == ']') { if (c == ']') seen = 1; c2 = Utf8Read(pattern, pattern_end); if (c2 == SQL_INVALID_UTF8_SYMBOL) - return SQL_INVALID_PATTERN; + return INVALID_PATTERN; } while (c2 != SQL_END_OF_STRING && c2 != ']') { if (c2 == '-' && pattern[0] != ']' @@ -881,30 +880,28 @@ sql_utf8_pattern_compare(const char *pattern, && prior_c > 0) { c2 = Utf8Read(pattern, pattern_end); if (c2 == SQL_INVALID_UTF8_SYMBOL) - return SQL_INVALID_PATTERN; + return INVALID_PATTERN; if (c >= prior_c && c <= c2) seen = 1; prior_c = 0; } else { - if (c == c2) { + if (c == c2) seen = 1; - } prior_c = c2; } c2 = Utf8Read(pattern, pattern_end); if (c2 == SQL_INVALID_UTF8_SYMBOL) - return SQL_INVALID_PATTERN; + return INVALID_PATTERN; } if (pattern == pattern_end || - (seen ^ invert) == 0) { - return SQL_NOMATCH; - } + (seen ^ invert) == 0) + return NO_MATCH; continue; } } c2 = Utf8Read(string, string_end); if (c2 == SQL_INVALID_UTF8_SYMBOL) - return SQL_NOMATCH; + return NO_MATCH; if (c == c2) continue; if (noCase){ @@ -916,16 +913,15 @@ sql_utf8_pattern_compare(const char *pattern, * to_lower allows to respect Turkish 'İ' * in default locale. */ - if (u_tolower(c) == c2 || - c == u_tolower(c2)) + if (u_tolower(c) == c2 || c == u_tolower(c2)) continue; } if (c == matchOne && pattern != zEscaped && c2 != SQL_END_OF_STRING) continue; - return SQL_NOMATCH; + return NO_MATCH; } - return string == string_end ? SQL_MATCH : SQL_NOMATCH; + return string == string_end ? MATCH : NO_MATCH; } /* @@ -1030,12 +1026,12 @@ likeFunc(sqlite3_context * context, int argc, sqlite3_value ** argv) #endif int res; res = sql_utf8_pattern_compare(zB, zA, pInfo, escape); - if (res == SQL_INVALID_PATTERN) { + if (res == INVALID_PATTERN) { sqlite3_result_error(context, "LIKE or GLOB pattern can only" " contain UTF-8 characters", -1); return; } - sqlite3_result_int(context, res == SQL_MATCH); + sqlite3_result_int(context, res == MATCH); } /* diff --git a/test/sql-tap/e_expr.test.lua b/test/sql-tap/e_expr.test.lua index 3697b7d7f..682771f36 100755 --- a/test/sql-tap/e_expr.test.lua +++ b/test/sql-tap/e_expr.test.lua @@ -99,10 +99,10 @@ operations = { {"<<", ">>", "&", "|"}, {"<", "<=", ">", ">="}, -- NOTE: This test needs refactoring after deletion of GLOB & --- type restrictions for LIKE. (See #3572) --- Another NOTE: MATCH & REGEXP aren't supported in Tarantool & --- are waiting for their hour, don't confuse them --- being commented with ticket above. +-- type restrictions for LIKE. (See #3572) +-- Also, MATCH & REGEXP aren't supported in Tarantool & +-- are waiting for their hour, don't confuse them +-- being commented with ticket above. {"=", "==", "!=", "<>"}, --"LIKE", "GLOB"}, --"MATCH", "REGEXP"}, {"AND"}, {"OR"}, @@ -493,7 +493,6 @@ for _, op in ipairs(oplist) do end end end - --------------------------------------------------------------------------- -- Test the IS and IS NOT operators. -- diff --git a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua index c2a2a4d92..612b8183a 100755 --- a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua +++ b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua @@ -133,7 +133,7 @@ local invalid_testcases = { '\xD0', } --- Invalid testcases. +-- Invalid unicode symbols. for i, tested_string in ipairs(invalid_testcases) do -- We should raise an error in case @@ -183,7 +183,7 @@ local valid_testcases = { '\xE2\x80\xA9', } --- Valid testcases. +-- Valid unicode symbols. for i, tested_string in ipairs(valid_testcases) do local test_name = prefix .. "8." .. tostring(i) local test_itself = "SELECT 'abc' LIKE 'ab" .. tested_string .. "';"
next prev parent reply other threads:[~2018-11-14 14:16 UTC|newest] Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-08-16 17:00 [tarantool-patches] [PATCH v2 0/2] sql: pattern comparison fixes & GLOB removal N.Tatunov 2018-08-16 17:00 ` [tarantool-patches] [PATCH 1/2] sql: LIKE & GLOB pattern comparison issue N.Tatunov 2018-08-17 9:23 ` [tarantool-patches] " Alex Khatskevich 2018-08-17 11:17 ` Alexander Turenko 2018-08-17 11:42 ` Alex Khatskevich 2018-09-09 13:33 ` Nikita Tatunov 2018-09-10 22:20 ` Alex Khatskevich 2018-09-11 6:06 ` Nikita Tatunov 2018-09-11 10:06 ` Alex Khatskevich 2018-09-11 13:31 ` Nikita Tatunov 2018-10-18 18:02 ` Nikita Tatunov 2018-10-21 3:51 ` Alexander Turenko 2018-10-26 15:19 ` Nikita Tatunov 2018-10-29 13:01 ` Alexander Turenko 2018-10-31 5:25 ` Nikita Tatunov 2018-11-01 10:30 ` Alexander Turenko 2018-11-14 14:16 ` n.pettik [this message] 2018-11-14 17:06 ` Alexander Turenko 2018-08-16 17:00 ` [tarantool-patches] [PATCH 2/2] sql: remove GLOB from Tarantool N.Tatunov 2018-08-17 8:25 ` [tarantool-patches] " Alex Khatskevich 2018-08-17 8:49 ` n.pettik 2018-08-17 9:01 ` Alex Khatskevich 2018-08-17 9:20 ` n.pettik 2018-08-17 9:28 ` Alex Khatskevich [not found] ` <04D02794-07A5-4146-9144-84EE720C8656@corp.mail.ru> 2018-08-17 8:53 ` Alex Khatskevich 2018-08-17 11:26 ` Alexander Turenko 2018-08-17 11:34 ` Alexander Turenko 2018-08-17 13:46 ` Nikita Tatunov 2018-09-09 14:57 ` Nikita Tatunov 2018-09-10 22:06 ` Alex Khatskevich 2018-09-11 7:38 ` Nikita Tatunov 2018-09-11 10:11 ` Alexander Turenko 2018-09-11 10:22 ` Alex Khatskevich 2018-09-11 12:03 ` Alex Khatskevich 2018-10-18 20:28 ` Nikita Tatunov 2018-10-21 3:48 ` Alexander Turenko 2018-10-26 15:21 ` Nikita Tatunov 2018-10-29 12:15 ` Alexander Turenko 2018-11-08 15:09 ` Nikita Tatunov 2018-11-09 12:18 ` Alexander Turenko 2018-11-10 3:38 ` Nikita Tatunov 2018-11-13 19:23 ` Alexander Turenko 2018-11-14 14:16 ` n.pettik 2018-11-14 17:41 ` Alexander Turenko 2018-11-14 21:48 ` n.pettik 2018-11-15 4:57 ` [tarantool-patches] Re: [PATCH v2 0/2] sql: pattern comparison fixes & GLOB removal Kirill Yukhin
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=8C50679E-91D6-436B-BB27-39A45A14106D@tarantool.org \ --to=korablev@tarantool.org \ --cc=alexander.turenko@tarantool.org \ --cc=n.tatunov@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='[tarantool-patches] Re: [PATCH 1/2] sql: LIKE & GLOB pattern comparison issue' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox