[tarantool-patches] Re: [PATCH 1/2] sql: LIKE & GLOB pattern comparison issue

n.pettik korablev at tarantool.org
Wed Nov 14 17:16:49 MSK 2018


Hello, guys.

I suggest following diff. It doesn’t involve any functional changes.
Obviously, as far as bug has been fixed, patch LGTM.
It is up to you whether discard my fixes or apply them.
I pushed fixes to the separate branch, since I had to solve
rebase conflicts after adding my fixes.

 np/gh-3251-where-like-hangs

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 6632c5983..e01519aa9 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -650,12 +650,16 @@ static const struct compareInfo likeInfoNorm = { '%', '_', 0, 1 };
 static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 };
 
 /**
- * Possible error returns from sql_utf8_pattern_compare().
+ * Returns codes from sql_utf8_pattern_compare().
  */
-#define SQL_MATCH                0
-#define SQL_NOMATCH              1
-#define SQL_NOWILDCARDMATCH      2
-#define SQL_INVALID_PATTERN      3
+enum pattern_match_status {
+       MATCH = 0,
+       NO_MATCH = 1,
+       /** No match in spite of having * or % wildcards. */
+       NO_WILDCARD_MATCH = 2,
+       /** Pattern contains invalid UTF-8 symbol. */
+       INVALID_PATTERN = 3
+};
 
 /**
  * Compare two UTF-8 strings for equality where the first string
@@ -699,12 +703,7 @@ static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 };
  * @param compareInfo Information about how to compare.
  * @param matchOther The escape char (LIKE) or '[' (GLOB).
  *
- * @retval SQL_MATCH:               Match.
- *        SQL_NOMATCH:             No match.
- *        SQL_NOWILDCARDMATCH:     No match in spite of having *
- *                                 or % wildcards.
- *        SQL_INVALID_PATTERN:     Pattern contains invalid
- *                                 symbol.
+ * @retval One of pattern_match_status values.
  */
 static int
 sql_utf8_pattern_compare(const char *pattern,
@@ -729,10 +728,11 @@ sql_utf8_pattern_compare(const char *pattern,
        while (pattern < pattern_end) {
                c = Utf8Read(pattern, pattern_end);
                if (c == SQL_INVALID_UTF8_SYMBOL)
-                       return SQL_INVALID_PATTERN;
-               if (c == matchAll) {    /* Match "*" */
+                       return INVALID_PATTERN;
+               if (c == matchAll) {
                        /*
-                        * Skip over multiple "*" characters in
+                        * Match *:
+                        * skip over multiple "*" characters in
                         * the pattern. If there are also "?"
                         * characters, skip those as well, but
                         * consume a single character of the
@@ -741,29 +741,28 @@ sql_utf8_pattern_compare(const char *pattern,
                        while ((c = Utf8Read(pattern, pattern_end)) !=
                               SQL_END_OF_STRING) {
                                if (c == SQL_INVALID_UTF8_SYMBOL)
-                                       return SQL_INVALID_PATTERN;
+                                       return INVALID_PATTERN;
                                if (c != matchAll && c != matchOne)
                                        break;
                                if (c == matchOne &&
                                    (c2 = Utf8Read(string, string_end)) ==
                                    SQL_END_OF_STRING)
-                                       return SQL_NOWILDCARDMATCH;
+                                       return NO_WILDCARD_MATCH;
                                if (c2 == SQL_INVALID_UTF8_SYMBOL)
-                                       return SQL_NOMATCH;
+                                       return NO_MATCH;
                        }
                        /*
                         * "*" at the end of the pattern matches.
                         */
-                       if (c == SQL_END_OF_STRING) {
-                               return SQL_MATCH;
-                       }
+                       if (c == SQL_END_OF_STRING)
+                               return MATCH;
                        if (c == matchOther) {
                                if (pInfo->matchSet == 0) {
                                        c = Utf8Read(pattern, pattern_end);
                                        if (c == SQL_INVALID_UTF8_SYMBOL)
-                                               return SQL_INVALID_PATTERN;
+                                               return INVALID_PATTERN;
                                        if (c == SQL_END_OF_STRING)
-                                               return SQL_NOWILDCARDMATCH;
+                                               return NO_WILDCARD_MATCH;
                                } else {
                                        /* "[...]" immediately
                                         * follows the "*". We
@@ -785,13 +784,13 @@ sql_utf8_pattern_compare(const char *pattern,
                                                                string,
                                                                pInfo,
                                                                matchOther);
-                                               if (bMatch != SQL_NOMATCH)
+                                               if (bMatch != NO_MATCH)
                                                        return bMatch;
                                                c = Utf8Read(string, string_end);
                                                if (c == SQL_INVALID_UTF8_SYMBOL)
-                                                       return SQL_NOMATCH;
+                                                       return NO_MATCH;
                                        }
-                                       return SQL_NOWILDCARDMATCH;
+                                       return NO_WILDCARD_MATCH;
                                }
                        }
 
@@ -825,7 +824,7 @@ sql_utf8_pattern_compare(const char *pattern,
                                 */
                                c2 = Utf8Read(string, string_end);
                                if (c2 == SQL_INVALID_UTF8_SYMBOL)
-                                       return SQL_NOMATCH;
+                                       return NO_MATCH;
                                if (!noCase) {
                                        if (c2 != c)
                                                continue;
@@ -837,18 +836,18 @@ sql_utf8_pattern_compare(const char *pattern,
                                                                  string,
                                                                  pInfo,
                                                                  matchOther);
-                               if (bMatch != SQL_NOMATCH)
+                               if (bMatch != NO_MATCH)
                                        return bMatch;
                        }
-                       return SQL_NOWILDCARDMATCH;
+                       return NO_WILDCARD_MATCH;
                }
                if (c == matchOther) {
                        if (pInfo->matchSet == 0) {
                                c = Utf8Read(pattern, pattern_end);
                                if (c == SQL_INVALID_UTF8_SYMBOL)
-                                       return SQL_INVALID_PATTERN;
+                                       return INVALID_PATTERN;
                                if (c == SQL_END_OF_STRING)
-                                       return SQL_NOMATCH;
+                                       return NO_MATCH;
                                zEscaped = pattern;
                        } else {
                                UChar32 prior_c = 0;
@@ -856,24 +855,24 @@ sql_utf8_pattern_compare(const char *pattern,
                                int invert = 0;
                                c = Utf8Read(string, string_end);
                                if (c == SQL_INVALID_UTF8_SYMBOL)
-                                       return SQL_NOMATCH;
+                                       return NO_MATCH;
                                if (string == string_end)
-                                       return SQL_NOMATCH;
+                                       return NO_MATCH;
                                c2 = Utf8Read(pattern, pattern_end);
                                if (c2 == SQL_INVALID_UTF8_SYMBOL)
-                                       return SQL_INVALID_PATTERN;
+                                       return INVALID_PATTERN;
                                if (c2 == '^') {
                                        invert = 1;
                                        c2 = Utf8Read(pattern, pattern_end);
                                        if (c2 == SQL_INVALID_UTF8_SYMBOL)
-                                               return SQL_INVALID_PATTERN;
+                                               return INVALID_PATTERN;
                                }
                                if (c2 == ']') {
                                        if (c == ']')
                                                seen = 1;
                                        c2 = Utf8Read(pattern, pattern_end);
                                        if (c2 == SQL_INVALID_UTF8_SYMBOL)
-                                               return SQL_INVALID_PATTERN;
+                                               return INVALID_PATTERN;
                                }
                                while (c2 != SQL_END_OF_STRING && c2 != ']') {
                                        if (c2 == '-' && pattern[0] != ']'
@@ -881,30 +880,28 @@ sql_utf8_pattern_compare(const char *pattern,
                                            && prior_c > 0) {
                                                c2 = Utf8Read(pattern, pattern_end);
                                                if (c2 == SQL_INVALID_UTF8_SYMBOL)
-                                                       return SQL_INVALID_PATTERN;
+                                                       return INVALID_PATTERN;
                                                if (c >= prior_c && c <= c2)
                                                        seen = 1;
                                                prior_c = 0;
                                        } else {
-                                               if (c == c2) {
+                                               if (c == c2)
                                                        seen = 1;
-                                               }
                                                prior_c = c2;
                                        }
                                        c2 = Utf8Read(pattern, pattern_end);
                                        if (c2 == SQL_INVALID_UTF8_SYMBOL)
-                                               return SQL_INVALID_PATTERN;
+                                               return INVALID_PATTERN;
                                }
                                if (pattern == pattern_end ||
-                                   (seen ^ invert) == 0) {
-                                       return SQL_NOMATCH;
-                               }
+                                   (seen ^ invert) == 0)
+                                       return NO_MATCH;
                                continue;
                        }
                }
                c2 = Utf8Read(string, string_end);
                if (c2 == SQL_INVALID_UTF8_SYMBOL)
-                       return SQL_NOMATCH;
+                       return NO_MATCH;
                if (c == c2)
                        continue;
                if (noCase){
@@ -916,16 +913,15 @@ sql_utf8_pattern_compare(const char *pattern,
                         * to_lower allows to respect Turkish 'İ'
                         * in default locale.
                         */
-                       if (u_tolower(c) == c2 ||
-                           c == u_tolower(c2))
+                       if (u_tolower(c) == c2 || c == u_tolower(c2))
                                continue;
                }
                if (c == matchOne && pattern != zEscaped &&
                    c2 != SQL_END_OF_STRING)
                        continue;
-               return SQL_NOMATCH;
+               return NO_MATCH;
        }
-       return string == string_end ? SQL_MATCH : SQL_NOMATCH;
+       return string == string_end ? MATCH : NO_MATCH;
 }
 
 /*
@@ -1030,12 +1026,12 @@ likeFunc(sqlite3_context * context, int argc, sqlite3_value ** argv)
 #endif
        int res;
        res = sql_utf8_pattern_compare(zB, zA, pInfo, escape);
-       if (res == SQL_INVALID_PATTERN) {
+       if (res == INVALID_PATTERN) {
                sqlite3_result_error(context, "LIKE or GLOB pattern can only"
                                     " contain UTF-8 characters", -1);
                return;
        }
-       sqlite3_result_int(context, res == SQL_MATCH);
+       sqlite3_result_int(context, res == MATCH);
 }
 
 /*
diff --git a/test/sql-tap/e_expr.test.lua b/test/sql-tap/e_expr.test.lua
index 3697b7d7f..682771f36 100755
--- a/test/sql-tap/e_expr.test.lua
+++ b/test/sql-tap/e_expr.test.lua
@@ -99,10 +99,10 @@ operations = {
     {"<<", ">>", "&", "|"},
     {"<", "<=", ">", ">="},
 -- NOTE: This test needs refactoring after deletion of GLOB &
---      type restrictions for LIKE. (See #3572)
--- Another NOTE: MATCH & REGEXP aren't supported in Tarantool &
---              are waiting for their hour, don't confuse them
---              being commented with ticket above.
+-- type restrictions for LIKE. (See #3572)
+-- Also, MATCH & REGEXP aren't supported in Tarantool &
+-- are waiting for their hour, don't confuse them
+-- being commented with ticket above.
     {"=", "==", "!=", "<>"}, --"LIKE", "GLOB"}, --"MATCH", "REGEXP"},
     {"AND"},
     {"OR"},
@@ -493,7 +493,6 @@ for _, op in ipairs(oplist) do
         end
     end
 end
-
 ---------------------------------------------------------------------------
 -- Test the IS and IS NOT operators.
 --
diff --git a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
index c2a2a4d92..612b8183a 100755
--- a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
+++ b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
@@ -133,7 +133,7 @@ local invalid_testcases = {
     '\xD0',
 }
 
--- Invalid testcases.
+-- Invalid unicode symbols.
 for i, tested_string in ipairs(invalid_testcases) do
 
     -- We should raise an error in case
@@ -183,7 +183,7 @@ local valid_testcases = {
     '\xE2\x80\xA9',
 }
 
--- Valid testcases.
+-- Valid unicode symbols.
 for i, tested_string in ipairs(valid_testcases) do
     local test_name = prefix .. "8." .. tostring(i)
     local test_itself = "SELECT 'abc' LIKE 'ab" .. tested_string .. "';"



More information about the Tarantool-patches mailing list