From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id CB46D22C42 for ; Thu, 28 Jun 2018 08:47:40 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id z92y0yBajkPi for ; Thu, 28 Jun 2018 08:47:40 -0400 (EDT) Received: from mail-lf0-f68.google.com (mail-lf0-f68.google.com [209.85.215.68]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 493C022C12 for ; Thu, 28 Jun 2018 08:47:40 -0400 (EDT) Received: by mail-lf0-f68.google.com with SMTP id q6-v6so4114705lfh.6 for ; Thu, 28 Jun 2018 05:47:40 -0700 (PDT) From: "N.Tatunov" Subject: [tarantool-patches] [PATCH] sql: LIKE & GLOB pattern comparison issue Date: Thu, 28 Jun 2018 15:47:16 +0300 Message-Id: <1530190036-10105-1-git-send-email-hollow653@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: alexander.turenko@tarantool.org, "N.Tatunov" Currently function that compares pattern and string for GLOB & LIKE operators doesn't work properly. It uses ICU reading function which perhaps was working differently before and the implementation for the comparison ending isn't paying attention to some special cases, hence in those cases it works improperly. Now the checks for comparison should work fine. Сloses: #3251 Сloses: #3334 --- src/box/sql/func.c | 25 ++++---- test/sql-tap/like1.test.lua | 152 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+), 12 deletions(-) create mode 100755 test/sql-tap/like1.test.lua diff --git a/src/box/sql/func.c b/src/box/sql/func.c index c06e3bd..dcbd7e0 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -643,6 +643,7 @@ static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 }; #define SQLITE_MATCH 0 #define SQLITE_NOMATCH 1 #define SQLITE_NOWILDCARDMATCH 2 +#define SQL_NO_SYMBOLS_LEFT 65535 /* * Compare two UTF-8 strings for equality where the first string is @@ -698,29 +699,28 @@ patternCompare(const char * pattern, /* The glob pattern */ const char * string_end = string + strlen(string); UErrorCode status = U_ZERO_ERROR; - while (pattern < pattern_end){ - c = Utf8Read(pattern, pattern_end); + while ((c = Utf8Read(pattern, pattern_end)) != SQL_NO_SYMBOLS_LEFT) { if (c == matchAll) { /* Match "*" */ /* Skip over multiple "*" characters in the pattern. If there * are also "?" characters, skip those as well, but consume a * single character of the input string for each "?" skipped */ - while (pattern < pattern_end){ - c = Utf8Read(pattern, pattern_end); + while ((c = Utf8Read(pattern, pattern_end)) != + SQL_NO_SYMBOLS_LEFT) { if (c != matchAll && c != matchOne) break; - if (c == matchOne - && Utf8Read(string, string_end) == 0) { + if (c == matchOne && + Utf8Read(string, string_end) == + SQL_NO_SYMBOLS_LEFT) return SQLITE_NOWILDCARDMATCH; - } } /* "*" at the end of the pattern matches */ - if (pattern == pattern_end) + if (c == SQL_NO_SYMBOLS_LEFT) return SQLITE_MATCH; if (c == matchOther) { if (pInfo->matchSet == 0) { c = Utf8Read(pattern, pattern_end); - if (c == 0) + if (c == SQL_NO_SYMBOLS_LEFT) return SQLITE_NOWILDCARDMATCH; } else { /* "[...]" immediately follows the "*". We have to do a slow @@ -782,7 +782,7 @@ patternCompare(const char * pattern, /* The glob pattern */ if (c == matchOther) { if (pInfo->matchSet == 0) { c = Utf8Read(pattern, pattern_end); - if (c == 0) + if (c == SQL_NO_SYMBOLS_LEFT) return SQLITE_NOMATCH; zEscaped = pattern; } else { @@ -802,7 +802,7 @@ patternCompare(const char * pattern, /* The glob pattern */ seen = 1; c2 = Utf8Read(pattern, pattern_end); } - while (c2 && c2 != ']') { + while (c2 != SQL_NO_SYMBOLS_LEFT && c2 != ']') { if (c2 == '-' && pattern[0] != ']' && pattern < pattern_end && prior_c > 0) { @@ -839,7 +839,8 @@ patternCompare(const char * pattern, /* The glob pattern */ c == u_tolower(c2)) continue; } - if (c == matchOne && pattern != zEscaped && c2 != 0) + if (c == matchOne && pattern != zEscaped && + c2 != SQL_NO_SYMBOLS_LEFT) continue; return SQLITE_NOMATCH; } diff --git a/test/sql-tap/like1.test.lua b/test/sql-tap/like1.test.lua new file mode 100755 index 0000000..42b4d43 --- /dev/null +++ b/test/sql-tap/like1.test.lua @@ -0,0 +1,152 @@ +#!/usr/bin/env tarantool +test = require("sqltester") +test:plan(13) + +test:do_catchsql_test( + "like-test-1.1", + [[ + CREATE TABLE t2 (column1 INTEGER, + column2 VARCHAR(100), + column3 BLOB, + column4 FLOAT, + PRIMARY KEY (column1, column2)); + INSERT INTO t2 VALUES (1, 'AB', X'4142', 5.5); + INSERT INTO t2 VALUES (1, 'CD', X'2020', 1E4); + INSERT INTO t2 VALUES (2, 'AB', X'2020', 12.34567); + INSERT INTO t2 VALUES (-1000, '', X'', 0.0); + CREATE TABLE t1 (a INT PRIMARY KEY, str VARCHAR(100)); + INSERT INTO t1 VALUES (1, 'ab'); + INSERT INTO t1 VALUES (2, 'abCDF'); + INSERT INTO t1 VALUES (3, 'CDF'); + CREATE TABLE t (s1 char(2) primary key, s2 char(2)); + INSERT INTO t VALUES ('AB', 'AB'); + ]], { + -- + 0 + -- + }) + +test:do_execsql_test( + "like-test-1.2", + [[ + SELECT column1, column2, column1 * column4 FROM t2 WHERE column2 LIKE '_B'; + ]], { + -- + 1, 'AB', 5.5, 2, 'AB', 24.69134 + -- + }) + +test:do_execsql_test( + "like-test-1.3", + [[ + SELECT column1, column2 FROM t2 WHERE column2 LIKE '%B'; + ]], { + -- + 1, 'AB', 2, 'AB' + -- + }) + +test:do_execsql_test( + "like-test-1.4", + [[ + SELECT column1, column2 FROM t2 WHERE column2 LIKE 'A__'; + ]], { + -- + + -- + }) + +test:do_execsql_test( + "like-test-1.5", + [[ + SELECT column1, column2 FROM t2 WHERE column2 LIKE 'A_'; + ]], { + -- + 1, 'AB', 2, 'AB' + -- + }) + +test:do_execsql_test( + "like-test-1.6", + [[ + SELECT column1, column2 FROM t2 WHERE column2 LIKE 'A'; + ]], { + -- + + -- + }) + +test:do_execsql_test( + "like-test-1.7", + [[ + SELECT column1, column2 FROM t2 WHERE column2 LIKE '_'; + ]], { + -- + + -- + }) + +test:do_execsql_test( + "like-test-1.8", + [[ + SELECT * FROM t WHERE s1 LIKE '%A'; + ]], { + -- + + -- + }) + +test:do_execsql_test( + "like-test-1.9", + [[ + SELECT * FROM t WHERE s1 LIKE '%C'; + ]], { + -- + + -- + }) + +test:do_execsql_test( + "like-test-1.10", + [[ + SELECT * FROM t1 WHERE str LIKE '%df'; + ]], { + -- + 2, 'abCDF', 3, 'CDF' + -- + }) + +test:do_execsql_test( + "like-test-1.11", + [[ + SELECT * FROM t1 WHERE str LIKE 'a_'; + ]], { + -- + 1, 'ab' + -- + }) + +test:do_execsql_test( + "like-test-1.12", + [[ + select column1, column2 from t2 where column2 like '__'; + ]], { + -- + 1, 'AB', 1, 'CD', 2, 'AB' + -- + }) + +test:do_execsql_test( + "like-test-1.13", + [[ + DROP TABLE t1; + DROP TABLE t2; + DROP TABLE t; + ]], { + -- + + -- + }) + + +test:finish_test() -- 2.7.4