Tarantool development patches archive
 help / color / mirror / Atom feed
* [tarantool-patches] [PATCH 0/2] Make LIKE predicate dependent on collation
@ 2019-07-13 22:51 Roman Khabibov
  2019-07-13 22:51 ` [tarantool-patches] [PATCH 1/2] sql: remove "PRAGMA case_sensitive_like" Roman Khabibov
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Roman Khabibov @ 2019-07-13 22:51 UTC (permalink / raw)
  To: tarantool-patches; +Cc: korablev

The first patch removes "PRAGMA case_sensitive_like". The second patch adds
collation comparison to the pattern match function and makes changes to
WHERE x LIKE 'aBc%' optimization.

Roman Khabibov (2):
  sql: remove "PRAGMA case_sensitive_like"
  sql: make LIKE predicate dependent on collation

 src/box/sql/func.c                            | 131 ++++++++----------
 src/box/sql/pragma.c                          |   7 -
 src/box/sql/pragma.h                          | 104 +++++++-------
 src/box/sql/sqlInt.h                          |   9 +-
 src/box/sql/whereexpr.c                       |  70 +++-------
 test/sql-tap/collation.test.lua               |  35 +++--
 ...gh-3251-string-pattern-comparison.test.lua |  90 +++++++++++-
 test/sql-tap/like3.test.lua                   |   4 +-
 test/sql-tap/pragma.test.lua                  |  19 +--
 test/sql-tap/whereG.test.lua                  |   8 +-
 test/sql/sql-debug.result                     |   1 -
 11 files changed, 246 insertions(+), 232 deletions(-)

-- 
Branch: https://github.com/tarantool/tarantool/tree/romanhabibov/gh-3589-like
Issue: https://github.com/tarantool/tarantool/issues/3589

2.20.1 (Apple Git-117)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] [PATCH 1/2] sql: remove "PRAGMA case_sensitive_like"
  2019-07-13 22:51 [tarantool-patches] [PATCH 0/2] Make LIKE predicate dependent on collation Roman Khabibov
@ 2019-07-13 22:51 ` Roman Khabibov
  2019-07-17 16:18   ` [tarantool-patches] " n.pettik
  2019-07-13 22:51 ` [tarantool-patches] [PATCH 2/2] sql: make LIKE predicate dependent on collation Roman Khabibov
  2019-08-01 12:26 ` [tarantool-patches] Re: [PATCH 0/2] Make " Kirill Yukhin
  2 siblings, 1 reply; 9+ messages in thread
From: Roman Khabibov @ 2019-07-13 22:51 UTC (permalink / raw)
  To: tarantool-patches; +Cc: korablev

According to ANSI, LIKE should match characters taking into
account passed collation, so this pragma is no longer needed.

Part of #3589
---
 src/box/sql/func.c           |  34 ------------
 src/box/sql/pragma.c         |   7 ---
 src/box/sql/pragma.h         | 104 ++++++++++++++++-------------------
 src/box/sql/sqlInt.h         |   7 +--
 src/box/sql/whereexpr.c      |   8 +--
 test/sql-tap/pragma.test.lua |  19 +++----
 test/sql/sql-debug.result    |   1 -
 7 files changed, 59 insertions(+), 121 deletions(-)

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 98cad51fd..4e4d14cf7 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -1791,40 +1791,6 @@ sqlRegisterPerConnectionBuiltinFunctions(sql * db)
 		sqlOomFault(db);
 }
 
-/*
- * Set the LIKEOPT flag on the 2-argument function with the given name.
- */
-static void
-setLikeOptFlag(sql * db, const char *zName, u8 flagVal)
-{
-	FuncDef *pDef;
-	pDef = sqlFindFunction(db, zName, 2, 0);
-	if (ALWAYS(pDef)) {
-		pDef->funcFlags |= flagVal;
-	}
-}
-
-/**
- * Register the built-in LIKE function.
- */
-void
-sqlRegisterLikeFunctions(sql *db, int is_case_insensitive)
-{
-	/*
-	 * FIXME: after introducing type <BOOLEAN> LIKE must
-	 * return that type: TRUE if the string matches the
-	 * supplied pattern and FALSE otherwise.
-	 */
-	int *is_like_ci = SQL_INT_TO_PTR(is_case_insensitive);
-	sqlCreateFunc(db, "LIKE", FIELD_TYPE_BOOLEAN, 2, 0,
-			  is_like_ci, likeFunc, 0, 0, 0);
-	sqlCreateFunc(db, "LIKE", FIELD_TYPE_BOOLEAN, 3, 0,
-			  is_like_ci, likeFunc, 0, 0, 0);
-	setLikeOptFlag(db, "LIKE",
-		       !(is_case_insensitive) ? (SQL_FUNC_LIKE |
-		       SQL_FUNC_CASE) : SQL_FUNC_LIKE);
-}
-
 int
 sql_is_like_func(struct sql *db, struct Expr *expr, int *is_like_ci)
 {
diff --git a/src/box/sql/pragma.c b/src/box/sql/pragma.c
index 53524b617..293c2cd2e 100644
--- a/src/box/sql/pragma.c
+++ b/src/box/sql/pragma.c
@@ -484,13 +484,6 @@ sqlPragma(Parse * pParse, Token * pId,	/* First part of [schema.]id field */
 					sqlParserTrace(0, 0);
 			}
 #endif
-			/*
-			 * Reinstall the LIKE and functions. The
-			 * variant of LIKE * used will be case
-			 * sensitive or not depending on the RHS.
-			 */
-			if (mask == LIKE_CASE_SENS_FLAG)
-				sqlRegisterLikeFunctions(db, !is_pragma_set);
 		}
 		break;
 	}
diff --git a/src/box/sql/pragma.h b/src/box/sql/pragma.h
index aa7e7cd96..02895b0ea 100644
--- a/src/box/sql/pragma.h
+++ b/src/box/sql/pragma.h
@@ -93,60 +93,57 @@ static const char *const pragCName[] = {
 	/*  55 */ "TEXT",
 	/*  56 */ "match",
 	/*  57 */ "TEXT",
-	/* Used by: case_sensitive_like */
-	/*  58 */ "case_sensitive_like",
-	/*  59 */ "INTEGER",
 	/* Used by: count_changes */
-	/*  60 */ "count_changes",
-	/*  61 */ "INTEGER",
+	/*  58 */ "count_changes",
+	/*  59 */ "INTEGER",
 	/* Used by: defer_foreign_keys */
-	/*  62 */ "defer_foreign_keys",
-	/*  63 */ "INTEGER",
+	/*  60 */ "defer_foreign_keys",
+	/*  61 */ "INTEGER",
 	/* Used by: full_column_names */
-	/*  64 */ "full_column_names",
-	/*  65 */ "INTEGER",
+	/*  62 */ "full_column_names",
+	/*  63 */ "INTEGER",
 	/* Used by: parser_trace */
-	/*  66 */ "parser_trace",
-	/*  67 */ "INTEGER",
+	/*  64 */ "parser_trace",
+	/*  65 */ "INTEGER",
 	/* Used by: recursive_triggers */
-	/*  68 */ "recursive_triggers",
-	/*  69 */ "INTEGER",
+	/*  66 */ "recursive_triggers",
+	/*  67 */ "INTEGER",
 	/* Used by: reverse_unordered_selects */
-	/*  70 */ "reverse_unordered_selects",
-	/*  71 */ "INTEGER",
+	/*  68 */ "reverse_unordered_selects",
+	/*  69 */ "INTEGER",
 	/* Used by: select_trace */
-	/*  72 */ "select_trace",
-	/*  73 */ "INTEGER",
+	/*  70 */ "select_trace",
+	/*  71 */ "INTEGER",
 	/* Used by: short_column_names */
-	/*  74 */ "short_column_names",
-	/*  75 */ "INTEGER",
+	/*  72 */ "short_column_names",
+	/*  73 */ "INTEGER",
 	/* Used by: sql_compound_select_limit */
-	/*  76 */ "sql_compound_select_limit",
-	/*  77 */ "INTEGER",
+	/*  74 */ "sql_compound_select_limit",
+	/*  75 */ "INTEGER",
 	/* Used by: sql_default_engine */
-	/*  78 */ "sql_default_engine",
-	/*  79 */ "TEXT",
+	/*  76 */ "sql_default_engine",
+	/*  77 */ "TEXT",
 	/* Used by: sql_trace */
-	/*  80 */ "sql_trace",
-	/*  81 */ "INTEGER",
+	/*  78 */ "sql_trace",
+	/*  79 */ "INTEGER",
 	/* Used by: vdbe_addoptrace */
-	/*  82 */ "vdbe_addoptrace",
-	/*  83 */ "INTEGER",
+	/*  80 */ "vdbe_addoptrace",
+	/*  81 */ "INTEGER",
 	/* Used by: vdbe_debug */
-	/*  84 */ "vdbe_debug",
-	/*  85 */ "INTEGER",
+	/*  82 */ "vdbe_debug",
+	/*  83 */ "INTEGER",
 	/* Used by: vdbe_eqp */
-	/*  86 */ "vdbe_eqp",
-	/*  87 */ "INTEGER",
+	/*  84 */ "vdbe_eqp",
+	/*  85 */ "INTEGER",
 	/* Used by: vdbe_listing */
-	/*  88 */ "vdbe_listing",
-	/*  89 */ "INTEGER",
+	/*  86 */ "vdbe_listing",
+	/*  87 */ "INTEGER",
 	/* Used by: vdbe_trace */
-	/*  90 */ "vdbe_trace",
-	/*  91 */ "INTEGER",
+	/*  88 */ "vdbe_trace",
+	/*  89 */ "INTEGER",
 	/* Used by: where_trace */
-	/*  92 */ "where_trace",
-	/*  93 */ "INTEGER",
+	/*  90 */ "where_trace",
+	/*  91 */ "INTEGER",
 };
 
 /* Definitions of all built-in pragmas */
@@ -163,11 +160,6 @@ typedef struct PragmaName {
  * to be sorted. For more info see pragma_locate function.
  */
 static const PragmaName aPragmaName[] = {
-	{ /* zName:     */ "case_sensitive_like",
-	 /* ePragTyp:  */ PragTyp_FLAG,
-	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 58, 1,
-	 /* iArg:      */ LIKE_CASE_SENS_FLAG},
 	{ /* zName:     */ "collation_list",
 	 /* ePragTyp:  */ PragTyp_COLLATION_LIST,
 	 /* ePragFlg:  */ PragFlg_Result0,
@@ -210,46 +202,46 @@ static const PragmaName aPragmaName[] = {
 	{ /* zName:     */ "parser_trace",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 66, 1,
+	 /* ColNames:  */ 64, 1,
 	 /* iArg:      */ PARSER_TRACE_FLAG},
 #endif
 	{ /* zName:     */ "recursive_triggers",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 68, 1,
+	 /* ColNames:  */ 66, 1,
 	 /* iArg:      */ SQL_RecTriggers},
 	{ /* zName:     */ "reverse_unordered_selects",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 70, 1,
+	 /* ColNames:  */ 68, 1,
 	 /* iArg:      */ SQL_ReverseOrder},
 #if defined(SQL_DEBUG)
 	{ /* zName:     */ "select_trace",
 	/* ePragTyp:  */ PragTyp_FLAG,
 	/* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	/* ColNames:  */ 72, 1,
+	/* ColNames:  */ 70, 1,
 	/* iArg:      */ SQL_SelectTrace},
 #endif
 	{ /* zName:     */ "short_column_names",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 73, 1,
+	 /* ColNames:  */ 72, 1,
 	 /* iArg:      */ SQL_ShortColNames},
 	{ /* zName:     */ "sql_compound_select_limit",
 	/* ePragTyp:  */ PragTyp_COMPOUND_SELECT_LIMIT,
 	/* ePragFlg:  */ PragFlg_Result0,
-	/* ColNames:  */ 76, 1,
+	/* ColNames:  */ 74, 1,
 	/* iArg:      */ 0},
 	{ /* zName:     */ "sql_default_engine",
 	 /* ePragTyp:  */ PragTyp_DEFAULT_ENGINE,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 78, 1,
+	 /* ColNames:  */ 76, 1,
 	 /* iArg:      */ 0},
 #if defined(SQL_DEBUG)
 	{ /* zName:     */ "sql_trace",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 80, 1,
+	 /* ColNames:  */ 78, 1,
 	 /* iArg:      */ SQL_SqlTrace},
 #endif
 	{ /* zName:     */ "stats",
@@ -268,33 +260,33 @@ static const PragmaName aPragmaName[] = {
 	{ /* zName:     */ "vdbe_addoptrace",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 82, 1,
+	 /* ColNames:  */ 80, 1,
 	 /* iArg:      */ SQL_VdbeAddopTrace},
 	{ /* zName:     */ "vdbe_debug",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 84, 1,
+	 /* ColNames:  */ 82, 1,
 	 /* iArg:      */
 	 SQL_SqlTrace | SQL_VdbeListing | SQL_VdbeTrace},
 	{ /* zName:     */ "vdbe_eqp",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 86, 1,
+	 /* ColNames:  */ 84, 1,
 	 /* iArg:      */ SQL_VdbeEQP},
 	{ /* zName:     */ "vdbe_listing",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 88, 1,
+	 /* ColNames:  */ 86, 1,
 	 /* iArg:      */ SQL_VdbeListing},
 	{ /* zName:     */ "vdbe_trace",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 90, 1,
+	 /* ColNames:  */ 88, 1,
 	 /* iArg:      */ SQL_VdbeTrace},
 	{ /* zName:     */ "where_trace",
 	/* ePragTyp:  */ PragTyp_FLAG,
 	/* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	/* ColNames:  */ 92, 1,
+	/* ColNames:  */ 90, 1,
 	/* iArg:      */ SQL_WhereTrace},
 #endif
 };
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 73dc6e4d7..57da8ec5d 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -1143,8 +1143,6 @@ struct sql {
 /* Debug print info about SQL query as it parsed */
 #define PARSER_TRACE_FLAG  0x00000002
 #define SQL_FullColNames   0x00000004	/* Show full column names on SELECT */
-/* True if LIKE is case sensitive. */
-#define LIKE_CASE_SENS_FLAG 0x00000008
 #define SQL_ShortColNames  0x00000040	/* Show short columns names */
 #define SQL_CountRows      0x00000080	/* Count rows changed by INSERT, */
 					  /*   DELETE, or UPDATE and return */
@@ -1271,7 +1269,6 @@ struct FuncDestructor {
  *     SQL_FUNC_CONSTANT  ==  sql_DETERMINISTIC from the API
  */
 #define SQL_FUNC_LIKE     0x0004	/* Candidate for the LIKE optimization */
-#define SQL_FUNC_CASE     0x0008	/* Case-sensitive LIKE-type function */
 #define SQL_FUNC_EPHEM    0x0010	/* Ephemeral.  Delete with VDBE */
 #define SQL_FUNC_NEEDCOLL 0x0020	/* sqlGetFuncCollSeq() might be called.
 					 * The flag is set when the collation
@@ -4266,8 +4263,6 @@ sql_key_info_unref(struct sql_key_info *key_info);
 struct key_def *
 sql_key_info_to_key_def(struct sql_key_info *key_info);
 
-void sqlRegisterLikeFunctions(sql *, int);
-
 /**
  * Check if the function implements LIKE-style comparison & if it
  * is appropriate to apply a LIKE query optimization.
@@ -4279,7 +4274,7 @@ void sqlRegisterLikeFunctions(sql *, int);
  * @retval 1 if LIKE optimization can be used, 0 otherwise.
  */
 int
-sql_is_like_func(struct sql *db, struct Expr *expr, int *is_like_ci);
+sql_is_like_func(struct sql *db, struct Expr *expr);
 
 int sqlCreateFunc(sql *, const char *, enum field_type,
 		      int, int, void *,
diff --git a/src/box/sql/whereexpr.c b/src/box/sql/whereexpr.c
index a88f96407..0a0e900bd 100644
--- a/src/box/sql/whereexpr.c
+++ b/src/box/sql/whereexpr.c
@@ -237,14 +237,12 @@ operatorMask(int op)
  *                    pattern prefix.
  * @param pisComplete True if the only wildcard is '%' in the
  *                    last character.
- * @param pnoCase     True if case insensitive.
- *
  * @retval True if the given expr is a LIKE operator & is
  *         optimizable using inequality constraints.
  */
 static int
 like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
-			   int *pisComplete, int *pnoCase)
+			   int *pisComplete)
 {
 	/* String on RHS of LIKE operator. */
 	const char *z = 0;
@@ -264,7 +262,7 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
 	/* Result code to return. */
 	int rc;
 
-	if (!sql_is_like_func(db, pExpr, pnoCase)) {
+	if (!sql_is_like_func(db, pExpr)) {
 		return 0;
 	}
 	pList = pExpr->x.pList;
@@ -1156,7 +1154,7 @@ exprAnalyze(SrcList * pSrc,	/* the FROM clause */
 	 */
 	if (pWC->op == TK_AND &&
 	    like_optimization_is_valid(pParse, pExpr, &pStr1,
-				       &isComplete, &noCase)) {
+				       &isComplete)) {
 		Expr *pLeft;
 		/* Copy of pStr1 - RHS of LIKE operator. */
 		Expr *pStr2;
diff --git a/test/sql-tap/pragma.test.lua b/test/sql-tap/pragma.test.lua
index 8221d36e6..d8fb550d0 100755
--- a/test/sql-tap/pragma.test.lua
+++ b/test/sql-tap/pragma.test.lua
@@ -81,21 +81,16 @@ test:do_execsql_test(
 	-- </pragma-3.2>
 })
 
--- Check that "PRAGMA case_sensitive_like" returns its status
--- (0 or 1) if called without parameter.
-test:do_test(
+-- Check that "PRAGMA case_sensitive_like" does not exist.
+test:do_catchsql_test(
 	"pragma-3.3",
-	function()
-		old_value = box.execute('PRAGMA case_sensitive_like').rows
-		box.execute('PRAGMA case_sensitive_like = 1')
-		new_value = box.execute('PRAGMA case_sensitive_like').rows
-		box.execute('PRAGMA case_sensitive_like = '.. old_value[1][1])
-		return new_value[1][1]
-	end,
+	[[
+		PRAGMA case_sensitive_like
+	]], {
 	-- <pragma-3.3>
-	1
+	1, "Pragma 'CASE_SENSITIVE_LIKE' does not exist"
 	-- </pragma-3.3>
-	)
+})
 
 --
 -- gh-3733: remove useless or obsolete pragmas
diff --git a/test/sql/sql-debug.result b/test/sql/sql-debug.result
index 32c65cc81..2dba6844a 100644
--- a/test/sql/sql-debug.result
+++ b/test/sql/sql-debug.result
@@ -38,7 +38,6 @@ box.execute('PRAGMA')
   - name: pragma_value
     type: INTEGER
   rows:
-  - ['case_sensitive_like', 0]
   - ['count_changes', 0]
   - ['defer_foreign_keys', 0]
   - ['full_column_names', 0]
-- 
2.20.1 (Apple Git-117)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] [PATCH 2/2] sql: make LIKE predicate dependent on collation
  2019-07-13 22:51 [tarantool-patches] [PATCH 0/2] Make LIKE predicate dependent on collation Roman Khabibov
  2019-07-13 22:51 ` [tarantool-patches] [PATCH 1/2] sql: remove "PRAGMA case_sensitive_like" Roman Khabibov
@ 2019-07-13 22:51 ` Roman Khabibov
  2019-07-17 16:19   ` [tarantool-patches] " n.pettik
  2019-08-01 12:26 ` [tarantool-patches] Re: [PATCH 0/2] Make " Kirill Yukhin
  2 siblings, 1 reply; 9+ messages in thread
From: Roman Khabibov @ 2019-07-13 22:51 UTC (permalink / raw)
  To: tarantool-patches; +Cc: korablev

According to ANSI, LIKE should match characters taking into
account passed collation.

ISO/IEC JTC 1/SC 32 2011, Part 2: Foundation, page 445

Closes #3589
---
 src/box/sql/func.c                            | 97 +++++++++++--------
 src/box/sql/sqlInt.h                          |  2 +-
 src/box/sql/whereexpr.c                       | 62 ++++--------
 test/sql-tap/collation.test.lua               | 35 +++----
 ...gh-3251-string-pattern-comparison.test.lua | 90 ++++++++++++++++-
 test/sql-tap/like3.test.lua                   |  4 +-
 test/sql-tap/whereG.test.lua                  |  8 +-
 7 files changed, 187 insertions(+), 111 deletions(-)

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 4e4d14cf7..b660d05c5 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -44,6 +44,7 @@
 #include <unicode/ucnv.h>
 #include <unicode/uchar.h>
 #include <unicode/ucol.h>
+#include "box/coll_id_cache.h"
 
 static UConverter* pUtf8conv;
 
@@ -673,6 +674,29 @@ enum pattern_match_status {
 	INVALID_PATTERN = 3
 };
 
+/**
+ * Read an UTF-8 character from @a str and move it to to @a
+ * char_len of that character.
+ *
+ * @param[out] str Ptr to ptr to string.
+ * @param str_end Ptr to string last symbol.
+ * @param[out] char_ptr Ptr to ptr to the UTF-8 character.
+ * @param[out] char_len Ptr to length of the UTF-8 character in
+ * bytes.
+ *
+ * @retval UTF-8 character.
+ */
+static UChar32
+step_utf8_char(const char **str, const char *str_end, const char **char_ptr,
+	       int *char_len)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	*char_ptr = *str;
+	UChar32 next_utf8 = Utf8Read(*str, str_end);
+	*char_len = *str - *char_ptr;
+	return next_utf8;
+}
+
 /**
  * Compare two UTF-8 strings for equality where the first string
  * is a LIKE expression.
@@ -699,7 +723,7 @@ enum pattern_match_status {
  * @param string String being compared.
  * @param pattern_end Ptr to pattern last symbol.
  * @param string_end Ptr to string last symbol.
- * @param is_like_ci true if LIKE is case insensitive.
+ * @param coll Pointer to collation.
  * @param match_other The escape char for LIKE.
  *
  * @retval One of pattern_match_status values.
@@ -709,7 +733,7 @@ sql_utf8_pattern_compare(const char *pattern,
 			 const char *string,
 			 const char *pattern_end,
 			 const char *string_end,
-			 const int is_like_ci,
+			 struct coll *coll,
 			 UChar32 match_other)
 {
 	/* Next pattern and input string chars. */
@@ -717,9 +741,13 @@ sql_utf8_pattern_compare(const char *pattern,
 	/* One past the last escaped input char. */
 	const char *zEscaped = 0;
 	UErrorCode status = U_ZERO_ERROR;
+	const char *pat_char_ptr;
+	const char *str_char_ptr;
+	int pat_char_len;
+	int str_char_len;
 
 	while (pattern < pattern_end) {
-		c = Utf8Read(pattern, pattern_end);
+	c = step_utf8_char(&pattern, pattern_end, &pat_char_ptr, &pat_char_len);
 		if (c == SQL_INVALID_UTF8_SYMBOL)
 			return INVALID_PATTERN;
 		if (c == MATCH_ALL_WILDCARD) {
@@ -730,7 +758,9 @@ sql_utf8_pattern_compare(const char *pattern,
 			 * consume a single character of the
 			 * input string for each "_" skipped.
 			 */
-			while ((c = Utf8Read(pattern, pattern_end)) !=
+			while ((c = step_utf8_char(&pattern, pattern_end,
+						   &pat_char_ptr,
+						   &pat_char_len)) !=
 			       SQL_END_OF_STRING) {
 				if (c == SQL_INVALID_UTF8_SYMBOL)
 					return INVALID_PATTERN;
@@ -751,7 +781,9 @@ sql_utf8_pattern_compare(const char *pattern,
 				return MATCH;
 			}
 			if (c == match_other) {
-				c = Utf8Read(pattern, pattern_end);
+				c = step_utf8_char(&pattern, pattern_end,
+						   &pat_char_ptr,
+						   &pat_char_len);
 				if (c == SQL_INVALID_UTF8_SYMBOL)
 					return INVALID_PATTERN;
 				if (c == SQL_END_OF_STRING)
@@ -773,8 +805,6 @@ sql_utf8_pattern_compare(const char *pattern,
 			 */
 
 			int bMatch;
-			if (is_like_ci)
-				c = u_tolower(c);
 			while (string < string_end){
 				/*
 				 * This loop could have been
@@ -786,21 +816,20 @@ sql_utf8_pattern_compare(const char *pattern,
 				 * lower works better with German
 				 * and Turkish languages.
 				 */
-				c2 = Utf8Read(string, string_end);
+				c2 = step_utf8_char(&string, string_end,
+						    &str_char_ptr,
+						    &str_char_len);
 				if (c2 == SQL_INVALID_UTF8_SYMBOL)
 					return NO_MATCH;
-				if (!is_like_ci) {
-					if (c2 != c)
-						continue;
-				} else {
-					if (c2 != c && u_tolower(c2) != c)
-						continue;
-				}
+				if (coll->cmp(pat_char_ptr, pat_char_len,
+					      str_char_ptr, str_char_len, coll)
+					!= 0)
+					continue;
 				bMatch = sql_utf8_pattern_compare(pattern,
 								  string,
 								  pattern_end,
 								  string_end,
-								  is_like_ci,
+								  coll,
 								  match_other);
 				if (bMatch != NO_MATCH)
 					return bMatch;
@@ -808,30 +837,21 @@ sql_utf8_pattern_compare(const char *pattern,
 			return NO_WILDCARD_MATCH;
 		}
 		if (c == match_other) {
-			c = Utf8Read(pattern, pattern_end);
+			c = step_utf8_char(&pattern, pattern_end, &pat_char_ptr,
+					   &pat_char_len);
 			if (c == SQL_INVALID_UTF8_SYMBOL)
 				return INVALID_PATTERN;
 			if (c == SQL_END_OF_STRING)
 				return NO_MATCH;
 			zEscaped = pattern;
 		}
-		c2 = Utf8Read(string, string_end);
+		c2 = step_utf8_char(&string, string_end, &str_char_ptr,
+				    &str_char_len);
 		if (c2 == SQL_INVALID_UTF8_SYMBOL)
 			return NO_MATCH;
-		if (c == c2)
+		if (coll->cmp(pat_char_ptr, pat_char_len, str_char_ptr,
+			      str_char_len, coll) == 0)
 			continue;
-		if (is_like_ci) {
-			/*
-			 * Small optimization. Reduce number of
-			 * calls to u_tolower function. SQL
-			 * standards suggest use to_upper for
-			 * symbol normalisation. However, using
-			 * to_lower allows to respect Turkish 'İ'
-			 * in default locale.
-			 */
-			if (u_tolower(c) == c2 || c == u_tolower(c2))
-				continue;
-		}
 		if (c == MATCH_ONE_WILDCARD && pattern != zEscaped &&
 		    c2 != SQL_END_OF_STRING)
 			continue;
@@ -847,9 +867,10 @@ sql_utf8_pattern_compare(const char *pattern,
 int
 sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc)
 {
+	struct coll_id *p = coll_by_name("unicode", strlen("unicode"));
 	return sql_utf8_pattern_compare(zPattern, zStr,
 		                        zPattern + strlen(zPattern),
-		                        zStr + strlen(zStr), 0, esc);
+		                        zStr + strlen(zStr), p->coll, esc);
 }
 
 /**
@@ -859,9 +880,10 @@ sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc)
 int
 sql_strlike_ci(const char *zPattern, const char *zStr, unsigned int esc)
 {
+	struct coll_id *p = coll_by_name("unicode_ci", strlen("unicode_ci"));
 	return sql_utf8_pattern_compare(zPattern, zStr,
 		                        zPattern + strlen(zPattern),
-		                        zStr + strlen(zStr), 1, esc);
+		                        zStr + strlen(zStr), p->coll, esc);
 }
 
 /**
@@ -883,7 +905,6 @@ likeFunc(sql_context *context, int argc, sql_value **argv)
 	u32 escape = SQL_END_OF_STRING;
 	int nPat;
 	sql *db = sql_context_db_handle(context);
-	int is_like_ci = SQL_PTR_TO_INT(sql_user_data(context));
 	int rhs_type = sql_value_type(argv[0]);
 	int lhs_type = sql_value_type(argv[1]);
 
@@ -940,8 +961,9 @@ likeFunc(sql_context *context, int argc, sql_value **argv)
 	if (!zA || !zB)
 		return;
 	int res;
-	res = sql_utf8_pattern_compare(zB, zA, zB_end, zA_end,
-				       is_like_ci, escape);
+	struct coll *coll = sqlGetFuncCollSeq(context);
+	res = sql_utf8_pattern_compare(zB, zA, zB_end, zA_end, coll, escape);
+
 	if (res == INVALID_PATTERN) {
 		diag_set(ClientError, ER_SQL_EXECUTE, "LIKE pattern can only "\
 			 "contain UTF-8 characters");
@@ -1792,7 +1814,7 @@ sqlRegisterPerConnectionBuiltinFunctions(sql * db)
 }
 
 int
-sql_is_like_func(struct sql *db, struct Expr *expr, int *is_like_ci)
+sql_is_like_func(struct sql *db, struct Expr *expr)
 {
 	if (expr->op != TK_FUNCTION || !expr->x.pList ||
 	    expr->x.pList->nExpr != 2)
@@ -1802,7 +1824,6 @@ sql_is_like_func(struct sql *db, struct Expr *expr, int *is_like_ci)
 	assert(func != NULL);
 	if ((func->funcFlags & SQL_FUNC_LIKE) == 0)
 		return 0;
-	*is_like_ci = (func->funcFlags & SQL_FUNC_CASE) == 0;
 	return 1;
 }
 
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 57da8ec5d..989499abe 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -1356,7 +1356,7 @@ enum trim_side_mask {
   {nArg, SQL_FUNC_SLOCHNG|(bNC*SQL_FUNC_NEEDCOLL), \
    pArg, 0, xFunc, 0, #zName, {SQL_AFF_STRING, {0}}, false}
 #define LIKEFUNC(zName, nArg, arg, flags, type) \
-  {nArg, SQL_FUNC_CONSTANT|flags, \
+  {nArg, SQL_FUNC_NEEDCOLL|SQL_FUNC_CONSTANT|flags, \
    (void *)(SQL_INT_TO_PTR(arg)), 0, likeFunc, 0, #zName, {0}, type, false }
 #define AGGREGATE(zName, nArg, arg, nc, xStep, xFinal, type) \
   {nArg, (nc*SQL_FUNC_NEEDCOLL), \
diff --git a/src/box/sql/whereexpr.c b/src/box/sql/whereexpr.c
index 0a0e900bd..a97e768cf 100644
--- a/src/box/sql/whereexpr.c
+++ b/src/box/sql/whereexpr.c
@@ -228,13 +228,11 @@ operatorMask(int op)
  * In order for the operator to be optimizible, the RHS must be a
  * string literal that does not begin with a wildcard. The LHS
  * must be a column that may only be NULL, a string, or a BLOB,
- * never a number. The collating sequence for the column on the
- * LHS must be appropriate for the operator.
+ * never a number.
  *
  * @param pParse      Parsing and code generating context.
  * @param pExpr       Test this expression.
- * @param ppPrefix    Pointer to TK_STRING expression with
- *                    pattern prefix.
+ * @param ppPrefix    Pointer to expression with pattern prefix.
  * @param pisComplete True if the only wildcard is '%' in the
  *                    last character.
  * @retval True if the given expr is a LIKE operator & is
@@ -276,9 +274,20 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
 		 */
 		return 0;
 	}
+
+	/* Only for "binary" and "unicode_ci" collations. */
+	struct field_def *field_def = pLeft->space_def->fields + pLeft->iColumn;
+	if (field_def->coll_id != 0 && field_def->coll_id != 2 &&
+	    field_def->coll_id != 3)
+		return 0;
 	assert(pLeft->iColumn != (-1));	/* Because IPK never has AFF_TEXT */
 
 	pRight = sqlExprSkipCollate(pList->a[0].pExpr);
+	const char *coll_name = NULL;
+	if (pRight != pList->a[0].pExpr) {
+		assert(pList->a[0].pExpr->op == TK_COLLATE);
+		coll_name = pList->a[0].pExpr->u.zToken;
+	}
 	op = pRight->op;
 	if (op == TK_VARIABLE) {
 		Vdbe *pReprepare = pParse->pReprepare;
@@ -308,6 +317,10 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
 				pParse->is_aborted = true;
 			else
 				pPrefix->u.zToken[cnt] = 0;
+			if (coll_name != NULL)
+				pPrefix = sqlExprAddCollateString(pParse,
+								  pPrefix,
+								  coll_name);
 			*ppPrefix = pPrefix;
 			if (op == TK_VARIABLE) {
 				Vdbe *v = pParse->pVdbe;
@@ -977,8 +990,6 @@ exprAnalyze(SrcList * pSrc,	/* the FROM clause */
 	Expr *pStr1 = 0;
 	/* RHS of LIKE ends with wildcard. */
 	int isComplete = 0;
-	/* uppercase equivalent to lowercase. */
-	int noCase = 0;
 	/* Top-level operator. pExpr->op. */
 	int op;
 	/* Parsing context. */
@@ -1165,59 +1176,22 @@ exprAnalyze(SrcList * pSrc,	/* the FROM clause */
 		const u16 wtFlags = TERM_LIKEOPT | TERM_VIRTUAL | TERM_DYNAMIC;
 
 		pLeft = pExpr->x.pList->a[1].pExpr;
-		pStr2 = sqlExprDup(db, pStr1, 0);
-
-		/*
-		 * Convert the lower bound to upper-case and the
-		 * upper bound to lower-case (upper-case is less
-		 * than lower-case in ASCII) so that the range
-		 * constraints also work for BLOBs.
-		 */
-		if (noCase && !pParse->db->mallocFailed) {
-			int i;
-			char c;
-			pTerm->wtFlags |= TERM_LIKE;
-			for (i = 0; (c = pStr1->u.zToken[i]) != 0; i++) {
-				pStr1->u.zToken[i] = sqlToupper(c);
-				pStr2->u.zToken[i] = sqlTolower(c);
-			}
-		}
+		pStr2 = sqlExprDup(db, sqlExprSkipCollate(pStr1), 0);
 
 		if (!db->mallocFailed) {
 			u8 c, *pC;	/* Last character before the first wildcard */
 			pC = (u8 *) & pStr2->u.
 			    zToken[sqlStrlen30(pStr2->u.zToken) - 1];
 			c = *pC;
-			if (noCase) {
-				/* The point is to increment the last character before the first
-				 * wildcard.  But if we increment '@', that will push it into the
-				 * alphabetic range where case conversions will mess up the
-				 * inequality.  To avoid this, make sure to also run the full
-				 * LIKE on all candidate expressions by clearing the isComplete flag
-				 */
-				if (c == 'A' - 1)
-					isComplete = 0;
-				c = sqlUpperToLower[c];
-			}
 			*pC = c + 1;
 		}
 		pNewExpr1 = sqlExprDup(db, pLeft, 0);
-		if (noCase) {
-			pNewExpr1 =
-				sqlExprAddCollateString(pParse, pNewExpr1,
-							    "unicode_ci");
-		}
 		pNewExpr1 = sqlPExpr(pParse, TK_GE, pNewExpr1, pStr1);
 		transferJoinMarkings(pNewExpr1, pExpr);
 		idxNew1 = whereClauseInsert(pWC, pNewExpr1, wtFlags);
 		testcase(idxNew1 == 0);
 		exprAnalyze(pSrc, pWC, idxNew1);
 		pNewExpr2 = sqlExprDup(db, pLeft, 0);
-		if (noCase) {
-			pNewExpr2 =
-				sqlExprAddCollateString(pParse, pNewExpr2,
-							    "unicode_ci");
-		}
 		pNewExpr2 = sqlPExpr(pParse, TK_LT, pNewExpr2, pStr2);
 		transferJoinMarkings(pNewExpr2, pExpr);
 		idxNew2 = whereClauseInsert(pWC, pNewExpr2, wtFlags);
diff --git a/test/sql-tap/collation.test.lua b/test/sql-tap/collation.test.lua
index 79547361c..9e404282a 100755
--- a/test/sql-tap/collation.test.lua
+++ b/test/sql-tap/collation.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(174)
+test:plan(173)
 
 local prefix = "collation-"
 
@@ -477,13 +477,13 @@ for _, data_collation in ipairs(data_collations) do
     end
 end
 
--- Like uses collation (only for unicode_ci and binary)
+-- <LIKE> uses collation. If <LIKE> has explicit <COLLATE>, use it
+-- instead of implicit.
 local like_testcases =
 {
     {"2.0",
     [[
-        CREATE TABLE tx1 (s1 VARCHAR(5) PRIMARY KEY);
-        CREATE INDEX I1 on tx1(s1 collate "unicode_ci");
+        CREATE TABLE tx1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
         INSERT INTO tx1 VALUES('aaa');
         INSERT INTO tx1 VALUES('Aab');
         INSERT INTO tx1 VALUES('İac');
@@ -491,35 +491,32 @@ local like_testcases =
     ]], {0}},
     {"2.1.1",
         "SELECT * FROM tx1 WHERE s1 LIKE 'A%' order by s1;",
-        {0, {"Aab", "aaa"}} },
+        {0, {"aaa","Aab"}} },
     {"2.1.2",
         "EXPLAIN QUERY PLAN SELECT * FROM tx1 WHERE s1 LIKE 'A%';",
-        {0, {0, 0, 0, "SEARCH TABLE TX1 USING COVERING INDEX I1 (S1>? AND S1<?) (~16384 rows)"}}},
+        {0, {0, 0, 0, "SEARCH TABLE TX1 USING PRIMARY KEY (S1>? AND S1<?) (~16384 rows)"}}},
     {"2.2.0",
-        "PRAGMA case_sensitive_like = true;",
-        {0}},
-    {"2.2.1",
-        "SELECT * FROM tx1 WHERE s1 LIKE 'A%' order by s1;",
+        "SELECT * FROM tx1 WHERE s1 LIKE 'A%' COLLATE \"unicode\" order by s1;",
         {0, {"Aab"}} },
-    {"2.2.2",
+    {"2.2.1",
         "EXPLAIN QUERY PLAN SELECT * FROM tx1 WHERE s1 LIKE 'A%';",
         {0, {0, 0, 0, "/USING PRIMARY KEY/"}} },
     {"2.3.0",
-        "PRAGMA case_sensitive_like = false;",
-        {0}},
-    {"2.3.1",
         "SELECT * FROM tx1 WHERE s1 LIKE 'i%' order by s1;",
-        {0, {"iad", "İac"}}},
-    {"2.3.2",
-        "SELECT * FROM tx1 WHERE s1 LIKE 'İ%'order by s1;",
-        {0, {"iad", "İac"}} },
+        {0, {"İac", "iad"}}},
+    {"2.3.1",
+        "SELECT * FROM tx1 WHERE s1 LIKE 'İ%' COLLATE \"unicode\" order by s1;",
+        {0, {"İac"}} },
     {"2.4.0",
     [[
         INSERT INTO tx1 VALUES('ЯЁЮ');
     ]], {0} },
     {"2.4.1",
+        "SELECT * FROM tx1 WHERE s1 LIKE 'яёю' COLLATE \"unicode\";",
+        {0, {}} },
+    {"2.4.2",
         "SELECT * FROM tx1 WHERE s1 LIKE 'яёю';",
-        {0, {"ЯЁЮ"}} },
+        {0, {"ЯЁЮ"}} }
 }
 
 test:do_catchsql_set_test(like_testcases, prefix)
diff --git a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
index 95e8c98d0..eb9d7c3b9 100755
--- a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
+++ b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(128)
+test:plan(156)
 
 local prefix = "like-test-"
 
@@ -80,10 +80,10 @@ local like_test_cases =
         {0, {false}} },
     {"1.25",
         "SELECT 'ёфÅŒش' LIKE '%œش';",
-        {0, {true}} },
+        {0, {false}} },
     {"1.26",
         "SELECT 'ÅŒش' LIKE '%œش';",
-        {0, {true}} },
+        {0, {false}} },
     {"1.27",
         "SELECT 'ёф' LIKE 'ё_';",
         {0, {true}} },
@@ -120,6 +120,90 @@ local like_test_cases =
     {"1.38",
         "SELECT 'ÅŒش' LIKE 'ё_%';",
         {0, {false}} },
+    {"1.39",
+        "SELECT 'A' LIKE 'A' COLLATE \"unicode\";",
+        {0, {true}} },
+    {"1.40",
+        "SELECT 'A' LIKE 'a' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.41",
+        "SELECT 'Ab' COLLATE \"unicode\" LIKE 'ab';",
+        {0, {false}} },
+    {"1.42",
+        "SELECT 'ss' LIKE 'ß' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.43",
+        "SELECT 'Я' LIKE 'я' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.44",
+        "SELECT 'AЯB' LIKE 'AяB' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.45",
+        "SELECT 'Ї' LIKE 'ї' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.46",
+        "SELECT 'Ab' LIKE '_b' COLLATE \"unicode\";",
+        {0, {true}} },
+    {"1.47",
+        "SELECT 'A' LIKE '_' COLLATE \"unicode\";",
+        {0, {true}} },
+    {"1.48",
+        "SELECT 'AB' LIKE '%B' COLLATE \"unicode\";",
+        {0, {true}} },
+    {"1.49",
+        "SELECT 'A' LIKE 'A' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.50",
+        "SELECT 'A' COLLATE \"unicode_ci\" LIKE 'a';",
+        {0, {true}} },
+    {"1.51",
+        "SELECT 'Я' LIKE 'я' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.52",
+        "SELECT 'AЯB' LIKE 'AяB' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.53",
+        "SELECT 'Ї' LIKE 'ї' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.54",
+        "SELECT 'Ab' LIKE 'ab' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.55",
+        "SELECT 'ba' LIKE 'ab' COLLATE \"unicode_ci\";",
+        {0, {false}} },
+    {"1.56",
+        "SELECT 'Aaa' LIKE 'A%' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.57",
+        "SELECT 'aaa' LIKE 'A%' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.58",
+        "SELECT 'A' LIKE '_' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.59",
+        "SELECT 'ss' LIKE 'ß' COLLATE \"unicode_de__phonebook_s1\";",
+        {0, {false}} },
+    {"1.60",
+        "SELECT 'ss' LIKE 'ß' COLLATE \"unicode_de__phonebook_s3\";",
+        {0, {false}} },
+    {"1.61",
+        "SELECT 'Ї' LIKE 'ї' COLLATE \"unicode_uk_s1\";",
+        {0, {true}} },
+    {"1.62",
+        "SELECT 'Ї' LIKE 'ї' COLLATE \"unicode_uk_s3\";",
+        {0, {false}} },
+    {"1.63",
+        "SELECT '%a_' LIKE 'ම%Aම_' COLLATE \"unicode\" ESCAPE 'ම';",
+        {0, {false}} },
+    {"1.64",
+        "SELECT '%a_' LIKE 'ම%Aම_' ESCAPE 'ම' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.65",
+        "SELECT '%_' LIKE 'a%a_' ESCAPE 'A' COLLATE \"unicode_ci\";",
+        {0, {false}} },
+    {"1.66",
+        "SELECT '%_' LIKE 'a%a_' ESCAPE 'a' COLLATE \"unicode_ci\";",
+        {0, {true}} },
 }
 
 test:do_catchsql_set_test(like_test_cases, prefix)
diff --git a/test/sql-tap/like3.test.lua b/test/sql-tap/like3.test.lua
index 0eee25129..e7c25dba6 100755
--- a/test/sql-tap/like3.test.lua
+++ b/test/sql-tap/like3.test.lua
@@ -73,7 +73,7 @@ test:do_execsql_test(
         CREATE TABLE t2(a INT PRIMARY KEY, b TEXT);
         INSERT INTO t2 SELECT a, b FROM t1;
         CREATE INDEX t2ba ON t2(b,a);
-        SELECT a, b FROM t2 WHERE b LIKE 'ab%' ORDER BY +a;
+        SELECT a, b FROM t2 WHERE b LIKE 'ab%' COLLATE "unicode_ci" ORDER BY +a;
     ]], {
         -- <like3-2.0>
         1, "abc", 2, "ABX", 4, "abc", 5, "ABX"
@@ -82,7 +82,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "like3-2.1",
     [[
-        SELECT a, b FROM t2 WHERE +b LIKE 'ab%' ORDER BY +a;
+        SELECT a, b FROM t2 WHERE +b LIKE 'ab%' COLLATE "unicode_ci" ORDER BY +a;
     ]], {
         -- <like3-2.1>
         1, "abc", 2, "ABX", 4, "abc", 5, "ABX"
diff --git a/test/sql-tap/whereG.test.lua b/test/sql-tap/whereG.test.lua
index 177d9d14e..590027023 100755
--- a/test/sql-tap/whereG.test.lua
+++ b/test/sql-tap/whereG.test.lua
@@ -89,7 +89,7 @@ test:do_execsql_test(
     [[
         SELECT DISTINCT aname
           FROM album, composer, track
-         WHERE unlikely(cname LIKE '%bach%')
+         WHERE unlikely(cname LIKE '%bach%' COLLATE "unicode_ci")
            AND composer.cid=track.cid
            AND album.aid=track.aid;
     ]], {
@@ -118,7 +118,7 @@ test:do_execsql_test(
     [[
         SELECT DISTINCT aname
           FROM album, composer, track
-         WHERE likelihood(cname LIKE '%bach%', 0.5)
+         WHERE likelihood(cname LIKE '%bach%' COLLATE "unicode_ci", 0.5)
            AND composer.cid=track.cid
            AND album.aid=track.aid;
     ]], {
@@ -146,7 +146,7 @@ test:do_execsql_test(
     [[
         SELECT DISTINCT aname
           FROM album, composer, track
-         WHERE cname LIKE '%bach%'
+         WHERE cname LIKE '%bach%' COLLATE "unicode_ci"
            AND composer.cid=track.cid
            AND album.aid=track.aid;
     ]], {
@@ -174,7 +174,7 @@ test:do_execsql_test(
     [[
         SELECT DISTINCT aname
           FROM album, composer, track
-         WHERE cname LIKE '%bach%'
+         WHERE cname LIKE '%bach%' COLLATE "unicode_ci"
            AND unlikely(composer.cid=track.cid)
            AND unlikely(album.aid=track.aid);
     ]], {
-- 
2.20.1 (Apple Git-117)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH 1/2] sql: remove "PRAGMA case_sensitive_like"
  2019-07-13 22:51 ` [tarantool-patches] [PATCH 1/2] sql: remove "PRAGMA case_sensitive_like" Roman Khabibov
@ 2019-07-17 16:18   ` n.pettik
  2019-07-24 21:02     ` Roman Khabibov
  0 siblings, 1 reply; 9+ messages in thread
From: n.pettik @ 2019-07-17 16:18 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Roman Khabibov



> On 14 Jul 2019, at 01:51, Roman Khabibov <roman.habibov@tarantool.org> wrote:
> 
> According to ANSI, LIKE should match characters taking into
> account passed collation, so this pragma is no longer needed.

Can’t build this particular patch:

/Users/n.pettik/tarantool/src/box/sql/func.c:1795:1: error: conflicting types for 'sql_is_like_func'
sql_is_like_func(struct sql *db, struct Expr *expr, int *is_like_ci)
^
/Users/n.pettik/tarantool/src/box/sql/sqlInt.h:4277:1: note: previous declaration is here
sql_is_like_func(struct sql *db, struct Expr *expr);
^
/Users/n.pettik/tarantool/src/box/sql/func.c:1805:35: error: use of undeclared identifier 'SQL_FUNC_CASE'
        *is_like_ci = (func->funcFlags & SQL_FUNC_CASE) == 0;
                                         ^
2 errors generated.
make[2]: *** [src/box/sql/CMakeFiles/sql.dir/func.c.o] Error 1
make[2]: *** Waiting for unfinished jobs....
make[1]: *** [src/box/sql/CMakeFiles/sql.dir/all] Error 2
make: *** [all] Error 2

Please, fix compilation errors.

> diff --git a/test/sql-tap/pragma.test.lua b/test/sql-tap/pragma.test.lua
> index 8221d36e6..d8fb550d0 100755
> --- a/test/sql-tap/pragma.test.lua
> +++ b/test/sql-tap/pragma.test.lua
> @@ -81,21 +81,16 @@ test:do_execsql_test(
> 	-- </pragma-3.2>
> })
> 
> --- Check that "PRAGMA case_sensitive_like" returns its status
> --- (0 or 1) if called without parameter.
> -test:do_test(
> +-- Check that "PRAGMA case_sensitive_like" does not exist.
> +test:do_catchsql_test(
> 	"pragma-3.3",
> -	function()
> -		old_value = box.execute('PRAGMA case_sensitive_like').rows
> -		box.execute('PRAGMA case_sensitive_like = 1')
> -		new_value = box.execute('PRAGMA case_sensitive_like').rows
> -		box.execute('PRAGMA case_sensitive_like = '.. old_value[1][1])
> -		return new_value[1][1]
> -	end,
> +	[[
> +		PRAGMA case_sensitive_like
> +	]], {
> 	-- <pragma-3.3>
> -	1
> +	1, "Pragma 'CASE_SENSITIVE_LIKE' does not exist"
> 	-- </pragma-3.3>
> -	)
> +})

Sorry, this test looks weird. Just remove it.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH 2/2] sql: make LIKE predicate dependent on collation
  2019-07-13 22:51 ` [tarantool-patches] [PATCH 2/2] sql: make LIKE predicate dependent on collation Roman Khabibov
@ 2019-07-17 16:19   ` n.pettik
  2019-07-24 21:02     ` Roman Khabibov
  0 siblings, 1 reply; 9+ messages in thread
From: n.pettik @ 2019-07-17 16:19 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Roman Khabibov


> On 14 Jul 2019, at 01:51, Roman Khabibov <roman.habibov@tarantool.org> wrote:
> 
> According to ANSI, LIKE should match characters taking into
> account passed collation.
> 
> ISO/IEC JTC 1/SC 32 2011, Part 2: Foundation, page 445

Nit: it’s likely to be meaningless to indicate exact page;
I’d better outline number of section.

Moreover, I guess this ticket deserves doc request
explaining user-visible changes.

I’ve pushed code-style fixes at the top of your branch.

> 7 files changed, 187 insertions(+), 111 deletions(-)
> 
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index 4e4d14cf7..b660d05c5 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> 
> @@ -699,7 +723,7 @@ enum pattern_match_status {
>  * @param string String being compared.
>  * @param pattern_end Ptr to pattern last symbol.
>  * @param string_end Ptr to string last symbol.
> - * @param is_like_ci true if LIKE is case insensitive.
> + * @param coll Pointer to collation.
>  * @param match_other The escape char for LIKE.
>  *
>  * @retval One of pattern_match_status values.
> @@ -709,7 +733,7 @@ sql_utf8_pattern_compare(const char *pattern,
> 			 const char *string,
> 			 const char *pattern_end,
> 			 const char *string_end,
> -			 const int is_like_ci,
> +			 struct coll *coll,
> 			 UChar32 match_other)
> {
> 	/* Next pattern and input string chars. */
> @@ -717,9 +741,13 @@ sql_utf8_pattern_compare(const char *pattern,
> 	/* One past the last escaped input char. */
> 	const char *zEscaped = 0;
> 	UErrorCode status = U_ZERO_ERROR;
> +	const char *pat_char_ptr;
> +	const char *str_char_ptr;
> +	int pat_char_len;
> +	int str_char_len;

Declaring variables without initialisation is considered
to be bad practise. Also intend to use appropriate types -
in this case size_t.

> 	while (pattern < pattern_end) {
> -		c = Utf8Read(pattern, pattern_end);
> +	c = step_utf8_char(&pattern, pattern_end, &pat_char_ptr, &pat_char_len);

Broken indentation.

> 		if (c == SQL_INVALID_UTF8_SYMBOL)
> 			return INVALID_PATTERN;
> 		if (c == MATCH_ALL_WILDCARD) {
> 
> @@ -847,9 +867,10 @@ sql_utf8_pattern_compare(const char *pattern,
> int
> sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc)
> {

I guess you can remove both strike_cs and strlike_ci functions.

> +	struct coll_id *p = coll_by_name("unicode", strlen("unicode"));
> 	return sql_utf8_pattern_compare(zPattern, zStr,
> 		                        zPattern + strlen(zPattern),
> -		                        zStr + strlen(zStr), 0, esc);
> +		                        zStr + strlen(zStr), p->coll, esc);
> }
> 
> @@ -228,13 +228,11 @@ operatorMask(int op)
>  * In order for the operator to be optimizible, the RHS must be a
>  * string literal that does not begin with a wildcard. The LHS
>  * must be a column that may only be NULL, a string, or a BLOB,
> - * never a number. The collating sequence for the column on the
> - * LHS must be appropriate for the operator.
> + * never a number.
>  *
>  * @param pParse      Parsing and code generating context.
>  * @param pExpr       Test this expression.
> - * @param ppPrefix    Pointer to TK_STRING expression with
> - *                    pattern prefix.
> + * @param ppPrefix    Pointer to expression with pattern prefix.

If smth has changed, please underline this in comment.

>  * @param pisComplete True if the only wildcard is '%' in the
>  *                    last character.
>  * @retval True if the given expr is a LIKE operator & is
> @@ -276,9 +274,20 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
> 		 */
> 		return 0;
> 	}
> +
> +	/* Only for "binary" and "unicode_ci" collations. */

Why only two collations? Why not “unicode” for instance?
What if collation is a part of expression, not field:
… a COLLATE “unicode” LIKE … ?

> +	struct field_def *field_def = pLeft->space_def->fields + pLeft->iColumn;
> +	if (field_def->coll_id != 0 && field_def->coll_id != 2 &&
> +	    field_def->coll_id != 3)

It’s definitively bad practice to rely on collation’s id.

> +		return 0;
> 	assert(pLeft->iColumn != (-1));	/* Because IPK never has AFF_TEXT */
> 
> 	pRight = sqlExprSkipCollate(pList->a[0].pExpr);
> +	const char *coll_name = NULL;
> +	if (pRight != pList->a[0].pExpr) {
> +		assert(pList->a[0].pExpr->op == TK_COLLATE);
> +		coll_name = pList->a[0].pExpr->u.zToken;

Please, accompany this snippet with comment.

> +	}
> 	op = pRight->op;
> 	if (op == TK_VARIABLE) {
> 		Vdbe *pReprepare = pParse->pReprepare;
> @@ -308,6 +317,10 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
> 				pParse->is_aborted = true;
> 			else
> 				pPrefix->u.zToken[cnt] = 0;
> +			if (coll_name != NULL)
> +				pPrefix = sqlExprAddCollateString(pParse,
> +								  pPrefix,
> +								  coll_name);

The same: I don’t understand what’s going on here.
Please, add explanation to the code.

> @@ -977,8 +990,6 @@ exprAnalyze(SrcList * pSrc,	/* the FROM clause */
> 	Expr *pStr1 = 0;
> 	/* RHS of LIKE ends with wildcard. */
> 	int isComplete = 0;
> -	/* uppercase equivalent to lowercase. */
> -	int noCase = 0;
> 	/* Top-level operator. pExpr->op. */
> 	int op;
> 	/* Parsing context. */
> @@ -1165,59 +1176,22 @@ exprAnalyze(SrcList * pSrc,	/* the FROM clause */
> 		const u16 wtFlags = TERM_LIKEOPT | TERM_VIRTUAL | TERM_DYNAMIC;
> 
> 		pLeft = pExpr->x.pList->a[1].pExpr;
> -		pStr2 = sqlExprDup(db, pStr1, 0);
> -
> -		/*
> -		 * Convert the lower bound to upper-case and the
> -		 * upper bound to lower-case (upper-case is less
> -		 * than lower-case in ASCII) so that the range
> -		 * constraints also work for BLOBs.
> -		 */
> -		if (noCase && !pParse->db->mallocFailed) {
> -			int i;
> -			char c;
> -			pTerm->wtFlags |= TERM_LIKE;
> -			for (i = 0; (c = pStr1->u.zToken[i]) != 0; i++) {
> -				pStr1->u.zToken[i] = sqlToupper(c);
> -				pStr2->u.zToken[i] = sqlTolower(c);
> -			}
> -		}
> +		pStr2 = sqlExprDup(db, sqlExprSkipCollate(pStr1), 0);
> 
> 		if (!db->mallocFailed) {
> 			u8 c, *pC;	/* Last character before the first wildcard */
> 			pC = (u8 *) & pStr2->u.
> 			    zToken[sqlStrlen30(pStr2->u.zToken) - 1];
> 			c = *pC;
> -			if (noCase) {
> -				/* The point is to increment the last character before the first
> -				 * wildcard.  But if we increment '@', that will push it into the
> -				 * alphabetic range where case conversions will mess up the
> -				 * inequality.  To avoid this, make sure to also run the full
> -				 * LIKE on all candidate expressions by clearing the isComplete flag
> -				 */
> -				if (c == 'A' - 1)
> -					isComplete = 0;
> -				c = sqlUpperToLower[c];
> -			}

Are you sure that this code removal wouldn’t broke smth?

> 			*pC = c + 1;
> 		}
> 		pNewExpr1 = sqlExprDup(db, pLeft, 0);
> -		if (noCase) {
> -			pNewExpr1 =
> -				sqlExprAddCollateString(pParse, pNewExpr1,
> -							    "unicode_ci");
> -		}
> 		pNewExpr1 = sqlPExpr(pParse, TK_GE, pNewExpr1, pStr1);
> 		transferJoinMarkings(pNewExpr1, pExpr);
> 		idxNew1 = whereClauseInsert(pWC, pNewExpr1, wtFlags);
> 		testcase(idxNew1 == 0);
> 		exprAnalyze(pSrc, pWC, idxNew1);
> 		pNewExpr2 = sqlExprDup(db, pLeft, 0);
> -		if (noCase) {
> -			pNewExpr2 =
> -				sqlExprAddCollateString(pParse, pNewExpr2,
> -							    "unicode_ci");
> -		}
> 		pNewExpr2 = sqlPExpr(pParse, TK_LT, pNewExpr2, pStr2);
> 		transferJoinMarkings(pNewExpr2, pExpr);
> 		idxNew2 = whereClauseInsert(pWC, pNewExpr2, wtFlags);
> diff --git a/test/sql-tap/collation.test.lua b/test/sql-tap/collation.test.lua
> index 79547361c..9e404282a 100755
> --- a/test/sql-tap/collation.test.lua
> +++ b/test/sql-tap/collation.test.lua
> @@ -1,6 +1,6 @@
> #!/usr/bin/env tarantool
> test = require("sqltester")
> -test:plan(174)
> +test:plan(173)

Please add test verifying that arguments of incompatible
collations can’t participate in LIKE predicate.

> 
> local prefix = "collation-"
> 
> @@ -477,13 +477,13 @@ for _, data_collation in ipairs(data_collations) do
>     end
> end
> 
> --- Like uses collation (only for unicode_ci and binary)
> +-- <LIKE> uses collation. If <LIKE> has explicit <COLLATE>, use it
> +-- instead of implicit.
> local like_testcases =
> {
>     {"2.0",
>     [[
> -        CREATE TABLE tx1 (s1 VARCHAR(5) PRIMARY KEY);
> -        CREATE INDEX I1 on tx1(s1 collate "unicode_ci”);

Why did you drop index?

> +        CREATE TABLE tx1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
>         INSERT INTO tx1 VALUES('aaa');
>         INSERT INTO tx1 VALUES('Aab');
>         INSERT INTO tx1 VALUES('İac');
> @@ -491,35 +491,32 @@ local like_testcases =
>     ]], {0}},
>     {"2.1.1",
>         "SELECT * FROM tx1 WHERE s1 LIKE 'A%' order by s1;",
> -        {0, {"Aab", "aaa"}} },
> +        {0, {"aaa","Aab"}} },

Why order of results has changed?

>     {"2.1.2",
>         "EXPLAIN QUERY PLAN SELECT * FROM tx1 WHERE s1 LIKE 'A%';",
> -        {0, {0, 0, 0, "SEARCH TABLE TX1 USING COVERING INDEX I1 (S1>? AND S1<?) (~16384 rows)"}}},
> +        {0, {0, 0, 0, "SEARCH TABLE TX1 USING PRIMARY KEY (S1>? AND S1<?) (~16384 rows)"}}},

I guess this is due to dropped index.

>     {"2.2.0",
> -        "PRAGMA case_sensitive_like = true;",
> -        {0}},
> -    {"2.2.1",
> -        "SELECT * FROM tx1 WHERE s1 LIKE 'A%' order by s1;",
> +        "SELECT * FROM tx1 WHERE s1 LIKE 'A%' COLLATE \"unicode\" order by s1;",
>         {0, {"Aab"}} },
> -    {"2.2.2",
> +    {"2.2.1",
>         "EXPLAIN QUERY PLAN SELECT * FROM tx1 WHERE s1 LIKE 'A%';",
>         {0, {0, 0, 0, "/USING PRIMARY KEY/"}} },
>     {"2.3.0",
> -        "PRAGMA case_sensitive_like = false;",
> -        {0}},
> -    {"2.3.1",
>         "SELECT * FROM tx1 WHERE s1 LIKE 'i%' order by s1;",
> -        {0, {"iad", "İac"}}},
> -    {"2.3.2",
> -        "SELECT * FROM tx1 WHERE s1 LIKE 'İ%'order by s1;",
> -        {0, {"iad", "İac"}} },
> +        {0, {"İac", "iad"}}},
> +    {"2.3.1",
> +        "SELECT * FROM tx1 WHERE s1 LIKE 'İ%' COLLATE \"unicode\" order by s1;",
> +        {0, {"İac"}} },
>     {"2.4.0",
>     [[
>         INSERT INTO tx1 VALUES('ЯЁЮ');
>     ]], {0} },
>     {"2.4.1",
> +        "SELECT * FROM tx1 WHERE s1 LIKE 'яёю' COLLATE \"unicode\";",
> +        {0, {}} },
> +    {"2.4.2",
>         "SELECT * FROM tx1 WHERE s1 LIKE 'яёю';",
> -        {0, {"ЯЁЮ"}} },
> +        {0, {"ЯЁЮ"}} }

Stray diff.

> }
> 
> test:do_catchsql_set_test(like_testcases, prefix)
> diff --git a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
> index 95e8c98d0..eb9d7c3b9 100755
> --- a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
> +++ b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
> @@ -1,6 +1,6 @@
> #!/usr/bin/env tarantool
> test = require("sqltester")
> -test:plan(128)
> +test:plan(156)
> 
> local prefix = "like-test-"
> 
> @@ -80,10 +80,10 @@ local like_test_cases =
>         {0, {false}} },
>     {"1.25",
>         "SELECT 'ёфÅŒش' LIKE '%œش';",
> -        {0, {true}} },
> +        {0, {false}} },

Could you add an appropriate collation to avoid fixing
test results?

>     {"1.26",
>         "SELECT 'ÅŒش' LIKE '%œش';",
> -        {0, {true}} },
> +        {0, {false}} },
>     {"1.27",
>         "SELECT 'ёф' LIKE 'ё_';",
>         {0, {true}} },
> @@ -120,6 +120,90 @@ local like_test_cases =
>     {"1.38",
>         "SELECT 'ÅŒش' LIKE 'ё_%';",
>         {0, {false}} },
> +    {"1.39",
> +        "SELECT 'A' LIKE 'A' COLLATE \"unicode\";",
> +        {0, {true}} },

Please, add not only tests involving constant literals, but
also columns with implicit/explicit collations.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH 2/2] sql: make LIKE predicate dependent on collation
  2019-07-17 16:19   ` [tarantool-patches] " n.pettik
@ 2019-07-24 21:02     ` Roman Khabibov
  2019-08-01 10:27       ` n.pettik
  0 siblings, 1 reply; 9+ messages in thread
From: Roman Khabibov @ 2019-07-24 21:02 UTC (permalink / raw)
  To: tarantool-patches; +Cc: n. pettik

Hi! Thanks for the review.

> On Jul 17, 2019, at 19:19, n.pettik <korablev@tarantool.org> wrote:
> 
> 
>> On 14 Jul 2019, at 01:51, Roman Khabibov <roman.habibov@tarantool.org> wrote:
>> 
>> According to ANSI, LIKE should match characters taking into
>> account passed collation.
>> 
>> ISO/IEC JTC 1/SC 32 2011, Part 2: Foundation, page 445
> 
> Nit: it’s likely to be meaningless to indicate exact page;
> I’d better outline number of section.
> 
> Moreover, I guess this ticket deserves doc request
> explaining user-visible changes.
Done.

> I’ve pushed code-style fixes at the top of your branch.
> 
>> 7 files changed, 187 insertions(+), 111 deletions(-)
>> 
>> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
>> index 4e4d14cf7..b660d05c5 100644
>> --- a/src/box/sql/func.c
>> +++ b/src/box/sql/func.c
>> 
>> @@ -699,7 +723,7 @@ enum pattern_match_status {
>> * @param string String being compared.
>> * @param pattern_end Ptr to pattern last symbol.
>> * @param string_end Ptr to string last symbol.
>> - * @param is_like_ci true if LIKE is case insensitive.
>> + * @param coll Pointer to collation.
>> * @param match_other The escape char for LIKE.
>> *
>> * @retval One of pattern_match_status values.
>> @@ -709,7 +733,7 @@ sql_utf8_pattern_compare(const char *pattern,
>> 			 const char *string,
>> 			 const char *pattern_end,
>> 			 const char *string_end,
>> -			 const int is_like_ci,
>> +			 struct coll *coll,
>> 			 UChar32 match_other)
>> {
>> 	/* Next pattern and input string chars. */
>> @@ -717,9 +741,13 @@ sql_utf8_pattern_compare(const char *pattern,
>> 	/* One past the last escaped input char. */
>> 	const char *zEscaped = 0;
>> 	UErrorCode status = U_ZERO_ERROR;
>> +	const char *pat_char_ptr;
>> +	const char *str_char_ptr;
>> +	int pat_char_len;
>> +	int str_char_len;
> 
> Declaring variables without initialisation is considered
> to be bad practise. Also intend to use appropriate types -
> in this case size_t.
> 
>> 	while (pattern < pattern_end) {
>> -		c = Utf8Read(pattern, pattern_end);
>> +	c = step_utf8_char(&pattern, pattern_end, &pat_char_ptr, &pat_char_len);
> 
> Broken indentation.
Thank you.

>> 		if (c == SQL_INVALID_UTF8_SYMBOL)
>> 			return INVALID_PATTERN;
>> 		if (c == MATCH_ALL_WILDCARD) {
>> 
>> @@ -847,9 +867,10 @@ sql_utf8_pattern_compare(const char *pattern,
>> int
>> sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc)
>> {
> 
> I guess you can remove both strike_cs and strlike_ci functions.
Maybe in 2.3.0.
r:tarantool r.khabibov$ grep -r strlike_c $HOME/tarantool/src
/Users/r.khabibov/tarantool/src/box/sql/analyze.c:		if (sql_strlike_cs("unordered%", z, '[') == 0)
/Users/r.khabibov/tarantool/src/box/sql/analyze.c:		else if (sql_strlike_cs("noskipscan%", z, '[') == 0)
/Users/r.khabibov/tarantool/src/box/sql/vdbe.c:		|| sql_strlike_ci("DELETE%", p->zSql, 0) != 0

> 
>> +	struct coll_id *p = coll_by_name("unicode", strlen("unicode"));
>> 	return sql_utf8_pattern_compare(zPattern, zStr,
>> 		                        zPattern + strlen(zPattern),
>> -		                        zStr + strlen(zStr), 0, esc);
>> +		                        zStr + strlen(zStr), p->coll, esc);
>> }
>> 
>> @@ -228,13 +228,11 @@ operatorMask(int op)
>> * In order for the operator to be optimizible, the RHS must be a
>> * string literal that does not begin with a wildcard. The LHS
>> * must be a column that may only be NULL, a string, or a BLOB,
>> - * never a number. The collating sequence for the column on the
>> - * LHS must be appropriate for the operator.
>> + * never a number.
>> *
>> * @param pParse      Parsing and code generating context.
>> * @param pExpr       Test this expression.
>> - * @param ppPrefix    Pointer to TK_STRING expression with
>> - *                    pattern prefix.
>> + * @param ppPrefix    Pointer to expression with pattern prefix.
> 
> If smth has changed, please underline this in comment.
> 
>> * @param pisComplete True if the only wildcard is '%' in the
>> *                    last character.
>> * @retval True if the given expr is a LIKE operator & is
>> @@ -276,9 +274,20 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
>> 		 */
>> 		return 0;
>> 	}
>> +
>> +	/* Only for "binary" and "unicode_ci" collations. */
> 
> Why only two collations? Why not “unicode” for instance?
> What if collation is a part of expression, not field:
> … a COLLATE “unicode” LIKE … ?
For instance, we consider the following query '... WHERE x LIKE "A%" COLLATE "unicode"' (strength - tertiary). The optimization will be: "A" <= x < "B". Let's take x == "aaa". Comparison of "aaa" and "A" gives us '1'. Comparison of  "aaa" and "B" gives us '-1'. In other words, "A" <= "aaa" < "B", but it is a bad result, because "aaa" is not LIKE "A%" COLLATE "unicode".
Now, let's take "unicode_ci" as the collation (strength - primary).
Comparison of "aaa" and "B" gives us '-1'. Comparison of "aaa" and "A" gives us '1'.  "A" <= "aaa" < "B". Here, we may accept this result.

As I understand, it depends on a collation strength. We might check collation strength and
filter appropriate collations this way (in 2.3.0), but I’m not sure 100%. I have chosen
“binary” and “unicode”, because they most frequently used, IMHO.

>> +	struct field_def *field_def = pLeft->space_def->fields + pLeft->iColumn;
>> +	if (field_def->coll_id != 0 && field_def->coll_id != 2 &&
>> +	    field_def->coll_id != 3)
> 
> It’s definitively bad practice to rely on collation’s id.
> 
>> +		return 0;
>> 	assert(pLeft->iColumn != (-1));	/* Because IPK never has AFF_TEXT */
>> 
>> 	pRight = sqlExprSkipCollate(pList->a[0].pExpr);
>> +	const char *coll_name = NULL;
>> +	if (pRight != pList->a[0].pExpr) {
>> +		assert(pList->a[0].pExpr->op == TK_COLLATE);
>> +		coll_name = pList->a[0].pExpr->u.zToken;
> 
> Please, accompany this snippet with comment.
> 
>> +	}
>> 	op = pRight->op;
>> 	if (op == TK_VARIABLE) {
>> 		Vdbe *pReprepare = pParse->pReprepare;
>> @@ -308,6 +317,10 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
>> 				pParse->is_aborted = true;
>> 			else
>> 				pPrefix->u.zToken[cnt] = 0;
>> +			if (coll_name != NULL)
>> +				pPrefix = sqlExprAddCollateString(pParse,
>> +								  pPrefix,
>> +								  coll_name);
> 
> The same: I don’t understand what’s going on here.
> Please, add explanation to the code.
+/**
+ * Skip TK_COLLATE in @a expr and save root collation name to @a
+ * coll_name if any.
+ *
+ * Wrapper for sqlExprSkipCollate().
+ *
+ * @param expr Expression.
+ * @param[out] coll_name Collation name.
+ * @retval Pointer to non-collate node.
+ */
+static struct Expr*
+skip_coll_and_get_name(struct Expr* expr, const char **coll_name)
+{
+	struct Expr *ret = sqlExprSkipCollate(expr);
+	if (ret != expr) {
+		assert(expr->op == TK_COLLATE);
+		*coll_name = expr->u.zToken;
+	} else {
+		*coll_name = NULL;
+	}
+	return ret;
+}
+
 /**
  * Check to see if the given expression is a LIKE operator that
  * can be optimized using inequality constraints.
@@ -228,13 +251,11 @@ operatorMask(int op)
  * In order for the operator to be optimizible, the RHS must be a
  * string literal that does not begin with a wildcard. The LHS
  * must be a column that may only be NULL, a string, or a BLOB,
- * never a number. The collating sequence for the column on the
- * LHS must be appropriate for the operator.
+ * never a number.
  *
  * @param pParse      Parsing and code generating context.
  * @param pExpr       Test this expression.
- * @param ppPrefix    Pointer to TK_STRING expression with
- *                    pattern prefix.
+ * @param ppPrefix    Pointer to expression with pattern prefix.
  * @param pisComplete True if the only wildcard is '%' in the
  *                    last character.
  * @retval True if the given expr is a LIKE operator & is
@@ -266,7 +287,9 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
 		return 0;
 	}
 	pList = pExpr->x.pList;
-	pLeft = pList->a[1].pExpr;
+	const char *l_coll_name = NULL;
+	pLeft = skip_coll_and_get_name(pList->a[1].pExpr, &l_coll_name);
+
 	/* Value might be numeric */
 	if (pLeft->op != TK_COLUMN ||
 	    sql_expr_type(pLeft) != FIELD_TYPE_STRING) {
@@ -278,7 +301,34 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
 	}
 	assert(pLeft->iColumn != (-1));	/* Because IPK never has AFF_TEXT */
 
-	pRight = sqlExprSkipCollate(pList->a[0].pExpr);
+	const char *r_coll_name = NULL;
+	pRight = skip_coll_and_get_name(pList->a[0].pExpr, &r_coll_name);
+
+	/* Only for "binary" and "unicode_ci" collations. */
+	if (r_coll_name != NULL) {
+		if (strcmp(r_coll_name, "binary") != 0 &&
+		    strcmp(r_coll_name, "unicode_ci") != 0)
+			return 0;
+	} else if (l_coll_name != NULL) {
+		if (strcmp(l_coll_name, "binary") != 0 &&
+		    strcmp(l_coll_name, "unicode_ci") != 0)
+			return 0;
+	} else {
+		/*
+		 * If both arguments haven't <COLLATE> then we
+		 * should check implicit collation.
+		 */
+		struct field_def *field_def = pLeft->space_def->fields +
+					      pLeft->iColumn;
+		uint32_t none_id = coll_by_name("none", strlen("none"))->id;
+		uint32_t u_ci_id =
+			coll_by_name("unicode_ci", strlen("unicode_ci"))->id;
+		uint32_t bin_id = coll_by_name("binary", strlen("binary"))->id;
+		if (field_def->coll_id != none_id && field_def->coll_id !=
+		    u_ci_id && field_def->coll_id != bin_id)
+			return 0;
+	}
+
 	op = pRight->op;
 	if (op == TK_VARIABLE) {
 		Vdbe *pReprepare = pParse->pReprepare;
@@ -308,6 +358,15 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
 				pParse->is_aborted = true;
 			else
 				pPrefix->u.zToken[cnt] = 0;
+			/*
+			 * If <COLLATE> was typed to <LIKE>'s RHS
+			 * add it result expression.
+			 */
+			if (r_coll_name != NULL)
+				pPrefix =
+					sqlExprAddCollateString(pParse,
+								pPrefix,
+								r_coll_name);
 			*ppPrefix = pPrefix;
 			if (op == TK_VARIABLE) {
 				Vdbe *v = pParse->pVdbe;

>> @@ -977,8 +990,6 @@ exprAnalyze(SrcList * pSrc,	/* the FROM clause */
>> 	Expr *pStr1 = 0;
>> 	/* RHS of LIKE ends with wildcard. */
>> 	int isComplete = 0;
>> -	/* uppercase equivalent to lowercase. */
>> -	int noCase = 0;
>> 	/* Top-level operator. pExpr->op. */
>> 	int op;
>> 	/* Parsing context. */
>> @@ -1165,59 +1176,22 @@ exprAnalyze(SrcList * pSrc,	/* the FROM clause */
>> 		const u16 wtFlags = TERM_LIKEOPT | TERM_VIRTUAL | TERM_DYNAMIC;
>> 
>> 		pLeft = pExpr->x.pList->a[1].pExpr;
>> -		pStr2 = sqlExprDup(db, pStr1, 0);
>> -
>> -		/*
>> -		 * Convert the lower bound to upper-case and the
>> -		 * upper bound to lower-case (upper-case is less
>> -		 * than lower-case in ASCII) so that the range
>> -		 * constraints also work for BLOBs.
>> -		 */
>> -		if (noCase && !pParse->db->mallocFailed) {
>> -			int i;
>> -			char c;
>> -			pTerm->wtFlags |= TERM_LIKE;
>> -			for (i = 0; (c = pStr1->u.zToken[i]) != 0; i++) {
>> -				pStr1->u.zToken[i] = sqlToupper(c);
>> -				pStr2->u.zToken[i] = sqlTolower(c);
>> -			}
>> -		}
>> +		pStr2 = sqlExprDup(db, sqlExprSkipCollate(pStr1), 0);
>> 
>> 		if (!db->mallocFailed) {
>> 			u8 c, *pC;	/* Last character before the first wildcard */
>> 			pC = (u8 *) & pStr2->u.
>> 			    zToken[sqlStrlen30(pStr2->u.zToken) - 1];
>> 			c = *pC;
>> -			if (noCase) {
>> -				/* The point is to increment the last character before the first
>> -				 * wildcard.  But if we increment '@', that will push it into the
>> -				 * alphabetic range where case conversions will mess up the
>> -				 * inequality.  To avoid this, make sure to also run the full
>> -				 * LIKE on all candidate expressions by clearing the isComplete flag
>> -				 */
>> -				if (c == 'A' - 1)
>> -					isComplete = 0;
>> -				c = sqlUpperToLower[c];
>> -			}
> 
> Are you sure that this code removal wouldn’t broke smth?
Now, comparison is occurred with collations, so we don’t need to translate characters to
upper or lower case.

>> 			*pC = c + 1;
>> 		}
>> 		pNewExpr1 = sqlExprDup(db, pLeft, 0);
>> -		if (noCase) {
>> -			pNewExpr1 =
>> -				sqlExprAddCollateString(pParse, pNewExpr1,
>> -							    "unicode_ci");
>> -		}
>> 		pNewExpr1 = sqlPExpr(pParse, TK_GE, pNewExpr1, pStr1);
>> 		transferJoinMarkings(pNewExpr1, pExpr);
>> 		idxNew1 = whereClauseInsert(pWC, pNewExpr1, wtFlags);
>> 		testcase(idxNew1 == 0);
>> 		exprAnalyze(pSrc, pWC, idxNew1);
>> 		pNewExpr2 = sqlExprDup(db, pLeft, 0);
>> -		if (noCase) {
>> -			pNewExpr2 =
>> -				sqlExprAddCollateString(pParse, pNewExpr2,
>> -							    "unicode_ci");
>> -		}
>> 		pNewExpr2 = sqlPExpr(pParse, TK_LT, pNewExpr2, pStr2);
>> 		transferJoinMarkings(pNewExpr2, pExpr);
>> 		idxNew2 = whereClauseInsert(pWC, pNewExpr2, wtFlags);
>> diff --git a/test/sql-tap/collation.test.lua b/test/sql-tap/collation.test.lua
>> index 79547361c..9e404282a 100755
>> --- a/test/sql-tap/collation.test.lua
>> +++ b/test/sql-tap/collation.test.lua
>> @@ -1,6 +1,6 @@
>> #!/usr/bin/env tarantool
>> test = require("sqltester")
>> -test:plan(174)
>> +test:plan(173)
> 
> Please add test verifying that arguments of incompatible
> collations can’t participate in LIKE predicate.
diff --git a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
index 95e8c98d0..641469211 100755
--- a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
+++ b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(128)
+test:plan(162)
 
 local prefix = "like-test-"
 
@@ -79,10 +79,10 @@ local like_test_cases =
         "SELECT 'ёф' LIKE '%œش';",
         {0, {false}} },
     {"1.25",
-        "SELECT 'ёфÅŒش' LIKE '%œش';",
+        "SELECT 'ёфÅŒش' LIKE '%œش' COLLATE \"unicode_ci\";",
         {0, {true}} },
     {"1.26",
-        "SELECT 'ÅŒش' LIKE '%œش';",
+        "SELECT 'ÅŒش' LIKE '%œش' COLLATE \"unicode_ci\";",
         {0, {true}} },
     {"1.27",
         "SELECT 'ёф' LIKE 'ё_';",
@@ -120,6 +120,108 @@ local like_test_cases =
     {"1.38",
         "SELECT 'ÅŒش' LIKE 'ё_%';",
         {0, {false}} },
+    {"1.39",
+        "SELECT 'A' LIKE 'A' COLLATE \"unicode\";",
+        {0, {true}} },
+    {"1.40",
+        "SELECT 'A' LIKE 'a' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.41",
+        "SELECT 'Ab' COLLATE \"unicode\" LIKE 'ab';",
+        {0, {false}} },
+    {"1.42",
+        "SELECT 'ss' LIKE 'ß' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.43",
+        "SELECT 'Я' LIKE 'я' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.44",
+        "SELECT 'AЯB' LIKE 'AяB' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.45",
+        "SELECT 'Ї' LIKE 'ї' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.46",
+        "SELECT 'Ab' LIKE '_b' COLLATE \"unicode\";",
+        {0, {true}} },
+    {"1.47",
+        "SELECT 'A' LIKE '_' COLLATE \"unicode\";",
+        {0, {true}} },
+    {"1.48",
+        "SELECT 'AB' LIKE '%B' COLLATE \"unicode\";",
+        {0, {true}} },
+    {"1.49",
+        "SELECT 'A' LIKE 'A' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.50",
+        "SELECT 'A' COLLATE \"unicode_ci\" LIKE 'a';",
+        {0, {true}} },
+    {"1.51",
+        "SELECT 'Я' LIKE 'я' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.52",
+        "SELECT 'AЯB' LIKE 'AяB' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.53",
+        "SELECT 'Ї' LIKE 'ї' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.54",
+        "SELECT 'Ab' LIKE 'ab' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.55",
+        "SELECT 'ba' LIKE 'ab' COLLATE \"unicode_ci\";",
+        {0, {false}} },
+    {"1.56",
+        "SELECT 'Aaa' LIKE 'A%' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.57",
+        "SELECT 'aaa' LIKE 'A%' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.58",
+        "SELECT 'A' LIKE '_' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.59",
+        "SELECT 'ss' LIKE 'ß' COLLATE \"unicode_de__phonebook_s1\";",
+        {0, {false}} },
+    {"1.60",
+        "SELECT 'ss' LIKE 'ß' COLLATE \"unicode_de__phonebook_s3\";",
+        {0, {false}} },
+    {"1.61",
+        "SELECT 'Ї' LIKE 'ї' COLLATE \"unicode_uk_s1\";",
+        {0, {true}} },
+    {"1.62",
+        "SELECT 'Ї' LIKE 'ї' COLLATE \"unicode_uk_s3\";",
+        {0, {false}} },
+    {"1.63",
+        "SELECT 'Ї' COLLATE \"unicode_uk_s3\" LIKE 'ї' COLLATE \"unicode_uk_s3\";",
+        {0, {false}} },
+    {"1.64",
+        "SELECT '%a_' LIKE 'ම%Aම_' COLLATE \"unicode\" ESCAPE 'ම';",
+        {0, {false}} },
+    {"1.65",
+        "SELECT '%a_' COLLATE \"unicode\" LIKE 'ම%Aම_' COLLATE \"unicode\" ESCAPE 'ම' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.66",
+        "SELECT '%a_' LIKE 'ම%Aම_' ESCAPE 'ම' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.67",
+        "SELECT '%_' LIKE 'a%a_' ESCAPE 'A' COLLATE \"unicode_ci\";",
+        {0, {false}} },
+    {"1.68",
+        "SELECT '%_' LIKE 'a%a_' ESCAPE 'a' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.69",
+        "SELECT 'Ї' COLLATE \"unicode\" LIKE 'ї' COLLATE \"unicode_uk_s3\";",
+        {1, "Illegal mix of collations"} },
+    {"1.70",
+        "SELECT '%a_' COLLATE \"unicode_ci\" LIKE 'ම%Aම_' COLLATE \"unicode\" ESCAPE 'ම';",
+        {1, "Illegal mix of collations"} },
+    {"1.71",
+        "SELECT '%a_' COLLATE \"unicode\" LIKE 'ම%Aම_' ESCAPE 'ම' COLLATE \"unicode_ci\";",
+        {1, "Illegal mix of collations"} },
+    {"1.72",
+        "SELECT '%_' LIKE 'a%a_' COLLATE \"unicode\" ESCAPE 'A' COLLATE \"unicode_ci\";",
+        {1, "Illegal mix of collations"} }
 }

>> 
>> local prefix = "collation-"
>> 
>> @@ -477,13 +477,13 @@ for _, data_collation in ipairs(data_collations) do
>>    end
>> end
>> 
>> --- Like uses collation (only for unicode_ci and binary)
>> +-- <LIKE> uses collation. If <LIKE> has explicit <COLLATE>, use it
>> +-- instead of implicit.
>> local like_testcases =
>> {
>>    {"2.0",
>>    [[
>> -        CREATE TABLE tx1 (s1 VARCHAR(5) PRIMARY KEY);
>> -        CREATE INDEX I1 on tx1(s1 collate "unicode_ci”);
> 
> Why did you drop index?
Dependece of I1 using is conditioned by the following pieces of code or the value of noCase or the value of pragma case insensitive:
-    if (noCase) {
-      pNewExpr1 =
-        sqlExprAddCollateString(pParse, pNewExpr1,
-                  "unicode_ci");
-    }

-    if (noCase) {
-      pNewExpr2 =
-        sqlExprAddCollateString(pParse, pNewExpr2,
-                  "unicode_ci");
-    }
I have removed this code, so covering index isn't used anymore, primary key is used now.
I can't say why it worked this way, but I can say, that now there is no notion "case/nocase". Comparison is provided with implicit or explicit collation. IMHO, covering index should not affect <LIKE> at all.

>> +        CREATE TABLE tx1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
>>        INSERT INTO tx1 VALUES('aaa');
>>        INSERT INTO tx1 VALUES('Aab');
>>        INSERT INTO tx1 VALUES('İac');
>> @@ -491,35 +491,32 @@ local like_testcases =
>>    ]], {0}},
>>    {"2.1.1",
>>        "SELECT * FROM tx1 WHERE s1 LIKE 'A%' order by s1;",
>> -        {0, {"Aab", "aaa"}} },
>> +        {0, {"aaa","Aab"}} },
> 
> Why order of results has changed?
Because I added collation to s1.

> 
>>    {"2.1.2",
>>        "EXPLAIN QUERY PLAN SELECT * FROM tx1 WHERE s1 LIKE 'A%';",
>> -        {0, {0, 0, 0, "SEARCH TABLE TX1 USING COVERING INDEX I1 (S1>? AND S1<?) (~16384 rows)"}}},
>> +        {0, {0, 0, 0, "SEARCH TABLE TX1 USING PRIMARY KEY (S1>? AND S1<?) (~16384 rows)"}}},
> 
> I guess this is due to dropped index.
I dropped index, because it don’t affect on this query now.

>>    {"2.2.0",
>> -        "PRAGMA case_sensitive_like = true;",
>> -        {0}},
>> -    {"2.2.1",
>> -        "SELECT * FROM tx1 WHERE s1 LIKE 'A%' order by s1;",
>> +        "SELECT * FROM tx1 WHERE s1 LIKE 'A%' COLLATE \"unicode\" order by s1;",
>>        {0, {"Aab"}} },
>> -    {"2.2.2",
>> +    {"2.2.1",
>>        "EXPLAIN QUERY PLAN SELECT * FROM tx1 WHERE s1 LIKE 'A%';",
>>        {0, {0, 0, 0, "/USING PRIMARY KEY/"}} },
>>    {"2.3.0",
>> -        "PRAGMA case_sensitive_like = false;",
>> -        {0}},
>> -    {"2.3.1",
>>        "SELECT * FROM tx1 WHERE s1 LIKE 'i%' order by s1;",
>> -        {0, {"iad", "İac"}}},
>> -    {"2.3.2",
>> -        "SELECT * FROM tx1 WHERE s1 LIKE 'İ%'order by s1;",
>> -        {0, {"iad", "İac"}} },
>> +        {0, {"İac", "iad"}}},
>> +    {"2.3.1",
>> +        "SELECT * FROM tx1 WHERE s1 LIKE 'İ%' COLLATE \"unicode\" order by s1;",
>> +        {0, {"İac"}} },
>>    {"2.4.0",
>>    [[
>>        INSERT INTO tx1 VALUES('ЯЁЮ');
>>    ]], {0} },
>>    {"2.4.1",
>> +        "SELECT * FROM tx1 WHERE s1 LIKE 'яёю' COLLATE \"unicode\";",
>> +        {0, {}} },
>> +    {"2.4.2",
>>        "SELECT * FROM tx1 WHERE s1 LIKE 'яёю';",
>> -        {0, {"ЯЁЮ"}} },
>> +        {0, {"ЯЁЮ"}} }
> 
> Stray diff.
The pragma don’t exist now, so I use case sensitive/insensitive collations in this test.
And meanwhile, I check that explicit collation has the highest priority.

--- Like uses collation (only for unicode_ci and binary)
+-- <LIKE> uses collation. If <LIKE> has explicit <COLLATE>, use it
+-- instead of implicit.
 local like_testcases =
 {
     {"2.0",
     [[
-        CREATE TABLE tx1 (s1 VARCHAR(5) PRIMARY KEY);
+        CREATE TABLE tx1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
         INSERT INTO tx1 VALUES('aaa');
         INSERT INTO tx1 VALUES('Aab');
         INSERT INTO tx1 VALUES('İac');
@@ -490,29 +491,38 @@ local like_testcases =
     ]], {0}},
     {"2.1.1",
         "SELECT * FROM tx1 WHERE s1 LIKE 'A%' order by s1;",
-        {0, {"Aab"}} },
+        {0, {"aaa","Aab"}} },
     {"2.1.2",
         "EXPLAIN QUERY PLAN SELECT * FROM tx1 WHERE s1 LIKE 'A%';",
         {0, {0, 0, 0, "SEARCH TABLE TX1 USING PRIMARY KEY (S1>? AND S1<?) (~16384 rows)"}}},
     {"2.2.0",
-        "SELECT * FROM tx1 WHERE s1 LIKE 'A%' order by s1;",
+        "SELECT * FROM tx1 WHERE s1 LIKE 'A%' COLLATE \"unicode\" order by s1;",
         {0, {"Aab"}} },
     {"2.2.1",
         "EXPLAIN QUERY PLAN SELECT * FROM tx1 WHERE s1 LIKE 'A%';",
         {0, {0, 0, 0, "/USING PRIMARY KEY/"}} },
     {"2.3.0",
         "SELECT * FROM tx1 WHERE s1 LIKE 'i%' order by s1;",
-        {0, {"iad"}}},
+        {0, {"İac", "iad"}}},
     {"2.3.1",
-        "SELECT * FROM tx1 WHERE s1 LIKE 'İ%'order by s1;",
+        "SELECT * FROM tx1 WHERE s1 LIKE 'İ%' COLLATE \"unicode\" order by s1;",
         {0, {"İac"}} },
     {"2.4.0",
     [[
         INSERT INTO tx1 VALUES('ЯЁЮ');
     ]], {0} },
     {"2.4.1",
-        "SELECT * FROM tx1 WHERE s1 LIKE 'яёю';",
+        "SELECT * FROM tx1 WHERE s1 LIKE 'яёю' COLLATE \"unicode\";",
         {0, {}} },
+    {"2.4.2",
+        "SELECT * FROM tx1 WHERE s1 COLLATE \"binary\" LIKE 'яёю';",
+        {0, {}} },
+    {"2.4.3",
+        "SELECT * FROM tx1 WHERE s1 COLLATE \"binary\" LIKE 'яёю' COLLATE \"unicode\";",
+        {1, "Illegal mix of collations"} },
+    {"2.4.4",
+        "SELECT * FROM tx1 WHERE s1 LIKE 'яёю';",
+        {0, {"ЯЁЮ"}} },
 }

>> }
>> 
>> test:do_catchsql_set_test(like_testcases, prefix)
>> diff --git a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
>> index 95e8c98d0..eb9d7c3b9 100755
>> --- a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
>> +++ b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
>> @@ -1,6 +1,6 @@
>> #!/usr/bin/env tarantool
>> test = require("sqltester")
>> -test:plan(128)
>> +test:plan(156)
>> 
>> local prefix = "like-test-"
>> 
>> @@ -80,10 +80,10 @@ local like_test_cases =
>>        {0, {false}} },
>>    {"1.25",
>>        "SELECT 'ёфÅŒش' LIKE '%œش';",
>> -        {0, {true}} },
>> +        {0, {false}} },
> 
> Could you add an appropriate collation to avoid fixing
> test results?
@@ -79,10 +79,10 @@ local like_test_cases =
         "SELECT 'ёф' LIKE '%œش';",
         {0, {false}} },
     {"1.25",
-        "SELECT 'ёфÅŒش' LIKE '%œش';",
+        "SELECT 'ёфÅŒش' LIKE '%œش' COLLATE \"unicode_ci\";",
         {0, {true}} },
     {"1.26",
-        "SELECT 'ÅŒش' LIKE '%œش';",
+        "SELECT 'ÅŒش' LIKE '%œش' COLLATE \"unicode_ci\";",
         {0, {true}} },

>>    {"1.26",
>>        "SELECT 'ÅŒش' LIKE '%œش';",
>> -        {0, {true}} },
>> +        {0, {false}} },
>>    {"1.27",
>>        "SELECT 'ёф' LIKE 'ё_';",
>>        {0, {true}} },
>> @@ -120,6 +120,90 @@ local like_test_cases =
>>    {"1.38",
>>        "SELECT 'ÅŒش' LIKE 'ё_%';",
>>        {0, {false}} },
>> +    {"1.39",
>> +        "SELECT 'A' LIKE 'A' COLLATE \"unicode\";",
>> +        {0, {true}} },
> 
> Please, add not only tests involving constant literals, but
> also columns with implicit/explicit collations.
I check implicit/explicit in /test/sql-tap/collation.test.lua.
+    {"1.69",
+        "SELECT 'Ї' COLLATE \"unicode\" LIKE 'ї' COLLATE \"unicode_uk_s3\";",
+        {1, "Illegal mix of collations"} },
+    {"1.70",
+        "SELECT '%a_' COLLATE \"unicode_ci\" LIKE 'ම%Aම_' COLLATE \"unicode\" ESCAPE 'ම';",
+        {1, "Illegal mix of collations"} },
+    {"1.71",
+        "SELECT '%a_' COLLATE \"unicode\" LIKE 'ම%Aම_' ESCAPE 'ම' COLLATE \"unicode_ci\";",
+        {1, "Illegal mix of collations"} },
+    {"1.72",
+        "SELECT '%_' LIKE 'a%a_' COLLATE \"unicode\" ESCAPE 'A' COLLATE \"unicode_ci\";",
+        {1, "Illegal mix of collations"} }
 }

commit 6701a59045c10191d7883a58a1f9ff61feb62b86
Author: Roman Khabibov <roman.habibov@tarantool.org>
Date:   Tue Apr 23 02:48:26 2019 +0300

    sql: make LIKE predicate dependent on collation
    
    According to ANSI, LIKE should match characters taking into
    account passed collation.
    
    ISO/IEC JTC 1/SC 32 2011, Part 2: Foundation, 8.5
    
    Closes #3589
    
    @TarantoolBot document
    Title: LIKE depends on collations
    
    Now <LIKE> pattern comparison depends on arguments' collation.
    Highest priority has explicit collation, in other words,
    <COLLATION> clause for string (first), pattern (second) or escape
    (third) arguments. If one of the arguments has this clause, it is
    used. If more than one of the arguments has it, collations must be
    similar. Implicit (column) collation is used when there is no
    explicit collations. If there are neither those nor other
    collation, then the comparison is binary.

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 4ac11cb77..f5991ff61 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -44,6 +44,7 @@
 #include <unicode/ucnv.h>
 #include <unicode/uchar.h>
 #include <unicode/ucol.h>
+#include "box/coll_id_cache.h"
 
 static UConverter* pUtf8conv;
 
@@ -673,6 +674,30 @@ enum pattern_match_status {
 	INVALID_PATTERN = 3
 };
 
+/**
+ * Read an UTF-8 character from string, and move pointer to the
+ * next character. Save current character and its length to output
+ * params - they are served as arguments of coll->cmp() call.
+ *
+ * @param[out] str Ptr to string.
+ * @param str_end Ptr the last symbol in @str.
+ * @param[out] char_ptr Ptr to the UTF-8 character.
+ * @param[out] char_len Ptr to length of the UTF-8 character in
+ * bytes.
+ *
+ * @retval UTF-8 character.
+ */
+static UChar32
+step_utf8_char(const char **str, const char *str_end, const char **char_ptr,
+	       size_t *char_len)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	*char_ptr = *str;
+	UChar32 next_utf8 = Utf8Read(*str, str_end);
+	*char_len = *str - *char_ptr;
+	return next_utf8;
+}
+
 /**
  * Compare two UTF-8 strings for equality where the first string
  * is a LIKE expression.
@@ -699,7 +724,7 @@ enum pattern_match_status {
  * @param string String being compared.
  * @param pattern_end Ptr to pattern last symbol.
  * @param string_end Ptr to string last symbol.
- * @param is_like_ci true if LIKE is case insensitive.
+ * @param coll Pointer to collation.
  * @param match_other The escape char for LIKE.
  *
  * @retval One of pattern_match_status values.
@@ -709,7 +734,7 @@ sql_utf8_pattern_compare(const char *pattern,
 			 const char *string,
 			 const char *pattern_end,
 			 const char *string_end,
-			 const int is_like_ci,
+			 struct coll *coll,
 			 UChar32 match_other)
 {
 	/* Next pattern and input string chars. */
@@ -717,9 +742,14 @@ sql_utf8_pattern_compare(const char *pattern,
 	/* One past the last escaped input char. */
 	const char *zEscaped = 0;
 	UErrorCode status = U_ZERO_ERROR;
+	const char *pat_char_ptr = NULL;
+	const char *str_char_ptr = NULL;
+	size_t pat_char_len = 0;
+	size_t str_char_len = 0;
 
 	while (pattern < pattern_end) {
-		c = Utf8Read(pattern, pattern_end);
+		c = step_utf8_char(&pattern, pattern_end, &pat_char_ptr,
+				   &pat_char_len);
 		if (c == SQL_INVALID_UTF8_SYMBOL)
 			return INVALID_PATTERN;
 		if (c == MATCH_ALL_WILDCARD) {
@@ -730,7 +760,9 @@ sql_utf8_pattern_compare(const char *pattern,
 			 * consume a single character of the
 			 * input string for each "_" skipped.
 			 */
-			while ((c = Utf8Read(pattern, pattern_end)) !=
+			while ((c = step_utf8_char(&pattern, pattern_end,
+						   &pat_char_ptr,
+						   &pat_char_len)) !=
 			       SQL_END_OF_STRING) {
 				if (c == SQL_INVALID_UTF8_SYMBOL)
 					return INVALID_PATTERN;
@@ -751,7 +783,9 @@ sql_utf8_pattern_compare(const char *pattern,
 				return MATCH;
 			}
 			if (c == match_other) {
-				c = Utf8Read(pattern, pattern_end);
+				c = step_utf8_char(&pattern, pattern_end,
+						   &pat_char_ptr,
+						   &pat_char_len);
 				if (c == SQL_INVALID_UTF8_SYMBOL)
 					return INVALID_PATTERN;
 				if (c == SQL_END_OF_STRING)
@@ -773,8 +807,6 @@ sql_utf8_pattern_compare(const char *pattern,
 			 */
 
 			int bMatch;
-			if (is_like_ci)
-				c = u_tolower(c);
 			while (string < string_end){
 				/*
 				 * This loop could have been
@@ -786,21 +818,20 @@ sql_utf8_pattern_compare(const char *pattern,
 				 * lower works better with German
 				 * and Turkish languages.
 				 */
-				c2 = Utf8Read(string, string_end);
+				c2 = step_utf8_char(&string, string_end,
+						    &str_char_ptr,
+						    &str_char_len);
 				if (c2 == SQL_INVALID_UTF8_SYMBOL)
 					return NO_MATCH;
-				if (!is_like_ci) {
-					if (c2 != c)
-						continue;
-				} else {
-					if (c2 != c && u_tolower(c2) != c)
-						continue;
-				}
+				if (coll->cmp(pat_char_ptr, pat_char_len,
+					      str_char_ptr, str_char_len, coll)
+					!= 0)
+					continue;
 				bMatch = sql_utf8_pattern_compare(pattern,
 								  string,
 								  pattern_end,
 								  string_end,
-								  is_like_ci,
+								  coll,
 								  match_other);
 				if (bMatch != NO_MATCH)
 					return bMatch;
@@ -808,30 +839,21 @@ sql_utf8_pattern_compare(const char *pattern,
 			return NO_WILDCARD_MATCH;
 		}
 		if (c == match_other) {
-			c = Utf8Read(pattern, pattern_end);
+			c = step_utf8_char(&pattern, pattern_end, &pat_char_ptr,
+					   &pat_char_len);
 			if (c == SQL_INVALID_UTF8_SYMBOL)
 				return INVALID_PATTERN;
 			if (c == SQL_END_OF_STRING)
 				return NO_MATCH;
 			zEscaped = pattern;
 		}
-		c2 = Utf8Read(string, string_end);
+		c2 = step_utf8_char(&string, string_end, &str_char_ptr,
+				    &str_char_len);
 		if (c2 == SQL_INVALID_UTF8_SYMBOL)
 			return NO_MATCH;
-		if (c == c2)
+		if (coll->cmp(pat_char_ptr, pat_char_len, str_char_ptr,
+			      str_char_len, coll) == 0)
 			continue;
-		if (is_like_ci) {
-			/*
-			 * Small optimization. Reduce number of
-			 * calls to u_tolower function. SQL
-			 * standards suggest use to_upper for
-			 * symbol normalisation. However, using
-			 * to_lower allows to respect Turkish 'İ'
-			 * in default locale.
-			 */
-			if (u_tolower(c) == c2 || c == u_tolower(c2))
-				continue;
-		}
 		if (c == MATCH_ONE_WILDCARD && pattern != zEscaped &&
 		    c2 != SQL_END_OF_STRING)
 			continue;
@@ -847,9 +869,10 @@ sql_utf8_pattern_compare(const char *pattern,
 int
 sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc)
 {
+	struct coll_id *p = coll_by_name("unicode", strlen("unicode"));
 	return sql_utf8_pattern_compare(zPattern, zStr,
 		                        zPattern + strlen(zPattern),
-		                        zStr + strlen(zStr), 0, esc);
+		                        zStr + strlen(zStr), p->coll, esc);
 }
 
 /**
@@ -859,9 +882,10 @@ sql_strlike_cs(const char *zPattern, const char *zStr, unsigned int esc)
 int
 sql_strlike_ci(const char *zPattern, const char *zStr, unsigned int esc)
 {
+	struct coll_id *p = coll_by_name("unicode_ci", strlen("unicode_ci"));
 	return sql_utf8_pattern_compare(zPattern, zStr,
 		                        zPattern + strlen(zPattern),
-		                        zStr + strlen(zStr), 1, esc);
+		                        zStr + strlen(zStr), p->coll, esc);
 }
 
 /**
@@ -883,7 +907,6 @@ likeFunc(sql_context *context, int argc, sql_value **argv)
 	u32 escape = SQL_END_OF_STRING;
 	int nPat;
 	sql *db = sql_context_db_handle(context);
-	int is_like_ci = SQL_PTR_TO_INT(sql_user_data(context));
 	int rhs_type = sql_value_type(argv[0]);
 	int lhs_type = sql_value_type(argv[1]);
 
@@ -940,8 +963,10 @@ likeFunc(sql_context *context, int argc, sql_value **argv)
 	if (!zA || !zB)
 		return;
 	int res;
-	res = sql_utf8_pattern_compare(zB, zA, zB_end, zA_end,
-				       is_like_ci, escape);
+	struct coll *coll = sqlGetFuncCollSeq(context);
+	assert(coll != NULL);
+	res = sql_utf8_pattern_compare(zB, zA, zB_end, zA_end, coll, escape);
+
 	if (res == INVALID_PATTERN) {
 		diag_set(ClientError, ER_SQL_EXECUTE, "LIKE pattern can only "\
 			 "contain UTF-8 characters");
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 57da8ec5d..989499abe 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -1356,7 +1356,7 @@ enum trim_side_mask {
   {nArg, SQL_FUNC_SLOCHNG|(bNC*SQL_FUNC_NEEDCOLL), \
    pArg, 0, xFunc, 0, #zName, {SQL_AFF_STRING, {0}}, false}
 #define LIKEFUNC(zName, nArg, arg, flags, type) \
-  {nArg, SQL_FUNC_CONSTANT|flags, \
+  {nArg, SQL_FUNC_NEEDCOLL|SQL_FUNC_CONSTANT|flags, \
    (void *)(SQL_INT_TO_PTR(arg)), 0, likeFunc, 0, #zName, {0}, type, false }
 #define AGGREGATE(zName, nArg, arg, nc, xStep, xFinal, type) \
   {nArg, (nc*SQL_FUNC_NEEDCOLL), \
diff --git a/src/box/sql/whereexpr.c b/src/box/sql/whereexpr.c
index 0a0e900bd..19f142e6c 100644
--- a/src/box/sql/whereexpr.c
+++ b/src/box/sql/whereexpr.c
@@ -221,6 +221,29 @@ operatorMask(int op)
 	return c;
 }
 
+/**
+ * Skip TK_COLLATE in @a expr and save root collation name to @a
+ * coll_name if any.
+ *
+ * Wrapper for sqlExprSkipCollate().
+ *
+ * @param expr Expression.
+ * @param[out] coll_name Collation name.
+ * @retval Pointer to non-collate node.
+ */
+static struct Expr*
+skip_coll_and_get_name(struct Expr* expr, const char **coll_name)
+{
+	struct Expr *ret = sqlExprSkipCollate(expr);
+	if (ret != expr) {
+		assert(expr->op == TK_COLLATE);
+		*coll_name = expr->u.zToken;
+	} else {
+		*coll_name = NULL;
+	}
+	return ret;
+}
+
 /**
  * Check to see if the given expression is a LIKE operator that
  * can be optimized using inequality constraints.
@@ -228,13 +251,11 @@ operatorMask(int op)
  * In order for the operator to be optimizible, the RHS must be a
  * string literal that does not begin with a wildcard. The LHS
  * must be a column that may only be NULL, a string, or a BLOB,
- * never a number. The collating sequence for the column on the
- * LHS must be appropriate for the operator.
+ * never a number.
  *
  * @param pParse      Parsing and code generating context.
  * @param pExpr       Test this expression.
- * @param ppPrefix    Pointer to TK_STRING expression with
- *                    pattern prefix.
+ * @param ppPrefix    Pointer to expression with pattern prefix.
  * @param pisComplete True if the only wildcard is '%' in the
  *                    last character.
  * @retval True if the given expr is a LIKE operator & is
@@ -266,7 +287,9 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
 		return 0;
 	}
 	pList = pExpr->x.pList;
-	pLeft = pList->a[1].pExpr;
+	const char *l_coll_name = NULL;
+	pLeft = skip_coll_and_get_name(pList->a[1].pExpr, &l_coll_name);
+
 	/* Value might be numeric */
 	if (pLeft->op != TK_COLUMN ||
 	    sql_expr_type(pLeft) != FIELD_TYPE_STRING) {
@@ -278,7 +301,34 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
 	}
 	assert(pLeft->iColumn != (-1));	/* Because IPK never has AFF_TEXT */
 
-	pRight = sqlExprSkipCollate(pList->a[0].pExpr);
+	const char *r_coll_name = NULL;
+	pRight = skip_coll_and_get_name(pList->a[0].pExpr, &r_coll_name);
+
+	/* Only for "binary" and "unicode_ci" collations. */
+	if (r_coll_name != NULL) {
+		if (strcmp(r_coll_name, "binary") != 0 &&
+		    strcmp(r_coll_name, "unicode_ci") != 0)
+			return 0;
+	} else if (l_coll_name != NULL) {
+		if (strcmp(l_coll_name, "binary") != 0 &&
+		    strcmp(l_coll_name, "unicode_ci") != 0)
+			return 0;
+	} else {
+		/*
+		 * If both arguments haven't <COLLATE> then we
+		 * should check implicit collation.
+		 */
+		struct field_def *field_def = pLeft->space_def->fields +
+					      pLeft->iColumn;
+		uint32_t none_id = coll_by_name("none", strlen("none"))->id;
+		uint32_t u_ci_id =
+			coll_by_name("unicode_ci", strlen("unicode_ci"))->id;
+		uint32_t bin_id = coll_by_name("binary", strlen("binary"))->id;
+		if (field_def->coll_id != none_id && field_def->coll_id !=
+		    u_ci_id && field_def->coll_id != bin_id)
+			return 0;
+	}
+
 	op = pRight->op;
 	if (op == TK_VARIABLE) {
 		Vdbe *pReprepare = pParse->pReprepare;
@@ -308,6 +358,15 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
 				pParse->is_aborted = true;
 			else
 				pPrefix->u.zToken[cnt] = 0;
+			/*
+			 * If <COLLATE> was typed to <LIKE>'s RHS
+			 * add it result expression.
+			 */
+			if (r_coll_name != NULL)
+				pPrefix =
+					sqlExprAddCollateString(pParse,
+								pPrefix,
+								r_coll_name);
 			*ppPrefix = pPrefix;
 			if (op == TK_VARIABLE) {
 				Vdbe *v = pParse->pVdbe;
@@ -977,8 +1036,6 @@ exprAnalyze(SrcList * pSrc,	/* the FROM clause */
 	Expr *pStr1 = 0;
 	/* RHS of LIKE ends with wildcard. */
 	int isComplete = 0;
-	/* uppercase equivalent to lowercase. */
-	int noCase = 0;
 	/* Top-level operator. pExpr->op. */
 	int op;
 	/* Parsing context. */
@@ -1143,18 +1200,13 @@ exprAnalyze(SrcList * pSrc,	/* the FROM clause */
 	 * A like pattern of the form "x LIKE 'aBc%'" is changed
 	 * into constraints:
 	 *
-	 *          x>='ABC' AND x<'abd' AND x LIKE 'aBc%'
+	 *          x>='aBc' AND x<'abd' AND x LIKE 'aBc%'
 	 *
 	 * The last character of the prefix "abc" is incremented
-	 * to form the termination condition "abd". If case is
-	 * not significant (the default for LIKE) then the
-	 * lower-bound is made all uppercase and the upper-bound
-	 * is made all lowercase so that the bounds also work
-	 * when comparing BLOBs.
+	 * to form the termination condition "abd".
 	 */
 	if (pWC->op == TK_AND &&
-	    like_optimization_is_valid(pParse, pExpr, &pStr1,
-				       &isComplete)) {
+	    like_optimization_is_valid(pParse, pExpr, &pStr1, &isComplete)) {
 		Expr *pLeft;
 		/* Copy of pStr1 - RHS of LIKE operator. */
 		Expr *pStr2;
@@ -1167,57 +1219,22 @@ exprAnalyze(SrcList * pSrc,	/* the FROM clause */
 		pLeft = pExpr->x.pList->a[1].pExpr;
 		pStr2 = sqlExprDup(db, pStr1, 0);
 
-		/*
-		 * Convert the lower bound to upper-case and the
-		 * upper bound to lower-case (upper-case is less
-		 * than lower-case in ASCII) so that the range
-		 * constraints also work for BLOBs.
-		 */
-		if (noCase && !pParse->db->mallocFailed) {
-			int i;
-			char c;
-			pTerm->wtFlags |= TERM_LIKE;
-			for (i = 0; (c = pStr1->u.zToken[i]) != 0; i++) {
-				pStr1->u.zToken[i] = sqlToupper(c);
-				pStr2->u.zToken[i] = sqlTolower(c);
-			}
-		}
-
 		if (!db->mallocFailed) {
 			u8 c, *pC;	/* Last character before the first wildcard */
-			pC = (u8 *) & pStr2->u.
-			    zToken[sqlStrlen30(pStr2->u.zToken) - 1];
+			/* pStr2 can contain COLLATE nodes. */
+			struct Expr *real_str = sqlExprSkipCollate(pStr2);
+			pC = (u8 *) & real_str->u.
+			    zToken[sqlStrlen30(real_str->u.zToken) - 1];
 			c = *pC;
-			if (noCase) {
-				/* The point is to increment the last character before the first
-				 * wildcard.  But if we increment '@', that will push it into the
-				 * alphabetic range where case conversions will mess up the
-				 * inequality.  To avoid this, make sure to also run the full
-				 * LIKE on all candidate expressions by clearing the isComplete flag
-				 */
-				if (c == 'A' - 1)
-					isComplete = 0;
-				c = sqlUpperToLower[c];
-			}
 			*pC = c + 1;
 		}
 		pNewExpr1 = sqlExprDup(db, pLeft, 0);
-		if (noCase) {
-			pNewExpr1 =
-				sqlExprAddCollateString(pParse, pNewExpr1,
-							    "unicode_ci");
-		}
 		pNewExpr1 = sqlPExpr(pParse, TK_GE, pNewExpr1, pStr1);
 		transferJoinMarkings(pNewExpr1, pExpr);
 		idxNew1 = whereClauseInsert(pWC, pNewExpr1, wtFlags);
 		testcase(idxNew1 == 0);
 		exprAnalyze(pSrc, pWC, idxNew1);
 		pNewExpr2 = sqlExprDup(db, pLeft, 0);
-		if (noCase) {
-			pNewExpr2 =
-				sqlExprAddCollateString(pParse, pNewExpr2,
-							    "unicode_ci");
-		}
 		pNewExpr2 = sqlPExpr(pParse, TK_LT, pNewExpr2, pStr2);
 		transferJoinMarkings(pNewExpr2, pExpr);
 		idxNew2 = whereClauseInsert(pWC, pNewExpr2, wtFlags);
diff --git a/test/sql-tap/collation.test.lua b/test/sql-tap/collation.test.lua
index e1df8750b..6c3c8ea34 100755
--- a/test/sql-tap/collation.test.lua
+++ b/test/sql-tap/collation.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(172)
+test:plan(175)
 
 local prefix = "collation-"
 
@@ -477,12 +477,13 @@ for _, data_collation in ipairs(data_collations) do
     end
 end
 
--- Like uses collation (only for unicode_ci and binary)
+-- <LIKE> uses collation. If <LIKE> has explicit <COLLATE>, use it
+-- instead of implicit.
 local like_testcases =
 {
     {"2.0",
     [[
-        CREATE TABLE tx1 (s1 VARCHAR(5) PRIMARY KEY);
+        CREATE TABLE tx1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
         INSERT INTO tx1 VALUES('aaa');
         INSERT INTO tx1 VALUES('Aab');
         INSERT INTO tx1 VALUES('İac');
@@ -490,29 +491,38 @@ local like_testcases =
     ]], {0}},
     {"2.1.1",
         "SELECT * FROM tx1 WHERE s1 LIKE 'A%' order by s1;",
-        {0, {"Aab"}} },
+        {0, {"aaa","Aab"}} },
     {"2.1.2",
         "EXPLAIN QUERY PLAN SELECT * FROM tx1 WHERE s1 LIKE 'A%';",
         {0, {0, 0, 0, "SEARCH TABLE TX1 USING PRIMARY KEY (S1>? AND S1<?) (~16384 rows)"}}},
     {"2.2.0",
-        "SELECT * FROM tx1 WHERE s1 LIKE 'A%' order by s1;",
+        "SELECT * FROM tx1 WHERE s1 LIKE 'A%' COLLATE \"unicode\" order by s1;",
         {0, {"Aab"}} },
     {"2.2.1",
         "EXPLAIN QUERY PLAN SELECT * FROM tx1 WHERE s1 LIKE 'A%';",
         {0, {0, 0, 0, "/USING PRIMARY KEY/"}} },
     {"2.3.0",
         "SELECT * FROM tx1 WHERE s1 LIKE 'i%' order by s1;",
-        {0, {"iad"}}},
+        {0, {"İac", "iad"}}},
     {"2.3.1",
-        "SELECT * FROM tx1 WHERE s1 LIKE 'İ%'order by s1;",
+        "SELECT * FROM tx1 WHERE s1 LIKE 'İ%' COLLATE \"unicode\" order by s1;",
         {0, {"İac"}} },
     {"2.4.0",
     [[
         INSERT INTO tx1 VALUES('ЯЁЮ');
     ]], {0} },
     {"2.4.1",
-        "SELECT * FROM tx1 WHERE s1 LIKE 'яёю';",
+        "SELECT * FROM tx1 WHERE s1 LIKE 'яёю' COLLATE \"unicode\";",
         {0, {}} },
+    {"2.4.2",
+        "SELECT * FROM tx1 WHERE s1 COLLATE \"binary\" LIKE 'яёю';",
+        {0, {}} },
+    {"2.4.3",
+        "SELECT * FROM tx1 WHERE s1 COLLATE \"binary\" LIKE 'яёю' COLLATE \"unicode\";",
+        {1, "Illegal mix of collations"} },
+    {"2.4.4",
+        "SELECT * FROM tx1 WHERE s1 LIKE 'яёю';",
+        {0, {"ЯЁЮ"}} },
 }
 
 test:do_catchsql_set_test(like_testcases, prefix)
diff --git a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
index 95e8c98d0..641469211 100755
--- a/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
+++ b/test/sql-tap/gh-3251-string-pattern-comparison.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(128)
+test:plan(162)
 
 local prefix = "like-test-"
 
@@ -79,10 +79,10 @@ local like_test_cases =
         "SELECT 'ёф' LIKE '%œش';",
         {0, {false}} },
     {"1.25",
-        "SELECT 'ёфÅŒش' LIKE '%œش';",
+        "SELECT 'ёфÅŒش' LIKE '%œش' COLLATE \"unicode_ci\";",
         {0, {true}} },
     {"1.26",
-        "SELECT 'ÅŒش' LIKE '%œش';",
+        "SELECT 'ÅŒش' LIKE '%œش' COLLATE \"unicode_ci\";",
         {0, {true}} },
     {"1.27",
         "SELECT 'ёф' LIKE 'ё_';",
@@ -120,6 +120,108 @@ local like_test_cases =
     {"1.38",
         "SELECT 'ÅŒش' LIKE 'ё_%';",
         {0, {false}} },
+    {"1.39",
+        "SELECT 'A' LIKE 'A' COLLATE \"unicode\";",
+        {0, {true}} },
+    {"1.40",
+        "SELECT 'A' LIKE 'a' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.41",
+        "SELECT 'Ab' COLLATE \"unicode\" LIKE 'ab';",
+        {0, {false}} },
+    {"1.42",
+        "SELECT 'ss' LIKE 'ß' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.43",
+        "SELECT 'Я' LIKE 'я' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.44",
+        "SELECT 'AЯB' LIKE 'AяB' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.45",
+        "SELECT 'Ї' LIKE 'ї' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.46",
+        "SELECT 'Ab' LIKE '_b' COLLATE \"unicode\";",
+        {0, {true}} },
+    {"1.47",
+        "SELECT 'A' LIKE '_' COLLATE \"unicode\";",
+        {0, {true}} },
+    {"1.48",
+        "SELECT 'AB' LIKE '%B' COLLATE \"unicode\";",
+        {0, {true}} },
+    {"1.49",
+        "SELECT 'A' LIKE 'A' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.50",
+        "SELECT 'A' COLLATE \"unicode_ci\" LIKE 'a';",
+        {0, {true}} },
+    {"1.51",
+        "SELECT 'Я' LIKE 'я' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.52",
+        "SELECT 'AЯB' LIKE 'AяB' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.53",
+        "SELECT 'Ї' LIKE 'ї' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.54",
+        "SELECT 'Ab' LIKE 'ab' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.55",
+        "SELECT 'ba' LIKE 'ab' COLLATE \"unicode_ci\";",
+        {0, {false}} },
+    {"1.56",
+        "SELECT 'Aaa' LIKE 'A%' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.57",
+        "SELECT 'aaa' LIKE 'A%' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.58",
+        "SELECT 'A' LIKE '_' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.59",
+        "SELECT 'ss' LIKE 'ß' COLLATE \"unicode_de__phonebook_s1\";",
+        {0, {false}} },
+    {"1.60",
+        "SELECT 'ss' LIKE 'ß' COLLATE \"unicode_de__phonebook_s3\";",
+        {0, {false}} },
+    {"1.61",
+        "SELECT 'Ї' LIKE 'ї' COLLATE \"unicode_uk_s1\";",
+        {0, {true}} },
+    {"1.62",
+        "SELECT 'Ї' LIKE 'ї' COLLATE \"unicode_uk_s3\";",
+        {0, {false}} },
+    {"1.63",
+        "SELECT 'Ї' COLLATE \"unicode_uk_s3\" LIKE 'ї' COLLATE \"unicode_uk_s3\";",
+        {0, {false}} },
+    {"1.64",
+        "SELECT '%a_' LIKE 'ම%Aම_' COLLATE \"unicode\" ESCAPE 'ම';",
+        {0, {false}} },
+    {"1.65",
+        "SELECT '%a_' COLLATE \"unicode\" LIKE 'ම%Aම_' COLLATE \"unicode\" ESCAPE 'ම' COLLATE \"unicode\";",
+        {0, {false}} },
+    {"1.66",
+        "SELECT '%a_' LIKE 'ම%Aම_' ESCAPE 'ම' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.67",
+        "SELECT '%_' LIKE 'a%a_' ESCAPE 'A' COLLATE \"unicode_ci\";",
+        {0, {false}} },
+    {"1.68",
+        "SELECT '%_' LIKE 'a%a_' ESCAPE 'a' COLLATE \"unicode_ci\";",
+        {0, {true}} },
+    {"1.69",
+        "SELECT 'Ї' COLLATE \"unicode\" LIKE 'ї' COLLATE \"unicode_uk_s3\";",
+        {1, "Illegal mix of collations"} },
+    {"1.70",
+        "SELECT '%a_' COLLATE \"unicode_ci\" LIKE 'ම%Aම_' COLLATE \"unicode\" ESCAPE 'ම';",
+        {1, "Illegal mix of collations"} },
+    {"1.71",
+        "SELECT '%a_' COLLATE \"unicode\" LIKE 'ම%Aම_' ESCAPE 'ම' COLLATE \"unicode_ci\";",
+        {1, "Illegal mix of collations"} },
+    {"1.72",
+        "SELECT '%_' LIKE 'a%a_' COLLATE \"unicode\" ESCAPE 'A' COLLATE \"unicode_ci\";",
+        {1, "Illegal mix of collations"} }
 }
 
 test:do_catchsql_set_test(like_test_cases, prefix)
diff --git a/test/sql-tap/like3.test.lua b/test/sql-tap/like3.test.lua
index b5f52be52..e7c25dba6 100755
--- a/test/sql-tap/like3.test.lua
+++ b/test/sql-tap/like3.test.lua
@@ -73,16 +73,16 @@ test:do_execsql_test(
         CREATE TABLE t2(a INT PRIMARY KEY, b TEXT);
         INSERT INTO t2 SELECT a, b FROM t1;
         CREATE INDEX t2ba ON t2(b,a);
-        SELECT a, b FROM t2 WHERE b LIKE 'ab%' ORDER BY +a;
+        SELECT a, b FROM t2 WHERE b LIKE 'ab%' COLLATE "unicode_ci" ORDER BY +a;
     ]], {
         -- <like3-2.0>
-        1, "abc", 4, "abc"
+        1, "abc", 2, "ABX", 4, "abc", 5, "ABX"
         -- </like3-2.0>
     })
 test:do_execsql_test(
     "like3-2.1",
     [[
-        SELECT a, b FROM t2 WHERE +b LIKE 'ab%' ORDER BY +a;
+        SELECT a, b FROM t2 WHERE +b LIKE 'ab%' COLLATE "unicode_ci" ORDER BY +a;
     ]], {
         -- <like3-2.1>
         1, "abc", 2, "ABX", 4, "abc", 5, "ABX"
diff --git a/test/sql-tap/whereG.test.lua b/test/sql-tap/whereG.test.lua
index 177d9d14e..590027023 100755
--- a/test/sql-tap/whereG.test.lua
+++ b/test/sql-tap/whereG.test.lua
@@ -89,7 +89,7 @@ test:do_execsql_test(
     [[
         SELECT DISTINCT aname
           FROM album, composer, track
-         WHERE unlikely(cname LIKE '%bach%')
+         WHERE unlikely(cname LIKE '%bach%' COLLATE "unicode_ci")
            AND composer.cid=track.cid
            AND album.aid=track.aid;
     ]], {
@@ -118,7 +118,7 @@ test:do_execsql_test(
     [[
         SELECT DISTINCT aname
           FROM album, composer, track
-         WHERE likelihood(cname LIKE '%bach%', 0.5)
+         WHERE likelihood(cname LIKE '%bach%' COLLATE "unicode_ci", 0.5)
            AND composer.cid=track.cid
            AND album.aid=track.aid;
     ]], {
@@ -146,7 +146,7 @@ test:do_execsql_test(
     [[
         SELECT DISTINCT aname
           FROM album, composer, track
-         WHERE cname LIKE '%bach%'
+         WHERE cname LIKE '%bach%' COLLATE "unicode_ci"
            AND composer.cid=track.cid
            AND album.aid=track.aid;
     ]], {
@@ -174,7 +174,7 @@ test:do_execsql_test(
     [[
         SELECT DISTINCT aname
           FROM album, composer, track
-         WHERE cname LIKE '%bach%'
+         WHERE cname LIKE '%bach%' COLLATE "unicode_ci"
            AND unlikely(composer.cid=track.cid)
            AND unlikely(album.aid=track.aid);
     ]], {

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH 1/2] sql: remove "PRAGMA case_sensitive_like"
  2019-07-17 16:18   ` [tarantool-patches] " n.pettik
@ 2019-07-24 21:02     ` Roman Khabibov
  0 siblings, 0 replies; 9+ messages in thread
From: Roman Khabibov @ 2019-07-24 21:02 UTC (permalink / raw)
  To: tarantool-patches; +Cc: n. pettik



> On Jul 17, 2019, at 19:18, n.pettik <korablev@tarantool.org> wrote:
> 
> 
> 
>> On 14 Jul 2019, at 01:51, Roman Khabibov <roman.habibov@tarantool.org> wrote:
>> 
>> According to ANSI, LIKE should match characters taking into
>> account passed collation, so this pragma is no longer needed.
> 
> Can’t build this particular patch:
> 
> /Users/n.pettik/tarantool/src/box/sql/func.c:1795:1: error: conflicting types for 'sql_is_like_func'
> sql_is_like_func(struct sql *db, struct Expr *expr, int *is_like_ci)
> ^
> /Users/n.pettik/tarantool/src/box/sql/sqlInt.h:4277:1: note: previous declaration is here
> sql_is_like_func(struct sql *db, struct Expr *expr);
> ^
> /Users/n.pettik/tarantool/src/box/sql/func.c:1805:35: error: use of undeclared identifier 'SQL_FUNC_CASE'
>        *is_like_ci = (func->funcFlags & SQL_FUNC_CASE) == 0;
>                                         ^
> 2 errors generated.
> make[2]: *** [src/box/sql/CMakeFiles/sql.dir/func.c.o] Error 1
> make[2]: *** Waiting for unfinished jobs....
> make[1]: *** [src/box/sql/CMakeFiles/sql.dir/all] Error 2
> make: *** [all] Error 2
> 
> Please, fix compilation errors.
> 
>> diff --git a/test/sql-tap/pragma.test.lua b/test/sql-tap/pragma.test.lua
>> index 8221d36e6..d8fb550d0 100755
>> --- a/test/sql-tap/pragma.test.lua
>> +++ b/test/sql-tap/pragma.test.lua
>> @@ -81,21 +81,16 @@ test:do_execsql_test(
>> 	-- </pragma-3.2>
>> })
>> 
>> --- Check that "PRAGMA case_sensitive_like" returns its status
>> --- (0 or 1) if called without parameter.
>> -test:do_test(
>> +-- Check that "PRAGMA case_sensitive_like" does not exist.
>> +test:do_catchsql_test(
>> 	"pragma-3.3",
>> -	function()
>> -		old_value = box.execute('PRAGMA case_sensitive_like').rows
>> -		box.execute('PRAGMA case_sensitive_like = 1')
>> -		new_value = box.execute('PRAGMA case_sensitive_like').rows
>> -		box.execute('PRAGMA case_sensitive_like = '.. old_value[1][1])
>> -		return new_value[1][1]
>> -	end,
>> +	[[
>> +		PRAGMA case_sensitive_like
>> +	]], {
>> 	-- <pragma-3.3>
>> -	1
>> +	1, "Pragma 'CASE_SENSITIVE_LIKE' does not exist"
>> 	-- </pragma-3.3>
>> -	)
>> +})
> 
> Sorry, this test looks weird. Just remove it.
Done.

commit ceed475fc0b497074ce122121a729fee85ad9adb
Author: Roman Khabibov <roman.habibov@tarantool.org>
Date:   Tue Apr 16 15:51:55 2019 +0300

    sql: remove "PRAGMA case_sensitive_like"
    
    According to ANSI, LIKE should match characters taking into
    account passed collation, so this pragma is no longer needed.
    
    Part of #3589

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 98cad51fd..4ac11cb77 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -1791,42 +1791,8 @@ sqlRegisterPerConnectionBuiltinFunctions(sql * db)
 		sqlOomFault(db);
 }
 
-/*
- * Set the LIKEOPT flag on the 2-argument function with the given name.
- */
-static void
-setLikeOptFlag(sql * db, const char *zName, u8 flagVal)
-{
-	FuncDef *pDef;
-	pDef = sqlFindFunction(db, zName, 2, 0);
-	if (ALWAYS(pDef)) {
-		pDef->funcFlags |= flagVal;
-	}
-}
-
-/**
- * Register the built-in LIKE function.
- */
-void
-sqlRegisterLikeFunctions(sql *db, int is_case_insensitive)
-{
-	/*
-	 * FIXME: after introducing type <BOOLEAN> LIKE must
-	 * return that type: TRUE if the string matches the
-	 * supplied pattern and FALSE otherwise.
-	 */
-	int *is_like_ci = SQL_INT_TO_PTR(is_case_insensitive);
-	sqlCreateFunc(db, "LIKE", FIELD_TYPE_BOOLEAN, 2, 0,
-			  is_like_ci, likeFunc, 0, 0, 0);
-	sqlCreateFunc(db, "LIKE", FIELD_TYPE_BOOLEAN, 3, 0,
-			  is_like_ci, likeFunc, 0, 0, 0);
-	setLikeOptFlag(db, "LIKE",
-		       !(is_case_insensitive) ? (SQL_FUNC_LIKE |
-		       SQL_FUNC_CASE) : SQL_FUNC_LIKE);
-}
-
 int
-sql_is_like_func(struct sql *db, struct Expr *expr, int *is_like_ci)
+sql_is_like_func(struct sql *db, struct Expr *expr)
 {
 	if (expr->op != TK_FUNCTION || !expr->x.pList ||
 	    expr->x.pList->nExpr != 2)
@@ -1836,7 +1802,6 @@ sql_is_like_func(struct sql *db, struct Expr *expr, int *is_like_ci)
 	assert(func != NULL);
 	if ((func->funcFlags & SQL_FUNC_LIKE) == 0)
 		return 0;
-	*is_like_ci = (func->funcFlags & SQL_FUNC_CASE) == 0;
 	return 1;
 }
 
diff --git a/src/box/sql/pragma.c b/src/box/sql/pragma.c
index 53524b617..293c2cd2e 100644
--- a/src/box/sql/pragma.c
+++ b/src/box/sql/pragma.c
@@ -484,13 +484,6 @@ sqlPragma(Parse * pParse, Token * pId,	/* First part of [schema.]id field */
 					sqlParserTrace(0, 0);
 			}
 #endif
-			/*
-			 * Reinstall the LIKE and functions. The
-			 * variant of LIKE * used will be case
-			 * sensitive or not depending on the RHS.
-			 */
-			if (mask == LIKE_CASE_SENS_FLAG)
-				sqlRegisterLikeFunctions(db, !is_pragma_set);
 		}
 		break;
 	}
diff --git a/src/box/sql/pragma.h b/src/box/sql/pragma.h
index aa7e7cd96..02895b0ea 100644
--- a/src/box/sql/pragma.h
+++ b/src/box/sql/pragma.h
@@ -93,60 +93,57 @@ static const char *const pragCName[] = {
 	/*  55 */ "TEXT",
 	/*  56 */ "match",
 	/*  57 */ "TEXT",
-	/* Used by: case_sensitive_like */
-	/*  58 */ "case_sensitive_like",
-	/*  59 */ "INTEGER",
 	/* Used by: count_changes */
-	/*  60 */ "count_changes",
-	/*  61 */ "INTEGER",
+	/*  58 */ "count_changes",
+	/*  59 */ "INTEGER",
 	/* Used by: defer_foreign_keys */
-	/*  62 */ "defer_foreign_keys",
-	/*  63 */ "INTEGER",
+	/*  60 */ "defer_foreign_keys",
+	/*  61 */ "INTEGER",
 	/* Used by: full_column_names */
-	/*  64 */ "full_column_names",
-	/*  65 */ "INTEGER",
+	/*  62 */ "full_column_names",
+	/*  63 */ "INTEGER",
 	/* Used by: parser_trace */
-	/*  66 */ "parser_trace",
-	/*  67 */ "INTEGER",
+	/*  64 */ "parser_trace",
+	/*  65 */ "INTEGER",
 	/* Used by: recursive_triggers */
-	/*  68 */ "recursive_triggers",
-	/*  69 */ "INTEGER",
+	/*  66 */ "recursive_triggers",
+	/*  67 */ "INTEGER",
 	/* Used by: reverse_unordered_selects */
-	/*  70 */ "reverse_unordered_selects",
-	/*  71 */ "INTEGER",
+	/*  68 */ "reverse_unordered_selects",
+	/*  69 */ "INTEGER",
 	/* Used by: select_trace */
-	/*  72 */ "select_trace",
-	/*  73 */ "INTEGER",
+	/*  70 */ "select_trace",
+	/*  71 */ "INTEGER",
 	/* Used by: short_column_names */
-	/*  74 */ "short_column_names",
-	/*  75 */ "INTEGER",
+	/*  72 */ "short_column_names",
+	/*  73 */ "INTEGER",
 	/* Used by: sql_compound_select_limit */
-	/*  76 */ "sql_compound_select_limit",
-	/*  77 */ "INTEGER",
+	/*  74 */ "sql_compound_select_limit",
+	/*  75 */ "INTEGER",
 	/* Used by: sql_default_engine */
-	/*  78 */ "sql_default_engine",
-	/*  79 */ "TEXT",
+	/*  76 */ "sql_default_engine",
+	/*  77 */ "TEXT",
 	/* Used by: sql_trace */
-	/*  80 */ "sql_trace",
-	/*  81 */ "INTEGER",
+	/*  78 */ "sql_trace",
+	/*  79 */ "INTEGER",
 	/* Used by: vdbe_addoptrace */
-	/*  82 */ "vdbe_addoptrace",
-	/*  83 */ "INTEGER",
+	/*  80 */ "vdbe_addoptrace",
+	/*  81 */ "INTEGER",
 	/* Used by: vdbe_debug */
-	/*  84 */ "vdbe_debug",
-	/*  85 */ "INTEGER",
+	/*  82 */ "vdbe_debug",
+	/*  83 */ "INTEGER",
 	/* Used by: vdbe_eqp */
-	/*  86 */ "vdbe_eqp",
-	/*  87 */ "INTEGER",
+	/*  84 */ "vdbe_eqp",
+	/*  85 */ "INTEGER",
 	/* Used by: vdbe_listing */
-	/*  88 */ "vdbe_listing",
-	/*  89 */ "INTEGER",
+	/*  86 */ "vdbe_listing",
+	/*  87 */ "INTEGER",
 	/* Used by: vdbe_trace */
-	/*  90 */ "vdbe_trace",
-	/*  91 */ "INTEGER",
+	/*  88 */ "vdbe_trace",
+	/*  89 */ "INTEGER",
 	/* Used by: where_trace */
-	/*  92 */ "where_trace",
-	/*  93 */ "INTEGER",
+	/*  90 */ "where_trace",
+	/*  91 */ "INTEGER",
 };
 
 /* Definitions of all built-in pragmas */
@@ -163,11 +160,6 @@ typedef struct PragmaName {
  * to be sorted. For more info see pragma_locate function.
  */
 static const PragmaName aPragmaName[] = {
-	{ /* zName:     */ "case_sensitive_like",
-	 /* ePragTyp:  */ PragTyp_FLAG,
-	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 58, 1,
-	 /* iArg:      */ LIKE_CASE_SENS_FLAG},
 	{ /* zName:     */ "collation_list",
 	 /* ePragTyp:  */ PragTyp_COLLATION_LIST,
 	 /* ePragFlg:  */ PragFlg_Result0,
@@ -210,46 +202,46 @@ static const PragmaName aPragmaName[] = {
 	{ /* zName:     */ "parser_trace",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 66, 1,
+	 /* ColNames:  */ 64, 1,
 	 /* iArg:      */ PARSER_TRACE_FLAG},
 #endif
 	{ /* zName:     */ "recursive_triggers",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 68, 1,
+	 /* ColNames:  */ 66, 1,
 	 /* iArg:      */ SQL_RecTriggers},
 	{ /* zName:     */ "reverse_unordered_selects",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 70, 1,
+	 /* ColNames:  */ 68, 1,
 	 /* iArg:      */ SQL_ReverseOrder},
 #if defined(SQL_DEBUG)
 	{ /* zName:     */ "select_trace",
 	/* ePragTyp:  */ PragTyp_FLAG,
 	/* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	/* ColNames:  */ 72, 1,
+	/* ColNames:  */ 70, 1,
 	/* iArg:      */ SQL_SelectTrace},
 #endif
 	{ /* zName:     */ "short_column_names",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 73, 1,
+	 /* ColNames:  */ 72, 1,
 	 /* iArg:      */ SQL_ShortColNames},
 	{ /* zName:     */ "sql_compound_select_limit",
 	/* ePragTyp:  */ PragTyp_COMPOUND_SELECT_LIMIT,
 	/* ePragFlg:  */ PragFlg_Result0,
-	/* ColNames:  */ 76, 1,
+	/* ColNames:  */ 74, 1,
 	/* iArg:      */ 0},
 	{ /* zName:     */ "sql_default_engine",
 	 /* ePragTyp:  */ PragTyp_DEFAULT_ENGINE,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 78, 1,
+	 /* ColNames:  */ 76, 1,
 	 /* iArg:      */ 0},
 #if defined(SQL_DEBUG)
 	{ /* zName:     */ "sql_trace",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 80, 1,
+	 /* ColNames:  */ 78, 1,
 	 /* iArg:      */ SQL_SqlTrace},
 #endif
 	{ /* zName:     */ "stats",
@@ -268,33 +260,33 @@ static const PragmaName aPragmaName[] = {
 	{ /* zName:     */ "vdbe_addoptrace",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 82, 1,
+	 /* ColNames:  */ 80, 1,
 	 /* iArg:      */ SQL_VdbeAddopTrace},
 	{ /* zName:     */ "vdbe_debug",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 84, 1,
+	 /* ColNames:  */ 82, 1,
 	 /* iArg:      */
 	 SQL_SqlTrace | SQL_VdbeListing | SQL_VdbeTrace},
 	{ /* zName:     */ "vdbe_eqp",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 86, 1,
+	 /* ColNames:  */ 84, 1,
 	 /* iArg:      */ SQL_VdbeEQP},
 	{ /* zName:     */ "vdbe_listing",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 88, 1,
+	 /* ColNames:  */ 86, 1,
 	 /* iArg:      */ SQL_VdbeListing},
 	{ /* zName:     */ "vdbe_trace",
 	 /* ePragTyp:  */ PragTyp_FLAG,
 	 /* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	 /* ColNames:  */ 90, 1,
+	 /* ColNames:  */ 88, 1,
 	 /* iArg:      */ SQL_VdbeTrace},
 	{ /* zName:     */ "where_trace",
 	/* ePragTyp:  */ PragTyp_FLAG,
 	/* ePragFlg:  */ PragFlg_Result0 | PragFlg_NoColumns1,
-	/* ColNames:  */ 92, 1,
+	/* ColNames:  */ 90, 1,
 	/* iArg:      */ SQL_WhereTrace},
 #endif
 };
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 73dc6e4d7..57da8ec5d 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -1143,8 +1143,6 @@ struct sql {
 /* Debug print info about SQL query as it parsed */
 #define PARSER_TRACE_FLAG  0x00000002
 #define SQL_FullColNames   0x00000004	/* Show full column names on SELECT */
-/* True if LIKE is case sensitive. */
-#define LIKE_CASE_SENS_FLAG 0x00000008
 #define SQL_ShortColNames  0x00000040	/* Show short columns names */
 #define SQL_CountRows      0x00000080	/* Count rows changed by INSERT, */
 					  /*   DELETE, or UPDATE and return */
@@ -1271,7 +1269,6 @@ struct FuncDestructor {
  *     SQL_FUNC_CONSTANT  ==  sql_DETERMINISTIC from the API
  */
 #define SQL_FUNC_LIKE     0x0004	/* Candidate for the LIKE optimization */
-#define SQL_FUNC_CASE     0x0008	/* Case-sensitive LIKE-type function */
 #define SQL_FUNC_EPHEM    0x0010	/* Ephemeral.  Delete with VDBE */
 #define SQL_FUNC_NEEDCOLL 0x0020	/* sqlGetFuncCollSeq() might be called.
 					 * The flag is set when the collation
@@ -4266,8 +4263,6 @@ sql_key_info_unref(struct sql_key_info *key_info);
 struct key_def *
 sql_key_info_to_key_def(struct sql_key_info *key_info);
 
-void sqlRegisterLikeFunctions(sql *, int);
-
 /**
  * Check if the function implements LIKE-style comparison & if it
  * is appropriate to apply a LIKE query optimization.
@@ -4279,7 +4274,7 @@ void sqlRegisterLikeFunctions(sql *, int);
  * @retval 1 if LIKE optimization can be used, 0 otherwise.
  */
 int
-sql_is_like_func(struct sql *db, struct Expr *expr, int *is_like_ci);
+sql_is_like_func(struct sql *db, struct Expr *expr);
 
 int sqlCreateFunc(sql *, const char *, enum field_type,
 		      int, int, void *,
diff --git a/src/box/sql/whereexpr.c b/src/box/sql/whereexpr.c
index a88f96407..0a0e900bd 100644
--- a/src/box/sql/whereexpr.c
+++ b/src/box/sql/whereexpr.c
@@ -237,14 +237,12 @@ operatorMask(int op)
  *                    pattern prefix.
  * @param pisComplete True if the only wildcard is '%' in the
  *                    last character.
- * @param pnoCase     True if case insensitive.
- *
  * @retval True if the given expr is a LIKE operator & is
  *         optimizable using inequality constraints.
  */
 static int
 like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
-			   int *pisComplete, int *pnoCase)
+			   int *pisComplete)
 {
 	/* String on RHS of LIKE operator. */
 	const char *z = 0;
@@ -264,7 +262,7 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
 	/* Result code to return. */
 	int rc;
 
-	if (!sql_is_like_func(db, pExpr, pnoCase)) {
+	if (!sql_is_like_func(db, pExpr)) {
 		return 0;
 	}
 	pList = pExpr->x.pList;
@@ -1156,7 +1154,7 @@ exprAnalyze(SrcList * pSrc,	/* the FROM clause */
 	 */
 	if (pWC->op == TK_AND &&
 	    like_optimization_is_valid(pParse, pExpr, &pStr1,
-				       &isComplete, &noCase)) {
+				       &isComplete)) {
 		Expr *pLeft;
 		/* Copy of pStr1 - RHS of LIKE operator. */
 		Expr *pStr2;
diff --git a/test/sql-tap/collation.test.lua b/test/sql-tap/collation.test.lua
index 79547361c..e1df8750b 100755
--- a/test/sql-tap/collation.test.lua
+++ b/test/sql-tap/collation.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(174)
+test:plan(172)
 
 local prefix = "collation-"
 
@@ -483,7 +483,6 @@ local like_testcases =
     {"2.0",
     [[
         CREATE TABLE tx1 (s1 VARCHAR(5) PRIMARY KEY);
-        CREATE INDEX I1 on tx1(s1 collate "unicode_ci");
         INSERT INTO tx1 VALUES('aaa');
         INSERT INTO tx1 VALUES('Aab');
         INSERT INTO tx1 VALUES('İac');
@@ -491,35 +490,29 @@ local like_testcases =
     ]], {0}},
     {"2.1.1",
         "SELECT * FROM tx1 WHERE s1 LIKE 'A%' order by s1;",
-        {0, {"Aab", "aaa"}} },
+        {0, {"Aab"}} },
     {"2.1.2",
         "EXPLAIN QUERY PLAN SELECT * FROM tx1 WHERE s1 LIKE 'A%';",
-        {0, {0, 0, 0, "SEARCH TABLE TX1 USING COVERING INDEX I1 (S1>? AND S1<?) (~16384 rows)"}}},
+        {0, {0, 0, 0, "SEARCH TABLE TX1 USING PRIMARY KEY (S1>? AND S1<?) (~16384 rows)"}}},
     {"2.2.0",
-        "PRAGMA case_sensitive_like = true;",
-        {0}},
-    {"2.2.1",
         "SELECT * FROM tx1 WHERE s1 LIKE 'A%' order by s1;",
         {0, {"Aab"}} },
-    {"2.2.2",
+    {"2.2.1",
         "EXPLAIN QUERY PLAN SELECT * FROM tx1 WHERE s1 LIKE 'A%';",
         {0, {0, 0, 0, "/USING PRIMARY KEY/"}} },
     {"2.3.0",
-        "PRAGMA case_sensitive_like = false;",
-        {0}},
-    {"2.3.1",
         "SELECT * FROM tx1 WHERE s1 LIKE 'i%' order by s1;",
-        {0, {"iad", "İac"}}},
-    {"2.3.2",
+        {0, {"iad"}}},
+    {"2.3.1",
         "SELECT * FROM tx1 WHERE s1 LIKE 'İ%'order by s1;",
-        {0, {"iad", "İac"}} },
+        {0, {"İac"}} },
     {"2.4.0",
     [[
         INSERT INTO tx1 VALUES('ЯЁЮ');
     ]], {0} },
     {"2.4.1",
         "SELECT * FROM tx1 WHERE s1 LIKE 'яёю';",
-        {0, {"ЯЁЮ"}} },
+        {0, {}} },
 }
 
 test:do_catchsql_set_test(like_testcases, prefix)
diff --git a/test/sql-tap/like3.test.lua b/test/sql-tap/like3.test.lua
index 0eee25129..b5f52be52 100755
--- a/test/sql-tap/like3.test.lua
+++ b/test/sql-tap/like3.test.lua
@@ -76,7 +76,7 @@ test:do_execsql_test(
         SELECT a, b FROM t2 WHERE b LIKE 'ab%' ORDER BY +a;
     ]], {
         -- <like3-2.0>
-        1, "abc", 2, "ABX", 4, "abc", 5, "ABX"
+        1, "abc", 4, "abc"
         -- </like3-2.0>
     })
 test:do_execsql_test(
diff --git a/test/sql-tap/pragma.test.lua b/test/sql-tap/pragma.test.lua
index 8221d36e6..a89353aea 100755
--- a/test/sql-tap/pragma.test.lua
+++ b/test/sql-tap/pragma.test.lua
@@ -1,7 +1,7 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
 
-test:plan(24)
+test:plan(23)
 
 test:do_catchsql_test(
 	"pragma-1.3",
@@ -81,22 +81,6 @@ test:do_execsql_test(
 	-- </pragma-3.2>
 })
 
--- Check that "PRAGMA case_sensitive_like" returns its status
--- (0 or 1) if called without parameter.
-test:do_test(
-	"pragma-3.3",
-	function()
-		old_value = box.execute('PRAGMA case_sensitive_like').rows
-		box.execute('PRAGMA case_sensitive_like = 1')
-		new_value = box.execute('PRAGMA case_sensitive_like').rows
-		box.execute('PRAGMA case_sensitive_like = '.. old_value[1][1])
-		return new_value[1][1]
-	end,
-	-- <pragma-3.3>
-	1
-	-- </pragma-3.3>
-	)
-
 --
 -- gh-3733: remove useless or obsolete pragmas
 --
diff --git a/test/sql/sql-debug.result b/test/sql/sql-debug.result
index 32c65cc81..2dba6844a 100644
--- a/test/sql/sql-debug.result
+++ b/test/sql/sql-debug.result
@@ -38,7 +38,6 @@ box.execute('PRAGMA')
   - name: pragma_value
     type: INTEGER
   rows:
-  - ['case_sensitive_like', 0]
   - ['count_changes', 0]
   - ['defer_foreign_keys', 0]
   - ['full_column_names', 0]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH 2/2] sql: make LIKE predicate dependent on collation
  2019-07-24 21:02     ` Roman Khabibov
@ 2019-08-01 10:27       ` n.pettik
  0 siblings, 0 replies; 9+ messages in thread
From: n.pettik @ 2019-08-01 10:27 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Roman Khabibov, Kirill Yukhin


>>> * @param pisComplete True if the only wildcard is '%' in the
>>> *                    last character.
>>> * @retval True if the given expr is a LIKE operator & is
>>> @@ -276,9 +274,20 @@ like_optimization_is_valid(Parse *pParse, Expr *pExpr, Expr **ppPrefix,
>>> 		 */
>>> 		return 0;
>>> 	}
>>> +
>>> +	/* Only for "binary" and "unicode_ci" collations. */
>> 
>> Why only two collations? Why not “unicode” for instance?
>> What if collation is a part of expression, not field:
>> … a COLLATE “unicode” LIKE … ?
> For instance, we consider the following query '... WHERE x LIKE "A%" COLLATE "unicode"' (strength - tertiary). The optimization will be: "A" <= x < "B". Let's take x == "aaa". Comparison of "aaa" and "A" gives us '1'. Comparison of  "aaa" and "B" gives us '-1'. In other words, "A" <= "aaa" < "B", but it is a bad result, because "aaa" is not LIKE "A%" COLLATE "unicode”.

Ok, but occasionally test suite didn’t include test case
checking this situation. I’ve added it alongside with
clarifying comments and minor fixes and put on
the branch: np/sql-like-collation

https://github.com/tarantool/tarantool/tree/np/sql-like-collation

With these fixes LGTM.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH 0/2] Make LIKE predicate dependent on collation
  2019-07-13 22:51 [tarantool-patches] [PATCH 0/2] Make LIKE predicate dependent on collation Roman Khabibov
  2019-07-13 22:51 ` [tarantool-patches] [PATCH 1/2] sql: remove "PRAGMA case_sensitive_like" Roman Khabibov
  2019-07-13 22:51 ` [tarantool-patches] [PATCH 2/2] sql: make LIKE predicate dependent on collation Roman Khabibov
@ 2019-08-01 12:26 ` Kirill Yukhin
  2 siblings, 0 replies; 9+ messages in thread
From: Kirill Yukhin @ 2019-08-01 12:26 UTC (permalink / raw)
  To: tarantool-patches; +Cc: korablev

Hello,

On 14 Jul 01:51, Roman Khabibov wrote:
> The first patch removes "PRAGMA case_sensitive_like". The second patch adds
> collation comparison to the pattern match function and makes changes to
> WHERE x LIKE 'aBc%' optimization.
> 
> Roman Khabibov (2):
>   sql: remove "PRAGMA case_sensitive_like"
>   sql: make LIKE predicate dependent on collation

I've checked the patch set into master.

--
Regards, Kirill Yukhin

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2019-08-01 12:26 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-13 22:51 [tarantool-patches] [PATCH 0/2] Make LIKE predicate dependent on collation Roman Khabibov
2019-07-13 22:51 ` [tarantool-patches] [PATCH 1/2] sql: remove "PRAGMA case_sensitive_like" Roman Khabibov
2019-07-17 16:18   ` [tarantool-patches] " n.pettik
2019-07-24 21:02     ` Roman Khabibov
2019-07-13 22:51 ` [tarantool-patches] [PATCH 2/2] sql: make LIKE predicate dependent on collation Roman Khabibov
2019-07-17 16:19   ` [tarantool-patches] " n.pettik
2019-07-24 21:02     ` Roman Khabibov
2019-08-01 10:27       ` n.pettik
2019-08-01 12:26 ` [tarantool-patches] Re: [PATCH 0/2] Make " Kirill Yukhin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox