Tarantool development patches archive
 help / color / mirror / Atom feed
* [tarantool-patches] [PATCH] sql: replace instr() with position()
@ 2019-03-10  7:41 Ivan Koptelov
  2019-03-12 13:31 ` [tarantool-patches] " n.pettik
  0 siblings, 1 reply; 4+ messages in thread
From: Ivan Koptelov @ 2019-03-10  7:41 UTC (permalink / raw)
  To: tarantool-patches; +Cc: korablev, Ivan Koptelov

Before this patch we had instr() SQL function.
After the patch it is renamed to position().
Also a few things have been changed: arguments
order, allowed arguments types and usage of
collations.

The order of the arguments has changed:
Before: instr(haystack, needle).
After: position(needle, haystack).

Type checking became more strict: before it was
possible to call the function with INTEGER arguments
or with arguments of different types. Now both arguments
must have the same type and be either text or binary
strings.

Before the patch collations were not taken into
consideration during the search. Now it is fixed, and
both implicit (column) collations and explicit
(using COLLATE expression) are used. There are
several possible cases:

- One of collations is mentioned alongside with explicit
COLLATE clause, which forces this collation over another
one. It is allowed to have the same forced collations;
- Both collations are derived from table columns and they
are the same;
- One collation is derived from table column and another
one is not specified (i.e. COLL_NONE);

In all other cases they are not accounted to be compatible
and error is raised.

Related to #3933

@TarantoolBot document
Title: instr() is replaced with position()
Changes are described in the associated commit
message.
---
Branch https://github.com/tarantool/tarantool/tree/sudobobo/gh-3933-add-position-func
Issue https://github.com/tarantool/tarantool/issues/3933

 src/box/sql/expr.c             |  34 +-
 src/box/sql/func.c             | 194 +++++--
 src/box/sql/sqlInt.h           |   7 +
 test/sql-tap/func.test.lua     |  10 +-
 test/sql-tap/func5.test.lua    |   4 +-
 test/sql-tap/instr.test.lua    | 706 -------------------------
 test/sql-tap/position.test.lua | 905 +++++++++++++++++++++++++++++++++
 test/sql-tap/with1.test.lua    |   4 +-
 8 files changed, 1109 insertions(+), 755 deletions(-)
 delete mode 100755 test/sql-tap/instr.test.lua
 create mode 100755 test/sql-tap/position.test.lua

diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index a75f23756..59b43ec41 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -4054,7 +4054,8 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
 					constMask |= MASKBIT32(i);
 				}
 				if ((pDef->funcFlags & SQL_FUNC_NEEDCOLL) !=
-				    0 && coll == NULL) {
+				    0 && coll == NULL &&
+				    !(pDef->funcFlags & SQL_FUNC_ARG_COLL)) {
 					bool unused;
 					uint32_t id;
 					if (sql_expr_coll(pParse,
@@ -4064,6 +4065,37 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
 						return 0;
 				}
 			}
+			if (pDef->funcFlags & SQL_FUNC_ARG_COLL) {
+				assert(nFarg == 2);
+				struct coll *colls[2] = {NULL};
+				uint32_t colls_ids[2] = {0};
+				bool is_forced[2] = {false};
+				if (sql_expr_coll(pParse, pFarg->a[0].pExpr,
+						  &is_forced[0], &colls_ids[0],
+						  &colls[0]) != 0)
+					return 0;
+				if (sql_expr_coll(pParse, pFarg->a[1].pExpr,
+						  &is_forced[1], &colls_ids[1],
+						  &colls[1]) != 0)
+					return 0;
+
+				uint32_t coll_id;
+				if (collations_check_compatibility(colls_ids[0],
+								   is_forced[0],
+								   colls_ids[1],
+								   is_forced[1],
+								   &coll_id)
+								   != 0) {
+					pParse->rc = SQL_TARANTOOL_ERROR;
+					pParse->nErr++;
+					return 0;
+				}
+
+				coll = (coll_id == colls_ids[0]) ? colls[0] :
+								   colls[1];
+				if (coll == NULL)
+					coll = coll_by_id(COLL_NONE)->coll;
+			}
 			if (pFarg) {
 				if (constMask) {
 					r1 = pParse->nMem + 1;
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 2de6ef5ce..f44b7ce78 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -38,6 +38,7 @@
 #include "vdbeInt.h"
 #include "version.h"
 #include "coll/coll.h"
+#include "tarantoolInt.h"
 #include <unicode/ustring.h>
 #include <unicode/ucasemap.h>
 #include <unicode/ucnv.h>
@@ -212,59 +213,173 @@ absFunc(sql_context * context, int argc, sql_value ** argv)
 }
 
 /*
- * Implementation of the instr() function.
+ * Returns name of SQL type, which is given by the int code.
  *
- * instr(haystack,needle) finds the first occurrence of needle
+ * @param sql_type One of enum sql_type codes.
+ * @retval string representing name of the given type.
+ */
+static inline char *
+sql_value_type_to_str(enum sql_type sql_type) {
+	switch(sql_type) {
+		case SQL_INTEGER:
+			return "INTEGER";
+		case SQL_TEXT:
+			return "TEXT";
+		case SQL_FLOAT:
+			return "REAL";
+		case SQL_BLOB:
+			return "BLOB";
+		default:
+			return "NULL";
+	}
+}
+
+/*
+ * Implementation of the position() function.
+ *
+ * position(haystack,needle) finds the first occurrence of needle
  * in haystack and returns the number of previous characters plus 1,
  * or 0 if needle does not occur within haystack.
  *
- * If both haystack and needle are BLOBs, then the result is one more than
- * the number of bytes in haystack prior to the first occurrence of needle,
- * or 0 if needle never occurs in haystack.
+ * Haystack and needle must both have the same type, either
+ * TEXT or BLOB.
+ *
+ * If haystack and needle are BLOBs, then the result is one more
+ * than the number of bytes in haystack prior to the first
+ * occurrence of needle, or 0 if needle never occurs in haystack.
+ *
+ * If haystack and needle are TEXTs, then their collations (if
+ * any) are taken into consideration. If only one param has a
+ * collation, then it is used. If both params have collations,
+ * then the right one is chosen by
+ * box/sql/sqlInt.h/collations_check_compatibility() function
+ * (If collations are not compatible then the error is raised).
+ *
+ * Note that the "collation-selection" logic is implemented in
+ * box/sql/expr.c/sqlExprCodeTarget() function.
  */
 static void
-instrFunc(sql_context * context, int argc, sql_value ** argv)
+positionFunc(sql_context *context, int argc, sql_value **argv)
 {
-	const unsigned char *zHaystack;
-	const unsigned char *zNeedle;
-	int nHaystack;
-	int nNeedle;
-	int typeHaystack, typeNeedle;
+	const unsigned char *haystack;
+	const unsigned char *needle;
+	int n_haystack_bytes;
+	int n_needle_bytes;
+	int type_haystack, type_needle;
 	int N = 1;
-	int isText;
 
 	UNUSED_PARAMETER(argc);
-	typeHaystack = sql_value_type(argv[0]);
-	typeNeedle = sql_value_type(argv[1]);
-	if (typeHaystack == SQL_NULL || typeNeedle == SQL_NULL)
+	type_haystack = sql_value_type(argv[1]);
+	type_needle = sql_value_type(argv[0]);
+	if (type_haystack == SQL_NULL || type_needle == SQL_NULL)
+		return;
+	/*
+ 	 * Position function can be called only with string
+ 	 * or blob params.
+ 	 */
+	int inconsistent_type = 0;
+	if (type_needle != SQL_TEXT && type_needle != SQL_BLOB)
+		inconsistent_type = type_needle;
+	if (type_haystack != SQL_TEXT && type_haystack != SQL_BLOB)
+		inconsistent_type = type_haystack;
+	if (inconsistent_type > 0) {
+		diag_set(ClientError, ER_INCONSISTENT_TYPES, "TEXT or BLOB",
+			 sql_value_type_to_str(inconsistent_type));
+		context->pVdbe->pParse->rc = SQL_TARANTOOL_ERROR;
+		context->pVdbe->pParse->nErr++;
+		sql_result_error(context, tarantoolErrorMessage(), -1);
+		return;
+	}
+	/*
+	 * Both params of Position function must be of the same
+	 * type.
+	 */
+	if (type_haystack != type_needle) {
+		diag_set(ClientError, ER_INCONSISTENT_TYPES,
+			 sql_value_type_to_str(type_needle),
+			 sql_value_type_to_str(type_haystack));
+		context->pVdbe->pParse->rc = SQL_TARANTOOL_ERROR;
+		context->pVdbe->pParse->nErr++;
+		sql_result_error(context, tarantoolErrorMessage(), -1);
 		return;
-	nHaystack = sql_value_bytes(argv[0]);
-	nNeedle = sql_value_bytes(argv[1]);
-	if (nNeedle > 0) {
-		if (typeHaystack == SQL_BLOB && typeNeedle == SQL_BLOB) {
-			zHaystack = sql_value_blob(argv[0]);
-			zNeedle = sql_value_blob(argv[1]);
-			assert(zNeedle != 0);
-			assert(zHaystack != 0 || nHaystack == 0);
-			isText = 0;
+	}
+	n_haystack_bytes = sql_value_bytes(argv[1]);
+	n_needle_bytes = sql_value_bytes(argv[0]);
+	if (n_needle_bytes > 0) {
+		if (type_haystack == SQL_BLOB) {
+			haystack = sql_value_blob(argv[1]);
+			needle = sql_value_blob(argv[0]);
+			assert(needle != 0);
+			assert(haystack != 0 || n_haystack_bytes == 0);
+
+			while (n_needle_bytes <= n_haystack_bytes
+			       && memcmp(haystack, needle, n_needle_bytes) != 0) {
+				N++;
+				n_haystack_bytes--;
+				haystack++;
+			}
+			if (n_needle_bytes > n_haystack_bytes)
+				N = 0;
 		} else {
-			zHaystack = sql_value_text(argv[0]);
-			zNeedle = sql_value_text(argv[1]);
-			isText = 1;
-			if (zHaystack == 0 || zNeedle == 0)
+			/*
+			 * Code below handles not only simple
+			 * cases like position('a', 'bca'), but
+			 * also more complex ones:
+			 * position('a', 'bcá' COLLATE "unicode_ci")
+			 * To do so, we need to use comparison
+			 * window, which has constant character
+			 * size, but variable byte size.
+			 * Character size is equal to
+			 * needle char size.
+			 */
+			haystack = sql_value_text(argv[1]);
+			needle = sql_value_text(argv[0]);
+			if (haystack == 0 || needle == 0)
 				return;
-		}
-		while (nNeedle <= nHaystack
-		       && memcmp(zHaystack, zNeedle, nNeedle) != 0) {
-			N++;
-			do {
-				nHaystack--;
-				zHaystack++;
-			} while (isText && (zHaystack[0] & 0xc0) == 0x80);
-		}
-		if (nNeedle > nHaystack)
+			struct coll *coll = sqlGetFuncCollSeq(context);
+
+ 			int n_needle_chars =
+ 				sql_utf8_char_count(needle, n_needle_bytes);
+			int n_haystack_chars =
+				sql_utf8_char_count(haystack, n_haystack_bytes);
+
+			if (n_haystack_chars < n_needle_chars) {
+				N = 0;
+				goto finish;
+			}
+			/*
+			 * Comparison window is determined by
+			 * beg_offset and end_offset. beg_offset
+			 * is offset in bytes from haystack
+			 * beginning to window beginning.
+			 * end_offset is offset in bytes from
+			 * haystack beginning to window end.
+			 */
+			int end_offset = 0;
+			for (int c = 0; c < n_needle_chars; c++) {
+				SQL_UTF8_FWD_1(haystack, end_offset,
+					       n_haystack_bytes);
+			}
+			int beg_offset = 0;
+			int c;
+			for (c = 0; c + n_needle_chars <= n_haystack_chars; c++) {
+				if (coll->cmp((const char *) haystack + beg_offset,
+					      end_offset - beg_offset,
+					      (const char *) needle,
+					      n_needle_bytes, coll) == 0)
+					goto finish;
+				N++;
+				/* Update offsets. */
+				SQL_UTF8_FWD_1(haystack, beg_offset,
+					       n_haystack_bytes);
+				SQL_UTF8_FWD_1(haystack, end_offset,
+					       n_haystack_bytes);
+			}
+			/* Needle was not found in the haystack. */
 			N = 0;
+		}
 	}
+finish:
 	sql_result_int(context, N);
 }
 
@@ -1760,7 +1875,8 @@ sqlRegisterBuiltinFunctions(void)
 			  FIELD_TYPE_STRING),
 		FUNCTION2(length, 1, 0, 0, lengthFunc, SQL_FUNC_LENGTH,
 			  FIELD_TYPE_INTEGER),
-		FUNCTION(instr, 2, 0, 0, instrFunc, FIELD_TYPE_INTEGER),
+		FUNCTION2(position, 2, 0, 1, positionFunc, SQL_FUNC_ARG_COLL,
+			  FIELD_TYPE_INTEGER),
 		FUNCTION(printf, -1, 0, 0, printfFunc, FIELD_TYPE_STRING),
 		FUNCTION(unicode, 1, 0, 0, unicodeFunc, FIELD_TYPE_STRING),
 		FUNCTION(char, -1, 0, 0, charFunc, FIELD_TYPE_STRING),
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 1d8fae5b0..4d059dd7e 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -1666,6 +1666,13 @@ struct FuncDestructor {
 #define SQL_FUNC_SLOCHNG  0x2000	/* "Slow Change". Value constant during a
 					 * single query - might change over time
 					 */
+#define SQL_FUNC_ARG_COLL 0x4000	/* The flag is set when the collation
+					 * of function arguments should be
+					 * determined, using rules in
+					 * collations_check_compatibility()
+					 * function. Applicable only if the
+					 * function has two arguments.
+					 */
 
 /*
  * The following three macros, FUNCTION(), LIKEFUNC() and AGGREGATE() are
diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
index 889fc5867..ddfd79163 100755
--- a/test/sql-tap/func.test.lua
+++ b/test/sql-tap/func.test.lua
@@ -2860,25 +2860,25 @@ test:do_execsql_test(
      SELECT GROUP_CONCAT(b, '') FROM t100;",
     {string.char(00,65,00,65,00)})
 
--- INSTR
+-- POSITION
 test:do_execsql_test(
     "func-73",
-    "SELECT INSTR(CHAR(00,65,00,66,00), CHAR(65));",
+    "SELECT POSITION(CHAR(65), CHAR(00,65,00,66,00));",
     {2})
 
 test:do_execsql_test(
     "func-74",
-    "SELECT INSTR(CHAR(00,65,00,66,00), CHAR(66));",
+    "SELECT POSITION(CHAR(66), CHAR(00,65,00,66,00));",
     {4})
 
 test:do_execsql_test(
     "func-75",
-    "SELECT INSTR(CHAR(00,65,00,66,00), CHAR(00));",
+    "SELECT POSITION(CHAR(00), CHAR(00,65,00,66,00));",
     {1})
 
 test:do_execsql_test(
     "func-76",
-    "SELECT INSTR(CHAR(65,66), CHAR(00));",
+    "SELECT POSITION(CHAR(00), CHAR(65,66));",
     {0})
 
 test:finish_test()
diff --git a/test/sql-tap/func5.test.lua b/test/sql-tap/func5.test.lua
index bb9e11a04..6605a2ba1 100755
--- a/test/sql-tap/func5.test.lua
+++ b/test/sql-tap/func5.test.lua
@@ -30,7 +30,7 @@ test:do_execsql_test(
         INSERT INTO t1 VALUES(3,'pqr','fuzzy',99);
         INSERT INTO t1 VALUES(4,'abcdefg','xy',22);
         INSERT INTO t1 VALUES(5,'shoe','mayer',2953);
-        SELECT x FROM t1 WHERE c=instr('abcdefg',b) OR a='abcdefg' ORDER BY +x;       
+        SELECT x FROM t1 WHERE c=position(b, 'abcdefg') OR a='abcdefg' ORDER BY +x;
     ]], {
         -- <func5-1.1>
         2, 4
@@ -40,7 +40,7 @@ test:do_execsql_test(
 test:do_execsql_test(
     "func5-1.2",
     [[
-        SELECT x FROM t1 WHERE a='abcdefg' OR c=instr('abcdefg',b) ORDER BY +x; 
+        SELECT x FROM t1 WHERE a='abcdefg' OR c=position(b, 'abcdefg') ORDER BY +x;
     ]], {
         -- <func5-1.1>
         2, 4
diff --git a/test/sql-tap/instr.test.lua b/test/sql-tap/instr.test.lua
deleted file mode 100755
index d42da5ccd..000000000
--- a/test/sql-tap/instr.test.lua
+++ /dev/null
@@ -1,706 +0,0 @@
-#!/usr/bin/env tarantool
-test = require("sqltester")
-test:plan(65)
-
---!./tcltestrunner.lua
--- 2012 October 24
---
--- The author disclaims copyright to this source code.  In place of
--- a legal notice, here is a blessing:
---
---    May you do good and not evil.
---    May you find forgiveness for yourself and forgive others.
---    May you share freely, never taking more than you give.
---
--------------------------------------------------------------------------
--- This file implements regression tests for sql library.  The
--- focus of this file is testing the built-in INSTR() functions.
---
--- EVIDENCE-OF: R-27549-59611 The instr(X,Y) function finds the first
--- occurrence of string Y within string X and returns the number of prior
--- characters plus 1, or 0 if Y is nowhere found within X.
---
--- ["set","testdir",[["file","dirname",["argv0"]]]]
--- ["source",[["testdir"],"\/tester.tcl"]]
--- Create a table to work with.
---
-test:do_test(
-    "instr-1.1",
-    function()
-        return test:execsql "SELECT instr('abcdefg','a');"
-    end, {
-        -- <instr-1.1>
-        1
-        -- </instr-1.1>
-    })
-
-test:do_test(
-    "instr-1.2",
-    function()
-        return test:execsql "SELECT instr('abcdefg','b');"
-    end, {
-        -- <instr-1.2>
-        2
-        -- </instr-1.2>
-    })
-
-test:do_test(
-    "instr-1.3",
-    function()
-        return test:execsql "SELECT instr('abcdefg','c');"
-    end, {
-        -- <instr-1.3>
-        3
-        -- </instr-1.3>
-    })
-
-test:do_test(
-    "instr-1.4",
-    function()
-        return test:execsql "SELECT instr('abcdefg','d');"
-    end, {
-        -- <instr-1.4>
-        4
-        -- </instr-1.4>
-    })
-
-test:do_test(
-    "instr-1.5",
-    function()
-        return test:execsql "SELECT instr('abcdefg','e');"
-    end, {
-        -- <instr-1.5>
-        5
-        -- </instr-1.5>
-    })
-
-test:do_test(
-    "instr-1.6",
-    function()
-        return test:execsql "SELECT instr('abcdefg','f');"
-    end, {
-        -- <instr-1.6>
-        6
-        -- </instr-1.6>
-    })
-
-test:do_test(
-    "instr-1.7",
-    function()
-        return test:execsql "SELECT instr('abcdefg','g');"
-    end, {
-        -- <instr-1.7>
-        7
-        -- </instr-1.7>
-    })
-
-test:do_test(
-    "instr-1.8",
-    function()
-        return test:execsql "SELECT instr('abcdefg','h');"
-    end, {
-        -- <instr-1.8>
-        0
-        -- </instr-1.8>
-    })
-
-test:do_test(
-    "instr-1.9",
-    function()
-        return test:execsql "SELECT instr('abcdefg','abcdefg');"
-    end, {
-        -- <instr-1.9>
-        1
-        -- </instr-1.9>
-    })
-
-test:do_test(
-    "instr-1.10",
-    function()
-        return test:execsql "SELECT instr('abcdefg','abcdefgh');"
-    end, {
-        -- <instr-1.10>
-        0
-        -- </instr-1.10>
-    })
-
-test:do_test(
-    "instr-1.11",
-    function()
-        return test:execsql "SELECT instr('abcdefg','bcdefg');"
-    end, {
-        -- <instr-1.11>
-        2
-        -- </instr-1.11>
-    })
-
-test:do_test(
-    "instr-1.12",
-    function()
-        return test:execsql "SELECT instr('abcdefg','bcdefgh');"
-    end, {
-        -- <instr-1.12>
-        0
-        -- </instr-1.12>
-    })
-
-test:do_test(
-    "instr-1.13",
-    function()
-        return test:execsql "SELECT instr('abcdefg','cdefg');"
-    end, {
-        -- <instr-1.13>
-        3
-        -- </instr-1.13>
-    })
-
-test:do_test(
-    "instr-1.14",
-    function()
-        return test:execsql "SELECT instr('abcdefg','cdefgh');"
-    end, {
-        -- <instr-1.14>
-        0
-        -- </instr-1.14>
-    })
-
-test:do_test(
-    "instr-1.15",
-    function()
-        return test:execsql "SELECT instr('abcdefg','defg');"
-    end, {
-        -- <instr-1.15>
-        4
-        -- </instr-1.15>
-    })
-
-test:do_test(
-    "instr-1.16",
-    function()
-        return test:execsql "SELECT instr('abcdefg','defgh');"
-    end, {
-        -- <instr-1.16>
-        0
-        -- </instr-1.16>
-    })
-
-test:do_test(
-    "instr-1.17",
-    function()
-        return test:execsql "SELECT instr('abcdefg','efg');"
-    end, {
-        -- <instr-1.17>
-        5
-        -- </instr-1.17>
-    })
-
-test:do_test(
-    "instr-1.18",
-    function()
-        return test:execsql "SELECT instr('abcdefg','efgh');"
-    end, {
-        -- <instr-1.18>
-        0
-        -- </instr-1.18>
-    })
-
-test:do_test(
-    "instr-1.19",
-    function()
-        return test:execsql "SELECT instr('abcdefg','fg');"
-    end, {
-        -- <instr-1.19>
-        6
-        -- </instr-1.19>
-    })
-
-test:do_test(
-    "instr-1.20",
-    function()
-        return test:execsql "SELECT instr('abcdefg','fgh');"
-    end, {
-        -- <instr-1.20>
-        0
-        -- </instr-1.20>
-    })
-
-test:do_test(
-    "instr-1.21",
-    function()
-        return test:execsql "SELECT coalesce(instr('abcdefg',NULL),'nil');"
-    end, {
-        -- <instr-1.21>
-        "nil"
-        -- </instr-1.21>
-    })
-
-test:do_test(
-    "instr-1.22",
-    function()
-        return test:execsql "SELECT coalesce(instr(NULL,'x'),'nil');"
-    end, {
-        -- <instr-1.22>
-        "nil"
-        -- </instr-1.22>
-    })
-
-test:do_test(
-    "instr-1.23",
-    function()
-        return test:execsql "SELECT instr(12345,34);"
-    end, {
-        -- <instr-1.23>
-        3
-        -- </instr-1.23>
-    })
-
-test:do_test(
-    "instr-1.24",
-    function()
-        return test:execsql "SELECT instr(123456.78,34);"
-    end, {
-        -- <instr-1.24>
-        3
-        -- </instr-1.24>
-    })
-
-test:do_test(
-    "instr-1.25",
-    function()
-        return test:execsql "SELECT instr(123456.78,x'3334');"
-    end, {
-        -- <instr-1.25>
-        3
-        -- </instr-1.25>
-    })
-
-test:do_test(
-    "instr-1.26",
-    function()
-        return test:execsql "SELECT instr('äbcdefg','efg');"
-    end, {
-        -- <instr-1.26>
-        5
-        -- </instr-1.26>
-    })
-
-test:do_test(
-    "instr-1.27",
-    function()
-        return test:execsql "SELECT instr('€xyzzy','xyz');"
-    end, {
-        -- <instr-1.27>
-        2
-        -- </instr-1.27>
-    })
-
-test:do_test(
-    "instr-1.28",
-    function()
-        return test:execsql "SELECT instr('abc€xyzzy','xyz');"
-    end, {
-        -- <instr-1.28>
-        5
-        -- </instr-1.28>
-    })
-
-test:do_test(
-    "instr-1.29",
-    function()
-        return test:execsql "SELECT instr('abc€xyzzy','€xyz');"
-    end, {
-        -- <instr-1.29>
-        4
-        -- </instr-1.29>
-    })
-
-test:do_test(
-    "instr-1.30",
-    function()
-        return test:execsql "SELECT instr('abc€xyzzy','c€xyz');"
-    end, {
-        -- <instr-1.30>
-        3
-        -- </instr-1.30>
-    })
-
-test:do_test(
-    "instr-1.31",
-    function()
-        return test:execsql "SELECT instr(x'0102030405',x'01');"
-    end, {
-        -- <instr-1.31>
-        1
-        -- </instr-1.31>
-    })
-
-test:do_test(
-    "instr-1.32",
-    function()
-        return test:execsql "SELECT instr(x'0102030405',x'02');"
-    end, {
-        -- <instr-1.32>
-        2
-        -- </instr-1.32>
-    })
-
-test:do_test(
-    "instr-1.33",
-    function()
-        return test:execsql "SELECT instr(x'0102030405',x'03');"
-    end, {
-        -- <instr-1.33>
-        3
-        -- </instr-1.33>
-    })
-
-test:do_test(
-    "instr-1.34",
-    function()
-        return test:execsql "SELECT instr(x'0102030405',x'04');"
-    end, {
-        -- <instr-1.34>
-        4
-        -- </instr-1.34>
-    })
-
-test:do_test(
-    "instr-1.35",
-    function()
-        return test:execsql "SELECT instr(x'0102030405',x'05');"
-    end, {
-        -- <instr-1.35>
-        5
-        -- </instr-1.35>
-    })
-
-test:do_test(
-    "instr-1.36",
-    function()
-        return test:execsql "SELECT instr(x'0102030405',x'06');"
-    end, {
-        -- <instr-1.36>
-        0
-        -- </instr-1.36>
-    })
-
-test:do_test(
-    "instr-1.37",
-    function()
-        return test:execsql "SELECT instr(x'0102030405',x'0102030405');"
-    end, {
-        -- <instr-1.37>
-        1
-        -- </instr-1.37>
-    })
-
-test:do_test(
-    "instr-1.38",
-    function()
-        return test:execsql "SELECT instr(x'0102030405',x'02030405');"
-    end, {
-        -- <instr-1.38>
-        2
-        -- </instr-1.38>
-    })
-
-test:do_test(
-    "instr-1.39",
-    function()
-        return test:execsql "SELECT instr(x'0102030405',x'030405');"
-    end, {
-        -- <instr-1.39>
-        3
-        -- </instr-1.39>
-    })
-
-test:do_test(
-    "instr-1.40",
-    function()
-        return test:execsql "SELECT instr(x'0102030405',x'0405');"
-    end, {
-        -- <instr-1.40>
-        4
-        -- </instr-1.40>
-    })
-
-test:do_test(
-    "instr-1.41",
-    function()
-        return test:execsql "SELECT instr(x'0102030405',x'0506');"
-    end, {
-        -- <instr-1.41>
-        0
-        -- </instr-1.41>
-    })
-
-test:do_test(
-    "instr-1.42",
-    function()
-        return test:execsql "SELECT instr(x'0102030405',x'');"
-    end, {
-        -- <instr-1.42>
-        1
-        -- </instr-1.42>
-    })
-
-test:do_test(
-    "instr-1.43",
-    function()
-        return test:execsql "SELECT instr(x'',x'');"
-    end, {
-        -- <instr-1.43>
-        1
-        -- </instr-1.43>
-    })
-
-test:do_test(
-    "instr-1.44",
-    function()
-        return test:execsql "SELECT instr('','');"
-    end, {
-        -- <instr-1.44>
-        1
-        -- </instr-1.44>
-    })
-
-test:do_test(
-    "instr-1.45",
-    function()
-        return test:execsql "SELECT instr('abcdefg','');"
-    end, {
-        -- <instr-1.45>
-        1
-        -- </instr-1.45>
-    })
-
--- ["unset","-nocomplain","longstr"]
-local longstr = "abcdefghijklmonpqrstuvwxyz"
-longstr = longstr .. longstr
-longstr = longstr .. longstr
-longstr = longstr .. longstr
-longstr = longstr .. longstr
-longstr = longstr .. longstr
-longstr = longstr .. longstr
-longstr = longstr .. longstr
-longstr = longstr .. longstr
-longstr = longstr .. longstr
-longstr = longstr .. longstr
-longstr = longstr .. longstr
-longstr = longstr .. longstr
--- puts [string length '"..longstr.."']
-longstr = longstr .. "Xabcde"
-test:do_test(
-    "instr-1.46",
-    function()
-        return test:execsql("SELECT instr('"..longstr.."','X');")
-    end, {
-        -- <instr-1.46>
-        106497
-        -- </instr-1.46>
-    })
-
-test:do_test(
-    "instr-1.47",
-    function()
-        return test:execsql("SELECT instr('"..longstr.."','Y');")
-    end, {
-        -- <instr-1.47>
-        0
-        -- </instr-1.47>
-    })
-
-test:do_test(
-    "instr-1.48",
-    function()
-        return test:execsql( "SELECT instr('"..longstr.."','Xa');")
-    end, {
-        -- <instr-1.48>
-        106497
-        -- </instr-1.48>
-    })
-
-test:do_test(
-    "instr-1.49",
-    function()
-        return test:execsql("SELECT instr('"..longstr.."','zXa');")
-    end, {
-        -- <instr-1.49>
-        106496
-        -- </instr-1.49>
-    })
-
-longstr = string.gsub(longstr, "a", "ä")
-test:do_test(
-    "instr-1.50",
-    function()
-        return test:execsql("SELECT instr('"..longstr.."','X');")
-    end, {
-        -- <instr-1.50>
-        106497
-        -- </instr-1.50>
-    })
-
-test:do_test(
-    "instr-1.51",
-    function()
-        return test:execsql("SELECT instr('"..longstr.."','Y');")
-    end, {
-        -- <instr-1.51>
-        0
-        -- </instr-1.51>
-    })
-
-test:do_test(
-    "instr-1.52",
-    function()
-        return test:execsql("SELECT instr('"..longstr.."','Xä');")
-    end, {
-        -- <instr-1.52>
-        106497
-        -- </instr-1.52>
-    })
-
-test:do_test(
-    "instr-1.53",
-    function()
-        return test:execsql("SELECT instr('"..longstr.."','zXä');")
-    end, {
-        -- <instr-1.53>
-        106496
-        -- </instr-1.53>
-    })
-
-test:do_test(
-    "instr-1.54",
-    function()
-        return test:execsql("SELECT instr(x'78c3a4e282ac79','x');")
-    end, {
-        -- <instr-1.54>
-        1
-        -- </instr-1.54>
-    })
-
-test:do_test(
-    "instr-1.55",
-    function()
-        return test:execsql "SELECT instr(x'78c3a4e282ac79','y');"
-    end, {
-        -- <instr-1.55>
-        4
-        -- </instr-1.55>
-    })
-
--- EVIDENCE-OF: R-46421-32541 Or, if X and Y are both BLOBs, then
--- instr(X,Y) returns one more than the number bytes prior to the first
--- occurrence of Y, or 0 if Y does not occur anywhere within X.
---
-test:do_test(
-    "instr-1.56.1",
-    function()
-        return test:execsql "SELECT instr(x'78c3a4e282ac79',x'79');"
-    end, {
-        -- <instr-1.56.1>
-        7
-        -- </instr-1.56.1>
-    })
-
-test:do_test(
-    "instr-1.56.2",
-    function()
-        return test:execsql "SELECT instr(x'78c3a4e282ac79',x'7a');"
-    end, {
-        -- <instr-1.56.2>
-        0
-        -- </instr-1.56.2>
-    })
-
-test:do_test(
-    "instr-1.56.3",
-    function()
-        return test:execsql "SELECT instr(x'78c3a4e282ac79',x'78');"
-    end, {
-        -- <instr-1.56.3>
-        1
-        -- </instr-1.56.3>
-    })
-
-test:do_test(
-    "instr-1.56.3",
-    function()
-        return test:execsql "SELECT instr(x'78c3a4e282ac79',x'a4');"
-    end, {
-        -- <instr-1.56.3>
-        3
-        -- </instr-1.56.3>
-    })
-
--- EVIDENCE-OF: R-17329-35644 If both arguments X and Y to instr(X,Y) are
--- non-NULL and are not BLOBs then both are interpreted as strings.
---
-test:do_test(
-    "instr-1.57.1",
-    function()
-        return test:execsql "SELECT instr('xä€y',x'79');"
-    end, {
-        -- <instr-1.57.1>
-        4
-        -- </instr-1.57.1>
-    })
-
-test:do_test(
-    "instr-1.57.2",
-    function()
-        return test:execsql "SELECT instr('xä€y',x'a4');"
-    end, {
-        -- <instr-1.57.2>
-        0
-        -- </instr-1.57.2>
-    })
-
-test:do_test(
-    "instr-1.57.3",
-    function()
-        return test:execsql "SELECT instr(x'78c3a4e282ac79','y');"
-    end, {
-        -- <instr-1.57.3>
-        4
-        -- </instr-1.57.3>
-    })
-
--- EVIDENCE-OF: R-14708-27487 If either X or Y are NULL in instr(X,Y)
--- then the result is NULL.
---
-test:do_execsql_test(
-    "instr-1.60",
-    [[
-        SELECT coalesce(instr(NULL,'abc'), 999);
-    ]], {
-        -- <instr-1.60>
-        999
-        -- </instr-1.60>
-    })
-
-test:do_execsql_test(
-    "instr-1.61",
-    [[
-        SELECT coalesce(instr('abc',NULL), 999);
-    ]], {
-        -- <instr-1.61>
-        999
-        -- </instr-1.61>
-    })
-
-test:do_execsql_test(
-    "instr-1.62",
-    [[
-        SELECT coalesce(instr(NULL,NULL), 999);
-    ]], {
-        -- <instr-1.62>
-        999
-        -- </instr-1.62>
-    })
-
-
-
-test:finish_test()
diff --git a/test/sql-tap/position.test.lua b/test/sql-tap/position.test.lua
new file mode 100755
index 000000000..70d11444c
--- /dev/null
+++ b/test/sql-tap/position.test.lua
@@ -0,0 +1,905 @@
+#!/usr/bin/env tarantool
+test = require("sqltester")
+test:plan(80)
+
+--!./tcltestrunner.lua
+-- 2012 October 24
+--
+-- The author disclaims copyright to this source code.  In place of
+-- a legal notice, here is a blessing:
+--
+--    May you do good and not evil.
+--    May you find forgiveness for yourself and forgive others.
+--    May you share freely, never taking more than you give.
+--
+-------------------------------------------------------------------------
+-- This file implements regression tests for sql library.  The
+-- focus of this file is testing the built-in POSITION() functions.
+--
+-- EVIDENCE-OF: R-27549-59611 The position(X,Y) function finds the first
+-- occurrence of string X within string Y and returns the number of prior
+-- characters plus 1, or 0 if X is nowhere found within Y.
+--
+-- ["set","testdir",[["file","dirname",["argv0"]]]]
+-- ["source",[["testdir"],"\/tester.tcl"]]
+-- Create a table to work with.
+--
+test:do_test(
+    "position-1.1",
+    function()
+        return test:execsql "SELECT position('a', 'abcdefg');"
+    end, {
+        -- <position-1.1>
+        1
+        -- </position-1.1>
+    })
+
+test:do_test(
+    "position-1.2",
+    function()
+        return test:execsql "SELECT position('b', 'abcdefg');"
+    end, {
+        -- <position-1.2>
+        2
+        -- </position-1.2>
+    })
+
+test:do_test(
+    "position-1.3",
+    function()
+        return test:execsql "SELECT position('c', 'abcdefg');"
+    end, {
+        -- <position-1.3>
+        3
+        -- </position-1.3>
+    })
+
+test:do_test(
+    "position-1.4",
+    function()
+        return test:execsql "SELECT position('d', 'abcdefg');"
+    end, {
+        -- <position-1.4>
+        4
+        -- </position-1.4>
+    })
+
+test:do_test(
+    "position-1.5",
+    function()
+        return test:execsql "SELECT position('e', 'abcdefg');"
+    end, {
+        -- <position-1.5>
+        5
+        -- </position-1.5>
+    })
+
+test:do_test(
+    "position-1.6",
+    function()
+        return test:execsql "SELECT position('f', 'abcdefg');"
+    end, {
+        -- <position-1.6>
+        6
+        -- </position-1.6>
+    })
+
+test:do_test(
+    "position-1.7",
+    function()
+        return test:execsql "SELECT position('g', 'abcdefg');"
+    end, {
+        -- <position-1.7>
+        7
+        -- </position-1.7>
+    })
+
+test:do_test(
+    "position-1.8",
+    function()
+        return test:execsql "SELECT position('h', 'abcdefg');"
+    end, {
+        -- <position-1.8>
+        0
+        -- </position-1.8>
+    })
+
+test:do_test(
+    "position-1.9",
+    function()
+        return test:execsql "SELECT position('abcdefg', 'abcdefg');"
+    end, {
+        -- <position-1.9>
+        1
+        -- </position-1.9>
+    })
+
+test:do_test(
+    "position-1.10",
+    function()
+        return test:execsql "SELECT position('abcdefgh', 'abcdefg');"
+    end, {
+        -- <position-1.10>
+        0
+        -- </position-1.10>
+    })
+
+test:do_test(
+    "position-1.11",
+    function()
+        return test:execsql "SELECT position('bcdefg', 'abcdefg');"
+    end, {
+        -- <position-1.11>
+        2
+        -- </position-1.11>
+    })
+
+test:do_test(
+    "position-1.12",
+    function()
+        return test:execsql "SELECT position('bcdefgh', 'abcdefg');"
+    end, {
+        -- <position-1.12>
+        0
+        -- </position-1.12>
+    })
+
+test:do_test(
+    "position-1.13",
+    function()
+        return test:execsql "SELECT position('cdefg', 'abcdefg');"
+    end, {
+        -- <position-1.13>
+        3
+        -- </position-1.13>
+    })
+
+test:do_test(
+    "position-1.14",
+    function()
+        return test:execsql "SELECT position('cdefgh', 'abcdefg');"
+    end, {
+        -- <position-1.14>
+        0
+        -- </position-1.14>
+    })
+
+test:do_test(
+    "position-1.15",
+    function()
+        return test:execsql "SELECT position('defg', 'abcdefg');"
+    end, {
+        -- <position-1.15>
+        4
+        -- </position-1.15>
+    })
+
+test:do_test(
+    "position-1.16",
+    function()
+        return test:execsql "SELECT position('defgh', 'abcdefg');"
+    end, {
+        -- <position-1.16>
+        0
+        -- </position-1.16>
+    })
+
+test:do_test(
+    "position-1.17",
+    function()
+        return test:execsql "SELECT position('efg', 'abcdefg');"
+    end, {
+        -- <position-1.17>
+        5
+        -- </position-1.17>
+    })
+
+test:do_test(
+    "position-1.18",
+    function()
+        return test:execsql "SELECT position('efgh', 'abcdefg');"
+    end, {
+        -- <position-1.18>
+        0
+        -- </position-1.18>
+    })
+
+test:do_test(
+    "position-1.19",
+    function()
+        return test:execsql "SELECT position('fg', 'abcdefg');"
+    end, {
+        -- <position-1.19>
+        6
+        -- </position-1.19>
+    })
+
+test:do_test(
+    "position-1.20",
+    function()
+        return test:execsql "SELECT position('fgh', 'abcdefg');"
+    end, {
+        -- <position-1.20>
+        0
+        -- </position-1.20>
+    })
+
+test:do_test(
+    "position-1.21",
+    function()
+        return test:execsql "SELECT coalesce(position(NULL, 'abcdefg'), 'nil');"
+    end, {
+        -- <position-1.21>
+        "nil"
+        -- </position-1.21>
+    })
+
+test:do_test(
+    "position-1.22",
+    function()
+        return test:execsql "SELECT coalesce(position('x', NULL), 'nil');"
+    end, {
+        -- <position-1.22>
+        "nil"
+        -- </position-1.22>
+    })
+
+test:do_test(
+    "position-1.23",
+    function()
+        return test:catchsql "SELECT position(34, 12345);"
+    end, {
+        -- <position-1.23>
+        1, "Inconsistent types: expected TEXT or BLOB got INTEGER"
+        -- </position-1.23>
+    })
+
+test:do_test(
+    "position-1.24",
+    function()
+        return test:catchsql "SELECT position(34, 123456.78);"
+    end, {
+        -- <position-1.24>
+        1, "Inconsistent types: expected TEXT or BLOB got REAL"
+        -- </position-1.24>
+    })
+
+test:do_test(
+    "position-1.25",
+    function()
+        return test:catchsql "SELECT position(x'3334', 123456.78);"
+    end, {
+        -- <position-1.25>
+        1, "Inconsistent types: expected TEXT or BLOB got REAL"
+        -- </position-1.25>
+    })
+
+test:do_test(
+    "position-1.26",
+    function()
+        return test:execsql "SELECT position('efg', 'äbcdefg');"
+    end, {
+        -- <position-1.26>
+        5
+        -- </position-1.26>
+    })
+
+test:do_test(
+    "position-1.27",
+    function()
+        return test:execsql "SELECT position('xyz', '€xyzzy');"
+    end, {
+        -- <position-1.27>
+        2
+        -- </position-1.27>
+    })
+
+test:do_test(
+    "position-1.28",
+    function()
+        return test:execsql "SELECT position('xyz', 'abc€xyzzy');"
+    end, {
+        -- <position-1.28>
+        5
+        -- </position-1.28>
+    })
+
+test:do_test(
+    "position-1.29",
+    function()
+        return test:execsql "SELECT position('€xyz', 'abc€xyzzy');"
+    end, {
+        -- <position-1.29>
+        4
+        -- </position-1.29>
+    })
+
+test:do_test(
+    "position-1.30",
+    function()
+        return test:execsql "SELECT position('c€xyz', 'abc€xyzzy');"
+    end, {
+        -- <position-1.30>
+        3
+        -- </position-1.30>
+    })
+
+test:do_test(
+    "position-1.31",
+    function()
+        return test:execsql "SELECT position(x'01', x'0102030405');"
+    end, {
+        -- <position-1.31>
+        1
+        -- </position-1.31>
+    })
+
+test:do_test(
+    "position-1.32",
+    function()
+        return test:execsql "SELECT position(x'02', x'0102030405');"
+    end, {
+        -- <position-1.32>
+        2
+        -- </position-1.32>
+    })
+
+test:do_test(
+    "position-1.33",
+    function()
+        return test:execsql "SELECT position(x'03', x'0102030405');"
+    end, {
+        -- <position-1.33>
+        3
+        -- </position-1.33>
+    })
+
+test:do_test(
+    "position-1.34",
+    function()
+        return test:execsql "SELECT position(x'04', x'0102030405');"
+    end, {
+        -- <position-1.34>
+        4
+        -- </position-1.34>
+    })
+
+test:do_test(
+    "position-1.35",
+    function()
+        return test:execsql "SELECT position(x'05', x'0102030405');"
+    end, {
+        -- <position-1.35>
+        5
+        -- </position-1.35>
+    })
+
+test:do_test(
+    "position-1.36",
+    function()
+        return test:execsql "SELECT position(x'06', x'0102030405');"
+    end, {
+        -- <position-1.36>
+        0
+        -- </position-1.36>
+    })
+
+test:do_test(
+    "position-1.37",
+    function()
+        return test:execsql "SELECT position(x'0102030405', x'0102030405');"
+    end, {
+        -- <position-1.37>
+        1
+        -- </position-1.37>
+    })
+
+test:do_test(
+    "position-1.38",
+    function()
+        return test:execsql "SELECT position(x'02030405', x'0102030405');"
+    end, {
+        -- <position-1.38>
+        2
+        -- </position-1.38>
+    })
+
+test:do_test(
+    "position-1.39",
+    function()
+        return test:execsql "SELECT position(x'030405', x'0102030405');"
+    end, {
+        -- <position-1.39>
+        3
+        -- </position-1.39>
+    })
+
+test:do_test(
+    "position-1.40",
+    function()
+        return test:execsql "SELECT position(x'0405', x'0102030405');"
+    end, {
+        -- <position-1.40>
+        4
+        -- </position-1.40>
+    })
+
+test:do_test(
+    "position-1.41",
+    function()
+        return test:execsql "SELECT position(x'0506', x'0102030405');"
+    end, {
+        -- <position-1.41>
+        0
+        -- </position-1.41>
+    })
+
+test:do_test(
+    "position-1.42",
+    function()
+        return test:execsql "SELECT position(x'', x'0102030405');"
+    end, {
+        -- <position-1.42>
+        1
+        -- </position-1.42>
+    })
+
+test:do_test(
+    "position-1.43",
+    function()
+        return test:execsql "SELECT position(x'', x'');"
+    end, {
+        -- <position-1.43>
+        1
+        -- </position-1.43>
+    })
+
+test:do_test(
+    "position-1.44",
+    function()
+        return test:execsql "SELECT position('', '');"
+    end, {
+        -- <position-1.44>
+        1
+        -- </position-1.44>
+    })
+
+test:do_test(
+    "position-1.45",
+    function()
+        return test:execsql "SELECT position('', 'abcdefg');"
+    end, {
+        -- <position-1.45>
+        1
+        -- </position-1.45>
+    })
+
+-- ["unset","-nocomplain","longstr"]
+local longstr = "abcdefghijklmonpqrstuvwxyz"
+longstr = longstr .. longstr
+longstr = longstr .. longstr
+longstr = longstr .. longstr
+longstr = longstr .. longstr
+longstr = longstr .. longstr
+longstr = longstr .. longstr
+longstr = longstr .. longstr
+longstr = longstr .. longstr
+longstr = longstr .. longstr
+longstr = longstr .. longstr
+longstr = longstr .. longstr
+longstr = longstr .. longstr
+-- puts [string length '"..longstr.."']
+longstr = longstr .. "Xabcde"
+test:do_test(
+    "position-1.46",
+    function()
+        return test:execsql("SELECT position('X', '"..longstr.."');")
+    end, {
+        -- <position-1.46>
+        106497
+        -- </position-1.46>
+    })
+
+test:do_test(
+    "position-1.47",
+    function()
+        return test:execsql("SELECT position('Y', '"..longstr.."');")
+    end, {
+        -- <position-1.47>
+        0
+        -- </position-1.47>
+    })
+
+test:do_test(
+    "position-1.48",
+    function()
+        return test:execsql( "SELECT position('Xa', '"..longstr.."');")
+    end, {
+        -- <position-1.48>
+        106497
+        -- </position-1.48>
+    })
+
+test:do_test(
+    "position-1.49",
+    function()
+        return test:execsql("SELECT position('zXa', '"..longstr.."');")
+    end, {
+        -- <position-1.49>
+        106496
+        -- </position-1.49>
+    })
+
+longstr = string.gsub(longstr, "a", "ä")
+test:do_test(
+    "position-1.50",
+    function()
+        return test:execsql("SELECT position('X', '"..longstr.."');")
+    end, {
+        -- <position-1.50>
+        106497
+        -- </position-1.50>
+    })
+
+test:do_test(
+    "position-1.51",
+    function()
+        return test:execsql("SELECT position('Y', '"..longstr.."');")
+    end, {
+        -- <position-1.51>
+        0
+        -- </position-1.51>
+    })
+
+test:do_test(
+    "position-1.52",
+    function()
+        return test:execsql("SELECT position('Xä', '"..longstr.."');")
+    end, {
+        -- <position-1.52>
+        106497
+        -- </position-1.52>
+    })
+
+test:do_test(
+    "position-1.53",
+    function()
+        return test:execsql("SELECT position('zXä', '"..longstr.."');")
+    end, {
+        -- <position-1.53>
+        106496
+        -- </position-1.53>
+    })
+
+test:do_test(
+    "position-1.54",
+    function()
+        return test:catchsql("SELECT position('x', x'78c3a4e282ac79');")
+    end, {
+        -- <position-1.54>
+        1, "Inconsistent types: expected TEXT got BLOB"
+        -- </position-1.54>
+    })
+
+test:do_test(
+    "position-1.55",
+    function()
+        return test:catchsql "SELECT position('y', x'78c3a4e282ac79');"
+    end, {
+        -- <position-1.55>
+        1, "Inconsistent types: expected TEXT got BLOB"
+        -- </position-1.55>
+    })
+
+-- EVIDENCE-OF: R-46421-32541 Or, if X and Y are both BLOBs, then
+-- position(X,Y) returns one more than the number bytes prior to the first
+-- occurrence of X, or 0 if X does not occur anywhere within Y.
+--
+test:do_test(
+    "position-1.56.1",
+    function()
+        return test:execsql "SELECT position(x'79', x'78c3a4e282ac79');"
+    end, {
+        -- <position-1.56.1>
+        7
+        -- </position-1.56.1>
+    })
+
+test:do_test(
+    "position-1.56.2",
+    function()
+        return test:execsql "SELECT position(x'7a', x'78c3a4e282ac79');"
+    end, {
+        -- <position-1.56.2>
+        0
+        -- </position-1.56.2>
+    })
+
+test:do_test(
+    "position-1.56.3",
+    function()
+        return test:execsql "SELECT position(x'78', x'78c3a4e282ac79');"
+    end, {
+        -- <position-1.56.3>
+        1
+        -- </position-1.56.3>
+    })
+
+test:do_test(
+    "position-1.56.3",
+    function()
+        return test:execsql "SELECT position(x'a4', x'78c3a4e282ac79');"
+    end, {
+        -- <position-1.56.3>
+        3
+        -- </position-1.56.3>
+    })
+
+-- EVIDENCE-OF: R-17329-35644 If both arguments X and Y to position(X,Y) are
+-- non-NULL and are not BLOBs then both are interpreted as strings.
+--
+test:do_test(
+    "position-1.57.1",
+    function()
+        return test:catchsql "SELECT position(x'79', 'xä€y');"
+    end, {
+        -- <position-1.57.1>
+        1, "Inconsistent types: expected BLOB got TEXT"
+        -- </position-1.57.1>
+    })
+
+test:do_test(
+    "position-1.57.2",
+    function()
+        return test:catchsql "SELECT position(x'a4', 'xä€y');"
+    end, {
+        -- <position-1.57.2>
+        1, "Inconsistent types: expected BLOB got TEXT"
+        -- </position-1.57.2>
+    })
+
+test:do_test(
+    "position-1.57.3",
+    function()
+        return test:catchsql "SELECT position('y', x'78c3a4e282ac79');"
+    end, {
+        -- <position-1.57.3>
+        1, "Inconsistent types: expected TEXT got BLOB"
+        -- </position-1.57.3>
+    })
+
+-- EVIDENCE-OF: R-14708-27487 If either X or Y are NULL in position(X,Y)
+-- then the result is NULL.
+--
+test:do_execsql_test(
+    "position-1.60",
+    [[
+        SELECT coalesce(position(NULL, 'abc'), 999);
+    ]], {
+        -- <position-1.60>
+        999
+        -- </position-1.60>
+    })
+
+test:do_execsql_test(
+    "position-1.61",
+    [[
+        SELECT coalesce(position('abc', NULL), 999);
+    ]], {
+        -- <position-1.61>
+        999
+        -- </position-1.61>
+    })
+
+test:do_execsql_test(
+    "position-1.62",
+    [[
+        SELECT coalesce(position(NULL, NULL), 999);
+    ]], {
+        -- <position-1.62>
+        999
+        -- </position-1.62>
+    })
+
+-- Tests that make use of collations.
+-- A short remainder of how "unicode" and "unicode_ci" collations
+-- works:
+-- unicode_ci: „a“ = „A“ = „á“ = „Á“.
+-- unicode: all symbols above are pairwise unequal.
+
+-- Collation is set in space format.
+
+test:do_execsql_test(
+    "position-1.63",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('à');
+        SELECT POSITION('a', s1) FROM test1;
+        DROP TABLE test1;
+    ]], {
+        1
+    }
+)
+
+test:do_execsql_test(
+    "position-1.64",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('qwèrty');
+        SELECT POSITION('er', s1) FROM test1;
+        DROP TABLE test1;
+    ]], {
+        3
+    }
+)
+
+test:do_execsql_test(
+    "position-1.65",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('qwèrtÿ');
+        SELECT POSITION('y', s1) FROM test1;
+        DROP TABLE test1;
+    ]], {
+        6
+    }
+)
+
+-- Collation is set in space format and also in position() -
+-- for haystack (string) only.
+
+test:do_execsql_test(
+    "position-1.66",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('à');
+        SELECT POSITION('a', s1 COLLATE "unicode") FROM test1;
+        DROP TABLE test1;
+    ]], {
+        0
+    }
+)
+
+test:do_execsql_test(
+    "position-1.67",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('qwèrty');
+        SELECT POSITION('er', s1 COLLATE "unicode") FROM test1;
+        DROP TABLE test1;
+    ]], {
+        0
+    }
+)
+
+test:do_execsql_test(
+    "position-1.68",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('qwèrtÿ');
+        SELECT POSITION('Y', s1 COLLATE "unicode") FROM test1;
+        DROP TABLE test1;
+    ]], {
+        0
+    }
+)
+
+-- Collation is set in space format and also in position () -
+-- for needle (string) only.
+
+test:do_execsql_test(
+    "position-1.69",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('à');
+        SELECT POSITION('a' COLLATE "unicode", s1) FROM test1;
+        DROP TABLE test1;
+    ]], {
+        0
+    }
+)
+
+test:do_execsql_test(
+    "position-1.70",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('qwèrty');
+        SELECT POSITION('er' COLLATE "unicode", s1) FROM test1;
+        DROP TABLE test1;
+    ]], {
+        0
+    }
+)
+
+test:do_execsql_test(
+    "position-1.71",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('qwèrtÿ');
+        SELECT POSITION('Y' COLLATE "unicode", s1) FROM test1;
+        DROP TABLE test1;
+    ]], {
+        0
+    }
+)
+
+-- Collation is set in space format and also in position() -
+-- for both arguments. Arguments have the same collations.
+
+test:do_execsql_test(
+    "position-1.72",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('à');
+        SELECT POSITION('a' COLLATE "unicode", s1 COLLATE "unicode") FROM test1;
+        DROP TABLE test1;
+    ]], {
+        0
+    }
+)
+
+test:do_execsql_test(
+    "position-1.73",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('qwèrty');
+        SELECT POSITION('er' COLLATE "unicode", s1 COLLATE "unicode") FROM test1;
+        DROP TABLE test1;
+    ]], {
+        0
+    }
+)
+
+test:do_execsql_test(
+    "position-1.74",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('qwèrtÿ');
+        SELECT POSITION('Y'COLLATE "unicode", s1 COLLATE "unicode") FROM test1;
+        DROP TABLE test1;
+    ]], {
+        0
+    }
+)
+
+-- Collation is set in space format and also in position() -
+-- for both arguments. Arguments have different explicit
+-- collations thus an error is expected.
+
+test:do_catchsql_test(
+    "position-1.75",
+    [[
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('à');
+        SELECT POSITION('a' COLLATE "unicode_ci", s1 COLLATE "unicode") FROM test1;
+    ]], {
+        1, "Illegal mix of collations"
+    }
+)
+
+test:do_catchsql_test(
+    "position-1.76",
+    [[
+        DROP TABLE test1;
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('qwèrty');
+        SELECT POSITION('er' COLLATE "unicode_ci", s1 COLLATE "unicode") FROM test1;
+        DROP TABLE test1;
+    ]], {
+        1, "Illegal mix of collations"
+    }
+)
+
+test:do_catchsql_test(
+    "position-1.77",
+    [[
+        DROP TABLE test1;
+        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
+        INSERT INTO test1 VALUES('qwèrtÿ');
+        SELECT POSITION('Y' COLLATE "unicode_ci", s1 COLLATE "unicode") FROM test1;
+    ]], {
+        1, "Illegal mix of collations"
+    }
+)
+
+test:finish_test()
diff --git a/test/sql-tap/with1.test.lua b/test/sql-tap/with1.test.lua
index 78964830b..aefa37896 100755
--- a/test/sql-tap/with1.test.lua
+++ b/test/sql-tap/with1.test.lua
@@ -595,11 +595,11 @@ test:do_execsql_test("8.2-soduko", [[
 
     /* The tricky bit. */
     x(s, ind) AS (
-      SELECT sud, instr(sud, '.') FROM input
+      SELECT sud, position('.', sud) FROM input
       UNION ALL
       SELECT
         substr(s, 1, ind-1) || z || substr(s, ind+1),
-        instr( substr(s, 1, ind-1) || z || substr(s, ind+1), '.' )
+        position('.', substr(s, 1, ind-1) || z || substr(s, ind+1))
        FROM x, digits AS z
       WHERE ind>0
         AND NOT EXISTS (
-- 
2.20.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [tarantool-patches] Re: [PATCH] sql: replace instr() with position()
  2019-03-10  7:41 [tarantool-patches] [PATCH] sql: replace instr() with position() Ivan Koptelov
@ 2019-03-12 13:31 ` n.pettik
  2019-03-13 11:56   ` i.koptelov
  0 siblings, 1 reply; 4+ messages in thread
From: n.pettik @ 2019-03-12 13:31 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Ivan Koptelov



> On 10 Mar 2019, at 10:41, Ivan Koptelov <ivan.koptelov@tarantool.org> wrote:
> 
> Before this patch we had instr() SQL function.
> After the patch it is renamed to position().
> Also a few things have been changed: arguments
> order, allowed arguments types and usage of
> collations.
> 
> The order of the arguments has changed:
> Before: instr(haystack, needle).
> After: position(needle, haystack).

What are reasons to change arguments' order?
I guess to make it closer to ANSI syntax <POSITION a IN b>.

> Type checking became more strict: before it was
> possible to call the function with INTEGER arguments
> or with arguments of different types. Now both arguments
> must have the same type and be either text or binary
> strings.

Are there any reasons to support BLOBs as arguments?
For instance, we’ve banned this opportunity for LIKE func.

> Before the patch collations were not taken into
> consideration during the search. Now it is fixed, and
> both implicit (column) collations and explicit
> (using COLLATE expression) are used. There are
> several possible cases:
> 
> - One of collations is mentioned alongside with explicit
> COLLATE clause, which forces this collation over another
> one. It is allowed to have the same forced collations;
> - Both collations are derived from table columns and they
> are the same;
> - One collation is derived from table column and another
> one is not specified (i.e. COLL_NONE);

This is regulated by ANSI “Type combination” rules.
Skip this paragraph and simply mention this fact.

> In all other cases they are not accounted to be compatible
> and error is raised.
> 
> Related to #3933

I would use “Workaround for” label.
Also, please underline the fact that syntax is still
different from ANSI one and describe reasons for that.

> 
> @TarantoolBot document
> Title: instr() is replaced with position()
> Changes are described in the associated commit
> message.

AFAIK we do this vice versa: move vital parts of commit
message under doc bot tag request.

> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
> index a75f23756..59b43ec41 100644
> --- a/src/box/sql/expr.c
> +++ b/src/box/sql/expr.c
> @@ -4054,7 +4054,8 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
> 					constMask |= MASKBIT32(i);
> 				}
> 				if ((pDef->funcFlags & SQL_FUNC_NEEDCOLL) !=
> -				    0 && coll == NULL) {
> +				    0 && coll == NULL &&
> +				    !(pDef->funcFlags & SQL_FUNC_ARG_COLL)) {

Why these conditions can’t be compatible?
What is the difference between SQL_FUNC_NEEDCOLL
and new flag? Does NEEDCOLL refer to returning value?
Please, comment this part.

> 					bool unused;
> 					uint32_t id;
> 					if (sql_expr_coll(pParse,
> @@ -4064,6 +4065,37 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
> 						return 0;
> 				}
> 			}
> +			if (pDef->funcFlags & SQL_FUNC_ARG_COLL) {
> +				assert(nFarg == 2);

Add comment to this assertion or better to the whole branch.
It is not obvious at first sight. Why only functions with two args…
For instance, there’s MAX function, which may take any number
of arguments. Collations are also vital for this function, since
they are used to compare strings.

What is more, I suggest to move this part in a separate patch,
since position is not the only function that must apply collations
on its operands. I don’t insist on dozens of tests on each built-in
function, just a few.

> +				struct coll *colls[2] = {NULL};

I’d rather use explicit naming:
struct coll *lhs_coll = NULL;
struct coll *rhs_coll = NULL;

It makes code a bit longer, but on the other hand IMHO
makes it less prone to typos.

> +				uint32_t colls_ids[2] = {0};
> +				bool is_forced[2] = {false};
> +				if (sql_expr_coll(pParse, pFarg->a[0].pExpr,
> +						  &is_forced[0], &colls_ids[0],
> +						  &colls[0]) != 0)
> +					return 0;
> +				if (sql_expr_coll(pParse, pFarg->a[1].pExpr,
> +						  &is_forced[1], &colls_ids[1],
> +						  &colls[1]) != 0)
> +					return 0;
> +
> +				uint32_t coll_id;
> +				if (collations_check_compatibility(colls_ids[0],
> +								   is_forced[0],
> +								   colls_ids[1],
> +								   is_forced[1],
> +								   &coll_id)
> +								   != 0) {
> +					pParse->rc = SQL_TARANTOOL_ERROR;
> +					pParse->nErr++;
> +					return 0;
> +				}
> +
> +				coll = (coll_id == colls_ids[0]) ? colls[0] :
> +								   colls[1];
> +				if (coll == NULL)
> +					coll = coll_by_id(COLL_NONE)->coll;

Why do we need this?

> +			}
> 			if (pFarg) {
> 				if (constMask) {
> 					r1 = pParse->nMem + 1;
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index 2de6ef5ce..f44b7ce78 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -38,6 +38,7 @@
> #include "vdbeInt.h"
> #include "version.h"
> #include "coll/coll.h"
> +#include "tarantoolInt.h"
> #include <unicode/ustring.h>
> #include <unicode/ucasemap.h>
> #include <unicode/ucnv.h>
> @@ -212,59 +213,173 @@ absFunc(sql_context * context, int argc, sql_value ** argv)
> }
> 
> /*
> - * Implementation of the instr() function.
> + * Returns name of SQL type, which is given by the int code.
>  *
> - * instr(haystack,needle) finds the first occurrence of needle
> + * @param sql_type One of enum sql_type codes.
> + * @retval string representing name of the given type.
> + */
> +static inline char *
> +sql_value_type_to_str(enum sql_type sql_type) {

I don’t recommend using this function: mem_type_to_str()
already exists, just make it non-static. sql_type is extracted
as type of VDBE memory cell, so in fact it is the same thing. 

> +	switch(sql_type) {
> +		case SQL_INTEGER:
> +			return "INTEGER";
> +		case SQL_TEXT:
> +			return "TEXT";
> +		case SQL_FLOAT:
> +			return "REAL";
> +		case SQL_BLOB:
> +			return "BLOB";
> +		default:
> +			return "NULL";
> +	}
> +}
> +
> +/*
> + * Implementation of the position() function.
> + *
> + * position(haystack,needle) finds the first occurrence of needle
>  * in haystack and returns the number of previous characters plus 1,
>  * or 0 if needle does not occur within haystack.
>  *
> - * If both haystack and needle are BLOBs, then the result is one more than
> - * the number of bytes in haystack prior to the first occurrence of needle,
> - * or 0 if needle never occurs in haystack.
> + * Haystack and needle must both have the same type, either
> + * TEXT or BLOB.
> + *
> + * If haystack and needle are BLOBs, then the result is one more
> + * than the number of bytes in haystack prior to the first
> + * occurrence of needle, or 0 if needle never occurs in haystack.
> + *
> + * If haystack and needle are TEXTs, then their collations (if
> + * any) are taken into consideration. If only one param has a
> + * collation, then it is used. If both params have collations,
> + * then the right one is chosen by
> + * box/sql/sqlInt.h/collations_check_compatibility() function
> + * (If collations are not compatible then the error is raised).
> + *
> + * Note that the "collation-selection" logic is implemented in
> + * box/sql/expr.c/sqlExprCodeTarget() function.
>  */
> static void
> -instrFunc(sql_context * context, int argc, sql_value ** argv)
> +positionFunc(sql_context *context, int argc, sql_value **argv)

Please, refactor this function in two steps:
firstly provide code style fixes, then functional ones.
Otherwise it complicates review process.

> {
> -	const unsigned char *zHaystack;
> -	const unsigned char *zNeedle;
> -	int nHaystack;
> -	int nNeedle;
> -	int typeHaystack, typeNeedle;
> +	const unsigned char *haystack;
> +	const unsigned char *needle;
> +	int n_haystack_bytes;
> +	int n_needle_bytes;

Ok, if you started refactoring, please finish it:
move args declaration to their initialisations,
fix name of function, != 0 -> != NULL etc.

> +	int type_haystack, type_needle;
> 	int N = 1;
> -	int isText;
> 
> 	UNUSED_PARAMETER(argc);
> -	typeHaystack = sql_value_type(argv[0]);
> -	typeNeedle = sql_value_type(argv[1]);
> -	if (typeHaystack == SQL_NULL || typeNeedle == SQL_NULL)
> +	type_haystack = sql_value_type(argv[1]);
> +	type_needle = sql_value_type(argv[0]);
> +	if (type_haystack == SQL_NULL || type_needle == SQL_NULL)
> +		return;
> +	/*
> + 	 * Position function can be called only with string
> + 	 * or blob params.
> + 	 */
> +	int inconsistent_type = 0;
> +	if (type_needle != SQL_TEXT && type_needle != SQL_BLOB)
> +		inconsistent_type = type_needle;
> +	if (type_haystack != SQL_TEXT && type_haystack != SQL_BLOB)
> +		inconsistent_type = type_haystack;
> +	if (inconsistent_type > 0) {
> +		diag_set(ClientError, ER_INCONSISTENT_TYPES, "TEXT or BLOB",
> +			 sql_value_type_to_str(inconsistent_type));
> +		context->pVdbe->pParse->rc = SQL_TARANTOOL_ERROR;
> +		context->pVdbe->pParse->nErr++;
> +		sql_result_error(context, tarantoolErrorMessage(), -1);
> +		return;
> +	}
> +	/*
> +	 * Both params of Position function must be of the same
> +	 * type.
> +	 */
> +	if (type_haystack != type_needle) {
> +		diag_set(ClientError, ER_INCONSISTENT_TYPES,
> +			 sql_value_type_to_str(type_needle),
> +			 sql_value_type_to_str(type_haystack));
> +		context->pVdbe->pParse->rc = SQL_TARANTOOL_ERROR;
> +		context->pVdbe->pParse->nErr++;
> +		sql_result_error(context, tarantoolErrorMessage(), -1);
> 		return;
> -	nHaystack = sql_value_bytes(argv[0]);
> -	nNeedle = sql_value_bytes(argv[1]);
> -	if (nNeedle > 0) {
> -		if (typeHaystack == SQL_BLOB && typeNeedle == SQL_BLOB) {
> -			zHaystack = sql_value_blob(argv[0]);
> -			zNeedle = sql_value_blob(argv[1]);
> -			assert(zNeedle != 0);
> -			assert(zHaystack != 0 || nHaystack == 0);
> -			isText = 0;
> +	}
> +	n_haystack_bytes = sql_value_bytes(argv[1]);
> +	n_needle_bytes = sql_value_bytes(argv[0]);
> +	if (n_needle_bytes > 0) {
> +		if (type_haystack == SQL_BLOB) {
> +			haystack = sql_value_blob(argv[1]);
> +			needle = sql_value_blob(argv[0]);
> +			assert(needle != 0);
> +			assert(haystack != 0 || n_haystack_bytes == 0);
> +
> +			while (n_needle_bytes <= n_haystack_bytes
> +			       && memcmp(haystack, needle, n_needle_bytes) != 0) {
> +				N++;
> +				n_haystack_bytes--;
> +				haystack++;
> +			}
> +			if (n_needle_bytes > n_haystack_bytes)
> +				N = 0;
> 		} else {
> -			zHaystack = sql_value_text(argv[0]);
> -			zNeedle = sql_value_text(argv[1]);
> -			isText = 1;
> -			if (zHaystack == 0 || zNeedle == 0)
> +			/*
> +			 * Code below handles not only simple
> +			 * cases like position('a', 'bca'), but
> +			 * also more complex ones:
> +			 * position('a', 'bcá' COLLATE "unicode_ci")
> +			 * To do so, we need to use comparison
> +			 * window, which has constant character
> +			 * size, but variable byte size.
> +			 * Character size is equal to
> +			 * needle char size.
> +			 */
> +			haystack = sql_value_text(argv[1]);
> +			needle = sql_value_text(argv[0]);
> +			if (haystack == 0 || needle == 0)
> 				return;

How they can be NULL, if this case is filtered in the beginning:

if (type_haystack == SQL_NULL || type_needle == SQL_NULL)
       return;


> -		}
> -		while (nNeedle <= nHaystack
> -		       && memcmp(zHaystack, zNeedle, nNeedle) != 0) {
> -			N++;
> -			do {
> -				nHaystack--;
> -				zHaystack++;
> -			} while (isText && (zHaystack[0] & 0xc0) == 0x80);
> -		}
> -		if (nNeedle > nHaystack)
> +			struct coll *coll = sqlGetFuncCollSeq(context);
> +
> + 			int n_needle_chars =
> + 				sql_utf8_char_count(needle, n_needle_bytes);
> +			int n_haystack_chars =
> +				sql_utf8_char_count(haystack, n_haystack_bytes);
> +
> +			if (n_haystack_chars < n_needle_chars) {
> +				N = 0;

What’s N?

> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
> index 1d8fae5b0..4d059dd7e 100644
> --- a/src/box/sql/sqlInt.h
> +++ b/src/box/sql/sqlInt.h
> @@ -1666,6 +1666,13 @@ struct FuncDestructor {
> #define SQL_FUNC_SLOCHNG  0x2000	/* "Slow Change". Value constant during a
> 					 * single query - might change over time
> 					 */
> +#define SQL_FUNC_ARG_COLL 0x4000

ARG_NEED_COLL?

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [tarantool-patches] Re: [PATCH] sql: replace instr() with position()
  2019-03-12 13:31 ` [tarantool-patches] " n.pettik
@ 2019-03-13 11:56   ` i.koptelov
  2019-03-15 17:07     ` n.pettik
  0 siblings, 1 reply; 4+ messages in thread
From: i.koptelov @ 2019-03-13 11:56 UTC (permalink / raw)
  To: tarantool-patches; +Cc: n.pettik

Thank you for the review. Firstly, I would like to discuss some
design issues and then send codestyle & functionality changes.
> On 12 Mar 2019, at 16:31, n.pettik <korablev@tarantool.org> wrote:
> 
>> On 10 Mar 2019, at 10:41, Ivan Koptelov <ivan.koptelov@tarantool.org> wrote:
>> 
>> Before this patch we had instr() SQL function.
>> After the patch it is renamed to position().
>> Also a few things have been changed: arguments
>> order, allowed arguments types and usage of
>> collations.
>> 
>> The order of the arguments has changed:
>> Before: instr(haystack, needle).
>> After: position(needle, haystack).
> 
> What are reasons to change arguments' order?
> I guess to make it closer to ANSI syntax <POSITION a IN b>.
> 
>> Type checking became more strict: before it was
>> possible to call the function with INTEGER arguments
>> or with arguments of different types. Now both arguments
>> must have the same type and be either text or binary
>> strings.
> 
> Are there any reasons to support BLOBs as arguments?
> For instance, we’ve banned this opportunity for LIKE func.
Standard says:

<position expression> ::= <character position expression>
			| <binary position expression>

By the way, is LIKE a function? Isn’t it an expression which
can be placed only after WHERE ?
> 
>> Before the patch collations were not taken into
>> consideration during the search. Now it is fixed, and
>> both implicit (column) collations and explicit
>> (using COLLATE expression) are used. There are
>> several possible cases:
>> 
>> - One of collations is mentioned alongside with explicit
>> COLLATE clause, which forces this collation over another
>> one. It is allowed to have the same forced collations;
>> - Both collations are derived from table columns and they
>> are the same;
>> - One collation is derived from table column and another
>> one is not specified (i.e. COLL_NONE);
> 
> This is regulated by ANSI “Type combination” rules.
> Skip this paragraph and simply mention this fact.
> 
>> In all other cases they are not accounted to be compatible
>> and error is raised.
>> 
>> Related to #3933
> 
> I would use “Workaround for” label.
> Also, please underline the fact that syntax is still
> different from ANSI one and describe reasons for that.
> 
>> 
>> @TarantoolBot document
>> Title: instr() is replaced with position()
>> Changes are described in the associated commit
>> message.
> 
> AFAIK we do this vice versa: move vital parts of commit
> message under doc bot tag request.
> 
>> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
>> index a75f23756..59b43ec41 100644
>> --- a/src/box/sql/expr.c
>> +++ b/src/box/sql/expr.c
>> @@ -4054,7 +4054,8 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
>> 					constMask |= MASKBIT32(i);
>> 				}
>> 				if ((pDef->funcFlags & SQL_FUNC_NEEDCOLL) !=
>> -				    0 && coll == NULL) {
>> +				    0 && coll == NULL &&
>> +				    !(pDef->funcFlags & SQL_FUNC_ARG_COLL)) {
> 
> Why these conditions can’t be compatible?
> What is the difference between SQL_FUNC_NEEDCOLL
> and new flag? Does NEEDCOLL refer to returning value?
> Please, comment this part.
These conditions can’t be compatible because
SQL_FUNC_NEEDCOLL and SQL_FUNC_ARG_COLL ask for
different actions. SQL_FUNC_NEEDCOLL asks
to find first argument with collation in
the arguments expression list (from left
to right) and use it inside function
as a collation of the whole expression.
SQL_FUNC_ARG_COLL asks to check if
arguments collations are compatible
and use the right one if they are.
I would add comment to the code.

If we want to establish proper collation
handling in all functions, we should
probably use only one flag to do check
and set the right collation.

I would merge these flags into one, OK?

> 
>> 					bool unused;
>> 					uint32_t id;
>> 					if (sql_expr_coll(pParse,
>> @@ -4064,6 +4065,37 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
>> 						return 0;
>> 				}
>> 			}
>> +			if (pDef->funcFlags & SQL_FUNC_ARG_COLL) {
>> +				assert(nFarg == 2);
> 
> Add comment to this assertion or better to the whole branch.
> It is not obvious at first sight. Why only functions with two args…
> For instance, there’s MAX function, which may take any number
> of arguments. Collations are also vital for this function, since
> they are used to compare strings.
> 
> What is more, I suggest to move this part in a separate patch,
> since position is not the only function that must apply collations
> on its operands. I don’t insist on dozens of tests on each built-in
> function, just a few.
So do you propose to make two patches: first with adding proper
collation checking/setting in sqlExprCodeTarget() and another
with adding proper collation usage in functions in func.c?
Is it OK if I put them in one patch set?
> 
>> +				struct coll *colls[2] = {NULL};
> 
> I’d rather use explicit naming:
> struct coll *lhs_coll = NULL;
> struct coll *rhs_coll = NULL;
> 
> It makes code a bit longer, but on the other hand IMHO
> makes it less prone to typos.
But this function sqlExprCodeTarget() is called for expression
list (of arguments). If we would use it for functions like MAX
(as we want to) then I think an array should be used. Or just
two loops to check every pair of arguments collations for
compatibility.  
> 
>> +				uint32_t colls_ids[2] = {0};
>> +				bool is_forced[2] = {false};
>> +				if (sql_expr_coll(pParse, pFarg->a[0].pExpr,
>> +						  &is_forced[0], &colls_ids[0],
>> +						  &colls[0]) != 0)
>> +					return 0;
>> +				if (sql_expr_coll(pParse, pFarg->a[1].pExpr,
>> +						  &is_forced[1], &colls_ids[1],
>> +						  &colls[1]) != 0)
>> +					return 0;
>> +
>> +				uint32_t coll_id;
>> +				if (collations_check_compatibility(colls_ids[0],
>> +								   is_forced[0],
>> +								   colls_ids[1],
>> +								   is_forced[1],
>> +								   &coll_id)
>> +								   != 0) {
>> +					pParse->rc = SQL_TARANTOOL_ERROR;
>> +					pParse->nErr++;
>> +					return 0;
>> +				}
>> +
>> +				coll = (coll_id == colls_ids[0]) ? colls[0] :
>> +								   colls[1];
>> +				if (coll == NULL)
>> +					coll = coll_by_id(COLL_NONE)->coll;
> 
> Why do we need this?
Because if there is no collation at all among
arguments, then the context would not have any
collation as well. And sqlGetFuncCollSeq()
would fail with assertion in this case, because
it always expects some collations being set in context.
> 
>> +			}
>> 			if (pFarg) {
>> 				if (constMask) {
>> 					r1 = pParse->nMem + 1;
>> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
>> index 2de6ef5ce..f44b7ce78 100644
>> --- a/src/box/sql/func.c
>> +++ b/src/box/sql/func.c
>> @@ -38,6 +38,7 @@
>> #include "vdbeInt.h"
>> #include "version.h"
>> #include "coll/coll.h"
>> +#include "tarantoolInt.h"
>> #include <unicode/ustring.h>
>> #include <unicode/ucasemap.h>
>> #include <unicode/ucnv.h>
>> @@ -212,59 +213,173 @@ absFunc(sql_context * context, int argc, sql_value ** argv)
>> }
>> 
>> /*
>> - * Implementation of the instr() function.
>> + * Returns name of SQL type, which is given by the int code.
>> *
>> - * instr(haystack,needle) finds the first occurrence of needle
>> + * @param sql_type One of enum sql_type codes.
>> + * @retval string representing name of the given type.
>> + */
>> +static inline char *
>> +sql_value_type_to_str(enum sql_type sql_type) {
> 
> I don’t recommend using this function: mem_type_to_str()
> already exists, just make it non-static. sql_type is extracted
> as type of VDBE memory cell, so in fact it is the same thing. 
Sorry, I’ve missed mem_type_to_str(). I would use it and remove
sql_value_type_to_str()
> 
>> +	switch(sql_type) {
>> +		case SQL_INTEGER:
>> +			return "INTEGER";
>> +		case SQL_TEXT:
>> +			return "TEXT";
>> +		case SQL_FLOAT:
>> +			return "REAL";
>> +		case SQL_BLOB:
>> +			return "BLOB";
>> +		default:
>> +			return "NULL";
>> +	}
>> +}
>> +
>> +/*
>> + * Implementation of the position() function.
>> + *
>> + * position(haystack,needle) finds the first occurrence of needle
>> * in haystack and returns the number of previous characters plus 1,
>> * or 0 if needle does not occur within haystack.
>> *
>> - * If both haystack and needle are BLOBs, then the result is one more than
>> - * the number of bytes in haystack prior to the first occurrence of needle,
>> - * or 0 if needle never occurs in haystack.
>> + * Haystack and needle must both have the same type, either
>> + * TEXT or BLOB.
>> + *
>> + * If haystack and needle are BLOBs, then the result is one more
>> + * than the number of bytes in haystack prior to the first
>> + * occurrence of needle, or 0 if needle never occurs in haystack.
>> + *
>> + * If haystack and needle are TEXTs, then their collations (if
>> + * any) are taken into consideration. If only one param has a
>> + * collation, then it is used. If both params have collations,
>> + * then the right one is chosen by
>> + * box/sql/sqlInt.h/collations_check_compatibility() function
>> + * (If collations are not compatible then the error is raised).
>> + *
>> + * Note that the "collation-selection" logic is implemented in
>> + * box/sql/expr.c/sqlExprCodeTarget() function.
>> */
>> static void
>> -instrFunc(sql_context * context, int argc, sql_value ** argv)
>> +positionFunc(sql_context *context, int argc, sql_value **argv)
> 
> Please, refactor this function in two steps:
> firstly provide code style fixes, then functional ones.
> Otherwise it complicates review process.
> 
>> {
>> -	const unsigned char *zHaystack;
>> -	const unsigned char *zNeedle;
>> -	int nHaystack;
>> -	int nNeedle;
>> -	int typeHaystack, typeNeedle;
>> +	const unsigned char *haystack;
>> +	const unsigned char *needle;
>> +	int n_haystack_bytes;
>> +	int n_needle_bytes;
> 
> Ok, if you started refactoring, please finish it:
> move args declaration to their initialisations,
> fix name of function, != 0 -> != NULL etc.
> 
>> +	int type_haystack, type_needle;
>> 	int N = 1;
>> -	int isText;
>> 
>> 	UNUSED_PARAMETER(argc);
>> -	typeHaystack = sql_value_type(argv[0]);
>> -	typeNeedle = sql_value_type(argv[1]);
>> -	if (typeHaystack == SQL_NULL || typeNeedle == SQL_NULL)
>> +	type_haystack = sql_value_type(argv[1]);
>> +	type_needle = sql_value_type(argv[0]);
>> +	if (type_haystack == SQL_NULL || type_needle == SQL_NULL)
>> +		return;
>> +	/*
>> + 	 * Position function can be called only with string
>> + 	 * or blob params.
>> + 	 */
>> +	int inconsistent_type = 0;
>> +	if (type_needle != SQL_TEXT && type_needle != SQL_BLOB)
>> +		inconsistent_type = type_needle;
>> +	if (type_haystack != SQL_TEXT && type_haystack != SQL_BLOB)
>> +		inconsistent_type = type_haystack;
>> +	if (inconsistent_type > 0) {
>> +		diag_set(ClientError, ER_INCONSISTENT_TYPES, "TEXT or BLOB",
>> +			 sql_value_type_to_str(inconsistent_type));
>> +		context->pVdbe->pParse->rc = SQL_TARANTOOL_ERROR;
>> +		context->pVdbe->pParse->nErr++;
>> +		sql_result_error(context, tarantoolErrorMessage(), -1);
>> +		return;
>> +	}
>> +	/*
>> +	 * Both params of Position function must be of the same
>> +	 * type.
>> +	 */
>> +	if (type_haystack != type_needle) {
>> +		diag_set(ClientError, ER_INCONSISTENT_TYPES,
>> +			 sql_value_type_to_str(type_needle),
>> +			 sql_value_type_to_str(type_haystack));
>> +		context->pVdbe->pParse->rc = SQL_TARANTOOL_ERROR;
>> +		context->pVdbe->pParse->nErr++;
>> +		sql_result_error(context, tarantoolErrorMessage(), -1);
>> 		return;
>> -	nHaystack = sql_value_bytes(argv[0]);
>> -	nNeedle = sql_value_bytes(argv[1]);
>> -	if (nNeedle > 0) {
>> -		if (typeHaystack == SQL_BLOB && typeNeedle == SQL_BLOB) {
>> -			zHaystack = sql_value_blob(argv[0]);
>> -			zNeedle = sql_value_blob(argv[1]);
>> -			assert(zNeedle != 0);
>> -			assert(zHaystack != 0 || nHaystack == 0);
>> -			isText = 0;
>> +	}
>> +	n_haystack_bytes = sql_value_bytes(argv[1]);
>> +	n_needle_bytes = sql_value_bytes(argv[0]);
>> +	if (n_needle_bytes > 0) {
>> +		if (type_haystack == SQL_BLOB) {
>> +			haystack = sql_value_blob(argv[1]);
>> +			needle = sql_value_blob(argv[0]);
>> +			assert(needle != 0);
>> +			assert(haystack != 0 || n_haystack_bytes == 0);
>> +
>> +			while (n_needle_bytes <= n_haystack_bytes
>> +			       && memcmp(haystack, needle, n_needle_bytes) != 0) {
>> +				N++;
>> +				n_haystack_bytes--;
>> +				haystack++;
>> +			}
>> +			if (n_needle_bytes > n_haystack_bytes)
>> +				N = 0;
>> 		} else {
>> -			zHaystack = sql_value_text(argv[0]);
>> -			zNeedle = sql_value_text(argv[1]);
>> -			isText = 1;
>> -			if (zHaystack == 0 || zNeedle == 0)
>> +			/*
>> +			 * Code below handles not only simple
>> +			 * cases like position('a', 'bca'), but
>> +			 * also more complex ones:
>> +			 * position('a', 'bcá' COLLATE "unicode_ci")
>> +			 * To do so, we need to use comparison
>> +			 * window, which has constant character
>> +			 * size, but variable byte size.
>> +			 * Character size is equal to
>> +			 * needle char size.
>> +			 */
>> +			haystack = sql_value_text(argv[1]);
>> +			needle = sql_value_text(argv[0]);
>> +			if (haystack == 0 || needle == 0)
>> 				return;
> 
> How they can be NULL, if this case is filtered in the beginning:
> 
> if (type_haystack == SQL_NULL || type_needle == SQL_NULL)
>       return;
> 
> 
>> -		}
>> -		while (nNeedle <= nHaystack
>> -		       && memcmp(zHaystack, zNeedle, nNeedle) != 0) {
>> -			N++;
>> -			do {
>> -				nHaystack--;
>> -				zHaystack++;
>> -			} while (isText && (zHaystack[0] & 0xc0) == 0x80);
>> -		}
>> -		if (nNeedle > nHaystack)
>> +			struct coll *coll = sqlGetFuncCollSeq(context);
>> +
>> + 			int n_needle_chars =
>> + 				sql_utf8_char_count(needle, n_needle_bytes);
>> +			int n_haystack_chars =
>> +				sql_utf8_char_count(haystack, n_haystack_bytes);
>> +
>> +			if (n_haystack_chars < n_needle_chars) {
>> +				N = 0;
> 
> What’s N?
Position of needle beginning in haystack. I would change the naming.
> 
>> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
>> index 1d8fae5b0..4d059dd7e 100644
>> --- a/src/box/sql/sqlInt.h
>> +++ b/src/box/sql/sqlInt.h
>> @@ -1666,6 +1666,13 @@ struct FuncDestructor {
>> #define SQL_FUNC_SLOCHNG  0x2000	/* "Slow Change". Value constant during a
>> 					 * single query - might change over time
>> 					 */
>> +#define SQL_FUNC_ARG_COLL 0x4000
> 
> ARG_NEED_COLL?

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [tarantool-patches] Re: [PATCH] sql: replace instr() with position()
  2019-03-13 11:56   ` i.koptelov
@ 2019-03-15 17:07     ` n.pettik
  0 siblings, 0 replies; 4+ messages in thread
From: n.pettik @ 2019-03-15 17:07 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Ivan Koptelov

[-- Attachment #1: Type: text/plain, Size: 4002 bytes --]


>> Why these conditions can’t be compatible?
>> What is the difference between SQL_FUNC_NEEDCOLL
>> and new flag? Does NEEDCOLL refer to returning value?
>> Please, comment this part.
> These conditions can’t be compatible because
> SQL_FUNC_NEEDCOLL and SQL_FUNC_ARG_COLL ask for
> different actions. SQL_FUNC_NEEDCOLL asks
> to find first argument with collation in
> the arguments expression list (from left
> to right) and use it inside function
> as a collation of the whole expression.
> SQL_FUNC_ARG_COLL asks to check if
> arguments collations are compatible
> and use the right one if they are.
> I would add comment to the code.
> 
> If we want to establish proper collation
> handling in all functions, we should
> probably use only one flag to do check
> and set the right collation.
> 
> I would merge these flags into one, OK?

Yep.

>>> 					bool unused;
>>> 					uint32_t id;
>>> 					if (sql_expr_coll(pParse,
>>> @@ -4064,6 +4065,37 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
>>> 						return 0;
>>> 				}
>>> 			}
>>> +			if (pDef->funcFlags & SQL_FUNC_ARG_COLL) {
>>> +				assert(nFarg == 2);
>> 
>> Add comment to this assertion or better to the whole branch.
>> It is not obvious at first sight. Why only functions with two args…
>> For instance, there’s MAX function, which may take any number
>> of arguments. Collations are also vital for this function, since
>> they are used to compare strings.
>> 
>> What is more, I suggest to move this part in a separate patch,
>> since position is not the only function that must apply collations
>> on its operands. I don’t insist on dozens of tests on each built-in
>> function, just a few.
> So do you propose to make two patches: first with adding proper
> collation checking/setting in sqlExprCodeTarget() and another
> with adding proper collation usage in functions in func.c?
> Is it OK if I put them in one patch set?

Ok, you can try. Anyway, you can always squash patches.

>> 
>>> +				struct coll *colls[2] = {NULL};
>> 
>> I’d rather use explicit naming:
>> struct coll *lhs_coll = NULL;
>> struct coll *rhs_coll = NULL;
>> 
>> It makes code a bit longer, but on the other hand IMHO
>> makes it less prone to typos.
> But this function sqlExprCodeTarget() is called for expression
> list (of arguments). If we would use it for functions like MAX
> (as we want to) then I think an array should be used. Or just
> two loops to check every pair of arguments collations for
> compatibility.

You don’t need array: just go through arguments from left
to the right and compare on compatibility their collations.

>> 
>>> +				uint32_t colls_ids[2] = {0};
>>> +				bool is_forced[2] = {false};
>>> +				if (sql_expr_coll(pParse, pFarg->a[0].pExpr,
>>> +						  &is_forced[0], &colls_ids[0],
>>> +						  &colls[0]) != 0)
>>> +					return 0;
>>> +				if (sql_expr_coll(pParse, pFarg->a[1].pExpr,
>>> +						  &is_forced[1], &colls_ids[1],
>>> +						  &colls[1]) != 0)
>>> +					return 0;
>>> +
>>> +				uint32_t coll_id;
>>> +				if (collations_check_compatibility(colls_ids[0],
>>> +								   is_forced[0],
>>> +								   colls_ids[1],
>>> +								   is_forced[1],
>>> +								   &coll_id)
>>> +								   != 0) {
>>> +					pParse->rc = SQL_TARANTOOL_ERROR;
>>> +					pParse->nErr++;
>>> +					return 0;
>>> +				}
>>> +
>>> +				coll = (coll_id == colls_ids[0]) ? colls[0] :
>>> +								   colls[1];
>>> +				if (coll == NULL)
>>> +					coll = coll_by_id(COLL_NONE)->coll;
>> 
>> Why do we need this?
> Because if there is no collation at all among
> arguments, then the context would not have any
> collation as well. And sqlGetFuncCollSeq()
> would fail with assertion in this case, because
> it always expects some collations being set in context.

Hm, afair we added “none” collation to avoid such situations.
Will look at sql_expr_coll() and check if we can fix this.


[-- Attachment #2: Type: text/html, Size: 40643 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-03-15 17:07 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-10  7:41 [tarantool-patches] [PATCH] sql: replace instr() with position() Ivan Koptelov
2019-03-12 13:31 ` [tarantool-patches] " n.pettik
2019-03-13 11:56   ` i.koptelov
2019-03-15 17:07     ` n.pettik

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox