[tarantool-patches] Re: [PATCH 2/2] sql: rename instr to position & add collation usage

n.pettik korablev at tarantool.org
Tue Mar 26 15:32:18 MSK 2019


I’ve pushed a couple of code-style fixes. Diff:

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 6654d4998..f9c0be97e 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -212,54 +212,53 @@ absFunc(sql_context * context, int argc, sql_value ** argv)
        }
 }
 
-/*
+/**
  * Implementation of the position() function.
  *
  * position(needle, haystack) finds the first occurrence of needle
- * in haystack and returns the number of previous characters plus 1,
- * or 0 if needle does not occur within haystack.
+ * in haystack and returns the number of previous characters
+ * plus 1, or 0 if needle does not occur within haystack.
  *
- * If both haystack and needle are BLOBs, then the result is one more than
- * the number of bytes in haystack prior to the first occurrence of needle,
- * or 0 if needle never occurs in haystack.
+ * If both haystack and needle are BLOBs, then the result is one
+ * more than the number of bytes in haystack prior to the first
+ * occurrence of needle, or 0 if needle never occurs in haystack.
  */
 static void
-position_func(sql_context * context, int argc, sql_value ** argv)
+position_func(struct sql_context *context, int argc, struct Mem **argv)
 {
        UNUSED_PARAMETER(argc);
-       sql_value *needle = argv[0];
-       sql_value *haystack = argv[1];
-       int type_needle = sql_value_type(needle);
-       int type_haystack = sql_value_type(haystack);
+       struct Mem *needle = argv[0];
+       struct Mem *haystack = argv[1];
+       int needle_type = sql_value_type(needle);
+       int haystack_type = sql_value_type(haystack);
 
-       if (type_haystack == SQL_NULL || type_needle == SQL_NULL)
+       if (haystack_type == SQL_NULL || needle_type == SQL_NULL)
                return;
        /*
         * Position function can be called only with string
         * or blob params.
         */
-       sql_value *inconsistent_type_arg = NULL;
-       if (type_needle != SQL_TEXT && type_needle != SQL_BLOB)
+       struct Mem *inconsistent_type_arg = NULL;
+       if (needle_type != SQL_TEXT && needle_type != SQL_BLOB)
                inconsistent_type_arg = needle;
-       if (type_haystack != SQL_TEXT && type_haystack != SQL_BLOB)
+       if (haystack_type != SQL_TEXT && haystack_type != SQL_BLOB)
                inconsistent_type_arg = haystack;
        if (inconsistent_type_arg != NULL) {
                diag_set(ClientError, ER_INCONSISTENT_TYPES, "TEXT or BLOB",
                         mem_type_to_str(inconsistent_type_arg));
-               context->pVdbe->pParse->is_aborted = true;
-               sql_result_error(context, tarantoolErrorMessage(), -1);
+               context->isError = SQL_TARANTOOL_ERROR;
+               context->fErrorOrAux = 1;
                return;
        }
        /*
         * Both params of Position function must be of the same
         * type.
         */
-       if (type_haystack != type_needle) {
+       if (haystack_type != needle_type) {
                diag_set(ClientError, ER_INCONSISTENT_TYPES,
-                        mem_type_to_str(needle),
-                        mem_type_to_str(haystack));
-               context->pVdbe->pParse->is_aborted = true;
-               sql_result_error(context, tarantoolErrorMessage(), -1);
+                        mem_type_to_str(needle), mem_type_to_str(haystack));
+               context->isError = SQL_TARANTOOL_ERROR;
+               context->fErrorOrAux = 1;
                return;
        }
 
@@ -269,14 +268,18 @@ position_func(sql_context * context, int argc, sql_value ** argv)
        if (n_needle_bytes > 0) {
                const unsigned char *haystack_str;
                const unsigned char *needle_str;
-               if (type_haystack == SQL_BLOB) {
+               if (haystack_type == SQL_BLOB) {
                        needle_str = sql_value_blob(needle);
                        haystack_str = sql_value_blob(haystack);
                        assert(needle_str != NULL);
                        assert(haystack_str != NULL || n_haystack_bytes == 0);
-
-                       while (n_needle_bytes <= n_haystack_bytes
-                              && memcmp(haystack_str, needle_str, n_needle_bytes) != 0) {
+                       /*
+                        * Naive implementation of substring
+                        * searching: matching time O(n * m).
+                        * Can be improved.
+                        */
+                       while (n_needle_bytes <= n_haystack_bytes &&
+                              memcmp(haystack_str, needle_str, n_needle_bytes) != 0) {
                                position++;
                                n_haystack_bytes--;
                                haystack_str++;
@@ -301,7 +304,8 @@ position_func(sql_context * context, int argc, sql_value ** argv)
                        int n_needle_chars =
                                sql_utf8_char_count(needle_str, n_needle_bytes);
                        int n_haystack_chars =
-                               sql_utf8_char_count(haystack_str, n_haystack_bytes);
+                               sql_utf8_char_count(haystack_str,
+                                                   n_haystack_bytes);
 
                        if (n_haystack_chars < n_needle_chars) {
                                position = 0;


> ---
> Before I changed postion() functionality, I refactored it:

I appreciate that you attached this diff, but it would be better if
you placed it in a separate commit (do it next time, now it is ok).

> 
> ---
> src/box/sql/func.c | 68 ++++++++++++++++++++++------------------------
> 1 file changed, 33 insertions(+), 35 deletions(-)
> 
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index 2de6ef5ce..0100bb22f 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> /*
> @@ -1760,7 +1757,8 @@ sqlRegisterBuiltinFunctions(void)
> 			  FIELD_TYPE_STRING),
> 		FUNCTION2(length, 1, 0, 0, lengthFunc, SQL_FUNC_LENGTH,
> 			  FIELD_TYPE_INTEGER),
> -		FUNCTION(instr, 2, 0, 0, instrFunc, FIELD_TYPE_INTEGER),
> +		FUNCTION2(position, 2, 0, 1, position_func, SQL_FUNC_NEEDCOLL,

You don’t need FUNCTION2: SQL_FUNC_NEEDCOL is set by fourth param of FUNCTION.

> +			  FIELD_TYPE_INTEGER),
> 		FUNCTION(printf, -1, 0, 0, printfFunc, FIELD_TYPE_STRING),
> 		FUNCTION(unicode, 1, 0, 0, unicodeFunc, FIELD_TYPE_STRING),
> 		FUNCTION(char, -1, 0, 0, charFunc, FIELD_TYPE_STRING),
> --
> 
> 
> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
> index 2f48d90c6..53926e3f2 100644
> --- a/src/box/sql/expr.c
> +++ b/src/box/sql/expr.c
> @@ -4137,9 +4137,7 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
> 							lhs_id, is_lhs_forced,
> 							rhs_id, is_rhs_forced,
> 							&temp_id) != 0) {
> -							pParse->rc =
> -								SQL_TARANTOOL_ERROR;
> -							pParse->nErr++;
> +							pParse->is_aborted = true;
> 							return 0;
> 						}
> 
> @@ -4152,9 +4150,7 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
> 							curr_id, is_curr_forced,
> 							temp_id, is_temp_forced,
> 							&curr_id) != 0) {
> -							pParse->rc =
> -								SQL_TARANTOOL_ERROR;
> -							pParse->nErr++;
> +							pParse->is_aborted = true;
> 							return 0;
> 						}
> 					}

Remains after rebase, I guess.

> diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h
> index c84f22caf..8af4fa359 100644
> --- a/src/box/sql/vdbeInt.h
> +++ b/src/box/sql/vdbeInt.h
> @@ -607,4 +607,13 @@ char *
> sql_vdbe_mem_encode_tuple(struct Mem *fields, uint32_t field_count,
> 			  uint32_t *tuple_size, struct region *region);
> 
> +/**
> + * Simple type to str convertor. It is used to simplify
> + * error reporting.
> + * @param mem Memory field which type we want to get.
> + * @retval String representing name of the type.
> + */
> +char *
> +mem_type_to_str(const struct Mem *p);

Rebase to fresh 2.1: I’ve already moved it to vdbeInt.h

> diff --git a/test/sql-tap/position.test.lua b/test/sql-tap/position.test.lua
> new file mode 100755
> index 000000000..70d11444c
> --- /dev/null
> +++ b/test/sql-tap/position.test.lua
> @@ -0,0 +1,905 @@
> +#!/usr/bin/env tarantool
> +test = require("sqltester")
> +test:plan(80)
> +
> +--!./tcltestrunner.lua
> +-- 2012 October 24
> +--
> +-- The author disclaims copyright to this source code.  In place of
> +-- a legal notice, here is a blessing:
> +--
> +--    May you do good and not evil.
> +--    May you find forgiveness for yourself and forgive others.
> +--    May you share freely, never taking more than you give.
> +--
> +-------------------------------------------------------------------------
> +-- This file implements regression tests for sql library.  The
> +-- focus of this file is testing the built-in POSITION() functions.
> +--
> +-- EVIDENCE-OF: R-27549-59611 The position(X,Y) function finds the first
> +-- occurrence of string X within string Y and returns the number of prior
> +-- characters plus 1, or 0 if X is nowhere found within Y.
> +--
> +-- ["set","testdir",[["file","dirname",["argv0"]]]]
> +-- ["source",[["testdir"],"\/tester.tcl"]]
> +-- Create a table to work with.

Please, remove this annoying header. It’s SQLite’s artefact.

> +test:do_test(
> +    "position-1.45",
> +    function()
> +        return test:execsql "SELECT position('', 'abcdefg');"
> +    end, {
> +        -- <position-1.45>
> +        1
> +        -- </position-1.45>
> +    })
> +
> +-- ["unset","-nocomplain","longstr”]

Remove this comment - it’s an artefact of auto-converting tool.

> +test:do_test(
> +    "position-1.56.3",
> +    function()
> +        return test:execsql "SELECT position(x'a4', x'78c3a4e282ac79');"
> +    end, {
> +        -- <position-1.56.3>
> +        3
> +        -- </position-1.56.3>
> +    })
> +
> +-- EVIDENCE-OF: R-17329-35644 If both arguments X and Y to position(X,Y) are
> +-- non-NULL and are not BLOBs then both are interpreted as strings.

Please, clean-up file from non-related to out SQL implementation comments.

> +--
> +test:do_test(
> +    "position-1.57.1",
> +    function()
> +        return test:catchsql "SELECT position(x'79', 'xä€y');"
> +    end, {
> +        -- <position-1.57.1>
> +        1, "Inconsistent types: expected BLOB got TEXT"
> +        -- </position-1.57.1>
> +    })
> +
> +
> +-- Tests that make use of collations.
> +-- A short remainder of how "unicode" and "unicode_ci" collations
> +-- works:
> +-- unicode_ci: „a“ = „A“ = „á“ = „Á“.
> +-- unicode: all symbols above are pairwise unequal.
> +
> +-- Collation is set in space format.
> +
> +test:do_execsql_test(
> +    "position-1.63",
> +    [[
> +        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
> +        INSERT INTO test1 VALUES('à');
> +        SELECT POSITION('a', s1) FROM test1;
> +        DROP TABLE test1;
> +    ]], {
> +        1
> +    }
> +)
> +
> +test:do_execsql_test(
> +    "position-1.64",
> +    [[
> +        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci”);

Why do you re-create table in each test? It features the same collation.
Also, I’d rather use “unicode” collation: you don’t test case insensitive option.

> +
> +-- Collation is set in space format and also in position() -
> +-- for both arguments. Arguments have different explicit
> +-- collations thus an error is expected.
> +
> +test:do_catchsql_test(
> +    "position-1.75",
> +    [[
> +        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
> +        INSERT INTO test1 VALUES('à');
> +        SELECT POSITION('a' COLLATE "unicode_ci", s1 COLLATE "unicode") FROM test1;
> +    ]], {
> +        1, "Illegal mix of collations"
> +    }
> +)
> +
> +test:do_catchsql_test(
> +    "position-1.76",
> +    [[
> +        DROP TABLE test1;
> +        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
> +        INSERT INTO test1 VALUES('qwèrty');
> +        SELECT POSITION('er' COLLATE "unicode_ci", s1 COLLATE "unicode") FROM test1;
> +        DROP TABLE test1;
> +    ]], {
> +        1, "Illegal mix of collations"
> +    }
> +)
> +
> +test:do_catchsql_test(
> +    "position-1.77",
> +    [[
> +        DROP TABLE test1;
> +        CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
> +        INSERT INTO test1 VALUES('qwèrtÿ');
> +        SELECT POSITION('Y' COLLATE "unicode_ci", s1 COLLATE "unicode") FROM test1;
> +    ]], {
> +        1, "Illegal mix of collations"
> +    }
> +)

Add test cases on: explicitly set incompatible collations,
explicitly set compatible collations.






More information about the Tarantool-patches mailing list