[tarantool-patches] Re: [PATCH 2/2] sql: rename instr to position & add collation usage
n.pettik
korablev at tarantool.org
Tue Mar 26 15:32:18 MSK 2019
I’ve pushed a couple of code-style fixes. Diff:
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 6654d4998..f9c0be97e 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -212,54 +212,53 @@ absFunc(sql_context * context, int argc, sql_value ** argv)
}
}
-/*
+/**
* Implementation of the position() function.
*
* position(needle, haystack) finds the first occurrence of needle
- * in haystack and returns the number of previous characters plus 1,
- * or 0 if needle does not occur within haystack.
+ * in haystack and returns the number of previous characters
+ * plus 1, or 0 if needle does not occur within haystack.
*
- * If both haystack and needle are BLOBs, then the result is one more than
- * the number of bytes in haystack prior to the first occurrence of needle,
- * or 0 if needle never occurs in haystack.
+ * If both haystack and needle are BLOBs, then the result is one
+ * more than the number of bytes in haystack prior to the first
+ * occurrence of needle, or 0 if needle never occurs in haystack.
*/
static void
-position_func(sql_context * context, int argc, sql_value ** argv)
+position_func(struct sql_context *context, int argc, struct Mem **argv)
{
UNUSED_PARAMETER(argc);
- sql_value *needle = argv[0];
- sql_value *haystack = argv[1];
- int type_needle = sql_value_type(needle);
- int type_haystack = sql_value_type(haystack);
+ struct Mem *needle = argv[0];
+ struct Mem *haystack = argv[1];
+ int needle_type = sql_value_type(needle);
+ int haystack_type = sql_value_type(haystack);
- if (type_haystack == SQL_NULL || type_needle == SQL_NULL)
+ if (haystack_type == SQL_NULL || needle_type == SQL_NULL)
return;
/*
* Position function can be called only with string
* or blob params.
*/
- sql_value *inconsistent_type_arg = NULL;
- if (type_needle != SQL_TEXT && type_needle != SQL_BLOB)
+ struct Mem *inconsistent_type_arg = NULL;
+ if (needle_type != SQL_TEXT && needle_type != SQL_BLOB)
inconsistent_type_arg = needle;
- if (type_haystack != SQL_TEXT && type_haystack != SQL_BLOB)
+ if (haystack_type != SQL_TEXT && haystack_type != SQL_BLOB)
inconsistent_type_arg = haystack;
if (inconsistent_type_arg != NULL) {
diag_set(ClientError, ER_INCONSISTENT_TYPES, "TEXT or BLOB",
mem_type_to_str(inconsistent_type_arg));
- context->pVdbe->pParse->is_aborted = true;
- sql_result_error(context, tarantoolErrorMessage(), -1);
+ context->isError = SQL_TARANTOOL_ERROR;
+ context->fErrorOrAux = 1;
return;
}
/*
* Both params of Position function must be of the same
* type.
*/
- if (type_haystack != type_needle) {
+ if (haystack_type != needle_type) {
diag_set(ClientError, ER_INCONSISTENT_TYPES,
- mem_type_to_str(needle),
- mem_type_to_str(haystack));
- context->pVdbe->pParse->is_aborted = true;
- sql_result_error(context, tarantoolErrorMessage(), -1);
+ mem_type_to_str(needle), mem_type_to_str(haystack));
+ context->isError = SQL_TARANTOOL_ERROR;
+ context->fErrorOrAux = 1;
return;
}
@@ -269,14 +268,18 @@ position_func(sql_context * context, int argc, sql_value ** argv)
if (n_needle_bytes > 0) {
const unsigned char *haystack_str;
const unsigned char *needle_str;
- if (type_haystack == SQL_BLOB) {
+ if (haystack_type == SQL_BLOB) {
needle_str = sql_value_blob(needle);
haystack_str = sql_value_blob(haystack);
assert(needle_str != NULL);
assert(haystack_str != NULL || n_haystack_bytes == 0);
-
- while (n_needle_bytes <= n_haystack_bytes
- && memcmp(haystack_str, needle_str, n_needle_bytes) != 0) {
+ /*
+ * Naive implementation of substring
+ * searching: matching time O(n * m).
+ * Can be improved.
+ */
+ while (n_needle_bytes <= n_haystack_bytes &&
+ memcmp(haystack_str, needle_str, n_needle_bytes) != 0) {
position++;
n_haystack_bytes--;
haystack_str++;
@@ -301,7 +304,8 @@ position_func(sql_context * context, int argc, sql_value ** argv)
int n_needle_chars =
sql_utf8_char_count(needle_str, n_needle_bytes);
int n_haystack_chars =
- sql_utf8_char_count(haystack_str, n_haystack_bytes);
+ sql_utf8_char_count(haystack_str,
+ n_haystack_bytes);
if (n_haystack_chars < n_needle_chars) {
position = 0;
> ---
> Before I changed postion() functionality, I refactored it:
I appreciate that you attached this diff, but it would be better if
you placed it in a separate commit (do it next time, now it is ok).
>
> ---
> src/box/sql/func.c | 68 ++++++++++++++++++++++------------------------
> 1 file changed, 33 insertions(+), 35 deletions(-)
>
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index 2de6ef5ce..0100bb22f 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> /*
> @@ -1760,7 +1757,8 @@ sqlRegisterBuiltinFunctions(void)
> FIELD_TYPE_STRING),
> FUNCTION2(length, 1, 0, 0, lengthFunc, SQL_FUNC_LENGTH,
> FIELD_TYPE_INTEGER),
> - FUNCTION(instr, 2, 0, 0, instrFunc, FIELD_TYPE_INTEGER),
> + FUNCTION2(position, 2, 0, 1, position_func, SQL_FUNC_NEEDCOLL,
You don’t need FUNCTION2: SQL_FUNC_NEEDCOL is set by fourth param of FUNCTION.
> + FIELD_TYPE_INTEGER),
> FUNCTION(printf, -1, 0, 0, printfFunc, FIELD_TYPE_STRING),
> FUNCTION(unicode, 1, 0, 0, unicodeFunc, FIELD_TYPE_STRING),
> FUNCTION(char, -1, 0, 0, charFunc, FIELD_TYPE_STRING),
> --
>
>
> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
> index 2f48d90c6..53926e3f2 100644
> --- a/src/box/sql/expr.c
> +++ b/src/box/sql/expr.c
> @@ -4137,9 +4137,7 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
> lhs_id, is_lhs_forced,
> rhs_id, is_rhs_forced,
> &temp_id) != 0) {
> - pParse->rc =
> - SQL_TARANTOOL_ERROR;
> - pParse->nErr++;
> + pParse->is_aborted = true;
> return 0;
> }
>
> @@ -4152,9 +4150,7 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
> curr_id, is_curr_forced,
> temp_id, is_temp_forced,
> &curr_id) != 0) {
> - pParse->rc =
> - SQL_TARANTOOL_ERROR;
> - pParse->nErr++;
> + pParse->is_aborted = true;
> return 0;
> }
> }
Remains after rebase, I guess.
> diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h
> index c84f22caf..8af4fa359 100644
> --- a/src/box/sql/vdbeInt.h
> +++ b/src/box/sql/vdbeInt.h
> @@ -607,4 +607,13 @@ char *
> sql_vdbe_mem_encode_tuple(struct Mem *fields, uint32_t field_count,
> uint32_t *tuple_size, struct region *region);
>
> +/**
> + * Simple type to str convertor. It is used to simplify
> + * error reporting.
> + * @param mem Memory field which type we want to get.
> + * @retval String representing name of the type.
> + */
> +char *
> +mem_type_to_str(const struct Mem *p);
Rebase to fresh 2.1: I’ve already moved it to vdbeInt.h
> diff --git a/test/sql-tap/position.test.lua b/test/sql-tap/position.test.lua
> new file mode 100755
> index 000000000..70d11444c
> --- /dev/null
> +++ b/test/sql-tap/position.test.lua
> @@ -0,0 +1,905 @@
> +#!/usr/bin/env tarantool
> +test = require("sqltester")
> +test:plan(80)
> +
> +--!./tcltestrunner.lua
> +-- 2012 October 24
> +--
> +-- The author disclaims copyright to this source code. In place of
> +-- a legal notice, here is a blessing:
> +--
> +-- May you do good and not evil.
> +-- May you find forgiveness for yourself and forgive others.
> +-- May you share freely, never taking more than you give.
> +--
> +-------------------------------------------------------------------------
> +-- This file implements regression tests for sql library. The
> +-- focus of this file is testing the built-in POSITION() functions.
> +--
> +-- EVIDENCE-OF: R-27549-59611 The position(X,Y) function finds the first
> +-- occurrence of string X within string Y and returns the number of prior
> +-- characters plus 1, or 0 if X is nowhere found within Y.
> +--
> +-- ["set","testdir",[["file","dirname",["argv0"]]]]
> +-- ["source",[["testdir"],"\/tester.tcl"]]
> +-- Create a table to work with.
Please, remove this annoying header. It’s SQLite’s artefact.
> +test:do_test(
> + "position-1.45",
> + function()
> + return test:execsql "SELECT position('', 'abcdefg');"
> + end, {
> + -- <position-1.45>
> + 1
> + -- </position-1.45>
> + })
> +
> +-- ["unset","-nocomplain","longstr”]
Remove this comment - it’s an artefact of auto-converting tool.
> +test:do_test(
> + "position-1.56.3",
> + function()
> + return test:execsql "SELECT position(x'a4', x'78c3a4e282ac79');"
> + end, {
> + -- <position-1.56.3>
> + 3
> + -- </position-1.56.3>
> + })
> +
> +-- EVIDENCE-OF: R-17329-35644 If both arguments X and Y to position(X,Y) are
> +-- non-NULL and are not BLOBs then both are interpreted as strings.
Please, clean-up file from non-related to out SQL implementation comments.
> +--
> +test:do_test(
> + "position-1.57.1",
> + function()
> + return test:catchsql "SELECT position(x'79', 'xä€y');"
> + end, {
> + -- <position-1.57.1>
> + 1, "Inconsistent types: expected BLOB got TEXT"
> + -- </position-1.57.1>
> + })
> +
> +
> +-- Tests that make use of collations.
> +-- A short remainder of how "unicode" and "unicode_ci" collations
> +-- works:
> +-- unicode_ci: „a“ = „A“ = „á“ = „Á“.
> +-- unicode: all symbols above are pairwise unequal.
> +
> +-- Collation is set in space format.
> +
> +test:do_execsql_test(
> + "position-1.63",
> + [[
> + CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
> + INSERT INTO test1 VALUES('à');
> + SELECT POSITION('a', s1) FROM test1;
> + DROP TABLE test1;
> + ]], {
> + 1
> + }
> +)
> +
> +test:do_execsql_test(
> + "position-1.64",
> + [[
> + CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci”);
Why do you re-create table in each test? It features the same collation.
Also, I’d rather use “unicode” collation: you don’t test case insensitive option.
> +
> +-- Collation is set in space format and also in position() -
> +-- for both arguments. Arguments have different explicit
> +-- collations thus an error is expected.
> +
> +test:do_catchsql_test(
> + "position-1.75",
> + [[
> + CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
> + INSERT INTO test1 VALUES('à');
> + SELECT POSITION('a' COLLATE "unicode_ci", s1 COLLATE "unicode") FROM test1;
> + ]], {
> + 1, "Illegal mix of collations"
> + }
> +)
> +
> +test:do_catchsql_test(
> + "position-1.76",
> + [[
> + DROP TABLE test1;
> + CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
> + INSERT INTO test1 VALUES('qwèrty');
> + SELECT POSITION('er' COLLATE "unicode_ci", s1 COLLATE "unicode") FROM test1;
> + DROP TABLE test1;
> + ]], {
> + 1, "Illegal mix of collations"
> + }
> +)
> +
> +test:do_catchsql_test(
> + "position-1.77",
> + [[
> + DROP TABLE test1;
> + CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci");
> + INSERT INTO test1 VALUES('qwèrtÿ');
> + SELECT POSITION('Y' COLLATE "unicode_ci", s1 COLLATE "unicode") FROM test1;
> + ]], {
> + 1, "Illegal mix of collations"
> + }
> +)
Add test cases on: explicitly set incompatible collations,
explicitly set compatible collations.
More information about the Tarantool-patches
mailing list