From: "n.pettik" <korablev@tarantool.org> To: tarantool-patches@freelists.org Cc: Ivan Koptelov <ivan.koptelov@tarantool.org> Subject: [tarantool-patches] Re: [PATCH 2/2] sql: rename instr to position & add collation usage Date: Tue, 26 Mar 2019 15:32:18 +0300 [thread overview] Message-ID: <B0C12D96-4683-4084-A5AF-65CA890948CD@tarantool.org> (raw) In-Reply-To: <2f6396ca10b94874467361e482555b225d5256de.1553078164.git.ivan.koptelov@tarantool.org> I’ve pushed a couple of code-style fixes. Diff: diff --git a/src/box/sql/func.c b/src/box/sql/func.c index 6654d4998..f9c0be97e 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -212,54 +212,53 @@ absFunc(sql_context * context, int argc, sql_value ** argv) } } -/* +/** * Implementation of the position() function. * * position(needle, haystack) finds the first occurrence of needle - * in haystack and returns the number of previous characters plus 1, - * or 0 if needle does not occur within haystack. + * in haystack and returns the number of previous characters + * plus 1, or 0 if needle does not occur within haystack. * - * If both haystack and needle are BLOBs, then the result is one more than - * the number of bytes in haystack prior to the first occurrence of needle, - * or 0 if needle never occurs in haystack. + * If both haystack and needle are BLOBs, then the result is one + * more than the number of bytes in haystack prior to the first + * occurrence of needle, or 0 if needle never occurs in haystack. */ static void -position_func(sql_context * context, int argc, sql_value ** argv) +position_func(struct sql_context *context, int argc, struct Mem **argv) { UNUSED_PARAMETER(argc); - sql_value *needle = argv[0]; - sql_value *haystack = argv[1]; - int type_needle = sql_value_type(needle); - int type_haystack = sql_value_type(haystack); + struct Mem *needle = argv[0]; + struct Mem *haystack = argv[1]; + int needle_type = sql_value_type(needle); + int haystack_type = sql_value_type(haystack); - if (type_haystack == SQL_NULL || type_needle == SQL_NULL) + if (haystack_type == SQL_NULL || needle_type == SQL_NULL) return; /* * Position function can be called only with string * or blob params. */ - sql_value *inconsistent_type_arg = NULL; - if (type_needle != SQL_TEXT && type_needle != SQL_BLOB) + struct Mem *inconsistent_type_arg = NULL; + if (needle_type != SQL_TEXT && needle_type != SQL_BLOB) inconsistent_type_arg = needle; - if (type_haystack != SQL_TEXT && type_haystack != SQL_BLOB) + if (haystack_type != SQL_TEXT && haystack_type != SQL_BLOB) inconsistent_type_arg = haystack; if (inconsistent_type_arg != NULL) { diag_set(ClientError, ER_INCONSISTENT_TYPES, "TEXT or BLOB", mem_type_to_str(inconsistent_type_arg)); - context->pVdbe->pParse->is_aborted = true; - sql_result_error(context, tarantoolErrorMessage(), -1); + context->isError = SQL_TARANTOOL_ERROR; + context->fErrorOrAux = 1; return; } /* * Both params of Position function must be of the same * type. */ - if (type_haystack != type_needle) { + if (haystack_type != needle_type) { diag_set(ClientError, ER_INCONSISTENT_TYPES, - mem_type_to_str(needle), - mem_type_to_str(haystack)); - context->pVdbe->pParse->is_aborted = true; - sql_result_error(context, tarantoolErrorMessage(), -1); + mem_type_to_str(needle), mem_type_to_str(haystack)); + context->isError = SQL_TARANTOOL_ERROR; + context->fErrorOrAux = 1; return; } @@ -269,14 +268,18 @@ position_func(sql_context * context, int argc, sql_value ** argv) if (n_needle_bytes > 0) { const unsigned char *haystack_str; const unsigned char *needle_str; - if (type_haystack == SQL_BLOB) { + if (haystack_type == SQL_BLOB) { needle_str = sql_value_blob(needle); haystack_str = sql_value_blob(haystack); assert(needle_str != NULL); assert(haystack_str != NULL || n_haystack_bytes == 0); - - while (n_needle_bytes <= n_haystack_bytes - && memcmp(haystack_str, needle_str, n_needle_bytes) != 0) { + /* + * Naive implementation of substring + * searching: matching time O(n * m). + * Can be improved. + */ + while (n_needle_bytes <= n_haystack_bytes && + memcmp(haystack_str, needle_str, n_needle_bytes) != 0) { position++; n_haystack_bytes--; haystack_str++; @@ -301,7 +304,8 @@ position_func(sql_context * context, int argc, sql_value ** argv) int n_needle_chars = sql_utf8_char_count(needle_str, n_needle_bytes); int n_haystack_chars = - sql_utf8_char_count(haystack_str, n_haystack_bytes); + sql_utf8_char_count(haystack_str, + n_haystack_bytes); if (n_haystack_chars < n_needle_chars) { position = 0; > --- > Before I changed postion() functionality, I refactored it: I appreciate that you attached this diff, but it would be better if you placed it in a separate commit (do it next time, now it is ok). > > --- > src/box/sql/func.c | 68 ++++++++++++++++++++++------------------------ > 1 file changed, 33 insertions(+), 35 deletions(-) > > diff --git a/src/box/sql/func.c b/src/box/sql/func.c > index 2de6ef5ce..0100bb22f 100644 > --- a/src/box/sql/func.c > +++ b/src/box/sql/func.c > /* > @@ -1760,7 +1757,8 @@ sqlRegisterBuiltinFunctions(void) > FIELD_TYPE_STRING), > FUNCTION2(length, 1, 0, 0, lengthFunc, SQL_FUNC_LENGTH, > FIELD_TYPE_INTEGER), > - FUNCTION(instr, 2, 0, 0, instrFunc, FIELD_TYPE_INTEGER), > + FUNCTION2(position, 2, 0, 1, position_func, SQL_FUNC_NEEDCOLL, You don’t need FUNCTION2: SQL_FUNC_NEEDCOL is set by fourth param of FUNCTION. > + FIELD_TYPE_INTEGER), > FUNCTION(printf, -1, 0, 0, printfFunc, FIELD_TYPE_STRING), > FUNCTION(unicode, 1, 0, 0, unicodeFunc, FIELD_TYPE_STRING), > FUNCTION(char, -1, 0, 0, charFunc, FIELD_TYPE_STRING), > -- > > > diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c > index 2f48d90c6..53926e3f2 100644 > --- a/src/box/sql/expr.c > +++ b/src/box/sql/expr.c > @@ -4137,9 +4137,7 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target) > lhs_id, is_lhs_forced, > rhs_id, is_rhs_forced, > &temp_id) != 0) { > - pParse->rc = > - SQL_TARANTOOL_ERROR; > - pParse->nErr++; > + pParse->is_aborted = true; > return 0; > } > > @@ -4152,9 +4150,7 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target) > curr_id, is_curr_forced, > temp_id, is_temp_forced, > &curr_id) != 0) { > - pParse->rc = > - SQL_TARANTOOL_ERROR; > - pParse->nErr++; > + pParse->is_aborted = true; > return 0; > } > } Remains after rebase, I guess. > diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h > index c84f22caf..8af4fa359 100644 > --- a/src/box/sql/vdbeInt.h > +++ b/src/box/sql/vdbeInt.h > @@ -607,4 +607,13 @@ char * > sql_vdbe_mem_encode_tuple(struct Mem *fields, uint32_t field_count, > uint32_t *tuple_size, struct region *region); > > +/** > + * Simple type to str convertor. It is used to simplify > + * error reporting. > + * @param mem Memory field which type we want to get. > + * @retval String representing name of the type. > + */ > +char * > +mem_type_to_str(const struct Mem *p); Rebase to fresh 2.1: I’ve already moved it to vdbeInt.h > diff --git a/test/sql-tap/position.test.lua b/test/sql-tap/position.test.lua > new file mode 100755 > index 000000000..70d11444c > --- /dev/null > +++ b/test/sql-tap/position.test.lua > @@ -0,0 +1,905 @@ > +#!/usr/bin/env tarantool > +test = require("sqltester") > +test:plan(80) > + > +--!./tcltestrunner.lua > +-- 2012 October 24 > +-- > +-- The author disclaims copyright to this source code. In place of > +-- a legal notice, here is a blessing: > +-- > +-- May you do good and not evil. > +-- May you find forgiveness for yourself and forgive others. > +-- May you share freely, never taking more than you give. > +-- > +------------------------------------------------------------------------- > +-- This file implements regression tests for sql library. The > +-- focus of this file is testing the built-in POSITION() functions. > +-- > +-- EVIDENCE-OF: R-27549-59611 The position(X,Y) function finds the first > +-- occurrence of string X within string Y and returns the number of prior > +-- characters plus 1, or 0 if X is nowhere found within Y. > +-- > +-- ["set","testdir",[["file","dirname",["argv0"]]]] > +-- ["source",[["testdir"],"\/tester.tcl"]] > +-- Create a table to work with. Please, remove this annoying header. It’s SQLite’s artefact. > +test:do_test( > + "position-1.45", > + function() > + return test:execsql "SELECT position('', 'abcdefg');" > + end, { > + -- <position-1.45> > + 1 > + -- </position-1.45> > + }) > + > +-- ["unset","-nocomplain","longstr”] Remove this comment - it’s an artefact of auto-converting tool. > +test:do_test( > + "position-1.56.3", > + function() > + return test:execsql "SELECT position(x'a4', x'78c3a4e282ac79');" > + end, { > + -- <position-1.56.3> > + 3 > + -- </position-1.56.3> > + }) > + > +-- EVIDENCE-OF: R-17329-35644 If both arguments X and Y to position(X,Y) are > +-- non-NULL and are not BLOBs then both are interpreted as strings. Please, clean-up file from non-related to out SQL implementation comments. > +-- > +test:do_test( > + "position-1.57.1", > + function() > + return test:catchsql "SELECT position(x'79', 'xä€y');" > + end, { > + -- <position-1.57.1> > + 1, "Inconsistent types: expected BLOB got TEXT" > + -- </position-1.57.1> > + }) > + > + > +-- Tests that make use of collations. > +-- A short remainder of how "unicode" and "unicode_ci" collations > +-- works: > +-- unicode_ci: „a“ = „A“ = „á“ = „Á“. > +-- unicode: all symbols above are pairwise unequal. > + > +-- Collation is set in space format. > + > +test:do_execsql_test( > + "position-1.63", > + [[ > + CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci"); > + INSERT INTO test1 VALUES('à'); > + SELECT POSITION('a', s1) FROM test1; > + DROP TABLE test1; > + ]], { > + 1 > + } > +) > + > +test:do_execsql_test( > + "position-1.64", > + [[ > + CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci”); Why do you re-create table in each test? It features the same collation. Also, I’d rather use “unicode” collation: you don’t test case insensitive option. > + > +-- Collation is set in space format and also in position() - > +-- for both arguments. Arguments have different explicit > +-- collations thus an error is expected. > + > +test:do_catchsql_test( > + "position-1.75", > + [[ > + CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci"); > + INSERT INTO test1 VALUES('à'); > + SELECT POSITION('a' COLLATE "unicode_ci", s1 COLLATE "unicode") FROM test1; > + ]], { > + 1, "Illegal mix of collations" > + } > +) > + > +test:do_catchsql_test( > + "position-1.76", > + [[ > + DROP TABLE test1; > + CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci"); > + INSERT INTO test1 VALUES('qwèrty'); > + SELECT POSITION('er' COLLATE "unicode_ci", s1 COLLATE "unicode") FROM test1; > + DROP TABLE test1; > + ]], { > + 1, "Illegal mix of collations" > + } > +) > + > +test:do_catchsql_test( > + "position-1.77", > + [[ > + DROP TABLE test1; > + CREATE TABLE test1 (s1 VARCHAR(5) PRIMARY KEY COLLATE "unicode_ci"); > + INSERT INTO test1 VALUES('qwèrtÿ'); > + SELECT POSITION('Y' COLLATE "unicode_ci", s1 COLLATE "unicode") FROM test1; > + ]], { > + 1, "Illegal mix of collations" > + } > +) Add test cases on: explicitly set incompatible collations, explicitly set compatible collations.
next prev parent reply other threads:[~2019-03-26 12:32 UTC|newest] Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-03-20 11:11 [tarantool-patches] [PATCH 0/2] sql: add better coll. determination & position func Ivan Koptelov 2019-03-20 11:11 ` [tarantool-patches] [PATCH 1/2] sql: add better collation determination in functions Ivan Koptelov 2019-03-25 19:26 ` [tarantool-patches] " n.pettik 2019-03-27 13:38 ` i.koptelov 2019-03-28 12:50 ` n.pettik 2019-03-28 14:08 ` i.koptelov 2019-03-29 9:57 ` n.pettik 2019-03-20 11:11 ` [tarantool-patches] [PATCH 2/2] sql: rename instr to position & add collation usage Ivan Koptelov 2019-03-20 12:59 ` [tarantool-patches] Re: [PATCH 1/2] " i.koptelov 2019-03-26 12:32 ` n.pettik [this message] 2019-03-27 13:39 ` [tarantool-patches] Re: [PATCH 2/2] " i.koptelov 2019-03-28 12:57 ` n.pettik 2019-04-01 14:15 ` [tarantool-patches] Re: [PATCH 0/2] sql: add better coll. determination & position func Kirill Yukhin
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=B0C12D96-4683-4084-A5AF-65CA890948CD@tarantool.org \ --to=korablev@tarantool.org \ --cc=ivan.koptelov@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='[tarantool-patches] Re: [PATCH 2/2] sql: rename instr to position & add collation usage' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox