From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 420126FC87; Fri, 1 Oct 2021 19:32:26 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 420126FC87 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1633105946; bh=kbqdrmcwCNfwsgqBc2tR4KdKQoSuankI/3XifwoJoeU=; h=To:Cc:Date:In-Reply-To:References:Subject:List-Id: List-Unsubscribe:List-Archive:List-Post:List-Help:List-Subscribe: From:Reply-To:From; b=UCwuwdQPvxvkOwacm8+K7UGQhBCsxYyFU5bSrq/NxIRQDRuWDChfQoKXvKSIuvKWF 3mRbk/MYbe1ImShu10jQR8/ytymLo47UJK2dUpnpfsEoPGL6eR0ARcT1/HcTVjxyeX coAxPFy70LQOvGfK6WujXnjYkTup1uJsvd4Qj+ek= Received: from smtpng1.i.mail.ru (smtpng1.i.mail.ru [94.100.181.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 8BD6B6FC87 for ; Fri, 1 Oct 2021 19:29:38 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 8BD6B6FC87 Received: by smtpng1.m.smailru.net with esmtpa (envelope-from ) id 1mWLPl-0004Kx-PN; Fri, 01 Oct 2021 19:29:38 +0300 To: v.shpilevoy@tarantool.org Cc: tarantool-patches@dev.tarantool.org Date: Fri, 1 Oct 2021 19:29:37 +0300 Message-Id: X-Mailer: git-send-email 2.25.1 In-Reply-To: References: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-4EC0790: 10 X-7564579A: 646B95376F6C166E X-77F55803: 4F1203BC0FB41BD96A58C36AA2E996498302BFE8288A953FFF2204EAAF1B1FDF182A05F53808504008281A9A31EC3069C7A5AF76613C402A4680AD175D24FBDA549832CCB1363052 X-7FA49CB5: FF5795518A3D127A4AD6D5ED66289B5278DA827A17800CE78981306C6E927004EA1F7E6F0F101C67BD4B6F7A4D31EC0BCC500DACC3FED6E28638F802B75D45FF8AA50765F790063743447F216C7C64BDEA1F7E6F0F101C6723150C8DA25C47586E58E00D9D99D84E1BDDB23E98D2D38BBCA57AF85F7723F294E0C15DDC49A3206E0BAE1833C588CACC7F00164DA146DAFE8445B8C89999728AA50765F7900637F6B57BC7E64490618DEB871D839B7333395957E7521B51C2DFABB839C843B9C08941B15DA834481F8AA50765F7900637F6B57BC7E6449061A352F6E88A58FB86F5D81C698A659EA7E827F84554CEF5019E625A9149C048EE9ECD01F8117BC8BEE2021AF6380DFAD18AA50765F790063735872C767BF85DA227C277FBC8AE2E8BDC0F6C5B2EEF3D0C75ECD9A6C639B01B4E70A05D1297E1BBCB5012B2E24CD356 X-C1DE0DAB: C20DE7B7AB408E4181F030C43753B8186998911F362727C414F749A5E30D975CBBB17C150BCA6793D7CD53277FEDFDACF2B68552ABB4B67C9C2B6934AE262D3EE7EAB7254005DCED7532B743992DF240BDC6A1CF3F042BAD6DF99611D93F60EF783E2B6F79C23BED699F904B3F4130E343918A1A30D5E7FCCB5012B2E24CD356 X-C8649E89: 4E36BF7865823D7055A7F0CF078B5EC49A30900B95165D340DAE5B306C240CF5D8E9F551F66AE0D2889F622A35175A3FD9B444B779E4E9041F9F578D844D6FA81D7E09C32AA3244C1255BB7ECC06DB56969C62F8AE0DE3D169B6CAE0477E908D729B2BEF169E0186 X-D57D3AED: 3ZO7eAau8CL7WIMRKs4sN3D3tLDjz0dLbV79QFUyzQ2Ujvy7cMT6pYYqY16iZVKkSc3dCLJ7zSJH7+u4VD18S7Vl4ZUrpaVfd2+vE6kuoey4m4VkSEu530nj6fImhcD4MUrOEAnl0W826KZ9Q+tr5ycPtXkTV4k65bRjmOUUP8cvGozZ33TWg5HZplvhhXbhDGzqmQDTd6OAevLeAnq3Ra9uf7zvY2zzsIhlcp/Y7m53TZgf2aB4JOg4gkr2bioj/2kGGGWRGj6fmCC4nFKTsg== X-Mailru-Sender: 689FA8AB762F7393C37E3C1AEC41BA5D25C28E90379FFD0E01D28AD199A4E65083D72C36FC87018B9F80AB2734326CD2FB559BB5D741EB96352A0ABBE4FDA4210A04DAD6CC59E33667EA787935ED9F1B X-Mras: Ok Subject: [Tarantool-patches] [PATCH v1 6/8] sql: rework POSITION() function X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Mergen Imeev via Tarantool-patches Reply-To: imeevma@tarantool.org Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" This patch is a refactoring of POSITION(). In addition, VARBINARY arguments can now be used in this function. Part of #4145 --- src/box/sql/func.c | 201 +++++++++++---------------------- test/sql-tap/position.test.lua | 80 +++++++------ 2 files changed, 110 insertions(+), 171 deletions(-) diff --git a/src/box/sql/func.c b/src/box/sql/func.c index 1d1a8b0cd..415a92738 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -530,6 +530,68 @@ func_trim_str(struct sql_context *ctx, int argc, struct Mem *argv) ctx->is_aborted = true; } +/** Implementation of the POSITION() function. */ +static void +func_position_octets(struct sql_context *ctx, int argc, struct Mem *argv) +{ + assert(argc == 2); + (void)argc; + if (mem_is_null(&argv[0]) || mem_is_null(&argv[1])) + return; + assert(mem_is_bytes(&argv[0]) && mem_is_bytes(&argv[1])); + + const char *key = argv[0].z; + const char *str = argv[1].z; + int key_size = argv[0].n; + int str_size = argv[1].n; + if (key_size <= 0) + return mem_set_uint(ctx->pOut, 1); + /* Matching time O(n * m). */ + for (int i = 0; i <= str_size - key_size; ++i) { + if (memcmp(&str[i], key, key_size) == 0) + return mem_set_uint(ctx->pOut, i + 1); + } + return mem_set_uint(ctx->pOut, 0); +} + +static void +func_position_characters(struct sql_context *ctx, int argc, struct Mem *argv) +{ + assert(argc == 2); + (void)argc; + if (mem_is_null(&argv[0]) || mem_is_null(&argv[1])) + return; + assert(mem_is_str(&argv[0]) && mem_is_str(&argv[1])); + + const char *key = argv[0].z; + const char *str = argv[1].z; + int key_size = argv[0].n; + int str_size = argv[1].n; + if (key_size <= 0) + return mem_set_uint(ctx->pOut, 1); + int key_len = utf8_len_str(key, key_size); + + int start = 0; + int end = 0; + for (int i = 0; i < key_len && end <= str_size; ++i) + end += utf8_len_char(str[end]); + if (end > str_size) + return mem_set_uint(ctx->pOut, 0); + int i = 0; + while (end <= str_size) { + struct coll *coll = ctx->coll; + const char *s = &str[start]; + if (coll->cmp(key, key_size, s, end - start, coll) == 0) + return mem_set_uint(ctx->pOut, i + 1); + start += utf8_len_char(str[start]); + if (end == str_size) + break; + end += utf8_len_char(str[end]); + ++i; + } + return mem_set_uint(ctx->pOut, 0); +} + static const unsigned char * mem_as_ustr(struct Mem *mem) { @@ -693,141 +755,6 @@ lengthFunc(struct sql_context *context, int argc, struct Mem *argv) } } -/** - * Implementation of the position() function. - * - * position(needle, haystack) finds the first occurrence of needle - * in haystack and returns the number of previous characters - * plus 1, or 0 if needle does not occur within haystack. - * - * If both haystack and needle are BLOBs, then the result is one - * more than the number of bytes in haystack prior to the first - * occurrence of needle, or 0 if needle never occurs in haystack. - */ -static void -position_func(struct sql_context *context, int argc, struct Mem *argv) -{ - UNUSED_PARAMETER(argc); - struct Mem *needle = &argv[0]; - struct Mem *haystack = &argv[1]; - enum mp_type needle_type = sql_value_type(needle); - enum mp_type haystack_type = sql_value_type(haystack); - - if (haystack_type == MP_NIL || needle_type == MP_NIL) - return; - /* - * Position function can be called only with string - * or blob params. - */ - struct Mem *inconsistent_type_arg = NULL; - if (needle_type != MP_STR && needle_type != MP_BIN) - inconsistent_type_arg = needle; - if (haystack_type != MP_STR && haystack_type != MP_BIN) - inconsistent_type_arg = haystack; - if (inconsistent_type_arg != NULL) { - diag_set(ClientError, ER_INCONSISTENT_TYPES, - "string or varbinary", mem_str(inconsistent_type_arg)); - context->is_aborted = true; - return; - } - /* - * Both params of Position function must be of the same - * type. - */ - if (haystack_type != needle_type) { - diag_set(ClientError, ER_INCONSISTENT_TYPES, - mem_type_to_str(needle), mem_str(haystack)); - context->is_aborted = true; - return; - } - - int n_needle_bytes = mem_len_unsafe(needle); - int n_haystack_bytes = mem_len_unsafe(haystack); - int position = 1; - if (n_needle_bytes > 0) { - const unsigned char *haystack_str; - const unsigned char *needle_str; - if (haystack_type == MP_BIN) { - needle_str = mem_as_bin(needle); - haystack_str = mem_as_bin(haystack); - assert(needle_str != NULL); - assert(haystack_str != NULL || n_haystack_bytes == 0); - /* - * Naive implementation of substring - * searching: matching time O(n * m). - * Can be improved. - */ - while (n_needle_bytes <= n_haystack_bytes && - memcmp(haystack_str, needle_str, n_needle_bytes) != 0) { - position++; - n_haystack_bytes--; - haystack_str++; - } - if (n_needle_bytes > n_haystack_bytes) - position = 0; - } else { - /* - * Code below handles not only simple - * cases like position('a', 'bca'), but - * also more complex ones: - * position('a', 'bcá' COLLATE "unicode_ci") - * To do so, we need to use comparison - * window, which has constant character - * size, but variable byte size. - * Character size is equal to - * needle char size. - */ - haystack_str = mem_as_ustr(haystack); - needle_str = mem_as_ustr(needle); - - int n_needle_chars = - sql_utf8_char_count(needle_str, n_needle_bytes); - int n_haystack_chars = - sql_utf8_char_count(haystack_str, - n_haystack_bytes); - - if (n_haystack_chars < n_needle_chars) { - position = 0; - goto finish; - } - /* - * Comparison window is determined by - * beg_offset and end_offset. beg_offset - * is offset in bytes from haystack - * beginning to window beginning. - * end_offset is offset in bytes from - * haystack beginning to window end. - */ - int end_offset = 0; - for (int c = 0; c < n_needle_chars; c++) { - SQL_UTF8_FWD_1(haystack_str, end_offset, - n_haystack_bytes); - } - int beg_offset = 0; - struct coll *coll = context->coll; - int c; - for (c = 0; c + n_needle_chars <= n_haystack_chars; c++) { - if (coll->cmp((const char *) haystack_str + beg_offset, - end_offset - beg_offset, - (const char *) needle_str, - n_needle_bytes, coll) == 0) - goto finish; - position++; - /* Update offsets. */ - SQL_UTF8_FWD_1(haystack_str, beg_offset, - n_haystack_bytes); - SQL_UTF8_FWD_1(haystack_str, end_offset, - n_haystack_bytes); - } - /* Needle was not found in the haystack. */ - position = 0; - } - } -finish: - assert(position >= 0); - sql_result_uint(context, position); -} - /* * Implementation of the printf() function. */ @@ -2004,7 +1931,9 @@ static struct sql_func_definition definitions[] = { {"NULLIF", 2, {FIELD_TYPE_ANY, FIELD_TYPE_ANY}, FIELD_TYPE_SCALAR, func_nullif, NULL}, {"POSITION", 2, {FIELD_TYPE_STRING, FIELD_TYPE_STRING}, - FIELD_TYPE_INTEGER, position_func, NULL}, + FIELD_TYPE_INTEGER, func_position_characters, NULL}, + {"POSITION", 2, {FIELD_TYPE_VARBINARY, FIELD_TYPE_VARBINARY}, + FIELD_TYPE_INTEGER, func_position_octets, NULL}, {"PRINTF", -1, {FIELD_TYPE_ANY}, FIELD_TYPE_STRING, printfFunc, NULL}, {"QUOTE", 1, {FIELD_TYPE_ANY}, FIELD_TYPE_STRING, quoteFunc, NULL}, diff --git a/test/sql-tap/position.test.lua b/test/sql-tap/position.test.lua index 6a96ed9bc..5f62c7f54 100755 --- a/test/sql-tap/position.test.lua +++ b/test/sql-tap/position.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool local test = require("sqltester") -test:plan(80) +test:plan(81) test:do_test( "position-1.1", @@ -305,130 +305,130 @@ test:do_test( test:do_test( "position-1.31", function() - return test:catchsql "SELECT position(x'01', x'0102030405');" + return test:execsql "SELECT position(x'01', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 1 -- }) test:do_test( "position-1.32", function() - return test:catchsql "SELECT position(x'02', x'0102030405');" + return test:execsql "SELECT position(x'02', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 2 -- }) test:do_test( "position-1.33", function() - return test:catchsql "SELECT position(x'03', x'0102030405');" + return test:execsql "SELECT position(x'03', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 3 -- }) test:do_test( "position-1.34", function() - return test:catchsql "SELECT position(x'04', x'0102030405');" + return test:execsql "SELECT position(x'04', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 4 -- }) test:do_test( "position-1.35", function() - return test:catchsql "SELECT position(x'05', x'0102030405');" + return test:execsql "SELECT position(x'05', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 5 -- }) test:do_test( "position-1.36", function() - return test:catchsql "SELECT position(x'06', x'0102030405');" + return test:execsql "SELECT position(x'06', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 0 -- }) test:do_test( "position-1.37", function() - return test:catchsql "SELECT position(x'0102030405', x'0102030405');" + return test:execsql "SELECT position(x'0102030405', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 1 -- }) test:do_test( "position-1.38", function() - return test:catchsql "SELECT position(x'02030405', x'0102030405');" + return test:execsql "SELECT position(x'02030405', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 2 -- }) test:do_test( "position-1.39", function() - return test:catchsql "SELECT position(x'030405', x'0102030405');" + return test:execsql "SELECT position(x'030405', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 3 -- }) test:do_test( "position-1.40", function() - return test:catchsql "SELECT position(x'0405', x'0102030405');" + return test:execsql "SELECT position(x'0405', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 4 -- }) test:do_test( "position-1.41", function() - return test:catchsql "SELECT position(x'0506', x'0102030405');" + return test:execsql "SELECT position(x'0506', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 0 -- }) test:do_test( "position-1.42", function() - return test:catchsql "SELECT position(x'', x'0102030405');" + return test:execsql "SELECT position(x'', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 1 -- }) test:do_test( "position-1.43", function() - return test:catchsql "SELECT position(x'', x'');" + return test:execsql "SELECT position(x'', x'');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 1 -- }) @@ -571,40 +571,40 @@ test:do_test( test:do_test( "position-1.56.1", function() - return test:catchsql "SELECT position(x'79', x'78c3a4e282ac79');" + return test:execsql "SELECT position(x'79', x'78c3a4e282ac79');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 7 -- }) test:do_test( "position-1.56.2", function() - return test:catchsql "SELECT position(x'7a', x'78c3a4e282ac79');" + return test:execsql "SELECT position(x'7a', x'78c3a4e282ac79');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 0 -- }) test:do_test( "position-1.56.3", function() - return test:catchsql "SELECT position(x'78', x'78c3a4e282ac79');" + return test:execsql "SELECT position(x'78', x'78c3a4e282ac79');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 1 -- }) test:do_test( "position-1.56.3", function() - return test:catchsql "SELECT position(x'a4', x'78c3a4e282ac79');" + return test:execsql "SELECT position(x'a4', x'78c3a4e282ac79');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 3 -- }) @@ -858,4 +858,14 @@ test:do_catchsql_test( } ) +-- gh-4145: Make sure that POSITION() can wirk with VARBINARY. +test:do_execsql_test( + "position-2", + [[ + SELECT POSITION(x'313233', x'30313231323334353132333435'); + ]], { + 4 + } +) + test:finish_test() -- 2.25.1