From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id D7EE56E1C9; Thu, 11 Nov 2021 13:48:28 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org D7EE56E1C9 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1636627709; bh=H6V8byEB+dqiG+GtteVNgJB7yqD7K/otTlDAjZdFca4=; h=To:Cc:Date:In-Reply-To:References:Subject:List-Id: List-Unsubscribe:List-Archive:List-Post:List-Help:List-Subscribe: From:Reply-To:From; b=rfYqrQ/NH/7GED5O2IMOZ9N7keGTDcY14uVxdzFtUfQEC3LJ0VLWz/nki5zcQOMp+ 02reBXcTzaWjraBOn4gMfX2WCzMW+kEV9cRfKG4kpnO/0fNJ/iNOO614IIU6YJDd1W 6oLvCVAzcaQVfHcKkDnx9zIMbik6y1AV3VG7oNJc= Received: from smtpng1.i.mail.ru (smtpng1.i.mail.ru [94.100.181.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 1012E6E1C9 for ; Thu, 11 Nov 2021 13:45:37 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 1012E6E1C9 Received: by smtpng1.m.smailru.net with esmtpa (envelope-from ) id 1ml7aJ-00033B-Tv; Thu, 11 Nov 2021 13:45:36 +0300 To: kyukhin@tarantool.org Cc: tarantool-patches@dev.tarantool.org Date: Thu, 11 Nov 2021 13:45:35 +0300 Message-Id: <72fce0c2d354058e699fc644ef30fa4ef0d72e56.1636627366.git.imeevma@gmail.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: References: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-4EC0790: 10 X-7564579A: B8F34718100C35BD X-77F55803: 4F1203BC0FB41BD9731B3922EC0639792BB520D6BC54996EEEF308B3433E7AC900894C459B0CD1B946425A1D202025EDCFD8BBF07179FF17BD7B9CED87D255675872F0C4753F95D2 X-7FA49CB5: FF5795518A3D127A4AD6D5ED66289B5278DA827A17800CE743AE26858062A689EA1F7E6F0F101C67BD4B6F7A4D31EC0BCC500DACC3FED6E28638F802B75D45FF8AA50765F7900637E8D1333770DC60CDEA1F7E6F0F101C6723150C8DA25C47586E58E00D9D99D84E1BDDB23E98D2D38BBCA57AF85F7723F2174CB66EC70079BE84EDE143163A9254CC7F00164DA146DAFE8445B8C89999728AA50765F7900637F6B57BC7E64490618DEB871D839B7333395957E7521B51C2DFABB839C843B9C08941B15DA834481F8AA50765F7900637F6B57BC7E6449061A352F6E88A58FB86F5D81C698A659EA7E827F84554CEF5019E625A9149C048EE9ECD01F8117BC8BEE2021AF6380DFAD18AA50765F790063735872C767BF85DA227C277FBC8AE2E8BDC0F6C5B2EEF3D0C75ECD9A6C639B01B4E70A05D1297E1BBCB5012B2E24CD356 X-B7AD71C0: AC4F5C86D027EB782CDD5689AFBDA7A213B5FB47DCBC3458834459D11680B505BC806F8490DDC35FCEB4846ADD1ADC6F X-C1DE0DAB: C20DE7B7AB408E4181F030C43753B8186998911F362727C414F749A5E30D975C7E9FEBB9C11794A1CD8C7A55D66F0DD0E4EBBED2973F77829C2B6934AE262D3EE7EAB7254005DCED7532B743992DF240BDC6A1CF3F042BAD6DF99611D93F60EF309DFB797F6729CB699F904B3F4130E343918A1A30D5E7FCCB5012B2E24CD356 X-C8649E89: 4E36BF7865823D7055A7F0CF078B5EC49A30900B95165D34829444FF2D8CB89B9E5B22CED5530517B1BD1E2936D59DA631E67EC1948EF8075648DEF8CDA306B11D7E09C32AA3244C2B2C0B9F2C38C7CC7733FFF9BC028D3360759606DA2E136A729B2BEF169E0186 X-D57D3AED: 3ZO7eAau8CL7WIMRKs4sN3D3tLDjz0dLbV79QFUyzQ2Ujvy7cMT6pYYqY16iZVKkSc3dCLJ7zSJH7+u4VD18S7Vl4ZUrpaVfd2+vE6kuoey4m4VkSEu530nj6fImhcD4MUrOEAnl0W826KZ9Q+tr5ycPtXkTV4k65bRjmOUUP8cvGozZ33TWg5HZplvhhXbhDGzqmQDTd6OAevLeAnq3Ra9uf7zvY2zzsIhlcp/Y7m53TZgf2aB4JOg4gkr2bioj4t8MBgWr8bJhrOyyQPwH7Q== X-Mailru-Sender: 689FA8AB762F7393C37E3C1AEC41BA5D692E5958C0A5236DD353ED039E503FDF83D72C36FC87018B9F80AB2734326CD2FB559BB5D741EB96352A0ABBE4FDA4210A04DAD6CC59E33667EA787935ED9F1B X-Mras: Ok Subject: [Tarantool-patches] [PATCH v1 6/8] sql: rework POSITION() function X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Mergen Imeev via Tarantool-patches Reply-To: imeevma@tarantool.org Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" This patch is a refactoring of POSITION(). In addition, VARBINARY arguments can now be used in this function. In addition, POSITION() now uses ICU functions instead of self-created. Part of #4145 --- src/box/sql/func.c | 203 +++++++++++---------------------- test/sql-tap/position.test.lua | 80 +++++++------ 2 files changed, 112 insertions(+), 171 deletions(-) diff --git a/src/box/sql/func.c b/src/box/sql/func.c index ba6b9246d..7914d2ec7 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -509,6 +509,70 @@ func_trim_str(struct sql_context *ctx, int argc, struct Mem *argv) ctx->is_aborted = true; } +/** Implementation of the POSITION() function. */ +static void +func_position_octets(struct sql_context *ctx, int argc, struct Mem *argv) +{ + assert(argc == 2); + (void)argc; + if (mem_is_any_null(&argv[0], &argv[1])) + return; + assert(mem_is_bytes(&argv[0]) && mem_is_bytes(&argv[1])); + + const char *key = argv[0].z; + const char *str = argv[1].z; + int key_size = argv[0].n; + int str_size = argv[1].n; + if (key_size <= 0) + return mem_set_uint(ctx->pOut, 1); + const char *pos = memmem(str, str_size, key, key_size); + return mem_set_uint(ctx->pOut, pos == NULL ? 0 : pos - str + 1); +} + +static void +func_position_characters(struct sql_context *ctx, int argc, struct Mem *argv) +{ + assert(argc == 2); + (void)argc; + if (mem_is_any_null(&argv[0], &argv[1])) + return; + assert(mem_is_str(&argv[0]) && mem_is_str(&argv[1])); + + const char *key = argv[0].z; + const char *str = argv[1].z; + int key_size = argv[0].n; + int str_size = argv[1].n; + if (key_size <= 0) + return mem_set_uint(ctx->pOut, 1); + + int key_end = 0; + int str_end = 0; + while (key_end < key_size && str_end < str_size) { + UChar32 c; + U8_NEXT((uint8_t *)key, key_end, key_size, c); + U8_NEXT((uint8_t *)str, str_end, str_size, c); + } + if (key_end < key_size) + return mem_set_uint(ctx->pOut, 0); + + struct coll *coll = ctx->coll; + if (coll->cmp(key, key_size, str, str_end, coll) == 0) + return mem_set_uint(ctx->pOut, 1); + + int i = 2; + int str_pos = 0; + while (str_end < str_size) { + UChar32 c; + U8_NEXT((uint8_t *)str, str_pos, str_size, c); + U8_NEXT((uint8_t *)str, str_end, str_size, c); + const char *s = str + str_pos; + if (coll->cmp(key, key_size, s, str_end - str_pos, coll) == 0) + return mem_set_uint(ctx->pOut, i); + ++i; + } + return mem_set_uint(ctx->pOut, 0); +} + static const unsigned char * mem_as_ustr(struct Mem *mem) { @@ -672,141 +736,6 @@ lengthFunc(struct sql_context *context, int argc, struct Mem *argv) } } -/** - * Implementation of the position() function. - * - * position(needle, haystack) finds the first occurrence of needle - * in haystack and returns the number of previous characters - * plus 1, or 0 if needle does not occur within haystack. - * - * If both haystack and needle are BLOBs, then the result is one - * more than the number of bytes in haystack prior to the first - * occurrence of needle, or 0 if needle never occurs in haystack. - */ -static void -position_func(struct sql_context *context, int argc, struct Mem *argv) -{ - UNUSED_PARAMETER(argc); - struct Mem *needle = &argv[0]; - struct Mem *haystack = &argv[1]; - enum mp_type needle_type = sql_value_type(needle); - enum mp_type haystack_type = sql_value_type(haystack); - - if (haystack_type == MP_NIL || needle_type == MP_NIL) - return; - /* - * Position function can be called only with string - * or blob params. - */ - struct Mem *inconsistent_type_arg = NULL; - if (needle_type != MP_STR && needle_type != MP_BIN) - inconsistent_type_arg = needle; - if (haystack_type != MP_STR && haystack_type != MP_BIN) - inconsistent_type_arg = haystack; - if (inconsistent_type_arg != NULL) { - diag_set(ClientError, ER_INCONSISTENT_TYPES, - "string or varbinary", mem_str(inconsistent_type_arg)); - context->is_aborted = true; - return; - } - /* - * Both params of Position function must be of the same - * type. - */ - if (haystack_type != needle_type) { - diag_set(ClientError, ER_INCONSISTENT_TYPES, - mem_type_to_str(needle), mem_str(haystack)); - context->is_aborted = true; - return; - } - - int n_needle_bytes = mem_len_unsafe(needle); - int n_haystack_bytes = mem_len_unsafe(haystack); - int position = 1; - if (n_needle_bytes > 0) { - const unsigned char *haystack_str; - const unsigned char *needle_str; - if (haystack_type == MP_BIN) { - needle_str = mem_as_bin(needle); - haystack_str = mem_as_bin(haystack); - assert(needle_str != NULL); - assert(haystack_str != NULL || n_haystack_bytes == 0); - /* - * Naive implementation of substring - * searching: matching time O(n * m). - * Can be improved. - */ - while (n_needle_bytes <= n_haystack_bytes && - memcmp(haystack_str, needle_str, n_needle_bytes) != 0) { - position++; - n_haystack_bytes--; - haystack_str++; - } - if (n_needle_bytes > n_haystack_bytes) - position = 0; - } else { - /* - * Code below handles not only simple - * cases like position('a', 'bca'), but - * also more complex ones: - * position('a', 'bcá' COLLATE "unicode_ci") - * To do so, we need to use comparison - * window, which has constant character - * size, but variable byte size. - * Character size is equal to - * needle char size. - */ - haystack_str = mem_as_ustr(haystack); - needle_str = mem_as_ustr(needle); - - int n_needle_chars = - sql_utf8_char_count(needle_str, n_needle_bytes); - int n_haystack_chars = - sql_utf8_char_count(haystack_str, - n_haystack_bytes); - - if (n_haystack_chars < n_needle_chars) { - position = 0; - goto finish; - } - /* - * Comparison window is determined by - * beg_offset and end_offset. beg_offset - * is offset in bytes from haystack - * beginning to window beginning. - * end_offset is offset in bytes from - * haystack beginning to window end. - */ - int end_offset = 0; - for (int c = 0; c < n_needle_chars; c++) { - SQL_UTF8_FWD_1(haystack_str, end_offset, - n_haystack_bytes); - } - int beg_offset = 0; - struct coll *coll = context->coll; - int c; - for (c = 0; c + n_needle_chars <= n_haystack_chars; c++) { - if (coll->cmp((const char *) haystack_str + beg_offset, - end_offset - beg_offset, - (const char *) needle_str, - n_needle_bytes, coll) == 0) - goto finish; - position++; - /* Update offsets. */ - SQL_UTF8_FWD_1(haystack_str, beg_offset, - n_haystack_bytes); - SQL_UTF8_FWD_1(haystack_str, end_offset, - n_haystack_bytes); - } - /* Needle was not found in the haystack. */ - position = 0; - } - } -finish: - assert(position >= 0); - sql_result_uint(context, position); -} - /* * Implementation of the printf() function. */ @@ -1982,7 +1911,9 @@ static struct sql_func_definition definitions[] = { {"NULLIF", 2, {FIELD_TYPE_ANY, FIELD_TYPE_ANY}, FIELD_TYPE_SCALAR, func_nullif, NULL}, {"POSITION", 2, {FIELD_TYPE_STRING, FIELD_TYPE_STRING}, - FIELD_TYPE_INTEGER, position_func, NULL}, + FIELD_TYPE_INTEGER, func_position_characters, NULL}, + {"POSITION", 2, {FIELD_TYPE_VARBINARY, FIELD_TYPE_VARBINARY}, + FIELD_TYPE_INTEGER, func_position_octets, NULL}, {"PRINTF", -1, {FIELD_TYPE_ANY}, FIELD_TYPE_STRING, printfFunc, NULL}, {"QUOTE", 1, {FIELD_TYPE_ANY}, FIELD_TYPE_STRING, quoteFunc, NULL}, diff --git a/test/sql-tap/position.test.lua b/test/sql-tap/position.test.lua index 6a96ed9bc..e49f4665a 100755 --- a/test/sql-tap/position.test.lua +++ b/test/sql-tap/position.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool local test = require("sqltester") -test:plan(80) +test:plan(81) test:do_test( "position-1.1", @@ -305,130 +305,130 @@ test:do_test( test:do_test( "position-1.31", function() - return test:catchsql "SELECT position(x'01', x'0102030405');" + return test:execsql "SELECT position(x'01', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 1 -- }) test:do_test( "position-1.32", function() - return test:catchsql "SELECT position(x'02', x'0102030405');" + return test:execsql "SELECT position(x'02', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 2 -- }) test:do_test( "position-1.33", function() - return test:catchsql "SELECT position(x'03', x'0102030405');" + return test:execsql "SELECT position(x'03', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 3 -- }) test:do_test( "position-1.34", function() - return test:catchsql "SELECT position(x'04', x'0102030405');" + return test:execsql "SELECT position(x'04', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 4 -- }) test:do_test( "position-1.35", function() - return test:catchsql "SELECT position(x'05', x'0102030405');" + return test:execsql "SELECT position(x'05', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 5 -- }) test:do_test( "position-1.36", function() - return test:catchsql "SELECT position(x'06', x'0102030405');" + return test:execsql "SELECT position(x'06', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 0 -- }) test:do_test( "position-1.37", function() - return test:catchsql "SELECT position(x'0102030405', x'0102030405');" + return test:execsql "SELECT position(x'0102030405', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 1 -- }) test:do_test( "position-1.38", function() - return test:catchsql "SELECT position(x'02030405', x'0102030405');" + return test:execsql "SELECT position(x'02030405', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 2 -- }) test:do_test( "position-1.39", function() - return test:catchsql "SELECT position(x'030405', x'0102030405');" + return test:execsql "SELECT position(x'030405', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 3 -- }) test:do_test( "position-1.40", function() - return test:catchsql "SELECT position(x'0405', x'0102030405');" + return test:execsql "SELECT position(x'0405', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 4 -- }) test:do_test( "position-1.41", function() - return test:catchsql "SELECT position(x'0506', x'0102030405');" + return test:execsql "SELECT position(x'0506', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 0 -- }) test:do_test( "position-1.42", function() - return test:catchsql "SELECT position(x'', x'0102030405');" + return test:execsql "SELECT position(x'', x'0102030405');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 1 -- }) test:do_test( "position-1.43", function() - return test:catchsql "SELECT position(x'', x'');" + return test:execsql "SELECT position(x'', x'');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 1 -- }) @@ -571,40 +571,40 @@ test:do_test( test:do_test( "position-1.56.1", function() - return test:catchsql "SELECT position(x'79', x'78c3a4e282ac79');" + return test:execsql "SELECT position(x'79', x'78c3a4e282ac79');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 7 -- }) test:do_test( "position-1.56.2", function() - return test:catchsql "SELECT position(x'7a', x'78c3a4e282ac79');" + return test:execsql "SELECT position(x'7a', x'78c3a4e282ac79');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 0 -- }) test:do_test( "position-1.56.3", function() - return test:catchsql "SELECT position(x'78', x'78c3a4e282ac79');" + return test:execsql "SELECT position(x'78', x'78c3a4e282ac79');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 1 -- }) test:do_test( "position-1.56.3", function() - return test:catchsql "SELECT position(x'a4', x'78c3a4e282ac79');" + return test:execsql "SELECT position(x'a4', x'78c3a4e282ac79');" end, { -- - 1, "Failed to execute SQL statement: wrong arguments for function POSITION()" + 3 -- }) @@ -858,4 +858,14 @@ test:do_catchsql_test( } ) +-- gh-4145: Make sure POSITION() can work with VARBINARY. +test:do_execsql_test( + "position-2", + [[ + SELECT POSITION(x'313233', x'30313231323334353132333435'); + ]], { + 4 + } +) + test:finish_test() -- 2.25.1