* [Tarantool-patches] [PATCH v1 5/8] sql: rework TRIM() function
2021-11-11 10:45 [Tarantool-patches] [PATCH v1 0/8] Rework standard function Mergen Imeev via Tarantool-patches
` (3 preceding siblings ...)
2021-11-11 10:45 ` [Tarantool-patches] [PATCH v1 4/8] sql: refactor NULLIF() function Mergen Imeev via Tarantool-patches
@ 2021-11-11 10:45 ` Mergen Imeev via Tarantool-patches
2021-11-11 10:45 ` [Tarantool-patches] [PATCH v1 6/8] sql: rework POSITION() function Mergen Imeev via Tarantool-patches
` (3 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Mergen Imeev via Tarantool-patches @ 2021-11-11 10:45 UTC (permalink / raw)
To: kyukhin; +Cc: tarantool-patches
This patch refactoring TRIM() and fixes an issue with incorrect trimming
of some VARBINARY values. Also, TRIM() now use ICU functions instead of
self-created.
Part of #4415
---
src/box/sql/func.c | 361 ++++++++++++++++------------------
test/sql-tap/badutf1.test.lua | 41 ++--
2 files changed, 198 insertions(+), 204 deletions(-)
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 7d54a39cd..ba6b9246d 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -343,6 +343,172 @@ func_nullif(struct sql_context *ctx, int argc, struct Mem *argv)
ctx->is_aborted = true;
}
+/** Implementation of the TRIM() function. */
+static inline int
+trim_bin_end(const char *str, int end, const char *octets, int octets_size,
+ int flags)
+{
+ if ((flags & TRIM_TRAILING) == 0)
+ return end;
+ while (end > 0) {
+ bool is_trimmed = false;
+ char c = str[end - 1];
+ for (int i = 0; i < octets_size && !is_trimmed; ++i)
+ is_trimmed = c == octets[i];
+ if (!is_trimmed)
+ break;
+ --end;
+ }
+ return end;
+}
+
+static inline int
+trim_bin_start(const char *str, int end, const char *octets, int octets_size,
+ int flags)
+{
+ if ((flags & TRIM_LEADING) == 0)
+ return 0;
+ int start = 0;
+ while (start < end) {
+ bool is_trimmed = false;
+ char c = str[start];
+ for (int i = 0; i < octets_size && !is_trimmed; ++i)
+ is_trimmed = c == octets[i];
+ if (!is_trimmed)
+ break;
+ ++start;
+ }
+ return start;
+}
+
+static void
+func_trim_bin(struct sql_context *ctx, int argc, struct Mem *argv)
+{
+ if (mem_is_null(&argv[0]) || (argc == 3 && mem_is_null(&argv[2])))
+ return;
+ assert(argc == 2 || (argc == 3 && mem_is_bin(&argv[2])));
+ assert(mem_is_bin(&argv[0]) && mem_is_uint(&argv[1]));
+ const char *str = argv[0].z;
+ int size = argv[0].n;
+ const char *octets;
+ int octets_size;
+ if (argc == 3) {
+ octets = argv[2].z;
+ octets_size = argv[2].n;
+ } else {
+ octets = "\0";
+ octets_size = 1;
+ }
+
+ int flags = argv[1].u.u;
+ int end = trim_bin_end(str, size, octets, octets_size, flags);
+ int start = trim_bin_start(str, end, octets, octets_size, flags);
+
+ if (start >= end)
+ return mem_set_bin_static(ctx->pOut, "", 0);
+ if (mem_copy_bin(ctx->pOut, &str[start], end - start) != 0)
+ ctx->is_aborted = true;
+}
+
+static inline int
+trim_str_end(const char *str, int end, const char *chars, uint8_t *chars_len,
+ int chars_count, int flags)
+{
+ if ((flags & TRIM_TRAILING) == 0)
+ return end;
+ while (end > 0) {
+ bool is_trimmed = false;
+ const char *c = chars;
+ int len;
+ for (int i = 0; i < chars_count && !is_trimmed; ++i) {
+ len = chars_len[i];
+ const char *s = str + end - len;
+ is_trimmed = len <= end && memcmp(c, s, len) == 0;
+ c += len;
+ }
+ if (!is_trimmed)
+ break;
+ assert(len > 0);
+ end -= len;
+ }
+ return end;
+}
+
+static inline int
+trim_str_start(const char *str, int end, const char *chars, uint8_t *chars_len,
+ int chars_count, int flags)
+{
+ if ((flags & TRIM_LEADING) == 0)
+ return 0;
+ int start = 0;
+ while (start < end) {
+ bool is_trimmed = false;
+ const char *c = chars;
+ int len;
+ for (int i = 0; i < chars_count && !is_trimmed; ++i) {
+ len = chars_len[i];
+ const char *s = str + start;
+ is_trimmed = start + len <= end &&
+ memcmp(c, s, len) == 0;
+ c += len;
+ }
+ if (!is_trimmed)
+ break;
+ assert(len > 0);
+ start += len;
+ }
+ return start;
+}
+
+static void
+func_trim_str(struct sql_context *ctx, int argc, struct Mem *argv)
+{
+ if (mem_is_null(&argv[0]) || (argc == 3 && mem_is_null(&argv[2])))
+ return;
+ assert(argc == 2 || (argc == 3 && mem_is_str(&argv[2])));
+ assert(mem_is_str(&argv[0]) && mem_is_uint(&argv[1]));
+ const char *str = argv[0].z;
+ int size = argv[0].n;
+ const char *chars;
+ int chars_size;
+ if (argc == 3) {
+ chars = argv[2].z;
+ chars_size = argv[2].n;
+ } else {
+ chars = " ";
+ chars_size = 1;
+ }
+
+ struct region *region = &fiber()->gc;
+ size_t svp = region_used(region);
+ uint8_t *chars_len = region_alloc(region, chars_size);
+ if (chars_len == NULL) {
+ ctx->is_aborted = true;
+ diag_set(OutOfMemory, chars_size, "region_alloc", "chars_len");
+ return;
+ }
+ int chars_count = 0;
+
+ int offset = 0;
+ while (offset < chars_size) {
+ UChar32 c;
+ int prev = offset;
+ U8_NEXT((uint8_t *)chars, offset, chars_size, c);
+ chars_len[chars_count++] = offset - prev;
+ }
+
+ uint64_t flags = argv[1].u.u;
+ int end = trim_str_end(str, size, chars, chars_len, chars_count, flags);
+ int start = trim_str_start(str, end, chars, chars_len, chars_count,
+ flags);
+ region_truncate(region, svp);
+
+ if (start >= end)
+ return mem_set_str0_static(ctx->pOut, "");
+ if (mem_copy_str(ctx->pOut, &str[start], end - start) != 0)
+ ctx->is_aborted = true;
+}
+
static const unsigned char *
mem_as_ustr(struct Mem *mem)
{
@@ -1527,193 +1693,6 @@ replaceFunc(struct sql_context *context, int argc, struct Mem *argv)
mem_set_bin_dynamic(context->pOut, (char *)zOut, j);
}
-/**
- * Remove characters included in @a trim_set from @a input_str
- * until encounter a character that doesn't belong to @a trim_set.
- * Remove from the side specified by @a flags.
- * @param context SQL context.
- * @param flags Trim specification: left, right or both.
- * @param trim_set The set of characters for trimming.
- * @param char_len Lengths of each UTF-8 character in @a trim_set.
- * @param char_cnt A number of UTF-8 characters in @a trim_set.
- * @param input_str Input string for trimming.
- * @param input_str_sz Input string size in bytes.
- */
-static void
-trim_procedure(struct sql_context *context, enum trim_side_mask flags,
- const unsigned char *trim_set, const uint8_t *char_len,
- int char_cnt, const unsigned char *input_str, int input_str_sz)
-{
- if (char_cnt == 0)
- goto finish;
- int i, len;
- const unsigned char *z;
- if ((flags & TRIM_LEADING) != 0) {
- while (input_str_sz > 0) {
- z = trim_set;
- for (i = 0; i < char_cnt; ++i, z += len) {
- len = char_len[i];
- if (len <= input_str_sz
- && memcmp(input_str, z, len) == 0)
- break;
- }
- if (i >= char_cnt)
- break;
- input_str += len;
- input_str_sz -= len;
- }
- }
- if ((flags & TRIM_TRAILING) != 0) {
- while (input_str_sz > 0) {
- z = trim_set;
- for (i = 0; i < char_cnt; ++i, z += len) {
- len = char_len[i];
- if (len <= input_str_sz
- && memcmp(&input_str[input_str_sz - len],
- z, len) == 0)
- break;
- }
- if (i >= char_cnt)
- break;
- input_str_sz -= len;
- }
- }
-finish:
- if (context->func->def->returns == FIELD_TYPE_STRING)
- mem_copy_str(context->pOut, (char *)input_str, input_str_sz);
- else
- mem_copy_bin(context->pOut, (char *)input_str, input_str_sz);
-}
-
-/**
- * Prepare arguments for trimming procedure. Allocate memory for
- * @a char_len (array of lengths each character in @a trim_set)
- * and fill it.
- *
- * @param context SQL context.
- * @param trim_set The set of characters for trimming.
- * @param[out] char_len Lengths of each character in @ trim_set.
- * @retval >=0 A number of UTF-8 characters in @a trim_set.
- * @retval -1 Memory allocation error.
- */
-static int
-trim_prepare_char_len(struct sql_context *context,
- const unsigned char *trim_set, int trim_set_sz,
- uint8_t **char_len)
-{
- /*
- * Count the number of UTF-8 characters passing through
- * the entire char set, but not up to the '\0' or X'00'
- * character. This allows to handle trimming set
- * containing such characters.
- */
- int char_cnt = sql_utf8_char_count(trim_set, trim_set_sz);
- if (char_cnt == 0) {
- *char_len = NULL;
- return 0;
- }
-
- if ((*char_len = (uint8_t *)contextMalloc(context, char_cnt)) == NULL)
- return -1;
-
- int i = 0, j = 0;
- while(j < char_cnt) {
- int old_i = i;
- SQL_UTF8_FWD_1(trim_set, i, trim_set_sz);
- (*char_len)[j++] = i - old_i;
- }
-
- return char_cnt;
-}
-
-/**
- * Normalize args from @a argv input array when it has two args.
- *
- * Case: TRIM(<str>)
- * Call trimming procedure with TRIM_BOTH as the flags and " " as
- * the trimming set.
- *
- * Case: TRIM(LEADING/TRAILING/BOTH FROM <str>)
- * If user has specified side keyword only, then call trimming
- * procedure with the specified side and " " as the trimming set.
- */
-static void
-trim_func_two_args(struct sql_context *context, sql_value *arg1,
- sql_value *arg2)
-{
- const unsigned char *trim_set;
- if (mem_is_bin(arg1))
- trim_set = (const unsigned char *)"\0";
- else
- trim_set = (const unsigned char *)" ";
- const unsigned char *input_str;
- if ((input_str = mem_as_ustr(arg1)) == NULL)
- return;
-
- int input_str_sz = mem_len_unsafe(arg1);
- assert(arg2->type == MEM_TYPE_UINT);
- uint8_t len_one = 1;
- trim_procedure(context, arg2->u.u, trim_set,
- &len_one, 1, input_str, input_str_sz);
-}
-
-/**
- * Normalize args from @a argv input array when it has three args.
- *
- * Case: TRIM(<character_set> FROM <str>)
- * If user has specified <character_set> only, call trimming procedure with
- * TRIM_BOTH as the flags and that trimming set.
- *
- * Case: TRIM(LEADING/TRAILING/BOTH <character_set> FROM <str>)
- * If user has specified side keyword and <character_set>, then
- * call trimming procedure with that args.
- */
-static void
-trim_func_three_args(struct sql_context *context, sql_value *arg1,
- sql_value *arg2, sql_value *arg3)
-{
- assert(arg2->type == MEM_TYPE_UINT);
- const unsigned char *input_str, *trim_set;
- if ((input_str = mem_as_ustr(arg1)) == NULL ||
- (trim_set = mem_as_ustr(arg3)) == NULL)
- return;
-
- int trim_set_sz = mem_len_unsafe(arg3);
- int input_str_sz = mem_len_unsafe(arg1);
- uint8_t *char_len;
- int char_cnt = trim_prepare_char_len(context, trim_set, trim_set_sz,
- &char_len);
- if (char_cnt == -1)
- return;
- trim_procedure(context, arg2->u.u, trim_set, char_len,
- char_cnt, input_str, input_str_sz);
- sql_free(char_len);
-}
-
-/**
- * Normalize args from @a argv input array when it has one,
- * two or three args.
- *
- * This is a dispatcher function that calls corresponding
- * implementation depending on the number of arguments.
-*/
-static void
-trim_func(struct sql_context *context, int argc, struct Mem *argv)
-{
- switch (argc) {
- case 2:
- trim_func_two_args(context, &argv[0], &argv[1]);
- break;
- case 3:
- trim_func_three_args(context, &argv[0], &argv[1], &argv[2]);
- break;
- default:
- diag_set(ClientError, ER_FUNC_WRONG_ARG_COUNT, "TRIM",
- "2 or 3", argc);
- context->is_aborted = true;
- }
-}
-
/*
* Compute the soundex encoding of a word.
*
@@ -2040,14 +2019,14 @@ static struct sql_func_definition definitions[] = {
fin_total},
{"TRIM", 2, {FIELD_TYPE_STRING, FIELD_TYPE_INTEGER},
- FIELD_TYPE_STRING, trim_func, NULL},
+ FIELD_TYPE_STRING, func_trim_str, NULL},
{"TRIM", 3, {FIELD_TYPE_STRING, FIELD_TYPE_INTEGER, FIELD_TYPE_STRING},
- FIELD_TYPE_STRING, trim_func, NULL},
+ FIELD_TYPE_STRING, func_trim_str, NULL},
{"TRIM", 2, {FIELD_TYPE_VARBINARY, FIELD_TYPE_INTEGER},
- FIELD_TYPE_VARBINARY, trim_func, NULL},
+ FIELD_TYPE_VARBINARY, func_trim_bin, NULL},
{"TRIM", 3,
{FIELD_TYPE_VARBINARY, FIELD_TYPE_INTEGER, FIELD_TYPE_VARBINARY},
- FIELD_TYPE_VARBINARY, trim_func, NULL},
+ FIELD_TYPE_VARBINARY, func_trim_bin, NULL},
{"TYPEOF", 1, {FIELD_TYPE_ANY}, FIELD_TYPE_STRING, typeofFunc, NULL},
{"UNICODE", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_INTEGER, unicodeFunc,
diff --git a/test/sql-tap/badutf1.test.lua b/test/sql-tap/badutf1.test.lua
index ce8354840..d1e17ca3e 100755
--- a/test/sql-tap/badutf1.test.lua
+++ b/test/sql-tap/badutf1.test.lua
@@ -1,6 +1,6 @@
#!/usr/bin/env tarantool
local test = require("sqltester")
-test:plan(19)
+test:plan(20)
--!./tcltestrunner.lua
-- 2007 May 15
@@ -296,47 +296,62 @@ test:do_test(
test:do_test(
"badutf-4.4",
function()
- return test:execsql2([[SELECT hex(CAST(TRIM(x'ff80' FROM ]]..
- [[x'808080f0808080ff') AS VARBINARY)) AS x]])
+ return test:execsql2([[
+ SELECT hex(TRIM(x'ff80' FROM x'808080f0808080ff')) AS x;
+ ]])
end, {
-- <badutf-4.4>
- "X", "808080F0808080FF"
+ "X", "F0"
-- </badutf-4.4>
})
test:do_test(
"badutf-4.5",
function()
- return test:execsql2([[SELECT hex(CAST(TRIM(x'ff80' FROM ]]..
- [[x'ff8080f0808080ff') AS VARBINARY)) AS x]])
+ return test:execsql2([[
+ SELECT hex(TRIM(x'ff80' FROM x'ff8080f0808080ff')) AS x;
+ ]])
end, {
-- <badutf-4.5>
- "X", "80F0808080FF"
+ "X", "F0"
-- </badutf-4.5>
})
test:do_test(
"badutf-4.6",
function()
- return test:execsql2([[SELECT hex(CAST(TRIM(x'ff80' FROM ]]..
- [[x'ff80f0808080ff') AS VARBINARY)) AS x]])
+ return test:execsql2([[
+ SELECT hex(TRIM(x'ff80' FROM x'ff80f0808080ff')) AS x;
+ ]])
end, {
-- <badutf-4.6>
- "X", "F0808080FF"
+ "X", "F0"
-- </badutf-4.6>
})
test:do_test(
"badutf-4.7",
function()
- return test:execsql2([[SELECT hex(CAST(TRIM(x'ff8080' FROM ]]..
- [[x'ff80f0808080ff') AS VARBINARY)) AS x]])
+ return test:execsql2([[
+ SELECT hex(TRIM(x'ff8080' FROM x'ff80f0808080ff')) AS x;
+ ]])
end, {
-- <badutf-4.7>
- "X", "FF80F0808080FF"
+ "X", "F0"
-- </badutf-4.7>
})
+-- gh-4145: Make sure that TRIM() properly work with VARBINARY.
+test:do_execsql_test(
+ "badutf-5",
+ [[
+ SELECT HEX(TRIM(x'ff1234' from x'1234125678123412'));
+ ]],
+ {
+ '5678'
+ }
+)
+
--db2("close")
--
2.25.1
^ permalink raw reply [flat|nested] 16+ messages in thread
* [Tarantool-patches] [PATCH v1 6/8] sql: rework POSITION() function
2021-11-11 10:45 [Tarantool-patches] [PATCH v1 0/8] Rework standard function Mergen Imeev via Tarantool-patches
` (4 preceding siblings ...)
2021-11-11 10:45 ` [Tarantool-patches] [PATCH v1 5/8] sql: rework TRIM() function Mergen Imeev via Tarantool-patches
@ 2021-11-11 10:45 ` Mergen Imeev via Tarantool-patches
2021-11-11 10:45 ` [Tarantool-patches] [PATCH v1 7/8] sql: rework SUBSTR() function Mergen Imeev via Tarantool-patches
` (2 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Mergen Imeev via Tarantool-patches @ 2021-11-11 10:45 UTC (permalink / raw)
To: kyukhin; +Cc: tarantool-patches
This patch is a refactoring of POSITION(). In addition, VARBINARY
arguments can now be used in this function. In addition, POSITION() now
uses ICU functions instead of self-created.
Part of #4145
---
src/box/sql/func.c | 203 +++++++++++----------------------
test/sql-tap/position.test.lua | 80 +++++++------
2 files changed, 112 insertions(+), 171 deletions(-)
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index ba6b9246d..7914d2ec7 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -509,6 +509,70 @@ func_trim_str(struct sql_context *ctx, int argc, struct Mem *argv)
ctx->is_aborted = true;
}
+/** Implementation of the POSITION() function. */
+static void
+func_position_octets(struct sql_context *ctx, int argc, struct Mem *argv)
+{
+ assert(argc == 2);
+ (void)argc;
+ if (mem_is_any_null(&argv[0], &argv[1]))
+ return;
+ assert(mem_is_bytes(&argv[0]) && mem_is_bytes(&argv[1]));
+
+ const char *key = argv[0].z;
+ const char *str = argv[1].z;
+ int key_size = argv[0].n;
+ int str_size = argv[1].n;
+ if (key_size <= 0)
+ return mem_set_uint(ctx->pOut, 1);
+ const char *pos = memmem(str, str_size, key, key_size);
+ return mem_set_uint(ctx->pOut, pos == NULL ? 0 : pos - str + 1);
+}
+
+static void
+func_position_characters(struct sql_context *ctx, int argc, struct Mem *argv)
+{
+ assert(argc == 2);
+ (void)argc;
+ if (mem_is_any_null(&argv[0], &argv[1]))
+ return;
+ assert(mem_is_str(&argv[0]) && mem_is_str(&argv[1]));
+
+ const char *key = argv[0].z;
+ const char *str = argv[1].z;
+ int key_size = argv[0].n;
+ int str_size = argv[1].n;
+ if (key_size <= 0)
+ return mem_set_uint(ctx->pOut, 1);
+
+ int key_end = 0;
+ int str_end = 0;
+ while (key_end < key_size && str_end < str_size) {
+ UChar32 c;
+ U8_NEXT((uint8_t *)key, key_end, key_size, c);
+ U8_NEXT((uint8_t *)str, str_end, str_size, c);
+ }
+ if (key_end < key_size)
+ return mem_set_uint(ctx->pOut, 0);
+
+ struct coll *coll = ctx->coll;
+ if (coll->cmp(key, key_size, str, str_end, coll) == 0)
+ return mem_set_uint(ctx->pOut, 1);
+
+ int i = 2;
+ int str_pos = 0;
+ while (str_end < str_size) {
+ UChar32 c;
+ U8_NEXT((uint8_t *)str, str_pos, str_size, c);
+ U8_NEXT((uint8_t *)str, str_end, str_size, c);
+ const char *s = str + str_pos;
+ if (coll->cmp(key, key_size, s, str_end - str_pos, coll) == 0)
+ return mem_set_uint(ctx->pOut, i);
+ ++i;
+ }
+ return mem_set_uint(ctx->pOut, 0);
+}
+
static const unsigned char *
mem_as_ustr(struct Mem *mem)
{
@@ -672,141 +736,6 @@ lengthFunc(struct sql_context *context, int argc, struct Mem *argv)
}
}
-/**
- * Implementation of the position() function.
- *
- * position(needle, haystack) finds the first occurrence of needle
- * in haystack and returns the number of previous characters
- * plus 1, or 0 if needle does not occur within haystack.
- *
- * If both haystack and needle are BLOBs, then the result is one
- * more than the number of bytes in haystack prior to the first
- * occurrence of needle, or 0 if needle never occurs in haystack.
- */
-static void
-position_func(struct sql_context *context, int argc, struct Mem *argv)
-{
- UNUSED_PARAMETER(argc);
- struct Mem *needle = &argv[0];
- struct Mem *haystack = &argv[1];
- enum mp_type needle_type = sql_value_type(needle);
- enum mp_type haystack_type = sql_value_type(haystack);
-
- if (haystack_type == MP_NIL || needle_type == MP_NIL)
- return;
- /*
- * Position function can be called only with string
- * or blob params.
- */
- struct Mem *inconsistent_type_arg = NULL;
- if (needle_type != MP_STR && needle_type != MP_BIN)
- inconsistent_type_arg = needle;
- if (haystack_type != MP_STR && haystack_type != MP_BIN)
- inconsistent_type_arg = haystack;
- if (inconsistent_type_arg != NULL) {
- diag_set(ClientError, ER_INCONSISTENT_TYPES,
- "string or varbinary", mem_str(inconsistent_type_arg));
- context->is_aborted = true;
- return;
- }
- /*
- * Both params of Position function must be of the same
- * type.
- */
- if (haystack_type != needle_type) {
- diag_set(ClientError, ER_INCONSISTENT_TYPES,
- mem_type_to_str(needle), mem_str(haystack));
- context->is_aborted = true;
- return;
- }
-
- int n_needle_bytes = mem_len_unsafe(needle);
- int n_haystack_bytes = mem_len_unsafe(haystack);
- int position = 1;
- if (n_needle_bytes > 0) {
- const unsigned char *haystack_str;
- const unsigned char *needle_str;
- if (haystack_type == MP_BIN) {
- needle_str = mem_as_bin(needle);
- haystack_str = mem_as_bin(haystack);
- assert(needle_str != NULL);
- assert(haystack_str != NULL || n_haystack_bytes == 0);
- /*
- * Naive implementation of substring
- * searching: matching time O(n * m).
- * Can be improved.
- */
- while (n_needle_bytes <= n_haystack_bytes &&
- memcmp(haystack_str, needle_str, n_needle_bytes) != 0) {
- position++;
- n_haystack_bytes--;
- haystack_str++;
- }
- if (n_needle_bytes > n_haystack_bytes)
- position = 0;
- } else {
- /*
- * Code below handles not only simple
- * cases like position('a', 'bca'), but
- * also more complex ones:
- * position('a', 'bcá' COLLATE "unicode_ci")
- * To do so, we need to use comparison
- * window, which has constant character
- * size, but variable byte size.
- * Character size is equal to
- * needle char size.
- */
- haystack_str = mem_as_ustr(haystack);
- needle_str = mem_as_ustr(needle);
-
- int n_needle_chars =
- sql_utf8_char_count(needle_str, n_needle_bytes);
- int n_haystack_chars =
- sql_utf8_char_count(haystack_str,
- n_haystack_bytes);
-
- if (n_haystack_chars < n_needle_chars) {
- position = 0;
- goto finish;
- }
- /*
- * Comparison window is determined by
- * beg_offset and end_offset. beg_offset
- * is offset in bytes from haystack
- * beginning to window beginning.
- * end_offset is offset in bytes from
- * haystack beginning to window end.
- */
- int end_offset = 0;
- for (int c = 0; c < n_needle_chars; c++) {
- SQL_UTF8_FWD_1(haystack_str, end_offset,
- n_haystack_bytes);
- }
- int beg_offset = 0;
- struct coll *coll = context->coll;
- int c;
- for (c = 0; c + n_needle_chars <= n_haystack_chars; c++) {
- if (coll->cmp((const char *) haystack_str + beg_offset,
- end_offset - beg_offset,
- (const char *) needle_str,
- n_needle_bytes, coll) == 0)
- goto finish;
- position++;
- /* Update offsets. */
- SQL_UTF8_FWD_1(haystack_str, beg_offset,
- n_haystack_bytes);
- SQL_UTF8_FWD_1(haystack_str, end_offset,
- n_haystack_bytes);
- }
- /* Needle was not found in the haystack. */
- position = 0;
- }
- }
-finish:
- assert(position >= 0);
- sql_result_uint(context, position);
-}
-
/*
* Implementation of the printf() function.
*/
@@ -1982,7 +1911,9 @@ static struct sql_func_definition definitions[] = {
{"NULLIF", 2, {FIELD_TYPE_ANY, FIELD_TYPE_ANY}, FIELD_TYPE_SCALAR,
func_nullif, NULL},
{"POSITION", 2, {FIELD_TYPE_STRING, FIELD_TYPE_STRING},
- FIELD_TYPE_INTEGER, position_func, NULL},
+ FIELD_TYPE_INTEGER, func_position_characters, NULL},
+ {"POSITION", 2, {FIELD_TYPE_VARBINARY, FIELD_TYPE_VARBINARY},
+ FIELD_TYPE_INTEGER, func_position_octets, NULL},
{"PRINTF", -1, {FIELD_TYPE_ANY}, FIELD_TYPE_STRING, printfFunc,
NULL},
{"QUOTE", 1, {FIELD_TYPE_ANY}, FIELD_TYPE_STRING, quoteFunc, NULL},
diff --git a/test/sql-tap/position.test.lua b/test/sql-tap/position.test.lua
index 6a96ed9bc..e49f4665a 100755
--- a/test/sql-tap/position.test.lua
+++ b/test/sql-tap/position.test.lua
@@ -1,6 +1,6 @@
#!/usr/bin/env tarantool
local test = require("sqltester")
-test:plan(80)
+test:plan(81)
test:do_test(
"position-1.1",
@@ -305,130 +305,130 @@ test:do_test(
test:do_test(
"position-1.31",
function()
- return test:catchsql "SELECT position(x'01', x'0102030405');"
+ return test:execsql "SELECT position(x'01', x'0102030405');"
end, {
-- <position-1.31>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 1
-- </position-1.31>
})
test:do_test(
"position-1.32",
function()
- return test:catchsql "SELECT position(x'02', x'0102030405');"
+ return test:execsql "SELECT position(x'02', x'0102030405');"
end, {
-- <position-1.32>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 2
-- </position-1.32>
})
test:do_test(
"position-1.33",
function()
- return test:catchsql "SELECT position(x'03', x'0102030405');"
+ return test:execsql "SELECT position(x'03', x'0102030405');"
end, {
-- <position-1.33>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 3
-- </position-1.33>
})
test:do_test(
"position-1.34",
function()
- return test:catchsql "SELECT position(x'04', x'0102030405');"
+ return test:execsql "SELECT position(x'04', x'0102030405');"
end, {
-- <position-1.34>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 4
-- </position-1.34>
})
test:do_test(
"position-1.35",
function()
- return test:catchsql "SELECT position(x'05', x'0102030405');"
+ return test:execsql "SELECT position(x'05', x'0102030405');"
end, {
-- <position-1.35>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 5
-- </position-1.35>
})
test:do_test(
"position-1.36",
function()
- return test:catchsql "SELECT position(x'06', x'0102030405');"
+ return test:execsql "SELECT position(x'06', x'0102030405');"
end, {
-- <position-1.36>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 0
-- </position-1.36>
})
test:do_test(
"position-1.37",
function()
- return test:catchsql "SELECT position(x'0102030405', x'0102030405');"
+ return test:execsql "SELECT position(x'0102030405', x'0102030405');"
end, {
-- <position-1.37>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 1
-- </position-1.37>
})
test:do_test(
"position-1.38",
function()
- return test:catchsql "SELECT position(x'02030405', x'0102030405');"
+ return test:execsql "SELECT position(x'02030405', x'0102030405');"
end, {
-- <position-1.38>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 2
-- </position-1.38>
})
test:do_test(
"position-1.39",
function()
- return test:catchsql "SELECT position(x'030405', x'0102030405');"
+ return test:execsql "SELECT position(x'030405', x'0102030405');"
end, {
-- <position-1.39>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 3
-- </position-1.39>
})
test:do_test(
"position-1.40",
function()
- return test:catchsql "SELECT position(x'0405', x'0102030405');"
+ return test:execsql "SELECT position(x'0405', x'0102030405');"
end, {
-- <position-1.40>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 4
-- </position-1.40>
})
test:do_test(
"position-1.41",
function()
- return test:catchsql "SELECT position(x'0506', x'0102030405');"
+ return test:execsql "SELECT position(x'0506', x'0102030405');"
end, {
-- <position-1.41>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 0
-- </position-1.41>
})
test:do_test(
"position-1.42",
function()
- return test:catchsql "SELECT position(x'', x'0102030405');"
+ return test:execsql "SELECT position(x'', x'0102030405');"
end, {
-- <position-1.42>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 1
-- </position-1.42>
})
test:do_test(
"position-1.43",
function()
- return test:catchsql "SELECT position(x'', x'');"
+ return test:execsql "SELECT position(x'', x'');"
end, {
-- <position-1.43>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 1
-- </position-1.43>
})
@@ -571,40 +571,40 @@ test:do_test(
test:do_test(
"position-1.56.1",
function()
- return test:catchsql "SELECT position(x'79', x'78c3a4e282ac79');"
+ return test:execsql "SELECT position(x'79', x'78c3a4e282ac79');"
end, {
-- <position-1.56.1>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 7
-- </position-1.56.1>
})
test:do_test(
"position-1.56.2",
function()
- return test:catchsql "SELECT position(x'7a', x'78c3a4e282ac79');"
+ return test:execsql "SELECT position(x'7a', x'78c3a4e282ac79');"
end, {
-- <position-1.56.2>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 0
-- </position-1.56.2>
})
test:do_test(
"position-1.56.3",
function()
- return test:catchsql "SELECT position(x'78', x'78c3a4e282ac79');"
+ return test:execsql "SELECT position(x'78', x'78c3a4e282ac79');"
end, {
-- <position-1.56.3>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 1
-- </position-1.56.3>
})
test:do_test(
"position-1.56.3",
function()
- return test:catchsql "SELECT position(x'a4', x'78c3a4e282ac79');"
+ return test:execsql "SELECT position(x'a4', x'78c3a4e282ac79');"
end, {
-- <position-1.56.3>
- 1, "Failed to execute SQL statement: wrong arguments for function POSITION()"
+ 3
-- </position-1.56.3>
})
@@ -858,4 +858,14 @@ test:do_catchsql_test(
}
)
+-- gh-4145: Make sure POSITION() can work with VARBINARY.
+test:do_execsql_test(
+ "position-2",
+ [[
+ SELECT POSITION(x'313233', x'30313231323334353132333435');
+ ]], {
+ 4
+ }
+)
+
test:finish_test()
--
2.25.1
^ permalink raw reply [flat|nested] 16+ messages in thread
* [Tarantool-patches] [PATCH v1 7/8] sql: rework SUBSTR() function
2021-11-11 10:45 [Tarantool-patches] [PATCH v1 0/8] Rework standard function Mergen Imeev via Tarantool-patches
` (5 preceding siblings ...)
2021-11-11 10:45 ` [Tarantool-patches] [PATCH v1 6/8] sql: rework POSITION() function Mergen Imeev via Tarantool-patches
@ 2021-11-11 10:45 ` Mergen Imeev via Tarantool-patches
2021-11-11 10:45 ` [Tarantool-patches] [PATCH v1 8/8] sql: refactor LIKE() function Mergen Imeev via Tarantool-patches
2021-11-11 10:59 ` [Tarantool-patches] [PATCH v1 0/8] Rework standard function Kirill Yukhin via Tarantool-patches
8 siblings, 0 replies; 16+ messages in thread
From: Mergen Imeev via Tarantool-patches @ 2021-11-11 10:45 UTC (permalink / raw)
To: kyukhin; +Cc: tarantool-patches
This patch makes SUBSTR() work according to ANSI rules for SUBSTRING()
function. Also, SUBSTR() can now work correctly with large INTEGER
values. The SUBSTR() syntax has not changed.
Part of #4145
@TarantoolBot document
Title: SUBSTR() function
SUBSTR() now works according to the ANSI rules for SUBSTRING().
Rules for SUBSTR() with 2 arguments:
1) let the first argument be VALUE, and the second argument be START;
2) VALUE should be STRING or VARBINARY, START should be INTEGER;
3) if any of arguments is NULL, NULL is returned;
4) let POS be MAX(START - 1, 0), END be length of the VALUE;
5) if POS >= END, the result is empty string;
6) if POS < END, the result will be substring of VALUE, starting from
the position POS to the position END.
Rules for SUBSTR() with 3 arguments:
1) let the first argument be VALUE, the second argument be START, and
the third argument be LENGTH;
2) VALUE should be STRING or VARBINARY, START and LENGTH should be
INTEGERs;
3) if any of arguments is NULL, NULL is returned;
4) if LENGTH < 0, an error is thrown;
5) let POS be MAX(START - 1, 0), END be START + LENGTH - 1;
6) if POS >= END, the result is empty string;
7) if POS < END, the result will be substring of VALUE, starting from
the position POS to the position END.
---
src/box/sql/func.c | 273 ++++++++++++++++++++---------------
test/sql-tap/func.test.lua | 16 +-
test/sql-tap/func2.test.lua | 150 +++++++++----------
test/sql-tap/substr.test.lua | 121 ++++++++++++----
4 files changed, 339 insertions(+), 221 deletions(-)
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 7914d2ec7..7e71a757b 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -573,6 +573,161 @@ func_position_characters(struct sql_context *ctx, int argc, struct Mem *argv)
return mem_set_uint(ctx->pOut, 0);
}
+/** Implementation of the SUBSTR() function. */
+int
+substr_normalize(int64_t base_start, bool is_start_neg, uint64_t base_length,
+ uint64_t *start, uint64_t *length)
+{
+ if (!is_start_neg && base_start > 0) {
+ *start = (uint64_t)base_start - 1;
+ *length = base_length;
+ return 0;
+ }
+ *start = 0;
+ if (base_length == 0) {
+ *length = 0;
+ return 0;
+ }
+ /*
+ * We are subtracting 1 from base_length instead of subtracting from
+ * base_start, since base_start can be INT64_MIN. At the same time,
+ * base_length is not less than 1.
+ */
+ int64_t a = base_start;
+ int64_t b = (int64_t)(base_length - 1);
+ int64_t res;
+ bool is_neg;
+ /*
+ * Integer cannot overflow since non-positive value is added to positive
+ * value.
+ */
+ if (sql_add_int(a, a != 0, b, false, &res, &is_neg) != 0) {
+ diag_set(ClientError, ER_SQL_EXECUTE, "integer is overflowed");
+ return -1;
+ }
+ *length = is_neg ? 0 : (uint64_t)res;
+ return 0;
+}
+
+static void
+func_substr_octets(struct sql_context *ctx, int argc, struct Mem *argv)
+{
+ assert(argc == 2 || argc == 3);
+ if (mem_is_any_null(&argv[0], &argv[1]))
+ return;
+ assert(mem_is_bytes(&argv[0]) && mem_is_int(&argv[1]));
+
+ bool is_str = mem_is_str(&argv[0]);
+ uint64_t size = argv[0].n;
+
+ if (argc == 2) {
+ uint64_t start = mem_is_uint(&argv[1]) && argv[1].u.u > 1 ?
+ argv[1].u.u - 1 : 0;
+ if (start >= size) {
+ if (is_str)
+ return mem_set_str0_static(ctx->pOut, "");
+ else
+ return mem_set_bin_static(ctx->pOut, "", 0);
+ }
+ char *s = &argv[0].z[start];
+ uint64_t n = size - start;
+ ctx->is_aborted = is_str ? mem_copy_str(ctx->pOut, s, n) != 0 :
+ mem_copy_bin(ctx->pOut, s, n) != 0;
+ return;
+ }
+
+ assert(argc == 3);
+ if (mem_is_null(&argv[2]))
+ return;
+ assert(mem_is_int(&argv[2]));
+ if (!mem_is_uint(&argv[2])) {
+ diag_set(ClientError, ER_SQL_EXECUTE, "Length of the result "
+ "cannot be less than 0");
+ ctx->is_aborted = true;
+ return;
+ }
+ uint64_t start;
+ uint64_t length;
+ if (substr_normalize(argv[1].u.i, !mem_is_uint(&argv[1]), argv[2].u.u,
+ &start, &length) != 0) {
+ ctx->is_aborted = true;
+ return;
+ }
+ if (start >= size || length == 0) {
+ if (is_str)
+ return mem_set_str0_static(ctx->pOut, "");
+ else
+ return mem_set_bin_static(ctx->pOut, "", 0);
+ }
+ char *str = &argv[0].z[start];
+ uint64_t len = MIN(size - start, length);
+ ctx->is_aborted = is_str ? mem_copy_str(ctx->pOut, str, len) != 0 :
+ mem_copy_bin(ctx->pOut, str, len) != 0;
+}
+
+static void
+func_substr_characters(struct sql_context *ctx, int argc, struct Mem *argv)
+{
+ assert(argc == 2 || argc == 3);
+ (void)argc;
+ if (mem_is_any_null(&argv[0], &argv[1]))
+ return;
+ assert(mem_is_str(&argv[0]) && mem_is_int(&argv[1]));
+
+ const char *str = argv[0].z;
+ int pos = 0;
+ int end = argv[0].n;
+ if (argc == 2) {
+ uint64_t start = mem_is_uint(&argv[1]) && argv[1].u.u > 1 ?
+ argv[1].u.u - 1 : 0;
+ for (uint64_t i = 0; i < start && pos < end; ++i) {
+ UChar32 c;
+ U8_NEXT((uint8_t *)str, pos, end, c);
+ }
+ if (pos == end)
+ return mem_set_str_static(ctx->pOut, "", 0);
+ if (mem_copy_str(ctx->pOut, str + pos, end - pos) != 0)
+ ctx->is_aborted = true;
+ return;
+ }
+
+ assert(argc == 3);
+ if (mem_is_null(&argv[2]))
+ return;
+ assert(mem_is_int(&argv[2]));
+ if (!mem_is_uint(&argv[2])) {
+ diag_set(ClientError, ER_SQL_EXECUTE, "Length of the result "
+ "cannot be less than 0");
+ ctx->is_aborted = true;
+ return;
+ }
+ uint64_t start;
+ uint64_t length;
+ if (substr_normalize(argv[1].u.i, !mem_is_uint(&argv[1]), argv[2].u.u,
+ &start, &length) != 0) {
+ ctx->is_aborted = true;
+ return;
+ }
+ if (length == 0)
+ return mem_set_str_static(ctx->pOut, "", 0);
+
+ for (uint64_t i = 0; i < start && pos < end; ++i) {
+ UChar32 c;
+ U8_NEXT((uint8_t *)str, pos, end, c);
+ }
+ if (pos == end)
+ return mem_set_str_static(ctx->pOut, "", 0);
+
+ int cur = pos;
+ for (uint64_t i = 0; i < length && cur < end; ++i) {
+ UChar32 c;
+ U8_NEXT((uint8_t *)str, cur, end, c);
+ }
+ assert(cur > pos);
+ if (mem_copy_str(ctx->pOut, str + pos, cur - pos) != 0)
+ ctx->is_aborted = true;
+}
+
static const unsigned char *
mem_as_ustr(struct Mem *mem)
{
@@ -770,116 +925,6 @@ printfFunc(struct sql_context *context, int argc, struct Mem *argv)
}
}
-/*
- * Implementation of the substr() function.
- *
- * substr(x,p1,p2) returns p2 characters of x[] beginning with p1.
- * p1 is 1-indexed. So substr(x,1,1) returns the first character
- * of x. If x is text, then we actually count UTF-8 characters.
- * If x is a blob, then we count bytes.
- *
- * If p1 is negative, then we begin abs(p1) from the end of x[].
- *
- * If p2 is negative, return the p2 characters preceding p1.
- */
-static void
-substrFunc(struct sql_context *context, int argc, struct Mem *argv)
-{
- const unsigned char *z;
- const unsigned char *z2;
- int len;
- int p0type;
- int64_t p1, p2;
- int negP2 = 0;
-
- if (argc != 2 && argc != 3) {
- diag_set(ClientError, ER_FUNC_WRONG_ARG_COUNT, "SUBSTR",
- "1 or 2", argc);
- context->is_aborted = true;
- return;
- }
- if (mem_is_null(&argv[1]) || (argc == 3 && mem_is_null(&argv[2])))
- return;
- p0type = sql_value_type(&argv[0]);
- p1 = mem_get_int_unsafe(&argv[1]);
- if (p0type == MP_BIN) {
- z = mem_as_bin(&argv[0]);
- len = mem_len_unsafe(&argv[0]);
- if (z == 0)
- return;
- assert(len == mem_len_unsafe(&argv[0]));
- } else {
- z = mem_as_ustr(&argv[0]);
- if (z == 0)
- return;
- len = 0;
- if (p1 < 0)
- len = sql_utf8_char_count(z, mem_len_unsafe(&argv[0]));
- }
- if (argc == 3) {
- p2 = mem_get_int_unsafe(&argv[2]);
- if (p2 < 0) {
- p2 = -p2;
- negP2 = 1;
- }
- } else {
- p2 = sql_context_db_handle(context)->
- aLimit[SQL_LIMIT_LENGTH];
- }
- if (p1 < 0) {
- p1 += len;
- if (p1 < 0) {
- p2 += p1;
- if (p2 < 0)
- p2 = 0;
- p1 = 0;
- }
- } else if (p1 > 0) {
- p1--;
- } else if (p2 > 0) {
- p2--;
- }
- if (negP2) {
- p1 -= p2;
- if (p1 < 0) {
- p2 += p1;
- p1 = 0;
- }
- }
- assert(p1 >= 0 && p2 >= 0);
- if (p0type != MP_BIN) {
- /*
- * In the code below 'cnt' and 'n_chars' is
- * used because '\0' is not supposed to be
- * end-of-string symbol.
- */
- int byte_size = mem_len_unsafe(&argv[0]);
- int n_chars = sql_utf8_char_count(z, byte_size);
- int cnt = 0;
- int i = 0;
- while (cnt < n_chars && p1) {
- SQL_UTF8_FWD_1(z, i, byte_size);
- cnt++;
- p1--;
- }
- z += i;
- i = 0;
- for (z2 = z; cnt < n_chars && p2; p2--) {
- SQL_UTF8_FWD_1(z2, i, byte_size);
- cnt++;
- }
- z2 += i;
- mem_copy_str(context->pOut, (char *)z, z2 - z);
- } else {
- if (p1 + p2 > len) {
- p2 = len - p1;
- if (p2 < 0)
- p2 = 0;
- }
- mem_copy_bin(context->pOut, (char *)&z[p1], p2);
- }
-}
-
/*
* Implementation of the round() function
*/
@@ -1933,15 +1978,15 @@ static struct sql_func_definition definitions[] = {
{"SOUNDEX", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_STRING, soundexFunc,
NULL},
{"SUBSTR", 2, {FIELD_TYPE_STRING, FIELD_TYPE_INTEGER},
- FIELD_TYPE_STRING, substrFunc, NULL},
+ FIELD_TYPE_STRING, func_substr_characters, NULL},
{"SUBSTR", 3,
{FIELD_TYPE_STRING, FIELD_TYPE_INTEGER, FIELD_TYPE_INTEGER},
- FIELD_TYPE_STRING, substrFunc, NULL},
+ FIELD_TYPE_STRING, func_substr_characters, NULL},
{"SUBSTR", 2, {FIELD_TYPE_VARBINARY, FIELD_TYPE_INTEGER},
- FIELD_TYPE_VARBINARY, substrFunc, NULL},
+ FIELD_TYPE_VARBINARY, func_substr_octets, NULL},
{"SUBSTR", 3,
{FIELD_TYPE_VARBINARY, FIELD_TYPE_INTEGER, FIELD_TYPE_INTEGER},
- FIELD_TYPE_VARBINARY, substrFunc, NULL},
+ FIELD_TYPE_VARBINARY, func_substr_octets, NULL},
{"SUM", 1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_INTEGER, step_sum, NULL},
{"SUM", 1, {FIELD_TYPE_DOUBLE}, FIELD_TYPE_DOUBLE, step_sum, NULL},
{"TOTAL", 1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_DOUBLE, step_total,
diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
index 29e1c3de2..b52d5bd4c 100755
--- a/test/sql-tap/func.test.lua
+++ b/test/sql-tap/func.test.lua
@@ -139,7 +139,7 @@ test:do_execsql_test(
SELECT substr(t1,-1,1) FROM tbl1 ORDER BY t1
]], {
-- <func-2.3>
- "e", "s", "m", "e", "s"
+ "", "", "", "", ""
-- </func-2.3>
})
@@ -149,7 +149,7 @@ test:do_execsql_test(
SELECT substr(t1,-1,2) FROM tbl1 ORDER BY t1
]], {
-- <func-2.4>
- "e", "s", "m", "e", "s"
+ "", "", "", "", ""
-- </func-2.4>
})
@@ -159,7 +159,7 @@ test:do_execsql_test(
SELECT substr(t1,-2,1) FROM tbl1 ORDER BY t1
]], {
-- <func-2.5>
- "e", "i", "a", "r", "i"
+ "", "", "", "", ""
-- </func-2.5>
})
@@ -169,7 +169,7 @@ test:do_execsql_test(
SELECT substr(t1,-2,2) FROM tbl1 ORDER BY t1
]], {
-- <func-2.6>
- "ee", "is", "am", "re", "is"
+ "", "", "", "", ""
-- </func-2.6>
})
@@ -179,7 +179,7 @@ test:do_execsql_test(
SELECT substr(t1,-4,2) FROM tbl1 ORDER BY t1
]], {
-- <func-2.7>
- "fr", "", "gr", "wa", "th"
+ "", "", "", "", ""
-- </func-2.7>
})
@@ -288,7 +288,7 @@ if ("ሴ" ~= "u1234") then
SELECT substr(t1,-1,1) FROM tbl1 ORDER BY t1
]], {
-- <func-3.8>
- "8", "s", "s", "o"
+ "", "", "", ""
-- </func-3.8>
})
@@ -298,7 +298,7 @@ if ("ሴ" ~= "u1234") then
SELECT substr(t1,-3,2) FROM tbl1 ORDER BY t1
]], {
-- <func-3.9>
- "F-", "er", "in", "ሴh"
+ "", "", "", ""
-- </func-3.9>
})
@@ -308,7 +308,7 @@ if ("ሴ" ~= "u1234") then
SELECT substr(t1,-4,3) FROM tbl1 ORDER BY t1
]], {
-- <func-3.10>
- "TF-", "ter", "ain", "iሴh"
+ "", "", "", ""
-- </func-3.10>
})
diff --git a/test/sql-tap/func2.test.lua b/test/sql-tap/func2.test.lua
index 792f020f1..b786b4d96 100755
--- a/test/sql-tap/func2.test.lua
+++ b/test/sql-tap/func2.test.lua
@@ -162,7 +162,7 @@ test:do_execsql_test(
SELECT SUBSTR('Supercalifragilisticexpialidocious', -1)
]], {
-- <func2-1.11>
- "s"
+ "Supercalifragilisticexpialidocious"
-- </func2-1.11>
})
@@ -172,7 +172,7 @@ test:do_execsql_test(
SELECT SUBSTR('Supercalifragilisticexpialidocious', -2)
]], {
-- <func2-1.12>
- "us"
+ "Supercalifragilisticexpialidocious"
-- </func2-1.12>
})
@@ -182,7 +182,7 @@ test:do_execsql_test(
SELECT SUBSTR('Supercalifragilisticexpialidocious', -30)
]], {
-- <func2-1.13>
- "rcalifragilisticexpialidocious"
+ "Supercalifragilisticexpialidocious"
-- </func2-1.13>
})
@@ -344,7 +344,7 @@ test:do_execsql_test(
SELECT SUBSTR('Supercalifragilisticexpialidocious', -1, 1)
]], {
-- <func2-1.25.1>
- "s"
+ ""
-- </func2-1.25.1>
})
@@ -354,7 +354,7 @@ test:do_execsql_test(
SELECT SUBSTR('Supercalifragilisticexpialidocious', -1, 2)
]], {
-- <func2-1.25.2>
- "s"
+ ""
-- </func2-1.25.2>
})
@@ -364,7 +364,7 @@ test:do_execsql_test(
SELECT SUBSTR('Supercalifragilisticexpialidocious', -2, 1)
]], {
-- <func2-1.26>
- "u"
+ ""
-- </func2-1.26>
})
@@ -374,7 +374,7 @@ test:do_execsql_test(
SELECT SUBSTR('Supercalifragilisticexpialidocious', -30, 1)
]], {
-- <func2-1.27>
- "r"
+ ""
-- </func2-1.27>
})
@@ -394,7 +394,7 @@ test:do_execsql_test(
SELECT SUBSTR('Supercalifragilisticexpialidocious', -34, 1)
]], {
-- <func2-1.28.1>
- "S"
+ ""
-- </func2-1.28.1>
})
@@ -404,7 +404,7 @@ test:do_execsql_test(
SELECT SUBSTR('Supercalifragilisticexpialidocious', -34, 2)
]], {
-- <func2-1.28.2>
- "Su"
+ ""
-- </func2-1.28.2>
})
@@ -424,7 +424,7 @@ test:do_execsql_test(
SELECT SUBSTR('Supercalifragilisticexpialidocious', -35, 2)
]], {
-- <func2-1.29.2>
- "S"
+ ""
-- </func2-1.29.2>
})
@@ -464,11 +464,13 @@ test:do_execsql_test(
SELECT SUBSTR('Supercalifragilisticexpialidocious', -36, 3)
]], {
-- <func2-1.30.3>
- "S"
+ ""
-- </func2-1.30.3>
})
-- p1 is 1-indexed, p2 length to return, p2<0 return p2 chars before p1
+local err = [[Failed to execute SQL statement: Length of the result cannot ]]..
+ [[be less than 0]]
test:do_execsql_test(
"func2-1.31.0",
[[
@@ -479,23 +481,23 @@ test:do_execsql_test(
-- </func2-1.31.0>
})
-test:do_execsql_test(
+test:do_catchsql_test(
"func2-1.31.1",
[[
SELECT SUBSTR('Supercalifragilisticexpialidocious', 0, -1)
]], {
-- <func2-1.31.1>
- ""
+ 1, err
-- </func2-1.31.1>
})
-test:do_execsql_test(
+test:do_catchsql_test(
"func2-1.31.2",
[[
SELECT SUBSTR('Supercalifragilisticexpialidocious', 0, -2)
]], {
-- <func2-1.31.2>
- ""
+ 1, err
-- </func2-1.31.2>
})
@@ -509,13 +511,13 @@ test:do_execsql_test(
-- </func2-1.32.0>
})
-test:do_execsql_test(
+test:do_catchsql_test(
"func2-1.32.1",
[[
SELECT SUBSTR('Supercalifragilisticexpialidocious', 1, -1)
]], {
-- <func2-1.32.1>
- ""
+ 1, err
-- </func2-1.32.1>
})
@@ -529,23 +531,23 @@ test:do_execsql_test(
-- </func2-1.33.0>
})
-test:do_execsql_test(
+test:do_catchsql_test(
"func2-1.33.1",
[[
SELECT SUBSTR('Supercalifragilisticexpialidocious', 2, -1)
]], {
-- <func2-1.33.1>
- "S"
+ 1, err
-- </func2-1.33.1>
})
-test:do_execsql_test(
+test:do_catchsql_test(
"func2-1.33.2",
[[
SELECT SUBSTR('Supercalifragilisticexpialidocious', 2, -2)
]], {
-- <func2-1.33.2>
- "S"
+ 1, err
-- </func2-1.33.2>
})
@@ -559,63 +561,63 @@ test:do_execsql_test(
-- </func2-1.34.0>
})
-test:do_execsql_test(
+test:do_catchsql_test(
"func2-1.34.1",
[[
SELECT SUBSTR('Supercalifragilisticexpialidocious', 3, -1)
]], {
-- <func2-1.34.1>
- "u"
+ 1, err
-- </func2-1.34.1>
})
-test:do_execsql_test(
+test:do_catchsql_test(
"func2-1.34.2",
[[
SELECT SUBSTR('Supercalifragilisticexpialidocious', 3, -2)
]], {
-- <func2-1.34.2>
- "Su"
+ 1, err
-- </func2-1.34.2>
})
-test:do_execsql_test(
+test:do_catchsql_test(
"func2-1.35.1",
[[
SELECT SUBSTR('Supercalifragilisticexpialidocious', 30, -1)
]], {
-- <func2-1.35.1>
- "o"
+ 1, err
-- </func2-1.35.1>
})
-test:do_execsql_test(
+test:do_catchsql_test(
"func2-1.35.2",
[[
SELECT SUBSTR('Supercalifragilisticexpialidocious', 30, -2)
]], {
-- <func2-1.35.2>
- "do"
+ 1, err
-- </func2-1.35.2>
})
-test:do_execsql_test(
+test:do_catchsql_test(
"func2-1.36",
[[
SELECT SUBSTR('Supercalifragilisticexpialidocious', 34, -1)
]], {
-- <func2-1.36>
- "u"
+ 1, err
-- </func2-1.36>
})
-test:do_execsql_test(
+test:do_catchsql_test(
"func2-1.37",
[[
SELECT SUBSTR('Supercalifragilisticexpialidocious', 35, -1)
]], {
-- <func2-1.37>
- "s"
+ 1, err
-- </func2-1.37>
})
@@ -629,23 +631,23 @@ test:do_execsql_test(
-- </func2-1.38.0>
})
-test:do_execsql_test(
+test:do_catchsql_test(
"func2-1.38.1",
[[
SELECT SUBSTR('Supercalifragilisticexpialidocious', 36, -1)
]], {
-- <func2-1.38.1>
- ""
+ 1, err
-- </func2-1.38.1>
})
-test:do_execsql_test(
+test:do_catchsql_test(
"func2-1.38.2",
[[
SELECT SUBSTR('Supercalifragilisticexpialidocious', 36, -2)
]], {
-- <func2-1.38.2>
- "s"
+ 1, err
-- </func2-1.38.2>
})
@@ -993,7 +995,7 @@ if ("ሴ" ~= "u1234")
SELECT SUBSTR('ሴ', -1, 1)
]], {
-- <func2-2.8.1>
- "ሴ"
+ ""
-- </func2-2.8.1>
})
@@ -1003,7 +1005,7 @@ if ("ሴ" ~= "u1234")
SELECT SUBSTR('ሴ', -1, 2)
]], {
-- <func2-2.8.2>
- "ሴ"
+ ""
-- </func2-2.8.2>
})
@@ -1130,21 +1132,21 @@ test:do_test(
function()
local blob = test:execsql "SELECT SUBSTR(x'1234', -1, 1)"
return bin_to_hex(test.lindex(blob, 0))
- end, "34")
+ end, "")
test:do_test(
"func2-3.4.2",
function()
local blob = test:execsql "SELECT SUBSTR(x'1234', -1, 2)"
return bin_to_hex(test.lindex(blob, 0))
- end, "34")
+ end, "")
test:do_test(
"func2-3.4.3",
function()
local blob = test:execsql "SELECT SUBSTR(x'1234', -1, 3)"
return bin_to_hex(test.lindex(blob, 0))
- end, "34")
+ end, "12")
test:do_test(
"func2-3.5.0",
@@ -1158,21 +1160,21 @@ test:do_test(
function()
local blob = test:execsql "SELECT SUBSTR(x'1234', -2, 1)"
return bin_to_hex(test.lindex(blob, 0))
- end, "12")
+ end, "")
test:do_test(
"func2-3.5.2",
function()
local blob = test:execsql "SELECT SUBSTR(x'1234', -2, 2)"
return bin_to_hex(test.lindex(blob, 0))
- end, "1234")
+ end, "")
test:do_test(
"func2-3.5.3",
function()
local blob = test:execsql "SELECT SUBSTR(x'1234', -2, 3)"
return bin_to_hex(test.lindex(blob, 0))
- end, "1234")
+ end, "")
test:do_test(
"func2-3.6.0",
@@ -1181,26 +1183,28 @@ test:do_test(
return bin_to_hex(test.lindex(blob, 0))
end, "")
+local err = [[Failed to execute SQL statement: Length of the result cannot ]]..
+ [[be less than 0]]
test:do_test(
"func2-3.6.1",
function()
- local blob = test:execsql "SELECT SUBSTR(x'1234', -1, -1)"
- return bin_to_hex(test.lindex(blob, 0))
- end, "12")
+ return test:catchsql("SELECT SUBSTR(x'1234', -1, -1)")
+ end,
+ {1, err})
test:do_test(
"func2-3.6.2",
function()
- local blob = test:execsql "SELECT SUBSTR(x'1234', -1, -2)"
- return bin_to_hex(test.lindex(blob, 0))
- end, "12")
+ return test:catchsql("SELECT SUBSTR(x'1234', -1, -2)")
+ end,
+ {1, err})
test:do_test(
"func2-3.6.3",
function()
- local blob = test:execsql "SELECT SUBSTR(x'1234', -1, -3)"
- return bin_to_hex(test.lindex(blob, 0))
- end, "12")
+ return test:catchsql("SELECT SUBSTR(x'1234', -1, -3)")
+ end,
+ {1, err})
test:do_test(
"func2-3.7.0",
@@ -1212,16 +1216,16 @@ test:do_test(
test:do_test(
"func2-3.7.1",
function()
- local blob = test:execsql "SELECT SUBSTR(x'1234', -2, -1)"
- return bin_to_hex(test.lindex(blob, 0))
- end, "")
+ return test:catchsql("SELECT SUBSTR(x'1234', -2, -1)")
+ end,
+ {1, err})
test:do_test(
"func2-3.7.2",
function()
- local blob = test:execsql "SELECT SUBSTR(x'1234', -2, -2)"
- return bin_to_hex(test.lindex(blob, 0))
- end, "")
+ return test:catchsql("SELECT SUBSTR(x'1234', -2, -2)")
+ end,
+ {1, err})
test:do_test(
"func2-3.8.0",
@@ -1233,16 +1237,16 @@ test:do_test(
test:do_test(
"func2-3.8.1",
function()
- local blob = test:execsql "SELECT SUBSTR(x'1234', 1, -1)"
- return bin_to_hex(test.lindex(blob, 0))
- end, "")
+ return test:catchsql("SELECT SUBSTR(x'1234', 1, -1)")
+ end,
+ {1, err})
test:do_test(
"func2-3.8.2",
function()
- local blob = test:execsql "SELECT SUBSTR(x'1234', 1, -2)"
- return bin_to_hex(test.lindex(blob, 0))
- end, "")
+ return test:catchsql("SELECT SUBSTR(x'1234', 1, -2)")
+ end,
+ {1, err})
test:do_test(
"func2-3.9.0",
@@ -1254,17 +1258,15 @@ test:do_test(
test:do_test(
"func2-3.9.1",
function()
- local blob = test:execsql "SELECT SUBSTR(x'1234', 2, -1)"
- return bin_to_hex(test.lindex(blob, 0))
- end, "12")
+ return test:catchsql("SELECT SUBSTR(x'1234', 2, -2)")
+ end,
+ {1, err})
test:do_test(
"func2-3.9.2",
function()
- local blob = test:execsql "SELECT SUBSTR(x'1234', 2, -2)"
- return bin_to_hex(test.lindex(blob, 0))
- end, "12")
-
-
+ return test:catchsql("SELECT SUBSTR(x'1234', 2, -2)")
+ end,
+ {1, err})
test:finish_test()
diff --git a/test/sql-tap/substr.test.lua b/test/sql-tap/substr.test.lua
index e7e6d7aca..45aae8506 100755
--- a/test/sql-tap/substr.test.lua
+++ b/test/sql-tap/substr.test.lua
@@ -1,6 +1,6 @@
#!/usr/bin/env tarantool
local test = require("sqltester")
-test:plan(93)
+test:plan(85)
--!./tcltestrunner.lua
-- 2007 May 14
@@ -74,17 +74,11 @@ substr_test("1.2", "abcdefg","2","1","b")
substr_test("1.3", "abcdefg","1","2","ab")
substr_test("1.4", "abcdefg","1","100","abcdefg")
substr_test("1.5", "abcdefg","0","2","a")
-substr_test("1.6", "abcdefg","-1","1","g")
-substr_test("1.7", "abcdefg","-1","10","g")
-substr_test("1.8", "abcdefg","-5","3","cde")
-substr_test("1.9", "abcdefg","-7","3","abc")
-substr_test("1.10", "abcdefg","-100","98","abcde")
-substr_test("1.11", "abcdefg","5","-1","d")
-substr_test("1.12", "abcdefg","5","-4","abcd")
-substr_test("1.13", "abcdefg","5","-5","abcd")
-substr_test("1.14", "abcdefg","-5","-1","b")
-substr_test("1.15", "abcdefg","-5","-2","ab")
-substr_test("1.16", "abcdefg","-5","-3","ab")
+substr_test("1.6", "abcdefg","-1","1","")
+substr_test("1.7", "abcdefg","-1","10","abcdefg")
+substr_test("1.8", "abcdefg","-5","3","")
+substr_test("1.9", "abcdefg","-7","3","")
+substr_test("1.10", "abcdefg","-100","98","")
substr_test("1.17", "abcdefg","100","200","")
substr_test("1.18", "abcdefg","200","100","")
-- Make sure NULL is returned if any parameter is NULL
@@ -144,21 +138,20 @@ test:do_test(
substr_test("2.1", "ሴ⍅㑖","1","1","ሴ")
substr_test("2.2", "ሴ⍅㑖","2","1","⍅")
substr_test("2.3", "ሴ⍅㑖","1","2","ሴ⍅")
-substr_test("2.4", "ሴ⍅㑖","-1","1","㑖")
-substr_test("2.5", "aሴb⍅c㑖c","-5","3","b⍅c")
-substr_test("2.6", "aሴb⍅c㑖c","-2","-3","b⍅c")
+substr_test("2.4", "ሴ⍅㑖","-1","1","")
+substr_test("2.5", "aሴb⍅c㑖c","-5","3","")
-- Basic functionality for BLOBs
--
subblob_test("3.1", "61626364656667","1","1","61")
subblob_test("3.2", "61626364656667","2","1","62")
subblob_test("3.3", "61626364656667","1","2","6162")
subblob_test("3.4", "61626364656667","1","100","61626364656667")
-subblob_test("3.5", "61626364656667","0","2","61")
-subblob_test("3.6", "61626364656667","-1","1","67")
-subblob_test("3.7", "61626364656667","-1","10","67")
-subblob_test("3.8", "61626364656667","-5","3","636465")
-subblob_test("3.9", "61626364656667","-7","3","616263")
-subblob_test("3.10", "61626364656667","-100","98","6162636465")
+subblob_test("3.5", "61626364656667", "0", "2", "61")
+subblob_test("3.6", "61626364656667", "-1", "1", "")
+subblob_test("3.7", "61626364656667", "-1", "10", "61626364656667")
+subblob_test("3.8", "61626364656667", "-5", "3", "")
+subblob_test("3.9", "61626364656667","-7","3", "")
+subblob_test("3.10", "61626364656667", "-100", "98", "")
subblob_test("3.11", "61626364656667","100","200","")
subblob_test("3.12", "61626364656667","200","100","")
-- If these blobs were strings, then they would contain multi-byte
@@ -168,9 +161,9 @@ subblob_test("3.12", "61626364656667","200","100","")
subblob_test("4.1", "61E188B462E28D8563E3919663","1","1","61")
subblob_test("4.2", "61E188B462E28D8563E3919663","2","1","E1")
subblob_test("4.3", "61E188B462E28D8563E3919663","1","2","61E1")
-subblob_test("4.4", "61E188B462E28D8563E3919663","-2","1","96")
-subblob_test("4.5", "61E188B462E28D8563E3919663","-5","4","63E39196")
-subblob_test("4.6", "61E188B462E28D8563E3919663","-100","98","61E188B462E28D8563E391")
+subblob_test("4.4", "61E188B462E28D8563E3919663", "-2", "1", "")
+subblob_test("4.5", "61E188B462E28D8563E3919663", "-5", "4", "")
+subblob_test("4.6", "61E188B462E28D8563E3919663", "-100", "98", "")
-- Two-argument SUBSTR
--
local function substr_2_test(id, string, idx, result)
@@ -193,7 +186,85 @@ local function substr_2_test(id, string, idx, result)
end
substr_2_test("5.1","abcdefghijklmnop","5","efghijklmnop")
-substr_2_test("5.2","abcdef","-5","bcdef")
+substr_2_test("5.2","abcdef","-5","abcdef")
+--
+-- gh-4145: Make sure SUBSTR() throws an error if the third argument is
+-- negative.
+--
+test:do_catchsql_test(
+ "substr-6.1",
+ [[
+ SELECT SUBSTR('12345', 1, -1);
+ ]],
+ {
+ 1, [[Failed to execute SQL statement: ]]..
+ [[Length of the result cannot be less than 0]]
+ }
+)
+
+test:do_catchsql_test(
+ "substr-6.2",
+ [[
+ SELECT SUBSTR(x'3132333435', 1, -1);
+ ]],
+ {
+ 1, [[Failed to execute SQL statement: ]]..
+ [[Length of the result cannot be less than 0]]
+ }
+)
+
+-- gh-4145: Make sure that SUBSTR() works according to ANSI.
+
+--
+-- Make sure SUBSTR() returns "" if the sum of the second and third arguments is
+-- 1 or less.
+--
+test:do_execsql_test(
+ "builtins-6.3",
+ [[
+ SELECT SUBSTR('asdfg', -10, 5), SUBSTR('asdfg', -4, 5);
+ ]],
+ {
+ '', ''
+ }
+)
+
+--
+-- Make sure that if the sum of the second and third arguments is more than 1
+-- and the second argument is negative, the result starts from the start of the
+-- string and length of the result will be one less than sum of the second and
+-- third arguments.
+--
+test:do_execsql_test(
+ "builtins-6.4",
+ [[
+ SELECT SUBSTR('123456789', -5, 10);
+ ]],
+ {
+ '1234'
+ }
+)
+
+-- Make sure SUBSTR() can work with big INTEGERs.
+test:do_execsql_test(
+ "builtins-6.5",
+ [[
+ SELECT SUBSTR('123456789', -9223372036854775808, 9223372036854775812);
+ ]],
+ {
+ '123'
+ }
+)
+
+test:do_execsql_test(
+ "builtins-6.6",
+ [[
+ SELECT SUBSTR('123456789', 0, 18000000000000000000);
+ ]],
+ {
+ '123456789'
+ }
+)
test:finish_test()
--
2.25.1
^ permalink raw reply [flat|nested] 16+ messages in thread