Tarantool development patches archive
 help / color / mirror / Atom feed
From: Mergen Imeev via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>,
	tarantool-patches@dev.tarantool.org
Subject: Re: [Tarantool-patches] [PATCH v1 8/8] sql: refactor LIKE() function
Date: Mon, 1 Nov 2021 13:48:51 +0300	[thread overview]
Message-ID: <20211101104851.GF227579@tarantool.org> (raw)
In-Reply-To: <20211020171938.GG203963@tarantool.org>

Thank you for the review! I replaced ucnv_getNextUChar() by U8_NEXT(). Diff and
new patch below.

On Wed, Oct 20, 2021 at 08:19:38PM +0300, Mergen Imeev via Tarantool-patches wrote:
<cut>

Diff:

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index a76b023eb..7e71a757b 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -623,8 +623,12 @@ func_substr_octets(struct sql_context *ctx, int argc, struct Mem *argv)
 	if (argc == 2) {
 		uint64_t start = mem_is_uint(&argv[1]) && argv[1].u.u > 1 ?
 				 argv[1].u.u - 1 : 0;
-		if (start >= size)
-			return return_empty_str(ctx, is_str);
+		if (start >= size) {
+			if (is_str)
+				return mem_set_str0_static(ctx->pOut, "");
+			else
+				return mem_set_bin_static(ctx->pOut, "", 0);
+		}
 		char *s = &argv[0].z[start];
 		uint64_t n = size - start;
 		ctx->is_aborted = is_str ? mem_copy_str(ctx->pOut, s, n) != 0 :
@@ -645,12 +649,16 @@ func_substr_octets(struct sql_context *ctx, int argc, struct Mem *argv)
 	uint64_t start;
 	uint64_t length;
 	if (substr_normalize(argv[1].u.i, !mem_is_uint(&argv[1]), argv[2].u.u,
-	    &start, &length) != 0) {
+			     &start, &length) != 0) {
 		ctx->is_aborted = true;
 		return;
 	}
-	if (start >= size || length == 0)
-		return return_empty_str(ctx, is_str);
+	if (start >= size || length == 0) {
+		if (is_str)
+			return mem_set_str0_static(ctx->pOut, "");
+		else
+			return mem_set_bin_static(ctx->pOut, "", 0);
+	}
 	char *str = &argv[0].z[start];
 	uint64_t len = MIN(size - start, length);
 	ctx->is_aborted = is_str ? mem_copy_str(ctx->pOut, str, len) != 0 :
@@ -666,17 +674,19 @@ func_substr_characters(struct sql_context *ctx, int argc, struct Mem *argv)
 		return;
 	assert(mem_is_str(&argv[0]) && mem_is_int(&argv[1]));
 
-	UErrorCode err = U_ZERO_ERROR;
-	const char *pos = argv[0].z;
-	const char *end = argv[0].z + argv[0].n;
+	const char *str = argv[0].z;
+	int pos = 0;
+	int end = argv[0].n;
 	if (argc == 2) {
 		uint64_t start = mem_is_uint(&argv[1]) && argv[1].u.u > 1 ?
 				 argv[1].u.u - 1 : 0;
-		for (uint64_t i = 0; i < start && pos < end; ++i)
-			ucnv_getNextUChar(icu_utf8_conv, &pos, end, &err);
+		for (uint64_t i = 0; i < start && pos < end; ++i) {
+			UChar32 c;
+			U8_NEXT((uint8_t *)str, pos, end, c);
+		}
 		if (pos == end)
 			return mem_set_str_static(ctx->pOut, "", 0);
-		if (mem_copy_str(ctx->pOut, pos, end - pos) != 0)
+		if (mem_copy_str(ctx->pOut, str + pos, end - pos) != 0)
 			ctx->is_aborted = true;
 		return;
 	}
@@ -694,22 +704,27 @@ func_substr_characters(struct sql_context *ctx, int argc, struct Mem *argv)
 	uint64_t start;
 	uint64_t length;
 	if (substr_normalize(argv[1].u.i, !mem_is_uint(&argv[1]), argv[2].u.u,
-	    &start, &length) != 0) {
+			     &start, &length) != 0) {
 		ctx->is_aborted = true;
 		return;
 	}
 	if (length == 0)
 		return mem_set_str_static(ctx->pOut, "", 0);
 
-	for (uint64_t i = 0; i < start && err == U_ZERO_ERROR; ++i)
-		ucnv_getNextUChar(icu_utf8_conv, &pos, end, &err);
-	const char *cur = pos;
-	for (uint64_t i = 0; i < length && cur < end; ++i)
-		ucnv_getNextUChar(icu_utf8_conv, &cur, end, &err);
-	if (err != U_ZERO_ERROR || cur == pos)
+	for (uint64_t i = 0; i < start && pos < end; ++i) {
+		UChar32 c;
+		U8_NEXT((uint8_t *)str, pos, end, c);
+	}
+	if (pos == end)
 		return mem_set_str_static(ctx->pOut, "", 0);
 
-	if (mem_copy_str(ctx->pOut, pos, cur - pos) != 0)
+	int cur = pos;
+	for (uint64_t i = 0; i < length && cur < end; ++i) {
+		UChar32 c;
+		U8_NEXT((uint8_t *)str, cur, end, c);
+	}
+	assert(cur > pos);
+	if (mem_copy_str(ctx->pOut, str + pos, cur - pos) != 0)
 		ctx->is_aborted = true;
 }

New patch:

 
commit 56a7a57544cebcbf2992fe4543a8b4a30cdc3f67
Author: Mergen Imeev <imeevma@gmail.com>
Date:   Mon Sep 27 10:15:14 2021 +0300

    sql: rework SUBSTR() function
    
    This patch makes SUBSTR() work according to ANSI rules for SUBSTRING()
    function. Also, SUBSTR() can now work correctly with large INTEGER
    values. The SUBSTR() syntax has not changed.
    
    Part of #4145
    
    @TarantoolBot document
    Title: SUBSTR() function
    
    SUBSTR() now works according to the ANSI rules for SUBSTRING().
    
    Rules for SUBSTR() with 2 arguments:
    1) let the first argument be VALUE, and the second argument be START;
    2) VALUE should be STRING or VARBINARY, START should be INTEGER;
    3) if any of arguments is NULL, NULL is returned;
    4) let POS be MAX(START - 1, 0), END be length of the VALUE;
    5) if POS >= END, the result is empty string;
    6) if POS < END, the result will be substring of VALUE, starting from
       the position POS to the position END.
    
    Rules for SUBSTR() with 3 arguments:
    1) let the first argument be VALUE, the second argument be START, and
       the third argument be LENGTH;
    2) VALUE should be STRING or VARBINARY, START and LENGTH should be
       INTEGERs;
    3) if any of arguments is NULL, NULL is returned;
    4) if LENGTH < 0, an error is thrown;
    5) let POS be MAX(START - 1, 0), END be START + LENGTH - 1;
    6) if POS >= END, the result is empty string;
    7) if POS < END, the result will be substring of VALUE, starting from
       the position POS to the position END.

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 7914d2ec7..7e71a757b 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -573,6 +573,161 @@ func_position_characters(struct sql_context *ctx, int argc, struct Mem *argv)
 	return mem_set_uint(ctx->pOut, 0);
 }
 
+/** Implementation of the SUBSTR() function. */
+int
+substr_normalize(int64_t base_start, bool is_start_neg, uint64_t base_length,
+		 uint64_t *start, uint64_t *length)
+{
+	if (!is_start_neg && base_start > 0) {
+		*start = (uint64_t)base_start - 1;
+		*length = base_length;
+		return 0;
+	}
+	*start = 0;
+	if (base_length == 0) {
+		*length = 0;
+		return 0;
+	}
+	/*
+	 * We are subtracting 1 from base_length instead of subtracting from
+	 * base_start, since base_start can be INT64_MIN. At the same time,
+	 * base_length is not less than 1.
+	 */
+	int64_t a = base_start;
+	int64_t b = (int64_t)(base_length - 1);
+	int64_t res;
+	bool is_neg;
+	/*
+	 * Integer cannot overflow since non-positive value is added to positive
+	 * value.
+	 */
+	if (sql_add_int(a, a != 0, b, false, &res, &is_neg) != 0) {
+		diag_set(ClientError, ER_SQL_EXECUTE, "integer is overflowed");
+		return -1;
+	}
+	*length = is_neg ? 0 : (uint64_t)res;
+	return 0;
+}
+
+static void
+func_substr_octets(struct sql_context *ctx, int argc, struct Mem *argv)
+{
+	assert(argc == 2 || argc == 3);
+	if (mem_is_any_null(&argv[0], &argv[1]))
+		return;
+	assert(mem_is_bytes(&argv[0]) && mem_is_int(&argv[1]));
+
+	bool is_str = mem_is_str(&argv[0]);
+	uint64_t size = argv[0].n;
+
+	if (argc == 2) {
+		uint64_t start = mem_is_uint(&argv[1]) && argv[1].u.u > 1 ?
+				 argv[1].u.u - 1 : 0;
+		if (start >= size) {
+			if (is_str)
+				return mem_set_str0_static(ctx->pOut, "");
+			else
+				return mem_set_bin_static(ctx->pOut, "", 0);
+		}
+		char *s = &argv[0].z[start];
+		uint64_t n = size - start;
+		ctx->is_aborted = is_str ? mem_copy_str(ctx->pOut, s, n) != 0 :
+				  mem_copy_bin(ctx->pOut, s, n) != 0;
+		return;
+	}
+
+	assert(argc == 3);
+	if (mem_is_null(&argv[2]))
+		return;
+	assert(mem_is_int(&argv[2]));
+	if (!mem_is_uint(&argv[2])) {
+		diag_set(ClientError, ER_SQL_EXECUTE, "Length of the result "
+			 "cannot be less than 0");
+		ctx->is_aborted = true;
+		return;
+	}
+	uint64_t start;
+	uint64_t length;
+	if (substr_normalize(argv[1].u.i, !mem_is_uint(&argv[1]), argv[2].u.u,
+			     &start, &length) != 0) {
+		ctx->is_aborted = true;
+		return;
+	}
+	if (start >= size || length == 0) {
+		if (is_str)
+			return mem_set_str0_static(ctx->pOut, "");
+		else
+			return mem_set_bin_static(ctx->pOut, "", 0);
+	}
+	char *str = &argv[0].z[start];
+	uint64_t len = MIN(size - start, length);
+	ctx->is_aborted = is_str ? mem_copy_str(ctx->pOut, str, len) != 0 :
+			  mem_copy_bin(ctx->pOut, str, len) != 0;
+}
+
+static void
+func_substr_characters(struct sql_context *ctx, int argc, struct Mem *argv)
+{
+	assert(argc == 2 || argc == 3);
+	(void)argc;
+	if (mem_is_any_null(&argv[0], &argv[1]))
+		return;
+	assert(mem_is_str(&argv[0]) && mem_is_int(&argv[1]));
+
+	const char *str = argv[0].z;
+	int pos = 0;
+	int end = argv[0].n;
+	if (argc == 2) {
+		uint64_t start = mem_is_uint(&argv[1]) && argv[1].u.u > 1 ?
+				 argv[1].u.u - 1 : 0;
+		for (uint64_t i = 0; i < start && pos < end; ++i) {
+			UChar32 c;
+			U8_NEXT((uint8_t *)str, pos, end, c);
+		}
+		if (pos == end)
+			return mem_set_str_static(ctx->pOut, "", 0);
+		if (mem_copy_str(ctx->pOut, str + pos, end - pos) != 0)
+			ctx->is_aborted = true;
+		return;
+	}
+
+	assert(argc == 3);
+	if (mem_is_null(&argv[2]))
+		return;
+	assert(mem_is_int(&argv[2]));
+	if (!mem_is_uint(&argv[2])) {
+		diag_set(ClientError, ER_SQL_EXECUTE, "Length of the result "
+			 "cannot be less than 0");
+		ctx->is_aborted = true;
+		return;
+	}
+	uint64_t start;
+	uint64_t length;
+	if (substr_normalize(argv[1].u.i, !mem_is_uint(&argv[1]), argv[2].u.u,
+			     &start, &length) != 0) {
+		ctx->is_aborted = true;
+		return;
+	}
+	if (length == 0)
+		return mem_set_str_static(ctx->pOut, "", 0);
+
+	for (uint64_t i = 0; i < start && pos < end; ++i) {
+		UChar32 c;
+		U8_NEXT((uint8_t *)str, pos, end, c);
+	}
+	if (pos == end)
+		return mem_set_str_static(ctx->pOut, "", 0);
+
+	int cur = pos;
+	for (uint64_t i = 0; i < length && cur < end; ++i) {
+		UChar32 c;
+		U8_NEXT((uint8_t *)str, cur, end, c);
+	}
+	assert(cur > pos);
+	if (mem_copy_str(ctx->pOut, str + pos, cur - pos) != 0)
+		ctx->is_aborted = true;
+}
+
 static const unsigned char *
 mem_as_ustr(struct Mem *mem)
 {
@@ -770,116 +925,6 @@ printfFunc(struct sql_context *context, int argc, struct Mem *argv)
 	}
 }
 
-/*
- * Implementation of the substr() function.
- *
- * substr(x,p1,p2)  returns p2 characters of x[] beginning with p1.
- * p1 is 1-indexed.  So substr(x,1,1) returns the first character
- * of x.  If x is text, then we actually count UTF-8 characters.
- * If x is a blob, then we count bytes.
- *
- * If p1 is negative, then we begin abs(p1) from the end of x[].
- *
- * If p2 is negative, return the p2 characters preceding p1.
- */
-static void
-substrFunc(struct sql_context *context, int argc, struct Mem *argv)
-{
-	const unsigned char *z;
-	const unsigned char *z2;
-	int len;
-	int p0type;
-	int64_t p1, p2;
-	int negP2 = 0;
-
-	if (argc != 2 && argc != 3) {
-		diag_set(ClientError, ER_FUNC_WRONG_ARG_COUNT, "SUBSTR",
-			 "1 or 2", argc);
-		context->is_aborted = true;
-		return;
-	}
-	if (mem_is_null(&argv[1]) || (argc == 3 && mem_is_null(&argv[2])))
-		return;
-	p0type = sql_value_type(&argv[0]);
-	p1 = mem_get_int_unsafe(&argv[1]);
-	if (p0type == MP_BIN) {
-		z = mem_as_bin(&argv[0]);
-		len = mem_len_unsafe(&argv[0]);
-		if (z == 0)
-			return;
-		assert(len == mem_len_unsafe(&argv[0]));
-	} else {
-		z = mem_as_ustr(&argv[0]);
-		if (z == 0)
-			return;
-		len = 0;
-		if (p1 < 0)
-			len = sql_utf8_char_count(z, mem_len_unsafe(&argv[0]));
-	}
-	if (argc == 3) {
-		p2 = mem_get_int_unsafe(&argv[2]);
-		if (p2 < 0) {
-			p2 = -p2;
-			negP2 = 1;
-		}
-	} else {
-		p2 = sql_context_db_handle(context)->
-		    aLimit[SQL_LIMIT_LENGTH];
-	}
-	if (p1 < 0) {
-		p1 += len;
-		if (p1 < 0) {
-			p2 += p1;
-			if (p2 < 0)
-				p2 = 0;
-			p1 = 0;
-		}
-	} else if (p1 > 0) {
-		p1--;
-	} else if (p2 > 0) {
-		p2--;
-	}
-	if (negP2) {
-		p1 -= p2;
-		if (p1 < 0) {
-			p2 += p1;
-			p1 = 0;
-		}
-	}
-	assert(p1 >= 0 && p2 >= 0);
-	if (p0type != MP_BIN) {
-		/*
-		 * In the code below 'cnt' and 'n_chars' is
-		 * used because '\0' is not supposed to be
-		 * end-of-string symbol.
-		 */
-		int byte_size = mem_len_unsafe(&argv[0]);
-		int n_chars = sql_utf8_char_count(z, byte_size);
-		int cnt = 0;
-		int i = 0;
-		while (cnt < n_chars && p1) {
-			SQL_UTF8_FWD_1(z, i, byte_size);
-			cnt++;
-			p1--;
-		}
-		z += i;
-		i = 0;
-		for (z2 = z; cnt < n_chars && p2; p2--) {
-			SQL_UTF8_FWD_1(z2, i, byte_size);
-			cnt++;
-		}
-		z2 += i;
-		mem_copy_str(context->pOut, (char *)z, z2 - z);
-	} else {
-		if (p1 + p2 > len) {
-			p2 = len - p1;
-			if (p2 < 0)
-				p2 = 0;
-		}
-		mem_copy_bin(context->pOut, (char *)&z[p1], p2);
-	}
-}
-
 /*
  * Implementation of the round() function
  */
@@ -1933,15 +1978,15 @@ static struct sql_func_definition definitions[] = {
 	{"SOUNDEX", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_STRING, soundexFunc,
 	 NULL},
 	{"SUBSTR", 2, {FIELD_TYPE_STRING, FIELD_TYPE_INTEGER},
-	 FIELD_TYPE_STRING, substrFunc, NULL},
+	 FIELD_TYPE_STRING, func_substr_characters, NULL},
 	{"SUBSTR", 3,
 	 {FIELD_TYPE_STRING, FIELD_TYPE_INTEGER, FIELD_TYPE_INTEGER},
-	 FIELD_TYPE_STRING, substrFunc, NULL},
+	 FIELD_TYPE_STRING, func_substr_characters, NULL},
 	{"SUBSTR", 2, {FIELD_TYPE_VARBINARY, FIELD_TYPE_INTEGER},
-	 FIELD_TYPE_VARBINARY, substrFunc, NULL},
+	 FIELD_TYPE_VARBINARY, func_substr_octets, NULL},
 	{"SUBSTR", 3,
 	 {FIELD_TYPE_VARBINARY, FIELD_TYPE_INTEGER, FIELD_TYPE_INTEGER},
-	 FIELD_TYPE_VARBINARY, substrFunc, NULL},
+	 FIELD_TYPE_VARBINARY, func_substr_octets, NULL},
 	{"SUM", 1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_INTEGER, step_sum, NULL},
 	{"SUM", 1, {FIELD_TYPE_DOUBLE}, FIELD_TYPE_DOUBLE, step_sum, NULL},
 	{"TOTAL", 1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_DOUBLE, step_total,
diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
index 416f27d69..dc4dfdc0e 100755
--- a/test/sql-tap/func.test.lua
+++ b/test/sql-tap/func.test.lua
@@ -139,7 +139,7 @@ test:do_execsql_test(
         SELECT substr(t1,-1,1) FROM tbl1 ORDER BY t1
     ]], {
         -- <func-2.3>
-        "e", "s", "m", "e", "s"
+        "", "", "", "", ""
         -- </func-2.3>
     })
 
@@ -149,7 +149,7 @@ test:do_execsql_test(
         SELECT substr(t1,-1,2) FROM tbl1 ORDER BY t1
     ]], {
         -- <func-2.4>
-        "e", "s", "m", "e", "s"
+        "", "", "", "", ""
         -- </func-2.4>
     })
 
@@ -159,7 +159,7 @@ test:do_execsql_test(
         SELECT substr(t1,-2,1) FROM tbl1 ORDER BY t1
     ]], {
         -- <func-2.5>
-        "e", "i", "a", "r", "i"
+        "", "", "", "", ""
         -- </func-2.5>
     })
 
@@ -169,7 +169,7 @@ test:do_execsql_test(
         SELECT substr(t1,-2,2) FROM tbl1 ORDER BY t1
     ]], {
         -- <func-2.6>
-        "ee", "is", "am", "re", "is"
+        "", "", "", "", ""
         -- </func-2.6>
     })
 
@@ -179,7 +179,7 @@ test:do_execsql_test(
         SELECT substr(t1,-4,2) FROM tbl1 ORDER BY t1
     ]], {
         -- <func-2.7>
-        "fr", "", "gr", "wa", "th"
+        "", "", "", "", ""
         -- </func-2.7>
     })
 
@@ -288,7 +288,7 @@ if ("ሴ" ~= "u1234") then
             SELECT substr(t1,-1,1) FROM tbl1 ORDER BY t1
         ]], {
             -- <func-3.8>
-            "8", "s", "s", "o"
+            "", "", "", ""
             -- </func-3.8>
         })
 
@@ -298,7 +298,7 @@ if ("ሴ" ~= "u1234") then
             SELECT substr(t1,-3,2) FROM tbl1 ORDER BY t1
         ]], {
             -- <func-3.9>
-            "F-", "er", "in", "ሴh"
+            "", "", "", ""
             -- </func-3.9>
         })
 
@@ -308,7 +308,7 @@ if ("ሴ" ~= "u1234") then
             SELECT substr(t1,-4,3) FROM tbl1 ORDER BY t1
         ]], {
             -- <func-3.10>
-            "TF-", "ter", "ain", "iሴh"
+            "", "", "", ""
             -- </func-3.10>
         })
 
diff --git a/test/sql-tap/func2.test.lua b/test/sql-tap/func2.test.lua
index 792f020f1..b786b4d96 100755
--- a/test/sql-tap/func2.test.lua
+++ b/test/sql-tap/func2.test.lua
@@ -162,7 +162,7 @@ test:do_execsql_test(
         SELECT SUBSTR('Supercalifragilisticexpialidocious', -1)
     ]], {
         -- <func2-1.11>
-        "s"
+        "Supercalifragilisticexpialidocious"
         -- </func2-1.11>
     })
 
@@ -172,7 +172,7 @@ test:do_execsql_test(
         SELECT SUBSTR('Supercalifragilisticexpialidocious', -2)
     ]], {
         -- <func2-1.12>
-        "us"
+        "Supercalifragilisticexpialidocious"
         -- </func2-1.12>
     })
 
@@ -182,7 +182,7 @@ test:do_execsql_test(
         SELECT SUBSTR('Supercalifragilisticexpialidocious', -30)
     ]], {
         -- <func2-1.13>
-        "rcalifragilisticexpialidocious"
+        "Supercalifragilisticexpialidocious"
         -- </func2-1.13>
     })
 
@@ -344,7 +344,7 @@ test:do_execsql_test(
         SELECT SUBSTR('Supercalifragilisticexpialidocious', -1, 1)
     ]], {
         -- <func2-1.25.1>
-        "s"
+        ""
         -- </func2-1.25.1>
     })
 
@@ -354,7 +354,7 @@ test:do_execsql_test(
         SELECT SUBSTR('Supercalifragilisticexpialidocious', -1, 2)
     ]], {
         -- <func2-1.25.2>
-        "s"
+        ""
         -- </func2-1.25.2>
     })
 
@@ -364,7 +364,7 @@ test:do_execsql_test(
         SELECT SUBSTR('Supercalifragilisticexpialidocious', -2, 1)
     ]], {
         -- <func2-1.26>
-        "u"
+        ""
         -- </func2-1.26>
     })
 
@@ -374,7 +374,7 @@ test:do_execsql_test(
         SELECT SUBSTR('Supercalifragilisticexpialidocious', -30, 1)
     ]], {
         -- <func2-1.27>
-        "r"
+        ""
         -- </func2-1.27>
     })
 
@@ -394,7 +394,7 @@ test:do_execsql_test(
         SELECT SUBSTR('Supercalifragilisticexpialidocious', -34, 1)
     ]], {
         -- <func2-1.28.1>
-        "S"
+        ""
         -- </func2-1.28.1>
     })
 
@@ -404,7 +404,7 @@ test:do_execsql_test(
         SELECT SUBSTR('Supercalifragilisticexpialidocious', -34, 2)
     ]], {
         -- <func2-1.28.2>
-        "Su"
+        ""
         -- </func2-1.28.2>
     })
 
@@ -424,7 +424,7 @@ test:do_execsql_test(
         SELECT SUBSTR('Supercalifragilisticexpialidocious', -35, 2)
     ]], {
         -- <func2-1.29.2>
-        "S"
+        ""
         -- </func2-1.29.2>
     })
 
@@ -464,11 +464,13 @@ test:do_execsql_test(
         SELECT SUBSTR('Supercalifragilisticexpialidocious', -36, 3)
     ]], {
         -- <func2-1.30.3>
-        "S"
+        ""
         -- </func2-1.30.3>
     })
 
 -- p1 is 1-indexed, p2 length to return, p2<0 return p2 chars before p1
+local err = [[Failed to execute SQL statement: Length of the result cannot ]]..
+            [[be less than 0]]
 test:do_execsql_test(
     "func2-1.31.0",
     [[
@@ -479,23 +481,23 @@ test:do_execsql_test(
         -- </func2-1.31.0>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func2-1.31.1",
     [[
         SELECT SUBSTR('Supercalifragilisticexpialidocious', 0, -1)
     ]], {
         -- <func2-1.31.1>
-        ""
+        1, err
         -- </func2-1.31.1>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func2-1.31.2",
     [[
         SELECT SUBSTR('Supercalifragilisticexpialidocious', 0, -2)
     ]], {
         -- <func2-1.31.2>
-        ""
+        1, err
         -- </func2-1.31.2>
     })
 
@@ -509,13 +511,13 @@ test:do_execsql_test(
         -- </func2-1.32.0>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func2-1.32.1",
     [[
         SELECT SUBSTR('Supercalifragilisticexpialidocious', 1, -1)
     ]], {
         -- <func2-1.32.1>
-        ""
+        1, err
         -- </func2-1.32.1>
     })
 
@@ -529,23 +531,23 @@ test:do_execsql_test(
         -- </func2-1.33.0>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func2-1.33.1",
     [[
         SELECT SUBSTR('Supercalifragilisticexpialidocious', 2, -1)
     ]], {
         -- <func2-1.33.1>
-        "S"
+        1, err
         -- </func2-1.33.1>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func2-1.33.2",
     [[
         SELECT SUBSTR('Supercalifragilisticexpialidocious', 2, -2)
     ]], {
         -- <func2-1.33.2>
-        "S"
+        1, err
         -- </func2-1.33.2>
     })
 
@@ -559,63 +561,63 @@ test:do_execsql_test(
         -- </func2-1.34.0>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func2-1.34.1",
     [[
         SELECT SUBSTR('Supercalifragilisticexpialidocious', 3, -1)
     ]], {
         -- <func2-1.34.1>
-        "u"
+        1, err
         -- </func2-1.34.1>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func2-1.34.2",
     [[
         SELECT SUBSTR('Supercalifragilisticexpialidocious', 3, -2)
     ]], {
         -- <func2-1.34.2>
-        "Su"
+        1, err
         -- </func2-1.34.2>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func2-1.35.1",
     [[
         SELECT SUBSTR('Supercalifragilisticexpialidocious', 30, -1)
     ]], {
         -- <func2-1.35.1>
-        "o"
+        1, err
         -- </func2-1.35.1>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func2-1.35.2",
     [[
         SELECT SUBSTR('Supercalifragilisticexpialidocious', 30, -2)
     ]], {
         -- <func2-1.35.2>
-        "do"
+        1, err
         -- </func2-1.35.2>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func2-1.36",
     [[
         SELECT SUBSTR('Supercalifragilisticexpialidocious', 34, -1)
     ]], {
         -- <func2-1.36>
-        "u"
+        1, err
         -- </func2-1.36>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func2-1.37",
     [[
         SELECT SUBSTR('Supercalifragilisticexpialidocious', 35, -1)
     ]], {
         -- <func2-1.37>
-        "s"
+        1, err
         -- </func2-1.37>
     })
 
@@ -629,23 +631,23 @@ test:do_execsql_test(
         -- </func2-1.38.0>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func2-1.38.1",
     [[
         SELECT SUBSTR('Supercalifragilisticexpialidocious', 36, -1)
     ]], {
         -- <func2-1.38.1>
-        ""
+        1, err
         -- </func2-1.38.1>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func2-1.38.2",
     [[
         SELECT SUBSTR('Supercalifragilisticexpialidocious', 36, -2)
     ]], {
         -- <func2-1.38.2>
-        "s"
+        1, err
         -- </func2-1.38.2>
     })
 
@@ -993,7 +995,7 @@ if ("ሴ" ~= "u1234")
             SELECT SUBSTR('ሴ', -1, 1)
         ]], {
             -- <func2-2.8.1>
-            "ሴ"
+            ""
             -- </func2-2.8.1>
         })
 
@@ -1003,7 +1005,7 @@ if ("ሴ" ~= "u1234")
             SELECT SUBSTR('ሴ', -1, 2)
         ]], {
             -- <func2-2.8.2>
-            "ሴ"
+            ""
             -- </func2-2.8.2>
         })
 
@@ -1130,21 +1132,21 @@ test:do_test(
     function()
         local blob = test:execsql "SELECT SUBSTR(x'1234', -1, 1)"
         return bin_to_hex(test.lindex(blob, 0))
-    end, "34")
+    end, "")
 
 test:do_test(
     "func2-3.4.2",
     function()
         local blob = test:execsql "SELECT SUBSTR(x'1234', -1, 2)"
         return bin_to_hex(test.lindex(blob, 0))
-    end, "34")
+    end, "")
 
 test:do_test(
     "func2-3.4.3",
     function()
         local blob = test:execsql "SELECT SUBSTR(x'1234', -1, 3)"
         return bin_to_hex(test.lindex(blob, 0))
-    end, "34")
+    end, "12")
 
 test:do_test(
     "func2-3.5.0",
@@ -1158,21 +1160,21 @@ test:do_test(
     function()
         local blob = test:execsql "SELECT SUBSTR(x'1234', -2, 1)"
         return bin_to_hex(test.lindex(blob, 0))
-    end, "12")
+    end, "")
 
 test:do_test(
     "func2-3.5.2",
     function()
         local blob = test:execsql "SELECT SUBSTR(x'1234', -2, 2)"
         return bin_to_hex(test.lindex(blob, 0))
-    end, "1234")
+    end, "")
 
 test:do_test(
     "func2-3.5.3",
     function()
         local blob = test:execsql "SELECT SUBSTR(x'1234', -2, 3)"
         return bin_to_hex(test.lindex(blob, 0))
-    end, "1234")
+    end, "")
 
 test:do_test(
     "func2-3.6.0",
@@ -1181,26 +1183,28 @@ test:do_test(
         return bin_to_hex(test.lindex(blob, 0))
     end, "")
 
+local err = [[Failed to execute SQL statement: Length of the result cannot ]]..
+            [[be less than 0]]
 test:do_test(
     "func2-3.6.1",
     function()
-        local blob = test:execsql "SELECT SUBSTR(x'1234', -1, -1)"
-        return bin_to_hex(test.lindex(blob, 0))
-    end, "12")
+        return test:catchsql("SELECT SUBSTR(x'1234', -1, -1)")
+    end,
+        {1, err})
 
 test:do_test(
     "func2-3.6.2",
     function()
-        local blob = test:execsql "SELECT SUBSTR(x'1234', -1, -2)"
-        return bin_to_hex(test.lindex(blob, 0))
-    end, "12")
+        return test:catchsql("SELECT SUBSTR(x'1234', -1, -2)")
+    end,
+        {1, err})
 
 test:do_test(
     "func2-3.6.3",
     function()
-        local blob = test:execsql "SELECT SUBSTR(x'1234', -1, -3)"
-        return bin_to_hex(test.lindex(blob, 0))
-    end, "12")
+        return test:catchsql("SELECT SUBSTR(x'1234', -1, -3)")
+    end,
+        {1, err})
 
 test:do_test(
     "func2-3.7.0",
@@ -1212,16 +1216,16 @@ test:do_test(
 test:do_test(
     "func2-3.7.1",
     function()
-        local blob = test:execsql "SELECT SUBSTR(x'1234', -2, -1)"
-        return bin_to_hex(test.lindex(blob, 0))
-    end, "")
+        return test:catchsql("SELECT SUBSTR(x'1234', -2, -1)")
+    end,
+        {1, err})
 
 test:do_test(
     "func2-3.7.2",
     function()
-        local blob = test:execsql "SELECT SUBSTR(x'1234', -2, -2)"
-        return bin_to_hex(test.lindex(blob, 0))
-    end, "")
+        return test:catchsql("SELECT SUBSTR(x'1234', -2, -2)")
+    end,
+        {1, err})
 
 test:do_test(
     "func2-3.8.0",
@@ -1233,16 +1237,16 @@ test:do_test(
 test:do_test(
     "func2-3.8.1",
     function()
-        local blob = test:execsql "SELECT SUBSTR(x'1234', 1, -1)"
-        return bin_to_hex(test.lindex(blob, 0))
-    end, "")
+        return test:catchsql("SELECT SUBSTR(x'1234', 1, -1)")
+    end,
+        {1, err})
 
 test:do_test(
     "func2-3.8.2",
     function()
-        local blob = test:execsql "SELECT SUBSTR(x'1234', 1, -2)"
-        return bin_to_hex(test.lindex(blob, 0))
-    end, "")
+        return test:catchsql("SELECT SUBSTR(x'1234', 1, -2)")
+    end,
+        {1, err})
 
 test:do_test(
     "func2-3.9.0",
@@ -1254,17 +1258,15 @@ test:do_test(
 test:do_test(
     "func2-3.9.1",
     function()
-        local blob = test:execsql "SELECT SUBSTR(x'1234', 2, -1)"
-        return bin_to_hex(test.lindex(blob, 0))
-    end, "12")
+        return test:catchsql("SELECT SUBSTR(x'1234', 2, -2)")
+    end,
+        {1, err})
 
 test:do_test(
     "func2-3.9.2",
     function()
-        local blob = test:execsql "SELECT SUBSTR(x'1234', 2, -2)"
-        return bin_to_hex(test.lindex(blob, 0))
-    end, "12")
-
-
+        return test:catchsql("SELECT SUBSTR(x'1234', 2, -2)")
+    end,
+        {1, err})
 
 test:finish_test()
diff --git a/test/sql-tap/substr.test.lua b/test/sql-tap/substr.test.lua
index e7e6d7aca..45aae8506 100755
--- a/test/sql-tap/substr.test.lua
+++ b/test/sql-tap/substr.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 local test = require("sqltester")
-test:plan(93)
+test:plan(85)
 
 --!./tcltestrunner.lua
 -- 2007 May 14
@@ -74,17 +74,11 @@ substr_test("1.2", "abcdefg","2","1","b")
 substr_test("1.3", "abcdefg","1","2","ab")
 substr_test("1.4", "abcdefg","1","100","abcdefg")
 substr_test("1.5", "abcdefg","0","2","a")
-substr_test("1.6", "abcdefg","-1","1","g")
-substr_test("1.7", "abcdefg","-1","10","g")
-substr_test("1.8", "abcdefg","-5","3","cde")
-substr_test("1.9", "abcdefg","-7","3","abc")
-substr_test("1.10", "abcdefg","-100","98","abcde")
-substr_test("1.11", "abcdefg","5","-1","d")
-substr_test("1.12", "abcdefg","5","-4","abcd")
-substr_test("1.13", "abcdefg","5","-5","abcd")
-substr_test("1.14", "abcdefg","-5","-1","b")
-substr_test("1.15", "abcdefg","-5","-2","ab")
-substr_test("1.16", "abcdefg","-5","-3","ab")
+substr_test("1.6", "abcdefg","-1","1","")
+substr_test("1.7", "abcdefg","-1","10","abcdefg")
+substr_test("1.8", "abcdefg","-5","3","")
+substr_test("1.9", "abcdefg","-7","3","")
+substr_test("1.10", "abcdefg","-100","98","")
 substr_test("1.17", "abcdefg","100","200","")
 substr_test("1.18", "abcdefg","200","100","")
 -- Make sure NULL is returned if any parameter is NULL
@@ -144,21 +138,20 @@ test:do_test(
 substr_test("2.1", "ሴ⍅㑖","1","1","ሴ")
 substr_test("2.2", "ሴ⍅㑖","2","1","⍅")
 substr_test("2.3", "ሴ⍅㑖","1","2","ሴ⍅")
-substr_test("2.4", "ሴ⍅㑖","-1","1","㑖")
-substr_test("2.5", "aሴb⍅c㑖c","-5","3","b⍅c")
-substr_test("2.6", "aሴb⍅c㑖c","-2","-3","b⍅c")
+substr_test("2.4", "ሴ⍅㑖","-1","1","")
+substr_test("2.5", "aሴb⍅c㑖c","-5","3","")
 -- Basic functionality for BLOBs
 --
 subblob_test("3.1", "61626364656667","1","1","61")
 subblob_test("3.2", "61626364656667","2","1","62")
 subblob_test("3.3", "61626364656667","1","2","6162")
 subblob_test("3.4", "61626364656667","1","100","61626364656667")
-subblob_test("3.5", "61626364656667","0","2","61")
-subblob_test("3.6", "61626364656667","-1","1","67")
-subblob_test("3.7", "61626364656667","-1","10","67")
-subblob_test("3.8", "61626364656667","-5","3","636465")
-subblob_test("3.9", "61626364656667","-7","3","616263")
-subblob_test("3.10", "61626364656667","-100","98","6162636465")
+subblob_test("3.5", "61626364656667", "0", "2", "61")
+subblob_test("3.6", "61626364656667", "-1", "1", "")
+subblob_test("3.7", "61626364656667", "-1", "10", "61626364656667")
+subblob_test("3.8", "61626364656667", "-5", "3", "")
+subblob_test("3.9", "61626364656667","-7","3", "")
+subblob_test("3.10", "61626364656667", "-100", "98", "")
 subblob_test("3.11", "61626364656667","100","200","")
 subblob_test("3.12", "61626364656667","200","100","")
 -- If these blobs were strings, then they would contain multi-byte
@@ -168,9 +161,9 @@ subblob_test("3.12", "61626364656667","200","100","")
 subblob_test("4.1", "61E188B462E28D8563E3919663","1","1","61")
 subblob_test("4.2", "61E188B462E28D8563E3919663","2","1","E1")
 subblob_test("4.3", "61E188B462E28D8563E3919663","1","2","61E1")
-subblob_test("4.4", "61E188B462E28D8563E3919663","-2","1","96")
-subblob_test("4.5", "61E188B462E28D8563E3919663","-5","4","63E39196")
-subblob_test("4.6", "61E188B462E28D8563E3919663","-100","98","61E188B462E28D8563E391")
+subblob_test("4.4", "61E188B462E28D8563E3919663", "-2", "1", "")
+subblob_test("4.5", "61E188B462E28D8563E3919663", "-5", "4", "")
+subblob_test("4.6", "61E188B462E28D8563E3919663", "-100", "98", "")
 -- Two-argument SUBSTR
 --
 local function substr_2_test(id, string, idx, result)
@@ -193,7 +186,85 @@ local function substr_2_test(id, string, idx, result)
 end
 
 substr_2_test("5.1","abcdefghijklmnop","5","efghijklmnop")
-substr_2_test("5.2","abcdef","-5","bcdef")
+substr_2_test("5.2","abcdef","-5","abcdef")
 
+--
+-- gh-4145: Make sure SUBSTR() throws an error if the third argument is
+-- negative.
+--
+test:do_catchsql_test(
+    "substr-6.1",
+    [[
+        SELECT SUBSTR('12345', 1, -1);
+    ]],
+    {
+        1, [[Failed to execute SQL statement: ]]..
+           [[Length of the result cannot be less than 0]]
+    }
+)
+
+test:do_catchsql_test(
+    "substr-6.2",
+    [[
+        SELECT SUBSTR(x'3132333435', 1, -1);
+    ]],
+    {
+        1, [[Failed to execute SQL statement: ]]..
+           [[Length of the result cannot be less than 0]]
+    }
+)
+
+-- gh-4145: Make sure that SUBSTR() works according to ANSI.
+
+--
+-- Make sure SUBSTR() returns "" if the sum of the second and third arguments is
+-- 1 or less.
+--
+test:do_execsql_test(
+    "builtins-6.3",
+    [[
+        SELECT SUBSTR('asdfg', -10, 5), SUBSTR('asdfg', -4, 5);
+    ]],
+    {
+        '', ''
+    }
+)
+
+--
+-- Make sure that if the sum of the second and third arguments is more than 1
+-- and the second argument is negative, the result starts from the start of the
+-- string and length of the result will be one less than sum of the  second and
+-- third arguments.
+--
+test:do_execsql_test(
+    "builtins-6.4",
+    [[
+        SELECT SUBSTR('123456789', -5, 10);
+    ]],
+    {
+        '1234'
+    }
+)
+
+-- Make sure SUBSTR() can work with big INTEGERs.
+test:do_execsql_test(
+    "builtins-6.5",
+    [[
+        SELECT SUBSTR('123456789', -9223372036854775808, 9223372036854775812);
+    ]],
+    {
+        '123'
+    }
+)
+
+test:do_execsql_test(
+    "builtins-6.6",
+    [[
+        SELECT SUBSTR('123456789', 0, 18000000000000000000);
+    ]],
+    {
+        '123456789'
+    }
+)
 
 test:finish_test()

  reply	other threads:[~2021-11-01 10:48 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-01 16:29 [Tarantool-patches] [PATCH v1 0/8] Rework standard function Mergen Imeev via Tarantool-patches
2021-10-01 16:29 ` [Tarantool-patches] [PATCH v1 1/8] sql: refactor ABS() funcion Mergen Imeev via Tarantool-patches
2021-10-08 21:55   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-20 16:52     ` Mergen Imeev via Tarantool-patches
2021-10-28 22:11       ` Vladislav Shpilevoy via Tarantool-patches
2021-11-01 10:11         ` Mergen Imeev via Tarantool-patches
2021-11-01 13:37           ` Vladislav Shpilevoy via Tarantool-patches
2021-10-01 16:29 ` [Tarantool-patches] [PATCH v1 2/8] sql: refactor CHAR_LENGTH() function Mergen Imeev via Tarantool-patches
2021-10-08 21:56   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-20 16:58     ` Mergen Imeev via Tarantool-patches
2021-10-28 22:11       ` Vladislav Shpilevoy via Tarantool-patches
2021-11-01 10:20         ` Mergen Imeev via Tarantool-patches
2021-10-01 16:29 ` [Tarantool-patches] [PATCH v1 3/8] sql: refactor UPPER() and LOWER() functions Mergen Imeev via Tarantool-patches
2021-10-20 17:02   ` Mergen Imeev via Tarantool-patches
2021-10-01 16:29 ` [Tarantool-patches] [PATCH v1 4/8] sql: refactor NULLIF() function Mergen Imeev via Tarantool-patches
2021-10-01 16:29 ` [Tarantool-patches] [PATCH v1 5/8] sql: rework TRIM() function Mergen Imeev via Tarantool-patches
2021-10-20 17:05   ` Mergen Imeev via Tarantool-patches
2021-10-28 22:12     ` Vladislav Shpilevoy via Tarantool-patches
2021-11-01 10:35       ` Mergen Imeev via Tarantool-patches
2021-10-01 16:29 ` [Tarantool-patches] [PATCH v1 6/8] sql: rework POSITION() function Mergen Imeev via Tarantool-patches
2021-10-08 21:58   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-20 17:08     ` Mergen Imeev via Tarantool-patches
2021-11-01 10:41       ` Mergen Imeev via Tarantool-patches
2021-10-01 16:29 ` [Tarantool-patches] [PATCH v1 7/8] sql: rework SUBSTR() function Mergen Imeev via Tarantool-patches
2021-10-08 22:02   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-20 17:15     ` Mergen Imeev via Tarantool-patches
2021-10-28 22:13       ` Vladislav Shpilevoy via Tarantool-patches
2021-11-01 10:45         ` Mergen Imeev via Tarantool-patches
2021-10-01 16:29 ` [Tarantool-patches] [PATCH v1 8/8] sql: refactor LIKE() function Mergen Imeev via Tarantool-patches
2021-10-08 22:02   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-20 17:19     ` Mergen Imeev via Tarantool-patches
2021-11-01 10:48       ` Mergen Imeev via Tarantool-patches [this message]
2021-11-01 10:53         ` Mergen Imeev via Tarantool-patches
2021-10-04 13:32 ` [Tarantool-patches] [PATCH v1 0/8] Rework standard function Mergen Imeev via Tarantool-patches
2021-11-01 13:38 ` Vladislav Shpilevoy via Tarantool-patches
2021-11-11 10:45 Mergen Imeev via Tarantool-patches
2021-11-11 10:45 ` [Tarantool-patches] [PATCH v1 8/8] sql: refactor LIKE() function Mergen Imeev via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211101104851.GF227579@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=imeevma@tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v1 8/8] sql: refactor LIKE() function' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox