[Tarantool-patches] [PATCH v1 2/8] sql: rework CHAR_LENGTH() function
imeevma at tarantool.org
imeevma at tarantool.org
Thu Nov 11 13:45:27 MSK 2021
The CHAR_LENGTH() and CHARACTER_LENGTH() functions, as well as LENGTH()
for STRING arguments, now use ICU functions to determine the length of a
string.
Part of #4145
@TarantoolBot document
Title: Invalid UTF-8 values and ICU
Invalid UTF-8 values may be handled differently depending on the ICU
version: the same ill-formed byte sequence may be counted as a different
number of characters. For example, for this request:
```
SELECT CHAR_LENGTH(CAST(x'f0808080' AS STRING));
```
On `centos 7` with `libicu-devel-50.2-4.el7_7.x86_64` the result will
be:
```
tarantool> box.execute([[SELECT CHAR_LENGTH(CAST(x'f0808080' AS STRING));]])
---
- metadata:
- name: COLUMN_1
type: integer
rows:
- [1]
...
```
On `ubuntu 20.04` with `libicu-dev` version `66.1-2ubuntu2` the result
will be:
```
tarantool> box.execute([[SELECT CHAR_LENGTH(CAST(x'f0808080' AS STRING));]])
---
- metadata:
- name: COLUMN_1
type: integer
rows:
- [4]
...
```
---
src/box/sql/func.c | 26 +++++++++++++++++++---
test/sql-tap/badutf1.test.lua | 42 +----------------------------------
test/sql-tap/func3.test.lua | 10 ++++-----
3 files changed, 28 insertions(+), 50 deletions(-)
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index dbeb38bee..bc7a1fedd 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -258,6 +258,26 @@ func_abs_double(struct sql_context *ctx, int argc, struct Mem *argv)
mem_set_double(ctx->pOut, arg->u.r < 0 ? -arg->u.r : arg->u.r);
}
+/** Implementation of the CHAR_LENGTH() function. */
+static void
+func_char_length(struct sql_context *ctx, int argc, struct Mem *argv)
+{
+ assert(argc == 1);
+ (void)argc;
+ struct Mem *arg = &argv[0];
+ if (mem_is_null(arg))
+ return;
+ assert(mem_is_str(arg) && arg->n >= 0);
+ uint32_t len = 0;
+ int offset = 0;
+ while (offset < arg->n) {
+ UChar32 c;
+ U8_NEXT((uint8_t *)arg->z, offset, arg->n, c);
+ ++len;
+ }
+ mem_set_uint(ctx->pOut, len);
+}
+
static const unsigned char *
mem_as_ustr(struct Mem *mem)
{
@@ -1912,8 +1932,8 @@ static struct sql_func_definition definitions[] = {
{"AVG", 1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_INTEGER, step_avg, fin_avg},
{"AVG", 1, {FIELD_TYPE_DOUBLE}, FIELD_TYPE_DOUBLE, step_avg, fin_avg},
{"CHAR", -1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_STRING, charFunc, NULL},
- {"CHAR_LENGTH", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_INTEGER, lengthFunc,
- NULL},
+ {"CHAR_LENGTH", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_INTEGER,
+ func_char_length, NULL},
{"COALESCE", -1, {FIELD_TYPE_ANY}, FIELD_TYPE_SCALAR, sql_builtin_stub,
NULL},
{"COUNT", 0, {}, FIELD_TYPE_INTEGER, step_count, fin_count},
@@ -1957,7 +1977,7 @@ static struct sql_func_definition definitions[] = {
{"LEAST", -1, {FIELD_TYPE_STRING}, FIELD_TYPE_STRING, minmaxFunc, NULL},
{"LEAST", -1, {FIELD_TYPE_SCALAR}, FIELD_TYPE_SCALAR, minmaxFunc, NULL},
- {"LENGTH", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_INTEGER, lengthFunc,
+ {"LENGTH", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_INTEGER, func_char_length,
NULL},
{"LENGTH", 1, {FIELD_TYPE_VARBINARY}, FIELD_TYPE_INTEGER, lengthFunc,
NULL},
diff --git a/test/sql-tap/badutf1.test.lua b/test/sql-tap/badutf1.test.lua
index b25436186..ce8354840 100755
--- a/test/sql-tap/badutf1.test.lua
+++ b/test/sql-tap/badutf1.test.lua
@@ -1,6 +1,6 @@
#!/usr/bin/env tarantool
local test = require("sqltester")
-test:plan(23)
+test:plan(19)
--!./tcltestrunner.lua
-- 2007 May 15
@@ -253,26 +253,6 @@ test:do_test(
test:do_test(
"badutf-3.5",
- function()
- return test:execsql2("SELECT length('\x61\xc0\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80') AS x")
- end, {
- -- <badutf-3.5>
- "X", 12
- -- </badutf-3.5>
- })
-
-test:do_test(
- "badutf-3.6",
- function()
- return test:execsql2("SELECT length('\xc0\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80') AS x")
- end, {
- -- <badutf-3.6>
- "X", 11
- -- </badutf-3.6>
- })
-
-test:do_test(
- "badutf-3.7",
function()
return test:execsql2("SELECT length('\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80') AS x")
end, {
@@ -281,26 +261,6 @@ test:do_test(
-- </badutf-3.7>
})
-test:do_test(
- "badutf-3.8",
- function()
- return test:execsql2("SELECT length('\x80\x80\x80\x80\x80\xf0\x80\x80\x80\x80') AS x")
- end, {
- -- <badutf-3.8>
- "X", 7
- -- </badutf-3.8>
- })
-
-test:do_test(
- "badutf-3.9",
- function()
- return test:execsql2("SELECT length('\x80\x80\x80\x80\x80\xf0\x80\x80\x80\xff') AS x")
- end, {
- -- <badutf-3.9>
- "X", 7
- -- </badutf-3.9>
- })
-
test:do_test(
"badutf-4.1",
function()
diff --git a/test/sql-tap/func3.test.lua b/test/sql-tap/func3.test.lua
index 8d6268bb7..32c807103 100755
--- a/test/sql-tap/func3.test.lua
+++ b/test/sql-tap/func3.test.lua
@@ -1,6 +1,6 @@
#!/usr/bin/env tarantool
local test = require("sqltester")
-test:plan(35)
+test:plan(33)
--!./tcltestrunner.lua
-- 2010 August 27
@@ -294,11 +294,9 @@ suits[2] = {str = '\x80', len = 1}
suits[3] = {str = '\x61\x62\x63', len = 3}
suits[4] = {str = '\x7f\x80\x81', len = 3}
suits[5] = {str = '\x61\xc0', len = 2}
-suits[6] = {str = '\x61\xc0\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80', len = 12}
-suits[7] = {str = '\xc0\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80', len = 11}
-suits[8] = {str = '\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80', len = 10}
-suits[9] = {str = '\x80\x80\x80\x80\x80\xf0\x80\x80\x80\x80', len = 7}
-suits[10] = {str = '\x80\x80\x80\x80\x80\xf0\x80\x80\x80\xff', len = 7}
+suits[6] = {str = '\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80', len = 10}
+suits[7] = {str = '\x80\x80\x80\x80\x80\xf0\x90\x80\x80\x80', len = 7}
+suits[8] = {str = '\x80\x80\x80\x80\x80\xf0\x90\x80\x80\xff', len = 7}
for k,v in pairs(suits) do
test:do_execsql_test(
--
2.25.1
More information about the Tarantool-patches
mailing list