From: Mergen Imeev via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: kyukhin@tarantool.org
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH v1 01/21] sql: rework CHAR() function
Date: Thu, 11 Nov 2021 13:48:50 +0300 [thread overview]
Message-ID: <94041641dd5e0c714fcfc7be2dbe56144de5139e.1636627579.git.imeevma@gmail.com> (raw)
In-Reply-To: <cover.1636627579.git.imeevma@gmail.com>
The CHAR() function now uses the ICU macros U8_LENGTH() and U8_APPEND() to encode its arguments as UTF-8 characters.
Part of #4145
---
src/box/sql/func.c | 100 +++++++++++++++++++++++++--------------------
1 file changed, 55 insertions(+), 45 deletions(-)
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 868d51145..0cd8f8f69 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -728,6 +728,60 @@ func_substr_characters(struct sql_context *ctx, int argc, struct Mem *argv)
ctx->is_aborted = true;
}
+/**
+ * Implementation of the CHAR() function.
+ *
+ * This function takes zero or more arguments, each of which is an integer. It
+ * constructs a string where each character of the string is the unicode
+ * character for the corresponding integer argument. The result is encoded
+ * in UTF-8 using the ICU U8_* macros.
+ *
+ * If an argument is not an unsigned integer (e.g. it is negative), is greater
+ * than 0x10ffff, or is a surrogate code point (0xd800..0xdfff), the symbol "�"
+ * (U+FFFD) is used, since such values cannot be encoded as well-formed UTF-8.
+ * The symbol '\0' is used in place of a NULL argument.
+ *
+ * With no arguments the result is an empty string. If an allocation fails,
+ * the context is marked as aborted and no result is set.
+ */
+static void
+func_char(struct sql_context *ctx, int argc, struct Mem *argv)
+{
+	if (argc == 0) {
+		mem_set_str_static(ctx->pOut, "", 0);
+		return;
+	}
+	struct region *region = &fiber()->gc;
+	size_t svp = region_used(region);
+	uint32_t size;
+	/* Temporary array of decoded code points, one per argument. */
+	UChar32 *buf = region_alloc_array(region, typeof(*buf), argc, &size);
+	if (buf == NULL) {
+		ctx->is_aborted = true;
+		diag_set(OutOfMemory, size, "region_alloc_array", "buf");
+		return;
+	}
+	/* First pass: normalize code points and compute the UTF-8 length. */
+	int len = 0;
+	for (int i = 0; i < argc; ++i) {
+		UChar32 c;
+		if (mem_is_null(&argv[i]))
+			c = 0;
+		else if (!mem_is_uint(&argv[i]) || argv[i].u.u > 0x10ffff)
+			c = 0xfffd;
+		else
+			c = (UChar32)argv[i].u.u;
+		/*
+		 * U8_LENGTH() returns 0 and U8_APPEND() reports an error for
+		 * surrogates, so they have to be replaced as well.
+		 */
+		if (U_IS_SURROGATE(c))
+			c = 0xfffd;
+		buf[i] = c;
+		len += U8_LENGTH(c);
+	}
+
+	char *str = sqlDbMallocRawNN(sql_get(), len);
+	if (str == NULL) {
+		region_truncate(region, svp);
+		ctx->is_aborted = true;
+		return;
+	}
+	/* Second pass: encode every code point into the result buffer. */
+	int pos = 0;
+	for (int i = 0; i < argc; ++i) {
+		UBool is_error = false;
+		U8_APPEND((uint8_t *)str, pos, len, buf[i], is_error);
+		assert(!is_error);
+		(void)is_error;
+	}
+	region_truncate(region, svp);
+	assert(pos == len);
+	(void)pos;
+	/* Ownership of str is passed to the output MEM. */
+	mem_set_str_allocated(ctx->pOut, str, len);
+}
+
static const unsigned char *
mem_as_ustr(struct Mem *mem)
{
@@ -1461,50 +1515,6 @@ unicodeFunc(struct sql_context *context, int argc, struct Mem *argv)
sql_result_uint(context, sqlUtf8Read(&z));
}
-/*
- * The char() function takes zero or more arguments, each of which is
- * an integer. It constructs a string where each character of the string
- * is the unicode character for the corresponding integer argument.
- */
-static void
-charFunc(struct sql_context *context, int argc, struct Mem *argv)
-{
- unsigned char *z, *zOut;
- int i;
- zOut = z = sql_malloc64(argc * 4 + 1);
- if (z == NULL) {
- context->is_aborted = true;
- return;
- }
- for (i = 0; i < argc; i++) {
- uint64_t x;
- unsigned c;
- if (sql_value_type(&argv[i]) == MP_INT)
- x = 0xfffd;
- else
- x = mem_get_uint_unsafe(&argv[i]);
- if (x > 0x10ffff)
- x = 0xfffd;
- c = (unsigned)(x & 0x1fffff);
- if (c < 0x00080) {
- *zOut++ = (u8) (c & 0xFF);
- } else if (c < 0x00800) {
- *zOut++ = 0xC0 + (u8) ((c >> 6) & 0x1F);
- *zOut++ = 0x80 + (u8) (c & 0x3F);
- } else if (c < 0x10000) {
- *zOut++ = 0xE0 + (u8) ((c >> 12) & 0x0F);
- *zOut++ = 0x80 + (u8) ((c >> 6) & 0x3F);
- *zOut++ = 0x80 + (u8) (c & 0x3F);
- } else {
- *zOut++ = 0xF0 + (u8) ((c >> 18) & 0x07);
- *zOut++ = 0x80 + (u8) ((c >> 12) & 0x3F);
- *zOut++ = 0x80 + (u8) ((c >> 6) & 0x3F);
- *zOut++ = 0x80 + (u8) (c & 0x3F);
- }
- }
- sql_result_text64(context, (char *)z, zOut - z, sql_free);
-}
-
/*
* The hex() function. Interpret the argument as a blob. Return
* a hexadecimal rendering as text.
@@ -1857,7 +1867,7 @@ static struct sql_func_definition definitions[] = {
NULL},
{"AVG", 1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_INTEGER, step_avg, fin_avg},
{"AVG", 1, {FIELD_TYPE_DOUBLE}, FIELD_TYPE_DOUBLE, step_avg, fin_avg},
- {"CHAR", -1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_STRING, charFunc, NULL},
+ {"CHAR", -1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_STRING, func_char, NULL},
{"CHAR_LENGTH", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_INTEGER,
func_char_length, NULL},
{"COALESCE", -1, {FIELD_TYPE_ANY}, FIELD_TYPE_SCALAR, sql_builtin_stub,
--
2.25.1
next prev parent reply other threads:[~2021-11-11 10:50 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-11-11 10:48 [Tarantool-patches] [PATCH v1 00/21] Refactor non-standard and non-aggragate functions Mergen Imeev via Tarantool-patches
2021-11-11 10:48 ` Mergen Imeev via Tarantool-patches [this message]
2021-11-11 10:48 ` [Tarantool-patches] [PATCH v1 02/21] sql: refactor GREATEST() and LEAST() functions Mergen Imeev via Tarantool-patches
2021-11-11 10:48 ` [Tarantool-patches] [PATCH v1 03/21] sql: refactor HEX() function Mergen Imeev via Tarantool-patches
2021-11-11 10:48 ` [Tarantool-patches] [PATCH v1 04/21] sql: refactor LENGTH() function Mergen Imeev via Tarantool-patches
2021-11-11 10:48 ` [Tarantool-patches] [PATCH v1 05/21] sql: refactor PRINTF() function Mergen Imeev via Tarantool-patches
2021-11-11 10:48 ` [Tarantool-patches] [PATCH v1 06/21] sql: refactor RANDOM() function Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 07/21] sql: rework RANDOMBLOB() function Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 08/21] sql: refactor ZEROBLOB() function Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 09/21] sql: refactor TYPEOF() function Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 10/21] sql: refactor ROUND() function Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 11/21] sql: refactor ROW_COUNT() function Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 12/21] sql: rework UUID() function Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 13/21] sql: refactor VERSION() function Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 14/21] sql: refactor UNICODE() function Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 15/21] sql: refactor SOUNDEX() function Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 16/21] sql: refactor REPLACE() function Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 17/21] sql: refactor QUOTE() function Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 18/21] sql: remove unused code Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 19/21] sql: remove MEM_Dyn flag Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 20/21] sql: remove MEM_Term flag Mergen Imeev via Tarantool-patches
2021-11-11 10:49 ` [Tarantool-patches] [PATCH v1 21/21] sql: make arguments to be const Mergen Imeev via Tarantool-patches
2021-11-11 11:00 ` [Tarantool-patches] [PATCH v1 00/21] Refactor non-standard and non-aggragate functions Kirill Yukhin via Tarantool-patches
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=94041641dd5e0c714fcfc7be2dbe56144de5139e.1636627579.git.imeevma@gmail.com \
--to=tarantool-patches@dev.tarantool.org \
--cc=imeevma@tarantool.org \
--cc=kyukhin@tarantool.org \
--subject='Re: [Tarantool-patches] [PATCH v1 01/21] sql: rework CHAR() function' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox