From: Mergen Imeev via Tarantool-patches <tarantool-patches@dev.tarantool.org> To: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> Cc: tarantool-patches@dev.tarantool.org Subject: Re: [Tarantool-patches] [PATCH v1 01/21] sql: refactor CHAR() function Date: Mon, 25 Oct 2021 11:02:12 +0300 [thread overview] Message-ID: <20211025080212.GA36295@tarantool.org> (raw) In-Reply-To: <52e168e4-1559-fd6c-c5a6-d98e3c2d678a@tarantool.org> Hi! Thank you for the review! My answers, diff and new patch below. Also, I replaced part of old code with ICU macro and changed commit-message. On Fri, Oct 15, 2021 at 12:42:22AM +0200, Vladislav Shpilevoy wrote: > Hi! Thanks for the patch! > > Before this commit on the branch I see a comment called 'Fix'. > Please, cleanup the branch from unfinished work. > Understood, fixed. > On 08.10.2021 19:31, imeevma@tarantool.org wrote: > > Part of #4145 > > --- > > src/box/sql/func.c | 85 ++++++++++++++++++++++------------------------ > > 1 file changed, 40 insertions(+), 45 deletions(-) > > > > diff --git a/src/box/sql/func.c b/src/box/sql/func.c > > index a3c7d8d20..dd5e7d785 100644 > > --- a/src/box/sql/func.c > > +++ b/src/box/sql/func.c > > @@ -738,6 +738,45 @@ func_substr_characters(struct sql_context *ctx, int argc, struct Mem *argv) > > ctx->is_aborted = true; > > } > > > > +/** Implementation of the CHAR() function. */ > > Please, keep the comments explaining what the non-trivial functions do. > Added comments for some functions. You will see these diffs in this and the next few letters. > > +static void > > +func_char(struct sql_context *ctx, int argc, struct Mem *argv) > > +{ Diff: diff --git a/src/box/sql/func.c b/src/box/sql/func.c index 6b5099826..dee28b852 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -717,43 +717,55 @@ func_substr_characters(struct sql_context *ctx, int argc, struct Mem *argv) ctx->is_aborted = true; } -/** Implementation of the CHAR() function. */ +/** + * Implementation of the CHAR() function. 
+ * + * This function takes zero or more arguments, each of which is an integer. It + * constructs a string where each character of the string is the unicode + * character for the corresponding integer argument. + * + * If an argument is negative or greater than 0x10ffff, the symbol "�" is used. + * Symbol '\0' used instead of NULL argument. + */ static void func_char(struct sql_context *ctx, int argc, struct Mem *argv) { if (argc == 0) return mem_set_str_static(ctx->pOut, "", 0); - char *str = sqlDbMallocRawNN(sql_get(), argc * 4); - if (str == NULL) { + struct region *region = &fiber()->gc; + size_t svp = region_used(region); + UChar32 *buf = region_alloc(region, argc * sizeof(*buf)); + if (buf == NULL) { ctx->is_aborted = true; return; } - uint8_t *ptr = (uint8_t *)str; + int len = 0; for (int i = 0; i < argc; ++i) { - uint32_t c; if (mem_is_null(&argv[i])) - c = 0; + buf[i] = 0; else if (!mem_is_uint(&argv[i]) || argv[i].u.u > 0x10ffff) - c = 0xfffd; + buf[i] = 0xfffd; else - c = argv[i].u.u; - if (c < 0x80) { - *ptr++ = c & 0xFF; - } else if (c < 0x0800) { - *ptr++ = 0xC0 + ((c >> 6) & 0x1F); - *ptr++ = 0x80 + (c & 0x3F); - } else if (c < 0x10000) { - *ptr++ = 0xE0 + ((c >> 12) & 0x0F); - *ptr++ = 0x80 + ((c >> 6) & 0x3F); - *ptr++ = 0x80 + (c & 0x3F); - } else { - *ptr++ = 0xF0 + ((c >> 18) & 0x07); - *ptr++ = 0x80 + ((c >> 12) & 0x3F); - *ptr++ = 0x80 + ((c >> 6) & 0x3F); - *ptr++ = 0x80 + (c & 0x3F); - } + buf[i] = argv[i].u.u; + len += U8_LENGTH(buf[i]); } - mem_set_str_allocated(ctx->pOut, str, (char *)ptr - str); + + char *str = sqlDbMallocRawNN(sql_get(), len); + if (str == NULL) { + ctx->is_aborted = true; + return; + } + int pos = 0; + for (int i = 0; i < argc; ++i) { + bool is_error = false; + U8_APPEND(str, pos, len, buf[i], is_error); + assert(!is_error); + (void)is_error; + } + region_truncate(region, svp); + assert(pos == len); + (void)pos; + mem_set_str_allocated(ctx->pOut, str, len); } static const unsigned char * New patch: commit 
4fa0034165697b694b3c655d92a3661ebf80a027 Author: Mergen Imeev <imeevma@gmail.com> Date: Tue Oct 5 13:55:21 2021 +0300 sql: rework CHAR() function The CHAR() function now uses the ICU macro to get characters. Part of #4145 diff --git a/src/box/sql/func.c b/src/box/sql/func.c index afe34f7f0..dee28b852 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -717,6 +717,57 @@ func_substr_characters(struct sql_context *ctx, int argc, struct Mem *argv) ctx->is_aborted = true; } +/** + * Implementation of the CHAR() function. + * + * This function takes zero or more arguments, each of which is an integer. It + * constructs a string where each character of the string is the unicode + * character for the corresponding integer argument. + * + * If an argument is negative or greater than 0x10ffff, the symbol "�" is used. + * Symbol '\0' used instead of NULL argument. + */ +static void +func_char(struct sql_context *ctx, int argc, struct Mem *argv) +{ + if (argc == 0) + return mem_set_str_static(ctx->pOut, "", 0); + struct region *region = &fiber()->gc; + size_t svp = region_used(region); + UChar32 *buf = region_alloc(region, argc * sizeof(*buf)); + if (buf == NULL) { + ctx->is_aborted = true; + return; + } + int len = 0; + for (int i = 0; i < argc; ++i) { + if (mem_is_null(&argv[i])) + buf[i] = 0; + else if (!mem_is_uint(&argv[i]) || argv[i].u.u > 0x10ffff) + buf[i] = 0xfffd; + else + buf[i] = argv[i].u.u; + len += U8_LENGTH(buf[i]); + } + + char *str = sqlDbMallocRawNN(sql_get(), len); + if (str == NULL) { + ctx->is_aborted = true; + return; + } + int pos = 0; + for (int i = 0; i < argc; ++i) { + bool is_error = false; + U8_APPEND(str, pos, len, buf[i], is_error); + assert(!is_error); + (void)is_error; + } + region_truncate(region, svp); + assert(pos == len); + (void)pos; + mem_set_str_allocated(ctx->pOut, str, len); +} + static const unsigned char * mem_as_ustr(struct Mem *mem) { @@ -1450,50 +1501,6 @@ unicodeFunc(struct sql_context *context, int argc, struct Mem 
*argv) sql_result_uint(context, sqlUtf8Read(&z)); } -/* - * The char() function takes zero or more arguments, each of which is - * an integer. It constructs a string where each character of the string - * is the unicode character for the corresponding integer argument. - */ -static void -charFunc(struct sql_context *context, int argc, struct Mem *argv) -{ - unsigned char *z, *zOut; - int i; - zOut = z = sql_malloc64(argc * 4 + 1); - if (z == NULL) { - context->is_aborted = true; - return; - } - for (i = 0; i < argc; i++) { - uint64_t x; - unsigned c; - if (sql_value_type(&argv[i]) == MP_INT) - x = 0xfffd; - else - x = mem_get_uint_unsafe(&argv[i]); - if (x > 0x10ffff) - x = 0xfffd; - c = (unsigned)(x & 0x1fffff); - if (c < 0x00080) { - *zOut++ = (u8) (c & 0xFF); - } else if (c < 0x00800) { - *zOut++ = 0xC0 + (u8) ((c >> 6) & 0x1F); - *zOut++ = 0x80 + (u8) (c & 0x3F); - } else if (c < 0x10000) { - *zOut++ = 0xE0 + (u8) ((c >> 12) & 0x0F); - *zOut++ = 0x80 + (u8) ((c >> 6) & 0x3F); - *zOut++ = 0x80 + (u8) (c & 0x3F); - } else { - *zOut++ = 0xF0 + (u8) ((c >> 18) & 0x07); - *zOut++ = 0x80 + (u8) ((c >> 12) & 0x3F); - *zOut++ = 0x80 + (u8) ((c >> 6) & 0x3F); - *zOut++ = 0x80 + (u8) (c & 0x3F); - } - } - sql_result_text64(context, (char *)z, zOut - z, sql_free); -} - /* * The hex() function. Interpret the argument as a blob. Return * a hexadecimal rendering as text. @@ -1846,7 +1853,7 @@ static struct sql_func_definition definitions[] = { NULL}, {"AVG", 1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_INTEGER, step_avg, fin_avg}, {"AVG", 1, {FIELD_TYPE_DOUBLE}, FIELD_TYPE_DOUBLE, step_avg, fin_avg}, - {"CHAR", -1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_STRING, charFunc, NULL}, + {"CHAR", -1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_STRING, func_char, NULL}, {"CHAR_LENGTH", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_INTEGER, func_char_length, NULL}, {"COALESCE", -1, {FIELD_TYPE_ANY}, FIELD_TYPE_SCALAR, sql_builtin_stub,
next prev parent reply other threads:[~2021-10-25 8:02 UTC|newest] Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-10-08 17:31 [Tarantool-patches] [PATCH v1 00/21] Refactor non-standard and non-aggragate functions Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 01/21] sql: refactor CHAR() function Mergen Imeev via Tarantool-patches 2021-10-14 22:42 ` Vladislav Shpilevoy via Tarantool-patches 2021-10-25 8:02 ` Mergen Imeev via Tarantool-patches [this message] 2021-10-29 23:42 ` Vladislav Shpilevoy via Tarantool-patches 2021-11-02 11:35 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 02/21] sql: refactor GREATEST() and LEAST() functions Mergen Imeev via Tarantool-patches 2021-10-14 22:42 ` Vladislav Shpilevoy via Tarantool-patches 2021-10-25 8:17 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 03/21] sql: refactor HEX() function Mergen Imeev via Tarantool-patches 2021-10-14 22:43 ` Vladislav Shpilevoy via Tarantool-patches 2021-10-25 8:19 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 04/21] sql: refactor LENGTH() function Mergen Imeev via Tarantool-patches 2021-10-14 22:43 ` Vladislav Shpilevoy via Tarantool-patches 2021-10-25 8:30 ` Mergen Imeev via Tarantool-patches 2021-10-29 23:42 ` Vladislav Shpilevoy via Tarantool-patches 2021-11-02 11:39 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 05/21] sql: refactor PRINTF() function Mergen Imeev via Tarantool-patches 2021-10-14 22:44 ` Vladislav Shpilevoy via Tarantool-patches 2021-10-25 8:33 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 06/21] sql: refactor RANDOM() function Mergen Imeev via Tarantool-patches 2021-10-25 8:35 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 07/21] sql: rework RANDOMBLOB() function Mergen Imeev 
via Tarantool-patches 2021-10-25 8:36 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 08/21] sql: refactor ZEROBLOB() function Mergen Imeev via Tarantool-patches 2021-10-25 8:37 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 09/21] sql: refactor TYPEOF() function Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 10/21] sql: refactor ROUND() function Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 11/21] sql: refactor ROW_COUNT() function Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 12/21] sql: rework UUID() function Mergen Imeev via Tarantool-patches 2021-10-25 8:38 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 13/21] sql: refactor VERSION() function Mergen Imeev via Tarantool-patches 2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 14/21] sql: refactor UNICODE() function Mergen Imeev via Tarantool-patches 2021-10-14 22:44 ` Vladislav Shpilevoy via Tarantool-patches 2021-10-25 8:40 ` Mergen Imeev via Tarantool-patches 2021-11-02 11:42 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 15/21] sql: refactor of SOUNDEX() function Mergen Imeev via Tarantool-patches 2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 16/21] sql: refactor REPLACE() function Mergen Imeev via Tarantool-patches 2021-10-14 22:45 ` Vladislav Shpilevoy via Tarantool-patches 2021-10-25 8:45 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 17/21] sql: refactor QUOTE() function Mergen Imeev via Tarantool-patches 2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 18/21] sql: remove unused code Mergen Imeev via Tarantool-patches 2021-10-25 8:51 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 19/21] sql: remove MEM_Dyn flag Mergen Imeev via Tarantool-patches 2021-10-14 22:46 
` Vladislav Shpilevoy via Tarantool-patches 2021-10-25 8:54 ` Mergen Imeev via Tarantool-patches 2021-10-29 23:43 ` Vladislav Shpilevoy via Tarantool-patches 2021-11-02 11:43 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 20/21] sql: remove MEM_Term flag Mergen Imeev via Tarantool-patches 2021-10-14 22:47 ` Vladislav Shpilevoy via Tarantool-patches 2021-10-25 9:57 ` Mergen Imeev via Tarantool-patches 2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 21/21] sql: make arguments to be const Mergen Imeev via Tarantool-patches 2021-11-02 22:15 ` [Tarantool-patches] [PATCH v1 00/21] Refactor non-standard and non-aggragate functions Vladislav Shpilevoy via Tarantool-patches
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20211025080212.GA36295@tarantool.org \ --to=tarantool-patches@dev.tarantool.org \ --cc=imeevma@tarantool.org \ --cc=v.shpilevoy@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH v1 01/21] sql: refactor CHAR() function' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.