From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 69AE970296; Thu, 11 Nov 2021 13:50:34 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 69AE970296 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1636627834; bh=y02NVR/w8Ne/tU6DY3muGStQVKqsHd2p8liSSWh9eH0=; h=To:Cc:Date:In-Reply-To:References:Subject:List-Id: List-Unsubscribe:List-Archive:List-Post:List-Help:List-Subscribe: From:Reply-To:From; b=qa+jsRxjBSPp1T+nyNVJPlRliacQy/wYEHWVpO7DTGRl//FelCX7Qqg/V3kLWWNNx I7RtzkHZR9LsmVbGEKJ4QiQd534lzRYzJiLITGKlNjjoAVPwGenpfadEj/6AgdJdIF OTOd9SCjfw9Yx3DHm0YbVCXE16k32srZ8NBLAel8= Received: from smtpng1.i.mail.ru (smtpng1.i.mail.ru [94.100.181.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 68B7B7105F for ; Thu, 11 Nov 2021 13:48:51 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 68B7B7105F Received: by smtpng1.m.smailru.net with esmtpa (envelope-from ) id 1ml7dS-0004r0-Dr; Thu, 11 Nov 2021 13:48:50 +0300 To: kyukhin@tarantool.org Cc: tarantool-patches@dev.tarantool.org Date: Thu, 11 Nov 2021 13:48:50 +0300 Message-Id: <94041641dd5e0c714fcfc7be2dbe56144de5139e.1636627579.git.imeevma@gmail.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: References: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-4EC0790: 10 X-7564579A: 78E4E2B564C1792B X-77F55803: 4F1203BC0FB41BD9731B3922EC0639796C498EBF619285F2FA7E0E589BD7410700894C459B0CD1B96BF39DE92E7EB644CB80C6639710F743BD7B9CED87D2556767A9CA496E58B691 X-7FA49CB5: FF5795518A3D127A4AD6D5ED66289B5278DA827A17800CE7A3295C83650092F9EA1F7E6F0F101C67BD4B6F7A4D31EC0BCC500DACC3FED6E28638F802B75D45FF8AA50765F79006378CCCB41504E044EE8638F802B75D45FF36EB9D2243A4F8B5A6FCA7DBDB1FC311F39EFFDF887939037866D6147AF826D880D88D1A1B6EAE0BA324B27187BC4CBC117882F4460429724CE54428C33FAD305F5C1EE8F4F765FCAA867293B0326636D2E47CDBA5A96583BD4B6F7A4D31EC0BC014FD901B82EE079FA2833FD35BB23D27C277FBC8AE2E8BAA867293B0326636D2E47CDBA5A96583BA9C0B312567BB231DD303D21008E29813377AFFFEAFD269A417C69337E82CC2E827F84554CEF50127C277FBC8AE2E8BA83251EDC214901ED5E8D9A59859A8B66F6A3E018CF4DC80089D37D7C0E48F6C5571747095F342E88FB05168BE4CE3AF X-C1DE0DAB: C20DE7B7AB408E4181F030C43753B8186998911F362727C414F749A5E30D975C7E9FEBB9C11794A156DD52D78B33B4B383E37E4BFC8C00E49C2B6934AE262D3EE7EAB7254005DCED7532B743992DF240BDC6A1CF3F042BAD6DF99611D93F60EFCDE59DDCCE53AF2B699F904B3F4130E343918A1A30D5E7FCCB5012B2E24CD356 X-C8649E89: 4E36BF7865823D7055A7F0CF078B5EC49A30900B95165D34DA1FE609583D493CA7BC4A783F8985C4DA821C14C0B876A0634CBEBC1707BFD43574D8605740DE691D7E09C32AA3244C4381653E45AF66C73358AA7B58A6790B3A76366E8A9DE7CA729B2BEF169E0186 X-D57D3AED: 3ZO7eAau8CL7WIMRKs4sN3D3tLDjz0dLbV79QFUyzQ2Ujvy7cMT6pYYqY16iZVKkSc3dCLJ7zSJH7+u4VD18S7Vl4ZUrpaVfd2+vE6kuoey4m4VkSEu530nj6fImhcD4MUrOEAnl0W826KZ9Q+tr5ycPtXkTV4k65bRjmOUUP8cvGozZ33TWg5HZplvhhXbhDGzqmQDTd6OAevLeAnq3Ra9uf7zvY2zzsIhlcp/Y7m53TZgf2aB4JOg4gkr2bioj4t8MBgWr8bIUg9ZzR5ylMw== X-Mailru-Sender: 689FA8AB762F7393C37E3C1AEC41BA5DFE7B158F679ED0183F96CFC0DB86CD0283D72C36FC87018B9F80AB2734326CD2FB559BB5D741EB96352A0ABBE4FDA4210A04DAD6CC59E33667EA787935ED9F1B X-Mras: Ok Subject: [Tarantool-patches] [PATCH v1 01/21] sql: rework CHAR() function X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Mergen Imeev via Tarantool-patches Reply-To: imeevma@tarantool.org Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" The CHAR() function now uses the ICU macro to get characters. Part of #4145 --- src/box/sql/func.c | 100 +++++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 45 deletions(-) diff --git a/src/box/sql/func.c b/src/box/sql/func.c index 868d51145..0cd8f8f69 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -728,6 +728,60 @@ func_substr_characters(struct sql_context *ctx, int argc, struct Mem *argv) ctx->is_aborted = true; } +/** + * Implementation of the CHAR() function. + * + * This function takes zero or more arguments, each of which is an integer. It + * constructs a string where each character of the string is the unicode + * character for the corresponding integer argument. + * + * If an argument is negative or greater than 0x10ffff, the symbol "�" is used. + * Symbol '\0' used instead of NULL argument. + */ +static void +func_char(struct sql_context *ctx, int argc, struct Mem *argv) +{ + if (argc == 0) + return mem_set_str_static(ctx->pOut, "", 0); + struct region *region = &fiber()->gc; + size_t svp = region_used(region); + uint32_t size; + UChar32 *buf = region_alloc_array(region, typeof(*buf), argc, &size); + if (buf == NULL) { + ctx->is_aborted = true; + diag_set(OutOfMemory, size, "region_alloc_array", "buf"); + return; + } + int len = 0; + for (int i = 0; i < argc; ++i) { + if (mem_is_null(&argv[i])) + buf[i] = 0; + else if (!mem_is_uint(&argv[i]) || argv[i].u.u > 0x10ffff) + buf[i] = 0xfffd; + else + buf[i] = argv[i].u.u; + len += U8_LENGTH(buf[i]); + } + + char *str = sqlDbMallocRawNN(sql_get(), len); + if (str == NULL) { + region_truncate(region, svp); + ctx->is_aborted = true; + return; + } + int pos = 0; + for (int i = 0; i < argc; ++i) { + UBool is_error = false; + U8_APPEND((uint8_t *)str, pos, len, buf[i], is_error); + assert(!is_error); + (void)is_error; + } + region_truncate(region, svp); + assert(pos == len); + (void)pos; + mem_set_str_allocated(ctx->pOut, str, len); +} + static const unsigned char * mem_as_ustr(struct Mem *mem) { @@ -1461,50 +1515,6 @@ unicodeFunc(struct sql_context *context, int argc, struct Mem *argv) sql_result_uint(context, sqlUtf8Read(&z)); } -/* - * The char() function takes zero or more arguments, each of which is - * an integer. It constructs a string where each character of the string - * is the unicode character for the corresponding integer argument. - */ -static void -charFunc(struct sql_context *context, int argc, struct Mem *argv) -{ - unsigned char *z, *zOut; - int i; - zOut = z = sql_malloc64(argc * 4 + 1); - if (z == NULL) { - context->is_aborted = true; - return; - } - for (i = 0; i < argc; i++) { - uint64_t x; - unsigned c; - if (sql_value_type(&argv[i]) == MP_INT) - x = 0xfffd; - else - x = mem_get_uint_unsafe(&argv[i]); - if (x > 0x10ffff) - x = 0xfffd; - c = (unsigned)(x & 0x1fffff); - if (c < 0x00080) { - *zOut++ = (u8) (c & 0xFF); - } else if (c < 0x00800) { - *zOut++ = 0xC0 + (u8) ((c >> 6) & 0x1F); - *zOut++ = 0x80 + (u8) (c & 0x3F); - } else if (c < 0x10000) { - *zOut++ = 0xE0 + (u8) ((c >> 12) & 0x0F); - *zOut++ = 0x80 + (u8) ((c >> 6) & 0x3F); - *zOut++ = 0x80 + (u8) (c & 0x3F); - } else { - *zOut++ = 0xF0 + (u8) ((c >> 18) & 0x07); - *zOut++ = 0x80 + (u8) ((c >> 12) & 0x3F); - *zOut++ = 0x80 + (u8) ((c >> 6) & 0x3F); - *zOut++ = 0x80 + (u8) (c & 0x3F); - } - } - sql_result_text64(context, (char *)z, zOut - z, sql_free); -} - /* * The hex() function. Interpret the argument as a blob. Return * a hexadecimal rendering as text. @@ -1857,7 +1867,7 @@ static struct sql_func_definition definitions[] = { NULL}, {"AVG", 1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_INTEGER, step_avg, fin_avg}, {"AVG", 1, {FIELD_TYPE_DOUBLE}, FIELD_TYPE_DOUBLE, step_avg, fin_avg}, - {"CHAR", -1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_STRING, charFunc, NULL}, + {"CHAR", -1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_STRING, func_char, NULL}, {"CHAR_LENGTH", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_INTEGER, func_char_length, NULL}, {"COALESCE", -1, {FIELD_TYPE_ANY}, FIELD_TYPE_SCALAR, sql_builtin_stub, -- 2.25.1