Tarantool development patches archive
 help / color / mirror / Atom feed
From: Mergen Imeev via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: Re: [Tarantool-patches] [PATCH v1 01/21] sql: refactor CHAR() function
Date: Mon, 25 Oct 2021 11:02:12 +0300
Message-ID: <20211025080212.GA36295@tarantool.org> (raw)
In-Reply-To: <52e168e4-1559-fd6c-c5a6-d98e3c2d678a@tarantool.org>

Hi! Thank you for the review! My answers, diff and new patch below. Also, I
replaced part of old code with ICU macro and changed commit-message.

On Fri, Oct 15, 2021 at 12:42:22AM +0200, Vladislav Shpilevoy wrote:
> Hi! Thanks for the patch!
> 
> Before this commit on the branch I see a comment called 'Fix'.
> Please, cleanup the branch from unfinished work.
> 
Understood, fixed.

> On 08.10.2021 19:31, imeevma@tarantool.org wrote:
> > Part of #4145
> > ---
> >  src/box/sql/func.c | 85 ++++++++++++++++++++++------------------------
> >  1 file changed, 40 insertions(+), 45 deletions(-)
> > 
> > diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> > index a3c7d8d20..dd5e7d785 100644
> > --- a/src/box/sql/func.c
> > +++ b/src/box/sql/func.c
> > @@ -738,6 +738,45 @@ func_substr_characters(struct sql_context *ctx, int argc, struct Mem *argv)
> >  		ctx->is_aborted = true;
> >  }
> >  
> > +/** Implementation of the CHAR() function. */
> 
> Please, keep the comments explaining what the non-trivial functions do.
> 
Added comments for some functions. You will these diffs in this and next few
letters.

> > +static void
> > +func_char(struct sql_context *ctx, int argc, struct Mem *argv)
> > +{


Diff:

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 6b5099826..dee28b852 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -717,43 +717,55 @@ func_substr_characters(struct sql_context *ctx, int argc, struct Mem *argv)
 		ctx->is_aborted = true;
 }
 
-/** Implementation of the CHAR() function. */
+/**
+ * Implementation of the CHAR() function.
+ *
+ * This function takes zero or more arguments, each of which is an integer. It
+ * constructs a string where each character of the string is the unicode
+ * character for the corresponding integer argument.
+ *
+ * If an argument is negative or greater than 0x10ffff, the symbol "�" is used.
+ * Symbol '\0' used instead of NULL argument.
+ */
 static void
 func_char(struct sql_context *ctx, int argc, struct Mem *argv)
 {
 	if (argc == 0)
 		return mem_set_str_static(ctx->pOut, "", 0);
-	char *str = sqlDbMallocRawNN(sql_get(), argc * 4);
-	if (str == NULL) {
+	struct region *region = &fiber()->gc;
+	size_t svp = region_used(region);
+	UChar32 *buf = region_alloc(region, argc * sizeof(*buf));
+	if (buf == NULL) {
 		ctx->is_aborted = true;
 		return;
 	}
-	uint8_t *ptr = (uint8_t *)str;
+	int len = 0;
 	for (int i = 0; i < argc; ++i) {
-		uint32_t c;
 		if (mem_is_null(&argv[i]))
-			c = 0;
+			buf[i] = 0;
 		else if (!mem_is_uint(&argv[i]) || argv[i].u.u > 0x10ffff)
-			c = 0xfffd;
+			buf[i] = 0xfffd;
 		else
-			c = argv[i].u.u;
-		if (c < 0x80) {
-			*ptr++ = c & 0xFF;
-		} else if (c < 0x0800) {
-			*ptr++ = 0xC0 + ((c >> 6) & 0x1F);
-			*ptr++ = 0x80 + (c & 0x3F);
-		} else if (c < 0x10000) {
-			*ptr++ = 0xE0 + ((c >> 12) & 0x0F);
-			*ptr++ = 0x80 + ((c >> 6) & 0x3F);
-			*ptr++ = 0x80 + (c & 0x3F);
-		} else {
-			*ptr++ = 0xF0 + ((c >> 18) & 0x07);
-			*ptr++ = 0x80 + ((c >> 12) & 0x3F);
-			*ptr++ = 0x80 + ((c >> 6) & 0x3F);
-			*ptr++ = 0x80 + (c & 0x3F);
-		}
+			buf[i] = argv[i].u.u;
+		len += U8_LENGTH(buf[i]);
 	}
-	mem_set_str_allocated(ctx->pOut, str, (char *)ptr - str);
+
+	char *str = sqlDbMallocRawNN(sql_get(), len);
+	if (str == NULL) {
+		ctx->is_aborted = true;
+		return;
+	}
+	int pos = 0;
+	for (int i = 0; i < argc; ++i) {
+		bool is_error = false;
+		U8_APPEND(str, pos, len, buf[i], is_error);
+		assert(!is_error);
+		(void)is_error;
+	}
+	region_truncate(region, svp);
+	assert(pos == len);
+	(void)pos;
+	mem_set_str_allocated(ctx->pOut, str, len);
 }
 
 static const unsigned char *



New patch:

commit 4fa0034165697b694b3c655d92a3661ebf80a027
Author: Mergen Imeev <imeevma@gmail.com>
Date:   Tue Oct 5 13:55:21 2021 +0300

    sql: rework CHAR() function
    
    The CHAR() function now uses the ICU macro to get characters.
    
    Part of #4145

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index afe34f7f0..dee28b852 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -717,6 +717,57 @@ func_substr_characters(struct sql_context *ctx, int argc, struct Mem *argv)
 		ctx->is_aborted = true;
 }
 
+/**
+ * Implementation of the CHAR() function.
+ *
+ * This function takes zero or more arguments, each of which is an integer. It
+ * constructs a string where each character of the string is the unicode
+ * character for the corresponding integer argument.
+ *
+ * If an argument is negative or greater than 0x10ffff, the symbol "�" is used.
+ * Symbol '\0' used instead of NULL argument.
+ */
+static void
+func_char(struct sql_context *ctx, int argc, struct Mem *argv)
+{
+	if (argc == 0)
+		return mem_set_str_static(ctx->pOut, "", 0);
+	struct region *region = &fiber()->gc;
+	size_t svp = region_used(region);
+	UChar32 *buf = region_alloc(region, argc * sizeof(*buf));
+	if (buf == NULL) {
+		ctx->is_aborted = true;
+		return;
+	}
+	int len = 0;
+	for (int i = 0; i < argc; ++i) {
+		if (mem_is_null(&argv[i]))
+			buf[i] = 0;
+		else if (!mem_is_uint(&argv[i]) || argv[i].u.u > 0x10ffff)
+			buf[i] = 0xfffd;
+		else
+			buf[i] = argv[i].u.u;
+		len += U8_LENGTH(buf[i]);
+	}
+
+	char *str = sqlDbMallocRawNN(sql_get(), len);
+	if (str == NULL) {
+		ctx->is_aborted = true;
+		return;
+	}
+	int pos = 0;
+	for (int i = 0; i < argc; ++i) {
+		bool is_error = false;
+		U8_APPEND(str, pos, len, buf[i], is_error);
+		assert(!is_error);
+		(void)is_error;
+	}
+	region_truncate(region, svp);
+	assert(pos == len);
+	(void)pos;
+	mem_set_str_allocated(ctx->pOut, str, len);
+}
+
 static const unsigned char *
 mem_as_ustr(struct Mem *mem)
 {
@@ -1450,50 +1501,6 @@ unicodeFunc(struct sql_context *context, int argc, struct Mem *argv)
 		sql_result_uint(context, sqlUtf8Read(&z));
 }
 
-/*
- * The char() function takes zero or more arguments, each of which is
- * an integer.  It constructs a string where each character of the string
- * is the unicode character for the corresponding integer argument.
- */
-static void
-charFunc(struct sql_context *context, int argc, struct Mem *argv)
-{
-	unsigned char *z, *zOut;
-	int i;
-	zOut = z = sql_malloc64(argc * 4 + 1);
-	if (z == NULL) {
-		context->is_aborted = true;
-		return;
-	}
-	for (i = 0; i < argc; i++) {
-		uint64_t x;
-		unsigned c;
-		if (sql_value_type(&argv[i]) == MP_INT)
-			x = 0xfffd;
-		else
-			x = mem_get_uint_unsafe(&argv[i]);
-		if (x > 0x10ffff)
-			x = 0xfffd;
-		c = (unsigned)(x & 0x1fffff);
-		if (c < 0x00080) {
-			*zOut++ = (u8) (c & 0xFF);
-		} else if (c < 0x00800) {
-			*zOut++ = 0xC0 + (u8) ((c >> 6) & 0x1F);
-			*zOut++ = 0x80 + (u8) (c & 0x3F);
-		} else if (c < 0x10000) {
-			*zOut++ = 0xE0 + (u8) ((c >> 12) & 0x0F);
-			*zOut++ = 0x80 + (u8) ((c >> 6) & 0x3F);
-			*zOut++ = 0x80 + (u8) (c & 0x3F);
-		} else {
-			*zOut++ = 0xF0 + (u8) ((c >> 18) & 0x07);
-			*zOut++ = 0x80 + (u8) ((c >> 12) & 0x3F);
-			*zOut++ = 0x80 + (u8) ((c >> 6) & 0x3F);
-			*zOut++ = 0x80 + (u8) (c & 0x3F);
-		}
-	}
-	sql_result_text64(context, (char *)z, zOut - z, sql_free);
-}
-
 /*
  * The hex() function.  Interpret the argument as a blob.  Return
  * a hexadecimal rendering as text.
@@ -1846,7 +1853,7 @@ static struct sql_func_definition definitions[] = {
 	 NULL},
 	{"AVG", 1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_INTEGER, step_avg, fin_avg},
 	{"AVG", 1, {FIELD_TYPE_DOUBLE}, FIELD_TYPE_DOUBLE, step_avg, fin_avg},
-	{"CHAR", -1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_STRING, charFunc, NULL},
+	{"CHAR", -1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_STRING, func_char, NULL},
 	{"CHAR_LENGTH", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_INTEGER,
 	 func_char_length, NULL},
 	{"COALESCE", -1, {FIELD_TYPE_ANY}, FIELD_TYPE_SCALAR, sql_builtin_stub,

  reply	other threads:[~2021-10-25  8:02 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-08 17:31 [Tarantool-patches] [PATCH v1 00/21] Refactor non-standard and non-aggragate functions Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 01/21] sql: refactor CHAR() function Mergen Imeev via Tarantool-patches
2021-10-14 22:42   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-25  8:02     ` Mergen Imeev via Tarantool-patches [this message]
2021-10-29 23:42       ` Vladislav Shpilevoy via Tarantool-patches
2021-11-02 11:35         ` Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 02/21] sql: refactor GREATEST() and LEAST() functions Mergen Imeev via Tarantool-patches
2021-10-14 22:42   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-25  8:17     ` Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 03/21] sql: refactor HEX() function Mergen Imeev via Tarantool-patches
2021-10-14 22:43   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-25  8:19     ` Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 04/21] sql: refactor LENGTH() function Mergen Imeev via Tarantool-patches
2021-10-14 22:43   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-25  8:30     ` Mergen Imeev via Tarantool-patches
2021-10-29 23:42       ` Vladislav Shpilevoy via Tarantool-patches
2021-11-02 11:39         ` Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 05/21] sql: refactor PRINTF() function Mergen Imeev via Tarantool-patches
2021-10-14 22:44   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-25  8:33     ` Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 06/21] sql: refactor RANDOM() function Mergen Imeev via Tarantool-patches
2021-10-25  8:35   ` Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 07/21] sql: rework RANDOMBLOB() function Mergen Imeev via Tarantool-patches
2021-10-25  8:36   ` Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 08/21] sql: refactor ZEROBLOB() function Mergen Imeev via Tarantool-patches
2021-10-25  8:37   ` Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 09/21] sql: refactor TYPEOF() function Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 10/21] sql: refactor ROUND() function Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 11/21] sql: refactor ROW_COUNT() function Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 12/21] sql: rework UUID() function Mergen Imeev via Tarantool-patches
2021-10-25  8:38   ` Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 13/21] sql: refactor VERSION() function Mergen Imeev via Tarantool-patches
2021-10-08 17:31 ` [Tarantool-patches] [PATCH v1 14/21] sql: refactor UNICODE() function Mergen Imeev via Tarantool-patches
2021-10-14 22:44   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-25  8:40     ` Mergen Imeev via Tarantool-patches
2021-11-02 11:42       ` Mergen Imeev via Tarantool-patches
2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 15/21] sql: refactor of SOUNDEX() function Mergen Imeev via Tarantool-patches
2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 16/21] sql: refactor REPLACE() function Mergen Imeev via Tarantool-patches
2021-10-14 22:45   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-25  8:45     ` Mergen Imeev via Tarantool-patches
2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 17/21] sql: refactor QUOTE() function Mergen Imeev via Tarantool-patches
2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 18/21] sql: remove unused code Mergen Imeev via Tarantool-patches
2021-10-25  8:51   ` Mergen Imeev via Tarantool-patches
2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 19/21] sql: remove MEM_Dyn flag Mergen Imeev via Tarantool-patches
2021-10-14 22:46   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-25  8:54     ` Mergen Imeev via Tarantool-patches
2021-10-29 23:43       ` Vladislav Shpilevoy via Tarantool-patches
2021-11-02 11:43         ` Mergen Imeev via Tarantool-patches
2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 20/21] sql: remove MEM_Term flag Mergen Imeev via Tarantool-patches
2021-10-14 22:47   ` Vladislav Shpilevoy via Tarantool-patches
2021-10-25  9:57     ` Mergen Imeev via Tarantool-patches
2021-10-08 17:32 ` [Tarantool-patches] [PATCH v1 21/21] sql: make arguments to be const Mergen Imeev via Tarantool-patches
2021-11-02 22:15 ` [Tarantool-patches] [PATCH v1 00/21] Refactor non-standard and non-aggragate functions Vladislav Shpilevoy via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211025080212.GA36295@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=imeevma@tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Tarantool development patches archive

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://lists.tarantool.org/tarantool-patches/0 tarantool-patches/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 tarantool-patches tarantool-patches/ https://lists.tarantool.org/tarantool-patches \
		tarantool-patches@dev.tarantool.org.
	public-inbox-index tarantool-patches

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git