[Tarantool-patches] [PATCH v1 14/21] sql: refactor UNICODE() function

Mergen Imeev imeevma at tarantool.org
Mon Oct 25 11:40:47 MSK 2021


Thank you for the review! My answer, the diff, and the new patch are below.
Also, I added a description to the function.

On Fri, Oct 15, 2021 at 12:44:37AM +0200, Vladislav Shpilevoy wrote:
> Thanks for the patch!
> 
> > diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> > index fb7fd772e..5e12ef729 100644
> > --- a/src/box/sql/func.c
> > +++ b/src/box/sql/func.c
> > @@ -1007,6 +1007,19 @@ func_version(struct sql_context *ctx, int argc, struct Mem *argv)
> >  	return mem_set_str0_static(ctx->pOut, (char *)tarantool_version());
> >  }
> >  
> > +/** Implementation of the UNICODE() function. */
> > +static void
> > +func_unicode(struct sql_context *ctx, int argc, struct Mem *argv)
> > +{
> > +	assert(argc == 1);
> > +	(void)argc;
> > +	if (mem_is_null(&argv[0]))
> > +		return;
> > +	assert(mem_is_str(&argv[0]));
> > +	const char *str = tt_cstr(argv[0].z, argv[0].n);
> > +	mem_set_uint(ctx->pOut, sqlUtf8Read((const unsigned char **)&str));
> 
> You can dodge the copying. See utf8_next() in utf8.c:
> 
> 	UChar32 c;
> 	U8_NEXT(str, pos, len, c);
Thanks, fixed.


Diff:

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index ebc38751e..6d80559d5 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -1016,17 +1016,28 @@ func_version(struct sql_context *ctx, int argc, struct Mem *argv)
 	return mem_set_str0_static(ctx->pOut, (char *)tarantool_version());
 }
 
-/** Implementation of the UNICODE() function. */
+/**
+ * Implementation of the UNICODE() function.
+ *
+ * Return the Unicode code point value for the first character of the input
+ * string.
+ */
 static void
 func_unicode(struct sql_context *ctx, int argc, struct Mem *argv)
 {
 	assert(argc == 1);
 	(void)argc;
-	if (mem_is_null(&argv[0]))
+	struct Mem *arg = &argv[0];
+	if (mem_is_null(arg))
 		return;
-	assert(mem_is_str(&argv[0]));
-	const char *str = tt_cstr(argv[0].z, argv[0].n);
-	mem_set_uint(ctx->pOut, sqlUtf8Read((const unsigned char **)&str));
+	assert(mem_is_str(arg));
+	if (arg->n == 0)
+		return mem_set_uint(ctx->pOut, 0);
+	int pos = 0;
+	UChar32 c;
+	U8_NEXT(arg->z, pos, arg->n, c);
+	(void)pos;
+	mem_set_uint(ctx->pOut, (uint64_t)c);
 }
 
 static const unsigned char *


New patch:

commit 6346c542b8c81814753a1853d7ae347222af0f23
Author: Mergen Imeev <imeevma at gmail.com>
Date:   Thu Oct 7 13:43:38 2021 +0300

    sql: refactor UNICODE() function
    
    Part of #4145

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 3afc8ec7f..6d80559d5 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -1016,6 +1016,30 @@ func_version(struct sql_context *ctx, int argc, struct Mem *argv)
 	return mem_set_str0_static(ctx->pOut, (char *)tarantool_version());
 }
 
+/**
+ * Implementation of the UNICODE() function.
+ *
+ * Return the Unicode code point value for the first character of the input
+ * string.
+ */
+static void
+func_unicode(struct sql_context *ctx, int argc, struct Mem *argv)
+{
+	assert(argc == 1);
+	(void)argc;
+	struct Mem *arg = &argv[0];
+	if (mem_is_null(arg))
+		return;
+	assert(mem_is_str(arg));
+	if (arg->n == 0)
+		return mem_set_uint(ctx->pOut, 0);
+	int pos = 0;
+	UChar32 c;
+	U8_NEXT(arg->z, pos, arg->n, c);
+	(void)pos;
+	mem_set_uint(ctx->pOut, (uint64_t)c);
+}
+
 static const unsigned char *
 mem_as_ustr(struct Mem *mem)
 {
@@ -1437,19 +1461,6 @@ quoteFunc(struct sql_context *context, int argc, struct Mem *argv)
 	}
 }
 
-/*
- * The unicode() function.  Return the integer unicode code-point value
- * for the first character of the input string.
- */
-static void
-unicodeFunc(struct sql_context *context, int argc, struct Mem *argv)
-{
-	const unsigned char *z = mem_as_ustr(&argv[0]);
-	(void)argc;
-	if (z && z[0])
-		sql_result_uint(context, sqlUtf8Read(&z));
-}
-
 /*
  * The replace() function.  Three arguments are all strings: call
  * them A, B, and C. The result is also a string which is derived
@@ -1883,7 +1894,7 @@ static struct sql_func_definition definitions[] = {
 	 FIELD_TYPE_VARBINARY, func_trim_bin, NULL},
 
 	{"TYPEOF", 1, {FIELD_TYPE_ANY}, FIELD_TYPE_STRING, func_typeof, NULL},
-	{"UNICODE", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_INTEGER, unicodeFunc,
+	{"UNICODE", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_INTEGER, func_unicode,
 	 NULL},
 	{"UNLIKELY", 1, {FIELD_TYPE_ANY}, FIELD_TYPE_BOOLEAN, sql_builtin_stub,
 	 NULL},


More information about the Tarantool-patches mailing list