[Tarantool-patches] [PATCH v1 01/21] sql: rework CHAR() function

imeevma at tarantool.org imeevma at tarantool.org
Thu Nov 11 13:48:50 MSK 2021


The CHAR() function now uses the ICU macro to get characters.

Part of #4145
---
 src/box/sql/func.c | 100 +++++++++++++++++++++++++--------------------
 1 file changed, 55 insertions(+), 45 deletions(-)

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 868d51145..0cd8f8f69 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -728,6 +728,60 @@ func_substr_characters(struct sql_context *ctx, int argc, struct Mem *argv)
 		ctx->is_aborted = true;
 }
 
+/**
+ * Implementation of the CHAR() function.
+ *
+ * This function takes zero or more arguments, each of which is an integer. It
+ * constructs a string where each character of the string is the unicode
+ * character for the corresponding integer argument.
+ *
+ * If an argument is negative or greater than 0x10ffff, the symbol "�" is used.
+ * Symbol '\0' used instead of NULL argument.
+ */
+static void
+func_char(struct sql_context *ctx, int argc, struct Mem *argv)
+{
+	if (argc == 0)
+		return mem_set_str_static(ctx->pOut, "", 0);
+	struct region *region = &fiber()->gc;
+	size_t svp = region_used(region);
+	uint32_t size;
+	UChar32 *buf = region_alloc_array(region, typeof(*buf), argc, &size);
+	if (buf == NULL) {
+		ctx->is_aborted = true;
+		diag_set(OutOfMemory, size, "region_alloc_array", "buf");
+		return;
+	}
+	int len = 0;
+	for (int i = 0; i < argc; ++i) {
+		if (mem_is_null(&argv[i]))
+			buf[i] = 0;
+		else if (!mem_is_uint(&argv[i]) || argv[i].u.u > 0x10ffff)
+			buf[i] = 0xfffd;
+		else
+			buf[i] = argv[i].u.u;
+		len += U8_LENGTH(buf[i]);
+	}
+
+	char *str = sqlDbMallocRawNN(sql_get(), len);
+	if (str == NULL) {
+		region_truncate(region, svp);
+		ctx->is_aborted = true;
+		return;
+	}
+	int pos = 0;
+	for (int i = 0; i < argc; ++i) {
+		UBool is_error = false;
+		U8_APPEND((uint8_t *)str, pos, len, buf[i], is_error);
+		assert(!is_error);
+		(void)is_error;
+	}
+	region_truncate(region, svp);
+	assert(pos == len);
+	(void)pos;
+	mem_set_str_allocated(ctx->pOut, str, len);
+}
+
 static const unsigned char *
 mem_as_ustr(struct Mem *mem)
 {
@@ -1461,50 +1515,6 @@ unicodeFunc(struct sql_context *context, int argc, struct Mem *argv)
 		sql_result_uint(context, sqlUtf8Read(&z));
 }
 
-/*
- * The char() function takes zero or more arguments, each of which is
- * an integer.  It constructs a string where each character of the string
- * is the unicode character for the corresponding integer argument.
- */
-static void
-charFunc(struct sql_context *context, int argc, struct Mem *argv)
-{
-	unsigned char *z, *zOut;
-	int i;
-	zOut = z = sql_malloc64(argc * 4 + 1);
-	if (z == NULL) {
-		context->is_aborted = true;
-		return;
-	}
-	for (i = 0; i < argc; i++) {
-		uint64_t x;
-		unsigned c;
-		if (sql_value_type(&argv[i]) == MP_INT)
-			x = 0xfffd;
-		else
-			x = mem_get_uint_unsafe(&argv[i]);
-		if (x > 0x10ffff)
-			x = 0xfffd;
-		c = (unsigned)(x & 0x1fffff);
-		if (c < 0x00080) {
-			*zOut++ = (u8) (c & 0xFF);
-		} else if (c < 0x00800) {
-			*zOut++ = 0xC0 + (u8) ((c >> 6) & 0x1F);
-			*zOut++ = 0x80 + (u8) (c & 0x3F);
-		} else if (c < 0x10000) {
-			*zOut++ = 0xE0 + (u8) ((c >> 12) & 0x0F);
-			*zOut++ = 0x80 + (u8) ((c >> 6) & 0x3F);
-			*zOut++ = 0x80 + (u8) (c & 0x3F);
-		} else {
-			*zOut++ = 0xF0 + (u8) ((c >> 18) & 0x07);
-			*zOut++ = 0x80 + (u8) ((c >> 12) & 0x3F);
-			*zOut++ = 0x80 + (u8) ((c >> 6) & 0x3F);
-			*zOut++ = 0x80 + (u8) (c & 0x3F);
-		}
-	}
-	sql_result_text64(context, (char *)z, zOut - z, sql_free);
-}
-
 /*
  * The hex() function.  Interpret the argument as a blob.  Return
  * a hexadecimal rendering as text.
@@ -1857,7 +1867,7 @@ static struct sql_func_definition definitions[] = {
 	 NULL},
 	{"AVG", 1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_INTEGER, step_avg, fin_avg},
 	{"AVG", 1, {FIELD_TYPE_DOUBLE}, FIELD_TYPE_DOUBLE, step_avg, fin_avg},
-	{"CHAR", -1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_STRING, charFunc, NULL},
+	{"CHAR", -1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_STRING, func_char, NULL},
 	{"CHAR_LENGTH", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_INTEGER,
 	 func_char_length, NULL},
 	{"COALESCE", -1, {FIELD_TYPE_ANY}, FIELD_TYPE_SCALAR, sql_builtin_stub,
-- 
2.25.1



More information about the Tarantool-patches mailing list