[Tarantool-patches] [PATCH v2 12/15] sql: rework GROUP_CONCAT()

Mergen Imeev imeevma at tarantool.org
Sat Sep 25 14:42:36 MSK 2021


Thank you for the review! My answers, the diff, and the new patch are below.

On Thu, Sep 23, 2021 at 12:49:52AM +0200, Vladislav Shpilevoy wrote:
> Thanks for the patch!
> 
> > diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> > index f699aa927..001a8641c 100644
> > --- a/src/box/sql/func.c
> > +++ b/src/box/sql/func.c
> > @@ -213,6 +213,52 @@ fin_minmax(struct sql_context *ctx)
> >  	mem_copy(ctx->pOut, ctx->pMem);
> >  }
> >  
> > +/** Implementation of the GROUP_CONCAT() function. */
> > +static void
> > +step_group_concat(struct sql_context *ctx, int argc, struct Mem **argv)
> > +{
> > +	assert(argc == 1 || argc == 2);
> > +	(void)argc;
> > +	if (argv[0]->type == MEM_TYPE_NULL)
> > +		return;
> > +	assert(mem_is_str(argv[0]) || mem_is_bin(argv[0]));
> > +	if (ctx->pMem->type == MEM_TYPE_NULL) {
> > +		if (mem_copy_str(ctx->pMem, argv[0]->z, argv[0]->n) != 0)
> 
> 1. What if the argument is zeroblob with no actual memory allocated yet?
> 
In that case the result would be an empty string (''), which is wrong. I fixed this and added a test.

> > +			ctx->is_aborted = true;
> > +		return;
> > +	}
> > +	const char *sep = NULL;
> > +	int sep_len = 0;
> > +	if (argc == 1) {
> > +		sep = ",";
> > +		sep_len = 1;
> > +	} else if (argv[1]->type == MEM_TYPE_NULL) {
> > +		sep = "";
> > +		sep_len = 0;
> > +	} else {
> > +		assert(mem_is_same_type(argv[0], argv[0]));
> > +		sep = argv[1]->z;
> > +		sep_len = argv[1]->n;
> > +	}
> > +	if (sep_len > 0) {
> > +		if (mem_append(ctx->pMem, sep, sep_len) != 0) {
> 
> 2. Will it work if sep_len == 0? If yes, then I would propose to
> drop the len check here and call the append always.
> 
Fixed. I moved this check to mem_append().

> > +			ctx->is_aborted = true;
> > +			return;
> > +		}


Diff:

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 182fb85be..28094e258 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -223,34 +223,43 @@ step_group_concat(struct sql_context *ctx, int argc, struct Mem **argv)
 {
 	assert(argc == 1 || argc == 2);
 	(void)argc;
-	if (argv[0]->type == MEM_TYPE_NULL)
+	if (mem_is_null(argv[0]))
 		return;
 	assert(mem_is_str(argv[0]) || mem_is_bin(argv[0]));
-	if (ctx->pMem->type == MEM_TYPE_NULL) {
-		if (mem_copy_str(ctx->pMem, argv[0]->z, argv[0]->n) != 0)
+	if (mem_is_null(ctx->pMem)) {
+		if (mem_copy(ctx->pMem, argv[0]) != 0)
 			ctx->is_aborted = true;
 		return;
 	}
+	assert(!mem_is_zerobin(ctx->pMem));
 	const char *sep = NULL;
 	int sep_len = 0;
 	if (argc == 1) {
 		sep = ",";
 		sep_len = 1;
-	} else if (argv[1]->type == MEM_TYPE_NULL) {
+	} else if (mem_is_null(argv[1])) {
 		sep = "";
 		sep_len = 0;
 	} else {
-		assert(mem_is_same_type(argv[0], argv[0]));
+		assert(mem_is_same_type(argv[0], argv[1]));
 		sep = argv[1]->z;
 		sep_len = argv[1]->n;
 	}
-	if (sep_len > 0) {
-		if (mem_append(ctx->pMem, sep, sep_len) != 0) {
-			ctx->is_aborted = true;
-			return;
-		}
+	if (mem_append(ctx->pMem, sep, sep_len) != 0) {
+		ctx->is_aborted = true;
+		return;
+	}
+	uint32_t size;
+	char *str;
+	if (mem_is_zerobin(argv[0])) {
+		size = argv[0]->u.nZero;
+		str = sqlDbMallocRawNN(sql_get(), size);
+		memset(str, 0, size);
+	} else {
+		size = argv[0]->n;
+		str = argv[0]->z;
 	}
-	if (mem_append(ctx->pMem, argv[0]->z, argv[0]->n) != 0) {
+	if (mem_append(ctx->pMem, str, size) != 0) {
 		ctx->is_aborted = true;
 		return;
 	}
diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
index 416f27d69..bd8a8fe78 100755
--- a/test/sql-tap/func.test.lua
+++ b/test/sql-tap/func.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 local test = require("sqltester")
-test:plan(14680)
+test:plan(14681)
 
 --!./tcltestrunner.lua
 -- 2001 September 15
@@ -2142,11 +2142,14 @@ test:do_execsql_test(
         -- </func-24.2>
     })
 
--- do_test func-24.3 {
---   execsql {
---     SELECT group_concat(t1,' ' || rowid || ' ') FROM tbl1
---   }
--- } {{this 2 program 3 is 4 free 5 software}}
+test:do_execsql_test(
+    "func-24.3",
+    [[
+        SELECT group_concat(zeroblob(10));
+    ]], {
+        '\0\0\0\0\0\0\0\0\0\0'
+    })
+
 test:do_execsql_test(
     "func-24.4",
     [[


New patch:

commit 5b8563e9b884c86885a3a91fc608fb144afb69a0
Author: Mergen Imeev <imeevma at gmail.com>
Date:   Thu Sep 9 18:37:00 2021 +0300

    sql: rework GROUP_CONCAT()
    
    This patch simplifies the SQL built-in aggregate function GROUP_CONCAT().
    
    Part of #4145

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 3708440e3..28094e258 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -217,6 +217,61 @@ fin_minmax(struct sql_context *ctx)
 	mem_copy(ctx->pOut, ctx->pMem);
 }
 
+/** Implementation of the GROUP_CONCAT() function. */
+static void
+step_group_concat(struct sql_context *ctx, int argc, struct Mem **argv)
+{
+	assert(argc == 1 || argc == 2);
+	(void)argc;
+	if (mem_is_null(argv[0]))
+		return;
+	assert(mem_is_str(argv[0]) || mem_is_bin(argv[0]));
+	if (mem_is_null(ctx->pMem)) {
+		if (mem_copy(ctx->pMem, argv[0]) != 0)
+			ctx->is_aborted = true;
+		return;
+	}
+	assert(!mem_is_zerobin(ctx->pMem));
+	const char *sep = NULL;
+	int sep_len = 0;
+	if (argc == 1) {
+		sep = ",";
+		sep_len = 1;
+	} else if (mem_is_null(argv[1])) {
+		sep = "";
+		sep_len = 0;
+	} else {
+		assert(mem_is_same_type(argv[0], argv[1]));
+		sep = argv[1]->z;
+		sep_len = argv[1]->n;
+	}
+	if (mem_append(ctx->pMem, sep, sep_len) != 0) {
+		ctx->is_aborted = true;
+		return;
+	}
+	uint32_t size;
+	char *str;
+	if (mem_is_zerobin(argv[0])) {
+		size = argv[0]->u.nZero;
+		str = sqlDbMallocRawNN(sql_get(), size);
+		memset(str, 0, size);
+	} else {
+		size = argv[0]->n;
+		str = argv[0]->z;
+	}
+	if (mem_append(ctx->pMem, str, size) != 0) {
+		ctx->is_aborted = true;
+		return;
+	}
+}
+
+/** Finalizer for the GROUP_CONCAT() function. */
+static void
+fin_group_concat(struct sql_context *ctx)
+{
+	mem_copy(ctx->pOut, ctx->pMem);
+}
+
 static const unsigned char *
 mem_as_ustr(struct Mem *mem)
 {
@@ -1761,73 +1816,6 @@ soundexFunc(sql_context * context, int argc, sql_value ** argv)
 	}
 }
 
-/*
- * group_concat(EXPR, ?SEPARATOR?)
- */
-static void
-groupConcatStep(sql_context * context, int argc, sql_value ** argv)
-{
-	const char *zVal;
-	StrAccum *pAccum;
-	const char *zSep;
-	int nVal, nSep;
-	if (argc != 1 && argc != 2) {
-		diag_set(ClientError, ER_FUNC_WRONG_ARG_COUNT,
-			 "GROUP_CONCAT", "1 or 2", argc);
-		context->is_aborted = true;
-		return;
-	}
-	if (mem_is_null(argv[0]))
-		return;
-	pAccum =
-	    (StrAccum *) sql_aggregate_context(context, sizeof(*pAccum));
-
-	if (pAccum) {
-		sql *db = sql_context_db_handle(context);
-		int firstTerm = pAccum->mxAlloc == 0;
-		pAccum->mxAlloc = db->aLimit[SQL_LIMIT_LENGTH];
-		if (!firstTerm) {
-			if (argc == 2) {
-				zSep = mem_as_str0(argv[1]);
-				nSep = mem_len_unsafe(argv[1]);
-			} else {
-				zSep = ",";
-				nSep = 1;
-			}
-			if (zSep)
-				sqlStrAccumAppend(pAccum, zSep, nSep);
-		}
-		zVal = mem_as_str0(argv[0]);
-		nVal = mem_len_unsafe(argv[0]);
-		if (zVal)
-			sqlStrAccumAppend(pAccum, zVal, nVal);
-	}
-}
-
-static void
-groupConcatFinalize(sql_context * context)
-{
-	StrAccum *pAccum;
-	pAccum = sql_aggregate_context(context, 0);
-	if (pAccum) {
-		if (pAccum->accError == STRACCUM_TOOBIG) {
-			diag_set(ClientError, ER_SQL_EXECUTE, "string or binary"\
-				 "string is too big");
-			context->is_aborted = true;
-		} else if (pAccum->accError == STRACCUM_NOMEM) {
-			context->is_aborted = true;
-		} else {
-			char *str = sqlStrAccumFinish(pAccum);
-			int len = pAccum->nChar;
-			assert(len >= 0);
-			if (context->func->def->returns == FIELD_TYPE_STRING)
-				mem_set_str_dynamic(context->pOut, str, len);
-			else
-				mem_set_bin_dynamic(context->pOut, str, len);
-		}
-	}
-}
-
 int
 sql_is_like_func(struct Expr *expr)
 {
@@ -1995,13 +1983,13 @@ static struct sql_func_definition definitions[] = {
 	 NULL},
 
 	{"GROUP_CONCAT", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_STRING,
-	 groupConcatStep, groupConcatFinalize},
+	 step_group_concat, fin_group_concat},
 	{"GROUP_CONCAT", 2, {FIELD_TYPE_STRING, FIELD_TYPE_STRING},
-	 FIELD_TYPE_STRING, groupConcatStep, groupConcatFinalize},
+	 FIELD_TYPE_STRING, step_group_concat, fin_group_concat},
 	{"GROUP_CONCAT", 1, {FIELD_TYPE_VARBINARY}, FIELD_TYPE_VARBINARY,
-	 groupConcatStep, groupConcatFinalize},
+	 step_group_concat, fin_group_concat},
 	{"GROUP_CONCAT", 2, {FIELD_TYPE_VARBINARY, FIELD_TYPE_VARBINARY},
-	 FIELD_TYPE_VARBINARY, groupConcatStep, groupConcatFinalize},
+	 FIELD_TYPE_VARBINARY, step_group_concat, fin_group_concat},
 
 	{"HEX", 1, {FIELD_TYPE_VARBINARY}, FIELD_TYPE_STRING, hexFunc, NULL},
 	{"IFNULL", 2, {FIELD_TYPE_ANY, FIELD_TYPE_ANY}, FIELD_TYPE_SCALAR,
diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
index 416f27d69..bd8a8fe78 100755
--- a/test/sql-tap/func.test.lua
+++ b/test/sql-tap/func.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 local test = require("sqltester")
-test:plan(14680)
+test:plan(14681)
 
 --!./tcltestrunner.lua
 -- 2001 September 15
@@ -2142,11 +2142,14 @@ test:do_execsql_test(
         -- </func-24.2>
     })
 
--- do_test func-24.3 {
---   execsql {
---     SELECT group_concat(t1,' ' || rowid || ' ') FROM tbl1
---   }
--- } {{this 2 program 3 is 4 free 5 software}}
+test:do_execsql_test(
+    "func-24.3",
+    [[
+        SELECT group_concat(zeroblob(10));
+    ]], {
+        '\0\0\0\0\0\0\0\0\0\0'
+    })
+
 test:do_execsql_test(
     "func-24.4",
     [[


More information about the Tarantool-patches mailing list