Tarantool development patches archive
 help / color / mirror / Atom feed
From: Mergen Imeev via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: Re: [Tarantool-patches] [PATCH v2 09/15] sql: rework AVG()
Date: Sat, 25 Sep 2021 14:32:30 +0300	[thread overview]
Message-ID: <20210925113230.GF290467@tarantool.org> (raw)
In-Reply-To: <62bdd807-6f60-4307-1ed8-0badb6c014ae@tarantool.org>

Thank you for the review! My answers, diff and new patch below.

On Thu, Sep 23, 2021 at 12:48:58AM +0200, Vladislav Shpilevoy wrote:
> Thanks for the fixes!
> 
> > diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> > index 12a6a5a2c..b5f154fb1 100644
> > --- a/src/box/sql/func.c
> > +++ b/src/box/sql/func.c
> > @@ -102,6 +102,54 @@ fin_total(struct sql_context *ctx)
> >  		mem_copy_as_ephemeral(ctx->pOut, ctx->pMem);
> >  }
> >  
> > +/** Implementation of the AVG() function. */
> > +static void
> > +step_avg(struct sql_context *ctx, int argc, struct Mem **argv)
> > +{
> > +	assert(argc == 1);
> > +	(void)argc;
> > +	assert(ctx->pMem->type == MEM_TYPE_NULL || mem_is_bin(ctx->pMem));
> > +	if (argv[0]->type == MEM_TYPE_NULL)
> > +		return;
> > +	struct Mem *mem;
> > +	uint32_t *count;
> > +	if (ctx->pMem->type == MEM_TYPE_NULL) {
> > +		uint32_t size = sizeof(struct Mem) + sizeof(uint32_t);
> > +		mem = sqlDbMallocRawNN(sql_get(), size);
> 
> 1. Where is it deleted? Can't find.
> 
It will be set as the allocated buffer of the MEM and will be released during
mem_destroy(). However, I agree that it is not obvious that this new MEM does
not need to be cleared first. I added mem_destroy() for this mem, though it
will work as mem_clear() because numbers are scalars.

> > +		if (mem == NULL) {
> > +			ctx->is_aborted = true;
> > +			return;
> > +		}
> > +		count = (uint32_t *)(mem + 1);
> > +		mem_create(mem);
> > +		*count = 1;
> > +		mem_copy_as_ephemeral(mem, argv[0]);
> > +		mem_set_bin_allocated(ctx->pMem, (char *)mem, size);
> > +		return;
> > +	}
> > diff --git a/test/sql-tap/built-in-functions.test.lua b/test/sql-tap/built-in-functions.test.lua
> > index 507d06549..08a63b86d 100755
> > --- a/test/sql-tap/built-in-functions.test.lua
> > +++ b/test/sql-tap/built-in-functions.test.lua
> > @@ -605,4 +605,33 @@ test:do_execsql_test(
> >      }
> >  )
> >  
> > +-- Make sure AVG() accepts and returns DOUBLE by default.
> > +test:do_test(
> > +    "builtins-4.1.1",
> > +    function()
> > +        return box.execute([[SELECT AVG(?);]], {1}).metadata
> > +    end, {
> > +        {name = "COLUMN_1", type = "double"},
> > +    })
> > +
> > +test:do_test(
> > +    "builtins-4.1.2",
> > +    function()
> > +        local res = {pcall(box.execute, [[SELECT AVG(?);]], {-1ULL})}
> > +        return {tostring(res[3])}
> > +    end, {
> > +        "Type mismatch: can not convert integer(18446744073709551615) to double"
> > +    })
> > +
> > +-- Make sure AVG() works with DECIMAL properly.
> > +test:do_execsql_test(
> > +    "builtins-4.1.3",
> > +    [[
> > +        SELECT AVG(cast(column_2 as DECIMAL)) from (values(1), (123.432));
> 
> 2. I don't understand how it works. Why not column_1? Why does it fail?
> 
> tarantool> box.execute('SELECT column_1 from (values(1), (123.432));')
> ---
> - null
> - Can’t resolve field 'COLUMN_1'
> ...
> 
> Aren't there 2 tuples, [1] and [123.432], with just 1 column each?
> And why does it work when I delete one of the values?
> 
> tarantool> box.execute('SELECT column_1 from (values(1));')
> ---
> - metadata:
>   - name: COLUMN_1
>     type: integer
>   rows:
>   - [1]
> ...
> 
I believe this is a bug that was introduced in patch
7bfcf57e44028ea425d0d274b9ca402c196716f9.

Before patch:

tarantool> box.execute([[values(1), (2), (3);]])
---
- metadata:
  - name: column1
    type: integer
  rows:
  - [1]
  - [2]
  - [3]
...

tarantool> box.execute([[values(1), (2), (3), (4);]])
---
- metadata:
  - name: column1
    type: integer
  rows:
  - [1]
  - [2]
  - [3]
  - [4]
...

After patch:

tarantool> box.execute([[values(1), (2), (3);]])
---
- metadata:
  - name: COLUMN_3
    type: integer
  rows:
  - [1]
  - [2]
  - [3]
...

tarantool> box.execute([[values(1), (2), (3), (4);]])
---
- metadata:
  - name: COLUMN_4
    type: integer
  rows:
  - [1]
  - [2]
  - [3]
  - [4]
...

Not sure if there is an issue for this.

> > +    ]],
> > +    {
> > +        dec.new(62.216)
> > +    }
> > +)
> > +
> >  test:finish_test()
> > 


Diff:

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 77a8d637a..e436ffbe1 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -108,12 +108,12 @@ step_avg(struct sql_context *ctx, int argc, struct Mem **argv)
 {
 	assert(argc == 1);
 	(void)argc;
-	assert(ctx->pMem->type == MEM_TYPE_NULL || mem_is_bin(ctx->pMem));
-	if (argv[0]->type == MEM_TYPE_NULL)
+	assert(mem_is_null(ctx->pMem) || mem_is_bin(ctx->pMem));
+	if (mem_is_null(argv[0]))
 		return;
 	struct Mem *mem;
 	uint32_t *count;
-	if (ctx->pMem->type == MEM_TYPE_NULL) {
+	if (mem_is_null(ctx->pMem)) {
 		uint32_t size = sizeof(struct Mem) + sizeof(uint32_t);
 		mem = sqlDbMallocRawNN(sql_get(), size);
 		if (mem == NULL) {
@@ -138,15 +138,19 @@ step_avg(struct sql_context *ctx, int argc, struct Mem **argv)
 static void
 fin_avg(struct sql_context *ctx)
 {
-	assert(ctx->pMem->type == MEM_TYPE_NULL || mem_is_bin(ctx->pMem));
-	if (ctx->pMem->type == MEM_TYPE_NULL)
+	assert(mem_is_null(ctx->pMem) || mem_is_bin(ctx->pMem));
+	if (mem_is_null(ctx->pMem))
 		return mem_set_null(ctx->pOut);
-	struct Mem *mem = (struct Mem *)ctx->pMem->z;
-	uint32_t *count = (uint32_t *)(mem + 1);
-	struct Mem mem_count;
-	mem_create(&mem_count);
-	mem_set_uint(&mem_count, *count);
-	if (mem_div(mem, &mem_count, ctx->pOut) != 0)
+	struct Mem *tmp = (struct Mem *)ctx->pMem->z;
+	uint32_t *count_val = (uint32_t *)(tmp + 1);
+	struct Mem sum;
+	mem_create(&sum);
+	mem_copy_as_ephemeral(&sum, tmp);
+	mem_destroy(tmp);
+	struct Mem count;
+	mem_create(&count);
+	mem_set_uint(&count, *count_val);
+	if (mem_div(&sum, &count, ctx->pOut) != 0)
 		ctx->is_aborted = true;
 }
 

New patch:

commit 2b5ae06ceadff5e121e4508ffc2b7913f6d84e79
Author: Mergen Imeev <imeevma@gmail.com>
Date:   Thu Sep 9 18:19:53 2021 +0300

    sql: rework AVG()
    
    This patch makes AVG() accept DOUBLE values by default. Also, after this
    patch AVG() will be able to work with DECIMAL values.
    
    Part of #4145
    Part of #6355

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index d0606744c..e436ffbe1 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -102,6 +102,58 @@ fin_total(struct sql_context *ctx)
 		mem_copy_as_ephemeral(ctx->pOut, ctx->pMem);
 }
 
+/** Implementation of the AVG() function. */
+static void
+step_avg(struct sql_context *ctx, int argc, struct Mem **argv)
+{
+	assert(argc == 1);
+	(void)argc;
+	assert(mem_is_null(ctx->pMem) || mem_is_bin(ctx->pMem));
+	if (mem_is_null(argv[0]))
+		return;
+	struct Mem *mem;
+	uint32_t *count;
+	if (mem_is_null(ctx->pMem)) {
+		uint32_t size = sizeof(struct Mem) + sizeof(uint32_t);
+		mem = sqlDbMallocRawNN(sql_get(), size);
+		if (mem == NULL) {
+			ctx->is_aborted = true;
+			return;
+		}
+		count = (uint32_t *)(mem + 1);
+		mem_create(mem);
+		*count = 1;
+		mem_copy_as_ephemeral(mem, argv[0]);
+		mem_set_bin_allocated(ctx->pMem, (char *)mem, size);
+		return;
+	}
+	mem = (struct Mem *)ctx->pMem->z;
+	count = (uint32_t *)(mem + 1);
+	++*count;
+	if (mem_add(mem, argv[0], mem) != 0)
+		ctx->is_aborted = true;
+}
+
+/** Finalizer for the AVG() function. */
+static void
+fin_avg(struct sql_context *ctx)
+{
+	assert(mem_is_null(ctx->pMem) || mem_is_bin(ctx->pMem));
+	if (mem_is_null(ctx->pMem))
+		return mem_set_null(ctx->pOut);
+	struct Mem *tmp = (struct Mem *)ctx->pMem->z;
+	uint32_t *count_val = (uint32_t *)(tmp + 1);
+	struct Mem sum;
+	mem_create(&sum);
+	mem_copy_as_ephemeral(&sum, tmp);
+	mem_destroy(tmp);
+	struct Mem count;
+	mem_create(&count);
+	mem_set_uint(&count, *count_val);
+	if (mem_div(&sum, &count, ctx->pOut) != 0)
+		ctx->is_aborted = true;
+}
+
 static const unsigned char *
 mem_as_ustr(struct Mem *mem)
 {
@@ -1656,69 +1708,6 @@ soundexFunc(sql_context * context, int argc, sql_value ** argv)
 	}
 }
 
-/*
- * An instance of the following structure holds the context of a
- * sum() or avg() aggregate computation.
- */
-typedef struct SumCtx SumCtx;
-struct SumCtx {
-	struct Mem mem;
-	uint32_t count;
-};
-
-/*
- * Routines used to compute the sum, average, and total.
- *
- * The SUM() function follows the (broken) SQL standard which means
- * that it returns NULL if it sums over no inputs.  TOTAL returns
- * 0.0 in that case.  In addition, TOTAL always returns a float where
- * SUM might return an integer if it never encounters a floating point
- * value.  TOTAL never fails, but SUM might through an exception if
- * it overflows an integer.
- */
-static void
-sum_step(struct sql_context *context, int argc, sql_value **argv)
-{
-	assert(argc == 1);
-	UNUSED_PARAMETER(argc);
-	struct SumCtx *p = sql_aggregate_context(context, sizeof(*p));
-	if (p == NULL) {
-		context->is_aborted = true;
-		return;
-	}
-	if (p->count == 0) {
-		mem_create(&p->mem);
-		assert(context->func->def->returns == FIELD_TYPE_INTEGER ||
-		       context->func->def->returns == FIELD_TYPE_DOUBLE);
-		if (context->func->def->returns == FIELD_TYPE_INTEGER)
-			mem_set_uint(&p->mem, 0);
-		else
-			mem_set_double(&p->mem, 0.0);
-	}
-	if (argv[0]->type == MEM_TYPE_NULL)
-		return;
-	++p->count;
-	assert(mem_is_num(argv[0]));
-	if (mem_add(&p->mem, argv[0], &p->mem) != 0)
-		context->is_aborted = true;
-}
-
-static void
-avgFinalize(sql_context * context)
-{
-	SumCtx *p;
-	p = sql_aggregate_context(context, 0);
-	if (p == NULL || p->count == 0) {
-		mem_set_null(context->pOut);
-		return;
-	}
-	struct Mem mem;
-	mem_create(&mem);
-	mem_set_uint(&mem, p->count);
-	if (mem_div(&p->mem, &mem, context->pOut) != 0)
-		context->is_aborted = true;
-}
-
 /*
  * The following structure keeps track of state information for the
  * count() aggregate function.
@@ -2015,8 +2004,9 @@ struct sql_func_definition {
 static struct sql_func_definition definitions[] = {
 	{"ABS", 1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_INTEGER, absFunc, NULL},
 	{"ABS", 1, {FIELD_TYPE_DOUBLE}, FIELD_TYPE_DOUBLE, absFunc, NULL},
-	{"AVG", 1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_INTEGER, sum_step, avgFinalize},
-	{"AVG", 1, {FIELD_TYPE_DOUBLE}, FIELD_TYPE_DOUBLE, sum_step, avgFinalize},
+	{"AVG", 1, {FIELD_TYPE_DOUBLE}, FIELD_TYPE_DOUBLE, step_avg, fin_avg},
+	{"AVG", 1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_INTEGER, step_avg, fin_avg},
+	{"AVG", 1, {FIELD_TYPE_DECIMAL}, FIELD_TYPE_DECIMAL, step_avg, fin_avg},
 	{"CHAR", -1, {FIELD_TYPE_INTEGER}, FIELD_TYPE_STRING, charFunc, NULL},
 	{"CHAR_LENGTH", 1, {FIELD_TYPE_STRING}, FIELD_TYPE_INTEGER, lengthFunc,
 	 NULL},
diff --git a/test/sql-tap/built-in-functions.test.lua b/test/sql-tap/built-in-functions.test.lua
index 507d06549..08a63b86d 100755
--- a/test/sql-tap/built-in-functions.test.lua
+++ b/test/sql-tap/built-in-functions.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 local test = require("sqltester")
-test:plan(58)
+test:plan(61)
 
 local dec = require('decimal')
 
@@ -477,7 +477,7 @@ test:do_test(
         local res = {pcall(box.execute, [[SELECT AVG(?);]], {'1'})}
         return {tostring(res[3])}
     end, {
-        "Type mismatch: can not convert string('1') to integer"
+        "Type mismatch: can not convert string('1') to double"
     })
 
 test:do_catchsql_test(
@@ -605,4 +605,33 @@ test:do_execsql_test(
     }
 )
 
+-- Make sure AVG() accepts and returns DOUBLE by default.
+test:do_test(
+    "builtins-4.1.1",
+    function()
+        return box.execute([[SELECT AVG(?);]], {1}).metadata
+    end, {
+        {name = "COLUMN_1", type = "double"},
+    })
+
+test:do_test(
+    "builtins-4.1.2",
+    function()
+        local res = {pcall(box.execute, [[SELECT AVG(?);]], {-1ULL})}
+        return {tostring(res[3])}
+    end, {
+        "Type mismatch: can not convert integer(18446744073709551615) to double"
+    })
+
+-- Make sure AVG() works with DECIMAL properly.
+test:do_execsql_test(
+    "builtins-4.1.3",
+    [[
+        SELECT AVG(cast(column_2 as DECIMAL)) from (values(1), (123.432));
+    ]],
+    {
+        dec.new(62.216)
+    }
+)
+
 test:finish_test()

  reply	other threads:[~2021-09-25 11:32 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <cover.1632220375.git.imeevma@gmail.com>
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 01/15] sql: fix possible undefined behavior during cast Mergen Imeev via Tarantool-patches
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 02/15] sql: use register P1 for number of arguments Mergen Imeev via Tarantool-patches
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 04/15] sql: move collation to struct sql_context Mergen Imeev via Tarantool-patches
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 05/15] sql: introduce mem_append() Mergen Imeev via Tarantool-patches
2021-09-25 11:06   ` Mergen Imeev via Tarantool-patches
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 06/15] sql: remove sql_vdbemem_finalize() Mergen Imeev via Tarantool-patches
2021-09-22 22:47   ` Vladislav Shpilevoy via Tarantool-patches
2021-09-25 11:13     ` Mergen Imeev via Tarantool-patches
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 07/15] sql: rework SUM() Mergen Imeev via Tarantool-patches
2021-09-22 22:48   ` Vladislav Shpilevoy via Tarantool-patches
2021-09-25 11:17     ` Mergen Imeev via Tarantool-patches
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 08/15] sql: rework TOTAL() Mergen Imeev via Tarantool-patches
2021-09-25 11:20   ` Mergen Imeev via Tarantool-patches
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 09/15] sql: rework AVG() Mergen Imeev via Tarantool-patches
2021-09-22 22:48   ` Vladislav Shpilevoy via Tarantool-patches
2021-09-25 11:32     ` Mergen Imeev via Tarantool-patches [this message]
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 10/15] sql: rework COUNT() Mergen Imeev via Tarantool-patches
2021-09-25 11:34   ` Mergen Imeev via Tarantool-patches
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 11/15] sql: rework MIN() and MAX() Mergen Imeev via Tarantool-patches
2021-09-25 11:36   ` Mergen Imeev via Tarantool-patches
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 12/15] sql: rework GROUP_CONCAT() Mergen Imeev via Tarantool-patches
2021-09-22 22:49   ` Vladislav Shpilevoy via Tarantool-patches
2021-09-25 11:42     ` Mergen Imeev via Tarantool-patches
2021-09-29  7:03       ` Mergen Imeev via Tarantool-patches
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 13/15] sql: remove copying of result in finalizers Mergen Imeev via Tarantool-patches
2021-09-22 22:50   ` Vladislav Shpilevoy via Tarantool-patches
2021-09-25 11:47     ` Mergen Imeev via Tarantool-patches
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 14/15] sql: remove MEM_TYPE_AGG Mergen Imeev via Tarantool-patches
2021-09-21 10:59 ` [Tarantool-patches] [PATCH v2 15/15] sql: remove field argv from struct sql_context Mergen Imeev via Tarantool-patches
2021-09-22 22:51   ` Vladislav Shpilevoy via Tarantool-patches
2021-09-25 12:03     ` Mergen Imeev via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210925113230.GF290467@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=imeevma@tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v2 09/15] sql: rework AVG()' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox