From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <tarantool-patches-bounce@freelists.org>
Received: from localhost (localhost [127.0.0.1])
	by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 5240729E14
	for <tarantool-patches@freelists.org>; Tue,  9 Apr 2019 10:52:43 -0400 (EDT)
Received: from turing.freelists.org ([127.0.0.1])
	by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024)
	with ESMTP id 4TK-A7O7bZqh for <tarantool-patches@freelists.org>;
	Tue,  9 Apr 2019 10:52:43 -0400 (EDT)
Received: from smtpng2.m.smailru.net (smtpng2.m.smailru.net [94.100.179.3])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id D2C2A29DF7
	for <tarantool-patches@freelists.org>; Tue,  9 Apr 2019 10:52:42 -0400 (EDT)
Content-Type: text/plain;
	charset=utf-8
Mime-Version: 1.0 (Mac OS X Mail 12.4 \(3445.104.8\))
Subject: [tarantool-patches] Re: [PATCH 2/2] sql: make aggregate functions
 types more strict
From: "n.pettik" <korablev@tarantool.org>
In-Reply-To: <49e4ae0bc187dc02f908427692c0ddb2cc2d36a8.1554475881.git.ivan.koptelov@tarantool.org>
Date: Tue, 9 Apr 2019 17:52:41 +0300
Content-Transfer-Encoding: quoted-printable
Message-Id: <A663A7E7-1209-4057-97B2-218F45730DB9@tarantool.org>
References: <cover.1554475881.git.ivan.koptelov@tarantool.org>
 <49e4ae0bc187dc02f908427692c0ddb2cc2d36a8.1554475881.git.ivan.koptelov@tarantool.org>
Sender: tarantool-patches-bounce@freelists.org
Errors-to: tarantool-patches-bounce@freelists.org
Reply-To: tarantool-patches@freelists.org
List-Help: <mailto:ecartis@freelists.org?Subject=help>
List-Unsubscribe: <mailto:tarantool-patches-request@freelists.org?Subject=unsubscribe>
List-software: Ecartis version 1.0.0
List-Id: tarantool-patches <tarantool-patches.freelists.org>
List-Subscribe: <mailto:tarantool-patches-request@freelists.org?Subject=subscribe>
List-Owner: <mailto:>
List-post: <mailto:tarantool-patches@freelists.org>
List-Archive: <http://www.freelists.org/archives/tarantool-patches>
To: tarantool-patches@freelists.org
Cc: Ivan Koptelov <ivan.koptelov@tarantool.org>


Firstly, please consider Konstantin=E2=80=99s comments.

Then, I=E2=80=99ve found assertion fault:

tarantool> create table t(id int primary key, b scalar)
tarantool> insert into t values(1, 1), (2, '1'), (3, 4.123)
tarantool> select min(b) from t
Assertion failed: (((p->szMalloc > 0 && p->z =3D=3D p->zMalloc) ? 1 : 0) =
+ ((p->flags & MEM_Dyn) !=3D 0 ? 1 : 0) + ((p->flags & MEM_Ephem) !=3D 0 =
? 1 : 0) + ((p->flags & MEM_Static) !=3D 0 ? 1 : 0) =3D=3D 1), function =
sqlVdbeCheckMemInvariants, file =
/Users/n.pettik/tarantool/src/box/sql/vdbemem.c, line 87.
Abort trap: 6

tarantool> create table t(id int primary key, a int, b text)
tarantool> insert into t values(1,1,'1=E2=80=99)
tarantool> select sum(b) from t
---
- metadata:
  - name: sum(b)
    type: number
  rows:
  - [1]
=E2=80=A6

So, your statement below that SUM accepts only numeric arguments
is false. If string can be converted to number, it is OK:

tarantool> select a=3Db from t;
---
- metadata:
  - name: a=3Db
    type: integer
  rows:
  - [1]
=E2=80=A6

Next:

tarantool> select min(b) from t
---
- metadata:
  - name: min(b)
    type: scalar
  rows:
  - ['1']
=E2=80=A6

Could you please fix this behaviour (scalar->int) in a separate patch?
If it is not that easy, file a ticket.

> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index b1bfc886e..379f28252 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -45,6 +45,17 @@
> #include <unicode/uchar.h>
> #include <unicode/ucol.h>
>=20
> +/*

/**

> + * This structure is for keeping context during work of
> + * aggregate function.
> + */
> +struct aggregate_context {

Let=E2=80=99s change name to minmax_context.

> +    /** Value being aggregated. (e.g. current MAX or current counter =
value). */
> +    Mem value;
> +    /** Reference value to keep track of previous argument's type. */
> +    Mem reference_value;
> +};

I agree that keeping struct Men is too much (especially as values not as =
pointers).
We need only type of that value. What is more, you need only one member:
=E2=80=9Cvalue being aggregated=E2=80=9D comes from argv[0].

> @@ -1588,16 +1604,57 @@ static void
> minmaxStep(sql_context *context, int not_used, sql_value **argv)
> {
> 	UNUSED_PARAMETER(not_used);
> -	sql_value *arg =3D argv[0];
> -	sql_value *best =3D sql_aggregate_context(context, =
sizeof(*best));
> +	struct aggregate_context *aggr_context =3D
> +		(struct aggregate_context *) =
sql_aggregate_context(context,
> +								   =
sizeof(*aggr_context));

This way looks better:

@@ -1604,9 +1604,8 @@ static void
 minmaxStep(sql_context *context, int not_used, sql_value **argv)
 {
        UNUSED_PARAMETER(not_used);
-       struct aggregate_context *aggr_context =3D
-               (struct aggregate_context *) =
sql_aggregate_context(context,
-                                                                  =
sizeof(*aggr_context));
+       struct aggregate_context *aggr_context =3D (struct =
aggregate_context *)
+               sql_aggregate_context(context, sizeof(*aggr_context));
        if (aggr_context =3D=3D NULL)
                retur

> +	if (aggr_context =3D=3D NULL)
> +		return;
>=20
> +	sql_value *arg =3D argv[0];
> +	sql_value *best =3D &(aggr_context->value);
> 	if (best =3D=3D NULL)
> 		return;
>=20
> -	if (sql_value_type(argv[0]) =3D=3D SQL_NULL) {
> +	enum sql_type sql_type =3D sql_value_type(arg);
> +
> +	if (sql_type =3D=3D SQL_NULL) {
> 		if (best->flags !=3D 0)
> 			sqlSkipAccumulatorLoad(context);
> 	} else if (best->flags !=3D 0) {
> +		/*
> +		 * During proceeding of the function, arguments
> +		 * of different types may be encountered (if
> +		 * SCALAR type column is proceeded). Some types
> +		 * are compatible (INTEGER and FLOAT) and others
> +		 * are not (TEXT and BLOB are not compatible with
> +		 * any other type). In the later case an error
> +		 * is raised.
> +		 */
> +		sql_value *reference_value =3D =
&aggr_context->reference_value;
> +		if (reference_value->flags =3D=3D 0)
> +			sqlVdbeMemCopy(reference_value, arg);

AFAIR flags =3D=3D 0 is a situation when we are processing first
entry (i.e. aggregate_context is initialised right now).

> +		enum sql_type ref_sql_type =3D =
sql_value_type(reference_value);
> +
> +		bool is_compatible =3D true;

-> types_are_compatible

> +		if (sql_type !=3D ref_sql_type) {
> +			is_compatible =3D false;
> +			if ((sql_type =3D=3D SQL_INTEGER || sql_type =3D=3D=
 SQL_FLOAT) &&
> +			    (ref_sql_type =3D=3D SQL_INTEGER ||
> +			     ref_sql_type =3D=3D SQL_FLOAT)) {
> +				is_compatible =3D true;
> +			}
> +		}
> +		if (!is_compatible) {
> +			diag_set(ClientError, ER_INCONSISTENT_TYPES,
> +				 mem_type_to_str(reference_value),
> +				 mem_type_to_str(arg));
> +			context->fErrorOrAux =3D 1;
> +			context->isError =3D SQL_TARANTOOL_ERROR;
> +			sqlVdbeMemRelease(best);
> +			sqlVdbeMemRelease(reference_value);

Both of these values are allocated in scope of aggregate context.
I guess there=E2=80=99s no need to provide any cleanup on these =
variables.

> +			return;
> +		}
> +
> 		int max;
> 		int cmp;
> 		struct coll *coll =3D sqlGetFuncCollSeq(context);
> @@ -1625,13 +1682,16 @@ minmaxStep(sql_context *context, int not_used, =
sql_value **argv)
> static void
> minMaxFinalize(sql_context * context)
> {
> -	sql_value *pRes;
> -	pRes =3D (sql_value *) sql_aggregate_context(context, 0);
> +	struct aggregate_context *func_context =3D
> +		(struct aggregate_context *) =
sql_aggregate_context(context, sizeof(*func_context));
> +	sql_value *pRes =3D &(func_context->value);
> +
> 	if (pRes !=3D NULL) {
> 		if (pRes->flags !=3D 0) {
> 			sql_result_value(context, pRes);
> 		}
> 		sqlVdbeMemRelease(pRes);
> +		sqlVdbeMemRelease(&func_context->reference_value);
> 	}
> }
>=20
> diff --git a/src/box/sql/select.c b/src/box/sql/select.c
> index b1ec8c758..624083b22 100644
> --- a/src/box/sql/select.c
> +++ b/src/box/sql/select.c
> @@ -4340,13 +4340,22 @@ pushDownWhereTerms(Parse * pParse,	/* Parse =
context (for malloc() and error repo
>  * argument, this function checks if the following are true:
>  *
>  *    * the query contains just a single aggregate function,
> - *    * the aggregate function is either min() or max(), and
> - *    * the argument to the aggregate function is a column value.
> + *    * the aggregate function is either min() or max(),
> + *    * the argument to the aggregate function is a column value,
> + *    * the type of column is not SCALAR.
>  *
>  * If all of the above are true, then WHERE_ORDERBY_MIN or =
WHERE_ORDERBY_MAX
>  * is returned as appropriate. Also, *ppMinMax is set to point to the
>  * list of arguments passed to the aggregate before returning.
>  *
> + * The requirement of column type not being SCALAR follows from
> + * the purpose of the function. The purpose of the function is
> + * to answer the question: "Should MIN/MAX call be optimised by
> + * using ORDER ON clause code?" If the type of column is SCALAR

There=E2=80=99s no ORDER ON clause.

> + * then we have to iterate over all rows to check if their types
> + * are compatible. Hence, the optimisation should not be used.
> + * For details please see: =
https://github.com/tarantool/tarantool/issues/4032
> + *
>  * Or, if the conditions above are not met, *ppMinMax is set to 0 and
>  * WHERE_ORDERBY_NORMAL is returned.
>  */
> @@ -4364,6 +4373,8 @@ minMaxQuery(AggInfo * pAggInfo, ExprList ** =
ppMinMax)
> 		if (pEList && pEList->nExpr =3D=3D 1
> 		    && pEList->a[0].pExpr->op =3D=3D TK_AGG_COLUMN) {
> 			const char *zFunc =3D pExpr->u.zToken;
> +			if (sql_expr_type(pEList->a[0].pExpr) =3D=3D =
FIELD_TYPE_SCALAR)
> +				return eRet;

I see no difference between byte code generated for SCALAR and other
columns (SELECT MIN(a) FROM t;). Investigate this case please.

> 			if (sqlStrICmp(zFunc, "min") =3D=3D 0) {
> 				eRet =3D WHERE_ORDERBY_MIN;
> 				*ppMinMax =3D pEList;
>=20
> diff --git a/test/sql-tap/select1.test.lua =
b/test/sql-tap/select1.test.lua
> index be63e815d..a4ce19d73 100755
> --- a/test/sql-tap/select1.test.lua
> +++ b/test/sql-tap/select1.test.lua
> @@ -1,6 +1,6 @@
> #!/usr/bin/env tarantool
> test =3D require("sqltester")
> -test:plan(173)
> +test:plan(172)
>=20
> --!./tcltestrunner.lua
> -- ["set","testdir",[["file","dirname",["argv0"]]]]
> @@ -503,16 +503,6 @@ test:do_catchsql_test(
>         -- </select1-2.17>
>     })
>=20
> -test:do_execsql_test(
> -    "select1-2.17.1",
> -    [[
> -        SELECT sum(a) FROM t3
> -    ]], {
> -        -- <select1-2.17.1>
> -        44.0
> -        -- </select1-2.17.1>
> -    })

Don=E2=80=99t delete test, just change it to catch an error.

> -
> test:do_catchsql_test(
>     "select1-2.18=E2=80=9D