[tarantool-patches] Re: [PATCH 2/2] sql: make aggregate functions types more strict
n.pettik
korablev at tarantool.org
Tue Apr 9 17:52:41 MSK 2019
Firstly, please consider Konstantin’s comments.
Then, I’ve found assertion fault:
tarantool> create table t(id int primary key, b scalar)
tarantool> insert into t values(1, 1), (2, '1'), (3, 4.123)
tarantool> select min(b) from t
Assertion failed: (((p->szMalloc > 0 && p->z == p->zMalloc) ? 1 : 0) + ((p->flags & MEM_Dyn) != 0 ? 1 : 0) + ((p->flags & MEM_Ephem) != 0 ? 1 : 0) + ((p->flags & MEM_Static) != 0 ? 1 : 0) == 1), function sqlVdbeCheckMemInvariants, file /Users/n.pettik/tarantool/src/box/sql/vdbemem.c, line 87.
Abort trap: 6
tarantool> create table t(id int primary key, a int, b text)
tarantool> insert into t values(1,1,'1’)
tarantool> select sum(b) from t
---
- metadata:
- name: sum(b)
type: number
rows:
- [1]
…
So, your statement below that SUM accepts only numeric arguments
is false. If string can be converted to number, it is OK:
tarantool> select a=b from t;
---
- metadata:
- name: a=b
type: integer
rows:
- [1]
…
Next:
tarantool> select min(b) from t
---
- metadata:
- name: min(b)
type: scalar
rows:
- ['1']
…
Could you please fix this behaviour (scalar->int) in a separate patch?
If it is not that easy, file a ticket.
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index b1bfc886e..379f28252 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -45,6 +45,17 @@
> #include <unicode/uchar.h>
> #include <unicode/ucol.h>
>
> +/*
/**
> + * This structure is for keeping context during work of
> + * aggregate function.
> + */
> +struct aggregate_context {
Let’s change name to minmax_context.
> + /** Value being aggregated. (e.g. current MAX or current counter value). */
> + Mem value;
> + /** Reference value to keep track of previous argument's type. */
> + Mem reference_value;
> +};
I agree that keeping struct Men is too much (especially as values not as pointers).
We need only type of that value. What is more, you need only one member:
“value being aggregated” comes from argv[0].
> @@ -1588,16 +1604,57 @@ static void
> minmaxStep(sql_context *context, int not_used, sql_value **argv)
> {
> UNUSED_PARAMETER(not_used);
> - sql_value *arg = argv[0];
> - sql_value *best = sql_aggregate_context(context, sizeof(*best));
> + struct aggregate_context *aggr_context =
> + (struct aggregate_context *) sql_aggregate_context(context,
> + sizeof(*aggr_context));
This way looks better:
@@ -1604,9 +1604,8 @@ static void
minmaxStep(sql_context *context, int not_used, sql_value **argv)
{
UNUSED_PARAMETER(not_used);
- struct aggregate_context *aggr_context =
- (struct aggregate_context *) sql_aggregate_context(context,
- sizeof(*aggr_context));
+ struct aggregate_context *aggr_context = (struct aggregate_context *)
+ sql_aggregate_context(context, sizeof(*aggr_context));
if (aggr_context == NULL)
retur
> + if (aggr_context == NULL)
> + return;
>
> + sql_value *arg = argv[0];
> + sql_value *best = &(aggr_context->value);
> if (best == NULL)
> return;
>
> - if (sql_value_type(argv[0]) == SQL_NULL) {
> + enum sql_type sql_type = sql_value_type(arg);
> +
> + if (sql_type == SQL_NULL) {
> if (best->flags != 0)
> sqlSkipAccumulatorLoad(context);
> } else if (best->flags != 0) {
> + /*
> + * During proceeding of the function, arguments
> + * of different types may be encountered (if
> + * SCALAR type column is proceeded). Some types
> + * are compatible (INTEGER and FLOAT) and others
> + * are not (TEXT and BLOB are not compatible with
> + * any other type). In the later case an error
> + * is raised.
> + */
> + sql_value *reference_value = &aggr_context->reference_value;
> + if (reference_value->flags == 0)
> + sqlVdbeMemCopy(reference_value, arg);
AFAIR flags == 0 is a situation when we are processing first
entry (i.e. aggregate_context is initialised right now).
> + enum sql_type ref_sql_type = sql_value_type(reference_value);
> +
> + bool is_compatible = true;
-> types_are_compatible
> + if (sql_type != ref_sql_type) {
> + is_compatible = false;
> + if ((sql_type == SQL_INTEGER || sql_type == SQL_FLOAT) &&
> + (ref_sql_type == SQL_INTEGER ||
> + ref_sql_type == SQL_FLOAT)) {
> + is_compatible = true;
> + }
> + }
> + if (!is_compatible) {
> + diag_set(ClientError, ER_INCONSISTENT_TYPES,
> + mem_type_to_str(reference_value),
> + mem_type_to_str(arg));
> + context->fErrorOrAux = 1;
> + context->isError = SQL_TARANTOOL_ERROR;
> + sqlVdbeMemRelease(best);
> + sqlVdbeMemRelease(reference_value);
Both of these values are allocated in scope of aggregate context.
I guess there’s no need to provide any cleanup on these variables.
> + return;
> + }
> +
> int max;
> int cmp;
> struct coll *coll = sqlGetFuncCollSeq(context);
> @@ -1625,13 +1682,16 @@ minmaxStep(sql_context *context, int not_used, sql_value **argv)
> static void
> minMaxFinalize(sql_context * context)
> {
> - sql_value *pRes;
> - pRes = (sql_value *) sql_aggregate_context(context, 0);
> + struct aggregate_context *func_context =
> + (struct aggregate_context *) sql_aggregate_context(context, sizeof(*func_context));
> + sql_value *pRes = &(func_context->value);
> +
> if (pRes != NULL) {
> if (pRes->flags != 0) {
> sql_result_value(context, pRes);
> }
> sqlVdbeMemRelease(pRes);
> + sqlVdbeMemRelease(&func_context->reference_value);
> }
> }
>
> diff --git a/src/box/sql/select.c b/src/box/sql/select.c
> index b1ec8c758..624083b22 100644
> --- a/src/box/sql/select.c
> +++ b/src/box/sql/select.c
> @@ -4340,13 +4340,22 @@ pushDownWhereTerms(Parse * pParse, /* Parse context (for malloc() and error repo
> * argument, this function checks if the following are true:
> *
> * * the query contains just a single aggregate function,
> - * * the aggregate function is either min() or max(), and
> - * * the argument to the aggregate function is a column value.
> + * * the aggregate function is either min() or max(),
> + * * the argument to the aggregate function is a column value,
> + * * the type of column is not SCALAR.
> *
> * If all of the above are true, then WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX
> * is returned as appropriate. Also, *ppMinMax is set to point to the
> * list of arguments passed to the aggregate before returning.
> *
> + * The requirement of column type not being SCALAR follows from
> + * the purpose of the function. The purpose of the function is
> + * to answer the question: "Should MIN/MAX call be optimised by
> + * using ORDER ON clause code?" If the type of column is SCALAR
There’s no ORDER ON clause.
> + * then we have to iterate over all rows to check if their types
> + * are compatible. Hence, the optimisation should not be used.
> + * For details please see: https://github.com/tarantool/tarantool/issues/4032
> + *
> * Or, if the conditions above are not met, *ppMinMax is set to 0 and
> * WHERE_ORDERBY_NORMAL is returned.
> */
> @@ -4364,6 +4373,8 @@ minMaxQuery(AggInfo * pAggInfo, ExprList ** ppMinMax)
> if (pEList && pEList->nExpr == 1
> && pEList->a[0].pExpr->op == TK_AGG_COLUMN) {
> const char *zFunc = pExpr->u.zToken;
> + if (sql_expr_type(pEList->a[0].pExpr) == FIELD_TYPE_SCALAR)
> + return eRet;
I see no difference between byte code generated for SCALAR and other
columns (SELECT MIN(a) FROM t;). Investigate this case please.
> if (sqlStrICmp(zFunc, "min") == 0) {
> eRet = WHERE_ORDERBY_MIN;
> *ppMinMax = pEList;
>
> diff --git a/test/sql-tap/select1.test.lua b/test/sql-tap/select1.test.lua
> index be63e815d..a4ce19d73 100755
> --- a/test/sql-tap/select1.test.lua
> +++ b/test/sql-tap/select1.test.lua
> @@ -1,6 +1,6 @@
> #!/usr/bin/env tarantool
> test = require("sqltester")
> -test:plan(173)
> +test:plan(172)
>
> --!./tcltestrunner.lua
> -- ["set","testdir",[["file","dirname",["argv0"]]]]
> @@ -503,16 +503,6 @@ test:do_catchsql_test(
> -- </select1-2.17>
> })
>
> -test:do_execsql_test(
> - "select1-2.17.1",
> - [[
> - SELECT sum(a) FROM t3
> - ]], {
> - -- <select1-2.17.1>
> - 44.0
> - -- </select1-2.17.1>
> - })
Don’t delete test, just change it to catch an error.
> -
> test:do_catchsql_test(
> "select1-2.18”
More information about the Tarantool-patches
mailing list