From: "n.pettik" <korablev@tarantool.org>
To: tarantool-patches@freelists.org
Cc: Ivan Koptelov <ivan.koptelov@tarantool.org>
Subject: [tarantool-patches] Re: [PATCH 2/2] sql: make aggregate functions types more strict
Date: Tue, 9 Apr 2019 17:52:41 +0300 [thread overview]
Message-ID: <A663A7E7-1209-4057-97B2-218F45730DB9@tarantool.org> (raw)
In-Reply-To: <49e4ae0bc187dc02f908427692c0ddb2cc2d36a8.1554475881.git.ivan.koptelov@tarantool.org>
Firstly, please consider Konstantin’s comments.
Then, I’ve found assertion fault:
tarantool> create table t(id int primary key, b scalar)
tarantool> insert into t values(1, 1), (2, '1'), (3, 4.123)
tarantool> select min(b) from t
Assertion failed: (((p->szMalloc > 0 && p->z == p->zMalloc) ? 1 : 0) + ((p->flags & MEM_Dyn) != 0 ? 1 : 0) + ((p->flags & MEM_Ephem) != 0 ? 1 : 0) + ((p->flags & MEM_Static) != 0 ? 1 : 0) == 1), function sqlVdbeCheckMemInvariants, file /Users/n.pettik/tarantool/src/box/sql/vdbemem.c, line 87.
Abort trap: 6
tarantool> create table t(id int primary key, a int, b text)
tarantool> insert into t values(1,1,'1’)
tarantool> select sum(b) from t
---
- metadata:
- name: sum(b)
type: number
rows:
- [1]
…
So, your statement below that SUM accepts only numeric arguments
is false. If string can be converted to number, it is OK:
tarantool> select a=b from t;
---
- metadata:
- name: a=b
type: integer
rows:
- [1]
…
Next:
tarantool> select min(b) from t
---
- metadata:
- name: min(b)
type: scalar
rows:
- ['1']
…
Could you please fix this behaviour (scalar->int) in a separate patch?
If it is not that easy, file a ticket.
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index b1bfc886e..379f28252 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -45,6 +45,17 @@
> #include <unicode/uchar.h>
> #include <unicode/ucol.h>
>
> +/*
/**
> + * This structure is for keeping context during work of
> + * aggregate function.
> + */
> +struct aggregate_context {
Let’s change name to minmax_context.
> + /** Value being aggregated. (e.g. current MAX or current counter value). */
> + Mem value;
> + /** Reference value to keep track of previous argument's type. */
> + Mem reference_value;
> +};
I agree that keeping struct Men is too much (especially as values not as pointers).
We need only type of that value. What is more, you need only one member:
“value being aggregated” comes from argv[0].
> @@ -1588,16 +1604,57 @@ static void
> minmaxStep(sql_context *context, int not_used, sql_value **argv)
> {
> UNUSED_PARAMETER(not_used);
> - sql_value *arg = argv[0];
> - sql_value *best = sql_aggregate_context(context, sizeof(*best));
> + struct aggregate_context *aggr_context =
> + (struct aggregate_context *) sql_aggregate_context(context,
> + sizeof(*aggr_context));
This way looks better:
@@ -1604,9 +1604,8 @@ static void
minmaxStep(sql_context *context, int not_used, sql_value **argv)
{
UNUSED_PARAMETER(not_used);
- struct aggregate_context *aggr_context =
- (struct aggregate_context *) sql_aggregate_context(context,
- sizeof(*aggr_context));
+ struct aggregate_context *aggr_context = (struct aggregate_context *)
+ sql_aggregate_context(context, sizeof(*aggr_context));
if (aggr_context == NULL)
retur
> + if (aggr_context == NULL)
> + return;
>
> + sql_value *arg = argv[0];
> + sql_value *best = &(aggr_context->value);
> if (best == NULL)
> return;
>
> - if (sql_value_type(argv[0]) == SQL_NULL) {
> + enum sql_type sql_type = sql_value_type(arg);
> +
> + if (sql_type == SQL_NULL) {
> if (best->flags != 0)
> sqlSkipAccumulatorLoad(context);
> } else if (best->flags != 0) {
> + /*
> + * During proceeding of the function, arguments
> + * of different types may be encountered (if
> + * SCALAR type column is proceeded). Some types
> + * are compatible (INTEGER and FLOAT) and others
> + * are not (TEXT and BLOB are not compatible with
> + * any other type). In the later case an error
> + * is raised.
> + */
> + sql_value *reference_value = &aggr_context->reference_value;
> + if (reference_value->flags == 0)
> + sqlVdbeMemCopy(reference_value, arg);
AFAIR flags == 0 is a situation when we are processing first
entry (i.e. aggregate_context is initialised right now).
> + enum sql_type ref_sql_type = sql_value_type(reference_value);
> +
> + bool is_compatible = true;
-> types_are_compatible
> + if (sql_type != ref_sql_type) {
> + is_compatible = false;
> + if ((sql_type == SQL_INTEGER || sql_type == SQL_FLOAT) &&
> + (ref_sql_type == SQL_INTEGER ||
> + ref_sql_type == SQL_FLOAT)) {
> + is_compatible = true;
> + }
> + }
> + if (!is_compatible) {
> + diag_set(ClientError, ER_INCONSISTENT_TYPES,
> + mem_type_to_str(reference_value),
> + mem_type_to_str(arg));
> + context->fErrorOrAux = 1;
> + context->isError = SQL_TARANTOOL_ERROR;
> + sqlVdbeMemRelease(best);
> + sqlVdbeMemRelease(reference_value);
Both of these values are allocated in scope of aggregate context.
I guess there’s no need to provide any cleanup on these variables.
> + return;
> + }
> +
> int max;
> int cmp;
> struct coll *coll = sqlGetFuncCollSeq(context);
> @@ -1625,13 +1682,16 @@ minmaxStep(sql_context *context, int not_used, sql_value **argv)
> static void
> minMaxFinalize(sql_context * context)
> {
> - sql_value *pRes;
> - pRes = (sql_value *) sql_aggregate_context(context, 0);
> + struct aggregate_context *func_context =
> + (struct aggregate_context *) sql_aggregate_context(context, sizeof(*func_context));
> + sql_value *pRes = &(func_context->value);
> +
> if (pRes != NULL) {
> if (pRes->flags != 0) {
> sql_result_value(context, pRes);
> }
> sqlVdbeMemRelease(pRes);
> + sqlVdbeMemRelease(&func_context->reference_value);
> }
> }
>
> diff --git a/src/box/sql/select.c b/src/box/sql/select.c
> index b1ec8c758..624083b22 100644
> --- a/src/box/sql/select.c
> +++ b/src/box/sql/select.c
> @@ -4340,13 +4340,22 @@ pushDownWhereTerms(Parse * pParse, /* Parse context (for malloc() and error repo
> * argument, this function checks if the following are true:
> *
> * * the query contains just a single aggregate function,
> - * * the aggregate function is either min() or max(), and
> - * * the argument to the aggregate function is a column value.
> + * * the aggregate function is either min() or max(),
> + * * the argument to the aggregate function is a column value,
> + * * the type of column is not SCALAR.
> *
> * If all of the above are true, then WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX
> * is returned as appropriate. Also, *ppMinMax is set to point to the
> * list of arguments passed to the aggregate before returning.
> *
> + * The requirement of column type not being SCALAR follows from
> + * the purpose of the function. The purpose of the function is
> + * to answer the question: "Should MIN/MAX call be optimised by
> + * using ORDER ON clause code?" If the type of column is SCALAR
There’s no ORDER ON clause.
> + * then we have to iterate over all rows to check if their types
> + * are compatible. Hence, the optimisation should not be used.
> + * For details please see: https://github.com/tarantool/tarantool/issues/4032
> + *
> * Or, if the conditions above are not met, *ppMinMax is set to 0 and
> * WHERE_ORDERBY_NORMAL is returned.
> */
> @@ -4364,6 +4373,8 @@ minMaxQuery(AggInfo * pAggInfo, ExprList ** ppMinMax)
> if (pEList && pEList->nExpr == 1
> && pEList->a[0].pExpr->op == TK_AGG_COLUMN) {
> const char *zFunc = pExpr->u.zToken;
> + if (sql_expr_type(pEList->a[0].pExpr) == FIELD_TYPE_SCALAR)
> + return eRet;
I see no difference between byte code generated for SCALAR and other
columns (SELECT MIN(a) FROM t;). Investigate this case please.
> if (sqlStrICmp(zFunc, "min") == 0) {
> eRet = WHERE_ORDERBY_MIN;
> *ppMinMax = pEList;
>
> diff --git a/test/sql-tap/select1.test.lua b/test/sql-tap/select1.test.lua
> index be63e815d..a4ce19d73 100755
> --- a/test/sql-tap/select1.test.lua
> +++ b/test/sql-tap/select1.test.lua
> @@ -1,6 +1,6 @@
> #!/usr/bin/env tarantool
> test = require("sqltester")
> -test:plan(173)
> +test:plan(172)
>
> --!./tcltestrunner.lua
> -- ["set","testdir",[["file","dirname",["argv0"]]]]
> @@ -503,16 +503,6 @@ test:do_catchsql_test(
> -- </select1-2.17>
> })
>
> -test:do_execsql_test(
> - "select1-2.17.1",
> - [[
> - SELECT sum(a) FROM t3
> - ]], {
> - -- <select1-2.17.1>
> - 44.0
> - -- </select1-2.17.1>
> - })
Don’t delete test, just change it to catch an error.
> -
> test:do_catchsql_test(
> "select1-2.18”
next prev parent reply other threads:[~2019-04-09 14:52 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-05 14:57 [tarantool-patches] [PATCH 0/2] " Ivan Koptelov
2019-04-05 14:57 ` [tarantool-patches] [PATCH 1/2] " Ivan Koptelov
2019-04-09 14:52 ` [tarantool-patches] " n.pettik
2019-04-05 14:57 ` [tarantool-patches] [PATCH 2/2] " Ivan Koptelov
2019-04-05 19:48 ` [tarantool-patches] " Konstantin Osipov
2019-04-17 12:50 ` i.koptelov
2019-04-17 13:19 ` n.pettik
2019-04-09 14:52 ` n.pettik [this message]
2019-04-23 15:38 ` i.koptelov
2019-04-24 17:37 ` n.pettik
2019-05-06 13:24 ` i.koptelov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=A663A7E7-1209-4057-97B2-218F45730DB9@tarantool.org \
--to=korablev@tarantool.org \
--cc=ivan.koptelov@tarantool.org \
--cc=tarantool-patches@freelists.org \
--subject='[tarantool-patches] Re: [PATCH 2/2] sql: make aggregate functions types more strict' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox