From: "n.pettik" <korablev@tarantool.org> To: tarantool-patches@freelists.org Cc: Ivan Koptelov <ivan.koptelov@tarantool.org> Subject: [tarantool-patches] Re: [PATCH 2/2] sql: make aggregate functions types more strict Date: Tue, 9 Apr 2019 17:52:41 +0300 [thread overview] Message-ID: <A663A7E7-1209-4057-97B2-218F45730DB9@tarantool.org> (raw) In-Reply-To: <49e4ae0bc187dc02f908427692c0ddb2cc2d36a8.1554475881.git.ivan.koptelov@tarantool.org> Firstly, please consider Konstantin’s comments. Then, I’ve found assertion fault: tarantool> create table t(id int primary key, b scalar) tarantool> insert into t values(1, 1), (2, '1'), (3, 4.123) tarantool> select min(b) from t Assertion failed: (((p->szMalloc > 0 && p->z == p->zMalloc) ? 1 : 0) + ((p->flags & MEM_Dyn) != 0 ? 1 : 0) + ((p->flags & MEM_Ephem) != 0 ? 1 : 0) + ((p->flags & MEM_Static) != 0 ? 1 : 0) == 1), function sqlVdbeCheckMemInvariants, file /Users/n.pettik/tarantool/src/box/sql/vdbemem.c, line 87. Abort trap: 6 tarantool> create table t(id int primary key, a int, b text) tarantool> insert into t values(1,1,'1’) tarantool> select sum(b) from t --- - metadata: - name: sum(b) type: number rows: - [1] … So, your statement below that SUM accepts only numeric arguments is false. If string can be converted to number, it is OK: tarantool> select a=b from t; --- - metadata: - name: a=b type: integer rows: - [1] … Next: tarantool> select min(b) from t --- - metadata: - name: min(b) type: scalar rows: - ['1'] … Could you please fix this behaviour (scalar->int) in a separate patch? If it is not that easy, file a ticket. > diff --git a/src/box/sql/func.c b/src/box/sql/func.c > index b1bfc886e..379f28252 100644 > --- a/src/box/sql/func.c > +++ b/src/box/sql/func.c > @@ -45,6 +45,17 @@ > #include <unicode/uchar.h> > #include <unicode/ucol.h> > > +/* /** > + * This structure is for keeping context during work of > + * aggregate function. > + */ > +struct aggregate_context { Let’s change name to minmax_context. > + /** Value being aggregated. (e.g. current MAX or current counter value). */ > + Mem value; > + /** Reference value to keep track of previous argument's type. */ > + Mem reference_value; > +}; I agree that keeping struct Men is too much (especially as values not as pointers). We need only type of that value. What is more, you need only one member: “value being aggregated” comes from argv[0]. > @@ -1588,16 +1604,57 @@ static void > minmaxStep(sql_context *context, int not_used, sql_value **argv) > { > UNUSED_PARAMETER(not_used); > - sql_value *arg = argv[0]; > - sql_value *best = sql_aggregate_context(context, sizeof(*best)); > + struct aggregate_context *aggr_context = > + (struct aggregate_context *) sql_aggregate_context(context, > + sizeof(*aggr_context)); This way looks better: @@ -1604,9 +1604,8 @@ static void minmaxStep(sql_context *context, int not_used, sql_value **argv) { UNUSED_PARAMETER(not_used); - struct aggregate_context *aggr_context = - (struct aggregate_context *) sql_aggregate_context(context, - sizeof(*aggr_context)); + struct aggregate_context *aggr_context = (struct aggregate_context *) + sql_aggregate_context(context, sizeof(*aggr_context)); if (aggr_context == NULL) retur > + if (aggr_context == NULL) > + return; > > + sql_value *arg = argv[0]; > + sql_value *best = &(aggr_context->value); > if (best == NULL) > return; > > - if (sql_value_type(argv[0]) == SQL_NULL) { > + enum sql_type sql_type = sql_value_type(arg); > + > + if (sql_type == SQL_NULL) { > if (best->flags != 0) > sqlSkipAccumulatorLoad(context); > } else if (best->flags != 0) { > + /* > + * During proceeding of the function, arguments > + * of different types may be encountered (if > + * SCALAR type column is proceeded). Some types > + * are compatible (INTEGER and FLOAT) and others > + * are not (TEXT and BLOB are not compatible with > + * any other type). In the later case an error > + * is raised. > + */ > + sql_value *reference_value = &aggr_context->reference_value; > + if (reference_value->flags == 0) > + sqlVdbeMemCopy(reference_value, arg); AFAIR flags == 0 is a situation when we are processing first entry (i.e. aggregate_context is initialised right now). > + enum sql_type ref_sql_type = sql_value_type(reference_value); > + > + bool is_compatible = true; -> types_are_compatible > + if (sql_type != ref_sql_type) { > + is_compatible = false; > + if ((sql_type == SQL_INTEGER || sql_type == SQL_FLOAT) && > + (ref_sql_type == SQL_INTEGER || > + ref_sql_type == SQL_FLOAT)) { > + is_compatible = true; > + } > + } > + if (!is_compatible) { > + diag_set(ClientError, ER_INCONSISTENT_TYPES, > + mem_type_to_str(reference_value), > + mem_type_to_str(arg)); > + context->fErrorOrAux = 1; > + context->isError = SQL_TARANTOOL_ERROR; > + sqlVdbeMemRelease(best); > + sqlVdbeMemRelease(reference_value); Both of these values are allocated in scope of aggregate context. I guess there’s no need to provide any cleanup on these variables. > + return; > + } > + > int max; > int cmp; > struct coll *coll = sqlGetFuncCollSeq(context); > @@ -1625,13 +1682,16 @@ minmaxStep(sql_context *context, int not_used, sql_value **argv) > static void > minMaxFinalize(sql_context * context) > { > - sql_value *pRes; > - pRes = (sql_value *) sql_aggregate_context(context, 0); > + struct aggregate_context *func_context = > + (struct aggregate_context *) sql_aggregate_context(context, sizeof(*func_context)); > + sql_value *pRes = &(func_context->value); > + > if (pRes != NULL) { > if (pRes->flags != 0) { > sql_result_value(context, pRes); > } > sqlVdbeMemRelease(pRes); > + sqlVdbeMemRelease(&func_context->reference_value); > } > } > > diff --git a/src/box/sql/select.c b/src/box/sql/select.c > index b1ec8c758..624083b22 100644 > --- a/src/box/sql/select.c > +++ b/src/box/sql/select.c > @@ -4340,13 +4340,22 @@ pushDownWhereTerms(Parse * pParse, /* Parse context (for malloc() and error repo > * argument, this function checks if the following are true: > * > * * the query contains just a single aggregate function, > - * * the aggregate function is either min() or max(), and > - * * the argument to the aggregate function is a column value. > + * * the aggregate function is either min() or max(), > + * * the argument to the aggregate function is a column value, > + * * the type of column is not SCALAR. > * > * If all of the above are true, then WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX > * is returned as appropriate. Also, *ppMinMax is set to point to the > * list of arguments passed to the aggregate before returning. > * > + * The requirement of column type not being SCALAR follows from > + * the purpose of the function. The purpose of the function is > + * to answer the question: "Should MIN/MAX call be optimised by > + * using ORDER ON clause code?" If the type of column is SCALAR There’s no ORDER ON clause. > + * then we have to iterate over all rows to check if their types > + * are compatible. Hence, the optimisation should not be used. > + * For details please see: https://github.com/tarantool/tarantool/issues/4032 > + * > * Or, if the conditions above are not met, *ppMinMax is set to 0 and > * WHERE_ORDERBY_NORMAL is returned. > */ > @@ -4364,6 +4373,8 @@ minMaxQuery(AggInfo * pAggInfo, ExprList ** ppMinMax) > if (pEList && pEList->nExpr == 1 > && pEList->a[0].pExpr->op == TK_AGG_COLUMN) { > const char *zFunc = pExpr->u.zToken; > + if (sql_expr_type(pEList->a[0].pExpr) == FIELD_TYPE_SCALAR) > + return eRet; I see no difference between byte code generated for SCALAR and other columns (SELECT MIN(a) FROM t;). Investigate this case please. > if (sqlStrICmp(zFunc, "min") == 0) { > eRet = WHERE_ORDERBY_MIN; > *ppMinMax = pEList; > > diff --git a/test/sql-tap/select1.test.lua b/test/sql-tap/select1.test.lua > index be63e815d..a4ce19d73 100755 > --- a/test/sql-tap/select1.test.lua > +++ b/test/sql-tap/select1.test.lua > @@ -1,6 +1,6 @@ > #!/usr/bin/env tarantool > test = require("sqltester") > -test:plan(173) > +test:plan(172) > > --!./tcltestrunner.lua > -- ["set","testdir",[["file","dirname",["argv0"]]]] > @@ -503,16 +503,6 @@ test:do_catchsql_test( > -- </select1-2.17> > }) > > -test:do_execsql_test( > - "select1-2.17.1", > - [[ > - SELECT sum(a) FROM t3 > - ]], { > - -- <select1-2.17.1> > - 44.0 > - -- </select1-2.17.1> > - }) Don’t delete test, just change it to catch an error. > - > test:do_catchsql_test( > "select1-2.18”
next prev parent reply other threads:[~2019-04-09 14:52 UTC|newest] Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-04-05 14:57 [tarantool-patches] [PATCH 0/2] " Ivan Koptelov 2019-04-05 14:57 ` [tarantool-patches] [PATCH 1/2] " Ivan Koptelov 2019-04-09 14:52 ` [tarantool-patches] " n.pettik 2019-04-05 14:57 ` [tarantool-patches] [PATCH 2/2] " Ivan Koptelov 2019-04-05 19:48 ` [tarantool-patches] " Konstantin Osipov 2019-04-17 12:50 ` i.koptelov 2019-04-17 13:19 ` n.pettik 2019-04-09 14:52 ` n.pettik [this message] 2019-04-23 15:38 ` i.koptelov 2019-04-24 17:37 ` n.pettik 2019-05-06 13:24 ` i.koptelov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=A663A7E7-1209-4057-97B2-218F45730DB9@tarantool.org \ --to=korablev@tarantool.org \ --cc=ivan.koptelov@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='[tarantool-patches] Re: [PATCH 2/2] sql: make aggregate functions types more strict' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox