From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp40.i.mail.ru (smtp40.i.mail.ru [94.100.177.100]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 12E3D445320 for ; Mon, 13 Jul 2020 17:56:18 +0300 (MSK) Date: Mon, 13 Jul 2020 14:56:17 +0000 From: Nikita Pettik Message-ID: <20200713145617.GE15396@tarantool.org> References: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Disposition: inline Content-Transfer-Encoding: 8bit In-Reply-To: Subject: Re: [Tarantool-patches] [PATCH v4 5/5] sql: properly check arguments of functions List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: imeevma@tarantool.org Cc: tarantool-patches@dev.tarantool.org On 13 Jul 08:33, imeevma@tarantool.org wrote: > After this patch, the function arguments will be checked using ApplyType > opcode before they are passed to the function. This means that the rules > for implicitly casting values ​​that were specified as arguments are > defined by this opcode. > > Closes #4159 > --- > src/box/sql/expr.c | 5 + > src/box/sql/func.c | 360 +++- > src/box/sql/select.c | 31 + > src/box/sql/sqlInt.h | 32 + > src/box/sql/vdbe.c | 12 +- > test/sql-tap/cse.test.lua | 8 +- > test/sql-tap/func.test.lua | 48 +- > test/sql-tap/orderby1.test.lua | 2 +- > test/sql-tap/position.test.lua | 6 +- > test/sql/boolean.result | 32 +- > test/sql/gh-4159-function-argumens.result | 2131 +++++++++++++++++-- > test/sql/gh-4159-function-argumens.test.sql | 272 +++ > test/sql/types.result | 76 +- > test/sql/types.test.lua | 2 +- > 14 files changed, 2584 insertions(+), 433 deletions(-) > > diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c > index 7aee240a3..aa5477c6a 100644 > --- a/src/box/sql/expr.c > +++ b/src/box/sql/expr.c > @@ -4104,6 +4104,11 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target) > } else { > r1 = 0; > } > + if (func->def->language == FUNC_LANGUAGE_SQL_BUILTIN) { > + struct func_sql_builtin *f = > + (struct func_sql_builtin *)func; > + sql_emit_func_types(v, &f->args, r1, nFarg); > + } > if (sql_func_flag_is_set(func, SQL_FUNC_NEEDCOLL)) { > sqlVdbeAddOp4(v, OP_CollSeq, 0, 0, 0, > (char *)coll, P4_COLLSEQ); > diff --git a/src/box/sql/func.c b/src/box/sql/func.c > index 9d4c26081..66edc3792 100644 > --- a/src/box/sql/func.c > +++ b/src/box/sql/func.c > @@ -459,17 +459,10 @@ static void > lengthFunc(sql_context * context, int argc, sql_value ** argv) > { > int len; > - > assert(argc == 1); > UNUSED_PARAMETER(argc); > switch (sql_value_type(argv[0])) { > - case MP_BIN: > - case MP_ARRAY: > - case MP_MAP: > - case MP_INT: > - case MP_UINT: > - case MP_BOOL: > - case MP_DOUBLE:{ > + case MP_BIN: { Let's get rid of switch-cases, they are really redundant now. If you are afraid that diff will become big enough, you can refactor each function in a separate patch. > @@ -1550,6 +1514,15 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv) > > assert(argc == 3); > UNUSED_PARAMETER(argc); > + assert(sql_value_type(argv[0]) == MP_NIL || > + sql_value_type(argv[0]) == MP_STR || > + sql_value_type(argv[0]) == MP_BIN); Let's evaluate each type of argv only once. > + assert(sql_value_type(argv[1]) == MP_NIL || > + sql_value_type(argv[1]) == MP_STR || > + sql_value_type(argv[1]) == MP_BIN); > + assert(sql_value_type(argv[2]) == MP_NIL || > + sql_value_type(argv[2]) == MP_STR || > + sql_value_type(argv[2]) == MP_BIN); > zStr = sql_value_text(argv[0]); > if (zStr == 0) > return; > @@ -1858,13 +1831,9 @@ soundexFunc(sql_context * context, int argc, sql_value ** argv) > 1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0, > }; > assert(argc == 1); > - enum mp_type mp_type = sql_value_type(argv[0]); > - if (mp_type_is_bloblike(mp_type)) { > - diag_set(ClientError, ER_SQL_TYPE_MISMATCH, > - sql_value_to_diag_str(argv[0]), "text"); > - context->is_aborted = true; > - return; > - } > + assert(sql_value_type(argv[0]) == MP_NIL || > + sql_value_type(argv[0]) == MP_STR || > + sql_value_type(argv[0]) == MP_BIN); Ditto > zIn = (u8 *) sql_value_text(argv[0]); > if (zIn == 0) > zIn = (u8 *) ""; > int param_count; > uint32_t min_count; > uint32_t max_count; > + enum field_type *types; > + enum field_type recurrent_type; > + bool is_blob_like_str; What are these members supposed to mean? > enum field_type returns; > enum func_aggregate aggregate; > bool export_to_sql; > diff --git a/src/box/sql/select.c b/src/box/sql/select.c > index 4b069addb..fe56ede1b 100644 > --- a/src/box/sql/select.c > +++ b/src/box/sql/select.c > @@ -124,6 +124,34 @@ clearSelect(sql * db, Select * p, int bFree) > } > } > > +void > +sql_emit_func_types(struct Vdbe *vdbe, struct sql_builtin_func_args *args, > + int reg, uint32_t argc) -> sql_emit_func_arg_type_check() > +{ > + assert(argc <= args->max_count); > + enum field_type recurrent_type = args->recurrent_type; > + assert(args->max_count > 0 || recurrent_type == FIELD_TYPE_ANY); > + /* > + * It makes no sense to check types of the MEMs if all > + * arguments should be of type ANY. > + */ > + if (recurrent_type == FIELD_TYPE_ANY) > + return; > + size_t size = (argc + 1) * sizeof(enum field_type); > + enum field_type *types = sqlDbMallocZero(sql_get(), size); > + for (uint32_t i = 0; i < argc; ++i) { > + if (args->types == NULL) > + types[i] = args->recurrent_type; > + else > + types[i] = args->types[i]; > + } I don't understand what's going on here. > + types[argc] = field_type_MAX; > + sqlVdbeAddOp4(vdbe, OP_ApplyType, reg, argc, 0, (char *)types, > + P4_DYNAMIC); > + if (args->is_blob_like_str) > + sqlVdbeChangeP5(vdbe, OPFLAG_BLOB_LIKE_STRING); > +} > + > /* > * Initialize a SelectDest structure. > */ > void > sql_emit_table_types(struct Vdbe *v, struct space_def *def, int reg); > > +/** > + * Code an OP_ApplyType opcode that will force types for given Not force, but try to cast implicitly. Force cast is implemented as CAST operator. > + * range of register starting from @a reg. These values then will > + * be used as arguments of a function. > + * > + * @param vdbe VDBE. > + * @param args Information about arguments of the function. > + * @param reg Register where types will be placed. > + * @param argc Number of arguments. > + */ > +void > +sql_emit_func_types(struct Vdbe *vdbe, struct sql_builtin_func_args *args, > + int reg, uint32_t argc); > + > enum field_type > sql_type_result(enum field_type lhs, enum field_type rhs); > > @@ -4406,6 +4424,20 @@ struct sql_builtin_func_args { > uint32_t min_count; > /** Max number of arguments. */ > uint32_t max_count; > + /** > + * If function arguments may not be of the same type, all > + * argument types are described here. > + */ > + enum field_type *types; > + /** > + * Contains the type of arguments if all arguments to the > + * function are of the same type. > + */ > + enum field_type recurrent_type; Recurrent is quite unsuitable name. Why not always use array of type? Is it big deal to fill and store it? > + /** > + * TRUE if the function should treat the BLOB as STRING. > + */ > + bool is_blob_like_str; Why do you need this attribute? > }; > > struct func_sql_builtin { > diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c > index 863f38f5d..a4bf84bcc 100644 > --- a/src/box/sql/vdbe.c > +++ b/src/box/sql/vdbe.c > @@ -2882,7 +2882,7 @@ case OP_Fetch: { > break; > } > > -/* Opcode: ApplyType P1 P2 * P4 * > +/* Opcode: ApplyType P1 P2 * P4 P5 > * Synopsis: type(r[P1@P2]) > * > * Check that types of P2 registers starting from register P1 are > @@ -2890,6 +2890,9 @@ case OP_Fetch: { > * value and the given type are incompatible according to > * field_mp_plain_type_is_compatible(), but both are numeric, > * this opcode attempts to convert the value to the type. > + * > + * If P5 contains the OPFLAG_BLOB_LIKE_STRING flag, the BLOB > + * values ​​are processed as if they had the field type STRING. I see this unprintable symbols for the third time. Did you review your own patch? > case OP_ApplyType: { > enum field_type *types = pOp->p4.types; > @@ -2904,6 +2907,13 @@ case OP_ApplyType: { > pIn1++; > continue; > } Need more info for this chunk. > + if ((pOp->p5 & OPFLAG_BLOB_LIKE_STRING) != 0) { > + if (type == FIELD_TYPE_STRING && > + mem_mp_type(pIn1) == MP_BIN) { > + pIn1++; > + continue; > + } > + } > if (!mp_type_is_numeric(mem_mp_type(pIn1)) || > !sql_type_is_numeric(type) || > mem_convert_to_numeric(pIn1, type, false) != 0) { > diff --git a/test/sql-tap/cse.test.lua b/test/sql-tap/cse.test.lua > index 341b6de01..3c2076a1d 100755 > --- a/test/sql-tap/cse.test.lua > +++ b/test/sql-tap/cse.test.lua > @@ -195,23 +195,23 @@ test:do_execsql_test( > > > > -test:do_execsql_test( > +test:do_catchsql_test( > "cse-1.13", > [[ > SELECT upper(b), typeof(b), b FROM t1 > ]], { > -- > - "11", "integer", 11, "21", "integer", 21 > + 1, "Type mismatch: can not convert 11 to string" > -- > }) DId not look at test changes yet. Please, elaborate on notes above firstly. >