[Tarantool-patches] [PATCH v4 5/5] sql: properly check arguments of functions
Nikita Pettik
korablev at tarantool.org
Mon Jul 13 17:56:17 MSK 2020
On 13 Jul 08:33, imeevma at tarantool.org wrote:
> After this patch, the function arguments will be checked using ApplyType
> opcode before they are passed to the function. This means that the rules
> for implicitly casting values that were specified as arguments are
> defined by this opcode.
>
> Closes #4159
> ---
> src/box/sql/expr.c | 5 +
> src/box/sql/func.c | 360 +++-
> src/box/sql/select.c | 31 +
> src/box/sql/sqlInt.h | 32 +
> src/box/sql/vdbe.c | 12 +-
> test/sql-tap/cse.test.lua | 8 +-
> test/sql-tap/func.test.lua | 48 +-
> test/sql-tap/orderby1.test.lua | 2 +-
> test/sql-tap/position.test.lua | 6 +-
> test/sql/boolean.result | 32 +-
> test/sql/gh-4159-function-argumens.result | 2131 +++++++++++++++++--
> test/sql/gh-4159-function-argumens.test.sql | 272 +++
> test/sql/types.result | 76 +-
> test/sql/types.test.lua | 2 +-
> 14 files changed, 2584 insertions(+), 433 deletions(-)
>
> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
> index 7aee240a3..aa5477c6a 100644
> --- a/src/box/sql/expr.c
> +++ b/src/box/sql/expr.c
> @@ -4104,6 +4104,11 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
> } else {
> r1 = 0;
> }
> + if (func->def->language == FUNC_LANGUAGE_SQL_BUILTIN) {
> + struct func_sql_builtin *f =
> + (struct func_sql_builtin *)func;
> + sql_emit_func_types(v, &f->args, r1, nFarg);
> + }
> if (sql_func_flag_is_set(func, SQL_FUNC_NEEDCOLL)) {
> sqlVdbeAddOp4(v, OP_CollSeq, 0, 0, 0,
> (char *)coll, P4_COLLSEQ);
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index 9d4c26081..66edc3792 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -459,17 +459,10 @@ static void
> lengthFunc(sql_context * context, int argc, sql_value ** argv)
> {
> int len;
> -
> assert(argc == 1);
> UNUSED_PARAMETER(argc);
> switch (sql_value_type(argv[0])) {
> - case MP_BIN:
> - case MP_ARRAY:
> - case MP_MAP:
> - case MP_INT:
> - case MP_UINT:
> - case MP_BOOL:
> - case MP_DOUBLE:{
> + case MP_BIN: {
Let's get rid of switch-cases, they are really redundant now.
If you are afraid that diff will become big enough, you can
refactor each function in a separate patch.
> @@ -1550,6 +1514,15 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
>
> assert(argc == 3);
> UNUSED_PARAMETER(argc);
> + assert(sql_value_type(argv[0]) == MP_NIL ||
> + sql_value_type(argv[0]) == MP_STR ||
> + sql_value_type(argv[0]) == MP_BIN);
Let's evaluate each type of argv only once.
> + assert(sql_value_type(argv[1]) == MP_NIL ||
> + sql_value_type(argv[1]) == MP_STR ||
> + sql_value_type(argv[1]) == MP_BIN);
> + assert(sql_value_type(argv[2]) == MP_NIL ||
> + sql_value_type(argv[2]) == MP_STR ||
> + sql_value_type(argv[2]) == MP_BIN);
> zStr = sql_value_text(argv[0]);
> if (zStr == 0)
> return;
> @@ -1858,13 +1831,9 @@ soundexFunc(sql_context * context, int argc, sql_value ** argv)
> 1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0,
> };
> assert(argc == 1);
> - enum mp_type mp_type = sql_value_type(argv[0]);
> - if (mp_type_is_bloblike(mp_type)) {
> - diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
> - sql_value_to_diag_str(argv[0]), "text");
> - context->is_aborted = true;
> - return;
> - }
> + assert(sql_value_type(argv[0]) == MP_NIL ||
> + sql_value_type(argv[0]) == MP_STR ||
> + sql_value_type(argv[0]) == MP_BIN);
Ditto
> zIn = (u8 *) sql_value_text(argv[0]);
> if (zIn == 0)
> zIn = (u8 *) "";
> int param_count;
> uint32_t min_count;
> uint32_t max_count;
> + enum field_type *types;
> + enum field_type recurrent_type;
> + bool is_blob_like_str;
What are these members supposed to mean?
> enum field_type returns;
> enum func_aggregate aggregate;
> bool export_to_sql;
> diff --git a/src/box/sql/select.c b/src/box/sql/select.c
> index 4b069addb..fe56ede1b 100644
> --- a/src/box/sql/select.c
> +++ b/src/box/sql/select.c
> @@ -124,6 +124,34 @@ clearSelect(sql * db, Select * p, int bFree)
> }
> }
>
> +void
> +sql_emit_func_types(struct Vdbe *vdbe, struct sql_builtin_func_args *args,
> + int reg, uint32_t argc)
-> sql_emit_func_arg_type_check()
> +{
> + assert(argc <= args->max_count);
> + enum field_type recurrent_type = args->recurrent_type;
> + assert(args->max_count > 0 || recurrent_type == FIELD_TYPE_ANY);
> + /*
> + * It makes no sense to check types of the MEMs if all
> + * arguments should be of type ANY.
> + */
> + if (recurrent_type == FIELD_TYPE_ANY)
> + return;
> + size_t size = (argc + 1) * sizeof(enum field_type);
> + enum field_type *types = sqlDbMallocZero(sql_get(), size);
> + for (uint32_t i = 0; i < argc; ++i) {
> + if (args->types == NULL)
> + types[i] = args->recurrent_type;
> + else
> + types[i] = args->types[i];
> + }
I don't understand what's going on here.
> + types[argc] = field_type_MAX;
> + sqlVdbeAddOp4(vdbe, OP_ApplyType, reg, argc, 0, (char *)types,
> + P4_DYNAMIC);
> + if (args->is_blob_like_str)
> + sqlVdbeChangeP5(vdbe, OPFLAG_BLOB_LIKE_STRING);
> +}
> +
> /*
> * Initialize a SelectDest structure.
> */
> void
> sql_emit_table_types(struct Vdbe *v, struct space_def *def, int reg);
>
> +/**
> + * Code an OP_ApplyType opcode that will force types for given
Not force, but try to cast implicitly. Force cast is implemented as CAST
operator.
> + * range of register starting from @a reg. These values then will
> + * be used as arguments of a function.
> + *
> + * @param vdbe VDBE.
> + * @param args Information about arguments of the function.
> + * @param reg Register where types will be placed.
> + * @param argc Number of arguments.
> + */
> +void
> +sql_emit_func_types(struct Vdbe *vdbe, struct sql_builtin_func_args *args,
> + int reg, uint32_t argc);
> +
> enum field_type
> sql_type_result(enum field_type lhs, enum field_type rhs);
>
> @@ -4406,6 +4424,20 @@ struct sql_builtin_func_args {
> uint32_t min_count;
> /** Max number of arguments. */
> uint32_t max_count;
> + /**
> + * If function arguments may not be of the same type, all
> + * argument types are described here.
> + */
> + enum field_type *types;
> + /**
> + * Contains the type of arguments if all arguments to the
> + * function are of the same type.
> + */
> + enum field_type recurrent_type;
Recurrent is quite unsuitable name. Why not always use array
of type? Is it big deal to fill and store it?
> + /**
> + * TRUE if the function should treat the BLOB as STRING.
> + */
> + bool is_blob_like_str;
Why do you need this attribute?
> };
>
> struct func_sql_builtin {
> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
> index 863f38f5d..a4bf84bcc 100644
> --- a/src/box/sql/vdbe.c
> +++ b/src/box/sql/vdbe.c
> @@ -2882,7 +2882,7 @@ case OP_Fetch: {
> break;
> }
>
> -/* Opcode: ApplyType P1 P2 * P4 *
> +/* Opcode: ApplyType P1 P2 * P4 P5
> * Synopsis: type(r[P1 at P2])
> *
> * Check that types of P2 registers starting from register P1 are
> @@ -2890,6 +2890,9 @@ case OP_Fetch: {
> * value and the given type are incompatible according to
> * field_mp_plain_type_is_compatible(), but both are numeric,
> * this opcode attempts to convert the value to the type.
> + *
> + * If P5 contains the OPFLAG_BLOB_LIKE_STRING flag, the BLOB
> + * values are processed as if they had the field type STRING.
I see this unprintable symbols for the third time. Did you review
your own patch?
> case OP_ApplyType: {
> enum field_type *types = pOp->p4.types;
> @@ -2904,6 +2907,13 @@ case OP_ApplyType: {
> pIn1++;
> continue;
> }
Need more info for this chunk.
> + if ((pOp->p5 & OPFLAG_BLOB_LIKE_STRING) != 0) {
> + if (type == FIELD_TYPE_STRING &&
> + mem_mp_type(pIn1) == MP_BIN) {
> + pIn1++;
> + continue;
> + }
> + }
> if (!mp_type_is_numeric(mem_mp_type(pIn1)) ||
> !sql_type_is_numeric(type) ||
> mem_convert_to_numeric(pIn1, type, false) != 0) {
> diff --git a/test/sql-tap/cse.test.lua b/test/sql-tap/cse.test.lua
> index 341b6de01..3c2076a1d 100755
> --- a/test/sql-tap/cse.test.lua
> +++ b/test/sql-tap/cse.test.lua
> @@ -195,23 +195,23 @@ test:do_execsql_test(
>
>
>
> -test:do_execsql_test(
> +test:do_catchsql_test(
> "cse-1.13",
> [[
> SELECT upper(b), typeof(b), b FROM t1
> ]], {
> -- <cse-1.13>
> - "11", "integer", 11, "21", "integer", 21
> + 1, "Type mismatch: can not convert 11 to string"
> -- </cse-1.13>
> })
DId not look at test changes yet. Please, elaborate on notes above
firstly.
>
More information about the Tarantool-patches
mailing list