[Tarantool-patches] [PATCH v4 5/5] sql: properly check arguments of functions

Nikita Pettik korablev at tarantool.org
Mon Jul 13 17:56:17 MSK 2020


On 13 Jul 08:33, imeevma at tarantool.org wrote:
> After this patch, the function arguments will be checked using ApplyType
> opcode before they are passed to the function. This means that the rules
> for implicitly casting values ​​that were specified as arguments are
> defined by this opcode.
> 
> Closes #4159
> ---
>  src/box/sql/expr.c                          |    5 +
>  src/box/sql/func.c                          |  360 +++-
>  src/box/sql/select.c                        |   31 +
>  src/box/sql/sqlInt.h                        |   32 +
>  src/box/sql/vdbe.c                          |   12 +-
>  test/sql-tap/cse.test.lua                   |    8 +-
>  test/sql-tap/func.test.lua                  |   48 +-
>  test/sql-tap/orderby1.test.lua              |    2 +-
>  test/sql-tap/position.test.lua              |    6 +-
>  test/sql/boolean.result                     |   32 +-
>  test/sql/gh-4159-function-argumens.result   | 2131 +++++++++++++++++--
>  test/sql/gh-4159-function-argumens.test.sql |  272 +++
>  test/sql/types.result                       |   76 +-
>  test/sql/types.test.lua                     |    2 +-
>  14 files changed, 2584 insertions(+), 433 deletions(-)
> 
> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
> index 7aee240a3..aa5477c6a 100644
> --- a/src/box/sql/expr.c
> +++ b/src/box/sql/expr.c
> @@ -4104,6 +4104,11 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
>  			} else {
>  				r1 = 0;
>  			}
> +			if (func->def->language == FUNC_LANGUAGE_SQL_BUILTIN) {
> +				struct func_sql_builtin *f =
> +					(struct func_sql_builtin *)func;
> +				sql_emit_func_types(v, &f->args, r1, nFarg);
> +			}
>  			if (sql_func_flag_is_set(func, SQL_FUNC_NEEDCOLL)) {
>  				sqlVdbeAddOp4(v, OP_CollSeq, 0, 0, 0,
>  						  (char *)coll, P4_COLLSEQ);
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index 9d4c26081..66edc3792 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -459,17 +459,10 @@ static void
>  lengthFunc(sql_context * context, int argc, sql_value ** argv)
>  {
>  	int len;
> -
>  	assert(argc == 1);
>  	UNUSED_PARAMETER(argc);
>  	switch (sql_value_type(argv[0])) {
> -	case MP_BIN:
> -	case MP_ARRAY:
> -	case MP_MAP:
> -	case MP_INT:
> -	case MP_UINT:
> -	case MP_BOOL:
> -	case MP_DOUBLE:{
> +	case MP_BIN: {

Let's get rid of switch-cases, they are really redundant now.
If you are afraid that diff will become big enough, you can
refactor each function in a separate patch.

> @@ -1550,6 +1514,15 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
>  
>  	assert(argc == 3);
>  	UNUSED_PARAMETER(argc);
> +	assert(sql_value_type(argv[0]) == MP_NIL ||
> +	       sql_value_type(argv[0]) == MP_STR ||
> +	       sql_value_type(argv[0]) == MP_BIN);

Let's evaluate each type of argv only once.

> +	assert(sql_value_type(argv[1]) == MP_NIL ||
> +	       sql_value_type(argv[1]) == MP_STR ||
> +	       sql_value_type(argv[1]) == MP_BIN);
> +	assert(sql_value_type(argv[2]) == MP_NIL ||
> +	       sql_value_type(argv[2]) == MP_STR ||
> +	       sql_value_type(argv[2]) == MP_BIN);
>  	zStr = sql_value_text(argv[0]);
>  	if (zStr == 0)
>  		return;
> @@ -1858,13 +1831,9 @@ soundexFunc(sql_context * context, int argc, sql_value ** argv)
>  		1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0,
>  	};
>  	assert(argc == 1);
> -	enum mp_type mp_type = sql_value_type(argv[0]);
> -	if (mp_type_is_bloblike(mp_type)) {
> -		diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
> -			 sql_value_to_diag_str(argv[0]), "text");
> -		context->is_aborted = true;
> -		return;
> -	}
> +	assert(sql_value_type(argv[0]) == MP_NIL ||
> +	       sql_value_type(argv[0]) == MP_STR ||
> +	       sql_value_type(argv[0]) == MP_BIN);

Ditto

>  	zIn = (u8 *) sql_value_text(argv[0]);
>  	if (zIn == 0)
>  		zIn = (u8 *) "";
>  	int param_count;
>  	uint32_t min_count;
>  	uint32_t max_count;
> +	enum field_type *types;
> +	enum field_type recurrent_type;
> +	bool is_blob_like_str;

What are these members supposed to mean?

>  	enum field_type returns;
>  	enum func_aggregate aggregate;
>  	bool export_to_sql;
> diff --git a/src/box/sql/select.c b/src/box/sql/select.c
> index 4b069addb..fe56ede1b 100644
> --- a/src/box/sql/select.c
> +++ b/src/box/sql/select.c
> @@ -124,6 +124,34 @@ clearSelect(sql * db, Select * p, int bFree)
>  	}
>  }
>  
> +void
> +sql_emit_func_types(struct Vdbe *vdbe, struct sql_builtin_func_args *args,
> +		    int reg, uint32_t argc)

-> sql_emit_func_arg_type_check()

> +{
> +	assert(argc <= args->max_count);
> +	enum field_type recurrent_type = args->recurrent_type;
> +	assert(args->max_count > 0 || recurrent_type == FIELD_TYPE_ANY);
> +	/*
> +	 * It makes no sense to check types of the MEMs if all
> +	 * arguments should be of type ANY.
> +	 */
> +	if (recurrent_type == FIELD_TYPE_ANY)
> +		return;
> +	size_t size = (argc + 1) * sizeof(enum field_type);
> +	enum field_type *types = sqlDbMallocZero(sql_get(), size);
> +	for (uint32_t i = 0; i < argc; ++i) {
> +		if (args->types == NULL)
> +			types[i] = args->recurrent_type;
> +		else
> +			types[i] = args->types[i];
> +	}

I don't understand what's going on here.

> +	types[argc] = field_type_MAX;
> +	sqlVdbeAddOp4(vdbe, OP_ApplyType, reg, argc, 0, (char *)types,
> +		      P4_DYNAMIC);
> +	if (args->is_blob_like_str)
> +		sqlVdbeChangeP5(vdbe, OPFLAG_BLOB_LIKE_STRING);
> +}
> +
>  /*
>   * Initialize a SelectDest structure.
>   */
>  void
>  sql_emit_table_types(struct Vdbe *v, struct space_def *def, int reg);
>  
> +/**
> + * Code an OP_ApplyType opcode that will force types for given

Not force, but try to cast implicitly. Force cast is implemented as CAST
operator.

> + * range of register starting from @a reg. These values then will
> + * be used as arguments of a function.
> + *
> + * @param vdbe VDBE.
> + * @param args Information about arguments of the function.
> + * @param reg Register where types will be placed.
> + * @param argc Number of arguments.
> + */
> +void
> +sql_emit_func_types(struct Vdbe *vdbe, struct sql_builtin_func_args *args,
> +		    int reg, uint32_t argc);
> +
>  enum field_type
>  sql_type_result(enum field_type lhs, enum field_type rhs);
>  
> @@ -4406,6 +4424,20 @@ struct sql_builtin_func_args {
>  	uint32_t min_count;
>  	/** Max number of arguments. */
>  	uint32_t max_count;
> +	/**
> +	 * If function arguments may not be of the same type, all
> +	 * argument types are described here.
> +	 */
> +	enum field_type *types;
> +	/**
> +	 * Contains the type of arguments if all arguments to the
> +	 * function are of the same type.
> +	 */
> +	enum field_type recurrent_type;

Recurrent is quite unsuitable name. Why not always use array
of type? Is it big deal to fill and store it?

> +	/**
> +	 * TRUE if the function should treat the BLOB as STRING.
> +	 */
> +	bool is_blob_like_str;

Why do you need this attribute?

>  };
>  
>  struct func_sql_builtin {
> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
> index 863f38f5d..a4bf84bcc 100644
> --- a/src/box/sql/vdbe.c
> +++ b/src/box/sql/vdbe.c
> @@ -2882,7 +2882,7 @@ case OP_Fetch: {
>  	break;
>  }
>  
> -/* Opcode: ApplyType P1 P2 * P4 *
> +/* Opcode: ApplyType P1 P2 * P4 P5
>   * Synopsis: type(r[P1 at P2])
>   *
>   * Check that types of P2 registers starting from register P1 are
> @@ -2890,6 +2890,9 @@ case OP_Fetch: {
>   * value and the given type are incompatible according to
>   * field_mp_plain_type_is_compatible(), but both are numeric,
>   * this opcode attempts to convert the value to the type.
> + *
> + * If P5 contains the OPFLAG_BLOB_LIKE_STRING flag, the BLOB
> + * values ​​are processed as if they had the field type STRING.

I see this unprintable symbols for the third time. Did you review
your own patch?


>  case OP_ApplyType: {
>  	enum field_type *types = pOp->p4.types;
> @@ -2904,6 +2907,13 @@ case OP_ApplyType: {
>  			pIn1++;
>  			continue;
>  		}

Need more info for this chunk.

> +		if ((pOp->p5 & OPFLAG_BLOB_LIKE_STRING) != 0) {
> +			if (type == FIELD_TYPE_STRING &&
> +			    mem_mp_type(pIn1) == MP_BIN) {
> +				pIn1++;
> +				continue;
> +			}
> +		}
>  		if (!mp_type_is_numeric(mem_mp_type(pIn1)) ||
>  		    !sql_type_is_numeric(type) ||
>  		    mem_convert_to_numeric(pIn1, type, false) != 0) {
> diff --git a/test/sql-tap/cse.test.lua b/test/sql-tap/cse.test.lua
> index 341b6de01..3c2076a1d 100755
> --- a/test/sql-tap/cse.test.lua
> +++ b/test/sql-tap/cse.test.lua
> @@ -195,23 +195,23 @@ test:do_execsql_test(
>  
>  
>  
> -test:do_execsql_test(
> +test:do_catchsql_test(
>      "cse-1.13",
>      [[
>          SELECT upper(b), typeof(b), b FROM t1
>      ]], {
>          -- <cse-1.13>
> -        "11", "integer", 11, "21", "integer", 21
> +        1, "Type mismatch: can not convert 11 to string"
>          -- </cse-1.13>
>      })

DId not look at test changes yet. Please, elaborate on notes above
firstly.

>  


More information about the Tarantool-patches mailing list