[Tarantool-patches] [PATCH v2 1/7] sql: remove implicit cast for assignment

Mon Jun 22 11:23:21 MSK 2020

On 17 Jun 15:36, imeevma at tarantool.org wrote:
> This patch removes implicit cast for assignment, however,
> it is allowed to implicitly cast DOUBLE to INTEGER and
> vice versa.
> 
> Closes #3809
> ---
> 
> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
> index 6b769805c..ae2622c9e 100644
> --- a/src/box/sql/vdbe.c
> +++ b/src/box/sql/vdbe.c
> @@ -418,6 +418,113 @@ sql_value_apply_type(
>  	mem_apply_type((Mem *) pVal, type);
>  }
>  
> +/**
> + * Check that mem_type of the mem is compatible with given type.
> + * In the case of numeric values, this function tries to convert
> + * the mem to the specified type and returns -1 if this is not
> + * possible.
> + *
> + * @param mem The value to check.
> + * @param type The type to check.
> + */
> +static int
> +mem_check_types(struct Mem *mem, enum field_type type)

I'd rename it to mem_icast_to_type() or mem_impl_cast_to_type()
or smth like that.

> +{
> +	if ((mem->flags & MEM_Null) != 0)
> +		return 0;
> +	assert(type < field_type_MAX);
> +	uint32_t flags = mem->flags;
> +	switch (type) {

Instead of such long switch-cases could we organize it in one table
containing valid conversions? I mean sort of field_mp_type_is_compatible()
To provide not only check but execution mechanism you can fill
table with pointers to functions implementing particular casts.

> +	case FIELD_TYPE_INTEGER:
> +		if ((flags & (MEM_Int | MEM_UInt)) != 0)
> +			return 0;
> +		if ((flags & MEM_Real) == 0)
> +			return -1;
> +		double d = mem->u.r;
> +		if (d >= (double)UINT64_MAX || d < (double)INT64_MIN)
> +			return -1;
> +		if (d < (double)INT64_MAX) {
> +			int64_t i = (int64_t) d;
> +			if (i == d) {
> +				mem_set_int(mem, i, i <= -1);
> +				return 0;
> +			}
> +			return -1;
> +		}
> +		uint64_t u = (uint64_t) d;
> +		if (u == d) {
> +			mem_set_u64(mem, u);
> +			return 0;
> +		}
> +		return -1;
> +	case FIELD_TYPE_SCALAR:
> +		/* Can't cast MAP and ARRAY to scalar types. */

Except for map and arrays we alread have decimal. IS this function
able to handle it?

> +		if ((flags & MEM_Subtype) == 0 ||
> +		    mem->subtype != SQL_SUBTYPE_MSGPACK)
> +			return 0;
> +		assert(mp_typeof(*mem->z) == MP_MAP ||
> +		       mp_typeof(*mem->z) == MP_ARRAY);
> +		return -1;
> @@ -2776,6 +2883,31 @@ case OP_ApplyType: {
>  	break;
>  }
>  
> +/* Opcode: CheckType P1 P2 * P4 *

ApplyType was quite suitable name, meanwhile CheckType is a bit confusing
since in fact it doesn't only check but cast (apply, coerce or whatever)
mem to given type.

> + * Synopsis: type(r[P1 at P2])
> + *
> + * Check that types of P2 registers starting from register
> + * P1 are compatible with given with given field types in P4.
> + */
> +case OP_CheckType: {
> +	enum field_type *types = pOp->p4.types;
> +	assert(types != NULL);
> +	assert(types[pOp->p2] == field_type_MAX);
> +	pIn1 = &aMem[pOp->p1];
> +	enum field_type type;
> +	while((type = *(types++)) != field_type_MAX) {
> +		assert(pIn1 <= &p->aMem[(p->nMem+1 - p->nCursor)]);
> +		assert(memIsValid(pIn1));
> +		if (mem_check_types(pIn1, type) != 0) {
> +			diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
> +				 mem_type_to_str(pIn1), field_type_strs[type]);
> +			goto abort_due_to_error;
> +		}
> +		pIn1++;
> +	}
> +	break;
> +}
> +
> diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
> index 8dad2db9a..9e8586ffc 100644
> --- a/src/box/sql/vdbemem.c
> +++ b/src/box/sql/vdbemem.c
> @@ -839,6 +839,13 @@ mem_set_int(struct Mem *mem, int64_t value, bool is_neg)
>  	}
>  }
>  
> +void
> +mem_set_double(struct Mem *mem, double value)
> +{

I see inconsistency with other setters: they provide auxiliary
clean-up in case mem has one of Agg/Dyn/Frame flags. Please
investigate whether it is really required and if it is so add
it to current one (or remove from other setters).

> +	mem->u.r = value;
> +	MemSetTypeFlag(mem, MEM_Real);
> +}
> +
> diff --git a/test/sql-tap/autoinc.test.lua b/test/sql-tap/autoinc.test.lua
> index 37e65e541..ec8dfeab1 100755
> --- a/test/sql-tap/autoinc.test.lua
> +++ b/test/sql-tap/autoinc.test.lua
> @@ -618,7 +618,7 @@ test:do_catchsql_test(
>              INSERT INTO t2 VALUES('asd'); 
>      ]], {
>          -- <autoinc-10.2>
> -        1, "Type mismatch: can not convert asd to integer"
> +        1, "Type mismatch: can not convert text to integer"

Having an opportunity to see which particular value can't be
converted is quite helpful. Let's leave it.

> @@ -694,7 +694,7 @@ test:do_test(
>                   INSERT INTO t3928(b) VALUES('after-int-' || CAST(new.b AS TEXT));
>              END;
>              DELETE FROM t3928 WHERE a!=1;
> -            UPDATE t3928 SET b=456 WHERE a=1;
> +            UPDATE t3928 SET b='456' WHERE a=1;
>              SELECT * FROM t3928 ORDER BY a;
>          ]])
>      end, {
> diff --git a/test/sql-tap/gh-3809-implicit-cast-assignment.test.lua b/test/sql-tap/gh-3809-implicit-cast-assignment.test.lua
> new file mode 100755
> index 000000000..de72cf3a4
> --- /dev/null
> +++ b/test/sql-tap/gh-3809-implicit-cast-assignment.test.lua

It's not regression issue, so you can a) remove gh- prefix;
b) amalgamate it with another test (e.g. sql/types.test.lua).
It's up to you tho.

> @@ -0,0 +1,649 @@
> +#!/usr/bin/env tarantool
> +test = require("sqltester")
> +test:plan(77)
> +
> +--
> +-- Make sure there are no implicit casts during assignment,
> +-- except for the implicit cast between numeric values.
> +--

Why do you still avoid .sql test format?

> +test:execsql([[
> +    CREATE TABLE ti (a INT PRIMARY KEY AUTOINCREMENT, i INTEGER);
> +    CREATE TABLE td (a INT PRIMARY KEY AUTOINCREMENT, d DOUBLE);
> +    CREATE TABLE tb (a INT PRIMARY KEY AUTOINCREMENT, b BOOLEAN);
> +    CREATE TABLE tt (a INT PRIMARY KEY AUTOINCREMENT, t TEXT);
> +    CREATE TABLE tv (a INT PRIMARY KEY AUTOINCREMENT, v VARBINARY);
> +    CREATE TABLE ts (a INT PRIMARY KEY AUTOINCREMENT, s SCALAR);
> +]])
> +
> +test:do_catchsql_test(
> +    "gh-3809-1",
> +    [[
> +        INSERT INTO ti(i) VALUES (11)
> +    ]], {
> +        0
> +    })
> +
> +test:do_catchsql_test(
> +    "gh-3809-2",
> +    [[
> +        INSERT INTO ti(i) VALUES (22.2)
> +    ]], {
> +        1, "Type mismatch: can not convert real to integer"
> +    })
> +
> +test:do_catchsql_test(
> +    "gh-3809-3",
> +    [[
> +        INSERT INTO ti(i) VALUES (33.0)

Should this be valid? Could you please ask Peter to clarify this question?

> +test:execsql([[
> +    DELETE FROM ti;
> +    DELETE FROM td;
> +    DELETE FROM tb;
> +    DELETE FROM tt;
> +    DELETE FROM tv;
> +    DELETE FROM ts;
> +    INSERT INTO ti(a) VALUES(1);
> +    INSERT INTO td(a) VALUES(1);
> +    INSERT INTO tb(a) VALUES(1);
> +    INSERT INTO tt(a) VALUES(1);
> +    INSERT INTO tv(a) VALUES(1);
> +    INSERT INTO ts(a) VALUES(1);
> +]])

Strange turn..Could you please supply each batch of tests with
brief description?

> +test:do_execsql_test(
> +    "gh-3809-39",
> +    [[
> +        SELECT * FROM ti, td, tb, tt, tv, ts;
> +    ]], {
> +        1, "", 1, "", 1, "", 1, "", 1, "", 1, ""
> +    })
> diff --git a/test/sql-tap/index1.test.lua b/test/sql-tap/index1.test.lua
> index e173e685c..ce66b7c1e 100755
> --- a/test/sql-tap/index1.test.lua
> +++ b/test/sql-tap/index1.test.lua
> @@ -593,25 +593,17 @@ test:do_test(
>          -- </index-11.1>
>      })
>  end
> --- integrity_check index-11.2
> --- Numeric strings should compare as if they were numbers.  So even if the
> --- strings are not character-by-character the same, if they represent the
> --- same number they should compare equal to one another.  Verify that this
> --- is true in indices.
> ---
> --- Updated for sql v3: sql will now store these values as numbers
> --- (because the affinity of column a is NUMERIC) so the quirky
> --- representations are not retained. i.e. '+1.0' becomes '1'.
> +
>  test:do_execsql_test(
>      "index-12.1",
>      [[
>          CREATE TABLE t4(id  INT primary key, a NUMBER,b INT );
> -        INSERT INTO t4 VALUES(1, '0.0',1);
> -        INSERT INTO t4 VALUES(2, '0.00',2);
> -        INSERT INTO t4 VALUES(4, '-1.0',4);
> -        INSERT INTO t4 VALUES(5, '+1.0',5);
> -        INSERT INTO t4 VALUES(6, '0',6);
> -        INSERT INTO t4 VALUES(7, '00000',7);
> +        INSERT INTO t4 VALUES(1, 0.0, 1);
> +        INSERT INTO t4 VALUES(2, 0.00, 2);
> +        INSERT INTO t4 VALUES(4, -1.0, 4);
> +        INSERT INTO t4 VALUES(5, +1.0, 5);
> +        INSERT INTO t4 VALUES(6, 0, 6);
> +        INSERT INTO t4 VALUES(7, 00000, 7);
>          SELECT a FROM t4 ORDER BY b;

Does this test make any sense now?

>      ]], {
>          -- <index-12.1>
> @@ -692,7 +684,7 @@ test:do_execsql_test(
>             c  TEXT,
>             UNIQUE(a,c)
>          );
> -        INSERT INTO t5 VALUES(1,2,3);
> +        INSERT INTO t5 VALUES(1,2,'3');
>          SELECT * FROM t5;
>      ]], {
>          -- <index-13.1>
> diff --git a/test/sql-tap/intpkey.test.lua b/test/sql-tap/intpkey.test.lua
> index b6b186632..0db18ba91 100755
> --- a/test/sql-tap/intpkey.test.lua
> +++ b/test/sql-tap/intpkey.test.lua
> @@ -770,11 +770,6 @@ test:do_execsql_test(
>          -- </intpkey-11.1>
>      })
>  
> --- integrity_check intpkey-12.1
> --- Try to use a string that looks like a floating point number as
> --- an integer primary key.  This should actually work when the floating
> --- point value can be rounded to an integer without loss of data.
> ---

Ditto

>  test:do_execsql_test(
>      "intpkey-13.1",
>      [[
> @@ -788,7 +783,7 @@ test:do_execsql_test(
>  test:do_execsql_test(
>      "intpkey-13.2",
>      [[
> -        INSERT INTO t1 VALUES('1',2,3);
> +        INSERT INTO t1 VALUES(1,'2','3');
>          SELECT * FROM t1 WHERE a=1;
>      ]], {
>          -- <intpkey-13.2>
> --- a/test/sql-tap/whereB.test.lua
> +++ b/test/sql-tap/whereB.test.lua
> @@ -112,24 +112,16 @@ test:do_execsql_test(
>      -- </whereB-1.102>
>      })
>  
> --- For this set of tests:
> ---
> ---  *   t1.y holds a text value with affinity TEXT
> ---  *   t2.b holds an integer value with affinity NONE
> ---
> --- These values are not equal and because neither affinity is NUMERIC
> --- no type conversion occurs.
> ---

Ditto

>  test:do_execsql_test(
>      "whereB-2.1",
>      [[
>          DROP TABLE t1;
>          DROP TABLE t2;
>  
> -        CREATE TABLE t1(x  INT primary key, y TEXT);    -- affinity of t1.y is TEXT
> -        INSERT INTO t1 VALUES(1,99);
> +        CREATE TABLE t1(x  INT primary key, y TEXT);
> +        INSERT INTO t1 VALUES(1,'99');
>  
> -        CREATE TABLE t2(a  INT primary key, b SCALAR);  -- affinity of t2.b is NONE
> +        CREATE TABLE t2(a  INT primary key, b SCALAR);
>          CREATE INDEX t2b ON t2(b);
>          INSERT INTO t2 VALUES(2, 99);
>