[Tarantool-patches] [PATCH 1/6] sql: remove implicit cast for assignment

Mon Jun 1 20:03:50 MSK 2020

Thanks for the patch!

See 9 comments below.

On 28/05/2020 16:17, Mergen Imeev wrote:
> This patch removes implicit cast for assignment, however,
> it is allowed to implicitly cast DOUBLE to INTEGER and
> vice versa.
> 
> Closes #3809
> ---
> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
> index 724bc188b..2a941025c 100644
> --- a/src/box/sql/vdbe.c
> +++ b/src/box/sql/vdbe.c
> @@ -414,6 +414,94 @@ sql_value_apply_type(
>  	mem_apply_type((Mem *) pVal, type);
>  }
>  
> +/**
> + * Check that mem_type of the mem compatible with given type. In

1. 'compatible' -> 'is compatible'.

> + * case of numeric values this fuction tries to comvert mem to

2. 'fuction' -> 'function', 'comvert' -> 'convert'.

> + * given type and returns -1 if it is impossible.
> + *
> + * @param mem The value to check.
> + * @param type The type to check.
> + */
> +static int
> +mem_check_types(struct Mem *mem, enum field_type type)
> +{
> +	if ((mem->flags & MEM_Null) != 0)
> +		return 0;
> +	assert(type < field_type_MAX);
> +	uint32_t flags = mem->flags;
> +	switch (type) {
> +	case FIELD_TYPE_UNSIGNED:
> +		if ((flags & MEM_Int) != 0)
> +			return -1;
> +		if ((flags & MEM_Real) != 0 && mem->u.r < 0)
> +			return -1;
> +		FALLTHROUGH;
> +	case FIELD_TYPE_INTEGER:
> +		if ((flags & (MEM_Int | MEM_UInt)) != 0)
> +			return 0;
> +		if ((flags & MEM_Real) != 0) {
> +			double d = mem->u.r;
> +			int64_t i = (int64_t) d;
> +			uint64_t u = (uint64_t) d;

3. This is undefined behaviour to cast double to int,
if the double value is out of range of the int.
For example, if double is < 0 or > UINT64_MAX, it is
UB to cast it to uint64_t. If double is < INT64_MIN or
> INT64_MAX, it is UB to cast it to int64_t.

However until clang sanitizer is turned on, I don't know
how you could test it. So up to you whether try to fix it
now, or wait until the sanitizer is on board. And then
probably I will fix it.

> +			if (i == d) {
> +				mem_set_int(mem, i, i <= -1);
> +				return 0;
> +			}
> +			if (u == d) {
> +				mem_set_u64(mem, u);
> +				return 0;
> +			}
> +		}
> +		return -1;
> +	case FIELD_TYPE_BOOLEAN:
> +		if ((flags & MEM_Bool) != 0)
> +			return 0;
> +		return -1;
> +	case FIELD_TYPE_NUMBER:
> +		if ((flags & (MEM_Real | MEM_Int | MEM_UInt)) != 0)
> +			return 0;
> +		return -1;
> +	case FIELD_TYPE_DOUBLE:
> +		if ((flags & MEM_Real) != 0)
> +			return 0;
> +		if ((flags & (MEM_Int | MEM_UInt)) != 0)
> +			return sqlVdbeMemRealify(mem);
> +		return -1;
> +	case FIELD_TYPE_STRING:
> +		if ((flags & MEM_Str) != 0)
> +			return 0;
> +		return -1;
> +	case FIELD_TYPE_VARBINARY:
> +		if ((flags & MEM_Blob) != 0)
> +			return 0;
> +		return -1;
> +	case FIELD_TYPE_SCALAR:
> +		/* Can't cast MAP and ARRAY to scalar types. */
> +		if ((flags & MEM_Subtype) == 0 ||
> +		    mem->subtype != SQL_SUBTYPE_MSGPACK)
> +			return 0;
> +		assert(mp_typeof(*mem->z) == MP_MAP ||
> +		       mp_typeof(*mem->z) == MP_ARRAY);
> +		return -1;
> +	case FIELD_TYPE_MAP:
> +		if ((flags & MEM_Subtype) != 0 &&
> +		    mem->subtype == SQL_SUBTYPE_MSGPACK &&
> +		    mp_typeof(*mem->z) == MP_MAP)
> +			return 0;
> +		return -1;
> +	case FIELD_TYPE_ARRAY:
> +		if ((flags & MEM_Subtype) != 0 &&
> +		    mem->subtype == SQL_SUBTYPE_MSGPACK &&
> +		    mp_typeof(*mem->z) == MP_ARRAY)
> +			return 0;
> +		return -1;
> +	case FIELD_TYPE_ANY:
> +		return 0;
> +	default:
> +		return -1;
> +	}
> +}
> +
>  /*
>   * pMem currently only holds a string type (or maybe a BLOB that we can
>   * interpret as a string if we want to).  Compute its corresponding
> @@ -2772,6 +2860,31 @@ case OP_ApplyType: {
>  	break;
>  }
>  
> +/* Opcode: CheckType P1 P2 * P4 *
> + * Synopsis: type(r[P1 at P2])
> + *
> + * Check that types of P2 registers starting from register
> + * P1 are compatible with given field_types.

4. 'with given field types in P4'.

> + */
> +case OP_CheckType: {
> +	enum field_type *types = pOp->p4.types;
> +	assert(types != NULL);
> +	assert(types[pOp->p2] == field_type_MAX);
> +	pIn1 = &aMem[pOp->p1];
> +	enum field_type type;
> +	while((type = *(types++)) != field_type_MAX) {
> +		assert(pIn1 <= &p->aMem[(p->nMem+1 - p->nCursor)]);
> +		assert(memIsValid(pIn1));
> +		if (mem_check_types(pIn1, type) != 0) {
> +			diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
> +				 mem_type_to_str(pIn1), field_type_strs[type]);
> +			goto abort_due_to_error;
> +		}
> +		pIn1++;
> +	}
> +	break;
> +}
> +
>  /* Opcode: MakeRecord P1 P2 P3 P4 P5
>   * Synopsis: r[P3]=mkrec(r[P1 at P2])
>   *
> diff --git a/test/sql-tap/gh-3809-implicit-cast-assignment.test.lua b/test/sql-tap/gh-3809-implicit-cast-assignment.test.lua
> new file mode 100755
> index 000000000..9df90bf72
> --- /dev/null
> +++ b/test/sql-tap/gh-3809-implicit-cast-assignment.test.lua
> @@ -0,0 +1,633 @@
> +#!/usr/bin/env tarantool
> +test = require("sqltester")
> +test:plan(75)
> +
> +--
> +-- Make sure that duting assignment there is no impolicit casts

5. 'duting' -> 'during', 'impolicit' -> 'implicit'.

> +-- with exception of implicit cast between numeric values.
> +--
> +test:execsql([[
> +    CREATE TABLE ti (a INT PRIMARY KEY AUTOINCREMENT, i INTEGER);
> +    CREATE TABLE td (a INT PRIMARY KEY AUTOINCREMENT, d DOUBLE);
> +    CREATE TABLE tb (a INT PRIMARY KEY AUTOINCREMENT, b BOOLEAN);
> +    CREATE TABLE tt (a INT PRIMARY KEY AUTOINCREMENT, t TEXT);
> +    CREATE TABLE tv (a INT PRIMARY KEY AUTOINCREMENT, v VARBINARY);
> +    CREATE TABLE ts (a INT PRIMARY KEY AUTOINCREMENT, s SCALAR);
> +]])
> +
> +test:do_catchsql_test(
> +    "gh-3809-1",
> +    [[
> +        INSERT INTO ti(i) VALUES (11)
> +    ]], {
> +        0
> +    })
> +
> +test:do_catchsql_test(
> +    "gh-3809-2",
> +    [[
> +        INSERT INTO ti(i) VALUES (22.2)
> +    ]], {
> +        1, "Type mismatch: can not convert real to integer"
> +    })
> +
> +test:do_catchsql_test(
> +    "gh-3809-3",
> +    [[
> +        INSERT INTO ti(i) VALUES (33.0)

6. Do we really want to keep this type of implicit
assignment? After all, it is still double. Even though with 0
fraction.

What if I insert a double value > INT64_MAX? Double can store
integers bigger than int64_t/uint64_t can.

> +    ]], {
> +        0
> +    })
> +
> +test:do_catchsql_test(
> +    "gh-3809-4",
> +    [[
> +        INSERT INTO ti(i) VALUES (true)
> +    ]], {
> +        1, "Type mismatch: can not convert boolean to integer"
> +    })
> +
> +test:do_catchsql_test(
> +    "gh-3809-5",
> +    [[
> +        INSERT INTO ti(i) VALUES ('33')
> +    ]], {
> +        1, "Type mismatch: can not convert text to integer"
> +    })
> +
> +test:do_catchsql_test(
> +    "gh-3809-6",
> +    [[
> +        INSERT INTO ti(i) VALUES (X'3434')
> +    ]], {
> +        1, "Type mismatch: can not convert varbinary to integer"
> +    })
> +
> +test:do_execsql_test(
> +    "gh-3809-7",
> +    [[
> +        SELECT * FROM ti;
> +    ]], {
> +        1, 11, 2, 33
> +    })
> +
> +test:do_catchsql_test(
> +    "gh-3809-8",
> +    [[
> +        INSERT INTO td(d) VALUES (11)

7. What if I insert something > 2^53 or < -2^53? It
can't be stored in double as is. Is it an error?

> +    ]], {
> +        0
> +    })
> +
> diff --git a/test/sql-tap/numcast.test.lua b/test/sql-tap/numcast.test.lua
> index eeac5353a..9bbae5ca4 100755
> --- a/test/sql-tap/numcast.test.lua
> +++ b/test/sql-tap/numcast.test.lua
> @@ -135,7 +135,7 @@ test:do_catchsql_test(
>          INSERT INTO t VALUES(20000000000000000000.01);
>          SELECT * FROM t;
>      ]], {
> -        1,"Tuple field 1 type does not match one required by operation: expected integer"
> +        1,"Type mismatch: can not convert real to integer"

8. Is it correct, that we can remove all the checks from OP_CheckType
except numeric conversions, and everything will work fine, because
box makes the type checks anyway? It seems OP_CheckType duplicates
box's work.

> diff --git a/test/sql-tap/tkt-3998683a16.test.lua b/test/sql-tap/tkt-3998683a16.test.lua
> index 885dcf5cd..c91960c2c 100755
> --- a/test/sql-tap/tkt-3998683a16.test.lua
> +++ b/test/sql-tap/tkt-3998683a16.test.lua
> @@ -26,24 +26,24 @@ test:do_test(
>      function()
>          return test:execsql [[
>              CREATE TABLE t1(x  INT primary key, y NUMBER);
> -            INSERT INTO t1 VALUES(1, '1.0');
> -            INSERT INTO t1 VALUES(2, '.125');
> -            INSERT INTO t1 VALUES(3, '123.');
> -            INSERT INTO t1 VALUES(4, '123.e+2');
> -            INSERT INTO t1 VALUES(5, '.125e+3');
> -            INSERT INTO t1 VALUES(6, '123e4');
> -            INSERT INTO t1 VALUES(11, '  1.0');
> -            INSERT INTO t1 VALUES(12, '  .125');
> -            INSERT INTO t1 VALUES(13, '  123.');
> -            INSERT INTO t1 VALUES(14, '  123.e+2');
> -            INSERT INTO t1 VALUES(15, '  .125e+3');
> -            INSERT INTO t1 VALUES(16, '  123e4');
> -            INSERT INTO t1 VALUES(21, '1.0  ');
> -            INSERT INTO t1 VALUES(22, '.125  ');
> -            INSERT INTO t1 VALUES(23, '123.  ');
> -            INSERT INTO t1 VALUES(24, '123.e+2  ');
> -            INSERT INTO t1 VALUES(25, '.125e+3  ');
> -            INSERT INTO t1 VALUES(26, '123e4  ');
> +            INSERT INTO t1 VALUES(1, 1.0);
> +            INSERT INTO t1 VALUES(2, .125);
> +            INSERT INTO t1 VALUES(3, 123.);
> +            INSERT INTO t1 VALUES(4, 123.e+2);
> +            INSERT INTO t1 VALUES(5, .125e+3);
> +            INSERT INTO t1 VALUES(6, 123e4);
> +            INSERT INTO t1 VALUES(11, 1.0);
> +            INSERT INTO t1 VALUES(12, .125);
> +            INSERT INTO t1 VALUES(13, 123.);
> +            INSERT INTO t1 VALUES(14, 123.e+2);
> +            INSERT INTO t1 VALUES(15, .125e+3);
> +            INSERT INTO t1 VALUES(16, 123e4);
> +            INSERT INTO t1 VALUES(21, 1.0);
> +            INSERT INTO t1 VALUES(22, .125);
> +            INSERT INTO t1 VALUES(23, 123.);
> +            INSERT INTO t1 VALUES(24, 123.e+2);
> +            INSERT INTO t1 VALUES(25, .125e+3);
> +            INSERT INTO t1 VALUES(26, 123e4);

9. There are lines where the second column is a duplicate. I guess
they can be removed now. Previously they were different in spacing
only.