[Tarantool-patches] [PATCH 1/6] sql: remove implicit cast for assignment
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Mon Jun 1 20:03:50 MSK 2020
Thanks for the patch!
See 9 comments below.
On 28/05/2020 16:17, Mergen Imeev wrote:
> This patch removes implicit cast for assignment, however,
> it is allowed to implicitly cast DOUBLE to INTEGER and
> vice versa.
>
> Closes #3809
> ---
> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
> index 724bc188b..2a941025c 100644
> --- a/src/box/sql/vdbe.c
> +++ b/src/box/sql/vdbe.c
> @@ -414,6 +414,94 @@ sql_value_apply_type(
> mem_apply_type((Mem *) pVal, type);
> }
>
> +/**
> + * Check that mem_type of the mem compatible with given type. In
1. 'compatible' -> 'is compatible'.
> + * case of numeric values this fuction tries to comvert mem to
2. 'fuction' -> 'function', 'comvert' -> 'convert'.
> + * given type and returns -1 if it is impossible.
> + *
> + * @param mem The value to check.
> + * @param type The type to check.
> + */
> +static int
> +mem_check_types(struct Mem *mem, enum field_type type)
> +{
> + if ((mem->flags & MEM_Null) != 0)
> + return 0;
> + assert(type < field_type_MAX);
> + uint32_t flags = mem->flags;
> + switch (type) {
> + case FIELD_TYPE_UNSIGNED:
> + if ((flags & MEM_Int) != 0)
> + return -1;
> + if ((flags & MEM_Real) != 0 && mem->u.r < 0)
> + return -1;
> + FALLTHROUGH;
> + case FIELD_TYPE_INTEGER:
> + if ((flags & (MEM_Int | MEM_UInt)) != 0)
> + return 0;
> + if ((flags & MEM_Real) != 0) {
> + double d = mem->u.r;
> + int64_t i = (int64_t) d;
> + uint64_t u = (uint64_t) d;
3. This is undefined behaviour to cast double to int,
if the double value is out of range of the int.
For example, if double is < 0 or > UINT64_MAX, it is
UB to cast it to uint64_t. If double is < INT64_MIN or
> INT64_MAX, it is UB to cast it to int64_t.
However until clang sanitizer is turned on, I don't know
how you could test it. So up to you whether try to fix it
now, or wait until the sanitizer is on board. And then
probably I will fix it.
> + if (i == d) {
> + mem_set_int(mem, i, i <= -1);
> + return 0;
> + }
> + if (u == d) {
> + mem_set_u64(mem, u);
> + return 0;
> + }
> + }
> + return -1;
> + case FIELD_TYPE_BOOLEAN:
> + if ((flags & MEM_Bool) != 0)
> + return 0;
> + return -1;
> + case FIELD_TYPE_NUMBER:
> + if ((flags & (MEM_Real | MEM_Int | MEM_UInt)) != 0)
> + return 0;
> + return -1;
> + case FIELD_TYPE_DOUBLE:
> + if ((flags & MEM_Real) != 0)
> + return 0;
> + if ((flags & (MEM_Int | MEM_UInt)) != 0)
> + return sqlVdbeMemRealify(mem);
> + return -1;
> + case FIELD_TYPE_STRING:
> + if ((flags & MEM_Str) != 0)
> + return 0;
> + return -1;
> + case FIELD_TYPE_VARBINARY:
> + if ((flags & MEM_Blob) != 0)
> + return 0;
> + return -1;
> + case FIELD_TYPE_SCALAR:
> + /* Can't cast MAP and ARRAY to scalar types. */
> + if ((flags & MEM_Subtype) == 0 ||
> + mem->subtype != SQL_SUBTYPE_MSGPACK)
> + return 0;
> + assert(mp_typeof(*mem->z) == MP_MAP ||
> + mp_typeof(*mem->z) == MP_ARRAY);
> + return -1;
> + case FIELD_TYPE_MAP:
> + if ((flags & MEM_Subtype) != 0 &&
> + mem->subtype == SQL_SUBTYPE_MSGPACK &&
> + mp_typeof(*mem->z) == MP_MAP)
> + return 0;
> + return -1;
> + case FIELD_TYPE_ARRAY:
> + if ((flags & MEM_Subtype) != 0 &&
> + mem->subtype == SQL_SUBTYPE_MSGPACK &&
> + mp_typeof(*mem->z) == MP_ARRAY)
> + return 0;
> + return -1;
> + case FIELD_TYPE_ANY:
> + return 0;
> + default:
> + return -1;
> + }
> +}
> +
> /*
> * pMem currently only holds a string type (or maybe a BLOB that we can
> * interpret as a string if we want to). Compute its corresponding
> @@ -2772,6 +2860,31 @@ case OP_ApplyType: {
> break;
> }
>
> +/* Opcode: CheckType P1 P2 * P4 *
> + * Synopsis: type(r[P1 at P2])
> + *
> + * Check that types of P2 registers starting from register
> + * P1 are compatible with given field_types.
4. 'with given field types in P4'.
> + */
> +case OP_CheckType: {
> + enum field_type *types = pOp->p4.types;
> + assert(types != NULL);
> + assert(types[pOp->p2] == field_type_MAX);
> + pIn1 = &aMem[pOp->p1];
> + enum field_type type;
> + while((type = *(types++)) != field_type_MAX) {
> + assert(pIn1 <= &p->aMem[(p->nMem+1 - p->nCursor)]);
> + assert(memIsValid(pIn1));
> + if (mem_check_types(pIn1, type) != 0) {
> + diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
> + mem_type_to_str(pIn1), field_type_strs[type]);
> + goto abort_due_to_error;
> + }
> + pIn1++;
> + }
> + break;
> +}
> +
> /* Opcode: MakeRecord P1 P2 P3 P4 P5
> * Synopsis: r[P3]=mkrec(r[P1 at P2])
> *
> diff --git a/test/sql-tap/gh-3809-implicit-cast-assignment.test.lua b/test/sql-tap/gh-3809-implicit-cast-assignment.test.lua
> new file mode 100755
> index 000000000..9df90bf72
> --- /dev/null
> +++ b/test/sql-tap/gh-3809-implicit-cast-assignment.test.lua
> @@ -0,0 +1,633 @@
> +#!/usr/bin/env tarantool
> +test = require("sqltester")
> +test:plan(75)
> +
> +--
> +-- Make sure that duting assignment there is no impolicit casts
5. 'duting' -> 'during', 'impolicit' -> 'implicit'.
> +-- with exception of implicit cast between numeric values.
> +--
> +test:execsql([[
> + CREATE TABLE ti (a INT PRIMARY KEY AUTOINCREMENT, i INTEGER);
> + CREATE TABLE td (a INT PRIMARY KEY AUTOINCREMENT, d DOUBLE);
> + CREATE TABLE tb (a INT PRIMARY KEY AUTOINCREMENT, b BOOLEAN);
> + CREATE TABLE tt (a INT PRIMARY KEY AUTOINCREMENT, t TEXT);
> + CREATE TABLE tv (a INT PRIMARY KEY AUTOINCREMENT, v VARBINARY);
> + CREATE TABLE ts (a INT PRIMARY KEY AUTOINCREMENT, s SCALAR);
> +]])
> +
> +test:do_catchsql_test(
> + "gh-3809-1",
> + [[
> + INSERT INTO ti(i) VALUES (11)
> + ]], {
> + 0
> + })
> +
> +test:do_catchsql_test(
> + "gh-3809-2",
> + [[
> + INSERT INTO ti(i) VALUES (22.2)
> + ]], {
> + 1, "Type mismatch: can not convert real to integer"
> + })
> +
> +test:do_catchsql_test(
> + "gh-3809-3",
> + [[
> + INSERT INTO ti(i) VALUES (33.0)
6. Do we really want to keep this type of implicit
assignment? After all, it is still double. Even though with 0
fraction.
What if I insert a double value > INT64_MAX? Double can store
integers bigger than int64_t/uint64_t can.
> + ]], {
> + 0
> + })
> +
> +test:do_catchsql_test(
> + "gh-3809-4",
> + [[
> + INSERT INTO ti(i) VALUES (true)
> + ]], {
> + 1, "Type mismatch: can not convert boolean to integer"
> + })
> +
> +test:do_catchsql_test(
> + "gh-3809-5",
> + [[
> + INSERT INTO ti(i) VALUES ('33')
> + ]], {
> + 1, "Type mismatch: can not convert text to integer"
> + })
> +
> +test:do_catchsql_test(
> + "gh-3809-6",
> + [[
> + INSERT INTO ti(i) VALUES (X'3434')
> + ]], {
> + 1, "Type mismatch: can not convert varbinary to integer"
> + })
> +
> +test:do_execsql_test(
> + "gh-3809-7",
> + [[
> + SELECT * FROM ti;
> + ]], {
> + 1, 11, 2, 33
> + })
> +
> +test:do_catchsql_test(
> + "gh-3809-8",
> + [[
> + INSERT INTO td(d) VALUES (11)
7. What if I insert something > 2^53 or < -2^53? It
can't be stored in double as is. Is it an error?
> + ]], {
> + 0
> + })
> +
> diff --git a/test/sql-tap/numcast.test.lua b/test/sql-tap/numcast.test.lua
> index eeac5353a..9bbae5ca4 100755
> --- a/test/sql-tap/numcast.test.lua
> +++ b/test/sql-tap/numcast.test.lua
> @@ -135,7 +135,7 @@ test:do_catchsql_test(
> INSERT INTO t VALUES(20000000000000000000.01);
> SELECT * FROM t;
> ]], {
> - 1,"Tuple field 1 type does not match one required by operation: expected integer"
> + 1,"Type mismatch: can not convert real to integer"
8. Is it correct, that we can remove all the checks from OP_CheckType
except numeric conversions, and everything will work fine, because
box makes the type checks anyway? It seems OP_CheckType duplicates
box's work.
> diff --git a/test/sql-tap/tkt-3998683a16.test.lua b/test/sql-tap/tkt-3998683a16.test.lua
> index 885dcf5cd..c91960c2c 100755
> --- a/test/sql-tap/tkt-3998683a16.test.lua
> +++ b/test/sql-tap/tkt-3998683a16.test.lua
> @@ -26,24 +26,24 @@ test:do_test(
> function()
> return test:execsql [[
> CREATE TABLE t1(x INT primary key, y NUMBER);
> - INSERT INTO t1 VALUES(1, '1.0');
> - INSERT INTO t1 VALUES(2, '.125');
> - INSERT INTO t1 VALUES(3, '123.');
> - INSERT INTO t1 VALUES(4, '123.e+2');
> - INSERT INTO t1 VALUES(5, '.125e+3');
> - INSERT INTO t1 VALUES(6, '123e4');
> - INSERT INTO t1 VALUES(11, ' 1.0');
> - INSERT INTO t1 VALUES(12, ' .125');
> - INSERT INTO t1 VALUES(13, ' 123.');
> - INSERT INTO t1 VALUES(14, ' 123.e+2');
> - INSERT INTO t1 VALUES(15, ' .125e+3');
> - INSERT INTO t1 VALUES(16, ' 123e4');
> - INSERT INTO t1 VALUES(21, '1.0 ');
> - INSERT INTO t1 VALUES(22, '.125 ');
> - INSERT INTO t1 VALUES(23, '123. ');
> - INSERT INTO t1 VALUES(24, '123.e+2 ');
> - INSERT INTO t1 VALUES(25, '.125e+3 ');
> - INSERT INTO t1 VALUES(26, '123e4 ');
> + INSERT INTO t1 VALUES(1, 1.0);
> + INSERT INTO t1 VALUES(2, .125);
> + INSERT INTO t1 VALUES(3, 123.);
> + INSERT INTO t1 VALUES(4, 123.e+2);
> + INSERT INTO t1 VALUES(5, .125e+3);
> + INSERT INTO t1 VALUES(6, 123e4);
> + INSERT INTO t1 VALUES(11, 1.0);
> + INSERT INTO t1 VALUES(12, .125);
> + INSERT INTO t1 VALUES(13, 123.);
> + INSERT INTO t1 VALUES(14, 123.e+2);
> + INSERT INTO t1 VALUES(15, .125e+3);
> + INSERT INTO t1 VALUES(16, 123e4);
> + INSERT INTO t1 VALUES(21, 1.0);
> + INSERT INTO t1 VALUES(22, .125);
> + INSERT INTO t1 VALUES(23, 123.);
> + INSERT INTO t1 VALUES(24, 123.e+2);
> + INSERT INTO t1 VALUES(25, .125e+3);
> + INSERT INTO t1 VALUES(26, 123e4);
9. There are lines where the second column is a duplicate. I guess
they can be removed now. Previously they were different in spacing
only.
More information about the Tarantool-patches
mailing list