[tarantool-patches] Re: [PATCH v3 1/2] sql: support -2^63 .. 2^64-1 integer type

n.pettik korablev at tarantool.org
Thu Apr 4 03:49:10 MSK 2019


Compilation of current patch results in fault:

/tarantool/src/box/sql/main.c:1923:53: error: too few arguments to function call, expected 4, have 3
        if (z != NULL && sql_dec_or_hex_to_i64(z, false, &v) == 0)
                         ~~~~~~~~~~~~~~~~~~~~~             ^

You remove caller of this func in the next commit,
but to make patch compilable let's fix it in the current one.

Several examples which look strange to me:

Lets start from very primitive test:

CREATE TABLE t (id INT PRIMARY KEY, a INT);
INSERT INTO t VALUES (1, 18446744073709551615);
tarantool> SELECT a FROM t;
---
- - [-1]
…

Another example:

CREATE TABLE t (id INT PRIMARY KEY, a INT);
tarantool> box.space.T:insert({1, 18446744073709551615ULL})
---
- [1, 18446744073709551615]
…
tarantool> update t set a = a
---
…

tarantool> select * from t
---
- - [1, -1]
...


Couple of bit more complicated cases:

select cast(-184467440737095516161.0 as int)
---
- error: 'Type mismatch: can not convert 0.0 to integer'
...

select cast(184467440737095516161.0 as int)
---
- error: 'Type mismatch: can not convert NaN to integer’
…

tarantool> select cast('18446744073709551615.0' as int)
---
- error: 'Type mismatch: can not convert 18446744073709551615.0 to integer'
…
tarantool> select cast(18446744073709551615.0 as int)
---
- - [0]
…

tarantool> select cast(-18446744073709551615.0 as int)
---
- error: 'Type mismatch: can not convert 0.0 to integer’
…

tarantool> select cast('-9223372036854775809' as int)
---
- - [-9223372036854775808]
...

And so on and so forth.

Please, introduce tests covering all possible scenarios,
involving not only explicit casts (as in examples I provided),
but implicit conversions as well.

Also, test uint as a value of limit/offset clauses,
auto-increment value, uint as an index iterator value,
sorting table containing uints, uint as parameters of
built-in functions etc. In other words, each functional
part of SQL should be tested on interaction with new
range of integer type. Also see examples below related
to aggregate functions.

Test from sql-tap/func.test.lua

CREATE TABLE t6(id INT primary key, x INTEGER);
INSERT INTO t6 VALUES(1, 1);
INSERT INTO t6 VALUES(2, 1<<62);
INSERT INTO t6 VALUES(3, 1<<62);

tarantool> select sum(x) from t6
---
- - [9223372036854775809]
…

tarantool> select ((1<<62)*2.0+1) from t6
---
- - [9223372036854775808]
  - [9223372036854775808]
  - [9223372036854775808]

tarantool>  SELECT sum(x) - ((1<<62)*2.0+1) from t6;
---
- - [0]
...

What is more:

INSERT INTO t6 VALUES(4, 1<<63);
 INSERT INTO t6 VALUES(5, 1<<63);
INSERT INTO t6 VALUES(7, 1<<63);
INSERT INTO t6 VALUES(8, 1<<63);

tarantool>         SELECT x  from t6;
---
- - [1]
  - [4611686018427387904]
  - [4611686018427387904]
  - [-9223372036854775808]
  - [-9223372036854775808]
  - [-9223372036854775808]
  - [-9223372036854775808]
…

tarantool>         SELECT sum(x)  from t6;
---
- - [9223372036854775809]
…

Please add solid set of tests verifying that overflow
during aggregate calculation is handled as should.

Next, look at OP_FCopy: now it is assumed that only
MEM_Int can be handled, which in turn is false.

The same for OP_IfPos. It is quite easy to construct
example which results in crash:

tarantool> select * from t limit 1 offset 18446744073709551615;
Assertion failed: (pIn3->flags & MEM_Int), function sqlVdbeExec, file /tarantool/src/box/sql/vdbe.c, line 5231.

Behaviour of some bitwise operators is dubious:

tarantool> select ~18446744073709551615
---
- - [0]
…

I can’t say whether this is correct result or not. To be discussed.

> diff --git a/src/box/execute.c b/src/box/execute.c
> index 7c77df2e5..813208783 100644
> --- a/src/box/execute.c
> +++ b/src/box/execute.c
> @@ -130,14 +131,9 @@ sql_bind_decode(struct sql_bind *bind, int i, const char **packet)
> 	switch (mp_typeof(**packet)) {
> 	case MP_UINT: {
> 		uint64_t n = mp_decode_uint(packet);
> -		if (n > INT64_MAX) {
> -			diag_set(ClientError, ER_SQL_BIND_VALUE,
> -				 sql_bind_name(bind), "INTEGER");
> -			return -1;
> -		}
> -		bind->i64 = (int64_t) n;
> -		bind->type = SQL_INTEGER;
> -		bind->bytes = sizeof(bind->i64);
> +		bind->u64 = n;
> +		bind->type = (n > INT64_MAX) ? SQL_UNSIGNED : SQL_INTEGER;

If we store value as unsigned, then I guess type of bind var
should be always SQL_UNSIGNED.

> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
> index 82688dff3..0c127e80a 100644
> --- a/src/box/sql/expr.c
> +++ b/src/box/sql/expr.c
> @@ -1192,7 +1192,9 @@ sqlExprAssignVarNumber(Parse * pParse, Expr * pExpr, u32 n)
> 			 * variable number
> 			 */
> 			int64_t i;
> -			bool is_ok = 0 == sql_atoi64(&z[1], &i, n - 1);
> +			bool is_unsigned = false;
> +			bool is_ok = 0 == sql_atoi64(&z[1],
> +				&i, &is_unsigned, n - 1);

Please, rebase patch to fresh master. Now we are using diag
throughout SQL code to raise an error. After that your code
should look like:

 @@ -1193,14 +1193,16 @@ sqlExprAssignVarNumber(Parse * pParse, Expr * pExpr, u32 n)
                        */
                        int64_t i;
                        bool is_unsigned = false;
-                       bool is_ok = 0 == sql_atoi64(&z[1],
-                               &i, &is_unsigned, n - 1);
+                       if (sql_atoi64(&z[1], &i, &is_unsigned, n - 1) != 0) {
+                               pParse->is_aborted = true;
+                               return;
+                       }
                        x = (ynVar) i;
                        testcase(i == 0);
                        testcase(i == 1);
                        testcase(i == SQL_BIND_PARAMETER_MAX - 1);
                        testcase(i == SQL_BIND_PARAMETER_MAX);
-                       if (!is_ok || i < 1 || i > SQL_BIND_PARAMETER_MAX) {
+                       if (i < 1 || i > SQL_BIND_PARAMETER_MAX) {

> @@ -3335,23 +3337,29 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg,
> 		int64_t value;
> 		const char *z = expr->u.zToken;
> 		assert(z != NULL);
> -		int c = sql_dec_or_hex_to_i64(z, &value);
> -		if (c == 1 || (c == 2 && !is_neg) ||
> -		    (is_neg && value == SMALLEST_INT64)) {
> +		bool is_unsigned = false;
> +		int c = sql_dec_or_hex_to_i64(z, is_neg, &value, &is_unsigned);
> +		if (c < 0) {

Please, incapsulate these checks in sql_dec_or_hex_to_i64()
and raise there error via diag (after rebase to master). Again,
call of this function should look like:

@@ -3338,28 +3338,21 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg,
                const char *z = expr->u.zToken;
                assert(z != NULL);
                bool is_unsigned = false;
-               int c = sql_dec_or_hex_to_i64(z, is_neg, &value, &is_unsigned);
-               if (c < 0) {
-                       if (sql_strnicmp(z, "0x", 2) == 0) {
-                               sqlErrorMsg(parse,
-                                           "hex literal too big: %s%s",
-                                           is_neg ? "-" : "", z);
-                       } else {
-                               sqlErrorMsg(parse,
-                                           "oversized integer: %s%s",
-                                           is_neg ? "-" : "", z);
-                       }
-               } else {
-                       if (is_unsigned)
-                               /*
-                                * value is in the range
-                                * [INT64_MAX+1, UINT64_MAX]
-                                */
-                               sqlVdbeAddOp4Dup8(v, OP_Int64, 0, mem, 0,
-                                                 (u8 *)&value, P4_UINT64);
-                       else
-                               sqlVdbeAddOp4Dup8(v, OP_Int64, 0, mem, 0,
-                                                 (u8 *)&value, P4_INT64);
+               if (sql_dec_or_hex_to_i64(z, is_neg, &value,
+                                         &is_unsigned) != 0) {
+                       parse->is_aborted = true;
+                       return;
+               }
+               int p4_type = is_unsigned ? P4_UINT64 : P4_INT64;
+               sqlVdbeAddOp4(v, OP_Int64, 0, mem, 0, (u8 *) &value, p4_type);

Looks way more readable. What is more, it is the only usage of
sql_dec_or_hex_to_i64(), so you can make it static.

> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index 21a69aa51..74b56d780 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> 
> @@ -191,6 +195,11 @@ absFunc(sql_context * context, int argc, sql_value ** argv)
> 			sql_result_int64(context, iVal);
> 			break;
> 		}
> +	case SQL_UNSIGNED: {
> +			u64 iVal = sql_value_uint64(argv[0]);
> +			sql_result_int64(context, iVal);

Should be sql_result_uint64(), otherwise you get:
tarantool> select abs(18446744073709551615)
---
- - [-1]
…

Also, add pls test case verifying that example above is valid.

> @@ -1403,6 +1413,7 @@ struct SumCtx {
> 	i64 cnt;		/* Number of elements summed */
> 	u8 overflow;		/* True if integer overflow seen */
> 	u8 approx;		/* True if non-integer value was input to the sum */
> +	bool is_unsigned;	/* True if value exceeded 2^63 */
> };
> 
> /*
> @@ -1426,16 +1437,40 @@ sumStep(sql_context * context, int argc, sql_value ** argv)
> 	type = sql_value_numeric_type(argv[0]);
> 	if (p && type != SQL_NULL) {
> 		p->cnt++;
> +		enum arithmetic_result rc = ATHR_SIGNED;
> +
> 		if (type == SQL_INTEGER) {
> 			i64 v = sql_value_int64(argv[0]);
> 			p->rSum += v;
> -			if ((p->approx | p->overflow) == 0
> -			    && sqlAddInt64(&p->iSum, v)) {
> -				p->overflow = 1;
> -			}
> +			if ((p->approx | p->overflow) == 0)
> +				rc = sqlAddInt64(&p->iSum,
> +						!p->is_unsigned,
> +						v, true);
> +		} else if (type == SQL_UNSIGNED) {
> +			u64 v = sql_value_uint64(argv[0]);
> +			p->rSum += v;
> +			if ((p->approx | p->overflow) == 0)
> +				rc = sqlAddInt64(&p->iSum,
> +						 !p->is_unsigned,
> +						 v, false);

Processing is almost the same as for integer.
Please, merge these branches into one.

> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
> index eb1488576..8942addd3 100644
> --- a/src/box/sql/sqlInt.h
> +++ b/src/box/sql/sqlInt.h
> @@ -452,6 +452,9 @@ sql_column_subtype(struct sql_stmt *stmt, int i);
> sql_int64
> sql_value_int64(sql_value *);
> 
> +sql_uint64

Do we need this typedef?

> @@ -4296,16 +4309,15 @@ field_type_sequence_dup(struct Parse *parse, enum field_type *types,
>  *
>  * @param z String being parsed.
>  * @param[out] val Output integer value.
> + * @param[out] is_unsigned is true is returned value is positive
> + * and its value is in the range [INT64_MAX+1, UINT64_MAX]
>  * @param length String length in bytes.
>  * @retval
> - *     0    Successful transformation.  Fits in a 64-bit signed
> - *          integer.
> - *     1    Integer too large for a 64-bit signed integer or is
> - *          malformed
> - *     2    Special case of 9223372036854775808
> + *     0	Successful transformation.
> + *     -1	An error occurred.

Please, apply this:

--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -4305,16 +4305,14 @@ field_type_sequence_dup(struct Parse *parse, enum field_type *types,
  *
  * length is the number of bytes in the string (bytes, not
  * characters). The string is not necessarily zero-terminated.
- * The encoding is given by enc.
  *
  * @param z String being parsed.
  * @param[out] val Output integer value.
- * @param[out] is_unsigned is true is returned value is positive
+ * @param[out] is_unsigned is true if returned value is positive
  * and its value is in the range [INT64_MAX+1, UINT64_MAX]
  * @param length String length in bytes.
- * @retval
- *     0       Successful transformation.
- *     -1      An error occurred.
+ * @retval 0 In case of successful transformation.
+ * @retval -1 An error occurred during conversion.

> int
> -sql_atoi64(const char *z, int64_t *val, int length);
> +sql_atoi64(const char *z, int64_t *val, bool *is_unsigned, int length);
> 
> /**
>  * Transform a UTF-8 integer literal, in either decimal or
> @@ -4313,14 +4325,17 @@ sql_atoi64(const char *z, int64_t *val, int length);
>  * accepts hexadecimal literals, whereas sql_atoi64() does not.
>  *
>  * @param z Literal being parsed.
> + * @param is_neg Sign of the number being converted
>  * @param[out] val Parsed value.
> + * @param[out] is_unsigned is true is returned value is positive
> + * and its value is in the range [INT64_MAX+1, UINT64_MAX]
>  * @retval
> - *     0    Successful transformation.  Fits in a 64-bit signed integer.
> - *     1    Integer too large for a 64-bit signed integer or is malformed
> - *     2    Special case of 9223372036854775808
> + *     0	Successful transformation.
> + *     -1	An error occurred.
>  */

Same:

@@ -4327,11 +4325,10 @@ sql_atoi64(const char *z, int64_t *val, bool *is_unsigned, int length);
  * @param z Literal being parsed.
  * @param is_neg Sign of the number being converted
  * @param[out] val Parsed value.
- * @param[out] is_unsigned is true is returned value is positive
+ * @param[out] is_unsigned is true if returned value is positive
  * and its value is in the range [INT64_MAX+1, UINT64_MAX]
- * @retval
- *     0       Successful transformation.
- *     -1      An error occurred.
+ * @retval 0 Successful transformation.
+ * @retval -1 An error occurred during conversion.
  */

> @@ -4358,9 +4373,31 @@ Expr *sqlExprAddCollateString(Parse *, Expr *, const char *);
> Expr *sqlExprSkipCollate(Expr *);
> int sqlCheckIdentifierName(Parse *, char *);
> void sqlVdbeSetChanges(sql *, int);
> -int sqlAddInt64(i64 *, i64);
> -int sqlSubInt64(i64 *, i64);
> -int sqlMulInt64(i64 *, i64);
> +
> +enum arithmetic_result {
> +	/* The result fits the signed 64-bit integer */
> +	ATHR_SIGNED,
> +	/* The result is positive and fits the
> +	 * unsigned 64-bit integer
> +	 */
> +	ATHR_UNSIGNED,
> +	/* The operation causes an overflow */
> +	ATHR_OVERFLOW,
> +	/* The operation causes division by zero */
> +	ATHR_DIVBYZERO
> +};

Kostja supported my point of view concerning removing this
enum and using -1/0 as return values and diag to set an
error.

> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
> index c1da9a4aa..a148bb7d4 100644
> --- a/src/box/sql/vdbe.c
> +++ b/src/box/sql/vdbe.c
> 
> @@ -1141,6 +1173,8 @@ case OP_Int64: {           /* out2 */
> 	pOut = out2Prerelease(p, pOp);
> 	assert(pOp->p4.pI64!=0);
> 	pOut->u.i = *pOp->p4.pI64;
> +	if (pOp->p4type == P4_UINT64)
> +		pOut->flags = MEM_UInt;

Why don’t you save value to pOut->u.u in this case?

> @@ -1951,6 +1991,10 @@ case OP_ShiftRight: {           /* same as TK_RSHIFT, in1, in2, out3 */
> 	} else if (iB!=0) {
> 		assert(op==OP_ShiftRight || op==OP_ShiftLeft);
> 
> +		if (is_unsignedB){
> +			/* Limit big unsigned values by 64 */
> +			iB = 64;
> +		}
> 		/* If shifting by a negative amount, shift in the other direction */
> 		if (iB<0) {
> 			assert(OP_ShiftRight==OP_ShiftLeft+1);
> @@ -2002,6 +2046,9 @@ case OP_AddImm: {            /* in1 */
>  */
> case OP_MustBeInt: {            /* jump, in1 */
> 	pIn1 = &aMem[pOp->p1];
> +	if ((pIn1->flags & MEM_UInt)!=0)
> +		break;
> +
> 	if ((pIn1->flags & MEM_Int)==0) {
> 		mem_apply_type(pIn1, FIELD_TYPE_INTEGER);
> 		VdbeBranchTaken((pIn1->flags&MEM_Int)==0, 2);
> 
> @@ -2233,17 +2280,25 @@ case OP_Ge: {             /* same as TK_GE, jump, in1, in3 */
> 				res = 0;
> 				goto compare_op;
> 			}
> +			if ((pIn1->flags & pIn3->flags & MEM_UInt)!=0) {
> +				if (pIn3->u.u > pIn1->u.u) { res = +1; goto compare_op; }
> +				if (pIn3->u.u < pIn1->u.u) { res = -1; goto compare_op; }
> +				res = 0;
> +				goto compare_op;
> +			}
> 		} else if (type == FIELD_TYPE_STRING) {
> -			if ((flags1 & MEM_Str)==0 && (flags1 & (MEM_Int|MEM_Real))!=0) {
> +			if ((flags1 & MEM_Str)==0 && (flags1 & (MEM_Int|MEM_Real|MEM_UInt))!=0) {
> 				testcase( pIn1->flags & MEM_Int);
> +				testcase( pIn1->flags & MEM_UInt);

’testcase’ functionality seems to be broken, so you can skip this.

> 				testcase( pIn1->flags & MEM_Real);
> 				sqlVdbeMemStringify(pIn1, 1);
> 				testcase( (flags1&MEM_Dyn) != (pIn1->flags&MEM_Dyn));
> 				flags1 = (pIn1->flags & ~MEM_TypeMask) | (flags1 & MEM_TypeMask);
> 				assert(pIn1!=pIn3);
> 			}
> -			if ((flags3 & MEM_Str)==0 && (flags3 & (MEM_Int|MEM_Real))!=0) {
> +			if ((flags3 & MEM_Str)==0 && (flags3 & (MEM_Int|MEM_Real|MEM_UInt))!=0) {
> 				testcase( pIn3->flags & MEM_Int);
> +				testcase( pIn3->flags & MEM_UInt);
> 				testcase( pIn3->flags & MEM_Real);
> 				sqlVdbeMemStringify(pIn3, 1);
> 				testcase( (flags3&MEM_Dyn) != (pIn3->flags&MEM_Dyn));
> 
> 
> @@ -3533,6 +3592,7 @@ case OP_SeekGT: {       /* jump, in3 */
> 	UnpackedRecord r;  /* The key to seek for */
> 	int nField;        /* Number of columns or fields in the key */
> 	i64 iKey;          /* The id we are to seek to */
> +	u32 key_type = MEM_Int;  /* Type of the iKey, integer by default */
> 	int eqOnly;        /* Only interested in == results */
> 	int reg_ipk=0;     /* Register number which holds IPK. */
> 
> @@ -3561,12 +3621,14 @@ case OP_SeekGT: {       /* jump, in3 */
> 		 * the seek, so convert it.
> 		 */
> 		pIn3 = &aMem[reg_ipk];
> -		if ((pIn3->flags & (MEM_Int|MEM_Real|MEM_Str))==MEM_Str) {
> +		if ((pIn3->flags & (MEM_Int|MEM_Real|MEM_Str|MEM_UInt))==MEM_Str) {
> 			mem_apply_numeric_type(pIn3, 0);
> +			key_type = pIn3->flags & MEM_PURE_TYPE_MASK;
> 		}
> 		int64_t i;
> -		if ((pIn3->flags & MEM_Int) == MEM_Int) {
> +		if ((pIn3->flags & (MEM_Int | MEM_UInt)) != 0) {
> 			i = pIn3->u.i;
> +			key_type = pIn3->flags & MEM_PURE_TYPE_MASK;
> 		} else if ((pIn3->flags & MEM_Real) == MEM_Real) {
> 			if (pIn3->u.r > INT64_MAX)
> 				i = INT64_MAX;
> @@ -3585,7 +3647,7 @@ case OP_SeekGT: {       /* jump, in3 */
> 		/* If the P3 value could not be converted into an integer without
> 		 * loss of information, then special processing is required...
> 		 */
> -		if ((pIn3->flags & MEM_Int)==0) {
> +		if ((pIn3->flags & (MEM_Int | MEM_UInt))==0) {
> 			if ((pIn3->flags & MEM_Real)==0) {
> 				/* If the P3 value cannot be converted into any kind of a number,
> 				 * then the seek is not possible, so jump to P2
> 
> 
> diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h
> index 61b7d58b2..5b2d129f7 100644
> --- a/src/box/sql/vdbeInt.h
> +++ b/src/box/sql/vdbeInt.h
> 
> @@ -234,7 +235,7 @@ struct Mem {
> #define MEM_Frame     0x0080	/* Value is a VdbeFrame object */
> #define MEM_Undefined 0x0100	/* Value is undefined */
> #define MEM_Cleared   0x0200	/* NULL set by OP_Null, not from data */
> -#define MEM_TypeMask  0x83ff	/* Mask of type bits */
> +#define MEM_TypeMask  0x283ff	/* Mask of type bits */
> 
> /* Whenever Mem contains a valid string or blob representation, one of
>  * the following flags must be set to determine the memory management
> @@ -248,6 +249,8 @@ struct Mem {
> #define MEM_Agg       0x4000	/* Mem.z points to an agg function context */
> #define MEM_Zero      0x8000	/* Mem.i contains count of 0s appended to blob */
> #define MEM_Subtype   0x10000	/* Mem.eSubtype is valid */
> +#define MEM_UInt      0x20000	/* Value is unsigned integer. */

Could you swap or shift values of memory type, to make them
fit into 8 bit mask? To separate *real* types of memory cell from
auxiliary flags.

> +
> #ifdef SQL_OMIT_INCRBLOB
> #undef MEM_Zero
> #define MEM_Zero 0x0000
> @@ -259,7 +262,9 @@ struct Mem {
>  * auxiliary flags.
>  */
> enum {
> -	MEM_PURE_TYPE_MASK = 0x1f
> +	MEM_PURE_TYPE_MASK = MEM_Null | MEM_Str | MEM_Int |
> +			     MEM_Real | MEM_Blob | MEM_Bool |
> +			     MEM_UInt
> };
> 
> 
> @@ -273,7 +278,7 @@ enum {
>  * Clear any existing type flags from a Mem and replace them with f
>  */
> #define MemSetTypeFlag(p, f) \
> -   ((p)->flags = ((p)->flags&~(MEM_TypeMask|MEM_Zero))|f)
> +   ((p)->flags = ((p)->flags&~(MEM_TypeMask|MEM_Zero|MEM_UInt))|f)

Why do we consider MEM_UInt as different from other types?

> diff --git a/src/box/sql/vdbeapi.c b/src/box/sql/vdbeapi.c
> index d7e89073e..e138c5abd 100644
> --- a/src/box/sql/vdbeapi.c
> +++ b/src/box/sql/vdbeapi.c
> @@ -215,7 +215,8 @@ int
> sql_value_int(sql_value * pVal)
> {
> 	int64_t i;
> -	sqlVdbeIntValue((Mem *) pVal, &i);
> +	bool is_unsigned = false;
> +	sqlVdbeIntValue((Mem *) pVal, &i, &is_unsigned);

Add an assertion verifying that is_unsigned is false after
calling sqlVdbeIntValue(). Otherwise, value must be stored
as uint64.

> @@ -223,7 +224,17 @@ sql_int64
> sql_value_int64(sql_value * pVal)
> {
> 	int64_t i;
> -	sqlVdbeIntValue((Mem *) pVal, &i);
> +	bool is_unsigned = false;
> +	sqlVdbeIntValue((Mem *) pVal, &i, &is_unsigned);

The same.

> +	return i;
> +}
> +
> +sql_uint64
> +sql_value_uint64(sql_value * pVal)
> +{
> +	uint64_t i;
> +	bool is_unsigned = false;
> +	sqlVdbeIntValue((Mem *) pVal, (int64_t *)&i, &is_unsigned);

assert(I >= 0);

> @@ -246,41 +257,38 @@ sql_value_text(sql_value * pVal)
> int
> sql_value_type(sql_value * pVal)
> {
> 
> -	return aType[pVal->flags & MEM_PURE_TYPE_MASK];
> +	/*
> +	 * The order of the comparisons is essential.
> +	 * Once the value change its type,
> +	 * e.g. string or blob to integer/real
> +	 * the dynamic data are not disposed,
> +	 * they are kept together with scalar value.
> +	 * Thus the field 'flags' accumulates several
> +	 * bits responsible for data type.
> +	 * So the right order is following:

Could you please illustrate this with an example?
I really doubt that type of value should depend on
order of fetching. Moreover, I believe that these flags
must be incompatible: how value can feature at least
two of these flags?

> +	 * - Null
> +	 * - Unsigned integer
> +	 * - Signed integer
> +	 * - Real
> +	 * - Bool
> +	 * - String
> +	 * - Blob
> +	 */
> +	if ((pVal->flags & MEM_Null) != 0)
> +		return SQL_NULL;
> +	if ((pVal->flags & MEM_UInt) != 0)
> +		return SQL_UNSIGNED;
> +	if ((pVal->flags & MEM_Int) != 0)
> +		return SQL_INTEGER;
> +	if ((pVal->flags & MEM_Real) != 0)
> +		return SQL_FLOAT;
> +	if ((pVal->flags & MEM_Bool) != 0)
> +		return SQL_INTEGER;
> +	if ((pVal->flags & MEM_Str) != 0)
> +		return SQL_TEXT;
> +	if ((pVal->flags & MEM_Blob) != 0)
> +		return SQL_BLOB;
> +	return SQL_NULL;	/* Unknown type */

NULL is not an unknown type. I would add unreachable() assertion here.

> /* Make a copy of an sql_value object
> @@ -410,6 +418,15 @@ sql_result_int64(sql_context * pCtx, i64 iVal)
> 	sqlVdbeMemSetInt64(pCtx->pOut, iVal);
> }
> 
> +void
> +sql_result_uint64(sql_context * pCtx, u64 iVal)

I suppose this function is always called with unsigned
argument, so you can remove branching and always
store it as uint.

> +{
> +	if (iVal > INT64_MAX)
> +		sqlVdbeMemSetUInt64(pCtx->pOut, iVal);
> +	else
> +		sqlVdbeMemSetInt64(pCtx->pOut, (i64)iVal);
> +}
> +
> 
> 
> diff --git a/src/box/sql/vdbeaux.c b/src/box/sql/vdbeaux.c
> index 0cc3c1487..3fbce2f01 100644
> --- a/src/box/sql/vdbeaux.c
> +++ b/src/box/sql/vdbeaux.c
> 
> @@ -3357,6 +3364,44 @@ sqlIntFloatCompare(i64 i, double r)
> 	}
> }
> 
> +/*
> + * Do a comparison between a 64-bit signed integer and a 64-bit
> + * unsigned integer.
> + * Return negative, zero, or positive if the first (i64) is less than,
> + * equal to, or greater than the second (u64).
> + */
> +static int
> +sqlIntUIntCompare(i64 i, u64 u)

If you introduce new functions, please use Tarantool code style.

> +{
> +	if (i < 0)
> +		return -1;
> +	if ((u64)i < u)
> +		return -1;
> +	else if ((u64)i > u)
> +		return +1;
> +	else
> +		return 0;
> +}
> +
> +/*
> + * Do a comparison between a 64-bit unsigned integer and a 64-bit
> + * floating-point number.
> + * Return negative, zero, or positive if the first (u64)
> + * is less than, equal to, or greater than the second (double).
> + */
> +static int
> +sqlUIntFloatCompare(u64 u, double r)
> +{
> +	if (r < 0.0)
> +		return +1;
> +	double s = (double)u;
> +	if (s < r)
> +		return -1;
> +	if (s > r)
> +		return +1;
> +	return 0;
> +}
> +
> /*
>  * Compare the values contained by the two memory cells, returning
>  * negative, zero or positive if pMem1 is less than, equal to, or greater
> @@ -3385,7 +3430,7 @@ sqlMemCompare(const Mem * pMem1, const Mem * pMem2, const struct coll * pColl)
> 
> 	/* At least one of the two values is a number
> 	 */
> -	if (combined_flags & (MEM_Int | MEM_Real)) {
> +	if (combined_flags & (MEM_Int | MEM_Real | MEM_UInt)) {
> 		if ((f1 & f2 & MEM_Int) != 0) {
> 			if (pMem1->u.i < pMem2->u.i)
> 				return -1;
> @@ -3393,6 +3438,13 @@ sqlMemCompare(const Mem * pMem1, const Mem * pMem2, const struct coll * pColl)
> 				return +1;
> 			return 0;
> 		}
> +		if ((f1 & f2 & MEM_UInt) != 0) {
> +			if (pMem1->u.u < pMem2->u.u)
> +				return -1;
> +			if (pMem1->u.u > pMem2->u.u)
> +				return +1;
> +			return 0;
> +		}
> 		if ((f1 & f2 & MEM_Real) != 0) {
> 			if (pMem1->u.r < pMem2->u.r)
> 				return -1;
> @@ -3404,6 +3456,20 @@ sqlMemCompare(const Mem * pMem1, const Mem * pMem2, const struct coll * pColl)
> 			if ((f2 & MEM_Real) != 0) {
> 				return sqlIntFloatCompare(pMem1->u.i,
> 							      pMem2->u.r);
> +			} else if ((f2 & MEM_UInt) != 0){
> +				return sqlIntUIntCompare(pMem1->u.i,
> +							pMem2->u.u);
> +			} else {
> +				return -1;
> +			}
> +		}
> +		if ((f1 & MEM_UInt) != 0) {
> +			if ((f2 & MEM_Real) != 0) {
> +				return sqlUIntFloatCompare(pMem1->u.u,
> +							  pMem2->u.r);
> +			} else if ((f2 & MEM_Int) != 0){
> +				return -sqlIntUIntCompare(pMem2->u.i,
> +							pMem1->u.u);
> 			} else {
> 				return -1;
> 			}
> @@ -3411,7 +3477,10 @@ sqlMemCompare(const Mem * pMem1, const Mem * pMem2, const struct coll * pColl)
> 		if ((f1 & MEM_Real) != 0) {
> 			if ((f2 & MEM_Int) != 0) {
> 				return -sqlIntFloatCompare(pMem2->u.i,
> -							       pMem1->u.r);
> +							   pMem1->u.r);
> +			} else if ((f2 & MEM_UInt) != 0) {
> +				return -sqlUIntFloatCompare(pMem2->u.u,
> +							   pMem1->u.r);
> 			} else {
> 				return -1;
> 			}
> @@ -3567,13 +3636,24 @@ sqlVdbeCompareMsgpack(const char **key1,
> 			goto do_int;
> 		}
> 	case MP_UINT:{
> -			uint64_t v = mp_decode_uint(&aKey1);
> -			if (v > INT64_MAX) {
> -				mem1.u.r = (double)v;
> -				goto do_float;
> +			mem1.u.u = mp_decode_uint(&aKey1);
> +
> +			if (pKey2->flags & MEM_Int) {
> +				rc = -sqlIntUIntCompare(pKey2->u.i,
> +						       mem1.u.u);
> +			} else if (pKey2->flags & MEM_Real) {
> +				rc = sqlUIntFloatCompare(mem1.u.u,
> +							pKey2->u.r);
> +			} else if (pKey2->flags & MEM_UInt) {
> +				if (mem1.u.u < pKey2->u.u) {
> +					rc = -1;
> +				} else if (mem1.u.u > pKey2->u.u) {
> +					rc = +1;
> +				}
> +			} else {
> +				rc = (pKey2->flags & MEM_Null) ? +1 : -1;
> 			}
> -			mem1.u.i = v;
> -			goto do_int;
> +			break;
> 		}
> 	case MP_INT:{
> 			mem1.u.i = mp_decode_int(&aKey1);
> @@ -3587,6 +3667,9 @@ sqlVdbeCompareMsgpack(const char **key1,
> 			} else if (pKey2->flags & MEM_Real) {
> 				rc = sqlIntFloatCompare(mem1.u.i,
> 							    pKey2->u.r);
> +			} else if (pKey2->flags & MEM_UInt) {
> +				rc = sqlIntUIntCompare(mem1.u.i,
> +							pKey2->u.u);
> 			} else {
> 				rc = (pKey2->flags & MEM_Null) ? +1 : -1;
> 			}
> diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
> index 074ff8c96..a30b131a6 100644
> --- a/src/box/sql/vdbemem.c
> +++ b/src/box/sql/vdbemem.c
> 
> @@ -411,39 +414,36 @@ sqlVdbeMemRelease(Mem * p)
> }
> 
> static int
> -doubleToInt64(double r, int64_t *i)
> +doubleToInt64(double r, int64_t *i, bool* is_unsigned)
> {
> -#ifdef SQL_OMIT_FLOATING_POINT
> -	/* When floating-point is omitted, double and int64 are the same thing */
> -	*i = r;
> -	return 0;
> -#else
> -	/*
> -	 * Many compilers we encounter do not define constants for the
> -	 * minimum and maximum 64-bit integers, or they define them
> -	 * inconsistently.  And many do not understand the "LL" notation.
> -	 * So we define our own static constants here using nothing
> -	 * larger than a 32-bit integer constant.
> -	 */
> -	static const int64_t maxInt = LARGEST_INT64;
> -	static const int64_t minInt = SMALLEST_INT64;
> +	static const int64_t minInt = INT64_MIN;
> +	static const uint64_t maxUInt = UINT64_MAX;
> +	static const int64_t maxInt = INT64_MAX;

Please, don’t use these strange aliases.

> @@ -620,10 +653,15 @@ sqlVdbeMemCast(Mem * pMem, enum field_type type)
> 		return 0;
> 	case FIELD_TYPE_INTEGER:
> 		if ((pMem->flags & MEM_Blob) != 0) {
> +			bool is_unsigned = false;
> 			if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i,
> +				       &is_unsigned,
> 				       pMem->n) != 0)
> 				return -1;
> -			MemSetTypeFlag(pMem, MEM_Int);
> +			if (is_unsigned)
> +				MemSetTypeFlag(pMem, MEM_UInt);
> +			else
> +				MemSetTypeFlag(pMem, MEM_Int);

Again, you save uint in pMem->u.i. In some other cases, for instance
in mpstream_encode_vdbe_mem() and in  sqlVdbeRealValue you fetch
uint from u.i. I consider this pattern as bad and unsafe.

> @@ -711,6 +749,14 @@ vdbeReleaseAndSetInt64(Mem * pMem, i64 val)
> 	pMem->flags = MEM_Int;
> }
> 
> +static SQL_NOINLINE void
> +vdbeReleaseAndSetUInt64(Mem * pMem, u64 val)
> +{
> +	sqlVdbeMemSetNull(pMem);
> +	pMem->u.u = val;
> +	pMem->flags = MEM_UInt;
> +}
> +
> /*
>  * Delete any previous value and set the value stored in *pMem to val,
>  * manifest type INTEGER.
> @@ -726,6 +772,17 @@ sqlVdbeMemSetInt64(Mem * pMem, i64 val)
> 	}
> }
> 
> +void
> +sqlVdbeMemSetUInt64(Mem * pMem, u64 val)
> +{
> +	if (VdbeMemDynamic(pMem)) {
> +		vdbeReleaseAndSetUInt64(pMem, val);
> +	} else {
> +		pMem->u.u = val;
> +		pMem->flags = MEM_UInt;
> +	}
> +}
> +
> 
> diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
> index 889fc5867..1f328edb4 100755
> --- a/test/sql-tap/func.test.lua
> +++ b/test/sql-tap/func.test.lua
> @@ -1,6 +1,6 @@
> #!/usr/bin/env tarantool
> test = require("sqltester")
> -test:plan(14586)
> +test:plan(14587)
> 
> --!./tcltestrunner.lua
> -- 2001 September 15
> @@ -919,7 +919,7 @@ test:do_execsql_test(
>                             UNION ALL SELECT -9223372036854775807)
>     ]], {
>         -- <func-8.7>
> -        "real"
> +        “integer"

Add test . I guess original idea was to test smth like this:

tarantool> SELECT sum(x) FROM (SELECT '18446744073' || '709551616' AS x UNION ALL SELECT -9223372036854775807 UNION ALL SELECT -9223372036854775807)
---
- - [0]
...

tarantool> SELECT typeof(sum(x)) FROM (SELECT '18446744073' || '709551616' AS x UNION ALL SELECT -9223372036854775807 UNION ALL SELECT -9223372036854775807)
---
- - ['real']
...






More information about the Tarantool-patches mailing list