[tarantool-patches] Re: [PATCH 07/13] sql: arithmetic functions support big integers

Mon Apr 1 23:43:27 MSK 2019

On 25.03.2019 18:13, n.pettik wrote:
> 
>> Makes arithmetic functions accept arguments with
>> values in the range [2^63, 2^64).
>> ---
>> src/box/sql/func.c    |   2 +-
>> src/box/sql/sqlInt.h  |  23 +++-
>> src/box/sql/util.c    | 236 ++++++++++++++++++++++++++++++++----------
>> src/box/sql/vdbe.c    |  36 ++++---
>> src/box/sql/vdbeInt.h |   2 +-
>> 5 files changed, 223 insertions(+), 76 deletions(-)
>>
>> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
>> index 9b1d7df9a..7f8e3f04e 100644
>> --- a/src/box/sql/sqlInt.h
>> +++ b/src/box/sql/sqlInt.h
>> @@ -4383,9 +4383,26 @@ Expr *sqlExprAddCollateString(Parse *, Expr *, const char *);
>> Expr *sqlExprSkipCollate(Expr *);
>> int sqlCheckIdentifierName(Parse *, char *);
>> void sqlVdbeSetChanges(sql *, int);
>> -int sqlAddInt64(i64 *, i64);
>> -int sqlSubInt64(i64 *, i64);
>> -int sqlMulInt64(i64 *, i64);
>> +
>> +enum arithmetic_result {
>> +	/* The result fits the signed 64-bit integer */
>> +	ATHR_SIGNED,
>> +	/* The result is positive and fits the
>> +	 * unsigned 64-bit integer
>> +	 */
>> +	ATHR_UNSIGNED,
>> +	/* The operation causes an overflow */
>> +	ATHR_OVERFLOW,
>> +	/* The operation causes division by zero */
>> +	ATHR_DIVBYZERO
>> +};
>> +
>> +enum arithmetic_result sqlAddInt64(i64 *, bool, i64, bool);
>> +enum arithmetic_result sqlSubInt64(i64 *, bool, i64, bool);
>> +enum arithmetic_result sqlMulInt64(i64 *, bool, i64, bool);
>> +enum arithmetic_result sqlDivInt64(i64 *, bool, i64, bool);
>> +enum arithmetic_result sqlRemInt64(i64 *, bool, i64, bool);
> 
> Since you’ve already fixed signature of these functions,
> please make them follow Tarantool code style:
> 
> enum arithmetic_result
> sql_add_int64(int64_t *lhs, bool is_lhs_signed, …);

Done.

> 
> What is more, personally I would apply the same fix as for atoi functions:
> make them return -1 in case of overflow or division by 0 and set
> diag message right in these functions; use enum to represent their args.

It's bad practice to set diagnostics from low-level functions.
Using a single enum as the return value gives compact, readable code.

> 
>> +
>> int sqlAbsInt32(int);
>> #ifdef SQL_ENABLE_8_3_NAMES
>> void sqlFileSuffix3(const char *, char *);
>> diff --git a/src/box/sql/util.c b/src/box/sql/util.c
>> index be77f72f8..3786c5083 100644
>> --- a/src/box/sql/util.c
>> +++ b/src/box/sql/util.c
>> @@ -1249,74 +1249,202 @@ sqlSafetyCheckSickOrOk(sql * db)
>> }
>>
>> +/*
>> + * Attempt to add, subtract, or multiply the 64-bit value iB against
>> + * the other 64-bit integer at *pA and store the result in *pA.
>> + * Return ATHR_SIGNED or ATHR_UNSIGNED on success.
>> + * Or if the operation would have resulted in an
>> + * overflow, leave *pA unchanged and return ATHR_OVERFLOW.
>> + */
>> +enum arithmetic_result
>> +sqlAddInt64(i64 * pA, bool is_signedA, i64 iB, bool is_signedB)
>> {
>> 	i64 iA = *pA;
>> -	testcase(iA == 0);
>> -	testcase(iA == 1);
>> -	testcase(iB == -1);
>> -	testcase(iB == 0);
>> -	if (iB >= 0) {
>> -		testcase(iA > 0 && LARGEST_INT64 - iA == iB);
>> -		testcase(iA > 0 && LARGEST_INT64 - iA == iB - 1);
>> -		if (iA > 0 && LARGEST_INT64 - iA < iB)
>> -			return 1;
>> +
>> +	bool is_negA = iA < 0 && is_signedA;
>> +	bool is_negB = iB < 0 && is_signedB;
>> +
>> +	/* Make sure we've got only one combination of
>> +	 * positive and negative operands
>> +	 */
> 
> Nit: note that correct way of comment formatting is:
> 
> /*
>   * Make sure we've got only one combination of
>   * positive and negative operands.
>   */
> 

Fixed.

>> /*
>> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
>> index ea9d9d98f..d4bd845fb 100644
>> --- a/src/box/sql/vdbe.c
>> +++ b/src/box/sql/vdbe.c
>> @@ -1672,28 +1672,29 @@ case OP_Remainder: {           /* same as TK_REM, in1, in2, out3 */
>> 	if ((type1 & type2 & MEM_Int)!=0) {
>> 		iA = pIn1->u.i;
>> 		iB = pIn2->u.i;
>> +		bool is_signedA = (type1 & MEM_Unsigned) == 0;
>> +		bool is_signedB = (type2 & MEM_Unsigned) == 0;
>> 		bIntint = 1;
>> +		enum arithmetic_result arr;
>> 		switch( pOp->opcode) {
>> -		case OP_Add:       if (sqlAddInt64(&iB,iA)) goto integer_overflow; break;
>> -		case OP_Subtract:  if (sqlSubInt64(&iB,iA)) goto integer_overflow; break;
>> -		case OP_Multiply:  if (sqlMulInt64(&iB,iA)) goto integer_overflow; break;
>> -		case OP_Divide: {
>> -			if (iA == 0)
>> -				goto division_by_zero;
>> -			if (iA==-1 && iB==SMALLEST_INT64) goto integer_overflow;
>> -			iB /= iA;
>> -			break;
>> +		case OP_Add:       arr = sqlAddInt64(&iB, is_signedA, iA, is_signedB); break;
>> +		case OP_Subtract:  arr = sqlSubInt64(&iB, is_signedA, iA, is_signedB); break;
>> +		case OP_Multiply:  arr = sqlMulInt64(&iB, is_signedA, iA, is_signedB); break;
>> +		case OP_Divide:    arr = sqlDivInt64(&iB, is_signedA, iA, is_signedB); break;
>> +		default: 	   arr = sqlRemInt64(&iB, is_signedA, iA, is_signedB); break;
> 
> The ANSI SQL specification doesn’t describe the behaviour of unsigned arithmetic.
> But in C, for example, there is no unsigned overflow: if a result can’t
> be represented in the unsigned range, it is reduced modulo MAX_UINT + 1 (so MAX_UINT + 1 == 0).
> Should we follow this way? IDK, it needs discussion involving other team members.

Good point.

> 
>