[tarantool-patches] Re: [PATCH 3/6] sql: refactor arithmetic operations to support unsigned ints

Mon Jul 1 17:21:29 MSK 2019

>> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
>> index f4c1cbcca..457c9b92b 100644
>> --- a/src/box/sql/func.c
>> +++ b/src/box/sql/func.c
>> @@ -1620,9 +1622,13 @@ sum_step(struct sql_context *context, int argc, sql_value **argv)
>> 	p->cnt++;
>> 	if (type == MP_INT || type == MP_UINT) {
>> 		int64_t v = sql_value_int64(argv[0]);
>> -		p->rSum += v;
>> +		if (type == MP_INT)
>> +			p->rSum += v;
>> +		else
>> +			p->rSum += (uint64_t) v;
>> 		if ((p->approx | p->overflow) == 0 &&
>> -		    sqlAddInt64(&p->iSum, v) != 0) {
>> +		    sql_add_int(p->iSum, p->is_neg, v, v < 0, &p->iSum,
> 
> 1. As I understand, now sql_value_int64() returns an integer buffer,
> and its result can't be compared with 0. How do you do it here with
> 'v’?

It is fixed in the next patch. Fix itself is obvious. Moved this change
to current patch.

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 457c9b92b..77340c7fb 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -1626,8 +1626,9 @@ sum_step(struct sql_context *context, int argc, sql_value **argv)
                        p->rSum += v;
                else
                        p->rSum += (uint64_t) v;
+
                if ((p->approx | p->overflow) == 0 &&
-                   sql_add_int(p->iSum, p->is_neg, v, v < 0, &p->iSum,
+                   sql_add_int(p->iSum, p->is_neg, v, type == MP_INT, &p->iSum,
                                &p->is_neg) != 0) {
                        p->overflow = 1;

>> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
>> index da17e24ed..c0e2ab699 100644
>> --- a/src/box/sql/sqlInt.h
>> +++ b/src/box/sql/sqlInt.h
>> @@ -4461,10 +4461,37 @@ Expr *sqlExprAddCollateString(Parse *, Expr *, const char *);
>> +int
>> +sql_add_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> +	    int64_t *res, bool *is_res_neg);
>> +
>> +int
>> +sql_sub_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> +	    int64_t *res, bool *is_res_neg);
>> +
>> +int
>> +sql_mul_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> +	    int64_t *res, bool *is_res_neg);
>> +
>> +int
>> +sql_div_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> +	    int64_t *res, bool *is_res_neg);
>> +
>> +int
>> +sql_rem_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> +	    int64_t *res, bool *is_res_neg);
> 
> 2. That function is usually called 'mod', not 'rem’.

Opcode is called OP_Reminder, so to avoid confusions
I guess it would be better to keep _rem_ name. If you
insist I will rename it.

>> diff --git a/src/box/sql/util.c b/src/box/sql/util.c
>> index 2e7c298e1..ee6a83ad5 100644
>> --- a/src/box/sql/util.c
>> +++ b/src/box/sql/util.c
>> @@ -1194,89 +1194,196 @@ sqlSafetyCheckSickOrOk(sql * db)
>> 	}
>> }
>> 
>> -/*
>> - * Attempt to add, substract, or multiply the 64-bit signed value iB against
>> - * the other 64-bit signed integer at *pA and store the result in *pA.
>> - * Return 0 on success.  Or if the operation would have resulted in an
>> - * overflow, leave *pA unchanged and return 1.
>> - */
>> int
>> -sqlAddInt64(i64 * pA, i64 iB)
>> +sql_add_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> +	    int64_t *res, bool *is_res_neg)
>> {
>> -	i64 iA = *pA;
>> -	testcase(iA == 0);
>> -	testcase(iA == 1);
>> -	testcase(iB == -1);
>> -	testcase(iB == 0);
>> -	if (iB >= 0) {
>> -		testcase(iA > 0 && LARGEST_INT64 - iA == iB);
>> -		testcase(iA > 0 && LARGEST_INT64 - iA == iB - 1);
>> -		if (iA > 0 && LARGEST_INT64 - iA < iB)
>> -			return 1;
>> -	} else {
>> -		testcase(iA < 0 && -(iA + LARGEST_INT64) == iB + 1);
>> -		testcase(iA < 0 && -(iA + LARGEST_INT64) == iB + 2);
>> -		if (iA < 0 && -(iA + LARGEST_INT64) > iB + 1)
>> -			return 1;
>> +	/* Addition of two negative integers. */
>> +	if (is_lhs_neg && is_rhs_neg) {
>> +		assert(lhs < 0 && rhs < 0);
>> +		/* This is the same as (lhs + rhs) < INT64_MIN */
>> +		if (-(lhs + INT64_MAX) > rhs + 1)
>> +				return -1;
> 
> 3. Why so complex? Why not 'lhs < INT64_MIN - rhs'? I did it, and the
> tests pass. If they should not, then please, provide a test.

Well, it is the same condition. Let’s use your variant:

diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index 3cbcda04f..73d7b95b6 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -1199,7 +1199,7 @@ sql_add_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
        if (is_lhs_neg && is_rhs_neg) {
                assert(lhs < 0 && rhs < 0);
                /* This is the same as (lhs + rhs) < INT64_MIN */
-               if (-(lhs + INT64_MAX) > rhs + 1)
+               if (lhs < INT64_MIN - rhs)
                                return -1;
                *is_res_neg = true;
                *res = lhs + rhs;

>> +	/*
>> +	 * Make sure we've got only one combination of
>> +	 * positive and negative operands.
>> +	 */
>> +	if (is_lhs_neg) {
>> +		SWAP(is_lhs_neg, is_rhs_neg);
>> +		SWAP(lhs, rhs);
>> +	}
>> +	assert(! is_lhs_neg && is_rhs_neg);
> 
> 4. You have spent more code lines on the SWAP and its
> explanation, than could on avoidance of this SWAP. In
> addition, SWAP version is slower obviously.

Ok, np, refactored:

@@ -1216,16 +1216,8 @@ sql_add_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
                *res = lhs + rhs;
                return 0;
        }
-       /*
-        * Make sure we've got only one combination of
-        * positive and negative operands.
-        */
-       if (is_lhs_neg) {
-               SWAP(is_lhs_neg, is_rhs_neg);
-               SWAP(lhs, rhs);
-       }
-       assert(! is_lhs_neg && is_rhs_neg);
-       *is_res_neg = (uint64_t)(-rhs) > (uint64_t) lhs;
+       *is_res_neg = is_rhs_neg ? (uint64_t)(-rhs) > (uint64_t) lhs :
+                                  (uint64_t)(-lhs) > (uint64_t) rhs;
        *res = lhs + rhs;
        return 0;
 }

> 
> -	/*
> -	 * Make sure we've got only one combination of
> -	 * positive and negative operands.
> -	 */
> -	if (is_lhs_neg) {
> -		SWAP(is_lhs_neg, is_rhs_neg);
> -		SWAP(lhs, rhs);
> -	}
> -	assert(! is_lhs_neg && is_rhs_neg);
> -	*is_res_neg = (uint64_t)(-rhs) > (uint64_t) lhs;
> +	if (is_rhs_neg)
> +		*is_res_neg = (uint64_t)(-rhs) > (uint64_t) lhs;
> +	else
> +		*is_res_neg = (uint64_t)(-lhs) > (uint64_t) rhs;
> 	*res = lhs + rhs;
> 	return 0;

>> int
>> -sqlSubInt64(i64 * pA, i64 iB)
>> +sql_sub_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> +	    int64_t *res, bool *is_res_neg)
>> {
>> -	testcase(iB == SMALLEST_INT64 + 1);
>> -	if (iB == SMALLEST_INT64) {
>> -		testcase((*pA) == (-1));
>> -		testcase((*pA) == 0);
>> -		if ((*pA) >= 0)
>> -			return 1;
>> -		*pA -= iB;
>> +	if (!is_lhs_neg && !is_rhs_neg) {
>> +		uint64_t u_lhs = (uint64_t) lhs;
>> +		uint64_t u_rhs = (uint64_t) rhs;
>> +		/*
>> +		 * Unsigned arithmetic has no overflows, so to
>> +		 * check if lhs is less than rhs we can compare
>> +		 * result of their subtraction with lhs.
> 
> 5. Why not to compare just the values? They are of the same
> sign, use '<‘.

Thx, fixed:

@@ -1237,19 +1229,15 @@ sql_sub_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
        if (!is_lhs_neg && !is_rhs_neg) {
                uint64_t u_lhs = (uint64_t) lhs;
                uint64_t u_rhs = (uint64_t) rhs;
-               /*
-                * Unsigned arithmetic has no overflows, so to
-                * check if lhs is less than rhs we can compare
-                * result of their subtraction with lhs.
-                */
-               *is_res_neg = u_lhs < (uint64_t)(u_lhs - u_rhs);
-               if (! *is_res_neg) {
+               if (u_lhs >= u_rhs) {
+                       *is_res_neg = false;
                        *res = u_lhs - u_rhs;
                        return 0;
                }
                if (u_rhs > (uint64_t) INT64_MAX + 1 &&
                    u_lhs < (uint64_t)(u_rhs - INT64_MAX - 1))
                        return -1;
+               *is_res_neg = true;
                *res = lhs - rhs;
                return 0;
        }

>> +		 */
>> +		*is_res_neg = u_lhs < (uint64_t)(u_lhs - u_rhs);
>> +		if (! *is_res_neg) {
>> +			*res = u_lhs - u_rhs;
>> +			return 0;
>> +		}
>> +		if (u_rhs > (uint64_t) INT64_MAX + 1 &&
>> +		    u_lhs < (uint64_t)(u_rhs - INT64_MAX - 1))
> 
> 6. Too complex. You wanted to check u_lhs - u_rhs < INT64_MIN.
> Then check it, in a one comparison.
> 
>    u_lhs - u_rhs < INT64_MIN
> 
> But u_lhs is < u_rhs, so swap them and change the signs:
> 
>    u_rhs - u_lhs > (uint64_t) INT64_MAX + 1
> 
> No overflows are possible here.

Thx! Applied this diff:

-               if (u_rhs > (uint64_t) INT64_MAX + 1 &&
-                   u_lhs < (uint64_t)(u_rhs - INT64_MAX - 1))
+               if (u_rhs - u_lhs > (uint64_t) INT64_MAX + 1)
                        return -1;

>> int
>> -sqlMulInt64(i64 * pA, i64 iB)
>> +sql_mul_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> +	    int64_t *res, bool *is_res_neg)
>> {
>> -	i64 iA = *pA;
>> -	if (iB > 0) {
>> -		if (iA > LARGEST_INT64 / iB)
>> -			return 1;
>> -		if (iA < SMALLEST_INT64 / iB)
>> -			return 1;
>> -	} else if (iB < 0) {
>> -		if (iA > 0) {
>> -			if (iB < SMALLEST_INT64 / iA)
>> -				return 1;
>> -		} else if (iA < 0) {
>> -			if (iB == SMALLEST_INT64)
>> -				return 1;
>> -			if (iA == SMALLEST_INT64)
>> -				return 1;
>> -			if (-iA > LARGEST_INT64 / -iB)
>> -				return 1;
>> -		}
>> +	if (lhs == 0 || rhs == 0) {
>> +		*res = 0;
>> +		*is_res_neg = false;
>> +		return 0;
>> +	}
>> +	/*
>> +	 * Multiplication of integers with same sign leads to
>> +	 * unsigned result.
>> +	 */
>> +	if (! (is_lhs_neg ^ is_rhs_neg)) {
> 
> 7. Dear God, what is it? What about '!=' operator? Is it
> disabled, or !^ is faster?

Ok, replaced:

@@ -1282,7 +1269,7 @@ sql_mul_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
         * Multiplication of integers with same sign leads to
         * unsigned result.
         */
-       if (! (is_lhs_neg ^ is_rhs_neg)) {
+       if (is_lhs_neg == is_rhs_neg) {
                uint64_t u_res = is_lhs_neg ?
                                 (uint64_t) (-lhs) * (uint64_t) (-rhs) :
                                 (uint64_t) lhs * (uint64_t) (rhs);

>> +		uint64_t u_res = is_lhs_neg ?
>> +				 (uint64_t) (-lhs) * (uint64_t) (-rhs) :
>> +				 (uint64_t) lhs * (uint64_t) (rhs);
>> +		/*
>> +		 * Overflow detection is quite primitive due to
>> +		 * the absence of overflow with unsigned values:
>> +		 * lhs * rhs == res --> rhs == res / lhs;
>> +		 * If this predicate is false, then result was
>> +		 * reduced modulo UINT_MAX + 1.
>> +		 */
>> +		if ((is_lhs_neg && u_res / (uint64_t) (-lhs) !=
>> +					   (uint64_t) (-rhs)) ||
>> +		    (!is_lhs_neg && u_res / (uint64_t) lhs != (uint64_t) rhs))
>> +			return -1;
>> +		*is_res_neg = false;
>> +		*res = u_res;
>> +		return 0;
>> +	}
>> +	/*
>> +	 * Make sure we've got only one combination of
>> +	 * positive and negative operands.
>> +	 */
>> +	if (is_lhs_neg) {
>> +		SWAP(is_lhs_neg, is_rhs_neg);
>> +		SWAP(lhs, rhs);
>> 	}
>> -	*pA = iA * iB;
>> +	assert(! is_lhs_neg && is_rhs_neg);
>> +	uint64_t u_rhs = (uint64_t) (-rhs);
>> +	uint64_t u_res = u_rhs * (uint64_t) lhs;
>> +	if (u_res / u_rhs != (uint64_t) lhs ||
>> +	    u_rhs * (uint64_t) lhs > (uint64_t) INT64_MAX + 1)
> 
> 8. You have already calculated that multiplication, it is
> u_res.

@@ -1313,7 +1300,7 @@ sql_mul_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
        uint64_t u_rhs = (uint64_t) (-rhs);
        uint64_t u_res = u_rhs * (uint64_t) lhs;
        if (u_res / u_rhs != (uint64_t) lhs ||
-           u_rhs * (uint64_t) lhs > (uint64_t) INT64_MAX + 1)
+           u_res > (uint64_t) INT64_MAX + 1)
                return -1;
        *is_res_neg = true;
        *res = lhs * rhs;

>> +int
>> +sql_div_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> +	    int64_t *res, bool *is_res_neg)
>> +{
>> +	if (lhs == 0) {
>> +		*res = 0;
>> +		*is_res_neg = false;
>> +		return 0;
>> +	}
>> +	/*
>> +	 * The only possible overflow situations is when operands
>> +	 * of different signs and result turns out to be less
>> +	 * than INT64_MIN.
>> +	 */
>> +	if (is_lhs_neg ^ is_rhs_neg) {
> 
> 9. 😢

@@ -1334,7 +1321,7 @@ sql_div_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
         * of different signs and result turns out to be less
         * than INT64_MIN.
         */
-       if (is_lhs_neg ^ is_rhs_neg) {
+       if (is_lhs_neg != is_rhs_neg) {
                uint64_t u_res = is_lhs_neg ?
                                 (uint64_t) (-lhs) / (uint64_t) rhs :
                                 (uint64_t) lhs / (uint64_t) (-rhs);

>> +		uint64_t u_res = is_lhs_neg ?
>> +				 (uint64_t) (-lhs) / (uint64_t) rhs :
>> +				 (uint64_t) lhs / (uint64_t) (-rhs);
>> +		if (u_res > (uint64_t) INT64_MAX + 1)
>> +			return -1;
>> +		*is_res_neg = u_res != 0 ? true : false;
> 
> 10. Does not it look tautological to you? 'if <bool> then true else false’.

                if (u_res > (uint64_t) INT64_MAX + 1)
                        return -1;
-               *is_res_neg = u_res != 0 ? true : false;
+               *is_res_neg = u_res != 0;
                *res = -u_res;
                return 0;
        }

>> +		*res = -u_res;
>> +		return 0;
>> +	}
>> +	*is_res_neg = false;
>> +	/*
>> +	 * Another one special case: INT64_MIN / -1
>> +	 * Signed division leads to program termination due
>> +	 * to overflow.
>> +	 */
>> +	if (is_lhs_neg && lhs == INT64_MIN && rhs == -1) {
>> +		*res = (uint64_t) INT64_MAX + 1;
>> +		return 0;
>> +	}
>> +	*res = is_lhs_neg ? (uint64_t) (lhs / rhs) :
> 
> 11. Why do you need that cast? *res is int64_t, so it makes no
> sense to cast the rvalue to uint64_t.

Otherwise compilation error is raised (on linux target):

/tarantool/src/box/sql/util.c: In function ‘sql_div_int’:
/tarantool/src/box/sql/util.c:1342:34: error: signed and unsigned type in conditional expression [-Werror=sign-compare]
  *res = is_lhs_neg ? (lhs / rhs) : (uint64_t) lhs / (uint64_t) rhs;

>> +	       (uint64_t) lhs / (uint64_t) rhs;
>> 	return 0;
>> }
>> 
>> -/*
>> - * Compute the absolute value of a 32-bit signed integer, of possible.  Or
>> - * if the integer has a value of -2147483648, return +2147483647
>> - */
>> int
>> -sqlAbsInt32(int x)
>> +sql_rem_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> +	    int64_t *res, bool *is_res_neg)
>> {
>> -	if (x >= 0)
>> -		return x;
>> -	if (x == (int)0x80000000)
>> -		return 0x7fffffff;
>> -	return -x;
>> +	if (is_lhs_neg) {
>> +		uint64_t u_lhs = (uint64_t) (-lhs);
>> +		uint64_t u_res = u_lhs % (uint64_t) rhs;
> 
> 12. Why do you ignore sign of 'rhs'? You can't just cast it,
> if it is negative.

Yep, you are right, fixed:

@@ -1365,7 +1351,9 @@ sql_rem_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
 {
        if (is_lhs_neg) {
                uint64_t u_lhs = (uint64_t) (-lhs);
-               uint64_t u_res = u_lhs % (uint64_t) rhs;
+               uint64_t u_rhs = is_rhs_neg ? (uint64_t) (-rhs) :
+                                (uint64_t) rhs;
+               uint64_t u_res = u_lhs % u_rhs;
                if (u_res > (uint64_t) INT64_MAX + 1)
                        return -1;
                *res = -u_res;

>> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
>> index d141397a0..9c28b9131 100644
>> --- a/src/box/sql/vdbe.c
>> +++ b/src/box/sql/vdbe.c
>> @@ -1621,27 +1621,48 @@ case OP_Remainder: {           /* same as TK_REM, in1, in2, out3 */
>> 	    (type2 & (MEM_Int | MEM_UInt)) !=0) {
>> 		iA = pIn1->u.i;
>> 		iB = pIn2->u.i;
>> +		bool is_lhs_neg = pIn1->flags & MEM_Int;
>> +		bool is_rhs_neg = pIn2->flags & MEM_Int;
>> +		bool is_res_neg;
>> 		bIntint = 1;
>> 		switch( pOp->opcode) {
>> -		case OP_Add:       if (sqlAddInt64(&iB,iA)) goto integer_overflow; break;
>> -		case OP_Subtract:  if (sqlSubInt64(&iB,iA)) goto integer_overflow; break;
>> -		case OP_Multiply:  if (sqlMulInt64(&iB,iA)) goto integer_overflow; break;
>> +		case OP_Add: {
>> +			if (sql_add_int(iA, is_lhs_neg, iB, is_rhs_neg,
>> +					(int64_t *) &iB, &is_res_neg) != 0)
> 
> 13. Why do you need all these casts to (int64_t *)? These variables already
> are signed 64 bit integers.

On linux (Travis targets) I get compilation errors since there
they (i64 and int64_t) are considered to be different types. 

>> @@ -5134,7 +5175,11 @@ case OP_OffsetLimit: {    /* in1, out2, in3 */
>> 	assert(pIn1->flags & (MEM_Int | MEM_UInt));
>> 	assert(pIn3->flags & (MEM_Int | MEM_UInt));
>> 	x = pIn1->u.i;
>> -	if (x<=0 || sqlAddInt64(&x, pIn3->u.i > 0 ? pIn3->u.i : 0)) {
>> +	int64_t rhs = pIn3->flags & MEM_Int ? 0 : pIn3->u.u;
>> +	bool unused;
>> +	if ((x == 0 || pIn1->flags & MEM_Int) ||
>> +	    sql_add_int(x, pIn1->flags & MEM_Int, rhs, false,
> 
> 14. If you get to this line, then (pIn1->flags & MEM_Int) is already
> 0 and can be inlined.

Wait, why? If x == 0 then pIn1->flags == MEM_UInt -
we consider 0 as an unsigned value.

>> +			(int64_t *) &x, &unused) != 0) {
>> 		/* If the LIMIT is less than or equal to zero, loop forever.  This
>> 		 * is documented.  But also, if the LIMIT+OFFSET exceeds 2^63 then
>> 		 * also loop forever.  This is undocumented.  In fact, one could argue
> 
> Consider my review fixes here and on the branch in a
> separate commit. These changes and the comments are arguable,
> probably I am wrong somewhere.

Almost all applied (see diffs above).