[tarantool-patches] Re: [PATCH 3/6] sql: refactor arithmetic operations to support unsigned ints
n.pettik
korablev at tarantool.org
Mon Jul 1 17:21:29 MSK 2019
>> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
>> index f4c1cbcca..457c9b92b 100644
>> --- a/src/box/sql/func.c
>> +++ b/src/box/sql/func.c
>> @@ -1620,9 +1622,13 @@ sum_step(struct sql_context *context, int argc, sql_value **argv)
>> p->cnt++;
>> if (type == MP_INT || type == MP_UINT) {
>> int64_t v = sql_value_int64(argv[0]);
>> - p->rSum += v;
>> + if (type == MP_INT)
>> + p->rSum += v;
>> + else
>> + p->rSum += (uint64_t) v;
>> if ((p->approx | p->overflow) == 0 &&
>> - sqlAddInt64(&p->iSum, v) != 0) {
>> + sql_add_int(p->iSum, p->is_neg, v, v < 0, &p->iSum,
>
> 1. As I understand, now sql_value_int64() returns an integer buffer,
> and its result can't be compared with 0. How do you do it here with
> 'v’?
It is fixed in the next patch. Fix itself is obvious. Moved this change
to current patch.
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index 457c9b92b..77340c7fb 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -1626,8 +1626,9 @@ sum_step(struct sql_context *context, int argc, sql_value **argv)
p->rSum += v;
else
p->rSum += (uint64_t) v;
+
if ((p->approx | p->overflow) == 0 &&
- sql_add_int(p->iSum, p->is_neg, v, v < 0, &p->iSum,
+ sql_add_int(p->iSum, p->is_neg, v, type == MP_INT, &p->iSum,
&p->is_neg) != 0) {
p->overflow = 1;
>> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
>> index da17e24ed..c0e2ab699 100644
>> --- a/src/box/sql/sqlInt.h
>> +++ b/src/box/sql/sqlInt.h
>> @@ -4461,10 +4461,37 @@ Expr *sqlExprAddCollateString(Parse *, Expr *, const char *);
>> +int
>> +sql_add_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> + int64_t *res, bool *is_res_neg);
>> +
>> +int
>> +sql_sub_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> + int64_t *res, bool *is_res_neg);
>> +
>> +int
>> +sql_mul_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> + int64_t *res, bool *is_res_neg);
>> +
>> +int
>> +sql_div_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> + int64_t *res, bool *is_res_neg);
>> +
>> +int
>> +sql_rem_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> + int64_t *res, bool *is_res_neg);
>
> 2. That function is usually called 'mod', not 'rem’.
Opcode is called OP_Reminder, so to avoid confusions
I guess it would be better to keep _rem_ name. If you
insist I will rename it.
>> diff --git a/src/box/sql/util.c b/src/box/sql/util.c
>> index 2e7c298e1..ee6a83ad5 100644
>> --- a/src/box/sql/util.c
>> +++ b/src/box/sql/util.c
>> @@ -1194,89 +1194,196 @@ sqlSafetyCheckSickOrOk(sql * db)
>> }
>> }
>>
>> -/*
>> - * Attempt to add, substract, or multiply the 64-bit signed value iB against
>> - * the other 64-bit signed integer at *pA and store the result in *pA.
>> - * Return 0 on success. Or if the operation would have resulted in an
>> - * overflow, leave *pA unchanged and return 1.
>> - */
>> int
>> -sqlAddInt64(i64 * pA, i64 iB)
>> +sql_add_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> + int64_t *res, bool *is_res_neg)
>> {
>> - i64 iA = *pA;
>> - testcase(iA == 0);
>> - testcase(iA == 1);
>> - testcase(iB == -1);
>> - testcase(iB == 0);
>> - if (iB >= 0) {
>> - testcase(iA > 0 && LARGEST_INT64 - iA == iB);
>> - testcase(iA > 0 && LARGEST_INT64 - iA == iB - 1);
>> - if (iA > 0 && LARGEST_INT64 - iA < iB)
>> - return 1;
>> - } else {
>> - testcase(iA < 0 && -(iA + LARGEST_INT64) == iB + 1);
>> - testcase(iA < 0 && -(iA + LARGEST_INT64) == iB + 2);
>> - if (iA < 0 && -(iA + LARGEST_INT64) > iB + 1)
>> - return 1;
>> + /* Addition of two negative integers. */
>> + if (is_lhs_neg && is_rhs_neg) {
>> + assert(lhs < 0 && rhs < 0);
>> + /* This is the same as (lhs + rhs) < INT64_MIN */
>> + if (-(lhs + INT64_MAX) > rhs + 1)
>> + return -1;
>
> 3. Why so complex? Why not 'lhs < INT64_MIN - rhs'? I did it, and the
> tests pass. If they should not, then please, provide a test.
Well, it is the same condition. Let’s use your variant:
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index 3cbcda04f..73d7b95b6 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -1199,7 +1199,7 @@ sql_add_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
if (is_lhs_neg && is_rhs_neg) {
assert(lhs < 0 && rhs < 0);
/* This is the same as (lhs + rhs) < INT64_MIN */
- if (-(lhs + INT64_MAX) > rhs + 1)
+ if (lhs < INT64_MIN - rhs)
return -1;
*is_res_neg = true;
*res = lhs + rhs;
>> + /*
>> + * Make sure we've got only one combination of
>> + * positive and negative operands.
>> + */
>> + if (is_lhs_neg) {
>> + SWAP(is_lhs_neg, is_rhs_neg);
>> + SWAP(lhs, rhs);
>> + }
>> + assert(! is_lhs_neg && is_rhs_neg);
>
> 4. You have spent more code lines on the SWAP and its
> explanation, than could on avoidance of this SWAP. In
> addition, SWAP version is slower obviously.
Ok, np, refactored:
@@ -1216,16 +1216,8 @@ sql_add_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
*res = lhs + rhs;
return 0;
}
- /*
- * Make sure we've got only one combination of
- * positive and negative operands.
- */
- if (is_lhs_neg) {
- SWAP(is_lhs_neg, is_rhs_neg);
- SWAP(lhs, rhs);
- }
- assert(! is_lhs_neg && is_rhs_neg);
- *is_res_neg = (uint64_t)(-rhs) > (uint64_t) lhs;
+ *is_res_neg = is_rhs_neg ? (uint64_t)(-rhs) > (uint64_t) lhs :
+ (uint64_t)(-lhs) > (uint64_t) rhs;
*res = lhs + rhs;
return 0;
}
>
> - /*
> - * Make sure we've got only one combination of
> - * positive and negative operands.
> - */
> - if (is_lhs_neg) {
> - SWAP(is_lhs_neg, is_rhs_neg);
> - SWAP(lhs, rhs);
> - }
> - assert(! is_lhs_neg && is_rhs_neg);
> - *is_res_neg = (uint64_t)(-rhs) > (uint64_t) lhs;
> + if (is_rhs_neg)
> + *is_res_neg = (uint64_t)(-rhs) > (uint64_t) lhs;
> + else
> + *is_res_neg = (uint64_t)(-lhs) > (uint64_t) rhs;
> *res = lhs + rhs;
> return 0;
>> int
>> -sqlSubInt64(i64 * pA, i64 iB)
>> +sql_sub_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> + int64_t *res, bool *is_res_neg)
>> {
>> - testcase(iB == SMALLEST_INT64 + 1);
>> - if (iB == SMALLEST_INT64) {
>> - testcase((*pA) == (-1));
>> - testcase((*pA) == 0);
>> - if ((*pA) >= 0)
>> - return 1;
>> - *pA -= iB;
>> + if (!is_lhs_neg && !is_rhs_neg) {
>> + uint64_t u_lhs = (uint64_t) lhs;
>> + uint64_t u_rhs = (uint64_t) rhs;
>> + /*
>> + * Unsigned arithmetic has no overflows, so to
>> + * check if lhs is less than rhs we can compare
>> + * result of their subtraction with lhs.
>
> 5. Why not to compare just the values? They are of the same
> sign, use '<‘.
Thx, fixed:
@@ -1237,19 +1229,15 @@ sql_sub_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
if (!is_lhs_neg && !is_rhs_neg) {
uint64_t u_lhs = (uint64_t) lhs;
uint64_t u_rhs = (uint64_t) rhs;
- /*
- * Unsigned arithmetic has no overflows, so to
- * check if lhs is less than rhs we can compare
- * result of their subtraction with lhs.
- */
- *is_res_neg = u_lhs < (uint64_t)(u_lhs - u_rhs);
- if (! *is_res_neg) {
+ if (u_lhs >= u_rhs) {
+ *is_res_neg = false;
*res = u_lhs - u_rhs;
return 0;
}
if (u_rhs > (uint64_t) INT64_MAX + 1 &&
u_lhs < (uint64_t)(u_rhs - INT64_MAX - 1))
return -1;
+ *is_res_neg = true;
*res = lhs - rhs;
return 0;
}
>> + */
>> + *is_res_neg = u_lhs < (uint64_t)(u_lhs - u_rhs);
>> + if (! *is_res_neg) {
>> + *res = u_lhs - u_rhs;
>> + return 0;
>> + }
>> + if (u_rhs > (uint64_t) INT64_MAX + 1 &&
>> + u_lhs < (uint64_t)(u_rhs - INT64_MAX - 1))
>
> 6. Too complex. You wanted to check u_lhs - u_rhs < INT64_MIN.
> Then check it, in a one comparison.
>
> u_lhs - u_rhs < INT64_MIN
>
> But u_lhs is < u_rhs, so swap them and change the signs:
>
> u_rhs - u_lhs > (uint64_t) INT64_MAX + 1
>
> No overflows are possible here.
Thx! Applied this diff:
- if (u_rhs > (uint64_t) INT64_MAX + 1 &&
- u_lhs < (uint64_t)(u_rhs - INT64_MAX - 1))
+ if (u_rhs - u_lhs > (uint64_t) INT64_MAX + 1)
return -1;
>> int
>> -sqlMulInt64(i64 * pA, i64 iB)
>> +sql_mul_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> + int64_t *res, bool *is_res_neg)
>> {
>> - i64 iA = *pA;
>> - if (iB > 0) {
>> - if (iA > LARGEST_INT64 / iB)
>> - return 1;
>> - if (iA < SMALLEST_INT64 / iB)
>> - return 1;
>> - } else if (iB < 0) {
>> - if (iA > 0) {
>> - if (iB < SMALLEST_INT64 / iA)
>> - return 1;
>> - } else if (iA < 0) {
>> - if (iB == SMALLEST_INT64)
>> - return 1;
>> - if (iA == SMALLEST_INT64)
>> - return 1;
>> - if (-iA > LARGEST_INT64 / -iB)
>> - return 1;
>> - }
>> + if (lhs == 0 || rhs == 0) {
>> + *res = 0;
>> + *is_res_neg = false;
>> + return 0;
>> + }
>> + /*
>> + * Multiplication of integers with same sign leads to
>> + * unsigned result.
>> + */
>> + if (! (is_lhs_neg ^ is_rhs_neg)) {
>
> 7. Dear God, what is it? What about '!=' operator? Is it
> disabled, or !^ is faster?
Ok, replaced:
@@ -1282,7 +1269,7 @@ sql_mul_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
* Multiplication of integers with same sign leads to
* unsigned result.
*/
- if (! (is_lhs_neg ^ is_rhs_neg)) {
+ if (is_lhs_neg == is_rhs_neg) {
uint64_t u_res = is_lhs_neg ?
(uint64_t) (-lhs) * (uint64_t) (-rhs) :
(uint64_t) lhs * (uint64_t) (rhs);
>> + uint64_t u_res = is_lhs_neg ?
>> + (uint64_t) (-lhs) * (uint64_t) (-rhs) :
>> + (uint64_t) lhs * (uint64_t) (rhs);
>> + /*
>> + * Overflow detection is quite primitive due to
>> + * the absence of overflow with unsigned values:
>> + * lhs * rhs == res --> rhs == res / lhs;
>> + * If this predicate is false, then result was
>> + * reduced modulo UINT_MAX + 1.
>> + */
>> + if ((is_lhs_neg && u_res / (uint64_t) (-lhs) !=
>> + (uint64_t) (-rhs)) ||
>> + (!is_lhs_neg && u_res / (uint64_t) lhs != (uint64_t) rhs))
>> + return -1;
>> + *is_res_neg = false;
>> + *res = u_res;
>> + return 0;
>> + }
>> + /*
>> + * Make sure we've got only one combination of
>> + * positive and negative operands.
>> + */
>> + if (is_lhs_neg) {
>> + SWAP(is_lhs_neg, is_rhs_neg);
>> + SWAP(lhs, rhs);
>> }
>> - *pA = iA * iB;
>> + assert(! is_lhs_neg && is_rhs_neg);
>> + uint64_t u_rhs = (uint64_t) (-rhs);
>> + uint64_t u_res = u_rhs * (uint64_t) lhs;
>> + if (u_res / u_rhs != (uint64_t) lhs ||
>> + u_rhs * (uint64_t) lhs > (uint64_t) INT64_MAX + 1)
>
> 8. You have already calculated that multiplication, it is
> u_res.
@@ -1313,7 +1300,7 @@ sql_mul_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
uint64_t u_rhs = (uint64_t) (-rhs);
uint64_t u_res = u_rhs * (uint64_t) lhs;
if (u_res / u_rhs != (uint64_t) lhs ||
- u_rhs * (uint64_t) lhs > (uint64_t) INT64_MAX + 1)
+ u_res > (uint64_t) INT64_MAX + 1)
return -1;
*is_res_neg = true;
*res = lhs * rhs;
>> +int
>> +sql_div_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> + int64_t *res, bool *is_res_neg)
>> +{
>> + if (lhs == 0) {
>> + *res = 0;
>> + *is_res_neg = false;
>> + return 0;
>> + }
>> + /*
>> + * The only possible overflow situations is when operands
>> + * of different signs and result turns out to be less
>> + * than INT64_MIN.
>> + */
>> + if (is_lhs_neg ^ is_rhs_neg) {
>
> 9. 😢
@@ -1334,7 +1321,7 @@ sql_div_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
* of different signs and result turns out to be less
* than INT64_MIN.
*/
- if (is_lhs_neg ^ is_rhs_neg) {
+ if (is_lhs_neg != is_rhs_neg) {
uint64_t u_res = is_lhs_neg ?
(uint64_t) (-lhs) / (uint64_t) rhs :
(uint64_t) lhs / (uint64_t) (-rhs);
>> + uint64_t u_res = is_lhs_neg ?
>> + (uint64_t) (-lhs) / (uint64_t) rhs :
>> + (uint64_t) lhs / (uint64_t) (-rhs);
>> + if (u_res > (uint64_t) INT64_MAX + 1)
>> + return -1;
>> + *is_res_neg = u_res != 0 ? true : false;
>
> 10. Does not it look tautological to you? 'if <bool> then true else false’.
if (u_res > (uint64_t) INT64_MAX + 1)
return -1;
- *is_res_neg = u_res != 0 ? true : false;
+ *is_res_neg = u_res != 0;
*res = -u_res;
return 0;
}
>> + *res = -u_res;
>> + return 0;
>> + }
>> + *is_res_neg = false;
>> + /*
>> + * Another one special case: INT64_MIN / -1
>> + * Signed division leads to program termination due
>> + * to overflow.
>> + */
>> + if (is_lhs_neg && lhs == INT64_MIN && rhs == -1) {
>> + *res = (uint64_t) INT64_MAX + 1;
>> + return 0;
>> + }
>> + *res = is_lhs_neg ? (uint64_t) (lhs / rhs) :
>
> 11. Why do you need that cast? *res is int64_t, so it makes no
> sense to cast the rvalue to uint64_t.
Otherwise compilation error is raised (on linux target):
/tarantool/src/box/sql/util.c: In function ‘sql_div_int’:
/tarantool/src/box/sql/util.c:1342:34: error: signed and unsigned type in conditional expression [-Werror=sign-compare]
*res = is_lhs_neg ? (lhs / rhs) : (uint64_t) lhs / (uint64_t) rhs;
>> + (uint64_t) lhs / (uint64_t) rhs;
>> return 0;
>> }
>>
>> -/*
>> - * Compute the absolute value of a 32-bit signed integer, of possible. Or
>> - * if the integer has a value of -2147483648, return +2147483647
>> - */
>> int
>> -sqlAbsInt32(int x)
>> +sql_rem_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
>> + int64_t *res, bool *is_res_neg)
>> {
>> - if (x >= 0)
>> - return x;
>> - if (x == (int)0x80000000)
>> - return 0x7fffffff;
>> - return -x;
>> + if (is_lhs_neg) {
>> + uint64_t u_lhs = (uint64_t) (-lhs);
>> + uint64_t u_res = u_lhs % (uint64_t) rhs;
>
> 12. Why do you ignore sign of 'rhs'? You can't just cast it,
> if it is negative.
Yep, you are right, fixed:
@@ -1365,7 +1351,9 @@ sql_rem_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg,
{
if (is_lhs_neg) {
uint64_t u_lhs = (uint64_t) (-lhs);
- uint64_t u_res = u_lhs % (uint64_t) rhs;
+ uint64_t u_rhs = is_rhs_neg ? (uint64_t) (-rhs) :
+ (uint64_t) rhs;
+ uint64_t u_res = u_lhs % u_rhs;
if (u_res > (uint64_t) INT64_MAX + 1)
return -1;
*res = -u_res;
>> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
>> index d141397a0..9c28b9131 100644
>> --- a/src/box/sql/vdbe.c
>> +++ b/src/box/sql/vdbe.c
>> @@ -1621,27 +1621,48 @@ case OP_Remainder: { /* same as TK_REM, in1, in2, out3 */
>> (type2 & (MEM_Int | MEM_UInt)) !=0) {
>> iA = pIn1->u.i;
>> iB = pIn2->u.i;
>> + bool is_lhs_neg = pIn1->flags & MEM_Int;
>> + bool is_rhs_neg = pIn2->flags & MEM_Int;
>> + bool is_res_neg;
>> bIntint = 1;
>> switch( pOp->opcode) {
>> - case OP_Add: if (sqlAddInt64(&iB,iA)) goto integer_overflow; break;
>> - case OP_Subtract: if (sqlSubInt64(&iB,iA)) goto integer_overflow; break;
>> - case OP_Multiply: if (sqlMulInt64(&iB,iA)) goto integer_overflow; break;
>> + case OP_Add: {
>> + if (sql_add_int(iA, is_lhs_neg, iB, is_rhs_neg,
>> + (int64_t *) &iB, &is_res_neg) != 0)
>
> 13. Why do you need all these casts to (int64_t *)? These variables already
> are signed 64 bit integers.
On linux (Travis targets) I get compilation errors since there
they (i64 and int64_t) are considered to be different types.
>> @@ -5134,7 +5175,11 @@ case OP_OffsetLimit: { /* in1, out2, in3 */
>> assert(pIn1->flags & (MEM_Int | MEM_UInt));
>> assert(pIn3->flags & (MEM_Int | MEM_UInt));
>> x = pIn1->u.i;
>> - if (x<=0 || sqlAddInt64(&x, pIn3->u.i > 0 ? pIn3->u.i : 0)) {
>> + int64_t rhs = pIn3->flags & MEM_Int ? 0 : pIn3->u.u;
>> + bool unused;
>> + if ((x == 0 || pIn1->flags & MEM_Int) ||
>> + sql_add_int(x, pIn1->flags & MEM_Int, rhs, false,
>
> 14. If you get to this line, then (pIn1->flags & MEM_Int) is already
> 0 and can be inlined.
Wait, why? If x == 0 then pIn1->flags == MEM_UInt -
we consider 0 as an unsigned value.
>> + (int64_t *) &x, &unused) != 0) {
>> /* If the LIMIT is less than or equal to zero, loop forever. This
>> * is documented. But also, if the LIMIT+OFFSET exceeds 2^63 then
>> * also loop forever. This is undocumented. In fact, one could argue
>
> Consider my review fixes here and on the branch in a
> separate commit. These changes and the comments are arguable,
> probably I am wrong somewhere.
Almost all applied (see diffs above).
More information about the Tarantool-patches
mailing list