From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id C6A08310D8 for ; Fri, 7 Jun 2019 11:37:51 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id fpVZGyMStYtF for ; Fri, 7 Jun 2019 11:37:51 -0400 (EDT) Received: from smtpng2.m.smailru.net (smtpng2.m.smailru.net [94.100.179.3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 263C731041 for ; Fri, 7 Jun 2019 11:37:51 -0400 (EDT) From: Nikita Pettik Subject: [tarantool-patches] [PATCH 3/6] sql: refactor arithmetic operations to support unsigned ints Date: Fri, 7 Jun 2019 18:37:43 +0300 Message-Id: <7d8e776bc82cb9d25460c2104e687540526aa7cd.1559919361.git.korablev@tarantool.org> In-Reply-To: References: In-Reply-To: References: Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-Help: List-Unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-Subscribe: List-Owner: List-post: List-Archive: To: tarantool-patches@freelists.org Cc: v.shpilevoy@tarantool.org, Nikita Pettik Let's patch internal VDBE routines which add, subtract, multiply, divide and calculate the remainder of division to allow them take operands of unsigned type. In this respect, each operator now accepts signs of both operands and return sign of result. Part of #3810 Part of #4015 --- src/box/sql/func.c | 14 ++- src/box/sql/sqlInt.h | 35 +++++- src/box/sql/util.c | 233 +++++++++++++++++++++++++++---------- src/box/sql/vdbe.c | 77 +++++++++--- test/sql-tap/func.test.lua | 28 +++-- test/sql/integer-overflow.result | 15 ++- test/sql/integer-overflow.test.lua | 3 + 7 files changed, 309 insertions(+), 96 deletions(-) diff --git a/src/box/sql/func.c b/src/box/sql/func.c index f4c1cbcca..457c9b92b 100644 --- a/src/box/sql/func.c +++ b/src/box/sql/func.c @@ -1582,7 +1582,9 @@ soundexFunc(sql_context * context, int argc, sql_value ** argv) typedef struct SumCtx SumCtx; struct SumCtx { double rSum; /* Floating point sum */ - i64 iSum; /* Integer sum */ + int64_t iSum; /* Integer sum */ + /** True if iSum < 0. */ + bool is_neg; i64 cnt; /* Number of elements summed */ u8 overflow; /* True if integer overflow seen */ u8 approx; /* True if non-integer value was input to the sum */ @@ -1620,9 +1622,13 @@ sum_step(struct sql_context *context, int argc, sql_value **argv) p->cnt++; if (type == MP_INT || type == MP_UINT) { int64_t v = sql_value_int64(argv[0]); - p->rSum += v; + if (type == MP_INT) + p->rSum += v; + else + p->rSum += (uint64_t) v; if ((p->approx | p->overflow) == 0 && - sqlAddInt64(&p->iSum, v) != 0) { + sql_add_int(p->iSum, p->is_neg, v, v < 0, &p->iSum, + &p->is_neg) != 0) { p->overflow = 1; } } else { @@ -1642,7 +1648,7 @@ sumFinalize(sql_context * context) } else if (p->approx) { sql_result_double(context, p->rSum); } else { - sql_result_int64(context, p->iSum); + mem_set_int(context->pOut, p->iSum, p->is_neg); } } } diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h index da17e24ed..c0e2ab699 100644 --- a/src/box/sql/sqlInt.h +++ b/src/box/sql/sqlInt.h @@ -4461,10 +4461,37 @@ Expr *sqlExprAddCollateString(Parse *, Expr *, const char *); Expr *sqlExprSkipCollate(Expr *); int sqlCheckIdentifierName(Parse *, char *); void sqlVdbeSetChanges(sql *, int); -int sqlAddInt64(i64 *, i64); -int sqlSubInt64(i64 *, i64); -int sqlMulInt64(i64 *, i64); -int sqlAbsInt32(int); + +/** + * Attempt to add, subtract, multiply or get the remainder of + * 64-bit integer values. There functions are able to operate + * on signed as well as unsigned integers. If result of operation + * is greater 0, then it is assumed to be unsigned and can take + * values in range up to 2^64 - 1. If the result is negative, + * then its minimum value is -2^63. + * Return 0 on success. Or if the operation would have resulted + * in an overflow, return -1. + */ +int +sql_add_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg, + int64_t *res, bool *is_res_neg); + +int +sql_sub_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg, + int64_t *res, bool *is_res_neg); + +int +sql_mul_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg, + int64_t *res, bool *is_res_neg); + +int +sql_div_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg, + int64_t *res, bool *is_res_neg); + +int +sql_rem_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg, + int64_t *res, bool *is_res_neg); + u8 sqlGetBoolean(const char *z, u8); const void *sqlValueText(sql_value *); diff --git a/src/box/sql/util.c b/src/box/sql/util.c index 2e7c298e1..ee6a83ad5 100644 --- a/src/box/sql/util.c +++ b/src/box/sql/util.c @@ -1194,89 +1194,196 @@ sqlSafetyCheckSickOrOk(sql * db) } } -/* - * Attempt to add, substract, or multiply the 64-bit signed value iB against - * the other 64-bit signed integer at *pA and store the result in *pA. - * Return 0 on success. Or if the operation would have resulted in an - * overflow, leave *pA unchanged and return 1. - */ int -sqlAddInt64(i64 * pA, i64 iB) +sql_add_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg, + int64_t *res, bool *is_res_neg) { - i64 iA = *pA; - testcase(iA == 0); - testcase(iA == 1); - testcase(iB == -1); - testcase(iB == 0); - if (iB >= 0) { - testcase(iA > 0 && LARGEST_INT64 - iA == iB); - testcase(iA > 0 && LARGEST_INT64 - iA == iB - 1); - if (iA > 0 && LARGEST_INT64 - iA < iB) - return 1; - } else { - testcase(iA < 0 && -(iA + LARGEST_INT64) == iB + 1); - testcase(iA < 0 && -(iA + LARGEST_INT64) == iB + 2); - if (iA < 0 && -(iA + LARGEST_INT64) > iB + 1) - return 1; + /* Addition of two negative integers. */ + if (is_lhs_neg && is_rhs_neg) { + assert(lhs < 0 && rhs < 0); + /* This is the same as (lhs + rhs) < INT64_MIN */ + if (-(lhs + INT64_MAX) > rhs + 1) + return -1; + *is_res_neg = true; + *res = lhs + rhs; + return 0; } - *pA += iB; + /* Both are unsigned integers. */ + if (!is_lhs_neg && !is_rhs_neg) { + uint64_t u_lhs = (uint64_t) lhs; + uint64_t u_rhs = (uint64_t) rhs; + /* This is the same as (lhs + rhs) > UINT64_MAX */ + if (UINT64_MAX - u_lhs < u_rhs) + return -1; + *is_res_neg = false; + *res = lhs + rhs; + return 0; + } + /* + * Make sure we've got only one combination of + * positive and negative operands. + */ + if (is_lhs_neg) { + SWAP(is_lhs_neg, is_rhs_neg); + SWAP(lhs, rhs); + } + assert(! is_lhs_neg && is_rhs_neg); + *is_res_neg = (uint64_t)(-rhs) > (uint64_t) lhs; + *res = lhs + rhs; return 0; } int -sqlSubInt64(i64 * pA, i64 iB) +sql_sub_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg, + int64_t *res, bool *is_res_neg) { - testcase(iB == SMALLEST_INT64 + 1); - if (iB == SMALLEST_INT64) { - testcase((*pA) == (-1)); - testcase((*pA) == 0); - if ((*pA) >= 0) - return 1; - *pA -= iB; + if (!is_lhs_neg && !is_rhs_neg) { + uint64_t u_lhs = (uint64_t) lhs; + uint64_t u_rhs = (uint64_t) rhs; + /* + * Unsigned arithmetic has no overflows, so to + * check if lhs is less than rhs we can compare + * result of their subtraction with lhs. + */ + *is_res_neg = u_lhs < (uint64_t)(u_lhs - u_rhs); + if (! *is_res_neg) { + *res = u_lhs - u_rhs; + return 0; + } + if (u_rhs > (uint64_t) INT64_MAX + 1 && + u_lhs < (uint64_t)(u_rhs - INT64_MAX - 1)) + return -1; + *res = lhs - rhs; return 0; - } else { - return sqlAddInt64(pA, -iB); } + if (is_rhs_neg) { + return sql_add_int(lhs, is_lhs_neg, -rhs, false, res, + is_res_neg); + } + assert(is_lhs_neg && !is_rhs_neg); + /* + * (lhs - rhs) < 0, lhs < 0, rhs > 0: in this case their + * difference must be less than INT64_MIN. + */ + if ((uint64_t) -lhs + (uint64_t) rhs > (uint64_t) INT64_MAX + 1) + return -1; + *is_res_neg = true; + *res = lhs - rhs; + return 0; } int -sqlMulInt64(i64 * pA, i64 iB) +sql_mul_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg, + int64_t *res, bool *is_res_neg) { - i64 iA = *pA; - if (iB > 0) { - if (iA > LARGEST_INT64 / iB) - return 1; - if (iA < SMALLEST_INT64 / iB) - return 1; - } else if (iB < 0) { - if (iA > 0) { - if (iB < SMALLEST_INT64 / iA) - return 1; - } else if (iA < 0) { - if (iB == SMALLEST_INT64) - return 1; - if (iA == SMALLEST_INT64) - return 1; - if (-iA > LARGEST_INT64 / -iB) - return 1; - } + if (lhs == 0 || rhs == 0) { + *res = 0; + *is_res_neg = false; + return 0; + } + /* + * Multiplication of integers with same sign leads to + * unsigned result. + */ + if (! (is_lhs_neg ^ is_rhs_neg)) { + uint64_t u_res = is_lhs_neg ? + (uint64_t) (-lhs) * (uint64_t) (-rhs) : + (uint64_t) lhs * (uint64_t) (rhs); + /* + * Overflow detection is quite primitive due to + * the absence of overflow with unsigned values: + * lhs * rhs == res --> rhs == res / lhs; + * If this predicate is false, then result was + * reduced modulo UINT_MAX + 1. + */ + if ((is_lhs_neg && u_res / (uint64_t) (-lhs) != + (uint64_t) (-rhs)) || + (!is_lhs_neg && u_res / (uint64_t) lhs != (uint64_t) rhs)) + return -1; + *is_res_neg = false; + *res = u_res; + return 0; + } + /* + * Make sure we've got only one combination of + * positive and negative operands. + */ + if (is_lhs_neg) { + SWAP(is_lhs_neg, is_rhs_neg); + SWAP(lhs, rhs); } - *pA = iA * iB; + assert(! is_lhs_neg && is_rhs_neg); + uint64_t u_rhs = (uint64_t) (-rhs); + uint64_t u_res = u_rhs * (uint64_t) lhs; + if (u_res / u_rhs != (uint64_t) lhs || + u_rhs * (uint64_t) lhs > (uint64_t) INT64_MAX + 1) + return -1; + *is_res_neg = true; + *res = lhs * rhs; + return 0; +} + +int +sql_div_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg, + int64_t *res, bool *is_res_neg) +{ + if (lhs == 0) { + *res = 0; + *is_res_neg = false; + return 0; + } + /* + * The only possible overflow situations is when operands + * of different signs and result turns out to be less + * than INT64_MIN. + */ + if (is_lhs_neg ^ is_rhs_neg) { + uint64_t u_res = is_lhs_neg ? + (uint64_t) (-lhs) / (uint64_t) rhs : + (uint64_t) lhs / (uint64_t) (-rhs); + if (u_res > (uint64_t) INT64_MAX + 1) + return -1; + *is_res_neg = u_res != 0 ? true : false; + *res = -u_res; + return 0; + } + *is_res_neg = false; + /* + * Another one special case: INT64_MIN / -1 + * Signed division leads to program termination due + * to overflow. + */ + if (is_lhs_neg && lhs == INT64_MIN && rhs == -1) { + *res = (uint64_t) INT64_MAX + 1; + return 0; + } + *res = is_lhs_neg ? (uint64_t) (lhs / rhs) : + (uint64_t) lhs / (uint64_t) rhs; return 0; } -/* - * Compute the absolute value of a 32-bit signed integer, of possible. Or - * if the integer has a value of -2147483648, return +2147483647 - */ int -sqlAbsInt32(int x) +sql_rem_int(int64_t lhs, bool is_lhs_neg, int64_t rhs, bool is_rhs_neg, + int64_t *res, bool *is_res_neg) { - if (x >= 0) - return x; - if (x == (int)0x80000000) - return 0x7fffffff; - return -x; + if (is_lhs_neg) { + uint64_t u_lhs = (uint64_t) (-lhs); + uint64_t u_res = u_lhs % (uint64_t) rhs; + if (u_res > (uint64_t) INT64_MAX + 1) + return -1; + *res = -u_res; + *is_res_neg = true; + return 0; + } + /* + * While calculating remainder we always ignore sign of + * rhs - it doesn't affect the result. + * */ + uint64_t u_lhs = (uint64_t) lhs; + uint64_t u_rhs = is_rhs_neg ? (uint64_t) (-rhs) : (uint64_t) rhs; + *res = u_lhs % u_rhs; + *is_res_neg = false; + return 0; } /* diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c index d141397a0..9c28b9131 100644 --- a/src/box/sql/vdbe.c +++ b/src/box/sql/vdbe.c @@ -1621,27 +1621,48 @@ case OP_Remainder: { /* same as TK_REM, in1, in2, out3 */ (type2 & (MEM_Int | MEM_UInt)) !=0) { iA = pIn1->u.i; iB = pIn2->u.i; + bool is_lhs_neg = pIn1->flags & MEM_Int; + bool is_rhs_neg = pIn2->flags & MEM_Int; + bool is_res_neg; bIntint = 1; switch( pOp->opcode) { - case OP_Add: if (sqlAddInt64(&iB,iA)) goto integer_overflow; break; - case OP_Subtract: if (sqlSubInt64(&iB,iA)) goto integer_overflow; break; - case OP_Multiply: if (sqlMulInt64(&iB,iA)) goto integer_overflow; break; + case OP_Add: { + if (sql_add_int(iA, is_lhs_neg, iB, is_rhs_neg, + (int64_t *) &iB, &is_res_neg) != 0) + goto integer_overflow; + break; + } + case OP_Subtract: { + if (sql_sub_int(iB, is_rhs_neg, iA, is_lhs_neg, + (int64_t *) &iB, &is_res_neg) != 0) + goto integer_overflow; + break; + } + case OP_Multiply: { + if (sql_mul_int(iA, is_lhs_neg, iB, is_rhs_neg, + (int64_t *) &iB, &is_res_neg) != 0) + goto integer_overflow; + break; + } case OP_Divide: { if (iA == 0) goto division_by_zero; - if (iA==-1 && iB==SMALLEST_INT64) goto integer_overflow; - iB /= iA; + if (sql_div_int(iB, is_rhs_neg, iA, is_lhs_neg, + (int64_t *) &iB, &is_res_neg) != 0) + goto integer_overflow; break; } default: { if (iA == 0) goto division_by_zero; if (iA==-1) iA = 1; - iB %= iA; + if (sql_rem_int(iB, is_rhs_neg, iA, is_lhs_neg, + (int64_t *) &iB, &is_res_neg) != 0) + goto integer_overflow; break; } } - mem_set_int(pOut, iB, iB < 0); + mem_set_int(pOut, iB, is_res_neg); } else { bIntint = 0; if (sqlVdbeRealValue(pIn1, &rA) != 0) { @@ -1883,13 +1904,13 @@ case OP_ShiftRight: { /* same as TK_RSHIFT, in1, in2, out3 */ break; } bool unused; - if (sqlVdbeIntValue(pIn2, &iA, &unused) != 0) { + if (sqlVdbeIntValue(pIn2, (int64_t *) &iA, &unused) != 0) { diag_set(ClientError, ER_SQL_TYPE_MISMATCH, sql_value_text(pIn2), "integer"); rc = SQL_TARANTOOL_ERROR; goto abort_due_to_error; } - if (sqlVdbeIntValue(pIn1, &iB, &unused) != 0) { + if (sqlVdbeIntValue(pIn1, (int64_t *) &iB, &unused) != 0) { diag_set(ClientError, ER_SQL_TYPE_MISMATCH, sql_value_text(pIn1), "integer"); rc = SQL_TARANTOOL_ERROR; @@ -2191,10 +2212,31 @@ case OP_Ge: { /* same as TK_GE, jump, in1, in3 */ /* Handle the common case of integer comparison here, as an * optimization, to avoid a call to sqlMemCompare() */ - if ((pIn1->flags & pIn3->flags & (MEM_Int | MEM_UInt))!=0) { - if (pIn3->u.i > pIn1->u.i) { res = +1; goto compare_op; } - if (pIn3->u.i < pIn1->u.i) { res = -1; goto compare_op; } - res = 0; + if ((pIn1->flags & pIn3->flags & (MEM_Int | MEM_UInt)) != 0) { + if ((pIn1->flags & pIn3->flags & MEM_Int) != 0) { + if (pIn3->u.i > pIn1->u.i) + res = +1; + else if (pIn3->u.i < pIn1->u.i) + res = -1; + else + res = 0; + goto compare_op; + } + if ((pIn1->flags & pIn3->flags & MEM_UInt) != 0) { + if (pIn3->u.u > pIn1->u.u) + res = +1; + else if (pIn3->u.u < pIn1->u.u) + res = -1; + else + res = 0; + goto compare_op; + } + if ((pIn1->flags & MEM_UInt) != 0 && + (pIn3->flags & MEM_Int) != 0) { + res = -1; + goto compare_op; + } + res = 1; goto compare_op; } } else if (type == FIELD_TYPE_STRING) { @@ -3900,8 +3942,7 @@ case OP_FCopy: { /* out2 */ assert((pIn1->flags & (MEM_Int | MEM_UInt)) != 0); pOut = &aMem[pOp->p2]; - mem_set_int(pOut, pIn1->u.i, pIn1->flags == MEM_UInt ? - false : true); + mem_set_int(pOut, pIn1->u.i, pIn1->flags == MEM_Int); } break; } @@ -5134,7 +5175,11 @@ case OP_OffsetLimit: { /* in1, out2, in3 */ assert(pIn1->flags & (MEM_Int | MEM_UInt)); assert(pIn3->flags & (MEM_Int | MEM_UInt)); x = pIn1->u.i; - if (x<=0 || sqlAddInt64(&x, pIn3->u.i > 0 ? pIn3->u.i : 0)) { + int64_t rhs = pIn3->flags & MEM_Int ? 0 : pIn3->u.u; + bool unused; + if ((x == 0 || pIn1->flags & MEM_Int) || + sql_add_int(x, pIn1->flags & MEM_Int, rhs, false, + (int64_t *) &x, &unused) != 0) { /* If the LIMIT is less than or equal to zero, loop forever. This * is documented. But also, if the LIMIT+OFFSET exceeds 2^63 then * also loop forever. This is undocumented. In fact, one could argue diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua index 09b1cf967..314c528ab 100755 --- a/test/sql-tap/func.test.lua +++ b/test/sql-tap/func.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test = require("sqltester") -test:plan(14590) +test:plan(14591) --!./tcltestrunner.lua -- 2001 September 15 @@ -1591,14 +1591,14 @@ test:do_execsql_test( -- }) -test:do_catchsql_test( +test:do_execsql_test( "func-18.12", [[ INSERT INTO t6 VALUES(3, 1<<62); SELECT sum(x) - ((1<<62)*2.0+1) from t6; ]], { -- - 1, "Failed to execute SQL statement: integer overflow" + 0 -- }) @@ -1645,18 +1645,30 @@ test:do_catchsql_test( -- }) -test:do_catchsql_test( - "func-18.15", +test:do_execsql_test( + "func-18.15.1", [[ SELECT sum(x) FROM (SELECT 9223372036854775807 AS x UNION ALL SELECT 10 AS x); ]], { - -- - 1, "Failed to execute SQL statement: integer overflow" - -- + -- + 9223372036854775817LL + -- }) +test:do_catchsql_test( + "func-18.15.2", + [[ + SELECT sum(x) FROM + (SELECT 9223372036854775807 AS x UNION ALL SELECT 9223372036854775807 AS x + UNION ALL SELECT 10 AS x); + ]], { + -- + 1, "Failed to execute SQL statement: integer overflow" + -- +}) + test:do_catchsql_test( "func-18.18", [[ diff --git a/test/sql/integer-overflow.result b/test/sql/integer-overflow.result index d6ca66ec9..40962ac5c 100644 --- a/test/sql/integer-overflow.result +++ b/test/sql/integer-overflow.result @@ -10,6 +10,7 @@ box.execute('pragma sql_default_engine=\''..engine..'\'') ... -- gh-3735: make sure that integer overflows errors are -- handled during VDBE execution. +-- gh-3810: range of integer is extended up to 2^64 - 1. -- box.execute('SELECT (2147483647 * 2147483647 * 2147483647);') --- @@ -17,7 +18,11 @@ box.execute('SELECT (2147483647 * 2147483647 * 2147483647);') ... box.execute('SELECT (-9223372036854775808 / -1);') --- -- error: 'Failed to execute SQL statement: integer is overflowed' +- metadata: + - name: (-9223372036854775808 / -1) + type: integer + rows: + - [9223372036854775808] ... box.execute('SELECT (-9223372036854775808 - 1);') --- @@ -25,6 +30,14 @@ box.execute('SELECT (-9223372036854775808 - 1);') ... box.execute('SELECT (9223372036854775807 + 1);') --- +- metadata: + - name: (9223372036854775807 + 1) + type: integer + rows: + - [9223372036854775808] +... +box.execute('SELECT (9223372036854775807 + 9223372036854775807 + 2);') +--- - error: 'Failed to execute SQL statement: integer is overflowed' ... -- Literals are checked right after parsing. diff --git a/test/sql/integer-overflow.test.lua b/test/sql/integer-overflow.test.lua index 4339edf39..7727f368c 100644 --- a/test/sql/integer-overflow.test.lua +++ b/test/sql/integer-overflow.test.lua @@ -4,11 +4,14 @@ box.execute('pragma sql_default_engine=\''..engine..'\'') -- gh-3735: make sure that integer overflows errors are -- handled during VDBE execution. +-- gh-3810: range of integer is extended up to 2^64 - 1. -- box.execute('SELECT (2147483647 * 2147483647 * 2147483647);') box.execute('SELECT (-9223372036854775808 / -1);') box.execute('SELECT (-9223372036854775808 - 1);') box.execute('SELECT (9223372036854775807 + 1);') +box.execute('SELECT (9223372036854775807 + 9223372036854775807 + 2);') + -- Literals are checked right after parsing. -- box.execute('SELECT 9223372036854775808;') -- 2.15.1