From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id E16282E4A3 for ; Tue, 11 Jun 2019 17:11:40 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id rhTk-B92yefM for ; Tue, 11 Jun 2019 17:11:40 -0400 (EDT) Received: from smtp18.mail.ru (smtp18.mail.ru [94.100.176.155]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 745B82E416 for ; Tue, 11 Jun 2019 17:11:40 -0400 (EDT) Subject: [tarantool-patches] Re: [PATCH 1/6] sql: refactor sql_atoi64() References: <15f86ff05673055364c6a6bd11de4568e4c3854f.1559919361.git.korablev@tarantool.org> From: Vladislav Shpilevoy Message-ID: <0ae708be-9670-e726-b7ea-9faec8c007ea@tarantool.org> Date: Tue, 11 Jun 2019 23:11:51 +0200 MIME-Version: 1.0 In-Reply-To: <15f86ff05673055364c6a6bd11de4568e4c3854f.1559919361.git.korablev@tarantool.org> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-Help: List-Unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-Subscribe: List-Owner: List-post: List-Archive: To: tarantool-patches@freelists.org, Nikita Pettik Hi! Thanks for the patchset! See 8 comments below, review fixes at the end of the email, and on the branch in a separate commit. On 07/06/2019 18:37, Nikita Pettik wrote: > We are going to allow using unsigned values in SQL and extend range of > INTEGER type. Hence, we should be able to parse and operate on integers > in range of [2^63, 2^64 - 1]. Current mechanism which involves > sql_atoi64() function doesn't allow this. > > Let's refactor this function: firstly, get rid of manual parsing and use > strtoll() and strtoull() functions from standard library. Then, let's > return sign of parsed literal.> In case of success now function returns 0, > -1 otherwise. > > This patch also inlines sql_dec_or_hex_to_i64() to place of its only > usage: it makes code cleaner and more straightforward. > > Needed for #3810 > Needed for #4015 > --- > diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c > index ba7eea59d..158631416 100644 > --- a/src/box/sql/expr.c > +++ b/src/box/sql/expr.c> @@ -3325,29 +3326,53 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg, > if (is_neg) > i = -i; > sqlVdbeAddOp2(v, OP_Integer, i, mem); > + return; > + } > + int64_t value; > + const char *z = expr->u.zToken; > + assert(z != NULL); > + const char *sign = is_neg ? "-" : ""; > + if (z[0] == '0' && (z[1] == 'x' || z[1] == 'X')) { > + uint64_t u = 0; > + int i, k; > + for (i = 2; z[i] == '0'; i++); > + for (k = i; sqlIsxdigit(z[k]); k++) > + u = u * 16 + sqlHexToInt(z[k]); 1. Why not to use strtoll and strtol? They accept 'base' argument. Set it to 16, and they will do the same, but probably faster. > + memcpy(&value, &u, 8); > + if (z[k] != 0 || k - i > 16) { 2. You treat tail spaces as an error. Not sure if it is right. > + diag_set(ClientError, ER_HEX_LITERAL_MAX, sign, z, > + strlen(z) - 2, 16); > + parse->is_aborted = true; > + return; > + } > } else { > - int64_t value; > - const char *z = expr->u.zToken; > - assert(z != NULL); > - int c = sql_dec_or_hex_to_i64(z, &value); > - if (c == 1 || (c == 2 && !is_neg) || > - (is_neg && value == SMALLEST_INT64)) { > - const char *sign = is_neg ? "-" : ""; > - if (sql_strnicmp(z, "0x", 2) == 0) { > - diag_set(ClientError, ER_HEX_LITERAL_MAX, sign, > - z, strlen(z) - 2, 16); > - } else { > - diag_set(ClientError, ER_INT_LITERAL_MAX, sign, > - z, INT64_MIN, INT64_MAX); > - } > + size_t len = strlen(z); > + if (is_neg) { > + /* > + * UINT64_MAX is constant, so each integer > + * literal certainly contains less digits. > + * It makes no sense to continue processing > + * string if it contains more characters. > + */ > + if (len > UINT64_MAX_DIGIT_COUNT) > + goto int_overflow; > + char *neg_z = tt_static_buf(); > + neg_z[0] = '-'; > + memcpy(neg_z + 1, z, len++); > + neg_z[len]= '\0'; > + z = neg_z; > + } 3. This looks like a crutch. Why do you need strlen() and number of digits here? Firstly, sql_atoi64 already returns an error in case of too long integer. Secondly, if you wanted to add '-', it could be done much faster - make 'value = -value;' in case of 'neg' with a preliminary check that the value is not too big. Will it work? > + bool unused; > + int rc = sql_atoi64(z, &value, &unused, len); > + if (rc != 0) { > +int_overflow: > + diag_set(ClientError, ER_INT_LITERAL_MAX, z, INT64_MIN, > + INT64_MAX); > parse->is_aborted = true; > - } else { > - if (is_neg) > - value = c == 2 ? SMALLEST_INT64 : -value; > - sqlVdbeAddOp4Dup8(v, OP_Int64, 0, mem, 0, > - (u8 *)&value, P4_INT64); > + return; > } > } > + sqlVdbeAddOp4Dup8(v, OP_Int64, 0, mem, 0, (u8 *)&value, P4_INT64); > }> diff --git a/src/box/sql/util.c b/src/box/sql/util.c > index d5c93f8aa..2e7c298e1 100644 > --- a/src/box/sql/util.c > +++ b/src/box/sql/util.c > @@ -593,120 +593,42 @@ sqlAtoF(const char *z, double *pResult, int length) > int > -sql_atoi64(const char *z, int64_t *val, int length) > +sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length) > { > - int incr = 1; > - u64 u = 0; > - int neg = 0; /* assume positive */ > - int i; > - int c = 0; > - int nonNum = 0; /* True if input contains UTF16 with high byte non-zero */ > - const char *zStart; > - const char *zEnd = z + length; > - incr = 1; > - while (z < zEnd && sqlIsspace(*z)) > - z += incr; > - if (z < zEnd) { > - if (*z == '-') { > - neg = 1; > - z += incr; > - } else if (*z == '+') { > - z += incr; > - } > - } > - zStart = z; > - /* Skip leading zeros. */ > - while (z < zEnd && z[0] == '0') { > - z += incr; > - } > - for (i = 0; &z[i] < zEnd && (c = z[i]) >= '0' && c <= '9'; > - i += incr) { > - u = u * 10 + c - '0'; > - } > - if (u > LARGEST_INT64) { > - *val = neg ? SMALLEST_INT64 : LARGEST_INT64; > - } else if (neg) { > - *val = -(i64) u; > + *is_neg = false; > + const char *str_end = z + length; > + for (; z < str_end && sqlIsspace(*z); z++) > + /* invalid format */ 4. Usually we start sentences from capital letters and finish with dots. > + if (z >= str_end) > + return -1; > + if (*z == '-') > + *is_neg = true; > + > + char* end = NULL; > + errno = 0; > + if (*is_neg) { 5. I don't like this double check. 4 lines above you already know if the value is negative, but you double check it here. > + *val = strtoll(z, &end, 10); > } else { > - *val = (i64) u; > + uint64_t u_val = strtoull(z, &end, 10); > + if (u_val > INT64_MAX) > + return -1; > + *val = u_val; > } > - if (&z[i] < zEnd || (i == 0 && zStart == z) || i > 19 * incr || > - nonNum) { > - /* zNum is empty or contains non-numeric text or is longer > - * than 19 digits (thus guaranteeing that it is too large) > - */ > - return 1; > - } else if (i < 19 * incr) { > - /* Less than 19 digits, so we know that it fits in 64 bits */ > - assert(u <= LARGEST_INT64); > - return 0; > - } else { > - /* zNum is a 19-digit numbers. Compare it against 9223372036854775808. */ > - c = compare2pow63(z, incr); > - if (c < 0) { > - /* zNum is less than 9223372036854775808 so it fits */ > - assert(u <= LARGEST_INT64); > - return 0; > - } else if (c > 0) { > - /* zNum is greater than 9223372036854775808 so it overflows */ > - return 1; > - } else { > - /* zNum is exactly 9223372036854775808. Fits if negative. The > - * special case 2 overflow if positive > - */ > - assert(u - 1 == LARGEST_INT64); > - return neg ? 0 : 2; > - } > - } > -} > + /* No digits were found, e.g. an empty string. */ > + if (end == z) > + return -1; 6. You have already checked emptiness above with this condition: 'if (z >= str_end)'. > + /* > + * String has been parsed, but there's > + * additional character(s) remained. > + */ > + if (*end != '\0') > + return -1; 7. What if other symbols are spaces? > + /* Overflow and underflow errors. */ > + if (errno != 0) > + return -1; 8. You didn't remove 'sql_atoi64() == SQL_OK' in some places. Consider my review fixes here and on the branch in a separate commit. These changes and the comments are arguable, probably I am wrong somewhere. ============================================================= diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c index 158631416..0df6c16d1 100644 --- a/src/box/sql/expr.c +++ b/src/box/sql/expr.c @@ -3363,8 +3363,7 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg, z = neg_z; } bool unused; - int rc = sql_atoi64(z, &value, &unused, len); - if (rc != 0) { + if (sql_atoi64(z, &value, &unused, len) != 0) { int_overflow: diag_set(ClientError, ER_INT_LITERAL_MAX, z, INT64_MIN, INT64_MAX); diff --git a/src/box/sql/util.c b/src/box/sql/util.c index 2e7c298e1..de469ec36 100644 --- a/src/box/sql/util.c +++ b/src/box/sql/util.c @@ -38,6 +38,7 @@ */ #include "sqlInt.h" #include +#include #if HAVE_ISNAN || SQL_HAVE_ISNAN #include #endif @@ -596,38 +597,29 @@ sqlAtoF(const char *z, double *pResult, int length) int sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length) { - *is_neg = false; const char *str_end = z + length; - for (; z < str_end && sqlIsspace(*z); z++) - /* invalid format */ + for (; z < str_end && isspace(*z); z++) if (z >= str_end) return -1; - if (*z == '-') - *is_neg = true; - - char* end = NULL; + char *end = NULL; errno = 0; - if (*is_neg) { + if (*z == '-') { + *is_neg = true; *val = strtoll(z, &end, 10); } else { + *is_neg = false; uint64_t u_val = strtoull(z, &end, 10); if (u_val > INT64_MAX) return -1; *val = u_val; } - /* No digits were found, e.g. an empty string. */ - if (end == z) - return -1; - /* - * String has been parsed, but there's - * additional character(s) remained. - */ - if (*end != '\0') - return -1; /* Overflow and underflow errors. */ if (errno != 0) return -1; - + for (; *end != 0; ++end) { + if (! isspace(*end)) + return -1; + } return 0; } diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c index 6ecdb26fc..11a92372b 100644 --- a/src/box/sql/vdbe.c +++ b/src/box/sql/vdbe.c @@ -386,7 +386,7 @@ static u16 SQL_NOINLINE computeNumericType(Mem *pMem) if (sqlAtoF(pMem->z, &pMem->u.r, pMem->n)==0) return 0; bool is_neg; - if (sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, &is_neg, pMem->n)==SQL_OK) + if (sql_atoi64(pMem->z, &pMem->u.i, &is_neg, pMem->n) == 0) return MEM_Int; return MEM_Real; } diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c index 3a361d066..503f199f1 100644 --- a/src/box/sql/vdbemem.c +++ b/src/box/sql/vdbemem.c @@ -467,7 +467,7 @@ sqlVdbeIntValue(Mem * pMem, int64_t *i) } else if (flags & (MEM_Str)) { assert(pMem->z || pMem->n == 0); bool is_neg; - if (sql_atoi64(pMem->z, (int64_t *)i, &is_neg, pMem->n) == 0) + if (sql_atoi64(pMem->z, i, &is_neg, pMem->n) == 0) return 0; } return -1; @@ -588,8 +588,8 @@ sqlVdbeMemNumerify(Mem * pMem) if ((pMem->flags & (MEM_Int | MEM_Real | MEM_Null)) == 0) { assert((pMem->flags & (MEM_Blob | MEM_Str)) != 0); bool is_neg; - if (0 == sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, &is_neg, - pMem->n)) { + if (sql_atoi64(pMem->z, &pMem->u.i, &is_neg, + pMem->n) == 0) { MemSetTypeFlag(pMem, MEM_Int); } else { double v; @@ -655,8 +655,7 @@ sqlVdbeMemCast(Mem * pMem, enum field_type type) return SQL_OK; if ((pMem->flags & MEM_Blob) != 0 && type == FIELD_TYPE_NUMBER) { bool is_neg; - if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i, &is_neg, - pMem->n) == 0) { + if (sql_atoi64(pMem->z, &pMem->u.i, &is_neg, pMem->n) == 0) { MemSetTypeFlag(pMem, MEM_Real); pMem->u.r = pMem->u.i; return 0; @@ -688,7 +687,7 @@ sqlVdbeMemCast(Mem * pMem, enum field_type type) case FIELD_TYPE_INTEGER: if ((pMem->flags & MEM_Blob) != 0) { bool is_neg; - if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i, &is_neg, + if (sql_atoi64(pMem->z, &pMem->u.i, &is_neg, pMem->n) != 0) return -1; MemSetTypeFlag(pMem, MEM_Int);