[tarantool-patches] Re: [PATCH 1/6] sql: refactor sql_atoi64()
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Wed Jun 12 00:11:51 MSK 2019
Hi! Thanks for the patchset! See 8 comments below,
review fixes at the end of the email, and on the branch
in a separate commit.
On 07/06/2019 18:37, Nikita Pettik wrote:
> We are going to allow using unsigned values in SQL and extend range of
> INTEGER type. Hence, we should be able to parse and operate on integers
> in range of [2^63, 2^64 - 1]. Current mechanism which involves
> sql_atoi64() function doesn't allow this.
>
> Let's refactor this function: firstly, get rid of manual parsing and use
> strtoll() and strtoull() functions from standard library. Then, let's
> return sign of parsed literal.> In case of success now function returns 0,
> -1 otherwise.
>
> This patch also inlines sql_dec_or_hex_to_i64() to place of its only
> usage: it makes code cleaner and more straightforward.
>
> Needed for #3810
> Needed for #4015
> ---
> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
> index ba7eea59d..158631416 100644
> --- a/src/box/sql/expr.c
> +++ b/src/box/sql/expr.c> @@ -3325,29 +3326,53 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg,
> if (is_neg)
> i = -i;
> sqlVdbeAddOp2(v, OP_Integer, i, mem);
> + return;
> + }
> + int64_t value;
> + const char *z = expr->u.zToken;
> + assert(z != NULL);
> + const char *sign = is_neg ? "-" : "";
> + if (z[0] == '0' && (z[1] == 'x' || z[1] == 'X')) {
> + uint64_t u = 0;
> + int i, k;
> + for (i = 2; z[i] == '0'; i++);
> + for (k = i; sqlIsxdigit(z[k]); k++)
> + u = u * 16 + sqlHexToInt(z[k]);
1. Why not to use strtoll and strtol? They accept 'base' argument.
Set it to 16, and they will do the same, but probably faster.
> + memcpy(&value, &u, 8);
> + if (z[k] != 0 || k - i > 16) {
2. You treat tail spaces as an error. Not sure if it is right.
> + diag_set(ClientError, ER_HEX_LITERAL_MAX, sign, z,
> + strlen(z) - 2, 16);
> + parse->is_aborted = true;
> + return;
> + }
> } else {
> - int64_t value;
> - const char *z = expr->u.zToken;
> - assert(z != NULL);
> - int c = sql_dec_or_hex_to_i64(z, &value);
> - if (c == 1 || (c == 2 && !is_neg) ||
> - (is_neg && value == SMALLEST_INT64)) {
> - const char *sign = is_neg ? "-" : "";
> - if (sql_strnicmp(z, "0x", 2) == 0) {
> - diag_set(ClientError, ER_HEX_LITERAL_MAX, sign,
> - z, strlen(z) - 2, 16);
> - } else {
> - diag_set(ClientError, ER_INT_LITERAL_MAX, sign,
> - z, INT64_MIN, INT64_MAX);
> - }
> + size_t len = strlen(z);
> + if (is_neg) {
> + /*
> + * UINT64_MAX is constant, so each integer
> + * literal certainly contains less digits.
> + * It makes no sense to continue processing
> + * string if it contains more characters.
> + */
> + if (len > UINT64_MAX_DIGIT_COUNT)
> + goto int_overflow;
> + char *neg_z = tt_static_buf();
> + neg_z[0] = '-';
> + memcpy(neg_z + 1, z, len++);
> + neg_z[len]= '\0';
> + z = neg_z;
> + }
3. This looks like a crutch. Why do you need strlen() and number of
digits here? Firstly, sql_atoi64 already returns an error in case of
too long integer. Secondly, if you wanted to add '-', it could be done
much faster - make 'value = -value;' in case of 'neg' with a preliminary
check that the value is not too big. Will it work?
> + bool unused;
> + int rc = sql_atoi64(z, &value, &unused, len);
> + if (rc != 0) {
> +int_overflow:
> + diag_set(ClientError, ER_INT_LITERAL_MAX, z, INT64_MIN,
> + INT64_MAX);
> parse->is_aborted = true;
> - } else {
> - if (is_neg)
> - value = c == 2 ? SMALLEST_INT64 : -value;
> - sqlVdbeAddOp4Dup8(v, OP_Int64, 0, mem, 0,
> - (u8 *)&value, P4_INT64);
> + return;
> }
> }
> + sqlVdbeAddOp4Dup8(v, OP_Int64, 0, mem, 0, (u8 *)&value, P4_INT64);
> }> diff --git a/src/box/sql/util.c b/src/box/sql/util.c
> index d5c93f8aa..2e7c298e1 100644
> --- a/src/box/sql/util.c
> +++ b/src/box/sql/util.c
> @@ -593,120 +593,42 @@ sqlAtoF(const char *z, double *pResult, int length)
> int
> -sql_atoi64(const char *z, int64_t *val, int length)
> +sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length)
> {
> - int incr = 1;
> - u64 u = 0;
> - int neg = 0; /* assume positive */
> - int i;
> - int c = 0;
> - int nonNum = 0; /* True if input contains UTF16 with high byte non-zero */
> - const char *zStart;
> - const char *zEnd = z + length;
> - incr = 1;
> - while (z < zEnd && sqlIsspace(*z))
> - z += incr;
> - if (z < zEnd) {
> - if (*z == '-') {
> - neg = 1;
> - z += incr;
> - } else if (*z == '+') {
> - z += incr;
> - }
> - }
> - zStart = z;
> - /* Skip leading zeros. */
> - while (z < zEnd && z[0] == '0') {
> - z += incr;
> - }
> - for (i = 0; &z[i] < zEnd && (c = z[i]) >= '0' && c <= '9';
> - i += incr) {
> - u = u * 10 + c - '0';
> - }
> - if (u > LARGEST_INT64) {
> - *val = neg ? SMALLEST_INT64 : LARGEST_INT64;
> - } else if (neg) {
> - *val = -(i64) u;
> + *is_neg = false;
> + const char *str_end = z + length;
> + for (; z < str_end && sqlIsspace(*z); z++)
> + /* invalid format */
4. Usually we start sentences from capital letters and finish
with dots.
> + if (z >= str_end)
> + return -1;
> + if (*z == '-')
> + *is_neg = true;
> +
> + char* end = NULL;
> + errno = 0;
> + if (*is_neg) {
5. I don't like this double check. 4 lines above you already
know if the value is negative, but you double check it here.
> + *val = strtoll(z, &end, 10);
> } else {
> - *val = (i64) u;
> + uint64_t u_val = strtoull(z, &end, 10);
> + if (u_val > INT64_MAX)
> + return -1;
> + *val = u_val;
> }
> - if (&z[i] < zEnd || (i == 0 && zStart == z) || i > 19 * incr ||
> - nonNum) {
> - /* zNum is empty or contains non-numeric text or is longer
> - * than 19 digits (thus guaranteeing that it is too large)
> - */
> - return 1;
> - } else if (i < 19 * incr) {
> - /* Less than 19 digits, so we know that it fits in 64 bits */
> - assert(u <= LARGEST_INT64);
> - return 0;
> - } else {
> - /* zNum is a 19-digit numbers. Compare it against 9223372036854775808. */
> - c = compare2pow63(z, incr);
> - if (c < 0) {
> - /* zNum is less than 9223372036854775808 so it fits */
> - assert(u <= LARGEST_INT64);
> - return 0;
> - } else if (c > 0) {
> - /* zNum is greater than 9223372036854775808 so it overflows */
> - return 1;
> - } else {
> - /* zNum is exactly 9223372036854775808. Fits if negative. The
> - * special case 2 overflow if positive
> - */
> - assert(u - 1 == LARGEST_INT64);
> - return neg ? 0 : 2;
> - }
> - }
> -}
> + /* No digits were found, e.g. an empty string. */
> + if (end == z)
> + return -1;
6. You have already checked emptiness above with
this condition: 'if (z >= str_end)'.
> + /*
> + * String has been parsed, but there's
> + * additional character(s) remained.
> + */
> + if (*end != '\0')
> + return -1;
7. What if other symbols are spaces?
> + /* Overflow and underflow errors. */
> + if (errno != 0)
> + return -1;
8. You didn't remove 'sql_atoi64() == SQL_OK' in some
places.
Consider my review fixes here and on the branch in a
separate commit. These changes and the comments are arguable,
probably I am wrong somewhere.
=============================================================
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 158631416..0df6c16d1 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -3363,8 +3363,7 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg,
z = neg_z;
}
bool unused;
- int rc = sql_atoi64(z, &value, &unused, len);
- if (rc != 0) {
+ if (sql_atoi64(z, &value, &unused, len) != 0) {
int_overflow:
diag_set(ClientError, ER_INT_LITERAL_MAX, z, INT64_MIN,
INT64_MAX);
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index 2e7c298e1..de469ec36 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -38,6 +38,7 @@
*/
#include "sqlInt.h"
#include <stdarg.h>
+#include <ctype.h>
#if HAVE_ISNAN || SQL_HAVE_ISNAN
#include <math.h>
#endif
@@ -596,38 +597,29 @@ sqlAtoF(const char *z, double *pResult, int length)
int
sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length)
{
- *is_neg = false;
const char *str_end = z + length;
- for (; z < str_end && sqlIsspace(*z); z++)
- /* invalid format */
+ for (; z < str_end && isspace(*z); z++)
if (z >= str_end)
return -1;
- if (*z == '-')
- *is_neg = true;
-
- char* end = NULL;
+ char *end = NULL;
errno = 0;
- if (*is_neg) {
+ if (*z == '-') {
+ *is_neg = true;
*val = strtoll(z, &end, 10);
} else {
+ *is_neg = false;
uint64_t u_val = strtoull(z, &end, 10);
if (u_val > INT64_MAX)
return -1;
*val = u_val;
}
- /* No digits were found, e.g. an empty string. */
- if (end == z)
- return -1;
- /*
- * String has been parsed, but there's
- * additional character(s) remained.
- */
- if (*end != '\0')
- return -1;
/* Overflow and underflow errors. */
if (errno != 0)
return -1;
-
+ for (; *end != 0; ++end) {
+ if (! isspace(*end))
+ return -1;
+ }
return 0;
}
diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index 6ecdb26fc..11a92372b 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -386,7 +386,7 @@ static u16 SQL_NOINLINE computeNumericType(Mem *pMem)
if (sqlAtoF(pMem->z, &pMem->u.r, pMem->n)==0)
return 0;
bool is_neg;
- if (sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, &is_neg, pMem->n)==SQL_OK)
+ if (sql_atoi64(pMem->z, &pMem->u.i, &is_neg, pMem->n) == 0)
return MEM_Int;
return MEM_Real;
}
diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
index 3a361d066..503f199f1 100644
--- a/src/box/sql/vdbemem.c
+++ b/src/box/sql/vdbemem.c
@@ -467,7 +467,7 @@ sqlVdbeIntValue(Mem * pMem, int64_t *i)
} else if (flags & (MEM_Str)) {
assert(pMem->z || pMem->n == 0);
bool is_neg;
- if (sql_atoi64(pMem->z, (int64_t *)i, &is_neg, pMem->n) == 0)
+ if (sql_atoi64(pMem->z, i, &is_neg, pMem->n) == 0)
return 0;
}
return -1;
@@ -588,8 +588,8 @@ sqlVdbeMemNumerify(Mem * pMem)
if ((pMem->flags & (MEM_Int | MEM_Real | MEM_Null)) == 0) {
assert((pMem->flags & (MEM_Blob | MEM_Str)) != 0);
bool is_neg;
- if (0 == sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, &is_neg,
- pMem->n)) {
+ if (sql_atoi64(pMem->z, &pMem->u.i, &is_neg,
+ pMem->n) == 0) {
MemSetTypeFlag(pMem, MEM_Int);
} else {
double v;
@@ -655,8 +655,7 @@ sqlVdbeMemCast(Mem * pMem, enum field_type type)
return SQL_OK;
if ((pMem->flags & MEM_Blob) != 0 && type == FIELD_TYPE_NUMBER) {
bool is_neg;
- if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i, &is_neg,
- pMem->n) == 0) {
+ if (sql_atoi64(pMem->z, &pMem->u.i, &is_neg, pMem->n) == 0) {
MemSetTypeFlag(pMem, MEM_Real);
pMem->u.r = pMem->u.i;
return 0;
@@ -688,7 +687,7 @@ sqlVdbeMemCast(Mem * pMem, enum field_type type)
case FIELD_TYPE_INTEGER:
if ((pMem->flags & MEM_Blob) != 0) {
bool is_neg;
- if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i, &is_neg,
+ if (sql_atoi64(pMem->z, &pMem->u.i, &is_neg,
pMem->n) != 0)
return -1;
MemSetTypeFlag(pMem, MEM_Int);
More information about the Tarantool-patches
mailing list