[tarantool-patches] Re: [PATCH 1/6] sql: refactor sql_atoi64()

n.pettik korablev at tarantool.org
Mon Jul 1 17:20:56 MSK 2019


>> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
>> index ba7eea59d..158631416 100644
>> --- a/src/box/sql/expr.c
>> +++ b/src/box/sql/expr.c> @@ -3325,29 +3326,53 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg,
>> 		if (is_neg)
>> 			i = -i;
>> 		sqlVdbeAddOp2(v, OP_Integer, i, mem);
>> +		return;
>> +	}
>> +	int64_t value;
>> +	const char *z = expr->u.zToken;
>> +	assert(z != NULL);
>> +	const char *sign = is_neg ? "-" : "";
>> +	if (z[0] == '0' && (z[1] == 'x' || z[1] == 'X')) {
>> +		uint64_t u = 0;
>> +		int i, k;
>> +		for (i = 2; z[i] == '0'; i++);
>> +		for (k = i; sqlIsxdigit(z[k]); k++)
>> +			u = u * 16 + sqlHexToInt(z[k]);
> 
> 1. Why not to use strtoll and strtol? They accept 'base' argument.
> Set it to 16, and they will do the same, but probably faster.

NP:

diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 158631416..182950e9f 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -3333,13 +3333,16 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg,
        assert(z != NULL);
        const char *sign = is_neg ? "-" : "";
        if (z[0] == '0' && (z[1] == 'x' || z[1] == 'X')) {
-               uint64_t u = 0;
-               int i, k;
-               for (i = 2; z[i] == '0'; i++);
-               for (k = i; sqlIsxdigit(z[k]); k++)
-                       u = u * 16 + sqlHexToInt(z[k]);
-               memcpy(&value, &u, 8);
-               if (z[k] != 0 || k - i > 16) {
+               errno = 0;
+               char *end = NULL;
+               if (is_neg) {
+                       value = strtoll(z, &end, 16);
+               } else {
+                       value = strtoull(z, &end, 16);
+                       if (value > INT64_MAX)
+                               goto int_overflow;
+               }
+               if (errno != 0) {
                        diag_set(ClientError, ER_HEX_LITERAL_MAX, sign, z,
                                 strlen(z) - 2, 16);
                        parse->is_aborted = true;

> 
>> +		memcpy(&value, &u, 8);
>> +		if (z[k] != 0 || k - i > 16) {
> 
> 2. You treat tail spaces as an error. Not sure if it is right.

This code has been removed (see fix above).

> 
>> +			diag_set(ClientError, ER_HEX_LITERAL_MAX, sign, z,
>> +				 strlen(z) - 2, 16);
>> +			parse->is_aborted = true;
>> +			return;
>> +		}
>> 	} else {
>> -		int64_t value;
>> -		const char *z = expr->u.zToken;
>> -		assert(z != NULL);
>> -		int c = sql_dec_or_hex_to_i64(z, &value);
>> -		if (c == 1 || (c == 2 && !is_neg) ||
>> -		    (is_neg && value == SMALLEST_INT64)) {
>> -			const char *sign = is_neg ? "-" : "";
>> -			if (sql_strnicmp(z, "0x", 2) == 0) {
>> -				diag_set(ClientError, ER_HEX_LITERAL_MAX, sign,
>> -					 z, strlen(z) - 2, 16);
>> -			} else {
>> -				diag_set(ClientError, ER_INT_LITERAL_MAX, sign,
>> -					 z, INT64_MIN, INT64_MAX);
>> -			}
>> +		size_t len = strlen(z);
>> +		if (is_neg) {
>> +			/*
>> +			 * UINT64_MAX is constant, so each integer
>> +			 * literal certainly contains less digits.
>> +			 * It makes no sense to continue processing
>> +			 * string if it contains more characters.
>> +			 */
>> +			if (len > UINT64_MAX_DIGIT_COUNT)
>> +				goto int_overflow;
>> +			char *neg_z = tt_static_buf();
>> +			neg_z[0] = '-';
>> +			memcpy(neg_z + 1, z, len++);
>> +			neg_z[len]= '\0';
>> +			z = neg_z;
>> +		}
> 
> 3. This looks like a crutch. Why do you need strlen() and number of
> digits here? Firstly, sql_atoi64 already returns an error in case of
> too long integer. Secondly, if you wanted to add '-', it could be done
> much faster - make 'value = -value;' in case of 'neg' with a preliminary
> check that the value is not too big. Will it work?

It will, but we have to allow parsing in sql_atoi() positive values up to
INT64_MAX + 1. So, several tests fail with this change (sql-tap/func.test.lua and
sql/integer-overflow.test.lua). It is not critical, since in the next patch we are
going to allow using integers up to UINT64_MAX. Hence, I don’t think this
deserves any workaround, so I simply fixed tests. Diff:

--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -242,7 +242,7 @@ struct errcode_record {
        /*187 */_(ER_SQL_ANALYZE_ARGUMENT,      "ANALYZE statement argument %s is not a base table") \
        /*188 */_(ER_SQL_COLUMN_COUNT_MAX,      "Failed to create space '%s': space column count %d exceeds the limit (%d)") \
        /*189 */_(ER_HEX_LITERAL_MAX,           "Hex literal %s%s length %d exceeds the supported limit (%d)") \
-       /*190 */_(ER_INT_LITERAL_MAX,           "Integer literal %s exceeds the supported range %lld - %lld") \
+       /*190 */_(ER_INT_LITERAL_MAX,           "Integer literal %s%s exceeds the supported range %lld - %lld") \
        /*191 */_(ER_SQL_PARSER_LIMIT,          "%s %d exceeds the limit (%d)") \
        /*192 */_(ER_INDEX_DEF_UNSUPPORTED,     "%s are prohibited in an index definition") \
        /*193 */_(ER_CK_DEF_UNSUPPORTED,        "%s are prohibited in a CHECK constraint definition") \
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 158631416..4bb5ac180 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -3347,31 +3350,19 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg,
                }
        } else {
                size_t len = strlen(z);
-               if (is_neg) {
-                       /*
-                        * UINT64_MAX is constant, so each integer
-                        * literal certainly contains less digits.
-                        * It makes no sense to continue processing
-                        * string if it contains more characters.
-                        */
-                       if (len > UINT64_MAX_DIGIT_COUNT)
-                               goto int_overflow;
-                       char *neg_z = tt_static_buf();
-                       neg_z[0] = '-';
-                       memcpy(neg_z + 1, z, len++);
-                       neg_z[len]= '\0';
-                       z = neg_z;
-               }
                bool unused;
-               int rc = sql_atoi64(z, &value, &unused, len);
-               if (rc != 0) {
+               if (sql_atoi64(z, &value, &unused, len) != 0 ||
+                   (is_neg && (uint64_t) value > (uint64_t) INT64_MAX + 1) ||
+                   (!is_neg && (uint64_t) value > INT64_MAX)) {
 int_overflow:
-                       diag_set(ClientError, ER_INT_LITERAL_MAX, z, INT64_MIN,
-                                INT64_MAX);
+                       diag_set(ClientError, ER_INT_LITERAL_MAX, sign, z,
+                                INT64_MIN, INT64_MAX);
                        parse->is_aborted = true;
                        return;
                }
        }
+       if (is_neg)
+               value = -value;
        sqlVdbeAddOp4Dup8(v, OP_Int64, 0, mem, 0, (u8 *)&value, P4_INT64);
 }
 
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index 2e7c298e1..a2340f001 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -611,7 +611,7 @@ sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length)
                *val = strtoll(z, &end, 10);
        } else {
                uint64_t u_val = strtoull(z, &end, 10);
-               if (u_val > INT64_MAX)
+               if (u_val > (uint64_t) INT64_MAX + 1)
                        return -1;
                *val = u_val;
        }

diff --git a/test/sql-tap/func.test.lua b/test/sql-tap/func.test.lua
index 09b1cf967..6cb14f3a3 100755
--- a/test/sql-tap/func.test.lua
+++ b/test/sql-tap/func.test.lua
@@ -912,25 +912,25 @@ test:do_execsql_test(
         -- </func-8.6>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func-8.7",
     [[
         SELECT typeof(sum(x)) FROM (SELECT '9223372036' || '854775808' AS x
                             UNION ALL SELECT -9223372036854775807)
     ]], {
         -- <func-8.7>
-        "number"
+        1, "Failed to execute SQL statement: integer overflow"
         -- </func-8.7>
     })
 
-test:do_execsql_test(
+test:do_catchsql_test(
     "func-8.8",
     [[
         SELECT sum(x)>0.0 FROM (SELECT '9223372036' || '854775808' AS x
                             UNION ALL SELECT -9223372036850000000)
     ]], {
         -- <func-8.8>
-        1
+        1, "Failed to execute SQL statement: integer overflow"
         -- </func-8.8>
     })
 
diff --git a/test/sql/integer-overflow.result b/test/sql/integer-overflow.result
index d6ca66ec9..a4944d1a0 100644
--- a/test/sql/integer-overflow.result
+++ b/test/sql/integer-overflow.result
@@ -48,7 +48,11 @@ box.execute('SELECT 9223372036854775808 - 1;')
 --
 box.execute('SELECT CAST(\'9223372036854775808\' AS INTEGER);')
 ---
-- error: 'Type mismatch: can not convert 9223372036854775808 to integer'
+- metadata:
+  - name: CAST('9223372036854775808' AS INTEGER)
+    type: integer
+  rows:
+  - [-9223372036854775808]
 ...
 -- Due to inexact represantation of large integers in terms of
 -- floating point numbers, numerics with value < INT64_MAX

>> }> diff --git a/src/box/sql/util.c b/src/box/sql/util.c
>> index d5c93f8aa..2e7c298e1 100644
>> --- a/src/box/sql/util.c
>> +++ b/src/box/sql/util.c
>> @@ -593,120 +593,42 @@ sqlAtoF(const char *z, double *pResult, int length)
>> int
>> -sql_atoi64(const char *z, int64_t *val, int length)
>> +sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length)
>> {
>> -	int incr = 1;
>> -	u64 u = 0;
>> -	int neg = 0;		/* assume positive */
>> -	int i;
>> -	int c = 0;
>> -	int nonNum = 0;		/* True if input contains UTF16 with high byte non-zero */
>> -	const char *zStart;
>> -	const char *zEnd = z + length;
>> -	incr = 1;
>> -	while (z < zEnd && sqlIsspace(*z))
>> -		z += incr;
>> -	if (z < zEnd) {
>> -		if (*z == '-') {
>> -			neg = 1;
>> -			z += incr;
>> -		} else if (*z == '+') {
>> -			z += incr;
>> -		}
>> -	}
>> -	zStart = z;
>> -	/* Skip leading zeros. */
>> -	while (z < zEnd && z[0] == '0') {
>> -		z += incr;
>> -	}
>> -	for (i = 0; &z[i] < zEnd && (c = z[i]) >= '0' && c <= '9';
>> -	     i += incr) {
>> -		u = u * 10 + c - '0';
>> -	}
>> -	if (u > LARGEST_INT64) {
>> -		*val = neg ? SMALLEST_INT64 : LARGEST_INT64;
>> -	} else if (neg) {
>> -		*val = -(i64) u;
>> +	*is_neg = false;
>> +	const char *str_end = z + length;
>> +	for (; z < str_end && sqlIsspace(*z); z++)
>> +	/* invalid format */
> 
> 4. Usually we start sentences from capital letters and finish
> with dots.

Refactored a bit whole function (almost applied your diff):

diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index 2e7c298e1..826e4a673 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -38,6 +38,7 @@
  */
 #include "sqlInt.h"
 #include <stdarg.h>
+#include <ctype.h>
 #if HAVE_ISNAN || SQL_HAVE_ISNAN
 #include <math.h>
 #endif
@@ -598,26 +599,24 @@ sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length)
 {
        *is_neg = false;
        const char *str_end = z + length;
-       for (; z < str_end && sqlIsspace(*z); z++)
-       /* invalid format */
+       for (; z < str_end && isspace(*z); z++);
        if (z >= str_end)
                return -1;
        if (*z == '-')
                *is_neg = true;
 
-       char* end = NULL;
+       char *end = NULL;
        errno = 0;
-       if (*is_neg) {
+       if (*z == '-') {
+               *is_neg = true;
                *val = strtoll(z, &end, 10);
        } else {
+               *is_neg = false;
                uint64_t u_val = strtoull(z, &end, 10);
-               if (u_val > INT64_MAX)
+               if (u_val > (uint64_t) INT64_MAX + 1)
                        return -1;
                *val = u_val;
        }
-       /* No digits were found, e.g. an empty string. */
-       if (end == z)
-               return -1;

>> +	if (z >= str_end)
>> +		return -1;
>> +	if (*z == '-')
>> +		*is_neg = true;
>> +
>> +	char* end = NULL;
>> +	errno = 0;
>> +	if (*is_neg) {
> 
> 5. I don't like this double check. 4 lines above you already
> know if the value is negative, but you double check it here.

Ok, see diff above.

>> +		*val = strtoll(z, &end, 10);
>> 	} else {
>> -		*val = (i64) u;
>> +		uint64_t u_val = strtoull(z, &end, 10);
>> +		if (u_val > INT64_MAX)
>> +			return -1;
>> +		*val = u_val;
>> 	}
>> -	if (&z[i] < zEnd || (i == 0 && zStart == z) || i > 19 * incr ||
>> -	    nonNum) {
>> -		/* zNum is empty or contains non-numeric text or is longer
>> -		 * than 19 digits (thus guaranteeing that it is too large)
>> -		 */
>> -		return 1;
>> -	} else if (i < 19 * incr) {
>> -		/* Less than 19 digits, so we know that it fits in 64 bits */
>> -		assert(u <= LARGEST_INT64);
>> -		return 0;
>> -	} else {
>> -		/* zNum is a 19-digit numbers.  Compare it against 9223372036854775808. */
>> -		c = compare2pow63(z, incr);
>> -		if (c < 0) {
>> -			/* zNum is less than 9223372036854775808 so it fits */
>> -			assert(u <= LARGEST_INT64);
>> -			return 0;
>> -		} else if (c > 0) {
>> -			/* zNum is greater than 9223372036854775808 so it overflows */
>> -			return 1;
>> -		} else {
>> -			/* zNum is exactly 9223372036854775808.  Fits if negative.  The
>> -			 * special case 2 overflow if positive
>> -			 */
>> -			assert(u - 1 == LARGEST_INT64);
>> -			return neg ? 0 : 2;
>> -		}
>> -	}
>> -}
>> +	/* No digits were found, e.g. an empty string. */
>> +	if (end == z)
>> +		return -1;
> 
> 6. You have already checked emptiness above with
> this condition: 'if (z >= str_end)’.

Indeed, see diff above.

>> +	/*
>> +	 * String has been parsed, but there's
>> +	 * additional character(s) remained.
>> +	 */
>> +	if (*end != '\0')
>> +		return -1;
> 
> 7. What if other symbols are spaces?

So, you are speaking about situation like:

SELECT CAST(“123    “ AS INTEGER);

According to ANSI cast is allowed in this case:

…
8) If TD is exact numeric, then
…
b) If SD is character string, then SV is replaced by SV with any leading or trailing s removed.

Hence, you are right, we should skip trailing space as well.
Applied your diff:

diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index 826e4a673..e73fe3f87 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -618,21 +618,18 @@ sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length)
                *val = u_val;
        }
-       /*
-        * String has been parsed, but there's
-        * additional character(s) remained.
-        */
-       if (*end != '\0')
-               return -1;
        /* Overflow and underflow errors. */
        if (errno != 0)
                return -1;
+       for (; *end != 0; ++end) {
+               if (! isspace(*end))
+                       return -1;
+	}
        return 0;
 }

>> +	/* Overflow and underflow errors. */
>> +	if (errno != 0)
>> +		return -1;
> 
> 8. You didn't remove 'sql_atoi64() == SQL_OK' in some
> places.

Thx, applied this part of your diff.

> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
> index 6ecdb26fc..11a92372b 100644
> --- a/src/box/sql/vdbe.c
> +++ b/src/box/sql/vdbe.c
> @@ -386,7 +386,7 @@ static u16 SQL_NOINLINE computeNumericType(Mem *pMem)
> 	if (sqlAtoF(pMem->z, &pMem->u.r, pMem->n)==0)
> 		return 0;
> 	bool is_neg;
> -	if (sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, &is_neg, pMem->n)==SQL_OK)
> +	if (sql_atoi64(pMem->z, &pMem->u.i, &is_neg, pMem->n) == 0)

This cast is required.. Otherwise on linux compilation error is raised:

/tarantool/src/box/sql/vdbe.c:387:26: error: passing argument 2 of ‘sql_atoi64’ from incompatible pointer type [-Werror=incompatible-pointer-types]
  if (sql_atoi64(pMem->z, &pMem->u.i, &is_neg, pMem->n) == 0)
                          ^
In file included from /tarantool/src/box/sql/vdbe.c:47:0:
/tarantool/src/box/sql/sqlInt.h:4435:1: note: expected ‘int64_t * {aka long int *}’ but argument is of type ‘i64 * {aka long long int *}’
 sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length);





More information about the Tarantool-patches mailing list