[tarantool-patches] [PATCH 1/6] sql: refactor sql_atoi64()

Nikita Pettik korablev at tarantool.org
Fri Jun 7 18:37:41 MSK 2019


We are going to allow using unsigned values in SQL and extend range of
INTEGER type. Hence, we should be able to parse and operate on integers
in range of [2^63, 2^64 - 1]. Current mechanism which involves
sql_atoi64() function doesn't allow this.

Let's refactor this function: firstly, get rid of manual parsing and use
strtoll() and strtoull() functions from standard library. Then, let's
return sign of parsed literal. In case of success now function returns 0,
-1 otherwise.

This patch also inlines sql_dec_or_hex_to_i64() to place of its only
usage: it makes code cleaner and more straightforward.

Needed for #3810
Needed for #4015
---
 src/box/errcode.h      |   2 +-
 src/box/sql/expr.c     |  65 ++++++++++++++++-------
 src/box/sql/sqlInt.h   |  47 ++++-------------
 src/box/sql/sqlLimit.h |   9 ++++
 src/box/sql/util.c     | 140 +++++++++++--------------------------------------
 src/box/sql/vdbe.c     |   6 ++-
 src/box/sql/vdbemem.c  |  14 +++--
 7 files changed, 110 insertions(+), 173 deletions(-)

diff --git a/src/box/errcode.h b/src/box/errcode.h
index 9c15f3322..f4aba2f54 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -242,7 +242,7 @@ struct errcode_record {
 	/*187 */_(ER_SQL_ANALYZE_ARGUMENT,	"ANALYZE statement argument %s is not a base table") \
 	/*188 */_(ER_SQL_COLUMN_COUNT_MAX,	"Failed to create space '%s': space column count %d exceeds the limit (%d)") \
 	/*189 */_(ER_HEX_LITERAL_MAX,		"Hex literal %s%s length %d exceeds the supported limit (%d)") \
-	/*190 */_(ER_INT_LITERAL_MAX,		"Integer literal %s%s exceeds the supported range %lld - %lld") \
+	/*190 */_(ER_INT_LITERAL_MAX,		"Integer literal %s exceeds the supported range %lld - %lld") \
 	/*191 */_(ER_SQL_PARSER_LIMIT,		"%s %d exceeds the limit (%d)") \
 	/*192 */_(ER_INDEX_DEF_UNSUPPORTED,	"%s are prohibited in an index definition") \
 	/*193 */_(ER_CK_DEF_UNSUPPORTED,	"%s are prohibited in a CHECK constraint definition") \
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index ba7eea59d..158631416 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -1196,7 +1196,8 @@ sqlExprAssignVarNumber(Parse * pParse, Expr * pExpr, u32 n)
 			 * variable number
 			 */
 			int64_t i;
-			bool is_ok = 0 == sql_atoi64(&z[1], &i, n - 1);
+			bool is_neg;
+			bool is_ok = 0 == sql_atoi64(&z[1], &i, &is_neg, n - 1);
 			x = (ynVar) i;
 			testcase(i == 0);
 			testcase(i == 1);
@@ -3325,29 +3326,53 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg,
 		if (is_neg)
 			i = -i;
 		sqlVdbeAddOp2(v, OP_Integer, i, mem);
+		return;
+	}
+	int64_t value;
+	const char *z = expr->u.zToken;
+	assert(z != NULL);
+	const char *sign = is_neg ? "-" : "";
+	if (z[0] == '0' && (z[1] == 'x' || z[1] == 'X')) {
+		uint64_t u = 0;
+		int i, k;
+		for (i = 2; z[i] == '0'; i++);
+		for (k = i; sqlIsxdigit(z[k]); k++)
+			u = u * 16 + sqlHexToInt(z[k]);
+		memcpy(&value, &u, 8);
+		if (z[k] != 0 || k - i > 16) {
+			diag_set(ClientError, ER_HEX_LITERAL_MAX, sign, z,
+				 strlen(z) - 2, 16);
+			parse->is_aborted = true;
+			return;
+		}
 	} else {
-		int64_t value;
-		const char *z = expr->u.zToken;
-		assert(z != NULL);
-		int c = sql_dec_or_hex_to_i64(z, &value);
-		if (c == 1 || (c == 2 && !is_neg) ||
-		    (is_neg && value == SMALLEST_INT64)) {
-			const char *sign = is_neg ? "-" : "";
-			if (sql_strnicmp(z, "0x", 2) == 0) {
-				diag_set(ClientError, ER_HEX_LITERAL_MAX, sign,
-					 z, strlen(z) - 2, 16);
-			} else {
-				diag_set(ClientError, ER_INT_LITERAL_MAX, sign,
-					 z, INT64_MIN, INT64_MAX);
-			}
+		size_t len = strlen(z);
+		if (is_neg) {
+			/*
+			 * UINT64_MAX is constant, so each integer
+			 * literal certainly contains less digits.
+			 * It makes no sense to continue processing
+			 * string if it contains more characters.
+			 */
+			if (len > UINT64_MAX_DIGIT_COUNT)
+				goto int_overflow;
+			char *neg_z = tt_static_buf();
+			neg_z[0] = '-';
+			memcpy(neg_z + 1, z, len++);
+			neg_z[len]= '\0';
+			z = neg_z;
+		}
+		bool unused;
+		int rc = sql_atoi64(z, &value, &unused, len);
+		if (rc != 0) {
+int_overflow:
+			diag_set(ClientError, ER_INT_LITERAL_MAX, z, INT64_MIN,
+				 INT64_MAX);
 			parse->is_aborted = true;
-		} else {
-			if (is_neg)
-				value = c == 2 ? SMALLEST_INT64 : -value;
-			sqlVdbeAddOp4Dup8(v, OP_Int64, 0, mem, 0,
-					      (u8 *)&value, P4_INT64);
+			return;
 		}
 	}
+	sqlVdbeAddOp4Dup8(v, OP_Int64, 0, mem, 0, (u8 *)&value, P4_INT64);
 }
 
 /*
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 997a46535..da17e24ed 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -4405,52 +4405,25 @@ field_type_sequence_dup(struct Parse *parse, enum field_type *types,
 			uint32_t len);
 
 /**
- * Convert z to a 64-bit signed integer.  z must be decimal. This
- * routine does *not* accept hexadecimal notation.
- *
- * If the z value is representable as a 64-bit twos-complement
- * integer, then write that value into *val and return 0.
- *
- * If z is exactly 9223372036854775808, return 2.  This special
- * case is broken out because while 9223372036854775808 cannot be
- * a signed 64-bit integer, its negative -9223372036854775808 can
- * be.
- *
- * If z is too big for a 64-bit integer and is not
- * 9223372036854775808  or if z contains any non-numeric text,
- * then return 1.
+ * Convert z to a 64-bit signed or unsigned integer.
+ * z must be decimal. This routine does *not* accept
+ * hexadecimal notation. Under the hood it calls
+ * strtoll or strtoull depending on presence of '-' char.
  *
  * length is the number of bytes in the string (bytes, not
  * characters). The string is not necessarily zero-terminated.
- * The encoding is given by enc.
  *
  * @param z String being parsed.
  * @param[out] val Output integer value.
+ * @param[out] is_neg Sign of the result.
  * @param length String length in bytes.
- * @retval
- *     0    Successful transformation.  Fits in a 64-bit signed
- *          integer.
- *     1    Integer too large for a 64-bit signed integer or is
- *          malformed
- *     2    Special case of 9223372036854775808
- */
-int
-sql_atoi64(const char *z, int64_t *val, int length);
-
-/**
- * Transform a UTF-8 integer literal, in either decimal or
- * hexadecimal, into a 64-bit signed integer.  This routine
- * accepts hexadecimal literals, whereas sql_atoi64() does not.
- *
- * @param z Literal being parsed.
- * @param[out] val Parsed value.
- * @retval
- *     0    Successful transformation.  Fits in a 64-bit signed integer.
- *     1    Integer too large for a 64-bit signed integer or is malformed
- *     2    Special case of 9223372036854775808
+ * @retval 0 Successful transformation. Fits in a 64-bit signed
+ *           or unsigned integer.
+ * @retval -1 Error during parsing: it contains non-digit
+ *            characters or it doesn't fit into 64-bit int.
  */
 int
-sql_dec_or_hex_to_i64(const char *z, int64_t *val);
+sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length);
 
 void sqlErrorWithMsg(sql *, int, const char *, ...);
 void sqlError(sql *, int);
diff --git a/src/box/sql/sqlLimit.h b/src/box/sql/sqlLimit.h
index 53dbe1505..1c3038ed3 100644
--- a/src/box/sql/sqlLimit.h
+++ b/src/box/sql/sqlLimit.h
@@ -44,6 +44,15 @@ enum {
 	SQL_BIND_PARAMETER_MAX = 65000,
 };
 
+enum {
+	/**
+	 * Each unsigned (ergo signed) contains less than
+	 * 21 characters. It allows us to prevent from
+	 * parsing oversized literals and raise an error.
+	 */
+	UINT64_MAX_DIGIT_COUNT = 21
+};
+
 /*
  * The maximum length of a TEXT or BLOB in bytes.   This also
  * limits the size of a row in a table or index.
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index d5c93f8aa..2e7c298e1 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -593,120 +593,42 @@ sqlAtoF(const char *z, double *pResult, int length)
 	return z == zEnd && nDigits > 0 && eValid && nonNum == 0;
 }
 
-/*
- * Compare the 19-character string zNum against the text representation
- * value 2^63:  9223372036854775808.  Return negative, zero, or positive
- * if zNum is less than, equal to, or greater than the string.
- * Note that zNum must contain exactly 19 characters.
- *
- * Unlike memcmp() this routine is guaranteed to return the difference
- * in the values of the last digit if the only difference is in the
- * last digit.  So, for example,
- *
- *      compare2pow63("9223372036854775800", 1)
- *
- * will return -8.
- */
-static int
-compare2pow63(const char *zNum, int incr)
-{
-	int c = 0;
-	int i;
-	/* 012345678901234567 */
-	const char *pow63 = "922337203685477580";
-	for (i = 0; c == 0 && i < 18; i++) {
-		c = (zNum[i * incr] - pow63[i]) * 10;
-	}
-	if (c == 0) {
-		c = zNum[18 * incr] - '8';
-		testcase(c == (-1));
-		testcase(c == 0);
-		testcase(c == (+1));
-	}
-	return c;
-}
-
 int
-sql_atoi64(const char *z, int64_t *val, int length)
+sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length)
 {
-	int incr = 1;
-	u64 u = 0;
-	int neg = 0;		/* assume positive */
-	int i;
-	int c = 0;
-	int nonNum = 0;		/* True if input contains UTF16 with high byte non-zero */
-	const char *zStart;
-	const char *zEnd = z + length;
-	incr = 1;
-	while (z < zEnd && sqlIsspace(*z))
-		z += incr;
-	if (z < zEnd) {
-		if (*z == '-') {
-			neg = 1;
-			z += incr;
-		} else if (*z == '+') {
-			z += incr;
-		}
-	}
-	zStart = z;
-	/* Skip leading zeros. */
-	while (z < zEnd && z[0] == '0') {
-		z += incr;
-	}
-	for (i = 0; &z[i] < zEnd && (c = z[i]) >= '0' && c <= '9';
-	     i += incr) {
-		u = u * 10 + c - '0';
-	}
-	if (u > LARGEST_INT64) {
-		*val = neg ? SMALLEST_INT64 : LARGEST_INT64;
-	} else if (neg) {
-		*val = -(i64) u;
+	*is_neg = false;
+	const char *str_end = z + length;
+	for (; z < str_end && sqlIsspace(*z); z++)
+	/* invalid format */
+	if (z >= str_end)
+		return -1;
+	if (*z == '-')
+		*is_neg = true;
+
+	char* end = NULL;
+	errno = 0;
+	if (*is_neg) {
+		*val = strtoll(z, &end, 10);
 	} else {
-		*val = (i64) u;
+		uint64_t u_val = strtoull(z, &end, 10);
+		if (u_val > INT64_MAX)
+			return -1;
+		*val = u_val;
 	}
-	if (&z[i] < zEnd || (i == 0 && zStart == z) || i > 19 * incr ||
-	    nonNum) {
-		/* zNum is empty or contains non-numeric text or is longer
-		 * than 19 digits (thus guaranteeing that it is too large)
-		 */
-		return 1;
-	} else if (i < 19 * incr) {
-		/* Less than 19 digits, so we know that it fits in 64 bits */
-		assert(u <= LARGEST_INT64);
-		return 0;
-	} else {
-		/* zNum is a 19-digit numbers.  Compare it against 9223372036854775808. */
-		c = compare2pow63(z, incr);
-		if (c < 0) {
-			/* zNum is less than 9223372036854775808 so it fits */
-			assert(u <= LARGEST_INT64);
-			return 0;
-		} else if (c > 0) {
-			/* zNum is greater than 9223372036854775808 so it overflows */
-			return 1;
-		} else {
-			/* zNum is exactly 9223372036854775808.  Fits if negative.  The
-			 * special case 2 overflow if positive
-			 */
-			assert(u - 1 == LARGEST_INT64);
-			return neg ? 0 : 2;
-		}
-	}
-}
+	/* No digits were found, e.g. an empty string. */
+	if (end == z)
+		return -1;
+	/*
+	 * String has been parsed, but there's
+	 * additional character(s) remained.
+	 */
+	if (*end != '\0')
+		return -1;
+	/* Overflow and underflow errors. */
+	if (errno != 0)
+		return -1;
 
-int
-sql_dec_or_hex_to_i64(const char *z, int64_t *val)
-{
-	if (z[0] == '0' && (z[1] == 'x' || z[1] == 'X')) {
-		uint64_t u = 0;
-		int i, k;
-		for (i = 2; z[i] == '0'; i++);
-		for (k = i; sqlIsxdigit(z[k]); k++)
-			u = u * 16 + sqlHexToInt(z[k]);
-		memcpy(val, &u, 8);
-		return (z[k] == 0 && k - i <= 16) ? 0 : 1;
-	}
-	return sql_atoi64(z, val, sqlStrlen30(z));
+	return 0;
 }
 
 /*
diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index d083d3709..6ecdb26fc 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -279,7 +279,8 @@ mem_apply_numeric_type(struct Mem *record)
 	if ((record->flags & (MEM_Str | MEM_Int | MEM_Real)) != MEM_Str)
 		return -1;
 	int64_t integer_value;
-	if (sql_atoi64(record->z, &integer_value, record->n) == 0) {
+	bool is_neg;
+	if (sql_atoi64(record->z, &integer_value, &is_neg, record->n) == 0) {
 		record->u.i = integer_value;
 		MemSetTypeFlag(record, MEM_Int);
 		return 0;
@@ -384,7 +385,8 @@ static u16 SQL_NOINLINE computeNumericType(Mem *pMem)
 	assert((pMem->flags & (MEM_Str|MEM_Blob))!=0);
 	if (sqlAtoF(pMem->z, &pMem->u.r, pMem->n)==0)
 		return 0;
-	if (sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, pMem->n)==SQL_OK)
+	bool is_neg;
+	if (sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, &is_neg, pMem->n)==SQL_OK)
 		return MEM_Int;
 	return MEM_Real;
 }
diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
index 08b649926..3a361d066 100644
--- a/src/box/sql/vdbemem.c
+++ b/src/box/sql/vdbemem.c
@@ -466,7 +466,8 @@ sqlVdbeIntValue(Mem * pMem, int64_t *i)
 		return doubleToInt64(pMem->u.r, i);
 	} else if (flags & (MEM_Str)) {
 		assert(pMem->z || pMem->n == 0);
-		if (sql_atoi64(pMem->z, (int64_t *)i, pMem->n) == 0)
+		bool is_neg;
+		if (sql_atoi64(pMem->z, (int64_t *)i, &is_neg, pMem->n) == 0)
 			return 0;
 	}
 	return -1;
@@ -586,7 +587,9 @@ sqlVdbeMemNumerify(Mem * pMem)
 {
 	if ((pMem->flags & (MEM_Int | MEM_Real | MEM_Null)) == 0) {
 		assert((pMem->flags & (MEM_Blob | MEM_Str)) != 0);
-		if (0 == sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, pMem->n)) {
+		bool is_neg;
+		if (0 == sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, &is_neg,
+				    pMem->n)) {
 			MemSetTypeFlag(pMem, MEM_Int);
 		} else {
 			double v;
@@ -651,7 +654,9 @@ sqlVdbeMemCast(Mem * pMem, enum field_type type)
 	if (pMem->flags & MEM_Null)
 		return SQL_OK;
 	if ((pMem->flags & MEM_Blob) != 0 && type == FIELD_TYPE_NUMBER) {
-		if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i, pMem->n) == 0) {
+		bool is_neg;
+		if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i, &is_neg,
+			       pMem->n) == 0) {
 			MemSetTypeFlag(pMem, MEM_Real);
 			pMem->u.r = pMem->u.i;
 			return 0;
@@ -682,7 +687,8 @@ sqlVdbeMemCast(Mem * pMem, enum field_type type)
 		return -1;
 	case FIELD_TYPE_INTEGER:
 		if ((pMem->flags & MEM_Blob) != 0) {
-			if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i,
+			bool is_neg;
+			if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i, &is_neg,
 				       pMem->n) != 0)
 				return -1;
 			MemSetTypeFlag(pMem, MEM_Int);
-- 
2.15.1





More information about the Tarantool-patches mailing list