Tarantool development patches archive
 help / color / mirror / Atom feed
From: Nikita Pettik <korablev@tarantool.org>
To: tarantool-patches@freelists.org
Cc: v.shpilevoy@tarantool.org, Nikita Pettik <korablev@tarantool.org>
Subject: [tarantool-patches] [PATCH 1/6] sql: refactor sql_atoi64()
Date: Fri,  7 Jun 2019 18:37:41 +0300	[thread overview]
Message-ID: <15f86ff05673055364c6a6bd11de4568e4c3854f.1559919361.git.korablev@tarantool.org> (raw)
In-Reply-To: <cover.1559919361.git.korablev@tarantool.org>
In-Reply-To: <cover.1559919361.git.korablev@tarantool.org>

We are going to allow using unsigned values in SQL and extend range of
INTEGER type. Hence, we should be able to parse and operate on integers
in range of [2^63, 2^64 - 1]. Current mechanism which involves
sql_atoi64() function doesn't allow this.

Let's refactor this function: firstly, get rid of manual parsing and use
strtoll() and strtoull() functions from standard library. Then, let's
return sign of parsed literal. In case of success now function returns 0,
-1 otherwise.

This patch also inlines sql_dec_or_hex_to_i64() to place of its only
usage: it makes code cleaner and more straightforward.

Needed for #3810
Needed for #4015
---
 src/box/errcode.h      |   2 +-
 src/box/sql/expr.c     |  65 ++++++++++++++++-------
 src/box/sql/sqlInt.h   |  47 ++++-------------
 src/box/sql/sqlLimit.h |   9 ++++
 src/box/sql/util.c     | 140 +++++++++++--------------------------------------
 src/box/sql/vdbe.c     |   6 ++-
 src/box/sql/vdbemem.c  |  14 +++--
 7 files changed, 110 insertions(+), 173 deletions(-)

diff --git a/src/box/errcode.h b/src/box/errcode.h
index 9c15f3322..f4aba2f54 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -242,7 +242,7 @@ struct errcode_record {
 	/*187 */_(ER_SQL_ANALYZE_ARGUMENT,	"ANALYZE statement argument %s is not a base table") \
 	/*188 */_(ER_SQL_COLUMN_COUNT_MAX,	"Failed to create space '%s': space column count %d exceeds the limit (%d)") \
 	/*189 */_(ER_HEX_LITERAL_MAX,		"Hex literal %s%s length %d exceeds the supported limit (%d)") \
-	/*190 */_(ER_INT_LITERAL_MAX,		"Integer literal %s%s exceeds the supported range %lld - %lld") \
+	/*190 */_(ER_INT_LITERAL_MAX,		"Integer literal %s exceeds the supported range %lld - %lld") \
 	/*191 */_(ER_SQL_PARSER_LIMIT,		"%s %d exceeds the limit (%d)") \
 	/*192 */_(ER_INDEX_DEF_UNSUPPORTED,	"%s are prohibited in an index definition") \
 	/*193 */_(ER_CK_DEF_UNSUPPORTED,	"%s are prohibited in a CHECK constraint definition") \
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index ba7eea59d..158631416 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -1196,7 +1196,8 @@ sqlExprAssignVarNumber(Parse * pParse, Expr * pExpr, u32 n)
 			 * variable number
 			 */
 			int64_t i;
-			bool is_ok = 0 == sql_atoi64(&z[1], &i, n - 1);
+			bool is_neg;
+			bool is_ok = 0 == sql_atoi64(&z[1], &i, &is_neg, n - 1);
 			x = (ynVar) i;
 			testcase(i == 0);
 			testcase(i == 1);
@@ -3325,29 +3326,53 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg,
 		if (is_neg)
 			i = -i;
 		sqlVdbeAddOp2(v, OP_Integer, i, mem);
+		return;
+	}
+	int64_t value;
+	const char *z = expr->u.zToken;
+	assert(z != NULL);
+	const char *sign = is_neg ? "-" : "";
+	if (z[0] == '0' && (z[1] == 'x' || z[1] == 'X')) {
+		uint64_t u = 0;
+		int i, k;
+		for (i = 2; z[i] == '0'; i++);
+		for (k = i; sqlIsxdigit(z[k]); k++)
+			u = u * 16 + sqlHexToInt(z[k]);
+		memcpy(&value, &u, 8);
+		if (z[k] != 0 || k - i > 16) {
+			diag_set(ClientError, ER_HEX_LITERAL_MAX, sign, z,
+				 strlen(z) - 2, 16);
+			parse->is_aborted = true;
+			return;
+		}
 	} else {
-		int64_t value;
-		const char *z = expr->u.zToken;
-		assert(z != NULL);
-		int c = sql_dec_or_hex_to_i64(z, &value);
-		if (c == 1 || (c == 2 && !is_neg) ||
-		    (is_neg && value == SMALLEST_INT64)) {
-			const char *sign = is_neg ? "-" : "";
-			if (sql_strnicmp(z, "0x", 2) == 0) {
-				diag_set(ClientError, ER_HEX_LITERAL_MAX, sign,
-					 z, strlen(z) - 2, 16);
-			} else {
-				diag_set(ClientError, ER_INT_LITERAL_MAX, sign,
-					 z, INT64_MIN, INT64_MAX);
-			}
+		size_t len = strlen(z);
+		if (is_neg) {
+			/*
+			 * UINT64_MAX is constant, so each integer
+			 * literal certainly contains less digits.
+			 * It makes no sense to continue processing
+			 * string if it contains more characters.
+			 */
+			if (len > UINT64_MAX_DIGIT_COUNT)
+				goto int_overflow;
+			char *neg_z = tt_static_buf();
+			neg_z[0] = '-';
+			memcpy(neg_z + 1, z, len++);
+			neg_z[len]= '\0';
+			z = neg_z;
+		}
+		bool unused;
+		int rc = sql_atoi64(z, &value, &unused, len);
+		if (rc != 0) {
+int_overflow:
+			diag_set(ClientError, ER_INT_LITERAL_MAX, z, INT64_MIN,
+				 INT64_MAX);
 			parse->is_aborted = true;
-		} else {
-			if (is_neg)
-				value = c == 2 ? SMALLEST_INT64 : -value;
-			sqlVdbeAddOp4Dup8(v, OP_Int64, 0, mem, 0,
-					      (u8 *)&value, P4_INT64);
+			return;
 		}
 	}
+	sqlVdbeAddOp4Dup8(v, OP_Int64, 0, mem, 0, (u8 *)&value, P4_INT64);
 }
 
 /*
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 997a46535..da17e24ed 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -4405,52 +4405,25 @@ field_type_sequence_dup(struct Parse *parse, enum field_type *types,
 			uint32_t len);
 
 /**
- * Convert z to a 64-bit signed integer.  z must be decimal. This
- * routine does *not* accept hexadecimal notation.
- *
- * If the z value is representable as a 64-bit twos-complement
- * integer, then write that value into *val and return 0.
- *
- * If z is exactly 9223372036854775808, return 2.  This special
- * case is broken out because while 9223372036854775808 cannot be
- * a signed 64-bit integer, its negative -9223372036854775808 can
- * be.
- *
- * If z is too big for a 64-bit integer and is not
- * 9223372036854775808  or if z contains any non-numeric text,
- * then return 1.
+ * Convert z to a 64-bit signed or unsigned integer.
+ * z must be decimal. This routine does *not* accept
+ * hexadecimal notation. Under the hood it calls
+ * strtoll or strtoull depending on presence of '-' char.
  *
  * length is the number of bytes in the string (bytes, not
  * characters). The string is not necessarily zero-terminated.
- * The encoding is given by enc.
  *
  * @param z String being parsed.
  * @param[out] val Output integer value.
+ * @param[out] is_neg Sign of the result.
  * @param length String length in bytes.
- * @retval
- *     0    Successful transformation.  Fits in a 64-bit signed
- *          integer.
- *     1    Integer too large for a 64-bit signed integer or is
- *          malformed
- *     2    Special case of 9223372036854775808
- */
-int
-sql_atoi64(const char *z, int64_t *val, int length);
-
-/**
- * Transform a UTF-8 integer literal, in either decimal or
- * hexadecimal, into a 64-bit signed integer.  This routine
- * accepts hexadecimal literals, whereas sql_atoi64() does not.
- *
- * @param z Literal being parsed.
- * @param[out] val Parsed value.
- * @retval
- *     0    Successful transformation.  Fits in a 64-bit signed integer.
- *     1    Integer too large for a 64-bit signed integer or is malformed
- *     2    Special case of 9223372036854775808
+ * @retval 0 Successful transformation. Fits in a 64-bit signed
+ *           or unsigned integer.
+ * @retval -1 Error during parsing: it contains non-digit
+ *            characters or it doesn't fit into 64-bit int.
  */
 int
-sql_dec_or_hex_to_i64(const char *z, int64_t *val);
+sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length);
 
 void sqlErrorWithMsg(sql *, int, const char *, ...);
 void sqlError(sql *, int);
diff --git a/src/box/sql/sqlLimit.h b/src/box/sql/sqlLimit.h
index 53dbe1505..1c3038ed3 100644
--- a/src/box/sql/sqlLimit.h
+++ b/src/box/sql/sqlLimit.h
@@ -44,6 +44,15 @@ enum {
 	SQL_BIND_PARAMETER_MAX = 65000,
 };
 
+enum {
+	/**
+	 * Each unsigned (ergo signed) contains less than
+	 * 21 characters. It allows us to prevent from
+	 * parsing oversized literals and raise an error.
+	 */
+	UINT64_MAX_DIGIT_COUNT = 21
+};
+
 /*
  * The maximum length of a TEXT or BLOB in bytes.   This also
  * limits the size of a row in a table or index.
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index d5c93f8aa..2e7c298e1 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -593,120 +593,42 @@ sqlAtoF(const char *z, double *pResult, int length)
 	return z == zEnd && nDigits > 0 && eValid && nonNum == 0;
 }
 
-/*
- * Compare the 19-character string zNum against the text representation
- * value 2^63:  9223372036854775808.  Return negative, zero, or positive
- * if zNum is less than, equal to, or greater than the string.
- * Note that zNum must contain exactly 19 characters.
- *
- * Unlike memcmp() this routine is guaranteed to return the difference
- * in the values of the last digit if the only difference is in the
- * last digit.  So, for example,
- *
- *      compare2pow63("9223372036854775800", 1)
- *
- * will return -8.
- */
-static int
-compare2pow63(const char *zNum, int incr)
-{
-	int c = 0;
-	int i;
-	/* 012345678901234567 */
-	const char *pow63 = "922337203685477580";
-	for (i = 0; c == 0 && i < 18; i++) {
-		c = (zNum[i * incr] - pow63[i]) * 10;
-	}
-	if (c == 0) {
-		c = zNum[18 * incr] - '8';
-		testcase(c == (-1));
-		testcase(c == 0);
-		testcase(c == (+1));
-	}
-	return c;
-}
-
 int
-sql_atoi64(const char *z, int64_t *val, int length)
+sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length)
 {
-	int incr = 1;
-	u64 u = 0;
-	int neg = 0;		/* assume positive */
-	int i;
-	int c = 0;
-	int nonNum = 0;		/* True if input contains UTF16 with high byte non-zero */
-	const char *zStart;
-	const char *zEnd = z + length;
-	incr = 1;
-	while (z < zEnd && sqlIsspace(*z))
-		z += incr;
-	if (z < zEnd) {
-		if (*z == '-') {
-			neg = 1;
-			z += incr;
-		} else if (*z == '+') {
-			z += incr;
-		}
-	}
-	zStart = z;
-	/* Skip leading zeros. */
-	while (z < zEnd && z[0] == '0') {
-		z += incr;
-	}
-	for (i = 0; &z[i] < zEnd && (c = z[i]) >= '0' && c <= '9';
-	     i += incr) {
-		u = u * 10 + c - '0';
-	}
-	if (u > LARGEST_INT64) {
-		*val = neg ? SMALLEST_INT64 : LARGEST_INT64;
-	} else if (neg) {
-		*val = -(i64) u;
+	*is_neg = false;
+	const char *str_end = z + length;
+	for (; z < str_end && sqlIsspace(*z); z++)
+	/* invalid format */
+	if (z >= str_end)
+		return -1;
+	if (*z == '-')
+		*is_neg = true;
+
+	char* end = NULL;
+	errno = 0;
+	if (*is_neg) {
+		*val = strtoll(z, &end, 10);
 	} else {
-		*val = (i64) u;
+		uint64_t u_val = strtoull(z, &end, 10);
+		if (u_val > INT64_MAX)
+			return -1;
+		*val = u_val;
 	}
-	if (&z[i] < zEnd || (i == 0 && zStart == z) || i > 19 * incr ||
-	    nonNum) {
-		/* zNum is empty or contains non-numeric text or is longer
-		 * than 19 digits (thus guaranteeing that it is too large)
-		 */
-		return 1;
-	} else if (i < 19 * incr) {
-		/* Less than 19 digits, so we know that it fits in 64 bits */
-		assert(u <= LARGEST_INT64);
-		return 0;
-	} else {
-		/* zNum is a 19-digit numbers.  Compare it against 9223372036854775808. */
-		c = compare2pow63(z, incr);
-		if (c < 0) {
-			/* zNum is less than 9223372036854775808 so it fits */
-			assert(u <= LARGEST_INT64);
-			return 0;
-		} else if (c > 0) {
-			/* zNum is greater than 9223372036854775808 so it overflows */
-			return 1;
-		} else {
-			/* zNum is exactly 9223372036854775808.  Fits if negative.  The
-			 * special case 2 overflow if positive
-			 */
-			assert(u - 1 == LARGEST_INT64);
-			return neg ? 0 : 2;
-		}
-	}
-}
+	/* No digits were found, e.g. an empty string. */
+	if (end == z)
+		return -1;
+	/*
+	 * String has been parsed, but there's
+	 * additional character(s) remained.
+	 */
+	if (*end != '\0')
+		return -1;
+	/* Overflow and underflow errors. */
+	if (errno != 0)
+		return -1;
 
-int
-sql_dec_or_hex_to_i64(const char *z, int64_t *val)
-{
-	if (z[0] == '0' && (z[1] == 'x' || z[1] == 'X')) {
-		uint64_t u = 0;
-		int i, k;
-		for (i = 2; z[i] == '0'; i++);
-		for (k = i; sqlIsxdigit(z[k]); k++)
-			u = u * 16 + sqlHexToInt(z[k]);
-		memcpy(val, &u, 8);
-		return (z[k] == 0 && k - i <= 16) ? 0 : 1;
-	}
-	return sql_atoi64(z, val, sqlStrlen30(z));
+	return 0;
 }
 
 /*
diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index d083d3709..6ecdb26fc 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -279,7 +279,8 @@ mem_apply_numeric_type(struct Mem *record)
 	if ((record->flags & (MEM_Str | MEM_Int | MEM_Real)) != MEM_Str)
 		return -1;
 	int64_t integer_value;
-	if (sql_atoi64(record->z, &integer_value, record->n) == 0) {
+	bool is_neg;
+	if (sql_atoi64(record->z, &integer_value, &is_neg, record->n) == 0) {
 		record->u.i = integer_value;
 		MemSetTypeFlag(record, MEM_Int);
 		return 0;
@@ -384,7 +385,8 @@ static u16 SQL_NOINLINE computeNumericType(Mem *pMem)
 	assert((pMem->flags & (MEM_Str|MEM_Blob))!=0);
 	if (sqlAtoF(pMem->z, &pMem->u.r, pMem->n)==0)
 		return 0;
-	if (sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, pMem->n)==SQL_OK)
+	bool is_neg;
+	if (sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, &is_neg, pMem->n)==SQL_OK)
 		return MEM_Int;
 	return MEM_Real;
 }
diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
index 08b649926..3a361d066 100644
--- a/src/box/sql/vdbemem.c
+++ b/src/box/sql/vdbemem.c
@@ -466,7 +466,8 @@ sqlVdbeIntValue(Mem * pMem, int64_t *i)
 		return doubleToInt64(pMem->u.r, i);
 	} else if (flags & (MEM_Str)) {
 		assert(pMem->z || pMem->n == 0);
-		if (sql_atoi64(pMem->z, (int64_t *)i, pMem->n) == 0)
+		bool is_neg;
+		if (sql_atoi64(pMem->z, (int64_t *)i, &is_neg, pMem->n) == 0)
 			return 0;
 	}
 	return -1;
@@ -586,7 +587,9 @@ sqlVdbeMemNumerify(Mem * pMem)
 {
 	if ((pMem->flags & (MEM_Int | MEM_Real | MEM_Null)) == 0) {
 		assert((pMem->flags & (MEM_Blob | MEM_Str)) != 0);
-		if (0 == sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, pMem->n)) {
+		bool is_neg;
+		if (0 == sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, &is_neg,
+				    pMem->n)) {
 			MemSetTypeFlag(pMem, MEM_Int);
 		} else {
 			double v;
@@ -651,7 +654,9 @@ sqlVdbeMemCast(Mem * pMem, enum field_type type)
 	if (pMem->flags & MEM_Null)
 		return SQL_OK;
 	if ((pMem->flags & MEM_Blob) != 0 && type == FIELD_TYPE_NUMBER) {
-		if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i, pMem->n) == 0) {
+		bool is_neg;
+		if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i, &is_neg,
+			       pMem->n) == 0) {
 			MemSetTypeFlag(pMem, MEM_Real);
 			pMem->u.r = pMem->u.i;
 			return 0;
@@ -682,7 +687,8 @@ sqlVdbeMemCast(Mem * pMem, enum field_type type)
 		return -1;
 	case FIELD_TYPE_INTEGER:
 		if ((pMem->flags & MEM_Blob) != 0) {
-			if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i,
+			bool is_neg;
+			if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i, &is_neg,
 				       pMem->n) != 0)
 				return -1;
 			MemSetTypeFlag(pMem, MEM_Int);
-- 
2.15.1

  reply	other threads:[~2019-06-07 15:37 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-07 15:37 [tarantool-patches] [PATCH 0/6] Introduce UNSIGNED type in SQL Nikita Pettik
2019-06-07 15:37 ` Nikita Pettik [this message]
2019-06-11 21:11   ` [tarantool-patches] Re: [PATCH 1/6] sql: refactor sql_atoi64() Vladislav Shpilevoy
2019-07-01 14:20     ` n.pettik
2019-07-01 21:53       ` Vladislav Shpilevoy
2019-07-05 16:32         ` n.pettik
2019-06-07 15:37 ` [tarantool-patches] [PATCH 2/6] sql: separate VDBE memory holding positive and negative ints Nikita Pettik
2019-06-11 21:11   ` [tarantool-patches] " Vladislav Shpilevoy
2019-07-01 14:21     ` n.pettik
2019-07-01 21:53       ` Vladislav Shpilevoy
2019-07-05 16:33         ` n.pettik
2019-06-07 15:37 ` [tarantool-patches] [PATCH 3/6] sql: refactor arithmetic operations to support unsigned ints Nikita Pettik
2019-06-11 21:11   ` [tarantool-patches] " Vladislav Shpilevoy
2019-07-01 14:21     ` n.pettik
2019-07-01 21:53       ` Vladislav Shpilevoy
2019-07-05 16:36         ` n.pettik
2019-07-10 22:49           ` Vladislav Shpilevoy
2019-07-17 12:24             ` n.pettik
2019-06-07 15:37 ` [tarantool-patches] [PATCH 4/6] sql: make built-in functions operate on unsigned values Nikita Pettik
2019-06-11 21:11   ` [tarantool-patches] " Vladislav Shpilevoy
2019-07-01 14:21     ` n.pettik
2019-07-01 21:53       ` Vladislav Shpilevoy
2019-07-05 16:36         ` n.pettik
2019-07-10 22:49           ` Vladislav Shpilevoy
2019-07-17  0:53             ` n.pettik
2019-06-07 15:37 ` [tarantool-patches] [PATCH 5/6] sql: introduce extended range for INTEGER type Nikita Pettik
2019-06-11 21:11   ` [tarantool-patches] " Vladislav Shpilevoy
2019-07-01 14:21     ` n.pettik
2019-07-01 21:53       ` Vladislav Shpilevoy
2019-07-24 15:59   ` Konstantin Osipov
2019-07-24 16:54     ` n.pettik
2019-07-24 17:09       ` Konstantin Osipov
2019-06-07 15:37 ` [tarantool-patches] [PATCH 6/6] sql: allow to specify UNSIGNED column type Nikita Pettik
2019-07-01 21:53   ` [tarantool-patches] " Vladislav Shpilevoy
2019-07-05 16:36     ` n.pettik
2019-07-10 22:49       ` Vladislav Shpilevoy
2019-07-11 21:25         ` Vladislav Shpilevoy
2019-07-17  0:53           ` n.pettik
2019-07-18 20:18             ` Vladislav Shpilevoy
2019-07-18 20:56               ` n.pettik
2019-07-18 21:08                 ` Vladislav Shpilevoy
2019-07-18 21:13                   ` Vladislav Shpilevoy
2019-07-22 10:20                     ` n.pettik
2019-07-22 19:17                       ` Vladislav Shpilevoy
2019-07-22 10:20                   ` n.pettik
2019-07-17  0:54         ` n.pettik
2019-07-18 20:18           ` Vladislav Shpilevoy
2019-08-06 19:36         ` n.pettik
2019-07-24 13:01 ` [tarantool-patches] Re: [PATCH 0/6] Introduce UNSIGNED type in SQL Kirill Yukhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=15f86ff05673055364c6a6bd11de4568e4c3854f.1559919361.git.korablev@tarantool.org \
    --to=korablev@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [tarantool-patches] [PATCH 1/6] sql: refactor sql_atoi64()' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox