From: Nikita Pettik <korablev@tarantool.org>
To: tarantool-patches@freelists.org
Cc: v.shpilevoy@tarantool.org, Nikita Pettik <korablev@tarantool.org>
Subject: [tarantool-patches] [PATCH 1/6] sql: refactor sql_atoi64()
Date: Fri, 7 Jun 2019 18:37:41 +0300 [thread overview]
Message-ID: <15f86ff05673055364c6a6bd11de4568e4c3854f.1559919361.git.korablev@tarantool.org> (raw)
In-Reply-To: <cover.1559919361.git.korablev@tarantool.org>
In-Reply-To: <cover.1559919361.git.korablev@tarantool.org>
We are going to allow using unsigned values in SQL and extend range of
INTEGER type. Hence, we should be able to parse and operate on integers
in range of [2^63, 2^64 - 1]. Current mechanism which involves
sql_atoi64() function doesn't allow this.
Let's refactor this function: firstly, get rid of manual parsing and use
strtoll() and strtoull() functions from standard library. Then, let's
return sign of parsed literal. In case of success now function returns 0,
-1 otherwise.
This patch also inlines sql_dec_or_hex_to_i64() to place of its only
usage: it makes code cleaner and more straightforward.
Needed for #3810
Needed for #4015
---
src/box/errcode.h | 2 +-
src/box/sql/expr.c | 65 ++++++++++++++++-------
src/box/sql/sqlInt.h | 47 ++++-------------
src/box/sql/sqlLimit.h | 9 ++++
src/box/sql/util.c | 140 +++++++++++--------------------------------------
src/box/sql/vdbe.c | 6 ++-
src/box/sql/vdbemem.c | 14 +++--
7 files changed, 110 insertions(+), 173 deletions(-)
diff --git a/src/box/errcode.h b/src/box/errcode.h
index 9c15f3322..f4aba2f54 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -242,7 +242,7 @@ struct errcode_record {
/*187 */_(ER_SQL_ANALYZE_ARGUMENT, "ANALYZE statement argument %s is not a base table") \
/*188 */_(ER_SQL_COLUMN_COUNT_MAX, "Failed to create space '%s': space column count %d exceeds the limit (%d)") \
/*189 */_(ER_HEX_LITERAL_MAX, "Hex literal %s%s length %d exceeds the supported limit (%d)") \
- /*190 */_(ER_INT_LITERAL_MAX, "Integer literal %s%s exceeds the supported range %lld - %lld") \
+ /*190 */_(ER_INT_LITERAL_MAX, "Integer literal %s exceeds the supported range %lld - %lld") \
/*191 */_(ER_SQL_PARSER_LIMIT, "%s %d exceeds the limit (%d)") \
/*192 */_(ER_INDEX_DEF_UNSUPPORTED, "%s are prohibited in an index definition") \
/*193 */_(ER_CK_DEF_UNSUPPORTED, "%s are prohibited in a CHECK constraint definition") \
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index ba7eea59d..158631416 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -1196,7 +1196,8 @@ sqlExprAssignVarNumber(Parse * pParse, Expr * pExpr, u32 n)
* variable number
*/
int64_t i;
- bool is_ok = 0 == sql_atoi64(&z[1], &i, n - 1);
+ bool is_neg;
+ bool is_ok = 0 == sql_atoi64(&z[1], &i, &is_neg, n - 1);
x = (ynVar) i;
testcase(i == 0);
testcase(i == 1);
@@ -3325,29 +3326,53 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg,
if (is_neg)
i = -i;
sqlVdbeAddOp2(v, OP_Integer, i, mem);
+ return;
+ }
+ int64_t value;
+ const char *z = expr->u.zToken;
+ assert(z != NULL);
+ const char *sign = is_neg ? "-" : "";
+ if (z[0] == '0' && (z[1] == 'x' || z[1] == 'X')) {
+ uint64_t u = 0;
+ int i, k;
+ for (i = 2; z[i] == '0'; i++);
+ for (k = i; sqlIsxdigit(z[k]); k++)
+ u = u * 16 + sqlHexToInt(z[k]);
+ memcpy(&value, &u, 8);
+ if (z[k] != 0 || k - i > 16) {
+ diag_set(ClientError, ER_HEX_LITERAL_MAX, sign, z,
+ strlen(z) - 2, 16);
+ parse->is_aborted = true;
+ return;
+ }
} else {
- int64_t value;
- const char *z = expr->u.zToken;
- assert(z != NULL);
- int c = sql_dec_or_hex_to_i64(z, &value);
- if (c == 1 || (c == 2 && !is_neg) ||
- (is_neg && value == SMALLEST_INT64)) {
- const char *sign = is_neg ? "-" : "";
- if (sql_strnicmp(z, "0x", 2) == 0) {
- diag_set(ClientError, ER_HEX_LITERAL_MAX, sign,
- z, strlen(z) - 2, 16);
- } else {
- diag_set(ClientError, ER_INT_LITERAL_MAX, sign,
- z, INT64_MIN, INT64_MAX);
- }
+ size_t len = strlen(z);
+ if (is_neg) {
+ /*
+ * UINT64_MAX is constant, so each integer
+ * literal certainly contains less digits.
+ * It makes no sense to continue processing
+ * string if it contains more characters.
+ */
+ if (len > UINT64_MAX_DIGIT_COUNT)
+ goto int_overflow;
+ char *neg_z = tt_static_buf();
+ neg_z[0] = '-';
+ memcpy(neg_z + 1, z, len++);
+ neg_z[len]= '\0';
+ z = neg_z;
+ }
+ bool unused;
+ int rc = sql_atoi64(z, &value, &unused, len);
+ if (rc != 0) {
+int_overflow:
+ diag_set(ClientError, ER_INT_LITERAL_MAX, z, INT64_MIN,
+ INT64_MAX);
parse->is_aborted = true;
- } else {
- if (is_neg)
- value = c == 2 ? SMALLEST_INT64 : -value;
- sqlVdbeAddOp4Dup8(v, OP_Int64, 0, mem, 0,
- (u8 *)&value, P4_INT64);
+ return;
}
}
+ sqlVdbeAddOp4Dup8(v, OP_Int64, 0, mem, 0, (u8 *)&value, P4_INT64);
}
/*
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 997a46535..da17e24ed 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -4405,52 +4405,25 @@ field_type_sequence_dup(struct Parse *parse, enum field_type *types,
uint32_t len);
/**
- * Convert z to a 64-bit signed integer. z must be decimal. This
- * routine does *not* accept hexadecimal notation.
- *
- * If the z value is representable as a 64-bit twos-complement
- * integer, then write that value into *val and return 0.
- *
- * If z is exactly 9223372036854775808, return 2. This special
- * case is broken out because while 9223372036854775808 cannot be
- * a signed 64-bit integer, its negative -9223372036854775808 can
- * be.
- *
- * If z is too big for a 64-bit integer and is not
- * 9223372036854775808 or if z contains any non-numeric text,
- * then return 1.
+ * Convert z to a 64-bit signed or unsigned integer.
+ * z must be decimal. This routine does *not* accept
+ * hexadecimal notation. Under the hood it calls
+ * strtoll or strtoull depending on presence of '-' char.
*
* length is the number of bytes in the string (bytes, not
* characters). The string is not necessarily zero-terminated.
- * The encoding is given by enc.
*
* @param z String being parsed.
* @param[out] val Output integer value.
+ * @param[out] is_neg Sign of the result.
* @param length String length in bytes.
- * @retval
- * 0 Successful transformation. Fits in a 64-bit signed
- * integer.
- * 1 Integer too large for a 64-bit signed integer or is
- * malformed
- * 2 Special case of 9223372036854775808
- */
-int
-sql_atoi64(const char *z, int64_t *val, int length);
-
-/**
- * Transform a UTF-8 integer literal, in either decimal or
- * hexadecimal, into a 64-bit signed integer. This routine
- * accepts hexadecimal literals, whereas sql_atoi64() does not.
- *
- * @param z Literal being parsed.
- * @param[out] val Parsed value.
- * @retval
- * 0 Successful transformation. Fits in a 64-bit signed integer.
- * 1 Integer too large for a 64-bit signed integer or is malformed
- * 2 Special case of 9223372036854775808
+ * @retval 0 Successful transformation. Fits in a 64-bit signed
+ * or unsigned integer.
+ * @retval -1 Error during parsing: it contains non-digit
+ * characters or it doesn't fit into 64-bit int.
*/
int
-sql_dec_or_hex_to_i64(const char *z, int64_t *val);
+sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length);
void sqlErrorWithMsg(sql *, int, const char *, ...);
void sqlError(sql *, int);
diff --git a/src/box/sql/sqlLimit.h b/src/box/sql/sqlLimit.h
index 53dbe1505..1c3038ed3 100644
--- a/src/box/sql/sqlLimit.h
+++ b/src/box/sql/sqlLimit.h
@@ -44,6 +44,15 @@ enum {
SQL_BIND_PARAMETER_MAX = 65000,
};
+enum {
+ /**
+ * Each unsigned (ergo signed) contains less than
+ * 21 characters. It allows us to prevent from
+ * parsing oversized literals and raise an error.
+ */
+ UINT64_MAX_DIGIT_COUNT = 21
+};
+
/*
* The maximum length of a TEXT or BLOB in bytes. This also
* limits the size of a row in a table or index.
diff --git a/src/box/sql/util.c b/src/box/sql/util.c
index d5c93f8aa..2e7c298e1 100644
--- a/src/box/sql/util.c
+++ b/src/box/sql/util.c
@@ -593,120 +593,42 @@ sqlAtoF(const char *z, double *pResult, int length)
return z == zEnd && nDigits > 0 && eValid && nonNum == 0;
}
-/*
- * Compare the 19-character string zNum against the text representation
- * value 2^63: 9223372036854775808. Return negative, zero, or positive
- * if zNum is less than, equal to, or greater than the string.
- * Note that zNum must contain exactly 19 characters.
- *
- * Unlike memcmp() this routine is guaranteed to return the difference
- * in the values of the last digit if the only difference is in the
- * last digit. So, for example,
- *
- * compare2pow63("9223372036854775800", 1)
- *
- * will return -8.
- */
-static int
-compare2pow63(const char *zNum, int incr)
-{
- int c = 0;
- int i;
- /* 012345678901234567 */
- const char *pow63 = "922337203685477580";
- for (i = 0; c == 0 && i < 18; i++) {
- c = (zNum[i * incr] - pow63[i]) * 10;
- }
- if (c == 0) {
- c = zNum[18 * incr] - '8';
- testcase(c == (-1));
- testcase(c == 0);
- testcase(c == (+1));
- }
- return c;
-}
-
int
-sql_atoi64(const char *z, int64_t *val, int length)
+sql_atoi64(const char *z, int64_t *val, bool *is_neg, int length)
{
- int incr = 1;
- u64 u = 0;
- int neg = 0; /* assume positive */
- int i;
- int c = 0;
- int nonNum = 0; /* True if input contains UTF16 with high byte non-zero */
- const char *zStart;
- const char *zEnd = z + length;
- incr = 1;
- while (z < zEnd && sqlIsspace(*z))
- z += incr;
- if (z < zEnd) {
- if (*z == '-') {
- neg = 1;
- z += incr;
- } else if (*z == '+') {
- z += incr;
- }
- }
- zStart = z;
- /* Skip leading zeros. */
- while (z < zEnd && z[0] == '0') {
- z += incr;
- }
- for (i = 0; &z[i] < zEnd && (c = z[i]) >= '0' && c <= '9';
- i += incr) {
- u = u * 10 + c - '0';
- }
- if (u > LARGEST_INT64) {
- *val = neg ? SMALLEST_INT64 : LARGEST_INT64;
- } else if (neg) {
- *val = -(i64) u;
+ *is_neg = false;
+ const char *str_end = z + length;
+ for (; z < str_end && sqlIsspace(*z); z++)
+ /* invalid format */
+ if (z >= str_end)
+ return -1;
+ if (*z == '-')
+ *is_neg = true;
+
+ char* end = NULL;
+ errno = 0;
+ if (*is_neg) {
+ *val = strtoll(z, &end, 10);
} else {
- *val = (i64) u;
+ uint64_t u_val = strtoull(z, &end, 10);
+ if (u_val > INT64_MAX)
+ return -1;
+ *val = u_val;
}
- if (&z[i] < zEnd || (i == 0 && zStart == z) || i > 19 * incr ||
- nonNum) {
- /* zNum is empty or contains non-numeric text or is longer
- * than 19 digits (thus guaranteeing that it is too large)
- */
- return 1;
- } else if (i < 19 * incr) {
- /* Less than 19 digits, so we know that it fits in 64 bits */
- assert(u <= LARGEST_INT64);
- return 0;
- } else {
- /* zNum is a 19-digit numbers. Compare it against 9223372036854775808. */
- c = compare2pow63(z, incr);
- if (c < 0) {
- /* zNum is less than 9223372036854775808 so it fits */
- assert(u <= LARGEST_INT64);
- return 0;
- } else if (c > 0) {
- /* zNum is greater than 9223372036854775808 so it overflows */
- return 1;
- } else {
- /* zNum is exactly 9223372036854775808. Fits if negative. The
- * special case 2 overflow if positive
- */
- assert(u - 1 == LARGEST_INT64);
- return neg ? 0 : 2;
- }
- }
-}
+ /* No digits were found, e.g. an empty string. */
+ if (end == z)
+ return -1;
+ /*
+ * String has been parsed, but there's
+ * additional character(s) remained.
+ */
+ if (*end != '\0')
+ return -1;
+ /* Overflow and underflow errors. */
+ if (errno != 0)
+ return -1;
-int
-sql_dec_or_hex_to_i64(const char *z, int64_t *val)
-{
- if (z[0] == '0' && (z[1] == 'x' || z[1] == 'X')) {
- uint64_t u = 0;
- int i, k;
- for (i = 2; z[i] == '0'; i++);
- for (k = i; sqlIsxdigit(z[k]); k++)
- u = u * 16 + sqlHexToInt(z[k]);
- memcpy(val, &u, 8);
- return (z[k] == 0 && k - i <= 16) ? 0 : 1;
- }
- return sql_atoi64(z, val, sqlStrlen30(z));
+ return 0;
}
/*
diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index d083d3709..6ecdb26fc 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -279,7 +279,8 @@ mem_apply_numeric_type(struct Mem *record)
if ((record->flags & (MEM_Str | MEM_Int | MEM_Real)) != MEM_Str)
return -1;
int64_t integer_value;
- if (sql_atoi64(record->z, &integer_value, record->n) == 0) {
+ bool is_neg;
+ if (sql_atoi64(record->z, &integer_value, &is_neg, record->n) == 0) {
record->u.i = integer_value;
MemSetTypeFlag(record, MEM_Int);
return 0;
@@ -384,7 +385,8 @@ static u16 SQL_NOINLINE computeNumericType(Mem *pMem)
assert((pMem->flags & (MEM_Str|MEM_Blob))!=0);
if (sqlAtoF(pMem->z, &pMem->u.r, pMem->n)==0)
return 0;
- if (sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, pMem->n)==SQL_OK)
+ bool is_neg;
+ if (sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, &is_neg, pMem->n)==SQL_OK)
return MEM_Int;
return MEM_Real;
}
diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
index 08b649926..3a361d066 100644
--- a/src/box/sql/vdbemem.c
+++ b/src/box/sql/vdbemem.c
@@ -466,7 +466,8 @@ sqlVdbeIntValue(Mem * pMem, int64_t *i)
return doubleToInt64(pMem->u.r, i);
} else if (flags & (MEM_Str)) {
assert(pMem->z || pMem->n == 0);
- if (sql_atoi64(pMem->z, (int64_t *)i, pMem->n) == 0)
+ bool is_neg;
+ if (sql_atoi64(pMem->z, (int64_t *)i, &is_neg, pMem->n) == 0)
return 0;
}
return -1;
@@ -586,7 +587,9 @@ sqlVdbeMemNumerify(Mem * pMem)
{
if ((pMem->flags & (MEM_Int | MEM_Real | MEM_Null)) == 0) {
assert((pMem->flags & (MEM_Blob | MEM_Str)) != 0);
- if (0 == sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, pMem->n)) {
+ bool is_neg;
+ if (0 == sql_atoi64(pMem->z, (int64_t *)&pMem->u.i, &is_neg,
+ pMem->n)) {
MemSetTypeFlag(pMem, MEM_Int);
} else {
double v;
@@ -651,7 +654,9 @@ sqlVdbeMemCast(Mem * pMem, enum field_type type)
if (pMem->flags & MEM_Null)
return SQL_OK;
if ((pMem->flags & MEM_Blob) != 0 && type == FIELD_TYPE_NUMBER) {
- if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i, pMem->n) == 0) {
+ bool is_neg;
+ if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i, &is_neg,
+ pMem->n) == 0) {
MemSetTypeFlag(pMem, MEM_Real);
pMem->u.r = pMem->u.i;
return 0;
@@ -682,7 +687,8 @@ sqlVdbeMemCast(Mem * pMem, enum field_type type)
return -1;
case FIELD_TYPE_INTEGER:
if ((pMem->flags & MEM_Blob) != 0) {
- if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i,
+ bool is_neg;
+ if (sql_atoi64(pMem->z, (int64_t *) &pMem->u.i, &is_neg,
pMem->n) != 0)
return -1;
MemSetTypeFlag(pMem, MEM_Int);
--
2.15.1
next prev parent reply other threads:[~2019-06-07 15:37 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-07 15:37 [tarantool-patches] [PATCH 0/6] Introduce UNSIGNED type in SQL Nikita Pettik
2019-06-07 15:37 ` Nikita Pettik [this message]
2019-06-11 21:11 ` [tarantool-patches] Re: [PATCH 1/6] sql: refactor sql_atoi64() Vladislav Shpilevoy
2019-07-01 14:20 ` n.pettik
2019-07-01 21:53 ` Vladislav Shpilevoy
2019-07-05 16:32 ` n.pettik
2019-06-07 15:37 ` [tarantool-patches] [PATCH 2/6] sql: separate VDBE memory holding positive and negative ints Nikita Pettik
2019-06-11 21:11 ` [tarantool-patches] " Vladislav Shpilevoy
2019-07-01 14:21 ` n.pettik
2019-07-01 21:53 ` Vladislav Shpilevoy
2019-07-05 16:33 ` n.pettik
2019-06-07 15:37 ` [tarantool-patches] [PATCH 3/6] sql: refactor arithmetic operations to support unsigned ints Nikita Pettik
2019-06-11 21:11 ` [tarantool-patches] " Vladislav Shpilevoy
2019-07-01 14:21 ` n.pettik
2019-07-01 21:53 ` Vladislav Shpilevoy
2019-07-05 16:36 ` n.pettik
2019-07-10 22:49 ` Vladislav Shpilevoy
2019-07-17 12:24 ` n.pettik
2019-06-07 15:37 ` [tarantool-patches] [PATCH 4/6] sql: make built-in functions operate on unsigned values Nikita Pettik
2019-06-11 21:11 ` [tarantool-patches] " Vladislav Shpilevoy
2019-07-01 14:21 ` n.pettik
2019-07-01 21:53 ` Vladislav Shpilevoy
2019-07-05 16:36 ` n.pettik
2019-07-10 22:49 ` Vladislav Shpilevoy
2019-07-17 0:53 ` n.pettik
2019-06-07 15:37 ` [tarantool-patches] [PATCH 5/6] sql: introduce extended range for INTEGER type Nikita Pettik
2019-06-11 21:11 ` [tarantool-patches] " Vladislav Shpilevoy
2019-07-01 14:21 ` n.pettik
2019-07-01 21:53 ` Vladislav Shpilevoy
2019-07-24 15:59 ` Konstantin Osipov
2019-07-24 16:54 ` n.pettik
2019-07-24 17:09 ` Konstantin Osipov
2019-06-07 15:37 ` [tarantool-patches] [PATCH 6/6] sql: allow to specify UNSIGNED column type Nikita Pettik
2019-07-01 21:53 ` [tarantool-patches] " Vladislav Shpilevoy
2019-07-05 16:36 ` n.pettik
2019-07-10 22:49 ` Vladislav Shpilevoy
2019-07-11 21:25 ` Vladislav Shpilevoy
2019-07-17 0:53 ` n.pettik
2019-07-18 20:18 ` Vladislav Shpilevoy
2019-07-18 20:56 ` n.pettik
2019-07-18 21:08 ` Vladislav Shpilevoy
2019-07-18 21:13 ` Vladislav Shpilevoy
2019-07-22 10:20 ` n.pettik
2019-07-22 19:17 ` Vladislav Shpilevoy
2019-07-22 10:20 ` n.pettik
2019-07-17 0:54 ` n.pettik
2019-07-18 20:18 ` Vladislav Shpilevoy
2019-08-06 19:36 ` n.pettik
2019-07-24 13:01 ` [tarantool-patches] Re: [PATCH 0/6] Introduce UNSIGNED type in SQL Kirill Yukhin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=15f86ff05673055364c6a6bd11de4568e4c3854f.1559919361.git.korablev@tarantool.org \
--to=korablev@tarantool.org \
--cc=tarantool-patches@freelists.org \
--cc=v.shpilevoy@tarantool.org \
--subject='Re: [tarantool-patches] [PATCH 1/6] sql: refactor sql_atoi64()' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox