Tarantool development patches archive
 help / color / mirror / Atom feed
From: imeevma@tarantool.org
To: korablev@tarantool.org, tsafin@tarantool.org,
	tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH v6 18/22] sql: add implicit cast between numbers in OP_Seek*
Date: Thu, 16 Jul 2020 17:47:11 +0300	[thread overview]
Message-ID: <83ea3cd7b4a64a5bdcea59454f55f284f894a235.1594909974.git.imeevma@gmail.com> (raw)
In-Reply-To: <cover.1594909974.git.imeevma@gmail.com>

This patch adds new rules for implicit casting between numbers in
OP_Seek * opcodes. They are still not used because the ApplyType
opcode is converting numbers, but this will be changed in the next
patch. Conversion within the ApplyType opcode can affect the
result of comparison operations.

Part of #4230
---
 src/box/sql/vdbe.c | 329 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 329 insertions(+)

diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index d853a0edb..dd1ce97b7 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -522,6 +522,265 @@ mem_convert_to_numeric(struct Mem *mem, enum field_type type)
 	return mem_convert_to_integer(mem);
 }
 
+/**
+ * Convert the numeric value contained in the MEM to UNSIGNED.
+ * @see mem_prepare_for_cmp() for more info.
+ *
+ * @param mem The MEM that contains the numeric value.
+ * @param[in][out] oc Operation code.
+ * @retval 0 if the conversion was successful, -1 otherwise.
+ */
+static int
+mem_prepare_for_cmp_to_uint(struct Mem *mem, int *oc, int eqOnly)
+{
+	if ((mem->flags & MEM_Int) != 0) {
+		if (eqOnly == 1)
+			return 0;
+		if (*oc == OP_SeekGT) {
+			mem_set_u64(mem, 0);
+			*oc = OP_SeekGE;
+			return 0;
+		}
+		if (*oc == OP_SeekGE) {
+			mem_set_u64(mem, 0);
+			return 0;
+		}
+		if (*oc == OP_SeekLT)
+			return 0;
+		assert(*oc == OP_SeekLE);
+		return 0;
+	}
+	assert((mem->flags & MEM_Real) != 0);
+	double d = mem->u.r;
+	if (d == (double)(uint64_t)d)
+		return mem_convert_to_unsigned(mem);
+	if (eqOnly == 1)
+		return 0;
+	if (*oc == OP_SeekGT) {
+		if (d >= (double)UINT64_MAX)
+			return 0;
+		if (d < 0) {
+			mem_set_u64(mem, 0);
+			*oc = OP_SeekGE;
+			return 0;
+		}
+		assert((double)(uint64_t)d < d);
+		return mem_convert_to_unsigned(mem);
+	}
+	if (*oc == OP_SeekGE) {
+		if (d >= (double)UINT64_MAX)
+			return 0;
+		if (d < 0) {
+			mem_set_u64(mem, 0);
+			return 0;
+		}
+		assert((double)(uint64_t)d < d);
+		*oc = OP_SeekGT;
+		return mem_convert_to_unsigned(mem);
+	}
+	if (*oc == OP_SeekLT) {
+		if (d >= (double)UINT64_MAX) {
+			*oc = OP_SeekLE;
+			mem_set_u64(mem, UINT64_MAX);
+			return 0;
+		}
+		if (d < 0)
+			return 0;
+		assert((double)(uint64_t)d < d);
+		*oc = OP_SeekLE;
+		return mem_convert_to_unsigned(mem);
+	}
+	assert(*oc == OP_SeekLE);
+	if (d >= (double)UINT64_MAX) {
+		mem_set_u64(mem, UINT64_MAX);
+		return 0;
+	}
+	if (d < 0)
+		return 0;
+	assert((double)(uint64_t)d < d);
+	return mem_convert_to_unsigned(mem);
+}
+
+/**
+ * Convert the numeric value contained in the MEM to INTEGER.
+ * @see mem_prepare_for_cmp() for more info.
+ *
+ * @param mem The MEM that contains the numeric value.
+ * @param[in][out] oc Operation code.
+ * @retval 0 if the conversion was successful, -1 otherwise.
+ */
+static int
+mem_prepare_for_cmp_to_int(struct Mem *mem, int *oc, int eqOnly)
+{
+	assert((mem->flags & MEM_Real) != 0);
+	double d = mem->u.r;
+	if (d == (double)(uint64_t)d || d == (double)(int64_t)d)
+		return mem_convert_to_integer(mem);
+	if (eqOnly == 1)
+		return 0;
+	if (*oc == OP_SeekGT) {
+		if (d >= (double)UINT64_MAX)
+			return 0;
+		if (d < (double)INT64_MIN) {
+			mem_set_i64(mem, INT64_MIN);
+			*oc = OP_SeekGE;
+			return 0;
+		}
+		if (d > 0 || (double)(int64_t)d < d)
+			return mem_convert_to_integer(mem);
+		*oc = OP_SeekGE;
+		return mem_convert_to_integer(mem);
+	}
+	if (*oc == OP_SeekGE) {
+		if (d >= (double)UINT64_MAX)
+			return 0;
+		if (d < (double)INT64_MIN) {
+			mem_set_i64(mem, INT64_MIN);
+			return 0;
+		}
+		if (d > 0 || (double)(int64_t)d < d) {
+			*oc = OP_SeekGT;
+			return mem_convert_to_integer(mem);
+		}
+		return mem_convert_to_integer(mem);
+	}
+	if (*oc == OP_SeekLT) {
+		if (d >= (double)UINT64_MAX) {
+			*oc = OP_SeekLE;
+			mem_set_int(mem, UINT64_MAX, false);
+			return 0;
+		}
+		if (d < (double)INT64_MIN)
+			return 0;
+		if (d > 0 || (double)(int64_t)d < d) {
+			*oc = OP_SeekLE;
+			return mem_convert_to_integer(mem);
+		}
+		return mem_convert_to_integer(mem);
+	}
+	assert(*oc == OP_SeekLE);
+	if (d >= (double)UINT64_MAX) {
+		mem_set_int(mem, UINT64_MAX, false);
+		return 0;
+	}
+	if (d > 0 || (double)(int64_t)d < d)
+		return mem_convert_to_integer(mem);
+	*oc = OP_SeekLT;
+	return mem_convert_to_integer(mem);
+}
+
+/**
+ * Convert the numeric value contained in the MEM to DOUBLE.
+ * @see mem_prepare_for_cmp() for more info.
+ *
+ * @param mem The MEM that contains the numeric value.
+ * @param[in][out] oc Operation code.
+ * @retval 0 if the conversion was successful, -1 otherwise.
+ */
+static int
+mem_prepare_for_cmp_to_double(struct Mem *mem, int *oc, int eqOnly)
+{
+	if ((mem->flags & MEM_Int) != 0) {
+		int64_t i = mem->u.i;
+		if (i == (int64_t)(double)i)
+			return mem_convert_to_double(mem);
+		if (eqOnly == 1)
+			return 0;
+		double d = (double)i;
+		if (*oc == OP_SeekGT) {
+			if (d == (double)INT64_MAX || i < (int64_t)d)
+				*oc = OP_SeekGE;
+			return mem_convert_to_double(mem);
+		}
+		if (*oc == OP_SeekGE) {
+			if (d != (double)INT64_MAX && i > (int64_t)d)
+				*oc = OP_SeekGT;
+			return mem_convert_to_double(mem);
+		}
+		if (*oc == OP_SeekLT) {
+			if (d != (double)INT64_MAX && i > (int64_t)d)
+				*oc = OP_SeekLE;
+			return mem_convert_to_double(mem);
+		}
+		assert(*oc == OP_SeekLE);
+		if (d == (double)INT64_MAX || i < (int64_t)d)
+			*oc = OP_SeekLT;
+		return mem_convert_to_double(mem);
+	}
+	assert((mem->flags & MEM_UInt) != 0);
+	uint64_t u = mem->u.u;
+	if (u == (uint64_t)(double)u)
+		return mem_convert_to_double(mem);
+	if (eqOnly == 1)
+		return 0;
+	double d = (double)u;
+	if (*oc == OP_SeekGT) {
+		if (d == (double)UINT64_MAX || u < (uint64_t)d)
+			*oc = OP_SeekGE;
+		return mem_convert_to_double(mem);
+	}
+	if (*oc == OP_SeekGE) {
+		if (d != (double)UINT64_MAX && u > (uint64_t)d)
+			*oc = OP_SeekGT;
+		return mem_convert_to_double(mem);
+	}
+	if (*oc == OP_SeekLT) {
+		if (d != (double)UINT64_MAX && u > (uint64_t)d)
+			*oc = OP_SeekLE;
+		return mem_convert_to_double(mem);
+	}
+	assert(*oc == OP_SeekLE);
+	if (d == (double)UINT64_MAX || u < (uint64_t)d)
+		*oc = OP_SeekLT;
+	return mem_convert_to_double(mem);
+}
+
+/**
+ * Convert the numeric value contained in the MEM to another
+ * numeric type according to the specified operation. If the
+ * conversion is successful, we will get the converted MEM. If the
+ * conversion fails, the MEM will not be changed.
+ *
+ * There are two reasons why the MEM might not convert.
+ * 1) MEM conversion affects the result of the operation.
+ * For example:
+ * CREATE TABLE t (i INT PRIMARY KEY);
+ * ...
+ * SELECT * FROM t WHERE i = 1.5;
+ *
+ * 2) After conversion, nothing will be found as a result of the
+ * operation.
+ * For example:
+ * CREATE TABLE t (i INT PRIMARY KEY);
+ * ...
+ * SELECT * FROM t WHERE i > 2^100;
+ *
+ *
+ * If the conversion is successful, the operation can also change.
+ * For example:
+ * CREATE TABLE t (i INT PRIMARY KEY);
+ * ...
+ * SELECT * FROM t WHERE i > -(2^100);
+ *
+ * The value becomes INT64_MIN after conversion and the operation
+ * becomes '> ='.
+ *
+ * @param mem The MEM that contains the numeric value.
+ * @param type The type to convert to.
+ * @param[in][out] oc Operation code.
+ * @retval 0 if the conversion was successful, -1 otherwise.
+ */
+static int
+mem_prepare_for_cmp(struct Mem *mem, enum field_type type, int *oc, int eqOnly)
+{
+	if (type == FIELD_TYPE_UNSIGNED)
+		return mem_prepare_for_cmp_to_uint(mem, oc, eqOnly);
+	if (type == FIELD_TYPE_INTEGER)
+		return mem_prepare_for_cmp_to_int(mem, oc, eqOnly);
+	assert(type == FIELD_TYPE_DOUBLE);
+	return mem_prepare_for_cmp_to_double(mem, oc, eqOnly);
+}
+
 /*
  * pMem currently only holds a string type (or maybe a BLOB that we can
  * interpret as a string if we want to).  Compute its corresponding
@@ -3507,6 +3766,74 @@ case OP_SeekGT: {       /* jump, in3 */
 	assert(oc!=OP_SeekLT || r.default_rc==+1);
 
 	r.aMem = &aMem[pOp->p3];
+	bool is_on_region = false;
+	struct region *region = &fiber()->gc;
+	size_t region_svp = 0;
+	for (int i = 0; i < r.nField; ++i) {
+		struct Mem *mem = &r.aMem[i];
+		enum field_type type = r.key_def->parts[i].type;
+		/*
+		 * We already know that MEM_type and field type
+		 * are comparable. If they are not compatible, we
+		 * should try to convert MEM to field type.
+		 */
+		if (!mem_is_type_compatible(mem, type)) {
+			/*
+			 * We cannot make changes to original MEMs
+			 * since they will be used in OP_Idx*. So
+			 * we should copy them and make changes to
+			 * the copies.
+			 */
+			if (!is_on_region) {
+				region_svp = region_used(region);
+				uint32_t size = sizeof(struct Mem) * r.nField;
+				r.aMem = region_aligned_alloc(region, size,
+							      alignof(struct Mem));
+				if (r.aMem == NULL) {
+					diag_set(OutOfMemory, size,
+						 "region_aligned_alloc",
+						 "r.aMem");
+					goto abort_due_to_error;
+				}
+				memcpy(r.aMem, &aMem[pOp->p3], size);
+				is_on_region = true;
+				mem = &r.aMem[i];
+			}
+			/*
+			 * In cases where we can change the MEM
+			 * according to the field type and opcode,
+			 * we will get the converted MEM after
+			 * this function.
+			 *
+			 * There are two cases where MEM does not
+			 * change:
+			 * 1) any resultof conversion can affect
+			 * the result of the operation;
+			 * 2) after conversion nothing will be
+			 * found as a result of the operation.
+			 *
+			 * Examples can be found in description of
+			 * the function.
+			 */
+			if (mem_prepare_for_cmp(mem, type, &oc, eqOnly) != 0) {
+				diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
+					 sql_value_to_diag_str(mem),
+					 field_type_strs[type]);
+				goto abort_due_to_error;
+			}
+			/*
+			 * if the MEM type and the field type are
+			 * still not compatible, then the
+			 * conversion failed and we won't find
+			 * anything.
+			 */
+			if (!mem_is_type_compatible(mem, type)) {
+				res = 1;
+				goto seek_not_found;
+			}
+		}
+	}
+
 #ifdef SQL_DEBUG
 	{ int i; for(i=0; i<r.nField; i++) assert(memIsValid(&r.aMem[i])); }
 #endif
@@ -3544,6 +3871,8 @@ case OP_SeekGT: {       /* jump, in3 */
 		}
 	}
 			seek_not_found:
+	if (is_on_region)
+		region_truncate(region, region_svp);
 	assert(pOp->p2>0);
 	VdbeBranchTaken(res!=0,2);
 	if (res) {
-- 
2.25.1

  parent reply	other threads:[~2020-07-16 14:47 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-16 14:45 [Tarantool-patches] [PATCH v6 00/22] sql: change implicit cast imeevma
2020-07-16 14:45 ` [Tarantool-patches] [PATCH v6 01/22] sql: change implicit cast for assignment imeevma
2020-07-16 14:45 ` [Tarantool-patches] [PATCH v6 02/22] sql: use ApplyType to check function arguments imeevma
2020-07-16 14:45 ` [Tarantool-patches] [PATCH v6 03/22] sql: check args of abs() imeevma
2020-07-16 16:12   ` Nikita Pettik
2020-07-16 14:45 ` [Tarantool-patches] [PATCH v6 04/22] sql: check args of avg(), sum() and total() imeevma
2020-07-16 16:21   ` Nikita Pettik
2020-07-16 14:45 ` [Tarantool-patches] [PATCH v6 05/22] sql: check args of char() imeevma
2020-07-16 16:27   ` Nikita Pettik
2020-07-16 14:45 ` [Tarantool-patches] [PATCH v6 06/22] sql: check args of length() imeevma
2020-07-16 16:31   ` Nikita Pettik
2020-07-16 14:46 ` [Tarantool-patches] [PATCH v6 07/22] sql: check operands of LIKE imeevma
2020-07-16 17:03   ` Nikita Pettik
2020-07-16 14:46 ` [Tarantool-patches] [PATCH v6 08/22] sql: check args of lower() and upper() imeevma
2020-07-16 17:09   ` Nikita Pettik
2020-07-16 14:46 ` [Tarantool-patches] [PATCH v6 09/22] sql: check args of position() imeevma
2020-07-16 17:28   ` Nikita Pettik
2020-07-16 14:46 ` [Tarantool-patches] [PATCH v6 10/22] sql: check args of randomblob() imeevma
2020-07-16 17:28   ` Nikita Pettik
2020-07-16 14:46 ` [Tarantool-patches] [PATCH v6 11/22] sql: check args of replace() imeevma
2020-07-16 14:46 ` [Tarantool-patches] [PATCH v6 12/22] sql: check args of round() imeevma
2020-07-16 14:46 ` [Tarantool-patches] [PATCH v6 13/22] sql: check args of soundex() imeevma
2020-07-16 14:47 ` [Tarantool-patches] [PATCH v6 14/22] sql: check args of substr() imeevma
2020-07-16 14:47 ` [Tarantool-patches] [PATCH v6 15/22] sql: check args of unicode() imeevma
2020-07-16 14:47 ` [Tarantool-patches] [PATCH v6 16/22] sql: check args of zeroblob() imeevma
2020-07-16 14:47 ` [Tarantool-patches] [PATCH v6 17/22] sql: remove unused DOUBLE to INTEGER conversion imeevma
2020-07-16 14:47 ` imeevma [this message]
2020-07-16 14:47 ` [Tarantool-patches] [PATCH v6 19/22] sql: change comparison between numbers using index imeevma
2020-07-16 14:47 ` [Tarantool-patches] [PATCH v6 20/22] sql: remove implicit cast from comparison opcodes imeevma
2020-07-16 14:47 ` [Tarantool-patches] [PATCH v6 21/22] sql: fix implicit cast in opcode MustBeInt imeevma
2020-07-16 14:47 ` [Tarantool-patches] [PATCH v6 22/22] sql: remove implicit cast from MakeRecord opcode imeevma

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=83ea3cd7b4a64a5bdcea59454f55f284f894a235.1594909974.git.imeevma@gmail.com \
    --to=imeevma@tarantool.org \
    --cc=korablev@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --cc=tsafin@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v6 18/22] sql: add implicit cast between numbers in OP_Seek*' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox