[tarantool-patches] Re: [PATCH 3/3] sql: fix passing FP values to integer iterator
n.pettik
korablev at tarantool.org
Tue May 28 22:58:31 MSK 2019
> On 28 May 2019, at 00:49, Vladislav Shpilevoy <v.shpilevoy at tarantool.org> wrote:
>
> Thanks for the patch! See 7 comments below.
>
> 1. Double whitespace in the commit title before 'FP’.
Fixed.
>
> 2. The patch does not work with unsigned numbers.
>
> tarantool> format = {}
> tarantool> format[1] = {name = 'ID', type = 'unsigned'}
> tarantool> format[2] = {name = 'A', type = 'unsigned'}
> tarantool> s = box.schema.create_space('T1', {format = format})
> tarantool> pk = s:create_index('pk')
> tarantool> sk = s:create_index('sk', {parts = {'A'}})
> tarantool> box.execute("INSERT INTO t1 VALUES (1, 1);")
> tarantool> box.execute("SELECT a FROM t1 WHERE a IN (1.1, 2.1);")
> 2019-05-28 00:34:19.922 [1452] main/102/interactive key_def.h:488 E> ER_KEY_PART_TYPE: Supplied key type of part 0 does not match index part type: expected unsigned
> ---
> - error: 'Failed to execute SQL statement: Supplied key type of part 0 does not match
> index part type: expected unsigned’
Fixed:
diff --git a/src/box/sql/wherecode.c b/src/box/sql/wherecode.c
index 3025e5e6b..107b9802d 100644
--- a/src/box/sql/wherecode.c
+++ b/src/box/sql/wherecode.c
@@ -1115,7 +1115,8 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo, /* Complete information about the W
int seek_addr = 0;
for (int i = 0; i < nEq; i++) {
enum field_type type = idx_def->key_def->parts[i].type;
- if (type == FIELD_TYPE_INTEGER) {
+ if (type == FIELD_TYPE_INTEGER ||
+ type == FIELD_TYPE_UNSIGNED) {
/*
* OP_MustBeInt consider NULLs as
* non-integer values, so firstly
@@ -1128,8 +1129,9 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo, /* Complete information about the W
}
}
/* Inequality constraint comes always at the end of list. */
- if (pRangeStart != NULL &&
- idx_def->key_def->parts[nEq].type == FIELD_TYPE_INTEGER)
+ enum field_type ineq_type = idx_def->key_def->parts[nEq].type;
+ if (pRangeStart != NULL && (ineq_type == FIELD_TYPE_INTEGER ||
+ ineq_type == FIELD_TYPE_UNSIGNED))
force_integer_reg = regBase + nEq;
emit_apply_type(pParse, regBase, nConstraint - bSeekPastNull,
start_types);
>> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
>> index 3bd82234e..3ead6b40f 100644
>> --- a/src/box/sql/vdbe.c
>> +++ b/src/box/sql/vdbe.c
>> @@ -3496,15 +3495,22 @@ case OP_SeekGT: { /* jump, in3 */
>> pC->seekOp = pOp->opcode;
>> #endif
>> iKey = 0;
>> - reg_ipk = pOp->p5;
>> -
>> - if (reg_ipk > 0) {
>> + /*
>> + * In case floating value is intended to be passed to
>> + * iterator over integer field, we must truncate it to
>> + * integer value and change type of iterator:
>> + * a > 1.5 -> a >= 2
>> + */
>> + int int_field = pOp->p5;
>
> 3. Comments on P5 are now outdated.
Fixed:
diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index 3ead6b40f..906baa6b1 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -3395,8 +3395,10 @@ case OP_ColumnsUsed: {
* from the beginning toward the end. In other words, the cursor is
* configured to use Next, not Prev.
*
- * If P5 is not zero, than it is offset of IPK in input vector. Force
- * corresponding value to be INTEGER.
+ * If P5 is not zero, than it is offset of integer fields in input
+ * vector. Force corresponding value to be INTEGER, in case it
+ * is floating point value. Alongside with that, type of
+ * iterator may be changed: a > 1.5 -> a >= 2.
*
* See also: Found, NotFound, SeekLt, SeekGt, SeekLe
*/
@@ -3416,10 +3418,10 @@ case OP_ColumnsUsed: {
* from the beginning toward the end. In other words, the cursor is
* configured to use Next, not Prev.
*
- * If P5 is not zero, than it is offset of IPK in input vector. Force
- * corresponding value to be INTEGER.
+ * If P5 is not zero, than it is offset of integer fields in input
+ * vector. Force corresponding value to be INTEGER.
*
- * See also: Found, NotFound, SeekLt, SeekGe, SeekLe
+ * P5 has the same meaning as for SeekGE.
*/
/* Opcode: SeekLT P1 P2 P3 P4 P5
* Synopsis: key=r[P3 at P4]
@@ -3437,8 +3439,7 @@ case OP_ColumnsUsed: {
* from the end toward the beginning. In other words, the cursor is
* configured to use Prev, not Next.
*
- * If P5 is not zero, than it is offset of IPK in input vector. Force
- * corresponding value to be INTEGER.
+ * P5 has the same meaning as for SeekGE.
*
* See also: Found, NotFound, SeekGt, SeekGe, SeekLe
*/
@@ -3465,6 +3466,8 @@ case OP_ColumnsUsed: {
* The IdxGE opcode will be skipped if this opcode succeeds, but the
* IdxGE opcode will be used on subsequent loop iterations.
*
+ * P5 has the same meaning as for SeekGE.
+ *
* See also: Found, NotFound, SeekGt, SeekGe, SeekLt
*/
case OP_SeekLT: /* jump, in3 */
>> diff --git a/src/box/sql/wherecode.c b/src/box/sql/wherecode.c
>> index 977c0fced..e779729c7 100644
>> --- a/src/box/sql/wherecode.c
>> +++ b/src/box/sql/wherecode.c
>> @@ -1086,31 +1086,61 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo, /* Complete information about the W
>> start_constraints = 1;
>> }
>> struct index_def *idx_pk = space->index[0]->def;
>> - int fieldno = idx_pk->key_def->parts[0].fieldno;
>> -
>> uint32_t pk_part_count = idx_pk->key_def->part_count;
>> - if (pk_part_count == 1 &&
>> - space->def->fields[fieldno].type == FIELD_TYPE_INTEGER) {
>> - /* Right now INTEGER PRIMARY KEY is the only option to
>> - * get Tarantool's INTEGER column type. Need special handling
>> - * here: try to loosely convert FLOAT to INT. If RHS type
>> - * is not INT or FLOAT - skip this ites, i.e. goto addrNxt.
>> - */
>> - int limit = pRangeStart == NULL ? nEq : nEq + 1;
>> - for (int i = 0; i < limit; i++) {
>> - if (idx_def->key_def->parts[i].fieldno ==
>> - idx_pk->key_def->parts[0].fieldno) {
>> - /* Here: we know for sure that table has INTEGER
>> - PRIMARY KEY, single column, and Index we're
>> - trying to use for scan contains this column. */
>> - if (i < nEq)
>> - sqlVdbeAddOp2(v, OP_MustBeInt, regBase + i, addrNxt);
>> - else
>> - force_integer_reg = regBase + i;
>> - break;
>> - }
>> + /*
>> + * Tarantool's iterator over integer fields doesn't
>> + * tolerate floating point values. Hence, if term
>> + * is equality comparison and value of operand is
>> + * not integer, we can skip it since it always
>> + * results in false: INT a == 0.5 -> false;
>> + * It is done using OP_MustBeInt facilities.
>> + * In case term is greater comparison (a > ?), we
>> + * should notify OP_SeekGT to process truncation of
>> + * floating point value: a > 0.5 -> a >= 1;
>> + * It is done by setting P5 flag for OP_Seek*.
>> + * It is worth mentioning that we do not need
>> + * this step when it comes for less comparison:
>> + * key is NULL in this case: values are ordered
>> + * as NULL, ... NULL, min_value, ..., so to
>> + * fetch min value we pass NULL to GT iterator.
>
> 4. I did not get the 'less comparator == compare with NULL'
> idea. Firstly you describe how to compare 'a > ?', this is
> fully understandable. Then you, as I see, go to the 'a < ?'
> case, but somewhy say, that it is the same as 'NULL < ?'.
> Do not understand.
I say that in this case key which is passed to iterator is NULL
and type of iterator is GE. Consequently, pRangeStart is NULL.
I’ve slightly updated this part of comment:
diff --git a/src/box/sql/wherecode.c b/src/box/sql/wherecode.c
index 3025e5e6b..01ee9ce90 100644
--- a/src/box/sql/wherecode.c
+++ b/src/box/sql/wherecode.c
@@ -1089,10 +1089,10 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo, /* Complete information about the W
* floating point value: a > 0.5 -> a >= 1;
* It is done by setting P5 flag for OP_Seek*.
* It is worth mentioning that we do not need
- * this step when it comes for less comparison:
- * key is NULL in this case: values are ordered
- * as NULL, ... NULL, min_value, ..., so to
- * fetch min value we pass NULL to GT iterator.
+ * this step when it comes for less (<) comparison
+ * of nullable field. Key is NULL in this case:
+ * values are ordered as NULL, ... NULL, min_value,
+ * so to fetch min value we pass NULL to GT iterator.
>> + * The only exception is less comparison in
>> + * conjunction with ORDER BY DESC clause:
>> + * in such situation we use LE iterator and
>> + * truncated value to compare. But then
>> + * pRangeStart == NULL.
>> + * This procedure is correct for compound index:
>> + * only one comparison of less/greater type can be
>> + * used at the same time. For instance,
>> + * a < 1.5 AND b > 0.5 is handled by SeekGT using
>> + * column a and fetching column b from tuple and
>> + * OP_Le comparison.
>> + *
>> + * Note that OP_ApplyType, which is emitted before
>> + * OP_Seek** doesn't truncate floating point to
>> + * integer. That's why we need this routine.
>
> 5. Why can't OP_ApplyType truncate the numbers? Because it does
> not know, to which side round the values?
It doesn’t (and can't) change type of iterator alongside with truncation.
> Then probably we can drop OP_ApplyType invocations?
We can’t remove whole invocation to OP_ApplyType, but
we can really skip cast to int:
sqlVdbeAddOp2(v, OP_MustBeInt, regBase + i,
addrNxt);
+ start_types[i] = FIELD_TYPE_SCALAR;
+ /*
+ * We need to notify column cache
+ * that type of value may change
+ * so we should fetch value from
+ * tuple again rather then copy
+ * from register.
+ */
+ sql_expr_type_cache_change(pParse, regBase + i,
+ 1);
Idk whether we need such optimisation or not
>> + * Also, note that terms are separated by OR
>> + * predicates, so we consider term as sequence
>> + * of AND'ed predicates.
>> + */
>> + int seek_addr = 0;
>> + for (int i = 0; i < nEq; i++) {
>> + enum field_type type = idx_def->key_def->parts[i].type;
>> + if (type == FIELD_TYPE_INTEGER) {
>> + /*
>> + * OP_MustBeInt consider NULLs as
>> + * non-integer values, so firstly
>> + * check whether value is NULL or not.
>> + */
>> + seek_addr = sqlVdbeAddOp1(v, OP_IsNull,
>> + regBase);
>
> 6. I do not understand, how does it work. You rewrite
> seek_addr in the cycle. It means, that if there are 2 integers,
> the first OP_IsNull address is rewritten by the second, and
> below, in sqlVdbeJumpHere(v, seek_addr), you will update P2
> for the last comparison only. The first OP_IsNull.P2 will stay 0.
You are right. Fixed:
@@ -1112,24 +1112,33 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo, /* Complete information about the W
* predicates, so we consider term as sequence
* of AND'ed predicates.
*/
- int seek_addr = 0;
+ size_t addrs_sz = sizeof(int) * nEq;
+ int *seek_addrs = region_alloc(&pParse->region, addrs_sz);
+ if (seek_addrs == NULL) {
+ diag_set(OutOfMemory, addrs_sz, "region", "seek_addrs");
+ pParse->is_aborted = true;
+ return 0;
+ }
+ memset(seek_addrs, 0, addrs_sz);
...
/*
* OP_MustBeInt consider NULLs as
* non-integer values, so firstly
* check whether value is NULL or not.
*/
- seek_addr = sqlVdbeAddOp1(v, OP_IsNull,
+ seek_addrs[i] = sqlVdbeAddOp1(v, OP_IsNull,
regBase);
sqlVdbeAddOp2(v, OP_MustBeInt, regBase + i,
addrNxt);
}
}
/* Inequality constraint comes always at the end of list. */
@@ -1142,8 +1151,10 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo, /* Complete information about the W
op = aStartOp[(start_constraints << 2) +
(startEq << 1) + bRev];
assert(op != 0);
- if (seek_addr != 0)
- sqlVdbeJumpHere(v, seek_addr);
+ for (uint32_t i = 0; i < nEq; ++i) {
+ if (seek_addrs[i] != 0)
+ sqlVdbeJumpHere(v, seek_addrs[i]);
+ }
sqlVdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase,
nConstraint);
And added test case:
diff --git a/test/sql/types.test.lua b/test/sql/types.test.lua
index afa3677f0..170a1ad04 100644
--- a/test/sql/types.test.lua
+++ b/test/sql/types.test.lua
@@ -237,3 +237,12 @@ box.execute("SELECT a FROM t1 WHERE a > 1.1;")
box.execute("SELECT a FROM t1 WHERE a < 1.1;")
box.space.T1:drop()
+
+box.execute("CREATE TABLE t1(id INT PRIMARY KEY, a INT, b INT);")
+box.execute("CREATE INDEX i1 ON t1(a, b);")
+box.execute("INSERT INTO t1 VALUES (1, 1, 1);")
+box.execute("SELECT a FROM t1 WHERE a = 1.0 AND b > 0.5;")
+box.execute("SELECT a FROM t1 WHERE a = 1.5 AND b IS NULL;")
+box.execute("SELECT a FROM t1 WHERE a IS NULL AND b IS NULL;")
+
+box.space.T1:drop()
>
>> + sqlVdbeAddOp2(v, OP_MustBeInt, regBase + i,
>> + addrNxt);
>> }
>> }
>> + /* Inequality constraint comes always at the end of list. */
>> + if (pRangeStart != NULL &&
>> + idx_def->key_def->parts[nEq].type == FIELD_TYPE_INTEGER)
>> + force_integer_reg = regBase + nEq;
>> emit_apply_type(pParse, regBase, nConstraint - bSeekPastNull,
>> start_types);
>> if (pLoop->nSkip > 0 && nConstraint == pLoop->nSkip) {
>>
>> diff --git a/test/sql/types.result b/test/sql/types.result
>> index 758018eb5..274d73dd2 100644
>> --- a/test/sql/types.result
>> +++ b/test/sql/types.result
>> @@ -927,3 +927,43 @@ box.execute("SELECT * FROM tboolean WHERE s1 = 1.123;")
>> +...
>> +box.execute("SELECT a FROM t1 WHERE a < 1.1;")
>> +---
>> +- metadata:
>> + - name: A
>> + type: integer
>> + rows:
>> + - [1]
>
> 7. Please, add the Kostja's example about '= 1.0’.
diff --git a/test/sql/types.test.lua b/test/sql/types.test.lua
index afa3677f0..16448436d 100644
--- a/test/sql/types.test.lua
+++ b/test/sql/types.test.lua
@@ -233,7 +233,17 @@ box.execute("CREATE TABLE t1(id INT PRIMARY KEY, a INT UNIQUE);")
box.execute("INSERT INTO t1 VALUES (1, 1);")
box.execute("SELECT a FROM t1 WHERE a IN (1.1, 2.1);")
box.execute("SELECT a FROM t1 WHERE a = 1.1;")
+box.execute("SELECT a FROM t1 WHERE a = 1.0;")
box.execute("SELECT a FROM t1 WHERE a > 1.1;")
box.execute("SELECT a FROM t1 WHERE a < 1.1;")
More information about the Tarantool-patches
mailing list