[tarantool-patches] Re: [PATCH 3/3] sql: fix passing FP values to integer iterator

Tue May 28 22:58:31 MSK 2019

> On 28 May 2019, at 00:49, Vladislav Shpilevoy <v.shpilevoy at tarantool.org> wrote:
> 
> Thanks for the patch! See 7 comments below.
> 
> 1. Double whitespace in the commit title before 'FP’.

Fixed.

> 
> 2. The patch does not work with unsigned numbers.
> 
> 		tarantool> format = {}
> 		tarantool> format[1] = {name = 'ID', type = 'unsigned'}
> 		tarantool> format[2] = {name = 'A', type = 'unsigned'}
> 		tarantool> s = box.schema.create_space('T1', {format = format})
> 		tarantool> pk = s:create_index('pk')
> 		tarantool> sk = s:create_index('sk', {parts = {'A'}})
> 		tarantool> box.execute("INSERT INTO t1 VALUES (1, 1);")
> 		tarantool> box.execute("SELECT a FROM t1 WHERE a IN (1.1, 2.1);")
> 		2019-05-28 00:34:19.922 [1452] main/102/interactive key_def.h:488 E> ER_KEY_PART_TYPE: Supplied key type of part 0 does not match index part type: expected unsigned
> 		---
> 		- error: 'Failed to execute SQL statement: Supplied key type of part 0 does not match
> 		    index part type: expected unsigned’

Fixed:

diff --git a/src/box/sql/wherecode.c b/src/box/sql/wherecode.c
index 3025e5e6b..107b9802d 100644
--- a/src/box/sql/wherecode.c
+++ b/src/box/sql/wherecode.c
@@ -1115,7 +1115,8 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo,      /* Complete information about the W
                int seek_addr = 0;
                for (int i = 0; i < nEq; i++) {
                        enum field_type type = idx_def->key_def->parts[i].type;
-                       if (type == FIELD_TYPE_INTEGER) {
+                       if (type == FIELD_TYPE_INTEGER ||
+                           type == FIELD_TYPE_UNSIGNED) {
                                /*
                                 * OP_MustBeInt consider NULLs as
                                 * non-integer values, so firstly
@@ -1128,8 +1129,9 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo,      /* Complete information about the W
                        }
                }
                /* Inequality constraint comes always at the end of list. */
-               if (pRangeStart != NULL &&
-                   idx_def->key_def->parts[nEq].type == FIELD_TYPE_INTEGER)
+               enum field_type ineq_type = idx_def->key_def->parts[nEq].type;
+               if (pRangeStart != NULL && (ineq_type == FIELD_TYPE_INTEGER ||
+                                           ineq_type == FIELD_TYPE_UNSIGNED))
                        force_integer_reg = regBase + nEq;
                emit_apply_type(pParse, regBase, nConstraint - bSeekPastNull,
                                start_types);

>> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
>> index 3bd82234e..3ead6b40f 100644
>> --- a/src/box/sql/vdbe.c
>> +++ b/src/box/sql/vdbe.c
>> @@ -3496,15 +3495,22 @@ case OP_SeekGT: {       /* jump, in3 */
>> 	pC->seekOp = pOp->opcode;
>> #endif
>> 	iKey = 0;
>> -	reg_ipk = pOp->p5;
>> -
>> -	if (reg_ipk > 0) {
>> +	/*
>> +	 * In case floating value is intended to be passed to
>> +	 * iterator over integer field, we must truncate it to
>> +	 * integer value and change type of iterator:
>> +	 * a > 1.5 -> a >= 2
>> +	 */
>> +	int int_field = pOp->p5;
> 
> 3. Comments on P5 are now outdated.

Fixed:

diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index 3ead6b40f..906baa6b1 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -3395,8 +3395,10 @@ case OP_ColumnsUsed: {
  * from the beginning toward the end.  In other words, the cursor is
  * configured to use Next, not Prev.
  *
- * If P5 is not zero, than it is offset of IPK in input vector. Force
- * corresponding value to be INTEGER.
+ * If P5 is not zero, than it is offset of integer fields in input
+ * vector. Force corresponding value to be INTEGER, in case it
+ * is floating point value. Alongside with that, type of
+ * iterator may be changed: a > 1.5 -> a >= 2.
  *
  * See also: Found, NotFound, SeekLt, SeekGt, SeekLe
  */
@@ -3416,10 +3418,10 @@ case OP_ColumnsUsed: {
  * from the beginning toward the end.  In other words, the cursor is
  * configured to use Next, not Prev.
  *
- * If P5 is not zero, than it is offset of IPK in input vector. Force
- * corresponding value to be INTEGER.
+ * If P5 is not zero, than it is offset of integer fields in input
+ * vector. Force corresponding value to be INTEGER.
  *
- * See also: Found, NotFound, SeekLt, SeekGe, SeekLe
+ * P5 has the same meaning as for SeekGE.
  */
 /* Opcode: SeekLT P1 P2 P3 P4 P5
  * Synopsis: key=r[P3 at P4]
@@ -3437,8 +3439,7 @@ case OP_ColumnsUsed: {
  * from the end toward the beginning.  In other words, the cursor is
  * configured to use Prev, not Next.
  *
- * If P5 is not zero, than it is offset of IPK in input vector. Force
- * corresponding value to be INTEGER.
+ * P5 has the same meaning as for SeekGE.
  *
  * See also: Found, NotFound, SeekGt, SeekGe, SeekLe
  */
@@ -3465,6 +3466,8 @@ case OP_ColumnsUsed: {
  * The IdxGE opcode will be skipped if this opcode succeeds, but the
  * IdxGE opcode will be used on subsequent loop iterations.
  *
+ * P5 has the same meaning as for SeekGE.
+ *
  * See also: Found, NotFound, SeekGt, SeekGe, SeekLt
  */
 case OP_SeekLT:         /* jump, in3 */

>> diff --git a/src/box/sql/wherecode.c b/src/box/sql/wherecode.c
>> index 977c0fced..e779729c7 100644
>> --- a/src/box/sql/wherecode.c
>> +++ b/src/box/sql/wherecode.c
>> @@ -1086,31 +1086,61 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo,	/* Complete information about the W
>> 			start_constraints = 1;
>> 		}
>> 		struct index_def *idx_pk = space->index[0]->def;
>> -		int fieldno = idx_pk->key_def->parts[0].fieldno;
>> -
>> 		uint32_t pk_part_count = idx_pk->key_def->part_count;
>> -		if (pk_part_count == 1 &&
>> -		    space->def->fields[fieldno].type == FIELD_TYPE_INTEGER) {
>> -			/* Right now INTEGER PRIMARY KEY is the only option to
>> -			 * get Tarantool's INTEGER column type. Need special handling
>> -			 * here: try to loosely convert FLOAT to INT. If RHS type
>> -			 * is not INT or FLOAT - skip this ites, i.e. goto addrNxt.
>> -			 */
>> -			int limit = pRangeStart == NULL ? nEq : nEq + 1;
>> -			for (int i = 0; i < limit; i++) {
>> -				if (idx_def->key_def->parts[i].fieldno ==
>> -				    idx_pk->key_def->parts[0].fieldno) {
>> -					/* Here: we know for sure that table has INTEGER
>> -					   PRIMARY KEY, single column, and Index we're
>> -					   trying to use for scan contains this column. */
>> -					if (i < nEq)
>> -						sqlVdbeAddOp2(v, OP_MustBeInt, regBase + i, addrNxt);
>> -					else
>> -						force_integer_reg = regBase + i;
>> -					break;
>> -				}
>> +		/*
>> +		 * Tarantool's iterator over integer fields doesn't
>> +		 * tolerate floating point values. Hence, if term
>> +		 * is equality comparison and value of operand is
>> +		 * not integer, we can skip it since it always
>> +		 * results in false: INT a == 0.5 -> false;
>> +		 * It is done using OP_MustBeInt facilities.
>> +		 * In case term is greater comparison (a > ?), we
>> +		 * should notify OP_SeekGT to process truncation of
>> +		 * floating point value: a > 0.5 -> a >= 1;
>> +		 * It is done by setting P5 flag for OP_Seek*.
>> +		 * It is worth mentioning that we do not need
>> +		 * this step when it comes for less comparison:
>> +		 * key is NULL in this case: values are ordered
>> +		 * as  NULL, ... NULL, min_value, ..., so to
>> +		 * fetch min value we pass NULL to GT iterator.
> 
> 4. I did not get the 'less comparator == compare with NULL'
> idea. Firstly you describe how to compare 'a > ?', this is
> fully understandable. Then you, as I see, go to the 'a < ?'
> case, but somewhy say, that it is the same as 'NULL < ?'.
> Do not understand.

I say that in this case key which is passed to iterator is NULL
and type of iterator is GE. Consequently, pRangeStart is NULL.
I’ve slightly updated this part of comment:

diff --git a/src/box/sql/wherecode.c b/src/box/sql/wherecode.c
index 3025e5e6b..01ee9ce90 100644
--- a/src/box/sql/wherecode.c
+++ b/src/box/sql/wherecode.c
@@ -1089,10 +1089,10 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo,    /* Complete information about the W
                 * floating point value: a > 0.5 -> a >= 1;
                 * It is done by setting P5 flag for OP_Seek*.
                 * It is worth mentioning that we do not need
-                * this step when it comes for less comparison:
-                * key is NULL in this case: values are ordered
-                * as  NULL, ... NULL, min_value, ..., so to
-                * fetch min value we pass NULL to GT iterator.
+                * this step when it comes for less (<) comparison
+                * of nullable field. Key is NULL in this case:
+                * values are ordered as  NULL, ... NULL, min_value,
+                * so to fetch min value we pass NULL to GT iterator.


>> +		 * The only exception is less comparison in
>> +		 * conjunction with ORDER BY DESC clause:
>> +		 * in such situation we use LE iterator and
>> +		 * truncated value to compare. But then
>> +		 * pRangeStart == NULL.
>> +		 * This procedure is correct for compound index:
>> +		 * only one comparison of less/greater type can be
>> +		 * used at the same time. For instance,
>> +		 * a < 1.5 AND b > 0.5 is handled by SeekGT using
>> +		 * column a and fetching column b from tuple and
>> +		 * OP_Le comparison.
>> +		 *
>> +		 * Note that OP_ApplyType, which is emitted before
>> +		 * OP_Seek** doesn't truncate floating point to
>> +		 * integer. That's why we need this routine.
> 
> 5. Why can't OP_ApplyType truncate the numbers? Because it does
> not know, to which side round the values?

It doesn’t (and can't) change type of iterator alongside with truncation.

> Then probably we can drop OP_ApplyType invocations?

We can’t remove whole invocation to OP_ApplyType, but
we can really skip cast to int:

                                sqlVdbeAddOp2(v, OP_MustBeInt, regBase + i,
                                              addrNxt);
+                               start_types[i] = FIELD_TYPE_SCALAR;
+                               /*
+                                * We need to notify column cache
+                                * that type of value may change
+                                * so we should fetch value from
+                                * tuple again rather then copy
+                                * from register.
+                                */
+                               sql_expr_type_cache_change(pParse, regBase + i,
+                                                          1);


Idk whether we need such optimisation or not

>> +		 * Also, note that terms are separated by OR
>> +		 * predicates, so we consider term as sequence
>> +		 * of AND'ed predicates.
>> +		 */
>> +		int seek_addr = 0;
>> +		for (int i = 0; i < nEq; i++) {
>> +			enum field_type type = idx_def->key_def->parts[i].type;
>> +			if (type == FIELD_TYPE_INTEGER) {
>> +				/*
>> +				 * OP_MustBeInt consider NULLs as
>> +				 * non-integer values, so firstly
>> +				 * check whether value is NULL or not.
>> +				 */
>> +				seek_addr = sqlVdbeAddOp1(v, OP_IsNull,
>> +							  regBase);
> 
> 6. I do not understand, how does it work. You rewrite
> seek_addr in the cycle. It means, that if there are 2 integers,
> the first OP_IsNull address is rewritten by the second, and
> below, in sqlVdbeJumpHere(v, seek_addr), you will update P2
> for the last comparison only. The first OP_IsNull.P2 will stay 0.

You are right. Fixed:

@@ -1112,24 +1112,33 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo,    /* Complete information about the W
                 * predicates, so we consider term as sequence
                 * of AND'ed predicates.
                 */
-               int seek_addr = 0;
+               size_t addrs_sz = sizeof(int) * nEq;
+               int *seek_addrs = region_alloc(&pParse->region, addrs_sz);
+               if (seek_addrs == NULL) {
+                       diag_set(OutOfMemory, addrs_sz, "region", "seek_addrs");
+                       pParse->is_aborted = true;
+                       return 0;
+               }
+               memset(seek_addrs, 0, addrs_sz);
...
                                /*
                                 * OP_MustBeInt consider NULLs as
                                 * non-integer values, so firstly
                                 * check whether value is NULL or not.
                                 */
-                               seek_addr = sqlVdbeAddOp1(v, OP_IsNull,
+                               seek_addrs[i] = sqlVdbeAddOp1(v, OP_IsNull,
                                                          regBase);
                                sqlVdbeAddOp2(v, OP_MustBeInt, regBase + i,
                                              addrNxt);
                        }
                }
                /* Inequality constraint comes always at the end of list. */

@@ -1142,8 +1151,10 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo,     /* Complete information about the W
                        op = aStartOp[(start_constraints << 2) +
                                      (startEq << 1) + bRev];
                        assert(op != 0);
-                       if (seek_addr != 0)
-                               sqlVdbeJumpHere(v, seek_addr);
+                       for (uint32_t i = 0; i < nEq; ++i) {
+                               if (seek_addrs[i] != 0)
+                                       sqlVdbeJumpHere(v, seek_addrs[i]);
+                       }
                        sqlVdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase,
                                             nConstraint);
And added test case:

diff --git a/test/sql/types.test.lua b/test/sql/types.test.lua
index afa3677f0..170a1ad04 100644
--- a/test/sql/types.test.lua
+++ b/test/sql/types.test.lua
@@ -237,3 +237,12 @@ box.execute("SELECT a FROM t1 WHERE a > 1.1;")
 box.execute("SELECT a FROM t1 WHERE a < 1.1;")
 
 box.space.T1:drop()
+
+box.execute("CREATE TABLE t1(id INT PRIMARY KEY, a INT, b INT);")
+box.execute("CREATE INDEX i1 ON t1(a, b);")
+box.execute("INSERT INTO t1 VALUES (1, 1, 1);")
+box.execute("SELECT a FROM t1 WHERE a = 1.0 AND b > 0.5;")
+box.execute("SELECT a FROM t1 WHERE a = 1.5 AND b IS NULL;")
+box.execute("SELECT a FROM t1 WHERE a IS NULL AND b IS NULL;")
+
+box.space.T1:drop()

> 
>> +				sqlVdbeAddOp2(v, OP_MustBeInt, regBase + i,
>> +					      addrNxt);
>> 			}
>> 		}
>> +		/* Inequality constraint comes always at the end of list. */
>> +		if (pRangeStart != NULL &&
>> +		    idx_def->key_def->parts[nEq].type == FIELD_TYPE_INTEGER)
>> +			force_integer_reg = regBase + nEq;
>> 		emit_apply_type(pParse, regBase, nConstraint - bSeekPastNull,
>> 				start_types);
>> 		if (pLoop->nSkip > 0 && nConstraint == pLoop->nSkip) {
>> 
>> diff --git a/test/sql/types.result b/test/sql/types.result
>> index 758018eb5..274d73dd2 100644
>> --- a/test/sql/types.result
>> +++ b/test/sql/types.result
>> @@ -927,3 +927,43 @@ box.execute("SELECT * FROM tboolean WHERE s1 = 1.123;")
>> +...
>> +box.execute("SELECT a FROM t1 WHERE a < 1.1;")
>> +---
>> +- metadata:
>> +  - name: A
>> +    type: integer
>> +  rows:
>> +  - [1]
> 
> 7. Please, add the Kostja's example about '= 1.0’.

diff --git a/test/sql/types.test.lua b/test/sql/types.test.lua
index afa3677f0..16448436d 100644
--- a/test/sql/types.test.lua
+++ b/test/sql/types.test.lua
@@ -233,7 +233,17 @@ box.execute("CREATE TABLE t1(id INT PRIMARY KEY, a INT UNIQUE);")
 box.execute("INSERT INTO t1 VALUES (1, 1);")
 box.execute("SELECT a FROM t1 WHERE a IN (1.1, 2.1);")
 box.execute("SELECT a FROM t1 WHERE a = 1.1;")
+box.execute("SELECT a FROM t1 WHERE a = 1.0;")
 box.execute("SELECT a FROM t1 WHERE a > 1.1;")
 box.execute("SELECT a FROM t1 WHERE a < 1.1;")