[Tarantool-patches] [PATCH v2 5/7] sql: remove implicit cast from string for comparison

Mon Jun 22 15:25:50 MSK 2020

On 17 Jun 15:36, imeevma at tarantool.org wrote:
> @@ -2399,14 +2387,6 @@ case OP_Ge: {             /* same as TK_GE, jump, in1, in3 */
>  	default:       res2 = res>=0;     break;
>  	}
>  
> -	/* Undo any changes made by mem_apply_type() to the input registers. */
> -	assert((pIn1->flags & MEM_Dyn) == (flags1 & MEM_Dyn));
> -	pIn1->flags = flags1;
> -	pIn1->field_type = ft_p1;
> -	assert((pIn3->flags & MEM_Dyn) == (flags3 & MEM_Dyn));
> -	pIn3->flags = flags3;
> -	pIn3->field_type = ft_p3;
> -

Replace these assertions with relevant ones.

> @@ -3496,8 +3476,6 @@ case OP_SeekGT: {       /* jump, in3 */
>  		pIn3 = &aMem[int_field];
>  		if ((pIn3->flags & MEM_Null) != 0)
>  			goto skip_truncate;
> -		if ((pIn3->flags & MEM_Str) != 0)
> -			mem_apply_numeric_type(pIn3);
>  		int64_t i;
>  		if ((pIn3->flags & MEM_Int) == MEM_Int) {
>  			i = pIn3->u.i;

Could you substitute routine below with mem_check_types()
(or whatever this function is called)?

> @@ -3590,6 +3568,26 @@ skip_truncate:
>  	assert(oc!=OP_SeekLT || r.default_rc==+1);
>  
>  	r.aMem = &aMem[pOp->p3];

Code below definitely lacks some comments.

> +	for (int i = 0; i < r.nField; ++i) {
> +		enum field_type type = r.key_def->parts[i].type;
> +		struct Mem *mem = &r.aMem[i];
> +		if ((mem->flags & MEM_Str) != 0 && sql_type_is_numeric(type)) {

What if key_def contains scalar/any type?

> +			diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
> +				field_type_strs[type], mem_type_to_str(mem));
> +			goto abort_due_to_error;
> +		}
> +		if (mem_check_types(mem, type) == 0)
> +			continue;
> +		if ((mem->flags & MEM_Real) != 0 &&
> +		    (type == FIELD_TYPE_INTEGER ||
> +		     type == FIELD_TYPE_UNSIGNED)) {
> +			res = 1;
> +			goto seek_not_found;
> +		}
> +		diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
> +			field_type_strs[type], mem_type_to_str(mem));
> +		goto abort_due_to_error;
> +	}
>  #ifdef SQL_DEBUG
>  	{ int i; for(i=0; i<r.nField; i++) assert(memIsValid(&r.aMem[i])); }
>  #endif
> @@ -4717,6 +4715,27 @@ case OP_IdxGE:  {       /* jump */
>  		r.default_rc = 0;
>  	}
>  	r.aMem = &aMem[pOp->p3];
> +	for (int i = 0; i < r.nField; ++i) {
> +		struct Mem *mem = &r.aMem[i];
> +		enum mp_type mp_type = sql_value_type(mem);
> +		enum field_type field_type = r.key_def->parts[i].type;
> +		if (field_type == FIELD_TYPE_SCALAR ||
> +		    mem->field_type == FIELD_TYPE_SCALAR)
> +			continue;
> +		bool is_nullable = r.key_def->parts[i].nullable_action ==
> +				   ON_CONFLICT_ACTION_NONE;
> +		if (field_mp_plain_type_is_compatible(field_type, mp_type,
> +						      is_nullable))
> +			continue;

Why this procedure is different from one in Seek* opcode
handling routine? Could you put it in one function and reuse?
Also lacks comments.

> +		if (!sql_type_is_numeric(field_type) ||
> +		    !(mp_type == MP_INT || mp_type == MP_UINT ||
> +		      mp_type == MP_DOUBLE || mp_type == MP_FLOAT)) {
> +			diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
> +				 mem_type_to_str(mem),
> +				 field_type_strs[field_type]);
> +			goto abort_due_to_error;
> +		}
> +	}
> diff --git a/src/box/sql/wherecode.c b/src/box/sql/wherecode.c
> index 6d8768865..1d7c76670 100644
> --- a/src/box/sql/wherecode.c
> +++ b/src/box/sql/wherecode.c
> @@ -335,72 +335,6 @@ disableTerm(WhereLevel * pLevel, WhereTerm * pTerm)
>  	}
>  }
>  
> -/**
> -			types[i] = FIELD_TYPE_SCALAR;
> -		}
> -	}
> -}
> -
>  /*
>   * Generate code for a single equality term of the WHERE clause.  An equality
>   * term can be either X=expr or X IN (...).   pTerm is the term to be
>  			testcase(pRangeStart->wtFlags & TERM_VIRTUAL);
>  			if (sqlExprIsVector(pRight) == 0) {
> @@ -1049,94 +950,6 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo,	/* Complete information about the W
>  		}
>  		struct index_def *idx_pk = space->index[0]->def;
>  		uint32_t pk_part_count = idx_pk->key_def->part_count;
> -		/*
> -		 * Tarantool's iterator over integer fields doesn't
> -		 * tolerate floating point values. Hence, if term
> -		 * is equality comparison and value of operand is
> -		 * not integer, we can skip it since it always
> -		 * results in false: INT a == 0.5 -> false;
> -		 * It is done using OP_MustBeInt facilities.
> -		 * In case term is greater comparison (a > ?), we
> -		 * should notify OP_SeekGT to process truncation of
> -		 * floating point value: a > 0.5 -> a >= 1;
> -		 * It is done by setting P5 flag for OP_Seek*.
> -		 * It is worth mentioning that we do not need
> -		 * this step when it comes for less (<) comparison
> -		 * of nullable field. Key is NULL in this case:
> -		 * values are ordered as  NULL, ... NULL, min_value,
> -		 * so to fetch min value we pass NULL to GT iterator.
> -		 * The only exception is less comparison in
> -		 * conjunction with ORDER BY DESC clause:
> -		 * in such situation we use LE iterator and
> -		 * truncated value to compare. But then
> -		 * pRangeStart == NULL.
> -		 * This procedure is correct for compound index:
> -		 * only one comparison of less/greater type can be
> -		 * used at the same time. For instance,
> -		 * a < 1.5 AND b > 0.5 is handled by SeekGT using
> -		 * column a and fetching column b from tuple and
> -		 * OP_Le comparison.
> -		 *
> -		 * Note that OP_ApplyType, which is emitted before
> -		 * OP_Seek** doesn't truncate floating point to
> -		 * integer. That's why we need this routine.
> -		 * Also, note that terms are separated by OR
> -		 * predicates, so we consider term as sequence
> -		 * of AND'ed predicates.
> -		 */
> -		size_t addrs_sz;
> -		int *seek_addrs = region_alloc_array(&pParse->region,
> -						     typeof(seek_addrs[0]), nEq,
> -						     &addrs_sz);
> -		if (seek_addrs == NULL) {
> -			diag_set(OutOfMemory, addrs_sz, "region_alloc_array",
> -				 "seek_addrs");
> -			pParse->is_aborted = true;
> -			return 0;

Solid code removal. I won't review it, I hope you verified that all
tests are working as desired.

> -		memset(seek_addrs, 0, addrs_sz);
> -		for (int i = 0; i < nEq; i++) {
> -			enum field_type type = idx_def->key_def->parts[i].type;
> -			if (type == FIELD_TYPE_INTEGER ||
> -			    type == FIELD_TYPE_UNSIGNED) {
> -				/*
> -				 * OP_MustBeInt consider NULLs as
> -				 * non-integer values, so firstly
> -				 * check whether value is NULL or not.
> -				 */
> -				seek_addrs[i] = sqlVdbeAddOp1(v, OP_IsNull,
> -							      regBase);
> -				sqlVdbeAddOp2(v, OP_MustBeInt, regBase + i,
> -					      addrNxt);
> -				start_types[i] = FIELD_TYPE_SCALAR;
> -				/*
> -				 * We need to notify column cache
> -				 * that type of value may change
> -				 * so we should fetch value from
> -				 * tuple again rather then copy
> -				 * from register.
> -				 */
> -				sql_expr_type_cache_change(pParse, regBase + i,
> -							   1);
> -			}
> -		}
> -		/* Inequality constraint comes always at the end of list. */
> -		part_count = idx_def->key_def->part_count;
> -		if (pRangeStart != NULL) {
> -			/*
> -			 * nEq == 0 means that filter condition
> -			 * contains only inequality.
> -			 */
> -			uint32_t ineq_idx = nEq == 0 ? 0 : nEq - 1;
> -			assert(ineq_idx < part_count);
> -			enum field_type ineq_type =
> -				idx_def->key_def->parts[ineq_idx].type;
> -			if (ineq_type == FIELD_TYPE_INTEGER ||
> -			    ineq_type == FIELD_TYPE_UNSIGNED)
> -				force_integer_reg = regBase + nEq;
> -		}
> -		emit_apply_type(pParse, regBase, nConstraint - bSeekPastNull,
> -				start_types);
>  		if (pLoop->nSkip > 0 && nConstraint == pLoop->nSkip) {
>  			/* The skip-scan logic inside the call to codeAllEqualityConstraints()
>  			 * above has already left the cursor sitting on the correct row,
> --- /dev/null
> +++ b/test/sql-tap/gh-4230-del-impl-cast-str-to-num.test.lua

Same note: I'd better use .sql test format and/or amalgamate it with
already existing test file.

> @@ -0,0 +1,281 @@
> +#!/usr/bin/env tarantool
> +test = require("sqltester")
> +test:plan(32)
> +
> +--
> +
> +test:execsql([[
> +    CREATE TABLE t1(x TEXT primary key);
> +    INSERT INTO t1 VALUES('1');
> +    CREATE TABLE t2(x INTEGER primary key);
> +    INSERT INTO t2 VALUES(1);
> +    CREATE TABLE t3(x DOUBLE primary key);
> +    INSERT INTO t3 VALUES(1.0);

What about table with scalar/any fields?