[tarantool-patches] Re: [PATCH 4/9] sql: introduce type boolean

n.pettik korablev at tarantool.org
Thu Apr 18 20:54:45 MSK 2019


>> diff --git a/src/box/sql/build.c b/src/box/sql/build.c
>> index 7724e9415..c6fbb1af6 100644
>> --- a/src/box/sql/build.c
>> +++ b/src/box/sql/build.c
>> @@ -958,8 +955,7 @@ emitNewSysSpaceSequenceRecord(Parse *pParse, int space_id, const char reg_seq_id
>> 	sqlVdbeAddOp2(v, OP_IntCopy, reg_seq_id, first_col + 2);
>> 
>> 	/* 3. True, which is 1 in SQL  */
> 
> 1. I guess, it is not '1' anymore.

diff --git a/src/box/sql/build.c b/src/box/sql/build.c
index c6fbb1af6..9f9f6a0da 100644
--- a/src/box/sql/build.c
+++ b/src/box/sql/build.c
@@ -953,10 +953,7 @@ emitNewSysSpaceSequenceRecord(Parse *pParse, int space_id, const char reg_seq_id
        
        /* 2. Sequence id  */
        sqlVdbeAddOp2(v, OP_IntCopy, reg_seq_id, first_col + 2);
-
-       /* 3. True, which is 1 in SQL  */
        sqlVdbeAddOp2(v, OP_Bool, true, first_col + 3);
-
        sqlVdbeAddOp3(v, OP_MakeRecord, first_col + 1, 3, first_col);
 
        return first_col;

>> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
>> index 4b98bd175..6b38e8e66 100644
>> --- a/src/box/sql/expr.c
>> +++ b/src/box/sql/expr.c
>> @@ -3360,6 +3360,15 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg,
>> 	}
>> }
>> 
> 
> 2. It is worth adding a comment. At a first glance I did not
> understand what is 'mem', and what 'expr' is expected and allowed
> to store - str, int, float?
> 
>> +static void
>> +vdbe_emit_bool(struct Vdbe *v, const struct Expr *expr, int mem)
>> +{
>> +	const char *z = expr->u.zToken;
> 
> 3. It would be safer to add an assertion here that expr actually
> stores a string only.
> 
> 4. The function is quite small, and is called in one place
> only. Mark it 'inline’.

I’ve removed this function at all, see below.

>> @@ -3764,6 +3773,11 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
>> 			expr_code_int(pParse, pExpr, false, target);
>> 			return target;
>> 		}
>> +	case TK_TRUE:
>> +	case TK_FALSE: {
>> +			vdbe_emit_bool(pParse->pVdbe, pExpr, target);
> 
> 5. Correct me, if I am wrong but there is how I understand
> boolean value emission.
> 
> - Parser scans a literal. It marks 'true' with TK_TRUE, 'false'
>   with TK_FALSE (taking into account case-insensitivity);
> 
> - Here you merge TK_FALSE and TK_TRUE into a single 'case'
>   statement;
> 
> - Inside that statement you call vdbe_emit_bool, which again
>   parses the string for being equal 'true' of 'false'.
> 
> So why? Since the first step you already have the boolean value.
> The value is '== TK_TRUE'. You do not need to use strcasecmp here.
> Just split these 'case' statements, and make vdbe_emit_bool()
> taking a boolean value instead of expr.

You are absolutely right. Refactored this simple way:

diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 6b38e8e66..d70b64c45 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -3360,15 +3360,6 @@ expr_code_int(struct Parse *parse, struct Expr *expr, bool is_neg,
        }
 }
 
-static void
-vdbe_emit_bool(struct Vdbe *v, const struct Expr *expr, int mem)
-{
-       const char *z = expr->u.zToken;
-       assert(z != NULL);
-       bool val = strncasecmp(z, "TRUE", 4) == 0;
-       sqlVdbeAddOp2(v, OP_Bool, val, mem);
-}
-
 /*
  * Erase column-cache entry number i
  */
@@ -3775,7 +3766,7 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
                }
        case TK_TRUE:
        case TK_FALSE: {
-                       vdbe_emit_bool(pParse->pVdbe, pExpr, target);
+                       sqlVdbeAddOp2(v, OP_Bool, op == TK_TRUE, target);
                        return target;
                }

> 6. I see, that all other 'case's set pExpr->type. Why don't you
> do it here?

It is required only for non-constant values. For literals it is assigned
in parse.y spanExpr().

> 
>> +			return target;
>> +		}
>> #ifndef SQL_OMIT_FLOATING_POINT
>> 	case TK_FLOAT:{
>> 			pExpr->type = FIELD_TYPE_INTEGER;
>> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
>> index 3cdb119c8..860cd8920 100644
>> --- a/src/box/sql/func.c
>> +++ b/src/box/sql/func.c
>> @@ -1091,6 +1101,11 @@ quoteFunc(sql_context * context, int argc, sql_value ** argv)
>> 			}
>> 			break;
>> 		}
>> +	case MP_BOOL: {
>> +		sql_result_text(context, argv[0]->u.b ? "true" : "false", -1,
>> +				SQL_TRANSIENT);
>> +		break;
>> +	}
>> 	default:{
>> 			assert(sql_value_mp_type(argv[0]) == MP_NIL);
>> 			sql_result_text(context, "NULL", 4, SQL_STATIC);
> 
> 7. In the same file lengthFunc() now accepts boolean value. But obviously
> it should not, bool is not string. It exacerbates 4159 issue.

There are many other functions accepting wrong argument types.
Now length() accepts integers and floats, so lets fix this behaviour
later in scope of #4159.

>> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
>> index 9134f767d..4f62c2782 100644
>> --- a/src/box/sql/sqlInt.h
>> +++ b/src/box/sql/sqlInt.h
>> @@ -2161,6 +2167,24 @@ case OP_Ge: {             /* same as TK_GE, jump, in1, in3 */
>> 			}
>> 			break;
>> 		}
>> +	} else if ((flags1 | flags3) & MEM_Bool) {
>> +		/*
>> +		 * If one of values is of type BOOLEAN, then the
>> +		 * second one must be BOOLEAN as well. Otherwise
>> +		 * an error is raised.
>> +		 */
>> +		bool is_bool_type_arg1 = flags1 & MEM_Bool;
>> +		bool is_bool_type_arg3 = flags3 & MEM_Bool;
>> +		if (! is_bool_type_arg1 || ! is_bool_type_arg3) {
>> +			char *inconsistent_type = ! is_bool_type_arg1 ?
>> +						  mem_type_to_str(pIn1) :
>> +						  mem_type_to_str(pIn3);
>> +			diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
>> +				 inconsistent_type, "boolean");
>> +			rc = SQL_TARANTOOL_ERROR;
>> +			goto abort_due_to_error;
>> +		}
>> +		res = sqlMemCompare(pIn3, pIn1, NULL);
>> 	} else {
> 
> 8. Looking at all these type comparison prohibitions I am getting
> afraid of how will we select from SCALAR columns?
> 
> A schema:
> 
>    box.execute('CREATE TABLE t (id INTEGER PRIMARY KEY, a SCALAR UNIQUE);')
>    box.execute("INSERT INTO t VALUES (1, 1), (2, true), (3, false), (4, 'str')")
> 
> SQL select:
> 
>> box.execute('SELECT * FROM t WHERE a > 1')
>    ---
>    - error: 'Type mismatch: can not convert INTEGER to boolean'
>    ...
> 
> The same Lua select:
> 
>> box.space.T.index[1]:select({1}, {iterator = 'GT'})
>    ---
>    - - [4, 'str']
>    ...
> 
> In fact, now we can not use SCALAR in SQL for any comparisons
> because it will raise type mismatch on literally everything.
> 
> What are we going to do with it? IMO, we could add a flag like
> 'is_scalar' to struct Mem in its flags section, which would allow
> to compare this value with anything.

Konstantin suggested to extend struct Mem with field_type,
which is going to substitute this flag.

> Looks crutchy though. And is
> not a part of this issue of course, but should be filed into the
> issue list.
> 
> I see a similar issue https://github.com/tarantool/tarantool/issues/4124.
> Probably, it is worth extending it instead of opening new.

I consider this as a trade-off of using scalar: users shouldn’t use this
type until they are really have to. Nevertheless, they still can use CAST
operator and switch case, or write their own lua-function converting values
to required type.

>> 		enum field_type type = pOp->p5 & FIELD_TYPE_MASK;
>> 		if (sql_type_is_numeric(type)) {
>> +bool
>> +sql_column_boolean(sql_stmt *stmt, int i)
>> +{
>> +	bool val = sql_value_boolean(columnMem(stmt, i));
>> +	columnMallocFailure(stmt);
>> +	return val;
>> +}
>> +
>> sql_int64
>> sql_column_int64(sql_stmt * pStmt, int i)
>> {
> 
> 9. You did not update sql_bind_value(), now it binds NULL
> on any boolean value. On the other hand it is not used at
> all and probably could be removed.

I will remove it in a separate commit.

>> diff --git a/src/box/sql/vdbeaux.c b/src/box/sql/vdbeaux.c
>> index 0cc3c1487..0f56028e5 100644
>> --- a/src/box/sql/vdbeaux.c
>> +++ b/src/box/sql/vdbeaux.c
>> @@ -3383,6 +3383,17 @@ sqlMemCompare(const Mem * pMem1, const Mem * pMem2, const struct coll * pColl)
>> 		return (f2 & MEM_Null) - (f1 & MEM_Null);
>> 	}
>> 
>> +	if (combined_flags & MEM_Bool) {
> 
> 10. Sorry for nit, but we use explicit != 0 in conditions.

diff --git a/src/box/sql/vdbeaux.c b/src/box/sql/vdbeaux.c
index 0f56028e5..861d03344 100644
--- a/src/box/sql/vdbeaux.c
+++ b/src/box/sql/vdbeaux.c
@@ -3383,7 +3383,7 @@ sqlMemCompare(const Mem * pMem1, const Mem * pMem2, const struct coll * pColl)
                return (f2 & MEM_Null) - (f1 & MEM_Null);
        }
 
-       if (combined_flags & MEM_Bool) {
+       if ((combined_flags & MEM_Bool) != 0) {
                if ((f1 & f2 & MEM_Bool) != 0) {
                        if (pMem1->u.b == pMem2->u.b)
                                return 0;

>> +		if ((f1 & f2 & MEM_Bool) != 0) {
>> +			if (pMem1->u.b == pMem2->u.b)
>> +				return 0;
>> +			if (pMem1->u.b)
>> +				return 1;
>> +			return -1;
>> +		}
>> +		return -1;
>> +	}
>> +
>> 	/* At least one of the two values is a number
>> 	 */
>> 	if (combined_flags & (MEM_Int | MEM_Real)) {
>> @@ -3561,10 +3572,16 @@ sqlVdbeCompareMsgpack(const char **key1,
>> 			break;
>> 		}
>> 	case MP_BOOL:{
>> -			assert((unsigned char)(*aKey1) == 0xc2
>> -			       || (unsigned char)*aKey1 == 0xc3);
>> -			mem1.u.i = (unsigned)(size_t) aKey1++ - 0xc2;
>> -			goto do_int;
>> +
> 
> 11. Stray empty line.

@@ -3572,7 +3572,6 @@ sqlVdbeCompareMsgpack(const char **key1,
                        break;
                }
        case MP_BOOL:{
-
                        mem1.u.b = mp_decode_bool(&aKey1);
                        if ((pKey2->flags & MEM_Bool) != 0) {
                                if (mem1.u.b != pKey2->u.b) {

>> +			mem1.u.b = mp_decode_bool(&aKey1);
>> +			if ((pKey2->flags & MEM_Bool) != 0) {
>> +				if (mem1.u.b != pKey2->u.b) {
>> +					rc = mem1.u.b ? 1 : -1;
>> +				}
> 
> 12. Usually we omit curly braces when body of a cycle or a condition
> is one line.

@@ -3572,14 +3572,12 @@ sqlVdbeCompareMsgpack(const char **key1,
                        break;
                }
        case MP_BOOL:{
-
                        mem1.u.b = mp_decode_bool(&aKey1);
                        if ((pKey2->flags & MEM_Bool) != 0) {
-                               if (mem1.u.b != pKey2->u.b) {
+                               if (mem1.u.b != pKey2->u.b)
                                        rc = mem1.u.b ? 1 : -1;
-                               }
                        } else {

>> +			} else {
>> +				rc = (pKey2->flags & MEM_Null) != 0 ? +1 : -1;
> 
> 13. '+1' ? Sorry, we do not use prefix '+’.

                        } else {
-                               rc = (pKey2->flags & MEM_Null) != 0 ? +1 : -1;
+                               rc = (pKey2->flags & MEM_Null) != 0 ? 1 : -1;
                        }

>> diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
>> index 15a2f55cb..4fed0eefe 100644
>> --- a/src/box/sql/vdbemem.c
>> +++ b/src/box/sql/vdbemem.c
>> @@ -499,6 +501,16 @@ sqlVdbeRealValue(Mem * pMem, double *v)
>> 	return -1;
>> }
>> 
>> +int
>> +vdbe_value_boolean(Mem *mem, bool *b)
>> +{
>> +	if (mem->flags  & MEM_Bool) {
>> +		*b = mem->u.b;
>> +		return 0;
>> +	}
>> +	return -1;
>> +}
> 
> 14. Probably it is better to name it by Mem, not by
> Vdbe, because it does not take a Vdbe pointer. And
> similar functions contract type names: 'boolean' -> 'bool',
> 'integer' -> 'int', etc. In summary, 'mem_value_bool'
> for example.

diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h
index 5251e76ab..73c77efed 100644
--- a/src/box/sql/vdbeInt.h
+++ b/src/box/sql/vdbeInt.h
@@ -510,7 +510,7 @@ int sqlVdbeMemIntegerify(Mem *, bool is_forced);
 int sqlVdbeRealValue(Mem *, double *);
 
 int
-vdbe_value_boolean(Mem *mem, bool *b);
+mem_value_bool(Mem *mem, bool *b);
 
 int mem_apply_integer_type(Mem *);
 int sqlVdbeMemRealify(Mem *);
diff --git a/src/box/sql/vdbeapi.c b/src/box/sql/vdbeapi.c
index f305974bc..4bdbcafd4 100644
--- a/src/box/sql/vdbeapi.c
+++ b/src/box/sql/vdbeapi.c
@@ -215,7 +215,7 @@ bool
 sql_value_boolean(sql_value *val)
 {
        bool b;
-       vdbe_value_boolean((struct Mem *) val, &b);
+       mem_value_bool((struct Mem *) val, &b);
        return b;
 }
 
diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
index 4fed0eefe..92406f938 100644
--- a/src/box/sql/vdbemem.c
+++ b/src/box/sql/vdbemem.c
@@ -502,7 +502,7 @@ sqlVdbeRealValue(Mem * pMem, double *v)
 }
 
 int
-vdbe_value_boolean(Mem *mem, bool *b)
+mem_value_bool(Mem *mem, bool *b)
 {
        if (mem->flags  & MEM_Bool) {
                *b = mem->u.b;

>> @@ -594,6 +606,37 @@ sqlVdbeMemNumerify(Mem * pMem)
>> 	return SQL_OK;
>> }
>> 
>> +/**
>> + * According to ANSI SQL string value can be converted to boolean
>> + * type if string consists of literal "true" or "false" and
>> + * number of leading spaces.
>> + *
>> + * For instance, "   tRuE" can be successfully converted to
>> + * boolean value true.
> 15. So '   true' is valid, but 'true    ' is not? As I see the
> standard, it is not so. Cite:
> 
> """
>    If TD is boolean, then
> 
>    Case:
> 
>        a) If SD is character string, then SV is replaced by
> 
>            TRIM ( BOTH ' ' FROM VE )
> 
>           Case:
> 
>            i)  If the rules for <literal> in Subclause 5.3, “<literal>”,
>                can be applied to SV to determine a valid value of the
>                data type TD, then let TV be that value.
> 
>            ii) Otherwise, an exception condition is raised: data
>                exception — invalid character value for cast.
> 
>        b) If SD is boolean, then TV is SV.
> """
> 
> TD - cast type,
> SD - type of cast target,
> VE,SV - cast target,
> TV - cast result value.
> 
> The key line: "TRIM ( BOTH ' ' FROM VE )" -> 'BOTH'.
> Spaces are trimmed off from both sides.

Yep, you are right. For some reason I’ve missed the fact
that spaces must be removed from both sides.

>> + *
>> + * @param str String to be converted to boolean.
>> + *            Assumed to be null terminated.
>> + * @param result Resulting value of cast.
>> + * @retval 0 If string satisfies conditions above.
>> + * @retval -1 Otherwise.
>> + */
>> +static int
>> +str_cast_to_boolean(const char *str, bool *result)
>> +{
>> +	assert(str != NULL);
>> +	for (; *str == ' '; str++);
>> +	size_t rest_str_len = strlen(str);
>> +	if (rest_str_len == 4 && strncasecmp(str, "true", 4) == 0) {
>> +		*result = true;
>> +		return 0;
>> +	}
>> +	if (rest_str_len == 5 && strncasecmp(str, "false", 5) == 0) {
>> +		*result = false;
>> +		return 0;
>> +	}
> 
> 16. I guess it is far from the most efficient implementation. You
> calculate length of the whole string before comparison, scanning
> its twice in the worst and most common case. Please, consider my
> fixes below. I did not test them though, so probably there are some
> typos.
> Note, that in my fixes I've already accounted my comment about
> trimming trailing whitespaces.
> 
> ============================================================================
> @@ -625,16 +625,20 @@ str_cast_to_boolean(const char *str, bool *result)
> {
> 	assert(str != NULL);
> 	for (; *str == ' '; str++);
> -	size_t rest_str_len = strlen(str);
> -	if (rest_str_len == 4 && strncasecmp(str, "true", 4) == 0) {
> +	if (strncasecmp(str, "true", 4) == 0) {

We can’t use strncasecmp() without verification the fact that
rest_str_len (i.e. string length after skipping leading spaces) is >= 4.

> +		str += 4;
> 		*result = true;
> -		return 0;
> -	}
> -	if (rest_str_len == 5 && strncasecmp(str, "false", 5) == 0) {
> +	} else if (strncasecmp(str, "false", 5) == 0) {
> +		str += 5;
> 		*result = false;
> -		return 0;
> +	} else {
> +		return -1;
> 	}
> -	return -1;
> +	for (; *str != 0; ++str) {
> +		if (*str != ' ')
> +			return -1;
> +	}
> +	return 0;
> }
> ============================================================================

Taking into account my comment diff is next:

diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
index 92406f938..19b42b992 100644
--- a/src/box/sql/vdbemem.c
+++ b/src/box/sql/vdbemem.c
@@ -609,9 +609,9 @@ sqlVdbeMemNumerify(Mem * pMem)
 /**
  * According to ANSI SQL string value can be converted to boolean
  * type if string consists of literal "true" or "false" and
- * number of leading spaces.
+ * number of leading and trailing spaces.
  *
- * For instance, "   tRuE" can be successfully converted to
+ * For instance, "   tRuE  " can be successfully converted to
  * boolean value true.
  *
  * @param str String to be converted to boolean.
@@ -626,15 +626,20 @@ str_cast_to_boolean(const char *str, bool *result)
        assert(str != NULL);
        for (; *str == ' '; str++);
        size_t rest_str_len = strlen(str);
-       if (rest_str_len == 4 && strncasecmp(str, "true", 4) == 0) {
+       if (rest_str_len >= 4 && strncasecmp(str, "true", 4) == 0) {
                *result = true;
-               return 0;
-       }
-       if (rest_str_len == 5 && strncasecmp(str, "false", 5) == 0) {
+               str += 4;
+       } else if (rest_str_len >= 5 && strncasecmp(str, "false", 5) == 0) {
                *result = false;
-               return 0;
+               str += 5;
+       } else {
+               return -1;
        }
-       return -1;
+       for (; *str != '\0'; ++str) {
+               if (*str != ' ')
+                       return -1;
+       }
+       return 0;
 }
 
And a couple of tests:

diff --git a/test/sql/types.result b/test/sql/types.result
index 3aa0169e2..673a61bb4 100644
--- a/test/sql/types.result
+++ b/test/sql/types.result
@@ -796,6 +796,18 @@ box.execute("SELECT CAST('  TrUe' AS BOOLEAN);")
   rows:
   - [true]
 ...
+box.execute("SELECT CAST('  falsE    ' AS BOOLEAN);")
+---
+- metadata:
+  - name: CAST('  falsE    ' AS BOOLEAN)
+    type: boolean
+  rows:
+  - [false]
+...
+box.execute("SELECT CAST('  fals' AS BOOLEAN);")
+---
+- error: 'Type mismatch: can not convert   fals to boolean'
+...
 box.execute("SELECT CAST(X'4D6564766564' AS BOOLEAN);")
 ---
 - error: 'Type mismatch: can not convert Medved to boolean'
diff --git a/test/sql/types.test.lua b/test/sql/types.test.lua
index 2aed0fe94..461635978 100644
--- a/test/sql/types.test.lua
+++ b/test/sql/types.test.lua
@@ -188,6 +188,9 @@ box.execute("SELECT CAST(1 AS BOOLEAN);")
 box.execute("SELECT CAST(1.123 AS BOOLEAN);")
 box.execute("SELECT CAST('abc' AS BOOLEAN);")
 box.execute("SELECT CAST('  TrUe' AS BOOLEAN);")
+box.execute("SELECT CAST('  falsE    ' AS BOOLEAN);")
+box.execute("SELECT CAST('  fals' AS BOOLEAN);")
+
 box.execute("SELECT CAST(X'4D6564766564' AS BOOLEAN);")

>>  * Cast the datatype of the value in pMem according to the type
>>  * @type.  Casting is different from applying type in that a cast
>> diff --git a/test/sql/types.result b/test/sql/types.result
>> index 0a5085658..3aa0169e2 100644
>> --- a/test/sql/types.result
>> +++ b/test/sql/types.result
>> +box.execute("SELECT unknown;")
>> +---
>> +- metadata:
>> +  - name: unknown
>> +    type: scalar
> 
> 17. Should not it be boolean?

SELECT NULL; also gives as type SCALAR.
UNKNOWN is an alias to NULL.





More information about the Tarantool-patches mailing list