[Tarantool-patches] [PATCH v5 43/52] sql: introduce mem_cast_explicit()

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Apr 14 01:59:31 MSK 2021


Thanks for the patch!

See 2 comments below.

> diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
> index be7b47e76..45d2d5fe3 100644
> --- a/src/box/sql/mem.c
> +++ b/src/box/sql/mem.c
> @@ -946,6 +946,131 @@ mem_to_str(struct Mem *mem)
>  	return -1;
>  }
>  
> +static inline int
> +bytes_to_uint(struct Mem *mem)
> +{
> +	bool is_neg;
> +	int64_t i;
> +	if (sql_atoi64(mem->z, &i, &is_neg, mem->n) != 0)
> +		return -1;
> +	if (is_neg)
> +		return -1;
> +	mem_set_uint(mem, (uint64_t)i);
> +	return 0;
> +}
> +
> +static inline int
> +str_to_bool(struct Mem *mem)
> +{
> +	char *str = mem->z;
> +	bool b;
> +	const char *str_true = "TRUE";
> +	const char *str_false = "FALSE";
> +	uint32_t len_true = strlen(str_true);
> +	uint32_t len_false = strlen(str_false);
> +
> +	for (; str[0] == ' '; str++);
> +	if (strncasecmp(str, str_true, len_true) == 0) {
> +		b = true;
> +		str += len_true;
> +	} else if (strncasecmp(str, str_false, len_false) == 0) {
> +		b = false;
> +		str += len_false;
> +	} else {
> +		return -1;
> +	}
> +	for (; str[0] == ' '; str++);
> +	if (str[0] != '\0')
> +		return -1;
> +	mem_set_bool(mem, b);
> +	return 0;
> +}
> +
> +static inline int
> +int_to_bool(struct Mem *mem)
> +{
> +	mem->u.b = mem->u.i != 0;
> +	mem->flags = MEM_Bool;
> +	mem->field_type = FIELD_TYPE_BOOLEAN;
> +	return 0;
> +}
> +
> +static inline int
> +double_to_bool(struct Mem *mem)
> +{
> +	mem->u.b = mem->u.r != 0.;
> +	mem->flags = MEM_Bool;
> +	mem->field_type = FIELD_TYPE_BOOLEAN;
> +	return 0;
> +}
> +
> +static inline int
> +str_to_bin(struct Mem *mem)
> +{
> +	mem->flags = (mem->flags & (MEM_Dyn | MEM_Static | MEM_Ephem)) |
> +		     MEM_Blob;
> +	mem->field_type = FIELD_TYPE_VARBINARY;
> +	return 0;
> +}

1. You have tons of <src>_to_<dst> converters. I propose you to group them.
For example, str_to_* all together, double_to_* all together, and so on.
It would simplify reading and search.

> +
> +int
> +mem_cast_explicit(struct Mem *mem, enum field_type type)
> +{
> +	if ((mem->flags & MEM_Null) != 0) {
> +		mem->field_type = type;
> +		return 0;
> +	}
> +	switch (type) {
> +	case FIELD_TYPE_UNSIGNED:
> +		if ((mem->flags & MEM_UInt) != 0)
> +			return 0;
> +		if ((mem->flags & MEM_Int) != 0)
> +			return -1;
> +		if ((mem->flags & MEM_Blob) != 0 &&
> +		    (mem->flags & MEM_Subtype) != 0)
> +			return -1;
> +		if ((mem->flags & (MEM_Str | MEM_Blob)) != 0)
> +			return bytes_to_uint(mem);
> +		if ((mem->flags & MEM_Real) != 0)
> +			return double_to_int(mem);

2. tarantool> box.execute('SELECT CAST(-1.1 AS UNSIGNED);')
---
- metadata:
  - name: COLUMN_1
    type: unsigned
  rows:
  - [-1]
...

That looks quite broken. Is this a known issue? From the
code I see the issue existed before your patch (but I was
too lazy to try it).

> @@ -2018,113 +2107,6 @@ registerTrace(int iReg, Mem *p) {
>  }
>  #endif
>  
> -/*
> - * Cast the datatype of the value in pMem according to the type
> - * @type.  Casting is different from applying type in that a cast
> - * is forced.  In other words, the value is converted into the desired
> - * type even if that results in loss of data.  This routine is
> - * used (for example) to implement the SQL "cast()" operator.
> - */
> -int
> -sqlVdbeMemCast(Mem * pMem, enum field_type type)
> -{
> -	assert(type < field_type_MAX);
> -	if (pMem->flags & MEM_Null)
> -		return 0;
> -	switch (type) {
> -	case FIELD_TYPE_SCALAR:
> -		return 0;
> -	case FIELD_TYPE_BOOLEAN:
> -		if ((pMem->flags & MEM_Int) != 0) {
> -			mem_set_bool(pMem, pMem->u.i);
> -			return 0;
> -		}
> -		if ((pMem->flags & MEM_UInt) != 0) {
> -			mem_set_bool(pMem, pMem->u.u);
> -			return 0;
> -		}
> -		if ((pMem->flags & MEM_Real) != 0) {
> -			mem_set_bool(pMem, pMem->u.r);
> -			return 0;
> -		}
> -		if ((pMem->flags & MEM_Str) != 0) {
> -			bool value;
> -			if (str_cast_to_boolean(pMem->z, &value) != 0)
> -				return -1;
> -			mem_set_bool(pMem, value);
> -			return 0;
> -		}
> -		if ((pMem->flags & MEM_Bool) != 0)
> -			return 0;
> -		return -1;
> -	case FIELD_TYPE_INTEGER:
> -	case FIELD_TYPE_UNSIGNED:
> -		if ((pMem->flags & (MEM_Blob | MEM_Str)) != 0) {
> -			bool is_neg;
> -			int64_t val;
> -			if (sql_atoi64(pMem->z, &val, &is_neg, pMem->n) != 0)
> -				return -1;
> -			if (type == FIELD_TYPE_UNSIGNED && is_neg)
> -				return -1;
> -			mem_set_int(pMem, val, is_neg);
> -			return 0;
> -		}
> -		if ((pMem->flags & MEM_Bool) != 0) {
> -			pMem->u.u = (uint64_t)pMem->u.b;
> -			pMem->flags = MEM_UInt;
> -			pMem->field_type = FIELD_TYPE_UNSIGNED;
> -			return 0;
> -		}
> -		if ((pMem->flags & MEM_Real) != 0) {
> -			double d = pMem->u.r;
> -			if (d < 0. && d >= (double)INT64_MIN) {
> -				pMem->u.i = (int64_t)d;
> -				pMem->flags = MEM_Int;
> -				pMem->field_type = FIELD_TYPE_INTEGER;
> -				return 0;
> -			}
> -			if (d >= 0. && d < (double)UINT64_MAX) {
> -				pMem->u.u = (uint64_t)d;
> -				pMem->flags = MEM_UInt;
> -				pMem->field_type = FIELD_TYPE_UNSIGNED;
> -				return 0;
> -			}
> -			return -1;
> -		}
> -		if (type == FIELD_TYPE_UNSIGNED &&
> -		    (pMem->flags & MEM_UInt) == 0)
> -			return -1;
> -		return 0;
> -	case FIELD_TYPE_DOUBLE:
> -		return mem_to_double(pMem);
> -	case FIELD_TYPE_NUMBER:
> -		return mem_to_number(pMem);
> -	case FIELD_TYPE_VARBINARY:
> -		if ((pMem->flags & MEM_Blob) != 0)
> -			return 0;
> -		if ((pMem->flags & MEM_Str) != 0) {
> -			MemSetTypeFlag(pMem, MEM_Str);
> -			return 0;
> -		}
> -		return -1;
> -	default:
> -		assert(type == FIELD_TYPE_STRING);
> -		assert(MEM_Str == (MEM_Blob >> 3));
> -		if ((pMem->flags & MEM_Bool) != 0) {
> -			const char *str_bool = SQL_TOKEN_BOOLEAN(pMem->u.b);
> -			if (mem_copy_str0(pMem, str_bool) != 0)
> -				return -1;
> -			return 0;
> -		}
> -		pMem->flags |= (pMem->flags & MEM_Blob) >> 3;
> -			sql_value_apply_type(pMem, FIELD_TYPE_STRING);
> -		assert(pMem->flags & MEM_Str || pMem->db->mallocFailed);
> -		pMem->flags &=
> -			~(MEM_Int | MEM_UInt | MEM_Real | MEM_Blob | MEM_Zero);
> -		return 0;
> -	}
> -}

It is fascinating how a good code structure allows to get rid of
all of that old garbage mess almost naturally, and reveals some
issues.


More information about the Tarantool-patches mailing list