[Tarantool-patches] [PATCH v5 43/52] sql: introduce mem_cast_explicit()
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Wed Apr 14 01:59:31 MSK 2021
Thanks for the patch!
See 2 comments below.
> diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
> index be7b47e76..45d2d5fe3 100644
> --- a/src/box/sql/mem.c
> +++ b/src/box/sql/mem.c
> @@ -946,6 +946,131 @@ mem_to_str(struct Mem *mem)
> return -1;
> }
>
> +static inline int
> +bytes_to_uint(struct Mem *mem)
> +{
> + bool is_neg;
> + int64_t i;
> + if (sql_atoi64(mem->z, &i, &is_neg, mem->n) != 0)
> + return -1;
> + if (is_neg)
> + return -1;
> + mem_set_uint(mem, (uint64_t)i);
> + return 0;
> +}
> +
> +static inline int
> +str_to_bool(struct Mem *mem)
> +{
> + char *str = mem->z;
> + bool b;
> + const char *str_true = "TRUE";
> + const char *str_false = "FALSE";
> + uint32_t len_true = strlen(str_true);
> + uint32_t len_false = strlen(str_false);
> +
> + for (; str[0] == ' '; str++);
> + if (strncasecmp(str, str_true, len_true) == 0) {
> + b = true;
> + str += len_true;
> + } else if (strncasecmp(str, str_false, len_false) == 0) {
> + b = false;
> + str += len_false;
> + } else {
> + return -1;
> + }
> + for (; str[0] == ' '; str++);
> + if (str[0] != '\0')
> + return -1;
> + mem_set_bool(mem, b);
> + return 0;
> +}
> +
> +static inline int
> +int_to_bool(struct Mem *mem)
> +{
> + mem->u.b = mem->u.i != 0;
> + mem->flags = MEM_Bool;
> + mem->field_type = FIELD_TYPE_BOOLEAN;
> + return 0;
> +}
> +
> +static inline int
> +double_to_bool(struct Mem *mem)
> +{
> + mem->u.b = mem->u.r != 0.;
> + mem->flags = MEM_Bool;
> + mem->field_type = FIELD_TYPE_BOOLEAN;
> + return 0;
> +}
> +
> +static inline int
> +str_to_bin(struct Mem *mem)
> +{
> + mem->flags = (mem->flags & (MEM_Dyn | MEM_Static | MEM_Ephem)) |
> + MEM_Blob;
> + mem->field_type = FIELD_TYPE_VARBINARY;
> + return 0;
> +}
1. You have tons of <src>_to_<dst> converters. I propose you to group them.
For example, str_to_* all together, double_to_* all together, and so on.
It would simplify reading and search.
> +
> +int
> +mem_cast_explicit(struct Mem *mem, enum field_type type)
> +{
> + if ((mem->flags & MEM_Null) != 0) {
> + mem->field_type = type;
> + return 0;
> + }
> + switch (type) {
> + case FIELD_TYPE_UNSIGNED:
> + if ((mem->flags & MEM_UInt) != 0)
> + return 0;
> + if ((mem->flags & MEM_Int) != 0)
> + return -1;
> + if ((mem->flags & MEM_Blob) != 0 &&
> + (mem->flags & MEM_Subtype) != 0)
> + return -1;
> + if ((mem->flags & (MEM_Str | MEM_Blob)) != 0)
> + return bytes_to_uint(mem);
> + if ((mem->flags & MEM_Real) != 0)
> + return double_to_int(mem);
2. tarantool> box.execute('SELECT CAST(-1.1 AS UNSIGNED);')
---
- metadata:
- name: COLUMN_1
type: unsigned
rows:
- [-1]
...
That looks quite broken. Is this a known issue? From the
code I see the issue existed before your patch (but I was
too lazy to try it).
> @@ -2018,113 +2107,6 @@ registerTrace(int iReg, Mem *p) {
> }
> #endif
>
> -/*
> - * Cast the datatype of the value in pMem according to the type
> - * @type. Casting is different from applying type in that a cast
> - * is forced. In other words, the value is converted into the desired
> - * type even if that results in loss of data. This routine is
> - * used (for example) to implement the SQL "cast()" operator.
> - */
> -int
> -sqlVdbeMemCast(Mem * pMem, enum field_type type)
> -{
> - assert(type < field_type_MAX);
> - if (pMem->flags & MEM_Null)
> - return 0;
> - switch (type) {
> - case FIELD_TYPE_SCALAR:
> - return 0;
> - case FIELD_TYPE_BOOLEAN:
> - if ((pMem->flags & MEM_Int) != 0) {
> - mem_set_bool(pMem, pMem->u.i);
> - return 0;
> - }
> - if ((pMem->flags & MEM_UInt) != 0) {
> - mem_set_bool(pMem, pMem->u.u);
> - return 0;
> - }
> - if ((pMem->flags & MEM_Real) != 0) {
> - mem_set_bool(pMem, pMem->u.r);
> - return 0;
> - }
> - if ((pMem->flags & MEM_Str) != 0) {
> - bool value;
> - if (str_cast_to_boolean(pMem->z, &value) != 0)
> - return -1;
> - mem_set_bool(pMem, value);
> - return 0;
> - }
> - if ((pMem->flags & MEM_Bool) != 0)
> - return 0;
> - return -1;
> - case FIELD_TYPE_INTEGER:
> - case FIELD_TYPE_UNSIGNED:
> - if ((pMem->flags & (MEM_Blob | MEM_Str)) != 0) {
> - bool is_neg;
> - int64_t val;
> - if (sql_atoi64(pMem->z, &val, &is_neg, pMem->n) != 0)
> - return -1;
> - if (type == FIELD_TYPE_UNSIGNED && is_neg)
> - return -1;
> - mem_set_int(pMem, val, is_neg);
> - return 0;
> - }
> - if ((pMem->flags & MEM_Bool) != 0) {
> - pMem->u.u = (uint64_t)pMem->u.b;
> - pMem->flags = MEM_UInt;
> - pMem->field_type = FIELD_TYPE_UNSIGNED;
> - return 0;
> - }
> - if ((pMem->flags & MEM_Real) != 0) {
> - double d = pMem->u.r;
> - if (d < 0. && d >= (double)INT64_MIN) {
> - pMem->u.i = (int64_t)d;
> - pMem->flags = MEM_Int;
> - pMem->field_type = FIELD_TYPE_INTEGER;
> - return 0;
> - }
> - if (d >= 0. && d < (double)UINT64_MAX) {
> - pMem->u.u = (uint64_t)d;
> - pMem->flags = MEM_UInt;
> - pMem->field_type = FIELD_TYPE_UNSIGNED;
> - return 0;
> - }
> - return -1;
> - }
> - if (type == FIELD_TYPE_UNSIGNED &&
> - (pMem->flags & MEM_UInt) == 0)
> - return -1;
> - return 0;
> - case FIELD_TYPE_DOUBLE:
> - return mem_to_double(pMem);
> - case FIELD_TYPE_NUMBER:
> - return mem_to_number(pMem);
> - case FIELD_TYPE_VARBINARY:
> - if ((pMem->flags & MEM_Blob) != 0)
> - return 0;
> - if ((pMem->flags & MEM_Str) != 0) {
> - MemSetTypeFlag(pMem, MEM_Str);
> - return 0;
> - }
> - return -1;
> - default:
> - assert(type == FIELD_TYPE_STRING);
> - assert(MEM_Str == (MEM_Blob >> 3));
> - if ((pMem->flags & MEM_Bool) != 0) {
> - const char *str_bool = SQL_TOKEN_BOOLEAN(pMem->u.b);
> - if (mem_copy_str0(pMem, str_bool) != 0)
> - return -1;
> - return 0;
> - }
> - pMem->flags |= (pMem->flags & MEM_Blob) >> 3;
> - sql_value_apply_type(pMem, FIELD_TYPE_STRING);
> - assert(pMem->flags & MEM_Str || pMem->db->mallocFailed);
> - pMem->flags &=
> - ~(MEM_Int | MEM_UInt | MEM_Real | MEM_Blob | MEM_Zero);
> - return 0;
> - }
> -}
It is fascinating how a good code structure allows to get rid of
all of that old garbage mess almost naturally, and reveals some
issues.
More information about the Tarantool-patches
mailing list