[Tarantool-patches] [PATCH v2 3/3] sql: replace MEM-type flags by enum mem_type
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Fri Apr 30 00:09:43 MSK 2021
I appreciate the work you did here!
>>> mem->field_type = FIELD_TYPE_VARBINARY;
>>> return 0;
>>> }> @@ -1168,9 +1229,9 @@ mem_get_bin(const struct Mem *mem, const char **s)
>>> int
>>> mem_len(const struct Mem *mem, uint32_t *len)
>>> {
>>> - if ((mem->flags & (MEM_Str | MEM_Blob)) == 0)
>>> + if (mem->type != MEM_STR && mem->type != MEM_BIN)
>>> return -1;
>>
>> 8. Is it -1 for MAP and ARRAY intentionally? Previously they
>> were included into MEM_Blob.
>>
>> I see mem_concat() didn't check for subtypes. Does it mean we
>> could concat two MP_ARRAYs into something invalid? If we could,
>> your patch probably just fixed it. Could you check and add a
>> test if so?
>>
> Fixed. You were right, it is actually possible to concatenate two maps, map and
> array, map and varbinary and so on. I returned ability to do this. Should I add
> a test?
That behaviour is not correct, and should raise an error I think. You
can add a test, but if it works now, then it should have a comment
saying that this test is bad and must start failing in the future. And
now you might test it just to ensure it does not crash anywhere.
>>> @@ -114,175 +120,153 @@ struct Mem {
>>
>> <...>
>>
>>> static inline bool
>>> mem_is_agg(const struct Mem *mem)
>>> {
>>> - return (mem->flags & MEM_Agg) != 0;
>>> + return mem->type == MEM_AGG;
>>> }
>>>
>>> static inline bool
>>> mem_is_bytes(const struct Mem *mem)
>>> {
>>> - return (mem->flags & (MEM_Blob | MEM_Str)) != 0;
>>> + enum mem_type type = mem->type;
>>> + return type == MEM_BIN || type == MEM_MAP || type == MEM_ARRAY ||
>>> + type == MEM_STR;
>>> }
>> 15. It was good when we could check several rare cases at one
>> branch. Maybe you could preserve the bitwise structure of the
>> types? Then we could keep the bit operations and save a few
>> branches. You didn't think of it, or do we go for normal numbers
>> intentionally?
>>
> I did it, although I'm not sure if this is the right choice: although it makes
> our code more compact in some cases, it makes it a little less readable.
I just realized another issue which you will get with UUID and DECIMAL. They
are not present in mp_type, so there won't be a direct mapping anyway. They
are encoded as MP_EXT + MP_UUID/MP_DECIMAL. It fits into a 64bit integer,
but it is a binary header, and is not in mp_type value range. The same for
MEM_TYPE_PTR, MEM_TYPE_FRAME, and other SQL-specific.
See 5 comments below.
> diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
> index b6ff6397f..4f189cac4 100644
> --- a/src/box/sql/mem.c
> +++ b/src/box/sql/mem.c
> @@ -89,39 +89,36 @@ mem_str(const struct Mem *mem)
<...>
> static inline void
> mem_clear(struct Mem *mem)
> {
> - if ((mem->flags & (MEM_Agg | MEM_Dyn | MEM_Frame)) != 0) {
> - if ((mem->flags & MEM_Agg) != 0)
> - sql_vdbemem_finalize(mem, mem->u.func);
> - assert((mem->flags & MEM_Agg) == 0);
> - if ((mem->flags & MEM_Dyn) != 0) {
> - assert(mem->xDel != SQL_DYNAMIC && mem->xDel != NULL);
> - mem->xDel((void *)mem->z);
> - } else if ((mem->flags & MEM_Frame) != 0) {
> - struct VdbeFrame *frame = mem->u.pFrame;
> - frame->pParent = frame->v->pDelFrame;
> - frame->v->pDelFrame = frame;
> - }
> - }
> - mem->flags = MEM_Null;
> + if (mem->type == MEM_TYPE_AGG) {
> + sql_vdbemem_finalize(mem, mem->u.func);
> + assert(mem->type != MEM_TYPE_AGG);
> + } else if (mem->type == MEM_TYPE_FRAME) {
> + struct VdbeFrame *frame = mem->u.pFrame;
> + frame->pParent = frame->v->pDelFrame;
> + frame->v->pDelFrame = frame;
> + } else if ((mem->flags & MEM_Dyn) != 0) {
> + assert(mem->xDel != SQL_DYNAMIC && mem->xDel != NULL);
> + mem->xDel((void *)mem->z);
> + }
1. With the new bitwise structure you could reduce the diff.
For example, here there was one 'if' for a non-common case of
something allocated dynamically. Now there are 3 'if's.
> + mem->type = MEM_TYPE_NULL;
> + mem->flags = 0;
> mem->field_type = field_type_MAX;
> }
> @@ -268,11 +271,13 @@ mem_set_str0_allocated(struct Mem *mem, char *value)
> int
> mem_copy_str(struct Mem *mem, const char *value, uint32_t len)
> {
> - if ((mem->flags & (MEM_Str | MEM_Blob)) != 0 && mem->z == value) {
> + if ((mem->type == MEM_TYPE_STR ||
> + mem->type == MEM_TYPE_BIN) && mem->z == value) {
2. The only reason I proposed to keep the types in their
own bits was to reduce number of the branches in the 'if's.
So as you could write flags & (MEM_TYPE_STR | MEM_TYPE_BIN) != 0
for checking for multiple types instead of using || which adds
an implicit branch.
The same in mem_copy_bin(), mem_to_int(), mem_to_int_precise(),
mem_to_double(), mem_to_number(), end of mem_cast_explicit()
(the FIELD_TYPE_SCALAR case), in a few places in mem_cast_implicit(),
mem_cast_implicit_old(), mem_get_int(), mem_get_uint(), try_return_null(),
mem_concat(), get_number(), mem_cmp_bin(), sqlVdbeCheckMemInvariants(),
end of memTracePrint(). Maybe more which I missed.
> /* Own value, but might be ephemeral. Make it own if so. */
> if (sqlVdbeMemGrow(mem, len, 1) != 0)
> return -1;
> - mem->flags = MEM_Str;
> + mem->type = MEM_TYPE_STR;
> + mem->flags = 0;
> mem->field_type = FIELD_TYPE_STRING;
> return 0;
> }
> @@ -2258,40 +2330,37 @@ sqlVdbeMemTooBig(Mem * p)
> int
> sqlMemCompare(const Mem * pMem1, const Mem * pMem2, const struct coll * pColl)
> {
> - int f1, f2;
> int res;
> - int combined_flags;
>
> - f1 = pMem1->flags;
> - f2 = pMem2->flags;
> - combined_flags = f1 | f2;
> + enum mem_type type1 = pMem1->type;
> + enum mem_type type2 = pMem2->type;
>
> /* If one value is NULL, it is less than the other. If both values
> * are NULL, return 0.
> */
> - if (combined_flags & MEM_Null) {
> - return (f2 & MEM_Null) - (f1 & MEM_Null);
> - }
> + if (type1 == MEM_TYPE_NULL || type2 == MEM_TYPE_NULL)
3. With the bitwise types you could have combined_types and
keep only one branch in such places.
> + return (int)(type2 == MEM_TYPE_NULL) -
> + (int)(type1 == MEM_TYPE_NULL);
>
> diff --git a/src/box/sql/mem.h b/src/box/sql/mem.h
> index 6fc15617d..a4a0d2223 100644
> --- a/src/box/sql/mem.h
> +++ b/src/box/sql/mem.h
> @@ -37,73 +37,78 @@ struct region;
> struct mpstream;
> struct VdbeFrame;
>
> -/*
> - * Internally, the vdbe manipulates nearly all SQL values as Mem
> - * structures. Each Mem struct may cache multiple representations (string,
> - * integer etc.) of the same value.
> - */
> +enum mem_type {
> + MEM_TYPE_NULL = 1u << MP_NIL,
> + MEM_TYPE_UINT = 1u << MP_UINT,
> + MEM_TYPE_INT = 1u << MP_INT,
> + MEM_TYPE_STR = 1u << MP_STR,
> + MEM_TYPE_BIN = 1u << MP_BIN,
> + MEM_TYPE_ARRAY = 1u << MP_ARRAY,
> + MEM_TYPE_MAP = 1u << MP_MAP,
> + MEM_TYPE_BOOL = 1u << MP_BOOL,
> + MEM_TYPE_FLOAT = 1u << MP_FLOAT,
> + MEM_TYPE_DOUBLE = 1u << MP_DOUBLE,
> + MEM_TYPE_INVALID = 1u << MP_EXT,
> + MEM_TYPE_FRAME = 1u << (MP_EXT + 1),
> + MEM_TYPE_PTR = 1u << (MP_EXT + 2),
> + MEM_TYPE_AGG = 1u << (MP_EXT + 3),
4. Why do you stick to mp_types? Why can't have your own
bits? Anyway I don't see now any easy conversions between
mem_type and mp_type. It is also kind of incorrect because
MEM_TYPE_PTR = MP_EXT + 2 is not a pointer in MessagePack,
and the same for the other extensions.
> +};
> +
> +/** Internally, the vdbe manipulates nearly all SQL values as Mem structures. */
> struct Mem {
> union MemValue {
> - double r; /* Real value used when MEM_Real is set in flags */
> - i64 i; /* Integer value used when MEM_Int is set in flags */
> - uint64_t u; /* Unsigned integer used when MEM_UInt is set. */
> - bool b; /* Boolean value used when MEM_Bool is set in flags */
> - int nZero; /* Used when bit MEM_Zero is set in flags */
> - void *p; /* Generic pointer */
> + /** Double value when MEM type is MEM_TYPE_DOUBLE. */
> + double r;
5. When I talked about fixing a comment location, I meant that single
comment you needed to change anyway. But ok, lets change all alongside.
> + /** Negative integer value when MEM type is MEM_TYPE_INT. */
> + i64 i;
> + /** Unsigned integer value when MEM type is MEM_TYPE_UINT. */
> + uint64_t u;
> + /** Boolean value when MEM type is MEM_TYPE_BOOL. */
> + bool b;
More information about the Tarantool-patches
mailing list