[Tarantool-patches] [PATCH v2 3/3] sql: replace MEM-type flags by enum mem_type

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Fri Apr 30 00:09:43 MSK 2021


I appreciate the work you did here!

>>>  	mem->field_type = FIELD_TYPE_VARBINARY;
>>>  	return 0;
>>>  }> @@ -1168,9 +1229,9 @@ mem_get_bin(const struct Mem *mem, const char **s)
>>>  int
>>>  mem_len(const struct Mem *mem, uint32_t *len)
>>>  {
>>> -	if ((mem->flags & (MEM_Str | MEM_Blob)) == 0)
>>> +	if (mem->type != MEM_STR && mem->type != MEM_BIN)
>>>  		return -1;
>>
>> 8. Is it -1 for MAP and ARRAY intentionally? Previously they
>> were included into MEM_Blob.
>>
>> I see mem_concat() didn't check for subtypes. Does it mean we
>> could concat two MP_ARRAYs into something invalid? If we could,
>> your patch probably just fixed it. Could you check and add a
>> test if so?
>>
> Fixed. You were right, it is actually possible to concatenate two maps, map and
> array, map and varbinary and so on. I returned ability to do this. Should I add
> a test?

That behaviour is not correct, and should raise an error I think. You
can add a test, but if it works now, then it should have a comment
saying that this test is bad and must start failing in the future. And
now you might test it just to ensure it does not crash anywhere.

>>> @@ -114,175 +120,153 @@ struct Mem {
>>
>> <...>
>>
>>>  static inline bool
>>>  mem_is_agg(const struct Mem *mem)
>>>  {
>>> -	return (mem->flags & MEM_Agg) != 0;
>>> +	return mem->type == MEM_AGG;
>>>  }
>>>  
>>>  static inline bool
>>>  mem_is_bytes(const struct Mem *mem)
>>>  {
>>> -	return (mem->flags & (MEM_Blob | MEM_Str)) != 0;
>>> +	enum mem_type type = mem->type;
>>> +	return type == MEM_BIN || type == MEM_MAP || type == MEM_ARRAY ||
>>> +	       type == MEM_STR;
>>>  }
>> 15. It was good when we could check several rare cases at one
>> branch. Maybe you could preserve the bitwise structure of the
>> types? Then we could keep the bit operations and save a few
>> branches. You didn't think of it, or do we go for normal numbers
>> intentionally?
>>
> I did it, although I'm not sure if this is the right choice: although it makes
> our code more compact in some cases, it makes it a little less readable.

I just realized another issue which you will get with UUID and DECIMAL. They
are not present in mp_type, so there won't be a direct mapping anyway. They
are encoded as MP_EXT + MP_UUID/MP_DECIMAL. It fits into a 64bit integer,
but it is a binary header, and is not in mp_type value range. The same for
MEM_TYPE_PTR, MEM_TYPE_FRAME, and other SQL-specific.

See 5 comments below.

> diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
> index b6ff6397f..4f189cac4 100644
> --- a/src/box/sql/mem.c
> +++ b/src/box/sql/mem.c
> @@ -89,39 +89,36 @@ mem_str(const struct Mem *mem)

<...>

>  static inline void
>  mem_clear(struct Mem *mem)
>  {
> -	if ((mem->flags & (MEM_Agg | MEM_Dyn | MEM_Frame)) != 0) {
> -		if ((mem->flags & MEM_Agg) != 0)
> -			sql_vdbemem_finalize(mem, mem->u.func);
> -		assert((mem->flags & MEM_Agg) == 0);
> -		if ((mem->flags & MEM_Dyn) != 0) {
> -			assert(mem->xDel != SQL_DYNAMIC && mem->xDel != NULL);
> -			mem->xDel((void *)mem->z);
> -		} else if ((mem->flags & MEM_Frame) != 0) {
> -			struct VdbeFrame *frame = mem->u.pFrame;
> -			frame->pParent = frame->v->pDelFrame;
> -			frame->v->pDelFrame = frame;
> -		}
> -	}
> -	mem->flags = MEM_Null;
> +	if (mem->type == MEM_TYPE_AGG) {
> +		sql_vdbemem_finalize(mem, mem->u.func);
> +		assert(mem->type != MEM_TYPE_AGG);
> +	} else if (mem->type == MEM_TYPE_FRAME) {
> +		struct VdbeFrame *frame = mem->u.pFrame;
> +		frame->pParent = frame->v->pDelFrame;
> +		frame->v->pDelFrame = frame;
> +	} else if ((mem->flags & MEM_Dyn) != 0) {
> +		assert(mem->xDel != SQL_DYNAMIC && mem->xDel != NULL);
> +		mem->xDel((void *)mem->z);
> +	}

1. With the new bitwise structure you could reduce the diff.
For example, here there was one 'if' for a non-common case of
something allocated dynamically. Now there are 3 'if's.

> +	mem->type = MEM_TYPE_NULL;
> +	mem->flags = 0;
>  	mem->field_type = field_type_MAX;
>  }
> @@ -268,11 +271,13 @@ mem_set_str0_allocated(struct Mem *mem, char *value)
>  int
>  mem_copy_str(struct Mem *mem, const char *value, uint32_t len)
>  {
> -	if ((mem->flags & (MEM_Str | MEM_Blob)) != 0 && mem->z == value) {
> +	if ((mem->type == MEM_TYPE_STR ||
> +	     mem->type == MEM_TYPE_BIN) && mem->z == value) {

2. The only reason I proposed to keep the types in their
own bits was to reduce number of the branches in the 'if's.
So as you could write flags & (MEM_TYPE_STR | MEM_TYPE_BIN) != 0
for checking for multiple types instead of using || which adds
an implicit branch.

The same in mem_copy_bin(), mem_to_int(), mem_to_int_precise(),
mem_to_double(), mem_to_number(), end of mem_cast_explicit()
(the FIELD_TYPE_SCALAR case), in a few places in mem_cast_implicit(),
mem_cast_implicit_old(), mem_get_int(), mem_get_uint(), try_return_null(),
mem_concat(), get_number(), mem_cmp_bin(), sqlVdbeCheckMemInvariants(),
end of memTracePrint(). Maybe more which I missed.

>  		/* Own value, but might be ephemeral. Make it own if so. */
>  		if (sqlVdbeMemGrow(mem, len, 1) != 0)
>  			return -1;
> -		mem->flags = MEM_Str;
> +		mem->type = MEM_TYPE_STR;
> +		mem->flags = 0;
>  		mem->field_type = FIELD_TYPE_STRING;
>  		return 0;
>  	}
> @@ -2258,40 +2330,37 @@ sqlVdbeMemTooBig(Mem * p)
>  int
>  sqlMemCompare(const Mem * pMem1, const Mem * pMem2, const struct coll * pColl)
>  {
> -	int f1, f2;
>  	int res;
> -	int combined_flags;
>  
> -	f1 = pMem1->flags;
> -	f2 = pMem2->flags;
> -	combined_flags = f1 | f2;
> +	enum mem_type type1 = pMem1->type;
> +	enum mem_type type2 = pMem2->type;
>  
>  	/* If one value is NULL, it is less than the other. If both values
>  	 * are NULL, return 0.
>  	 */
> -	if (combined_flags & MEM_Null) {
> -		return (f2 & MEM_Null) - (f1 & MEM_Null);
> -	}
> +	if (type1 == MEM_TYPE_NULL || type2 == MEM_TYPE_NULL)

3. With the bitwise types you could have combined_types and
keep only one branch in such places.

> +		return (int)(type2 == MEM_TYPE_NULL) -
> +		       (int)(type1 == MEM_TYPE_NULL);
>  
> diff --git a/src/box/sql/mem.h b/src/box/sql/mem.h
> index 6fc15617d..a4a0d2223 100644
> --- a/src/box/sql/mem.h
> +++ b/src/box/sql/mem.h
> @@ -37,73 +37,78 @@ struct region;
>  struct mpstream;
>  struct VdbeFrame;
>  
> -/*
> - * Internally, the vdbe manipulates nearly all SQL values as Mem
> - * structures. Each Mem struct may cache multiple representations (string,
> - * integer etc.) of the same value.
> - */
> +enum mem_type {
> +	MEM_TYPE_NULL = 1u << MP_NIL,
> +	MEM_TYPE_UINT = 1u << MP_UINT,
> +	MEM_TYPE_INT = 1u << MP_INT,
> +	MEM_TYPE_STR = 1u << MP_STR,
> +	MEM_TYPE_BIN = 1u << MP_BIN,
> +	MEM_TYPE_ARRAY = 1u << MP_ARRAY,
> +	MEM_TYPE_MAP = 1u << MP_MAP,
> +	MEM_TYPE_BOOL = 1u << MP_BOOL,
> +	MEM_TYPE_FLOAT = 1u << MP_FLOAT,
> +	MEM_TYPE_DOUBLE = 1u << MP_DOUBLE,
> +	MEM_TYPE_INVALID = 1u << MP_EXT,
> +	MEM_TYPE_FRAME = 1u << (MP_EXT + 1),
> +	MEM_TYPE_PTR = 1u << (MP_EXT + 2),
> +	MEM_TYPE_AGG = 1u << (MP_EXT + 3),

4. Why do you stick to mp_types? Why can't have your own
bits? Anyway I don't see now any easy conversions between
mem_type and mp_type. It is also kind of incorrect because
MEM_TYPE_PTR = MP_EXT + 2 is not a pointer in MessagePack,
and the same for the other extensions.

> +};
> +
> +/** Internally, the vdbe manipulates nearly all SQL values as Mem structures. */
>  struct Mem {
>  	union MemValue {
> -		double r;	/* Real value used when MEM_Real is set in flags */
> -		i64 i;		/* Integer value used when MEM_Int is set in flags */
> -		uint64_t u;	/* Unsigned integer used when MEM_UInt is set. */
> -		bool b;         /* Boolean value used when MEM_Bool is set in flags */
> -		int nZero;	/* Used when bit MEM_Zero is set in flags */
> -		void *p;	/* Generic pointer */
> +		/** Double value when MEM type is MEM_TYPE_DOUBLE. */
> +		double r;

5. When I talked about fixing a comment location, I meant that single
comment you needed to change anyway. But ok, lets change all alongside.

> +		/** Negative integer value when MEM type is MEM_TYPE_INT. */
> +		i64 i;
> +		/** Unsigned integer value when MEM type is MEM_TYPE_UINT. */
> +		uint64_t u;
> +		/** Boolean value when MEM type is MEM_TYPE_BOOL. */
> +		bool b;


More information about the Tarantool-patches mailing list