[Tarantool-patches] [PATCH 1/1] sql: replace MEM-type flags by enum mem_type

Sun Apr 25 20:41:48 MSK 2021

Hi! Thanks for the patch!

See 15 comments below. Most of them are the same though.

> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index 9c28d5122..2ce7a3871 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -1766,14 +1766,20 @@ minmaxStep(sql_context * context, int NotUsed, sql_value ** argv)
>  
>  	struct func_sql_builtin *func =
>  		(struct func_sql_builtin *)context->func;
> +	/*
> +	 * TODO: Make proper initialization for aggregate accumulation
> +	 * structures. Currently a hack is applied here: in case mem pBest is a
> +	 * newly created MEM, it bytes are set to 0 using memset(). Since
> +	 * MEM_NULL == 0, it works. However, it does not look right.
> +	 */

1. Not sure I understand. I see sql_aggregate_context() calls mem_set_agg()
which sets MEM_AGG. Isn't it a proper initialization?

>  	pBest = (Mem *) sql_aggregate_context(context, sizeof(*pBest));
>  	if (!pBest)
>  		return;
> diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
> index b6ff6397f..90883ee21 100644
> --- a/src/box/sql/mem.c
> +++ b/src/box/sql/mem.c
> @@ -60,26 +60,26 @@ const char *
>  mem_str(const struct Mem *mem)
>  {
>  	char buf[BUF_SIZE];
> -	switch (mem->flags & MEM_PURE_TYPE_MASK) {
> -	case MEM_Null:
> +	switch (mem->type) {
> +	case MEM_NULL:

2. Better use MEM_TYPE_*. Otherwise in case the mem will have
any other enums, the value names would confuse. It is already
so because the flags also have MEM_ prefix and the only
difference is they are not uppercase in the suffix.

> @@ -149,7 +148,8 @@ mem_set_int(struct Mem *mem, int64_t value, bool is_neg)
>  {
>  	mem_clear(mem);
>  	mem->u.i = value;
> -	mem->flags = is_neg ? MEM_Int : MEM_UInt;
> +	mem->type = is_neg ? MEM_INT : MEM_UINT;
> +	mem->flags = 0;

3. The flags are already cleared by mem_clear(). The same in
mem_set_uint(), mem_set_bool(), mem_set_double().

> @@ -281,7 +287,8 @@ mem_copy_str(struct Mem *mem, const char *value, uint32_t len)
>  		return -1;
>  	memcpy(mem->z, value, len);
>  	mem->n = len;
> -	mem->flags = MEM_Str;
> +	mem->type = MEM_STR;
> +	mem->flags = 0;
4. Ditto. The same in mem_copy_bin().

>  	mem->field_type = FIELD_TYPE_STRING;
>  	return 0;
>  }
> @@ -459,14 +471,16 @@ void
>  mem_set_invalid(struct Mem *mem)
>  {
>  	mem_clear(mem);
> -	mem->flags = MEM_Undefined;
> +	mem->type = MEM_INVALID;
> +	mem->flags = 0;

5. The same as above + mem_set_ptr(), mem_set_frame(),
mem_set_agg().

> @@ -498,19 +514,21 @@ void
>  mem_set_null_clear(struct Mem *mem)
>  {
>  	mem_clear(mem);
> -	mem->flags = MEM_Null | MEM_Cleared;
> +	mem->flags = MEM_Cleared;
>  }
>  
>  static inline int
>  int_to_double(struct Mem *mem)
>  {
> +	assert(mem->type == MEM_INT || mem->type == MEM_UINT);
>  	double d;
> -	if ((mem->flags & MEM_UInt) != 0)
> +	if (mem->type == MEM_UINT)
>  		d = (double)mem->u.u;
>  	else
>  		d = (double)mem->u.i;
>  	mem->u.r = d;
> -	mem->flags = MEM_Real;
> +	mem->type = MEM_DOUBLE;
> +	mem->flags = 0;

6. Do you need to touch the flags? If it was an int, its flags
should have been 0 already. The same in int_to_bool(),
double_to_int(), double_to_int_precise(), double_to_uint(),
double_to_uint_precise(), double_to_bool(), bool_to_int().

> @@ -550,8 +571,9 @@ str_to_str0(struct Mem *mem)
>  static inline int
>  str_to_bin(struct Mem *mem)
>  {
> -	mem->flags = (mem->flags & (MEM_Dyn | MEM_Static | MEM_Ephem)) |
> -		     MEM_Blob;
> +	assert(mem->type == MEM_STR);
> +	mem->type = MEM_BIN;
> +	mem->flags = mem->flags & (MEM_Dyn | MEM_Static | MEM_Ephem);

7. You can now use &=. But did you check if you even need to touch
these flags? It seems you can leave them as is. Previously they
were reset just to drop MEM_Blob flag I think.

The same in bin_to_str().

>  	mem->field_type = FIELD_TYPE_VARBINARY;
>  	return 0;
>  }> @@ -1168,9 +1229,9 @@ mem_get_bin(const struct Mem *mem, const char **s)
>  int
>  mem_len(const struct Mem *mem, uint32_t *len)
>  {
> -	if ((mem->flags & (MEM_Str | MEM_Blob)) == 0)
> +	if (mem->type != MEM_STR && mem->type != MEM_BIN)
>  		return -1;

8. Is it -1 for MAP and ARRAY intentionally? Previously they
were included into MEM_Blob.

I see mem_concat() didn't check for subtypes. Does it mean we
could concat two MP_ARRAYs into something invalid? If we could,
your patch probably just fixed it. Could you check and add a
test if so?

> -	if ((mem->flags & MEM_Blob) !=0 && (mem->flags & MEM_Zero) != 0)
> +	if (mem->type == MEM_BIN && (mem->flags & MEM_Zero) != 0)
>  		*len = mem->n + mem->u.nZero;
>  	else
>  		*len = mem->n;
> @@ -1236,7 +1298,8 @@ mem_move(struct Mem *to, struct Mem *from)
>  {
>  	mem_destroy(to);
>  	memcpy(to, from, sizeof(*to));
> -	from->flags = MEM_Null;
> +	from->type = MEM_NULL;
> +	from->flags = 0;

9. Flags are already 0 after mem_destroy(). The same in mem_add(),
mem_sub(), mem_mul(), mem_div(), mem_rem(), mem_bit_and(),
mem_bit_or(), mem_shift_left(), mem_shift_right(), mem_bit_not().

> @@ -2208,7 +2264,8 @@ sqlValueNew(sql * db)
>  {
>  	Mem *p = sqlDbMallocZero(db, sizeof(*p));
>  	if (p) {
> -		p->flags = MEM_Null;
> +		p->type = MEM_NULL;
> +		p->flags = 0;

10. MallocZero nullifies the memory, no need to set the flags to 0.

>  		p->db = db;
>  	}
>  	return p;
> @@ -2223,7 +2280,8 @@ releaseMemArray(Mem * p, int N)
>  			assert((&p[1]) == pEnd || p[0].db == p[1].db);
>  			assert(sqlVdbeCheckMemInvariants(p));
>  			mem_destroy(p);
> -			p->flags = MEM_Undefined;
> +			p->type = MEM_INVALID;
> +			p->flags = 0;

11. mem_destroy() already cleared the flags.

>  		} while ((++p) < pEnd);
>  	}
>  }
> @@ -2844,23 +2919,28 @@ port_lua_get_vdbemem(struct port *base, uint32_t *size)
>  		mem_clear(&val[i]);
>  		switch (field.type) {
>  		case MP_BOOL:
> -			val[i].flags = MEM_Bool;
> +			val[i].type = MEM_BOOL;
> +			val[i].flags = 0;

12. The flags are cleared by mem_clear() above. The same in the other places
in this function. The same in port_c_get_vdbemem().

> diff --git a/src/box/sql/mem.h b/src/box/sql/mem.h
> index 1db7f4deb..48c1850d4 100644
> --- a/src/box/sql/mem.h
> +++ b/src/box/sql/mem.h
> @@ -37,6 +37,35 @@ struct region;
>  struct mpstream;
>  struct VdbeFrame;
>  
> +enum mem_type {
> +	MEM_NULL = 0,
> +	MEM_UINT,
> +	MEM_INT,
> +	MEM_STR,
> +	MEM_BIN,
> +	MEM_ARRAY,
> +	MEM_MAP,
> +	MEM_BOOL,
> +	MEM_FLOAT,
> +	MEM_DOUBLE,
> +	MEM_INVALID,
> +	MEM_FRAME,
> +	MEM_PTR,
> +	MEM_AGG,
> +};
> +
> +static_assert((int)MP_NIL == (int)MEM_NULL);
> +static_assert((int)MP_UINT == (int)MEM_UINT);
> +static_assert((int)MP_INT == (int)MEM_INT);
> +static_assert((int)MP_STR == (int)MEM_STR);
> +static_assert((int)MP_BIN == (int)MEM_BIN);
> +static_assert((int)MP_ARRAY == (int)MEM_ARRAY);
> +static_assert((int)MP_MAP == (int)MEM_MAP);
> +static_assert((int)MP_BOOL == (int)MEM_BOOL);
> +static_assert((int)MP_FLOAT == (int)MEM_FLOAT);
> +static_assert((int)MP_DOUBLE == (int)MEM_DOUBLE);
> +static_assert((int)MP_EXT == (int)MEM_INVALID);

13. You could also assign them right inside of enum declaration. Then
wouldn't need to assert.

> +
>  /*
>   * Internally, the vdbe manipulates nearly all SQL values as Mem
>   * structures. Each Mem struct may cache multiple representations (string,
> @@ -57,9 +86,8 @@ struct Mem {
>  		struct func *func;
>  		struct VdbeFrame *pFrame;	/* Used when flags==MEM_Frame */
>  	} u;
> -	u32 flags;		/* Some combination of MEM_Null, MEM_Str, MEM_Dyn, etc. */
> -	/** Subtype for this value. */
> -	enum sql_subtype subtype;
> +	enum mem_type type;
> +	u32 flags;		/* Some combination of MEM_Term, MEM_Zero, MEM_Dyn, etc. */

14. Since you changed the line, it is better to put the comment on
top of the member, according to the code style. And add a comment to the
type field. Even a trivial one would work. Comments between members help
to make the struct look more 'structured'.

> @@ -114,175 +120,153 @@ struct Mem {

<...>

>  static inline bool
>  mem_is_agg(const struct Mem *mem)
>  {
> -	return (mem->flags & MEM_Agg) != 0;
> +	return mem->type == MEM_AGG;
>  }
>  
>  static inline bool
>  mem_is_bytes(const struct Mem *mem)
>  {
> -	return (mem->flags & (MEM_Blob | MEM_Str)) != 0;
> +	enum mem_type type = mem->type;
> +	return type == MEM_BIN || type == MEM_MAP || type == MEM_ARRAY ||
> +	       type == MEM_STR;
>  }
15. It was good when we could check several rare cases at one
branch. Maybe you could preserve the bitwise structure of the
types? Then we could keep the bit operations and save a few
branches. You didn't think of it, or do we go for normal numbers
intentionally?

On the other hand, having the types equal to mp types wins when
need to convert mem_type to mp_type, but do we need it often?