[Tarantool-patches] [PATCH v4 11/53] sql: introduce mem_is_*() functions()

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Tue Mar 30 02:01:07 MSK 2021


Thanks for working on this!

I have a general comment affecting this entire naming schema.
The names seem too long. Besides, we already had some mem_is_*()
and mem_set_*() functions before this patch, which used short
names for the types.

What I mean is 'integer' -> 'int', 'boolean' -> 'bool',
'string' -> 'str', 'binary' -> 'bin', and so on.

For the integers we have several functions because we split
unsigned, signed, and always negative integers. So we would
need more int-like names. For instance,

	mem_set_uint(uint64_t) - for MEM_UInt.
	mem_set_nint(int64_t) - for MEM_Int.
	mem_set_int(int64_t) - for both, checks the sign inside.
	mem_set_sint(int64_t, bool) - for both, takes the sign flag
	                              in the second argument

This can be discussed. The main point - shorter is better IMO. 

See 14 comments below.

On 23.03.2021 10:35, Mergen Imeev via Tarantool-patches wrote:
> This patch introduces mem_is_*() functions that allows to check current
> MEM state.
> 
> Part of #5818
> ---
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index e600a9800..81b537d9b 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -736,8 +736,8 @@ substrFunc(sql_context * context, int argc, sql_value ** argv)
>  		context->is_aborted = true;
>  		return;
>  	}
> -	if (sql_value_is_null(argv[1])
> -	    || (argc == 3 && sql_value_is_null(argv[2]))
> +	if (mem_is_null(argv[1])
> +	    || (argc == 3 && mem_is_null(argv[2]))

1. This is not a move of huge code blocks; it is effectively new code
now. And new code should follow our code style, such as:

- || goes in the end of line, not in the beginning of a next line;
- Unary operators don't have a whitespace after them;
- Comparison with NULL should be explicit, no implicit bool casts.

I mark the places below which I was able to find after a swift look.

>  	    ) {
>  		return;
>  	}
> @@ -1578,13 +1576,13 @@ replaceFunc(sql_context * context, int argc, sql_value ** argv)
>  	assert(zStr == sql_value_text(argv[0]));	/* No encoding change */
>  	zPattern = sql_value_text(argv[1]);
>  	if (zPattern == 0) {
> -		assert(sql_value_is_null(argv[1])
> +		assert(mem_is_null(argv[1])
>  		       || sql_context_db_handle(context)->mallocFailed);
>  		return;
>  	}
>  	nPattern = sql_value_bytes(argv[1]);
>  	if (nPattern == 0) {
> -		assert(! sql_value_is_null(argv[1]));
> +		assert(! mem_is_null(argv[1]));

2. Whitespace after unary operator.

>  		sql_result_value(context, argv[0]);
>  		return;
>  	}
> @@ -2039,7 +2035,7 @@ countStep(sql_context * context, int argc, sql_value ** argv)
>  		return;
>  	}
>  	p = sql_aggregate_context(context, sizeof(*p));
> -	if ((argc == 0 || ! sql_value_is_null(argv[0])) && p) {
> +	if ((argc == 0 || ! mem_is_null(argv[0])) && p) {

3. Ditto.

>  		p->n++;
>  	}
>  }
> diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
> index ec6aaab64..abc9291ef 100644
> --- a/src/box/sql/mem.c
> +++ b/src/box/sql/mem.c
> @@ -37,6 +37,142 @@
>  #include "box/tuple.h"
>  #include "mpstream/mpstream.h"
>  
> +bool
> +mem_is_null(const struct Mem *mem)
> +{
> +	return (mem->flags & MEM_Null) != 0;
> +}

4. Maybe better move them all to mem.h. These one-liners easily
can be inlined (the ones which are <= 3 lines long could be moved).

> +
> +bool
> +mem_is_unsigned(const struct Mem *mem)
> +{
> +	return (mem->flags & MEM_UInt) != 0;
> +}
> +
> +bool
> +mem_is_string(const struct Mem *mem)
> +{
> +	return (mem->flags & MEM_Str) != 0;
> +}
> +
> +bool
> +mem_is_number(const struct Mem *mem)
> +{
> +	return (mem->flags & (MEM_Real | MEM_Int |MEM_UInt)) != 0;

5. Missing whitespace after the last '|'.

> +}
> +
> +bool
> +mem_is_double(const struct Mem *mem)
> +{
> +	return (mem->flags & MEM_Real) != 0;
> +}
> +
> +bool
> +mem_is_integer(const struct Mem *mem)
> +{
> +	return (mem->flags & (MEM_Int | MEM_UInt)) != 0;
> +}
> +
> +bool
> +mem_is_boolean(const struct Mem *mem)
> +{
> +	return (mem->flags & MEM_Bool) != 0;
> +}
> +
> +bool
> +mem_is_binary(const struct Mem *mem)
> +{
> +	return (mem->flags & MEM_Blob) != 0;
> +}
> +
> +bool
> +mem_is_map(const struct Mem *mem)
> +{
> +	return (mem->flags & MEM_Blob) != 0 &&
> +	       (mem->flags & MEM_Subtype) != 0 &&
> +	       mem->subtype == SQL_SUBTYPE_MSGPACK &&
> +	       mp_typeof(*mem->z) == MP_MAP;
> +}
> +
> +bool
> +mem_is_array(const struct Mem *mem)
> +{
> +	return (mem->flags & MEM_Blob) != 0 &&
> +	       (mem->flags & MEM_Subtype) != 0 &&
> +	       mem->subtype == SQL_SUBTYPE_MSGPACK &&
> +	       mp_typeof(*mem->z) == MP_ARRAY;
> +}
> +
> +bool
> +mem_is_aggregate(const struct Mem *mem)
> +{
> +	return (mem->flags & MEM_Agg) != 0;
> +}
> +
> +bool
> +mem_is_varstring(const struct Mem *mem)
> +{
> +	return (mem->flags & (MEM_Blob | MEM_Str)) != 0;

6. It does not look right to call it varstring if it includes
binary. A string is always binary, but not each binary object
is a string.

Maybe mem_is_bytes()? mem_is_bytearray()?

> +}
> +
> +bool
> +mem_is_frame(const struct Mem *mem)
> +{
> +	return (mem->flags & MEM_Frame) != 0;
> +}
> +
> +bool
> +mem_is_undefined(const struct Mem *mem)
> +{
> +	return (mem->flags & MEM_Undefined) != 0;
> +}
> +
> +bool
> +mem_is_static(const struct Mem *mem)
> +{
> +	return (mem->flags & (MEM_Str | MEM_Blob)) != 0 &&
> +	       (mem->flags & MEM_Static) != 0;
> +}
> +
> +bool
> +mem_is_ephemeral(const struct Mem *mem)
> +{
> +	return (mem->flags & (MEM_Str | MEM_Blob)) != 0 &&
> +	       (mem->flags & MEM_Ephem) != 0;

7. How can it be that MEM_Ephem is set, but Str/Blob are not?

> +}
> +
> +bool
> +mem_is_dynamic(const struct Mem *mem)
> +{
> +	return (mem->flags & (MEM_Str | MEM_Blob)) != 0 &&
> +	       (mem->flags & MEM_Dyn) != 0;
> +}
> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
> index 12712efb4..05e0f78c1 100644
> --- a/src/box/sql/vdbe.c
> +++ b/src/box/sql/vdbe.c
> @@ -1088,10 +1086,10 @@ case OP_Concat: {           /* same as TK_CONCAT, in1, in2, out3 */
>  	 * Concatenation operation can be applied only to
>  	 * strings and blobs.
>  	 */
> -	uint32_t str_type_p1 = pIn1->flags & (MEM_Blob | MEM_Str);
> -	uint32_t str_type_p2 = pIn2->flags & (MEM_Blob | MEM_Str);
> -	if (str_type_p1 == 0 || str_type_p2 == 0) {
> -		char *inconsistent_type = str_type_p1 == 0 ?
> +	bool str_type_p1 = mem_is_varstring(pIn1);
> +	bool str_type_p2 = mem_is_varstring(pIn2);

8. They are not types now. Only flags. Should be renamed to something
more appropriate.

> +	if (!str_type_p1 || !str_type_p2) {
> +		char *inconsistent_type = !str_type_p1 ?
>  					  mem_type_to_str(pIn1) :
>  					  mem_type_to_str(pIn2);
>  		diag_set(ClientError, ER_INCONSISTENT_TYPES,
> @@ -1100,7 +1098,7 @@ case OP_Concat: {           /* same as TK_CONCAT, in1, in2, out3 */
>  	}
>  
>  	/* Moreover, both operands must be of the same type. */
> -	if (str_type_p1 != str_type_p2) {
> +	if (mem_is_string(pIn1) != mem_is_string(pIn2)) {

9. I would recommend mem_is_same_type(). Up to you.

>  		diag_set(ClientError, ER_INCONSISTENT_TYPES,
>  			 mem_type_to_str(pIn2), mem_type_to_str(pIn1));
>  		goto abort_due_to_error;
> @@ -1186,14 +1183,16 @@ case OP_Remainder: {           /* same as TK_REM, in1, in2, out3 */
>  	pIn2 = &aMem[pOp->p2];
>  	type2 = numericType(pIn2);
>  	pOut = vdbe_prepare_null_out(p, pOp->p3);
> -	flags = pIn1->flags | pIn2->flags;
> -	if ((flags & MEM_Null)!=0) goto arithmetic_result_is_null;
> +	if (mem_is_null(pIn1) || mem_is_null(pIn2))
> +		goto arithmetic_result_is_null;
>  	if ((type1 & (MEM_Int | MEM_UInt)) != 0 &&
>  	    (type2 & (MEM_Int | MEM_UInt)) != 0) {
>  		iA = pIn1->u.i;
>  		iB = pIn2->u.i;
> -		bool is_lhs_neg = pIn1->flags & MEM_Int;
> -		bool is_rhs_neg = pIn2->flags & MEM_Int;
> +		bool is_lhs_neg = mem_is_integer(pIn1) &&
> +				  !mem_is_unsigned(pIn1);
> +		bool is_rhs_neg = mem_is_integer(pIn2) &&
> +				  !mem_is_unsigned(pIn2);

10. The checks look overcomplicated. Worth adding mem_is_nint()?

>  		bool is_res_neg;
>  		switch( pOp->opcode) {
>  		case OP_Add: {
> @@ -1509,7 +1508,7 @@ case OP_ShiftRight: {           /* same as TK_RSHIFT, in1, in2, out3 */
>  	pIn1 = &aMem[pOp->p1];
>  	pIn2 = &aMem[pOp->p2];
>  	pOut = vdbe_prepare_null_out(p, pOp->p3);
> -	if ((pIn1->flags | pIn2->flags) & MEM_Null) {
> +	if (mem_is_null(pIn1) || mem_is_null(pIn2)) {

11. This is at least the third time I see a check of the kind "one of them is null".
And I see more below. Probably worth adding a function which would do it more
efficiently, in one check: mem_is_any_null(mem1, mem2) or something. Up to you.

> @@ -1757,11 +1756,10 @@ case OP_Ge: {             /* same as TK_GE, jump, in1, in3 */
>  			 * or not both operands are null.
>  			 */
>  			assert(pOp->opcode==OP_Eq || pOp->opcode==OP_Ne);
> -			assert((flags1 & MEM_Cleared)==0);
> +			assert(!mem_is_cleared(pIn1));
>  			assert((pOp->p5 & SQL_JUMPIFNULL)==0);
> -			if ((flags1&flags3&MEM_Null)!=0
> -			    && (flags3&MEM_Cleared)==0
> -				) {
> +			if (mem_is_null(pIn1) && mem_is_null(pIn3) &&

12. You already know pIn1 or pIn3 is NULL from the 'if' above. So it
would be just a bit faster and easier to do mem_is_same_type(). Only
one branch.

> +			    !mem_is_cleared(pIn3)) {
>  				res = 0;  /* Operands are equal */
>  			} else {
>  				res = 1;  /* Operands are not equal */
> @@ -2982,18 +2965,18 @@ case OP_SeekGT: {       /* jump, in3 */
>  		 * the seek, so convert it.
>  		 */
>  		pIn3 = &aMem[int_field];
> -		if ((pIn3->flags & MEM_Null) != 0)
> +		if (mem_is_null(pIn3))
>  			goto skip_truncate;
> -		if ((pIn3->flags & MEM_Str) != 0)
> +		if (mem_is_string(pIn3))
>  			mem_apply_numeric_type(pIn3);
>  		int64_t i;
> -		if ((pIn3->flags & MEM_Int) == MEM_Int) {
> +		if (mem_is_integer(pIn3) && !mem_is_unsigned(pIn3)) {

13. Better be mem_is_nint(), mentioned in one of the previous
comments.

> @@ -3352,11 +3336,11 @@ case OP_FCopy: {     /* out2 */
>  		pIn1 = &aMem[pOp->p1];
>  	}
>  
> -	if ((pOp->p3 & OPFLAG_NOOP_IF_NULL) && (pIn1->flags & MEM_Null)) {
> +	if ((pOp->p3 & OPFLAG_NOOP_IF_NULL) && mem_is_null(pIn1)) {

14. Should be explicit != 0.

>  		pOut = vdbe_prepare_null_out(p, pOp->p2);
>  	} else {
>  		assert(memIsValid(pIn1));
> -		assert((pIn1->flags & (MEM_Int | MEM_UInt)) != 0);
> +		assert(mem_is_integer(pIn1));


More information about the Tarantool-patches mailing list