[Tarantool-patches] [PATCH v2 2/3] sql: make mem_is_bin() to check only for VARBINARY

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Fri Apr 30 00:05:19 MSK 2021


Thanks for working on this!

See 11 comments below!

On 27.04.2021 18:55, Mergen Imeev via Tarantool-patches wrote:
> After this patch, the mem_is_bin() function will return 'true' only if
> the value that the MEM contains is of type VARBINARY. This patch also
> adds the mem_is_bin_ext() function, which is used to check if a MEM
> contains value of type VARBINARY or value of types that are currently
> considered VARBINARY extensions - MAP and ARRAY.
> 
> Part of #4906
> ---
>  src/box/sql/func.c | 10 +++++-----
>  src/box/sql/mem.h  |  8 +++++++-
>  src/box/sql/vdbe.c | 16 ++++++++--------
>  3 files changed, 20 insertions(+), 14 deletions(-)
> 
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index d282b2cea..bed7e8488 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -553,7 +553,7 @@ roundFunc(sql_context * context, int argc, sql_value ** argv)
>  	}
>  	if (mem_is_null(argv[0]))
>  		return;
> -	if (mem_is_bin(argv[0])) {
> +	if (mem_is_bin_ext(argv[0])) {

1. The name is quite confusing. I spent some time thinking about a
better one, but can't find any. Maybe because such a function looks
very strange in its purpose, as well as some of its usages.

Maybe better add a function mem_is_nested() to check for MP_ARRAY
and MP_MAP. Or mem_is_scalar(), which checks it is not MP_ARRAY and
not MP_MAP and you call it with negation. Or add special checks
for array and map. mem_is_bin_ext() would be either

	mem_is_bin() || mem_is_nested()

or

	mem_is_bin() || !mem_is_scalar()

or

	mem_is_bin() || mem_is_array() || mem_is_map()

Then you would see something strange below.

Why in ROUND() we check for types we do not accept instead of
checking the types we do accept? I mean, why not

	if (!mem_is_number() && !mem_is_str())
		return error;

instead of

	if (mem_is_bin_ext())
		return error;

What will happen when you will add UUID, date? Will you change
this on each new type like that?:

	if (mem_is_bin_ext() || mem_is_uuid() || mem_is_date() || ...)
		return error;

IMO would be easier to maintain a list of supported types than a
list of not supported types. Worth changing now?

>  		diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
>  			 mem_str(argv[0]), "numeric");
>  		context->is_aborted = true;
> @@ -613,7 +613,7 @@ case_type##ICUFunc(sql_context *context, int argc, sql_value **argv)   \
>  	const char *z2;                                                        \
>  	int n;                                                                 \
>  	UNUSED_PARAMETER(argc);                                                \
> -	if (mem_is_bin(argv[0])) {                                             \
> +	if (mem_is_bin_ext(argv[0])) {                                         \

2. Ditto.

>  		diag_set(ClientError, ER_INCONSISTENT_TYPES, "text",           \
>  			 "varbinary");                                         \
>  		context->is_aborted = true;                                    \
> @@ -694,7 +694,7 @@ randomBlob(sql_context * context, int argc, sql_value ** argv)
>  	unsigned char *p;
>  	assert(argc == 1);
>  	UNUSED_PARAMETER(argc);
> -	if (mem_is_bin(argv[0])) {
> +	if (mem_is_bin_ext(argv[0])) {

3. Ditto.

>  		diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
>  			 mem_str(argv[0]), "numeric");
>  		context->is_aborted = true;
> @@ -1455,7 +1455,7 @@ trim_func_one_arg(struct sql_context *context, sql_value *arg)
>  	const unsigned char *default_trim;
>  	if (mem_is_null(arg))
>  		return;
> -	if (mem_is_bin(arg))
> +	if (mem_is_bin_ext(arg))

4. TBH, I would ban MP_ARRAY and MP_MAP from TRIM(). It makes 0
sense for them. I am not sure it is even tested. So we are not
talking about a notable 'behaviour change' really.

>  		default_trim = (const unsigned char *) "\0";
>  	else
>  		default_trim = (const unsigned char *) " ";
> @@ -1584,7 +1584,7 @@ soundexFunc(sql_context * context, int argc, sql_value ** argv)
>  		1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0,
>  	};
>  	assert(argc == 1);
> -	if (mem_is_bin(argv[0])) {
> +	if (mem_is_bin_ext(argv[0])) {

5. Ditto.

> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
> index 2308587e7..bedfa87af 100644
> --- a/src/box/sql/vdbe.c
> +++ b/src/box/sql/vdbe.c
> @@ -1634,9 +1634,9 @@ case OP_Ge: {             /* same as TK_GE, jump, in1, in3 */
>  				 "boolean");
>  			goto abort_due_to_error;
>  		}
> -	} else if (mem_is_bin(pIn3) || mem_is_bin(pIn1)) {
> +	} else if (mem_is_bin_ext(pIn3) || mem_is_bin_ext(pIn1)) {
>  		if (mem_cmp_bin(pIn3, pIn1, &res) != 0) {

6. Arrays and maps are not comparable by memcmp(). The same numbers
can be encoded in MessagePack as 8 bytes and as 1 byte (you can do that
legally, just not widely used). In 2 maps elements might be in different
order. And obviously you can't compare map and array. Such places would
draw more attention for future fixes if we wouldn't mask them behind
_ext() suffix but would rather use more explicit checks.

> -			char *str = !mem_is_bin(pIn3) ?
> +			char *str = !mem_is_bin_ext(pIn3) ?
>  				    mem_type_to_str(pIn3) :
>  				    mem_type_to_str(pIn1);
>  			diag_set(ClientError, ER_SQL_TYPE_MISMATCH, str,
> @@ -2991,7 +2991,7 @@ case OP_Found: {        /* jump, in3 */
>  	} else {
>  		pFree = pIdxKey = sqlVdbeAllocUnpackedRecord(db, pC->key_def);
>  		if (pIdxKey==0) goto no_mem;
> -		assert(mem_is_bin(pIn3));
> +		assert(mem_is_bin_ext(pIn3));

7. This would be mem_is_array() AFAIU. Because it is a tuple, right?

>  		(void)ExpandBlob(pIn3);
>  		sqlVdbeRecordUnpackMsgpack(pC->key_def,
>  					       pIn3->z, pIdxKey);
> @@ -3253,7 +3253,7 @@ case OP_SorterData: {
>  	assert(isSorter(pC));
>  	if (sqlVdbeSorterRowkey(pC, pOut) != 0)
>  		goto abort_due_to_error;
> -	assert(mem_is_bin(pOut));
> +	assert(mem_is_bin_ext(pOut));

8. Ditto.

>  	assert(pOp->p1>=0 && pOp->p1<p->nCursor);
>  	p->apCsr[pOp->p3]->cacheStatus = CACHE_STALE;
>  	break;
> @@ -3616,7 +3616,7 @@ case OP_SorterInsert: {      /* in2 */
>  	assert(cursor != NULL);
>  	assert(isSorter(cursor));
>  	pIn2 = &aMem[pOp->p2];
> -	assert(mem_is_bin(pIn2));
> +	assert(mem_is_bin_ext(pIn2));

9. Ditto.

>  	if (ExpandBlob(pIn2) != 0 ||
>  	    sqlVdbeSorterWrite(cursor, pIn2) != 0)
>  		goto abort_due_to_error;
> @@ -3650,7 +3650,7 @@ case OP_SorterInsert: {      /* in2 */
>  case OP_IdxReplace:
>  case OP_IdxInsert: {
>  	pIn2 = &aMem[pOp->p1];
> -	assert(mem_is_bin(pIn2));
> +	assert(mem_is_bin_ext(pIn2));

10. Ditto.

>  	if (ExpandBlob(pIn2) != 0)
>  		goto abort_due_to_error;
>  	struct space *space;
> @@ -3741,10 +3741,10 @@ case OP_Update: {
>  	assert(pOp->p4type == P4_SPACEPTR);
>  
>  	struct Mem *key_mem = &aMem[pOp->p2];
> -	assert(mem_is_bin(key_mem));
> +	assert(mem_is_bin_ext(key_mem));

11. Ditto here and below.

>  	struct Mem *upd_fields_mem = &aMem[pOp->p3];
> -	assert(mem_is_bin(upd_fields_mem));
> +	assert(mem_is_bin_ext(upd_fields_mem));
>  	uint32_t *upd_fields = (uint32_t *)upd_fields_mem->z;
>  	uint32_t upd_fields_cnt = upd_fields_mem->n / sizeof(uint32_t);


More information about the Tarantool-patches mailing list