[Tarantool-patches] [PATCH v5 20/52] sql: introduce mem_compare()

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sun Apr 11 21:16:30 MSK 2021


Nice fixes!

This is the last email for today, I will continue the review of
the patchset tomorrow.

See 6 comments below.

> diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
> index 859e337aa..eee72a7fe 100644
> --- a/src/box/sql/mem.c
> +++ b/src/box/sql/mem.c
> @@ -624,6 +624,211 @@ mem_rem(const struct Mem *left, const struct Mem *right, struct Mem *result)
>  	return 0;
>  }
>  
> +static int
> +compare_blobs(const struct Mem *a, const struct Mem *b, int *result)
> +{

1. Would be good to have an assertion here that both types are MEM_Blob.

> +	int an = a->n;
> +	int bn = b->n;
> +	int minlen = MIN(an, bn);
> +
> +	/*
> +	 * It is possible to have a Blob value that has some non-zero content
> +	 * followed by zero content.  But that only comes up for Blobs formed
> +	 * by the OP_MakeRecord opcode, and such Blobs never get passed into
> +	 * mem_compare().
> +	 */
> +	assert((a->flags & MEM_Zero) == 0 || an == 0);
> +	assert((b->flags & MEM_Zero) == 0 || bn == 0);
> +
> +	if ((a->flags & b->flags & MEM_Zero) != 0) {
> +		*result = a->u.nZero - b->u.nZero;
> +		return 0;
> +	}
> +	if ((a->flags & MEM_Zero) != 0) {
> +		for (int i = 0; i < minlen; ++i) {
> +			if (b->z[i] != 0) {
> +				*result = -1;
> +				return 0;
> +			}
> +		}
> +		*result = a->u.nZero - bn;
> +		return 0;
> +	}
> +	if ((b->flags & MEM_Zero) != 0) {
> +		for (int i = 0; i < minlen; ++i) {
> +			if (a->z[i] != 0){
> +				*result = 1;
> +				return 0;
> +			}
> +		}
> +		*result = b->u.nZero - an;
> +		return 0;
> +	}
> +	*result = memcmp(a->z, b->z, minlen);
> +	if (*result != 0)
> +		return 0;
> +	*result = an - bn;
> +	return 0;

2. compare_blobs never fails. So you can drop result out argument
and return the comparison result as 'return'.

> +}
> +
> +static int
> +compare_numbers(const struct Mem *left, const struct Mem *right, int *result)
> +{
> +	struct sql_num a, b;
> +	/* TODO: Here should be check for right value type. */

3. What if 'b' is a string, which can't be converted to a number?

> +	if (get_number(right, &b) != 0) {
> +		*result = -1;
> +		return 0;
> +	}
> +	if (get_number(left, &a) != 0) {> +		diag_set(ClientError, ER_SQL_TYPE_MISMATCH, mem_str(left),
> +			 "numeric");
> +		return -1;
> +	}
> +	if (a.type == MEM_Real) {
> +		if (b.type == MEM_Real) {
> +			if (a.d > b.d)
> +				*result = 1;
> +			else if (a.d < b.d)
> +				*result = -1;
> +			else
> +				*result = 0;
> +			return 0;
> +		}
> +		if (b.type == MEM_Int)
> +			*result = double_compare_nint64(a.d, b.i, 1);
> +		else
> +			*result = double_compare_uint64(a.d, b.u, 1);
> +		return 0;
> +	}
> +	if (a.type == MEM_Int) {
> +		if (b.type == MEM_Int) {
> +			if (a.i > b.i)
> +				*result = 1;
> +			else if (a.i < b.i)
> +				*result = -1;
> +			else
> +				*result = 0;
> +			return 0;
> +		}
> +		if (b.type == MEM_UInt)
> +			*result = -1;
> +		else
> +			*result = double_compare_nint64(b.d, a.i, -1);
> +		return 0;
> +	}
> +	assert(a.type == MEM_UInt);
> +	if (b.type == MEM_UInt) {
> +		if (a.u > b.u)
> +			*result = 1;
> +		else if (a.u < b.u)
> +			*result = -1;
> +		else
> +			*result = 0;
> +		return 0;
> +	}
> +	if (b.type == MEM_Int)
> +		*result = 1;
> +	else
> +		*result = double_compare_uint64(b.d, a.u, -1);
> +	return 0;
> +}
> +
> +static int
> +compare_strings(const struct Mem *left, const struct Mem *right, int *result,
> +		const struct coll *coll)
> +{
> +	char *a;
> +	uint32_t an;
> +	char bufl[BUF_SIZE];
> +	if ((left->flags & MEM_Str) != 0) {
> +		a = left->z;
> +		an = left->n;
> +	} else {
> +		assert((left->flags & (MEM_Int | MEM_UInt | MEM_Real)) != 0);
> +		a = &bufl[0];
> +		if ((left->flags & MEM_Int) != 0)
> +			sql_snprintf(BUF_SIZE, a, "%lld", left->u.i);
> +		else if ((left->flags & MEM_UInt) != 0)
> +			sql_snprintf(BUF_SIZE, a, "%llu", left->u.u);
> +		else
> +			sql_snprintf(BUF_SIZE, a, "%!.15g", left->u.r);
> +		an = strlen(a);
> +	}
> +
> +	char *b;
> +	uint32_t bn;
> +	char bufr[BUF_SIZE];
> +	if ((right->flags & MEM_Str) != 0) {
> +		b = right->z;
> +		bn = right->n;
> +	} else {
> +		assert((right->flags & (MEM_Int | MEM_UInt | MEM_Real)) != 0);
> +		b = &bufr[0];
> +		if ((right->flags & MEM_Int) != 0)
> +			sql_snprintf(BUF_SIZE, b, "%lld", right->u.i);
> +		else if ((right->flags & MEM_UInt) != 0)
> +			sql_snprintf(BUF_SIZE, b, "%llu", right->u.u);
> +		else
> +			sql_snprintf(BUF_SIZE, b, "%!.15g", right->u.r);
> +		bn = strlen(b);
> +	}
> +	if (coll) {
> +		*result = coll->cmp(a, an, b, bn, coll);
> +		return 0;
> +	}
> +	uint32_t minlen = MIN(an, bn);
> +	*result = memcmp(a, b, minlen);
> +	if (*result != 0)
> +		return 0;
> +	*result = an - bn;
> +	return 0;

4. It can't fail either. You can return result as 'return'.

> +}
> +
> +int
> +mem_compare(const struct Mem *left, const struct Mem *right, int *result,
> +	    enum field_type type, struct coll *coll)
> +{
> +	assert(((left->flags | right->flags) & MEM_Null) == 0);
> +	int flags_any = left->flags | right->flags;

5. 'any' isn't needed until the next branch. You can move it right above

	if ((flags_any & MEM_Bool) != 0) {

> +	int flags_all = left->flags & right->flags;
> +
> +	if ((flags_all & MEM_Bool) != 0) {
> +		if (left->u.b == right->u.b)
> +			*result = 0;
> +		else if (left->u.b)
> +			*result = 1;
> +		else
> +			*result = -1;
> +		return 0;
> +	}
> +	if ((flags_any & MEM_Bool) != 0) {
> +		char *str = (left->flags & MEM_Bool) == 0 ?
> +			    mem_type_to_str(left) : mem_type_to_str(right);
> +		diag_set(ClientError, ER_SQL_TYPE_MISMATCH, str, "boolean");
> +		return -1;
> +	}
> +
> +	if ((flags_all & MEM_Blob) != 0)
> +		return compare_blobs(left, right, result);
> +	if ((flags_any & MEM_Blob) != 0) {
> +		char *str = (left->flags & MEM_Blob) == 0 ?
> +			    mem_type_to_str(left) : mem_type_to_str(right);
> +		diag_set(ClientError, ER_SQL_TYPE_MISMATCH, str, "varbinary");
> +		return -1;
> +	}
> +
> +	if (type == FIELD_TYPE_STRING)
> +		return compare_strings(left, right, result, coll);
> +
> +	if (sql_type_is_numeric(type) ||
> +	    (flags_any & (MEM_Int | MEM_UInt | MEM_Real)) != 0)
> +		return compare_numbers(left, right, result);
> +
> +	assert((left->flags & MEM_Str) != 0 && (right->flags & MEM_Str) != 0);
> +	return compare_strings(left, right, result, coll);
> +}
> diff --git a/src/box/sql/mem.h b/src/box/sql/mem.h
> index 69a7d9f7a..6c022d8d8 100644
> --- a/src/box/sql/mem.h
> +++ b/src/box/sql/mem.h
> @@ -226,6 +226,11 @@ mem_div(const struct Mem *left, const struct Mem *right, struct Mem *result);
>  int
>  mem_rem(const struct Mem *left, const struct Mem *right, struct Mem *result);
>  
> +/** Compare two non-NULL MEMs and return the result of comparison. */
> +int
> +mem_compare(const struct Mem *left, const struct Mem *right, int *result,
> +	    enum field_type type, struct coll *coll);

6. What is 'type'?

Can you keep it out of the comparator somehow? For example, make it
3 functions: mem_cmp_as_str, mem_cmp_as_num, and just a generic mem_cmp
without any types calling the first 2 comparators. Otherwise the type
thing is super ugly. It leaks the details of opcodes into mem.c.


More information about the Tarantool-patches mailing list