[Tarantool-patches] [PATCH v5 20/52] sql: introduce mem_compare()
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Sun Apr 11 21:16:30 MSK 2021
Nice fixes!
This is the last email for today, I will continue the review of
the patchset tomorrow.
See 6 comments below.
> diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
> index 859e337aa..eee72a7fe 100644
> --- a/src/box/sql/mem.c
> +++ b/src/box/sql/mem.c
> @@ -624,6 +624,211 @@ mem_rem(const struct Mem *left, const struct Mem *right, struct Mem *result)
> return 0;
> }
>
> +static int
> +compare_blobs(const struct Mem *a, const struct Mem *b, int *result)
> +{
1. Would be good to have an assertion here that both types are MEM_Blob.
> + int an = a->n;
> + int bn = b->n;
> + int minlen = MIN(an, bn);
> +
> + /*
> + * It is possible to have a Blob value that has some non-zero content
> + * followed by zero content. But that only comes up for Blobs formed
> + * by the OP_MakeRecord opcode, and such Blobs never get passed into
> + * mem_compare().
> + */
> + assert((a->flags & MEM_Zero) == 0 || an == 0);
> + assert((b->flags & MEM_Zero) == 0 || bn == 0);
> +
> + if ((a->flags & b->flags & MEM_Zero) != 0) {
> + *result = a->u.nZero - b->u.nZero;
> + return 0;
> + }
> + if ((a->flags & MEM_Zero) != 0) {
> + for (int i = 0; i < minlen; ++i) {
> + if (b->z[i] != 0) {
> + *result = -1;
> + return 0;
> + }
> + }
> + *result = a->u.nZero - bn;
> + return 0;
> + }
> + if ((b->flags & MEM_Zero) != 0) {
> + for (int i = 0; i < minlen; ++i) {
> + if (a->z[i] != 0){
> + *result = 1;
> + return 0;
> + }
> + }
> + *result = b->u.nZero - an;
> + return 0;
> + }
> + *result = memcmp(a->z, b->z, minlen);
> + if (*result != 0)
> + return 0;
> + *result = an - bn;
> + return 0;
2. compare_blobs never fails. So you can drop result out argument
and return the comparison result as 'return'.
> +}
> +
> +static int
> +compare_numbers(const struct Mem *left, const struct Mem *right, int *result)
> +{
> + struct sql_num a, b;
> + /* TODO: Here should be check for right value type. */
3. What if 'b' is a string, which can't be converted to a number?
> + if (get_number(right, &b) != 0) {
> + *result = -1;
> + return 0;
> + }
> + if (get_number(left, &a) != 0) {> + diag_set(ClientError, ER_SQL_TYPE_MISMATCH, mem_str(left),
> + "numeric");
> + return -1;
> + }
> + if (a.type == MEM_Real) {
> + if (b.type == MEM_Real) {
> + if (a.d > b.d)
> + *result = 1;
> + else if (a.d < b.d)
> + *result = -1;
> + else
> + *result = 0;
> + return 0;
> + }
> + if (b.type == MEM_Int)
> + *result = double_compare_nint64(a.d, b.i, 1);
> + else
> + *result = double_compare_uint64(a.d, b.u, 1);
> + return 0;
> + }
> + if (a.type == MEM_Int) {
> + if (b.type == MEM_Int) {
> + if (a.i > b.i)
> + *result = 1;
> + else if (a.i < b.i)
> + *result = -1;
> + else
> + *result = 0;
> + return 0;
> + }
> + if (b.type == MEM_UInt)
> + *result = -1;
> + else
> + *result = double_compare_nint64(b.d, a.i, -1);
> + return 0;
> + }
> + assert(a.type == MEM_UInt);
> + if (b.type == MEM_UInt) {
> + if (a.u > b.u)
> + *result = 1;
> + else if (a.u < b.u)
> + *result = -1;
> + else
> + *result = 0;
> + return 0;
> + }
> + if (b.type == MEM_Int)
> + *result = 1;
> + else
> + *result = double_compare_uint64(b.d, a.u, -1);
> + return 0;
> +}
> +
> +static int
> +compare_strings(const struct Mem *left, const struct Mem *right, int *result,
> + const struct coll *coll)
> +{
> + char *a;
> + uint32_t an;
> + char bufl[BUF_SIZE];
> + if ((left->flags & MEM_Str) != 0) {
> + a = left->z;
> + an = left->n;
> + } else {
> + assert((left->flags & (MEM_Int | MEM_UInt | MEM_Real)) != 0);
> + a = &bufl[0];
> + if ((left->flags & MEM_Int) != 0)
> + sql_snprintf(BUF_SIZE, a, "%lld", left->u.i);
> + else if ((left->flags & MEM_UInt) != 0)
> + sql_snprintf(BUF_SIZE, a, "%llu", left->u.u);
> + else
> + sql_snprintf(BUF_SIZE, a, "%!.15g", left->u.r);
> + an = strlen(a);
> + }
> +
> + char *b;
> + uint32_t bn;
> + char bufr[BUF_SIZE];
> + if ((right->flags & MEM_Str) != 0) {
> + b = right->z;
> + bn = right->n;
> + } else {
> + assert((right->flags & (MEM_Int | MEM_UInt | MEM_Real)) != 0);
> + b = &bufr[0];
> + if ((right->flags & MEM_Int) != 0)
> + sql_snprintf(BUF_SIZE, b, "%lld", right->u.i);
> + else if ((right->flags & MEM_UInt) != 0)
> + sql_snprintf(BUF_SIZE, b, "%llu", right->u.u);
> + else
> + sql_snprintf(BUF_SIZE, b, "%!.15g", right->u.r);
> + bn = strlen(b);
> + }
> + if (coll) {
> + *result = coll->cmp(a, an, b, bn, coll);
> + return 0;
> + }
> + uint32_t minlen = MIN(an, bn);
> + *result = memcmp(a, b, minlen);
> + if (*result != 0)
> + return 0;
> + *result = an - bn;
> + return 0;
4. It can't fail either. You can return result as 'return'.
> +}
> +
> +int
> +mem_compare(const struct Mem *left, const struct Mem *right, int *result,
> + enum field_type type, struct coll *coll)
> +{
> + assert(((left->flags | right->flags) & MEM_Null) == 0);
> + int flags_any = left->flags | right->flags;
5. 'any' isn't needed until the next branch. You can move it right above
if ((flags_any & MEM_Bool) != 0) {
> + int flags_all = left->flags & right->flags;
> +
> + if ((flags_all & MEM_Bool) != 0) {
> + if (left->u.b == right->u.b)
> + *result = 0;
> + else if (left->u.b)
> + *result = 1;
> + else
> + *result = -1;
> + return 0;
> + }
> + if ((flags_any & MEM_Bool) != 0) {
> + char *str = (left->flags & MEM_Bool) == 0 ?
> + mem_type_to_str(left) : mem_type_to_str(right);
> + diag_set(ClientError, ER_SQL_TYPE_MISMATCH, str, "boolean");
> + return -1;
> + }
> +
> + if ((flags_all & MEM_Blob) != 0)
> + return compare_blobs(left, right, result);
> + if ((flags_any & MEM_Blob) != 0) {
> + char *str = (left->flags & MEM_Blob) == 0 ?
> + mem_type_to_str(left) : mem_type_to_str(right);
> + diag_set(ClientError, ER_SQL_TYPE_MISMATCH, str, "varbinary");
> + return -1;
> + }
> +
> + if (type == FIELD_TYPE_STRING)
> + return compare_strings(left, right, result, coll);
> +
> + if (sql_type_is_numeric(type) ||
> + (flags_any & (MEM_Int | MEM_UInt | MEM_Real)) != 0)
> + return compare_numbers(left, right, result);
> +
> + assert((left->flags & MEM_Str) != 0 && (right->flags & MEM_Str) != 0);
> + return compare_strings(left, right, result, coll);
> +}
> diff --git a/src/box/sql/mem.h b/src/box/sql/mem.h
> index 69a7d9f7a..6c022d8d8 100644
> --- a/src/box/sql/mem.h
> +++ b/src/box/sql/mem.h
> @@ -226,6 +226,11 @@ mem_div(const struct Mem *left, const struct Mem *right, struct Mem *result);
> int
> mem_rem(const struct Mem *left, const struct Mem *right, struct Mem *result);
>
> +/** Compare two non-NULL MEMs and return the result of comparison. */
> +int
> +mem_compare(const struct Mem *left, const struct Mem *right, int *result,
> + enum field_type type, struct coll *coll);
6. What is 'type'?
Can you keep it out of the comparator somehow? For example, make it
3 functions: mem_cmp_as_str, mem_cmp_as_num, and just a generic mem_cmp
without any types calling the first 2 comparators. Otherwise the type
thing is super ugly. It leaks the details of opcodes into mem.c.
More information about the Tarantool-patches
mailing list