[Tarantool-patches] [PATCH v2 3/4] sql: introduce mem_cmp_scalar()
Mergen Imeev
imeevma at tarantool.org
Sun Jul 11 20:51:10 MSK 2021
Hi! Thank you for the review! My answers, diff and new patch below.
On Sun, Jul 11, 2021 at 05:03:34PM +0200, Vladislav Shpilevoy wrote:
> Hi! Thanks for the patch!
>
> See 3 comments below.
>
> > diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> > index aa565277c..dc99bd390 100644
> > --- a/src/box/sql/func.c
> > +++ b/src/box/sql/func.c
> > @@ -144,11 +144,10 @@ minmaxFunc(sql_context * context, int argc, sql_value ** argv)
> > for (i = 1; i < argc; i++) {
> > if (mem_is_null(argv[i]))
> > return;
> > - if ((sqlMemCompare(argv[iBest], argv[i], pColl) ^ mask) >=
> > - 0) {
> > - testcase(mask == 0);
> > + int res;
> > + mem_cmp_scalar(argv[iBest], argv[i], &res, pColl);
> > + if ((res ^ mask) >= 0)
>
> 1. It seems that under certain conditions if cmp_scalar fails, res remains
> not initialized. Which can lead to behaviour changing from run to run. The
> same in the other places below.
>
Fixed, added checks. Also, I believe there shoudn't be any problem when we
compare two values of the same type. Right now all comparison functions can
return -1 when they were given wrong argument, but I believe I will fix this
when I remove implicit cast. I want them to accept only values of predefined
types and return result of comparison. Then we can get rid of unnecessary
checks.
> > diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
> > index 2595e2fd4..576596c9f 100644
> > --- a/src/box/sql/mem.c
> > +++ b/src/box/sql/mem.c
> > @@ -59,6 +59,40 @@ enum {
> > BUF_SIZE = 32,
> > };
> >
> > +enum mem_class {
> > + MEM_CLASS_NULL,
> > + MEM_CLASS_BOOL,
> > + MEM_CLASS_NUMBER,
> > + MEM_CLASS_STR,
> > + MEM_CLASS_BIN,
> > + MEM_CLASS_UUID,
> > + mem_class_max,
> > +};
>
> 2. It might make sense to add a comment that these must be sorted
> exactly like enum mp_class.
>
Added.
> > +
> > +static inline enum mem_class
> > +mem_type_class(enum mem_type type)
> > +{
> > + switch (type) {
> > + case MEM_TYPE_NULL:
> > + return MEM_CLASS_NULL;
> > + case MEM_TYPE_UINT:
> > + case MEM_TYPE_INT:
> > + case MEM_TYPE_DOUBLE:
> > + return MEM_CLASS_NUMBER;
> > + case MEM_TYPE_STR:
> > + return MEM_CLASS_STR;
> > + case MEM_TYPE_BIN:
> > + return MEM_CLASS_BIN;
> > + case MEM_TYPE_BOOL:
> > + return MEM_CLASS_BOOL;
> > + case MEM_TYPE_UUID:
> > + return MEM_CLASS_UUID;
>
> 3. It might work faster without branching if done like
> 'static enum mp_class mp_classes[]' - would allow to take
> the class for any type as simple as an array access
> operation.
This cannot be done directly in current design since value in enum mem_type are
not sequential numbers. However, this can be done using something like this:
diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
index 4062ff4b3..05a3fb699 100644
--- a/src/box/sql/mem.c
+++ b/src/box/sql/mem.c
@@ -73,28 +73,27 @@ enum mem_class {
mem_class_max,
};
+enum mem_class mem_classes[] = {
+ /** MEM_TYPE_NULL = */ MEM_CLASS_NULL,
+ /** MEM_TYPE_UINT = */ MEM_CLASS_NUMBER,
+ /** MEM_TYPE_INT = */ MEM_CLASS_NUMBER,
+ /** MEM_TYPE_STR = */ MEM_CLASS_STR,
+ /** MEM_TYPE_BIN = */ MEM_CLASS_BIN,
+ /** MEM_TYPE_ARRAY = */ mem_class_max,
+ /** MEM_TYPE_MAP = */ mem_class_max,
+ /** MEM_TYPE_BOOL = */ MEM_CLASS_BOOL,
+ /** MEM_TYPE_DOUBLE = */ MEM_CLASS_NUMBER,
+ /** MEM_TYPE_UUID = */ MEM_CLASS_UUID,
+ /** MEM_TYPE_INVALID = */ mem_class_max,
+ /** MEM_TYPE_FRAME = */ mem_class_max,
+ /** MEM_TYPE_PTR = */ mem_class_max,
+ /** MEM_TYPE_AGG = */ mem_class_max,
+};
+
static inline enum mem_class
mem_type_class(enum mem_type type)
{
- switch (type) {
- case MEM_TYPE_NULL:
- return MEM_CLASS_NULL;
- case MEM_TYPE_UINT:
- case MEM_TYPE_INT:
- case MEM_TYPE_DOUBLE:
- return MEM_CLASS_NUMBER;
- case MEM_TYPE_STR:
- return MEM_CLASS_STR;
- case MEM_TYPE_BIN:
- return MEM_CLASS_BIN;
- case MEM_TYPE_BOOL:
- return MEM_CLASS_BOOL;
- case MEM_TYPE_UUID:
- return MEM_CLASS_UUID;
- default:
- break;
- }
- return mem_class_max;
+ return mem_classes[ffs(type) - 1];
}
bool
We can use macro instead of static inline function. What do you think?
Also we can think of way to solve this problem using bit-wise operations,
but it looks too complicated.
Diff:
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index dc99bd390..efb14f23e 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -145,7 +145,13 @@ minmaxFunc(sql_context * context, int argc, sql_value ** argv)
if (mem_is_null(argv[i]))
return;
int res;
- mem_cmp_scalar(argv[iBest], argv[i], &res, pColl);
+ if (mem_cmp_scalar(argv[iBest], argv[i], &res, pColl) != 0) {
+ diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
+ mem_str(argv[i]),
+ mem_type_to_str(argv[iBest]));
+ context->is_aborted = true;
+ return;
+ }
if ((res ^ mask) >= 0)
iBest = i;
}
@@ -1057,7 +1063,12 @@ nullifFunc(sql_context * context, int NotUsed, sql_value ** argv)
struct coll *pColl = sqlGetFuncCollSeq(context);
UNUSED_PARAMETER(NotUsed);
int res;
- mem_cmp_scalar(argv[0], argv[1], &res, pColl);
+ if (mem_cmp_scalar(argv[0], argv[1], &res, pColl) != 0) {
+ diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
+ mem_str(argv[1]), mem_type_to_str(argv[0]));
+ context->is_aborted = true;
+ return;
+ }
if (res != 0)
sql_result_value(context, argv[0]);
}
@@ -1827,7 +1838,12 @@ minmaxStep(sql_context * context, int NotUsed, sql_value ** argv)
* comparison is inverted.
*/
bool is_max = (func->flags & SQL_FUNC_MAX) != 0;
- mem_cmp_scalar(pBest, pArg, &cmp, pColl);
+ if (mem_cmp_scalar(pBest, pArg, &cmp, pColl) != 0) {
+ diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
+ mem_str(pArg), mem_type_to_str(pBest));
+ context->is_aborted = true;
+ return;
+ }
if ((is_max && cmp < 0) || (!is_max && cmp > 0)) {
mem_copy(pBest, pArg);
} else {
diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
index 576596c9f..da27cd191 100644
--- a/src/box/sql/mem.c
+++ b/src/box/sql/mem.c
@@ -59,6 +59,10 @@ enum {
BUF_SIZE = 32,
};
+/**
+ * Analogue of enum mp_class for enum mp_type. The order of the classes must be
+ * the same as in the enum mp_class.
+ */
enum mem_class {
MEM_CLASS_NULL,
MEM_CLASS_BOOL,
New patch:
commit c65ccc80c55501035ac6d5ab71b0e30460aee0fd
Author: Mergen Imeev <imeevma at gmail.com>
Date: Sat Jul 10 13:56:39 2021 +0300
sql: introduce mem_cmp_scalar()
This patch introduces the mem_cmp_scalar() function that compares two
MEMs using SCALAR rules. MEMs must be scalars. Prior to this patch, there was a
function that used SCALAR rules to compare two MEMs, but its design became
overly complex as new types appeared.
Part of #6164
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index aa565277c..efb14f23e 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -144,11 +144,16 @@ minmaxFunc(sql_context * context, int argc, sql_value ** argv)
for (i = 1; i < argc; i++) {
if (mem_is_null(argv[i]))
return;
- if ((sqlMemCompare(argv[iBest], argv[i], pColl) ^ mask) >=
- 0) {
- testcase(mask == 0);
- iBest = i;
+ int res;
+ if (mem_cmp_scalar(argv[iBest], argv[i], &res, pColl) != 0) {
+ diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
+ mem_str(argv[i]),
+ mem_type_to_str(argv[iBest]));
+ context->is_aborted = true;
+ return;
}
+ if ((res ^ mask) >= 0)
+ iBest = i;
}
sql_result_value(context, argv[iBest]);
}
@@ -1057,9 +1062,15 @@ nullifFunc(sql_context * context, int NotUsed, sql_value ** argv)
{
struct coll *pColl = sqlGetFuncCollSeq(context);
UNUSED_PARAMETER(NotUsed);
- if (sqlMemCompare(argv[0], argv[1], pColl) != 0) {
- sql_result_value(context, argv[0]);
+ int res;
+ if (mem_cmp_scalar(argv[0], argv[1], &res, pColl) != 0) {
+ diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
+ mem_str(argv[1]), mem_type_to_str(argv[0]));
+ context->is_aborted = true;
+ return;
}
+ if (res != 0)
+ sql_result_value(context, argv[0]);
}
/**
@@ -1827,7 +1838,12 @@ minmaxStep(sql_context * context, int NotUsed, sql_value ** argv)
* comparison is inverted.
*/
bool is_max = (func->flags & SQL_FUNC_MAX) != 0;
- cmp = sqlMemCompare(pBest, pArg, pColl);
+ if (mem_cmp_scalar(pBest, pArg, &cmp, pColl) != 0) {
+ diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
+ mem_str(pArg), mem_type_to_str(pBest));
+ context->is_aborted = true;
+ return;
+ }
if ((is_max && cmp < 0) || (!is_max && cmp > 0)) {
mem_copy(pBest, pArg);
} else {
diff --git a/src/box/sql/mem.c b/src/box/sql/mem.c
index 2595e2fd4..da27cd191 100644
--- a/src/box/sql/mem.c
+++ b/src/box/sql/mem.c
@@ -59,6 +59,44 @@ enum {
BUF_SIZE = 32,
};
+/**
+ * Analogue of enum mp_class for enum mp_type. The order of the classes must be
+ * the same as in the enum mp_class.
+ */
+enum mem_class {
+ MEM_CLASS_NULL,
+ MEM_CLASS_BOOL,
+ MEM_CLASS_NUMBER,
+ MEM_CLASS_STR,
+ MEM_CLASS_BIN,
+ MEM_CLASS_UUID,
+ mem_class_max,
+};
+
+static inline enum mem_class
+mem_type_class(enum mem_type type)
+{
+ switch (type) {
+ case MEM_TYPE_NULL:
+ return MEM_CLASS_NULL;
+ case MEM_TYPE_UINT:
+ case MEM_TYPE_INT:
+ case MEM_TYPE_DOUBLE:
+ return MEM_CLASS_NUMBER;
+ case MEM_TYPE_STR:
+ return MEM_CLASS_STR;
+ case MEM_TYPE_BIN:
+ return MEM_CLASS_BIN;
+ case MEM_TYPE_BOOL:
+ return MEM_CLASS_BOOL;
+ case MEM_TYPE_UUID:
+ return MEM_CLASS_UUID;
+ default:
+ break;
+ }
+ return mem_class_max;
+}
+
bool
mem_is_field_compatible(const struct Mem *mem, enum field_type type)
{
@@ -2009,6 +2047,36 @@ mem_cmp_uuid(const struct Mem *a, const struct Mem *b, int *result)
return 0;
}
+int
+mem_cmp_scalar(const struct Mem *a, const struct Mem *b, int *result,
+ const struct coll *coll)
+{
+ enum mem_class class_a = mem_type_class(a->type);
+ enum mem_class class_b = mem_type_class(b->type);
+ if (class_a != class_b) {
+ *result = class_a - class_b;
+ return 0;
+ }
+ switch (class_a) {
+ case MEM_CLASS_NULL:
+ *result = 0;
+ return 0;
+ case MEM_CLASS_BOOL:
+ return mem_cmp_bool(a, b, result);
+ case MEM_CLASS_NUMBER:
+ return mem_cmp_num(a, b, result);
+ case MEM_CLASS_STR:
+ return mem_cmp_str(a, b, result, coll);
+ case MEM_CLASS_BIN:
+ return mem_cmp_bin(a, b, result);
+ case MEM_CLASS_UUID:
+ return mem_cmp_uuid(a, b, result);
+ default:
+ unreachable();
+ }
+ return 0;
+}
+
/*
* Both *pMem1 and *pMem2 contain string values. Compare the two values
* using the collation sequence pColl. As usual, return a negative , zero
@@ -2440,82 +2508,6 @@ sqlVdbeMemTooBig(Mem * p)
return 0;
}
-/*
- * Compare the values contained by the two memory cells, returning
- * negative, zero or positive if pMem1 is less than, equal to, or greater
- * than pMem2. Sorting order is NULL's first, followed by numbers (integers
- * and reals) sorted numerically, followed by text ordered by the collating
- * sequence pColl and finally blob's ordered by memcmp().
- *
- * Two NULL values are considered equal by this function.
- */
-int
-sqlMemCompare(const Mem * pMem1, const Mem * pMem2, const struct coll * pColl)
-{
- int res;
-
- enum mem_type type1 = pMem1->type;
- enum mem_type type2 = pMem2->type;
-
- /* If one value is NULL, it is less than the other. If both values
- * are NULL, return 0.
- */
- if (((type1 | type2) & MEM_TYPE_NULL) != 0)
- return (int)(type2 == MEM_TYPE_NULL) -
- (int)(type1 == MEM_TYPE_NULL);
-
- if (((type1 | type2) & MEM_TYPE_BOOL) != 0) {
- if ((type1 & type2 & MEM_TYPE_BOOL) != 0) {
- if (pMem1->u.b == pMem2->u.b)
- return 0;
- if (pMem1->u.b)
- return 1;
- return -1;
- }
- if (type2 == MEM_TYPE_BOOL)
- return +1;
- return -1;
- }
-
- if (((type1 | type2) & MEM_TYPE_UUID) != 0) {
- if (mem_cmp_uuid(pMem1, pMem2, &res) == 0)
- return res;
- if (type1 != MEM_TYPE_UUID)
- return +1;
- return -1;
- }
-
- /* At least one of the two values is a number
- */
- if (((type1 | type2) &
- (MEM_TYPE_INT | MEM_TYPE_UINT | MEM_TYPE_DOUBLE)) != 0) {
- if (!mem_is_num(pMem1))
- return +1;
- if (!mem_is_num(pMem2))
- return -1;
- mem_cmp_num(pMem1, pMem2, &res);
- return res;
- }
-
- /* If one value is a string and the other is a blob, the string is less.
- * If both are strings, compare using the collating functions.
- */
- if (((type1 | type2) & MEM_TYPE_STR) != 0) {
- if (type1 != MEM_TYPE_STR) {
- return 1;
- }
- if (type2 != MEM_TYPE_STR) {
- return -1;
- }
- mem_cmp_str(pMem1, pMem2, &res, pColl);
- return res;
- }
-
- /* Both values must be blobs. Compare using memcmp(). */
- mem_cmp_bin(pMem1, pMem2, &res);
- return res;
-}
-
int
sql_vdbemem_finalize(struct Mem *mem, struct func *func)
{
diff --git a/src/box/sql/mem.h b/src/box/sql/mem.h
index b3cd5c545..bbb99c4d2 100644
--- a/src/box/sql/mem.h
+++ b/src/box/sql/mem.h
@@ -696,6 +696,14 @@ mem_cmp_num(const struct Mem *a, const struct Mem *b, int *result);
int
mem_cmp_uuid(const struct Mem *left, const struct Mem *right, int *result);
+/**
+ * Compare two MEMs using SCALAR rules and return the result of comparison. MEMs
+ * should be scalars. Original MEMs are not changed.
+ */
+int
+mem_cmp_scalar(const struct Mem *a, const struct Mem *b, int *result,
+ const struct coll *coll);
+
/**
* Convert the given MEM to INTEGER. This function and the function below define
* the rules that are used to convert values of all other types to INTEGER. In
@@ -961,8 +969,6 @@ int sqlVdbeMemTooBig(Mem *);
#define VdbeMemDynamic(X) (((X)->flags & MEM_Dyn) != 0 ||\
((X)->type & (MEM_TYPE_AGG | MEM_TYPE_FRAME)) != 0)
-int sqlMemCompare(const Mem *, const Mem *, const struct coll *);
-
/** MEM manipulate functions. */
/**
diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index 32d02d96e..220e8b269 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -1828,7 +1828,7 @@ case OP_Compare: {
assert(i < (int)def->part_count);
struct coll *coll = def->parts[i].coll;
bool is_rev = def->parts[i].sort_order == SORT_ORDER_DESC;
- iCompare = sqlMemCompare(&aMem[p1+idx], &aMem[p2+idx], coll);
+ mem_cmp_scalar(&aMem[p1+idx], &aMem[p2+idx], &iCompare, coll);
if (iCompare) {
if (is_rev)
iCompare = -iCompare;
diff --git a/src/box/sql/where.c b/src/box/sql/where.c
index e5f35fbf8..dadc6d4a2 100644
--- a/src/box/sql/where.c
+++ b/src/box/sql/where.c
@@ -1272,12 +1272,14 @@ whereRangeSkipScanEst(Parse * pParse, /* Parsing & code generating context */
rc = sql_stat4_column(db, samples[i].sample_key, nEq,
&pVal);
if (rc == 0 && p1 != NULL) {
- int res = sqlMemCompare(p1, pVal, coll);
+ int res;
+ mem_cmp_scalar(p1, pVal, &res, coll);
if (res >= 0)
nLower++;
}
if (rc == 0 && p2 != NULL) {
- int res = sqlMemCompare(p2, pVal, coll);
+ int res;
+ mem_cmp_scalar(p2, pVal, &res, coll);
if (res >= 0)
nUpper++;
}
diff --git a/test/sql-tap/gh-6164-uuid-follow-ups.test.lua b/test/sql-tap/gh-6164-uuid-follow-ups.test.lua
index 4fc5052d8..6b4a811c3 100755
--- a/test/sql-tap/gh-6164-uuid-follow-ups.test.lua
+++ b/test/sql-tap/gh-6164-uuid-follow-ups.test.lua
@@ -1,6 +1,6 @@
#!/usr/bin/env tarantool
local test = require("sqltester")
-test:plan(4)
+test:plan(8)
-- Make sure that function quote() can work with uuid.
test:do_execsql_test(
@@ -35,4 +35,47 @@ test:do_test(
end,
uuid3)
+--
+-- Make sure a comparison that includes a UUID and follows the SCALAR rules is
+-- working correctly.
+--
+box.execute([[CREATE TABLE t (i INTEGER PRIMARY KEY, s SCALAR);]])
+box.execute([[INSERT INTO t VALUES (1, ?)]], {uuid1})
+
+test:do_execsql_test(
+ "gh-6164-5",
+ [[
+ SELECT GREATEST(i, s, x'33', 'something') FROM t;
+ ]], {
+ uuid1
+ })
+
+test:do_execsql_test(
+ "gh-6164-6",
+ [[
+ SELECT LEAST(i, s, x'33', 'something') FROM t;
+ ]], {
+ 1
+ })
+
+box.execute([[INSERT INTO t VALUES (2, 2);]])
+
+test:do_execsql_test(
+ "gh-6164-7",
+ [[
+ SELECT MAX(s) FROM t;
+ ]], {
+ uuid1
+ })
+
+test:do_execsql_test(
+ "gh-6164-8",
+ [[
+ SELECT MIN(s) FROM t;
+ ]], {
+ 2
+ })
+
+box.execute([[DROP TABLE t;]])
+
test:finish_test()
More information about the Tarantool-patches
mailing list