[tarantool-patches] [PATCH 2/2] sql: make aggregate functions types more strict

Ivan Koptelov ivan.koptelov at tarantool.org
Fri Apr 5 17:57:44 MSK 2019


TOTAL, SUM and AVG are now can be called only on
expressions of INTEGER, FLOAT or SCALAR type.
Also if TOTAL, SUM, AVG, MIN or MAX are called on
expression of SCALAR type and during function work
value of incompatible types are encountered (e.g. INTEGER
and TEXT) then the error is raised.

Closes #4032

@TarantoolBot document
Title: sql aggregate functions became more type strict
Patch changes two things:
1) It prohibits calling TOTAL, SUM and AVG on columns/
expressions which has type different of INTEGER,
FLOAT or SCALAR.
2) If one would call TOTAL, SUM, AVG, MIN or MAX on
column/expression of SCALAR type which actually has
values of incompatible types, then the error would be
raised. INTEGER and FLOAT are compatible, all other
types are incompatible. Consider the example:

CREATE TABLE t1 (a INTEGER PRIMARY KEY, b SCALAR);
INSERT INTO t1 VALUES (1, 1);
INSERT INTO t1 VALUES (2, 3.4);
SELECT MAX(b) FROM t1; -- is OK
INSERT INTO t1 VALUES (3, 'abc');
SELECT MAX(b) FROM t1; -- now it would fall with error
---
 src/box/sql/func.c            |  72 ++++++++++++++++++++++--
 src/box/sql/select.c          |  15 ++++-
 test/sql-tap/func5.test.lua   |  50 ++++++++++++++++-
 test/sql-tap/minmax4.test.lua | 101 +++++++++++++++++++++++++++++++++-
 test/sql-tap/select1.test.lua |  12 +---
 test/sql-tap/select5.test.lua |   4 +-
 6 files changed, 231 insertions(+), 23 deletions(-)

diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index b1bfc886e..379f28252 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -45,6 +45,17 @@
 #include <unicode/uchar.h>
 #include <unicode/ucol.h>
 
+/*
+ * This structure is for keeping context during work of
+ * aggregate function.
+ */
+struct aggregate_context {
+    /** Value being aggregated. (e.g. current MAX or current counter value). */
+    Mem value;
+    /** Reference value to keep track of previous argument's type. */
+    Mem reference_value;
+};
+
 static UConverter* pUtf8conv;
 
 /*
@@ -1509,9 +1520,14 @@ sumStep(sql_context * context, int argc, sql_value ** argv)
 			    && sqlAddInt64(&p->iSum, v)) {
 				p->overflow = 1;
 			}
-		} else {
+		} else if (type == SQL_FLOAT) {
 			p->rSum += sql_value_double(argv[0]);
 			p->approx = 1;
+		} else {
+			diag_set(ClientError, ER_INCONSISTENT_TYPES,
+				 "INTEGER or FLOAT", mem_type_to_str(argv[0]));
+			context->fErrorOrAux = 1;
+			context->isError = SQL_TARANTOOL_ERROR;
 		}
 	}
 }
@@ -1588,16 +1604,57 @@ static void
 minmaxStep(sql_context *context, int not_used, sql_value **argv)
 {
 	UNUSED_PARAMETER(not_used);
-	sql_value *arg = argv[0];
-	sql_value *best = sql_aggregate_context(context, sizeof(*best));
+	struct aggregate_context *aggr_context =
+		(struct aggregate_context *) sql_aggregate_context(context,
+								   sizeof(*aggr_context));
+	if (aggr_context == NULL)
+		return;
 
+	sql_value *arg = argv[0];
+	sql_value *best = &(aggr_context->value);
 	if (best == NULL)
 		return;
 
-	if (sql_value_type(argv[0]) == SQL_NULL) {
+	enum sql_type sql_type = sql_value_type(arg);
+
+	if (sql_type == SQL_NULL) {
 		if (best->flags != 0)
 			sqlSkipAccumulatorLoad(context);
 	} else if (best->flags != 0) {
+		/*
+		 * During proceeding of the function, arguments
+		 * of different types may be encountered (if
+		 * SCALAR type column is proceeded). Some types
+		 * are compatible (INTEGER and FLOAT) and others
+		 * are not (TEXT and BLOB are not compatible with
+		 * any other type). In the later case an error
+		 * is raised.
+		 */
+		sql_value *reference_value = &aggr_context->reference_value;
+		if (reference_value->flags == 0)
+			sqlVdbeMemCopy(reference_value, arg);
+		enum sql_type ref_sql_type = sql_value_type(reference_value);
+
+		bool is_compatible = true;
+		if (sql_type != ref_sql_type) {
+			is_compatible = false;
+			if ((sql_type == SQL_INTEGER || sql_type == SQL_FLOAT) &&
+			    (ref_sql_type == SQL_INTEGER ||
+			     ref_sql_type == SQL_FLOAT)) {
+				is_compatible = true;
+			}
+		}
+		if (!is_compatible) {
+			diag_set(ClientError, ER_INCONSISTENT_TYPES,
+				 mem_type_to_str(reference_value),
+				 mem_type_to_str(arg));
+			context->fErrorOrAux = 1;
+			context->isError = SQL_TARANTOOL_ERROR;
+			sqlVdbeMemRelease(best);
+			sqlVdbeMemRelease(reference_value);
+			return;
+		}
+
 		int max;
 		int cmp;
 		struct coll *coll = sqlGetFuncCollSeq(context);
@@ -1625,13 +1682,16 @@ minmaxStep(sql_context *context, int not_used, sql_value **argv)
 static void
 minMaxFinalize(sql_context * context)
 {
-	sql_value *pRes;
-	pRes = (sql_value *) sql_aggregate_context(context, 0);
+	struct aggregate_context *func_context =
+		(struct aggregate_context *) sql_aggregate_context(context, sizeof(*func_context));
+	sql_value *pRes = &(func_context->value);
+
 	if (pRes != NULL) {
 		if (pRes->flags != 0) {
 			sql_result_value(context, pRes);
 		}
 		sqlVdbeMemRelease(pRes);
+		sqlVdbeMemRelease(&func_context->reference_value);
 	}
 }
 
diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index b1ec8c758..624083b22 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -4340,13 +4340,22 @@ pushDownWhereTerms(Parse * pParse,	/* Parse context (for malloc() and error repo
  * argument, this function checks if the following are true:
  *
  *    * the query contains just a single aggregate function,
- *    * the aggregate function is either min() or max(), and
- *    * the argument to the aggregate function is a column value.
+ *    * the aggregate function is either min() or max(),
+ *    * the argument to the aggregate function is a column value,
+ *    * the type of column is not SCALAR.
  *
  * If all of the above are true, then WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX
  * is returned as appropriate. Also, *ppMinMax is set to point to the
  * list of arguments passed to the aggregate before returning.
  *
+ * The requirement of column type not being SCALAR follows from
+ * the purpose of the function. The purpose of the function is
+ * to answer the question: "Should MIN/MAX call be optimised by
+ * using ORDER ON clause code?" If the type of column is SCALAR
+ * then we have to iterate over all rows to check if their types
+ * are compatible. Hence, the optimisation should not be used.
+ * For details please see: https://github.com/tarantool/tarantool/issues/4032
+ *
  * Or, if the conditions above are not met, *ppMinMax is set to 0 and
  * WHERE_ORDERBY_NORMAL is returned.
  */
@@ -4364,6 +4373,8 @@ minMaxQuery(AggInfo * pAggInfo, ExprList ** ppMinMax)
 		if (pEList && pEList->nExpr == 1
 		    && pEList->a[0].pExpr->op == TK_AGG_COLUMN) {
 			const char *zFunc = pExpr->u.zToken;
+			if (sql_expr_type(pEList->a[0].pExpr) == FIELD_TYPE_SCALAR)
+				return eRet;
 			if (sqlStrICmp(zFunc, "min") == 0) {
 				eRet = WHERE_ORDERBY_MIN;
 				*ppMinMax = pEList;
diff --git a/test/sql-tap/func5.test.lua b/test/sql-tap/func5.test.lua
index f3706e631..b504bb8bf 100755
--- a/test/sql-tap/func5.test.lua
+++ b/test/sql-tap/func5.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(15)
+test:plan(19)
 
 --!./tcltestrunner.lua
 -- 2010 August 27
@@ -229,4 +229,52 @@ test:do_catchsql_test(
     }
 )
 
+-- The following tests ensure that AVG and SUM correctly proceeds
+-- SCALAR type. The expected behavior:
+-- 1) If column actually contains INTEGER and FLOAT values,
+--    then INTEGER values are casted to FLOATs and the result
+--    is computed.
+-- 2) If column actually contains values of any other
+--    types, then the error is raised.
+
+test:do_execsql_test(
+    "func-5.4.1",
+    [[
+        CREATE TABLE test6(a SCALAR PRIMARY KEY);
+        INSERT INTO test6 VALUES(1);
+        INSERT INTO test6 VALUES(2.5);
+        SELECT SUM(a) FROM test6;
+    ]], {
+        3.5
+    }
+)
+
+test:do_execsql_test(
+    "func-5.4.2",
+    [[
+        SELECT AVG(a) FROM test6;
+    ]], {
+        1.75
+    }
+)
+
+test:do_catchsql_test(
+    "func-5.4.3",
+    [[
+        INSERT INTO test6 VALUES('abc');
+        SELECT SUM(a) FROM test6;
+    ]], {
+        1,"Inconsistent types: expected INTEGER or FLOAT got TEXT"
+    }
+)
+
+test:do_catchsql_test(
+    "func-5.4.4",
+    [[
+        SELECT AVG(a) FROM test6;
+    ]], {
+        1,"Inconsistent types: expected INTEGER or FLOAT got TEXT"
+    }
+)
+
 test:finish_test()
diff --git a/test/sql-tap/minmax4.test.lua b/test/sql-tap/minmax4.test.lua
index b600c9bfe..7476051f1 100755
--- a/test/sql-tap/minmax4.test.lua
+++ b/test/sql-tap/minmax4.test.lua
@@ -1,6 +1,7 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(21)
+
+test:plan(29)
 
 --!./tcltestrunner.lua
 -- 2012 February 02
@@ -303,6 +304,104 @@ test:do_test(
         -- </minmax4-2.7>
     })
 
+-- The following tests ensure that MIN and MAX correctly proceeds
+-- SCALAR type. The expected behavior:
+-- 1) If column actually contains INTEGER and FLOAT values,
+--    then INTEGER values are casted to FLOATs and the result
+--    is computed.
+-- 2) If column actually contains TEXT and values of any other
+--    types, then the error is raised.
+-- 3) All other combinations would also result in error.
+test:do_test(
+    "minmax4-3.1",
+    function()
+        return test:execsql [[
+            CREATE TABLE t4(a INT PRIMARY KEY, b SCALAR);
+            INSERT INTO t4 VALUES (1, 2);
+            INSERT INTO t4 VALUES (2, 1.5);
+            SELECT MAX(b) FROM t4;
+        ]]
+        end, {
+            2.0,
+    })
+
+test:do_test(
+    "minmax4-3.2",
+    function()
+        return test:execsql [[
+            SELECT MIN(b) FROM t4;
+        ]]
+    end, {
+        1.5,
+    })
+
+test:do_test(
+    "minmax4-3.3",
+    function()
+        return test:catchsql [[
+            INSERT INTO t4 VALUES (3, 'abc');
+            SELECT MIN(b) FROM t4;
+        ]]
+    end, {
+        1, "Inconsistent types: expected REAL got TEXT"
+    })
+
+test:do_test(
+    "minmax4-3.4",
+    function()
+        return test:catchsql [[
+            SELECT MAX(b) FROM t4;
+        ]]
+    end, {
+        1, "Inconsistent types: expected REAL got TEXT"
+    })
+
+-- Cases when we call aggregate MIN/MAX functions on column with
+-- index (e.g. PRIMARY KEY index) deserves it's own test
+-- because in this case MIN/MAX is implemented not with
+-- dedicated function, but with usage of corresponding index.
+test:do_test(
+    "minmax4-3.5",
+    function()
+        return test:execsql [[
+            CREATE TABLE t5(a SCALAR PRIMARY KEY);
+            INSERT INTO t5 VALUES (2);
+            INSERT INTO t5 VALUES (1.5);
+            SELECT MAX(a) FROM t5;
+        ]]
+    end, {
+        2.0,
+    })
 
+test:do_test(
+    "minmax4-3.6",
+    function()
+        return test:execsql [[
+            SELECT MIN(a) FROM t5;
+        ]]
+    end, {
+        1.5,
+    })
+
+test:do_test(
+    "minmax4-3.7",
+    function()
+        return test:catchsql [[
+            INSERT INTO t5 VALUES ('abc');
+            SELECT MIN(a) FROM t5;
+        ]]
+    end, {
+        1, "Inconsistent types: expected INTEGER got TEXT"
+    })
+
+test:do_test(
+    "minmax4-3.8",
+    function()
+        return test:catchsql [[
+            SELECT MAX(a) FROM t5;
+        ]]
+    end, {
+        1, "Inconsistent types: expected INTEGER got TEXT"
+    })
 
 test:finish_test()
diff --git a/test/sql-tap/select1.test.lua b/test/sql-tap/select1.test.lua
index be63e815d..a4ce19d73 100755
--- a/test/sql-tap/select1.test.lua
+++ b/test/sql-tap/select1.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(173)
+test:plan(172)
 
 --!./tcltestrunner.lua
 -- ["set","testdir",[["file","dirname",["argv0"]]]]
@@ -503,16 +503,6 @@ test:do_catchsql_test(
         -- </select1-2.17>
     })
 
-test:do_execsql_test(
-    "select1-2.17.1",
-    [[
-        SELECT sum(a) FROM t3
-    ]], {
-        -- <select1-2.17.1>
-        44.0
-        -- </select1-2.17.1>
-    })
-
 test:do_catchsql_test(
     "select1-2.18",
     [[
diff --git a/test/sql-tap/select5.test.lua b/test/sql-tap/select5.test.lua
index 507448d00..5674edfea 100755
--- a/test/sql-tap/select5.test.lua
+++ b/test/sql-tap/select5.test.lua
@@ -553,8 +553,8 @@ test:do_execsql_test(
 test:do_execsql_test(
     "select5-9.13.2",
     [[
-            CREATE TABLE jj (s1 INT, s2 VARCHAR(1), PRIMARY KEY(s1));
-            INSERT INTO jj VALUES(1, 'A'), (2, 'a');
+            CREATE TABLE jj (s1 INT, s2 INTEGER, PRIMARY KEY(s1));
+            INSERT INTO jj VALUES(1, 0), (2, 0);
             SELECT 1 FROM jj HAVING avg(s2) = 1 AND avg(s2) = 0;
     ]], {
     -- <select5-9.13.2>
-- 
2.20.1





More information about the Tarantool-patches mailing list