[tarantool-patches] [PATCH v2 1/1] sql: support HAVING without GROUP BY clause

Kirill Shcherbatov kshcherbatov at tarantool.org
Thu Nov 29 17:33:11 MSK 2018


Branch: http://github.com/tarantool/tarantool/tree/kshsh/gh-2364-having-without-groupby
Issue: https://github.com/tarantool/tarantool/issues/2364

Allowed to make SELECT requests that have HAVING clause without
GROUP BY. It is possible when both - left and right parts of
request have aggregate function or constant value.

Closes #2364.

@TarantoolBot document
Title: HAVING without GROUP BY clause
A query with a having clause should also have a group by clause.
If you omit group by, all the rows not excluded by the where
clause return as a single group.
Because no grouping is performed between the where and having
clauses, they cannot act independently of each other. Having
acts like where because it affects the rows in a single group
rather than groups, except the having clause can still use
aggregates.
Having without group by is not supported for select from
multiple tables.
---
 src/box/sql/resolve.c         |  97 +++++++++++++++++++++++---
 src/box/sql/sqliteInt.h       |   3 +-
 test/sql-tap/count.test.lua   |   8 +--
 test/sql-tap/select3.test.lua |   2 +-
 test/sql-tap/select5.test.lua | 128 +++++++++++++++++++++++++++++++++-
 5 files changed, 223 insertions(+), 15 deletions(-)

diff --git a/src/box/sql/resolve.c b/src/box/sql/resolve.c
index 9a2d6ff4e..34f0c836a 100644
--- a/src/box/sql/resolve.c
+++ b/src/box/sql/resolve.c
@@ -602,6 +602,8 @@ resolveExprStep(Walker * pWalker, Expr * pExpr)
 		/* A lone identifier is the name of a column.
 		 */
 	case TK_ID:{
+			if ((pNC->ncFlags & NC_AllowAgg) != 0)
+				pNC->ncFlags |= NC_HasUnaggregatedId;
 			return lookupName(pParse, 0, pExpr->u.zToken, pNC,
 					  pExpr);
 		}
@@ -1180,6 +1182,50 @@ resolveOrderGroupBy(NameContext * pNC,	/* The name context of the SELECT stateme
 	return sqlite3ResolveOrderGroupBy(pParse, pSelect, pOrderBy, zType);
 }
 
+/**
+ * Test if specified expression is a regular literal.
+ * @param expr Expression to analyse.
+ * @retval true  If expression is literal.
+ * @retval false Otherwise.
+ */
+static bool
+sql_expr_is_literal(struct Expr *expr)
+{
+	return expr->op == TK_INTEGER || expr->op == TK_FLOAT ||
+	       expr->op == TK_BLOB || expr->op == TK_STRING ||
+	       expr->op == TK_NULL || expr->op == TK_UMINUS;
+}
+
+/**
+ * Test if specified expression is a constant function.
+ * @param parser Parsing context.
+ * @param expr Expression to analyse.
+ * @retval true  If expression is a existent constant function.
+ * @retval false Otherwise.
+ */
+static bool
+sql_expr_is_constant_func(struct Parse *parser, struct Expr *expr)
+{
+	if (expr->op != TK_FUNCTION)
+		return false;
+	char *func_name = expr->u.zToken;
+	struct ExprList *args_list = expr->x.pList;
+	int args_count = args_list != NULL ? args_list->nExpr : 0;
+	struct FuncDef *func =
+		sqlite3FindFunction(parser->db, func_name, args_count, 0);
+	if (func == NULL) {
+		/*
+		 * If we fail to find function with exact number
+		 * of arguments, lets try to search similar
+		 * function but with different number of args.
+		 */
+		func = sqlite3FindFunction(parser->db, func_name, -2, 0);
+		if (func == NULL)
+			return false;
+	}
+	return (func->funcFlags & SQLITE_FUNC_CONSTANT) != 0;
+}
+
 /*
  * Resolve names in the SELECT statement p and all of its descendants.
  */
@@ -1285,13 +1331,23 @@ resolveSelectStep(Walker * pWalker, Select * p)
 		/* Set up the local name-context to pass to sqlite3ResolveExprNames() to
 		 * resolve the result-set expression list.
 		 */
+		bool is_all_select_agg = true;
 		sNC.ncFlags = NC_AllowAgg;
 		sNC.pSrcList = p->pSrc;
 		sNC.pNext = pOuterNC;
-
 		/* Resolve names in the result set. */
-		if (sqlite3ResolveExprListNames(&sNC, p->pEList))
-			return WRC_Abort;
+		for (i = 0; i < p->pEList->nExpr; i++) {
+			struct Expr *expr = p->pEList->a[i].pExpr;
+			u16 has_agg_flag = sNC.ncFlags & NC_HasAgg;
+			sNC.ncFlags &= ~NC_HasAgg;
+			if (sqlite3ResolveExprNames(&sNC, expr))
+				return WRC_Abort;
+			is_all_select_agg &= (sNC.ncFlags & NC_HasAgg) != 0 ||
+					     sql_expr_is_literal(expr) ||
+					     sql_expr_is_constant_func(pParse,
+								       expr);
+			sNC.ncFlags |= has_agg_flag;
+		}
 
 		/* If there are no aggregate functions in the result-set, and no GROUP BY
 		 * expression, do not allow aggregates in any of the other expressions.
@@ -1306,12 +1362,37 @@ resolveSelectStep(Walker * pWalker, Select * p)
 			sNC.ncFlags &= ~NC_AllowAgg;
 		}
 
-		/* If a HAVING clause is present, then there must be a GROUP BY clause.
+		/*
+		 * If a HAVING clause is present, then there must
+		 * be a GROUP BY clause or aggregate function
+		 * should be specified.
 		 */
-		if (p->pHaving && !pGroupBy) {
-			sqlite3ErrorMsg(pParse,
-					"a GROUP BY clause is required before HAVING");
-			return WRC_Abort;
+		if (p->pHaving != NULL && pGroupBy == NULL) {
+			struct NameContext having_nc;
+			memset(&having_nc, 0, sizeof(having_nc));
+			having_nc.pParse = pParse;
+			having_nc.ncFlags = NC_AllowAgg;
+			having_nc.pSrcList = p->pSrc;
+			if (is_all_select_agg &&
+			    sqlite3ResolveExprNames(&having_nc,
+						    p->pHaving) != 0)
+				return WRC_Abort;
+			if ((having_nc.ncFlags & NC_HasAgg) == 0 ||
+			    (having_nc.ncFlags & NC_HasUnaggregatedId) != 0) {
+				const char *err_msg =
+					tt_sprintf("HAVING argument must "
+						   "appear in the GROUP BY "
+						   "clause or be used in an "
+						   "aggregate function");
+				diag_set(ClientError, ER_SQL, err_msg);
+				pParse->nErr++;
+				pParse->rc = SQL_TARANTOOL_ERROR;
+				return WRC_Abort;
+			}
+			sql_expr_delete(db, p->pLimit, false);
+			p->pLimit =
+			    sqlite3ExprAlloc(db, TK_INTEGER,
+					     &sqlite3IntTokens[1], 0);
 		}
 
 		/* Add the output column list to the name-context before parsing the
diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h
index dbf58d967..8f0e0a320 100644
--- a/src/box/sql/sqliteInt.h
+++ b/src/box/sql/sqliteInt.h
@@ -2454,7 +2454,8 @@ struct NameContext {
 #define NC_IdxExpr   0x0020	/* True if resolving columns of CREATE INDEX */
 #define NC_VarSelect 0x0040	/* A correlated subquery has been seen */
 #define NC_MinMaxAgg 0x1000	/* min/max aggregates seen.  See note above */
-
+/** One or more identifiers seen without aggregate function. */
+#define NC_HasUnaggregatedId     0x2000
 /*
  * An instance of the following structure contains all information
  * needed to generate code for a single SELECT statement.
diff --git a/test/sql-tap/count.test.lua b/test/sql-tap/count.test.lua
index b05e3a28e..6f58210f4 100755
--- a/test/sql-tap/count.test.lua
+++ b/test/sql-tap/count.test.lua
@@ -172,15 +172,15 @@ test:do_test(
         return uses_op_count("SELECT count(*) FROM t2 WHERE a IS NOT NULL")
     end, 0)
 
-test:do_catchsql_test(
+test:do_execsql_test(
     "count-2.9",
     [[
         SELECT count(*) FROM t2 HAVING count(*)>1
-    ]], {
+    ]],
         -- <count-2.9>
-        1, "a GROUP BY clause is required before HAVING"
+        {}
         -- </count-2.9>
-    })
+    )
 
 test:do_test(
     "count-2.10",
diff --git a/test/sql-tap/select3.test.lua b/test/sql-tap/select3.test.lua
index dbc95f0d8..2704c267e 100755
--- a/test/sql-tap/select3.test.lua
+++ b/test/sql-tap/select3.test.lua
@@ -200,7 +200,7 @@ test:do_catchsql_test("select3-3.1", [[
   SELECT log, count(*) FROM t1 HAVING log>=4
 ]], {
   -- <select3-3.1>
-  1, "a GROUP BY clause is required before HAVING"
+  1, "SQL error: HAVING argument must appear in the GROUP BY clause or be used in an aggregate function"
   -- </select3-3.1>
 })
 
diff --git a/test/sql-tap/select5.test.lua b/test/sql-tap/select5.test.lua
index 9c3cd2759..b889132aa 100755
--- a/test/sql-tap/select5.test.lua
+++ b/test/sql-tap/select5.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(32)
+test:plan(44)
 
 --!./tcltestrunner.lua
 -- 2001 September 15
@@ -412,5 +412,131 @@ test:do_execsql_test(
         -- </select5-8.8>
     })
 
+--
+-- gh-2364: Support HAVING without GROUP BY clause.
+--
+test:do_catchsql_test(
+    "select5-9.1",
+    [[
+        CREATE TABLE te40 (s1 INT, s2 INT, PRIMARY KEY (s1,s2));
+        INSERT INTO te40 VALUES (1,1);
+        INSERT INTO te40 VALUES (2,2);
+        SELECT s1 FROM te40 HAVING s1 = 1;
+    ]], {
+    -- <select5-9.1>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be used in an aggregate function"
+    -- </select5-9.1>
+})
+
+test:do_catchsql_test(
+    "select5-9.2",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING s1 = 2;
+    ]], {
+    -- <select5-9.2>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be used in an aggregate function"
+    -- </select5-9.2>
+})
+
+test:do_catchsql_test(
+    "select5-9.3",
+    [[
+        SELECT s1 FROM te40 HAVING SUM(s1) = 2;
+    ]], {
+    -- <select5-9.3>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be used in an aggregate function"
+    -- </select5-9.3>
+})
+
+test:do_execsql_test(
+    "select5-9.4",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.4>
+    3
+    -- </select5-9.4>
+})
+
+test:do_execsql_test(
+    "select5-9.5",
+    [[
+        SELECT MIN(s1) FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.5>
+    1
+    -- </select5-9.5>
+})
+
+test:do_execsql_test(
+    "select5-9.6",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) < 0;
+    ]],
+    -- <select5-9.6>
+    {}
+    -- </select5-9.6>
+)
+
+test:do_catchsql_test(
+    "select5-9.7",
+    [[
+        SELECT SUM(s1),s2 FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.7>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be used in an aggregate function"
+    -- </select5-9.7>
+})
+
+test:do_catchsql_test(
+    "select5-9.8",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0 and s2 > 0;
+    ]], {
+    -- <select5-9.8>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be used in an aggregate function"
+    -- </select5-9.8>
+})
+
+test:do_execsql_test(
+    "select5-9.9",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0 and SUM(s2) > 0;
+    ]], {
+    -- <select5-9.9>
+    3
+    -- </select5-9.9>
+})
+
+test:do_execsql_test(
+    "select5-9.10",
+    [[
+        SELECT 1 FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.10>
+    1
+    -- </select5-9.10>
+})
+
+test:do_execsql_test(
+    "select5-9.11",
+    [[
+        SELECT -1 FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.11>
+    -1
+    -- </select5-9.11>
+})
+
+test:do_execsql_test(
+    "select5-9.12",
+    [[
+        SELECT NULL FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.12>
+    ""
+    -- </select5-9.12>
+})
+
 test:finish_test()
 
-- 
2.19.2





More information about the Tarantool-patches mailing list