Tarantool development patches archive
 help / color / mirror / Atom feed
* [tarantool-patches] [PATCH v1 1/1] sql: support HAVING without GROUP BY clause
@ 2018-08-28 16:39 Kirill Shcherbatov
  2018-08-29 11:52 ` [tarantool-patches] " n.pettik
  0 siblings, 1 reply; 9+ messages in thread
From: Kirill Shcherbatov @ 2018-08-28 16:39 UTC (permalink / raw)
  To: tarantool-patches; +Cc: korablev, Kirill Shcherbatov

To allow user do not specify GROUP BY directly we build it
manually using table primary index.

Closes 2364.

@TarantoolBot document
Title: HAVING without GROUP BY clause
A query with a having clause should also have a group by clause.
If you omit group by, all the rows not excluded by the where
clause return as a single group.
Because no grouping is performed between the where and having
clauses, they cannot act independently of each other. Having
acts like where because it affects the rows in a single group
rather than groups, except the having clause can still use
aggregates.
Having without group by is not supported for select from
multiple tables.
---
Branch: http://github.com/tarantool/tarantool/tree/kshsh/gh-2364-having-without-groupby
Issue: https://github.com/tarantool/tarantool/issues/2364

 src/box/sql/resolve.c         | 47 ++++++++++++++++++++++++++++++++++-------
 test/sql-tap/count.test.lua   |  8 +++----
 test/sql-tap/select3.test.lua |  4 ++--
 test/sql-tap/select5.test.lua | 49 ++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 93 insertions(+), 15 deletions(-)

diff --git a/src/box/sql/resolve.c b/src/box/sql/resolve.c
index 280ecd9..34bb069 100644
--- a/src/box/sql/resolve.c
+++ b/src/box/sql/resolve.c
@@ -1295,6 +1295,45 @@ resolveSelectStep(Walker * pWalker, Select * p)
 		 * expression, do not allow aggregates in any of the other expressions.
 		 */
 		assert((p->selFlags & SF_Aggregate) == 0);
+		/*
+		 * A query with a HAVING clause should also have
+		 * a group by clause. If GROUP BY is ommited, all
+		 * the rows not excluded by the where clause
+		 * return as a single group built with primary
+		 * key.
+		 */
+		if (p->pHaving != NULL && p->pGroupBy == NULL) {
+			if (p->pSrc->nSrc != 1) {
+				diag_set(ClientError, ER_SQL,
+					"GROUP BY may be ommited only when one "
+					"source table specified");
+				pParse->nErr++;
+				pParse->rc = SQL_TARANTOOL_ERROR;
+				return WRC_Abort;
+			}
+			struct space_def *def = p->pSrc->a->pTab->def;
+			struct Index *pk =
+				sqlite3PrimaryKeyIndex(p->pSrc->a->pTab);
+			assert(pk != NULL);
+			struct key_part *part = pk->def->key_def->parts;
+			struct key_part *part_end =
+				part + pk->def->key_def->part_count;
+			for (; part < part_end && !db->mallocFailed; part++) {
+				struct Token pk_column;
+				char *col_name =
+					def->fields[part->fieldno].name;
+				sqlite3TokenInit(&pk_column, col_name);
+				struct Expr *pk_part =
+					sqlite3ExprAlloc(db, TK_ID,
+							 &pk_column, 0);
+				p->pGroupBy =
+					sql_expr_list_append(db, p->pGroupBy,
+							     pk_part);
+			}
+			/* Memory allocation error. */
+			if (db->mallocFailed != 0)
+				return WRC_Abort;
+		}
 		pGroupBy = p->pGroupBy;
 		if (pGroupBy || (sNC.ncFlags & NC_HasAgg) != 0) {
 			assert(NC_MinMaxAgg == SF_MinMaxAgg);
@@ -1304,14 +1343,6 @@ resolveSelectStep(Walker * pWalker, Select * p)
 			sNC.ncFlags &= ~NC_AllowAgg;
 		}
 
-		/* If a HAVING clause is present, then there must be a GROUP BY clause.
-		 */
-		if (p->pHaving && !pGroupBy) {
-			sqlite3ErrorMsg(pParse,
-					"a GROUP BY clause is required before HAVING");
-			return WRC_Abort;
-		}
-
 		/* Add the output column list to the name-context before parsing the
 		 * other expressions in the SELECT statement. This is so that
 		 * expressions in the WHERE clause (etc.) can refer to expressions by
diff --git a/test/sql-tap/count.test.lua b/test/sql-tap/count.test.lua
index 45808de..700d047 100755
--- a/test/sql-tap/count.test.lua
+++ b/test/sql-tap/count.test.lua
@@ -172,15 +172,15 @@ test:do_test(
         return uses_op_count("SELECT count(*) FROM t2 WHERE a IS NOT NULL")
     end, 0)
 
-test:do_catchsql_test(
+test:do_execsql_test(
     "count-2.9",
     [[
         SELECT count(*) FROM t2 HAVING count(*)>1
-    ]], {
+    ]],
         -- <count-2.9>
-        1, "a GROUP BY clause is required before HAVING"
+        {}
         -- </count-2.9>
-    })
+    )
 
 test:do_test(
     "count-2.10",
diff --git a/test/sql-tap/select3.test.lua b/test/sql-tap/select3.test.lua
index d49bb87..535bd5d 100755
--- a/test/sql-tap/select3.test.lua
+++ b/test/sql-tap/select3.test.lua
@@ -196,11 +196,11 @@ test:do_catchsql_test("select3-2.14", [[
 
 -- Cannot have a HAVING without a GROUP BY
 --
-test:do_catchsql_test("select3-3.1", [[
+test:do_execsql_test("select3-3.1", [[
   SELECT log, count(*) FROM t1 HAVING log>=4
 ]], {
   -- <select3-3.1>
-  1, "a GROUP BY clause is required before HAVING"
+  4,1,4,1,4,1,4,1,4,1,4,1,4,1,4,1,5,1,5,1,5,1,5,1,5,1,5,1,5,1,5,1,5,1,5,1,5,1,5,1,5,1,5,1,5,1
   -- </select3-3.1>
 })
 
diff --git a/test/sql-tap/select5.test.lua b/test/sql-tap/select5.test.lua
index 7ca25ae..353d767 100755
--- a/test/sql-tap/select5.test.lua
+++ b/test/sql-tap/select5.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(32)
+test:plan(36)
 
 --!./tcltestrunner.lua
 -- 2001 September 15
@@ -412,5 +412,52 @@ test:do_execsql_test(
         -- </select5-8.8>
     })
 
+--
+-- gh-2364: Support HAVING without GROUP BY clause.
+--
+test:do_execsql_test(
+    "select6-1.1",
+    [[
+        CREATE TABLE te40 (s1 INT, s2 INT, PRIMARY KEY (s1,s2));
+        INSERT INTO te40 VALUES (1,1);
+        SELECT s1 FROM te40 HAVING s1 = 1;
+    ]], {
+    -- <select6-1.1>
+    1
+    -- <select6-1.1>
+})
+
+test:do_execsql_test(
+    "select6-1.2",
+    [[
+        SELECT s1 FROM te40 HAVING s1 = 2;
+    ]],
+    -- <select6-1.2>
+    {}
+    -- <select6-1.2>
+)
+
+test:do_catchsql_test(
+    "select6-2.1",
+    [[
+        CREATE TABLE te4 (s1 INT PRIMARY KEY);
+        INSERT INTO te4 VALUES (1);
+        SELECT te40.s1 FROM te40,te4 HAVING s1 = 1;
+    ]], {
+    -- <select6-2.1>
+    1, "SQL error: GROUP BY may be ommited only when one source table specified"
+    -- <select6-2.1>
+})
+
+test:do_execsql_test(
+    "select6-3.1",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select6-3.1>
+    1
+    -- <select6-3.1>
+})
+
 test:finish_test()
 
-- 
2.7.4

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: support HAVING without GROUP BY clause
  2018-08-28 16:39 [tarantool-patches] [PATCH v1 1/1] sql: support HAVING without GROUP BY clause Kirill Shcherbatov
@ 2018-08-29 11:52 ` n.pettik
  2018-08-29 13:29   ` Kirill Shcherbatov
  0 siblings, 1 reply; 9+ messages in thread
From: n.pettik @ 2018-08-29 11:52 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Kirill Shcherbatov

To be honest, personally I don’t like this idea. IMHO this tickets
should be simply closed (as it was before ‘wontfix’). Also, I see that
your implementation differs from other DBs:

Postgres, Oracle, DB2:

drop table if exists t1 \\
create table t1(id int primary key, a int, b int) \\
insert into t1 values(1,2,3), (4,5,6), (7,8,9) \\
SELECT SUM(a) FROM t1 HAVING SUM(a) > 0 \\

1	PostgreSQL 9.6.2, compiled by Visual C++ build 1800, 64-bit

  	sum
1	15

Your version:

tarantool> \set language sql
---
- true
...

tarantool> create table t1(id int primary key, a int, b int)
---
...

tarantool> insert into t1 values(1,2,3), (4,5,6), (7,8,9)
---
...

tarantool> SELECT SUM(a) FROM t1 HAVING SUM(a) > 0
---
- - [2]
  - [5]
  - [8]
…

Please, make it behave the same way as other DBs
and resend patch.


> To allow user do not specify GROUP BY directly we build it
> manually using table primary index.
> 
> Closes 2364.

Closes #2364

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: support HAVING without GROUP BY clause
  2018-08-29 11:52 ` [tarantool-patches] " n.pettik
@ 2018-08-29 13:29   ` Kirill Shcherbatov
  2018-09-03 11:24     ` n.pettik
  2018-09-03 13:23     ` n.pettik
  0 siblings, 2 replies; 9+ messages in thread
From: Kirill Shcherbatov @ 2018-08-29 13:29 UTC (permalink / raw)
  To: tarantool-patches, Nikita Pettik

> Please, make it behave the same way as other DBs
> and resend patch.

====================================

Allowed to use HAVING without GROUP BY when both select list
and having clause have aggregate function.

Closes 2364.

@TarantoolBot document
Title: HAVING without GROUP BY clause
A query with a having clause should also have a group by clause.
Another ability is to use aggregate function both in select list
and having clause.
Example:
SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0;
---
 src/box/sql/resolve.c         | 30 +++++++++++++++----
 test/sql-tap/count.test.lua   |  8 ++---
 test/sql-tap/select3.test.lua |  2 +-
 test/sql-tap/select5.test.lua | 68 ++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 97 insertions(+), 11 deletions(-)

diff --git a/src/box/sql/resolve.c b/src/box/sql/resolve.c
index 280ecd9..9d28089 100644
--- a/src/box/sql/resolve.c
+++ b/src/box/sql/resolve.c
@@ -1304,12 +1304,32 @@ resolveSelectStep(Walker * pWalker, Select * p)
 			sNC.ncFlags &= ~NC_AllowAgg;
 		}
 
-		/* If a HAVING clause is present, then there must be a GROUP BY clause.
+		/*
+		 * If a HAVING clause is present, then there must
+		 * be a GROUP BY clause or aggregate function
+		 * should be specified.
 		 */
-		if (p->pHaving && !pGroupBy) {
-			sqlite3ErrorMsg(pParse,
-					"a GROUP BY clause is required before HAVING");
-			return WRC_Abort;
+		if (p->pHaving != NULL && pGroupBy == NULL) {
+			struct NameContext having_nc;
+			memset(&having_nc, 0, sizeof(having_nc));
+			having_nc.pParse = pParse;
+			having_nc.ncFlags = NC_AllowAgg;
+			having_nc.pSrcList = p->pSrc;
+			if ((p->selFlags & SF_Aggregate) != 0 &&
+				sqlite3ResolveExprNames(&having_nc,
+							p->pHaving) != 0)
+				return WRC_Abort;
+			if ((having_nc.ncFlags & NC_HasAgg) == 0) {
+				const char *err_msg =
+					tt_sprintf("HAVING expression must "
+						   "appear in the GROUP BY "
+						   "clause or be used in an "
+						   "aggregate function");
+				diag_set(ClientError, ER_SQL, err_msg);
+				pParse->nErr++;
+				pParse->rc = SQL_TARANTOOL_ERROR;
+				return WRC_Abort;
+			}
 		}
 
 		/* Add the output column list to the name-context before parsing the
diff --git a/test/sql-tap/count.test.lua b/test/sql-tap/count.test.lua
index 45808de..700d047 100755
--- a/test/sql-tap/count.test.lua
+++ b/test/sql-tap/count.test.lua
@@ -172,15 +172,15 @@ test:do_test(
         return uses_op_count("SELECT count(*) FROM t2 WHERE a IS NOT NULL")
     end, 0)
 
-test:do_catchsql_test(
+test:do_execsql_test(
     "count-2.9",
     [[
         SELECT count(*) FROM t2 HAVING count(*)>1
-    ]], {
+    ]],
         -- <count-2.9>
-        1, "a GROUP BY clause is required before HAVING"
+        {}
         -- </count-2.9>
-    })
+    )
 
 test:do_test(
     "count-2.10",
diff --git a/test/sql-tap/select3.test.lua b/test/sql-tap/select3.test.lua
index d49bb87..33a0359 100755
--- a/test/sql-tap/select3.test.lua
+++ b/test/sql-tap/select3.test.lua
@@ -200,7 +200,7 @@ test:do_catchsql_test("select3-3.1", [[
   SELECT log, count(*) FROM t1 HAVING log>=4
 ]], {
   -- <select3-3.1>
-  1, "a GROUP BY clause is required before HAVING"
+  1, "SQL error: HAVING expression must appear in the GROUP BY clause or be used in an aggregate function"
   -- </select3-3.1>
 })
 
diff --git a/test/sql-tap/select5.test.lua b/test/sql-tap/select5.test.lua
index 7ca25ae..db3f447 100755
--- a/test/sql-tap/select5.test.lua
+++ b/test/sql-tap/select5.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(32)
+test:plan(38)
 
 --!./tcltestrunner.lua
 -- 2001 September 15
@@ -412,5 +412,71 @@ test:do_execsql_test(
         -- </select5-8.8>
     })
 
+--
+-- gh-2364: Support HAVING without GROUP BY clause.
+--
+test:do_catchsql_test(
+    "select6-1.1",
+    [[
+        CREATE TABLE te40 (s1 INT, s2 INT, PRIMARY KEY (s1,s2));
+        INSERT INTO te40 VALUES (1,1);
+        INSERT INTO te40 VALUES (2,2);
+        SELECT s1 FROM te40 HAVING s1 = 1;
+    ]], {
+    -- <select6-1.1>
+    1, "SQL error: HAVING expression must appear in the GROUP BY clause or be used in an aggregate function"
+    -- <select6-1.1>
+})
+
+test:do_catchsql_test(
+    "select6-1.2",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING s1 = 2;
+    ]], {
+    -- <select6-1.2>
+    1, "SQL error: HAVING expression must appear in the GROUP BY clause or be used in an aggregate function"
+    -- <select6-1.2>
+})
+
+test:do_catchsql_test(
+    "select6-1.3",
+    [[
+        SELECT s1 FROM te40 HAVING SUM(s1) = 2;
+    ]], {
+    -- <select6-1.3>
+    1, "SQL error: HAVING expression must appear in the GROUP BY clause or be used in an aggregate function"
+    -- <select6-1.3>
+})
+
+test:do_execsql_test(
+    "select6-1.4",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select6-1.4>
+    3
+    -- <select6-1.4>
+})
+
+test:do_execsql_test(
+    "select6-2.1",
+    [[
+        SELECT MIN(s1) FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select6-2.1>
+    1
+    -- <select6-2.1>
+})
+
+test:do_execsql_test(
+    "select6-3.1",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) < 0;
+    ]],
+    -- <select6-3.1>
+    {}
+    -- <select6-3.1>
+)
+
 test:finish_test()
 
-- 
2.7.4

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: support HAVING without GROUP BY clause
  2018-08-29 13:29   ` Kirill Shcherbatov
@ 2018-09-03 11:24     ` n.pettik
  2018-09-03 13:23     ` n.pettik
  1 sibling, 0 replies; 9+ messages in thread
From: n.pettik @ 2018-09-03 11:24 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Kirill Shcherbatov


> On 29 Aug 2018, at 16:29, Kirill Shcherbatov <kshcherbatov@tarantool.org> wrote:
> 
>> Please, make it behave the same way as other DBs
>> and resend patch.

Hello. I pulled your branch today, but it still contains previous version.
Could you push your fixes?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: support HAVING without GROUP BY clause
  2018-08-29 13:29   ` Kirill Shcherbatov
  2018-09-03 11:24     ` n.pettik
@ 2018-09-03 13:23     ` n.pettik
  2018-09-10  7:51       ` Kirill Shcherbatov
  1 sibling, 1 reply; 9+ messages in thread
From: n.pettik @ 2018-09-03 13:23 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Kirill Shcherbatov


> On 29 Aug 2018, at 16:29, Kirill Shcherbatov <kshcherbatov@tarantool.org> wrote:
> 
>> Please, make it behave the same way as other DBs
>> and resend patch.

Still your patch doesn’t handle some cases. Example:

Postgres:

drop table if exists t1 \\
create table t1(id int primary key, a int, b int) \\
insert into t1 values(1, -2,3), (4,5,6), (7,8,9) \\
SELECT SUM(a),a FROM t1 HAVING sum(a) > 0 \\

42803: column "t1.a" must appear in the GROUP BY clause or be used in an aggregate function

db2:

db2 => SELECT SUM(a), a from t1 having sum(a) > 0
SQL0119N  An expression starting with "A" specified in a SELECT clause, HAVING 
clause, or ORDER BY clause is not specified in the GROUP BY clause or it is in 
a SELECT clause, HAVING clause, or ORDER BY clause with a column function and 
no GROUP BY clause is specified.  SQLSTATE=42803

Oracle:

ORA-00937: not a single-group group function

Tarantool:

tarantool> SELECT SUM(a),a FROM t1 HAVING sum(a) > 0
---
- - [11, 8]
...

> 
> ====================================
> 
> Allowed to use HAVING without GROUP BY when both select list
> and having clause have aggregate function.
> 
> Closes 2364.
> 
> @TarantoolBot document
> Title: HAVING without GROUP BY clause
> A query with a having clause should also have a group by clause.

Nit: outline also here HAVING and GROUP BY with capital letters pls. 

> Another ability is to use aggregate function both in select list
> and having clause.

I would re-phrase it like:

However, it is still possible to drop GROUP BY clause in case both SELECT
and HAVING arguments are aggregate functions

> Example:
> SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0;
> ---
> src/box/sql/resolve.c         | 30 +++++++++++++++----
> test/sql-tap/count.test.lua   |  8 ++---
> test/sql-tap/select3.test.lua |  2 +-
> test/sql-tap/select5.test.lua | 68 ++++++++++++++++++++++++++++++++++++++++++-
> 4 files changed, 97 insertions(+), 11 deletions(-)
> 
> diff --git a/src/box/sql/resolve.c b/src/box/sql/resolve.c
> index 280ecd9..9d28089 100644
> --- a/src/box/sql/resolve.c
> +++ b/src/box/sql/resolve.c
> @@ -1304,12 +1304,32 @@ resolveSelectStep(Walker * pWalker, Select * p)
> 			sNC.ncFlags &= ~NC_AllowAgg;
> 		}
> 
> -		/* If a HAVING clause is present, then there must be a GROUP BY clause.
> +		/*
> +		 * If a HAVING clause is present, then there must
> +		 * be a GROUP BY clause or aggregate function
> +		 * should be specified.
> 		 */
> -		if (p->pHaving && !pGroupBy) {
> -			sqlite3ErrorMsg(pParse,
> -					"a GROUP BY clause is required before HAVING");
> -			return WRC_Abort;
> +		if (p->pHaving != NULL && pGroupBy == NULL) {
> +			struct NameContext having_nc;
> +			memset(&having_nc, 0, sizeof(having_nc));
> +			having_nc.pParse = pParse;
> +			having_nc.ncFlags = NC_AllowAgg;
> +			having_nc.pSrcList = p->pSrc;
> +			if ((p->selFlags & SF_Aggregate) != 0 &&
> +				sqlite3ResolveExprNames(&having_nc,
> +							p->pHaving) != 0)
> +				return WRC_Abort;
> +			if ((having_nc.ncFlags & NC_HasAgg) == 0) {
> +				const char *err_msg =
> +					tt_sprintf("HAVING expression must “

Nit: HAVING is rather clause, than expression. Overall, this msg looks
a little bit weird. In Postgres it is like:

column "t1.a" must appear in the GROUP BY clause or be used in an aggregate function

So, not HAVING clause must appear, but HAVING argument.

> +						   "appear in the GROUP BY "
> +						   "clause or be used in an "
> +						   "aggregate function");
> +				diag_set(ClientError, ER_SQL, err_msg);
> +				pParse->nErr++;
> +				pParse->rc = SQL_TARANTOOL_ERROR;
> +				return WRC_Abort;
> +			}
> 		}
> 
> 		/* Add the output column list to the name-context before parsing the
> 
> 
> diff --git a/test/sql-tap/select5.test.lua b/test/sql-tap/select5.test.lua
> index 7ca25ae..db3f447 100755
> --- a/test/sql-tap/select5.test.lua
> +++ b/test/sql-tap/select5.test.lua
> @@ -1,6 +1,6 @@
> #!/usr/bin/env tarantool
> test = require("sqltester")
> -test:plan(32)
> +test:plan(38)
> 
> --!./tcltestrunner.lua
> -- 2001 September 15
> @@ -412,5 +412,71 @@ test:do_execsql_test(
>         -- </select5-8.8>
>     })
> 
> +--
> +-- gh-2364: Support HAVING without GROUP BY clause.
> +--
> +test:do_catchsql_test(
> +    "select6-1.1",
> +    [[
> +        CREATE TABLE te40 (s1 INT, s2 INT, PRIMARY KEY (s1,s2));
> +        INSERT INTO te40 VALUES (1,1);
> +        INSERT INTO te40 VALUES (2,2);
> +        SELECT s1 FROM te40 HAVING s1 = 1;
> +    ]], {
> +    -- <select6-1.1>
> +    1, "SQL error: HAVING expression must appear in the GROUP BY clause or be used in an aggregate function"
> +    -- <select6-1.1>
> +})
> +
> +test:do_catchsql_test(
> +    "select6-1.2",
> +    [[
> +        SELECT SUM(s1) FROM te40 HAVING s1 = 2;
> +    ]], {
> +    -- <select6-1.2>
> +    1, "SQL error: HAVING expression must appear in the GROUP BY clause or be used in an aggregate function"
> +    -- <select6-1.2>
> +})
> +
> +test:do_catchsql_test(
> +    "select6-1.3",
> +    [[
> +        SELECT s1 FROM te40 HAVING SUM(s1) = 2;
> +    ]], {
> +    -- <select6-1.3>
> +    1, "SQL error: HAVING expression must appear in the GROUP BY clause or be used in an aggregate function"
> +    -- <select6-1.3>
> +})
> +
> +test:do_execsql_test(
> +    "select6-1.4",
> +    [[
> +        SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0;
> +    ]], {
> +    -- <select6-1.4>
> +    3
> +    -- <select6-1.4>
> +})
> +
> +test:do_execsql_test(
> +    "select6-2.1”,

Why 2.1 and not 1.5? The same for tests below.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: support HAVING without GROUP BY clause
  2018-09-03 13:23     ` n.pettik
@ 2018-09-10  7:51       ` Kirill Shcherbatov
  2018-09-10 21:43         ` n.pettik
  0 siblings, 1 reply; 9+ messages in thread
From: Kirill Shcherbatov @ 2018-09-10  7:51 UTC (permalink / raw)
  To: tarantool-patches, Nikita Pettik

> db2 => SELECT SUM(a), a from t1 having sum(a) > 0
> SQL0119N  An expression starting with "A" specified in a SELECT clause, HAVING 
> clause, or ORDER BY clause is not specified in the GROUP BY clause or it is in 
> a SELECT clause, HAVING clause, or ORDER BY clause with a column function and 
> no GROUP BY clause is specified.  SQLSTATE=42803
I am able to handle this for now. 

> I would re-phrase it like:
> 
> However, it is still possible to drop GROUP BY clause in case both SELECT
> and HAVING arguments are aggregate functions
Ok, tnx.

> Nit: HAVING is rather clause, than expression. Overall, this msg looks
> a little bit weird. In Postgres it is like:
> 
> column "t1.a" must appear in the GROUP BY clause or be used in an aggregate function
> 
> So, not HAVING clause must appear, but HAVING argument.
---> HAVING argument

> Why 2.1 and not 1.5? The same for tests below.
It was totall invalid naming. I've fixed it.

==============================================================

From fd960870f3519d335ee5dc61fbdd950d297fe79c Mon Sep 17 00:00:00 2001
Message-Id: <fd960870f3519d335ee5dc61fbdd950d297fe79c.1536565618.git.kshcherbatov@tarantool.org>
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
Date: Wed, 29 Aug 2018 16:24:09 +0300
Subject: [PATCH] sql: support HAVING without GROUP BY

Allowed to use HAVING without GROUP BY when both select list
and having clause have aggregate function.

Closes 2364.

@TarantoolBot document
Title: HAVING without GROUP BY clause
A query with a HAVING clause should also have a GROUP BY clause.
Another ability is to use aggregate function both in select list
and HAVING clause.
Example:
SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0;
---
 src/box/sql/resolve.c         | 45 ++++++++++++++++----
 src/box/sql/sqliteInt.h       |  3 +-
 test/sql-tap/count.test.lua   |  8 ++--
 test/sql-tap/select3.test.lua |  2 +-
 test/sql-tap/select5.test.lua | 98 ++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 141 insertions(+), 15 deletions(-)

diff --git a/src/box/sql/resolve.c b/src/box/sql/resolve.c
index 280ecd9..c38374d 100644
--- a/src/box/sql/resolve.c
+++ b/src/box/sql/resolve.c
@@ -600,6 +600,8 @@ resolveExprStep(Walker * pWalker, Expr * pExpr)
 		/* A lone identifier is the name of a column.
 		 */
 	case TK_ID:{
+			if ((pNC->ncFlags & NC_AllowAgg) != 0)
+				pNC->ncFlags |= NC_HasUnaggregatedId;
 			return lookupName(pParse, 0, pExpr->u.zToken, pNC,
 					  pExpr);
 		}
@@ -1283,13 +1285,19 @@ resolveSelectStep(Walker * pWalker, Select * p)
 		/* Set up the local name-context to pass to sqlite3ResolveExprNames() to
 		 * resolve the result-set expression list.
 		 */
+		bool all_select_agg = true;
 		sNC.ncFlags = NC_AllowAgg;
 		sNC.pSrcList = p->pSrc;
 		sNC.pNext = pOuterNC;
-
 		/* Resolve names in the result set. */
-		if (sqlite3ResolveExprListNames(&sNC, p->pEList))
-			return WRC_Abort;
+		for (i = 0; i < p->pEList->nExpr; i++) {
+			u16 has_agg_flag = sNC.ncFlags & NC_HasAgg;
+			sNC.ncFlags &= ~NC_HasAgg;
+			if (sqlite3ResolveExprNames(&sNC, p->pEList->a[i].pExpr))
+				return WRC_Abort;
+			all_select_agg &= (sNC.ncFlags & NC_HasAgg) != 0;
+			sNC.ncFlags |= has_agg_flag;
+		}
 
 		/* If there are no aggregate functions in the result-set, and no GROUP BY
 		 * expression, do not allow aggregates in any of the other expressions.
@@ -1304,12 +1312,33 @@ resolveSelectStep(Walker * pWalker, Select * p)
 			sNC.ncFlags &= ~NC_AllowAgg;
 		}
 
-		/* If a HAVING clause is present, then there must be a GROUP BY clause.
+		/*
+		 * If a HAVING clause is present, then there must
+		 * be a GROUP BY clause or aggregate function
+		 * should be specified.
 		 */
-		if (p->pHaving && !pGroupBy) {
-			sqlite3ErrorMsg(pParse,
-					"a GROUP BY clause is required before HAVING");
-			return WRC_Abort;
+		if (p->pHaving != NULL && pGroupBy == NULL) {
+			struct NameContext having_nc;
+			memset(&having_nc, 0, sizeof(having_nc));
+			having_nc.pParse = pParse;
+			having_nc.ncFlags = NC_AllowAgg;
+			having_nc.pSrcList = p->pSrc;
+			if (all_select_agg &&
+			    sqlite3ResolveExprNames(&having_nc,
+						    p->pHaving) != 0)
+				return WRC_Abort;
+			if ((having_nc.ncFlags & NC_HasAgg) == 0 ||
+			    (having_nc.ncFlags & NC_HasUnaggregatedId) != 0) {
+				const char *err_msg =
+					tt_sprintf("HAVING argument must "
+						   "appear in the GROUP BY "
+						   "clause or be used in an "
+						   "aggregate function");
+				diag_set(ClientError, ER_SQL, err_msg);
+				pParse->nErr++;
+				pParse->rc = SQL_TARANTOOL_ERROR;
+				return WRC_Abort;
+			}
 		}
 
 		/* Add the output column list to the name-context before parsing the
diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h
index abb6829..6c08fd1 100644
--- a/src/box/sql/sqliteInt.h
+++ b/src/box/sql/sqliteInt.h
@@ -2528,7 +2528,8 @@ struct NameContext {
 #define NC_IdxExpr   0x0020	/* True if resolving columns of CREATE INDEX */
 #define NC_VarSelect 0x0040	/* A correlated subquery has been seen */
 #define NC_MinMaxAgg 0x1000	/* min/max aggregates seen.  See note above */
-
+/** One or more identifiers seen without aggregate function. */
+#define NC_HasUnaggregatedId     0x2000
 /*
  * An instance of the following structure contains all information
  * needed to generate code for a single SELECT statement.
diff --git a/test/sql-tap/count.test.lua b/test/sql-tap/count.test.lua
index 45808de..700d047 100755
--- a/test/sql-tap/count.test.lua
+++ b/test/sql-tap/count.test.lua
@@ -172,15 +172,15 @@ test:do_test(
         return uses_op_count("SELECT count(*) FROM t2 WHERE a IS NOT NULL")
     end, 0)
 
-test:do_catchsql_test(
+test:do_execsql_test(
     "count-2.9",
     [[
         SELECT count(*) FROM t2 HAVING count(*)>1
-    ]], {
+    ]],
         -- <count-2.9>
-        1, "a GROUP BY clause is required before HAVING"
+        {}
         -- </count-2.9>
-    })
+    )
 
 test:do_test(
     "count-2.10",
diff --git a/test/sql-tap/select3.test.lua b/test/sql-tap/select3.test.lua
index d49bb87..c2e901e 100755
--- a/test/sql-tap/select3.test.lua
+++ b/test/sql-tap/select3.test.lua
@@ -200,7 +200,7 @@ test:do_catchsql_test("select3-3.1", [[
   SELECT log, count(*) FROM t1 HAVING log>=4
 ]], {
   -- <select3-3.1>
-  1, "a GROUP BY clause is required before HAVING"
+  1, "SQL error: HAVING argument must appear in the GROUP BY clause or be used in an aggregate function"
   -- </select3-3.1>
 })
 
diff --git a/test/sql-tap/select5.test.lua b/test/sql-tap/select5.test.lua
index 7ca25ae..d5bab1d 100755
--- a/test/sql-tap/select5.test.lua
+++ b/test/sql-tap/select5.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(32)
+test:plan(41)
 
 --!./tcltestrunner.lua
 -- 2001 September 15
@@ -412,5 +412,101 @@ test:do_execsql_test(
         -- </select5-8.8>
     })
 
+--
+-- gh-2364: Support HAVING without GROUP BY clause.
+--
+test:do_catchsql_test(
+    "select5-9.1",
+    [[
+        CREATE TABLE te40 (s1 INT, s2 INT, PRIMARY KEY (s1,s2));
+        INSERT INTO te40 VALUES (1,1);
+        INSERT INTO te40 VALUES (2,2);
+        SELECT s1 FROM te40 HAVING s1 = 1;
+    ]], {
+    -- <select5-9.1>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be used in an aggregate function"
+    -- <select5-9.1>
+})
+
+test:do_catchsql_test(
+    "select5-9.2",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING s1 = 2;
+    ]], {
+    -- <select5-9.2>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be used in an aggregate function"
+    -- <select5-9.2>
+})
+
+test:do_catchsql_test(
+    "select5-9.3",
+    [[
+        SELECT s1 FROM te40 HAVING SUM(s1) = 2;
+    ]], {
+    -- <select5-9.3>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be used in an aggregate function"
+    -- <select5-9.3>
+})
+
+test:do_execsql_test(
+    "select5-9.4",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.4>
+    3
+    -- <select5-9.4>
+})
+
+test:do_execsql_test(
+    "select5-9.5",
+    [[
+        SELECT MIN(s1) FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.5>
+    1
+    -- <select5-9.5>
+})
+
+test:do_execsql_test(
+    "select5-9.6",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) < 0;
+    ]],
+    -- <select5-9.6>
+    {}
+    -- <select5-9.6>
+)
+
+test:do_catchsql_test(
+    "select5-9.7",
+    [[
+        SELECT SUM(s1),s2 FROM te40 HAVING SUM(s1) > 0;
+    ]], {
+    -- <select5-9.7>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be used in an aggregate function"
+    -- <select5-9.7>
+})
+
+test:do_catchsql_test(
+    "select5-9.8",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0 and s2 > 0;
+    ]], {
+    -- <select5-9.8>
+    1, "SQL error: HAVING argument must appear in the GROUP BY clause or be used in an aggregate function"
+    -- <select5-9.8>
+})
+
+test:do_execsql_test(
+    "select5-9.9",
+    [[
+        SELECT SUM(s1) FROM te40 HAVING SUM(s1) > 0 and SUM(s2) > 0;
+    ]], {
+    -- <select5-9.9>
+    3
+    -- <select5-9.9>
+})
+
 test:finish_test()
 
-- 
2.7.4

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: support HAVING without GROUP BY clause
  2018-09-10  7:51       ` Kirill Shcherbatov
@ 2018-09-10 21:43         ` n.pettik
  2018-09-20 12:43           ` Kirill Shcherbatov
  0 siblings, 1 reply; 9+ messages in thread
From: n.pettik @ 2018-09-10 21:43 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Kirill Shcherbatov

Still, other DBs (as usual I checked - Postgres, Oracle and DB2) process this
query as well:

 select 1 from t1 having min(a) > 0;

In other words, not only aggregate can appear in SELECT args,
but also constant literals. Semantically, it seems to be correct.
Moreover, quieres like

select date() from t1 having min(a) > 0;

are valid too. What I mean is SELECT arguments must return
single value for all rows in table (i.e. be single-group).

From the first sight, this problem is likely to be minor, but I guess
we should implement correct behaviour as far as we are dealing
with this issue right now.

> diff --git a/src/box/sql/resolve.c b/src/box/sql/resolve.c
> index 280ecd9..c38374d 100644
> --- a/src/box/sql/resolve.c
> +++ b/src/box/sql/resolve.c
> @@ -600,6 +600,8 @@ resolveExprStep(Walker * pWalker, Expr * pExpr)
> 		/* A lone identifier is the name of a column.
> 		 */
> 	case TK_ID:{
> +			if ((pNC->ncFlags & NC_AllowAgg) != 0)
> +				pNC->ncFlags |= NC_HasUnaggregatedId;
> 			return lookupName(pParse, 0, pExpr->u.zToken, pNC,
> 					  pExpr);
> 		}
> @@ -1283,13 +1285,19 @@ resolveSelectStep(Walker * pWalker, Select * p)
> 		/* Set up the local name-context to pass to sqlite3ResolveExprNames() to
> 		 * resolve the result-set expression list.
> 		 */
> +		bool all_select_agg = true;

Nit: use ‘is_’ or ‘if_’ prefix for predicate variable.

> 		sNC.ncFlags = NC_AllowAgg;
> 		sNC.pSrcList = p->pSrc;
> 		sNC.pNext = pOuterNC;
> -
> 		/* Resolve names in the result set. */
> -		if (sqlite3ResolveExprListNames(sNC, p->pEList))
> -			return WRC_Abort;

Leave here explanation comment pls. Without any notion it is
quite hard to understand that this snippet helps to check
HAVING without GROUP BY queries.

> +		for (i = 0; i < p->pEList->nExpr; i++) {
> +			u16 has_agg_flag = sNC.ncFlags & NC_HasAgg;
> +			sNC.ncFlags &= ~NC_HasAgg;
> +			if (sqlite3ResolveExprNames(&sNC, p->pEList->a[i].pExpr))
> +				return WRC_Abort;
> +			all_select_agg &= (sNC.ncFlags & NC_HasAgg) != 0;
> +			sNC.ncFlags |= has_agg_flag;
> +		}

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: support HAVING without GROUP BY clause
  2018-09-10 21:43         ` n.pettik
@ 2018-09-20 12:43           ` Kirill Shcherbatov
  2018-10-01 16:37             ` n.pettik
  0 siblings, 1 reply; 9+ messages in thread
From: Kirill Shcherbatov @ 2018-09-20 12:43 UTC (permalink / raw)
  To: tarantool-patches, Nikita Pettik

On 11.09.2018 00:43, n.pettik wrote:
> Still, other DBs (as usual I checked - Postgres, Oracle and DB2) process this
> query as well:
> 
>  select 1 from t1 having min(a) > 0;
> 
> In other words, not only aggregate can appear in SELECT args,
> but also constant literals. Semantically, it seems to be correct.
> Moreover, quieres like
> 
> select date() from t1 having min(a) > 0;
> 
> are valid too. What I mean is SELECT arguments must return
> single value for all rows in table (i.e. be single-group).
> 
> From the first sight, this problem is likely to be minor, but I guess
> we should implement correct behaviour as far as we are dealing
> with this issue right now.
This cases are totally useless, not so trivial to implement (need to calculate
expression in VDBE, e.g.
SELECT (SELECT s1 FROM te40 LIMIT 1) FROM te40 HAVING min(s1) > 0
is a valid construction for Postgress.
Moreover, current checks are the part of resolveSelectStep that 

Asked Peter, does it really important.

> Nit: use ‘is_’ or ‘if_’ prefix for predicate variable.
Ok.

> Leave here explanation comment pls. Without any notion it is
> quite hard to understand that this snippet helps to check
> HAVING without GROUP BY queries.
Ok.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [tarantool-patches] Re: [PATCH v1 1/1] sql: support HAVING without GROUP BY clause
  2018-09-20 12:43           ` Kirill Shcherbatov
@ 2018-10-01 16:37             ` n.pettik
  0 siblings, 0 replies; 9+ messages in thread
From: n.pettik @ 2018-10-01 16:37 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Kirill Shcherbatov


>> Still, other DBs (as usual I checked - Postgres, Oracle and DB2) process this
>> query as well:
>> 
>> select 1 from t1 having min(a) > 0;
>> 
>> In other words, not only aggregate can appear in SELECT args,
>> but also constant literals. Semantically, it seems to be correct.
>> Moreover, quieres like
>> 
>> select date() from t1 having min(a) > 0;
>> 
>> are valid too. What I mean is SELECT arguments must return
>> single value for all rows in table (i.e. be single-group).
>> 
>> From the first sight, this problem is likely to be minor, but I guess
>> we should implement correct behaviour as far as we are dealing
>> with this issue right now.
> This cases are totally useless, not so trivial to implement (need to calculate
> expression in VDBE, e.g.
> SELECT (SELECT s1 FROM te40 LIMIT 1) FROM te40 HAVING min(s1) > 0
> is a valid construction for Postgress.
> Moreover, current checks are the part of resolveSelectStep that 
> 
> Asked Peter, does it really important.

I would like to notice that we’ve decided to allow literals and constant functions also
appear in select arg list.

From new diff:

diff --git a/src/box/sql/resolve.c b/src/box/sql/resolve.c
index 9a2d6ff4e..7c0d9b44e 100644
--- a/src/box/sql/resolve.c
+++ b/src/box/sql/resolve.c
@@ -602,6 +602,8 @@ resolveExprStep(Walker * pWalker, Expr * pExpr)
                /* A lone identifier is the name of a column.
                 */
        case TK_ID:{
+                       if ((pNC->ncFlags & NC_AllowAgg) != 0)
+                               pNC->ncFlags |= NC_HasUnaggregatedId;
                        return lookupName(pParse, 0, pExpr->u.zToken, pNC,
                                          pExpr);
                }
@@ -1180,6 +1182,44 @@ resolveOrderGroupBy(NameContext * pNC,   /* The name context of the SELECT stateme
        return sqlite3ResolveOrderGroupBy(pParse, pSelect, pOrderBy, zType);
 }
 
+/**
+ * Test if specified expression is a regular literal.
+ * @param expr Expression to analyse.
+ * @retval true  If expression is literal.
+ * @retval false Otherwise.
+ */
+static bool
+sql_expr_is_literal(struct Expr *expr)
+{
+       return expr->op == TK_INTEGER || expr->op == TK_FLOAT ||
+              expr->op == TK_BLOB || expr->op == TK_STRING;
+}

What about TK_NULL? What about unary minus?
This examples now fail:

tarantool> SELECT -1 FROM te40 HAVING SUM(s1) > 0;
---
- error: 'SQL error: HAVING argument must appear in the GROUP BY clause or be used
    in an aggregate function’
…

tarantool> SELECT  NULL FROM te40 HAVING SUM(s1) > 0;
---
- error: 'SQL error: HAVING argument must appear in the GROUP BY clause or be used
    in an aggregate function'
...

+
+/**
+ * Test if specified expression is a constant function.
+ * @param parser Parsing context.
+ * @param expr Expression to analyse.
+ * @retval true  If expression is a existent constant function.
+ * @retval false Otherwise.
+ */
+static bool
+sql_expr_is_constat_func(struct Parse *parser, struct Expr *expr)
+{
+       if (expr->op != TK_FUNCTION)
+               return false;
+       char *func_name = expr->u.zToken;
+       struct ExprList *args_list = expr->x.pList;
+       int args_count = args_list != NULL ? args_list->nExpr : 0;
+       struct FuncDef *func =
+               sqlite3FindFunction(parser->db, func_name, args_count, 0);
+       if (func == NULL)
+               func =  sqlite3FindFunction(parser->db, func_name, -2, 0);
+       if (func == NULL)
+               return false;
+       return func != NULL ?
+              (func->funcFlags & SQLITE_FUNC_CONSTANT) != 0 : false;
+}
+

Consider refactoring: at last return ‘func’ can’t be NULL - this check is performed one
line above.

@@ -1203,7 +1204,7 @@ sql_expr_is_literal(struct Expr *expr)
  * @retval false Otherwise.
  */
 static bool
-sql_expr_is_constat_func(struct Parse *parser, struct Expr *expr)
+sql_expr_is_constant_func(struct Parse *parser, struct Expr *expr)
 {
        if (expr->op != TK_FUNCTION)
                return false;
@@ -1212,12 +1213,17 @@ sql_expr_is_constat_func(struct Parse *parser, struct Expr *expr)
        int args_count = args_list != NULL ? args_list->nExpr : 0;
        struct FuncDef *func =
                sqlite3FindFunction(parser->db, func_name, args_count, 0);
-       if (func == NULL)
-               func =  sqlite3FindFunction(parser->db, func_name, -2, 0);
-       if (func == NULL)
-               return false;
-       return func != NULL ?
-              (func->funcFlags & SQLITE_FUNC_CONSTANT) != 0 : false;
+       if (func == NULL) {
+               /*
+                * If we fail to find function with exact number
+                * of arguments, lets try to search similar
+                * function but with different number of args.
+                */
+               func = sqlite3FindFunction(parser->db, func_name, -2, 0);
+               if (func == NULL)
+                       return false;
+       }
+       return (func->funcFlags & SQLITE_FUNC_CONSTANT) != 0;
 }
 
 /*
@@ -1338,8 +1344,8 @@ resolveSelectStep(Walker * pWalker, Select * p)
                                return WRC_Abort;
                        is_all_select_agg &= (sNC.ncFlags & NC_HasAgg) != 0 ||
                                             sql_expr_is_literal(expr) ||
-                                            sql_expr_is_constat_func(pParse,
-                                                                     expr);
+                                            sql_expr_is_constant_func(pParse,
+                                                                      expr);

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-10-01 16:38 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-28 16:39 [tarantool-patches] [PATCH v1 1/1] sql: support HAVING without GROUP BY clause Kirill Shcherbatov
2018-08-29 11:52 ` [tarantool-patches] " n.pettik
2018-08-29 13:29   ` Kirill Shcherbatov
2018-09-03 11:24     ` n.pettik
2018-09-03 13:23     ` n.pettik
2018-09-10  7:51       ` Kirill Shcherbatov
2018-09-10 21:43         ` n.pettik
2018-09-20 12:43           ` Kirill Shcherbatov
2018-10-01 16:37             ` n.pettik

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox