Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH v2 0/4] Introduce custom aggregate function
@ 2022-02-01 13:37 Mergen Imeev via Tarantool-patches
  2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 1/4] sql: fix COUNT() optimization conditions Mergen Imeev via Tarantool-patches
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Mergen Imeev via Tarantool-patches @ 2022-02-01 13:37 UTC (permalink / raw)
  To: v.shpilevoy; +Cc: tarantool-patches

This patch-set introduces user-defined aggregate functions to SQL.

Changes in v2:
 - Patch-set was reworked due to new design.

https://github.com/tarantool/tarantool/issues/2579
https://github.com/tarantool/tarantool/tree/imeevma/gh-2579-custom-aggregate-functions

Mergen Imeev (4):
  sql: fix COUNT() optimization conditions
  sql: drop unnecessary P2 register for OP_AggFinal
  sql: introduce custom aggregate functions
  sql: introduce FINALIZE for custom aggregate

 .../gh-2579-introduce-custom-aggregates.md    |   3 +
 src/box/alter.cc                              |  13 +
 src/box/lua/schema.lua                        |   2 +-
 src/box/sql/expr.c                            |  17 +-
 src/box/sql/func.c                            |  27 ++-
 src/box/sql/resolve.c                         |  12 +
 src/box/sql/select.c                          |  42 +++-
 src/box/sql/sqlInt.h                          |   6 +-
 src/box/sql/vdbe.c                            |  17 +-
 test/sql-tap/CMakeLists.txt                   |   2 +
 test/sql-tap/gh-2579-custom-aggregate.c       |  28 +++
 .../sql-tap/gh-2579-custom-aggregate.test.lua | 224 ++++++++++++++++++
 12 files changed, 356 insertions(+), 37 deletions(-)
 create mode 100644 changelogs/unreleased/gh-2579-introduce-custom-aggregates.md
 create mode 100644 test/sql-tap/gh-2579-custom-aggregate.c
 create mode 100755 test/sql-tap/gh-2579-custom-aggregate.test.lua

-- 
2.25.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Tarantool-patches] [PATCH v2 1/4] sql: fix COUNT() optimization conditions
  2022-02-01 13:37 [Tarantool-patches] [PATCH v2 0/4] Introduce custom aggregate function Mergen Imeev via Tarantool-patches
@ 2022-02-01 13:37 ` Mergen Imeev via Tarantool-patches
  2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 2/4] sql: drop unnecessary P2 register for OP_AggFinal Mergen Imeev via Tarantool-patches
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 9+ messages in thread
From: Mergen Imeev via Tarantool-patches @ 2022-02-01 13:37 UTC (permalink / raw)
  To: v.shpilevoy; +Cc: tarantool-patches

This patch fixes the conditions under which COUNT() is optimized. At
some point, the conditions were broken, but since there was no other
aggregate function requiring zero arguments, this problem did not change
the behavior.

Needed for #2579
---
 src/box/sql/select.c | 2 +-
 src/box/sql/sqlInt.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index 55aaff87f..b532cac4e 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -4650,7 +4650,7 @@ is_simple_count(struct Select *select, struct AggInfo *agg_info)
 		return NULL;
 	assert(agg_info->aFunc->func->def->language ==
 	       FUNC_LANGUAGE_SQL_BUILTIN);
-	if (sql_func_flag_is_set(agg_info->aFunc->func, SQL_FUNC_COUNT) ||
+	if (strcmp(agg_info->aFunc->func->def->name, "COUNT") != 0 ||
 	    (agg_info->aFunc->pExpr->x.pList != NULL &&
 	     agg_info->aFunc->pExpr->x.pList->nExpr > 0))
 		return NULL;
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 0db16b293..f49522dc8 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -1117,8 +1117,6 @@ struct type_def {
 					 */
 #define SQL_FUNC_LENGTH   0x0040	/* Built-in length() function */
 #define SQL_FUNC_TYPEOF   0x0080	/* Built-in typeof() function */
-/** Built-in count() function. */
-#define SQL_FUNC_COUNT    0x0100
 #define SQL_FUNC_COALESCE 0x0200	/* Built-in coalesce() or ifnull() */
 #define SQL_FUNC_UNLIKELY 0x0400	/* Built-in unlikely() function */
 /** Built-in min() or least() function. */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Tarantool-patches] [PATCH v2 2/4] sql: drop unnecessary P2 register for OP_AggFinal
  2022-02-01 13:37 [Tarantool-patches] [PATCH v2 0/4] Introduce custom aggregate function Mergen Imeev via Tarantool-patches
  2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 1/4] sql: fix COUNT() optimization conditions Mergen Imeev via Tarantool-patches
@ 2022-02-01 13:37 ` Mergen Imeev via Tarantool-patches
  2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 3/4] sql: introduce custom aggregate functions Mergen Imeev via Tarantool-patches
  2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 4/4] sql: introduce FINALIZE for custom aggregate Mergen Imeev via Tarantool-patches
  3 siblings, 0 replies; 9+ messages in thread
From: Mergen Imeev via Tarantool-patches @ 2022-02-01 13:37 UTC (permalink / raw)
  To: v.shpilevoy; +Cc: tarantool-patches

---
 src/box/sql/select.c |  4 +---
 src/box/sql/vdbe.c   | 17 +++++------------
 2 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index b532cac4e..6159a9670 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -5573,10 +5573,8 @@ finalizeAggFunctions(Parse * pParse, AggInfo * pAggInfo)
 	int i;
 	struct AggInfo_func *pF;
 	for (i = 0, pF = pAggInfo->aFunc; i < pAggInfo->nFunc; i++, pF++) {
-		ExprList *pList = pF->pExpr->x.pList;
 		assert(!ExprHasProperty(pF->pExpr, EP_xIsSelect));
-		sqlVdbeAddOp2(v, OP_AggFinal, pF->iMem,
-				  pList ? pList->nExpr : 0);
+		sqlVdbeAddOp1(v, OP_AggFinal, pF->iMem);
 		sqlVdbeAppendP4(v, pF->func, P4_FUNC);
 	}
 }
diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index 24cb28260..c095bb833 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -4190,18 +4190,11 @@ case OP_AggStep: {
 	break;
 }
 
-/* Opcode: AggFinal P1 P2 * P4 *
- * Synopsis: accum=r[P1] N=P2
- *
- * Execute the finalizer function for an aggregate.  P1 is
- * the memory location that is the accumulator for the aggregate.
- *
- * P2 is the number of arguments that the step function takes and
- * P4 is a pointer to the FuncDef for this function.  The P2
- * argument is not used by this opcode.  It is only there to disambiguate
- * functions that can take varying numbers of arguments.  The
- * P4 argument is only needed for the degenerate case where
- * the step function was not previously called.
+/* Opcode: AggFinal P1 * * P4 *
+ * Synopsis: accum=r[P1]
+ *
+ * Execute the finalizer function for an aggregate. P1 is the memory location
+ * that is the accumulator for the aggregate. P4 is a pointer to the function.
  */
 case OP_AggFinal: {
 	assert(pOp->p1>0 && pOp->p1<=(p->nMem+1 - p->nCursor));
-- 
2.25.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Tarantool-patches] [PATCH v2 3/4] sql: introduce custom aggregate functions
  2022-02-01 13:37 [Tarantool-patches] [PATCH v2 0/4] Introduce custom aggregate function Mergen Imeev via Tarantool-patches
  2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 1/4] sql: fix COUNT() optimization conditions Mergen Imeev via Tarantool-patches
  2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 2/4] sql: drop unnecessary P2 register for OP_AggFinal Mergen Imeev via Tarantool-patches
@ 2022-02-01 13:37 ` Mergen Imeev via Tarantool-patches
  2022-02-03 23:29   ` Vladislav Shpilevoy via Tarantool-patches
  2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 4/4] sql: introduce FINALIZE for custom aggregate Mergen Imeev via Tarantool-patches
  3 siblings, 1 reply; 9+ messages in thread
From: Mergen Imeev via Tarantool-patches @ 2022-02-01 13:37 UTC (permalink / raw)
  To: v.shpilevoy; +Cc: tarantool-patches

This patch introduces user-defined aggregate functions in SQL.

Part of #2579
---
 src/box/alter.cc                              |  13 ++
 src/box/lua/schema.lua                        |   2 +-
 src/box/sql/expr.c                            |  17 ++-
 src/box/sql/func.c                            |  14 ++-
 src/box/sql/select.c                          |  31 +++--
 test/sql-tap/CMakeLists.txt                   |   2 +
 test/sql-tap/gh-2579-custom-aggregate.c       |  28 +++++
 .../sql-tap/gh-2579-custom-aggregate.test.lua | 113 ++++++++++++++++++
 8 files changed, 200 insertions(+), 20 deletions(-)
 create mode 100644 test/sql-tap/gh-2579-custom-aggregate.c
 create mode 100755 test/sql-tap/gh-2579-custom-aggregate.test.lua

diff --git a/src/box/alter.cc b/src/box/alter.cc
index 65c1cb952..b85d279e3 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -3460,6 +3460,13 @@ func_def_new_from_tuple(struct tuple *tuple)
 				  def->name, "invalid aggregate value");
 			return NULL;
 		}
+		if (def->aggregate == FUNC_AGGREGATE_GROUP &&
+		    def->exports.lua) {
+			diag_set(ClientError, ER_CREATE_FUNCTION, def->name,
+				 "aggregate function can only be accessed in "
+				 "SQL");
+			return NULL;
+		}
 		const char *param_list = tuple_field_with_type(tuple,
 			BOX_FUNC_FIELD_PARAM_LIST, MP_ARRAY);
 		if (param_list == NULL)
@@ -3482,6 +3489,12 @@ func_def_new_from_tuple(struct tuple *tuple)
 				return NULL;
 			}
 		}
+		if (def->aggregate == FUNC_AGGREGATE_GROUP && argc == 0) {
+			diag_set(ClientError, ER_CREATE_FUNCTION, def->name,
+				 "aggregate function must have at least one "
+				 "argument");
+			return NULL;
+		}
 		def->param_count = argc;
 		const char *opts = tuple_field(tuple, BOX_FUNC_FIELD_OPTS);
 		if (opts_decode(&def->opts, func_opts_reg, &opts,
diff --git a/src/box/lua/schema.lua b/src/box/lua/schema.lua
index 2c02949c5..23d6d0f64 100644
--- a/src/box/lua/schema.lua
+++ b/src/box/lua/schema.lua
@@ -2603,7 +2603,7 @@ box.schema.func.create = function(name, opts)
                               language = 'string', body = 'string',
                               is_deterministic = 'boolean',
                               is_sandboxed = 'boolean',
-                              is_multikey = 'boolean',
+                              is_multikey = 'boolean', aggregate = 'string',
                               takes_raw_args = 'boolean',
                               comment = 'string',
                               param_list = 'table', returns = 'string',
diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index eb169aeb8..920ad9d08 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -5469,6 +5469,17 @@ analyzeAggregate(Walker * pWalker, Expr * pExpr)
 						       (pExpr, EP_xIsSelect));
 						pItem = &pAggInfo->aFunc[i];
 						pItem->pExpr = pExpr;
+						int n = pExpr->x.pList == NULL ?
+							0 : pExpr->x.pList->nExpr;
+						/*
+						 * Allocate n MEMs for arguments
+						 * and one more MEM for
+						 * accumulator. This makes it
+						 * easier to pass these n + 1
+						 * MEMs to the user-defined
+						 * aggregate function.
+						 */
+						pParse->nMem += n;
 						pItem->iMem = ++pParse->nMem;
 						assert(!ExprHasProperty
 						       (pExpr, EP_IntValue));
@@ -5479,12 +5490,6 @@ analyzeAggregate(Walker * pWalker, Expr * pExpr)
 								true;
 							return WRC_Abort;
 						}
-						assert(pItem->func->def->
-						       language ==
-						       FUNC_LANGUAGE_SQL_BUILTIN &&
-						       pItem->func->def->
-						       aggregate ==
-						       FUNC_AGGREGATE_GROUP);
 						if (pExpr->flags & EP_Distinct) {
 							pItem->iDistinct =
 								pParse->nTab++;
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index b69bf7fd6..cda872194 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -2060,9 +2060,12 @@ sql_func_find(struct Expr *expr)
 		return NULL;
 	}
 	int n = expr->x.pList != NULL ? expr->x.pList->nExpr : 0;
-	if (func->def->param_count != n) {
+	int argc = func->def->aggregate == FUNC_AGGREGATE_GROUP ?
+		   func->def->param_count - 1 : func->def->param_count;
+	assert(argc >= 0);
+	if (argc != n) {
 		diag_set(ClientError, ER_FUNC_WRONG_ARG_COUNT, name,
-			 tt_sprintf("%d", func->def->param_count), n);
+			 tt_sprintf("%d", argc), n);
 		return NULL;
 	}
 	return func;
@@ -2072,9 +2075,12 @@ uint32_t
 sql_func_flags(const char *name)
 {
 	struct sql_func_dictionary *dict = built_in_func_get(name);
-	if (dict == NULL)
+	if (dict != NULL)
+		return dict->flags;
+	struct func *func = func_by_name(name, strlen(name));
+	if (func == NULL || func->def->aggregate != FUNC_AGGREGATE_GROUP)
 		return 0;
-	return dict->flags;
+	return SQL_FUNC_AGG;
 }
 
 static struct func_vtab func_sql_builtin_vtab;
diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index 6159a9670..dcea48c6e 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -4648,8 +4648,6 @@ is_simple_count(struct Select *select, struct AggInfo *agg_info)
 		return NULL;
 	if (NEVER(agg_info->nFunc == 0))
 		return NULL;
-	assert(agg_info->aFunc->func->def->language ==
-	       FUNC_LANGUAGE_SQL_BUILTIN);
 	if (strcmp(agg_info->aFunc->func->def->name, "COUNT") != 0 ||
 	    (agg_info->aFunc->pExpr->x.pList != NULL &&
 	     agg_info->aFunc->pExpr->x.pList->nExpr > 0))
@@ -5562,6 +5560,15 @@ resetAccumulator(Parse * pParse, AggInfo * pAggInfo)
 	}
 }
 
+static inline void
+finalize_agg_function(struct Vdbe *vdbe, const struct AggInfo_func *agg_func)
+{
+	if (agg_func->func->def->language == FUNC_LANGUAGE_SQL_BUILTIN) {
+		sqlVdbeAddOp1(vdbe, OP_AggFinal, agg_func->iMem);
+		sqlVdbeAppendP4(vdbe, agg_func->func, P4_FUNC);
+	}
+}
+
 /*
  * Invoke the OP_AggFinalize opcode for every aggregate function
  * in the AggInfo structure.
@@ -5574,8 +5581,7 @@ finalizeAggFunctions(Parse * pParse, AggInfo * pAggInfo)
 	struct AggInfo_func *pF;
 	for (i = 0, pF = pAggInfo->aFunc; i < pAggInfo->nFunc; i++, pF++) {
 		assert(!ExprHasProperty(pF->pExpr, EP_xIsSelect));
-		sqlVdbeAddOp1(v, OP_AggFinal, pF->iMem);
-		sqlVdbeAppendP4(v, pF->func, P4_FUNC);
+		finalize_agg_function(v, pF);
 	}
 }
 
@@ -5602,7 +5608,7 @@ updateAccumulator(Parse * pParse, AggInfo * pAggInfo)
 		assert(!ExprHasProperty(pF->pExpr, EP_xIsSelect));
 		if (pList) {
 			nArg = pList->nExpr;
-			regAgg = sqlGetTempRange(pParse, nArg);
+			regAgg = pF->iMem - nArg;
 			sqlExprCodeExprList(pParse, pList, regAgg, 0,
 						SQL_ECEL_DUP);
 		} else {
@@ -5642,10 +5648,17 @@ updateAccumulator(Parse * pParse, AggInfo * pAggInfo)
 			pParse->is_aborted = true;
 			return;
 		}
-		sqlVdbeAddOp3(v, OP_AggStep, nArg, regAgg, pF->iMem);
-		sqlVdbeAppendP4(v, ctx, P4_FUNCCTX);
-		sql_expr_type_cache_change(pParse, regAgg, nArg);
-		sqlReleaseTempRange(pParse, regAgg, nArg);
+		if (pF->func->def->language == FUNC_LANGUAGE_SQL_BUILTIN) {
+			sqlVdbeAddOp3(v, OP_AggStep, nArg, regAgg, pF->iMem);
+			sqlVdbeAppendP4(v, ctx, P4_FUNCCTX);
+		} else {
+			const char *name = pF->func->def->name;
+			uint32_t len = pF->func->def->name_len;
+			const char *str = sqlDbStrNDup(pParse->db, name, len);
+			assert(regAgg == pF->iMem - nArg);
+			sqlVdbeAddOp4(v, OP_FunctionByName, nArg + 1, regAgg,
+				      pF->iMem, str, P4_DYNAMIC);
+		}
 		if (addrNext) {
 			sqlVdbeResolveLabel(v, addrNext);
 			sqlExprCacheClear(pParse);
diff --git a/test/sql-tap/CMakeLists.txt b/test/sql-tap/CMakeLists.txt
index c4ec1214a..136a517d4 100644
--- a/test/sql-tap/CMakeLists.txt
+++ b/test/sql-tap/CMakeLists.txt
@@ -3,6 +3,8 @@ build_module(gh-5938-wrong-string-length gh-5938-wrong-string-length.c)
 target_link_libraries(gh-5938-wrong-string-length msgpuck)
 build_module(gh-6024-funcs-return-bin gh-6024-funcs-return-bin.c)
 target_link_libraries(gh-6024-funcs-return-bin msgpuck)
+build_module(gh-2579-custom-aggregate gh-2579-custom-aggregate.c)
+target_link_libraries(gh-2579-custom-aggregate msgpuck)
 build_module(sql_uuid sql_uuid.c)
 target_link_libraries(sql_uuid msgpuck core)
 build_module(decimal decimal.c)
diff --git a/test/sql-tap/gh-2579-custom-aggregate.c b/test/sql-tap/gh-2579-custom-aggregate.c
new file mode 100644
index 000000000..f7d8a70a4
--- /dev/null
+++ b/test/sql-tap/gh-2579-custom-aggregate.c
@@ -0,0 +1,28 @@
+#include "msgpuck.h"
+#include "module.h"
+
+enum {
+	BUF_SIZE = 512,
+};
+
+int
+f3(box_function_ctx_t *ctx, const char *args, const char *args_end)
+{
+	(void)args_end;
+	uint32_t arg_count = mp_decode_array(&args);
+	if (arg_count != 2) {
+		return box_error_set(__FILE__, __LINE__, ER_PROC_C,
+				     "invalid argument count");
+	}
+	int num = mp_decode_uint(&args);
+	int sum = 0;
+	if (mp_typeof(*args) != MP_UINT)
+		mp_decode_nil(&args);
+	else
+		sum = mp_decode_uint(&args);
+	sum += num * num;
+	char res[BUF_SIZE];
+	char *end = mp_encode_uint(res, sum);
+	box_return_mp(ctx, res, end);
+	return 0;
+}
diff --git a/test/sql-tap/gh-2579-custom-aggregate.test.lua b/test/sql-tap/gh-2579-custom-aggregate.test.lua
new file mode 100755
index 000000000..213e2e870
--- /dev/null
+++ b/test/sql-tap/gh-2579-custom-aggregate.test.lua
@@ -0,0 +1,113 @@
+#!/usr/bin/env tarantool
+local build_path = os.getenv("BUILDDIR")
+package.cpath = build_path..'/test/sql-tap/?.so;'..build_path..'/test/sql-tap/?.dylib;'..package.cpath
+
+local test = require("sqltester")
+test:plan(5)
+
+test:execsql([[
+        CREATE TABLE t (i INT PRIMARY KEY);
+        INSERT INTO t VALUES(1), (2), (3), (4), (5);
+    ]])
+
+-- Make sure that persistent aggregate functions work as intended.
+box.schema.func.create("F1", {
+    language = "Lua",
+    body = [[
+        function(x, state)
+            if state == nil then
+                state = {sum = 0, count = 0}
+            end
+            state.sum = state.sum + x
+            state.count = state.count + 1
+            return state
+        end
+    ]],
+    param_list = {"integer", "map"},
+    returns = "map",
+    aggregate = "group",
+    exports = {"SQL"},
+})
+
+test:do_execsql_test(
+    "gh-2579-1",
+    [[
+        SELECT f1(i) from t;
+    ]], {
+        {sum = 15, count = 5}
+    })
+
+-- Make sure that non-persistent aggregate functions work as intended.
+local f2 = function(x, state)
+    if state == nil then
+        state = {}
+    end
+    table.insert(state, x)
+    return state
+end
+
+rawset(_G, 'F2', f2)
+
+box.schema.func.create("F2", {
+    language = "Lua",
+    param_list = {"integer", "array"},
+    returns = "array",
+    aggregate = "group",
+    exports = {"SQL"},
+})
+
+test:do_execsql_test(
+    "gh-2579-2",
+    [[
+        SELECT f2(i) from t;
+    ]], {
+        {1, 2, 3, 4, 5}
+    })
+
+-- Make sure that C aggregate functions work as intended.
+box.schema.func.create("gh-2579-custom-aggregate.f3", {
+    language = "C",
+    param_list = {"integer", "integer"},
+    returns = "integer",
+    aggregate = "group",
+    exports = {"SQL"},
+})
+
+test:do_execsql_test(
+    "gh-2579-3",
+    [[
+        SELECT "gh-2579-custom-aggregate.f3"(i) from t;
+    ]], {
+        55
+    })
+
+-- Make sure aggregate functions can't be called in Lua.
+test:do_test(
+    "gh-2579-4",
+    function()
+        local def = {aggregate = 'group', exports = {'LUA', 'SQL'}}
+        local res = {pcall(box.schema.func.create, 'F4', def)}
+        return {tostring(res[2])}
+    end, {
+        "Failed to create function 'F4': aggregate function can only be "..
+        "accessed in SQL"
+    })
+
+-- Make sure aggregate functions can't have less that 1 argument.
+test:do_test(
+    "gh-2579-5",
+    function()
+        local def = {aggregate = 'group', exports = {'SQL'}}
+        local res = {pcall(box.schema.func.create, 'F4', def)}
+        return {tostring(res[2])}
+    end, {
+        "Failed to create function 'F4': aggregate function must have at "..
+        "least one argument"
+    })
+
+box.schema.func.drop('gh-2579-custom-aggregate.f3')
+box.schema.func.drop('F2')
+box.schema.func.drop('F1')
+test:execsql([[DROP TABLE t;]])
+
+test:finish_test()
-- 
2.25.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Tarantool-patches] [PATCH v2 4/4] sql: introduce FINALIZE for custom aggregate
  2022-02-01 13:37 [Tarantool-patches] [PATCH v2 0/4] Introduce custom aggregate function Mergen Imeev via Tarantool-patches
                   ` (2 preceding siblings ...)
  2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 3/4] sql: introduce custom aggregate functions Mergen Imeev via Tarantool-patches
@ 2022-02-01 13:37 ` Mergen Imeev via Tarantool-patches
  2022-02-03 23:30   ` Vladislav Shpilevoy via Tarantool-patches
  3 siblings, 1 reply; 9+ messages in thread
From: Mergen Imeev via Tarantool-patches @ 2022-02-01 13:37 UTC (permalink / raw)
  To: v.shpilevoy; +Cc: tarantool-patches

This patch introduces a way to create a FINALIZER for user-defined
aggregate functions.

Closes #2579

@TarantoolBot document
Title: User-defined aggregate functions

User-defined aggregate functions are now available. To create a
user-defined aggregate function, the 'aggregate' option should be set to
'group'.  The function must have at least one argument. The last
argument is always the result of the function in the previous step, or
NULL if it is the first step. The last argument will be added
automatically.

Example:
```
box.schema.func.create("F1", {language = "Lua",
    body = [[
        function(x, state)
            if state == nil then
                state = {sum = 0, count = 0}
            end
            state.sum = state.sum + x
            state.count = state.count + 1
            return state
        end
    ]],
    param_list = {"integer", "map"},
    returns = "map",
    aggregate = "group",
    exports = {"SQL"},
})

box.execute('CREATE TABLE t (i INT PRIMARY KEY);')
box.execute('INSERT INTO t VALUES(1), (2), (3), (4), (5);')
box.execute('SELECT f1(i) FROM t;')
```

The result will be:
```
tarantool> box.execute([[select f1(i) from t;]])
---
- metadata:
  - name: COLUMN_1
    type: map
  rows:
  - [{'sum': 15, 'count': 5}]
...
```

To create a finalizer for a function, another function should be
created. This function should follow the following rules:
1) its name should be '<function name>_finalize';
2) it must take exactly one argument;
3) it must be a non-aggregate function.

If an aggregate function has a finalizer, the result of the aggregate
function will be determined by the finalizer.

Example:
```
box.schema.func.create("F1_finalize", {
    language = "Lua",
    body = [[
        function(state)
            if state == nil then
                return 0
            end
            return state.sum / state.count
        end
    ]],
    param_list = {"map"},
    returns = "number",
    exports = {'LUA', 'SQL'},
})
box.execute('SELECT f1(i) FROM t;')
```

The result will be:
```
tarantool> box.execute([[select f1(i) from t;]])
---
- metadata:
  - name: COLUMN_1
    type: number
  rows:
  - [3]
...
```
---
 .../gh-2579-introduce-custom-aggregates.md    |   3 +
 src/box/sql/func.c                            |  13 ++
 src/box/sql/resolve.c                         |  12 ++
 src/box/sql/select.c                          |   7 ++
 src/box/sql/sqlInt.h                          |   4 +
 .../sql-tap/gh-2579-custom-aggregate.test.lua | 113 +++++++++++++++++-
 6 files changed, 151 insertions(+), 1 deletion(-)
 create mode 100644 changelogs/unreleased/gh-2579-introduce-custom-aggregates.md

diff --git a/changelogs/unreleased/gh-2579-introduce-custom-aggregates.md b/changelogs/unreleased/gh-2579-introduce-custom-aggregates.md
new file mode 100644
index 000000000..7975f9f08
--- /dev/null
+++ b/changelogs/unreleased/gh-2579-introduce-custom-aggregates.md
@@ -0,0 +1,3 @@
+## feature/sql
+
+* user-defined aggregate functions are now available in SQL (gh-2579).
diff --git a/src/box/sql/func.c b/src/box/sql/func.c
index cda872194..a445dbc3a 100644
--- a/src/box/sql/func.c
+++ b/src/box/sql/func.c
@@ -2071,6 +2071,19 @@ sql_func_find(struct Expr *expr)
 	return func;
 }
 
+struct func *
+sql_func_finalize(const char *name)
+{
+	const char *finalize_name = tt_sprintf("%s_finalize", name);
+	uint32_t len = strlen(finalize_name);
+	struct func *finalize = func_by_name(finalize_name, len);
+	if (finalize == NULL ||
+	    finalize->def->param_count != 1 ||
+	    finalize->def->aggregate == FUNC_AGGREGATE_GROUP)
+		return NULL;
+	return finalize;
+}
+
 uint32_t
 sql_func_flags(const char *name)
 {
diff --git a/src/box/sql/resolve.c b/src/box/sql/resolve.c
index 22b4e6799..095e6d6a8 100644
--- a/src/box/sql/resolve.c
+++ b/src/box/sql/resolve.c
@@ -663,6 +663,18 @@ resolveExprStep(Walker * pWalker, Expr * pExpr)
 				return WRC_Abort;
 			}
 			pExpr->type = func->def->returns;
+			/*
+			 * In case a user-defined aggregate function was called,
+			 * the result type will be the result type of the
+			 * FINALIZE part of the function.
+			 */
+			if (func->def->language != FUNC_LANGUAGE_SQL_BUILTIN &&
+			    func->def->aggregate == FUNC_AGGREGATE_GROUP) {
+				const char *name = pExpr->u.zToken;
+				struct func *finalize = sql_func_finalize(name);
+				if (finalize != NULL)
+					pExpr->type = finalize->def->returns;
+			}
 			assert(!func->def->is_deterministic ||
 			       (pNC->ncFlags & NC_IdxExpr) == 0);
 			if (func->def->is_deterministic)
diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index dcea48c6e..a3fe2f60f 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -5566,7 +5566,14 @@ finalize_agg_function(struct Vdbe *vdbe, const struct AggInfo_func *agg_func)
 	if (agg_func->func->def->language == FUNC_LANGUAGE_SQL_BUILTIN) {
 		sqlVdbeAddOp1(vdbe, OP_AggFinal, agg_func->iMem);
 		sqlVdbeAppendP4(vdbe, agg_func->func, P4_FUNC);
+		return;
 	}
+	if (sql_func_finalize(agg_func->pExpr->u.zToken) == NULL)
+		return;
+	const char *name = tt_sprintf("%s_finalize", agg_func->pExpr->u.zToken);
+	const char *str = sqlDbStrDup(sql_get(), name);
+	sqlVdbeAddOp4(vdbe, OP_FunctionByName, 1, agg_func->iMem,
+		      agg_func->iMem, str, P4_DYNAMIC);
 }
 
 /*
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index f49522dc8..8d0321f56 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -4308,6 +4308,10 @@ sql_func_find(struct Expr *expr);
 int
 sql_emit_args_types(struct Vdbe *v, int reg, struct func *base, uint32_t argc);
 
+/** Return a function that is a finalizer for function with given name. */
+struct func *
+sql_func_finalize(const char *name);
+
 /**
  * Return the parameters of the function with the given name. If the function
  * with the given name does not exist, or the function is not a built-in SQL
diff --git a/test/sql-tap/gh-2579-custom-aggregate.test.lua b/test/sql-tap/gh-2579-custom-aggregate.test.lua
index 213e2e870..fc1633b95 100755
--- a/test/sql-tap/gh-2579-custom-aggregate.test.lua
+++ b/test/sql-tap/gh-2579-custom-aggregate.test.lua
@@ -3,7 +3,7 @@ local build_path = os.getenv("BUILDDIR")
 package.cpath = build_path..'/test/sql-tap/?.so;'..build_path..'/test/sql-tap/?.dylib;'..package.cpath
 
 local test = require("sqltester")
-test:plan(5)
+test:plan(9)
 
 test:execsql([[
         CREATE TABLE t (i INT PRIMARY KEY);
@@ -105,6 +105,117 @@ test:do_test(
         "least one argument"
     })
 
+-- Make sure finalizers of aggregate functions work as intended.
+box.schema.func.create("F1_finalize", {
+    language = "Lua",
+    body = [[
+        function(state)
+            if state == nil then
+                return 0
+            end
+            return state.sum / state.count
+        end
+    ]],
+    param_list = {"map"},
+    returns = "number",
+    exports = {'LUA', 'SQL'},
+})
+
+test:do_execsql_test(
+    "gh-2579-6",
+    [[
+        SELECT f1(i), typeof(f1(i)) from t;
+    ]], {
+        3, "integer"
+    })
+
+--
+-- Make sure that the finalizers of the aggregate function and the aggregate
+-- function can be of different types.
+--
+box.schema.func.create("F2_finalize", {
+    language = "Lua",
+    body = [[
+        function(state)
+            if state == nil then
+                return 0
+            end
+            return state[#state]
+        end
+    ]],
+    param_list = {"array"},
+    returns = "number",
+    exports = {'LUA', 'SQL'},
+})
+
+test:do_execsql_test(
+    "gh-2579-7",
+    [[
+        SELECT f2(i) from t;
+    ]], {
+        5
+    })
+
+-- Make sure that aggregate function cannot become finalizer.
+box.schema.func.drop("F2_finalize")
+box.schema.func.create("F2_finalize", {
+    language = "Lua",
+    body = [[
+        function(state)
+            if state == nil then
+                return 0
+            end
+            sum = 0
+            for k, v in pairs(state) do
+                sum = sum + v
+            end
+            return sum
+        end
+    ]],
+    aggregate = "group",
+    param_list = {"array"},
+    returns = "number",
+    exports = {'SQL'},
+})
+
+test:do_execsql_test(
+    "gh-2579-8",
+    [[
+        SELECT f2(i), typeof(f2(i)) from t;
+    ]], {
+        {1, 2, 3, 4, 5}, "array"
+    })
+
+--
+-- Make sure that function with number of arguments not equal to 1 cannot become
+-- finalizer.
+--
+box.schema.func.drop("F2_finalize")
+box.schema.func.create("F2_finalize", {
+    language = "Lua",
+    body = [[
+        function(state, x)
+            if state == nil then
+                return 0
+            end
+            return state[#state] * x
+        end
+    ]],
+    param_list = {"map", "integer"},
+    returns = "number",
+    exports = {'LUA', 'SQL'},
+})
+
+test:do_execsql_test(
+    "gh-2579-9",
+    [[
+        SELECT f2(i), typeof(f2(i)), "F2_finalize"(f2(i), 2) from t;
+    ]], {
+        {1, 2, 3, 4, 5}, "array", 10
+    })
+
+box.schema.func.drop('F2_finalize')
+box.schema.func.drop('F1_finalize')
 box.schema.func.drop('gh-2579-custom-aggregate.f3')
 box.schema.func.drop('F2')
 box.schema.func.drop('F1')
-- 
2.25.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Tarantool-patches] [PATCH v2 3/4] sql: introduce custom aggregate functions
  2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 3/4] sql: introduce custom aggregate functions Mergen Imeev via Tarantool-patches
@ 2022-02-03 23:29   ` Vladislav Shpilevoy via Tarantool-patches
  0 siblings, 0 replies; 9+ messages in thread
From: Vladislav Shpilevoy via Tarantool-patches @ 2022-02-03 23:29 UTC (permalink / raw)
  To: imeevma; +Cc: tarantool-patches

Hi! Thanks for the patch!

See 3 comments below. After you done with them, send to a next
reviewer right away.

> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index b69bf7fd6..cda872194 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -2060,9 +2060,12 @@ sql_func_find(struct Expr *expr)
>  		return NULL;
>  	}
>  	int n = expr->x.pList != NULL ? expr->x.pList->nExpr : 0;
> -	if (func->def->param_count != n) {
> +	int argc = func->def->aggregate == FUNC_AGGREGATE_GROUP ?
> +		   func->def->param_count - 1 : func->def->param_count;

1. Why -1 for aggregates? I see it is somehow related to the
last argument being used for state, but it looks intricate. A
comment here would be enough.

> +	assert(argc >= 0);
> +	if (argc != n) {
>  		diag_set(ClientError, ER_FUNC_WRONG_ARG_COUNT, name,
> -			 tt_sprintf("%d", func->def->param_count), n);
> +			 tt_sprintf("%d", argc), n);
>  		return NULL;
>  	}
>  	return func;> diff --git a/test/sql-tap/gh-2579-custom-aggregate.test.lua b/test/sql-tap/gh-2579-custom-aggregate.test.lua
> new file mode 100755
> index 000000000..213e2e870
> --- /dev/null
> +++ b/test/sql-tap/gh-2579-custom-aggregate.test.lua
> @@ -0,0 +1,113 @@
> +#!/usr/bin/env tarantool
> +local build_path = os.getenv("BUILDDIR")
> +package.cpath = build_path..'/test/sql-tap/?.so;'..build_path..'/test/sql-tap/?.dylib;'..package.cpath

2. Lets split it at 80th line column.

> +
> +local test = require("sqltester")
> +test:plan(5)
> +
> +test:execsql([[
> +        CREATE TABLE t (i INT PRIMARY KEY);
> +        INSERT INTO t VALUES(1), (2), (3), (4), (5);
> +    ]])

3. You probably forgot to left shift the expression for one tab.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Tarantool-patches] [PATCH v2 4/4] sql: introduce FINALIZE for custom aggregate
  2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 4/4] sql: introduce FINALIZE for custom aggregate Mergen Imeev via Tarantool-patches
@ 2022-02-03 23:30   ` Vladislav Shpilevoy via Tarantool-patches
  2022-02-10  9:21     ` Mergen Imeev via Tarantool-patches
  0 siblings, 1 reply; 9+ messages in thread
From: Vladislav Shpilevoy via Tarantool-patches @ 2022-02-03 23:30 UTC (permalink / raw)
  To: imeevma; +Cc: tarantool-patches

Thanks for the patch!

> Example:
> ```
> box.schema.func.create("F1_finalize", {
>     language = "Lua",
>     body = [[
>         function(state)
>             if state == nil then
>                 return 0
>             end
>             return state.sum / state.count
>         end
>     ]],
>     param_list = {"map"},
>     returns = "number",
>     exports = {'LUA', 'SQL'},
> })

I thought of another approach - let to call the function any name,
make {aggregate = "finalize", source = "F1"}. So it is like a foreign
key. But up to you whether you like it and whether you will try to bring
it up with the product managers.

> box.execute('SELECT f1(i) FROM t;')

The commit technically LGTM.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Tarantool-patches] [PATCH v2 4/4] sql: introduce FINALIZE for custom aggregate
  2022-02-03 23:30   ` Vladislav Shpilevoy via Tarantool-patches
@ 2022-02-10  9:21     ` Mergen Imeev via Tarantool-patches
  0 siblings, 0 replies; 9+ messages in thread
From: Mergen Imeev via Tarantool-patches @ 2022-02-10  9:21 UTC (permalink / raw)
  To: Vladislav Shpilevoy; +Cc: tarantool-patches

hi! Thank you for the review! My answer below.

On Fri, Feb 04, 2022 at 12:30:24AM +0100, Vladislav Shpilevoy wrote:
> Thanks for the patch!
> 
> > Example:
> > ```
> > box.schema.func.create("F1_finalize", {
> >     language = "Lua",
> >     body = [[
> >         function(state)
> >             if state == nil then
> >                 return 0
> >             end
> >             return state.sum / state.count
> >         end
> >     ]],
> >     param_list = {"map"},
> >     returns = "number",
> >     exports = {'LUA', 'SQL'},
> > })
> 
> I thought of another approach - let to call the function any name,
> make {aggregate = "finalize", source = "F1"}. So it is like a foreign
> key. But up to you whether you like it and whether you will try to bring
> it up with the product managers.
> 
I tried two more approaches, but in the end I decided to leave it as it is for
now. Both approaches get too complicated when I have to deal with transactions.

> > box.execute('SELECT f1(i) FROM t;')
> 
> The commit technically LGTM.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Tarantool-patches] [PATCH v2 1/4] sql: fix COUNT() optimization conditions
  2022-02-10  9:14 [Tarantool-patches] [PATCH v2 0/4] Introduce custom aggregate functions Mergen Imeev via Tarantool-patches
@ 2022-02-10  9:14 ` Mergen Imeev via Tarantool-patches
  0 siblings, 0 replies; 9+ messages in thread
From: Mergen Imeev via Tarantool-patches @ 2022-02-10  9:14 UTC (permalink / raw)
  To: kyukhin; +Cc: tarantool-patches

This patch fixes the conditions under which COUNT() is optimized. At
some point, the conditions were broken, but since there was no other
aggregate function requiring zero arguments, this problem did not change
the behavior.

Needed for #2579
---
 src/box/sql/select.c | 2 +-
 src/box/sql/sqlInt.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index 55aaff87f..b532cac4e 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -4650,7 +4650,7 @@ is_simple_count(struct Select *select, struct AggInfo *agg_info)
 		return NULL;
 	assert(agg_info->aFunc->func->def->language ==
 	       FUNC_LANGUAGE_SQL_BUILTIN);
-	if (sql_func_flag_is_set(agg_info->aFunc->func, SQL_FUNC_COUNT) ||
+	if (strcmp(agg_info->aFunc->func->def->name, "COUNT") != 0 ||
 	    (agg_info->aFunc->pExpr->x.pList != NULL &&
 	     agg_info->aFunc->pExpr->x.pList->nExpr > 0))
 		return NULL;
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 0db16b293..f49522dc8 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -1117,8 +1117,6 @@ struct type_def {
 					 */
 #define SQL_FUNC_LENGTH   0x0040	/* Built-in length() function */
 #define SQL_FUNC_TYPEOF   0x0080	/* Built-in typeof() function */
-/** Built-in count() function. */
-#define SQL_FUNC_COUNT    0x0100
 #define SQL_FUNC_COALESCE 0x0200	/* Built-in coalesce() or ifnull() */
 #define SQL_FUNC_UNLIKELY 0x0400	/* Built-in unlikely() function */
 /** Built-in min() or least() function. */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-02-10  9:21 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-01 13:37 [Tarantool-patches] [PATCH v2 0/4] Introduce custom aggregate function Mergen Imeev via Tarantool-patches
2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 1/4] sql: fix COUNT() optimization conditions Mergen Imeev via Tarantool-patches
2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 2/4] sql: drop unnecessary P2 register for OP_AggFinal Mergen Imeev via Tarantool-patches
2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 3/4] sql: introduce custom aggregate functions Mergen Imeev via Tarantool-patches
2022-02-03 23:29   ` Vladislav Shpilevoy via Tarantool-patches
2022-02-01 13:37 ` [Tarantool-patches] [PATCH v2 4/4] sql: introduce FINALIZE for custom aggregate Mergen Imeev via Tarantool-patches
2022-02-03 23:30   ` Vladislav Shpilevoy via Tarantool-patches
2022-02-10  9:21     ` Mergen Imeev via Tarantool-patches
2022-02-10  9:14 [Tarantool-patches] [PATCH v2 0/4] Introduce custom aggregate functions Mergen Imeev via Tarantool-patches
2022-02-10  9:14 ` [Tarantool-patches] [PATCH v2 1/4] sql: fix COUNT() optimization conditions Mergen Imeev via Tarantool-patches

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox