[tarantool-patches] Re: [PATCH v2 7/8] sql: get rid of FuncDef function hash

Tue Aug 13 23:40:11 MSK 2019

> On 8 Aug 2019, at 17:50, Kirill Shcherbatov <kshcherbatov at tarantool.org> wrote:
> 
> Now it is possible to move all SQL builtin functions to
> Tarantool's function hash. An existent FuncDef function
> representation was replaced with func_sql_builtin class.
> It has a sql-specific method :call and :finalize typica while

Nit: typic? Please, consider using spell-checker.

> port API call is not supported and protected with stub.
> 
> This patch removes FuncDef hash and sql_function_create endpoint,
> but doesn't introduce something instead. Therefore few affected
> tests are disabled. A required functionality would be fixed in
> the next patch.
> 
> Following tests using sql_create_function are broken now.
> They would be fixed in the following commit:

Nit: are going to be fixed in the next commit.

> sql-tap/alias.test.lua sql-tap/check.test.lua
> sql-tap/func5.test.lua sql-tap/lua_sql.test.lua
> sql-tap/subquery.test.lua sql-tap/trigger9.test.lua
> sql/errinj.result sql/errinj.test.lua
> sql/func-recreate.test.lua
> 
> Part of #2200, #4113, #2233
> ---
> 
> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
> index 114ac0e4b..6ef1a1d4a 100644
> --- a/src/box/sql/sqlInt.h
> +++ b/src/box/sql/sqlInt.h
> @@ -1244,78 +1211,12 @@ struct type_def {
> };
> 
> @@ -4516,9 +4337,62 @@ Expr *sqlExprForVectorField(Parse *, Expr *, int);
>  */
> extern int sqlSubProgramsRemaining;
> 
> -/** Register built-in functions to work with ANALYZE data. */
> -void
> -sql_register_analyze_builtins(void);
> +struct func_sql_builtin {
> +	/** Function object base class. */
> +	struct func base;
> +	/**
> +	 * A bitmask representing all supported function
> +	 * overloads. The function supports argc == n iff this
> +	 * bitmask has bit n set 1. In particular case, a bitmask
> +	 * (~0) means this function works with any possible
> +	 * argument.
> +	 *
> +	 * The count of arguments for function is limited with
> +	 * (CHAR_BITS*sizeof(uint64_t) - 1). When the highest bit
> +	 * of the mask is set, this means that greater values
> +	 * are supported. E.g. greatest function works correctly
> +	 * with any number of input arguments.
> +	 */
> +	uint64_t signature_mask;
> +	/** A bitmask of SQL flags. */
> +	uint16_t flags;
> +	/** User data to pass in call method. */
> +	void *user_data;
> +	/** A call method for a function. */

Add explanation why we don’t use basic (i.e. call
method of base class) “call” method. Furthermore,
built-ins are called bypassing func_access_check()
function. This should be either documented or fixed.

> +	void (*call)(sql_context *ctx, int argc, sql_value **argv);
> +	/** Finalize method (only for aggregate function). */
> +	void (*finalize)(sql_context *ctx);

We hold aggregate attribute in func_def; I guess
lua/C aggregate are not going to be implemented
soon (or someday at all); so, considering these
points, why not to move this method to the base vtab?

> +};
> +
> +/**
> + * A SQL method to find a function in a hash by it's name and

Nit: it’s (it is) -> its.

> + * count of arguments. Only functions that have 'SQL' engine
> + * export field set true and have exactly the same signature
> + * are returned.
> + *
> + * Returns not NULL function pointer when a valid and exported
> + * to SQL engine function was found and NULL otherwise.

Nit: was found -> is found.

> + */
> +struct func *
> +sql_func_by_signature(const char *name, uint32_t argc);
> +
> +/**
> + * Test whether SQL-specific flag is set for given function.
> + * Currently only SQL Builtin Functions have such hint flags,
> + * so function returns false for other functions. Such approach
> + * decreases code complexity and allows do not distinguish
> + * functions by implementation details where it is unnecessary.
> + *
> + * Returns true when given flag is set for a given function and
> + * false otherwise.
> + */
> +static inline bool
> +sql_func_flag_test(struct func *func, uint16_t flag)

At least I would rename it to sql_func_test_flag() or
sql_builtin_flag_is_set() or smth like that.

> +{
> +	if (func->def->language != FUNC_LANGUAGE_SQL_BUILTIN)
> +		return false;
> +	return (((struct func_sql_builtin *)func)->flags & flag) != 0;
> +}
> 
> diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h
> index f77c019fb..f085477c1 100644
> --- a/src/box/sql/vdbeInt.h
> +++ b/src/box/sql/vdbeInt.h
> 
> @@ -516,7 +523,17 @@ int sqlVdbeMemNumerify(Mem *);
> int sqlVdbeMemCast(Mem *, enum field_type type);
> int sqlVdbeMemFromBtree(BtCursor *, u32, u32, Mem *);
> void sqlVdbeMemRelease(Mem * p);
> -int sqlVdbeMemFinalize(Mem *, FuncDef *);
> +
> +/**
> + * Memory cell pMem contains the context of an aggregate function.

Nit: pMem -> mem.

> + * This routine calls the finalize method for that function. The
> + * result of the aggregate is stored back into pMem.
> + *
> + * Returns -1 if the finalizer reports an error. 0 otherwise.
> + */
> +int
> +sql_vdbemem_finalize(struct Mem *mem, struct func *func);
> +
> const char *sqlOpcodeName(int);
> int sqlVdbeMemGrow(Mem * pMem, int n, int preserve);
> int sqlVdbeMemClearAndResize(Mem * pMem, int n);
> 
> diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c
> index bd52d12df..b68d86dfe 100644
> --- a/src/box/sql/analyze.c
> +++ b/src/box/sql/analyze.c
> @@ -263,7 +263,7 @@ stat4Destructor(void *pOld)
>  * return value is BLOB, but it is really just a pointer to the Stat4Accum
>  * object.
>  */
> -static void
> +MAYBE_UNUSED static void

Is this change really related to the patch? Same for
all other added MAYBE_UNUSED modifiers.

> statInit(sql_context * context, int argc, sql_value ** argv)
> {
> 	Stat4Accum *p;
> @@ -535,7 +535,7 @@ samplePushPrevious(Stat4Accum * p, int iChng)
>  *
>  * The R parameter is only used for STAT4
>  */
> -static void
> +MAYBE_UNUSED static void
> statPush(sql_context * context, int argc, sql_value ** argv)
> {
> 	int i;
> @@ -608,7 +608,7 @@ statPush(sql_context * context, int argc, sql_value ** argv)
>  * The content to returned is determined by the parameter J
>  * which is one of the STAT_GET_xxxx values defined above.
>  */
> -static void
> +MAYBE_UNUSED static void
> statGet(sql_context * context, int argc, sql_value ** argv)
> {
> 	Stat4Accum *p = (Stat4Accum *) sql_value_blob(argv[0]);
> @@ -715,11 +715,11 @@ callStatGet(Vdbe * v, int regStat4, int iParam, int regOut)
> {
> 	assert(regOut != regStat4 && regOut != regStat4 + 1);
> 	sqlVdbeAddOp2(v, OP_Integer, iParam, regStat4 + 1);
> -	struct FuncDef *func =
> -		sqlFindFunction(sql_get(), "_sql_stat_get", 2, 0);
> +	struct func *func =
> +		sql_func_by_signature("_sql_stat_get", 2);
> 	assert(func != NULL);

Instead of asserting func existence let’s raise an error: now
user can remove built-in function from hash - there’s no
protection to avoid such cases. Note that such opportunity
(removing built-ins from cache) has been introduced by moving
built-ins to the global func cache. In this case, assertion will fail
in debug mode and lead to unpredictable consequences in
release mode. Another option is to introduce mentioned protection,
i.e. disallow user to delete functions which are declared with
FUNC_LANGUAGE_SQL_BUILTIN flag.

> diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
> index 1f9d91705..64b3bc835 100644
> --- a/src/box/sql/expr.c
> +++ b/src/box/sql/expr.c
> 
> @@ -3991,8 +3988,9 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
> 			nFarg = pFarg ? pFarg->nExpr : 0;
> 			assert(!ExprHasProperty(pExpr, EP_IntValue));
> 			zId = pExpr->u.zToken;
> -			pDef = sqlFindFunction(db, zId, nFarg, 0);
> -			if (pDef == 0 || pDef->xFinalize != 0) {
> +			struct func *func = sql_func_by_signature(zId, nFarg);
> +			if (func == NULL ||
> +			    func->def->aggregate == FUNC_AGGREGATE_GROUP) {

Why do we need check on aggregation type?

> @@ -4128,12 +4118,15 @@ sqlExprCodeTarget(Parse * pParse, Expr * pExpr, int target)
> 			} else {
> 				r1 = 0;
> 			}
> -			if (pDef->funcFlags & SQL_FUNC_NEEDCOLL) {
> +			if (sql_func_flag_test(func, SQL_FUNC_NEEDCOLL)) {
> 				sqlVdbeAddOp4(v, OP_CollSeq, 0, 0, 0,
> 						  (char *)coll, P4_COLLSEQ);
> 			}
> -			sqlVdbeAddOp4(v, OP_BuiltinFunction0, constMask, r1,
> -					  target, (char *)pDef, P4_FUNCDEF);
> +			assert(func->def->language ==
> +			       FUNC_LANGUAGE_SQL_BUILTIN);
> +			int op = OP_BuiltinFunction0;
> +			sqlVdbeAddOp4(v, op, constMask, r1, target,
> +				      (char *)func, P4_FUNC);
> 			sqlVdbeChangeP5(v, (u8) nFarg);
> 			if (nFarg && constMask == 0) {
> 				sqlReleaseTempRange(pParse, r1, nFarg);
> 
> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
> index 8e07ce892..f07c52b95 100644
> --- a/src/box/sql/func.c
> +++ b/src/box/sql/func.c
> @@ -1825,114 +1825,231 @@ groupConcatFinalize(sql_context * context)
> }
> +
> +#define REG_FUNC(name, signature_mask, returns, flags,     \
> +		 call, user_data, is_deterministic)        \
> +	{name, signature_mask, returns, flags, call, NULL, \
> +	 user_data, FUNC_AGGREGATE_NONE, is_deterministic}
> +
> +#define AGG_FUNC(name, signature_mask, returns, flags,     \
> +		 call, finalize, user_data)                \
> +	{name, signature_mask, returns, flags, call,       \
> +	 finalize, user_data, FUNC_AGGREGATE_GROUP, false}
> +static struct func_vtab func_sql_builtin_vtab;
> +
> +struct func *
> +func_sql_builtin_new(struct func_def *def)
> {
> +	assert(def->language == FUNC_LANGUAGE_SQL_BUILTIN);
> +	if (def->body != NULL || def->is_sandboxed) {
> +		diag_set(ClientError, ER_CREATE_FUNCTION, def->name,
> +			 "body and is_sandboxed options are not compatible "
> +			 "with SQL language”);

Why not move this check to func_def_new_from_tuple() ?

> +		return NULL;
> +	}
> +	func->flags = sql_builtins[idx].flags;
> +	func->user_data = sql_builtins[idx].user_data;
> +	func->call = sql_builtins[idx].call;
> +	func->finalize = sql_builtins[idx].finalize;
> +	func->signature_mask = sql_builtins[idx].signature_mask;
> +	func->base.vtab = &func_sql_builtin_vtab;
> +	func->base.def = def;
> +

Why not memcpy?

> +	def->is_deterministic = sql_builtins[idx].is_deterministic;
> +	def->returns = sql_builtins[idx].returns;
> +	def->aggregate = sql_builtins[idx].aggregate;
> +	def->exports.sql = true;
> +	return &func->base;
> +}
> +
> +static struct func_vtab func_sql_builtin_vtab = {
> +	.call = sql_builtin_call_stub,
> +	.destroy = func_sql_builtin_destroy,
> +};
> 
> diff --git a/src/box/sql/resolve.c b/src/box/sql/resolve.c
> index 0b90edd06..0c54afef6 100644
> --- a/src/box/sql/resolve.c
> +++ b/src/box/sql/resolve.c
> @@ -38,6 +38,9 @@
> #include "sqlInt.h"
> #include <stdlib.h>
> #include <string.h>
> +#include "box/func.h"
> +#include "box/func_def.h"
> +#include "box/schema.h"
> 
> /*
>  * Walk the expression tree pExpr and increase the aggregate function
> @@ -596,27 +599,30 @@ resolveExprStep(Walker * pWalker, Expr * pExpr)
> 			int is_agg = 0;	/* True if is an aggregate function */
> 			int nId;	/* Number of characters in function name */
> 			const char *zId;	/* The function name. */
> -			FuncDef *pDef;	/* Information about the function */
> 
> 			assert(!ExprHasProperty(pExpr, EP_xIsSelect));
> 			zId = pExpr->u.zToken;
> 			nId = sqlStrlen30(zId);
> -			pDef = sqlFindFunction(pParse->db, zId, n, 0);
> -			if (pDef == 0) {
> -				pDef =
> -				    sqlFindFunction(pParse->db, zId, -2,0);
> -				if (pDef == 0) {
> +			struct func *func = sql_func_by_signature(zId, n);
> +			if (func == NULL) {
> +				func = func_by_name(zId, strlen(zId));
> +				if (func == NULL || !func->def->exports.sql) {
> +					func = NULL;
> 					no_such_func = 1;
> 				} else {
> 					wrong_num_args = 1;
> 				}
> 			} else {
> -				is_agg = pDef->xFinalize != 0;
> -				pExpr->type = pDef->ret_type;
> +				is_agg = func->def->language ==
> +					 FUNC_LANGUAGE_SQL_BUILTIN &&
> +					 func->def->aggregate ==
> +					 FUNC_AGGREGATE_GROUP;;

Nit: double semicolon at the end of string.

Can aggregate function be non-builtin?

> +				pExpr->type = func->def->returns;
> 				const char *err =
> 					"second argument to likelihood() must "\
> 					"be a constant between 0.0 and 1.0";
> -				if (pDef->funcFlags & SQL_FUNC_UNLIKELY) {
> +				if (sql_func_flag_test(func,
> +						       SQL_FUNC_UNLIKELY)) {
> 					ExprSetProperty(pExpr,
> 							EP_Unlikely | EP_Skip);
> 					if (n == 2) {
> @@ -643,21 +649,19 @@ resolveExprStep(Walker * pWalker, Expr * pExpr)
> 						 */
> 						/* TUNING: unlikely() probability is 0.0625.  likely() is 0.9375 */
> 						pExpr->iTable =
> -						    pDef->zName[0] ==
> -						    'u' ? 8388608 : 125829120;
> +						    func->def->name[0] == 'u' ?
> +						    8388608 : 125829120;
> 					}
> 				}
> -				if (pDef->
> -				    funcFlags & (SQL_FUNC_CONSTANT |
> -						 SQL_FUNC_SLOCHNG)) {
> +				if (func->def->is_deterministic ||
> +				    sql_func_flag_test(func, SQL_FUNC_SLOCHNG)) {

SQL_FUNC_SLOCHNG seems to be unused.
So, you can simply remove it.

> @@ -700,18 +704,14 @@ resolveExprStep(Walker * pWalker, Expr * pExpr)
> 					pExpr->op2++;
> 					pNC2 = pNC2->pNext;
> 				}
> -				assert(pDef != 0);
> +				assert(func != NULL);
> 				if (pNC2) {
> +					pNC2->ncFlags |= NC_HasAgg;
> 					assert(SQL_FUNC_MINMAX ==
> 					       NC_MinMaxAgg);
> -					testcase((pDef->
> -						  funcFlags &
> -						  SQL_FUNC_MINMAX) != 0);
> -					pNC2->ncFlags |=
> -					    NC_HasAgg | (pDef->
> -							 funcFlags &
> -							 SQL_FUNC_MINMAX);
> -
> +					if (sql_func_flag_test(func,
> +							       SQL_FUNC_MINMAX))
> +						pNC2->ncFlags |= NC_MinMaxAgg;
> 				}
> 				pNC->ncFlags |= NC_AllowAgg;
> 			}
> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
> index 28552f64a..a66becc89 100644
> --- a/src/box/sql/vdbe.c
> +++ b/src/box/sql/vdbe.c
> @@ -41,6 +41,8 @@
>  */
> #include "box/box.h"
> #include "box/error.h"
> +#include "box/func.h"
> +#include "box/func_def.h"
> #include "box/fk_constraint.h"
> #include "box/txn.h"
> #include "box/tuple.h"
> 
> @@ -1747,7 +1749,9 @@ case OP_BuiltinFunction: {
> 	}
> #endif
> 	pCtx->is_aborted = false;
> -	(*pCtx->pFunc->xSFunc)(pCtx, pCtx->argc, pCtx->argv);/* IMP: R-24505-23230 */
> +	assert(pCtx->func->def->language == FUNC_LANGUAGE_SQL_BUILTIN);
> +	(*((struct func_sql_builtin *)pCtx->func)->call)(pCtx, pCtx->argc,
> +							 pCtx->argv);
> 
> 	/* If the function returned an error, throw an exception */
> 	if (pCtx->is_aborted)
> 
> @@ -5055,7 +5059,9 @@ case OP_AggStep: {
> 	pCtx->pOut = &t;
> 	pCtx->is_aborted = false;
> 	pCtx->skipFlag = 0;
> -	(pCtx->pFunc->xSFunc)(pCtx,pCtx->argc,pCtx->argv); /* IMP: R-24505-23230 */
> +	assert(pCtx->func->def->language == FUNC_LANGUAGE_SQL_BUILTIN);
> +	(*((struct func_sql_builtin *)pCtx->func)->call)(pCtx, pCtx->argc,
> +							 pCtx->argv);

Nit: please, split it into two steps: type cast and function call.

struct func_sql_builtin *func = …;
func->call();

> 	if (pCtx->is_aborted) {
> 		sqlVdbeMemRelease(&t);
> 		goto abort_due_to_error;
> 
> diff --git a/src/box/sql/vdbeapi.c b/src/box/sql/vdbeapi.c
> index 1aee3cf85..4ff8db621 100644
> --- a/src/box/sql/vdbeapi.c
> +++ b/src/box/sql/vdbeapi.c
> @@ -484,8 +484,9 @@ sql_step(sql_stmt * pStmt)
> void *
> sql_user_data(sql_context * p)
> {
> -	assert(p && p->pFunc);
> -	return p->pFunc->pUserData;
> +	assert(p != NULL && p->func != NULL);
> +	assert(p->func->def->language == FUNC_LANGUAGE_SQL_BUILTIN);
> +	return ((struct func_sql_builtin *)p->func)->user_data;

user_data is used only for min/max functions. Let’s consider removing it.

> @@ -563,7 +564,9 @@ createAggContext(sql_context * p, int nByte)
> void *
> sql_aggregate_context(sql_context * p, int nByte)
> {
> -	assert(p && p->pFunc && p->pFunc->xFinalize);
> +	assert(p != NULL && p->func != NULL &&
> +	       p->func->def->language == FUNC_LANGUAGE_SQL_BUILTIN &&
> +	       p->func->def->aggregate == FUNC_AGGREGATE_GROUP);

Please, split this assert into three ones. IMHO it would
improve code readability.

> diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
> index b8c31ecec..8b2e816f2 100644
> --- a/src/box/sql/vdbemem.c
> +++ b/src/box/sql/vdbemem.c
> @@ -312,33 +312,28 @@ sqlVdbeMemStringify(Mem * pMem)
> 	return 0;
> }
> 
> -/*
> - * Memory cell pMem contains the context of an aggregate function.
> - * This routine calls the finalize method for that function.  The
> - * result of the aggregate is stored back into pMem.
> - *
> - * Return -1 if the finalizer reports an error. 0 otherwise.
> - */
> int
> -sqlVdbeMemFinalize(Mem * pMem, FuncDef * pFunc)
> +sql_vdbemem_finalize(struct Mem *mem, struct func *func)
> {
> -	if (ALWAYS(pFunc && pFunc->xFinalize)) {
> +	if (ALWAYS(func != NULL &&

Please, move assert inside if condition to a separate stmt(s):

diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
index 8b2e816f2..958d67916 100644
--- a/src/box/sql/vdbemem.c
+++ b/src/box/sql/vdbemem.c
@@ -315,12 +315,12 @@ sqlVdbeMemStringify(Mem * pMem)
 int
 sql_vdbemem_finalize(struct Mem *mem, struct func *func)
 {
-       if (ALWAYS(func != NULL &&
-                  func->def->language == FUNC_LANGUAGE_SQL_BUILTIN &&
-                  func->def->aggregate == FUNC_AGGREGATE_GROUP)) {
-               sql_context ctx;
-               Mem t;
-               assert((mem->flags & MEM_Null) != 0 || func == mem->u.func);
+       assert(func != NULL);
+       assert(func->def->language == FUNC_LANGUAGE_SQL_BUILTIN);
+       assert(func->def->language == FUNC_AGGREGATE_GROUP);
+       sql_context ctx;
+       Mem t;
+       assert((mem->flags & MEM_Null) != 0 || func == mem->u.func);

...


> +		   func->def->language == FUNC_LANGUAGE_SQL_BUILTIN &&
> +		   func->def->aggregate == FUNC_AGGREGATE_GROUP)) {
> 		sql_context ctx;
> 		Mem t;
> -		assert((pMem->flags & MEM_Null) != 0 || pFunc == pMem->u.pDef);
> +		assert((mem->flags & MEM_Null) != 0 || func == mem->u.func);
> 		memset(&ctx, 0, sizeof(ctx));
> 		memset(&t, 0, sizeof(t));
> 		t.flags = MEM_Null;
> -		t.db = pMem->db;
> +		t.db = mem->db;
> 		t.field_type = field_type_MAX;
> 		ctx.pOut = &t;
> -		ctx.pMem = pMem;
> -		ctx.pFunc = pFunc;
> -		pFunc->xFinalize(&ctx);	/* IMP: R-24505-23230 */
> -		assert((pMem->flags & MEM_Dyn) == 0);
> -		if (pMem->szMalloc > 0)
> -			sqlDbFree(pMem->db, pMem->zMalloc);
> -		memcpy(pMem, &t, sizeof(t));
> +		ctx.pMem = mem;
> +		ctx.func = func;
> +		((struct func_sql_builtin *)func)->finalize(&ctx);
> +		assert((mem->flags & MEM_Dyn) == 0);
> +		if (mem->szMalloc > 0)
> +			sqlDbFree(mem->db, mem->zMalloc);
> +		memcpy(mem, &t, sizeof(t));
> 		if (ctx.is_aborted)
> 			return -1;
> 	}