[Tarantool-patches] [PATCH v1 1/1] sql: introduce user-defined aggregate functions

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Tue Jan 18 03:10:04 MSK 2022


Hi! Thanks for the patch!

What happened to the idea we had about SQL iterators? Then people would
be able to iterate with yields and to make any kind of aggregation without
having to create functions for that.

See 8 comments below.

> diff --git a/src/box/alter.cc b/src/box/alter.cc
> index 65c1cb952..f1dfcd807 100644
> --- a/src/box/alter.cc
> +++ b/src/box/alter.cc
> @@ -3276,6 +3276,85 @@ func_def_get_ids_from_tuple(struct tuple *tuple, uint32_t *fid, uint32_t *uid)
>  	return tuple_field_u32(tuple, BOX_FUNC_FIELD_UID, uid);
>  }
>  
> +static int
> +func_def_check_body(struct tuple *tuple)
> +{
> +	assert(tuple_field_count(tuple) > BOX_FUNC_FIELD_BODY);
> +	const char *field = tuple_field(tuple, BOX_FUNC_FIELD_BODY);
> +	assert(field != NULL);
> +	enum mp_type type = mp_typeof(*field);
> +	if (type == MP_STR)
> +		return 0;
> +	if (type != MP_MAP) {
> +		diag_set(ClientError, ER_FIELD_TYPE, "map or array",

1. 'Map or string'.

> +			 mp_type_strs[type]);
> +		return -1;
> +	}
> +	const char *agg = tuple_field_cstr(tuple, BOX_FUNC_FIELD_AGGREGATE);
> +	if (agg == NULL)
> +		return -1;
> +	if (STR2ENUM(func_aggregate, agg) != FUNC_AGGREGATE_GROUP) {
> +		const char *name = tuple_field_cstr(tuple, BOX_FUNC_FIELD_NAME);
> +		diag_set(ClientError, ER_CREATE_FUNCTION, name,
> +			 "only aggregate functions can have map as body");
> +		return -1;
> +	}
> +
> +	bool has_step = false;
> +	bool has_finalize = false;
> +	if (mp_decode_map(&field) != 2)
> +		goto error;
> +	for (int i = 0; i < 2; ++i) {
> +		const char *value;
> +		uint32_t size;
> +		if (mp_typeof(*field) != MP_STR)
> +			goto error;
> +		value = mp_decode_str(&field, &size);
> +		if (size == strlen("step")) {
> +			if (strncmp(value, "step", size) != 0)
> +				goto error;

2. I would rather propose to introduce a function to compare
0-terminated string with non-terminated one. It is needed quite
often apparently. For instance,

	strlcmp(const char *l, const char *r, size_t r_len);

> +			has_step = true;
> +		} else if (size == strlen("finalize")) {
> +			if (strncmp(value, "finalize", size) != 0)
> +				goto error;
> +			has_finalize = true;
> +		}
> +		if (mp_typeof(*field) != MP_STR)
> +			goto error;
> +		mp_next(&field);
> +	}
> +	if (has_step && has_finalize)
> +		return 0;
> +error:
> +	const char *name = tuple_field_cstr(tuple, BOX_FUNC_FIELD_NAME);
> +	diag_set(ClientError, ER_CREATE_FUNCTION, name,
> +		 "body of aggregate function should be map that contains "
> +		 "exactly two string fields: 'step' and 'finalize'");
> +	return -1;
> +}
> +
> +static const char *
> +func_def_get_agg_body(struct tuple *tuple, uint32_t *body_len, bool is_step)

3. Please, lets split that function in 2 maybe. Its invocation with
hardcoded 'true'/'false' in the end gives hard time understanding what
these values mean.

> +{
> +	const char *field = tuple_field(tuple, BOX_FUNC_FIELD_BODY);
> +	assert(field != NULL);
> +	if (mp_typeof(*field) == MP_STR) {
> +		if (is_step)
> +			return mp_decode_str(&field, body_len);
> +		*body_len = 0;
> +		return field;

4. You return len as 0, but the returned value != NULL? Why?

> +	}
> @@ -3568,6 +3697,16 @@ on_replace_dd_func(struct trigger * /* trigger */, void *event)
>  		struct func *func = func_new(def);
>  		if (func == NULL)
>  			return -1;
> +		if (def->aggregate == FUNC_AGGREGATE_GROUP) {
> +			struct func_def *fin_def =
> +				func_fin_def_new(new_tuple, def);
> +			if (fin_def == NULL)
> +				return -1;
> +			struct func *func_fin = func_new(fin_def);
> +			if (func_fin == NULL)
> +				return -1;
> +			func_fin_cache_insert(func_fin);

5. What happens if WAL write fails and on_create_func_rollback() is called?

> diff --git a/src/box/schema.h b/src/box/schema.h
> index d3bbdd590..2f020ee96 100644
> --- a/src/box/schema.h
> +++ b/src/box/schema.h
> @@ -102,6 +102,15 @@ func_by_id(uint32_t fid);
>  struct func *
>  func_by_name(const char *name, uint32_t name_len);
>  
> +void
> +func_fin_cache_insert(struct func *func);
> +
> +void
> +func_fin_cache_delete(uint32_t fid);
> +
> +struct func *
> +func_fin_by_id(uint32_t fid);

6. Lets leave some comments about what is 'func fin'.

> diff --git a/src/box/sql/select.c b/src/box/sql/select.c
> index 2e33a31d2..c77cd1c9f 100644
> --- a/src/box/sql/select.c
> +++ b/src/box/sql/select.c
> @@ -5644,10 +5650,17 @@ updateAccumulator(Parse * pParse, AggInfo * pAggInfo)
>  			pParse->is_aborted = true;
>  			return;
>  		}
> -		sqlVdbeAddOp3(v, OP_AggStep, nArg, regAgg, pF->iMem);
> -		sqlVdbeAppendP4(v, ctx, P4_FUNCCTX);
> -		sql_expr_type_cache_change(pParse, regAgg, nArg);
> -		sqlReleaseTempRange(pParse, regAgg, nArg);
> +		if (pF->func->def->language == FUNC_LANGUAGE_SQL_BUILTIN) {
> +			sqlVdbeAddOp3(v, OP_AggStep, nArg, regAgg, pF->iMem);
> +			sqlVdbeAppendP4(v, ctx, P4_FUNCCTX);
> +		} else {
> +			sqlVdbeAddOp3(v, OP_UserAggStep, nArg, regAgg,
> +				      pF->iMem);
> +			const char *name = pF->func->def->name;
> +			int len = pF->func->def->name_len;
> +			char *str = sqlDbStrNDup(pParse->db, name, len);
> +			sqlVdbeAppendP4(v, str, P4_DYNAMIC);

7. Was it possible to unify how builtin and user-defined functions
are called? In the same opcode without any special 'if's.

> diff --git a/test/sql-tap/gh-2579-custom-aggregate.test.lua b/test/sql-tap/gh-2579-custom-aggregate.test.lua
> new file mode 100755
> index 000000000..d5b845761
> --- /dev/null
> +++ b/test/sql-tap/gh-2579-custom-aggregate.test.lua
8. It might also be good to have a test with step/finalize functions
throwing a Lua error from their body.


More information about the Tarantool-patches mailing list