Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: imeevma@tarantool.org
Cc: tarantool-patches@dev.tarantool.org
Subject: Re: [Tarantool-patches] [PATCH v1 1/1] sql: introduce user-defined aggregate functions
Date: Tue, 18 Jan 2022 01:10:04 +0100	[thread overview]
Message-ID: <0875aeb4-255d-ce3a-7244-69193cf2f334@tarantool.org> (raw)
In-Reply-To: <04154369ec1ff8a1eaf7c9ea1ed37e1fcd1a7120.1642167504.git.imeevma@gmail.com>

Hi! Thanks for the patch!

What happened to the idea we had about SQL iterators? Then people would
be able to iterate with yields and to make any kind of aggregation without
having to create functions for that.

See 8 comments below.

> diff --git a/src/box/alter.cc b/src/box/alter.cc
> index 65c1cb952..f1dfcd807 100644
> --- a/src/box/alter.cc
> +++ b/src/box/alter.cc
> @@ -3276,6 +3276,85 @@ func_def_get_ids_from_tuple(struct tuple *tuple, uint32_t *fid, uint32_t *uid)
>  	return tuple_field_u32(tuple, BOX_FUNC_FIELD_UID, uid);
>  }
>  
> +static int
> +func_def_check_body(struct tuple *tuple)
> +{
> +	assert(tuple_field_count(tuple) > BOX_FUNC_FIELD_BODY);
> +	const char *field = tuple_field(tuple, BOX_FUNC_FIELD_BODY);
> +	assert(field != NULL);
> +	enum mp_type type = mp_typeof(*field);
> +	if (type == MP_STR)
> +		return 0;
> +	if (type != MP_MAP) {
> +		diag_set(ClientError, ER_FIELD_TYPE, "map or array",

1. 'Map or string'.

> +			 mp_type_strs[type]);
> +		return -1;
> +	}
> +	const char *agg = tuple_field_cstr(tuple, BOX_FUNC_FIELD_AGGREGATE);
> +	if (agg == NULL)
> +		return -1;
> +	if (STR2ENUM(func_aggregate, agg) != FUNC_AGGREGATE_GROUP) {
> +		const char *name = tuple_field_cstr(tuple, BOX_FUNC_FIELD_NAME);
> +		diag_set(ClientError, ER_CREATE_FUNCTION, name,
> +			 "only aggregate functions can have map as body");
> +		return -1;
> +	}
> +
> +	bool has_step = false;
> +	bool has_finalize = false;
> +	if (mp_decode_map(&field) != 2)
> +		goto error;
> +	for (int i = 0; i < 2; ++i) {
> +		const char *value;
> +		uint32_t size;
> +		if (mp_typeof(*field) != MP_STR)
> +			goto error;
> +		value = mp_decode_str(&field, &size);
> +		if (size == strlen("step")) {
> +			if (strncmp(value, "step", size) != 0)
> +				goto error;

2. I would rather propose to introduce a function to compare
0-terminated string with non-terminated one. It is needed quite
often apparently. For instance,

	strlcmp(const char *l, const char *r, size_t r_len);

> +			has_step = true;
> +		} else if (size == strlen("finalize")) {
> +			if (strncmp(value, "finalize", size) != 0)
> +				goto error;
> +			has_finalize = true;
> +		}
> +		if (mp_typeof(*field) != MP_STR)
> +			goto error;
> +		mp_next(&field);
> +	}
> +	if (has_step && has_finalize)
> +		return 0;
> +error:
> +	const char *name = tuple_field_cstr(tuple, BOX_FUNC_FIELD_NAME);
> +	diag_set(ClientError, ER_CREATE_FUNCTION, name,
> +		 "body of aggregate function should be map that contains "
> +		 "exactly two string fields: 'step' and 'finalize'");
> +	return -1;
> +}
> +
> +static const char *
> +func_def_get_agg_body(struct tuple *tuple, uint32_t *body_len, bool is_step)

3. Please, lets split that function in 2 maybe. Its invocation with
hardcoded 'true'/'false' in the end gives hard time understanding what
these values mean.

> +{
> +	const char *field = tuple_field(tuple, BOX_FUNC_FIELD_BODY);
> +	assert(field != NULL);
> +	if (mp_typeof(*field) == MP_STR) {
> +		if (is_step)
> +			return mp_decode_str(&field, body_len);
> +		*body_len = 0;
> +		return field;

4. You return len as 0, but the returned value != NULL? Why?

> +	}
> @@ -3568,6 +3697,16 @@ on_replace_dd_func(struct trigger * /* trigger */, void *event)
>  		struct func *func = func_new(def);
>  		if (func == NULL)
>  			return -1;
> +		if (def->aggregate == FUNC_AGGREGATE_GROUP) {
> +			struct func_def *fin_def =
> +				func_fin_def_new(new_tuple, def);
> +			if (fin_def == NULL)
> +				return -1;
> +			struct func *func_fin = func_new(fin_def);
> +			if (func_fin == NULL)
> +				return -1;
> +			func_fin_cache_insert(func_fin);

5. What happens if WAL write fails and on_create_func_rollback() is called?

> diff --git a/src/box/schema.h b/src/box/schema.h
> index d3bbdd590..2f020ee96 100644
> --- a/src/box/schema.h
> +++ b/src/box/schema.h
> @@ -102,6 +102,15 @@ func_by_id(uint32_t fid);
>  struct func *
>  func_by_name(const char *name, uint32_t name_len);
>  
> +void
> +func_fin_cache_insert(struct func *func);
> +
> +void
> +func_fin_cache_delete(uint32_t fid);
> +
> +struct func *
> +func_fin_by_id(uint32_t fid);

6. Lets leave some comments about what is 'func fin'.

> diff --git a/src/box/sql/select.c b/src/box/sql/select.c
> index 2e33a31d2..c77cd1c9f 100644
> --- a/src/box/sql/select.c
> +++ b/src/box/sql/select.c
> @@ -5644,10 +5650,17 @@ updateAccumulator(Parse * pParse, AggInfo * pAggInfo)
>  			pParse->is_aborted = true;
>  			return;
>  		}
> -		sqlVdbeAddOp3(v, OP_AggStep, nArg, regAgg, pF->iMem);
> -		sqlVdbeAppendP4(v, ctx, P4_FUNCCTX);
> -		sql_expr_type_cache_change(pParse, regAgg, nArg);
> -		sqlReleaseTempRange(pParse, regAgg, nArg);
> +		if (pF->func->def->language == FUNC_LANGUAGE_SQL_BUILTIN) {
> +			sqlVdbeAddOp3(v, OP_AggStep, nArg, regAgg, pF->iMem);
> +			sqlVdbeAppendP4(v, ctx, P4_FUNCCTX);
> +		} else {
> +			sqlVdbeAddOp3(v, OP_UserAggStep, nArg, regAgg,
> +				      pF->iMem);
> +			const char *name = pF->func->def->name;
> +			int len = pF->func->def->name_len;
> +			char *str = sqlDbStrNDup(pParse->db, name, len);
> +			sqlVdbeAppendP4(v, str, P4_DYNAMIC);

7. Was it possible to unify how builtin and user-defined functions
are called? In the same opcode without any special 'if's.

> diff --git a/test/sql-tap/gh-2579-custom-aggregate.test.lua b/test/sql-tap/gh-2579-custom-aggregate.test.lua
> new file mode 100755
> index 000000000..d5b845761
> --- /dev/null
> +++ b/test/sql-tap/gh-2579-custom-aggregate.test.lua
8. It might also be good to have a test with step/finalize functions
throwing a Lua error from their body.

  reply	other threads:[~2022-01-18  0:10 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-14 13:38 Mergen Imeev via Tarantool-patches
2022-01-18  0:10 ` Vladislav Shpilevoy via Tarantool-patches [this message]
2022-01-23 14:17   ` Mergen Imeev via Tarantool-patches
2022-01-24 22:08     ` Vladislav Shpilevoy via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0875aeb4-255d-ce3a-7244-69193cf2f334@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=imeevma@tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v1 1/1] sql: introduce user-defined aggregate functions' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox