[Tarantool-patches] [PATCH v1 1/1] sql: define format of ephemeral spaces

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sat Jul 24 02:06:30 MSK 2021


Hi! Thanks for the patch!

See 9 comments below.

> diff --git a/src/box/sql.c b/src/box/sql.c
> index 790ca7f70..f5d0d6ce6 100644
> --- a/src/box/sql.c
> +++ b/src/box/sql.c
> @@ -298,7 +298,8 @@ tarantoolsqlCount(struct BtCursor *pCur)
>  }
>  
>  struct space *
> -sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
> +sql_ephemeral_space_create_key_info(uint32_t field_count,
> +				    struct sql_key_info *key_info)

1. The function creates not key_info. It creates a space by key_info.
Maybe call it sql_ephemeral_space_new_from_key_info()?

>  {
>  	struct key_def *def = NULL;
>  	uint32_t part_count = field_count;
> @@ -407,6 +408,122 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
>  	return ephemer_new_space;
>  }
>  
> +enum {
> +	/*
> +	 * Name of the fields will be "_COLUMN_1", "_COLUMN_2" and so on. Due to
> +	 * this, length of each name is no more than strlen("_COLUMN_") plus
> +	 * length of UINT32_MAX turned to string, which is 10 and plus 1 for \0.
> +	 */
> +	NAME_LEN = 19,
> +};
> +
> +static struct space_def *
> +sql_space_def_new_ephemeral(uint32_t field_count, enum field_type *types,
> +			    uint32_t *coll_ids)

2. The last 2 arguments can be made const. The same in the other functions
below.

> +{
> +	struct region *region = &fiber()->gc;
> +	size_t svp = region_used(region);
> +	uint32_t size = field_count * sizeof(struct field_def);
> +	struct field_def *fields = region_aligned_alloc(region, size,
> +							alignof(fields[0]));

3. There is region_alloc_array(). The same in the next function.

> +	if (fields == NULL) {
> +		diag_set(OutOfMemory, size, "region_aligned_alloc", "fields");
> +		return NULL;
> +	}
> +	char *names = region_alloc(region, field_count * NAME_LEN);
> +	if (names == NULL) {
> +		diag_set(OutOfMemory, size, "region_alloc", "names");> +		return NULL;
> +	}
> +	for (uint32_t i = 0; i < field_count; ++i) {
> +		struct field_def *field = &fields[i];
> +		field->name = &names[i * NAME_LEN];
> +		sprintf(field->name, "_COLUMN_%d", i);
> +		field->is_nullable = true;
> +		field->nullable_action = ON_CONFLICT_ACTION_NONE;
> +		field->default_value = NULL;
> +		field->default_value_expr = NULL;
> +		field->type = types[i];
> +		field->coll_id = coll_ids[i];
> +	}
> +	struct space_def *def = space_def_new_ephemeral(field_count, fields);
> +	region_truncate(region, svp);
> +	return def;
> +}
> +
> +static struct index_def *
> +sql_index_def_new_ephemeral(uint32_t field_count, enum field_type *types,
> +			    uint32_t *coll_ids, bool is_pk_rowid)
> +{
> +	uint32_t part_count = is_pk_rowid ? 1 : field_count;
> +	struct region *region = &fiber()->gc;
> +	size_t svp = region_used(region);
> +	uint32_t size = part_count * sizeof(struct key_part_def);
> +	struct key_part_def *parts = region_aligned_alloc(region, size,
> +							  alignof(parts[0]));
> +	if (parts == NULL) {
> +		diag_set(OutOfMemory, size, "region_aligned_alloc", "parts");
> +		return NULL;
> +	}
> +	if (is_pk_rowid) {
> +		uint32_t j = field_count - 1;
> +		parts[0].fieldno = j;
> +		parts[0].nullable_action = ON_CONFLICT_ACTION_NONE;
> +		parts[0].is_nullable = true;
> +		parts[0].exclude_null = false;
> +		parts[0].sort_order = SORT_ORDER_ASC;
> +		parts[0].path = NULL;
> +		parts[0].type = types[j];
> +		parts[0].coll_id = coll_ids[j];
> +	} else {
> +		for (uint32_t i = 0; i < part_count; ++i) {
> +			struct key_part_def *part = &parts[i];
> +			part->fieldno = i;
> +			part->nullable_action = ON_CONFLICT_ACTION_NONE;
> +			part->is_nullable = true;
> +			part->exclude_null = false;
> +			part->sort_order = SORT_ORDER_ASC;
> +			part->path = NULL;
> +			part->type = types[i];
> +			part->coll_id = coll_ids[i];
> +		}
> +	}
> +	struct key_def *key_def = key_def_new(parts, part_count, false);
> +	if (key_def == NULL)
> +		return NULL;
> +	const char *name = "ephemer_idx";
> +	struct index_def *def = index_def_new(0, 0, name, strlen(name), TREE,
> +					      &index_opts_default, key_def,
> +					      NULL);
> +	key_def_delete(key_def);
> +	region_truncate(region, svp);
> +	return def;
> +}
> +
> +struct space *
> +sql_ephemeral_space_create(uint32_t field_count, enum field_type *types,
> +			   uint32_t *coll_ids, bool is_pk_rowid)

4. It is strange how there are functions

	sql_space_def_new_ephemeral
	sql_index_def_new_ephemeral

and at the same time these:

	sql_ephemeral_space_create_key_info
	sql_ephemeral_space_create

First ones use ephemeral in the end, the last ones in the beginning.
I would propose to unify them while you are here. One of the
options:

	sql_ephemeral_space_def_new
	sql_ephemeral_index_def_new
	sql_ephemeral_space_new_from_key_info
	sql_ephemeral_space_new

> diff --git a/src/box/sql/delete.c b/src/box/sql/delete.c
> index 62a726fdd..b35545296 100644
> --- a/src/box/sql/delete.c
> +++ b/src/box/sql/delete.c
> @@ -249,22 +254,20 @@ sql_table_delete_from(struct Parse *parse, struct SrcList *tab_list,
>  			 * account that id field as well. That's why pk_len
>  			 * has one field more than view format.
>  			 */
> -			pk_len = space->def->field_count + 1;
> -			parse->nMem += pk_len;
> -			sqlVdbeAddOp2(v, OP_OpenTEphemeral, reg_eph,
> -					  pk_len);
> +			uint32_t count = space->def->field_count;
> +			fields_info_from_space_def(info->types, info->coll_ids,
> +						   count, space->def);

5. In all usages of fields_info_from_space_def you pass 2 fields of
field_info. I propose to make it take field_info pointer instead
of these 2 arrays. Also for individual fields I propose to add
field_info_set_field(type, coll_id). So as not to miss anything. Currently
it looks too much boilerplate code.

> +			info->types[count] = FIELD_TYPE_UNSIGNED;
> +			info->coll_ids[count] = COLL_NONE;
>  		} else {
> diff --git a/src/box/sql/select.c b/src/box/sql/select.c
> index b9107fccc..c59d7b4b4 100644
> --- a/src/box/sql/select.c
> +++ b/src/box/sql/select.c
> @@ -99,6 +99,103 @@ struct SortCtx {
>  #define SORTFLAG_UseSorter  0x01	/* Use SorterOpen instead of OpenEphemeral */
>  #define SORTFLAG_DESC 0xF0
>  
> +static inline uint32_t
> +multi_select_coll_seq(struct Parse *parser, struct Select *p, int n);
> +
> +struct fields_info *
> +fields_info_new(uint32_t len)
> +{
> +	uint32_t size_info = sizeof(struct fields_info);
> +	uint32_t size_types = len * sizeof(enum field_type);
> +	uint32_t size = size_info + size_types + len * (sizeof(uint32_t));
> +	char *buf = sqlDbMallocRawNN(sql_get(), size);
> +	struct fields_info *info = (struct fields_info *)buf;
> +	if (info == NULL)
> +		return NULL;
> +	info->key_info = NULL;
> +	buf += size_info;
> +	info->types = (enum field_type *)buf;
> +	buf += size_types;
> +	info->coll_ids = (uint32_t *)buf;
> +	return info;
> +}
> +
> +void
> +fields_info_delete(struct fields_info *info)
> +{
> +	if (info->key_info != NULL)
> +		sql_key_info_unref(info->key_info);
> +	return sqlDbFree(sql_get(), info);

6. You do not need this explicit 'return'.

> +}
> +
> +void
> +fields_info_from_space_def(enum field_type *types, uint32_t *coll_ids,
> +			   uint32_t count, struct space_def *def)

7. def can be made const. The same for some other arguments below.

> +{
> +	for (uint32_t i = 0; i < count; ++i) {
> +		types[i] = def->fields[i].type;
> +		coll_ids[i] = def->fields[i].coll_id;
> +	}
> +}
> +
> +void
> +fields_info_from_index_def(enum field_type *types, uint32_t *coll_ids,
> +			   uint32_t count, struct index_def *def)
> +{
> +	for (uint32_t i = 0; i < count; ++i) {
> +		types[i] = def->key_def->parts[i].type;
> +		coll_ids[i] = def->key_def->parts[i].coll_id;
> +	}
> +}
> +
> +static int
> +fields_info_from_expr_list(enum field_type *types, uint32_t *coll_ids,
> +			   struct Parse *parser, struct ExprList *list)
> +{
> +	for (int i = 0; i < list->nExpr; ++i) {
> +		bool b;
> +		struct coll *coll;
> +		struct Expr *expr = list->a[i].pExpr;
> +		types[i] = sql_expr_type(expr);
> +		if (types[i] == FIELD_TYPE_ANY)
> +			types[i] = FIELD_TYPE_SCALAR;
> +		if (sql_expr_coll(parser, expr, &b, &coll_ids[i], &coll) != 0)
> +			return -1;
> +	}
> +	return 0;
> +}
> +
> +static int
> +fields_info_from_order_by(enum field_type *types, uint32_t *coll_ids,
> +			  struct Parse *parser, struct Select *select,
> +			  struct ExprList *order_by)
> +{
> +	for (int i = 0; i < order_by->nExpr; ++i) {
> +		bool b;
> +		struct Expr *expr = order_by->a[i].pExpr;
> +		types[i] = sql_expr_type(expr);
> +		if (types[i] == FIELD_TYPE_ANY)
> +			types[i] = FIELD_TYPE_SCALAR;
> +		uint32_t id = COLL_NONE;
> +		if ((expr->flags & EP_Collate) != 0) {
> +			struct coll *coll;
> +			if (sql_expr_coll(parser, expr, &b, &id, &coll) != 0)
> +				return -1;
> +		} else {
> +			uint32_t fieldno = order_by->a[i].u.x.iOrderByCol - 1;
> +			id = multi_select_coll_seq(parser, select, fieldno);
> +			if (id != COLL_NONE) {
> +				const char *name = coll_by_id(id)->name;
> +				order_by->a[i].pExpr =
> +					sqlExprAddCollateString(parser, expr,
> +								name);
> +			}
> +		}
> +		coll_ids[i] = id;
> +	}
> +	return 0;
> +}

8. It sligtly cuts the ears that a struct called fields_info also
defines a key via key_info. It is like specifying indexes inside of
space format.

Maybe it is worth renaming it to ephemeral_space_def? Or space_info?
Or shorten ephemeral to ephmen in all the new code and use
ephem_space_def? Or sql_space_info? sql_ephemeral_space_def?

And make sql_ephemeral_space_create() take this struct instead of
2 arrays. Besides, you could hide sql_ephemeral_space_create_key_info()
inside of it because you could check key_info != null in
sql_ephemeral_space_create(). It just does not seem like vdbe business
to check these things.

> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
> index ef8dcd693..167e80057 100644
> --- a/src/box/sql/sqlInt.h
> +++ b/src/box/sql/sqlInt.h
> @@ -2248,6 +2248,11 @@ struct Parse {
>  #define OPFLAG_SYSTEMSP      0x20	/* OP_Open**: set if space pointer
>  					 * points to system space.
>  					 */
> +/**
> + * If this flag is set, than in OP_OpenTEphemeral we should use rowid as the

9. than -> then.


More information about the Tarantool-patches mailing list