[Tarantool-patches] [PATCH v1 1/1] sql: define format of ephemeral spaces
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Sat Jul 24 02:06:30 MSK 2021
Hi! Thanks for the patch!
See 9 comments below.
> diff --git a/src/box/sql.c b/src/box/sql.c
> index 790ca7f70..f5d0d6ce6 100644
> --- a/src/box/sql.c
> +++ b/src/box/sql.c
> @@ -298,7 +298,8 @@ tarantoolsqlCount(struct BtCursor *pCur)
> }
>
> struct space *
> -sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
> +sql_ephemeral_space_create_key_info(uint32_t field_count,
> + struct sql_key_info *key_info)
1. The function creates not key_info. It creates a space by key_info.
Maybe call it sql_ephemeral_space_new_from_key_info()?
> {
> struct key_def *def = NULL;
> uint32_t part_count = field_count;
> @@ -407,6 +408,122 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
> return ephemer_new_space;
> }
>
> +enum {
> + /*
> + * Name of the fields will be "_COLUMN_1", "_COLUMN_2" and so on. Due to
> + * this, length of each name is no more than strlen("_COLUMN_") plus
> + * length of UINT32_MAX turned to string, which is 10 and plus 1 for \0.
> + */
> + NAME_LEN = 19,
> +};
> +
> +static struct space_def *
> +sql_space_def_new_ephemeral(uint32_t field_count, enum field_type *types,
> + uint32_t *coll_ids)
2. The last 2 arguments can be made const. The same in the other functions
below.
> +{
> + struct region *region = &fiber()->gc;
> + size_t svp = region_used(region);
> + uint32_t size = field_count * sizeof(struct field_def);
> + struct field_def *fields = region_aligned_alloc(region, size,
> + alignof(fields[0]));
3. There is region_alloc_array(). The same in the next function.
> + if (fields == NULL) {
> + diag_set(OutOfMemory, size, "region_aligned_alloc", "fields");
> + return NULL;
> + }
> + char *names = region_alloc(region, field_count * NAME_LEN);
> + if (names == NULL) {
> + diag_set(OutOfMemory, size, "region_alloc", "names");> + return NULL;
> + }
> + for (uint32_t i = 0; i < field_count; ++i) {
> + struct field_def *field = &fields[i];
> + field->name = &names[i * NAME_LEN];
> + sprintf(field->name, "_COLUMN_%d", i);
> + field->is_nullable = true;
> + field->nullable_action = ON_CONFLICT_ACTION_NONE;
> + field->default_value = NULL;
> + field->default_value_expr = NULL;
> + field->type = types[i];
> + field->coll_id = coll_ids[i];
> + }
> + struct space_def *def = space_def_new_ephemeral(field_count, fields);
> + region_truncate(region, svp);
> + return def;
> +}
> +
> +static struct index_def *
> +sql_index_def_new_ephemeral(uint32_t field_count, enum field_type *types,
> + uint32_t *coll_ids, bool is_pk_rowid)
> +{
> + uint32_t part_count = is_pk_rowid ? 1 : field_count;
> + struct region *region = &fiber()->gc;
> + size_t svp = region_used(region);
> + uint32_t size = part_count * sizeof(struct key_part_def);
> + struct key_part_def *parts = region_aligned_alloc(region, size,
> + alignof(parts[0]));
> + if (parts == NULL) {
> + diag_set(OutOfMemory, size, "region_aligned_alloc", "parts");
> + return NULL;
> + }
> + if (is_pk_rowid) {
> + uint32_t j = field_count - 1;
> + parts[0].fieldno = j;
> + parts[0].nullable_action = ON_CONFLICT_ACTION_NONE;
> + parts[0].is_nullable = true;
> + parts[0].exclude_null = false;
> + parts[0].sort_order = SORT_ORDER_ASC;
> + parts[0].path = NULL;
> + parts[0].type = types[j];
> + parts[0].coll_id = coll_ids[j];
> + } else {
> + for (uint32_t i = 0; i < part_count; ++i) {
> + struct key_part_def *part = &parts[i];
> + part->fieldno = i;
> + part->nullable_action = ON_CONFLICT_ACTION_NONE;
> + part->is_nullable = true;
> + part->exclude_null = false;
> + part->sort_order = SORT_ORDER_ASC;
> + part->path = NULL;
> + part->type = types[i];
> + part->coll_id = coll_ids[i];
> + }
> + }
> + struct key_def *key_def = key_def_new(parts, part_count, false);
> + if (key_def == NULL)
> + return NULL;
> + const char *name = "ephemer_idx";
> + struct index_def *def = index_def_new(0, 0, name, strlen(name), TREE,
> + &index_opts_default, key_def,
> + NULL);
> + key_def_delete(key_def);
> + region_truncate(region, svp);
> + return def;
> +}
> +
> +struct space *
> +sql_ephemeral_space_create(uint32_t field_count, enum field_type *types,
> + uint32_t *coll_ids, bool is_pk_rowid)
4. It is strange how there are functions
sql_space_def_new_ephemeral
sql_index_def_new_ephemeral
and at the same time these:
sql_ephemeral_space_create_key_info
sql_ephemeral_space_create
First ones use ephemeral in the end, the last ones in the beginning.
I would propose to unify them while you are here. One of the
options:
sql_ephemeral_space_def_new
sql_ephemeral_index_def_new
sql_ephemeral_space_new_from_key_info
sql_ephemeral_space_new
> diff --git a/src/box/sql/delete.c b/src/box/sql/delete.c
> index 62a726fdd..b35545296 100644
> --- a/src/box/sql/delete.c
> +++ b/src/box/sql/delete.c
> @@ -249,22 +254,20 @@ sql_table_delete_from(struct Parse *parse, struct SrcList *tab_list,
> * account that id field as well. That's why pk_len
> * has one field more than view format.
> */
> - pk_len = space->def->field_count + 1;
> - parse->nMem += pk_len;
> - sqlVdbeAddOp2(v, OP_OpenTEphemeral, reg_eph,
> - pk_len);
> + uint32_t count = space->def->field_count;
> + fields_info_from_space_def(info->types, info->coll_ids,
> + count, space->def);
5. In all usages of fields_info_from_space_def you pass 2 fields of
field_info. I propose to make it take field_info pointer instead
of these 2 arrays. Also for individual fields I propose to add
field_info_set_field(type, coll_id). So as not to miss anything. Currently
it looks too much boilerplate code.
> + info->types[count] = FIELD_TYPE_UNSIGNED;
> + info->coll_ids[count] = COLL_NONE;
> } else {
> diff --git a/src/box/sql/select.c b/src/box/sql/select.c
> index b9107fccc..c59d7b4b4 100644
> --- a/src/box/sql/select.c
> +++ b/src/box/sql/select.c
> @@ -99,6 +99,103 @@ struct SortCtx {
> #define SORTFLAG_UseSorter 0x01 /* Use SorterOpen instead of OpenEphemeral */
> #define SORTFLAG_DESC 0xF0
>
> +static inline uint32_t
> +multi_select_coll_seq(struct Parse *parser, struct Select *p, int n);
> +
> +struct fields_info *
> +fields_info_new(uint32_t len)
> +{
> + uint32_t size_info = sizeof(struct fields_info);
> + uint32_t size_types = len * sizeof(enum field_type);
> + uint32_t size = size_info + size_types + len * (sizeof(uint32_t));
> + char *buf = sqlDbMallocRawNN(sql_get(), size);
> + struct fields_info *info = (struct fields_info *)buf;
> + if (info == NULL)
> + return NULL;
> + info->key_info = NULL;
> + buf += size_info;
> + info->types = (enum field_type *)buf;
> + buf += size_types;
> + info->coll_ids = (uint32_t *)buf;
> + return info;
> +}
> +
> +void
> +fields_info_delete(struct fields_info *info)
> +{
> + if (info->key_info != NULL)
> + sql_key_info_unref(info->key_info);
> + return sqlDbFree(sql_get(), info);
6. You do not need this explicit 'return'.
> +}
> +
> +void
> +fields_info_from_space_def(enum field_type *types, uint32_t *coll_ids,
> + uint32_t count, struct space_def *def)
7. def can be made const. The same for some other arguments below.
> +{
> + for (uint32_t i = 0; i < count; ++i) {
> + types[i] = def->fields[i].type;
> + coll_ids[i] = def->fields[i].coll_id;
> + }
> +}
> +
> +void
> +fields_info_from_index_def(enum field_type *types, uint32_t *coll_ids,
> + uint32_t count, struct index_def *def)
> +{
> + for (uint32_t i = 0; i < count; ++i) {
> + types[i] = def->key_def->parts[i].type;
> + coll_ids[i] = def->key_def->parts[i].coll_id;
> + }
> +}
> +
> +static int
> +fields_info_from_expr_list(enum field_type *types, uint32_t *coll_ids,
> + struct Parse *parser, struct ExprList *list)
> +{
> + for (int i = 0; i < list->nExpr; ++i) {
> + bool b;
> + struct coll *coll;
> + struct Expr *expr = list->a[i].pExpr;
> + types[i] = sql_expr_type(expr);
> + if (types[i] == FIELD_TYPE_ANY)
> + types[i] = FIELD_TYPE_SCALAR;
> + if (sql_expr_coll(parser, expr, &b, &coll_ids[i], &coll) != 0)
> + return -1;
> + }
> + return 0;
> +}
> +
> +static int
> +fields_info_from_order_by(enum field_type *types, uint32_t *coll_ids,
> + struct Parse *parser, struct Select *select,
> + struct ExprList *order_by)
> +{
> + for (int i = 0; i < order_by->nExpr; ++i) {
> + bool b;
> + struct Expr *expr = order_by->a[i].pExpr;
> + types[i] = sql_expr_type(expr);
> + if (types[i] == FIELD_TYPE_ANY)
> + types[i] = FIELD_TYPE_SCALAR;
> + uint32_t id = COLL_NONE;
> + if ((expr->flags & EP_Collate) != 0) {
> + struct coll *coll;
> + if (sql_expr_coll(parser, expr, &b, &id, &coll) != 0)
> + return -1;
> + } else {
> + uint32_t fieldno = order_by->a[i].u.x.iOrderByCol - 1;
> + id = multi_select_coll_seq(parser, select, fieldno);
> + if (id != COLL_NONE) {
> + const char *name = coll_by_id(id)->name;
> + order_by->a[i].pExpr =
> + sqlExprAddCollateString(parser, expr,
> + name);
> + }
> + }
> + coll_ids[i] = id;
> + }
> + return 0;
> +}
8. It sligtly cuts the ears that a struct called fields_info also
defines a key via key_info. It is like specifying indexes inside of
space format.
Maybe it is worth renaming it to ephemeral_space_def? Or space_info?
Or shorten ephemeral to ephmen in all the new code and use
ephem_space_def? Or sql_space_info? sql_ephemeral_space_def?
And make sql_ephemeral_space_create() take this struct instead of
2 arrays. Besides, you could hide sql_ephemeral_space_create_key_info()
inside of it because you could check key_info != null in
sql_ephemeral_space_create(). It just does not seem like vdbe business
to check these things.
> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
> index ef8dcd693..167e80057 100644
> --- a/src/box/sql/sqlInt.h
> +++ b/src/box/sql/sqlInt.h
> @@ -2248,6 +2248,11 @@ struct Parse {
> #define OPFLAG_SYSTEMSP 0x20 /* OP_Open**: set if space pointer
> * points to system space.
> */
> +/**
> + * If this flag is set, than in OP_OpenTEphemeral we should use rowid as the
9. than -> then.
More information about the Tarantool-patches
mailing list