[Tarantool-patches] [PATCH v1 1/1] sql: define format of ephemeral spaces

Thu Aug 12 21:32:38 MSK 2021

Hi! Thanks for the fixes!

See 7 comments below.

> diff --git a/src/box/sql.c b/src/box/sql.c
> index b87d236d1..d4b88d6e5 100644
> --- a/src/box/sql.c
> +++ b/src/box/sql.c

<...>

> +void
> +sql_space_info_from_space_def(struct space_info *info,
> +			      const struct space_def *def)
> +{
> +	assert(info->field_count == def->field_count + 1);
> +	assert(info->part_count == 0);
> +	for (uint32_t i = 0; i < def->field_count; ++i) {
> +		info->types[i] = def->fields[i].type;
> +		info->coll_ids[i] = def->fields[i].coll_id;
> +	}
> +	info->types[def->field_count] = FIELD_TYPE_INTEGER;
> +	info->coll_ids[def->field_count] = COLL_NONE;

1. Please, explain in a comment what is this last special field.

<...>

> +struct space *
> +sql_ephemeral_space_new_from_info(const struct space_info *info)
> +{
> +	uint32_t field_count = info->field_count;
> +	uint32_t part_count = info->parts == NULL ? field_count :
> +			      info->part_count;
> +	uint32_t parts_indent = field_count * sizeof(struct field_def);
> +	uint32_t names_indent = part_count * sizeof(struct key_part_def) +
> +				parts_indent;
> +	uint32_t size = names_indent + field_count * NAME_LEN;
> +
> +	struct region *region = &fiber()->gc;
> +	size_t svp = region_used(region);
>  	struct field_def *fields = region_aligned_alloc(region, size,
>  							alignof(fields[0]));
>  	if (fields == NULL) {
>  		diag_set(OutOfMemory, size, "region_aligned_alloc", "fields");
>  		return NULL;
>  	}
> -	struct key_part_def *ephemer_key_parts =
> -		(void *)fields + field_count * sizeof(struct field_def);
> -	static_assert(alignof(*fields) == alignof(*ephemer_key_parts),
> -		      "allocated in one block, and should have the same "
> -		      "alignment");
> -	char *names = (char *)ephemer_key_parts +
> -		       part_count * sizeof(struct key_part_def);
> -	for (uint32_t i = 0; i < field_count; ++i) {
> -		struct field_def *field = &fields[i];
> -		field->name = names;
> -		names += name_len;
> -		sprintf(field->name, "_COLUMN_%d", i);
> -		field->is_nullable = true;
> -		field->nullable_action = ON_CONFLICT_ACTION_NONE;
> -		field->default_value = NULL;
> -		field->default_value_expr = NULL;
> -		if (def != NULL && i < def->part_count) {
> -			assert(def->parts[i].type < field_type_MAX);
> -			field->type = def->parts[i].type;
> -			field->coll_id = def->parts[i].coll_id;
> -		} else {
> -			field->coll_id = COLL_NONE;
> -			field->type = FIELD_TYPE_SCALAR;
> -		}
> +	struct key_part_def *parts = (void *)fields + parts_indent;

2. Are you sure void * + integer works bytewise? Shouldn't it be
cast to char * to move the pointer? In other places you use char *.

> +	static_assert(alignof(*fields) == alignof(*parts), "allocated in one "
> +		      "block, and should have the same alignment");
> +	char *names = (char *)fields + names_indent;
> +
> +	for (uint32_t i = 0; i < info->field_count; ++i) {
> +		fields[i].name = &names[i * NAME_LEN];
> +		sprintf(fields[i].name, "_COLUMN_%d", i);
> +		fields[i].is_nullable = true;
> +		fields[i].nullable_action = ON_CONFLICT_ACTION_NONE;
> +		fields[i].default_value = NULL;
> +		fields[i].default_value_expr = NULL;
> +		fields[i].type = info->types[i];
> +		fields[i].coll_id = info->coll_ids[i];
> +	}
> +	if (info->parts == NULL) {
> +		for (uint32_t i = 0; i < part_count; ++i)
> +			parts[i].fieldno = i;
> +	} else {
> +		for (uint32_t i = 0; i < part_count; ++i)
> +			parts[i].fieldno = info->parts[i];
>  	}

3. It looks not so good for the cache that you walk the parts
array more than once.

> -
>  	for (uint32_t i = 0; i < part_count; ++i) {
> -		struct key_part_def *part = &ephemer_key_parts[i];
> -		/*
> -		 * In case we need to save initial order of
> -		 * inserted into ephemeral space rows we use rowid
> -		 * as the only part of PK. If ephemeral space has
> -		 * a rowid, it is always the last column.
> -		 */
> -		uint32_t j = i;
> -		if (key_info != NULL && key_info->is_pk_rowid)
> -			j = field_count - 1;
> -		part->fieldno = j;
> -		part->nullable_action = ON_CONFLICT_ACTION_NONE;
> -		part->is_nullable = true;
> -		part->exclude_null = false;
> -		part->sort_order = SORT_ORDER_ASC;
> -		part->path = NULL;
> -		part->type = fields[j].type;
> -		part->coll_id = fields[j].coll_id;
> +		parts[i].nullable_action = ON_CONFLICT_ACTION_NONE;
> +		parts[i].is_nullable = true;
> +		parts[i].exclude_null = false;
> +		parts[i].sort_order = info->sort_order;
> +		parts[i].path = NULL;
> +		parts[i].type = info->types[parts[i].fieldno];
> +		parts[i].coll_id = info->coll_ids[parts[i].fieldno];
>  	}
> diff --git a/src/box/sql/delete.c b/src/box/sql/delete.c
> index 5226dd6ea..9d5a63fc5 100644
> --- a/src/box/sql/delete.c
> +++ b/src/box/sql/delete.c
> @@ -224,12 +224,20 @@ sql_table_delete_from(struct Parse *parse, struct SrcList *tab_list,
>  		 * is held in ephemeral table, there is no PK for
>  		 * it, so columns should be loaded manually.
>  		 */
> -		struct sql_key_info *pk_info = NULL;
>  		int reg_eph = ++parse->nMem;
>  		int reg_pk = parse->nMem + 1;
> -		int pk_len;
> +		int pk_len = is_view ? space->def->field_count + 1 :
> +			     space->index[0]->def->key_def->part_count;
>  		int eph_cursor = parse->nTab++;
>  		int addr_eph_open = sqlVdbeCurrentAddr(v);
> +		struct space_info *info = sql_space_info_new(pk_len, 0);

4. It is hard to understand when you calculate the space info field
count, create the info, and later sql_space_info_from_space_def() assumes
internally the field count you calculated manually. But I can't propose
anything better now.

> diff --git a/src/box/sql/insert.c b/src/box/sql/insert.c
> index 21b4f2407..99b5ceee6 100644
> --- a/src/box/sql/insert.c
> +++ b/src/box/sql/insert.c
> @@ -442,34 +442,44 @@ sqlInsert(Parse * pParse,	/* Parser context */
>  			reg_eph = ++pParse->nMem;
>  			regRec = sqlGetTempReg(pParse);
>  			regCopy = sqlGetTempRange(pParse, nColumn + 1);
> -			sqlVdbeAddOp2(v, OP_OpenTEphemeral, reg_eph,
> -					  nColumn + 1);
>  			/*
> -			 * This key_info is used to show that
> -			 * rowid should be the first part of PK in
> -			 * case we used AUTOINCREMENT feature.
> -			 * This way we will save initial order of
> -			 * the inserted values. The order is
> -			 * important if we use the AUTOINCREMENT
> -			 * feature, since changing the order can
> -			 * change the number inserted instead of
> -			 * NULL.
> +			 * Order of inserted values is important since it is
> +			 * possible, that NULL will be inserted in field with
> +			 * AUTOINCREMENT. So, the first part of key should be
> +			 * rowid. Since each rowid is unique, we do not need any
> +			 * other parts.
>  			 */
> -			if (space->sequence != NULL) {
> -				struct sql_key_info *key_info =
> -					sql_key_info_new(pParse->db,
> -							 nColumn + 1);
> -				key_info->parts[nColumn].type =
> -					FIELD_TYPE_UNSIGNED;
> -				key_info->is_pk_rowid = true;
> -				sqlVdbeChangeP4(v, -1, (void *)key_info,
> -					        P4_KEYINFO);
> +			struct space_info *info =
> +				sql_space_info_new(nColumn + 1, 1);
> +			if (info == NULL) {
> +				pParse->is_aborted = true;
> +				goto insert_cleanup;
>  			}
> +			struct field_def *fields = space_def->fields;
> +			if (pColumn != NULL) {
> +				for (int i = 0; i < nColumn; ++i) {
> +					int j = pColumn->a[i].idx;
> +					info->types[i] =  fields[j].type;
> +					info->coll_ids[i] =  fields[j].coll_id;
> +				}
> +			} else {
> +				for (int i = 0; i < nColumn; ++i) {
> +					info->types[i] =  fields[i].type;
> +					info->coll_ids[i] =  fields[i].coll_id;
> +				}
> +			}
> +			info->types[nColumn] = FIELD_TYPE_INTEGER;
> +			info->parts[0] = nColumn;

5. Why is sql_space_info_from_space_def() extracted but this is not?
It looks quite internal to space_info, since it changes its fields in
some intricate way.

The same in the other places where info is changed. For
instance, in @@ -5879,6 +6027,39 @@ sqlSelect.

> +
> +			sqlVdbeAddOp4(v, OP_OpenTEphemeral, reg_eph,
> +				      nColumn + 1, 0, (char *)info, P4_DYNAMIC);
>  			addrL = sqlVdbeAddOp1(v, OP_Yield, dest.iSDParm);
>  			VdbeCoverage(v);
>  			sqlVdbeAddOp2(v, OP_NextIdEphemeral, reg_eph,
>  					  regCopy + nColumn);
>  			sqlVdbeAddOp3(v, OP_Copy, regFromSelect, regCopy, nColumn-1);
> +			sqlVdbeAddOp4(v, OP_ApplyType, regCopy, nColumn + 1, 0,
> +				      (char *)info->types, P4_STATIC);
>  			sqlVdbeAddOp3(v, OP_MakeRecord, regCopy,
>  					  nColumn + 1, regRec);
>  			/* Set flag to save memory allocating one by malloc. */
> diff --git a/src/box/sql/select.c b/src/box/sql/select.c
> index b9107fccc..a26ca54a3 100644
> --- a/src/box/sql/select.c
> +++ b/src/box/sql/select.c
> @@ -1044,13 +1132,28 @@ selectInnerLoop(Parse * pParse,		/* The parser context */
>  			 * space format.
>  			 */
>  			uint32_t excess_field_count = 0;
> +			struct VdbeOp *op = sqlVdbeGetOp(v,
> +							 pSort->addrSortIndex);
> +			struct space_info *info;
> +			if (op->p4type == P4_KEYINFO)
> +				info = op->p4.key_info->info;
> +			else
> +				info = op->p4.space_info;
>  			for (i = pSort->nOBSat; i < pSort->pOrderBy->nExpr;
>  			     i++) {
>  				int j = pSort->pOrderBy->a[i].u.x.iOrderByCol;
> -				if (j > 0) {
> -					excess_field_count++;
> -					pEList->a[j - 1].u.x.iOrderByCol =
> -						(u16) (i + 1 - pSort->nOBSat);

6. No need for a whitespace after the cast. Also it could be
turned to uint16_t since you changed the line anyway.

> +				if (j == 0)
> +					continue;
> +				assert(j > 0);
> +				excess_field_count++;
> +				pEList->a[j - 1].u.x.iOrderByCol =
> +					(u16) (i + 1 - pSort->nOBSat);
> +				--info->field_count;
> +				for (int k = j; k < pEList->nExpr; ++k) {
> +					int n = k + pSort->pOrderBy->nExpr + 1;
> +					info->types[n - 1] = info->types[n];
> +					info->coll_ids[n - 1] =
> +						info->coll_ids[n];
>  				}
>  			}
> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
> index 1f87e6823..60fa1678d 100644
> --- a/src/box/sql/sqlInt.h
> +++ b/src/box/sql/sqlInt.h
> @@ -4006,6 +4006,8 @@ sql_analysis_load(struct sql *db);
>   */
>  struct sql_key_info {
>  	sql *db;
> +	/* Informations about all field types and key parts. */

7. For out of comments we use /** opening usually.