[Tarantool-patches] [PATCH v3 4/4] sql: support column addition
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Thu Aug 20 01:20:30 MSK 2020
Thanks for the patch!
See 20 comments below.
> diff --git a/src/box/sql/build.c b/src/box/sql/build.c
> index 9013bc86f..6f3d2747d 100644
> --- a/src/box/sql/build.c
> +++ b/src/box/sql/build.c
> @@ -285,48 +285,113 @@ sql_field_retrieve(Parse *parser, struct space_def *space_def, uint32_t id)
> return field;
> }
>
> -/*
> - * Add a new column to the table currently being constructed.
> +/**
> + * Make shallow copy of @a space on region.
> *
> - * The parser calls this routine once for each column declaration
> - * in a CREATE TABLE statement. sqlStartTable() gets called
> - * first to get things going. Then this routine is called for each
> - * column.
> + * Function is used to add a new column to an existing space with
> + * <ALTER TABLE ADD COLUMN> statement. Copy space def and index
> + * array to create constraints appeared in the statement. The
> + * index array copy will be modified by adding new elements to it.
> + * It is necessary, because the statement may contain several
> + * index definitions (constraints).
> */
> +static struct space *
> +sql_shallow_space_copy(struct Parse *parse, struct space *space)
> +{
> + assert(space->def != NULL);
> + struct space *ret = sql_ephemeral_space_new(parse, space->def->name);
> + if (ret == NULL)
> + return NULL;
> + ret->index_count = space->index_count;
> + ret->index_id_max = space->index_id_max;
> + uint32_t indexes_sz = sizeof(struct index *) * (ret->index_count);
> + ret->index = (struct index **) malloc(indexes_sz);
1. Why can't you use parser's region?
> + if (ret->index == NULL) {
> + diag_set(OutOfMemory, indexes_sz, "realloc", "ret->index");
2. It is not realloc, it is malloc.
3. Seems you need to set parser->is_aborted = true in case of an error. As
far as I understand, it is a contract of each function taking Parse
argument.
> + return NULL;
> + }
> + memcpy(ret->index, space->index, indexes_sz);
> + memcpy(ret->def, space->def, sizeof(struct space_def));
> + ret->def->opts.is_temporary = true;
> + ret->def->opts.is_ephemeral = true;
> + if (ret->def->field_count != 0) {
> + uint32_t fields_size = 0;
> + ret->def->fields =
> + region_alloc_array(&parse->region,
> + typeof(struct field_def),
> + ret->def->field_count, &fields_size);
> + if (ret->def->fields == NULL) {
> + diag_set(OutOfMemory, fields_size, "region_alloc",
> + "ret->def->fields");
> + free(ret->index);
> + return NULL;
> + }
> + memcpy(ret->def->fields, space->def->fields, fields_size);
> + }
> +
> + return ret;
> +}
> @@ -334,18 +399,86 @@ sqlAddColumn(Parse * pParse, Token * pName, struct type_def *type_def)
> +void
> +sql_create_column_end(struct Parse *parse)
> +{
> + struct space *space = parse->create_column_def.space;
> + assert(space != NULL);
> + struct space_def *def = space->def;
> + struct field_def *field = &def->fields[def->field_count - 1];
> + if (field->nullable_action == ON_CONFLICT_ACTION_DEFAULT) {
> + field->nullable_action = ON_CONFLICT_ACTION_NONE;
> + field->is_nullable = true;
> + }
> + /*
> + * Encode the format array and emit code to update _space.
> + */
> + uint32_t table_stmt_sz = 0;
> + struct region *region = &parse->region;
> + char *table_stmt = sql_encode_table(region, def, &table_stmt_sz);
> + char *raw = sqlDbMallocRaw(parse->db, table_stmt_sz);
> + if (table_stmt == NULL || raw == NULL) {
> + parse->is_aborted = true;
> + return;
> + }
> + memcpy(raw, table_stmt, table_stmt_sz);
> +
> + struct Vdbe *v = sqlGetVdbe(parse);
> + assert(v != NULL);
> +
> + struct space *system_space = space_by_id(BOX_SPACE_ID);
> + assert(system_space != NULL);
> + int cursor = parse->nTab++;
> + vdbe_emit_open_cursor(parse, cursor, 0, system_space);
> + sqlVdbeChangeP5(v, OPFLAG_SYSTEMSP);
> +
> + int key_reg = ++parse->nMem;
> + sqlVdbeAddOp2(v, OP_Integer, def->id, key_reg);
> + int addr = sqlVdbeAddOp4Int(v, OP_Found, cursor, 0, key_reg, 1);
> + sqlVdbeAddOp2(v, OP_Halt, -1, ON_CONFLICT_ACTION_ABORT);
> + sqlVdbeJumpHere(v, addr);
> +
> + int tuple_reg = sqlGetTempRange(parse, box_space_field_MAX + 1);
4. You need to call sqlReleaseTempRange() somewhere.
> + for (int i = 0; i < box_space_field_MAX - 1; ++i)
> + sqlVdbeAddOp3(v, OP_Column, cursor, i, tuple_reg + i);
> + sqlVdbeAddOp1(v, OP_Close, cursor);
> +
> + sqlVdbeAddOp2(v, OP_Integer, def->field_count, tuple_reg + 4);
> + sqlVdbeAddOp4(v, OP_Blob, table_stmt_sz, tuple_reg + 6,
> + SQL_SUBTYPE_MSGPACK, raw, P4_DYNAMIC);
> + sqlVdbeAddOp3(v, OP_MakeRecord, tuple_reg, box_space_field_MAX,
> + tuple_reg + box_space_field_MAX);
> + sqlVdbeAddOp4(v, OP_IdxReplace, tuple_reg + box_space_field_MAX, 0, 0,
> + (char *) system_space, P4_SPACEPTR);
> + sql_vdbe_create_constraints(parse, key_reg);
> +
> + /*
> + * Clean up array allocated in sql_shallow_space_copy().
> + */
> + free(space->index);
5. It may happen that sql_create_column_end() is never called. For
example, if an error is encountered somewhere inside column definition,
after sql_create_column_start() is called.
> }
>
> void
> sql_column_add_nullable_action(struct Parse *parser,
> enum on_conflict_action nullable_action)
> {
> - struct space *space = parser->create_table_def.new_space;
> - if (space == NULL || NEVER(space->def->field_count < 1))
> + assert(parser->create_column_def.space != NULL);
> + struct space_def *def = parser->create_column_def.space->def;
> + if (NEVER(def->field_count < 1))
> return;
> - struct space_def *def = space->def;
> struct field_def *field = &def->fields[def->field_count - 1];
> if (field->nullable_action != ON_CONFLICT_ACTION_DEFAULT &&
> nullable_action != field->nullable_action) {
> @@ -364,51 +497,42 @@ sql_column_add_nullable_action(struct Parse *parser,
> }
>
> /*
> - * The expression is the default value for the most recently added column
> - * of the table currently under construction.
> + * The expression is the default value for the most recently added
> + * column.
> *
> * Default value expressions must be constant. Raise an exception if this
> * is not the case.
> *
> * This routine is called by the parser while in the middle of
> - * parsing a CREATE TABLE statement.
> + * parsing a <CREATE TABLE> or a <ALTER TABLE ADD COLUMN>
> + * statement.
> */
> void
> sqlAddDefaultValue(Parse * pParse, ExprSpan * pSpan)
> {
> sql *db = pParse->db;
> - struct space *p = pParse->create_table_def.new_space;
> - if (p != NULL) {
> - assert(p->def->opts.is_ephemeral);
> - struct space_def *def = p->def;
> - if (!sqlExprIsConstantOrFunction
> - (pSpan->pExpr, db->init.busy)) {
> - const char *column_name =
> - def->fields[def->field_count - 1].name;
> - diag_set(ClientError, ER_CREATE_SPACE, def->name,
> - tt_sprintf("default value of column '%s' is "\
> - "not constant", column_name));
> + assert(pParse->create_column_def.space != NULL);
> + struct space_def *def = pParse->create_column_def.space->def;
> + struct field_def *field = &def->fields[def->field_count - 1];
> + if (!sqlExprIsConstantOrFunction(pSpan->pExpr, db->init.busy)) {
> + diag_set(ClientError, ER_CREATE_SPACE, def->name,
> + tt_sprintf("default value of column '%s' is not "
> + "constant", field->name));
> + pParse->is_aborted = true;
> + } else {
> + struct region *region = &pParse->region;
> + uint32_t default_length = (int)(pSpan->zEnd - pSpan->zStart);
> + field->default_value = region_alloc(region, default_length + 1);
> + if (field->default_value == NULL) {
> + diag_set(OutOfMemory, default_length + 1,
> + "region_alloc", "field->default_value");
> pParse->is_aborted = true;
> - } else {
> - assert(def != NULL);
> - struct field_def *field =
> - &def->fields[def->field_count - 1];
> - struct region *region = &pParse->region;
> - uint32_t default_length = (int)(pSpan->zEnd - pSpan->zStart);
> - field->default_value = region_alloc(region,
> - default_length + 1);
> - if (field->default_value == NULL) {
> - diag_set(OutOfMemory, default_length + 1,
> - "region_alloc",
> - "field->default_value");
> - pParse->is_aborted = true;
> - return;
> - }
> - strncpy(field->default_value, pSpan->zStart,
> - default_length);
> - field->default_value[default_length] = '\0';
> + goto add_default_value_exit;
> }
> + strncpy(field->default_value, pSpan->zStart, default_length);
> + field->default_value[default_length] = '\0';
> }
> +add_default_value_exit:
> sql_expr_delete(db, pSpan->pExpr, false);
6. Was it necessary to make so many changes? Wouldn't it work if you
would just replace
struct space *p = pParse->create_table_def.new_space;
with
struct space *p = pParse->create_column_def.space;
?
> }
>
> @@ -574,8 +700,10 @@ sql_create_check_contraint(struct Parse *parser)
> (struct alter_entity_def *) create_ck_def;
> assert(alter_def->entity_type == ENTITY_TYPE_CK);
> (void) alter_def;
> - struct space *space = parser->create_table_def.new_space;
> - bool is_alter = space == NULL;
> + struct space *space = parser->create_column_def.space;
> + if (space == NULL)
> + space = parser->create_table_def.new_space;
7. Why in some places you check create_table_def.new_space != NULL
first, and in some places create_column_def.space != NULL first? Is
the order important somehow?
> + bool is_alter_add_constr = space == NULL;
> /* Prepare payload for ck constraint definition. */
> struct region *region = &parser->region;> @@ -704,8 +845,7 @@ sql_column_collation(struct space_def *def, uint32_t column, uint32_t *coll_id)
> *
> * In cases mentioned above collation is fetched by id.
> */
> - if (space == NULL) {
> - assert(def->opts.is_ephemeral);
> + if (def->opts.is_ephemeral) {
8. space_by_id() above is not needed, if the definition is ephemeral now. It
can be moved below this condition so as not to call it when the result is
not going to be used anyway.
> assert(column < (uint32_t)def->field_count);
> *coll_id = def->fields[column].coll_id;
> struct coll_id *collation = coll_by_id(*coll_id);
> @@ -1148,15 +1292,21 @@ resolve_link(struct Parse *parse_context, const struct space_def *def,
>
> /**
> * Emit code to create sequences, indexes, check and foreign key
> - * constraints appeared in <CREATE TABLE>.
> + * constraints appeared in <CREATE TABLE> or
> + * <ALTER TABLE ADD COLUMN>.
> */
> static void
> sql_vdbe_create_constraints(struct Parse *parse, int reg_space_id)
> {
> assert(reg_space_id != 0);
> struct space *space = parse->create_table_def.new_space;
> - assert(space != NULL);
> + bool is_alter = space == NULL;
> uint32_t i = 0;
> + if (is_alter) {
> + space = parse->create_column_def.space;
> + i = space_by_name(space->def->name)->index_count;
9. Why do you need the original space for that? sql_shallow_space_copy()
copies index_count as well.
> + }
> + assert(space != NULL);
> for (; i < space->index_count; ++i) {
> struct index *idx = space->index[i];
> vdbe_emit_create_index(parse, space->def, idx->def,
> @@ -1908,6 +2077,8 @@ sql_create_foreign_key(struct Parse *parse_context)
> goto tnt_error;
> }
> memset(fk_parse, 0, sizeof(*fk_parse));
> + if (parse_context->create_column_def.space != NULL)
> + child_space = space;
10. Why?
> rlist_add_entry(&parse_context->fkeys, fk_parse, link);
> }
> struct Token *parent = create_fk_def->parent_name;
> @@ -1920,28 +2091,45 @@ sql_create_foreign_key(struct Parse *parse_context)
> struct space *parent_space = space_by_name(parent_name);
> - if (parent_space == NULL) {
> - if (is_self_referenced) {
> - struct fk_constraint_parse *fk =
> - rlist_first_entry(&parse_context->fkeys,
> - struct fk_constraint_parse,
> - link);
> - fk->selfref_cols = parent_cols;
> - fk->is_self_referenced = true;
> - } else {
> - diag_set(ClientError, ER_NO_SUCH_SPACE, parent_name);;
> - goto tnt_error;
> - }
> + if (parent_space == NULL && !is_self_referenced) {
> + diag_set(ClientError, ER_NO_SUCH_SPACE, parent_name);
> + goto tnt_error;
> + }
> + if (is_self_referenced) {
> + struct fk_constraint_parse *fk =
> + rlist_first_entry(&parse_context->fkeys,
> + struct fk_constraint_parse,
> + link);
> + fk->selfref_cols = parent_cols;
> + fk->is_self_referenced = true;
> }
^^^
11. This refactoring seems unnecessary. What changed here functionally?
> - if (!is_alter) {
> + if (!is_alter_add_constr) {
> if (create_def->name.n == 0) {
> - constraint_name =
> - sqlMPrintf(db, "fk_unnamed_%s_%d",
> - space->def->name,
> - ++parse_context->fkey_count);
> + uint32_t idx = ++parse_context->fkey_count;
> + /*
> + * If it is <ALTER TABLE ADD COLUMN> we
> + * should count the existing FK
> + * constraints in the space and form a
> + * name based on this.
> + */
> + if (table_def->new_space == NULL) {
> + struct space *original_space =
> + space_by_name(space->def->name);
> + assert(original_space != NULL);
> + struct rlist *child_fk =
> + &original_space->child_fk_constraint;
> + if (!rlist_empty(child_fk)) {
12. You don't need to check for emptiness. rlist_foreach_entry() works fine
with an empty list.
> + struct fk_constraint *fk;
> + rlist_foreach_entry(fk, child_fk,
> + in_child_space)
> + idx++;
> + }
> + }
> + constraint_name = sqlMPrintf(db, "fk_unnamed_%s_%d",
> + space->def->name, idx);
> } else {
> constraint_name =
> sql_name_from_token(db, &create_def->name);
> @@ -2001,7 +2189,8 @@ sql_create_foreign_key(struct Parse *parse_context)
> }
> int actions = create_fk_def->actions;
> fk_def->field_count = child_cols_count;
> - fk_def->child_id = child_space != NULL ? child_space->def->id : 0;
> + fk_def->child_id = table_def->new_space == NULL ?
> + child_space->def->id : 0;
13. Why?
> fk_def->parent_id = parent_space != NULL ? parent_space->def->id : 0;
> fk_def->is_deferred = create_constr_def->is_deferred;
> fk_def->match = (enum fk_constraint_match) (create_fk_def->match);
> @@ -2420,10 +2613,8 @@ sql_create_index(struct Parse *parse) {
> }
> goto exit_create_index;
> }
> - } else {
> - if (parse->create_table_def.new_space == NULL)
> - goto exit_create_index;
> - space = parse->create_table_def.new_space;
> + } else if (space == NULL) {
14. Why not !is_create_table_or_add_col?
> + goto exit_create_index;
> }
> struct space_def *def = space->def;
>
> diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
> index 995875566..0c9887851 100644
> --- a/src/box/sql/parse.y
> +++ b/src/box/sql/parse.y
> @@ -281,9 +286,11 @@ nm(A) ::= id(A). {
> }
> }
>
> -// "carglist" is a list of additional constraints that come after the
> -// column name and column type in a CREATE TABLE statement.
> -//
> +/*
15. Out-of-function comments are started from /**.
> + * "carglist" is a list of additional constraints and clauses that
> + * come after the column name and column type in a <CREATE TABLE>
> + * or <ALTER TABLE ADD COLUMN> statement.
> + */
> carglist ::= carglist ccons.
> carglist ::= .
> %type cconsname { struct Token }
> diff --git a/src/box/sql/parse_def.h b/src/box/sql/parse_def.h
> index 1105fda6e..336914c57 100644
> --- a/src/box/sql/parse_def.h
> +++ b/src/box/sql/parse_def.h
> @@ -207,6 +209,14 @@ struct create_table_def {
> struct space *new_space;
> };
>
> +struct create_column_def {
> + struct create_entity_def base;
> + /** Shallow space_def copy. */
16. Not space_def.
> + struct space *space;
> + /** Column type. */
> + struct type_def *type_def;
> +};
> +
> struct create_view_def {
> struct create_entity_def base;
> /**
> diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
> index fa87e7bd2..32142a871 100644
> --- a/src/box/sql/sqlInt.h
> +++ b/src/box/sql/sqlInt.h
> @@ -2860,15 +2864,30 @@ struct space *sqlResultSetOfSelect(Parse *, Select *);
>
> struct space *
> sqlStartTable(Parse *, Token *);
> -void sqlAddColumn(Parse *, Token *, struct type_def *);
> +
> +/**
> + * Add new field to the format of ephemeral space in
> + * create_table_def. If it is <ALTER TABLE> create shallow copy of
> + * the existing space and add field to its format.
17. It fills create_column_def, not create_table_def.
> + */
> +void
> +sql_create_column_start(struct Parse *parse);
> +
> diff --git a/test/sql/add-column.result b/test/sql/add-column.result
> new file mode 100644
> index 000000000..f86259105
> --- /dev/null
> +++ b/test/sql/add-column.result
> @@ -0,0 +1,471 @@
> +-- test-run result file version 2
> +--
> +-- gh-3075: Check <ALTER TABLE table ADD COLUMN column> statement.
> +--
> +CREATE TABLE t1 (a INT PRIMARY KEY);
> + | ---
> + | - row_count: 1
> + | ...
> +
> +--
> +-- COLUMN keyword is optional. Check it here, but omit it below.
> +--
> +ALTER TABLE t1 ADD COLUMN b INT;
> + | ---
> + | - row_count: 0
18. It seems you need to return row_count 1. To be consistent with
other ALTER TABLE expressions.
> + | ...
> +
> +--
> +-- A column with the same name already exists.
> +--
> +ALTER TABLE t1 ADD b SCALAR;
> + | ---
> + | - null
> + | - Space field 'B' is duplicate
> + | ...
> +
> +--
> +-- Can't add column to a view.
> +--
> +CREATE VIEW v AS SELECT * FROM t1;
> + | ---
> + | - row_count: 1
> + | ...
> +ALTER TABLE v ADD b INT;
> + | ---
> + | - null
> + | - Can't add column 'B'. 'V' is a view
19. What if I do the same via direct replace into _space in Lua?
> + | ...
> +DROP VIEW v;
> + | ---
> + | - row_count: 1
> + | ...
> +
> +--
> +-- Check PRIMARY KEY constraint works with an added column.
> +--
> +CREATE TABLE pk_check (a INT CONSTRAINT pk PRIMARY KEY);
> + | ---
> + | - row_count: 1
> + | ...
> +ALTER TABLE pk_check DROP CONSTRAINT pk;
> + | ---
> + | - row_count: 1
> + | ...
> +ALTER TABLE pk_check ADD b INT PRIMARY KEY;
> + | ---
> + | - row_count: 0
> + | ...
> +INSERT INTO pk_check VALUES (1, 1);
> + | ---
> + | - row_count: 1
> + | ...
> +INSERT INTO pk_check VALUES (1, 1);
> + | ---
> + | - null
> + | - Duplicate key exists in unique index 'pk_unnamed_PK_CHECK_1' in space 'PK_CHECK'
> + | ...
> +DROP TABLE pk_check;
> + | ---
> + | - row_count: 1
> + | ...
> +
> +--
> +-- Check UNIQUE constraint works with an added column.
> +--
> +CREATE TABLE unique_check (a INT PRIMARY KEY);
> + | ---
> + | - row_count: 1
> + | ...
> +ALTER TABLE unique_check ADD b INT UNIQUE;
> + | ---
> + | - row_count: 0
> + | ...
> +INSERT INTO unique_check VALUES (1, 1);
> + | ---
> + | - row_count: 1
> + | ...
> +INSERT INTO unique_check VALUES (2, 1);
> + | ---
> + | - null
> + | - Duplicate key exists in unique index 'unique_unnamed_UNIQUE_CHECK_2' in space 'UNIQUE_CHECK'
> + | ...
> +DROP TABLE unique_check;
> + | ---
> + | - row_count: 1
> + | ...
> +
> +--
> +-- Check CHECK constraint works with an added column.
> +--
> +CREATE TABLE ck_check (a INT PRIMARY KEY);
> + | ---
> + | - row_count: 1
> + | ...
> +ALTER TABLE ck_check ADD b INT CHECK (b > 0);
> + | ---
> + | - row_count: 0
> + | ...
> +INSERT INTO ck_check VALUES (1, 0);
> + | ---
> + | - null
> + | - 'Check constraint failed ''ck_unnamed_CK_CHECK_1'': b > 0'
> + | ...
> +DROP TABLE ck_check;
> + | ---
> + | - row_count: 1
> + | ...
> +
> +--
> +-- Check FOREIGN KEY constraint works with an added column.
> +--
> +CREATE TABLE fk_check (a INT PRIMARY KEY);
> + | ---
> + | - row_count: 1
> + | ...
> +ALTER TABLE fk_check ADD b INT REFERENCES t1(a);
> + | ---
> + | - row_count: 0
> + | ...
> +INSERT INTO fk_check VALUES (0, 1);
> + | ---
> + | - null
> + | - 'Failed to execute SQL statement: FOREIGN KEY constraint failed'
> + | ...
> +INSERT INTO fk_check VALUES (2, 0);
> + | ---
> + | - null
> + | - 'Failed to execute SQL statement: FOREIGN KEY constraint failed'
> + | ...
> +INSERT INTO fk_check VALUES (2, 1);
> + | ---
> + | - null
> + | - 'Failed to execute SQL statement: FOREIGN KEY constraint failed'
20. It is worth adding one more test with a successfull insertion.
More information about the Tarantool-patches
mailing list