[tarantool-patches] Re: [PATCH v1 3/3] sql: do not use OP_Delete+OP_Insert for UPDATES
n.pettik
korablev at tarantool.org
Tue Dec 25 20:26:31 MSK 2018
> @@ -253,15 +246,29 @@ fkey_lookup_parent(struct Parse *parse_context, struct space *parent,
> }
> sqlite3VdbeGoto(v, ok_label);
> }
> - struct index *idx = space_index(parent, referenced_idx);
> - assert(idx != NULL);
> - sqlite3VdbeAddOp4(v, OP_MakeRecord, temp_regs, field_count, rec_reg,
> - sql_space_index_affinity_str(parse_context->db,
> - parent->def, idx->def),
> - P4_DYNAMIC);
> - sqlite3VdbeAddOp4Int(v, OP_Found, cursor, ok_label, rec_reg, 0);
> - sqlite3ReleaseTempReg(parse_context, rec_reg);
> - sqlite3ReleaseTempRange(parse_context, temp_regs, field_count);
Please, don’t change code without any explanations.
You should state it within commit message or in source code.
> + if (!(fkey_is_self_referenced(fk_def) && op == TK_UPDATE)) {
> + int temp_regs = sqlite3GetTempRange(parse_context, field_count);
> + int rec_reg = sqlite3GetTempReg(parse_context);
> + vdbe_emit_open_cursor(parse_context, cursor, referenced_idx,
> + parent);
> + link = fk_def->links;
> + for (uint32_t i = 0; i < field_count; ++i, ++link) {
> + sqlite3VdbeAddOp2(v, OP_Copy,
> + link->child_field + 1 + reg_data,
> + temp_regs + i);
> + }
> + struct index *idx = space_index(parent, referenced_idx);
> + assert(idx != NULL);
> + sqlite3VdbeAddOp4(v, OP_MakeRecord, temp_regs, field_count,
> + rec_reg,
> + sql_space_index_affinity_str(parse_context->db,
> + parent->def,
> + idx->def),
> + P4_DYNAMIC);
> + sqlite3VdbeAddOp4Int(v, OP_Found, cursor, ok_label, rec_reg, 0);
> + sqlite3ReleaseTempReg(parse_context, rec_reg);
> + sqlite3ReleaseTempRange(parse_context, temp_regs, field_count);
> + }
> struct session *session = current_session();
> if (!fk_def->is_deferred &&
> (session->sql_flags & SQLITE_DeferFKs) == 0 &&
> @@ -515,7 +522,7 @@ fkey_action_is_set_null(struct Parse *parse_context, const struct fkey *fkey)
>
> void
> fkey_emit_check(struct Parse *parser, struct Table *tab, int reg_old,
> - int reg_new, const int *changed_cols)
> + int reg_new, const int *changed_cols, int op)
Why do you need additional argument?
reg_new != 0 for insert
reg_old != 0 for delete
Changed_cols != 0 for update
(According to comment to this function)
What is more, according to code you need only case
when update is processed.
> {
> struct sqlite3 *db = parser->db;
> struct session *user_session = current_session();
> @@ -549,7 +556,7 @@ fkey_emit_check(struct Parse *parser, struct Table *tab, int reg_old,
> * foreign key constraint violation.
> */
> fkey_lookup_parent(parser, parent, fk_def, fk->index_id,
> - reg_old, -1);
> + reg_old, -1, op);
> }
> if (reg_new != 0 && !fkey_action_is_set_null(parser, fk)) {
> /*
> @@ -568,7 +575,7 @@ fkey_emit_check(struct Parse *parser, struct Table *tab, int reg_old,
> * cause an FK violation.
> */
> fkey_lookup_parent(parser, parent, fk_def, fk->index_id,
> - reg_new, +1);
> + reg_new, +1, op);
> }
> }
> /*
>
> void
> -vdbe_emit_insertion_completion(struct Vdbe *v, struct space *space,
> - int raw_data_reg, uint32_t tuple_len,
> +vdbe_emit_insertion_completion(struct Vdbe *v, struct space *space, int op,
> + int raw_data_reg, int raw_key_reg,
> + int upd_cols_reg,
> enum on_conflict_action on_conflict)
> {
> assert(v != NULL);
> @@ -1065,10 +1065,18 @@ vdbe_emit_insertion_completion(struct Vdbe *v, struct space *space,
> pik_flags |= OPFLAG_OE_FAIL;
> else if (on_conflict == ON_CONFLICT_ACTION_ROLLBACK)
> pik_flags |= OPFLAG_OE_ROLLBACK;
> - sqlite3VdbeAddOp3(v, OP_MakeRecord, raw_data_reg, tuple_len,
> - raw_data_reg + tuple_len);
> - sqlite3VdbeAddOp1(v, OP_IdxInsert, raw_data_reg + tuple_len);
> - sqlite3VdbeChangeP4(v, -1, (char *)space, P4_SPACEPTR);
> + if (op == OP_IdxInsert) {
> + uint32_t tuple_raw_reg = raw_data_reg + space->def->field_count;
> + sqlite3VdbeAddOp3(v, OP_MakeRecord, raw_data_reg,
> + space->def->field_count, tuple_raw_reg);
> + sqlite3VdbeAddOp1(v, op, tuple_raw_reg);
> + sqlite3VdbeChangeP4(v, -1, (char *)space, P4_SPACEPTR);
> + } else if (op == OP_IdxUpdate) {
> + sqlite3VdbeAddOp4(v, op, raw_data_reg, raw_key_reg,
> + upd_cols_reg, (char *)space, P4_SPACEPTR);
> + } else {
> + unreachable();
> + }
I’d better introduce separate function (or simply inline) for emitting
update operation. Lets keep vdbe_emit_insertion_completion as is.
> sqlite3VdbeChangeP5(v, pik_flags);
> }
>
>
> void
> @@ -4730,10 +4738,11 @@ void sqlite3WithPush(Parse *, With *, u8);
> * @param reg_old Register with deleted row.
> * @param reg_new Register with inserted row.
> * @param changed_cols Array of updated columns. Can be NULL.
> + * @param op operation, one of TK_UPDATE, TK_INSERT, TK_DELETE.
> */
> void
> fkey_emit_check(struct Parse *parser, struct Table *tab, int reg_old,
> - int reg_new, const int *changed_cols);
> + int reg_new, const int *changed_cols, int op);
>
> /**
> * Emit VDBE code to do CASCADE, SET NULL or SET DEFAULT actions
> diff --git a/src/box/sql/update.c b/src/box/sql/update.c
> index 0e2d0fde8..aca8e4adc 100644
> --- a/src/box/sql/update.c
> +++ b/src/box/sql/update.c
> @@ -36,6 +36,7 @@
> #include "sqliteInt.h"
> #include "box/session.h"
> #include "tarantoolInt.h"
> +#include "box/tuple_format.h"
> #include "box/schema.h"
>
> void
> @@ -112,6 +113,8 @@ sqlite3Update(Parse * pParse, /* The parser context */
> int regNew = 0; /* Content of the NEW.* table in triggers */
> int regOld = 0; /* Content of OLD.* table in triggers */
> int regKey = 0; /* composite PRIMARY KEY value */
> + /* Count of changed rows. Match aXRef items != -1. */
> + int upd_cols_cnt = 0;
>
> db = pParse->db;
> if (pParse->nErr || db->mallocFailed) {
> @@ -183,6 +186,7 @@ sqlite3Update(Parse * pParse, /* The parser context */
> goto update_cleanup;
> }
> aXRef[j] = i;
> + upd_cols_cnt++;
> break;
> }
> }
> @@ -216,7 +220,7 @@ sqlite3Update(Parse * pParse, /* The parser context */
> regNewPk = ++pParse->nMem;
> }
> regNew = pParse->nMem + 1;
> - pParse->nMem += def->field_count;
> + pParse->nMem += def->field_count + 1;
>
> /* If we are trying to update a view, realize that view into
> * an ephemeral table.
> @@ -430,23 +434,64 @@ sqlite3Update(Parse * pParse, /* The parser context */
> vdbe_emit_constraint_checks(pParse, pTab, regNewPk + 1,
> on_error, labelContinue, aXRef);
> /* Do FK constraint checks. */
> - if (hasFK)
> - fkey_emit_check(pParse, pTab, regOldPk, 0, aXRef);
> - /*
> - * Delete the index entries associated with the
> - * current record. It can be already removed by
> - * trigger or REPLACE conflict action.
> - */
> - int addr1 = sqlite3VdbeAddOp4Int(v, OP_NotFound, pk_cursor, 0,
> - regKey, nKey);
> - assert(regNew == regNewPk + 1);
> - sqlite3VdbeAddOp2(v, OP_Delete, pk_cursor, 0);
> - sqlite3VdbeJumpHere(v, addr1);
> - if (hasFK)
> - fkey_emit_check(pParse, pTab, 0, regNewPk, aXRef);
> - vdbe_emit_insertion_completion(v, space, regNew,
> - pTab->def->field_count,
> - on_error);
> + if (hasFK) {
> + fkey_emit_check(pParse, pTab, regOldPk, 0, aXRef,
> + TK_UPDATE);
> + }
> + if (on_error == ON_CONFLICT_ACTION_REPLACE) {
> + /*
> + * Delete the index entries associated with the
> + * current record. It can be already removed by
> + * trigger or REPLACE conflict action.
> + */
> + int not_found_lbl =
> + sqlite3VdbeAddOp4Int(v, OP_NotFound, pk_cursor,
> + 0, regKey, nKey);
> + assert(regNew == regNewPk + 1);
> + sqlite3VdbeAddOp2(v, OP_Delete, pk_cursor, 0);
> + sqlite3VdbeJumpHere(v, not_found_lbl);
> + }
> + if (hasFK) {
> + fkey_emit_check(pParse, pTab, 0, regNewPk, aXRef,
> + TK_UPDATE);
> + }
> + if (on_error == ON_CONFLICT_ACTION_REPLACE) {
> + vdbe_emit_insertion_completion(v, space, OP_IdxInsert,
> + regNew, 0, 0, on_error);
> + } else {
> + int key_reg;
> + if (okOnePass) {
> + key_reg = sqlite3GetTempReg(pParse);
> + const char *zAff =
Nit: zAff -> aff_str.
> + sql_space_index_affinity_str(pParse->db,
> + def,
> + pPk->def);
> + sqlite3VdbeAddOp4(v, OP_MakeRecord, iPk,
> + pk_part_count, key_reg, zAff,
> + pk_part_count);
> + } else {
> + assert(nKey == 0);
> + key_reg = regKey;
> + }
> +
> + /* Prapare array of changed fields. */
Nit: prepare.
> + uint32_t upd_cols_sz = upd_cols_cnt * sizeof(uint32_t);
> + uint32_t *upd_cols = sqlite3DbMallocRaw(db, upd_cols_sz);
> + if (upd_cols == NULL)
> + goto update_cleanup;
> + upd_cols_cnt = 0;
> + for (uint32_t i = 0; i < def->field_count; i++) {
> + if (aXRef[i] == -1)
> + continue;
> + upd_cols[upd_cols_cnt++] = i;
> + }
> + int upd_cols_reg = sqlite3GetTempReg(pParse);
I remember that there was issue connected with different register
allocation policy (during analyze registers were allocated with
sqlite3GetTempReg and simple nMem++). Wouldn’t we face
the same problem here?
> + sqlite3VdbeAddOp4(v, OP_Blob, upd_cols_sz, upd_cols_reg,
> + 0, (const char *)upd_cols, P4_DYNAMIC);
> + vdbe_emit_insertion_completion(v, space, OP_IdxUpdate,
> + regNew, key_reg,
> + upd_cols_reg, on_error);
> + }
> /*
> * Do any ON CASCADE, SET NULL or SET DEFAULT
> * operations required to handle rows that refer
> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
> index bf6f4cdd1..534d3e885 100644
> --- a/src/box/sql/vdbe.c
> +++ b/src/box/sql/vdbe.c
>
> +/* Opcode: OP_IdxUpdate P1 P2 P3 P4 P5
> + * Synopsis: key=r[P1]
Hmm, during vibe execution I see no ’synopsis’.
What is more, I look at opcodes.h and:
/* 114 */ "IdxUpdate" OpHelp("”),
So, our script didn’t generate auxiliary information for opcode.
Open an issue or investigate this case pls.
If you fix it, extend synopsis with other args.
> + *
> + * Make space Update operation. Primary key fields could not be
> + * modified - use OP_Replace opcode instead.
> + * This opcode extends IdxInsert/IdxReplace interface to
> + * transform to OP_Replace in particular case of sql operator
> + * "UPDATE or REPLACE".
> + *
> + * @param P1 The first field of updated tuple.
> + * @param P2 Index of a register with index key MakeRecord.
> + * @param P3 Index of a register with upd_fields blob.
> + * This blob has size sizeof(uint32_t)*upd_fields_cnt.
> + * It's items are numbers of fields to be replaced with
> + * new values from P1 blob. They must be sorted in
> + * ascending order.
I would slightly change comment:
- * Make space Update operation. Primary key fields could not be
- * modified - use OP_Replace opcode instead.
- * This opcode extends IdxInsert/IdxReplace interface to
- * transform to OP_Replace in particular case of sql operator
- * "UPDATE or REPLACE".
- *
- * @param P1 The first field of updated tuple.
- * @param P2 Index of a register with index key MakeRecord.
+ * Process UPDATE operation. Primary key fields can not be
+ * modified - in this case OP_IdxReplace is used instead.
+ * Under the hood it performs box_update() call.
+ * For the performance sake, it takes whole affected row (P1)
+ * and encodes into msgpack only fields to be updated (P3).
+ *
+ * @param P1 The first field to be updated. Fields are located
+ * in the range of [P1...] in decoded state.
+ * Encoded only fields which numbers are presented
+ * in @P3 array.
+ * @param P2 Encoded key to be passed to box_update().
* @param P3 Index of a register with upd_fields blob.
- * This blob has size sizeof(uint32_t)*upd_fields_cnt.
* It's items are numbers of fields to be replaced with
- * new values from P1 blob. They must be sorted in
- * ascending order.
+ * new values from P1.
+ * They must be sorted in ascending order.
> + * @param P4 Pointer to the struct space to be updated.
> + * @param P5 Flags. If P5 contains OPFLAG_NCHANGE, then VDBE
> + * accounts the change in a case of successful
> + * insertion in nChange counter. If P5 contains
> + * OPFLAG_OE_IGNORE, then we are processing INSERT OR
> + * INGORE statement. Thus, in case of conflict we don't
> + * raise an error.
> + */
> +case OP_IdxUpdate: {
Don’t call it IdxUpdate - it has nothing in common with index.
OP_IdxInsert is called this way due to historical reasons.
Lets simply name it OP_Update.
> + struct Mem *new_tuple = &aMem[pOp->p1];
> + if (pOp->p5 & OPFLAG_NCHANGE)
> + p->nChange++;
> +
> + struct space *space = pOp->p4.space;
> + assert(pOp->p4type == P4_SPACEPTR);
> +
> + struct Mem *key_mem = &aMem[pOp->p2];
> + assert((key_mem->flags & MEM_Blob) != 0);
> +
> + struct Mem *upd_fields_mem = &aMem[pOp->p3];
> + assert((upd_fields_mem->flags & MEM_Blob) != 0);
> + uint32_t *upd_fields = (uint32_t *)upd_fields_mem->z;
> + uint32_t upd_fields_cnt = upd_fields_mem->n / sizeof(uint32_t);
> +
> + /* Prepare Tarantool update ops msgpack. */
> + struct region *region = &fiber()->gc;
> + size_t used = region_used(region);
> + bool is_error = false;
> + struct mpstream stream;
> + mpstream_init(&stream, region, region_reserve_cb, region_alloc_cb,
> + mpstream_encode_error, &is_error);
> + mpstream_encode_array(&stream, upd_fields_cnt);
> + for (uint32_t i = 0; i < upd_fields_cnt; i++) {
> + uint32_t field_idx = upd_fields[i];
> + assert(field_idx < space->def->field_count);
> + mpstream_encode_array(&stream, 3);
> + mpstream_encode_strn(&stream, "=", 1);
> + mpstream_encode_uint(&stream, field_idx + 1);
> + mpstream_encode_vdbe_mem(&stream, new_tuple + field_idx);
> + }
> + mpstream_flush(&stream);
> + if (is_error) {
> + diag_set(OutOfMemory, stream.pos - stream.buf,
> + "mpstream_flush", "stream");
> + rc = SQL_TARANTOOL_ERROR;
> + goto abort_due_to_error;
> + }
> + uint32_t ops_size = region_used(region) - used;
> + const char *ops = region_join(region, ops_size);
> + if (ops == NULL) {
> + diag_set(OutOfMemory, ops_size, "region_join", "raw");
> + rc = SQL_TARANTOOL_ERROR;
> + goto abort_due_to_error;
> + }
> +
> + rc = SQLITE_OK;
assert(rc == SQLITE_OK);
If no error takes place, rc must be 0.
Otherwise, error must be handled before.
> + if (box_update(space->def->id, 0, key_mem->z, key_mem->z + key_mem->n,
> + ops, ops + ops_size, TUPLE_INDEX_BASE, NULL) != 0)
I see comment to struct request:
/** Base field offset for UPDATE/UPSERT, e.g. 0 for C and 1 for Lua. */
int index_base;
So why do you pass 1 instead of 0?
> + rc = SQL_TARANTOOL_ERROR;
> +
> + if (pOp->p5 & OPFLAG_OE_IGNORE) {
> + /*
> + * Ignore any kind of failes and do not raise
Nit: fails.
> + * error message
> + */
> + rc = SQLITE_OK;
> + /*
> + * If we are in trigger, increment ignore raised
> + * counter.
> + */
> + if (p->pFrame)
> + p->ignoreRaised++;
> + } else if (pOp->p5 & OPFLAG_OE_FAIL) {
> + p->errorAction = ON_CONFLICT_ACTION_FAIL;
> + } else if (pOp->p5 & OPFLAG_OE_ROLLBACK) {
> + p->errorAction = ON_CONFLICT_ACTION_ROLLBACK;
> + }
> + if (rc != 0)
> + goto abort_due_to_error;
> + break;
> +}
>
> /* Opcode: SInsert P1 P2 P3 * P5
> * Synopsis: space id = P1, key = r[P3], on error goto P2
> diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h
> index 50bc35b2b..69898bbcf 100644
> --- a/src/box/sql/vdbeInt.h
> +++ b/src/box/sql/vdbeInt.h
> @@ -552,4 +552,10 @@ int sqlite3VdbeRecordCompareMsgpack(const void *key1,
> struct UnpackedRecord *key2);
> u32 sqlite3VdbeMsgpackGet(const unsigned char *buf, Mem * pMem);
>
> +struct mpstream;
> +
> +/** Perform encoding memory variable to stream. */
> +void
> +mpstream_encode_vdbe_mem(struct mpstream *stream, struct Mem *var);
> +
> #endif /* !defined(SQLITE_VDBEINT_H) */
> diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
> index 22beba8be..1fee2673d 100644
> --- a/src/box/sql/vdbemem.c
> +++ b/src/box/sql/vdbemem.c
> @@ -40,6 +40,7 @@
> #include "vdbeInt.h"
> #include "tarantoolInt.h"
> #include "box/schema.h"
> +#include "mpstream.h"
>
> #ifdef SQLITE_DEBUG
> /*
> @@ -1714,3 +1715,38 @@ sqlite3ValueBytes(sqlite3_value * pVal)
> return 0;
> return valueBytes(pVal);
> }
> +
> +void
> +mpstream_encode_vdbe_mem(struct mpstream *stream, struct Mem *var)
Can we use this routine to encode MsgPack during execution of OP_MakeRecord?
Now it uses sqlite3VdbeMsgpackRecordPut() which is almost the same.
> diff --git a/src/mpstream.c b/src/mpstream.c
> index e4f7950ba..843fc959e 100644
> --- a/src/mpstream.c
> +++ b/src/mpstream.c
> @@ -175,3 +175,23 @@ mpstream_encode_bool(struct mpstream *stream, bool val)
> char *pos = mp_encode_bool(data, val);
> mpstream_advance(stream, pos - data);
> }
> +
> +void
> +mpstream_encode_raw(struct mpstream *stream, const char *raw, uint32_t len)
Hm, it doesn’t encode in fact. Mb better call _copy_raw?
> +{
> + char *data = mpstream_reserve(stream, len);
> + if (data == NULL)
> + return;
> + memcpy(data, raw, len);
> + mpstream_advance(stream, len);
> +}
>
> #if defined(__cplusplus)
> } /* extern "C" */
> #endif /* defined(__cplusplus) */
At this point I didn’t review test changes.
More information about the Tarantool-patches
mailing list