[tarantool-patches] Re: [PATCH v1 3/3] sql: do not use OP_Delete+OP_Insert for UPDATES

n.pettik korablev at tarantool.org
Tue Dec 25 20:26:31 MSK 2018


> @@ -253,15 +246,29 @@ fkey_lookup_parent(struct Parse *parse_context, struct space *parent,
> 		}
> 		sqlite3VdbeGoto(v, ok_label);
> 	}
> -	struct index *idx = space_index(parent, referenced_idx);
> -	assert(idx != NULL);
> -	sqlite3VdbeAddOp4(v, OP_MakeRecord, temp_regs, field_count, rec_reg,
> -			  sql_space_index_affinity_str(parse_context->db,
> -						       parent->def, idx->def),
> -			  P4_DYNAMIC);
> -	sqlite3VdbeAddOp4Int(v, OP_Found, cursor, ok_label, rec_reg, 0);
> -	sqlite3ReleaseTempReg(parse_context, rec_reg);
> -	sqlite3ReleaseTempRange(parse_context, temp_regs, field_count);

Please, don’t change code without any explanations.
You should state it within commit message or in source code.

> +	if (!(fkey_is_self_referenced(fk_def) && op == TK_UPDATE)) {
> +		int temp_regs = sqlite3GetTempRange(parse_context, field_count);
> +		int rec_reg = sqlite3GetTempReg(parse_context);
> +		vdbe_emit_open_cursor(parse_context, cursor, referenced_idx,
> +				      parent);
> +		link = fk_def->links;
> +		for (uint32_t i = 0; i < field_count; ++i, ++link) {
> +			sqlite3VdbeAddOp2(v, OP_Copy,
> +					  link->child_field + 1 + reg_data,
> +					  temp_regs + i);
> +		}
> +		struct index *idx = space_index(parent, referenced_idx);
> +		assert(idx != NULL);
> +		sqlite3VdbeAddOp4(v, OP_MakeRecord, temp_regs, field_count,
> +				  rec_reg,
> +				  sql_space_index_affinity_str(parse_context->db,
> +							       parent->def,
> +							       idx->def),
> +				  P4_DYNAMIC);
> +		sqlite3VdbeAddOp4Int(v, OP_Found, cursor, ok_label, rec_reg, 0);
> +		sqlite3ReleaseTempReg(parse_context, rec_reg);
> +		sqlite3ReleaseTempRange(parse_context, temp_regs, field_count);
> +	}
> 	struct session *session = current_session();
> 	if (!fk_def->is_deferred &&
> 	    (session->sql_flags & SQLITE_DeferFKs) == 0 &&
> @@ -515,7 +522,7 @@ fkey_action_is_set_null(struct Parse *parse_context, const struct fkey *fkey)
> 
> void
> fkey_emit_check(struct Parse *parser, struct Table *tab, int reg_old,
> -		int reg_new, const int *changed_cols)
> +		int reg_new, const int *changed_cols, int op)

Why do you need additional argument?
reg_new != 0 for insert
reg_old != 0 for delete
Changed_cols != 0 for update
(According to comment to this function)

What is more, according to code you need only case
when update is processed.

> {
> 	struct sqlite3 *db = parser->db;
> 	struct session *user_session = current_session();
> @@ -549,7 +556,7 @@ fkey_emit_check(struct Parse *parser, struct Table *tab, int reg_old,
> 			 * foreign key constraint violation.
> 			 */
> 			fkey_lookup_parent(parser, parent, fk_def, fk->index_id,
> -					   reg_old, -1);
> +					   reg_old, -1, op);
> 		}
> 		if (reg_new != 0 && !fkey_action_is_set_null(parser, fk)) {
> 			/*
> @@ -568,7 +575,7 @@ fkey_emit_check(struct Parse *parser, struct Table *tab, int reg_old,
> 			 * cause an FK violation.
> 			 */
> 			fkey_lookup_parent(parser, parent, fk_def, fk->index_id,
> -					   reg_new, +1);
> +					   reg_new, +1, op);
> 		}
> 	}
> 	/*
> 
> void
> -vdbe_emit_insertion_completion(struct Vdbe *v, struct space *space,
> -			       int raw_data_reg, uint32_t tuple_len,
> +vdbe_emit_insertion_completion(struct Vdbe *v, struct space *space, int op,
> +			       int raw_data_reg, int raw_key_reg,
> +			       int upd_cols_reg,
> 			       enum on_conflict_action on_conflict)
> {
> 	assert(v != NULL);
> @@ -1065,10 +1065,18 @@ vdbe_emit_insertion_completion(struct Vdbe *v, struct space *space,
> 		pik_flags |= OPFLAG_OE_FAIL;
> 	else if (on_conflict == ON_CONFLICT_ACTION_ROLLBACK)
> 		pik_flags |= OPFLAG_OE_ROLLBACK;
> -	sqlite3VdbeAddOp3(v, OP_MakeRecord, raw_data_reg, tuple_len,
> -			  raw_data_reg + tuple_len);
> -	sqlite3VdbeAddOp1(v, OP_IdxInsert, raw_data_reg + tuple_len);
> -	sqlite3VdbeChangeP4(v, -1, (char *)space, P4_SPACEPTR);
> +	if (op == OP_IdxInsert) {
> +		uint32_t tuple_raw_reg = raw_data_reg + space->def->field_count;
> +		sqlite3VdbeAddOp3(v, OP_MakeRecord, raw_data_reg,
> +				  space->def->field_count, tuple_raw_reg);
> +		sqlite3VdbeAddOp1(v, op, tuple_raw_reg);
> +		sqlite3VdbeChangeP4(v, -1, (char *)space, P4_SPACEPTR);
> +	} else if (op == OP_IdxUpdate) {
> +		sqlite3VdbeAddOp4(v, op, raw_data_reg, raw_key_reg,
> +				  upd_cols_reg, (char *)space, P4_SPACEPTR);
> +	} else {
> +		unreachable();
> +	}

I’d better introduce separate function (or simply inline) for emitting
update operation. Lets keep vdbe_emit_insertion_completion as is.

> 	sqlite3VdbeChangeP5(v, pik_flags);
> }
> 
> 
> void
> @@ -4730,10 +4738,11 @@ void sqlite3WithPush(Parse *, With *, u8);
>  * @param reg_old Register with deleted row.
>  * @param reg_new Register with inserted row.
>  * @param changed_cols Array of updated columns. Can be NULL.
> + * @param op operation, one of TK_UPDATE, TK_INSERT, TK_DELETE.
>  */
> void
> fkey_emit_check(struct Parse *parser, struct Table *tab, int reg_old,
> -		int reg_new, const int *changed_cols);
> +		int reg_new, const int *changed_cols, int op);
> 
> /**
>  * Emit VDBE code to do CASCADE, SET NULL or SET DEFAULT actions
> diff --git a/src/box/sql/update.c b/src/box/sql/update.c
> index 0e2d0fde8..aca8e4adc 100644
> --- a/src/box/sql/update.c
> +++ b/src/box/sql/update.c
> @@ -36,6 +36,7 @@
> #include "sqliteInt.h"
> #include "box/session.h"
> #include "tarantoolInt.h"
> +#include "box/tuple_format.h"
> #include "box/schema.h"
> 
> void
> @@ -112,6 +113,8 @@ sqlite3Update(Parse * pParse,		/* The parser context */
> 	int regNew = 0;		/* Content of the NEW.* table in triggers */
> 	int regOld = 0;		/* Content of OLD.* table in triggers */
> 	int regKey = 0;		/* composite PRIMARY KEY value */
> +	/* Count of changed rows. Match aXRef items != -1. */
> +	int upd_cols_cnt = 0;
> 
> 	db = pParse->db;
> 	if (pParse->nErr || db->mallocFailed) {
> @@ -183,6 +186,7 @@ sqlite3Update(Parse * pParse,		/* The parser context */
> 					goto update_cleanup;
> 				}
> 				aXRef[j] = i;
> +				upd_cols_cnt++;
> 				break;
> 			}
> 		}
> @@ -216,7 +220,7 @@ sqlite3Update(Parse * pParse,		/* The parser context */
> 		regNewPk = ++pParse->nMem;
> 	}
> 	regNew = pParse->nMem + 1;
> -	pParse->nMem += def->field_count;
> +	pParse->nMem += def->field_count + 1;
> 
> 	/* If we are trying to update a view, realize that view into
> 	 * an ephemeral table.
> @@ -430,23 +434,64 @@ sqlite3Update(Parse * pParse,		/* The parser context */
> 		vdbe_emit_constraint_checks(pParse, pTab, regNewPk + 1,
> 					    on_error, labelContinue, aXRef);
> 		/* Do FK constraint checks. */
> -		if (hasFK)
> -			fkey_emit_check(pParse, pTab, regOldPk, 0, aXRef);
> -		/*
> -		 * Delete the index entries associated with the
> -		 * current record. It can be already removed by
> -		 * trigger or REPLACE conflict action.
> -		 */
> -		int addr1 = sqlite3VdbeAddOp4Int(v, OP_NotFound, pk_cursor, 0,
> -						 regKey, nKey);
> -		assert(regNew == regNewPk + 1);
> -		sqlite3VdbeAddOp2(v, OP_Delete, pk_cursor, 0);
> -		sqlite3VdbeJumpHere(v, addr1);
> -		if (hasFK)
> -			fkey_emit_check(pParse, pTab, 0, regNewPk, aXRef);
> -		vdbe_emit_insertion_completion(v, space, regNew,
> -					       pTab->def->field_count,
> -					       on_error);
> +		if (hasFK) {
> +			fkey_emit_check(pParse, pTab, regOldPk, 0, aXRef,
> +					TK_UPDATE);
> +		}
> +		if (on_error == ON_CONFLICT_ACTION_REPLACE) {
> +			/*
> +			 * Delete the index entries associated with the
> +			 * current record. It can be already removed by
> +			 * trigger or REPLACE conflict action.
> +			 */
> +			int not_found_lbl =
> +				sqlite3VdbeAddOp4Int(v, OP_NotFound, pk_cursor,
> +						     0, regKey, nKey);
> +			assert(regNew == regNewPk + 1);
> +			sqlite3VdbeAddOp2(v, OP_Delete, pk_cursor, 0);
> +			sqlite3VdbeJumpHere(v, not_found_lbl);
> +		}
> +		if (hasFK) {
> +			fkey_emit_check(pParse, pTab, 0, regNewPk, aXRef,
> +					TK_UPDATE);
> +		}
> +		if (on_error == ON_CONFLICT_ACTION_REPLACE) {
> +			vdbe_emit_insertion_completion(v, space, OP_IdxInsert,
> +						       regNew, 0, 0, on_error);
> +		} else {
> +			int key_reg;
> +			if (okOnePass) {
> +				key_reg = sqlite3GetTempReg(pParse);
> +				const char *zAff =

Nit: zAff -> aff_str.

> +					sql_space_index_affinity_str(pParse->db,
> +								     def,
> +								     pPk->def);
> +				sqlite3VdbeAddOp4(v, OP_MakeRecord, iPk,
> +						  pk_part_count, key_reg, zAff,
> +						  pk_part_count);
> +			} else {
> +				assert(nKey == 0);
> +				key_reg = regKey;
> +			}
> +
> +			/* Prapare array of changed fields. */

Nit: prepare.

> +			uint32_t upd_cols_sz = upd_cols_cnt * sizeof(uint32_t);
> +			uint32_t *upd_cols = sqlite3DbMallocRaw(db, upd_cols_sz);
> +			if (upd_cols == NULL)
> +				goto update_cleanup;
> +			upd_cols_cnt = 0;
> +			for (uint32_t i = 0; i < def->field_count; i++) {
> +				if (aXRef[i] == -1)
> +					continue;
> +				upd_cols[upd_cols_cnt++] = i;
> +			}
> +			int upd_cols_reg = sqlite3GetTempReg(pParse);

I remember that there was issue connected with different register
allocation policy (during analyze registers were allocated with
sqlite3GetTempReg and simple nMem++). Wouldn’t we face
the same problem here?

> +			sqlite3VdbeAddOp4(v, OP_Blob, upd_cols_sz, upd_cols_reg,
> +					0, (const char *)upd_cols, P4_DYNAMIC);
> +			vdbe_emit_insertion_completion(v, space, OP_IdxUpdate,
> +						       regNew, key_reg,
> +						       upd_cols_reg, on_error);
> +		}
> 		/*
> 		 * Do any ON CASCADE, SET NULL or SET DEFAULT
> 		 * operations required to handle rows that refer
> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
> index bf6f4cdd1..534d3e885 100644
> --- a/src/box/sql/vdbe.c
> +++ b/src/box/sql/vdbe.c
> 
> +/* Opcode: OP_IdxUpdate P1 P2 P3 P4 P5
> + * Synopsis: key=r[P1]

Hmm, during vibe execution I see no ’synopsis’.
What is more, I look at opcodes.h and:
/* 114 */ "IdxUpdate"        OpHelp("”),

So, our script didn’t generate auxiliary information for opcode.
Open an issue or investigate this case pls.
If you fix it, extend synopsis with other args.

> + *
> + * Make space Update operation. Primary key fields could not be
> + * modified - use OP_Replace opcode instead.
> + * This opcode extends IdxInsert/IdxReplace interface to
> + * transform to OP_Replace in particular case of sql operator
> + * "UPDATE or REPLACE".
> + *
> + * @param P1 The first field of updated tuple.
> + * @param P2 Index of a register with index key MakeRecord.
> + * @param P3 Index of a register with upd_fields blob.
> + *           This blob has size sizeof(uint32_t)*upd_fields_cnt.
> + *           It's items are numbers of fields to be replaced with
> + *           new values from P1 blob. They must be sorted in
> + *           ascending order.

I would slightly change comment:

- * Make space Update operation. Primary key fields could not be
- * modified - use OP_Replace opcode instead.
- * This opcode extends IdxInsert/IdxReplace interface to
- * transform to OP_Replace in particular case of sql operator
- * "UPDATE or REPLACE".
- *
- * @param P1 The first field of updated tuple.
- * @param P2 Index of a register with index key MakeRecord.
+ * Process UPDATE operation. Primary key fields can not be
+ * modified - in this case OP_IdxReplace is used instead.
+ * Under the hood it performs box_update() call.
+ * For the performance sake, it takes whole affected row (P1)
+ * and encodes into msgpack only fields to be updated (P3).
+ *
+ * @param P1 The first field to be updated. Fields are located
+ *           in the range of [P1...] in decoded state.
+ *           Encoded only fields which numbers are presented
+ *           in @P3 array.
+ * @param P2 Encoded key to be passed to box_update().
  * @param P3 Index of a register with upd_fields blob.
- *           This blob has size sizeof(uint32_t)*upd_fields_cnt.
  *           It's items are numbers of fields to be replaced with
- *           new values from P1 blob. They must be sorted in
- *           ascending order.
+ *           new values from P1.
+ *           They must be sorted in ascending order.

> + * @param P4 Pointer to the struct space to be updated.
> + * @param P5 Flags. If P5 contains OPFLAG_NCHANGE, then VDBE
> + *           accounts the change in a case of successful
> + *           insertion in nChange counter. If P5 contains
> + *           OPFLAG_OE_IGNORE, then we are processing INSERT OR
> + *           INGORE statement. Thus, in case of conflict we don't
> + *           raise an error.
> + */
> +case OP_IdxUpdate: {

Don’t call it IdxUpdate - it has nothing in common with index.
OP_IdxInsert is called this way due to historical reasons.
Lets simply name it OP_Update.

> +	struct Mem *new_tuple = &aMem[pOp->p1];
> +	if (pOp->p5 & OPFLAG_NCHANGE)
> +		p->nChange++;
> +
> +	struct space *space = pOp->p4.space;
> +	assert(pOp->p4type == P4_SPACEPTR);
> +
> +	struct Mem *key_mem = &aMem[pOp->p2];
> +	assert((key_mem->flags & MEM_Blob) != 0);
> +
> +	struct Mem *upd_fields_mem = &aMem[pOp->p3];
> +	assert((upd_fields_mem->flags & MEM_Blob) != 0);
> +	uint32_t *upd_fields = (uint32_t *)upd_fields_mem->z;
> +	uint32_t upd_fields_cnt = upd_fields_mem->n / sizeof(uint32_t);
> +
> +	/* Prepare Tarantool update ops msgpack. */
> +	struct region *region = &fiber()->gc;
> +	size_t used = region_used(region);
> +	bool is_error = false;
> +	struct mpstream stream;
> +	mpstream_init(&stream, region, region_reserve_cb, region_alloc_cb,
> +		      mpstream_encode_error, &is_error);
> +	mpstream_encode_array(&stream, upd_fields_cnt);
> +	for (uint32_t i = 0; i < upd_fields_cnt; i++) {
> +		uint32_t field_idx = upd_fields[i];
> +		assert(field_idx < space->def->field_count);
> +		mpstream_encode_array(&stream, 3);
> +		mpstream_encode_strn(&stream, "=", 1);
> +		mpstream_encode_uint(&stream, field_idx + 1);
> +		mpstream_encode_vdbe_mem(&stream, new_tuple + field_idx);
> +	}
> +	mpstream_flush(&stream);
> +	if (is_error) {
> +		diag_set(OutOfMemory, stream.pos - stream.buf,
> +			"mpstream_flush", "stream");
> +		rc = SQL_TARANTOOL_ERROR;
> +		goto abort_due_to_error;
> +	}
> +	uint32_t ops_size = region_used(region) - used;
> +	const char *ops = region_join(region, ops_size);
> +	if (ops == NULL) {
> +		diag_set(OutOfMemory, ops_size, "region_join", "raw");
> +		rc = SQL_TARANTOOL_ERROR;
> +		goto abort_due_to_error;
> +	}
> +
> +	rc = SQLITE_OK;

assert(rc == SQLITE_OK);

If no error takes place, rc must be 0.
Otherwise, error must be handled before.

> +	if (box_update(space->def->id, 0, key_mem->z, key_mem->z + key_mem->n,
> +		       ops, ops + ops_size, TUPLE_INDEX_BASE, NULL) != 0)

I see comment to struct request:

/** Base field offset for UPDATE/UPSERT, e.g. 0 for C and 1 for Lua. */
int index_base;

So why do you pass 1 instead of 0?

> +		rc = SQL_TARANTOOL_ERROR;
> +
> +	if (pOp->p5 & OPFLAG_OE_IGNORE) {
> +		/*
> +		 * Ignore any kind of failes and do not raise

Nit: fails.

> +		 * error message
> +		 */
> +		rc = SQLITE_OK;
> +		/*
> +		 * If we are in trigger, increment ignore raised
> +		 * counter.
> +		 */
> +		if (p->pFrame)
> +			p->ignoreRaised++;
> +	} else if (pOp->p5 & OPFLAG_OE_FAIL) {
> +		p->errorAction = ON_CONFLICT_ACTION_FAIL;
> +	} else if (pOp->p5 & OPFLAG_OE_ROLLBACK) {
> +		p->errorAction = ON_CONFLICT_ACTION_ROLLBACK;
> +	}
> +	if (rc != 0)
> +		goto abort_due_to_error;
> +	break;
> +}
> 
> /* Opcode: SInsert P1 P2 P3 * P5
>  * Synopsis: space id = P1, key = r[P3], on error goto P2
> diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h
> index 50bc35b2b..69898bbcf 100644
> --- a/src/box/sql/vdbeInt.h
> +++ b/src/box/sql/vdbeInt.h
> @@ -552,4 +552,10 @@ int sqlite3VdbeRecordCompareMsgpack(const void *key1,
> 				    struct UnpackedRecord *key2);
> u32 sqlite3VdbeMsgpackGet(const unsigned char *buf, Mem * pMem);
> 
> +struct mpstream;
> +
> +/** Perform encoding memory variable to stream. */
> +void
> +mpstream_encode_vdbe_mem(struct mpstream *stream, struct Mem *var);
> +
> #endif				/* !defined(SQLITE_VDBEINT_H) */
> diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
> index 22beba8be..1fee2673d 100644
> --- a/src/box/sql/vdbemem.c
> +++ b/src/box/sql/vdbemem.c
> @@ -40,6 +40,7 @@
> #include "vdbeInt.h"
> #include "tarantoolInt.h"
> #include "box/schema.h"
> +#include "mpstream.h"
> 
> #ifdef SQLITE_DEBUG
> /*
> @@ -1714,3 +1715,38 @@ sqlite3ValueBytes(sqlite3_value * pVal)
> 		return 0;
> 	return valueBytes(pVal);
> }
> +
> +void
> +mpstream_encode_vdbe_mem(struct mpstream *stream, struct Mem *var)

Can we use this routine to encode MsgPack during execution of OP_MakeRecord?
Now it uses sqlite3VdbeMsgpackRecordPut() which is almost the same.

> diff --git a/src/mpstream.c b/src/mpstream.c
> index e4f7950ba..843fc959e 100644
> --- a/src/mpstream.c
> +++ b/src/mpstream.c
> @@ -175,3 +175,23 @@ mpstream_encode_bool(struct mpstream *stream, bool val)
>     char *pos = mp_encode_bool(data, val);
>     mpstream_advance(stream, pos - data);
> }
> +
> +void
> +mpstream_encode_raw(struct mpstream *stream, const char *raw, uint32_t len)

Hm, it doesn’t encode in fact. Mb better call _copy_raw?

> +{
> +	char *data = mpstream_reserve(stream, len);
> +	if (data == NULL)
> +		return;
> +	memcpy(data, raw, len);
> +	mpstream_advance(stream, len);
> +}
> 
> #if defined(__cplusplus)
> } /* extern "C" */
> #endif /* defined(__cplusplus) */

At this point I didn’t review test changes.





More information about the Tarantool-patches mailing list