[patches] [PATCH v3 3/3] sql: make OP_MakeRecord use region allocator

Nikita Pettik korablev at tarantool.org
Tue Feb 13 15:41:29 MSK 2018


VDBE used ordinary malloc to allocate memory for tuples. However,
immediately before calling insertion method to space, tuples were copied
into region memory. Thus, excess copy took place. This patch makes
OP_MakeRecord opcode allocate memory on region right away.
However, in case of ephemeral space, we are able to save memory
allocating one by ordinary malloc and reusing it, instead of cutting
areas from region allocator and waiting them to be free after being
written to WAL.

Closes #3021

Signed-off-by: Nikita Pettik <korablev at tarantool.org>
---
 src/box/sql.c         | 14 ++++----------
 src/box/sql/delete.c  |  2 ++
 src/box/sql/insert.c  |  2 ++
 src/box/sql/select.c  |  6 ++++++
 src/box/sql/update.c  |  2 ++
 src/box/sql/vdbe.c    | 43 ++++++++++++++++++++++++++++++-------------
 src/box/sql/vdbe.h    |  1 +
 src/box/sql/vdbeaux.c | 12 ++++++++++++
 8 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/src/box/sql.c b/src/box/sql.c
index 1b16122c6..fae403c70 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -513,21 +513,15 @@ static int insertOrReplace(BtCursor *pCur, const CursorPayload *pX,
 	assert(operationType == TARANTOOL_INDEX_INSERT ||
 	       operationType == TARANTOOL_INDEX_REPLACE);
 
-	char *buf = (char*)region_alloc(&fiber()->gc, pX->nKey);
-	if (buf == NULL) {
-		diag_set(OutOfMemory, pX->nKey, "region", "buf");
-		return SQLITE_TARANTOOL_ERROR;
-	}
-
-	memcpy(buf, pX->pKey, pX->nKey);
 	int space_id = SQLITE_PAGENO_TO_SPACEID(pCur->pgnoRoot);
-
 	int rc;
 	if (operationType == TARANTOOL_INDEX_INSERT) {
-		rc = box_insert(space_id, buf, (const char *)buf + pX->nKey,
+		rc = box_insert(space_id, pX->pKey,
+				(const char *)pX->pKey + pX->nKey,
 				NULL /* result */);
 	} else {
-		rc = box_replace(space_id, buf, (const char *)buf + pX->nKey,
+		rc = box_replace(space_id, pX->pKey,
+				 (const char *)pX->pKey + pX->nKey,
 				 NULL /* result */);
 	}
 
diff --git a/src/box/sql/delete.c b/src/box/sql/delete.c
index f7e68a6d3..c5750088f 100644
--- a/src/box/sql/delete.c
+++ b/src/box/sql/delete.c
@@ -490,6 +490,8 @@ sqlite3DeleteFrom(Parse * pParse,	/* The parser context */
 			const char *zAff = isView ? 0 :
 					  sqlite3IndexAffinityStr(pParse->db, pPk);
 			sqlite3VdbeAddOp4(v, OP_MakeRecord, iPk, nPk, iKey, zAff, nPk);
+			/* Set flag to save memory allocating one by malloc. */
+			sqlite3VdbeChangeP5(v, 1);
 			sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iEphCur, iKey, iPk, nPk);
 		}
 
diff --git a/src/box/sql/insert.c b/src/box/sql/insert.c
index bc1906fb6..5fbffd78a 100644
--- a/src/box/sql/insert.c
+++ b/src/box/sql/insert.c
@@ -553,6 +553,8 @@ sqlite3Insert(Parse * pParse,	/* Parser context */
 			sqlite3VdbeAddOp3(v, OP_Copy, regFromSelect, regCopy, nColumn-1);
 			sqlite3VdbeAddOp3(v, OP_MakeRecord, regCopy,
 					  nColumn + 1, regRec);
+			/* Set flag to save memory allocating one by malloc. */
+			sqlite3VdbeChangeP5(v, 1);
 			sqlite3VdbeAddOp2(v, OP_IdxInsert, srcTab, regRec);
 
 			sqlite3VdbeGoto(v, addrL);
diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index b86a9fa11..041e6ca87 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -997,6 +997,8 @@ selectInnerLoop(Parse * pParse,		/* The parser context */
 			testcase(eDest == SRT_DistFifo);
 			sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult,
 					  nResultCol, r1 + nPrefixReg);
+			/* Set flag to save memory allocating one by malloc. */
+			sqlite3VdbeChangeP5(v, 1);
 #ifndef SQLITE_OMIT_CTE
 			if (eDest == SRT_DistFifo) {
 				/* If the destination is DistFifo, then cursor (iParm+1) is open
@@ -1032,6 +1034,8 @@ selectInnerLoop(Parse * pParse,		/* The parser context */
 				 */
 				sqlite3VdbeAddOp3(v, OP_Copy, regResult, regCopy, nResultCol - 1);
 				sqlite3VdbeAddOp3(v, OP_MakeRecord, regCopy, nResultCol + 1, regRec);
+				/* Set flag to save memory allocating one by malloc. */
+				sqlite3VdbeChangeP5(v, 1);
 				sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm, regRec);
 				sqlite3ReleaseTempReg(pParse, regRec);
 				sqlite3ReleaseTempRange(pParse, regCopy, nResultCol + 1);
@@ -3015,6 +3019,8 @@ generateOutputSubroutine(Parse * pParse,	/* Parsing context */
 					  pIn->nSdst - 1);
 			sqlite3VdbeAddOp3(v, OP_MakeRecord, regCopy,
 					  pIn->nSdst + 1, regRec);
+			/* Set flag to save memory allocating one by malloc. */
+			sqlite3VdbeChangeP5(v, 1);
 			sqlite3VdbeAddOp2(v, OP_IdxInsert, pDest->iSDParm, regRec);
 			sqlite3ReleaseTempRange(pParse, regCopy, pIn->nSdst + 1);
 			sqlite3ReleaseTempReg(pParse, regRec);
diff --git a/src/box/sql/update.c b/src/box/sql/update.c
index 8d0fe73ff..6ee8caef7 100644
--- a/src/box/sql/update.c
+++ b/src/box/sql/update.c
@@ -405,6 +405,8 @@ sqlite3Update(Parse * pParse,		/* The parser context */
 					  zAff, nPk);
 		sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iEph, regKey, iPk,
 				     nPk);
+		/* Set flag to save memory allocating one by malloc. */
+		sqlite3VdbeChangeP5(v, 1);
 	}
 	/* End the database scan loop.
 	 */
diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index ddd849903..2f9a6f19a 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -2666,7 +2666,7 @@ case OP_Affinity: {
 	break;
 }
 
-/* Opcode: MakeRecord P1 P2 P3 P4 *
+/* Opcode: MakeRecord P1 P2 P3 P4 P5
  * Synopsis: r[P3]=mkrec(r[P1 at P2])
  *
  * Convert P2 registers beginning with P1 into the [record format]
@@ -2681,6 +2681,9 @@ case OP_Affinity: {
  * macros defined in sqliteInt.h.
  *
  * If P4 is NULL then all index fields have the affinity BLOB.
+ *
+ * If P5 is not NULL then record under construction is intended to be inserted
+ * into ephemeral space. Thus, sort of memory optimization can be performed.
  */
 case OP_MakeRecord: {
 	Mem *pRec;             /* The new record */
@@ -2689,6 +2692,7 @@ case OP_MakeRecord: {
 	Mem MAYBE_UNUSED *pLast;  /* Last field of the record */
 	int nField;            /* Number of fields in the record */
 	char *zAffinity;       /* The affinity string for the record */
+	u8 bIsEphemeral;
 
 	/* Assuming the record contains N fields, the record format looks
 	 * like this:
@@ -2707,6 +2711,7 @@ case OP_MakeRecord: {
 	 */
 	nField = pOp->p1;
 	zAffinity = pOp->p4.z;
+	bIsEphemeral = pOp->p5;
 	assert(nField>0 && pOp->p2>0 && pOp->p2+nField<=(p->nMem+1 - p->nCursor)+1);
 	pData0 = &aMem[nField];
 	nField = pOp->p2;
@@ -2737,19 +2742,29 @@ case OP_MakeRecord: {
 		goto too_big;
 	}
 
-	/* Make sure the output register has a buffer large enough to store
-	 * the new record. The output register (pOp->p3) is not allowed to
-	 * be one of the input registers (because the following call to
-	 * sqlite3VdbeMemClearAndResize() could clobber the value before it is used).
+	/* In case of ephemeral space, it is possible to save some memory
+	 * allocating one by ordinary malloc: instead of cutting pieces
+	 * from region and waiting while they will be freed after
+	 * statement commitment, it is better to reuse the same chunk.
+	 * Such optimization is prohibited for ordinary spaces, since
+	 * memory shouldn't be reused until it is written into WAL.
 	 */
-	if (sqlite3VdbeMemClearAndResize(pOut, (int)nByte)) {
-		goto no_mem;
+	if (bIsEphemeral) {
+		rc = sqlite3VdbeMemClearAndResize(pOut, nByte);
+		pOut->flags = MEM_Blob;
+	} else {
+		/* Allocate memory on the region for the tuple
+		 * to be passed to Tarantool. Before that, make
+		 * sure previously allocated memory has gone.
+		 */
+		sqlite3VdbeMemRelease(pOut);
+		rc = sql_vdbe_mem_alloc_region(pOut, nByte);
 	}
-
+	if (rc)
+		goto no_mem;
 	/* Write the record */
 	assert(pOp->p3>0 && pOp->p3<=(p->nMem+1 - p->nCursor));
 	pOut->n = sqlite3VdbeMsgpackRecordPut((u8 *)pOut->z, pData0, nField);
-	pOut->flags = MEM_Blob;
 	REGISTER_TRACE(pOp->p3, pOut);
 	UPDATE_MAX_BLOBSIZE(pOut);
 	break;
@@ -4126,11 +4141,11 @@ case OP_RowData: {
 		goto too_big;
 	}
 	testcase( n==0);
-	if (sqlite3VdbeMemClearAndResize(pOut, MAX(n,32))) {
+
+	sqlite3VdbeMemRelease(pOut);
+	rc = sql_vdbe_mem_alloc_region(pOut, n);
+	if (rc)
 		goto no_mem;
-	}
-	pOut->n = n;
-	MemSetTypeFlag(pOut, MEM_Blob);
 	rc = sqlite3CursorPayload(pCrsr, 0, n, pOut->z);
 	if (rc) goto abort_due_to_error;
 	UPDATE_MAX_BLOBSIZE(pOut);
@@ -4477,6 +4492,8 @@ case OP_IdxInsert: {        /* in2 */
 		BtCursor *pBtCur = pC->uc.pCursor;
 		assert((x.pKey == 0) == (pBtCur->pKeyInfo == 0));
 		if (pBtCur->curFlags & BTCF_TaCursor) {
+			/* Make sure that memory has been allocated on region. */
+			assert(aMem[pOp->p2].flags & MEM_Ephem);
 			if (pOp->opcode == OP_IdxInsert)
 				rc = tarantoolSqlite3Insert(pBtCur, &x);
 			else
diff --git a/src/box/sql/vdbe.h b/src/box/sql/vdbe.h
index 0e05168aa..5c40e3f72 100644
--- a/src/box/sql/vdbe.h
+++ b/src/box/sql/vdbe.h
@@ -261,6 +261,7 @@ void sqlite3VdbeRecordUnpackMsgpack(KeyInfo *, int, const void *,
 int sqlite3VdbeRecordCompare(int, const void *, UnpackedRecord *);
 int sqlite3VdbeRecordCompareWithSkip(int, const void *, UnpackedRecord *, int);
 UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(KeyInfo *);
+int sql_vdbe_mem_alloc_region(Mem *, uint32_t);
 
 typedef int (*RecordCompare) (int, const void *, UnpackedRecord *);
 RecordCompare sqlite3VdbeFindCompare(UnpackedRecord *);
diff --git a/src/box/sql/vdbeaux.c b/src/box/sql/vdbeaux.c
index df6881488..80fa6c6a8 100644
--- a/src/box/sql/vdbeaux.c
+++ b/src/box/sql/vdbeaux.c
@@ -3549,6 +3549,18 @@ sqlite3VdbeAllocUnpackedRecord(KeyInfo * pKeyInfo)
 	return p;
 }
 
+/* Allocate memory for internal VDBE structure on region. */
+int
+sql_vdbe_mem_alloc_region(Mem *vdbe_mem, uint32_t size)
+{
+	vdbe_mem->n = size;
+	vdbe_mem->z = region_alloc(&fiber()->gc, size);
+	if (vdbe_mem->z == NULL)
+		return SQLITE_NOMEM;
+	MemSetTypeFlag(vdbe_mem, MEM_Blob | MEM_Ephem);
+	return SQLITE_OK;
+}
+
 #if SQLITE_DEBUG
 /*
  * This function compares two index or table record keys in the same way
-- 
2.15.1




More information about the Tarantool-patches mailing list