[patches] [PATCH v2 3/3] sql: make OP_MakeRecord use region allocator

Nikita Pettik korablev at tarantool.org
Mon Feb 12 22:23:48 MSK 2018


VDBE used ordinary malloc to allocate memory for tuples. However,
immediately before calling insertion method to space, tuples were copied
into region memory. Thus, excess copy took place. This patch makes
OP_MakeRecord opcode allocate memory on region right away.
However, in case of ephemeral space, we are able to save memory
allocating one by ordinary malloc and reusing it, instead of cutting
areas from region allocator and waiting them to be free after being
written to WAL.

Closes #3021

Signed-off-by: Nikita Pettik <korablev at tarantool.org>
---
 src/box/sql.c        | 14 ++++----------
 src/box/sql/delete.c |  2 ++
 src/box/sql/insert.c |  2 ++
 src/box/sql/select.c |  6 ++++++
 src/box/sql/update.c |  2 ++
 src/box/sql/vdbe.c   | 31 +++++++++++++++++++++++--------
 6 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/src/box/sql.c b/src/box/sql.c
index 1ed2490c7..d45d7e75f 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -501,21 +501,15 @@ static int insertOrReplace(BtCursor *pCur, const CursorPayload *pX,
 	assert(operationType == TARANTOOL_INDEX_INSERT ||
 	       operationType == TARANTOOL_INDEX_REPLACE);
 
-	char *buf = (char*)region_alloc(&fiber()->gc, pX->nKey);
-	if (buf == NULL) {
-		diag_set(OutOfMemory, pX->nKey, "region", "buf");
-		return SQLITE_TARANTOOL_ERROR;
-	}
-
-	memcpy(buf, pX->pKey, pX->nKey);
 	int space_id = SQLITE_PAGENO_TO_SPACEID(pCur->pgnoRoot);
-
 	int rc;
 	if (operationType == TARANTOOL_INDEX_INSERT) {
-		rc = box_insert(space_id, buf, (const char *)buf + pX->nKey,
+		rc = box_insert(space_id, pX->pKey,
+				(const char *)pX->pKey + pX->nKey,
 				NULL /* result */);
 	} else {
-		rc = box_replace(space_id, buf, (const char *)buf + pX->nKey,
+		rc = box_replace(space_id, pX->pKey,
+				 (const char *)pX->pKey + pX->nKey,
 				 NULL /* result */);
 	}
 
diff --git a/src/box/sql/delete.c b/src/box/sql/delete.c
index f7e68a6d3..c5750088f 100644
--- a/src/box/sql/delete.c
+++ b/src/box/sql/delete.c
@@ -490,6 +490,8 @@ sqlite3DeleteFrom(Parse * pParse,	/* The parser context */
 			const char *zAff = isView ? 0 :
 					  sqlite3IndexAffinityStr(pParse->db, pPk);
 			sqlite3VdbeAddOp4(v, OP_MakeRecord, iPk, nPk, iKey, zAff, nPk);
+			/* Set flag to save memory allocating one by malloc. */
+			sqlite3VdbeChangeP5(v, 1);
 			sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iEphCur, iKey, iPk, nPk);
 		}
 
diff --git a/src/box/sql/insert.c b/src/box/sql/insert.c
index bc1906fb6..5fbffd78a 100644
--- a/src/box/sql/insert.c
+++ b/src/box/sql/insert.c
@@ -553,6 +553,8 @@ sqlite3Insert(Parse * pParse,	/* Parser context */
 			sqlite3VdbeAddOp3(v, OP_Copy, regFromSelect, regCopy, nColumn-1);
 			sqlite3VdbeAddOp3(v, OP_MakeRecord, regCopy,
 					  nColumn + 1, regRec);
+			/* Set flag to save memory allocating one by malloc. */
+			sqlite3VdbeChangeP5(v, 1);
 			sqlite3VdbeAddOp2(v, OP_IdxInsert, srcTab, regRec);
 
 			sqlite3VdbeGoto(v, addrL);
diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index b86a9fa11..041e6ca87 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -997,6 +997,8 @@ selectInnerLoop(Parse * pParse,		/* The parser context */
 			testcase(eDest == SRT_DistFifo);
 			sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult,
 					  nResultCol, r1 + nPrefixReg);
+			/* Set flag to save memory allocating one by malloc. */
+			sqlite3VdbeChangeP5(v, 1);
 #ifndef SQLITE_OMIT_CTE
 			if (eDest == SRT_DistFifo) {
 				/* If the destination is DistFifo, then cursor (iParm+1) is open
@@ -1032,6 +1034,8 @@ selectInnerLoop(Parse * pParse,		/* The parser context */
 				 */
 				sqlite3VdbeAddOp3(v, OP_Copy, regResult, regCopy, nResultCol - 1);
 				sqlite3VdbeAddOp3(v, OP_MakeRecord, regCopy, nResultCol + 1, regRec);
+				/* Set flag to save memory allocating one by malloc. */
+				sqlite3VdbeChangeP5(v, 1);
 				sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm, regRec);
 				sqlite3ReleaseTempReg(pParse, regRec);
 				sqlite3ReleaseTempRange(pParse, regCopy, nResultCol + 1);
@@ -3015,6 +3019,8 @@ generateOutputSubroutine(Parse * pParse,	/* Parsing context */
 					  pIn->nSdst - 1);
 			sqlite3VdbeAddOp3(v, OP_MakeRecord, regCopy,
 					  pIn->nSdst + 1, regRec);
+			/* Set flag to save memory allocating one by malloc. */
+			sqlite3VdbeChangeP5(v, 1);
 			sqlite3VdbeAddOp2(v, OP_IdxInsert, pDest->iSDParm, regRec);
 			sqlite3ReleaseTempRange(pParse, regCopy, pIn->nSdst + 1);
 			sqlite3ReleaseTempReg(pParse, regRec);
diff --git a/src/box/sql/update.c b/src/box/sql/update.c
index 8d0fe73ff..6ee8caef7 100644
--- a/src/box/sql/update.c
+++ b/src/box/sql/update.c
@@ -405,6 +405,8 @@ sqlite3Update(Parse * pParse,		/* The parser context */
 					  zAff, nPk);
 		sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iEph, regKey, iPk,
 				     nPk);
+		/* Set flag to save memory allocating one by malloc. */
+		sqlite3VdbeChangeP5(v, 1);
 	}
 	/* End the database scan loop.
 	 */
diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index ddd849903..f999404c8 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -2689,6 +2689,7 @@ case OP_MakeRecord: {
 	Mem MAYBE_UNUSED *pLast;  /* Last field of the record */
 	int nField;            /* Number of fields in the record */
 	char *zAffinity;       /* The affinity string for the record */
+	u8 bIsEphemeral;
 
 	/* Assuming the record contains N fields, the record format looks
 	 * like this:
@@ -2707,6 +2708,7 @@ case OP_MakeRecord: {
 	 */
 	nField = pOp->p1;
 	zAffinity = pOp->p4.z;
+	bIsEphemeral = pOp->p5;
 	assert(nField>0 && pOp->p2>0 && pOp->p2+nField<=(p->nMem+1 - p->nCursor)+1);
 	pData0 = &aMem[nField];
 	nField = pOp->p2;
@@ -2737,19 +2739,32 @@ case OP_MakeRecord: {
 		goto too_big;
 	}
 
-	/* Make sure the output register has a buffer large enough to store
-	 * the new record. The output register (pOp->p3) is not allowed to
-	 * be one of the input registers (because the following call to
-	 * sqlite3VdbeMemClearAndResize() could clobber the value before it is used).
+	/* In case of ephemeral space, it is possible to save some memory
+	 * allocating one by ordinary malloc: instead of cutting pieces
+	 * from region and waiting while they will be freed after
+	 * statement commitment, it is better to reuse the same chunk.
+	 * Such optimization is prohibited for ordinary spaces, since
+	 * memory shouldn't be reused until it is written into WAL.
 	 */
-	if (sqlite3VdbeMemClearAndResize(pOut, (int)nByte)) {
-		goto no_mem;
+	if (bIsEphemeral) {
+		rc = sqlite3VdbeMemClearAndResize(pOut, nByte);
+		pOut->flags = MEM_Blob;
+	} else {
+		/* Allocate memory on the region for the tuple
+		 * to be passed to Tarantool. Before that, make
+		 * sure previously allocated memory has gone.
+		 */
+		sqlite3VdbeMemRelease(pOut);
+		pOut->n = nByte;
+		pOut->z = region_alloc(&fiber()->gc, nByte);
+		pOut->flags = MEM_Blob | MEM_Ephem;
+		rc = pOut->z ? SQLITE_OK : SQLITE_NOMEM;
 	}
-
+	if (rc)
+		goto no_mem;
 	/* Write the record */
 	assert(pOp->p3>0 && pOp->p3<=(p->nMem+1 - p->nCursor));
 	pOut->n = sqlite3VdbeMsgpackRecordPut((u8 *)pOut->z, pData0, nField);
-	pOut->flags = MEM_Blob;
 	REGISTER_TRACE(pOp->p3, pOut);
 	UPDATE_MAX_BLOBSIZE(pOut);
 	break;
-- 
2.15.1




More information about the Tarantool-patches mailing list