[tarantool-patches] [PATCH v1 1/1] sql: return lookaside system

imeevma at tarantool.org imeevma at tarantool.org
Sat Jul 6 13:23:40 MSK 2019


After the LOOKASIDE system was removed, performance was found to
have dropped. To restore it, this patch partially brings back the
LOOKASIDE system in almost the same form it had before commit
4326ca7.

Closes #4319
---
https://github.com/tarantool/tarantool/issues/4319
https://github.com/tarantool/tarantool/tree/imeevma/gh-4319-return-lookaside

 src/box/sql/main.c      | 74 ++++++++++++++++++++++++++++++++++++++
 src/box/sql/malloc.c    | 95 ++++++++++++++++++++++++++++++++++++++++++-------
 src/box/sql/parse.y     | 18 +++++++---
 src/box/sql/prepare.c   |  6 ++++
 src/box/sql/printf.c    |  7 ++--
 src/box/sql/select.c    |  4 +++
 src/box/sql/sqlInt.h    | 43 +++++++++++++++++++++-
 src/box/sql/vdbeaux.c   |  2 +-
 src/box/sql/vdbemem.c   | 13 +++----
 src/box/sql/whereexpr.c |  2 +-
 10 files changed, 235 insertions(+), 29 deletions(-)

diff --git a/src/box/sql/main.c b/src/box/sql/main.c
index bfe3d71..c5dccf3 100644
--- a/src/box/sql/main.c
+++ b/src/box/sql/main.c
@@ -58,6 +58,11 @@ char *sql_temp_directory = 0;
  */
 char *sql_data_directory = 0;
 
+enum {
+	LOOKASIDE_SLOT_NUMBER = 125,
+	LOOKASIDE_SLOT_SIZE = 512,
+};
+
 /*
  * Initialize sql.
  *
@@ -151,6 +156,72 @@ sql_initialize(void)
 	return 0;
 }
 
+/*
+ * Set up the lookaside buffers for a database connection.
+ * Return 0 on success.
+ * If lookaside slots are still checked out (nOut != 0), return -1.
+ *
+ * The sz parameter is the number of bytes in each lookaside slot.
+ * The cnt parameter is the number of slots.  If pBuf is NULL the
+ * space for the lookaside memory is obtained from sqlMalloc().
+ * If pBuf is not NULL then it is sz*cnt bytes of memory to use for
+ * the lookaside memory.
+ */
+static int
+setupLookaside(sql * db, void *pBuf, int sz, int cnt)
+{
+	void *pStart;
+	if (db->lookaside.nOut)
+		return -1;
+	/* Free any existing lookaside buffer for this handle before
+	 * allocating a new one so we don't have to have space for
+	 * both at the same time.
+	 */
+	if (db->lookaside.bMalloced)
+		sql_free(db->lookaside.pStart);
+	/* The size of a lookaside slot after ROUNDDOWN8 needs to be larger
+	 * than a pointer to be useful.
+	 */
+	sz = ROUNDDOWN8(sz);	/* IMP: R-33038-09382 */
+	if (sz <= (int)sizeof(LookasideSlot *))
+		sz = 0;
+	if (cnt < 0)
+		cnt = 0;
+	if (sz == 0 || cnt == 0) {
+		sz = 0;
+		pStart = 0;
+	} else if (pBuf == 0) {
+		pStart = sqlMalloc(sz * cnt);	/* IMP: R-61949-35727 */
+		if (pStart)
+			cnt = sqlMallocSize(pStart) / sz;
+	} else {
+		pStart = pBuf;
+	}
+	db->lookaside.pStart = pStart;
+	db->lookaside.pFree = 0;
+	db->lookaside.sz = (u16) sz;
+	if (pStart) {
+		int i;
+		LookasideSlot *p;
+		assert(sz > (int)sizeof(LookasideSlot *));
+		p = (LookasideSlot *) pStart;
+		for (i = cnt - 1; i >= 0; i--) {
+			p->pNext = db->lookaside.pFree;
+			db->lookaside.pFree = p;
+			p = (LookasideSlot *) & ((u8 *) p)[sz];
+		}
+		db->lookaside.pEnd = p;
+		db->lookaside.bDisable = 0;
+		db->lookaside.bMalloced = pBuf == 0 ? 1 : 0;
+	} else {
+		db->lookaside.pStart = db;
+		db->lookaside.pEnd = db;
+		db->lookaside.bDisable = 1;
+		db->lookaside.bMalloced = 0;
+	}
+	return 0;
+}
+
 void
 sql_row_count(struct sql_context *context, MAYBE_UNUSED int unused1,
 	      MAYBE_UNUSED sql_value **unused2)
@@ -466,6 +537,9 @@ sql_init_db(sql **out_db)
 	 */
 	sqlRegisterPerConnectionBuiltinFunctions(db);
 
+	/* Enable the lookaside-malloc subsystem */
+	setupLookaside(db, 0, LOOKASIDE_SLOT_SIZE, LOOKASIDE_SLOT_NUMBER);
+
 	*out_db = db;
 	return 0;
 }
diff --git a/src/box/sql/malloc.c b/src/box/sql/malloc.c
index 705f0e1..f9fe950 100644
--- a/src/box/sql/malloc.c
+++ b/src/box/sql/malloc.c
@@ -144,6 +144,15 @@ sql_malloc64(sql_uint64 n)
 }
 
 /*
+ * TRUE if p is a lookaside memory allocation from db
+ */
+static int
+isLookaside(sql * db, void *p)
+{
+	return SQL_WITHIN(p, db->lookaside.pStart, db->lookaside.pEnd);
+}
+
+/*
  * Return the size of a memory allocation previously obtained from
  * sqlMalloc() or sql_malloc().
  */
@@ -153,6 +162,16 @@ sqlMallocSize(void *p)
 	return sql_sized_sizeof(p);
 }
 
+int
+sqlDbMallocSize(sql * db, void *p)
+{
+	assert(p != 0);
+	if (db == 0 || !isLookaside(db, p))
+		return sql_sized_sizeof(p);
+	else
+		return db->lookaside.sz;
+}
+
 /*
  * Free memory previously obtained from sqlMalloc().
  */
@@ -173,7 +192,15 @@ sql_free(void *p)
 void
 sqlDbFree(sql * db, void *p)
 {
-	(void) db;
+	if (db != NULL) {
+		if (isLookaside(db, p)) {
+			LookasideSlot *pBuf = (LookasideSlot *) p;
+			pBuf->pNext = db->lookaside.pFree;
+			db->lookaside.pFree = pBuf;
+			db->lookaside.nOut--;
+			return;
+		}
+	}
 	sql_free(p);
 }
 
@@ -240,9 +267,24 @@ sqlDbMallocZero(sql * db, u64 n)
 	return p;
 }
 
+/* Finish the work of sqlDbMallocRawNN for the unusual and
+ * slower case when the allocation cannot be fulfilled using lookaside.
+ */
+static SQL_NOINLINE void *
+dbMallocRawFinish(sql * db, u64 n)
+{
+	void *p;
+	assert(db != 0);
+	p = sqlMalloc(n);
+	if (!p)
+		sqlOomFault(db);
+	return p;
+}
+
 /*
- * Allocate heap memory. If the allocation fails, set the
- * mallocFailed flag in the connection pointer.
+ * Allocate memory, either lookaside (if possible) or heap.
+ * If the allocation fails, set the mallocFailed flag in
+ * the connection pointer.
  *
  * If db!=0 and db->mallocFailed is true (indicating a prior malloc
  * failure on the same database connection) then always return 0.
@@ -275,12 +317,26 @@ void *
 sqlDbMallocRawNN(sql * db, u64 n)
 {
 	assert(db != NULL);
-	if (db->mallocFailed)
-		return NULL;
-	void *p = sqlMalloc(n);
-	if (p == NULL)
-		sqlOomFault(db);
-	return p;
+	LookasideSlot *pBuf;
+	if (db->lookaside.bDisable == 0) {
+		assert(db->mallocFailed == 0);
+		if (n > db->lookaside.sz) {
+			db->lookaside.anStat[1]++;
+		} else if ((pBuf = db->lookaside.pFree) == 0) {
+			db->lookaside.anStat[2]++;
+		} else {
+			db->lookaside.pFree = pBuf->pNext;
+			db->lookaside.nOut++;
+			db->lookaside.anStat[0]++;
+			if (db->lookaside.nOut > db->lookaside.mxOut) {
+				db->lookaside.mxOut = db->lookaside.nOut;
+			}
+			return (void *)pBuf;
+		}
+	} else if (db->mallocFailed) {
+		return 0;
+	}
+	return dbMallocRawFinish(db, n);
 }
 
 /* Forward declaration */
@@ -296,6 +352,8 @@ sqlDbRealloc(sql * db, void *p, u64 n)
 	assert(db != 0);
 	if (p == 0)
 		return sqlDbMallocRawNN(db, n);
+	if (isLookaside(db, p) && n <= db->lookaside.sz)
+		return p;
 	return dbReallocFinish(db, p, n);
 }
 
@@ -306,9 +364,17 @@ dbReallocFinish(sql * db, void *p, u64 n)
 	assert(db != 0);
 	assert(p != 0);
 	if (db->mallocFailed == 0) {
-		pNew = sql_realloc64(p, n);
-		if (!pNew)
-			sqlOomFault(db);
+		if (isLookaside(db, p)) {
+			pNew = sqlDbMallocRawNN(db, n);
+			if (pNew) {
+				memcpy(pNew, p, db->lookaside.sz);
+				sqlDbFree(db, p);
+			}
+		} else {
+			pNew = sql_realloc64(p, n);
+			if (!pNew)
+				sqlOomFault(db);
+		}
 	}
 	return pNew;
 }
@@ -378,6 +444,9 @@ sqlDbStrNDup(sql * db, const char *z, u64 n)
 void
 sqlOomClear(sql * db)
 {
-	if (db->mallocFailed && db->nVdbeExec == 0)
+	if (db->mallocFailed && db->nVdbeExec == 0) {
 		db->mallocFailed = 0;
+		assert(db->lookaside.bDisable > 0);
+		db->lookaside.bDisable--;
+	}
 }
diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index 010feff..7af5e7d 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -106,6 +106,15 @@ struct LimitVal {
 */
 struct TrigEvent { int a; IdList * b; };
 
+/*
+** Disable lookaside memory allocation for objects that might be
+** shared across database connections.
+*/
+static void disableLookaside(Parse *pParse){
+  pParse->disableLookaside++;
+  pParse->db->lookaside.bDisable++;
+}
+
 } // end %include
 
 // Input is a single SQL command
@@ -169,10 +178,11 @@ cmd ::= ROLLBACK TO savepoint_opt nm(X). {
 ///////////////////// The CREATE TABLE statement ////////////////////////////
 //
 cmd ::= create_table create_table_args.
-create_table ::= CREATE TABLE ifnotexists(E) nm(Y). {
+create_table ::= createkw TABLE ifnotexists(E) nm(Y). {
   create_table_def_init(&pParse->create_table_def, &Y, E);
   pParse->create_table_def.new_space = sqlStartTable(pParse, &Y);
 }
+createkw(A) ::= CREATE(A).  {disableLookaside(pParse);}
 
 %type ifnotexists {int}
 ifnotexists(A) ::= .              {A = 0;}
@@ -395,7 +405,7 @@ ifexists(A) ::= .            {A = 0;}
 
 ///////////////////// The CREATE VIEW statement /////////////////////////////
 //
-cmd ::= CREATE(X) VIEW ifnotexists(E) nm(Y) eidlist_opt(C)
+cmd ::= createkw(X) VIEW ifnotexists(E) nm(Y) eidlist_opt(C)
           AS select(S). {
   if (!pParse->parse_only) {
     create_view_def_init(&pParse->create_view_def, &Y, &X, C, S, E);
@@ -1395,7 +1405,7 @@ paren_exprlist(A) ::= LP exprlist(X) RP.  {A = X;}
 
 ///////////////////////////// The CREATE INDEX command ///////////////////////
 //
-cmd ::= CREATE uniqueflag(U) INDEX ifnotexists(NE) nm(X)
+cmd ::= createkw uniqueflag(U) INDEX ifnotexists(NE) nm(X)
         ON nm(Y) LP sortlist(Z) RP. {
   struct SrcList *src_list = sql_src_list_append(pParse->db,0,&Y);
   if (src_list == NULL) {
@@ -1498,7 +1508,7 @@ plus_num(A) ::= number(A).
 minus_num(A) ::= MINUS number(X).     {A = X;}
 //////////////////////////// The CREATE TRIGGER command /////////////////////
 
-cmd ::= CREATE trigger_decl(A) BEGIN trigger_cmd_list(S) END(Z). {
+cmd ::= createkw trigger_decl(A) BEGIN trigger_cmd_list(S) END(Z). {
   Token all;
   all.z = A.z;
   all.n = (int)(Z.z - A.z) + Z.n;
diff --git a/src/box/sql/prepare.c b/src/box/sql/prepare.c
index 84fb31b..cb2c028 100644
--- a/src/box/sql/prepare.c
+++ b/src/box/sql/prepare.c
@@ -256,6 +256,12 @@ sql_parser_destroy(Parse *parser)
 	sqlDbFree(db, parser->aLabel);
 	sql_expr_list_delete(db, parser->pConstExpr);
 	create_table_def_destroy(&parser->create_table_def);
+	if (db != NULL) {
+		assert(db->lookaside.bDisable >=
+		       parser->disableLookaside);
+		db->lookaside.bDisable -= parser->disableLookaside;
+	}
+	parser->disableLookaside = 0;
 	switch (parser->parsed_ast_type) {
 	case AST_TYPE_SELECT:
 		sql_select_delete(db, parser->parsed_ast.select);
diff --git a/src/box/sql/printf.c b/src/box/sql/printf.c
index 98372f0..a3ff5bb 100644
--- a/src/box/sql/printf.c
+++ b/src/box/sql/printf.c
@@ -864,7 +864,7 @@ sqlStrAccumEnlarge(StrAccum * p, int N)
 			if (!isMalloced(p) && p->nChar > 0)
 				memcpy(zNew, p->zText, p->nChar);
 			p->zText = zNew;
-			p->nAlloc = sqlMallocSize(zNew);
+			p->nAlloc = sqlDbMallocSize(p->db, zNew);
 			p->printfFlags |= SQL_PRINTF_MALLOCED;
 		} else {
 			sqlStrAccumReset(p);
@@ -989,8 +989,9 @@ sqlStrAccumReset(StrAccum * p)
  * Initialize a string accumulator.
  *
  * p:     The accumulator to be initialized.
- * db:    Pointer to a database connection.  May be NULL.
- *        db->mallocFailed is set appropriately when not NULL.
+ * db:    Pointer to a database connection.  May be NULL.  Lookaside
+ *        memory is used if not NULL. db->mallocFailed is set appropriately
+ *        when not NULL.
  * zBase: An initial buffer.  May be NULL in which case the initial buffer
  *        is malloced.
  * n:     Size of zBase in bytes.  If total space requirements never exceed
diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index 7c8da25..1ed6bcd 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -1993,6 +1993,10 @@ sqlResultSetOfSelect(Parse * pParse, Select * pSelect)
 	struct space *space = sql_ephemeral_space_new(pParse, NULL);
 	if (space == NULL)
 		return NULL;
+	/* The sqlResultSetOfSelect() is only used in contexts where lookaside
+	 * is disabled
+	 */
+	assert(db->lookaside.bDisable);
 	sqlColumnsFromExprList(pParse, pSelect->pEList, space->def);
 	sqlSelectAddColumnTypeAndCollation(pParse, space->def, pSelect);
 	if (db->mallocFailed)
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 73dc6e4..bd79506 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -1041,6 +1041,8 @@ typedef struct FuncDef FuncDef;
 typedef struct FuncDefHash FuncDefHash;
 typedef struct IdList IdList;
 typedef struct KeyClass KeyClass;
+typedef struct Lookaside Lookaside;
+typedef struct LookasideSlot LookasideSlot;
 typedef struct NameContext NameContext;
 typedef struct Parse Parse;
 typedef struct PrintfArguments PrintfArguments;
@@ -1084,6 +1086,41 @@ typedef int VList;
 #define SQL_N_LIMIT (SQL_LIMIT_TRIGGER_DEPTH+1)
 
 /*
+ * Lookaside malloc is a set of fixed-size buffers that can be used
+ * to satisfy small transient memory allocation requests for objects
+ * associated with a particular database connection.  The use of
+ * lookaside malloc provides a significant performance enhancement
+ * (approx 10%) by avoiding numerous malloc/free requests while parsing
+ * SQL statements.
+ *
+ * The Lookaside structure holds configuration information about the
+ * lookaside malloc subsystem.  Each available memory allocation in
+ * the lookaside subsystem is stored on a linked list of LookasideSlot
+ * objects.
+ *
+ * Lookaside allocations are only allowed for objects that are associated
+ * with a particular database connection.  Hence, schema information cannot
+ * be stored in lookaside because in shared cache mode the schema information
+ * is shared by multiple database connections.  Therefore, while parsing
+ * schema information, the Lookaside.bDisable counter is incremented so that
+ * lookaside allocations are not used to construct the schema objects.
+ */
+struct Lookaside {
+	u32 bDisable;		/* Only operate the lookaside when zero */
+	u16 sz;			/* Size of each buffer in bytes */
+	u8 bMalloced;		/* True if pStart obtained from sql_malloc() */
+	int nOut;		/* Number of buffers currently checked out */
+	int mxOut;		/* Highwater mark for nOut */
+	int anStat[3];		/* 0: hits.  1: size misses.  2: full misses */
+	LookasideSlot *pFree;	/* List of available buffers */
+	void *pStart;		/* First byte of available memory space */
+	void *pEnd;		/* First byte past end of available space */
+};
+struct LookasideSlot {
+	LookasideSlot *pNext;	/* Next buffer in the list of free buffers */
+};
+
+/*
  * A hash table for built-in function definitions.  (Application-defined
  * functions use a regular table table from hash.h.)
  *
@@ -1133,6 +1170,7 @@ struct sql {
 	void *pUpdateArg;
 	void (*xUpdateCallback) (void *, int, const char *, const char *,
 				 sql_int64);
+	Lookaside lookaside;	/* Lookaside malloc configuration */
 	Hash aFunc;		/* Hash table of connection functions */
 };
 
@@ -2232,6 +2270,7 @@ struct Parse {
 	u8 isMultiWrite;	/* True if statement may modify/insert multiple rows */
 	u8 hasCompound;		/* Need to invoke convertCompoundSelectToSubquery() */
 	u8 okConstFactor;	/* OK to factor out constants */
+	u8 disableLookaside;	/* Number of times lookaside has been disabled */
 	u8 nColCache;		/* Number of entries in aColCache[] */
 	int nRangeReg;		/* Size of the temporary register block */
 	int iRangeReg;		/* First register in temporary register block */
@@ -2506,7 +2545,7 @@ struct TriggerStep {
  * do not necessarily know how big the string will be in the end.
  */
 struct StrAccum {
-	sql *db;		/* Database for temporary buffers. */
+	sql *db;		/* Optional database for lookaside.  Can be NULL */
 	char *zBase;		/* A base allocation.  Not from malloc. */
 	char *zText;		/* The string collected so far */
 	u32 nChar;		/* Length of the string so far */
@@ -2644,6 +2683,7 @@ void *sqlDbReallocOrFree(sql *, void *, u64);
 void *sqlDbRealloc(sql *, void *, u64);
 void sqlDbFree(sql *, void *);
 int sqlMallocSize(void *);
+int sqlDbMallocSize(sql *, void *);
 
 /*
  * On systems with ample stack space and that support alloca(), make
@@ -4293,6 +4333,7 @@ static inline void
 sqlOomFault(struct sql *db)
 {
 	db->mallocFailed = 1;
+	db->lookaside.bDisable++;
 }
 
 void sqlOomClear(sql *);
diff --git a/src/box/sql/vdbeaux.c b/src/box/sql/vdbeaux.c
index baeeb46..ebdb906 100644
--- a/src/box/sql/vdbeaux.c
+++ b/src/box/sql/vdbeaux.c
@@ -188,7 +188,7 @@ growOpArray(Vdbe * v, int nOp)
 	assert(nNew >= (p->nOpAlloc + nOp));
 	pNew = sqlDbRealloc(p->db, v->aOp, nNew * sizeof(Op));
 	if (pNew) {
-		p->szOpAlloc = sqlMallocSize(pNew);
+		p->szOpAlloc = sqlDbMallocSize(p->db, pNew);
 		p->nOpAlloc = p->szOpAlloc / sizeof(Op);
 		v->aOp = pNew;
 		return 0;
diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
index 4e4bd59..466eddf 100644
--- a/src/box/sql/vdbemem.c
+++ b/src/box/sql/vdbemem.c
@@ -69,8 +69,8 @@ sqlVdbeCheckMemInvariants(Mem * p)
 	assert((p->flags & (MEM_Int | MEM_Real)) != (MEM_Int | MEM_Real));
 
 	/* The szMalloc field holds the correct memory allocation size */
-	assert(p->szMalloc == 0
-	       || p->szMalloc == sqlMallocSize(p->zMalloc));
+	assert(p->szMalloc == 0 ||
+	       p->szMalloc == sqlDbMallocSize(p->db, p->zMalloc));
 
 	/* If p holds a string or blob, the Mem.z must point to exactly
 	 * one of the following:
@@ -111,8 +111,8 @@ sqlVdbeMemGrow(Mem * pMem, int n, int bPreserve)
 	assert(bPreserve == 0 || pMem->flags & (MEM_Blob | MEM_Str));
 	testcase(bPreserve && pMem->z == 0);
 
-	assert(pMem->szMalloc == 0
-	       || pMem->szMalloc == sqlMallocSize(pMem->zMalloc));
+	assert(pMem->szMalloc == 0 ||
+	       pMem->szMalloc == sqlDbMallocSize(pMem->db, pMem->zMalloc));
 	if (pMem->szMalloc < n) {
 		if (n < 32)
 			n = 32;
@@ -131,7 +131,8 @@ sqlVdbeMemGrow(Mem * pMem, int n, int bPreserve)
 			pMem->szMalloc = 0;
 			return -1;
 		} else {
-			pMem->szMalloc = sqlMallocSize(pMem->zMalloc);
+			pMem->szMalloc = sqlDbMallocSize(pMem->db,
+							 pMem->zMalloc);
 		}
 	}
 
@@ -1006,7 +1007,7 @@ sqlVdbeMemSetStr(Mem * pMem,	/* Memory cell to set to string value */
 	} else if (xDel == SQL_DYNAMIC) {
 		sqlVdbeMemRelease(pMem);
 		pMem->zMalloc = pMem->z = (char *)z;
-		pMem->szMalloc = sqlMallocSize(pMem->zMalloc);
+		pMem->szMalloc = sqlDbMallocSize(pMem->db, pMem->zMalloc);
 	} else {
 		sqlVdbeMemRelease(pMem);
 		pMem->z = (char *)z;
diff --git a/src/box/sql/whereexpr.c b/src/box/sql/whereexpr.c
index a88f964..9a6eff8 100644
--- a/src/box/sql/whereexpr.c
+++ b/src/box/sql/whereexpr.c
@@ -107,7 +107,7 @@ whereClauseInsert(WhereClause * pWC, Expr * p, u16 wtFlags)
 		if (pOld != pWC->aStatic) {
 			sqlDbFree(db, pOld);
 		}
-		pWC->nSlot = sqlMallocSize(pWC->a) / sizeof(pWC->a[0]);
+		pWC->nSlot = sqlDbMallocSize(db, pWC->a) / sizeof(pWC->a[0]);
 	}
 	pTerm = &pWC->a[idx = pWC->nTerm++];
 	if (p && ExprHasProperty(p, EP_Unlikely)) {
-- 
2.7.4





More information about the Tarantool-patches mailing list