Tarantool development patches archive
 help / color / mirror / Atom feed
From: Nikita Pettik <korablev@tarantool.org>
To: tarantool-patches@freelists.org
Cc: v.shpilevoy@tarantool.org, Nikita Pettik <korablev@tarantool.org>
Subject: [tarantool-patches] [PATCH 1/4] sql: optimize compilation of SELECT COUNT(*)
Date: Mon, 23 Apr 2018 23:29:38 +0300	[thread overview]
Message-ID: <45c716b50ab9dd7405c1d8c20b86abbd639b5f25.1524515002.git.korablev@tarantool.org> (raw)
In-Reply-To: <cover.1524515002.git.korablev@tarantool.org>
In-Reply-To: <cover.1524515002.git.korablev@tarantool.org>

Originally, SQLite chooses the best index to perform COUNT operation.
In Tarantool there is no any considerable difference between them,
so lets don't spend time on this routine and choose always primary index,
in case of simple query 'SELECT COUNT(*) FROM <tab>'.
Also, patch contains codestyle fixes.
---
 src/box/sql/select.c      | 172 ++++++++++++++++++----------------------------
 src/box/sql/vdbe.c        |   2 -
 test/sql-tap/eqp.test.lua |   2 +-
 3 files changed, 68 insertions(+), 108 deletions(-)

diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index d97e466b5..0df8a71d4 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -35,6 +35,8 @@
  */
 #include <box/coll.h>
 #include "sqliteInt.h"
+#include "tarantoolInt.h"
+#include "box/schema.h"
 #include "box/session.h"
 
 /*
@@ -4262,42 +4264,42 @@ minMaxQuery(AggInfo * pAggInfo, ExprList ** ppMinMax)
 	return eRet;
 }
 
-/*
- * The select statement passed as the first argument is an aggregate query.
- * The second argument is the associated aggregate-info object. This
- * function tests if the SELECT is of the form:
+/**
+ * The second argument is the associated aggregate-info object.
+ * This function tests if the SELECT is of the form:
  *
  *   SELECT count(*) FROM <tbl>
  *
- * where table is a database table, not a sub-select or view. If the query
- * does match this pattern, then a pointer to the Table object representing
- * <tbl> is returned. Otherwise, 0 is returned.
+ * where table is not a sub-select or view.
+ *
+ * @param select The select statement in form of aggregate query.
+ * @param agg_info The associated aggregate-info object.
+ * @retval Pointer to space representing the table,
+ *         if the query matches this pattern. NULL otherwise.
  */
-static Table *
-isSimpleCount(Select * p, AggInfo * pAggInfo)
+static struct space*
+is_simple_count(struct Select *select, struct AggInfo *agg_info)
 {
-	Table *pTab;
-	Expr *pExpr;
-
-	assert(!p->pGroupBy);
-
-	if (p->pWhere || p->pEList->nExpr != 1
-	    || p->pSrc->nSrc != 1 || p->pSrc->a[0].pSelect) {
-		return 0;
-	}
-	pTab = p->pSrc->a[0].pTab;
-	pExpr = p->pEList->a[0].pExpr;
-	assert(pTab && !space_is_view(pTab) && pExpr);
-	if (pExpr->op != TK_AGG_FUNCTION)
-		return 0;
-	if (NEVER(pAggInfo->nFunc == 0))
-		return 0;
-	if ((pAggInfo->aFunc[0].pFunc->funcFlags & SQLITE_FUNC_COUNT) == 0)
-		return 0;
-	if (pExpr->flags & EP_Distinct)
-		return 0;
-
-	return pTab;
+	assert(select->pGroupBy == NULL);
+	if (select->pWhere != NULL || select->pEList->nExpr != 1 ||
+	    select->pSrc->nSrc != 1 || select->pSrc->a[0].pSelect != NULL) {
+		return NULL;
+	}
+	uint32_t space_id =
+		SQLITE_PAGENO_TO_SPACEID(select->pSrc->a[0].pTab->tnum);
+	struct space *space = space_by_id(space_id);
+	assert(space != NULL && !space->def->opts.is_view);
+	struct Expr *expr = select->pEList->a[0].pExpr;
+	assert(expr != NULL);
+	if (expr->op != TK_AGG_FUNCTION)
+		return NULL;
+	if (NEVER(agg_info->nFunc == 0))
+		return NULL;
+	if ((agg_info->aFunc[0].pFunc->funcFlags & SQLITE_FUNC_COUNT) == 0)
+		return NULL;
+	if (expr->flags & EP_Distinct)
+		return NULL;
+	return space;
 }
 
 /*
@@ -5294,25 +5296,23 @@ updateAccumulator(Parse * pParse, AggInfo * pAggInfo)
 	}
 }
 
-/*
- * Add a single OP_Explain instruction to the VDBE to explain a simple
- * count(*) query ("SELECT count(*) FROM pTab").
+/**
+ * Add a single OP_Explain instruction to the VDBE to explain
+ * a simple count(*) query ("SELECT count(*) FROM <tab>").
+ *
+ * @param parse_context Current parsing context.
+ * @param table_name Name of table being queried.
  */
 #ifndef SQLITE_OMIT_EXPLAIN
 static void
-explainSimpleCount(Parse * pParse,	/* Parse context */
-		   Table * pTab,	/* Table being queried */
-		   Index * pIdx)	/* Index used to optimize scan, or NULL */
+explain_simple_count(struct Parse *parse_context, const char *table_name)
 {
-	if (pParse->explain == 2) {
-		int bCover = (pIdx != 0 && !IsPrimaryKeyIndex(pIdx));
-		char *zEqp = sqlite3MPrintf(pParse->db, "SCAN TABLE %s%s%s",
-					    pTab->zName,
-					    bCover ? " USING COVERING INDEX " :
-					    "",
-					    bCover ? pIdx->zName : "");
-		sqlite3VdbeAddOp4(pParse->pVdbe, OP_Explain, pParse->iSelectId,
-				  0, 0, zEqp, P4_DYNAMIC);
+	if (parse_context->explain == 2) {
+		char *zEqp = sqlite3MPrintf(parse_context->db, "SCAN TABLE %s",
+					    table_name);
+		sqlite3VdbeAddOp4(parse_context->pVdbe, OP_Explain,
+				  parse_context->iSelectId, 0, 0, zEqp,
+				  P4_DYNAMIC);
 	}
 }
 #else
@@ -6124,71 +6124,32 @@ sqlite3Select(Parse * pParse,		/* The parser context */
 
 		} /* endif pGroupBy.  Begin aggregate queries without GROUP BY: */
 		else {
-			ExprList *pDel = 0;
-#ifndef SQLITE_OMIT_BTREECOUNT
-			Table *pTab;
-			if ((pTab = isSimpleCount(p, &sAggInfo)) != 0) {
-				/* If isSimpleCount() returns a pointer to a Table structure, then
-				 * the SQL statement is of the form:
+			struct space *space = is_simple_count(p, &sAggInfo);
+			if (space != NULL) {
+				/*
+				 * If is_simple_count() returns a pointer to
+				 * space, then the SQL statement is of the form:
 				 *
 				 *   SELECT count(*) FROM <tbl>
 				 *
-				 * where the Table structure returned represents table <tbl>.
-				 *
-				 * This statement is so common that it is optimized specially. The
-				 * OP_Count instruction is executed either on the intkey table that
-				 * contains the data for table <tbl> or on one of its indexes. It
-				 * is better to execute the op on an index, as indexes are almost
-				 * always spread across less pages than their corresponding tables.
+				 * This statement is so common that it is
+				 * optimized specially. The OP_Count instruction
+				 * is executed on the primary key index,
+				 * since there is no difference which index
+				 * to choose (except for partial indexes).
 				 */
-				const int iCsr = pParse->nTab++;	/* Cursor to scan b-tree */
-				Index *pIdx;	/* Iterator variable */
-				KeyInfo *pKeyInfo = 0;	/* Keyinfo for scanned index */
-				Index *pBest;	/* Best index found so far */
-				int iRoot = pTab->tnum;	/* Root page of scanned b-tree */
-
-				/* Search for the index that has the lowest scan cost.
-				 *
-				 * (2011-04-15) Do not do a full scan of an unordered index.
-				 *
-				 * (2013-10-03) Do not count the entries in a partial index.
-				 *
-				 * In practice the KeyInfo structure will not be used. It is only
-				 * passed to keep OP_OpenRead happy.
+				const int cursor = pParse->nTab++;
+				/*
+				 * Open the cursor, execute the OP_Count,
+				 * close the cursor.
 				 */
-				pBest = sqlite3PrimaryKeyIndex(pTab);
-				for (pIdx = pTab->pIndex; pIdx;
-				     pIdx = pIdx->pNext) {
-					if (pIdx->bUnordered == 0
-					    && pIdx->szIdxRow < pTab->szTabRow
-					    && pIdx->pPartIdxWhere == 0
-					    && (!pBest
-						|| pIdx->szIdxRow <
-						pBest->szIdxRow)
-					    ) {
-						pBest = pIdx;
-					}
-				}
-				if (pBest) {
-					iRoot = pBest->tnum;
-					pKeyInfo =
-					    sqlite3KeyInfoOfIndex(pParse, db,
-								  pBest);
-				}
-
-				/* Open a read-only cursor, execute the OP_Count, close the cursor. */
-				emit_open_cursor(pParse, iCsr, iRoot);
-				if (pKeyInfo) {
-					sqlite3VdbeChangeP4(v, -1,
-							    (char *)pKeyInfo,
-							    P4_KEYINFO);
-				}
-				sqlite3VdbeAddOp2(v, OP_Count, iCsr,
+				emit_open_cursor(pParse, cursor,
+						 space->def->id << 10);
+				sqlite3VdbeAddOp2(v, OP_Count, cursor,
 						  sAggInfo.aFunc[0].iMem);
-				sqlite3VdbeAddOp1(v, OP_Close, iCsr);
-				explainSimpleCount(pParse, pTab, pBest);
+				sqlite3VdbeAddOp1(v, OP_Close, cursor);
+				explain_simple_count(pParse, space->def->name);
 			} else
-#endif				/* SQLITE_OMIT_BTREECOUNT */
 			{
 				/* Check if the query is of one of the following forms:
 				 *
@@ -6217,6 +6178,7 @@ sqlite3Select(Parse * pParse,		/* The parser context */
 				 */
 				ExprList *pMinMax = 0;
 				u8 flag = WHERE_ORDERBY_NORMAL;
+				ExprList *pDel = 0;
 
 				assert(p->pGroupBy == 0);
 				assert(flag == 0);
@@ -6267,6 +6229,7 @@ sqlite3Select(Parse * pParse,		/* The parser context */
 				}
 				sqlite3WhereEnd(pWInfo);
 				finalizeAggFunctions(pParse, &sAggInfo);
+				sqlite3ExprListDelete(db, pDel);
 			}
 
 			sSort.pOrderBy = 0;
@@ -6274,7 +6237,6 @@ sqlite3Select(Parse * pParse,		/* The parser context */
 					   SQLITE_JUMPIFNULL);
 			selectInnerLoop(pParse, p, p->pEList, -1, 0, 0, pDest,
 					addrEnd, addrEnd);
-			sqlite3ExprListDelete(db, pDel);
 		}
 		sqlite3VdbeResolveLabel(v, addrEnd);
 
diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index 823718398..aaf912320 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -2831,7 +2831,6 @@ case OP_MakeRecord: {
  * Store the number of entries (an integer value) in the table or index
  * opened by cursor P1 in register P2
  */
-#ifndef SQLITE_OMIT_BTREECOUNT
 case OP_Count: {         /* out2 */
 	i64 nEntry;
 	BtCursor *pCrsr;
@@ -2852,7 +2851,6 @@ case OP_Count: {         /* out2 */
 	pOut->u.i = nEntry;
 	break;
 }
-#endif
 
 /* Opcode: Savepoint P1 * * P4 *
  *
diff --git a/test/sql-tap/eqp.test.lua b/test/sql-tap/eqp.test.lua
index 7d6ad8cd1..468bca0a7 100755
--- a/test/sql-tap/eqp.test.lua
+++ b/test/sql-tap/eqp.test.lua
@@ -736,7 +736,7 @@ test:do_eqp_test(7.1, "SELECT count(*) FROM t1", {
     {0, 0, 0, "SCAN TABLE T1"},
 })
 test:do_eqp_test(7.2, "SELECT count(*) FROM t2", {
-    {0, 0, 0, "SCAN TABLE T2 USING COVERING INDEX I1"},
+    {0, 0, 0, "SCAN TABLE T2"},
 })
 -- MUST_WORK_TEST
 if (0 > 0)
-- 
2.15.1

  reply	other threads:[~2018-04-23 20:31 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-23 20:29 [tarantool-patches] [PATCH 0/4] Move original SQLite's statistics to server Nikita Pettik
2018-04-23 20:29 ` Nikita Pettik [this message]
2018-04-24 12:51   ` [tarantool-patches] Re: [PATCH 1/4] sql: optimize compilation of SELECT COUNT(*) Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-04-23 20:29 ` [tarantool-patches] [PATCH 2/4] sql: add average tuple size calculation Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-04-23 20:29 ` [tarantool-patches] [PATCH 3/4] sql: refactor usages of table's tuple count Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-04-23 20:29 ` [tarantool-patches] [PATCH 4/4] sql: move SQL statistics to server Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-05-11 22:00       ` Vladislav Shpilevoy
2018-05-14 11:52         ` n.pettik
2018-05-14 12:54           ` Vladislav Shpilevoy
2018-05-14 13:55             ` n.pettik
2018-05-14 14:12 ` [tarantool-patches] Re: [PATCH 0/4] Move original SQLite's " Vladislav Shpilevoy
2018-05-15 13:42   ` Kirill Yukhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=45c716b50ab9dd7405c1d8c20b86abbd639b5f25.1524515002.git.korablev@tarantool.org \
    --to=korablev@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [tarantool-patches] [PATCH 1/4] sql: optimize compilation of SELECT COUNT(*)' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox