Tarantool development patches archive
 help / color / mirror / Atom feed
From: Nikita Pettik <korablev@tarantool.org>
To: tarantool-patches@freelists.org
Cc: v.shpilevoy@tarantool.org, Nikita Pettik <korablev@tarantool.org>
Subject: [tarantool-patches] [PATCH 3/4] sql: refactor usages of table's tuple count
Date: Mon, 23 Apr 2018 23:29:40 +0300	[thread overview]
Message-ID: <7c93d6e8d43dbf9c912daca4ff1762d1fe412b35.1524515002.git.korablev@tarantool.org> (raw)
In-Reply-To: <cover.1524515002.git.korablev@tarantool.org>
In-Reply-To: <cover.1524515002.git.korablev@tarantool.org>

Count of tuples containing in given space can be calculated as a number of
tuples in primary index. However, if table represents temporary object
such as result set of SELECT or VIEW, tuple count can't be precisely
determined. In this case, default approximation is used: 1 million tuples.

Part of #3253
---
 src/box/sql/analyze.c   | 14 +++++++++++++-
 src/box/sql/build.c     |  4 +---
 src/box/sql/pragma.c    |  2 +-
 src/box/sql/select.c    | 12 +++++-------
 src/box/sql/sqliteInt.h | 29 +++++++++++++++++++++++++++--
 src/box/sql/where.c     | 18 ++++++++++++++++--
 6 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c
index 7c16a2154..4965e899a 100644
--- a/src/box/sql/analyze.c
+++ b/src/box/sql/analyze.c
@@ -1392,7 +1392,6 @@ initAvgEq(Index * pIdx)
 				    ((i64) 100 * pIdx->aiRowEst[0]) /
 				    pIdx->aiRowEst[iCol + 1];
 			}
-			pIdx->nRowEst0 = nRow;
 
 			/* Set nSum to the number of distinct (iCol+1) field prefixes that
 			 * occur in the stat4 table for this index. Set sumEq to the sum of
@@ -1634,6 +1633,19 @@ loadStat4(sqlite3 * db)
 			   "\"sample\" FROM \"_sql_stat4\"");
 }
 
+LogEst
+sql_space_tuple_log_count(struct Table *tab)
+{
+	struct space *space = space_by_id(SQLITE_PAGENO_TO_SPACEID(tab->tnum));
+	if (space == NULL)
+		return tab->tuple_log_count;
+	struct index *pk = space_index(space, 0);
+	/* If space represents VIEW, return default number. */
+	if (pk == NULL)
+		return DEFAULT_TUPLE_LOG_COUNT;
+	return sqlite3LogEst(pk->vtab->size(pk));
+}
+
 /*
  * Load the content of the _sql_stat1 and sql_stat4 tables. The
  * contents of _sql_stat1 are used to populate the Index.aiRowEst[]
diff --git a/src/box/sql/build.c b/src/box/sql/build.c
index cd99d5946..5a40308fe 100644
--- a/src/box/sql/build.c
+++ b/src/box/sql/build.c
@@ -560,8 +560,6 @@ sqlite3StartTable(Parse *pParse, Token *pName, int noErr)
 	pTable->pSchema = db->pSchema;
 	sqlite3HashInit(&pTable->idxHash);
 	pTable->nTabRef = 1;
-	pTable->nRowLogEst = 200;
-	assert(200 == sqlite3LogEst(1048576));
 	assert(pParse->pNewTable == 0);
 	pParse->pNewTable = pTable;
 
@@ -3154,7 +3152,7 @@ sqlite3DefaultRowEst(Index * pIdx)
 	 * number of rows in the table, or half the number of rows in the table
 	 * for a partial index.   But do not let the estimate drop below 10.
 	 */
-	a[0] = pIdx->pTable->nRowLogEst;
+	a[0] = pIdx->pTable->tuple_log_count;
 	if (pIdx->pPartIdxWhere != 0)
 		a[0] -= 10;
 	assert(10 == sqlite3LogEst(2));
diff --git a/src/box/sql/pragma.c b/src/box/sql/pragma.c
index 812a5c0a3..02990e422 100644
--- a/src/box/sql/pragma.c
+++ b/src/box/sql/pragma.c
@@ -410,7 +410,7 @@ sqlite3Pragma(Parse * pParse, Token * pId,	/* First part of [schema.]id field */
 						     pTab->zName,
 						     0,
 						     avg_tuple_size_pk,
-						     pTab->nRowLogEst);
+						     sql_space_tuple_log_count(pTab));
 				sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4);
 				for (pIdx = pTab->pIndex; pIdx;
 				     pIdx = pIdx->pNext) {
diff --git a/src/box/sql/select.c b/src/box/sql/select.c
index 391b7e0a2..02f3612c5 100644
--- a/src/box/sql/select.c
+++ b/src/box/sql/select.c
@@ -1990,8 +1990,7 @@ sqlite3ResultSetOfSelect(Parse * pParse, Select * pSelect)
 	assert(db->lookaside.bDisable);
 	pTab->nTabRef = 1;
 	pTab->zName = 0;
-	pTab->nRowLogEst = 200;
-	assert(200 == sqlite3LogEst(1048576));
+	pTab->tuple_log_count = DEFAULT_TUPLE_LOG_COUNT;
 	sqlite3ColumnsFromExprList(pParse, pSelect->pEList, &pTab->nCol,
 				   &pTab->aCol);
 	sqlite3SelectAddColumnTypeAndCollation(pParse, pTab, pSelect);
@@ -4527,7 +4526,7 @@ withExpand(Walker * pWalker, struct SrcList_item *pFrom)
 		pTab->nTabRef = 1;
 		pTab->zName = sqlite3DbStrDup(db, pCte->zName);
 		pTab->iPKey = -1;
-		pTab->nRowLogEst = 200;
+		pTab->tuple_log_count = DEFAULT_TUPLE_LOG_COUNT;
 		assert(200 == sqlite3LogEst(1048576));
 		pTab->tabFlags |= TF_Ephemeral;
 		pFrom->pSelect = sqlite3SelectDup(db, pCte->pSelect, 0);
@@ -4721,8 +4720,7 @@ selectExpander(Walker * pWalker, Select * p)
 			sqlite3ColumnsFromExprList(pParse, pSel->pEList,
 						   &pTab->nCol, &pTab->aCol);
 			pTab->iPKey = -1;
-			pTab->nRowLogEst = 200;
-			assert(200 == sqlite3LogEst(1048576));
+			pTab->tuple_log_count = DEFAULT_TUPLE_LOG_COUNT;
 			pTab->tabFlags |= TF_Ephemeral;
 #endif
 		} else {
@@ -5540,7 +5538,7 @@ sqlite3Select(Parse * pParse,		/* The parser context */
 			explainSetInteger(pItem->iSelectId,
 					  (u8) pParse->iNextSelectId);
 			sqlite3Select(pParse, pSub, &dest);
-			pItem->pTab->nRowLogEst = pSub->nSelectRow;
+			pItem->pTab->tuple_log_count = pSub->nSelectRow;
 			pItem->fg.viaCoroutine = 1;
 			pItem->regResult = dest.iSdst;
 			sqlite3VdbeEndCoroutine(v, pItem->regReturn);
@@ -5579,7 +5577,7 @@ sqlite3Select(Parse * pParse,		/* The parser context */
 			explainSetInteger(pItem->iSelectId,
 					  (u8) pParse->iNextSelectId);
 			sqlite3Select(pParse, pSub, &dest);
-			pItem->pTab->nRowLogEst = pSub->nSelectRow;
+			pItem->pTab->tuple_log_count = pSub->nSelectRow;
 			if (onceAddr)
 				sqlite3VdbeJumpHere(v, onceAddr);
 			retAddr =
diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h
index 8ca8e808f..d026a213d 100644
--- a/src/box/sql/sqliteInt.h
+++ b/src/box/sql/sqliteInt.h
@@ -1961,7 +1961,13 @@ struct Table {
 	i16 iAutoIncPKey;	/* If PK is marked INTEGER PRIMARY KEY AUTOINCREMENT, store
 				   column number here, -1 otherwise Tarantool specifics */
 	i16 nCol;		/* Number of columns in this table */
-	LogEst nRowLogEst;	/* Estimated rows in table - from _sql_stat1 table */
+	/**
+	 * Estimated number of entries in table.
+	 * Used only when table represents temporary objects,
+	 * such as nested SELECTs or VIEWs. Otherwise, this stat
+	 * can be fetched from space struct.
+	 */
+	LogEst tuple_log_count;
 	u8 tabFlags;		/* Mask of TF_* values */
 	u8 keyConf;		/* What to do in case of uniqueness conflict on iPKey */
 #ifndef SQLITE_OMIT_ALTERTABLE
@@ -1972,6 +1978,16 @@ struct Table {
 	Table *pNextZombie;	/* Next on the Parse.pZombieTab list */
 };
 
+/**
+ * Return logarithm of tuple count in space.
+ *
+ * @param tab Table containing id of space to be examined.
+ * @retval Logarithm of tuple count in space, or default values,
+ *         if there is no corresponding space for given table.
+ */
+LogEst
+sql_space_tuple_log_count(struct Table *tab);
+
 /*
  * Allowed values for Table.tabFlags.
  */
@@ -2164,7 +2180,6 @@ struct Index {
 	tRowcnt *aAvgEq;	/* Average nEq values for keys not in aSample */
 	IndexSample *aSample;	/* Samples of the left-most key */
 	tRowcnt *aiRowEst;	/* Non-logarithmic stat1 data for this index */
-	tRowcnt nRowEst0;	/* Non-logarithmic number of rows in the index */
 };
 
 /*
@@ -2199,6 +2214,16 @@ struct IndexSample {
 	tRowcnt *anDLt;		/* Est. number of distinct keys less than this sample */
 };
 
+#ifdef DEFAULT_TUPLE_COUNT
+#undef DEFAULT_TUPLE_COUNT
+#endif
+#define DEFAULT_TUPLE_COUNT 1048576
+
+#ifdef DEFAULT_TUPLE_LOG_COUNT
+#undef DEFAULT_TUPLE_LOG_COUNT
+#endif
+#define DEFAULT_TUPLE_LOG_COUNT sqlite3LogEst(DEFAULT_TUPLE_COUNT)
+
 /*
  * Each token coming out of the lexer is an instance of
  * this structure.  Tokens are also used as part of an expression.
diff --git a/src/box/sql/where.c b/src/box/sql/where.c
index 51b53c2df..e6f9ef431 100644
--- a/src/box/sql/where.c
+++ b/src/box/sql/where.c
@@ -1326,8 +1326,22 @@ whereRangeScanEst(Parse * pParse,	/* Parsing & code generating context */
 			}
 			/* Determine iLower and iUpper using ($P) only. */
 			if (nEq == 0) {
+				/*
+				 * In this simple case, there are no any
+				 * equality constraints, so initially all rows
+				 * are in range.
+				 */
 				iLower = 0;
-				iUpper = p->nRowEst0;
+				uint32_t space_id =
+					SQLITE_PAGENO_TO_SPACEID(p->tnum);
+				struct space *space = space_by_id(space_id);
+				assert(space != NULL);
+				uint32_t iid =
+					SQLITE_PAGENO_TO_INDEXID(p->tnum);
+				struct index *idx =
+					space_index(space, iid);
+				assert(idx != NULL);
+				iUpper = idx->vtab->size(idx);
 			} else {
 				/* Note: this call could be optimized away - since the same values must
 				 * have been requested when testing key $P in whereEqualScanEst().
@@ -2786,7 +2800,7 @@ whereLoopAddBtree(WhereLoopBuilder * pBuilder,	/* WHERE clause information */
 		sPk.aiRowLogEst = aiRowEstPk;
 		sPk.onError = ON_CONFLICT_ACTION_REPLACE;
 		sPk.pTable = pTab;
-		aiRowEstPk[0] = pTab->nRowLogEst;
+		aiRowEstPk[0] = sql_space_tuple_log_count(pTab);
 		aiRowEstPk[1] = 0;
 		pFirst = pSrc->pTab->pIndex;
 		if (pSrc->fg.notIndexed == 0) {
-- 
2.15.1

  parent reply	other threads:[~2018-04-23 20:31 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-23 20:29 [tarantool-patches] [PATCH 0/4] Move original SQLite's statistics to server Nikita Pettik
2018-04-23 20:29 ` [tarantool-patches] [PATCH 1/4] sql: optimize compilation of SELECT COUNT(*) Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-04-23 20:29 ` [tarantool-patches] [PATCH 2/4] sql: add average tuple size calculation Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-04-23 20:29 ` Nikita Pettik [this message]
2018-04-24 12:51   ` [tarantool-patches] Re: [PATCH 3/4] sql: refactor usages of table's tuple count Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-04-23 20:29 ` [tarantool-patches] [PATCH 4/4] sql: move SQL statistics to server Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-05-11 22:00       ` Vladislav Shpilevoy
2018-05-14 11:52         ` n.pettik
2018-05-14 12:54           ` Vladislav Shpilevoy
2018-05-14 13:55             ` n.pettik
2018-05-14 14:12 ` [tarantool-patches] Re: [PATCH 0/4] Move original SQLite's " Vladislav Shpilevoy
2018-05-15 13:42   ` Kirill Yukhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7c93d6e8d43dbf9c912daca4ff1762d1fe412b35.1524515002.git.korablev@tarantool.org \
    --to=korablev@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [tarantool-patches] [PATCH 3/4] sql: refactor usages of table'\''s tuple count' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox