From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 42D1622869 for ; Mon, 23 Apr 2018 16:31:11 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id i0hlgHXyV6UP for ; Mon, 23 Apr 2018 16:31:11 -0400 (EDT) Received: from smtp57.i.mail.ru (smtp57.i.mail.ru [217.69.128.37]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 80A1B22670 for ; Mon, 23 Apr 2018 16:31:10 -0400 (EDT) From: Nikita Pettik Subject: [tarantool-patches] [PATCH 3/4] sql: refactor usages of table's tuple count Date: Mon, 23 Apr 2018 23:29:40 +0300 Message-Id: <7c93d6e8d43dbf9c912daca4ff1762d1fe412b35.1524515002.git.korablev@tarantool.org> In-Reply-To: References: In-Reply-To: References: Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: v.shpilevoy@tarantool.org, Nikita Pettik Count of tuples containing in given space can be calculated as a number of tuples in primary index. However, if table represents temporary object such as result set of SELECT or VIEW, tuple count can't be precisely determined. In this case, default approximation is used: 1 million tuples. Part of #3253 --- src/box/sql/analyze.c | 14 +++++++++++++- src/box/sql/build.c | 4 +--- src/box/sql/pragma.c | 2 +- src/box/sql/select.c | 12 +++++------- src/box/sql/sqliteInt.h | 29 +++++++++++++++++++++++++++-- src/box/sql/where.c | 18 ++++++++++++++++-- 6 files changed, 63 insertions(+), 16 deletions(-) diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c index 7c16a2154..4965e899a 100644 --- a/src/box/sql/analyze.c +++ b/src/box/sql/analyze.c @@ -1392,7 +1392,6 @@ initAvgEq(Index * pIdx) ((i64) 100 * pIdx->aiRowEst[0]) / pIdx->aiRowEst[iCol + 1]; } - pIdx->nRowEst0 = nRow; /* Set nSum to the number of distinct (iCol+1) field prefixes that * occur in the stat4 table for this index. Set sumEq to the sum of @@ -1634,6 +1633,19 @@ loadStat4(sqlite3 * db) "\"sample\" FROM \"_sql_stat4\""); } +LogEst +sql_space_tuple_log_count(struct Table *tab) +{ + struct space *space = space_by_id(SQLITE_PAGENO_TO_SPACEID(tab->tnum)); + if (space == NULL) + return tab->tuple_log_count; + struct index *pk = space_index(space, 0); + /* If space represents VIEW, return default number. */ + if (pk == NULL) + return DEFAULT_TUPLE_LOG_COUNT; + return sqlite3LogEst(pk->vtab->size(pk)); +} + /* * Load the content of the _sql_stat1 and sql_stat4 tables. The * contents of _sql_stat1 are used to populate the Index.aiRowEst[] diff --git a/src/box/sql/build.c b/src/box/sql/build.c index cd99d5946..5a40308fe 100644 --- a/src/box/sql/build.c +++ b/src/box/sql/build.c @@ -560,8 +560,6 @@ sqlite3StartTable(Parse *pParse, Token *pName, int noErr) pTable->pSchema = db->pSchema; sqlite3HashInit(&pTable->idxHash); pTable->nTabRef = 1; - pTable->nRowLogEst = 200; - assert(200 == sqlite3LogEst(1048576)); assert(pParse->pNewTable == 0); pParse->pNewTable = pTable; @@ -3154,7 +3152,7 @@ sqlite3DefaultRowEst(Index * pIdx) * number of rows in the table, or half the number of rows in the table * for a partial index. But do not let the estimate drop below 10. */ - a[0] = pIdx->pTable->nRowLogEst; + a[0] = pIdx->pTable->tuple_log_count; if (pIdx->pPartIdxWhere != 0) a[0] -= 10; assert(10 == sqlite3LogEst(2)); diff --git a/src/box/sql/pragma.c b/src/box/sql/pragma.c index 812a5c0a3..02990e422 100644 --- a/src/box/sql/pragma.c +++ b/src/box/sql/pragma.c @@ -410,7 +410,7 @@ sqlite3Pragma(Parse * pParse, Token * pId, /* First part of [schema.]id field */ pTab->zName, 0, avg_tuple_size_pk, - pTab->nRowLogEst); + sql_space_tuple_log_count(pTab)); sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4); for (pIdx = pTab->pIndex; pIdx; pIdx = pIdx->pNext) { diff --git a/src/box/sql/select.c b/src/box/sql/select.c index 391b7e0a2..02f3612c5 100644 --- a/src/box/sql/select.c +++ b/src/box/sql/select.c @@ -1990,8 +1990,7 @@ sqlite3ResultSetOfSelect(Parse * pParse, Select * pSelect) assert(db->lookaside.bDisable); pTab->nTabRef = 1; pTab->zName = 0; - pTab->nRowLogEst = 200; - assert(200 == sqlite3LogEst(1048576)); + pTab->tuple_log_count = DEFAULT_TUPLE_LOG_COUNT; sqlite3ColumnsFromExprList(pParse, pSelect->pEList, &pTab->nCol, &pTab->aCol); sqlite3SelectAddColumnTypeAndCollation(pParse, pTab, pSelect); @@ -4527,7 +4526,7 @@ withExpand(Walker * pWalker, struct SrcList_item *pFrom) pTab->nTabRef = 1; pTab->zName = sqlite3DbStrDup(db, pCte->zName); pTab->iPKey = -1; - pTab->nRowLogEst = 200; + pTab->tuple_log_count = DEFAULT_TUPLE_LOG_COUNT; assert(200 == sqlite3LogEst(1048576)); pTab->tabFlags |= TF_Ephemeral; pFrom->pSelect = sqlite3SelectDup(db, pCte->pSelect, 0); @@ -4721,8 +4720,7 @@ selectExpander(Walker * pWalker, Select * p) sqlite3ColumnsFromExprList(pParse, pSel->pEList, &pTab->nCol, &pTab->aCol); pTab->iPKey = -1; - pTab->nRowLogEst = 200; - assert(200 == sqlite3LogEst(1048576)); + pTab->tuple_log_count = DEFAULT_TUPLE_LOG_COUNT; pTab->tabFlags |= TF_Ephemeral; #endif } else { @@ -5540,7 +5538,7 @@ sqlite3Select(Parse * pParse, /* The parser context */ explainSetInteger(pItem->iSelectId, (u8) pParse->iNextSelectId); sqlite3Select(pParse, pSub, &dest); - pItem->pTab->nRowLogEst = pSub->nSelectRow; + pItem->pTab->tuple_log_count = pSub->nSelectRow; pItem->fg.viaCoroutine = 1; pItem->regResult = dest.iSdst; sqlite3VdbeEndCoroutine(v, pItem->regReturn); @@ -5579,7 +5577,7 @@ sqlite3Select(Parse * pParse, /* The parser context */ explainSetInteger(pItem->iSelectId, (u8) pParse->iNextSelectId); sqlite3Select(pParse, pSub, &dest); - pItem->pTab->nRowLogEst = pSub->nSelectRow; + pItem->pTab->tuple_log_count = pSub->nSelectRow; if (onceAddr) sqlite3VdbeJumpHere(v, onceAddr); retAddr = diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h index 8ca8e808f..d026a213d 100644 --- a/src/box/sql/sqliteInt.h +++ b/src/box/sql/sqliteInt.h @@ -1961,7 +1961,13 @@ struct Table { i16 iAutoIncPKey; /* If PK is marked INTEGER PRIMARY KEY AUTOINCREMENT, store column number here, -1 otherwise Tarantool specifics */ i16 nCol; /* Number of columns in this table */ - LogEst nRowLogEst; /* Estimated rows in table - from _sql_stat1 table */ + /** + * Estimated number of entries in table. + * Used only when table represents temporary objects, + * such as nested SELECTs or VIEWs. Otherwise, this stat + * can be fetched from space struct. + */ + LogEst tuple_log_count; u8 tabFlags; /* Mask of TF_* values */ u8 keyConf; /* What to do in case of uniqueness conflict on iPKey */ #ifndef SQLITE_OMIT_ALTERTABLE @@ -1972,6 +1978,16 @@ struct Table { Table *pNextZombie; /* Next on the Parse.pZombieTab list */ }; +/** + * Return logarithm of tuple count in space. + * + * @param tab Table containing id of space to be examined. + * @retval Logarithm of tuple count in space, or default values, + * if there is no corresponding space for given table. + */ +LogEst +sql_space_tuple_log_count(struct Table *tab); + /* * Allowed values for Table.tabFlags. */ @@ -2164,7 +2180,6 @@ struct Index { tRowcnt *aAvgEq; /* Average nEq values for keys not in aSample */ IndexSample *aSample; /* Samples of the left-most key */ tRowcnt *aiRowEst; /* Non-logarithmic stat1 data for this index */ - tRowcnt nRowEst0; /* Non-logarithmic number of rows in the index */ }; /* @@ -2199,6 +2214,16 @@ struct IndexSample { tRowcnt *anDLt; /* Est. number of distinct keys less than this sample */ }; +#ifdef DEFAULT_TUPLE_COUNT +#undef DEFAULT_TUPLE_COUNT +#endif +#define DEFAULT_TUPLE_COUNT 1048576 + +#ifdef DEFAULT_TUPLE_LOG_COUNT +#undef DEFAULT_TUPLE_LOG_COUNT +#endif +#define DEFAULT_TUPLE_LOG_COUNT sqlite3LogEst(DEFAULT_TUPLE_COUNT) + /* * Each token coming out of the lexer is an instance of * this structure. Tokens are also used as part of an expression. diff --git a/src/box/sql/where.c b/src/box/sql/where.c index 51b53c2df..e6f9ef431 100644 --- a/src/box/sql/where.c +++ b/src/box/sql/where.c @@ -1326,8 +1326,22 @@ whereRangeScanEst(Parse * pParse, /* Parsing & code generating context */ } /* Determine iLower and iUpper using ($P) only. */ if (nEq == 0) { + /* + * In this simple case, there are no any + * equality constraints, so initially all rows + * are in range. + */ iLower = 0; - iUpper = p->nRowEst0; + uint32_t space_id = + SQLITE_PAGENO_TO_SPACEID(p->tnum); + struct space *space = space_by_id(space_id); + assert(space != NULL); + uint32_t iid = + SQLITE_PAGENO_TO_INDEXID(p->tnum); + struct index *idx = + space_index(space, iid); + assert(idx != NULL); + iUpper = idx->vtab->size(idx); } else { /* Note: this call could be optimized away - since the same values must * have been requested when testing key $P in whereEqualScanEst(). @@ -2786,7 +2800,7 @@ whereLoopAddBtree(WhereLoopBuilder * pBuilder, /* WHERE clause information */ sPk.aiRowLogEst = aiRowEstPk; sPk.onError = ON_CONFLICT_ACTION_REPLACE; sPk.pTable = pTab; - aiRowEstPk[0] = pTab->nRowLogEst; + aiRowEstPk[0] = sql_space_tuple_log_count(pTab); aiRowEstPk[1] = 0; pFirst = pSrc->pTab->pIndex; if (pSrc->fg.notIndexed == 0) { -- 2.15.1