From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id BD526226A8 for ; Mon, 23 Apr 2018 16:31:11 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id oWz9yZXE1vnD for ; Mon, 23 Apr 2018 16:31:11 -0400 (EDT) Received: from smtp57.i.mail.ru (smtp57.i.mail.ru [217.69.128.37]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 73A2622662 for ; Mon, 23 Apr 2018 16:31:10 -0400 (EDT) From: Nikita Pettik Subject: [tarantool-patches] [PATCH 4/4] sql: move SQL statistics to server Date: Mon, 23 Apr 2018 23:29:41 +0300 Message-Id: <291aa5961a366c81ffa7ea465636125617a78dad.1524515002.git.korablev@tarantool.org> In-Reply-To: References: In-Reply-To: References: Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: v.shpilevoy@tarantool.org, Nikita Pettik SQLite provides kind of statistics concerning data holding in each index and space. It is used to help the query planner build an optimized plan. Such statistics don't depend on internal index implementation (e.g. B-tree or LSM tree) and contain information concerning tuple distribution. This patch moves members responsible for statistics from original SQLite structs to Tarantool ones. To be more precise, now index's opts contains statistics from _stat1 space (arrays of integers and their logarithm approximation) and more sophisticated one from _stat4 (array of samples). 
It also contains set of flags to turn on/off different optimizations (such as skip-scan). After ANALYZE command is executed, statistics are saved into _sql_stat1 and _sql_stat4 spaces (in order to persist them). However, it should also be loaded into in-memory structs representing indexes. This patch reworks original SQLite routine to load it directly to newly introduced stat struct of index. Closes #3253 --- src/box/index_def.c | 92 ++++++ src/box/index_def.h | 90 ++++++ src/box/sql/analyze.c | 742 +++++++++++++++++++++++------------------------- src/box/sql/build.c | 61 ---- src/box/sql/parse.y | 2 - src/box/sql/pragma.c | 3 +- src/box/sql/prepare.c | 7 +- src/box/sql/sqliteInt.h | 72 +++-- src/box/sql/vdbe.c | 5 +- src/box/sql/where.c | 190 ++++++++----- src/box/sql/whereexpr.c | 4 +- 11 files changed, 716 insertions(+), 552 deletions(-) diff --git a/src/box/index_def.c b/src/box/index_def.c index eda39a622..14ff94192 100644 --- a/src/box/index_def.c +++ b/src/box/index_def.c @@ -47,6 +47,7 @@ const struct index_opts index_opts_default = { /* .bloom_fpr = */ 0.05, /* .lsn = */ 0, /* .sql = */ NULL, + /* .stat = */ NULL, }; const struct opt_def index_opts_reg[] = { @@ -117,6 +118,8 @@ index_def_new(uint32_t space_id, uint32_t iid, const char *name, return NULL; } } + /* Statistics are initialized separately. 
*/ + assert(opts->stat == NULL); return def; } @@ -154,9 +157,98 @@ index_def_dup(const struct index_def *def) return NULL; } } + if (def->opts.stat != NULL) { + dup->opts.stat = malloc(sizeof(*dup->opts.stat)); + if (dup->opts.stat == NULL) { + diag_set(OutOfMemory, sizeof(*dup->opts.stat), "malloc", + "dup->opts.stat"); + index_def_delete(dup); + return NULL; + } + dup->opts.stat->is_unordered = def->opts.stat->is_unordered; + dup->opts.stat->skip_scan_enabled = + def->opts.stat->skip_scan_enabled; + size_t stat_size = (def->key_def->part_count + 1) * + sizeof(uint32_t); + dup->opts.stat->tuple_stat1 = malloc(stat_size); + if (dup->opts.stat->tuple_stat1 == NULL) { + diag_set(OutOfMemory, stat_size, "malloc", + "tuple_stat1"); + index_def_delete(dup); + return NULL; + } + memcpy(dup->opts.stat->tuple_stat1, def->opts.stat->tuple_stat1, + stat_size); + dup->opts.stat->tuple_log_est = malloc(stat_size); + if (dup->opts.stat->tuple_log_est == NULL) { + diag_set(OutOfMemory, stat_size, "malloc", + "tuple_log_est"); + index_def_delete(dup); + return NULL; + } + memcpy(dup->opts.stat->tuple_log_est, + def->opts.stat->tuple_log_est, stat_size); + uint32_t sample_count = def->opts.stat->sample_count; + dup->opts.stat->sample_count = sample_count; + dup->opts.stat->sample_field_count = + def->opts.stat->sample_field_count; + if (def->opts.stat->samples == NULL) { + dup->opts.stat->samples = NULL; + dup->opts.stat->avg_eq = NULL; + return dup; + } + size_t samples_alloc_size = + /* Array of samples. */ + sample_count * sizeof(def->opts.stat->samples[0]) + + /* Arrays eq, lt, dlt for each sample. */ + def->opts.stat->sample_count * sizeof(uint32_t) * + def->opts.stat->sample_field_count * 3 + + /* Array of avg_eq. 
*/ + (def->key_def->part_count * sizeof(uint32_t)); + dup->opts.stat->samples = malloc(samples_alloc_size); + if (dup->opts.stat->samples == NULL) { + diag_set(OutOfMemory, samples_alloc_size, "malloc", + "samples"); + index_def_delete(dup); + return NULL; + } + memcpy(dup->opts.stat->samples, def->opts.stat->samples, + samples_alloc_size); + for (uint32_t i = 0; i < def->opts.stat->sample_count; ++i) { + size_t key_size = def->opts.stat->samples[i].key_size; + /* + * Add at the end two zero-bytes in order + * to prevent buffer overread. + */ + dup->opts.stat->samples[i].sample_key = + calloc(1, key_size + 2); + if (dup->opts.stat->samples[i].sample_key == NULL) { + diag_set(OutOfMemory, key_size + 2, "calloc", + "sample_key"); + index_def_delete(dup); + return NULL; + } + memcpy(dup->opts.stat->samples[i].sample_key, + def->opts.stat->samples[i].sample_key, key_size); + } + } return dup; } +void +index_stat_destroy_samples(struct index_stat *stat) +{ + if (stat != NULL && stat->samples != NULL) { + for (uint32_t i = 0; i < stat->sample_count; ++i) { + struct index_sample *sample = &stat->samples[i]; + free(sample->sample_key); + } + free(stat->samples); + stat->sample_count = 0; + stat->samples = NULL; + } +} + /** Free a key definition. */ void index_def_delete(struct index_def *index_def) diff --git a/src/box/index_def.h b/src/box/index_def.h index 6167570b7..52e9a0548 100644 --- a/src/box/index_def.h +++ b/src/box/index_def.h @@ -58,6 +58,68 @@ enum rtree_index_distance_type { }; extern const char *rtree_index_distance_type_strs[]; +/** Simple alias to represent logarithm metrics. */ +typedef int16_t log_est; + +/** + * One sample represents index key and three arrays with + * statistics concerning tuples distribution. + * Max number of samples for one index is adjusted by + * SQL_STAT4_SAMPLES macros. By default it is about 24 entities. + * Samples are chosen to be selective. + */ +struct index_sample { + /** Key of the sample. 
*/ + void *sample_key; + /** Size of sample key. */ + size_t key_size; + /** + * List of integers: first one is the approximate number + * of entries in the index whose left-most field exactly + * matches the left-most column of the sample; + * second one - est. number of entries in the index where + * the first two columns match the first two columns of + * the sample; and so forth. + */ + uint32_t *eq; + /** The same as eq list, but key is less than sample. */ + uint32_t *lt; + /** The same as lt list, but includes only distinct keys. */ + uint32_t *dlt; +}; + +/** + * SQL statistics for index, which is used by query planer. + * This is general statistics, without any relation to used + * engine and data structures (e.g. B-tree or LSM tree). + * Statistics appear only after executing ANALYZE statement. + * It is loaded from _sql_stat1 and _sql_stat4 system spaces. + */ +struct index_stat { + /** An array of samples of them left-most key. */ + struct index_sample *samples; + /** Number of samples. */ + uint32_t sample_count; + /** Number of fields in sample arrays: eq, lt and dlt. */ + uint32_t sample_field_count; + /** + * List of integers: the first is the number of tuples + * in the index; the second one is the average number of + * tuples that have the same key part in the first field + * of the index; the third - for the first two fields; + * and so forth. + */ + uint32_t *tuple_stat1; + /** Logarithms of stat1 data. */ + log_est *tuple_log_est; + /** Average eq values for keys not in samples. */ + uint32_t *avg_eq; + /** Use this index for == or IN queries only. */ + bool is_unordered; + /** Don't try to use skip-scan optimization if true. */ + bool skip_scan_enabled; +}; + /** Index options */ struct index_opts { /** @@ -99,6 +161,12 @@ struct index_opts { * SQL statement that produced this index. */ char *sql; + /** + * SQL specific statistics concerning tuples + * distribution for query planer. It is automatically + * filled after running ANALYZE command. 
+ */ + struct index_stat *stat; }; extern const struct index_opts index_opts_default; @@ -113,6 +181,27 @@ index_opts_create(struct index_opts *opts) *opts = index_opts_default; } +/** + * Release memory allocated for samples, statistics they include, + * sample keys and avg_eq array. + */ +void +index_stat_destroy_samples(struct index_stat *stat); + +/** + * Destroy index stat. + */ +static inline void +destroy_stat(struct index_stat *stat) { + if (stat == NULL) + return; + free(stat->tuple_stat1); + free(stat->tuple_log_est); + index_stat_destroy_samples(stat); + TRASH(stat); + free(stat); +} + /** * Destroy index options */ @@ -120,6 +209,7 @@ static inline void index_opts_destroy(struct index_opts *opts) { free(opts->sql); + destroy_stat(opts->stat); TRASH(opts); } diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c index 4965e899a..14dfbd551 100644 --- a/src/box/sql/analyze.c +++ b/src/box/sql/analyze.c @@ -104,8 +104,8 @@ * large nEq values. * */ -#ifndef SQLITE_OMIT_ANALYZE +#include "box/box.h" #include "box/index.h" #include "box/key_def.h" #include "box/tuple_compare.h" @@ -1217,49 +1217,46 @@ struct analysisInfo { sqlite3 *db; }; -/* - * The first argument points to a nul-terminated string containing a - * list of space separated integers. Read the first nOut of these into - * the array aOut[]. +/** + * The first argument points to a nul-terminated string + * containing a list of space separated integers. Load + * the first stat_size of these into the output arrays. + * + * @param stat_string String containing array of integers. + * @param stat_size Size of output arrays. + * @param[out] stat_exact Decoded array of statistics. + * @param[out] stat_log Decoded array of stat logariphms. + * @param[out] index Handle extra flags for this index, + * if not NULL. 
*/ static void -decodeIntArray(char *zIntArray, /* String containing int array to decode */ - int nOut, /* Number of slots in aOut[] */ - tRowcnt * aOut, /* Store integers here */ - LogEst * aLog, /* Or, if aOut==0, here */ - Index * pIndex /* Handle extra flags for this index, if not NULL */ - ) -{ - char *z = zIntArray; - int c; - int i; - tRowcnt v; - - if (z == 0) +decode_stat_string(const char *stat_string, int stat_size, tRowcnt *stat_exact, + LogEst *stat_log, struct index *index) { + char *z = (char *) stat_string; + if (z == NULL) z = ""; - - for (i = 0; *z && i < nOut; i++) { - v = 0; + for (int i = 0; *z && i < stat_size; i++) { + tRowcnt v = 0; + int c; while ((c = z[0]) >= '0' && c <= '9') { v = v * 10 + c - '0'; z++; } - if (aOut) - aOut[i] = v; - if (aLog) - aLog[i] = sqlite3LogEst(v); + if (stat_exact != NULL) + stat_exact[i] = v; + if (stat_log != NULL) + stat_log[i] = sqlite3LogEst(v); if (*z == ' ') z++; } - - if (pIndex) { - pIndex->bUnordered = 0; - pIndex->noSkipScan = 0; + if (index != NULL) { + index->def->opts.stat->is_unordered = false; + index->def->opts.stat->skip_scan_enabled = true; while (z[0]) { if (sqlite3_strglob("unordered*", z) == 0) { - pIndex->bUnordered = 1; + index->def->opts.stat->is_unordered = true; } else if (sqlite3_strglob("noskipscan*", z) == 0) { - pIndex->noSkipScan = 1; + index->def->opts.stat->skip_scan_enabled = false; } while (z[0] != 0 && z[0] != ' ') z++; @@ -1269,369 +1266,374 @@ decodeIntArray(char *zIntArray, /* String containing int array to decode */ } } -/* +/** * This callback is invoked once for each index when reading the * _sql_stat1 table. * * argv[0] = name of the table * argv[1] = name of the index (might be NULL) - * argv[2] = results of analysis - on integer for each column + * argv[2] = results of analysis - array of integers + * + * Entries for which argv[1]==NULL simply record the number + * of rows in the table. 
This routine also allocates memory + * for stat struct itself and statistics which are not related + * to stat4 samples. * - * Entries for which argv[1]==NULL simply record the number of rows in - * the table. + * @param data Additional analysis info. + * @param argc Number of arguments. + * @param argv Callback arguments. + * @retval 0 on success, -1 otherwise. */ static int -analysisLoader(void *pData, int argc, char **argv, char **NotUsed) +analysis_loader(void *data, int argc, char **argv, char **unused) { - analysisInfo *pInfo = (analysisInfo *) pData; - Index *pIndex; - Table *pTable; - const char *z; - assert(argc == 3); - UNUSED_PARAMETER2(NotUsed, argc); - - if (argv == 0 || argv[0] == 0 || argv[2] == 0) { + UNUSED_PARAMETER(data); + UNUSED_PARAMETER2(unused, argc); + if (argv == 0 || argv[0] == 0 || argv[2] == 0) return 0; - } - pTable = sqlite3HashFind(&pInfo->db->pSchema->tblHash, argv[0]); - if (pTable == NULL) + uint32_t space_id = box_space_id_by_name(argv[0], strlen(argv[0])); + assert(space_id != BOX_ID_NIL); + struct space *space = space_by_id(space_id); + assert(space != NULL); + struct index *index; + if (space == NULL) return 0; - if (argv[1] == 0) { - pIndex = 0; + if (argv[1] == NULL) { + index = NULL; } else if (sqlite3_stricmp(argv[0], argv[1]) == 0) { - pIndex = sqlite3PrimaryKeyIndex(pTable); + index = space_index(space, 0); + assert(index != NULL); } else { - pIndex = sqlite3HashFind(&pTable->idxHash, argv[1]); + uint32_t iid = box_index_id_by_name(space_id, argv[1], + strlen(argv[1])); + assert(iid != BOX_ID_NIL); + index = space_index(space, iid); } - z = argv[2]; - - if (pIndex) { - tRowcnt *aiRowEst = 0; - int nCol = index_column_count(pIndex) + 1; - /* Index.aiRowEst may already be set here if there are duplicate - * _sql_stat1 entries for this index. In that case just clobber - * the old data with the new instead of allocating a new array. 
+ if (index != NULL) { + /* + * Additional field is used to describe total + * count of tuples in index. Although now all + * indexes feature the same number of tuples, + * partial indexes are going to be implemented + * someday. */ - if (pIndex->aiRowEst == 0) { - pIndex->aiRowEst = - (tRowcnt *) sqlite3MallocZero(sizeof(tRowcnt) * - nCol); - if (pIndex->aiRowEst == 0) - sqlite3OomFault(pInfo->db); + int column_count = index->def->key_def->part_count + 1; + /* + * Stat arrays may already be set here if there + * are duplicate _sql_stat1 entries for this + * index. In that case just clobber the old data + * with the new instead of allocating a new array. + */ + struct index_stat *stat = index->def->opts.stat; + if (stat == NULL) { + stat = calloc(1, sizeof(*stat)); + if (stat == NULL) { + diag_set(OutOfMemory, sizeof(*stat), "calloc", + "stat"); + return -1; + } + index->def->opts.stat = stat; } - aiRowEst = pIndex->aiRowEst; - pIndex->bUnordered = 0; - decodeIntArray((char *)z, nCol, aiRowEst, pIndex->aiRowLogEst, - pIndex); - } - - return 0; -} - -/* - * If the Index.aSample variable is not NULL, delete the aSample[] array - * and its contents. 
- */ -void -sqlite3DeleteIndexSamples(sqlite3 * db, Index * pIdx) -{ - if (pIdx->aSample) { - int j; - for (j = 0; j < pIdx->nSample; j++) { - IndexSample *p = &pIdx->aSample[j]; - sqlite3DbFree(db, p->p); + if (stat->tuple_stat1 == NULL) { + stat->tuple_stat1 = + calloc(column_count, sizeof(uint32_t)); + if (stat->tuple_stat1 == NULL) { + diag_set(OutOfMemory, column_count, "calloc", + "tuple_stat1"); + return -1; + } } - sqlite3DbFree(db, pIdx->aSample); - } - if (db && db->pnBytesFreed == 0) { - pIdx->nSample = 0; - pIdx->aSample = 0; + if (stat->tuple_log_est == NULL) { + stat->tuple_log_est = calloc(column_count, + sizeof(uint32_t)); + if (stat->tuple_log_est == NULL) { + diag_set(OutOfMemory, column_count, "calloc", + "tuple_log_stat"); + return -1; + } + } + decode_stat_string(argv[2], index->def->key_def->part_count + 1, + index->def->opts.stat->tuple_stat1, + index->def->opts.stat->tuple_log_est, + index); } + return 0; } -/* - * Populate the pIdx->aAvgEq[] array based on the samples currently - * stored in pIdx->aSample[]. +/** + * Calculate avg_eq array based on the samples from index. + * Some *magic* calculations happen here. */ static void -initAvgEq(Index * pIdx) +init_avg_eq(struct index *index) { - if (pIdx) { - IndexSample *aSample = pIdx->aSample; - IndexSample *pFinal = &aSample[pIdx->nSample - 1]; - int iCol; - int nCol = 1; - if (pIdx->nSampleCol > 1) { - /* If this is stat4 data, then calculate aAvgEq[] values for all - * sample columns except the last. The last is always set to 1, as - * once the trailing PK fields are considered all index keys are - * unique. 
- */ - nCol = pIdx->nSampleCol - 1; - pIdx->aAvgEq[nCol] = 1; + assert(index != NULL); + assert(index->def->opts.stat != NULL); + struct index_sample *samples = index->def->opts.stat->samples; + uint32_t sample_count = index->def->opts.stat->sample_count; + uint32_t field_count = index->def->opts.stat->sample_field_count; + struct index_sample *last_sample = &samples[sample_count - 1]; + if (field_count > 1) + index->def->opts.stat->avg_eq[--field_count] = 1; + for (uint32_t i = 0; i < field_count; ++i) { + uint32_t column_count = index->def->key_def->part_count; + tRowcnt eq_sum = 0; + tRowcnt eq_avg = 0; + uint32_t tuple_count = index->vtab->size(index); + uint64_t distinct_tuple_count; + uint64_t terms_sum = 0; + if (i >= column_count || + index->def->opts.stat->tuple_stat1[i + 1] == 0) { + distinct_tuple_count = 100 * last_sample->dlt[i]; + sample_count--; + } else { + assert(index->def->opts.stat->tuple_stat1 != NULL); + distinct_tuple_count = (100 * tuple_count) / + index->def->opts.stat->tuple_stat1[i + 1]; } - for (iCol = 0; iCol < nCol; iCol++) { - int nSample = pIdx->nSample; - int i; /* Used to iterate through samples */ - tRowcnt sumEq = 0; /* Sum of the nEq values */ - tRowcnt avgEq = 0; - tRowcnt nRow; /* Number of rows in index */ - i64 nSum100 = 0; /* Number of terms contributing to sumEq */ - i64 nDist100; /* Number of distinct values in index */ - int nColumn = index_column_count(pIdx); - - if (!pIdx->aiRowEst || iCol >= nColumn - || pIdx->aiRowEst[iCol + 1] == 0) { - nRow = pFinal->anLt[iCol]; - nDist100 = (i64) 100 *pFinal->anDLt[iCol]; - nSample--; - } else { - nRow = pIdx->aiRowEst[0]; - nDist100 = - ((i64) 100 * pIdx->aiRowEst[0]) / - pIdx->aiRowEst[iCol + 1]; + for (uint32_t j = 0; j < sample_count; ++j) { + if (j == (index->def->opts.stat->sample_count - 1) || + samples[j].dlt[i] != samples[j + 1].dlt[i]) { + eq_sum += samples[j].eq[i]; + terms_sum += 100; } - - /* Set nSum to the number of distinct (iCol+1) field prefixes that - * occur in 
the stat4 table for this index. Set sumEq to the sum of - * the nEq values for column iCol for the same set (adding the value - * only once where there exist duplicate prefixes). - */ - for (i = 0; i < nSample; i++) { - if (i == (pIdx->nSample - 1) - || aSample[i].anDLt[iCol] != - aSample[i + 1].anDLt[iCol] - ) { - sumEq += aSample[i].anEq[iCol]; - nSum100 += 100; - } - } - - if (nDist100 > nSum100) { - avgEq = - ((i64) 100 * (nRow - sumEq)) / (nDist100 - - nSum100); - } - if (avgEq == 0) - avgEq = 1; - pIdx->aAvgEq[iCol] = avgEq; } + if (distinct_tuple_count > terms_sum) { + eq_avg = 100 * (tuple_count - eq_sum) / + (distinct_tuple_count - terms_sum); + } + if (eq_avg == 0) + eq_avg = 1; + index->def->opts.stat->avg_eq[i] = eq_avg; } } -/* - * Given two IndexSample arguments, compare there payloads +/** + * Given two key arguments, compare there payloads. + * This is a simple wrapper around key_compare() to support + * qsort() interface. */ static int -sampleCompareMsgPack(const void *a, const void *b, void *arg) +sample_compare(const void *a, const void *b, void *arg) { struct key_def *def = (struct key_def *)arg; - return key_compare(((IndexSample *) a)->p, ((IndexSample *) b)->p, def); + return key_compare(((struct index_sample *) a)->sample_key, + ((struct index_sample *) b)->sample_key, def); } -/* +/** * Load the content from the _sql_stat4 table - * into the relevant Index.aSample[] arrays. + * into the relevant index->stat->samples[] arrays. 
+ * + * Arguments must point to SQL statements that return + * data equivalent to the following: * - * Arguments zSql1 and zSql2 must point to SQL statements that return - * data equivalent to the following (statements are different for stat4, - * see the caller of this function for details): + * prepare: SELECT tbl,idx,count(*) FROM _sql_stat4 GROUP BY tbl,idx; + * load: SELECT tbl,idx,neq,nlt,ndlt,sample FROM _sql_stat4; * - * zSql1: SELECT tbl,idx,count(*) FROM _sql_stat4 GROUP BY tbl,idx - * zSql2: SELECT tbl,idx,neq,nlt,ndlt,sample FROM _sql_stat4 + * 'prepare' statement is used to allocate enough memory for + * statistics (i.e. arrays lt, dt, dlt and avg_eq). 'load' query + * is needed for * + * @param db Database handler. + * @param sql_select_prepare SELECT statement, see above. + * @param sql_select_load SELECT statement, see above. + * @retval SQLITE_OK on success, smth else otherwise. */ static int -loadStatTbl(sqlite3 * db, /* Database handle */ - Table * pStatTab, /* Stat table to load to */ - const char *zSql1, /* SQL statement 1 (see above) */ - const char *zSql2 /* SQL statement 2 (see above) */ - ) +load_stat_from_space(struct sqlite3 *db, const char *sql_select_prepare, + const char *sql_select_load) { - int rc; /* Result codes from subroutines */ - sqlite3_stmt *pStmt = 0; /* An SQL statement being run */ - Index *pPrevIdx = 0; /* Previous index in the loop */ - IndexSample *pSample; /* A slot in pIdx->aSample[] */ - int nIdxCnt = 0; /* Number of different indexes visited */ - int nIdx = 0; /* Index loop iterator */ - Index **aIndex = 0; /* Array holding all visited indexes */ - - assert(db->lookaside.bDisable); - - assert(pStatTab); - nIdxCnt = box_index_len(SQLITE_PAGENO_TO_SPACEID(pStatTab->tnum), 0); - - if (nIdxCnt > 0) { - aIndex = sqlite3DbMallocZero(db, sizeof(Index *) * nIdxCnt); - if (aIndex == 0) { + struct index **indexes = NULL; + uint32_t index_count = box_index_len(BOX_SQL_STAT4_ID, 0); + if (index_count > 0) { + size_t alloc_size 
= sizeof(struct index *) * index_count; + indexes = malloc(alloc_size); + if (indexes == NULL) { + diag_set(OutOfMemory, alloc_size, "malloc", "indexes"); return SQLITE_NOMEM_BKPT; } } - - rc = sqlite3_prepare(db, zSql1, -1, &pStmt, 0); + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare(db, sql_select_prepare, -1, &stmt, 0); if (rc) goto finalize; - - while (sqlite3_step(pStmt) == SQLITE_ROW) { - int nIdxCol = 1; /* Number of columns in stat4 records */ - - char *zTab; /* Table name */ - char *zIndex; /* Index name */ - Index *pIdx; /* Pointer to the index object */ - int nSample; /* Number of samples */ - int nByte; /* Bytes of space required */ - int i; /* Bytes of space required */ - tRowcnt *pSpace; - - zTab = (char *)sqlite3_column_text(pStmt, 0); - if (zTab == 0) + uint32_t current_idx_count = 0; + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *space_name = (char *)sqlite3_column_text(stmt, 0); + if (space_name == NULL) continue; - zIndex = (char *)sqlite3_column_text(pStmt, 1); - if (zIndex == 0) + const char *index_name = (char *)sqlite3_column_text(stmt, 1); + if (index_name == NULL) continue; - nSample = sqlite3_column_int(pStmt, 2); - pIdx = sqlite3LocateIndex(db, zIndex, zTab); - assert(pIdx == 0 || pIdx->nSample == 0); - /* Index.nSample is non-zero at this point if data has already been - * loaded from the stat4 table. 
+ uint32_t sample_count = sqlite3_column_int(stmt, 2); + uint32_t space_id = box_space_id_by_name(space_name, + strlen(space_name)); + if (space_id == BOX_ID_NIL) + continue; + struct space *space = space_by_id(space_id); + assert(space != NULL); + uint32_t iid = box_index_id_by_name(space_id, index_name, + strlen(index_name)); + if (iid == BOX_ID_NIL) + continue; + struct index *index = space_index(space, iid); + assert(index != NULL); + assert(index->def->opts.stat != NULL || + index->def->opts.stat->sample_count == 0); + /* + * Samples array is non-zero at this point if + * data has already been loaded from the + * stat4 table. */ - if (pIdx == 0 || pIdx->nSample) + if (index->def->opts.stat != NULL && + index->def->opts.stat->sample_count) continue; - - nIdxCol = index_column_count(pIdx); - pIdx->nSampleCol = nIdxCol; - nByte = sizeof(IndexSample) * nSample; - nByte += sizeof(tRowcnt) * nIdxCol * 3 * nSample; - nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ - - pIdx->aSample = sqlite3DbMallocZero(db, nByte); - if (pIdx->aSample == 0) { - sqlite3_finalize(pStmt); + uint32_t column_count = index->def->key_def->part_count; + index->def->opts.stat->sample_field_count = + index->def->key_def->part_count; + /* Space for samples. */ + size_t alloc_size = sizeof(struct index_sample) * sample_count; + /* Space for eq, lt and dlt stats. */ + alloc_size += sizeof(tRowcnt) * column_count * 3 * sample_count; + /* Space for Index.aAvgEq[]. */ + alloc_size += column_count * sizeof(tRowcnt); + /* + * We are trying to fit into one chunk samples, + * eq_avg and arrays of eq, lt and dlt stats. + * Firstly comes memory for structs of samples, + * than array of eq_avg and finally arrays of + * eq, lt and dlt stats. 
+ */ + index->def->opts.stat->samples = malloc(alloc_size); + if (index->def->opts.stat->samples == NULL) { + diag_set(OutOfMemory, alloc_size, "malloc", "samples"); rc = SQLITE_NOMEM_BKPT; goto finalize; } - pSpace = (tRowcnt *) & pIdx->aSample[nSample]; - pIdx->aAvgEq = pSpace; - pSpace += nIdxCol; - for (i = 0; i < nSample; i++) { - pIdx->aSample[i].anEq = pSpace; - pSpace += nIdxCol; - pIdx->aSample[i].anLt = pSpace; - pSpace += nIdxCol; - pIdx->aSample[i].anDLt = pSpace; - pSpace += nIdxCol; + memset(index->def->opts.stat->samples, 0, alloc_size); + /* Marking memory manually. */ + tRowcnt *pSpace = + (tRowcnt *) & index->def->opts.stat->samples[sample_count]; + index->def->opts.stat->avg_eq = pSpace; + pSpace += column_count; + for (uint32_t i = 0; i < sample_count; i++) { + index->def->opts.stat->samples[i].eq = pSpace; + pSpace += column_count; + index->def->opts.stat->samples[i].lt = pSpace; + pSpace += column_count; + index->def->opts.stat->samples[i].dlt = pSpace; + pSpace += column_count; } - assert(((u8 *) pSpace) - nByte == (u8 *) (pIdx->aSample)); - - aIndex[nIdx] = pIdx; - assert(nIdx < nIdxCnt); - ++nIdx; + assert(((u8 *) pSpace) - alloc_size == + (u8 *) (index->def->opts.stat->samples)); + indexes[current_idx_count] = index; + assert(current_idx_count < index_count); + current_idx_count++; } - rc = sqlite3_finalize(pStmt); + rc = sqlite3_finalize(stmt); if (rc) goto finalize; - rc = sqlite3_prepare(db, zSql2, -1, &pStmt, 0); + rc = sqlite3_prepare(db, sql_select_load, -1, &stmt, 0); if (rc) goto finalize; - - while (sqlite3_step(pStmt) == SQLITE_ROW) { - char *zTab; /* Table name */ - char *zIndex; /* Index name */ - Index *pIdx; /* Pointer to the index object */ - int nCol = 1; /* Number of columns in index */ - - zTab = (char *)sqlite3_column_text(pStmt, 0); - if (zTab == 0) + struct index *prev_index = NULL; + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *space_name = (char *)sqlite3_column_text(stmt, 0); + if (space_name == NULL) 
continue; - zIndex = (char *)sqlite3_column_text(pStmt, 1); - if (zIndex == 0) + const char *index_name = (char *)sqlite3_column_text(stmt, 1); + if (index_name == NULL) continue; - pIdx = sqlite3LocateIndex(db, zIndex, zTab); - if (pIdx == 0) + uint32_t space_id = box_space_id_by_name(space_name, + strlen(space_name)); + if (space_id == BOX_ID_NIL) continue; - - nCol = pIdx->nSampleCol; - if (pIdx != pPrevIdx) { - initAvgEq(pPrevIdx); - pPrevIdx = pIdx; + struct space *space = space_by_id(space_id); + assert(space != NULL); + uint32_t iid = box_index_id_by_name(space_id, index_name, + strlen(index_name)); + if (iid == BOX_ID_NIL) + continue; + struct index *index = space_index(space, iid); + assert(index != NULL); + uint32_t field_count = + index->def->opts.stat->sample_field_count; + if (index != prev_index) { + if (prev_index != NULL) + init_avg_eq(prev_index); + prev_index = index; } - pSample = &pIdx->aSample[pIdx->nSample]; - decodeIntArray((char *)sqlite3_column_text(pStmt, 2), nCol, - pSample->anEq, 0, 0); - decodeIntArray((char *)sqlite3_column_text(pStmt, 3), nCol, - pSample->anLt, 0, 0); - decodeIntArray((char *)sqlite3_column_text(pStmt, 4), nCol, - pSample->anDLt, 0, 0); - - /* Take a copy of the sample. Add two 0x00 bytes the end of the buffer. - * This is in case the sample record is corrupted. In that case, the - * sqlite3VdbeRecordCompare() may read up to two varints past the - * end of the allocated buffer before it realizes it is dealing with - * a corrupt record. Adding the two 0x00 bytes prevents this from causing - * a buffer overread. 
+ struct index_stat *stat = index->def->opts.stat; + assert(stat != NULL); + struct index_sample *sample = + &stat->samples[stat->sample_count]; + decode_stat_string((char *)sqlite3_column_text(stmt, 2), + field_count, sample->eq, 0, 0); + decode_stat_string((char *)sqlite3_column_text(stmt, 3), + field_count, sample->lt, 0, 0); + decode_stat_string((char *)sqlite3_column_text(stmt, 4), + field_count, sample->dlt, 0, 0); + /* + * Take a copy of the sample. Add two 0x00 bytes + * the end of the buffer. This is in case the + * sample record is corrupted. In that case, the + * sqlite3VdbeRecordCompare() may read up to two + * varints past the end of the allocated buffer + * before it realizes it is dealing with a corrupt + * record. Adding the two 0x00 bytes prevents this + * from causing a buffer overread. */ - pSample->n = sqlite3_column_bytes(pStmt, 5); - pSample->p = sqlite3DbMallocZero(db, pSample->n + 2); - if (pSample->p == 0) { - sqlite3_finalize(pStmt); + sample->key_size = sqlite3_column_bytes(stmt, 5); + sample->sample_key = calloc(1, sample->key_size + 2); + if (sample->sample_key == NULL) { + sqlite3_finalize(stmt); rc = SQLITE_NOMEM_BKPT; + diag_set(OutOfMemory, sample->key_size + 2, + "sample_key", "calloc"); goto finalize; } - if (pSample->n) { - memcpy(pSample->p, sqlite3_column_blob(pStmt, 5), - pSample->n); + if (sample->key_size > 0) { + memcpy(sample->sample_key, + sqlite3_column_blob(stmt, 5), + sample->key_size); } - pIdx->nSample++; + index->def->opts.stat->sample_count++; } - rc = sqlite3_finalize(pStmt); - if (rc == SQLITE_OK) - initAvgEq(pPrevIdx); - - assert(nIdx <= nIdxCnt); - for (int i = 0; i < nIdx; ++i) { - Index *pIdx = aIndex[i]; - assert(pIdx); - /* - sid, iid - * - space_index_key_def - * - key_compare - */ - int sid = SQLITE_PAGENO_TO_SPACEID(pIdx->tnum); - int iid = SQLITE_PAGENO_TO_INDEXID(pIdx->tnum); - struct space *s = space_by_id(sid); - assert(s); - struct key_def *def = space_index_key_def(s, iid); - assert(def); - 
qsort_arg(pIdx->aSample, - pIdx->nSample, - sizeof(pIdx->aSample[0]), sampleCompareMsgPack, def); + rc = sqlite3_finalize(stmt); + if (rc == SQLITE_OK) { + if (prev_index != NULL) + init_avg_eq(prev_index); + } + assert(current_idx_count <= index_count); + for (uint32_t i = 0; i < current_idx_count; ++i) { + struct index *index = indexes[i]; + assert(index != NULL); + qsort_arg(index->def->opts.stat->samples, + index->def->opts.stat->sample_count, + sizeof(struct index_sample), + sample_compare, index->def->key_def); } - finalize: - sqlite3DbFree(db, aIndex); + free(indexes); return rc; } -/* - * Load content from the _sql_stat4 table into - * the Index.aSample[] arrays of all indices. - */ static int -loadStat4(sqlite3 * db) +space_clear_stat(struct space *space, void *arg) { - Table *pTab = 0; /* Pointer to stat table */ - - assert(db->lookaside.bDisable); - pTab = sqlite3HashFind(&db->pSchema->tblHash, "_sql_stat4"); - /* _slq_stat4 is a system space, so it always exists. */ - assert(pTab != NULL); - return loadStatTbl(db, pTab, - "SELECT \"tbl\",\"idx\",count(*) FROM \"_sql_stat4\"" - " GROUP BY \"tbl\",\"idx\"", - "SELECT \"tbl\",\"idx\",\"neq\",\"nlt\",\"ndlt\"," - "\"sample\" FROM \"_sql_stat4\""); + (void) arg; + /* Statistics is relevant only for SQL query planer. */ + if (space->def->opts.sql != NULL) { + for (uint32_t i = 0; i < space->index_count; ++i) + index_stat_destroy_samples(space->index[i]->def->opts.stat); + } + return 0; } +/** [10*log_{2}(x)]:10, 9, 8, 7, 6, 5 */ +const log_est default_tuple_est[] = {33, 32, 30, 28, 26, 23}; LogEst sql_space_tuple_log_count(struct Table *tab) @@ -1646,84 +1648,52 @@ sql_space_tuple_log_count(struct Table *tab) return sqlite3LogEst(pk->vtab->size(pk)); } -/* - * Load the content of the _sql_stat1 and sql_stat4 tables. The - * contents of _sql_stat1 are used to populate the Index.aiRowEst[] -*\* arrays. The contents of sql_stat4 are used to populate the - * Index.aSample[] arrays. 
- * - * If the _sql_stat1 table is not present in the database, SQLITE_ERROR - * is returned. In this case, even if the sql_stat4 table is present, - * no data is read from it. - * - * If the _sql_stat4 table is not present in the database, SQLITE_ERROR - * is returned. However, in this case, data is read from the _sql_stat1 - * table (if it is present) before returning. - * - * If an OOM error occurs, this function always sets db->mallocFailed. - * This means if the caller does not care about other errors, the return - * code may be ignored. - */ -int -sqlite3AnalysisLoad(sqlite3 * db) +log_est +index_field_tuple_est(struct Index *idx, uint32_t field) { - analysisInfo sInfo; - HashElem *i, *j; - char *zSql; - int rc = SQLITE_OK; - - /* Clear any prior statistics */ - for (j = sqliteHashFirst(&db->pSchema->tblHash); j; - j = sqliteHashNext(j)) { - Table *pTab = sqliteHashData(j); - for (i = sqliteHashFirst(&pTab->idxHash); i; - i = sqliteHashNext(i)) { - Index *pIdx = sqliteHashData(i); - pIdx->aiRowLogEst[0] = 0; - sqlite3DeleteIndexSamples(db, pIdx); - pIdx->aSample = 0; - } + struct space *space = space_by_id(SQLITE_PAGENO_TO_SPACEID(idx->tnum)); + if (space == NULL) + return idx->aiRowLogEst[field]; + struct index *tnt_idx = + space_index(space, SQLITE_PAGENO_TO_INDEXID(idx->tnum)); + assert(tnt_idx != NULL); + assert(field <= tnt_idx->def->key_def->part_count); + if (tnt_idx->def->opts.stat == NULL) { + if (field == 0) + return DEFAULT_TUPLE_LOG_COUNT; + if (field == tnt_idx->def->key_def->part_count && + tnt_idx->def->opts.is_unique) + return 0; + return default_tuple_est[field + 1 >= 5 ? 
5 : field + 1]; } + return tnt_idx->def->opts.stat->tuple_log_est[field]; +} +int +sql_analysis_load(struct sqlite3 *db) +{ + space_foreach(space_clear_stat, NULL); /* Load new statistics out of the _sql_stat1 table */ + analysisInfo sInfo; sInfo.db = db; - zSql = "SELECT \"tbl\",\"idx\",\"stat\" FROM \"_sql_stat1\""; - rc = sqlite3_exec(db, zSql, analysisLoader, &sInfo, 0); - - /* Set appropriate defaults on all indexes not in the _sql_stat1 table */ - for (j = sqliteHashFirst(&db->pSchema->tblHash); j; - j = sqliteHashNext(j)) { - Table *pTab = sqliteHashData(j); - for (i = sqliteHashFirst(&pTab->idxHash); i; - i = sqliteHashNext(i)) { - Index *pIdx = sqliteHashData(i); - if (pIdx->aiRowLogEst[0] == 0) - sqlite3DefaultRowEst(pIdx); - } - } - + const char *load_stat1 = + "SELECT \"tbl\",\"idx\",\"stat\" FROM \"_sql_stat1\""; + int rc = sqlite3_exec(db, load_stat1, analysis_loader, &sInfo, 0); + if (rc != SQLITE_OK) + return rc; + /* + * This query is used to allocate enough memory for + * statistics. Result rows are given in a form: + * , , + */ + const char *init_query = "SELECT \"tbl\",\"idx\",count(*) FROM " + "\"_sql_stat4\" GROUP BY \"tbl\",\"idx\""; + /* Query for loading statistics into in-memory structs. */ + const char *load_query = "SELECT \"tbl\",\"idx\",\"neq\",\"nlt\"," + "\"ndlt\",\"sample\" FROM \"_sql_stat4\""; /* Load the statistics from the _sql_stat4 table. 
*/ - if (rc == SQLITE_OK && OptimizationEnabled(db, SQLITE_Stat4)) { - db->lookaside.bDisable++; - rc = loadStat4(db); - db->lookaside.bDisable--; - } - - for (j = sqliteHashFirst(&db->pSchema->tblHash); j; - j = sqliteHashNext(j)) { - Table *pTab = sqliteHashData(j); - for (i = sqliteHashFirst(&pTab->idxHash); i; - i = sqliteHashNext(i)) { - Index *pIdx = sqliteHashData(i); - sqlite3_free(pIdx->aiRowEst); - pIdx->aiRowEst = 0; - } - } - - if (rc == SQLITE_NOMEM) { + rc = load_stat_from_space(db, init_query, load_query); + if (rc == SQLITE_NOMEM) sqlite3OomFault(db); - } return rc; } - -#endif /* SQLITE_OMIT_ANALYZE */ diff --git a/src/box/sql/build.c b/src/box/sql/build.c index 5a40308fe..962dc2aa7 100644 --- a/src/box/sql/build.c +++ b/src/box/sql/build.c @@ -216,13 +216,9 @@ sqlite3LocateIndex(sqlite3 * db, const char *zName, const char *zTable) static void freeIndex(sqlite3 * db, Index * p) { -#ifndef SQLITE_OMIT_ANALYZE - sqlite3DeleteIndexSamples(db, p); -#endif sql_expr_free(db, p->pPartIdxWhere, false); sqlite3ExprListDelete(db, p->aColExpr); sqlite3DbFree(db, p->zColAff); - sqlite3_free(p->aiRowEst); sqlite3DbFree(db, p); } @@ -2941,9 +2937,6 @@ sqlite3CreateIndex(Parse * pParse, /* All information about this parse */ requestedSortOrder = pListItem->sortOrder & 0; pIndex->aSortOrder[i] = (u8) requestedSortOrder; } - - sqlite3DefaultRowEst(pIndex); - if (pTab == pParse->pNewTable) { /* This routine has been called to create an automatic index as a * result of a PRIMARY KEY or UNIQUE clause on a column definition, or @@ -3121,60 +3114,6 @@ sqlite3CreateIndex(Parse * pParse, /* All information about this parse */ sqlite3DbFree(db, zName); } -/* - * Fill the Index.aiRowEst[] array with default information - information - * to be used when we have not run the ANALYZE command. - * - * aiRowEst[0] is supposed to contain the number of elements in the index. - * Since we do not know, guess 1 million. 
aiRowEst[1] is an estimate of the - * number of rows in the table that match any particular value of the - * first column of the index. aiRowEst[2] is an estimate of the number - * of rows that match any particular combination of the first 2 columns - * of the index. And so forth. It must always be the case that -* - * aiRowEst[N]<=aiRowEst[N-1] - * aiRowEst[N]>=1 - * - * Apart from that, we have little to go on besides intuition as to - * how aiRowEst[] should be initialized. The numbers generated here - * are based on typical values found in actual indices. - */ -void -sqlite3DefaultRowEst(Index * pIdx) -{ - /* 10, 9, 8, 7, 6 */ - LogEst aVal[] = { 33, 32, 30, 28, 26 }; - LogEst *a = pIdx->aiRowLogEst; - int nCopy = MIN(ArraySize(aVal), pIdx->nColumn); - int i; - - /* Set the first entry (number of rows in the index) to the estimated - * number of rows in the table, or half the number of rows in the table - * for a partial index. But do not let the estimate drop below 10. - */ - a[0] = pIdx->pTable->tuple_log_count; - if (pIdx->pPartIdxWhere != 0) - a[0] -= 10; - assert(10 == sqlite3LogEst(2)); - if (a[0] < 33) - a[0] = 33; - assert(33 == sqlite3LogEst(10)); - - /* Estimate that a[1] is 10, a[2] is 9, a[3] is 8, a[4] is 7, a[5] is - * 6 and each subsequent value (if any) is 5. - */ - memcpy(&a[1], aVal, nCopy * sizeof(LogEst)); - int col_count = index_column_count(pIdx); - for (i = nCopy + 1; i <= col_count; i++) { - a[i] = 23; - assert(23 == sqlite3LogEst(5)); - } - - assert(0 == sqlite3LogEst(1)); - if (IsUniqueIndex(pIdx)) - a[pIdx->nColumn] = 0; -} - /** * Return number of columns in given index. * If space is ephemeral, use internal diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index 8a3b35a37..f6c67158c 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -1495,10 +1495,8 @@ cmd ::= DROP TRIGGER ifexists(NOERR) fullname(X). 
{ /* %endif SQLITE_OMIT_REINDEX */ /////////////////////////////////// ANALYZE /////////////////////////////////// -%ifndef SQLITE_OMIT_ANALYZE cmd ::= ANALYZE. {sqlite3Analyze(pParse, 0);} cmd ::= ANALYZE nm(X). {sqlite3Analyze(pParse, &X);} -%endif //////////////////////// ALTER TABLE table ... //////////////////////////////// %ifndef SQLITE_OMIT_ALTERTABLE diff --git a/src/box/sql/pragma.c b/src/box/sql/pragma.c index 02990e422..c73103f68 100644 --- a/src/box/sql/pragma.c +++ b/src/box/sql/pragma.c @@ -424,8 +424,7 @@ sqlite3Pragma(Parse * pParse, Token * pId, /* First part of [schema.]id field */ sqlite3VdbeMultiLoad(v, 2, "sii", pIdx->zName, avg_tuple_size_idx, - pIdx-> - aiRowLogEst[0]); + index_field_tuple_est(pIdx, 0)); sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4); } diff --git a/src/box/sql/prepare.c b/src/box/sql/prepare.c index 7a16074dc..a3a56515a 100644 --- a/src/box/sql/prepare.c +++ b/src/box/sql/prepare.c @@ -186,11 +186,8 @@ sqlite3InitDatabase(sqlite3 * db) assert(db->init.busy); { rc = initData.rc; -#ifndef SQLITE_OMIT_ANALYZE - if (rc == SQLITE_OK) { - sqlite3AnalysisLoad(db); - } -#endif + if (rc == SQLITE_OK) + sql_analysis_load(db); } if (db->mallocFailed) { rc = SQLITE_NOMEM_BKPT; diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h index d026a213d..c2ea066d6 100644 --- a/src/box/sql/sqliteInt.h +++ b/src/box/sql/sqliteInt.h @@ -1473,7 +1473,6 @@ typedef struct FuncDef FuncDef; typedef struct FuncDefHash FuncDefHash; typedef struct IdList IdList; typedef struct Index Index; -typedef struct IndexSample IndexSample; typedef struct KeyClass KeyClass; typedef struct KeyInfo KeyInfo; typedef struct Lookaside Lookaside; @@ -1695,7 +1694,6 @@ struct sqlite3 { #define SQLITE_SubqCoroutine 0x0100 /* Evaluate subqueries as coroutines */ #define SQLITE_Transitive 0x0200 /* Transitive constraints */ #define SQLITE_OmitNoopJoin 0x0400 /* Omit unused tables in joins */ -#define SQLITE_Stat4 0x0800 /* Use STAT4 data */ #define SQLITE_CursorHints 
0x2000 /* Add OP_CursorHint opcodes */ #define SQLITE_AllOpts 0xffff /* All optimizations */ @@ -2173,14 +2171,41 @@ struct Index { * or _NONE */ unsigned idxType:2; /* 1==UNIQUE, 2==PRIMARY KEY, 0==CREATE INDEX */ - unsigned bUnordered:1; /* Use this index for == or IN queries only */ - unsigned noSkipScan:1; /* Do not try to use skip-scan if true */ - int nSample; /* Number of elements in aSample[] */ - int nSampleCol; /* Size of IndexSample.anEq[] and so on */ - tRowcnt *aAvgEq; /* Average nEq values for keys not in aSample */ - IndexSample *aSample; /* Samples of the left-most key */ - tRowcnt *aiRowEst; /* Non-logarithmic stat1 data for this index */ }; +/** + * default_tuple_est[] array contains default information + * which is used when we don't have real space, e.g. temporary + * objects representing result set of nested SELECT or VIEW. + * + * First number is supposed to contain the number of elements + * in the index. Since we do not know, guess 1 million. + * Second one is an estimate of the number of rows in the + * table that match any particular value of the first column of + * the index. Third one is an estimate of the number of + * rows that match any particular combination of the first 2 + * columns of the index. And so on. It must always be true: + * + * default_tuple_est[N] <= default_tuple_est[N-1] + * default_tuple_est[N] >= 1 + * + * Apart from that, we have little to go on besides intuition + * as to how default values should be initialized. The numbers + * generated here are based on typical values found in actual + * indices. + */ +extern const log_est default_tuple_est[]; + +/** + * Fetch statistics concerning tuples to be selected. + * If there is no appropriate Tarantool's index, + * return one of default values. + * + * @param idx Index. + * @param field Number of field to be examined. + * @retval Estimate logarithm of tuples selected by given field. 
+ */ +log_est +index_field_tuple_est(struct Index *idx, uint32_t field); /* * Allowed values for Index.idxType @@ -2201,19 +2226,6 @@ struct Index { */ #define XN_EXPR (-2) /* Indexed column is an expression */ -/* - * Each sample stored in the sql_stat4 table is represented in memory - * using a structure of this type. See documentation at the top of the - * analyze.c source file for additional information. - */ -struct IndexSample { - void *p; /* Pointer to sampled record */ - int n; /* Size of record in bytes */ - tRowcnt *anEq; /* Est. number of rows where the key equals this sample */ - tRowcnt *anLt; /* Est. number of rows where key is less than this sample */ - tRowcnt *anDLt; /* Est. number of distinct keys less than this sample */ -}; - #ifdef DEFAULT_TUPLE_COUNT #undef DEFAULT_TUPLE_COUNT #endif @@ -3946,9 +3958,19 @@ ssize_t sql_index_tuple_size(struct space *space, struct index *idx); int sqlite3InvokeBusyHandler(BusyHandler *); -int sqlite3AnalysisLoad(sqlite3 *); -void sqlite3DeleteIndexSamples(sqlite3 *, Index *); -void sqlite3DefaultRowEst(Index *); + +/** + * Load the content of the _sql_stat1 and sql_stat4 tables. The + * contents of _sql_stat1 are used to populate the tuple_stat1[] + * arrays. The contents of sql_stat4 are used to populate the + * samples[] arrays. + * + * @param db Database handler. + * @retval SQLITE_OK on success, smth else otherwise. 
+ */ +int +sql_analysis_load(struct sqlite3 *db); + uint32_t index_column_count(const Index *); bool diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c index aaf912320..c02ce745a 100644 --- a/src/box/sql/vdbe.c +++ b/src/box/sql/vdbe.c @@ -4844,7 +4844,7 @@ case OP_RenameTable: { sqlite3DbFree(db, (void*)zSqlStmt); break; } -#if !defined(SQLITE_OMIT_ANALYZE) + /* Opcode: LoadAnalysis P1 * * * * * * Read the sql_stat1 table for database P1 and load the content @@ -4853,11 +4853,10 @@ case OP_RenameTable: { */ case OP_LoadAnalysis: { assert(pOp->p1==0 ); - rc = sqlite3AnalysisLoad(db); + rc = sql_analysis_load(db); if (rc) goto abort_due_to_error; break; } -#endif /* !defined(SQLITE_OMIT_ANALYZE) */ /* Opcode: DropTable P1 * * P4 * * diff --git a/src/box/sql/where.c b/src/box/sql/where.c index e6f9ef431..975e7059f 100644 --- a/src/box/sql/where.c +++ b/src/box/sql/where.c @@ -891,7 +891,16 @@ whereKeyStats(Parse * pParse, /* Database connection */ int roundUp, /* Round up if true. Round down if false */ tRowcnt * aStat) /* OUT: stats written here */ { - IndexSample *aSample = pIdx->aSample; + uint32_t space_id = SQLITE_PAGENO_TO_SPACEID(pIdx->tnum); + struct space *space = space_by_id(space_id); + assert(space != NULL); + uint32_t iid = SQLITE_PAGENO_TO_INDEXID(pIdx->tnum); + struct index *idx = space_index(space, iid); + assert(idx != NULL && idx->def->opts.stat != NULL); + struct index_sample *samples = idx->def->opts.stat->samples; + assert(idx->def->opts.stat->sample_count > 0); + assert(idx->def->opts.stat->samples != NULL); + assert(idx->def->opts.stat->sample_field_count >= pRec->nField); int iCol; /* Index of required stats in anEq[] etc. 
*/ int i; /* Index of first sample >= pRec */ int iSample; /* Smallest sample larger than or equal to pRec */ @@ -905,8 +914,7 @@ whereKeyStats(Parse * pParse, /* Database connection */ UNUSED_PARAMETER(pParse); #endif assert(pRec != 0); - assert(pIdx->nSample > 0); - assert(pRec->nField > 0 && pRec->nField <= pIdx->nSampleCol); + assert(pRec->nField > 0); /* Do a binary search to find the first sample greater than or equal * to pRec. If pRec contains a single field, the set of samples to search @@ -954,7 +962,8 @@ whereKeyStats(Parse * pParse, /* Database connection */ */ nField = pRec->nField; iCol = 0; - iSample = pIdx->nSample * nField; + uint32_t sample_count = idx->def->opts.stat->sample_count; + iSample = sample_count * nField; do { int iSamp; /* Index in aSample[] of test sample */ int n; /* Number of fields in test sample */ @@ -967,8 +976,8 @@ whereKeyStats(Parse * pParse, /* Database connection */ * fields that is greater than the previous effective sample. */ for (n = (iTest % nField) + 1; n < nField; n++) { - if (aSample[iSamp - 1].anLt[n - 1] != - aSample[iSamp].anLt[n - 1]) + if (samples[iSamp - 1].lt[n - 1] != + samples[iSamp].lt[n - 1]) break; } } else { @@ -977,15 +986,15 @@ whereKeyStats(Parse * pParse, /* Database connection */ pRec->nField = n; res = - sqlite3VdbeRecordCompareMsgpack(aSample[iSamp].n, - aSample[iSamp].p, pRec); + sqlite3VdbeRecordCompareMsgpack(samples[iSamp].key_size, + samples[iSamp].sample_key, + pRec); if (res < 0) { iLower = - aSample[iSamp].anLt[n - 1] + aSample[iSamp].anEq[n - - 1]; + samples[iSamp].lt[n - 1] + samples[iSamp].eq[n - 1]; iMin = iTest + 1; } else if (res == 0 && n < nField) { - iLower = aSample[iSamp].anLt[n - 1]; + iLower = samples[iSamp].lt[n - 1]; iMin = iTest + 1; res = -1; } else { @@ -1003,12 +1012,12 @@ whereKeyStats(Parse * pParse, /* Database connection */ if (pParse->db->mallocFailed == 0) { if (res == 0) { /* If (res==0) is true, then pRec must be equal to sample i. 
*/ - assert(i < pIdx->nSample); + assert(i < (int) sample_count); assert(iCol == nField - 1); pRec->nField = nField; assert(0 == - sqlite3VdbeRecordCompareMsgpack(aSample[i].n, - aSample[i].p, + sqlite3VdbeRecordCompareMsgpack(samples[i].key_size, + samples[i].sample_key, pRec) || pParse->db->mallocFailed); } else { @@ -1016,12 +1025,12 @@ whereKeyStats(Parse * pParse, /* Database connection */ * all samples in the aSample[] array, pRec must be smaller than the * (iCol+1) field prefix of sample i. */ - assert(i <= pIdx->nSample && i >= 0); + assert(i <= (int) sample_count && i >= 0); pRec->nField = iCol + 1; - assert(i == pIdx->nSample - || sqlite3VdbeRecordCompareMsgpack(aSample[i].n, - aSample[i].p, - pRec) > 0 + assert(i == (int) sample_count || + sqlite3VdbeRecordCompareMsgpack(samples[i].key_size, + samples[i].sample_key, + pRec) > 0 || pParse->db->mallocFailed); /* if i==0 and iCol==0, then record pRec is smaller than all samples @@ -1032,13 +1041,15 @@ whereKeyStats(Parse * pParse, /* Database connection */ if (iCol > 0) { pRec->nField = iCol; assert(sqlite3VdbeRecordCompareMsgpack - (aSample[i].n, aSample[i].p, pRec) <= 0 + (samples[i].key_size, + samples[i].sample_key, pRec) <= 0 || pParse->db->mallocFailed); } if (i > 0) { pRec->nField = nField; assert(sqlite3VdbeRecordCompareMsgpack - (aSample[i - 1].n, aSample[i - 1].p, + (samples[i - 1].key_size, + samples[i - 1].sample_key, pRec) < 0 || pParse->db->mallocFailed); } } @@ -1048,18 +1059,18 @@ whereKeyStats(Parse * pParse, /* Database connection */ if (res == 0) { /* Record pRec is equal to sample i */ assert(iCol == nField - 1); - aStat[0] = aSample[i].anLt[iCol]; - aStat[1] = aSample[i].anEq[iCol]; + aStat[0] = samples[i].lt[iCol]; + aStat[1] = samples[i].eq[iCol]; } else { /* At this point, the (iCol+1) field prefix of aSample[i] is the first * sample that is greater than pRec. Or, if i==pIdx->nSample then pRec * is larger than all samples in the array. 
*/ tRowcnt iUpper, iGap; - if (i >= pIdx->nSample) { - iUpper = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]); + if (i >= (int) sample_count) { + iUpper = sqlite3LogEstToInt(idx->def->opts.stat->tuple_log_est[0]); } else { - iUpper = aSample[i].anLt[iCol]; + iUpper = samples[i].lt[iCol]; } if (iLower >= iUpper) { @@ -1073,7 +1084,7 @@ whereKeyStats(Parse * pParse, /* Database connection */ iGap = iGap / 3; } aStat[0] = iLower + iGap; - aStat[1] = pIdx->aAvgEq[iCol]; + aStat[1] = idx->def->opts.stat->avg_eq[iCol]; } /* Restore the pRec->nField value before returning. */ @@ -1164,10 +1175,15 @@ whereRangeSkipScanEst(Parse * pParse, /* Parsing & code generating context */ int *pbDone) /* Set to true if at least one expr. value extracted */ { Index *p = pLoop->pIndex; + struct space *space = space_by_id(SQLITE_PAGENO_TO_SPACEID(p->tnum)); + assert(space != NULL); + struct index *index = space_index(space, + SQLITE_PAGENO_TO_INDEXID(p->tnum)); + assert(index != NULL && index->def->opts.stat != NULL); int nEq = pLoop->nEq; sqlite3 *db = pParse->db; int nLower = -1; - int nUpper = p->nSample + 1; + int nUpper = index->def->opts.stat->sample_count + 1; int rc = SQLITE_OK; u8 aff = sqlite3IndexColumnAffinity(db, p, nEq); struct coll *pColl; @@ -1185,15 +1201,17 @@ whereRangeSkipScanEst(Parse * pParse, /* Parsing & code generating context */ if (pUpper && rc == SQLITE_OK) { rc = sqlite3Stat4ValueFromExpr(pParse, pUpper->pExpr->pRight, aff, &p2); - nUpper = p2 ? 0 : p->nSample; + nUpper = p2 ? 
0 : index->def->opts.stat->sample_count; } if (p1 || p2) { int i; int nDiff; - for (i = 0; rc == SQLITE_OK && i < p->nSample; i++) { - rc = sqlite3Stat4Column(db, p->aSample[i].p, - p->aSample[i].n, nEq, &pVal); + struct index_sample *samples = index->def->opts.stat->samples; + uint32_t sample_count = index->def->opts.stat->sample_count; + for (i = 0; rc == SQLITE_OK && i < (int) sample_count; i++) { + rc = sqlite3Stat4Column(db, samples[i].sample_key, + samples[i].key_size, nEq, &pVal); if (rc == SQLITE_OK && p1) { int res = sqlite3MemCompare(p1, pVal, pColl); if (res >= 0) @@ -1217,7 +1235,8 @@ whereRangeSkipScanEst(Parse * pParse, /* Parsing & code generating context */ */ if (nDiff != 1 || pUpper == 0 || pLower == 0) { int nAdjust = - (sqlite3LogEst(p->nSample) - sqlite3LogEst(nDiff)); + (sqlite3LogEst(sample_count) - + sqlite3LogEst(nDiff)); pLoop->nOut -= nAdjust; *pbDone = 1; WHERETRACE(0x10, @@ -1288,8 +1307,22 @@ whereRangeScanEst(Parse * pParse, /* Parsing & code generating context */ Index *p = pLoop->pIndex; int nEq = pLoop->nEq; - - if (p->nSample > 0 && nEq < p->nSampleCol) { + uint32_t space_id = SQLITE_PAGENO_TO_SPACEID(p->tnum); + struct space *space = space_by_id(space_id); + assert(space != NULL); + uint32_t iid = SQLITE_PAGENO_TO_INDEXID(p->tnum); + struct index *idx = space_index(space, iid); + assert(idx != NULL); + struct index_stat *stat = idx->def->opts.stat; + /* + * Create surrogate stat in case ANALYZE command hasn't + * been ran. Simply fill it with zeros. + */ + struct index_stat surrogate_stat; + memset(&surrogate_stat, 0, sizeof(surrogate_stat)); + if (stat == NULL) + stat = &surrogate_stat; + if (stat->sample_count > 0 && nEq < (int) stat->sample_field_count) { if (nEq == pBuilder->nRecValid) { UnpackedRecord *pRec = pBuilder->pRec; tRowcnt a[2]; @@ -1332,15 +1365,6 @@ whereRangeScanEst(Parse * pParse, /* Parsing & code generating context */ * are in range. 
*/ iLower = 0; - uint32_t space_id = - SQLITE_PAGENO_TO_SPACEID(p->tnum); - struct space *space = space_by_id(space_id); - assert(space != NULL); - uint32_t iid = - SQLITE_PAGENO_TO_INDEXID(p->tnum); - struct index *idx = - space_index(space, iid); - assert(idx != NULL); iUpper = idx->vtab->size(idx); } else { /* Note: this call could be optimized away - since the same values must @@ -1506,8 +1530,6 @@ whereEqualScanEst(Parse * pParse, /* Parsing & code generating context */ assert(nEq >= 1); assert(nEq <= (int)index_column_count(p)); - assert(p->aSample != 0); - assert(p->nSample > 0); assert(pBuilder->nRecValid < nEq); /* If values are not available for all fields of the index to the left @@ -1556,14 +1578,13 @@ whereInScanEst(Parse * pParse, /* Parsing & code generating context */ tRowcnt * pnRow) /* Write the revised row estimate here */ { Index *p = pBuilder->pNew->pIndex; - i64 nRow0 = sqlite3LogEstToInt(p->aiRowLogEst[0]); + i64 nRow0 = sqlite3LogEstToInt(index_field_tuple_est(p, 0)); int nRecValid = pBuilder->nRecValid; int rc = SQLITE_OK; /* Subfunction return code */ tRowcnt nEst; /* Number of rows for a single term */ tRowcnt nRowEst = 0; /* New estimate of the number of rows */ int i; /* Loop counter */ - assert(p->aSample != 0); for (i = 0; rc == SQLITE_OK && i < pList->nExpr; i++) { nEst = nRow0; rc = whereEqualScanEst(pParse, pBuilder, pList->a[i].pExpr, @@ -2323,9 +2344,25 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder, /* The WhereLoop factory */ WO_EQ | WO_IN | WO_GT | WO_GE | WO_LT | WO_LE | WO_ISNULL | WO_IS; } - if (pProbe->bUnordered) + struct space *space = + space_by_id(SQLITE_PAGENO_TO_SPACEID(pProbe->tnum)); + struct index *idx = NULL; + struct index_stat *stat = NULL; + if (space != NULL) { + idx = space_index(space, SQLITE_PAGENO_TO_INDEXID(pProbe->tnum)); + assert(idx != NULL); + stat = idx->def->opts.stat; + } + /* + * Create surrogate stat in case ANALYZE command hasn't + * been ran. Simply fill it with zeros. 
+ */ + struct index_stat surrogate_stat; + memset(&surrogate_stat, 0, sizeof(surrogate_stat)); + if (stat == NULL) + stat = &surrogate_stat; + if (stat->is_unordered) opMask &= ~(WO_GT | WO_GE | WO_LT | WO_LE); - assert(pNew->nEq < nProbeCol); saved_nEq = pNew->nEq; @@ -2339,7 +2376,7 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder, /* The WhereLoop factory */ pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, saved_nEq, opMask, pProbe); pNew->rSetup = 0; - rSize = pProbe->aiRowLogEst[0]; + rSize = index_field_tuple_est(pProbe, 0); rLogSize = estLog(rSize); for (; rc == SQLITE_OK && pTerm != 0; pTerm = whereScanNext(&scan)) { u16 eOp = pTerm->eOperator; /* Shorthand for pTerm->eOperator */ @@ -2497,8 +2534,8 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder, /* The WhereLoop factory */ } else { tRowcnt nOut = 0; if (nInMul == 0 - && pProbe->nSample - && pNew->nEq <= pProbe->nSampleCol + && stat->sample_count + && pNew->nEq <= stat->sample_field_count && ((eOp & WO_IN) == 0 || !ExprHasProperty(pTerm->pExpr, EP_xIsSelect)) @@ -2533,8 +2570,8 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder, /* The WhereLoop factory */ } if (nOut == 0) { pNew->nOut += - (pProbe->aiRowLogEst[nEq] - - pProbe->aiRowLogEst[nEq - 1]); + (index_field_tuple_est(pProbe, nEq) - + index_field_tuple_est(pProbe, nEq -1)); if (eOp & WO_ISNULL) { /* TUNING: If there is no likelihood() value, assume that a * "col IS NULL" expression matches twice as many rows @@ -2619,18 +2656,17 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder, /* The WhereLoop factory */ */ assert(42 == sqlite3LogEst(18)); if (saved_nEq == saved_nSkip && saved_nEq + 1U < nProbeCol && - pProbe->noSkipScan == 0 && + stat->skip_scan_enabled == true && /* TUNING: Minimum for skip-scan */ - pProbe->aiRowLogEst[saved_nEq + 1] >= 42 && + index_field_tuple_est(pProbe, saved_nEq + 1) >= 42 && (rc = whereLoopResize(db, pNew, pNew->nLTerm + 1)) == SQLITE_OK) { LogEst nIter; pNew->nEq++; pNew->nSkip++; 
pNew->aLTerm[pNew->nLTerm++] = 0; pNew->wsFlags |= WHERE_SKIPSCAN; - nIter = - pProbe->aiRowLogEst[saved_nEq] - - pProbe->aiRowLogEst[saved_nEq + 1]; + nIter = index_field_tuple_est(pProbe, saved_nEq) - + index_field_tuple_est(pProbe, saved_nEq + 1); pNew->nOut -= nIter; /* TUNING: Because uncertainties in the estimates for skip-scan queries, * add a 1.375 fudge factor to make skip-scan slightly less likely. @@ -2648,6 +2684,31 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder, /* The WhereLoop factory */ return rc; } +/** + * Check if is_unordered flag is set for given index. + * Since now there may not be corresponding Tarantool's index + * (e.g. for temporary objects such as ephemeral tables), + * a few preliminary checks are made. + * + * @param idx Index to be tested. + * @retval True, if statistics exist and unordered flag is set. + */ +static bool +index_is_unordered(struct Index *idx) +{ + assert(idx != NULL); + struct space *space = space_by_id(SQLITE_PAGENO_TO_SPACEID(idx->tnum)); + if (space == NULL) + return false; + uint32_t iid = SQLITE_PAGENO_TO_INDEXID(idx->tnum); + struct index *tnt_idx = space_index(space, iid); + if (tnt_idx == NULL) + return false; + if (tnt_idx->def->opts.stat != NULL) + return tnt_idx->def->opts.stat->is_unordered; + return false; +} + /* * Return True if it is possible that pIndex might be useful in * implementing the ORDER BY clause in pBuilder. 
@@ -2664,8 +2725,7 @@ indexMightHelpWithOrderBy(WhereLoopBuilder * pBuilder, ExprList *aColExpr; int ii, jj; int nIdxCol = index_column_count(pIndex); - - if (pIndex->bUnordered) + if (index_is_unordered(pIndex)) return 0; if ((pOB = pBuilder->pWInfo->pOrderBy) == 0) return 0; @@ -2878,7 +2938,7 @@ whereLoopAddBtree(WhereLoopBuilder * pBuilder, /* WHERE clause information */ testcase(pNew->iTab != pSrc->iCursor); /* See ticket [98d973b8f5] */ continue; /* Partial index inappropriate for this query */ } - rSize = pProbe->aiRowLogEst[0]; + rSize = index_field_tuple_est(pProbe, 0); pNew->nEq = 0; pNew->nBtm = 0; pNew->nTop = 0; @@ -3276,8 +3336,8 @@ wherePathSatisfiesOrderBy(WhereInfo * pWInfo, /* The WHERE clause */ if (pLoop->wsFlags & WHERE_IPK) { pIndex = 0; nColumn = 1; - } else if ((pIndex = pLoop->pIndex) == 0 - || pIndex->bUnordered) { + } else if ((pIndex = pLoop->pIndex) == NULL || + index_is_unordered(pIndex)) { return 0; } else { nColumn = index_column_count(pIndex); diff --git a/src/box/sql/whereexpr.c b/src/box/sql/whereexpr.c index ccdff4684..0bcb529bd 100644 --- a/src/box/sql/whereexpr.c +++ b/src/box/sql/whereexpr.c @@ -1297,9 +1297,7 @@ exprAnalyze(SrcList * pSrc, /* the FROM clause */ */ if (pExpr->op == TK_NOTNULL && pExpr->pLeft->op == TK_COLUMN - && pExpr->pLeft->iColumn >= 0 - && OptimizationEnabled(db, SQLITE_Stat4) - ) { + && pExpr->pLeft->iColumn >= 0) { Expr *pNewExpr; Expr *pLeft = pExpr->pLeft; int idxNew; -- 2.15.1