[tarantool-patches] Re: [PATCH 4/4] sql: move SQL statistics to server
n.pettik
korablev at tarantool.org
Fri May 11 20:29:38 MSK 2018
I am attaching whole patch since it has changed significantly with region allocations.
I didn't answer the comments concerning memory allocation policy deliberately, but
took them into consideration when I was reworking the patch. I hope I didn't miss anything.
Overall, thanks for ideas and suggestions!
> On 24 Apr 2018, at 15:51, Vladislav Shpilevoy <v.shpilevoy at tarantool.org> wrote:
>
> Hello. The patch is great, but see 17 comments below %)
>
> Most of them are minor except one. The key idea is to
> use region allocator to build statistics on analyze. See
> details below.
>
> On 23/04/2018 23:29, Nikita Pettik wrote:
>> SQLite provides kind of statistics concerning data holding in each index
>> and space. It is used to help query planer build optimized plan. Such
>> statistics don't depend on internal index implementation (e.g. B-tree or
>> LSM tree) and contain information concerning tuple distribution.
>> This patch moves members resbonsible statistics from original SQLite
>> structs to Tarantool ones. To be more precise, now index's opts contains
>> statistics from _stat1 space (arrays of integers and their loarithm
>
> 1. resbonsible -> responsible, loarithm -> logarithm.
Typos, sorry. Fixed.
> 2. Lets reduce count of mallocs and allocate all on the single
> huge malloc. It eliminates index_stat_destroy_samples, and memory
> fragmentation.
>
> See how it is done for space_def. You must define an index_stat_sizeof
> function, that takes what is needed to calculate summary size,
> calculates offsets on different parts of the result memory etc.
>
> I know, that it is built in load_stat_from_space, but seems like I
> found a solution using region. See the comments below.
Done.
See in the patch below functions index_stat_dup(), index_stat_sizeof()
and stat_copy() in analyze.c
Unfortunately, it is impossible to make dup during analyze, since stats on
region aren't placed in one chunk, so I have to write 2 versions of dup
(in fact it is possible to locate in one chunk on region, but requires a lot of overhead).
>
>> diff --git a/src/box/index_def.h b/src/box/index_def.h
>> index 6167570b7..52e9a0548 100644
>> --- a/src/box/index_def.h
>> +++ b/src/box/index_def.h
>> @@ -113,6 +181,27 @@ index_opts_create(struct index_opts *opts)
>> *opts = index_opts_default;
>> }
>> +/**
>> + * Release memory allocated for samples, statistics they include,
>> + * sample keys and avg_eq array.
>> + */
>> +void
>> +index_stat_destroy_samples(struct index_stat *stat);
>> +
>> +/**
>> + * Destroy index stat.
>> + */
>> +static inline void
>> +destroy_stat(struct index_stat *stat) {
>
> 3. It will be removed after review fixes, but why you did not
> just inlined it in a single usage place?
Nevermind.
>
>> + if (stat == NULL)
>> + return;
>> + free(stat->tuple_stat1);
>> + free(stat->tuple_log_est);
>> + index_stat_destroy_samples(stat);
>> + TRASH(stat);
>> + free(stat);
>> +}
>> +
>> /**
>> * Destroy index options
>> */
>> @@ -120,6 +209,7 @@ static inline void
>> index_opts_destroy(struct index_opts *opts)
>> {
>> free(opts->sql);
>> + destroy_stat(opts->stat);
>
> 4. Will be turned into just free().
Done: now there is only free() call.
>
>> diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c
>> index 4965e899a..14dfbd551 100644
>> --- a/src/box/sql/analyze.c
>> +++ b/src/box/sql/analyze.c
>> @@ -104,8 +104,8 @@
>> * large nEq values.
>> *
>> */
>> -#ifndef SQLITE_OMIT_ANALYZE
>
> 5. The definition still exists in mkkeywordhash.c
Fixed:
+++ b/extra/mkkeywordhash.c
@@ -59,11 +59,6 @@ struct Keyword {
# define ALTER 0x00000001
#endif
#define ALWAYS 0x00000002
-#ifdef SQLITE_OMIT_ANALYZE
-# define ANALYZE 0
-#else
-# define ANALYZE 0x00000004
-#endif
#ifdef SQLITE_OMIT_AUTOINCREMENT
# define AUTOINCR 0
#else
@@ -127,7 +122,7 @@ static Keyword aKeywordTable[] = {
{ "AFTER", "TK_AFTER", TRIGGER, false },
{ "ALL", "TK_ALL", ALWAYS, true },
{ "ALTER", "TK_ALTER", ALTER, true },
- { "ANALYZE", "TK_ANALYZE", ANALYZE, true },
+ { "ANALYZE", "TK_ANALYZE", ALWAYS, true },
>> +decode_stat_string(const char *stat_string, int stat_size, tRowcnt *stat_exact,
>> + LogEst *stat_log, struct index *index) {
>
> 6. Please, create a ticket to store stat_string in a stat table as map or array of stat
> values instead of string. It allows to extremely simplify all of this pure absolute shit
> using raw MessagePack array of integers iteration.
Done: https://github.com/tarantool/tarantool/issues/3372
>
>> + char *z = (char *) stat_string;
>> + if (z == NULL)
>> z = "";
>> -
>> - for (i = 0; *z && i < nOut; i++) {
>> - v = 0;
>> + for (int i = 0; *z && i < stat_size; i++) {
>> + tRowcnt v = 0;
>> + int c;
>> while ((c = z[0]) >= '0' && c <= '9') {
>> v = v * 10 + c - '0';
>> z++;
>> }
>
> 7. sscanf?
Does it really matter? It simply works, so I don’t see reason to change it.
Anyway, it is going to disappear after #3372.
>
>> - if (aOut)
>> - aOut[i] = v;
>> - if (aLog)
>> - aLog[i] = sqlite3LogEst(v);
>> + if (stat_exact != NULL)
>> + stat_exact[i] = v;
>> + if (stat_log != NULL)
>> + stat_log[i] = sqlite3LogEst(v);
>> if (*z == ' ')
>> z++;
>> }
>> -
>> - if (pIndex) {
>> - pIndex->bUnordered = 0;
>> - pIndex->noSkipScan = 0;
>> + if (index != NULL) {
>
> 8. As I can see, you use this function 3 times with index == NULL and 1
> time with not NULL. Lets just inline these things where the index is not NULL,
> and remove the index argument.
Done.
> 9. Lets return 0 here and remove 'if index != NULL' below. The code will
> look more compact.
Done.
>
>> - */
>> -void
>> -sqlite3DeleteIndexSamples(sqlite3 * db, Index * pIdx)
>> -{
>> - if (pIdx->aSample) {
>> - int j;
>> - for (j = 0; j < pIdx->nSample; j++) {
>> - IndexSample *p = &pIdx->aSample[j];
>> - sqlite3DbFree(db, p->p);
>> + if (stat->tuple_stat1 == NULL) {
>
> 10. Can it be NULL, if index->def->opts.stat was not NULL above? And as I can see,
> if analysis loader fails not on the first call, then the statistics is updated
> partially. I propose to make it atomic. For instance, you can create for each index
> a new index_stat object, fill it with new stat, and on success replace the old one.
> See below how to do it simple.
>
>> -/*
>> +/**
>> * Load the content from the _sql_stat4 table
>> - * into the relevant Index.aSample[] arrays.
>> + * into the relevant index->stat->samples[] arrays.
>> + *
>> + * Arguments must point to SQL statements that return
>> + * data equivalent to the following:
>> *
>> - * Arguments zSql1 and zSql2 must point to SQL statements that return
>> - * data equivalent to the following (statements are different for stat4,
>> - * see the caller of this function for details):
>> + * prepare: SELECT tbl,idx,count(*) FROM _sql_stat4 GROUP BY tbl,idx;
>> + * load: SELECT tbl,idx,neq,nlt,ndlt,sample FROM _sql_stat4;
>> *
>> - * zSql1: SELECT tbl,idx,count(*) FROM _sql_stat4 GROUP BY tbl,idx
>> - * zSql2: SELECT tbl,idx,neq,nlt,ndlt,sample FROM _sql_stat4
>> + * 'prepare' statement is used to allocate enough memory for
>> + * statistics (i.e. arrays lt, dt, dlt and avg_eq). 'load' query
>> + * is needed for
>> *
>> + * @param db Database handler.
>> + * @param sql_select_prepare SELECT statement, see above.
>> + * @param sql_select_load SELECT statement, see above.
>> + * @retval SQLITE_OK on success, smth else otherwise.
>> */
>> static int
>> -loadStatTbl(sqlite3 * db, /* Database handle */
>> - Table * pStatTab, /* Stat table to load to */
>> - const char *zSql1, /* SQL statement 1 (see above) */
>> - const char *zSql2 /* SQL statement 2 (see above) */
>> - )
>> +load_stat_from_space(struct sqlite3 *db, const char *sql_select_prepare,
>> + const char *sql_select_load)
>
> 11. As I can see, it is the core function of the patchset. And now it has
> two major problems:
> * partial statistics update on OOM or a non-first prepare() fail - new samples
> are allocated, but not filled, for example, due to malloc() error on the next
> index processing;
> * leaks on opts.stat->samples = malloc(alloc_size); - you do not delete
> the old memory.
>
> These and analysis_loader problems can be solved using region. I propose
> to use in this function and analysis_loader region allocations only.
>
> The array of new index_stat structures is created on a region in
> sql_analysis_load. Analysis loader takes this array via
> void *data parameter, and fills index_stats with more region allocations
> for tuple_stat1, tuple_log_est etc.
>
> Then load_stat_from_space takes the same array and process its index_stat
> instead of original stats in structs index.
>
> At the end of load_stat_from_space you are sure, that all statistics for
> all indexes are completely full, and replace original index stats in
> structs index using index_stat_dup and its super malloc.
>
> Region can be truncated right before return from sql_analysis_load.
>
>> @@ -1646,84 +1648,52 @@ sql_space_tuple_log_count(struct Table *tab)
>> return sqlite3LogEst(pk->vtab->size(pk));
>> }
>> -/*
>> - * Load the content of the _sql_stat1 and sql_stat4 tables. The
>> - * contents of _sql_stat1 are used to populate the Index.aiRowEst[]
>> -*\* arrays. The contents of sql_stat4 are used to populate the
>> - * Index.aSample[] arrays.
>> - *
>> - * If the _sql_stat1 table is not present in the database, SQLITE_ERROR
>> - * is returned. In this case, even if the sql_stat4 table is present,
>> - * no data is read from it.
>> - *
>> - * If the _sql_stat4 table is not present in the database, SQLITE_ERROR
>> - * is returned. However, in this case, data is read from the _sql_stat1
>> - * table (if it is present) before returning.
>> - *
>> - * If an OOM error occurs, this function always sets db->mallocFailed.
>> - * This means if the caller does not care about other errors, the return
>> - * code may be ignored.
>> - */
>> -int
>> -sqlite3AnalysisLoad(sqlite3 * db)
>> +log_est
>
> 12. Lets name this type log_est_t.
Ok.
>
>> +index_field_tuple_est(struct Index *idx, uint32_t field)
>> {
>> - analysisInfo sInfo;
>> - HashElem *i, *j;
>> - char *zSql;
>> - int rc = SQLITE_OK;
>> -
>> - /* Clear any prior statistics */
>> - for (j = sqliteHashFirst(&db->pSchema->tblHash); j;
>> - j = sqliteHashNext(j)) {
>> - Table *pTab = sqliteHashData(j);
>> - for (i = sqliteHashFirst(&pTab->idxHash); i;
>> - i = sqliteHashNext(i)) {
>> - Index *pIdx = sqliteHashData(i);
>> - pIdx->aiRowLogEst[0] = 0;
>> - sqlite3DeleteIndexSamples(db, pIdx);
>> - pIdx->aSample = 0;
>> - }
>> + struct space *space = space_by_id(SQLITE_PAGENO_TO_SPACEID(idx->tnum));
>> + if (space == NULL)
>> + return idx->aiRowLogEst[field];
>> + struct index *tnt_idx =
>> + space_index(space, SQLITE_PAGENO_TO_INDEXID(idx->tnum));
>> + assert(tnt_idx != NULL);
>> + assert(field <= tnt_idx->def->key_def->part_count);
>> + if (tnt_idx->def->opts.stat == NULL) {
>> + if (field == 0)
>
> 13. Why? Field 0 is an ordinary field number.
Not quite. It is assumed that the total number of rows in the table comes
first, and only then the standard statistics.
Well, I just add DEFAULT_TUPLE_LOG_COUNT as
first member at stat array:
-/** [10*log_{2}(x)]:10, 9, 8, 7, 6, 5 */
-const log_est default_tuple_est[] = {33, 32, 30, 28, 26, 23};
+const log_est_t default_tuple_est[] = {DEFAULT_TUPLE_LOG_COUNT,
+/** [10*log_{2}(x)]: 10, 9, 8, 7, 6, 5 */
+ 33, 32, 30, 28, 26, 23};
> And why do you return 0, if
> a field is equal to part_count and the index is unique?
Sorry, I should have described it somewhere in comments.
Added comment:
+log_est_t
index_field_tuple_est(struct Index *idx, uint32_t field)
{
struct space *space = space_by_id(SQLITE_PAGENO_TO_SPACEID(idx->tnum));
@@ -1663,12 +1664,15 @@ index_field_tuple_est(struct Index *idx, uint32_t field)
assert(tnt_idx != NULL);
assert(field <= tnt_idx->def->key_def->part_count);
if (tnt_idx->def->opts.stat == NULL) {
- if (field == 0)
- return DEFAULT_TUPLE_LOG_COUNT;
+ /*
+ * Last number for unique index is always 0:
+ * only one tuple exists with given full key
+ * in unique index and log(1) == 0.
+ */
if (field == tnt_idx->def->key_def->part_count &&
tnt_idx->def->opts.is_unique)
return 0;
- return default_tuple_est[field + 1 >= 5 ? 5 : field + 1];
+ return default_tuple_est[field + 1 >= 6 ? 6 : field];
}
>
>> diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h
>> index d026a213d..c2ea066d6 100644
>> --- a/src/box/sql/sqliteInt.h
>> +++ b/src/box/sql/sqliteInt.h
>> @@ -1473,7 +1473,6 @@ typedef struct FuncDef FuncDef;
>> typedef struct FuncDefHash FuncDefHash;
>> typedef struct IdList IdList;
>> typedef struct Index Index;
>> -typedef struct IndexSample IndexSample;
>
> 14. I still see IndexSample mention in analyze.c on 553 line.
It is a slightly different sample; I don't know why IndexSample is mentioned there.
Fixed comment:
@@ -550,7 +550,7 @@ samplePushPrevious(Stat4Accum * p, int iChng)
{
int i;
/* Check if any samples from the aBest[] array should be pushed
- * into IndexSample.a[] at this point.
+ * into samples array at this point.
>> @@ -2173,14 +2171,41 @@ struct Index {
>> * or _NONE
>> */
>> unsigned idxType:2; /* 1==UNIQUE, 2==PRIMARY KEY, 0==CREATE INDEX */
>> - unsigned bUnordered:1; /* Use this index for == or IN queries only */
>> - unsigned noSkipScan:1; /* Do not try to use skip-scan if true */
>> - int nSample; /* Number of elements in aSample[] */
>> - int nSampleCol; /* Size of IndexSample.anEq[] and so on */
>> - tRowcnt *aAvgEq; /* Average nEq values for keys not in aSample */
>> - IndexSample *aSample; /* Samples of the left-most key */
>> - tRowcnt *aiRowEst; /* Non-logarithmic stat1 data for this index */
>> };
>> +/**
>> + * default_tuple_est[] array contains default information
>> + * which is used when we don't have real space, e.g. temporary
>> + * objects representing result set of nested SELECT or VIEW.
>> + *
>> + * First number is supposed to contain the number of elements
>> + * in the index. Since we do not know, guess 1 million.
>> + * Second one is an estimate of the number of rows in the
>> + * table that match any particular value of the first column of
>> + * the index. Third one is an estimate of the number of
>> + * rows that match any particular combination of the first 2
>> + * columns of the index. And so on. It must always be true:
>> + *
>> + * default_tuple_est[N] <= default_tuple_est[N-1]
>> + * default_tuple_est[N] >= 1
>> + *
>> + * Apart from that, we have little to go on besides intuition
>> + * as to how default values should be initialized. The numbers
>> + * generated here are based on typical values found in actual
>> + * indices.
>> + */
>> +extern const log_est default_tuple_est[];
>
> 15. Why is it extern? Looks like it is used in analyze.c only, and
> can be static within the file.
Currently, yes - it is used only within analyze.c, but I thought it
might be useful somewhere else. Well, removed the extern modifier.
>> +
>> +/**
>> + * Fetch statistics concerning tuples to be selected.
>> + * If there is no appropriate Tarantool's index,
>> + * return one of default values.
>> + *
>> + * @param idx Index.
>> + * @param field Number of field to be examined.
>> + * @retval Estimate logarithm of tuples selected by given field.
>
> 16. All tuples? Or unique by this field?
It is the number of tuples which have the same first n fields.
Also, it is the reason why for unique indexes this number is 0
when it comes for n == part_count.
Clarified this point in comment:
- * Fetch statistics concerning tuples to be selected.
+ * Fetch statistics concerning tuples to be selected:
+ * logarithm of number of tuples which has the same key as for
+ * the first key parts specified by second argument.
+ * Simple example (without logarithms):
+ * idx = {{1,2}, {1,2}, {2, 4}, {2, 3}, {3, 5}}
+ * stat[1] = (2 + 2 + 1) / 3 = 2 - two keys which start from 1,
+ * two - from 2, and one from 3.
+ * stat[2] = (2 + 1 + 1 + 1) / 4 = 1 - only one coincidence of
+ * keys: {1, 2}; the rest are different.
+ * Notice, that stat[0] is an average number of tuples in a whole
+ * index. By default it is DEFAULT_TUPLE_LOG_COUNT == 200.
>> diff --git a/src/box/sql/where.c b/src/box/sql/where.c
>> index e6f9ef431..975e7059f 100644
>> --- a/src/box/sql/where.c
>> +++ b/src/box/sql/where.c
>> @@ -977,15 +986,15 @@ whereKeyStats(Parse * pParse, /* Database connection */
>> pRec->nField = n;
>> res =
>> - sqlite3VdbeRecordCompareMsgpack(aSample[iSamp].n,
>> - aSample[iSamp].p, pRec);
>> + sqlite3VdbeRecordCompareMsgpack(samples[iSamp].key_size,
>> + samples[iSamp].sample_key,
>
> 17. Lets make sample_key has char * type instead of void *. It is common
> practice for raw MessagePack in Tarantool.
Ok:
struct index_sample {
/** Key of the sample. */
- void *sample_key;
+ char *sample_key;
Branch is the same: https://github.com/tarantool/tarantool/tree/np/gh-3253-move-statistics-to-server
Patch itself is below:
===============================================================================
diff --git a/extra/mkkeywordhash.c b/extra/mkkeywordhash.c
index 6166119e2..cf3483151 100644
--- a/extra/mkkeywordhash.c
+++ b/extra/mkkeywordhash.c
@@ -59,11 +59,6 @@ struct Keyword {
# define ALTER 0x00000001
#endif
#define ALWAYS 0x00000002
-#ifdef SQLITE_OMIT_ANALYZE
-# define ANALYZE 0
-#else
-# define ANALYZE 0x00000004
-#endif
#ifdef SQLITE_OMIT_AUTOINCREMENT
# define AUTOINCR 0
#else
@@ -127,7 +122,7 @@ static Keyword aKeywordTable[] = {
{ "AFTER", "TK_AFTER", TRIGGER, false },
{ "ALL", "TK_ALL", ALWAYS, true },
{ "ALTER", "TK_ALTER", ALTER, true },
- { "ANALYZE", "TK_ANALYZE", ANALYZE, true },
+ { "ANALYZE", "TK_ANALYZE", ALWAYS, true },
{ "AND", "TK_AND", ALWAYS, true },
{ "AS", "TK_AS", ALWAYS, true },
{ "ASC", "TK_ASC", ALWAYS, true },
diff --git a/src/box/index_def.c b/src/box/index_def.c
index d0d852519..1702430b3 100644
--- a/src/box/index_def.c
+++ b/src/box/index_def.c
@@ -47,6 +47,7 @@ const struct index_opts index_opts_default = {
/* .bloom_fpr = */ 0.05,
/* .lsn = */ 0,
/* .sql = */ NULL,
+ /* .stat = */ NULL,
};
const struct opt_def index_opts_reg[] = {
@@ -117,6 +118,8 @@ index_def_new(uint32_t space_id, uint32_t iid, const char *name,
return NULL;
}
}
+ /* Statistics are initialized separately. */
+ assert(opts->stat == NULL);
return def;
}
@@ -154,6 +157,80 @@ index_def_dup(const struct index_def *def)
return NULL;
}
}
+ if (def->opts.stat != NULL) {
+ dup->opts.stat = index_stat_dup(def->opts.stat);
+ if (dup->opts.stat == NULL) {
+ index_def_delete(dup);
+ return NULL;
+ }
+ }
+ return dup;
+}
+
+size_t
+index_stat_sizeof(const struct index_sample *samples, uint32_t sample_count,
+ uint32_t field_count)
+{
+ /* Space for index_stat struct itself. */
+ size_t alloc_size = sizeof(struct index_stat);
+ /*
+ * Space for stat1, log_est and avg_eg arrays.
+ * stat1 and log_est feature additional field
+ * to describe total count of tuples in index.
+ */
+ alloc_size += (3 * field_count + 2) * sizeof(uint32_t);
+ /* Space for samples structs. */
+ alloc_size += sizeof(struct index_sample) * sample_count;
+ /* Space for eq, lt and dlt stats. */
+ alloc_size += 3 * sizeof(uint32_t) * field_count * sample_count;
+ /*
+ * Space for sample keys. Additional two bytes are
+ * required for '\0' guards at the end.
+ */
+ for (uint32_t i = 0; i < sample_count; ++i)
+ alloc_size += samples[i].key_size + 2;
+ return alloc_size;
+}
+
+struct index_stat *
+index_stat_dup(const struct index_stat *src)
+{
+ size_t size = index_stat_sizeof(src->samples, src->sample_count,
+ src->sample_field_count);
+ struct index_stat *dup = (struct index_stat *) malloc(size);
+ if (dup == NULL) {
+ diag_set(OutOfMemory, size, "malloc", "index stat");
+ return NULL;
+ }
+ memcpy(dup, src, size);
+ uint32_t array_size = src->sample_field_count * sizeof(uint32_t);
+ uint32_t stat1_offset = sizeof(struct index_stat);
+ dup->tuple_stat1 = (uint32_t *)((char*)dup + stat1_offset);
+ dup->tuple_log_est = (log_est_t *)((char*)dup + stat1_offset +
+ array_size + sizeof(uint32_t));
+ dup->avg_eq = (uint32_t *)((char*)dup + stat1_offset +
+ 2 * (array_size + sizeof(uint32_t)));
+ uint32_t samples_offset = stat1_offset + 3 * array_size +
+ 2 * sizeof(uint32_t);
+ dup->samples = (struct index_sample *)((char*)dup + samples_offset);
+ uint32_t current_sample_offset = samples_offset +
+ src->sample_count *
+ sizeof(struct index_sample);
+ for (uint32_t i = 0; i < src->sample_count; ++i) {
+ dup->samples[i].eq = (uint32_t *)((char*)dup +
+ current_sample_offset);
+ dup->samples[i].lt = (uint32_t *)((char*)dup +
+ current_sample_offset +
+ array_size);
+ dup->samples[i].dlt = (uint32_t *)((char*)dup +
+ current_sample_offset +
+ 2 * array_size);
+ dup->samples[i].sample_key =
+ (char *)((char*)dup + current_sample_offset + 3 * array_size);
+ current_sample_offset += 3 * array_size +
+ dup->samples[i].key_size + 2;
+ }
return dup;
}
diff --git a/src/box/index_def.h b/src/box/index_def.h
index dc8293b23..107255cc7 100644
--- a/src/box/index_def.h
+++ b/src/box/index_def.h
@@ -58,6 +58,68 @@ enum rtree_index_distance_type {
};
extern const char *rtree_index_distance_type_strs[];
+/** Simple alias to represent logarithm metrics. */
+typedef int16_t log_est_t;
+
+/**
+ * One sample represents index key and three arrays with
+ * statistics concerning tuples distribution.
+ * Max number of samples for one index is adjusted by
+ * SQL_STAT4_SAMPLES macros. By default it is about 24 entities.
+ * Samples are chosen to be selective.
+ */
+struct index_sample {
+ /** Key of the sample. */
+ char *sample_key;
+ /** Size of sample key. */
+ size_t key_size;
+ /**
+ * List of integers: first one is the approximate number
+ * of entries in the index whose left-most field exactly
+ * matches the left-most column of the sample;
+ * second one - est. number of entries in the index where
+ * the first two columns match the first two columns of
+ * the sample; and so forth.
+ */
+ uint32_t *eq;
+ /** The same as eq list, but key is less than sample. */
+ uint32_t *lt;
+ /** The same as lt list, but includes only distinct keys. */
+ uint32_t *dlt;
+};
+
+/**
+ * SQL statistics for index, which is used by query planer.
+ * This is general statistics, without any relation to used
+ * engine and data structures (e.g. B-tree or LSM tree).
+ * Statistics appear only after executing ANALYZE statement.
+ * It is loaded from _sql_stat1 and _sql_stat4 system spaces.
+ */
+struct index_stat {
+ /** An array of samples of the left-most key. */
+ struct index_sample *samples;
+ /** Number of samples. */
+ uint32_t sample_count;
+ /** Number of fields in sample arrays: eq, lt and dlt. */
+ uint32_t sample_field_count;
+ /**
+ * List of integers: the first is the number of tuples
+ * in the index; the second one is the average number of
+ * tuples that have the same key part in the first field
+ * of the index; the third - for the first two fields;
+ * and so forth.
+ */
+ uint32_t *tuple_stat1;
+ /** Logarithms of stat1 data. */
+ log_est_t *tuple_log_est;
+ /** Average eq values for keys not in samples. */
+ uint32_t *avg_eq;
+ /** Use this index for == or IN queries only. */
+ bool is_unordered;
+ /** Don't try to use skip-scan optimization if true. */
+ bool skip_scan_enabled;
+};
+
/** Index options */
struct index_opts {
/**
@@ -99,6 +161,12 @@ struct index_opts {
* SQL statement that produced this index.
*/
char *sql;
+ /**
+ * SQL specific statistics concerning tuples
+ * distribution for query planer. It is automatically
+ * filled after running ANALYZE command.
+ */
+ struct index_stat *stat;
};
extern const struct index_opts index_opts_default;
@@ -120,6 +188,7 @@ static inline void
index_opts_destroy(struct index_opts *opts)
{
free(opts->sql);
+ free(opts->stat);
TRASH(opts);
}
@@ -173,6 +242,50 @@ struct index_def {
struct index_def *
index_def_dup(const struct index_def *def);
+/**
+ * Calculate size of index's statistics.
+ * Statistics is located in memory according to following layout:
+ *
+ * +-------------------------+ <- Allocated memory starts here
+ * | struct index_stat |
+ * |-------------------------|
+ * | stat1 array | ^
+ * |-------------------------| |
+ * | log_est array | | 3 * array_size + 2 * uint_32
+ * |-------------------------| |
+ * | avg_eq array | v
+ * |-------------------------|
+ * | samples struct array |
+ * |-------------------------|
+ * | eq | lt | dlt | key | <- Content of one sample
+ * +-------------------------+
+ * ... <- Up to 24 samples
+ * | eq | lt | dlt | key |
+ * +-------------------------+
+ *
+ * array_size = field_count * sizeof(uint_32)
+ * offset of one sample = 3 * array_size + key_size
+ *
+ * @param samples Array of samples needed for calculating
+ * size of keys.
+ * @param sample_count Count of samples in samples array.
+ * @param field_count Count of fields in statistics arrays.
+ * @retval Size needed for statistics allocation in bytes.
+ */
+size_t
+index_stat_sizeof(const struct index_sample *samples, uint32_t sample_count,
+ uint32_t field_count);
+
+/**
+ * Duplicate index_stat object.
+ * To understand memory layout see index_stat_sizeof() function.
+ *
+ * @param src Stat to duplicate.
+ * @retval Copy of the @src.
+ */
+struct index_stat *
+index_stat_dup(const struct index_stat *src);
+
/* Destroy and free an index_def. */
void
index_def_delete(struct index_def *def);
diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c
index 57cac437c..4f21cd25f 100644
--- a/src/box/sql/analyze.c
+++ b/src/box/sql/analyze.c
@@ -104,8 +104,8 @@
* large nEq values.
*
*/
-#ifndef SQLITE_OMIT_ANALYZE
+#include "box/box.h"
#include "box/index.h"
#include "box/key_def.h"
#include "box/tuple_compare.h"
@@ -550,7 +550,7 @@ samplePushPrevious(Stat4Accum * p, int iChng)
{
int i;
/* Check if any samples from the aBest[] array should be pushed
- * into IndexSample.a[] at this point.
+ * into samples array at this point.
*/
for (i = (p->nCol - 1); i >= iChng; i--) {
Stat4Sample *pBest = &p->aBest[i];
@@ -1204,431 +1204,462 @@ sql_index_tuple_size(struct space *space, struct index *idx)
return avg_tuple_size;
}
-/*
- * Used to pass information from the analyzer reader through to the
- * callback routine.
+/**
+ * Used to pass information from the analyzer reader through
+ * to the callback routine.
*/
-typedef struct analysisInfo analysisInfo;
-struct analysisInfo {
+struct analysis_index_info {
+ /** Database handler. */
sqlite3 *db;
+ /** Array of statistics for each index. */
+ struct index_stat *stats;
+ /** Ordinal number of index to be processed. */
+ uint32_t index_count;
};
-/*
- * The first argument points to a nul-terminated string containing a
- * list of space separated integers. Read the first nOut of these into
- * the array aOut[].
+/**
+ * The first argument points to a nul-terminated string
+ * containing a list of space separated integers. Load
+ * the first stat_size of these into the output arrays.
+ *
+ * @param stat_string String containing array of integers.
+ * @param stat_size Size of output arrays.
+ * @param[out] stat_exact Decoded array of statistics.
+ * @param[out] stat_log Decoded array of stat logariphms.
*/
static void
-decodeIntArray(char *zIntArray, /* String containing int array to decode */
- int nOut, /* Number of slots in aOut[] */
- tRowcnt * aOut, /* Store integers here */
- LogEst * aLog, /* Or, if aOut==0, here */
- Index * pIndex /* Handle extra flags for this index, if not NULL */
- )
-{
- char *z = zIntArray;
- int c;
- int i;
- tRowcnt v;
-
- if (z == 0)
+decode_stat_string(const char *stat_string, int stat_size, tRowcnt *stat_exact,
+ LogEst *stat_log) {
+ char *z = (char *) stat_string;
+ if (z == NULL)
z = "";
-
- for (i = 0; *z && i < nOut; i++) {
- v = 0;
+ for (int i = 0; *z && i < stat_size; i++) {
+ tRowcnt v = 0;
+ int c;
while ((c = z[0]) >= '0' && c <= '9') {
v = v * 10 + c - '0';
z++;
}
- if (aOut)
- aOut[i] = v;
- if (aLog)
- aLog[i] = sqlite3LogEst(v);
+ if (stat_exact != NULL)
+ stat_exact[i] = v;
+ if (stat_log != NULL)
+ stat_log[i] = sqlite3LogEst(v);
if (*z == ' ')
z++;
}
-
- if (pIndex) {
- pIndex->bUnordered = 0;
- pIndex->noSkipScan = 0;
- while (z[0]) {
- if (sqlite3_strglob("unordered*", z) == 0) {
- pIndex->bUnordered = 1;
- } else if (sqlite3_strglob("noskipscan*", z) == 0) {
- pIndex->noSkipScan = 1;
- }
- while (z[0] != 0 && z[0] != ' ')
- z++;
- while (z[0] == ' ')
- z++;
- }
- }
}
-/*
+/**
* This callback is invoked once for each index when reading the
* _sql_stat1 table.
*
* argv[0] = name of the table
* argv[1] = name of the index (might be NULL)
- * argv[2] = results of analysis - on integer for each column
+ * argv[2] = results of analysis - array of integers
+ *
+ * Entries for which argv[1]==NULL simply record the number
+ * of rows in the table. This routine also allocates memory
+ * for stat struct itself and statistics which are not related
+ * to stat4 samples.
*
- * Entries for which argv[1]==NULL simply record the number of rows in
- * the table.
+ * @param data Additional analysis info.
+ * @param argc Number of arguments.
+ * @param argv Callback arguments.
+ * @retval 0 on success, -1 otherwise.
*/
static int
-analysisLoader(void *pData, int argc, char **argv, char **NotUsed)
+analysis_loader(void *data, int argc, char **argv, char **unused)
{
- analysisInfo *pInfo = (analysisInfo *) pData;
- Index *pIndex;
- Table *pTable;
- const char *z;
-
assert(argc == 3);
- UNUSED_PARAMETER2(NotUsed, argc);
-
- if (argv == 0 || argv[0] == 0 || argv[2] == 0) {
- return 0;
- }
- pTable = sqlite3HashFind(&pInfo->db->pSchema->tblHash, argv[0]);
- if (pTable == NULL)
+ UNUSED_PARAMETER2(unused, argc);
+ if (argv == 0 || argv[0] == 0 || argv[1] == 0 || argv[2] == 0)
return 0;
- if (argv[1] == 0) {
- pIndex = 0;
- } else if (sqlite3_stricmp(argv[0], argv[1]) == 0) {
- pIndex = sqlite3PrimaryKeyIndex(pTable);
+ struct analysis_index_info *info = (struct analysis_index_info *) data;
+ assert(info->stats != NULL);
+ struct index_stat *stat = &info->stats[info->index_count++];
+ uint32_t space_id = box_space_id_by_name(argv[0], strlen(argv[0]));
+ if (space_id == BOX_ID_NIL)
+ return -1;
+ struct space *space = space_by_id(space_id);
+ assert(space != NULL);
+ struct index *index;
+ uint32_t iid = box_index_id_by_name(space_id, argv[1], strlen(argv[1]));
+ /*
+ * Convention is if index's name matches with space's
+ * one, then it is primary index.
+ */
+ if (iid != BOX_ID_NIL) {
+ index = space_index(space, iid);
} else {
- pIndex = sqlite3HashFind(&pTable->idxHash, argv[1]);
+ if (sqlite3_stricmp(argv[0], argv[1]) != 0)
+ return -1;
+ index = space_index(space, 0);
}
- z = argv[2];
-
- if (pIndex) {
- tRowcnt *aiRowEst = 0;
- int nCol = index_column_count(pIndex) + 1;
- /* Index.aiRowEst may already be set here if there are duplicate
- * _sql_stat1 entries for this index. In that case just clobber
- * the old data with the new instead of allocating a new array.
- */
- if (pIndex->aiRowEst == 0) {
- pIndex->aiRowEst =
- (tRowcnt *) sqlite3MallocZero(sizeof(tRowcnt) *
- nCol);
- if (pIndex->aiRowEst == 0)
- sqlite3OomFault(pInfo->db);
- }
- aiRowEst = pIndex->aiRowEst;
- pIndex->bUnordered = 0;
- decodeIntArray((char *)z, nCol, aiRowEst, pIndex->aiRowLogEst,
- pIndex);
+ assert(index != NULL);
+ /*
+ * Additional field is used to describe total
+ * count of tuples in index. Although now all
+ * indexes feature the same number of tuples,
+ * partial indexes are going to be implemented
+ * someday.
+ */
+ uint32_t column_count = index->def->key_def->part_count + 1;
+ /*
+ * Stat arrays may already be set here if there
+ * are duplicate _sql_stat1 entries for this
+ * index. In that case just clobber the old data
+ * with the new instead of allocating a new array.
+ */
+ uint32_t stat1_size = column_count * sizeof(uint32_t);
+ stat->tuple_stat1 = region_alloc(&fiber()->gc, stat1_size);
+ if (stat->tuple_stat1 == NULL) {
+ diag_set(OutOfMemory, stat1_size, "region", "tuple_stat1");
+ return -1;
}
-
- return 0;
-}
-
-/*
- * If the Index.aSample variable is not NULL, delete the aSample[] array
- * and its contents.
- */
-void
-sqlite3DeleteIndexSamples(sqlite3 * db, Index * pIdx)
-{
- if (pIdx->aSample) {
- int j;
- for (j = 0; j < pIdx->nSample; j++) {
- IndexSample *p = &pIdx->aSample[j];
- sqlite3DbFree(db, p->p);
- }
- sqlite3DbFree(db, pIdx->aSample);
+ stat->tuple_log_est = region_alloc(&fiber()->gc, stat1_size);
+ if (stat->tuple_log_est == NULL) {
+ diag_set(OutOfMemory, stat1_size, "region", "tuple_log_est");
+ return -1;
}
- if (db && db->pnBytesFreed == 0) {
- pIdx->nSample = 0;
- pIdx->aSample = 0;
+ decode_stat_string(argv[2], column_count, stat->tuple_stat1,
+ stat->tuple_log_est);
+ stat->is_unordered = false;
+ stat->skip_scan_enabled = true;
+ char *z = argv[2];
+ /* Position ptr at the end of stat string. */
+ for (; *z == ' ' || (*z >= '0' && *z <= '9'); ++z);
+ while (z[0]) {
+ if (sqlite3_strglob("unordered*", z) == 0) {
stat->is_unordered = true;
} else if (sqlite3_strglob("noskipscan*", z) == 0) {
stat->skip_scan_enabled = false;
+ }
+ while (z[0] != 0 && z[0] != ' ')
+ z++;
+ while (z[0] == ' ')
+ z++;
}
+ return 0;
}
-/*
- * Populate the pIdx->aAvgEq[] array based on the samples currently
- * stored in pIdx->aSample[].
+/**
+ * Calculate avg_eq array based on the samples from index.
+ * Some *magic* calculations happen here.
*/
static void
-initAvgEq(Index * pIdx)
+init_avg_eq(struct index *index, struct index_stat *stat)
{
- if (pIdx) {
- IndexSample *aSample = pIdx->aSample;
- IndexSample *pFinal = &aSample[pIdx->nSample - 1];
- int iCol;
- int nCol = 1;
- if (pIdx->nSampleCol > 1) {
- /* If this is stat4 data, then calculate aAvgEq[] values for all
- * sample columns except the last. The last is always set to 1, as
- * once the trailing PK fields are considered all index keys are
- * unique.
- */
- nCol = pIdx->nSampleCol - 1;
- pIdx->aAvgEq[nCol] = 1;
+ assert(stat != NULL);
+ struct index_sample *samples = stat->samples;
+ uint32_t sample_count = stat->sample_count;
+ uint32_t field_count = stat->sample_field_count;
+ struct index_sample *last_sample = &samples[sample_count - 1];
+ if (field_count > 1)
+ stat->avg_eq[--field_count] = 1;
+ for (uint32_t i = 0; i < field_count; ++i) {
+ uint32_t column_count = index->def->key_def->part_count;
+ tRowcnt eq_sum = 0;
+ tRowcnt eq_avg = 0;
+ uint32_t tuple_count = index->vtab->size(index);
+ uint64_t distinct_tuple_count;
+ uint64_t terms_sum = 0;
+ if (i >= column_count || stat->tuple_stat1[i + 1] == 0) {
+ distinct_tuple_count = 100 * last_sample->dlt[i];
+ sample_count--;
+ } else {
+ assert(stat->tuple_stat1 != NULL);
+ distinct_tuple_count = (100 * tuple_count) /
+ stat->tuple_stat1[i + 1];
}
- for (iCol = 0; iCol < nCol; iCol++) {
- int nSample = pIdx->nSample;
- int i; /* Used to iterate through samples */
- tRowcnt sumEq = 0; /* Sum of the nEq values */
- tRowcnt avgEq = 0;
- tRowcnt nRow; /* Number of rows in index */
- i64 nSum100 = 0; /* Number of terms contributing to sumEq */
- i64 nDist100; /* Number of distinct values in index */
- int nColumn = index_column_count(pIdx);
-
- if (!pIdx->aiRowEst || iCol >= nColumn
- || pIdx->aiRowEst[iCol + 1] == 0) {
- nRow = pFinal->anLt[iCol];
- nDist100 = (i64) 100 *pFinal->anDLt[iCol];
- nSample--;
- } else {
- nRow = pIdx->aiRowEst[0];
- nDist100 =
- ((i64) 100 * pIdx->aiRowEst[0]) /
- pIdx->aiRowEst[iCol + 1];
+ for (uint32_t j = 0; j < sample_count; ++j) {
+ if (j == (stat->sample_count - 1) ||
+ samples[j].dlt[i] != samples[j + 1].dlt[i]) {
+ eq_sum += samples[j].eq[i];
+ terms_sum += 100;
}
-
- /* Set nSum to the number of distinct (iCol+1) field prefixes that
- * occur in the stat4 table for this index. Set sumEq to the sum of
- * the nEq values for column iCol for the same set (adding the value
- * only once where there exist duplicate prefixes).
- */
- for (i = 0; i < nSample; i++) {
- if (i == (pIdx->nSample - 1)
- || aSample[i].anDLt[iCol] !=
- aSample[i + 1].anDLt[iCol]
- ) {
- sumEq += aSample[i].anEq[iCol];
- nSum100 += 100;
- }
- }
-
- if (nDist100 > nSum100) {
- avgEq =
- ((i64) 100 * (nRow - sumEq)) / (nDist100 -
- nSum100);
- }
- if (avgEq == 0)
- avgEq = 1;
- pIdx->aAvgEq[iCol] = avgEq;
}
+ if (distinct_tuple_count > terms_sum) {
+ eq_avg = 100 * (tuple_count - eq_sum) /
+ (distinct_tuple_count - terms_sum);
+ }
+ if (eq_avg == 0)
+ eq_avg = 1;
+ stat->avg_eq[i] = eq_avg;
}
}
-/*
- * Given two IndexSample arguments, compare there payloads
+/**
+ * Given two key arguments, compare their payloads.
+ * This is a simple wrapper around key_compare() to support
+ * qsort() interface.
*/
static int
-sampleCompareMsgPack(const void *a, const void *b, void *arg)
+sample_compare(const void *a, const void *b, void *arg)
{
struct key_def *def = (struct key_def *)arg;
- return key_compare(((IndexSample *) a)->p, ((IndexSample *) b)->p, def);
+ return key_compare(((struct index_sample *) a)->sample_key,
+ ((struct index_sample *) b)->sample_key, def);
}
-/*
+/**
* Load the content from the _sql_stat4 table
- * into the relevant Index.aSample[] arrays.
+ * into the relevant index->stat->samples[] arrays.
+ *
+ * Arguments must point to SQL statements that return
+ * data equivalent to the following:
*
- * Arguments zSql1 and zSql2 must point to SQL statements that return
- * data equivalent to the following (statements are different for stat4,
- * see the caller of this function for details):
+ * prepare: SELECT tbl,idx,count(*) FROM _sql_stat4 GROUP BY tbl,idx;
+ * load: SELECT tbl,idx,neq,nlt,ndlt,sample FROM _sql_stat4;
*
- * zSql1: SELECT tbl,idx,count(*) FROM _sql_stat4 GROUP BY tbl,idx
- * zSql2: SELECT tbl,idx,neq,nlt,ndlt,sample FROM _sql_stat4
+ * 'prepare' statement is used to allocate enough memory for
+ * statistics (i.e. arrays eq, lt, dlt and avg_eq). 'load' query
+ * is needed for filling in allocated memory with actual samples.
*
+ * @param db Database handler.
+ * @param sql_select_prepare SELECT statement, see above.
+ * @param sql_select_load SELECT statement, see above.
+ * @param[out] stats Statistics is saved here.
+ * @retval 0 on success, -1 otherwise.
*/
static int
-loadStatTbl(sqlite3 * db, /* Database handle */
- Table * pStatTab, /* Stat table to load to */
- const char *zSql1, /* SQL statement 1 (see above) */
- const char *zSql2 /* SQL statement 2 (see above) */
- )
+load_stat_from_space(struct sqlite3 *db, const char *sql_select_prepare,
+ const char *sql_select_load, struct index_stat *stats)
{
- int rc; /* Result codes from subroutines */
- sqlite3_stmt *pStmt = 0; /* An SQL statement being run */
- Index *pPrevIdx = 0; /* Previous index in the loop */
- IndexSample *pSample; /* A slot in pIdx->aSample[] */
- int nIdxCnt = 0; /* Number of different indexes visited */
- int nIdx = 0; /* Index loop iterator */
- Index **aIndex = 0; /* Array holding all visited indexes */
-
- assert(db->lookaside.bDisable);
-
- assert(pStatTab);
- nIdxCnt = box_index_len(SQLITE_PAGENO_TO_SPACEID(pStatTab->tnum), 0);
-
- if (nIdxCnt > 0) {
- aIndex = sqlite3DbMallocZero(db, sizeof(Index *) * nIdxCnt);
- if (aIndex == 0) {
- return SQLITE_NOMEM_BKPT;
+ struct index **indexes = NULL;
+ uint32_t index_count = box_index_len(BOX_SQL_STAT4_ID, 0);
+ if (index_count > 0) {
+ size_t alloc_size = sizeof(struct index *) * index_count;
+ indexes = region_alloc(&fiber()->gc, alloc_size);
+ if (indexes == NULL) {
+ diag_set(OutOfMemory, alloc_size, "region", "indexes");
+ return -1;
}
}
-
- rc = sqlite3_prepare(db, zSql1, -1, &pStmt, 0);
+ sqlite3_stmt *stmt = NULL;
+ int rc = sqlite3_prepare(db, sql_select_prepare, -1, &stmt, 0);
if (rc)
goto finalize;
-
- while (sqlite3_step(pStmt) == SQLITE_ROW) {
- int nIdxCol = 1; /* Number of columns in stat4 records */
-
- char *zTab; /* Table name */
- char *zIndex; /* Index name */
- Index *pIdx; /* Pointer to the index object */
- int nSample; /* Number of samples */
- int nByte; /* Bytes of space required */
- int i; /* Bytes of space required */
- tRowcnt *pSpace;
-
- zTab = (char *)sqlite3_column_text(pStmt, 0);
- if (zTab == 0)
+ uint32_t current_idx_count = 0;
+ while (sqlite3_step(stmt) == SQLITE_ROW) {
+ const char *space_name = (char *)sqlite3_column_text(stmt, 0);
+ if (space_name == NULL)
continue;
- zIndex = (char *)sqlite3_column_text(pStmt, 1);
- if (zIndex == 0)
+ const char *index_name = (char *)sqlite3_column_text(stmt, 1);
+ if (index_name == NULL)
continue;
- nSample = sqlite3_column_int(pStmt, 2);
- pIdx = sqlite3LocateIndex(db, zIndex, zTab);
- assert(pIdx == 0 || pIdx->nSample == 0);
- /* Index.nSample is non-zero at this point if data has already been
- * loaded from the stat4 table.
+ uint32_t sample_count = sqlite3_column_int(stmt, 2);
+ uint32_t space_id = box_space_id_by_name(space_name,
+ strlen(space_name));
+ assert(space_id != BOX_ID_NIL);
+ struct space *space = space_by_id(space_id);
+ assert(space != NULL);
+ struct index *index;
+ uint32_t iid = box_index_id_by_name(space_id, index_name,
+ strlen(index_name));
+ if (sqlite3_stricmp(space_name, index_name) == 0 &&
+ iid == BOX_ID_NIL)
+ index = space_index(space, 0);
+ else
+ index = space_index(space, iid);
+ assert(index != NULL);
+ uint32_t column_count = index->def->key_def->part_count;
+ struct index_stat *stat = &stats[current_idx_count];
+ stat->sample_field_count = column_count;
+ stat->sample_count = 0;
+ /* Space for sample structs. */
+ size_t alloc_size = sizeof(struct index_sample) * sample_count;
+ /* Space for eq, lt and dlt stats. */
+ alloc_size += sizeof(tRowcnt) * column_count * 3 * sample_count;
+ /* Space for avg_eq statistics. */
+ alloc_size += column_count * sizeof(tRowcnt);
+ /*
+ * We are trying to fit into one chunk samples,
+ * eq_avg and arrays of eq, lt and dlt stats.
+ * Firstly comes memory for structs of samples,
+ * then array of eq_avg and finally arrays of
+ * eq, lt and dlt stats.
*/
- if (pIdx == 0 || pIdx->nSample)
- continue;
-
- nIdxCol = index_column_count(pIdx);
- pIdx->nSampleCol = nIdxCol;
- nByte = sizeof(IndexSample) * nSample;
- nByte += sizeof(tRowcnt) * nIdxCol * 3 * nSample;
- nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */
-
- pIdx->aSample = sqlite3DbMallocZero(db, nByte);
- if (pIdx->aSample == 0) {
- sqlite3_finalize(pStmt);
- rc = SQLITE_NOMEM_BKPT;
+ stat->samples = region_alloc(&fiber()->gc, alloc_size);
+ if (stat->samples == NULL) {
+ diag_set(OutOfMemory, alloc_size, "region", "samples");
+ rc = -1;
goto finalize;
}
- pSpace = (tRowcnt *) & pIdx->aSample[nSample];
- pIdx->aAvgEq = pSpace;
- pSpace += nIdxCol;
- for (i = 0; i < nSample; i++) {
- pIdx->aSample[i].anEq = pSpace;
- pSpace += nIdxCol;
- pIdx->aSample[i].anLt = pSpace;
- pSpace += nIdxCol;
- pIdx->aSample[i].anDLt = pSpace;
- pSpace += nIdxCol;
+ memset(stat->samples, 0, alloc_size);
+ /* Marking memory manually. */
+ tRowcnt *pSpace = (tRowcnt *) & stat->samples[sample_count];
+ stat->avg_eq = pSpace;
+ pSpace += column_count;
+ for (uint32_t i = 0; i < sample_count; i++) {
+ stat->samples[i].eq = pSpace;
+ pSpace += column_count;
+ stat->samples[i].lt = pSpace;
+ pSpace += column_count;
+ stat->samples[i].dlt = pSpace;
+ pSpace += column_count;
}
- assert(((u8 *) pSpace) - nByte == (u8 *) (pIdx->aSample));
+ assert(((u8 *) pSpace) - alloc_size == (u8 *) (stat->samples));
+ indexes[current_idx_count] = index;
+ assert(current_idx_count < index_count);
+ current_idx_count++;
- aIndex[nIdx] = pIdx;
- assert(nIdx < nIdxCnt);
- ++nIdx;
}
- rc = sqlite3_finalize(pStmt);
+ rc = sqlite3_finalize(stmt);
if (rc)
goto finalize;
-
- rc = sqlite3_prepare(db, zSql2, -1, &pStmt, 0);
+ rc = sqlite3_prepare(db, sql_select_load, -1, &stmt, 0);
if (rc)
goto finalize;
-
- while (sqlite3_step(pStmt) == SQLITE_ROW) {
- char *zTab; /* Table name */
- char *zIndex; /* Index name */
- Index *pIdx; /* Pointer to the index object */
- int nCol = 1; /* Number of columns in index */
-
- zTab = (char *)sqlite3_column_text(pStmt, 0);
- if (zTab == 0)
+ struct index *prev_index = NULL;
+ current_idx_count = 0;
+ while (sqlite3_step(stmt) == SQLITE_ROW) {
+ const char *space_name = (char *)sqlite3_column_text(stmt, 0);
+ if (space_name == NULL)
continue;
- zIndex = (char *)sqlite3_column_text(pStmt, 1);
- if (zIndex == 0)
+ const char *index_name = (char *)sqlite3_column_text(stmt, 1);
+ if (index_name == NULL)
continue;
- pIdx = sqlite3LocateIndex(db, zIndex, zTab);
- if (pIdx == 0)
- continue;
-
- nCol = pIdx->nSampleCol;
- if (pIdx != pPrevIdx) {
- initAvgEq(pPrevIdx);
- pPrevIdx = pIdx;
+ uint32_t space_id = box_space_id_by_name(space_name,
+ strlen(space_name));
+ assert(space_id != BOX_ID_NIL);
+ struct space *space = space_by_id(space_id);
+ assert(space != NULL);
+ struct index *index;
+ uint32_t iid = box_index_id_by_name(space_id, index_name,
+ strlen(index_name));
+ if (iid != BOX_ID_NIL) {
+ index = space_index(space, iid);
+ } else {
+ if (sqlite3_stricmp(space_name, index_name) != 0)
+ return -1;
+ index = space_index(space, 0);
+ }
+ assert(index != NULL);
+ uint32_t column_count = index->def->key_def->part_count;
+ if (index != prev_index) {
+ if (prev_index != NULL) {
+ init_avg_eq(prev_index,
+ &stats[current_idx_count]);
+ current_idx_count++;
+ }
+ prev_index = index;
}
- pSample = &pIdx->aSample[pIdx->nSample];
- decodeIntArray((char *)sqlite3_column_text(pStmt, 2), nCol,
- pSample->anEq, 0, 0);
- decodeIntArray((char *)sqlite3_column_text(pStmt, 3), nCol,
- pSample->anLt, 0, 0);
- decodeIntArray((char *)sqlite3_column_text(pStmt, 4), nCol,
- pSample->anDLt, 0, 0);
-
- /* Take a copy of the sample. Add two 0x00 bytes the end of the buffer.
- * This is in case the sample record is corrupted. In that case, the
- * sqlite3VdbeRecordCompare() may read up to two varints past the
- * end of the allocated buffer before it realizes it is dealing with
- * a corrupt record. Adding the two 0x00 bytes prevents this from causing
- * a buffer overread.
+ struct index_stat *stat = &stats[current_idx_count];
+ struct index_sample *sample =
+ &stat->samples[stats[current_idx_count].sample_count];
+ decode_stat_string((char *)sqlite3_column_text(stmt, 2),
+ column_count, sample->eq, 0);
+ decode_stat_string((char *)sqlite3_column_text(stmt, 3),
+ column_count, sample->lt, 0);
+ decode_stat_string((char *)sqlite3_column_text(stmt, 4),
+ column_count, sample->dlt, 0);
+ /*
+ * Take a copy of the sample. Add two 0x00 bytes
+ * the end of the buffer. This is in case the
+ * sample record is corrupted. In that case, the
+ * sqlite3VdbeRecordCompare() may read up to two
+ * varints past the end of the allocated buffer
+ * before it realizes it is dealing with a corrupt
+ * record. Adding the two 0x00 bytes prevents this
+ * from causing a buffer overread.
*/
- pSample->n = sqlite3_column_bytes(pStmt, 5);
- pSample->p = sqlite3DbMallocZero(db, pSample->n + 2);
- if (pSample->p == 0) {
- sqlite3_finalize(pStmt);
- rc = SQLITE_NOMEM_BKPT;
+ sample->key_size = sqlite3_column_bytes(stmt, 5);
+ sample->sample_key = region_alloc(&fiber()->gc,
+ sample->key_size + 2);
+ if (sample->sample_key == NULL) {
+ sqlite3_finalize(stmt);
+ rc = -1;
+ diag_set(OutOfMemory, sample->key_size + 2,
+ "region", "sample_key");
goto finalize;
}
- if (pSample->n) {
- memcpy(pSample->p, sqlite3_column_blob(pStmt, 5),
- pSample->n);
+ if (sample->key_size > 0) {
+ memcpy(sample->sample_key,
+ sqlite3_column_blob(stmt, 5),
+ sample->key_size);
}
- pIdx->nSample++;
+ stats[current_idx_count].sample_count++;
}
- rc = sqlite3_finalize(pStmt);
- if (rc == SQLITE_OK)
- initAvgEq(pPrevIdx);
-
- assert(nIdx <= nIdxCnt);
- for (int i = 0; i < nIdx; ++i) {
- Index *pIdx = aIndex[i];
- assert(pIdx);
- /* - sid, iid
- * - space_index_key_def
- * - key_compare
- */
- int sid = SQLITE_PAGENO_TO_SPACEID(pIdx->tnum);
- int iid = SQLITE_PAGENO_TO_INDEXID(pIdx->tnum);
- struct space *s = space_by_id(sid);
- assert(s);
- struct key_def *def = space_index_key_def(s, iid);
- assert(def);
- qsort_arg(pIdx->aSample,
- pIdx->nSample,
- sizeof(pIdx->aSample[0]), sampleCompareMsgPack, def);
+ rc = sqlite3_finalize(stmt);
+ if (rc == SQLITE_OK && prev_index != NULL)
+ init_avg_eq(prev_index, &stats[current_idx_count]);
+ assert(current_idx_count <= index_count);
+ for (uint32_t i = 0; i < current_idx_count; ++i) {
+ struct index *index = indexes[i];
+ assert(index != NULL);
+ qsort_arg(stats[i].samples,
+ stats[i].sample_count,
+ sizeof(struct index_sample),
+ sample_compare, index->def->key_def);
}
-
finalize:
- sqlite3DbFree(db, aIndex);
return rc;
}
-/*
- * Load content from the _sql_stat4 table into
- * the Index.aSample[] arrays of all indices.
- */
static int
-loadStat4(sqlite3 * db)
+load_stat_to_index(struct sqlite3 *db, const char *sql_select_load,
+ struct index_stat **stats)
{
- Table *pTab = 0; /* Pointer to stat table */
-
- assert(db->lookaside.bDisable);
- pTab = sqlite3HashFind(&db->pSchema->tblHash, "_sql_stat4");
- /* _slq_stat4 is a system space, so it always exists. */
- assert(pTab != NULL);
- return loadStatTbl(db, pTab,
- "SELECT \"tbl\",\"idx\",count(*) FROM \"_sql_stat4\""
- " GROUP BY \"tbl\",\"idx\"",
- "SELECT \"tbl\",\"idx\",\"neq\",\"nlt\",\"ndlt\","
- "\"sample\" FROM \"_sql_stat4\"");
+ assert(stats != NULL && *stats != NULL);
+ struct sqlite3_stmt *stmt = NULL;
+ if (sqlite3_prepare(db, sql_select_load, -1, &stmt, 0) != 0)
+ return -1;
+ uint32_t current_idx_count = 0;
+ while (sqlite3_step(stmt) == SQLITE_ROW) {
+ const char *space_name = (char *)sqlite3_column_text(stmt, 0);
+ if (space_name == NULL)
+ continue;
+ const char *index_name = (char *)sqlite3_column_text(stmt, 1);
+ if (index_name == NULL)
+ continue;
+ uint32_t space_id = box_space_id_by_name(space_name,
+ strlen(space_name));
+ if (space_id == BOX_ID_NIL)
+ return -1;
+ struct space *space = space_by_id(space_id);
+ assert(space != NULL);
+ struct index *index;
+ uint32_t iid = box_index_id_by_name(space_id, index_name,
+ strlen(index_name));
+ if (iid != BOX_ID_NIL) {
+ index = space_index(space, iid);
+ } else {
+ if (sqlite3_stricmp(space_name, index_name) != 0)
+ return -1;
+ index = space_index(space, 0);
+ }
+ assert(index != NULL);
+ free(index->def->opts.stat);
+ index->def->opts.stat = stats[current_idx_count++];
+ }
+ return 0;
}
+/**
+ * default_tuple_est[] array contains default information
+ * which is used when we don't have real space, e.g. temporary
+ * objects representing result set of nested SELECT or VIEW.
+ *
+ * First number is supposed to contain the number of elements
+ * in the index. Since we do not know, guess 1 million.
+ * Second one is an estimate of the number of rows in the
+ * table that match any particular value of the first column of
+ * the index. Third one is an estimate of the number of
+ * rows that match any particular combination of the first 2
+ * columns of the index. And so on. It must always be true:
+ *
+ * default_tuple_est[N] <= default_tuple_est[N-1]
+ * default_tuple_est[N] >= 1
+ *
+ * Apart from that, we have little to go on besides intuition
+ * as to how default values should be initialized. The numbers
+ * generated here are based on typical values found in actual
+ * indices.
+ */
+const log_est_t default_tuple_est[] = {DEFAULT_TUPLE_LOG_COUNT,
+/** [10*log_{2}(x)]: 10, 9, 8, 7, 6, 5 */
+ 33, 32, 30, 28, 26, 23};
+
LogEst
sql_space_tuple_log_count(struct Table *tab)
{
@@ -1643,84 +1674,177 @@ sql_space_tuple_log_count(struct Table *tab)
return sqlite3LogEst(pk->vtab->size(pk));
}
-/*
- * Load the content of the _sql_stat1 and sql_stat4 tables. The
- * contents of _sql_stat1 are used to populate the Index.aiRowEst[]
-*\* arrays. The contents of sql_stat4 are used to populate the
- * Index.aSample[] arrays.
- *
- * If the _sql_stat1 table is not present in the database, SQLITE_ERROR
- * is returned. In this case, even if the sql_stat4 table is present,
- * no data is read from it.
- *
- * If the _sql_stat4 table is not present in the database, SQLITE_ERROR
- * is returned. However, in this case, data is read from the _sql_stat1
- * table (if it is present) before returning.
+log_est_t
+index_field_tuple_est(struct Index *idx, uint32_t field)
+{
+ struct space *space = space_by_id(SQLITE_PAGENO_TO_SPACEID(idx->tnum));
+ if (space == NULL)
+ return idx->aiRowLogEst[field];
+ struct index *tnt_idx =
+ space_index(space, SQLITE_PAGENO_TO_INDEXID(idx->tnum));
+ assert(tnt_idx != NULL);
+ assert(field <= tnt_idx->def->key_def->part_count);
+ if (tnt_idx->def->opts.stat == NULL) {
+ /*
+ * Last number for unique index is always 0:
+ * only one tuple exists with given full key
+ * in unique index and log(1) == 0.
+ */
+ if (field == tnt_idx->def->key_def->part_count &&
+ tnt_idx->def->opts.is_unique)
+ return 0;
+ return default_tuple_est[field + 1 >= 6 ? 6 : field];
+ }
+ return tnt_idx->def->opts.stat->tuple_log_est[field];
+}
+
+/**
+ * This function performs a deep copy of statistics.
+ * In contrast to index_stat_dup(), there is no assumption
+ * that the source statistics are allocated within one chunk,
+ * but the destination is still one big chunk of heap memory.
+ * See also index_stat_sizeof() for understanding memory layout.
*
- * If an OOM error occurs, this function always sets db->mallocFailed.
- * This means if the caller does not care about other errors, the return
- * code may be ignored.
+ * @param dest One chunk of memory where statistics
+ * will be placed.
+ * @param src Statistics to be copied.
*/
-int
-sqlite3AnalysisLoad(sqlite3 * db)
+static void
+stat_copy(struct index_stat *dest, const struct index_stat *src)
{
- analysisInfo sInfo;
- HashElem *i, *j;
- char *zSql;
- int rc = SQLITE_OK;
-
- /* Clear any prior statistics */
- for (j = sqliteHashFirst(&db->pSchema->tblHash); j;
- j = sqliteHashNext(j)) {
- Table *pTab = sqliteHashData(j);
- for (i = sqliteHashFirst(&pTab->idxHash); i;
- i = sqliteHashNext(i)) {
- Index *pIdx = sqliteHashData(i);
- pIdx->aiRowLogEst[0] = 0;
- sqlite3DeleteIndexSamples(db, pIdx);
- pIdx->aSample = 0;
- }
+ assert(dest != NULL);
+ assert(src != NULL);
+ assert(src->sample_count != 0);
+ dest->sample_count = src->sample_count;
+ dest->sample_field_count = src->sample_field_count;
+ dest->skip_scan_enabled = src->skip_scan_enabled;
+ dest->is_unordered = src->is_unordered;
+ uint32_t array_size = src->sample_field_count * sizeof(uint32_t);
+ uint32_t stat1_offset = sizeof(struct index_stat);
+ memcpy((char *)dest + stat1_offset, src->tuple_stat1,
+ array_size + sizeof(uint32_t));
+ memcpy((char *)dest + stat1_offset + array_size + sizeof(uint32_t),
+ src->tuple_log_est, array_size + sizeof(uint32_t));
+ memcpy((char *)dest + stat1_offset + 2 * (array_size + sizeof(uint32_t)),
+ src->avg_eq, array_size);
+ dest->tuple_stat1 = (uint32_t *)((char *)dest + stat1_offset);
+ dest->tuple_log_est = (log_est_t *)((char *)dest + stat1_offset +
+ array_size + sizeof(uint32_t));
+ dest->avg_eq = (uint32_t *)((char *)dest + stat1_offset +
+ 2 * (array_size + sizeof(uint32_t)));
+ uint32_t samples_offset = stat1_offset + 3 * array_size +
+ 2 * sizeof(uint32_t);
+ dest->samples = (struct index_sample *)((char*)dest + samples_offset);
+ uint32_t current_sample_offset = samples_offset + dest->sample_count *
+ sizeof(struct index_sample);
+ assert(src->samples != NULL);
+ for (uint32_t i = 0; i < dest->sample_count; ++i) {
+ dest->samples[i].key_size = src->samples[i].key_size;
+ memcpy((char *)dest + current_sample_offset, src->samples[i].eq,
+ array_size);
+ dest->samples[i].eq = (uint32_t *)((char *)dest +
+ current_sample_offset);
+ current_sample_offset += array_size;
+ memcpy((char *)dest + current_sample_offset, src->samples[i].lt,
+ array_size);
+ dest->samples[i].lt = (uint32_t *)((char *)dest +
+ current_sample_offset);
+ current_sample_offset += array_size;
+ memcpy((char *)dest + current_sample_offset,
+ src->samples[i].dlt, array_size);
+ dest->samples[i].dlt = (uint32_t *)((char *)dest +
+ current_sample_offset);
+ current_sample_offset += array_size;
+ memcpy((char *)dest + current_sample_offset,
+ src->samples[i].sample_key,
+ src->samples[i].key_size + 2);
+ dest->samples[i].sample_key = (char *)dest +
+ current_sample_offset;
+ current_sample_offset += dest->samples[i].key_size + 2;
}
+}
- /* Load new statistics out of the _sql_stat1 table */
- sInfo.db = db;
- zSql = "SELECT \"tbl\",\"idx\",\"stat\" FROM \"_sql_stat1\"";
- rc = sqlite3_exec(db, zSql, analysisLoader, &sInfo, 0);
-
- /* Set appropriate defaults on all indexes not in the _sql_stat1 table */
- for (j = sqliteHashFirst(&db->pSchema->tblHash); j;
- j = sqliteHashNext(j)) {
- Table *pTab = sqliteHashData(j);
- for (i = sqliteHashFirst(&pTab->idxHash); i;
- i = sqliteHashNext(i)) {
- Index *pIdx = sqliteHashData(i);
- if (pIdx->aiRowLogEst[0] == 0)
- sqlite3DefaultRowEst(pIdx);
- }
+int
+sql_analysis_load(struct sqlite3 *db)
+{
+ uint32_t index_count = box_index_len(BOX_SQL_STAT4_ID, 0);
+ if (box_txn_begin() != 0)
+ goto fail;
+ size_t stats_size = index_count * sizeof(struct index_stat);
+ struct index_stat *stats = region_alloc(&fiber()->gc, stats_size);
+ if (stats == NULL) {
+ diag_set(OutOfMemory, stats_size, "region", "stats");
+ goto fail;
}
-
+ struct analysis_index_info info;
+ info.db = db;
+ info.stats = stats;
+ info.index_count = 0;
+ const char *load_stat1 =
+ "SELECT \"tbl\",\"idx\",\"stat\" FROM \"_sql_stat1\"";
+ /* Load new statistics out of the _sql_stat1 table. */
+ if (sqlite3_exec(db, load_stat1, analysis_loader, &info, 0) != 0)
+ goto fail;
+ /*
+ * This query is used to allocate enough memory for
+ * statistics. Result rows are given in a form:
+ * <table name>, <index name>, <count of samples>
+ */
+ const char *init_query = "SELECT \"tbl\",\"idx\",count(*) FROM "
+ "\"_sql_stat4\" GROUP BY \"tbl\",\"idx\"";
+ /* Query for loading statistics into in-memory structs. */
+ const char *load_query = "SELECT \"tbl\",\"idx\",\"neq\",\"nlt\","
+ "\"ndlt\",\"sample\" FROM \"_sql_stat4\"";
/* Load the statistics from the _sql_stat4 table. */
- if (rc == SQLITE_OK && OptimizationEnabled(db, SQLITE_Stat4)) {
- db->lookaside.bDisable++;
- rc = loadStat4(db);
- db->lookaside.bDisable--;
+ if (load_stat_from_space(db, init_query, load_query, stats) != 0)
+ goto fail;
+ /*
+ * Now we have complete statistics for each index
+ * allocated on the region. Time to copy it on the heap.
+ */
+ size_t heap_stats_size = info.index_count * sizeof(struct index_stat *);
+ struct index_stat **heap_stats = region_alloc(&fiber()->gc,
+ heap_stats_size);
+ if (heap_stats == NULL) {
+ diag_set(OutOfMemory, heap_stats_size, "malloc", "heap_stats");
+ goto fail;
}
-
- for (j = sqliteHashFirst(&db->pSchema->tblHash); j;
- j = sqliteHashNext(j)) {
- Table *pTab = sqliteHashData(j);
- for (i = sqliteHashFirst(&pTab->idxHash); i;
- i = sqliteHashNext(i)) {
- Index *pIdx = sqliteHashData(i);
- sqlite3_free(pIdx->aiRowEst);
- pIdx->aiRowEst = 0;
+ /*
+ * We are using 'everything or nothing' policy:
+ * if there is not enough memory for statistics even for
+ * one index, then refresh it for no one.
+ */
+ for (uint32_t i = 0; i < info.index_count; ++i) {
+ size_t size = index_stat_sizeof(stats[i].samples,
+ stats[i].sample_count,
+ stats[i].sample_field_count);
+ heap_stats[i] = malloc(size);
+ if (heap_stats[i] == NULL) {
+ diag_set(OutOfMemory, size, "malloc", "heap_stats");
+ for (uint32_t j = 0; j < i; ++j)
+ free(heap_stats[j]);
+ goto fail;
}
}
-
- if (rc == SQLITE_NOMEM) {
- sqlite3OomFault(db);
- }
- return rc;
+ /*
+ * We can't use the stat_dup function since statistics on
+ * the region don't fit into one memory chunk. Let's
+ * manually copy memory chunks and mark memory.
+ */
+ for (uint32_t i = 0; i < info.index_count; ++i)
+ stat_copy(heap_stats[i], &stats[i]);
+ /*
+ * Ordered query is needed to be sure that indexes come
+ * in the same order as in previous SELECTs.
+ */
+ const char *order_query = "SELECT \"tbl\",\"idx\" FROM "
+ "\"_sql_stat4\" GROUP BY \"tbl\",\"idx\"";
+ if (load_stat_to_index(db, order_query, heap_stats) != 0)
+ goto fail;
+ if (box_txn_commit() != 0)
+ return SQL_TARANTOOL_ERROR;
+ return SQLITE_OK;
+fail:
+ box_txn_rollback();
+ return SQL_TARANTOOL_ERROR;
}
-
-#endif /* SQLITE_OMIT_ANALYZE */
diff --git a/src/box/sql/build.c b/src/box/sql/build.c
index f860e331e..034a8928a 100644
--- a/src/box/sql/build.c
+++ b/src/box/sql/build.c
@@ -214,13 +214,9 @@ sqlite3LocateIndex(sqlite3 * db, const char *zName, const char *zTable)
static void
freeIndex(sqlite3 * db, Index * p)
{
-#ifndef SQLITE_OMIT_ANALYZE
- sqlite3DeleteIndexSamples(db, p);
-#endif
sql_expr_free(db, p->pPartIdxWhere, false);
sqlite3ExprListDelete(db, p->aColExpr);
sqlite3DbFree(db, p->zColAff);
- sqlite3_free(p->aiRowEst);
sqlite3DbFree(db, p);
}
@@ -3034,9 +3030,6 @@ sqlite3CreateIndex(Parse * pParse, /* All information about this parse */
requestedSortOrder = pListItem->sortOrder & 0;
pIndex->aSortOrder[i] = (u8) requestedSortOrder;
}
-
- sqlite3DefaultRowEst(pIndex);
-
if (pTab == pParse->pNewTable) {
/* This routine has been called to create an automatic index as a
* result of a PRIMARY KEY or UNIQUE clause on a column definition, or
@@ -3213,60 +3206,6 @@ sqlite3CreateIndex(Parse * pParse, /* All information about this parse */
sqlite3DbFree(db, zName);
}
-/*
- * Fill the Index.aiRowEst[] array with default information - information
- * to be used when we have not run the ANALYZE command.
- *
- * aiRowEst[0] is supposed to contain the number of elements in the index.
- * Since we do not know, guess 1 million. aiRowEst[1] is an estimate of the
- * number of rows in the table that match any particular value of the
- * first column of the index. aiRowEst[2] is an estimate of the number
- * of rows that match any particular combination of the first 2 columns
- * of the index. And so forth. It must always be the case that
-*
- * aiRowEst[N]<=aiRowEst[N-1]
- * aiRowEst[N]>=1
- *
- * Apart from that, we have little to go on besides intuition as to
- * how aiRowEst[] should be initialized. The numbers generated here
- * are based on typical values found in actual indices.
- */
-void
-sqlite3DefaultRowEst(Index * pIdx)
-{
- /* 10, 9, 8, 7, 6 */
- LogEst aVal[] = { 33, 32, 30, 28, 26 };
- LogEst *a = pIdx->aiRowLogEst;
- int nCopy = MIN(ArraySize(aVal), pIdx->nColumn);
- int i;
-
- /* Set the first entry (number of rows in the index) to the estimated
- * number of rows in the table, or half the number of rows in the table
- * for a partial index. But do not let the estimate drop below 10.
- */
- a[0] = pIdx->pTable->tuple_log_count;
- if (pIdx->pPartIdxWhere != 0)
- a[0] -= 10;
- assert(10 == sqlite3LogEst(2));
- if (a[0] < 33)
- a[0] = 33;
- assert(33 == sqlite3LogEst(10));
-
- /* Estimate that a[1] is 10, a[2] is 9, a[3] is 8, a[4] is 7, a[5] is
- * 6 and each subsequent value (if any) is 5.
- */
- memcpy(&a[1], aVal, nCopy * sizeof(LogEst));
- int col_count = index_column_count(pIdx);
- for (i = nCopy + 1; i <= col_count; i++) {
- a[i] = 23;
- assert(23 == sqlite3LogEst(5));
- }
-
- assert(0 == sqlite3LogEst(1));
- if (IsUniqueIndex(pIdx))
- a[pIdx->nColumn] = 0;
-}
-
/**
* Return number of columns in given index.
* If space is ephemeral, use internal
diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index bffd065fb..059e5d037 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -1484,10 +1484,8 @@ cmd ::= DROP TRIGGER ifexists(NOERR) fullname(X). {
/* %endif SQLITE_OMIT_REINDEX */
/////////////////////////////////// ANALYZE ///////////////////////////////////
-%ifndef SQLITE_OMIT_ANALYZE
cmd ::= ANALYZE. {sqlite3Analyze(pParse, 0);}
cmd ::= ANALYZE nm(X). {sqlite3Analyze(pParse, &X);}
-%endif
//////////////////////// ALTER TABLE table ... ////////////////////////////////
%ifndef SQLITE_OMIT_ALTERTABLE
diff --git a/src/box/sql/pragma.c b/src/box/sql/pragma.c
index 117d5ba88..f9cf4e89a 100644
--- a/src/box/sql/pragma.c
+++ b/src/box/sql/pragma.c
@@ -425,8 +425,7 @@ sqlite3Pragma(Parse * pParse, Token * pId, /* First part of [schema.]id field */
sqlite3VdbeMultiLoad(v, 2, "sii",
pIdx->zName,
avg_tuple_size_idx,
- pIdx->
- aiRowLogEst[0]);
+ index_field_tuple_est(pIdx, 0));
sqlite3VdbeAddOp2(v, OP_ResultRow, 1,
4);
}
diff --git a/src/box/sql/prepare.c b/src/box/sql/prepare.c
index 2ab8751a4..322d51e09 100644
--- a/src/box/sql/prepare.c
+++ b/src/box/sql/prepare.c
@@ -186,11 +186,8 @@ sqlite3InitDatabase(sqlite3 * db)
assert(db->init.busy);
{
rc = initData.rc;
-#ifndef SQLITE_OMIT_ANALYZE
- if (rc == SQLITE_OK) {
- sqlite3AnalysisLoad(db);
- }
-#endif
+ if (rc == SQLITE_OK)
+ sql_analysis_load(db);
}
if (db->mallocFailed) {
rc = SQLITE_NOMEM_BKPT;
diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h
index 973ae1a44..d0f6926c1 100644
--- a/src/box/sql/sqliteInt.h
+++ b/src/box/sql/sqliteInt.h
@@ -1460,7 +1460,6 @@ typedef struct FuncDef FuncDef;
typedef struct FuncDefHash FuncDefHash;
typedef struct IdList IdList;
typedef struct Index Index;
-typedef struct IndexSample IndexSample;
typedef struct KeyClass KeyClass;
typedef struct KeyInfo KeyInfo;
typedef struct Lookaside Lookaside;
@@ -1682,7 +1681,6 @@ struct sqlite3 {
#define SQLITE_SubqCoroutine 0x0100 /* Evaluate subqueries as coroutines */
#define SQLITE_Transitive 0x0200 /* Transitive constraints */
#define SQLITE_OmitNoopJoin 0x0400 /* Omit unused tables in joins */
-#define SQLITE_Stat4 0x0800 /* Use STAT4 data */
#define SQLITE_CursorHints 0x2000 /* Add OP_CursorHint opcodes */
#define SQLITE_AllOpts 0xffff /* All optimizations */
@@ -2163,15 +2161,30 @@ struct Index {
* or _NONE
*/
unsigned idxType:2; /* 1==UNIQUE, 2==PRIMARY KEY, 0==CREATE INDEX */
- unsigned bUnordered:1; /* Use this index for == or IN queries only */
- unsigned noSkipScan:1; /* Do not try to use skip-scan if true */
- int nSample; /* Number of elements in aSample[] */
- int nSampleCol; /* Size of IndexSample.anEq[] and so on */
- tRowcnt *aAvgEq; /* Average nEq values for keys not in aSample */
- IndexSample *aSample; /* Samples of the left-most key */
- tRowcnt *aiRowEst; /* Non-logarithmic stat1 data for this index */
};
+/**
+ * Fetch statistics concerning tuples to be selected:
+ * logarithm of the number of tuples which have the same key as for
+ * the first key parts specified by second argument.
+ * Simple example (without logarithms):
+ * idx = {{1,2}, {1,2}, {2, 4}, {2, 3}, {3, 5}}
+ * stat[1] = (2 + 2 + 1) / 3 = 2 - two keys which start from 1,
+ * two - from 2, and one from 3.
+ * stat[2] = (2 + 1 + 1 + 1) / 4 = 1 - only one coincidence of
+ * keys: {1, 2}; the rest are different.
+ * Notice, that stat[0] is an average number of tuples in a whole
+ * index. By default it is DEFAULT_TUPLE_LOG_COUNT == 200.
+ * If there is no appropriate Tarantool index,
+ * return one of the default values.
+ *
+ * @param idx Index.
+ * @param field Number of field to be examined.
+ * @retval Estimate logarithm of tuples selected by given field.
+ */
+log_est_t
+index_field_tuple_est(struct Index *idx, uint32_t field);
+
/*
* Allowed values for Index.idxType
*/
@@ -2191,19 +2204,9 @@ struct Index {
*/
#define XN_EXPR (-2) /* Indexed column is an expression */
-/*
- * Each sample stored in the sql_stat4 table is represented in memory
- * using a structure of this type. See documentation at the top of the
- * analyze.c source file for additional information.
- */
-struct IndexSample {
- void *p; /* Pointer to sampled record */
- int n; /* Size of record in bytes */
- tRowcnt *anEq; /* Est. number of rows where the key equals this sample */
- tRowcnt *anLt; /* Est. number of rows where key is less than this sample */
- tRowcnt *anDLt; /* Est. number of distinct keys less than this sample */
-};
-
+#ifdef DEFAULT_TUPLE_COUNT
+#undef DEFAULT_TUPLE_COUNT
+#endif
#define DEFAULT_TUPLE_COUNT 1048576
/** [10*log_{2}(1048576)] == 200 */
#define DEFAULT_TUPLE_LOG_COUNT 200
@@ -3960,9 +3963,19 @@ ssize_t
sql_index_tuple_size(struct space *space, struct index *idx);
int sqlite3InvokeBusyHandler(BusyHandler *);
-int sqlite3AnalysisLoad(sqlite3 *);
-void sqlite3DeleteIndexSamples(sqlite3 *, Index *);
-void sqlite3DefaultRowEst(Index *);
+
+/**
+ * Load the content of the _sql_stat1 and sql_stat4 tables. The
+ * contents of _sql_stat1 are used to populate the tuple_stat1[]
+ * arrays. The contents of sql_stat4 are used to populate the
+ * samples[] arrays.
+ *
+ * @param db Database handler.
+ * @retval SQLITE_OK on success, something else otherwise.
+ */
+int
+sql_analysis_load(struct sqlite3 *db);
+
uint32_t
index_column_count(const Index *);
bool
diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index 96f9d4063..2adc17ed2 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -4830,7 +4830,7 @@ case OP_RenameTable: {
sqlite3DbFree(db, (void*)zSqlStmt);
break;
}
-#if !defined(SQLITE_OMIT_ANALYZE)
+
/* Opcode: LoadAnalysis P1 * * * *
*
* Read the sql_stat1 table for database P1 and load the content
@@ -4839,11 +4839,10 @@ case OP_RenameTable: {
*/
case OP_LoadAnalysis: {
assert(pOp->p1==0 );
- rc = sqlite3AnalysisLoad(db);
+ rc = sql_analysis_load(db);
if (rc) goto abort_due_to_error;
break;
}
-#endif /* !defined(SQLITE_OMIT_ANALYZE) */
/* Opcode: DropTable P1 * * P4 *
*
diff --git a/src/box/sql/where.c b/src/box/sql/where.c
index 39963b10e..e62fedddc 100644
--- a/src/box/sql/where.c
+++ b/src/box/sql/where.c
@@ -888,7 +888,16 @@ whereKeyStats(Parse * pParse, /* Database connection */
int roundUp, /* Round up if true. Round down if false */
tRowcnt * aStat) /* OUT: stats written here */
{
- IndexSample *aSample = pIdx->aSample;
+ uint32_t space_id = SQLITE_PAGENO_TO_SPACEID(pIdx->tnum);
+ struct space *space = space_by_id(space_id);
+ assert(space != NULL);
+ uint32_t iid = SQLITE_PAGENO_TO_INDEXID(pIdx->tnum);
+ struct index *idx = space_index(space, iid);
+ assert(idx != NULL && idx->def->opts.stat != NULL);
+ struct index_sample *samples = idx->def->opts.stat->samples;
+ assert(idx->def->opts.stat->sample_count > 0);
+ assert(idx->def->opts.stat->samples != NULL);
+ assert(idx->def->opts.stat->sample_field_count >= pRec->nField);
int iCol; /* Index of required stats in anEq[] etc. */
int i; /* Index of first sample >= pRec */
int iSample; /* Smallest sample larger than or equal to pRec */
@@ -902,8 +911,7 @@ whereKeyStats(Parse * pParse, /* Database connection */
UNUSED_PARAMETER(pParse);
#endif
assert(pRec != 0);
- assert(pIdx->nSample > 0);
- assert(pRec->nField > 0 && pRec->nField <= pIdx->nSampleCol);
+ assert(pRec->nField > 0);
/* Do a binary search to find the first sample greater than or equal
* to pRec. If pRec contains a single field, the set of samples to search
@@ -951,7 +959,8 @@ whereKeyStats(Parse * pParse, /* Database connection */
*/
nField = pRec->nField;
iCol = 0;
- iSample = pIdx->nSample * nField;
+ uint32_t sample_count = idx->def->opts.stat->sample_count;
+ iSample = sample_count * nField;
do {
int iSamp; /* Index in aSample[] of test sample */
int n; /* Number of fields in test sample */
@@ -964,8 +973,8 @@ whereKeyStats(Parse * pParse, /* Database connection */
* fields that is greater than the previous effective sample.
*/
for (n = (iTest % nField) + 1; n < nField; n++) {
- if (aSample[iSamp - 1].anLt[n - 1] !=
- aSample[iSamp].anLt[n - 1])
+ if (samples[iSamp - 1].lt[n - 1] !=
+ samples[iSamp].lt[n - 1])
break;
}
} else {
@@ -974,15 +983,15 @@ whereKeyStats(Parse * pParse, /* Database connection */
pRec->nField = n;
res =
- sqlite3VdbeRecordCompareMsgpack(aSample[iSamp].n,
- aSample[iSamp].p, pRec);
+ sqlite3VdbeRecordCompareMsgpack(samples[iSamp].key_size,
+ samples[iSamp].sample_key,
+ pRec);
if (res < 0) {
iLower =
- aSample[iSamp].anLt[n - 1] + aSample[iSamp].anEq[n -
- 1];
+ samples[iSamp].lt[n - 1] + samples[iSamp].eq[n - 1];
iMin = iTest + 1;
} else if (res == 0 && n < nField) {
- iLower = aSample[iSamp].anLt[n - 1];
+ iLower = samples[iSamp].lt[n - 1];
iMin = iTest + 1;
res = -1;
} else {
@@ -1000,12 +1009,12 @@ whereKeyStats(Parse * pParse, /* Database connection */
if (pParse->db->mallocFailed == 0) {
if (res == 0) {
/* If (res==0) is true, then pRec must be equal to sample i. */
- assert(i < pIdx->nSample);
+ assert(i < (int) sample_count);
assert(iCol == nField - 1);
pRec->nField = nField;
assert(0 ==
- sqlite3VdbeRecordCompareMsgpack(aSample[i].n,
- aSample[i].p,
+ sqlite3VdbeRecordCompareMsgpack(samples[i].key_size,
+ samples[i].sample_key,
pRec)
|| pParse->db->mallocFailed);
} else {
@@ -1013,12 +1022,12 @@ whereKeyStats(Parse * pParse, /* Database connection */
* all samples in the aSample[] array, pRec must be smaller than the
* (iCol+1) field prefix of sample i.
*/
- assert(i <= pIdx->nSample && i >= 0);
+ assert(i <= (int) sample_count && i >= 0);
pRec->nField = iCol + 1;
- assert(i == pIdx->nSample
- || sqlite3VdbeRecordCompareMsgpack(aSample[i].n,
- aSample[i].p,
- pRec) > 0
+ assert(i == (int) sample_count ||
+ sqlite3VdbeRecordCompareMsgpack(samples[i].key_size,
+ samples[i].sample_key,
+ pRec) > 0
|| pParse->db->mallocFailed);
/* if i==0 and iCol==0, then record pRec is smaller than all samples
@@ -1029,13 +1038,15 @@ whereKeyStats(Parse * pParse, /* Database connection */
if (iCol > 0) {
pRec->nField = iCol;
assert(sqlite3VdbeRecordCompareMsgpack
- (aSample[i].n, aSample[i].p, pRec) <= 0
+ (samples[i].key_size,
+ samples[i].sample_key, pRec) <= 0
|| pParse->db->mallocFailed);
}
if (i > 0) {
pRec->nField = nField;
assert(sqlite3VdbeRecordCompareMsgpack
- (aSample[i - 1].n, aSample[i - 1].p,
+ (samples[i - 1].key_size,
+ samples[i - 1].sample_key,
pRec) < 0 || pParse->db->mallocFailed);
}
}
@@ -1045,18 +1056,18 @@ whereKeyStats(Parse * pParse, /* Database connection */
if (res == 0) {
/* Record pRec is equal to sample i */
assert(iCol == nField - 1);
- aStat[0] = aSample[i].anLt[iCol];
- aStat[1] = aSample[i].anEq[iCol];
+ aStat[0] = samples[i].lt[iCol];
+ aStat[1] = samples[i].eq[iCol];
} else {
/* At this point, the (iCol+1) field prefix of aSample[i] is the first
* sample that is greater than pRec. Or, if i==pIdx->nSample then pRec
* is larger than all samples in the array.
*/
tRowcnt iUpper, iGap;
- if (i >= pIdx->nSample) {
- iUpper = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]);
+ if (i >= (int) sample_count) {
+ iUpper = sqlite3LogEstToInt(idx->def->opts.stat->tuple_log_est[0]);
} else {
- iUpper = aSample[i].anLt[iCol];
+ iUpper = samples[i].lt[iCol];
}
if (iLower >= iUpper) {
@@ -1070,7 +1081,7 @@ whereKeyStats(Parse * pParse, /* Database connection */
iGap = iGap / 3;
}
aStat[0] = iLower + iGap;
- aStat[1] = pIdx->aAvgEq[iCol];
+ aStat[1] = idx->def->opts.stat->avg_eq[iCol];
}
/* Restore the pRec->nField value before returning. */
@@ -1161,10 +1172,15 @@ whereRangeSkipScanEst(Parse * pParse, /* Parsing & code generating context */
int *pbDone) /* Set to true if at least one expr. value extracted */
{
Index *p = pLoop->pIndex;
+ struct space *space = space_by_id(SQLITE_PAGENO_TO_SPACEID(p->tnum));
+ assert(space != NULL);
+ struct index *index = space_index(space,
+ SQLITE_PAGENO_TO_INDEXID(p->tnum));
+ assert(index != NULL && index->def->opts.stat != NULL);
int nEq = pLoop->nEq;
sqlite3 *db = pParse->db;
int nLower = -1;
- int nUpper = p->nSample + 1;
+ int nUpper = index->def->opts.stat->sample_count + 1;
int rc = SQLITE_OK;
u8 aff = sqlite3IndexColumnAffinity(db, p, nEq);
struct coll *pColl;
@@ -1182,15 +1198,17 @@ whereRangeSkipScanEst(Parse * pParse, /* Parsing & code generating context */
if (pUpper && rc == SQLITE_OK) {
rc = sqlite3Stat4ValueFromExpr(pParse, pUpper->pExpr->pRight,
aff, &p2);
- nUpper = p2 ? 0 : p->nSample;
+ nUpper = p2 ? 0 : index->def->opts.stat->sample_count;
}
if (p1 || p2) {
int i;
int nDiff;
- for (i = 0; rc == SQLITE_OK && i < p->nSample; i++) {
- rc = sqlite3Stat4Column(db, p->aSample[i].p,
- p->aSample[i].n, nEq, &pVal);
+ struct index_sample *samples = index->def->opts.stat->samples;
+ uint32_t sample_count = index->def->opts.stat->sample_count;
+ for (i = 0; rc == SQLITE_OK && i < (int) sample_count; i++) {
+ rc = sqlite3Stat4Column(db, samples[i].sample_key,
+ samples[i].key_size, nEq, &pVal);
if (rc == SQLITE_OK && p1) {
int res = sqlite3MemCompare(p1, pVal, pColl);
if (res >= 0)
@@ -1214,7 +1232,8 @@ whereRangeSkipScanEst(Parse * pParse, /* Parsing & code generating context */
*/
if (nDiff != 1 || pUpper == 0 || pLower == 0) {
int nAdjust =
- (sqlite3LogEst(p->nSample) - sqlite3LogEst(nDiff));
+ (sqlite3LogEst(sample_count) -
+ sqlite3LogEst(nDiff));
pLoop->nOut -= nAdjust;
*pbDone = 1;
WHERETRACE(0x10,
@@ -1285,8 +1304,22 @@ whereRangeScanEst(Parse * pParse, /* Parsing & code generating context */
Index *p = pLoop->pIndex;
int nEq = pLoop->nEq;
-
- if (p->nSample > 0 && nEq < p->nSampleCol) {
+ uint32_t space_id = SQLITE_PAGENO_TO_SPACEID(p->tnum);
+ struct space *space = space_by_id(space_id);
+ assert(space != NULL);
+ uint32_t iid = SQLITE_PAGENO_TO_INDEXID(p->tnum);
+ struct index *idx = space_index(space, iid);
+ assert(idx != NULL);
+ struct index_stat *stat = idx->def->opts.stat;
+ /*
+ * Create surrogate stat in case ANALYZE command hasn't
+ * been run. Simply fill it with zeros.
+ */
+ struct index_stat surrogate_stat;
+ memset(&surrogate_stat, 0, sizeof(surrogate_stat));
+ if (stat == NULL)
+ stat = &surrogate_stat;
+ if (stat->sample_count > 0 && nEq < (int) stat->sample_field_count) {
if (nEq == pBuilder->nRecValid) {
UnpackedRecord *pRec = pBuilder->pRec;
tRowcnt a[2];
@@ -1329,15 +1362,6 @@ whereRangeScanEst(Parse * pParse, /* Parsing & code generating context */
* are in range.
*/
iLower = 0;
- uint32_t space_id =
- SQLITE_PAGENO_TO_SPACEID(p->tnum);
- struct space *space = space_by_id(space_id);
- assert(space != NULL);
- uint32_t iid =
- SQLITE_PAGENO_TO_INDEXID(p->tnum);
- struct index *idx =
- space_index(space, iid);
- assert(idx != NULL);
iUpper = idx->vtab->size(idx);
} else {
/* Note: this call could be optimized away - since the same values must
@@ -1503,8 +1527,6 @@ whereEqualScanEst(Parse * pParse, /* Parsing & code generating context */
assert(nEq >= 1);
assert(nEq <= (int)index_column_count(p));
- assert(p->aSample != 0);
- assert(p->nSample > 0);
assert(pBuilder->nRecValid < nEq);
/* If values are not available for all fields of the index to the left
@@ -1553,14 +1575,13 @@ whereInScanEst(Parse * pParse, /* Parsing & code generating context */
tRowcnt * pnRow) /* Write the revised row estimate here */
{
Index *p = pBuilder->pNew->pIndex;
- i64 nRow0 = sqlite3LogEstToInt(p->aiRowLogEst[0]);
+ i64 nRow0 = sqlite3LogEstToInt(index_field_tuple_est(p, 0));
int nRecValid = pBuilder->nRecValid;
int rc = SQLITE_OK; /* Subfunction return code */
tRowcnt nEst; /* Number of rows for a single term */
tRowcnt nRowEst = 0; /* New estimate of the number of rows */
int i; /* Loop counter */
- assert(p->aSample != 0);
for (i = 0; rc == SQLITE_OK && i < pList->nExpr; i++) {
nEst = nRow0;
rc = whereEqualScanEst(pParse, pBuilder, pList->a[i].pExpr,
@@ -2319,9 +2340,25 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder, /* The WhereLoop factory */
WO_EQ | WO_IN | WO_GT | WO_GE | WO_LT | WO_LE | WO_ISNULL |
WO_IS;
}
- if (pProbe->bUnordered)
+ struct space *space =
+ space_by_id(SQLITE_PAGENO_TO_SPACEID(pProbe->tnum));
+ struct index *idx = NULL;
+ struct index_stat *stat = NULL;
+ if (space != NULL) {
+ idx = space_index(space, SQLITE_PAGENO_TO_INDEXID(pProbe->tnum));
+ assert(idx != NULL);
+ stat = idx->def->opts.stat;
+ }
+ /*
+ * Create surrogate stat in case ANALYZE command hasn't
+ * been run. Simply fill it with zeros.
+ */
+ struct index_stat surrogate_stat;
+ memset(&surrogate_stat, 0, sizeof(surrogate_stat));
+ if (stat == NULL)
+ stat = &surrogate_stat;
+ if (stat->is_unordered)
opMask &= ~(WO_GT | WO_GE | WO_LT | WO_LE);
-
assert(pNew->nEq < nProbeCol);
saved_nEq = pNew->nEq;
@@ -2335,7 +2372,7 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder, /* The WhereLoop factory */
pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, saved_nEq,
opMask, pProbe);
pNew->rSetup = 0;
- rSize = pProbe->aiRowLogEst[0];
+ rSize = index_field_tuple_est(pProbe, 0);
rLogSize = estLog(rSize);
for (; rc == SQLITE_OK && pTerm != 0; pTerm = whereScanNext(&scan)) {
u16 eOp = pTerm->eOperator; /* Shorthand for pTerm->eOperator */
@@ -2493,8 +2530,8 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder, /* The WhereLoop factory */
} else {
tRowcnt nOut = 0;
if (nInMul == 0
- && pProbe->nSample
- && pNew->nEq <= pProbe->nSampleCol
+ && stat->sample_count
+ && pNew->nEq <= stat->sample_field_count
&& ((eOp & WO_IN) == 0
|| !ExprHasProperty(pTerm->pExpr,
EP_xIsSelect))
@@ -2529,8 +2566,8 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder, /* The WhereLoop factory */
}
if (nOut == 0) {
pNew->nOut +=
- (pProbe->aiRowLogEst[nEq] -
- pProbe->aiRowLogEst[nEq - 1]);
+ (index_field_tuple_est(pProbe, nEq) -
+ index_field_tuple_est(pProbe, nEq -1));
if (eOp & WO_ISNULL) {
/* TUNING: If there is no likelihood() value, assume that a
* "col IS NULL" expression matches twice as many rows
@@ -2618,18 +2655,17 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder, /* The WhereLoop factory */
*/
assert(42 == sqlite3LogEst(18));
if (saved_nEq == saved_nSkip && saved_nEq + 1U < nProbeCol &&
- pProbe->noSkipScan == 0 &&
+ stat->skip_scan_enabled == true &&
/* TUNING: Minimum for skip-scan */
- pProbe->aiRowLogEst[saved_nEq + 1] >= 42 &&
+ index_field_tuple_est(pProbe, saved_nEq + 1) >= 42 &&
(rc = whereLoopResize(db, pNew, pNew->nLTerm + 1)) == SQLITE_OK) {
LogEst nIter;
pNew->nEq++;
pNew->nSkip++;
pNew->aLTerm[pNew->nLTerm++] = 0;
pNew->wsFlags |= WHERE_SKIPSCAN;
- nIter =
- pProbe->aiRowLogEst[saved_nEq] -
- pProbe->aiRowLogEst[saved_nEq + 1];
+ nIter = index_field_tuple_est(pProbe, saved_nEq) -
+ index_field_tuple_est(pProbe, saved_nEq + 1);
pNew->nOut -= nIter;
/* TUNING: Because uncertainties in the estimates for skip-scan queries,
* add a 1.375 fudge factor to make skip-scan slightly less likely.
@@ -2647,6 +2683,31 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder, /* The WhereLoop factory */
return rc;
}
+/**
+ * Check if is_unordered flag is set for given index.
+ * Since now there may not be corresponding Tarantool's index
+ * (e.g. for temporary objects such as ephemeral tables),
+ * a few preliminary checks are made.
+ *
+ * @param idx Index to be tested.
+ * @retval True, if statistics exist and unordered flag is set.
+ */
+static bool
+index_is_unordered(struct Index *idx)
+{
+ assert(idx != NULL);
+ struct space *space = space_by_id(SQLITE_PAGENO_TO_SPACEID(idx->tnum));
+ if (space == NULL)
+ return false;
+ uint32_t iid = SQLITE_PAGENO_TO_INDEXID(idx->tnum);
+ struct index *tnt_idx = space_index(space, iid);
+ if (tnt_idx == NULL)
+ return false;
+ if (tnt_idx->def->opts.stat != NULL)
+ return tnt_idx->def->opts.stat->is_unordered;
+ return false;
+}
+
/*
* Return True if it is possible that pIndex might be useful in
* implementing the ORDER BY clause in pBuilder.
@@ -2663,8 +2724,7 @@ indexMightHelpWithOrderBy(WhereLoopBuilder * pBuilder,
ExprList *aColExpr;
int ii, jj;
int nIdxCol = index_column_count(pIndex);
-
- if (pIndex->bUnordered)
+ if (index_is_unordered(pIndex))
return 0;
if ((pOB = pBuilder->pWInfo->pOrderBy) == 0)
return 0;
@@ -2877,7 +2937,7 @@ whereLoopAddBtree(WhereLoopBuilder * pBuilder, /* WHERE clause information */
testcase(pNew->iTab != pSrc->iCursor); /* See ticket [98d973b8f5] */
continue; /* Partial index inappropriate for this query */
}
- rSize = pProbe->aiRowLogEst[0];
+ rSize = index_field_tuple_est(pProbe, 0);
pNew->nEq = 0;
pNew->nBtm = 0;
pNew->nTop = 0;
@@ -3269,8 +3329,8 @@ wherePathSatisfiesOrderBy(WhereInfo * pWInfo, /* The WHERE clause */
if (pLoop->wsFlags & WHERE_IPK) {
pIndex = 0;
nColumn = 1;
- } else if ((pIndex = pLoop->pIndex) == 0
- || pIndex->bUnordered) {
+ } else if ((pIndex = pLoop->pIndex) == NULL ||
+ index_is_unordered(pIndex)) {
return 0;
} else {
nColumn = index_column_count(pIndex);
diff --git a/src/box/sql/whereexpr.c b/src/box/sql/whereexpr.c
index 86ee2731a..6b63b2c69 100644
--- a/src/box/sql/whereexpr.c
+++ b/src/box/sql/whereexpr.c
@@ -1305,9 +1305,7 @@ exprAnalyze(SrcList * pSrc, /* the FROM clause */
*/
if (pExpr->op == TK_NOTNULL
&& pExpr->pLeft->op == TK_COLUMN
- && pExpr->pLeft->iColumn >= 0
- && OptimizationEnabled(db, SQLITE_Stat4)
- ) {
+ && pExpr->pLeft->iColumn >= 0) {
Expr *pNewExpr;
Expr *pLeft = pExpr->pLeft;
int idxNew;
--
2.15.1
More information about the Tarantool-patches
mailing list