[Tarantool-patches] [PATCH v3 1/1] sql: enable autoindex optimization

Alexander V. Tikhonov avtikhon at tarantool.org
Tue Oct 20 18:09:09 MSK 2020


Hi Nikita, thank you for the fast fix, for now I see that all results
are correct and no new degradations occurred, you can check it here:
https://gitlab.com/tarantool/tarantool/-/pipelines/205228223
Patch LGTM.


On Tue, Oct 20, 2020 at 02:41:16PM +0000, Nikita Pettik wrote:
> On 16 Oct 03:22, Mergen Imeev wrote:
> > Привет! Спасибо за ревью. Мои ответы, изменения и новый патч ниже.
> 
> I've extended commit message and added fixes below. Also dropped second
> commit from branch (remove OP_Realify) since it was extracted to a
> separate branch. Now Release build is working; branch is rebased on master.
> Alexander, could we process with this patch and push it to the master?
> Current CI statuses (not green but problems seem to be unrelated with SQL):
> https://travis-ci.org/github/tarantool/tarantool/builds/737420850
> https://gitlab.com/tarantool/tarantool/-/pipelines/205228223
> 
> diff --git a/src/box/sql/where.c b/src/box/sql/where.c
> index f7931d11d..582cc48ba 100644
> --- a/src/box/sql/where.c
> +++ b/src/box/sql/where.c
> @@ -710,20 +710,20 @@ termCanDriveIndex(WhereTerm * pTerm,      /* WHERE clause term to check */
>  }
>  
>  /**
> - * Generate a code that will create a tuple, which will be inserted in the
> - * ephemeral index. The created tuple consists of rowid and fields described in
> - * the index key description.
> + * Generate a code that will create a tuple, which is supposed to be inserted
> + * in the ephemeral index space. The created tuple consists of rowid and
> + * fields described in the index key description.
>   *
>   * @param parse Parsing context.
>   * @param key_def The index key description.
> - * @param cursor Cursor of space where we will get values for tuple.
> - * @param reg_out Register to which the created tuple will be placed.
> + * @param cursor Cursor of source space from which values for tuple are fetched.
> + * @param reg_out Register to contain the created tuple.
>   * @param reg_eph Register holding pointer to ephemeral index.
>   */
>  static void
> -vdbe_emit_create_ephemeral_index_tuple(struct Parse *parse,
> -                                      const struct key_def *key_def,
> -                                      int cursor, int reg_out, int reg_eph)
> +vdbe_emit_ephemeral_index_tuple(struct Parse *parse,
> +                               const struct key_def *key_def, int cursor,
> +                               int reg_out, int reg_eph)
>  {
>         assert(reg_out != 0);
>         struct Vdbe *v = parse->pVdbe;
> @@ -740,11 +740,11 @@ vdbe_emit_create_ephemeral_index_tuple(struct Parse *parse,
>  
>  /*
>   * Generate code to construct the ephemeral space that contains all used in
> - * query fields of one of the tables that participate in the query. The table is
> - * determined by planner. This ephemeral space will be known as an "ephemeral
> - * index". The PK definition of ephemeral index contains all of its fields.
> - * Also, this functions set up the WhereLevel object pLevel so that the code
> - * generator makes use of ephemeral index.
> + * query fields of one of the tables that participate in the query. The source
> + * table is determined by query planner. This ephemeral space will be known as
> + * an "ephemeral index". The PK definition of ephemeral index contains all of
> + * its fields. Also, this functions set up the WhereLevel object pLevel so
> + * that the code generator makes use of ephemeral index.
>   */
>  static void
>  constructAutomaticIndex(Parse * pParse,                        /* The parsing context */
> @@ -766,7 +766,6 @@ constructAutomaticIndex(Parse * pParse,                     /* The parsing context */
>         WhereLoop *pLoop;       /* The Loop object */
>         Bitmask idxCols;        /* Bitmap of columns used for indexing */
>         Bitmask extraCols;      /* Bitmap of additional columns */
> -       struct SrcList_item *pTabItem;  /* FROM clause term being indexed */
>  
>         /* Generate code to skip over the creation and initialization of the
>          * transient index on 2nd and subsequent iterations of the loop.
> @@ -929,14 +928,13 @@ constructAutomaticIndex(Parse * pParse,                   /* The parsing context */
>  
>         /* Fill the automatic index with content */
>         sqlExprCachePush(pParse);
> -       pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom];
> -       assert(pTabItem->fg.viaCoroutine == 0);
> +       assert(pWC->pWInfo->pTabList->a[pLevel->iFrom].fg.viaCoroutine == 0);
>         int cursor = pLevel->iTabCur;
>         addrTop = sqlVdbeAddOp1(v, OP_Rewind, cursor);
>         VdbeCoverage(v);
>         regRecord = sqlGetTempReg(pParse);
> -       vdbe_emit_create_ephemeral_index_tuple(pParse, idx_def->key_def, cursor,
> -                                              regRecord, reg_eph);
> +       vdbe_emit_ephemeral_index_tuple(pParse, idx_def->key_def, cursor,
> +                                       regRecord, reg_eph);
>         sqlVdbeAddOp2(v, OP_IdxInsert, regRecord, reg_eph);
>         sqlVdbeAddOp2(v, OP_Next, cursor, addrTop + 1);
>         VdbeCoverage(v);
> diff --git a/test/sql-tap/autoindex1.test.lua b/test/sql-tap/autoindex1.test.lua
> index 5520289b1..94b824331 100755
> --- a/test/sql-tap/autoindex1.test.lua
> +++ b/test/sql-tap/autoindex1.test.lua
> @@ -1,13 +1,12 @@
>  #!/usr/bin/env tarantool
>  test = require("sqltester")
> -test:plan(7)
> +test:plan(8)
>  
>  --
>  -- This file implements regression tests for sql library. The focus of this
>  -- script is testing ephemeral index creation logic.
>  --
>  
> --- Make sure that ephemeral index is used in all cases below.
>  test:execsql([[
>      CREATE TABLE t1(a INT, b INT PRIMARY KEY);
>      INSERT INTO t1 VALUES(1, 11);
> @@ -17,25 +16,34 @@ test:execsql([[
>      CREATE TABLE t2(c INT, d INT PRIMARY KEY);
>  ]])
>  
> +test:do_eqp_test(
> +    "autoindex-1.0", [[
> +       SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1;
> +    ]], {
> +        {0,0,0,"SCAN TABLE T1 (~1048576 rows)"},
> +        {0,0,0,"EXECUTE CORRELATED SCALAR SUBQUERY 1"},
> +        {1,0,0,"SCAN TABLE T2 (~262144 rows)"}
> +    })
> +
>  for i = 1, 10240 do test:execsql("INSERT INTO t2 VALUES ("..i..", "..i..");") end
>  
> -test:do_execsql_test(
> +test:do_eqp_test(
>      "autoindex-1.1", [[
> -        EXPLAIN QUERY PLAN SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1;
> +        SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1;
>      ]], {
> -        0,0,0,"SCAN TABLE T1 (~1048576 rows)",
> -        0,0,0,"EXECUTE CORRELATED SCALAR SUBQUERY 1",
> -        1,0,0,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"
> +        {0,0,0,"SCAN TABLE T1 (~1048576 rows)"},
> +        {0,0,0,"EXECUTE CORRELATED SCALAR SUBQUERY 1"},
> +        {1,0,0,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"}
>      })
>  
>  result = test:execsql([[SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1;]])
>  
> -test:do_execsql_test(
> +test:do_eqp_test(
>      "autoindex-1.2", [[
> -        EXPLAIN QUERY PLAN SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b;
> +        SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b;
>      ]], {
> -        0,0,0,"SCAN TABLE T1 (~1048576 rows)",
> -        0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"
> +        {0,0,0,"SCAN TABLE T1 (~1048576 rows)"},
> +        {0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"}
>      })
>  
>  test:do_execsql_test(
> @@ -43,12 +51,12 @@ test:do_execsql_test(
>          SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b;
>      ]], result)
>  
> -test:do_execsql_test(
> +test:do_eqp_test(
>      "autoindex-1.4", [[
> -        EXPLAIN QUERY PLAN SELECT b, d FROM t1 CROSS JOIN t2 ON (c = a);
> +        SELECT b, d FROM t1 CROSS JOIN t2 ON (c = a);
>      ]], {
> -        0,0,0,"SCAN TABLE T1 (~1048576 rows)",
> -        0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"
> +        {0,0,0,"SCAN TABLE T1 (~1048576 rows)"},
> +        {0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"}
>      })
>  
>  test:do_execsql_test(
> @@ -85,9 +93,9 @@ test:do_execsql_test(
>          10231
>      })
>  
> -test:do_execsql_test(
> +test:do_eqp_test(
>      "autoindex-1.7", [[
> -        EXPLAIN QUERY PLAN SELECT count(*)
> +        SELECT count(*)
>            FROM t3 AS x1
>            JOIN t3 AS x2 ON x2.a=x1.b
>            JOIN t3 AS x3 ON x3.a=x2.b
> @@ -99,16 +107,16 @@ test:do_execsql_test(
>            JOIN t3 AS x9 ON x9.a=x8.b
>            JOIN t3 AS x10 ON x10.a=x9.b;
>      ]], {
> -        0,0,0,"SCAN TABLE T3 AS X1 (~1048576 rows)",
> -        0,1,1,"SEARCH TABLE T3 AS X2 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> -        0,2,2,"SEARCH TABLE T3 AS X3 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> -        0,3,3,"SEARCH TABLE T3 AS X4 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> -        0,4,4,"SEARCH TABLE T3 AS X5 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> -        0,5,5,"SEARCH TABLE T3 AS X6 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> -        0,6,6,"SEARCH TABLE T3 AS X7 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> -        0,7,7,"SEARCH TABLE T3 AS X8 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> -        0,8,8,"SEARCH TABLE T3 AS X9 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> -        0,9,9,"SEARCH TABLE T3 AS X10 USING EPHEMERAL INDEX (A=?) (~20 rows)"
> +        {0,0,0,"SCAN TABLE T3 AS X1 (~1048576 rows)"},
> +        {0,1,1,"SEARCH TABLE T3 AS X2 USING EPHEMERAL INDEX (A=?) (~20 rows)"},
> +        {0,2,2,"SEARCH TABLE T3 AS X3 USING EPHEMERAL INDEX (A=?) (~20 rows)"},
> +        {0,3,3,"SEARCH TABLE T3 AS X4 USING EPHEMERAL INDEX (A=?) (~20 rows)"},
> +        {0,4,4,"SEARCH TABLE T3 AS X5 USING EPHEMERAL INDEX (A=?) (~20 rows)"},
> +        {0,5,5,"SEARCH TABLE T3 AS X6 USING EPHEMERAL INDEX (A=?) (~20 rows)"},
> +        {0,6,6,"SEARCH TABLE T3 AS X7 USING EPHEMERAL INDEX (A=?) (~20 rows)"},
> +        {0,7,7,"SEARCH TABLE T3 AS X8 USING EPHEMERAL INDEX (A=?) (~20 rows)"},
> +        {0,8,8,"SEARCH TABLE T3 AS X9 USING EPHEMERAL INDEX (A=?) (~20 rows)"},
> +        {0,9,9,"SEARCH TABLE T3 AS X10 USING EPHEMERAL INDEX (A=?) (~20 rows)"}
>      })
>   
> > On Thu, Oct 15, 2020 at 01:40:56AM +0000, Nikita Pettik wrote:
> > > On 14 Oct 12:30, Mergen Imeev wrote:
> > > > Привет! Спасибо за ревью. Мои ответы и изменения ниже.
> > > > 
> > > > On Tue, Oct 13, 2020 at 02:08:34PM +0000, Nikita Pettik wrote:
> > > > > On 11 Oct 13:48, imeevma at tarantool.org wrote:
> > > > > > From: Kirill Yukhin <kyukhin at tarantool.org>
> > > > > > 
> > > > > > This patch enables "autoindex" optimization. This optimization uses an
> > > > > > ephemeral space that contains all the fields used in the query. This
> > > > > 
> > > > > Не очень понимаю, что значит "все поля в запросе"..? Это как?:)
> > > > > Ты имеешь ввиду все поля result-set'a? Или объединение всех полей
> > > > > всех таблиц используемых в запросе?
> > > > > 
> > > > Я думаю лучше будет показать на примере:
> > > > CREATE TABLE t1 (i INT PRIMARY KEY, a INT, b INT, c INT);
> > > > CREATE TABLE t2 (i INT PRIMARY KEY, a INT, b INT, c INT);
> > > > ...
> > > > SELECT t1.a, t2.b FROM t1, t2 WHERE t1.b = t2.c
> > > > 
> > > > В данном случае планировщик выберет один из спейсов, для которого он сделает
> > > > автоиндекс. В случае, если он выберет t1, то автоиндекс будет состоять из
> > > > трех полей: rowid, b, a (в таком порядке). Если он выберет t2, то автоиндекс
> > > > будет так же состоять из трех полей: rowid, c, b. Т.е. в любом случае автоиндекс
> > > > будет содержать не только поля спейса которые участвуют в WHERE, но и поля
> > > > который участвуют в возвращаемом значении, даже если они не участвуют в WHERE.
> > > > Это сделано для того, чтобы не обращаться к оригинальному спейсу в случае
> > > > использования автоиндекс и пользоваться самим автоиндексом вместо оригинального
> > > > спейса.
> > > 
> > > Ок, вот это было бы здорово сразу в коммит месседже описать.
> > >  
> > Добавил.
> > 
> > > > > > ephemeral space is called "autoindex". Autoindex may have fewer fields
> > > > > > than the original space, and the order of these fields in autoindex may
> > > > > 
> > > > > Опять: original space - это какой спейс? В запросе может участвовать
> > > > > много таблиц..
> > > > > 
> > > > Один из спейсов участвующих в запросе. Какой именно выбирает планировщик.
> > > 
> > > ditto
> > > 
> > > > > Еще имхо какая-то путаница выходит: называем эфемерные спейсы автоиндексами.
> > > > > Я бы обозвал это как-нибудь по-другому. Это же просто еще одно использование
> > > > > эфемерных таблиц, не более.
> > > > > 
> > > > Мне кажется это не совсем верно. Обычно использование эфемерных спейсов является
> > > > необходимостью а не оптимизацией. 
> > > 
> > > Так, и?
> > > 
> > Какого ответа или действить ты ждешь на этот вопрос?
> > 
> > > > Насчет того что это спейс а не индекс я
> > > > согласен, но в некотором смысле он ведет себя как индекс.
> > > 
> > > Это как?
> > > 
> > Как я и сказал выше, один такой случай описан ниже. Я имею ввиду случай с
> > представлениями (которые не имеют собственных индексов).
> > 
> > > > Т.е. SQL считает, что
> > > > мы создали индекс. Один из случаев когда это важно упомянут ниже.
> > > 
> > > Короче, предлагаю называть это хотя бы ephemeral index opt,
> > > autoindex space opt или что-нибудь в это духе (чтобы не путать со
> > > скулайтовской терминологией).
> > >  
> > Назвал ephemeral index.
> > 
> > > > > > be different from their order in the original space. In addition to the
> > > > > > mentioned fields, autoindex contains a rowid.
> > > > > > 
> > > > > > The primary key of autoindex is a covering index, i.e. it contains all
> > > > > > of its fields.
> > > > > 
> > > > > В мемтиксе все индексы покрывающие. И 'covering' - свойство для каждого
> > > > > запроса отдельно взятого, а не индекса как такового.
> > > > > 
> > > > В данном случае 'covering' означает что определение содержит все поля спейса.
> > > > В предыдущем ревью я предложил разобраться с этим вопросом чуть позже, т.к.
> > > > считаю, что здесь это не очень важно.
> > > 
> > > У этого термина есть общепринятое определение. Не понимаю в чем тут разбираться,
> > > если он используется не по назначению..
> > >
> > Убрал этот обзац из коммит-месседжа.
> >  
> > > > > > 
> > > > > > Currently this optimization mainly used in cases when values SELECTED
> > > > > > from more than one space and WHERE specified.
> > > > > > 
> > > > > > At the moment, this optimization does not take into account the number
> > > > > > of tuples in spaces. This should be fixed.
> > > > > > 
> > > > > > Closes #4933
> > > > > > 
> > > > > > Co-authored-by: Mergen Imeev <imeevma at gmail.com>
> > > > > > Co-authored-by: Timur Safin <tsafin at tarantool.org>
> > > > > > ---
> > > > > > https://github.com/tarantool/tarantool/issues/4933
> > > > > > https://github.com/tarantool/tarantool/tree/imeevma/gh-4933-autoindex
> > > > > > 
> > > > > > @ChangeLog
> > > > > >  - "autoindex" optimization is now enabled (gh-4933).
> > > > > 
> > > > > МНе кажется, можно чуть больше написать про такую важную штуку
> > > > >  
> > > > Мне кажется это не настолько важная оптимизация
> > > 
> > > Ну-ну
> > > 
> > Опять, какие действия или ответ ты ждешь на это замечание?
> > 
> > > > что ее стоит отдельно много
> > > > описывать. Круг ее применения довольно ограничен -- в основном джойны и 
> > > > сабселекты с подходящим WHERE.
> > > 
> > > > > > diff --git a/src/box/sql/where.c b/src/box/sql/where.c
> > > > > > index e9e936856..6ab2be3ba 100644
> > > > > > --- a/src/box/sql/where.c
> > > > > > +++ b/src/box/sql/where.c
> > > > > > @@ -683,7 +683,6 @@ translateColumnToCopy(Vdbe * v,		/* The VDBE containing code to translate */
> > > > > >  	}
> > > > > > +/**
> > > > > > + * Generate a code that will create a tuple, which will be inserted in the
> > > > > > + * autoindex. The created tuple consists of rowid and fields described in the
> > > > > > + * index key description.
> > > > > > + *
> > > > > > + * @param parse Parsing context.
> > > > > > + * @param key_def The index key description.
> > > > > > + * @param cursor Cursor of space where we will get values for tuple.
> > > > > > + * @param reg_out Register to which the created tuple will be placed.
> > > > > > + * @param reg_eph Register holding pointer to autoindex.
> > > > > > + *
> > > > > > + * @return Return a register number which is the first in a block of registers
> > > > > > + * that holds the elements of the created tuple. The block of registers has
> > > > > > + * already been deallocated by the time this routine returns.
> > > > > 
> > > > > Не очень понял, зачем возвращать уже деаллоцированные регистры..
> > > > > 
> > > > Это значение используется в ветке if, которую я воспроизвести не смог. Насколько
> > > > я понимаю это нужно для подсчета количества используемых регистров в случае
> > > > использованию корутины. Если это так, это может быть необходимо для
> > > > предварительного вычисления количества используемых в корутине регистров. Но это
> > > > только мои предположения.
> > > 
> > > Может стоит все-таки разобраться, покрыть тестами там, или удалить мертвый код.
> > >  
> > Удалил код. Coverall показывал, что код не используется, и тесты не попадали
> > после удаления. Воспросизвести тест, в котором бы этот код использовался не
> > смог. Однако, я все еще думаю, что это возможно.
> > 
> > > > > > + */
> > > > > > +static int
> > > > > > +vdbe_emit_create_autoindex_tuple(struct Parse *parse,
> > > > > > +				 const struct key_def *key_def,
> > > > > > +				 int cursor, int reg_out, int reg_eph)
> > > > > > +{
> > > > > > +	assert(reg_out != 0);
> > > > > > +	struct Vdbe *v = parse->pVdbe;
> > > > > > +	int col_cnt = key_def->part_count;
> > > > > > +	int reg_base = sqlGetTempRange(parse, col_cnt + 1);
> > > > > > +	for (int j = 0; j < col_cnt; j++) {
> > > > > > +		uint32_t tabl_col = key_def->parts[j].fieldno;
> > > > > > +		sqlVdbeAddOp3(v, OP_Column, cursor, tabl_col, reg_base + j);
> > > > > > +	}
> > > > > > +	sqlVdbeAddOp2(v, OP_NextIdEphemeral, reg_eph, reg_base + col_cnt);
> > > > > > +	sqlVdbeAddOp3(v, OP_MakeRecord, reg_base, col_cnt + 1, reg_out);
> > > > > > +	sqlReleaseTempRange(parse, reg_base, col_cnt + 1);
> > > > > > +	return reg_base;
> > > > > > +}
> > > > > > +
> > > > > >  /*
> > > > > > - * Generate code to construct the Index object for an automatic index
> > > > > > - * and to set up the WhereLevel object pLevel so that the code generator
> > > > > > - * makes use of the automatic index.
> > > > > > + * Generate code to construct the ephemeral space that contains all used in
> > > > > > + * query fields of one of the tables. This ephemeral space will be known as an
> > > > > 
> > > > > Опять: какой таблицы..Очень запутывающий коммент имхо
> > > > > 
> > > > Будет лучше если я напишу "selected by planner table"?
> > > 
> > > Как минимум да
> > >  
> > Добавил.
> > 
> > > > > > + * "autoindex". PK of this ephemeral space is a covering index. Also, this
> > > > > > + * functions set up the WhereLevel object pLevel so that the code generator
> > > > > > + * makes use of the auto-index.
> > > > > >   */
> > > > > >  static void
> > > > > >  constructAutomaticIndex(Parse * pParse,			/* The parsing context */
> > > > > >  		}
> > > > > >   * Estimate the location of a particular key among all keys in an
> > > > > > -#ifndef SQL_OMIT_AUTOMATIC_INDEX
> > > > > >  	/* Automatic indexes */
> > > > > > -	LogEst rSize = pTab->nRowLogEst;
> > > > > > +	rSize = DEFAULT_TUPLE_LOG_COUNT;
> > > > > > +	/* Increase cost of autoindex if number of tuples in space < ~10000. */
> > > > > 
> > > > > 10k как-то маловато. В скулайте, если я правильно помню, какие-то подвижки
> > > > > начинаются со 100к affected rows.
> > > > > 
> > > > Это значение было подобрано эксперементально. На подбор более точных эвристик
> > > > есть отдельная задачу у @tsafin на следующий квартал.
> > > 
> > > На каких "экспериментальных данных"? Сейчас это выглядит оочень агрессивно
> > > 
> > В основном проверка проходила на запросых Q8, Q13, Q17 и Q20 TPC-H. При значении
> > этой величины в 1000000 происходило сильное замедление запросов Q17 и Q20
> > (замедление в 200+ раз). При значении этой величины в 1000 происходило замедение
> > теста Q8 (в 1.5 - 2 раза). При значнии 10000 все 4 запроса работают досточно
> > быстро при значении SCALING FACTOR TPC-H до 3 (выше пока не тестировалось).
> > 
> > > > > > +	if (!space->def->opts.is_view && sql_space_tuple_log_count(space) < 133)
> > > > > > +		rSize += DEFAULT_TUPLE_LOG_COUNT;
> > > > > 
> > > > > Почему для представления это не работает?
> > > > > 
> > > > Скорее наоборот - у представлений стоимость автоиндекса не увеличивается, так
> > > > что он используется чаще. Связано с тем, что посчитать количества таплов в
> > > > представлении не в ран-тайме довольно проблематично. 
> > > 
> > > Так там сейчас все равно константа добавляется. С аналайзом можно и для
> > > вью посчитать кол-во таплов.
> > >  
> > У нас нет аналайза.
> > 
> > > > > >  	LogEst rLogSize = estLog(rSize);
> > > > > >  	if (!pBuilder->pOrSet && /* Not pqart of an OR optimization */
> > > > > >  	    (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0 &&
> > > > > >  	    (pWInfo->pParse->sql_flags & SQL_AutoIndex) != 0 &&
> > > > > >  	    pSrc->pIBIndex == 0	/* Has no INDEXED BY clause */
> > > > > >  	    && !pSrc->fg.notIndexed	/* Has no NOT INDEXED clause */
> > > > > >  	    &&!pSrc->fg.isCorrelated	/* Not a correlated subquery */
> > > > > >  	    && !pSrc->fg.isRecursive	/* Not a recursive common table expression. */
> > > > > >  	    ) {
> > > > > > @@ -2829,24 +2910,10 @@ tnt_error:
> > > > > >  			if (termCanDriveIndex(pTerm, pSrc, 0)) {
> > > > > >  				pNew->nEq = 1;
> > > > > >  				pNew->nSkip = 0;
> > > > > > -				pNew->pIndex = 0;
> > > > > > +				pNew->index_def = NULL;
> > > > > >  				pNew->nLTerm = 1;
> > > > > >  				pNew->aLTerm[0] = pTerm;
> > > > > > -				/* TUNING: One-time cost for computing the automatic index is
> > > > > > -				 * estimated to be X*N*log2(N) where N is the number of rows in
> > > > > > -				 * the table being indexed and where X is 7 (LogEst=28) for normal
> > > > > > -				 * tables or 1.375 (LogEst=4) for views and subqueries.  The value
> > > > > > -				 * of X is smaller for views and subqueries so that the query planner
> > > > > > -				 * will be more aggressive about generating automatic indexes for
> > > > > > -				 * those objects, since there is no opportunity to add schema
> > > > > > -				 * indexes on subqueries and views.
> > > > > > -				 */
> > > > > > -				pNew->rSetup = rLogSize + rSize + 4;
> > > > > > -				if (!pTab->def->opts.is_view &&
> > > > > > -				    pTab->def->id == 0)
> > > > > > -					pNew->rSetup += 24;
> > > > > > -				if (pNew->rSetup < 0)
> > > > > > -					pNew->rSetup = 0;
> > > > > > +				pNew->rSetup = rLogSize + rSize;
> > > > > 
> > > > > Можешь объяснить, почему тут произошли изменения? То есть почему оптимизация
> > > > > стала более агрессивной?
> > > > > 
> > > > Эти значение получены эксперементальным путем. Это так же будет уточняться в
> > > > ходе подбора эвристик.
> > > 
> > > Ну не удаляй тогда код хотя бы, оставь там еще свой коммент какой-то.
> > > Тот же кто потом с этим будет возиться в этом болоте утонет..
> > > 
> > Написал комментарий TODO.
> > 
> > Сомневаюсь, что это будет самая большая проблема человека, который будет
> > разбираться с планировщиком.
> > 
> > > > > > diff --git a/test/sql-tap/autoindex1.test.lua b/test/sql-tap/autoindex1.test.lua
> > > > > > new file mode 100755
> > > > > > index 000000000..42b995dd3
> > > > > > --- /dev/null
> > > > > > +++ b/test/sql-tap/autoindex1.test.lua
> > > > > > +---------------------------------------------------------------------------
> > > > > > +--  This file implements regression tests for sql library.  The focus of this
> > > > > > +-- script is testing automatic index creation logic.
> > > > > > +
> > > > > > +test:execsql([[
> > > > > > +    CREATE TABLE t1(a INT, b INT PRIMARY KEY);
> > > > > > +    INSERT INTO t1 VALUES(1, 11);
> > > > > > +    INSERT INTO t1 VALUES(2, 22);
> > > > > > +    INSERT INTO t1 SELECT a + 2, b + 22 FROM t1;
> > > > > > +    INSERT INTO t1 SELECT a + 4, b + 44 FROM t1;
> > > > > > +    CREATE TABLE t2(c INT, d INT PRIMARY KEY);
> > > > > > +]])
> > > > > > +
> > > > > > +for i = 1, 11000 do test:execsql("INSERT INTO t2 VALUES ("..i..", "..i..");") end
> > > > > 
> > > > > Это быстро работает? Может сделать тест long-run?
> > > > > Было бы еще неплохо к каждой группе тестов коммент, поясняющий почему
> > > > > с эфмереной табличкой будет быстрее.
> > > > > 
> > > > На моем ноутбуке это работает быстро, быстрее чем некоторые другие стандартные
> > > > SQL тесты. До появления сообщения о '10 second...' ни разу не дошло.
> > > > Добавил коммент о том, что из-за автоиндекса тест работает быстрее, чем в случае
> > > > если автоиндекс не используется.
> > > > 
> > > > 
> > > > Diff:
> > > > 
> > > > +-- Make sure that autoindex is used in all cases below.
> > > 
> > > Я имел ввиду добавить объяснение, почему автоиндекс будет быстрее,
> > > чем тыркание туда-сюда по таблицам. Можешь посмотреть примеры в тестах
> > > на аналайз
> > > 
> > Немного расширил комментарий.
> > 
> > > >  test:execsql([[
> > > >      CREATE TABLE t1(a INT, b INT PRIMARY KEY);
> > > >      INSERT INTO t1 VALUES(1, 11);
> > 
> > 
> > Diff:
> > 
> > 
> > diff --git a/src/box/sql/where.c b/src/box/sql/where.c
> > index 6ab2be3ba..f7931d11d 100644
> > --- a/src/box/sql/where.c
> > +++ b/src/box/sql/where.c
> > @@ -711,23 +711,19 @@ termCanDriveIndex(WhereTerm * pTerm,	/* WHERE clause term to check */
> >  
> >  /**
> >   * Generate a code that will create a tuple, which will be inserted in the
> > - * autoindex. The created tuple consists of rowid and fields described in the
> > - * index key description.
> > + * ephemeral index. The created tuple consists of rowid and fields described in
> > + * the index key description.
> >   *
> >   * @param parse Parsing context.
> >   * @param key_def The index key description.
> >   * @param cursor Cursor of space where we will get values for tuple.
> >   * @param reg_out Register to which the created tuple will be placed.
> > - * @param reg_eph Register holding pointer to autoindex.
> > - *
> > - * @return Return a register number which is the first in a block of registers
> > - * that holds the elements of the created tuple. The block of registers has
> > - * already been deallocated by the time this routine returns.
> > + * @param reg_eph Register holding pointer to ephemeral index.
> >   */
> > -static int
> > -vdbe_emit_create_autoindex_tuple(struct Parse *parse,
> > -				 const struct key_def *key_def,
> > -				 int cursor, int reg_out, int reg_eph)
> > +static void
> > +vdbe_emit_create_ephemeral_index_tuple(struct Parse *parse,
> > +				       const struct key_def *key_def,
> > +				       int cursor, int reg_out, int reg_eph)
> >  {
> >  	assert(reg_out != 0);
> >  	struct Vdbe *v = parse->pVdbe;
> > @@ -740,15 +736,15 @@ vdbe_emit_create_autoindex_tuple(struct Parse *parse,
> >  	sqlVdbeAddOp2(v, OP_NextIdEphemeral, reg_eph, reg_base + col_cnt);
> >  	sqlVdbeAddOp3(v, OP_MakeRecord, reg_base, col_cnt + 1, reg_out);
> >  	sqlReleaseTempRange(parse, reg_base, col_cnt + 1);
> > -	return reg_base;
> >  }
> >  
> >  /*
> >   * Generate code to construct the ephemeral space that contains all used in
> > - * query fields of one of the tables. This ephemeral space will be known as an
> > - * "autoindex". PK of this ephemeral space is a covering index. Also, this
> > - * functions set up the WhereLevel object pLevel so that the code generator
> > - * makes use of the auto-index.
> > + * query fields of one of the tables that participate in the query. The table is
> > + * determined by planner. This ephemeral space will be known as an "ephemeral
> > + * index". The PK definition of ephemeral index contains all of its fields.
> > + * Also, this functions set up the WhereLevel object pLevel so that the code
> > + * generator makes use of ephemeral index.
> >   */
> >  static void
> >  constructAutomaticIndex(Parse * pParse,			/* The parsing context */
> > @@ -771,8 +767,6 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
> >  	Bitmask idxCols;	/* Bitmap of columns used for indexing */
> >  	Bitmask extraCols;	/* Bitmap of additional columns */
> >  	struct SrcList_item *pTabItem;	/* FROM clause term being indexed */
> > -	int addrCounter = 0;	/* Address where integer counter is initialized */
> > -	int regBase;		/* Array of registers where record is assembled */
> >  
> >  	/* Generate code to skip over the creation and initialization of the
> >  	 * transient index on 2nd and subsequent iterations of the loop.
> > @@ -907,7 +901,7 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
> >  	/* Construct the index definition to describe this index. */
> >  	struct index_opts opts;
> >  	index_opts_create(&opts);
> > -	const char *idx_name = "autoindex";
> > +	const char *idx_name = "ephemeral index";
> >  	struct index_def *idx_def = index_def_new(space->def->id, 0, idx_name,
> >  						  strlen(idx_name), TREE, &opts,
> >  						  key_def, NULL);
> > @@ -936,32 +930,16 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
> >  	/* Fill the automatic index with content */
> >  	sqlExprCachePush(pParse);
> >  	pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom];
> > +	assert(pTabItem->fg.viaCoroutine == 0);
> >  	int cursor = pLevel->iTabCur;
> > -	if (pTabItem->fg.viaCoroutine) {
> > -		int regYield = pTabItem->regReturn;
> > -		addrCounter = sqlVdbeAddOp2(v, OP_Integer, 0, 0);
> > -		sqlVdbeAddOp3(v, OP_InitCoroutine, regYield, 0,
> > -				  pTabItem->addrFillSub);
> > -		addrTop = sqlVdbeAddOp1(v, OP_Yield, regYield);
> > -		VdbeCoverage(v);
> > -		VdbeComment((v, "next row of \"%s\"", pTabItem->space->def->name));
> > -	} else {
> > -		addrTop = sqlVdbeAddOp1(v, OP_Rewind, cursor);
> > -		VdbeCoverage(v);
> > -	}
> > +	addrTop = sqlVdbeAddOp1(v, OP_Rewind, cursor);
> > +	VdbeCoverage(v);
> >  	regRecord = sqlGetTempReg(pParse);
> > -	regBase = vdbe_emit_create_autoindex_tuple(pParse, idx_def->key_def,
> > -						   cursor, regRecord, reg_eph);
> > +	vdbe_emit_create_ephemeral_index_tuple(pParse, idx_def->key_def, cursor,
> > +					       regRecord, reg_eph);
> >  	sqlVdbeAddOp2(v, OP_IdxInsert, regRecord, reg_eph);
> > -	if (pTabItem->fg.viaCoroutine) {
> > -		sqlVdbeChangeP2(v, addrCounter, regBase + n + 1);
> > -		translateColumnToCopy(v, addrTop, cursor, pTabItem->regResult);
> > -		sqlVdbeGoto(v, addrTop);
> > -		pTabItem->fg.viaCoroutine = 0;
> > -	} else {
> > -		sqlVdbeAddOp2(v, OP_Next, cursor, addrTop + 1);
> > -		VdbeCoverage(v);
> > -	}
> > +	sqlVdbeAddOp2(v, OP_Next, cursor, addrTop + 1);
> > +	VdbeCoverage(v);
> >  	sqlVdbeChangeP5(v, SQL_STMTSTATUS_AUTOINDEX);
> >  	sqlVdbeJumpHere(v, addrTop);
> >  	sqlReleaseTempReg(pParse, regRecord);
> > @@ -2889,7 +2867,10 @@ tnt_error:
> >  
> >  	/* Automatic indexes */
> >  	rSize = DEFAULT_TUPLE_LOG_COUNT;
> > -	/* Increase cost of autoindex if number of tuples in space < ~10000. */
> > +	/*
> > +	 * Increase cost of ephemeral index if number of tuples in space is less
> > +	 * then 10240.
> > +	 */
> >  	if (!space->def->opts.is_view && sql_space_tuple_log_count(space) < 133)
> >  		rSize += DEFAULT_TUPLE_LOG_COUNT;
> >  	LogEst rLogSize = estLog(rSize);
> > @@ -2913,6 +2894,11 @@ tnt_error:
> >  				pNew->index_def = NULL;
> >  				pNew->nLTerm = 1;
> >  				pNew->aLTerm[0] = pTerm;
> > +				/*
> > +				 * TODO: At the moment we have decided to use
> > +				 * this formula, but it is quite aggressive and
> > +				 * needs tuning.
> > +				 */
> >  				pNew->rSetup = rLogSize + rSize;
> >  				/* TUNING: Each index lookup yields 20 rows in the table.  This
> >  				 * is more than the usual guess of 10 rows, since we have no way
> > @@ -4870,15 +4856,16 @@ sqlWhereEnd(WhereInfo * pWInfo)
> >  					continue;
> >  				}
> >  				/*
> > -				 * In case we are using autoindex, the space
> > -				 * that will be used to get the values will be
> > -				 * autoindex. Since the opcode OP_Column uses
> > -				 * the position of the fields according to the
> > -				 * original space, and the fields may be in
> > -				 * other positions in the autoindex, we must
> > -				 * correct the P2 of OP_Column. To get the
> > -				 * positions of these fields in autoindex, we
> > -				 * use the index definition we created.
> > +				 * In case we are using ephemeral index, the
> > +				 * space that will be used to get the values
> > +				 * will be ephemeral index. Since the opcode
> > +				 * OP_Column uses the position of the fields
> > +				 * according to the original space, and the
> > +				 * fields may be in other positions in the
> > +				 * ephemeral index, we must correct the P2 of
> > +				 * OP_Column. To get the positions of these
> > +				 * fields in ephemeral index, we use the index
> > +				 * definition we created.
> >  				 */
> >  				struct key_def *key_def =
> >  					pLevel->pWLoop->index_def->key_def;
> > diff --git a/src/box/sql/wherecode.c b/src/box/sql/wherecode.c
> > index 310b85fa0..96bcab110 100644
> > --- a/src/box/sql/wherecode.c
> > +++ b/src/box/sql/wherecode.c
> > @@ -220,7 +220,7 @@ sqlWhereExplainOneScan(Parse * pParse,	/* Parse context */
> >  			assert(!(flags & WHERE_AUTO_INDEX)
> >  			       || (flags & WHERE_IDX_ONLY));
> >  			if ((flags & WHERE_AUTO_INDEX) != 0) {
> > -				zFmt = "AUTOMATIC INDEX";
> > +				zFmt = "EPHEMERAL INDEX";
> >  			} else if (idx_def->iid == 0) {
> >  				if (isSearch) {
> >  					zFmt = "PRIMARY KEY";
> > diff --git a/test/sql-tap/autoindex1.test.lua b/test/sql-tap/autoindex1.test.lua
> > index cd423d9da..5520289b1 100755
> > --- a/test/sql-tap/autoindex1.test.lua
> > +++ b/test/sql-tap/autoindex1.test.lua
> > @@ -4,10 +4,10 @@ test:plan(7)
> >  
> >  --
> >  -- This file implements regression tests for sql library. The focus of this
> > --- script is testing automatic index creation logic.
> > +-- script is testing ephemeral index creation logic.
> >  --
> >  
> > --- Make sure that autoindex is used in all cases below.
> > +-- Make sure that ephemeral index is used in all cases below.
> >  test:execsql([[
> >      CREATE TABLE t1(a INT, b INT PRIMARY KEY);
> >      INSERT INTO t1 VALUES(1, 11);
> > @@ -17,7 +17,7 @@ test:execsql([[
> >      CREATE TABLE t2(c INT, d INT PRIMARY KEY);
> >  ]])
> >  
> > -for i = 1, 11000 do test:execsql("INSERT INTO t2 VALUES ("..i..", "..i..");") end
> > +for i = 1, 10240 do test:execsql("INSERT INTO t2 VALUES ("..i..", "..i..");") end
> >  
> >  test:do_execsql_test(
> >      "autoindex-1.1", [[
> > @@ -25,7 +25,7 @@ test:do_execsql_test(
> >      ]], {
> >          0,0,0,"SCAN TABLE T1 (~1048576 rows)",
> >          0,0,0,"EXECUTE CORRELATED SCALAR SUBQUERY 1",
> > -        1,0,0,"SEARCH TABLE T2 USING AUTOMATIC INDEX (C=?) (~20 rows)"
> > +        1,0,0,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"
> >      })
> >  
> >  result = test:execsql([[SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1;]])
> > @@ -35,7 +35,7 @@ test:do_execsql_test(
> >          EXPLAIN QUERY PLAN SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b;
> >      ]], {
> >          0,0,0,"SCAN TABLE T1 (~1048576 rows)",
> > -        0,1,1,"SEARCH TABLE T2 USING AUTOMATIC INDEX (C=?) (~20 rows)"
> > +        0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"
> >      })
> >  
> >  test:do_execsql_test(
> > @@ -48,7 +48,7 @@ test:do_execsql_test(
> >          EXPLAIN QUERY PLAN SELECT b, d FROM t1 CROSS JOIN t2 ON (c = a);
> >      ]], {
> >          0,0,0,"SCAN TABLE T1 (~1048576 rows)",
> > -        0,1,1,"SEARCH TABLE T2 USING AUTOMATIC INDEX (C=?) (~20 rows)"
> > +        0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"
> >      })
> >  
> >  test:do_execsql_test(
> > @@ -61,10 +61,12 @@ test:execsql([[
> >  ]])
> >  
> >  --
> > --- This query is quite slow in case autoindex is not used. This test shows
> > --- that autoindex allows to increase performance in cases like this.
> > +-- This query is quite slow in case ephemeral index is not used. The main idea
> > +-- behind this test is that ephemeral index allows to use index instead of
> > +-- fullscan in cases below. In this test construction of the index is faster
> > +-- that fullscan.
> >  --
> > -for i = 1, 11000 do test:execsql("INSERT INTO t3 VALUES ("..i..", "..i..", "..(i + 1)..");") end
> > +for i = 1, 10240 do test:execsql("INSERT INTO t3 VALUES ("..i..", "..i..", "..(i + 1)..");") end
> >  
> >  test:do_execsql_test(
> >      "autoindex-1.6", [[
> > @@ -80,7 +82,7 @@ test:do_execsql_test(
> >            JOIN t3 AS x9 ON x9.a=x8.b
> >            JOIN t3 AS x10 ON x10.a=x9.b;
> >      ]], {
> > -        10991
> > +        10231
> >      })
> >  
> >  test:do_execsql_test(
> > @@ -98,15 +100,15 @@ test:do_execsql_test(
> >            JOIN t3 AS x10 ON x10.a=x9.b;
> >      ]], {
> >          0,0,0,"SCAN TABLE T3 AS X1 (~1048576 rows)",
> > -        0,1,1,"SEARCH TABLE T3 AS X2 USING AUTOMATIC INDEX (A=?) (~20 rows)",
> > -        0,2,2,"SEARCH TABLE T3 AS X3 USING AUTOMATIC INDEX (A=?) (~20 rows)",
> > -        0,3,3,"SEARCH TABLE T3 AS X4 USING AUTOMATIC INDEX (A=?) (~20 rows)",
> > -        0,4,4,"SEARCH TABLE T3 AS X5 USING AUTOMATIC INDEX (A=?) (~20 rows)",
> > -        0,5,5,"SEARCH TABLE T3 AS X6 USING AUTOMATIC INDEX (A=?) (~20 rows)",
> > -        0,6,6,"SEARCH TABLE T3 AS X7 USING AUTOMATIC INDEX (A=?) (~20 rows)",
> > -        0,7,7,"SEARCH TABLE T3 AS X8 USING AUTOMATIC INDEX (A=?) (~20 rows)",
> > -        0,8,8,"SEARCH TABLE T3 AS X9 USING AUTOMATIC INDEX (A=?) (~20 rows)",
> > -        0,9,9,"SEARCH TABLE T3 AS X10 USING AUTOMATIC INDEX (A=?) (~20 rows)"
> > +        0,1,1,"SEARCH TABLE T3 AS X2 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,2,2,"SEARCH TABLE T3 AS X3 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,3,3,"SEARCH TABLE T3 AS X4 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,4,4,"SEARCH TABLE T3 AS X5 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,5,5,"SEARCH TABLE T3 AS X6 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,6,6,"SEARCH TABLE T3 AS X7 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,7,7,"SEARCH TABLE T3 AS X8 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,8,8,"SEARCH TABLE T3 AS X9 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,9,9,"SEARCH TABLE T3 AS X10 USING EPHEMERAL INDEX (A=?) (~20 rows)"
> >      })
> >  
> >  test:finish_test()
> > diff --git a/test/sql/misc.result b/test/sql/misc.result
> > index 4d12ba4c9..bbf37e26f 100644
> > --- a/test/sql/misc.result
> > +++ b/test/sql/misc.result
> > @@ -237,5 +237,5 @@ box.execute('EXPLAIN QUERY PLAN SELECT a, b FROM t1, t2 WHERE a = b;')
> >      type: text
> >    rows:
> >    - [0, 0, 0, 'SCAN TABLE T1 (~1048576 rows)']
> > -  - [0, 1, 1, 'SEARCH TABLE T2 USING AUTOMATIC INDEX (B=?) (~20 rows)']
> > +  - [0, 1, 1, 'SEARCH TABLE T2 USING EPHEMERAL INDEX (B=?) (~20 rows)']
> >  ...
> > diff --git a/test/sql/misc.result b/test/sql/misc.result
> > index bbf37e26f..df2fdde81 100644
> > --- a/test/sql/misc.result
> > +++ b/test/sql/misc.result
> > @@ -213,17 +213,12 @@ box.execute('CREATE TABLE t2(i INT PRIMARY KEY, b INT);')
> >  ---
> >  - row_count: 1
> >  ...
> > -for i = 1, 11000 do\
> > +for i = 1, 10240 do\
> >   box.execute('INSERT INTO t1 VALUES ($1, $1);', {i})\
> >   box.execute('INSERT INTO t2 VALUES ($1, $1);', {i})\
> >  end
> >  ---
> >  ...
> > ---
> > --- There is no need to insert values in the tables since planner assumes a
> > --- default number of tuples for each space, regardless of how many tuples there
> > --- actually are in those spaces. The default value is 1048576 (== 2^20).
> > ---
> >  box.execute('EXPLAIN QUERY PLAN SELECT a, b FROM t1, t2 WHERE a = b;')
> >  ---
> >  - metadata:
> > diff --git a/test/sql/misc.test.lua b/test/sql/misc.test.lua
> > index d7673bb49..44945eace 100644
> > --- a/test/sql/misc.test.lua
> > +++ b/test/sql/misc.test.lua
> > @@ -68,13 +68,8 @@ box.space.TEST:drop()
> >  -- gh-4933: Make sure that autoindex optimization is used.
> >  box.execute('CREATE TABLE t1(i INT PRIMARY KEY, a INT);')
> >  box.execute('CREATE TABLE t2(i INT PRIMARY KEY, b INT);')
> > -for i = 1, 11000 do\
> > +for i = 1, 10240 do\
> >   box.execute('INSERT INTO t1 VALUES ($1, $1);', {i})\
> >   box.execute('INSERT INTO t2 VALUES ($1, $1);', {i})\
> >  end
> > ---
> > --- There is no need to insert values in the tables since planner assumes a
> > --- default number of tuples for each space, regardless of how many tuples there
> > --- actually are in those spaces. The default value is 1048576 (== 2^20).
> > ---
> >  box.execute('EXPLAIN QUERY PLAN SELECT a, b FROM t1, t2 WHERE a = b;')
> > 
> > 
> > 
> > 
> > New patch:
> > 
> > 
> > From 1c7820e9b90f34bca671956a51bd345b1d6ab748 Mon Sep 17 00:00:00 2001
> > From: Kirill Yukhin <kyukhin at tarantool.org>
> > Date: Tue, 11 Aug 2020 13:45:40 +0300
> > Subject: [PATCH] sql: enable ephemeral index optimization
> > 
> > This patch enables "ephemeral index" optimization. This optimization
> > uses an ephemeral space that contains all the fields of one of spaces
> > used in the query. This original space is determined by planner. This
> > ephemeral space is called "ephemeral index". Ephemeral index may have
> > fewer fields than the original space, and the order of these fields in
> > ephemeral index may be different from their order in the original space.
> > In addition to the mentioned fields, ephemeral index contains a rowid.
> > Example:
> > 
> > CREATE TABLE t1 (i INT PRIMARY KEY, a INT, b INT, c INT);
> > CREATE TABLE t2 (i INT PRIMARY KEY, a INT, b INT, c INT);
> > ...
> > SELECT t1.a, t2.b FROM t1, t2 WHERE t1.b = t2.c
> > 
> > In case the planner decides to use an ephemeral index and chooses t1 as
> > the source space, an ephemeral index with three fields will be created
> > that will contain the values from t1.b, t1.a, and rowid in that order.
> > 
> > In case the planner decides to use an ephemeral index and chooses t2 as
> > the source space, an ephemeral index with three fields will be created
> > that will contain the values from t2.c, t2.b, and rowid in that order.
> > 
> > This optimization mainly used in cases when WHERE specified and either
> > values SELECTED from more than one space or subselect is used.
> > 
> > Closes #4933
> > 
> > Co-authored-by: Mergen Imeev <imeevma at gmail.com>
> > Co-authored-by: Timur Safin <tsafin at tarantool.org>
> > 
> > diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
> > index 8b2e704cf..3755754da 100644
> > --- a/src/box/CMakeLists.txt
> > +++ b/src/box/CMakeLists.txt
> > @@ -219,7 +219,7 @@ add_library(box STATIC
> >  if(CMAKE_BUILD_TYPE STREQUAL "Debug")
> >    add_definitions(-DSQL_DEBUG=1)
> >  endif()
> > -add_definitions(-DSQL_OMIT_AUTOMATIC_INDEX=1 -DSQL_TEST=1)
> > +add_definitions(-DSQL_TEST=1)
> >  
> >  set(EXT_SRC_DIR ${CMAKE_SOURCE_DIR}/extra)
> >  set(EXT_BIN_DIR ${CMAKE_BINARY_DIR}/extra)
> > diff --git a/src/box/sql.c b/src/box/sql.c
> > index 000246ee9..a551bffc3 100644
> > --- a/src/box/sql.c
> > +++ b/src/box/sql.c
> > @@ -719,7 +719,7 @@ int
> >  tarantoolsqlIdxKeyCompare(struct BtCursor *cursor,
> >  			      struct UnpackedRecord *unpacked)
> >  {
> > -	assert(cursor->curFlags & BTCF_TaCursor);
> > +	assert(cursor->curFlags & (BTCF_TaCursor | BTCF_TEphemCursor));
> >  	assert(cursor->iter != NULL);
> >  	assert(cursor->last_tuple != NULL);
> >  
> > diff --git a/src/box/sql/delete.c b/src/box/sql/delete.c
> > index a78c68df6..62a726fdd 100644
> > --- a/src/box/sql/delete.c
> > +++ b/src/box/sql/delete.c
> > @@ -544,31 +544,3 @@ sql_generate_row_delete(struct Parse *parse, struct space *space,
> >  	sqlVdbeResolveLabel(v, label);
> >  	VdbeModuleComment((v, "END: GenRowDel()"));
> >  }
> > -
> > -int
> > -sql_generate_index_key(struct Parse *parse, struct index *index, int cursor,
> > -		       int reg_out, struct index *prev, int reg_prev)
> > -{
> > -	struct Vdbe *v = parse->pVdbe;
> > -	int col_cnt = index->def->key_def->part_count;
> > -	int reg_base = sqlGetTempRange(parse, col_cnt);
> > -	if (prev != NULL && reg_base != reg_prev)
> > -		prev = NULL;
> > -	for (int j = 0; j < col_cnt; j++) {
> > -		if (prev != NULL && prev->def->key_def->parts[j].fieldno ==
> > -				    index->def->key_def->parts[j].fieldno) {
> > -			/*
> > -			 * This column was already computed by the
> > -			 * previous index.
> > -			 */
> > -			continue;
> > -		}
> > -		uint32_t tabl_col = index->def->key_def->parts[j].fieldno;
> > -		sqlVdbeAddOp3(v, OP_Column, cursor, tabl_col, reg_base + j);
> > -	}
> > -	if (reg_out != 0)
> > -		sqlVdbeAddOp3(v, OP_MakeRecord, reg_base, col_cnt, reg_out);
> > -
> > -	sqlReleaseTempRange(parse, reg_base, col_cnt);
> > -	return reg_base;
> > -}
> > diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
> > index adf90d824..56c5ee911 100644
> > --- a/src/box/sql/sqlInt.h
> > +++ b/src/box/sql/sqlInt.h
> > @@ -3296,41 +3296,6 @@ sql_generate_row_delete(struct Parse *parse, struct space *space,
> >  			enum on_conflict_action onconf, u8 mode,
> >  			int idx_noseek);
> >  
> > -/**
> > - * Generate code that will assemble an index key and stores it in
> > - * register reg_out. The key will be for index which is an
> > - * index on table. cursor is the index of a cursor open on the
> > - * table table and pointing to the entry that needs indexing.
> > - * cursor must be the cursor of the PRIMARY KEY index.
> > - *
> > - * The prev and reg_prev parameters are used to implement a
> > - * cache to avoid unnecessary register loads.  If prev is not
> > - * NULL, then it is a pointer to a different index for which an
> > - * index key has just been computed into register reg_prev. If the
> > - * current index is generating its key into the same
> > - * sequence of registers and if prev and index share a column in
> > - * common, then the register corresponding to that column already
> > - * holds the correct value and the loading of that register is
> > - * skipped. This optimization is helpful when doing a DELETE or
> > - * an INTEGRITY_CHECK on a table with multiple indices, and
> > - * especially with the PRIMARY KEY columns of the index.
> > - *
> > - * @param parse Parsing context.
> > - * @param index The index for which to generate a key.
> > - * @param cursor Cursor number from which to take column data.
> > - * @param reg_out Put the new key into this register if not NULL.
> > - * @param prev Previously generated index key
> > - * @param reg_prev Register holding previous generated key.
> > - *
> > - * @retval Return a register number which is the first in a block
> > - * of registers that holds the elements of the index key. The
> > - * block of registers has already been deallocated by the time
> > - * this routine returns.
> > - */
> > -int
> > -sql_generate_index_key(struct Parse *parse, struct index *index, int cursor,
> > -		       int reg_out, struct index *prev, int reg_prev);
> > -
> >  /**
> >   * Generate code to do constraint checks prior to an INSERT or
> >   * an UPDATE on the given table.
> > diff --git a/src/box/sql/where.c b/src/box/sql/where.c
> > index e9e936856..f7931d11d 100644
> > --- a/src/box/sql/where.c
> > +++ b/src/box/sql/where.c
> > @@ -683,7 +683,6 @@ translateColumnToCopy(Vdbe * v,		/* The VDBE containing code to translate */
> >  	}
> >  }
> >  
> > -#ifndef SQL_OMIT_AUTOMATIC_INDEX
> >  /*
> >   * Return TRUE if the WHERE clause term pTerm is of a form where it
> >   * could be used with an index to access pSrc, assuming an appropriate
> > @@ -703,19 +702,49 @@ termCanDriveIndex(WhereTerm * pTerm,	/* WHERE clause term to check */
> >  		return 0;
> >  	if (pTerm->u.leftColumn < 0)
> >  		return 0;
> > -	enum field_type type = pSrc->pTab->def->fields[pTerm->u.leftColumn].type;
> > +	enum field_type type = pSrc->space->def->fields[pTerm->u.leftColumn].type;
> >  	enum field_type expr_type = expr_cmp_mutual_type(pTerm->pExpr);
> >  	if (!field_type1_contains_type2(expr_type, type))
> >  		return 0;
> >  	return 1;
> >  }
> > -#endif
> >  
> > -#ifndef SQL_OMIT_AUTOMATIC_INDEX
> > +/**
> > + * Generate a code that will create a tuple, which will be inserted in the
> > + * ephemeral index. The created tuple consists of rowid and fields described in
> > + * the index key description.
> > + *
> > + * @param parse Parsing context.
> > + * @param key_def The index key description.
> > + * @param cursor Cursor of space where we will get values for tuple.
> > + * @param reg_out Register to which the created tuple will be placed.
> > + * @param reg_eph Register holding pointer to ephemeral index.
> > + */
> > +static void
> > +vdbe_emit_create_ephemeral_index_tuple(struct Parse *parse,
> > +				       const struct key_def *key_def,
> > +				       int cursor, int reg_out, int reg_eph)
> > +{
> > +	assert(reg_out != 0);
> > +	struct Vdbe *v = parse->pVdbe;
> > +	int col_cnt = key_def->part_count;
> > +	int reg_base = sqlGetTempRange(parse, col_cnt + 1);
> > +	for (int j = 0; j < col_cnt; j++) {
> > +		uint32_t tabl_col = key_def->parts[j].fieldno;
> > +		sqlVdbeAddOp3(v, OP_Column, cursor, tabl_col, reg_base + j);
> > +	}
> > +	sqlVdbeAddOp2(v, OP_NextIdEphemeral, reg_eph, reg_base + col_cnt);
> > +	sqlVdbeAddOp3(v, OP_MakeRecord, reg_base, col_cnt + 1, reg_out);
> > +	sqlReleaseTempRange(parse, reg_base, col_cnt + 1);
> > +}
> > +
> >  /*
> > - * Generate code to construct the Index object for an automatic index
> > - * and to set up the WhereLevel object pLevel so that the code generator
> > - * makes use of the automatic index.
> > + * Generate code to construct the ephemeral space that contains all used in
> > + * query fields of one of the tables that participate in the query. The table is
> > + * determined by planner. This ephemeral space will be known as an "ephemeral
> > + * index". The PK definition of ephemeral index contains all of its fields.
> > + * Also, this functions set up the WhereLevel object pLevel so that the code
> > + * generator makes use of ephemeral index.
> >   */
> >  static void
> >  constructAutomaticIndex(Parse * pParse,			/* The parsing context */
> > @@ -727,24 +756,17 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
> >  	int nKeyCol;		/* Number of columns in the constructed index */
> >  	WhereTerm *pTerm;	/* A single term of the WHERE clause */
> >  	WhereTerm *pWCEnd;	/* End of pWC->a[] */
> > -	Index *pIdx;		/* Object describing the transient index */
> >  	Vdbe *v;		/* Prepared statement under construction */
> >  	int addrInit;		/* Address of the initialization bypass jump */
> > -	Table *pTable;		/* The table being indexed */
> >  	int addrTop;		/* Top of the index fill loop */
> >  	int regRecord;		/* Register holding an index record */
> >  	int n;			/* Column counter */
> >  	int i;			/* Loop counter */
> >  	int mxBitCol;		/* Maximum column in pSrc->colUsed */
> > -	struct coll *pColl;		/* Collating sequence to on a column */
> >  	WhereLoop *pLoop;	/* The Loop object */
> > -	char *zNotUsed;		/* Extra space on the end of pIdx */
> >  	Bitmask idxCols;	/* Bitmap of columns used for indexing */
> >  	Bitmask extraCols;	/* Bitmap of additional columns */
> > -	int iContinue = 0;	/* Jump here to skip excluded rows */
> >  	struct SrcList_item *pTabItem;	/* FROM clause term being indexed */
> > -	int addrCounter = 0;	/* Address where integer counter is initialized */
> > -	int regBase;		/* Array of registers where record is assembled */
> >  
> >  	/* Generate code to skip over the creation and initialization of the
> >  	 * transient index on 2nd and subsequent iterations of the loop.
> > @@ -758,7 +780,6 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
> >  	 * and used to match WHERE clause constraints
> >  	 */
> >  	nKeyCol = 0;
> > -	pTable = pSrc->pTab;
> >  	pWCEnd = &pWC->a[pWC->nTerm];
> >  	pLoop = pLevel->pWLoop;
> >  	idxCols = 0;
> > @@ -770,7 +791,8 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
> >  			if ((idxCols & cMask) == 0) {
> >  				if (whereLoopResize
> >  				    (pParse->db, pLoop, nKeyCol + 1)) {
> > -					goto end_auto_index_create;
> > +					pParse->is_aborted = true;
> > +					return;
> >  				}
> >  				pLoop->aLTerm[nKeyCol++] = pTerm;
> >  				idxCols |= cMask;
> > @@ -791,26 +813,27 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
> >  	 * if they go out of sync.
> >  	 */
> >  	extraCols = pSrc->colUsed & (~idxCols | MASKBIT(BMS - 1));
> > -	mxBitCol = MIN(BMS - 1, pTable->def->field_count);
> > -	testcase(pTable->def->field_count == BMS - 1);
> > -	testcase(pTable->def->field_count == BMS - 2);
> > +	struct space *space = pSrc->space;
> > +	mxBitCol = MIN(BMS - 1, space->def->field_count);
> >  	for (i = 0; i < mxBitCol; i++) {
> >  		if (extraCols & MASKBIT(i))
> >  			nKeyCol++;
> >  	}
> >  	if (pSrc->colUsed & MASKBIT(BMS - 1)) {
> > -		nKeyCol += pTable->def->field_count - BMS + 1;
> > +		nKeyCol += space->def->field_count - BMS + 1;
> >  	}
> >  
> > -	/* Construct the Index object to describe this index */
> > -	pIdx = sqlDbMallocZero(pParse->db, sizeof(*pIdx));
> > -	if (pIdx == 0)
> > -		goto end_auto_index_create;
> > -	pLoop->pIndex = pIdx;
> > -	pIdx->zName = "auto-index";
> > -	pIdx->pTable = pTable;
> >  	n = 0;
> >  	idxCols = 0;
> > +	size_t size;
> > +	struct key_part_def *parts = region_alloc_array(&pParse->region,
> > +							typeof(parts[0]),
> > +							nKeyCol, &size);
> > +	if (parts == NULL) {
> > +		diag_set(OutOfMemory, size, "region_alloc_array", "parts");
> > +		pParse->is_aborted = true;
> > +		return;
> > +	}
> >  	for (pTerm = pWC->a; pTerm < pWCEnd; pTerm++) {
> >  		if (termCanDriveIndex(pTerm, pSrc, notReady)) {
> >  			int iCol = pTerm->u.leftColumn;
> > @@ -819,9 +842,17 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
> >  			testcase(iCol == BMS - 1);
> >  			testcase(iCol == BMS);
> >  			if ((idxCols & cMask) == 0) {
> > -				Expr *pX = pTerm->pExpr;
> >  				idxCols |= cMask;
> > -				pIdx->aiColumn[n] = pTerm->u.leftColumn;
> > +				struct field_def *field =
> > +					&space->def->fields[iCol];
> > +				struct key_part_def *part = &parts[n];
> > +				part->fieldno = iCol;
> > +				part->type = field->type;
> > +				part->nullable_action = field->nullable_action;
> > +				part->is_nullable = field->is_nullable;
> > +				part->sort_order = SORT_ORDER_ASC;
> > +				part->coll_id = field->coll_id;
> > +				part->path = NULL;
> >  				n++;
> >  			}
> >  		}
> > @@ -833,64 +864,91 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
> >  	 */
> >  	for (i = 0; i < mxBitCol; i++) {
> >  		if (extraCols & MASKBIT(i)) {
> > -			pIdx->aiColumn[n] = i;
> > +			struct field_def *field = &space->def->fields[i];
> > +			struct key_part_def *part = &parts[n];
> > +			part->fieldno = i;
> > +			part->type = field->type;
> > +			part->nullable_action = field->nullable_action;
> > +			part->is_nullable = field->is_nullable;
> > +			part->sort_order = SORT_ORDER_ASC;
> > +			part->coll_id = field->coll_id;
> > +			part->path = NULL;
> >  			n++;
> >  		}
> >  	}
> >  	if (pSrc->colUsed & MASKBIT(BMS - 1)) {
> > -		for (i = BMS - 1; i < (int)pTable->def->field_count; i++) {
> > -			pIdx->aiColumn[n] = i;
> > +		for (i = BMS - 1; i < (int)space->def->field_count; i++) {
> > +			struct field_def *field = &space->def->fields[i];
> > +			struct key_part_def *part = &parts[n];
> > +			part->fieldno = i;
> > +			part->type = field->type;
> > +			part->nullable_action = field->nullable_action;
> > +			part->is_nullable = field->is_nullable;
> > +			part->sort_order = SORT_ORDER_ASC;
> > +			part->coll_id = field->coll_id;
> > +			part->path = NULL;
> >  			n++;
> >  		}
> >  	}
> >  	assert(n == nKeyCol);
> > -	pIdx->aiColumn[n] = XN_ROWID;
> > +
> > +	struct key_def *key_def = key_def_new(parts, nKeyCol, false);
> > +	if (key_def == NULL) {
> > +		pParse->is_aborted = true;
> > +		return;
> > +	}
> > +
> > +	/* Construct the index definition to describe this index. */
> > +	struct index_opts opts;
> > +	index_opts_create(&opts);
> > +	const char *idx_name = "ephemeral index";
> > +	struct index_def *idx_def = index_def_new(space->def->id, 0, idx_name,
> > +						  strlen(idx_name), TREE, &opts,
> > +						  key_def, NULL);
> > +	key_def_delete(key_def);
> > +	if (idx_def == NULL) {
> > +		pParse->is_aborted = true;
> > +		return;
> > +	}
> > +	pLoop->index_def = idx_def;
> >  
> >  	/* Create the automatic index */
> >  	assert(pLevel->iIdxCur >= 0);
> >  	pLevel->iIdxCur = pParse->nTab++;
> > -	sqlVdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol + 1);
> > -	sql_vdbe_set_p4_key_def(pParse, pIdx->key_def);
> > -	VdbeComment((v, "for %s", pTable->def->name));
> > +	struct sql_key_info *pk_info =
> > +		sql_key_info_new_from_key_def(pParse->db, idx_def->key_def);
> > +	if (pk_info == NULL) {
> > +		pParse->is_aborted = true;
> > +		return;
> > +	}
> > +	int reg_eph = sqlGetTempReg(pParse);
> > +	sqlVdbeAddOp4(v, OP_OpenTEphemeral, reg_eph, nKeyCol + 1, 0,
> > +		      (char *)pk_info, P4_KEYINFO);
> > +	sqlVdbeAddOp3(v, OP_IteratorOpen, pLevel->iIdxCur, 0, reg_eph);
> > +	VdbeComment((v, "for %s", space->def->name));
> >  
> >  	/* Fill the automatic index with content */
> >  	sqlExprCachePush(pParse);
> >  	pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom];
> > -	if (pTabItem->fg.viaCoroutine) {
> > -		int regYield = pTabItem->regReturn;
> > -		addrCounter = sqlVdbeAddOp2(v, OP_Integer, 0, 0);
> > -		sqlVdbeAddOp3(v, OP_InitCoroutine, regYield, 0,
> > -				  pTabItem->addrFillSub);
> > -		addrTop = sqlVdbeAddOp1(v, OP_Yield, regYield);
> > -		VdbeCoverage(v);
> > -		VdbeComment((v, "next row of \"%s\"", pTabItem->pTab->zName));
> > -	} else {
> > -		addrTop = sqlVdbeAddOp1(v, OP_Rewind, pLevel->iTabCur);
> > -		VdbeCoverage(v);
> > -	}
> > +	assert(pTabItem->fg.viaCoroutine == 0);
> > +	int cursor = pLevel->iTabCur;
> > +	addrTop = sqlVdbeAddOp1(v, OP_Rewind, cursor);
> > +	VdbeCoverage(v);
> >  	regRecord = sqlGetTempReg(pParse);
> > -	regBase = sql_generate_index_key(pParse, pIdx, pLevel->iTabCur,
> > -					 regRecord, NULL, 0);
> > -	sqlVdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord);
> > -	if (pTabItem->fg.viaCoroutine) {
> > -		sqlVdbeChangeP2(v, addrCounter, regBase + n);
> > -		translateColumnToCopy(v, addrTop, pLevel->iTabCur,
> > -				      pTabItem->regResult, 1);
> > -		sqlVdbeGoto(v, addrTop);
> > -		pTabItem->fg.viaCoroutine = 0;
> > -	} else {
> > -		sqlVdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop + 1);
> > -		VdbeCoverage(v);
> > -	}
> > +	vdbe_emit_create_ephemeral_index_tuple(pParse, idx_def->key_def, cursor,
> > +					       regRecord, reg_eph);
> > +	sqlVdbeAddOp2(v, OP_IdxInsert, regRecord, reg_eph);
> > +	sqlVdbeAddOp2(v, OP_Next, cursor, addrTop + 1);
> > +	VdbeCoverage(v);
> >  	sqlVdbeChangeP5(v, SQL_STMTSTATUS_AUTOINDEX);
> >  	sqlVdbeJumpHere(v, addrTop);
> >  	sqlReleaseTempReg(pParse, regRecord);
> > +	sqlReleaseTempReg(pParse, reg_eph);
> >  	sqlExprCachePop(pParse);
> >  
> >  	/* Jump here when skipping the initialization */
> >  	sqlVdbeJumpHere(v, addrInit);
> >  }
> > -#endif				/* SQL_OMIT_AUTOMATIC_INDEX */
> >  
> >  /*
> >   * Estimate the location of a particular key among all keys in an
> > @@ -1711,7 +1769,7 @@ whereLoopInit(WhereLoop * p)
> >  static void
> >  whereLoopClearUnion(WhereLoop * p)
> >  {
> > -	if ((p->wsFlags & WHERE_AUTO_INDEX) != 0) {
> > +	if ((p->wsFlags & WHERE_AUTO_INDEX) != 0 && p->index_def != NULL) {
> >  		index_def_delete(p->index_def);
> >  		p->index_def = NULL;
> >  	}
> > @@ -2807,16 +2865,20 @@ tnt_error:
> >  		probe = fake_index;
> >  	}
> >  
> > -#ifndef SQL_OMIT_AUTOMATIC_INDEX
> >  	/* Automatic indexes */
> > -	LogEst rSize = pTab->nRowLogEst;
> > +	rSize = DEFAULT_TUPLE_LOG_COUNT;
> > +	/*
> > +	 * Increase cost of ephemeral index if number of tuples in space is less
> > +	 * then 10240.
> > +	 */
> > +	if (!space->def->opts.is_view && sql_space_tuple_log_count(space) < 133)
> > +		rSize += DEFAULT_TUPLE_LOG_COUNT;
> >  	LogEst rLogSize = estLog(rSize);
> >  	if (!pBuilder->pOrSet && /* Not pqart of an OR optimization */
> >  	    (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0 &&
> >  	    (pWInfo->pParse->sql_flags & SQL_AutoIndex) != 0 &&
> >  	    pSrc->pIBIndex == 0	/* Has no INDEXED BY clause */
> >  	    && !pSrc->fg.notIndexed	/* Has no NOT INDEXED clause */
> > -	    && HasRowid(pTab)	/* Not WITHOUT ROWID table. (FIXME: Why not?) */
> >  	    &&!pSrc->fg.isCorrelated	/* Not a correlated subquery */
> >  	    && !pSrc->fg.isRecursive	/* Not a recursive common table expression. */
> >  	    ) {
> > @@ -2829,24 +2891,15 @@ tnt_error:
> >  			if (termCanDriveIndex(pTerm, pSrc, 0)) {
> >  				pNew->nEq = 1;
> >  				pNew->nSkip = 0;
> > -				pNew->pIndex = 0;
> > +				pNew->index_def = NULL;
> >  				pNew->nLTerm = 1;
> >  				pNew->aLTerm[0] = pTerm;
> > -				/* TUNING: One-time cost for computing the automatic index is
> > -				 * estimated to be X*N*log2(N) where N is the number of rows in
> > -				 * the table being indexed and where X is 7 (LogEst=28) for normal
> > -				 * tables or 1.375 (LogEst=4) for views and subqueries.  The value
> > -				 * of X is smaller for views and subqueries so that the query planner
> > -				 * will be more aggressive about generating automatic indexes for
> > -				 * those objects, since there is no opportunity to add schema
> > -				 * indexes on subqueries and views.
> > +				/*
> > +				 * TODO: At the moment we have decided to use
> > +				 * this formula, but it is quite aggressive and
> > +				 * needs tuning.
> >  				 */
> > -				pNew->rSetup = rLogSize + rSize + 4;
> > -				if (!pTab->def->opts.is_view &&
> > -				    pTab->def->id == 0)
> > -					pNew->rSetup += 24;
> > -				if (pNew->rSetup < 0)
> > -					pNew->rSetup = 0;
> > +				pNew->rSetup = rLogSize + rSize;
> >  				/* TUNING: Each index lookup yields 20 rows in the table.  This
> >  				 * is more than the usual guess of 10 rows, since we have no way
> >  				 * of knowing how selective the index will ultimately be.  It would
> > @@ -2862,7 +2915,6 @@ tnt_error:
> >  			}
> >  		}
> >  	}
> > -#endif				/* SQL_OMIT_AUTOMATIC_INDEX */
> >  	/*
> >  	 * If there was an INDEXED BY clause, then only that one
> >  	 * index is considered.
> > @@ -4630,7 +4682,6 @@ sqlWhereBegin(Parse * pParse,	/* The parser context */
> >  	notReady = ~(Bitmask) 0;
> >  	for (ii = 0; ii < nTabList; ii++) {
> >  		pLevel = &pWInfo->a[ii];
> > -#ifndef SQL_OMIT_AUTOMATIC_INDEX
> >  		if ((pLevel->pWLoop->wsFlags & WHERE_AUTO_INDEX) != 0) {
> >  			constructAutomaticIndex(pParse, &pWInfo->sWC,
> >  						&pTabList->a[pLevel->iFrom],
> > @@ -4638,7 +4689,6 @@ sqlWhereBegin(Parse * pParse,	/* The parser context */
> >  			if (db->mallocFailed)
> >  				goto whereBeginError;
> >  		}
> > -#endif
> >  		sqlWhereExplainOneScan(pParse, pTabList, pLevel, ii,
> >  					       pLevel->iFrom, wctrlFlags);
> >  		pLevel->addrBody = sqlVdbeCurrentAddr(v);
> > @@ -4794,18 +4844,35 @@ sqlWhereEnd(WhereInfo * pWInfo)
> >  			for (; k < last; k++, pOp++) {
> >  				if (pOp->p1 != pLevel->iTabCur)
> >  					continue;
> > -				if (pOp->opcode == OP_Column) {
> > -					int x = pOp->p2;
> > -					assert(def == NULL ||
> > -					       def->space_id ==
> > -					       pTabItem->space->def->id);
> > -					if (x >= 0) {
> > -						pOp->p2 = x;
> > -						pOp->p1 = pLevel->iIdxCur;
> > -					}
> > -					assert((pLoop->
> > -						wsFlags & WHERE_IDX_ONLY) == 0
> > -					       || x >= 0);
> > +				if (pOp->opcode != OP_Column)
> > +					continue;
> > +				assert(def == NULL || def->space_id ==
> > +						      pTabItem->space->def->id);
> > +				int x = pOp->p2;
> > +				assert(x >= 0);
> > +				pOp->p1 = pLevel->iIdxCur;
> > +				if ((pLoop->wsFlags & WHERE_AUTO_INDEX) == 0) {
> > +					pOp->p2 = x;
> > +					continue;
> > +				}
> > +				/*
> > +				 * In case we are using ephemeral index, the
> > +				 * space that will be used to get the values
> > +				 * will be ephemeral index. Since the opcode
> > +				 * OP_Column uses the position of the fields
> > +				 * according to the original space, and the
> > +				 * fields may be in other positions in the
> > +				 * ephemeral index, we must correct the P2 of
> > +				 * OP_Column. To get the positions of these
> > +				 * fields in ephemeral index, we use the index
> > +				 * definition we created.
> > +				 */
> > +				struct key_def *key_def =
> > +					pLevel->pWLoop->index_def->key_def;
> > +				uint32_t part_count = key_def->part_count;
> > +				for (uint32_t i = 0; i < part_count; ++i) {
> > +					if ((int)key_def->parts[i].fieldno == x)
> > +						pOp->p2 = i;
> >  				}
> >  			}
> >  		}
> > diff --git a/src/box/sql/wherecode.c b/src/box/sql/wherecode.c
> > index 6d8768865..96bcab110 100644
> > --- a/src/box/sql/wherecode.c
> > +++ b/src/box/sql/wherecode.c
> > @@ -219,12 +219,12 @@ sqlWhereExplainOneScan(Parse * pParse,	/* Parse context */
> >  
> >  			assert(!(flags & WHERE_AUTO_INDEX)
> >  			       || (flags & WHERE_IDX_ONLY));
> > -			if (idx_def->iid == 0) {
> > +			if ((flags & WHERE_AUTO_INDEX) != 0) {
> > +				zFmt = "EPHEMERAL INDEX";
> > +			} else if (idx_def->iid == 0) {
> >  				if (isSearch) {
> >  					zFmt = "PRIMARY KEY";
> >  				}
> > -			} else if (flags & WHERE_AUTO_INDEX) {
> > -				zFmt = "AUTOMATIC COVERING INDEX";
> >  			} else if (flags & WHERE_IDX_ONLY) {
> >  				zFmt = "COVERING INDEX %s";
> >  			} else {
> > @@ -807,7 +807,8 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo,	/* Complete information about the W
> >  	    notReady & ~sqlWhereGetMask(&pWInfo->sMaskSet, iCur);
> >  	bRev = (pWInfo->revMask >> iLevel) & 1;
> >  	omitTable = (pLoop->wsFlags & WHERE_IDX_ONLY) != 0
> > -	    && (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0;
> > +	    && ((pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0 ||
> > +		(pLoop->wsFlags & WHERE_AUTO_INDEX) != 0);
> >  	VdbeModuleComment((v, "Begin WHERE-loop%d: %s", iLevel,
> >  			   pTabItem->pTab->zName));
> >  
> > @@ -1047,8 +1048,6 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo,	/* Complete information about the W
> >  			startEq = 0;
> >  			start_constraints = 1;
> >  		}
> > -		struct index_def *idx_pk = space->index[0]->def;
> > -		uint32_t pk_part_count = idx_pk->key_def->part_count;
> >  		/*
> >  		 * Tarantool's iterator over integer fields doesn't
> >  		 * tolerate floating point values. Hence, if term
> > @@ -1234,6 +1233,8 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo,	/* Complete information about the W
> >  		if (omitTable) {
> >  			/* pIdx is a covering index.  No need to access the main table. */
> >  		}  else if (iCur != iIdxCur) {
> > +			struct index_def *idx_pk = space->index[0]->def;
> > +			uint32_t pk_part_count = idx_pk->key_def->part_count;
> >  			int iKeyReg = sqlGetTempRange(pParse, pk_part_count);
> >  			for (j = 0; j < (int) pk_part_count; j++) {
> >  				k = idx_pk->key_def->parts[j].fieldno;
> > diff --git a/test/sql-tap/autoindex1.test.lua b/test/sql-tap/autoindex1.test.lua
> > new file mode 100755
> > index 000000000..5520289b1
> > --- /dev/null
> > +++ b/test/sql-tap/autoindex1.test.lua
> > @@ -0,0 +1,114 @@
> > +#!/usr/bin/env tarantool
> > +test = require("sqltester")
> > +test:plan(7)
> > +
> > +--
> > +-- This file implements regression tests for sql library. The focus of this
> > +-- script is testing ephemeral index creation logic.
> > +--
> > +
> > +-- Make sure that ephemeral index is used in all cases below.
> > +test:execsql([[
> > +    CREATE TABLE t1(a INT, b INT PRIMARY KEY);
> > +    INSERT INTO t1 VALUES(1, 11);
> > +    INSERT INTO t1 VALUES(2, 22);
> > +    INSERT INTO t1 SELECT a + 2, b + 22 FROM t1;
> > +    INSERT INTO t1 SELECT a + 4, b + 44 FROM t1;
> > +    CREATE TABLE t2(c INT, d INT PRIMARY KEY);
> > +]])
> > +
> > +for i = 1, 10240 do test:execsql("INSERT INTO t2 VALUES ("..i..", "..i..");") end
> > +
> > +test:do_execsql_test(
> > +    "autoindex-1.1", [[
> > +        EXPLAIN QUERY PLAN SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1;
> > +    ]], {
> > +        0,0,0,"SCAN TABLE T1 (~1048576 rows)",
> > +        0,0,0,"EXECUTE CORRELATED SCALAR SUBQUERY 1",
> > +        1,0,0,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"
> > +    })
> > +
> > +result = test:execsql([[SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1;]])
> > +
> > +test:do_execsql_test(
> > +    "autoindex-1.2", [[
> > +        EXPLAIN QUERY PLAN SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b;
> > +    ]], {
> > +        0,0,0,"SCAN TABLE T1 (~1048576 rows)",
> > +        0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"
> > +    })
> > +
> > +test:do_execsql_test(
> > +    "autoindex-1.3", [[
> > +        SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b;
> > +    ]], result)
> > +
> > +test:do_execsql_test(
> > +    "autoindex-1.4", [[
> > +        EXPLAIN QUERY PLAN SELECT b, d FROM t1 CROSS JOIN t2 ON (c = a);
> > +    ]], {
> > +        0,0,0,"SCAN TABLE T1 (~1048576 rows)",
> > +        0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"
> > +    })
> > +
> > +test:do_execsql_test(
> > +    "autoindex-1.5", [[
> > +        SELECT b, d FROM t1 CROSS JOIN t2 ON (c = a);
> > +    ]], result)
> > +
> > +test:execsql([[
> > +    CREATE TABLE t3(i INT PRIMARY KEY, a INT, b INT);
> > +]])
> > +
> > +--
> > +-- This query is quite slow in case ephemeral index is not used. The main idea
> > +-- behind this test is that ephemeral index allows to use index instead of
> > +-- fullscan in cases below. In this test construction of the index is faster
> > +-- that fullscan.
> > +--
> > +for i = 1, 10240 do test:execsql("INSERT INTO t3 VALUES ("..i..", "..i..", "..(i + 1)..");") end
> > +
> > +test:do_execsql_test(
> > +    "autoindex-1.6", [[
> > +        SELECT count(*)
> > +          FROM t3 AS x1
> > +          JOIN t3 AS x2 ON x2.a=x1.b
> > +          JOIN t3 AS x3 ON x3.a=x2.b
> > +          JOIN t3 AS x4 ON x4.a=x3.b
> > +          JOIN t3 AS x5 ON x5.a=x4.b
> > +          JOIN t3 AS x6 ON x6.a=x5.b
> > +          JOIN t3 AS x7 ON x7.a=x6.b
> > +          JOIN t3 AS x8 ON x8.a=x7.b
> > +          JOIN t3 AS x9 ON x9.a=x8.b
> > +          JOIN t3 AS x10 ON x10.a=x9.b;
> > +    ]], {
> > +        10231
> > +    })
> > +
> > +test:do_execsql_test(
> > +    "autoindex-1.7", [[
> > +        EXPLAIN QUERY PLAN SELECT count(*)
> > +          FROM t3 AS x1
> > +          JOIN t3 AS x2 ON x2.a=x1.b
> > +          JOIN t3 AS x3 ON x3.a=x2.b
> > +          JOIN t3 AS x4 ON x4.a=x3.b
> > +          JOIN t3 AS x5 ON x5.a=x4.b
> > +          JOIN t3 AS x6 ON x6.a=x5.b
> > +          JOIN t3 AS x7 ON x7.a=x6.b
> > +          JOIN t3 AS x8 ON x8.a=x7.b
> > +          JOIN t3 AS x9 ON x9.a=x8.b
> > +          JOIN t3 AS x10 ON x10.a=x9.b;
> > +    ]], {
> > +        0,0,0,"SCAN TABLE T3 AS X1 (~1048576 rows)",
> > +        0,1,1,"SEARCH TABLE T3 AS X2 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,2,2,"SEARCH TABLE T3 AS X3 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,3,3,"SEARCH TABLE T3 AS X4 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,4,4,"SEARCH TABLE T3 AS X5 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,5,5,"SEARCH TABLE T3 AS X6 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,6,6,"SEARCH TABLE T3 AS X7 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,7,7,"SEARCH TABLE T3 AS X8 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,8,8,"SEARCH TABLE T3 AS X9 USING EPHEMERAL INDEX (A=?) (~20 rows)",
> > +        0,9,9,"SEARCH TABLE T3 AS X10 USING EPHEMERAL INDEX (A=?) (~20 rows)"
> > +    })
> > +
> > +test:finish_test()
> > diff --git a/test/sql-tap/autoindex4.test.lua b/test/sql-tap/autoindex4.test.lua
> > index 46488f13a..e22f49c67 100755
> > --- a/test/sql-tap/autoindex4.test.lua
> > +++ b/test/sql-tap/autoindex4.test.lua
> > @@ -1,6 +1,6 @@
> >  #!/usr/bin/env tarantool
> >  test = require("sqltester")
> > -test:plan(7)
> > +test:plan(8)
> >  
> >  --!./tcltestrunner.lua
> >  -- 2014-10-24
> > @@ -75,6 +75,18 @@ test:do_execsql_test(
> >          -- </autoindex4-1.4>
> >      })
> >  
> > +test:do_execsql_test(
> > +    "autoindex4-2.0",
> > +    [[
> > +        CREATE TABLE t3(i INT PRIMARY KEY, e INT, f INT);
> > +        INSERT INTO t3 VALUES(1, 123,654), (2, 555,444), (3, 234,987);
> > +        SELECT (SELECT count(*) FROM t1, t2 WHERE a=e AND x=f), e, f, '|' FROM t3 ORDER BY i;
> > +    ]], {
> > +        -- <autoindex4-2.0>
> > +        1, 123, 654, "|", 0, 555, 444, "|", 4, 234, 987, "|"
> > +        -- </autoindex4-2.0>
> > +    })
> > +
> >  -- do_execsql_test autoindex4-2.0 {
> >  --   CREATE TABLE t3(e INT,f INT);
> >  --   INSERT INTO t3 VALUES(123,654),(555,444),(234,987);
> > diff --git a/test/sql-tap/whereF.test.lua b/test/sql-tap/whereF.test.lua
> > index 5a894b748..3235df437 100755
> > --- a/test/sql-tap/whereF.test.lua
> > +++ b/test/sql-tap/whereF.test.lua
> > @@ -90,10 +90,20 @@ test:do_execsql_test(
> >  
> >  -- for _ in X(0, "X!foreach", [=[["tn sql","\n  1 \"SELECT * FROM t1,           t2 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e\"\n  2 \"SELECT * FROM t2,           t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e\"\n  3 \"SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e\"\n"]]=]) do
> >  for tn, sql in ipairs({"SELECT * FROM t1,           t2 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e",
> > -                       "SELECT * FROM t2,           t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e",
> > -                       "SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"}) do
> > +                       "SELECT * FROM t2,           t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"}) do
> >      test:do_test(
> > -        "2."..tn,
> > +        "2.1."..tn,
> > +        function()
> > +            return test:execsql("EXPLAIN QUERY PLAN "..sql)
> > +        end, {
> > +            '/SEARCH TABLE T1/',
> > +            '/SEARCH TABLE T2/'
> > +        })
> > +end
> > +
> > +for tn, sql in ipairs({"SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"}) do
> > +    test:do_test(
> > +        "2.2."..tn,
> >          function()
> >              return test:execsql("EXPLAIN QUERY PLAN "..sql)
> >          end, {
> > diff --git a/test/sql/misc.result b/test/sql/misc.result
> > index 6af11bfba..df2fdde81 100644
> > --- a/test/sql/misc.result
> > +++ b/test/sql/misc.result
> > @@ -204,3 +204,33 @@ box.execute('SELECT field66, field68, field70 FROM test')
> >  box.space.TEST:drop()
> >  ---
> >  ...
> > +-- gh-4933: Make sure that autoindex optimization is used.
> > +box.execute('CREATE TABLE t1(i INT PRIMARY KEY, a INT);')
> > +---
> > +- row_count: 1
> > +...
> > +box.execute('CREATE TABLE t2(i INT PRIMARY KEY, b INT);')
> > +---
> > +- row_count: 1
> > +...
> > +for i = 1, 10240 do\
> > +	box.execute('INSERT INTO t1 VALUES ($1, $1);', {i})\
> > +	box.execute('INSERT INTO t2 VALUES ($1, $1);', {i})\
> > +end
> > +---
> > +...
> > +box.execute('EXPLAIN QUERY PLAN SELECT a, b FROM t1, t2 WHERE a = b;')
> > +---
> > +- metadata:
> > +  - name: selectid
> > +    type: integer
> > +  - name: order
> > +    type: integer
> > +  - name: from
> > +    type: integer
> > +  - name: detail
> > +    type: text
> > +  rows:
> > +  - [0, 0, 0, 'SCAN TABLE T1 (~1048576 rows)']
> > +  - [0, 1, 1, 'SEARCH TABLE T2 USING EPHEMERAL INDEX (B=?) (~20 rows)']
> > +...
> > diff --git a/test/sql/misc.test.lua b/test/sql/misc.test.lua
> > index 63244228c..44945eace 100644
> > --- a/test/sql/misc.test.lua
> > +++ b/test/sql/misc.test.lua
> > @@ -64,3 +64,12 @@ box.execute('SELECT field70, field64 FROM test')
> >  pk:alter({parts = {66}})
> >  box.execute('SELECT field66, field68, field70 FROM test')
> >  box.space.TEST:drop()
> > +
> > +-- gh-4933: Make sure that autoindex optimization is used.
> > +box.execute('CREATE TABLE t1(i INT PRIMARY KEY, a INT);')
> > +box.execute('CREATE TABLE t2(i INT PRIMARY KEY, b INT);')
> > +for i = 1, 10240 do\
> > +	box.execute('INSERT INTO t1 VALUES ($1, $1);', {i})\
> > +	box.execute('INSERT INTO t2 VALUES ($1, $1);', {i})\
> > +end
> > +box.execute('EXPLAIN QUERY PLAN SELECT a, b FROM t1, t2 WHERE a = b;')


More information about the Tarantool-patches mailing list