From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp46.i.mail.ru (smtp46.i.mail.ru [94.100.177.106]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id DB765469719 for ; Tue, 20 Oct 2020 17:41:18 +0300 (MSK) Date: Tue, 20 Oct 2020 14:41:16 +0000 From: Nikita Pettik Message-ID: <20201020144116.GB23945@tarantool.org> References: <5c615731b1bd3e6d279dfb0cbc92823dea993a03.1602413265.git.imeevma@gmail.com> <20201013140833.GA18852@tarantool.org> <20201014093019.GA360354@tarantool.org> <20201015014056.GB25221@tarantool.org> <20201016002216.GA109977@tarantool.org> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Disposition: inline Content-Transfer-Encoding: 8bit In-Reply-To: <20201016002216.GA109977@tarantool.org> Subject: Re: [Tarantool-patches] [PATCH v3 1/1] sql: enable autoindex optimization List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Mergen Imeev Cc: Mergen Imeev , tarantool-patches@dev.tarantool.org On 16 Oct 03:22, Mergen Imeev wrote: > Привет! Спасибо за ревью. Мои ответы, изменения и новый патч ниже. I've extended commit message and added fixes below. Also dropped second commit from branch (remove OP_Realify) since it was extracted to a separate branch. Now Release build is working; branch is rebased on master. Alexander, could we process with this patch and push it to the master? Current CI statuses (not green but problems seem to be unrelated with SQL): https://travis-ci.org/github/tarantool/tarantool/builds/737420850 https://gitlab.com/tarantool/tarantool/-/pipelines/205228223 diff --git a/src/box/sql/where.c b/src/box/sql/where.c index f7931d11d..582cc48ba 100644 --- a/src/box/sql/where.c +++ b/src/box/sql/where.c @@ -710,20 +710,20 @@ termCanDriveIndex(WhereTerm * pTerm, /* WHERE clause term to check */ } /** - * Generate a code that will create a tuple, which will be inserted in the - * ephemeral index. The created tuple consists of rowid and fields described in - * the index key description. + * Generate a code that will create a tuple, which is supposed to be inserted + * in the ephemeral index space. The created tuple consists of rowid and + * fields described in the index key description. * * @param parse Parsing context. * @param key_def The index key description. - * @param cursor Cursor of space where we will get values for tuple. - * @param reg_out Register to which the created tuple will be placed. + * @param cursor Cursor of source space from which values for tuple are fetched. + * @param reg_out Register to contain the created tuple. * @param reg_eph Register holding pointer to ephemeral index. */ static void -vdbe_emit_create_ephemeral_index_tuple(struct Parse *parse, - const struct key_def *key_def, - int cursor, int reg_out, int reg_eph) +vdbe_emit_ephemeral_index_tuple(struct Parse *parse, + const struct key_def *key_def, int cursor, + int reg_out, int reg_eph) { assert(reg_out != 0); struct Vdbe *v = parse->pVdbe; @@ -740,11 +740,11 @@ vdbe_emit_create_ephemeral_index_tuple(struct Parse *parse, /* * Generate code to construct the ephemeral space that contains all used in - * query fields of one of the tables that participate in the query. The table is - * determined by planner. This ephemeral space will be known as an "ephemeral - * index". The PK definition of ephemeral index contains all of its fields. - * Also, this functions set up the WhereLevel object pLevel so that the code - * generator makes use of ephemeral index. + * query fields of one of the tables that participate in the query. The source + * table is determined by query planner. This ephemeral space will be known as + * an "ephemeral index". The PK definition of ephemeral index contains all of + * its fields. Also, this functions set up the WhereLevel object pLevel so + * that the code generator makes use of ephemeral index. */ static void constructAutomaticIndex(Parse * pParse, /* The parsing context */ @@ -766,7 +766,6 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */ WhereLoop *pLoop; /* The Loop object */ Bitmask idxCols; /* Bitmap of columns used for indexing */ Bitmask extraCols; /* Bitmap of additional columns */ - struct SrcList_item *pTabItem; /* FROM clause term being indexed */ /* Generate code to skip over the creation and initialization of the * transient index on 2nd and subsequent iterations of the loop. @@ -929,14 +928,13 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */ /* Fill the automatic index with content */ sqlExprCachePush(pParse); - pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom]; - assert(pTabItem->fg.viaCoroutine == 0); + assert(pWC->pWInfo->pTabList->a[pLevel->iFrom].fg.viaCoroutine == 0); int cursor = pLevel->iTabCur; addrTop = sqlVdbeAddOp1(v, OP_Rewind, cursor); VdbeCoverage(v); regRecord = sqlGetTempReg(pParse); - vdbe_emit_create_ephemeral_index_tuple(pParse, idx_def->key_def, cursor, - regRecord, reg_eph); + vdbe_emit_ephemeral_index_tuple(pParse, idx_def->key_def, cursor, + regRecord, reg_eph); sqlVdbeAddOp2(v, OP_IdxInsert, regRecord, reg_eph); sqlVdbeAddOp2(v, OP_Next, cursor, addrTop + 1); VdbeCoverage(v); diff --git a/test/sql-tap/autoindex1.test.lua b/test/sql-tap/autoindex1.test.lua index 5520289b1..94b824331 100755 --- a/test/sql-tap/autoindex1.test.lua +++ b/test/sql-tap/autoindex1.test.lua @@ -1,13 +1,12 @@ #!/usr/bin/env tarantool test = require("sqltester") -test:plan(7) +test:plan(8) -- -- This file implements regression tests for sql library. The focus of this -- script is testing ephemeral index creation logic. -- --- Make sure that ephemeral index is used in all cases below. test:execsql([[ CREATE TABLE t1(a INT, b INT PRIMARY KEY); INSERT INTO t1 VALUES(1, 11); @@ -17,25 +16,34 @@ test:execsql([[ CREATE TABLE t2(c INT, d INT PRIMARY KEY); ]]) +test:do_eqp_test( + "autoindex-1.0", [[ + SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1; + ]], { + {0,0,0,"SCAN TABLE T1 (~1048576 rows)"}, + {0,0,0,"EXECUTE CORRELATED SCALAR SUBQUERY 1"}, + {1,0,0,"SCAN TABLE T2 (~262144 rows)"} + }) + for i = 1, 10240 do test:execsql("INSERT INTO t2 VALUES ("..i..", "..i..");") end -test:do_execsql_test( +test:do_eqp_test( "autoindex-1.1", [[ - EXPLAIN QUERY PLAN SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1; + SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1; ]], { - 0,0,0,"SCAN TABLE T1 (~1048576 rows)", - 0,0,0,"EXECUTE CORRELATED SCALAR SUBQUERY 1", - 1,0,0,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)" + {0,0,0,"SCAN TABLE T1 (~1048576 rows)"}, + {0,0,0,"EXECUTE CORRELATED SCALAR SUBQUERY 1"}, + {1,0,0,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"} }) result = test:execsql([[SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1;]]) -test:do_execsql_test( +test:do_eqp_test( "autoindex-1.2", [[ - EXPLAIN QUERY PLAN SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b; + SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b; ]], { - 0,0,0,"SCAN TABLE T1 (~1048576 rows)", - 0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)" + {0,0,0,"SCAN TABLE T1 (~1048576 rows)"}, + {0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"} }) test:do_execsql_test( @@ -43,12 +51,12 @@ test:do_execsql_test( SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b; ]], result) -test:do_execsql_test( +test:do_eqp_test( "autoindex-1.4", [[ - EXPLAIN QUERY PLAN SELECT b, d FROM t1 CROSS JOIN t2 ON (c = a); + SELECT b, d FROM t1 CROSS JOIN t2 ON (c = a); ]], { - 0,0,0,"SCAN TABLE T1 (~1048576 rows)", - 0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)" + {0,0,0,"SCAN TABLE T1 (~1048576 rows)"}, + {0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)"} }) test:do_execsql_test( @@ -85,9 +93,9 @@ test:do_execsql_test( 10231 }) -test:do_execsql_test( +test:do_eqp_test( "autoindex-1.7", [[ - EXPLAIN QUERY PLAN SELECT count(*) + SELECT count(*) FROM t3 AS x1 JOIN t3 AS x2 ON x2.a=x1.b JOIN t3 AS x3 ON x3.a=x2.b @@ -99,16 +107,16 @@ test:do_execsql_test( JOIN t3 AS x9 ON x9.a=x8.b JOIN t3 AS x10 ON x10.a=x9.b; ]], { - 0,0,0,"SCAN TABLE T3 AS X1 (~1048576 rows)", - 0,1,1,"SEARCH TABLE T3 AS X2 USING EPHEMERAL INDEX (A=?) (~20 rows)", - 0,2,2,"SEARCH TABLE T3 AS X3 USING EPHEMERAL INDEX (A=?) (~20 rows)", - 0,3,3,"SEARCH TABLE T3 AS X4 USING EPHEMERAL INDEX (A=?) (~20 rows)", - 0,4,4,"SEARCH TABLE T3 AS X5 USING EPHEMERAL INDEX (A=?) (~20 rows)", - 0,5,5,"SEARCH TABLE T3 AS X6 USING EPHEMERAL INDEX (A=?) (~20 rows)", - 0,6,6,"SEARCH TABLE T3 AS X7 USING EPHEMERAL INDEX (A=?) (~20 rows)", - 0,7,7,"SEARCH TABLE T3 AS X8 USING EPHEMERAL INDEX (A=?) (~20 rows)", - 0,8,8,"SEARCH TABLE T3 AS X9 USING EPHEMERAL INDEX (A=?) (~20 rows)", - 0,9,9,"SEARCH TABLE T3 AS X10 USING EPHEMERAL INDEX (A=?) (~20 rows)" + {0,0,0,"SCAN TABLE T3 AS X1 (~1048576 rows)"}, + {0,1,1,"SEARCH TABLE T3 AS X2 USING EPHEMERAL INDEX (A=?) (~20 rows)"}, + {0,2,2,"SEARCH TABLE T3 AS X3 USING EPHEMERAL INDEX (A=?) (~20 rows)"}, + {0,3,3,"SEARCH TABLE T3 AS X4 USING EPHEMERAL INDEX (A=?) (~20 rows)"}, + {0,4,4,"SEARCH TABLE T3 AS X5 USING EPHEMERAL INDEX (A=?) (~20 rows)"}, + {0,5,5,"SEARCH TABLE T3 AS X6 USING EPHEMERAL INDEX (A=?) (~20 rows)"}, + {0,6,6,"SEARCH TABLE T3 AS X7 USING EPHEMERAL INDEX (A=?) (~20 rows)"}, + {0,7,7,"SEARCH TABLE T3 AS X8 USING EPHEMERAL INDEX (A=?) (~20 rows)"}, + {0,8,8,"SEARCH TABLE T3 AS X9 USING EPHEMERAL INDEX (A=?) (~20 rows)"}, + {0,9,9,"SEARCH TABLE T3 AS X10 USING EPHEMERAL INDEX (A=?) (~20 rows)"} }) > On Thu, Oct 15, 2020 at 01:40:56AM +0000, Nikita Pettik wrote: > > On 14 Oct 12:30, Mergen Imeev wrote: > > > Привет! Спасибо за ревью. Мои ответы и изменения ниже. > > > > > > On Tue, Oct 13, 2020 at 02:08:34PM +0000, Nikita Pettik wrote: > > > > On 11 Oct 13:48, imeevma@tarantool.org wrote: > > > > > From: Kirill Yukhin > > > > > > > > > > This patch enables "autoindex" optimization. This optimization uses an > > > > > ephemeral space that contains all the fields used in the query. This > > > > > > > > Не очень понимаю, что значит "все поля в запросе"..? Это как?:) > > > > Ты имеешь ввиду все поля result-set'a? Или объединение всех полей > > > > всех таблиц используемых в запросе? > > > > > > > Я думаю лучше будет показать на примере: > > > CREATE TABLE t1 (i INT PRIMARY KEY, a INT, b INT, c INT); > > > CREATE TABLE t2 (i INT PRIMARY KEY, a INT, b INT, c INT); > > > ... > > > SELECT t1.a, t2.b FROM t1, t2 WHERE t1.b = t2.c > > > > > > В данном случае планировщик выберет один из спейсов, для которого он сделает > > > автоиндекс. В случае, если он выберет t1, то автоиндекс будет состоять из > > > трех полей: rowid, b, a (в таком порядке). Если он выберет t2, то автоиндекс > > > будет так же состоять из трех полей: rowid, c, b. Т.е. в любом случае автоиндекс > > > будет содержать не только поля спейса которые участвуют в WHERE, но и поля > > > который участвуют в возвращаемом значении, даже если они не участвуют в WHERE. > > > Это сделано для того, чтобы не обращаться к оригинальному спейсу в случае > > > использования автоиндекс и пользоваться самим автоиндексом вместо оригинального > > > спейса. > > > > Ок, вот это было бы здорово сразу в коммит месседже описать. > > > Добавил. > > > > > > ephemeral space is called "autoindex". Autoindex may have fewer fields > > > > > than the original space, and the order of these fields in autoindex may > > > > > > > > Опять: original space - это какой спейс? В запросе может участвовать > > > > много таблиц.. > > > > > > > Один из спейсов участвующих в запросе. Какой именно выбирает планировщик. > > > > ditto > > > > > > Еще имхо какая-то путаница выходит: называем эфемерные спейсы автоиндексами. > > > > Я бы обозвал это как-нибудь по-другому. Это же просто еще одно использование > > > > эфемерных таблиц, не более. > > > > > > > Мне кажется это не совсем верно. Обычно использование эфемерных спейсов является > > > необходимостью а не оптимизацией. > > > > Так, и? > > > Какого ответа или действить ты ждешь на этот вопрос? > > > > Насчет того что это спейс а не индекс я > > > согласен, но в некотором смысле он ведет себя как индекс. > > > > Это как? > > > Как я и сказал выше, один такой случай описан ниже. Я имею ввиду случай с > представлениями (которые не имеют собственных индексов). > > > > Т.е. SQL считает, что > > > мы создали индекс. Один из случаев когда это важно упомянут ниже. > > > > Короче, предлагаю называть это хотя бы ephemeral index opt, > > autoindex space opt или что-нибудь в это духе (чтобы не путать со > > скулайтовской терминологией). > > > Назвал ephemeral index. > > > > > > be different from their order in the original space. In addition to the > > > > > mentioned fields, autoindex contains a rowid. > > > > > > > > > > The primary key of autoindex is a covering index, i.e. it contains all > > > > > of its fields. > > > > > > > > В мемтиксе все индексы покрывающие. И 'covering' - свойство для каждого > > > > запроса отдельно взятого, а не индекса как такового. > > > > > > > В данном случае 'covering' означает что определение содержит все поля спейса. > > > В предыдущем ревью я предложил разобраться с этим вопросом чуть позже, т.к. > > > считаю, что здесь это не очень важно. > > > > У этого термина есть общепринятое определение. Не понимаю в чем тут разбираться, > > если он используется не по назначению.. > > > Убрал этот обзац из коммит-месседжа. > > > > > > > > > > > Currently this optimization mainly used in cases when values SELECTED > > > > > from more than one space and WHERE specified. > > > > > > > > > > At the moment, this optimization does not take into account the number > > > > > of tuples in spaces. This should be fixed. > > > > > > > > > > Closes #4933 > > > > > > > > > > Co-authored-by: Mergen Imeev > > > > > Co-authored-by: Timur Safin > > > > > --- > > > > > https://github.com/tarantool/tarantool/issues/4933 > > > > > https://github.com/tarantool/tarantool/tree/imeevma/gh-4933-autoindex > > > > > > > > > > @ChangeLog > > > > > - "autoindex" optimization is now enabled (gh-4933). > > > > > > > > МНе кажется, можно чуть больше написать про такую важную штуку > > > > > > > Мне кажется это не настолько важная оптимизация > > > > Ну-ну > > > Опять, какие действия или ответ ты ждешь на это замечание? > > > > что ее стоит отдельно много > > > описывать. Круг ее применения довольно ограничен -- в основном джойны и > > > сабселекты с подходящим WHERE. > > > > > > > diff --git a/src/box/sql/where.c b/src/box/sql/where.c > > > > > index e9e936856..6ab2be3ba 100644 > > > > > --- a/src/box/sql/where.c > > > > > +++ b/src/box/sql/where.c > > > > > @@ -683,7 +683,6 @@ translateColumnToCopy(Vdbe * v, /* The VDBE containing code to translate */ > > > > > } > > > > > +/** > > > > > + * Generate a code that will create a tuple, which will be inserted in the > > > > > + * autoindex. The created tuple consists of rowid and fields described in the > > > > > + * index key description. > > > > > + * > > > > > + * @param parse Parsing context. > > > > > + * @param key_def The index key description. > > > > > + * @param cursor Cursor of space where we will get values for tuple. > > > > > + * @param reg_out Register to which the created tuple will be placed. > > > > > + * @param reg_eph Register holding pointer to autoindex. > > > > > + * > > > > > + * @return Return a register number which is the first in a block of registers > > > > > + * that holds the elements of the created tuple. The block of registers has > > > > > + * already been deallocated by the time this routine returns. > > > > > > > > Не очень понял, зачем возвращать уже деаллоцированные регистры.. > > > > > > > Это значение используется в ветке if, которую я воспроизвести не смог. Насколько > > > я понимаю это нужно для подсчета количества используемых регистров в случае > > > использованию корутины. Если это так, это может быть необходимо для > > > предварительного вычисления количества используемых в корутине регистров. Но это > > > только мои предположения. > > > > Может стоит все-таки разобраться, покрыть тестами там, или удалить мертвый код. > > > Удалил код. Coverall показывал, что код не используется, и тесты не попадали > после удаления. Воспросизвести тест, в котором бы этот код использовался не > смог. Однако, я все еще думаю, что это возможно. > > > > > > + */ > > > > > +static int > > > > > +vdbe_emit_create_autoindex_tuple(struct Parse *parse, > > > > > + const struct key_def *key_def, > > > > > + int cursor, int reg_out, int reg_eph) > > > > > +{ > > > > > + assert(reg_out != 0); > > > > > + struct Vdbe *v = parse->pVdbe; > > > > > + int col_cnt = key_def->part_count; > > > > > + int reg_base = sqlGetTempRange(parse, col_cnt + 1); > > > > > + for (int j = 0; j < col_cnt; j++) { > > > > > + uint32_t tabl_col = key_def->parts[j].fieldno; > > > > > + sqlVdbeAddOp3(v, OP_Column, cursor, tabl_col, reg_base + j); > > > > > + } > > > > > + sqlVdbeAddOp2(v, OP_NextIdEphemeral, reg_eph, reg_base + col_cnt); > > > > > + sqlVdbeAddOp3(v, OP_MakeRecord, reg_base, col_cnt + 1, reg_out); > > > > > + sqlReleaseTempRange(parse, reg_base, col_cnt + 1); > > > > > + return reg_base; > > > > > +} > > > > > + > > > > > /* > > > > > - * Generate code to construct the Index object for an automatic index > > > > > - * and to set up the WhereLevel object pLevel so that the code generator > > > > > - * makes use of the automatic index. > > > > > + * Generate code to construct the ephemeral space that contains all used in > > > > > + * query fields of one of the tables. This ephemeral space will be known as an > > > > > > > > Опять: какой таблицы..Очень запутывающий коммент имхо > > > > > > > Будет лучше если я напишу "selected by planner table"? > > > > Как минимум да > > > Добавил. > > > > > > + * "autoindex". PK of this ephemeral space is a covering index. Also, this > > > > > + * functions set up the WhereLevel object pLevel so that the code generator > > > > > + * makes use of the auto-index. > > > > > */ > > > > > static void > > > > > constructAutomaticIndex(Parse * pParse, /* The parsing context */ > > > > > } > > > > > * Estimate the location of a particular key among all keys in an > > > > > -#ifndef SQL_OMIT_AUTOMATIC_INDEX > > > > > /* Automatic indexes */ > > > > > - LogEst rSize = pTab->nRowLogEst; > > > > > + rSize = DEFAULT_TUPLE_LOG_COUNT; > > > > > + /* Increase cost of autoindex if number of tuples in space < ~10000. */ > > > > > > > > 10k как-то маловато. В скулайте, если я правильно помню, какие-то подвижки > > > > начинаются со 100к affected rows. > > > > > > > Это значение было подобрано эксперементально. На подбор более точных эвристик > > > есть отдельная задачу у @tsafin на следующий квартал. > > > > На каких "экспериментальных данных"? Сейчас это выглядит оочень агрессивно > > > В основном проверка проходила на запросых Q8, Q13, Q17 и Q20 TPC-H. При значении > этой величины в 1000000 происходило сильное замедление запросов Q17 и Q20 > (замедление в 200+ раз). При значении этой величины в 1000 происходило замедение > теста Q8 (в 1.5 - 2 раза). При значнии 10000 все 4 запроса работают досточно > быстро при значении SCALING FACTOR TPC-H до 3 (выше пока не тестировалось). > > > > > > + if (!space->def->opts.is_view && sql_space_tuple_log_count(space) < 133) > > > > > + rSize += DEFAULT_TUPLE_LOG_COUNT; > > > > > > > > Почему для представления это не работает? > > > > > > > Скорее наоборот - у представлений стоимость автоиндекса не увеличивается, так > > > что он используется чаще. Связано с тем, что посчитать количества таплов в > > > представлении не в ран-тайме довольно проблематично. > > > > Так там сейчас все равно константа добавляется. С аналайзом можно и для > > вью посчитать кол-во таплов. > > > У нас нет аналайза. > > > > > > LogEst rLogSize = estLog(rSize); > > > > > if (!pBuilder->pOrSet && /* Not pqart of an OR optimization */ > > > > > (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0 && > > > > > (pWInfo->pParse->sql_flags & SQL_AutoIndex) != 0 && > > > > > pSrc->pIBIndex == 0 /* Has no INDEXED BY clause */ > > > > > && !pSrc->fg.notIndexed /* Has no NOT INDEXED clause */ > > > > > &&!pSrc->fg.isCorrelated /* Not a correlated subquery */ > > > > > && !pSrc->fg.isRecursive /* Not a recursive common table expression. */ > > > > > ) { > > > > > @@ -2829,24 +2910,10 @@ tnt_error: > > > > > if (termCanDriveIndex(pTerm, pSrc, 0)) { > > > > > pNew->nEq = 1; > > > > > pNew->nSkip = 0; > > > > > - pNew->pIndex = 0; > > > > > + pNew->index_def = NULL; > > > > > pNew->nLTerm = 1; > > > > > pNew->aLTerm[0] = pTerm; > > > > > - /* TUNING: One-time cost for computing the automatic index is > > > > > - * estimated to be X*N*log2(N) where N is the number of rows in > > > > > - * the table being indexed and where X is 7 (LogEst=28) for normal > > > > > - * tables or 1.375 (LogEst=4) for views and subqueries. The value > > > > > - * of X is smaller for views and subqueries so that the query planner > > > > > - * will be more aggressive about generating automatic indexes for > > > > > - * those objects, since there is no opportunity to add schema > > > > > - * indexes on subqueries and views. > > > > > - */ > > > > > - pNew->rSetup = rLogSize + rSize + 4; > > > > > - if (!pTab->def->opts.is_view && > > > > > - pTab->def->id == 0) > > > > > - pNew->rSetup += 24; > > > > > - if (pNew->rSetup < 0) > > > > > - pNew->rSetup = 0; > > > > > + pNew->rSetup = rLogSize + rSize; > > > > > > > > Можешь объяснить, почему тут произошли изменения? То есть почему оптимизация > > > > стала более агрессивной? > > > > > > > Эти значение получены эксперементальным путем. Это так же будет уточняться в > > > ходе подбора эвристик. > > > > Ну не удаляй тогда код хотя бы, оставь там еще свой коммент какой-то. > > Тот же кто потом с этим будет возиться в этом болоте утонет.. > > > Написал комментарий TODO. > > Сомневаюсь, что это будет самая большая проблема человека, который будет > разбираться с планировщиком. > > > > > > diff --git a/test/sql-tap/autoindex1.test.lua b/test/sql-tap/autoindex1.test.lua > > > > > new file mode 100755 > > > > > index 000000000..42b995dd3 > > > > > --- /dev/null > > > > > +++ b/test/sql-tap/autoindex1.test.lua > > > > > +--------------------------------------------------------------------------- > > > > > +-- This file implements regression tests for sql library. The focus of this > > > > > +-- script is testing automatic index creation logic. > > > > > + > > > > > +test:execsql([[ > > > > > + CREATE TABLE t1(a INT, b INT PRIMARY KEY); > > > > > + INSERT INTO t1 VALUES(1, 11); > > > > > + INSERT INTO t1 VALUES(2, 22); > > > > > + INSERT INTO t1 SELECT a + 2, b + 22 FROM t1; > > > > > + INSERT INTO t1 SELECT a + 4, b + 44 FROM t1; > > > > > + CREATE TABLE t2(c INT, d INT PRIMARY KEY); > > > > > +]]) > > > > > + > > > > > +for i = 1, 11000 do test:execsql("INSERT INTO t2 VALUES ("..i..", "..i..");") end > > > > > > > > Это быстро работает? Может сделать тест long-run? > > > > Было бы еще неплохо к каждой группе тестов коммент, поясняющий почему > > > > с эфмереной табличкой будет быстрее. > > > > > > > На моем ноутбуке это работает быстро, быстрее чем некоторые другие стандартные > > > SQL тесты. До появления сообщения о '10 second...' ни разу не дошло. > > > Добавил коммент о том, что из-за автоиндекса тест работает быстрее, чем в случае > > > если автоиндекс не используется. > > > > > > > > > Diff: > > > > > > +-- Make sure that autoindex is used in all cases below. > > > > Я имел ввиду добавить объяснение, почему автоиндекс будет быстрее, > > чем тыркание туда-сюда по таблицам. Можешь посмотреть примеры в тестах > > на аналайз > > > Немного расширил комментарий. > > > > test:execsql([[ > > > CREATE TABLE t1(a INT, b INT PRIMARY KEY); > > > INSERT INTO t1 VALUES(1, 11); > > > Diff: > > > diff --git a/src/box/sql/where.c b/src/box/sql/where.c > index 6ab2be3ba..f7931d11d 100644 > --- a/src/box/sql/where.c > +++ b/src/box/sql/where.c > @@ -711,23 +711,19 @@ termCanDriveIndex(WhereTerm * pTerm, /* WHERE clause term to check */ > > /** > * Generate a code that will create a tuple, which will be inserted in the > - * autoindex. The created tuple consists of rowid and fields described in the > - * index key description. > + * ephemeral index. The created tuple consists of rowid and fields described in > + * the index key description. > * > * @param parse Parsing context. > * @param key_def The index key description. > * @param cursor Cursor of space where we will get values for tuple. > * @param reg_out Register to which the created tuple will be placed. > - * @param reg_eph Register holding pointer to autoindex. > - * > - * @return Return a register number which is the first in a block of registers > - * that holds the elements of the created tuple. The block of registers has > - * already been deallocated by the time this routine returns. > + * @param reg_eph Register holding pointer to ephemeral index. > */ > -static int > -vdbe_emit_create_autoindex_tuple(struct Parse *parse, > - const struct key_def *key_def, > - int cursor, int reg_out, int reg_eph) > +static void > +vdbe_emit_create_ephemeral_index_tuple(struct Parse *parse, > + const struct key_def *key_def, > + int cursor, int reg_out, int reg_eph) > { > assert(reg_out != 0); > struct Vdbe *v = parse->pVdbe; > @@ -740,15 +736,15 @@ vdbe_emit_create_autoindex_tuple(struct Parse *parse, > sqlVdbeAddOp2(v, OP_NextIdEphemeral, reg_eph, reg_base + col_cnt); > sqlVdbeAddOp3(v, OP_MakeRecord, reg_base, col_cnt + 1, reg_out); > sqlReleaseTempRange(parse, reg_base, col_cnt + 1); > - return reg_base; > } > > /* > * Generate code to construct the ephemeral space that contains all used in > - * query fields of one of the tables. This ephemeral space will be known as an > - * "autoindex". PK of this ephemeral space is a covering index. Also, this > - * functions set up the WhereLevel object pLevel so that the code generator > - * makes use of the auto-index. > + * query fields of one of the tables that participate in the query. The table is > + * determined by planner. This ephemeral space will be known as an "ephemeral > + * index". The PK definition of ephemeral index contains all of its fields. > + * Also, this functions set up the WhereLevel object pLevel so that the code > + * generator makes use of ephemeral index. > */ > static void > constructAutomaticIndex(Parse * pParse, /* The parsing context */ > @@ -771,8 +767,6 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */ > Bitmask idxCols; /* Bitmap of columns used for indexing */ > Bitmask extraCols; /* Bitmap of additional columns */ > struct SrcList_item *pTabItem; /* FROM clause term being indexed */ > - int addrCounter = 0; /* Address where integer counter is initialized */ > - int regBase; /* Array of registers where record is assembled */ > > /* Generate code to skip over the creation and initialization of the > * transient index on 2nd and subsequent iterations of the loop. > @@ -907,7 +901,7 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */ > /* Construct the index definition to describe this index. */ > struct index_opts opts; > index_opts_create(&opts); > - const char *idx_name = "autoindex"; > + const char *idx_name = "ephemeral index"; > struct index_def *idx_def = index_def_new(space->def->id, 0, idx_name, > strlen(idx_name), TREE, &opts, > key_def, NULL); > @@ -936,32 +930,16 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */ > /* Fill the automatic index with content */ > sqlExprCachePush(pParse); > pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom]; > + assert(pTabItem->fg.viaCoroutine == 0); > int cursor = pLevel->iTabCur; > - if (pTabItem->fg.viaCoroutine) { > - int regYield = pTabItem->regReturn; > - addrCounter = sqlVdbeAddOp2(v, OP_Integer, 0, 0); > - sqlVdbeAddOp3(v, OP_InitCoroutine, regYield, 0, > - pTabItem->addrFillSub); > - addrTop = sqlVdbeAddOp1(v, OP_Yield, regYield); > - VdbeCoverage(v); > - VdbeComment((v, "next row of \"%s\"", pTabItem->space->def->name)); > - } else { > - addrTop = sqlVdbeAddOp1(v, OP_Rewind, cursor); > - VdbeCoverage(v); > - } > + addrTop = sqlVdbeAddOp1(v, OP_Rewind, cursor); > + VdbeCoverage(v); > regRecord = sqlGetTempReg(pParse); > - regBase = vdbe_emit_create_autoindex_tuple(pParse, idx_def->key_def, > - cursor, regRecord, reg_eph); > + vdbe_emit_create_ephemeral_index_tuple(pParse, idx_def->key_def, cursor, > + regRecord, reg_eph); > sqlVdbeAddOp2(v, OP_IdxInsert, regRecord, reg_eph); > - if (pTabItem->fg.viaCoroutine) { > - sqlVdbeChangeP2(v, addrCounter, regBase + n + 1); > - translateColumnToCopy(v, addrTop, cursor, pTabItem->regResult); > - sqlVdbeGoto(v, addrTop); > - pTabItem->fg.viaCoroutine = 0; > - } else { > - sqlVdbeAddOp2(v, OP_Next, cursor, addrTop + 1); > - VdbeCoverage(v); > - } > + sqlVdbeAddOp2(v, OP_Next, cursor, addrTop + 1); > + VdbeCoverage(v); > sqlVdbeChangeP5(v, SQL_STMTSTATUS_AUTOINDEX); > sqlVdbeJumpHere(v, addrTop); > sqlReleaseTempReg(pParse, regRecord); > @@ -2889,7 +2867,10 @@ tnt_error: > > /* Automatic indexes */ > rSize = DEFAULT_TUPLE_LOG_COUNT; > - /* Increase cost of autoindex if number of tuples in space < ~10000. */ > + /* > + * Increase cost of ephemeral index if number of tuples in space is less > + * then 10240. > + */ > if (!space->def->opts.is_view && sql_space_tuple_log_count(space) < 133) > rSize += DEFAULT_TUPLE_LOG_COUNT; > LogEst rLogSize = estLog(rSize); > @@ -2913,6 +2894,11 @@ tnt_error: > pNew->index_def = NULL; > pNew->nLTerm = 1; > pNew->aLTerm[0] = pTerm; > + /* > + * TODO: At the moment we have decided to use > + * this formula, but it is quite aggressive and > + * needs tuning. > + */ > pNew->rSetup = rLogSize + rSize; > /* TUNING: Each index lookup yields 20 rows in the table. This > * is more than the usual guess of 10 rows, since we have no way > @@ -4870,15 +4856,16 @@ sqlWhereEnd(WhereInfo * pWInfo) > continue; > } > /* > - * In case we are using autoindex, the space > - * that will be used to get the values will be > - * autoindex. Since the opcode OP_Column uses > - * the position of the fields according to the > - * original space, and the fields may be in > - * other positions in the autoindex, we must > - * correct the P2 of OP_Column. To get the > - * positions of these fields in autoindex, we > - * use the index definition we created. > + * In case we are using ephemeral index, the > + * space that will be used to get the values > + * will be ephemeral index. Since the opcode > + * OP_Column uses the position of the fields > + * according to the original space, and the > + * fields may be in other positions in the > + * ephemeral index, we must correct the P2 of > + * OP_Column. To get the positions of these > + * fields in ephemeral index, we use the index > + * definition we created. > */ > struct key_def *key_def = > pLevel->pWLoop->index_def->key_def; > diff --git a/src/box/sql/wherecode.c b/src/box/sql/wherecode.c > index 310b85fa0..96bcab110 100644 > --- a/src/box/sql/wherecode.c > +++ b/src/box/sql/wherecode.c > @@ -220,7 +220,7 @@ sqlWhereExplainOneScan(Parse * pParse, /* Parse context */ > assert(!(flags & WHERE_AUTO_INDEX) > || (flags & WHERE_IDX_ONLY)); > if ((flags & WHERE_AUTO_INDEX) != 0) { > - zFmt = "AUTOMATIC INDEX"; > + zFmt = "EPHEMERAL INDEX"; > } else if (idx_def->iid == 0) { > if (isSearch) { > zFmt = "PRIMARY KEY"; > diff --git a/test/sql-tap/autoindex1.test.lua b/test/sql-tap/autoindex1.test.lua > index cd423d9da..5520289b1 100755 > --- a/test/sql-tap/autoindex1.test.lua > +++ b/test/sql-tap/autoindex1.test.lua > @@ -4,10 +4,10 @@ test:plan(7) > > -- > -- This file implements regression tests for sql library. The focus of this > --- script is testing automatic index creation logic. > +-- script is testing ephemeral index creation logic. > -- > > --- Make sure that autoindex is used in all cases below. > +-- Make sure that ephemeral index is used in all cases below. > test:execsql([[ > CREATE TABLE t1(a INT, b INT PRIMARY KEY); > INSERT INTO t1 VALUES(1, 11); > @@ -17,7 +17,7 @@ test:execsql([[ > CREATE TABLE t2(c INT, d INT PRIMARY KEY); > ]]) > > -for i = 1, 11000 do test:execsql("INSERT INTO t2 VALUES ("..i..", "..i..");") end > +for i = 1, 10240 do test:execsql("INSERT INTO t2 VALUES ("..i..", "..i..");") end > > test:do_execsql_test( > "autoindex-1.1", [[ > @@ -25,7 +25,7 @@ test:do_execsql_test( > ]], { > 0,0,0,"SCAN TABLE T1 (~1048576 rows)", > 0,0,0,"EXECUTE CORRELATED SCALAR SUBQUERY 1", > - 1,0,0,"SEARCH TABLE T2 USING AUTOMATIC INDEX (C=?) (~20 rows)" > + 1,0,0,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)" > }) > > result = test:execsql([[SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1;]]) > @@ -35,7 +35,7 @@ test:do_execsql_test( > EXPLAIN QUERY PLAN SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b; > ]], { > 0,0,0,"SCAN TABLE T1 (~1048576 rows)", > - 0,1,1,"SEARCH TABLE T2 USING AUTOMATIC INDEX (C=?) (~20 rows)" > + 0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)" > }) > > test:do_execsql_test( > @@ -48,7 +48,7 @@ test:do_execsql_test( > EXPLAIN QUERY PLAN SELECT b, d FROM t1 CROSS JOIN t2 ON (c = a); > ]], { > 0,0,0,"SCAN TABLE T1 (~1048576 rows)", > - 0,1,1,"SEARCH TABLE T2 USING AUTOMATIC INDEX (C=?) (~20 rows)" > + 0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)" > }) > > test:do_execsql_test( > @@ -61,10 +61,12 @@ test:execsql([[ > ]]) > > -- > --- This query is quite slow in case autoindex is not used. This test shows > --- that autoindex allows to increase performance in cases like this. > +-- This query is quite slow in case ephemeral index is not used. The main idea > +-- behind this test is that ephemeral index allows to use index instead of > +-- fullscan in cases below. In this test construction of the index is faster > +-- that fullscan. > -- > -for i = 1, 11000 do test:execsql("INSERT INTO t3 VALUES ("..i..", "..i..", "..(i + 1)..");") end > +for i = 1, 10240 do test:execsql("INSERT INTO t3 VALUES ("..i..", "..i..", "..(i + 1)..");") end > > test:do_execsql_test( > "autoindex-1.6", [[ > @@ -80,7 +82,7 @@ test:do_execsql_test( > JOIN t3 AS x9 ON x9.a=x8.b > JOIN t3 AS x10 ON x10.a=x9.b; > ]], { > - 10991 > + 10231 > }) > > test:do_execsql_test( > @@ -98,15 +100,15 @@ test:do_execsql_test( > JOIN t3 AS x10 ON x10.a=x9.b; > ]], { > 0,0,0,"SCAN TABLE T3 AS X1 (~1048576 rows)", > - 0,1,1,"SEARCH TABLE T3 AS X2 USING AUTOMATIC INDEX (A=?) (~20 rows)", > - 0,2,2,"SEARCH TABLE T3 AS X3 USING AUTOMATIC INDEX (A=?) (~20 rows)", > - 0,3,3,"SEARCH TABLE T3 AS X4 USING AUTOMATIC INDEX (A=?) (~20 rows)", > - 0,4,4,"SEARCH TABLE T3 AS X5 USING AUTOMATIC INDEX (A=?) (~20 rows)", > - 0,5,5,"SEARCH TABLE T3 AS X6 USING AUTOMATIC INDEX (A=?) (~20 rows)", > - 0,6,6,"SEARCH TABLE T3 AS X7 USING AUTOMATIC INDEX (A=?) (~20 rows)", > - 0,7,7,"SEARCH TABLE T3 AS X8 USING AUTOMATIC INDEX (A=?) (~20 rows)", > - 0,8,8,"SEARCH TABLE T3 AS X9 USING AUTOMATIC INDEX (A=?) (~20 rows)", > - 0,9,9,"SEARCH TABLE T3 AS X10 USING AUTOMATIC INDEX (A=?) (~20 rows)" > + 0,1,1,"SEARCH TABLE T3 AS X2 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,2,2,"SEARCH TABLE T3 AS X3 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,3,3,"SEARCH TABLE T3 AS X4 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,4,4,"SEARCH TABLE T3 AS X5 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,5,5,"SEARCH TABLE T3 AS X6 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,6,6,"SEARCH TABLE T3 AS X7 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,7,7,"SEARCH TABLE T3 AS X8 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,8,8,"SEARCH TABLE T3 AS X9 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,9,9,"SEARCH TABLE T3 AS X10 USING EPHEMERAL INDEX (A=?) (~20 rows)" > }) > > test:finish_test() > diff --git a/test/sql/misc.result b/test/sql/misc.result > index 4d12ba4c9..bbf37e26f 100644 > --- a/test/sql/misc.result > +++ b/test/sql/misc.result > @@ -237,5 +237,5 @@ box.execute('EXPLAIN QUERY PLAN SELECT a, b FROM t1, t2 WHERE a = b;') > type: text > rows: > - [0, 0, 0, 'SCAN TABLE T1 (~1048576 rows)'] > - - [0, 1, 1, 'SEARCH TABLE T2 USING AUTOMATIC INDEX (B=?) (~20 rows)'] > + - [0, 1, 1, 'SEARCH TABLE T2 USING EPHEMERAL INDEX (B=?) (~20 rows)'] > ... > diff --git a/test/sql/misc.result b/test/sql/misc.result > index bbf37e26f..df2fdde81 100644 > --- a/test/sql/misc.result > +++ b/test/sql/misc.result > @@ -213,17 +213,12 @@ box.execute('CREATE TABLE t2(i INT PRIMARY KEY, b INT);') > --- > - row_count: 1 > ... > -for i = 1, 11000 do\ > +for i = 1, 10240 do\ > box.execute('INSERT INTO t1 VALUES ($1, $1);', {i})\ > box.execute('INSERT INTO t2 VALUES ($1, $1);', {i})\ > end > --- > ... > --- > --- There is no need to insert values in the tables since planner assumes a > --- default number of tuples for each space, regardless of how many tuples there > --- actually are in those spaces. The default value is 1048576 (== 2^20). > --- > box.execute('EXPLAIN QUERY PLAN SELECT a, b FROM t1, t2 WHERE a = b;') > --- > - metadata: > diff --git a/test/sql/misc.test.lua b/test/sql/misc.test.lua > index d7673bb49..44945eace 100644 > --- a/test/sql/misc.test.lua > +++ b/test/sql/misc.test.lua > @@ -68,13 +68,8 @@ box.space.TEST:drop() > -- gh-4933: Make sure that autoindex optimization is used. > box.execute('CREATE TABLE t1(i INT PRIMARY KEY, a INT);') > box.execute('CREATE TABLE t2(i INT PRIMARY KEY, b INT);') > -for i = 1, 11000 do\ > +for i = 1, 10240 do\ > box.execute('INSERT INTO t1 VALUES ($1, $1);', {i})\ > box.execute('INSERT INTO t2 VALUES ($1, $1);', {i})\ > end > --- > --- There is no need to insert values in the tables since planner assumes a > --- default number of tuples for each space, regardless of how many tuples there > --- actually are in those spaces. The default value is 1048576 (== 2^20). > --- > box.execute('EXPLAIN QUERY PLAN SELECT a, b FROM t1, t2 WHERE a = b;') > > > > > New patch: > > > From 1c7820e9b90f34bca671956a51bd345b1d6ab748 Mon Sep 17 00:00:00 2001 > From: Kirill Yukhin > Date: Tue, 11 Aug 2020 13:45:40 +0300 > Subject: [PATCH] sql: enable ephemeral index optimization > > This patch enables "ephemeral index" optimization. This optimization > uses an ephemeral space that contains all the fields of one of spaces > used in the query. This original space is determined by planner. This > ephemeral space is called "ephemeral index". Ephemeral index may have > fewer fields than the original space, and the order of these fields in > ephemeral index may be different from their order in the original space. > In addition to the mentioned fields, ephemeral index contains a rowid. > Example: > > CREATE TABLE t1 (i INT PRIMARY KEY, a INT, b INT, c INT); > CREATE TABLE t2 (i INT PRIMARY KEY, a INT, b INT, c INT); > ... > SELECT t1.a, t2.b FROM t1, t2 WHERE t1.b = t2.c > > In case the planner decides to use an ephemeral index and chooses t1 as > the source space, an ephemeral index with three fields will be created > that will contain the values from t1.b, t1.a, and rowid in that order. > > In case the planner decides to use an ephemeral index and chooses t2 as > the source space, an ephemeral index with three fields will be created > that will contain the values from t2.c, t2.b, and rowid in that order. > > This optimization mainly used in cases when WHERE specified and either > values SELECTED from more than one space or subselect is used. > > Closes #4933 > > Co-authored-by: Mergen Imeev > Co-authored-by: Timur Safin > > diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt > index 8b2e704cf..3755754da 100644 > --- a/src/box/CMakeLists.txt > +++ b/src/box/CMakeLists.txt > @@ -219,7 +219,7 @@ add_library(box STATIC > if(CMAKE_BUILD_TYPE STREQUAL "Debug") > add_definitions(-DSQL_DEBUG=1) > endif() > -add_definitions(-DSQL_OMIT_AUTOMATIC_INDEX=1 -DSQL_TEST=1) > +add_definitions(-DSQL_TEST=1) > > set(EXT_SRC_DIR ${CMAKE_SOURCE_DIR}/extra) > set(EXT_BIN_DIR ${CMAKE_BINARY_DIR}/extra) > diff --git a/src/box/sql.c b/src/box/sql.c > index 000246ee9..a551bffc3 100644 > --- a/src/box/sql.c > +++ b/src/box/sql.c > @@ -719,7 +719,7 @@ int > tarantoolsqlIdxKeyCompare(struct BtCursor *cursor, > struct UnpackedRecord *unpacked) > { > - assert(cursor->curFlags & BTCF_TaCursor); > + assert(cursor->curFlags & (BTCF_TaCursor | BTCF_TEphemCursor)); > assert(cursor->iter != NULL); > assert(cursor->last_tuple != NULL); > > diff --git a/src/box/sql/delete.c b/src/box/sql/delete.c > index a78c68df6..62a726fdd 100644 > --- a/src/box/sql/delete.c > +++ b/src/box/sql/delete.c > @@ -544,31 +544,3 @@ sql_generate_row_delete(struct Parse *parse, struct space *space, > sqlVdbeResolveLabel(v, label); > VdbeModuleComment((v, "END: GenRowDel()")); > } > - > -int > -sql_generate_index_key(struct Parse *parse, struct index *index, int cursor, > - int reg_out, struct index *prev, int reg_prev) > -{ > - struct Vdbe *v = parse->pVdbe; > - int col_cnt = index->def->key_def->part_count; > - int reg_base = sqlGetTempRange(parse, col_cnt); > - if (prev != NULL && reg_base != reg_prev) > - prev = NULL; > - for (int j = 0; j < col_cnt; j++) { > - if (prev != NULL && prev->def->key_def->parts[j].fieldno == > - index->def->key_def->parts[j].fieldno) { > - /* > - * This column was already computed by the > - * previous index. > - */ > - continue; > - } > - uint32_t tabl_col = index->def->key_def->parts[j].fieldno; > - sqlVdbeAddOp3(v, OP_Column, cursor, tabl_col, reg_base + j); > - } > - if (reg_out != 0) > - sqlVdbeAddOp3(v, OP_MakeRecord, reg_base, col_cnt, reg_out); > - > - sqlReleaseTempRange(parse, reg_base, col_cnt); > - return reg_base; > -} > diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h > index adf90d824..56c5ee911 100644 > --- a/src/box/sql/sqlInt.h > +++ b/src/box/sql/sqlInt.h > @@ -3296,41 +3296,6 @@ sql_generate_row_delete(struct Parse *parse, struct space *space, > enum on_conflict_action onconf, u8 mode, > int idx_noseek); > > -/** > - * Generate code that will assemble an index key and stores it in > - * register reg_out. The key will be for index which is an > - * index on table. cursor is the index of a cursor open on the > - * table table and pointing to the entry that needs indexing. > - * cursor must be the cursor of the PRIMARY KEY index. > - * > - * The prev and reg_prev parameters are used to implement a > - * cache to avoid unnecessary register loads. If prev is not > - * NULL, then it is a pointer to a different index for which an > - * index key has just been computed into register reg_prev. If the > - * current index is generating its key into the same > - * sequence of registers and if prev and index share a column in > - * common, then the register corresponding to that column already > - * holds the correct value and the loading of that register is > - * skipped. This optimization is helpful when doing a DELETE or > - * an INTEGRITY_CHECK on a table with multiple indices, and > - * especially with the PRIMARY KEY columns of the index. > - * > - * @param parse Parsing context. > - * @param index The index for which to generate a key. > - * @param cursor Cursor number from which to take column data. > - * @param reg_out Put the new key into this register if not NULL. > - * @param prev Previously generated index key > - * @param reg_prev Register holding previous generated key. > - * > - * @retval Return a register number which is the first in a block > - * of registers that holds the elements of the index key. The > - * block of registers has already been deallocated by the time > - * this routine returns. > - */ > -int > -sql_generate_index_key(struct Parse *parse, struct index *index, int cursor, > - int reg_out, struct index *prev, int reg_prev); > - > /** > * Generate code to do constraint checks prior to an INSERT or > * an UPDATE on the given table. > diff --git a/src/box/sql/where.c b/src/box/sql/where.c > index e9e936856..f7931d11d 100644 > --- a/src/box/sql/where.c > +++ b/src/box/sql/where.c > @@ -683,7 +683,6 @@ translateColumnToCopy(Vdbe * v, /* The VDBE containing code to translate */ > } > } > > -#ifndef SQL_OMIT_AUTOMATIC_INDEX > /* > * Return TRUE if the WHERE clause term pTerm is of a form where it > * could be used with an index to access pSrc, assuming an appropriate > @@ -703,19 +702,49 @@ termCanDriveIndex(WhereTerm * pTerm, /* WHERE clause term to check */ > return 0; > if (pTerm->u.leftColumn < 0) > return 0; > - enum field_type type = pSrc->pTab->def->fields[pTerm->u.leftColumn].type; > + enum field_type type = pSrc->space->def->fields[pTerm->u.leftColumn].type; > enum field_type expr_type = expr_cmp_mutual_type(pTerm->pExpr); > if (!field_type1_contains_type2(expr_type, type)) > return 0; > return 1; > } > -#endif > > -#ifndef SQL_OMIT_AUTOMATIC_INDEX > +/** > + * Generate a code that will create a tuple, which will be inserted in the > + * ephemeral index. The created tuple consists of rowid and fields described in > + * the index key description. > + * > + * @param parse Parsing context. > + * @param key_def The index key description. > + * @param cursor Cursor of space where we will get values for tuple. > + * @param reg_out Register to which the created tuple will be placed. > + * @param reg_eph Register holding pointer to ephemeral index. > + */ > +static void > +vdbe_emit_create_ephemeral_index_tuple(struct Parse *parse, > + const struct key_def *key_def, > + int cursor, int reg_out, int reg_eph) > +{ > + assert(reg_out != 0); > + struct Vdbe *v = parse->pVdbe; > + int col_cnt = key_def->part_count; > + int reg_base = sqlGetTempRange(parse, col_cnt + 1); > + for (int j = 0; j < col_cnt; j++) { > + uint32_t tabl_col = key_def->parts[j].fieldno; > + sqlVdbeAddOp3(v, OP_Column, cursor, tabl_col, reg_base + j); > + } > + sqlVdbeAddOp2(v, OP_NextIdEphemeral, reg_eph, reg_base + col_cnt); > + sqlVdbeAddOp3(v, OP_MakeRecord, reg_base, col_cnt + 1, reg_out); > + sqlReleaseTempRange(parse, reg_base, col_cnt + 1); > +} > + > /* > - * Generate code to construct the Index object for an automatic index > - * and to set up the WhereLevel object pLevel so that the code generator > - * makes use of the automatic index. > + * Generate code to construct the ephemeral space that contains all used in > + * query fields of one of the tables that participate in the query. The table is > + * determined by planner. This ephemeral space will be known as an "ephemeral > + * index". The PK definition of ephemeral index contains all of its fields. > + * Also, this functions set up the WhereLevel object pLevel so that the code > + * generator makes use of ephemeral index. > */ > static void > constructAutomaticIndex(Parse * pParse, /* The parsing context */ > @@ -727,24 +756,17 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */ > int nKeyCol; /* Number of columns in the constructed index */ > WhereTerm *pTerm; /* A single term of the WHERE clause */ > WhereTerm *pWCEnd; /* End of pWC->a[] */ > - Index *pIdx; /* Object describing the transient index */ > Vdbe *v; /* Prepared statement under construction */ > int addrInit; /* Address of the initialization bypass jump */ > - Table *pTable; /* The table being indexed */ > int addrTop; /* Top of the index fill loop */ > int regRecord; /* Register holding an index record */ > int n; /* Column counter */ > int i; /* Loop counter */ > int mxBitCol; /* Maximum column in pSrc->colUsed */ > - struct coll *pColl; /* Collating sequence to on a column */ > WhereLoop *pLoop; /* The Loop object */ > - char *zNotUsed; /* Extra space on the end of pIdx */ > Bitmask idxCols; /* Bitmap of columns used for indexing */ > Bitmask extraCols; /* Bitmap of additional columns */ > - int iContinue = 0; /* Jump here to skip excluded rows */ > struct SrcList_item *pTabItem; /* FROM clause term being indexed */ > - int addrCounter = 0; /* Address where integer counter is initialized */ > - int regBase; /* Array of registers where record is assembled */ > > /* Generate code to skip over the creation and initialization of the > * transient index on 2nd and subsequent iterations of the loop. > @@ -758,7 +780,6 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */ > * and used to match WHERE clause constraints > */ > nKeyCol = 0; > - pTable = pSrc->pTab; > pWCEnd = &pWC->a[pWC->nTerm]; > pLoop = pLevel->pWLoop; > idxCols = 0; > @@ -770,7 +791,8 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */ > if ((idxCols & cMask) == 0) { > if (whereLoopResize > (pParse->db, pLoop, nKeyCol + 1)) { > - goto end_auto_index_create; > + pParse->is_aborted = true; > + return; > } > pLoop->aLTerm[nKeyCol++] = pTerm; > idxCols |= cMask; > @@ -791,26 +813,27 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */ > * if they go out of sync. > */ > extraCols = pSrc->colUsed & (~idxCols | MASKBIT(BMS - 1)); > - mxBitCol = MIN(BMS - 1, pTable->def->field_count); > - testcase(pTable->def->field_count == BMS - 1); > - testcase(pTable->def->field_count == BMS - 2); > + struct space *space = pSrc->space; > + mxBitCol = MIN(BMS - 1, space->def->field_count); > for (i = 0; i < mxBitCol; i++) { > if (extraCols & MASKBIT(i)) > nKeyCol++; > } > if (pSrc->colUsed & MASKBIT(BMS - 1)) { > - nKeyCol += pTable->def->field_count - BMS + 1; > + nKeyCol += space->def->field_count - BMS + 1; > } > > - /* Construct the Index object to describe this index */ > - pIdx = sqlDbMallocZero(pParse->db, sizeof(*pIdx)); > - if (pIdx == 0) > - goto end_auto_index_create; > - pLoop->pIndex = pIdx; > - pIdx->zName = "auto-index"; > - pIdx->pTable = pTable; > n = 0; > idxCols = 0; > + size_t size; > + struct key_part_def *parts = region_alloc_array(&pParse->region, > + typeof(parts[0]), > + nKeyCol, &size); > + if (parts == NULL) { > + diag_set(OutOfMemory, size, "region_alloc_array", "parts"); > + pParse->is_aborted = true; > + return; > + } > for (pTerm = pWC->a; pTerm < pWCEnd; pTerm++) { > if (termCanDriveIndex(pTerm, pSrc, notReady)) { > int iCol = pTerm->u.leftColumn; > @@ -819,9 +842,17 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */ > testcase(iCol == BMS - 1); > testcase(iCol == BMS); > if ((idxCols & cMask) == 0) { > - Expr *pX = pTerm->pExpr; > idxCols |= cMask; > - pIdx->aiColumn[n] = pTerm->u.leftColumn; > + struct field_def *field = > + &space->def->fields[iCol]; > + struct key_part_def *part = &parts[n]; > + part->fieldno = iCol; > + part->type = field->type; > + part->nullable_action = field->nullable_action; > + part->is_nullable = field->is_nullable; > + part->sort_order = SORT_ORDER_ASC; > + part->coll_id = field->coll_id; > + part->path = NULL; > n++; > } > } > @@ -833,64 +864,91 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */ > */ > for (i = 0; i < mxBitCol; i++) { > if (extraCols & MASKBIT(i)) { > - pIdx->aiColumn[n] = i; > + struct field_def *field = &space->def->fields[i]; > + struct key_part_def *part = &parts[n]; > + part->fieldno = i; > + part->type = field->type; > + part->nullable_action = field->nullable_action; > + part->is_nullable = field->is_nullable; > + part->sort_order = SORT_ORDER_ASC; > + part->coll_id = field->coll_id; > + part->path = NULL; > n++; > } > } > if (pSrc->colUsed & MASKBIT(BMS - 1)) { > - for (i = BMS - 1; i < (int)pTable->def->field_count; i++) { > - pIdx->aiColumn[n] = i; > + for (i = BMS - 1; i < (int)space->def->field_count; i++) { > + struct field_def *field = &space->def->fields[i]; > + struct key_part_def *part = &parts[n]; > + part->fieldno = i; > + part->type = field->type; > + part->nullable_action = field->nullable_action; > + part->is_nullable = field->is_nullable; > + part->sort_order = SORT_ORDER_ASC; > + part->coll_id = field->coll_id; > + part->path = NULL; > n++; > } > } > assert(n == nKeyCol); > - pIdx->aiColumn[n] = XN_ROWID; > + > + struct key_def *key_def = key_def_new(parts, nKeyCol, false); > + if (key_def == NULL) { > + pParse->is_aborted = true; > + return; > + } > + > + /* Construct the index definition to describe this index. */ > + struct index_opts opts; > + index_opts_create(&opts); > + const char *idx_name = "ephemeral index"; > + struct index_def *idx_def = index_def_new(space->def->id, 0, idx_name, > + strlen(idx_name), TREE, &opts, > + key_def, NULL); > + key_def_delete(key_def); > + if (idx_def == NULL) { > + pParse->is_aborted = true; > + return; > + } > + pLoop->index_def = idx_def; > > /* Create the automatic index */ > assert(pLevel->iIdxCur >= 0); > pLevel->iIdxCur = pParse->nTab++; > - sqlVdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol + 1); > - sql_vdbe_set_p4_key_def(pParse, pIdx->key_def); > - VdbeComment((v, "for %s", pTable->def->name)); > + struct sql_key_info *pk_info = > + sql_key_info_new_from_key_def(pParse->db, idx_def->key_def); > + if (pk_info == NULL) { > + pParse->is_aborted = true; > + return; > + } > + int reg_eph = sqlGetTempReg(pParse); > + sqlVdbeAddOp4(v, OP_OpenTEphemeral, reg_eph, nKeyCol + 1, 0, > + (char *)pk_info, P4_KEYINFO); > + sqlVdbeAddOp3(v, OP_IteratorOpen, pLevel->iIdxCur, 0, reg_eph); > + VdbeComment((v, "for %s", space->def->name)); > > /* Fill the automatic index with content */ > sqlExprCachePush(pParse); > pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom]; > - if (pTabItem->fg.viaCoroutine) { > - int regYield = pTabItem->regReturn; > - addrCounter = sqlVdbeAddOp2(v, OP_Integer, 0, 0); > - sqlVdbeAddOp3(v, OP_InitCoroutine, regYield, 0, > - pTabItem->addrFillSub); > - addrTop = sqlVdbeAddOp1(v, OP_Yield, regYield); > - VdbeCoverage(v); > - VdbeComment((v, "next row of \"%s\"", pTabItem->pTab->zName)); > - } else { > - addrTop = sqlVdbeAddOp1(v, OP_Rewind, pLevel->iTabCur); > - VdbeCoverage(v); > - } > + assert(pTabItem->fg.viaCoroutine == 0); > + int cursor = pLevel->iTabCur; > + addrTop = sqlVdbeAddOp1(v, OP_Rewind, cursor); > + VdbeCoverage(v); > regRecord = sqlGetTempReg(pParse); > - regBase = sql_generate_index_key(pParse, pIdx, pLevel->iTabCur, > - regRecord, NULL, 0); > - sqlVdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord); > - if (pTabItem->fg.viaCoroutine) { > - sqlVdbeChangeP2(v, addrCounter, regBase + n); > - translateColumnToCopy(v, addrTop, pLevel->iTabCur, > - pTabItem->regResult, 1); > - sqlVdbeGoto(v, addrTop); > - pTabItem->fg.viaCoroutine = 0; > - } else { > - sqlVdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop + 1); > - VdbeCoverage(v); > - } > + vdbe_emit_create_ephemeral_index_tuple(pParse, idx_def->key_def, cursor, > + regRecord, reg_eph); > + sqlVdbeAddOp2(v, OP_IdxInsert, regRecord, reg_eph); > + sqlVdbeAddOp2(v, OP_Next, cursor, addrTop + 1); > + VdbeCoverage(v); > sqlVdbeChangeP5(v, SQL_STMTSTATUS_AUTOINDEX); > sqlVdbeJumpHere(v, addrTop); > sqlReleaseTempReg(pParse, regRecord); > + sqlReleaseTempReg(pParse, reg_eph); > sqlExprCachePop(pParse); > > /* Jump here when skipping the initialization */ > sqlVdbeJumpHere(v, addrInit); > } > -#endif /* SQL_OMIT_AUTOMATIC_INDEX */ > > /* > * Estimate the location of a particular key among all keys in an > @@ -1711,7 +1769,7 @@ whereLoopInit(WhereLoop * p) > static void > whereLoopClearUnion(WhereLoop * p) > { > - if ((p->wsFlags & WHERE_AUTO_INDEX) != 0) { > + if ((p->wsFlags & WHERE_AUTO_INDEX) != 0 && p->index_def != NULL) { > index_def_delete(p->index_def); > p->index_def = NULL; > } > @@ -2807,16 +2865,20 @@ tnt_error: > probe = fake_index; > } > > -#ifndef SQL_OMIT_AUTOMATIC_INDEX > /* Automatic indexes */ > - LogEst rSize = pTab->nRowLogEst; > + rSize = DEFAULT_TUPLE_LOG_COUNT; > + /* > + * Increase cost of ephemeral index if number of tuples in space is less > + * then 10240. > + */ > + if (!space->def->opts.is_view && sql_space_tuple_log_count(space) < 133) > + rSize += DEFAULT_TUPLE_LOG_COUNT; > LogEst rLogSize = estLog(rSize); > if (!pBuilder->pOrSet && /* Not pqart of an OR optimization */ > (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0 && > (pWInfo->pParse->sql_flags & SQL_AutoIndex) != 0 && > pSrc->pIBIndex == 0 /* Has no INDEXED BY clause */ > && !pSrc->fg.notIndexed /* Has no NOT INDEXED clause */ > - && HasRowid(pTab) /* Not WITHOUT ROWID table. (FIXME: Why not?) */ > &&!pSrc->fg.isCorrelated /* Not a correlated subquery */ > && !pSrc->fg.isRecursive /* Not a recursive common table expression. */ > ) { > @@ -2829,24 +2891,15 @@ tnt_error: > if (termCanDriveIndex(pTerm, pSrc, 0)) { > pNew->nEq = 1; > pNew->nSkip = 0; > - pNew->pIndex = 0; > + pNew->index_def = NULL; > pNew->nLTerm = 1; > pNew->aLTerm[0] = pTerm; > - /* TUNING: One-time cost for computing the automatic index is > - * estimated to be X*N*log2(N) where N is the number of rows in > - * the table being indexed and where X is 7 (LogEst=28) for normal > - * tables or 1.375 (LogEst=4) for views and subqueries. The value > - * of X is smaller for views and subqueries so that the query planner > - * will be more aggressive about generating automatic indexes for > - * those objects, since there is no opportunity to add schema > - * indexes on subqueries and views. > + /* > + * TODO: At the moment we have decided to use > + * this formula, but it is quite aggressive and > + * needs tuning. > */ > - pNew->rSetup = rLogSize + rSize + 4; > - if (!pTab->def->opts.is_view && > - pTab->def->id == 0) > - pNew->rSetup += 24; > - if (pNew->rSetup < 0) > - pNew->rSetup = 0; > + pNew->rSetup = rLogSize + rSize; > /* TUNING: Each index lookup yields 20 rows in the table. This > * is more than the usual guess of 10 rows, since we have no way > * of knowing how selective the index will ultimately be. It would > @@ -2862,7 +2915,6 @@ tnt_error: > } > } > } > -#endif /* SQL_OMIT_AUTOMATIC_INDEX */ > /* > * If there was an INDEXED BY clause, then only that one > * index is considered. > @@ -4630,7 +4682,6 @@ sqlWhereBegin(Parse * pParse, /* The parser context */ > notReady = ~(Bitmask) 0; > for (ii = 0; ii < nTabList; ii++) { > pLevel = &pWInfo->a[ii]; > -#ifndef SQL_OMIT_AUTOMATIC_INDEX > if ((pLevel->pWLoop->wsFlags & WHERE_AUTO_INDEX) != 0) { > constructAutomaticIndex(pParse, &pWInfo->sWC, > &pTabList->a[pLevel->iFrom], > @@ -4638,7 +4689,6 @@ sqlWhereBegin(Parse * pParse, /* The parser context */ > if (db->mallocFailed) > goto whereBeginError; > } > -#endif > sqlWhereExplainOneScan(pParse, pTabList, pLevel, ii, > pLevel->iFrom, wctrlFlags); > pLevel->addrBody = sqlVdbeCurrentAddr(v); > @@ -4794,18 +4844,35 @@ sqlWhereEnd(WhereInfo * pWInfo) > for (; k < last; k++, pOp++) { > if (pOp->p1 != pLevel->iTabCur) > continue; > - if (pOp->opcode == OP_Column) { > - int x = pOp->p2; > - assert(def == NULL || > - def->space_id == > - pTabItem->space->def->id); > - if (x >= 0) { > - pOp->p2 = x; > - pOp->p1 = pLevel->iIdxCur; > - } > - assert((pLoop-> > - wsFlags & WHERE_IDX_ONLY) == 0 > - || x >= 0); > + if (pOp->opcode != OP_Column) > + continue; > + assert(def == NULL || def->space_id == > + pTabItem->space->def->id); > + int x = pOp->p2; > + assert(x >= 0); > + pOp->p1 = pLevel->iIdxCur; > + if ((pLoop->wsFlags & WHERE_AUTO_INDEX) == 0) { > + pOp->p2 = x; > + continue; > + } > + /* > + * In case we are using ephemeral index, the > + * space that will be used to get the values > + * will be ephemeral index. Since the opcode > + * OP_Column uses the position of the fields > + * according to the original space, and the > + * fields may be in other positions in the > + * ephemeral index, we must correct the P2 of > + * OP_Column. To get the positions of these > + * fields in ephemeral index, we use the index > + * definition we created. > + */ > + struct key_def *key_def = > + pLevel->pWLoop->index_def->key_def; > + uint32_t part_count = key_def->part_count; > + for (uint32_t i = 0; i < part_count; ++i) { > + if ((int)key_def->parts[i].fieldno == x) > + pOp->p2 = i; > } > } > } > diff --git a/src/box/sql/wherecode.c b/src/box/sql/wherecode.c > index 6d8768865..96bcab110 100644 > --- a/src/box/sql/wherecode.c > +++ b/src/box/sql/wherecode.c > @@ -219,12 +219,12 @@ sqlWhereExplainOneScan(Parse * pParse, /* Parse context */ > > assert(!(flags & WHERE_AUTO_INDEX) > || (flags & WHERE_IDX_ONLY)); > - if (idx_def->iid == 0) { > + if ((flags & WHERE_AUTO_INDEX) != 0) { > + zFmt = "EPHEMERAL INDEX"; > + } else if (idx_def->iid == 0) { > if (isSearch) { > zFmt = "PRIMARY KEY"; > } > - } else if (flags & WHERE_AUTO_INDEX) { > - zFmt = "AUTOMATIC COVERING INDEX"; > } else if (flags & WHERE_IDX_ONLY) { > zFmt = "COVERING INDEX %s"; > } else { > @@ -807,7 +807,8 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo, /* Complete information about the W > notReady & ~sqlWhereGetMask(&pWInfo->sMaskSet, iCur); > bRev = (pWInfo->revMask >> iLevel) & 1; > omitTable = (pLoop->wsFlags & WHERE_IDX_ONLY) != 0 > - && (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0; > + && ((pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0 || > + (pLoop->wsFlags & WHERE_AUTO_INDEX) != 0); > VdbeModuleComment((v, "Begin WHERE-loop%d: %s", iLevel, > pTabItem->pTab->zName)); > > @@ -1047,8 +1048,6 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo, /* Complete information about the W > startEq = 0; > start_constraints = 1; > } > - struct index_def *idx_pk = space->index[0]->def; > - uint32_t pk_part_count = idx_pk->key_def->part_count; > /* > * Tarantool's iterator over integer fields doesn't > * tolerate floating point values. Hence, if term > @@ -1234,6 +1233,8 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo, /* Complete information about the W > if (omitTable) { > /* pIdx is a covering index. No need to access the main table. */ > } else if (iCur != iIdxCur) { > + struct index_def *idx_pk = space->index[0]->def; > + uint32_t pk_part_count = idx_pk->key_def->part_count; > int iKeyReg = sqlGetTempRange(pParse, pk_part_count); > for (j = 0; j < (int) pk_part_count; j++) { > k = idx_pk->key_def->parts[j].fieldno; > diff --git a/test/sql-tap/autoindex1.test.lua b/test/sql-tap/autoindex1.test.lua > new file mode 100755 > index 000000000..5520289b1 > --- /dev/null > +++ b/test/sql-tap/autoindex1.test.lua > @@ -0,0 +1,114 @@ > +#!/usr/bin/env tarantool > +test = require("sqltester") > +test:plan(7) > + > +-- > +-- This file implements regression tests for sql library. The focus of this > +-- script is testing ephemeral index creation logic. > +-- > + > +-- Make sure that ephemeral index is used in all cases below. > +test:execsql([[ > + CREATE TABLE t1(a INT, b INT PRIMARY KEY); > + INSERT INTO t1 VALUES(1, 11); > + INSERT INTO t1 VALUES(2, 22); > + INSERT INTO t1 SELECT a + 2, b + 22 FROM t1; > + INSERT INTO t1 SELECT a + 4, b + 44 FROM t1; > + CREATE TABLE t2(c INT, d INT PRIMARY KEY); > +]]) > + > +for i = 1, 10240 do test:execsql("INSERT INTO t2 VALUES ("..i..", "..i..");") end > + > +test:do_execsql_test( > + "autoindex-1.1", [[ > + EXPLAIN QUERY PLAN SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1; > + ]], { > + 0,0,0,"SCAN TABLE T1 (~1048576 rows)", > + 0,0,0,"EXECUTE CORRELATED SCALAR SUBQUERY 1", > + 1,0,0,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)" > + }) > + > +result = test:execsql([[SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1;]]) > + > +test:do_execsql_test( > + "autoindex-1.2", [[ > + EXPLAIN QUERY PLAN SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b; > + ]], { > + 0,0,0,"SCAN TABLE T1 (~1048576 rows)", > + 0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)" > + }) > + > +test:do_execsql_test( > + "autoindex-1.3", [[ > + SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b; > + ]], result) > + > +test:do_execsql_test( > + "autoindex-1.4", [[ > + EXPLAIN QUERY PLAN SELECT b, d FROM t1 CROSS JOIN t2 ON (c = a); > + ]], { > + 0,0,0,"SCAN TABLE T1 (~1048576 rows)", > + 0,1,1,"SEARCH TABLE T2 USING EPHEMERAL INDEX (C=?) (~20 rows)" > + }) > + > +test:do_execsql_test( > + "autoindex-1.5", [[ > + SELECT b, d FROM t1 CROSS JOIN t2 ON (c = a); > + ]], result) > + > +test:execsql([[ > + CREATE TABLE t3(i INT PRIMARY KEY, a INT, b INT); > +]]) > + > +-- > +-- This query is quite slow in case ephemeral index is not used. The main idea > +-- behind this test is that ephemeral index allows to use index instead of > +-- fullscan in cases below. In this test construction of the index is faster > +-- that fullscan. > +-- > +for i = 1, 10240 do test:execsql("INSERT INTO t3 VALUES ("..i..", "..i..", "..(i + 1)..");") end > + > +test:do_execsql_test( > + "autoindex-1.6", [[ > + SELECT count(*) > + FROM t3 AS x1 > + JOIN t3 AS x2 ON x2.a=x1.b > + JOIN t3 AS x3 ON x3.a=x2.b > + JOIN t3 AS x4 ON x4.a=x3.b > + JOIN t3 AS x5 ON x5.a=x4.b > + JOIN t3 AS x6 ON x6.a=x5.b > + JOIN t3 AS x7 ON x7.a=x6.b > + JOIN t3 AS x8 ON x8.a=x7.b > + JOIN t3 AS x9 ON x9.a=x8.b > + JOIN t3 AS x10 ON x10.a=x9.b; > + ]], { > + 10231 > + }) > + > +test:do_execsql_test( > + "autoindex-1.7", [[ > + EXPLAIN QUERY PLAN SELECT count(*) > + FROM t3 AS x1 > + JOIN t3 AS x2 ON x2.a=x1.b > + JOIN t3 AS x3 ON x3.a=x2.b > + JOIN t3 AS x4 ON x4.a=x3.b > + JOIN t3 AS x5 ON x5.a=x4.b > + JOIN t3 AS x6 ON x6.a=x5.b > + JOIN t3 AS x7 ON x7.a=x6.b > + JOIN t3 AS x8 ON x8.a=x7.b > + JOIN t3 AS x9 ON x9.a=x8.b > + JOIN t3 AS x10 ON x10.a=x9.b; > + ]], { > + 0,0,0,"SCAN TABLE T3 AS X1 (~1048576 rows)", > + 0,1,1,"SEARCH TABLE T3 AS X2 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,2,2,"SEARCH TABLE T3 AS X3 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,3,3,"SEARCH TABLE T3 AS X4 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,4,4,"SEARCH TABLE T3 AS X5 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,5,5,"SEARCH TABLE T3 AS X6 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,6,6,"SEARCH TABLE T3 AS X7 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,7,7,"SEARCH TABLE T3 AS X8 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,8,8,"SEARCH TABLE T3 AS X9 USING EPHEMERAL INDEX (A=?) (~20 rows)", > + 0,9,9,"SEARCH TABLE T3 AS X10 USING EPHEMERAL INDEX (A=?) (~20 rows)" > + }) > + > +test:finish_test() > diff --git a/test/sql-tap/autoindex4.test.lua b/test/sql-tap/autoindex4.test.lua > index 46488f13a..e22f49c67 100755 > --- a/test/sql-tap/autoindex4.test.lua > +++ b/test/sql-tap/autoindex4.test.lua > @@ -1,6 +1,6 @@ > #!/usr/bin/env tarantool > test = require("sqltester") > -test:plan(7) > +test:plan(8) > > --!./tcltestrunner.lua > -- 2014-10-24 > @@ -75,6 +75,18 @@ test:do_execsql_test( > -- > }) > > +test:do_execsql_test( > + "autoindex4-2.0", > + [[ > + CREATE TABLE t3(i INT PRIMARY KEY, e INT, f INT); > + INSERT INTO t3 VALUES(1, 123,654), (2, 555,444), (3, 234,987); > + SELECT (SELECT count(*) FROM t1, t2 WHERE a=e AND x=f), e, f, '|' FROM t3 ORDER BY i; > + ]], { > + -- > + 1, 123, 654, "|", 0, 555, 444, "|", 4, 234, 987, "|" > + -- > + }) > + > -- do_execsql_test autoindex4-2.0 { > -- CREATE TABLE t3(e INT,f INT); > -- INSERT INTO t3 VALUES(123,654),(555,444),(234,987); > diff --git a/test/sql-tap/whereF.test.lua b/test/sql-tap/whereF.test.lua > index 5a894b748..3235df437 100755 > --- a/test/sql-tap/whereF.test.lua > +++ b/test/sql-tap/whereF.test.lua > @@ -90,10 +90,20 @@ test:do_execsql_test( > > -- for _ in X(0, "X!foreach", [=[["tn sql","\n 1 \"SELECT * FROM t1, t2 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e\"\n 2 \"SELECT * FROM t2, t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e\"\n 3 \"SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e\"\n"]]=]) do > for tn, sql in ipairs({"SELECT * FROM t1, t2 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e", > - "SELECT * FROM t2, t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e", > - "SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"}) do > + "SELECT * FROM t2, t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"}) do > test:do_test( > - "2."..tn, > + "2.1."..tn, > + function() > + return test:execsql("EXPLAIN QUERY PLAN "..sql) > + end, { > + '/SEARCH TABLE T1/', > + '/SEARCH TABLE T2/' > + }) > +end > + > +for tn, sql in ipairs({"SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"}) do > + test:do_test( > + "2.2."..tn, > function() > return test:execsql("EXPLAIN QUERY PLAN "..sql) > end, { > diff --git a/test/sql/misc.result b/test/sql/misc.result > index 6af11bfba..df2fdde81 100644 > --- a/test/sql/misc.result > +++ b/test/sql/misc.result > @@ -204,3 +204,33 @@ box.execute('SELECT field66, field68, field70 FROM test') > box.space.TEST:drop() > --- > ... > +-- gh-4933: Make sure that autoindex optimization is used. > +box.execute('CREATE TABLE t1(i INT PRIMARY KEY, a INT);') > +--- > +- row_count: 1 > +... > +box.execute('CREATE TABLE t2(i INT PRIMARY KEY, b INT);') > +--- > +- row_count: 1 > +... > +for i = 1, 10240 do\ > + box.execute('INSERT INTO t1 VALUES ($1, $1);', {i})\ > + box.execute('INSERT INTO t2 VALUES ($1, $1);', {i})\ > +end > +--- > +... > +box.execute('EXPLAIN QUERY PLAN SELECT a, b FROM t1, t2 WHERE a = b;') > +--- > +- metadata: > + - name: selectid > + type: integer > + - name: order > + type: integer > + - name: from > + type: integer > + - name: detail > + type: text > + rows: > + - [0, 0, 0, 'SCAN TABLE T1 (~1048576 rows)'] > + - [0, 1, 1, 'SEARCH TABLE T2 USING EPHEMERAL INDEX (B=?) (~20 rows)'] > +... > diff --git a/test/sql/misc.test.lua b/test/sql/misc.test.lua > index 63244228c..44945eace 100644 > --- a/test/sql/misc.test.lua > +++ b/test/sql/misc.test.lua > @@ -64,3 +64,12 @@ box.execute('SELECT field70, field64 FROM test') > pk:alter({parts = {66}}) > box.execute('SELECT field66, field68, field70 FROM test') > box.space.TEST:drop() > + > +-- gh-4933: Make sure that autoindex optimization is used. > +box.execute('CREATE TABLE t1(i INT PRIMARY KEY, a INT);') > +box.execute('CREATE TABLE t2(i INT PRIMARY KEY, b INT);') > +for i = 1, 10240 do\ > + box.execute('INSERT INTO t1 VALUES ($1, $1);', {i})\ > + box.execute('INSERT INTO t2 VALUES ($1, $1);', {i})\ > +end > +box.execute('EXPLAIN QUERY PLAN SELECT a, b FROM t1, t2 WHERE a = b;')