[Tarantool-patches] [PATCH v1 1/1] sql: enable autoindex optimization
imeevma at tarantool.org
imeevma at tarantool.org
Fri Sep 4 14:53:44 MSK 2020
This patch enables the "auto-index" optimization in SQL. The auto-index
is actually an ephemeral space that contain some columns of the original
space. In some cases, this can speed up execution, since creating a new
index and searching using it can be more cost efficient than scanning.
Co-authored-by: Mergen Imeev <imeevma at gmail.com>
Co-authored-by: Timur Safin <tsafin at tarantool.org>
---
https://github.com/tarantool/tarantool/issues/4933
https://github.com/tarantool/tarantool/tree/imeevma/gh-4933-autoindex
@ChangeLog
- "Auto-index" optimization is now enabled (gh-4933).
src/box/CMakeLists.txt | 2 +-
src/box/sql.c | 2 +-
src/box/sql/delete.c | 16 ++--
src/box/sql/sqlInt.h | 8 +-
src/box/sql/where.c | 170 +++++++++++++++++++++++------------
src/box/sql/wherecode.c | 13 +--
test/sql-tap/whereF.test.lua | 16 +++-
7 files changed, 151 insertions(+), 76 deletions(-)
diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index b8b2689d2..7e3ad0e22 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -217,7 +217,7 @@ add_library(box STATIC
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
add_definitions(-DSQL_DEBUG=1)
endif()
-add_definitions(-DSQL_OMIT_AUTOMATIC_INDEX=1 -DSQL_TEST=1)
+add_definitions(-DSQL_TEST=1)
set(EXT_SRC_DIR ${CMAKE_SOURCE_DIR}/extra)
set(EXT_BIN_DIR ${CMAKE_BINARY_DIR}/extra)
diff --git a/src/box/sql.c b/src/box/sql.c
index 000246ee9..a551bffc3 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -719,7 +719,7 @@ int
tarantoolsqlIdxKeyCompare(struct BtCursor *cursor,
struct UnpackedRecord *unpacked)
{
- assert(cursor->curFlags & BTCF_TaCursor);
+ assert(cursor->curFlags & (BTCF_TaCursor | BTCF_TEphemCursor));
assert(cursor->iter != NULL);
assert(cursor->last_tuple != NULL);
diff --git a/src/box/sql/delete.c b/src/box/sql/delete.c
index 68abd1f58..57478c129 100644
--- a/src/box/sql/delete.c
+++ b/src/box/sql/delete.c
@@ -546,24 +546,25 @@ sql_generate_row_delete(struct Parse *parse, struct space *space,
}
int
-sql_generate_index_key(struct Parse *parse, struct index *index, int cursor,
- int reg_out, struct index *prev, int reg_prev)
+sql_generate_index_key(struct Parse *parse, struct index_def *idx_def,
+ int cursor, int reg_out, struct index *prev,
+ int reg_prev, int reg_eph)
{
struct Vdbe *v = parse->pVdbe;
- int col_cnt = index->def->key_def->part_count;
- int reg_base = sqlGetTempRange(parse, col_cnt);
+ int col_cnt = idx_def->key_def->part_count;
+ int reg_base = sqlGetTempRange(parse, col_cnt + 1);
if (prev != NULL && reg_base != reg_prev)
prev = NULL;
for (int j = 0; j < col_cnt; j++) {
if (prev != NULL && prev->def->key_def->parts[j].fieldno ==
- index->def->key_def->parts[j].fieldno) {
+ idx_def->key_def->parts[j].fieldno) {
/*
* This column was already computed by the
* previous index.
*/
continue;
}
- uint32_t tabl_col = index->def->key_def->parts[j].fieldno;
+ uint32_t tabl_col = idx_def->key_def->parts[j].fieldno;
sqlVdbeAddOp3(v, OP_Column, cursor, tabl_col, reg_base + j);
/*
* If the column type is NUMBER but the number
@@ -578,8 +579,9 @@ sql_generate_index_key(struct Parse *parse, struct index *index, int cursor,
*/
sqlVdbeDeletePriorOpcode(v, OP_Realify);
}
+ sqlVdbeAddOp2(v, OP_NextIdEphemeral, reg_eph, reg_base + col_cnt);
if (reg_out != 0)
- sqlVdbeAddOp3(v, OP_MakeRecord, reg_base, col_cnt, reg_out);
+ sqlVdbeAddOp3(v, OP_MakeRecord, reg_base, col_cnt + 1, reg_out);
sqlReleaseTempRange(parse, reg_base, col_cnt);
return reg_base;
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index adf90d824..6f4e9d7b5 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -3316,11 +3316,12 @@ sql_generate_row_delete(struct Parse *parse, struct space *space,
* especially with the PRIMARY KEY columns of the index.
*
* @param parse Parsing context.
- * @param index The index for which to generate a key.
+ * @param idx_def The index definition for which to generate a key.
* @param cursor Cursor number from which to take column data.
* @param reg_out Put the new key into this register if not NULL.
* @param prev Previously generated index key
* @param reg_prev Register holding previous generated key.
+ * @param reg_eph Register holding adress of ephemeral space.
*
* @retval Return a register number which is the first in a block
* of registers that holds the elements of the index key. The
@@ -3328,8 +3329,9 @@ sql_generate_row_delete(struct Parse *parse, struct space *space,
* this routine returns.
*/
int
-sql_generate_index_key(struct Parse *parse, struct index *index, int cursor,
- int reg_out, struct index *prev, int reg_prev);
+sql_generate_index_key(struct Parse *parse, struct index_def *idx_def,
+ int cursor, int reg_out, struct index *prev,
+ int reg_prev, int reg_eph);
/**
* Generate code to do constraint checks prior to an INSERT or
diff --git a/src/box/sql/where.c b/src/box/sql/where.c
index e9e936856..c5f79f908 100644
--- a/src/box/sql/where.c
+++ b/src/box/sql/where.c
@@ -703,7 +703,7 @@ termCanDriveIndex(WhereTerm * pTerm, /* WHERE clause term to check */
return 0;
if (pTerm->u.leftColumn < 0)
return 0;
- enum field_type type = pSrc->pTab->def->fields[pTerm->u.leftColumn].type;
+ enum field_type type = pSrc->space->def->fields[pTerm->u.leftColumn].type;
enum field_type expr_type = expr_cmp_mutual_type(pTerm->pExpr);
if (!field_type1_contains_type2(expr_type, type))
return 0;
@@ -727,21 +727,18 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */
int nKeyCol; /* Number of columns in the constructed index */
WhereTerm *pTerm; /* A single term of the WHERE clause */
WhereTerm *pWCEnd; /* End of pWC->a[] */
- Index *pIdx; /* Object describing the transient index */
Vdbe *v; /* Prepared statement under construction */
int addrInit; /* Address of the initialization bypass jump */
- Table *pTable; /* The table being indexed */
+ /* The space being indexed. */
+ struct space *space;
int addrTop; /* Top of the index fill loop */
int regRecord; /* Register holding an index record */
int n; /* Column counter */
int i; /* Loop counter */
int mxBitCol; /* Maximum column in pSrc->colUsed */
- struct coll *pColl; /* Collating sequence to on a column */
WhereLoop *pLoop; /* The Loop object */
- char *zNotUsed; /* Extra space on the end of pIdx */
Bitmask idxCols; /* Bitmap of columns used for indexing */
Bitmask extraCols; /* Bitmap of additional columns */
- int iContinue = 0; /* Jump here to skip excluded rows */
struct SrcList_item *pTabItem; /* FROM clause term being indexed */
int addrCounter = 0; /* Address where integer counter is initialized */
int regBase; /* Array of registers where record is assembled */
@@ -758,7 +755,7 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */
* and used to match WHERE clause constraints
*/
nKeyCol = 0;
- pTable = pSrc->pTab;
+ space = pSrc->space;
pWCEnd = &pWC->a[pWC->nTerm];
pLoop = pLevel->pWLoop;
idxCols = 0;
@@ -770,7 +767,8 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */
if ((idxCols & cMask) == 0) {
if (whereLoopResize
(pParse->db, pLoop, nKeyCol + 1)) {
- goto end_auto_index_create;
+ pParse->is_aborted = true;
+ return;
}
pLoop->aLTerm[nKeyCol++] = pTerm;
idxCols |= cMask;
@@ -791,26 +789,24 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */
* if they go out of sync.
*/
extraCols = pSrc->colUsed & (~idxCols | MASKBIT(BMS - 1));
- mxBitCol = MIN(BMS - 1, pTable->def->field_count);
- testcase(pTable->def->field_count == BMS - 1);
- testcase(pTable->def->field_count == BMS - 2);
+ mxBitCol = MIN(BMS - 1, space->def->field_count);
for (i = 0; i < mxBitCol; i++) {
if (extraCols & MASKBIT(i))
nKeyCol++;
}
if (pSrc->colUsed & MASKBIT(BMS - 1)) {
- nKeyCol += pTable->def->field_count - BMS + 1;
+ nKeyCol += space->def->field_count - BMS + 1;
}
- /* Construct the Index object to describe this index */
- pIdx = sqlDbMallocZero(pParse->db, sizeof(*pIdx));
- if (pIdx == 0)
- goto end_auto_index_create;
- pLoop->pIndex = pIdx;
- pIdx->zName = "auto-index";
- pIdx->pTable = pTable;
n = 0;
idxCols = 0;
+ uint32_t size = sizeof(struct key_part_def) * nKeyCol;
+ struct key_part_def *part_def = malloc(size);
+ if (part_def == NULL) {
+ diag_set(OutOfMemory, size, "malloc", "part_def");
+ pParse->is_aborted = true;
+ return;
+ }
for (pTerm = pWC->a; pTerm < pWCEnd; pTerm++) {
if (termCanDriveIndex(pTerm, pSrc, notReady)) {
int iCol = pTerm->u.leftColumn;
@@ -819,9 +815,18 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */
testcase(iCol == BMS - 1);
testcase(iCol == BMS);
if ((idxCols & cMask) == 0) {
- Expr *pX = pTerm->pExpr;
idxCols |= cMask;
- pIdx->aiColumn[n] = pTerm->u.leftColumn;
+ int i = pTerm->u.leftColumn;
+ struct field_def *field =
+ &space->def->fields[i];
+ struct key_part_def *part = &part_def[n];
+ part->fieldno = iCol;
+ part->type = field->type;
+ part->nullable_action = field->nullable_action;
+ part->is_nullable = field->is_nullable;
+ part->sort_order = SORT_ORDER_ASC;
+ part->coll_id = field->coll_id;
+ part->path = NULL;
n++;
}
}
@@ -833,25 +838,65 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */
*/
for (i = 0; i < mxBitCol; i++) {
if (extraCols & MASKBIT(i)) {
- pIdx->aiColumn[n] = i;
+ struct field_def *field = &space->def->fields[i];
+ struct key_part_def *part = &part_def[n];
+ part->fieldno = i;
+ part->type = field->type;
+ part->nullable_action = field->nullable_action;
+ part->is_nullable = field->is_nullable;
+ part->sort_order = SORT_ORDER_ASC;
+ part->coll_id = field->coll_id;
+ part->path = NULL;
n++;
}
}
if (pSrc->colUsed & MASKBIT(BMS - 1)) {
- for (i = BMS - 1; i < (int)pTable->def->field_count; i++) {
- pIdx->aiColumn[n] = i;
+ for (i = BMS - 1; i < (int)space->def->field_count; i++) {
+ struct field_def *field = &space->def->fields[i];
+ struct key_part_def *part = &part_def[n];
+ part->fieldno = i;
+ part->type = field->type;
+ part->nullable_action = field->nullable_action;
+ part->is_nullable = field->is_nullable;
+ part->sort_order = SORT_ORDER_ASC;
+ part->coll_id = field->coll_id;
+ part->path = NULL;
n++;
}
}
assert(n == nKeyCol);
- pIdx->aiColumn[n] = XN_ROWID;
+
+ struct key_def *key_def = key_def_new(part_def, nKeyCol, false);
+ free(part_def);
+ if (key_def == NULL) {
+ pParse->is_aborted = true;
+ return;
+ }
+
+ /* Construct the index definition to describe this index. */
+ struct index_opts opts;
+ index_opts_create(&opts);
+ struct index_def *idx_def = index_def_new(space->def->id, 0,
+ "auto-index",
+ sizeof("auto-index") -1,
+ TREE, &opts, key_def, NULL);
+ key_def_delete(key_def);
+ if (idx_def == NULL) {
+ pParse->is_aborted = true;
+ return;
+ }
+ pLoop->index_def = idx_def;
/* Create the automatic index */
assert(pLevel->iIdxCur >= 0);
pLevel->iIdxCur = pParse->nTab++;
- sqlVdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol + 1);
- sql_vdbe_set_p4_key_def(pParse, pIdx->key_def);
- VdbeComment((v, "for %s", pTable->def->name));
+ int reg_eph = ++pParse->nMem;
+ struct sql_key_info *pk_info =
+ sql_key_info_new_from_key_def(pParse->db, idx_def->key_def);
+ sqlVdbeAddOp4(v, OP_OpenTEphemeral, reg_eph, nKeyCol + 1, 0,
+ (char *)pk_info, P4_KEYINFO);
+ sqlVdbeAddOp3(v, OP_IteratorOpen, pLevel->iIdxCur, 0, reg_eph);
+ VdbeComment((v, "for %s", space->def->name));
/* Fill the automatic index with content */
sqlExprCachePush(pParse);
@@ -863,19 +908,19 @@ constructAutomaticIndex(Parse * pParse, /* The parsing context */
pTabItem->addrFillSub);
addrTop = sqlVdbeAddOp1(v, OP_Yield, regYield);
VdbeCoverage(v);
- VdbeComment((v, "next row of \"%s\"", pTabItem->pTab->zName));
+ VdbeComment((v, "next row of \"%s\"", pTabItem->space->def->name));
} else {
addrTop = sqlVdbeAddOp1(v, OP_Rewind, pLevel->iTabCur);
VdbeCoverage(v);
}
regRecord = sqlGetTempReg(pParse);
- regBase = sql_generate_index_key(pParse, pIdx, pLevel->iTabCur,
- regRecord, NULL, 0);
- sqlVdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord);
+ regBase = sql_generate_index_key(pParse, idx_def, pLevel->iTabCur,
+ regRecord, NULL, 0, reg_eph);
+ sqlVdbeAddOp2(v, OP_IdxInsert, regRecord, reg_eph);
if (pTabItem->fg.viaCoroutine) {
sqlVdbeChangeP2(v, addrCounter, regBase + n);
translateColumnToCopy(v, addrTop, pLevel->iTabCur,
- pTabItem->regResult, 1);
+ pTabItem->regResult);
sqlVdbeGoto(v, addrTop);
pTabItem->fg.viaCoroutine = 0;
} else {
@@ -1711,7 +1756,7 @@ whereLoopInit(WhereLoop * p)
static void
whereLoopClearUnion(WhereLoop * p)
{
- if ((p->wsFlags & WHERE_AUTO_INDEX) != 0) {
+ if ((p->wsFlags & WHERE_AUTO_INDEX) != 0 && p->index_def != NULL) {
index_def_delete(p->index_def);
p->index_def = NULL;
}
@@ -2809,14 +2854,13 @@ tnt_error:
#ifndef SQL_OMIT_AUTOMATIC_INDEX
/* Automatic indexes */
- LogEst rSize = pTab->nRowLogEst;
+ rSize = DEFAULT_TUPLE_LOG_COUNT;
LogEst rLogSize = estLog(rSize);
if (!pBuilder->pOrSet && /* Not pqart of an OR optimization */
(pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0 &&
(pWInfo->pParse->sql_flags & SQL_AutoIndex) != 0 &&
pSrc->pIBIndex == 0 /* Has no INDEXED BY clause */
&& !pSrc->fg.notIndexed /* Has no NOT INDEXED clause */
- && HasRowid(pTab) /* Not WITHOUT ROWID table. (FIXME: Why not?) */
&&!pSrc->fg.isCorrelated /* Not a correlated subquery */
&& !pSrc->fg.isRecursive /* Not a recursive common table expression. */
) {
@@ -2829,7 +2873,7 @@ tnt_error:
if (termCanDriveIndex(pTerm, pSrc, 0)) {
pNew->nEq = 1;
pNew->nSkip = 0;
- pNew->pIndex = 0;
+ pNew->index_def = NULL;
pNew->nLTerm = 1;
pNew->aLTerm[0] = pTerm;
/* TUNING: One-time cost for computing the automatic index is
@@ -2841,12 +2885,12 @@ tnt_error:
* those objects, since there is no opportunity to add schema
* indexes on subqueries and views.
*/
- pNew->rSetup = rLogSize + rSize + 4;
- if (!pTab->def->opts.is_view &&
- pTab->def->id == 0)
- pNew->rSetup += 24;
- if (pNew->rSetup < 0)
- pNew->rSetup = 0;
+ pNew->rSetup = rLogSize + rSize;
+ if (!space->def->opts.is_view &&
+ space->def->id == 0)
+ pNew->rSetup += 28;
+ else
+ pNew->rSetup -= 10;
/* TUNING: Each index lookup yields 20 rows in the table. This
* is more than the usual guess of 10 rows, since we have no way
* of knowing how selective the index will ultimately be. It would
@@ -4794,18 +4838,34 @@ sqlWhereEnd(WhereInfo * pWInfo)
for (; k < last; k++, pOp++) {
if (pOp->p1 != pLevel->iTabCur)
continue;
- if (pOp->opcode == OP_Column) {
- int x = pOp->p2;
- assert(def == NULL ||
- def->space_id ==
- pTabItem->space->def->id);
- if (x >= 0) {
- pOp->p2 = x;
- pOp->p1 = pLevel->iIdxCur;
- }
- assert((pLoop->
- wsFlags & WHERE_IDX_ONLY) == 0
- || x >= 0);
+ if (pOp->opcode != OP_Column)
+ continue;
+ assert(def == NULL || def->space_id ==
+ pTabItem->space->def->id);
+ int x = pOp->p2;
+ assert((pLoop->wsFlags & WHERE_IDX_ONLY) == 0 ||
+ x >= 0);
+ if (x < 0)
+ continue;
+ pOp->p1 = pLevel->iIdxCur;
+ if ((pLoop->wsFlags & WHERE_AUTO_INDEX) == 0) {
+ pOp->p2 = x;
+ continue;
+ }
+ /*
+ * In case we are using auto-indexing, we have
+ * to change the column number, because in
+ * ephemeral space columns are in the same order
+ * as they described in key_def. So, instead of
+ * the field number, we have to insert index of
+ * the part with this fieldno.
+ */
+ struct key_def *key_def =
+ pLevel->pWLoop->index_def->key_def;
+ uint32_t part_count = key_def->part_count;
+ for (uint32_t i = 0; i < part_count; ++i) {
+ if ((int)key_def->parts[i].fieldno == x)
+ pOp->p2 = i;
}
}
}
diff --git a/src/box/sql/wherecode.c b/src/box/sql/wherecode.c
index 6d8768865..534819d48 100644
--- a/src/box/sql/wherecode.c
+++ b/src/box/sql/wherecode.c
@@ -219,12 +219,12 @@ sqlWhereExplainOneScan(Parse * pParse, /* Parse context */
assert(!(flags & WHERE_AUTO_INDEX)
|| (flags & WHERE_IDX_ONLY));
- if (idx_def->iid == 0) {
+ if (flags & WHERE_AUTO_INDEX) {
+ zFmt = "AUTOMATIC COVERING INDEX";
+ } else if (idx_def->iid == 0) {
if (isSearch) {
zFmt = "PRIMARY KEY";
}
- } else if (flags & WHERE_AUTO_INDEX) {
- zFmt = "AUTOMATIC COVERING INDEX";
} else if (flags & WHERE_IDX_ONLY) {
zFmt = "COVERING INDEX %s";
} else {
@@ -807,7 +807,8 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo, /* Complete information about the W
notReady & ~sqlWhereGetMask(&pWInfo->sMaskSet, iCur);
bRev = (pWInfo->revMask >> iLevel) & 1;
omitTable = (pLoop->wsFlags & WHERE_IDX_ONLY) != 0
- && (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0;
+ && ((pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0 ||
+ (pLoop->wsFlags & WHERE_AUTO_INDEX) != 0);
VdbeModuleComment((v, "Begin WHERE-loop%d: %s", iLevel,
pTabItem->pTab->zName));
@@ -1047,8 +1048,6 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo, /* Complete information about the W
startEq = 0;
start_constraints = 1;
}
- struct index_def *idx_pk = space->index[0]->def;
- uint32_t pk_part_count = idx_pk->key_def->part_count;
/*
* Tarantool's iterator over integer fields doesn't
* tolerate floating point values. Hence, if term
@@ -1234,6 +1233,8 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo, /* Complete information about the W
if (omitTable) {
/* pIdx is a covering index. No need to access the main table. */
} else if (iCur != iIdxCur) {
+ struct index_def *idx_pk = space->index[0]->def;
+ uint32_t pk_part_count = idx_pk->key_def->part_count;
int iKeyReg = sqlGetTempRange(pParse, pk_part_count);
for (j = 0; j < (int) pk_part_count; j++) {
k = idx_pk->key_def->parts[j].fieldno;
diff --git a/test/sql-tap/whereF.test.lua b/test/sql-tap/whereF.test.lua
index 5a894b748..3235df437 100755
--- a/test/sql-tap/whereF.test.lua
+++ b/test/sql-tap/whereF.test.lua
@@ -90,10 +90,20 @@ test:do_execsql_test(
-- for _ in X(0, "X!foreach", [=[["tn sql","\n 1 \"SELECT * FROM t1, t2 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e\"\n 2 \"SELECT * FROM t2, t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e\"\n 3 \"SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e\"\n"]]=]) do
for tn, sql in ipairs({"SELECT * FROM t1, t2 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e",
- "SELECT * FROM t2, t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e",
- "SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"}) do
+ "SELECT * FROM t2, t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"}) do
test:do_test(
- "2."..tn,
+ "2.1."..tn,
+ function()
+ return test:execsql("EXPLAIN QUERY PLAN "..sql)
+ end, {
+ '/SEARCH TABLE T1/',
+ '/SEARCH TABLE T2/'
+ })
+end
+
+for tn, sql in ipairs({"SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"}) do
+ test:do_test(
+ "2.2."..tn,
function()
return test:execsql("EXPLAIN QUERY PLAN "..sql)
end, {
--
2.25.1
More information about the Tarantool-patches
mailing list