[Tarantool-patches] [PATCH v3 1/1] sql: enable autoindex optimization

Tarantool development patches archive
 help / color / mirror / Atom feed

From: imeevma@tarantool.org
To: korablev@tarantool.org, tsafin@tarantool.org
Cc: Mergen Imeev <imeevma@gmail.com>, tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH v3 1/1] sql: enable autoindex optimization
Date: Sun, 11 Oct 2020 13:48:46 +0300	[thread overview]
Message-ID: <5c615731b1bd3e6d279dfb0cbc92823dea993a03.1602413265.git.imeevma@gmail.com> (raw)

From: Kirill Yukhin <kyukhin@tarantool.org>

This patch enables "autoindex" optimization. This optimization uses an
ephemeral space that contains all the fields used in the query. This
ephemeral space is called "autoindex". Autoindex may have fewer fields
than the original space, and the order of these fields in autoindex may
be different from their order in the original space. In addition to the
mentioned fields, autoindex contains a rowid.

The primary key of autoindex is a covering index, i.e. it contains all
of its fields.

Currently this optimization mainly used in cases when values SELECTED
from more than one space and WHERE specified.

At the moment, this optimization does not take into account the number
of tuples in spaces. This should be fixed.

Closes #4933

Co-authored-by: Mergen Imeev <imeevma@gmail.com>
Co-authored-by: Timur Safin <tsafin@tarantool.org>
---
https://github.com/tarantool/tarantool/issues/4933
https://github.com/tarantool/tarantool/tree/imeevma/gh-4933-autoindex

@ChangeLog
 - "autoindex" optimization is now enabled (gh-4933).

 src/box/CMakeLists.txt           |   2 +-
 src/box/sql.c                    |   2 +-
 src/box/sql/delete.c             |  28 ----
 src/box/sql/sqlInt.h             |  35 -----
 src/box/sql/where.c              | 238 +++++++++++++++++++++----------
 src/box/sql/wherecode.c          |  13 +-
 test/sql-tap/autoindex1.test.lua | 115 +++++++++++++++
 test/sql-tap/autoindex4.test.lua |  14 +-
 test/sql-tap/whereF.test.lua     |  16 ++-
 test/sql/misc.result             |  35 +++++
 test/sql/misc.test.lua           |  14 ++
 11 files changed, 358 insertions(+), 154 deletions(-)
 create mode 100755 test/sql-tap/autoindex1.test.lua

diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index 8b2e704cf..3755754da 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -219,7 +219,7 @@ add_library(box STATIC
 if(CMAKE_BUILD_TYPE STREQUAL "Debug")
   add_definitions(-DSQL_DEBUG=1)
 endif()
-add_definitions(-DSQL_OMIT_AUTOMATIC_INDEX=1 -DSQL_TEST=1)
+add_definitions(-DSQL_TEST=1)
 
 set(EXT_SRC_DIR ${CMAKE_SOURCE_DIR}/extra)
 set(EXT_BIN_DIR ${CMAKE_BINARY_DIR}/extra)
diff --git a/src/box/sql.c b/src/box/sql.c
index 000246ee9..a551bffc3 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -719,7 +719,7 @@ int
 tarantoolsqlIdxKeyCompare(struct BtCursor *cursor,
 			      struct UnpackedRecord *unpacked)
 {
-	assert(cursor->curFlags & BTCF_TaCursor);
+	assert(cursor->curFlags & (BTCF_TaCursor | BTCF_TEphemCursor));
 	assert(cursor->iter != NULL);
 	assert(cursor->last_tuple != NULL);
 
diff --git a/src/box/sql/delete.c b/src/box/sql/delete.c
index a78c68df6..62a726fdd 100644
--- a/src/box/sql/delete.c
+++ b/src/box/sql/delete.c
@@ -544,31 +544,3 @@ sql_generate_row_delete(struct Parse *parse, struct space *space,
 	sqlVdbeResolveLabel(v, label);
 	VdbeModuleComment((v, "END: GenRowDel()"));
 }
-
-int
-sql_generate_index_key(struct Parse *parse, struct index *index, int cursor,
-		       int reg_out, struct index *prev, int reg_prev)
-{
-	struct Vdbe *v = parse->pVdbe;
-	int col_cnt = index->def->key_def->part_count;
-	int reg_base = sqlGetTempRange(parse, col_cnt);
-	if (prev != NULL && reg_base != reg_prev)
-		prev = NULL;
-	for (int j = 0; j < col_cnt; j++) {
-		if (prev != NULL && prev->def->key_def->parts[j].fieldno ==
-				    index->def->key_def->parts[j].fieldno) {
-			/*
-			 * This column was already computed by the
-			 * previous index.
-			 */
-			continue;
-		}
-		uint32_t tabl_col = index->def->key_def->parts[j].fieldno;
-		sqlVdbeAddOp3(v, OP_Column, cursor, tabl_col, reg_base + j);
-	}
-	if (reg_out != 0)
-		sqlVdbeAddOp3(v, OP_MakeRecord, reg_base, col_cnt, reg_out);
-
-	sqlReleaseTempRange(parse, reg_base, col_cnt);
-	return reg_base;
-}
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index adf90d824..56c5ee911 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -3296,41 +3296,6 @@ sql_generate_row_delete(struct Parse *parse, struct space *space,
 			enum on_conflict_action onconf, u8 mode,
 			int idx_noseek);
 
-/**
- * Generate code that will assemble an index key and stores it in
- * register reg_out. The key will be for index which is an
- * index on table. cursor is the index of a cursor open on the
- * table table and pointing to the entry that needs indexing.
- * cursor must be the cursor of the PRIMARY KEY index.
- *
- * The prev and reg_prev parameters are used to implement a
- * cache to avoid unnecessary register loads.  If prev is not
- * NULL, then it is a pointer to a different index for which an
- * index key has just been computed into register reg_prev. If the
- * current index is generating its key into the same
- * sequence of registers and if prev and index share a column in
- * common, then the register corresponding to that column already
- * holds the correct value and the loading of that register is
- * skipped. This optimization is helpful when doing a DELETE or
- * an INTEGRITY_CHECK on a table with multiple indices, and
- * especially with the PRIMARY KEY columns of the index.
- *
- * @param parse Parsing context.
- * @param index The index for which to generate a key.
- * @param cursor Cursor number from which to take column data.
- * @param reg_out Put the new key into this register if not NULL.
- * @param prev Previously generated index key
- * @param reg_prev Register holding previous generated key.
- *
- * @retval Return a register number which is the first in a block
- * of registers that holds the elements of the index key. The
- * block of registers has already been deallocated by the time
- * this routine returns.
- */
-int
-sql_generate_index_key(struct Parse *parse, struct index *index, int cursor,
-		       int reg_out, struct index *prev, int reg_prev);
-
 /**
  * Generate code to do constraint checks prior to an INSERT or
  * an UPDATE on the given table.
diff --git a/src/box/sql/where.c b/src/box/sql/where.c
index e9e936856..6ab2be3ba 100644
--- a/src/box/sql/where.c
+++ b/src/box/sql/where.c
@@ -683,7 +683,6 @@ translateColumnToCopy(Vdbe * v,		/* The VDBE containing code to translate */
 	}
 }
 
-#ifndef SQL_OMIT_AUTOMATIC_INDEX
 /*
  * Return TRUE if the WHERE clause term pTerm is of a form where it
  * could be used with an index to access pSrc, assuming an appropriate
@@ -703,19 +702,53 @@ termCanDriveIndex(WhereTerm * pTerm,	/* WHERE clause term to check */
 		return 0;
 	if (pTerm->u.leftColumn < 0)
 		return 0;
-	enum field_type type = pSrc->pTab->def->fields[pTerm->u.leftColumn].type;
+	enum field_type type = pSrc->space->def->fields[pTerm->u.leftColumn].type;
 	enum field_type expr_type = expr_cmp_mutual_type(pTerm->pExpr);
 	if (!field_type1_contains_type2(expr_type, type))
 		return 0;
 	return 1;
 }
-#endif
 
-#ifndef SQL_OMIT_AUTOMATIC_INDEX
+/**
+ * Generate a code that will create a tuple, which will be inserted in the
+ * autoindex. The created tuple consists of rowid and fields described in the
+ * index key description.
+ *
+ * @param parse Parsing context.
+ * @param key_def The index key description.
+ * @param cursor Cursor of space where we will get values for tuple.
+ * @param reg_out Register to which the created tuple will be placed.
+ * @param reg_eph Register holding pointer to autoindex.
+ *
+ * @return Return a register number which is the first in a block of registers
+ * that holds the elements of the created tuple. The block of registers has
+ * already been deallocated by the time this routine returns.
+ */
+static int
+vdbe_emit_create_autoindex_tuple(struct Parse *parse,
+				 const struct key_def *key_def,
+				 int cursor, int reg_out, int reg_eph)
+{
+	assert(reg_out != 0);
+	struct Vdbe *v = parse->pVdbe;
+	int col_cnt = key_def->part_count;
+	int reg_base = sqlGetTempRange(parse, col_cnt + 1);
+	for (int j = 0; j < col_cnt; j++) {
+		uint32_t tabl_col = key_def->parts[j].fieldno;
+		sqlVdbeAddOp3(v, OP_Column, cursor, tabl_col, reg_base + j);
+	}
+	sqlVdbeAddOp2(v, OP_NextIdEphemeral, reg_eph, reg_base + col_cnt);
+	sqlVdbeAddOp3(v, OP_MakeRecord, reg_base, col_cnt + 1, reg_out);
+	sqlReleaseTempRange(parse, reg_base, col_cnt + 1);
+	return reg_base;
+}
+
 /*
- * Generate code to construct the Index object for an automatic index
- * and to set up the WhereLevel object pLevel so that the code generator
- * makes use of the automatic index.
+ * Generate code to construct the ephemeral space that contains all used in
+ * query fields of one of the tables. This ephemeral space will be known as an
+ * "autoindex". PK of this ephemeral space is a covering index. Also, this
+ * functions set up the WhereLevel object pLevel so that the code generator
+ * makes use of the auto-index.
  */
 static void
 constructAutomaticIndex(Parse * pParse,			/* The parsing context */
@@ -727,21 +760,16 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
 	int nKeyCol;		/* Number of columns in the constructed index */
 	WhereTerm *pTerm;	/* A single term of the WHERE clause */
 	WhereTerm *pWCEnd;	/* End of pWC->a[] */
-	Index *pIdx;		/* Object describing the transient index */
 	Vdbe *v;		/* Prepared statement under construction */
 	int addrInit;		/* Address of the initialization bypass jump */
-	Table *pTable;		/* The table being indexed */
 	int addrTop;		/* Top of the index fill loop */
 	int regRecord;		/* Register holding an index record */
 	int n;			/* Column counter */
 	int i;			/* Loop counter */
 	int mxBitCol;		/* Maximum column in pSrc->colUsed */
-	struct coll *pColl;		/* Collating sequence to on a column */
 	WhereLoop *pLoop;	/* The Loop object */
-	char *zNotUsed;		/* Extra space on the end of pIdx */
 	Bitmask idxCols;	/* Bitmap of columns used for indexing */
 	Bitmask extraCols;	/* Bitmap of additional columns */
-	int iContinue = 0;	/* Jump here to skip excluded rows */
 	struct SrcList_item *pTabItem;	/* FROM clause term being indexed */
 	int addrCounter = 0;	/* Address where integer counter is initialized */
 	int regBase;		/* Array of registers where record is assembled */
@@ -758,7 +786,6 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
 	 * and used to match WHERE clause constraints
 	 */
 	nKeyCol = 0;
-	pTable = pSrc->pTab;
 	pWCEnd = &pWC->a[pWC->nTerm];
 	pLoop = pLevel->pWLoop;
 	idxCols = 0;
@@ -770,7 +797,8 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
 			if ((idxCols & cMask) == 0) {
 				if (whereLoopResize
 				    (pParse->db, pLoop, nKeyCol + 1)) {
-					goto end_auto_index_create;
+					pParse->is_aborted = true;
+					return;
 				}
 				pLoop->aLTerm[nKeyCol++] = pTerm;
 				idxCols |= cMask;
@@ -791,26 +819,27 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
 	 * if they go out of sync.
 	 */
 	extraCols = pSrc->colUsed & (~idxCols | MASKBIT(BMS - 1));
-	mxBitCol = MIN(BMS - 1, pTable->def->field_count);
-	testcase(pTable->def->field_count == BMS - 1);
-	testcase(pTable->def->field_count == BMS - 2);
+	struct space *space = pSrc->space;
+	mxBitCol = MIN(BMS - 1, space->def->field_count);
 	for (i = 0; i < mxBitCol; i++) {
 		if (extraCols & MASKBIT(i))
 			nKeyCol++;
 	}
 	if (pSrc->colUsed & MASKBIT(BMS - 1)) {
-		nKeyCol += pTable->def->field_count - BMS + 1;
+		nKeyCol += space->def->field_count - BMS + 1;
 	}
 
-	/* Construct the Index object to describe this index */
-	pIdx = sqlDbMallocZero(pParse->db, sizeof(*pIdx));
-	if (pIdx == 0)
-		goto end_auto_index_create;
-	pLoop->pIndex = pIdx;
-	pIdx->zName = "auto-index";
-	pIdx->pTable = pTable;
 	n = 0;
 	idxCols = 0;
+	size_t size;
+	struct key_part_def *parts = region_alloc_array(&pParse->region,
+							typeof(parts[0]),
+							nKeyCol, &size);
+	if (parts == NULL) {
+		diag_set(OutOfMemory, size, "region_alloc_array", "parts");
+		pParse->is_aborted = true;
+		return;
+	}
 	for (pTerm = pWC->a; pTerm < pWCEnd; pTerm++) {
 		if (termCanDriveIndex(pTerm, pSrc, notReady)) {
 			int iCol = pTerm->u.leftColumn;
@@ -819,9 +848,17 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
 			testcase(iCol == BMS - 1);
 			testcase(iCol == BMS);
 			if ((idxCols & cMask) == 0) {
-				Expr *pX = pTerm->pExpr;
 				idxCols |= cMask;
-				pIdx->aiColumn[n] = pTerm->u.leftColumn;
+				struct field_def *field =
+					&space->def->fields[iCol];
+				struct key_part_def *part = &parts[n];
+				part->fieldno = iCol;
+				part->type = field->type;
+				part->nullable_action = field->nullable_action;
+				part->is_nullable = field->is_nullable;
+				part->sort_order = SORT_ORDER_ASC;
+				part->coll_id = field->coll_id;
+				part->path = NULL;
 				n++;
 			}
 		}
@@ -833,29 +870,73 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
 	 */
 	for (i = 0; i < mxBitCol; i++) {
 		if (extraCols & MASKBIT(i)) {
-			pIdx->aiColumn[n] = i;
+			struct field_def *field = &space->def->fields[i];
+			struct key_part_def *part = &parts[n];
+			part->fieldno = i;
+			part->type = field->type;
+			part->nullable_action = field->nullable_action;
+			part->is_nullable = field->is_nullable;
+			part->sort_order = SORT_ORDER_ASC;
+			part->coll_id = field->coll_id;
+			part->path = NULL;
 			n++;
 		}
 	}
 	if (pSrc->colUsed & MASKBIT(BMS - 1)) {
-		for (i = BMS - 1; i < (int)pTable->def->field_count; i++) {
-			pIdx->aiColumn[n] = i;
+		for (i = BMS - 1; i < (int)space->def->field_count; i++) {
+			struct field_def *field = &space->def->fields[i];
+			struct key_part_def *part = &parts[n];
+			part->fieldno = i;
+			part->type = field->type;
+			part->nullable_action = field->nullable_action;
+			part->is_nullable = field->is_nullable;
+			part->sort_order = SORT_ORDER_ASC;
+			part->coll_id = field->coll_id;
+			part->path = NULL;
 			n++;
 		}
 	}
 	assert(n == nKeyCol);
-	pIdx->aiColumn[n] = XN_ROWID;
+
+	struct key_def *key_def = key_def_new(parts, nKeyCol, false);
+	if (key_def == NULL) {
+		pParse->is_aborted = true;
+		return;
+	}
+
+	/* Construct the index definition to describe this index. */
+	struct index_opts opts;
+	index_opts_create(&opts);
+	const char *idx_name = "autoindex";
+	struct index_def *idx_def = index_def_new(space->def->id, 0, idx_name,
+						  strlen(idx_name), TREE, &opts,
+						  key_def, NULL);
+	key_def_delete(key_def);
+	if (idx_def == NULL) {
+		pParse->is_aborted = true;
+		return;
+	}
+	pLoop->index_def = idx_def;
 
 	/* Create the automatic index */
 	assert(pLevel->iIdxCur >= 0);
 	pLevel->iIdxCur = pParse->nTab++;
-	sqlVdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol + 1);
-	sql_vdbe_set_p4_key_def(pParse, pIdx->key_def);
-	VdbeComment((v, "for %s", pTable->def->name));
+	struct sql_key_info *pk_info =
+		sql_key_info_new_from_key_def(pParse->db, idx_def->key_def);
+	if (pk_info == NULL) {
+		pParse->is_aborted = true;
+		return;
+	}
+	int reg_eph = sqlGetTempReg(pParse);
+	sqlVdbeAddOp4(v, OP_OpenTEphemeral, reg_eph, nKeyCol + 1, 0,
+		      (char *)pk_info, P4_KEYINFO);
+	sqlVdbeAddOp3(v, OP_IteratorOpen, pLevel->iIdxCur, 0, reg_eph);
+	VdbeComment((v, "for %s", space->def->name));
 
 	/* Fill the automatic index with content */
 	sqlExprCachePush(pParse);
 	pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom];
+	int cursor = pLevel->iTabCur;
 	if (pTabItem->fg.viaCoroutine) {
 		int regYield = pTabItem->regReturn;
 		addrCounter = sqlVdbeAddOp2(v, OP_Integer, 0, 0);
@@ -863,34 +944,33 @@ constructAutomaticIndex(Parse * pParse,			/* The parsing context */
 				  pTabItem->addrFillSub);
 		addrTop = sqlVdbeAddOp1(v, OP_Yield, regYield);
 		VdbeCoverage(v);
-		VdbeComment((v, "next row of \"%s\"", pTabItem->pTab->zName));
+		VdbeComment((v, "next row of \"%s\"", pTabItem->space->def->name));
 	} else {
-		addrTop = sqlVdbeAddOp1(v, OP_Rewind, pLevel->iTabCur);
+		addrTop = sqlVdbeAddOp1(v, OP_Rewind, cursor);
 		VdbeCoverage(v);
 	}
 	regRecord = sqlGetTempReg(pParse);
-	regBase = sql_generate_index_key(pParse, pIdx, pLevel->iTabCur,
-					 regRecord, NULL, 0);
-	sqlVdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord);
+	regBase = vdbe_emit_create_autoindex_tuple(pParse, idx_def->key_def,
+						   cursor, regRecord, reg_eph);
+	sqlVdbeAddOp2(v, OP_IdxInsert, regRecord, reg_eph);
 	if (pTabItem->fg.viaCoroutine) {
-		sqlVdbeChangeP2(v, addrCounter, regBase + n);
-		translateColumnToCopy(v, addrTop, pLevel->iTabCur,
-				      pTabItem->regResult, 1);
+		sqlVdbeChangeP2(v, addrCounter, regBase + n + 1);
+		translateColumnToCopy(v, addrTop, cursor, pTabItem->regResult);
 		sqlVdbeGoto(v, addrTop);
 		pTabItem->fg.viaCoroutine = 0;
 	} else {
-		sqlVdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop + 1);
+		sqlVdbeAddOp2(v, OP_Next, cursor, addrTop + 1);
 		VdbeCoverage(v);
 	}
 	sqlVdbeChangeP5(v, SQL_STMTSTATUS_AUTOINDEX);
 	sqlVdbeJumpHere(v, addrTop);
 	sqlReleaseTempReg(pParse, regRecord);
+	sqlReleaseTempReg(pParse, reg_eph);
 	sqlExprCachePop(pParse);
 
 	/* Jump here when skipping the initialization */
 	sqlVdbeJumpHere(v, addrInit);
 }
-#endif				/* SQL_OMIT_AUTOMATIC_INDEX */
 
 /*
  * Estimate the location of a particular key among all keys in an
@@ -1711,7 +1791,7 @@ whereLoopInit(WhereLoop * p)
 static void
 whereLoopClearUnion(WhereLoop * p)
 {
-	if ((p->wsFlags & WHERE_AUTO_INDEX) != 0) {
+	if ((p->wsFlags & WHERE_AUTO_INDEX) != 0 && p->index_def != NULL) {
 		index_def_delete(p->index_def);
 		p->index_def = NULL;
 	}
@@ -2807,16 +2887,17 @@ tnt_error:
 		probe = fake_index;
 	}
 
-#ifndef SQL_OMIT_AUTOMATIC_INDEX
 	/* Automatic indexes */
-	LogEst rSize = pTab->nRowLogEst;
+	rSize = DEFAULT_TUPLE_LOG_COUNT;
+	/* Increase cost of autoindex if number of tuples in space < ~10000. */
+	if (!space->def->opts.is_view && sql_space_tuple_log_count(space) < 133)
+		rSize += DEFAULT_TUPLE_LOG_COUNT;
 	LogEst rLogSize = estLog(rSize);
 	if (!pBuilder->pOrSet && /* Not pqart of an OR optimization */
 	    (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0 &&
 	    (pWInfo->pParse->sql_flags & SQL_AutoIndex) != 0 &&
 	    pSrc->pIBIndex == 0	/* Has no INDEXED BY clause */
 	    && !pSrc->fg.notIndexed	/* Has no NOT INDEXED clause */
-	    && HasRowid(pTab)	/* Not WITHOUT ROWID table. (FIXME: Why not?) */
 	    &&!pSrc->fg.isCorrelated	/* Not a correlated subquery */
 	    && !pSrc->fg.isRecursive	/* Not a recursive common table expression. */
 	    ) {
@@ -2829,24 +2910,10 @@ tnt_error:
 			if (termCanDriveIndex(pTerm, pSrc, 0)) {
 				pNew->nEq = 1;
 				pNew->nSkip = 0;
-				pNew->pIndex = 0;
+				pNew->index_def = NULL;
 				pNew->nLTerm = 1;
 				pNew->aLTerm[0] = pTerm;
-				/* TUNING: One-time cost for computing the automatic index is
-				 * estimated to be X*N*log2(N) where N is the number of rows in
-				 * the table being indexed and where X is 7 (LogEst=28) for normal
-				 * tables or 1.375 (LogEst=4) for views and subqueries.  The value
-				 * of X is smaller for views and subqueries so that the query planner
-				 * will be more aggressive about generating automatic indexes for
-				 * those objects, since there is no opportunity to add schema
-				 * indexes on subqueries and views.
-				 */
-				pNew->rSetup = rLogSize + rSize + 4;
-				if (!pTab->def->opts.is_view &&
-				    pTab->def->id == 0)
-					pNew->rSetup += 24;
-				if (pNew->rSetup < 0)
-					pNew->rSetup = 0;
+				pNew->rSetup = rLogSize + rSize;
 				/* TUNING: Each index lookup yields 20 rows in the table.  This
 				 * is more than the usual guess of 10 rows, since we have no way
 				 * of knowing how selective the index will ultimately be.  It would
@@ -2862,7 +2929,6 @@ tnt_error:
 			}
 		}
 	}
-#endif				/* SQL_OMIT_AUTOMATIC_INDEX */
 	/*
 	 * If there was an INDEXED BY clause, then only that one
 	 * index is considered.
@@ -4630,7 +4696,6 @@ sqlWhereBegin(Parse * pParse,	/* The parser context */
 	notReady = ~(Bitmask) 0;
 	for (ii = 0; ii < nTabList; ii++) {
 		pLevel = &pWInfo->a[ii];
-#ifndef SQL_OMIT_AUTOMATIC_INDEX
 		if ((pLevel->pWLoop->wsFlags & WHERE_AUTO_INDEX) != 0) {
 			constructAutomaticIndex(pParse, &pWInfo->sWC,
 						&pTabList->a[pLevel->iFrom],
@@ -4638,7 +4703,6 @@ sqlWhereBegin(Parse * pParse,	/* The parser context */
 			if (db->mallocFailed)
 				goto whereBeginError;
 		}
-#endif
 		sqlWhereExplainOneScan(pParse, pTabList, pLevel, ii,
 					       pLevel->iFrom, wctrlFlags);
 		pLevel->addrBody = sqlVdbeCurrentAddr(v);
@@ -4794,18 +4858,34 @@ sqlWhereEnd(WhereInfo * pWInfo)
 			for (; k < last; k++, pOp++) {
 				if (pOp->p1 != pLevel->iTabCur)
 					continue;
-				if (pOp->opcode == OP_Column) {
-					int x = pOp->p2;
-					assert(def == NULL ||
-					       def->space_id ==
-					       pTabItem->space->def->id);
-					if (x >= 0) {
-						pOp->p2 = x;
-						pOp->p1 = pLevel->iIdxCur;
-					}
-					assert((pLoop->
-						wsFlags & WHERE_IDX_ONLY) == 0
-					       || x >= 0);
+				if (pOp->opcode != OP_Column)
+					continue;
+				assert(def == NULL || def->space_id ==
+						      pTabItem->space->def->id);
+				int x = pOp->p2;
+				assert(x >= 0);
+				pOp->p1 = pLevel->iIdxCur;
+				if ((pLoop->wsFlags & WHERE_AUTO_INDEX) == 0) {
+					pOp->p2 = x;
+					continue;
+				}
+				/*
+				 * In case we are using autoindex, the space
+				 * that will be used to get the values will be
+				 * autoindex. Since the opcode OP_Column uses
+				 * the position of the fields according to the
+				 * original space, and the fields may be in
+				 * other positions in the autoindex, we must
+				 * correct the P2 of OP_Column. To get the
+				 * positions of these fields in autoindex, we
+				 * use the index definition we created.
+				 */
+				struct key_def *key_def =
+					pLevel->pWLoop->index_def->key_def;
+				uint32_t part_count = key_def->part_count;
+				for (uint32_t i = 0; i < part_count; ++i) {
+					if ((int)key_def->parts[i].fieldno == x)
+						pOp->p2 = i;
 				}
 			}
 		}
diff --git a/src/box/sql/wherecode.c b/src/box/sql/wherecode.c
index 6d8768865..0d58b4701 100644
--- a/src/box/sql/wherecode.c
+++ b/src/box/sql/wherecode.c
@@ -219,12 +219,12 @@ sqlWhereExplainOneScan(Parse * pParse,	/* Parse context */
 
 			assert(!(flags & WHERE_AUTO_INDEX)
 			       || (flags & WHERE_IDX_ONLY));
-			if (idx_def->iid == 0) {
+			if ((flags & WHERE_AUTO_INDEX) != 0) {
+				zFmt = "AUTOMATIC COVERING INDEX";
+			} else if (idx_def->iid == 0) {
 				if (isSearch) {
 					zFmt = "PRIMARY KEY";
 				}
-			} else if (flags & WHERE_AUTO_INDEX) {
-				zFmt = "AUTOMATIC COVERING INDEX";
 			} else if (flags & WHERE_IDX_ONLY) {
 				zFmt = "COVERING INDEX %s";
 			} else {
@@ -807,7 +807,8 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo,	/* Complete information about the W
 	    notReady & ~sqlWhereGetMask(&pWInfo->sMaskSet, iCur);
 	bRev = (pWInfo->revMask >> iLevel) & 1;
 	omitTable = (pLoop->wsFlags & WHERE_IDX_ONLY) != 0
-	    && (pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0;
+	    && ((pWInfo->wctrlFlags & WHERE_OR_SUBCLAUSE) == 0 ||
+		(pLoop->wsFlags & WHERE_AUTO_INDEX) != 0);
 	VdbeModuleComment((v, "Begin WHERE-loop%d: %s", iLevel,
 			   pTabItem->pTab->zName));
 
@@ -1047,8 +1048,6 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo,	/* Complete information about the W
 			startEq = 0;
 			start_constraints = 1;
 		}
-		struct index_def *idx_pk = space->index[0]->def;
-		uint32_t pk_part_count = idx_pk->key_def->part_count;
 		/*
 		 * Tarantool's iterator over integer fields doesn't
 		 * tolerate floating point values. Hence, if term
@@ -1234,6 +1233,8 @@ sqlWhereCodeOneLoopStart(WhereInfo * pWInfo,	/* Complete information about the W
 		if (omitTable) {
 			/* pIdx is a covering index.  No need to access the main table. */
 		}  else if (iCur != iIdxCur) {
+			struct index_def *idx_pk = space->index[0]->def;
+			uint32_t pk_part_count = idx_pk->key_def->part_count;
 			int iKeyReg = sqlGetTempRange(pParse, pk_part_count);
 			for (j = 0; j < (int) pk_part_count; j++) {
 				k = idx_pk->key_def->parts[j].fieldno;
diff --git a/test/sql-tap/autoindex1.test.lua b/test/sql-tap/autoindex1.test.lua
new file mode 100755
index 000000000..42b995dd3
--- /dev/null
+++ b/test/sql-tap/autoindex1.test.lua
@@ -0,0 +1,115 @@
+#!/usr/bin/env tarantool
+test = require("sqltester")
+test:plan(7)
+
+--  2010 April 07
+
+--  The author disclaims copyright to this source code.  In place of
+--  a legal notice, here is a blessing:
+
+--     May you do good and not evil.
+--     May you find forgiveness for yourself and forgive others.
+--     May you share freely, never taking more than you give.
+
+---------------------------------------------------------------------------
+--  This file implements regression tests for sql library.  The focus of this
+-- script is testing automatic index creation logic.
+
+test:execsql([[
+    CREATE TABLE t1(a INT, b INT PRIMARY KEY);
+    INSERT INTO t1 VALUES(1, 11);
+    INSERT INTO t1 VALUES(2, 22);
+    INSERT INTO t1 SELECT a + 2, b + 22 FROM t1;
+    INSERT INTO t1 SELECT a + 4, b + 44 FROM t1;
+    CREATE TABLE t2(c INT, d INT PRIMARY KEY);
+]])
+
+for i = 1, 11000 do test:execsql("INSERT INTO t2 VALUES ("..i..", "..i..");") end
+
+test:do_execsql_test(
+    "autoindex-1.1", [[
+        EXPLAIN QUERY PLAN SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1;
+    ]], {
+        0,0,0,"SCAN TABLE T1 (~1048576 rows)",
+        0,0,0,"EXECUTE CORRELATED SCALAR SUBQUERY 1",
+        1,0,0,"SEARCH TABLE T2 USING AUTOMATIC COVERING INDEX (C=?) (~20 rows)"
+    })
+
+result = test:execsql([[SELECT b, (SELECT d FROM t2 WHERE c = a) FROM t1;]])
+
+test:do_execsql_test(
+    "autoindex-1.2", [[
+        EXPLAIN QUERY PLAN SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b;
+    ]], {
+        0,0,0,"SCAN TABLE T1 (~1048576 rows)",
+        0,1,1,"SEARCH TABLE T2 USING AUTOMATIC COVERING INDEX (C=?) (~20 rows)"
+    })
+
+test:do_execsql_test(
+    "autoindex-1.3", [[
+        SELECT b, d FROM t1 JOIN t2 ON a = c ORDER BY b;
+    ]], result)
+
+test:do_execsql_test(
+    "autoindex-1.4", [[
+        EXPLAIN QUERY PLAN SELECT b, d FROM t1 CROSS JOIN t2 ON (c = a);
+    ]], {
+        0,0,0,"SCAN TABLE T1 (~1048576 rows)",
+        0,1,1,"SEARCH TABLE T2 USING AUTOMATIC COVERING INDEX (C=?) (~20 rows)"
+    })
+
+test:do_execsql_test(
+    "autoindex-1.5", [[
+        SELECT b, d FROM t1 CROSS JOIN t2 ON (c = a);
+    ]], result)
+
+test:execsql([[
+    CREATE TABLE t3(i INT PRIMARY KEY, a INT, b INT);
+]])
+
+for i = 1, 11000 do test:execsql("INSERT INTO t3 VALUES ("..i..", "..i..", "..(i + 1)..");") end
+
+test:do_execsql_test(
+    "autoindex-1.6", [[
+        SELECT count(*)
+          FROM t3 AS x1
+          JOIN t3 AS x2 ON x2.a=x1.b
+          JOIN t3 AS x3 ON x3.a=x2.b
+          JOIN t3 AS x4 ON x4.a=x3.b
+          JOIN t3 AS x5 ON x5.a=x4.b
+          JOIN t3 AS x6 ON x6.a=x5.b
+          JOIN t3 AS x7 ON x7.a=x6.b
+          JOIN t3 AS x8 ON x8.a=x7.b
+          JOIN t3 AS x9 ON x9.a=x8.b
+          JOIN t3 AS x10 ON x10.a=x9.b;
+    ]], {
+        10991
+    })
+
+test:do_execsql_test(
+    "autoindex-1.7", [[
+        EXPLAIN QUERY PLAN SELECT count(*)
+          FROM t3 AS x1
+          JOIN t3 AS x2 ON x2.a=x1.b
+          JOIN t3 AS x3 ON x3.a=x2.b
+          JOIN t3 AS x4 ON x4.a=x3.b
+          JOIN t3 AS x5 ON x5.a=x4.b
+          JOIN t3 AS x6 ON x6.a=x5.b
+          JOIN t3 AS x7 ON x7.a=x6.b
+          JOIN t3 AS x8 ON x8.a=x7.b
+          JOIN t3 AS x9 ON x9.a=x8.b
+          JOIN t3 AS x10 ON x10.a=x9.b;
+    ]], {
+        0,0,0,"SCAN TABLE T3 AS X1 (~1048576 rows)",
+        0,1,1,"SEARCH TABLE T3 AS X2 USING AUTOMATIC COVERING INDEX (A=?) (~20 rows)",
+        0,2,2,"SEARCH TABLE T3 AS X3 USING AUTOMATIC COVERING INDEX (A=?) (~20 rows)",
+        0,3,3,"SEARCH TABLE T3 AS X4 USING AUTOMATIC COVERING INDEX (A=?) (~20 rows)",
+        0,4,4,"SEARCH TABLE T3 AS X5 USING AUTOMATIC COVERING INDEX (A=?) (~20 rows)",
+        0,5,5,"SEARCH TABLE T3 AS X6 USING AUTOMATIC COVERING INDEX (A=?) (~20 rows)",
+        0,6,6,"SEARCH TABLE T3 AS X7 USING AUTOMATIC COVERING INDEX (A=?) (~20 rows)",
+        0,7,7,"SEARCH TABLE T3 AS X8 USING AUTOMATIC COVERING INDEX (A=?) (~20 rows)",
+        0,8,8,"SEARCH TABLE T3 AS X9 USING AUTOMATIC COVERING INDEX (A=?) (~20 rows)",
+        0,9,9,"SEARCH TABLE T3 AS X10 USING AUTOMATIC COVERING INDEX (A=?) (~20 rows)"
+    })
+
+test:finish_test()
diff --git a/test/sql-tap/autoindex4.test.lua b/test/sql-tap/autoindex4.test.lua
index 46488f13a..e22f49c67 100755
--- a/test/sql-tap/autoindex4.test.lua
+++ b/test/sql-tap/autoindex4.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(7)
+test:plan(8)
 
 --!./tcltestrunner.lua
 -- 2014-10-24
@@ -75,6 +75,18 @@ test:do_execsql_test(
         -- </autoindex4-1.4>
     })
 
+test:do_execsql_test(
+    "autoindex4-2.0",
+    [[
+        CREATE TABLE t3(i INT PRIMARY KEY, e INT, f INT);
+        INSERT INTO t3 VALUES(1, 123,654), (2, 555,444), (3, 234,987);
+        SELECT (SELECT count(*) FROM t1, t2 WHERE a=e AND x=f), e, f, '|' FROM t3 ORDER BY i;
+    ]], {
+        -- <autoindex4-2.0>
+        1, 123, 654, "|", 0, 555, 444, "|", 4, 234, 987, "|"
+        -- </autoindex4-2.0>
+    })
+
 -- do_execsql_test autoindex4-2.0 {
 --   CREATE TABLE t3(e INT,f INT);
 --   INSERT INTO t3 VALUES(123,654),(555,444),(234,987);
diff --git a/test/sql-tap/whereF.test.lua b/test/sql-tap/whereF.test.lua
index 5a894b748..3235df437 100755
--- a/test/sql-tap/whereF.test.lua
+++ b/test/sql-tap/whereF.test.lua
@@ -90,10 +90,20 @@ test:do_execsql_test(
 
 -- for _ in X(0, "X!foreach", [=[["tn sql","\n  1 \"SELECT * FROM t1,           t2 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e\"\n  2 \"SELECT * FROM t2,           t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e\"\n  3 \"SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e\"\n"]]=]) do
 for tn, sql in ipairs({"SELECT * FROM t1,           t2 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e",
-                       "SELECT * FROM t2,           t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e",
-                       "SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"}) do
+                       "SELECT * FROM t2,           t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"}) do
     test:do_test(
-        "2."..tn,
+        "2.1."..tn,
+        function()
+            return test:execsql("EXPLAIN QUERY PLAN "..sql)
+        end, {
+            '/SEARCH TABLE T1/',
+            '/SEARCH TABLE T2/'
+        })
+end
+
+for tn, sql in ipairs({"SELECT * FROM t2 CROSS JOIN t1 WHERE t1.a>? AND t2.d>t1.c AND t1.b=t2.e"}) do
+    test:do_test(
+        "2.2."..tn,
         function()
             return test:execsql("EXPLAIN QUERY PLAN "..sql)
         end, {
diff --git a/test/sql/misc.result b/test/sql/misc.result
index 6af11bfba..b52a83bda 100644
--- a/test/sql/misc.result
+++ b/test/sql/misc.result
@@ -204,3 +204,38 @@ box.execute('SELECT field66, field68, field70 FROM test')
 box.space.TEST:drop()
 ---
 ...
+-- gh-4933: Make sure that autoindex optimization is used.
+box.execute('CREATE TABLE t1(i INT PRIMARY KEY, a INT);')
+---
+- row_count: 1
+...
+box.execute('CREATE TABLE t2(i INT PRIMARY KEY, b INT);')
+---
+- row_count: 1
+...
+for i = 1, 11000 do\
+	box.execute('INSERT INTO t1 VALUES ($1, $1);', {i})\
+	box.execute('INSERT INTO t2 VALUES ($1, $1);', {i})\
+end
+---
+...
+--
+-- There is no need to insert values in the tables since planner assumes a
+-- default number of tuples for each space, regardless of how many tuples there
+-- actually are in those spaces. The default value is 1048576 (== 2^20).
+--
+box.execute('EXPLAIN QUERY PLAN SELECT a, b FROM t1, t2 WHERE a = b;')
+---
+- metadata:
+  - name: selectid
+    type: integer
+  - name: order
+    type: integer
+  - name: from
+    type: integer
+  - name: detail
+    type: text
+  rows:
+  - [0, 0, 0, 'SCAN TABLE T1 (~1048576 rows)']
+  - [0, 1, 1, 'SEARCH TABLE T2 USING AUTOMATIC COVERING INDEX (B=?) (~20 rows)']
+...
diff --git a/test/sql/misc.test.lua b/test/sql/misc.test.lua
index 63244228c..d7673bb49 100644
--- a/test/sql/misc.test.lua
+++ b/test/sql/misc.test.lua
@@ -64,3 +64,17 @@ box.execute('SELECT field70, field64 FROM test')
 pk:alter({parts = {66}})
 box.execute('SELECT field66, field68, field70 FROM test')
 box.space.TEST:drop()
+
+-- gh-4933: Make sure that autoindex optimization is used.
+box.execute('CREATE TABLE t1(i INT PRIMARY KEY, a INT);')
+box.execute('CREATE TABLE t2(i INT PRIMARY KEY, b INT);')
+for i = 1, 11000 do\
+	box.execute('INSERT INTO t1 VALUES ($1, $1);', {i})\
+	box.execute('INSERT INTO t2 VALUES ($1, $1);', {i})\
+end
+--
+-- There is no need to insert values in the tables since planner assumes a
+-- default number of tuples for each space, regardless of how many tuples there
+-- actually are in those spaces. The default value is 1048576 (== 2^20).
+--
+box.execute('EXPLAIN QUERY PLAN SELECT a, b FROM t1, t2 WHERE a = b;')
-- 
2.25.1

next             reply	other threads:[~2020-10-11 10:48 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-11 10:48 imeevma [this message]
2020-10-13 14:08 ` Nikita Pettik
2020-10-14  9:30   ` Mergen Imeev
2020-10-15  1:40     ` Nikita Pettik
2020-10-16  0:22       ` Mergen Imeev
2020-10-20 14:41         ` Nikita Pettik
2020-10-20 15:09           ` Alexander V. Tikhonov
2020-10-20 15:40             ` Nikita Pettik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5c615731b1bd3e6d279dfb0cbc92823dea993a03.1602413265.git.imeevma@gmail.com \
    --to=imeevma@tarantool.org \
    --cc=imeevma@gmail.com \
    --cc=korablev@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --cc=tsafin@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v3 1/1] sql: enable autoindex optimization' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox