[tarantool-patches] [PATCH v1 1/3] sql: disallow identical samples in statistics

imeevma at tarantool.org imeevma at tarantool.org
Thu Mar 21 22:30:05 MSK 2019


Before this patch the _sql_stat4 table could contain fewer rows of
statistics than the number of samples created during analyze.
This happened because some of the sample rows were identical, so
they replaced one another when the statistics were inserted into
the _sql_stat4 table.

This patch disallows the creation of identical rows of statistics
during analyze. After this patch the number of rows of statistics
in _sql_stat4 will be no less than it was before.

Needed for #2843
---
 src/box/sql/analyze.c          | 18 +++++++++++++++++-
 test/sql-tap/analyze1.test.lua | 19 ++++++++++++++++++-
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c
index 6ea598c..ffb7335 100644
--- a/src/box/sql/analyze.c
+++ b/src/box/sql/analyze.c
@@ -180,6 +180,13 @@ struct Stat4Accum {
 	int iGet;		/* Index of current sample accessed by stat_get() */
 	Stat4Sample *a;		/* Array of mxSample Stat4Sample objects */
 	sql *db;		/* Database connection, for malloc() */
+	/*
+	 * Number of consecutive rows whose index value is
+	 * identical to the current index value.
+	 */
+	uint64_t identical_index_value;
+	/* Row number of previous periodic sample. */
+	uint64_t previous_psample;
 };
 
 /* Reclaim memory used by a Stat4Sample
@@ -307,6 +314,8 @@ statInit(sql_context * context, int argc, sql_value ** argv)
 	p->nKeyCol = nKeyCol;
 	p->current.anDLt = (tRowcnt *) & p[1];
 	p->current.anEq = &p->current.anDLt[nColUp];
+	p->identical_index_value = 0;
+	p->previous_psample = 0;
 
 	{
 		u8 *pSpace;	/* Allocated space not yet assigned */
@@ -477,7 +486,9 @@ sampleInsert(Stat4Accum * p, Stat4Sample * pNew, int nEqZero)
 	/* Insert the new sample */
 	pSample = &p->a[p->nSample];
 	sampleCopy(p, pSample, pNew);
-	p->nSample++;
+	if (pNew->isPSample == 0 || p->previous_psample == 0 ||
+	    p->nRow - p->previous_psample > p->identical_index_value)
+		p->nSample++;
 
 	/* Zero the first nEqZero entries in the anEq[] array. */
 	memset(pSample->anEq, 0, sizeof(tRowcnt) * nEqZero);
@@ -559,6 +570,10 @@ statPush(sql_context * context, int argc, sql_value ** argv)
 	assert(p->nCol > 0);
 	/* iChng == p->nCol means that the current and previous rows are identical */
 	assert(iChng <= p->nCol);
+	if (iChng == p->nCol)
+		++p->identical_index_value;
+	else
+		p->identical_index_value = 0;
 	if (p->nRow == 0) {
 		/* This is the first call to this function. Do initialization. */
 		for (i = 0; i < p->nCol + 1; i++)
@@ -592,6 +607,7 @@ statPush(sql_context * context, int argc, sql_value ** argv)
 			p->current.iCol = 0;
 			sampleInsert(p, &p->current, p->nCol);
 			p->current.isPSample = 0;
+			p->previous_psample = p->nRow;
 		}
 		/* Update the aBest[] array. */
 		for (i = 0; i < p->nCol; i++) {
diff --git a/test/sql-tap/analyze1.test.lua b/test/sql-tap/analyze1.test.lua
index cc12593..959ea5e 100755
--- a/test/sql-tap/analyze1.test.lua
+++ b/test/sql-tap/analyze1.test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env tarantool
 test = require("sqltester")
-test:plan(38)
+test:plan(39)
 
 --!./tcltestrunner.lua
 -- 2005 July 22
@@ -546,6 +546,23 @@ test:do_execsql_test(
     -- </analyze-6.1.4>
 })
 
+-- This test shows that an index with 1000 identical values and
+-- 25 distinct values in total gives the max number of samples.
+test:do_test(
+    "analyze-7.1",
+    function()
+        test:execsql("CREATE TABLE t7(i INTEGER PRIMARY KEY, a INTEGER);")
+        test:execsql("CREATE INDEX i7 ON t7(a);")
+        for i = 0, 999 do test:execsql("INSERT INTO t7 VALUES("..i..", 0) ") end
+        for i = 1, 24 do test:execsql("INSERT INTO t7 VALUES(".. i + 999 .. ", ".. i ..") ") end
+        test:execsql("ANALYZE;")
+        return test:execsql([[SELECT count(*) FROM "_sql_stat4" WHERE "idx" = 'I7';]])
+    end, {
+    -- <analyze-7.1>
+    24
+    -- </analyze-7.1>
+})
+
 -- # This test corrupts the database file so it must be the last test
 -- # in the series.
 -- #
-- 
2.7.4





More information about the Tarantool-patches mailing list