[tarantool-patches] Re: [PATCH 2/4] sql: add average tuple size calculation

n.pettik korablev at tarantool.org
Fri May 11 20:29:31 MSK 2018


>>  +ssize_t
>> +sql_index_tuple_size(struct space *space, struct index *idx)
>> +{
>> +	assert(space != NULL);
>> +	assert(idx != NULL);
>> +	assert(idx->def->space_id == space->def->id);
>> +	ssize_t tuple_count = idx->vtab->size(idx);
>> +	ssize_t space_size = space->vtab->bsize(space);
> 
> 1. Let's use the wrappers index_size() and space_bsize() - they are already
> defined.

Done:

+++ b/src/box/sql/analyze.c
@@ -1197,9 +1197,10 @@ sql_index_tuple_size(struct space *space, struct index *idx)
        assert(space != NULL);
        assert(idx != NULL);
        assert(idx->def->space_id == space->def->id);
-       ssize_t tuple_count = idx->vtab->size(idx);
-       ssize_t space_size = space->vtab->bsize(space);
-       ssize_t avg_tuple_size = DIV_OR_ZERO(space_size, tuple_count);
+       ssize_t tuple_count = index_size(idx);
+       ssize_t space_size = space_bsize(space);
+       ssize_t avg_tuple_size = tuple_count != 0 ?
+                                (space_size / tuple_count) : 0;

>> diff --git a/src/box/sql/select.c b/src/box/sql/select.c
>> index 0df8a71d4..391b7e0a2 100644
>> --- a/src/box/sql/select.c
>> +++ b/src/box/sql/select.c
>> @@ -1588,20 +1588,19 @@ generateSortTail(Parse * pParse,	/* Parsing context */
>>   * the SQLITE_ENABLE_COLUMN_METADATA compile-time option is used.
>>   */
>>  #ifdef SQLITE_ENABLE_COLUMN_METADATA
>> -#define columnType(A,B,C,D,E,F) columnTypeImpl(A,B,D,E,F)
>> +#define columnType(A,B,C,D,E) columnTypeImpl(A,B,D,E)
>>  #else				/* if !defined(SQLITE_ENABLE_COLUMN_METADATA) */
>> -#define columnType(A,B,C,D,E,F) columnTypeImpl(A,B,F)
>> +#define columnType(A,B,C,D,E) columnTypeImpl(A,B)
>>  #endif
>>  static enum field_type
>> -columnTypeImpl(NameContext * pNC, Expr * pExpr,
>> +columnTypeImpl(NameContext * pNC, Expr * pExpr
>>  #ifdef SQLITE_ENABLE_COLUMN_METADATA
>> -	       const char **pzOrigTab, const char **pzOrigCol,
>> +	       , const char **pzOrigTab, const char **pzOrigCol,
> 
> 2. As far as I can see, the third argument is always NULL. Let's remove it
> too.

Done:

@@ -1655,8 +1655,8 @@ columnTypeImpl(NameContext * pNC, Expr * pExpr
                                        sNC.pNext = pNC;
                                        sNC.pParse = pNC->pParse;
                                        column_type =
-                                           columnType(&sNC, p, 0,
-                                                      &zOrigTab, &zOrigCol);
+                                           columnType(&sNC, p, &zOrigTab,
+                                                      &zOrigCol);

@@ -1685,7 +1685,7 @@ columnTypeImpl(NameContext * pNC, Expr * pExpr
                        sNC.pNext = pNC;
                        sNC.pParse = pNC->pParse;
                        column_type =
-                           columnType(&sNC, p, 0, &zOrigTab, &zOrigCol);
+                           columnType(&sNC, p, &zOrigTab, &zOrigCol);

@@ -1921,7 +1921,7 @@ sqlite3SelectAddColumnTypeAndCollation(Parse * pParse,            /* Parsing contexts */
        for (i = 0, pCol = pTab->aCol; i < pTab->nCol; i++, pCol++) {
                enum field_type type;
                p = a[i].pExpr;
-               type = columnType(&sNC, p, 0, 0, 0);
+               type = columnType(&sNC, p, 0, 0);

>> diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h
>> index 59662cf14..8ca8e808f 100644
>> --- a/src/box/sql/sqliteInt.h
>> +++ b/src/box/sql/sqliteInt.h
>> @@ -1396,6 +1396,11 @@ struct BusyHandler {
>>   */
>>  #define IsPowerOfTwo(X) (((X)&((X)-1))==0)
>>  +#ifdef ZERO_OR_DIV
>> +#undef ZERO_OR_DIV
>> +#endif
>> +#define DIV_OR_ZERO(NUM, DENOM) (((DENOM) != 0) ? ((NUM) / (DENOM)) : 0)
> 
> 3.
> 
> Divide by 0: *exists*
> Programmers: https://pm1.narvii.com/6585/b5b717574d0d6250181c18aadd89fbe0b3c7bf3a_hq.jpg
> 
> Let's just inline it. And what is ZERO_OR_DIV?

It was just a typo in the naming. Anyway, I have removed this macro:

+++ b/src/box/sql/sqliteInt.h
@@ -1389,11 +1389,6 @@ struct BusyHandler {
  */
 #define IsPowerOfTwo(X) (((X)&((X)-1))==0)
 
-#ifdef ZERO_OR_DIV
-#undef ZERO_OR_DIV
-#endif
-#define DIV_OR_ZERO(NUM, DENOM) (((DENOM) != 0) ? ((NUM) / (DENOM)) : 0)
-

>> diff --git a/src/box/sql/where.c b/src/box/sql/where.c
>> index 2a2630281..51b53c2df 100644
>> --- a/src/box/sql/where.c
>> +++ b/src/box/sql/where.c
>> @@ -2545,15 +2537,31 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder,	/* The WhereLoop factory */
>>  		 * seek only. Then, if this is a non-covering index, add the cost of
>>  		 * visiting the rows in the main table.
>>  		 */
>> -		rCostIdx =
>> -		    pNew->nOut + 1 +
>> -		    (15 * pProbe->szIdxRow) / pSrc->pTab->szTabRow;
>> +		struct space *space =
>> +			space_by_id(SQLITE_PAGENO_TO_SPACEID(pProbe->tnum));
>> +		assert(space != NULL);
>> +		struct index *idx =
>> +			space_index(space,
>> +				    SQLITE_PAGENO_TO_INDEXID(pProbe->tnum));
>> +		assert(idx != NULL);
>> +		/*
>> +		 * FIXME: currently, the procedure below makes no
>> +		 * sense, since there are no partial indexes, so
>> +		 * all indexes in the space feature the same
>> +		 * average tuple size.
> 
> 4. Do not forget about Vinyl. There, even without partial indexes, different
> indexes can contain different tuple counts and tuples of different sizes (due
> to the specifics of the on-disk data structure of secondary indexes). Vinyl
> does not support SQL yet, but it will.

Ok, updated comment (and inlined macro):

--- a/src/box/sql/where.c
+++ b/src/box/sql/where.c
@@ -2544,14 +2544,17 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder,     /* The WhereLoop factory */
                 * FIXME: currently, the procedure below makes no
                 * sense, since there are no partial indexes, so
                 * all indexes in the space feature the same
-                * average tuple size.
+                * average tuple size. Moreover, secondary
+                * indexes in Vinyl engine may contain different
+                * tuple count of different sizes.
                 */
                ssize_t avg_tuple_size = sql_index_tuple_size(space, idx);
                struct index *pk = space_index(space, 0);
                assert(pProbe->pTable == pSrc->pTab);
                ssize_t avg_tuple_size_pk = sql_index_tuple_size(space, pk);
-               uint32_t partial_index_cost = DIV_OR_ZERO((15 * avg_tuple_size),
-                                                         avg_tuple_size_pk);
+               uint32_t partial_index_cost =
+                       avg_tuple_size_pk != 0 ?
+                       (15 * avg_tuple_size) / avg_tuple_size_pk : 0;

>> diff --git a/test/sql-tap/analyze9.test.lua b/test/sql-tap/analyze9.test.lua
>> index 4ce575e90..3b3d52f67 100755
>> --- a/test/sql-tap/analyze9.test.lua
>> +++ b/test/sql-tap/analyze9.test.lua
>> +-- These tests are commented until query planer will be stable.
> 
> 5. What do you mean by 'unstable'? Is the test flaky or incorrect?

Both.

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.tarantool.org/pipermail/tarantool-patches/attachments/20180511/eedd685d/attachment.html>


More information about the Tarantool-patches mailing list