Tarantool development patches archive
 help / color / mirror / Atom feed
From: n.pettik <korablev@tarantool.org>
To: tarantool-patches@freelists.org
Cc: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Subject: [tarantool-patches] Re: [PATCH 2/4] sql: add average tuple size calculation
Date: Fri, 11 May 2018 20:29:31 +0300	[thread overview]
Message-ID: <16ADACB2-9CCC-4282-9533-AB1FA6A5E770@tarantool.org> (raw)
In-Reply-To: <46e6a842-2a0b-90fc-7da0-2268eb6d05e3@tarantool.org>

[-- Attachment #1: Type: text/plain, Size: 7192 bytes --]


>>  +ssize_t
>> +sql_index_tuple_size(struct space *space, struct index *idx)
>> +{
>> +	assert(space != NULL);
>> +	assert(idx != NULL);
>> +	assert(idx->def->space_id == space->def->id);
>> +	ssize_t tuple_count = idx->vtab->size(idx);
>> +	ssize_t space_size = space->vtab->bsize(space);
> 
> 1. Lets use wrappers: index_size() and space_bsize() - they are defined
> already.

Done:

+++ b/src/box/sql/analyze.c
@@ -1197,9 +1197,10 @@ sql_index_tuple_size(struct space *space, struct index *idx)
        assert(space != NULL);
        assert(idx != NULL);
        assert(idx->def->space_id == space->def->id);
-       ssize_t tuple_count = idx->vtab->size(idx);
-       ssize_t space_size = space->vtab->bsize(space);
-       ssize_t avg_tuple_size = DIV_OR_ZERO(space_size, tuple_count);
+       ssize_t tuple_count = index_size(idx);
+       ssize_t space_size = space_bsize(space);
+       ssize_t avg_tuple_size = tuple_count != 0 ?
+                                (space_size / tuple_count) : 0;

>> diff --git a/src/box/sql/select.c b/src/box/sql/select.c
>> index 0df8a71d4..391b7e0a2 100644
>> --- a/src/box/sql/select.c
>> +++ b/src/box/sql/select.c
>> @@ -1588,20 +1588,19 @@ generateSortTail(Parse * pParse,	/* Parsing context */
>>   * the SQLITE_ENABLE_COLUMN_METADATA compile-time option is used.
>>   */
>>  #ifdef SQLITE_ENABLE_COLUMN_METADATA
>> -#define columnType(A,B,C,D,E,F) columnTypeImpl(A,B,D,E,F)
>> +#define columnType(A,B,C,D,E) columnTypeImpl(A,B,D,E)
>>  #else				/* if !defined(SQLITE_ENABLE_COLUMN_METADATA) */
>> -#define columnType(A,B,C,D,E,F) columnTypeImpl(A,B,F)
>> +#define columnType(A,B,C,D,E) columnTypeImpl(A,B)
>>  #endif
>>  static enum field_type
>> -columnTypeImpl(NameContext * pNC, Expr * pExpr,
>> +columnTypeImpl(NameContext * pNC, Expr * pExpr
>>  #ifdef SQLITE_ENABLE_COLUMN_METADATA
>> -	       const char **pzOrigTab, const char **pzOrigCol,
>> +	       , const char **pzOrigTab, const char **pzOrigCol,
> 
> 2. As I can see, the third argument is always NULL. Lets remove it
> too.

Done:

@@ -1655,8 +1655,8 @@ columnTypeImpl(NameContext * pNC, Expr * pExpr
                                        sNC.pNext = pNC;
                                        sNC.pParse = pNC->pParse;
                                        column_type =
-                                           columnType(&sNC, p, 0,
-                                                      &zOrigTab, &zOrigCol);
+                                           columnType(&sNC, p, &zOrigTab,
+                                                      &zOrigCol);

@@ -1685,7 +1685,7 @@ columnTypeImpl(NameContext * pNC, Expr * pExpr
                        sNC.pNext = pNC;
                        sNC.pParse = pNC->pParse;
                        column_type =
-                           columnType(&sNC, p, 0, &zOrigTab, &zOrigCol);
+                           columnType(&sNC, p, &zOrigTab, &zOrigCol);

@@ -1921,7 +1921,7 @@ sqlite3SelectAddColumnTypeAndCollation(Parse * pParse,            /* Parsing contexts */
        for (i = 0, pCol = pTab->aCol; i < pTab->nCol; i++, pCol++) {
                enum field_type type;
                p = a[i].pExpr;
-               type = columnType(&sNC, p, 0, 0, 0);
+               type = columnType(&sNC, p, 0, 0);

>> diff --git a/src/box/sql/sqliteInt.h b/src/box/sql/sqliteInt.h
>> index 59662cf14..8ca8e808f 100644
>> --- a/src/box/sql/sqliteInt.h
>> +++ b/src/box/sql/sqliteInt.h
>> @@ -1396,6 +1396,11 @@ struct BusyHandler {
>>   */
>>  #define IsPowerOfTwo(X) (((X)&((X)-1))==0)
>>  +#ifdef ZERO_OR_DIV
>> +#undef ZERO_OR_DIV
>> +#endif
>> +#define DIV_OR_ZERO(NUM, DENOM) (((DENOM) != 0) ? ((NUM) / (DENOM)) : 0)
> 
> 3.
> 
> Divide by 0: *exists*
> Programmers:https://pm1.narvii.com/6585/b5b717574d0d6250181c18aadd89fbe0b3c7bf3a_hq.jpg <https://pm1.narvii.com/6585/b5b717574d0d6250181c18aadd89fbe0b3c7bf3a_hq.jpg>
> 
> Lets just inline it. And what is ZERO_OR_DIV?

It was just typo in naming. Anyway, I have removed this macro:

+++ b/src/box/sql/sqliteInt.h
@@ -1389,11 +1389,6 @@ struct BusyHandler {
  */
 #define IsPowerOfTwo(X) (((X)&((X)-1))==0)
 
-#ifdef ZERO_OR_DIV
-#undef ZERO_OR_DIV
-#endif
-#define DIV_OR_ZERO(NUM, DENOM) (((DENOM) != 0) ? ((NUM) / (DENOM)) : 0)
-

>> diff --git a/src/box/sql/where.c b/src/box/sql/where.c
>> index 2a2630281..51b53c2df 100644
>> --- a/src/box/sql/where.c
>> +++ b/src/box/sql/where.c
>> @@ -2545,15 +2537,31 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder,	/* The WhereLoop factory */
>>  		 * seek only. Then, if this is a non-covering index, add the cost of
>>  		 * visiting the rows in the main table.
>>  		 */
>> -		rCostIdx =
>> -		    pNew->nOut + 1 +
>> -		    (15 * pProbe->szIdxRow) / pSrc->pTab->szTabRow;
>> +		struct space *space =
>> +			space_by_id(SQLITE_PAGENO_TO_SPACEID(pProbe->tnum));
>> +		assert(space != NULL);
>> +		struct index *idx =
>> +			space_index(space,
>> +				    SQLITE_PAGENO_TO_INDEXID(pProbe->tnum));
>> +		assert(idx != NULL);
>> +		/*
>> +		 * FIXME: currently, the procedure below makes no
>> +		 * sense, since there are no partial indexes, so
>> +		 * all indexes in the space feature the same
>> +		 * average tuple size.
> 
> 4. Do not forget about Vinyl. In it even with no partial indexes different
> ones can contain different tuple count, tuples of different size (due to
> specific of secondary indexes disk data structure). Now it does not support SQL,
> but will do.

Ok, updated comment (and inlined macro):

--- a/src/box/sql/where.c
+++ b/src/box/sql/where.c
@@ -2544,14 +2544,17 @@ whereLoopAddBtreeIndex(WhereLoopBuilder * pBuilder,     /* The WhereLoop factory */
                 * FIXME: currently, the procedure below makes no
                 * sense, since there are no partial indexes, so
                 * all indexes in the space feature the same
-                * average tuple size.
+                * average tuple size. Moreover, secondary
+                * indexes in Vinyl engine may contain different
+                * tuple count of different sizes.
                 */
                ssize_t avg_tuple_size = sql_index_tuple_size(space, idx);
                struct index *pk = space_index(space, 0);
                assert(pProbe->pTable == pSrc->pTab);
                ssize_t avg_tuple_size_pk = sql_index_tuple_size(space, pk);
-               uint32_t partial_index_cost = DIV_OR_ZERO((15 * avg_tuple_size),
-                                                         avg_tuple_size_pk);
+               uint32_t partial_index_cost =
+                       avg_tuple_size_pk != 0 ?
+                       (15 * avg_tuple_size) / avg_tuple_size_pk : 0;

>> diff --git a/test/sql-tap/analyze9.test.lua b/test/sql-tap/analyze9.test.lua
>> index 4ce575e90..3b3d52f67 100755
>> --- a/test/sql-tap/analyze9.test.lua
>> +++ b/test/sql-tap/analyze9.test.lua
>> +-- These tests are commented until query planer will be stable.
> 
> 5. What do you mean saying 'unstable'? The test is flaky or incorrect?

Both.


[-- Attachment #2: Type: text/html, Size: 27886 bytes --]

  reply	other threads:[~2018-05-11 17:29 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-23 20:29 [tarantool-patches] [PATCH 0/4] Move original SQLite's statistics to server Nikita Pettik
2018-04-23 20:29 ` [tarantool-patches] [PATCH 1/4] sql: optimize compilation of SELECT COUNT(*) Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-04-23 20:29 ` [tarantool-patches] [PATCH 2/4] sql: add average tuple size calculation Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik [this message]
2018-04-23 20:29 ` [tarantool-patches] [PATCH 3/4] sql: refactor usages of table's tuple count Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-04-23 20:29 ` [tarantool-patches] [PATCH 4/4] sql: move SQL statistics to server Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-05-11 22:00       ` Vladislav Shpilevoy
2018-05-14 11:52         ` n.pettik
2018-05-14 12:54           ` Vladislav Shpilevoy
2018-05-14 13:55             ` n.pettik
2018-05-14 14:12 ` [tarantool-patches] Re: [PATCH 0/4] Move original SQLite's " Vladislav Shpilevoy
2018-05-15 13:42   ` Kirill Yukhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=16ADACB2-9CCC-4282-9533-AB1FA6A5E770@tarantool.org \
    --to=korablev@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='[tarantool-patches] Re: [PATCH 2/4] sql: add average tuple size calculation' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox