From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 5D7D7252C6 for ; Fri, 11 May 2018 18:00:18 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id uhaNVg1WCJKd for ; Fri, 11 May 2018 18:00:18 -0400 (EDT) Received: from smtp55.i.mail.ru (smtp55.i.mail.ru [217.69.128.35]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 8A51524F3E for ; Fri, 11 May 2018 18:00:17 -0400 (EDT) Subject: [tarantool-patches] Re: [PATCH 4/4] sql: move SQL statistics to server References: <291aa5961a366c81ffa7ea465636125617a78dad.1524515002.git.korablev@tarantool.org> <1dd6cfc3-25f1-8184-df64-2e1638ad55a0@tarantool.org> <8E69DCFF-0088-4FCD-A4CA-E3C98DEF7274@tarantool.org> From: Vladislav Shpilevoy Message-ID: <1f4a08f6-366d-8d0c-ec5c-9e8e33a766ee@tarantool.org> Date: Sat, 12 May 2018 01:00:14 +0300 MIME-Version: 1.0 In-Reply-To: <8E69DCFF-0088-4FCD-A4CA-E3C98DEF7274@tarantool.org> Content-Type: text/plain; charset="utf-8"; format="flowed" Content-Language: en-US Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: "n.pettik" , tarantool-patches@freelists.org On 11/05/2018 20:29, n.pettik wrote: > I am attaching whole patch since it has changed significantly with region allocations. > I didn't answer on comments concerning memory allocation policy deliberately, but > took them into consideration when was reworking patch. I hope I didn’t miss smth. > Overall, thanks for ideas and suggestions! Thank you very much! The patch now looks almost perfect. See 6 comments below. > diff --git a/src/box/index_def.c b/src/box/index_def.c > index d0d852519..1702430b3 100644 > --- a/src/box/index_def.c > +++ b/src/box/index_def.c > +struct index_stat * > +index_stat_dup(const struct index_stat *src) > +{ > + size_t size = index_stat_sizeof(src->samples, src->sample_count, > + src->sample_field_count); > + struct index_stat *dup = (struct index_stat *) malloc(size); > + if (dup == NULL) { > + diag_set(OutOfMemory, size, "malloc", "index stat"); > + return NULL; > + } > + memcpy(dup, src, size); > + uint32_t array_size = src->sample_field_count * sizeof(uint32_t); > + uint32_t stat1_offset = sizeof(struct index_stat); > + dup->tuple_stat1 = (uint32_t *)((char*)dup + stat1_offset); > + dup->tuple_log_est = (log_est_t *)((char*)dup + stat1_offset + > + array_size + sizeof(uint32_t)); > + dup->avg_eq = (uint32_t *)((char*)dup + stat1_offset + > + 2 * (array_size + sizeof(uint32_t))); > + uint32_t samples_offset = stat1_offset + 3 * array_size + > + 2 * sizeof(uint32_t); > + dup->samples = (struct index_sample *)((char*)dup + samples_offset); > + uint32_t current_sample_offset = samples_offset + > + src->sample_count * > + sizeof(struct index_sample); > + for (uint32_t i = 0; i < src->sample_count; ++i) { > + dup->samples[i].eq = (uint32_t *)((char*)dup + > + current_sample_offset); > + dup->samples[i].lt = (uint32_t *)((char*)dup + > + current_sample_offset + > + array_size); > + dup->samples[i].dlt = (uint32_t *)((char*)dup + > + current_sample_offset + > + 2 * size); 1. Typo? Maybe array_size instead of size? I slightly refactored it. Take a look. Looks more compact, and tests are passed. I wish it was my task, huge malloc formatting is very interesting. diff --git a/src/box/index_def.c b/src/box/index_def.c index 1702430b3..d9035e3c5 100644 --- a/src/box/index_def.c +++ b/src/box/index_def.c @@ -205,31 +205,25 @@ index_stat_dup(const struct index_stat *src) memcpy(dup, src, size); uint32_t array_size = src->sample_field_count * sizeof(uint32_t); uint32_t stat1_offset = sizeof(struct index_stat); - dup->tuple_stat1 = (uint32_t *)((char*)dup + stat1_offset); - dup->tuple_log_est = (log_est_t *)((char*)dup + stat1_offset + - array_size + sizeof(uint32_t)); - dup->avg_eq = (uint32_t *)((char*)dup + stat1_offset + - 2 * (array_size + sizeof(uint32_t))); - uint32_t samples_offset = stat1_offset + 3 * array_size + - 2 * sizeof(uint32_t); - dup->samples = (struct index_sample *)((char*)dup + samples_offset); - uint32_t current_sample_offset = samples_offset + - src->sample_count * - sizeof(struct index_sample); + char *pos = (char *) dup + stat1_offset; + dup->tuple_stat1 = (uint32_t *) pos; + pos += array_size + sizeof(uint32_t); + dup->tuple_log_est = (log_est_t *) pos; + pos += array_size + sizeof(uint32_t); + dup->avg_eq = (uint32_t *) pos; + pos += array_size; + dup->samples = (struct index_sample *) pos; + pos += src->sample_count * sizeof(struct index_sample); + for (uint32_t i = 0; i < src->sample_count; ++i) { - dup->samples[i].eq = (uint32_t *)((char*)dup + - current_sample_offset); - dup->samples[i].lt = (uint32_t *)((char*)dup + - current_sample_offset + - array_size); - dup->samples[i].dlt = (uint32_t *)((char*)dup + - current_sample_offset + - 2 * size); - dup->samples[i].sample_key = - (char *)((char*)dup + current_sample_offset + 3 * size); - current_sample_offset += current_sample_offset + - 3 * array_size + - dup->samples[i].key_size + 2; + dup->samples[i].eq = (uint32_t *) pos; + pos += array_size; + dup->samples[i].lt = (uint32_t *) pos; + pos += array_size; + dup->samples[i].dlt = (uint32_t *) pos; + pos += array_size; + dup->samples[i].sample_key = pos; + pos += dup->samples[i].key_size + 2; } return dup; } > diff --git a/src/box/index_def.h b/src/box/index_def.h > index dc8293b23..107255cc7 100644 > --- a/src/box/index_def.h > +++ b/src/box/index_def.h > @@ -173,6 +242,50 @@ struct index_def { > struct index_def * > index_def_dup(const struct index_def *def); > > +/** > + * Calculate size of index's statistics. > + * Statistics is located in memory according to following layout: > + * > + * +-------------------------+ <- Allocated memory starts here > + * | struct index_stat | > + * |-------------------------| > + * | stat1 array | ^ > + * |-------------------------| | > + * | log_est array | | 3 * array_size + 2 * uint_32 > + * |-------------------------| | > + * | avg_eq array | v > + * |-------------------------| > + * | samples struct array | > + * |-------------------------| > + * | eq | lt | dlt | key | <- Content of one sample > + * +-------------------------+ > + * ... <- Up to 24 samples > + * | eq | lt | dlt | key | > + * +-------------------------+ Nice picture. > + * > + * array_size = field_count * sizeof(uint_32) > + * offset of one sample = 3 * array_size + key_size 2. +2, no? For zeros. (See the question about them below.) > + * > + * @param samples Array of samples needed for calculating > + * size of keys. > + * @param sample_count Count of samples in samples array. > + * @param field_count Count of fields in statistics arrays. > + * @retval Size needed for statistics allocation in bytes. > + */ > +size_t > +index_stat_sizeof(const struct index_sample *samples, uint32_t sample_count, > + uint32_t field_count); > + > +/** > + * Duplicate index_stat object. > + * To understand memory layout see index_stat_sizeof() function. > + * > + * @param src Stat to duplicate. > + * @retval Copy of the @src. 3. Or NULL. > diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c > index 57cac437c..4f21cd25f 100644 > --- a/src/box/sql/analyze.c > +++ b/src/box/sql/analyze.c > +decode_stat_string(const char *stat_string, int stat_size, tRowcnt *stat_exact, > + LogEst *stat_log) { > + char *z = (char *) stat_string; 4. Why do you need this cast? > + /* > + * Take a copy of the sample. Add two 0x00 bytes > + * the end of the buffer. This is in case the > + * sample record is corrupted. In that case, the > + * sqlite3VdbeRecordCompare() may read up to two > + * varints past the end of the allocated buffer > + * before it realizes it is dealing with a corrupt > + * record. Adding the two 0x00 bytes prevents this > + * from causing a buffer overread. 5. Are you sure that this 0x0000 is still needed? Tarantool does not stores any "corrupted" things. Maybe it is Btree artifacts? > + > +/** > + * This function performs copy of statistics. > + * In contrast to index_stat_dup(), there is no assumption > + * that source statistics is allocated within chunk. But > + * destination place is still one big chunk of heap memory. > + * See also index_stat_sizeof() for understanding memory layout. > * > - * If an OOM error occurs, this function always sets db->mallocFailed. > - * This means if the caller does not care about other errors, the return > - * code may be ignored. > + * @param dest One chunk of memory where statistics > + * will be placed. > + * @param src Statistics to be copied. > */ > -int > -sqlite3AnalysisLoad(sqlite3 * db) > +static void > +stat_copy(struct index_stat *dest, const struct index_stat *src) > { > - analysisInfo sInfo; > - HashElem *i, *j; > - char *zSql; > - int rc = SQLITE_OK; > - > - /* Clear any prior statistics */ > - for (j = sqliteHashFirst(&db->pSchema->tblHash); j; > - j = sqliteHashNext(j)) { > - Table *pTab = sqliteHashData(j); > - for (i = sqliteHashFirst(&pTab->idxHash); i; > - i = sqliteHashNext(i)) { > - Index *pIdx = sqliteHashData(i); > - pIdx->aiRowLogEst[0] = 0; > - sqlite3DeleteIndexSamples(db, pIdx); > - pIdx->aSample = 0; > - } > + assert(dest != NULL); 6. How about this diff? diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c index 4f21cd25f..280b9132d 100644 --- a/src/box/sql/analyze.c +++ b/src/box/sql/analyze.c @@ -1721,46 +1721,39 @@ stat_copy(struct index_stat *dest, const struct index_stat *src) dest->is_unordered = src->is_unordered; uint32_t array_size = src->sample_field_count * sizeof(uint32_t); uint32_t stat1_offset = sizeof(struct index_stat); - memcpy((char *)dest + stat1_offset, src->tuple_stat1, - array_size + sizeof(uint32_t)); - memcpy((char *)dest + stat1_offset + array_size + sizeof(uint32_t), - src->tuple_log_est, array_size + sizeof(uint32_t)); - memcpy((char *)dest + stat1_offset + 2 * (array_size + sizeof(uint32_t)), - src->avg_eq, array_size); - dest->tuple_stat1 = (uint32_t *)((char *)dest + stat1_offset); - dest->tuple_log_est = (log_est_t *)((char *)dest + stat1_offset + - array_size + sizeof(uint32_t)); - dest->avg_eq = (uint32_t *)((char *)dest + stat1_offset + - 2 * (array_size + sizeof(uint32_t))); - uint32_t samples_offset = stat1_offset + 3 * array_size + - 2 * sizeof(uint32_t); - dest->samples = (struct index_sample *)((char*)dest + samples_offset); - uint32_t current_sample_offset = samples_offset + dest->sample_count * - sizeof(struct index_sample); + char *pos = (char *) dest + stat1_offset; + memcpy(pos, src->tuple_stat1, array_size + sizeof(uint32_t)); + dest->tuple_stat1 = (uint32_t *) pos; + pos += array_size + sizeof(uint32_t); + + memcpy(pos, src->tuple_log_est, array_size + sizeof(uint32_t)); + dest->tuple_log_est = (log_est_t *) pos; + pos += array_size + sizeof(uint32_t); + + memcpy(pos, src->avg_eq, array_size); + dest->avg_eq = (uint32_t *) pos; + pos += array_size; + + dest->samples = (struct index_sample *) pos; + pos += dest->sample_count * sizeof(struct index_sample); assert(src->samples != NULL); for (uint32_t i = 0; i < dest->sample_count; ++i) { dest->samples[i].key_size = src->samples[i].key_size; - memcpy((char *)dest + current_sample_offset, src->samples[i].eq, - array_size); - dest->samples[i].eq = (uint32_t *)((char *)dest + - current_sample_offset); - current_sample_offset += array_size; - memcpy((char *)dest + current_sample_offset, src->samples[i].lt, - array_size); - dest->samples[i].lt = (uint32_t *)((char *)dest + - current_sample_offset); - current_sample_offset += array_size; - memcpy((char *)dest + current_sample_offset, - src->samples[i].dlt, array_size); - dest->samples[i].dlt = (uint32_t *)((char *)dest + - current_sample_offset); - current_sample_offset += array_size; - memcpy((char *)dest + current_sample_offset, - src->samples[i].sample_key, + memcpy(pos, src->samples[i].eq, array_size); + dest->samples[i].eq = (uint32_t *) pos; + pos += array_size; + + memcpy(pos, src->samples[i].lt, array_size); + dest->samples[i].lt = (uint32_t *) pos; + pos += array_size; + + memcpy(pos, src->samples[i].dlt, array_size); + dest->samples[i].dlt = (uint32_t *) pos; + pos += array_size; + memcpy(pos, src->samples[i].sample_key, src->samples[i].key_size + 2); - dest->samples[i].sample_key = (char *)dest + - current_sample_offset; - current_sample_offset += dest->samples[i].key_size + 2; + dest->samples[i].sample_key = pos; + pos += dest->samples[i].key_size + 2; } }