[tarantool-patches] Re: [PATCH 4/4] sql: move SQL statistics to server
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Sat May 12 01:00:14 MSK 2018
On 11/05/2018 20:29, n.pettik wrote:
> I am attaching whole patch since it has changed significantly with region allocations.
> I didn't answer on comments concerning memory allocation policy deliberately, but
> took them into consideration when was reworking patch. I hope I didn’t miss smth.
> Overall, thanks for ideas and suggestions!
Thank you very much! The patch now looks almost perfect. See 6 comments below.
> diff --git a/src/box/index_def.c b/src/box/index_def.c
> index d0d852519..1702430b3 100644
> --- a/src/box/index_def.c
> +++ b/src/box/index_def.c
> +struct index_stat *
> +index_stat_dup(const struct index_stat *src)
> +{
> + size_t size = index_stat_sizeof(src->samples, src->sample_count,
> + src->sample_field_count);
> + struct index_stat *dup = (struct index_stat *) malloc(size);
> + if (dup == NULL) {
> + diag_set(OutOfMemory, size, "malloc", "index stat");
> + return NULL;
> + }
> + memcpy(dup, src, size);
> + uint32_t array_size = src->sample_field_count * sizeof(uint32_t);
> + uint32_t stat1_offset = sizeof(struct index_stat);
> + dup->tuple_stat1 = (uint32_t *)((char*)dup + stat1_offset);
> + dup->tuple_log_est = (log_est_t *)((char*)dup + stat1_offset +
> + array_size + sizeof(uint32_t));
> + dup->avg_eq = (uint32_t *)((char*)dup + stat1_offset +
> + 2 * (array_size + sizeof(uint32_t)));
> + uint32_t samples_offset = stat1_offset + 3 * array_size +
> + 2 * sizeof(uint32_t);
> + dup->samples = (struct index_sample *)((char*)dup + samples_offset);
> + uint32_t current_sample_offset = samples_offset +
> + src->sample_count *
> + sizeof(struct index_sample);
> + for (uint32_t i = 0; i < src->sample_count; ++i) {
> + dup->samples[i].eq = (uint32_t *)((char*)dup +
> + current_sample_offset);
> + dup->samples[i].lt = (uint32_t *)((char*)dup +
> + current_sample_offset +
> + array_size);
> + dup->samples[i].dlt = (uint32_t *)((char*)dup +
> + current_sample_offset +
> + 2 * size);
1. Typo? Maybe array_size instead of size? I slightly refactored it. Take a look. Looks
more compact, and tests are passed.
I wish it was my task, huge malloc formatting is very interesting.
diff --git a/src/box/index_def.c b/src/box/index_def.c
index 1702430b3..d9035e3c5 100644
--- a/src/box/index_def.c
+++ b/src/box/index_def.c
@@ -205,31 +205,25 @@ index_stat_dup(const struct index_stat *src)
memcpy(dup, src, size);
uint32_t array_size = src->sample_field_count * sizeof(uint32_t);
uint32_t stat1_offset = sizeof(struct index_stat);
- dup->tuple_stat1 = (uint32_t *)((char*)dup + stat1_offset);
- dup->tuple_log_est = (log_est_t *)((char*)dup + stat1_offset +
- array_size + sizeof(uint32_t));
- dup->avg_eq = (uint32_t *)((char*)dup + stat1_offset +
- 2 * (array_size + sizeof(uint32_t)));
- uint32_t samples_offset = stat1_offset + 3 * array_size +
- 2 * sizeof(uint32_t);
- dup->samples = (struct index_sample *)((char*)dup + samples_offset);
- uint32_t current_sample_offset = samples_offset +
- src->sample_count *
- sizeof(struct index_sample);
+ char *pos = (char *) dup + stat1_offset;
+ dup->tuple_stat1 = (uint32_t *) pos;
+ pos += array_size + sizeof(uint32_t);
+ dup->tuple_log_est = (log_est_t *) pos;
+ pos += array_size + sizeof(uint32_t);
+ dup->avg_eq = (uint32_t *) pos;
+ pos += array_size;
+ dup->samples = (struct index_sample *) pos;
+ pos += src->sample_count * sizeof(struct index_sample);
+
for (uint32_t i = 0; i < src->sample_count; ++i) {
- dup->samples[i].eq = (uint32_t *)((char*)dup +
- current_sample_offset);
- dup->samples[i].lt = (uint32_t *)((char*)dup +
- current_sample_offset +
- array_size);
- dup->samples[i].dlt = (uint32_t *)((char*)dup +
- current_sample_offset +
- 2 * size);
- dup->samples[i].sample_key =
- (char *)((char*)dup + current_sample_offset + 3 * size);
- current_sample_offset += current_sample_offset +
- 3 * array_size +
- dup->samples[i].key_size + 2;
+ dup->samples[i].eq = (uint32_t *) pos;
+ pos += array_size;
+ dup->samples[i].lt = (uint32_t *) pos;
+ pos += array_size;
+ dup->samples[i].dlt = (uint32_t *) pos;
+ pos += array_size;
+ dup->samples[i].sample_key = pos;
+ pos += dup->samples[i].key_size + 2;
}
return dup;
}
> diff --git a/src/box/index_def.h b/src/box/index_def.h
> index dc8293b23..107255cc7 100644
> --- a/src/box/index_def.h
> +++ b/src/box/index_def.h
> @@ -173,6 +242,50 @@ struct index_def {
> struct index_def *
> index_def_dup(const struct index_def *def);
>
> +/**
> + * Calculate size of index's statistics.
> + * Statistics is located in memory according to following layout:
> + *
> + * +-------------------------+ <- Allocated memory starts here
> + * | struct index_stat |
> + * |-------------------------|
> + * | stat1 array | ^
> + * |-------------------------| |
> + * | log_est array | | 3 * array_size + 2 * uint_32
> + * |-------------------------| |
> + * | avg_eq array | v
> + * |-------------------------|
> + * | samples struct array |
> + * |-------------------------|
> + * | eq | lt | dlt | key | <- Content of one sample
> + * +-------------------------+
> + * ... <- Up to 24 samples
> + * | eq | lt | dlt | key |
> + * +-------------------------+
Nice picture.
> + *
> + * array_size = field_count * sizeof(uint_32)
> + * offset of one sample = 3 * array_size + key_size
2. +2, no? For zeros. (See the question about them below.)
> + *
> + * @param samples Array of samples needed for calculating
> + * size of keys.
> + * @param sample_count Count of samples in samples array.
> + * @param field_count Count of fields in statistics arrays.
> + * @retval Size needed for statistics allocation in bytes.
> + */
> +size_t
> +index_stat_sizeof(const struct index_sample *samples, uint32_t sample_count,
> + uint32_t field_count);
> +
> +/**
> + * Duplicate index_stat object.
> + * To understand memory layout see index_stat_sizeof() function.
> + *
> + * @param src Stat to duplicate.
> + * @retval Copy of the @src.
3. Or NULL.
> diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c
> index 57cac437c..4f21cd25f 100644
> --- a/src/box/sql/analyze.c
> +++ b/src/box/sql/analyze.c
> +decode_stat_string(const char *stat_string, int stat_size, tRowcnt *stat_exact,
> + LogEst *stat_log) {
> + char *z = (char *) stat_string;
4. Why do you need this cast?
> + /*
> + * Take a copy of the sample. Add two 0x00 bytes
> + * the end of the buffer. This is in case the
> + * sample record is corrupted. In that case, the
> + * sqlite3VdbeRecordCompare() may read up to two
> + * varints past the end of the allocated buffer
> + * before it realizes it is dealing with a corrupt
> + * record. Adding the two 0x00 bytes prevents this
> + * from causing a buffer overread.
5. Are you sure that this 0x0000 is still needed? Tarantool does not stores
any "corrupted" things. Maybe it is Btree artifacts?
> +
> +/**
> + * This function performs copy of statistics.
> + * In contrast to index_stat_dup(), there is no assumption
> + * that source statistics is allocated within chunk. But
> + * destination place is still one big chunk of heap memory.
> + * See also index_stat_sizeof() for understanding memory layout.
> *
> - * If an OOM error occurs, this function always sets db->mallocFailed.
> - * This means if the caller does not care about other errors, the return
> - * code may be ignored.
> + * @param dest One chunk of memory where statistics
> + * will be placed.
> + * @param src Statistics to be copied.
> */
> -int
> -sqlite3AnalysisLoad(sqlite3 * db)
> +static void
> +stat_copy(struct index_stat *dest, const struct index_stat *src)
> {
> - analysisInfo sInfo;
> - HashElem *i, *j;
> - char *zSql;
> - int rc = SQLITE_OK;
> -
> - /* Clear any prior statistics */
> - for (j = sqliteHashFirst(&db->pSchema->tblHash); j;
> - j = sqliteHashNext(j)) {
> - Table *pTab = sqliteHashData(j);
> - for (i = sqliteHashFirst(&pTab->idxHash); i;
> - i = sqliteHashNext(i)) {
> - Index *pIdx = sqliteHashData(i);
> - pIdx->aiRowLogEst[0] = 0;
> - sqlite3DeleteIndexSamples(db, pIdx);
> - pIdx->aSample = 0;
> - }
> + assert(dest != NULL);
6. How about this diff?
diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c
index 4f21cd25f..280b9132d 100644
--- a/src/box/sql/analyze.c
+++ b/src/box/sql/analyze.c
@@ -1721,46 +1721,39 @@ stat_copy(struct index_stat *dest, const struct index_stat *src)
dest->is_unordered = src->is_unordered;
uint32_t array_size = src->sample_field_count * sizeof(uint32_t);
uint32_t stat1_offset = sizeof(struct index_stat);
- memcpy((char *)dest + stat1_offset, src->tuple_stat1,
- array_size + sizeof(uint32_t));
- memcpy((char *)dest + stat1_offset + array_size + sizeof(uint32_t),
- src->tuple_log_est, array_size + sizeof(uint32_t));
- memcpy((char *)dest + stat1_offset + 2 * (array_size + sizeof(uint32_t)),
- src->avg_eq, array_size);
- dest->tuple_stat1 = (uint32_t *)((char *)dest + stat1_offset);
- dest->tuple_log_est = (log_est_t *)((char *)dest + stat1_offset +
- array_size + sizeof(uint32_t));
- dest->avg_eq = (uint32_t *)((char *)dest + stat1_offset +
- 2 * (array_size + sizeof(uint32_t)));
- uint32_t samples_offset = stat1_offset + 3 * array_size +
- 2 * sizeof(uint32_t);
- dest->samples = (struct index_sample *)((char*)dest + samples_offset);
- uint32_t current_sample_offset = samples_offset + dest->sample_count *
- sizeof(struct index_sample);
+ char *pos = (char *) dest + stat1_offset;
+ memcpy(pos, src->tuple_stat1, array_size + sizeof(uint32_t));
+ dest->tuple_stat1 = (uint32_t *) pos;
+ pos += array_size + sizeof(uint32_t);
+
+ memcpy(pos, src->tuple_log_est, array_size + sizeof(uint32_t));
+ dest->tuple_log_est = (log_est_t *) pos;
+ pos += array_size + sizeof(uint32_t);
+
+ memcpy(pos, src->avg_eq, array_size);
+ dest->avg_eq = (uint32_t *) pos;
+ pos += array_size;
+
+ dest->samples = (struct index_sample *) pos;
+ pos += dest->sample_count * sizeof(struct index_sample);
assert(src->samples != NULL);
for (uint32_t i = 0; i < dest->sample_count; ++i) {
dest->samples[i].key_size = src->samples[i].key_size;
- memcpy((char *)dest + current_sample_offset, src->samples[i].eq,
- array_size);
- dest->samples[i].eq = (uint32_t *)((char *)dest +
- current_sample_offset);
- current_sample_offset += array_size;
- memcpy((char *)dest + current_sample_offset, src->samples[i].lt,
- array_size);
- dest->samples[i].lt = (uint32_t *)((char *)dest +
- current_sample_offset);
- current_sample_offset += array_size;
- memcpy((char *)dest + current_sample_offset,
- src->samples[i].dlt, array_size);
- dest->samples[i].dlt = (uint32_t *)((char *)dest +
- current_sample_offset);
- current_sample_offset += array_size;
- memcpy((char *)dest + current_sample_offset,
- src->samples[i].sample_key,
+ memcpy(pos, src->samples[i].eq, array_size);
+ dest->samples[i].eq = (uint32_t *) pos;
+ pos += array_size;
+
+ memcpy(pos, src->samples[i].lt, array_size);
+ dest->samples[i].lt = (uint32_t *) pos;
+ pos += array_size;
+
+ memcpy(pos, src->samples[i].dlt, array_size);
+ dest->samples[i].dlt = (uint32_t *) pos;
+ pos += array_size;
+ memcpy(pos, src->samples[i].sample_key,
src->samples[i].key_size + 2);
- dest->samples[i].sample_key = (char *)dest +
- current_sample_offset;
- current_sample_offset += dest->samples[i].key_size + 2;
+ dest->samples[i].sample_key = pos;
+ pos += dest->samples[i].key_size + 2;
}
}
More information about the Tarantool-patches
mailing list