Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: "n.pettik" <korablev@tarantool.org>, tarantool-patches@freelists.org
Subject: [tarantool-patches] Re: [PATCH 4/4] sql: move SQL statistics to server
Date: Sat, 12 May 2018 01:00:14 +0300	[thread overview]
Message-ID: <1f4a08f6-366d-8d0c-ec5c-9e8e33a766ee@tarantool.org> (raw)
In-Reply-To: <8E69DCFF-0088-4FCD-A4CA-E3C98DEF7274@tarantool.org>



On 11/05/2018 20:29, n.pettik wrote:
> I am attaching whole patch since it has changed significantly with region allocations.
> I didn't answer on comments concerning memory allocation policy deliberately, but
> took them into consideration when was reworking patch.  I hope I didn’t miss smth.
> Overall, thanks for ideas and suggestions!

Thank you very much! The patch now looks almost perfect. See 6 comments below.
> diff --git a/src/box/index_def.c b/src/box/index_def.c
> index d0d852519..1702430b3 100644
> --- a/src/box/index_def.c
> +++ b/src/box/index_def.c
> +struct index_stat *
> +index_stat_dup(const struct index_stat *src)
> +{
> +	size_t size = index_stat_sizeof(src->samples, src->sample_count,
> +					src->sample_field_count);
> +	struct index_stat *dup = (struct index_stat *) malloc(size);
> +	if (dup == NULL) {
> +		diag_set(OutOfMemory, size, "malloc", "index stat");
> +		return NULL;
> +	}
> +	memcpy(dup, src, size);
> +	uint32_t array_size = src->sample_field_count * sizeof(uint32_t);
> +	uint32_t stat1_offset = sizeof(struct index_stat);
> +	dup->tuple_stat1 = (uint32_t *)((char*)dup + stat1_offset);
> +	dup->tuple_log_est = (log_est_t *)((char*)dup + stat1_offset +
> +					   array_size + sizeof(uint32_t));
> +	dup->avg_eq = (uint32_t *)((char*)dup + stat1_offset +
> +				   2 * (array_size + sizeof(uint32_t)));
> +	uint32_t samples_offset = stat1_offset + 3 * array_size +
> +				  2 * sizeof(uint32_t);
> +	dup->samples = (struct index_sample *)((char*)dup + samples_offset);
> +	uint32_t current_sample_offset = samples_offset +
> +					 src->sample_count *
> +					 sizeof(struct index_sample);
> +	for (uint32_t i = 0; i < src->sample_count; ++i) {
> +		dup->samples[i].eq = (uint32_t *)((char*)dup +
> +						  current_sample_offset);
> +		dup->samples[i].lt = (uint32_t *)((char*)dup +
> +						  current_sample_offset +
> +						  array_size);
> +		dup->samples[i].dlt = (uint32_t *)((char*)dup +
> +						   current_sample_offset +
> +						   2 * size);

1. Typo? Maybe array_size instead of size? I slightly refactored it. Take a look. Looks
more compact, and tests are passed.

I wish it was my task, huge malloc formatting is very interesting.

diff --git a/src/box/index_def.c b/src/box/index_def.c
index 1702430b3..d9035e3c5 100644
--- a/src/box/index_def.c
+++ b/src/box/index_def.c
@@ -205,31 +205,25 @@ index_stat_dup(const struct index_stat *src)
  	memcpy(dup, src, size);
  	uint32_t array_size = src->sample_field_count * sizeof(uint32_t);
  	uint32_t stat1_offset = sizeof(struct index_stat);
-	dup->tuple_stat1 = (uint32_t *)((char*)dup + stat1_offset);
-	dup->tuple_log_est = (log_est_t *)((char*)dup + stat1_offset +
-					   array_size + sizeof(uint32_t));
-	dup->avg_eq = (uint32_t *)((char*)dup + stat1_offset +
-				   2 * (array_size + sizeof(uint32_t)));
-	uint32_t samples_offset = stat1_offset + 3 * array_size +
-				  2 * sizeof(uint32_t);
-	dup->samples = (struct index_sample *)((char*)dup + samples_offset);
-	uint32_t current_sample_offset = samples_offset +
-					 src->sample_count *
-					 sizeof(struct index_sample);
+	char *pos = (char *) dup + stat1_offset;
+	dup->tuple_stat1 = (uint32_t *) pos;
+	pos += array_size + sizeof(uint32_t);
+	dup->tuple_log_est = (log_est_t *) pos;
+	pos += array_size + sizeof(uint32_t);
+	dup->avg_eq = (uint32_t *) pos;
+	pos += array_size;
+	dup->samples = (struct index_sample *) pos;
+	pos += src->sample_count * sizeof(struct index_sample);
+
  	for (uint32_t i = 0; i < src->sample_count; ++i) {
-		dup->samples[i].eq = (uint32_t *)((char*)dup +
-						  current_sample_offset);
-		dup->samples[i].lt = (uint32_t *)((char*)dup +
-						  current_sample_offset +
-						  array_size);
-		dup->samples[i].dlt = (uint32_t *)((char*)dup +
-						   current_sample_offset +
-						   2 * size);
-		dup->samples[i].sample_key =
-			(char *)((char*)dup + current_sample_offset + 3 * size);
-		current_sample_offset += current_sample_offset +
-					 3 * array_size +
-					 dup->samples[i].key_size + 2;
+		dup->samples[i].eq = (uint32_t *) pos;
+		pos += array_size;
+		dup->samples[i].lt = (uint32_t *) pos;
+		pos += array_size;
+		dup->samples[i].dlt = (uint32_t *) pos;
+		pos += array_size;
+		dup->samples[i].sample_key = pos;
+		pos += dup->samples[i].key_size + 2;
  	}
  	return dup;
  }


> diff --git a/src/box/index_def.h b/src/box/index_def.h
> index dc8293b23..107255cc7 100644
> --- a/src/box/index_def.h
> +++ b/src/box/index_def.h
> @@ -173,6 +242,50 @@ struct index_def {
>   struct index_def *
>   index_def_dup(const struct index_def *def);
>   
> +/**
> + * Calculate size of index's statistics.
> + * Statistics is located in memory according to following layout:
> + *
> + * +-------------------------+ <- Allocated memory starts here
> + * |    struct index_stat    |
> + * |-------------------------|
> + * |        stat1 array      | ^
> + * |-------------------------| |
> + * |      log_est array      | | 3 * array_size + 2 * uint_32
> + * |-------------------------| |
> + * |       avg_eq array      | v
> + * |-------------------------|
> + * |   samples struct array  |
> + * |-------------------------|
> + * | eq | lt | dlt |   key   | <- Content of one sample
> + * +-------------------------+
> + *            ...              <- Up to 24 samples
> + * | eq | lt | dlt |   key   |
> + * +-------------------------+

Nice picture.

> + *
> + * array_size = field_count * sizeof(uint_32)
> + * offset of one sample = 3 * array_size + key_size

2. +2, no? For zeros. (See the question about them below.)

> + *
> + * @param samples Array of samples needed for calculating
> + *                size of keys.
> + * @param sample_count Count of samples in samples array.
> + * @param field_count Count of fields in statistics arrays.
> + * @retval Size needed for statistics allocation in bytes.
> + */
> +size_t
> +index_stat_sizeof(const struct index_sample *samples, uint32_t sample_count,
> +		  uint32_t field_count);
> +
> +/**
> + * Duplicate index_stat object.
> + * To understand memory layout see index_stat_sizeof() function.
> + *
> + * @param src Stat to duplicate.
> + * @retval Copy of the @src.

3. Or NULL.
> diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c
> index 57cac437c..4f21cd25f 100644
> --- a/src/box/sql/analyze.c
> +++ b/src/box/sql/analyze.c
> +decode_stat_string(const char *stat_string, int stat_size, tRowcnt *stat_exact,
> +		   LogEst *stat_log) {
> +	char *z = (char *) stat_string;

4. Why do you need this cast?

> +		/*
> +		 * Take a copy of the sample. Add two 0x00 bytes
> +		 * the end of the buffer. This is in case the
> +		 * sample record is corrupted. In that case, the
> +		 * sqlite3VdbeRecordCompare() may read up to two
> +		 * varints past the end of the allocated buffer
> +		 * before it realizes it is dealing with a corrupt
> +		 * record. Adding the two 0x00 bytes prevents this
> +		 * from causing a buffer overread.

5. Are you sure that this 0x0000 is still needed? Tarantool does not stores
any "corrupted" things. Maybe it is Btree artifacts?

> +
> +/**
> + * This function performs copy of statistics.
> + * In contrast to index_stat_dup(), there is no assumption
> + * that source statistics is allocated within chunk. But
> + * destination place is still one big chunk of heap memory.
> + * See also index_stat_sizeof() for understanding memory layout.
>    *
> - * If an OOM error occurs, this function always sets db->mallocFailed.
> - * This means if the caller does not care about other errors, the return
> - * code may be ignored.
> + * @param dest One chunk of memory where statistics
> + *             will be placed.
> + * @param src Statistics to be copied.
>    */
> -int
> -sqlite3AnalysisLoad(sqlite3 * db)
> +static void
> +stat_copy(struct index_stat *dest, const struct index_stat *src)
>   {
> -	analysisInfo sInfo;
> -	HashElem *i, *j;
> -	char *zSql;
> -	int rc = SQLITE_OK;
> -
> -	/* Clear any prior statistics */
> -	for (j = sqliteHashFirst(&db->pSchema->tblHash); j;
> -	     j = sqliteHashNext(j)) {
> -		Table *pTab = sqliteHashData(j);
> -		for (i = sqliteHashFirst(&pTab->idxHash); i;
> -		     i = sqliteHashNext(i)) {
> -			Index *pIdx = sqliteHashData(i);
> -			pIdx->aiRowLogEst[0] = 0;
> -			sqlite3DeleteIndexSamples(db, pIdx);
> -			pIdx->aSample = 0;
> -		}
> +	assert(dest != NULL);

6. How about this diff?

diff --git a/src/box/sql/analyze.c b/src/box/sql/analyze.c
index 4f21cd25f..280b9132d 100644
--- a/src/box/sql/analyze.c
+++ b/src/box/sql/analyze.c
@@ -1721,46 +1721,39 @@ stat_copy(struct index_stat *dest, const struct index_stat *src)
  	dest->is_unordered = src->is_unordered;
  	uint32_t array_size = src->sample_field_count * sizeof(uint32_t);
  	uint32_t stat1_offset = sizeof(struct index_stat);
-	memcpy((char *)dest + stat1_offset, src->tuple_stat1,
-	       array_size + sizeof(uint32_t));
-	memcpy((char *)dest + stat1_offset + array_size + sizeof(uint32_t),
-	       src->tuple_log_est, array_size + sizeof(uint32_t));
-	memcpy((char *)dest + stat1_offset + 2 * (array_size + sizeof(uint32_t)),
-	       src->avg_eq, array_size);
-	dest->tuple_stat1 = (uint32_t *)((char *)dest + stat1_offset);
-	dest->tuple_log_est = (log_est_t *)((char *)dest + stat1_offset +
-					    array_size + sizeof(uint32_t));
-	dest->avg_eq = (uint32_t *)((char *)dest + stat1_offset +
-				    2 * (array_size + sizeof(uint32_t)));
-	uint32_t samples_offset = stat1_offset + 3 * array_size +
-				  2 * sizeof(uint32_t);
-	dest->samples = (struct index_sample *)((char*)dest + samples_offset);
-	uint32_t current_sample_offset = samples_offset + dest->sample_count *
-					 sizeof(struct index_sample);
+	char *pos = (char *) dest + stat1_offset;
+	memcpy(pos, src->tuple_stat1, array_size + sizeof(uint32_t));
+	dest->tuple_stat1 = (uint32_t *) pos;
+	pos += array_size + sizeof(uint32_t);
+
+	memcpy(pos, src->tuple_log_est, array_size + sizeof(uint32_t));
+	dest->tuple_log_est = (log_est_t *) pos;
+	pos += array_size + sizeof(uint32_t);
+
+	memcpy(pos, src->avg_eq, array_size);
+	dest->avg_eq = (uint32_t *) pos;
+	pos += array_size;
+
+	dest->samples = (struct index_sample *) pos;
+	pos += dest->sample_count * sizeof(struct index_sample);
  	assert(src->samples != NULL);
  	for (uint32_t i = 0; i < dest->sample_count; ++i) {
  		dest->samples[i].key_size = src->samples[i].key_size;
-		memcpy((char *)dest + current_sample_offset, src->samples[i].eq,
-		       array_size);
-		dest->samples[i].eq = (uint32_t *)((char *)dest +
-						   current_sample_offset);
-		current_sample_offset += array_size;
-		memcpy((char *)dest + current_sample_offset, src->samples[i].lt,
-		       array_size);
-		dest->samples[i].lt = (uint32_t *)((char *)dest +
-						   current_sample_offset);
-		current_sample_offset += array_size;
-		memcpy((char *)dest + current_sample_offset,
-		       src->samples[i].dlt, array_size);
-		dest->samples[i].dlt = (uint32_t *)((char *)dest +
-						    current_sample_offset);
-		current_sample_offset += array_size;
-		memcpy((char *)dest + current_sample_offset,
-		       src->samples[i].sample_key,
+		memcpy(pos, src->samples[i].eq, array_size);
+		dest->samples[i].eq = (uint32_t *) pos;
+		pos += array_size;
+
+		memcpy(pos, src->samples[i].lt, array_size);
+		dest->samples[i].lt = (uint32_t *) pos;
+		pos += array_size;
+
+		memcpy(pos, src->samples[i].dlt, array_size);
+		dest->samples[i].dlt = (uint32_t *) pos;
+		pos += array_size;
+		memcpy(pos, src->samples[i].sample_key,
  		       src->samples[i].key_size + 2);
-		dest->samples[i].sample_key = (char *)dest +
-					      current_sample_offset;
-		current_sample_offset += dest->samples[i].key_size + 2;
+		dest->samples[i].sample_key = pos;
+		pos += dest->samples[i].key_size + 2;
  	}
  }

  reply	other threads:[~2018-05-11 22:00 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-23 20:29 [tarantool-patches] [PATCH 0/4] Move original SQLite's " Nikita Pettik
2018-04-23 20:29 ` [tarantool-patches] [PATCH 1/4] sql: optimize compilation of SELECT COUNT(*) Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-04-23 20:29 ` [tarantool-patches] [PATCH 2/4] sql: add average tuple size calculation Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-04-23 20:29 ` [tarantool-patches] [PATCH 3/4] sql: refactor usages of table's tuple count Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-04-23 20:29 ` [tarantool-patches] [PATCH 4/4] sql: move SQL statistics to server Nikita Pettik
2018-04-24 12:51   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-11 17:29     ` n.pettik
2018-05-11 22:00       ` Vladislav Shpilevoy [this message]
2018-05-14 11:52         ` n.pettik
2018-05-14 12:54           ` Vladislav Shpilevoy
2018-05-14 13:55             ` n.pettik
2018-05-14 14:12 ` [tarantool-patches] Re: [PATCH 0/4] Move original SQLite's " Vladislav Shpilevoy
2018-05-15 13:42   ` Kirill Yukhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1f4a08f6-366d-8d0c-ec5c-9e8e33a766ee@tarantool.org \
    --to=v.shpilevoy@tarantool.org \
    --cc=korablev@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='[tarantool-patches] Re: [PATCH 4/4] sql: move SQL statistics to server' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox