From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <v.shpilevoy@tarantool.org>
Received: from smtpng3.m.smailru.net (smtpng3.m.smailru.net [94.100.177.149])
 (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256
 bits)) (No client certificate requested)
 by dev.tarantool.org (Postfix) with ESMTPS id 91C5C4696C3
 for <tarantool-patches@dev.tarantool.org>;
 Sat,  4 Apr 2020 21:34:34 +0300 (MSK)
References: <91166652f0b9cd151ca485538e6e895639c62acd.1585823952.git.imeevma@gmail.com>
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Message-ID: <d9db63dc-340a-cd05-5242-ec23af8f1d73@tarantool.org>
Date: Sat, 4 Apr 2020 20:34:32 +0200
MIME-Version: 1.0
In-Reply-To: <91166652f0b9cd151ca485538e6e895639c62acd.1585823952.git.imeevma@gmail.com>
Content-Type: text/plain; charset=utf-8
Content-Language: en-US
Content-Transfer-Encoding: 7bit
Subject: Re: [Tarantool-patches] [PATCH v1 1/1] sql: specify field types in
 ephemeral space format
List-Id: Tarantool development patches <tarantool-patches.dev.tarantool.org>
List-Unsubscribe: <https://lists.tarantool.org/mailman/options/tarantool-patches>, 
 <mailto:tarantool-patches-request@dev.tarantool.org?subject=unsubscribe>
List-Archive: <https://lists.tarantool.org/pipermail/tarantool-patches/>
List-Post: <mailto:tarantool-patches@dev.tarantool.org>
List-Help: <mailto:tarantool-patches-request@dev.tarantool.org?subject=help>
List-Subscribe: <https://lists.tarantool.org/mailman/listinfo/tarantool-patches>, 
 <mailto:tarantool-patches-request@dev.tarantool.org?subject=subscribe>
To: imeevma@tarantool.org, tsafin@tarantool.org, tarantool-patches@dev.tarantool.org

Hi! Thanks for the patch!

On 02/04/2020 12:41, imeevma@tarantool.org wrote:
> This patch allows to specify field types in ephemeral space format.
> Prior to this patch, all fields had a SCALAR field type.

1. Why is it needed? (I see description in the ticket, but it should
be in the commit message too).

I copy paste it here:

> During ephemeral table creation via SQL all parts are declared as SCALAR: see sql_ephemeral_space_create. However, in most cases it is possible to fill it with specific type - it may speed up internal routine connected with tuples comparison.

And the real question is how is it going to speedup and what exactly?
If the subject are internal comparators, they don't depend on format.
They depend only on key definitions, and the latter are ok so far, they
have types.

> Closes #3841
> ---
> https://github.com/tarantool/tarantool/issues/3841
> https://github.com/tarantool/tarantool/tree/imeevma/gh-3841-format-for-ephemeral-space
> 
> diff --git a/src/box/space_def.h b/src/box/space_def.h
> index ac6d226..e7b0c28 100644
> --- a/src/box/space_def.h
> +++ b/src/box/space_def.h
> @@ -177,7 +177,7 @@ space_def_new(uint32_t id, uint32_t uid, uint32_t exact_field_count,
>   * @retval Space definition.
>   */
>  struct space_def *
> -space_def_new_ephemeral(uint32_t field_count);
> +space_def_new_ephemeral(uint32_t field_count, struct field_def *fields);

2. The comment is not updated.

>  /**
>   * Size of the space_def.
> diff --git a/src/box/sql.c b/src/box/sql.c
> index 1256df8..27089bd 100644
> --- a/src/box/sql.c
> +++ b/src/box/sql.c
> @@ -330,58 +330,77 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
>  			return NULL;
>  	}
>  
> -	struct key_part_def *ephemer_key_parts = region_alloc(&fiber()->gc,
> -				sizeof(*ephemer_key_parts) * field_count);
> -	if (ephemer_key_parts == NULL) {
> -		diag_set(OutOfMemory, sizeof(*ephemer_key_parts) * field_count,
> -			 "region", "key parts");
> -		return NULL;
> +	struct region *region = &fiber()->gc;
> +	/*
> +	 * Name of the fields will be "_COLUMN_1", "_COLUMN_2"
> +	 * and so on. Since number of columns no more than 2000,

3. You said no more than 2000, but below check no more than 9999.
Why?

> +	 * length of each name is no more than strlen("_COLUMN_")
> +	 * + 5.
> +	 */
> +	assert(SQL_MAX_COLUMN < 9999);
> +	uint32_t name_len = sizeof("_COLUMN_") + 5;
> +	uint32_t size = field_count * (sizeof(struct field_def) + name_len +
> +				       sizeof(struct key_part_def));
> +	struct field_def *fields = region_alloc(region, size);
> +	struct key_part_def *parts = (void *)fields +
> +				     field_count * sizeof(struct field_def);
> +	char *names = (char *)parts + field_count * sizeof(struct key_part_def);
> +	for (uint32_t i = 0; i < field_count; ++i) {

4. Why do you need a second cycle for this? Can the fields
be initialized in the same cycle as key parts?

> +		struct field_def *field = &fields[i];
> +		field->name = names;
> +		names += name_len;
> +		sprintf(field->name, "_COLUMN_%d", i);
> +		field->is_nullable = true;
> +		field->nullable_action = ON_CONFLICT_ACTION_NONE;
> +		field->default_value = NULL;
> +		field->default_value_expr = NULL;
> +		if (def != NULL && i < def->part_count) {
> +			assert(def->parts[i].type < field_type_MAX);
> +			field->type = def->parts[i].type;
> +			field->coll_id = def->parts[i].coll_id;
> +		} else {
> +			field->coll_id = COLL_NONE;
> +			field->type = FIELD_TYPE_SCALAR;
> +		}
>  	}
> +
>  	for (uint32_t i = 0; i < field_count; ++i) {
> -		struct key_part_def *part = &ephemer_key_parts[i];
> +		struct key_part_def *part = &parts[i];
>  		part->fieldno = i;
>  		part->nullable_action = ON_CONFLICT_ACTION_NONE;
>  		part->is_nullable = true;
>  		part->sort_order = SORT_ORDER_ASC;
>  		part->path = NULL;
> -		if (def != NULL && i < def->part_count) {
> -			assert(def->parts[i].type < field_type_MAX);
> -			part->type = def->parts[i].type;
> -			part->coll_id = def->parts[i].coll_id;
> -		} else {
> -			part->coll_id = COLL_NONE;
> -			part->type = FIELD_TYPE_SCALAR;
> -		}
> +		part->type = fields[i].type;
> +		part->coll_id = fields[i].coll_id;
>  	}
> -	struct key_def *ephemer_key_def = key_def_new(ephemer_key_parts,
> -						      field_count, false);
> -	if (ephemer_key_def == NULL)
> +	struct key_def *key_def = key_def_new(parts, field_count, false);
> +	if (key_def == NULL)
>  		return NULL;
>  
> -	struct index_def *ephemer_index_def =
> +	struct index_def *index_def =
>  		index_def_new(0, 0, "ephemer_idx", strlen("ephemer_idx"), TREE,
> -			      &index_opts_default, ephemer_key_def, NULL);
> -	key_def_delete(ephemer_key_def);
> -	if (ephemer_index_def == NULL)
> +			      &index_opts_default, key_def, NULL);
> +	key_def_delete(key_def);
> +	if (index_def == NULL)
>  		return NULL;
>  
>  	struct rlist key_list;
>  	rlist_create(&key_list);
> -	rlist_add_entry(&key_list, ephemer_index_def, link);
> +	rlist_add_entry(&key_list, index_def, link);
>  
> -	struct space_def *ephemer_space_def =
> -		space_def_new_ephemeral(field_count);
> -	if (ephemer_space_def == NULL) {
> -		index_def_delete(ephemer_index_def);
> +	struct space_def *space_def = space_def_new_ephemeral(field_count,
> +							      fields);
> +	if (space_def == NULL) {
> +		index_def_delete(index_def);
>  		return NULL;
>  	}
>  
> -	struct space *ephemer_new_space = space_new_ephemeral(ephemer_space_def,
> -							      &key_list);
> -	index_def_delete(ephemer_index_def);
> -	space_def_delete(ephemer_space_def);
> +	struct space *space = space_new_ephemeral(space_def, &key_list);
> +	index_def_delete(index_def);
> +	space_def_delete(space_def);
>  
> -	return ephemer_new_space;
> +	return space;

5. Seems like you could keep the old names, and avoid half of these
changes.

>  }
>  
>  int tarantoolsqlEphemeralInsert(struct space *space, const char *tuple,
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index 312c966..beaeb0f 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c

6. Looks like all the diff in this file can be dropped. Because it
does not contain any functional changes. Right? You just turned
'if' into assert + the same 'if' a bit earlier.

> @@ -698,6 +698,12 @@ tuple_format_destroy(struct tuple_format *format)
>   * dictionary will never be altered. If it can, then alter can
>   * affect another space, which shares a format with one which is
>   * altered.
> + *
> + * The only way to change the format of the space is to recreate
> + * space with the new format inside of BOX. Since there is no
> + * mechanism for recreating the ephemeral space, we need not worry
> + * about changing the format of the ephemeral space.
> + *
>   * @param p_format Double pointer to format. It is updated with
>   * 		   hashed value, if corresponding format was found
>   * 		   in hash table
> @@ -709,13 +715,7 @@ static bool
>  tuple_format_reuse(struct tuple_format **p_format)
>  {
>  	struct tuple_format *format = *p_format;
> -	if (!format->is_ephemeral)
> -		return false;
> -	/*
> -	 * These fields do not participate in hashing.
> -	 * Make sure they're unset.
> -	 */
> -	assert(format->dict->name_count == 0);
> +	assert(format->is_ephemeral);
>  	assert(format->is_temporary);
>  	mh_int_t key = mh_tuple_format_find(tuple_formats_hash, format,
>  					    NULL);
> @@ -739,9 +739,7 @@ tuple_format_reuse(struct tuple_format **p_format)
>  static int
>  tuple_format_add_to_hash(struct tuple_format *format)
>  {
> -	if(!format->is_ephemeral)
> -		return 0;
> -	assert(format->dict->name_count == 0);
> +	assert(format->is_ephemeral);
>  	assert(format->is_temporary);
>  	mh_int_t key = mh_tuple_format_put(tuple_formats_hash,
>  					   (const struct tuple_format **)&format,
> @@ -795,11 +793,11 @@ tuple_format_new(struct tuple_format_vtab *vtab, void *engine,
>  	if (tuple_format_create(format, keys, key_count, space_fields,
>  				space_field_count) < 0)
>  		goto err;
> -	if (tuple_format_reuse(&format))
> +	if (is_ephemeral && tuple_format_reuse(&format))
>  		return format;
>  	if (tuple_format_register(format) < 0)
>  		goto err;
> -	if (tuple_format_add_to_hash(format) < 0) {
> +	if (is_ephemeral && tuple_format_add_to_hash(format) < 0) {
>  		tuple_format_deregister(format);
>  		goto err;
>  	}
>