[Tarantool-patches] [PATCH v1 1/1] sql: specify field types in ephemeral space format

Mergen Imeev imeevma at tarantool.org
Fri Apr 10 03:04:45 MSK 2020


Hi! Thank you for review! My answers, diff and new patch
below.

On Sat, Apr 04, 2020 at 08:34:32PM +0200, Vladislav Shpilevoy wrote:
> Hi! Thanks for the patch!
> 
> On 02/04/2020 12:41, imeevma at tarantool.org wrote:
> > This patch allows to specify field types in ephemeral space format.
> > Prior to this patch, all fields had a SCALAR field type.
> 
> 1. Why is it needed? (I see description in the ticket, but it should
> be in the commit message too).
> 
> I copy paste it here:
> 
> > During ephemeral table creation via SQL all parts are declared as SCALAR: see sql_ephemeral_space_create. However, in most cases it is possible to fill it with specific type - it may speed up internal routine connected with tuples comparison.
> 
I am not sure that it will speed up something, at least at
this patch. But this patch is needed for #4692 and #4256.

> And the real question is how is it going to speedup and what exactly?
> If the subject are internal comparators, they don't depend on format.
> They depend only on key definitions, and the latter are ok so far, they
> have types.
> 
> > Closes #3841
> > ---
> > https://github.com/tarantool/tarantool/issues/3841
> > https://github.com/tarantool/tarantool/tree/imeevma/gh-3841-format-for-ephemeral-space
> > 
> > diff --git a/src/box/space_def.h b/src/box/space_def.h
> > index ac6d226..e7b0c28 100644
> > --- a/src/box/space_def.h
> > +++ b/src/box/space_def.h
> > @@ -177,7 +177,7 @@ space_def_new(uint32_t id, uint32_t uid, uint32_t exact_field_count,
> >   * @retval Space definition.
> >   */
> >  struct space_def *
> > -space_def_new_ephemeral(uint32_t field_count);
> > +space_def_new_ephemeral(uint32_t field_count, struct field_def *fields);
> 
> 2. The comment is not updated.
> 
Fixed.

> >  /**
> >   * Size of the space_def.
> > diff --git a/src/box/sql.c b/src/box/sql.c
> > index 1256df8..27089bd 100644
> > --- a/src/box/sql.c
> > +++ b/src/box/sql.c
> > @@ -330,58 +330,77 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
> >  			return NULL;
> >  	}
> >  
> > -	struct key_part_def *ephemer_key_parts = region_alloc(&fiber()->gc,
> > -				sizeof(*ephemer_key_parts) * field_count);
> > -	if (ephemer_key_parts == NULL) {
> > -		diag_set(OutOfMemory, sizeof(*ephemer_key_parts) * field_count,
> > -			 "region", "key parts");
> > -		return NULL;
> > +	struct region *region = &fiber()->gc;
> > +	/*
> > +	 * Name of the fields will be "_COLUMN_1", "_COLUMN_2"
> > +	 * and so on. Since number of columns no more than 2000,
> 
> 3. You said no more than 2000, but below check no more than 9999.
> Why?
> 
Important here is that number of figures is no more than 4.
Still, I fixed that.

> > +	 * length of each name is no more than strlen("_COLUMN_")
> > +	 * + 5.
> > +	 */
> > +	assert(SQL_MAX_COLUMN < 9999);
> > +	uint32_t name_len = sizeof("_COLUMN_") + 5;
> > +	uint32_t size = field_count * (sizeof(struct field_def) + name_len +
> > +				       sizeof(struct key_part_def));
> > +	struct field_def *fields = region_alloc(region, size);
> > +	struct key_part_def *parts = (void *)fields +
> > +				     field_count * sizeof(struct field_def);
> > +	char *names = (char *)parts + field_count * sizeof(struct key_part_def);
> > +	for (uint32_t i = 0; i < field_count; ++i) {
> 
> 4. Why do you need a second cycle for this? Can the fields
> be initialized in the same cycle as key parts?
> 
They can, but I plan to change the definition of the index
in some cases, which is easier to do when they are in two
different cycles. The only problem was that index had to have
all fields since it worked as format in some cases. This is not
so now since format is fixed. Still, I changed assert.

> > +		struct field_def *field = &fields[i];
> > +		field->name = names;
> > +		names += name_len;
> > +		sprintf(field->name, "_COLUMN_%d", i);
> > +		field->is_nullable = true;
> > +		field->nullable_action = ON_CONFLICT_ACTION_NONE;
> > +		field->default_value = NULL;
> > +		field->default_value_expr = NULL;
> > +		if (def != NULL && i < def->part_count) {
> > +			assert(def->parts[i].type < field_type_MAX);
> > +			field->type = def->parts[i].type;
> > +			field->coll_id = def->parts[i].coll_id;
> > +		} else {
> > +			field->coll_id = COLL_NONE;
> > +			field->type = FIELD_TYPE_SCALAR;
> > +		}
> >  	}
> > +
> >  	for (uint32_t i = 0; i < field_count; ++i) {
> > -		struct key_part_def *part = &ephemer_key_parts[i];
> > +		struct key_part_def *part = &parts[i];
> >  		part->fieldno = i;
> >  		part->nullable_action = ON_CONFLICT_ACTION_NONE;
> >  		part->is_nullable = true;
> >  		part->sort_order = SORT_ORDER_ASC;
> >  		part->path = NULL;
> > -		if (def != NULL && i < def->part_count) {
> > -			assert(def->parts[i].type < field_type_MAX);
> > -			part->type = def->parts[i].type;
> > -			part->coll_id = def->parts[i].coll_id;
> > -		} else {
> > -			part->coll_id = COLL_NONE;
> > -			part->type = FIELD_TYPE_SCALAR;
> > -		}
> > +		part->type = fields[i].type;
> > +		part->coll_id = fields[i].coll_id;
> >  	}
> > -	struct key_def *ephemer_key_def = key_def_new(ephemer_key_parts,
> > -						      field_count, false);
> > -	if (ephemer_key_def == NULL)
> > +	struct key_def *key_def = key_def_new(parts, field_count, false);
> > +	if (key_def == NULL)
> >  		return NULL;
> >  
> > -	struct index_def *ephemer_index_def =
> > +	struct index_def *index_def =
> >  		index_def_new(0, 0, "ephemer_idx", strlen("ephemer_idx"), TREE,
> > -			      &index_opts_default, ephemer_key_def, NULL);
> > -	key_def_delete(ephemer_key_def);
> > -	if (ephemer_index_def == NULL)
> > +			      &index_opts_default, key_def, NULL);
> > +	key_def_delete(key_def);
> > +	if (index_def == NULL)
> >  		return NULL;
> >  
> >  	struct rlist key_list;
> >  	rlist_create(&key_list);
> > -	rlist_add_entry(&key_list, ephemer_index_def, link);
> > +	rlist_add_entry(&key_list, index_def, link);
> >  
> > -	struct space_def *ephemer_space_def =
> > -		space_def_new_ephemeral(field_count);
> > -	if (ephemer_space_def == NULL) {
> > -		index_def_delete(ephemer_index_def);
> > +	struct space_def *space_def = space_def_new_ephemeral(field_count,
> > +							      fields);
> > +	if (space_def == NULL) {
> > +		index_def_delete(index_def);
> >  		return NULL;
> >  	}
> >  
> > -	struct space *ephemer_new_space = space_new_ephemeral(ephemer_space_def,
> > -							      &key_list);
> > -	index_def_delete(ephemer_index_def);
> > -	space_def_delete(ephemer_space_def);
> > +	struct space *space = space_new_ephemeral(space_def, &key_list);
> > +	index_def_delete(index_def);
> > +	space_def_delete(space_def);
> >  
> > -	return ephemer_new_space;
> > +	return space;
> 
> 5. Seems like you could keep the old names, and avoid half of these
> changes.
> 
Fixed, returned old names.

> >  }
> >  
> >  int tarantoolsqlEphemeralInsert(struct space *space, const char *tuple,
> > diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> > index 312c966..beaeb0f 100644
> > --- a/src/box/tuple_format.c
> > +++ b/src/box/tuple_format.c
> 
> 6. Looks like all the diff in this file can be dropped. Because it
> does not contain any functional changes. Right? You just turned
> 'if' into assert + the same 'if' a bit earlier.
> 
Not exactly so. Since now we have dict in format of
ephemeral space old assert throws an error each time
ephemeral space is used. New 'if' allows to remove this
assert without changing general logic.

> > @@ -698,6 +698,12 @@ tuple_format_destroy(struct tuple_format *format)
> >   * dictionary will never be altered. If it can, then alter can
> >   * affect another space, which shares a format with one which is
> >   * altered.
> > + *
> > + * The only way to change the format of the space is to recreate
> > + * space with the new format inside of BOX. Since there is no
> > + * mechanism for recreating the ephemeral space, we need not worry
> > + * about changing the format of the ephemeral space.
> > + *
> >   * @param p_format Double pointer to format. It is updated with
> >   * 		   hashed value, if corresponding format was found
> >   * 		   in hash table
> > @@ -709,13 +715,7 @@ static bool
> >  tuple_format_reuse(struct tuple_format **p_format)
> >  {
> >  	struct tuple_format *format = *p_format;
> > -	if (!format->is_ephemeral)
> > -		return false;
> > -	/*
> > -	 * These fields do not participate in hashing.
> > -	 * Make sure they're unset.
> > -	 */
> > -	assert(format->dict->name_count == 0);
> > +	assert(format->is_ephemeral);
> >  	assert(format->is_temporary);
> >  	mh_int_t key = mh_tuple_format_find(tuple_formats_hash, format,
> >  					    NULL);
> > @@ -739,9 +739,7 @@ tuple_format_reuse(struct tuple_format **p_format)
> >  static int
> >  tuple_format_add_to_hash(struct tuple_format *format)
> >  {
> > -	if(!format->is_ephemeral)
> > -		return 0;
> > -	assert(format->dict->name_count == 0);
> > +	assert(format->is_ephemeral);
> >  	assert(format->is_temporary);
> >  	mh_int_t key = mh_tuple_format_put(tuple_formats_hash,
> >  					   (const struct tuple_format **)&format,
> > @@ -795,11 +793,11 @@ tuple_format_new(struct tuple_format_vtab *vtab, void *engine,
> >  	if (tuple_format_create(format, keys, key_count, space_fields,
> >  				space_field_count) < 0)
> >  		goto err;
> > -	if (tuple_format_reuse(&format))
> > +	if (is_ephemeral && tuple_format_reuse(&format))
> >  		return format;
> >  	if (tuple_format_register(format) < 0)
> >  		goto err;
> > -	if (tuple_format_add_to_hash(format) < 0) {
> > +	if (is_ephemeral && tuple_format_add_to_hash(format) < 0) {
> >  		tuple_format_deregister(format);
> >  		goto err;
> >  	}
> > 

Diff:

>From f5e07106409a3183d81b57f4dd44bb868848021d Mon Sep 17 00:00:00 2001
From: Mergen Imeev <imeevma at gmail.com>
Date: Fri, 10 Apr 2020 02:42:54 +0300
Subject: [PATCH] Review fix


diff --git a/src/box/space_def.h b/src/box/space_def.h
index e7b0c28..61bbdb4 100644
--- a/src/box/space_def.h
+++ b/src/box/space_def.h
@@ -173,6 +173,7 @@ space_def_new(uint32_t id, uint32_t uid, uint32_t exact_field_count,
 /**
  * Create a new ephemeral space definition.
  * @param field_count Number of fields in the space.
+ * @param fields Field definitions.
  *
  * @retval Space definition.
  */
diff --git a/src/box/sql.c b/src/box/sql.c
index 27089bd..288c4fa 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -337,14 +337,15 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
 	 * length of each name is no more than strlen("_COLUMN_")
 	 * + 5.
 	 */
-	assert(SQL_MAX_COLUMN < 9999);
+	assert(SQL_MAX_COLUMN <= 2000);
 	uint32_t name_len = sizeof("_COLUMN_") + 5;
 	uint32_t size = field_count * (sizeof(struct field_def) + name_len +
 				       sizeof(struct key_part_def));
 	struct field_def *fields = region_alloc(region, size);
-	struct key_part_def *parts = (void *)fields +
+	struct key_part_def *ephemer_key_parts = (void *)fields +
 				     field_count * sizeof(struct field_def);
-	char *names = (char *)parts + field_count * sizeof(struct key_part_def);
+	char *names = (char *)ephemer_key_parts +
+		      field_count * sizeof(struct key_part_def);
 	for (uint32_t i = 0; i < field_count; ++i) {
 		struct field_def *field = &fields[i];
 		field->name = names;
@@ -365,7 +366,7 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
 	}
 
 	for (uint32_t i = 0; i < field_count; ++i) {
-		struct key_part_def *part = &parts[i];
+		struct key_part_def *part = &ephemer_key_parts[i];
 		part->fieldno = i;
 		part->nullable_action = ON_CONFLICT_ACTION_NONE;
 		part->is_nullable = true;
@@ -374,33 +375,35 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
 		part->type = fields[i].type;
 		part->coll_id = fields[i].coll_id;
 	}
-	struct key_def *key_def = key_def_new(parts, field_count, false);
-	if (key_def == NULL)
+	struct key_def *ephemer_key_def = key_def_new(ephemer_key_parts,
+						      field_count, false);
+	if (ephemer_key_def == NULL)
 		return NULL;
 
-	struct index_def *index_def =
+	struct index_def *ephemer_index_def =
 		index_def_new(0, 0, "ephemer_idx", strlen("ephemer_idx"), TREE,
-			      &index_opts_default, key_def, NULL);
-	key_def_delete(key_def);
-	if (index_def == NULL)
+			      &index_opts_default, ephemer_key_def, NULL);
+	key_def_delete(ephemer_key_def);
+	if (ephemer_index_def == NULL)
 		return NULL;
 
 	struct rlist key_list;
 	rlist_create(&key_list);
-	rlist_add_entry(&key_list, index_def, link);
+	rlist_add_entry(&key_list, ephemer_index_def, link);
 
-	struct space_def *space_def = space_def_new_ephemeral(field_count,
-							      fields);
-	if (space_def == NULL) {
-		index_def_delete(index_def);
+	struct space_def *ephemer_space_def =
+		space_def_new_ephemeral(field_count, fields);
+	if (ephemer_space_def == NULL) {
+		index_def_delete(ephemer_index_def);
 		return NULL;
 	}
 
-	struct space *space = space_new_ephemeral(space_def, &key_list);
-	index_def_delete(index_def);
-	space_def_delete(space_def);
+	struct space *ephemer_new_space = space_new_ephemeral(ephemer_space_def,
+							      &key_list);
+	index_def_delete(ephemer_index_def);
+	space_def_delete(ephemer_space_def);
 
-	return space;
+	return ephemer_new_space;
 }
 
 int tarantoolsqlEphemeralInsert(struct space *space, const char *tuple,


New patch:

>From 2cf0d4749e7858b533225374f62da8556a51bcd9 Mon Sep 17 00:00:00 2001
From: Mergen Imeev <imeevma at gmail.com>
Date: Thu, 23 Jan 2020 18:34:00 +0300
Subject: [PATCH] sql: specify field types in ephemeral space format

This patch allows to specify field types in ephemeral space format.
Prior to this patch, all fields had a SCALAR field type.

This patch simplifies the structure of the ephemeral space a little.

Needed for #4256
Needed for #4692
Closes #3841

diff --git a/src/box/space_def.c b/src/box/space_def.c
index 6611312..5af0f38 100644
--- a/src/box/space_def.c
+++ b/src/box/space_def.c
@@ -233,7 +233,7 @@ space_def_new(uint32_t id, uint32_t uid, uint32_t exact_field_count,
 }
 
 struct space_def*
-space_def_new_ephemeral(uint32_t field_count)
+space_def_new_ephemeral(uint32_t field_count, struct field_def *fields)
 {
 	struct space_opts opts = space_opts_default;
 	opts.is_temporary = true;
@@ -242,8 +242,7 @@ space_def_new_ephemeral(uint32_t field_count)
 						    "ephemeral",
 						    strlen("ephemeral"),
 						    "memtx", strlen("memtx"),
-						    &opts, &field_def_default,
-						    0);
+						    &opts, fields, field_count);
 	return space_def;
 }
 
diff --git a/src/box/space_def.h b/src/box/space_def.h
index ac6d226..61bbdb4 100644
--- a/src/box/space_def.h
+++ b/src/box/space_def.h
@@ -173,11 +173,12 @@ space_def_new(uint32_t id, uint32_t uid, uint32_t exact_field_count,
 /**
  * Create a new ephemeral space definition.
  * @param field_count Number of fields in the space.
+ * @param fields Field definitions.
  *
  * @retval Space definition.
  */
 struct space_def *
-space_def_new_ephemeral(uint32_t field_count);
+space_def_new_ephemeral(uint32_t field_count, struct field_def *fields);
 
 /**
  * Size of the space_def.
diff --git a/src/box/sql.c b/src/box/sql.c
index 1256df8..288c4fa 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -330,13 +330,41 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
 			return NULL;
 	}
 
-	struct key_part_def *ephemer_key_parts = region_alloc(&fiber()->gc,
-				sizeof(*ephemer_key_parts) * field_count);
-	if (ephemer_key_parts == NULL) {
-		diag_set(OutOfMemory, sizeof(*ephemer_key_parts) * field_count,
-			 "region", "key parts");
-		return NULL;
+	struct region *region = &fiber()->gc;
+	/*
+	 * Name of the fields will be "_COLUMN_1", "_COLUMN_2"
+	 * and so on. Since number of columns no more than 2000,
+	 * length of each name is no more than strlen("_COLUMN_")
+	 * + 5.
+	 */
+	assert(SQL_MAX_COLUMN <= 2000);
+	uint32_t name_len = sizeof("_COLUMN_") + 5;
+	uint32_t size = field_count * (sizeof(struct field_def) + name_len +
+				       sizeof(struct key_part_def));
+	struct field_def *fields = region_alloc(region, size);
+	struct key_part_def *ephemer_key_parts = (void *)fields +
+				     field_count * sizeof(struct field_def);
+	char *names = (char *)ephemer_key_parts +
+		      field_count * sizeof(struct key_part_def);
+	for (uint32_t i = 0; i < field_count; ++i) {
+		struct field_def *field = &fields[i];
+		field->name = names;
+		names += name_len;
+		sprintf(field->name, "_COLUMN_%d", i);
+		field->is_nullable = true;
+		field->nullable_action = ON_CONFLICT_ACTION_NONE;
+		field->default_value = NULL;
+		field->default_value_expr = NULL;
+		if (def != NULL && i < def->part_count) {
+			assert(def->parts[i].type < field_type_MAX);
+			field->type = def->parts[i].type;
+			field->coll_id = def->parts[i].coll_id;
+		} else {
+			field->coll_id = COLL_NONE;
+			field->type = FIELD_TYPE_SCALAR;
+		}
 	}
+
 	for (uint32_t i = 0; i < field_count; ++i) {
 		struct key_part_def *part = &ephemer_key_parts[i];
 		part->fieldno = i;
@@ -344,14 +372,8 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
 		part->is_nullable = true;
 		part->sort_order = SORT_ORDER_ASC;
 		part->path = NULL;
-		if (def != NULL && i < def->part_count) {
-			assert(def->parts[i].type < field_type_MAX);
-			part->type = def->parts[i].type;
-			part->coll_id = def->parts[i].coll_id;
-		} else {
-			part->coll_id = COLL_NONE;
-			part->type = FIELD_TYPE_SCALAR;
-		}
+		part->type = fields[i].type;
+		part->coll_id = fields[i].coll_id;
 	}
 	struct key_def *ephemer_key_def = key_def_new(ephemer_key_parts,
 						      field_count, false);
@@ -370,7 +392,7 @@ sql_ephemeral_space_create(uint32_t field_count, struct sql_key_info *key_info)
 	rlist_add_entry(&key_list, ephemer_index_def, link);
 
 	struct space_def *ephemer_space_def =
-		space_def_new_ephemeral(field_count);
+		space_def_new_ephemeral(field_count, fields);
 	if (ephemer_space_def == NULL) {
 		index_def_delete(ephemer_index_def);
 		return NULL;
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 312c966..beaeb0f 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -698,6 +698,12 @@ tuple_format_destroy(struct tuple_format *format)
  * dictionary will never be altered. If it can, then alter can
  * affect another space, which shares a format with one which is
  * altered.
+ *
+ * The only way to change the format of the space is to recreate
+ * space with the new format inside of BOX. Since there is no
+ * mechanism for recreating the ephemeral space, we need not worry
+ * about changing the format of the ephemeral space.
+ *
  * @param p_format Double pointer to format. It is updated with
  * 		   hashed value, if corresponding format was found
  * 		   in hash table
@@ -709,13 +715,7 @@ static bool
 tuple_format_reuse(struct tuple_format **p_format)
 {
 	struct tuple_format *format = *p_format;
-	if (!format->is_ephemeral)
-		return false;
-	/*
-	 * These fields do not participate in hashing.
-	 * Make sure they're unset.
-	 */
-	assert(format->dict->name_count == 0);
+	assert(format->is_ephemeral);
 	assert(format->is_temporary);
 	mh_int_t key = mh_tuple_format_find(tuple_formats_hash, format,
 					    NULL);
@@ -739,9 +739,7 @@ tuple_format_reuse(struct tuple_format **p_format)
 static int
 tuple_format_add_to_hash(struct tuple_format *format)
 {
-	if(!format->is_ephemeral)
-		return 0;
-	assert(format->dict->name_count == 0);
+	assert(format->is_ephemeral);
 	assert(format->is_temporary);
 	mh_int_t key = mh_tuple_format_put(tuple_formats_hash,
 					   (const struct tuple_format **)&format,
@@ -795,11 +793,11 @@ tuple_format_new(struct tuple_format_vtab *vtab, void *engine,
 	if (tuple_format_create(format, keys, key_count, space_fields,
 				space_field_count) < 0)
 		goto err;
-	if (tuple_format_reuse(&format))
+	if (is_ephemeral && tuple_format_reuse(&format))
 		return format;
 	if (tuple_format_register(format) < 0)
 		goto err;
-	if (tuple_format_add_to_hash(format) < 0) {
+	if (is_ephemeral && tuple_format_add_to_hash(format) < 0) {
 		tuple_format_deregister(format);
 		goto err;
 	}



More information about the Tarantool-patches mailing list