[PATCH 4/4] Allow to reuse tuple_formats for ephemeral spaces

Kirill Yukhin kyukhin at tarantool.org
Thu Jan 24 15:39:28 MSK 2019


Hello Vladimir,

Thanks fore review!
My answers are inlinded.

On 23 Jan 11:38, Vladimir Davydov wrote:
> On Tue, Jan 22, 2019 at 05:07:20PM +0300, Kirill Yukhin wrote:
> > diff --git a/src/box/box.cc b/src/box/box.cc
> > index 9f2fd6d..3799544 100644
> > --- a/src/box/box.cc
> > +++ b/src/box/box.cc
> > @@ -2050,6 +2051,9 @@ box_init(void)
> >  	 */
> >  	session_init();
> >  
> > +	if (tuple_format_init() != 0)
> > +		diag_raise();
> > +
> 
> Please also implement tuple_format_free(). Call it in unit tests.
> Add it to the commented block in box_free() so that we won't forget
> it when we fix box shutdown.

Looks like tuple_format_free() already exists and is invoked by tuple_free().
Moved tuple_format_init() to tuple_init() and added call to mhash destructor
to tuple_format_free().
 
> > diff --git a/src/box/space.c b/src/box/space.c
> > index 4d174f7..316b34b 100644
> > --- a/src/box/space.c
> > +++ b/src/box/space.c
> > @@ -194,10 +194,11 @@ space_new(struct space_def *def, struct rlist *key_list)
> >  struct space *
> >  space_new_ephemeral(struct space_def *def, struct rlist *key_list)
> >  {
> > +	assert(def->opts.is_temporary);
> > +	assert(def->opts.is_ephemeral);
> >  	struct space *space = space_new(def, key_list);
> >  	if (space == NULL)
> >  		return NULL;
> > -	space->def->opts.is_temporary = true;
> 
> Should have been done in patch 2.

Done.

> > diff --git a/src/box/space_def.c b/src/box/space_def.c
> > index 1d2bfcb..c37df24 100644
> > --- a/src/box/space_def.c
> > +++ b/src/box/space_def.c
> > @@ -61,6 +62,7 @@ const struct space_opts space_opts_default = {
> >  const struct opt_def space_opts_reg[] = {
> >  	OPT_DEF("group_id", OPT_UINT32, struct space_opts, group_id),
> >  	OPT_DEF("temporary", OPT_BOOL, struct space_opts, is_temporary),
> > +	OPT_DEF("ephemeral", OPT_BOOL, struct space_opts, is_ephemeral),
> 
> This isn't needed as "ephemeral" space option is purely ephemeral,
> meaning it should never be persisted.

Removed.

> > diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> > index 6edc0fb..5d54e94 100644
> > --- a/src/box/tuple_format.c
> > +++ b/src/box/tuple_format.c
> > @@ -276,7 +278,11 @@ tuple_format_register(struct tuple_format *format)
> >  			formats_capacity = new_capacity;
> >  			tuple_formats = formats;
> >  		}
> > -		if (formats_size == FORMAT_ID_MAX + 1) {
> > +		struct errinj *inj = errinj(ERRINJ_TUPLE_FORMAT_COUNT,
> > +					    ERRINJ_INT);
> > +		if ((inj != NULL && inj->iparam > 0
> > +		     && formats_size >= inj->iparam + 1)
> > +		    || formats_size == FORMAT_ID_MAX + 1) {
> 
> IMO the code would be easier to follow if you rewrote it as follows:
> 
> 		uint32_t formats_size_max = FORMAT_ID_MAX + 1;
> 		struct errinj *inj = errinj(ERRINJ_TUPLE_FORMAT_COUNT,
> 					    ERRINJ_INT);
> 		if (inf != NULL && inj->iparam > 0)
> 			formats_size = inj->iparam;
> 		if (formats_size >= fromats_size_max) {

Applied, thanks.

> > @@ -380,10 +386,77 @@ error:
> >  	return NULL;
> >  }
> >  
> > +static int
> > +tuple_format_cmp(const struct tuple_format *a, const struct tuple_format *b)
> > +{
> > +	if (a->exact_field_count != b->exact_field_count)
> > +		return a->exact_field_count - b->exact_field_count;
> > +	if (tuple_format_field_count(a) != tuple_format_field_count(b))
> > +		return tuple_format_field_count(a) - tuple_format_field_count(b);
> > +
> > +	for (uint32_t i = 0; i < tuple_format_field_count(a); ++i) {
> > +		struct tuple_field *field_a = tuple_format_field(
> > +			(struct tuple_format *)a, i);
> 
> Please don't use const for struct tuple_format to avoid these type
> conversions.

Mhash comparator declaration won't match in that case. So, let's keep it.

> > +		struct tuple_field *field_b = tuple_format_field(
> > +			(struct tuple_format *)b, i);
> > +		if (field_a->type != field_b->type)
> > +			return (int)field_a->type - (int)field_b->type;
> > +		if (field_a->coll_id != field_b->coll_id)
> > +			return (int)field_a->coll_id - (int)field_b->coll_id;
> > +		if (field_a->nullable_action != field_b->nullable_action)
> > +			return (int)field_a->nullable_action -
> > +				(int)field_b->nullable_action;
> > +		if (field_a->is_key_part != field_b->is_key_part)
> > +			return (int)field_a->is_key_part -
> > +				(int)field_b->is_key_part;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static uint32_t
> > +tuple_format_hash(struct tuple_format *format)
> > +{
> > +	uint32_t h = 0;
> 
> 'h' should be initially set to a prime number, say 13.
> Take a look at HASH_SEED in tuple_hash.cc or json.c.

Okay, but I'd prefer 727.

> > +	uint32_t carry = 0;
> > +	uint32_t size = 0;
> > +	for (uint32_t i = 0; i < tuple_format_field_count(format); ++i) {
> > +		struct tuple_field *f = tuple_format_field(format, i);
> > +		PMurHash32_Process(&h, &carry, &f->type,
> > +				   sizeof(enum field_type));
> > +		size += sizeof(enum field_type);
> > +		PMurHash32_Process(&h, &carry, &f->coll_id,
> > +				   sizeof(uint32_t));
> 
> If we change coll_id type from uint32_t to uint16_t, we'll almost
> certainly overlook this place. Please use sizeof(f->coll_id).

Done.

> Also, I think we should define a macro here that would compute a hash
> over a tuple_field struct member and increment the size:
> 
>   tuple_format_hash(...)
>   {
>   #define TUPLE_FIELD_MEMBER_HASH(field, member, h, carry, size) ...
> 
>   for each field
>         TUPLE_FIELD_MEMBER_HASH(f, coll_id, h, carry, size);
>   ...
> 
>   #undef TUPLE_FIELD_MEMBER_HASH
>   }

Done.

> > +		size += sizeof(uint32_t);
> > +		PMurHash32_Process(&h, &carry, &f->nullable_action,
> > +				   sizeof(enum on_conflict_action));
> > +		size += sizeof(enum on_conflict_action);
> > +		PMurHash32_Process(&h, &carry, &f->is_key_part, sizeof(bool));
> > +		size += sizeof(bool);
> > +	}
> > +	return PMurHash32_Result(h, carry, size);
> > +}
> > +
> > +#define MH_SOURCE 1
> > +#define mh_name _tuple_format
> > +#define mh_key_t struct tuple_format *
> > +#define mh_node_t struct tuple_format *
> > +#define mh_arg_t void *
> > +#define mh_hash(a, arg) ((*(a))->hash)
> > +#define mh_hash_key(a, arg) ((a)->hash)
> > +#define mh_cmp(a, b, arg) (tuple_format_cmp(*(a), *(b)))
> > +#define mh_cmp_key(a, b, arg) (tuple_format_cmp((a), *(b)))
> > +#include "salad/mhash.h"
> > +
> > +struct mh_tuple_format_t *tuple_formats_hash = NULL;
> > +
> >  /** Free tuple format resources, doesn't unregister. */
> >  static inline void
> >  tuple_format_destroy(struct tuple_format *format)
> >  {
> > +	mh_int_t key = mh_tuple_format_find(tuple_formats_hash, format, NULL);
> > +	if (key != mh_end(tuple_formats_hash))
> > +		mh_tuple_format_del(tuple_formats_hash, key, NULL);
> 
> Since you add a format to the hash in tuple_format_new(), I guess
> you should remove it from the hash in tuple_format_delete(), not in
> tuple_format_destroy().

Moved.

> > @@ -411,18 +485,46 @@ tuple_format_new(struct tuple_format_vtab *vtab, void *engine,
> >  	format->vtab = *vtab;
> >  	format->engine = engine;
> >  	format->is_temporary = is_temporary;
> > +	format->is_ephemeral = is_ephemeral;
> >  	format->exact_field_count = exact_field_count;
> > -	if (tuple_format_register(format) < 0) {
> > -		tuple_format_destroy(format);
> > -		free(format);
> > -		return NULL;
> > -	}
> >  	if (tuple_format_create(format, keys, key_count, space_fields,
> >  				space_field_count) < 0) {
> >  		tuple_format_delete(format);
> >  		return NULL;
> >  	}
> > -	return format;
> > +	format->hash = tuple_format_hash(format);
> 
> I don't think we should compute the hash if we aren't going to reuse the
> format: the hash function won't work for all formats as it doesn't check
> dict, engine, temporary flag. I'd also add assertions making sure those
> fields are unset if the space is going to be reused.

Done.

> > +	struct errinj *inj = errinj(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT);
> > +	if (inj != NULL && inj->iparam == 200) {
> > +		inj = NULL;
> > +	}
> 
> Looks like this piece left from some preliminary version of the patch.
> Please remove.

Whoops, removed.

> > +	if (format->is_ephemeral) {
> > +		mh_int_t key = mh_tuple_format_find(tuple_formats_hash, format,
> > +						    NULL);
> > +		if (key != mh_end(tuple_formats_hash)) {
> > +			struct tuple_format **entry = mh_tuple_format_node(tuple_formats_hash, key);
> > +			tuple_format_destroy(format);
> > +			free(format);
> > +			return *entry;
> 
> This piece of code should be factored out to a separate function, say
> tuple_format_reuse(), with a comment explaining why we only reuse
> ephemeral formats.

Done.

> > +		} else {
> > +			if (tuple_format_register(format) < 0) {
> > +				tuple_format_destroy(format);
> > +				free(format);
> > +				return NULL;
> > +			}
> > +		        mh_tuple_format_put(tuple_formats_hash,
> > +					    (const
> > +					     struct tuple_format **)&format,
> 
> Splitting type conversion looks ugly. Please don't do that. Better not
> align arguments by parenthesis in such a case:

Re-indented.

> 			mh_tuple_format_put(tuple_formats_hash,
> 				(const struct tuple_format **)&format,
> 				NULL, NULL);
> 
> Also, you forgot to check ENOMEM error here (say hi to #3534 :)

Check added.

> > +					     NULL, NULL);
> > +			return format;
> > +		}
> > +	} else {
> > +		if (tuple_format_register(format) < 0) {
> 
> Please try to rearrange the code somehow so that there's only one place
> in the code where you call tuple_format_register().

Extracted into separate routine.
 
> > diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> > index 9778fda..d347eb7 100644
> > --- a/src/box/tuple_format.h
> > +++ b/src/box/tuple_format.h
> > @@ -151,6 +151,11 @@ struct tuple_format {
> >  	 * in progress.
> >  	 */
> >  	bool is_temporary;
> > +	/**
> > +	 * This format belongs to ephemeral space and thus might
> > +	 * be shared with other ephemeral spaces.
> > +	 */
> > +	bool is_ephemeral;
> >  	/**
> >  	 * Size of field map of tuple in bytes.
> >  	 * \sa struct tuple
> > @@ -193,6 +198,11 @@ struct tuple_format {
> >  	 * tuple_field::token.
> >  	 */
> >  	struct json_tree fields;
> > +
> 
> Extra new line.

Removed.

> > +	/**
> > +	 * Hash key for shared formats of epeheral spaces.
> > +	 */
> > +	uint32_t hash;
> 
> I'd move this definition higher, after 'id'.

Moved.

> > @@ -200,7 +210,7 @@ struct tuple_format {
> >   * a given format.
> >   */
> >  static inline uint32_t
> > -tuple_format_field_count(struct tuple_format *format)
> > +tuple_format_field_count(const struct tuple_format *format)
> 
> Please don't do that. I deliberately avoid using const for tuple_format
> as it uses reference counting and json tree, which don't play nicely
> with const.

Mhash requires it for comparator which in turn uses the routine. Let's keep
it.

> >  {
> >  	const struct json_token *root = &format->fields.root;
> >  	return root->children != NULL ? root->max_child_idx + 1 : 0;
> > @@ -263,6 +273,7 @@ tuple_format_unref(struct tuple_format *format)
> >   * @param key_count The number of keys in @a keys array.
> >   * @param space_fields Array of fields, defined in a space format.
> >   * @param space_field_count Length of @a space_fields.
> > + * @param is_ephemeral Set if format belongs to ephemeral space.
> 
> is_ephemeral goes after is_temporary in the argument list.

Moved.

> > @@ -459,6 +471,13 @@ tuple_field_by_part_raw(struct tuple_format *format, const char *data,
> >  	return tuple_field_raw(format, data, field_map, part->fieldno);
> >  }
> >  
> > +/**
> > + * Initialize tuple formats.
> 
> I'd rather say 'Initialize tuple format subsystem' or 'Initialize tuple
> format library'.

Pasted.

> > diff --git a/test/sql/errinj.test.lua b/test/sql/errinj.test.lua
> > index fa7f9f2..0b50864 100644
> > --- a/test/sql/errinj.test.lua
> > +++ b/test/sql/errinj.test.lua
> > @@ -5,6 +5,20 @@ box.sql.execute('pragma sql_default_engine=\''..engine..'\'')
> >  errinj = box.error.injection
> >  fiber = require('fiber')
> >  
> > +-- gh-3924 Check that tuple_formats of ephemeral spaces are
> > +-- reused.
> > +format = {}
> 
> This assignment isn't needed. Please remove.

Removed.

--
Regards, Kirill Yukhin



More information about the Tarantool-patches mailing list