[tarantool-patches] Re: [PATCH v2 2/5] box: introduce slot_cache in key_part

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Aug 22 03:27:00 MSK 2018


Hi! Thanks for the fixes! See 15 comments below.

On 15/08/2018 15:15, Kirill Shcherbatov wrote:
> Same key_part could be used in different formats multiple
> times, so different field->offset_slot would be allocated.
> In most scenarios we work with series of tuples of same
> format, and (in general) format lookup for field would be
> expensive operation for JSON-paths defined in key_part.
> New slot_cache field in key_part structure and epoch-based
> mechanism to validate it actuality should be effective
> approach to improve performance.
> New routine tuple_field_by_part use tuple and key_part
> to access field that allows to rework and speedup all
> scenarios of access tuple data by index.
> This also allows to work with JSON-path key_parts later.
> 
> Part of #1012.
> ---
>   src/box/alter.cc             |  4 ++
>   src/box/key_def.c            |  2 +
>   src/box/key_def.h            |  4 ++
>   src/box/memtx_bitset.c       |  8 +++-
>   src/box/memtx_rtree.c        |  6 ++-
>   src/box/space.c              | 25 +++++++++++++
>   src/box/space.h              | 10 +++++
>   src/box/tuple_compare.cc     | 88 ++++++++++++++++++++++++++++++++------------
>   src/box/tuple_extract_key.cc | 39 ++++++++++++++------
>   src/box/tuple_format.c       | 12 ++++++
>   src/box/tuple_format.h       | 19 ++++++++++
>   src/box/tuple_hash.cc        | 49 +++++++++++++++++++-----
>   src/box/vy_stmt.h            |  6 ++-
>   13 files changed, 224 insertions(+), 48 deletions(-)
> 
> diff --git a/src/box/alter.cc b/src/box/alter.cc
> index 3007a13..e1a0d9c 100644
> --- a/src/box/alter.cc
> +++ b/src/box/alter.cc
> @@ -887,6 +887,10 @@ alter_space_do(struct txn *txn, struct alter_space *alter)
>   	alter->new_space->sequence = alter->old_space->sequence;
>   	memcpy(alter->new_space->access, alter->old_space->access,
>   	       sizeof(alter->old_space->access));
> +	space_format_update_epoch(alter->new_space,
> +				  alter->old_space->format != NULL ?
> +				  alter->old_space->format->epoch : 0,
> +				  &alter->key_list);
>   

1. As I remember, we already had this talk during SQL triggers
development, but lets repeat. Please, do not put any operation
specific things onto the main path. Either use ModifySpace,
ModifyIndex, RebuildIndex, CreateIndex, DropIndex etc classes,
or change ModifySpace so that it can be called from _index
trigger, or introduce ModifySpaceFormat or something.
Alter_space_do should not be changed.


>   	/*
>   	 * Build new indexes, check if tuples conform to
> diff --git a/src/box/key_def.c b/src/box/key_def.c
> index ee09dc9..8a4262b 100644
> --- a/src/box/key_def.c
> +++ b/src/box/key_def.c
> @@ -258,6 +258,8 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
>   	def->parts[part_no].type = type;
>   	def->parts[part_no].coll = coll;
>   	def->parts[part_no].coll_id = coll_id;
> +	def->parts[part_no].slot_cache = TUPLE_OFFSET_SLOT_NIL;
> +	def->parts[part_no].format_epoch = 0;
>   	column_mask_set_fieldno(&def->column_mask, fieldno);
>   	/**

2. I do not see where do you update cmp_def in this file. Exactly
cmp_def is used in comparators, not key_def.

>   	 * When all parts are set, initialize the tuple
> diff --git a/src/box/key_def.h b/src/box/key_def.h
> index aecbe03..42c054c 100644
> --- a/src/box/key_def.h
> +++ b/src/box/key_def.h
> @@ -74,6 +74,10 @@ struct key_part {
>   	struct coll *coll;
>   	/** True if a part can store NULLs. */
>   	bool is_nullable;
> +	/** Format epoch for slot_cache. */
> +	uint64_t format_epoch;
> +	/** Cache for corresponding tuple_format slot_offset. */
> +	int32_t slot_cache;

3. From these comments and names it is very hard to understand
anything.
3.1. When I grep 'slot_offset' I see the only match here. For a
novice it would be difficult to find its origin.
3.2. I believe key_part should not depend on a format even in
member names. Please, rename 'format_epoch' to offset_slot_epoch.
3.3. slot_cache -> offset_slot.
3.4. In comments, please, describe what is epoch, where are
offset_slot and epoch from? How epoch works?

>   };
>   
>   struct key_def;
> diff --git a/src/box/space.c b/src/box/space.c
> index 871cc67..c34428a 100644
> --- a/src/box/space.c
> +++ b/src/box/space.c
> @@ -226,6 +226,31 @@ space_index_key_def(struct space *space, uint32_t id)
>   }
>   
>   void
> +space_format_update_epoch(struct space *space, uint64_t last_epoch,
> +			  struct rlist *key_list)
> +{
> +	struct tuple_format *format = space->format;
> +	if (format == NULL)
> +		return;

4. This SQLite style of NULLs handling is not ok, I think. Just
do not call this function, when the format is NULL.

5. This function is a tuple format method and should not take
a space in the arguments. Please, pass the format without space
intermediation, and put it into tuple_format.c/.h.

> +	bool is_format_epoch_changed = false;
> +	struct index_def *index_def;
> +	rlist_foreach_entry(index_def, key_list, link) {
> +		struct key_part *part = index_def->key_def->parts;
> +		struct key_part *parts_end =
> +			part + index_def->key_def->part_count;
> +		for (; part < parts_end; part++) {
> +			struct tuple_field *field =
> +				&format->fields[part->fieldno];
> +			if (field->offset_slot != part->slot_cache)
> +				is_format_epoch_changed = true;
> +		}
> +	}
> +	format->epoch = last_epoch;
> +	if (is_format_epoch_changed)
> +		format->epoch++;
> +}
> +
> +void
>   generic_space_swap_index(struct space *old_space, struct space *new_space,
>   			 uint32_t old_index_id, uint32_t new_index_id)
>   {
> diff --git a/src/box/space.h b/src/box/space.h
> index 8888ec8..8d13bc8 100644
> --- a/src/box/space.h
> +++ b/src/box/space.h
> @@ -228,6 +228,16 @@ struct key_def *
>   space_index_key_def(struct space *space, uint32_t id);
>   
>   /**
> + * Setup space format epoch value.
> + * @param space to update.

6. Parameter name and description shall not be mixed.

> + * @param last_epoch last space epo

7. Typo: 'epo'.

> + * @param key_list list of index_defs.

8. List of which index_defs? Old, new? Besides,
as I see in the alter code, neither of them. Alter
can drop some index_defs from this list before you
update the space_format, for example, via
DropIndex::alter_def.

> + */
> +void
> +space_format_update_epoch(struct space *space, uint64_t last_epoch,
> +			  struct rlist *key_list);
> +
> +/**
>    * Look up the index by id.
>    */
>   static inline struct index *
> diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
> index e53afba..f07b695 100644
> --- a/src/box/tuple_compare.cc
> +++ b/src/box/tuple_compare.cc
> @@ -432,9 +432,17 @@ tuple_common_key_parts(const struct tuple *tuple_a,
>   {
>   	uint32_t i;
>   	for (i = 0; i < key_def->part_count; i++) {
> -		const struct key_part *part = &key_def->parts[i];
> -		const char *field_a = tuple_field(tuple_a, part->fieldno);
> -		const char *field_b = tuple_field(tuple_b, part->fieldno);
> +		struct key_part *part = (struct key_part *)&key_def->parts[i];
> +		const char *field_a =
> +			tuple_field_by_part(tuple_format(tuple_a),
> +					    tuple_data(tuple_a),
> +					    tuple_field_map(tuple_a),
> +					    part);
> +		const char *field_b =
> +			tuple_field_by_part(tuple_format(tuple_b),
> +					    tuple_data(tuple_b),
> +					    tuple_field_map(tuple_b),
> +					    part);

9. Do not call tuple_format on each iteration. It can not change
in between. Same about tuple_data, tuple_field_map. As I said in
the previous review, these functions are not free.

>   		enum mp_type a_type = field_a != NULL ?
>   				      mp_typeof(*field_a) : MP_NIL;
>   		enum mp_type b_type = field_b != NULL ?
> @@ -498,10 +506,21 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
>   		end = part + key_def->part_count;
>   
>   	for (; part < end; part++) {
> -		field_a = tuple_field_raw(format_a, tuple_a_raw, field_map_a,
> -					  part->fieldno);
> -		field_b = tuple_field_raw(format_b, tuple_b_raw, field_map_b,
> -					  part->fieldno);
> +		if (is_flat) {
> +			field_a = tuple_field_raw(format_a, tuple_a_raw,
> +						  field_map_a,
> +						  part->fieldno);
> +			field_b = tuple_field_raw(format_b, tuple_b_raw,
> +						  field_map_b,
> +						  part->fieldno);
> +		} else {
> +			field_a = tuple_field_by_part(format_a, tuple_a_raw,
> +						      field_map_a,
> +						      (struct key_part *)part);
> +			field_b = tuple_field_by_part(format_b, tuple_b_raw,
> +						      field_map_b,
> +						      (struct key_part *)part);

10. As you see, any function retrieving tuple field from const char * has
suffix '_raw'. Please, rename tuple_field_by_part to tuple_field_by_part_raw.

> +		}
>   		assert(has_optional_parts ||
>   		       (field_a != NULL && field_b != NULL));
>   		if (! is_nullable) {
> diff --git a/src/box/tuple_extract_key.cc b/src/box/tuple_extract_key.cc
> index 880abb6..d95ee8d 100644
> --- a/src/box/tuple_extract_key.cc
> +++ b/src/box/tuple_extract_key.cc
> @@ -110,9 +110,15 @@ tuple_extract_key_slowpath(const struct tuple *tuple,
>   
>   	/* Calculate the key size. */
>   	for (uint32_t i = 0; i < part_count; ++i) {
> -		const char *field =
> -			tuple_field_raw(format, data, field_map,
> -					key_def->parts[i].fieldno);
> +		const char *field;
> +		if (is_flat) {
> +			field = tuple_field_raw(format, data, field_map,
> +						key_def->parts[i].fieldno);
> +		} else {
> +			field = tuple_field_by_part(format, data, field_map,
> +						    (struct key_part *)
> +							&key_def->parts[i]);

10. Broken indentation. It is not a single place. Please, fix in others too.

> +		}
>   		if (has_optional_parts && field == NULL) {
>   			bsize += mp_sizeof_nil();
>   			continue;> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index c7dc48f..a989917 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -324,6 +329,20 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
>   		     const char *tuple);
>   
>   /**
> + * Get a field refereed by multipart index @def part @idx in
> + * @tuple.

11. All the tree parameter names (def, idx, tuple) do not
exist in this function.

> + *
> + * @param format tuple format
> + * @param tuple a pointer to MessagePack array
> + * @param field_map a pointer to the LAST element of field map
> + * @param part multipart index part to use.

12. Please, start a sentence with a capital letter and finish
with the dot. In other places and commits too.

> + * @retval field data if field exists or NULL
> + */
> +const char *
> +tuple_field_by_part(const struct tuple_format *format, const char *data,
> +		    const uint32_t *field_map, struct key_part *part);
> +
> +/**
>    * Get a field at the specific position in this MessagePack array.
>    * Returns a pointer to MessagePack data.
>    * @param format tuple format
> diff --git a/src/box/tuple_hash.cc b/src/box/tuple_hash.cc
> index dee9be3..272e814 100644
> --- a/src/box/tuple_hash.cc
> +++ b/src/box/tuple_hash.cc
> @@ -157,7 +157,11 @@ struct TupleHash
>   		uint32_t h = HASH_SEED;
>   		uint32_t carry = 0;
>   		uint32_t total_size = 0;
> -		const char *field = tuple_field(tuple, key_def->parts->fieldno);
> +		const char *field =
> +			tuple_field_by_part(tuple_format(tuple),
> +					    tuple_data(tuple),
> +					    tuple_field_map(tuple),
> +					    (struct key_part *)key_def->parts);

13. Why do you need this cast to struct key_part *? I removed it
and nothing changed.

>   		TupleFieldHash<TYPE, MORE_TYPES...>::
>   			hash(&field, &h, &carry, &total_size);
>   		return PMurHash32_Result(h, carry, total_size);
> @@ -312,13 +320,16 @@ tuple_hash_key_part(uint32_t *ph1, uint32_t *pcarry,
>   		    const struct tuple *tuple,
>   		    const struct key_part *part)
>   {
> -	const char *field = tuple_field(tuple, part->fieldno);
> +	const char *field =
> +		tuple_field_by_part(tuple_format(tuple), tuple_data(tuple),
> +				    tuple_field_map(tuple),
> +				    (struct key_part *)part);

14. Why do you need non-const struct key_part in
tuple_field_by_part?

>   	if (field == NULL)
>   		return tuple_hash_null(ph1, pcarry);
>   	return tuple_hash_field(ph1, pcarry, &field, part->coll);
>   }
>   
> -template <bool has_optional_parts>
> +template <bool has_optional_parts, bool is_flat>
>   uint32_t
>   tuple_hash_slowpath(const struct tuple *tuple, const struct key_def *key_def)
>   {
> @@ -327,7 +338,15 @@ tuple_hash_slowpath(const struct tuple *tuple, const struct key_def *key_def)
>   	uint32_t carry = 0;
>   	uint32_t total_size = 0;
>   	uint32_t prev_fieldno = key_def->parts[0].fieldno;
> -	const char *field = tuple_field(tuple, key_def->parts[0].fieldno);
> +	const char *field;
> +	if (is_flat) {
> +		field = tuple_field(tuple, prev_fieldno);
> +	} else {
> +		field = tuple_field_by_part(tuple_format(tuple),
> +					    tuple_data(tuple),
> +					    tuple_field_map(tuple),
> +					    (struct key_part *)&key_def->parts);
> +	}
>   	const char *end = (char *)tuple + tuple_size(tuple);
>   	if (has_optional_parts && field == NULL) {
>   		total_size += tuple_hash_null(&h, &carry);
> @@ -341,7 +360,19 @@ tuple_hash_slowpath(const struct tuple *tuple, const struct key_def *key_def)
>   		 * need of tuple_field
>   		 */
>   		if (prev_fieldno + 1 != key_def->parts[part_id].fieldno) {
> -			field = tuple_field(tuple, key_def->parts[part_id].fieldno);
> +			if (is_flat) {
> +				field = tuple_field(tuple,
> +						    key_def->parts[part_id].
> +						    fieldno);
> +			} else {
> +				struct key_part *part =
> +					(struct key_part *)
> +					&key_def->parts[part_id];
> +				field = tuple_field_by_part(tuple_format(tuple),
> +							    tuple_data(tuple),
> +							    tuple_field_map(tuple),
> +							    part);
> +			}
>   		}

15. Please, do not call tuple_format, tuple_data and tuple_field_map
multiple times.

>   		if (has_optional_parts && (field == NULL || field >= end)) {
>   			total_size += tuple_hash_null(&h, &carry);




More information about the Tarantool-patches mailing list