[tarantool-patches] Re: [PATCH v1 2/5] box: introduce slot_cache in key_part

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Thu Aug 9 01:03:15 MSK 2018


Thanks for the patch! See 8 comments below.

> diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
> index e53afba..6d34c60 100644
> --- a/src/box/tuple_compare.cc
> +++ b/src/box/tuple_compare.cc
> @@ -457,7 +457,7 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
>   	assert(!has_optional_parts || is_nullable);
>   	assert(is_nullable == key_def->is_nullable);
>   	assert(has_optional_parts == key_def->has_optional_parts);
> -	const struct key_part *part = key_def->parts;
> +	struct key_part *part = (struct key_part *)key_def->parts;

1. Why?

>   	const char *tuple_a_raw = tuple_data(tuple_a);
>   	const char *tuple_b_raw = tuple_data(tuple_b);
>   	if (key_def->part_count == 1 && part->fieldno == 0) {
> @@ -484,10 +484,6 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
>   	}
>   
>   	bool was_null_met = false;
> -	const struct tuple_format *format_a = tuple_format(tuple_a);
> -	const struct tuple_format *format_b = tuple_format(tuple_b);
> -	const uint32_t *field_map_a = tuple_field_map(tuple_a);
> -	const uint32_t *field_map_b = tuple_field_map(tuple_b);

2. These things were needed to get formats and tuples data once. Now you
lookup format and raw data on each part in the cycle below. So it is
slower in part_count times. See below the main comment.

>   	const struct key_part *end;
>   	const char *field_a, *field_b;
>   	enum mp_type a_type, b_type;
> @@ -497,11 +493,10 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
>   	else
>   		end = part + key_def->part_count;
>   
> -	for (; part < end; part++) {
> -		field_a = tuple_field_raw(format_a, tuple_a_raw, field_map_a,
> -					  part->fieldno);
> -		field_b = tuple_field_raw(format_b, tuple_b_raw, field_map_b,
> -					  part->fieldno);
> +	uint32_t i = 0;
> +	for (; part < end; part++, i++) {
> +		field_a = tuple_field_by_part(tuple_a, key_def, i);
> +		field_b = tuple_field_by_part(tuple_b, key_def, i);

3. As I said you verbally, when we were discussing how to implement
comparators, you do not need to use slot cache, epochs and other
complex JSON things to compare flat index keys, with no JSON paths. You
should add a new template parameter to tuple_compare_slowpath like
'is_flat' or something, and if it is false, then use regular tuple_field
or tuple_field_raw. If true, then try to use slot cache, epoch etc.
Comparators are chosen once when key_def is created and template 'if'
is compile time, so these 'if's will cost nothing.

Now tuple_field_by_part is much slower than tuple_field_raw - it makes
additional lookup of a format (from a huge array of tuple formats),
of tuple data (additional call), field map (one more call). Further
both tuple_field_by_part and tuple_field_raw do 3 'if's and lookup
in field_map. But tuple_field_raw also updates slot_cache and
format_epoch - it is +2 assignments. So tuple_field_raw is faster.
In this cycle in the summary it is in part_count times faster.

So please, do not use tuple_field_by_part for flat indexes. It has no
profit for them.

All the same about key extraction and hash.

>   		assert(has_optional_parts ||
>   		       (field_a != NULL && field_b != NULL));
>   		if (! is_nullable) {
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index 2e19d2e..40cd48a 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -29,12 +29,15 @@
>    * SUCH DAMAGE.
>    */
>   #include "json/path.h"
> +#include "tuple.h"

4. Tuple format should not depend on tuple. Vice versa. Please,
consult other tuple_field methods how to solve this dependency
cycle.

>   #include "tuple_format.h"
>   
>   /** Global table of tuple formats */
>   struct tuple_format **tuple_formats;
>   static intptr_t recycled_format_ids = FORMAT_ID_NIL;
>   
> +static uint64_t format_epoch = 1;

5. Looks like you've ignored my point that a global format
epoch will expire epoch in all formats when just a one
space is altered. Please, do not. Epoch should be one per
space.

> +
>   static uint32_t formats_size = 0, formats_capacity = 0;
>   
>   static const struct tuple_field tuple_field_default = {
> @@ -69,6 +72,7 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
>   		format->fields[i] = tuple_field_default;
>   
>   	int current_slot = 0;
> +	bool format_epoch_changed = false;

6. Prefix flags with 'is_'.

>   
>   	/* extract field type info */
>   	for (uint16_t key_no = 0; key_no < key_count; ++key_no) {> @@ -542,6 +551,40 @@ tuple_field_go_to_key(const char **field, const char *key, int len)
>   	return -1;
>   }
>   
> +const char *
> +tuple_field_by_part(const struct tuple *tuple, const struct key_def *def,
> +		    uint32_t idx)
> +{
> +	struct key_part *part = (struct key_part *)&def->parts[idx];
> +	const struct tuple_format *format = tuple_format(tuple);
> +	const char *data = tuple_data(tuple);
> +	const uint32_t *field_map = tuple_field_map(tuple);
> +
> +	uint32_t field_no = part->fieldno;
> +	if (likely(field_no < format->index_field_count)) {
> +		assert(format->epoch > 0);
> +		int32_t offset_slot = TUPLE_OFFSET_SLOT_NIL;
> +		if (part->format_epoch != format->epoch) {
> +			offset_slot = format->fields[field_no].offset_slot;
> +			if (format->epoch > part->format_epoch) {
> +				part->slot_cache = offset_slot;
> +				part->format_epoch = format->epoch;
> +			}
> +		}
> +		if (offset_slot != TUPLE_OFFSET_SLOT_NIL) {
> +			return field_map[offset_slot] != 0 ?
> +			       data + field_map[offset_slot] : NULL;
> +		}
> +	}
> +	ERROR_INJECT(ERRINJ_TUPLE_FIELD, return NULL);

7. Why do you need it?

> +	uint32_t field_count = mp_decode_array(&data);
> +	if (unlikely(field_no >= field_count))
> +		return NULL;
> +	for (uint32_t k = 0; k < field_no; k++)
> +		mp_next(&data);
> +	return data;
> +}
> +
>   int
>   tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
>                           const uint32_t *field_map, const char *path,
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index c7dc48f..3cb1284 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -115,6 +115,8 @@ struct tuple_field {
>    * Tuple format describes how tuple is stored and information about its fields
>    */
>   struct tuple_format {
> +	/** Tuple format epoch. */

8. Too small comment for such complex feature.

> +	uint64_t epoch;
>   	/** Virtual function table */
>   	struct tuple_format_vtab vtab;
>   	/** Pointer to engine-specific data. */




More information about the Tarantool-patches mailing list