Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
To: tarantool-patches@freelists.org,
	Kirill Shcherbatov <kshcherbatov@tarantool.org>
Subject: [tarantool-patches] Re: [PATCH v1 2/5] box: introduce slot_cache in key_part
Date: Thu, 9 Aug 2018 01:03:15 +0300	[thread overview]
Message-ID: <ef2cf1f5-b439-34c3-49a3-a004f4079922@tarantool.org> (raw)
In-Reply-To: <14a46618ad51e12394c1f578adbd8874b49ce93c.1533558332.git.kshcherbatov@tarantool.org>

Thanks for the patch! See 8 comments below.

> diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
> index e53afba..6d34c60 100644
> --- a/src/box/tuple_compare.cc
> +++ b/src/box/tuple_compare.cc
> @@ -457,7 +457,7 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
>   	assert(!has_optional_parts || is_nullable);
>   	assert(is_nullable == key_def->is_nullable);
>   	assert(has_optional_parts == key_def->has_optional_parts);
> -	const struct key_part *part = key_def->parts;
> +	struct key_part *part = (struct key_part *)key_def->parts;

1. Why?

>   	const char *tuple_a_raw = tuple_data(tuple_a);
>   	const char *tuple_b_raw = tuple_data(tuple_b);
>   	if (key_def->part_count == 1 && part->fieldno == 0) {
> @@ -484,10 +484,6 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
>   	}
>   
>   	bool was_null_met = false;
> -	const struct tuple_format *format_a = tuple_format(tuple_a);
> -	const struct tuple_format *format_b = tuple_format(tuple_b);
> -	const uint32_t *field_map_a = tuple_field_map(tuple_a);
> -	const uint32_t *field_map_b = tuple_field_map(tuple_b);

2. These things were needed to get formats and tuples data once. Now you
lookup format and raw data on each part in the cycle below. So it is
slower in part_count times. See below the main comment.

>   	const struct key_part *end;
>   	const char *field_a, *field_b;
>   	enum mp_type a_type, b_type;
> @@ -497,11 +493,10 @@ tuple_compare_slowpath(const struct tuple *tuple_a, const struct tuple *tuple_b,
>   	else
>   		end = part + key_def->part_count;
>   
> -	for (; part < end; part++) {
> -		field_a = tuple_field_raw(format_a, tuple_a_raw, field_map_a,
> -					  part->fieldno);
> -		field_b = tuple_field_raw(format_b, tuple_b_raw, field_map_b,
> -					  part->fieldno);
> +	uint32_t i = 0;
> +	for (; part < end; part++, i++) {
> +		field_a = tuple_field_by_part(tuple_a, key_def, i);
> +		field_b = tuple_field_by_part(tuple_b, key_def, i);

3. As I said you verbally, when we were discussing how to implement
comparators, you do not need to use slot cache, epochs and other
complex JSON things to compare flat index keys, with no JSON paths. You
should add a new template parameter to tuple_compare_slowpath like
'is_flat' or something, and if it is false, then use regular tuple_field
or tuple_field_raw. If true, then try to use slot cache, epoch etc.
Comparators are chosen once when key_def is created and template 'if'
is compile time, so these 'if's will cost nothing.

Now tuple_field_by_part is much slower than tuple_field_raw - it makes
additional lookup of a format (from a huge array of tuple formats),
of tuple data (additional call), field map (one more call). Further
both tuple_field_by_part and tuple_field_raw do 3 'if's and lookup
in field_map. But tuple_field_raw also updates slot_cache and
format_epoch - it is +2 assignments. So tuple_field_raw is faster.
In this cycle in the summary it is in part_count times faster.

So please, do not use tuple_field_by_part for flat indexes. It has no
profit for them.

All the same about key extraction and hash.

>   		assert(has_optional_parts ||
>   		       (field_a != NULL && field_b != NULL));
>   		if (! is_nullable) {
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index 2e19d2e..40cd48a 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -29,12 +29,15 @@
>    * SUCH DAMAGE.
>    */
>   #include "json/path.h"
> +#include "tuple.h"

4. Tuple format should not depend on tuple. Vice versa. Please,
consult other tuple_field methods how to solve this dependency
cycle.

>   #include "tuple_format.h"
>   
>   /** Global table of tuple formats */
>   struct tuple_format **tuple_formats;
>   static intptr_t recycled_format_ids = FORMAT_ID_NIL;
>   
> +static uint64_t format_epoch = 1;

5. Looks like you've ignored my point that a global format
epoch will expire epoch in all formats when just a one
space is altered. Please, do not. Epoch should be one per
space.

> +
>   static uint32_t formats_size = 0, formats_capacity = 0;
>   
>   static const struct tuple_field tuple_field_default = {
> @@ -69,6 +72,7 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
>   		format->fields[i] = tuple_field_default;
>   
>   	int current_slot = 0;
> +	bool format_epoch_changed = false;

6. Prefix flags with 'is_'.

>   
>   	/* extract field type info */
>   	for (uint16_t key_no = 0; key_no < key_count; ++key_no) {> @@ -542,6 +551,40 @@ tuple_field_go_to_key(const char **field, const char *key, int len)
>   	return -1;
>   }
>   
> +const char *
> +tuple_field_by_part(const struct tuple *tuple, const struct key_def *def,
> +		    uint32_t idx)
> +{
> +	struct key_part *part = (struct key_part *)&def->parts[idx];
> +	const struct tuple_format *format = tuple_format(tuple);
> +	const char *data = tuple_data(tuple);
> +	const uint32_t *field_map = tuple_field_map(tuple);
> +
> +	uint32_t field_no = part->fieldno;
> +	if (likely(field_no < format->index_field_count)) {
> +		assert(format->epoch > 0);
> +		int32_t offset_slot = TUPLE_OFFSET_SLOT_NIL;
> +		if (part->format_epoch != format->epoch) {
> +			offset_slot = format->fields[field_no].offset_slot;
> +			if (format->epoch > part->format_epoch) {
> +				part->slot_cache = offset_slot;
> +				part->format_epoch = format->epoch;
> +			}
> +		}
> +		if (offset_slot != TUPLE_OFFSET_SLOT_NIL) {
> +			return field_map[offset_slot] != 0 ?
> +			       data + field_map[offset_slot] : NULL;
> +		}
> +	}
> +	ERROR_INJECT(ERRINJ_TUPLE_FIELD, return NULL);

7. Why do you need it?

> +	uint32_t field_count = mp_decode_array(&data);
> +	if (unlikely(field_no >= field_count))
> +		return NULL;
> +	for (uint32_t k = 0; k < field_no; k++)
> +		mp_next(&data);
> +	return data;
> +}
> +
>   int
>   tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
>                           const uint32_t *field_map, const char *path,
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index c7dc48f..3cb1284 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -115,6 +115,8 @@ struct tuple_field {
>    * Tuple format describes how tuple is stored and information about its fields
>    */
>   struct tuple_format {
> +	/** Tuple format epoch. */

8. Too small comment for such complex feature.

> +	uint64_t epoch;
>   	/** Virtual function table */
>   	struct tuple_format_vtab vtab;
>   	/** Pointer to engine-specific data. */

  reply	other threads:[~2018-08-08 22:03 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-06 12:26 [tarantool-patches] [PATCH v1 0/5] box: indexes by JSON path Kirill Shcherbatov
2018-08-06 12:26 ` [tarantool-patches] [PATCH v1 1/5] rfc: describe a Tarantool JSON indexes Kirill Shcherbatov
2018-08-06 12:26 ` [tarantool-patches] [PATCH v1 2/5] box: introduce slot_cache in key_part Kirill Shcherbatov
2018-08-08 22:03   ` Vladislav Shpilevoy [this message]
2018-08-15 12:14     ` [tarantool-patches] " Kirill Shcherbatov
2018-08-06 12:27 ` [tarantool-patches] [PATCH v1 3/5] box: introduce path field " Kirill Shcherbatov
2018-08-08 22:03   ` [tarantool-patches] " Vladislav Shpilevoy
2018-08-15 12:14     ` Kirill Shcherbatov
2018-08-06 12:27 ` [tarantool-patches] [PATCH v1 4/5] box: introduce path_hash and tuple_field tree Kirill Shcherbatov
2018-08-08 22:03   ` [tarantool-patches] " Vladislav Shpilevoy
2018-08-15 12:14     ` Kirill Shcherbatov
2018-08-06 12:27 ` [tarantool-patches] [PATCH v1 5/5] box: specify indexes in user-friendly form Kirill Shcherbatov
2018-08-08 22:03 ` [tarantool-patches] Re: [PATCH v1 0/5] box: indexes by JSON path Vladislav Shpilevoy
2018-08-15 12:14   ` Kirill Shcherbatov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ef2cf1f5-b439-34c3-49a3-a004f4079922@tarantool.org \
    --to=v.shpilevoy@tarantool.org \
    --cc=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='[tarantool-patches] Re: [PATCH v1 2/5] box: introduce slot_cache in key_part' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox