Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: Kirill Shcherbatov <kshcherbatov@tarantool.org>
Cc: tarantool-patches@freelists.org, kostja@tarantool.org
Subject: Re: [PATCH v7 4/5] box: introduce offset_slot cache in key_part
Date: Thu, 10 Jan 2019 14:28:38 +0300	[thread overview]
Message-ID: <20190110112838.3pwcibq4f656al5t@esperanza> (raw)
In-Reply-To: <6dacf60b2af94fff454c5c5603224eea5e44be2f.1547022001.git.kshcherbatov@tarantool.org>

On Wed, Jan 09, 2019 at 11:29:39AM +0300, Kirill Shcherbatov wrote:
> diff --git a/src/box/key_def.c b/src/box/key_def.c
> index 3012b05df..328954296 100644
> --- a/src/box/key_def.c
> +++ b/src/box/key_def.c
> @@ -139,7 +139,8 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
>  		 enum field_type type, enum on_conflict_action nullable_action,
>  		 struct coll *coll, uint32_t coll_id,
>  		 enum sort_order sort_order, const char *path,
> -		 uint32_t path_len, char **paths)
> +		 uint32_t path_len, char **paths, int32_t offset_slot_cache,
> +		 uint64_t format_epoch)
>  {
>  	assert(part_no < def->part_count);
>  	assert(type < field_type_MAX);
> @@ -151,6 +152,8 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
>  	def->parts[part_no].coll = coll;
>  	def->parts[part_no].coll_id = coll_id;
>  	def->parts[part_no].sort_order = sort_order;
> +	def->parts[part_no].offset_slot_cache = offset_slot_cache;
> +	def->parts[part_no].format_epoch = format_epoch;
>  	if (path != NULL) {
>  		assert(paths != NULL);
>  		def->parts[part_no].path = *paths;
> @@ -198,7 +201,8 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count)
>  		uint32_t path_len = part->path != NULL ? strlen(part->path) : 0;
>  		key_def_set_part(def, i, part->fieldno, part->type,
>  				 part->nullable_action, coll, part->coll_id,
> -				 part->sort_order, part->path, path_len, &paths);
> +				 part->sort_order, part->path, path_len, &paths,
> +				 TUPLE_OFFSET_SLOT_NIL, 0);
>  	}
>  	key_def_set_cmp(def);
>  	return def;
> @@ -251,7 +255,7 @@ box_key_def_new(uint32_t *fields, uint32_t *types, uint32_t part_count)
>  				 (enum field_type)types[item],
>  				 ON_CONFLICT_ACTION_DEFAULT,
>  				 NULL, COLL_NONE, SORT_ORDER_ASC, NULL, 0,
> -				 NULL);
> +				 NULL, TUPLE_OFFSET_SLOT_NIL, 0);
>  	}
>  	key_def_set_cmp(key_def);
>  	return key_def;
> @@ -676,7 +680,8 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
>  		key_def_set_part(new_def, pos++, part->fieldno, part->type,
>  				 part->nullable_action, part->coll,
>  				 part->coll_id, part->sort_order, part->path,
> -				 part->path_len, &paths);
> +				 part->path_len, &paths, part->offset_slot_cache,
> +				 part->format_epoch);
>  	}
>  
>  	/* Set-append second key def's part to the new key def. */
> @@ -688,7 +693,8 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
>  		key_def_set_part(new_def, pos++, part->fieldno, part->type,
>  				 part->nullable_action, part->coll,
>  				 part->coll_id, part->sort_order, part->path,
> -				 part->path_len, &paths);
> +				 part->path_len, &paths, part->offset_slot_cache,
> +				 part->format_epoch);
>  	}
>  	key_def_set_cmp(new_def);
>  	return new_def;
> diff --git a/src/box/key_def.h b/src/box/key_def.h
> index c6b7a8c74..54e3dca89 100644
> --- a/src/box/key_def.h
> +++ b/src/box/key_def.h
> @@ -96,6 +96,17 @@ struct key_part {
>  	char *path;
>  	/** The length of JSON path. */
>  	uint32_t path_len;
> +	/** Cached format epoch. */

Not quite clear comment, please elaborate. May be

	Epoch of the tuple format the offset slot cached in this
	part is valid for, see tuple_format::epoch.

?

> +	uint64_t format_epoch;
> +	/**
> +	 * Cached value of the offset slot corresponding to
> +	 * the indexed field (tuple_field::offset_slot).
> +	 * Valid only if key_def::epoch equals the epoch of
> +	 * the tuple format. Updated in
> +	 * tuple_field_by_part_slowpath to always store the

This is nitpicking, but still please try to make the comments look
rectangular with no abrupt line breaks. Rephrase it if necessary or
make other lines a few characters longer or shorter.

In this case you could use tuple_field_by_part() instead of
tuple_field_by_part_slowpath() - it would still be clear, but the
comment would look better :-)

> +	 * offset corresponding to the last used tuple format.
> +	 */
> +	int32_t offset_slot_cache;
>  };
>  
>  struct key_def;
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index 401752654..9178c2722 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -40,6 +40,7 @@ struct tuple_format **tuple_formats;
>  static intptr_t recycled_format_ids = FORMAT_ID_NIL;
>  
>  static uint32_t formats_size = 0, formats_capacity = 0;
> +static uint64_t formats_epoch = 0;
>  
>  static struct tuple_field *
>  tuple_field_new(void)
> @@ -599,6 +600,7 @@ tuple_format_new(struct tuple_format_vtab *vtab, struct key_def * const *keys,
>  	format->vtab = *vtab;
>  	format->engine = NULL;
>  	format->is_temporary = false;
> +	format->epoch = ++formats_epoch;
>  	if (tuple_format_register(format) < 0) {
>  		tuple_format_destroy(format);
>  		free(format);
> @@ -1105,6 +1107,7 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
>  	part.fieldno = fieldno;
>  	part.path = (char *)path + lexer.offset;
>  	part.path_len = path_len - lexer.offset;
> +	part.format_epoch = 0;
>  	rc = tuple_field_by_part_raw_slowpath(format, tuple, field_map, &part,
>  					      field);
>  	if (rc == 0)
> @@ -1131,6 +1134,13 @@ tuple_field_by_part_raw_slowpath(struct tuple_format *format, const char *data,
>  		int32_t offset_slot = field->offset_slot;
>  		assert(-offset_slot * sizeof(uint32_t) <=
>  		       format->field_map_size);
> +
> +		/* Update format epoch cache. */
> +		assert(part->format_epoch != format->epoch);
> +		assert(format->epoch != 0);
> +		part->offset_slot_cache = offset_slot;
> +		part->format_epoch = format->epoch;
> +
>  		*raw = field_map[offset_slot] == 0 ?
>  		       NULL : data + field_map[offset_slot];
>  		return 0;
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index dd7cd147a..00540207c 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -138,6 +138,12 @@ tuple_field_is_nullable(struct tuple_field *tuple_field)
>   * Tuple format describes how tuple is stored and information about its fields
>   */
>  struct tuple_format {
> +	/**
> +	 * Counter that grows incrementally on space rebuild
> +	 * used for caching offset slot in key_part, for more
> +	 * details see key_part::offset_slot_cache.
> +	 */
> +	uint64_t epoch;
>  	/** Virtual function table */
>  	struct tuple_format_vtab vtab;
>  	/** Pointer to engine-specific data. */
> @@ -488,6 +494,12 @@ tuple_field_by_part_raw(struct tuple_format *format, const char *data,
>  {
>  	if (likely(part->path == NULL)) {
>  		return tuple_field_raw(format, data, field_map, part->fieldno);
> +	} else if (part->format_epoch == format->epoch) {
> +		int32_t offset_slot = part->offset_slot_cache;
> +		assert(-offset_slot * sizeof(uint32_t) <=
> +		       format->field_map_size);
> +		return field_map[offset_slot] != 0 ?
> +		       data + field_map[offset_slot] : NULL;

I think that offset_slot_cache should be checked before part->path - as
it won't make tuple_field_by_part() less efficient for path-less fields.
Actually, quite the contrary - we will avoid an extra memory lookup in
tuple_format->fields if offset_slot_cache is up-to-date.

  reply	other threads:[~2019-01-10 11:28 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-09  8:29 [PATCH v7 0/5] box: Indexes by JSON path Kirill Shcherbatov
2019-01-09  8:29 ` [PATCH v7 1/5] box: introduce JSON Indexes Kirill Shcherbatov
2019-01-10 10:16   ` Vladimir Davydov
2019-01-09  8:29 ` [PATCH v7 2/5] box: introduce has_json_paths flag in templates Kirill Shcherbatov
2019-01-09  8:29 ` [PATCH v7 3/5] box: tune tuple_field_raw_by_path for indexed data Kirill Shcherbatov
2019-01-09  8:29 ` [PATCH v7 4/5] box: introduce offset_slot cache in key_part Kirill Shcherbatov
2019-01-10 11:28   ` Vladimir Davydov [this message]
2019-01-09  8:29 ` [PATCH v7 5/5] box: specify indexes in user-friendly form Kirill Shcherbatov
2019-01-10 10:21   ` Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190110112838.3pwcibq4f656al5t@esperanza \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH v7 4/5] box: introduce offset_slot cache in key_part' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox