Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: Kirill Shcherbatov <kshcherbatov@tarantool.org>
Cc: tarantool-patches@freelists.org, kostja@tarantool.org
Subject: Re: [PATCH v1 4/4] box: refactor tuple_init_field_map to use bitmap
Date: Thu, 27 Dec 2018 22:12:49 +0300	[thread overview]
Message-ID: <20181227191249.vojcr2jl3ugnm5pw@esperanza> (raw)
In-Reply-To: <a96867bf26f8010fbf530f1ad4e287c1327dcf5e.1545909237.git.kshcherbatov@tarantool.org>

On Thu, Dec 27, 2018 at 02:15:55PM +0300, Kirill Shcherbatov wrote:
> Refactored tuple_init_field_map to fill a local bitmap and
> compare it with template required_fields bitmap containing
> information about required fields. Each field is mapped to
> bitmap with field:id - unique field identifier.
> This approach to check the required fields will work even after
> the introduction of JSON paths, when the field tree becomes
> multilevel.
> 
> Needed for #1012
> ---
>  src/box/errcode.h                 |   2 +-
>  src/box/tuple_format.c            | 103 ++++++++++++++++++++++++++++--
>  src/box/tuple_format.h            |  18 ++++++
>  test/box/alter_limits.result      |   6 +-
>  test/box/ddl.result               |  18 ++----
>  test/box/misc.result              |   2 +-
>  test/box/sql.result               |   9 +--
>  test/box/tree_pk_multipart.result |   6 +-
>  test/engine/ddl.result            |  21 ++----
>  test/engine/null.result           |  39 ++++-------
>  test/vinyl/constraint.result      |   9 +--
>  test/vinyl/errinj.result          |   9 +--
>  test/vinyl/savepoint.result       |   6 +-
>  13 files changed, 157 insertions(+), 91 deletions(-)
> 
> diff --git a/src/box/errcode.h b/src/box/errcode.h
> index 94381f9f7..f7dbb948e 100644
> --- a/src/box/errcode.h
> +++ b/src/box/errcode.h
> @@ -91,7 +91,7 @@ struct errcode_record {
>  	/* 36 */_(ER_NO_SUCH_SPACE,		"Space '%s' does not exist") \
>  	/* 37 */_(ER_NO_SUCH_FIELD,		"Field %d was not found in the tuple") \
>  	/* 38 */_(ER_EXACT_FIELD_COUNT,		"Tuple field count %u does not match space field count %u") \
> -	/* 39 */_(ER_MIN_FIELD_COUNT,		"Tuple field count %u is less than required by space format or defined indexes (expected at least %u)") \
> +	/* 39 */_(ER_FIELD_MISSING,		"Tuple field %s required by space format is missing") \
>  	/* 40 */_(ER_WAL_IO,			"Failed to write to disk") \
>  	/* 41 */_(ER_MORE_THAN_ONE_TUPLE,	"Get() doesn't support partial keys and non-unique indexes") \
>  	/* 42 */_(ER_ACCESS_DENIED,		"%s access to %s '%s' is denied for user '%s'") \
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index d7489dcd0..a2b7fc7e6 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -28,6 +28,8 @@
>   * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>   * SUCH DAMAGE.
>   */
> +#include "bit/bit.h"
> +#include "fiber.h"
>  #include "json/json.h"
>  #include "tuple_format.h"
>  #include "coll_id_cache.h"
> @@ -207,6 +209,30 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
>  		return -1;
>  	}
>  	format->field_map_size = field_map_size;
> +
> +	/* Allocate required fields bitmap. */
> +	uint32_t required_fields_sz =
> +		DIV_ROUND_UP(format->total_field_count, sizeof(uint32_t)) *
> +		sizeof(uint32_t);

let total_field_count = 31
sizeof(uint32_t) = 4
DIV_ROUND_UP(total_field_count, sizeof(uint32_t)) = 8
required_fields_sz = 9 * 4 = 32 bytes

while you only need 4 bytes to store the bitmap...

> +	format->required_fields = calloc(1, required_fields_sz);
> +	if (format->required_fields == NULL) {
> +		diag_set(OutOfMemory, required_fields_sz, "calloc",
> +			 "format->required_fields");
> +		return -1;
> +	}
> +	struct tuple_field *field;
> +	struct json_token *root = (struct json_token *)&format->fields.root;
> +	json_tree_foreach_entry_preorder(field, root, struct tuple_field,
> +					 token) {
> +		/*
> +		 * Mark all leaf non-nullable fields as required
> +		 * setting corresponding bit 1 in format
> +		 * required_fields.
> +		 */
> +		if (json_token_is_leaf(&field->token) &&
> +		    !tuple_field_is_nullable(field))
> +			bit_set(format->required_fields, field->id);
> +	}
>  	return 0;
>  }
>  
> @@ -305,6 +331,7 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
>  		struct tuple_field *field = tuple_field_new();
>  		if (field == NULL)
>  			goto error;
> +		field->id = fieldno;
>  		field->token.num = fieldno;
>  		field->token.type = JSON_TOKEN_NUM;
>  		if (json_tree_add(&format->fields, &format->fields.root,
> @@ -324,6 +351,8 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
>  		format->dict = dict;
>  		tuple_dictionary_ref(dict);
>  	}
> +	format->total_field_count = field_count;
> +	format->required_fields = NULL;
>  	format->refs = 0;
>  	format->id = FORMAT_ID_NIL;
>  	format->index_field_count = index_field_count;
> @@ -340,6 +369,7 @@ error:
>  static inline void
>  tuple_format_destroy(struct tuple_format *format)
>  {
> +	free(format->required_fields);
>  	tuple_format_destroy_fields(format);
>  	tuple_dictionary_unref(format->dict);
>  }
> @@ -421,6 +451,48 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
>  	return true;
>  }
>  
> +/**
> + * Return meta information of a tuple field given a format
> + * and a unique field identifier.
> + * Typically used on error handling so it is not

Typically? There's just one place where this function is used.

> + * performance-critical and may use full-tree-traverse on lookup.
> + */
> +static struct tuple_field *
> +tuple_format_field_by_id(struct tuple_format *format, uint32_t id)
> +{
> +	struct tuple_field *field;
> +	json_tree_foreach_entry_preorder(field, &format->fields.root,
> +					 struct tuple_field, token) {
> +		if (field->id == id)
> +			return field;
> +	}
> +	return NULL;
> +}
> +
> +/**
> + * Analyze required_fields to ensure that all required fields
> + * present in tuple. Routine relies on required_fields is
> + * initialized on tuple_format_create and all required field's
> + * bits ids are set 1.
> + */
> +static int
> +tuple_format_required_fields_test(struct tuple_format *format,
> +				  uint32_t *required_fields)
> +{
> +	uint32_t bitmap_len =
> +		DIV_ROUND_UP(format->total_field_count, sizeof(uint32_t));

This one looks different from required_fields_sz calculation in
tuple_format_create(). What's going on?

Looks like it would be a good idea to hide this calculation behind
a helper function.

> +	uint32_t missed_field_id;
> +	if (likely(bitmap_majority_test(required_fields, format->required_fields,
> +					bitmap_len, &missed_field_id) == 0))
> +		return 0;
> +	assert(missed_field_id < format->total_field_count);
> +	struct tuple_field *missed_field =

missing_field

> +		tuple_format_field_by_id(format, missed_field_id);
> +	assert(missed_field != NULL);
> +	diag_set(ClientError, ER_FIELD_MISSING, tuple_field_path(missed_field));
> +	return -1;
> +}
> +
>  /** @sa declaration for details. */
>  int
>  tuple_init_field_map(struct tuple_format *format, uint32_t *field_map,
> @@ -440,15 +512,29 @@ tuple_init_field_map(struct tuple_format *format, uint32_t *field_map,
>  			 (unsigned) format->exact_field_count);
>  		return -1;
>  	}
> -	if (validate && field_count < format->min_field_count) {
> -		diag_set(ClientError, ER_MIN_FIELD_COUNT,
> -			 (unsigned) field_count,
> -			 (unsigned) format->min_field_count);
> -		return -1;
> -	}
>  
>  	/* first field is simply accessible, so we do not store offset to it */
>  	struct tuple_field *field = tuple_format_field(format, 0);
> +	/*
> +	 * Allocate fields_bitmap - a bitmap of fields that are
> +	 * present in tuple.
> +	 */
> +	struct region *region = &fiber()->gc;
> +	uint32_t *fields_bitmap = NULL;
> +	if (validate) {
> +		uint32_t fields_bitmap_sz =
> +			DIV_ROUND_UP(format->total_field_count,
> +				     sizeof(uint32_t)) * sizeof(uint32_t);
> +		fields_bitmap = region_alloc(region, fields_bitmap_sz);
> +		if (fields_bitmap == NULL) {
> +			diag_set(OutOfMemory, fields_bitmap_sz, "calloc",
> +				"required_fields");
> +			return -1;
> +		}
> +		memset(fields_bitmap, 0, fields_bitmap_sz);
> +		if (field_count > 0)

field_count can't be 0 - otherwise we would bail out early.

> +			bit_set(fields_bitmap, field->id);
> +	}
>  	if (validate &&
>  	    !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
>  					 tuple_field_is_nullable(field))) {
> @@ -484,9 +570,12 @@ tuple_init_field_map(struct tuple_format *format, uint32_t *field_map,
>  			field_map[field->offset_slot] =
>  				(uint32_t) (pos - tuple);
>  		}
> +		if (validate)
> +			bit_set(fields_bitmap, field->id);
>  		mp_next(&pos);
>  	}
> -	return 0;
> +	return validate ?
> +	       tuple_format_required_fields_test(format, fields_bitmap) : 0;
>  }

  parent reply	other threads:[~2018-12-27 19:12 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-27 11:15 [PATCH v1 0/4] box: JSON preparatory patchset Kirill Shcherbatov
2018-12-27 11:15 ` [PATCH v1 1/4] lib: introduce json_tree_snprint_path Kirill Shcherbatov
2018-12-27 18:51   ` Vladimir Davydov
2018-12-27 11:15 ` [PATCH v1 2/4] lib: introduce json_token_is_leaf helper Kirill Shcherbatov
2018-12-27 18:52   ` Vladimir Davydov
2018-12-27 11:15 ` [PATCH v1 3/4] box: introduce bitmap_majority_test routine Kirill Shcherbatov
2018-12-27 18:59   ` Vladimir Davydov
2018-12-29 12:58     ` [tarantool-patches] " Kirill Shcherbatov
2018-12-29 13:19       ` Vladimir Davydov
2018-12-29 13:57         ` Kirill Shcherbatov
2018-12-29 16:16           ` Vladimir Davydov
2018-12-27 11:15 ` [PATCH v1 4/4] box: refactor tuple_init_field_map to use bitmap Kirill Shcherbatov
2018-12-27 11:48   ` [tarantool-patches] " Konstantin Osipov
2018-12-27 19:12   ` Vladimir Davydov [this message]
2018-12-29 12:58     ` [tarantool-patches] " Kirill Shcherbatov
2018-12-29 13:22       ` Vladimir Davydov
2018-12-29 16:25       ` Vladimir Davydov
2018-12-27 19:13 ` [PATCH v1 0/4] box: JSON preparatory patchset Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181227191249.vojcr2jl3ugnm5pw@esperanza \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH v1 4/4] box: refactor tuple_init_field_map to use bitmap' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox