[PATCH v1 5/5] box: refactor tuple_init_field_map to use bitmap

Vladimir Davydov vdavydov.dev at gmail.com
Tue Dec 25 20:04:48 MSK 2018


On Sun, Dec 23, 2018 at 03:40:40PM +0300, Kirill Shcherbatov wrote:
> Refactored tuple_init_field_map to fill temporal bitmap and
> compare it with template req_fields_bitmap containing information
> about required fields. Each field is mapped to bitmap using
> unique field identifier.
> This approach to check the required fields will work even after
> the introduction of JSON paths, when the field tree becomes
> multilevel.
> 
> Needed for #1012
> ---
>  src/box/errcode.h                 |   2 +-
>  src/box/tuple_format.c            | 183 +++++++++++++++++++++++-------
>  src/box/tuple_format.h            |  23 ++++
>  test/box/alter_limits.result      |   8 +-
>  test/box/ddl.result               |  24 ++--
>  test/box/misc.result              |   2 +-
>  test/box/sql.result               |  12 +-
>  test/box/tree_pk_multipart.result |   8 +-
>  test/engine/ddl.result            |  28 ++---
>  test/engine/null.result           |  52 ++++-----
>  test/vinyl/constraint.result      |  12 +-
>  test/vinyl/errinj.result          |  12 +-
>  test/vinyl/savepoint.result       |   8 +-
>  13 files changed, 250 insertions(+), 124 deletions(-)
> 
> diff --git a/src/box/errcode.h b/src/box/errcode.h
> index 7d1f8ddc7..812e643d8 100644
> --- a/src/box/errcode.h
> +++ b/src/box/errcode.h
> @@ -91,7 +91,7 @@ struct errcode_record {
>  	/* 36 */_(ER_NO_SUCH_SPACE,		"Space '%s' does not exist") \
>  	/* 37 */_(ER_NO_SUCH_FIELD,		"Field %d was not found in the tuple") \
>  	/* 38 */_(ER_EXACT_FIELD_COUNT,		"Tuple field count %u does not match space field count %u") \
> -	/* 39 */_(ER_MIN_FIELD_COUNT,		"Tuple field count %u is less than required by space format or defined indexes (expected at least %u)") \
> +	/* 39 */_(ER_FIELD_STRUCTURE_MISMATCH,	"Tuple field %s does not math document structure defined by space format or index: %s") \
>  	/* 40 */_(ER_WAL_IO,			"Failed to write to disk") \
>  	/* 41 */_(ER_MORE_THAN_ONE_TUPLE,	"Get() doesn't support partial keys and non-unique indexes") \
>  	/* 42 */_(ER_ACCESS_DENIED,		"%s access to %s '%s' is denied for user '%s'") \
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index e29f84dc5..6e269fd77 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -28,6 +28,8 @@
>   * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>   * SUCH DAMAGE.
>   */
> +#include "bit/bit.h"
> +#include "fiber.h"
>  #include "json/json.h"
>  #include "tuple_format.h"
>  #include "coll_id_cache.h"
> @@ -206,6 +208,30 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
>  		return -1;
>  	}
>  	format->field_map_size = field_map_size;
> +
> +	/* Allocate required fields bitmap. */
> +	uint32_t req_fields_bitmap_sz = (format->total_field_count +
> +					 CHAR_BIT * sizeof(unsigned long) - 1) /
> +					 CHAR_BIT * sizeof(unsigned long);

There's DIV_ROUND_UP helper for this. Anyway, I don't think this is
correct. Suppose there are 63 fields, then you will allocate just one
byte.

> +	format->req_fields_bitmap = calloc(1, req_fields_bitmap_sz);
> +	if (format->req_fields_bitmap == NULL) {
> +		diag_set(OutOfMemory, req_fields_bitmap_sz, "calloc",
> +			 "format->req_fields_bitmap");
> +		return -1;
> +	}
> +	struct tuple_field *field;
> +	struct json_token *root = (struct json_token *)&format->fields.root;

Please remove all pointless type conversions from this patch.

> +	json_tree_foreach_entry_preorder(field, root, struct tuple_field,
> +					 token) {
> +		/*
> +		 * Mark all leaf non-nullable fields as required
> +		 * setting corresponding bit 1 in format
> +		 * req_fields_bitmap.
> +		 */
> +		if (tuple_field_is_leaf(field) &&
> +		    !tuple_field_is_nullable(field))
> +			bit_set(format->req_fields_bitmap, field->id);
> +	}
>  	return 0;
>  }
>  
> @@ -304,6 +330,7 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
>  		struct tuple_field *field = tuple_field_new();
>  		if (field == NULL)
>  			goto error;
> +		field->id = fieldno;
>  		field->token.num = fieldno;
>  		field->token.type = JSON_TOKEN_NUM;
>  		if (json_tree_add(&format->fields, &format->fields.root,
> @@ -323,6 +350,8 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
>  		format->dict = dict;
>  		tuple_dictionary_ref(dict);
>  	}
> +	format->total_field_count = field_count;
> +	format->req_fields_bitmap = NULL;
>  	format->refs = 0;
>  	format->id = FORMAT_ID_NIL;
>  	format->index_field_count = index_field_count;
> @@ -339,6 +368,7 @@ error:
>  static inline void
>  tuple_format_destroy(struct tuple_format *format)
>  {
> +	free(format->req_fields_bitmap);
>  	tuple_format_destroy_fields(format);
>  	tuple_dictionary_unref(format->dict);
>  }
> @@ -422,6 +452,74 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
>  	return true;
>  }
>  
> +/**
> + * Return meta information of a tuple field given a format
> + * and a unique field identifier.

The comment should also say why it's OK to implement this function in
such a suboptimal way.

> + */
> +static struct tuple_field *
> +tuple_format_field_by_id(const struct tuple_format *format, uint32_t id)
> +{
> +	struct tuple_field *field;
> +	struct json_token *root = (struct json_token *)&format->fields.root;
> +	json_tree_foreach_entry_preorder(field, root, struct tuple_field,
> +					 token) {
> +		if (field->id == id)
> +			return field;
> +	}
> +	return NULL;
> +}
> +
> +/**
> + * Analyze fields_bitmap to ensure that all required fields
> + * present in tuple. Routine relies on req_fields_bitmap is
> + * initialized on tuple_format_create and all required field's
> + * id bits are set 1.
> + */
> +static int
> +tuple_format_fields_bitmap_test(const struct tuple_format *format,
> +				const char *fields_bitmap)
> +{
> +	struct tuple_field *missed_field = NULL;
> +	const char *req_fields_bitmap = format->req_fields_bitmap;
> +	for (uint32_t i = 0; i < format->total_field_count; i++) {
> +		bool is_required = bit_test(req_fields_bitmap, i);
> +		if (is_required && is_required != bit_test(fields_bitmap, i)) {
> +			missed_field = tuple_format_field_by_id(format, i);
> +			assert(missed_field != NULL);
> +			break;
> +		}
> +	}

This path is hot. Testing bits one by one is too slow. You need to use
ffs or builtin. I think it's worth implementing it in the scope of the
bit library (bit_find or something).

> +	if (missed_field == NULL)
> +		return 0;
> +
> +	struct json_token *token = &missed_field->token;
> +	const char *err;
> +	if (missed_field->token.type == JSON_TOKEN_STR) {
> +		err = tt_sprintf("map does not contain a key \"%.*s\"",
> +				 token->len, token->str);
> +	} else if (missed_field->token.type == JSON_TOKEN_NUM) {
> +		struct json_token *parent = token->parent;
> +		/*
> +		 * Determine minimal field size looking for
> +		 * the greatest fieldno between non-nullable
> +		 * missed_field neighbors.
> +		 */
> +		uint32_t expected_size;
> +		for (int i = 0; i <= parent->max_child_idx; i++) {
> +			struct tuple_field *field =
> +				tuple_format_field((struct tuple_format *)format,
> +						   i);
> +			if (!tuple_field_is_nullable(field))
> +				expected_size = i + 1;
> +		}
> +		err = tt_sprintf("invalid array size %d (expected at least %d)",
> +				 token->num, expected_size);
> +	}
> +	diag_set(ClientError, ER_FIELD_STRUCTURE_MISMATCH,
> +		 tuple_field_path(missed_field), err);

Why not simply

  Field %s required by space format is missing

?

> +	return -1;
> +}
> +
>  /** @sa declaration for details. */
>  int
>  tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
> @@ -441,54 +539,59 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
>  			 (unsigned) format->exact_field_count);
>  		return -1;
>  	}
> -	if (validate && field_count < format->min_field_count) {
> -		diag_set(ClientError, ER_MIN_FIELD_COUNT,
> -			 (unsigned) field_count,
> -			 (unsigned) format->min_field_count);
> -		return -1;
> -	}
> -
> -	/* first field is simply accessible, so we do not store offset to it */
> -	const struct tuple_field *field =
> -		tuple_format_field((struct tuple_format *)format, 0);
> -	if (validate &&
> -	    !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
> -					 tuple_field_is_nullable(field))) {
> -		diag_set(ClientError, ER_FIELD_TYPE, tuple_field_path(field),
> -			 field_type_strs[field->type]);
> -		return -1;
> -	}
> -	mp_next(&pos);
> -	/* other fields...*/
> -	uint32_t i = 1;
>  	uint32_t defined_field_count = MIN(field_count, validate ?
>  					   tuple_format_field_count(format) :
>  					   format->index_field_count);
> -	if (field_count < format->index_field_count) {
> -		/*
> -		 * Nullify field map to be able to detect by 0,
> -		 * which key fields are absent in tuple_field().
> -		 */
> -		memset((char *)field_map - format->field_map_size, 0,
> -		       format->field_map_size);
> -	}
> -	for (; i < defined_field_count; ++i) {
> -		field = tuple_format_field((struct tuple_format *)format, i);
> -		if (validate &&
> -		    !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
> -						 tuple_field_is_nullable(field))) {
> -			diag_set(ClientError, ER_FIELD_TYPE,
> -				 tuple_field_path(field),
> -				 field_type_strs[field->type]);

You don't need to change this code. Leave it as is, please.

> +	struct region *region = &fiber()->gc;
> +	char *fields_bitmap = NULL;
> +	if (validate) {
> +		uint32_t fields_bitmap_sz = (format->total_field_count +
> +					     CHAR_BIT *
> +					     sizeof(unsigned long) - 1) /
> +					     CHAR_BIT * sizeof(unsigned long);
> +		fields_bitmap = region_alloc(region, fields_bitmap_sz);
> +		if (fields_bitmap == NULL) {
> +			diag_set(OutOfMemory, fields_bitmap_sz, "calloc",
> +				"fields_bitmap");
>  			return -1;
>  		}
> -		if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
> -			field_map[field->offset_slot] =
> -				(uint32_t) (pos - tuple);
> +		memset(fields_bitmap, 0, fields_bitmap_sz);
> +	}
> +	memset((char *)field_map - format->field_map_size, 0,
> +	       format->field_map_size);
> +	struct json_tree *tree = (struct json_tree *)&format->fields;
> +	struct json_token *parent = &tree->root;
> +	struct json_token token;
> +	token.parent = NULL;
> +	token.type = JSON_TOKEN_NUM;
> +	token.num = 0;
> +	while ((uint32_t)token.num < defined_field_count) {
> +		struct tuple_field *field =
> +			json_tree_lookup_entry(tree, parent, &token,
> +					       struct tuple_field, token);
> +		if (field != NULL) {
> +			bool is_nullable = tuple_field_is_nullable(field);
> +			if (validate &&
> +			    !field_mp_type_is_compatible(field->type,
> +							 mp_typeof(*pos),
> +							 is_nullable) != 0) {
> +				diag_set(ClientError, ER_FIELD_TYPE,
> +					 tuple_field_path(field),
> +					 field_type_strs[field->type]);
> +				return -1;
> +			}
> +			if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
> +				field_map[field->offset_slot] =
> +					(uint32_t)(pos - tuple);
> +			}
> +			if (validate)
> +				bit_set(fields_bitmap, field->id);
>  		}
> +		token.num++;
>  		mp_next(&pos);
>  	}
> -	return 0;
> +	return validate ?
> +	       tuple_format_fields_bitmap_test(format, fields_bitmap) : 0;
>  }
>  
>  uint32_t
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index 949337807..947d0d8a5 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -31,6 +31,7 @@
>   * SUCH DAMAGE.
>   */
>  
> +#include "bit/bit.h"
>  #include "key_def.h"
>  #include "field_def.h"
>  #include "errinj.h"
> @@ -114,6 +115,8 @@ struct tuple_field {
>  	struct coll *coll;
>  	/** Collation identifier. */
>  	uint32_t coll_id;
> +	/** Field unique identifier in tuple_format. */
> +	uint32_t id;
>  	/** Link in tuple_format::fields. */
>  	struct json_token token;
>  };
> @@ -173,6 +176,20 @@ struct tuple_format {
>  	 * Shared names storage used by all formats of a space.
>  	 */
>  	struct tuple_dictionary *dict;
> +	/**
> +	 * This bitmap of "required fields" contains information
> +	 * about fields that must present in tuple to be inserted.
> +	 * Look for tuple_format_fields_bitmap_test comment for
> +	 * more details.

And how fields are mapped to bits?

> +	 */
> +	char *req_fields_bitmap;

Why not call it simply required_fields?

> +	/**
> +	 * Total count of format fields in fields subtree.
> +	 * Required to allocate temporal objects containing

temporal != temporary. Far from it. I guess I'll just have to rewrite
all your comments, again...

> +	 * attributes for all fields. Particularly to allocate
> +	 * temporal req_fields_bitmap on region.
> +	 */
> +	uint32_t total_field_count;
>  	/**
>  	 * Fields comprising the format, organized in a tree.
>  	 * First level nodes correspond to tuple fields.
> @@ -194,6 +211,12 @@ tuple_format_field_count(const struct tuple_format *format)
>  	return root->children != NULL ? root->max_child_idx + 1 : 0;
>  }
>  
> +static inline bool
> +tuple_field_is_leaf(struct tuple_field *field)
> +{
> +	return field->token.max_child_idx == -1;

This check should live in json.h.



More information about the Tarantool-patches mailing list