[PATCH v1 5/5] box: refactor tuple_init_field_map to use bitmap
Vladimir Davydov
vdavydov.dev at gmail.com
Tue Dec 25 20:04:48 MSK 2018
On Sun, Dec 23, 2018 at 03:40:40PM +0300, Kirill Shcherbatov wrote:
> Refactored tuple_init_field_map to fill temporal bitmap and
> compare it with template req_fields_bitmap containing information
> about required fields. Each field is mapped to bitmap using
> unique field identifier.
> This approach to check the required fields will work even after
> the introduction of JSON paths, when the field tree becomes
> multilevel.
>
> Needed for #1012
> ---
> src/box/errcode.h | 2 +-
> src/box/tuple_format.c | 183 +++++++++++++++++++++++-------
> src/box/tuple_format.h | 23 ++++
> test/box/alter_limits.result | 8 +-
> test/box/ddl.result | 24 ++--
> test/box/misc.result | 2 +-
> test/box/sql.result | 12 +-
> test/box/tree_pk_multipart.result | 8 +-
> test/engine/ddl.result | 28 ++---
> test/engine/null.result | 52 ++++-----
> test/vinyl/constraint.result | 12 +-
> test/vinyl/errinj.result | 12 +-
> test/vinyl/savepoint.result | 8 +-
> 13 files changed, 250 insertions(+), 124 deletions(-)
>
> diff --git a/src/box/errcode.h b/src/box/errcode.h
> index 7d1f8ddc7..812e643d8 100644
> --- a/src/box/errcode.h
> +++ b/src/box/errcode.h
> @@ -91,7 +91,7 @@ struct errcode_record {
> /* 36 */_(ER_NO_SUCH_SPACE, "Space '%s' does not exist") \
> /* 37 */_(ER_NO_SUCH_FIELD, "Field %d was not found in the tuple") \
> /* 38 */_(ER_EXACT_FIELD_COUNT, "Tuple field count %u does not match space field count %u") \
> - /* 39 */_(ER_MIN_FIELD_COUNT, "Tuple field count %u is less than required by space format or defined indexes (expected at least %u)") \
> + /* 39 */_(ER_FIELD_STRUCTURE_MISMATCH, "Tuple field %s does not math document structure defined by space format or index: %s") \
> /* 40 */_(ER_WAL_IO, "Failed to write to disk") \
> /* 41 */_(ER_MORE_THAN_ONE_TUPLE, "Get() doesn't support partial keys and non-unique indexes") \
> /* 42 */_(ER_ACCESS_DENIED, "%s access to %s '%s' is denied for user '%s'") \
> diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
> index e29f84dc5..6e269fd77 100644
> --- a/src/box/tuple_format.c
> +++ b/src/box/tuple_format.c
> @@ -28,6 +28,8 @@
> * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> * SUCH DAMAGE.
> */
> +#include "bit/bit.h"
> +#include "fiber.h"
> #include "json/json.h"
> #include "tuple_format.h"
> #include "coll_id_cache.h"
> @@ -206,6 +208,30 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys,
> return -1;
> }
> format->field_map_size = field_map_size;
> +
> + /* Allocate required fields bitmap. */
> + uint32_t req_fields_bitmap_sz = (format->total_field_count +
> + CHAR_BIT * sizeof(unsigned long) - 1) /
> + CHAR_BIT * sizeof(unsigned long);
There's DIV_ROUND_UP helper for this. Anyway, I don't think this is
correct. Suppose there are 63 fields, then you will allocate just one
byte.
> + format->req_fields_bitmap = calloc(1, req_fields_bitmap_sz);
> + if (format->req_fields_bitmap == NULL) {
> + diag_set(OutOfMemory, req_fields_bitmap_sz, "calloc",
> + "format->req_fields_bitmap");
> + return -1;
> + }
> + struct tuple_field *field;
> + struct json_token *root = (struct json_token *)&format->fields.root;
Please remove all pointless type conversions from this patch.
> + json_tree_foreach_entry_preorder(field, root, struct tuple_field,
> + token) {
> + /*
> + * Mark all leaf non-nullable fields as required
> + * setting corresponding bit 1 in format
> + * req_fields_bitmap.
> + */
> + if (tuple_field_is_leaf(field) &&
> + !tuple_field_is_nullable(field))
> + bit_set(format->req_fields_bitmap, field->id);
> + }
> return 0;
> }
>
> @@ -304,6 +330,7 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
> struct tuple_field *field = tuple_field_new();
> if (field == NULL)
> goto error;
> + field->id = fieldno;
> field->token.num = fieldno;
> field->token.type = JSON_TOKEN_NUM;
> if (json_tree_add(&format->fields, &format->fields.root,
> @@ -323,6 +350,8 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count,
> format->dict = dict;
> tuple_dictionary_ref(dict);
> }
> + format->total_field_count = field_count;
> + format->req_fields_bitmap = NULL;
> format->refs = 0;
> format->id = FORMAT_ID_NIL;
> format->index_field_count = index_field_count;
> @@ -339,6 +368,7 @@ error:
> static inline void
> tuple_format_destroy(struct tuple_format *format)
> {
> + free(format->req_fields_bitmap);
> tuple_format_destroy_fields(format);
> tuple_dictionary_unref(format->dict);
> }
> @@ -422,6 +452,74 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1,
> return true;
> }
>
> +/**
> + * Return meta information of a tuple field given a format
> + * and a unique field identifier.
The comment should also say why it's OK to implement this function in
such a suboptimal way.
> + */
> +static struct tuple_field *
> +tuple_format_field_by_id(const struct tuple_format *format, uint32_t id)
> +{
> + struct tuple_field *field;
> + struct json_token *root = (struct json_token *)&format->fields.root;
> + json_tree_foreach_entry_preorder(field, root, struct tuple_field,
> + token) {
> + if (field->id == id)
> + return field;
> + }
> + return NULL;
> +}
> +
> +/**
> + * Analyze fields_bitmap to ensure that all required fields
> + * present in tuple. Routine relies on req_fields_bitmap is
> + * initialized on tuple_format_create and all required field's
> + * id bits are set 1.
> + */
> +static int
> +tuple_format_fields_bitmap_test(const struct tuple_format *format,
> + const char *fields_bitmap)
> +{
> + struct tuple_field *missed_field = NULL;
> + const char *req_fields_bitmap = format->req_fields_bitmap;
> + for (uint32_t i = 0; i < format->total_field_count; i++) {
> + bool is_required = bit_test(req_fields_bitmap, i);
> + if (is_required && is_required != bit_test(fields_bitmap, i)) {
> + missed_field = tuple_format_field_by_id(format, i);
> + assert(missed_field != NULL);
> + break;
> + }
> + }
This path is hot. Testing bits one by one is too slow. You need to use
ffs or builtin. I think it's worth implementing it in the scope of the
bit library (bit_find or something).
> + if (missed_field == NULL)
> + return 0;
> +
> + struct json_token *token = &missed_field->token;
> + const char *err;
> + if (missed_field->token.type == JSON_TOKEN_STR) {
> + err = tt_sprintf("map does not contain a key \"%.*s\"",
> + token->len, token->str);
> + } else if (missed_field->token.type == JSON_TOKEN_NUM) {
> + struct json_token *parent = token->parent;
> + /*
> + * Determine minimal field size looking for
> + * the greatest fieldno between non-nullable
> + * missed_field neighbors.
> + */
> + uint32_t expected_size;
> + for (int i = 0; i <= parent->max_child_idx; i++) {
> + struct tuple_field *field =
> + tuple_format_field((struct tuple_format *)format,
> + i);
> + if (!tuple_field_is_nullable(field))
> + expected_size = i + 1;
> + }
> + err = tt_sprintf("invalid array size %d (expected at least %d)",
> + token->num, expected_size);
> + }
> + diag_set(ClientError, ER_FIELD_STRUCTURE_MISMATCH,
> + tuple_field_path(missed_field), err);
Why not simply
Field %s required by space format is missing
?
> + return -1;
> +}
> +
> /** @sa declaration for details. */
> int
> tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
> @@ -441,54 +539,59 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map,
> (unsigned) format->exact_field_count);
> return -1;
> }
> - if (validate && field_count < format->min_field_count) {
> - diag_set(ClientError, ER_MIN_FIELD_COUNT,
> - (unsigned) field_count,
> - (unsigned) format->min_field_count);
> - return -1;
> - }
> -
> - /* first field is simply accessible, so we do not store offset to it */
> - const struct tuple_field *field =
> - tuple_format_field((struct tuple_format *)format, 0);
> - if (validate &&
> - !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
> - tuple_field_is_nullable(field))) {
> - diag_set(ClientError, ER_FIELD_TYPE, tuple_field_path(field),
> - field_type_strs[field->type]);
> - return -1;
> - }
> - mp_next(&pos);
> - /* other fields...*/
> - uint32_t i = 1;
> uint32_t defined_field_count = MIN(field_count, validate ?
> tuple_format_field_count(format) :
> format->index_field_count);
> - if (field_count < format->index_field_count) {
> - /*
> - * Nullify field map to be able to detect by 0,
> - * which key fields are absent in tuple_field().
> - */
> - memset((char *)field_map - format->field_map_size, 0,
> - format->field_map_size);
> - }
> - for (; i < defined_field_count; ++i) {
> - field = tuple_format_field((struct tuple_format *)format, i);
> - if (validate &&
> - !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
> - tuple_field_is_nullable(field))) {
> - diag_set(ClientError, ER_FIELD_TYPE,
> - tuple_field_path(field),
> - field_type_strs[field->type]);
You don't need to change this code. Leave it as is, please.
> + struct region *region = &fiber()->gc;
> + char *fields_bitmap = NULL;
> + if (validate) {
> + uint32_t fields_bitmap_sz = (format->total_field_count +
> + CHAR_BIT *
> + sizeof(unsigned long) - 1) /
> + CHAR_BIT * sizeof(unsigned long);
> + fields_bitmap = region_alloc(region, fields_bitmap_sz);
> + if (fields_bitmap == NULL) {
> + diag_set(OutOfMemory, fields_bitmap_sz, "calloc",
> + "fields_bitmap");
> return -1;
> }
> - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
> - field_map[field->offset_slot] =
> - (uint32_t) (pos - tuple);
> + memset(fields_bitmap, 0, fields_bitmap_sz);
> + }
> + memset((char *)field_map - format->field_map_size, 0,
> + format->field_map_size);
> + struct json_tree *tree = (struct json_tree *)&format->fields;
> + struct json_token *parent = &tree->root;
> + struct json_token token;
> + token.parent = NULL;
> + token.type = JSON_TOKEN_NUM;
> + token.num = 0;
> + while ((uint32_t)token.num < defined_field_count) {
> + struct tuple_field *field =
> + json_tree_lookup_entry(tree, parent, &token,
> + struct tuple_field, token);
> + if (field != NULL) {
> + bool is_nullable = tuple_field_is_nullable(field);
> + if (validate &&
> + !field_mp_type_is_compatible(field->type,
> + mp_typeof(*pos),
> + is_nullable) != 0) {
> + diag_set(ClientError, ER_FIELD_TYPE,
> + tuple_field_path(field),
> + field_type_strs[field->type]);
> + return -1;
> + }
> + if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
> + field_map[field->offset_slot] =
> + (uint32_t)(pos - tuple);
> + }
> + if (validate)
> + bit_set(fields_bitmap, field->id);
> }
> + token.num++;
> mp_next(&pos);
> }
> - return 0;
> + return validate ?
> + tuple_format_fields_bitmap_test(format, fields_bitmap) : 0;
> }
>
> uint32_t
> diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
> index 949337807..947d0d8a5 100644
> --- a/src/box/tuple_format.h
> +++ b/src/box/tuple_format.h
> @@ -31,6 +31,7 @@
> * SUCH DAMAGE.
> */
>
> +#include "bit/bit.h"
> #include "key_def.h"
> #include "field_def.h"
> #include "errinj.h"
> @@ -114,6 +115,8 @@ struct tuple_field {
> struct coll *coll;
> /** Collation identifier. */
> uint32_t coll_id;
> + /** Field unique identifier in tuple_format. */
> + uint32_t id;
> /** Link in tuple_format::fields. */
> struct json_token token;
> };
> @@ -173,6 +176,20 @@ struct tuple_format {
> * Shared names storage used by all formats of a space.
> */
> struct tuple_dictionary *dict;
> + /**
> + * This bitmap of "required fields" contains information
> + * about fields that must present in tuple to be inserted.
> + * Look for tuple_format_fields_bitmap_test comment for
> + * more details.
And how fields are mapped to bits?
> + */
> + char *req_fields_bitmap;
Why not call it simply required_fields?
> + /**
> + * Total count of format fields in fields subtree.
> + * Required to allocate temporal objects containing
temporal != temporary. Far from it. I guess I'll just have to rewrite
all your comments, again...
> + * attributes for all fields. Particularly to allocate
> + * temporal req_fields_bitmap on region.
> + */
> + uint32_t total_field_count;
> /**
> * Fields comprising the format, organized in a tree.
> * First level nodes correspond to tuple fields.
> @@ -194,6 +211,12 @@ tuple_format_field_count(const struct tuple_format *format)
> return root->children != NULL ? root->max_child_idx + 1 : 0;
> }
>
> +static inline bool
> +tuple_field_is_leaf(struct tuple_field *field)
> +{
> + return field->token.max_child_idx == -1;
This check should live in json.h.
More information about the Tarantool-patches
mailing list