From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Tue, 25 Dec 2018 20:04:48 +0300 From: Vladimir Davydov Subject: Re: [PATCH v1 5/5] box: refactor tuple_init_field_map to use bitmap Message-ID: <20181225170448.6q3dac2cupp5wg7p@esperanza> References: <9009e7b9b9954231c29fc904a5ddf88041907a74.1545567929.git.kshcherbatov@tarantool.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <9009e7b9b9954231c29fc904a5ddf88041907a74.1545567929.git.kshcherbatov@tarantool.org> To: Kirill Shcherbatov Cc: tarantool-patches@freelists.org, kostja@tarantool.org List-ID: On Sun, Dec 23, 2018 at 03:40:40PM +0300, Kirill Shcherbatov wrote: > Refactored tuple_init_field_map to fill temporal bitmap and > compare it with template req_fields_bitmap containing information > about required fields. Each field is mapped to bitmap using > unique field identifier. > This approach to check the required fields will work even after > the introduction of JSON paths, when the field tree becomes > multilevel. > > Needed for #1012 > --- > src/box/errcode.h | 2 +- > src/box/tuple_format.c | 183 +++++++++++++++++++++++------- > src/box/tuple_format.h | 23 ++++ > test/box/alter_limits.result | 8 +- > test/box/ddl.result | 24 ++-- > test/box/misc.result | 2 +- > test/box/sql.result | 12 +- > test/box/tree_pk_multipart.result | 8 +- > test/engine/ddl.result | 28 ++--- > test/engine/null.result | 52 ++++----- > test/vinyl/constraint.result | 12 +- > test/vinyl/errinj.result | 12 +- > test/vinyl/savepoint.result | 8 +- > 13 files changed, 250 insertions(+), 124 deletions(-) > > diff --git a/src/box/errcode.h b/src/box/errcode.h > index 7d1f8ddc7..812e643d8 100644 > --- a/src/box/errcode.h > +++ b/src/box/errcode.h > @@ -91,7 +91,7 @@ struct errcode_record { > /* 36 */_(ER_NO_SUCH_SPACE, "Space '%s' does not exist") \ > /* 37 */_(ER_NO_SUCH_FIELD, "Field %d was not found in the tuple") \ > /* 38 */_(ER_EXACT_FIELD_COUNT, "Tuple field count %u does not match space field count %u") \ > - /* 39 */_(ER_MIN_FIELD_COUNT, "Tuple field count %u is less than required by space format or defined indexes (expected at least %u)") \ > + /* 39 */_(ER_FIELD_STRUCTURE_MISMATCH, "Tuple field %s does not math document structure defined by space format or index: %s") \ > /* 40 */_(ER_WAL_IO, "Failed to write to disk") \ > /* 41 */_(ER_MORE_THAN_ONE_TUPLE, "Get() doesn't support partial keys and non-unique indexes") \ > /* 42 */_(ER_ACCESS_DENIED, "%s access to %s '%s' is denied for user '%s'") \ > diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c > index e29f84dc5..6e269fd77 100644 > --- a/src/box/tuple_format.c > +++ b/src/box/tuple_format.c > @@ -28,6 +28,8 @@ > * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF > * SUCH DAMAGE. > */ > +#include "bit/bit.h" > +#include "fiber.h" > #include "json/json.h" > #include "tuple_format.h" > #include "coll_id_cache.h" > @@ -206,6 +208,30 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, > return -1; > } > format->field_map_size = field_map_size; > + > + /* Allocate required fields bitmap. */ > + uint32_t req_fields_bitmap_sz = (format->total_field_count + > + CHAR_BIT * sizeof(unsigned long) - 1) / > + CHAR_BIT * sizeof(unsigned long); There's DIV_ROUND_UP helper for this. Anyway, I don't think this is correct. Suppose there are 63 fields, then you will allocate just one byte. > + format->req_fields_bitmap = calloc(1, req_fields_bitmap_sz); > + if (format->req_fields_bitmap == NULL) { > + diag_set(OutOfMemory, req_fields_bitmap_sz, "calloc", > + "format->req_fields_bitmap"); > + return -1; > + } > + struct tuple_field *field; > + struct json_token *root = (struct json_token *)&format->fields.root; Please remove all pointless type conversions from this patch. > + json_tree_foreach_entry_preorder(field, root, struct tuple_field, > + token) { > + /* > + * Mark all leaf non-nullable fields as required > + * setting corresponding bit 1 in format > + * req_fields_bitmap. > + */ > + if (tuple_field_is_leaf(field) && > + !tuple_field_is_nullable(field)) > + bit_set(format->req_fields_bitmap, field->id); > + } > return 0; > } > > @@ -304,6 +330,7 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, > struct tuple_field *field = tuple_field_new(); > if (field == NULL) > goto error; > + field->id = fieldno; > field->token.num = fieldno; > field->token.type = JSON_TOKEN_NUM; > if (json_tree_add(&format->fields, &format->fields.root, > @@ -323,6 +350,8 @@ tuple_format_alloc(struct key_def * const *keys, uint16_t key_count, > format->dict = dict; > tuple_dictionary_ref(dict); > } > + format->total_field_count = field_count; > + format->req_fields_bitmap = NULL; > format->refs = 0; > format->id = FORMAT_ID_NIL; > format->index_field_count = index_field_count; > @@ -339,6 +368,7 @@ error: > static inline void > tuple_format_destroy(struct tuple_format *format) > { > + free(format->req_fields_bitmap); > tuple_format_destroy_fields(format); > tuple_dictionary_unref(format->dict); > } > @@ -422,6 +452,74 @@ tuple_format1_can_store_format2_tuples(struct tuple_format *format1, > return true; > } > > +/** > + * Return meta information of a tuple field given a format > + * and a unique field identifier. The comment should also say why it's OK to implement this function in such a suboptimal way. > + */ > +static struct tuple_field * > +tuple_format_field_by_id(const struct tuple_format *format, uint32_t id) > +{ > + struct tuple_field *field; > + struct json_token *root = (struct json_token *)&format->fields.root; > + json_tree_foreach_entry_preorder(field, root, struct tuple_field, > + token) { > + if (field->id == id) > + return field; > + } > + return NULL; > +} > + > +/** > + * Analyze fields_bitmap to ensure that all required fields > + * present in tuple. Routine relies on req_fields_bitmap is > + * initialized on tuple_format_create and all required field's > + * id bits are set 1. > + */ > +static int > +tuple_format_fields_bitmap_test(const struct tuple_format *format, > + const char *fields_bitmap) > +{ > + struct tuple_field *missed_field = NULL; > + const char *req_fields_bitmap = format->req_fields_bitmap; > + for (uint32_t i = 0; i < format->total_field_count; i++) { > + bool is_required = bit_test(req_fields_bitmap, i); > + if (is_required && is_required != bit_test(fields_bitmap, i)) { > + missed_field = tuple_format_field_by_id(format, i); > + assert(missed_field != NULL); > + break; > + } > + } This path is hot. Testing bits one by one is too slow. You need to use ffs or builtin. I think it's worth implementing it in the scope of the bit library (bit_find or something). > + if (missed_field == NULL) > + return 0; > + > + struct json_token *token = &missed_field->token; > + const char *err; > + if (missed_field->token.type == JSON_TOKEN_STR) { > + err = tt_sprintf("map does not contain a key \"%.*s\"", > + token->len, token->str); > + } else if (missed_field->token.type == JSON_TOKEN_NUM) { > + struct json_token *parent = token->parent; > + /* > + * Determine minimal field size looking for > + * the greatest fieldno between non-nullable > + * missed_field neighbors. > + */ > + uint32_t expected_size; > + for (int i = 0; i <= parent->max_child_idx; i++) { > + struct tuple_field *field = > + tuple_format_field((struct tuple_format *)format, > + i); > + if (!tuple_field_is_nullable(field)) > + expected_size = i + 1; > + } > + err = tt_sprintf("invalid array size %d (expected at least %d)", > + token->num, expected_size); > + } > + diag_set(ClientError, ER_FIELD_STRUCTURE_MISMATCH, > + tuple_field_path(missed_field), err); Why not simply Field %s required by space format is missing ? > + return -1; > +} > + > /** @sa declaration for details. */ > int > tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map, > @@ -441,54 +539,59 @@ tuple_init_field_map(const struct tuple_format *format, uint32_t *field_map, > (unsigned) format->exact_field_count); > return -1; > } > - if (validate && field_count < format->min_field_count) { > - diag_set(ClientError, ER_MIN_FIELD_COUNT, > - (unsigned) field_count, > - (unsigned) format->min_field_count); > - return -1; > - } > - > - /* first field is simply accessible, so we do not store offset to it */ > - const struct tuple_field *field = > - tuple_format_field((struct tuple_format *)format, 0); > - if (validate && > - !field_mp_type_is_compatible(field->type, mp_typeof(*pos), > - tuple_field_is_nullable(field))) { > - diag_set(ClientError, ER_FIELD_TYPE, tuple_field_path(field), > - field_type_strs[field->type]); > - return -1; > - } > - mp_next(&pos); > - /* other fields...*/ > - uint32_t i = 1; > uint32_t defined_field_count = MIN(field_count, validate ? > tuple_format_field_count(format) : > format->index_field_count); > - if (field_count < format->index_field_count) { > - /* > - * Nullify field map to be able to detect by 0, > - * which key fields are absent in tuple_field(). > - */ > - memset((char *)field_map - format->field_map_size, 0, > - format->field_map_size); > - } > - for (; i < defined_field_count; ++i) { > - field = tuple_format_field((struct tuple_format *)format, i); > - if (validate && > - !field_mp_type_is_compatible(field->type, mp_typeof(*pos), > - tuple_field_is_nullable(field))) { > - diag_set(ClientError, ER_FIELD_TYPE, > - tuple_field_path(field), > - field_type_strs[field->type]); You don't need to change this code. Leave it as is, please. > + struct region *region = &fiber()->gc; > + char *fields_bitmap = NULL; > + if (validate) { > + uint32_t fields_bitmap_sz = (format->total_field_count + > + CHAR_BIT * > + sizeof(unsigned long) - 1) / > + CHAR_BIT * sizeof(unsigned long); > + fields_bitmap = region_alloc(region, fields_bitmap_sz); > + if (fields_bitmap == NULL) { > + diag_set(OutOfMemory, fields_bitmap_sz, "calloc", > + "fields_bitmap"); > return -1; > } > - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { > - field_map[field->offset_slot] = > - (uint32_t) (pos - tuple); > + memset(fields_bitmap, 0, fields_bitmap_sz); > + } > + memset((char *)field_map - format->field_map_size, 0, > + format->field_map_size); > + struct json_tree *tree = (struct json_tree *)&format->fields; > + struct json_token *parent = &tree->root; > + struct json_token token; > + token.parent = NULL; > + token.type = JSON_TOKEN_NUM; > + token.num = 0; > + while ((uint32_t)token.num < defined_field_count) { > + struct tuple_field *field = > + json_tree_lookup_entry(tree, parent, &token, > + struct tuple_field, token); > + if (field != NULL) { > + bool is_nullable = tuple_field_is_nullable(field); > + if (validate && > + !field_mp_type_is_compatible(field->type, > + mp_typeof(*pos), > + is_nullable) != 0) { > + diag_set(ClientError, ER_FIELD_TYPE, > + tuple_field_path(field), > + field_type_strs[field->type]); > + return -1; > + } > + if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { > + field_map[field->offset_slot] = > + (uint32_t)(pos - tuple); > + } > + if (validate) > + bit_set(fields_bitmap, field->id); > } > + token.num++; > mp_next(&pos); > } > - return 0; > + return validate ? > + tuple_format_fields_bitmap_test(format, fields_bitmap) : 0; > } > > uint32_t > diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h > index 949337807..947d0d8a5 100644 > --- a/src/box/tuple_format.h > +++ b/src/box/tuple_format.h > @@ -31,6 +31,7 @@ > * SUCH DAMAGE. > */ > > +#include "bit/bit.h" > #include "key_def.h" > #include "field_def.h" > #include "errinj.h" > @@ -114,6 +115,8 @@ struct tuple_field { > struct coll *coll; > /** Collation identifier. */ > uint32_t coll_id; > + /** Field unique identifier in tuple_format. */ > + uint32_t id; > /** Link in tuple_format::fields. */ > struct json_token token; > }; > @@ -173,6 +176,20 @@ struct tuple_format { > * Shared names storage used by all formats of a space. > */ > struct tuple_dictionary *dict; > + /** > + * This bitmap of "required fields" contains information > + * about fields that must present in tuple to be inserted. > + * Look for tuple_format_fields_bitmap_test comment for > + * more details. And how fields are mapped to bits? > + */ > + char *req_fields_bitmap; Why not call it simply required_fields? > + /** > + * Total count of format fields in fields subtree. > + * Required to allocate temporal objects containing temporal != temporary. Far from it. I guess I'll just have to rewrite all your comments, again... > + * attributes for all fields. Particularly to allocate > + * temporal req_fields_bitmap on region. > + */ > + uint32_t total_field_count; > /** > * Fields comprising the format, organized in a tree. > * First level nodes correspond to tuple fields. > @@ -194,6 +211,12 @@ tuple_format_field_count(const struct tuple_format *format) > return root->children != NULL ? root->max_child_idx + 1 : 0; > } > > +static inline bool > +tuple_field_is_leaf(struct tuple_field *field) > +{ > + return field->token.max_child_idx == -1; This check should live in json.h.