From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v5 1/4] box: introduce tuple_format_iterator class Date: Mon, 6 May 2019 14:57:38 +0300 Message-Id: <0e9e576a96eadd8bf20de1ffe3635ac543d9de9c.1557142159.git.kshcherbatov@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: Kirill Shcherbatov List-ID: The similar code in tuple_field_map_create and vy_stmt_new_surrogate_delete_raw that performs tuple decode with tuple_format has been refactored as reusable tuple_format_iterator class. Being thus encapsulated, this code will be uniformly managed and extended in the further patches in scope of multikey indexes. Extended engine/json test with vy_stmt_new_surrogate_delete_raw corner case test. There was no problem before this patch, but small bug appeared during tuple_format_iterator_next implementation was not covered. Needed for #1257 --- src/box/tuple_format.c | 356 ++++++++++++++++++++------------------ src/box/tuple_format.h | 118 +++++++++++++ src/box/vy_stmt.c | 117 +++++-------- test/engine/json.result | 17 ++ test/engine/json.test.lua | 5 + 5 files changed, 371 insertions(+), 242 deletions(-) diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 804a678a1..de48e9bb0 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -790,179 +790,29 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, return -1; } *field_map = (uint32_t *)((char *)*field_map + *field_map_size); - - const char *pos = tuple; - int rc = 0; - /* Check to see if the tuple has a sufficient number of fields. */ - uint32_t field_count = mp_decode_array(&pos); - if (validate && format->exact_field_count > 0 && - format->exact_field_count != field_count) { - diag_set(ClientError, ER_EXACT_FIELD_COUNT, - (unsigned) field_count, - (unsigned) format->exact_field_count); - goto error; - } - /* - * Allocate a field bitmap that will be used for checking - * that all mandatory fields are present. - */ - void *required_fields = NULL; - size_t required_fields_sz = 0; - if (validate) { - required_fields_sz = bitmap_size(format->total_field_count); - required_fields = region_alloc(region, required_fields_sz); - if (required_fields == NULL) { - diag_set(OutOfMemory, required_fields_sz, - "region", "required field bitmap"); - goto error; - } - memcpy(required_fields, format->required_fields, - required_fields_sz); - } - /* - * Initialize the tuple field map and validate field types. - */ - if (field_count == 0) { - /* Empty tuple, nothing to do. */ - goto finish; - } - uint32_t defined_field_count = MIN(field_count, validate ? - tuple_format_field_count(format) : - format->index_field_count); /* * Nullify field map to be able to detect by 0, * which key fields are absent in tuple_field(). */ memset((char *)*field_map - *field_map_size, 0, *field_map_size); - /* - * Prepare mp stack of the size equal to the maximum depth - * of the indexed field in the format::fields tree - * (fields_depth) to carry out a simultaneous parsing of - * the tuple and tree traversal to process type - * validations and field map initialization. - */ - uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame); - struct mp_frame *frames = region_alloc(region, frames_sz); - if (frames == NULL) { - diag_set(OutOfMemory, frames_sz, "region", "frames"); - goto error; - } - struct mp_stack stack; - mp_stack_create(&stack, format->fields_depth, frames); - mp_stack_push(&stack, MP_ARRAY, defined_field_count); - struct tuple_field *field; - struct json_token *parent = &format->fields.root; - while (true) { - struct mp_frame *frame = mp_stack_top(&stack); - while (!mp_frame_advance(frame)) { - /* - * If the elements of the current frame - * are over, pop this frame out of stack - * and climb one position in the - * format::fields tree to match the - * changed JSON path to the data in the - * tuple. - */ - mp_stack_pop(&stack); - if (mp_stack_is_empty(&stack)) - goto finish; - frame = mp_stack_top(&stack); - parent = parent->parent; - } - /* - * Use the top frame of the stack and the - * current data offset to prepare the JSON token - * for the subsequent format::fields lookup. - */ - struct json_token token; - switch (frame->type) { - case MP_ARRAY: - token.type = JSON_TOKEN_NUM; - token.num = frame->idx; - break; - case MP_MAP: - if (mp_typeof(*pos) != MP_STR) { - /* - * JSON path support only string - * keys for map. Skip this entry. - */ - mp_next(&pos); - mp_next(&pos); - continue; - } - token.type = JSON_TOKEN_STR; - token.str = mp_decode_str(&pos, (uint32_t *)&token.len); - break; - default: - unreachable(); - } - /* - * Perform lookup for a field in format::fields, - * that represents the field metadata by JSON path - * corresponding to the current position in the - * tuple. - */ - enum mp_type type = mp_typeof(*pos); - assert(parent != NULL); - field = json_tree_lookup_entry(&format->fields, parent, &token, - struct tuple_field, token); - if (field != NULL) { - bool is_nullable = tuple_field_is_nullable(field); - if (validate && - !field_mp_type_is_compatible(field->type, type, - is_nullable) != 0) { - diag_set(ClientError, ER_FIELD_TYPE, - tuple_field_path(field), - field_type_strs[field->type]); - goto error; - } - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) - (*field_map)[field->offset_slot] = pos - tuple; - if (required_fields != NULL) - bit_clear(required_fields, field->id); - } - /* - * If the current position of the data in tuple - * matches the container type (MP_MAP or MP_ARRAY) - * and the format::fields tree has such a record, - * prepare a new stack frame because it needs to - * be analyzed in the next iterations. - */ - if ((type == MP_ARRAY || type == MP_MAP) && - !mp_stack_is_full(&stack) && field != NULL) { - uint32_t size = type == MP_ARRAY ? - mp_decode_array(&pos) : - mp_decode_map(&pos); - mp_stack_push(&stack, type, size); - parent = &field->token; - } else { - mp_next(&pos); - } - } -finish: - /* - * Check the required field bitmap for missing fields. - */ - if (required_fields != NULL) { - struct bit_iterator it; - bit_iterator_init(&it, required_fields, - required_fields_sz, true); - size_t id = bit_iterator_next(&it); - if (id < SIZE_MAX) { - /* A field is missing, report an error. */ - field = tuple_format_field_by_id(format, id); - assert(field != NULL); - diag_set(ClientError, ER_FIELD_MISSING, - tuple_field_path(field)); - goto error; + uint32_t field_count; + struct tuple_format_iterator it; + uint8_t flags = validate ? TUPLE_FORMAT_ITERATOR_VALIDATE : 0; + if (tuple_format_iterator_create(&it, format, tuple, flags, + &field_count, region) != 0) + return -1; + struct tuple_format_iterator_entry entry; + while (tuple_format_iterator_next(&it, &entry) == 0 && + entry.data != NULL) { + if (entry.field == NULL) + continue; + if (entry.field->offset_slot != TUPLE_OFFSET_SLOT_NIL) { + (*field_map)[entry.field->offset_slot] = + entry.data - tuple; } } -out: *field_map = (uint32_t *)((char *)*field_map - *field_map_size); - return rc; -error: - rc = -1; - goto out; + return entry.data == NULL ? 0 : -1; } uint32_t @@ -1033,3 +883,179 @@ box_tuple_format_unref(box_tuple_format_t *format) tuple_format_unref(format); } +int +tuple_format_iterator_create(struct tuple_format_iterator *it, + struct tuple_format *format, const char *tuple, + uint8_t flags, uint32_t *defined_field_count, + struct region *region) +{ + assert(mp_typeof(*tuple) == MP_ARRAY); + *defined_field_count = mp_decode_array(&tuple); + bool validate = flags & TUPLE_FORMAT_ITERATOR_VALIDATE; + if (validate && format->exact_field_count > 0 && + format->exact_field_count != *defined_field_count) { + diag_set(ClientError, ER_EXACT_FIELD_COUNT, + (unsigned) *defined_field_count, + (unsigned) format->exact_field_count); + return -1; + } + it->parent = &format->fields.root; + it->format = format; + it->pos = tuple; + it->flags = flags; + it->required_fields = NULL; + it->required_fields_sz = 0; + + uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame); + if (validate) + it->required_fields_sz = bitmap_size(format->total_field_count); + uint32_t total_sz = frames_sz + 2 * it->required_fields_sz; + struct mp_frame *frames = region_alloc(region, total_sz); + if (frames == NULL) { + diag_set(OutOfMemory, total_sz, "region", + "tuple_format_iterator"); + return -1; + } + mp_stack_create(&it->stack, format->fields_depth, frames); + *defined_field_count = MIN(*defined_field_count, validate ? + tuple_format_field_count(format) : + format->index_field_count); + mp_stack_push(&it->stack, MP_ARRAY, *defined_field_count); + + if (validate) { + it->required_fields = (char *)frames + frames_sz; + memcpy(it->required_fields, format->required_fields, + it->required_fields_sz); + } + return 0; +} + +/** + * Scan required_fields bitmap and raise error when it is + * non-empty. + * @sa format:required_fields and field:multikey_required_fields + * definition. + */ +static int +tuple_format_required_fields_validate(struct tuple_format *format, + void *required_fields, + uint32_t required_fields_sz) +{ + struct bit_iterator it; + bit_iterator_init(&it, required_fields, required_fields_sz, true); + size_t id = bit_iterator_next(&it); + if (id < SIZE_MAX) { + /* A field is missing, report an error. */ + struct tuple_field *field = + tuple_format_field_by_id(format, id); + assert(field != NULL); + diag_set(ClientError, ER_FIELD_MISSING, + tuple_field_path(field)); + return -1; + } + return 0; +} + +int +tuple_format_iterator_next(struct tuple_format_iterator *it, + struct tuple_format_iterator_entry *entry) +{ + entry->data = it->pos; + struct mp_frame *frame = mp_stack_top(&it->stack); + while (!mp_frame_advance(frame)) { + /* + * If the elements of the current frame + * are over, pop this frame out of stack + * and climb one position in the format::fields + * tree to match the changed JSON path to the + * data in the tuple. + */ + mp_stack_pop(&it->stack); + if (mp_stack_is_empty(&it->stack)) + goto eof; + frame = mp_stack_top(&it->stack); + it->parent = it->parent->parent; + } + entry->parent = + it->parent != &it->format->fields.root ? + json_tree_entry(it->parent, struct tuple_field, token) : NULL; + /* + * Use the top frame of the stack and the + * current data offset to prepare the JSON token + * and perform subsequent format::fields lookup. + */ + struct json_token token; + switch (frame->type) { + case MP_ARRAY: + token.type = JSON_TOKEN_NUM; + token.num = frame->idx; + break; + case MP_MAP: + if (mp_typeof(*it->pos) != MP_STR) { + entry->data = it->pos; + entry->field = NULL; + mp_next(&it->pos); + mp_next(&it->pos); + return 0; + } + token.type = JSON_TOKEN_STR; + token.str = mp_decode_str(&it->pos, (uint32_t *)&token.len); + break; + default: + unreachable(); + } + assert(it->parent != NULL); + struct tuple_field *field = + json_tree_lookup_entry(&it->format->fields, it->parent, &token, + struct tuple_field, token); + if (it->flags & TUPLE_FORMAT_ITERATOR_KEY_PARTS_ONLY && + field != NULL && !field->is_key_part) + field = NULL; + entry->field = field; + entry->data = it->pos; + /* + * If the current position of the data in tuple + * matches the container type (MP_MAP or MP_ARRAY) + * and the format::fields tree has such a record, + * prepare a new stack frame because it needs to + * be analyzed in the next iterations. + */ + enum mp_type type = mp_typeof(*it->pos); + if ((type == MP_ARRAY || type == MP_MAP) && + !mp_stack_is_full(&it->stack) && field != NULL) { + uint32_t size = type == MP_ARRAY ? + mp_decode_array(&it->pos) : + mp_decode_map(&it->pos); + entry->count = size; + mp_stack_push(&it->stack, type, size); + it->parent = &field->token; + } else { + entry->count = 0; + mp_next(&it->pos); + } + entry->data_end = it->pos; + if (field == NULL || (it->flags & TUPLE_FORMAT_ITERATOR_VALIDATE) == 0) + return 0; + + /* + * Check if field mp_type is compatible with type + * defined in format. + */ + bool is_nullable = tuple_field_is_nullable(field); + if (!field_mp_type_is_compatible(field->type, mp_typeof(*entry->data), + is_nullable) != 0) { + diag_set(ClientError, ER_FIELD_TYPE, + tuple_field_path(field), + field_type_strs[field->type]); + return -1; + } + bit_clear(it->required_fields, field->id); + return 0; +eof: + if (it->flags & TUPLE_FORMAT_ITERATOR_VALIDATE && + tuple_format_required_fields_validate(it->format, + it->required_fields, it->required_fields_sz) != 0) + return -1; + entry->data = NULL; + return 0; +} diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h index 22a0fb232..244f60266 100644 --- a/src/box/tuple_format.h +++ b/src/box/tuple_format.h @@ -412,6 +412,124 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, int tuple_format_init(); + +/** Tuple format iterator flags to configure parse mode. */ +enum { + /** + * This flag is set for iterator that should perform tuple + * validation to conform the specified format. + */ + TUPLE_FORMAT_ITERATOR_VALIDATE = 1 << 0, + /** + * This flag is set for iterator that should skip the + * tuple fields that are not marked as key_parts in + * format::fields tree. + */ + TUPLE_FORMAT_ITERATOR_KEY_PARTS_ONLY = 1 << 1, +}; + +/** + * A tuple msgpack iterator that decodes the tuple and returns + * only fields that are described in the tuple_format. + */ +struct tuple_format_iterator { + /** The current read position in msgpack. */ + const char *pos; + /** + * Tuple format is used to perform field lookups in + * format::fields JSON tree. + */ + struct tuple_format *format; + /** The combination of tuple format iterator flags. */ + uint8_t flags; + /** + * Traversal stack of msgpack frames is used to determine + * when the parsing of the current composite mp structure + * (array or map) is completed to update to the parent + * pointer accordingly. + * + * Stack has the size equal to the maximum depth of the + * indexed field in the format::fields tree + * (format::fields_depth). + */ + struct mp_stack stack; + /** + * The pointer to the parent node in the format::fields + * JSON tree. Is required for relative lookup for the + * next field. + */ + struct json_token *parent; + /** + * The size of validation bitmasks required_fields and + * multikey_required_fields. + */ + uint32_t required_fields_sz; + /** + * Field bitmap that is used for checking that all + * mandatory fields are present. + * Not NULL iff validate == true. + */ + void *required_fields; +}; + +/** Tuple format iterator next method's returning entry. */ +struct tuple_format_iterator_entry { + /** Pointer to the tuple field data. NULL if EOF. */ + const char *data; + /** + * Pointer to the end of tuple field data. + * Is defined only for leaf fields + * (json_token_is_leaf(&field->token) == true). + */ + const char *data_end; + /** + * Format field metadata that represents the data field. + * May be NULL if the field isn't present in the format. + * + * (All child entries of an array are returned present in + * the format, no matter formatted or not) + */ + struct tuple_field *field; + /** + * Format parent field metadata. NULL for top-level + * fields. + */ + struct tuple_field *parent; + /** + * Number of child entries of the analyzed field that has + * container type. Is defined for intermediate fields. + * (field->type in FIELD_TYPE_ARRAY, FIELD_TYPE_MAP). + */ + int count; +}; + +/** + * Initialize tuple decode iterator with tuple format and tuple + * data pointer. + * + * Function uses the region to allocate the traversal stack + * and required fields bitmasks. + * + * Returns 0 on success. In case of error sets diag message and + * returns -1. + */ +int +tuple_format_iterator_create(struct tuple_format_iterator *it, + struct tuple_format *format, const char *tuple, + uint8_t flags, uint32_t *field_count, + struct region *region); + +/** + * Perform tuple decode step and update iterator state. + * Update entry pointer with actual format parse context. + * + * Returns 0 on success. In case of error sets diag message and + * returns -1. + */ +int +tuple_format_iterator_next(struct tuple_format_iterator *it, + struct tuple_format_iterator_entry *entry); + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c index e1cdd293d..8d46a9eac 100644 --- a/src/box/vy_stmt.c +++ b/src/box/vy_stmt.c @@ -417,10 +417,17 @@ vy_stmt_new_surrogate_delete_raw(struct tuple_format *format, } char *field_map_begin = data + src_size; uint32_t *field_map = (uint32_t *) (data + total_size); - - const char *src_pos = src_data; - uint32_t src_count = mp_decode_array(&src_pos); - uint32_t field_count = MIN(src_count, format->index_field_count); + /* + * Perform simultaneous parsing of the tuple and + * format::fields tree traversal to copy indexed field + * data and initialize field map. + */ + uint32_t field_count; + struct tuple_format_iterator it; + if (tuple_format_iterator_create(&it, format, src_data, + TUPLE_FORMAT_ITERATOR_KEY_PARTS_ONLY, &field_count, + region) != 0) + goto out; /* * Nullify field map to be able to detect by 0, which key * fields are absent in tuple_field(). @@ -428,85 +435,41 @@ vy_stmt_new_surrogate_delete_raw(struct tuple_format *format, memset((char *)field_map - format->field_map_size, 0, format->field_map_size); char *pos = mp_encode_array(data, field_count); - /* - * Perform simultaneous parsing of the tuple and - * format::fields tree traversal to copy indexed field - * data and initialize field map. In many details the code - * above works like tuple_field_map_create, read it's - * comments for more details. - */ - uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame); - struct mp_frame *frames = region_alloc(region, frames_sz); - if (frames == NULL) { - diag_set(OutOfMemory, frames_sz, "region", "frames"); - goto out; - } - struct mp_stack stack; - mp_stack_create(&stack, format->fields_depth, frames); - mp_stack_push(&stack, MP_ARRAY, field_count); - struct tuple_field *field; - struct json_token *parent = &format->fields.root; - while (true) { - struct mp_frame *frame = mp_stack_top(&stack); - while (!mp_frame_advance(frame)) { - mp_stack_pop(&stack); - if (mp_stack_is_empty(&stack)) - goto finish; - frame = mp_stack_top(&stack); - parent = parent->parent; - } - struct json_token token; - switch (frame->type) { - case MP_ARRAY: - token.type = JSON_TOKEN_NUM; - token.num = frame->idx; - break; - case MP_MAP: - if (mp_typeof(*src_pos) != MP_STR) { - mp_next(&src_pos); - mp_next(&src_pos); - pos = mp_encode_nil(pos); - pos = mp_encode_nil(pos); - continue; - } - token.type = JSON_TOKEN_STR; - token.str = mp_decode_str(&src_pos, (uint32_t *)&token.len); - pos = mp_encode_str(pos, token.str, token.len); - break; - default: - unreachable(); - } - assert(parent != NULL); - field = json_tree_lookup_entry(&format->fields, parent, &token, - struct tuple_field, token); - if (field == NULL || !field->is_key_part) { - mp_next(&src_pos); + + struct tuple_format_iterator_entry entry; + while (tuple_format_iterator_next(&it, &entry) == 0 && + entry.data != NULL) { + if (entry.field == NULL) { + /* + * Instead of copying useless data having + * no representation in tuple_format, + * write nil. + */ pos = mp_encode_nil(pos); + if (entry.parent != NULL && + entry.parent->type == FIELD_TYPE_MAP) + pos = mp_encode_nil(pos); continue; } - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) - field_map[field->offset_slot] = pos - data; - enum mp_type type = mp_typeof(*src_pos); - if ((type == MP_ARRAY || type == MP_MAP) && - !mp_stack_is_full(&stack)) { - uint32_t size; - if (type == MP_ARRAY) { - size = mp_decode_array(&src_pos); - pos = mp_encode_array(pos, size); - } else { - size = mp_decode_map(&src_pos); - pos = mp_encode_map(pos, size); - } - mp_stack_push(&stack, type, size); - parent = &field->token; + if (entry.field->token.type == JSON_TOKEN_STR) { + pos = mp_encode_str(pos, entry.field->token.str, + entry.field->token.len); + } + /* Initialize field_map with data offset. */ + if (entry.field->offset_slot != TUPLE_OFFSET_SLOT_NIL) + field_map[entry.field->offset_slot] = pos - data; + /* Copy field data. */ + if (entry.field->type == FIELD_TYPE_ARRAY) { + pos = mp_encode_array(pos, entry.count); + } else if (entry.field->type == FIELD_TYPE_MAP) { + pos = mp_encode_map(pos, entry.count); } else { - const char *src_field = src_pos; - mp_next(&src_pos); - memcpy(pos, src_field, src_pos - src_field); - pos += src_pos - src_field; + memcpy(pos, entry.data, entry.data_end - entry.data); + pos += entry.data_end - entry.data; } } -finish: + if (entry.data != NULL) + goto out; assert(pos <= data + src_size); uint32_t bsize = pos - data; stmt = vy_stmt_alloc(format, bsize); diff --git a/test/engine/json.result b/test/engine/json.result index 09c704963..84b1309e1 100644 --- a/test/engine/json.result +++ b/test/engine/json.result @@ -702,6 +702,23 @@ s:replace({4, {"d1", name='D1'}, "test"}) --- - [4, {1: 'd1', 'name': 'D1'}, 'test'] ... +idx0:drop() +--- +... +s:truncate() +--- +... +idx0 = s:create_index('idx2', {parts = {{3, 'str', path = '[1].fname'}, {3, 'str', path = '[1].sname'}}}) +--- +... +s:insert({5, {1, 1, 1}, {{fname='A', sname='B'}, {fname='C', sname='D'}, {fname='A', sname='B'}}}) +--- +- [5, [1, 1, 1], [{'fname': 'A', 'sname': 'B'}, {'fname': 'C', 'sname': 'D'}, {'fname': 'A', + 'sname': 'B'}]] +... +s:delete(5) +--- +... s:drop() --- ... diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua index 5c235e1ba..e864ec14d 100644 --- a/test/engine/json.test.lua +++ b/test/engine/json.test.lua @@ -200,6 +200,11 @@ pk = s:create_index('pk', {parts={{1, 'int'}}}) idx0 = s:create_index('idx0', {parts = {{2, 'str', path = 'name'}, {3, "str"}}}) s:insert({4, {"d", name='D'}, "test"}) s:replace({4, {"d1", name='D1'}, "test"}) +idx0:drop() +s:truncate() +idx0 = s:create_index('idx2', {parts = {{3, 'str', path = '[1].fname'}, {3, 'str', path = '[1].sname'}}}) +s:insert({5, {1, 1, 1}, {{fname='A', sname='B'}, {fname='C', sname='D'}, {fname='A', sname='B'}}}) +s:delete(5) s:drop() -- -- 2.21.0