From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v4 1/3] box: introduce tuple_format_iterator class Date: Fri, 19 Apr 2019 17:14:23 +0300 Message-Id: <57c7d98b69f64abdb1bec67aa837e6c2245f02c2.1555682707.git.kshcherbatov@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: kostja@tarantool.org, Kirill Shcherbatov List-ID: The similar code in tuple_field_map_create and vy_stmt_new_surrogate_delete_raw that performs tuple decode with tuple_format has been refactored into a reusable tuple_format_iterator class. Being thus encapsulated, this code will be uniformly managed and extended in further patches in the scope of multikey indexes. Extended the engine/json test with a vy_stmt_new_surrogate_delete_raw corner case. There was no problem before this patch, but a small bug that appeared while implementing tuple_format_iterator_next was not covered by the existing tests. Needed for #1257 --- src/box/tuple_format.c | 161 ++++++++++++++------------------------ src/box/tuple_format.h | 140 +++++++++++++++++++++++++++++++++ src/box/vy_stmt.c | 110 +++++++++----------------- test/engine/json.result | 17 ++++ test/engine/json.test.lua | 5 ++ 5 files changed, 259 insertions(+), 174 deletions(-) diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 804a678a1..3c56f3703 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -793,8 +793,13 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, const char *pos = tuple; int rc = 0; + struct tuple_format_iterator it; + if (tuple_format_iterator_create(&it, format, tuple, region) != 0) + goto error; + /* Check to see if the tuple has a sufficient number of fields. */ - uint32_t field_count = mp_decode_array(&pos); + uint32_t field_count = !mp_stack_is_empty(&it.stack) ? 
+ mp_stack_top(&it.stack)->count : 0; if (validate && format->exact_field_count > 0 && format->exact_field_count != field_count) { diag_set(ClientError, ER_EXACT_FIELD_COUNT, @@ -829,115 +834,38 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, uint32_t defined_field_count = MIN(field_count, validate ? tuple_format_field_count(format) : format->index_field_count); + mp_stack_top(&it.stack)->count = defined_field_count; + /* * Nullify field map to be able to detect by 0, * which key fields are absent in tuple_field(). */ memset((char *)*field_map - *field_map_size, 0, *field_map_size); - /* - * Prepare mp stack of the size equal to the maximum depth - * of the indexed field in the format::fields tree - * (fields_depth) to carry out a simultaneous parsing of - * the tuple and tree traversal to process type - * validations and field map initialization. - */ - uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame); - struct mp_frame *frames = region_alloc(region, frames_sz); - if (frames == NULL) { - diag_set(OutOfMemory, frames_sz, "region", "frames"); - goto error; - } - struct mp_stack stack; - mp_stack_create(&stack, format->fields_depth, frames); - mp_stack_push(&stack, MP_ARRAY, defined_field_count); + const char *pos_end; struct tuple_field *field; - struct json_token *parent = &format->fields.root; - while (true) { - struct mp_frame *frame = mp_stack_top(&stack); - while (!mp_frame_advance(frame)) { - /* - * If the elements of the current frame - * are over, pop this frame out of stack - * and climb one position in the - * format::fields tree to match the - * changed JSON path to the data in the - * tuple. - */ - mp_stack_pop(&stack); - if (mp_stack_is_empty(&stack)) - goto finish; - frame = mp_stack_top(&stack); - parent = parent->parent; - } - /* - * Use the top frame of the stack and the - * current data offset to prepare the JSON token - * for the subsequent format::fields lookup. 
- */ - struct json_token token; - switch (frame->type) { - case MP_ARRAY: - token.type = JSON_TOKEN_NUM; - token.num = frame->idx; - break; - case MP_MAP: - if (mp_typeof(*pos) != MP_STR) { - /* - * JSON path support only string - * keys for map. Skip this entry. - */ - mp_next(&pos); - mp_next(&pos); - continue; - } - token.type = JSON_TOKEN_STR; - token.str = mp_decode_str(&pos, (uint32_t *)&token.len); - break; - default: - unreachable(); - } - /* - * Perform lookup for a field in format::fields, - * that represents the field metadata by JSON path - * corresponding to the current position in the - * tuple. - */ - enum mp_type type = mp_typeof(*pos); - assert(parent != NULL); - field = json_tree_lookup_entry(&format->fields, parent, &token, - struct tuple_field, token); - if (field != NULL) { - bool is_nullable = tuple_field_is_nullable(field); - if (validate && - !field_mp_type_is_compatible(field->type, type, - is_nullable) != 0) { - diag_set(ClientError, ER_FIELD_TYPE, - tuple_field_path(field), - field_type_strs[field->type]); - goto error; - } - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) - (*field_map)[field->offset_slot] = pos - tuple; - if (required_fields != NULL) - bit_clear(required_fields, field->id); - } + while (tuple_format_iterator_next(&it, false, &field, &pos, + &pos_end) != TUPLE_FORMAT_ITERATOR_STOP) { + if (field == NULL) + continue; /* - * If the current position of the data in tuple - * matches the container type (MP_MAP or MP_ARRAY) - * and the format::fields tree has such a record, - * prepare a new stack frame because it needs to - * be analyzed in the next iterations. + * Check if field mp_type is compatible with type + * defined in format. */ - if ((type == MP_ARRAY || type == MP_MAP) && - !mp_stack_is_full(&stack) && field != NULL) { - uint32_t size = type == MP_ARRAY ? 
- mp_decode_array(&pos) : - mp_decode_map(&pos); - mp_stack_push(&stack, type, size); - parent = &field->token; - } else { - mp_next(&pos); + bool is_nullable = tuple_field_is_nullable(field); + if (validate && + !field_mp_type_is_compatible(field->type, mp_typeof(*pos), + is_nullable) != 0) { + diag_set(ClientError, ER_FIELD_TYPE, + tuple_field_path(field), + field_type_strs[field->type]); + goto error; } + /* Initialize field_map with data offset. */ + if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) + (*field_map)[field->offset_slot] = pos - tuple; + /* Mark this field as present in the tuple. */ + if (required_fields != NULL) + bit_clear(required_fields, field->id); } finish: /* @@ -1033,3 +961,34 @@ box_tuple_format_unref(box_tuple_format_t *format) tuple_format_unref(format); } +int +tuple_format_iterator_create(struct tuple_format_iterator *it, + struct tuple_format *format, const char *tuple, + struct region *region) +{ + assert(mp_typeof(*tuple) == MP_ARRAY); + const char *field = tuple; + uint32_t field_count = mp_decode_array(&field); + it->parent = &format->fields.root; + it->format = format; + it->pos = field; + if (field_count == 0) { + mp_stack_create(&it->stack, 0, NULL); + return 0; + } + /* + * Prepare mp stack of the size equal to the maximum depth + * of the indexed field in the format::fields tree + * (fields_depth) to carry out a simultaneous parsing of + * the tuple and tree traversal. 
+ */ + uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame); + struct mp_frame *frames = region_alloc(region, frames_sz); + if (frames == NULL) { + diag_set(OutOfMemory, frames_sz, "region", "frames"); + return -1; + } + mp_stack_create(&it->stack, format->fields_depth, frames); + mp_stack_push(&it->stack, MP_ARRAY, field_count); + return 0; +} diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h index 22a0fb232..15bc4ef09 100644 --- a/src/box/tuple_format.h +++ b/src/box/tuple_format.h @@ -412,6 +412,146 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, int tuple_format_init(); +/** + * A tuple msgpack iterator that decodes the tuple and returns + * only fields that are described in the tuple_format. + */ +struct tuple_format_iterator { + /** + * Tuple format is used to perform field lookups in + * format::fields JSON tree. + */ + struct tuple_format *format; + /** + * The pointer to the parent node in the format::fields + * JSON tree. It is required for the relative lookup of + * the next field. + */ + struct json_token *parent; + /** + * Traversal stack of msgpack frames is used to determine + * when the parsing of the current composite mp structure + * (array or map) is completed, to update the parent + * pointer accordingly. + */ + struct mp_stack stack; + /** The current read position in msgpack. */ + const char *pos; +}; + +/** + * Initialize tuple decode iterator with tuple format and tuple + * data pointer. + * + * Function uses the region for the traversal stack allocation. + * + * Returns 0 on success. In case of memory allocation error sets + * diag message and returns -1. + */ +int +tuple_format_iterator_create(struct tuple_format_iterator *it, + struct tuple_format *format, const char *tuple, + struct region *region); + +/** The return status of tuple_format_iterator_next. 
*/ +enum tuple_format_iterator_status { + TUPLE_FORMAT_ITERATOR_STOP, + TUPLE_FORMAT_ITERATOR_NEXT, +}; + +/** + * Perform a tuple decode step and update the iterator state. + * Returns TUPLE_FORMAT_ITERATOR_STOP when the traversal is over; + * otherwise returns TUPLE_FORMAT_ITERATOR_NEXT and initializes: + * field - the tuple_field pointer to the format::fields field + * that matches the currently processed msgpack field + * (NULL when there is none), + * key_parts_only - input flag: when true, format fields that + * are not parts of some index are reported as NULL, + * data - the pointer to the currently processed msgpack field, + * data_end - the pointer to the end of the currently processed + * msgpack field (for an MP_MAP or MP_ARRAY that + * is described in the format this is the end of the + * field header). + */ +static inline enum tuple_format_iterator_status +tuple_format_iterator_next(struct tuple_format_iterator *it, bool key_parts_only, + struct tuple_field **field, const char **data, + const char **data_end) +{ + struct mp_frame *frame = mp_stack_top(&it->stack); + while (!mp_frame_advance(frame)) { + /* + * If the elements of the current frame + * are over, pop this frame out of stack + * and climb one position in the format::fields + * tree to match the changed JSON path to the + * data in the tuple. + */ + mp_stack_pop(&it->stack); + if (mp_stack_is_empty(&it->stack)) + return TUPLE_FORMAT_ITERATOR_STOP; + frame = mp_stack_top(&it->stack); + it->parent = it->parent->parent; + } + /* + * Use the top frame of the stack and the + * current data offset to prepare the JSON token + * for the subsequent format::fields lookup. 
+ */ + struct json_token token; + switch (frame->type) { + case MP_ARRAY: + token.type = JSON_TOKEN_NUM; + token.num = frame->idx; + break; + case MP_MAP: + if (mp_typeof(*it->pos) != MP_STR) { + mp_next(&it->pos); + mp_next(&it->pos); + *field = NULL; + return TUPLE_FORMAT_ITERATOR_NEXT; + } + token.type = JSON_TOKEN_STR; + token.str = mp_decode_str(&it->pos, (uint32_t *)&token.len); + break; + default: + unreachable(); + } + /* + * Perform lookup for a field in format::fields, + * that represents the field metadata by JSON path + * corresponding to the current position in the + * tuple. + */ + assert(it->parent != NULL); + *field = json_tree_lookup_entry(&it->format->fields, it->parent, &token, + struct tuple_field, token); + if (key_parts_only && *field != NULL && !(*field)->is_key_part) + *field = NULL; + *data = it->pos; + /* + * If the current position of the data in tuple + * matches the container type (MP_MAP or MP_ARRAY) + * and the format::fields tree has such a record, + * prepare a new stack frame because it needs to + * be analyzed in the next iterations. + */ + enum mp_type type = mp_typeof(*it->pos); + if ((type == MP_ARRAY || type == MP_MAP) && + !mp_stack_is_full(&it->stack) && *field != NULL) { + uint32_t size = type == MP_ARRAY ? 
+ mp_decode_array(&it->pos) : + mp_decode_map(&it->pos); + mp_stack_push(&it->stack, type, size); + it->parent = &(*field)->token; + } else { + mp_next(&it->pos); + } + *data_end = it->pos; + return TUPLE_FORMAT_ITERATOR_NEXT; +} + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c index e1cdd293d..9155ba50c 100644 --- a/src/box/vy_stmt.c +++ b/src/box/vy_stmt.c @@ -417,10 +417,19 @@ vy_stmt_new_surrogate_delete_raw(struct tuple_format *format, } char *field_map_begin = data + src_size; uint32_t *field_map = (uint32_t *) (data + total_size); - + /* + * Perform simultaneous parsing of the tuple and + * format::fields tree traversal to copy indexed field + * data and initialize field map. + */ + struct tuple_format_iterator it; const char *src_pos = src_data; - uint32_t src_count = mp_decode_array(&src_pos); - uint32_t field_count = MIN(src_count, format->index_field_count); + if (tuple_format_iterator_create(&it, format, src_pos, region) != 0) + goto out; + + uint32_t field_count = MIN((uint32_t)mp_stack_top(&it.stack)->count, + format->index_field_count); + mp_stack_top(&it.stack)->count = field_count; /* * Nullify field map to be able to detect by 0, which key * fields are absent in tuple_field(). @@ -428,85 +437,40 @@ vy_stmt_new_surrogate_delete_raw(struct tuple_format *format, memset((char *)field_map - format->field_map_size, 0, format->field_map_size); char *pos = mp_encode_array(data, field_count); - /* - * Perform simultaneous parsing of the tuple and - * format::fields tree traversal to copy indexed field - * data and initialize field map. In many details the code - * above works like tuple_field_map_create, read it's - * comments for more details. 
- */ - uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame); - struct mp_frame *frames = region_alloc(region, frames_sz); - if (frames == NULL) { - diag_set(OutOfMemory, frames_sz, "region", "frames"); - goto out; - } - struct mp_stack stack; - mp_stack_create(&stack, format->fields_depth, frames); - mp_stack_push(&stack, MP_ARRAY, field_count); + const char *src_pos_end; struct tuple_field *field; - struct json_token *parent = &format->fields.root; - while (true) { - struct mp_frame *frame = mp_stack_top(&stack); - while (!mp_frame_advance(frame)) { - mp_stack_pop(&stack); - if (mp_stack_is_empty(&stack)) - goto finish; - frame = mp_stack_top(&stack); - parent = parent->parent; - } - struct json_token token; - switch (frame->type) { - case MP_ARRAY: - token.type = JSON_TOKEN_NUM; - token.num = frame->idx; - break; - case MP_MAP: - if (mp_typeof(*src_pos) != MP_STR) { - mp_next(&src_pos); - mp_next(&src_pos); - pos = mp_encode_nil(pos); - pos = mp_encode_nil(pos); - continue; - } - token.type = JSON_TOKEN_STR; - token.str = mp_decode_str(&src_pos, (uint32_t *)&token.len); - pos = mp_encode_str(pos, token.str, token.len); - break; - default: - unreachable(); - } - assert(parent != NULL); - field = json_tree_lookup_entry(&format->fields, parent, &token, - struct tuple_field, token); - if (field == NULL || !field->is_key_part) { - mp_next(&src_pos); + while (tuple_format_iterator_next(&it, true, &field, &src_pos, + &src_pos_end) != TUPLE_FORMAT_ITERATOR_STOP) { + struct mp_frame *frame = mp_stack_top(&it.stack); + if (field == NULL) { + /* + * Instead of copying useless data having + * no representation in tuple_format, + * write nil. + */ pos = mp_encode_nil(pos); + if (frame->type == MP_MAP) + pos = mp_encode_nil(pos); continue; } + if (field->token.type == JSON_TOKEN_STR) { + assert(frame->type == MP_MAP); + pos = mp_encode_str(pos, field->token.str, + field->token.len); + } + /* Initialize field_map with data offset. 
*/ if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) field_map[field->offset_slot] = pos - data; - enum mp_type type = mp_typeof(*src_pos); - if ((type == MP_ARRAY || type == MP_MAP) && - !mp_stack_is_full(&stack)) { - uint32_t size; - if (type == MP_ARRAY) { - size = mp_decode_array(&src_pos); - pos = mp_encode_array(pos, size); - } else { - size = mp_decode_map(&src_pos); - pos = mp_encode_map(pos, size); - } - mp_stack_push(&stack, type, size); - parent = &field->token; + /* Copy field data. */ + if (field->type == FIELD_TYPE_ARRAY) { + pos = mp_encode_array(pos, frame->count); + } else if (field->type == FIELD_TYPE_MAP) { + pos = mp_encode_map(pos, frame->count); } else { - const char *src_field = src_pos; - mp_next(&src_pos); - memcpy(pos, src_field, src_pos - src_field); - pos += src_pos - src_field; + memcpy(pos, src_pos, src_pos_end - src_pos); + pos += src_pos_end - src_pos; } } -finish: assert(pos <= data + src_size); uint32_t bsize = pos - data; stmt = vy_stmt_alloc(format, bsize); diff --git a/test/engine/json.result b/test/engine/json.result index 09c704963..84b1309e1 100644 --- a/test/engine/json.result +++ b/test/engine/json.result @@ -702,6 +702,23 @@ s:replace({4, {"d1", name='D1'}, "test"}) --- - [4, {1: 'd1', 'name': 'D1'}, 'test'] ... +idx0:drop() +--- +... +s:truncate() +--- +... +idx0 = s:create_index('idx2', {parts = {{3, 'str', path = '[1].fname'}, {3, 'str', path = '[1].sname'}}}) +--- +... +s:insert({5, {1, 1, 1}, {{fname='A', sname='B'}, {fname='C', sname='D'}, {fname='A', sname='B'}}}) +--- +- [5, [1, 1, 1], [{'fname': 'A', 'sname': 'B'}, {'fname': 'C', 'sname': 'D'}, {'fname': 'A', + 'sname': 'B'}]] +... +s:delete(5) +--- +... s:drop() --- ... 
diff --git a/test/engine/json.test.lua b/test/engine/json.test.lua index 5c235e1ba..e864ec14d 100644 --- a/test/engine/json.test.lua +++ b/test/engine/json.test.lua @@ -200,6 +200,11 @@ pk = s:create_index('pk', {parts={{1, 'int'}}}) idx0 = s:create_index('idx0', {parts = {{2, 'str', path = 'name'}, {3, "str"}}}) s:insert({4, {"d", name='D'}, "test"}) s:replace({4, {"d1", name='D1'}, "test"}) +idx0:drop() +s:truncate() +idx0 = s:create_index('idx2', {parts = {{3, 'str', path = '[1].fname'}, {3, 'str', path = '[1].sname'}}}) +s:insert({5, {1, 1, 1}, {{fname='A', sname='B'}, {fname='C', sname='D'}, {fname='A', sname='B'}}}) +s:delete(5) s:drop() -- -- 2.21.0