From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v3 5/7] box: introduce tuple_parse_iterator class Date: Tue, 2 Apr 2019 18:49:36 +0300 Message-Id: <5d9690693bcae1856064b5eda976b56ee1137b37.1554218695.git.kshcherbatov@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: Kirill Shcherbatov List-ID: The similar code in tuple_field_map_create and vy_stmt_new_surrogate_delete_raw that parses a tuple in depth has been refactored into a reusable tuple_parse_iterator class. Being thus encapsulated, this code will be uniformly managed and extended in further patches in the scope of multikey indexes. Needed for #1257 --- src/box/tuple_format.c | 222 +++++++++++++++++++++++------------------ src/box/tuple_format.h | 65 ++++++++++++ src/box/vy_stmt.c | 93 ++++++----------- 3 files changed, 218 insertions(+), 162 deletions(-) diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 1043707ad..070897ec2 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -831,109 +831,34 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, * which key fields are absent in tuple_field(). */ memset((char *)*field_map - *field_map_size, 0, *field_map_size); - /* - * Prepare mp stack of the size equal to the maximum depth - * of the indexed field in the format::fields tree - * (fields_depth) to carry out a simultaneous parsing of - * the tuple and tree traversal to process type - * validations and field map initialization.
- */ - uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame); - struct mp_frame *frames = region_alloc(region, frames_sz); - if (frames == NULL) { - diag_set(OutOfMemory, frames_sz, "region", "frames"); + struct tuple_parse_iterator it; + if (tuple_parse_iterator_create(&it, format, pos, defined_field_count, + region) != 0) goto error; - } - struct mp_stack stack; - mp_stack_create(&stack, format->fields_depth, frames); - mp_stack_push(&stack, MP_ARRAY, defined_field_count); + const char *pos_end; struct tuple_field *field; - struct json_token *parent = &format->fields.root; - while (true) { - int idx; - while ((idx = mp_stack_advance(&stack)) == -1) { - /* - * If the elements of the current frame - * are over, pop this frame out of stack - * and climb one position in the - * format::fields tree to match the - * changed JSON path to the data in the - * tuple. - */ - mp_stack_pop(&stack); - if (mp_stack_is_empty(&stack)) - goto finish; - parent = parent->parent; - } - /* - * Use the top frame of the stack and the - * current data offset to prepare the JSON token - * for the subsequent format::fields lookup. - */ - struct json_token token; - switch (mp_stack_type(&stack)) { - case MP_ARRAY: - token.type = JSON_TOKEN_NUM; - token.num = idx; - break; - case MP_MAP: - if (mp_typeof(*pos) != MP_STR) { - /* - * JSON path support only string - * keys for map. Skip this entry. - */ - mp_next(&pos); - mp_next(&pos); - continue; - } - token.type = JSON_TOKEN_STR; - token.str = mp_decode_str(&pos, (uint32_t *)&token.len); - break; - default: - unreachable(); - } - /* - * Perform lookup for a field in format::fields, - * that represents the field metadata by JSON path - * corresponding to the current position in the - * tuple. 
- */ - enum mp_type type = mp_typeof(*pos); - assert(parent != NULL); - field = json_tree_lookup_entry(&format->fields, parent, &token, - struct tuple_field, token); - if (field != NULL) { - bool is_nullable = tuple_field_is_nullable(field); - if (validate && - !field_mp_type_is_compatible(field->type, type, - is_nullable) != 0) { - diag_set(ClientError, ER_FIELD_TYPE, - tuple_field_path(field), - field_type_strs[field->type]); - goto error; - } - if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) - (*field_map)[field->offset_slot] = pos - tuple; - if (required_fields != NULL) - bit_clear(required_fields, field->id); - } + while (tuple_parse_iterator_advice(&it, &field, &pos, &pos_end) > 0) { + if (field == NULL) + continue; /* - * If the current position of the data in tuple - * matches the container type (MP_MAP or MP_ARRAY) - * and the format::fields tree has such a record, - * prepare a new stack frame because it needs to - * be analyzed in the next iterations. + * Check if field mp_type is compatible with type + * defined in format. */ - if ((type == MP_ARRAY || type == MP_MAP) && - !mp_stack_is_full(&stack) && field != NULL) { - uint32_t size = type == MP_ARRAY ? - mp_decode_array(&pos) : - mp_decode_map(&pos); - mp_stack_push(&stack, type, size); - parent = &field->token; - } else { - mp_next(&pos); + bool is_nullable = tuple_field_is_nullable(field); + if (validate && + !field_mp_type_is_compatible(field->type, mp_typeof(*pos), + is_nullable) != 0) { + diag_set(ClientError, ER_FIELD_TYPE, + tuple_field_path(field), + field_type_strs[field->type]); + goto error; } + /* Initialize field_map with data offset. */ + if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) + (*field_map)[field->offset_slot] = pos - tuple; + /* Mark this field as present in the tuple. 
*/ + if (required_fields != NULL) + bit_clear(required_fields, field->id); } finish: /* @@ -1029,3 +954,104 @@ box_tuple_format_unref(box_tuple_format_t *format) tuple_format_unref(format); } +int +tuple_parse_iterator_create(struct tuple_parse_iterator *it, + struct tuple_format *format, const char *data, + uint32_t field_count, struct region *region) +{ + /* + * Prepare mp stack of the size equal to the maximum depth + * of the indexed field in the format::fields tree + * (fields_depth) to carry out a simultaneous parsing of + * the tuple and tree traversal. + */ + uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame); + struct mp_frame *frames = region_alloc(region, frames_sz); + if (frames == NULL) { + diag_set(OutOfMemory, frames_sz, "region", "frames"); + return -1; + } + mp_stack_create(&it->stack, format->fields_depth, frames); + mp_stack_push(&it->stack, MP_ARRAY, field_count); + it->parent = &format->fields.root; + it->format = format; + it->pos = data; + return 0; +} + +int +tuple_parse_iterator_advice(struct tuple_parse_iterator *it, + struct tuple_field **field, const char **data, + const char **data_end) +{ + int idx, rc = 0; + while ((idx = mp_stack_advance(&it->stack)) == -1) { + /* + * If the elements of the current frame + * are over, pop this frame out of stack + * and climb one position in the format::fields + * tree to match the changed JSON path to the + * data in the tuple. + */ + mp_stack_pop(&it->stack); + if (mp_stack_is_empty(&it->stack)) + return rc; + it->parent = it->parent->parent; + } + /* + * Use the top frame of the stack and the + * current data offset to prepare the JSON token + * for the subsequent format::fields lookup. 
+ */ + struct json_token token; + switch (mp_stack_type(&it->stack)) { + case MP_ARRAY: + rc = 1; + token.type = JSON_TOKEN_NUM; + token.num = idx; + break; + case MP_MAP: + rc = 2; + if (mp_typeof(*it->pos) != MP_STR) { + mp_next(&it->pos); + mp_next(&it->pos); + *field = NULL; + return rc; + } + token.type = JSON_TOKEN_STR; + token.str = mp_decode_str(&it->pos, (uint32_t *)&token.len); + break; + default: + unreachable(); + } + /* + * Perform lookup for a field in format::fields, + * that represents the field metadata by JSON path + * corresponding to the current position in the + * tuple. + */ + assert(it->parent != NULL); + *field = json_tree_lookup_entry(&it->format->fields, it->parent, &token, + struct tuple_field, token); + *data = it->pos; + /* + * If the current position of the data in tuple + * matches the container type (MP_MAP or MP_ARRAY) + * and the format::fields tree has such a record, + * prepare a new stack frame because it needs to + * be analyzed in the next iterations. + */ + enum mp_type type = mp_typeof(*it->pos); + if ((type == MP_ARRAY || type == MP_MAP) && + !mp_stack_is_full(&it->stack) && *field != NULL) { + uint32_t size = type == MP_ARRAY ? + mp_decode_array(&it->pos) : + mp_decode_map(&it->pos); + mp_stack_push(&it->stack, type, size); + it->parent = &(*field)->token; + } else { + mp_next(&it->pos); + } + *data_end = it->pos; + return rc; +} diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h index 22a0fb232..bef1d0903 100644 --- a/src/box/tuple_format.h +++ b/src/box/tuple_format.h @@ -412,6 +412,71 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple, int tuple_format_init(); +/** + * A tuple msgpack iterator that parses the tuple in depth and returns + * only fields that are described in the tuple_format. + */ +struct tuple_parse_iterator { + /** + * Tuple format is used to perform field lookups in + * format::fields JSON tree.
+ */ + struct tuple_format *format; + /** + * The pointer to the parent node in the format::fields + * JSON tree. It is required for the relative lookup of the + * next field. + */ + struct json_token *parent; + /** + * Traversal stack of msgpack frames is used to determine + * when the parsing of the current composite mp structure + * (array or map) is completed, in order to update the parent + * pointer accordingly. + */ + struct mp_stack stack; + /** The current read position in msgpack. */ + const char *pos; +}; + +/** + * Initialize tuple parse iterator with tuple format, data pointer + * and the count of top-level msgpack fields to be processed. + * + * This function assumes that the msgpack header containing the + * number of top-level msgpack fields (field_count) has already + * been parsed and the data pointer has already been shifted + * correspondingly. This allows directly limiting the number of + * fields that must be parsed. + * + * Function uses the region for the traversal stack allocation. + * + * Returns 0 on success. In case of memory allocation error sets + * diag message and returns -1. + */ +int +tuple_parse_iterator_create(struct tuple_parse_iterator *it, + struct tuple_format *format, const char *data, + uint32_t field_count, struct region *region); + +/** + * Parse the tuple in depth and update the iterator state. + * + * Returns the number of fields at the current tuple nesting + * level that have been processed (2 for a map key:value pair, + * 1 for an array item, 0 on stop) and initializes: + * field - the tuple_field pointer to the format::fields field + * that matches the currently processed msgpack field + * (NULL when there is no such field), + * data - the pointer to the currently processed msgpack field, + * data_end - the pointer to the end of the currently processed + * msgpack field.
+ */ +int +tuple_parse_iterator_advice(struct tuple_parse_iterator *it, + struct tuple_field **field, const char **data, + const char **data_end); + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c index add86622b..1e8bb7825 100644 --- a/src/box/vy_stmt.c +++ b/src/box/vy_stmt.c @@ -431,81 +431,46 @@ vy_stmt_new_surrogate_delete_raw(struct tuple_format *format, /* * Perform simultaneous parsing of the tuple and * format::fields tree traversal to copy indexed field - * data and initialize field map. In many details the code - * above works like tuple_field_map_create, read it's - * comments for more details. + * data and initialize field map. */ - uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame); - struct mp_frame *frames = region_alloc(region, frames_sz); - if (frames == NULL) { - diag_set(OutOfMemory, frames_sz, "region", "frames"); + struct tuple_parse_iterator it; + if (tuple_parse_iterator_create(&it, format, src_pos, field_count, + region) != 0) goto out; - } - struct mp_stack stack; - mp_stack_create(&stack, format->fields_depth, frames); - mp_stack_push(&stack, MP_ARRAY, field_count); + int rc; + const char *src_pos_end; struct tuple_field *field; - struct json_token *parent = &format->fields.root; - while (true) { - int idx; - while ((idx = mp_stack_advance(&stack)) == -1) { - mp_stack_pop(&stack); - if (mp_stack_is_empty(&stack)) - goto finish; - parent = parent->parent; - } - struct json_token token; - switch (mp_stack_type(&stack)) { - case MP_ARRAY: - token.type = JSON_TOKEN_NUM; - token.num = idx; - break; - case MP_MAP: - if (mp_typeof(*src_pos) != MP_STR) { - mp_next(&src_pos); - mp_next(&src_pos); - pos = mp_encode_nil(pos); - pos = mp_encode_nil(pos); - continue; - } - token.type = JSON_TOKEN_STR; - token.str = mp_decode_str(&src_pos, (uint32_t *)&token.len); - pos = mp_encode_str(pos, token.str, token.len); - break; - default: - unreachable(); - } - 
assert(parent != NULL); - field = json_tree_lookup_entry(&format->fields, parent, &token, - struct tuple_field, token); + while ((rc = tuple_parse_iterator_advice(&it, &field, &src_pos, + &src_pos_end)) > 0) { if (field == NULL || !field->is_key_part) { - mp_next(&src_pos); - pos = mp_encode_nil(pos); + /* + * Instead of copying useless data having + * no representation in tuple_format, + * write nil. + */ + while (rc-- > 0) + pos = mp_encode_nil(pos); continue; } + if (field->token.type == JSON_TOKEN_STR) { + assert(rc-- == 2); + pos = mp_encode_str(pos, field->token.str, + field->token.len); + } + assert(rc == 1); + /* Initialize field_map with data offset. */ if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL) field_map[field->offset_slot] = pos - data; - enum mp_type type = mp_typeof(*src_pos); - if ((type == MP_ARRAY || type == MP_MAP) && - !mp_stack_is_full(&stack)) { - uint32_t size; - if (type == MP_ARRAY) { - size = mp_decode_array(&src_pos); - pos = mp_encode_array(pos, size); - } else { - size = mp_decode_map(&src_pos); - pos = mp_encode_map(pos, size); - } - mp_stack_push(&stack, type, size); - parent = &field->token; + /* Copy field data. */ + if (field->type == FIELD_TYPE_ARRAY) { + pos = mp_encode_array(pos, mp_decode_array(&src_pos)); + } else if (field->type == FIELD_TYPE_MAP) { + pos = mp_encode_map(pos, mp_decode_map(&src_pos)); } else { - const char *src_field = src_pos; - mp_next(&src_pos); - memcpy(pos, src_field, src_pos - src_field); - pos += src_pos - src_field; + memcpy(pos, src_pos, src_pos_end - src_pos); + pos += src_pos_end - src_pos; } } -finish: assert(pos <= data + src_size); uint32_t bsize = pos - data; stmt = vy_stmt_alloc(format, bsize); -- 2.21.0