[PATCH v3 5/7] box: introduce tuple_parse_iterator class
Kirill Shcherbatov
kshcherbatov at tarantool.org
Tue Apr 2 18:49:36 MSK 2019
The similar code in tuple_field_map_create and
vy_stmt_new_surrogate_delete_raw that performs tuple parsing in
deep has been refactored as reusable tuple_parse_iterator class.
Being thus encapsulated, this code will be uniformly managed and
extended in the further patches in scope of multikey indexes.
Needed for #1257
---
src/box/tuple_format.c | 222 +++++++++++++++++++++++------------------
src/box/tuple_format.h | 65 ++++++++++++
src/box/vy_stmt.c | 93 ++++++-----------
3 files changed, 218 insertions(+), 162 deletions(-)
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 1043707ad..070897ec2 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -831,109 +831,34 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
* which key fields are absent in tuple_field().
*/
memset((char *)*field_map - *field_map_size, 0, *field_map_size);
- /*
- * Prepare mp stack of the size equal to the maximum depth
- * of the indexed field in the format::fields tree
- * (fields_depth) to carry out a simultaneous parsing of
- * the tuple and tree traversal to process type
- * validations and field map initialization.
- */
- uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
- struct mp_frame *frames = region_alloc(region, frames_sz);
- if (frames == NULL) {
- diag_set(OutOfMemory, frames_sz, "region", "frames");
+ struct tuple_parse_iterator it;
+ if (tuple_parse_iterator_create(&it, format, pos, defined_field_count,
+ region) != 0)
goto error;
- }
- struct mp_stack stack;
- mp_stack_create(&stack, format->fields_depth, frames);
- mp_stack_push(&stack, MP_ARRAY, defined_field_count);
+ const char *pos_end;
struct tuple_field *field;
- struct json_token *parent = &format->fields.root;
- while (true) {
- int idx;
- while ((idx = mp_stack_advance(&stack)) == -1) {
- /*
- * If the elements of the current frame
- * are over, pop this frame out of stack
- * and climb one position in the
- * format::fields tree to match the
- * changed JSON path to the data in the
- * tuple.
- */
- mp_stack_pop(&stack);
- if (mp_stack_is_empty(&stack))
- goto finish;
- parent = parent->parent;
- }
- /*
- * Use the top frame of the stack and the
- * current data offset to prepare the JSON token
- * for the subsequent format::fields lookup.
- */
- struct json_token token;
- switch (mp_stack_type(&stack)) {
- case MP_ARRAY:
- token.type = JSON_TOKEN_NUM;
- token.num = idx;
- break;
- case MP_MAP:
- if (mp_typeof(*pos) != MP_STR) {
- /*
- * JSON path support only string
- * keys for map. Skip this entry.
- */
- mp_next(&pos);
- mp_next(&pos);
- continue;
- }
- token.type = JSON_TOKEN_STR;
- token.str = mp_decode_str(&pos, (uint32_t *)&token.len);
- break;
- default:
- unreachable();
- }
- /*
- * Perform lookup for a field in format::fields,
- * that represents the field metadata by JSON path
- * corresponding to the current position in the
- * tuple.
- */
- enum mp_type type = mp_typeof(*pos);
- assert(parent != NULL);
- field = json_tree_lookup_entry(&format->fields, parent, &token,
- struct tuple_field, token);
- if (field != NULL) {
- bool is_nullable = tuple_field_is_nullable(field);
- if (validate &&
- !field_mp_type_is_compatible(field->type, type,
- is_nullable) != 0) {
- diag_set(ClientError, ER_FIELD_TYPE,
- tuple_field_path(field),
- field_type_strs[field->type]);
- goto error;
- }
- if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
- (*field_map)[field->offset_slot] = pos - tuple;
- if (required_fields != NULL)
- bit_clear(required_fields, field->id);
- }
+ while (tuple_parse_iterator_advice(&it, &field, &pos, &pos_end) > 0) {
+ if (field == NULL)
+ continue;
/*
- * If the current position of the data in tuple
- * matches the container type (MP_MAP or MP_ARRAY)
- * and the format::fields tree has such a record,
- * prepare a new stack frame because it needs to
- * be analyzed in the next iterations.
+ * Check if field mp_type is compatible with type
+ * defined in format.
*/
- if ((type == MP_ARRAY || type == MP_MAP) &&
- !mp_stack_is_full(&stack) && field != NULL) {
- uint32_t size = type == MP_ARRAY ?
- mp_decode_array(&pos) :
- mp_decode_map(&pos);
- mp_stack_push(&stack, type, size);
- parent = &field->token;
- } else {
- mp_next(&pos);
+ bool is_nullable = tuple_field_is_nullable(field);
+ if (validate &&
+ !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
+ is_nullable) != 0) {
+ diag_set(ClientError, ER_FIELD_TYPE,
+ tuple_field_path(field),
+ field_type_strs[field->type]);
+ goto error;
}
+ /* Initialize field_map with data offset. */
+ if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
+ (*field_map)[field->offset_slot] = pos - tuple;
+ /* Mark this field as present in the tuple. */
+ if (required_fields != NULL)
+ bit_clear(required_fields, field->id);
}
finish:
/*
@@ -1029,3 +954,104 @@ box_tuple_format_unref(box_tuple_format_t *format)
tuple_format_unref(format);
}
+int
+tuple_parse_iterator_create(struct tuple_parse_iterator *it,
+ struct tuple_format *format, const char *data,
+ uint32_t field_count, struct region *region)
+{
+ /*
+ * Prepare mp stack of the size equal to the maximum depth
+ * of the indexed field in the format::fields tree
+ * (fields_depth) to carry out a simultaneous parsing of
+ * the tuple and tree traversal.
+ */
+ uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
+ struct mp_frame *frames = region_alloc(region, frames_sz);
+ if (frames == NULL) {
+ diag_set(OutOfMemory, frames_sz, "region", "frames");
+ return -1;
+ }
+ mp_stack_create(&it->stack, format->fields_depth, frames);
+ mp_stack_push(&it->stack, MP_ARRAY, field_count);
+ it->parent = &format->fields.root;
+ it->format = format;
+ it->pos = data;
+ return 0;
+}
+
+int
+tuple_parse_iterator_advice(struct tuple_parse_iterator *it,
+ struct tuple_field **field, const char **data,
+ const char **data_end)
+{
+ int idx, rc = 0;
+ while ((idx = mp_stack_advance(&it->stack)) == -1) {
+ /*
+ * If the elements of the current frame
+ * are over, pop this frame out of stack
+ * and climb one position in the format::fields
+ * tree to match the changed JSON path to the
+ * data in the tuple.
+ */
+ mp_stack_pop(&it->stack);
+ if (mp_stack_is_empty(&it->stack))
+ return rc;
+ it->parent = it->parent->parent;
+ }
+ /*
+ * Use the top frame of the stack and the
+ * current data offset to prepare the JSON token
+ * for the subsequent format::fields lookup.
+ */
+ struct json_token token;
+ switch (mp_stack_type(&it->stack)) {
+ case MP_ARRAY:
+ rc = 1;
+ token.type = JSON_TOKEN_NUM;
+ token.num = idx;
+ break;
+ case MP_MAP:
+ rc = 2;
+ if (mp_typeof(*it->pos) != MP_STR) {
+ mp_next(&it->pos);
+ mp_next(&it->pos);
+ *field = NULL;
+ return rc;
+ }
+ token.type = JSON_TOKEN_STR;
+ token.str = mp_decode_str(&it->pos, (uint32_t *)&token.len);
+ break;
+ default:
+ unreachable();
+ }
+ /*
+ * Perform lookup for a field in format::fields,
+ * that represents the field metadata by JSON path
+ * corresponding to the current position in the
+ * tuple.
+ */
+ assert(it->parent != NULL);
+ *field = json_tree_lookup_entry(&it->format->fields, it->parent, &token,
+ struct tuple_field, token);
+ *data = it->pos;
+ /*
+ * If the current position of the data in tuple
+ * matches the container type (MP_MAP or MP_ARRAY)
+ * and the format::fields tree has such a record,
+ * prepare a new stack frame because it needs to
+ * be analyzed in the next iterations.
+ */
+ enum mp_type type = mp_typeof(*it->pos);
+ if ((type == MP_ARRAY || type == MP_MAP) &&
+ !mp_stack_is_full(&it->stack) && *field != NULL) {
+ uint32_t size = type == MP_ARRAY ?
+ mp_decode_array(&it->pos) :
+ mp_decode_map(&it->pos);
+ mp_stack_push(&it->stack, type, size);
+ it->parent = &(*field)->token;
+ } else {
+ mp_next(&it->pos);
+ }
+ *data_end = it->pos;
+ return rc;
+}
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index 22a0fb232..bef1d0903 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -412,6 +412,71 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
int
tuple_format_init();
+/**
+ * A tuple msgpack iterator that parse tuple in deep an returns
+ * only fields that are described in the tuple_format.
+ */
+struct tuple_parse_iterator {
+ /**
+ * Tuple format is used to perform field lookups in
+ * format::fields JSON tree.
+ */
+ struct tuple_format *format;
+ /**
+ * The pointer to the parent node in the format::fields
+ * JSON tree. Is required for relative lookup for the
+ * next field.
+ */
+ struct json_token *parent;
+ /**
+ * Traversal stack of msgpack frames is used to determine
+ * when the parsing of the current composite mp structure
+ * (array or map) is completed to update to the parent
+ * pointer accordingly.
+ */
+ struct mp_stack stack;
+ /** The current read position in msgpack. */
+ const char *pos;
+};
+
+/**
+ * Initialize tuple parse iterator with tuple format, data pointer
+ * and the count of top-level msgpack fields to be processed.
+ *
+ * This function assumes that the msgpack header containing the
+ * number of top-level msgpack fields (field_count) has already
+ * been parsed and the data pointer has already been shifted
+ * correspondingly. This allows directly limit the number of
+ * fields that must be parsed.
+
+ * Function uses the region for the traversal stack allocation.
+ *
+ * Returns 0 on success. In case of memory allocation error sets
+ * diag message and returns -1.
+ */
+int
+tuple_parse_iterator_create(struct tuple_parse_iterator *it,
+ struct tuple_format *format, const char *data,
+ uint32_t field_count, struct region *region);
+
+/**
+ * Parse tuple in deep and update iterator state.
+ *
+ * Returns the number of fields at the current tuple nesting
+ * level that have been processed (2 for map item, 1 for array
+ * key:value pair, 0 on stop) and initializes:
+ * field - the tuple_field pointer to format::fields field
+ * that matches to the currently processed msgpack field
+ * (when exists),
+ * data - the pointer to the currently processed msgpack field,
+ * data_end - the pointer to the end of currently processed
+ * msgpack field.
+ */
+int
+tuple_parse_iterator_advice(struct tuple_parse_iterator *it,
+ struct tuple_field **field, const char **data,
+ const char **data_end);
+
#if defined(__cplusplus)
} /* extern "C" */
#endif /* defined(__cplusplus) */
diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
index add86622b..1e8bb7825 100644
--- a/src/box/vy_stmt.c
+++ b/src/box/vy_stmt.c
@@ -431,81 +431,46 @@ vy_stmt_new_surrogate_delete_raw(struct tuple_format *format,
/*
* Perform simultaneous parsing of the tuple and
* format::fields tree traversal to copy indexed field
- * data and initialize field map. In many details the code
- * above works like tuple_field_map_create, read it's
- * comments for more details.
+ * data and initialize field map.
*/
- uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
- struct mp_frame *frames = region_alloc(region, frames_sz);
- if (frames == NULL) {
- diag_set(OutOfMemory, frames_sz, "region", "frames");
+ struct tuple_parse_iterator it;
+ if (tuple_parse_iterator_create(&it, format, src_pos, field_count,
+ region) != 0)
goto out;
- }
- struct mp_stack stack;
- mp_stack_create(&stack, format->fields_depth, frames);
- mp_stack_push(&stack, MP_ARRAY, field_count);
+ int rc;
+ const char *src_pos_end;
struct tuple_field *field;
- struct json_token *parent = &format->fields.root;
- while (true) {
- int idx;
- while ((idx = mp_stack_advance(&stack)) == -1) {
- mp_stack_pop(&stack);
- if (mp_stack_is_empty(&stack))
- goto finish;
- parent = parent->parent;
- }
- struct json_token token;
- switch (mp_stack_type(&stack)) {
- case MP_ARRAY:
- token.type = JSON_TOKEN_NUM;
- token.num = idx;
- break;
- case MP_MAP:
- if (mp_typeof(*src_pos) != MP_STR) {
- mp_next(&src_pos);
- mp_next(&src_pos);
- pos = mp_encode_nil(pos);
- pos = mp_encode_nil(pos);
- continue;
- }
- token.type = JSON_TOKEN_STR;
- token.str = mp_decode_str(&src_pos, (uint32_t *)&token.len);
- pos = mp_encode_str(pos, token.str, token.len);
- break;
- default:
- unreachable();
- }
- assert(parent != NULL);
- field = json_tree_lookup_entry(&format->fields, parent, &token,
- struct tuple_field, token);
+ while ((rc = tuple_parse_iterator_advice(&it, &field, &src_pos,
+ &src_pos_end)) > 0) {
if (field == NULL || !field->is_key_part) {
- mp_next(&src_pos);
- pos = mp_encode_nil(pos);
+ /*
+ * Instead of copying useless data having
+ * no representation in tuple_format,
+ * write nil.
+ */
+ while (rc-- > 0)
+ pos = mp_encode_nil(pos);
continue;
}
+ if (field->token.type == JSON_TOKEN_STR) {
+ assert(rc-- == 2);
+ pos = mp_encode_str(pos, field->token.str,
+ field->token.len);
+ }
+ assert(rc == 1);
+ /* Initialize field_map with data offset. */
if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
field_map[field->offset_slot] = pos - data;
- enum mp_type type = mp_typeof(*src_pos);
- if ((type == MP_ARRAY || type == MP_MAP) &&
- !mp_stack_is_full(&stack)) {
- uint32_t size;
- if (type == MP_ARRAY) {
- size = mp_decode_array(&src_pos);
- pos = mp_encode_array(pos, size);
- } else {
- size = mp_decode_map(&src_pos);
- pos = mp_encode_map(pos, size);
- }
- mp_stack_push(&stack, type, size);
- parent = &field->token;
+ /* Copy field data. */
+ if (field->type == FIELD_TYPE_ARRAY) {
+ pos = mp_encode_array(pos, mp_decode_array(&src_pos));
+ } else if (field->type == FIELD_TYPE_MAP) {
+ pos = mp_encode_map(pos, mp_decode_map(&src_pos));
} else {
- const char *src_field = src_pos;
- mp_next(&src_pos);
- memcpy(pos, src_field, src_pos - src_field);
- pos += src_pos - src_field;
+ memcpy(pos, src_pos, src_pos_end - src_pos);
+ pos += src_pos_end - src_pos;
}
}
-finish:
assert(pos <= data + src_size);
uint32_t bsize = pos - data;
stmt = vy_stmt_alloc(format, bsize);
--
2.21.0
More information about the Tarantool-patches
mailing list