[tarantool-patches] Re: [PATCH v3 5/7] box: introduce tuple_parse_iterator class
Kirill Shcherbatov
kshcherbatov at tarantool.org
Fri Apr 5 20:17:27 MSK 2019
Please take a look at the new API. I've also refactored the names.
============================================================
The similar code in tuple_field_map_create and
vy_stmt_new_surrogate_delete_raw that decodes a tuple
according to its tuple_format has been refactored into a
reusable tuple_format_iterator class.
Being encapsulated this way, the code can be managed uniformly and
extended in further patches within the scope of multikey indexes.
Needed for #1257
---
src/box/tuple_format.c | 236 +++++++++++++++++++++++------------------
src/box/tuple_format.h | 75 +++++++++++++
src/box/vy_stmt.c | 109 +++++++------------
3 files changed, 248 insertions(+), 172 deletions(-)
diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 093046b37..a072b96e0 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -790,8 +790,13 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
const char *pos = tuple;
int rc = 0;
+ struct tuple_format_iterator it;
+ if (tuple_format_iterator_create(&it, format, tuple, region) != 0)
+ goto error;
+
/* Check to see if the tuple has a sufficient number of fields. */
- uint32_t field_count = mp_decode_array(&pos);
+ uint32_t field_count = !mp_stack_is_empty(&it.stack) ?
+ mp_stack_top(&it.stack)->count : 0;
if (validate && format->exact_field_count > 0 &&
format->exact_field_count != field_count) {
diag_set(ClientError, ER_EXACT_FIELD_COUNT,
@@ -826,115 +831,37 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
uint32_t defined_field_count = MIN(field_count, validate ?
tuple_format_field_count(format) :
format->index_field_count);
+ tuple_format_iterator_limit(&it, defined_field_count);
+
/*
* Nullify field map to be able to detect by 0,
* which key fields are absent in tuple_field().
*/
memset((char *)*field_map - *field_map_size, 0, *field_map_size);
- /*
- * Prepare mp stack of the size equal to the maximum depth
- * of the indexed field in the format::fields tree
- * (fields_depth) to carry out a simultaneous parsing of
- * the tuple and tree traversal to process type
- * validations and field map initialization.
- */
- uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
- struct mp_frame *frames = region_alloc(region, frames_sz);
- if (frames == NULL) {
- diag_set(OutOfMemory, frames_sz, "region", "frames");
- goto error;
- }
- struct mp_stack stack;
- mp_stack_create(&stack, format->fields_depth, frames);
- mp_stack_push(&stack, MP_ARRAY, defined_field_count);
+ const char *pos_end;
struct tuple_field *field;
- struct json_token *parent = &format->fields.root;
- while (true) {
- struct mp_frame *frame = mp_stack_top(&stack);
- while (!mp_frame_advance(frame)) {
- /*
- * If the elements of the current frame
- * are over, pop this frame out of stack
- * and climb one position in the
- * format::fields tree to match the
- * changed JSON path to the data in the
- * tuple.
- */
- mp_stack_pop(&stack);
- if (mp_stack_is_empty(&stack))
- goto finish;
- frame = mp_stack_top(&stack);
- parent = parent->parent;
- }
- /*
- * Use the top frame of the stack and the
- * current data offset to prepare the JSON token
- * for the subsequent format::fields lookup.
- */
- struct json_token token;
- switch (frame->type) {
- case MP_ARRAY:
- token.type = JSON_TOKEN_NUM;
- token.num = frame->idx;
- break;
- case MP_MAP:
- if (mp_typeof(*pos) != MP_STR) {
- /*
- * JSON path support only string
- * keys for map. Skip this entry.
- */
- mp_next(&pos);
- mp_next(&pos);
- continue;
- }
- token.type = JSON_TOKEN_STR;
- token.str = mp_decode_str(&pos, (uint32_t *)&token.len);
- break;
- default:
- unreachable();
- }
- /*
- * Perform lookup for a field in format::fields,
- * that represents the field metadata by JSON path
- * corresponding to the current position in the
- * tuple.
- */
- enum mp_type type = mp_typeof(*pos);
- assert(parent != NULL);
- field = json_tree_lookup_entry(&format->fields, parent, &token,
- struct tuple_field, token);
- if (field != NULL) {
- bool is_nullable = tuple_field_is_nullable(field);
- if (validate &&
- !field_mp_type_is_compatible(field->type, type,
- is_nullable) != 0) {
- diag_set(ClientError, ER_FIELD_TYPE,
- tuple_field_path(field),
- field_type_strs[field->type]);
- goto error;
- }
- if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
- (*field_map)[field->offset_slot] = pos - tuple;
- if (required_fields != NULL)
- bit_clear(required_fields, field->id);
- }
+ while (tuple_format_iterator_advice(&it, &field, &pos, &pos_end)) {
+ if (field == NULL)
+ continue;
/*
- * If the current position of the data in tuple
- * matches the container type (MP_MAP or MP_ARRAY)
- * and the format::fields tree has such a record,
- * prepare a new stack frame because it needs to
- * be analyzed in the next iterations.
+ * Check if field mp_type is compatible with type
+ * defined in format.
*/
- if ((type == MP_ARRAY || type == MP_MAP) &&
- !mp_stack_is_full(&stack) && field != NULL) {
- uint32_t size = type == MP_ARRAY ?
- mp_decode_array(&pos) :
- mp_decode_map(&pos);
- mp_stack_push(&stack, type, size);
- parent = &field->token;
- } else {
- mp_next(&pos);
+ bool is_nullable = tuple_field_is_nullable(field);
+ if (validate &&
+ !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
+ is_nullable) != 0) {
+ diag_set(ClientError, ER_FIELD_TYPE,
+ tuple_field_path(field),
+ field_type_strs[field->type]);
+ goto error;
}
+ /* Initialize field_map with data offset. */
+ if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
+ (*field_map)[field->offset_slot] = pos - tuple;
+ /* Mark this field as present in the tuple. */
+ if (required_fields != NULL)
+ bit_clear(required_fields, field->id);
}
finish:
/*
@@ -1030,3 +957,110 @@ box_tuple_format_unref(box_tuple_format_t *format)
tuple_format_unref(format);
}
+int
+tuple_format_iterator_create(struct tuple_format_iterator *it,
+ struct tuple_format *format, const char *tuple,
+ struct region *region)
+{
+ assert(mp_typeof(*tuple) == MP_ARRAY);
+ const char *field = tuple;
+ uint32_t field_count = mp_decode_array(&field);
+ it->parent = &format->fields.root;
+ it->format = format;
+ it->pos = field;
+ if (field_count == 0) {
+ mp_stack_create(&it->stack, 0, NULL);
+ return 0;
+ }
+ /*
+ * Prepare mp stack of the size equal to the maximum depth
+ * of the indexed field in the format::fields tree
+ * (fields_depth) to carry out a simultaneous parsing of
+ * the tuple and tree traversal.
+ */
+ uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
+ struct mp_frame *frames = region_alloc(region, frames_sz);
+ if (frames == NULL) {
+ diag_set(OutOfMemory, frames_sz, "region", "frames");
+ return -1;
+ }
+ mp_stack_create(&it->stack, format->fields_depth, frames);
+ mp_stack_push(&it->stack, MP_ARRAY, field_count);
+ return 0;
+}
+
+bool
+tuple_format_iterator_advice(struct tuple_format_iterator *it,
+ struct tuple_field **field, const char **data,
+ const char **data_end)
+{
+ struct mp_frame *frame = mp_stack_top(&it->stack);
+ while (!mp_frame_advance(frame)) {
+ /*
+ * If the elements of the current frame
+ * are over, pop this frame out of stack
+ * and climb one position in the format::fields
+ * tree to match the changed JSON path to the
+ * data in the tuple.
+ */
+ mp_stack_pop(&it->stack);
+ if (mp_stack_is_empty(&it->stack))
+ return false;
+ frame = mp_stack_top(&it->stack);
+ it->parent = it->parent->parent;
+ }
+ /*
+ * Use the top frame of the stack and the
+ * current data offset to prepare the JSON token
+ * for the subsequent format::fields lookup.
+ */
+ struct json_token token;
+ switch (frame->type) {
+ case MP_ARRAY:
+ token.type = JSON_TOKEN_NUM;
+ token.num = frame->idx;
+ break;
+ case MP_MAP:
+ if (mp_typeof(*it->pos) != MP_STR) {
+ mp_next(&it->pos);
+ mp_next(&it->pos);
+ *field = NULL;
+ return true;
+ }
+ token.type = JSON_TOKEN_STR;
+ token.str = mp_decode_str(&it->pos, (uint32_t *)&token.len);
+ break;
+ default:
+ unreachable();
+ }
+ /*
+ * Perform lookup for a field in format::fields,
+ * that represents the field metadata by JSON path
+ * corresponding to the current position in the
+ * tuple.
+ */
+ assert(it->parent != NULL);
+ *field = json_tree_lookup_entry(&it->format->fields, it->parent, &token,
+ struct tuple_field, token);
+ *data = it->pos;
+ /*
+ * If the current position of the data in tuple
+ * matches the container type (MP_MAP or MP_ARRAY)
+ * and the format::fields tree has such a record,
+ * prepare a new stack frame because it needs to
+ * be analyzed in the next iterations.
+ */
+ enum mp_type type = mp_typeof(*it->pos);
+ if ((type == MP_ARRAY || type == MP_MAP) &&
+ !mp_stack_is_full(&it->stack) && *field != NULL) {
+ uint32_t size = type == MP_ARRAY ?
+ mp_decode_array(&it->pos) :
+ mp_decode_map(&it->pos);
+ mp_stack_push(&it->stack, type, size);
+ it->parent = &(*field)->token;
+ } else {
+ mp_next(&it->pos);
+ }
+ *data_end = it->pos;
+ return true;
+}
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index 22a0fb232..41774eb9c 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -412,6 +412,81 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
int
tuple_format_init();
+/**
+ * A tuple msgpack iterator that decodes the tuple and returns
+ * only fields that are described in the tuple_format.
+ */
+struct tuple_format_iterator {
+ /**
+ * Tuple format is used to perform field lookups in
+ * format::fields JSON tree.
+ */
+ struct tuple_format *format;
+ /**
+ * The pointer to the parent node in the format::fields
+ * JSON tree. Is required for relative lookup for the
+ * next field.
+ */
+ struct json_token *parent;
+ /**
+ * Traversal stack of msgpack frames is used to determine
+ * when the parsing of the current composite mp structure
+ * (array or map) is completed to update to the parent
+ * pointer accordingly.
+ */
+ struct mp_stack stack;
+ /** The current read position in msgpack. */
+ const char *pos;
+};
+
+/**
+ * Initialize tuple decode iterator with tuple format and tuple
+ * data pointer.
+ *
+ * Function uses the region for the traversal stack allocation.
+ *
+ * Returns 0 on success. In case of memory allocation error sets
+ * diag message and returns -1.
+ */
+int
+tuple_format_iterator_create(struct tuple_format_iterator *it,
+ struct tuple_format *format, const char *tuple,
+ struct region *region);
+
+/**
+ * Perform tuple decode step and update iterator state.
+ *
+ * Returns true when the decode step succeeded and initializes:
+ * field - the tuple_field pointer to the format::fields field
+ * that matches the currently processed msgpack field
+ * (if one exists),
+ * data - the pointer to the currently processed msgpack field,
+ * data_end - the pointer to the end of the currently processed
+ * msgpack field (in case of an MP_MAP or MP_ARRAY that
+ * is described in the format, this is the end of the
+ * field header).
+ */
+bool
+tuple_format_iterator_advice(struct tuple_format_iterator *it,
+ struct tuple_field **field, const char **data,
+ const char **data_end);
+
+/**
+ * Limit the number of fields that iterator must decode for
+ * the current nesting level.
+ *
+ * The field_count argument must not exceed the number of items
+ * available for scanning at the current nesting level.
+ */
+static inline void
+tuple_format_iterator_limit(struct tuple_format_iterator *it,
+ uint32_t field_count)
+{
+ struct mp_frame *frame = mp_stack_top(&it->stack);
+ assert(field_count <= (uint32_t)frame->count);
+ frame->count = field_count;
+}
+
#if defined(__cplusplus)
} /* extern "C" */
#endif /* defined(__cplusplus) */
diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
index 776b1f69c..c6a1f4033 100644
--- a/src/box/vy_stmt.c
+++ b/src/box/vy_stmt.c
@@ -417,95 +417,62 @@ vy_stmt_new_surrogate_delete_raw(struct tuple_format *format,
}
char *field_map_begin = data + src_size;
uint32_t *field_map = (uint32_t *) (data + total_size);
-
+ /*
+ * Perform simultaneous parsing of the tuple and
+ * format::fields tree traversal to copy indexed field
+ * data and initialize field map.
+ */
+ struct tuple_format_iterator it;
const char *src_pos = src_data;
- uint32_t src_count = mp_decode_array(&src_pos);
- uint32_t field_count = MIN(src_count, format->index_field_count);
+ if (tuple_format_iterator_create(&it, format, src_pos, region) != 0)
+ goto out;
+
+ uint32_t field_count = MIN((uint32_t)mp_stack_top(&it.stack)->count,
+ format->index_field_count);
+ tuple_format_iterator_limit(&it, field_count);
+
/*
* Nullify field map to be able to detect by 0, which key
* fields are absent in tuple_field().
*/
memset((char *)field_map - format->field_map_size, 0,
format->field_map_size);
+
char *pos = mp_encode_array(data, field_count);
- /*
- * Perform simultaneous parsing of the tuple and
- * format::fields tree traversal to copy indexed field
- * data and initialize field map. In many details the code
- * above works like tuple_field_map_create, read it's
- * comments for more details.
- */
- uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
- struct mp_frame *frames = region_alloc(region, frames_sz);
- if (frames == NULL) {
- diag_set(OutOfMemory, frames_sz, "region", "frames");
- goto out;
- }
- struct mp_stack stack;
- mp_stack_create(&stack, format->fields_depth, frames);
- mp_stack_push(&stack, MP_ARRAY, field_count);
+ const char *src_pos_end;
struct tuple_field *field;
- struct json_token *parent = &format->fields.root;
- while (true) {
- struct mp_frame *frame = mp_stack_top(&stack);
- while (!mp_frame_advance(frame)) {
- mp_stack_pop(&stack);
- if (mp_stack_is_empty(&stack))
- goto finish;
- parent = parent->parent;
- }
- struct json_token token;
- switch (frame->type) {
- case MP_ARRAY:
- token.type = JSON_TOKEN_NUM;
- token.num = frame->idx;
- break;
- case MP_MAP:
- if (mp_typeof(*src_pos) != MP_STR) {
- mp_next(&src_pos);
- mp_next(&src_pos);
- pos = mp_encode_nil(pos);
- pos = mp_encode_nil(pos);
- continue;
- }
- token.type = JSON_TOKEN_STR;
- token.str = mp_decode_str(&src_pos, (uint32_t *)&token.len);
- pos = mp_encode_str(pos, token.str, token.len);
- break;
- default:
- unreachable();
- }
- assert(parent != NULL);
- field = json_tree_lookup_entry(&format->fields, parent, &token,
- struct tuple_field, token);
+ while (tuple_format_iterator_advice(&it, &field, &src_pos,
+ &src_pos_end)) {
+ struct mp_frame *frame = mp_stack_top(&it.stack);
if (field == NULL || !field->is_key_part) {
- mp_next(&src_pos);
+ /*
+ * Instead of copying useless data having
+ * no representation in tuple_format,
+ * write nil.
+ */
pos = mp_encode_nil(pos);
+ if (frame->type == MP_MAP)
+ pos = mp_encode_nil(pos);
continue;
}
+ if (field->token.type == JSON_TOKEN_STR) {
+ assert(frame->type == MP_MAP);
+ pos = mp_encode_str(pos, field->token.str,
+ field->token.len);
+ }
+ /* Initialize field_map with data offset. */
if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
field_map[field->offset_slot] = pos - data;
- enum mp_type type = mp_typeof(*src_pos);
- if ((type == MP_ARRAY || type == MP_MAP) &&
- !mp_stack_is_full(&stack)) {
- uint32_t size;
- if (type == MP_ARRAY) {
- size = mp_decode_array(&src_pos);
- pos = mp_encode_array(pos, size);
- } else {
- size = mp_decode_map(&src_pos);
- pos = mp_encode_map(pos, size);
- }
- mp_stack_push(&stack, type, size);
- parent = &field->token;
+ /* Copy field data. */
+ if (field->type == FIELD_TYPE_ARRAY) {
+ pos = mp_encode_array(pos, frame->count);
+ } else if (field->type == FIELD_TYPE_MAP) {
+ pos = mp_encode_map(pos, frame->count);
} else {
- const char *src_field = src_pos;
- mp_next(&src_pos);
- memcpy(pos, src_field, src_pos - src_field);
- pos += src_pos - src_field;
+ memcpy(pos, src_pos, src_pos_end - src_pos);
+ pos += src_pos_end - src_pos;
}
}
-finish:
assert(pos <= data + src_size);
uint32_t bsize = pos - data;
stmt = vy_stmt_alloc(format, bsize);
--
2.21.0
More information about the Tarantool-patches
mailing list