[PATCH v3 5/7] box: introduce tuple_parse_iterator class

Kirill Shcherbatov kshcherbatov at tarantool.org
Tue Apr 2 18:49:36 MSK 2019


The similar code in tuple_field_map_create and
vy_stmt_new_surrogate_delete_raw that performs tuple parsing in
deep has been refactored as reusable tuple_parse_iterator class.

Being thus encapsulated, this code will be uniformly managed and
extended in the further patches in scope of multikey indexes.

Needed for #1257
---
 src/box/tuple_format.c | 222 +++++++++++++++++++++++------------------
 src/box/tuple_format.h |  65 ++++++++++++
 src/box/vy_stmt.c      |  93 ++++++-----------
 3 files changed, 218 insertions(+), 162 deletions(-)

diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 1043707ad..070897ec2 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -831,109 +831,34 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
 	 * which key fields are absent in tuple_field().
 	 */
 	memset((char *)*field_map - *field_map_size, 0, *field_map_size);
-	/*
-	 * Prepare mp stack of the size equal to the maximum depth
-	 * of the indexed field in the format::fields tree
-	 * (fields_depth) to carry out a simultaneous parsing of
-	 * the tuple and tree traversal to process type
-	 * validations and field map initialization.
-	 */
-	uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
-	struct mp_frame *frames = region_alloc(region, frames_sz);
-	if (frames == NULL) {
-		diag_set(OutOfMemory, frames_sz, "region", "frames");
+	struct tuple_parse_iterator it;
+	if (tuple_parse_iterator_create(&it, format, pos, defined_field_count,
+					region) != 0)
 		goto error;
-	}
-	struct mp_stack stack;
-	mp_stack_create(&stack, format->fields_depth, frames);
-	mp_stack_push(&stack, MP_ARRAY, defined_field_count);
+	const char *pos_end;
 	struct tuple_field *field;
-	struct json_token *parent = &format->fields.root;
-	while (true) {
-		int idx;
-		while ((idx = mp_stack_advance(&stack)) == -1) {
-			/*
-			 * If the elements of the current frame
-			 * are over, pop this frame out of stack
-			 * and climb one position in the
-			 * format::fields tree to match the
-			 * changed JSON path to the data in the
-			 * tuple.
-			 */
-			mp_stack_pop(&stack);
-			if (mp_stack_is_empty(&stack))
-				goto finish;
-			parent = parent->parent;
-		}
-		/*
-		 * Use the top frame of the stack and the
-		 * current data offset to prepare the JSON token
-		 * for the subsequent format::fields lookup.
-		 */
-		struct json_token token;
-		switch (mp_stack_type(&stack)) {
-		case MP_ARRAY:
-			token.type = JSON_TOKEN_NUM;
-			token.num = idx;
-			break;
-		case MP_MAP:
-			if (mp_typeof(*pos) != MP_STR) {
-				/*
-				 * JSON path support only string
-				 * keys for map. Skip this entry.
-				 */
-				mp_next(&pos);
-				mp_next(&pos);
-				continue;
-			}
-			token.type = JSON_TOKEN_STR;
-			token.str = mp_decode_str(&pos, (uint32_t *)&token.len);
-			break;
-		default:
-			unreachable();
-		}
-		/*
-		 * Perform lookup for a field in format::fields,
-		 * that represents the field metadata by JSON path
-		 * corresponding to the current position in the
-		 * tuple.
-		 */
-		enum mp_type type = mp_typeof(*pos);
-		assert(parent != NULL);
-		field = json_tree_lookup_entry(&format->fields, parent, &token,
-					       struct tuple_field, token);
-		if (field != NULL) {
-			bool is_nullable = tuple_field_is_nullable(field);
-			if (validate &&
-			    !field_mp_type_is_compatible(field->type, type,
-							 is_nullable) != 0) {
-				diag_set(ClientError, ER_FIELD_TYPE,
-					 tuple_field_path(field),
-					 field_type_strs[field->type]);
-				goto error;
-			}
-			if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
-				(*field_map)[field->offset_slot] = pos - tuple;
-			if (required_fields != NULL)
-				bit_clear(required_fields, field->id);
-		}
+	while (tuple_parse_iterator_advice(&it, &field, &pos, &pos_end) > 0) {
+		if (field == NULL)
+			continue;
 		/*
-		 * If the current position of the data in tuple
-		 * matches the container type (MP_MAP or MP_ARRAY)
-		 * and the format::fields tree has such a record,
-		 * prepare a new stack frame because it needs to
-		 * be analyzed in the next iterations.
+		 * Check if field mp_type is compatible with type
+		 * defined in format.
 		 */
-		if ((type == MP_ARRAY || type == MP_MAP) &&
-		    !mp_stack_is_full(&stack) && field != NULL) {
-			uint32_t size = type == MP_ARRAY ?
-					mp_decode_array(&pos) :
-					mp_decode_map(&pos);
-			mp_stack_push(&stack, type, size);
-			parent = &field->token;
-		} else {
-			mp_next(&pos);
+		bool is_nullable = tuple_field_is_nullable(field);
+		if (validate &&
+		    !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
+						 is_nullable) != 0) {
+			diag_set(ClientError, ER_FIELD_TYPE,
+				tuple_field_path(field),
+				field_type_strs[field->type]);
+			goto error;
 		}
+		/* Initialize field_map with data offset. */
+		if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
+			(*field_map)[field->offset_slot] = pos - tuple;
+		/* Mark this field as present in the tuple. */
+		if (required_fields != NULL)
+			bit_clear(required_fields, field->id);
 	}
 finish:
 	/*
@@ -1029,3 +954,104 @@ box_tuple_format_unref(box_tuple_format_t *format)
 	tuple_format_unref(format);
 }
 
+int
+tuple_parse_iterator_create(struct tuple_parse_iterator *it,
+			    struct tuple_format *format, const char *data,
+			    uint32_t field_count, struct region *region)
+{
+	/*
+	 * Prepare mp stack of the size equal to the maximum depth
+	 * of the indexed field in the format::fields tree
+	 * (fields_depth) to carry out a simultaneous parsing of
+	 * the tuple and tree traversal.
+	 */
+	uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
+	struct mp_frame *frames = region_alloc(region, frames_sz);
+	if (frames == NULL) {
+		diag_set(OutOfMemory, frames_sz, "region", "frames");
+		return -1;
+	}
+	mp_stack_create(&it->stack, format->fields_depth, frames);
+	mp_stack_push(&it->stack, MP_ARRAY, field_count);
+	it->parent = &format->fields.root;
+	it->format = format;
+	it->pos = data;
+	return 0;
+}
+
+int
+tuple_parse_iterator_advice(struct tuple_parse_iterator *it,
+			    struct tuple_field **field, const char **data,
+			    const char **data_end)
+{
+	int idx, rc = 0;
+	while ((idx = mp_stack_advance(&it->stack)) == -1) {
+		/*
+		 * If the elements of the current frame
+		 * are over, pop this frame out of stack
+		 * and climb one position in the format::fields
+		 * tree to match the changed JSON path to the
+		 * data in the tuple.
+		 */
+		mp_stack_pop(&it->stack);
+		if (mp_stack_is_empty(&it->stack))
+			return rc;
+		it->parent = it->parent->parent;
+	}
+	/*
+	 * Use the top frame of the stack and the
+	 * current data offset to prepare the JSON token
+	 * for the subsequent format::fields lookup.
+	 */
+	struct json_token token;
+	switch (mp_stack_type(&it->stack)) {
+	case MP_ARRAY:
+		rc = 1;
+		token.type = JSON_TOKEN_NUM;
+		token.num = idx;
+		break;
+	case MP_MAP:
+		rc = 2;
+		if (mp_typeof(*it->pos) != MP_STR) {
+			mp_next(&it->pos);
+			mp_next(&it->pos);
+			*field = NULL;
+			return rc;
+		}
+		token.type = JSON_TOKEN_STR;
+		token.str = mp_decode_str(&it->pos, (uint32_t *)&token.len);
+		break;
+	default:
+		unreachable();
+	}
+	/*
+	 * Perform lookup for a field in format::fields,
+	 * that represents the field metadata by JSON path
+	 * corresponding to the current position in the
+	 * tuple.
+	 */
+	assert(it->parent != NULL);
+	*field = json_tree_lookup_entry(&it->format->fields, it->parent, &token,
+				        struct tuple_field, token);
+	*data = it->pos;
+	/*
+	 * If the current position of the data in tuple
+	 * matches the container type (MP_MAP or MP_ARRAY)
+	 * and the format::fields tree has such a record,
+	 * prepare a new stack frame because it needs to
+	 * be analyzed in the next iterations.
+	 */
+	enum mp_type type = mp_typeof(*it->pos);
+	if ((type == MP_ARRAY || type == MP_MAP) &&
+	    !mp_stack_is_full(&it->stack) && *field != NULL) {
+		uint32_t size = type == MP_ARRAY ?
+				mp_decode_array(&it->pos) :
+				mp_decode_map(&it->pos);
+		mp_stack_push(&it->stack, type, size);
+		it->parent = &(*field)->token;
+	} else {
+		mp_next(&it->pos);
+	}
+	*data_end = it->pos;
+	return rc;
+}
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index 22a0fb232..bef1d0903 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -412,6 +412,71 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
 int
 tuple_format_init();
 
+/**
+ * A tuple msgpack iterator that parse tuple in deep an returns
+ * only fields that are described in the tuple_format.
+ */
+struct tuple_parse_iterator {
+	/**
+	 * Tuple format is used to perform field lookups in
+	 * format::fields JSON tree.
+	 */
+	struct tuple_format *format;
+	/**
+	 * The pointer to the parent node in the format::fields
+	 * JSON tree. Is required for relative lookup for the
+	 * next field.
+	 */
+	struct json_token *parent;
+	/**
+	 * Traversal stack of msgpack frames is used to determine
+	 * when the parsing of the current composite mp structure
+	 * (array or map) is completed to update to the parent
+	 * pointer accordingly.
+	 */
+	struct mp_stack stack;
+	/** The current read position in msgpack. */
+	const char *pos;
+};
+
+/**
+ * Initialize tuple parse iterator with tuple format, data pointer
+ * and the count of top-level msgpack fields to be processed.
+ *
+ * This function assumes that the msgpack header containing the
+ * number of top-level msgpack fields (field_count) has already
+ * been parsed and the data pointer has already been shifted
+ * correspondingly. This allows directly limit the number of
+ * fields that must be parsed.
+
+ * Function uses the region for the traversal stack allocation.
+ *
+ * Returns 0 on success. In case of memory allocation error sets
+ * diag message and returns -1.
+ */
+int
+tuple_parse_iterator_create(struct tuple_parse_iterator *it,
+			    struct tuple_format *format, const char *data,
+			    uint32_t field_count, struct region *region);
+
+/**
+ * Parse tuple in deep and update iterator state.
+ *
+ * Returns the number of fields at the current tuple nesting
+ * level that have been processed (2 for map item, 1 for array
+ * key:value pair, 0 on stop) and initializes:
+ * field - the tuple_field pointer to format::fields field
+ *         that matches to the currently processed msgpack field
+ *         (when exists),
+ * data  - the pointer to the currently processed msgpack field,
+ * data_end - the pointer to the end of currently processed
+ *            msgpack field.
+ */
+int
+tuple_parse_iterator_advice(struct tuple_parse_iterator *it,
+			    struct tuple_field **field, const char **data,
+			    const char **data_end);
+
 #if defined(__cplusplus)
 } /* extern "C" */
 #endif /* defined(__cplusplus) */
diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
index add86622b..1e8bb7825 100644
--- a/src/box/vy_stmt.c
+++ b/src/box/vy_stmt.c
@@ -431,81 +431,46 @@ vy_stmt_new_surrogate_delete_raw(struct tuple_format *format,
 	/*
 	 * Perform simultaneous parsing of the tuple and
 	 * format::fields tree traversal to copy indexed field
-	 * data and initialize field map. In many details the code
-	 * above works like tuple_field_map_create, read it's
-	 * comments for more details.
+	 * data and initialize field map.
 	 */
-	uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
-	struct mp_frame *frames = region_alloc(region, frames_sz);
-	if (frames == NULL) {
-		diag_set(OutOfMemory, frames_sz, "region", "frames");
+	struct tuple_parse_iterator it;
+	if (tuple_parse_iterator_create(&it, format, src_pos, field_count,
+					region) != 0)
 		goto out;
-	}
-	struct mp_stack stack;
-	mp_stack_create(&stack, format->fields_depth, frames);
-	mp_stack_push(&stack, MP_ARRAY, field_count);
+	int rc;
+	const char *src_pos_end;
 	struct tuple_field *field;
-	struct json_token *parent = &format->fields.root;
-	while (true) {
-		int idx;
-		while ((idx = mp_stack_advance(&stack)) == -1) {
-			mp_stack_pop(&stack);
-			if (mp_stack_is_empty(&stack))
-				goto finish;
-			parent = parent->parent;
-		}
-		struct json_token token;
-		switch (mp_stack_type(&stack)) {
-		case MP_ARRAY:
-			token.type = JSON_TOKEN_NUM;
-			token.num = idx;
-			break;
-		case MP_MAP:
-			if (mp_typeof(*src_pos) != MP_STR) {
-				mp_next(&src_pos);
-				mp_next(&src_pos);
-				pos = mp_encode_nil(pos);
-				pos = mp_encode_nil(pos);
-				continue;
-			}
-			token.type = JSON_TOKEN_STR;
-			token.str = mp_decode_str(&src_pos, (uint32_t *)&token.len);
-			pos = mp_encode_str(pos, token.str, token.len);
-			break;
-		default:
-			unreachable();
-		}
-		assert(parent != NULL);
-		field = json_tree_lookup_entry(&format->fields, parent, &token,
-					       struct tuple_field, token);
+	while ((rc = tuple_parse_iterator_advice(&it, &field, &src_pos,
+						 &src_pos_end)) > 0) {
 		if (field == NULL || !field->is_key_part) {
-			mp_next(&src_pos);
-			pos = mp_encode_nil(pos);
+			/*
+			 * Instead of copying useless data having
+			 * no representation in tuple_format,
+			 * write nil.
+			 */
+			while (rc-- > 0)
+				pos = mp_encode_nil(pos);
 			continue;
 		}
+		if (field->token.type == JSON_TOKEN_STR) {
+			assert(rc-- == 2);
+			pos = mp_encode_str(pos, field->token.str,
+					    field->token.len);
+		}
+		assert(rc == 1);
+		/* Initialize field_map with data offset. */
 		if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
 			field_map[field->offset_slot] = pos - data;
-		enum mp_type type = mp_typeof(*src_pos);
-		if ((type == MP_ARRAY || type == MP_MAP) &&
-		    !mp_stack_is_full(&stack)) {
-			uint32_t size;
-			if (type == MP_ARRAY) {
-				size = mp_decode_array(&src_pos);
-				pos = mp_encode_array(pos, size);
-			} else {
-				size = mp_decode_map(&src_pos);
-				pos = mp_encode_map(pos, size);
-			}
-			mp_stack_push(&stack, type, size);
-			parent = &field->token;
+		/* Copy field data. */
+		if (field->type == FIELD_TYPE_ARRAY) {
+			pos = mp_encode_array(pos, mp_decode_array(&src_pos));
+		} else if (field->type == FIELD_TYPE_MAP) {
+			pos = mp_encode_map(pos, mp_decode_map(&src_pos));
 		} else {
-			const char *src_field = src_pos;
-			mp_next(&src_pos);
-			memcpy(pos, src_field, src_pos - src_field);
-			pos += src_pos - src_field;
+			memcpy(pos, src_pos, src_pos_end - src_pos);
+			pos += src_pos_end - src_pos;
 		}
 	}
-finish:
 	assert(pos <= data + src_size);
 	uint32_t bsize = pos - data;
 	stmt = vy_stmt_alloc(format, bsize);
-- 
2.21.0




More information about the Tarantool-patches mailing list