Tarantool development patches archive
 help / color / mirror / Atom feed
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org,
	Vladimir Davydov <vdavydov.dev@gmail.com>
Subject: Re: [tarantool-patches] Re: [PATCH v3 5/7] box: introduce tuple_parse_iterator class
Date: Fri, 5 Apr 2019 20:17:27 +0300	[thread overview]
Message-ID: <e6934ea3-bdcf-068f-47da-36d36d1b4dd4@tarantool.org> (raw)
In-Reply-To: <20190403140440.zq7tfbogfjjrjto2@esperanza>

Take a look for a new API. Also I've re-factored names.

============================================================

The similar code in tuple_field_map_create and
vy_stmt_new_surrogate_delete_raw that performs tuple decode
with tuple_format has been refactored as reusable
tuple_format_iterator class.

Being thus encapsulated, this code will be uniformly managed and
extended in the further patches in scope of multikey indexes.

Needed for #1257
---
 src/box/tuple_format.c | 236 +++++++++++++++++++++++------------------
 src/box/tuple_format.h |  75 +++++++++++++
 src/box/vy_stmt.c      | 109 +++++++------------
 3 files changed, 248 insertions(+), 172 deletions(-)

diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 093046b37..a072b96e0 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -790,8 +790,13 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
 
 	const char *pos = tuple;
 	int rc = 0;
+	struct tuple_format_iterator it;
+	if (tuple_format_iterator_create(&it, format, tuple, region) != 0)
+		goto error;
+
 	/* Check to see if the tuple has a sufficient number of fields. */
-	uint32_t field_count = mp_decode_array(&pos);
+	uint32_t field_count = !mp_stack_is_empty(&it.stack) ?
+				mp_stack_top(&it.stack)->count : 0;
 	if (validate && format->exact_field_count > 0 &&
 	    format->exact_field_count != field_count) {
 		diag_set(ClientError, ER_EXACT_FIELD_COUNT,
@@ -826,115 +831,37 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
 	uint32_t defined_field_count = MIN(field_count, validate ?
 					   tuple_format_field_count(format) :
 					   format->index_field_count);
+	tuple_format_iterator_limit(&it, defined_field_count);
+
 	/*
 	 * Nullify field map to be able to detect by 0,
 	 * which key fields are absent in tuple_field().
 	 */
 	memset((char *)*field_map - *field_map_size, 0, *field_map_size);
-	/*
-	 * Prepare mp stack of the size equal to the maximum depth
-	 * of the indexed field in the format::fields tree
-	 * (fields_depth) to carry out a simultaneous parsing of
-	 * the tuple and tree traversal to process type
-	 * validations and field map initialization.
-	 */
-	uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
-	struct mp_frame *frames = region_alloc(region, frames_sz);
-	if (frames == NULL) {
-		diag_set(OutOfMemory, frames_sz, "region", "frames");
-		goto error;
-	}
-	struct mp_stack stack;
-	mp_stack_create(&stack, format->fields_depth, frames);
-	mp_stack_push(&stack, MP_ARRAY, defined_field_count);
+	const char *pos_end;
 	struct tuple_field *field;
-	struct json_token *parent = &format->fields.root;
-	while (true) {
-		struct mp_frame *frame = mp_stack_top(&stack);
-		while (!mp_frame_advance(frame)) {
-			/*
-			 * If the elements of the current frame
-			 * are over, pop this frame out of stack
-			 * and climb one position in the
-			 * format::fields tree to match the
-			 * changed JSON path to the data in the
-			 * tuple.
-			 */
-			mp_stack_pop(&stack);
-			if (mp_stack_is_empty(&stack))
-				goto finish;
-			frame = mp_stack_top(&stack);
-			parent = parent->parent;
-		}
-		/*
-		 * Use the top frame of the stack and the
-		 * current data offset to prepare the JSON token
-		 * for the subsequent format::fields lookup.
-		 */
-		struct json_token token;
-		switch (frame->type) {
-		case MP_ARRAY:
-			token.type = JSON_TOKEN_NUM;
-			token.num = frame->idx;
-			break;
-		case MP_MAP:
-			if (mp_typeof(*pos) != MP_STR) {
-				/*
-				 * JSON path support only string
-				 * keys for map. Skip this entry.
-				 */
-				mp_next(&pos);
-				mp_next(&pos);
-				continue;
-			}
-			token.type = JSON_TOKEN_STR;
-			token.str = mp_decode_str(&pos, (uint32_t *)&token.len);
-			break;
-		default:
-			unreachable();
-		}
-		/*
-		 * Perform lookup for a field in format::fields,
-		 * that represents the field metadata by JSON path
-		 * corresponding to the current position in the
-		 * tuple.
-		 */
-		enum mp_type type = mp_typeof(*pos);
-		assert(parent != NULL);
-		field = json_tree_lookup_entry(&format->fields, parent, &token,
-					       struct tuple_field, token);
-		if (field != NULL) {
-			bool is_nullable = tuple_field_is_nullable(field);
-			if (validate &&
-			    !field_mp_type_is_compatible(field->type, type,
-							 is_nullable) != 0) {
-				diag_set(ClientError, ER_FIELD_TYPE,
-					 tuple_field_path(field),
-					 field_type_strs[field->type]);
-				goto error;
-			}
-			if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
-				(*field_map)[field->offset_slot] = pos - tuple;
-			if (required_fields != NULL)
-				bit_clear(required_fields, field->id);
-		}
+	while (tuple_format_iterator_advice(&it, &field, &pos, &pos_end)) {
+		if (field == NULL)
+			continue;
 		/*
-		 * If the current position of the data in tuple
-		 * matches the container type (MP_MAP or MP_ARRAY)
-		 * and the format::fields tree has such a record,
-		 * prepare a new stack frame because it needs to
-		 * be analyzed in the next iterations.
+		 * Check if field mp_type is compatible with type
+		 * defined in format.
 		 */
-		if ((type == MP_ARRAY || type == MP_MAP) &&
-		    !mp_stack_is_full(&stack) && field != NULL) {
-			uint32_t size = type == MP_ARRAY ?
-					mp_decode_array(&pos) :
-					mp_decode_map(&pos);
-			mp_stack_push(&stack, type, size);
-			parent = &field->token;
-		} else {
-			mp_next(&pos);
+		bool is_nullable = tuple_field_is_nullable(field);
+		if (validate &&
+		    !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
+						 is_nullable) != 0) {
+			diag_set(ClientError, ER_FIELD_TYPE,
+				tuple_field_path(field),
+				field_type_strs[field->type]);
+			goto error;
 		}
+		/* Initialize field_map with data offset. */
+		if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
+			(*field_map)[field->offset_slot] = pos - tuple;
+		/* Mark this field as present in the tuple. */
+		if (required_fields != NULL)
+			bit_clear(required_fields, field->id);
 	}
 finish:
 	/*
@@ -1030,3 +957,110 @@ box_tuple_format_unref(box_tuple_format_t *format)
 	tuple_format_unref(format);
 }
 
+int
+tuple_format_iterator_create(struct tuple_format_iterator *it,
+			     struct tuple_format *format, const char *tuple,
+			     struct region *region)
+{
+	assert(mp_typeof(*tuple) == MP_ARRAY);
+	const char *field = tuple;
+	uint32_t field_count = mp_decode_array(&field);
+	it->parent = &format->fields.root;
+	it->format = format;
+	it->pos = field;
+	if (field_count == 0) {
+		mp_stack_create(&it->stack, 0, NULL);
+		return 0;
+	}
+	/*
+	 * Prepare mp stack of the size equal to the maximum depth
+	 * of the indexed field in the format::fields tree
+	 * (fields_depth) to carry out a simultaneous parsing of
+	 * the tuple and tree traversal.
+	 */
+	uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
+	struct mp_frame *frames = region_alloc(region, frames_sz);
+	if (frames == NULL) {
+		diag_set(OutOfMemory, frames_sz, "region", "frames");
+		return -1;
+	}
+	mp_stack_create(&it->stack, format->fields_depth, frames);
+	mp_stack_push(&it->stack, MP_ARRAY, field_count);
+	return 0;
+}
+
+bool
+tuple_format_iterator_advice(struct tuple_format_iterator *it,
+			     struct tuple_field **field, const char **data,
+			     const char **data_end)
+{
+	struct mp_frame *frame = mp_stack_top(&it->stack);
+	while (!mp_frame_advance(frame)) {
+		/*
+		 * If the elements of the current frame
+		 * are over, pop this frame out of stack
+		 * and climb one position in the format::fields
+		 * tree to match the changed JSON path to the
+		 * data in the tuple.
+		 */
+		mp_stack_pop(&it->stack);
+		if (mp_stack_is_empty(&it->stack))
+			return false;
+		frame = mp_stack_top(&it->stack);
+		it->parent = it->parent->parent;
+	}
+	/*
+	 * Use the top frame of the stack and the
+	 * current data offset to prepare the JSON token
+	 * for the subsequent format::fields lookup.
+	 */
+	struct json_token token;
+	switch (frame->type) {
+	case MP_ARRAY:
+		token.type = JSON_TOKEN_NUM;
+		token.num = frame->idx;
+		break;
+	case MP_MAP:
+		if (mp_typeof(*it->pos) != MP_STR) {
+			mp_next(&it->pos);
+			mp_next(&it->pos);
+			*field = NULL;
+			return true;
+		}
+		token.type = JSON_TOKEN_STR;
+		token.str = mp_decode_str(&it->pos, (uint32_t *)&token.len);
+		break;
+	default:
+		unreachable();
+	}
+	/*
+	 * Perform lookup for a field in format::fields,
+	 * that represents the field metadata by JSON path
+	 * corresponding to the current position in the
+	 * tuple.
+	 */
+	assert(it->parent != NULL);
+	*field = json_tree_lookup_entry(&it->format->fields, it->parent, &token,
+				        struct tuple_field, token);
+	*data = it->pos;
+	/*
+	 * If the current position of the data in tuple
+	 * matches the container type (MP_MAP or MP_ARRAY)
+	 * and the format::fields tree has such a record,
+	 * prepare a new stack frame because it needs to
+	 * be analyzed in the next iterations.
+	 */
+	enum mp_type type = mp_typeof(*it->pos);
+	if ((type == MP_ARRAY || type == MP_MAP) &&
+	    !mp_stack_is_full(&it->stack) && *field != NULL) {
+		uint32_t size = type == MP_ARRAY ?
+				mp_decode_array(&it->pos) :
+				mp_decode_map(&it->pos);
+		mp_stack_push(&it->stack, type, size);
+		it->parent = &(*field)->token;
+	} else {
+		mp_next(&it->pos);
+	}
+	*data_end = it->pos;
+	return true;
+}
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index 22a0fb232..41774eb9c 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -412,6 +412,81 @@ tuple_field_map_create(struct tuple_format *format, const char *tuple,
 int
 tuple_format_init();
 
+/**
+ * A tuple msgpack iterator that decodes the tuple and returns
+ * only fields that are described in the tuple_format.
+ */
+struct tuple_format_iterator {
+	/**
+	 * Tuple format is used to perform field lookups in
+	 * format::fields JSON tree.
+	 */
+	struct tuple_format *format;
+	/**
+	 * The pointer to the parent node in the format::fields
+	 * JSON tree. Is required for relative lookup for the
+	 * next field.
+	 */
+	struct json_token *parent;
+	/**
+	 * Traversal stack of msgpack frames is used to determine
+	 * when the parsing of the current composite mp structure
+	 * (array or map) is completed to update to the parent
+	 * pointer accordingly.
+	 */
+	struct mp_stack stack;
+	/** The current read position in msgpack. */
+	const char *pos;
+};
+
+/**
+ * Initialize tuple decode iterator with tuple format and tuple
+ * data pointer.
+ *
+ * Function uses the region for the traversal stack allocation.
+ *
+ * Returns 0 on success. In case of memory allocation error sets
+ * diag message and returns -1.
+ */
+int
+tuple_format_iterator_create(struct tuple_format_iterator *it,
+			     struct tuple_format *format, const char *tuple,
+			     struct region *region);
+
+/**
+ * Perform tuple decode step and update iterator state.
+ *
+ * Returns true when decode step succeeded and initialize:
+ * field - the tuple_field pointer to format::fields field
+ *         that matches to the currently processed msgpack field
+ *         (when exists),
+ * data  - the pointer to the currently processed msgpack field,
+ * data_end - the pointer to the end of currently processed
+ *            msgpack field(in case of MP_MAP or MP_ARRAY that
+ *            is described in format this is the end of field
+ *            header).
+ */
+bool
+tuple_format_iterator_advice(struct tuple_format_iterator *it,
+			     struct tuple_field **field, const char **data,
+			     const char **data_end);
+
+/**
+ * Limit the number of fields that iterator must decode for
+ * the current nesting level.
+ *
+ * The field_count argument must not exceed the number of items
+ * available for scanning at the current nesting level.
+ */
+static inline void
+tuple_format_iterator_limit(struct tuple_format_iterator *it,
+			    uint32_t field_count)
+{
+	struct mp_frame *frame = mp_stack_top(&it->stack);
+	assert(field_count <= (uint32_t)frame->count);
+	frame->count = field_count;
+}
+
 #if defined(__cplusplus)
 } /* extern "C" */
 #endif /* defined(__cplusplus) */
diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
index 776b1f69c..c6a1f4033 100644
--- a/src/box/vy_stmt.c
+++ b/src/box/vy_stmt.c
@@ -417,95 +417,62 @@ vy_stmt_new_surrogate_delete_raw(struct tuple_format *format,
 	}
 	char *field_map_begin = data + src_size;
 	uint32_t *field_map = (uint32_t *) (data + total_size);
-
+	/*
+	 * Perform simultaneous parsing of the tuple and
+	 * format::fields tree traversal to copy indexed field
+	 * data and initialize field map.
+	 */
+	struct tuple_format_iterator it;
 	const char *src_pos = src_data;
-	uint32_t src_count = mp_decode_array(&src_pos);
-	uint32_t field_count = MIN(src_count, format->index_field_count);
+	if (tuple_format_iterator_create(&it, format, src_pos, region) != 0)
+		goto out;
+
+	uint32_t field_count = MIN((uint32_t)mp_stack_top(&it.stack)->count,
+				   format->index_field_count);
+	tuple_format_iterator_limit(&it, field_count);
+
 	/*
 	 * Nullify field map to be able to detect by 0, which key
 	 * fields are absent in tuple_field().
 	 */
 	memset((char *)field_map - format->field_map_size, 0,
 		format->field_map_size);
+
 	char *pos = mp_encode_array(data, field_count);
-	/*
-	 * Perform simultaneous parsing of the tuple and
-	 * format::fields tree traversal to copy indexed field
-	 * data and initialize field map. In many details the code
-	 * above works like tuple_field_map_create, read it's
-	 * comments for more details.
-	 */
-	uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
-	struct mp_frame *frames = region_alloc(region, frames_sz);
-	if (frames == NULL) {
-		diag_set(OutOfMemory, frames_sz, "region", "frames");
-		goto out;
-	}
-	struct mp_stack stack;
-	mp_stack_create(&stack, format->fields_depth, frames);
-	mp_stack_push(&stack, MP_ARRAY, field_count);
+	const char *src_pos_end;
 	struct tuple_field *field;
-	struct json_token *parent = &format->fields.root;
-	while (true) {
-		struct mp_frame *frame = mp_stack_top(&stack);
-		while (!mp_frame_advance(frame)) {
-			mp_stack_pop(&stack);
-			if (mp_stack_is_empty(&stack))
-				goto finish;
-			parent = parent->parent;
-		}
-		struct json_token token;
-		switch (frame->type) {
-		case MP_ARRAY:
-			token.type = JSON_TOKEN_NUM;
-			token.num = frame->idx;
-			break;
-		case MP_MAP:
-			if (mp_typeof(*src_pos) != MP_STR) {
-				mp_next(&src_pos);
-				mp_next(&src_pos);
-				pos = mp_encode_nil(pos);
-				pos = mp_encode_nil(pos);
-				continue;
-			}
-			token.type = JSON_TOKEN_STR;
-			token.str = mp_decode_str(&src_pos, (uint32_t *)&token.len);
-			pos = mp_encode_str(pos, token.str, token.len);
-			break;
-		default:
-			unreachable();
-		}
-		assert(parent != NULL);
-		field = json_tree_lookup_entry(&format->fields, parent, &token,
-					       struct tuple_field, token);
+	while (tuple_format_iterator_advice(&it, &field, &src_pos,
+					    &src_pos_end)) {
+		struct mp_frame *frame = mp_stack_top(&it.stack);
 		if (field == NULL || !field->is_key_part) {
-			mp_next(&src_pos);
+			/*
+			 * Instead of copying useless data having
+			 * no representation in tuple_format,
+			 * write nil.
+			 */
 			pos = mp_encode_nil(pos);
+			if (frame->type == MP_MAP)
+				pos = mp_encode_nil(pos);
 			continue;
 		}
+		if (field->token.type == JSON_TOKEN_STR) {
+			assert(frame->type == MP_MAP);
+			pos = mp_encode_str(pos, field->token.str,
+					    field->token.len);
+		}
+		/* Initialize field_map with data offset. */
 		if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
 			field_map[field->offset_slot] = pos - data;
-		enum mp_type type = mp_typeof(*src_pos);
-		if ((type == MP_ARRAY || type == MP_MAP) &&
-		    !mp_stack_is_full(&stack)) {
-			uint32_t size;
-			if (type == MP_ARRAY) {
-				size = mp_decode_array(&src_pos);
-				pos = mp_encode_array(pos, size);
-			} else {
-				size = mp_decode_map(&src_pos);
-				pos = mp_encode_map(pos, size);
-			}
-			mp_stack_push(&stack, type, size);
-			parent = &field->token;
+		/* Copy field data. */
+		if (field->type == FIELD_TYPE_ARRAY) {
+			pos = mp_encode_array(pos, frame->count);
+		} else if (field->type == FIELD_TYPE_MAP) {
+			pos = mp_encode_map(pos, frame->count);
 		} else {
-			const char *src_field = src_pos;
-			mp_next(&src_pos);
-			memcpy(pos, src_field, src_pos - src_field);
-			pos += src_pos - src_field;
+			memcpy(pos, src_pos, src_pos_end - src_pos);
+			pos += src_pos_end - src_pos;
 		}
 	}
-finish:
 	assert(pos <= data + src_size);
 	uint32_t bsize = pos - data;
 	stmt = vy_stmt_alloc(format, bsize);
-- 
2.21.0

  reply	other threads:[~2019-04-05 17:17 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-02 15:49 [PATCH v3 0/7] box: introduce multikey indexes in memtx Kirill Shcherbatov
2019-04-02 15:49 ` [PATCH v3 1/7] box: cleanup key_def virtual extract_key setter Kirill Shcherbatov
2019-04-03 12:42   ` Vladimir Davydov
2019-04-03 16:22     ` [tarantool-patches] " Kirill Shcherbatov
2019-04-03 18:01       ` Vladimir Davydov
2019-04-02 15:49 ` [PATCH v3 2/7] lib: introduce a new json_path_multikey_offset helper Kirill Shcherbatov
2019-04-03 12:56   ` Vladimir Davydov
2019-04-03 16:22     ` [tarantool-patches] " Kirill Shcherbatov
2019-04-03 18:02       ` Vladimir Davydov
2019-04-04  6:17       ` Konstantin Osipov
2019-04-02 15:49 ` [PATCH v3 3/7] box: move offset_slot init to tuple_format_add_field Kirill Shcherbatov
2019-04-03 12:57   ` Vladimir Davydov
2019-04-03 18:02   ` Vladimir Davydov
2019-04-04  6:19   ` [tarantool-patches] " Konstantin Osipov
2019-04-05 17:17     ` [tarantool-patches] " Kirill Shcherbatov
2019-04-02 15:49 ` [PATCH v3 4/7] lib: update msgpuck library Kirill Shcherbatov
2019-04-03 17:49   ` [tarantool-patches] " Kirill Shcherbatov
2019-04-04 15:54     ` Vladimir Davydov
2019-04-05 17:17       ` [tarantool-patches] " Kirill Shcherbatov
2019-04-07 12:22         ` Vladimir Davydov
2019-04-02 15:49 ` [PATCH v3 5/7] box: introduce tuple_parse_iterator class Kirill Shcherbatov
2019-04-03 14:04   ` Vladimir Davydov
2019-04-05 17:17     ` Kirill Shcherbatov [this message]
2019-04-02 15:49 ` [PATCH v3 6/7] box: introduce field_map_builder class Kirill Shcherbatov
2019-04-03 14:38   ` Vladimir Davydov
2019-04-05 17:17     ` [tarantool-patches] " Kirill Shcherbatov
2019-04-03 16:30   ` Vladimir Davydov
2019-04-02 15:49 ` [PATCH v3 7/7] box: introduce multikey indexes in memtx Kirill Shcherbatov
2019-04-04 14:20   ` Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e6934ea3-bdcf-068f-47da-36d36d1b4dd4@tarantool.org \
    --to=kshcherbatov@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --cc=vdavydov.dev@gmail.com \
    --subject='Re: [tarantool-patches] Re: [PATCH v3 5/7] box: introduce tuple_parse_iterator class' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox