Tarantool development patches archive
 help / color / mirror / Atom feed
From: Kirill Shcherbatov <kshcherbatov@tarantool.org>
To: tarantool-patches@freelists.org,
	Alexander Turenko <alexander.turenko@tarantool.org>
Cc: vdavydov.dev@gmail.com, Kirill Shcherbatov <kshcherbatov@gmail.com>
Subject: Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
Date: Fri, 18 Jan 2019 10:03:13 +0300	[thread overview]
Message-ID: <6acc6cf3-138d-52a1-9709-552877a2a953@tarantool.org> (raw)
In-Reply-To: <20190117163409.hvga32kxxgcc5sat@tkn_work_nb>

> We discussed with Vladimir what could be motivating example for this set
> of functions to show those are really general-purpose and should reside
> in the library.
It is not possible to restore full JSON path as we have no MAP key in our stack
as it is not required. But I've implemented an equivalent "mp_stack_path"
sequence consists of "type letter - M or A" concatinaded with "processed item index".
"A[1]", "A[2]", "A[2]M[1]", "A[2]M[2]", "A[3]", "A[3]A[1]"
Believe it is representative enough.

> The doxygen comment says it is >=, while the actual code uses >.
> The contract description is not clear: the function signals when it
> moves over the last item, not when it reaches the last item.
Ok.

============================================

Introduced a new mp_stack class to make complex msgpack parsing.
The structure is needed in order to facilitate the decoding
nested MP_ARRAY and MP_MAP msgpack containers without recursion.
This allows you to determine that the parsing of the current
container is complete.

Needed for #1012
---
 msgpuck.h      | 136 +++++++++++++++++++++++++++++++++++++++++++++++++
 test/msgpuck.c |  83 +++++++++++++++++++++++++++++-
 2 files changed, 218 insertions(+), 1 deletion(-)

diff --git a/msgpuck.h b/msgpuck.h
index b0d4967..b9bbe78 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -359,6 +359,45 @@ enum mp_type {
 	MP_EXT
 };
 
+/** Message pack MP_MAP or MP_ARRAY container descriptor. */
+struct mp_frame {
+	/** MP frame type calculated with mp_typeof(). */
+	enum mp_type type;
+	/**
+	 * Total items in MP_MAP or MP_ARRAY container
+	 * calculated with mp_decode_map() or mp_decode_array().
+	 */
+	uint32_t size;
+	/**
+	 * Count of items already processed. Must be less or
+	 * equal to mp_frame::size member.
+	 */
+	uint32_t curr;
+};
+
+/**
+ * Stack of service contexts mp_frame to do complex msgpack parse.
+ * The structure is needed in order to facilitate the decoding
+ * nested MP_ARRAY and MP_MAP msgpack containers without
+ * recursion. This allows you to determine that the parsing of
+ * the current container is complete.
+*/
+struct mp_stack {
+	/** The maximum stack depth. */
+	uint32_t size;
+	/**
+	 * Count of used stack frames. Corresponds to the index
+	 * in the array to perform the push operation. Must be
+	 * less or equal to mp_stack::size member.
+	 */
+	uint32_t used;
+	/**
+	 * Array of mp_frames descriptors having at least
+	 * mp_stack::size items.
+	 */
+	struct mp_frame *frames;
+};
+
 /**
  * \brief Determine MsgPack type by a first byte \a c of encoded data.
  *
@@ -1184,6 +1223,103 @@ mp_check(const char **data, const char *end);
  * {{{ Implementation
  */
 
+/**
+ * \brief Initialize mp_stack \a stack of specified size \a size
+ * and user-allocated array \a frames.
+ * The \a frames allocation must have at least \a size mp_frame
+ * items.
+ * \param stack - the pointer to a mp_stack to initialize.
+ * \param size - stack size, count of stack::frames to use.
+ * \param frames - mp_frame service context allocation to use,
+ *                 must have at least \a size items.
+ */
+MP_PROTO void
+mp_stack_init(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+{
+	stack->frames = frames;
+	stack->size = size;
+	stack->used = 0;
+}
+
+/**
+ * \brief Test if mp_stack \a stack is empty.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_empty(struct mp_stack *stack)
+{
+	return stack->used == 0;
+}
+
+/**
+ * \brief Test if mp_stack \a stack is full.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_full(struct mp_stack *stack)
+{
+	return stack->used >= stack->size;
+}
+
+/**
+ * \brief Pop the top mp_stack \a stack frame.
+ * The \a stack must not be empty.
+ * \param stack - the pointer to a mp_stack to operate with.
+ */
+MP_PROTO void
+mp_stack_pop(struct mp_stack *stack)
+{
+	assert(!mp_stack_is_empty(stack));
+	--stack->used;
+}
+
+/**
+ * \brief Get a pointer to a top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * The \a stack must not be empty.
+ * \retval a pointer to a top stack frame.
+ */
+MP_PROTO struct mp_frame *
+mp_stack_top(struct mp_stack *stack)
+{
+	assert(!mp_stack_is_empty(stack));
+	return &stack->frames[stack->used - 1];
+}
+
+/**
+ * \brief Construct a new mp_frame and push it on to mp_stack
+ * \a stack. The \a stack must not be full.
+ * \param stack - the pointer to a stack to operate with.
+ * \param type - the type of mp_frame to create.
+ * \param size - size size of mp_frame to create.
+ * \retval a pointer to a top stack frame.
+ */
+MP_PROTO void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
+{
+	assert(!mp_stack_is_full(stack));
+	uint32_t idx = stack->used++;
+	stack->frames[idx].type = type;
+	stack->frames[idx].size = size;
+	stack->frames[idx].curr = 0;
+}
+
+/**
+ * \brief Advance a mp_stack_top()::\a curr attribute of the
+ * \a stack. The \a stack must not be empty. The function also
+ * signals when it moves over the last item.
+ * \param stack - the pointer to a stack to operate with
+ * \retval true, when the top frame have already parsed:
+ *         mp_stack_top()::\a curr > mp_stack_top()::\a size;
+ *         false otherwise.
+ */
+MP_PROTO bool
+mp_stack_advance(struct mp_stack *stack)
+{
+	struct mp_frame *frame = mp_stack_top(stack);
+	return ++frame->curr > frame->size;
+}
+
 /** \cond false */
 extern const enum mp_type mp_type_hint[];
 extern const int8_t mp_parser_hint[];
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 9265453..dbfd2bb 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -1093,10 +1093,90 @@ test_overflow()
 	return check_plan();
 }
 
+const char *
+mp_stack_path(struct mp_stack *stack)
+{
+	static char buff[1024];
+	char *wptr = buff;
+	for (uint32_t idx = 0; idx < stack->used; idx++) {
+		wptr += sprintf(wptr, "%c[%d]",
+				stack->frames[idx].type == MP_MAP ? 'M' : 'A',
+				stack->frames[idx].curr);
+	}
+	return buff;
+}
+
+static int
+test_mpstack()
+{
+	plan(10);
+	header();
+
+	char tuple[1024];
+	char *wptr = tuple;
+	/* ["first", {"key1": 1, "key2": [1]}, [1]] */
+	wptr = mp_encode_array(wptr, 3);
+	wptr = mp_encode_str(wptr, "first", 5);
+	wptr = mp_encode_map(wptr, 2);
+	wptr = mp_encode_str(wptr, "key1", 4);
+	wptr = mp_encode_uint(wptr, 1);
+	wptr = mp_encode_str(wptr, "key2", 4);
+	wptr = mp_encode_array(wptr, 1);
+	wptr = mp_encode_uint(wptr, 1);
+	wptr = mp_encode_array(wptr, 1);
+	wptr = mp_encode_uint(wptr, 1);
+	const char *rptr = tuple;
+
+	struct mp_frame mp_frames[3];
+	struct mp_stack mp_stack;
+	mp_stack_init(&mp_stack, 2, mp_frames);
+	is(mp_stack_is_empty(&mp_stack), true, "Stack is empty");
+	mp_stack_push(&mp_stack, MP_ARRAY, mp_decode_array(&rptr));
+	is(mp_stack_is_empty(&mp_stack), false, "Stack is not empty");
+
+	int path_idx = 0;
+	char *paths[] = {
+		"A[1]", "A[2]", "A[2]M[1]", "A[2]M[2]", "A[3]", "A[3]A[1]",
+	};
+	int path_cnt = sizeof(paths)/sizeof(paths[0]);
+
+	/* Parse tuple and test paths. */
+	while (1) {
+		while (mp_stack_advance(&mp_stack)) {
+			mp_stack_pop(&mp_stack);
+			if (mp_stack_is_empty(&mp_stack))
+				goto end;
+		}
+		if (path_idx > path_cnt)
+			goto end;
+		is(strcmp(paths[path_idx], mp_stack_path(&mp_stack)), 0,
+		   "correct path %s", paths[path_idx])
+		uint32_t len;
+		if (mp_stack_top(&mp_stack)->type == MP_MAP)
+			mp_decode_str(&rptr, &len);
+		enum mp_type type = mp_typeof(*rptr);
+		if ((type == MP_ARRAY || type == MP_MAP) &&
+		    !mp_stack_is_full(&mp_stack)) {
+			uint32_t size = type == MP_ARRAY ?
+					mp_decode_array(&rptr) :
+					mp_decode_map(&rptr);
+			mp_stack_push(&mp_stack, type, size);
+		} else {
+			mp_next(&rptr);
+		}
+		path_idx++;
+	}
+end:
+	is(path_idx, path_cnt, "correct paths built: %d/%d", path_idx, path_cnt);
+	is(mp_stack_is_empty(&mp_stack), true, "Stack is empty");
+
+	footer();
+	return check_plan();
+}
 
 int main()
 {
-	plan(20);
+	plan(21);
 	test_uints();
 	test_ints();
 	test_bools();
@@ -1117,6 +1197,7 @@ int main()
 	test_mp_check();
 	test_numbers();
 	test_overflow();
+	test_mpstack();
 
 	return check_plan();
 }
-- 
2.19.2

  reply	other threads:[~2019-01-18  7:03 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-16 13:36 Kirill Shcherbatov
2019-01-16 18:03 ` Konstantin Osipov
2019-01-17  7:26   ` Kirill Shcherbatov
2019-01-17  7:32 ` [tarantool-patches] " Kirill Shcherbatov
2019-01-17 11:58 ` Alexander Turenko
2019-01-17 12:28   ` [tarantool-patches] " Kirill Shcherbatov
2019-01-17 16:34     ` Alexander Turenko
2019-01-18  7:03       ` Kirill Shcherbatov [this message]
2019-01-18 10:32       ` Kirill Shcherbatov
2019-01-21  9:46         ` Kirill Shcherbatov
2019-01-21 11:25         ` Alexander Turenko
2019-01-21 12:35           ` Kirill Shcherbatov
2019-01-21 20:25             ` Vladimir Davydov
2019-01-22 12:28               ` Kirill Shcherbatov
2019-01-22 20:21                 ` Vladimir Davydov
2019-01-23  8:23                   ` Kirill Shcherbatov
2019-01-23 10:06                     ` Vladimir Davydov
2019-01-23 11:39                       ` Kirill Shcherbatov
2019-01-24 17:58                     ` Konstantin Osipov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6acc6cf3-138d-52a1-9709-552877a2a953@tarantool.org \
    --to=kshcherbatov@tarantool.org \
    --cc=alexander.turenko@tarantool.org \
    --cc=kshcherbatov@gmail.com \
    --cc=tarantool-patches@freelists.org \
    --cc=vdavydov.dev@gmail.com \
    --subject='Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox