* [PATCH v1 1/1] implement mp_stack class
@ 2019-01-16 13:36 Kirill Shcherbatov
2019-01-16 18:03 ` Konstantin Osipov
` (2 more replies)
0 siblings, 3 replies; 19+ messages in thread
From: Kirill Shcherbatov @ 2019-01-16 13:36 UTC (permalink / raw)
To: tarantool-patches, vdavydov.dev; +Cc: kostja, Kirill Shcherbatov
From: Kirill Shcherbatov <kshcherbatov@gmail.com>
https://github.com/tarantool/tarantool/tree/kshch/gh-1012-msgpuck-class
A new mp_stack class is a stack of service contexts mp_frame to
do complex msgpack parse. The structure is needed in order to
facilitate the decoding of nested msgpack structures without
recursion.
Needed for #1012
---
msgpuck.h | 108 +++++++++++++++++++++++++++++++++++++++++++++++++
test/msgpuck.c | 59 ++++++++++++++++++++++++++-
2 files changed, 166 insertions(+), 1 deletion(-)
diff --git a/msgpuck.h b/msgpuck.h
index b0d4967..45e19f9 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -359,6 +359,23 @@ enum mp_type {
MP_EXT
};
+struct mp_frame {
+ enum mp_type type;
+ uint32_t size;
+ uint32_t curr;
+};
+
+/**
+ * Stack of service contexts mp_frame to do complex msgpack parse.
+ * The structure is needed in order to facilitate the decoding
+ * of nested msgpack structures without recursion.
+*/
+struct mp_stack {
+ uint32_t size;
+ uint32_t used;
+ struct mp_frame *frames;
+};
+
/**
* \brief Determine MsgPack type by a first byte \a c of encoded data.
*
@@ -1184,6 +1201,97 @@ mp_check(const char **data, const char *end);
* {{{ Implementation
*/
+/**
+ * \brief Initialize mp_stack \stack of specified size \size and
+ * user-allocated array \frames.
+ * The \frames allocation must have at least \size mp_frame items.
+ * \param stack - the pointer to a mp_stack to initialize.
+ * \param size - stack size, count of stack::frames to use.
+ * \param frames - mp_frame service context allocation to use,
+ * must have at least \size items.
+ */
+MP_PROTO void
+mp_stack_init(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+{
+ stack->frames = frames;
+ stack->size = size;
+ stack->used = 0;
+}
+
+/**
+ * \brief Test if mp_stack \stack is empty.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_empty(struct mp_stack *stack)
+{
+ return stack->used == 0;
+}
+
+/**
+ * \brief Test if mp_stack \stack is full.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_full(struct mp_stack *stack)
+{
+ return stack->used >= stack->size;
+}
+
+/**
+ * \brief Pop the top mp_stack \stack frame.
+ * \stack must not be empty.
+ * \param stack - the pointer to a mp_stack to operate with.
+ */
+MP_PROTO void
+mp_stack_pop(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ --stack->used;
+}
+
+/**
+ * \brief Get a pointer to a top mp_stack \stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * The \stack must not be empty.
+ * \retval a pointer to a top stack frame.
+ */
+MP_PROTO struct mp_frame *
+mp_stack_top(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ return &stack->frames[stack->used - 1];
+}
+
+/**
+ * \brief Construct a new mp_frame and push it on to mp_stack
+ * \stack. The \stack must not be full.
+ * \param stack - the pointer to a stack to operate with.
+ * \retval a pointer to a top stack frame.
+ */
+MP_PROTO void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
+{
+ assert(!mp_stack_is_full(stack));
+ uint32_t idx = stack->used++;
+ stack->frames[idx].type = type;
+ stack->frames[idx].size = size;
+ stack->frames[idx].curr = 0;
+}
+
+/**
+ * \brief Advance a mp_stack::curr attribute of the mp_stack.
+ * \stack top frame. The \stack must not be empty.
+ * \retval true, when the top frame have already parsed:
+ * mp_stack::curr >= mp_stack::size; false otherwise.
+ */
+MP_PROTO bool
+mp_stack_advance(struct mp_stack *stack)
+{
+ struct mp_frame *frame = mp_stack_top(stack);
+ return ++frame->curr > frame->size;
+}
+
/** \cond false */
extern const enum mp_type mp_type_hint[];
extern const int8_t mp_parser_hint[];
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 9265453..fe6aa47 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -1093,10 +1093,66 @@ test_overflow()
return check_plan();
}
+static int
+test_mpstack()
+{
+ plan(18);
+ header();
+
+ char tuple[1024];
+ char *wptr = tuple;
+ /* ["first", {"key1": 1, "key2": [1]}, [1]] */
+ wptr = mp_encode_array(wptr, 3);
+ wptr = mp_encode_str(wptr, "first", 5);
+ wptr = mp_encode_map(wptr, 2);
+ wptr = mp_encode_str(wptr, "key1", 4);
+ wptr = mp_encode_uint(wptr, 1);
+ wptr = mp_encode_str(wptr, "key2", 4);
+ wptr = mp_encode_array(wptr, 1);
+ wptr = mp_encode_uint(wptr, 1);
+ wptr = mp_encode_array(wptr, 1);
+ wptr = mp_encode_uint(wptr, 1);
+ const char *rptr = tuple;
+
+ struct mp_frame mp_frames[2];
+ struct mp_stack mp_stack;
+ mp_stack_init(&mp_stack, 2, mp_frames);
+ is(mp_stack_is_empty(&mp_stack), true, "Stack is empty");
+ mp_stack_push(&mp_stack, MP_ARRAY, mp_decode_array(&rptr));
+ is(mp_stack_is_empty(&mp_stack), false, "Stack is not empty");
+ is(mp_stack_is_full(&mp_stack), false, "Stack is not full");
+ is(mp_stack_top(&mp_stack)->type, MP_ARRAY, "Correct field type");
+ is(mp_stack_top(&mp_stack)->size, 3, "Correct items count");
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 1 parsed");
+ mp_next(&rptr);
+ is(mp_stack_top(&mp_stack)->curr, 1, "Correct current item index");
+ mp_stack_push(&mp_stack, MP_MAP, mp_decode_map(&rptr));
+ is(mp_stack_top(&mp_stack)->type, MP_MAP, "Correct field type");
+ is(mp_stack_top(&mp_stack)->size, 2, "Correct items count");
+ is(mp_stack_is_full(&mp_stack), true, "Stack is full");
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 2 parsed");
+ mp_next(&rptr);
+ mp_next(&rptr);
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 2 parsed");
+ is(mp_stack_advance(&mp_stack), true, "All items on level 2 parsed");
+ mp_next(&rptr);
+ mp_next(&rptr);
+ mp_stack_pop(&mp_stack);
+ is(mp_stack_is_full(&mp_stack), false, "Stack is not full");
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 1 parsed");
+ mp_next(&rptr);
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 1 parsed");
+ is(mp_stack_advance(&mp_stack), true, "All items on level 1 parsed");
+ mp_stack_pop(&mp_stack);
+ is(mp_stack_is_empty(&mp_stack), true, "Stack is empty");
+
+ footer();
+ return check_plan();
+}
int main()
{
- plan(20);
+ plan(21);
test_uints();
test_ints();
test_bools();
@@ -1117,6 +1173,7 @@ int main()
test_mp_check();
test_numbers();
test_overflow();
+ test_mpstack();
return check_plan();
}
--
2.19.2
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v1 1/1] implement mp_stack class
2019-01-16 13:36 [PATCH v1 1/1] implement mp_stack class Kirill Shcherbatov
@ 2019-01-16 18:03 ` Konstantin Osipov
2019-01-17 7:26 ` Kirill Shcherbatov
2019-01-17 7:32 ` [tarantool-patches] " Kirill Shcherbatov
2019-01-17 11:58 ` Alexander Turenko
2 siblings, 1 reply; 19+ messages in thread
From: Konstantin Osipov @ 2019-01-16 18:03 UTC (permalink / raw)
To: Kirill Shcherbatov; +Cc: tarantool-patches, vdavydov.dev, Kirill Shcherbatov
* Kirill Shcherbatov <kshcherbatov@tarantool.org> [19/01/16 17:06]:
> From: Kirill Shcherbatov <kshcherbatov@gmail.com>
>
> https://github.com/tarantool/tarantool/tree/kshch/gh-1012-msgpuck-class
>
> A new mp_stack class is a stack of service contexts mp_frame to
> do complex msgpack parse. The structure is needed in order to
> facilitate the decoding of nested msgpack structures without
> recursion.
What is a nested msgpack structure?
Where do you encounter nested msgpack structures?
--
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v1 1/1] implement mp_stack class
2019-01-16 18:03 ` Konstantin Osipov
@ 2019-01-17 7:26 ` Kirill Shcherbatov
0 siblings, 0 replies; 19+ messages in thread
From: Kirill Shcherbatov @ 2019-01-17 7:26 UTC (permalink / raw)
To: Konstantin Osipov; +Cc: tarantool-patches, vdavydov.dev, Kirill Shcherbatov
Sorry, I've really forgot write a better comments to this structure because it is really trivial.
Updated:
/** Message pack MP_MAP or MP_ARRAY container descriptor. */
struct mp_frame {
/** MP frame type calculated with mp_typeof(). */
enum mp_type type;
/**
* Total items in MP_MAP or MP_ARRAY container
* calculated with mp_decode_map() or mp_decode_array().
*/
uint32_t size;
/**
* Count of items already processed. Must be less or
* equal to mp_frame::size member.
*/
uint32_t curr;
};
/**
* Stack of service contexts mp_frame to do complex msgpack parse.
* The structure is needed in order to facilitate the decoding
* nested MP_ARRAY and MP_MAP msgpack containers without
* recursion. This allows you to determine that the parsing of
* the current container is complete.
*/
struct mp_stack {
/** The maximum stack depth. */
uint32_t size;
/**
* Count of used stack frames. Corresponds to the index
* in the array to perform the push operation. Must be
* less or equal to mp_stack::size member.
*/
uint32_t used;
/**
* Array of mp_frames descriptors having at least
* mp_stack::size items.
*/
struct mp_frame *frames;
};
^ permalink raw reply [flat|nested] 19+ messages in thread
* [tarantool-patches] [PATCH v1 1/1] implement mp_stack class
2019-01-16 13:36 [PATCH v1 1/1] implement mp_stack class Kirill Shcherbatov
2019-01-16 18:03 ` Konstantin Osipov
@ 2019-01-17 7:32 ` Kirill Shcherbatov
2019-01-17 11:58 ` Alexander Turenko
2 siblings, 0 replies; 19+ messages in thread
From: Kirill Shcherbatov @ 2019-01-17 7:32 UTC (permalink / raw)
To: tarantool-patches, Vladimir Davydov; +Cc: Kostya Osipov
https://github.com/tarantool/tarantool/tree/kshch/gh-1012-msgpuck-class
Introduced a new mp_stack class to make complex msgpack parsing.
The structure is needed in order to facilitate the decoding
nested MP_ARRAY and MP_MAP msgpack containers without recursion.
This allows you to determine that the parsing of the current
container is complete.
Needed for #1012
---
msgpuck.h | 130 +++++++++++++++++++++++++++++++++++++++++++++++++
test/msgpuck.c | 59 +++++++++++++++++++++-
2 files changed, 188 insertions(+), 1 deletion(-)
diff --git a/msgpuck.h b/msgpuck.h
index b0d4967..a12eca9 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -359,6 +359,45 @@ enum mp_type {
MP_EXT
};
+/** Message pack MP_MAP or MP_ARRAY container descriptor. */
+struct mp_frame {
+ /** MP frame type calculated with mp_typeof(). */
+ enum mp_type type;
+ /**
+ * Total items in MP_MAP or MP_ARRAY container
+ * calculated with mp_decode_map() or mp_decode_array().
+ */
+ uint32_t size;
+ /**
+ * Count of items already processed. Must be less or
+ * equal to mp_frame::size member.
+ */
+ uint32_t curr;
+};
+
+/**
+ * Stack of service contexts mp_frame to do complex msgpack parse.
+ * The structure is needed in order to facilitate the decoding
+ * nested MP_ARRAY and MP_MAP msgpack containers without
+ * recursion. This allows you to determine that the parsing of
+ * the current container is complete.
+*/
+struct mp_stack {
+ /** The maximum stack depth. */
+ uint32_t size;
+ /**
+ * Count of used stack frames. Corresponds to the index
+ * in the array to perform the push operation. Must be
+ * less or equal to mp_stack::size member.
+ */
+ uint32_t used;
+ /**
+ * Array of mp_frames descriptors having at least
+ * mp_stack::size items.
+ */
+ struct mp_frame *frames;
+};
+
/**
* \brief Determine MsgPack type by a first byte \a c of encoded data.
*
@@ -1184,6 +1223,97 @@ mp_check(const char **data, const char *end);
* {{{ Implementation
*/
+/**
+ * \brief Initialize mp_stack \stack of specified size \size and
+ * user-allocated array \frames.
+ * The \frames allocation must have at least \size mp_frame items.
+ * \param stack - the pointer to a mp_stack to initialize.
+ * \param size - stack size, count of stack::frames to use.
+ * \param frames - mp_frame service context allocation to use,
+ * must have at least \size items.
+ */
+MP_PROTO void
+mp_stack_init(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+{
+ stack->frames = frames;
+ stack->size = size;
+ stack->used = 0;
+}
+
+/**
+ * \brief Test if mp_stack \stack is empty.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_empty(struct mp_stack *stack)
+{
+ return stack->used == 0;
+}
+
+/**
+ * \brief Test if mp_stack \stack is full.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_full(struct mp_stack *stack)
+{
+ return stack->used >= stack->size;
+}
+
+/**
+ * \brief Pop the top mp_stack \stack frame.
+ * \stack must not be empty.
+ * \param stack - the pointer to a mp_stack to operate with.
+ */
+MP_PROTO void
+mp_stack_pop(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ --stack->used;
+}
+
+/**
+ * \brief Get a pointer to a top mp_stack \stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * The \stack must not be empty.
+ * \retval a pointer to a top stack frame.
+ */
+MP_PROTO struct mp_frame *
+mp_stack_top(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ return &stack->frames[stack->used - 1];
+}
+
+/**
+ * \brief Construct a new mp_frame and push it on to mp_stack
+ * \stack. The \stack must not be full.
+ * \param stack - the pointer to a stack to operate with.
+ * \retval a pointer to a top stack frame.
+ */
+MP_PROTO void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
+{
+ assert(!mp_stack_is_full(stack));
+ uint32_t idx = stack->used++;
+ stack->frames[idx].type = type;
+ stack->frames[idx].size = size;
+ stack->frames[idx].curr = 0;
+}
+
+/**
+ * \brief Advance a mp_stack::curr attribute of the mp_stack.
+ * \stack top frame. The \stack must not be empty.
+ * \retval true, when the top frame have already parsed:
+ * mp_stack::curr >= mp_stack::size; false otherwise.
+ */
+MP_PROTO bool
+mp_stack_advance(struct mp_stack *stack)
+{
+ struct mp_frame *frame = mp_stack_top(stack);
+ return ++frame->curr > frame->size;
+}
+
/** \cond false */
extern const enum mp_type mp_type_hint[];
extern const int8_t mp_parser_hint[];
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 9265453..fe6aa47 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -1093,10 +1093,66 @@ test_overflow()
return check_plan();
}
+static int
+test_mpstack()
+{
+ plan(18);
+ header();
+
+ char tuple[1024];
+ char *wptr = tuple;
+ /* ["first", {"key1": 1, "key2": [1]}, [1]] */
+ wptr = mp_encode_array(wptr, 3);
+ wptr = mp_encode_str(wptr, "first", 5);
+ wptr = mp_encode_map(wptr, 2);
+ wptr = mp_encode_str(wptr, "key1", 4);
+ wptr = mp_encode_uint(wptr, 1);
+ wptr = mp_encode_str(wptr, "key2", 4);
+ wptr = mp_encode_array(wptr, 1);
+ wptr = mp_encode_uint(wptr, 1);
+ wptr = mp_encode_array(wptr, 1);
+ wptr = mp_encode_uint(wptr, 1);
+ const char *rptr = tuple;
+
+ struct mp_frame mp_frames[2];
+ struct mp_stack mp_stack;
+ mp_stack_init(&mp_stack, 2, mp_frames);
+ is(mp_stack_is_empty(&mp_stack), true, "Stack is empty");
+ mp_stack_push(&mp_stack, MP_ARRAY, mp_decode_array(&rptr));
+ is(mp_stack_is_empty(&mp_stack), false, "Stack is not empty");
+ is(mp_stack_is_full(&mp_stack), false, "Stack is not full");
+ is(mp_stack_top(&mp_stack)->type, MP_ARRAY, "Correct field type");
+ is(mp_stack_top(&mp_stack)->size, 3, "Correct items count");
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 1 parsed");
+ mp_next(&rptr);
+ is(mp_stack_top(&mp_stack)->curr, 1, "Correct current item index");
+ mp_stack_push(&mp_stack, MP_MAP, mp_decode_map(&rptr));
+ is(mp_stack_top(&mp_stack)->type, MP_MAP, "Correct field type");
+ is(mp_stack_top(&mp_stack)->size, 2, "Correct items count");
+ is(mp_stack_is_full(&mp_stack), true, "Stack is full");
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 2 parsed");
+ mp_next(&rptr);
+ mp_next(&rptr);
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 2 parsed");
+ is(mp_stack_advance(&mp_stack), true, "All items on level 2 parsed");
+ mp_next(&rptr);
+ mp_next(&rptr);
+ mp_stack_pop(&mp_stack);
+ is(mp_stack_is_full(&mp_stack), false, "Stack is not full");
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 1 parsed");
+ mp_next(&rptr);
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 1 parsed");
+ is(mp_stack_advance(&mp_stack), true, "All items on level 1 parsed");
+ mp_stack_pop(&mp_stack);
+ is(mp_stack_is_empty(&mp_stack), true, "Stack is empty");
+
+ footer();
+ return check_plan();
+}
int main()
{
- plan(20);
+ plan(21);
test_uints();
test_ints();
test_bools();
@@ -1117,6 +1173,7 @@ int main()
test_mp_check();
test_numbers();
test_overflow();
+ test_mpstack();
return check_plan();
}
--
2.19.2
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] [PATCH v1 1/1] implement mp_stack class
2019-01-16 13:36 [PATCH v1 1/1] implement mp_stack class Kirill Shcherbatov
2019-01-16 18:03 ` Konstantin Osipov
2019-01-17 7:32 ` [tarantool-patches] " Kirill Shcherbatov
@ 2019-01-17 11:58 ` Alexander Turenko
2019-01-17 12:28 ` [tarantool-patches] " Kirill Shcherbatov
2 siblings, 1 reply; 19+ messages in thread
From: Alexander Turenko @ 2019-01-17 11:58 UTC (permalink / raw)
To: Kirill Shcherbatov
Cc: vdavydov.dev, kostja, tarantool-patches, Kirill Shcherbatov
Look at the `make doc` warnings, the doxygen format is slightly
different then yours one.
Please, start the commit mesaage from the upper letter (because it has
no prefix).
Can you please share how do you intend to use it? One or more real
cases.
WBR, Alexander Turenko.
On Wed, Jan 16, 2019 at 04:36:12PM +0300, Kirill Shcherbatov wrote:
> From: Kirill Shcherbatov <kshcherbatov@gmail.com>
>
> https://github.com/tarantool/tarantool/tree/kshch/gh-1012-msgpuck-class
It is tarantool/small, not tarantool/tarantool.
> +/**
> + * \brief Advance a mp_stack::curr attribute of the mp_stack.
> + * \stack top frame. The \stack must not be empty.
> + * \retval true, when the top frame have already parsed:
> + * mp_stack::curr >= mp_stack::size; false otherwise.
mp_stack::curr -> mp_stack_top()::curr
mp_stack::size -> mp_stack_top()::size
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-17 11:58 ` Alexander Turenko
@ 2019-01-17 12:28 ` Kirill Shcherbatov
2019-01-17 16:34 ` Alexander Turenko
0 siblings, 1 reply; 19+ messages in thread
From: Kirill Shcherbatov @ 2019-01-17 12:28 UTC (permalink / raw)
To: tarantool-patches, Alexander Turenko; +Cc: vdavydov.dev, Kirill Shcherbatov
On 17.01.2019 14:58, Alexander Turenko wrote:
> Look at the `make doc` warnings, the doxygen format is slightly
> different then yours one.
> Please, start the commit mesaage from the upper letter (because it has
> no prefix).
Tnx, fixed.
> Can you please share how do you intend to use it? One or more real
> cases.
Please take a look at
[tarantool-patches] [PATCH v8 3/5] box: introduce JSON Indexes
tuple_init_field_map routine
==============================
Introduced a new mp_stack class to make complex msgpack parsing.
The structure is needed in order to facilitate the decoding
nested MP_ARRAY and MP_MAP msgpack containers without recursion.
This allows you to determine that the parsing of the current
container is complete.
Needed for #1012
---
msgpuck.h | 135 +++++++++++++++++++++++++++++++++++++++++++++++++
test/msgpuck.c | 59 ++++++++++++++++++++-
2 files changed, 193 insertions(+), 1 deletion(-)
diff --git a/msgpuck.h b/msgpuck.h
index b0d4967..3b1249f 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -359,6 +359,45 @@ enum mp_type {
MP_EXT
};
+/** Message pack MP_MAP or MP_ARRAY container descriptor. */
+struct mp_frame {
+ /** MP frame type calculated with mp_typeof(). */
+ enum mp_type type;
+ /**
+ * Total items in MP_MAP or MP_ARRAY container
+ * calculated with mp_decode_map() or mp_decode_array().
+ */
+ uint32_t size;
+ /**
+ * Count of items already processed. Must be less or
+ * equal to mp_frame::size member.
+ */
+ uint32_t curr;
+};
+
+/**
+ * Stack of service contexts mp_frame to do complex msgpack parse.
+ * The structure is needed in order to facilitate the decoding
+ * nested MP_ARRAY and MP_MAP msgpack containers without
+ * recursion. This allows you to determine that the parsing of
+ * the current container is complete.
+*/
+struct mp_stack {
+ /** The maximum stack depth. */
+ uint32_t size;
+ /**
+ * Count of used stack frames. Corresponds to the index
+ * in the array to perform the push operation. Must be
+ * less or equal to mp_stack::size member.
+ */
+ uint32_t used;
+ /**
+ * Array of mp_frames descriptors having at least
+ * mp_stack::size items.
+ */
+ struct mp_frame *frames;
+};
+
/**
* \brief Determine MsgPack type by a first byte \a c of encoded data.
*
@@ -1184,6 +1223,102 @@ mp_check(const char **data, const char *end);
* {{{ Implementation
*/
+/**
+ * \brief Initialize mp_stack \a stack of specified size \a size
+ * and user-allocated array \a frames.
+ * The \a frames allocation must have at least \a size mp_frame
+ * items.
+ * \param stack - the pointer to a mp_stack to initialize.
+ * \param size - stack size, count of stack::frames to use.
+ * \param frames - mp_frame service context allocation to use,
+ * must have at least \a size items.
+ */
+MP_PROTO void
+mp_stack_init(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+{
+ stack->frames = frames;
+ stack->size = size;
+ stack->used = 0;
+}
+
+/**
+ * \brief Test if mp_stack \a stack is empty.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_empty(struct mp_stack *stack)
+{
+ return stack->used == 0;
+}
+
+/**
+ * \brief Test if mp_stack \a stack is full.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_full(struct mp_stack *stack)
+{
+ return stack->used >= stack->size;
+}
+
+/**
+ * \brief Pop the top mp_stack \a stack frame.
+ * The \a stack must not be empty.
+ * \param stack - the pointer to a mp_stack to operate with.
+ */
+MP_PROTO void
+mp_stack_pop(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ --stack->used;
+}
+
+/**
+ * \brief Get a pointer to a top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * The \a stack must not be empty.
+ * \retval a pointer to a top stack frame.
+ */
+MP_PROTO struct mp_frame *
+mp_stack_top(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ return &stack->frames[stack->used - 1];
+}
+
+/**
+ * \brief Construct a new mp_frame and push it on to mp_stack
+ * \a stack. The \a stack must not be full.
+ * \param stack - the pointer to a stack to operate with.
+ * \param type - the type of mp_frame to create.
+ * \param size - size size of mp_frame to create.
+ * \retval a pointer to a top stack frame.
+ */
+MP_PROTO void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
+{
+ assert(!mp_stack_is_full(stack));
+ uint32_t idx = stack->used++;
+ stack->frames[idx].type = type;
+ stack->frames[idx].size = size;
+ stack->frames[idx].curr = 0;
+}
+
+/**
+ * \brief Advance a mp_stack_top()::\a curr attribute of the
+ * \a stack. The \a stack must not be empty.
+ * \param stack - the pointer to a stack to operate with
+ * \retval true, when the top frame have already parsed:
+ * mp_stack_top()::\a curr >= mp_stack_top()::\a size;
+ * false otherwise.
+ */
+MP_PROTO bool
+mp_stack_advance(struct mp_stack *stack)
+{
+ struct mp_frame *frame = mp_stack_top(stack);
+ return ++frame->curr > frame->size;
+}
+
/** \cond false */
extern const enum mp_type mp_type_hint[];
extern const int8_t mp_parser_hint[];
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 9265453..fe6aa47 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -1093,10 +1093,66 @@ test_overflow()
return check_plan();
}
+static int
+test_mpstack()
+{
+ plan(18);
+ header();
+
+ char tuple[1024];
+ char *wptr = tuple;
+ /* ["first", {"key1": 1, "key2": [1]}, [1]] */
+ wptr = mp_encode_array(wptr, 3);
+ wptr = mp_encode_str(wptr, "first", 5);
+ wptr = mp_encode_map(wptr, 2);
+ wptr = mp_encode_str(wptr, "key1", 4);
+ wptr = mp_encode_uint(wptr, 1);
+ wptr = mp_encode_str(wptr, "key2", 4);
+ wptr = mp_encode_array(wptr, 1);
+ wptr = mp_encode_uint(wptr, 1);
+ wptr = mp_encode_array(wptr, 1);
+ wptr = mp_encode_uint(wptr, 1);
+ const char *rptr = tuple;
+
+ struct mp_frame mp_frames[2];
+ struct mp_stack mp_stack;
+ mp_stack_init(&mp_stack, 2, mp_frames);
+ is(mp_stack_is_empty(&mp_stack), true, "Stack is empty");
+ mp_stack_push(&mp_stack, MP_ARRAY, mp_decode_array(&rptr));
+ is(mp_stack_is_empty(&mp_stack), false, "Stack is not empty");
+ is(mp_stack_is_full(&mp_stack), false, "Stack is not full");
+ is(mp_stack_top(&mp_stack)->type, MP_ARRAY, "Correct field type");
+ is(mp_stack_top(&mp_stack)->size, 3, "Correct items count");
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 1 parsed");
+ mp_next(&rptr);
+ is(mp_stack_top(&mp_stack)->curr, 1, "Correct current item index");
+ mp_stack_push(&mp_stack, MP_MAP, mp_decode_map(&rptr));
+ is(mp_stack_top(&mp_stack)->type, MP_MAP, "Correct field type");
+ is(mp_stack_top(&mp_stack)->size, 2, "Correct items count");
+ is(mp_stack_is_full(&mp_stack), true, "Stack is full");
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 2 parsed");
+ mp_next(&rptr);
+ mp_next(&rptr);
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 2 parsed");
+ is(mp_stack_advance(&mp_stack), true, "All items on level 2 parsed");
+ mp_next(&rptr);
+ mp_next(&rptr);
+ mp_stack_pop(&mp_stack);
+ is(mp_stack_is_full(&mp_stack), false, "Stack is not full");
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 1 parsed");
+ mp_next(&rptr);
+ is(mp_stack_advance(&mp_stack), false, "Not all items on level 1 parsed");
+ is(mp_stack_advance(&mp_stack), true, "All items on level 1 parsed");
+ mp_stack_pop(&mp_stack);
+ is(mp_stack_is_empty(&mp_stack), true, "Stack is empty");
+
+ footer();
+ return check_plan();
+}
int main()
{
- plan(20);
+ plan(21);
test_uints();
test_ints();
test_bools();
@@ -1117,6 +1173,7 @@ int main()
test_mp_check();
test_numbers();
test_overflow();
+ test_mpstack();
return check_plan();
}
--
2.19.2
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-17 12:28 ` [tarantool-patches] " Kirill Shcherbatov
@ 2019-01-17 16:34 ` Alexander Turenko
2019-01-18 7:03 ` Kirill Shcherbatov
2019-01-18 10:32 ` Kirill Shcherbatov
0 siblings, 2 replies; 19+ messages in thread
From: Alexander Turenko @ 2019-01-17 16:34 UTC (permalink / raw)
To: Kirill Shcherbatov; +Cc: tarantool-patches, vdavydov.dev, Kirill Shcherbatov
We discussed with Vladimir what could be motivating example for this set
of functions to show those are really general-purpose and should reside
in the library.
He proposed to implement simple (as much as possible) converter from
msgpack to json. It only need to support map, array, int and string. The
code can be written as a test to the library.
> +/**
> + * \brief Advance a mp_stack_top()::\a curr attribute of the
> + * \a stack. The \a stack must not be empty.
> + * \param stack - the pointer to a stack to operate with
> + * \retval true, when the top frame have already parsed:
> + * mp_stack_top()::\a curr >= mp_stack_top()::\a size;
> + * false otherwise.
> + */
> +MP_PROTO bool
> +mp_stack_advance(struct mp_stack *stack)
> +{
> + struct mp_frame *frame = mp_stack_top(stack);
> + return ++frame->curr > frame->size;
The doxygen comment says it is >=, while the actual code uses >.
The contract description is not clear: the function signals when it
moves over the last item, not when it reaches the last item.
WBR, Alexander Turenko.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-17 16:34 ` Alexander Turenko
@ 2019-01-18 7:03 ` Kirill Shcherbatov
2019-01-18 10:32 ` Kirill Shcherbatov
1 sibling, 0 replies; 19+ messages in thread
From: Kirill Shcherbatov @ 2019-01-18 7:03 UTC (permalink / raw)
To: tarantool-patches, Alexander Turenko; +Cc: vdavydov.dev, Kirill Shcherbatov
> We discussed with Vladimir what could be motivating example for this set
> of functions to show those are really general-purpose and should reside
> in the library.
It is not possible to restore full JSON path as we have no MAP key in our stack
as it is not required. But I've implemented an equivalent "mp_stack_path"
sequence consists of "type letter - M or A" concatinaded with "processed item index".
"A[1]", "A[2]", "A[2]M[1]", "A[2]M[2]", "A[3]", "A[3]A[1]"
Believe it is representative enough.
> The doxygen comment says it is >=, while the actual code uses >.
> The contract description is not clear: the function signals when it
> moves over the last item, not when it reaches the last item.
Ok.
============================================
Introduced a new mp_stack class to make complex msgpack parsing.
The structure is needed in order to facilitate the decoding
nested MP_ARRAY and MP_MAP msgpack containers without recursion.
This allows you to determine that the parsing of the current
container is complete.
Needed for #1012
---
msgpuck.h | 136 +++++++++++++++++++++++++++++++++++++++++++++++++
test/msgpuck.c | 83 +++++++++++++++++++++++++++++-
2 files changed, 218 insertions(+), 1 deletion(-)
diff --git a/msgpuck.h b/msgpuck.h
index b0d4967..b9bbe78 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -359,6 +359,45 @@ enum mp_type {
MP_EXT
};
+/** Message pack MP_MAP or MP_ARRAY container descriptor. */
+struct mp_frame {
+ /** MP frame type calculated with mp_typeof(). */
+ enum mp_type type;
+ /**
+ * Total items in MP_MAP or MP_ARRAY container
+ * calculated with mp_decode_map() or mp_decode_array().
+ */
+ uint32_t size;
+ /**
+ * Count of items already processed. Must be less or
+ * equal to mp_frame::size member.
+ */
+ uint32_t curr;
+};
+
+/**
+ * Stack of service contexts mp_frame to do complex msgpack parse.
+ * The structure is needed in order to facilitate the decoding
+ * nested MP_ARRAY and MP_MAP msgpack containers without
+ * recursion. This allows you to determine that the parsing of
+ * the current container is complete.
+*/
+struct mp_stack {
+ /** The maximum stack depth. */
+ uint32_t size;
+ /**
+ * Count of used stack frames. Corresponds to the index
+ * in the array to perform the push operation. Must be
+ * less or equal to mp_stack::size member.
+ */
+ uint32_t used;
+ /**
+ * Array of mp_frames descriptors having at least
+ * mp_stack::size items.
+ */
+ struct mp_frame *frames;
+};
+
/**
* \brief Determine MsgPack type by a first byte \a c of encoded data.
*
@@ -1184,6 +1223,103 @@ mp_check(const char **data, const char *end);
* {{{ Implementation
*/
+/**
+ * \brief Initialize mp_stack \a stack of specified size \a size
+ * and user-allocated array \a frames.
+ * The \a frames allocation must have at least \a size mp_frame
+ * items.
+ * \param stack - the pointer to a mp_stack to initialize.
+ * \param size - stack size, count of stack::frames to use.
+ * \param frames - mp_frame service context allocation to use,
+ * must have at least \a size items.
+ */
+MP_PROTO void
+mp_stack_init(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+{
+ stack->frames = frames;
+ stack->size = size;
+ stack->used = 0;
+}
+
+/**
+ * \brief Test if mp_stack \a stack is empty.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_empty(struct mp_stack *stack)
+{
+ return stack->used == 0;
+}
+
+/**
+ * \brief Test if mp_stack \a stack is full.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_full(struct mp_stack *stack)
+{
+ return stack->used >= stack->size;
+}
+
+/**
+ * \brief Pop the top mp_stack \a stack frame.
+ * The \a stack must not be empty.
+ * \param stack - the pointer to a mp_stack to operate with.
+ */
+MP_PROTO void
+mp_stack_pop(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ --stack->used;
+}
+
+/**
+ * \brief Get a pointer to a top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * The \a stack must not be empty.
+ * \retval a pointer to a top stack frame.
+ */
+MP_PROTO struct mp_frame *
+mp_stack_top(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ return &stack->frames[stack->used - 1];
+}
+
+/**
+ * \brief Construct a new mp_frame and push it on to mp_stack
+ * \a stack. The \a stack must not be full.
+ * \param stack - the pointer to a stack to operate with.
+ * \param type - the type of mp_frame to create.
+ * \param size - size size of mp_frame to create.
+ * \retval a pointer to a top stack frame.
+ */
+MP_PROTO void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
+{
+ assert(!mp_stack_is_full(stack));
+ uint32_t idx = stack->used++;
+ stack->frames[idx].type = type;
+ stack->frames[idx].size = size;
+ stack->frames[idx].curr = 0;
+}
+
+/**
+ * \brief Advance a mp_stack_top()::\a curr attribute of the
+ * \a stack. The \a stack must not be empty. The function also
+ * signals when it moves over the last item.
+ * \param stack - the pointer to a stack to operate with
+ * \retval true, when the top frame have already parsed:
+ * mp_stack_top()::\a curr > mp_stack_top()::\a size;
+ * false otherwise.
+ */
+MP_PROTO bool
+mp_stack_advance(struct mp_stack *stack)
+{
+ struct mp_frame *frame = mp_stack_top(stack);
+ return ++frame->curr > frame->size;
+}
+
/** \cond false */
extern const enum mp_type mp_type_hint[];
extern const int8_t mp_parser_hint[];
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 9265453..dbfd2bb 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -1093,10 +1093,90 @@ test_overflow()
return check_plan();
}
+const char *
+mp_stack_path(struct mp_stack *stack)
+{
+ static char buff[1024];
+ char *wptr = buff;
+ for (uint32_t idx = 0; idx < stack->used; idx++) {
+ wptr += sprintf(wptr, "%c[%d]",
+ stack->frames[idx].type == MP_MAP ? 'M' : 'A',
+ stack->frames[idx].curr);
+ }
+ return buff;
+}
+
+static int
+test_mpstack()
+{
+ plan(10);
+ header();
+
+ char tuple[1024];
+ char *wptr = tuple;
+ /* ["first", {"key1": 1, "key2": [1]}, [1]] */
+ wptr = mp_encode_array(wptr, 3);
+ wptr = mp_encode_str(wptr, "first", 5);
+ wptr = mp_encode_map(wptr, 2);
+ wptr = mp_encode_str(wptr, "key1", 4);
+ wptr = mp_encode_uint(wptr, 1);
+ wptr = mp_encode_str(wptr, "key2", 4);
+ wptr = mp_encode_array(wptr, 1);
+ wptr = mp_encode_uint(wptr, 1);
+ wptr = mp_encode_array(wptr, 1);
+ wptr = mp_encode_uint(wptr, 1);
+ const char *rptr = tuple;
+
+ struct mp_frame mp_frames[3];
+ struct mp_stack mp_stack;
+ mp_stack_init(&mp_stack, 2, mp_frames);
+ is(mp_stack_is_empty(&mp_stack), true, "Stack is empty");
+ mp_stack_push(&mp_stack, MP_ARRAY, mp_decode_array(&rptr));
+ is(mp_stack_is_empty(&mp_stack), false, "Stack is not empty");
+
+ int path_idx = 0;
+ char *paths[] = {
+ "A[1]", "A[2]", "A[2]M[1]", "A[2]M[2]", "A[3]", "A[3]A[1]",
+ };
+ int path_cnt = sizeof(paths)/sizeof(paths[0]);
+
+ /* Parse tuple and test paths. */
+ while (1) {
+ while (mp_stack_advance(&mp_stack)) {
+ mp_stack_pop(&mp_stack);
+ if (mp_stack_is_empty(&mp_stack))
+ goto end;
+ }
+ if (path_idx > path_cnt)
+ goto end;
+ is(strcmp(paths[path_idx], mp_stack_path(&mp_stack)), 0,
+ "correct path %s", paths[path_idx])
+ uint32_t len;
+ if (mp_stack_top(&mp_stack)->type == MP_MAP)
+ mp_decode_str(&rptr, &len);
+ enum mp_type type = mp_typeof(*rptr);
+ if ((type == MP_ARRAY || type == MP_MAP) &&
+ !mp_stack_is_full(&mp_stack)) {
+ uint32_t size = type == MP_ARRAY ?
+ mp_decode_array(&rptr) :
+ mp_decode_map(&rptr);
+ mp_stack_push(&mp_stack, type, size);
+ } else {
+ mp_next(&rptr);
+ }
+ path_idx++;
+ }
+end:
+ is(path_idx, path_cnt, "correct paths built: %d/%d", path_idx, path_cnt);
+ is(mp_stack_is_empty(&mp_stack), true, "Stack is empty");
+
+ footer();
+ return check_plan();
+}
int main()
{
- plan(20);
+ plan(21);
test_uints();
test_ints();
test_bools();
@@ -1117,6 +1197,7 @@ int main()
test_mp_check();
test_numbers();
test_overflow();
+ test_mpstack();
return check_plan();
}
--
2.19.2
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-17 16:34 ` Alexander Turenko
2019-01-18 7:03 ` Kirill Shcherbatov
@ 2019-01-18 10:32 ` Kirill Shcherbatov
2019-01-21 9:46 ` Kirill Shcherbatov
2019-01-21 11:25 ` Alexander Turenko
1 sibling, 2 replies; 19+ messages in thread
From: Kirill Shcherbatov @ 2019-01-18 10:32 UTC (permalink / raw)
To: tarantool-patches, Alexander Turenko; +Cc: vdavydov.dev, Kirill Shcherbatov
Introduced a new mp_stack class to make complex msgpack parsing.
The structure is needed in order to facilitate the decoding
nested MP_ARRAY and MP_MAP msgpack containers without recursion.
This allows you to determine that the parsing of the current
container is complete.
Needed for #1012
---
msgpuck.h | 136 +++++++++++++++++++++++++++++++++++++++++++++++++
test/msgpuck.c | 79 +++++++++++++++++++++++++++-
2 files changed, 214 insertions(+), 1 deletion(-)
diff --git a/msgpuck.h b/msgpuck.h
index b0d4967..b9bbe78 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -359,6 +359,45 @@ enum mp_type {
MP_EXT
};
+/** Message pack MP_MAP or MP_ARRAY container descriptor. */
+struct mp_frame {
+ /** MP frame type calculated with mp_typeof(). */
+ enum mp_type type;
+ /**
+ * Total items in MP_MAP or MP_ARRAY container
+ * calculated with mp_decode_map() or mp_decode_array().
+ */
+ uint32_t size;
+ /**
+ * Count of items already processed. Must be less or
+ * equal to mp_frame::size member.
+ */
+ uint32_t curr;
+};
+
+/**
+ * Stack of service contexts mp_frame to do complex msgpack parse.
+ * The structure is needed in order to facilitate the decoding
+ * nested MP_ARRAY and MP_MAP msgpack containers without
+ * recursion. This allows you to determine that the parsing of
+ * the current container is complete.
+*/
+struct mp_stack {
+ /** The maximum stack depth. */
+ uint32_t size;
+ /**
+ * Count of used stack frames. Corresponds to the index
+ * in the array to perform the push operation. Must be
+ * less or equal to mp_stack::size member.
+ */
+ uint32_t used;
+ /**
+ * Array of mp_frames descriptors having at least
+ * mp_stack::size items.
+ */
+ struct mp_frame *frames;
+};
+
/**
* \brief Determine MsgPack type by a first byte \a c of encoded data.
*
@@ -1184,6 +1223,103 @@ mp_check(const char **data, const char *end);
* {{{ Implementation
*/
+/**
+ * \brief Initialize mp_stack \a stack of specified size \a size
+ * and user-allocated array \a frames.
+ * The \a frames allocation must have at least \a size mp_frame
+ * items.
+ * \param stack - the pointer to a mp_stack to initialize.
+ * \param size - stack size, count of stack::frames to use.
+ * \param frames - mp_frame service context allocation to use,
+ * must have at least \a size items.
+ */
+MP_PROTO void
+mp_stack_init(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+{
+ stack->frames = frames;
+ stack->size = size;
+ stack->used = 0;
+}
+
+/**
+ * \brief Test if mp_stack \a stack is empty.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_empty(struct mp_stack *stack)
+{
+ return stack->used == 0;
+}
+
+/**
+ * \brief Test if mp_stack \a stack is full.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_full(struct mp_stack *stack)
+{
+ return stack->used >= stack->size;
+}
+
+/**
+ * \brief Pop the top mp_stack \a stack frame.
+ * The \a stack must not be empty.
+ * \param stack - the pointer to a mp_stack to operate with.
+ */
+MP_PROTO void
+mp_stack_pop(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ --stack->used;
+}
+
+/**
+ * \brief Get a pointer to a top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * The \a stack must not be empty.
+ * \retval a pointer to a top stack frame.
+ */
+MP_PROTO struct mp_frame *
+mp_stack_top(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ return &stack->frames[stack->used - 1];
+}
+
+/**
+ * \brief Construct a new mp_frame and push it on to mp_stack
+ * \a stack. The \a stack must not be full.
+ * \param stack - the pointer to a stack to operate with.
+ * \param type - the type of mp_frame to create.
+ * \param size - size size of mp_frame to create.
+ * \retval a pointer to a top stack frame.
+ */
+MP_PROTO void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
+{
+ assert(!mp_stack_is_full(stack));
+ uint32_t idx = stack->used++;
+ stack->frames[idx].type = type;
+ stack->frames[idx].size = size;
+ stack->frames[idx].curr = 0;
+}
+
+/**
+ * \brief Advance a mp_stack_top()::\a curr attribute of the
+ * \a stack. The \a stack must not be empty. The function also
+ * signals when it moves over the last item.
+ * \param stack - the pointer to a stack to operate with
+ * \retval true, when the top frame have already parsed:
+ * mp_stack_top()::\a curr > mp_stack_top()::\a size;
+ * false otherwise.
+ */
+MP_PROTO bool
+mp_stack_advance(struct mp_stack *stack)
+{
+ struct mp_frame *frame = mp_stack_top(stack);
+ return ++frame->curr > frame->size;
+}
+
/** \cond false */
extern const enum mp_type mp_type_hint[];
extern const int8_t mp_parser_hint[];
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 9265453..6d5a7a5 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -1093,10 +1093,86 @@ test_overflow()
return check_plan();
}
+static inline bool
+mp_stack_top_is_internal_item(struct mp_stack *mp_stack)
+{
+ struct mp_frame *frame = mp_stack_top(mp_stack);
+ return frame->curr > 0 && frame->curr < frame->size;
+}
+
+static int
+test_mpstack()
+{
+ plan(1);
+ header();
+
+ char tuple[1024];
+ char *wptr = tuple;
+ const char *descr = "{\"first\", {\"key1\": \"one\", \"key2\": {2}}, {3}}";
+ wptr = mp_encode_array(wptr, 3);
+ wptr = mp_encode_str(wptr, "first", 5);
+ wptr = mp_encode_map(wptr, 2);
+ wptr = mp_encode_str(wptr, "key1", 4);
+ wptr = mp_encode_str(wptr, "one", 3);
+ wptr = mp_encode_str(wptr, "key2", 4);
+ wptr = mp_encode_array(wptr, 1);
+ wptr = mp_encode_uint(wptr, 2);
+ wptr = mp_encode_array(wptr, 1);
+ wptr = mp_encode_uint(wptr, 3);
+ const char *rptr = tuple;
+ char buff[1024];
+ wptr = buff;
+
+ struct mp_frame mp_frames[3];
+ struct mp_stack mp_stack;
+ mp_stack_init(&mp_stack, 3, mp_frames);
+ while (1) {
+ uint32_t len;
+ if (!mp_stack_is_empty(&mp_stack) &&
+ mp_stack_top(&mp_stack)->type == MP_MAP) {
+ const char *key = mp_decode_str(&rptr, &len);
+ wptr += sprintf(wptr, "\"%.*s\": ", len, key);
+ }
+ enum mp_type type = mp_typeof(*rptr);
+ if (type == MP_ARRAY || type == MP_MAP) {
+ uint32_t size = type == MP_ARRAY ?
+ mp_decode_array(&rptr) :
+ mp_decode_map(&rptr);
+ mp_stack_push(&mp_stack, type, size);
+ wptr += sprintf(wptr, "{");
+ } else {
+ if (type == MP_STR) {
+ const char *str = mp_decode_str(&rptr, &len);
+ wptr += sprintf(wptr, "\"%.*s\"", len, str);
+ } else if (type == MP_UINT) {
+ uint64_t num = mp_decode_uint(&rptr);
+ wptr += sprintf(wptr, "%u", (unsigned)num);
+ } else {
+ mp_unreachable();
+ }
+ if (mp_stack_top_is_internal_item(&mp_stack))
+ wptr += sprintf(wptr, ", ");
+ }
+ while (mp_stack_advance(&mp_stack)) {
+ mp_stack_pop(&mp_stack);
+ wptr += sprintf(wptr, "}");
+ if (mp_stack_is_empty(&mp_stack))
+ goto end;
+ if (mp_stack_top_is_internal_item(&mp_stack))
+ wptr += sprintf(wptr, ", ");
+ }
+ }
+end:
+ is(strcmp(buff, descr), 0, "valid mpstack json description: "
+ "have \"%s\" expected \"%s\"", buff, descr);
+
+ footer();
+ return check_plan();
+}
int main()
{
- plan(20);
+ plan(21);
test_uints();
test_ints();
test_bools();
@@ -1117,6 +1193,7 @@ int main()
test_mp_check();
test_numbers();
test_overflow();
+ test_mpstack();
return check_plan();
}
--
2.19.2
^ permalink raw reply [flat|nested] 19+ messages in thread
* [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-18 10:32 ` Kirill Shcherbatov
@ 2019-01-21 9:46 ` Kirill Shcherbatov
2019-01-21 11:25 ` Alexander Turenko
1 sibling, 0 replies; 19+ messages in thread
From: Kirill Shcherbatov @ 2019-01-21 9:46 UTC (permalink / raw)
To: tarantool-patches, Alexander Turenko
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 6d5a7a5..cb23ec3 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -1108,7 +1108,7 @@ test_mpstack()
char tuple[1024];
char *wptr = tuple;
- const char *descr = "{\"first\", {\"key1\": \"one\", \"key2\": {2}}, {3}}";
+ const char *descr = "[\"first\", [\"key1\": \"one\", \"key2\": [2]], [3]]";
wptr = mp_encode_array(wptr, 3);
wptr = mp_encode_str(wptr, "first", 5);
wptr = mp_encode_map(wptr, 2);
@@ -1139,7 +1139,7 @@ test_mpstack()
mp_decode_array(&rptr) :
mp_decode_map(&rptr);
mp_stack_push(&mp_stack, type, size);
- wptr += sprintf(wptr, "{");
+ wptr += sprintf(wptr, "[");
} else {
if (type == MP_STR) {
const char *str = mp_decode_str(&rptr, &len);
@@ -1155,7 +1155,7 @@ test_mpstack()
}
while (mp_stack_advance(&mp_stack)) {
mp_stack_pop(&mp_stack);
- wptr += sprintf(wptr, "}");
+ wptr += sprintf(wptr, "]");
if (mp_stack_is_empty(&mp_stack))
goto end;
if (mp_stack_top_is_internal_item(&mp_stack))
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-18 10:32 ` Kirill Shcherbatov
2019-01-21 9:46 ` Kirill Shcherbatov
@ 2019-01-21 11:25 ` Alexander Turenko
2019-01-21 12:35 ` Kirill Shcherbatov
1 sibling, 1 reply; 19+ messages in thread
From: Alexander Turenko @ 2019-01-21 11:25 UTC (permalink / raw)
To: Kirill Shcherbatov; +Cc: tarantool-patches, vdavydov.dev, Kirill Shcherbatov
Minor comments are below, they are on code comments and the test.
If you doubt on some wording comments ask me and engage Vladimir into
discussion.
WBR, Alexander Turenko.
On Fri, Jan 18, 2019 at 01:32:28PM +0300, Kirill Shcherbatov wrote:
> Introduced a new mp_stack class to make complex msgpack parsing.
> The structure is needed in order to facilitate the decoding
> nested MP_ARRAY and MP_MAP msgpack containers without recursion.
> This allows you to determine that the parsing of the current
> container is complete.
>
> Needed for #1012
> ---
> msgpuck.h | 136 +++++++++++++++++++++++++++++++++++++++++++++++++
> test/msgpuck.c | 79 +++++++++++++++++++++++++++-
> 2 files changed, 214 insertions(+), 1 deletion(-)
>
> diff --git a/msgpuck.h b/msgpuck.h
> index b0d4967..b9bbe78 100644
> --- a/msgpuck.h
> +++ b/msgpuck.h
> @@ -359,6 +359,45 @@ enum mp_type {
> MP_EXT
> };
>
> +/** Message pack MP_MAP or MP_ARRAY container descriptor. */
> +struct mp_frame {
> + /** MP frame type calculated with mp_typeof(). */
> + enum mp_type type;
> + /**
> + * Total items in MP_MAP or MP_ARRAY container
> + * calculated with mp_decode_map() or mp_decode_array().
> + */
> + uint32_t size;
> + /**
> + * Count of items already processed. Must be less or
> + * equal to mp_frame::size member.
> + */
> + uint32_t curr;
> +};
> +
> +/**
> + * Stack of service contexts mp_frame to do complex msgpack parse.
Here we introduce the term 'service context', but don't describe it
anywhere. I think it would be better to say 'Stack of map/array
descriptors mp_frame' -- reuse the previously introduced term.
> + * The structure is needed in order to facilitate the decoding
> + * nested MP_ARRAY and MP_MAP msgpack containers without
> + * recursion. This allows you to determine that the parsing of
> + * the current container is complete.
> +*/
> +struct mp_stack {
> + /** The maximum stack depth. */
> + uint32_t size;
> + /**
> + * Count of used stack frames. Corresponds to the index
> + * in the array to perform the push operation. Must be
> + * less or equal to mp_stack::size member.
> + */
> + uint32_t used;
> + /**
> + * Array of mp_frames descriptors having at least
> + * mp_stack::size items.
I would simplify the clause like so:
> Array of size mp_stack::size of mp_frames.
> + */
> + struct mp_frame *frames;
> +};
> +
> /**
> * \brief Determine MsgPack type by a first byte \a c of encoded data.
> *
> @@ -1184,6 +1223,103 @@ mp_check(const char **data, const char *end);
> * {{{ Implementation
> */
>
> +/**
> + * \brief Initialize mp_stack \a stack of specified size \a size
> + * and user-allocated array \a frames.
> + * The \a frames allocation must have at least \a size mp_frame
> + * items.
> + * \param stack - the pointer to a mp_stack to initialize.
> + * \param size - stack size, count of stack::frames to use.
> + * \param frames - mp_frame service context allocation to use,
> + * must have at least \a size items.
I would use the 'descriptor' term you introduce above or some other, but
not two different terms for one object. I'm about using 'service
context' here.
Also I thought the term 'allocation' is a process of obtaining memory,
not a result. I would formulate it as: 'preallocated array of size \a
size of struct mp_frame items'.
> + */
> +MP_PROTO void
> +mp_stack_init(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
> +{
> + stack->frames = frames;
> + stack->size = size;
> + stack->used = 0;
> +}
> +
> +/**
> + * \brief Test if mp_stack \a stack is empty.
> + * \param stack - the pointer to a mp_stack to a stack to test.
> + */
> +MP_PROTO bool
> +mp_stack_is_empty(struct mp_stack *stack)
> +{
> + return stack->used == 0;
> +}
> +
> +/**
> + * \brief Test if mp_stack \a stack is full.
> + * \param stack - the pointer to a mp_stack to a stack to test.
> + */
> +MP_PROTO bool
> +mp_stack_is_full(struct mp_stack *stack)
> +{
> + return stack->used >= stack->size;
> +}
> +
> +/**
> + * \brief Pop the top mp_stack \a stack frame.
> + * The \a stack must not be empty.
> + * \param stack - the pointer to a mp_stack to operate with.
> + */
> +MP_PROTO void
> +mp_stack_pop(struct mp_stack *stack)
> +{
> + assert(!mp_stack_is_empty(stack));
> + --stack->used;
> +}
> +
> +/**
> + * \brief Get a pointer to a top mp_stack \a stack frame.
> + * \param stack - the pointer to a mp_stack to operate with.
> + * The \a stack must not be empty.
> + * \retval a pointer to a top stack frame.
> + */
> +MP_PROTO struct mp_frame *
> +mp_stack_top(struct mp_stack *stack)
> +{
> + assert(!mp_stack_is_empty(stack));
> + return &stack->frames[stack->used - 1];
> +}
> +
> +/**
> + * \brief Construct a new mp_frame and push it on to mp_stack
> + * \a stack. The \a stack must not be full.
> + * \param stack - the pointer to a stack to operate with.
> + * \param type - the type of mp_frame to create.
> + * \param size - size size of mp_frame to create.
> + * \retval a pointer to a top stack frame.
> + */
> +MP_PROTO void
> +mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
> +{
Retval is void.
> + char tuple[1024];
> + char *wptr = tuple;
> + const char *descr = "{\"first\", {\"key1\": \"one\", \"key2\": {2}}, {3}}";
1. The line is too long.
2. Now it is changed: { -> [ and } -> ], but it still is not correct.
JSON encodes maps with {} and arrays with [].
> + wptr = mp_encode_array(wptr, 3);
> + wptr = mp_encode_str(wptr, "first", 5);
> + wptr = mp_encode_map(wptr, 2);
> + wptr = mp_encode_str(wptr, "key1", 4);
> + wptr = mp_encode_str(wptr, "one", 3);
> + wptr = mp_encode_str(wptr, "key2", 4);
> + wptr = mp_encode_array(wptr, 1);
> + wptr = mp_encode_uint(wptr, 2);
> + wptr = mp_encode_array(wptr, 1);
> + wptr = mp_encode_uint(wptr, 3);
> + const char *rptr = tuple;
> + char buff[1024];
> + wptr = buff;
> +
> + struct mp_frame mp_frames[3];
> + struct mp_stack mp_stack;
> + mp_stack_init(&mp_stack, 3, mp_frames);
I would move the converter into a separate function and use it here.
> + enum mp_type type = mp_typeof(*rptr);
> + if (type == MP_ARRAY || type == MP_MAP) {
> + uint32_t size = type == MP_ARRAY ?
> + mp_decode_array(&rptr) :
> + mp_decode_map(&rptr);
> + mp_stack_push(&mp_stack, type, size);
> + wptr += sprintf(wptr, "{");
> + } else {
> + if (type == MP_STR) {
> + const char *str = mp_decode_str(&rptr, &len);
> + wptr += sprintf(wptr, "\"%.*s\"", len, str);
> + } else if (type == MP_UINT) {
> + uint64_t num = mp_decode_uint(&rptr);
> + wptr += sprintf(wptr, "%u", (unsigned)num);
> + } else {
> + mp_unreachable();
> + }
> + if (mp_stack_top_is_internal_item(&mp_stack))
> + wptr += sprintf(wptr, ", ");
> + }
I think this block would look better if we'll rewrite it as switch.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-21 11:25 ` Alexander Turenko
@ 2019-01-21 12:35 ` Kirill Shcherbatov
2019-01-21 20:25 ` Vladimir Davydov
0 siblings, 1 reply; 19+ messages in thread
From: Kirill Shcherbatov @ 2019-01-21 12:35 UTC (permalink / raw)
To: Kirill Shcherbatov, vdavydov.dev; +Cc: Alexander Turenko, tarantool-patches
> If you doubt on some wording comments ask me and engage Vladimir into
> discussion.
Introduced a new mp_stack class to make complex msgpack parsing.
The structure is needed in order to facilitate the decoding
nested MP_ARRAY and MP_MAP msgpack containers without recursion.
This allows you to determine that the parsing of the current
container is complete.
Needed for #1012
---
msgpuck.h | 134 +++++++++++++++++++++++++++++++++++++++++++++++++
test/msgpuck.c | 92 ++++++++++++++++++++++++++++++++-
2 files changed, 225 insertions(+), 1 deletion(-)
diff --git a/msgpuck.h b/msgpuck.h
index b0d4967..98c5b25 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -359,6 +359,44 @@ enum mp_type {
MP_EXT
};
+/** Message pack MP_MAP or MP_ARRAY container descriptor. */
+struct mp_frame {
+ /** MP frame type calculated with mp_typeof(). */
+ enum mp_type type;
+ /**
+ * Total items in MP_MAP or MP_ARRAY container
+ * calculated with mp_decode_map() or mp_decode_array().
+ */
+ uint32_t size;
+ /**
+ * Count of items already processed. Must be less or
+ * equal to mp_frame::size member.
+ */
+ uint32_t curr;
+};
+
+/**
+ * Stack of map/array descriptors mp_frame to do complex msgpack
+ parse. The structure is needed in order to facilitate the
+ decoding nested MP_ARRAY and MP_MAP msgpack containers without
+ * recursion. This allows you to determine that the parsing of
+ * the current container is complete.
+*/
+struct mp_stack {
+ /** The maximum stack depth. */
+ uint32_t size;
+ /**
+ * Count of used stack frames. Corresponds to the index
+ * in the array to perform the push operation. Must be
+ * less or equal to mp_stack::size member.
+ */
+ uint32_t used;
+ /**
+ * Array of size mp_stack::size of mp_frames.
+ */
+ struct mp_frame *frames;
+};
+
/**
* \brief Determine MsgPack type by a first byte \a c of encoded data.
*
@@ -1184,6 +1222,102 @@ mp_check(const char **data, const char *end);
* {{{ Implementation
*/
+/**
+ * \brief Initialize mp_stack \a stack of specified size \a size
+ * and user-allocated array \a frames.
+ * The \a frames allocation must have at least \a size mp_frame
+ * items.
+ * \param stack - the pointer to a mp_stack to initialize.
+ * \param size - stack size, count of stack::frames to use.
+ * \param frames - mp_frame preallocated array of size \a size
+ * of struct mp_frame items
+ */
+MP_PROTO void
+mp_stack_init(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+{
+ stack->frames = frames;
+ stack->size = size;
+ stack->used = 0;
+}
+
+/**
+ * \brief Test if mp_stack \a stack is empty.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_empty(struct mp_stack *stack)
+{
+ return stack->used == 0;
+}
+
+/**
+ * \brief Test if mp_stack \a stack is full.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ */
+MP_PROTO bool
+mp_stack_is_full(struct mp_stack *stack)
+{
+ return stack->used >= stack->size;
+}
+
+/**
+ * \brief Pop the top mp_stack \a stack frame.
+ * The \a stack must not be empty.
+ * \param stack - the pointer to a mp_stack to operate with.
+ */
+MP_PROTO void
+mp_stack_pop(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ --stack->used;
+}
+
+/**
+ * \brief Get a pointer to a top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * The \a stack must not be empty.
+ * \retval a pointer to a top stack frame.
+ */
+MP_PROTO struct mp_frame *
+mp_stack_top(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ return &stack->frames[stack->used - 1];
+}
+
+/**
+ * \brief Construct a new mp_frame and push it on to mp_stack
+ * \a stack. The \a stack must not be full.
+ * \param stack - the pointer to a stack to operate with.
+ * \param type - the type of mp_frame to create.
+ * \param size - size size of mp_frame to create.
+ */
+MP_PROTO void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
+{
+ assert(!mp_stack_is_full(stack));
+ uint32_t idx = stack->used++;
+ stack->frames[idx].type = type;
+ stack->frames[idx].size = size;
+ stack->frames[idx].curr = 0;
+}
+
+/**
+ * \brief Advance a mp_stack_top()::\a curr attribute of the
+ * \a stack. The \a stack must not be empty. The function also
+ * signals when it moves over the last item.
+ * \param stack - the pointer to a stack to operate with
+ * \retval true, when the top frame have already parsed:
+ * mp_stack_top()::\a curr > mp_stack_top()::\a size;
+ * false otherwise.
+ */
+MP_PROTO bool
+mp_stack_advance(struct mp_stack *stack)
+{
+ struct mp_frame *frame = mp_stack_top(stack);
+ return ++frame->curr > frame->size;
+}
+
/** \cond false */
extern const enum mp_type mp_type_hint[];
extern const int8_t mp_parser_hint[];
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 9265453..80a9def 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -1093,10 +1093,99 @@ test_overflow()
return check_plan();
}
+static inline bool
+mp_stack_advance_and_print(struct mp_stack *mp_stack, char **wptr)
+{
+ struct mp_frame *frame = mp_stack_top(mp_stack);
+ if (frame->curr > 0 && frame->curr < frame->size)
+ *wptr += sprintf(*wptr, ", ");
+ return mp_stack_advance(mp_stack);
+}
+
+static void
+mp_stack_json_print(struct mp_stack *mp_stack, const char *tuple, char *buff)
+{
+ const char *rptr = tuple;
+ char *wptr = buff;
+ while (true) {
+ uint32_t len;
+ const char *str;
+ if (!mp_stack_is_empty(mp_stack) &&
+ mp_stack_top(mp_stack)->type == MP_MAP) {
+ str = mp_decode_str(&rptr, &len);
+ wptr += sprintf(wptr, "\"%.*s\": ", len, str);
+ }
+ enum mp_type type = mp_typeof(*rptr);
+ switch (type) {
+ case MP_ARRAY:
+ case MP_MAP: {
+ uint32_t size = type == MP_ARRAY ?
+ mp_decode_array(&rptr) :
+ mp_decode_map(&rptr);
+ mp_stack_push(mp_stack, type, size);
+ wptr += sprintf(wptr, type == MP_ARRAY ? "[" : "{");
+ break;
+ }
+ case MP_STR: {
+ str = mp_decode_str(&rptr, &len);
+ wptr += sprintf(wptr, "\"%.*s\"", len, str);
+ break;
+ }
+ case MP_UINT: {
+ uint64_t num = mp_decode_uint(&rptr);
+ wptr += sprintf(wptr, "%u", (unsigned)num);
+ break;
+ }
+ default:
+ mp_unreachable();
+ }
+ while (mp_stack_advance_and_print(mp_stack, &wptr)) {
+ enum mp_type type = mp_stack_top(mp_stack)->type;
+ assert(type == MP_ARRAY || type == MP_MAP);
+ mp_stack_pop(mp_stack);
+ wptr += sprintf(wptr, type == MP_ARRAY ? "]" : "}");
+ if (mp_stack_is_empty(mp_stack))
+ return;
+ }
+ }
+}
+
+static int
+test_mpstack()
+{
+ plan(1);
+ header();
+
+ char tuple[1024];
+ char *wptr = tuple;
+ const char *descr = "[\"one\", {\"k1\": \"two\", \"k2\": [2]}, [3]]";
+ wptr = mp_encode_array(wptr, 3);
+ wptr = mp_encode_str(wptr, "one", 5);
+ wptr = mp_encode_map(wptr, 2);
+ wptr = mp_encode_str(wptr, "k1", 4);
+ wptr = mp_encode_str(wptr, "two", 3);
+ wptr = mp_encode_str(wptr, "k2", 4);
+ wptr = mp_encode_array(wptr, 1);
+ wptr = mp_encode_uint(wptr, 2);
+ wptr = mp_encode_array(wptr, 1);
+ wptr = mp_encode_uint(wptr, 3);
+
+ struct mp_frame mp_frames[3];
+ struct mp_stack mp_stack;
+ mp_stack_init(&mp_stack, 3, mp_frames);
+ char buff[1024];
+ mp_stack_json_print(&mp_stack, tuple, buff);
+
+ is(strcmp(buff, descr), 0, "valid mpstack json description: "
+ "have \"%s\" expected \"%s\"", buff, descr);
+
+ footer();
+ return check_plan();
+}
int main()
{
- plan(20);
+ plan(21);
test_uints();
test_ints();
test_bools();
@@ -1117,6 +1206,7 @@ int main()
test_mp_check();
test_numbers();
test_overflow();
+ test_mpstack();
return check_plan();
}
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-21 12:35 ` Kirill Shcherbatov
@ 2019-01-21 20:25 ` Vladimir Davydov
2019-01-22 12:28 ` Kirill Shcherbatov
0 siblings, 1 reply; 19+ messages in thread
From: Vladimir Davydov @ 2019-01-21 20:25 UTC (permalink / raw)
To: Kirill Shcherbatov
Cc: Kirill Shcherbatov, Alexander Turenko, tarantool-patches
On Mon, Jan 21, 2019 at 03:35:39PM +0300, Kirill Shcherbatov wrote:
> > If you doubt on some wording comments ask me and engage Vladimir into
> > discussion.
>
> Introduced a new mp_stack class to make complex msgpack parsing.
> The structure is needed in order to facilitate the decoding
> nested MP_ARRAY and MP_MAP msgpack containers without recursion.
> This allows you to determine that the parsing of the current
> container is complete.
>
> Needed for #1012
> ---
> msgpuck.h | 134 +++++++++++++++++++++++++++++++++++++++++++++++++
> test/msgpuck.c | 92 ++++++++++++++++++++++++++++++++-
> 2 files changed, 225 insertions(+), 1 deletion(-)
>
> diff --git a/msgpuck.h b/msgpuck.h
> index b0d4967..98c5b25 100644
> --- a/msgpuck.h
> +++ b/msgpuck.h
> @@ -359,6 +359,44 @@ enum mp_type {
> MP_EXT
> };
>
> +/** Message pack MP_MAP or MP_ARRAY container descriptor. */
> +struct mp_frame {
> + /** MP frame type calculated with mp_typeof(). */
> + enum mp_type type;
> + /**
> + * Total items in MP_MAP or MP_ARRAY container
Total number of items
> + * calculated with mp_decode_map() or mp_decode_array().
> + */
> + uint32_t size;
> + /**
> + * Count of items already processed. Must be less or
less than
> + * equal to mp_frame::size member.
> + */
> + uint32_t curr;
> +};
> +
> +/**
> + * Stack of map/array descriptors mp_frame to do complex msgpack
> + parse. The structure is needed in order to facilitate the
'the' before decoding is redundant
> + decoding nested MP_ARRAY and MP_MAP msgpack containers without
Bad comment formatting (* missing).
> + * recursion. This allows you to determine that the parsing of
Please avoid using 'you' in comments.
> + * the current container is complete.
> +*/
> +struct mp_stack {
> + /** The maximum stack depth. */
> + uint32_t size;
> + /**
> + * Count of used stack frames. Corresponds to the index
> + * in the array to perform the push operation. Must be
> + * less or equal to mp_stack::size member.
> + */
> + uint32_t used;
> + /**
> + * Array of size mp_stack::size of mp_frames.
> + */
> + struct mp_frame *frames;
> +};
> +
> /**
> * \brief Determine MsgPack type by a first byte \a c of encoded data.
> *
> @@ -1184,6 +1222,102 @@ mp_check(const char **data, const char *end);
> * {{{ Implementation
> */
>
> +/**
> + * \brief Initialize mp_stack \a stack of specified size \a size
> + * and user-allocated array \a frames.
> + * The \a frames allocation must have at least \a size mp_frame
> + * items.
> + * \param stack - the pointer to a mp_stack to initialize.
> + * \param size - stack size, count of stack::frames to use.
> + * \param frames - mp_frame preallocated array of size \a size
> + * of struct mp_frame items
> + */
> +MP_PROTO void
MP_PROTO is used only for function prototypes. For bodies you should use
MP_IMPL. Please take a look at any other function, say mp_decode_array,
and implement your methods in the same fashion.
> +mp_stack_init(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
We typically call constructors _create.
> +{
> + stack->frames = frames;
> + stack->size = size;
> + stack->used = 0;
> +}
> +
> +/**
> + * \brief Test if mp_stack \a stack is empty.
> + * \param stack - the pointer to a mp_stack to a stack to test.
\retval is missing
> + */
> +MP_PROTO bool
> +mp_stack_is_empty(struct mp_stack *stack)
> +{
> + return stack->used == 0;
> +}
> +
> +/**
> + * \brief Test if mp_stack \a stack is full.
> + * \param stack - the pointer to a mp_stack to a stack to test.
\retval
> + */
> +MP_PROTO bool
> +mp_stack_is_full(struct mp_stack *stack)
> +{
> + return stack->used >= stack->size;
> +}
> +
> +/**
> + * \brief Pop the top mp_stack \a stack frame.
> + * The \a stack must not be empty.
There's \pre tag for pre-conditions. Please take a look at
mp_check_array description.
> + * \param stack - the pointer to a mp_stack to operate with.
> + */
> +MP_PROTO void
> +mp_stack_pop(struct mp_stack *stack)
> +{
> + assert(!mp_stack_is_empty(stack));
> + --stack->used;
> +}
> +
> +/**
> + * \brief Get a pointer to a top mp_stack \a stack frame.
> + * \param stack - the pointer to a mp_stack to operate with.
> + * The \a stack must not be empty.
\pre
> + * \retval a pointer to a top stack frame.
> + */
> +MP_PROTO struct mp_frame *
> +mp_stack_top(struct mp_stack *stack)
> +{
> + assert(!mp_stack_is_empty(stack));
> + return &stack->frames[stack->used - 1];
> +}
> +
> +/**
> + * \brief Construct a new mp_frame and push it on to mp_stack
> + * \a stack. The \a stack must not be full.
\pre
> + * \param stack - the pointer to a stack to operate with.
> + * \param type - the type of mp_frame to create.
> + * \param size - size size of mp_frame to create.
> + */
> +MP_PROTO void
> +mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
> +{
> + assert(!mp_stack_is_full(stack));
> + uint32_t idx = stack->used++;
> + stack->frames[idx].type = type;
> + stack->frames[idx].size = size;
> + stack->frames[idx].curr = 0;
> +}
> +
> +/**
> + * \brief Advance a mp_stack_top()::\a curr attribute of the
> + * \a stack. The \a stack must not be empty. The function also
\pre
> + * signals when it moves over the last item.
> + * \param stack - the pointer to a stack to operate with
> + * \retval true, when the top frame have already parsed:
s/have/has
s/when/if
Don't use comma (,) before 'if' or 'when'.
Parsed what? Has been parsed, you wanted to say?
> + * mp_stack_top()::\a curr > mp_stack_top()::\a size;
> + * false otherwise.
> + */
> +MP_PROTO bool
> +mp_stack_advance(struct mp_stack *stack)
> +{
> + struct mp_frame *frame = mp_stack_top(stack);
> + return ++frame->curr > frame->size;
> +}
> +
> /** \cond false */
> extern const enum mp_type mp_type_hint[];
> extern const int8_t mp_parser_hint[];
> diff --git a/test/msgpuck.c b/test/msgpuck.c
> index 9265453..80a9def 100644
> --- a/test/msgpuck.c
> +++ b/test/msgpuck.c
> @@ -1093,10 +1093,99 @@ test_overflow()
> return check_plan();
> }
>
> +static inline bool
> +mp_stack_advance_and_print(struct mp_stack *mp_stack, char **wptr)
> +{
> + struct mp_frame *frame = mp_stack_top(mp_stack);
> + if (frame->curr > 0 && frame->curr < frame->size)
> + *wptr += sprintf(*wptr, ", ");
> + return mp_stack_advance(mp_stack);
> +}
> +
> +static void
> +mp_stack_json_print(struct mp_stack *mp_stack, const char *tuple, char *buff)
> +{
> + const char *rptr = tuple;
> + char *wptr = buff;
> + while (true) {
I just looked at msgpuck.c - it turns out we have a very similar
function called mp_snprint, which we use for debug output. What about
reimplementing it without recursion using the new mp_stack API? It would
make the purpose of mp_stack clear by giving a usage example right in
the msgpuck code. Besides we wouldn't need to write any additional
tests. What do you think?
> + uint32_t len;
> + const char *str;
> + if (!mp_stack_is_empty(mp_stack) &&
> + mp_stack_top(mp_stack)->type == MP_MAP) {
> + str = mp_decode_str(&rptr, &len);
> + wptr += sprintf(wptr, "\"%.*s\": ", len, str);
> + }
> + enum mp_type type = mp_typeof(*rptr);
> + switch (type) {
> + case MP_ARRAY:
> + case MP_MAP: {
> + uint32_t size = type == MP_ARRAY ?
> + mp_decode_array(&rptr) :
> + mp_decode_map(&rptr);
> + mp_stack_push(mp_stack, type, size);
> + wptr += sprintf(wptr, type == MP_ARRAY ? "[" : "{");
> + break;
> + }
> + case MP_STR: {
> + str = mp_decode_str(&rptr, &len);
> + wptr += sprintf(wptr, "\"%.*s\"", len, str);
> + break;
> + }
> + case MP_UINT: {
> + uint64_t num = mp_decode_uint(&rptr);
> + wptr += sprintf(wptr, "%u", (unsigned)num);
> + break;
> + }
> + default:
> + mp_unreachable();
> + }
> + while (mp_stack_advance_and_print(mp_stack, &wptr)) {
> + enum mp_type type = mp_stack_top(mp_stack)->type;
> + assert(type == MP_ARRAY || type == MP_MAP);
> + mp_stack_pop(mp_stack);
> + wptr += sprintf(wptr, type == MP_ARRAY ? "]" : "}");
> + if (mp_stack_is_empty(mp_stack))
> + return;
> + }
> + }
> +}
> +
> +static int
> +test_mpstack()
> +{
> + plan(1);
> + header();
> +
> + char tuple[1024];
> + char *wptr = tuple;
> + const char *descr = "[\"one\", {\"k1\": \"two\", \"k2\": [2]}, [3]]";
> + wptr = mp_encode_array(wptr, 3);
> + wptr = mp_encode_str(wptr, "one", 5);
> + wptr = mp_encode_map(wptr, 2);
> + wptr = mp_encode_str(wptr, "k1", 4);
> + wptr = mp_encode_str(wptr, "two", 3);
> + wptr = mp_encode_str(wptr, "k2", 4);
> + wptr = mp_encode_array(wptr, 1);
> + wptr = mp_encode_uint(wptr, 2);
> + wptr = mp_encode_array(wptr, 1);
> + wptr = mp_encode_uint(wptr, 3);
> +
> + struct mp_frame mp_frames[3];
> + struct mp_stack mp_stack;
> + mp_stack_init(&mp_stack, 3, mp_frames);
> + char buff[1024];
> + mp_stack_json_print(&mp_stack, tuple, buff);
> +
> + is(strcmp(buff, descr), 0, "valid mpstack json description: "
> + "have \"%s\" expected \"%s\"", buff, descr);
> +
> + footer();
> + return check_plan();
> +}
>
> int main()
> {
> - plan(20);
> + plan(21);
> test_uints();
> test_ints();
> test_bools();
> @@ -1117,6 +1206,7 @@ int main()
> test_mp_check();
> test_numbers();
> test_overflow();
> + test_mpstack();
>
> return check_plan();
> }
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-21 20:25 ` Vladimir Davydov
@ 2019-01-22 12:28 ` Kirill Shcherbatov
2019-01-22 20:21 ` Vladimir Davydov
0 siblings, 1 reply; 19+ messages in thread
From: Kirill Shcherbatov @ 2019-01-22 12:28 UTC (permalink / raw)
To: tarantool-patches, Vladimir Davydov
Hi! Thank you for review. I like your idea with rewriting mp_fprint and mp_snprint routines.
=================================================
Introduced a new mp_stack class to make complex msgpack parsing.
The structure is needed in order to facilitate the decoding
nested MP_ARRAY and MP_MAP msgpack containers without recursion.
This allows you to determine that the parsing of the current
container is complete.
Reworked mp_snprint and mp_fprint routines to use mp_stack
instead of recursion.
Needed for #1012
---
CMakeLists.txt | 1 +
msgpuck.c | 170 +++++++++++++++++++++++++++----------------------
msgpuck.h | 168 ++++++++++++++++++++++++++++++++++++++++++++++++
test/msgpuck.c | 24 ++++++-
4 files changed, 287 insertions(+), 76 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c129b77..6f62f54 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,7 @@ check_c_compiler_flag("-mno-unaligned-access" CC_HAS_MNO_UNALIGNED_ACCESS)
add_library(msgpuck STATIC msgpuck.c hints.c)
set_target_properties(msgpuck PROPERTIES VERSION 1.0 SOVERSION 1)
set_target_properties(msgpuck PROPERTIES OUTPUT_NAME "msgpuck")
+add_definitions(-DMP_PRINT_MAX_DEPTH=3)
if (NOT ${PROJECT_SOURCE_DIR} STREQUAL ${CMAKE_SOURCE_DIR})
# Embedded mode, skip tests, documentation and the install targets
diff --git a/msgpuck.c b/msgpuck.c
index 72b800c..84b9d63 100644
--- a/msgpuck.c
+++ b/msgpuck.c
@@ -232,84 +232,106 @@ mp_format(char *data, size_t data_size, const char *format, ...)
return res;
}
-#define MP_PRINT(SELF, PRINTF) \
+#define MP_PRINT(PRINTF) \
{ \
- switch (mp_typeof(**data)) { \
- case MP_NIL: \
- mp_decode_nil(data); \
- PRINTF("null"); \
- break; \
- case MP_UINT: \
- PRINTF("%llu", (unsigned long long) mp_decode_uint(data)); \
- break; \
- case MP_INT: \
- PRINTF("%lld", (long long) mp_decode_int(data)); \
- break; \
- case MP_STR: \
- case MP_BIN: \
- { \
- uint32_t len = mp_typeof(**data) == MP_STR ? \
- mp_decode_strl(data) : mp_decode_binl(data); \
- PRINTF("\""); \
- const char *s; \
- for (s = *data; s < *data + len; s++) { \
- unsigned char c = (unsigned char ) *s; \
- if (c < 128 && mp_char2escape[c] != NULL) { \
- /* Escape character */ \
- PRINTF("%s", mp_char2escape[c]); \
+ struct mp_frame mp_frames[MP_PRINT_MAX_DEPTH]; \
+ struct mp_stack mp_stack; \
+ mp_stack_create(&mp_stack, MP_PRINT_MAX_DEPTH, mp_frames); \
+ do { \
+ bool is_map_value = !mp_stack_is_empty(&mp_stack) && \
+ mp_stack_top(&mp_stack)->type == MP_MAP && \
+ mp_stack_top(&mp_stack)->curr % 2 == 0; \
+ if (is_map_value) \
+ PRINTF(": "); \
+ switch (mp_typeof(**data)) { \
+ case MP_NIL: \
+ mp_decode_nil(data); \
+ PRINTF("null"); \
+ break; \
+ case MP_UINT: \
+ PRINTF("%llu", \
+ (unsigned long long)mp_decode_uint(data)); \
+ break; \
+ case MP_INT: \
+ PRINTF("%lld", (long long) mp_decode_int(data)); \
+ break; \
+ case MP_STR: \
+ case MP_BIN: { \
+ uint32_t len = mp_typeof(**data) == MP_STR ? \
+ mp_decode_strl(data) : mp_decode_binl(data); \
+ PRINTF("\""); \
+ const char *s; \
+ for (s = *data; s < *data + len; s++) { \
+ unsigned char c = (unsigned char ) *s; \
+ if (c < 128 && mp_char2escape[c] != NULL) { \
+ /* Escape character */ \
+ PRINTF("%s", mp_char2escape[c]); \
+ } else { \
+ PRINTF("%c", c); \
+ } \
+ } \
+ PRINTF("\""); \
+ *data += len; \
+ break; \
+ } \
+ case MP_ARRAY: { \
+ PRINTF("["); \
+ if (!mp_stack_is_full(&mp_stack)) { \
+ mp_stack_push(&mp_stack, MP_ARRAY, \
+ mp_decode_array(data)); \
} else { \
- PRINTF("%c", c); \
+ /* \
+ * Skip msgpack items that do not \
+ * fit onto the stack. \
+ */ \
+ PRINTF("...]"); \
+ mp_next(data); \
} \
+ break; \
} \
- PRINTF("\""); \
- *data += len; \
- break; \
- } \
- case MP_ARRAY: \
- { \
- uint32_t count = mp_decode_array(data); \
- PRINTF("["); \
- uint32_t i; \
- for (i = 0; i < count; i++) { \
- if (i) \
- PRINTF(", "); \
- SELF(data); \
+ case MP_MAP: { \
+ PRINTF("{"); \
+ if (!mp_stack_is_full(&mp_stack)) { \
+ mp_stack_push(&mp_stack, MP_MAP, \
+ 2 * mp_decode_map(data)); \
+ } else { \
+ /* \
+ * Skip msgpack items that do not \
+ * fit onto the stack. \
+ */ \
+ PRINTF("...}"); \
+ mp_next(data); \
+ } \
+ break; \
} \
- PRINTF("]"); \
- break; \
- } \
- case MP_MAP: \
- { \
- uint32_t count = mp_decode_map(data); \
- PRINTF("{"); \
- uint32_t i; \
- for (i = 0; i < count; i++) { \
- if (i) \
+ case MP_BOOL: \
+ PRINTF(mp_decode_bool(data) ? "true" : "false"); \
+ break; \
+ case MP_FLOAT: \
+ PRINTF("%g", mp_decode_float(data)); \
+ break; \
+ case MP_DOUBLE: \
+ PRINTF("%lg", mp_decode_double(data)); \
+ break; \
+ case MP_EXT: \
+ mp_next(data); \
+ PRINTF("undefined"); \
+ break; \
+ default: \
+ mp_unreachable(); \
+ return -1; \
+ } \
+ while (!mp_stack_is_empty(&mp_stack)) { \
+ struct mp_frame *frame = mp_stack_top(&mp_stack); \
+ if (frame->curr > 0 && frame->curr < frame->size && \
+ (frame->type == MP_ARRAY || is_map_value)) \
PRINTF(", "); \
- SELF(data); \
- PRINTF(": "); \
- SELF(data); \
+ if (!mp_stack_advance(&mp_stack)) \
+ break; \
+ PRINTF(frame->type == MP_MAP ? "}" : "]"); \
+ mp_stack_pop(&mp_stack); \
} \
- PRINTF("}"); \
- break; \
- } \
- case MP_BOOL: \
- PRINTF(mp_decode_bool(data) ? "true" : "false"); \
- break; \
- case MP_FLOAT: \
- PRINTF("%g", mp_decode_float(data)); \
- break; \
- case MP_DOUBLE: \
- PRINTF("%lg", mp_decode_double(data)); \
- break; \
- case MP_EXT: \
- mp_next(data); \
- PRINTF("undefined"); \
- break; \
- default: \
- mp_unreachable(); \
- return -1; \
- } \
+ } while (!mp_stack_is_empty(&mp_stack)); \
}
static inline int
@@ -323,8 +345,7 @@ mp_fprint_internal(FILE *file, const char **data)
total_bytes += bytes; \
} while (0)
#define PRINT(...) HANDLE(fprintf, __VA_ARGS__)
-#define SELF(...) HANDLE(mp_fprint_internal, __VA_ARGS__)
-MP_PRINT(SELF, PRINT)
+MP_PRINT(PRINT)
#undef HANDLE
#undef SELF
#undef PRINT
@@ -359,8 +380,7 @@ mp_snprint_internal(char *buf, int size, const char **data)
} \
} while (0)
#define PRINT(...) HANDLE(snprintf, __VA_ARGS__)
-#define SELF(...) HANDLE(mp_snprint_internal, __VA_ARGS__)
-MP_PRINT(SELF, PRINT)
+MP_PRINT(PRINT)
#undef HANDLE
#undef SELF
#undef PRINT
diff --git a/msgpuck.h b/msgpuck.h
index b0d4967..de8b2d0 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -1176,6 +1176,123 @@ mp_next(const char **data);
MP_PROTO int
mp_check(const char **data, const char *end);
+/**
+ * The maximum depth of the message pack nesting maps and arrays
+ * supported by mp_fprint and mp_snprintf routines. Upon reaching
+ * the maximum depth, the parsing will not be carried out.
+ */
+#ifndef MP_PRINT_MAX_DEPTH
+#error "MP_PRINT_MAX_DEPTH must be defined"
+#endif /* MP_PRINT_MAX_DEPTH */
+
+/** Message pack MP_MAP or MP_ARRAY container descriptor. */
+struct mp_frame {
+ /** MP frame type calculated with mp_typeof(). */
+ enum mp_type type;
+ /**
+ * Total number of items in MP_MAP or MP_ARRAY container
+ * calculated with mp_decode_map() or mp_decode_array().
+ */
+ uint32_t size;
+ /**
+ * Count of items already processed. Must be less than or
+ * equal to mp_frame::size member.
+ */
+ uint32_t curr;
+};
+
+/**
+ * Stack of map/array descriptors mp_frame to do complex msgpack
+ * parse. The structure is needed in order to facilitate
+ * decoding nested MP_ARRAY and MP_MAP msgpack containers without
+ * recursion. This allows to determine that the parsing of the
+ * current container is complete.
+*/
+struct mp_stack {
+ /** The maximum stack depth. */
+ uint32_t size;
+ /**
+ * Count of used stack frames. Corresponds to the index
+ * in the array to perform the push operation. Must be
+ * less or equal to mp_stack::size member.
+ */
+ uint32_t used;
+ /**
+ * Array of size mp_stack::size of mp_frames.
+ */
+ struct mp_frame *frames;
+};
+
+/**
+ * \brief Initialize mp_stack \a stack of specified size \a size
+ * and user-allocated array \a frames.
+ * The \a frames allocation must have at least \a size mp_frame
+ * items.
+ * \param stack - the pointer to a mp_stack to initialize.
+ * \param size - stack size, count of stack::frames to use.
+ * \param frames - mp_frame preallocated array of size \a size
+ * of struct mp_frame items
+ */
+MP_PROTO void
+mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames);
+
+/**
+ * \brief Test if mp_stack \a stack is empty.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ * \retval true if mp_stack is empty, false otherwise.
+ */
+MP_PROTO bool
+mp_stack_is_empty(struct mp_stack *stack);
+
+/**
+ * \brief Test if mp_stack \a stack is full.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ * \retval true if mp_stack is full, false otherwise.
+ */
+MP_PROTO bool
+mp_stack_is_full(struct mp_stack *stack);
+
+/**
+ * \brief Pop the top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO void
+mp_stack_pop(struct mp_stack *stack);
+
+/**
+ * \brief Get a pointer to a top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * \retval a pointer to a top stack frame.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO struct mp_frame *
+mp_stack_top(struct mp_stack *stack);
+
+/**
+ * \brief Construct a new mp_frame and push it on to mp_stack
+ * \a stack.
+ * \param stack - the pointer to a stack to operate with.
+ * \param type - the type of mp_frame to create.
+ * \param size - size size of mp_frame to create.
+ * \pre mp_stack_is_full(stack) == false
+ */
+MP_PROTO void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size);
+
+/**
+ * \brief Advance a mp_stack_top()::\a curr attribute of the
+ * \a stack. The function also signals when it moves over the
+ * last item.
+ * \param stack - the pointer to a stack to operate with
+ * \retval true if the top frame has been already parsed:
+ * mp_stack_top()::\a curr > mp_stack_top()::\a size;
+ * false otherwise.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO bool
+mp_stack_advance(struct mp_stack *stack);
+
/*
* }}}
*/
@@ -1184,6 +1301,57 @@ mp_check(const char **data, const char *end);
* {{{ Implementation
*/
+MP_IMPL void
+mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+{
+ stack->frames = frames;
+ stack->size = size;
+ stack->used = 0;
+}
+
+MP_IMPL bool
+mp_stack_is_empty(struct mp_stack *stack)
+{
+ return stack->used == 0;
+}
+
+MP_IMPL bool
+mp_stack_is_full(struct mp_stack *stack)
+{
+ return stack->used >= stack->size;
+}
+
+MP_IMPL void
+mp_stack_pop(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ --stack->used;
+}
+
+MP_IMPL struct mp_frame *
+mp_stack_top(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ return &stack->frames[stack->used - 1];
+}
+
+MP_IMPL void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
+{
+ assert(!mp_stack_is_full(stack));
+ uint32_t idx = stack->used++;
+ stack->frames[idx].type = type;
+ stack->frames[idx].size = size;
+ stack->frames[idx].curr = 0;
+}
+
+MP_IMPL bool
+mp_stack_advance(struct mp_stack *stack)
+{
+ struct mp_frame *frame = mp_stack_top(stack);
+ return ++frame->curr > frame->size;
+}
+
/** \cond false */
extern const enum mp_type mp_type_hint[];
extern const int8_t mp_parser_hint[];
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 9265453..46c9eed 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -764,7 +764,7 @@ test_format(void)
int
test_mp_print()
{
- plan(10);
+ plan(11);
header();
char msgpack[128];
@@ -838,6 +838,28 @@ test_mp_print()
int rc = mp_fprint(stdin, msgpack);
is(rc, -1, "mp_fprint I/O error");
+ /* Test mp_snprint parsing stack overflow. */
+ char mp_buf[MP_PRINT_MAX_DEPTH * mp_sizeof_array(1) + mp_sizeof_uint(1)];
+ int json_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1;
+ char json[json_sz];
+ char *buf_wptr = mp_buf;
+ char *json_wptr = json;
+ for (int i = 0; i <= 2 * (MP_PRINT_MAX_DEPTH + 1); i++) {
+ if (i < MP_PRINT_MAX_DEPTH + 1) {
+ buf_wptr = mp_encode_array(buf_wptr, 1);
+ *(json_wptr++) = '[';
+ } else if (i == MP_PRINT_MAX_DEPTH + 1) {
+ buf_wptr = mp_encode_uint(buf_wptr, 1);
+ json_wptr += sprintf(json_wptr, "...");
+ } else {
+ *(json_wptr++) = ']';
+ }
+ }
+ *(json_wptr++) = '\0';
+ char decoded[json_sz];
+ rc = mp_snprint(decoded, json_sz, mp_buf);
+ ok(rc == json_sz - 1 && memcmp(decoded, json, json_sz - 1) == 0,
+ "mp_snprint parsing stack overflow test");
footer();
return check_plan();
}
--
2.19.2
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-22 12:28 ` Kirill Shcherbatov
@ 2019-01-22 20:21 ` Vladimir Davydov
2019-01-23 8:23 ` Kirill Shcherbatov
0 siblings, 1 reply; 19+ messages in thread
From: Vladimir Davydov @ 2019-01-22 20:21 UTC (permalink / raw)
To: Kirill Shcherbatov; +Cc: tarantool-patches
On Tue, Jan 22, 2019 at 03:28:03PM +0300, Kirill Shcherbatov wrote:
> Hi! Thank you for review. I like your idea with rewriting mp_fprint and mp_snprint routines.
>
> =================================================
>
> Introduced a new mp_stack class to make complex msgpack parsing.
> The structure is needed in order to facilitate the decoding
> nested MP_ARRAY and MP_MAP msgpack containers without recursion.
> This allows you to determine that the parsing of the current
> container is complete.
> Reworked mp_snprint and mp_fprint routines to use mp_stack
> instead of recursion.
>
> Needed for #1012
> ---
> CMakeLists.txt | 1 +
> msgpuck.c | 170 +++++++++++++++++++++++++++----------------------
> msgpuck.h | 168 ++++++++++++++++++++++++++++++++++++++++++++++++
> test/msgpuck.c | 24 ++++++-
> 4 files changed, 287 insertions(+), 76 deletions(-)
>
> diff --git a/CMakeLists.txt b/CMakeLists.txt
> index c129b77..6f62f54 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -15,6 +15,7 @@ check_c_compiler_flag("-mno-unaligned-access" CC_HAS_MNO_UNALIGNED_ACCESS)
> add_library(msgpuck STATIC msgpuck.c hints.c)
> set_target_properties(msgpuck PROPERTIES VERSION 1.0 SOVERSION 1)
> set_target_properties(msgpuck PROPERTIES OUTPUT_NAME "msgpuck")
> +add_definitions(-DMP_PRINT_MAX_DEPTH=3)
This won't allow us to override this macro in Tarantool cmake file
AFAIU. Come to think of it, I guess we can the define the macro without
touching CMakeLists.txt at all, by simply adding
#ifndef MP_PRINT_MAX_DEPTH
# define MP_RPINT_MAX_DEPTH 32
#endif
to msgpuck.h instead.
>
> if (NOT ${PROJECT_SOURCE_DIR} STREQUAL ${CMAKE_SOURCE_DIR})
> # Embedded mode, skip tests, documentation and the install targets
> diff --git a/msgpuck.c b/msgpuck.c
> index 72b800c..84b9d63 100644
> --- a/msgpuck.c
> +++ b/msgpuck.c
> @@ -232,84 +232,106 @@ mp_format(char *data, size_t data_size, const char *format, ...)
> return res;
> }
>
> -#define MP_PRINT(SELF, PRINTF) \
> +#define MP_PRINT(PRINTF) \
> { \
> - switch (mp_typeof(**data)) { \
> - case MP_NIL: \
> - mp_decode_nil(data); \
> - PRINTF("null"); \
> - break; \
> - case MP_UINT: \
> - PRINTF("%llu", (unsigned long long) mp_decode_uint(data)); \
> - break; \
> - case MP_INT: \
> - PRINTF("%lld", (long long) mp_decode_int(data)); \
> - break; \
> - case MP_STR: \
> - case MP_BIN: \
> - { \
> - uint32_t len = mp_typeof(**data) == MP_STR ? \
> - mp_decode_strl(data) : mp_decode_binl(data); \
> - PRINTF("\""); \
> - const char *s; \
> - for (s = *data; s < *data + len; s++) { \
> - unsigned char c = (unsigned char ) *s; \
> - if (c < 128 && mp_char2escape[c] != NULL) { \
> - /* Escape character */ \
> - PRINTF("%s", mp_char2escape[c]); \
> + struct mp_frame mp_frames[MP_PRINT_MAX_DEPTH]; \
> + struct mp_stack mp_stack; \
We usually try to shorten local variable names provided it doesn't
hurt readability so I'd rename 'mp_stack' to 'stack' and 'mp_frames'
to 'frames'.
> + mp_stack_create(&mp_stack, MP_PRINT_MAX_DEPTH, mp_frames); \
> + do { \
> + bool is_map_value = !mp_stack_is_empty(&mp_stack) && \
> + mp_stack_top(&mp_stack)->type == MP_MAP && \
> + mp_stack_top(&mp_stack)->curr % 2 == 0; \
> + if (is_map_value) \
> + PRINTF(": "); \
I'd like all map/array formatting to be gathered in one place - the code
would be easier to follow then. I don't see why you need to print ':'
here, '[' and '{' in switch-case, while ',', '}' and ']' in the end of
the loop. Can't you print all those elements (of course, except "[...]"
and "{...}") in the end of the loop, after emitting a token?
> + switch (mp_typeof(**data)) { \
> + case MP_NIL: \
> + mp_decode_nil(data); \
> + PRINTF("null"); \
> + break; \
> + case MP_UINT: \
> + PRINTF("%llu", \
> + (unsigned long long)mp_decode_uint(data)); \
> + break; \
> + case MP_INT: \
> + PRINTF("%lld", (long long) mp_decode_int(data)); \
> + break; \
> + case MP_STR: \
> + case MP_BIN: { \
> + uint32_t len = mp_typeof(**data) == MP_STR ? \
> + mp_decode_strl(data) : mp_decode_binl(data); \
> + PRINTF("\""); \
> + const char *s; \
> + for (s = *data; s < *data + len; s++) { \
> + unsigned char c = (unsigned char ) *s; \
> + if (c < 128 && mp_char2escape[c] != NULL) { \
> + /* Escape character */ \
> + PRINTF("%s", mp_char2escape[c]); \
> + } else { \
> + PRINTF("%c", c); \
> + } \
> + } \
> + PRINTF("\""); \
> + *data += len; \
> + break; \
> + } \
> + case MP_ARRAY: { \
> + PRINTF("["); \
> + if (!mp_stack_is_full(&mp_stack)) { \
> + mp_stack_push(&mp_stack, MP_ARRAY, \
> + mp_decode_array(data)); \
> } else { \
> - PRINTF("%c", c); \
> + /* \
> + * Skip msgpack items that do not \
> + * fit onto the stack. \
> + */ \
> + PRINTF("...]"); \
> + mp_next(data); \
> } \
> + break; \
> } \
> - PRINTF("\""); \
> - *data += len; \
> - break; \
> - } \
> - case MP_ARRAY: \
> - { \
> - uint32_t count = mp_decode_array(data); \
> - PRINTF("["); \
> - uint32_t i; \
> - for (i = 0; i < count; i++) { \
> - if (i) \
> - PRINTF(", "); \
> - SELF(data); \
> + case MP_MAP: { \
> + PRINTF("{"); \
> + if (!mp_stack_is_full(&mp_stack)) { \
> + mp_stack_push(&mp_stack, MP_MAP, \
> + 2 * mp_decode_map(data)); \
> + } else { \
> + /* \
> + * Skip msgpack items that do not \
> + * fit onto the stack. \
> + */ \
I don't like that you copied-and-pasted the comment. IMO it would be
better to make MP_MAP and MP_ARRAY share the same case.
> + PRINTF("...}"); \
> + mp_next(data); \
> + } \
> + break; \
> } \
> - PRINTF("]"); \
> - break; \
> - } \
> - case MP_MAP: \
> - { \
> - uint32_t count = mp_decode_map(data); \
> - PRINTF("{"); \
> - uint32_t i; \
> - for (i = 0; i < count; i++) { \
> - if (i) \
> + case MP_BOOL: \
> + PRINTF(mp_decode_bool(data) ? "true" : "false"); \
> + break; \
> + case MP_FLOAT: \
> + PRINTF("%g", mp_decode_float(data)); \
> + break; \
> + case MP_DOUBLE: \
> + PRINTF("%lg", mp_decode_double(data)); \
> + break; \
> + case MP_EXT: \
> + mp_next(data); \
> + PRINTF("undefined"); \
> + break; \
> + default: \
> + mp_unreachable(); \
> + return -1; \
> + } \
> + while (!mp_stack_is_empty(&mp_stack)) { \
> + struct mp_frame *frame = mp_stack_top(&mp_stack); \
> + if (frame->curr > 0 && frame->curr < frame->size && \
You check (frame->curr < frame->size) explicitly here and then
implicitly in mp_stack_advance() below. This looks ugly. Please
rearrange the code so as not to access frame->size directly.
> + (frame->type == MP_ARRAY || is_map_value)) \
> PRINTF(", "); \
> - SELF(data); \
> - PRINTF(": "); \
> - SELF(data); \
> + if (!mp_stack_advance(&mp_stack)) \
> + break; \
Let's please rewrite this function without the outer loop - it would
shrink the diff making the code easier for review. All you need to do
is replace this 'break' with 'goto next', place 'next' label at the
beginning of this function, and remove the outer loop altogether.
> + PRINTF(frame->type == MP_MAP ? "}" : "]"); \
> + mp_stack_pop(&mp_stack); \
> } \
> - PRINTF("}"); \
> - break; \
> - } \
> - case MP_BOOL: \
> - PRINTF(mp_decode_bool(data) ? "true" : "false"); \
> - break; \
> - case MP_FLOAT: \
> - PRINTF("%g", mp_decode_float(data)); \
> - break; \
> - case MP_DOUBLE: \
> - PRINTF("%lg", mp_decode_double(data)); \
> - break; \
> - case MP_EXT: \
> - mp_next(data); \
> - PRINTF("undefined"); \
> - break; \
> - default: \
> - mp_unreachable(); \
> - return -1; \
> - } \
> + } while (!mp_stack_is_empty(&mp_stack)); \
Bad formatting: too many spaces before \
> }
>
> static inline int
> @@ -323,8 +345,7 @@ mp_fprint_internal(FILE *file, const char **data)
> total_bytes += bytes; \
> } while (0)
> #define PRINT(...) HANDLE(fprintf, __VA_ARGS__)
> -#define SELF(...) HANDLE(mp_fprint_internal, __VA_ARGS__)
> -MP_PRINT(SELF, PRINT)
> +MP_PRINT(PRINT)
You don't need HANDLE anymore - you can define PRINT directly.
Also, you don't seem to need mp_fprint_internal.
> #undef HANDLE
> #undef SELF
Pointless undef - SELF isn't defined anymore.
> #undef PRINT
> @@ -359,8 +380,7 @@ mp_snprint_internal(char *buf, int size, const char **data)
> } \
> } while (0)
> #define PRINT(...) HANDLE(snprintf, __VA_ARGS__)
> -#define SELF(...) HANDLE(mp_snprint_internal, __VA_ARGS__)
> -MP_PRINT(SELF, PRINT)
> +MP_PRINT(PRINT)
> #undef HANDLE
> #undef SELF
Ditto.
> #undef PRINT
> diff --git a/msgpuck.h b/msgpuck.h
> index b0d4967..de8b2d0 100644
> --- a/msgpuck.h
> +++ b/msgpuck.h
> @@ -1176,6 +1176,123 @@ mp_next(const char **data);
> MP_PROTO int
> mp_check(const char **data, const char *end);
>
> +/**
> + * The maximum depth of the message pack nesting maps and arrays
> + * supported by mp_fprint and mp_snprintf routines. Upon reaching
> + * the maximum depth, the parsing will not be carried out.
The maximum msgpack nesting depth supported by mp_snprint().
Everything beyond that will be omitted (replaced with "...").
> + */
> +#ifndef MP_PRINT_MAX_DEPTH
> +#error "MP_PRINT_MAX_DEPTH must be defined"
> +#endif /* MP_PRINT_MAX_DEPTH */
> +
> +/** Message pack MP_MAP or MP_ARRAY container descriptor. */
> +struct mp_frame {
> + /** MP frame type calculated with mp_typeof(). */
> + enum mp_type type;
> + /**
> + * Total number of items in MP_MAP or MP_ARRAY container
> + * calculated with mp_decode_map() or mp_decode_array().
> + */
> + uint32_t size;
> + /**
> + * Count of items already processed. Must be less than or
> + * equal to mp_frame::size member.
> + */
> + uint32_t curr;
> +};
> +
> +/**
> + * Stack of map/array descriptors mp_frame to do complex msgpack
> + * parse. The structure is needed in order to facilitate
> + * decoding nested MP_ARRAY and MP_MAP msgpack containers without
> + * recursion. This allows to determine that the parsing of the
> + * current container is complete.
> +*/
> +struct mp_stack {
> + /** The maximum stack depth. */
> + uint32_t size;
> + /**
> + * Count of used stack frames. Corresponds to the index
> + * in the array to perform the push operation. Must be
> + * less or equal to mp_stack::size member.
> + */
> + uint32_t used;
> + /**
> + * Array of size mp_stack::size of mp_frames.
> + */
> + struct mp_frame *frames;
> +};
> +
> +/**
> + * \brief Initialize mp_stack \a stack of specified size \a size
> + * and user-allocated array \a frames.
> + * The \a frames allocation must have at least \a size mp_frame
> + * items.
> + * \param stack - the pointer to a mp_stack to initialize.
> + * \param size - stack size, count of stack::frames to use.
> + * \param frames - mp_frame preallocated array of size \a size
> + * of struct mp_frame items
> + */
> +MP_PROTO void
> +mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames);
> +
> +/**
> + * \brief Test if mp_stack \a stack is empty.
> + * \param stack - the pointer to a mp_stack to a stack to test.
> + * \retval true if mp_stack is empty, false otherwise.
> + */
> +MP_PROTO bool
> +mp_stack_is_empty(struct mp_stack *stack);
> +
> +/**
> + * \brief Test if mp_stack \a stack is full.
> + * \param stack - the pointer to a mp_stack to a stack to test.
> + * \retval true if mp_stack is full, false otherwise.
> + */
> +MP_PROTO bool
> +mp_stack_is_full(struct mp_stack *stack);
> +
> +/**
> + * \brief Pop the top mp_stack \a stack frame.
> + * \param stack - the pointer to a mp_stack to operate with.
> + * \pre mp_stack_is_empty(stack) == false
> + */
> +MP_PROTO void
> +mp_stack_pop(struct mp_stack *stack);
> +
> +/**
> + * \brief Get a pointer to a top mp_stack \a stack frame.
> + * \param stack - the pointer to a mp_stack to operate with.
> + * \retval a pointer to a top stack frame.
> + * \pre mp_stack_is_empty(stack) == false
> + */
> +MP_PROTO struct mp_frame *
> +mp_stack_top(struct mp_stack *stack);
> +
> +/**
> + * \brief Construct a new mp_frame and push it on to mp_stack
> + * \a stack.
> + * \param stack - the pointer to a stack to operate with.
> + * \param type - the type of mp_frame to create.
> + * \param size - size size of mp_frame to create.
> + * \pre mp_stack_is_full(stack) == false
> + */
> +MP_PROTO void
> +mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size);
> +
> +/**
> + * \brief Advance a mp_stack_top()::\a curr attribute of the
> + * \a stack. The function also signals when it moves over the
> + * last item.
> + * \param stack - the pointer to a stack to operate with
> + * \retval true if the top frame has been already parsed:
'has already been parsed' would be grammatically correct.
Anyway, let's not use 'already' at all here - it's confusing. Let's just
say, 'if the top stack frame has been fully processed'?
Also, I'd return 'false' in this case, because they typically return
false on the end of input.
> + * mp_stack_top()::\a curr > mp_stack_top()::\a size;
> + * false otherwise.
> + * \pre mp_stack_is_empty(stack) == false
> + */
> +MP_PROTO bool
> +mp_stack_advance(struct mp_stack *stack);
> +
> /*
> * }}}
> */
> @@ -1184,6 +1301,57 @@ mp_check(const char **data, const char *end);
> * {{{ Implementation
> */
>
> +MP_IMPL void
> +mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
> +{
> + stack->frames = frames;
> + stack->size = size;
> + stack->used = 0;
> +}
> +
> +MP_IMPL bool
> +mp_stack_is_empty(struct mp_stack *stack)
> +{
> + return stack->used == 0;
> +}
> +
> +MP_IMPL bool
> +mp_stack_is_full(struct mp_stack *stack)
> +{
> + return stack->used >= stack->size;
> +}
> +
> +MP_IMPL void
> +mp_stack_pop(struct mp_stack *stack)
> +{
> + assert(!mp_stack_is_empty(stack));
> + --stack->used;
> +}
> +
> +MP_IMPL struct mp_frame *
> +mp_stack_top(struct mp_stack *stack)
> +{
> + assert(!mp_stack_is_empty(stack));
> + return &stack->frames[stack->used - 1];
> +}
> +
> +MP_IMPL void
> +mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
> +{
> + assert(!mp_stack_is_full(stack));
> + uint32_t idx = stack->used++;
> + stack->frames[idx].type = type;
> + stack->frames[idx].size = size;
> + stack->frames[idx].curr = 0;
> +}
> +
> +MP_IMPL bool
> +mp_stack_advance(struct mp_stack *stack)
> +{
> + struct mp_frame *frame = mp_stack_top(stack);
> + return ++frame->curr > frame->size;
> +}
> +
These functions go before '\cond false' while the rest of MP_IMPL goes
after it. I don't know why it is like that, but I'm pretty sure mp_stack
implementation should go after '\cond false' too :) Let's please move
these functions to the end of the implementation section - I'd like
MP_IMPL to be defined in the same order as MP_PROTO.
> /** \cond false */
> extern const enum mp_type mp_type_hint[];
> extern const int8_t mp_parser_hint[];
> diff --git a/test/msgpuck.c b/test/msgpuck.c
> index 9265453..46c9eed 100644
> --- a/test/msgpuck.c
> +++ b/test/msgpuck.c
> @@ -764,7 +764,7 @@ test_format(void)
> int
> test_mp_print()
> {
> - plan(10);
> + plan(11);
> header();
>
> char msgpack[128];
> @@ -838,6 +838,28 @@ test_mp_print()
> int rc = mp_fprint(stdin, msgpack);
> is(rc, -1, "mp_fprint I/O error");
>
> + /* Test mp_snprint parsing stack overflow. */
> + char mp_buf[MP_PRINT_MAX_DEPTH * mp_sizeof_array(1) + mp_sizeof_uint(1)];
> + int json_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1;
Let's please not use 'json' in variable names - we don't mention json
anywhere else in the library.
> + char json[json_sz];
Better allocate the buffers with malloc().
> + char *buf_wptr = mp_buf;
> + char *json_wptr = json;
> + for (int i = 0; i <= 2 * (MP_PRINT_MAX_DEPTH + 1); i++) {
> + if (i < MP_PRINT_MAX_DEPTH + 1) {
> + buf_wptr = mp_encode_array(buf_wptr, 1);
> + *(json_wptr++) = '[';
> + } else if (i == MP_PRINT_MAX_DEPTH + 1) {
> + buf_wptr = mp_encode_uint(buf_wptr, 1);
> + json_wptr += sprintf(json_wptr, "...");
> + } else {
> + *(json_wptr++) = ']';
> + }
> + }
> + *(json_wptr++) = '\0';
> + char decoded[json_sz];
> + rc = mp_snprint(decoded, json_sz, mp_buf);
> + ok(rc == json_sz - 1 && memcmp(decoded, json, json_sz - 1) == 0,
> + "mp_snprint parsing stack overflow test");
"mp_snprint max nesting depth"
> footer();
> return check_plan();
> }
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-22 20:21 ` Vladimir Davydov
@ 2019-01-23 8:23 ` Kirill Shcherbatov
2019-01-23 10:06 ` Vladimir Davydov
2019-01-24 17:58 ` Konstantin Osipov
0 siblings, 2 replies; 19+ messages in thread
From: Kirill Shcherbatov @ 2019-01-23 8:23 UTC (permalink / raw)
To: tarantool-patches, Vladimir Davydov
Introduced a new mp_stack class to make complex msgpack parsing.
The structure is needed in order to facilitate the decoding
nested MP_ARRAY and MP_MAP msgpack containers without recursion.
This allows you to determine that the parsing of the current
container is complete.
Reworked mp_snprint and mp_fprint routines to use mp_stack
instead of recursion.
Needed for #1012
---
msgpuck.c | 63 ++++++++++---------
msgpuck.h | 167 +++++++++++++++++++++++++++++++++++++++++++++++++
test/msgpuck.c | 35 ++++++++++-
3 files changed, 235 insertions(+), 30 deletions(-)
diff --git a/msgpuck.c b/msgpuck.c
index 72b800c..e77ad5b 100644
--- a/msgpuck.c
+++ b/msgpuck.c
@@ -232,8 +232,12 @@ mp_format(char *data, size_t data_size, const char *format, ...)
return res;
}
-#define MP_PRINT(SELF, PRINTF) \
+#define MP_PRINT(PRINTF) \
{ \
+ struct mp_frame frames[MP_PRINT_MAX_DEPTH]; \
+ struct mp_stack stack; \
+ mp_stack_create(&stack, MP_PRINT_MAX_DEPTH, frames); \
+next: \
switch (mp_typeof(**data)) { \
case MP_NIL: \
mp_decode_nil(data); \
@@ -266,31 +270,21 @@ mp_format(char *data, size_t data_size, const char *format, ...)
break; \
} \
case MP_ARRAY: \
- { \
- uint32_t count = mp_decode_array(data); \
- PRINTF("["); \
- uint32_t i; \
- for (i = 0; i < count; i++) { \
- if (i) \
- PRINTF(", "); \
- SELF(data); \
- } \
- PRINTF("]"); \
- break; \
- } \
case MP_MAP: \
{ \
- uint32_t count = mp_decode_map(data); \
- PRINTF("{"); \
- uint32_t i; \
- for (i = 0; i < count; i++) { \
- if (i) \
- PRINTF(", "); \
- SELF(data); \
- PRINTF(": "); \
- SELF(data); \
+ enum mp_type type = mp_typeof(**data); \
+ if (!mp_stack_is_full(&stack)) { \
+ uint32_t count = type == MP_ARRAY ? \
+ mp_decode_array(data) : 2*mp_decode_map(data); \
+ mp_stack_push(&stack, type, count); \
+ } else { \
+ /* \
+ * Skip msgpack items that do not \
+ * fit onto the stack. \
+ */ \
+ PRINTF(type == MP_ARRAY ? "[...]" : "{...}"); \
+ mp_next(data); \
} \
- PRINTF("}"); \
break; \
} \
case MP_BOOL: \
@@ -310,6 +304,21 @@ mp_format(char *data, size_t data_size, const char *format, ...)
mp_unreachable(); \
return -1; \
} \
+ while (!mp_stack_is_empty(&stack)) { \
+ struct mp_frame *frame = mp_stack_top(&stack); \
+ if (!mp_stack_advance(&stack)) { \
+ PRINTF(frame->type == MP_ARRAY ? "]" : "}"); \
+ mp_stack_pop(&stack); \
+ } else { \
+ if (frame->curr == 1) \
+ PRINTF(frame->type == MP_ARRAY ? "[" : "{"); \
+ else if (frame->type == MP_MAP && frame->curr % 2 == 0) \
+ PRINTF(": "); \
+ else \
+ PRINTF(", "); \
+ goto next; \
+ } \
+ } \
}
static inline int
@@ -323,10 +332,8 @@ mp_fprint_internal(FILE *file, const char **data)
total_bytes += bytes; \
} while (0)
#define PRINT(...) HANDLE(fprintf, __VA_ARGS__)
-#define SELF(...) HANDLE(mp_fprint_internal, __VA_ARGS__)
-MP_PRINT(SELF, PRINT)
+MP_PRINT(PRINT)
#undef HANDLE
-#undef SELF
#undef PRINT
return total_bytes;
}
@@ -359,10 +366,8 @@ mp_snprint_internal(char *buf, int size, const char **data)
} \
} while (0)
#define PRINT(...) HANDLE(snprintf, __VA_ARGS__)
-#define SELF(...) HANDLE(mp_snprint_internal, __VA_ARGS__)
-MP_PRINT(SELF, PRINT)
+MP_PRINT(PRINT)
#undef HANDLE
-#undef SELF
#undef PRINT
return total_bytes;
}
diff --git a/msgpuck.h b/msgpuck.h
index b0d4967..e2a90fa 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -1176,6 +1176,122 @@ mp_next(const char **data);
MP_PROTO int
mp_check(const char **data, const char *end);
+/**
+ * The maximum msgpack nesting depth supported by mp_snprint().
+ * Everything beyond that will be omitted (replaced with "...").
+ */
+#ifndef MP_PRINT_MAX_DEPTH
+#define MP_PRINT_MAX_DEPTH 32
+#endif
+
+/** Message pack MP_MAP or MP_ARRAY container descriptor. */
+struct mp_frame {
+ /** MP frame type calculated with mp_typeof(). */
+ enum mp_type type;
+ /**
+ * Total number of items in MP_MAP or MP_ARRAY container
+ * calculated with mp_decode_map() or mp_decode_array().
+ */
+ uint32_t size;
+ /**
+ * Count of items already processed. Must be less than or
+ * equal to mp_frame::size member.
+ */
+ uint32_t curr;
+};
+
+/**
+ * Stack of map/array descriptors mp_frame to do complex msgpack
+ * parse. The structure is needed in order to facilitate
+ * decoding nested MP_ARRAY and MP_MAP msgpack containers without
+ * recursion. This allows to determine that the parsing of the
+ * current container is complete.
+*/
+struct mp_stack {
+ /** The maximum stack depth. */
+ uint32_t size;
+ /**
+ * Count of used stack frames. Corresponds to the index
+ * in the array to perform the push operation. Must be
+ * less or equal to mp_stack::size member.
+ */
+ uint32_t used;
+ /**
+ * Array of size mp_stack::size of mp_frames.
+ */
+ struct mp_frame *frames;
+};
+
+/**
+ * \brief Initialize mp_stack \a stack of specified size \a size
+ * and user-allocated array \a frames.
+ * The \a frames allocation must have at least \a size mp_frame
+ * items.
+ * \param stack - the pointer to a mp_stack to initialize.
+ * \param size - stack size, count of stack::frames to use.
+ * \param frames - mp_frame preallocated array of size \a size
+ * of struct mp_frame items
+ */
+MP_PROTO void
+mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames);
+
+/**
+ * \brief Test if mp_stack \a stack is empty.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ * \retval true if mp_stack is empty, false otherwise.
+ */
+MP_PROTO bool
+mp_stack_is_empty(struct mp_stack *stack);
+
+/**
+ * \brief Test if mp_stack \a stack is full.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ * \retval true if mp_stack is full, false otherwise.
+ */
+MP_PROTO bool
+mp_stack_is_full(struct mp_stack *stack);
+
+/**
+ * \brief Pop the top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO void
+mp_stack_pop(struct mp_stack *stack);
+
+/**
+ * \brief Get a pointer to a top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * \retval a pointer to a top stack frame.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO struct mp_frame *
+mp_stack_top(struct mp_stack *stack);
+
+/**
+ * \brief Construct a new mp_frame and push it on to mp_stack
+ * \a stack.
+ * \param stack - the pointer to a stack to operate with.
+ * \param type - the type of mp_frame to create.
+ * \param size - size size of mp_frame to create.
+ * \pre mp_stack_is_full(stack) == false
+ */
+MP_PROTO void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size);
+
+/**
+ * \brief Advance a mp_stack_top()::\a curr attribute of the
+ * \a stack. The function also signals when it moves over the
+ * last item.
+ * \param stack - the pointer to a stack to operate with
+ * \retval false if the top stack frame has been fully processed:
+ * mp_stack_top()::\a curr > mp_stack_top()::\a size;
+ * true otherwise.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO bool
+mp_stack_advance(struct mp_stack *stack);
+
/*
* }}}
*/
@@ -2176,6 +2292,57 @@ mp_check(const char **data, const char *end)
return 0;
}
+MP_IMPL void
+mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+{
+ stack->frames = frames;
+ stack->size = size;
+ stack->used = 0;
+}
+
+MP_IMPL bool
+mp_stack_is_empty(struct mp_stack *stack)
+{
+ return stack->used == 0;
+}
+
+MP_IMPL bool
+mp_stack_is_full(struct mp_stack *stack)
+{
+ return stack->used >= stack->size;
+}
+
+MP_IMPL void
+mp_stack_pop(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ --stack->used;
+}
+
+MP_IMPL struct mp_frame *
+mp_stack_top(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ return &stack->frames[stack->used - 1];
+}
+
+MP_IMPL void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
+{
+ assert(!mp_stack_is_full(stack));
+ uint32_t idx = stack->used++;
+ stack->frames[idx].type = type;
+ stack->frames[idx].size = size;
+ stack->frames[idx].curr = 0;
+}
+
+MP_IMPL bool
+mp_stack_advance(struct mp_stack *stack)
+{
+ struct mp_frame *frame = mp_stack_top(stack);
+ return ++frame->curr <= frame->size;
+}
+
/** \endcond */
/*
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 9265453..73ace19 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -764,7 +764,7 @@ test_format(void)
int
test_mp_print()
{
- plan(10);
+ plan(12);
header();
char msgpack[128];
@@ -838,6 +838,39 @@ test_mp_print()
int rc = mp_fprint(stdin, msgpack);
is(rc, -1, "mp_fprint I/O error");
+ /* Test mp_snprint max nesting depth. */
+ int mp_buff_sz = MP_PRINT_MAX_DEPTH * mp_sizeof_array(1) +
+ mp_sizeof_uint(1);
+ int template_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1;
+ char *mp_buff = malloc(mp_buff_sz);
+ char *template = malloc(template_sz);
+ char *decoded = malloc(template_sz);
+ if (mp_buff == NULL || template == NULL || decoded == NULL) {
+ fail("Failed to malloc memory");
+ goto end;
+ }
+ char *buf_wptr = mp_buff;
+ char *template_wptr = template;
+ for (int i = 0; i <= 2 * (MP_PRINT_MAX_DEPTH + 1); i++) {
+ if (i < MP_PRINT_MAX_DEPTH + 1) {
+ buf_wptr = mp_encode_array(buf_wptr, 1);
+ *(template_wptr++) = '[';
+ } else if (i == MP_PRINT_MAX_DEPTH + 1) {
+ buf_wptr = mp_encode_uint(buf_wptr, 1);
+ template_wptr += sprintf(template_wptr, "...");
+ } else {
+ *(template_wptr++) = ']';
+ }
+ }
+ *(template_wptr++) = '\0';
+ rc = mp_snprint(decoded, template_sz, mp_buff);
+ ok(rc == template_sz - 1, "mp_snprint max nesting depth return value");
+ ok(memcmp(decoded, template, template_sz - 1) == 0,
+ "mp_snprint max nesting depth result");
+end:
+ free(decoded);
+ free(template);
+ free(mp_buff);
footer();
return check_plan();
}
--
2.19.2
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-23 8:23 ` Kirill Shcherbatov
@ 2019-01-23 10:06 ` Vladimir Davydov
2019-01-23 11:39 ` Kirill Shcherbatov
2019-01-24 17:58 ` Konstantin Osipov
1 sibling, 1 reply; 19+ messages in thread
From: Vladimir Davydov @ 2019-01-23 10:06 UTC (permalink / raw)
To: Kirill Shcherbatov; +Cc: tarantool-patches
On Wed, Jan 23, 2019 at 11:23:51AM +0300, Kirill Shcherbatov wrote:
> Introduced a new mp_stack class to make complex msgpack parsing.
> The structure is needed in order to facilitate the decoding
> nested MP_ARRAY and MP_MAP msgpack containers without recursion.
> This allows you to determine that the parsing of the current
> container is complete.
> Reworked mp_snprint and mp_fprint routines to use mp_stack
> instead of recursion.
>
> Needed for #1012
There's no ticket #1012 in the msgpuck repository. Please use a full
link to the github issue.
> ---
> msgpuck.c | 63 ++++++++++---------
> msgpuck.h | 167 +++++++++++++++++++++++++++++++++++++++++++++++++
> test/msgpuck.c | 35 ++++++++++-
> 3 files changed, 235 insertions(+), 30 deletions(-)
>
> diff --git a/msgpuck.c b/msgpuck.c
> index 72b800c..e77ad5b 100644
> --- a/msgpuck.c
> +++ b/msgpuck.c
> @@ -232,8 +232,12 @@ mp_format(char *data, size_t data_size, const char *format, ...)
> return res;
> }
>
> -#define MP_PRINT(SELF, PRINTF) \
> +#define MP_PRINT(PRINTF) \
> { \
> + struct mp_frame frames[MP_PRINT_MAX_DEPTH]; \
> + struct mp_stack stack; \
> + mp_stack_create(&stack, MP_PRINT_MAX_DEPTH, frames); \
> +next: \
> switch (mp_typeof(**data)) { \
> case MP_NIL: \
> mp_decode_nil(data); \
> @@ -266,31 +270,21 @@ mp_format(char *data, size_t data_size, const char *format, ...)
> break; \
> } \
> case MP_ARRAY: \
> - { \
> - uint32_t count = mp_decode_array(data); \
> - PRINTF("["); \
> - uint32_t i; \
> - for (i = 0; i < count; i++) { \
> - if (i) \
> - PRINTF(", "); \
> - SELF(data); \
> - } \
> - PRINTF("]"); \
> - break; \
> - } \
> case MP_MAP: \
> { \
> - uint32_t count = mp_decode_map(data); \
> - PRINTF("{"); \
> - uint32_t i; \
> - for (i = 0; i < count; i++) { \
> - if (i) \
> - PRINTF(", "); \
> - SELF(data); \
> - PRINTF(": "); \
> - SELF(data); \
> + enum mp_type type = mp_typeof(**data); \
> + if (!mp_stack_is_full(&stack)) { \
> + uint32_t count = type == MP_ARRAY ? \
> + mp_decode_array(data) : 2*mp_decode_map(data); \
Bad formatting: missing spaces around '*'.
> + mp_stack_push(&stack, type, count); \
> + } else { \
> + /* \
> + * Skip msgpack items that do not \
> + * fit onto the stack. \
> + */ \
> + PRINTF(type == MP_ARRAY ? "[...]" : "{...}"); \
> + mp_next(data); \
> } \
> - PRINTF("}"); \
> break; \
> } \
> case MP_BOOL: \
> @@ -310,6 +304,21 @@ mp_format(char *data, size_t data_size, const char *format, ...)
> mp_unreachable(); \
> return -1; \
> } \
> + while (!mp_stack_is_empty(&stack)) { \
> + struct mp_frame *frame = mp_stack_top(&stack); \
> + if (!mp_stack_advance(&stack)) { \
> + PRINTF(frame->type == MP_ARRAY ? "]" : "}"); \
> + mp_stack_pop(&stack); \
> + } else { \
> + if (frame->curr == 1) \
> + PRINTF(frame->type == MP_ARRAY ? "[" : "{"); \
> + else if (frame->type == MP_MAP && frame->curr % 2 == 0) \
> + PRINTF(": "); \
> + else \
> + PRINTF(", "); \
> + goto next; \
> + } \
> + } \
This doesn't work for empty maps/arrays. Please fix and extend
test_mp_print:expected to check that.
> }
>
> static inline int
> @@ -323,10 +332,8 @@ mp_fprint_internal(FILE *file, const char **data)
> total_bytes += bytes; \
> } while (0)
> #define PRINT(...) HANDLE(fprintf, __VA_ARGS__)
> -#define SELF(...) HANDLE(mp_fprint_internal, __VA_ARGS__)
> -MP_PRINT(SELF, PRINT)
> +MP_PRINT(PRINT)
I asked you to get rid of mp_fprint_internal and mp_snprint_internal in
comments to the previous version.
> #undef HANDLE
> -#undef SELF
> #undef PRINT
> return total_bytes;
> }
> @@ -359,10 +366,8 @@ mp_snprint_internal(char *buf, int size, const char **data)
> } \
> } while (0)
> #define PRINT(...) HANDLE(snprintf, __VA_ARGS__)
> -#define SELF(...) HANDLE(mp_snprint_internal, __VA_ARGS__)
> -MP_PRINT(SELF, PRINT)
> +MP_PRINT(PRINT)
> #undef HANDLE
> -#undef SELF
> #undef PRINT
> return total_bytes;
> }
> diff --git a/test/msgpuck.c b/test/msgpuck.c
> index 9265453..73ace19 100644
> --- a/test/msgpuck.c
> +++ b/test/msgpuck.c
> @@ -764,7 +764,7 @@ test_format(void)
> int
> test_mp_print()
> {
> - plan(10);
> + plan(12);
> header();
>
> char msgpack[128];
> @@ -838,6 +838,39 @@ test_mp_print()
> int rc = mp_fprint(stdin, msgpack);
> is(rc, -1, "mp_fprint I/O error");
>
> + /* Test mp_snprint max nesting depth. */
> + int mp_buff_sz = MP_PRINT_MAX_DEPTH * mp_sizeof_array(1) +
> + mp_sizeof_uint(1);
> + int template_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1;
> + char *mp_buff = malloc(mp_buff_sz);
> + char *template = malloc(template_sz);
> + char *decoded = malloc(template_sz);
> + if (mp_buff == NULL || template == NULL || decoded == NULL) {
> + fail("Failed to malloc memory");
> + goto end;
> + }
I wouldn't bother checking malloc() return value - after all it's just a
test. If malloc() fails, the test will crash, which is OK.
> + char *buf_wptr = mp_buff;
mp_buff, but buf_wptr
I understand that it's merely a test, but I just can't help asking: why
can't you be consistent in variable naming?
> + char *template_wptr = template;
> + for (int i = 0; i <= 2 * (MP_PRINT_MAX_DEPTH + 1); i++) {
> + if (i < MP_PRINT_MAX_DEPTH + 1) {
> + buf_wptr = mp_encode_array(buf_wptr, 1);
> + *(template_wptr++) = '[';
'template' is a confusing name: it's not a template, it's the expected
output. Sorry, but it's difficult to understand the code when variable
names are misleading.
> + } else if (i == MP_PRINT_MAX_DEPTH + 1) {
> + buf_wptr = mp_encode_uint(buf_wptr, 1);
> + template_wptr += sprintf(template_wptr, "...");
Please use snprintf.
> + } else {
> + *(template_wptr++) = ']';
> + }
> + }
Please add an assertion making sure that we haven't exceeded the buffer
size.
> + *(template_wptr++) = '\0';
> + rc = mp_snprint(decoded, template_sz, mp_buff);
> + ok(rc == template_sz - 1, "mp_snprint max nesting depth return value");
> + ok(memcmp(decoded, template, template_sz - 1) == 0,
> + "mp_snprint max nesting depth result");
You should use strcmp() to make sure \0 has been written by mp_snprint.
> +end:
> + free(decoded);
> + free(template);
> + free(mp_buff);
> footer();
> return check_plan();
> }
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-23 10:06 ` Vladimir Davydov
@ 2019-01-23 11:39 ` Kirill Shcherbatov
0 siblings, 0 replies; 19+ messages in thread
From: Kirill Shcherbatov @ 2019-01-23 11:39 UTC (permalink / raw)
To: tarantool-patches, Vladimir Davydov
Changes are required to account your comment (patch below):
diff --git a/msgpuck.c b/msgpuck.c
index 1ad2dd4..8f1710e 100644
--- a/msgpuck.c
+++ b/msgpuck.c
@@ -238,25 +238,25 @@ mp_format(char *data, size_t data_size, const char *format, ...)
struct mp_stack stack; \
mp_stack_create(&stack, MP_PRINT_MAX_DEPTH, frames); \
next: \
- switch (mp_typeof(**data)) { \
+ switch (mp_typeof(*data)) { \
case MP_NIL: \
- mp_decode_nil(data); \
+ mp_decode_nil(&data); \
PRINTF("null"); \
break; \
case MP_UINT: \
- PRINTF("%llu", (unsigned long long) mp_decode_uint(data)); \
+ PRINTF("%llu", (unsigned long long) mp_decode_uint(&data)); \
break; \
case MP_INT: \
- PRINTF("%lld", (long long) mp_decode_int(data)); \
+ PRINTF("%lld", (long long) mp_decode_int(&data)); \
break; \
case MP_STR: \
case MP_BIN: \
{ \
- uint32_t len = mp_typeof(**data) == MP_STR ? \
- mp_decode_strl(data) : mp_decode_binl(data); \
+ uint32_t len = mp_typeof(*data) == MP_STR ? \
+ mp_decode_strl(&data) : mp_decode_binl(&data); \
PRINTF("\""); \
const char *s; \
- for (s = *data; s < *data + len; s++) { \
+ for (s = data; s < data + len; s++) { \
unsigned char c = (unsigned char ) *s; \
if (c < 128 && mp_char2escape[c] != NULL) { \
/* Escape character */ \
@@ -266,16 +266,17 @@ next: \
} \
} \
PRINTF("\""); \
- *data += len; \
+ data += len; \
break; \
} \
case MP_ARRAY: \
case MP_MAP: \
{ \
- enum mp_type type = mp_typeof(**data); \
+ enum mp_type type = mp_typeof(*data); \
if (!mp_stack_is_full(&stack)) { \
uint32_t count = type == MP_ARRAY ? \
- mp_decode_array(data) : 2 * mp_decode_map(data);\
+ mp_decode_array(&data) : \
+ 2 * mp_decode_map(&data); \
mp_stack_push(&stack, type, count); \
} else { \
/* \
@@ -283,21 +284,21 @@ next: \
* fit onto the stack. \
*/ \
PRINTF(type == MP_ARRAY ? "[...]" : "{...}"); \
- mp_next(data); \
+ mp_next(&data); \
} \
break; \
} \
case MP_BOOL: \
- PRINTF(mp_decode_bool(data) ? "true" : "false"); \
+ PRINTF(mp_decode_bool(&data) ? "true" : "false"); \
break; \
case MP_FLOAT: \
- PRINTF("%g", mp_decode_float(data)); \
+ PRINTF("%g", mp_decode_float(&data)); \
break; \
case MP_DOUBLE: \
- PRINTF("%lg", mp_decode_double(data)); \
+ PRINTF("%lg", mp_decode_double(&data)); \
break; \
case MP_EXT: \
- mp_next(data); \
+ mp_next(&data); \
PRINTF("undefined"); \
break; \
default: \
@@ -323,9 +324,11 @@ next: \
} \
}
-static inline int
-mp_fprint_internal(FILE *file, const char **data)
+int
+mp_fprint(FILE *file, const char *data)
{
+ if (!file)
+ file = stdout;
int total_bytes = 0;
#define HANDLE(FUN, ...) do { \
int bytes = FUN(file, __VA_ARGS__); \
@@ -341,16 +344,7 @@ MP_PRINT(PRINT)
}
int
-mp_fprint(FILE *file, const char *data)
-{
- if (!file)
- file = stdout;
- int res = mp_fprint_internal(file, &data);
- return res;
-}
-
-static inline int
-mp_snprint_internal(char *buf, int size, const char **data)
+mp_snprint(char *buf, int size, const char *data)
{
int total_bytes = 0;
#define HANDLE(FUN, ...) do { \
@@ -374,9 +368,3 @@ MP_PRINT(PRINT)
return total_bytes;
}
#undef MP_PRINT
-
-int
-mp_snprint(char *buf, int size, const char *data)
-{
- return mp_snprint_internal(buf, size, &data);
-}
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 71a4bfc..884883d 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -30,6 +30,7 @@
* SUCH DAMAGE.
*/
+#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <inttypes.h>
@@ -843,30 +844,32 @@ test_mp_print()
/* Test mp_snprint max nesting depth. */
int mp_buff_sz = MP_PRINT_MAX_DEPTH * mp_sizeof_array(1) +
mp_sizeof_uint(1);
- int template_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1;
+ int exp_str_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1;
char *mp_buff = malloc(mp_buff_sz);
- char *template = malloc(template_sz);
- char *decoded = malloc(template_sz);
+ char *exp_str = malloc(exp_str_sz);
+ char *decoded = malloc(exp_str_sz);
char *buff_wptr = mp_buff;
- char *template_wptr = template;
+ char *exp_str_wptr = exp_str;
for (int i = 0; i <= 2 * (MP_PRINT_MAX_DEPTH + 1); i++) {
+ int exp_str_rest = exp_str_sz - (exp_str_wptr - exp_str);
+ assert(exp_str_rest > 0);
if (i < MP_PRINT_MAX_DEPTH + 1) {
buff_wptr = mp_encode_array(buff_wptr, 1);
- *(template_wptr++) = '[';
+ rc = snprintf(exp_str_wptr, exp_str_rest, "[");
} else if (i == MP_PRINT_MAX_DEPTH + 1) {
buff_wptr = mp_encode_uint(buff_wptr, 1);
- template_wptr += sprintf(template_wptr, "...");
+ rc = snprintf(exp_str_wptr, exp_str_rest, "...");
} else {
- *(template_wptr++) = ']';
+ rc = snprintf(exp_str_wptr, exp_str_rest, "]");
}
+ exp_str_wptr += rc;
}
- *(template_wptr++) = '\0';
- rc = mp_snprint(decoded, template_sz, mp_buff);
- ok(rc == template_sz - 1, "mp_snprint max nesting depth return value");
- ok(memcmp(decoded, template, template_sz - 1) == 0,
- "mp_snprint max nesting depth result");
+ assert(exp_str_wptr + 1 == exp_str + exp_str_sz);
+ rc = mp_snprint(decoded, exp_str_sz, mp_buff);
+ ok(rc == exp_str_sz - 1, "mp_snprint max nesting depth return value");
+ ok(strcmp(decoded, exp_str) == 0, "mp_snprint max nesting depth result");
free(decoded);
- free(template);
+ free(exp_str);
free(mp_buff);
footer();
return check_plan();
================================================================
Introduced a new mp_stack class to make complex msgpack parsing.
The structure is needed in order to facilitate the decoding
nested MP_ARRAY and MP_MAP msgpack containers without recursion.
This allows you to determine that the parsing of the current
container is complete.
Reworked mp_snprint and mp_fprint routines to use mp_stack
instead of recursion.
Needed for https://github.com/tarantool/tarantool/issues/1012
---
msgpuck.c | 113 ++++++++++++++++-----------------
msgpuck.h | 167 +++++++++++++++++++++++++++++++++++++++++++++++++
test/msgpuck.c | 41 ++++++++++--
3 files changed, 258 insertions(+), 63 deletions(-)
diff --git a/msgpuck.c b/msgpuck.c
index 72b800c..8f1710e 100644
--- a/msgpuck.c
+++ b/msgpuck.c
@@ -232,27 +232,31 @@ mp_format(char *data, size_t data_size, const char *format, ...)
return res;
}
-#define MP_PRINT(SELF, PRINTF) \
+#define MP_PRINT(PRINTF) \
{ \
- switch (mp_typeof(**data)) { \
+ struct mp_frame frames[MP_PRINT_MAX_DEPTH]; \
+ struct mp_stack stack; \
+ mp_stack_create(&stack, MP_PRINT_MAX_DEPTH, frames); \
+next: \
+ switch (mp_typeof(*data)) { \
case MP_NIL: \
- mp_decode_nil(data); \
+ mp_decode_nil(&data); \
PRINTF("null"); \
break; \
case MP_UINT: \
- PRINTF("%llu", (unsigned long long) mp_decode_uint(data)); \
+ PRINTF("%llu", (unsigned long long) mp_decode_uint(&data)); \
break; \
case MP_INT: \
- PRINTF("%lld", (long long) mp_decode_int(data)); \
+ PRINTF("%lld", (long long) mp_decode_int(&data)); \
break; \
case MP_STR: \
case MP_BIN: \
{ \
- uint32_t len = mp_typeof(**data) == MP_STR ? \
- mp_decode_strl(data) : mp_decode_binl(data); \
+ uint32_t len = mp_typeof(*data) == MP_STR ? \
+ mp_decode_strl(&data) : mp_decode_binl(&data); \
PRINTF("\""); \
const char *s; \
- for (s = *data; s < *data + len; s++) { \
+ for (s = data; s < data + len; s++) { \
unsigned char c = (unsigned char ) *s; \
if (c < 128 && mp_char2escape[c] != NULL) { \
/* Escape character */ \
@@ -262,59 +266,69 @@ mp_format(char *data, size_t data_size, const char *format, ...)
} \
} \
PRINTF("\""); \
- *data += len; \
+ data += len; \
break; \
} \
case MP_ARRAY: \
- { \
- uint32_t count = mp_decode_array(data); \
- PRINTF("["); \
- uint32_t i; \
- for (i = 0; i < count; i++) { \
- if (i) \
- PRINTF(", "); \
- SELF(data); \
- } \
- PRINTF("]"); \
- break; \
- } \
case MP_MAP: \
{ \
- uint32_t count = mp_decode_map(data); \
- PRINTF("{"); \
- uint32_t i; \
- for (i = 0; i < count; i++) { \
- if (i) \
- PRINTF(", "); \
- SELF(data); \
- PRINTF(": "); \
- SELF(data); \
+ enum mp_type type = mp_typeof(*data); \
+ if (!mp_stack_is_full(&stack)) { \
+ uint32_t count = type == MP_ARRAY ? \
+ mp_decode_array(&data) : \
+ 2 * mp_decode_map(&data); \
+ mp_stack_push(&stack, type, count); \
+ } else { \
+ /* \
+ * Skip msgpack items that do not \
+ * fit onto the stack. \
+ */ \
+ PRINTF(type == MP_ARRAY ? "[...]" : "{...}"); \
+ mp_next(&data); \
} \
- PRINTF("}"); \
break; \
} \
case MP_BOOL: \
- PRINTF(mp_decode_bool(data) ? "true" : "false"); \
+ PRINTF(mp_decode_bool(&data) ? "true" : "false"); \
break; \
case MP_FLOAT: \
- PRINTF("%g", mp_decode_float(data)); \
+ PRINTF("%g", mp_decode_float(&data)); \
break; \
case MP_DOUBLE: \
- PRINTF("%lg", mp_decode_double(data)); \
+ PRINTF("%lg", mp_decode_double(&data)); \
break; \
case MP_EXT: \
- mp_next(data); \
+ mp_next(&data); \
PRINTF("undefined"); \
break; \
default: \
mp_unreachable(); \
return -1; \
} \
+ while (!mp_stack_is_empty(&stack)) { \
+ struct mp_frame *frame = mp_stack_top(&stack); \
+ if (!mp_stack_advance(&stack)) { \
+ if (frame->curr == 1) \
+ PRINTF(frame->type == MP_ARRAY ? "[" : "{"); \
+ PRINTF(frame->type == MP_ARRAY ? "]" : "}"); \
+ mp_stack_pop(&stack); \
+ } else { \
+ if (frame->curr == 1) \
+ PRINTF(frame->type == MP_ARRAY ? "[" : "{"); \
+ else if (frame->type == MP_MAP && frame->curr % 2 == 0) \
+ PRINTF(": "); \
+ else \
+ PRINTF(", "); \
+ goto next; \
+ } \
+ } \
}
-static inline int
-mp_fprint_internal(FILE *file, const char **data)
+int
+mp_fprint(FILE *file, const char *data)
{
+ if (!file)
+ file = stdout;
int total_bytes = 0;
#define HANDLE(FUN, ...) do { \
int bytes = FUN(file, __VA_ARGS__); \
@@ -323,25 +337,14 @@ mp_fprint_internal(FILE *file, const char **data)
total_bytes += bytes; \
} while (0)
#define PRINT(...) HANDLE(fprintf, __VA_ARGS__)
-#define SELF(...) HANDLE(mp_fprint_internal, __VA_ARGS__)
-MP_PRINT(SELF, PRINT)
+MP_PRINT(PRINT)
#undef HANDLE
-#undef SELF
#undef PRINT
return total_bytes;
}
int
-mp_fprint(FILE *file, const char *data)
-{
- if (!file)
- file = stdout;
- int res = mp_fprint_internal(file, &data);
- return res;
-}
-
-static inline int
-mp_snprint_internal(char *buf, int size, const char **data)
+mp_snprint(char *buf, int size, const char *data)
{
int total_bytes = 0;
#define HANDLE(FUN, ...) do { \
@@ -359,17 +362,9 @@ mp_snprint_internal(char *buf, int size, const char **data)
} \
} while (0)
#define PRINT(...) HANDLE(snprintf, __VA_ARGS__)
-#define SELF(...) HANDLE(mp_snprint_internal, __VA_ARGS__)
-MP_PRINT(SELF, PRINT)
+MP_PRINT(PRINT)
#undef HANDLE
-#undef SELF
#undef PRINT
return total_bytes;
}
#undef MP_PRINT
-
-int
-mp_snprint(char *buf, int size, const char *data)
-{
- return mp_snprint_internal(buf, size, &data);
-}
diff --git a/msgpuck.h b/msgpuck.h
index b0d4967..e2a90fa 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -1176,6 +1176,122 @@ mp_next(const char **data);
MP_PROTO int
mp_check(const char **data, const char *end);
+/**
+ * The maximum msgpack nesting depth supported by mp_snprint().
+ * Everything beyond that will be omitted (replaced with "...").
+ */
+#ifndef MP_PRINT_MAX_DEPTH
+#define MP_PRINT_MAX_DEPTH 32
+#endif
+
+/** Message pack MP_MAP or MP_ARRAY container descriptor. */
+struct mp_frame {
+ /** MP frame type calculated with mp_typeof(). */
+ enum mp_type type;
+ /**
+ * Total number of items in MP_MAP or MP_ARRAY container
+ * calculated with mp_decode_map() or mp_decode_array().
+ */
+ uint32_t size;
+ /**
+ * Count of items already processed. Must be less than or
+ * equal to mp_frame::size member.
+ */
+ uint32_t curr;
+};
+
+/**
+ * Stack of map/array descriptors mp_frame to do complex msgpack
+ * parse. The structure is needed in order to facilitate
+ * decoding nested MP_ARRAY and MP_MAP msgpack containers without
+ * recursion. This allows to determine that the parsing of the
+ * current container is complete.
+*/
+struct mp_stack {
+ /** The maximum stack depth. */
+ uint32_t size;
+ /**
+ * Count of used stack frames. Corresponds to the index
+ * in the array to perform the push operation. Must be
+ * less or equal to mp_stack::size member.
+ */
+ uint32_t used;
+ /**
+ * Array of size mp_stack::size of mp_frames.
+ */
+ struct mp_frame *frames;
+};
+
+/**
+ * \brief Initialize mp_stack \a stack of specified size \a size
+ * and user-allocated array \a frames.
+ * The \a frames allocation must have at least \a size mp_frame
+ * items.
+ * \param stack - the pointer to a mp_stack to initialize.
+ * \param size - stack size, count of stack::frames to use.
+ * \param frames - mp_frame preallocated array of size \a size
+ * of struct mp_frame items
+ */
+MP_PROTO void
+mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames);
+
+/**
+ * \brief Test if mp_stack \a stack is empty.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ * \retval true if mp_stack is empty, false otherwise.
+ */
+MP_PROTO bool
+mp_stack_is_empty(struct mp_stack *stack);
+
+/**
+ * \brief Test if mp_stack \a stack is full.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ * \retval true if mp_stack is full, false otherwise.
+ */
+MP_PROTO bool
+mp_stack_is_full(struct mp_stack *stack);
+
+/**
+ * \brief Pop the top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO void
+mp_stack_pop(struct mp_stack *stack);
+
+/**
+ * \brief Get a pointer to a top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * \retval a pointer to a top stack frame.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO struct mp_frame *
+mp_stack_top(struct mp_stack *stack);
+
+/**
+ * \brief Construct a new mp_frame and push it on to mp_stack
+ * \a stack.
+ * \param stack - the pointer to a stack to operate with.
+ * \param type - the type of mp_frame to create.
+ * \param size - size size of mp_frame to create.
+ * \pre mp_stack_is_full(stack) == false
+ */
+MP_PROTO void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size);
+
+/**
+ * \brief Advance a mp_stack_top()::\a curr attribute of the
+ * \a stack. The function also signals when it moves over the
+ * last item.
+ * \param stack - the pointer to a stack to operate with
+ * \retval false if the top stack frame has been fully processed:
+ * mp_stack_top()::\a curr > mp_stack_top()::\a size;
+ * true otherwise.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO bool
+mp_stack_advance(struct mp_stack *stack);
+
/*
* }}}
*/
@@ -2176,6 +2292,57 @@ mp_check(const char **data, const char *end)
return 0;
}
+MP_IMPL void
+mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+{
+ stack->frames = frames;
+ stack->size = size;
+ stack->used = 0;
+}
+
+MP_IMPL bool
+mp_stack_is_empty(struct mp_stack *stack)
+{
+ return stack->used == 0;
+}
+
+MP_IMPL bool
+mp_stack_is_full(struct mp_stack *stack)
+{
+ return stack->used >= stack->size;
+}
+
+MP_IMPL void
+mp_stack_pop(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ --stack->used;
+}
+
+MP_IMPL struct mp_frame *
+mp_stack_top(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ return &stack->frames[stack->used - 1];
+}
+
+MP_IMPL void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
+{
+ assert(!mp_stack_is_full(stack));
+ uint32_t idx = stack->used++;
+ stack->frames[idx].type = type;
+ stack->frames[idx].size = size;
+ stack->frames[idx].curr = 0;
+}
+
+MP_IMPL bool
+mp_stack_advance(struct mp_stack *stack)
+{
+ struct mp_frame *frame = mp_stack_top(stack);
+ return ++frame->curr <= frame->size;
+}
+
/** \endcond */
/*
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 9265453..884883d 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -30,6 +30,7 @@
* SUCH DAMAGE.
*/
+#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <inttypes.h>
@@ -764,15 +765,16 @@ test_format(void)
int
test_mp_print()
{
- plan(10);
+ plan(12);
header();
char msgpack[128];
char *d = msgpack;
- d = mp_encode_array(d, 6);
+ d = mp_encode_array(d, 8);
d = mp_encode_int(d, -5);
d = mp_encode_uint(d, 42);
d = mp_encode_str(d, "kill bill", 9);
+ d = mp_encode_array(d, 0);
d = mp_encode_map(d, 6);
d = mp_encode_str(d, "bool true", 9);
d = mp_encode_bool(d, true);
@@ -791,13 +793,14 @@ test_mp_print()
*d++ = 0;
char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r";
d = mp_encode_bin(d, bin, sizeof(bin));
+ d = mp_encode_map(d, 0);
assert(d <= msgpack + sizeof(msgpack));
const char *expected =
- "[-5, 42, \"kill bill\", "
+ "[-5, 42, \"kill bill\", [], "
"{\"bool true\": true, \"bool false\": false, \"null\": null, "
"\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, "
- "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\"]";
+ "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]";
int esize = strlen(expected);
char result[256];
@@ -838,6 +841,36 @@ test_mp_print()
int rc = mp_fprint(stdin, msgpack);
is(rc, -1, "mp_fprint I/O error");
+ /* Test mp_snprint max nesting depth. */
+ int mp_buff_sz = MP_PRINT_MAX_DEPTH * mp_sizeof_array(1) +
+ mp_sizeof_uint(1);
+ int exp_str_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1;
+ char *mp_buff = malloc(mp_buff_sz);
+ char *exp_str = malloc(exp_str_sz);
+ char *decoded = malloc(exp_str_sz);
+ char *buff_wptr = mp_buff;
+ char *exp_str_wptr = exp_str;
+ for (int i = 0; i <= 2 * (MP_PRINT_MAX_DEPTH + 1); i++) {
+ int exp_str_rest = exp_str_sz - (exp_str_wptr - exp_str);
+ assert(exp_str_rest > 0);
+ if (i < MP_PRINT_MAX_DEPTH + 1) {
+ buff_wptr = mp_encode_array(buff_wptr, 1);
+ rc = snprintf(exp_str_wptr, exp_str_rest, "[");
+ } else if (i == MP_PRINT_MAX_DEPTH + 1) {
+ buff_wptr = mp_encode_uint(buff_wptr, 1);
+ rc = snprintf(exp_str_wptr, exp_str_rest, "...");
+ } else {
+ rc = snprintf(exp_str_wptr, exp_str_rest, "]");
+ }
+ exp_str_wptr += rc;
+ }
+ assert(exp_str_wptr + 1 == exp_str + exp_str_sz);
+ rc = mp_snprint(decoded, exp_str_sz, mp_buff);
+ ok(rc == exp_str_sz - 1, "mp_snprint max nesting depth return value");
+ ok(strcmp(decoded, exp_str) == 0, "mp_snprint max nesting depth result");
+ free(decoded);
+ free(exp_str);
+ free(mp_buff);
footer();
return check_plan();
}
--
2.19.2
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
2019-01-23 8:23 ` Kirill Shcherbatov
2019-01-23 10:06 ` Vladimir Davydov
@ 2019-01-24 17:58 ` Konstantin Osipov
1 sibling, 0 replies; 19+ messages in thread
From: Konstantin Osipov @ 2019-01-24 17:58 UTC (permalink / raw)
To: tarantool-patches; +Cc: Vladimir Davydov
* Kirill Shcherbatov <kshcherbatov@tarantool.org> [19/01/23 11:24]:
> Introduced a new mp_stack class to make complex msgpack parsing.
> The structure is needed in order to facilitate the decoding
> nested MP_ARRAY and MP_MAP msgpack containers without recursion.
> This allows you to determine that the parsing of the current
> container is complete.
> Reworked mp_snprint and mp_fprint routines to use mp_stack
> instead of recursion.
Sorry to be nitpicking, but do we really need more than 65k frames?
With 2-byte frame identifiers we can fit a single frame into a
single 8-byte register.
>
> Needed for #1012
--
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2019-01-24 17:58 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-16 13:36 [PATCH v1 1/1] implement mp_stack class Kirill Shcherbatov
2019-01-16 18:03 ` Konstantin Osipov
2019-01-17 7:26 ` Kirill Shcherbatov
2019-01-17 7:32 ` [tarantool-patches] " Kirill Shcherbatov
2019-01-17 11:58 ` Alexander Turenko
2019-01-17 12:28 ` [tarantool-patches] " Kirill Shcherbatov
2019-01-17 16:34 ` Alexander Turenko
2019-01-18 7:03 ` Kirill Shcherbatov
2019-01-18 10:32 ` Kirill Shcherbatov
2019-01-21 9:46 ` Kirill Shcherbatov
2019-01-21 11:25 ` Alexander Turenko
2019-01-21 12:35 ` Kirill Shcherbatov
2019-01-21 20:25 ` Vladimir Davydov
2019-01-22 12:28 ` Kirill Shcherbatov
2019-01-22 20:21 ` Vladimir Davydov
2019-01-23 8:23 ` Kirill Shcherbatov
2019-01-23 10:06 ` Vladimir Davydov
2019-01-23 11:39 ` Kirill Shcherbatov
2019-01-24 17:58 ` Konstantin Osipov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox