* [PATCH v2 1/1] Implement mp_stack class
@ 2019-02-03 10:21 Kirill Shcherbatov
2019-02-04 9:41 ` Vladimir Davydov
0 siblings, 1 reply; 2+ messages in thread
From: Kirill Shcherbatov @ 2019-02-03 10:21 UTC (permalink / raw)
To: tarantool-patches, vdavydov.dev; +Cc: Kirill Shcherbatov
From: Kirill Shcherbatov <kshcherbatov@gmail.com>
Introduced a new mp_stack class to make complex msgpack parsing.
The structure is needed in order to facilitate the decoding
nested MP_ARRAY and MP_MAP msgpack containers without recursion.
This allows you to determine that the parsing of the current
container is complete.
Reworked mp_snprint and mp_fprint routines to use mp_stack
instead of recursion.
Needed for https://github.com/tarantool/tarantool/issues/1012
---
msgpuck.c | 110 ++++++++++++++---------------
msgpuck.h | 187 +++++++++++++++++++++++++++++++++++++++++++++++++
test/msgpuck.c | 41 +++++++++--
3 files changed, 275 insertions(+), 63 deletions(-)
diff --git a/msgpuck.c b/msgpuck.c
index 72b800c..cb93d68 100644
--- a/msgpuck.c
+++ b/msgpuck.c
@@ -232,27 +232,31 @@ mp_format(char *data, size_t data_size, const char *format, ...)
return res;
}
-#define MP_PRINT(SELF, PRINTF) \
+#define MP_PRINT(PRINTF) \
{ \
- switch (mp_typeof(**data)) { \
+ struct mp_frame frames[MP_PRINT_MAX_DEPTH]; \
+ struct mp_stack stack; \
+ mp_stack_create(&stack, MP_PRINT_MAX_DEPTH, frames); \
+next: \
+ switch (mp_typeof(*data)) { \
case MP_NIL: \
- mp_decode_nil(data); \
+ mp_decode_nil(&data); \
PRINTF("null"); \
break; \
case MP_UINT: \
- PRINTF("%llu", (unsigned long long) mp_decode_uint(data)); \
+ PRINTF("%llu", (unsigned long long) mp_decode_uint(&data)); \
break; \
case MP_INT: \
- PRINTF("%lld", (long long) mp_decode_int(data)); \
+ PRINTF("%lld", (long long) mp_decode_int(&data)); \
break; \
case MP_STR: \
case MP_BIN: \
{ \
- uint32_t len = mp_typeof(**data) == MP_STR ? \
- mp_decode_strl(data) : mp_decode_binl(data); \
+ uint32_t len = mp_typeof(*data) == MP_STR ? \
+ mp_decode_strl(&data) : mp_decode_binl(&data); \
PRINTF("\""); \
const char *s; \
- for (s = *data; s < *data + len; s++) { \
+ for (s = data; s < data + len; s++) { \
unsigned char c = (unsigned char ) *s; \
if (c < 128 && mp_char2escape[c] != NULL) { \
/* Escape character */ \
@@ -262,59 +266,66 @@ mp_format(char *data, size_t data_size, const char *format, ...)
} \
} \
PRINTF("\""); \
- *data += len; \
+ data += len; \
break; \
} \
case MP_ARRAY: \
- { \
- uint32_t count = mp_decode_array(data); \
- PRINTF("["); \
- uint32_t i; \
- for (i = 0; i < count; i++) { \
- if (i) \
- PRINTF(", "); \
- SELF(data); \
- } \
- PRINTF("]"); \
- break; \
- } \
case MP_MAP: \
{ \
- uint32_t count = mp_decode_map(data); \
- PRINTF("{"); \
- uint32_t i; \
- for (i = 0; i < count; i++) { \
- if (i) \
- PRINTF(", "); \
- SELF(data); \
- PRINTF(": "); \
- SELF(data); \
+ enum mp_type type = mp_typeof(*data); \
+ if (!mp_stack_is_full(&stack)) { \
+ uint32_t count = type == MP_ARRAY ? \
+ mp_decode_array(&data) : \
+ 2 * mp_decode_map(&data); \
+ mp_stack_push(&stack, type, count); \
+ } else { \
+ /* \
+ * Skip msgpack items that do not \
+ * fit onto the stack. \
+ */ \
+ PRINTF(type == MP_ARRAY ? "[...]" : "{...}"); \
+ mp_next(&data); \
} \
- PRINTF("}"); \
break; \
} \
case MP_BOOL: \
- PRINTF(mp_decode_bool(data) ? "true" : "false"); \
+ PRINTF(mp_decode_bool(&data) ? "true" : "false"); \
break; \
case MP_FLOAT: \
- PRINTF("%g", mp_decode_float(data)); \
+ PRINTF("%g", mp_decode_float(&data)); \
break; \
case MP_DOUBLE: \
- PRINTF("%lg", mp_decode_double(data)); \
+ PRINTF("%lg", mp_decode_double(&data)); \
break; \
case MP_EXT: \
- mp_next(data); \
+ mp_next(&data); \
PRINTF("undefined"); \
break; \
default: \
mp_unreachable(); \
return -1; \
} \
+ while (!mp_stack_is_empty(&stack)) { \
+ enum mp_type type = mp_stack_type(&stack); \
+ int curr = mp_stack_advance(&stack); \
+ if (curr == 0 || mp_stack_count(&stack) == 0) \
+ PRINTF(type == MP_ARRAY ? "[" : "{"); \
+ if (curr == -1) { \
+ PRINTF(type == MP_ARRAY ? "]" : "}"); \
+ mp_stack_pop(&stack); \
+ continue; \
+ } else if (curr != 0) { \
+ PRINTF(type == MP_MAP && curr % 2 == 1 ? ": " : ", "); \
+ } \
+ goto next; \
+ } \
}
-static inline int
-mp_fprint_internal(FILE *file, const char **data)
+int
+mp_fprint(FILE *file, const char *data)
{
+ if (!file)
+ file = stdout;
int total_bytes = 0;
#define HANDLE(FUN, ...) do { \
int bytes = FUN(file, __VA_ARGS__); \
@@ -323,25 +334,14 @@ mp_fprint_internal(FILE *file, const char **data)
total_bytes += bytes; \
} while (0)
#define PRINT(...) HANDLE(fprintf, __VA_ARGS__)
-#define SELF(...) HANDLE(mp_fprint_internal, __VA_ARGS__)
-MP_PRINT(SELF, PRINT)
+MP_PRINT(PRINT)
#undef HANDLE
-#undef SELF
#undef PRINT
return total_bytes;
}
int
-mp_fprint(FILE *file, const char *data)
-{
- if (!file)
- file = stdout;
- int res = mp_fprint_internal(file, &data);
- return res;
-}
-
-static inline int
-mp_snprint_internal(char *buf, int size, const char **data)
+mp_snprint(char *buf, int size, const char *data)
{
int total_bytes = 0;
#define HANDLE(FUN, ...) do { \
@@ -359,17 +359,9 @@ mp_snprint_internal(char *buf, int size, const char **data)
} \
} while (0)
#define PRINT(...) HANDLE(snprintf, __VA_ARGS__)
-#define SELF(...) HANDLE(mp_snprint_internal, __VA_ARGS__)
-MP_PRINT(SELF, PRINT)
+MP_PRINT(PRINT)
#undef HANDLE
-#undef SELF
#undef PRINT
return total_bytes;
}
#undef MP_PRINT
-
-int
-mp_snprint(char *buf, int size, const char *data)
-{
- return mp_snprint_internal(buf, size, &data);
-}
diff --git a/msgpuck.h b/msgpuck.h
index b0d4967..15a6c9b 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -1176,6 +1176,133 @@ mp_next(const char **data);
MP_PROTO int
mp_check(const char **data, const char *end);
+/**
+ * The maximum msgpack nesting depth supported by mp_snprint().
+ * Everything beyond that will be omitted (replaced with "...").
+ */
+#ifndef MP_PRINT_MAX_DEPTH
+#define MP_PRINT_MAX_DEPTH 32
+#endif
+
+/** Message pack MP_MAP or MP_ARRAY container descriptor. */
+struct mp_frame {
+ /**
+ * MP frame type calculated with mp_typeof().
+ */
+ enum mp_type type;
+ /**
+ * Total number of items in MP_MAP or MP_ARRAY container
+ * calculated with mp_decode_map() or mp_decode_array().
+ */
+ int count;
+ /**
+ * Index of currently processing item. Must be less than
+ * mp_frame::count member.
+ */
+ int curr;
+};
+
+/**
+ * Stack of map/array descriptors mp_frame to do complex msgpack
+ * parse. The structure is needed in order to facilitate
+ * decoding nested MP_ARRAY and MP_MAP msgpack containers without
+ * recursion. This allows to determine that the parsing of the
+ * current container is complete.
+*/
+struct mp_stack {
+ /**
+ * The maximum stack depth.
+ */
+ int size;
+ /**
+ * Count of used stack frames. Corresponds to the index
+ * in the array to perform the push operation. Must be
+ * less or equal to mp_stack::size member.
+ */
+ int used;
+ /**
+ * Array of size mp_stack::size of mp_frames.
+ */
+ struct mp_frame *frames;
+};
+
+/**
+ * \brief Initialize mp_stack \a stack of specified size \a size
+ * and user-allocated array \a frames.
+ * The \a frames allocation must have at least \a size mp_frame
+ * items.
+ * \param stack - the pointer to a mp_stack to initialize.
+ * \param size - stack size, count of stack::frames to use.
+ * \param frames - mp_frame preallocated array of size \a size
+ * of struct mp_frame items
+ */
+MP_PROTO void
+mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames);
+
+/**
+ * \brief Test if mp_stack \a stack is empty.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ * \retval true if mp_stack is empty, false otherwise.
+ */
+MP_PROTO bool
+mp_stack_is_empty(struct mp_stack *stack);
+
+/**
+ * \brief Test if mp_stack \a stack is full.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ * \retval true if mp_stack is full, false otherwise.
+ */
+MP_PROTO bool
+mp_stack_is_full(struct mp_stack *stack);
+
+/**
+ * \brief Pop the top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO void
+mp_stack_pop(struct mp_stack *stack);
+
+/**
+ * \brief Construct a new mp_frame and push it on to mp_stack
+ * \a stack.
+ * \param stack - the pointer to a stack to operate with.
+ * \param type - the type of mp_frame to create.
+ * \param count - the count of itemes of mp_frame to create.
+ * \pre mp_stack_is_full(stack) == false
+ */
+MP_PROTO void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t count);
+
+/**
+ * \brief Get type attribute of the \a stack top frame.
+ * \param stack - the pointer to a stack to operate with.
+ * \retval enum mp_type value - the top stack frame type.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO enum mp_type
+mp_stack_type(struct mp_stack *stack);
+
+/**
+ * \brief Get count attribute of the \a stack top frame.
+ * \param stack - the pointer to a stack to operate with.
+ * \retval count - the top stack frame items count.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO int
+mp_stack_count(struct mp_stack *stack);
+
+/**
+ * \brief Advance curr attribute of the \a stack top frame.
+ * \param stack - the pointer to a stack to operate with.
+ * \retval index of the element to process if the top
+ * mp_frame::curr is less than top mp_frame::count field.
+ * -1 otherwise.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO int
+mp_stack_advance(struct mp_stack *stack);
+
/*
* }}}
*/
@@ -2176,6 +2303,66 @@ mp_check(const char **data, const char *end)
return 0;
}
+MP_IMPL void
+mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+{
+ stack->frames = frames;
+ stack->size = size;
+ stack->used = 0;
+}
+
+MP_IMPL bool
+mp_stack_is_empty(struct mp_stack *stack)
+{
+ return stack->used == 0;
+}
+
+MP_IMPL bool
+mp_stack_is_full(struct mp_stack *stack)
+{
+ return stack->used >= stack->size;
+}
+
+MP_IMPL void
+mp_stack_pop(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ --stack->used;
+}
+
+MP_IMPL void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t count)
+{
+ assert(!mp_stack_is_full(stack));
+ uint32_t idx = stack->used++;
+ stack->frames[idx].type = type;
+ stack->frames[idx].count = count;
+ stack->frames[idx].curr = -1;
+}
+
+MP_IMPL enum mp_type
+mp_stack_type(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ return stack->frames[stack->used - 1].type;
+}
+
+MP_IMPL int
+mp_stack_count(struct mp_stack *stack)
+{
+ return stack->frames[stack->used - 1].count;
+}
+
+MP_IMPL int
+mp_stack_advance(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ struct mp_frame *frame = &stack->frames[stack->used - 1];
+ if (frame->curr < frame->count - 1)
+ return ++frame->curr;
+ return -1;
+}
+
/** \endcond */
/*
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 9265453..884883d 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -30,6 +30,7 @@
* SUCH DAMAGE.
*/
+#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <inttypes.h>
@@ -764,15 +765,16 @@ test_format(void)
int
test_mp_print()
{
- plan(10);
+ plan(12);
header();
char msgpack[128];
char *d = msgpack;
- d = mp_encode_array(d, 6);
+ d = mp_encode_array(d, 8);
d = mp_encode_int(d, -5);
d = mp_encode_uint(d, 42);
d = mp_encode_str(d, "kill bill", 9);
+ d = mp_encode_array(d, 0);
d = mp_encode_map(d, 6);
d = mp_encode_str(d, "bool true", 9);
d = mp_encode_bool(d, true);
@@ -791,13 +793,14 @@ test_mp_print()
*d++ = 0;
char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r";
d = mp_encode_bin(d, bin, sizeof(bin));
+ d = mp_encode_map(d, 0);
assert(d <= msgpack + sizeof(msgpack));
const char *expected =
- "[-5, 42, \"kill bill\", "
+ "[-5, 42, \"kill bill\", [], "
"{\"bool true\": true, \"bool false\": false, \"null\": null, "
"\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, "
- "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\"]";
+ "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]";
int esize = strlen(expected);
char result[256];
@@ -838,6 +841,36 @@ test_mp_print()
int rc = mp_fprint(stdin, msgpack);
is(rc, -1, "mp_fprint I/O error");
+ /* Test mp_snprint max nesting depth. */
+ int mp_buff_sz = MP_PRINT_MAX_DEPTH * mp_sizeof_array(1) +
+ mp_sizeof_uint(1);
+ int exp_str_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1;
+ char *mp_buff = malloc(mp_buff_sz);
+ char *exp_str = malloc(exp_str_sz);
+ char *decoded = malloc(exp_str_sz);
+ char *buff_wptr = mp_buff;
+ char *exp_str_wptr = exp_str;
+ for (int i = 0; i <= 2 * (MP_PRINT_MAX_DEPTH + 1); i++) {
+ int exp_str_rest = exp_str_sz - (exp_str_wptr - exp_str);
+ assert(exp_str_rest > 0);
+ if (i < MP_PRINT_MAX_DEPTH + 1) {
+ buff_wptr = mp_encode_array(buff_wptr, 1);
+ rc = snprintf(exp_str_wptr, exp_str_rest, "[");
+ } else if (i == MP_PRINT_MAX_DEPTH + 1) {
+ buff_wptr = mp_encode_uint(buff_wptr, 1);
+ rc = snprintf(exp_str_wptr, exp_str_rest, "...");
+ } else {
+ rc = snprintf(exp_str_wptr, exp_str_rest, "]");
+ }
+ exp_str_wptr += rc;
+ }
+ assert(exp_str_wptr + 1 == exp_str + exp_str_sz);
+ rc = mp_snprint(decoded, exp_str_sz, mp_buff);
+ ok(rc == exp_str_sz - 1, "mp_snprint max nesting depth return value");
+ ok(strcmp(decoded, exp_str) == 0, "mp_snprint max nesting depth result");
+ free(decoded);
+ free(exp_str);
+ free(mp_buff);
footer();
return check_plan();
}
--
2.20.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH v2 1/1] Implement mp_stack class
2019-02-03 10:21 [PATCH v2 1/1] Implement mp_stack class Kirill Shcherbatov
@ 2019-02-04 9:41 ` Vladimir Davydov
0 siblings, 0 replies; 2+ messages in thread
From: Vladimir Davydov @ 2019-02-04 9:41 UTC (permalink / raw)
To: Kirill Shcherbatov; +Cc: tarantool-patches, Kirill Shcherbatov
On Sun, Feb 03, 2019 at 01:21:43PM +0300, Kirill Shcherbatov wrote:
> diff --git a/msgpuck.c b/msgpuck.c
> index 72b800c..cb93d68 100644
> --- a/msgpuck.c
> +++ b/msgpuck.c
> @@ -323,25 +334,14 @@ mp_fprint_internal(FILE *file, const char **data)
> total_bytes += bytes; \
> } while (0)
> #define PRINT(...) HANDLE(fprintf, __VA_ARGS__)
> -#define SELF(...) HANDLE(mp_fprint_internal, __VA_ARGS__)
> -MP_PRINT(SELF, PRINT)
> +MP_PRINT(PRINT)
> #undef HANDLE
HANDLE is redundant, I removed it.
> -#undef SELF
> #undef PRINT
> return total_bytes;
> }
> +/**
> + * \brief Initialize mp_stack \a stack of specified size \a size
> + * and user-allocated array \a frames.
> + * The \a frames allocation must have at least \a size mp_frame
> + * items.
> + * \param stack - the pointer to a mp_stack to initialize.
> + * \param size - stack size, count of stack::frames to use.
> + * \param frames - mp_frame preallocated array of size \a size
> + * of struct mp_frame items
> + */
> +MP_PROTO void
> +mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames);
> +
> +/**
> + * \brief Construct a new mp_frame and push it on to mp_stack
> + * \a stack.
> + * \param stack - the pointer to a stack to operate with.
> + * \param type - the type of mp_frame to create.
> + * \param count - the count of itemes of mp_frame to create.
> + * \pre mp_stack_is_full(stack) == false
> + */
> +MP_PROTO void
> +mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t count);
s/uint32_t/int - Fixed.
Pushed to the master. My incremental diff is below.
diff --git a/msgpuck.c b/msgpuck.c
index cb93d68902bd..3338a3c38698 100644
--- a/msgpuck.c
+++ b/msgpuck.c
@@ -327,15 +327,13 @@ mp_fprint(FILE *file, const char *data)
if (!file)
file = stdout;
int total_bytes = 0;
-#define HANDLE(FUN, ...) do { \
- int bytes = FUN(file, __VA_ARGS__); \
+#define PRINT(...) do { \
+ int bytes = fprintf(file, __VA_ARGS__); \
if (mp_unlikely(bytes < 0)) \
return -1; \
total_bytes += bytes; \
} while (0)
-#define PRINT(...) HANDLE(fprintf, __VA_ARGS__)
-MP_PRINT(PRINT)
-#undef HANDLE
+ MP_PRINT(PRINT)
#undef PRINT
return total_bytes;
}
@@ -344,8 +342,8 @@ int
mp_snprint(char *buf, int size, const char *data)
{
int total_bytes = 0;
-#define HANDLE(FUN, ...) do { \
- int bytes = FUN(buf, size, __VA_ARGS__); \
+#define PRINT(...) do { \
+ int bytes = snprintf(buf, size, __VA_ARGS__); \
if (mp_unlikely(bytes < 0)) \
return -1; \
total_bytes += bytes; \
@@ -358,9 +356,7 @@ mp_snprint(char *buf, int size, const char *data)
size = 0; \
} \
} while (0)
-#define PRINT(...) HANDLE(snprintf, __VA_ARGS__)
-MP_PRINT(PRINT)
-#undef HANDLE
+ MP_PRINT(PRINT)
#undef PRINT
return total_bytes;
}
diff --git a/msgpuck.h b/msgpuck.h
index 15a6c9b36855..ab84f0b4936f 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -1237,7 +1237,7 @@ struct mp_stack {
* of struct mp_frame items
*/
MP_PROTO void
-mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames);
+mp_stack_create(struct mp_stack *stack, int size, struct mp_frame *frames);
/**
* \brief Test if mp_stack \a stack is empty.
@@ -1272,7 +1272,7 @@ mp_stack_pop(struct mp_stack *stack);
* \pre mp_stack_is_full(stack) == false
*/
MP_PROTO void
-mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t count);
+mp_stack_push(struct mp_stack *stack, enum mp_type type, int count);
/**
* \brief Get type attribute of the \a stack top frame.
@@ -2304,7 +2304,7 @@ mp_check(const char **data, const char *end)
}
MP_IMPL void
-mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+mp_stack_create(struct mp_stack *stack, int size, struct mp_frame *frames)
{
stack->frames = frames;
stack->size = size;
@@ -2331,10 +2331,10 @@ mp_stack_pop(struct mp_stack *stack)
}
MP_IMPL void
-mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t count)
+mp_stack_push(struct mp_stack *stack, enum mp_type type, int count)
{
assert(!mp_stack_is_full(stack));
- uint32_t idx = stack->used++;
+ int idx = stack->used++;
stack->frames[idx].type = type;
stack->frames[idx].count = count;
stack->frames[idx].curr = -1;
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-02-04 9:41 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-03 10:21 [PATCH v2 1/1] Implement mp_stack class Kirill Shcherbatov
2019-02-04 9:41 ` Vladimir Davydov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox