[tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class
Kirill Shcherbatov
kshcherbatov at tarantool.org
Tue Jan 22 15:28:03 MSK 2019
Hi! Thank you for review. I like your idea with rewriting mp_fprint and mp_snprint routines.
=================================================
Introduced a new mp_stack class to make complex msgpack parsing.
The structure is needed in order to facilitate the decoding
nested MP_ARRAY and MP_MAP msgpack containers without recursion.
This allows you to determine that the parsing of the current
container is complete.
Reworked mp_snprint and mp_fprint routines to use mp_stack
instead of recursion.
Needed for #1012
---
CMakeLists.txt | 1 +
msgpuck.c | 170 +++++++++++++++++++++++++++----------------------
msgpuck.h | 168 ++++++++++++++++++++++++++++++++++++++++++++++++
test/msgpuck.c | 24 ++++++-
4 files changed, 287 insertions(+), 76 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c129b77..6f62f54 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,7 @@ check_c_compiler_flag("-mno-unaligned-access" CC_HAS_MNO_UNALIGNED_ACCESS)
add_library(msgpuck STATIC msgpuck.c hints.c)
set_target_properties(msgpuck PROPERTIES VERSION 1.0 SOVERSION 1)
set_target_properties(msgpuck PROPERTIES OUTPUT_NAME "msgpuck")
+add_definitions(-DMP_PRINT_MAX_DEPTH=3)
if (NOT ${PROJECT_SOURCE_DIR} STREQUAL ${CMAKE_SOURCE_DIR})
# Embedded mode, skip tests, documentation and the install targets
diff --git a/msgpuck.c b/msgpuck.c
index 72b800c..84b9d63 100644
--- a/msgpuck.c
+++ b/msgpuck.c
@@ -232,84 +232,106 @@ mp_format(char *data, size_t data_size, const char *format, ...)
return res;
}
-#define MP_PRINT(SELF, PRINTF) \
+#define MP_PRINT(PRINTF) \
{ \
- switch (mp_typeof(**data)) { \
- case MP_NIL: \
- mp_decode_nil(data); \
- PRINTF("null"); \
- break; \
- case MP_UINT: \
- PRINTF("%llu", (unsigned long long) mp_decode_uint(data)); \
- break; \
- case MP_INT: \
- PRINTF("%lld", (long long) mp_decode_int(data)); \
- break; \
- case MP_STR: \
- case MP_BIN: \
- { \
- uint32_t len = mp_typeof(**data) == MP_STR ? \
- mp_decode_strl(data) : mp_decode_binl(data); \
- PRINTF("\""); \
- const char *s; \
- for (s = *data; s < *data + len; s++) { \
- unsigned char c = (unsigned char ) *s; \
- if (c < 128 && mp_char2escape[c] != NULL) { \
- /* Escape character */ \
- PRINTF("%s", mp_char2escape[c]); \
+ struct mp_frame mp_frames[MP_PRINT_MAX_DEPTH]; \
+ struct mp_stack mp_stack; \
+ mp_stack_create(&mp_stack, MP_PRINT_MAX_DEPTH, mp_frames); \
+ do { \
+ bool is_map_value = !mp_stack_is_empty(&mp_stack) && \
+ mp_stack_top(&mp_stack)->type == MP_MAP && \
+ mp_stack_top(&mp_stack)->curr % 2 == 0; \
+ if (is_map_value) \
+ PRINTF(": "); \
+ switch (mp_typeof(**data)) { \
+ case MP_NIL: \
+ mp_decode_nil(data); \
+ PRINTF("null"); \
+ break; \
+ case MP_UINT: \
+ PRINTF("%llu", \
+ (unsigned long long)mp_decode_uint(data)); \
+ break; \
+ case MP_INT: \
+ PRINTF("%lld", (long long) mp_decode_int(data)); \
+ break; \
+ case MP_STR: \
+ case MP_BIN: { \
+ uint32_t len = mp_typeof(**data) == MP_STR ? \
+ mp_decode_strl(data) : mp_decode_binl(data); \
+ PRINTF("\""); \
+ const char *s; \
+ for (s = *data; s < *data + len; s++) { \
+ unsigned char c = (unsigned char ) *s; \
+ if (c < 128 && mp_char2escape[c] != NULL) { \
+ /* Escape character */ \
+ PRINTF("%s", mp_char2escape[c]); \
+ } else { \
+ PRINTF("%c", c); \
+ } \
+ } \
+ PRINTF("\""); \
+ *data += len; \
+ break; \
+ } \
+ case MP_ARRAY: { \
+ PRINTF("["); \
+ if (!mp_stack_is_full(&mp_stack)) { \
+ mp_stack_push(&mp_stack, MP_ARRAY, \
+ mp_decode_array(data)); \
} else { \
- PRINTF("%c", c); \
+ /* \
+ * Skip msgpack items that do not \
+ * fit onto the stack. \
+ */ \
+ PRINTF("...]"); \
+ mp_next(data); \
} \
+ break; \
} \
- PRINTF("\""); \
- *data += len; \
- break; \
- } \
- case MP_ARRAY: \
- { \
- uint32_t count = mp_decode_array(data); \
- PRINTF("["); \
- uint32_t i; \
- for (i = 0; i < count; i++) { \
- if (i) \
- PRINTF(", "); \
- SELF(data); \
+ case MP_MAP: { \
+ PRINTF("{"); \
+ if (!mp_stack_is_full(&mp_stack)) { \
+ mp_stack_push(&mp_stack, MP_MAP, \
+ 2 * mp_decode_map(data)); \
+ } else { \
+ /* \
+ * Skip msgpack items that do not \
+ * fit onto the stack. \
+ */ \
+ PRINTF("...}"); \
+ mp_next(data); \
+ } \
+ break; \
} \
- PRINTF("]"); \
- break; \
- } \
- case MP_MAP: \
- { \
- uint32_t count = mp_decode_map(data); \
- PRINTF("{"); \
- uint32_t i; \
- for (i = 0; i < count; i++) { \
- if (i) \
+ case MP_BOOL: \
+ PRINTF(mp_decode_bool(data) ? "true" : "false"); \
+ break; \
+ case MP_FLOAT: \
+ PRINTF("%g", mp_decode_float(data)); \
+ break; \
+ case MP_DOUBLE: \
+ PRINTF("%lg", mp_decode_double(data)); \
+ break; \
+ case MP_EXT: \
+ mp_next(data); \
+ PRINTF("undefined"); \
+ break; \
+ default: \
+ mp_unreachable(); \
+ return -1; \
+ } \
+ while (!mp_stack_is_empty(&mp_stack)) { \
+ struct mp_frame *frame = mp_stack_top(&mp_stack); \
+ if (frame->curr > 0 && frame->curr < frame->size && \
+ (frame->type == MP_ARRAY || is_map_value)) \
PRINTF(", "); \
- SELF(data); \
- PRINTF(": "); \
- SELF(data); \
+ if (!mp_stack_advance(&mp_stack)) \
+ break; \
+ PRINTF(frame->type == MP_MAP ? "}" : "]"); \
+ mp_stack_pop(&mp_stack); \
} \
- PRINTF("}"); \
- break; \
- } \
- case MP_BOOL: \
- PRINTF(mp_decode_bool(data) ? "true" : "false"); \
- break; \
- case MP_FLOAT: \
- PRINTF("%g", mp_decode_float(data)); \
- break; \
- case MP_DOUBLE: \
- PRINTF("%lg", mp_decode_double(data)); \
- break; \
- case MP_EXT: \
- mp_next(data); \
- PRINTF("undefined"); \
- break; \
- default: \
- mp_unreachable(); \
- return -1; \
- } \
+ } while (!mp_stack_is_empty(&mp_stack)); \
}
static inline int
@@ -323,8 +345,7 @@ mp_fprint_internal(FILE *file, const char **data)
total_bytes += bytes; \
} while (0)
#define PRINT(...) HANDLE(fprintf, __VA_ARGS__)
-#define SELF(...) HANDLE(mp_fprint_internal, __VA_ARGS__)
-MP_PRINT(SELF, PRINT)
+MP_PRINT(PRINT)
#undef HANDLE
#undef SELF
#undef PRINT
@@ -359,8 +380,7 @@ mp_snprint_internal(char *buf, int size, const char **data)
} \
} while (0)
#define PRINT(...) HANDLE(snprintf, __VA_ARGS__)
-#define SELF(...) HANDLE(mp_snprint_internal, __VA_ARGS__)
-MP_PRINT(SELF, PRINT)
+MP_PRINT(PRINT)
#undef HANDLE
#undef SELF
#undef PRINT
diff --git a/msgpuck.h b/msgpuck.h
index b0d4967..de8b2d0 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -1176,6 +1176,123 @@ mp_next(const char **data);
MP_PROTO int
mp_check(const char **data, const char *end);
+/**
+ * The maximum depth of the message pack nesting maps and arrays
+ * supported by mp_fprint and mp_snprintf routines. Upon reaching
+ * the maximum depth, the parsing will not be carried out.
+ */
+#ifndef MP_PRINT_MAX_DEPTH
+#error "MP_PRINT_MAX_DEPTH must be defined"
+#endif /* MP_PRINT_MAX_DEPTH */
+
+/** Message pack MP_MAP or MP_ARRAY container descriptor. */
+struct mp_frame {
+ /** MP frame type calculated with mp_typeof(). */
+ enum mp_type type;
+ /**
+ * Total number of items in MP_MAP or MP_ARRAY container
+ * calculated with mp_decode_map() or mp_decode_array().
+ */
+ uint32_t size;
+ /**
+ * Count of items already processed. Must be less than or
+ * equal to mp_frame::size member.
+ */
+ uint32_t curr;
+};
+
+/**
+ * Stack of map/array descriptors mp_frame to do complex msgpack
+ * parse. The structure is needed in order to facilitate
+ * decoding nested MP_ARRAY and MP_MAP msgpack containers without
+ * recursion. This allows to determine that the parsing of the
+ * current container is complete.
+*/
+struct mp_stack {
+ /** The maximum stack depth. */
+ uint32_t size;
+ /**
+ * Count of used stack frames. Corresponds to the index
+ * in the array to perform the push operation. Must be
+ * less or equal to mp_stack::size member.
+ */
+ uint32_t used;
+ /**
+ * Array of size mp_stack::size of mp_frames.
+ */
+ struct mp_frame *frames;
+};
+
+/**
+ * \brief Initialize mp_stack \a stack of specified size \a size
+ * and user-allocated array \a frames.
+ * The \a frames allocation must have at least \a size mp_frame
+ * items.
+ * \param stack - the pointer to a mp_stack to initialize.
+ * \param size - stack size, count of stack::frames to use.
+ * \param frames - mp_frame preallocated array of size \a size
+ * of struct mp_frame items
+ */
+MP_PROTO void
+mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames);
+
+/**
+ * \brief Test if mp_stack \a stack is empty.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ * \retval true if mp_stack is empty, false otherwise.
+ */
+MP_PROTO bool
+mp_stack_is_empty(struct mp_stack *stack);
+
+/**
+ * \brief Test if mp_stack \a stack is full.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ * \retval true if mp_stack is full, false otherwise.
+ */
+MP_PROTO bool
+mp_stack_is_full(struct mp_stack *stack);
+
+/**
+ * \brief Pop the top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO void
+mp_stack_pop(struct mp_stack *stack);
+
+/**
+ * \brief Get a pointer to a top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * \retval a pointer to a top stack frame.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO struct mp_frame *
+mp_stack_top(struct mp_stack *stack);
+
+/**
+ * \brief Construct a new mp_frame and push it on to mp_stack
+ * \a stack.
+ * \param stack - the pointer to a stack to operate with.
+ * \param type - the type of mp_frame to create.
+ * \param size - size size of mp_frame to create.
+ * \pre mp_stack_is_full(stack) == false
+ */
+MP_PROTO void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size);
+
+/**
+ * \brief Advance a mp_stack_top()::\a curr attribute of the
+ * \a stack. The function also signals when it moves over the
+ * last item.
+ * \param stack - the pointer to a stack to operate with
+ * \retval true if the top frame has been already parsed:
+ * mp_stack_top()::\a curr > mp_stack_top()::\a size;
+ * false otherwise.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO bool
+mp_stack_advance(struct mp_stack *stack);
+
/*
* }}}
*/
@@ -1184,6 +1301,57 @@ mp_check(const char **data, const char *end);
* {{{ Implementation
*/
+MP_IMPL void
+mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+{
+ stack->frames = frames;
+ stack->size = size;
+ stack->used = 0;
+}
+
+MP_IMPL bool
+mp_stack_is_empty(struct mp_stack *stack)
+{
+ return stack->used == 0;
+}
+
+MP_IMPL bool
+mp_stack_is_full(struct mp_stack *stack)
+{
+ return stack->used >= stack->size;
+}
+
+MP_IMPL void
+mp_stack_pop(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ --stack->used;
+}
+
+MP_IMPL struct mp_frame *
+mp_stack_top(struct mp_stack *stack)
+{
+ assert(!mp_stack_is_empty(stack));
+ return &stack->frames[stack->used - 1];
+}
+
+MP_IMPL void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size)
+{
+ assert(!mp_stack_is_full(stack));
+ uint32_t idx = stack->used++;
+ stack->frames[idx].type = type;
+ stack->frames[idx].size = size;
+ stack->frames[idx].curr = 0;
+}
+
+MP_IMPL bool
+mp_stack_advance(struct mp_stack *stack)
+{
+ struct mp_frame *frame = mp_stack_top(stack);
+ return ++frame->curr > frame->size;
+}
+
/** \cond false */
extern const enum mp_type mp_type_hint[];
extern const int8_t mp_parser_hint[];
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 9265453..46c9eed 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -764,7 +764,7 @@ test_format(void)
int
test_mp_print()
{
- plan(10);
+ plan(11);
header();
char msgpack[128];
@@ -838,6 +838,28 @@ test_mp_print()
int rc = mp_fprint(stdin, msgpack);
is(rc, -1, "mp_fprint I/O error");
+ /* Test mp_snprint parsing stack overflow. */
+ char mp_buf[MP_PRINT_MAX_DEPTH * mp_sizeof_array(1) + mp_sizeof_uint(1)];
+ int json_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1;
+ char json[json_sz];
+ char *buf_wptr = mp_buf;
+ char *json_wptr = json;
+ for (int i = 0; i <= 2 * (MP_PRINT_MAX_DEPTH + 1); i++) {
+ if (i < MP_PRINT_MAX_DEPTH + 1) {
+ buf_wptr = mp_encode_array(buf_wptr, 1);
+ *(json_wptr++) = '[';
+ } else if (i == MP_PRINT_MAX_DEPTH + 1) {
+ buf_wptr = mp_encode_uint(buf_wptr, 1);
+ json_wptr += sprintf(json_wptr, "...");
+ } else {
+ *(json_wptr++) = ']';
+ }
+ }
+ *(json_wptr++) = '\0';
+ char decoded[json_sz];
+ rc = mp_snprint(decoded, json_sz, mp_buf);
+ ok(rc == json_sz - 1 && memcmp(decoded, json, json_sz - 1) == 0,
+ "mp_snprint parsing stack overflow test");
footer();
return check_plan();
}
--
2.19.2
More information about the Tarantool-patches
mailing list