From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Subject: Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class References: <9613f79e65d9838763422aae0bfb3ca9e2901a32.1547645639.git.kshcherbatov@gmail.com> <20190117115843.nqellm5mmsglqnlf@tkn_work_nb> <842c55cd-5d3f-e857-42f4-94d648c88273@tarantool.org> <20190117163409.hvga32kxxgcc5sat@tkn_work_nb> <1ba698d1-8450-f2d3-7717-11adbf0d5f4f@tarantool.org> <20190121112541.hkqylj7hwcjeq526@tkn_work_nb> <2259a346-76dd-f721-176f-bfad39b25b5e@gmail.com> <20190121202529.r7wwnzcesdh34sda@esperanza> From: Kirill Shcherbatov Message-ID: <09a7ccb3-fa8b-8691-ea26-c3146c42d839@tarantool.org> Date: Tue, 22 Jan 2019 15:28:03 +0300 MIME-Version: 1.0 In-Reply-To: <20190121202529.r7wwnzcesdh34sda@esperanza> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit To: tarantool-patches@freelists.org, Vladimir Davydov List-ID: Hi! Thank you for review. I like your idea with rewriting mp_fprint and mp_snprint routines. ================================================= Introduced a new mp_stack class to make complex msgpack parsing. The structure is needed in order to facilitate the decoding nested MP_ARRAY and MP_MAP msgpack containers without recursion. This allows you to determine that the parsing of the current container is complete. Reworked mp_snprint and mp_fprint routines to use mp_stack instead of recursion. Needed for #1012 --- CMakeLists.txt | 1 + msgpuck.c | 170 +++++++++++++++++++++++++++---------------------- msgpuck.h | 168 ++++++++++++++++++++++++++++++++++++++++++++++++ test/msgpuck.c | 24 ++++++- 4 files changed, 287 insertions(+), 76 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c129b77..6f62f54 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,7 @@ check_c_compiler_flag("-mno-unaligned-access" CC_HAS_MNO_UNALIGNED_ACCESS) add_library(msgpuck STATIC msgpuck.c hints.c) set_target_properties(msgpuck PROPERTIES VERSION 1.0 SOVERSION 1) set_target_properties(msgpuck PROPERTIES OUTPUT_NAME "msgpuck") +add_definitions(-DMP_PRINT_MAX_DEPTH=3) if (NOT ${PROJECT_SOURCE_DIR} STREQUAL ${CMAKE_SOURCE_DIR}) # Embedded mode, skip tests, documentation and the install targets diff --git a/msgpuck.c b/msgpuck.c index 72b800c..84b9d63 100644 --- a/msgpuck.c +++ b/msgpuck.c @@ -232,84 +232,106 @@ mp_format(char *data, size_t data_size, const char *format, ...) return res; } -#define MP_PRINT(SELF, PRINTF) \ +#define MP_PRINT(PRINTF) \ { \ - switch (mp_typeof(**data)) { \ - case MP_NIL: \ - mp_decode_nil(data); \ - PRINTF("null"); \ - break; \ - case MP_UINT: \ - PRINTF("%llu", (unsigned long long) mp_decode_uint(data)); \ - break; \ - case MP_INT: \ - PRINTF("%lld", (long long) mp_decode_int(data)); \ - break; \ - case MP_STR: \ - case MP_BIN: \ - { \ - uint32_t len = mp_typeof(**data) == MP_STR ? \ - mp_decode_strl(data) : mp_decode_binl(data); \ - PRINTF("\""); \ - const char *s; \ - for (s = *data; s < *data + len; s++) { \ - unsigned char c = (unsigned char ) *s; \ - if (c < 128 && mp_char2escape[c] != NULL) { \ - /* Escape character */ \ - PRINTF("%s", mp_char2escape[c]); \ + struct mp_frame mp_frames[MP_PRINT_MAX_DEPTH]; \ + struct mp_stack mp_stack; \ + mp_stack_create(&mp_stack, MP_PRINT_MAX_DEPTH, mp_frames); \ + do { \ + bool is_map_value = !mp_stack_is_empty(&mp_stack) && \ + mp_stack_top(&mp_stack)->type == MP_MAP && \ + mp_stack_top(&mp_stack)->curr % 2 == 0; \ + if (is_map_value) \ + PRINTF(": "); \ + switch (mp_typeof(**data)) { \ + case MP_NIL: \ + mp_decode_nil(data); \ + PRINTF("null"); \ + break; \ + case MP_UINT: \ + PRINTF("%llu", \ + (unsigned long long)mp_decode_uint(data)); \ + break; \ + case MP_INT: \ + PRINTF("%lld", (long long) mp_decode_int(data)); \ + break; \ + case MP_STR: \ + case MP_BIN: { \ + uint32_t len = mp_typeof(**data) == MP_STR ? \ + mp_decode_strl(data) : mp_decode_binl(data); \ + PRINTF("\""); \ + const char *s; \ + for (s = *data; s < *data + len; s++) { \ + unsigned char c = (unsigned char ) *s; \ + if (c < 128 && mp_char2escape[c] != NULL) { \ + /* Escape character */ \ + PRINTF("%s", mp_char2escape[c]); \ + } else { \ + PRINTF("%c", c); \ + } \ + } \ + PRINTF("\""); \ + *data += len; \ + break; \ + } \ + case MP_ARRAY: { \ + PRINTF("["); \ + if (!mp_stack_is_full(&mp_stack)) { \ + mp_stack_push(&mp_stack, MP_ARRAY, \ + mp_decode_array(data)); \ } else { \ - PRINTF("%c", c); \ + /* \ + * Skip msgpack items that do not \ + * fit onto the stack. \ + */ \ + PRINTF("...]"); \ + mp_next(data); \ } \ + break; \ } \ - PRINTF("\""); \ - *data += len; \ - break; \ - } \ - case MP_ARRAY: \ - { \ - uint32_t count = mp_decode_array(data); \ - PRINTF("["); \ - uint32_t i; \ - for (i = 0; i < count; i++) { \ - if (i) \ - PRINTF(", "); \ - SELF(data); \ + case MP_MAP: { \ + PRINTF("{"); \ + if (!mp_stack_is_full(&mp_stack)) { \ + mp_stack_push(&mp_stack, MP_MAP, \ + 2 * mp_decode_map(data)); \ + } else { \ + /* \ + * Skip msgpack items that do not \ + * fit onto the stack. \ + */ \ + PRINTF("...}"); \ + mp_next(data); \ + } \ + break; \ } \ - PRINTF("]"); \ - break; \ - } \ - case MP_MAP: \ - { \ - uint32_t count = mp_decode_map(data); \ - PRINTF("{"); \ - uint32_t i; \ - for (i = 0; i < count; i++) { \ - if (i) \ + case MP_BOOL: \ + PRINTF(mp_decode_bool(data) ? "true" : "false"); \ + break; \ + case MP_FLOAT: \ + PRINTF("%g", mp_decode_float(data)); \ + break; \ + case MP_DOUBLE: \ + PRINTF("%lg", mp_decode_double(data)); \ + break; \ + case MP_EXT: \ + mp_next(data); \ + PRINTF("undefined"); \ + break; \ + default: \ + mp_unreachable(); \ + return -1; \ + } \ + while (!mp_stack_is_empty(&mp_stack)) { \ + struct mp_frame *frame = mp_stack_top(&mp_stack); \ + if (frame->curr > 0 && frame->curr < frame->size && \ + (frame->type == MP_ARRAY || is_map_value)) \ PRINTF(", "); \ - SELF(data); \ - PRINTF(": "); \ - SELF(data); \ + if (!mp_stack_advance(&mp_stack)) \ + break; \ + PRINTF(frame->type == MP_MAP ? "}" : "]"); \ + mp_stack_pop(&mp_stack); \ } \ - PRINTF("}"); \ - break; \ - } \ - case MP_BOOL: \ - PRINTF(mp_decode_bool(data) ? "true" : "false"); \ - break; \ - case MP_FLOAT: \ - PRINTF("%g", mp_decode_float(data)); \ - break; \ - case MP_DOUBLE: \ - PRINTF("%lg", mp_decode_double(data)); \ - break; \ - case MP_EXT: \ - mp_next(data); \ - PRINTF("undefined"); \ - break; \ - default: \ - mp_unreachable(); \ - return -1; \ - } \ + } while (!mp_stack_is_empty(&mp_stack)); \ } static inline int @@ -323,8 +345,7 @@ mp_fprint_internal(FILE *file, const char **data) total_bytes += bytes; \ } while (0) #define PRINT(...) HANDLE(fprintf, __VA_ARGS__) -#define SELF(...) HANDLE(mp_fprint_internal, __VA_ARGS__) -MP_PRINT(SELF, PRINT) +MP_PRINT(PRINT) #undef HANDLE #undef SELF #undef PRINT @@ -359,8 +380,7 @@ mp_snprint_internal(char *buf, int size, const char **data) } \ } while (0) #define PRINT(...) HANDLE(snprintf, __VA_ARGS__) -#define SELF(...) HANDLE(mp_snprint_internal, __VA_ARGS__) -MP_PRINT(SELF, PRINT) +MP_PRINT(PRINT) #undef HANDLE #undef SELF #undef PRINT diff --git a/msgpuck.h b/msgpuck.h index b0d4967..de8b2d0 100644 --- a/msgpuck.h +++ b/msgpuck.h @@ -1176,6 +1176,123 @@ mp_next(const char **data); MP_PROTO int mp_check(const char **data, const char *end); +/** + * The maximum depth of the message pack nesting maps and arrays + * supported by mp_fprint and mp_snprintf routines. Upon reaching + * the maximum depth, the parsing will not be carried out. + */ +#ifndef MP_PRINT_MAX_DEPTH +#error "MP_PRINT_MAX_DEPTH must be defined" +#endif /* MP_PRINT_MAX_DEPTH */ + +/** Message pack MP_MAP or MP_ARRAY container descriptor. */ +struct mp_frame { + /** MP frame type calculated with mp_typeof(). */ + enum mp_type type; + /** + * Total number of items in MP_MAP or MP_ARRAY container + * calculated with mp_decode_map() or mp_decode_array(). + */ + uint32_t size; + /** + * Count of items already processed. Must be less than or + * equal to mp_frame::size member. + */ + uint32_t curr; +}; + +/** + * Stack of map/array descriptors mp_frame to do complex msgpack + * parse. The structure is needed in order to facilitate + * decoding nested MP_ARRAY and MP_MAP msgpack containers without + * recursion. This allows to determine that the parsing of the + * current container is complete. +*/ +struct mp_stack { + /** The maximum stack depth. */ + uint32_t size; + /** + * Count of used stack frames. Corresponds to the index + * in the array to perform the push operation. Must be + * less or equal to mp_stack::size member. + */ + uint32_t used; + /** + * Array of size mp_stack::size of mp_frames. + */ + struct mp_frame *frames; +}; + +/** + * \brief Initialize mp_stack \a stack of specified size \a size + * and user-allocated array \a frames. + * The \a frames allocation must have at least \a size mp_frame + * items. + * \param stack - the pointer to a mp_stack to initialize. + * \param size - stack size, count of stack::frames to use. + * \param frames - mp_frame preallocated array of size \a size + * of struct mp_frame items + */ +MP_PROTO void +mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames); + +/** + * \brief Test if mp_stack \a stack is empty. + * \param stack - the pointer to a mp_stack to a stack to test. + * \retval true if mp_stack is empty, false otherwise. + */ +MP_PROTO bool +mp_stack_is_empty(struct mp_stack *stack); + +/** + * \brief Test if mp_stack \a stack is full. + * \param stack - the pointer to a mp_stack to a stack to test. + * \retval true if mp_stack is full, false otherwise. + */ +MP_PROTO bool +mp_stack_is_full(struct mp_stack *stack); + +/** + * \brief Pop the top mp_stack \a stack frame. + * \param stack - the pointer to a mp_stack to operate with. + * \pre mp_stack_is_empty(stack) == false + */ +MP_PROTO void +mp_stack_pop(struct mp_stack *stack); + +/** + * \brief Get a pointer to a top mp_stack \a stack frame. + * \param stack - the pointer to a mp_stack to operate with. + * \retval a pointer to a top stack frame. + * \pre mp_stack_is_empty(stack) == false + */ +MP_PROTO struct mp_frame * +mp_stack_top(struct mp_stack *stack); + +/** + * \brief Construct a new mp_frame and push it on to mp_stack + * \a stack. + * \param stack - the pointer to a stack to operate with. + * \param type - the type of mp_frame to create. + * \param size - size size of mp_frame to create. + * \pre mp_stack_is_full(stack) == false + */ +MP_PROTO void +mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size); + +/** + * \brief Advance a mp_stack_top()::\a curr attribute of the + * \a stack. The function also signals when it moves over the + * last item. + * \param stack - the pointer to a stack to operate with + * \retval true if the top frame has been already parsed: + * mp_stack_top()::\a curr > mp_stack_top()::\a size; + * false otherwise. + * \pre mp_stack_is_empty(stack) == false + */ +MP_PROTO bool +mp_stack_advance(struct mp_stack *stack); + /* * }}} */ @@ -1184,6 +1301,57 @@ mp_check(const char **data, const char *end); * {{{ Implementation */ +MP_IMPL void +mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames) +{ + stack->frames = frames; + stack->size = size; + stack->used = 0; +} + +MP_IMPL bool +mp_stack_is_empty(struct mp_stack *stack) +{ + return stack->used == 0; +} + +MP_IMPL bool +mp_stack_is_full(struct mp_stack *stack) +{ + return stack->used >= stack->size; +} + +MP_IMPL void +mp_stack_pop(struct mp_stack *stack) +{ + assert(!mp_stack_is_empty(stack)); + --stack->used; +} + +MP_IMPL struct mp_frame * +mp_stack_top(struct mp_stack *stack) +{ + assert(!mp_stack_is_empty(stack)); + return &stack->frames[stack->used - 1]; +} + +MP_IMPL void +mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size) +{ + assert(!mp_stack_is_full(stack)); + uint32_t idx = stack->used++; + stack->frames[idx].type = type; + stack->frames[idx].size = size; + stack->frames[idx].curr = 0; +} + +MP_IMPL bool +mp_stack_advance(struct mp_stack *stack) +{ + struct mp_frame *frame = mp_stack_top(stack); + return ++frame->curr > frame->size; +} + /** \cond false */ extern const enum mp_type mp_type_hint[]; extern const int8_t mp_parser_hint[]; diff --git a/test/msgpuck.c b/test/msgpuck.c index 9265453..46c9eed 100644 --- a/test/msgpuck.c +++ b/test/msgpuck.c @@ -764,7 +764,7 @@ test_format(void) int test_mp_print() { - plan(10); + plan(11); header(); char msgpack[128]; @@ -838,6 +838,28 @@ test_mp_print() int rc = mp_fprint(stdin, msgpack); is(rc, -1, "mp_fprint I/O error"); + /* Test mp_snprint parsing stack overflow. */ + char mp_buf[MP_PRINT_MAX_DEPTH * mp_sizeof_array(1) + mp_sizeof_uint(1)]; + int json_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1; + char json[json_sz]; + char *buf_wptr = mp_buf; + char *json_wptr = json; + for (int i = 0; i <= 2 * (MP_PRINT_MAX_DEPTH + 1); i++) { + if (i < MP_PRINT_MAX_DEPTH + 1) { + buf_wptr = mp_encode_array(buf_wptr, 1); + *(json_wptr++) = '['; + } else if (i == MP_PRINT_MAX_DEPTH + 1) { + buf_wptr = mp_encode_uint(buf_wptr, 1); + json_wptr += sprintf(json_wptr, "..."); + } else { + *(json_wptr++) = ']'; + } + } + *(json_wptr++) = '\0'; + char decoded[json_sz]; + rc = mp_snprint(decoded, json_sz, mp_buf); + ok(rc == json_sz - 1 && memcmp(decoded, json, json_sz - 1) == 0, + "mp_snprint parsing stack overflow test"); footer(); return check_plan(); } -- 2.19.2