From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Kirill Shcherbatov Subject: [PATCH v2 1/1] Implement mp_stack class Date: Sun, 3 Feb 2019 13:21:43 +0300 Message-Id: <473372ec0b111ff0731857bb4c45409866cb3a5d.1549009319.git.kshcherbatov@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: tarantool-patches@freelists.org, vdavydov.dev@gmail.com Cc: Kirill Shcherbatov List-ID: From: Kirill Shcherbatov Introduced a new mp_stack class to make complex msgpack parsing. The structure is needed in order to facilitate the decoding nested MP_ARRAY and MP_MAP msgpack containers without recursion. This allows you to determine that the parsing of the current container is complete. Reworked mp_snprint and mp_fprint routines to use mp_stack instead of recursion. Needed for https://github.com/tarantool/tarantool/issues/1012 --- msgpuck.c | 110 ++++++++++++++--------------- msgpuck.h | 187 +++++++++++++++++++++++++++++++++++++++++++++++++ test/msgpuck.c | 41 +++++++++-- 3 files changed, 275 insertions(+), 63 deletions(-) diff --git a/msgpuck.c b/msgpuck.c index 72b800c..cb93d68 100644 --- a/msgpuck.c +++ b/msgpuck.c @@ -232,27 +232,31 @@ mp_format(char *data, size_t data_size, const char *format, ...) return res; } -#define MP_PRINT(SELF, PRINTF) \ +#define MP_PRINT(PRINTF) \ { \ - switch (mp_typeof(**data)) { \ + struct mp_frame frames[MP_PRINT_MAX_DEPTH]; \ + struct mp_stack stack; \ + mp_stack_create(&stack, MP_PRINT_MAX_DEPTH, frames); \ +next: \ + switch (mp_typeof(*data)) { \ case MP_NIL: \ - mp_decode_nil(data); \ + mp_decode_nil(&data); \ PRINTF("null"); \ break; \ case MP_UINT: \ - PRINTF("%llu", (unsigned long long) mp_decode_uint(data)); \ + PRINTF("%llu", (unsigned long long) mp_decode_uint(&data)); \ break; \ case MP_INT: \ - PRINTF("%lld", (long long) mp_decode_int(data)); \ + PRINTF("%lld", (long long) mp_decode_int(&data)); \ break; \ case MP_STR: \ case MP_BIN: \ { \ - uint32_t len = mp_typeof(**data) == MP_STR ? \ - mp_decode_strl(data) : mp_decode_binl(data); \ + uint32_t len = mp_typeof(*data) == MP_STR ? \ + mp_decode_strl(&data) : mp_decode_binl(&data); \ PRINTF("\""); \ const char *s; \ - for (s = *data; s < *data + len; s++) { \ + for (s = data; s < data + len; s++) { \ unsigned char c = (unsigned char ) *s; \ if (c < 128 && mp_char2escape[c] != NULL) { \ /* Escape character */ \ @@ -262,59 +266,66 @@ mp_format(char *data, size_t data_size, const char *format, ...) } \ } \ PRINTF("\""); \ - *data += len; \ + data += len; \ break; \ } \ case MP_ARRAY: \ - { \ - uint32_t count = mp_decode_array(data); \ - PRINTF("["); \ - uint32_t i; \ - for (i = 0; i < count; i++) { \ - if (i) \ - PRINTF(", "); \ - SELF(data); \ - } \ - PRINTF("]"); \ - break; \ - } \ case MP_MAP: \ { \ - uint32_t count = mp_decode_map(data); \ - PRINTF("{"); \ - uint32_t i; \ - for (i = 0; i < count; i++) { \ - if (i) \ - PRINTF(", "); \ - SELF(data); \ - PRINTF(": "); \ - SELF(data); \ + enum mp_type type = mp_typeof(*data); \ + if (!mp_stack_is_full(&stack)) { \ + uint32_t count = type == MP_ARRAY ? \ + mp_decode_array(&data) : \ + 2 * mp_decode_map(&data); \ + mp_stack_push(&stack, type, count); \ + } else { \ + /* \ + * Skip msgpack items that do not \ + * fit onto the stack. \ + */ \ + PRINTF(type == MP_ARRAY ? "[...]" : "{...}"); \ + mp_next(&data); \ } \ - PRINTF("}"); \ break; \ } \ case MP_BOOL: \ - PRINTF(mp_decode_bool(data) ? "true" : "false"); \ + PRINTF(mp_decode_bool(&data) ? "true" : "false"); \ break; \ case MP_FLOAT: \ - PRINTF("%g", mp_decode_float(data)); \ + PRINTF("%g", mp_decode_float(&data)); \ break; \ case MP_DOUBLE: \ - PRINTF("%lg", mp_decode_double(data)); \ + PRINTF("%lg", mp_decode_double(&data)); \ break; \ case MP_EXT: \ - mp_next(data); \ + mp_next(&data); \ PRINTF("undefined"); \ break; \ default: \ mp_unreachable(); \ return -1; \ } \ + while (!mp_stack_is_empty(&stack)) { \ + enum mp_type type = mp_stack_type(&stack); \ + int curr = mp_stack_advance(&stack); \ + if (curr == 0 || mp_stack_count(&stack) == 0) \ + PRINTF(type == MP_ARRAY ? "[" : "{"); \ + if (curr == -1) { \ + PRINTF(type == MP_ARRAY ? "]" : "}"); \ + mp_stack_pop(&stack); \ + continue; \ + } else if (curr != 0) { \ + PRINTF(type == MP_MAP && curr % 2 == 1 ? ": " : ", "); \ + } \ + goto next; \ + } \ } -static inline int -mp_fprint_internal(FILE *file, const char **data) +int +mp_fprint(FILE *file, const char *data) { + if (!file) + file = stdout; int total_bytes = 0; #define HANDLE(FUN, ...) do { \ int bytes = FUN(file, __VA_ARGS__); \ @@ -323,25 +334,14 @@ mp_fprint_internal(FILE *file, const char **data) total_bytes += bytes; \ } while (0) #define PRINT(...) HANDLE(fprintf, __VA_ARGS__) -#define SELF(...) HANDLE(mp_fprint_internal, __VA_ARGS__) -MP_PRINT(SELF, PRINT) +MP_PRINT(PRINT) #undef HANDLE -#undef SELF #undef PRINT return total_bytes; } int -mp_fprint(FILE *file, const char *data) -{ - if (!file) - file = stdout; - int res = mp_fprint_internal(file, &data); - return res; -} - -static inline int -mp_snprint_internal(char *buf, int size, const char **data) +mp_snprint(char *buf, int size, const char *data) { int total_bytes = 0; #define HANDLE(FUN, ...) do { \ @@ -359,17 +359,9 @@ mp_snprint_internal(char *buf, int size, const char **data) } \ } while (0) #define PRINT(...) HANDLE(snprintf, __VA_ARGS__) -#define SELF(...) HANDLE(mp_snprint_internal, __VA_ARGS__) -MP_PRINT(SELF, PRINT) +MP_PRINT(PRINT) #undef HANDLE -#undef SELF #undef PRINT return total_bytes; } #undef MP_PRINT - -int -mp_snprint(char *buf, int size, const char *data) -{ - return mp_snprint_internal(buf, size, &data); -} diff --git a/msgpuck.h b/msgpuck.h index b0d4967..15a6c9b 100644 --- a/msgpuck.h +++ b/msgpuck.h @@ -1176,6 +1176,133 @@ mp_next(const char **data); MP_PROTO int mp_check(const char **data, const char *end); +/** + * The maximum msgpack nesting depth supported by mp_snprint(). + * Everything beyond that will be omitted (replaced with "..."). + */ +#ifndef MP_PRINT_MAX_DEPTH +#define MP_PRINT_MAX_DEPTH 32 +#endif + +/** Message pack MP_MAP or MP_ARRAY container descriptor. */ +struct mp_frame { + /** + * MP frame type calculated with mp_typeof(). + */ + enum mp_type type; + /** + * Total number of items in MP_MAP or MP_ARRAY container + * calculated with mp_decode_map() or mp_decode_array(). + */ + int count; + /** + * Index of currently processing item. Must be less than + * mp_frame::count member. + */ + int curr; +}; + +/** + * Stack of map/array descriptors mp_frame to do complex msgpack + * parse. The structure is needed in order to facilitate + * decoding nested MP_ARRAY and MP_MAP msgpack containers without + * recursion. This allows to determine that the parsing of the + * current container is complete. +*/ +struct mp_stack { + /** + * The maximum stack depth. + */ + int size; + /** + * Count of used stack frames. Corresponds to the index + * in the array to perform the push operation. Must be + * less or equal to mp_stack::size member. + */ + int used; + /** + * Array of size mp_stack::size of mp_frames. + */ + struct mp_frame *frames; +}; + +/** + * \brief Initialize mp_stack \a stack of specified size \a size + * and user-allocated array \a frames. + * The \a frames allocation must have at least \a size mp_frame + * items. + * \param stack - the pointer to a mp_stack to initialize. + * \param size - stack size, count of stack::frames to use. + * \param frames - mp_frame preallocated array of size \a size + * of struct mp_frame items + */ +MP_PROTO void +mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames); + +/** + * \brief Test if mp_stack \a stack is empty. + * \param stack - the pointer to a mp_stack to a stack to test. + * \retval true if mp_stack is empty, false otherwise. + */ +MP_PROTO bool +mp_stack_is_empty(struct mp_stack *stack); + +/** + * \brief Test if mp_stack \a stack is full. + * \param stack - the pointer to a mp_stack to a stack to test. + * \retval true if mp_stack is full, false otherwise. + */ +MP_PROTO bool +mp_stack_is_full(struct mp_stack *stack); + +/** + * \brief Pop the top mp_stack \a stack frame. + * \param stack - the pointer to a mp_stack to operate with. + * \pre mp_stack_is_empty(stack) == false + */ +MP_PROTO void +mp_stack_pop(struct mp_stack *stack); + +/** + * \brief Construct a new mp_frame and push it on to mp_stack + * \a stack. + * \param stack - the pointer to a stack to operate with. + * \param type - the type of mp_frame to create. + * \param count - the count of itemes of mp_frame to create. + * \pre mp_stack_is_full(stack) == false + */ +MP_PROTO void +mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t count); + +/** + * \brief Get type attribute of the \a stack top frame. + * \param stack - the pointer to a stack to operate with. + * \retval enum mp_type value - the top stack frame type. + * \pre mp_stack_is_empty(stack) == false + */ +MP_PROTO enum mp_type +mp_stack_type(struct mp_stack *stack); + +/** + * \brief Get count attribute of the \a stack top frame. + * \param stack - the pointer to a stack to operate with. + * \retval count - the top stack frame items count. + * \pre mp_stack_is_empty(stack) == false + */ +MP_PROTO int +mp_stack_count(struct mp_stack *stack); + +/** + * \brief Advance curr attribute of the \a stack top frame. + * \param stack - the pointer to a stack to operate with. + * \retval index of the element to process if the top + * mp_frame::curr is less than top mp_frame::count field. + * -1 otherwise. + * \pre mp_stack_is_empty(stack) == false + */ +MP_PROTO int +mp_stack_advance(struct mp_stack *stack); + /* * }}} */ @@ -2176,6 +2303,66 @@ mp_check(const char **data, const char *end) return 0; } +MP_IMPL void +mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames) +{ + stack->frames = frames; + stack->size = size; + stack->used = 0; +} + +MP_IMPL bool +mp_stack_is_empty(struct mp_stack *stack) +{ + return stack->used == 0; +} + +MP_IMPL bool +mp_stack_is_full(struct mp_stack *stack) +{ + return stack->used >= stack->size; +} + +MP_IMPL void +mp_stack_pop(struct mp_stack *stack) +{ + assert(!mp_stack_is_empty(stack)); + --stack->used; +} + +MP_IMPL void +mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t count) +{ + assert(!mp_stack_is_full(stack)); + uint32_t idx = stack->used++; + stack->frames[idx].type = type; + stack->frames[idx].count = count; + stack->frames[idx].curr = -1; +} + +MP_IMPL enum mp_type +mp_stack_type(struct mp_stack *stack) +{ + assert(!mp_stack_is_empty(stack)); + return stack->frames[stack->used - 1].type; +} + +MP_IMPL int +mp_stack_count(struct mp_stack *stack) +{ + return stack->frames[stack->used - 1].count; +} + +MP_IMPL int +mp_stack_advance(struct mp_stack *stack) +{ + assert(!mp_stack_is_empty(stack)); + struct mp_frame *frame = &stack->frames[stack->used - 1]; + if (frame->curr < frame->count - 1) + return ++frame->curr; + return -1; +} + /** \endcond */ /* diff --git a/test/msgpuck.c b/test/msgpuck.c index 9265453..884883d 100644 --- a/test/msgpuck.c +++ b/test/msgpuck.c @@ -30,6 +30,7 @@ * SUCH DAMAGE. */ +#include #include #include #include @@ -764,15 +765,16 @@ test_format(void) int test_mp_print() { - plan(10); + plan(12); header(); char msgpack[128]; char *d = msgpack; - d = mp_encode_array(d, 6); + d = mp_encode_array(d, 8); d = mp_encode_int(d, -5); d = mp_encode_uint(d, 42); d = mp_encode_str(d, "kill bill", 9); + d = mp_encode_array(d, 0); d = mp_encode_map(d, 6); d = mp_encode_str(d, "bool true", 9); d = mp_encode_bool(d, true); @@ -791,13 +793,14 @@ test_mp_print() *d++ = 0; char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r"; d = mp_encode_bin(d, bin, sizeof(bin)); + d = mp_encode_map(d, 0); assert(d <= msgpack + sizeof(msgpack)); const char *expected = - "[-5, 42, \"kill bill\", " + "[-5, 42, \"kill bill\", [], " "{\"bool true\": true, \"bool false\": false, \"null\": null, " "\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, " - "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\"]"; + "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]"; int esize = strlen(expected); char result[256]; @@ -838,6 +841,36 @@ test_mp_print() int rc = mp_fprint(stdin, msgpack); is(rc, -1, "mp_fprint I/O error"); + /* Test mp_snprint max nesting depth. */ + int mp_buff_sz = MP_PRINT_MAX_DEPTH * mp_sizeof_array(1) + + mp_sizeof_uint(1); + int exp_str_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1; + char *mp_buff = malloc(mp_buff_sz); + char *exp_str = malloc(exp_str_sz); + char *decoded = malloc(exp_str_sz); + char *buff_wptr = mp_buff; + char *exp_str_wptr = exp_str; + for (int i = 0; i <= 2 * (MP_PRINT_MAX_DEPTH + 1); i++) { + int exp_str_rest = exp_str_sz - (exp_str_wptr - exp_str); + assert(exp_str_rest > 0); + if (i < MP_PRINT_MAX_DEPTH + 1) { + buff_wptr = mp_encode_array(buff_wptr, 1); + rc = snprintf(exp_str_wptr, exp_str_rest, "["); + } else if (i == MP_PRINT_MAX_DEPTH + 1) { + buff_wptr = mp_encode_uint(buff_wptr, 1); + rc = snprintf(exp_str_wptr, exp_str_rest, "..."); + } else { + rc = snprintf(exp_str_wptr, exp_str_rest, "]"); + } + exp_str_wptr += rc; + } + assert(exp_str_wptr + 1 == exp_str + exp_str_sz); + rc = mp_snprint(decoded, exp_str_sz, mp_buff); + ok(rc == exp_str_sz - 1, "mp_snprint max nesting depth return value"); + ok(strcmp(decoded, exp_str) == 0, "mp_snprint max nesting depth result"); + free(decoded); + free(exp_str); + free(mp_buff); footer(); return check_plan(); } -- 2.20.1