From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Subject: Re: [tarantool-patches] Re: [PATCH v1 1/1] implement mp_stack class References: <20190117115843.nqellm5mmsglqnlf@tkn_work_nb> <842c55cd-5d3f-e857-42f4-94d648c88273@tarantool.org> <20190117163409.hvga32kxxgcc5sat@tkn_work_nb> <1ba698d1-8450-f2d3-7717-11adbf0d5f4f@tarantool.org> <20190121112541.hkqylj7hwcjeq526@tkn_work_nb> <2259a346-76dd-f721-176f-bfad39b25b5e@gmail.com> <20190121202529.r7wwnzcesdh34sda@esperanza> <09a7ccb3-fa8b-8691-ea26-c3146c42d839@tarantool.org> <20190122202136.uag7irkm2ltg3dkh@esperanza> <20190123100659.emxmwzoivrk4e3lh@esperanza> From: Kirill Shcherbatov Message-ID: Date: Wed, 23 Jan 2019 14:39:44 +0300 MIME-Version: 1.0 In-Reply-To: <20190123100659.emxmwzoivrk4e3lh@esperanza> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit To: tarantool-patches@freelists.org, Vladimir Davydov List-ID: Changes are required to account your comment (patch below): diff --git a/msgpuck.c b/msgpuck.c index 1ad2dd4..8f1710e 100644 --- a/msgpuck.c +++ b/msgpuck.c @@ -238,25 +238,25 @@ mp_format(char *data, size_t data_size, const char *format, ...) struct mp_stack stack; \ mp_stack_create(&stack, MP_PRINT_MAX_DEPTH, frames); \ next: \ - switch (mp_typeof(**data)) { \ + switch (mp_typeof(*data)) { \ case MP_NIL: \ - mp_decode_nil(data); \ + mp_decode_nil(&data); \ PRINTF("null"); \ break; \ case MP_UINT: \ - PRINTF("%llu", (unsigned long long) mp_decode_uint(data)); \ + PRINTF("%llu", (unsigned long long) mp_decode_uint(&data)); \ break; \ case MP_INT: \ - PRINTF("%lld", (long long) mp_decode_int(data)); \ + PRINTF("%lld", (long long) mp_decode_int(&data)); \ break; \ case MP_STR: \ case MP_BIN: \ { \ - uint32_t len = mp_typeof(**data) == MP_STR ? \ - mp_decode_strl(data) : mp_decode_binl(data); \ + uint32_t len = mp_typeof(*data) == MP_STR ? \ + mp_decode_strl(&data) : mp_decode_binl(&data); \ PRINTF("\""); \ const char *s; \ - for (s = *data; s < *data + len; s++) { \ + for (s = data; s < data + len; s++) { \ unsigned char c = (unsigned char ) *s; \ if (c < 128 && mp_char2escape[c] != NULL) { \ /* Escape character */ \ @@ -266,16 +266,17 @@ next: \ } \ } \ PRINTF("\""); \ - *data += len; \ + data += len; \ break; \ } \ case MP_ARRAY: \ case MP_MAP: \ { \ - enum mp_type type = mp_typeof(**data); \ + enum mp_type type = mp_typeof(*data); \ if (!mp_stack_is_full(&stack)) { \ uint32_t count = type == MP_ARRAY ? \ - mp_decode_array(data) : 2 * mp_decode_map(data);\ + mp_decode_array(&data) : \ + 2 * mp_decode_map(&data); \ mp_stack_push(&stack, type, count); \ } else { \ /* \ @@ -283,21 +284,21 @@ next: \ * fit onto the stack. \ */ \ PRINTF(type == MP_ARRAY ? "[...]" : "{...}"); \ - mp_next(data); \ + mp_next(&data); \ } \ break; \ } \ case MP_BOOL: \ - PRINTF(mp_decode_bool(data) ? "true" : "false"); \ + PRINTF(mp_decode_bool(&data) ? "true" : "false"); \ break; \ case MP_FLOAT: \ - PRINTF("%g", mp_decode_float(data)); \ + PRINTF("%g", mp_decode_float(&data)); \ break; \ case MP_DOUBLE: \ - PRINTF("%lg", mp_decode_double(data)); \ + PRINTF("%lg", mp_decode_double(&data)); \ break; \ case MP_EXT: \ - mp_next(data); \ + mp_next(&data); \ PRINTF("undefined"); \ break; \ default: \ @@ -323,9 +324,11 @@ next: \ } \ } -static inline int -mp_fprint_internal(FILE *file, const char **data) +int +mp_fprint(FILE *file, const char *data) { + if (!file) + file = stdout; int total_bytes = 0; #define HANDLE(FUN, ...) do { \ int bytes = FUN(file, __VA_ARGS__); \ @@ -341,16 +344,7 @@ MP_PRINT(PRINT) } int -mp_fprint(FILE *file, const char *data) -{ - if (!file) - file = stdout; - int res = mp_fprint_internal(file, &data); - return res; -} - -static inline int -mp_snprint_internal(char *buf, int size, const char **data) +mp_snprint(char *buf, int size, const char *data) { int total_bytes = 0; #define HANDLE(FUN, ...) do { \ @@ -374,9 +368,3 @@ MP_PRINT(PRINT) return total_bytes; } #undef MP_PRINT - -int -mp_snprint(char *buf, int size, const char *data) -{ - return mp_snprint_internal(buf, size, &data); -} diff --git a/test/msgpuck.c b/test/msgpuck.c index 71a4bfc..884883d 100644 --- a/test/msgpuck.c +++ b/test/msgpuck.c @@ -30,6 +30,7 @@ * SUCH DAMAGE. */ +#include #include #include #include @@ -843,30 +844,32 @@ test_mp_print() /* Test mp_snprint max nesting depth. */ int mp_buff_sz = MP_PRINT_MAX_DEPTH * mp_sizeof_array(1) + mp_sizeof_uint(1); - int template_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1; + int exp_str_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1; char *mp_buff = malloc(mp_buff_sz); - char *template = malloc(template_sz); - char *decoded = malloc(template_sz); + char *exp_str = malloc(exp_str_sz); + char *decoded = malloc(exp_str_sz); char *buff_wptr = mp_buff; - char *template_wptr = template; + char *exp_str_wptr = exp_str; for (int i = 0; i <= 2 * (MP_PRINT_MAX_DEPTH + 1); i++) { + int exp_str_rest = exp_str_sz - (exp_str_wptr - exp_str); + assert(exp_str_rest > 0); if (i < MP_PRINT_MAX_DEPTH + 1) { buff_wptr = mp_encode_array(buff_wptr, 1); - *(template_wptr++) = '['; + rc = snprintf(exp_str_wptr, exp_str_rest, "["); } else if (i == MP_PRINT_MAX_DEPTH + 1) { buff_wptr = mp_encode_uint(buff_wptr, 1); - template_wptr += sprintf(template_wptr, "..."); + rc = snprintf(exp_str_wptr, exp_str_rest, "..."); } else { - *(template_wptr++) = ']'; + rc = snprintf(exp_str_wptr, exp_str_rest, "]"); } + exp_str_wptr += rc; } - *(template_wptr++) = '\0'; - rc = mp_snprint(decoded, template_sz, mp_buff); - ok(rc == template_sz - 1, "mp_snprint max nesting depth return value"); - ok(memcmp(decoded, template, template_sz - 1) == 0, - "mp_snprint max nesting depth result"); + assert(exp_str_wptr + 1 == exp_str + exp_str_sz); + rc = mp_snprint(decoded, exp_str_sz, mp_buff); + ok(rc == exp_str_sz - 1, "mp_snprint max nesting depth return value"); + ok(strcmp(decoded, exp_str) == 0, "mp_snprint max nesting depth result"); free(decoded); - free(template); + free(exp_str); free(mp_buff); footer(); return check_plan(); ================================================================ Introduced a new mp_stack class to make complex msgpack parsing. The structure is needed in order to facilitate the decoding nested MP_ARRAY and MP_MAP msgpack containers without recursion. This allows you to determine that the parsing of the current container is complete. Reworked mp_snprint and mp_fprint routines to use mp_stack instead of recursion. Needed for https://github.com/tarantool/tarantool/issues/1012 --- msgpuck.c | 113 ++++++++++++++++----------------- msgpuck.h | 167 +++++++++++++++++++++++++++++++++++++++++++++++++ test/msgpuck.c | 41 ++++++++++-- 3 files changed, 258 insertions(+), 63 deletions(-) diff --git a/msgpuck.c b/msgpuck.c index 72b800c..8f1710e 100644 --- a/msgpuck.c +++ b/msgpuck.c @@ -232,27 +232,31 @@ mp_format(char *data, size_t data_size, const char *format, ...) return res; } -#define MP_PRINT(SELF, PRINTF) \ +#define MP_PRINT(PRINTF) \ { \ - switch (mp_typeof(**data)) { \ + struct mp_frame frames[MP_PRINT_MAX_DEPTH]; \ + struct mp_stack stack; \ + mp_stack_create(&stack, MP_PRINT_MAX_DEPTH, frames); \ +next: \ + switch (mp_typeof(*data)) { \ case MP_NIL: \ - mp_decode_nil(data); \ + mp_decode_nil(&data); \ PRINTF("null"); \ break; \ case MP_UINT: \ - PRINTF("%llu", (unsigned long long) mp_decode_uint(data)); \ + PRINTF("%llu", (unsigned long long) mp_decode_uint(&data)); \ break; \ case MP_INT: \ - PRINTF("%lld", (long long) mp_decode_int(data)); \ + PRINTF("%lld", (long long) mp_decode_int(&data)); \ break; \ case MP_STR: \ case MP_BIN: \ { \ - uint32_t len = mp_typeof(**data) == MP_STR ? \ - mp_decode_strl(data) : mp_decode_binl(data); \ + uint32_t len = mp_typeof(*data) == MP_STR ? \ + mp_decode_strl(&data) : mp_decode_binl(&data); \ PRINTF("\""); \ const char *s; \ - for (s = *data; s < *data + len; s++) { \ + for (s = data; s < data + len; s++) { \ unsigned char c = (unsigned char ) *s; \ if (c < 128 && mp_char2escape[c] != NULL) { \ /* Escape character */ \ @@ -262,59 +266,69 @@ mp_format(char *data, size_t data_size, const char *format, ...) } \ } \ PRINTF("\""); \ - *data += len; \ + data += len; \ break; \ } \ case MP_ARRAY: \ - { \ - uint32_t count = mp_decode_array(data); \ - PRINTF("["); \ - uint32_t i; \ - for (i = 0; i < count; i++) { \ - if (i) \ - PRINTF(", "); \ - SELF(data); \ - } \ - PRINTF("]"); \ - break; \ - } \ case MP_MAP: \ { \ - uint32_t count = mp_decode_map(data); \ - PRINTF("{"); \ - uint32_t i; \ - for (i = 0; i < count; i++) { \ - if (i) \ - PRINTF(", "); \ - SELF(data); \ - PRINTF(": "); \ - SELF(data); \ + enum mp_type type = mp_typeof(*data); \ + if (!mp_stack_is_full(&stack)) { \ + uint32_t count = type == MP_ARRAY ? \ + mp_decode_array(&data) : \ + 2 * mp_decode_map(&data); \ + mp_stack_push(&stack, type, count); \ + } else { \ + /* \ + * Skip msgpack items that do not \ + * fit onto the stack. \ + */ \ + PRINTF(type == MP_ARRAY ? "[...]" : "{...}"); \ + mp_next(&data); \ } \ - PRINTF("}"); \ break; \ } \ case MP_BOOL: \ - PRINTF(mp_decode_bool(data) ? "true" : "false"); \ + PRINTF(mp_decode_bool(&data) ? "true" : "false"); \ break; \ case MP_FLOAT: \ - PRINTF("%g", mp_decode_float(data)); \ + PRINTF("%g", mp_decode_float(&data)); \ break; \ case MP_DOUBLE: \ - PRINTF("%lg", mp_decode_double(data)); \ + PRINTF("%lg", mp_decode_double(&data)); \ break; \ case MP_EXT: \ - mp_next(data); \ + mp_next(&data); \ PRINTF("undefined"); \ break; \ default: \ mp_unreachable(); \ return -1; \ } \ + while (!mp_stack_is_empty(&stack)) { \ + struct mp_frame *frame = mp_stack_top(&stack); \ + if (!mp_stack_advance(&stack)) { \ + if (frame->curr == 1) \ + PRINTF(frame->type == MP_ARRAY ? "[" : "{"); \ + PRINTF(frame->type == MP_ARRAY ? "]" : "}"); \ + mp_stack_pop(&stack); \ + } else { \ + if (frame->curr == 1) \ + PRINTF(frame->type == MP_ARRAY ? "[" : "{"); \ + else if (frame->type == MP_MAP && frame->curr % 2 == 0) \ + PRINTF(": "); \ + else \ + PRINTF(", "); \ + goto next; \ + } \ + } \ } -static inline int -mp_fprint_internal(FILE *file, const char **data) +int +mp_fprint(FILE *file, const char *data) { + if (!file) + file = stdout; int total_bytes = 0; #define HANDLE(FUN, ...) do { \ int bytes = FUN(file, __VA_ARGS__); \ @@ -323,25 +337,14 @@ mp_fprint_internal(FILE *file, const char **data) total_bytes += bytes; \ } while (0) #define PRINT(...) HANDLE(fprintf, __VA_ARGS__) -#define SELF(...) HANDLE(mp_fprint_internal, __VA_ARGS__) -MP_PRINT(SELF, PRINT) +MP_PRINT(PRINT) #undef HANDLE -#undef SELF #undef PRINT return total_bytes; } int -mp_fprint(FILE *file, const char *data) -{ - if (!file) - file = stdout; - int res = mp_fprint_internal(file, &data); - return res; -} - -static inline int -mp_snprint_internal(char *buf, int size, const char **data) +mp_snprint(char *buf, int size, const char *data) { int total_bytes = 0; #define HANDLE(FUN, ...) do { \ @@ -359,17 +362,9 @@ mp_snprint_internal(char *buf, int size, const char **data) } \ } while (0) #define PRINT(...) HANDLE(snprintf, __VA_ARGS__) -#define SELF(...) HANDLE(mp_snprint_internal, __VA_ARGS__) -MP_PRINT(SELF, PRINT) +MP_PRINT(PRINT) #undef HANDLE -#undef SELF #undef PRINT return total_bytes; } #undef MP_PRINT - -int -mp_snprint(char *buf, int size, const char *data) -{ - return mp_snprint_internal(buf, size, &data); -} diff --git a/msgpuck.h b/msgpuck.h index b0d4967..e2a90fa 100644 --- a/msgpuck.h +++ b/msgpuck.h @@ -1176,6 +1176,122 @@ mp_next(const char **data); MP_PROTO int mp_check(const char **data, const char *end); +/** + * The maximum msgpack nesting depth supported by mp_snprint(). + * Everything beyond that will be omitted (replaced with "..."). + */ +#ifndef MP_PRINT_MAX_DEPTH +#define MP_PRINT_MAX_DEPTH 32 +#endif + +/** Message pack MP_MAP or MP_ARRAY container descriptor. */ +struct mp_frame { + /** MP frame type calculated with mp_typeof(). */ + enum mp_type type; + /** + * Total number of items in MP_MAP or MP_ARRAY container + * calculated with mp_decode_map() or mp_decode_array(). + */ + uint32_t size; + /** + * Count of items already processed. Must be less than or + * equal to mp_frame::size member. + */ + uint32_t curr; +}; + +/** + * Stack of map/array descriptors mp_frame to do complex msgpack + * parse. The structure is needed in order to facilitate + * decoding nested MP_ARRAY and MP_MAP msgpack containers without + * recursion. This allows to determine that the parsing of the + * current container is complete. +*/ +struct mp_stack { + /** The maximum stack depth. */ + uint32_t size; + /** + * Count of used stack frames. Corresponds to the index + * in the array to perform the push operation. Must be + * less or equal to mp_stack::size member. + */ + uint32_t used; + /** + * Array of size mp_stack::size of mp_frames. + */ + struct mp_frame *frames; +}; + +/** + * \brief Initialize mp_stack \a stack of specified size \a size + * and user-allocated array \a frames. + * The \a frames allocation must have at least \a size mp_frame + * items. + * \param stack - the pointer to a mp_stack to initialize. + * \param size - stack size, count of stack::frames to use. + * \param frames - mp_frame preallocated array of size \a size + * of struct mp_frame items + */ +MP_PROTO void +mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames); + +/** + * \brief Test if mp_stack \a stack is empty. + * \param stack - the pointer to a mp_stack to a stack to test. + * \retval true if mp_stack is empty, false otherwise. + */ +MP_PROTO bool +mp_stack_is_empty(struct mp_stack *stack); + +/** + * \brief Test if mp_stack \a stack is full. + * \param stack - the pointer to a mp_stack to a stack to test. + * \retval true if mp_stack is full, false otherwise. + */ +MP_PROTO bool +mp_stack_is_full(struct mp_stack *stack); + +/** + * \brief Pop the top mp_stack \a stack frame. + * \param stack - the pointer to a mp_stack to operate with. + * \pre mp_stack_is_empty(stack) == false + */ +MP_PROTO void +mp_stack_pop(struct mp_stack *stack); + +/** + * \brief Get a pointer to a top mp_stack \a stack frame. + * \param stack - the pointer to a mp_stack to operate with. + * \retval a pointer to a top stack frame. + * \pre mp_stack_is_empty(stack) == false + */ +MP_PROTO struct mp_frame * +mp_stack_top(struct mp_stack *stack); + +/** + * \brief Construct a new mp_frame and push it on to mp_stack + * \a stack. + * \param stack - the pointer to a stack to operate with. + * \param type - the type of mp_frame to create. + * \param size - size size of mp_frame to create. + * \pre mp_stack_is_full(stack) == false + */ +MP_PROTO void +mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size); + +/** + * \brief Advance a mp_stack_top()::\a curr attribute of the + * \a stack. The function also signals when it moves over the + * last item. + * \param stack - the pointer to a stack to operate with + * \retval false if the top stack frame has been fully processed: + * mp_stack_top()::\a curr > mp_stack_top()::\a size; + * true otherwise. + * \pre mp_stack_is_empty(stack) == false + */ +MP_PROTO bool +mp_stack_advance(struct mp_stack *stack); + /* * }}} */ @@ -2176,6 +2292,57 @@ mp_check(const char **data, const char *end) return 0; } +MP_IMPL void +mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames) +{ + stack->frames = frames; + stack->size = size; + stack->used = 0; +} + +MP_IMPL bool +mp_stack_is_empty(struct mp_stack *stack) +{ + return stack->used == 0; +} + +MP_IMPL bool +mp_stack_is_full(struct mp_stack *stack) +{ + return stack->used >= stack->size; +} + +MP_IMPL void +mp_stack_pop(struct mp_stack *stack) +{ + assert(!mp_stack_is_empty(stack)); + --stack->used; +} + +MP_IMPL struct mp_frame * +mp_stack_top(struct mp_stack *stack) +{ + assert(!mp_stack_is_empty(stack)); + return &stack->frames[stack->used - 1]; +} + +MP_IMPL void +mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t size) +{ + assert(!mp_stack_is_full(stack)); + uint32_t idx = stack->used++; + stack->frames[idx].type = type; + stack->frames[idx].size = size; + stack->frames[idx].curr = 0; +} + +MP_IMPL bool +mp_stack_advance(struct mp_stack *stack) +{ + struct mp_frame *frame = mp_stack_top(stack); + return ++frame->curr <= frame->size; +} + /** \endcond */ /* diff --git a/test/msgpuck.c b/test/msgpuck.c index 9265453..884883d 100644 --- a/test/msgpuck.c +++ b/test/msgpuck.c @@ -30,6 +30,7 @@ * SUCH DAMAGE. */ +#include #include #include #include @@ -764,15 +765,16 @@ test_format(void) int test_mp_print() { - plan(10); + plan(12); header(); char msgpack[128]; char *d = msgpack; - d = mp_encode_array(d, 6); + d = mp_encode_array(d, 8); d = mp_encode_int(d, -5); d = mp_encode_uint(d, 42); d = mp_encode_str(d, "kill bill", 9); + d = mp_encode_array(d, 0); d = mp_encode_map(d, 6); d = mp_encode_str(d, "bool true", 9); d = mp_encode_bool(d, true); @@ -791,13 +793,14 @@ test_mp_print() *d++ = 0; char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r"; d = mp_encode_bin(d, bin, sizeof(bin)); + d = mp_encode_map(d, 0); assert(d <= msgpack + sizeof(msgpack)); const char *expected = - "[-5, 42, \"kill bill\", " + "[-5, 42, \"kill bill\", [], " "{\"bool true\": true, \"bool false\": false, \"null\": null, " "\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, " - "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\"]"; + "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]"; int esize = strlen(expected); char result[256]; @@ -838,6 +841,36 @@ test_mp_print() int rc = mp_fprint(stdin, msgpack); is(rc, -1, "mp_fprint I/O error"); + /* Test mp_snprint max nesting depth. */ + int mp_buff_sz = MP_PRINT_MAX_DEPTH * mp_sizeof_array(1) + + mp_sizeof_uint(1); + int exp_str_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1; + char *mp_buff = malloc(mp_buff_sz); + char *exp_str = malloc(exp_str_sz); + char *decoded = malloc(exp_str_sz); + char *buff_wptr = mp_buff; + char *exp_str_wptr = exp_str; + for (int i = 0; i <= 2 * (MP_PRINT_MAX_DEPTH + 1); i++) { + int exp_str_rest = exp_str_sz - (exp_str_wptr - exp_str); + assert(exp_str_rest > 0); + if (i < MP_PRINT_MAX_DEPTH + 1) { + buff_wptr = mp_encode_array(buff_wptr, 1); + rc = snprintf(exp_str_wptr, exp_str_rest, "["); + } else if (i == MP_PRINT_MAX_DEPTH + 1) { + buff_wptr = mp_encode_uint(buff_wptr, 1); + rc = snprintf(exp_str_wptr, exp_str_rest, "..."); + } else { + rc = snprintf(exp_str_wptr, exp_str_rest, "]"); + } + exp_str_wptr += rc; + } + assert(exp_str_wptr + 1 == exp_str + exp_str_sz); + rc = mp_snprint(decoded, exp_str_sz, mp_buff); + ok(rc == exp_str_sz - 1, "mp_snprint max nesting depth return value"); + ok(strcmp(decoded, exp_str) == 0, "mp_snprint max nesting depth result"); + free(decoded); + free(exp_str); + free(mp_buff); footer(); return check_plan(); } -- 2.19.2