[PATCH v2 1/1] Implement mp_stack class

Kirill Shcherbatov kshcherbatov at tarantool.org
Sun Feb 3 13:21:43 MSK 2019


From: Kirill Shcherbatov <kshcherbatov at gmail.com>

Introduced a new mp_stack class to make complex msgpack parsing.
The structure is needed in order to facilitate the decoding
nested MP_ARRAY and MP_MAP msgpack containers without recursion.
This allows you to determine that the parsing of the current
container is complete.
Reworked mp_snprint and mp_fprint routines to use mp_stack
instead of recursion.

Needed for https://github.com/tarantool/tarantool/issues/1012
---
 msgpuck.c      | 110 ++++++++++++++---------------
 msgpuck.h      | 187 +++++++++++++++++++++++++++++++++++++++++++++++++
 test/msgpuck.c |  41 +++++++++--
 3 files changed, 275 insertions(+), 63 deletions(-)

diff --git a/msgpuck.c b/msgpuck.c
index 72b800c..cb93d68 100644
--- a/msgpuck.c
+++ b/msgpuck.c
@@ -232,27 +232,31 @@ mp_format(char *data, size_t data_size, const char *format, ...)
 	return res;
 }
 
-#define MP_PRINT(SELF, PRINTF) \
+#define MP_PRINT(PRINTF) \
 {										\
-	switch (mp_typeof(**data)) {						\
+	struct mp_frame frames[MP_PRINT_MAX_DEPTH];				\
+	struct mp_stack stack;							\
+	mp_stack_create(&stack, MP_PRINT_MAX_DEPTH, frames);			\
+next:										\
+	switch (mp_typeof(*data)) {						\
 	case MP_NIL:								\
-		mp_decode_nil(data);						\
+		mp_decode_nil(&data);						\
 		PRINTF("null");							\
 		break;								\
 	case MP_UINT:								\
-		PRINTF("%llu", (unsigned long long) mp_decode_uint(data));	\
+		PRINTF("%llu", (unsigned long long) mp_decode_uint(&data));	\
 		break;								\
 	case MP_INT:								\
-		PRINTF("%lld", (long long) mp_decode_int(data));		\
+		PRINTF("%lld", (long long) mp_decode_int(&data));		\
 		break;								\
 	case MP_STR:								\
 	case MP_BIN:								\
 	{									\
-		uint32_t len = mp_typeof(**data) == MP_STR ?			\
-			mp_decode_strl(data) : mp_decode_binl(data);		\
+		uint32_t len = mp_typeof(*data) == MP_STR ?			\
+			mp_decode_strl(&data) : mp_decode_binl(&data);		\
 		PRINTF("\"");							\
 		const char *s;							\
-		for (s = *data; s < *data + len; s++) {				\
+		for (s = data; s < data + len; s++) {				\
 			unsigned char c = (unsigned char ) *s;			\
 			if (c < 128 && mp_char2escape[c] != NULL) {		\
 				/* Escape character */				\
@@ -262,59 +266,66 @@ mp_format(char *data, size_t data_size, const char *format, ...)
 			}							\
 		}								\
 		PRINTF("\"");							\
-		*data += len;							\
+		data += len;							\
 		break;								\
 	}									\
 	case MP_ARRAY:								\
-	{									\
-		uint32_t count = mp_decode_array(data);				\
-		PRINTF("[");							\
-		uint32_t i;							\
-		for (i = 0; i < count; i++) {					\
-			if (i)							\
-				PRINTF(", ");					\
-			SELF(data);						\
-		}								\
-		PRINTF("]");							\
-		break;								\
-	}									\
 	case MP_MAP:								\
 	{									\
-		uint32_t count = mp_decode_map(data);				\
-		PRINTF("{");							\
-		uint32_t i;							\
-		for (i = 0; i < count; i++) {					\
-			if (i)							\
-				PRINTF(", ");					\
-			SELF(data);						\
-			PRINTF(": ");						\
-			SELF(data);						\
+		enum mp_type type = mp_typeof(*data);				\
+		if (!mp_stack_is_full(&stack)) {				\
+			uint32_t count = type == MP_ARRAY ?			\
+					 mp_decode_array(&data) :		\
+					 2 * mp_decode_map(&data);		\
+			mp_stack_push(&stack, type, count);			\
+		} else {							\
+			/*							\
+			* Skip msgpack items that do not			\
+			* fit onto the stack.					\
+			*/							\
+			PRINTF(type == MP_ARRAY ? "[...]" : "{...}");		\
+			mp_next(&data);						\
 		}								\
-		PRINTF("}");							\
 		break;								\
 	}									\
 	case MP_BOOL:								\
-		PRINTF(mp_decode_bool(data) ? "true" : "false");		\
+		PRINTF(mp_decode_bool(&data) ? "true" : "false");		\
 		break;								\
 	case MP_FLOAT:								\
-		PRINTF("%g", mp_decode_float(data));				\
+		PRINTF("%g", mp_decode_float(&data));				\
 		break;								\
 	case MP_DOUBLE:								\
-		PRINTF("%lg", mp_decode_double(data));				\
+		PRINTF("%lg", mp_decode_double(&data));				\
 		break;								\
 	case MP_EXT:								\
-		mp_next(data);							\
+		mp_next(&data);							\
 		PRINTF("undefined");						\
 		break;								\
 	default:								\
 		mp_unreachable();						\
 		return -1;							\
 	}									\
+	while (!mp_stack_is_empty(&stack)) {					\
+		enum mp_type type = mp_stack_type(&stack);			\
+		int curr = mp_stack_advance(&stack);				\
+		if (curr == 0 || mp_stack_count(&stack) == 0)			\
+			PRINTF(type == MP_ARRAY ? "[" : "{");			\
+		if (curr == -1) {						\
+			PRINTF(type == MP_ARRAY ? "]" : "}");			\
+			mp_stack_pop(&stack);					\
+			continue;						\
+		} else if (curr != 0) {						\
+			PRINTF(type == MP_MAP && curr % 2 == 1 ? ": " : ", ");	\
+		}								\
+		goto next;							\
+	}									\
 }
 
-static inline int
-mp_fprint_internal(FILE *file, const char **data)
+int
+mp_fprint(FILE *file, const char *data)
 {
+	if (!file)
+		file = stdout;
 	int total_bytes = 0;
 #define HANDLE(FUN, ...) do {							\
 	int bytes = FUN(file, __VA_ARGS__);					\
@@ -323,25 +334,14 @@ mp_fprint_internal(FILE *file, const char **data)
 	total_bytes += bytes;							\
 } while (0)
 #define PRINT(...) HANDLE(fprintf, __VA_ARGS__)
-#define SELF(...) HANDLE(mp_fprint_internal, __VA_ARGS__)
-MP_PRINT(SELF, PRINT)
+MP_PRINT(PRINT)
 #undef HANDLE
-#undef SELF
 #undef PRINT
 	return total_bytes;
 }
 
 int
-mp_fprint(FILE *file, const char *data)
-{
-	if (!file)
-		file = stdout;
-	int res = mp_fprint_internal(file, &data);
-	return res;
-}
-
-static inline int
-mp_snprint_internal(char *buf, int size, const char **data)
+mp_snprint(char *buf, int size, const char *data)
 {
 	int total_bytes = 0;
 #define HANDLE(FUN, ...) do {							\
@@ -359,17 +359,9 @@ mp_snprint_internal(char *buf, int size, const char **data)
 	}									\
 } while (0)
 #define PRINT(...) HANDLE(snprintf, __VA_ARGS__)
-#define SELF(...) HANDLE(mp_snprint_internal, __VA_ARGS__)
-MP_PRINT(SELF, PRINT)
+MP_PRINT(PRINT)
 #undef HANDLE
-#undef SELF
 #undef PRINT
 	return total_bytes;
 }
 #undef MP_PRINT
-
-int
-mp_snprint(char *buf, int size, const char *data)
-{
-	return mp_snprint_internal(buf, size, &data);
-}
diff --git a/msgpuck.h b/msgpuck.h
index b0d4967..15a6c9b 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -1176,6 +1176,133 @@ mp_next(const char **data);
 MP_PROTO int
 mp_check(const char **data, const char *end);
 
+/**
+ * The maximum msgpack nesting depth supported by mp_snprint().
+ * Everything beyond that will be omitted (replaced with "...").
+ */
+#ifndef MP_PRINT_MAX_DEPTH
+#define MP_PRINT_MAX_DEPTH 32
+#endif
+
+/** Message pack MP_MAP or MP_ARRAY container descriptor. */
+struct mp_frame {
+	/**
+	 * MP frame type calculated with mp_typeof().
+	 */
+	enum mp_type type;
+	/**
+	 * Total number of items in MP_MAP or MP_ARRAY container
+	 * calculated with mp_decode_map() or mp_decode_array().
+	 */
+	int count;
+	/**
+	 * Index of currently processing item. Must be less than
+	 * mp_frame::count member.
+	 */
+	int curr;
+};
+
+/**
+ * Stack of map/array descriptors mp_frame to do complex msgpack
+ * parse. The structure is needed in order to facilitate
+ * decoding nested MP_ARRAY and MP_MAP msgpack containers without
+ * recursion. This allows to determine that the parsing of the
+ * current container is complete.
+*/
+struct mp_stack {
+	/**
+	 * The maximum stack depth.
+	 */
+	int size;
+	/**
+	 * Count of used stack frames. Corresponds to the index
+	 * in the array to perform the push operation. Must be
+	 * less or equal to mp_stack::size member.
+	 */
+	int used;
+	/**
+	 * Array of size mp_stack::size of mp_frames.
+	 */
+	struct mp_frame *frames;
+};
+
+/**
+ * \brief Initialize mp_stack \a stack of specified size \a size
+ * and user-allocated array \a frames.
+ * The \a frames allocation must have at least \a size mp_frame
+ * items.
+ * \param stack - the pointer to a mp_stack to initialize.
+ * \param size - stack size, count of stack::frames to use.
+ * \param frames - mp_frame preallocated array of size \a size
+ *                 of struct mp_frame items
+ */
+MP_PROTO void
+mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames);
+
+/**
+ * \brief Test if mp_stack \a stack is empty.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ * \retval true if mp_stack is empty, false otherwise.
+ */
+MP_PROTO bool
+mp_stack_is_empty(struct mp_stack *stack);
+
+/**
+ * \brief Test if mp_stack \a stack is full.
+ * \param stack - the pointer to a mp_stack to a stack to test.
+ * \retval true if mp_stack is full, false otherwise.
+ */
+MP_PROTO bool
+mp_stack_is_full(struct mp_stack *stack);
+
+/**
+ * \brief Pop the top mp_stack \a stack frame.
+ * \param stack - the pointer to a mp_stack to operate with.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO void
+mp_stack_pop(struct mp_stack *stack);
+
+/**
+ * \brief Construct a new mp_frame and push it on to mp_stack
+ * \a stack.
+ * \param stack - the pointer to a stack to operate with.
+ * \param type - the type of mp_frame to create.
+ * \param count - the count of itemes of mp_frame to create.
+ * \pre mp_stack_is_full(stack) == false
+ */
+MP_PROTO void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t count);
+
+/**
+ * \brief Get type attribute of the \a stack top frame.
+ * \param stack - the pointer to a stack to operate with.
+ * \retval enum mp_type value - the top stack frame type.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO enum mp_type
+mp_stack_type(struct mp_stack *stack);
+
+/**
+ * \brief Get count attribute of the \a stack top frame.
+ * \param stack - the pointer to a stack to operate with.
+ * \retval count - the top stack frame items count.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO int
+mp_stack_count(struct mp_stack *stack);
+
+/**
+ * \brief Advance curr attribute of the \a stack top frame.
+ * \param stack - the pointer to a stack to operate with.
+ * \retval index of the element to process if the top
+ *         mp_frame::curr is less than top mp_frame::count field.
+ *         -1 otherwise.
+ * \pre mp_stack_is_empty(stack) == false
+ */
+MP_PROTO int
+mp_stack_advance(struct mp_stack *stack);
+
 /*
  * }}}
  */
@@ -2176,6 +2303,66 @@ mp_check(const char **data, const char *end)
 	return 0;
 }
 
+MP_IMPL void
+mp_stack_create(struct mp_stack *stack, uint32_t size, struct mp_frame *frames)
+{
+	stack->frames = frames;
+	stack->size = size;
+	stack->used = 0;
+}
+
+MP_IMPL bool
+mp_stack_is_empty(struct mp_stack *stack)
+{
+	return stack->used == 0;
+}
+
+MP_IMPL bool
+mp_stack_is_full(struct mp_stack *stack)
+{
+	return stack->used >= stack->size;
+}
+
+MP_IMPL void
+mp_stack_pop(struct mp_stack *stack)
+{
+	assert(!mp_stack_is_empty(stack));
+	--stack->used;
+}
+
+MP_IMPL void
+mp_stack_push(struct mp_stack *stack, enum mp_type type, uint32_t count)
+{
+	assert(!mp_stack_is_full(stack));
+	uint32_t idx = stack->used++;
+	stack->frames[idx].type = type;
+	stack->frames[idx].count = count;
+	stack->frames[idx].curr = -1;
+}
+
+MP_IMPL enum mp_type
+mp_stack_type(struct mp_stack *stack)
+{
+	assert(!mp_stack_is_empty(stack));
+	return stack->frames[stack->used - 1].type;
+}
+
+MP_IMPL int
+mp_stack_count(struct mp_stack *stack)
+{
+	return stack->frames[stack->used - 1].count;
+}
+
+MP_IMPL int
+mp_stack_advance(struct mp_stack *stack)
+{
+	assert(!mp_stack_is_empty(stack));
+	struct mp_frame *frame = &stack->frames[stack->used - 1];
+	if (frame->curr < frame->count - 1)
+		return ++frame->curr;
+	return -1;
+}
+
 /** \endcond */
 
 /*
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 9265453..884883d 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -30,6 +30,7 @@
  * SUCH DAMAGE.
  */
 
+#include <assert.h>
 #include <string.h>
 #include <stdlib.h>
 #include <inttypes.h>
@@ -764,15 +765,16 @@ test_format(void)
 int
 test_mp_print()
 {
-	plan(10);
+	plan(12);
 	header();
 
 	char msgpack[128];
 	char *d = msgpack;
-	d = mp_encode_array(d, 6);
+	d = mp_encode_array(d, 8);
 	d = mp_encode_int(d, -5);
 	d = mp_encode_uint(d, 42);
 	d = mp_encode_str(d, "kill bill", 9);
+	d = mp_encode_array(d, 0);
 	d = mp_encode_map(d, 6);
 	d = mp_encode_str(d, "bool true", 9);
 	d = mp_encode_bool(d, true);
@@ -791,13 +793,14 @@ test_mp_print()
 	*d++ = 0;
 	char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r";
 	d = mp_encode_bin(d, bin, sizeof(bin));
+	d = mp_encode_map(d, 0);
 	assert(d <= msgpack + sizeof(msgpack));
 
 	const char *expected =
-		"[-5, 42, \"kill bill\", "
+		"[-5, 42, \"kill bill\", [], "
 		"{\"bool true\": true, \"bool false\": false, \"null\": null, "
 		"\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, "
-		"\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\"]";
+		"\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]";
 	int esize = strlen(expected);
 
 	char result[256];
@@ -838,6 +841,36 @@ test_mp_print()
 	int rc = mp_fprint(stdin, msgpack);
 	is(rc, -1, "mp_fprint I/O error");
 
+	/* Test mp_snprint max nesting depth. */
+	int mp_buff_sz = MP_PRINT_MAX_DEPTH * mp_sizeof_array(1) +
+			 mp_sizeof_uint(1);
+	int exp_str_sz = 2 * (MP_PRINT_MAX_DEPTH + 1) + 3 + 1;
+	char *mp_buff = malloc(mp_buff_sz);
+	char *exp_str = malloc(exp_str_sz);
+	char *decoded = malloc(exp_str_sz);
+	char *buff_wptr = mp_buff;
+	char *exp_str_wptr = exp_str;
+	for (int i = 0; i <= 2 * (MP_PRINT_MAX_DEPTH + 1); i++) {
+		int exp_str_rest = exp_str_sz - (exp_str_wptr - exp_str);
+		assert(exp_str_rest > 0);
+		if (i < MP_PRINT_MAX_DEPTH + 1) {
+			buff_wptr = mp_encode_array(buff_wptr, 1);
+			rc = snprintf(exp_str_wptr, exp_str_rest, "[");
+		} else if (i == MP_PRINT_MAX_DEPTH + 1) {
+			buff_wptr = mp_encode_uint(buff_wptr, 1);
+			rc = snprintf(exp_str_wptr, exp_str_rest, "...");
+		} else {
+			rc = snprintf(exp_str_wptr, exp_str_rest, "]");
+		}
+		exp_str_wptr += rc;
+	}
+	assert(exp_str_wptr + 1 == exp_str + exp_str_sz);
+	rc = mp_snprint(decoded, exp_str_sz, mp_buff);
+	ok(rc == exp_str_sz - 1, "mp_snprint max nesting depth return value");
+	ok(strcmp(decoded, exp_str) == 0, "mp_snprint max nesting depth result");
+	free(decoded);
+	free(exp_str);
+	free(mp_buff);
 	footer();
 	return check_plan();
 }
-- 
2.20.1




More information about the Tarantool-patches mailing list