[Tarantool-patches] [PATCH msgpuck 1/2] Return recursion to mp_snprint() and mp_fprint()

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Tue May 12 02:46:01 MSK 2020


The commit partially reverts 5a1d91ae2830d9a1f1201c9026724ae3e32d5324
"Implement mp_stack class". The mp_stack struct is still alive,
but is not used for printing anymore.

Despite mp_stack printer looked nice, it still wasn't good enough
by a couple of reasons:

- On every print it allocated all 32 stack frames;

- It wasn't flexible. The solution stops being so simple, when
  number of states for every stack frame grows drastically. This
  is the case of the future MP_EXT serializer.

Lets elaborate the last problem. MP_EXT serializer in theory can
do 3 possible things:

- Return a plain text ready to print. For example, a decimal
  extension may return a ready-to-print string with a floating
  point number. Or UUID may return a UUID string.

- Return a pointer to non-MP_EXT MessagePack to continue
  serialization by msgpuck built-in means. For example, if an
  object is a mere MP_MAP behind MP_EXT, then the serializer could
  just remove MP_EXT header and let the other part be serialized
  normally.

- In case of a complex object underneath, with non-MessagePack
  elements in it, with some of its MessagePack internals needed to
  be substituted before printing, with necessity to avoid copying
  and recursion (otherwise mp_stack becomes useless anyway), the
  serializer can return a tree-like structure with pointers at
  raw MessagePack, at plain text, at more nested objects of this
  kind. For example, consider this:

  MP_EXT: MP_ERROR: <MP_MAP> {
    MP_ERROR_STACK 0x00: <MP_ARRAY> [
        <MP_MAP> {
            MP_ERROR_TYPE 0x00: <MP_STR>,
            MP_ERROR_FILE 0x01: <MP_STR>,
            MP_ERROR_PAYLOAD 0x02: <MP_MAP> {
                ...
            }
        },
        ...
    ],
    ...
  }

  If this would be printed using the previous way (trim MP_EXT and
  print normally), this would look like this:

  {0: [{0: 'clienterror', 1: 'test.c', 2: {...}}]}

  However it may be needed to serialize it like this:

  {'stack': [{'type': 'clienterror', 'file': 'test.c', 'payload': {...}}]}

  So the original MP_ERROR object should be split into a
  non-binary tree, where nodes can point at plain static data
  (strings 'type', 'file'), at the original MessagePack (payload
  should point at the original object, to avoid copying and
  recursion), at an array or map of these objects (like 'stack').

This runs out of control even quicker, when comes realization,
that all the 3 return ways may return something, what needs
destruction after printing.

However complexity level still would be bearable if the last
option wouldn't make the stack stop being a stack. Indeed,
whatever it returns, if it is not a scalar ready-to-print plain
object, it should be pushed on the stack. When push a tree on the
stack, it stops being a stack. Since we will start working not
with top of the stack.

The commit makes decision to just return recursion into
mp_snprint() and mp_fprint() functions. That allows to use
recursion in future mp_ext serializers as well, and makes to code
more similar to all the other serializers we have.

Part of https://github.com/tarantool/tarantool/issues/4719
---
 msgpuck.c | 133 ++++++++++++++++++++++++++++++++----------------------
 1 file changed, 79 insertions(+), 54 deletions(-)

diff --git a/msgpuck.c b/msgpuck.c
index d0ffb83..7ea86e8 100644
--- a/msgpuck.c
+++ b/msgpuck.c
@@ -232,31 +232,28 @@ mp_format(char *data, size_t data_size, const char *format, ...)
 	return res;
 }
 
-#define MP_PRINT(PRINTF) \
+#define MP_PRINT(SELF, PRINTF) \
 {										\
-	struct mp_frame frames[MP_PRINT_MAX_DEPTH];				\
-	struct mp_stack stack;							\
-	mp_stack_create(&stack, MP_PRINT_MAX_DEPTH, frames);			\
-next:										\
-	switch (mp_typeof(*data)) {						\
+	switch (mp_typeof(**data)) {						\
 	case MP_NIL:								\
-		mp_decode_nil(&data);						\
+		mp_decode_nil(data);						\
 		PRINTF("null");							\
 		break;								\
 	case MP_UINT:								\
-		PRINTF("%llu", (unsigned long long) mp_decode_uint(&data));	\
+		PRINTF("%llu", (unsigned long long) mp_decode_uint(data));	\
 		break;								\
 	case MP_INT:								\
-		PRINTF("%lld", (long long) mp_decode_int(&data));		\
+		PRINTF("%lld", (long long) mp_decode_int(data));		\
 		break;								\
 	case MP_STR:								\
 	case MP_BIN:								\
 	{									\
-		uint32_t len = mp_typeof(*data) == MP_STR ?			\
-			mp_decode_strl(&data) : mp_decode_binl(&data);		\
+		uint32_t len = mp_typeof(**data) == MP_STR ?			\
+			mp_decode_strl(data) : mp_decode_binl(data);		\
 		PRINTF("\"");							\
 		const char *s;							\
-		for (s = data; s < data + len; s++) {				\
+		const char *end = *data + len;					\
+		for (s = *data; s < end; s++) {					\
 			unsigned char c = (unsigned char ) *s;			\
 			if (c < 128 && mp_char2escape[c] != NULL) {		\
 				/* Escape character */				\
@@ -266,84 +263,102 @@ next:										\
 			}							\
 		}								\
 		PRINTF("\"");							\
-		data += len;							\
+		*data = end;							\
 		break;								\
 	}									\
 	case MP_ARRAY:								\
+	{									\
+		PRINTF("[");							\
+		if (depth <= 0) {						\
+			PRINTF("...");						\
+			mp_next(data);						\
+		} else {							\
+			--depth;						\
+			uint32_t count = mp_decode_array(data);			\
+			for (uint32_t i = 0; i < count; i++) {			\
+				if (i)						\
+					PRINTF(", ");				\
+				SELF(data);					\
+			}							\
+			++depth;						\
+		}								\
+		PRINTF("]");							\
+		break;								\
+	}									\
 	case MP_MAP:								\
 	{									\
-		enum mp_type type = mp_typeof(*data);				\
-		if (!mp_stack_is_full(&stack)) {				\
-			uint32_t count = type == MP_ARRAY ?			\
-					 mp_decode_array(&data) :		\
-					 2 * mp_decode_map(&data);		\
-			mp_stack_push(&stack, type, count);			\
+		PRINTF("{");							\
+		if (depth <= 0) {						\
+			PRINTF("...");						\
+			mp_next(data);						\
 		} else {							\
-			/*							\
-			* Skip msgpack items that do not			\
-			* fit onto the stack.					\
-			*/							\
-			PRINTF(type == MP_ARRAY ? "[...]" : "{...}");		\
-			mp_next(&data);						\
+			--depth;						\
+			uint32_t count = mp_decode_map(data);			\
+			for (uint32_t i = 0; i < count; i++) {			\
+				if (i)						\
+					PRINTF(", ");				\
+				SELF(data);					\
+				PRINTF(": ");					\
+				SELF(data);					\
+			}							\
+			++depth;						\
 		}								\
+		PRINTF("}");							\
 		break;								\
 	}									\
 	case MP_BOOL:								\
-		PRINTF(mp_decode_bool(&data) ? "true" : "false");		\
+		PRINTF(mp_decode_bool(data) ? "true" : "false");		\
 		break;								\
 	case MP_FLOAT:								\
-		PRINTF("%g", mp_decode_float(&data));				\
+		PRINTF("%g", mp_decode_float(data));				\
 		break;								\
 	case MP_DOUBLE:								\
-		PRINTF("%lg", mp_decode_double(&data));				\
+		PRINTF("%lg", mp_decode_double(data));				\
 		break;								\
 	case MP_EXT:								\
-		mp_next(&data);							\
+		mp_next(data);							\
 		PRINTF("undefined");						\
 		break;								\
 	default:								\
 		mp_unreachable();						\
 		return -1;							\
 	}									\
-	while (!mp_stack_is_empty(&stack)) {					\
-		struct mp_frame *frame = mp_stack_top(&stack);			\
-		if (frame->idx < 0)						\
-			PRINTF(frame->type == MP_ARRAY ? "[" : "{");		\
-		if (!mp_frame_advance(frame)) {					\
-			PRINTF(frame->type == MP_ARRAY ? "]" : "}");		\
-			mp_stack_pop(&stack);					\
-			continue;						\
-		} else if (frame->idx != 0) {					\
-			PRINTF(frame->type == MP_MAP && frame->idx % 2 == 1 ?	\
-			       ": " : ", ");					\
-		}								\
-		goto next;							\
-	}									\
 }
 
-int
-mp_fprint(FILE *file, const char *data)
+static int
+mp_fprint_recursion(FILE *file, const char **data, int depth)
 {
-	if (!file)
-		file = stdout;
 	int total_bytes = 0;
-#define PRINT(...) do {								\
-	int bytes = fprintf(file, __VA_ARGS__);					\
+#define HANDLE(FUN, ...) do {							\
+	int bytes = FUN(file, __VA_ARGS__);					\
 	if (mp_unlikely(bytes < 0))						\
 		return -1;							\
 	total_bytes += bytes;							\
 } while (0)
-	MP_PRINT(PRINT)
+#define PRINT(...) HANDLE(fprintf, __VA_ARGS__)
+#define SELF(...) HANDLE(mp_fprint_recursion, __VA_ARGS__, depth)
+MP_PRINT(SELF, PRINT)
+#undef HANDLE
+#undef SELF
 #undef PRINT
 	return total_bytes;
 }
 
 int
-mp_snprint(char *buf, int size, const char *data)
+mp_fprint(FILE *file, const char *data)
+{
+	if (!file)
+		file = stdout;
+	int res = mp_fprint_recursion(file, &data, MP_PRINT_MAX_DEPTH);
+	return res;
+}
+
+static int
+mp_snprint_recursion(char *buf, int size, const char **data, int depth)
 {
 	int total_bytes = 0;
-#define PRINT(...) do {								\
-	int bytes = snprintf(buf, size, __VA_ARGS__);				\
+#define HANDLE(FUN, ...) do {							\
+	int bytes = FUN(buf, size, __VA_ARGS__);				\
 	if (mp_unlikely(bytes < 0))						\
 		return -1;							\
 	total_bytes += bytes;							\
@@ -356,8 +371,18 @@ mp_snprint(char *buf, int size, const char *data)
 		size = 0;							\
 	}									\
 } while (0)
-	MP_PRINT(PRINT)
+#define PRINT(...) HANDLE(snprintf, __VA_ARGS__)
+#define SELF(...) HANDLE(mp_snprint_recursion, __VA_ARGS__, depth)
+MP_PRINT(SELF, PRINT)
+#undef HANDLE
+#undef SELF
 #undef PRINT
 	return total_bytes;
 }
 #undef MP_PRINT
+
+int
+mp_snprint(char *buf, int size, const char *data)
+{
+	return mp_snprint_recursion(buf, size, &data, MP_PRINT_MAX_DEPTH);
+}
-- 
2.21.1 (Apple Git-122.3)



More information about the Tarantool-patches mailing list