[PATCH v3] Add MsgPack ext types handling.

Serge Petrenko sergepetrenko at tarantool.org
Mon Jun 3 16:14:25 MSK 2019


Add the ability to encode/decode MsgPack ext types.

Needed-for tarantool/tarantool#692
---
https://github.com/tarantool/msgpuck/tree/sp/add-extension-types
https://github.com/tarantool/tarantool/issues/692

Changes in v3:
  - unify api with str/strl and bin/binl
  - add more tests
  - a fix in mp_sizeof_ext()
  - some minor fixes

Changes in v2:
  - fixed and simplified mp_check_ext.
    It was wrong by one byte.
  - only use fixext types when the length fits
    exactly (is 1, 2, 4, 8 or 16);
    otherwise use ext 8

 hints.c        |  24 +++++++
 msgpuck.c      |  20 +++++-
 msgpuck.h      | 181 +++++++++++++++++++++++++++++++++++++++++++++++++
 test/msgpuck.c | 143 +++++++++++++++++++++++++++++++-------
 4 files changed, 342 insertions(+), 26 deletions(-)

diff --git a/hints.c b/hints.c
index be859e9..d9b3fe7 100644
--- a/hints.c
+++ b/hints.c
@@ -672,3 +672,27 @@ const char *mp_char2escape[128] = {
 	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 	NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\u007f"
 };
+
+/*
+ * This lookup table is used by mp_sizeof_extl() and mp_encode_extl()
+ * to map a data length (index is length - 1) to its fixext code
+ * (fixext 1, 2, 4, 8 or 16); 0 means there is no fixext for it.
+ */
+const uint8_t mp_ext_hint[16] = {
+	0xd4,  /*  1 */
+	0xd5,  /*  2 */
+	   0,  /*  3 */
+	0xd6,  /*  4 */
+	   0,  /*  5 */
+	   0,  /*  6 */
+	   0,  /*  7 */
+	0xd7,  /*  8 */
+	   0,  /*  9 */
+	   0,  /* 10 */
+	   0,  /* 11 */
+	   0,  /* 12 */
+	   0,  /* 13 */
+	   0,  /* 14 */
+	   0,  /* 15 */
+	0xd8   /* 16 */
+};
diff --git a/msgpuck.c b/msgpuck.c
index d0ffb83..e5f7e25 100644
--- a/msgpuck.c
+++ b/msgpuck.c
@@ -33,6 +33,17 @@
 #define MP_LIBRARY 1
 #include "msgpuck.h"
 
+const char *
+mp_ext_type_str(enum mp_ext_type type)
+{
+	switch(type) {
+	case MP_EXT_TIMESTAMP:
+		return "timestamp";
+	default:
+		return "undefined";
+	}
+}
+
 size_t
 mp_vformat(char *data, size_t data_size, const char *format, va_list vl)
 {
@@ -298,9 +309,14 @@ next:										\
 		PRINTF("%lg", mp_decode_double(&data));				\
 		break;								\
 	case MP_EXT:								\
-		mp_next(&data);							\
-		PRINTF("undefined");						\
+	{									\
+		int8_t type;							\
+		uint32_t len;							\
+		len = mp_decode_extl(&data, &type);				\
+		PRINTF("%s", mp_ext_type_str(type));	    			\
+		data += len;							\
 		break;								\
+	}									\
 	default:								\
 		mp_unreachable();						\
 		return -1;							\
diff --git a/msgpuck.h b/msgpuck.h
index 6b29cd6..12ae9c1 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -382,6 +382,13 @@ enum mp_type {
 	MP_EXT
 };
 
+/**
+ * \brief MsgPack extension data types.
+ */
+enum mp_ext_type {
+	MP_EXT_TIMESTAMP = -1
+};
+
 /**
  * \brief Determine MsgPack type by a first byte \a c of encoded data.
  *
@@ -532,6 +539,81 @@ mp_check_map(const char *cur, const char *end);
 MP_PROTO uint32_t
 mp_decode_map(const char **data);
 
+/**
+ * \brief Calculate exact buffer size needed to store
+ * ext header for a value of length \a len.
+ * \param len value length in bytes.
+ * \retval buffer size in bytes
+ */
+MP_PROTO uint32_t
+mp_sizeof_extl(uint32_t len);
+
+/**
+ * \brief Equivalent to mp_sizeof_extl(\a len) + \a len.
+ * \param len - an extension data length
+ * \return size in chars (max is 6 + \a len)
+ */
+MP_PROTO uint32_t
+mp_sizeof_ext(uint32_t len);
+
+/**
+ * \brief Encode extension header with \a type and
+ * value length \a len.
+ * The value must be encoded after the header.
+ * \return \a data + \link mp_sizeof_extl() mp_sizeof_extl(len)\endlink
+ */
+MP_PROTO char *
+mp_encode_extl(char *data, int8_t type, uint32_t len);
+
+/**
+ * \brief Encode extension data of length \a len.
+ * The function is equivalent to mp_encode_extl() + memcpy.
+ * \param data - a buffer
+ * \param ext_data - a pointer to extension data
+ * \param type - extension type to encode
+ * \param len - an extension data length
+ * \return \a data + mp_sizeof_ext(\a len) ==
+ * data + mp_sizeof_extl(\a len) + \a len
+ * \sa mp_encode_extl
+ */
+MP_PROTO char *
+mp_encode_ext(char *data, char *ext_data, int8_t type, uint32_t len);
+
+/**
+ * \brief Check that \a cur buffer has enough bytes to decode an ext header.
+ * \param cur buffer
+ * \param end end of the buffer
+ * \retval 0 - buffer has enough bytes
+ * \retval > 0 - the number of remaining bytes to read
+ * \pre cur < end
+ * \pre mp_typeof(*cur) == MP_EXT
+ */
+MP_PROTO MP_PURE ptrdiff_t
+mp_check_extl(const char *cur, const char *end);
+
+/**
+ * \brief Decode an extension header from MsgPack \a data.
+ *
+ * The extension data itself must be decoded after the header.
+ * \param data - the pointer to a buffer.
+ * \param type - decoded type of the following value.
+ * \retval - the length of the following ext value.
+ * \post *data = *data + mp_sizeof_extl(length)
+ */
+MP_PROTO uint32_t
+mp_decode_extl(const char **data, int8_t *type);
+
+/**
+ * \brief Decode an extension from MsgPack \a data
+ * \param data - the pointer to a buffer
+ * \param type - the pointer to save extension type
+ * \param len - the pointer to save extension data length
+ * \return a pointer to decoded extension data
+ * \post *data = *data + mp_sizeof_ext(*len)
+ */
+MP_PROTO const char *
+mp_decode_ext(const char **data, int8_t *type, uint32_t *len);
+
 /**
  * \brief Calculate exact buffer size needed to store an integer \a num.
  * Maximum return value is 9. For performance reasons you can preallocate
@@ -1326,6 +1408,7 @@ mp_frame_advance(struct mp_frame *frame);
 extern const enum mp_type mp_type_hint[];
 extern const int8_t mp_parser_hint[];
 extern const char *mp_char2escape[];
+extern const uint8_t mp_ext_hint[];
 
 MP_IMPL MP_ALWAYSINLINE enum mp_type
 mp_typeof(const char c)
@@ -1462,6 +1545,104 @@ mp_decode_map(const char **data)
 	}
 }
 
+MP_IMPL uint32_t
+mp_sizeof_extl(uint32_t len)
+{
+	if (len && len <= 16 && mp_ext_hint[len-1]) return 2;
+	if (len <= UINT8_MAX) return 3;
+	if (len <= UINT16_MAX) return 4;
+	else return 6;
+}
+
+MP_IMPL uint32_t
+mp_sizeof_ext(uint32_t len)
+{
+	return mp_sizeof_extl(len) + len;
+}
+
+MP_IMPL char *
+mp_encode_extl(char *data, int8_t type, uint32_t len)
+{
+	/*
+	 * Only use fixext when length is exactly 1, 2, 4, 8 or 16.
+	 * Otherwise use ext 8 if length <= 255.
+	 */
+	if (len && len <= 16 && mp_ext_hint[len-1]) {
+		data = mp_store_u8(data, mp_ext_hint[len-1]);
+	} else if (len <= UINT8_MAX) {
+		data = mp_store_u8(data, 0xc7);
+		data = mp_store_u8(data, (uint8_t) len);
+	} else if (len <= UINT16_MAX) {
+		data = mp_store_u8(data, 0xc8);
+		data = mp_store_u16(data, (uint16_t) len);
+	} else {
+		data = mp_store_u8(data, 0xc9);
+		data = mp_store_u32(data,len);
+	}
+	data = mp_store_u8(data, type);
+	return data;
+}
+
+MP_IMPL char *
+mp_encode_ext(char *data, char *ext_data, int8_t type, uint32_t len)
+{
+	data = mp_encode_extl(data, type, len);
+	memcpy(data, ext_data, len);
+	return data + len;
+}
+
+MP_IMPL ptrdiff_t
+mp_check_extl(const char *cur, const char *end)
+{
+	assert(cur < end);
+	assert(mp_typeof(*cur) == MP_EXT);
+	uint8_t c = mp_load_u8(&cur);
+	if ((c & 0xf0) == 0xd0) {
+		return 1 - (end - cur);
+	}
+
+	assert(c >= 0xc7 && c <= 0xc9);
+	return (1 << (c - 0xc7)) + 1 - (end - cur); /* 0xc7 -> 2, 0xc8 -> 3, 0xc9 ->5 */
+}
+
+MP_IMPL uint32_t
+mp_decode_extl(const char **data, int8_t *type) {
+	uint8_t c = mp_load_u8(data);
+	uint32_t len;
+	switch 	 (c) {
+	case 0xd4:
+	case 0xd5:
+	case 0xd6:
+	case 0xd7:
+	case 0xd8:
+		len = 1u << (c - 0xd4);
+		break;
+	case 0xc7:
+		len = mp_load_u8(data);
+		break;
+	case 0xc8:
+		len = mp_load_u16(data);
+		break;
+	case 0xc9:
+		len = mp_load_u32(data);
+		break;
+	default:
+		mp_unreachable();
+	}
+	*type = mp_load_u8(data);
+	return len;
+}
+
+MP_IMPL const char *
+mp_decode_ext(const char **data, int8_t *type, uint32_t *len) {
+	assert(len != NULL);
+
+	*len = mp_decode_extl(data, type);
+	const char *ext_data = *data;
+	*data += *len;
+	return ext_data;
+}
+
 MP_IMPL uint32_t
 mp_sizeof_uint(uint64_t num)
 {
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 3b31bfc..c2aa70f 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -53,14 +53,17 @@ static char *data = buf + 1; /* use unaligned address to fail early */
 #define SCALAR(x) x
 #define COMPLEX(x)
 
-#define DEFINE_TEST(_type, _complex, _v, _r, _rl) ({                           \
-	const char *d1 = mp_encode_##_type(data, (_v));                        \
+#define COMMA ,
+
+#define DEFINE_TEST(_type, _complex, _ext, _v, _r, _rl) ({		       \
+	_ext(int8_t ext_type = 0);					       \
+	const char *d1 = mp_encode_##_type(data, _ext(ext_type COMMA) (_v));   \
 	const char *d2 = data;                                                 \
 	_complex(const char *d3 = data);                                       \
 	_complex(const char *d4 = data);                                       \
 	note(""#_type" "#_v"");                                                \
 	is(mp_check_##_type(data, d1), 0, "mp_check_"#_type"("#_v") == 0");    \
-	is(mp_decode_##_type(&d2), (_v), "mp_decode(mp_encode("#_v")) == "#_v);\
+	is(mp_decode_##_type(&d2 _ext(COMMA &ext_type)), (_v), "mp_decode(mp_encode("#_v")) == "#_v);\
 	_complex(mp_next(&d3));                                                \
 	_complex(ok(!mp_check(&d4, d3 + _rl), "mp_check("#_v")"));             \
 	is((d1 - data), (_rl), "len(mp_encode_"#_type"("#_v")");               \
@@ -72,20 +75,23 @@ static char *data = buf + 1; /* use unaligned address to fail early */
 	})
 
 
-#define DEFINE_TEST_STRBIN(_type, _vl) ({                                      \
+#define DEFINE_TEST_STRBINEXT(_type, _not_ext, _ext, _vl) ({		       \
 	note(""#_type" len="#_vl"");                                           \
 	char *s1 = str;                                                        \
 	for (uint32_t i = 0; i < _vl; i++) {                                   \
 		s1[i] = 'a' + i % 26;                                          \
-	}                                                                      \
-	const char *d1 = mp_encode_##_type(data, s1, _vl);                     \
+	}								       \
+	_ext(int8_t ext_type = 0);					       \
+	const char *d1 = mp_encode_##_type(data, s1, _ext(ext_type COMMA) _vl);\
 	const char *d2;                                                        \
 	uint32_t len2;                                                         \
 	d2 = data;                                                             \
-	const char *s2 = mp_decode_##_type(&d2, &len2);                        \
+	const char *s2 = mp_decode_##_type(&d2, _ext(&ext_type COMMA) &len2);  \
 	is(_vl, len2, "len(mp_decode_"#_type"(x, %u))", _vl);                  \
+	_ext(is(ext_type, 0, "type(mp_decode_"#_type"(x))"));		       \
 	d2 = data;                                                             \
-	(void) mp_decode_strbin(&d2, &len2);                                   \
+	_not_ext((void) mp_decode_strbin(&d2, &len2));                         \
+	_ext((void) mp_decode_ext(&d2, &ext_type, &len2));		       \
 	is(_vl, len2, "len(mp_decode_strbin(x, %u))", _vl);                    \
 	const char *d3 = data;                                                 \
 	mp_next(&d3);                                                          \
@@ -100,17 +106,19 @@ static char *data = buf + 1; /* use unaligned address to fail early */
 	is(memcmp(s1, s2, _vl), 0, "mp_encode_"#_type"(x, "#_vl") == x");      \
 })
 
-#define test_uint(...)   DEFINE_TEST(uint, SCALAR, __VA_ARGS__)
-#define test_int(...)    DEFINE_TEST(int, SCALAR, __VA_ARGS__)
-#define test_bool(...)   DEFINE_TEST(bool, SCALAR, __VA_ARGS__)
-#define test_float(...)  DEFINE_TEST(float, SCALAR, __VA_ARGS__)
-#define test_double(...) DEFINE_TEST(double, SCALAR, __VA_ARGS__)
-#define test_strl(...)   DEFINE_TEST(strl, COMPLEX, __VA_ARGS__)
-#define test_binl(...)   DEFINE_TEST(binl, COMPLEX, __VA_ARGS__)
-#define test_array(...)  DEFINE_TEST(array, COMPLEX, __VA_ARGS__)
-#define test_map(...)    DEFINE_TEST(map, COMPLEX, __VA_ARGS__)
-#define test_str(...)    DEFINE_TEST_STRBIN(str, __VA_ARGS__)
-#define test_bin(...)    DEFINE_TEST_STRBIN(bin, __VA_ARGS__)
+#define test_uint(...)   DEFINE_TEST(uint, SCALAR, COMPLEX, __VA_ARGS__)
+#define test_int(...)    DEFINE_TEST(int, SCALAR, COMPLEX, __VA_ARGS__)
+#define test_bool(...)   DEFINE_TEST(bool, SCALAR, COMPLEX, __VA_ARGS__)
+#define test_float(...)  DEFINE_TEST(float, SCALAR, COMPLEX, __VA_ARGS__)
+#define test_double(...) DEFINE_TEST(double, SCALAR, COMPLEX, __VA_ARGS__)
+#define test_strl(...)   DEFINE_TEST(strl, COMPLEX, COMPLEX, __VA_ARGS__)
+#define test_binl(...)   DEFINE_TEST(binl, COMPLEX, COMPLEX, __VA_ARGS__)
+#define test_extl(...)	 DEFINE_TEST(extl, COMPLEX, SCALAR, __VA_ARGS__)
+#define test_array(...)  DEFINE_TEST(array, COMPLEX, COMPLEX, __VA_ARGS__)
+#define test_map(...)    DEFINE_TEST(map, COMPLEX, COMPLEX, __VA_ARGS__)
+#define test_str(...)    DEFINE_TEST_STRBINEXT(str, SCALAR, COMPLEX, __VA_ARGS__)
+#define test_bin(...)    DEFINE_TEST_STRBINEXT(bin, SCALAR, COMPLEX, __VA_ARGS__)
+#define test_ext(...)	 DEFINE_TEST_STRBINEXT(ext, COMPLEX, SCALAR, __VA_ARGS__)
 
 static int
 test_uints(void)
@@ -343,6 +351,53 @@ test_binls(void)
 	return check_plan();
 }
 
+static int
+test_extls(void)
+{
+	plan(168);
+	header();
+
+	/* fixext 1,2,4,8,16 */
+	test_extl(0x01U, "\xd4\x00", 2);
+	test_extl(0x02U, "\xd5\x00", 2);
+	test_extl(0x04U, "\xd6\x00", 2);
+	test_extl(0x08U, "\xd7\x00", 2);
+	test_extl(0x10U, "\xd8\x00", 2);
+
+	/* ext 8 */
+	test_extl(0x11U, "\xc7\x11\x00", 3);
+	test_extl(0xfeU, "\xc7\xfe\x00", 3);
+	test_extl(0xffU, "\xc7\xff\x00", 3);
+
+	test_extl(0x00U, "\xc7\x00\x00", 3);
+	test_extl(0x03U, "\xc7\x03\x00", 3);
+	test_extl(0x05U, "\xc7\x05\x00", 3);
+	test_extl(0x06U, "\xc7\x06\x00", 3);
+	test_extl(0x07U, "\xc7\x07\x00", 3);
+	test_extl(0x09U, "\xc7\x09\x00", 3);
+	test_extl(0x0aU, "\xc7\x0a\x00", 3);
+	test_extl(0x0bU, "\xc7\x0b\x00", 3);
+	test_extl(0x0cU, "\xc7\x0c\x00", 3);
+	test_extl(0x0dU, "\xc7\x0d\x00", 3);
+	test_extl(0x0eU, "\xc7\x0e\x00", 3);
+	test_extl(0x0fU, "\xc7\x0f\x00", 3);
+
+	/* ext 16 */
+	test_extl(0x0100U, "\xc8\x01\x00\x00", 4);
+	test_extl(0x0101U, "\xc8\x01\x01\x00", 4);
+	test_extl(0xfffeU, "\xc8\xff\xfe\x00", 4);
+	test_extl(0xffffU, "\xc8\xff\xff\x00", 4);
+
+	/* ext 32 */
+	test_extl(0x00010000U, "\xc9\x00\x01\x00\x00\x00", 6);
+	test_extl(0x00010001U, "\xc9\x00\x01\x00\x01\x00", 6);
+	test_extl(0xfffffffeU, "\xc9\xff\xff\xff\xfe\x00", 6);
+	test_extl(0xffffffffU, "\xc9\xff\xff\xff\xff\x00", 6);
+
+	footer();
+	return check_plan();
+}
+
 static int
 test_strs(void)
 {
@@ -389,6 +444,45 @@ test_bins(void)
 	return check_plan();
 }
 
+static int
+test_exts(void)
+{
+	plan(225);
+	header();
+
+	test_ext(0x01);
+	test_ext(0x02);
+	test_ext(0x03);
+	test_ext(0x04);
+	test_ext(0x05);
+	test_ext(0x06);
+	test_ext(0x07);
+	test_ext(0x08);
+	test_ext(0x09);
+	test_ext(0x0a);
+	test_ext(0x0b);
+	test_ext(0x0c);
+	test_ext(0x0d);
+	test_ext(0x0e);
+	test_ext(0x0f);
+	test_ext(0x10);
+
+	test_ext(0x11);
+	test_ext(0xfe);
+	test_ext(0xff);
+
+	test_ext(0x0100);
+	test_ext(0x0101);
+	test_ext(0xfffe);
+	test_ext(0xffff);
+
+	test_ext(0x00010000);
+	test_ext(0x00010001);
+
+	footer();
+	return check_plan();
+}
+
 static void
 test_next_on_array(uint32_t count)
 {
@@ -789,9 +883,8 @@ test_mp_print()
 	d = mp_encode_double(d, 3.14);
 	d = mp_encode_uint(d, 100);
 	d = mp_encode_uint(d, 500);
-	*d++ = 0xd4; /* let's pack smallest fixed ext */
-	*d++ = 0;
-	*d++ = 0;
+	/* let's pack zero-length ext */
+	d = mp_encode_extl(d, MP_EXT_TIMESTAMP, 0);
 	char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r";
 	d = mp_encode_bin(d, bin, sizeof(bin));
 	d = mp_encode_map(d, 0);
@@ -800,7 +893,7 @@ test_mp_print()
 	const char *expected =
 		"[-5, 42, \"kill bill\", [], "
 		"{\"bool true\": true, \"bool false\": false, \"null\": null, "
-		"\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, "
+		"\"float\": 3.14, \"double\": 3.14, 100: 500}, timestamp, "
 		"\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]";
 	int esize = strlen(expected);
 
@@ -1133,7 +1226,7 @@ test_overflow()
 
 int main()
 {
-	plan(20);
+	plan(22);
 	test_uints();
 	test_ints();
 	test_bools();
@@ -1142,8 +1235,10 @@ int main()
 	test_nils();
 	test_strls();
 	test_binls();
+	test_extls();
 	test_strs();
 	test_bins();
+	test_exts();
 	test_arrays();
 	test_maps();
 	test_next_on_arrays();
-- 
2.20.1 (Apple Git-117)




More information about the Tarantool-patches mailing list