From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Serge Petrenko Subject: [PATCH v3] Add MsgPack ext types handling. Date: Mon, 3 Jun 2019 16:14:25 +0300 Message-Id: <20190603131425.29405-1-sergepetrenko@tarantool.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit To: vdavydov.dev@gmail.com Cc: tarantool-patches@freelists.org, georgy@tarantool.org, kostja@tarantool.org, Serge Petrenko List-ID: Add the ability to encode/decode MsgPack ext types. Needed-for tarantool/tarantool#692 --- https://github.com/tarantool/msgpuck/tree/sp/add-extension-types https://github.com/tarantool/tarantool/issues/692 Changes in v3: - unify api with str/strl and bin/binl - add more tests - a fix in mp_sizeof_ext() - some minor fixes Changes in v2: - fixed and simplified mp_check_ext. It was wrong by one byte. - only use fixext types when length fits exactly (is 1, 2, 4, 8 or 16) Otherwise use ext 8 hints.c | 24 +++++++ msgpuck.c | 20 +++++- msgpuck.h | 181 +++++++++++++++++++++++++++++++++++++++++++++++++ test/msgpuck.c | 143 +++++++++++++++++++++++++++++++------- 4 files changed, 342 insertions(+), 26 deletions(-) diff --git a/hints.c b/hints.c index be859e9..d9b3fe7 100644 --- a/hints.c +++ b/hints.c @@ -672,3 +672,27 @@ const char *mp_char2escape[128] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\u007f" }; + +/* + * This lookup table is used by mp_encode_ext() to + * determine ext code (fixext 1, fixext 2, fixext 4, fixext 8, + * fixext 16) to encode using size. 
+ */ +const uint8_t mp_ext_hint[16] = { + 0xd4, /* 1 */ + 0xd5, /* 2 */ + 0, /* 3 */ + 0xd6, /* 4 */ + 0, /* 5 */ + 0, /* 6 */ + 0, /* 7 */ + 0xd7, /* 8 */ + 0, /* 9 */ + 0, /* 10 */ + 0, /* 11 */ + 0, /* 12 */ + 0, /* 13 */ + 0, /* 14 */ + 0, /* 15 */ + 0xd8 /* 16 */ +}; diff --git a/msgpuck.c b/msgpuck.c index d0ffb83..e5f7e25 100644 --- a/msgpuck.c +++ b/msgpuck.c @@ -33,6 +33,17 @@ #define MP_LIBRARY 1 #include "msgpuck.h" +const char * +mp_ext_type_str(enum mp_ext_type type) +{ + switch(type) { + case MP_EXT_TIMESTAMP: + return "timestamp"; + default: + return "undefined"; + } +} + size_t mp_vformat(char *data, size_t data_size, const char *format, va_list vl) { @@ -298,9 +309,14 @@ next: \ PRINTF("%lg", mp_decode_double(&data)); \ break; \ case MP_EXT: \ - mp_next(&data); \ - PRINTF("undefined"); \ + { \ + int8_t type; \ + uint32_t len; \ + len = mp_decode_extl(&data, &type); \ + PRINTF("%s", mp_ext_type_str(type)); \ + data += len; \ break; \ + } \ default: \ mp_unreachable(); \ return -1; \ diff --git a/msgpuck.h b/msgpuck.h index 6b29cd6..12ae9c1 100644 --- a/msgpuck.h +++ b/msgpuck.h @@ -382,6 +382,13 @@ enum mp_type { MP_EXT }; +/** + * \brief MsgPack extension data types. + */ +enum mp_ext_type { + MP_EXT_TIMESTAMP = -1 +}; + /** * \brief Determine MsgPack type by a first byte \a c of encoded data. * @@ -532,6 +539,81 @@ mp_check_map(const char *cur, const char *end); MP_PROTO uint32_t mp_decode_map(const char **data); +/** + * \brief Calculate exact buffer size needed to store + * ext header for a value of length \a len. + * \param len value length in bytes. + * \retval buffer size in bytes + */ +MP_PROTO uint32_t +mp_sizeof_extl(uint32_t len); + +/** + * \brief Equivalent to mp_sizeof_extl(\a len) + \a len. + * \param len - an extension data length + * \return size in chars (max is 6 + \a len) + */ +MP_PROTO uint32_t +mp_sizeof_ext(uint32_t len); + +/** + * \brief Encode extension header with \a type and + * value length \a len. 
+ * The value must be encoded after the header. + * \return \a data + \link mp_sizeof_extl() mp_sizeof_extl(len)\endlink + */ +MP_PROTO char * +mp_encode_extl(char *data, int8_t type, uint32_t len); + +/** + * \brief Encode extension data of length \a len. + * The function is equivalent to mp_encode_extl() + memcpy. + * \param data - a buffer + * \param ext_data - a pointer to extension data + * \param type - extension type to encode + * \param len - an extension data length + * \return \a data + mp_sizeof_ext(\a len) == + * data + mp_sizeof_extl(\a len) + \a len + * \sa mp_encode_strl + */ +MP_PROTO char * +mp_encode_ext(char *data, char *ext_data, int8_t type, uint32_t len); + +/** + * \brief Check that \a cur buffer has enough bytes to decode an ext header. + * \param cur buffer + * \param end end of the buffer + * \retval 0 - buffer has enough bytes + * \retval > 0 - the number of remaining bytes to read + * \pre cur < end + * \pre mp_typeof(*cur) == MP_EXT + */ +MP_PROTO MP_PURE ptrdiff_t +mp_check_extl(const char *cur, const char *end); + +/** + * \brief Decode an extension header from MsgPack \a data. + * + * The extension type value must be decoded after the header. + * \param data - the pointer to a buffer. + * \param type - decoded type of the following value. + * \retval - the length of the following ext value. + * \post *data = *data + mp_sizeof_extl(length) + */ +MP_PROTO uint32_t +mp_decode_extl(const char **data, int8_t *type); + +/** + * \brief Decode an extension from MsgPack \a data + * \param data - the pointer to a buffer + * \param type - the pointer to save extension type + * \param len - the pointer to save extension data length + * \return a pointer to decoded extension data + * \post *data = *data + mp_sizeof_ext(*len) + */ +MP_PROTO const char * +mp_decode_ext(const char **data, int8_t *type, uint32_t *len); + /** * \brief Calculate exact buffer size needed to store an integer \a num. * Maximum return value is 9. 
For performance reasons you can preallocate @@ -1326,6 +1408,7 @@ mp_frame_advance(struct mp_frame *frame); extern const enum mp_type mp_type_hint[]; extern const int8_t mp_parser_hint[]; extern const char *mp_char2escape[]; +extern const uint8_t mp_ext_hint[]; MP_IMPL MP_ALWAYSINLINE enum mp_type mp_typeof(const char c) @@ -1462,6 +1545,104 @@ mp_decode_map(const char **data) } } +MP_IMPL uint32_t +mp_sizeof_extl(uint32_t len) +{ + if (len && len <= 16 && mp_ext_hint[len-1]) return 2; + if (len <= UINT8_MAX) return 3; + if (len <= UINT16_MAX) return 4; + else return 6; +} + +MP_IMPL uint32_t +mp_sizeof_ext(uint32_t len) +{ + return mp_sizeof_extl(len) + len; +} + +MP_IMPL char * +mp_encode_extl(char *data, int8_t type, uint32_t len) +{ + /* + * Only use fixext when length is exactly 1, 2, 4, 8 or 16. + * Otherwise use ext 8 if length <= 255. + */ + if (len && len <= 16 && mp_ext_hint[len-1]) { + data = mp_store_u8(data, mp_ext_hint[len-1]); + } else if (len <= UINT8_MAX) { + data = mp_store_u8(data, 0xc7); + data = mp_store_u8(data, (uint8_t) len); + } else if (len <= UINT16_MAX) { + data = mp_store_u8(data, 0xc8); + data = mp_store_u16(data, (uint16_t) len); + } else { + data = mp_store_u8(data, 0xc9); + data = mp_store_u32(data,len); + } + data = mp_store_u8(data, type); + return data; +} + +MP_IMPL char * +mp_encode_ext(char *data, char *ext_data, int8_t type, uint32_t len) +{ + data = mp_encode_extl(data, type, len); + memcpy(data, ext_data, len); + return data + len; +} + +MP_IMPL ptrdiff_t +mp_check_extl(const char *cur, const char *end) +{ + assert(cur < end); + assert(mp_typeof(*cur) == MP_EXT); + uint8_t c = mp_load_u8(&cur); + if ((c & 0xf0) == 0xd0) { + return 1 - (end - cur); + } + + assert(c >= 0xc7 && c <= 0xc9); + return (1 << (c - 0xc7)) + 1 - (end - cur); /* 0xc7 -> 2, 0xc8 -> 3, 0xc9 ->5 */ +} + +MP_IMPL uint32_t +mp_decode_extl(const char **data, int8_t *type) { + uint8_t c = mp_load_u8(data); + uint32_t len; + switch (c) { + case 0xd4: + case 
0xd5: + case 0xd6: + case 0xd7: + case 0xd8: + len = 1u << (c - 0xd4); + break; + case 0xc7: + len = mp_load_u8(data); + break; + case 0xc8: + len = mp_load_u16(data); + break; + case 0xc9: + len = mp_load_u32(data); + break; + default: + mp_unreachable(); + } + *type = mp_load_u8(data); + return len; +} + +MP_IMPL const char * +mp_decode_ext(const char **data, int8_t *type, uint32_t *len) { + assert(len != NULL); + + *len = mp_decode_extl(data, type); + const char *ext_data = *data; + *data += *len; + return ext_data; +} + MP_IMPL uint32_t mp_sizeof_uint(uint64_t num) { diff --git a/test/msgpuck.c b/test/msgpuck.c index 3b31bfc..c2aa70f 100644 --- a/test/msgpuck.c +++ b/test/msgpuck.c @@ -53,14 +53,17 @@ static char *data = buf + 1; /* use unaligned address to fail early */ #define SCALAR(x) x #define COMPLEX(x) -#define DEFINE_TEST(_type, _complex, _v, _r, _rl) ({ \ - const char *d1 = mp_encode_##_type(data, (_v)); \ +#define COMMA , + +#define DEFINE_TEST(_type, _complex, _ext, _v, _r, _rl) ({ \ + _ext(int8_t ext_type = 0); \ + const char *d1 = mp_encode_##_type(data, _ext(ext_type COMMA) (_v)); \ const char *d2 = data; \ _complex(const char *d3 = data); \ _complex(const char *d4 = data); \ note(""#_type" "#_v""); \ is(mp_check_##_type(data, d1), 0, "mp_check_"#_type"("#_v") == 0"); \ - is(mp_decode_##_type(&d2), (_v), "mp_decode(mp_encode("#_v")) == "#_v);\ + is(mp_decode_##_type(&d2 _ext(COMMA &ext_type)), (_v), "mp_decode(mp_encode("#_v")) == "#_v);\ _complex(mp_next(&d3)); \ _complex(ok(!mp_check(&d4, d3 + _rl), "mp_check("#_v")")); \ is((d1 - data), (_rl), "len(mp_encode_"#_type"("#_v")"); \ @@ -72,20 +75,23 @@ static char *data = buf + 1; /* use unaligned address to fail early */ }) -#define DEFINE_TEST_STRBIN(_type, _vl) ({ \ +#define DEFINE_TEST_STRBINEXT(_type, _not_ext, _ext, _vl) ({ \ note(""#_type" len="#_vl""); \ char *s1 = str; \ for (uint32_t i = 0; i < _vl; i++) { \ s1[i] = 'a' + i % 26; \ - } \ - const char *d1 = mp_encode_##_type(data, s1, 
_vl); \ + } \ + _ext(int8_t ext_type = 0); \ + const char *d1 = mp_encode_##_type(data, s1, _ext(ext_type COMMA) _vl);\ const char *d2; \ uint32_t len2; \ d2 = data; \ - const char *s2 = mp_decode_##_type(&d2, &len2); \ + const char *s2 = mp_decode_##_type(&d2, _ext(&ext_type COMMA) &len2); \ is(_vl, len2, "len(mp_decode_"#_type"(x, %u))", _vl); \ + _ext(is(ext_type, 0, "type(mp_decode_"#_type"(x))")); \ d2 = data; \ - (void) mp_decode_strbin(&d2, &len2); \ + _not_ext((void) mp_decode_strbin(&d2, &len2)); \ + _ext((void) mp_decode_ext(&d2, &ext_type, &len2)); \ is(_vl, len2, "len(mp_decode_strbin(x, %u))", _vl); \ const char *d3 = data; \ mp_next(&d3); \ @@ -100,17 +106,19 @@ static char *data = buf + 1; /* use unaligned address to fail early */ is(memcmp(s1, s2, _vl), 0, "mp_encode_"#_type"(x, "#_vl") == x"); \ }) -#define test_uint(...) DEFINE_TEST(uint, SCALAR, __VA_ARGS__) -#define test_int(...) DEFINE_TEST(int, SCALAR, __VA_ARGS__) -#define test_bool(...) DEFINE_TEST(bool, SCALAR, __VA_ARGS__) -#define test_float(...) DEFINE_TEST(float, SCALAR, __VA_ARGS__) -#define test_double(...) DEFINE_TEST(double, SCALAR, __VA_ARGS__) -#define test_strl(...) DEFINE_TEST(strl, COMPLEX, __VA_ARGS__) -#define test_binl(...) DEFINE_TEST(binl, COMPLEX, __VA_ARGS__) -#define test_array(...) DEFINE_TEST(array, COMPLEX, __VA_ARGS__) -#define test_map(...) DEFINE_TEST(map, COMPLEX, __VA_ARGS__) -#define test_str(...) DEFINE_TEST_STRBIN(str, __VA_ARGS__) -#define test_bin(...) DEFINE_TEST_STRBIN(bin, __VA_ARGS__) +#define test_uint(...) DEFINE_TEST(uint, SCALAR, COMPLEX, __VA_ARGS__) +#define test_int(...) DEFINE_TEST(int, SCALAR, COMPLEX, __VA_ARGS__) +#define test_bool(...) DEFINE_TEST(bool, SCALAR, COMPLEX, __VA_ARGS__) +#define test_float(...) DEFINE_TEST(float, SCALAR, COMPLEX, __VA_ARGS__) +#define test_double(...) DEFINE_TEST(double, SCALAR, COMPLEX, __VA_ARGS__) +#define test_strl(...) DEFINE_TEST(strl, COMPLEX, COMPLEX, __VA_ARGS__) +#define test_binl(...) 
DEFINE_TEST(binl, COMPLEX, COMPLEX, __VA_ARGS__) +#define test_extl(...) DEFINE_TEST(extl, COMPLEX, SCALAR, __VA_ARGS__) +#define test_array(...) DEFINE_TEST(array, COMPLEX, COMPLEX, __VA_ARGS__) +#define test_map(...) DEFINE_TEST(map, COMPLEX, COMPLEX, __VA_ARGS__) +#define test_str(...) DEFINE_TEST_STRBINEXT(str, SCALAR, COMPLEX, __VA_ARGS__) +#define test_bin(...) DEFINE_TEST_STRBINEXT(bin, SCALAR, COMPLEX, __VA_ARGS__) +#define test_ext(...) DEFINE_TEST_STRBINEXT(ext, COMPLEX, SCALAR, __VA_ARGS__) static int test_uints(void) @@ -343,6 +351,53 @@ test_binls(void) return check_plan(); } +static int +test_extls(void) +{ + plan(168); + header(); + + /* fixext 1,2,4,8,16 */ + test_extl(0x01U, "\xd4\x00", 2); + test_extl(0x02U, "\xd5\x00", 2); + test_extl(0x04U, "\xd6\x00", 2); + test_extl(0x08U, "\xd7\x00", 2); + test_extl(0x10U, "\xd8\x00", 2); + + /* ext 8 */ + test_extl(0x11U, "\xc7\x11\x00", 3); + test_extl(0xfeU, "\xc7\xfe\x00", 3); + test_extl(0xffU, "\xc7\xff\x00", 3); + + test_extl(0x00U, "\xc7\x00\x00", 3); + test_extl(0x03U, "\xc7\x03\x00", 3); + test_extl(0x05U, "\xc7\x05\x00", 3); + test_extl(0x06U, "\xc7\x06\x00", 3); + test_extl(0x07U, "\xc7\x07\x00", 3); + test_extl(0x09U, "\xc7\x09\x00", 3); + test_extl(0x0aU, "\xc7\x0a\x00", 3); + test_extl(0x0bU, "\xc7\x0b\x00", 3); + test_extl(0x0cU, "\xc7\x0c\x00", 3); + test_extl(0x0dU, "\xc7\x0d\x00", 3); + test_extl(0x0eU, "\xc7\x0e\x00", 3); + test_extl(0x0fU, "\xc7\x0f\x00", 3); + + /* ext 16 */ + test_extl(0x0100U, "\xc8\x01\x00\x00", 4); + test_extl(0x0101U, "\xc8\x01\x01\x00", 4); + test_extl(0xfffeU, "\xc8\xff\xfe\x00", 4); + test_extl(0xffffU, "\xc8\xff\xff\x00", 4); + + /* ext 32 */ + test_extl(0x00010000U, "\xc9\x00\x01\x00\x00\x00", 6); + test_extl(0x00010001U, "\xc9\x00\x01\x00\x01\x00", 6); + test_extl(0xfffffffeU, "\xc9\xff\xff\xff\xfe\x00", 6); + test_extl(0xffffffffU, "\xc9\xff\xff\xff\xff\x00", 6); + + footer(); + return check_plan(); +} + static int test_strs(void) { @@ -389,6 +444,45 @@ 
test_bins(void) return check_plan(); } +static int +test_exts(void) +{ + plan(225); + header(); + + test_ext(0x01); + test_ext(0x02); + test_ext(0x03); + test_ext(0x04); + test_ext(0x05); + test_ext(0x06); + test_ext(0x07); + test_ext(0x08); + test_ext(0x09); + test_ext(0x0a); + test_ext(0x0b); + test_ext(0x0c); + test_ext(0x0d); + test_ext(0x0e); + test_ext(0x0f); + test_ext(0x10); + + test_ext(0x11); + test_ext(0xfe); + test_ext(0xff); + + test_ext(0x0100); + test_ext(0x0101); + test_ext(0xfffe); + test_ext(0xffff); + + test_ext(0x00010000); + test_ext(0x00010001); + + footer(); + return check_plan(); +} + static void test_next_on_array(uint32_t count) { @@ -789,9 +883,8 @@ test_mp_print() d = mp_encode_double(d, 3.14); d = mp_encode_uint(d, 100); d = mp_encode_uint(d, 500); - *d++ = 0xd4; /* let's pack smallest fixed ext */ - *d++ = 0; - *d++ = 0; + /* let's pack zero-length ext */ + d = mp_encode_extl(d, MP_EXT_TIMESTAMP, 0); char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r"; d = mp_encode_bin(d, bin, sizeof(bin)); d = mp_encode_map(d, 0); @@ -800,7 +893,7 @@ test_mp_print() const char *expected = "[-5, 42, \"kill bill\", [], " "{\"bool true\": true, \"bool false\": false, \"null\": null, " - "\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, " + "\"float\": 3.14, \"double\": 3.14, 100: 500}, timestamp, " "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]"; int esize = strlen(expected); @@ -1133,7 +1226,7 @@ test_overflow() int main() { - plan(20); + plan(22); test_uints(); test_ints(); test_bools(); @@ -1142,8 +1235,10 @@ int main() test_nils(); test_strls(); test_binls(); + test_extls(); test_strs(); test_bins(); + test_exts(); test_arrays(); test_maps(); test_next_on_arrays(); -- 2.20.1 (Apple Git-117)