From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 0F6C72EB3F for ; Tue, 21 May 2019 09:57:56 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id xLzum6zh2E5q for ; Tue, 21 May 2019 09:57:55 -0400 (EDT) Received: from smtp17.mail.ru (smtp17.mail.ru [94.100.176.154]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id C2F992EB0B for ; Tue, 21 May 2019 09:57:55 -0400 (EDT) From: Serge Petrenko Subject: [tarantool-patches] [PATCH v2] Add MsgPack ext types handling. Date: Tue, 21 May 2019 16:57:49 +0300 Message-Id: <20190521135749.45377-1-sergepetrenko@tarantool.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-Help: List-Unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-Subscribe: List-Owner: List-post: List-Archive: To: tarantool-patches@freelists.org Cc: georgy@tarantool.org, kostja@tarantool.org, Serge Petrenko Add the ability to encode/decode MsgPack ext types, introduce the first extension type: MP_EXT_DECIMAL: it will be used to store decimals in tarantool. Needed-for tarantool/tarantool#692 --- https://github.com/tarantool/msgpuck/tree/sp/add-extension-types https://github.com/tarantool/tarantool/issues/692 Changes in v2: - fixed and simplified mp_check_ext. It was wrong by one byte. - only use fixext types when length fits exactly (is 1, 2, 4, 8 or 16) Otherwise use ext 8 hints.c | 24 ++++++++++ msgpuck.c | 20 +++++++- msgpuck.h | 124 +++++++++++++++++++++++++++++++++++++++++++++++++ test/msgpuck.c | 7 ++- 4 files changed, 169 insertions(+), 6 deletions(-) diff --git a/hints.c b/hints.c index be859e9..d9b3fe7 100644 --- a/hints.c +++ b/hints.c @@ -672,3 +672,27 @@ const char *mp_char2escape[128] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\u007f" }; + +/* + * This lookup table is used by mp_encode_ext() to + * determine ext code (fixext 1, fixext 2, fixext 4, fixext 8, + * fixext 16) to encode using size. + */ +const uint8_t mp_ext_hint[16] = { + 0xd4, /* 1 */ + 0xd5, /* 2 */ + 0, /* 3 */ + 0xd6, /* 4 */ + 0, /* 5 */ + 0, /* 6 */ + 0, /* 7 */ + 0xd7, /* 8 */ + 0, /* 9 */ + 0, /* 10 */ + 0, /* 11 */ + 0, /* 12 */ + 0, /* 13 */ + 0, /* 14 */ + 0, /* 15 */ + 0xd8 /* 16 */ +}; diff --git a/msgpuck.c b/msgpuck.c index d0ffb83..04aeb0f 100644 --- a/msgpuck.c +++ b/msgpuck.c @@ -33,6 +33,17 @@ #define MP_LIBRARY 1 #include "msgpuck.h" +const char * +mp_ext_type_str(enum mp_ext_type type) +{ + switch(type) { + case MP_EXT_DECIMAL: + return "decimal"; + default: + return "undefined"; + } +} + size_t mp_vformat(char *data, size_t data_size, const char *format, va_list vl) { @@ -298,9 +309,14 @@ next: \ PRINTF("%lg", mp_decode_double(&data)); \ break; \ case MP_EXT: \ - mp_next(&data); \ - PRINTF("undefined"); \ + { \ + uint8_t type; \ + uint32_t len; \ + len = mp_decode_ext(&data, &len, &type); \ + PRINTF("%s", mp_ext_type_str(type)); \ + data += len; \ break; \ + } \ default: \ mp_unreachable(); \ return -1; \ diff --git a/msgpuck.h b/msgpuck.h index 6b29cd6..ec9e135 100644 --- a/msgpuck.h +++ b/msgpuck.h @@ -382,6 +382,13 @@ enum mp_type { MP_EXT }; +/** + * \brief MsgPack extension data types. + */ +enum mp_ext_type { + MP_EXT_DECIMAL = 0 +}; + /** * \brief Determine MsgPack type by a first byte \a c of encoded data. * @@ -532,6 +539,49 @@ mp_check_map(const char *cur, const char *end); MP_PROTO uint32_t mp_decode_map(const char **data); +/** + * \brief calculate exact buffer size needed to store + * ext value of length \a len. + * \param len value length in bytes. + * \retval buffer size in bytes + */ +MP_PROTO uint32_t +mp_sizeof_ext(uint32_t len); + +/** + * \brief Encode extension header with \a type and + * value length \a len. + * The value must be encoded after the header. + * \return \a data + \link mp_sizeof_ext() mp_sizeof_ext(size)\endlink + */ +MP_PROTO char * +mp_encode_ext(char *data, uint8_t type, uint32_t len); + +/** + * \brief Check that \a cur buffer has enough bytes to decode an ext header. + * \param cur buffer + * \param end end of the buffer + * \retval 0 - buffer has enough bytes + * \retval > 0 - the numbeer of remaining bytes to read + * \pre cur < end + * \pre mp_typeof(*cur) == MP_EXT + */ +MP_PROTO MP_PURE ptrdiff_t +mp_check_ext(const char *cur, const char *end); + +/** + * \brief Decode an extension header from MsgPack \a data. + * + * The extension type value must be decoded after the header. + * \param data - the pointer to a buffer. + * \param len - decoded length of the following value. + * \param type - decoded type of the following value. + * \retval - the length of the following ext value. + * \post *data = *data + mp_sizeof_ext(length) + */ +MP_PROTO uint32_t +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type); + /** * \brief Calculate exact buffer size needed to store an integer \a num. * Maximum return value is 9. For performance reasons you can preallocate @@ -1326,6 +1376,7 @@ mp_frame_advance(struct mp_frame *frame); extern const enum mp_type mp_type_hint[]; extern const int8_t mp_parser_hint[]; extern const char *mp_char2escape[]; +extern const uint8_t mp_ext_hint[]; MP_IMPL MP_ALWAYSINLINE enum mp_type mp_typeof(const char c) @@ -1462,6 +1513,79 @@ mp_decode_map(const char **data) } } +MP_IMPL uint32_t +mp_sizeof_ext(uint32_t len) +{ + if (len <= 16) return 2; + if (len <= UINT8_MAX) return 3; + if (len <= UINT16_MAX) return 4; + else return 5; +} + +MP_IMPL char * +mp_encode_ext(char *data, uint8_t type, uint32_t len) +{ + /* + * Only use fixext when length is exactly 1, 2, 4, 8 or 16. + * Otherwise use ext 8 if length <= 255. + */ + if (len <= 16 && mp_ext_hint[len-1]) { + data = mp_store_u8(data, mp_ext_hint[len-1]); + } else if (len <= UINT8_MAX) { + data = mp_store_u8(data, 0xc7); + data = mp_store_u8(data, (uint8_t) len); + } else if (len <= UINT16_MAX) { + data = mp_store_u8(data, 0xc8); + data = mp_store_u16(data, (uint16_t) len); + } else { + data = mp_store_u8(data, 0xc9); + data = mp_store_u32(data,len); + } + data = mp_store_u8(data, type); + return data; +} + +MP_IMPL ptrdiff_t +mp_check_ext(const char *cur, const char *end) +{ + assert(cur < end); + assert(mp_typeof(*cur) == MP_EXT); + uint8_t c = mp_load_u8(&cur); + if (c & 0xf0 == 0xd0) { + return 1 - (end - cur); + } + + assert(c >= 0xc7 && c <= 0xc9); + return 1 << (c - 0xc7) + 1 - (end - cur); /* 0xc7 -> 2, 0xc8 -> 3, 0xc9 ->5 */ +} + +MP_IMPL uint32_t +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type) { + uint8_t c = mp_load_u8(data); + switch (c) { + case 0xd4: + case 0xd5: + case 0xd6: + case 0xd7: + case 0xd8: + *len = 1u << (c - 0xd4); + break; + case 0xc7: + *len = mp_load_u8(data); + break; + case 0xc8: + *len = mp_load_u16(data); + break; + case 0xc9: + *len = mp_load_u32(data); + break; + default: + mp_unreachable(); + } + *type = mp_load_u8(data); + return *len; +} + MP_IMPL uint32_t mp_sizeof_uint(uint64_t num) { diff --git a/test/msgpuck.c b/test/msgpuck.c index 3b31bfc..5ddaba7 100644 --- a/test/msgpuck.c +++ b/test/msgpuck.c @@ -789,9 +789,8 @@ test_mp_print() d = mp_encode_double(d, 3.14); d = mp_encode_uint(d, 100); d = mp_encode_uint(d, 500); - *d++ = 0xd4; /* let's pack smallest fixed ext */ - *d++ = 0; - *d++ = 0; + /* let's pack zero-length ext */ + d = mp_encode_ext(d, MP_EXT_DECIMAL, 0); char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r"; d = mp_encode_bin(d, bin, sizeof(bin)); d = mp_encode_map(d, 0); @@ -800,7 +799,7 @@ test_mp_print() const char *expected = "[-5, 42, \"kill bill\", [], " "{\"bool true\": true, \"bool false\": false, \"null\": null, " - "\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, " + "\"float\": 3.14, \"double\": 3.14, 100: 500}, decimal, " "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]"; int esize = strlen(expected); -- 2.20.1 (Apple Git-117)