From: Serge Petrenko <sergepetrenko@tarantool.org> To: Konstantin Osipov <kostja@tarantool.org> Cc: Vladimir Davydov <vdavydov.dev@gmail.com>, tarantool-patches@freelists.org Subject: Re: [tarantool-patches] Re: [PATCH] Add MsgPack ext types handling. Date: Tue, 23 Apr 2019 16:02:24 +0300 [thread overview] Message-ID: <39783BF9-0E1F-4AB0-B538-B7F2A2A123FF@tarantool.org> (raw) In-Reply-To: <20190423122146.GA5668@chai> > 23 апр. 2019 г., в 15:21, Konstantin Osipov <kostja@tarantool.org> написал(а): > > * Serge Petrenko <sergepetrenko@tarantool.org> [19/04/23 15:09]: > > I mentioned before that this would break all clients. > > It's high time to reconsider our in-memory storage format and make > it different from serialization format: we're seeing significant > performance penalty with serialization to and from msgpack in SQL, > and DECIMAL is a yet another blow on the single-format-approach: > our clients are simply not prepared to handle non-standard > extensions. > > Let's investigate what it takes to store decimal as an mp ext, and > serialize as mp string. Well, we can do this, but how can clients distinguish decimal and string then? > > >> Add the ability to encode/decode MsgPack ext types, >> introduce the first extension type: MP_EXT_DECIMAL: it will be used to >> store decimals in tarantool. >> >> Needed-for tarantool/tarantool#692 >> --- >> https://github.com/tarantool/msgpuck/tree/sp/add-extension-types >> https://github.com/tarantool/tarantool/issues/692 >> >> hints.c | 24 +++++++++ >> msgpuck.c | 20 +++++++- >> msgpuck.h | 133 +++++++++++++++++++++++++++++++++++++++++++++++++ >> test/msgpuck.c | 7 ++- >> 4 files changed, 178 insertions(+), 6 deletions(-) >> >> diff --git a/hints.c b/hints.c >> index be859e9..0ab4f4d 100644 >> --- a/hints.c >> +++ b/hints.c >> @@ -672,3 +672,27 @@ const char *mp_char2escape[128] = { >> NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, >> NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\u007f" >> }; >> + >> +/* >> + * This lookup table is used by mp_encode_ext() to >> + * determine ext code (fixext 1, fixext 2, fixext 4, fixext 8, >> + * fixext 16) to encode using size. >> + */ >> +const uint8_t mp_ext_hint[16] = { >> + 0xd4, /* 1 */ >> + 0xd5, /* 2 */ >> + 0xd6, /* 3 */ >> + 0xd6, /* 4 */ >> + 0xd7, /* 5 */ >> + 0xd7, /* 6 */ >> + 0xd7, /* 7 */ >> + 0xd7, /* 8 */ >> + 0xd8, /* 9 */ >> + 0xd8, /* 10 */ >> + 0xd8, /* 11 */ >> + 0xd8, /* 12 */ >> + 0xd8, /* 13 */ >> + 0xd8, /* 14 */ >> + 0xd8, /* 15 */ >> + 0xd8 /* 16 */ >> +}; >> diff --git a/msgpuck.c b/msgpuck.c >> index d0ffb83..04aeb0f 100644 >> --- a/msgpuck.c >> +++ b/msgpuck.c >> @@ -33,6 +33,17 @@ >> #define MP_LIBRARY 1 >> #include "msgpuck.h" >> >> +const char * >> +mp_ext_type_str(enum mp_ext_type type) >> +{ >> + switch(type) { >> + case MP_EXT_DECIMAL: >> + return "decimal"; >> + default: >> + return "undefined"; >> + } >> +} >> + >> size_t >> mp_vformat(char *data, size_t data_size, const char *format, va_list vl) >> { >> @@ -298,9 +309,14 @@ next: \ >> PRINTF("%lg", mp_decode_double(&data)); \ >> break; \ >> case MP_EXT: \ >> - mp_next(&data); \ >> - PRINTF("undefined"); \ >> + { \ >> + uint8_t type; \ >> + uint32_t len; \ >> + len = mp_decode_ext(&data, &len, &type); \ >> + PRINTF("%s", mp_ext_type_str(type)); \ >> + data += len; \ >> break; \ >> + } \ >> default: \ >> mp_unreachable(); \ >> return -1; \ >> diff --git a/msgpuck.h b/msgpuck.h >> index 6b29cd6..0a7d230 100644 >> --- a/msgpuck.h >> +++ b/msgpuck.h >> @@ -382,6 +382,13 @@ enum mp_type { >> MP_EXT >> }; >> >> +/** >> + * \brief MsgPack extension data types. >> + */ >> +enum mp_ext_type { >> + MP_EXT_DECIMAL = 0 >> +}; >> + >> /** >> * \brief Determine MsgPack type by a first byte \a c of encoded data. >> * >> @@ -532,6 +539,49 @@ mp_check_map(const char *cur, const char *end); >> MP_PROTO uint32_t >> mp_decode_map(const char **data); >> >> +/** >> + * \brief calculate exact buffer size needed to store >> + * ext value of length \a len. >> + * \param len value length in bytes. >> + * \retval buffer size in bytes >> + */ >> +MP_PROTO uint32_t >> +mp_sizeof_ext(uint32_t len); >> + >> +/** >> + * \brief Encode extension header with \a type and >> + * value length \a len. >> + * The value must be encoded after the header. >> + * \return \a data + \link mp_sizeof_ext() mp_sizeof_ext(size)\endlink >> + */ >> +MP_PROTO char * >> +mp_encode_ext(char *data, uint8_t type, uint32_t len); >> + >> +/** >> + * \brief Check that \a cur buffer has enough bytes to decode a map header. >> + * \param cur buffer >> + * \param end end of the buffer >> + * \retval 0 - buffer has enough bytes >> + * \retval > 0 - the numbeer of remaining bytes to read >> + * \pre cur < end >> + * \pre mp_typeof(*cur) == MP_EXT >> + */ >> +MP_PROTO MP_PURE ptrdiff_t >> +mp_check_ext(const char *cur, const char *end); >> + >> +/** >> + * \brief Decode an extension header from MsgPack \a data. >> + * >> + * The extension type value must be decoded after the header. >> + * \param data - the pointer to a buffer. >> + * \param len - decoded length of the following value. >> + * \param type - decoded type of the following value. >> + * \retval - the length of the following ext value. >> + * \post *data = *data + mp_sizeof_ext(length) >> + */ >> +MP_PROTO uint32_t >> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type); >> + >> /** >> * \brief Calculate exact buffer size needed to store an integer \a num. >> * Maximum return value is 9. For performance reasons you can preallocate >> @@ -1326,6 +1376,7 @@ mp_frame_advance(struct mp_frame *frame); >> extern const enum mp_type mp_type_hint[]; >> extern const int8_t mp_parser_hint[]; >> extern const char *mp_char2escape[]; >> +extern const uint8_t mp_ext_hint[]; >> >> MP_IMPL MP_ALWAYSINLINE enum mp_type >> mp_typeof(const char c) >> @@ -1462,6 +1513,88 @@ mp_decode_map(const char **data) >> } >> } >> >> +MP_IMPL uint32_t >> +mp_sizeof_ext(uint32_t len) >> +{ >> + if (len <= 16) return 2; >> + if (len <= UINT8_MAX) return 3; >> + if (len <= UINT16_MAX) return 4; >> + else return 5; >> +} >> + >> +MP_IMPL char * >> +mp_encode_ext(char *data, uint8_t type, uint32_t len) >> +{ >> + /* >> + * zero-length should be encoded in EXT 8, because >> + * fixext always assumes exact length (1, 2, 4, 8 and 16). >> + */ >> + if (len <= 16 && len > 0) { >> + data = mp_store_u8(data, mp_ext_hint[len-1]); >> + } else if (len <= UINT8_MAX) { >> + data = mp_store_u8(data, 0xc7); >> + data = mp_store_u8(data, (uint8_t) len); >> + } else if (len <= UINT16_MAX) { >> + data = mp_store_u8(data, 0xc8); >> + data = mp_store_u16(data, (uint16_t) len); >> + } else { >> + data = mp_store_u8(data, 0xc9); >> + data = mp_store_u32(data, (uint32_t) len); >> + } >> + data = mp_store_u8(data, type); >> + return data; >> +} >> + >> +MP_IMPL ptrdiff_t >> +mp_check_ext(const char *cur, const char *end) >> +{ >> + assert(cur < end); >> + assert(mp_typeof(*cur) == MP_EXT); >> + uint8_t c = mp_load_u8(&cur); >> + switch (c) { >> + case 0xd4: >> + case 0xd5: >> + case 0xd6: >> + case 0xd7: >> + case 0xd8: >> + return 1 - (end - cur); >> + case 0xc7: >> + case 0xc8: >> + return c - 0xc4 - (end - cur); >> + case 0xc9: >> + return 6 - (end - cur); >> + default: >> + mp_unreachable(); >> + } >> +} >> + >> +MP_IMPL uint32_t >> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type) { >> + uint8_t c = mp_load_u8(data); >> + switch (c) { >> + case 0xd4: >> + case 0xd5: >> + case 0xd6: >> + case 0xd7: >> + case 0xd8: >> + *len = 1u << (c - 0xd4); >> + break; >> + case 0xc7: >> + *len = mp_load_u8(data); >> + break; >> + case 0xc8: >> + *len = mp_load_u16(data); >> + break; >> + case 0xc9: >> + *len = mp_load_u32(data); >> + break; >> + default: >> + mp_unreachable(); >> + } >> + *type = mp_load_u8(data); >> + return *len; >> +} >> + >> MP_IMPL uint32_t >> mp_sizeof_uint(uint64_t num) >> { >> diff --git a/test/msgpuck.c b/test/msgpuck.c >> index 3b31bfc..5ddaba7 100644 >> --- a/test/msgpuck.c >> +++ b/test/msgpuck.c >> @@ -789,9 +789,8 @@ test_mp_print() >> d = mp_encode_double(d, 3.14); >> d = mp_encode_uint(d, 100); >> d = mp_encode_uint(d, 500); >> - *d++ = 0xd4; /* let's pack smallest fixed ext */ >> - *d++ = 0; >> - *d++ = 0; >> + /* let's pack zero-length ext */ >> + d = mp_encode_ext(d, MP_EXT_DECIMAL, 0); >> char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r"; >> d = mp_encode_bin(d, bin, sizeof(bin)); >> d = mp_encode_map(d, 0); >> @@ -800,7 +799,7 @@ test_mp_print() >> const char *expected = >> "[-5, 42, \"kill bill\", [], " >> "{\"bool true\": true, \"bool false\": false, \"null\": null, " >> - "\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, " >> + "\"float\": 3.14, \"double\": 3.14, 100: 500}, decimal, " >> "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]"; >> int esize = strlen(expected); >> >> -- >> 2.20.1 (Apple Git-117) >> > > -- > Konstantin Osipov, Moscow, Russia, +7 903 626 22 32 > http://tarantool.io - www.twitter.com/kostja_osipov >
next prev parent reply other threads:[~2019-04-23 13:02 UTC|newest] Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-04-23 11:37 Serge Petrenko 2019-04-23 12:21 ` [tarantool-patches] " Konstantin Osipov 2019-04-23 13:02 ` Serge Petrenko [this message] 2019-04-23 13:44 ` [tarantool-patches] " Konstantin Osipov 2019-04-23 13:54 ` Vladislav Shpilevoy 2019-04-23 14:20 ` Konstantin Osipov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=39783BF9-0E1F-4AB0-B538-B7F2A2A123FF@tarantool.org \ --to=sergepetrenko@tarantool.org \ --cc=kostja@tarantool.org \ --cc=tarantool-patches@freelists.org \ --cc=vdavydov.dev@gmail.com \ --subject='Re: [tarantool-patches] Re: [PATCH] Add MsgPack ext types handling.' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox