From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Content-Type: text/plain; charset=utf-8 Mime-Version: 1.0 (Mac OS X Mail 12.4 \(3445.104.8\)) Subject: Re: [tarantool-patches] Re: [PATCH] Add MsgPack ext types handling. From: Serge Petrenko In-Reply-To: <20190423122146.GA5668@chai> Date: Tue, 23 Apr 2019 16:02:24 +0300 Content-Transfer-Encoding: quoted-printable Message-Id: <39783BF9-0E1F-4AB0-B538-B7F2A2A123FF@tarantool.org> References: <20190423113732.20786-1-sergepetrenko@tarantool.org> <20190423122146.GA5668@chai> To: Konstantin Osipov Cc: Vladimir Davydov , tarantool-patches@freelists.org List-ID: > 23 =D0=B0=D0=BF=D1=80. 2019 =D0=B3., =D0=B2 15:21, Konstantin Osipov = =D0=BD=D0=B0=D0=BF=D0=B8=D1=81=D0=B0=D0=BB(=D0=B0):= >=20 > * Serge Petrenko [19/04/23 15:09]: >=20 > I mentioned before that this would break all clients. >=20 > It's high time to reconsider our in-memory storage format and make > it different from serialization format: we're seeing significant > performance penalty with serialization to and from msgpack in SQL, > and DECIMAL is a yet another blow on the single-format-approach: > our clients are simply not prepared to handle non-standard > extensions.=20 >=20 > Let's investigate what it takes to store decimal as an mp ext, and > serialize as mp string. Well, we can do this, but how can clients distinguish decimal and string = then? >=20 >=20 >> Add the ability to encode/decode MsgPack ext types, >> introduce the first extension type: MP_EXT_DECIMAL: it will be used = to >> store decimals in tarantool. >>=20 >> Needed-for tarantool/tarantool#692 >> --- >> https://github.com/tarantool/msgpuck/tree/sp/add-extension-types >> https://github.com/tarantool/tarantool/issues/692 >>=20 >> hints.c | 24 +++++++++ >> msgpuck.c | 20 +++++++- >> msgpuck.h | 133 = +++++++++++++++++++++++++++++++++++++++++++++++++ >> test/msgpuck.c | 7 ++- >> 4 files changed, 178 insertions(+), 6 deletions(-) >>=20 >> diff --git a/hints.c b/hints.c >> index be859e9..0ab4f4d 100644 >> --- a/hints.c >> +++ b/hints.c >> @@ -672,3 +672,27 @@ const char *mp_char2escape[128] =3D { >> NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, >> NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\u007f" >> }; >> + >> +/* >> + * This lookup table is used by mp_encode_ext() to >> + * determine ext code (fixext 1, fixext 2, fixext 4, fixext 8, >> + * fixext 16) to encode using size. >> + */ >> +const uint8_t mp_ext_hint[16] =3D { >> + 0xd4, /* 1 */ >> + 0xd5, /* 2 */ >> + 0xd6, /* 3 */ >> + 0xd6, /* 4 */ >> + 0xd7, /* 5 */ >> + 0xd7, /* 6 */ >> + 0xd7, /* 7 */ >> + 0xd7, /* 8 */ >> + 0xd8, /* 9 */ >> + 0xd8, /* 10 */ >> + 0xd8, /* 11 */ >> + 0xd8, /* 12 */ >> + 0xd8, /* 13 */ >> + 0xd8, /* 14 */ >> + 0xd8, /* 15 */ >> + 0xd8 /* 16 */ >> +}; >> diff --git a/msgpuck.c b/msgpuck.c >> index d0ffb83..04aeb0f 100644 >> --- a/msgpuck.c >> +++ b/msgpuck.c >> @@ -33,6 +33,17 @@ >> #define MP_LIBRARY 1 >> #include "msgpuck.h" >>=20 >> +const char * >> +mp_ext_type_str(enum mp_ext_type type) >> +{ >> + switch(type) { >> + case MP_EXT_DECIMAL: >> + return "decimal"; >> + default: >> + return "undefined"; >> + } >> +} >> + >> size_t >> mp_vformat(char *data, size_t data_size, const char *format, va_list = vl) >> { >> @@ -298,9 +309,14 @@ next: = \ >> PRINTF("%lg", mp_decode_double(&data)); = \ >> break; = \ >> case MP_EXT: = \ >> - mp_next(&data); = \ >> - PRINTF("undefined"); = \ >> + { = \ >> + uint8_t type; = \ >> + uint32_t len; = \ >> + len =3D mp_decode_ext(&data, &len, &type); = \ >> + PRINTF("%s", mp_ext_type_str(type)); = \ >> + data +=3D len; = \ >> break; = \ >> + } = \ >> default: = \ >> mp_unreachable(); = \ >> return -1; = \ >> diff --git a/msgpuck.h b/msgpuck.h >> index 6b29cd6..0a7d230 100644 >> --- a/msgpuck.h >> +++ b/msgpuck.h >> @@ -382,6 +382,13 @@ enum mp_type { >> MP_EXT >> }; >>=20 >> +/** >> + * \brief MsgPack extension data types. >> + */ >> +enum mp_ext_type { >> + MP_EXT_DECIMAL =3D 0 >> +}; >> + >> /** >> * \brief Determine MsgPack type by a first byte \a c of encoded = data. >> * >> @@ -532,6 +539,49 @@ mp_check_map(const char *cur, const char *end); >> MP_PROTO uint32_t >> mp_decode_map(const char **data); >>=20 >> +/** >> + * \brief calculate exact buffer size needed to store >> + * ext value of length \a len. >> + * \param len value length in bytes. >> + * \retval buffer size in bytes >> + */ >> +MP_PROTO uint32_t >> +mp_sizeof_ext(uint32_t len); >> + >> +/** >> + * \brief Encode extension header with \a type and >> + * value length \a len. >> + * The value must be encoded after the header. >> + * \return \a data + \link mp_sizeof_ext() = mp_sizeof_ext(size)\endlink >> + */ >> +MP_PROTO char * >> +mp_encode_ext(char *data, uint8_t type, uint32_t len); >> + >> +/** >> + * \brief Check that \a cur buffer has enough bytes to decode a map = header. >> + * \param cur buffer >> + * \param end end of the buffer >> + * \retval 0 - buffer has enough bytes >> + * \retval > 0 - the numbeer of remaining bytes to read >> + * \pre cur < end >> + * \pre mp_typeof(*cur) =3D=3D MP_EXT >> + */ >> +MP_PROTO MP_PURE ptrdiff_t >> +mp_check_ext(const char *cur, const char *end); >> + >> +/** >> + * \brief Decode an extension header from MsgPack \a data. >> + * >> + * The extension type value must be decoded after the header. >> + * \param data - the pointer to a buffer. >> + * \param len - decoded length of the following value. >> + * \param type - decoded type of the following value. >> + * \retval - the length of the following ext value. >> + * \post *data =3D *data + mp_sizeof_ext(length) >> + */ >> +MP_PROTO uint32_t >> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type); >> + >> /** >> * \brief Calculate exact buffer size needed to store an integer \a = num. >> * Maximum return value is 9. For performance reasons you can = preallocate >> @@ -1326,6 +1376,7 @@ mp_frame_advance(struct mp_frame *frame); >> extern const enum mp_type mp_type_hint[]; >> extern const int8_t mp_parser_hint[]; >> extern const char *mp_char2escape[]; >> +extern const uint8_t mp_ext_hint[]; >>=20 >> MP_IMPL MP_ALWAYSINLINE enum mp_type >> mp_typeof(const char c) >> @@ -1462,6 +1513,88 @@ mp_decode_map(const char **data) >> } >> } >>=20 >> +MP_IMPL uint32_t >> +mp_sizeof_ext(uint32_t len) >> +{ >> + if (len <=3D 16) return 2; >> + if (len <=3D UINT8_MAX) return 3; >> + if (len <=3D UINT16_MAX) return 4; >> + else return 5; >> +} >> + >> +MP_IMPL char * >> +mp_encode_ext(char *data, uint8_t type, uint32_t len) >> +{ >> + /* >> + * zero-length should be encoded in EXT 8, because >> + * fixext always assumes exact length (1, 2, 4, 8 and 16). >> + */ >> + if (len <=3D 16 && len > 0) { >> + data =3D mp_store_u8(data, mp_ext_hint[len-1]); >> + } else if (len <=3D UINT8_MAX) { >> + data =3D mp_store_u8(data, 0xc7); >> + data =3D mp_store_u8(data, (uint8_t) len); >> + } else if (len <=3D UINT16_MAX) { >> + data =3D mp_store_u8(data, 0xc8); >> + data =3D mp_store_u16(data, (uint16_t) len); >> + } else { >> + data =3D mp_store_u8(data, 0xc9); >> + data =3D mp_store_u32(data, (uint32_t) len); >> + } >> + data =3D mp_store_u8(data, type); >> + return data; >> +} >> + >> +MP_IMPL ptrdiff_t >> +mp_check_ext(const char *cur, const char *end) >> +{ >> + assert(cur < end); >> + assert(mp_typeof(*cur) =3D=3D MP_EXT); >> + uint8_t c =3D mp_load_u8(&cur); >> + switch (c) { >> + case 0xd4: >> + case 0xd5: >> + case 0xd6: >> + case 0xd7: >> + case 0xd8: >> + return 1 - (end - cur); >> + case 0xc7: >> + case 0xc8: >> + return c - 0xc4 - (end - cur); >> + case 0xc9: >> + return 6 - (end - cur); >> + default: >> + mp_unreachable(); >> + } >> +} >> + >> +MP_IMPL uint32_t >> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type) { >> + uint8_t c =3D mp_load_u8(data); >> + switch (c) { >> + case 0xd4: >> + case 0xd5: >> + case 0xd6: >> + case 0xd7: >> + case 0xd8: >> + *len =3D 1u << (c - 0xd4); >> + break; >> + case 0xc7: >> + *len =3D mp_load_u8(data); >> + break; >> + case 0xc8: >> + *len =3D mp_load_u16(data); >> + break; >> + case 0xc9: >> + *len =3D mp_load_u32(data); >> + break; >> + default: >> + mp_unreachable(); >> + } >> + *type =3D mp_load_u8(data); >> + return *len; >> +} >> + >> MP_IMPL uint32_t >> mp_sizeof_uint(uint64_t num) >> { >> diff --git a/test/msgpuck.c b/test/msgpuck.c >> index 3b31bfc..5ddaba7 100644 >> --- a/test/msgpuck.c >> +++ b/test/msgpuck.c >> @@ -789,9 +789,8 @@ test_mp_print() >> d =3D mp_encode_double(d, 3.14); >> d =3D mp_encode_uint(d, 100); >> d =3D mp_encode_uint(d, 500); >> - *d++ =3D 0xd4; /* let's pack smallest fixed ext */ >> - *d++ =3D 0; >> - *d++ =3D 0; >> + /* let's pack zero-length ext */ >> + d =3D mp_encode_ext(d, MP_EXT_DECIMAL, 0); >> char bin[] =3D "\x12test\x34\b\t\n\"bla\\-bla\"\f\r"; >> d =3D mp_encode_bin(d, bin, sizeof(bin)); >> d =3D mp_encode_map(d, 0); >> @@ -800,7 +799,7 @@ test_mp_print() >> const char *expected =3D >> "[-5, 42, \"kill bill\", [], " >> "{\"bool true\": true, \"bool false\": false, \"null\": = null, " >> - "\"float\": 3.14, \"double\": 3.14, 100: 500}, = undefined, " >> + "\"float\": 3.14, \"double\": 3.14, 100: 500}, decimal, = " >> = "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]"; >> int esize =3D strlen(expected); >>=20 >> --=20 >> 2.20.1 (Apple Git-117) >>=20 >=20 > --=20 > Konstantin Osipov, Moscow, Russia, +7 903 626 22 32 > http://tarantool.io - www.twitter.com/kostja_osipov >=20