From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Content-Type: text/plain; charset=utf-8 Mime-Version: 1.0 (Mac OS X Mail 12.4 \(3445.104.11\)) Subject: Re: [tarantool-patches] [PATCH v2] Add MsgPack ext types handling. From: Serge Petrenko In-Reply-To: <20190529163838.c52ackt2shbaotss@esperanza> Date: Mon, 3 Jun 2019 16:15:22 +0300 Content-Transfer-Encoding: quoted-printable Message-Id: <7949589F-11DA-4BB0-990E-E1210C8E9D65@tarantool.org> References: <20190521135749.45377-1-sergepetrenko@tarantool.org> <20190529163838.c52ackt2shbaotss@esperanza> To: Vladimir Davydov Cc: tarantool-patches@freelists.org, Georgy Kirichenko , kostja@tarantool.org List-ID: > 29 =D0=BC=D0=B0=D1=8F 2019 =D0=B3., =D0=B2 19:38, Vladimir Davydov = =D0=BD=D0=B0=D0=BF=D0=B8=D1=81=D0=B0=D0=BB(=D0=B0= ): >=20 > On Tue, May 21, 2019 at 04:57:49PM +0300, Serge Petrenko wrote: >> diff --git a/msgpuck.c b/msgpuck.c >> index d0ffb83..04aeb0f 100644 >> --- a/msgpuck.c >> +++ b/msgpuck.c >> @@ -33,6 +33,17 @@ >> #define MP_LIBRARY 1 >> #include "msgpuck.h" >>=20 >> +const char * >> +mp_ext_type_str(enum mp_ext_type type) >> +{ >> + switch(type) { >> + case MP_EXT_DECIMAL: >> + return "decimal"; >=20 > This isn't particularly useful. Unless we can print a decimal as > decimal here, I'd leave 'undefined' for now. >=20 >> + default: >> + return "undefined"; >> + } >> +} >> + >> size_t >> mp_vformat(char *data, size_t data_size, const char *format, va_list = vl) >> { >> @@ -298,9 +309,14 @@ next: = \ >> PRINTF("%lg", mp_decode_double(&data)); = \ >> break; = \ >> case MP_EXT: = \ >> - mp_next(&data); = \ >> - PRINTF("undefined"); = \ >> + { = \ >> + uint8_t type; = \ >> + uint32_t len; = \ >> + len =3D mp_decode_ext(&data, &len, &type); = \ >> + PRINTF("%s", mp_ext_type_str(type)); = \ >> + data +=3D len; = \ >> break; = \ >> + } = \ >> default: = \ >> mp_unreachable(); = \ >> return -1; = \ >> diff --git a/msgpuck.h b/msgpuck.h >> index 6b29cd6..ec9e135 100644 >> --- a/msgpuck.h >> +++ b/msgpuck.h >> @@ -382,6 +382,13 @@ enum mp_type { >> MP_EXT >> }; >>=20 >> +/** >> + * \brief MsgPack extension data types. >> + */ >> +enum mp_ext_type { >> + MP_EXT_DECIMAL =3D 0 >=20 > Not sure, this should be defined here. After all, it's an application > defined type so its code should be defined in the application, not in > the library. OTOH, I'd expect to see timestamp here, which is defined = in > the spec. >=20 >> +}; >> + >> /** >> * \brief Determine MsgPack type by a first byte \a c of encoded = data. >> * >> @@ -532,6 +539,49 @@ mp_check_map(const char *cur, const char *end); >> MP_PROTO uint32_t >> mp_decode_map(const char **data); >>=20 >> +/** >> + * \brief calculate exact buffer size needed to store >> + * ext value of length \a len. >> + * \param len value length in bytes. >> + * \retval buffer size in bytes >> + */ >> +MP_PROTO uint32_t >> +mp_sizeof_ext(uint32_t len); >> + >> +/** >> + * \brief Encode extension header with \a type and >> + * value length \a len. >> + * The value must be encoded after the header. >> + * \return \a data + \link mp_sizeof_ext() = mp_sizeof_ext(size)\endlink >> + */ >> +MP_PROTO char * >> +mp_encode_ext(char *data, uint8_t type, uint32_t len); >> + >> +/** >> + * \brief Check that \a cur buffer has enough bytes to decode an ext = header. >> + * \param cur buffer >> + * \param end end of the buffer >> + * \retval 0 - buffer has enough bytes >> + * \retval > 0 - the numbeer of remaining bytes to read >> + * \pre cur < end >> + * \pre mp_typeof(*cur) =3D=3D MP_EXT >> + */ >> +MP_PROTO MP_PURE ptrdiff_t >> +mp_check_ext(const char *cur, const char *end); >> + >> +/** >> + * \brief Decode an extension header from MsgPack \a data. >> + * >> + * The extension type value must be decoded after the header. >> + * \param data - the pointer to a buffer. >> + * \param len - decoded length of the following value. >> + * \param type - decoded type of the following value. >> + * \retval - the length of the following ext value. >> + * \post *data =3D *data + mp_sizeof_ext(length) >> + */ >> +MP_PROTO uint32_t >> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type); >> + >> /** >> * \brief Calculate exact buffer size needed to store an integer \a = num. >> * Maximum return value is 9. For performance reasons you can = preallocate >> @@ -1326,6 +1376,7 @@ mp_frame_advance(struct mp_frame *frame); >> extern const enum mp_type mp_type_hint[]; >> extern const int8_t mp_parser_hint[]; >> extern const char *mp_char2escape[]; >> +extern const uint8_t mp_ext_hint[]; >>=20 >> MP_IMPL MP_ALWAYSINLINE enum mp_type >> mp_typeof(const char c) >> @@ -1462,6 +1513,79 @@ mp_decode_map(const char **data) >> } >> } >>=20 >> +MP_IMPL uint32_t >> +mp_sizeof_ext(uint32_t len) >> +{ >> + if (len <=3D 16) return 2; >> + if (len <=3D UINT8_MAX) return 3; >> + if (len <=3D UINT16_MAX) return 4; >> + else return 5; >> +} >> + >> +MP_IMPL char * >> +mp_encode_ext(char *data, uint8_t type, uint32_t len) >=20 > I'd expect it to take a pointer to the extension data, just like in > case of mp_encode_str/bin. For encoding only the header, I'd introduce > a separate function mp_encode_extl. Same is fair for decoding = functions. >=20 >> +{ >> + /* >> + * Only use fixext when length is exactly 1, 2, 4, 8 or 16. >> + * Otherwise use ext 8 if length <=3D 255. >> + */ >> + if (len <=3D 16 && mp_ext_hint[len-1]) { >=20 > AFAICS this is inconsistent with mp_sizeof_ext: the latter assumes = that > any ext data of length <=3D 16 is encoded using fixext. >=20 >> + data =3D mp_store_u8(data, mp_ext_hint[len-1]); >> + } else if (len <=3D UINT8_MAX) { >> + data =3D mp_store_u8(data, 0xc7); >> + data =3D mp_store_u8(data, (uint8_t) len); >> + } else if (len <=3D UINT16_MAX) { >> + data =3D mp_store_u8(data, 0xc8); >> + data =3D mp_store_u16(data, (uint16_t) len); >> + } else { >> + data =3D mp_store_u8(data, 0xc9); >> + data =3D mp_store_u32(data,len); >> + } >> + data =3D mp_store_u8(data, type); >> + return data; >> +} >> + >> +MP_IMPL ptrdiff_t >> +mp_check_ext(const char *cur, const char *end) >> +{ >> + assert(cur < end); >> + assert(mp_typeof(*cur) =3D=3D MP_EXT); >> + uint8_t c =3D mp_load_u8(&cur); >> + if (c & 0xf0 =3D=3D 0xd0) { >> + return 1 - (end - cur); >> + } >> + >> + assert(c >=3D 0xc7 && c <=3D 0xc9); >> + return 1 << (c - 0xc7) + 1 - (end - cur); /* 0xc7 -> 2, 0xc8 -> = 3, 0xc9 ->5 */ >> +} >> + >> +MP_IMPL uint32_t >> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type) { >=20 > For consistency with mp_encode_ext, type should go before len. >=20 >> + uint8_t c =3D mp_load_u8(data); >> + switch (c) { >> + case 0xd4: >> + case 0xd5: >> + case 0xd6: >> + case 0xd7: >> + case 0xd8: >> + *len =3D 1u << (c - 0xd4); >> + break; >> + case 0xc7: >> + *len =3D mp_load_u8(data); >> + break; >> + case 0xc8: >> + *len =3D mp_load_u16(data); >> + break; >> + case 0xc9: >> + *len =3D mp_load_u32(data); >> + break; >> + default: >> + mp_unreachable(); >> + } >> + *type =3D mp_load_u8(data); >> + return *len; >> +} >> + >> MP_IMPL uint32_t >> mp_sizeof_uint(uint64_t num) >> { >> diff --git a/test/msgpuck.c b/test/msgpuck.c >> index 3b31bfc..5ddaba7 100644 >> --- a/test/msgpuck.c >> +++ b/test/msgpuck.c >> @@ -789,9 +789,8 @@ test_mp_print() >> d =3D mp_encode_double(d, 3.14); >> d =3D mp_encode_uint(d, 100); >> d =3D mp_encode_uint(d, 500); >> - *d++ =3D 0xd4; /* let's pack smallest fixed ext */ >> - *d++ =3D 0; >> - *d++ =3D 0; >> + /* let's pack zero-length ext */ >> + d =3D mp_encode_ext(d, MP_EXT_DECIMAL, 0); >=20 > Please test every single function you add for all kinds of extension > encodings (fixext, ext). >=20 >> char bin[] =3D "\x12test\x34\b\t\n\"bla\\-bla\"\f\r"; >> d =3D mp_encode_bin(d, bin, sizeof(bin)); >> d =3D mp_encode_map(d, 0); >> @@ -800,7 +799,7 @@ test_mp_print() >> const char *expected =3D >> "[-5, 42, \"kill bill\", [], " >> "{\"bool true\": true, \"bool false\": false, \"null\": = null, " >> - "\"float\": 3.14, \"double\": 3.14, 100: 500}, = undefined, " >> + "\"float\": 3.14, \"double\": 3.14, 100: 500}, decimal, = " >> = "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]"; >> int esize =3D strlen(expected); >>=20 Thank you for review! I=E2=80=99ve addressed your comments in v3, please = check it out.