From: Serge Petrenko <sergepetrenko@tarantool.org>
To: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: tarantool-patches@freelists.org,
Georgy Kirichenko <georgy@tarantool.org>,
kostja@tarantool.org
Subject: Re: [tarantool-patches] [PATCH v2] Add MsgPack ext types handling.
Date: Mon, 3 Jun 2019 16:15:22 +0300 [thread overview]
Message-ID: <7949589F-11DA-4BB0-990E-E1210C8E9D65@tarantool.org> (raw)
In-Reply-To: <20190529163838.c52ackt2shbaotss@esperanza>
> 29 мая 2019 г., в 19:38, Vladimir Davydov <vdavydov.dev@gmail.com> написал(а):
>
> On Tue, May 21, 2019 at 04:57:49PM +0300, Serge Petrenko wrote:
>> diff --git a/msgpuck.c b/msgpuck.c
>> index d0ffb83..04aeb0f 100644
>> --- a/msgpuck.c
>> +++ b/msgpuck.c
>> @@ -33,6 +33,17 @@
>> #define MP_LIBRARY 1
>> #include "msgpuck.h"
>>
>> +const char *
>> +mp_ext_type_str(enum mp_ext_type type)
>> +{
>> + switch(type) {
>> + case MP_EXT_DECIMAL:
>> + return "decimal";
>
> This isn't particularly useful. Unless we can print a decimal as
> decimal here, I'd leave 'undefined' for now.
>
>> + default:
>> + return "undefined";
>> + }
>> +}
>> +
>> size_t
>> mp_vformat(char *data, size_t data_size, const char *format, va_list vl)
>> {
>> @@ -298,9 +309,14 @@ next: \
>> PRINTF("%lg", mp_decode_double(&data)); \
>> break; \
>> case MP_EXT: \
>> - mp_next(&data); \
>> - PRINTF("undefined"); \
>> + { \
>> + uint8_t type; \
>> + uint32_t len; \
>> + len = mp_decode_ext(&data, &len, &type); \
>> + PRINTF("%s", mp_ext_type_str(type)); \
>> + data += len; \
>> break; \
>> + } \
>> default: \
>> mp_unreachable(); \
>> return -1; \
>> diff --git a/msgpuck.h b/msgpuck.h
>> index 6b29cd6..ec9e135 100644
>> --- a/msgpuck.h
>> +++ b/msgpuck.h
>> @@ -382,6 +382,13 @@ enum mp_type {
>> MP_EXT
>> };
>>
>> +/**
>> + * \brief MsgPack extension data types.
>> + */
>> +enum mp_ext_type {
>> + MP_EXT_DECIMAL = 0
>
> Not sure, this should be defined here. After all, it's an application
> defined type so its code should be defined in the application, not in
> the library. OTOH, I'd expect to see timestamp here, which is defined in
> the spec.
>
>> +};
>> +
>> /**
>> * \brief Determine MsgPack type by a first byte \a c of encoded data.
>> *
>> @@ -532,6 +539,49 @@ mp_check_map(const char *cur, const char *end);
>> MP_PROTO uint32_t
>> mp_decode_map(const char **data);
>>
>> +/**
>> + * \brief calculate exact buffer size needed to store
>> + * ext value of length \a len.
>> + * \param len value length in bytes.
>> + * \retval buffer size in bytes
>> + */
>> +MP_PROTO uint32_t
>> +mp_sizeof_ext(uint32_t len);
>> +
>> +/**
>> + * \brief Encode extension header with \a type and
>> + * value length \a len.
>> + * The value must be encoded after the header.
>> + * \return \a data + \link mp_sizeof_ext() mp_sizeof_ext(size)\endlink
>> + */
>> +MP_PROTO char *
>> +mp_encode_ext(char *data, uint8_t type, uint32_t len);
>> +
>> +/**
>> + * \brief Check that \a cur buffer has enough bytes to decode an ext header.
>> + * \param cur buffer
>> + * \param end end of the buffer
>> + * \retval 0 - buffer has enough bytes
>> + * \retval > 0 - the numbeer of remaining bytes to read
>> + * \pre cur < end
>> + * \pre mp_typeof(*cur) == MP_EXT
>> + */
>> +MP_PROTO MP_PURE ptrdiff_t
>> +mp_check_ext(const char *cur, const char *end);
>> +
>> +/**
>> + * \brief Decode an extension header from MsgPack \a data.
>> + *
>> + * The extension type value must be decoded after the header.
>> + * \param data - the pointer to a buffer.
>> + * \param len - decoded length of the following value.
>> + * \param type - decoded type of the following value.
>> + * \retval - the length of the following ext value.
>> + * \post *data = *data + mp_sizeof_ext(length)
>> + */
>> +MP_PROTO uint32_t
>> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type);
>> +
>> /**
>> * \brief Calculate exact buffer size needed to store an integer \a num.
>> * Maximum return value is 9. For performance reasons you can preallocate
>> @@ -1326,6 +1376,7 @@ mp_frame_advance(struct mp_frame *frame);
>> extern const enum mp_type mp_type_hint[];
>> extern const int8_t mp_parser_hint[];
>> extern const char *mp_char2escape[];
>> +extern const uint8_t mp_ext_hint[];
>>
>> MP_IMPL MP_ALWAYSINLINE enum mp_type
>> mp_typeof(const char c)
>> @@ -1462,6 +1513,79 @@ mp_decode_map(const char **data)
>> }
>> }
>>
>> +MP_IMPL uint32_t
>> +mp_sizeof_ext(uint32_t len)
>> +{
>> + if (len <= 16) return 2;
>> + if (len <= UINT8_MAX) return 3;
>> + if (len <= UINT16_MAX) return 4;
>> + else return 5;
>> +}
>> +
>> +MP_IMPL char *
>> +mp_encode_ext(char *data, uint8_t type, uint32_t len)
>
> I'd expect it to take a pointer to the extension data, just like in
> case of mp_encode_str/bin. For encoding only the header, I'd introduce
> a separate function mp_encode_extl. Same is fair for decoding functions.
>
>> +{
>> + /*
>> + * Only use fixext when length is exactly 1, 2, 4, 8 or 16.
>> + * Otherwise use ext 8 if length <= 255.
>> + */
>> + if (len <= 16 && mp_ext_hint[len-1]) {
>
> AFAICS this is inconsistent with mp_sizeof_ext: the latter assumes that
> any ext data of length <= 16 is encoded using fixext.
>
>> + data = mp_store_u8(data, mp_ext_hint[len-1]);
>> + } else if (len <= UINT8_MAX) {
>> + data = mp_store_u8(data, 0xc7);
>> + data = mp_store_u8(data, (uint8_t) len);
>> + } else if (len <= UINT16_MAX) {
>> + data = mp_store_u8(data, 0xc8);
>> + data = mp_store_u16(data, (uint16_t) len);
>> + } else {
>> + data = mp_store_u8(data, 0xc9);
>> + data = mp_store_u32(data,len);
>> + }
>> + data = mp_store_u8(data, type);
>> + return data;
>> +}
>> +
>> +MP_IMPL ptrdiff_t
>> +mp_check_ext(const char *cur, const char *end)
>> +{
>> + assert(cur < end);
>> + assert(mp_typeof(*cur) == MP_EXT);
>> + uint8_t c = mp_load_u8(&cur);
>> + if (c & 0xf0 == 0xd0) {
>> + return 1 - (end - cur);
>> + }
>> +
>> + assert(c >= 0xc7 && c <= 0xc9);
>> + return 1 << (c - 0xc7) + 1 - (end - cur); /* 0xc7 -> 2, 0xc8 -> 3, 0xc9 ->5 */
>> +}
>> +
>> +MP_IMPL uint32_t
>> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type) {
>
> For consistency with mp_encode_ext, type should go before len.
>
>> + uint8_t c = mp_load_u8(data);
>> + switch (c) {
>> + case 0xd4:
>> + case 0xd5:
>> + case 0xd6:
>> + case 0xd7:
>> + case 0xd8:
>> + *len = 1u << (c - 0xd4);
>> + break;
>> + case 0xc7:
>> + *len = mp_load_u8(data);
>> + break;
>> + case 0xc8:
>> + *len = mp_load_u16(data);
>> + break;
>> + case 0xc9:
>> + *len = mp_load_u32(data);
>> + break;
>> + default:
>> + mp_unreachable();
>> + }
>> + *type = mp_load_u8(data);
>> + return *len;
>> +}
>> +
>> MP_IMPL uint32_t
>> mp_sizeof_uint(uint64_t num)
>> {
>> diff --git a/test/msgpuck.c b/test/msgpuck.c
>> index 3b31bfc..5ddaba7 100644
>> --- a/test/msgpuck.c
>> +++ b/test/msgpuck.c
>> @@ -789,9 +789,8 @@ test_mp_print()
>> d = mp_encode_double(d, 3.14);
>> d = mp_encode_uint(d, 100);
>> d = mp_encode_uint(d, 500);
>> - *d++ = 0xd4; /* let's pack smallest fixed ext */
>> - *d++ = 0;
>> - *d++ = 0;
>> + /* let's pack zero-length ext */
>> + d = mp_encode_ext(d, MP_EXT_DECIMAL, 0);
>
> Please test every single function you add for all kinds of extension
> encodings (fixext, ext).
>
>> char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r";
>> d = mp_encode_bin(d, bin, sizeof(bin));
>> d = mp_encode_map(d, 0);
>> @@ -800,7 +799,7 @@ test_mp_print()
>> const char *expected =
>> "[-5, 42, \"kill bill\", [], "
>> "{\"bool true\": true, \"bool false\": false, \"null\": null, "
>> - "\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, "
>> + "\"float\": 3.14, \"double\": 3.14, 100: 500}, decimal, "
>> "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]";
>> int esize = strlen(expected);
>>
Thank you for review! I’ve addressed your comments in v3, please check it out.
prev parent reply other threads:[~2019-06-03 13:15 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-05-21 13:57 Serge Petrenko
2019-05-29 16:38 ` Vladimir Davydov
2019-06-03 13:15 ` Serge Petrenko [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7949589F-11DA-4BB0-990E-E1210C8E9D65@tarantool.org \
--to=sergepetrenko@tarantool.org \
--cc=georgy@tarantool.org \
--cc=kostja@tarantool.org \
--cc=tarantool-patches@freelists.org \
--cc=vdavydov.dev@gmail.com \
--subject='Re: [tarantool-patches] [PATCH v2] Add MsgPack ext types handling.' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox