* [PATCH] Add MsgPack ext types handling.
@ 2019-04-23 11:37 Serge Petrenko
2019-04-23 12:21 ` [tarantool-patches] " Konstantin Osipov
0 siblings, 1 reply; 6+ messages in thread
From: Serge Petrenko @ 2019-04-23 11:37 UTC (permalink / raw)
To: vdavydov.dev; +Cc: tarantool-patches, Serge Petrenko
Add the ability to encode/decode MsgPack ext types,
introduce the first extension type: MP_EXT_DECIMAL: it will be used to
store decimals in tarantool.
Needed-for tarantool/tarantool#692
---
https://github.com/tarantool/msgpuck/tree/sp/add-extension-types
https://github.com/tarantool/tarantool/issues/692
hints.c | 24 +++++++++
msgpuck.c | 20 +++++++-
msgpuck.h | 133 +++++++++++++++++++++++++++++++++++++++++++++++++
test/msgpuck.c | 7 ++-
4 files changed, 178 insertions(+), 6 deletions(-)
diff --git a/hints.c b/hints.c
index be859e9..0ab4f4d 100644
--- a/hints.c
+++ b/hints.c
@@ -672,3 +672,27 @@ const char *mp_char2escape[128] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\u007f"
};
+
+/*
+ * This lookup table is used by mp_encode_ext() to
+ * determine ext code (fixext 1, fixext 2, fixext 4, fixext 8,
+ * fixext 16) to encode using size.
+ */
+const uint8_t mp_ext_hint[16] = {
+ 0xd4, /* 1 */
+ 0xd5, /* 2 */
+ 0xd6, /* 3 */
+ 0xd6, /* 4 */
+ 0xd7, /* 5 */
+ 0xd7, /* 6 */
+ 0xd7, /* 7 */
+ 0xd7, /* 8 */
+ 0xd8, /* 9 */
+ 0xd8, /* 10 */
+ 0xd8, /* 11 */
+ 0xd8, /* 12 */
+ 0xd8, /* 13 */
+ 0xd8, /* 14 */
+ 0xd8, /* 15 */
+ 0xd8 /* 16 */
+};
diff --git a/msgpuck.c b/msgpuck.c
index d0ffb83..04aeb0f 100644
--- a/msgpuck.c
+++ b/msgpuck.c
@@ -33,6 +33,17 @@
#define MP_LIBRARY 1
#include "msgpuck.h"
+const char *
+mp_ext_type_str(enum mp_ext_type type)
+{
+ switch(type) {
+ case MP_EXT_DECIMAL:
+ return "decimal";
+ default:
+ return "undefined";
+ }
+}
+
size_t
mp_vformat(char *data, size_t data_size, const char *format, va_list vl)
{
@@ -298,9 +309,14 @@ next: \
PRINTF("%lg", mp_decode_double(&data)); \
break; \
case MP_EXT: \
- mp_next(&data); \
- PRINTF("undefined"); \
+ { \
+ uint8_t type; \
+ uint32_t len; \
+ len = mp_decode_ext(&data, &len, &type); \
+ PRINTF("%s", mp_ext_type_str(type)); \
+ data += len; \
break; \
+ } \
default: \
mp_unreachable(); \
return -1; \
diff --git a/msgpuck.h b/msgpuck.h
index 6b29cd6..0a7d230 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -382,6 +382,13 @@ enum mp_type {
MP_EXT
};
+/**
+ * \brief MsgPack extension data types.
+ */
+enum mp_ext_type {
+ MP_EXT_DECIMAL = 0
+};
+
/**
* \brief Determine MsgPack type by a first byte \a c of encoded data.
*
@@ -532,6 +539,49 @@ mp_check_map(const char *cur, const char *end);
MP_PROTO uint32_t
mp_decode_map(const char **data);
+/**
+ * \brief calculate exact buffer size needed to store
+ * ext value of length \a len.
+ * \param len value length in bytes.
+ * \retval buffer size in bytes
+ */
+MP_PROTO uint32_t
+mp_sizeof_ext(uint32_t len);
+
+/**
+ * \brief Encode extension header with \a type and
+ * value length \a len.
+ * The value must be encoded after the header.
+ * \return \a data + \link mp_sizeof_ext() mp_sizeof_ext(size)\endlink
+ */
+MP_PROTO char *
+mp_encode_ext(char *data, uint8_t type, uint32_t len);
+
+/**
+ * \brief Check that \a cur buffer has enough bytes to decode a map header.
+ * \param cur buffer
+ * \param end end of the buffer
+ * \retval 0 - buffer has enough bytes
+ * \retval > 0 - the numbeer of remaining bytes to read
+ * \pre cur < end
+ * \pre mp_typeof(*cur) == MP_EXT
+ */
+MP_PROTO MP_PURE ptrdiff_t
+mp_check_ext(const char *cur, const char *end);
+
+/**
+ * \brief Decode an extension header from MsgPack \a data.
+ *
+ * The extension type value must be decoded after the header.
+ * \param data - the pointer to a buffer.
+ * \param len - decoded length of the following value.
+ * \param type - decoded type of the following value.
+ * \retval - the length of the following ext value.
+ * \post *data = *data + mp_sizeof_ext(length)
+ */
+MP_PROTO uint32_t
+mp_decode_ext(const char **data, uint32_t *len, uint8_t *type);
+
/**
* \brief Calculate exact buffer size needed to store an integer \a num.
* Maximum return value is 9. For performance reasons you can preallocate
@@ -1326,6 +1376,7 @@ mp_frame_advance(struct mp_frame *frame);
extern const enum mp_type mp_type_hint[];
extern const int8_t mp_parser_hint[];
extern const char *mp_char2escape[];
+extern const uint8_t mp_ext_hint[];
MP_IMPL MP_ALWAYSINLINE enum mp_type
mp_typeof(const char c)
@@ -1462,6 +1513,88 @@ mp_decode_map(const char **data)
}
}
+MP_IMPL uint32_t
+mp_sizeof_ext(uint32_t len)
+{
+ if (len <= 16) return 2;
+ if (len <= UINT8_MAX) return 3;
+ if (len <= UINT16_MAX) return 4;
+ else return 5;
+}
+
+MP_IMPL char *
+mp_encode_ext(char *data, uint8_t type, uint32_t len)
+{
+ /*
+ * zero-length should be encoded in EXT 8, because
+ * fixext always assumes exact length (1, 2, 4, 8 and 16).
+ */
+ if (len <= 16 && len > 0) {
+ data = mp_store_u8(data, mp_ext_hint[len-1]);
+ } else if (len <= UINT8_MAX) {
+ data = mp_store_u8(data, 0xc7);
+ data = mp_store_u8(data, (uint8_t) len);
+ } else if (len <= UINT16_MAX) {
+ data = mp_store_u8(data, 0xc8);
+ data = mp_store_u16(data, (uint16_t) len);
+ } else {
+ data = mp_store_u8(data, 0xc9);
+ data = mp_store_u32(data, (uint32_t) len);
+ }
+ data = mp_store_u8(data, type);
+ return data;
+}
+
+MP_IMPL ptrdiff_t
+mp_check_ext(const char *cur, const char *end)
+{
+ assert(cur < end);
+ assert(mp_typeof(*cur) == MP_EXT);
+ uint8_t c = mp_load_u8(&cur);
+ switch (c) {
+ case 0xd4:
+ case 0xd5:
+ case 0xd6:
+ case 0xd7:
+ case 0xd8:
+ return 1 - (end - cur);
+ case 0xc7:
+ case 0xc8:
+ return c - 0xc4 - (end - cur);
+ case 0xc9:
+ return 6 - (end - cur);
+ default:
+ mp_unreachable();
+ }
+}
+
+MP_IMPL uint32_t
+mp_decode_ext(const char **data, uint32_t *len, uint8_t *type) {
+ uint8_t c = mp_load_u8(data);
+ switch (c) {
+ case 0xd4:
+ case 0xd5:
+ case 0xd6:
+ case 0xd7:
+ case 0xd8:
+ *len = 1u << (c - 0xd4);
+ break;
+ case 0xc7:
+ *len = mp_load_u8(data);
+ break;
+ case 0xc8:
+ *len = mp_load_u16(data);
+ break;
+ case 0xc9:
+ *len = mp_load_u32(data);
+ break;
+ default:
+ mp_unreachable();
+ }
+ *type = mp_load_u8(data);
+ return *len;
+}
+
MP_IMPL uint32_t
mp_sizeof_uint(uint64_t num)
{
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 3b31bfc..5ddaba7 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -789,9 +789,8 @@ test_mp_print()
d = mp_encode_double(d, 3.14);
d = mp_encode_uint(d, 100);
d = mp_encode_uint(d, 500);
- *d++ = 0xd4; /* let's pack smallest fixed ext */
- *d++ = 0;
- *d++ = 0;
+ /* let's pack zero-length ext */
+ d = mp_encode_ext(d, MP_EXT_DECIMAL, 0);
char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r";
d = mp_encode_bin(d, bin, sizeof(bin));
d = mp_encode_map(d, 0);
@@ -800,7 +799,7 @@ test_mp_print()
const char *expected =
"[-5, 42, \"kill bill\", [], "
"{\"bool true\": true, \"bool false\": false, \"null\": null, "
- "\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, "
+ "\"float\": 3.14, \"double\": 3.14, 100: 500}, decimal, "
"\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]";
int esize = strlen(expected);
--
2.20.1 (Apple Git-117)
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [tarantool-patches] [PATCH] Add MsgPack ext types handling.
2019-04-23 11:37 [PATCH] Add MsgPack ext types handling Serge Petrenko
@ 2019-04-23 12:21 ` Konstantin Osipov
2019-04-23 13:02 ` [tarantool-patches] " Serge Petrenko
0 siblings, 1 reply; 6+ messages in thread
From: Konstantin Osipov @ 2019-04-23 12:21 UTC (permalink / raw)
To: tarantool-patches; +Cc: vdavydov.dev, Serge Petrenko
* Serge Petrenko <sergepetrenko@tarantool.org> [19/04/23 15:09]:
I mentioned before that this would break all clients.
It's high time to reconsider our in-memory storage format and make
it different from serialization format: we're seeing significant
performance penalty with serialization to and from msgpack in SQL,
and DECIMAL is a yet another blow on the single-format-approach:
our clients are simply not prepared to handle non-standard
extensions.
Let's investigate what it takes to store decimal as an mp ext, and
serialize as mp string.
> Add the ability to encode/decode MsgPack ext types,
> introduce the first extension type: MP_EXT_DECIMAL: it will be used to
> store decimals in tarantool.
>
> Needed-for tarantool/tarantool#692
> ---
> https://github.com/tarantool/msgpuck/tree/sp/add-extension-types
> https://github.com/tarantool/tarantool/issues/692
>
> hints.c | 24 +++++++++
> msgpuck.c | 20 +++++++-
> msgpuck.h | 133 +++++++++++++++++++++++++++++++++++++++++++++++++
> test/msgpuck.c | 7 ++-
> 4 files changed, 178 insertions(+), 6 deletions(-)
>
> diff --git a/hints.c b/hints.c
> index be859e9..0ab4f4d 100644
> --- a/hints.c
> +++ b/hints.c
> @@ -672,3 +672,27 @@ const char *mp_char2escape[128] = {
> NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
> NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\u007f"
> };
> +
> +/*
> + * This lookup table is used by mp_encode_ext() to
> + * determine ext code (fixext 1, fixext 2, fixext 4, fixext 8,
> + * fixext 16) to encode using size.
> + */
> +const uint8_t mp_ext_hint[16] = {
> + 0xd4, /* 1 */
> + 0xd5, /* 2 */
> + 0xd6, /* 3 */
> + 0xd6, /* 4 */
> + 0xd7, /* 5 */
> + 0xd7, /* 6 */
> + 0xd7, /* 7 */
> + 0xd7, /* 8 */
> + 0xd8, /* 9 */
> + 0xd8, /* 10 */
> + 0xd8, /* 11 */
> + 0xd8, /* 12 */
> + 0xd8, /* 13 */
> + 0xd8, /* 14 */
> + 0xd8, /* 15 */
> + 0xd8 /* 16 */
> +};
> diff --git a/msgpuck.c b/msgpuck.c
> index d0ffb83..04aeb0f 100644
> --- a/msgpuck.c
> +++ b/msgpuck.c
> @@ -33,6 +33,17 @@
> #define MP_LIBRARY 1
> #include "msgpuck.h"
>
> +const char *
> +mp_ext_type_str(enum mp_ext_type type)
> +{
> + switch(type) {
> + case MP_EXT_DECIMAL:
> + return "decimal";
> + default:
> + return "undefined";
> + }
> +}
> +
> size_t
> mp_vformat(char *data, size_t data_size, const char *format, va_list vl)
> {
> @@ -298,9 +309,14 @@ next: \
> PRINTF("%lg", mp_decode_double(&data)); \
> break; \
> case MP_EXT: \
> - mp_next(&data); \
> - PRINTF("undefined"); \
> + { \
> + uint8_t type; \
> + uint32_t len; \
> + len = mp_decode_ext(&data, &len, &type); \
> + PRINTF("%s", mp_ext_type_str(type)); \
> + data += len; \
> break; \
> + } \
> default: \
> mp_unreachable(); \
> return -1; \
> diff --git a/msgpuck.h b/msgpuck.h
> index 6b29cd6..0a7d230 100644
> --- a/msgpuck.h
> +++ b/msgpuck.h
> @@ -382,6 +382,13 @@ enum mp_type {
> MP_EXT
> };
>
> +/**
> + * \brief MsgPack extension data types.
> + */
> +enum mp_ext_type {
> + MP_EXT_DECIMAL = 0
> +};
> +
> /**
> * \brief Determine MsgPack type by a first byte \a c of encoded data.
> *
> @@ -532,6 +539,49 @@ mp_check_map(const char *cur, const char *end);
> MP_PROTO uint32_t
> mp_decode_map(const char **data);
>
> +/**
> + * \brief calculate exact buffer size needed to store
> + * ext value of length \a len.
> + * \param len value length in bytes.
> + * \retval buffer size in bytes
> + */
> +MP_PROTO uint32_t
> +mp_sizeof_ext(uint32_t len);
> +
> +/**
> + * \brief Encode extension header with \a type and
> + * value length \a len.
> + * The value must be encoded after the header.
> + * \return \a data + \link mp_sizeof_ext() mp_sizeof_ext(size)\endlink
> + */
> +MP_PROTO char *
> +mp_encode_ext(char *data, uint8_t type, uint32_t len);
> +
> +/**
> + * \brief Check that \a cur buffer has enough bytes to decode a map header.
> + * \param cur buffer
> + * \param end end of the buffer
> + * \retval 0 - buffer has enough bytes
> + * \retval > 0 - the numbeer of remaining bytes to read
> + * \pre cur < end
> + * \pre mp_typeof(*cur) == MP_EXT
> + */
> +MP_PROTO MP_PURE ptrdiff_t
> +mp_check_ext(const char *cur, const char *end);
> +
> +/**
> + * \brief Decode an extension header from MsgPack \a data.
> + *
> + * The extension type value must be decoded after the header.
> + * \param data - the pointer to a buffer.
> + * \param len - decoded length of the following value.
> + * \param type - decoded type of the following value.
> + * \retval - the length of the following ext value.
> + * \post *data = *data + mp_sizeof_ext(length)
> + */
> +MP_PROTO uint32_t
> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type);
> +
> /**
> * \brief Calculate exact buffer size needed to store an integer \a num.
> * Maximum return value is 9. For performance reasons you can preallocate
> @@ -1326,6 +1376,7 @@ mp_frame_advance(struct mp_frame *frame);
> extern const enum mp_type mp_type_hint[];
> extern const int8_t mp_parser_hint[];
> extern const char *mp_char2escape[];
> +extern const uint8_t mp_ext_hint[];
>
> MP_IMPL MP_ALWAYSINLINE enum mp_type
> mp_typeof(const char c)
> @@ -1462,6 +1513,88 @@ mp_decode_map(const char **data)
> }
> }
>
> +MP_IMPL uint32_t
> +mp_sizeof_ext(uint32_t len)
> +{
> + if (len <= 16) return 2;
> + if (len <= UINT8_MAX) return 3;
> + if (len <= UINT16_MAX) return 4;
> + else return 5;
> +}
> +
> +MP_IMPL char *
> +mp_encode_ext(char *data, uint8_t type, uint32_t len)
> +{
> + /*
> + * zero-length should be encoded in EXT 8, because
> + * fixext always assumes exact length (1, 2, 4, 8 and 16).
> + */
> + if (len <= 16 && len > 0) {
> + data = mp_store_u8(data, mp_ext_hint[len-1]);
> + } else if (len <= UINT8_MAX) {
> + data = mp_store_u8(data, 0xc7);
> + data = mp_store_u8(data, (uint8_t) len);
> + } else if (len <= UINT16_MAX) {
> + data = mp_store_u8(data, 0xc8);
> + data = mp_store_u16(data, (uint16_t) len);
> + } else {
> + data = mp_store_u8(data, 0xc9);
> + data = mp_store_u32(data, (uint32_t) len);
> + }
> + data = mp_store_u8(data, type);
> + return data;
> +}
> +
> +MP_IMPL ptrdiff_t
> +mp_check_ext(const char *cur, const char *end)
> +{
> + assert(cur < end);
> + assert(mp_typeof(*cur) == MP_EXT);
> + uint8_t c = mp_load_u8(&cur);
> + switch (c) {
> + case 0xd4:
> + case 0xd5:
> + case 0xd6:
> + case 0xd7:
> + case 0xd8:
> + return 1 - (end - cur);
> + case 0xc7:
> + case 0xc8:
> + return c - 0xc4 - (end - cur);
> + case 0xc9:
> + return 6 - (end - cur);
> + default:
> + mp_unreachable();
> + }
> +}
> +
> +MP_IMPL uint32_t
> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type) {
> + uint8_t c = mp_load_u8(data);
> + switch (c) {
> + case 0xd4:
> + case 0xd5:
> + case 0xd6:
> + case 0xd7:
> + case 0xd8:
> + *len = 1u << (c - 0xd4);
> + break;
> + case 0xc7:
> + *len = mp_load_u8(data);
> + break;
> + case 0xc8:
> + *len = mp_load_u16(data);
> + break;
> + case 0xc9:
> + *len = mp_load_u32(data);
> + break;
> + default:
> + mp_unreachable();
> + }
> + *type = mp_load_u8(data);
> + return *len;
> +}
> +
> MP_IMPL uint32_t
> mp_sizeof_uint(uint64_t num)
> {
> diff --git a/test/msgpuck.c b/test/msgpuck.c
> index 3b31bfc..5ddaba7 100644
> --- a/test/msgpuck.c
> +++ b/test/msgpuck.c
> @@ -789,9 +789,8 @@ test_mp_print()
> d = mp_encode_double(d, 3.14);
> d = mp_encode_uint(d, 100);
> d = mp_encode_uint(d, 500);
> - *d++ = 0xd4; /* let's pack smallest fixed ext */
> - *d++ = 0;
> - *d++ = 0;
> + /* let's pack zero-length ext */
> + d = mp_encode_ext(d, MP_EXT_DECIMAL, 0);
> char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r";
> d = mp_encode_bin(d, bin, sizeof(bin));
> d = mp_encode_map(d, 0);
> @@ -800,7 +799,7 @@ test_mp_print()
> const char *expected =
> "[-5, 42, \"kill bill\", [], "
> "{\"bool true\": true, \"bool false\": false, \"null\": null, "
> - "\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, "
> + "\"float\": 3.14, \"double\": 3.14, 100: 500}, decimal, "
> "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]";
> int esize = strlen(expected);
>
> --
> 2.20.1 (Apple Git-117)
>
--
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [tarantool-patches] Re: [PATCH] Add MsgPack ext types handling.
2019-04-23 12:21 ` [tarantool-patches] " Konstantin Osipov
@ 2019-04-23 13:02 ` Serge Petrenko
2019-04-23 13:44 ` Konstantin Osipov
0 siblings, 1 reply; 6+ messages in thread
From: Serge Petrenko @ 2019-04-23 13:02 UTC (permalink / raw)
To: Konstantin Osipov; +Cc: Vladimir Davydov, tarantool-patches
> 23 апр. 2019 г., в 15:21, Konstantin Osipov <kostja@tarantool.org> написал(а):
>
> * Serge Petrenko <sergepetrenko@tarantool.org> [19/04/23 15:09]:
>
> I mentioned before that this would break all clients.
>
> It's high time to reconsider our in-memory storage format and make
> it different from serialization format: we're seeing significant
> performance penalty with serialization to and from msgpack in SQL,
> and DECIMAL is a yet another blow on the single-format-approach:
> our clients are simply not prepared to handle non-standard
> extensions.
>
> Let's investigate what it takes to store decimal as an mp ext, and
> serialize as mp string.
Well, we can do this, but how can clients distinguish decimal and string then?
>
>
>> Add the ability to encode/decode MsgPack ext types,
>> introduce the first extension type: MP_EXT_DECIMAL: it will be used to
>> store decimals in tarantool.
>>
>> Needed-for tarantool/tarantool#692
>> ---
>> https://github.com/tarantool/msgpuck/tree/sp/add-extension-types
>> https://github.com/tarantool/tarantool/issues/692
>>
>> hints.c | 24 +++++++++
>> msgpuck.c | 20 +++++++-
>> msgpuck.h | 133 +++++++++++++++++++++++++++++++++++++++++++++++++
>> test/msgpuck.c | 7 ++-
>> 4 files changed, 178 insertions(+), 6 deletions(-)
>>
>> diff --git a/hints.c b/hints.c
>> index be859e9..0ab4f4d 100644
>> --- a/hints.c
>> +++ b/hints.c
>> @@ -672,3 +672,27 @@ const char *mp_char2escape[128] = {
>> NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
>> NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\u007f"
>> };
>> +
>> +/*
>> + * This lookup table is used by mp_encode_ext() to
>> + * determine ext code (fixext 1, fixext 2, fixext 4, fixext 8,
>> + * fixext 16) to encode using size.
>> + */
>> +const uint8_t mp_ext_hint[16] = {
>> + 0xd4, /* 1 */
>> + 0xd5, /* 2 */
>> + 0xd6, /* 3 */
>> + 0xd6, /* 4 */
>> + 0xd7, /* 5 */
>> + 0xd7, /* 6 */
>> + 0xd7, /* 7 */
>> + 0xd7, /* 8 */
>> + 0xd8, /* 9 */
>> + 0xd8, /* 10 */
>> + 0xd8, /* 11 */
>> + 0xd8, /* 12 */
>> + 0xd8, /* 13 */
>> + 0xd8, /* 14 */
>> + 0xd8, /* 15 */
>> + 0xd8 /* 16 */
>> +};
>> diff --git a/msgpuck.c b/msgpuck.c
>> index d0ffb83..04aeb0f 100644
>> --- a/msgpuck.c
>> +++ b/msgpuck.c
>> @@ -33,6 +33,17 @@
>> #define MP_LIBRARY 1
>> #include "msgpuck.h"
>>
>> +const char *
>> +mp_ext_type_str(enum mp_ext_type type)
>> +{
>> + switch(type) {
>> + case MP_EXT_DECIMAL:
>> + return "decimal";
>> + default:
>> + return "undefined";
>> + }
>> +}
>> +
>> size_t
>> mp_vformat(char *data, size_t data_size, const char *format, va_list vl)
>> {
>> @@ -298,9 +309,14 @@ next: \
>> PRINTF("%lg", mp_decode_double(&data)); \
>> break; \
>> case MP_EXT: \
>> - mp_next(&data); \
>> - PRINTF("undefined"); \
>> + { \
>> + uint8_t type; \
>> + uint32_t len; \
>> + len = mp_decode_ext(&data, &len, &type); \
>> + PRINTF("%s", mp_ext_type_str(type)); \
>> + data += len; \
>> break; \
>> + } \
>> default: \
>> mp_unreachable(); \
>> return -1; \
>> diff --git a/msgpuck.h b/msgpuck.h
>> index 6b29cd6..0a7d230 100644
>> --- a/msgpuck.h
>> +++ b/msgpuck.h
>> @@ -382,6 +382,13 @@ enum mp_type {
>> MP_EXT
>> };
>>
>> +/**
>> + * \brief MsgPack extension data types.
>> + */
>> +enum mp_ext_type {
>> + MP_EXT_DECIMAL = 0
>> +};
>> +
>> /**
>> * \brief Determine MsgPack type by a first byte \a c of encoded data.
>> *
>> @@ -532,6 +539,49 @@ mp_check_map(const char *cur, const char *end);
>> MP_PROTO uint32_t
>> mp_decode_map(const char **data);
>>
>> +/**
>> + * \brief calculate exact buffer size needed to store
>> + * ext value of length \a len.
>> + * \param len value length in bytes.
>> + * \retval buffer size in bytes
>> + */
>> +MP_PROTO uint32_t
>> +mp_sizeof_ext(uint32_t len);
>> +
>> +/**
>> + * \brief Encode extension header with \a type and
>> + * value length \a len.
>> + * The value must be encoded after the header.
>> + * \return \a data + \link mp_sizeof_ext() mp_sizeof_ext(size)\endlink
>> + */
>> +MP_PROTO char *
>> +mp_encode_ext(char *data, uint8_t type, uint32_t len);
>> +
>> +/**
>> + * \brief Check that \a cur buffer has enough bytes to decode a map header.
>> + * \param cur buffer
>> + * \param end end of the buffer
>> + * \retval 0 - buffer has enough bytes
>> + * \retval > 0 - the numbeer of remaining bytes to read
>> + * \pre cur < end
>> + * \pre mp_typeof(*cur) == MP_EXT
>> + */
>> +MP_PROTO MP_PURE ptrdiff_t
>> +mp_check_ext(const char *cur, const char *end);
>> +
>> +/**
>> + * \brief Decode an extension header from MsgPack \a data.
>> + *
>> + * The extension type value must be decoded after the header.
>> + * \param data - the pointer to a buffer.
>> + * \param len - decoded length of the following value.
>> + * \param type - decoded type of the following value.
>> + * \retval - the length of the following ext value.
>> + * \post *data = *data + mp_sizeof_ext(length)
>> + */
>> +MP_PROTO uint32_t
>> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type);
>> +
>> /**
>> * \brief Calculate exact buffer size needed to store an integer \a num.
>> * Maximum return value is 9. For performance reasons you can preallocate
>> @@ -1326,6 +1376,7 @@ mp_frame_advance(struct mp_frame *frame);
>> extern const enum mp_type mp_type_hint[];
>> extern const int8_t mp_parser_hint[];
>> extern const char *mp_char2escape[];
>> +extern const uint8_t mp_ext_hint[];
>>
>> MP_IMPL MP_ALWAYSINLINE enum mp_type
>> mp_typeof(const char c)
>> @@ -1462,6 +1513,88 @@ mp_decode_map(const char **data)
>> }
>> }
>>
>> +MP_IMPL uint32_t
>> +mp_sizeof_ext(uint32_t len)
>> +{
>> + if (len <= 16) return 2;
>> + if (len <= UINT8_MAX) return 3;
>> + if (len <= UINT16_MAX) return 4;
>> + else return 5;
>> +}
>> +
>> +MP_IMPL char *
>> +mp_encode_ext(char *data, uint8_t type, uint32_t len)
>> +{
>> + /*
>> + * zero-length should be encoded in EXT 8, because
>> + * fixext always assumes exact length (1, 2, 4, 8 and 16).
>> + */
>> + if (len <= 16 && len > 0) {
>> + data = mp_store_u8(data, mp_ext_hint[len-1]);
>> + } else if (len <= UINT8_MAX) {
>> + data = mp_store_u8(data, 0xc7);
>> + data = mp_store_u8(data, (uint8_t) len);
>> + } else if (len <= UINT16_MAX) {
>> + data = mp_store_u8(data, 0xc8);
>> + data = mp_store_u16(data, (uint16_t) len);
>> + } else {
>> + data = mp_store_u8(data, 0xc9);
>> + data = mp_store_u32(data, (uint32_t) len);
>> + }
>> + data = mp_store_u8(data, type);
>> + return data;
>> +}
>> +
>> +MP_IMPL ptrdiff_t
>> +mp_check_ext(const char *cur, const char *end)
>> +{
>> + assert(cur < end);
>> + assert(mp_typeof(*cur) == MP_EXT);
>> + uint8_t c = mp_load_u8(&cur);
>> + switch (c) {
>> + case 0xd4:
>> + case 0xd5:
>> + case 0xd6:
>> + case 0xd7:
>> + case 0xd8:
>> + return 1 - (end - cur);
>> + case 0xc7:
>> + case 0xc8:
>> + return c - 0xc4 - (end - cur);
>> + case 0xc9:
>> + return 6 - (end - cur);
>> + default:
>> + mp_unreachable();
>> + }
>> +}
>> +
>> +MP_IMPL uint32_t
>> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type) {
>> + uint8_t c = mp_load_u8(data);
>> + switch (c) {
>> + case 0xd4:
>> + case 0xd5:
>> + case 0xd6:
>> + case 0xd7:
>> + case 0xd8:
>> + *len = 1u << (c - 0xd4);
>> + break;
>> + case 0xc7:
>> + *len = mp_load_u8(data);
>> + break;
>> + case 0xc8:
>> + *len = mp_load_u16(data);
>> + break;
>> + case 0xc9:
>> + *len = mp_load_u32(data);
>> + break;
>> + default:
>> + mp_unreachable();
>> + }
>> + *type = mp_load_u8(data);
>> + return *len;
>> +}
>> +
>> MP_IMPL uint32_t
>> mp_sizeof_uint(uint64_t num)
>> {
>> diff --git a/test/msgpuck.c b/test/msgpuck.c
>> index 3b31bfc..5ddaba7 100644
>> --- a/test/msgpuck.c
>> +++ b/test/msgpuck.c
>> @@ -789,9 +789,8 @@ test_mp_print()
>> d = mp_encode_double(d, 3.14);
>> d = mp_encode_uint(d, 100);
>> d = mp_encode_uint(d, 500);
>> - *d++ = 0xd4; /* let's pack smallest fixed ext */
>> - *d++ = 0;
>> - *d++ = 0;
>> + /* let's pack zero-length ext */
>> + d = mp_encode_ext(d, MP_EXT_DECIMAL, 0);
>> char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r";
>> d = mp_encode_bin(d, bin, sizeof(bin));
>> d = mp_encode_map(d, 0);
>> @@ -800,7 +799,7 @@ test_mp_print()
>> const char *expected =
>> "[-5, 42, \"kill bill\", [], "
>> "{\"bool true\": true, \"bool false\": false, \"null\": null, "
>> - "\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, "
>> + "\"float\": 3.14, \"double\": 3.14, 100: 500}, decimal, "
>> "\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]";
>> int esize = strlen(expected);
>>
>> --
>> 2.20.1 (Apple Git-117)
>>
>
> --
> Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
> http://tarantool.io - www.twitter.com/kostja_osipov
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [tarantool-patches] Re: [PATCH] Add MsgPack ext types handling.
2019-04-23 13:02 ` [tarantool-patches] " Serge Petrenko
@ 2019-04-23 13:44 ` Konstantin Osipov
2019-04-23 13:54 ` Vladislav Shpilevoy
0 siblings, 1 reply; 6+ messages in thread
From: Konstantin Osipov @ 2019-04-23 13:44 UTC (permalink / raw)
To: Serge Petrenko; +Cc: Vladimir Davydov, tarantool-patches
* Serge Petrenko <sergepetrenko@tarantool.org> [19/04/23 16:05]:
>
>
> > 23 апр. 2019 г., в 15:21, Konstantin Osipov <kostja@tarantool.org> написал(а):
> >
> > * Serge Petrenko <sergepetrenko@tarantool.org> [19/04/23 15:09]:
> >
> > I mentioned before that this would break all clients.
> >
> > It's high time to reconsider our in-memory storage format and make
> > it different from serialization format: we're seeing significant
> > performance penalty with serialization to and from msgpack in SQL,
> > and DECIMAL is a yet another blow on the single-format-approach:
> > our clients are simply not prepared to handle non-standard
> > extensions.
> >
> > Let's investigate what it takes to store decimal as an mp ext, and
> > serialize as mp string.
>
> Well, we can do this, but how can clients distinguish decimal and string then?
From result set metadata or, more generally, from schema.
--
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [tarantool-patches] Re: [PATCH] Add MsgPack ext types handling.
2019-04-23 13:44 ` Konstantin Osipov
@ 2019-04-23 13:54 ` Vladislav Shpilevoy
2019-04-23 14:20 ` Konstantin Osipov
0 siblings, 1 reply; 6+ messages in thread
From: Vladislav Shpilevoy @ 2019-04-23 13:54 UTC (permalink / raw)
To: tarantool-patches, Konstantin Osipov, Serge Petrenko; +Cc: Vladimir Davydov
On 23/04/2019 16:44, Konstantin Osipov wrote:
> * Serge Petrenko <sergepetrenko@tarantool.org> [19/04/23 16:05]:
>>
>>
>>> 23 апр. 2019 г., в 15:21, Konstantin Osipov <kostja@tarantool.org> написал(а):
>>>
>>> * Serge Petrenko <sergepetrenko@tarantool.org> [19/04/23 15:09]:
>>>
>>> I mentioned before that this would break all clients.
>>>
>>> It's high time to reconsider our in-memory storage format and make
>>> it different from serialization format: we're seeing significant
>>> performance penalty with serialization to and from msgpack in SQL,
>>> and DECIMAL is a yet another blow on the single-format-approach:
>>> our clients are simply not prepared to handle non-standard
>>> extensions.
>>>
>>> Let's investigate what it takes to store decimal as an mp ext, and
>>> serialize as mp string.
>>
>> Well, we can do this, but how can clients distinguish decimal and string then?
>
> From result set metadata or, more generally, from schema.
Then I will be able to violate format via insertion of strings into
decimal column.
>
>
>
> --
> Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
> http://tarantool.io - www.twitter.com/kostja_osipov
>
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2019-04-23 14:20 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-23 11:37 [PATCH] Add MsgPack ext types handling Serge Petrenko
2019-04-23 12:21 ` [tarantool-patches] " Konstantin Osipov
2019-04-23 13:02 ` [tarantool-patches] " Serge Petrenko
2019-04-23 13:44 ` Konstantin Osipov
2019-04-23 13:54 ` Vladislav Shpilevoy
2019-04-23 14:20 ` Konstantin Osipov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox