[tarantool-patches] Re: [PATCH] Add MsgPack ext types handling.

Serge Petrenko sergepetrenko at tarantool.org
Tue Apr 23 16:02:24 MSK 2019



> 23 апр. 2019 г., в 15:21, Konstantin Osipov <kostja at tarantool.org> написал(а):
> 
> * Serge Petrenko <sergepetrenko at tarantool.org> [19/04/23 15:09]:
> 
> I mentioned before that this would break all clients.
> 
> It's high time to reconsider our in-memory storage format and make
> it different from serialization format: we're seeing significant
> performance penalty with serialization to and from msgpack in SQL,
> and DECIMAL is a yet another blow on the single-format-approach:
> our clients are simply not prepared to handle non-standard
> extensions. 
> 
> Let's investigate what it takes to store decimal as an mp ext, and
> serialize as mp string.

Well, we can do this, but how can clients distinguish decimal and string then?

> 
> 
>> Add the ability to encode/decode MsgPack ext types,
>> introduce the first extension type: MP_EXT_DECIMAL: it will be used to
>> store decimals in tarantool.
>> 
>> Needed-for tarantool/tarantool#692
>> ---
>> https://github.com/tarantool/msgpuck/tree/sp/add-extension-types
>> https://github.com/tarantool/tarantool/issues/692
>> 
>> hints.c        |  24 +++++++++
>> msgpuck.c      |  20 +++++++-
>> msgpuck.h      | 133 +++++++++++++++++++++++++++++++++++++++++++++++++
>> test/msgpuck.c |   7 ++-
>> 4 files changed, 178 insertions(+), 6 deletions(-)
>> 
>> diff --git a/hints.c b/hints.c
>> index be859e9..0ab4f4d 100644
>> --- a/hints.c
>> +++ b/hints.c
>> @@ -672,3 +672,27 @@ const char *mp_char2escape[128] = {
>> 	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
>> 	NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\u007f"
>> };
>> +
>> +/*
>> + * This lookup table is used by mp_encode_ext() to
>> + * determine ext code (fixext 1, fixext 2, fixext 4, fixext 8,
>> + * fixext 16) to encode using size.
>> + */
>> +const uint8_t mp_ext_hint[16] = {
>> +	0xd4,  /*  1 */
>> +	0xd5,  /*  2 */
>> +	0xd6,  /*  3 */
>> +	0xd6,  /*  4 */
>> +	0xd7,  /*  5 */
>> +	0xd7,  /*  6 */
>> +	0xd7,  /*  7 */
>> +	0xd7,  /*  8 */
>> +	0xd8,  /*  9 */
>> +	0xd8,  /* 10 */
>> +	0xd8,  /* 11 */
>> +	0xd8,  /* 12 */
>> +	0xd8,  /* 13 */
>> +	0xd8,  /* 14 */
>> +	0xd8,  /* 15 */
>> +	0xd8   /* 16 */
>> +};
>> diff --git a/msgpuck.c b/msgpuck.c
>> index d0ffb83..04aeb0f 100644
>> --- a/msgpuck.c
>> +++ b/msgpuck.c
>> @@ -33,6 +33,17 @@
>> #define MP_LIBRARY 1
>> #include "msgpuck.h"
>> 
>> +const char *
>> +mp_ext_type_str(enum mp_ext_type type)
>> +{
>> +	switch(type) {
>> +	case MP_EXT_DECIMAL:
>> +		return "decimal";
>> +	default:
>> +		return "undefined";
>> +	}
>> +}
>> +
>> size_t
>> mp_vformat(char *data, size_t data_size, const char *format, va_list vl)
>> {
>> @@ -298,9 +309,14 @@ next:										\
>> 		PRINTF("%lg", mp_decode_double(&data));				\
>> 		break;								\
>> 	case MP_EXT:								\
>> -		mp_next(&data);							\
>> -		PRINTF("undefined");						\
>> +	{									\
>> +		uint8_t type;							\
>> +		uint32_t len;							\
>> +		len = mp_decode_ext(&data, &len, &type);			\
>> +		PRINTF("%s", mp_ext_type_str(type));	    			\
>> +		data += len;							\
>> 		break;								\
>> +	}									\
>> 	default:								\
>> 		mp_unreachable();						\
>> 		return -1;							\
>> diff --git a/msgpuck.h b/msgpuck.h
>> index 6b29cd6..0a7d230 100644
>> --- a/msgpuck.h
>> +++ b/msgpuck.h
>> @@ -382,6 +382,13 @@ enum mp_type {
>> 	MP_EXT
>> };
>> 
>> +/**
>> + * \brief MsgPack extension data types.
>> + */
>> +enum mp_ext_type {
>> +	MP_EXT_DECIMAL = 0
>> +};
>> +
>> /**
>>  * \brief Determine MsgPack type by a first byte \a c of encoded data.
>>  *
>> @@ -532,6 +539,49 @@ mp_check_map(const char *cur, const char *end);
>> MP_PROTO uint32_t
>> mp_decode_map(const char **data);
>> 
>> +/**
>> + * \brief calculate exact buffer size needed to store
>> + * ext value of length \a len.
>> + * \param len value length in bytes.
>> + * \retval buffer size in bytes
>> + */
>> +MP_PROTO uint32_t
>> +mp_sizeof_ext(uint32_t len);
>> +
>> +/**
>> + * \brief Encode extension header with \a type and
>> + * value length \a len.
>> + * The value must be encoded after the header.
>> + * \return \a data + \link mp_sizeof_ext() mp_sizeof_ext(size)\endlink
>> + */
>> +MP_PROTO char *
>> +mp_encode_ext(char *data, uint8_t type, uint32_t len);
>> +
>> +/**
>> + * \brief Check that \a cur buffer has enough bytes to decode a map header.
>> + * \param cur buffer
>> + * \param end end of the buffer
>> + * \retval 0 - buffer has enough bytes
>> + * \retval > 0 - the numbeer of remaining bytes to read
>> + * \pre cur < end
>> + * \pre mp_typeof(*cur) == MP_EXT
>> + */
>> +MP_PROTO MP_PURE ptrdiff_t
>> +mp_check_ext(const char *cur, const char *end);
>> +
>> +/**
>> + * \brief Decode an extension header from MsgPack \a data.
>> + *
>> + * The extension type value must be decoded after the header.
>> + * \param data - the pointer to a buffer.
>> + * \param len -  decoded length of the following value.
>> + * \param type - decoded type of the following value.
>> + * \retval - the length of the following ext value.
>> + * \post *data = *data + mp_sizeof_ext(length)
>> + */
>> +MP_PROTO uint32_t
>> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type);
>> +
>> /**
>>  * \brief Calculate exact buffer size needed to store an integer \a num.
>>  * Maximum return value is 9. For performance reasons you can preallocate
>> @@ -1326,6 +1376,7 @@ mp_frame_advance(struct mp_frame *frame);
>> extern const enum mp_type mp_type_hint[];
>> extern const int8_t mp_parser_hint[];
>> extern const char *mp_char2escape[];
>> +extern const uint8_t mp_ext_hint[];
>> 
>> MP_IMPL MP_ALWAYSINLINE enum mp_type
>> mp_typeof(const char c)
>> @@ -1462,6 +1513,88 @@ mp_decode_map(const char **data)
>> 	}
>> }
>> 
>> +MP_IMPL uint32_t
>> +mp_sizeof_ext(uint32_t len)
>> +{
>> +	if (len <= 16) return 2;
>> +	if (len <= UINT8_MAX) return 3;
>> +	if (len <= UINT16_MAX) return 4;
>> +	else return 5;
>> +}
>> +
>> +MP_IMPL char *
>> +mp_encode_ext(char *data, uint8_t type, uint32_t len)
>> +{
>> +	/*
>> +	 * zero-length should be encoded in EXT 8, because
>> +	 * fixext always assumes exact length (1, 2, 4, 8 and 16).
>> +	 */
>> +	if (len <= 16 && len > 0) {
>> +		data = mp_store_u8(data, mp_ext_hint[len-1]);
>> +	} else if (len <= UINT8_MAX) {
>> +		data = mp_store_u8(data, 0xc7);
>> +		data = mp_store_u8(data, (uint8_t) len);
>> +	} else if (len <= UINT16_MAX) {
>> +		data = mp_store_u8(data, 0xc8);
>> +		data = mp_store_u16(data, (uint16_t) len);
>> +	} else {
>> +		data = mp_store_u8(data, 0xc9);
>> +		data = mp_store_u32(data, (uint32_t) len);
>> +	}
>> +	data = mp_store_u8(data, type);
>> +	return data;
>> +}
>> +
>> +MP_IMPL ptrdiff_t
>> +mp_check_ext(const char *cur, const char *end)
>> +{
>> +	assert(cur < end);
>> +	assert(mp_typeof(*cur) == MP_EXT);
>> +	uint8_t c = mp_load_u8(&cur);
>> +	switch (c) {
>> +	case 0xd4:
>> +	case 0xd5:
>> +	case 0xd6:
>> +	case 0xd7:
>> +	case 0xd8:
>> +	    return 1 - (end - cur);
>> +	case 0xc7:
>> +	case 0xc8:
>> +	    return c - 0xc4 - (end - cur);
>> +	case 0xc9:
>> +	    return 6 - (end - cur);
>> +	default:
>> +		mp_unreachable();
>> +	}
>> +}
>> +
>> +MP_IMPL uint32_t
>> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type) {
>> +	uint8_t c = mp_load_u8(data);
>> +	switch 	 (c) {
>> +	case 0xd4:
>> +	case 0xd5:
>> +	case 0xd6:
>> +	case 0xd7:
>> +	case 0xd8:
>> +		*len = 1u << (c - 0xd4);
>> +		break;
>> +	case 0xc7:
>> +		*len = mp_load_u8(data);
>> +		break;
>> +	case 0xc8:
>> +		*len = mp_load_u16(data);
>> +		break;
>> +	case 0xc9:
>> +		*len = mp_load_u32(data);
>> +		break;
>> +	default:
>> +		mp_unreachable();
>> +	}
>> +	*type = mp_load_u8(data);
>> +	return *len;
>> +}
>> +
>> MP_IMPL uint32_t
>> mp_sizeof_uint(uint64_t num)
>> {
>> diff --git a/test/msgpuck.c b/test/msgpuck.c
>> index 3b31bfc..5ddaba7 100644
>> --- a/test/msgpuck.c
>> +++ b/test/msgpuck.c
>> @@ -789,9 +789,8 @@ test_mp_print()
>> 	d = mp_encode_double(d, 3.14);
>> 	d = mp_encode_uint(d, 100);
>> 	d = mp_encode_uint(d, 500);
>> -	*d++ = 0xd4; /* let's pack smallest fixed ext */
>> -	*d++ = 0;
>> -	*d++ = 0;
>> +	/* let's pack zero-length ext */
>> +	d = mp_encode_ext(d, MP_EXT_DECIMAL, 0);
>> 	char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r";
>> 	d = mp_encode_bin(d, bin, sizeof(bin));
>> 	d = mp_encode_map(d, 0);
>> @@ -800,7 +799,7 @@ test_mp_print()
>> 	const char *expected =
>> 		"[-5, 42, \"kill bill\", [], "
>> 		"{\"bool true\": true, \"bool false\": false, \"null\": null, "
>> -		"\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, "
>> +		"\"float\": 3.14, \"double\": 3.14, 100: 500}, decimal, "
>> 		"\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]";
>> 	int esize = strlen(expected);
>> 
>> -- 
>> 2.20.1 (Apple Git-117)
>> 
> 
> -- 
> Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
> http://tarantool.io - www.twitter.com/kostja_osipov
> 




More information about the Tarantool-patches mailing list