Tarantool development patches archive
 help / color / mirror / Atom feed
* [tarantool-patches] [PATCH v2] Add MsgPack ext types handling.
@ 2019-05-21 13:57 Serge Petrenko
  2019-05-29 16:38 ` Vladimir Davydov
  0 siblings, 1 reply; 3+ messages in thread
From: Serge Petrenko @ 2019-05-21 13:57 UTC (permalink / raw)
  To: tarantool-patches; +Cc: georgy, kostja, Serge Petrenko

Add the ability to encode/decode MsgPack ext types,
introduce the first extension type: MP_EXT_DECIMAL: it will be used to
store decimals in tarantool.

Needed-for tarantool/tarantool#692
---
https://github.com/tarantool/msgpuck/tree/sp/add-extension-types
https://github.com/tarantool/tarantool/issues/692

Changes in v2:
  - fixed and simplified mp_check_ext.
    It was wrong by one byte.
  - only use fixext types when length fits
    exactly (is 1, 2, 4, 8 or 16)
    Otherwise use ext 8

 hints.c        |  24 ++++++++++
 msgpuck.c      |  20 +++++++-
 msgpuck.h      | 124 +++++++++++++++++++++++++++++++++++++++++++++++++
 test/msgpuck.c |   7 ++-
 4 files changed, 169 insertions(+), 6 deletions(-)

diff --git a/hints.c b/hints.c
index be859e9..d9b3fe7 100644
--- a/hints.c
+++ b/hints.c
@@ -672,3 +672,27 @@ const char *mp_char2escape[128] = {
 	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 	NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\u007f"
 };
+
+/*
+ * This lookup table is used by mp_encode_ext() to
+ * determine ext code (fixext 1, fixext 2, fixext 4, fixext 8,
+ * fixext 16) to encode using size.
+ */
+const uint8_t mp_ext_hint[16] = {
+	0xd4,  /*  1 */
+	0xd5,  /*  2 */
+	   0,  /*  3 */
+	0xd6,  /*  4 */
+	   0,  /*  5 */
+	   0,  /*  6 */
+	   0,  /*  7 */
+	0xd7,  /*  8 */
+	   0,  /*  9 */
+	   0,  /* 10 */
+	   0,  /* 11 */
+	   0,  /* 12 */
+	   0,  /* 13 */
+	   0,  /* 14 */
+	   0,  /* 15 */
+	0xd8   /* 16 */
+};
diff --git a/msgpuck.c b/msgpuck.c
index d0ffb83..04aeb0f 100644
--- a/msgpuck.c
+++ b/msgpuck.c
@@ -33,6 +33,17 @@
 #define MP_LIBRARY 1
 #include "msgpuck.h"
 
+const char *
+mp_ext_type_str(enum mp_ext_type type)
+{
+	switch(type) {
+	case MP_EXT_DECIMAL:
+		return "decimal";
+	default:
+		return "undefined";
+	}
+}
+
 size_t
 mp_vformat(char *data, size_t data_size, const char *format, va_list vl)
 {
@@ -298,9 +309,14 @@ next:										\
 		PRINTF("%lg", mp_decode_double(&data));				\
 		break;								\
 	case MP_EXT:								\
-		mp_next(&data);							\
-		PRINTF("undefined");						\
+	{									\
+		uint8_t type;							\
+		uint32_t len;							\
+		len = mp_decode_ext(&data, &len, &type);			\
+		PRINTF("%s", mp_ext_type_str(type));	    			\
+		data += len;							\
 		break;								\
+	}									\
 	default:								\
 		mp_unreachable();						\
 		return -1;							\
diff --git a/msgpuck.h b/msgpuck.h
index 6b29cd6..ec9e135 100644
--- a/msgpuck.h
+++ b/msgpuck.h
@@ -382,6 +382,13 @@ enum mp_type {
 	MP_EXT
 };
 
+/**
+ * \brief MsgPack extension data types.
+ */
+enum mp_ext_type {
+	MP_EXT_DECIMAL = 0
+};
+
 /**
  * \brief Determine MsgPack type by a first byte \a c of encoded data.
  *
@@ -532,6 +539,49 @@ mp_check_map(const char *cur, const char *end);
 MP_PROTO uint32_t
 mp_decode_map(const char **data);
 
+/**
+ * \brief calculate exact buffer size needed to store
+ * ext value of length \a len.
+ * \param len value length in bytes.
+ * \retval buffer size in bytes
+ */
+MP_PROTO uint32_t
+mp_sizeof_ext(uint32_t len);
+
+/**
+ * \brief Encode extension header with \a type and
+ * value length \a len.
+ * The value must be encoded after the header.
+ * \return \a data + \link mp_sizeof_ext() mp_sizeof_ext(size)\endlink
+ */
+MP_PROTO char *
+mp_encode_ext(char *data, uint8_t type, uint32_t len);
+
+/**
+ * \brief Check that \a cur buffer has enough bytes to decode an ext header.
+ * \param cur buffer
+ * \param end end of the buffer
+ * \retval 0 - buffer has enough bytes
+ * \retval > 0 - the numbeer of remaining bytes to read
+ * \pre cur < end
+ * \pre mp_typeof(*cur) == MP_EXT
+ */
+MP_PROTO MP_PURE ptrdiff_t
+mp_check_ext(const char *cur, const char *end);
+
+/**
+ * \brief Decode an extension header from MsgPack \a data.
+ *
+ * The extension type value must be decoded after the header.
+ * \param data - the pointer to a buffer.
+ * \param len -  decoded length of the following value.
+ * \param type - decoded type of the following value.
+ * \retval - the length of the following ext value.
+ * \post *data = *data + mp_sizeof_ext(length)
+ */
+MP_PROTO uint32_t
+mp_decode_ext(const char **data, uint32_t *len, uint8_t *type);
+
 /**
  * \brief Calculate exact buffer size needed to store an integer \a num.
  * Maximum return value is 9. For performance reasons you can preallocate
@@ -1326,6 +1376,7 @@ mp_frame_advance(struct mp_frame *frame);
 extern const enum mp_type mp_type_hint[];
 extern const int8_t mp_parser_hint[];
 extern const char *mp_char2escape[];
+extern const uint8_t mp_ext_hint[];
 
 MP_IMPL MP_ALWAYSINLINE enum mp_type
 mp_typeof(const char c)
@@ -1462,6 +1513,79 @@ mp_decode_map(const char **data)
 	}
 }
 
+MP_IMPL uint32_t
+mp_sizeof_ext(uint32_t len)
+{
+	if (len <= 16) return 2;
+	if (len <= UINT8_MAX) return 3;
+	if (len <= UINT16_MAX) return 4;
+	else return 5;
+}
+
+MP_IMPL char *
+mp_encode_ext(char *data, uint8_t type, uint32_t len)
+{
+	/*
+	 * Only use fixext when length is exactly 1, 2, 4, 8 or 16.
+	 * Otherwise use ext 8 if length <= 255.
+	 */
+	if (len <= 16 && mp_ext_hint[len-1]) {
+		data = mp_store_u8(data, mp_ext_hint[len-1]);
+	} else if (len <= UINT8_MAX) {
+		data = mp_store_u8(data, 0xc7);
+		data = mp_store_u8(data, (uint8_t) len);
+	} else if (len <= UINT16_MAX) {
+		data = mp_store_u8(data, 0xc8);
+		data = mp_store_u16(data, (uint16_t) len);
+	} else {
+		data = mp_store_u8(data, 0xc9);
+		data = mp_store_u32(data,len);
+	}
+	data = mp_store_u8(data, type);
+	return data;
+}
+
+MP_IMPL ptrdiff_t
+mp_check_ext(const char *cur, const char *end)
+{
+	assert(cur < end);
+	assert(mp_typeof(*cur) == MP_EXT);
+	uint8_t c = mp_load_u8(&cur);
+	if (c & 0xf0 == 0xd0) {
+		return 1 - (end - cur);
+	}
+
+	assert(c >= 0xc7 && c <= 0xc9);
+	return 1 << (c - 0xc7) + 1 - (end - cur); /* 0xc7 -> 2, 0xc8 -> 3, 0xc9 ->5 */
+}
+
+MP_IMPL uint32_t
+mp_decode_ext(const char **data, uint32_t *len, uint8_t *type) {
+	uint8_t c = mp_load_u8(data);
+	switch 	 (c) {
+	case 0xd4:
+	case 0xd5:
+	case 0xd6:
+	case 0xd7:
+	case 0xd8:
+		*len = 1u << (c - 0xd4);
+		break;
+	case 0xc7:
+		*len = mp_load_u8(data);
+		break;
+	case 0xc8:
+		*len = mp_load_u16(data);
+		break;
+	case 0xc9:
+		*len = mp_load_u32(data);
+		break;
+	default:
+		mp_unreachable();
+	}
+	*type = mp_load_u8(data);
+	return *len;
+}
+
 MP_IMPL uint32_t
 mp_sizeof_uint(uint64_t num)
 {
diff --git a/test/msgpuck.c b/test/msgpuck.c
index 3b31bfc..5ddaba7 100644
--- a/test/msgpuck.c
+++ b/test/msgpuck.c
@@ -789,9 +789,8 @@ test_mp_print()
 	d = mp_encode_double(d, 3.14);
 	d = mp_encode_uint(d, 100);
 	d = mp_encode_uint(d, 500);
-	*d++ = 0xd4; /* let's pack smallest fixed ext */
-	*d++ = 0;
-	*d++ = 0;
+	/* let's pack zero-length ext */
+	d = mp_encode_ext(d, MP_EXT_DECIMAL, 0);
 	char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r";
 	d = mp_encode_bin(d, bin, sizeof(bin));
 	d = mp_encode_map(d, 0);
@@ -800,7 +799,7 @@ test_mp_print()
 	const char *expected =
 		"[-5, 42, \"kill bill\", [], "
 		"{\"bool true\": true, \"bool false\": false, \"null\": null, "
-		"\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, "
+		"\"float\": 3.14, \"double\": 3.14, 100: 500}, decimal, "
 		"\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]";
 	int esize = strlen(expected);
 
-- 
2.20.1 (Apple Git-117)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [tarantool-patches] [PATCH v2] Add MsgPack ext types handling.
  2019-05-21 13:57 [tarantool-patches] [PATCH v2] Add MsgPack ext types handling Serge Petrenko
@ 2019-05-29 16:38 ` Vladimir Davydov
  2019-06-03 13:15   ` Serge Petrenko
  0 siblings, 1 reply; 3+ messages in thread
From: Vladimir Davydov @ 2019-05-29 16:38 UTC (permalink / raw)
  To: Serge Petrenko; +Cc: tarantool-patches, georgy, kostja

On Tue, May 21, 2019 at 04:57:49PM +0300, Serge Petrenko wrote:
> diff --git a/msgpuck.c b/msgpuck.c
> index d0ffb83..04aeb0f 100644
> --- a/msgpuck.c
> +++ b/msgpuck.c
> @@ -33,6 +33,17 @@
>  #define MP_LIBRARY 1
>  #include "msgpuck.h"
>  
> +const char *
> +mp_ext_type_str(enum mp_ext_type type)
> +{
> +	switch(type) {
> +	case MP_EXT_DECIMAL:
> +		return "decimal";

This isn't particularly useful. Unless we can print a decimal as
decimal here, I'd leave 'undefined' for now.

> +	default:
> +		return "undefined";
> +	}
> +}
> +
>  size_t
>  mp_vformat(char *data, size_t data_size, const char *format, va_list vl)
>  {
> @@ -298,9 +309,14 @@ next:										\
>  		PRINTF("%lg", mp_decode_double(&data));				\
>  		break;								\
>  	case MP_EXT:								\
> -		mp_next(&data);							\
> -		PRINTF("undefined");						\
> +	{									\
> +		uint8_t type;							\
> +		uint32_t len;							\
> +		len = mp_decode_ext(&data, &len, &type);			\
> +		PRINTF("%s", mp_ext_type_str(type));	    			\
> +		data += len;							\
>  		break;								\
> +	}									\
>  	default:								\
>  		mp_unreachable();						\
>  		return -1;							\
> diff --git a/msgpuck.h b/msgpuck.h
> index 6b29cd6..ec9e135 100644
> --- a/msgpuck.h
> +++ b/msgpuck.h
> @@ -382,6 +382,13 @@ enum mp_type {
>  	MP_EXT
>  };
>  
> +/**
> + * \brief MsgPack extension data types.
> + */
> +enum mp_ext_type {
> +	MP_EXT_DECIMAL = 0

Not sure, this should be defined here. After all, it's an application
defined type so its code should be defined in the application, not in
the library. OTOH, I'd expect to see timestamp here, which is defined in
the spec.

> +};
> +
>  /**
>   * \brief Determine MsgPack type by a first byte \a c of encoded data.
>   *
> @@ -532,6 +539,49 @@ mp_check_map(const char *cur, const char *end);
>  MP_PROTO uint32_t
>  mp_decode_map(const char **data);
>  
> +/**
> + * \brief calculate exact buffer size needed to store
> + * ext value of length \a len.
> + * \param len value length in bytes.
> + * \retval buffer size in bytes
> + */
> +MP_PROTO uint32_t
> +mp_sizeof_ext(uint32_t len);
> +
> +/**
> + * \brief Encode extension header with \a type and
> + * value length \a len.
> + * The value must be encoded after the header.
> + * \return \a data + \link mp_sizeof_ext() mp_sizeof_ext(size)\endlink
> + */
> +MP_PROTO char *
> +mp_encode_ext(char *data, uint8_t type, uint32_t len);
> +
> +/**
> + * \brief Check that \a cur buffer has enough bytes to decode an ext header.
> + * \param cur buffer
> + * \param end end of the buffer
> + * \retval 0 - buffer has enough bytes
> + * \retval > 0 - the numbeer of remaining bytes to read
> + * \pre cur < end
> + * \pre mp_typeof(*cur) == MP_EXT
> + */
> +MP_PROTO MP_PURE ptrdiff_t
> +mp_check_ext(const char *cur, const char *end);
> +
> +/**
> + * \brief Decode an extension header from MsgPack \a data.
> + *
> + * The extension type value must be decoded after the header.
> + * \param data - the pointer to a buffer.
> + * \param len -  decoded length of the following value.
> + * \param type - decoded type of the following value.
> + * \retval - the length of the following ext value.
> + * \post *data = *data + mp_sizeof_ext(length)
> + */
> +MP_PROTO uint32_t
> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type);
> +
>  /**
>   * \brief Calculate exact buffer size needed to store an integer \a num.
>   * Maximum return value is 9. For performance reasons you can preallocate
> @@ -1326,6 +1376,7 @@ mp_frame_advance(struct mp_frame *frame);
>  extern const enum mp_type mp_type_hint[];
>  extern const int8_t mp_parser_hint[];
>  extern const char *mp_char2escape[];
> +extern const uint8_t mp_ext_hint[];
>  
>  MP_IMPL MP_ALWAYSINLINE enum mp_type
>  mp_typeof(const char c)
> @@ -1462,6 +1513,79 @@ mp_decode_map(const char **data)
>  	}
>  }
>  
> +MP_IMPL uint32_t
> +mp_sizeof_ext(uint32_t len)
> +{
> +	if (len <= 16) return 2;
> +	if (len <= UINT8_MAX) return 3;
> +	if (len <= UINT16_MAX) return 4;
> +	else return 5;
> +}
> +
> +MP_IMPL char *
> +mp_encode_ext(char *data, uint8_t type, uint32_t len)

I'd expect it to take a pointer to the extension data, just like in
case of mp_encode_str/bin. For encoding only the header, I'd introduce
a separate function mp_encode_extl. Same is fair for decoding functions.

> +{
> +	/*
> +	 * Only use fixext when length is exactly 1, 2, 4, 8 or 16.
> +	 * Otherwise use ext 8 if length <= 255.
> +	 */
> +	if (len <= 16 && mp_ext_hint[len-1]) {

AFAICS this is inconsistent with mp_sizeof_ext: the latter assumes that
any ext data of length <= 16 is encoded using fixext.

> +		data = mp_store_u8(data, mp_ext_hint[len-1]);
> +	} else if (len <= UINT8_MAX) {
> +		data = mp_store_u8(data, 0xc7);
> +		data = mp_store_u8(data, (uint8_t) len);
> +	} else if (len <= UINT16_MAX) {
> +		data = mp_store_u8(data, 0xc8);
> +		data = mp_store_u16(data, (uint16_t) len);
> +	} else {
> +		data = mp_store_u8(data, 0xc9);
> +		data = mp_store_u32(data,len);
> +	}
> +	data = mp_store_u8(data, type);
> +	return data;
> +}
> +
> +MP_IMPL ptrdiff_t
> +mp_check_ext(const char *cur, const char *end)
> +{
> +	assert(cur < end);
> +	assert(mp_typeof(*cur) == MP_EXT);
> +	uint8_t c = mp_load_u8(&cur);
> +	if (c & 0xf0 == 0xd0) {
> +		return 1 - (end - cur);
> +	}
> +
> +	assert(c >= 0xc7 && c <= 0xc9);
> +	return 1 << (c - 0xc7) + 1 - (end - cur); /* 0xc7 -> 2, 0xc8 -> 3, 0xc9 ->5 */
> +}
> +
> +MP_IMPL uint32_t
> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type) {

For consistency with mp_encode_ext, type should go before len.

> +	uint8_t c = mp_load_u8(data);
> +	switch 	 (c) {
> +	case 0xd4:
> +	case 0xd5:
> +	case 0xd6:
> +	case 0xd7:
> +	case 0xd8:
> +		*len = 1u << (c - 0xd4);
> +		break;
> +	case 0xc7:
> +		*len = mp_load_u8(data);
> +		break;
> +	case 0xc8:
> +		*len = mp_load_u16(data);
> +		break;
> +	case 0xc9:
> +		*len = mp_load_u32(data);
> +		break;
> +	default:
> +		mp_unreachable();
> +	}
> +	*type = mp_load_u8(data);
> +	return *len;
> +}
> +
>  MP_IMPL uint32_t
>  mp_sizeof_uint(uint64_t num)
>  {
> diff --git a/test/msgpuck.c b/test/msgpuck.c
> index 3b31bfc..5ddaba7 100644
> --- a/test/msgpuck.c
> +++ b/test/msgpuck.c
> @@ -789,9 +789,8 @@ test_mp_print()
>  	d = mp_encode_double(d, 3.14);
>  	d = mp_encode_uint(d, 100);
>  	d = mp_encode_uint(d, 500);
> -	*d++ = 0xd4; /* let's pack smallest fixed ext */
> -	*d++ = 0;
> -	*d++ = 0;
> +	/* let's pack zero-length ext */
> +	d = mp_encode_ext(d, MP_EXT_DECIMAL, 0);

Please test every single function you add for all kinds of extension
encodings (fixext, ext).

>  	char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r";
>  	d = mp_encode_bin(d, bin, sizeof(bin));
>  	d = mp_encode_map(d, 0);
> @@ -800,7 +799,7 @@ test_mp_print()
>  	const char *expected =
>  		"[-5, 42, \"kill bill\", [], "
>  		"{\"bool true\": true, \"bool false\": false, \"null\": null, "
> -		"\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, "
> +		"\"float\": 3.14, \"double\": 3.14, 100: 500}, decimal, "
>  		"\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]";
>  	int esize = strlen(expected);
>  

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [tarantool-patches] [PATCH v2] Add MsgPack ext types handling.
  2019-05-29 16:38 ` Vladimir Davydov
@ 2019-06-03 13:15   ` Serge Petrenko
  0 siblings, 0 replies; 3+ messages in thread
From: Serge Petrenko @ 2019-06-03 13:15 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: tarantool-patches, Georgy Kirichenko, kostja



> 29 мая 2019 г., в 19:38, Vladimir Davydov <vdavydov.dev@gmail.com> написал(а):
> 
> On Tue, May 21, 2019 at 04:57:49PM +0300, Serge Petrenko wrote:
>> diff --git a/msgpuck.c b/msgpuck.c
>> index d0ffb83..04aeb0f 100644
>> --- a/msgpuck.c
>> +++ b/msgpuck.c
>> @@ -33,6 +33,17 @@
>> #define MP_LIBRARY 1
>> #include "msgpuck.h"
>> 
>> +const char *
>> +mp_ext_type_str(enum mp_ext_type type)
>> +{
>> +	switch(type) {
>> +	case MP_EXT_DECIMAL:
>> +		return "decimal";
> 
> This isn't particularly useful. Unless we can print a decimal as
> decimal here, I'd leave 'undefined' for now.
> 
>> +	default:
>> +		return "undefined";
>> +	}
>> +}
>> +
>> size_t
>> mp_vformat(char *data, size_t data_size, const char *format, va_list vl)
>> {
>> @@ -298,9 +309,14 @@ next:										\
>> 		PRINTF("%lg", mp_decode_double(&data));				\
>> 		break;								\
>> 	case MP_EXT:								\
>> -		mp_next(&data);							\
>> -		PRINTF("undefined");						\
>> +	{									\
>> +		uint8_t type;							\
>> +		uint32_t len;							\
>> +		len = mp_decode_ext(&data, &len, &type);			\
>> +		PRINTF("%s", mp_ext_type_str(type));	    			\
>> +		data += len;							\
>> 		break;								\
>> +	}									\
>> 	default:								\
>> 		mp_unreachable();						\
>> 		return -1;							\
>> diff --git a/msgpuck.h b/msgpuck.h
>> index 6b29cd6..ec9e135 100644
>> --- a/msgpuck.h
>> +++ b/msgpuck.h
>> @@ -382,6 +382,13 @@ enum mp_type {
>> 	MP_EXT
>> };
>> 
>> +/**
>> + * \brief MsgPack extension data types.
>> + */
>> +enum mp_ext_type {
>> +	MP_EXT_DECIMAL = 0
> 
> Not sure, this should be defined here. After all, it's an application
> defined type so its code should be defined in the application, not in
> the library. OTOH, I'd expect to see timestamp here, which is defined in
> the spec.
> 
>> +};
>> +
>> /**
>>  * \brief Determine MsgPack type by a first byte \a c of encoded data.
>>  *
>> @@ -532,6 +539,49 @@ mp_check_map(const char *cur, const char *end);
>> MP_PROTO uint32_t
>> mp_decode_map(const char **data);
>> 
>> +/**
>> + * \brief calculate exact buffer size needed to store
>> + * ext value of length \a len.
>> + * \param len value length in bytes.
>> + * \retval buffer size in bytes
>> + */
>> +MP_PROTO uint32_t
>> +mp_sizeof_ext(uint32_t len);
>> +
>> +/**
>> + * \brief Encode extension header with \a type and
>> + * value length \a len.
>> + * The value must be encoded after the header.
>> + * \return \a data + \link mp_sizeof_ext() mp_sizeof_ext(size)\endlink
>> + */
>> +MP_PROTO char *
>> +mp_encode_ext(char *data, uint8_t type, uint32_t len);
>> +
>> +/**
>> + * \brief Check that \a cur buffer has enough bytes to decode an ext header.
>> + * \param cur buffer
>> + * \param end end of the buffer
>> + * \retval 0 - buffer has enough bytes
>> + * \retval > 0 - the numbeer of remaining bytes to read
>> + * \pre cur < end
>> + * \pre mp_typeof(*cur) == MP_EXT
>> + */
>> +MP_PROTO MP_PURE ptrdiff_t
>> +mp_check_ext(const char *cur, const char *end);
>> +
>> +/**
>> + * \brief Decode an extension header from MsgPack \a data.
>> + *
>> + * The extension type value must be decoded after the header.
>> + * \param data - the pointer to a buffer.
>> + * \param len -  decoded length of the following value.
>> + * \param type - decoded type of the following value.
>> + * \retval - the length of the following ext value.
>> + * \post *data = *data + mp_sizeof_ext(length)
>> + */
>> +MP_PROTO uint32_t
>> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type);
>> +
>> /**
>>  * \brief Calculate exact buffer size needed to store an integer \a num.
>>  * Maximum return value is 9. For performance reasons you can preallocate
>> @@ -1326,6 +1376,7 @@ mp_frame_advance(struct mp_frame *frame);
>> extern const enum mp_type mp_type_hint[];
>> extern const int8_t mp_parser_hint[];
>> extern const char *mp_char2escape[];
>> +extern const uint8_t mp_ext_hint[];
>> 
>> MP_IMPL MP_ALWAYSINLINE enum mp_type
>> mp_typeof(const char c)
>> @@ -1462,6 +1513,79 @@ mp_decode_map(const char **data)
>> 	}
>> }
>> 
>> +MP_IMPL uint32_t
>> +mp_sizeof_ext(uint32_t len)
>> +{
>> +	if (len <= 16) return 2;
>> +	if (len <= UINT8_MAX) return 3;
>> +	if (len <= UINT16_MAX) return 4;
>> +	else return 5;
>> +}
>> +
>> +MP_IMPL char *
>> +mp_encode_ext(char *data, uint8_t type, uint32_t len)
> 
> I'd expect it to take a pointer to the extension data, just like in
> case of mp_encode_str/bin. For encoding only the header, I'd introduce
> a separate function mp_encode_extl. Same is fair for decoding functions.
> 
>> +{
>> +	/*
>> +	 * Only use fixext when length is exactly 1, 2, 4, 8 or 16.
>> +	 * Otherwise use ext 8 if length <= 255.
>> +	 */
>> +	if (len <= 16 && mp_ext_hint[len-1]) {
> 
> AFAICS this is inconsistent with mp_sizeof_ext: the latter assumes that
> any ext data of length <= 16 is encoded using fixext.
> 
>> +		data = mp_store_u8(data, mp_ext_hint[len-1]);
>> +	} else if (len <= UINT8_MAX) {
>> +		data = mp_store_u8(data, 0xc7);
>> +		data = mp_store_u8(data, (uint8_t) len);
>> +	} else if (len <= UINT16_MAX) {
>> +		data = mp_store_u8(data, 0xc8);
>> +		data = mp_store_u16(data, (uint16_t) len);
>> +	} else {
>> +		data = mp_store_u8(data, 0xc9);
>> +		data = mp_store_u32(data,len);
>> +	}
>> +	data = mp_store_u8(data, type);
>> +	return data;
>> +}
>> +
>> +MP_IMPL ptrdiff_t
>> +mp_check_ext(const char *cur, const char *end)
>> +{
>> +	assert(cur < end);
>> +	assert(mp_typeof(*cur) == MP_EXT);
>> +	uint8_t c = mp_load_u8(&cur);
>> +	if (c & 0xf0 == 0xd0) {
>> +		return 1 - (end - cur);
>> +	}
>> +
>> +	assert(c >= 0xc7 && c <= 0xc9);
>> +	return 1 << (c - 0xc7) + 1 - (end - cur); /* 0xc7 -> 2, 0xc8 -> 3, 0xc9 ->5 */
>> +}
>> +
>> +MP_IMPL uint32_t
>> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type) {
> 
> For consistency with mp_encode_ext, type should go before len.
> 
>> +	uint8_t c = mp_load_u8(data);
>> +	switch 	 (c) {
>> +	case 0xd4:
>> +	case 0xd5:
>> +	case 0xd6:
>> +	case 0xd7:
>> +	case 0xd8:
>> +		*len = 1u << (c - 0xd4);
>> +		break;
>> +	case 0xc7:
>> +		*len = mp_load_u8(data);
>> +		break;
>> +	case 0xc8:
>> +		*len = mp_load_u16(data);
>> +		break;
>> +	case 0xc9:
>> +		*len = mp_load_u32(data);
>> +		break;
>> +	default:
>> +		mp_unreachable();
>> +	}
>> +	*type = mp_load_u8(data);
>> +	return *len;
>> +}
>> +
>> MP_IMPL uint32_t
>> mp_sizeof_uint(uint64_t num)
>> {
>> diff --git a/test/msgpuck.c b/test/msgpuck.c
>> index 3b31bfc..5ddaba7 100644
>> --- a/test/msgpuck.c
>> +++ b/test/msgpuck.c
>> @@ -789,9 +789,8 @@ test_mp_print()
>> 	d = mp_encode_double(d, 3.14);
>> 	d = mp_encode_uint(d, 100);
>> 	d = mp_encode_uint(d, 500);
>> -	*d++ = 0xd4; /* let's pack smallest fixed ext */
>> -	*d++ = 0;
>> -	*d++ = 0;
>> +	/* let's pack zero-length ext */
>> +	d = mp_encode_ext(d, MP_EXT_DECIMAL, 0);
> 
> Please test every single function you add for all kinds of extension
> encodings (fixext, ext).
> 
>> 	char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r";
>> 	d = mp_encode_bin(d, bin, sizeof(bin));
>> 	d = mp_encode_map(d, 0);
>> @@ -800,7 +799,7 @@ test_mp_print()
>> 	const char *expected =
>> 		"[-5, 42, \"kill bill\", [], "
>> 		"{\"bool true\": true, \"bool false\": false, \"null\": null, "
>> -		"\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, "
>> +		"\"float\": 3.14, \"double\": 3.14, 100: 500}, decimal, "
>> 		"\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]";
>> 	int esize = strlen(expected);
>> 

Thank you for review! I’ve addressed your comments in v3, please check it out.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-06-03 13:15 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-21 13:57 [tarantool-patches] [PATCH v2] Add MsgPack ext types handling Serge Petrenko
2019-05-29 16:38 ` Vladimir Davydov
2019-06-03 13:15   ` Serge Petrenko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox