Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: Serge Petrenko <sergepetrenko@tarantool.org>
Cc: tarantool-patches@freelists.org, georgy@tarantool.org,
	kostja@tarantool.org
Subject: Re: [tarantool-patches] [PATCH v2] Add MsgPack ext types handling.
Date: Wed, 29 May 2019 19:38:38 +0300	[thread overview]
Message-ID: <20190529163838.c52ackt2shbaotss@esperanza> (raw)
In-Reply-To: <20190521135749.45377-1-sergepetrenko@tarantool.org>

On Tue, May 21, 2019 at 04:57:49PM +0300, Serge Petrenko wrote:
> diff --git a/msgpuck.c b/msgpuck.c
> index d0ffb83..04aeb0f 100644
> --- a/msgpuck.c
> +++ b/msgpuck.c
> @@ -33,6 +33,17 @@
>  #define MP_LIBRARY 1
>  #include "msgpuck.h"
>  
> +const char *
> +mp_ext_type_str(enum mp_ext_type type)
> +{
> +	switch(type) {
> +	case MP_EXT_DECIMAL:
> +		return "decimal";

This isn't particularly useful. Unless we can print a decimal as
decimal here, I'd leave 'undefined' for now.

> +	default:
> +		return "undefined";
> +	}
> +}
> +
>  size_t
>  mp_vformat(char *data, size_t data_size, const char *format, va_list vl)
>  {
> @@ -298,9 +309,14 @@ next:										\
>  		PRINTF("%lg", mp_decode_double(&data));				\
>  		break;								\
>  	case MP_EXT:								\
> -		mp_next(&data);							\
> -		PRINTF("undefined");						\
> +	{									\
> +		uint8_t type;							\
> +		uint32_t len;							\
> +		len = mp_decode_ext(&data, &len, &type);			\
> +		PRINTF("%s", mp_ext_type_str(type));	    			\
> +		data += len;							\
>  		break;								\
> +	}									\
>  	default:								\
>  		mp_unreachable();						\
>  		return -1;							\
> diff --git a/msgpuck.h b/msgpuck.h
> index 6b29cd6..ec9e135 100644
> --- a/msgpuck.h
> +++ b/msgpuck.h
> @@ -382,6 +382,13 @@ enum mp_type {
>  	MP_EXT
>  };
>  
> +/**
> + * \brief MsgPack extension data types.
> + */
> +enum mp_ext_type {
> +	MP_EXT_DECIMAL = 0

Not sure, this should be defined here. After all, it's an application
defined type so its code should be defined in the application, not in
the library. OTOH, I'd expect to see timestamp here, which is defined in
the spec.

> +};
> +
>  /**
>   * \brief Determine MsgPack type by a first byte \a c of encoded data.
>   *
> @@ -532,6 +539,49 @@ mp_check_map(const char *cur, const char *end);
>  MP_PROTO uint32_t
>  mp_decode_map(const char **data);
>  
> +/**
> + * \brief calculate exact buffer size needed to store
> + * ext value of length \a len.
> + * \param len value length in bytes.
> + * \retval buffer size in bytes
> + */
> +MP_PROTO uint32_t
> +mp_sizeof_ext(uint32_t len);
> +
> +/**
> + * \brief Encode extension header with \a type and
> + * value length \a len.
> + * The value must be encoded after the header.
> + * \return \a data + \link mp_sizeof_ext() mp_sizeof_ext(size)\endlink
> + */
> +MP_PROTO char *
> +mp_encode_ext(char *data, uint8_t type, uint32_t len);
> +
> +/**
> + * \brief Check that \a cur buffer has enough bytes to decode an ext header.
> + * \param cur buffer
> + * \param end end of the buffer
> + * \retval 0 - buffer has enough bytes
> + * \retval > 0 - the numbeer of remaining bytes to read
> + * \pre cur < end
> + * \pre mp_typeof(*cur) == MP_EXT
> + */
> +MP_PROTO MP_PURE ptrdiff_t
> +mp_check_ext(const char *cur, const char *end);
> +
> +/**
> + * \brief Decode an extension header from MsgPack \a data.
> + *
> + * The extension type value must be decoded after the header.
> + * \param data - the pointer to a buffer.
> + * \param len -  decoded length of the following value.
> + * \param type - decoded type of the following value.
> + * \retval - the length of the following ext value.
> + * \post *data = *data + mp_sizeof_ext(length)
> + */
> +MP_PROTO uint32_t
> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type);
> +
>  /**
>   * \brief Calculate exact buffer size needed to store an integer \a num.
>   * Maximum return value is 9. For performance reasons you can preallocate
> @@ -1326,6 +1376,7 @@ mp_frame_advance(struct mp_frame *frame);
>  extern const enum mp_type mp_type_hint[];
>  extern const int8_t mp_parser_hint[];
>  extern const char *mp_char2escape[];
> +extern const uint8_t mp_ext_hint[];
>  
>  MP_IMPL MP_ALWAYSINLINE enum mp_type
>  mp_typeof(const char c)
> @@ -1462,6 +1513,79 @@ mp_decode_map(const char **data)
>  	}
>  }
>  
> +MP_IMPL uint32_t
> +mp_sizeof_ext(uint32_t len)
> +{
> +	if (len <= 16) return 2;
> +	if (len <= UINT8_MAX) return 3;
> +	if (len <= UINT16_MAX) return 4;
> +	else return 5;
> +}
> +
> +MP_IMPL char *
> +mp_encode_ext(char *data, uint8_t type, uint32_t len)

I'd expect it to take a pointer to the extension data, just like in
case of mp_encode_str/bin. For encoding only the header, I'd introduce
a separate function mp_encode_extl. Same is fair for decoding functions.

> +{
> +	/*
> +	 * Only use fixext when length is exactly 1, 2, 4, 8 or 16.
> +	 * Otherwise use ext 8 if length <= 255.
> +	 */
> +	if (len <= 16 && mp_ext_hint[len-1]) {

AFAICS this is inconsistent with mp_sizeof_ext: the latter assumes that
any ext data of length <= 16 is encoded using fixext.

> +		data = mp_store_u8(data, mp_ext_hint[len-1]);
> +	} else if (len <= UINT8_MAX) {
> +		data = mp_store_u8(data, 0xc7);
> +		data = mp_store_u8(data, (uint8_t) len);
> +	} else if (len <= UINT16_MAX) {
> +		data = mp_store_u8(data, 0xc8);
> +		data = mp_store_u16(data, (uint16_t) len);
> +	} else {
> +		data = mp_store_u8(data, 0xc9);
> +		data = mp_store_u32(data,len);
> +	}
> +	data = mp_store_u8(data, type);
> +	return data;
> +}
> +
> +MP_IMPL ptrdiff_t
> +mp_check_ext(const char *cur, const char *end)
> +{
> +	assert(cur < end);
> +	assert(mp_typeof(*cur) == MP_EXT);
> +	uint8_t c = mp_load_u8(&cur);
> +	if (c & 0xf0 == 0xd0) {
> +		return 1 - (end - cur);
> +	}
> +
> +	assert(c >= 0xc7 && c <= 0xc9);
> +	return 1 << (c - 0xc7) + 1 - (end - cur); /* 0xc7 -> 2, 0xc8 -> 3, 0xc9 ->5 */
> +}
> +
> +MP_IMPL uint32_t
> +mp_decode_ext(const char **data, uint32_t *len, uint8_t *type) {

For consistency with mp_encode_ext, type should go before len.

> +	uint8_t c = mp_load_u8(data);
> +	switch 	 (c) {
> +	case 0xd4:
> +	case 0xd5:
> +	case 0xd6:
> +	case 0xd7:
> +	case 0xd8:
> +		*len = 1u << (c - 0xd4);
> +		break;
> +	case 0xc7:
> +		*len = mp_load_u8(data);
> +		break;
> +	case 0xc8:
> +		*len = mp_load_u16(data);
> +		break;
> +	case 0xc9:
> +		*len = mp_load_u32(data);
> +		break;
> +	default:
> +		mp_unreachable();
> +	}
> +	*type = mp_load_u8(data);
> +	return *len;
> +}
> +
>  MP_IMPL uint32_t
>  mp_sizeof_uint(uint64_t num)
>  {
> diff --git a/test/msgpuck.c b/test/msgpuck.c
> index 3b31bfc..5ddaba7 100644
> --- a/test/msgpuck.c
> +++ b/test/msgpuck.c
> @@ -789,9 +789,8 @@ test_mp_print()
>  	d = mp_encode_double(d, 3.14);
>  	d = mp_encode_uint(d, 100);
>  	d = mp_encode_uint(d, 500);
> -	*d++ = 0xd4; /* let's pack smallest fixed ext */
> -	*d++ = 0;
> -	*d++ = 0;
> +	/* let's pack zero-length ext */
> +	d = mp_encode_ext(d, MP_EXT_DECIMAL, 0);

Please test every single function you add for all kinds of extension
encodings (fixext, ext).

>  	char bin[] = "\x12test\x34\b\t\n\"bla\\-bla\"\f\r";
>  	d = mp_encode_bin(d, bin, sizeof(bin));
>  	d = mp_encode_map(d, 0);
> @@ -800,7 +799,7 @@ test_mp_print()
>  	const char *expected =
>  		"[-5, 42, \"kill bill\", [], "
>  		"{\"bool true\": true, \"bool false\": false, \"null\": null, "
> -		"\"float\": 3.14, \"double\": 3.14, 100: 500}, undefined, "
> +		"\"float\": 3.14, \"double\": 3.14, 100: 500}, decimal, "
>  		"\"\\u0012test4\\b\\t\\n\\\"bla\\\\-bla\\\"\\f\\r\\u0000\", {}]";
>  	int esize = strlen(expected);
>  

  reply	other threads:[~2019-05-29 16:38 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-21 13:57 Serge Petrenko
2019-05-29 16:38 ` Vladimir Davydov [this message]
2019-06-03 13:15   ` Serge Petrenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190529163838.c52ackt2shbaotss@esperanza \
    --to=vdavydov.dev@gmail.com \
    --cc=georgy@tarantool.org \
    --cc=kostja@tarantool.org \
    --cc=sergepetrenko@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [tarantool-patches] [PATCH v2] Add MsgPack ext types handling.' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox