[Tarantool-patches] [PATCH v5 4/8] box, datetime: messagepack support for datetime
Safin Timur
tsafin at tarantool.org
Wed Aug 18 17:27:38 MSK 2021
On 17.08.2021 21:36, Vladimir Davydov via Tarantool-patches wrote:
> On Mon, Aug 16, 2021 at 02:59:38AM +0300, Timur Safin via
> Tarantool-patches wrote:
>> Serialize datetime_t as newly introduced MP_EXT type.
>> It saves 1 required integer field and upto 2 optional
>> unsigned fields in very compact fashion.
>> - secs is required field;
>> - but nsec, offset are both optional;
>>
>> * json, yaml serialization formats, lua output mode
>> supported;
>> * exported symbols for datetime messagepack size calculations
>> so they are available for usage on Lua side.
>>
>> Part of #5941
>> Part of #5946
>> ---
>> extra/exports | 5 +-
>> src/box/field_def.c | 35 +++---
>> src/box/field_def.h | 1 +
>> src/box/lua/serialize_lua.c | 7 +-
>> src/box/msgpack.c | 7 +-
>> src/box/tuple_compare.cc | 20 ++++
>> src/lib/core/CMakeLists.txt | 4 +-
>> src/lib/core/datetime.c | 9 ++
>> src/lib/core/datetime.h | 11 ++
>> src/lib/core/mp_datetime.c | 189 ++++++++++++++++++++++++++++++
>> src/lib/core/mp_datetime.h | 89 ++++++++++++++
>> src/lib/core/mp_extension_types.h | 1 +
>> src/lib/mpstream/mpstream.c | 11 ++
>> src/lib/mpstream/mpstream.h | 4 +
>> src/lua/msgpack.c | 12 ++
>> src/lua/msgpackffi.lua | 18 +++
>> src/lua/serializer.c | 4 +
>> src/lua/serializer.h | 2 +
>> src/lua/utils.c | 1 -
>> test/unit/datetime.c | 125 +++++++++++++++++++-
>> test/unit/datetime.result | 115 +++++++++++++++++-
>> third_party/lua-cjson/lua_cjson.c | 8 ++
>> third_party/lua-yaml/lyaml.cc | 6 +-
>
> Please add a Lua test checking that serialization (msgpack, yaml, json)
> works fine. Should be added in this patch.
Ok, will move test here (once reshake patches)
>
>> 23 files changed, 661 insertions(+), 23 deletions(-)
>> create mode 100644 src/lib/core/mp_datetime.c
>> create mode 100644 src/lib/core/mp_datetime.h
>>
>> diff --git a/src/box/field_def.c b/src/box/field_def.c
>> index 51acb8025..2682a42ee 100644
>> --- a/src/box/field_def.c
>> +++ b/src/box/field_def.c
>> @@ -72,6 +72,7 @@ const uint32_t field_mp_type[] = {
>> /* [FIELD_TYPE_UUID] = */ 0, /* only MP_UUID is supported */
>> /* [FIELD_TYPE_ARRAY] = */ 1U << MP_ARRAY,
>> /* [FIELD_TYPE_MAP] = */ (1U << MP_MAP),
>> + /* [FIELD_TYPE_DATETIME] = */ 0, /* only MP_DATETIME is supported
> */
>> };
>>
>> const uint32_t field_ext_type[] = {
>> @@ -83,11 +84,13 @@ const uint32_t field_ext_type[] = {
>> /* [FIELD_TYPE_INTEGER] = */ 0,
>> /* [FIELD_TYPE_BOOLEAN] = */ 0,
>> /* [FIELD_TYPE_VARBINARY] = */ 0,
>> - /* [FIELD_TYPE_SCALAR] = */ (1U << MP_DECIMAL) | (1U <<
> MP_UUID),
>> + /* [FIELD_TYPE_SCALAR] = */ (1U << MP_DECIMAL) | (1U <<
> MP_UUID) |
>> + (1U << MP_DATETIME),
>> /* [FIELD_TYPE_DECIMAL] = */ 1U << MP_DECIMAL,
>> /* [FIELD_TYPE_UUID] = */ 1U << MP_UUID,
>> /* [FIELD_TYPE_ARRAY] = */ 0,
>> /* [FIELD_TYPE_MAP] = */ 0,
>> + /* [FIELD_TYPE_DATETIME] = */ 1U << MP_DATETIME,
>> };
>>
>> const char *field_type_strs[] = {
>> @@ -104,6 +107,7 @@ const char *field_type_strs[] = {
>> /* [FIELD_TYPE_UUID] = */ "uuid",
>> /* [FIELD_TYPE_ARRAY] = */ "array",
>> /* [FIELD_TYPE_MAP] = */ "map",
>> + /* [FIELD_TYPE_DATETIME] = */ "datetime",
>> };
>
> This doesn't belong to this patch. Please split in two:
>
> - encoding datetime as msgpack/yaml/json - this patch
> - indexing of datetime - next patch
Understood. Very reasonable.
>
>>
>> const char *on_conflict_action_strs[] = {
>> @@ -128,20 +132,21 @@ field_type_by_name_wrapper(const char *str,
> uint32_t len)
>> * values can be stored in the j type.
>> */
>> static const bool field_type_compatibility[] = {
>> - /* ANY UNSIGNED STRING NUMBER DOUBLE INTEGER BOOLEAN
> VARBINARY SCALAR DECIMAL UUID ARRAY MAP */
>> -/* ANY */ true, false, false, false, false, false,
> false, false, false, false, false, false, false,
>> -/* UNSIGNED */ true, true, false, true, false, true,
> false, false, true, false, false, false, false,
>> -/* STRING */ true, false, true, false, false, false,
> false, false, true, false, false, false, false,
>> -/* NUMBER */ true, false, false, true, false, false,
> false, false, true, false, false, false, false,
>> -/* DOUBLE */ true, false, false, true, true, false,
> false, false, true, false, false, false, false,
>> -/* INTEGER */ true, false, false, true, false, true,
> false, false, true, false, false, false, false,
>> -/* BOOLEAN */ true, false, false, false, false, false,
> true, false, true, false, false, false, false,
>> -/* VARBINARY*/ true, false, false, false, false, false,
> false, true, true, false, false, false, false,
>> -/* SCALAR */ true, false, false, false, false, false,
> false, false, true, false, false, false, false,
>> -/* DECIMAL */ true, false, false, true, false, false,
> false, false, true, true, false, false, false,
>> -/* UUID */ true, false, false, false, false, false,
> false, false, false, false, true, false, false,
>> -/* ARRAY */ true, false, false, false, false, false,
> false, false, false, false, false, true, false,
>> -/* MAP */ true, false, false, false, false, false,
> false, false, false, false, false, false, true,
>> + /* ANY UNSIGNED STRING NUMBER DOUBLE INTEGER BOOLEAN
> VARBINARY SCALAR DECIMAL UUID ARRAY MAP DATETIME */
>> +/* ANY */ true, false, false, false, false, false,
> false, false, false, false, false, false, false, false,
>> +/* UNSIGNED */ true, true, false, true, false, true,
> false, false, true, false, false, false, false, false,
>> +/* STRING */ true, false, true, false, false, false,
> false, false, true, false, false, false, false, false,
>> +/* NUMBER */ true, false, false, true, false, false,
> false, false, true, false, false, false, false, false,
>> +/* DOUBLE */ true, false, false, true, true, false,
> false, false, true, false, false, false, false, false,
>> +/* INTEGER */ true, false, false, true, false, true,
> false, false, true, false, false, false, false, false,
>> +/* BOOLEAN */ true, false, false, false, false, false,
> true, false, true, false, false, false, false, false,
>> +/* VARBINARY*/ true, false, false, false, false, false,
> false, true, true, false, false, false, false, false,
>> +/* SCALAR */ true, false, false, false, false, false,
> false, false, true, false, false, false, false, false,
>> +/* DECIMAL */ true, false, false, true, false, false,
> false, false, true, true, false, false, false, false,
>> +/* UUID */ true, false, false, false, false, false,
> false, false, false, false, true, false, false, false,
>> +/* ARRAY */ true, false, false, false, false, false,
> false, false, false, false, false, true, false, false,
>> +/* MAP */ true, false, false, false, false, false,
> false, false, false, false, false, false, true, false,
>> +/* DATETIME */ true, false, false, false, false, false,
> false, false, true, false, false, false, false, true,
>> };
>>
>> bool
>> diff --git a/src/box/field_def.h b/src/box/field_def.h
>> index c5cfe5e86..120b2a93d 100644
>> --- a/src/box/field_def.h
>> +++ b/src/box/field_def.h
>> @@ -63,6 +63,7 @@ enum field_type {
>> FIELD_TYPE_UUID,
>> FIELD_TYPE_ARRAY,
>> FIELD_TYPE_MAP,
>> + FIELD_TYPE_DATETIME,
>> field_type_MAX
>> };
>>
>> diff --git a/src/box/lua/serialize_lua.c b/src/box/lua/serialize_lua.c
>> index 1f791980f..51855011b 100644
>> --- a/src/box/lua/serialize_lua.c
>> +++ b/src/box/lua/serialize_lua.c
>> @@ -768,7 +768,7 @@ static int
>> dump_node(struct lua_dumper *d, struct node *nd, int indent)
>> {
>> struct luaL_field *field = &nd->field;
>> - char buf[FPCONV_G_FMT_BUFSIZE];
>> + char buf[FPCONV_G_FMT_BUFSIZE + 8];
>
> Why +8? Better use max(FPCONV_G_FMT_BUFSIZE, <your buf size>).
>
>> int ltype = lua_type(d->L, -1);
>> const char *str = NULL;
>> size_t len = 0;
>> @@ -861,6 +861,11 @@ dump_node(struct lua_dumper *d, struct node *nd,
> int indent)
>> str = tt_uuid_str(field->uuidval);
>> len = UUID_STR_LEN;
>> break;
>> + case MP_DATETIME:
>> + nd->mask |= NODE_QUOTE;
>> + str = buf;
>> + len = datetime_to_string(field->dateval, buf,
> sizeof buf);
>
> sizeof(buf)
>
> Please fix here and in other places.
Well, but... Ok
(This is matter of taste though)
>
>> + break;
>> default:
>> d->err = EINVAL;
>> snprintf(d->err_msg, sizeof(d->err_msg),
>> diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
>> index 43cd29ce9..9a69f2a72 100644
>> --- a/src/box/tuple_compare.cc
>> +++ b/src/box/tuple_compare.cc
>> @@ -36,6 +36,7 @@
>> #include "lib/core/decimal.h"
>> #include "lib/core/mp_decimal.h"
>> #include "uuid/mp_uuid.h"
>> +#include "lib/core/mp_datetime.h"
>> #include "lib/core/mp_extension_types.h"
>>
>> /* {{{ tuple_compare */
>> @@ -76,6 +77,7 @@ enum mp_class {
>> MP_CLASS_STR,
>> MP_CLASS_BIN,
>> MP_CLASS_UUID,
>> + MP_CLASS_DATETIME,
>> MP_CLASS_ARRAY,
>> MP_CLASS_MAP,
>> mp_class_max,
>> @@ -99,6 +101,8 @@ static enum mp_class mp_ext_classes[] = {
>> /* .MP_UNKNOWN_EXTENSION = */ mp_class_max, /* unsupported */
>> /* .MP_DECIMAL = */ MP_CLASS_NUMBER,
>> /* .MP_UUID = */ MP_CLASS_UUID,
>> + /* .MP_ERROR = */ mp_class_max,
>> + /* .MP_DATETIME = */ MP_CLASS_DATETIME,
>> };
>>
>> static enum mp_class
>> @@ -390,6 +394,19 @@ mp_compare_uuid(const char *field_a, const char
> *field_b)
>> return memcmp(field_a + 2, field_b + 2, UUID_PACKED_LEN);
>> }
>>
>> +static int
>> +mp_compare_datetime(const char *lhs, const char *rhs)
>> +{
>> + struct datetime lhs_dt, rhs_dt;
>> + struct datetime *ret;
>> + ret = mp_decode_datetime(&lhs, &lhs_dt);
>> + assert(ret != NULL);
>> + ret = mp_decode_datetime(&rhs, &rhs_dt);
>> + assert(ret != NULL);
>> + (void)ret;
>> + return datetime_compare(&lhs_dt, &rhs_dt);
>> +}
>> +
>
> Comparators is a part of indexing hence belong to the next patch.
Ok.
>
>> typedef int (*mp_compare_f)(const char *, const char *);
>> static mp_compare_f mp_class_comparators[] = {
>> /* .MP_CLASS_NIL = */ NULL,
>> @@ -398,6 +415,7 @@ static mp_compare_f mp_class_comparators[] = {
>> /* .MP_CLASS_STR = */ mp_compare_str,
>> /* .MP_CLASS_BIN = */ mp_compare_bin,
>> /* .MP_CLASS_UUID = */ mp_compare_uuid,
>> + /* .MP_CLASS_DATETIME=*/ mp_compare_datetime,
>> /* .MP_CLASS_ARRAY = */ NULL,
>> /* .MP_CLASS_MAP = */ NULL,
>> };
>> @@ -478,6 +496,8 @@ tuple_compare_field(const char *field_a, const char
> *field_b,
>> return mp_compare_decimal(field_a, field_b);
>> case FIELD_TYPE_UUID:
>> return mp_compare_uuid(field_a, field_b);
>> + case FIELD_TYPE_DATETIME:
>> + return mp_compare_datetime(field_a, field_b);
>> default:
>> unreachable();
>> return 0;
>> diff --git a/src/lib/core/datetime.c b/src/lib/core/datetime.c
>> index c24a0df82..baf9cc8ae 100644
>> --- a/src/lib/core/datetime.c
>> +++ b/src/lib/core/datetime.c
>> @@ -165,3 +165,12 @@ datetime_to_string(const struct datetime *date,
> char *buf, uint32_t len)
>> }
>> #undef ADVANCE
>>
>> +int
>> +datetime_compare(const struct datetime *lhs, const struct datetime
> *rhs)
>> +{
>> + int result = COMPARE_RESULT(lhs->secs, rhs->secs);
>> + if (result != 0)
>> + return result;
>> +
>> + return COMPARE_RESULT(lhs->nsec, rhs->nsec);
>
> What about offset? Shouldn't you take it into account (convert
> all dates to UTC)?
We keep datetime normalized, that's part of their design (to not waste
time in normalizations when need to proceed arithmetic). Information
about original time-zone is only for information/display purposes.
Changing of timezone will not impact values of .secs or .nsec, but will
change displayed text.
>
>> +}
>> diff --git a/src/lib/core/datetime.h b/src/lib/core/datetime.h
>> index 964e76fcc..5122e422e 100644
>> --- a/src/lib/core/datetime.h
>> +++ b/src/lib/core/datetime.h
>> @@ -70,6 +70,17 @@ struct datetime_interval {
>> int32_t nsec;
>> };
>>
>> +/**
>> + * Compare arguments of a datetime type
>> + * @param lhs left datetime argument
>> + * @param rhs right datetime argument
>> + * @retval < 0 if lhs less than rhs
>> + * @retval = 0 if lhs and rhs equal
>> + * @retval > 0 if lhs greater than rhs
>> + */
>> +int
>> +datetime_compare(const struct datetime *lhs, const struct datetime
> *rhs);
>> +
>> /**
>> * Convert datetime to string using default format
>> * @param date source datetime value
>> diff --git a/src/lib/core/mp_datetime.c b/src/lib/core/mp_datetime.c
>> new file mode 100644
>> index 000000000..d0a3e562c
>> --- /dev/null
>> +++ b/src/lib/core/mp_datetime.c
>> @@ -0,0 +1,189 @@
>> +/*
>> + * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
>> + *
>> + * Redistribution and use in source and binary forms, with or
>> + * without modification, are permitted provided that the following
>> + * conditions are met:
>> + *
>> + * 1. Redistributions of source code must retain the above
>> + * copyright notice, this list of conditions and the
>> + * following disclaimer.
>> + *
>> + * 2. Redistributions in binary form must reproduce the above
>> + * copyright notice, this list of conditions and the following
>> + * disclaimer in the documentation and/or other materials
>> + * provided with the distribution.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
>> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
>> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
>> + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
>> + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
>> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
>> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
>> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
>> + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
>> + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>> + * SUCH DAMAGE.
>> + */
>> +
>> +#include "mp_datetime.h"
>> +#include "msgpuck.h"
>> +#include "mp_extension_types.h"
>> +
>> +/*
>> + Datetime MessagePack serialization schema is MP_EXT (0xC7 for 1 byte
> length)
>> + extension, which creates container of 1 to 3 integers.
>> +
>> +
> +----+---+-----------+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....
> +
>> + |0xC7| 4 |len (uint8)| seconds (int) | nanoseconds (uint) | offset
> (uint) |
>> +
> +----+---+-----------+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....
> +
>> +
>> + MessagePack extension MP_EXT (0xC7), after 1-byte length, contains:
>> +
>> + - signed integer seconds part (required). Depending on the value of
>> + seconds it may be from 1 to 8 bytes positive or negative integer
> number;
>> +
>> + - [optional] fraction time in nanoseconds as unsigned integer.
>> + If this value is 0 then it's not saved (unless there is offset
> field,
>> + as below);
>> +
>> + - [optional] timzeone offset in minutes as unsigned integer.
>> + If this field is 0 then it's not saved.
>> + */
>> +
>> +static inline uint32_t
>> +mp_sizeof_Xint(int64_t n)
>
> mp_sizeof_xint - Let's not mix camel-case and underscores.
>
> I think this should be put somewhere in a public header so that everyone
> can use them: mp_utils.h?
Will create - you are the 2nd who is asking for that extraction. Pity we
do not have such mp utilities file already.
>
>> +{
>> + return n < 0 ? mp_sizeof_int(n) : mp_sizeof_uint(n);
>> +}
>> +
>> +static inline char *
>> +mp_encode_Xint(char *data, int64_t v)
>> +{
>> + return v < 0 ? mp_encode_int(data, v) : mp_encode_uint(data, v);
>> +}
>> +
>> +static inline int64_t
>> +mp_decode_Xint(const char **data)
>> +{
>> + switch (mp_typeof(**data)) {
>> + case MP_UINT:
>> + return (int64_t)mp_decode_uint(data);
>
> assert(value <= INT64_MAX) ?
That's why I kept this local for a moment. Taking into consideration the
nature of our data we are guaranteed to not ever approach any bound
ranges neither for unsigned nor for signed integers here.
But in generic header file it need to be able to proceed all types of
input data gracefully.
On the 2nd thought I believe this assert should be placed into
mp_encode_xint() function. To avoid garbage data to be generated.
>
>> + case MP_INT:
>> + return mp_decode_int(data);
>> + default:
>> + mp_unreachable();
>> + }
>> + return 0;
>> +}
>> +
>> +static inline uint32_t
>> +mp_sizeof_datetime_raw(const struct datetime *date)
>> +{
>> + uint32_t sz = mp_sizeof_Xint(date->secs);
>> +
>> + // even if nanosecs == 0 we need to output anything
>> + // if we have non-null tz offset
>> + if (date->nsec != 0 || date->offset != 0)
>> + sz += mp_sizeof_Xint(date->nsec);
>> + if (date->offset)
>
> if (date->offset != 0)
>
> Please be consistent.
Done.
Increment
-------------------------------------
diff --git a/src/lib/core/mp_datetime.c b/src/lib/core/mp_datetime.c
index 963752c23..1c9e8a7df 100644
--- a/src/lib/core/mp_datetime.c
+++ b/src/lib/core/mp_datetime.c
@@ -33,19 +33,20 @@
*/
static inline uint32_t
-mp_sizeof_Xint(int64_t n)
+mp_sizeof_xint(int64_t n)
{
return n < 0 ? mp_sizeof_int(n) : mp_sizeof_uint(n);
}
static inline char *
-mp_encode_Xint(char *data, int64_t v)
+mp_encode_xint(char *data, int64_t v)
{
+ assert((uint64_t)v <= LONG_MAX);
return v < 0 ? mp_encode_int(data, v) : mp_encode_uint(data, v);
}
static inline int64_t
-mp_decode_Xint(const char **data)
+mp_decode_xint(const char **data)
{
switch (mp_typeof(**data)) {
case MP_UINT:
@@ -72,7 +73,7 @@ static inline uint32_t
mp_sizeof_datetime_raw(const struct datetime *date)
{
check_secs(date->secs);
- uint32_t sz = mp_sizeof_Xint(date->secs);
+ uint32_t sz = mp_sizeof_xint(date->secs);
/*
* even if nanosecs == 0 we need to output something
@@ -80,11 +81,11 @@ mp_sizeof_datetime_raw(const struct datetime *date)
*/
if (date->nsec != 0 || date->offset != 0) {
check_nanosecs(date->nsec);
- sz += mp_sizeof_Xint(date->nsec);
+ sz += mp_sizeof_xint(date->nsec);
}
- if (date->offset) {
+ if (date->offset != 0) {
check_tz_offset(date->offset);
- sz += mp_sizeof_Xint(date->offset);
+ sz += mp_sizeof_xint(date->offset);
}
return sz;
}
@@ -102,7 +103,7 @@ datetime_unpack(const char **data, uint32_t len,
struct datetime *date)
memset(date, 0, sizeof(*date));
- int64_t seconds = mp_decode_Xint(data);
+ int64_t seconds = mp_decode_xint(data);
check_secs(seconds);
date->secs = seconds;
@@ -119,7 +120,7 @@ datetime_unpack(const char **data, uint32_t len,
struct datetime *date)
if (len <= 0)
return date;
- int64_t offset = mp_decode_Xint(data);
+ int64_t offset = mp_decode_xint(data);
check_tz_offset(offset);
date->offset = offset;
@@ -146,11 +147,11 @@ mp_decode_datetime(const char **data, struct
datetime *date)
char *
datetime_pack(char *data, const struct datetime *date)
{
- data = mp_encode_Xint(data, date->secs);
+ data = mp_encode_xint(data, date->secs);
if (date->nsec != 0 || date->offset != 0)
data = mp_encode_uint(data, date->nsec);
if (date->offset)
- data = mp_encode_Xint(data, date->offset);
+ data = mp_encode_xint(data, date->offset);
return data;
}
-------------------------------------
>
Thanks,
Timur
More information about the Tarantool-patches
mailing list