[Tarantool-patches] [PATCH v5 4/8] box, datetime: messagepack support for datetime

Serge Petrenko sergepetrenko at tarantool.org
Wed Aug 18 12:01:25 MSK 2021



18.08.2021 02:42, Safin Timur пишет:
> Thanks Sergey for your feedback, below you'll see few comments and 
> incremental patch...
>
> On 17.08.2021 15:16, Serge Petrenko wrote:
>>
>>
>> 16.08.2021 02:59, Timur Safin via Tarantool-patches пишет:
>>> Serialize datetime_t as newly introduced MP_EXT type.
>>> It saves 1 required integer field and upto 2 optional
>>> unsigned fields in very compact fashion.
>>> - secs is required field;
>>> - but nsec, offset are both optional;
>>>
>>> * json, yaml serialization formats, lua output mode
>>>    supported;
>>> * exported symbols for datetime messagepack size calculations
>>>    so they are available for usage on Lua side.
>>>
>>> Part of #5941
>>> Part of #5946
>>> ---
>>>   extra/exports                     |   5 +-
>>>   src/box/field_def.c               |  35 +++---
>>>   src/box/field_def.h               |   1 +
>>>   src/box/lua/serialize_lua.c       |   7 +-
>>>   src/box/msgpack.c                 |   7 +-
>>>   src/box/tuple_compare.cc          |  20 ++++
>>>   src/lib/core/CMakeLists.txt       |   4 +-
>>>   src/lib/core/datetime.c           |   9 ++
>>>   src/lib/core/datetime.h           |  11 ++
>>>   src/lib/core/mp_datetime.c        | 189 
>>> ++++++++++++++++++++++++++++++
>>>   src/lib/core/mp_datetime.h        |  89 ++++++++++++++
>>>   src/lib/core/mp_extension_types.h |   1 +
>>>   src/lib/mpstream/mpstream.c       |  11 ++
>>>   src/lib/mpstream/mpstream.h       |   4 +
>>>   src/lua/msgpack.c                 |  12 ++
>>>   src/lua/msgpackffi.lua            |  18 +++
>>>   src/lua/serializer.c              |   4 +
>>>   src/lua/serializer.h              |   2 +
>>>   src/lua/utils.c                   |   1 -
>>>   test/unit/datetime.c              | 125 +++++++++++++++++++-
>>>   test/unit/datetime.result         | 115 +++++++++++++++++-
>>>   third_party/lua-cjson/lua_cjson.c |   8 ++
>>>   third_party/lua-yaml/lyaml.cc     |   6 +-
>>>   23 files changed, 661 insertions(+), 23 deletions(-)
>>>   create mode 100644 src/lib/core/mp_datetime.c
>>>   create mode 100644 src/lib/core/mp_datetime.h
>>>
>>> diff --git a/extra/exports b/extra/exports
>>> index 2437e175c..c34a5c2b5 100644
>>> --- a/extra/exports
>>> +++ b/extra/exports
>>> @@ -151,9 +151,10 @@ csv_setopt
>>>   datetime_asctime
>>>   datetime_ctime
>>>   datetime_now
>>> +datetime_pack
>>>   datetime_strftime
>>>   datetime_to_string
>>> -decimal_unpack
>>> +datetime_unpack
>>
>>
>> decimal_unpack should stay there.
>
> It's there, but 2 lines below :)
>
> That was me copy-pasted decimal_unpack a few patches before, but has 
> not changed it to datetime_unpack. I've corrected it in this patch.
>
> I've now corrected the original appearance, now with correct name.

Ok, thanks!

>
>>
>>
>>>   decimal_from_string
>>>   decimal_unpack
>
>      ^^^ it was here
>
>>>   tnt_dt_dow
>>> @@ -397,6 +398,7 @@ mp_decode_uint
>>>   mp_encode_array
>>>   mp_encode_bin
>>>   mp_encode_bool
>>> +mp_encode_datetime
>>>   mp_encode_decimal
>>>   mp_encode_double
>>>   mp_encode_float
>>> @@ -413,6 +415,7 @@ mp_next
>>>   mp_next_slowpath
>>>   mp_parser_hint
>>>   mp_sizeof_array
>>> +mp_sizeof_datetime
>>>   mp_sizeof_decimal
>>>   mp_sizeof_str
>>>   mp_sizeof_uuid
>>> diff --git a/src/box/field_def.c b/src/box/field_def.c
>>> index 51acb8025..2682a42ee 100644
>>> --- a/src/box/field_def.c
>>> +++ b/src/box/field_def.c
>>> @@ -72,6 +72,7 @@ const uint32_t field_mp_type[] = {
>>>       /* [FIELD_TYPE_UUID]     =  */ 0, /* only MP_UUID is supported */
>>>       /* [FIELD_TYPE_ARRAY]    =  */ 1U << MP_ARRAY,
>>>       /* [FIELD_TYPE_MAP]      =  */ (1U << MP_MAP),
>>> +    /* [FIELD_TYPE_DATETIME] =  */ 0, /* only MP_DATETIME is 
>>> supported */
>>>   };
>>>   const uint32_t field_ext_type[] = {
>>> @@ -83,11 +84,13 @@ const uint32_t field_ext_type[] = {
>>>       /* [FIELD_TYPE_INTEGER]   = */ 0,
>>>       /* [FIELD_TYPE_BOOLEAN]   = */ 0,
>>>       /* [FIELD_TYPE_VARBINARY] = */ 0,
>>> -    /* [FIELD_TYPE_SCALAR]    = */ (1U << MP_DECIMAL) | (1U << 
>>> MP_UUID),
>>> +    /* [FIELD_TYPE_SCALAR]    = */ (1U << MP_DECIMAL) | (1U << 
>>> MP_UUID) |
>>> +        (1U << MP_DATETIME),
>>>       /* [FIELD_TYPE_DECIMAL]   = */ 1U << MP_DECIMAL,
>>>       /* [FIELD_TYPE_UUID]      = */ 1U << MP_UUID,
>>>       /* [FIELD_TYPE_ARRAY]     = */ 0,
>>>       /* [FIELD_TYPE_MAP]       = */ 0,
>>> +    /* [FIELD_TYPE_DATETIME]  = */ 1U << MP_DATETIME,
>>>   };
>>>   const char *field_type_strs[] = {
>>> @@ -104,6 +107,7 @@ const char *field_type_strs[] = {
>>>       /* [FIELD_TYPE_UUID]     = */ "uuid",
>>>       /* [FIELD_TYPE_ARRAY]    = */ "array",
>>>       /* [FIELD_TYPE_MAP]      = */ "map",
>>> +    /* [FIELD_TYPE_DATETIME] = */ "datetime",
>>>   };
>>>   const char *on_conflict_action_strs[] = {
>>> @@ -128,20 +132,21 @@ field_type_by_name_wrapper(const char *str, 
>>> uint32_t len)
>>>    * values can be stored in the j type.
>>>    */
>>>   static const bool field_type_compatibility[] = {
>>> -       /*   ANY   UNSIGNED  STRING   NUMBER  DOUBLE  INTEGER 
>>> BOOLEAN VARBINARY SCALAR  DECIMAL   UUID    ARRAY    MAP  */
>>> -/*   ANY    */ true,   false,   false,   false,   false, false,   
>>> false,   false,  false,  false,  false,   false, false,
>>> -/* UNSIGNED */ true,   true,    false,   true,    false, true,    
>>> false,   false,  true,   false,  false,   false, false,
>>> -/*  STRING  */ true,   false,   true,    false,   false, false,   
>>> false,   false,  true,   false,  false,   false, false,
>>> -/*  NUMBER  */ true,   false,   false,   true,    false, false,   
>>> false,   false,  true,   false,  false,   false, false,
>>> -/*  DOUBLE  */ true,   false,   false,   true,    true, false,   
>>> false,   false,  true,   false,  false,   false, false,
>>> -/*  INTEGER */ true,   false,   false,   true,    false, true,    
>>> false,   false,  true,   false,  false,   false, false,
>>> -/*  BOOLEAN */ true,   false,   false,   false,   false, false,   
>>> true,    false,  true,   false,  false,   false, false,
>>> -/* VARBINARY*/ true,   false,   false,   false,   false, false,   
>>> false,   true,   true,   false,  false,   false, false,
>>> -/*  SCALAR  */ true,   false,   false,   false,   false, false,   
>>> false,   false,  true,   false,  false,   false, false,
>>> -/*  DECIMAL */ true,   false,   false,   true,    false, false,   
>>> false,   false,  true,   true,   false,   false, false,
>>> -/*   UUID   */ true,   false,   false,   false,   false, false,   
>>> false,   false,  false,  false,  true,    false, false,
>>> -/*   ARRAY  */ true,   false,   false,   false,   false, false,   
>>> false,   false,  false,  false,  false,   true, false,
>>> -/*    MAP   */ true,   false,   false,   false,   false, false,   
>>> false,   false,  false,  false,  false,   false, true,
>>> +       /*   ANY   UNSIGNED  STRING   NUMBER  DOUBLE  INTEGER 
>>> BOOLEAN VARBINARY SCALAR  DECIMAL   UUID    ARRAY    MAP DATETIME */
>>> +/*   ANY    */ true,   false,   false,   false,   false, false,   
>>> false,   false,  false,  false,  false,   false, false,   false,
>>> +/* UNSIGNED */ true,   true,    false,   true,    false, true,    
>>> false,   false,  true,   false,  false,   false, false,   false,
>>> +/*  STRING  */ true,   false,   true,    false,   false, false,   
>>> false,   false,  true,   false,  false,   false, false,   false,
>>> +/*  NUMBER  */ true,   false,   false,   true,    false, false,   
>>> false,   false,  true,   false,  false,   false, false,   false,
>>> +/*  DOUBLE  */ true,   false,   false,   true,    true, false,   
>>> false,   false,  true,   false,  false,   false, false,   false,
>>> +/*  INTEGER */ true,   false,   false,   true,    false, true,    
>>> false,   false,  true,   false,  false,   false, false,   false,
>>> +/*  BOOLEAN */ true,   false,   false,   false,   false, false,   
>>> true,    false,  true,   false,  false,   false, false,   false,
>>> +/* VARBINARY*/ true,   false,   false,   false,   false, false,   
>>> false,   true,   true,   false,  false,   false, false,   false,
>>> +/*  SCALAR  */ true,   false,   false,   false,   false, false,   
>>> false,   false,  true,   false,  false,   false, false,   false,
>>> +/*  DECIMAL */ true,   false,   false,   true,    false, false,   
>>> false,   false,  true,   true,   false,   false, false,   false,
>>> +/*   UUID   */ true,   false,   false,   false,   false, false,   
>>> false,   false,  false,  false,  true,    false, false,   false,
>>> +/*   ARRAY  */ true,   false,   false,   false,   false, false,   
>>> false,   false,  false,  false,  false,   true, false,   false,
>>> +/*    MAP   */ true,   false,   false,   false,   false, false,   
>>> false,   false,  false,  false,  false,   false, true,    false,
>>> +/* DATETIME */ true,   false,   false,   false,   false, false,   
>>> false,   false,  true,   false,  false,   false, false,   true,
>>>   };
>>>   bool
>>> diff --git a/src/box/field_def.h b/src/box/field_def.h
>>> index c5cfe5e86..120b2a93d 100644
>>> --- a/src/box/field_def.h
>>> +++ b/src/box/field_def.h
>>> @@ -63,6 +63,7 @@ enum field_type {
>>>       FIELD_TYPE_UUID,
>>>       FIELD_TYPE_ARRAY,
>>>       FIELD_TYPE_MAP,
>>> +    FIELD_TYPE_DATETIME,
>>>       field_type_MAX
>>>   };
>>
>>
>> Please, define FIELD_TYPE_DATETIME higher.
>> Right after FIELD_TYPE_UUID.
>>
>> This way you won't need to rework field type allowed in index check
>> in the next commit.
>
> That's very straighforward and easy, my bad that I've overcomplicated it!
>
> But I'll move the change to the next patch, as it'scorrectly has 
> pointed out by Vova, should be part of indices support,
>

Ok.

>
>>
>>
>>> diff --git a/src/box/lua/serialize_lua.c b/src/box/lua/serialize_lua.c
>>> index 1f791980f..51855011b 100644
>>> --- a/src/box/lua/serialize_lua.c
>>> +++ b/src/box/lua/serialize_lua.c
>>> @@ -768,7 +768,7 @@ static int
>>>   dump_node(struct lua_dumper *d, struct node *nd, int indent)
>>>   {
>>>       struct luaL_field *field = &nd->field;
>>> -    char buf[FPCONV_G_FMT_BUFSIZE];
>>> +    char buf[FPCONV_G_FMT_BUFSIZE + 8];
>>
>>
>> Why "+8"?
>
> Well, because current FPCONV_G_FMT_BUFSIZE (32) was not enough for 
> full ISO-8601 literal with nanoseconds :)
>
> Probably I should introduce some newer constant...
>
> [Or, as Vova has suggested - just to use MAX from those 2 values, my 
> length and FPCONV_G_FMT_BUFSIZE.]
>
> --------------------------------------------
> diff --git a/src/box/lua/serialize_lua.c b/src/box/lua/serialize_lua.c
> index 51855011b..eef3a4995 100644
> --- a/src/box/lua/serialize_lua.c
> +++ b/src/box/lua/serialize_lua.c
> @@ -768,7 +768,7 @@ static int
>  dump_node(struct lua_dumper *d, struct node *nd, int indent)
>  {
>      struct luaL_field *field = &nd->field;
> -    char buf[FPCONV_G_FMT_BUFSIZE + 8];
> +    char buf[MAX(FPCONV_G_FMT_BUFSIZE, DT_TO_STRING_BUFSIZE)];
>      int ltype = lua_type(d->L, -1);
>      const char *str = NULL;
>      size_t len = 0;
> diff --git a/src/lib/core/datetime.h b/src/lib/core/datetime.h
> index 497cd9f14..b8d179600 100644
> --- a/src/lib/core/datetime.h
> +++ b/src/lib/core/datetime.h
> @@ -87,6 +87,11 @@ struct datetime_interval {
>  int
>  datetime_compare(const struct datetime *lhs, const struct datetime 
> *rhs);
>
> +/**
> + * Required size of datetime_to_string string buffer
> + */
> +#define DT_TO_STRING_BUFSIZE   48
> +
>  /**
>   * Convert datetime to string using default format
>   * @param date source datetime value
> --------------------------------------------
>

Looks good.

>
>>
>>
>>>       int ltype = lua_type(d->L, -1);
>>>       const char *str = NULL;
>>>       size_t len = 0;
>>
>>
>> <stripped>
>>
>>
>>> diff --git a/src/lib/core/mp_datetime.c b/src/lib/core/mp_datetime.c
>>> new file mode 100644
>>> index 000000000..d0a3e562c
>>> --- /dev/null
>>> +++ b/src/lib/core/mp_datetime.c
>>> @@ -0,0 +1,189 @@
>>> +/*
>>> + * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
>>> + *
>>> + * Redistribution and use in source and binary forms, with or
>>> + * without modification, are permitted provided that the following
>>> + * conditions are met:
>>> + *
>>> + * 1. Redistributions of source code must retain the above
>>> + *    copyright notice, this list of conditions and the
>>> + *    following disclaimer.
>>> + *
>>> + * 2. Redistributions in binary form must reproduce the above
>>> + *    copyright notice, this list of conditions and the following
>>> + *    disclaimer in the documentation and/or other materials
>>> + *    provided with the distribution.
>>> + *
>>> + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
>>> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
>>> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
>>> + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
>>> + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
>>> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
>>> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
>>> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
>>> + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
>>> + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>>> + * SUCH DAMAGE.
>>> + */
>>> +
>>
>> Same about the license.
>> Please, replace that with
>>
>> /*
>>   * SPDX-License-Identifier: BSD-2-Clause
>>   *
>>   * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
>>   */
>>
>> And do the same for all new files.
>
> Updated.
>
>>
>>> +#include "mp_datetime.h"
>>> +#include "msgpuck.h"
>>> +#include "mp_extension_types.h"
>>> +
>>> +/*
>>> +  Datetime MessagePack serialization schema is MP_EXT (0xC7 for 1 
>>> byte length)
>>> +  extension, which creates container of 1 to 3 integers.
>>> +
>>> + 
>>> +----+---+-----------+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....+ 
>>>
>>> +  |0xC7| 4 |len (uint8)| seconds (int) | nanoseconds (uint) | 
>>> offset (uint) |
>>> + 
>>> +----+---+-----------+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....+ 
>>>
>>
>> The order should be 0xC7, len(uint8), 4, seconds, ...
>> according to
>> https://github.com/msgpack/msgpack/blob/master/spec.md#ext-format-family
>
> Indeed, that was my misconception, thanks for correction!
> [Updated picture in the patch and in the discussion - 
> https://github.com/tarantool/tarantool/discussions/6244#discussioncomment-1043990]
>
>>
>>> +
>>> +  MessagePack extension MP_EXT (0xC7), after 1-byte length, contains:
>>> +
>>> +  - signed integer seconds part (required). Depending on the value of
>>> +    seconds it may be from 1 to 8 bytes positive or negative 
>>> integer number;
>>> +
>>> +  - [optional] fraction time in nanoseconds as unsigned integer.
>>> +    If this value is 0 then it's not saved (unless there is offset 
>>> field,
>>> +    as below);
>>> +
>>> +  - [optional] timzeone offset in minutes as unsigned integer.
>>> +    If this field is 0 then it's not saved.
>>> + */
>>> +
>>> +static inline uint32_t
>>> +mp_sizeof_Xint(int64_t n)
>>> +{
>>> +    return n < 0 ? mp_sizeof_int(n) : mp_sizeof_uint(n);
>>> +}
>>> +
>>> +static inline char *
>>> +mp_encode_Xint(char *data, int64_t v)
>>> +{
>>> +    return v < 0 ? mp_encode_int(data, v) : mp_encode_uint(data, v);
>>> +}
>>> +
>>> +static inline int64_t
>>> +mp_decode_Xint(const char **data)
>>> +{
>>> +    switch (mp_typeof(**data)) {
>>> +    case MP_UINT:
>>> +        return (int64_t)mp_decode_uint(data);
>>> +    case MP_INT:
>>> +        return mp_decode_int(data);
>>> +    default:
>>> +        mp_unreachable();
>>> +    }
>>> +    return 0;
>>> +}
>>
>> I believe mp_decode_Xint and mp_encode_Xint
>> belong to a more generic file, but I couldn't find an
>> appropriate one. Up to you.
>
> Yup, it was planned to be placed to more generic place once it would 
> become useful at least the 2nd time. And this time is actually 2nd 
> (1st was in SQL AST parser branch here 
> https://github.com/tarantool/tarantool/commit/55a4182ebfbed1a3c916fb7e326f8f7861776a7f#diff-e3f5bdfa58bcaed35b89f22e94be7ad472a6b37d656a129722ea0d5609503c6aR132-R143). 
> But that patchset has not yet landed to the master, so once again code 
> usage is 1st time and worth only local application. When I'll return 
> to distributed-sql AST parser I'll reshake them and put elsewhere.
>
>
>>
>>> +
>>> +static inline uint32_t
>>> +mp_sizeof_datetime_raw(const struct datetime *date)
>>> +{
>>> +    uint32_t sz = mp_sizeof_Xint(date->secs);
>>> +
>>> +    // even if nanosecs == 0 we need to output anything
>>> +    // if we have non-null tz offset
>>
>>
>> Please, stick with our comment format:
>
> Oh, yup, that slipt thru. Corrected.
>
>>
>> /*
>>   * Even if nanosecs == 0 we need to output anything
>>   * if we have non-null tz offset
>> */
>>
>>
>>> +    if (date->nsec != 0 || date->offset != 0)
>>> +        sz += mp_sizeof_Xint(date->nsec);
>>> +    if (date->offset)
>>> +        sz += mp_sizeof_Xint(date->offset);
>>> +    return sz;
>>> +}
>>> +
>>> +uint32_t
>>> +mp_sizeof_datetime(const struct datetime *date)
>>> +{
>>> +    return mp_sizeof_ext(mp_sizeof_datetime_raw(date));
>>> +}
>>> +
>>> +struct datetime *
>>> +datetime_unpack(const char **data, uint32_t len, struct datetime 
>>> *date)
>>> +{
>>> +    const char * svp = *data;
>>> +
>>> +    memset(date, 0, sizeof(*date));
>>> +
>>> +    date->secs = mp_decode_Xint(data);
>>
>>
>> Please, leave a comment about date->secs possible range here.
>> Why is it ok to store a decoded int64_t in a double.
>
> Yes, that's reasonable complain. I'll document dt supported range in 
> the datetime.h header, and to declare legal bounds there, so we could 
> use them later in asserts.
>
> Please see incremental patch for this step below...
>
>>
>>
>>> +
>>> +    len -= *data - svp;
>>> +    if (len <= 0)
>>> +        return date;
>>> +
>>> +    svp = *data;
>>> +    date->nsec = mp_decode_Xint(data);
>>> +    len -= *data - svp;
>>> +
>>> +    if (len <= 0)
>>> +        return date;
>>> +
>>> +    date->offset = mp_decode_Xint(data);
>>> +
>>> +    return date;
>>> +}
>>> +
>>> +struct datetime *
>>> +mp_decode_datetime(const char **data, struct datetime *date)
>>> +{
>>> +    if (mp_typeof(**data) != MP_EXT)
>>> +        return NULL;
>>> +
>>> +    int8_t type;
>>> +    uint32_t len = mp_decode_extl(data, &type);
>>> +
>>> +    if (type != MP_DATETIME || len == 0) {
>>> +        return NULL;
>>
>>
>> Please, revert data to savepoint when decoding fails.
>> If mp_decode_extl or datetime_unpack fail, you mustn't
>> modify data.
>>
>
> Didn't think about this case - will make sure data points to the 
> original location if fails.
>
>>
>>> +    }
>>> +    return datetime_unpack(data, len, date);
>>> +}
>>> +
>>> +char *
>>> +datetime_pack(char *data, const struct datetime *date)
>>> +{
>>> +    data = mp_encode_Xint(data, date->secs);
>>> +    if (date->nsec != 0 || date->offset != 0)
>>> +        data = mp_encode_Xint(data, date->nsec);
>>> +    if (date->offset)
>>> +        data = mp_encode_Xint(data, date->offset);
>>> +
>>> +    return data;
>>> +}
>>
>>
>> <stripped>
>>
>>
>>> diff --git a/src/lua/serializer.h b/src/lua/serializer.h
>>> index 0a0501a74..e7a240e0a 100644
>>> --- a/src/lua/serializer.h
>>> +++ b/src/lua/serializer.h
>>> @@ -52,6 +52,7 @@ extern "C" {
>>>   #include <lauxlib.h>
>>>   #include "trigger.h"
>>> +#include "lib/core/datetime.h"
>>>   #include "lib/core/decimal.h" /* decimal_t */
>>>   #include "lib/core/mp_extension_types.h"
>>>   #include "lua/error.h"
>>> @@ -223,6 +224,7 @@ struct luaL_field {
>>>           uint32_t size;
>>>           decimal_t *decval;
>>>           struct tt_uuid *uuidval;
>>> +        struct datetime *dateval;
>>>       };
>>>       enum mp_type type;
>>>       /* subtypes of MP_EXT */
>>> diff --git a/src/lua/utils.c b/src/lua/utils.c
>>> index 2c89326f3..771f6f278 100644
>>> --- a/src/lua/utils.c
>>> +++ b/src/lua/utils.c
>>> @@ -254,7 +254,6 @@ luaL_setcdatagc(struct lua_State *L, int idx)
>>>       lua_pop(L, 1);
>>>   }
>>> -
>>
>>
>> Extraneous change. Please, remove.
>
> Removed from the patch. Thanks!
>
>>
>>
>>>   /**
>>>    * A helper to register a single type metatable.
>>>    */
>>> diff --git a/test/unit/datetime.c b/test/unit/datetime.c
>>> index 1ae76003b..a72ac2253 100644
>>> --- a/test/unit/datetime.c
>>> +++ b/test/unit/datetime.c
>>> @@ -6,6 +6,9 @@
>>>   #include "unit.h"
>>>   #include "datetime.h"
>>> +#include "mp_datetime.h"
>>> +#include "msgpuck.h"
>>> +#include "mp_extension_types.h"
>>>   static const char sample[] = "2012-12-24T15:30Z";
>>> @@ -247,12 +250,132 @@ tostring_datetime_test(void)
>>>       check_plan();
>>>   }
>>>
>>
>>
>> <stripped>
>>
>
>
> -----------------------------------------------------
> diff --git a/src/lib/core/datetime.h b/src/lib/core/datetime.h
> index f98f7010d..df3c1c83d 100644
> --- a/src/lib/core/datetime.h
> +++ b/src/lib/core/datetime.h
> @@ -5,6 +5,7 @@
>   * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
>   */
>
> +#include <limits.h>
>  #include <stdint.h>
>  #include <stdbool.h>
>  #include "c-dt/dt.h"
> @@ -30,6 +31,26 @@ extern "C"
>  #define DT_EPOCH_1970_OFFSET  719163
>  #endif
>
> +/**
> + * c-dt library uses int as type for dt value, which
> + * represents the number of days since Rata Die date.
> + * This implies limits to the number of seconds we
> + * could safely store in our structures and then safely
> + * pass to c-dt functions.
> + *
> + * So supported ranges will be
> + * - for seconds [-185604722870400 .. 185480451417600]
> + * - for dates   [-5879610-06-22T00:00Z .. 5879611-07-11T00:00Z]
> + */
> +#define MAX_DT_DAY_VALUE (int64_t)INT_MAX
> +#define MIN_DT_DAY_VALUE (int64_t)INT_MIN
> +#define SECS_EPOCH_1970_OFFSET     \
> +    ((int64_t)DT_EPOCH_1970_OFFSET * SECS_PER_DAY)
> +#define MAX_EPOCH_SECS_VALUE    \
> +    (MAX_DT_DAY_VALUE * SECS_PER_DAY - SECS_EPOCH_1970_OFFSET)
> +#define MIN_EPOCH_SECS_VALUE    \
> +    (MIN_DT_DAY_VALUE * SECS_PER_DAY - SECS_EPOCH_1970_OFFSET)
> +
>  /**
>   * datetime structure keeps number of seconds since
>   * Unix Epoch.
> diff --git a/src/lib/core/mp_datetime.c b/src/lib/core/mp_datetime.c
> index 7e475d5f1..963752c23 100644
> --- a/src/lib/core/mp_datetime.c
> +++ b/src/lib/core/mp_datetime.c
> @@ -1,34 +1,12 @@
>  /*
> - * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
> - *
> - * Redistribution and use in source and binary forms, with or
> - * without modification, are permitted provided that the following
> - * conditions are met:
> - *
> - * 1. Redistributions of source code must retain the above
> - *    copyright notice, this list of conditions and the
> - *    following disclaimer.
> - *
> - * 2. Redistributions in binary form must reproduce the above
> - *    copyright notice, this list of conditions and the following
> - *    disclaimer in the documentation and/or other materials
> - *    provided with the distribution.
> + * SPDX-License-Identifier: BSD-2-Clause
>   *
> - * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
> - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
> - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
> - * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
> - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> - * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> - * SUCH DAMAGE.
> + * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
>   */
>
> +#include <limits.h>
> +#include <assert.h>
> +
>  #include "mp_datetime.h"
>  #include "msgpuck.h"
>  #include "mp_extension_types.h"
> @@ -37,9 +15,9 @@
>    Datetime MessagePack serialization schema is MP_EXT (0xC7 for 1 
> byte length)
>    extension, which creates container of 1 to 3 integers.
>
> - 
> +----+---+-----------+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....+
> -  |0xC7| 4 |len (uint8)| seconds (int) | nanoseconds (uint) | offset 
> (uint) |
> - 
> +----+---+-----------+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....+
> + 
> +----+-----------+---+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....+
> +  |0xC7|len (uint8)| 4 | seconds (int) | nanoseconds (uint) | offset 
> (int)  |
> + 
> +----+-----------+---+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....+
>
>    MessagePack extension MP_EXT (0xC7), after 1-byte length, contains:
>
> @@ -50,7 +28,7 @@
>      If this value is 0 then it's not saved (unless there is offset 
> field,
>      as below);
>
> -  - [optional] timzeone offset in minutes as unsigned integer.
> +  - [optional] timezone offset in minutes as signed integer.
>      If this field is 0 then it's not saved.
>   */
>
> @@ -80,17 +58,34 @@ mp_decode_Xint(const char **data)
>      return 0;
>  }
>
> +#define check_secs(secs)                                \
> +    assert((int64_t)(secs) <= MAX_EPOCH_SECS_VALUE);\
> +    assert((int64_t)(secs) >= MIN_EPOCH_SECS_VALUE);
> +
> +#define check_nanosecs(nsec)      assert((nsec) < 1000000000);
> +
> +#define check_tz_offset(offset)       \
> +    assert((offset) <= (12 * 60));\
> +    assert((offset) >= (-12 * 60));
> +
>  static inline uint32_t
>  mp_sizeof_datetime_raw(const struct datetime *date)
>  {
> +    check_secs(date->secs);
>      uint32_t sz = mp_sizeof_Xint(date->secs);
>
> -    // even if nanosecs == 0 we need to output anything
> -    // if we have non-null tz offset
> -    if (date->nsec != 0 || date->offset != 0)
> +    /*
> +     * even if nanosecs == 0 we need to output something
> +     * if we have a non-null tz offset
> +     */
> +    if (date->nsec != 0 || date->offset != 0) {
> +        check_nanosecs(date->nsec);
>          sz += mp_sizeof_Xint(date->nsec);
> -    if (date->offset)
> +    }
> +    if (date->offset) {
> +        check_tz_offset(date->offset);
>          sz += mp_sizeof_Xint(date->offset);
> +    }
>      return sz;
>  }
>
> @@ -103,24 +98,30 @@ mp_sizeof_datetime(const struct datetime *date)
>  struct datetime *
>  datetime_unpack(const char **data, uint32_t len, struct datetime *date)
>  {
> -    const char * svp = *data;
> +    const char *svp = *data;
>
>      memset(date, 0, sizeof(*date));
>
> -    date->secs = mp_decode_Xint(data);
> +    int64_t seconds = mp_decode_Xint(data);
> +    check_secs(seconds);
> +    date->secs = seconds;
>
>      len -= *data - svp;
>      if (len <= 0)
>          return date;
>
>      svp = *data;
> -    date->nsec = mp_decode_Xint(data);
> +    uint64_t nanoseconds = mp_decode_uint(data);
> +    check_nanosecs(nanoseconds);
> +    date->nsec = nanoseconds;
>      len -= *data - svp;
>
>      if (len <= 0)
>          return date;
>
> -    date->offset = mp_decode_Xint(data);
> +    int64_t offset = mp_decode_Xint(data);
> +    check_tz_offset(offset);
> +    date->offset = offset;
>
>      return date;
>  }
> @@ -131,10 +132,12 @@ mp_decode_datetime(const char **data, struct 
> datetime *date)
>      if (mp_typeof(**data) != MP_EXT)
>          return NULL;
>
> +    const char *svp = *data;
>      int8_t type;
>      uint32_t len = mp_decode_extl(data, &type);
>
>      if (type != MP_DATETIME || len == 0) {
> +        *data = svp;
>          return NULL;
>      }
>      return datetime_unpack(data, len, date);
> @@ -145,7 +148,7 @@ datetime_pack(char *data, const struct datetime 
> *date)
>  {
>      data = mp_encode_Xint(data, date->secs);
>      if (date->nsec != 0 || date->offset != 0)
> -        data = mp_encode_Xint(data, date->nsec);
> +        data = mp_encode_uint(data, date->nsec);
>      if (date->offset)
>          data = mp_encode_Xint(data, date->offset);
>
> @@ -165,7 +168,9 @@ mp_encode_datetime(char *data, const struct 
> datetime *date)
>  int
>  mp_snprint_datetime(char *buf, int size, const char **data, uint32_t 
> len)
>  {
> -    struct datetime date = {0, 0, 0};
> +    struct datetime date = {
> +        .secs = 0, .nsec = 0, .offset = 0
> +    };
>
>      if (datetime_unpack(data, len, &date) == NULL)
>          return -1;
> @@ -176,7 +181,9 @@ mp_snprint_datetime(char *buf, int size, const 
> char **data, uint32_t len)
>  int
>  mp_fprint_datetime(FILE *file, const char **data, uint32_t len)
>  {
> -    struct datetime date = {0, 0, 0};
> +    struct datetime date = {
> +        .secs = 0, .nsec = 0, .offset = 0
> +    };
>
>      if (datetime_unpack(data, len, &date) == NULL)
>          return -1;
> diff --git a/src/lib/core/mp_datetime.h b/src/lib/core/mp_datetime.h
> index 9a4d2720c..92e94a243 100644
> --- a/src/lib/core/mp_datetime.h
> +++ b/src/lib/core/mp_datetime.h
> @@ -1,33 +1,8 @@
>  #pragma once
>  /*
> - * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
> - *
> - * Redistribution and use in source and binary forms, with or
> - * without modification, are permitted provided that the following
> - * conditions are met:
> - *
> - * 1. Redistributions of source code must retain the above
> - *    copyright notice, this list of conditions and the
> - *    following disclaimer.
> + * SPDX-License-Identifier: BSD-2-Clause
>   *
> - * 2. Redistributions in binary form must reproduce the above
> - *    copyright notice, this list of conditions and the following
> - *    disclaimer in the documentation and/or other materials
> - *    provided with the distribution.
> - *
> - * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
> - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
> - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
> - * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
> - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> - * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> - * SUCH DAMAGE.
> + * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
>   */
>
>  #include <stdio.h>
>
> -----------------------------------------------------
>
> Thanks,
> Timur

Thanks for the changes!

-- 
Serge Petrenko



More information about the Tarantool-patches mailing list