[Tarantool-patches] [PATCH v5 4/8] box, datetime: messagepack support for datetime

Safin Timur tsafin at tarantool.org
Wed Aug 18 02:42:06 MSK 2021


Thanks, Sergey, for your feedback. Below you'll see a few comments and an
incremental patch...

On 17.08.2021 15:16, Serge Petrenko wrote:
> 
> 
> 16.08.2021 02:59, Timur Safin via Tarantool-patches пишет:
>> Serialize datetime_t as newly introduced MP_EXT type.
>> It saves 1 required integer field and upto 2 optional
>> unsigned fields in very compact fashion.
>> - secs is required field;
>> - but nsec, offset are both optional;
>>
>> * json, yaml serialization formats, lua output mode
>>    supported;
>> * exported symbols for datetime messagepack size calculations
>>    so they are available for usage on Lua side.
>>
>> Part of #5941
>> Part of #5946
>> ---
>>   extra/exports                     |   5 +-
>>   src/box/field_def.c               |  35 +++---
>>   src/box/field_def.h               |   1 +
>>   src/box/lua/serialize_lua.c       |   7 +-
>>   src/box/msgpack.c                 |   7 +-
>>   src/box/tuple_compare.cc          |  20 ++++
>>   src/lib/core/CMakeLists.txt       |   4 +-
>>   src/lib/core/datetime.c           |   9 ++
>>   src/lib/core/datetime.h           |  11 ++
>>   src/lib/core/mp_datetime.c        | 189 ++++++++++++++++++++++++++++++
>>   src/lib/core/mp_datetime.h        |  89 ++++++++++++++
>>   src/lib/core/mp_extension_types.h |   1 +
>>   src/lib/mpstream/mpstream.c       |  11 ++
>>   src/lib/mpstream/mpstream.h       |   4 +
>>   src/lua/msgpack.c                 |  12 ++
>>   src/lua/msgpackffi.lua            |  18 +++
>>   src/lua/serializer.c              |   4 +
>>   src/lua/serializer.h              |   2 +
>>   src/lua/utils.c                   |   1 -
>>   test/unit/datetime.c              | 125 +++++++++++++++++++-
>>   test/unit/datetime.result         | 115 +++++++++++++++++-
>>   third_party/lua-cjson/lua_cjson.c |   8 ++
>>   third_party/lua-yaml/lyaml.cc     |   6 +-
>>   23 files changed, 661 insertions(+), 23 deletions(-)
>>   create mode 100644 src/lib/core/mp_datetime.c
>>   create mode 100644 src/lib/core/mp_datetime.h
>>
>> diff --git a/extra/exports b/extra/exports
>> index 2437e175c..c34a5c2b5 100644
>> --- a/extra/exports
>> +++ b/extra/exports
>> @@ -151,9 +151,10 @@ csv_setopt
>>   datetime_asctime
>>   datetime_ctime
>>   datetime_now
>> +datetime_pack
>>   datetime_strftime
>>   datetime_to_string
>> -decimal_unpack
>> +datetime_unpack
> 
> 
> decimal_unpack should stay there.

It's there, but 2 lines below :)

That was my mistake: I copy-pasted decimal_unpack a few patches earlier but
did not rename it to datetime_unpack. Previously I corrected it in this patch.

I've now fixed it in the patch where it originally appeared, so it has the
correct name from the start.

> 
> 
>>   decimal_from_string
>>   decimal_unpack

      ^^^ it was here

>>   tnt_dt_dow
>> @@ -397,6 +398,7 @@ mp_decode_uint
>>   mp_encode_array
>>   mp_encode_bin
>>   mp_encode_bool
>> +mp_encode_datetime
>>   mp_encode_decimal
>>   mp_encode_double
>>   mp_encode_float
>> @@ -413,6 +415,7 @@ mp_next
>>   mp_next_slowpath
>>   mp_parser_hint
>>   mp_sizeof_array
>> +mp_sizeof_datetime
>>   mp_sizeof_decimal
>>   mp_sizeof_str
>>   mp_sizeof_uuid
>> diff --git a/src/box/field_def.c b/src/box/field_def.c
>> index 51acb8025..2682a42ee 100644
>> --- a/src/box/field_def.c
>> +++ b/src/box/field_def.c
>> @@ -72,6 +72,7 @@ const uint32_t field_mp_type[] = {
>>       /* [FIELD_TYPE_UUID]     =  */ 0, /* only MP_UUID is supported */
>>       /* [FIELD_TYPE_ARRAY]    =  */ 1U << MP_ARRAY,
>>       /* [FIELD_TYPE_MAP]      =  */ (1U << MP_MAP),
>> +    /* [FIELD_TYPE_DATETIME] =  */ 0, /* only MP_DATETIME is 
>> supported */
>>   };
>>   const uint32_t field_ext_type[] = {
>> @@ -83,11 +84,13 @@ const uint32_t field_ext_type[] = {
>>       /* [FIELD_TYPE_INTEGER]   = */ 0,
>>       /* [FIELD_TYPE_BOOLEAN]   = */ 0,
>>       /* [FIELD_TYPE_VARBINARY] = */ 0,
>> -    /* [FIELD_TYPE_SCALAR]    = */ (1U << MP_DECIMAL) | (1U << MP_UUID),
>> +    /* [FIELD_TYPE_SCALAR]    = */ (1U << MP_DECIMAL) | (1U << 
>> MP_UUID) |
>> +        (1U << MP_DATETIME),
>>       /* [FIELD_TYPE_DECIMAL]   = */ 1U << MP_DECIMAL,
>>       /* [FIELD_TYPE_UUID]      = */ 1U << MP_UUID,
>>       /* [FIELD_TYPE_ARRAY]     = */ 0,
>>       /* [FIELD_TYPE_MAP]       = */ 0,
>> +    /* [FIELD_TYPE_DATETIME]  = */ 1U << MP_DATETIME,
>>   };
>>   const char *field_type_strs[] = {
>> @@ -104,6 +107,7 @@ const char *field_type_strs[] = {
>>       /* [FIELD_TYPE_UUID]     = */ "uuid",
>>       /* [FIELD_TYPE_ARRAY]    = */ "array",
>>       /* [FIELD_TYPE_MAP]      = */ "map",
>> +    /* [FIELD_TYPE_DATETIME] = */ "datetime",
>>   };
>>   const char *on_conflict_action_strs[] = {
>> @@ -128,20 +132,21 @@ field_type_by_name_wrapper(const char *str, 
>> uint32_t len)
>>    * values can be stored in the j type.
>>    */
>>   static const bool field_type_compatibility[] = {
>> -       /*   ANY   UNSIGNED  STRING   NUMBER  DOUBLE  INTEGER  BOOLEAN 
>> VARBINARY SCALAR  DECIMAL   UUID    ARRAY    MAP  */
>> -/*   ANY    */ true,   false,   false,   false,   false,   false,   
>> false,   false,  false,  false,  false,   false,   false,
>> -/* UNSIGNED */ true,   true,    false,   true,    false,   true,    
>> false,   false,  true,   false,  false,   false,   false,
>> -/*  STRING  */ true,   false,   true,    false,   false,   false,   
>> false,   false,  true,   false,  false,   false,   false,
>> -/*  NUMBER  */ true,   false,   false,   true,    false,   false,   
>> false,   false,  true,   false,  false,   false,   false,
>> -/*  DOUBLE  */ true,   false,   false,   true,    true,    false,   
>> false,   false,  true,   false,  false,   false,   false,
>> -/*  INTEGER */ true,   false,   false,   true,    false,   true,    
>> false,   false,  true,   false,  false,   false,   false,
>> -/*  BOOLEAN */ true,   false,   false,   false,   false,   false,   
>> true,    false,  true,   false,  false,   false,   false,
>> -/* VARBINARY*/ true,   false,   false,   false,   false,   false,   
>> false,   true,   true,   false,  false,   false,   false,
>> -/*  SCALAR  */ true,   false,   false,   false,   false,   false,   
>> false,   false,  true,   false,  false,   false,   false,
>> -/*  DECIMAL */ true,   false,   false,   true,    false,   false,   
>> false,   false,  true,   true,   false,   false,   false,
>> -/*   UUID   */ true,   false,   false,   false,   false,   false,   
>> false,   false,  false,  false,  true,    false,   false,
>> -/*   ARRAY  */ true,   false,   false,   false,   false,   false,   
>> false,   false,  false,  false,  false,   true,    false,
>> -/*    MAP   */ true,   false,   false,   false,   false,   false,   
>> false,   false,  false,  false,  false,   false,   true,
>> +       /*   ANY   UNSIGNED  STRING   NUMBER  DOUBLE  INTEGER  BOOLEAN 
>> VARBINARY SCALAR  DECIMAL   UUID    ARRAY    MAP     DATETIME */
>> +/*   ANY    */ true,   false,   false,   false,   false,   false,   
>> false,   false,  false,  false,  false,   false,   false,   false,
>> +/* UNSIGNED */ true,   true,    false,   true,    false,   true,    
>> false,   false,  true,   false,  false,   false,   false,   false,
>> +/*  STRING  */ true,   false,   true,    false,   false,   false,   
>> false,   false,  true,   false,  false,   false,   false,   false,
>> +/*  NUMBER  */ true,   false,   false,   true,    false,   false,   
>> false,   false,  true,   false,  false,   false,   false,   false,
>> +/*  DOUBLE  */ true,   false,   false,   true,    true,    false,   
>> false,   false,  true,   false,  false,   false,   false,   false,
>> +/*  INTEGER */ true,   false,   false,   true,    false,   true,    
>> false,   false,  true,   false,  false,   false,   false,   false,
>> +/*  BOOLEAN */ true,   false,   false,   false,   false,   false,   
>> true,    false,  true,   false,  false,   false,   false,   false,
>> +/* VARBINARY*/ true,   false,   false,   false,   false,   false,   
>> false,   true,   true,   false,  false,   false,   false,   false,
>> +/*  SCALAR  */ true,   false,   false,   false,   false,   false,   
>> false,   false,  true,   false,  false,   false,   false,   false,
>> +/*  DECIMAL */ true,   false,   false,   true,    false,   false,   
>> false,   false,  true,   true,   false,   false,   false,   false,
>> +/*   UUID   */ true,   false,   false,   false,   false,   false,   
>> false,   false,  false,  false,  true,    false,   false,   false,
>> +/*   ARRAY  */ true,   false,   false,   false,   false,   false,   
>> false,   false,  false,  false,  false,   true,    false,   false,
>> +/*    MAP   */ true,   false,   false,   false,   false,   false,   
>> false,   false,  false,  false,  false,   false,   true,    false,
>> +/* DATETIME */ true,   false,   false,   false,   false,   false,   
>> false,   false,  true,   false,  false,   false,   false,   true,
>>   };
>>   bool
>> diff --git a/src/box/field_def.h b/src/box/field_def.h
>> index c5cfe5e86..120b2a93d 100644
>> --- a/src/box/field_def.h
>> +++ b/src/box/field_def.h
>> @@ -63,6 +63,7 @@ enum field_type {
>>       FIELD_TYPE_UUID,
>>       FIELD_TYPE_ARRAY,
>>       FIELD_TYPE_MAP,
>> +    FIELD_TYPE_DATETIME,
>>       field_type_MAX
>>   };
> 
> 
> Please, define FIELD_TYPE_DATETIME higher.
> Right after FIELD_TYPE_UUID.
> 
> This way you won't need to rework field type allowed in index check
> in the next commit.

That's very straightforward and easy, my bad that I've overcomplicated it!

But I'll move the change to the next patch since, as Vova has correctly
pointed out, it should be part of the index support.


> 
> 
>> diff --git a/src/box/lua/serialize_lua.c b/src/box/lua/serialize_lua.c
>> index 1f791980f..51855011b 100644
>> --- a/src/box/lua/serialize_lua.c
>> +++ b/src/box/lua/serialize_lua.c
>> @@ -768,7 +768,7 @@ static int
>>   dump_node(struct lua_dumper *d, struct node *nd, int indent)
>>   {
>>       struct luaL_field *field = &nd->field;
>> -    char buf[FPCONV_G_FMT_BUFSIZE];
>> +    char buf[FPCONV_G_FMT_BUFSIZE + 8];
> 
> 
> Why "+8"?

Well, because the current FPCONV_G_FMT_BUFSIZE (32) was not enough for a full
ISO-8601 literal with nanoseconds :)

Probably I should introduce a new constant...

[Or, as Vova has suggested, just use the MAX of those two values: my
length and FPCONV_G_FMT_BUFSIZE.]

--------------------------------------------
diff --git a/src/box/lua/serialize_lua.c b/src/box/lua/serialize_lua.c
index 51855011b..eef3a4995 100644
--- a/src/box/lua/serialize_lua.c
+++ b/src/box/lua/serialize_lua.c
@@ -768,7 +768,7 @@ static int
  dump_node(struct lua_dumper *d, struct node *nd, int indent)
  {
  	struct luaL_field *field = &nd->field;
-	char buf[FPCONV_G_FMT_BUFSIZE + 8];
+	char buf[MAX(FPCONV_G_FMT_BUFSIZE, DT_TO_STRING_BUFSIZE)];
  	int ltype = lua_type(d->L, -1);
  	const char *str = NULL;
  	size_t len = 0;
diff --git a/src/lib/core/datetime.h b/src/lib/core/datetime.h
index 497cd9f14..b8d179600 100644
--- a/src/lib/core/datetime.h
+++ b/src/lib/core/datetime.h
@@ -87,6 +87,11 @@ struct datetime_interval {
  int
  datetime_compare(const struct datetime *lhs, const struct datetime *rhs);

+/**
+ * Required size of datetime_to_string string buffer
+ */
+#define DT_TO_STRING_BUFSIZE   48
+
  /**
   * Convert datetime to string using default format
   * @param date source datetime value
--------------------------------------------


> 
> 
>>       int ltype = lua_type(d->L, -1);
>>       const char *str = NULL;
>>       size_t len = 0;
> 
> 
> <stripped>
> 
> 
>> diff --git a/src/lib/core/mp_datetime.c b/src/lib/core/mp_datetime.c
>> new file mode 100644
>> index 000000000..d0a3e562c
>> --- /dev/null
>> +++ b/src/lib/core/mp_datetime.c
>> @@ -0,0 +1,189 @@
>> +/*
>> + * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
>> + *
>> + * Redistribution and use in source and binary forms, with or
>> + * without modification, are permitted provided that the following
>> + * conditions are met:
>> + *
>> + * 1. Redistributions of source code must retain the above
>> + *    copyright notice, this list of conditions and the
>> + *    following disclaimer.
>> + *
>> + * 2. Redistributions in binary form must reproduce the above
>> + *    copyright notice, this list of conditions and the following
>> + *    disclaimer in the documentation and/or other materials
>> + *    provided with the distribution.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
>> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
>> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
>> + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
>> + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
>> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
>> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
>> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
>> + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
>> + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>> + * SUCH DAMAGE.
>> + */
>> +
> 
> Same about the license.
> Please, replace that with
> 
> /*
>   * SPDX-License-Identifier: BSD-2-Clause
>   *
>   * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
>   */
> 
> And do the same for all new files.

Updated.

> 
>> +#include "mp_datetime.h"
>> +#include "msgpuck.h"
>> +#include "mp_extension_types.h"
>> +
>> +/*
>> +  Datetime MessagePack serialization schema is MP_EXT (0xC7 for 1 
>> byte length)
>> +  extension, which creates container of 1 to 3 integers.
>> +
>> +  
>> +----+---+-----------+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....+ 
>>
>> +  |0xC7| 4 |len (uint8)| seconds (int) | nanoseconds (uint) | offset 
>> (uint) |
>> +  
>> +----+---+-----------+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....+ 
>>
> 
> The order should be 0xC7, len(uint8), 4, seconds, ...
> according to
> https://github.com/msgpack/msgpack/blob/master/spec.md#ext-format-family

Indeed, that was my misconception, thanks for correction!
[Updated picture in the patch and in the discussion - 
https://github.com/tarantool/tarantool/discussions/6244#discussioncomment-1043990]

> 
>> +
>> +  MessagePack extension MP_EXT (0xC7), after 1-byte length, contains:
>> +
>> +  - signed integer seconds part (required). Depending on the value of
>> +    seconds it may be from 1 to 8 bytes positive or negative integer 
>> number;
>> +
>> +  - [optional] fraction time in nanoseconds as unsigned integer.
>> +    If this value is 0 then it's not saved (unless there is offset 
>> field,
>> +    as below);
>> +
>> +  - [optional] timzeone offset in minutes as unsigned integer.
>> +    If this field is 0 then it's not saved.
>> + */
>> +
>> +static inline uint32_t
>> +mp_sizeof_Xint(int64_t n)
>> +{
>> +    return n < 0 ? mp_sizeof_int(n) : mp_sizeof_uint(n);
>> +}
>> +
>> +static inline char *
>> +mp_encode_Xint(char *data, int64_t v)
>> +{
>> +    return v < 0 ? mp_encode_int(data, v) : mp_encode_uint(data, v);
>> +}
>> +
>> +static inline int64_t
>> +mp_decode_Xint(const char **data)
>> +{
>> +    switch (mp_typeof(**data)) {
>> +    case MP_UINT:
>> +        return (int64_t)mp_decode_uint(data);
>> +    case MP_INT:
>> +        return mp_decode_int(data);
>> +    default:
>> +        mp_unreachable();
>> +    }
>> +    return 0;
>> +}
> 
> I believe mp_decode_Xint and mp_encode_Xint
> belong to a more generic file, but I couldn't find an
> appropriate one. Up to you.

Yup, the plan was to move them to a more generic place once they became
useful at least a second time. This actually is the second time (the first
was in the SQL AST parser branch here
https://github.com/tarantool/tarantool/commit/55a4182ebfbed1a3c916fb7e326f8f7861776a7f#diff-e3f5bdfa58bcaed35b89f22e94be7ad472a6b37d656a129722ea0d5609503c6aR132-R143).
But that patchset has not landed in master yet, so for now this is
effectively the first use and only a local definition is warranted. When I
return to the distributed-sql AST parser I'll rework them and put them
elsewhere.


> 
>> +
>> +static inline uint32_t
>> +mp_sizeof_datetime_raw(const struct datetime *date)
>> +{
>> +    uint32_t sz = mp_sizeof_Xint(date->secs);
>> +
>> +    // even if nanosecs == 0 we need to output anything
>> +    // if we have non-null tz offset
> 
> 
> Please, stick with our comment format:

Oh, yup, that slipped through. Corrected.

> 
> /*
>   * Even if nanosecs == 0 we need to output anything
>   * if we have non-null tz offset
> */
> 
> 
>> +    if (date->nsec != 0 || date->offset != 0)
>> +        sz += mp_sizeof_Xint(date->nsec);
>> +    if (date->offset)
>> +        sz += mp_sizeof_Xint(date->offset);
>> +    return sz;
>> +}
>> +
>> +uint32_t
>> +mp_sizeof_datetime(const struct datetime *date)
>> +{
>> +    return mp_sizeof_ext(mp_sizeof_datetime_raw(date));
>> +}
>> +
>> +struct datetime *
>> +datetime_unpack(const char **data, uint32_t len, struct datetime *date)
>> +{
>> +    const char * svp = *data;
>> +
>> +    memset(date, 0, sizeof(*date));
>> +
>> +    date->secs = mp_decode_Xint(data);
> 
> 
> Please, leave a comment about date->secs possible range here.
> Why is it ok to store a decoded int64_t in a double.

Yes, that's a reasonable complaint. I'll document the range supported by dt
in the datetime.h header and declare the legal bounds there, so we can use
them later in asserts.

Please see incremental patch for this step below...

> 
> 
>> +
>> +    len -= *data - svp;
>> +    if (len <= 0)
>> +        return date;
>> +
>> +    svp = *data;
>> +    date->nsec = mp_decode_Xint(data);
>> +    len -= *data - svp;
>> +
>> +    if (len <= 0)
>> +        return date;
>> +
>> +    date->offset = mp_decode_Xint(data);
>> +
>> +    return date;
>> +}
>> +
>> +struct datetime *
>> +mp_decode_datetime(const char **data, struct datetime *date)
>> +{
>> +    if (mp_typeof(**data) != MP_EXT)
>> +        return NULL;
>> +
>> +    int8_t type;
>> +    uint32_t len = mp_decode_extl(data, &type);
>> +
>> +    if (type != MP_DATETIME || len == 0) {
>> +        return NULL;
> 
> 
> Please, revert data to savepoint when decoding fails.
> If mp_decode_extl or datetime_unpack fail, you mustn't
> modify data.
> 

Didn't think about this case - will make sure data points to the
original location if decoding fails.

> 
>> +    }
>> +    return datetime_unpack(data, len, date);
>> +}
>> +
>> +char *
>> +datetime_pack(char *data, const struct datetime *date)
>> +{
>> +    data = mp_encode_Xint(data, date->secs);
>> +    if (date->nsec != 0 || date->offset != 0)
>> +        data = mp_encode_Xint(data, date->nsec);
>> +    if (date->offset)
>> +        data = mp_encode_Xint(data, date->offset);
>> +
>> +    return data;
>> +}
> 
> 
> <stripped>
> 
> 
>> diff --git a/src/lua/serializer.h b/src/lua/serializer.h
>> index 0a0501a74..e7a240e0a 100644
>> --- a/src/lua/serializer.h
>> +++ b/src/lua/serializer.h
>> @@ -52,6 +52,7 @@ extern "C" {
>>   #include <lauxlib.h>
>>   #include "trigger.h"
>> +#include "lib/core/datetime.h"
>>   #include "lib/core/decimal.h" /* decimal_t */
>>   #include "lib/core/mp_extension_types.h"
>>   #include "lua/error.h"
>> @@ -223,6 +224,7 @@ struct luaL_field {
>>           uint32_t size;
>>           decimal_t *decval;
>>           struct tt_uuid *uuidval;
>> +        struct datetime *dateval;
>>       };
>>       enum mp_type type;
>>       /* subtypes of MP_EXT */
>> diff --git a/src/lua/utils.c b/src/lua/utils.c
>> index 2c89326f3..771f6f278 100644
>> --- a/src/lua/utils.c
>> +++ b/src/lua/utils.c
>> @@ -254,7 +254,6 @@ luaL_setcdatagc(struct lua_State *L, int idx)
>>       lua_pop(L, 1);
>>   }
>> -
> 
> 
> Extraneous change. Please, remove.

Removed from the patch. Thanks!

> 
> 
>>   /**
>>    * A helper to register a single type metatable.
>>    */
>> diff --git a/test/unit/datetime.c b/test/unit/datetime.c
>> index 1ae76003b..a72ac2253 100644
>> --- a/test/unit/datetime.c
>> +++ b/test/unit/datetime.c
>> @@ -6,6 +6,9 @@
>>   #include "unit.h"
>>   #include "datetime.h"
>> +#include "mp_datetime.h"
>> +#include "msgpuck.h"
>> +#include "mp_extension_types.h"
>>   static const char sample[] = "2012-12-24T15:30Z";
>> @@ -247,12 +250,132 @@ tostring_datetime_test(void)
>>       check_plan();
>>   }
>>
> 
> 
> <stripped>
> 


-----------------------------------------------------
diff --git a/src/lib/core/datetime.h b/src/lib/core/datetime.h
index f98f7010d..df3c1c83d 100644
--- a/src/lib/core/datetime.h
+++ b/src/lib/core/datetime.h
@@ -5,6 +5,7 @@
   * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
   */

+#include <limits.h>
  #include <stdint.h>
  #include <stdbool.h>
  #include "c-dt/dt.h"
@@ -30,6 +31,26 @@ extern "C"
  #define DT_EPOCH_1970_OFFSET  719163
  #endif

+/**
+ * c-dt library uses int as type for dt value, which
+ * represents the number of days since Rata Die date.
+ * This implies limits to the number of seconds we
+ * could safely store in our structures and then safely
+ * pass to c-dt functions.
+ *
+ * So supported ranges will be
+ * - for seconds [-185604722870400 .. 185480451417600]
+ * - for dates   [-5879610-06-22T00:00Z .. 5879611-07-11T00:00Z]
+ */
+#define MAX_DT_DAY_VALUE (int64_t)INT_MAX
+#define MIN_DT_DAY_VALUE (int64_t)INT_MIN
+#define SECS_EPOCH_1970_OFFSET 	\
+	((int64_t)DT_EPOCH_1970_OFFSET * SECS_PER_DAY)
+#define MAX_EPOCH_SECS_VALUE    \
+	(MAX_DT_DAY_VALUE * SECS_PER_DAY - SECS_EPOCH_1970_OFFSET)
+#define MIN_EPOCH_SECS_VALUE    \
+	(MIN_DT_DAY_VALUE * SECS_PER_DAY - SECS_EPOCH_1970_OFFSET)
+
  /**
   * datetime structure keeps number of seconds since
   * Unix Epoch.
diff --git a/src/lib/core/mp_datetime.c b/src/lib/core/mp_datetime.c
index 7e475d5f1..963752c23 100644
--- a/src/lib/core/mp_datetime.c
+++ b/src/lib/core/mp_datetime.c
@@ -1,34 +1,12 @@
  /*
- * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * 1. Redistributions of source code must retain the above
- *    copyright notice, this list of conditions and the
- *    following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above
- *    copyright notice, this list of conditions and the following
- *    disclaimer in the documentation and/or other materials
- *    provided with the distribution.
+ * SPDX-License-Identifier: BSD-2-Clause
   *
- * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
- * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
- * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
   */

+#include <limits.h>
+#include <assert.h>
+
  #include "mp_datetime.h"
  #include "msgpuck.h"
  #include "mp_extension_types.h"
@@ -37,9 +15,9 @@
    Datetime MessagePack serialization schema is MP_EXT (0xC7 for 1 byte 
length)
    extension, which creates container of 1 to 3 integers.

- 
+----+---+-----------+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....+
-  |0xC7| 4 |len (uint8)| seconds (int) | nanoseconds (uint) | offset 
(uint) |
- 
+----+---+-----------+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....+
+ 
+----+-----------+---+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....+
+  |0xC7|len (uint8)| 4 | seconds (int) | nanoseconds (uint) | offset 
(int)  |
+ 
+----+-----------+---+====~~~~~~~====+-----~~~~~~~~-------+....~~~~~~~....+

    MessagePack extension MP_EXT (0xC7), after 1-byte length, contains:

@@ -50,7 +28,7 @@
      If this value is 0 then it's not saved (unless there is offset field,
      as below);

-  - [optional] timzeone offset in minutes as unsigned integer.
+  - [optional] timezone offset in minutes as signed integer.
      If this field is 0 then it's not saved.
   */

@@ -80,17 +58,34 @@ mp_decode_Xint(const char **data)
  	return 0;
  }

+#define check_secs(secs)                                \
+	assert((int64_t)(secs) <= MAX_EPOCH_SECS_VALUE);\
+	assert((int64_t)(secs) >= MIN_EPOCH_SECS_VALUE);
+
+#define check_nanosecs(nsec)      assert((nsec) < 1000000000);
+
+#define check_tz_offset(offset)       \
+	assert((offset) <= (12 * 60));\
+	assert((offset) >= (-12 * 60));
+
  static inline uint32_t
  mp_sizeof_datetime_raw(const struct datetime *date)
  {
+	check_secs(date->secs);
  	uint32_t sz = mp_sizeof_Xint(date->secs);

-	// even if nanosecs == 0 we need to output anything
-	// if we have non-null tz offset
-	if (date->nsec != 0 || date->offset != 0)
+	/*
+	 * even if nanosecs == 0 we need to output something
+	 * if we have a non-null tz offset
+	 */
+	if (date->nsec != 0 || date->offset != 0) {
+		check_nanosecs(date->nsec);
  		sz += mp_sizeof_Xint(date->nsec);
-	if (date->offset)
+	}
+	if (date->offset) {
+		check_tz_offset(date->offset);
  		sz += mp_sizeof_Xint(date->offset);
+	}
  	return sz;
  }

@@ -103,24 +98,30 @@ mp_sizeof_datetime(const struct datetime *date)
  struct datetime *
  datetime_unpack(const char **data, uint32_t len, struct datetime *date)
  {
-	const char * svp = *data;
+	const char *svp = *data;

  	memset(date, 0, sizeof(*date));

-	date->secs = mp_decode_Xint(data);
+	int64_t seconds = mp_decode_Xint(data);
+	check_secs(seconds);
+	date->secs = seconds;

  	len -= *data - svp;
  	if (len <= 0)
  		return date;

  	svp = *data;
-	date->nsec = mp_decode_Xint(data);
+	uint64_t nanoseconds = mp_decode_uint(data);
+	check_nanosecs(nanoseconds);
+	date->nsec = nanoseconds;
  	len -= *data - svp;

  	if (len <= 0)
  		return date;

-	date->offset = mp_decode_Xint(data);
+	int64_t offset = mp_decode_Xint(data);
+	check_tz_offset(offset);
+	date->offset = offset;

  	return date;
  }
@@ -131,10 +132,12 @@ mp_decode_datetime(const char **data, struct 
datetime *date)
  	if (mp_typeof(**data) != MP_EXT)
  		return NULL;

+	const char *svp = *data;
  	int8_t type;
  	uint32_t len = mp_decode_extl(data, &type);

  	if (type != MP_DATETIME || len == 0) {
+		*data = svp;
  		return NULL;
  	}
  	return datetime_unpack(data, len, date);
@@ -145,7 +148,7 @@ datetime_pack(char *data, const struct datetime *date)
  {
  	data = mp_encode_Xint(data, date->secs);
  	if (date->nsec != 0 || date->offset != 0)
-		data = mp_encode_Xint(data, date->nsec);
+		data = mp_encode_uint(data, date->nsec);
  	if (date->offset)
  		data = mp_encode_Xint(data, date->offset);

@@ -165,7 +168,9 @@ mp_encode_datetime(char *data, const struct datetime 
*date)
  int
  mp_snprint_datetime(char *buf, int size, const char **data, uint32_t len)
  {
-	struct datetime date = {0, 0, 0};
+	struct datetime date = {
+		.secs = 0, .nsec = 0, .offset = 0
+	};

  	if (datetime_unpack(data, len, &date) == NULL)
  		return -1;
@@ -176,7 +181,9 @@ mp_snprint_datetime(char *buf, int size, const char 
**data, uint32_t len)
  int
  mp_fprint_datetime(FILE *file, const char **data, uint32_t len)
  {
-	struct datetime date = {0, 0, 0};
+	struct datetime date = {
+		.secs = 0, .nsec = 0, .offset = 0
+	};

  	if (datetime_unpack(data, len, &date) == NULL)
  		return -1;
diff --git a/src/lib/core/mp_datetime.h b/src/lib/core/mp_datetime.h
index 9a4d2720c..92e94a243 100644
--- a/src/lib/core/mp_datetime.h
+++ b/src/lib/core/mp_datetime.h
@@ -1,33 +1,8 @@
  #pragma once
  /*
- * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * 1. Redistributions of source code must retain the above
- *    copyright notice, this list of conditions and the
- *    following disclaimer.
+ * SPDX-License-Identifier: BSD-2-Clause
   *
- * 2. Redistributions in binary form must reproduce the above
- *    copyright notice, this list of conditions and the following
- *    disclaimer in the documentation and/or other materials
- *    provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
- * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
- * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * Copyright 2021, Tarantool AUTHORS, please see AUTHORS file.
   */

  #include <stdio.h>

-----------------------------------------------------

Thanks,
Timur


More information about the Tarantool-patches mailing list