[Tarantool-patches] [PATCH v3 6/9] box, datetime: datetime comparison for indices

Serge Petrenko sergepetrenko at tarantool.org
Tue Aug 3 15:02:29 MSK 2021



02.08.2021 03:41, Timur Safin via Tarantool-patches пишет:
> * storage hints implemented for datetime_t values;
> * proper comparison for indices of datetime type.
>
> Part of #5941
> Part of #5946

Hi! Thanks for the patch!

Please find 2 comments below.

> ---
>   src/box/field_def.c           | 18 ++++++++
>   src/box/field_def.h           |  3 ++
>   src/box/memtx_space.c         |  3 +-
>   src/box/tuple_compare.cc      | 56 +++++++++++++++++++++++++
>   src/box/vinyl.c               |  3 +-
>   test/engine/datetime.result   | 77 +++++++++++++++++++++++++++++++++++
>   test/engine/datetime.test.lua | 35 ++++++++++++++++
>   7 files changed, 191 insertions(+), 4 deletions(-)
>   create mode 100644 test/engine/datetime.result
>   create mode 100644 test/engine/datetime.test.lua
>
> diff --git a/src/box/field_def.c b/src/box/field_def.c
> index 6964e3e9f..aaf5f9cff 100644
> --- a/src/box/field_def.c
> +++ b/src/box/field_def.c
> @@ -193,3 +193,21 @@ field_type_by_name(const char *name, size_t len)
>   		return FIELD_TYPE_ANY;
>   	return field_type_MAX;
>   }
> +
> +const bool field_type_index_allowed[] =
> +    {
> +	/* [FIELD_TYPE_ANY]      = */ false,
> +	/* [FIELD_TYPE_UNSIGNED] = */ true,
> +	/* [FIELD_TYPE_STRING]   = */ true,
> +	/* [FIELD_TYPE_NUMBER]   = */ true,
> +	/* [FIELD_TYPE_DOUBLE]   = */ true,
> +	/* [FIELD_TYPE_INTEGER]  = */ true,
> +	/* [FIELD_TYPE_BOOLEAN]  = */ true,
> +	/* [FIELD_TYPE_VARBINARY]= */ true,
> +	/* [FIELD_TYPE_SCALAR]   = */ true,
> +	/* [FIELD_TYPE_DECIMAL]  = */ true,
> +	/* [FIELD_TYPE_UUID]     = */ true,
> +	/* [FIELD_TYPE_ARRAY]    = */ false,
> +	/* [FIELD_TYPE_MAP]      = */ false,
> +	/* [FIELD_TYPE_DATETIME] = */ true,
> +};
> diff --git a/src/box/field_def.h b/src/box/field_def.h
> index 120b2a93d..bd02418df 100644
> --- a/src/box/field_def.h
> +++ b/src/box/field_def.h
> @@ -120,6 +120,9 @@ extern const uint32_t field_ext_type[];
>   extern const struct opt_def field_def_reg[];
>   extern const struct field_def field_def_default;
>   
> +/** helper table for checking allowed indices for types */
> +extern const bool field_type_index_allowed[];
> +
>   /**
>    * @brief Field definition
>    * Contains information about of one tuple field.
> diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c
> index b71318d24..1ab16122e 100644
> --- a/src/box/memtx_space.c
> +++ b/src/box/memtx_space.c
> @@ -748,8 +748,7 @@ memtx_space_check_index_def(struct space *space, struct index_def *index_def)
>   	/* Check that there are no ANY, ARRAY, MAP parts */
>   	for (uint32_t i = 0; i < key_def->part_count; i++) {
>   		struct key_part *part = &key_def->parts[i];
> -		if (part->type <= FIELD_TYPE_ANY ||
> -		    part->type >= FIELD_TYPE_ARRAY) {
> +		if (!field_type_index_allowed[part->type]) {
>   			diag_set(ClientError, ER_MODIFY_INDEX,
>   				 index_def->name, space_name(space),
>   				 tt_sprintf("field type '%s' is not supported",
> diff --git a/src/box/tuple_compare.cc b/src/box/tuple_compare.cc
> index 48c833643..f733b9f01 100644
> --- a/src/box/tuple_compare.cc
> +++ b/src/box/tuple_compare.cc
> @@ -538,6 +538,8 @@ tuple_compare_field_with_type(const char *field_a, enum mp_type a_type,
>   						   field_b, b_type);
>   	case FIELD_TYPE_UUID:
>   		return mp_compare_uuid(field_a, field_b);
> +	case FIELD_TYPE_DATETIME:
> +		return mp_compare_datetime(field_a, field_b);
>   	default:
>   		unreachable();
>   		return 0;
> @@ -1538,6 +1540,21 @@ func_index_compare_with_key(struct tuple *tuple, hint_t tuple_hint,
>   #define HINT_VALUE_DOUBLE_MAX	(exp2(HINT_VALUE_BITS - 1) - 1)
>   #define HINT_VALUE_DOUBLE_MIN	(-exp2(HINT_VALUE_BITS - 1))
>   
> +/**
> + * We need to squeeze 64 bits of seconds and 32 bits of nanoseconds
> + * into 60 bits of hint value. The idea is to represent wide enough
> + * years range, and leave the rest of bits occupied from nanoseconds part:
> + * - 36 bits is enough for time range of [208BC..4147]
> + * - for nanoseconds there is left 24 bits, which are MSB part of
> + *   32-bit value
> + */
> +#define HINT_VALUE_SECS_BITS	36
> +#define HINT_VALUE_NSEC_BITS	(HINT_VALUE_BITS - HINT_VALUE_SECS_BITS)
> +#define HINT_VALUE_SECS_MAX	((1LL << HINT_VALUE_SECS_BITS) - 1)
> +#define HINT_VALUE_SECS_MIN	(-(1LL << HINT_VALUE_SECS_BITS))
> +#define HINT_VALUE_NSEC_SHIFT	(sizeof(int) * CHAR_BIT - HINT_VALUE_NSEC_BITS)
> +#define HINT_VALUE_NSEC_MAX	((1ULL << HINT_VALUE_NSEC_BITS) - 1)
> +
>   /*
>    * HINT_CLASS_BITS should be big enough to store any mp_class value.
>    * Note, ((1 << HINT_CLASS_BITS) - 1) is reserved for HINT_NONE.
> @@ -1630,6 +1647,24 @@ hint_uuid_raw(const char *data)
>   	return hint_create(MP_CLASS_UUID, val);
>   }
>   
> +static inline hint_t
> +hint_datetime(struct datetime_t *date)
> +{
> +	/*
> +	 * Use at most HINT_VALUE_SECS_BITS from datetime
> +	 * seconds field as a hint value, and at MSB part
> +	 * of HINT_VALUE_NSEC_BITS from nanoseconds.
> +	 */
> +	int64_t secs = date->secs;
> +	int32_t nsec = date->nsec;
> +	uint64_t val = secs <= HINT_VALUE_SECS_MIN ? 0 :
> +			secs - HINT_VALUE_SECS_MIN;
> +	val &= HINT_VALUE_SECS_MAX;
> +	val <<= HINT_VALUE_NSEC_BITS;
> +	val |= (nsec >> HINT_VALUE_NSEC_SHIFT) & HINT_VALUE_NSEC_MAX;
> +	return hint_create(MP_CLASS_DATETIME, val);
> +}
> +
I like the idea of having hints for "near" dates.

You just need to assign the same HINT_VALUE_MAX hint to every datetime value
with date->secs >= HINT_VALUE_SECS_MAX. Otherwise the comparison would
make mistakes (judging by hint values only) for such far-away dates.

>   static inline uint64_t
>   hint_str_raw(const char *s, uint32_t len)
>   {
> @@ -1761,6 +1796,17 @@ field_hint_uuid(const char *field)
>   	return hint_uuid_raw(data);
>   }
>   
> +static inline hint_t
> +field_hint_datetime(const char *field)
> +{
> +	assert(mp_typeof(*field) == MP_EXT);
> +	int8_t ext_type;
> +	uint32_t len = mp_decode_extl(&field, &ext_type);
> +	assert(ext_type == MP_DATETIME);
> +	struct datetime_t date;
> +	return hint_datetime(datetime_unpack(&field, len, &date));
> +}
> +
>   static inline hint_t
>   field_hint_string(const char *field, struct coll *coll)
>   {
> @@ -1812,6 +1858,11 @@ field_hint_scalar(const char *field, struct coll *coll)
>   		}
>   		case MP_UUID:
>   			return hint_uuid_raw(field);
> +		case MP_DATETIME:
> +		{
> +			struct datetime_t date;
> +			return hint_datetime(datetime_unpack(&field, len, &date));
> +		}

But you don't allow datetime in SCALAR fields, as I see in the previous
commit, so you don't need to account for datetime in scalar hints.

>   		default:
>   			unreachable();
>   		}
> @@ -1849,6 +1900,8 @@ field_hint(const char *field, struct coll *coll)
>   		return field_hint_decimal(field);
>   	case FIELD_TYPE_UUID:
>   		return field_hint_uuid(field);
> +	case FIELD_TYPE_DATETIME:
> +		return field_hint_datetime(field);
>   	default:
>   		unreachable();
>   	}
> @@ -1963,6 +2016,9 @@ key_def_set_hint_func(struct key_def *def)
>   	case FIELD_TYPE_UUID:
>   		key_def_set_hint_func<FIELD_TYPE_UUID>(def);
>   		break;
> +	case FIELD_TYPE_DATETIME:
> +		key_def_set_hint_func<FIELD_TYPE_DATETIME>(def);
> +		break;
>   	default:
>   		/* Invalid key definition. */
>   		def->key_hint = NULL;
> diff --git a/src/box/vinyl.c b/src/box/vinyl.c
> index c80b2d99b..360d1fa70 100644
> --- a/src/box/vinyl.c
> +++ b/src/box/vinyl.c
> @@ -662,8 +662,7 @@ vinyl_space_check_index_def(struct space *space, struct index_def *index_def)
>   	/* Check that there are no ANY, ARRAY, MAP parts */
>   	for (uint32_t i = 0; i < key_def->part_count; i++) {
>   		struct key_part *part = &key_def->parts[i];
> -		if (part->type <= FIELD_TYPE_ANY ||
> -		    part->type >= FIELD_TYPE_ARRAY) {
> +		if (!field_type_index_allowed[part->type]) {
>   			diag_set(ClientError, ER_MODIFY_INDEX,
>   				 index_def->name, space_name(space),
>   				 tt_sprintf("field type '%s' is not supported",
> diff --git a/test/engine/datetime.result b/test/engine/datetime.result
> new file mode 100644
> index 000000000..848a0aaec
> --- /dev/null
> +++ b/test/engine/datetime.result
> @@ -0,0 +1,77 @@
> +-- test-run result file version 2
> +env = require('test_run')
> + | ---
> + | ...
> +test_run = env.new()
> + | ---
> + | ...
> +engine = test_run:get_cfg('engine')
> + | ---
> + | ...
> +
> +date = require('datetime')
> + | ---
> + | ...
> +
> +_ = box.schema.space.create('T', {engine = engine})
> + | ---
> + | ...
> +_ = box.space.T:create_index('pk', {parts={1,'datetime'}})
> + | ---
> + | ...
> +
> +box.space.T:insert{date('1970-01-01')}\
> +box.space.T:insert{date('1970-01-02')}\
> +box.space.T:insert{date('1970-01-03')}\
> +box.space.T:insert{date('2000-01-01')}
> + | ---
> + | ...
> +
> +o = box.space.T:select{}
> + | ---
> + | ...
> +assert(tostring(o[1][1]) == '1970-01-01T00:00Z')
> + | ---
> + | - true
> + | ...
> +assert(tostring(o[2][1]) == '1970-01-02T00:00Z')
> + | ---
> + | - true
> + | ...
> +assert(tostring(o[3][1]) == '1970-01-03T00:00Z')
> + | ---
> + | - true
> + | ...
> +assert(tostring(o[4][1]) == '2000-01-01T00:00Z')
> + | ---
> + | - true
> + | ...
> +
> +for i = 1,16 do\
> +    box.space.T:insert{date.now()}\
> +end
> + | ---
> + | ...
> +
> +a = box.space.T:select{}
> + | ---
> + | ...
> +err = {}
> + | ---
> + | ...
> +for i = 1, #a - 1 do\
> +    if a[i][1] >= a[i+1][1] then\
> +        table.insert(err, {a[i][1], a[i+1][1]})\
> +        break\
> +    end\
> +end
> + | ---
> + | ...
> +
> +err
> + | ---
> + | - []
> + | ...
> +box.space.T:drop()
> + | ---
> + | ...
> diff --git a/test/engine/datetime.test.lua b/test/engine/datetime.test.lua
> new file mode 100644
> index 000000000..3685e4d4b
> --- /dev/null
> +++ b/test/engine/datetime.test.lua
> @@ -0,0 +1,35 @@
> +env = require('test_run')
> +test_run = env.new()
> +engine = test_run:get_cfg('engine')
> +
> +date = require('datetime')
> +
> +_ = box.schema.space.create('T', {engine = engine})
> +_ = box.space.T:create_index('pk', {parts={1,'datetime'}})
> +
> +box.space.T:insert{date('1970-01-01')}\
> +box.space.T:insert{date('1970-01-02')}\
> +box.space.T:insert{date('1970-01-03')}\
> +box.space.T:insert{date('2000-01-01')}
> +
> +o = box.space.T:select{}
> +assert(tostring(o[1][1]) == '1970-01-01T00:00Z')
> +assert(tostring(o[2][1]) == '1970-01-02T00:00Z')
> +assert(tostring(o[3][1]) == '1970-01-03T00:00Z')
> +assert(tostring(o[4][1]) == '2000-01-01T00:00Z')
> +
> +for i = 1,16 do\
> +    box.space.T:insert{date.now()}\
> +end
> +
> +a = box.space.T:select{}
> +err = {}
> +for i = 1, #a - 1 do\
> +    if a[i][1] >= a[i+1][1] then\
> +        table.insert(err, {a[i][1], a[i+1][1]})\
> +        break\
> +    end\
> +end
> +
> +err
> +box.space.T:drop()

-- 
Serge Petrenko



More information about the Tarantool-patches mailing list