[Tarantool-patches] [PATCH v6 4/5] datetime: perf test for datetime parser

Serge Petrenko sergepetrenko at tarantool.org
Thu Aug 19 13:19:59 MSK 2021



19.08.2021 05:56, Timur Safin пишет:
> It was suggested that if the `datetime.secs` field were a `double`, we would
> get better performance in LuaJIT than with the `uint64_t` type, which is used
> at the moment.
>
> So we have created a benchmark which compares implementations of functions
> from `datetime.c` using either `double` or `int64_t` for the `datetime.secs` field.
>
> Contrary to expectations based on prior experience with floating-point on x86
> processors, the comparison shows that `double` provides similar or
> sometimes better timings. And the picture stays consistent, be it SSE2, AVX1 or
> AVX2 code.
>
> Part of #5941
> ---

I agree with Vladimir here.
Looks like this perf test doesn't belong in the Tarantool repository.
Would you mind dropping it?

>   perf/CMakeLists.txt      |   3 +
>   perf/datetime-common.h   | 105 +++++++++++++++++++
>   perf/datetime-compare.cc | 213 +++++++++++++++++++++++++++++++++++++++
>   perf/datetime-parser.cc  | 105 +++++++++++++++++++
>   4 files changed, 426 insertions(+)
>   create mode 100644 perf/datetime-common.h
>   create mode 100644 perf/datetime-compare.cc
>   create mode 100644 perf/datetime-parser.cc
>
> diff --git a/perf/CMakeLists.txt b/perf/CMakeLists.txt
> index 3651de5b4..b5d7caf81 100644
> --- a/perf/CMakeLists.txt
> +++ b/perf/CMakeLists.txt
> @@ -12,3 +12,6 @@ include_directories(${CMAKE_SOURCE_DIR}/third_party)
>   
>   add_executable(tuple.perftest tuple.cc)
>   target_link_libraries(tuple.perftest core box tuple benchmark::benchmark)
> +
> +add_executable(datetime.perftest datetime-parser.cc datetime-compare.cc)
> +target_link_libraries(datetime.perftest cdt core benchmark::benchmark)
> diff --git a/perf/datetime-common.h b/perf/datetime-common.h
> new file mode 100644
> index 000000000..6fd4e1e3b
> --- /dev/null
> +++ b/perf/datetime-common.h
> @@ -0,0 +1,105 @@
> +#include <assert.h>
> +#include <stdint.h>
> +#include <string.h>
> +#include <benchmark/benchmark.h>
> +
> +#include "dt.h"
> +#include "datetime.h"
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +static const char sample[] = "2012-12-24T15:30Z";
> +
> +#define S(s)               \
> +	{ s, sizeof(s) - 1 }
> +
> +static struct
> +{
> +	const char *sz;
> +	size_t len;
> +} tests[] = {
> +    S("2012-12-24 15:30Z"),
> +    S("2012-12-24 15:30z"),
> +    S("2012-12-24 15:30"),
> +    S("2012-12-24 16:30+01:00"),
> +    S("2012-12-24 16:30+0100"),
> +    S("2012-12-24 16:30+01"),
> +    S("2012-12-24 14:30-01:00"),
> +    S("2012-12-24 14:30-0100"),
> +    S("2012-12-24 14:30-01"),
> +    S("2012-12-24 15:30:00Z"),
> +    S("2012-12-24 15:30:00z"),
> +    S("2012-12-24 15:30:00"),
> +    S("2012-12-24 16:30:00+01:00"),
> +    S("2012-12-24 16:30:00+0100"),
> +    S("2012-12-24 14:30:00-01:00"),
> +    S("2012-12-24 14:30:00-0100"),
> +    S("2012-12-24 15:30:00.123456Z"),
> +    S("2012-12-24 15:30:00.123456z"),
> +    S("2012-12-24 15:30:00.123456"),
> +    S("2012-12-24 16:30:00.123456+01:00"),
> +    S("2012-12-24 16:30:00.123456+01"),
> +    S("2012-12-24 14:30:00.123456-01:00"),
> +    S("2012-12-24 14:30:00.123456-01"),
> +    S("2012-12-24t15:30Z"),
> +    S("2012-12-24t15:30z"),
> +    S("2012-12-24t15:30"),
> +    S("2012-12-24t16:30+01:00"),
> +    S("2012-12-24t16:30+0100"),
> +    S("2012-12-24t14:30-01:00"),
> +    S("2012-12-24t14:30-0100"),
> +    S("2012-12-24t15:30:00Z"),
> +    S("2012-12-24t15:30:00z"),
> +    S("2012-12-24t15:30:00"),
> +    S("2012-12-24t16:30:00+01:00"),
> +    S("2012-12-24t16:30:00+0100"),
> +    S("2012-12-24t14:30:00-01:00"),
> +    S("2012-12-24t14:30:00-0100"),
> +    S("2012-12-24t15:30:00.123456Z"),
> +    S("2012-12-24t15:30:00.123456z"),
> +    S("2012-12-24t16:30:00.123456+01:00"),
> +    S("2012-12-24t14:30:00.123456-01:00"),
> +    S("2012-12-24 16:30 +01:00"),
> +    S("2012-12-24 14:30 -01:00"),
> +    S("2012-12-24 15:30 UTC"),
> +    S("2012-12-24 16:30 UTC+1"),
> +    S("2012-12-24 16:30 UTC+01"),
> +    S("2012-12-24 16:30 UTC+0100"),
> +    S("2012-12-24 16:30 UTC+01:00"),
> +    S("2012-12-24 14:30 UTC-1"),
> +    S("2012-12-24 14:30 UTC-01"),
> +    S("2012-12-24 14:30 UTC-01:00"),
> +    S("2012-12-24 14:30 UTC-0100"),
> +    S("2012-12-24 15:30 GMT"),
> +    S("2012-12-24 16:30 GMT+1"),
> +    S("2012-12-24 16:30 GMT+01"),
> +    S("2012-12-24 16:30 GMT+0100"),
> +    S("2012-12-24 16:30 GMT+01:00"),
> +    S("2012-12-24 14:30 GMT-1"),
> +    S("2012-12-24 14:30 GMT-01"),
> +    S("2012-12-24 14:30 GMT-01:00"),
> +    S("2012-12-24 14:30 GMT-0100"),
> +    S("2012-12-24 14:30 -01:00"),
> +    S("2012-12-24 16:30:00 +01:00"),
> +    S("2012-12-24 14:30:00 -01:00"),
> +    S("2012-12-24 16:30:00.123456 +01:00"),
> +    S("2012-12-24 14:30:00.123456 -01:00"),
> +    S("2012-12-24 15:30:00.123456 -00:00"),
> +    S("20121224T1630+01:00"),
> +    S("2012-12-24T1630+01:00"),
> +    S("20121224T16:30+01"),
> +    S("20121224T16:30 +01"),
> +};
> +#undef S
> +
> +#define DIM(a) (sizeof(a) / sizeof(a[0]))
> +
> +int
> +parse_datetime(const char *str, size_t len, int64_t *sp, int32_t *np,
> +	       int32_t *op);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> diff --git a/perf/datetime-compare.cc b/perf/datetime-compare.cc
> new file mode 100644
> index 000000000..5096eb987
> --- /dev/null
> +++ b/perf/datetime-compare.cc
> @@ -0,0 +1,213 @@
> +#include "dt.h"
> +#include <string.h>
> +#include <assert.h>
> +#include <limits.h>
> +
> +#include "datetime-common.h"
> +
> +template <typename T>
> +struct datetime_bench
> +{
> +	T secs;
> +	uint32_t nsec;
> +	uint32_t offset;
> +
> +static struct datetime_bench date_array[];
> +};
> +template<typename T>
> +struct datetime_bench<T> datetime_bench<T>::date_array[DIM(tests)];
> +
> +/// Parse 70 datetime literals of various lengths
> +template <typename T>
> +static void
> +Assign70()
> +{
> +	size_t index;
> +	int64_t secs_expected;
> +	int nanosecs;
> +	int ofs;
> +	using dt_bench = datetime_bench<T>;
> +
> +	for (index = 0; index < DIM(tests); index++) {
> +		int64_t secs;
> +		int rc = parse_datetime(tests[index].sz, tests[index].len,
> +					&secs, &nanosecs, &ofs);
> +		assert(rc == 0);
> +		dt_bench::date_array[index].secs = (T)secs;
> +		dt_bench::date_array[index].nsec = nanosecs;
> +		dt_bench::date_array[index].offset = ofs;
> +	}
> +}
> +
> +template <typename T>
> +static void
> +DateTime_Assign70(benchmark::State &state)
> +{
> +	for (auto _ : state)
> +		Assign70<T>();
> +}
> +BENCHMARK_TEMPLATE1(DateTime_Assign70, uint64_t);
> +BENCHMARK_TEMPLATE1(DateTime_Assign70, double);
> +
> +#define COMPARE_RESULT_BENCH(a, b) (a < b ? -1 : a > b)
> +
> +template <typename T>
> +int datetime_compare(const struct datetime_bench<T> *lhs,
> +		     const struct datetime_bench<T> *rhs)
> +{
> +	int result = COMPARE_RESULT_BENCH(lhs->secs, rhs->secs);
> +	if (result != 0)
> +		return result;
> +
> +	return COMPARE_RESULT_BENCH(lhs->nsec, rhs->nsec);
> +}
> +
> +template <typename T>
> +static void
> +AssignCompare70()
> +{
> +	size_t index;
> +	int nanosecs;
> +	int ofs;
> +	using dt_bench = datetime_bench<T>;
> +
> +	size_t arrays_sz = DIM(tests);
> +	for (index = 0; index < arrays_sz; index++) {
> +		int64_t secs;
> +		int rc = parse_datetime(tests[index].sz, tests[index].len,
> +					&secs, &nanosecs, &ofs);
> +		assert(rc == 0);
> +		dt_bench::date_array[index].secs = (T)secs;
> +		dt_bench::date_array[index].nsec = nanosecs;
> +		dt_bench::date_array[index].offset = ofs;
> +	}
> +
> +	for (index = 0; index < (arrays_sz - 1); index++) {
> +		volatile int rc = datetime_compare<T>(&dt_bench::date_array[index],
> +					     &dt_bench::date_array[index + 1]);
> +		assert(rc == 0 || rc == -1 || rc == 1);
> +	}
> +}
> +
> +template <typename T>
> +static void
> +DateTime_AssignCompare70(benchmark::State &state)
> +{
> +	for (auto _ : state)
> +		AssignCompare70<T>();
> +}
> +BENCHMARK_TEMPLATE1(DateTime_AssignCompare70, uint64_t);
> +BENCHMARK_TEMPLATE1(DateTime_AssignCompare70, double);
> +
> +template <typename T>
> +static void
> +Compare20()
> +{
> +	size_t index;
> +	int nanosecs;
> +	int ofs;
> +	using dt_bench = datetime_bench<T>;
> +
> +	for (size_t i = 0; i < 10; i++) {
> +		volatile int rc = datetime_compare<T>(&dt_bench::date_array[i],
> +					     &dt_bench::date_array[32 + i]);
> +		assert(rc == 0 || rc == -1 || rc == 1);
> +	}
> +}
> +
> +template <typename T>
> +static void
> +DateTime_Compare20(benchmark::State &state)
> +{
> +	for (auto _ : state)
> +		Compare20<T>();
> +}
> +BENCHMARK_TEMPLATE1(DateTime_Compare20, uint64_t);
> +BENCHMARK_TEMPLATE1(DateTime_Compare20, double);
> +
> +
> +#define SECS_EPOCH_1970_OFFSET ((int64_t)DT_EPOCH_1970_OFFSET * SECS_PER_DAY)
> +
> +template<typename T>
> +int
> +datetime_to_string(const struct datetime_bench<T> *date, char *buf, uint32_t len)
> +{
> +#define ADVANCE(sz)		\
> +	if (buf != NULL) { 	\
> +		buf += sz; 	\
> +		len -= sz; 	\
> +	}			\
> +	ret += sz;
> +
> +	int offset = date->offset;
> +	/* for negative offsets around Epoch date we could get
> +	 * negative secs value, which should be attributed to
> +	 * 1969-12-31, not 1970-01-01, thus we first shift
> +	 * epoch to Rata Die then divide by seconds per day,
> +	 * not in reverse
> +	 */
> +	int64_t secs = (int64_t)date->secs + offset * 60 + SECS_EPOCH_1970_OFFSET;
> +	assert((secs / SECS_PER_DAY) <= INT_MAX);
> +	dt_t dt = dt_from_rdn(secs / SECS_PER_DAY);
> +
> +	int year, month, day, sec, ns, sign;
> +	dt_to_ymd(dt, &year, &month, &day);
> +
> +	int hour = (secs / 3600) % 24,
> +	    minute = (secs / 60) % 60;
> +	sec = secs % 60;
> +	ns = date->nsec;
> +
> +	int ret = 0;
> +	uint32_t sz = snprintf(buf, len, "%04d-%02d-%02dT%02d:%02d",
> +			       year, month, day, hour, minute);
> +	ADVANCE(sz);
> +	if (sec || ns) {
> +		sz = snprintf(buf, len, ":%02d", sec);
> +		ADVANCE(sz);
> +		if (ns) {
> +			if ((ns % 1000000) == 0)
> +				sz = snprintf(buf, len, ".%03d", ns / 1000000);
> +			else if ((ns % 1000) == 0)
> +				sz = snprintf(buf, len, ".%06d", ns / 1000);
> +			else
> +				sz = snprintf(buf, len, ".%09d", ns);
> +			ADVANCE(sz);
> +		}
> +	}
> +	if (offset == 0) {
> +		sz = snprintf(buf, len, "Z");
> +		ADVANCE(sz);
> +	}
> +	else {
> +		if (offset < 0)
> +			sign = '-', offset = -offset;
> +		else
> +			sign = '+';
> +
> +		sz = snprintf(buf, len, "%c%02d:%02d", sign, offset / 60, offset % 60);
> +		ADVANCE(sz);
> +	}
> +	return ret;
> +}
> +#undef ADVANCE
> +
> +template <typename T>
> +static void
> +ToString1()
> +{
> +	char buf[48];
> +	struct datetime_bench<T> dateval = datetime_bench<T>::date_array[13];
> +
> +	volatile auto len = datetime_to_string<T>(&dateval, buf, sizeof(buf));
> +}
> +
> +template <typename T>
> +static void
> +DateTime_ToString1(benchmark::State &state)
> +{
> +	for (auto _ : state)
> +		ToString1<T>();
> +}
> +BENCHMARK_TEMPLATE1(DateTime_ToString1, uint64_t);
> +BENCHMARK_TEMPLATE1(DateTime_ToString1, double);
> diff --git a/perf/datetime-parser.cc b/perf/datetime-parser.cc
> new file mode 100644
> index 000000000..61557fe8f
> --- /dev/null
> +++ b/perf/datetime-parser.cc
> @@ -0,0 +1,105 @@
> +#include "dt.h"
> +#include <string.h>
> +#include <assert.h>
> +
> +#include "datetime-common.h"
> +
> +/* p5-time-moment/src/moment_parse.c: parse_string_lenient() */
> +int
> +parse_datetime(const char *str, size_t len, int64_t *sp, int32_t *np,
> +	       int32_t *op)
> +{
> +	size_t n;
> +	dt_t dt;
> +	char c;
> +	int sod = 0, nanosecond = 0, offset = 0;
> +
> +	n = dt_parse_iso_date(str, len, &dt);
> +	if (!n)
> +		return 1;
> +	if (n == len)
> +		goto exit;
> +
> +	c = str[n++];
> +	if (!(c == 'T' || c == 't' || c == ' '))
> +		return 1;
> +
> +	str += n;
> +	len -= n;
> +
> +	n = dt_parse_iso_time(str, len, &sod, &nanosecond);
> +	if (!n)
> +		return 1;
> +	if (n == len)
> +		goto exit;
> +
> +	if (str[n] == ' ')
> +	n++;
> +
> +	str += n;
> +	len -= n;
> +
> +	n = dt_parse_iso_zone_lenient(str, len, &offset);
> +	if (!n || n != len)
> +		return 1;
> +
> +exit:
> +	*sp = ((int64_t)dt_rdn(dt) - 719163) * 86400 + sod - offset * 60;
> +	*np = nanosecond;
> +	*op = offset;
> +
> +	return 0;
> +}
> +
> +/// Parse 70 datetime literals of various lengths
> +static void
> +ParseTimeStamps()
> +{
> +	size_t index;
> +	int64_t secs_expected;
> +	int nanosecs;
> +	int ofs;
> +	parse_datetime(sample, sizeof(sample) - 1, &secs_expected,
> +		       &nanosecs, &ofs);
> +
> +	for (index = 0; index < DIM(tests); index++)
> +	{
> +		int64_t secs;
> +		int rc = parse_datetime(tests[index].sz, tests[index].len,
> +					&secs, &nanosecs, &ofs);
> +		assert(rc == 0);
> +		assert(secs == secs_expected);
> +	}
> +}
> +
> +static void
> +CDT_Parse70(benchmark::State &state)
> +{
> +	for (auto _ : state)
> +		ParseTimeStamps();
> +}
> +BENCHMARK(CDT_Parse70);
> +
> +/// Parse single datetime literal of longest length
> +static void
> +Parse1()
> +{
> +	const char civil_string[] = "2015-02-18T10:50:31.521345123+10:00";
> +	int64_t secs;
> +	int nanosecs;
> +	int ofs;
> +	int rc = parse_datetime(civil_string, sizeof(civil_string) - 1,
> +				&secs, &nanosecs, &ofs);
> +	assert(rc == 0);
> +	assert(nanosecs == 521345123);
> +}
> +
> +static void
> +CDT_Parse1(benchmark::State &state)
> +{
> +	for (auto _ : state)
> +		Parse1();
> +}
> +BENCHMARK(CDT_Parse1);
> +
> +BENCHMARK_MAIN();

-- 
Serge Petrenko



More information about the Tarantool-patches mailing list