[Tarantool-patches] [PATCH v5 7/8] datetime: perf test for datetime parser

Timur Safin tsafin at tarantool.org
Mon Aug 16 02:59:41 MSK 2021


It was suggested that making the `datetime.secs` field a `double` would give
better performance in LuaJIT than the `uint64_t` type, which is used at the
moment.

So we have created a benchmark that compares implementations of functions
from `datetime.c` using either `double` or `uint64_t` for the `datetime.secs` field.

Despite expectations based on prior experience with floating-point on x86
processors, the comparison shows that `double` provides similar or
sometimes better timings. And the picture stays consistent, be it SSE2, AVX1 or
AVX2 code.

Part of #5941
---
 perf/CMakeLists.txt      |   3 +
 perf/datetime-common.h   | 105 +++++++++++++++++++
 perf/datetime-compare.cc | 213 +++++++++++++++++++++++++++++++++++++++
 perf/datetime-parser.cc  | 105 +++++++++++++++++++
 4 files changed, 426 insertions(+)
 create mode 100644 perf/datetime-common.h
 create mode 100644 perf/datetime-compare.cc
 create mode 100644 perf/datetime-parser.cc

diff --git a/perf/CMakeLists.txt b/perf/CMakeLists.txt
index 3651de5b4..b5d7caf81 100644
--- a/perf/CMakeLists.txt
+++ b/perf/CMakeLists.txt
@@ -12,3 +12,6 @@ include_directories(${CMAKE_SOURCE_DIR}/third_party)
 
 add_executable(tuple.perftest tuple.cc)
 target_link_libraries(tuple.perftest core box tuple benchmark::benchmark)
+
+add_executable(datetime.perftest datetime-parser.cc datetime-compare.cc)
+target_link_libraries(datetime.perftest cdt core benchmark::benchmark)
diff --git a/perf/datetime-common.h b/perf/datetime-common.h
new file mode 100644
index 000000000..6fd4e1e3b
--- /dev/null
+++ b/perf/datetime-common.h
@@ -0,0 +1,105 @@
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+#include <benchmark/benchmark.h>
+
+#include "dt.h"
+#include "datetime.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static const char sample[] = "2012-12-24T15:30Z"; /* reference literal for tests[] */
+
+#define S(s)               \
+	{ s, sizeof(s) - 1 }
+
+static struct /* datetime literals fed to parse_datetime() by the benchmarks */
+{
+	const char *sz; /* literal text */
+	size_t len; /* length of sz without the terminating NUL */
+} tests[] = {
+    S("2012-12-24 15:30Z"),
+    S("2012-12-24 15:30z"),
+    S("2012-12-24 15:30"),
+    S("2012-12-24 16:30+01:00"),
+    S("2012-12-24 16:30+0100"),
+    S("2012-12-24 16:30+01"),
+    S("2012-12-24 14:30-01:00"),
+    S("2012-12-24 14:30-0100"),
+    S("2012-12-24 14:30-01"),
+    S("2012-12-24 15:30:00Z"),
+    S("2012-12-24 15:30:00z"),
+    S("2012-12-24 15:30:00"),
+    S("2012-12-24 16:30:00+01:00"),
+    S("2012-12-24 16:30:00+0100"),
+    S("2012-12-24 14:30:00-01:00"),
+    S("2012-12-24 14:30:00-0100"),
+    S("2012-12-24 15:30:00.123456Z"),
+    S("2012-12-24 15:30:00.123456z"),
+    S("2012-12-24 15:30:00.123456"),
+    S("2012-12-24 16:30:00.123456+01:00"),
+    S("2012-12-24 16:30:00.123456+01"),
+    S("2012-12-24 14:30:00.123456-01:00"),
+    S("2012-12-24 14:30:00.123456-01"),
+    S("2012-12-24t15:30Z"),
+    S("2012-12-24t15:30z"),
+    S("2012-12-24t15:30"),
+    S("2012-12-24t16:30+01:00"),
+    S("2012-12-24t16:30+0100"),
+    S("2012-12-24t14:30-01:00"),
+    S("2012-12-24t14:30-0100"),
+    S("2012-12-24t15:30:00Z"),
+    S("2012-12-24t15:30:00z"),
+    S("2012-12-24t15:30:00"),
+    S("2012-12-24t16:30:00+01:00"),
+    S("2012-12-24t16:30:00+0100"),
+    S("2012-12-24t14:30:00-01:00"),
+    S("2012-12-24t14:30:00-0100"),
+    S("2012-12-24t15:30:00.123456Z"),
+    S("2012-12-24t15:30:00.123456z"),
+    S("2012-12-24t16:30:00.123456+01:00"),
+    S("2012-12-24t14:30:00.123456-01:00"),
+    S("2012-12-24 16:30 +01:00"),
+    S("2012-12-24 14:30 -01:00"),
+    S("2012-12-24 15:30 UTC"),
+    S("2012-12-24 16:30 UTC+1"),
+    S("2012-12-24 16:30 UTC+01"),
+    S("2012-12-24 16:30 UTC+0100"),
+    S("2012-12-24 16:30 UTC+01:00"),
+    S("2012-12-24 14:30 UTC-1"),
+    S("2012-12-24 14:30 UTC-01"),
+    S("2012-12-24 14:30 UTC-01:00"),
+    S("2012-12-24 14:30 UTC-0100"),
+    S("2012-12-24 15:30 GMT"),
+    S("2012-12-24 16:30 GMT+1"),
+    S("2012-12-24 16:30 GMT+01"),
+    S("2012-12-24 16:30 GMT+0100"),
+    S("2012-12-24 16:30 GMT+01:00"),
+    S("2012-12-24 14:30 GMT-1"),
+    S("2012-12-24 14:30 GMT-01"),
+    S("2012-12-24 14:30 GMT-01:00"),
+    S("2012-12-24 14:30 GMT-0100"),
+    S("2012-12-24 14:30 -01:00"), /* NOTE(review): same literal is already listed above */
+    S("2012-12-24 16:30:00 +01:00"),
+    S("2012-12-24 14:30:00 -01:00"),
+    S("2012-12-24 16:30:00.123456 +01:00"),
+    S("2012-12-24 14:30:00.123456 -01:00"),
+    S("2012-12-24 15:30:00.123456 -00:00"),
+    S("20121224T1630+01:00"),
+    S("2012-12-24T1630+01:00"),
+    S("20121224T16:30+01"),
+    S("20121224T16:30 +01"),
+};
+#undef S
+
+#define DIM(a) (sizeof(a) / sizeof((a)[0])) /* element count; true arrays only */
+
+int
+parse_datetime(const char *str, size_t len, int64_t *sp, int32_t *np,
+	       int32_t *op);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/perf/datetime-compare.cc b/perf/datetime-compare.cc
new file mode 100644
index 000000000..6e561a5eb
--- /dev/null
+++ b/perf/datetime-compare.cc
@@ -0,0 +1,213 @@
+#include "dt.h"
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+
+#include "datetime-common.h"
+
+/** Benchmark stand-in for a datetime value; T is the candidate type of secs. */
+template <typename T>
+struct datetime_bench
+{
+	T secs;
+	uint32_t nsec;
+	int32_t offset; /* signed: parsed zone offsets may be negative */
+	static struct datetime_bench date_array[]; /* one slot per tests[] entry */
+};
+template<typename T>
+struct datetime_bench<T> datetime_bench<T>::date_array[DIM(tests)];
+
+/// Parse every literal in tests[] and store the result in date_array as T
+template <typename T>
+static void
+Assign70()
+{
+	using dt_bench = datetime_bench<T>;
+
+	for (size_t index = 0; index < DIM(tests); index++) {
+		int64_t secs;
+		int32_t nanosecs;
+		int32_t ofs;
+		int rc = parse_datetime(tests[index].sz, tests[index].len,
+					&secs, &nanosecs, &ofs);
+		assert(rc == 0);
+		/* rc is otherwise unused once NDEBUG strips the assert */
+		(void)rc;
+		dt_bench::date_array[index].secs = (T)secs;
+		dt_bench::date_array[index].nsec = nanosecs;
+		dt_bench::date_array[index].offset = ofs;
+	}
+}
+
+template <typename T>
+static void
+DateTime_Assign70(benchmark::State &state)
+{
+	for (auto _ : state)
+		Assign70<T>();
+}
+BENCHMARK_TEMPLATE1(DateTime_Assign70, uint64_t);
+BENCHMARK_TEMPLATE1(DateTime_Assign70, double);
+
+#define COMPARE_RESULT_BENCH(a, b) ((a) < (b) ? -1 : (a) > (b))
+/** Three-way compare by secs, then by nsec; the result is -1, 0 or 1. */
+template <typename T>
+int datetime_compare(const struct datetime_bench<T> *lhs,
+		     const struct datetime_bench<T> *rhs)
+{
+	int result = COMPARE_RESULT_BENCH(lhs->secs, rhs->secs);
+	if (result != 0)
+		return result;
+
+	return COMPARE_RESULT_BENCH(lhs->nsec, rhs->nsec);
+}
+
+/// Parse all literals, then compare each adjacent pair of parsed values
+template <typename T>
+static void
+AssignCompare70()
+{
+	using dt_bench = datetime_bench<T>;
+	const size_t arrays_sz = DIM(tests);
+
+	/*
+	 * Parse step: reuse Assign70<T>() instead of repeating its loop,
+	 * so both benchmarks always fill date_array the same way.
+	 */
+	Assign70<T>();
+
+	/*
+	 * Compare step: every element against its right-hand neighbour.
+	 * The result is stored into a volatile so that the comparison
+	 * still has an observable effect when assert() is compiled out.
+	 */
+	for (size_t index = 0; index + 1 < arrays_sz; index++) {
+		volatile int rc =
+			datetime_compare<T>(&dt_bench::date_array[index],
+					    &dt_bench::date_array[index + 1]);
+		assert(rc == 0 || rc == -1 || rc == 1);
+	}
+}
+
+template <typename T>
+static void
+DateTime_AssignCompare70(benchmark::State &state)
+{
+	for (auto _ : state)
+		AssignCompare70<T>();
+}
+BENCHMARK_TEMPLATE1(DateTime_AssignCompare70, uint64_t);
+BENCHMARK_TEMPLATE1(DateTime_AssignCompare70, double);
+
+/// Compare 10 pairs (20 values) from date_array: element [i] against [32 + i].
+/// NOTE(review): date_array is written only by the Assign* benchmarks; if
+/// they did not run first, zero-initialised entries are compared here.
+template <typename T>
+static void
+Compare20()
+{
+	using dt_bench = datetime_bench<T>;
+
+	for (size_t i = 0; i < 10; i++) {
+		volatile int rc = datetime_compare<T>(&dt_bench::date_array[i],
+					     &dt_bench::date_array[32 + i]);
+		assert(rc == 0 || rc == -1 || rc == 1);
+	}
+}
+
+template <typename T>
+static void
+DateTime_Compare20(benchmark::State &state)
+{
+	for (auto _ : state)
+		Compare20<T>();
+}
+BENCHMARK_TEMPLATE1(DateTime_Compare20, uint64_t);
+BENCHMARK_TEMPLATE1(DateTime_Compare20, double);
+
+
+#define SECS_EPOCH_1970_OFFSET ((int64_t)DT_EPOCH_1970_OFFSET * SECS_PER_DAY)
+/** Render *date as ISO 8601-style text into buf; returns the untruncated length. */
+template<typename T>
+int
+datetime_to_string(const struct datetime_bench<T> *date, char *buf, uint32_t len)
+{
+/* Count sz chars; clamp the step so a truncated write never overruns buf. */
+#define ADVANCE(sz) do {					\
+	uint32_t adv_ = (sz) < len ? (sz) : len;		\
+	if (buf != NULL) {					\
+		buf += adv_;					\
+		len -= adv_;					\
+	}							\
+	ret += (sz);						\
+} while (0)
+
+	int offset = date->offset;
+	/*
+	 * Shift the epoch to Rata Die before dividing by seconds per day:
+	 * with a negative offset near the Epoch, secs may be negative and
+	 * must be attributed to 1969-12-31, not 1970-01-01.
+	 */
+	int64_t secs = (int64_t)date->secs + offset * 60 + SECS_EPOCH_1970_OFFSET;
+	assert((secs / SECS_PER_DAY) <= INT_MAX);
+	dt_t dt = dt_from_rdn(secs / SECS_PER_DAY);
+
+	int year, month, day, sec, ns, sign;
+	dt_to_ymd(dt, &year, &month, &day);
+
+	int hour = (secs / 3600) % 24,
+	    minute = (secs / 60) % 60;
+	sec = secs % 60;
+	ns = date->nsec;
+
+	int ret = 0;
+	uint32_t sz = snprintf(buf, len, "%04d-%02d-%02dT%02d:%02d",
+			       year, month, day, hour, minute);
+	ADVANCE(sz);
+	if (sec || ns) {
+		sz = snprintf(buf, len, ":%02d", sec);
+		ADVANCE(sz);
+		if (ns) {
+			if ((ns % 1000000) == 0)
+				sz = snprintf(buf, len, ".%03d", ns / 1000000);
+			else if ((ns % 1000) == 0)
+				sz = snprintf(buf, len, ".%06d", ns / 1000);
+			else
+				sz = snprintf(buf, len, ".%09d", ns);
+			ADVANCE(sz);
+		}
+	}
+	if (offset == 0) {
+		sz = snprintf(buf, len, "Z");
+		ADVANCE(sz);
+	} else {
+		/* Print the sign separately, then the magnitude as hh:mm. */
+		sign = offset < 0 ? '-' : '+';
+		if (offset < 0)
+			offset = -offset;
+		sz = snprintf(buf, len, "%c%02d:%02d", sign, offset / 60, offset % 60);
+		ADVANCE(sz);
+	}
+	return ret;
+}
+#undef ADVANCE
+
+template <typename T>
+static void
+ToString1()
+{
+	char out[48];
+	struct datetime_bench<T> date = datetime_bench<T>::date_array[13];
+	/* The result is unused; it is stored into a volatile on purpose. */
+	volatile auto n = datetime_to_string<T>(&date, out, sizeof(out));
+}
+
+template <typename T>
+static void
+DateTime_ToString1(benchmark::State &state)
+{
+	for (auto _ : state)
+		ToString1<T>();
+}
+BENCHMARK_TEMPLATE1(DateTime_ToString1, uint64_t);
+BENCHMARK_TEMPLATE1(DateTime_ToString1, double);
diff --git a/perf/datetime-parser.cc b/perf/datetime-parser.cc
new file mode 100644
index 000000000..61557fe8f
--- /dev/null
+++ b/perf/datetime-parser.cc
@@ -0,0 +1,105 @@
+#include "dt.h"
+#include <string.h>
+#include <assert.h>
+
+#include "datetime-common.h"
+
+/*
+ * After p5-time-moment/src/moment_parse.c: parse_string_lenient().
+ * Returns 0 and fills *sp, *np, *op; returns 1 (outputs untouched) on error.
+ */
+int
+parse_datetime(const char *str, size_t len, int64_t *sp, int32_t *np,
+	       int32_t *op)
+{
+	size_t n;
+	dt_t dt;
+	char c;
+	int sod = 0, nanosecond = 0, offset = 0;
+
+	n = dt_parse_iso_date(str, len, &dt);
+	if (!n)
+		return 1;
+	if (n == len)
+		goto exit;
+	/* The date must be followed by 'T', 't' or a space. */
+	c = str[n++];
+	if (!(c == 'T' || c == 't' || c == ' '))
+		return 1;
+	str += n;
+	len -= n;
+
+	n = dt_parse_iso_time(str, len, &sod, &nanosecond);
+	if (!n)
+		return 1;
+	if (n == len)
+		goto exit;
+	/* A single optional space may precede the zone designator. */
+	if (str[n] == ' ')
+		n++;
+	str += n;
+	len -= n;
+
+	n = dt_parse_iso_zone_lenient(str, len, &offset);
+	if (!n || n != len)
+		return 1;
+	/* 719163: Rata Die day number of 1970-01-01; 86400: seconds per day. */
+exit:
+	*sp = ((int64_t)dt_rdn(dt) - 719163) * 86400 + sod - offset * 60;
+	*np = nanosecond;
+	*op = offset;
+	return 0;
+}
+
+/// Parse every literal in tests[]; each must denote the same instant as sample
+static void
+ParseTimeStamps()
+{
+	int64_t secs_expected;
+	int32_t nanosecs;
+	int32_t ofs;
+	int rc = parse_datetime(sample, sizeof(sample) - 1, &secs_expected,
+				&nanosecs, &ofs);
+	assert(rc == 0);
+
+	for (size_t index = 0; index < DIM(tests); index++) {
+		int64_t secs;
+		rc = parse_datetime(tests[index].sz, tests[index].len,
+				    &secs, &nanosecs, &ofs);
+		assert(rc == 0);
+		assert(secs == secs_expected);
+	}
+	(void)rc; /* the asserts vanish under NDEBUG */
+}
+
+static void
+CDT_Parse70(benchmark::State &state)
+{
+	for (auto _ : state)
+		ParseTimeStamps();
+}
+BENCHMARK(CDT_Parse70);
+
+/// Parse a single datetime literal with nanoseconds and a zone offset
+static void
+Parse1()
+{
+	const char civil_string[] = "2015-02-18T10:50:31.521345123+10:00";
+	int64_t secs;
+	int32_t nanosecs, ofs;
+	int rc = parse_datetime(civil_string, sizeof(civil_string) - 1,
+				&secs, &nanosecs, &ofs);
+	assert(rc == 0);
+	assert(nanosecs == 521345123);
+	(void)rc; /* the asserts vanish under NDEBUG */
+}
+
+static void
+CDT_Parse1(benchmark::State &state)
+{
+	for (auto _ : state)
+		Parse1();
+}
+BENCHMARK(CDT_Parse1);
+
+BENCHMARK_MAIN();
-- 
2.29.2



More information about the Tarantool-patches mailing list