[Tarantool-patches] [PATCH v5 7/8] datetime: perf test for datetime parser
Timur Safin
tsafin at tarantool.org
Mon Aug 16 02:59:41 MSK 2021
It was suggested that making the `datetime.secs` field a `double` would give
better performance in LuaJIT than the `uint64_t` type, which is used at the
moment.
So we have created a benchmark that compares implementations of functions
from `datetime.c` using either `double` or `int64_t` for the `datetime.secs` field.
Despite expectations, based on prior experience with floating-point on x86
processors, the comparison shows that `double` provides similar or
sometimes better timings. And the picture stays consistent, be it SSE2, AVX1 or
AVX2 code.
Part of #5941
---
perf/CMakeLists.txt | 3 +
perf/datetime-common.h | 105 +++++++++++++++++++
perf/datetime-compare.cc | 213 +++++++++++++++++++++++++++++++++++++++
perf/datetime-parser.cc | 105 +++++++++++++++++++
4 files changed, 426 insertions(+)
create mode 100644 perf/datetime-common.h
create mode 100644 perf/datetime-compare.cc
create mode 100644 perf/datetime-parser.cc
diff --git a/perf/CMakeLists.txt b/perf/CMakeLists.txt
index 3651de5b4..b5d7caf81 100644
--- a/perf/CMakeLists.txt
+++ b/perf/CMakeLists.txt
@@ -12,3 +12,6 @@ include_directories(${CMAKE_SOURCE_DIR}/third_party)
add_executable(tuple.perftest tuple.cc)
target_link_libraries(tuple.perftest core box tuple benchmark::benchmark)
+
+# Datetime benchmarks: the parser benchmark and the integer-vs-double
+# comparison benchmark are linked into a single executable.
+add_executable(datetime.perftest datetime-parser.cc datetime-compare.cc)
+target_link_libraries(datetime.perftest cdt core benchmark::benchmark)
diff --git a/perf/datetime-common.h b/perf/datetime-common.h
new file mode 100644
index 000000000..6fd4e1e3b
--- /dev/null
+++ b/perf/datetime-common.h
@@ -0,0 +1,105 @@
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+#include <benchmark/benchmark.h>
+
+#include "dt.h"
+#include "datetime.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Reference literal: ParseTimeStamps() parses it once and asserts that
+ * every entry of tests[] yields the same seconds value.
+ */
+static const char sample[] = "2012-12-24T15:30Z";
+
+/* Build a { literal, length } initializer; the length excludes the NUL. */
+#define S(s) \
+ { s, sizeof(s) - 1 }
+
+/*
+ * Datetime literals in assorted formats, used as parser input by the
+ * benchmarks.
+ * NOTE(review): the table holds 71 entries, not 70 as the benchmark names
+ * suggest -- "2012-12-24 14:30 -01:00" is listed twice.
+ */
+static struct
+{
+ const char *sz;
+ size_t len;
+} tests[] = {
+ S("2012-12-24 15:30Z"),
+ S("2012-12-24 15:30z"),
+ S("2012-12-24 15:30"),
+ S("2012-12-24 16:30+01:00"),
+ S("2012-12-24 16:30+0100"),
+ S("2012-12-24 16:30+01"),
+ S("2012-12-24 14:30-01:00"),
+ S("2012-12-24 14:30-0100"),
+ S("2012-12-24 14:30-01"),
+ S("2012-12-24 15:30:00Z"),
+ S("2012-12-24 15:30:00z"),
+ S("2012-12-24 15:30:00"),
+ S("2012-12-24 16:30:00+01:00"),
+ S("2012-12-24 16:30:00+0100"),
+ S("2012-12-24 14:30:00-01:00"),
+ S("2012-12-24 14:30:00-0100"),
+ S("2012-12-24 15:30:00.123456Z"),
+ S("2012-12-24 15:30:00.123456z"),
+ S("2012-12-24 15:30:00.123456"),
+ S("2012-12-24 16:30:00.123456+01:00"),
+ S("2012-12-24 16:30:00.123456+01"),
+ S("2012-12-24 14:30:00.123456-01:00"),
+ S("2012-12-24 14:30:00.123456-01"),
+ S("2012-12-24t15:30Z"),
+ S("2012-12-24t15:30z"),
+ S("2012-12-24t15:30"),
+ S("2012-12-24t16:30+01:00"),
+ S("2012-12-24t16:30+0100"),
+ S("2012-12-24t14:30-01:00"),
+ S("2012-12-24t14:30-0100"),
+ S("2012-12-24t15:30:00Z"),
+ S("2012-12-24t15:30:00z"),
+ S("2012-12-24t15:30:00"),
+ S("2012-12-24t16:30:00+01:00"),
+ S("2012-12-24t16:30:00+0100"),
+ S("2012-12-24t14:30:00-01:00"),
+ S("2012-12-24t14:30:00-0100"),
+ S("2012-12-24t15:30:00.123456Z"),
+ S("2012-12-24t15:30:00.123456z"),
+ S("2012-12-24t16:30:00.123456+01:00"),
+ S("2012-12-24t14:30:00.123456-01:00"),
+ S("2012-12-24 16:30 +01:00"),
+ S("2012-12-24 14:30 -01:00"),
+ S("2012-12-24 15:30 UTC"),
+ S("2012-12-24 16:30 UTC+1"),
+ S("2012-12-24 16:30 UTC+01"),
+ S("2012-12-24 16:30 UTC+0100"),
+ S("2012-12-24 16:30 UTC+01:00"),
+ S("2012-12-24 14:30 UTC-1"),
+ S("2012-12-24 14:30 UTC-01"),
+ S("2012-12-24 14:30 UTC-01:00"),
+ S("2012-12-24 14:30 UTC-0100"),
+ S("2012-12-24 15:30 GMT"),
+ S("2012-12-24 16:30 GMT+1"),
+ S("2012-12-24 16:30 GMT+01"),
+ S("2012-12-24 16:30 GMT+0100"),
+ S("2012-12-24 16:30 GMT+01:00"),
+ S("2012-12-24 14:30 GMT-1"),
+ S("2012-12-24 14:30 GMT-01"),
+ S("2012-12-24 14:30 GMT-01:00"),
+ S("2012-12-24 14:30 GMT-0100"),
+ S("2012-12-24 14:30 -01:00"),
+ S("2012-12-24 16:30:00 +01:00"),
+ S("2012-12-24 14:30:00 -01:00"),
+ S("2012-12-24 16:30:00.123456 +01:00"),
+ S("2012-12-24 14:30:00.123456 -01:00"),
+ S("2012-12-24 15:30:00.123456 -00:00"),
+ S("20121224T1630+01:00"),
+ S("2012-12-24T1630+01:00"),
+ S("20121224T16:30+01"),
+ S("20121224T16:30 +01"),
+};
+#undef S
+
+/** Number of elements of a true array (not valid for pointers). */
+#define DIM(a) (sizeof(a) / sizeof((a)[0]))
+
+/**
+ * Parse a datetime literal of @a len bytes starting at @a str.
+ * On success stores the seconds value into @a sp, the nanoseconds into
+ * @a np and the timezone offset (in minutes) into @a op.
+ *
+ * @retval 0 the whole input was parsed
+ * @retval 1 the input is not a recognized datetime literal
+ */
+int
+parse_datetime(const char *str, size_t len, int64_t *sp, int32_t *np,
+ int32_t *op);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/perf/datetime-compare.cc b/perf/datetime-compare.cc
new file mode 100644
index 000000000..6e561a5eb
--- /dev/null
+++ b/perf/datetime-compare.cc
@@ -0,0 +1,213 @@
+#include "dt.h"
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "datetime-common.h"
+
+/**
+ * Datetime value whose seconds field has the type under benchmark.
+ *
+ * @tparam T type of the seconds counter; the benchmarks instantiate it
+ *           with uint64_t and double.
+ */
+template <typename T>
+struct datetime_bench
+{
+	/** Seconds, converted from the parser's int64_t result. */
+	T secs;
+	/** Nanoseconds part as returned by the parser. */
+	uint32_t nsec;
+	/**
+	 * Timezone offset in minutes. Signed, because the parser reports
+	 * negative values for literals such as "-01:00".
+	 */
+	int32_t offset;
+
+	/** Parsed counterparts of tests[], one slot per literal. */
+	static struct datetime_bench date_array[];
+};
+template<typename T>
+struct datetime_bench<T> datetime_bench<T>::date_array[DIM(tests)];
+
+/**
+ * Parse every literal from tests[] and store the result, with the seconds
+ * converted to T, into datetime_bench<T>::date_array.
+ */
+template <typename T>
+static void
+Assign70()
+{
+	using dt_bench = datetime_bench<T>;
+
+	for (size_t index = 0; index < DIM(tests); index++) {
+		int64_t secs;
+		int32_t nanosecs;
+		int32_t ofs;
+		int rc = parse_datetime(tests[index].sz, tests[index].len,
+					&secs, &nanosecs, &ofs);
+		assert(rc == 0);
+		/* rc is otherwise unused when assert() is compiled out. */
+		(void)rc;
+		dt_bench::date_array[index].secs = (T)secs;
+		dt_bench::date_array[index].nsec = nanosecs;
+		dt_bench::date_array[index].offset = ofs;
+	}
+}
+
+/** Benchmark body: one full Assign70<T>() pass per iteration. */
+template <typename T>
+static void
+DateTime_Assign70(benchmark::State &state)
+{
+	for (auto _ : state) {
+		Assign70<T>();
+	}
+}
+BENCHMARK_TEMPLATE1(DateTime_Assign70, uint64_t);
+BENCHMARK_TEMPLATE1(DateTime_Assign70, double);
+
+/** Three-way comparison: -1 if a < b, 1 if a > b, 0 otherwise. */
+#define COMPARE_RESULT_BENCH(a, b) ((a) < (b) ? -1 : (a) > (b))
+
+/**
+ * Order two datetime values by seconds, then by nanoseconds.
+ * The timezone offset does not take part in the comparison.
+ *
+ * @retval -1 lhs sorts before rhs
+ * @retval  0 both fields are equal
+ * @retval  1 lhs sorts after rhs
+ */
+template <typename T>
+int datetime_compare(const struct datetime_bench<T> *lhs,
+		     const struct datetime_bench<T> *rhs)
+{
+	int result = COMPARE_RESULT_BENCH(lhs->secs, rhs->secs);
+	if (result != 0)
+		return result;
+
+	/* Seconds are equal, so the nanoseconds decide. */
+	return COMPARE_RESULT_BENCH(lhs->nsec, rhs->nsec);
+}
+
+/**
+ * Fill date_array from tests[] and then compare every element with its
+ * successor.
+ */
+template <typename T>
+static void
+AssignCompare70()
+{
+	using dt_bench = datetime_bench<T>;
+	const size_t count = DIM(tests);
+
+	/* Same parse-and-store pass as the Assign70 benchmark. */
+	Assign70<T>();
+
+	for (size_t index = 0; index + 1 < count; index++) {
+		volatile int rc = datetime_compare<T>(&dt_bench::date_array[index],
+						      &dt_bench::date_array[index + 1]);
+		assert(rc == 0 || rc == -1 || rc == 1);
+	}
+}
+
+/** Benchmark body: one AssignCompare70<T>() pass per iteration. */
+template <typename T>
+static void
+DateTime_AssignCompare70(benchmark::State &state)
+{
+	for (auto _ : state) {
+		AssignCompare70<T>();
+	}
+}
+BENCHMARK_TEMPLATE1(DateTime_AssignCompare70, uint64_t);
+BENCHMARK_TEMPLATE1(DateTime_AssignCompare70, double);
+
+/**
+ * Compare 10 pairs (20 elements) of date_array: element i against
+ * element 32 + i. The array is not filled here; it holds whatever an
+ * earlier benchmark stored there (zeroes if none has run yet).
+ */
+template <typename T>
+static void
+Compare20()
+{
+	using dt_bench = datetime_bench<T>;
+	const size_t pair_count = 10;
+	const size_t rhs_base = 32;
+	static_assert(rhs_base + pair_count <= DIM(tests),
+		      "date_array is too short for Compare20");
+
+	for (size_t i = 0; i < pair_count; i++) {
+		volatile int rc = datetime_compare<T>(&dt_bench::date_array[i],
+						      &dt_bench::date_array[rhs_base + i]);
+		assert(rc == 0 || rc == -1 || rc == 1);
+	}
+}
+
+/** Benchmark body: one Compare20<T>() pass per iteration. */
+template <typename T>
+static void
+DateTime_Compare20(benchmark::State &state)
+{
+	for (auto _ : state) {
+		Compare20<T>();
+	}
+}
+BENCHMARK_TEMPLATE1(DateTime_Compare20, uint64_t);
+BENCHMARK_TEMPLATE1(DateTime_Compare20, double);
+
+
+/** DT_EPOCH_1970_OFFSET (a day count) expressed in seconds. */
+#define SECS_EPOCH_1970_OFFSET ((int64_t)DT_EPOCH_1970_OFFSET * SECS_PER_DAY)
+
+/**
+ * Format @a date as text: "YYYY-MM-DDThh:mm", then an optional ":ss" with
+ * an optional fraction (3, 6 or 9 digits), then "Z" for a zero offset or
+ * "+hh:mm" / "-hh:mm" otherwise.
+ *
+ * @param date value to format
+ * @param buf  output buffer; may be NULL to only compute the length
+ * @param len  size of @a buf in bytes
+ * @return the number of characters the complete text needs, excluding the
+ *         terminating NUL (same convention as snprintf()); the output was
+ *         truncated if this is >= @a len. -1 if snprintf() reported an
+ *         error.
+ */
+template<typename T>
+int
+datetime_to_string(const struct datetime_bench<T> *date, char *buf, uint32_t len)
+{
+/*
+ * Account for one snprintf() result. Once the text no longer fits, the
+ * buffer is dropped (NULL, 0) so that later calls only count characters
+ * instead of writing past the end with a wrapped-around length.
+ */
+#define ADVANCE(sz) do { \
+	if ((sz) < 0) \
+		return -1; \
+	if (buf != NULL && (uint32_t)(sz) < len) { \
+		buf += (sz); \
+		len -= (sz); \
+	} else { \
+		buf = NULL; \
+		len = 0; \
+	} \
+	ret += (sz); \
+} while (0)
+
+	/* snprintf() accepts a NULL buffer only together with size 0. */
+	if (buf == NULL)
+		len = 0;
+
+	int offset = date->offset;
+	/* for negative offsets around Epoch date we could get
+	 * negative secs value, which should be attributed to
+	 * 1969-12-31, not 1970-01-01, thus we first shift
+	 * epoch to Rata Die then divide by seconds per day,
+	 * not in reverse
+	 */
+	int64_t secs = (int64_t)date->secs + offset * 60 + SECS_EPOCH_1970_OFFSET;
+	assert((secs / SECS_PER_DAY) <= INT_MAX);
+	dt_t dt = dt_from_rdn(secs / SECS_PER_DAY);
+
+	int year, month, day;
+	dt_to_ymd(dt, &year, &month, &day);
+
+	int hour = (secs / 3600) % 24;
+	int minute = (secs / 60) % 60;
+	int sec = secs % 60;
+	int ns = date->nsec;
+
+	int ret = 0;
+	/* Signed, so that a negative snprintf() result stays detectable. */
+	int sz = snprintf(buf, len, "%04d-%02d-%02dT%02d:%02d",
+			  year, month, day, hour, minute);
+	ADVANCE(sz);
+	if (sec || ns) {
+		sz = snprintf(buf, len, ":%02d", sec);
+		ADVANCE(sz);
+		if (ns) {
+			/* Print the shortest fraction that loses no digits. */
+			if ((ns % 1000000) == 0)
+				sz = snprintf(buf, len, ".%03d", ns / 1000000);
+			else if ((ns % 1000) == 0)
+				sz = snprintf(buf, len, ".%06d", ns / 1000);
+			else
+				sz = snprintf(buf, len, ".%09d", ns);
+			ADVANCE(sz);
+		}
+	}
+	if (offset == 0) {
+		sz = snprintf(buf, len, "Z");
+		ADVANCE(sz);
+	} else {
+		char sign = '+';
+		if (offset < 0) {
+			sign = '-';
+			offset = -offset;
+		}
+
+		sz = snprintf(buf, len, "%c%02d:%02d", sign, offset / 60, offset % 60);
+		ADVANCE(sz);
+	}
+	return ret;
+}
+#undef ADVANCE
+
+/**
+ * Format a copy of one date_array element (index 13) into a stack buffer.
+ * The array is not filled here; it holds whatever an earlier benchmark
+ * stored there.
+ */
+template <typename T>
+static void
+ToString1()
+{
+	using dt_bench = datetime_bench<T>;
+
+	char text[48];
+	dt_bench value = dt_bench::date_array[13];
+
+	/* NOTE(review): volatile presumably keeps the call from being elided. */
+	volatile auto written = datetime_to_string<T>(&value, text, sizeof text);
+}
+
+/** Benchmark body: one ToString1<T>() call per iteration. */
+template <typename T>
+static void
+DateTime_ToString1(benchmark::State &state)
+{
+	for (auto _ : state) {
+		ToString1<T>();
+	}
+}
+BENCHMARK_TEMPLATE1(DateTime_ToString1, uint64_t);
+BENCHMARK_TEMPLATE1(DateTime_ToString1, double);
diff --git a/perf/datetime-parser.cc b/perf/datetime-parser.cc
new file mode 100644
index 000000000..61557fe8f
--- /dev/null
+++ b/perf/datetime-parser.cc
@@ -0,0 +1,105 @@
+#include "dt.h"
+#include <string.h>
+#include <assert.h>
+
+#include "datetime-common.h"
+
+/*
+ * Lenient datetime parser: a date, optionally followed by a 'T', 't' or
+ * space separator and a time, optionally followed by a timezone (which
+ * may be preceded by one space).
+ * Origin: p5-time-moment/src/moment_parse.c: parse_string_lenient()
+ *
+ * Returns 0 and fills *sp (seconds), *np (nanoseconds) and *op (timezone
+ * offset, minutes) when the whole input was consumed; returns 1 otherwise
+ * and leaves the outputs untouched.
+ */
+int
+parse_datetime(const char *str, size_t len, int64_t *sp, int32_t *np,
+ int32_t *op)
+{
+ size_t n;
+ dt_t dt;
+ char c;
+ /* Defaults used when the time and/or zone part is absent. */
+ int sod = 0, nanosecond = 0, offset = 0;
+
+ n = dt_parse_iso_date(str, len, &dt);
+ if (!n)
+ return 1;
+ /* A date-only literal is accepted as is. */
+ if (n == len)
+ goto exit;
+
+ /* Exactly one separator character between the date and the time. */
+ c = str[n++];
+ if (!(c == 'T' || c == 't' || c == ' '))
+ return 1;
+
+ str += n;
+ len -= n;
+
+ n = dt_parse_iso_time(str, len, &sod, &nanosecond);
+ if (!n)
+ return 1;
+ /* No timezone part: the offset stays 0. */
+ if (n == len)
+ goto exit;
+
+ /* Tolerate a single space before the timezone. */
+ if (str[n] == ' ')
+ n++;
+
+ str += n;
+ len -= n;
+
+ /* The zone must consume everything that is left. */
+ n = dt_parse_iso_zone_lenient(str, len, &offset);
+ if (!n || n != len)
+ return 1;
+
+exit:
+ /*
+ * NOTE(review): 719163 is presumably the Rata Die day number of
+ * 1970-01-01 and 86400 the number of seconds per day -- confirm
+ * against DT_EPOCH_1970_OFFSET / SECS_PER_DAY. The offset is in
+ * minutes and is subtracted to normalize the result.
+ */
+ *sp = ((int64_t)dt_rdn(dt) - 719163) * 86400 + sod - offset * 60;
+ *np = nanosecond;
+ *op = offset;
+
+ return 0;
+}
+
+/**
+ * Parse every literal from tests[] and check that each one yields the same
+ * seconds value as the reference `sample` literal.
+ */
+static void
+ParseTimeStamps()
+{
+	int64_t secs_expected;
+	int32_t nanosecs;
+	int32_t ofs;
+	int rc = parse_datetime(sample, sizeof(sample) - 1, &secs_expected,
+				&nanosecs, &ofs);
+	/* secs_expected is meaningful only if the reference literal parsed. */
+	assert(rc == 0);
+	(void)rc;
+
+	for (size_t index = 0; index < DIM(tests); index++) {
+		int64_t secs;
+		rc = parse_datetime(tests[index].sz, tests[index].len,
+				    &secs, &nanosecs, &ofs);
+		assert(rc == 0);
+		assert(secs == secs_expected);
+		/* rc is otherwise unused when assert() is compiled out. */
+		(void)rc;
+	}
+}
+
+/** Benchmark body: one ParseTimeStamps() pass per iteration. */
+static void
+CDT_Parse70(benchmark::State &state)
+{
+	for (auto _ : state) {
+		ParseTimeStamps();
+	}
+}
+BENCHMARK(CDT_Parse70);
+
+/**
+ * Parse a single literal that carries every component: date, time,
+ * 9-digit fraction and timezone offset.
+ */
+static void
+Parse1()
+{
+	const char civil_string[] = "2015-02-18T10:50:31.521345123+10:00";
+	int64_t secs;
+	int32_t nanosecs;
+	int32_t ofs;
+	int rc = parse_datetime(civil_string, sizeof(civil_string) - 1,
+				&secs, &nanosecs, &ofs);
+	assert(rc == 0);
+	assert(nanosecs == 521345123);
+	/* rc is otherwise unused when assert() is compiled out. */
+	(void)rc;
+}
+
+/** Benchmark body: one Parse1() call per iteration. */
+static void
+CDT_Parse1(benchmark::State &state)
+{
+	for (auto _ : state) {
+		Parse1();
+	}
+}
+BENCHMARK(CDT_Parse1);
+
+/* Google Benchmark entry point for the datetime.perftest executable. */
+BENCHMARK_MAIN();
--
2.29.2
More information about the Tarantool-patches
mailing list