Hi, Sergey,
Thanks for the patch! Please see my comments below.
Sergey
This patch adds a helper script to aggregate the benchmark results from JSON files to the format parsable by the InfluxDB line protocol [1].
A format cannot be parsed by a protocol; please rephrase.
Something like "the format compatible with the InfluxDB line protocol".
Maybe also say that cjson is required?
All JSON files from each suite in the <perf/output> directory are considered as the benchmark results and aggregated into the <perf/output/summary.txt> file that can be posted to the InfluxDB. The results are aggregated via the new target LuaJIT-perf-aggregate.
[1]: https://docs.influxdata.com/influxdb/v2/reference/syntax/line-protocol/
---
perf/CMakeLists.txt | 13 ++++
perf/helpers/aggregate.lua | 124 +++++++++++++++++++++++++++++++++++++
2 files changed, 137 insertions(+)
create mode 100644 perf/helpers/aggregate.lua
diff --git a/perf/CMakeLists.txt b/perf/CMakeLists.txt
index cc3c312f..68e561fd 100644
--- a/perf/CMakeLists.txt
+++ b/perf/CMakeLists.txt
@@ -97,3 +97,16 @@ add_custom_target(${PROJECT_NAME}-perf
add_custom_target(${PROJECT_NAME}-perf-console
DEPENDS LuaJIT-benches-console
)
+
+set(PERF_SUMMARY ${PERF_OUTPUT_DIR}/summary.txt)
+add_custom_target(${PROJECT_NAME}-perf-aggregate
+ BYPRODUCTS ${PERF_SUMMARY}
+ COMMENT "Aggregate performance test results into ${PERF_SUMMARY}"
+ COMMAND ${CMAKE_COMMAND} -E env
+ LUA_CPATH="${LUA_CPATH}"
+ ${LUAJIT_BINARY} ${CMAKE_CURRENT_SOURCE_DIR}/helpers/aggregate.lua
+ ${PERF_SUMMARY}
+ ${PERF_OUTPUT_DIR}
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ DEPENDS luajit-main
+)
diff --git a/perf/helpers/aggregate.lua b/perf/helpers/aggregate.lua
new file mode 100644
index 00000000..12a8ab89
--- /dev/null
+++ b/perf/helpers/aggregate.lua
@@ -0,0 +1,124 @@
+local json = require('cjson')
What if cjson is absent? Do we want to handle the error?
+
+-- File to aggregate the benchmark results from JSON files to the
+-- format parsable by the InfluxDB line protocol [1]:
+-- <measurement>,<tag_set> <field_set> <timestamp>
+--
+-- <tag_set> and <field_set> have the following format:
+-- <key1>=<value1>,<key2>=<value2>
+--
+-- The reported tag set is a set of values that can be used for
+-- filtering data (i.e., branch or benchmark name).
+--
+-- luacheck: push no max comment line length
+--
+-- [1]: https://docs.influxdata.com/influxdb/v2/reference/syntax/line-protocol/
+--
+-- luacheck: pop
I propose documenting the command-line options (the 1st argument is
the output file; the 2nd argument is the input directory, which
defaults to the current directory), the environment variables
(PERF_COMMIT, PERF_BRANCH), and the requirements (git is optional,
the cjson Lua module is mandatory).
+
+local output = assert(arg[1], 'Output file is required as the first argument')
+local input_dir = arg[2] or '.'
+
+local out_fh = assert(io.open(output, 'w+'))
+
+local function exec(cmd)
+ return io.popen(cmd):read('*all'):gsub('%s+$', '')
+end
+
+local commit = os.getenv('PERF_COMMIT') or exec('git rev-parse --short HEAD')
+assert(commit, 'can not determine the commit')
+
+local branch = os.getenv('PERF_BRANCH') or
+ exec('git rev-parse --abbrev-ref HEAD')
+assert(branch, 'can not determine the branch')
+
+-- Not very robust, but OK for our needs.
+local function listdir(path)
+ local handle = io.popen('ls -1 ' .. path)
+
+ local files = {}
+ for file in handle:lines() do
+ table.insert(files, file)
+ end
+
+ return files
+end
+
+local tag_set = {branch = branch}
+
+local function table_plain_copy(src)
+ local dst = {}
+ for k, v in pairs(src) do
+ dst[k] = v
+ end
+ return dst
+end
+
+local function read_all(file)
+ local fh = assert(io.open(file, 'rb'))
+ local content = fh:read('*all')
+ fh:close()
+ return content
+end
+
+local REPORTED_FIELDS = {
+ 'cpu_time',
+ 'items_per_second',
+ 'iterations',
+ 'real_time',
+}
+
+local function influx_kv(tab)
+ local kv_string = {}
+ for k, v in pairs(tab) do
+ table.insert(kv_string, ('%s=%s'):format(k, v))
+ end
+ return table.concat(kv_string, ',')
+end
+
+local time = os.time()
+local function influx_line(measurement, tags, fields)
+ return ('%s,%s %s %d\n'):format(measurement, influx_kv(tags),
+ influx_kv(fields), time)
+end
+
+for _, suite_name in pairs(listdir(input_dir)) do
+ -- May list the report file, but will be ignored by the
+ -- condition below.
+ local suite_dir = ('%s/%s'):format(input_dir, suite_name)
+ for _, file in pairs(listdir(suite_dir)) do
+ -- Skip files in which we are not interested.
+ if not file:match('%.json$') then goto continue end
+
+ local data = read_all(('%s/%s'):format(suite_dir, file))
+ local bench_name = file:match('([^/]+)%.json')
+ local bench_data = json.decode(data)
+ local benchmarks = bench_data.benchmarks
+ local arch = bench_data.context.arch
+ local gc64 = bench_data.context.gc64
+ local jit = bench_data.context.jit
+
+ for _, bench in ipairs(benchmarks) do
+ local full_tag_set = table_plain_copy(tag_set)
+ full_tag_set.name = bench.name
+ full_tag_set.suite = suite_name
+ full_tag_set.arch = arch
+ full_tag_set.gc64 = gc64
+ full_tag_set.jit = jit
+
+ -- Save the commit as a field, since we don't want to filter
+ -- benchmarks by the commit (one point of data).
+ local field_set = {commit = ('"%s"'):format(commit)}
+
+ for _, field in ipairs(REPORTED_FIELDS) do
+ field_set[field] = bench[field]
+ end
+
+ local line = influx_line(bench_name, full_tag_set, field_set)
+ out_fh:write(line)
+ end
+ ::continue::
+ end
+end
+
+out_fh:close()