Hi, Sergey, thanks for the patch!

LGTM with minor comments.

Sergey

On 12/26/25 12:17, Sergey Kaplun wrote:
> This module provides functionality to run custom benchmark workloads
> defined by the following syntax:
>
> | local bench = require('bench').new(arg)
> |
> | -- f_* are user-defined functions, n_* are user-defined numbers.
> | bench:add({
> |   setup = f_setup,
> |   payload = f_payload,
> |   teardown = f_teardown,
> |   items = n_items_processed,
> |
> |   checker = f_checker,
> |   -- Or instead:
> |   skip_check = true,
> |
> |   iterations = n_iterations,
> |   -- Or instead:
> |   min_time = n_seconds,
> | })
> |
> | bench:run_and_report()
>
> The checker function receives the single value returned by the payload
> function and completes all checks related to the test. If it returns a
> true value, it is considered a successful check pass. The checker
> function is called before the main workload as a warm-up. Generally, you
> should always provide the checker function to be sure that your
> benchmark is still correct after optimizations. In cases when it is
> impossible (for some reason), you may specify the `skip_check` flag. In
> that case the warm-up part will be skipped as well.
>
> Each test is run in the order it was added. The module measures the
> real time and CPU time necessary to run `iterations` repetitions of the
> test, or the number of iterations that fit into `min_time` seconds
> (4 by default), and calculates the items per second metric (more is
> better). The total amount of items equals `n_items_processed *
> n_iterations`. The items may also be added to the table with the
> description from inside the payload function. The results (real time,
> CPU time, iterations, items/s) are reported in a format compatible with
> the Google Benchmark suite [1].
>
> Each test may be run from the command line as follows:
> | LUA_PATH="..." luajit test_name.lua [flags] arguments
>
> The supported flags are:
> | -j{off|on}                    Disable/Enable JIT for the benchmarks.
> | --benchmark_color={true|false|auto}
> |                               Enables the colorized output for the
> |                               terminal (not the file).
> | --benchmark_min_time={number} Minimum seconds to run the benchmark
> |                               tests.
> | --benchmark_out=<file>        Places the output into <file>.
> | --benchmark_out_format={console|json}
> |                               The format is used when saving the
> |                               results in the file. The default format
> |                               is the JSON format.
> | -h, --help                    Display help message and exit.
>
> These options are similar to the Google Benchmark command line options,
> but with a few changes:
> 1) If an output file is given, there is no output in the terminal.
> 2) The min_time option supports only number values. There is no support
>    for the iterations number (by the 'x' suffix).
>
> [1]: https://github.com/google/benchmark
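
Side note for other readers of the thread: based on the description above, a
minimal benchmark file would look something like the sketch below. The
workload, the numbers, and the 'name' field are my own illustration (the code
uses bench.name in the report, although the commit message does not show it);
only the API shape is taken from the description.

local bench = require('bench').new(arg)

bench:add({
  name = 'string-rep',
  payload = function()
    -- Made-up workload: build a 1024-character string.
    return ('x'):rep(1024)
  end,
  checker = function(result)
    return #result == 1024
  end,
  -- 1024 items are processed per payload call.
  items = 1024,
  -- Let the module pick the iteration count to fit min_time.
  min_time = 1,
})

bench:run_and_report()
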
> ---
>  perf/utils/bench.lua | 511 +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 511 insertions(+)
>  create mode 100644 perf/utils/bench.lua
>
> diff --git a/perf/utils/bench.lua b/perf/utils/bench.lua
> new file mode 100644
> index 00000000..09a5c41a
> --- /dev/null
> +++ b/perf/utils/bench.lua
> @@ -0,0 +1,511 @@
> +local clock = require('clock')
> +local ffi = require('ffi')
> +-- Require 'cjson' only on demand for formatted output to file.
> +local json
> +
> +local M = {}
> +
> +local type, assert, error = type, assert, error
> +local format, rep = string.format, string.rep
> +local floor, max, min = math.floor, math.max, math.min
> +local table_remove = table.remove
> +
> +local LJ_HASJIT = jit and jit.opt
> +
> +-- Argument parsing. ---------------------------------------------
> +
> +-- XXX: Make options compatible with Google Benchmark, since most
> +-- probably it will be used for the C benchmarks as well.
> +-- Compatibility isn't full: there is no support for environment
> +-- variables (since they are not so useful) and the output to the
> +-- terminal is suppressed if the --benchmark_out flag is
> +-- specified.
> +
> +local HELP_MSG = [[
> +  Options:
> +    -j{off|on}             Disable/Enable JIT for the benchmarks.
> +    --benchmark_color={true|false|auto}
> +                           Enables the colorized output for the terminal (not
> +                           the file). 'auto' means to use colors if the
> +                           output is being sent to a terminal and the TERM
> +                           environment variable is set to a terminal type
> +                           that supports colors. Default is 'auto'.
> +    --benchmark_min_time={number}
> +                           Minimum seconds to run the benchmark tests.
> +                           4.0 by default.
> +    --benchmark_out=<file> Places the output into <file>.
> +    --benchmark_out_format={console|json}
> +                           The format is used when saving the results in the
> +                           file. The default format is the JSON format.
> +    -h, --help             Display this message and exit.
> +
> +  There are a bunch of suggestions on how to achieve the most
> +  stable benchmark results:
> +  https://github.com/tarantool/tarantool/wiki/Benchmarking
> +]]
> +
> +local EXIT_FAILURE = 1
> +
> +local function usage(ctx)
> +  local header = format('USAGE: luajit %s [options]\n', ctx.name)
> +  io.stderr:write(header, HELP_MSG)
> +  os.exit(EXIT_FAILURE)
> +end
> +
> +local function check_param(check, strfmt, ...)
> +  if not check then
> +    io.stderr:write(format(strfmt, ...))
> +    os.exit(EXIT_FAILURE)
> +  end
> +end
> +
> +-- Valid values: 'false'/'no'/'0'.
> +-- In case of an invalid value, 'auto' is used.
> +local function set_color(ctx, value)
> +  if value == 'false' or value == 'no' or value == '0' then
> +    ctx.color = false
> +  else
> +    -- In case of an invalid value, the Google Benchmark uses
> +    -- 'auto', which is true for the stdout output (the only
> +    -- colorizable output). So just set it to true by default.
> +    ctx.color = true
> +  end
> +end
> +
> +local DEFAULT_MIN_TIME = 4.0
> +local function set_min_time(ctx, value)
> +  local time = tonumber(value)
> +  check_param(time, 'Invalid min time: "%s"\n', value)
> +  ctx.min_time = time
> +end
> +
> +local function set_output(ctx, filename)
> +  check_param(type(filename) == "string", 'Invalid output value: "%s"\n',
> +              filename)
> +  ctx.output = filename
> +end
> +
> +-- Determine the output format for the benchmark.
> +-- Supports only 'console' and 'json' for now.
> +local function set_output_format(ctx, value)
> +  local output_format = tostring(value)
> +  check_param(output_format, 'Invalid output format: "%s"\n', value)
> +  output_format = output_format:lower()
> +  check_param(output_format == 'json' or output_format == 'console',
> +              'Unsupported output format: "%s"\n', output_format)
> +  ctx.output_format = output_format
> +end
> +
> +local function set_jit(ctx, value)
> +  check_param(value == 'on' or value == 'off',
> +              'Invalid jit value: "%s"\n', value)
> +  if value == 'off' then
> +    ctx.jit = false
> +  elseif value == 'on' then
> +    ctx.jit = true
> +  end
> +end
> +
> +local function unrecognized_option(optname, dashes)
> +  local fullname = dashes .. (optname or '=')
> +  io.stderr:write(format('unrecognized command-line flag: %s\n', fullname))
> +  io.stderr:write(HELP_MSG)
> +  os.exit(EXIT_FAILURE)
> +end
> +
> +local function unrecognized_long_option(_, optname)
> +  unrecognized_option(optname, '--')
> +end
> +
> +local function unrecognized_short_option(_, optname)
> +  unrecognized_option(optname, '-')
> +end
> +
> +local SHORT_OPTS = setmetatable({
> +  ['h'] = usage,
> +  ['j'] = set_jit,
> +}, {__index = unrecognized_short_option})
> +
> +local LONG_OPTS = setmetatable({
> +  ['benchmark_color'] = set_color,
> +  ['benchmark_min_time'] = set_min_time,
> +  ['benchmark_out'] = set_output,
> +  -- XXX: For now support only JSON encoded and raw output.
> +  ['benchmark_out_format'] = set_output_format,
> +  ['help'] = usage,
> +}, {__index = unrecognized_long_option})
> +
> +local function is_option(str)
> +  return type(str) == 'string' and str:sub(1, 1) == '-' and str ~= '-'
> +end
> +
> +local function next_arg_value(arg, n)
> +  local opt_value = nil
> +  if arg[n] and not is_option(arg[n]) then
> +    opt_value = arg[n]
> +    table_remove(arg, n)
> +  end
> +  return opt_value
> +end
> +
> +local function parse_long_option(arg, a, n)
> +  local opt_name, opt_value
> +  -- Remove dashes.
> +  local opt = a:sub(3)
> +  -- --option=value
> +  if opt:find('=', 1, true) then
> +    -- May match empty option name and/or value.
> +    opt_name, opt_value = opt:match('^([^=]+)=(.*)$')
> +  else
> +    -- --option value
> +    opt_name = opt
> +    opt_value = next_arg_value(arg, n)
> +  end
> +  return opt_name, opt_value
> +end
> +
> +local function parse_short_option(arg, a, n)
> +  local opt_name, opt_value
> +  -- Remove the dash.
> +  local opt = a:sub(2)
> +  if #opt == 1 then
> +    -- -o value
> +    opt_name = opt
> +    opt_value = next_arg_value(arg, n)
> +  else
> +    -- -ovalue.
> +    opt_name = opt:sub(1, 1)
> +    opt_value = opt:sub(2)
> +  end
> +  return opt_name, opt_value
> +end
> +
> +local function parse_opt(ctx, arg, a, n)
> +  if a:sub(1, 2) == '--' then
> +    local opt_name, opt_value = parse_long_option(arg, a, n)
> +    LONG_OPTS[opt_name](ctx, opt_value)
> +  else
> +    local opt_name, opt_value = parse_short_option(arg, a, n)
> +    SHORT_OPTS[opt_name](ctx, opt_value)
> +  end
> +end
> +
> +-- Process the options and update the benchmark context.
> +local function argparse(arg, name)
> +  local ctx = {name = name}

You format the test output, and the column with the test name has a fixed
length, so I would check the test name against the maximum length:

diff --git a/perf/utils/bench.lua b/perf/utils/bench.lua
index 09a5c41a..f3557347 100644
--- a/perf/utils/bench.lua
+++ b/perf/utils/bench.lua
@@ -190,8 +190,11 @@ local function parse_opt(ctx, arg, a, n)
   end
 end
 
+local MAX_TEST_NAME_LEN = 37
+
 -- Process the options and update the benchmark context.
 local function argparse(arg, name)
+  assert(#name <= MAX_TEST_NAME_LEN)
   local ctx = {name = name}
   local n = 1
   while n <= #arg do
@@ -212,8 +215,8 @@ end
 local function format_console_header()
   -- Use a similar format to the Google Benchmark, except for the
   -- fixed benchmark name length.
-  local header = format('%-37s %12s %15s %13s %-28s\n',
-    'Benchmark', 'Time', 'CPU', 'Iterations', 'UserCounters...'
+  local header = format('%-' .. MAX_TEST_NAME_LEN .. 's %12s %15s %13s %-28s\n',
+    'Benchmark', 'Time', 'CPU', 'Iterations', 'UserCounters...'
   )
   local border = rep('-', #header - 1) .. '\n'
   return border .. header .. border
@@ -226,7 +229,7 @@ local COLORS = {
 }
 
 local function format_name(ctx, name)
-  name = format('%-37s ', name)
+  name = format('%-' .. MAX_TEST_NAME_LEN .. 's ', name)
   if ctx.color then
     name = format(COLORS.GREEN, name)
   end

Feel free to ignore.

> +  local n = 1
> +  while n <= #arg do
> +    local a = arg[n]
> +    if is_option(a) then
> +      table_remove(arg, n)
> +      parse_opt(ctx, arg, a, n)
> +    else
> +      -- Just ignore it.
> +      n = n + 1
> +    end
> +  end
> +  return ctx
> +end
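
Not a request for changes, just to check my understanding of the parsing above:
for a hypothetical invocation like

luajit bench_foo.lua -joff --benchmark_min_time=2 --benchmark_out=foo.json

argparse() should return roughly

local ctx = {
  name = 'bench_foo.lua',   -- The script name, see test_name() below.
  jit = false,              -- From -joff.
  min_time = 2,             -- From --benchmark_min_time=2.
  output = 'foo.json',      -- From --benchmark_out=foo.json.
}

with the consumed flags removed from arg and anything that is not an option
left in place. The file names here are made up.
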
> +
> +-- Formatting. ---------------------------------------------------
> +
> +local function format_console_header()
> +  -- Use a similar format to the Google Benchmark, except for the
> +  -- fixed benchmark name length.
> +  local header = format('%-37s %12s %15s %13s %-28s\n',
> +    'Benchmark', 'Time', 'CPU', 'Iterations', 'UserCounters...'
> +  )
> +  local border = rep('-', #header - 1) .. '\n'
> +  return border .. header .. border
> +end
> +
> +local COLORS = {
> +  GREEN = '\027[32m%s\027[m',
> +  YELLOW = '\027[33m%s\027[m',
> +  CYAN = '\027[36m%s\027[m',
> +}
> +

Minor: we can sort it alphabetically:

 local COLORS = {
+  CYAN = '\027[36m%s\027[m',
   GREEN = '\027[32m%s\027[m',
   YELLOW = '\027[33m%s\027[m',
-  CYAN = '\027[36m%s\027[m',
 }

> +local function format_name(ctx, name)
> +  name = format('%-37s ', name)
> +  if ctx.color then
> +    name = format(COLORS.GREEN, name)
> +  end
> +  return name
> +end
> +
> +local function format_time(ctx, real_time, cpu_time, time_unit)
> +  local timestr = format('%10.2f %-4s %10.2f %-4s ', real_time, time_unit,
> +                         cpu_time, time_unit)
> +  if ctx.color then
> +    timestr = format(COLORS.YELLOW, timestr)
> +  end
> +  return timestr
> +end
> +
> +local function format_iterations(ctx, iterations)
> +  iterations = format('%10d ', iterations)
> +  if ctx.color then
> +    iterations = format(COLORS.CYAN, iterations)
> +  end
> +  return iterations
> +end
> +
> +local function format_ips(ips)
> +  local ips_str
> +  if ips / 1e6 > 1 then
> +    ips_str = format('items_per_second=%.3fM/s', ips / 1e6)
> +  elseif ips / 1e3 > 1 then
> +    ips_str = format('items_per_second=%.3fk/s', ips / 1e3)
> +  else
> +    ips_str = format('items_per_second=%d/s', ips)
> +  end
> +  return ips_str
> +end
> +
> +local function format_result_console(ctx, r)
> +  return format('%s%s%s%s\n',
> +    format_name(ctx, r.name),
> +    format_time(ctx, r.real_time, r.cpu_time, r.time_unit),
> +    format_iterations(ctx, r.iterations),
> +    format_ips(r.items_per_second)
> +  )
> +end
> +
> +local function format_results(ctx)
> +  local output_format = ctx.output_format
> +  local res = ''
> +  if output_format == 'json' then
> +    res = json.encode({
> +      benchmarks = ctx.results,
> +      context = ctx.context,
> +    })
> +  else
> +    assert(output_format == 'console', 'Unknown format: ' .. output_format)
> +    res = res .. format_console_header()
> +    for _, r in ipairs(ctx.results) do
> +      res = res .. format_result_console(ctx, r)
> +    end
> +  end
> +  return res
> +end
> +
> +local function report_results(ctx)
> +  ctx.fh:write(format_results(ctx))
> +end
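
Not an issue, just a tiny worked example of the unit selection in format_ips()
in case it helps future readers (the numbers are invented):

local format = string.format
-- 2.5e6 items/s: 2.5e6 / 1e6 > 1, so the 'M/s' branch is taken.
print(format('items_per_second=%.3fM/s', 2.5e6 / 1e6)) -- items_per_second=2.500M/s
-- 6.4e5 items/s: only 6.4e5 / 1e3 > 1, so the 'k/s' branch is taken.
print(format('items_per_second=%.3fk/s', 6.4e5 / 1e3)) -- items_per_second=640.000k/s
-- 512 items/s crosses neither threshold.
print(format('items_per_second=%d/s', 512))            -- items_per_second=512/s
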
> +
> +-- Tests setup and run. ------------------------------------------
> +
> +local function term_is_color()
> +  local term = os.getenv('TERM')
> +  return (term and term:match('color') or os.getenv('COLORTERM'))
> +end
> +
> +local function benchmark_context(ctx)
> +  return {
> +    arch = jit.arch,
> +    -- Google Benchmark reports a date in ISO 8601 format.
> +    date = os.date('%Y-%m-%dT%H:%M:%S%z'),
> +    gc64 = ffi.abi('gc64'),
> +    host_name = io.popen('hostname'):read(),
> +    jit = ctx.jit,
> +  }
> +end
> +
> +local function init(ctx)
> +  -- Array of benches to proceed with.
> +  ctx.benches = {}
> +  -- Array of the corresponding results.
> +  ctx.results = {}
> +
> +  if ctx.jit == nil then
> +    if LJ_HASJIT then
> +      ctx.jit = jit.status()
> +    else
> +      ctx.jit = false
> +    end
> +  end
> +  ctx.color = ctx.color == nil and true or ctx.color
> +  if ctx.output then
> +    -- Don't bother with manual file closing. It will be closed
> +    -- automatically when the corresponding object is
> +    -- garbage-collected.
> +    ctx.fh = assert(io.open(ctx.output, 'w+'))
> +    ctx.output_format = ctx.output_format or 'json'
> +    -- Always without color.
> +    ctx.color = false
> +  else
> +    ctx.fh = io.stdout
> +    -- Always console output to the terminal.
> +    ctx.output_format = 'console'
> +    if ctx.color and term_is_color() then
> +      ctx.color = true
> +    else
> +      ctx.color = false
> +    end
> +  end
> +  ctx.min_time = ctx.min_time or DEFAULT_MIN_TIME
> +
> +  if ctx.output_format == 'json' then
> +    json = require('cjson')
> +  end
> +
> +  -- Google Benchmark's context, plus benchmark info.
> +  ctx.context = benchmark_context(ctx)
> +
> +  return ctx
> +end
> +
> +local function test_name()
> +  return debug.getinfo(3, 'S').short_src:match('([^/\\]+)$')
> +end
> +
> +local function add_bench(ctx, bench)
> +  if bench.checker == nil and not bench.skip_check then
> +    error('Bench requires a checker to prove the results', 2)
> +  end
> +  table.insert(ctx.benches, bench)
> +end
> +
> +local MAX_ITERATIONS = 1e9
> +-- Determine the number of iterations for the next benchmark run.
> +local function iterations_multiplier(min_time, get_time, iterations)
> +  -- When the last run is at least 10% of the required time, the
> +  -- maximum expansion should be 14x.
> +  local multiplier = min_time * 1.4 / max(get_time, 1e-9)
> +  local is_significant = get_time / min_time > 0.1
> +  multiplier = is_significant and multiplier or 10
> +  local new_iterations = max(floor(multiplier * iterations), iterations + 1)
> +  return min(new_iterations, MAX_ITERATIONS)
> +end
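
To double-check my reading of iterations_multiplier(): with the default
min_time = 4.0 and a payload that takes about 10 ms per call (made-up numbers),
the dynamic loop in run_benches() below should grow roughly like this:

-- 1 iteration    -> 0.01 s; 0.01 / 4.0 <= 0.1, not significant, so next N = 10.
-- 10 iterations  -> 0.1 s;  still not significant, so next N = 100.
-- 100 iterations -> 1.0 s;  1.0 / 4.0 > 0.1, multiplier = 4.0 * 1.4 / 1.0 = 5.6,
--                  so next N = floor(5.6 * 100) = 560.
-- 560 iterations -> ~5.6 s > min_time, so the loop stops.

So the final timed run overshoots min_time by about 1.4x at most, and
MAX_ITERATIONS caps the growth for extremely cheap payloads.
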
> +
> +-- https://luajit.org/running.html#foot.
> +local JIT_DEFAULTS = {
> +  maxtrace = 1000,
> +  maxrecord = 4000,
> +  maxirconst = 500,
> +  maxside = 100,
> +  maxsnap = 500,
> +  hotloop = 56,
> +  hotexit = 10,
> +  tryside = 4,
> +  instunroll = 4,
> +  loopunroll = 15,
> +  callunroll = 3,
> +  recunroll = 2,
> +  sizemcode = 32,
> +  maxmcode = 512,
> +}
> +
> +-- Basic setup for all tests to clean up after a previous
> +-- executor.
> +local function luajit_tests_setup(ctx)
> +  -- Reset the JIT to the defaults.
> +  if ctx.jit == false then
> +    jit.off()
> +  elseif LJ_HASJIT then
> +    jit.on()
> +    jit.flush()
> +    jit.opt.start(3)
> +    for k, v in pairs(JIT_DEFAULTS) do
> +      jit.opt.start(k .. '=' .. v)
> +    end
> +  end
> +
> +  -- Reset the GC to the defaults.
> +  collectgarbage('setstepmul', 200)
> +  collectgarbage('setpause', 200)

We don't change these parameters in bench.lua; why should we reset them to the
defaults?

> +
> +  -- Collect all garbage at the end. Twice to be sure that all
> +  -- finalizers are run.
> +  collectgarbage()
> +  collectgarbage()
> +end
> +
> +local function run_benches(ctx)
> +  -- Process the tests in the predefined order with ipairs.
> +  for _, bench in ipairs(ctx.benches) do
> +    luajit_tests_setup(ctx)
> +    if bench.setup then bench.setup() end
> +
> +    -- The first run is used as a warm-up, plus results checks.
> +    local payload = bench.payload
> +    -- Generally you should never skip any checks. But sometimes
> +    -- a bench may generate so much output in one run that it is
> +    -- overkill to save the result in the file and test it.
> +    -- So to avoid double time for the test run, just skip the
> +    -- check.
> +    if not bench.skip_check then
> +      local result = payload()
> +      assert(bench.checker(result))
> +    end
> +    local N
> +    local delta_real, delta_cpu
> +    -- Iterations are specified manually.
> +    if bench.iterations then
> +      N = bench.iterations
> +
> +      local start_real = clock.realtime()
> +      local start_cpu = clock.process_cputime()
> +      for _ = 1, N do
> +        payload()
> +      end
> +      delta_real = clock.realtime() - start_real
> +      delta_cpu = clock.process_cputime() - start_cpu
> +    else
> +      -- Iterations are determined dinamycally, adjusting to fit

typo: s/dinamycally/dynamically/

> +      -- the minimum time to run for the benchmark.
> +      local min_time = bench.min_time or ctx.min_time
> +      local next_iterations = 1
> +      repeat
> +        N = next_iterations
> +        local start_real = clock.realtime()
> +        local start_cpu = clock.process_cputime()
> +        for _ = 1, N do
> +          payload()
> +        end
> +        delta_real = clock.realtime() - start_real
> +        delta_cpu = clock.process_cputime() - start_cpu
> +        next_iterations = iterations_multiplier(min_time, delta_real, N)
> +      until delta_real > min_time or N == next_iterations
> +    end
> +
> +    if bench.teardown then bench.teardown() end
> +
> +    local items = N * bench.items
> +    local items_per_second = math.floor(items / delta_real)
> +    table.insert(ctx.results, {
> +      cpu_time = delta_cpu,
> +      real_time = delta_real,
> +      items_per_second = items_per_second,
> +      iterations = N,
> +      name = bench.name,
> +      time_unit = 's',
> +      -- The fields below are used only for the Google Benchmark
> +      -- compatibility. We don't really use them.
> +      run_name = bench.name,
> +      run_type = 'iteration',
> +      repetitions = 1,
> +      repetition_index = 1,
> +      threads = 1,
> +    })
> +  end
> +end
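
Just to spell out the reported metric with made-up numbers: if a bench declares
items = 100, the dynamic loop above settles on N = 50000 iterations, and they
take delta_real = 2.5 s of wall-clock time, then:

local items = 50000 * 100                        -- 5,000,000 items in total.
local items_per_second = math.floor(items / 2.5) -- 2,000,000 items/s.
-- format_ips() renders this as 'items_per_second=2.000M/s'.
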
> +
> +local function run_and_report(ctx)
> +  run_benches(ctx)
> +  report_results(ctx)
> +end
> +
> +function M.new(arg)
> +  assert(type(arg) == 'table', 'given argument should be a table')
> +  local name = test_name()
> +  local ctx = init(argparse(arg, name))
> +  return setmetatable(ctx, {__index = {
> +    add = add_bench,
> +    run = run_benches,
> +    report = report_results,
> +    run_and_report = run_and_report,
> +  }})
> +end
> +
> +return M
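
One last note, not a blocker: for whoever ends up consuming the --benchmark_out
file, my reading of run_benches() and format_results() is that each entry of
the encoded 'benchmarks' array carries the following fields (the values here
are purely illustrative):

-- One entry of ctx.results, as it would be serialized:
local entry = {
  name = 'string-rep',
  run_name = 'string-rep',
  run_type = 'iteration',
  repetitions = 1,
  repetition_index = 1,
  threads = 1,
  iterations = 50000,
  real_time = 2.5,
  cpu_time = 2.4,
  time_unit = 's',
  items_per_second = 2000000,
}

plus the top-level 'context' table with arch, date, gc64, host_name and jit.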