Hi, Sergey,

thanks for the patch!

Sergey

On 10/24/25 13:50, Sergey Kaplun wrote:
> This patch adjusts the aforementioned test to use the benchmark
> framework introduced before. The default arguments are adjusted
> according to the <PARAM_x86.txt> file. The arguments to the script still
> can be provided in the command line run.
>
> The benchmark input is given by redirecting the corresponding
> <FASTA_5000000> file generated by the `libs/fasta.lua 5e6`. The output
> from the benchmark is redirected to /dev/null. All checks are done by
> the comparison with the precomputed values for the aforementioned file.
> ---
>   perf/LuaJIT-benches/k-nucleotide.lua | 93 ++++++++++++++++++++++++----
>   1 file changed, 82 insertions(+), 11 deletions(-)
>
> diff --git a/perf/LuaJIT-benches/k-nucleotide.lua b/perf/LuaJIT-benches/k-nucleotide.lua
> index 0bfb41be..ae51dae9 100644
> --- a/perf/LuaJIT-benches/k-nucleotide.lua
> +++ b/perf/LuaJIT-benches/k-nucleotide.lua
> @@ -1,3 +1,4 @@
> +local bench = require('bench').new(arg)
>   
>   local function kfrequency(seq, freq, k, frame)
>     local sub = string.sub
> @@ -12,7 +13,8 @@ local function count(seq, frag)
>     local k = #frag
>     local freq = {}
>     for frame=1,k do kfrequency(seq, freq, k, frame) end
> -  io.write(freq[frag] or 0, "\t", frag, "\n")
> +  return freq[frag]
> +  -- io.write(freq[frag] or 0, "\t", frag, "\n")
Could we remove this commented-out line entirely instead of keeping it?
>   end
>   
>   local function frequency(seq, k)
> @@ -24,10 +26,13 @@ local function frequency(seq, k)
>       local fa, fb = freq[a], freq[b]
>       return fa == fb and a > b or fa > fb
>     end)
> +  local res = {}
>     for _,c in ipairs(sfreq) do
> -    io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum))
> +    -- io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum))
Remove this commented-out line instead of keeping it?
> +    res[c] = freq[c]*100/sum
>     end
> -  io.write("\n")
> +  -- io.write("\n")
> +  return res
>   end
>   
>   local function readseq()
> @@ -48,11 +53,77 @@ local function readseq()
>     return string.upper(table.concat(lines, "", 1, ln))
>   end
>   
> -local seq = readseq()
> -frequency(seq, 1)
> -frequency(seq, 2)
> -count(seq, "GGT")
> -count(seq, "GGTA")
> -count(seq, "GGTATT")
> -count(seq, "GGTATTTTAATT")
> -count(seq, "GGTATTTTAATTTATAGT")
> +local function check_freq(res, expected)
> +  for k,v in pairs(expected) do
> +    assert(string.format("%0.3f", res[k]) == v,
> +           "Incorrect frequency for fragment " .. k)
> +  end
> +end
> +
> +-- The input is generated by `fasta.lua 5e6'. The checker
> +-- function below expects the results for exactly that input.
> +local N = 5e6
> +-- See <libs/fasta.lua> for the details.
> +local items = N * 5
> +bench:add({
> +  name = "k_nucleotide",
> +  payload = function()
> +    local seq = readseq()
> +    local sfreq1 = frequency(seq, 1)
> +    local sfreq2 = frequency(seq, 2)
> +    local GGT  = count(seq, "GGT")
> +    local GGTA = count(seq, "GGTA")
> +    local GGTATT = count(seq, "GGTATT")
> +    local GGTATTTTAATT = count(seq, "GGTATTTTAATT")
> +    local GGTATTTTAATTTATAGT = count(seq, "GGTATTTTAATTTATAGT")
> +
> +    local res = {
> +      sfreq1 = sfreq1,
> +      sfreq2 = sfreq2,
> +      GGT  = GGT,
> +      GGTA = GGTA,
> +      GGTATT = GGTATT,
> +      GGTATTTTAATT = GGTATTTTAATT,
> +      GGTATTTTAATTTATAGT = GGTATTTTAATTTATAGT,
> +    }
> +    -- XXX: Reset input for the non-check iteration.
> +    io.stdin:seek("set", 0)
> +    return res
> +  end,
> +  checker = function(res)
> +    check_freq(res.sfreq1, {
> +      A = "30.296",
> +      T = "30.149",
> +      C = "19.800",
> +      G = "19.754",
> +    })
> +    check_freq(res.sfreq2, {
> +      AA = "9.177",
> +      TA = "9.132",
> +      AT = "9.130",
> +      TT = "9.091",
> +      CA = "6.002",
> +      AC = "6.001",
> +      AG = "5.987",
> +      GA = "5.984",
> +      CT = "5.971",
> +      TC = "5.971",
> +      GT = "5.957",
> +      TG = "5.956",
> +      CC = "3.917",
> +      GC = "3.911",
> +      CG = "3.909",
> +      GG = "3.902",
> +    })
> +
> +    assert(res.GGT == 294331)
> +    assert(res.GGTA == 89290)
> +    assert(res.GGTATT == 9462)
> +    assert(res.GGTATTTTAATT == 178)
> +    assert(res.GGTATTTTAATTTATAGT == 178)
> +    return true
> +  end,
> +  items = items,
> +})
> +
> +bench:run_and_report()