Hi, Sergey,

thanks for the patch!

Sergey

On 10/24/25 13:50, Sergey Kaplun wrote:
This patch adjusts the aforementioned test to use the benchmark
framework introduced earlier. The default arguments are adjusted
according to the <PARAM_x86.txt> file. The arguments to the script can
still be provided on the command line.

The benchmark input is provided by redirecting the corresponding
<FASTA_5000000> file generated by `libs/fasta.lua 5e6`. The output
from the benchmark is redirected to /dev/null. All checks are done by
comparing against the precomputed values for the aforementioned file.
---
 perf/LuaJIT-benches/k-nucleotide.lua | 93 ++++++++++++++++++++++++----
 1 file changed, 82 insertions(+), 11 deletions(-)

diff --git a/perf/LuaJIT-benches/k-nucleotide.lua b/perf/LuaJIT-benches/k-nucleotide.lua
index 0bfb41be..ae51dae9 100644
--- a/perf/LuaJIT-benches/k-nucleotide.lua
+++ b/perf/LuaJIT-benches/k-nucleotide.lua
@@ -1,3 +1,4 @@
+local bench = require('bench').new(arg)
 
 local function kfrequency(seq, freq, k, frame)
   local sub = string.sub
@@ -12,7 +13,8 @@ local function count(seq, frag)
   local k = #frag
   local freq = {}
   for frame=1,k do kfrequency(seq, freq, k, frame) end
-  io.write(freq[frag] or 0, "\t", frag, "\n")
+  return freq[frag]
+  -- io.write(freq[frag] or 0, "\t", frag, "\n")
Should this commented-out line be removed entirely?
 end
 
 local function frequency(seq, k)
@@ -24,10 +26,13 @@ local function frequency(seq, k)
     local fa, fb = freq[a], freq[b]
     return fa == fb and a > b or fa > fb
   end)
+  local res = {}
   for _,c in ipairs(sfreq) do
-    io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum))
+    -- io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum))
Should this commented-out line be removed?
+    res[c] = freq[c]*100/sum
   end
-  io.write("\n")
+  -- io.write("\n")
+  return res
 end
 
 local function readseq()
@@ -48,11 +53,77 @@ local function readseq()
   return string.upper(table.concat(lines, "", 1, ln))
 end
 
-local seq = readseq()
-frequency(seq, 1)
-frequency(seq, 2)
-count(seq, "GGT")
-count(seq, "GGTA")
-count(seq, "GGTATT")
-count(seq, "GGTATTTTAATT")
-count(seq, "GGTATTTTAATTTATAGT")
+local function check_freq(res, expected)
+  for k,v in pairs(expected) do
+    assert(string.format("%0.3f", res[k]) == v,
+           "Incorrect frequency for fragment " .. k)
+  end
+end
+
+-- The input is generated by `fasta.lua 5e6'. The expected values
+-- in the checker correspond to that input.
+local N = 5e6
+-- See <libs/fasta.lua> for the details.
+local items = N * 5
+bench:add({
+  name = "k_nucleotide",
+  payload = function()
+    local seq = readseq()
+    local sfreq1 = frequency(seq, 1)
+    local sfreq2 = frequency(seq, 2)
+    local GGT  = count(seq, "GGT")
+    local GGTA = count(seq, "GGTA")
+    local GGTATT = count(seq, "GGTATT")
+    local GGTATTTTAATT = count(seq, "GGTATTTTAATT")
+    local GGTATTTTAATTTATAGT = count(seq, "GGTATTTTAATTTATAGT")
+
+    local res = {
+      sfreq1 = sfreq1,
+      sfreq2 = sfreq2,
+      GGT  = GGT,
+      GGTA = GGTA,
+      GGTATT = GGTATT,
+      GGTATTTTAATT = GGTATTTTAATT,
+      GGTATTTTAATTTATAGT = GGTATTTTAATTTATAGT,
+    }
+    -- XXX: Reset input for the non-check iteration.
+    io.stdin:seek("set", 0)
+    return res
+  end,
+  checker = function(res)
+    check_freq(res.sfreq1, {
+      A = "30.296",
+      T = "30.149",
+      C = "19.800",
+      G = "19.754",
+    })
+    check_freq(res.sfreq2, {
+      AA = "9.177",
+      TA = "9.132",
+      AT = "9.130",
+      TT = "9.091",
+      CA = "6.002",
+      AC = "6.001",
+      AG = "5.987",
+      GA = "5.984",
+      CT = "5.971",
+      TC = "5.971",
+      GT = "5.957",
+      TG = "5.956",
+      CC = "3.917",
+      GC = "3.911",
+      CG = "3.909",
+      GG = "3.902",
+    })
+
+    assert(res.GGT == 294331)
+    assert(res.GGTA == 89290)
+    assert(res.GGTATT == 9462)
+    assert(res.GGTATTTTAATT == 178)
+    assert(res.GGTATTTTAATTTATAGT == 178)
+    return true
+  end,
+  items = items,
+})
+
+bench:run_and_report()