<!DOCTYPE html>
<html data-lt-installed="true">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body style="padding-bottom: 1px;">
<p>Hi, Sergey,</p>
<p>thanks for the patch!</p>
<p>Sergey</p>
<div class="moz-cite-prefix">On 10/24/25 13:50, Sergey Kaplun wrote:<br>
</div>
<blockquote type="cite"
cite="mid:30a168f9cbe4c5476f274f3e20b56511ca5c3f8d.1761301736.git.skaplun@tarantool.org">
<pre wrap="" class="moz-quote-pre">This patch adjusts the aforementioned test to use the benchmark
framework introduced before. The default arguments are adjusted
according to the &lt;PARAM_x86.txt&gt; file. The arguments to the script still
can be provided in the command line run.
The benchmark input is given by redirecting the corresponding
&lt;FASTA_5000000&gt; file generated by the `libs/fasta.lua 5e6`. The output
from the benchmark is redirected to /dev/null. All checks are done by
the comparison with the precomputed values for the aforementioned file.
---
perf/LuaJIT-benches/k-nucleotide.lua | 93 ++++++++++++++++++++++++----
1 file changed, 82 insertions(+), 11 deletions(-)
diff --git a/perf/LuaJIT-benches/k-nucleotide.lua b/perf/LuaJIT-benches/k-nucleotide.lua
index 0bfb41be..ae51dae9 100644
--- a/perf/LuaJIT-benches/k-nucleotide.lua
+++ b/perf/LuaJIT-benches/k-nucleotide.lua
@@ -1,3 +1,4 @@
+local bench = require('bench').new(arg)
local function kfrequency(seq, freq, k, frame)
local sub = string.sub
@@ -12,7 +13,8 @@ local function count(seq, frag)
local k = #frag
local freq = {}
for frame=1,k do kfrequency(seq, freq, k, frame) end
- io.write(freq[frag] or 0, "\t", frag, "\n")
+ return freq[frag]
+ -- io.write(freq[frag] or 0, "\t", frag, "\n")</pre>
</blockquote>
Should this commented-out <code>io.write</code> line be removed altogether?
<blockquote type="cite"
cite="mid:30a168f9cbe4c5476f274f3e20b56511ca5c3f8d.1761301736.git.skaplun@tarantool.org">
<pre wrap="" class="moz-quote-pre">
end
local function frequency(seq, k)
@@ -24,10 +26,13 @@ local function frequency(seq, k)
local fa, fb = freq[a], freq[b]
return fa == fb and a > b or fa > fb
end)
+ local res = {}
for _,c in ipairs(sfreq) do
- io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum))
+ -- io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum))</pre>
</blockquote>
Remove this commented-out line too?
<blockquote type="cite"
cite="mid:30a168f9cbe4c5476f274f3e20b56511ca5c3f8d.1761301736.git.skaplun@tarantool.org">
<pre wrap="" class="moz-quote-pre">
+ res[c] = freq[c]*100/sum
end
- io.write("\n")
+ -- io.write("\n")
+ return res
end
local function readseq()
@@ -48,11 +53,77 @@ local function readseq()
return string.upper(table.concat(lines, "", 1, ln))
end
-local seq = readseq()
-frequency(seq, 1)
-frequency(seq, 2)
-count(seq, "GGT")
-count(seq, "GGTA")
-count(seq, "GGTATT")
-count(seq, "GGTATTTTAATT")
-count(seq, "GGTATTTTAATTTATAGT")
+local function check_freq(res, expected)
+ for k,v in pairs(expected) do
+ assert(string.format("%0.3f", res[k]) == v,
+ "Incorrect frequency for fragment " .. k)
+ end
+end
+
+-- The input is generated by `fasta.lua 5e6'. The check function
+-- is corresponding.
+local N = 5e6
+-- See &lt;libs/fasta.lua&gt; for the details.
+local items = N * 5
+bench:add({
+ name = "k_nucleotide",
+ payload = function()
+ local seq = readseq()
+ local sfreq1 = frequency(seq, 1)
+ local sfreq2 = frequency(seq, 2)
+ local GGT = count(seq, "GGT")
+ local GGTA = count(seq, "GGTA")
+ local GGTATT = count(seq, "GGTATT")
+ local GGTATTTTAATT = count(seq, "GGTATTTTAATT")
+ local GGTATTTTAATTTATAGT = count(seq, "GGTATTTTAATTTATAGT")
+
+ local res = {
+ sfreq1 = sfreq1,
+ sfreq2 = sfreq2,
+ GGT = GGT,
+ GGTA = GGTA,
+ GGTATT = GGTATT,
+ GGTATTTTAATT = GGTATTTTAATT,
+ GGTATTTTAATTTATAGT = GGTATTTTAATTTATAGT,
+ }
+ -- XXX: Reset input for the non-check iteration.
+ io.stdin:seek("set", 0)
+ return res
+ end,
+ checker = function(res)
+ check_freq(res.sfreq1, {
+ A = "30.296",
+ T = "30.149",
+ C = "19.800",
+ G = "19.754",
+ })
+ check_freq(res.sfreq2, {
+ AA = "9.177",
+ TA = "9.132",
+ AT = "9.130",
+ TT = "9.091",
+ CA = "6.002",
+ AC = "6.001",
+ AG = "5.987",
+ GA = "5.984",
+ CT = "5.971",
+ TC = "5.971",
+ GT = "5.957",
+ TG = "5.956",
+ CC = "3.917",
+ GC = "3.911",
+ CG = "3.909",
+ GG = "3.902",
+ })
+
+ assert(res.GGT == 294331)
+ assert(res.GGTA == 89290)
+ assert(res.GGTATT == 9462)
+ assert(res.GGTATTTTAATT == 178)
+ assert(res.GGTATTTTAATTTATAGT == 178)
+ return true
+ end,
+ items = items,
+})
+
+bench:run_and_report()
</pre>
</blockquote>
</body>
<lt-container></lt-container>
</html>