Hi, Sergey,
thanks for the patch!
Sergey
This patch adjusts the aforementioned test to use the benchmark
framework introduced before. The default arguments are adjusted
according to the <PARAM_x86.txt> file. The arguments to the script can
still be provided on the command line.
The benchmark input is given by redirecting the corresponding
<FASTA_5000000> file generated by `libs/fasta.lua 5e6`. The output
from the benchmark is redirected to /dev/null. All checks are done by
comparison with the precomputed values for the aforementioned file.
---
perf/LuaJIT-benches/k-nucleotide.lua | 93 ++++++++++++++++++++++++----
1 file changed, 82 insertions(+), 11 deletions(-)
diff --git a/perf/LuaJIT-benches/k-nucleotide.lua b/perf/LuaJIT-benches/k-nucleotide.lua
index 0bfb41be..ae51dae9 100644
--- a/perf/LuaJIT-benches/k-nucleotide.lua
+++ b/perf/LuaJIT-benches/k-nucleotide.lua
@@ -1,3 +1,4 @@
+local bench = require('bench').new(arg)
local function kfrequency(seq, freq, k, frame)
local sub = string.sub
@@ -12,7 +13,8 @@ local function count(seq, frag)
local k = #frag
local freq = {}
for frame=1,k do kfrequency(seq, freq, k, frame) end
- io.write(freq[frag] or 0, "\t", frag, "\n")
+ return freq[frag]
+ -- io.write(freq[frag] or 0, "\t", frag, "\n")
Should this commented-out `io.write` line be removed altogether instead of being left in as a comment?
end
local function frequency(seq, k)
@@ -24,10 +26,13 @@ local function frequency(seq, k)
local fa, fb = freq[a], freq[b]
return fa == fb and a > b or fa > fb
end)
+ local res = {}
for _,c in ipairs(sfreq) do
- io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum))
+ -- io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum))
Should this commented-out `io.write` line be removed instead of being left in as a comment?
+ res[c] = freq[c]*100/sum
end
- io.write("\n")
+ -- io.write("\n")
+ return res
end
local function readseq()
@@ -48,11 +53,77 @@ local function readseq()
return string.upper(table.concat(lines, "", 1, ln))
end
-local seq = readseq()
-frequency(seq, 1)
-frequency(seq, 2)
-count(seq, "GGT")
-count(seq, "GGTA")
-count(seq, "GGTATT")
-count(seq, "GGTATTTTAATT")
-count(seq, "GGTATTTTAATTTATAGT")
+local function check_freq(res, expected)
+ for k,v in pairs(expected) do
+ assert(string.format("%0.3f", res[k]) == v,
+ "Incorrect frequency for fragment " .. k)
+ end
+end
+
+-- The input is generated by `fasta.lua 5e6'. The check function
+-- is corresponding.
+local N = 5e6
+-- See <libs/fasta.lua> for the details.
+local items = N * 5
+bench:add({
+ name = "k_nucleotide",
+ payload = function()
+ local seq = readseq()
+ local sfreq1 = frequency(seq, 1)
+ local sfreq2 = frequency(seq, 2)
+ local GGT = count(seq, "GGT")
+ local GGTA = count(seq, "GGTA")
+ local GGTATT = count(seq, "GGTATT")
+ local GGTATTTTAATT = count(seq, "GGTATTTTAATT")
+ local GGTATTTTAATTTATAGT = count(seq, "GGTATTTTAATTTATAGT")
+
+ local res = {
+ sfreq1 = sfreq1,
+ sfreq2 = sfreq2,
+ GGT = GGT,
+ GGTA = GGTA,
+ GGTATT = GGTATT,
+ GGTATTTTAATT = GGTATTTTAATT,
+ GGTATTTTAATTTATAGT = GGTATTTTAATTTATAGT,
+ }
+ -- XXX: Reset input for the non-check iteration.
+ io.stdin:seek("set", 0)
+ return res
+ end,
+ checker = function(res)
+ check_freq(res.sfreq1, {
+ A = "30.296",
+ T = "30.149",
+ C = "19.800",
+ G = "19.754",
+ })
+ check_freq(res.sfreq2, {
+ AA = "9.177",
+ TA = "9.132",
+ AT = "9.130",
+ TT = "9.091",
+ CA = "6.002",
+ AC = "6.001",
+ AG = "5.987",
+ GA = "5.984",
+ CT = "5.971",
+ TC = "5.971",
+ GT = "5.957",
+ TG = "5.956",
+ CC = "3.917",
+ GC = "3.911",
+ CG = "3.909",
+ GG = "3.902",
+ })
+
+ assert(res.GGT == 294331)
+ assert(res.GGTA == 89290)
+ assert(res.GGTATT == 9462)
+ assert(res.GGTATTTTAATT == 178)
+ assert(res.GGTATTTTAATTTATAGT == 178)
+ return true
+ end,
+ items = items,
+})
+
+bench:run_and_report()