From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id B089D15A809A; Mon, 17 Nov 2025 11:37:00 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org B089D15A809A DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1763368620; bh=TSo7wCW9jLWivXfg88uQX0qYc/GcoSF/2GMuguruqp0=; h=Date:To:Cc:References:In-Reply-To:Subject:List-Id: List-Unsubscribe:List-Archive:List-Post:List-Help:List-Subscribe: From:Reply-To:From; b=oupUzE9pPcDer7Tq4a41SlyoJfEdxSKhpsaOh8qK4MN/z8F4Xz6dKQOxOEjpJjkvK ohyQ9cwvjadUIsbqtytzoj8E9kRiNpiqGobSnCEYAnDni2NcXfpH+57RrRJcikUsKj YWYwf83qKFIGjy2ZyJMshNq19EpReG29OS6Co5w0= Received: from send197.i.mail.ru (send197.i.mail.ru [95.163.59.36]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 2B3B715A8096 for ; Mon, 17 Nov 2025 11:36:30 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 2B3B715A8096 Received: by exim-smtp-77df68b9cf-jzxv4 with esmtpa (envelope-from ) id 1vKuir-000000008aS-0o1n; Mon, 17 Nov 2025 11:36:29 +0300 Content-Type: multipart/alternative; boundary="------------fju75HYY0GFOInaHSDGOyhNC" Message-ID: <9f45ff89-e948-4549-86e4-c2233ccbfeef@tarantool.org> Date: Mon, 17 Nov 2025 11:36:29 +0300 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird Content-Language: en-US To: Sergey Kaplun Cc: tarantool-patches@dev.tarantool.org References: <30a168f9cbe4c5476f274f3e20b56511ca5c3f8d.1761301736.git.skaplun@tarantool.org> In-Reply-To: <30a168f9cbe4c5476f274f3e20b56511ca5c3f8d.1761301736.git.skaplun@tarantool.org> X-Mailru-Src: smtp X-4EC0790: 10 X-7564579A: 78E4E2B564C1792B X-77F55803: 
4F1203BC0FB41BD9110BAFCBE6BD1D1673EE44C5E0B5084EEF227B89B5FCF5FC182A05F5380850408B47F68A1BADEED83DE06ABAFEAF6705EA7BAD38A141E7A0B90FB3693C7F7FA47821E8BEC7DEA809 X-7FA49CB5: FF5795518A3D127A4AD6D5ED66289B5278DA827A17800CE7A3DED2DACB82E709C2099A533E45F2D0395957E7521B51C2CFCAF695D4D8E9FCEA1F7E6F0F101C6759CC434672EE6371C2A783ECEC0211ADC4224003CC836476D5A39DEEDB180909611E41BBFE2FEB2BC635427C2EF7E891BDD3928FF6565ACDB6349BAC9F45753013A736C8815374109FA2833FD35BB23D9E625A9149C048EE33AC447995A7AD18CB629EEF1311BF91D2E47CDBA5A96583BD4B6F7A4D31EC0BC014FD901B82EE079FA2833FD35BB23D27C277FBC8AE2E8B974A882099E279BDA471835C12D1D977C4224003CC836476EB9C4185024447017B076A6E789B0E975F5C1EE8F4F765FC6A4E49BB0F3BA1413AA81AA40904B5D9CF19DD082D7633A0C84D3B47A649675F3AA81AA40904B5D98AA50765F79006371C58C39218EE08BCD81D268191BDAD3D3666184CF4C3C14F3FC91FA280E0CE3D1A620F70A64A45A98AA50765F79006372E808ACE2090B5E1725E5C173C3A84C3C5EA940A35A165FF2DBA43225CD8A89FB26E97DCB74E625242539A7722CA490CB5C8C57E37DE458BEDA766A37F9254B7 X-C1DE0DAB: 0D63561A33F958A5E9FCD451B4E5147B5002B1117B3ED696A8FA5C5F924694DB54BB1175C6E7DD94823CB91A9FED034534781492E4B8EEAD69BF13FED57427F1BDAD6C7F3747799A X-C8649E89: 1C3962B70DF3F0ADE00A9FD3E00BEEDF3FED46C3ACD6F73ED3581295AF09D3DF87807E0823442EA2ED31085941D9CD0AF7F820E7B07EA4CF26192E32025F1BD0998829156B55414DC921A775744142CFDD7489E0C1607E9AC9FF4CCC84A62DB7DD12B02666E49356B8FF789EAE16DF4CB7393678D7FC29EFF6E2F9DF836A45BF111DC66A97D0BFE2913E6812662D5F2AB9AF64DB4688768036DF5FE9C0001AF333F2C28C22F508233FCF178C6DD14203 X-D57D3AED: 3ZO7eAau8CL7WIMRKs4sN3D3tLDjz0dLbV79QFUyzQ2Ujvy7cMT6pYYqY16iZVKkSc3dCLJ7zSJH7+u4VD18S7Vl4ZUrpaVfd2+vE6kuoey4m4VkSEu53w8ahmwBjZKM/YPHZyZHvz5uv+WouB9+ObcCpyrx6l7KImUglyhkEat/+ysWwi0gdhEs0JGjl6ggRWTy1haxBpVdbIX1nthFXMZebaIdHP2ghjoIc/363UZI6Kf1ptIMVfFX+FmbwDW1W/37309XTUg= X-Mailru-Sender: 
520A125C2F17F0B1A9638AD358559B5961411C63A194A3463DE06ABAFEAF6705EA7BAD38A141E7A0B7CBEF92542CD7C8795FA72BAB74744FC77752E0C033A69EA16A481184E8BB1C9B38E6EA4F046BE03A5DB60FBEB33A8A0DA7A0AF5A3A8387 X-Mras: Ok Subject: Re: [Tarantool-patches] [PATCH v1 luajit 11/41] perf: adjust k-nucleotide in LuaJIT-benches X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Sergey Bronnikov via Tarantool-patches Reply-To: Sergey Bronnikov Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" This is a multi-part message in MIME format. --------------fju75HYY0GFOInaHSDGOyhNC Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit Hi, Sergey, thanks for the patch! Sergey On 10/24/25 13:50, Sergey Kaplun wrote: > This patch adjusts the aforementioned test to use the benchmark > framework introduced before. The default arguments are adjusted > according to the file. The arguments to the script still > can be provided in the command line run. > > The benchmark input is given by redirecting the corresponding > file generated by the `libs/fasta.lua 5e6`. The output > from the benchmark is redirected to /dev/null. All checks are done by > the comparison with the precomputed values for the aforementioned file. 
> --- > perf/LuaJIT-benches/k-nucleotide.lua | 93 ++++++++++++++++++++++++---- > 1 file changed, 82 insertions(+), 11 deletions(-) > > diff --git a/perf/LuaJIT-benches/k-nucleotide.lua b/perf/LuaJIT-benches/k-nucleotide.lua > index 0bfb41be..ae51dae9 100644 > --- a/perf/LuaJIT-benches/k-nucleotide.lua > +++ b/perf/LuaJIT-benches/k-nucleotide.lua > @@ -1,3 +1,4 @@ > +local bench = require('bench').new(arg) > > local function kfrequency(seq, freq, k, frame) > local sub = string.sub > @@ -12,7 +13,8 @@ local function count(seq, frag) > local k = #frag > local freq = {} > for frame=1,k do kfrequency(seq, freq, k, frame) end > - io.write(freq[frag] or 0, "\t", frag, "\n") > + return freq[frag] > + -- io.write(freq[frag] or 0, "\t", frag, "\n") remove this at all? > end > > local function frequency(seq, k) > @@ -24,10 +26,13 @@ local function frequency(seq, k) > local fa, fb = freq[a], freq[b] > return fa == fb and a > b or fa > fb > end) > + local res = {} > for _,c in ipairs(sfreq) do > - io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum)) > + -- io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum)) remove? > + res[c] = freq[c]*100/sum > end > - io.write("\n") > + -- io.write("\n") > + return res > end > > local function readseq() > @@ -48,11 +53,77 @@ local function readseq() > return string.upper(table.concat(lines, "", 1, ln)) > end > > -local seq = readseq() > -frequency(seq, 1) > -frequency(seq, 2) > -count(seq, "GGT") > -count(seq, "GGTA") > -count(seq, "GGTATT") > -count(seq, "GGTATTTTAATT") > -count(seq, "GGTATTTTAATTTATAGT") > +local function check_freq(res, expected) > + for k,v in pairs(expected) do > + assert(string.format("%0.3f", res[k]) == v, > + "Incorrect frequency for fragment " .. k) > + end > +end > + > +-- The input is generated by `fasta.lua 5e6'. The check function > +-- is corresponding. > +local N = 5e6 > +-- See for the details. 
> +local items = N * 5 > +bench:add({ > + name = "k_nucleotide", > + payload = function() > + local seq = readseq() > + local sfreq1 = frequency(seq, 1) > + local sfreq2 = frequency(seq, 2) > + local GGT = count(seq, "GGT") > + local GGTA = count(seq, "GGTA") > + local GGTATT = count(seq, "GGTATT") > + local GGTATTTTAATT = count(seq, "GGTATTTTAATT") > + local GGTATTTTAATTTATAGT = count(seq, "GGTATTTTAATTTATAGT") > + > + local res = { > + sfreq1 = sfreq1, > + sfreq2 = sfreq2, > + GGT = GGT, > + GGTA = GGTA, > + GGTATT = GGTATT, > + GGTATTTTAATT = GGTATTTTAATT, > + GGTATTTTAATTTATAGT = GGTATTTTAATTTATAGT, > + } > + -- XXX: Reset input for the non-check iteration. > +io.stdin:seek("set", 0) > + return res > + end, > + checker = function(res) > + check_freq(res.sfreq1, { > + A = "30.296", > + T = "30.149", > + C = "19.800", > + G = "19.754", > + }) > + check_freq(res.sfreq2, { > + AA = "9.177", > + TA = "9.132", > + AT = "9.130", > + TT = "9.091", > + CA = "6.002", > + AC = "6.001", > + AG = "5.987", > + GA = "5.984", > + CT = "5.971", > + TC = "5.971", > + GT = "5.957", > + TG = "5.956", > + CC = "3.917", > + GC = "3.911", > + CG = "3.909", > + GG = "3.902", > + }) > + > + assert(res.GGT == 294331) > + assert(res.GGTA == 89290) > + assert(res.GGTATT == 9462) > + assert(res.GGTATTTTAATT == 178) > + assert(res.GGTATTTTAATTTATAGT == 178) > + return true > + end, > + items = items, > +}) > + > +bench:run_and_report() --------------fju75HYY0GFOInaHSDGOyhNC Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: 7bit

Hi, Sergey,

thanks for the patch!

Sergey

On 10/24/25 13:50, Sergey Kaplun wrote:
This patch adjusts the aforementioned test to use the benchmark
framework introduced earlier. The default arguments are adjusted
according to the <PARAM_x86.txt> file. The arguments to the script can
still be provided on the command line.

The benchmark input is given by redirecting the corresponding
<FASTA_5000000> file generated by `libs/fasta.lua 5e6`. The output
from the benchmark is redirected to /dev/null. All checks are done by
comparison with the precomputed values for the aforementioned file.
---
 perf/LuaJIT-benches/k-nucleotide.lua | 93 ++++++++++++++++++++++++----
 1 file changed, 82 insertions(+), 11 deletions(-)

diff --git a/perf/LuaJIT-benches/k-nucleotide.lua b/perf/LuaJIT-benches/k-nucleotide.lua
index 0bfb41be..ae51dae9 100644
--- a/perf/LuaJIT-benches/k-nucleotide.lua
+++ b/perf/LuaJIT-benches/k-nucleotide.lua
@@ -1,3 +1,4 @@
+local bench = require('bench').new(arg)
 
 local function kfrequency(seq, freq, k, frame)
   local sub = string.sub
@@ -12,7 +13,8 @@ local function count(seq, frag)
   local k = #frag
   local freq = {}
   for frame=1,k do kfrequency(seq, freq, k, frame) end
-  io.write(freq[frag] or 0, "\t", frag, "\n")
+  return freq[frag]
+  -- io.write(freq[frag] or 0, "\t", frag, "\n")
Should this commented-out line be removed altogether?
 end
 
 local function frequency(seq, k)
@@ -24,10 +26,13 @@ local function frequency(seq, k)
     local fa, fb = freq[a], freq[b]
     return fa == fb and a > b or fa > fb
   end)
+  local res = {}
   for _,c in ipairs(sfreq) do
-    io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum))
+    -- io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum))
Should this commented-out line be removed?
+    res[c] = freq[c]*100/sum
   end
-  io.write("\n")
+  -- io.write("\n")
+  return res
 end
 
 local function readseq()
@@ -48,11 +53,77 @@ local function readseq()
   return string.upper(table.concat(lines, "", 1, ln))
 end
 
-local seq = readseq()
-frequency(seq, 1)
-frequency(seq, 2)
-count(seq, "GGT")
-count(seq, "GGTA")
-count(seq, "GGTATT")
-count(seq, "GGTATTTTAATT")
-count(seq, "GGTATTTTAATTTATAGT")
+local function check_freq(res, expected)
+  for k,v in pairs(expected) do
+    assert(string.format("%0.3f", res[k]) == v,
+           "Incorrect frequency for fragment " .. k)
+  end
+end
+
+-- The input is generated by `fasta.lua 5e6'. The checker's expected
+-- values correspond to that input.
+local N = 5e6
+-- See <libs/fasta.lua> for the details.
+local items = N * 5
+bench:add({
+  name = "k_nucleotide",
+  payload = function()
+    local seq = readseq()
+    local sfreq1 = frequency(seq, 1)
+    local sfreq2 = frequency(seq, 2)
+    local GGT  = count(seq, "GGT")
+    local GGTA = count(seq, "GGTA")
+    local GGTATT = count(seq, "GGTATT")
+    local GGTATTTTAATT = count(seq, "GGTATTTTAATT")
+    local GGTATTTTAATTTATAGT = count(seq, "GGTATTTTAATTTATAGT")
+
+    local res = {
+      sfreq1 = sfreq1,
+      sfreq2 = sfreq2,
+      GGT  = GGT,
+      GGTA = GGTA,
+      GGTATT = GGTATT,
+      GGTATTTTAATT = GGTATTTTAATT,
+      GGTATTTTAATTTATAGT = GGTATTTTAATTTATAGT,
+    }
+    -- XXX: Reset input for the non-check iteration.
+    io.stdin:seek("set", 0)
+    return res
+  end,
+  checker = function(res)
+    check_freq(res.sfreq1, {
+      A = "30.296",
+      T = "30.149",
+      C = "19.800",
+      G = "19.754",
+    })
+    check_freq(res.sfreq2, {
+      AA = "9.177",
+      TA = "9.132",
+      AT = "9.130",
+      TT = "9.091",
+      CA = "6.002",
+      AC = "6.001",
+      AG = "5.987",
+      GA = "5.984",
+      CT = "5.971",
+      TC = "5.971",
+      GT = "5.957",
+      TG = "5.956",
+      CC = "3.917",
+      GC = "3.911",
+      CG = "3.909",
+      GG = "3.902",
+    })
+
+    assert(res.GGT == 294331)
+    assert(res.GGTA == 89290)
+    assert(res.GGTATT == 9462)
+    assert(res.GGTATTTTAATT == 178)
+    assert(res.GGTATTTTAATTTATAGT == 178)
+    return true
+  end,
+  items = items,
+})
+
+bench:run_and_report()
--------------fju75HYY0GFOInaHSDGOyhNC--