From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 17558168743F; Mon, 29 Dec 2025 16:29:53 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 17558168743F DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1767014993; bh=Om5S5A2qwK594wLX+4VnPECFrj+5GaDsstT7V0MDLFg=; h=Date:To:Cc:References:In-Reply-To:Subject:List-Id: List-Unsubscribe:List-Archive:List-Post:List-Help:List-Subscribe: From:Reply-To:From; b=H37C9ysNLckp4zeCsMPRO4sMUFP3Z3GhCs7BQJjizcxzfYeFu3G3ky3AlYWExDgZH hiF9v4pBtnuRzdIjPWr7LYRJZkG96YC1WWJXXtvTTugAri1B4ddN9GPJs1zsPYn6+Q on/JLUtCzDUxWFNletQJjL/Rm87QZxtwY+1HoT4E= Received: from send265.i.mail.ru (send265.i.mail.ru [95.163.59.104]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 74DF2166497A for ; Mon, 29 Dec 2025 16:29:51 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 74DF2166497A Received: by exim-smtp-7b4fb89df9-64pcp with esmtpa (envelope-from ) id 1vaDJj-0000000092x-0Keh; Mon, 29 Dec 2025 16:29:48 +0300 Content-Type: multipart/alternative; boundary="------------2GliyEDk8CzVRwg37b51jDjr" Message-ID: Date: Mon, 29 Dec 2025 16:28:41 +0300 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird Content-Language: en-US To: Sergey Kaplun Cc: tarantool-patches@dev.tarantool.org References: In-Reply-To: X-Mailru-Src: smtp X-4EC0790: 10 X-7564579A: B8F34718100C35BD X-77F55803: 4F1203BC0FB41BD979975AF0D777FEBD78E5B51DE69E67E134CC12E14949938B182A05F5380850405BEC9A1CA1C7E54E3DE06ABAFEAF6705BA0CBF42BD8861DF05D593F66932C592BC0FEE398BF55443 X-7FA49CB5: 
FF5795518A3D127A4AD6D5ED66289B5278DA827A17800CE7A40BC8721E3D31BBEA1F7E6F0F101C67BD4B6F7A4D31EC0BCC500DACC3FED6E28638F802B75D45FF8AA50765F7900637AC83A81C8FD4AD23D82A6BABE6F325AC2E85FA5F3EDFCBAA7353EFBB55337566FFA32339A312E8EBCF86D78B8DEF2E71C4B18DC8C9E8ED9B9FC125BAFA237B63389733CBF5DBD5E913377AFFFEAFD269176DF2183F8FC7C0A3E989B1926288338941B15DA834481FCF19DD082D7633A0EF3E4896CB9E6436389733CBF5DBD5E9D5E8D9A59859A8B652D31B9D28593E51CC7F00164DA146DA6F5DAA56C3B73B237318B6A418E8EAB8D32BA5DBAC0009BE9E8FC8737B5C2249D88082464251487476E601842F6C81A12EF20D2F80756B5FB606B96278B59C4276E601842F6C81A127C277FBC8AE2E8BE31AAB7FF4261FD93AA81AA40904B5D99C9F4D5AE37F343AD1F44FA8B9022EA23BBE47FD9DD3FB595F5C1EE8F4F765FC72CEEB2601E22B093A03B725D353964B0B7D0EA88DDEDAC722CA9DD8327EE4930A3850AC1BE2E735C8D5298E42E60C1FC4224003CC83647689D4C264860C145E X-C1DE0DAB: 0D63561A33F958A522D8568C3CADAD975002B1117B3ED696A33E51E8EEB3FA8C8B25839F35DFE037823CB91A9FED034534781492E4B8EEADE77C5D9E92540650BDAD6C7F3747799A X-C8649E89: 1C3962B70DF3F0AD73CAD6646DEDE191716CD42B3DD1D34CAB70F9BE574AE9C625B6776AC983F447FC0B9F89525902EE6F57B2FD27647F25E66C117BDB76D659255A9B72FC156F1DB8E82475EB68BC186E43A1868CA731A979D0364911514D774DA822636CC08A7AB8341EE9D5BE9A0AF08B624E82E4AD2BED061CF9A2635246128290E932E885228CD93680B12512CF4C41F94D744909CE2512F26BEC029E55448553D2254B8D95CD72808BE417F3B9E0E7457915DAA85F X-D57D3AED: 3ZO7eAau8CL7WIMRKs4sN3D3tLDjz0dLbV79QFUyzQ2Ujvy7cMT6pYYqY16iZVKkSc3dCLJ7zSJH7+u4VD18S7Vl4ZUrpaVfd2+vE6kuoey4m4VkSEu53w8ahmwBjZKM/YPHZyZHvz5uv+WouB9+ObcCpyrx6l7KImUglyhkEat/+ysWwi0gdhEs0JGjl6ggRWTy1haxBpVdbIX1nthFXMZebaIdHP2ghjoIc/363UZI6Kf1ptIMVTZJppT4ZVHRRAicWm2M8E4= X-Mailru-Sender: 811C44EDE0507D1FFB106D83BFFB684FDA66482BAA5E5BAB0841F56765D88C401E9F38F2C87A7A639926515C74BA4B74645D15D82EE4B272BD6E4642A116CA93524AA66B5ACBE6721EF430B9A63E2A504198E0F3ECE9B5443453F38A29522196 X-Mras: Ok Subject: Re: [Tarantool-patches] [PATCH v2 luajit 01/41] perf: add LuaJIT-test-cleanup perf suite X-BeenThere: 
tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Sergey Bronnikov via Tarantool-patches Reply-To: Sergey Bronnikov Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" This is a multi-part message in MIME format. --------------2GliyEDk8CzVRwg37b51jDjr Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit Hi, Sergey! Thanks for the patch! LGTM Sergey On 12/26/25 12:17, Sergey Kaplun wrote: > This patch introduces the LuaJIT-test-cleanup benchmark suite [1] into > our LuaJIT fork source tree. To provide reliable reproducible results > several benchmarks need to be adjusted. However, to be sure we initially > use the valid suite, everything in the directory > is moved intact. > > [1]:https://github.com/LuaJIT/LuaJIT-test-cleanup/tree/014708b/bench > --- > .luacheckrc | 1 + > perf/LuaJIT-benches/PARAM_arm.txt | 29 + > perf/LuaJIT-benches/PARAM_mips.txt | 29 + > perf/LuaJIT-benches/PARAM_ppc.txt | 29 + > perf/LuaJIT-benches/PARAM_x86.txt | 29 + > perf/LuaJIT-benches/SUMCOL_1.txt | 1000 ++++++++++++++++++++ > perf/LuaJIT-benches/TEST_md5sum.txt | 20 + > perf/LuaJIT-benches/array3d.lua | 59 ++ > perf/LuaJIT-benches/binary-trees.lua | 47 + > perf/LuaJIT-benches/chameneos.lua | 68 ++ > perf/LuaJIT-benches/coroutine-ring.lua | 42 + > perf/LuaJIT-benches/euler14-bit.lua | 22 + > perf/LuaJIT-benches/fannkuch.lua | 50 + > perf/LuaJIT-benches/fasta.lua | 95 ++ > perf/LuaJIT-benches/k-nucleotide.lua | 58 ++ > perf/LuaJIT-benches/life.lua | 111 +++ > perf/LuaJIT-benches/mandelbrot-bit.lua | 33 + > perf/LuaJIT-benches/mandelbrot.lua | 23 + > perf/LuaJIT-benches/md5.lua | 183 ++++ > perf/LuaJIT-benches/meteor.lua | 220 +++++ > perf/LuaJIT-benches/nbody.lua | 119 +++ > perf/LuaJIT-benches/nsieve-bit-fp.lua | 37 + > perf/LuaJIT-benches/nsieve-bit.lua | 27 + > 
perf/LuaJIT-benches/nsieve.lua | 21 + > perf/LuaJIT-benches/partialsums.lua | 29 + > perf/LuaJIT-benches/pidigits-nogmp.lua | 100 ++ > perf/LuaJIT-benches/ray.lua | 135 +++ > perf/LuaJIT-benches/recursive-ack.lua | 8 + > perf/LuaJIT-benches/recursive-fib.lua | 7 + > perf/LuaJIT-benches/revcomp.lua | 37 + > perf/LuaJIT-benches/scimark-2010-12-20.lua | 400 ++++++++ > perf/LuaJIT-benches/scimark-fft.lua | 1 + > perf/LuaJIT-benches/scimark-lu.lua | 1 + > perf/LuaJIT-benches/scimark-sor.lua | 1 + > perf/LuaJIT-benches/scimark-sparse.lua | 1 + > perf/LuaJIT-benches/scimark_lib.lua | 297 ++++++ > perf/LuaJIT-benches/series.lua | 34 + > perf/LuaJIT-benches/spectral-norm.lua | 40 + > perf/LuaJIT-benches/sum-file.lua | 6 + > 39 files changed, 3449 insertions(+) > create mode 100644 perf/LuaJIT-benches/PARAM_arm.txt > create mode 100644 perf/LuaJIT-benches/PARAM_mips.txt > create mode 100644 perf/LuaJIT-benches/PARAM_ppc.txt > create mode 100644 perf/LuaJIT-benches/PARAM_x86.txt > create mode 100644 perf/LuaJIT-benches/SUMCOL_1.txt > create mode 100644 perf/LuaJIT-benches/TEST_md5sum.txt > create mode 100644 perf/LuaJIT-benches/array3d.lua > create mode 100644 perf/LuaJIT-benches/binary-trees.lua > create mode 100644 perf/LuaJIT-benches/chameneos.lua > create mode 100644 perf/LuaJIT-benches/coroutine-ring.lua > create mode 100644 perf/LuaJIT-benches/euler14-bit.lua > create mode 100644 perf/LuaJIT-benches/fannkuch.lua > create mode 100644 perf/LuaJIT-benches/fasta.lua > create mode 100644 perf/LuaJIT-benches/k-nucleotide.lua > create mode 100644 perf/LuaJIT-benches/life.lua > create mode 100644 perf/LuaJIT-benches/mandelbrot-bit.lua > create mode 100644 perf/LuaJIT-benches/mandelbrot.lua > create mode 100644 perf/LuaJIT-benches/md5.lua > create mode 100644 perf/LuaJIT-benches/meteor.lua > create mode 100644 perf/LuaJIT-benches/nbody.lua > create mode 100644 perf/LuaJIT-benches/nsieve-bit-fp.lua > create mode 100644 perf/LuaJIT-benches/nsieve-bit.lua > create mode 100644 
perf/LuaJIT-benches/nsieve.lua > create mode 100644 perf/LuaJIT-benches/partialsums.lua > create mode 100644 perf/LuaJIT-benches/pidigits-nogmp.lua > create mode 100644 perf/LuaJIT-benches/ray.lua > create mode 100644 perf/LuaJIT-benches/recursive-ack.lua > create mode 100644 perf/LuaJIT-benches/recursive-fib.lua > create mode 100644 perf/LuaJIT-benches/revcomp.lua > create mode 100644 perf/LuaJIT-benches/scimark-2010-12-20.lua > create mode 100644 perf/LuaJIT-benches/scimark-fft.lua > create mode 100644 perf/LuaJIT-benches/scimark-lu.lua > create mode 100644 perf/LuaJIT-benches/scimark-sor.lua > create mode 100644 perf/LuaJIT-benches/scimark-sparse.lua > create mode 100644 perf/LuaJIT-benches/scimark_lib.lua > create mode 100644 perf/LuaJIT-benches/series.lua > create mode 100644 perf/LuaJIT-benches/spectral-norm.lua > create mode 100644 perf/LuaJIT-benches/sum-file.lua > > diff --git a/.luacheckrc b/.luacheckrc > index 19098dd9..35824875 100644 > --- a/.luacheckrc > +++ b/.luacheckrc > @@ -16,6 +16,7 @@ files['test/tarantool-tests/'] = { > -- test suites and need to be coherent with the upstream. 
> exclude_files = { > 'dynasm/', > + 'perf/LuaJIT-benches/', > 'src/', > 'test/LuaJIT-tests/', > 'test/PUC-Rio-Lua-5.1-tests/', > diff --git a/perf/LuaJIT-benches/PARAM_arm.txt b/perf/LuaJIT-benches/PARAM_arm.txt > new file mode 100644 > index 00000000..a07fd010 > --- /dev/null > +++ b/perf/LuaJIT-benches/PARAM_arm.txt > @@ -0,0 +1,29 @@ > +array3d 200 > +binary-trees 13 > +chameneos 1e6 > +coroutine-ring 3e6 > +euler14-bit 5e6 > +fannkuch 10 > +fasta 2e6 > +k-nucleotide 5e5 FASTA_500000 > +life > +mandelbrot 2000 > +mandelbrot-bit 2000 > +md5 5000 > +nbody 1e6 > +nsieve 9 > +nsieve-bit 9 > +nsieve-bit-fp 9 > +partialsums 2e6 > +pidigits-nogmp 2000 > +ray 4 > +recursive-ack 9 > +recursive-fib 37 > +revcomp 1e6 FASTA_1000000 > +scimark-fft 2000 > +scimark-lu 300 > +scimark-sor 5000 > +scimark-sparse 5e3 > +series 1500 > +spectral-norm 1000 > +sum-file 1000 SUMCOL_1000 > diff --git a/perf/LuaJIT-benches/PARAM_mips.txt b/perf/LuaJIT-benches/PARAM_mips.txt > new file mode 100644 > index 00000000..e6bcadba > --- /dev/null > +++ b/perf/LuaJIT-benches/PARAM_mips.txt > @@ -0,0 +1,29 @@ > +array3d 50 > +binary-trees 10 > +chameneos 5e4 > +coroutine-ring 2e5 > +euler14-bit 2e4 > +fannkuch 8 > +fasta 2e4 > +k-nucleotide 1e4 FASTA_10000 > +life > +mandelbrot 150 > +mandelbrot-bit 150 > +md5 10 > +nbody 1e4 > +nsieve 4 > +nsieve-bit 4 > +nsieve-bit-fp 2 > +partialsums 5e4 > +pidigits-nogmp 150 > +ray 2 > +recursive-ack 7 > +recursive-fib 29 > +revcomp 5e4 FASTA_50000 > +scimark-fft 20 > +scimark-lu 3 > +scimark-sor 40 > +scimark-sparse 100 > +series 50 > +spectral-norm 100 > +sum-file 100 SUMCOL_100 > diff --git a/perf/LuaJIT-benches/PARAM_ppc.txt b/perf/LuaJIT-benches/PARAM_ppc.txt > new file mode 100644 > index 00000000..c8319a15 > --- /dev/null > +++ b/perf/LuaJIT-benches/PARAM_ppc.txt > @@ -0,0 +1,29 @@ > +array3d 200 > +binary-trees 13 > +chameneos 1e6 > +coroutine-ring 4e6 > +euler14-bit 1e6 > +fannkuch 9 > +fasta 5e5 > +k-nucleotide 1e5 FASTA_100000 > +life > +mandelbrot 
800 > +mandelbrot-bit 800 > +md5 500 > +nbody 1e5 > +nsieve 8 > +nsieve-bit 8 > +nsieve-bit-fp 8 > +partialsums 5e5 > +pidigits-nogmp 800 > +ray 5 > +recursive-ack 9 > +recursive-fib 34 > +revcomp 1e6 FASTA_1000000 > +scimark-fft 500 > +scimark-lu 100 > +scimark-sor 1000 > +scimark-sparse 3000 > +series 1000 > +spectral-norm 200 > +sum-file 1000 SUMCOL_1000 > diff --git a/perf/LuaJIT-benches/PARAM_x86.txt b/perf/LuaJIT-benches/PARAM_x86.txt > new file mode 100644 > index 00000000..87088d7b > --- /dev/null > +++ b/perf/LuaJIT-benches/PARAM_x86.txt > @@ -0,0 +1,29 @@ > +array3d 300 > +binary-trees 16 > +chameneos 1e7 > +coroutine-ring 2e7 > +euler14-bit 2e7 > +fannkuch 11 > +fasta 25e6 > +k-nucleotide 5e6 FASTA_5000000 > +life > +mandelbrot 5000 > +mandelbrot-bit 5000 > +md5 20000 > +nbody 5e6 > +nsieve 12 > +nsieve-bit 12 > +nsieve-bit-fp 12 > +partialsums 1e7 > +pidigits-nogmp 5000 > +ray 9 > +recursive-ack 10 > +recursive-fib 40 > +revcomp 5e6 FASTA_5000000 > +scimark-fft 50000 > +scimark-lu 5000 > +scimark-sor 50000 > +scimark-sparse 15e4 > +series 10000 > +spectral-norm 3000 > +sum-file 5000 SUMCOL_5000 > diff --git a/perf/LuaJIT-benches/SUMCOL_1.txt b/perf/LuaJIT-benches/SUMCOL_1.txt > new file mode 100644 > index 00000000..956aba14 > --- /dev/null > +++ b/perf/LuaJIT-benches/SUMCOL_1.txt > @@ -0,0 +1,1000 @@ > +276 > +498 > +-981 > +770 > +-401 > +702 > +966 > +950 > +-853 > +-53 > +-293 > +604 > +288 > +892 > +-697 > +204 > +96 > +408 > +880 > +-7 > +-817 > +422 > +-261 > +-485 > +-77 > +826 > +184 > +864 > +-751 > +626 > +812 > +-369 > +-353 > +-371 > +488 > +-83 > +-659 > +24 > +524 > +-21 > +840 > +-757 > +-17 > +-973 > +-843 > +260 > +858 > +-389 > +-521 > +-99 > +482 > +-561 > +-213 > +630 > +766 > +932 > +112 > +-419 > +-877 > +762 > +266 > +-837 > +170 > +834 > +746 > +764 > +922 > +-89 > +576 > +-63 > +90 > +684 > +316 > +506 > +-959 > +708 > +70 > +252 > +-747 > +342 > +-593 > +-895 > +-937 > +-707 > +350 > +588 > +-201 > +-683 > +-113 > +-511 > 
+-867 > +322 > +202 > +472 > +150 > +-9 > +-643 > +28 > +336 > +86 > +-925 > +836 > +-473 > +-451 > +-971 > +-805 > +-619 > +84 > +-67 > +806 > +270 > +366 > +334 > +-555 > +-557 > +-331 > +-409 > +-553 > +-145 > +-71 > +528 > +490 > +492 > +828 > +628 > +-961 > +536 > +-859 > +-271 > +974 > +-671 > +-749 > +414 > +-257 > +778 > +56 > +598 > +-437 > +-899 > +-785 > +-987 > +32 > +-999 > +132 > +-821 > +-209 > +402 > +-543 > +194 > +-967 > +294 > +-943 > +-285 > +-483 > +-97 > +660 > +-481 > +-829 > +-309 > +-597 > +-855 > +80 > +-355 > +192 > +-823 > +436 > +916 > +282 > +-629 > +612 > +-329 > +-535 > +780 > +-47 > +706 > +110 > +756 > +-857 > +-933 > +-345 > +-523 > +718 > +-31 > +902 > +678 > +540 > +698 > +456 > +-399 > +126 > +412 > +-563 > +-321 > +-487 > +-641 > +-195 > +-199 > +-955 > +772 > +570 > +18 > +-217 > +886 > +984 > +-721 > +-995 > +46 > +-989 > +946 > +64 > +716 > +-719 > +-869 > +-579 > +776 > +450 > +936 > +980 > +-439 > +-977 > +-455 > +-997 > +6 > +268 > +-269 > +-421 > +328 > +352 > +578 > +-575 > +476 > +976 > +-57 > +-469 > +544 > +582 > +-43 > +510 > +-939 > +-581 > +-337 > +-203 > +-737 > +-827 > +852 > +-279 > +-803 > +-911 > +-865 > +548 > +48 > +-75 > +416 > +-275 > +688 > +-255 > +-687 > +-461 > +-233 > +420 > +912 > +-901 > +-299 > +12 > +568 > +694 > +-411 > +-883 > +-327 > +-361 > +-339 > +646 > +-137 > +-905 > +670 > +686 > +-131 > +-849 > +-825 > +256 > +228 > +-841 > +68 > +368 > +-909 > +242 > +298 > +118 > +10 > +222 > +954 > +-493 > +-459 > +-445 > +608 > +-765 > +34 > +468 > +-715 > +690 > +-185 > +-551 > +-571 > +-241 > +292 > +92 > +768 > +-923 > +956 > +614 > +8 > +730 > +208 > +-417 > +300 > +136 > +-59 > +-251 > +-539 > +166 > +798 > +866 > +454 > +-391 > +-317 > +668 > +502 > +-15 > +994 > +854 > +-189 > +666 > +446 > +-565 > +-5 > +42 > +-227 > +-87 > +-779 > +26 > +312 > +354 > +754 > +396 > +-515 > +220 > +872 > +654 > +88 > +-667 > +250 > +572 > +952 > +72 > +982 > +972 > +-529 > +-471 > +-533 > +-427 > +538 > +154 
> +-457 > +-819 > +750 > +152 > +452 > +-41 > +838 > +-489 > +418 > +-649 > +-637 > +-197 > +74 > +394 > +-653 > +-727 > +-435 > +-23 > +348 > +638 > +-611 > +914 > +-357 > +-743 > +-685 > +580 > +-247 > +-577 > +54 > +-931 > +-3 > +558 > +-793 > +-443 > +-759 > +162 > +-811 > +384 > +720 > +-117 > +900 > +-519 > +-39 > +744 > +432 > +286 > +-873 > +380 > +-167 > +-283 > +430 > +-155 > +-755 > +206 > +100 > +364 > +-677 > +332 > +-567 > +382 > +-605 > +-181 > +676 > +-475 > +-845 > +910 > +546 > +14 > +398 > +616 > +-769 > +424 > +992 > +-235 > +-239 > +774 > +478 > +-919 > +168 > +-771 > +-773 > +-69 > +-509 > +930 > +550 > +-463 > +178 > +-861 > +-761 > +-795 > +234 > +-831 > +-61 > +-979 > +-851 > +-665 > +-709 > +896 > +742 > +-123 > +590 > +-693 > +-887 > +-379 > +144 > +-717 > +20 > +174 > +82 > +464 > +30 > +-969 > +-349 > +-531 > +-799 > +-661 > +-647 > +-623 > +878 > +148 > +-545 > +238 > +-259 > +554 > +726 > +-37 > +-797 > +98 > +78 > +-591 > +-975 > +962 > +120 > +906 > +-207 > +656 > +-171 > +652 > +188 > +672 > +-133 > +-91 > +224 > +818 > +-333 > +-839 > +-499 > +22 > +-739 > +142 > +378 > +-403 > +-315 > +370 > +284 > +122 > +230 > +-527 > +-127 > +442 > +534 > +160 > +722 > +262 > +-657 > +304 > +258 > +-103 > +960 > +-495 > +-265 > +634 > +-101 > +480 > +-363 > +308 > +76 > +-949 > +-585 > +904 > +146 > +-703 > +164 > +850 > +246 > +732 > +-725 > +566 > +274 > +-163 > +-935 > +-681 > +-229 > +254 > +-733 > +-547 > +-273 > +-903 > +736 > +-711 > +794 > +392 > +-655 > +-549 > +808 > +-429 > +484 > +-701 > +-617 > +804 > +36 > +-775 > +-335 > +-927 > +714 > +-177 > +-325 > +-413 > +-963 > +114 > +-253 > +-789 > +-645 > +40 > +434 > +898 > +924 > +-19 > +738 > +788 > +280 > +-121 > +594 > +-913 > +426 > +816 > +-373 > +-45 > +340 > +-109 > +-323 > +58 > +-249 > +940 > +-297 > +988 > +998 > +-607 > +-745 > +-633 > +-115 > +996 > +-893 > +696 > +400 > +848 > +500 > +-263 > +562 > +-807 > +-105 > +-603 > +658 > +-73 > +-863 > +448 > +680 > +-157 > +-161 
> +728 > +814 > +-477 > +-375 > +1000 > +-631 > +-991 > +362 > +156 > +-187 > +-705 > +-917 > +-449 > +-741 > +556 > +440 > +-589 > +-11 > +-359 > +-891 > +-801 > +-153 > +-381 > +938 > +-173 > +-243 > +618 > +-599 > +-497 > +486 > +128 > +790 > +460 > +-27 > +-305 > +-205 > +-215 > +324 > +-341 > +50 > +458 > +52 > +-621 > +874 > +386 > +560 > +-569 > +-51 > +802 > +786 > +920 > +-425 > +466 > +444 > +-507 > +-915 > +346 > +622 > +-679 > +784 > +-689 > +388 > +508 > +-613 > +-313 > +-447 > +564 > +-897 > +-211 > +-225 > +-615 > +-367 > +186 > +894 > +-65 > +-453 > +-245 > +602 > +496 > +-651 > +-601 > +820 > +226 > +-695 > +-119 > +372 > +180 > +94 > +214 > +542 > +648 > +-871 > +592 > +584 > +824 > +796 > +374 > +-945 > +-311 > +516 > +942 > +-221 > +-433 > +200 > +-465 > +-953 > +870 > +868 > +-879 > +518 > +356 > +-223 > +682 > +990 > +-191 > +-541 > +-951 > +-921 > +-319 > +-169 > +-291 > +-289 > +792 > +876 > +306 > +-491 > +326 > +-885 > +62 > +514 > +-929 > +318 > +-231 > +632 > +44 > +-107 > +644 > +-267 > +-343 > +-847 > +934 > +734 > +-505 > +-351 > +574 > +-627 > +636 > +-93 > +-431 > +-835 > +428 > +-183 > +-151 > +2 > +-813 > +-595 > +958 > +-141 > +692 > +-385 > +610 > +-179 > +376 > +948 > +198 > +-675 > +964 > +-907 > +918 > +-165 > +-1 > +406 > +748 > +-111 > +532 > +-55 > +-281 > +740 > +504 > +236 > +-29 > +662 > +-713 > +-537 > +196 > +-587 > +822 > +-135 > +700 > +-35 > +674 > +-407 > +240 > +-673 > +-669 > +-393 > +470 > +-525 > +-875 > +-383 > +-625 > +296 > +-85 > +-147 > +-277 > +800 > +-691 > +-143 > +16 > +-983 > +-303 > +290 > +-139 > +172 > +320 > +512 > +596 > +640 > +664 > +-791 > +-783 > +-387 > +-735 > +-467 > +-301 > +810 > +134 > +216 > +278 > +176 > +606 > +140 > +-787 > +978 > +586 > +890 > +882 > +-753 > +-13 > +970 > +-941 > +-175 > +-777 > +-809 > +-441 > +-347 > +-377 > +390 > +-423 > +842 > +642 > +190 > +302 > +438 > +704 > +310 > +-49 > +124 > +-781 > +-287 > +724 > +-767 > +830 > +620 > +-295 > +244 > +-159 > +-307 > 
+-397 > +66 > +-237 > +314 > +-79 > +624 > +710 > +272 > +-365 > +928 > +856 > +138 > +-479 > +520 > +832 > +862 > +760 > +846 > +-81 > +106 > +-513 > +-193 > +650 > +782 > +-517 > +944 > +218 > +712 > +-663 > +-559 > +462 > +-635 > +-25 > +182 > +530 > +844 > +330 > +-833 > +102 > +-881 > +108 > +-947 > +-763 > +-405 > +232 > +410 > +104 > +-729 > +-149 > +-889 > +888 > +360 > +968 > +908 > +116 > +-815 > +-129 > +522 > +-723 > +-993 > +860 > +-503 > +926 > +-219 > +-415 > +60 > +158 > +-609 > +-501 > +986 > +-699 > +-583 > +884 > +212 > +210 > +-957 > +526 > +-985 > +552 > +344 > +-395 > +-95 > +338 > +248 > +494 > +130 > +404 > +358 > +600 > +-639 > +-125 > +-33 > +-965 > +752 > +474 > +-731 > +758 > +-573 > +4 > +38 > +264 > diff --git a/perf/LuaJIT-benches/TEST_md5sum.txt b/perf/LuaJIT-benches/TEST_md5sum.txt > new file mode 100644 > index 00000000..15aa8a1c > --- /dev/null > +++ b/perf/LuaJIT-benches/TEST_md5sum.txt > @@ -0,0 +1,20 @@ > +binarytrees 10 7202f4e13df7abc5ad8c07f05fe9d644 > +chameneos 1e5 a629ce12f63050c6656bce175258cf8f > +cheapconcr 1000 d29799d1e263810a4db7bbf43ca66499 > +cheapconcw 1000 d29799d1e263810a4db7bbf43ca66499 > +fannkuch 8 51e5e372cbc5471ea8940b20ad782319 > +fasta 1e5 78cd327de6f0a5667da0aa9349888279 > +knucleotide x 88efb24c1fed533959ed84bb32c88142 +mandelbrot 200 cc65e64bd553ed18896de1dfe7fae3e5 > +meteor 3000 9a65bb4b0a735ace1eaa4f2628f01026 > +nbody 1e4 e0361c898ba747117ec177f7b3b3359c > +nsieve 4 767e02c93624995732e151932fa5f304 > +nsievebits 4 767e02c93624995732e151932fa5f304 > +partialsums 1e5 33efb41c72f8ecfb5b36c99e32189a3f > +pidigits 200 173a11a77bb1e72dd31254a760317428 > +recursive 4 07a47c2d2cf50503b16efda789f84916 > +regexdna x fdf3e6e9c599754e1eec3e524ea13fed +revcomp x 47de276e2f72519b57b82da39f4c7592 +spectralnorm 200 25f44bd552ccd9faa0ee2ae5617947e2 > +sumfile x 2ebd3caa45b31a2e74e436b645eab4b0 + > diff --git a/perf/LuaJIT-benches/array3d.lua b/perf/LuaJIT-benches/array3d.lua > new file mode 100644 > index 
00000000..c10b09b1 > --- /dev/null > +++ b/perf/LuaJIT-benches/array3d.lua > @@ -0,0 +1,59 @@ > + > +local function array_set(self, x, y, z, p) > + assert(x >= 0 and x < self.nx, "x outside PA") > + assert(y >= 0 and y < self.ny, "y outside PA") > + assert(z >= 0 and z < self.nz, "z outside PA") > + local pos = (z*self.ny + y)*self.nx + x > + local image = self.image > + if self.packed then > + local maxv = self.max_voltage > + if p > maxv then self.max_voltage = p*2.0 end > + local oldp = image[pos] or 0.0 -- Works with uninitialized table, too > + if oldp > maxv then p = p + maxv*2.0 end > + image[pos] = p > + else > + image[pos] = p > + end > + self.changed = true > + self.changed_recently = true > +end > + > +local function array_points(self) > + local y, z = 0, 0 > + return function(self, x) > + x = x + 1 > + if x >= self.nx then > + x = 0 > + y = y + 1 > + if y >= self.ny then > + y = 0 > + z = z + 1 > + if z >= self.nz then > + return nil, nil, nil > + end > + end > + end > + return x, y, z > + end, self, 0 > +end > + > +local function array_new(nx, ny, nz, packed) > + return { > + nx = nx, ny = ny, nz = nz, > + packed = packed, max_voltage = 0.0, > + changed = false, changed_recently = false, > + image = {}, -- Preferably use a fixed-type, pre-sized array here. 
> + set = array_set, > + points = array_points, > + } > +end > + > +local dim = tonumber(arg and arg[1]) or 300 -- Array dimension dim^3 > +local packed = arg and arg[2] == "packed" -- Packed image or flat > +local arr = array_new(dim, dim, dim, packed) > + > +for x,y,z in arr:points() do > + arr:set(x, y, z, x*x) > +end > +assert(arr.image[dim^3-1] == (dim-1)^2) > + > diff --git a/perf/LuaJIT-benches/binary-trees.lua b/perf/LuaJIT-benches/binary-trees.lua > new file mode 100644 > index 00000000..bf040466 > --- /dev/null > +++ b/perf/LuaJIT-benches/binary-trees.lua > @@ -0,0 +1,47 @@ > + > +local function BottomUpTree(item, depth) > + if depth > 0 then > + local i = item + item > + depth = depth - 1 > + local left, right = BottomUpTree(i-1, depth), BottomUpTree(i, depth) > + return { item, left, right } > + else > + return { item } > + end > +end > + > +local function ItemCheck(tree) > + if tree[2] then > + return tree[1] + ItemCheck(tree[2]) - ItemCheck(tree[3]) > + else > + return tree[1] > + end > +end > + > +local N = tonumber(arg and arg[1]) or 0 > +local mindepth = 4 > +local maxdepth = mindepth + 2 > +if maxdepth < N then maxdepth = N end > + > +do > + local stretchdepth = maxdepth + 1 > + local stretchtree = BottomUpTree(0, stretchdepth) > + io.write(string.format("stretch tree of depth %d\t check: %d\n", > + stretchdepth, ItemCheck(stretchtree))) > +end > + > +local longlivedtree = BottomUpTree(0, maxdepth) > + > +for depth=mindepth,maxdepth,2 do > + local iterations = 2 ^ (maxdepth - depth + mindepth) > + local check = 0 > + for i=1,iterations do > + check = check + ItemCheck(BottomUpTree(1, depth)) + > + ItemCheck(BottomUpTree(-1, depth)) > + end > + io.write(string.format("%d\t trees of depth %d\t check: %d\n", > + iterations*2, depth, check)) > +end > + > +io.write(string.format("long lived tree of depth %d\t check: %d\n", > + maxdepth, ItemCheck(longlivedtree))) > diff --git a/perf/LuaJIT-benches/chameneos.lua b/perf/LuaJIT-benches/chameneos.lua > new
file mode 100644 > index 00000000..78b64c3f > --- /dev/null > +++ b/perf/LuaJIT-benches/chameneos.lua > @@ -0,0 +1,68 @@ > + > +local co = coroutine > +local create, resume, yield = co.create, co.resume, co.yield > + > +local N = tonumber(arg and arg[1]) or 10 > +local first, second > + > +-- Meet another creature. > +local function meet(me) > + while second do yield() end -- Wait until meeting place clears. > + local other = first > + if other then -- Hey, I found a new friend! > + first = nil > + second = me > + else -- Sniff, nobody here (yet). > + local n = N - 1 > + if n < 0 then return end -- Uh oh, the mall is closed. > + N = n > + first = me > + repeat yield(); other = second until other -- Wait for another creature. > + second = nil > + yield() -- Be nice and let others meet up. > + end > + return other > +end > + > +-- Create a very social creature. > +local function creature(color) > + return create(function() > + local me = color > + for met=0,1000000000 do > + local other = meet(me) > + if not other then return met end > + if me ~= other then > + if me == "blue" then me = other == "red" and "yellow" or "red" > + elseif me == "red" then me = other == "blue" and "yellow" or "blue" > + else me = other == "blue" and "red" or "blue" end > + end > + end > + end) > +end > + > +-- Trivial round-robin scheduler. > +local function schedule(threads) > + local resume = resume > + local nthreads, meetings = #threads, 0 > + repeat > + for i=1,nthreads do > + local thr = threads[i] > + if not thr then return meetings end > + local ok, met = resume(thr) > + if met then > + meetings = meetings + met > + threads[i] = nil > + end > + end > + until false > +end > + > +-- A bunch of colorful creatures. 
> +local threads = { > + creature("blue"), > + creature("red"), > + creature("yellow"), > + creature("blue"), > +} > + > +io.write(schedule(threads), "\n") > diff --git a/perf/LuaJIT-benches/coroutine-ring.lua b/perf/LuaJIT-benches/coroutine-ring.lua > new file mode 100644 > index 00000000..1e8c5ef6 > --- /dev/null > +++ b/perf/LuaJIT-benches/coroutine-ring.lua > @@ -0,0 +1,42 @@ > +-- The Computer Language Benchmarks Game > +--http://shootout.alioth.debian.org/ > +-- contributed by Sam Roberts > +-- reviewed by Bruno Massa > + > +local n = tonumber(arg and arg[1]) or 2e7 > + > +-- fixed size pool > +local poolsize = 503 > +local threads = {} > + > +-- cache these to avoid global environment lookups > +local create = coroutine.create > +local resume = coroutine.resume > +local yield = coroutine.yield > + > +local id = 1 > +local token = 0 > +local ok > + > +local body = function(token) > + while true do > + token = yield(token + 1) > + end > +end > + > +-- create all threads > +for id = 1, poolsize do > + threads[id] = create(body) > +end > + > +-- send the token > +repeat > + if id == poolsize then > + id = 1 > + else > + id = id + 1 > + end > + ok, token = resume(threads[id], token) > +until token == n > + > +io.write(id, "\n") > diff --git a/perf/LuaJIT-benches/euler14-bit.lua b/perf/LuaJIT-benches/euler14-bit.lua > new file mode 100644 > index 00000000..537f2bf3 > --- /dev/null > +++ b/perf/LuaJIT-benches/euler14-bit.lua > @@ -0,0 +1,22 @@ > + > +local bit = require("bit") > +local bnot, bor, band = bit.bnot, bit.bor, bit.band > +local shl, shr = bit.lshift, bit.rshift > + > +local N = tonumber(arg and arg[1]) or 10000000 > +local cache, m, n = { 1 }, 1, 1 > +if arg and arg[2] then cache = nil end > +for i=2,N do > + local j = i > + for len=1,1000000000 do > + j = bor(band(shr(j,1), band(j,1)-1), band(shl(j,1)+j+1, bnot(band(j,1)-1))) > + if cache then > + local x = cache[j]; if x then j = x+len; break end > + elseif j == 1 then > + j = len+1; break > + end > + 
end > + if cache then cache[i] = j end > + if j > m then m, n = j, i end > +end > +io.write("Found ", n, " (chain length: ", m, ")\n") > diff --git a/perf/LuaJIT-benches/fannkuch.lua b/perf/LuaJIT-benches/fannkuch.lua > new file mode 100644 > index 00000000..2a4cd426 > --- /dev/null > +++ b/perf/LuaJIT-benches/fannkuch.lua > @@ -0,0 +1,50 @@ > + > +local function fannkuch(n) > + local p, q, s, odd, check, maxflips = {}, {}, {}, true, 0, 0 > + for i=1,n do p[i] = i; q[i] = i; s[i] = i end > + repeat > + -- Print max. 30 permutations. > + if check < 30 then > + if not p[n] then return maxflips end -- Catch n = 0, 1, 2. > + io.write(unpack(p)); io.write("\n") > + check = check + 1 > + end > + -- Copy and flip. > + local q1 = p[1] -- Cache 1st element. > + if p[n] ~= n and q1 ~= 1 then -- Avoid useless work. > + for i=2,n do q[i] = p[i] end -- Work on a copy. > + local flips = 1 -- Flip ... > + while true do > + local qq = q[q1] > + if qq == 1 then -- ... until 1st element is 1. > + if flips > maxflips then maxflips = flips end -- New maximum? > + break > + end > + q[q1] = q1 > + if q1 >= 4 then > + local i, j = 2, q1 - 1 > + repeat q[i], q[j] = q[j], q[i]; i = i + 1; j = j - 1; until i >= j > + end > + q1 = qq > + flips=flips+1 > + end > + end > + -- Permute. > + if odd then > + p[2], p[1] = p[1], p[2]; odd = false -- Rotate 1<-2. > + else > + p[2], p[3] = p[3], p[2]; odd = true -- Rotate 1<-2 and 1<-2<-3. > + for i=3,n do > + local sx = s[i] > + if sx ~= 1 then s[i] = sx-1; break end > + if i == n then return maxflips end -- Out of permutations. > + s[i] = i > + -- Rotate 1<-...<-i+1. 
> + local t=p[1]; for j=i+1,1,-1 do p[j],t=t,p[j] end > + end > + end > + until false > +end > + > +local n = tonumber(arg and arg[1]) or 1 > +io.write("Pfannkuchen(", n, ") = ", fannkuch(n), "\n") > diff --git a/perf/LuaJIT-benches/fasta.lua b/perf/LuaJIT-benches/fasta.lua > new file mode 100644 > index 00000000..7ce60804 > --- /dev/null > +++ b/perf/LuaJIT-benches/fasta.lua > @@ -0,0 +1,95 @@ > + > +local Last = 42 > +local function random(max) > + local y = (Last * 3877 + 29573) % 139968 > + Last = y > + return (max * y) / 139968 > +end > + > +local function make_repeat_fasta(id, desc, s, n) > + local write, sub = io.write, string.sub > + write(">", id, " ", desc, "\n") > + local p, sn, s2 = 1, #s, s..s > + for i=60,n,60 do > + write(sub(s2, p, p + 59), "\n") > + p = p + 60; if p > sn then p = p - sn end > + end > + local tail = n % 60 > + if tail > 0 then write(sub(s2, p, p + tail-1), "\n") end > +end > + > +local function make_random_fasta(id, desc, bs, n) > + io.write(">", id, " ", desc, "\n") > + loadstring([=[ > + local write, char, unpack, n, random = io.write, string.char, unpack, ... > + local buf, p = {}, 1 > + for i=60,n,60 do > + for j=p,p+59 do ]=]..bs..[=[ end > + buf[p+60] = 10; p = p + 61 > + if p >= 2048 then write(char(unpack(buf, 1, p-1))); p = 1 end > + end > + local tail = n % 60 > + if tail > 0 then > + for j=p,p+tail-1 do ]=]..bs..[=[ end > + p = p + tail; buf[p] = 10; p = p + 1 > + end > + write(char(unpack(buf, 1, p-1))) > + ]=], desc)(n, random) > +end > + > +local function bisect(c, p, lo, hi) > + local n = hi - lo > + if n == 0 then return "buf[j] = "..c[hi].."\n" end > + local mid = math.floor(n / 2) > + return "if r < "..p[lo+mid].." then\n"..bisect(c, p, lo, lo+mid).. 
> + "else\n"..bisect(c, p, lo+mid+1, hi).."end\n" > +end > + > +local function make_bisect(tab) > + local c, p, sum = {}, {}, 0 > + for i,row in ipairs(tab) do > + c[i] = string.byte(row[1]) > + sum = sum + row[2] > + p[i] = sum > + end > + return "local r = random(1)\n"..bisect(c, p, 1, #tab) > +end > + > +local alu = > + "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG".. > + "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA".. > + "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT".. > + "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA".. > + "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG".. > + "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC".. > + "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA" > + > +local iub = make_bisect{ > + { "a", 0.27 }, > + { "c", 0.12 }, > + { "g", 0.12 }, > + { "t", 0.27 }, > + { "B", 0.02 }, > + { "D", 0.02 }, > + { "H", 0.02 }, > + { "K", 0.02 }, > + { "M", 0.02 }, > + { "N", 0.02 }, > + { "R", 0.02 }, > + { "S", 0.02 }, > + { "V", 0.02 }, > + { "W", 0.02 }, > + { "Y", 0.02 }, > +} > + > +local homosapiens = make_bisect{ > + { "a", 0.3029549426680 }, > + { "c", 0.1979883004921 }, > + { "g", 0.1975473066391 }, > + { "t", 0.3015094502008 }, > +} > + > +local N = tonumber(arg and arg[1]) or 1000 > +make_repeat_fasta('ONE', 'Homo sapiens alu', alu, N*2) > +make_random_fasta('TWO', 'IUB ambiguity codes', iub, N*3) > +make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, N*5) > diff --git a/perf/LuaJIT-benches/k-nucleotide.lua b/perf/LuaJIT-benches/k-nucleotide.lua > new file mode 100644 > index 00000000..0bfb41be > --- /dev/null > +++ b/perf/LuaJIT-benches/k-nucleotide.lua > @@ -0,0 +1,58 @@ > + > +local function kfrequency(seq, freq, k, frame) > + local sub = string.sub > + local k1 = k - 1 > + for i=frame,#seq-k1,k do > + local c = sub(seq, i, i+k1) > + freq[c] = (freq[c] or 0) + 1 > + end > +end > + > +local function count(seq, frag) > + local k = #frag > + local freq = {} > + for frame=1,k do kfrequency(seq, freq, k, frame) end > + io.write(freq[frag] or 0, 
"\t", frag, "\n") > +end > + > +local function frequency(seq, k) > + local freq = {} > + for frame=1,k do kfrequency(seq, freq, k, frame) end > + local sfreq, sn, sum = {}, 1, 0 > + for c,v in pairs(freq) do sfreq[sn] = c; sn = sn + 1; sum = sum + v end > + table.sort(sfreq, function(a, b) > + local fa, fb = freq[a], freq[b] > + return fa == fb and a > b or fa > fb > + end) > + for _,c in ipairs(sfreq) do > + io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum)) > + end > + io.write("\n") > +end > + > +local function readseq() > + local sub = string.sub > + for line in io.lines() do > + if sub(line, 1, 1) == ">" and sub(line, 2, 6) == "THREE" then break end > + end > + local lines, ln = {}, 0 > + for line in io.lines() do > + local c = sub(line, 1, 1) > + if c == ">" then > + break > + elseif c ~= ";" then > + ln = ln + 1 > + lines[ln] = line > + end > + end > + return string.upper(table.concat(lines, "", 1, ln)) > +end > + > +local seq = readseq() > +frequency(seq, 1) > +frequency(seq, 2) > +count(seq, "GGT") > +count(seq, "GGTA") > +count(seq, "GGTATT") > +count(seq, "GGTATTTTAATT") > +count(seq, "GGTATTTTAATTTATAGT") > diff --git a/perf/LuaJIT-benches/life.lua b/perf/LuaJIT-benches/life.lua > new file mode 100644 > index 00000000..911d9fe1 > --- /dev/null > +++ b/perf/LuaJIT-benches/life.lua > @@ -0,0 +1,111 @@ > +-- life.lua > +-- original by Dave Bollinger posted to lua-l > +-- modified to use ANSI terminal escape sequences > +-- modified to use for instead of while > + > +local write=io.write > + > +ALIVE="�" DEAD="�" > +ALIVE="O" DEAD="-" > + > +function delay() -- NOTE: SYSTEM-DEPENDENT, adjust as necessary > + for i=1,10000 do end > + -- local i=os.clock()+1 while(os.clock() +end > + > +function ARRAY2D(w,h) > + local t = {w=w,h=h} > + for y=1,h do > + t[y] = {} > + for x=1,w do > + t[y][x]=0 > + end > + end > + return t > +end > + > +_CELLS = {} > + > +-- give birth to a "shape" within the cell array > +function _CELLS:spawn(shape,left,top) > + for 
y=0,shape.h-1 do > + for x=0,shape.w-1 do > + self[top+y][left+x] = shape[y*shape.w+x+1] > + end > + end > +end > + > +-- run the CA and produce the next generation > +function _CELLS:evolve(next) > + local ym1,y,yp1,yi=self.h-1,self.h,1,self.h > + while yi > 0 do > + local xm1,x,xp1,xi=self.w-1,self.w,1,self.w > + while xi > 0 do > + local sum = self[ym1][xm1] + self[ym1][x] + self[ym1][xp1] + > + self[y][xm1] + self[y][xp1] + > + self[yp1][xm1] + self[yp1][x] + self[yp1][xp1] > + next[y][x] = ((sum==2) and self[y][x]) or ((sum==3) and 1) or 0 > + xm1,x,xp1,xi = x,xp1,xp1+1,xi-1 > + end > + ym1,y,yp1,yi = y,yp1,yp1+1,yi-1 > + end > +end > + > +-- output the array to screen > +function _CELLS:draw() > + local out="" -- accumulate to reduce flicker > + for y=1,self.h do > + for x=1,self.w do > + out=out..(((self[y][x]>0) and ALIVE) or DEAD) > + end > + out=out.."\n" > + end > + write(out) > +end > + > +-- constructor > +function CELLS(w,h) > + local c = ARRAY2D(w,h) > + c.spawn = _CELLS.spawn > + c.evolve = _CELLS.evolve > + c.draw = _CELLS.draw > + return c > +end > + > +-- > +-- shapes suitable for use with spawn() above > +-- > +HEART = { 1,0,1,1,0,1,1,1,1; w=3,h=3 } > +GLIDER = { 0,0,1,1,0,1,0,1,1; w=3,h=3 } > +EXPLODE = { 0,1,0,1,1,1,1,0,1,0,1,0; w=3,h=4 } > +FISH = { 0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,1,0; w=5,h=4 } > +BUTTERFLY = { 1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1; w=5,h=5 } > + > +-- the main routine > +function LIFE(w,h) > + -- create two arrays > + local thisgen = CELLS(w,h) > + local nextgen = CELLS(w,h) > + > + -- create some life > + -- about 1000 generations of fun, then a glider steady-state > +thisgen:spawn(GLIDER,5,4) > +thisgen:spawn(EXPLODE,25,10) > +thisgen:spawn(FISH,4,12) > + > + -- run until break > + local gen=1 > + write("\027[2J") -- ANSI clear screen > + while 1 do > +thisgen:evolve(nextgen) > + thisgen,nextgen = nextgen,thisgen > + write("\027[H") -- ANSI home cursor > +thisgen:draw() > + write("Life - generation 
",gen,"\n") > + gen=gen+1 > + if gen>2000 then break end > + --delay() -- no delay > + end > +end > + > +LIFE(40,20) > diff --git a/perf/LuaJIT-benches/mandelbrot-bit.lua b/perf/LuaJIT-benches/mandelbrot-bit.lua > new file mode 100644 > index 00000000..91d96975 > --- /dev/null > +++ b/perf/LuaJIT-benches/mandelbrot-bit.lua > @@ -0,0 +1,33 @@ > + > +local bit = require("bit") > +local bor, band = bit.bor, bit.band > +local shl, shr, rol = bit.lshift, bit.rshift, bit.rol > +local write, char, unpack = io.write, string.char, unpack > +local N = tonumber(arg and arg[1]) or 100 > +local M, buf = 2/N, {} > +write("P4\n", N, " ", N, "\n") > +for y=0,N-1 do > + local Ci, b, p = y*M-1, -16777216, 0 > + local Ciq = Ci*Ci > + for x=0,N-1,2 do > + local Cr, Cr2 = x*M-1.5, (x+1)*M-1.5 > + local Zr, Zi, Zrq, Ziq = Cr, Ci, Cr*Cr, Ciq > + local Zr2, Zi2, Zrq2, Ziq2 = Cr2, Ci, Cr2*Cr2, Ciq > + b = rol(b, 2) > + for i=1,49 do > + Zi = Zr*Zi*2 + Ci; Zi2 = Zr2*Zi2*2 + Ci > + Zr = Zrq-Ziq + Cr; Zr2 = Zrq2-Ziq2 + Cr2 > + Ziq = Zi*Zi; Ziq2 = Zi2*Zi2 > + Zrq = Zr*Zr; Zrq2 = Zr2*Zr2 > + if band(b, 2) ~= 0 and Zrq+Ziq > 4.0 then b = band(b, -3) end > + if band(b, 1) ~= 0 and Zrq2+Ziq2 > 4.0 then b = band(b, -2) end > + if band(b, 3) == 0 then break end > + end > + if b >= 0 then p = p + 1; buf[p] = b; b = -16777216; end > + end > + if b ~= -16777216 then > + if band(N, 1) ~= 0 then b = shr(b, 1) end > + p = p + 1; buf[p] = shl(b, 8-band(N, 7)) > + end > + write(char(unpack(buf, 1, p))) > +end > diff --git a/perf/LuaJIT-benches/mandelbrot.lua b/perf/LuaJIT-benches/mandelbrot.lua > new file mode 100644 > index 00000000..0ef595a2 > --- /dev/null > +++ b/perf/LuaJIT-benches/mandelbrot.lua > @@ -0,0 +1,23 @@ > + > +local write, char, unpack = io.write, string.char, unpack > +local N = tonumber(arg and arg[1]) or 100 > +local M, ba, bb, buf = 2/N, 2^(N%8+1)-1, 2^(8-N%8), {} > +write("P4\n", N, " ", N, "\n") > +for y=0,N-1 do > + local Ci, b, p = y*M-1, 1, 0 > + for x=0,N-1 do > + local Cr = 
x*M-1.5 > + local Zr, Zi, Zrq, Ziq = Cr, Ci, Cr*Cr, Ci*Ci > + b = b + b > + for i=1,49 do > + Zi = Zr*Zi*2 + Ci > + Zr = Zrq-Ziq + Cr > + Ziq = Zi*Zi > + Zrq = Zr*Zr > + if Zrq+Ziq > 4.0 then b = b + 1; break; end > + end > + if b >= 256 then p = p + 1; buf[p] = 511 - b; b = 1; end > + end > + if b ~= 1 then p = p + 1; buf[p] = (ba-b)*bb; end > + write(char(unpack(buf, 1, p))) > +end > diff --git a/perf/LuaJIT-benches/md5.lua b/perf/LuaJIT-benches/md5.lua > new file mode 100644 > index 00000000..fdf6b4a7 > --- /dev/null > +++ b/perf/LuaJIT-benches/md5.lua > @@ -0,0 +1,183 @@ > + > +local bit = require("bit") > +local tobit, tohex, bnot = bit.tobit or bit.cast, bit.tohex, bit.bnot > +local bor, band, bxor = bit.bor, bit.band, bit.bxor > +local lshift, rshift, rol, bswap = bit.lshift, bit.rshift, bit.rol, bit.bswap > +local byte, char, sub, rep = string.byte, string.char, string.sub, string.rep > + > +if not rol then -- Replacement function if rotates are missing. > + local bor, shl, shr = bit.bor, bit.lshift, bit.rshift > + function rol(a, b) return bor(shl(a, b), shr(a, 32-b)) end > +end > + > +if not bswap then -- Replacement function if bswap is missing. > + local bor, band, shl, shr = bit.bor, bit.band, bit.lshift, bit.rshift > + function bswap(a) > + return bor(shr(a, 24), band(shr(a, 8), 0xff00), > + shl(band(a, 0xff00), 8), shl(a, 24)); > + end > +end > + > +if not tohex then -- (Unreliable) replacement function if tohex is missing. 
> + function tohex(a) > + return string.sub(string.format("%08x", a), -8) > + end > +end > + > +local function tr_f(a, b, c, d, x, s) > + return rol(bxor(d, band(b, bxor(c, d))) + a + x, s) + b > +end > + > +local function tr_g(a, b, c, d, x, s) > + return rol(bxor(c, band(d, bxor(b, c))) + a + x, s) + b > +end > + > +local function tr_h(a, b, c, d, x, s) > + return rol(bxor(b, c, d) + a + x, s) + b > +end > + > +local function tr_i(a, b, c, d, x, s) > + return rol(bxor(c, bor(b, bnot(d))) + a + x, s) + b > +end > + > +local function transform(x, a1, b1, c1, d1) > + local a, b, c, d = a1, b1, c1, d1 > + > + a = tr_f(a, b, c, d, x[ 1] + 0xd76aa478, 7) > + d = tr_f(d, a, b, c, x[ 2] + 0xe8c7b756, 12) > + c = tr_f(c, d, a, b, x[ 3] + 0x242070db, 17) > + b = tr_f(b, c, d, a, x[ 4] + 0xc1bdceee, 22) > + a = tr_f(a, b, c, d, x[ 5] + 0xf57c0faf, 7) > + d = tr_f(d, a, b, c, x[ 6] + 0x4787c62a, 12) > + c = tr_f(c, d, a, b, x[ 7] + 0xa8304613, 17) > + b = tr_f(b, c, d, a, x[ 8] + 0xfd469501, 22) > + a = tr_f(a, b, c, d, x[ 9] + 0x698098d8, 7) > + d = tr_f(d, a, b, c, x[10] + 0x8b44f7af, 12) > + c = tr_f(c, d, a, b, x[11] + 0xffff5bb1, 17) > + b = tr_f(b, c, d, a, x[12] + 0x895cd7be, 22) > + a = tr_f(a, b, c, d, x[13] + 0x6b901122, 7) > + d = tr_f(d, a, b, c, x[14] + 0xfd987193, 12) > + c = tr_f(c, d, a, b, x[15] + 0xa679438e, 17) > + b = tr_f(b, c, d, a, x[16] + 0x49b40821, 22) > + > + a = tr_g(a, b, c, d, x[ 2] + 0xf61e2562, 5) > + d = tr_g(d, a, b, c, x[ 7] + 0xc040b340, 9) > + c = tr_g(c, d, a, b, x[12] + 0x265e5a51, 14) > + b = tr_g(b, c, d, a, x[ 1] + 0xe9b6c7aa, 20) > + a = tr_g(a, b, c, d, x[ 6] + 0xd62f105d, 5) > + d = tr_g(d, a, b, c, x[11] + 0x02441453, 9) > + c = tr_g(c, d, a, b, x[16] + 0xd8a1e681, 14) > + b = tr_g(b, c, d, a, x[ 5] + 0xe7d3fbc8, 20) > + a = tr_g(a, b, c, d, x[10] + 0x21e1cde6, 5) > + d = tr_g(d, a, b, c, x[15] + 0xc33707d6, 9) > + c = tr_g(c, d, a, b, x[ 4] + 0xf4d50d87, 14) > + b = tr_g(b, c, d, a, x[ 9] + 0x455a14ed, 20) > + a = tr_g(a, b, c, 
d, x[14] + 0xa9e3e905, 5) > + d = tr_g(d, a, b, c, x[ 3] + 0xfcefa3f8, 9) > + c = tr_g(c, d, a, b, x[ 8] + 0x676f02d9, 14) > + b = tr_g(b, c, d, a, x[13] + 0x8d2a4c8a, 20) > + > + a = tr_h(a, b, c, d, x[ 6] + 0xfffa3942, 4) > + d = tr_h(d, a, b, c, x[ 9] + 0x8771f681, 11) > + c = tr_h(c, d, a, b, x[12] + 0x6d9d6122, 16) > + b = tr_h(b, c, d, a, x[15] + 0xfde5380c, 23) > + a = tr_h(a, b, c, d, x[ 2] + 0xa4beea44, 4) > + d = tr_h(d, a, b, c, x[ 5] + 0x4bdecfa9, 11) > + c = tr_h(c, d, a, b, x[ 8] + 0xf6bb4b60, 16) > + b = tr_h(b, c, d, a, x[11] + 0xbebfbc70, 23) > + a = tr_h(a, b, c, d, x[14] + 0x289b7ec6, 4) > + d = tr_h(d, a, b, c, x[ 1] + 0xeaa127fa, 11) > + c = tr_h(c, d, a, b, x[ 4] + 0xd4ef3085, 16) > + b = tr_h(b, c, d, a, x[ 7] + 0x04881d05, 23) > + a = tr_h(a, b, c, d, x[10] + 0xd9d4d039, 4) > + d = tr_h(d, a, b, c, x[13] + 0xe6db99e5, 11) > + c = tr_h(c, d, a, b, x[16] + 0x1fa27cf8, 16) > + b = tr_h(b, c, d, a, x[ 3] + 0xc4ac5665, 23) > + > + a = tr_i(a, b, c, d, x[ 1] + 0xf4292244, 6) > + d = tr_i(d, a, b, c, x[ 8] + 0x432aff97, 10) > + c = tr_i(c, d, a, b, x[15] + 0xab9423a7, 15) > + b = tr_i(b, c, d, a, x[ 6] + 0xfc93a039, 21) > + a = tr_i(a, b, c, d, x[13] + 0x655b59c3, 6) > + d = tr_i(d, a, b, c, x[ 4] + 0x8f0ccc92, 10) > + c = tr_i(c, d, a, b, x[11] + 0xffeff47d, 15) > + b = tr_i(b, c, d, a, x[ 2] + 0x85845dd1, 21) > + a = tr_i(a, b, c, d, x[ 9] + 0x6fa87e4f, 6) > + d = tr_i(d, a, b, c, x[16] + 0xfe2ce6e0, 10) > + c = tr_i(c, d, a, b, x[ 7] + 0xa3014314, 15) > + b = tr_i(b, c, d, a, x[14] + 0x4e0811a1, 21) > + a = tr_i(a, b, c, d, x[ 5] + 0xf7537e82, 6) > + d = tr_i(d, a, b, c, x[12] + 0xbd3af235, 10) > + c = tr_i(c, d, a, b, x[ 3] + 0x2ad7d2bb, 15) > + b = tr_i(b, c, d, a, x[10] + 0xeb86d391, 21) > + > + return tobit(a+a1), tobit(b+b1), tobit(c+c1), tobit(d+d1) > +end > + > +-- Note: this is copying the original string and NOT particularly fast. > +-- A library for struct unpacking would make this task much easier. 
> +local function md5(msg) > + local len = #msg > + msg = msg.."\128"..rep("\0", 63 - band(len + 8, 63)) > + ..char(band(lshift(len, 3), 255), band(rshift(len, 5), 255), > + band(rshift(len, 13), 255), band(rshift(len, 21), 255)) > + .."\0\0\0\0" > + local a, b, c, d = 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 > + local x, k = {}, 1 > + for i=1,#msg,4 do > + local m0, m1, m2, m3 = byte(msg, i, i+3) > + x[k] = bor(m0, lshift(m1, 8), lshift(m2, 16), lshift(m3, 24)) > + if k == 16 then > + a, b, c, d = transform(x, a, b, c, d) > + k = 1 > + else > + k = k + 1 > + end > + end > + return tohex(bswap(a))..tohex(bswap(b))..tohex(bswap(c))..tohex(bswap(d)) > +end > + > +assert(md5('') == 'd41d8cd98f00b204e9800998ecf8427e') > +assert(md5('a') == '0cc175b9c0f1b6a831c399e269772661') > +assert(md5('abc') == '900150983cd24fb0d6963f7d28e17f72') > +assert(md5('message digest') == 'f96b697d7cb7938d525a2f31aaf161d0') > +assert(md5('abcdefghijklmnopqrstuvwxyz') == 'c3fcd3d76192e4007dfb496cca67e13b') > +assert(md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789') == > + 'd174ab98d277d9f5a5611c2c9f419d9f') > +assert(md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890') == > + '57edf4a22be3c955ac49da2e2107b67a') > + > +local N = tonumber(arg and arg[1]) or 10000 > + > + -- Credits: William Shakespeare, Romeo and Juliet > +local txt = [[Rebellious subjects, enemies to peace, > +Profaners of this neighbour-stained steel,-- > +Will they not hear? What, ho! you men, you beasts, > +That quench the fire of your pernicious rage > +With purple fountains issuing from your veins, > +On pain of torture, from those bloody hands > +Throw your mistemper'd weapons to the ground, > +And hear the sentence of your moved prince. 
> +Three civil brawls, bred of an airy word, > +By thee, old Capulet, and Montague, > +Have thrice disturb'd the quiet of our streets, > +And made Verona's ancient citizens > +Cast by their grave beseeming ornaments, > +To wield old partisans, in hands as old, > +Canker'd with peace, to part your canker'd hate: > +If ever you disturb our streets again, > +Your lives shall pay the forfeit of the peace. > +For this time, all the rest depart away: > +You Capulet; shall go along with me: > +And, Montague, come you this afternoon, > +To know our further pleasure in this case, > +To old Free-town, our common judgment-place. > +Once more, on pain of death, all men depart.]] > + txt = txt..txt..txt..txt > + txt = txt..txt..txt..txt > + > +for i=1,N do > + res = md5(txt) > +end > +assert(res == 'a831e91e0f70eddcb70dc61c6f82f6cd') > + > diff --git a/perf/LuaJIT-benches/meteor.lua b/perf/LuaJIT-benches/meteor.lua > new file mode 100644 > index 00000000..80588ab5 > --- /dev/null > +++ b/perf/LuaJIT-benches/meteor.lua > @@ -0,0 +1,220 @@ > + > +-- Generate a decision tree based solver for the meteor puzzle. > +local function generatesolver(countinit) > + local pairs, ipairs, format = pairs, ipairs, string.format > + local byte, min, sort = string.byte, math.min, table.sort > + > + -- Cached position to distance lookup. > + local dist = setmetatable({}, { __index = function(t, xy) > + local x = xy%10; local y = (xy-x)/10 > + if (x+y)%2 == 1 then y = y + 1; x = 10 - x end > + local d = xy + 256*x*x + 1024*y*y; t[xy] = d; return d > + end}) > + > + -- Lookup table to validate a cell and to find its successor. > + local ok = {} > + for i=0,150 do ok[i] = false end > + for i=99,0,-1 do > + local x = i%10 > + if ((i-x)/10+x)%2 == 0 then > + ok[i] = i + (ok[i+1] and 1 or (ok[i+2] and 2 or 3)) > + end > + end > + > + -- Temporary board state for the island checks. 
> + local islands, slide = {}, {20,22,24,26,28,31,33,35,37,39} > + local bbc, bb = 0, {} > + for i=0,19 do bb[i] = false; bb[i+80] = false end > + for i=20,79 do bb[i] = ok[i] end > + > + -- Recursive flood fill algorithm. > + local function fill(bb, p) > + bbc = bbc + 1 > + local n = p+2; if bb[n] then bb[n] = false; fill(bb, n) end > + n = p-2; if bb[n] then bb[n] = false; fill(bb, n) end > + n = p-9; if bb[n] then bb[n] = false; fill(bb, n) end > + n = p-11; if bb[n] then bb[n] = false; fill(bb, n) end > + n = p+9; if bb[n] then bb[n] = false; fill(bb, n) end > + n = p+11; if bb[n] then bb[n] = false; fill(bb, n) end > + end > + > + -- Generate pruned, sliding decision trees. > + local dtrees = {{}, {}, {}, {}, {}, {}, {}, {}, {}, {}} > + local rot = { nil, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} } > + for k=0,9 do > + -- Generate 10 initial pieces from line noise. :-) > + local t = { 60, 62, byte("@BMBIK@KT@GPIKR@IKIKT@GK@KM@BG", k*3+1, k*3+3) } > + rot[1] = t > + for i,xy in ipairs(t) do > + local x = xy%10; local y = (xy-x-60)/10 > + -- Add 11 more variations by rotating and flipping. > + for j=2,12 do > + if j == 7 then y = -y else x,y = (x+3*y)/2, (y-x)/2 end > + rot[j][i] = x+10*y > + end > + end > + for r,v in ipairs(rot) do > + -- Exploit symmetry and leave out half of the orientations of one piece. > + -- The selected piece gives the best reduction of the solution space. > + if k ~= 3 or r%2 == 0 then > + -- Normalize to origin, add distance, sort by distance from origin. > + local m = min(v[1], v[2], v[3], v[4], v[5]) > + for i=1,5 do v[i] = dist[v[i]-m] end > + sort(v) > + local v2, v3, v4, v5 = v[2]%256, v[3]%256, v[4]%256, v[5]%256 > + -- Slide the piece across 2 rows, prune the tree, check for islands. > + for j,p in ipairs(slide) do > + bb[p] = false > + if ok[p+v2] and ok[p+v3] and ok[p+v4] and ok[p+v5] then -- Prune. > + for i=p+1,79 do bb[i] = ok[i] end -- Clear remaining board. > + bb[p+v2] = false; bb[p+v3] = false -- Add piece. 
> + bb[p+v4] = false; bb[p+v5] = false > + bbc = j -- Flood fill and count the filled positions. > + if bb[71] then bb[71] = false; fill(bb, 71) end -- Lower left. > + if bb[79] then bb[79] = false; fill(bb, 79) end -- Lower right. > + local di = 0 > + if bbc < 22 then bbc = 26 > + elseif bbc < 26 then -- Island found, locate it, fill from above. > + for i=p+2,79 do if bb[i] then di = i-p; break end end > + for i=p-9,p-1 do if ok[i] then fill(bb, i) bbc = bbc - 1 end end > + end > + if bbc == 26 then -- Prune boards with static islands. > + local tb = dtrees[j] -- Build decision tree in distance order. > + local ta = tb[v2]; if not ta then ta = {}; tb[v2] = ta end > + tb = ta[v3]; if not tb then tb = {}; ta[v3] = tb end > + ta = tb[v4]; if not ta then ta = {}; tb[v4] = ta; islands[ta] = di > + elseif islands[ta] ~= di then islands[ta] = 0 end > + ta[v5] = di*10+k -- Leaves hold island check and piece number. > + end > + end > + end > + end > + end > + end > + > + local s = "local u0,u1,u2,u3,u4,u5,u6,u7,u8,u9" -- Piece use flags. > + for p=0,99 do if ok[p] then s = s..",b"..p end end -- Board cells. > + s = s.."\n"..[[ > +local countinit = ... > +local count = countinit > +local bmin, bmax, pcs = 9, 0, {} > +local smin, smax > +local write, reverse = io.write, string.reverse > + > +-- Print min/max boards. > +local function printboard(s) > + local flip = true > + for x in string.gmatch(string.gsub(s, ".", "%1 "), "..........") do > + write(x, flip and "\n " or "\n") > + flip = not flip > + end > + write("\n") > +end > + > +-- Print result. > +local function printresult() > + write(countinit-count, " solutions found\n\n") > + printboard(smin) > + printboard(smax) > +end > + > +-- Generate piece lookup array from the order of use. > +local function genp() > + local p = pcs > + p[u0] = "0" p[u1] = "1" p[u2] = "2" p[u3] = "3" p[u4] = "4" > + p[u5] = "5" p[u6] = "6" p[u7] = "7" p[u8] = "8" p[u9] = "9" > + return p > +end > + > +-- Goal function. 
> +local function f91(k) > + if k ~= 10 then return end > + count = count - 2 -- Need to count the symmetric solution, too. > + repeat > + -- Quick precheck before constructing the string. > + local b0, b99 = b0, b99 > + if b0 <= bmin then bmin = b0 elseif b0 >= bmax then bmax = b0 > + elseif b99 <= bmin then bmin = b99 elseif b99 >= bmax then bmax = b99 > + else break end > + -- Translate the filled board to a string. > + local p = genp() > + local s = p[b0] ]] > + for p=2,99 do if ok[p] then s = s.."..p[b"..p.."]" end end > + s = s..[[ > + -- Remember min/max boards, dito for the symmetric board. > + if not smin then smin = s; smax = s > + elseif s < smin then smin = s elseif s > smax then smax = s end > + s = reverse(s) > + if s < smin then smin = s elseif s > smax then smax = s end > + until true > + if count <= 0 then error() end -- Early abort if max count given. > +end > +local f93 = f91 > +]] > + > + -- Recursively convert the decision tree to Lua code. > + local function codetree(tree, d, p, pn) > + local found, s = false, "" > + d = d + 1 > + for a,t in pairs(tree) do > + local b = p+a > + if b < 100 then -- Prune the tree at the lower border. > + local pp = b ~= pn and pn or ok[b] -- Find maximum successor function. > + if d >= 5 then -- Try to place the last cell of a piece and advance. > + found = true > + local u = t%10 > + local di = (t-u)/10 > + if di ~= 0 and d == 5 then > + di = di + p; if pp == di then pp = ok[di] end > + s = format("%sif b%d and not u%d and not b%d then b%d=k u%d=k f%d(k) u%d=N b%d=N end\n", > + s, di, u, b, b, u, pp, u, b) > + else > + s = format("%sif not u%d and not b%d then b%d=k u%d=k f%d(k) u%d=N b%d=N end\n", > + s, u, b, b, u, pp, u, b) > + end > + else -- Try to place an intermediate cell. 
> + local di = d ~= 4 and 0 or islands[t] > + if di == 0 then > + local st = codetree(t, d, p, pp) > + if st then > + found = true > + s = format("%sif not b%d then b%d=k\n%sb%d=N end\n", s, b, b, st, b) > + end > + else -- Combine island checks. > + di = di + p; if pp == di then pp = ok[di] end > + local st = codetree(t, 6, p, pp) > + if st then > + found = true > + s = format("%sif b%d and not b%d then b%d=k\n%sb%d=N end\n", s, di, b, b, st, b) > + end > + end > + end > + end > + end > + return found and s > + end > + > + -- Embed the decision tree into a function hierarchy. > + local j = 5 > + for p=88,0,-1 do > + local pn = ok[p] > + if pn then > + s = format("%slocal function f%d(k)\nlocal N if b%d then return f%d(k) end k=k+1 b%d=k\n%sb%d=N end\n", > + s, p, p, pn, p, codetree(dtrees[j], 1, p, pn), p) > + j = j - 1; if j == 0 then j = 10 end > + end > + end > + > + -- Compile and return solver function and result getter. > + return loadstring(s.."return f0, printresult\n", "solver")(countinit) > +end > + > +-- Generate the solver function hierarchy. > +local solver, printresult = generatesolver(tonumber(arg and arg[1]) or 10000) > + > +-- The optimizer for LuaJIT 1.1.x is not helpful here, so turn it off. > +if jit and jit.opt and jit.version_num < 10200 then jit.opt.start(0) end > + > +-- Run the solver protected to get partial results (max count or ctrl-c). 
> +pcall(solver, 0) > +printresult() > diff --git a/perf/LuaJIT-benches/nbody.lua b/perf/LuaJIT-benches/nbody.lua > new file mode 100644 > index 00000000..e0ff8f77 > --- /dev/null > +++ b/perf/LuaJIT-benches/nbody.lua > @@ -0,0 +1,119 @@ > + > +local sqrt = math.sqrt > + > +local PI = 3.141592653589793 > +local SOLAR_MASS = 4 * PI * PI > +local DAYS_PER_YEAR = 365.24 > +local bodies = { > + { -- Sun > + x = 0, > + y = 0, > + z = 0, > + vx = 0, > + vy = 0, > + vz = 0, > + mass = SOLAR_MASS > + }, > + { -- Jupiter > + x = 4.84143144246472090e+00, > + y = -1.16032004402742839e+00, > + z = -1.03622044471123109e-01, > + vx = 1.66007664274403694e-03 * DAYS_PER_YEAR, > + vy = 7.69901118419740425e-03 * DAYS_PER_YEAR, > + vz = -6.90460016972063023e-05 * DAYS_PER_YEAR, > + mass = 9.54791938424326609e-04 * SOLAR_MASS > + }, > + { -- Saturn > + x = 8.34336671824457987e+00, > + y = 4.12479856412430479e+00, > + z = -4.03523417114321381e-01, > + vx = -2.76742510726862411e-03 * DAYS_PER_YEAR, > + vy = 4.99852801234917238e-03 * DAYS_PER_YEAR, > + vz = 2.30417297573763929e-05 * DAYS_PER_YEAR, > + mass = 2.85885980666130812e-04 * SOLAR_MASS > + }, > + { -- Uranus > + x = 1.28943695621391310e+01, > + y = -1.51111514016986312e+01, > + z = -2.23307578892655734e-01, > + vx = 2.96460137564761618e-03 * DAYS_PER_YEAR, > + vy = 2.37847173959480950e-03 * DAYS_PER_YEAR, > + vz = -2.96589568540237556e-05 * DAYS_PER_YEAR, > + mass = 4.36624404335156298e-05 * SOLAR_MASS > + }, > + { -- Neptune > + x = 1.53796971148509165e+01, > + y = -2.59193146099879641e+01, > + z = 1.79258772950371181e-01, > + vx = 2.68067772490389322e-03 * DAYS_PER_YEAR, > + vy = 1.62824170038242295e-03 * DAYS_PER_YEAR, > + vz = -9.51592254519715870e-05 * DAYS_PER_YEAR, > + mass = 5.15138902046611451e-05 * SOLAR_MASS > + } > +} > + > +local function advance(bodies, nbody, dt) > + for i=1,nbody do > + local bi = bodies[i] > + local bix, biy, biz, bimass = bi.x, bi.y, bi.z, bi.mass > + local bivx, bivy, bivz = bi.vx, bi.vy, 
bi.vz > + for j=i+1,nbody do > + local bj = bodies[j] > + local dx, dy, dz = bix-bj.x, biy-bj.y, biz-bj.z > + local mag = sqrt(dx*dx + dy*dy + dz*dz) > + mag = dt / (mag * mag * mag) > + local bm = bj.mass*mag > + bivx = bivx - (dx * bm) > + bivy = bivy - (dy * bm) > + bivz = bivz - (dz * bm) > + bm = bimass*mag > + bj.vx = bj.vx + (dx * bm) > + bj.vy = bj.vy + (dy * bm) > + bj.vz = bj.vz + (dz * bm) > + end > + bi.vx = bivx > + bi.vy = bivy > + bi.vz = bivz > + bi.x = bix + dt * bivx > + bi.y = biy + dt * bivy > + bi.z = biz + dt * bivz > + end > +end > + > +local function energy(bodies, nbody) > + local e = 0 > + for i=1,nbody do > + local bi = bodies[i] > + local vx, vy, vz, bim = bi.vx, bi.vy, bi.vz, bi.mass > + e = e + (0.5 * bim * (vx*vx + vy*vy + vz*vz)) > + for j=i+1,nbody do > + local bj = bodies[j] > + local dx, dy, dz = bi.x-bj.x, bi.y-bj.y, bi.z-bj.z > + local distance = sqrt(dx*dx + dy*dy + dz*dz) > + e = e - ((bim * bj.mass) / distance) > + end > + end > + return e > +end > + > +local function offsetMomentum(b, nbody) > + local px, py, pz = 0, 0, 0 > + for i=1,nbody do > + local bi = b[i] > + local bim = bi.mass > + px = px + (bi.vx * bim) > + py = py + (bi.vy * bim) > + pz = pz + (bi.vz * bim) > + end > + b[1].vx = -px / SOLAR_MASS > + b[1].vy = -py / SOLAR_MASS > + b[1].vz = -pz / SOLAR_MASS > +end > + > +local N = tonumber(arg and arg[1]) or 1000 > +local nbody = #bodies > + > +offsetMomentum(bodies, nbody) > +io.write( string.format("%0.9f",energy(bodies, nbody)), "\n") > +for i=1,N do advance(bodies, nbody, 0.01) end > +io.write( string.format("%0.9f",energy(bodies, nbody)), "\n") > diff --git a/perf/LuaJIT-benches/nsieve-bit-fp.lua b/perf/LuaJIT-benches/nsieve-bit-fp.lua > new file mode 100644 > index 00000000..3971ec1f > --- /dev/null > +++ b/perf/LuaJIT-benches/nsieve-bit-fp.lua > @@ -0,0 +1,37 @@ > + > +local floor, ceil = math.floor, math.ceil > + > +local precision = 50 -- Maximum precision of lua_Number (minus safety margin). 
> +local onebits = (2^precision)-1 > + > +local function nsieve(p, m) > + local cm = ceil(m/precision) > + do local onebits = onebits; for i=0,cm do p[i] = onebits end end > + local count, idx, bit = 0, 2, 2 > + for i=2,m do > + local r = p[idx] / bit > + if r - floor(r) >= 0.5 then -- Bit set? > + local kidx, kbit = idx, bit > + for k=i+i,m,i do > + kidx = kidx + i > + while kidx >= cm do kidx = kidx - cm; kbit = kbit + kbit end > + local x = p[kidx] > + local r = x / kbit > + if r - floor(r) >= 0.5 then p[kidx] = x - kbit*0.5 end -- Clear bit. > + end > + count = count + 1 > + end > + idx = idx + 1 > + if idx >= cm then idx = 0; bit = bit + bit end > + end > + return count > +end > + > +local N = tonumber(arg and arg[1]) or 1 > +if N < 2 then N = 2 end > +local primes = {} > + > +for i=0,2 do > + local m = (2^(N-i))*10000 > + io.write(string.format("Primes up to %8d %8d\n", m, nsieve(primes, m))) > +end > diff --git a/perf/LuaJIT-benches/nsieve-bit.lua b/perf/LuaJIT-benches/nsieve-bit.lua > new file mode 100644 > index 00000000..820a3726 > --- /dev/null > +++ b/perf/LuaJIT-benches/nsieve-bit.lua > @@ -0,0 +1,27 @@ > + > +local bit = require("bit") > +local band, bxor, rshift, rol = bit.band, bit.bxor, bit.rshift, bit.rol > + > +local function nsieve(p, m) > + local count = 0 > + for i=0,rshift(m, 5) do p[i] = -1 end > + for i=2,m do > + if band(rshift(p[rshift(i, 5)], i), 1) ~= 0 then > + count = count + 1 > + for j=i+i,m,i do > + local jx = rshift(j, 5) > + p[jx] = band(p[jx], rol(-2, j)) > + end > + end > + end > + return count > +end > + > +local N = tonumber(arg and arg[1]) or 1 > +if N < 2 then N = 2 end > +local primes = {} > + > +for i=0,2 do > + local m = (2^(N-i))*10000 > + io.write(string.format("Primes up to %8d %8d\n", m, nsieve(primes, m))) > +end > diff --git a/perf/LuaJIT-benches/nsieve.lua b/perf/LuaJIT-benches/nsieve.lua > new file mode 100644 > index 00000000..6de0524f > --- /dev/null > +++ b/perf/LuaJIT-benches/nsieve.lua > @@ -0,0 +1,21 @@ > + 
> +local function nsieve(p, m) > + for i=2,m do p[i] = true end > + local count = 0 > + for i=2,m do > + if p[i] then > + for k=i+i,m,i do p[k] = false end > + count = count + 1 > + end > + end > + return count > +end > + > +local N = tonumber(arg and arg[1]) or 1 > +if N < 2 then N = 2 end > +local primes = {} > + > +for i=0,2 do > + local m = (2^(N-i))*10000 > + io.write(string.format("Primes up to %8d %8d\n", m, nsieve(primes, m))) > +end > diff --git a/perf/LuaJIT-benches/partialsums.lua b/perf/LuaJIT-benches/partialsums.lua > new file mode 100644 > index 00000000..46bb9da3 > --- /dev/null > +++ b/perf/LuaJIT-benches/partialsums.lua > @@ -0,0 +1,29 @@ > + > +local n = tonumber(arg[1]) > +local function pr(fmt, x) io.write(string.format(fmt, x)) end > + > +local a1, a2, a3, a4, a5, a6, a7, a8, a9, alt = 1, 0, 0, 0, 0, 0, 0, 0, 0, 1 > +local sqrt, sin, cos = math.sqrt, math.sin, math.cos > +for k=1,n do > + local k2, sk, ck = k*k, sin(k), cos(k) > + local k3 = k2*k > + a1 = a1 + (2/3)^k > + a2 = a2 + 1/sqrt(k) > + a3 = a3 + 1/(k2+k) > + a4 = a4 + 1/(k3*sk*sk) > + a5 = a5 + 1/(k3*ck*ck) > + a6 = a6 + 1/k > + a7 = a7 + 1/k2 > + a8 = a8 + alt/k > + a9 = a9 + alt/(k+k-1) > + alt = -alt > +end > +pr("%.9f\t(2/3)^k\n", a1) > +pr("%.9f\tk^-0.5\n", a2) > +pr("%.9f\t1/k(k+1)\n", a3) > +pr("%.9f\tFlint Hills\n", a4) > +pr("%.9f\tCookson Hills\n", a5) > +pr("%.9f\tHarmonic\n", a6) > +pr("%.9f\tRiemann Zeta\n", a7) > +pr("%.9f\tAlternating Harmonic\n", a8) > +pr("%.9f\tGregory\n", a9) > diff --git a/perf/LuaJIT-benches/pidigits-nogmp.lua b/perf/LuaJIT-benches/pidigits-nogmp.lua > new file mode 100644 > index 00000000..63a1cb0e > --- /dev/null > +++ b/perf/LuaJIT-benches/pidigits-nogmp.lua > @@ -0,0 +1,100 @@ > + > +-- Start of dynamically compiled chunk. > +local chunk = [=[ > + > +-- Factory function for multi-precision number (mpn) operations. 
> +local function fmm(fa, fb) > + return loadstring([[ > + return function(y, a, ka, b, kb) > + local carry, n = 0, #a ]]..(fb == 0 and "" or [[ > + local na, nb = n, #b -- Need to adjust lengths. 1 element suffices here. > + if na > nb then b[na] = 0 elseif na < nb then a[nb] = 0; n = nb end > + ]])..[[ > + for i=1,n do -- Sum up all elements and propagate carry. > + local x = a[i] ]]..(fa == 2 and "*ka" or "").. > + (fb == 2 and "+b[i]*kb" or (fb == 1 and "+b[i]" or ""))..[[ + carry > + if x < RADIX and x >= 0 then carry = 0; y[i] = x -- Check for overflow. > + else local d = x % RADIX; carry = (x-d) / RADIX; y[i] = d end > + end > + y[n+1] = nil -- Truncate target. 1 element suffices here. > + if carry == 0 then while n > 0 and y[n] == 0 do y[n] = nil end > + elseif carry == -1 then y[n] = y[n] - RADIX else y[n+1] = carry end > + ]]..(fb == 0 and "" or [[ -- Undo length adjustment. > + if na > nb then b[na] = nil elseif na < nb and y ~= a then a[nb] = nil end > + ]])..[[ > + return y > + end]])() > +end > + > +-- Generate needed mpn functions. > +local mm_kk, mm_k1, mm_k0, mm_11 = fmm(2, 2), fmm(2, 1), fmm(2, 0), fmm(1, 1) > + > +-- Choose the most efficient mpn function for y = a*ka + b*kb at run-time. > +local function mm(y, a, ka, b, kb) > + local f = mm_kk > + if kb == 0 or #b == 0 then if ka == 1 then return a else f = mm_k0 end > + elseif kb == 1 then if ka == 1 then f = mm_11 else f = mm_k1 end end > + return f(y, a, ka, b, kb) > +end > + > +-- Compose matrix with numbers on the right. > +local function compose_r(aq,ar,as,at, bq,br,bs,bt) > + mm(ar, ar,bq, at,br) mm(at, at,bt, ar,bs) > + mm(as, as,bt, aq,bs) mm(aq, aq,bq, nil,0) > +end > + > +-- Compose matrix with numbers on the left. > +local function compose_l(aq,ar,as,at, bq,br,bs,bt) > + mm(ar, ar,bt, aq,br) mm(at, at,bt, as,br) > + mm(as, as,bq, at,bs) mm(aq, aq,bq, nil,0) > +end > + > +-- Extract one digit. 
> +local u, v, jj = {}, {}, 0 > +local function extract(q,r,s,t, j) > + local u = j == jj + 1 and mm(u, u,1, q,1) or mm(u, q,j, r,1); jj = j > + local v = mm(v, t,1, s,j) > + local nu, nv, y = #u, #v > + if nu == nv then > + if nu == 1 then y = u[1] / v[1] > + else y = (u[nu]*RADIX + u[nu-1]) / (v[nv]*RADIX + v[nv-1]) end > + elseif nu == nv+1 then y = (u[nu]*RADIX + u[nv]) / v[nv] > + else return 0 end > + return math.floor(y) > +end > + > +-- Coroutine which yields successive digits of PI. > +return coroutine.wrap(function() > + local q, r, s, t, k = {1}, {}, {}, {1}, 1 > + repeat > + local y = extract(q,r,s,t, 3) > + if y == extract(q,r,s,t, 4) then > + coroutine.yield(y) > + compose_r(q,r,s,t, 10, -10*y, 0, 1) > + else > + compose_l(q,r,s,t, k, 4*k+2, 0, 2*k+1) > + k = k + 1 > + end > + until false > +end) > + > +]=] -- End of dynamically compiled chunk. > + > +local N = tonumber(arg and arg[1]) or 27 > +local RADIX = N < 6500 and 2^36 or 2^32 -- Avoid overflow. > + > +-- Substitute radix and compile chunk. > +local pidigit = loadstring(string.gsub(chunk, "RADIX", tostring(RADIX)))() > + > +-- Print lines with 10 digits. > +for i=10,N,10 do > + for j=1,10 do io.write(pidigit()) end > + io.write("\t:", i, "\n") > +end > + > +-- Print remaining digits (if any). 
> +local n10 = N % 10 > +if n10 ~= 0 then > + for i=1,n10 do io.write(pidigit()) end > + io.write(string.rep(" ", 10-n10), "\t:", N, "\n") > +end > diff --git a/perf/LuaJIT-benches/ray.lua b/perf/LuaJIT-benches/ray.lua > new file mode 100644 > index 00000000..2acc24c0 > --- /dev/null > +++ b/perf/LuaJIT-benches/ray.lua > @@ -0,0 +1,135 @@ > +local sqrt = math.sqrt > +local huge = math.huge > + > +local delta = 1 > +while delta * delta + 1 ~= 1 do > + delta = delta * 0.5 > +end > + > +local function length(x, y, z) return sqrt(x*x + y*y + z*z) end > +local function vlen(v) return length(v[1], v[2], v[3]) end > +local function mul(c, x, y, z) return c*x, c*y, c*z end > +local function unitise(x, y, z) return mul(1/length(x, y, z), x, y, z) end > +local function dot(x1, y1, z1, x2, y2, z2) > + return x1*x2 + y1*y2 + z1*z2 > +end > + > +local function vsub(a, b) return a[1] - b[1], a[2] - b[2], a[3] - b[3] end > +local function vdot(a, b) return dot(a[1], a[2], a[3], b[1], b[2], b[3]) end > + > + > +local sphere = {} > +functionsphere:new(centre, radius) > + self.__index = self > + return setmetatable({centre=centre, radius=radius}, self) > +end > + > +local function sphere_distance(self, origin, dir) > + local vx, vy, vz = vsub(self.centre, origin) > + local b = dot(vx, vy, vz, dir[1], dir[2], dir[3]) > + local r = self.radius > + local disc = r*r + b*b - vx*vx-vy*vy-vz*vz > + if disc < 0 then return huge end > + local d = sqrt(disc) > + local t2 = b + d > + if t2 < 0 then return huge end > + local t1 = b - d > + return t1 > 0 and t1 or t2 > +end > + > +functionsphere:intersect(origin, dir, best) > + local lambda = sphere_distance(self, origin, dir) > + if lambda < best[1] then > + local c = self.centre > + best[1] = lambda > + local b2 = best[2] > + b2[1], b2[2], b2[3] = > + unitise( > + origin[1] - c[1] + lambda * dir[1], > + origin[2] - c[2] + lambda * dir[2], > + origin[3] - c[3] + lambda * dir[3]) > + end > +end > + > +local group = {} > +functiongroup:new(bound) 
> + self.__index = self > + return setmetatable({bound=bound, children={}}, self) > +end > + > +functiongroup:add(s) > + self.children[#self.children+1] = s > +end > + > +functiongroup:intersect(origin, dir, best) > + local lambda = sphere_distance(self.bound, origin, dir) > + if lambda < best[1] then > + for _, c in ipairs(self.children) do > +c:intersect(origin, dir, best) > + end > + end > +end > + > +local hit = { 0, 0, 0 } > +local ilight > +local best = { huge, { 0, 0, 0 } } > + > +local function ray_trace(light, camera, dir, scene) > + best[1] = huge > +scene:intersect(camera, dir, best) > + local b1 = best[1] > + if b1 == huge then return 0 end > + local b2 = best[2] > + local g = vdot(b2, light) > + if g >= 0 then return 0 end > + hit[1] = camera[1] + b1*dir[1] + delta*b2[1] > + hit[2] = camera[2] + b1*dir[2] + delta*b2[2] > + hit[3] = camera[3] + b1*dir[3] + delta*b2[3] > + best[1] = huge > +scene:intersect(hit, ilight, best) > + if best[1] == huge then > + return -g > + else > + return 0 > + end > +end > + > +local function create(level, centre, radius) > + local s =sphere:new(centre, radius) > + if level == 1 then return s end > + local gr =group:new(sphere:new(centre, 3*radius)) > +gr:add(s) > + local rn = 3*radius/sqrt(12) > + for dz = -1,1,2 do > + for dx = -1,1,2 do > +gr:add(create(level-1, { centre[1] + rn*dx, centre[2] + rn, centre[3] + rn*dz }, radius*0.5)) > + end > + end > + return gr > +end > + > + > +local level, n, ss = tonumber(arg[1]) or 9, tonumber(arg[2]) or 256, 4 > +local iss = 1/ss > +local gf = 255/(ss*ss) > + > +io.write(("P5\n%d %d\n255\n"):format(n, n)) > +local light = { unitise(-1, -3, 2) } > +ilight = { -light[1], -light[2], -light[3] } > +local camera = { 0, 0, -4 } > +local dir = { 0, 0, 0 } > + > +local scene = create(level, {0, -1, 0}, 1) > + > +for y = n/2-1, -n/2, -1 do > + for x = -n/2, n/2-1 do > + local g = 0 > + for d = y, y+.99, iss do > + for e = x, x+.99, iss do > + dir[1], dir[2], dir[3] = unitise(e, d, n) > + g 
= g + ray_trace(light, camera, dir, scene) > + end > + end > + io.write(string.char(math.floor(0.5 + g*gf))) > + end > +end > diff --git a/perf/LuaJIT-benches/recursive-ack.lua b/perf/LuaJIT-benches/recursive-ack.lua > new file mode 100644 > index 00000000..fad30589 > --- /dev/null > +++ b/perf/LuaJIT-benches/recursive-ack.lua > @@ -0,0 +1,8 @@ > +local function Ack(m, n) > + if m == 0 then return n+1 end > + if n == 0 then return Ack(m-1, 1) end > + return Ack(m-1, (Ack(m, n-1))) -- The parentheses are deliberate. > +end > + > +local N = tonumber(arg and arg[1]) or 10 > +io.write("Ack(3,", N ,"): ", Ack(3,N), "\n") > diff --git a/perf/LuaJIT-benches/recursive-fib.lua b/perf/LuaJIT-benches/recursive-fib.lua > new file mode 100644 > index 00000000..ef9950de > --- /dev/null > +++ b/perf/LuaJIT-benches/recursive-fib.lua > @@ -0,0 +1,7 @@ > +local function fib(n) > + if n < 2 then return 1 end > + return fib(n-2) + fib(n-1) > +end > + > +local n = tonumber(arg[1]) or 10 > +io.write(string.format("Fib(%d): %d\n", n, fib(n))) > diff --git a/perf/LuaJIT-benches/revcomp.lua b/perf/LuaJIT-benches/revcomp.lua > new file mode 100644 > index 00000000..34fe347b > --- /dev/null > +++ b/perf/LuaJIT-benches/revcomp.lua > @@ -0,0 +1,37 @@ > + > +local sub = string.sub > +iubc = setmetatable({ > + A="T", C="G", B="V", D="H", K="M", R="Y", > + a="T", c="G", b="V", d="H", k="M", r="Y", > + T="A", G="C", V="B", H="D", M="K", Y="R", U="A", > + t="A", g="C", v="B", h="D", m="K", y="R", u="A", > + N="N", S="S", W="W", n="N", s="S", w="W", > +}, { __index = function(t, s) > + local r = t[sub(s, 2)]..t[sub(s, 1, 1)]; t[s] = r; return r end }) > + > +local wcode = [=[ > +return function(t, n) > + if n == 1 then return end > + local iubc, sub, write = iubc, string.sub, io.write > + local s = table.concat(t, "", 1, n-1) > + for i=#s-59,1,-60 do > + write(]=] > +for i=59,3,-4 do wcode = wcode.."iubc[sub(s, i+"..(i-3)..", i+"..i..")], " end > +wcode = wcode..[=["\n") > + end > + local r = #s % 
60 > + if r ~= 0 then > + for i=r,1,-4 do write(iubc[sub(s, i-3 < 1 and 1 or i-3, i)]) end > + write("\n") > + end > +end > +]=] > +local writerev = loadstring(wcode)() > + > +local t, n = {}, 1 > +for line in io.lines() do > + local c = sub(line, 1, 1) > + if c == ">" then writerev(t, n); io.write(line, "\n"); n = 1 > + elseif c ~= ";" then t[n] = line; n = n + 1 end > +end > +writerev(t, n) > diff --git a/perf/LuaJIT-benches/scimark-2010-12-20.lua b/perf/LuaJIT-benches/scimark-2010-12-20.lua > new file mode 100644 > index 00000000..353acb7c > --- /dev/null > +++ b/perf/LuaJIT-benches/scimark-2010-12-20.lua > @@ -0,0 +1,400 @@ > +------------------------------------------------------------------------------ > +-- Lua SciMark (2010-12-20). > +-- > +-- A literal translation of SciMark 2.0a, written in Java and C. > +-- Credits go to the original authors Roldan Pozo and Bruce Miller. > +-- See:http://math.nist.gov/scimark2/ > +------------------------------------------------------------------------------ > + > +local SCIMARK_VERSION = "2010-12-10" > +local SCIMARK_COPYRIGHT = "Copyright (C) 2006-2010 Mike Pall" > + > +local MIN_TIME = 2.0 > +local RANDOM_SEED = 101009 -- Must be odd. 
> +local SIZE_SELECT = "small" > + > +local benchmarks = { > + "FFT", "SOR", "MC", "SPARSE", "LU", > + small = { > + FFT = { 1024 }, > + SOR = { 100 }, > + MC = { }, > + SPARSE = { 1000, 5000 }, > + LU = { 100 }, > + }, > + large = { > + FFT = { 1048576 }, > + SOR = { 1000 }, > + MC = { }, > + SPARSE = { 100000, 1000000 }, > + LU = { 1000 }, > + }, > +} > + > +local abs, log, sin, floor = math.abs, math.log, math.sin, math.floor > +local pi, clock = math.pi, os.clock > +local format = string.format > + > +------------------------------------------------------------------------------ > +-- Select array type: Lua tables or native (FFI) arrays > +------------------------------------------------------------------------------ > + > +local darray, iarray > + > +local function array_init() > + if jit and jit.status and jit.status() then > + local ok, ffi = pcall(require, "ffi") > + if ok then > + darray = ffi.typeof("double[?]") > + iarray = ffi.typeof("int[?]") > + return > + end > + end > + function darray(n) return {} end > + iarray = darray > +end > + > +------------------------------------------------------------------------------ > +-- This is a Lagged Fibonacci Pseudo-random Number Generator with > +-- j, k, M = 5, 17, 31. Pretty weak, but same as C/Java SciMark. > +------------------------------------------------------------------------------ > + > +local rand, rand_init > + > +if jit and jit.status and jit.status() then > + -- LJ2 has bit operations and zero-based arrays (internally). 
> + local bit = require("bit") > + local band, sar = bit.band, bit.arshift > + function rand_init(seed) > + local Rm, Rj, Ri = iarray(17), 16, 11 > + for i=0,16 do Rm[i] = 0 end > + for i=16,0,-1 do > + seed = band(seed*9069, 0x7fffffff) > + Rm[i] = seed > + end > + function rand() > + local i = band(Ri+1, sar(Ri-16, 31)) > + local j = band(Rj+1, sar(Rj-16, 31)) > + Ri, Rj = i, j > + local k = band(Rm[i] - Rm[j], 0x7fffffff) > + Rm[j] = k > + return k * (1.0/2147483647.0) > + end > + end > +else > + -- Better for standard Lua with one-based arrays and without bit operations. > + function rand_init(seed) > + local Rm, Rj = {}, 1 > + for i=1,17 do Rm[i] = 0 end > + for i=17,1,-1 do > + seed = (seed*9069) % (2^31) > + Rm[i] = seed > + end > + function rand() > + local j, m = Rj, Rm > + local h = j - 5 > + if h < 1 then h = h + 17 end > + local k = m[h] - m[j] > + if k < 0 then k = k + 2147483647 end > + m[j] = k > + if j < 17 then Rj = j + 1 else Rj = 1 end > + return k * (1.0/2147483647.0) > + end > + end > +end > + > +local function random_vector(n) > + local v = darray(n+1) > + for x=1,n do v[x] = rand() end > + return v > +end > + > +local function random_matrix(m, n) > + local a = {} > + for y=1,m do > + local v = darray(n+1) > + a[y] = v > + for x=1,n do v[x] = rand() end > + end > + return a > +end > + > +------------------------------------------------------------------------------ > +-- FFT: Fast Fourier Transform. 
> +------------------------------------------------------------------------------ > + > +local function fft_bitreverse(v, n) > + local j = 0 > + for i=0,2*n-4,2 do > + if i < j then > + v[i+1], v[i+2], v[j+1], v[j+2] = v[j+1], v[j+2], v[i+1], v[i+2] > + end > + local k = n > + while k <= j do j = j - k; k = k / 2 end > + j = j + k > + end > +end > + > +local function fft_transform(v, n, dir) > + if n <= 1 then return end > + fft_bitreverse(v, n) > + local dual = 1 > + repeat > + local dual2 = 2*dual > + for i=1,2*n-1,2*dual2 do > + local j = i+dual2 > + local ir, ii = v[i], v[i+1] > + local jr, ji = v[j], v[j+1] > + v[j], v[j+1] = ir - jr, ii - ji > + v[i], v[i+1] = ir + jr, ii + ji > + end > + local theta = dir * pi / dual > + local s, s2 = sin(theta), 2.0 * sin(theta * 0.5)^2 > + local wr, wi = 1.0, 0.0 > + for a=3,dual2-1,2 do > + wr, wi = wr - s*wi - s2*wr, wi + s*wr - s2*wi > + for i=a,a+2*(n-dual2),2*dual2 do > + local j = i+dual2 > + local jr, ji = v[j], v[j+1] > + local dr, di = wr*jr - wi*ji, wr*ji + wi*jr > + local ir, ii = v[i], v[i+1] > + v[j], v[j+1] = ir - dr, ii - di > + v[i], v[i+1] = ir + dr, ii + di > + end > + end > + dual = dual2 > + until dual >= n > +end > + > +function benchmarks.FFT(n) > + local l2n = log(n)/log(2) > + if l2n % 1 ~= 0 then > +io.stderr:write("Error: FFT data length is not a power of 2\n") > + os.exit(1) > + end > + local v = random_vector(n*2) > + return function(cycles) > + local norm = 1.0 / n > + for p=1,cycles do > + fft_transform(v, n, -1) > + fft_transform(v, n, 1) > + for i=1,n*2 do v[i] = v[i] * norm end > + end > + return ((5*n-2)*l2n + 2*(n+1)) * cycles > + end > +end > + > +------------------------------------------------------------------------------ > +-- SOR: Jacobi Successive Over-Relaxation. 
> +------------------------------------------------------------------------------ > + > +local function sor_run(mat, m, n, cycles, omega) > + local om4, om1 = omega*0.25, 1.0-omega > + m = m - 1 > + n = n - 1 > + for i=1,cycles do > + for y=2,m do > + local v, vp, vn = mat[y], mat[y-1], mat[y+1] > + for x=2,n do > + v[x] = om4*((vp[x]+vn[x])+(v[x-1]+v[x+1])) + om1*v[x] > + end > + end > + end > +end > + > +function benchmarks.SOR(n) > + local mat = random_matrix(n, n) > + return function(cycles) > + sor_run(mat, n, n, cycles, 1.25) > + return (n-1)*(n-1)*cycles*6 > + end > +end > + > +------------------------------------------------------------------------------ > +-- MC: Monte Carlo Integration. > +------------------------------------------------------------------------------ > + > +local function mc_integrate(cycles) > + local under_curve = 0 > + local rand = rand > + for i=1,cycles do > + local x = rand() > + local y = rand() > + if x*x + y*y <= 1.0 then under_curve = under_curve + 1 end > + end > + return (under_curve/cycles) * 4 > +end > + > +function benchmarks.MC() > + return function(cycles) > + local res = mc_integrate(cycles) > + assert(math.sqrt(cycles)*math.abs(res-math.pi) < 5.0, "bad MC result") > + return cycles * 4 -- Way off, but same as SciMark in C/Java. > + end > +end > + > +------------------------------------------------------------------------------ > +-- Sparse Matrix Multiplication. 
> +------------------------------------------------------------------------------ > + > +local function sparse_mult(n, cycles, vy, val, row, col, vx) > + for p=1,cycles do > + for r=1,n do > + local sum = 0 > + for i=row[r],row[r+1]-1 do sum = sum + vx[col[i]] * val[i] end > + vy[r] = sum > + end > + end > +end > + > +function benchmarks.SPARSE(n, nz) > + local nr = floor(nz/n) > + local anz = nr*n > + local vx = random_vector(n) > + local val = random_vector(anz) > + local vy, col, row = darray(n+1), iarray(nz+1), iarray(n+2) > + row[1] = 1 > + for r=1,n do > + local step = floor(r/nr) > + if step < 1 then step = 1 end > + local rr = row[r] > + row[r+1] = rr+nr > + for i=0,nr-1 do col[rr+i] = 1+i*step end > + end > + return function(cycles) > + sparse_mult(n, cycles, vy, val, row, col, vx) > + return anz*cycles*2 > + end > +end > + > +------------------------------------------------------------------------------ > +-- LU: Dense Matrix Factorization. > +------------------------------------------------------------------------------ > + > +local function lu_factor(a, pivot, m, n) > + local min_m_n = m < n and m or n > + for j=1,min_m_n do > + local jp, t = j, abs(a[j][j]) > + for i=j+1,m do > + local ab = abs(a[i][j]) > + if ab > t then > + jp = i > + t = ab > + end > + end > + pivot[j] = jp > + if a[jp][j] == 0 then error("zero pivot") end > + if jp ~= j then a[j], a[jp] = a[jp], a[j] end > + if j < m then > + local recp = 1.0 / a[j][j] > + for k=j+1,m do > + local v = a[k] > + v[j] = v[j] * recp > + end > + end > + if j < min_m_n then > + for i=j+1,m do > + local vi, vj = a[i], a[j] > + local eij = vi[j] > + for k=j+1,n do vi[k] = vi[k] - eij * vj[k] end > + end > + end > + end > +end > + > +local function matrix_alloc(m, n) > + local a = {} > + for y=1,m do a[y] = darray(n+1) end > + return a > +end > + > +local function matrix_copy(dst, src, m, n) > + for y=1,m do > + local vd, vs = dst[y], src[y] > + for x=1,n do vd[x] = vs[x] end > + end > +end > + > +function 
benchmarks.LU(n) > + local mat = random_matrix(n, n) > + local tmp = matrix_alloc(n, n) > + local pivot = iarray(n+1) > + return function(cycles) > + for i=1,cycles do > + matrix_copy(tmp, mat, n, n) > + lu_factor(tmp, pivot, n, n) > + end > + return 2.0/3.0*n*n*n*cycles > + end > +end > + > +------------------------------------------------------------------------------ > +-- Main program. > +------------------------------------------------------------------------------ > + > +local function printf(...) > + io.write(format(...)) > +end > + > +local function fmtparams(p1, p2) > + if p2 then return format("[%d, %d]", p1, p2) > + elseif p1 then return format("[%d]", p1) end > + return "" > +end > + > +local function measure(min_time, name, ...) > + array_init() > + rand_init(RANDOM_SEED) > + local run = benchmarks[name](...) > + local cycles = 1 > + repeat > + local tm = clock() > + local flops = run(cycles, ...) > + tm = clock() - tm > + if tm >= min_time then > + local res = flops / tm * 1.0e-6 > + local p1, p2 = ... > + printf("%-7s %8.2f %s\n", name, res, fmtparams(...)) > + return res > + end > + cycles = cycles * 2 > + until false > +end > + > +printf("Lua SciMark %s based on SciMark 2.0a. 
%s.\n\n", > + SCIMARK_VERSION, SCIMARK_COPYRIGHT) > + > +while arg and arg[1] do > + local a = table.remove(arg, 1) > + if a == "-noffi" then > + package.preload.ffi = nil > + elseif a == "-small" then > + SIZE_SELECT = "small" > + elseif a == "-large" then > + SIZE_SELECT = "large" > + elseif benchmarks[a] then > + local p = benchmarks[SIZE_SELECT][a] > + measure(MIN_TIME, a, tonumber(arg[1]) or p[1], tonumber(arg[2]) or p[2]) > + return > + else > + printf("Usage: scimark [-noffi] [-small|-large] [BENCH params...]\n\n") > + printf("BENCH -small -large\n") > + printf("---------------------------------------\n") > + for _,name in ipairs(benchmarks) do > + printf("%-7s %-13s %s\n", name, > + fmtparams(unpack(benchmarks.small[name])), > + fmtparams(unpack(benchmarks.large[name]))) > + end > + printf("\n") > + os.exit(1) > + end > +end > + > +local params = benchmarks[SIZE_SELECT] > +local sum = 0 > +for _,name in ipairs(benchmarks) do > + sum = sum + measure(MIN_TIME, name, unpack(params[name])) > +end > +printf("\nSciMark %8.2f [%s problem sizes]\n", sum / #benchmarks, SIZE_SELECT) > +io.flush() > + > diff --git a/perf/LuaJIT-benches/scimark-fft.lua b/perf/LuaJIT-benches/scimark-fft.lua > new file mode 100644 > index 00000000..c05bb69a > --- /dev/null > +++ b/perf/LuaJIT-benches/scimark-fft.lua > @@ -0,0 +1 @@ > +require("scimark_lib").FFT(1024)(tonumber(arg and arg[1]) or 50000) > diff --git a/perf/LuaJIT-benches/scimark-lu.lua b/perf/LuaJIT-benches/scimark-lu.lua > new file mode 100644 > index 00000000..7636d994 > --- /dev/null > +++ b/perf/LuaJIT-benches/scimark-lu.lua > @@ -0,0 +1 @@ > +require("scimark_lib").LU(100)(tonumber(arg and arg[1]) or 5000) > diff --git a/perf/LuaJIT-benches/scimark-sor.lua b/perf/LuaJIT-benches/scimark-sor.lua > new file mode 100644 > index 00000000..e537e986 > --- /dev/null > +++ b/perf/LuaJIT-benches/scimark-sor.lua > @@ -0,0 +1 @@ > +require("scimark_lib").SOR(100)(tonumber(arg and arg[1]) or 50000) > diff --git 
a/perf/LuaJIT-benches/scimark-sparse.lua b/perf/LuaJIT-benches/scimark-sparse.lua > new file mode 100644 > index 00000000..01a2258d > --- /dev/null > +++ b/perf/LuaJIT-benches/scimark-sparse.lua > @@ -0,0 +1 @@ > +require("scimark_lib").SPARSE(1000, 5000)(tonumber(arg and arg[1]) or 150000) > diff --git a/perf/LuaJIT-benches/scimark_lib.lua b/perf/LuaJIT-benches/scimark_lib.lua > new file mode 100644 > index 00000000..aeffd75a > --- /dev/null > +++ b/perf/LuaJIT-benches/scimark_lib.lua > @@ -0,0 +1,297 @@ > +------------------------------------------------------------------------------ > +-- Lua SciMark (2010-03-15). > +-- > +-- A literal translation of SciMark 2.0a, written in Java and C. > +-- Credits go to the original authors Roldan Pozo and Bruce Miller. > +-- See:http://math.nist.gov/scimark2/ > +------------------------------------------------------------------------------ > + > + > +local SCIMARK_VERSION = "2010-03-15" > + > +local RANDOM_SEED = 101009 -- Must be odd. > + > +local abs, log, sin, floor = math.abs, math.log, math.sin, math.floor > +local pi, clock = math.pi, os.clock > + > +local benchmarks = {} > + > +------------------------------------------------------------------------------ > +-- This is a Lagged Fibonacci Pseudo-random Number Generator with > +-- j, k, M = 5, 17, 31. Pretty weak, but same as C/Java SciMark. > +------------------------------------------------------------------------------ > + > +local rand, rand_init > + > +if jit and jit.status and jit.status() then > + -- LJ2 has bit operations and zero-based arrays (internally). 
> + local bit = require("bit") > + local band, sar = bit.band, bit.arshift > + local Rm, Rj, Ri = {}, 0, 0 > + for i=0,16 do Rm[i] = 0 end > + function rand_init(seed) > + Rj, Ri = 16, 11 > + for i=16,0,-1 do > + seed = band(seed*9069, 0x7fffffff) > + Rm[i] = seed > + end > + end > + function rand() > + local i = band(Ri+1, sar(Ri-16, 31)) > + local j = band(Rj+1, sar(Rj-16, 31)) > + Ri, Rj = i, j > + local k = band(Rm[i] - Rm[j], 0x7fffffff) > + Rm[j] = k > + return k * (1.0/2147483647.0) > + end > +else > + -- Better for standard Lua with one-based arrays and without bit operations. > + local Rm, Rj = {}, 1 > + for i=1,17 do Rm[i] = 0 end > + function rand_init(seed) > + Rj = 1 > + for i=17,1,-1 do > + seed = (seed*9069) % (2^31) > + Rm[i] = seed > + end > + end > + function rand() > + local j, m = Rj, Rm > + local h = j - 5 > + if h < 1 then h = h + 17 end > + local k = m[h] - m[j] > + if k < 0 then k = k + 2147483647 end > + m[j] = k > + if j < 17 then Rj = j + 1 else Rj = 1 end > + return k * (1.0/2147483647.0) > + end > +end > + > +local function random_vector(n) > + local v = {} > + for x=1,n do v[x] = rand() end > + return v > +end > + > +local function random_matrix(m, n) > + local a = {} > + for y=1,m do > + local v = {} > + a[y] = v > + for x=1,n do v[x] = rand() end > + end > + return a > +end > + > +------------------------------------------------------------------------------ > +-- FFT: Fast Fourier Transform. 
> +------------------------------------------------------------------------------ > + > +local function fft_bitreverse(v, n) > + local j = 0 > + for i=0,2*n-4,2 do > + if i < j then > + v[i+1], v[i+2], v[j+1], v[j+2] = v[j+1], v[j+2], v[i+1], v[i+2] > + end > + local k = n > + while k <= j do j = j - k; k = k / 2 end > + j = j + k > + end > +end > + > +local function fft_transform(v, n, dir) > + if n <= 1 then return end > + fft_bitreverse(v, n) > + local dual = 1 > + repeat > + local dual2 = 2*dual > + for i=1,2*n-1,2*dual2 do > + local j = i+dual2 > + local ir, ii = v[i], v[i+1] > + local jr, ji = v[j], v[j+1] > + v[j], v[j+1] = ir - jr, ii - ji > + v[i], v[i+1] = ir + jr, ii + ji > + end > + local theta = dir * pi / dual > + local s, s2 = sin(theta), 2.0 * sin(theta * 0.5)^2 > + local wr, wi = 1.0, 0.0 > + for a=3,dual2-1,2 do > + wr, wi = wr - s*wi - s2*wr, wi + s*wr - s2*wi > + for i=a,a+2*(n-dual2),2*dual2 do > + local j = i+dual2 > + local jr, ji = v[j], v[j+1] > + local dr, di = wr*jr - wi*ji, wr*ji + wi*jr > + local ir, ii = v[i], v[i+1] > + v[j], v[j+1] = ir - dr, ii - di > + v[i], v[i+1] = ir + dr, ii + di > + end > + end > + dual = dual2 > + until dual >= n > +end > + > +function benchmarks.FFT(n) > + local l2n = log(n)/log(2) > + if l2n % 1 ~= 0 then > +io.stderr:write("Error: FFT data length is not a power of 2\n") > + os.exit(1) > + end > + local v = random_vector(n*2) > + return function(cycles) > + local norm = 1.0 / n > + for p=1,cycles do > + fft_transform(v, n, -1) > + fft_transform(v, n, 1) > + for i=1,n*2 do v[i] = v[i] * norm end > + end > + return ((5*n-2)*l2n + 2*(n+1)) * cycles > + end > +end > + > +------------------------------------------------------------------------------ > +-- SOR: Jacobi Successive Over-Relaxation. 
> +------------------------------------------------------------------------------ > + > +local function sor_run(mat, m, n, cycles, omega) > + local om4, om1 = omega*0.25, 1.0-omega > + m = m - 1 > + n = n - 1 > + for i=1,cycles do > + for y=2,m do > + local v, vp, vn = mat[y], mat[y-1], mat[y+1] > + for x=2,n do > + v[x] = om4*((vp[x]+vn[x])+(v[x-1]+v[x+1])) + om1*v[x] > + end > + end > + end > +end > + > +function benchmarks.SOR(n) > + local mat = random_matrix(n, n) > + return function(cycles) > + sor_run(mat, n, n, cycles, 1.25) > + return (n-1)*(n-1)*cycles*6 > + end > +end > + > +------------------------------------------------------------------------------ > +-- MC: Monte Carlo Integration. > +------------------------------------------------------------------------------ > + > +local function mc_integrate(cycles) > + local under_curve = 0 > + local rand = rand > + for i=1,cycles do > + local x = rand() > + local y = rand() > + if x*x + y*y <= 1.0 then under_curve = under_curve + 1 end > + end > + return (under_curve/cycles) * 4 > +end > + > +function benchmarks.MC() > + return function(cycles) > + local res = mc_integrate(cycles) > + assert(math.sqrt(cycles)*math.abs(res-math.pi) < 5.0, "bad MC result") > + return cycles * 4 -- Way off, but same as SciMark in C/Java. > + end > +end > + > +------------------------------------------------------------------------------ > +-- Sparse Matrix Multiplication. 
> +------------------------------------------------------------------------------ > + > +local function sparse_mult(n, cycles, vy, val, row, col, vx) > + for p=1,cycles do > + for r=1,n do > + local sum = 0 > + for i=row[r],row[r+1]-1 do sum = sum + vx[col[i]] * val[i] end > + vy[r] = sum > + end > + end > +end > + > +function benchmarks.SPARSE(n, nz) > + local nr = floor(nz/n) > + local anz = nr*n > + local vx = random_vector(n) > + local val = random_vector(anz) > + local vy, col, row = {}, {}, {} > + row[1] = 1 > + for r=1,n do > + local step = floor(r/nr) > + if step < 1 then step = 1 end > + local rr = row[r] > + row[r+1] = rr+nr > + for i=0,nr-1 do col[rr+i] = 1+i*step end > + end > + return function(cycles) > + sparse_mult(n, cycles, vy, val, row, col, vx) > + return anz*cycles*2 > + end > +end > + > +------------------------------------------------------------------------------ > +-- LU: Dense Matrix Factorization. > +------------------------------------------------------------------------------ > + > +local function lu_factor(a, pivot, m, n) > + local min_m_n = m < n and m or n > + for j=1,min_m_n do > + local jp, t = j, abs(a[j][j]) > + for i=j+1,m do > + local ab = abs(a[i][j]) > + if ab > t then > + jp = i > + t = ab > + end > + end > + pivot[j] = jp > + if a[jp][j] == 0 then error("zero pivot") end > + if jp ~= j then a[j], a[jp] = a[jp], a[j] end > + if j < m then > + local recp = 1.0 / a[j][j] > + for k=j+1,m do > + local v = a[k] > + v[j] = v[j] * recp > + end > + end > + if j < min_m_n then > + for i=j+1,m do > + local vi, vj = a[i], a[j] > + local eij = vi[j] > + for k=j+1,n do vi[k] = vi[k] - eij * vj[k] end > + end > + end > + end > +end > + > +local function matrix_alloc(m, n) > + local a = {} > + for y=1,m do a[y] = {} end > + return a > +end > + > +local function matrix_copy(dst, src, m, n) > + for y=1,m do > + local vd, vs = dst[y], src[y] > + for x=1,n do vd[x] = vs[x] end > + end > +end > + > +function benchmarks.LU(n) > + local mat = 
random_matrix(n, n) > + local tmp = matrix_alloc(n, n) > + local pivot = {} > + return function(cycles) > + for i=1,cycles do > + matrix_copy(tmp, mat, n, n) > + lu_factor(tmp, pivot, n, n) > + end > + return 2.0/3.0*n*n*n*cycles > + end > +end > + > +rand_init(RANDOM_SEED) > + > +return benchmarks > diff --git a/perf/LuaJIT-benches/series.lua b/perf/LuaJIT-benches/series.lua > new file mode 100644 > index 00000000..f766cb32 > --- /dev/null > +++ b/perf/LuaJIT-benches/series.lua > @@ -0,0 +1,34 @@ > + > +local function integrate(x0, x1, nsteps, omegan, f) > + local x, dx = x0, (x1-x0)/nsteps > + local rvalue = ((x0+1)^x0 * f(omegan*x0)) / 2 > + for i=3,nsteps do > + x = x + dx > + rvalue = rvalue + (x+1)^x * f(omegan*x) > + end > + return (rvalue + ((x1+1)^x1 * f(omegan*x1)) / 2) * dx > +end > + > +local function series(n) > + local sin, cos = math.sin, math.cos > + local omega = math.pi > + local t = {} > + > + t[1] = integrate(0, 2, 1000, 0, function() return 1 end) / 2 > + t[2] = 0 > + > + for i=2,n do > + t[2*i-1] = integrate(0, 2, 1000, omega*i, cos) > + t[2*i] = integrate(0, 2, 1000, omega*i, sin) > + end > + > + return t > +end > + > +local n = tonumber(arg and arg[1]) or 10000 > +local tm = os.clock() > +local t = series(n) > +tm = os.clock() - tm > +assert(math.abs(t[1]-2.87295) < 0.00001) > +io.write(string.format("size %d, %.2f s, %.1f iterations/s\n", > + n, tm, (2*n-1)/tm)) > diff --git a/perf/LuaJIT-benches/spectral-norm.lua b/perf/LuaJIT-benches/spectral-norm.lua > new file mode 100644 > index 00000000..ecc80112 > --- /dev/null > +++ b/perf/LuaJIT-benches/spectral-norm.lua > @@ -0,0 +1,40 @@ > + > +local function A(i, j) > + local ij = i+j-1 > + return 1.0 / (ij * (ij-1) * 0.5 + i) > +end > + > +local function Av(x, y, N) > + for i=1,N do > + local a = 0 > + for j=1,N do a = a + x[j] * A(i, j) end > + y[i] = a > + end > +end > + > +local function Atv(x, y, N) > + for i=1,N do > + local a = 0 > + for j=1,N do a = a + x[j] * A(j, i) end > + y[i] = a > 
+ end > +end > + > +local function AtAv(x, y, t, N) > + Av(x, t, N) > + Atv(t, y, N) > +end > + > +local N = tonumber(arg and arg[1]) or 100 > +local u, v, t = {}, {}, {} > +for i=1,N do u[i] = 1 end > + > +for i=1,10 do AtAv(u, v, t, N) AtAv(v, u, t, N) end > + > +local vBv, vv = 0, 0 > +for i=1,N do > + local ui, vi = u[i], v[i] > + vBv = vBv + ui*vi > + vv = vv + vi*vi > +end > +io.write(string.format("%0.9f\n", math.sqrt(vBv / vv))) > diff --git a/perf/LuaJIT-benches/sum-file.lua b/perf/LuaJIT-benches/sum-file.lua > new file mode 100644 > index 00000000..c9e618fd > --- /dev/null > +++ b/perf/LuaJIT-benches/sum-file.lua > @@ -0,0 +1,6 @@ > + > +local sum = 0 > +for line in io.lines() do > + sum = sum + line > +end > +io.write(sum, "\n") --------------2GliyEDk8CzVRwg37b51jDjr Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: 8bit

Hi, Sergey!

Thanks for the patch! LGTM

Sergey

On 12/26/25 12:17, Sergey Kaplun wrote:
This patch introduces the LuaJIT-test-cleanup benchmark suite [1] into
our LuaJIT fork source tree. To provide reliable, reproducible results,
several benchmarks need to be adjusted. However, to be sure that we start
from a valid suite, everything in the <perf/LuaJIT-benches> directory
is moved intact.

[1]: https://github.com/LuaJIT/LuaJIT-test-cleanup/tree/014708b/bench
---
 .luacheckrc                                |    1 +
 perf/LuaJIT-benches/PARAM_arm.txt          |   29 +
 perf/LuaJIT-benches/PARAM_mips.txt         |   29 +
 perf/LuaJIT-benches/PARAM_ppc.txt          |   29 +
 perf/LuaJIT-benches/PARAM_x86.txt          |   29 +
 perf/LuaJIT-benches/SUMCOL_1.txt           | 1000 ++++++++++++++++++++
 perf/LuaJIT-benches/TEST_md5sum.txt        |   20 +
 perf/LuaJIT-benches/array3d.lua            |   59 ++
 perf/LuaJIT-benches/binary-trees.lua       |   47 +
 perf/LuaJIT-benches/chameneos.lua          |   68 ++
 perf/LuaJIT-benches/coroutine-ring.lua     |   42 +
 perf/LuaJIT-benches/euler14-bit.lua        |   22 +
 perf/LuaJIT-benches/fannkuch.lua           |   50 +
 perf/LuaJIT-benches/fasta.lua              |   95 ++
 perf/LuaJIT-benches/k-nucleotide.lua       |   58 ++
 perf/LuaJIT-benches/life.lua               |  111 +++
 perf/LuaJIT-benches/mandelbrot-bit.lua     |   33 +
 perf/LuaJIT-benches/mandelbrot.lua         |   23 +
 perf/LuaJIT-benches/md5.lua                |  183 ++++
 perf/LuaJIT-benches/meteor.lua             |  220 +++++
 perf/LuaJIT-benches/nbody.lua              |  119 +++
 perf/LuaJIT-benches/nsieve-bit-fp.lua      |   37 +
 perf/LuaJIT-benches/nsieve-bit.lua         |   27 +
 perf/LuaJIT-benches/nsieve.lua             |   21 +
 perf/LuaJIT-benches/partialsums.lua        |   29 +
 perf/LuaJIT-benches/pidigits-nogmp.lua     |  100 ++
 perf/LuaJIT-benches/ray.lua                |  135 +++
 perf/LuaJIT-benches/recursive-ack.lua      |    8 +
 perf/LuaJIT-benches/recursive-fib.lua      |    7 +
 perf/LuaJIT-benches/revcomp.lua            |   37 +
 perf/LuaJIT-benches/scimark-2010-12-20.lua |  400 ++++++++
 perf/LuaJIT-benches/scimark-fft.lua        |    1 +
 perf/LuaJIT-benches/scimark-lu.lua         |    1 +
 perf/LuaJIT-benches/scimark-sor.lua        |    1 +
 perf/LuaJIT-benches/scimark-sparse.lua     |    1 +
 perf/LuaJIT-benches/scimark_lib.lua        |  297 ++++++
 perf/LuaJIT-benches/series.lua             |   34 +
 perf/LuaJIT-benches/spectral-norm.lua      |   40 +
 perf/LuaJIT-benches/sum-file.lua           |    6 +
 39 files changed, 3449 insertions(+)
 create mode 100644 perf/LuaJIT-benches/PARAM_arm.txt
 create mode 100644 perf/LuaJIT-benches/PARAM_mips.txt
 create mode 100644 perf/LuaJIT-benches/PARAM_ppc.txt
 create mode 100644 perf/LuaJIT-benches/PARAM_x86.txt
 create mode 100644 perf/LuaJIT-benches/SUMCOL_1.txt
 create mode 100644 perf/LuaJIT-benches/TEST_md5sum.txt
 create mode 100644 perf/LuaJIT-benches/array3d.lua
 create mode 100644 perf/LuaJIT-benches/binary-trees.lua
 create mode 100644 perf/LuaJIT-benches/chameneos.lua
 create mode 100644 perf/LuaJIT-benches/coroutine-ring.lua
 create mode 100644 perf/LuaJIT-benches/euler14-bit.lua
 create mode 100644 perf/LuaJIT-benches/fannkuch.lua
 create mode 100644 perf/LuaJIT-benches/fasta.lua
 create mode 100644 perf/LuaJIT-benches/k-nucleotide.lua
 create mode 100644 perf/LuaJIT-benches/life.lua
 create mode 100644 perf/LuaJIT-benches/mandelbrot-bit.lua
 create mode 100644 perf/LuaJIT-benches/mandelbrot.lua
 create mode 100644 perf/LuaJIT-benches/md5.lua
 create mode 100644 perf/LuaJIT-benches/meteor.lua
 create mode 100644 perf/LuaJIT-benches/nbody.lua
 create mode 100644 perf/LuaJIT-benches/nsieve-bit-fp.lua
 create mode 100644 perf/LuaJIT-benches/nsieve-bit.lua
 create mode 100644 perf/LuaJIT-benches/nsieve.lua
 create mode 100644 perf/LuaJIT-benches/partialsums.lua
 create mode 100644 perf/LuaJIT-benches/pidigits-nogmp.lua
 create mode 100644 perf/LuaJIT-benches/ray.lua
 create mode 100644 perf/LuaJIT-benches/recursive-ack.lua
 create mode 100644 perf/LuaJIT-benches/recursive-fib.lua
 create mode 100644 perf/LuaJIT-benches/revcomp.lua
 create mode 100644 perf/LuaJIT-benches/scimark-2010-12-20.lua
 create mode 100644 perf/LuaJIT-benches/scimark-fft.lua
 create mode 100644 perf/LuaJIT-benches/scimark-lu.lua
 create mode 100644 perf/LuaJIT-benches/scimark-sor.lua
 create mode 100644 perf/LuaJIT-benches/scimark-sparse.lua
 create mode 100644 perf/LuaJIT-benches/scimark_lib.lua
 create mode 100644 perf/LuaJIT-benches/series.lua
 create mode 100644 perf/LuaJIT-benches/spectral-norm.lua
 create mode 100644 perf/LuaJIT-benches/sum-file.lua

diff --git a/.luacheckrc b/.luacheckrc
index 19098dd9..35824875 100644
--- a/.luacheckrc
+++ b/.luacheckrc
@@ -16,6 +16,7 @@ files['test/tarantool-tests/'] = {
 -- test suites and need to be coherent with the upstream.
 exclude_files = {
   'dynasm/',
+  'perf/LuaJIT-benches/',
   'src/',
   'test/LuaJIT-tests/',
   'test/PUC-Rio-Lua-5.1-tests/',
diff --git a/perf/LuaJIT-benches/PARAM_arm.txt b/perf/LuaJIT-benches/PARAM_arm.txt
new file mode 100644
index 00000000..a07fd010
--- /dev/null
+++ b/perf/LuaJIT-benches/PARAM_arm.txt
@@ -0,0 +1,29 @@
+array3d 200
+binary-trees 13
+chameneos 1e6
+coroutine-ring 3e6
+euler14-bit 5e6
+fannkuch 10
+fasta 2e6
+k-nucleotide 5e5 FASTA_500000
+life
+mandelbrot 2000
+mandelbrot-bit 2000
+md5 5000
+nbody 1e6
+nsieve 9
+nsieve-bit 9
+nsieve-bit-fp 9
+partialsums 2e6
+pidigits-nogmp 2000
+ray 4
+recursive-ack 9
+recursive-fib 37
+revcomp 1e6 FASTA_1000000
+scimark-fft 2000
+scimark-lu 300
+scimark-sor 5000
+scimark-sparse 5e3
+series 1500
+spectral-norm 1000
+sum-file 1000 SUMCOL_1000
diff --git a/perf/LuaJIT-benches/PARAM_mips.txt b/perf/LuaJIT-benches/PARAM_mips.txt
new file mode 100644
index 00000000..e6bcadba
--- /dev/null
+++ b/perf/LuaJIT-benches/PARAM_mips.txt
@@ -0,0 +1,29 @@
+array3d 50
+binary-trees 10
+chameneos 5e4
+coroutine-ring 2e5
+euler14-bit 2e4
+fannkuch 8
+fasta 2e4
+k-nucleotide 1e4 FASTA_10000
+life
+mandelbrot 150
+mandelbrot-bit 150
+md5 10
+nbody 1e4
+nsieve 4
+nsieve-bit 4
+nsieve-bit-fp 2
+partialsums 5e4
+pidigits-nogmp 150
+ray 2
+recursive-ack 7
+recursive-fib 29
+revcomp 5e4 FASTA_50000
+scimark-fft 20
+scimark-lu 3
+scimark-sor 40
+scimark-sparse 100
+series 50
+spectral-norm 100
+sum-file 100 SUMCOL_100
diff --git a/perf/LuaJIT-benches/PARAM_ppc.txt b/perf/LuaJIT-benches/PARAM_ppc.txt
new file mode 100644
index 00000000..c8319a15
--- /dev/null
+++ b/perf/LuaJIT-benches/PARAM_ppc.txt
@@ -0,0 +1,29 @@
+array3d 200
+binary-trees 13
+chameneos 1e6
+coroutine-ring 4e6
+euler14-bit 1e6
+fannkuch 9
+fasta 5e5
+k-nucleotide 1e5 FASTA_100000
+life
+mandelbrot 800
+mandelbrot-bit 800
+md5 500
+nbody 1e5
+nsieve 8
+nsieve-bit 8
+nsieve-bit-fp 8
+partialsums 5e5
+pidigits-nogmp 800
+ray 5
+recursive-ack 9
+recursive-fib 34
+revcomp 1e6 FASTA_1000000
+scimark-fft 500
+scimark-lu 100
+scimark-sor 1000
+scimark-sparse 3000
+series 1000
+spectral-norm 200
+sum-file 1000 SUMCOL_1000
diff --git a/perf/LuaJIT-benches/PARAM_x86.txt b/perf/LuaJIT-benches/PARAM_x86.txt
new file mode 100644
index 00000000..87088d7b
--- /dev/null
+++ b/perf/LuaJIT-benches/PARAM_x86.txt
@@ -0,0 +1,29 @@
+array3d 300
+binary-trees 16
+chameneos 1e7
+coroutine-ring 2e7
+euler14-bit 2e7
+fannkuch 11
+fasta 25e6
+k-nucleotide 5e6 FASTA_5000000
+life
+mandelbrot 5000
+mandelbrot-bit 5000
+md5 20000
+nbody 5e6
+nsieve 12
+nsieve-bit 12
+nsieve-bit-fp 12
+partialsums 1e7
+pidigits-nogmp 5000
+ray 9
+recursive-ack 10
+recursive-fib 40
+revcomp 5e6 FASTA_5000000
+scimark-fft 50000
+scimark-lu 5000
+scimark-sor 50000
+scimark-sparse 15e4
+series 10000
+spectral-norm 3000
+sum-file 5000 SUMCOL_5000
diff --git a/perf/LuaJIT-benches/SUMCOL_1.txt b/perf/LuaJIT-benches/SUMCOL_1.txt
new file mode 100644
index 00000000..956aba14
--- /dev/null
+++ b/perf/LuaJIT-benches/SUMCOL_1.txt
@@ -0,0 +1,1000 @@
+276
+498
+-981
+770
+-401
+702
+966
+950
+-853
+-53
+-293
+604
+288
+892
+-697
+204
+96
+408
+880
+-7
+-817
+422
+-261
+-485
+-77
+826
+184
+864
+-751
+626
+812
+-369
+-353
+-371
+488
+-83
+-659
+24
+524
+-21
+840
+-757
+-17
+-973
+-843
+260
+858
+-389
+-521
+-99
+482
+-561
+-213
+630
+766
+932
+112
+-419
+-877
+762
+266
+-837
+170
+834
+746
+764
+922
+-89
+576
+-63
+90
+684
+316
+506
+-959
+708
+70
+252
+-747
+342
+-593
+-895
+-937
+-707
+350
+588
+-201
+-683
+-113
+-511
+-867
+322
+202
+472
+150
+-9
+-643
+28
+336
+86
+-925
+836
+-473
+-451
+-971
+-805
+-619
+84
+-67
+806
+270
+366
+334
+-555
+-557
+-331
+-409
+-553
+-145
+-71
+528
+490
+492
+828
+628
+-961
+536
+-859
+-271
+974
+-671
+-749
+414
+-257
+778
+56
+598
+-437
+-899
+-785
+-987
+32
+-999
+132
+-821
+-209
+402
+-543
+194
+-967
+294
+-943
+-285
+-483
+-97
+660
+-481
+-829
+-309
+-597
+-855
+80
+-355
+192
+-823
+436
+916
+282
+-629
+612
+-329
+-535
+780
+-47
+706
+110
+756
+-857
+-933
+-345
+-523
+718
+-31
+902
+678
+540
+698
+456
+-399
+126
+412
+-563
+-321
+-487
+-641
+-195
+-199
+-955
+772
+570
+18
+-217
+886
+984
+-721
+-995
+46
+-989
+946
+64
+716
+-719
+-869
+-579
+776
+450
+936
+980
+-439
+-977
+-455
+-997
+6
+268
+-269
+-421
+328
+352
+578
+-575
+476
+976
+-57
+-469
+544
+582
+-43
+510
+-939
+-581
+-337
+-203
+-737
+-827
+852
+-279
+-803
+-911
+-865
+548
+48
+-75
+416
+-275
+688
+-255
+-687
+-461
+-233
+420
+912
+-901
+-299
+12
+568
+694
+-411
+-883
+-327
+-361
+-339
+646
+-137
+-905
+670
+686
+-131
+-849
+-825
+256
+228
+-841
+68
+368
+-909
+242
+298
+118
+10
+222
+954
+-493
+-459
+-445
+608
+-765
+34
+468
+-715
+690
+-185
+-551
+-571
+-241
+292
+92
+768
+-923
+956
+614
+8
+730
+208
+-417
+300
+136
+-59
+-251
+-539
+166
+798
+866
+454
+-391
+-317
+668
+502
+-15
+994
+854
+-189
+666
+446
+-565
+-5
+42
+-227
+-87
+-779
+26
+312
+354
+754
+396
+-515
+220
+872
+654
+88
+-667
+250
+572
+952
+72
+982
+972
+-529
+-471
+-533
+-427
+538
+154
+-457
+-819
+750
+152
+452
+-41
+838
+-489
+418
+-649
+-637
+-197
+74
+394
+-653
+-727
+-435
+-23
+348
+638
+-611
+914
+-357
+-743
+-685
+580
+-247
+-577
+54
+-931
+-3
+558
+-793
+-443
+-759
+162
+-811
+384
+720
+-117
+900
+-519
+-39
+744
+432
+286
+-873
+380
+-167
+-283
+430
+-155
+-755
+206
+100
+364
+-677
+332
+-567
+382
+-605
+-181
+676
+-475
+-845
+910
+546
+14
+398
+616
+-769
+424
+992
+-235
+-239
+774
+478
+-919
+168
+-771
+-773
+-69
+-509
+930
+550
+-463
+178
+-861
+-761
+-795
+234
+-831
+-61
+-979
+-851
+-665
+-709
+896
+742
+-123
+590
+-693
+-887
+-379
+144
+-717
+20
+174
+82
+464
+30
+-969
+-349
+-531
+-799
+-661
+-647
+-623
+878
+148
+-545
+238
+-259
+554
+726
+-37
+-797
+98
+78
+-591
+-975
+962
+120
+906
+-207
+656
+-171
+652
+188
+672
+-133
+-91
+224
+818
+-333
+-839
+-499
+22
+-739
+142
+378
+-403
+-315
+370
+284
+122
+230
+-527
+-127
+442
+534
+160
+722
+262
+-657
+304
+258
+-103
+960
+-495
+-265
+634
+-101
+480
+-363
+308
+76
+-949
+-585
+904
+146
+-703
+164
+850
+246
+732
+-725
+566
+274
+-163
+-935
+-681
+-229
+254
+-733
+-547
+-273
+-903
+736
+-711
+794
+392
+-655
+-549
+808
+-429
+484
+-701
+-617
+804
+36
+-775
+-335
+-927
+714
+-177
+-325
+-413
+-963
+114
+-253
+-789
+-645
+40
+434
+898
+924
+-19
+738
+788
+280
+-121
+594
+-913
+426
+816
+-373
+-45
+340
+-109
+-323
+58
+-249
+940
+-297
+988
+998
+-607
+-745
+-633
+-115
+996
+-893
+696
+400
+848
+500
+-263
+562
+-807
+-105
+-603
+658
+-73
+-863
+448
+680
+-157
+-161
+728
+814
+-477
+-375
+1000
+-631
+-991
+362
+156
+-187
+-705
+-917
+-449
+-741
+556
+440
+-589
+-11
+-359
+-891
+-801
+-153
+-381
+938
+-173
+-243
+618
+-599
+-497
+486
+128
+790
+460
+-27
+-305
+-205
+-215
+324
+-341
+50
+458
+52
+-621
+874
+386
+560
+-569
+-51
+802
+786
+920
+-425
+466
+444
+-507
+-915
+346
+622
+-679
+784
+-689
+388
+508
+-613
+-313
+-447
+564
+-897
+-211
+-225
+-615
+-367
+186
+894
+-65
+-453
+-245
+602
+496
+-651
+-601
+820
+226
+-695
+-119
+372
+180
+94
+214
+542
+648
+-871
+592
+584
+824
+796
+374
+-945
+-311
+516
+942
+-221
+-433
+200
+-465
+-953
+870
+868
+-879
+518
+356
+-223
+682
+990
+-191
+-541
+-951
+-921
+-319
+-169
+-291
+-289
+792
+876
+306
+-491
+326
+-885
+62
+514
+-929
+318
+-231
+632
+44
+-107
+644
+-267
+-343
+-847
+934
+734
+-505
+-351
+574
+-627
+636
+-93
+-431
+-835
+428
+-183
+-151
+2
+-813
+-595
+958
+-141
+692
+-385
+610
+-179
+376
+948
+198
+-675
+964
+-907
+918
+-165
+-1
+406
+748
+-111
+532
+-55
+-281
+740
+504
+236
+-29
+662
+-713
+-537
+196
+-587
+822
+-135
+700
+-35
+674
+-407
+240
+-673
+-669
+-393
+470
+-525
+-875
+-383
+-625
+296
+-85
+-147
+-277
+800
+-691
+-143
+16
+-983
+-303
+290
+-139
+172
+320
+512
+596
+640
+664
+-791
+-783
+-387
+-735
+-467
+-301
+810
+134
+216
+278
+176
+606
+140
+-787
+978
+586
+890
+882
+-753
+-13
+970
+-941
+-175
+-777
+-809
+-441
+-347
+-377
+390
+-423
+842
+642
+190
+302
+438
+704
+310
+-49
+124
+-781
+-287
+724
+-767
+830
+620
+-295
+244
+-159
+-307
+-397
+66
+-237
+314
+-79
+624
+710
+272
+-365
+928
+856
+138
+-479
+520
+832
+862
+760
+846
+-81
+106
+-513
+-193
+650
+782
+-517
+944
+218
+712
+-663
+-559
+462
+-635
+-25
+182
+530
+844
+330
+-833
+102
+-881
+108
+-947
+-763
+-405
+232
+410
+104
+-729
+-149
+-889
+888
+360
+968
+908
+116
+-815
+-129
+522
+-723
+-993
+860
+-503
+926
+-219
+-415
+60
+158
+-609
+-501
+986
+-699
+-583
+884
+212
+210
+-957
+526
+-985
+552
+344
+-395
+-95
+338
+248
+494
+130
+404
+358
+600
+-639
+-125
+-33
+-965
+752
+474
+-731
+758
+-573
+4
+38
+264
diff --git a/perf/LuaJIT-benches/TEST_md5sum.txt b/perf/LuaJIT-benches/TEST_md5sum.txt
new file mode 100644
index 00000000..15aa8a1c
--- /dev/null
+++ b/perf/LuaJIT-benches/TEST_md5sum.txt
@@ -0,0 +1,20 @@
+binarytrees	10	7202f4e13df7abc5ad8c07f05fe9d644
+chameneos	1e5	a629ce12f63050c6656bce175258cf8f
+cheapconcr	1000	d29799d1e263810a4db7bbf43ca66499
+cheapconcw	1000	d29799d1e263810a4db7bbf43ca66499
+fannkuch	8	51e5e372cbc5471ea8940b20ad782319
+fasta	1e5	78cd327de6f0a5667da0aa9349888279
+knucleotide	x	88efb24c1fed533959ed84bb32c88142 <FASTA_10000
+mandelbrot	200	cc65e64bd553ed18896de1dfe7fae3e5
+meteor	3000	9a65bb4b0a735ace1eaa4f2628f01026
+nbody	1e4	e0361c898ba747117ec177f7b3b3359c
+nsieve	4	767e02c93624995732e151932fa5f304
+nsievebits	4	767e02c93624995732e151932fa5f304
+partialsums	1e5	33efb41c72f8ecfb5b36c99e32189a3f
+pidigits	200	173a11a77bb1e72dd31254a760317428
+recursive	4	07a47c2d2cf50503b16efda789f84916
+regexdna	x	fdf3e6e9c599754e1eec3e524ea13fed <FASTA_10000
+revcomp	x	47de276e2f72519b57b82da39f4c7592 <FASTA_10000
+spectralnorm 200	25f44bd552ccd9faa0ee2ae5617947e2
+sumfile	x	2ebd3caa45b31a2e74e436b645eab4b0 <SUMCOL_100
+
diff --git a/perf/LuaJIT-benches/array3d.lua b/perf/LuaJIT-benches/array3d.lua
new file mode 100644
index 00000000..c10b09b1
--- /dev/null
+++ b/perf/LuaJIT-benches/array3d.lua
@@ -0,0 +1,59 @@
+
+local function array_set(self, x, y, z, p)
+  assert(x >= 0 and x < self.nx, "x outside PA")
+  assert(y >= 0 and y < self.ny, "y outside PA")
+  assert(z >= 0 and z < self.nz, "z outside PA")
+  local pos = (z*self.ny + y)*self.nx + x
+  local image = self.image
+  if self.packed then
+    local maxv = self.max_voltage
+    if p > maxv then self.max_voltage = p*2.0 end
+    local oldp = image[pos] or 0.0 -- Works with uninitialized table, too
+    if oldp > maxv then p = p + maxv*2.0 end
+    image[pos] = p
+  else
+    image[pos] = p
+  end
+  self.changed = true
+  self.changed_recently = true
+end
+
+local function array_points(self)
+  local y, z = 0, 0
+  return function(self, x)
+    x = x + 1
+    if x >= self.nx then
+      x = 0
+      y = y + 1
+      if y >= self.ny then
+	y = 0
+	z = z + 1
+	if z >= self.nz then
+	  return nil, nil, nil
+	end
+      end
+    end
+    return x, y, z
+  end, self, 0
+end
+
+local function array_new(nx, ny, nz, packed)
+  return {
+    nx = nx, ny = ny, nz = nz,
+    packed = packed, max_voltage = 0.0,
+    changed = false, changed_recently = false,
+    image = {}, -- Preferably use a fixed-type, pre-sized array here.
+    set = array_set,
+    points = array_points,
+  }
+end
+
+local dim = tonumber(arg and arg[1]) or 300 -- Array dimension dim^3
+local packed = arg and arg[2] == "packed"   -- Packed image or flat
+local arr = array_new(dim, dim, dim, packed)
+
+for x,y,z in arr:points() do
+  arr:set(x, y, z, x*x)
+end
+assert(arr.image[dim^3-1] == (dim-1)^2)
+
diff --git a/perf/LuaJIT-benches/binary-trees.lua b/perf/LuaJIT-benches/binary-trees.lua
new file mode 100644
index 00000000..bf040466
--- /dev/null
+++ b/perf/LuaJIT-benches/binary-trees.lua
@@ -0,0 +1,47 @@
+
+local function BottomUpTree(item, depth)
+  if depth > 0 then
+    local i = item + item
+    depth = depth - 1
+    local left, right = BottomUpTree(i-1, depth), BottomUpTree(i, depth)
+    return { item, left, right }
+  else
+    return { item }
+  end
+end
+
+local function ItemCheck(tree)
+  if tree[2] then
+    return tree[1] + ItemCheck(tree[2]) - ItemCheck(tree[3])
+  else
+    return tree[1]
+  end
+end
+
+local N = tonumber(arg and arg[1]) or 0
+local mindepth = 4
+local maxdepth = mindepth + 2
+if maxdepth < N then maxdepth = N end
+
+do
+  local stretchdepth = maxdepth + 1
+  local stretchtree = BottomUpTree(0, stretchdepth)
+  io.write(string.format("stretch tree of depth %d\t check: %d\n",
+    stretchdepth, ItemCheck(stretchtree)))
+end
+
+local longlivedtree = BottomUpTree(0, maxdepth)
+
+for depth=mindepth,maxdepth,2 do
+  local iterations = 2 ^ (maxdepth - depth + mindepth)
+  local check = 0
+  for i=1,iterations do
+    check = check + ItemCheck(BottomUpTree(1, depth)) +
+            ItemCheck(BottomUpTree(-1, depth))
+  end
+  io.write(string.format("%d\t trees of depth %d\t check: %d\n",
+    iterations*2, depth, check))
+end
+
+io.write(string.format("long lived tree of depth %d\t check: %d\n",
+  maxdepth, ItemCheck(longlivedtree)))
diff --git a/perf/LuaJIT-benches/chameneos.lua b/perf/LuaJIT-benches/chameneos.lua
new file mode 100644
index 00000000..78b64c3f
--- /dev/null
+++ b/perf/LuaJIT-benches/chameneos.lua
@@ -0,0 +1,68 @@
+
+local co = coroutine
+local create, resume, yield = co.create, co.resume, co.yield
+
+local N = tonumber(arg and arg[1]) or 10
+local first, second
+
+-- Meet another creature.
+local function meet(me)
+  while second do yield() end -- Wait until meeting place clears.
+  local other = first
+  if other then -- Hey, I found a new friend!
+    first = nil
+    second = me
+  else -- Sniff, nobody here (yet).
+    local n = N - 1
+    if n < 0 then return end -- Uh oh, the mall is closed.
+    N = n
+    first = me
+    repeat yield(); other = second until other -- Wait for another creature.
+    second = nil
+    yield() -- Be nice and let others meet up.
+  end
+  return other
+end
+
+-- Create a very social creature.
+local function creature(color)
+  return create(function()
+    local me = color
+    for met=0,1000000000 do
+      local other = meet(me)
+      if not other then return met end
+      if me ~= other then
+        if me == "blue" then me = other == "red" and "yellow" or "red"
+        elseif me == "red" then me = other == "blue" and "yellow" or "blue"
+        else me = other == "blue" and "red" or "blue" end
+      end
+    end
+  end)
+end
+
+-- Trivial round-robin scheduler.
+local function schedule(threads)
+  local resume = resume
+  local nthreads, meetings = #threads, 0
+  repeat
+    for i=1,nthreads do
+      local thr = threads[i]
+      if not thr then return meetings end
+      local ok, met = resume(thr)
+      if met then
+        meetings = meetings + met
+        threads[i] = nil
+      end
+    end
+  until false
+end
+
+-- A bunch of colorful creatures.
+local threads = {
+  creature("blue"),
+  creature("red"),
+  creature("yellow"),
+  creature("blue"),
+}
+
+io.write(schedule(threads), "\n")
diff --git a/perf/LuaJIT-benches/coroutine-ring.lua b/perf/LuaJIT-benches/coroutine-ring.lua
new file mode 100644
index 00000000..1e8c5ef6
--- /dev/null
+++ b/perf/LuaJIT-benches/coroutine-ring.lua
@@ -0,0 +1,42 @@
+-- The Computer Language Benchmarks Game
+-- http://shootout.alioth.debian.org/
+-- contributed by Sam Roberts
+-- reviewed by Bruno Massa
+
+local n         = tonumber(arg and arg[1]) or 2e7
+
+-- fixed size pool
+local poolsize  = 503
+local threads   = {}
+
+-- cache these to avoid global environment lookups
+local create    = coroutine.create
+local resume    = coroutine.resume
+local yield     = coroutine.yield
+
+local id        = 1
+local token     = 0
+local ok
+
+local body = function(token)
+  while true do
+    token = yield(token + 1)
+  end
+end
+
+-- create all threads
+for id = 1, poolsize do
+  threads[id] = create(body)
+end
+
+-- send the token
+repeat
+  if id == poolsize then
+    id = 1
+  else
+    id = id + 1
+  end
+  ok, token = resume(threads[id], token)
+until token == n
+
+io.write(id, "\n")
diff --git a/perf/LuaJIT-benches/euler14-bit.lua b/perf/LuaJIT-benches/euler14-bit.lua
new file mode 100644
index 00000000..537f2bf3
--- /dev/null
+++ b/perf/LuaJIT-benches/euler14-bit.lua
@@ -0,0 +1,22 @@
+
+local bit = require("bit")
+local bnot, bor, band = bit.bnot, bit.bor, bit.band
+local shl, shr = bit.lshift, bit.rshift
+
+local N = tonumber(arg and arg[1]) or 10000000
+local cache, m, n = { 1 }, 1, 1
+if arg and arg[2] then cache = nil end
+for i=2,N do
+  local j = i
+  for len=1,1000000000 do
+    j = bor(band(shr(j,1), band(j,1)-1), band(shl(j,1)+j+1, bnot(band(j,1)-1)))
+    if cache then
+      local x = cache[j]; if x then j = x+len; break end
+    elseif j == 1 then
+      j = len+1; break
+    end
+  end
+  if cache then cache[i] = j end
+  if j > m then m, n = j, i end
+end
+io.write("Found ", n, " (chain length: ", m, ")\n")
diff --git a/perf/LuaJIT-benches/fannkuch.lua b/perf/LuaJIT-benches/fannkuch.lua
new file mode 100644
index 00000000..2a4cd426
--- /dev/null
+++ b/perf/LuaJIT-benches/fannkuch.lua
@@ -0,0 +1,50 @@
+
+local function fannkuch(n)
+  local p, q, s, odd, check, maxflips = {}, {}, {}, true, 0, 0
+  for i=1,n do p[i] = i; q[i] = i; s[i] = i end
+  repeat
+    -- Print max. 30 permutations.
+    if check < 30 then
+      if not p[n] then return maxflips end	-- Catch n = 0, 1, 2.
+      io.write(unpack(p)); io.write("\n")
+      check = check + 1
+    end
+    -- Copy and flip.
+    local q1 = p[1]				-- Cache 1st element.
+    if p[n] ~= n and q1 ~= 1 then		-- Avoid useless work.
+      for i=2,n do q[i] = p[i] end		-- Work on a copy.
+      local flips = 1			-- Flip ...
+      while true do
+	local qq = q[q1]
+	if qq == 1 then				-- ... until 1st element is 1.
+	  if flips > maxflips then maxflips = flips end -- New maximum?
+	  break
+	end
+	q[q1] = q1
+	if q1 >= 4 then
+	  local i, j = 2, q1 - 1
+	  repeat q[i], q[j] = q[j], q[i]; i = i + 1; j = j - 1; until i >= j
+	end
+	q1 = qq
+	flips=flips+1
+      end
+    end
+    -- Permute.
+    if odd then
+      p[2], p[1] = p[1], p[2]; odd = false	-- Rotate 1<-2.
+    else
+      p[2], p[3] = p[3], p[2]; odd = true	-- Rotate 1<-2 and 1<-2<-3.
+      for i=3,n do
+	local sx = s[i]
+	if sx ~= 1 then s[i] = sx-1; break end
+	if i == n then return maxflips end	-- Out of permutations.
+	s[i] = i
+	-- Rotate 1<-...<-i+1.
+	local t=p[1]; for j=i+1,1,-1 do p[j],t=t,p[j] end
+      end
+    end
+  until false
+end
+
+local n = tonumber(arg and arg[1]) or 1
+io.write("Pfannkuchen(", n, ") = ", fannkuch(n), "\n")
diff --git a/perf/LuaJIT-benches/fasta.lua b/perf/LuaJIT-benches/fasta.lua
new file mode 100644
index 00000000..7ce60804
--- /dev/null
+++ b/perf/LuaJIT-benches/fasta.lua
@@ -0,0 +1,95 @@
+
+local Last = 42
+local function random(max)
+  local y = (Last * 3877 + 29573) % 139968
+  Last = y
+  return (max * y) / 139968
+end
+
+local function make_repeat_fasta(id, desc, s, n)
+  local write, sub = io.write, string.sub
+  write(">", id, " ", desc, "\n")
+  local p, sn, s2 = 1, #s, s..s
+  for i=60,n,60 do
+    write(sub(s2, p, p + 59), "\n")
+    p = p + 60; if p > sn then p = p - sn end
+  end
+  local tail = n % 60
+  if tail > 0 then write(sub(s2, p, p + tail-1), "\n") end
+end
+
+local function make_random_fasta(id, desc, bs, n)
+  io.write(">", id, " ", desc, "\n")
+  loadstring([=[
+    local write, char, unpack, n, random = io.write, string.char, unpack, ...
+    local buf, p = {}, 1
+    for i=60,n,60 do
+      for j=p,p+59 do ]=]..bs..[=[ end
+      buf[p+60] = 10; p = p + 61
+      if p >= 2048 then write(char(unpack(buf, 1, p-1))); p = 1 end
+    end
+    local tail = n % 60
+    if tail > 0 then
+      for j=p,p+tail-1 do ]=]..bs..[=[ end
+      p = p + tail; buf[p] = 10; p = p + 1
+    end
+    write(char(unpack(buf, 1, p-1)))
+  ]=], desc)(n, random)
+end
+
+local function bisect(c, p, lo, hi)
+  local n = hi - lo
+  if n == 0 then return "buf[j] = "..c[hi].."\n" end
+  local mid = math.floor(n / 2)
+  return "if r < "..p[lo+mid].." then\n"..bisect(c, p, lo, lo+mid)..
+         "else\n"..bisect(c, p, lo+mid+1, hi).."end\n"
+end
+
+local function make_bisect(tab)
+  local c, p, sum = {}, {}, 0
+  for i,row in ipairs(tab) do
+    c[i] = string.byte(row[1])
+    sum = sum + row[2]
+    p[i] = sum
+  end
+  return "local r = random(1)\n"..bisect(c, p, 1, #tab)
+end
+
+local alu =
+  "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"..
+  "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"..
+  "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"..
+  "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"..
+  "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"..
+  "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"..
+  "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
+
+local iub = make_bisect{
+  { "a", 0.27 },
+  { "c", 0.12 },
+  { "g", 0.12 },
+  { "t", 0.27 },
+  { "B", 0.02 },
+  { "D", 0.02 },
+  { "H", 0.02 },
+  { "K", 0.02 },
+  { "M", 0.02 },
+  { "N", 0.02 },
+  { "R", 0.02 },
+  { "S", 0.02 },
+  { "V", 0.02 },
+  { "W", 0.02 },
+  { "Y", 0.02 },
+}
+
+local homosapiens = make_bisect{
+  { "a", 0.3029549426680 },
+  { "c", 0.1979883004921 },
+  { "g", 0.1975473066391 },
+  { "t", 0.3015094502008 },
+}
+
+local N = tonumber(arg and arg[1]) or 1000
+make_repeat_fasta('ONE', 'Homo sapiens alu', alu, N*2)
+make_random_fasta('TWO', 'IUB ambiguity codes', iub, N*3)
+make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, N*5)
diff --git a/perf/LuaJIT-benches/k-nucleotide.lua b/perf/LuaJIT-benches/k-nucleotide.lua
new file mode 100644
index 00000000..0bfb41be
--- /dev/null
+++ b/perf/LuaJIT-benches/k-nucleotide.lua
@@ -0,0 +1,58 @@
+
+local function kfrequency(seq, freq, k, frame)
+  local sub = string.sub
+  local k1 = k - 1
+  for i=frame,#seq-k1,k do
+    local c = sub(seq, i, i+k1)
+    freq[c] = (freq[c] or 0) + 1
+  end
+end
+
+local function count(seq, frag)
+  local k = #frag
+  local freq = {}
+  for frame=1,k do kfrequency(seq, freq, k, frame) end
+  io.write(freq[frag] or 0, "\t", frag, "\n")
+end
+
+local function frequency(seq, k)
+  local freq = {}
+  for frame=1,k do kfrequency(seq, freq, k, frame) end
+  local sfreq, sn, sum = {}, 1, 0
+  for c,v in pairs(freq) do sfreq[sn] = c; sn = sn + 1; sum = sum + v end
+  table.sort(sfreq, function(a, b)
+    local fa, fb = freq[a], freq[b]
+    return fa == fb and a > b or fa > fb
+  end)
+  for _,c in ipairs(sfreq) do
+    io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum))
+  end
+  io.write("\n")
+end
+
+local function readseq()
+  local sub = string.sub
+  for line in io.lines() do
+    if sub(line, 1, 1) == ">" and sub(line, 2, 6) == "THREE" then break end
+  end
+  local lines, ln = {}, 0
+  for line in io.lines() do
+    local c = sub(line, 1, 1)
+    if c == ">" then
+      break
+    elseif c ~= ";" then
+      ln = ln + 1
+      lines[ln] = line
+    end
+  end
+  return string.upper(table.concat(lines, "", 1, ln))
+end
+
+local seq = readseq()
+frequency(seq, 1)
+frequency(seq, 2)
+count(seq, "GGT")
+count(seq, "GGTA")
+count(seq, "GGTATT")
+count(seq, "GGTATTTTAATT")
+count(seq, "GGTATTTTAATTTATAGT")
diff --git a/perf/LuaJIT-benches/life.lua b/perf/LuaJIT-benches/life.lua
new file mode 100644
index 00000000..911d9fe1
--- /dev/null
+++ b/perf/LuaJIT-benches/life.lua
@@ -0,0 +1,111 @@
+-- life.lua
+-- original by Dave Bollinger <DBollinger@compuserve.com> posted to lua-l
+-- modified to use ANSI terminal escape sequences
+-- modified to use for instead of while
+
+local write=io.write
+
+ALIVE="�"	DEAD="�"
+ALIVE="O"	DEAD="-"
+
+function delay() -- NOTE: SYSTEM-DEPENDENT, adjust as necessary
+  for i=1,10000 do end
+  -- local i=os.clock()+1 while(os.clock()<i) do end
+end
+
+function ARRAY2D(w,h)
+  local t = {w=w,h=h}
+  for y=1,h do
+    t[y] = {}
+    for x=1,w do
+      t[y][x]=0
+    end
+  end
+  return t
+end
+
+_CELLS = {}
+
+-- give birth to a "shape" within the cell array
+function _CELLS:spawn(shape,left,top)
+  for y=0,shape.h-1 do
+    for x=0,shape.w-1 do
+      self[top+y][left+x] = shape[y*shape.w+x+1]
+    end
+  end
+end
+
+-- run the CA and produce the next generation
+function _CELLS:evolve(next)
+  local ym1,y,yp1,yi=self.h-1,self.h,1,self.h
+  while yi > 0 do
+    local xm1,x,xp1,xi=self.w-1,self.w,1,self.w
+    while xi > 0 do
+      local sum = self[ym1][xm1] + self[ym1][x] + self[ym1][xp1] +
+                  self[y][xm1] + self[y][xp1] +
+                  self[yp1][xm1] + self[yp1][x] + self[yp1][xp1]
+      next[y][x] = ((sum==2) and self[y][x]) or ((sum==3) and 1) or 0
+      xm1,x,xp1,xi = x,xp1,xp1+1,xi-1
+    end
+    ym1,y,yp1,yi = y,yp1,yp1+1,yi-1
+  end
+end
+
+-- output the array to screen
+function _CELLS:draw()
+  local out="" -- accumulate to reduce flicker
+  for y=1,self.h do
+   for x=1,self.w do
+      out=out..(((self[y][x]>0) and ALIVE) or DEAD)
+    end
+    out=out.."\n"
+  end
+  write(out)
+end
+
+-- constructor
+function CELLS(w,h)
+  local c = ARRAY2D(w,h)
+  c.spawn = _CELLS.spawn
+  c.evolve = _CELLS.evolve
+  c.draw = _CELLS.draw
+  return c
+end
+
+--
+-- shapes suitable for use with spawn() above
+--
+HEART = { 1,0,1,1,0,1,1,1,1; w=3,h=3 }
+GLIDER = { 0,0,1,1,0,1,0,1,1; w=3,h=3 }
+EXPLODE = { 0,1,0,1,1,1,1,0,1,0,1,0; w=3,h=4 }
+FISH = { 0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,1,0; w=5,h=4 }
+BUTTERFLY = { 1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1; w=5,h=5 }
+
+-- the main routine
+function LIFE(w,h)
+  -- create two arrays
+  local thisgen = CELLS(w,h)
+  local nextgen = CELLS(w,h)
+
+  -- create some life
+  -- about 1000 generations of fun, then a glider steady-state
+  thisgen:spawn(GLIDER,5,4)
+  thisgen:spawn(EXPLODE,25,10)
+  thisgen:spawn(FISH,4,12)
+
+  -- run until break
+  local gen=1
+  write("\027[2J")	-- ANSI clear screen
+  while 1 do
+    thisgen:evolve(nextgen)
+    thisgen,nextgen = nextgen,thisgen
+    write("\027[H")	-- ANSI home cursor
+    thisgen:draw()
+    write("Life - generation ",gen,"\n")
+    gen=gen+1
+    if gen>2000 then break end
+    --delay()		-- no delay
+  end
+end
+
+LIFE(40,20)
diff --git a/perf/LuaJIT-benches/mandelbrot-bit.lua b/perf/LuaJIT-benches/mandelbrot-bit.lua
new file mode 100644
index 00000000..91d96975
--- /dev/null
+++ b/perf/LuaJIT-benches/mandelbrot-bit.lua
@@ -0,0 +1,33 @@
+
+local bit = require("bit")
+local bor, band = bit.bor, bit.band
+local shl, shr, rol = bit.lshift, bit.rshift, bit.rol
+local write, char, unpack = io.write, string.char, unpack
+local N = tonumber(arg and arg[1]) or 100
+local M, buf = 2/N, {}
+write("P4\n", N, " ", N, "\n")
+for y=0,N-1 do
+  local Ci, b, p = y*M-1, -16777216, 0
+  local Ciq = Ci*Ci
+  for x=0,N-1,2 do
+    local Cr, Cr2 = x*M-1.5, (x+1)*M-1.5
+    local Zr, Zi, Zrq, Ziq = Cr, Ci, Cr*Cr, Ciq
+    local Zr2, Zi2, Zrq2, Ziq2 = Cr2, Ci, Cr2*Cr2, Ciq
+    b = rol(b, 2)
+    for i=1,49 do
+      Zi = Zr*Zi*2 + Ci; Zi2 = Zr2*Zi2*2 + Ci
+      Zr = Zrq-Ziq + Cr; Zr2 = Zrq2-Ziq2 + Cr2
+      Ziq = Zi*Zi; Ziq2 = Zi2*Zi2
+      Zrq = Zr*Zr; Zrq2 = Zr2*Zr2
+      if band(b, 2) ~= 0 and Zrq+Ziq > 4.0 then b = band(b, -3) end
+      if band(b, 1) ~= 0 and Zrq2+Ziq2 > 4.0 then b = band(b, -2) end
+      if band(b, 3) == 0 then break end
+    end
+    if b >= 0 then p = p + 1; buf[p] = b; b = -16777216; end
+  end
+  if b ~= -16777216 then
+    if band(N, 1) ~= 0 then b = shr(b, 1) end
+    p = p + 1; buf[p] = shl(b, 8-band(N, 7))
+  end
+  write(char(unpack(buf, 1, p)))
+end
diff --git a/perf/LuaJIT-benches/mandelbrot.lua b/perf/LuaJIT-benches/mandelbrot.lua
new file mode 100644
index 00000000..0ef595a2
--- /dev/null
+++ b/perf/LuaJIT-benches/mandelbrot.lua
@@ -0,0 +1,23 @@
+-- Mandelbrot set as a binary PBM (P4) image, one pixel per step, bits packed with plain arithmetic.
+local write, char, unpack = io.write, string.char, unpack
+local N = tonumber(arg and arg[1]) or 100 -- image is N x N pixels
+local M, ba, bb, buf = 2/N, 2^(N%8+1)-1, 2^(8-N%8), {} -- ba/bb: invert and left-align the last partial byte
+write("P4\n", N, " ", N, "\n")
+for y=0,N-1 do
+  local Ci, b, p = y*M-1, 1, 0 -- b: bit accumulator with a leading 1 as sentinel; p: bytes in buf
+  for x=0,N-1 do
+    local Cr = x*M-1.5
+    local Zr, Zi, Zrq, Ziq = Cr, Ci, Cr*Cr, Ci*Ci
+    b = b + b -- shift the accumulator left by one bit
+    for i=1,49 do
+      Zi = Zr*Zi*2 + Ci
+      Zr = Zrq-Ziq + Cr
+      Ziq = Zi*Zi
+      Zrq = Zr*Zr
+      if Zrq+Ziq > 4.0 then b = b + 1; break; end -- escaped: record a 1 (inverted when stored)
+    end
+    if b >= 256 then p = p + 1; buf[p] = 511 - b; b = 1; end -- eight pixels collected: store the inverted bits
+  end
+  if b ~= 1 then p = p + 1; buf[p] = (ba-b)*bb; end -- partial last byte of the row
+  write(char(unpack(buf, 1, p)))
+end
diff --git a/perf/LuaJIT-benches/md5.lua b/perf/LuaJIT-benches/md5.lua
new file mode 100644
index 00000000..fdf6b4a7
--- /dev/null
+++ b/perf/LuaJIT-benches/md5.lua
@@ -0,0 +1,183 @@
+
+local bit = require("bit")
+local tobit, tohex, bnot = bit.tobit or bit.cast, bit.tohex, bit.bnot
+local bor, band, bxor = bit.bor, bit.band, bit.bxor
+local lshift, rshift, rol, bswap = bit.lshift, bit.rshift, bit.rol, bit.bswap
+local byte, char, sub, rep = string.byte, string.char, string.sub, string.rep
+
+-- Replacement for bit.rol if rotates are missing.
+if not rol then
+  local join, left, right = bit.bor, bit.lshift, bit.rshift
+  rol = function(v, n) return join(left(v, n), right(v, 32-n)) end
+end
+
+-- Replacement for bit.bswap if it is missing.
+if not bswap then
+  local join, mask, left, right = bit.bor, bit.band, bit.lshift, bit.rshift
+  bswap = function(v)
+    local top_byte, upper_mid = right(v, 24), mask(right(v, 8), 0xff00)
+    return join(top_byte, upper_mid, left(mask(v, 0xff00), 8), left(v, 24))
+  end
+end
+-- (Unreliable) replacement for bit.tohex if it is missing.
+if not tohex then
+  tohex = function(v) return string.sub(string.format("%08x", v), -8) end
+end
+-- Round 1 step: selects c where b is set, else d (written as d XOR (b AND (c XOR d))).
+local function tr_f(a, b, c, d, x, s)
+  return rol(bxor(d, band(b, bxor(c, d))) + a + x, s) + b
+end
+-- Round 2 step: selects b where d is set, else c.
+local function tr_g(a, b, c, d, x, s)
+  return rol(bxor(c, band(d, bxor(b, c))) + a + x, s) + b
+end
+-- Round 3 step: parity of b, c and d.
+local function tr_h(a, b, c, d, x, s)
+  return rol(bxor(b, c, d) + a + x, s) + b
+end
+-- Round 4 step: c XOR (b OR NOT d).
+local function tr_i(a, b, c, d, x, s)
+  return rol(bxor(c, bor(b, bnot(d))) + a + x, s) + b
+end
+-- Process one block of 16 message words x[1..16]; takes the state a1..d1 and returns the updated state.
+local function transform(x, a1, b1, c1, d1)
+  local a, b, c, d = a1, b1, c1, d1
+  -- Round 1 (tr_f): message words are consumed in order.
+  a = tr_f(a, b, c, d, x[ 1] + 0xd76aa478,  7)
+  d = tr_f(d, a, b, c, x[ 2] + 0xe8c7b756, 12)
+  c = tr_f(c, d, a, b, x[ 3] + 0x242070db, 17)
+  b = tr_f(b, c, d, a, x[ 4] + 0xc1bdceee, 22)
+  a = tr_f(a, b, c, d, x[ 5] + 0xf57c0faf,  7)
+  d = tr_f(d, a, b, c, x[ 6] + 0x4787c62a, 12)
+  c = tr_f(c, d, a, b, x[ 7] + 0xa8304613, 17)
+  b = tr_f(b, c, d, a, x[ 8] + 0xfd469501, 22)
+  a = tr_f(a, b, c, d, x[ 9] + 0x698098d8,  7)
+  d = tr_f(d, a, b, c, x[10] + 0x8b44f7af, 12)
+  c = tr_f(c, d, a, b, x[11] + 0xffff5bb1, 17)
+  b = tr_f(b, c, d, a, x[12] + 0x895cd7be, 22)
+  a = tr_f(a, b, c, d, x[13] + 0x6b901122,  7)
+  d = tr_f(d, a, b, c, x[14] + 0xfd987193, 12)
+  c = tr_f(c, d, a, b, x[15] + 0xa679438e, 17)
+  b = tr_f(b, c, d, a, x[16] + 0x49b40821, 22)
+  -- Round 2 (tr_g).
+  a = tr_g(a, b, c, d, x[ 2] + 0xf61e2562,  5)
+  d = tr_g(d, a, b, c, x[ 7] + 0xc040b340,  9)
+  c = tr_g(c, d, a, b, x[12] + 0x265e5a51, 14)
+  b = tr_g(b, c, d, a, x[ 1] + 0xe9b6c7aa, 20)
+  a = tr_g(a, b, c, d, x[ 6] + 0xd62f105d,  5)
+  d = tr_g(d, a, b, c, x[11] + 0x02441453,  9)
+  c = tr_g(c, d, a, b, x[16] + 0xd8a1e681, 14)
+  b = tr_g(b, c, d, a, x[ 5] + 0xe7d3fbc8, 20)
+  a = tr_g(a, b, c, d, x[10] + 0x21e1cde6,  5)
+  d = tr_g(d, a, b, c, x[15] + 0xc33707d6,  9)
+  c = tr_g(c, d, a, b, x[ 4] + 0xf4d50d87, 14)
+  b = tr_g(b, c, d, a, x[ 9] + 0x455a14ed, 20)
+  a = tr_g(a, b, c, d, x[14] + 0xa9e3e905,  5)
+  d = tr_g(d, a, b, c, x[ 3] + 0xfcefa3f8,  9)
+  c = tr_g(c, d, a, b, x[ 8] + 0x676f02d9, 14)
+  b = tr_g(b, c, d, a, x[13] + 0x8d2a4c8a, 20)
+  -- Round 3 (tr_h).
+  a = tr_h(a, b, c, d, x[ 6] + 0xfffa3942,  4)
+  d = tr_h(d, a, b, c, x[ 9] + 0x8771f681, 11)
+  c = tr_h(c, d, a, b, x[12] + 0x6d9d6122, 16)
+  b = tr_h(b, c, d, a, x[15] + 0xfde5380c, 23)
+  a = tr_h(a, b, c, d, x[ 2] + 0xa4beea44,  4)
+  d = tr_h(d, a, b, c, x[ 5] + 0x4bdecfa9, 11)
+  c = tr_h(c, d, a, b, x[ 8] + 0xf6bb4b60, 16)
+  b = tr_h(b, c, d, a, x[11] + 0xbebfbc70, 23)
+  a = tr_h(a, b, c, d, x[14] + 0x289b7ec6,  4)
+  d = tr_h(d, a, b, c, x[ 1] + 0xeaa127fa, 11)
+  c = tr_h(c, d, a, b, x[ 4] + 0xd4ef3085, 16)
+  b = tr_h(b, c, d, a, x[ 7] + 0x04881d05, 23)
+  a = tr_h(a, b, c, d, x[10] + 0xd9d4d039,  4)
+  d = tr_h(d, a, b, c, x[13] + 0xe6db99e5, 11)
+  c = tr_h(c, d, a, b, x[16] + 0x1fa27cf8, 16)
+  b = tr_h(b, c, d, a, x[ 3] + 0xc4ac5665, 23)
+  -- Round 4 (tr_i).
+  a = tr_i(a, b, c, d, x[ 1] + 0xf4292244,  6)
+  d = tr_i(d, a, b, c, x[ 8] + 0x432aff97, 10)
+  c = tr_i(c, d, a, b, x[15] + 0xab9423a7, 15)
+  b = tr_i(b, c, d, a, x[ 6] + 0xfc93a039, 21)
+  a = tr_i(a, b, c, d, x[13] + 0x655b59c3,  6)
+  d = tr_i(d, a, b, c, x[ 4] + 0x8f0ccc92, 10)
+  c = tr_i(c, d, a, b, x[11] + 0xffeff47d, 15)
+  b = tr_i(b, c, d, a, x[ 2] + 0x85845dd1, 21)
+  a = tr_i(a, b, c, d, x[ 9] + 0x6fa87e4f,  6)
+  d = tr_i(d, a, b, c, x[16] + 0xfe2ce6e0, 10)
+  c = tr_i(c, d, a, b, x[ 7] + 0xa3014314, 15)
+  b = tr_i(b, c, d, a, x[14] + 0x4e0811a1, 21)
+  a = tr_i(a, b, c, d, x[ 5] + 0xf7537e82,  6)
+  d = tr_i(d, a, b, c, x[12] + 0xbd3af235, 10)
+  c = tr_i(c, d, a, b, x[ 3] + 0x2ad7d2bb, 15)
+  b = tr_i(b, c, d, a, x[10] + 0xeb86d391, 21)
+  -- Add the incoming state back in; tobit wraps each sum to 32 bits.
+  return tobit(a+a1), tobit(b+b1), tobit(c+c1), tobit(d+d1)
+end
+-- Return the MD5 digest of the string msg as a 32-character hex string.
+-- Note: this is copying the original string and NOT particularly fast.
+-- A library for struct unpacking would make this task much easier.
+local function md5(msg)
+  local len = #msg
+  msg = msg.."\128"..rep("\0", 63 - band(len + 8, 63)) -- pad so the total length is a multiple of 64 bytes
+	   ..char(band(lshift(len, 3), 255), band(rshift(len, 5), 255), -- bit length, lowest byte first
+		  band(rshift(len, 13), 255), band(rshift(len, 21), 255))
+	   .."\0\0\0\0" -- upper four bytes of the length field are always zero here
+  local a, b, c, d = 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476
+  local x, k = {}, 1 -- x: current block of 16 words; k: next word index
+  for i=1,#msg,4 do
+    local m0, m1, m2, m3 = byte(msg, i, i+3)
+    x[k] = bor(m0, lshift(m1, 8), lshift(m2, 16), lshift(m3, 24)) -- assemble a little-endian word
+    if k == 16 then -- block complete: fold it into the state
+      a, b, c, d = transform(x, a, b, c, d)
+      k = 1
+    else
+      k = k + 1
+    end
+  end
+  return tohex(bswap(a))..tohex(bswap(b))..tohex(bswap(c))..tohex(bswap(d))
+end
+-- Self-test at load time: the seven test strings of the RFC 1321 test suite.
+assert(md5('') == 'd41d8cd98f00b204e9800998ecf8427e')
+assert(md5('a') == '0cc175b9c0f1b6a831c399e269772661')
+assert(md5('abc') == '900150983cd24fb0d6963f7d28e17f72')
+assert(md5('message digest') == 'f96b697d7cb7938d525a2f31aaf161d0')
+assert(md5('abcdefghijklmnopqrstuvwxyz') == 'c3fcd3d76192e4007dfb496cca67e13b')
+assert(md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789') ==
+       'd174ab98d277d9f5a5611c2c9f419d9f')
+assert(md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890') ==
+       '57edf4a22be3c955ac49da2e2107b67a')
+
+local N = tonumber(arg and arg[1]) or 10000
+
+  -- Credits: William Shakespeare, Romeo and Juliet
+local txt = [[Rebellious subjects, enemies to peace,
+Profaners of this neighbour-stained steel,--
+Will they not hear? What, ho! you men, you beasts,
+That quench the fire of your pernicious rage
+With purple fountains issuing from your veins,
+On pain of torture, from those bloody hands
+Throw your mistemper'd weapons to the ground,
+And hear the sentence of your moved prince.
+Three civil brawls, bred of an airy word,
+By thee, old Capulet, and Montague,
+Have thrice disturb'd the quiet of our streets,
+And made Verona's ancient citizens
+Cast by their grave beseeming ornaments,
+To wield old partisans, in hands as old,
+Canker'd with peace, to part your canker'd hate:
+If ever you disturb our streets again,
+Your lives shall pay the forfeit of the peace.
+For this time, all the rest depart away:
+You Capulet; shall go along with me:
+And, Montague, come you this afternoon,
+To know our further pleasure in this case,
+To old Free-town, our common judgment-place.
+Once more, on pain of death, all men depart.]]
+  txt = txt..txt..txt..txt
+  txt = txt..txt..txt..txt
+
+local res -- last digest; declared local so the benchmark does not create a global
+for i=1,N do
+  res = md5(txt)
+end
+assert(res == 'a831e91e0f70eddcb70dc61c6f82f6cd')
diff --git a/perf/LuaJIT-benches/meteor.lua b/perf/LuaJIT-benches/meteor.lua
new file mode 100644
index 00000000..80588ab5
--- /dev/null
+++ b/perf/LuaJIT-benches/meteor.lua
@@ -0,0 +1,220 @@
+-- Meteor puzzle benchmark: the solver is emitted as Lua source text and compiled with loadstring.
+-- Generate a decision tree based solver for the meteor puzzle.
+local function generatesolver(countinit)
+  local pairs, ipairs, format = pairs, ipairs, string.format
+  local byte, min, sort = string.byte, math.min, table.sort
+  -- countinit is handed to the compiled chunk, which counts it down by 2 per solution found.
+  -- Cached position to distance lookup.
+  local dist = setmetatable({}, { __index = function(t, xy)
+    local x = xy%10; local y = (xy-x)/10
+    if (x+y)%2 == 1 then y = y + 1; x = 10 - x end
+    local d = xy + 256*x*x + 1024*y*y; t[xy] = d; return d
+  end})
+
+  -- Lookup table to validate a cell and to find its successor.
+  local ok = {}
+  for i=0,150 do ok[i] = false end
+  for i=99,0,-1 do
+    local x = i%10
+    if ((i-x)/10+x)%2 == 0 then
+      ok[i] = i + (ok[i+1] and 1 or (ok[i+2] and 2 or 3))
+    end
+  end
+
+  -- Temporary board state for the island checks.
+  local islands, slide = {}, {20,22,24,26,28,31,33,35,37,39}
+  local bbc, bb = 0, {}
+  for i=0,19 do bb[i] = false; bb[i+80] = false end
+  for i=20,79 do bb[i] = ok[i] end
+
+  -- Recursive flood fill algorithm.
+  local function fill(bb, p)
+    bbc = bbc + 1
+    local n = p+2; if bb[n] then bb[n] = false; fill(bb, n) end
+    n = p-2; if bb[n] then bb[n] = false; fill(bb, n) end
+    n = p-9; if bb[n] then bb[n] = false; fill(bb, n) end
+    n = p-11; if bb[n] then bb[n] = false; fill(bb, n) end
+    n = p+9; if bb[n] then bb[n] = false; fill(bb, n) end
+    n = p+11; if bb[n] then bb[n] = false; fill(bb, n) end
+  end
+
+  -- Generate pruned, sliding decision trees.
+  local dtrees = {{}, {}, {}, {}, {}, {}, {}, {}, {}, {}}
+  local rot = { nil, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} }
+  for k=0,9 do
+    -- Generate 10 initial pieces from line noise. :-)
+    local t = { 60, 62, byte("@BMBIK@KT@GPIKR@IKIKT@GK@KM@BG", k*3+1, k*3+3) }
+    rot[1] = t
+    for i,xy in ipairs(t) do
+      local x = xy%10; local y = (xy-x-60)/10
+      -- Add 11 more variations by rotating and flipping.
+      for j=2,12 do
+	if j == 7 then y = -y else x,y = (x+3*y)/2, (y-x)/2 end
+	rot[j][i] = x+10*y
+      end
+    end
+    for r,v in ipairs(rot) do
+      -- Exploit symmetry and leave out half of the orientations of one piece.
+      -- The selected piece gives the best reduction of the solution space.
+      if k ~= 3 or r%2 == 0 then
+	-- Normalize to origin, add distance, sort by distance from origin.
+	local m = min(v[1], v[2], v[3], v[4], v[5])
+	for i=1,5 do v[i] = dist[v[i]-m] end
+	sort(v)
+	local v2, v3, v4, v5 = v[2]%256, v[3]%256, v[4]%256, v[5]%256
+	-- Slide the piece across 2 rows, prune the tree, check for islands.
+	for j,p in ipairs(slide) do
+	  bb[p] = false
+	  if ok[p+v2] and ok[p+v3] and ok[p+v4] and ok[p+v5] then -- Prune.
+	    for i=p+1,79 do bb[i] = ok[i] end -- Clear remaining board.
+	    bb[p+v2] = false; bb[p+v3] = false -- Add piece.
+	    bb[p+v4] = false; bb[p+v5] = false
+	    bbc = j -- Flood fill and count the filled positions.
+	    if bb[71] then bb[71] = false; fill(bb, 71) end -- Lower left.
+	    if bb[79] then bb[79] = false; fill(bb, 79) end -- Lower right.
+	    local di = 0
+	    if bbc < 22 then bbc = 26
+	    elseif bbc < 26 then -- Island found, locate it, fill from above.
+	      for i=p+2,79 do if bb[i] then di = i-p; break end end
+	      for i=p-9,p-1 do if ok[i] then fill(bb, i) bbc = bbc - 1 end end
+	    end
+	    if bbc == 26 then -- Prune boards with static islands.
+	      local tb = dtrees[j] -- Build decision tree in distance order.
+	      local ta = tb[v2]; if not ta then ta = {}; tb[v2] = ta end
+	      tb = ta[v3]; if not tb then tb = {}; ta[v3] = tb end
+	      ta = tb[v4]; if not ta then ta = {}; tb[v4] = ta; islands[ta] = di
+	      elseif islands[ta] ~= di then islands[ta] = 0 end
+	      ta[v5] = di*10+k -- Leaves hold island check and piece number.
+	    end
+	  end
+	end
+      end
+    end
+  end
+
+  local s = "local u0,u1,u2,u3,u4,u5,u6,u7,u8,u9" -- Piece use flags.
+  for p=0,99 do if ok[p] then s = s..",b"..p end end -- Board cells.
+  s = s.."\n"..[[
+local countinit = ...
+local count = countinit
+local bmin, bmax, pcs = 9, 0, {}
+local smin, smax
+local write, reverse = io.write, string.reverse
+
+-- Print min/max boards.
+local function printboard(s)
+  local flip = true
+  for x in string.gmatch(string.gsub(s, ".", "%1 "), "..........") do
+    write(x, flip and "\n " or "\n")
+    flip = not flip
+  end
+  write("\n")
+end
+
+-- Print result.
+local function printresult()
+  write(countinit-count, " solutions found\n\n")
+  printboard(smin)
+  printboard(smax)
+end
+
+-- Generate piece lookup array from the order of use.
+local function genp()
+  local p = pcs
+  p[u0] = "0" p[u1] = "1" p[u2] = "2" p[u3] = "3" p[u4] = "4"
+  p[u5] = "5" p[u6] = "6" p[u7] = "7" p[u8] = "8" p[u9] = "9"
+  return p
+end
+
+-- Goal function.
+local function f91(k)
+  if k ~= 10 then return end
+  count = count - 2 -- Need to count the symmetric solution, too.
+  repeat
+    -- Quick precheck before constructing the string.
+    local b0, b99 = b0, b99
+    if b0 <= bmin then bmin = b0 elseif b0 >= bmax then bmax = b0
+    elseif b99 <= bmin then bmin = b99 elseif b99 >= bmax then bmax = b99
+    else break end
+    -- Translate the filled board to a string.
+    local p = genp()
+    local s = p[b0] ]]
+  for p=2,99 do if ok[p] then s = s.."..p[b"..p.."]" end end
+  s = s..[[
+    -- Remember min/max boards, dito for the symmetric board.
+    if not smin then smin = s; smax = s
+    elseif s < smin then smin = s elseif s > smax then smax = s end
+    s = reverse(s)
+    if s < smin then smin = s elseif s > smax then smax = s end
+  until true
+  if count <= 0 then error() end -- Early abort if max count given.
+end
+local f93 = f91
+]]
+
+  -- Recursively convert the decision tree to Lua code.
+  local function codetree(tree, d, p, pn)
+    local found, s = false, ""
+    d = d + 1
+    for a,t in pairs(tree) do
+      local b = p+a
+      if b < 100 then -- Prune the tree at the lower border.
+	local pp = b ~= pn and pn or ok[b] -- Find maximum successor function.
+	if d >= 5 then -- Try to place the last cell of a piece and advance.
+	  found = true
+	  local u = t%10
+	  local di = (t-u)/10
+	  if di ~= 0 and d == 5 then
+	    di = di + p; if pp == di then pp = ok[di] end
+	    s = format("%sif b%d and not u%d and not b%d then b%d=k u%d=k f%d(k) u%d=N b%d=N end\n",
+		       s, di, u, b, b, u, pp, u, b)
+	  else
+	    s = format("%sif not u%d and not b%d then b%d=k u%d=k f%d(k) u%d=N b%d=N end\n",
+		       s, u, b, b, u, pp, u, b)
+	  end
+	else -- Try to place an intermediate cell.
+	  local di = d ~= 4 and 0 or islands[t]
+	  if di == 0 then
+	    local st = codetree(t, d, p, pp)
+	    if st then
+	      found = true
+	      s = format("%sif not b%d then b%d=k\n%sb%d=N end\n", s, b, b, st, b)
+	    end
+	  else -- Combine island checks.
+	    di = di + p; if pp == di then pp = ok[di] end
+	    local st = codetree(t, 6, p, pp)
+	    if st then
+	      found = true
+	      s = format("%sif b%d and not b%d then b%d=k\n%sb%d=N end\n", s, di, b, b, st, b)
+	    end
+	  end
+	end
+      end
+    end
+    return found and s
+  end
+
+  -- Embed the decision tree into a function hierarchy.
+  local j = 5
+  for p=88,0,-1 do
+    local pn = ok[p]
+    if pn then
+      s = format("%slocal function f%d(k)\nlocal N if b%d then return f%d(k) end k=k+1 b%d=k\n%sb%d=N end\n",
+	    s, p, p, pn, p, codetree(dtrees[j], 1, p, pn), p)
+      j = j - 1; if j == 0 then j = 10 end
+    end
+  end
+
+  -- Compile and return solver function and result getter.
+  return loadstring(s.."return f0, printresult\n", "solver")(countinit)
+end
+
+-- Generate the solver function hierarchy.
+local solver, printresult = generatesolver(tonumber(arg and arg[1]) or 10000) -- arg[1]: solution count limit
+
+-- The optimizer for LuaJIT 1.1.x is not helpful here, so turn it off.
+if jit and jit.opt and jit.version_num < 10200 then jit.opt.start(0) end
+
+-- Run the solver protected to get partial results (max count or ctrl-c).
+pcall(solver, 0) -- the generated goal function calls error() once the count is used up, so failure is expected
+printresult()
diff --git a/perf/LuaJIT-benches/nbody.lua b/perf/LuaJIT-benches/nbody.lua
new file mode 100644
index 00000000..e0ff8f77
--- /dev/null
+++ b/perf/LuaJIT-benches/nbody.lua
@@ -0,0 +1,119 @@
+
+local sqrt = math.sqrt
+
+local PI = 3.141592653589793
+local SOLAR_MASS = 4 * PI * PI
+local DAYS_PER_YEAR = 365.24
+local bodies = {
+  { -- Sun
+    x = 0,
+    y = 0,
+    z = 0,
+    vx = 0,
+    vy = 0,
+    vz = 0,
+    mass = SOLAR_MASS
+  },
+  { -- Jupiter
+    x = 4.84143144246472090e+00,
+    y = -1.16032004402742839e+00,
+    z = -1.03622044471123109e-01,
+    vx = 1.66007664274403694e-03 * DAYS_PER_YEAR,
+    vy = 7.69901118419740425e-03 * DAYS_PER_YEAR,
+    vz = -6.90460016972063023e-05 * DAYS_PER_YEAR,
+    mass = 9.54791938424326609e-04 * SOLAR_MASS
+  },
+  { -- Saturn
+    x = 8.34336671824457987e+00,
+    y = 4.12479856412430479e+00,
+    z = -4.03523417114321381e-01,
+    vx = -2.76742510726862411e-03 * DAYS_PER_YEAR,
+    vy = 4.99852801234917238e-03 * DAYS_PER_YEAR,
+    vz = 2.30417297573763929e-05 * DAYS_PER_YEAR,
+    mass = 2.85885980666130812e-04 * SOLAR_MASS
+  },
+  { -- Uranus
+    x = 1.28943695621391310e+01,
+    y = -1.51111514016986312e+01,
+    z = -2.23307578892655734e-01,
+    vx = 2.96460137564761618e-03 * DAYS_PER_YEAR,
+    vy = 2.37847173959480950e-03 * DAYS_PER_YEAR,
+    vz = -2.96589568540237556e-05 * DAYS_PER_YEAR,
+    mass = 4.36624404335156298e-05 * SOLAR_MASS
+  },
+  { -- Neptune
+    x = 1.53796971148509165e+01,
+    y = -2.59193146099879641e+01,
+    z = 1.79258772950371181e-01,
+    vx = 2.68067772490389322e-03 * DAYS_PER_YEAR,
+    vy = 1.62824170038242295e-03 * DAYS_PER_YEAR,
+    vz = -9.51592254519715870e-05 * DAYS_PER_YEAR,
+    mass = 5.15138902046611451e-05 * SOLAR_MASS
+  }
+}
+
+-- Advance all nbody bodies by one time step dt (velocities first, then positions).
+local function advance(bodies, nbody, dt)
+  for i = 1, nbody do
+    local cur = bodies[i]
+    local px, py, pz, cur_mass = cur.x, cur.y, cur.z, cur.mass
+    local vx, vy, vz = cur.vx, cur.vy, cur.vz
+    -- Each unordered pair (i, j) is visited once and updates both bodies.
+    for j = i + 1, nbody do
+      local other = bodies[j]
+      local dx, dy, dz = px-other.x, py-other.y, pz-other.z
+      local dist = sqrt(dx*dx + dy*dy + dz*dz)
+      local scale = dt / (dist * dist * dist)
+      local pull_cur, pull_other = other.mass*scale, cur_mass*scale
+      vx = vx - (dx * pull_cur)
+      vy = vy - (dy * pull_cur)
+      vz = vz - (dz * pull_cur)
+      other.vx = other.vx + (dx * pull_other)
+      other.vy = other.vy + (dy * pull_other)
+      other.vz = other.vz + (dz * pull_other)
+    end
+    cur.vx, cur.vy, cur.vz = vx, vy, vz
+    -- Move with the freshly updated velocity.
+    cur.x = px + dt * vx
+    cur.y = py + dt * vy
+    cur.z = pz + dt * vz
+  end
+end
+
+local function energy(bodies, nbody)
+  local total = 0 -- kinetic energy of each body minus the potential energy of each pair
+  for i = 1, nbody do
+    local a = bodies[i]
+    local mass_a = a.mass
+    local vx, vy, vz = a.vx, a.vy, a.vz
+    total = total + (0.5 * mass_a * (vx*vx + vy*vy + vz*vz))
+    for j = i + 1, nbody do
+      local b = bodies[j]
+      local dx, dy, dz = a.x-b.x, a.y-b.y, a.z-b.z
+      total = total - ((mass_a * b.mass) / sqrt(dx*dx + dy*dy + dz*dz))
+    end
+  end
+  return total
+end
+
+-- Set the velocity of the first body so that it offsets the summed momentum of all bodies.
+local function offsetMomentum(b, nbody)
+  local px, py, pz = 0, 0, 0
+  for i = 1, nbody do
+    local body = b[i]
+    local m = body.mass
+    px, py, pz = px + (body.vx * m), py + (body.vy * m), pz + (body.vz * m)
+  end
+  local first = b[1]
+  first.vx = -px / SOLAR_MASS
+  first.vy = -py / SOLAR_MASS
+  first.vz = -pz / SOLAR_MASS
+end
+
+local N = tonumber(arg and arg[1]) or 1000 -- number of simulation steps
+local nbody = #bodies
+-- Print the system energy before and after N steps of size 0.01.
+offsetMomentum(bodies, nbody)
+io.write( string.format("%0.9f",energy(bodies, nbody)), "\n")
+for i=1,N do advance(bodies, nbody, 0.01) end
+io.write( string.format("%0.9f",energy(bodies, nbody)), "\n")
diff --git a/perf/LuaJIT-benches/nsieve-bit-fp.lua b/perf/LuaJIT-benches/nsieve-bit-fp.lua
new file mode 100644
index 00000000..3971ec1f
--- /dev/null
+++ b/perf/LuaJIT-benches/nsieve-bit-fp.lua
@@ -0,0 +1,37 @@
+
+local floor, ceil = math.floor, math.ceil
+
+local precision = 50 -- Maximum precision of lua_Number (minus safety margin).
+local onebits = (2^precision)-1
+-- Sieve up to m with the flags stored as bits inside plain Lua numbers (no bit library); returns the prime count.
+local function nsieve(p, m)
+  local cm = ceil(m/precision) -- number of table slots; each holds up to `precision` flags
+  do local onebits = onebits; for i=0,cm do p[i] = onebits end end -- start with every flag set
+  local count, idx, bit = 0, 2, 2 -- idx: slot of candidate i; bit: twice the weight of its flag
+  for i=2,m do
+    local r = p[idx] / bit
+    if r - floor(r) >= 0.5 then -- Bit set?
+      local kidx, kbit = idx, bit
+      for k=i+i,m,i do
+        kidx = kidx + i
+        while kidx >= cm do kidx = kidx - cm; kbit = kbit + kbit end -- wrap to slot 0 and move to the next flag
+        local x = p[kidx]
+        local r = x / kbit
+        if r - floor(r) >= 0.5 then p[kidx] = x - kbit*0.5 end -- Clear bit.
+      end
+      count = count + 1
+    end
+    idx = idx + 1
+    if idx >= cm then idx = 0; bit = bit + bit end -- same wrap for the candidate position
+  end
+  return count
+end
+-- Driver: N comes from the first command-line argument and is raised to at least 2.
+local N = tonumber(arg and arg[1]) or 1
+if N < 2 then N = 2 end
+local primes = {}
+-- Run three sieves with limits 2^N, 2^(N-1) and 2^(N-2) times 10000, reusing one table.
+for i=0,2 do
+  local m = (2^(N-i))*10000
+  io.write(string.format("Primes up to %8d %8d\n", m, nsieve(primes, m)))
+end
diff --git a/perf/LuaJIT-benches/nsieve-bit.lua b/perf/LuaJIT-benches/nsieve-bit.lua
new file mode 100644
index 00000000..820a3726
--- /dev/null
+++ b/perf/LuaJIT-benches/nsieve-bit.lua
@@ -0,0 +1,27 @@
+
+local bit = require("bit")
+local band, bxor, rshift, rol = bit.band, bit.bxor, bit.rshift, bit.rol
+
+local function nsieve(p, m)
+  for slot = 0, rshift(m, 5) do p[slot] = -1 end -- all flags set, 32 candidates per slot
+  local found = 0
+  for cand = 2, m do
+    if band(rshift(p[rshift(cand, 5)], cand), 1) ~= 0 then
+      found = found + 1
+      for mult = cand + cand, m, cand do
+        local slot = rshift(mult, 5)
+        p[slot] = band(p[slot], rol(-2, mult)) -- clear the flag of this multiple
+      end
+    end
+  end
+  return found
+end
+-- Driver: N comes from the first command-line argument and is raised to at least 2.
+local N = tonumber(arg and arg[1]) or 1
+if N < 2 then N = 2 end
+local primes = {}
+-- Run three sieves with limits 2^N, 2^(N-1) and 2^(N-2) times 10000, reusing one table.
+for i=0,2 do
+  local m = (2^(N-i))*10000
+  io.write(string.format("Primes up to %8d %8d\n", m, nsieve(primes, m)))
+end
diff --git a/perf/LuaJIT-benches/nsieve.lua b/perf/LuaJIT-benches/nsieve.lua
new file mode 100644
index 00000000..6de0524f
--- /dev/null
+++ b/perf/LuaJIT-benches/nsieve.lua
@@ -0,0 +1,21 @@
+
+local function nsieve(p, m)
+  local found = 0
+  for n = 2, m do p[n] = true end -- every candidate starts out marked prime
+  for n = 2, m do
+    if p[n] then
+      found = found + 1
+      for multiple = n + n, m, n do p[multiple] = false end -- strike out its multiples
+    end
+  end
+  return found
+end
+-- Driver: N comes from the first command-line argument and is raised to at least 2.
+local N = tonumber(arg and arg[1]) or 1
+if N < 2 then N = 2 end
+local primes = {}
+-- Run three sieves with limits 2^N, 2^(N-1) and 2^(N-2) times 10000, reusing one table.
+for i=0,2 do
+  local m = (2^(N-i))*10000
+  io.write(string.format("Primes up to %8d %8d\n", m, nsieve(primes, m)))
+end
diff --git a/perf/LuaJIT-benches/partialsums.lua b/perf/LuaJIT-benches/partialsums.lua
new file mode 100644
index 00000000..46bb9da3
--- /dev/null
+++ b/perf/LuaJIT-benches/partialsums.lua
@@ -0,0 +1,29 @@
+-- Partial sums benchmark: accumulates nine series over k = 1..n and prints each sum.
+local n = tonumber(arg and arg[1]) or 1000 -- guard a missing arg table/argument like the sibling benchmarks
+local function pr(fmt, x) io.write(string.format(fmt, x)) end
+-- a1 starts at 1 because the loop below begins at k = 1 and so skips the k = 0 term of (2/3)^k.
+local a1, a2, a3, a4, a5, a6, a7, a8, a9, alt = 1, 0, 0, 0, 0, 0, 0, 0, 0, 1
+local sqrt, sin, cos = math.sqrt, math.sin, math.cos
+for k=1,n do
+  local k2, sk, ck = k*k, sin(k), cos(k)
+  local k3 = k2*k
+  a1 = a1 + (2/3)^k
+  a2 = a2 + 1/sqrt(k)
+  a3 = a3 + 1/(k2+k)
+  a4 = a4 + 1/(k3*sk*sk)
+  a5 = a5 + 1/(k3*ck*ck)
+  a6 = a6 + 1/k
+  a7 = a7 + 1/k2
+  a8 = a8 + alt/k
+  a9 = a9 + alt/(k+k-1)
+  alt = -alt -- sign flips every term for the two alternating series
+end
+pr("%.9f\t(2/3)^k\n", a1)
+pr("%.9f\tk^-0.5\n", a2)
+pr("%.9f\t1/k(k+1)\n", a3)
+pr("%.9f\tFlint Hills\n", a4)
+pr("%.9f\tCookson Hills\n", a5)
+pr("%.9f\tHarmonic\n", a6)
+pr("%.9f\tRiemann Zeta\n", a7)
+pr("%.9f\tAlternating Harmonic\n", a8)
+pr("%.9f\tGregory\n", a9)
diff --git a/perf/LuaJIT-benches/pidigits-nogmp.lua b/perf/LuaJIT-benches/pidigits-nogmp.lua
new file mode 100644
index 00000000..63a1cb0e
--- /dev/null
+++ b/perf/LuaJIT-benches/pidigits-nogmp.lua
@@ -0,0 +1,100 @@
+
+-- Start of dynamically compiled chunk.
+local chunk = [=[
+
+-- Factory function for multi-precision number (mpn) operations.
+local function fmm(fa, fb)
+  return loadstring([[
+    return function(y, a, ka, b, kb)
+      local carry, n = 0, #a ]]..(fb == 0 and "" or [[
+      local na, nb = n, #b -- Need to adjust lengths. 1 element suffices here.
+      if na > nb then b[na] = 0 elseif na < nb then a[nb] = 0; n = nb end
+    ]])..[[
+      for i=1,n do -- Sum up all elements and propagate carry.
+        local x = a[i] ]]..(fa == 2 and "*ka" or "")..
+          (fb == 2 and "+b[i]*kb" or (fb == 1 and "+b[i]" or ""))..[[ + carry
+        if x < RADIX and x >= 0 then carry = 0; y[i] = x -- Check for overflow.
+        else local d = x % RADIX; carry = (x-d) / RADIX; y[i] = d end
+      end
+      y[n+1] = nil -- Truncate target. 1 element suffices here.
+      if carry == 0 then while n > 0 and y[n] == 0 do y[n] = nil end
+      elseif carry == -1 then y[n] = y[n] - RADIX else y[n+1] = carry end
+    ]]..(fb == 0 and "" or [[ -- Undo length adjustment.
+      if na > nb then b[na] = nil elseif na < nb and y ~= a then a[nb] = nil end
+    ]])..[[
+      return y
+    end]])()
+end
+
+-- Generate needed mpn functions.
+local mm_kk, mm_k1, mm_k0, mm_11 = fmm(2, 2), fmm(2, 1), fmm(2, 0), fmm(1, 1)
+
+-- Choose the most efficient mpn function for y = a*ka + b*kb at run-time.
+local function mm(y, a, ka, b, kb)
+  local f = mm_kk
+  if kb == 0 or #b == 0 then if ka == 1 then return a else f = mm_k0 end
+  elseif kb == 1 then if ka == 1 then f = mm_11 else f = mm_k1 end end
+  return f(y, a, ka, b, kb)
+end
+
+-- Compose matrix with numbers on the right.
+local function compose_r(aq,ar,as,at, bq,br,bs,bt)
+  mm(ar, ar,bq, at,br) mm(at, at,bt, ar,bs)
+  mm(as, as,bt, aq,bs) mm(aq, aq,bq, nil,0)
+end
+
+-- Compose matrix with numbers on the left.
+local function compose_l(aq,ar,as,at, bq,br,bs,bt)
+  mm(ar, ar,bt, aq,br) mm(at, at,bt, as,br)
+  mm(as, as,bq, at,bs) mm(aq, aq,bq, nil,0)
+end
+
+-- Extract one digit.
+local u, v, jj = {}, {}, 0
+local function extract(q,r,s,t, j)
+  local u = j == jj + 1 and mm(u, u,1, q,1) or mm(u, q,j, r,1); jj = j
+  local v = mm(v, t,1, s,j)
+  local nu, nv, y = #u, #v
+  if nu == nv then
+    if nu == 1 then y = u[1] / v[1]
+    else y = (u[nu]*RADIX + u[nu-1]) / (v[nv]*RADIX + v[nv-1]) end
+  elseif nu == nv+1 then y = (u[nu]*RADIX + u[nv]) / v[nv]
+  else return 0 end
+  return math.floor(y)
+end
+
+-- Coroutine which yields successive digits of PI.
+return coroutine.wrap(function()
+  local q, r, s, t, k = {1}, {}, {}, {1}, 1
+  repeat
+    local y = extract(q,r,s,t, 3)
+    if y == extract(q,r,s,t, 4) then
+      coroutine.yield(y)
+      compose_r(q,r,s,t,  10, -10*y, 0, 1)
+    else
+      compose_l(q,r,s,t,   k, 4*k+2, 0, 2*k+1)
+      k = k + 1
+    end
+  until false
+end)
+
+]=] -- End of dynamically compiled chunk.
+
+local N = tonumber(arg and arg[1]) or 27 -- number of digits to print
+local RADIX = N < 6500 and 2^36 or 2^32 -- Avoid overflow.
+
+-- Substitute radix and compile chunk. The extra parentheses drop gsub's match count so it is not used as chunk name.
+local pidigit = assert(loadstring((string.gsub(chunk, "RADIX", tostring(RADIX))), "pidigits"))()
+
+-- Print lines with 10 digits.
+for i=10,N,10 do
+  for j=1,10 do io.write(pidigit()) end
+  io.write("\t:", i, "\n")
+end
+
+-- Print remaining digits (if any).
+local n10 = N % 10
+if n10 ~= 0 then
+  for i=1,n10 do io.write(pidigit()) end
+  io.write(string.rep(" ", 10-n10), "\t:", N, "\n")
+end
diff --git a/perf/LuaJIT-benches/ray.lua b/perf/LuaJIT-benches/ray.lua
new file mode 100644
index 00000000..2acc24c0
--- /dev/null
+++ b/perf/LuaJIT-benches/ray.lua
@@ -0,0 +1,135 @@
+local sqrt = math.sqrt
+local huge = math.huge
+
+local delta = 1 -- small offset, later used to nudge the shadow-ray origin along the surface normal
+while delta * delta + 1 ~= 1 do -- halve until delta*delta no longer changes 1 in floating point
+  delta = delta * 0.5
+end
+-- Vector helpers: the plain ones take separate components, the v-prefixed ones take {x, y, z} tables.
+local function length(x, y, z)  return sqrt(x*x + y*y + z*z) end
+local function vlen(v)          return length(v[1], v[2], v[3]) end
+local function mul(c, x, y, z)  return c*x, c*y, c*z end
+local function unitise(x, y, z) return mul(1/length(x, y, z), x, y, z) end
+local function dot(x1, y1, z1, x2, y2, z2)
+  return x1*x2 + y1*y2 + z1*z2
+end
+-- vsub returns three components, not a table.
+local function vsub(a, b)        return a[1] - b[1], a[2] - b[2], a[3] - b[3] end
+local function vdot(a, b)        return dot(a[1], a[2], a[3], b[1], b[2], b[3]) end
+
+-- Sphere prototype: new() returns a table with centre and radius whose metatable is the prototype.
+local sphere = {}
+function sphere:new(centre, radius)
+  self.__index = self -- instances look up missing fields (methods) in the prototype
+  return setmetatable({centre=centre, radius=radius}, self)
+end
+
+local function sphere_distance(self, origin, dir)
+  local cx, cy, cz = vsub(self.centre, origin) -- vector from the ray origin to the centre
+  local proj = dot(cx, cy, cz, dir[1], dir[2], dir[3])
+  local radius = self.radius
+  local disc = radius*radius + proj*proj - cx*cx-cy*cy-cz*cz
+  if disc < 0 then return huge end -- the ray misses the sphere
+  local root = sqrt(disc)
+  local far, near = proj + root, proj - root
+  if far < 0 then return huge end -- both intersections lie behind the origin
+  if near > 0 then return near end
+  return far
+end
+-- If this sphere is hit closer than best[1], store the new distance in best[1] and the unit normal in best[2].
+function sphere:intersect(origin, dir, best)
+  local lambda = sphere_distance(self, origin, dir)
+  if lambda < best[1] then
+    local c = self.centre
+    best[1] = lambda
+    local b2 = best[2]
+    b2[1], b2[2], b2[3] =
+      unitise(
+        origin[1] - c[1] + lambda * dir[1],
+        origin[2] - c[2] + lambda * dir[2],
+        origin[3] - c[3] + lambda * dir[3])
+  end
+end
+-- Group prototype: a bounding sphere plus a list of children that are only tested when the bound is hit.
+local group = {}
+function group:new(bound)
+  self.__index = self
+  return setmetatable({bound=bound, children={}}, self)
+end
+-- Append a child (anything with an intersect method) to the group.
+function group:add(s)
+  self.children[#self.children+1] = s
+end
+-- Test the children only if the bounding sphere is hit closer than the current best distance.
+function group:intersect(origin, dir, best)
+  local lambda = sphere_distance(self.bound, origin, dir)
+  if lambda < best[1] then
+    for _, c in ipairs(self.children) do
+      c:intersect(origin, dir, best)
+    end
+  end
+end
+-- Scratch tables shared by every ray_trace call; ilight is assigned by the driver code below.
+local hit = { 0, 0, 0 }
+local ilight
+local best = { huge, { 0, 0, 0 } }
+-- Return the brightness along dir from camera: 0 on a miss, a surface facing away from the light, or shadow.
+local function ray_trace(light, camera, dir, scene)
+  best[1] = huge
+  scene:intersect(camera, dir, best)
+  local b1 = best[1]
+  if b1 == huge then return 0 end -- nothing was hit
+  local b2 = best[2]
+  local g = vdot(b2, light)
+  if g >= 0 then return 0 end -- surface faces away from the light
+  hit[1] = camera[1] + b1*dir[1] + delta*b2[1] -- hit point, moved by delta along the normal
+  hit[2] = camera[2] + b1*dir[2] + delta*b2[2]
+  hit[3] = camera[3] + b1*dir[3] + delta*b2[3]
+  best[1] = huge
+  scene:intersect(hit, ilight, best) -- second ray from the hit point along ilight
+  if best[1] == huge then
+    return -g
+  else
+    return 0
+  end
+end
+-- Build the scene recursively: a sphere plus, above level 1, four half-radius sub-scenes inside one bounding group.
+local function create(level, centre, radius)
+  local s = sphere:new(centre, radius)
+  if level == 1 then return s end
+  local gr = group:new(sphere:new(centre, 3*radius)) -- bounding sphere of three times the radius
+  gr:add(s)
+  local rn = 3*radius/sqrt(12)
+  for dz = -1,1,2 do
+    for dx = -1,1,2 do
+      gr:add(create(level-1, { centre[1] + rn*dx, centre[2] + rn, centre[3] + rn*dz }, radius*0.5))
+    end
+  end
+  return gr
+end
+
+
+local level, n, ss = tonumber(arg and arg[1]) or 9, tonumber(arg and arg[2]) or 256, 4 -- scene depth, image size, samples per axis
+local iss = 1/ss -- sub-pixel step
+local gf = 255/(ss*ss) -- scales the sum of ss*ss samples to 0..255
+-- Output is a binary PGM (P5) image of n x n pixels.
+io.write(("P5\n%d %d\n255\n"):format(n, n))
+local light = { unitise(-1, -3, 2) }
+ilight = { -light[1], -light[2], -light[3] }
+local camera = { 0, 0, -4 }
+local dir = { 0, 0, 0 }
+-- Build the scene once; every ray below reuses it.
+local scene = create(level, {0, -1, 0}, 1)
+-- Render top row first, left to right, averaging ss*ss sub-pixel rays per pixel.
+for y = n/2-1, -n/2, -1 do
+  for x = -n/2, n/2-1 do
+    local g = 0
+    for d = y, y+.99, iss do
+      for e = x, x+.99, iss do
+        dir[1], dir[2], dir[3] = unitise(e, d, n)
+        g = g + ray_trace(light, camera, dir, scene)
+      end
+    end
+    io.write(string.char(math.floor(0.5 + g*gf)))
+  end
+end
diff --git a/perf/LuaJIT-benches/recursive-ack.lua b/perf/LuaJIT-benches/recursive-ack.lua
new file mode 100644
index 00000000..fad30589
--- /dev/null
+++ b/perf/LuaJIT-benches/recursive-ack.lua
@@ -0,0 +1,8 @@
+local function Ack(m, n) -- Ackermann function
+  if m == 0 then return n + 1 end
+  if n ~= 0 then return Ack(m - 1, (Ack(m, n - 1))) end -- The parentheses are deliberate.
+  return Ack(m - 1, 1)
+end
+-- Driver: evaluate Ack(3, N) with N taken from the first command-line argument (default 10).
+local N = tonumber(arg and arg[1]) or 10
+io.write("Ack(3,", N ,"): ", Ack(3,N), "\n")
diff --git a/perf/LuaJIT-benches/recursive-fib.lua b/perf/LuaJIT-benches/recursive-fib.lua
new file mode 100644
index 00000000..ef9950de
--- /dev/null
+++ b/perf/LuaJIT-benches/recursive-fib.lua
@@ -0,0 +1,7 @@
+local function fib(n) -- naive doubly recursive Fibonacci with fib(0) == fib(1) == 1
+  if n < 2 then return 1 end
+  return fib(n-2) + fib(n-1)
+end
+-- Guard a missing arg table the same way the sibling benchmarks do.
+local n = tonumber(arg and arg[1]) or 10
+io.write(string.format("Fib(%d): %d\n", n, fib(n)))
diff --git a/perf/LuaJIT-benches/revcomp.lua b/perf/LuaJIT-benches/revcomp.lua
new file mode 100644
index 00000000..34fe347b
--- /dev/null
+++ b/perf/LuaJIT-benches/revcomp.lua
@@ -0,0 +1,37 @@
+
+local sub = string.sub
+iubc = setmetatable({ -- Complement lookup table. Deliberately global: the chunk compiled from `wcode` resolves `iubc` as a global.
+  A="T", C="G", B="V", D="H", K="M", R="Y",
+  a="T", c="G", b="V", d="H", k="M", r="Y",
+  T="A", G="C", V="B", H="D", M="K", Y="R", U="A",
+  t="A", g="C", v="B", h="D", m="K", y="R", u="A",
+  N="N", S="S", W="W", n="N", s="S", w="W",
+}, { __index = function(t, s) -- On a miss: complement of the tail followed by the complement of the first character, cached under s.
+  local r = t[sub(s, 2)]..t[sub(s, 1, 1)]; t[s] = r; return r end }) -- NOTE(review): a single character absent from the table leads to t[""], which recurses without end.
+
+local wcode = [=[
+return function(t, n)
+  if n == 1 then return end
+  local iubc, sub, write = iubc, string.sub, io.write
+  local s = table.concat(t, "", 1, n-1)
+  for i=#s-59,1,-60 do
+    write(]=]
+for i=59,3,-4 do wcode = wcode.."iubc[sub(s, i+"..(i-3)..", i+"..i..")], " end -- Appends 15 lookups of 4 characters each, i.e. one unrolled 60-character output line.
+wcode = wcode..[=["\n")
+  end
+  local r = #s % 60
+  if r ~= 0 then
+    for i=r,1,-4 do write(iubc[sub(s, i-3 < 1 and 1 or i-3, i)]) end
+    write("\n")
+  end
+end
+]=]
+local writerev = loadstring(wcode)() -- Compile the generated source and take the function(t, n) it returns.
+
+local t, n = {}, 1 -- t buffers the lines of the current sequence; n is the next free index in t
+for line in io.lines() do
+  local c = sub(line, 1, 1)
+  if c == ">" then writerev(t, n); io.write(line, "\n"); n = 1 -- header line: flush the buffered sequence, echo the header, restart the buffer
+  elseif c ~= ";" then t[n] = line; n = n + 1 end -- lines starting with ";" are dropped, everything else is buffered
+end
+writerev(t, n) -- flush whatever was buffered after the last header
diff --git a/perf/LuaJIT-benches/scimark-2010-12-20.lua b/perf/LuaJIT-benches/scimark-2010-12-20.lua
new file mode 100644
index 00000000..353acb7c
--- /dev/null
+++ b/perf/LuaJIT-benches/scimark-2010-12-20.lua
@@ -0,0 +1,400 @@
+------------------------------------------------------------------------------
+-- Lua SciMark (2010-12-20).
+--
+-- A literal translation of SciMark 2.0a, written in Java and C.
+-- Credits go to the original authors Roldan Pozo and Bruce Miller.
+-- See: http://math.nist.gov/scimark2/
+------------------------------------------------------------------------------
+
+local SCIMARK_VERSION = "2010-12-10" -- NOTE(review): the header comment of this file says 2010-12-20; confirm which date is intended.
+local SCIMARK_COPYRIGHT = "Copyright (C) 2006-2010 Mike Pall"
+
+local MIN_TIME = 2.0 -- Minimum os.clock() duration of a run before measure() reports its result.
+local RANDOM_SEED = 101009 -- Must be odd.
+local SIZE_SELECT = "small" -- Key into `benchmarks` for the default parameters; changed by -small / -large.
+
+local benchmarks = { -- Array part: benchmark names in run order. The constructor functions are added as fields further down.
+  "FFT", "SOR", "MC", "SPARSE", "LU",
+  small = { -- Default constructor arguments for each benchmark.
+    FFT		= { 1024 },
+    SOR		= { 100 },
+    MC		= { },
+    SPARSE	= { 1000, 5000 },
+    LU		= { 100 },
+  },
+  large = {
+    FFT		= { 1048576 },
+    SOR		= { 1000 },
+    MC		= { },
+    SPARSE	= { 100000, 1000000 },
+    LU		= { 1000 },
+  },
+}
+
+local abs, log, sin, floor = math.abs, math.log, math.sin, math.floor -- Cache library functions in locals.
+local pi, clock = math.pi, os.clock
+local format = string.format
+
+------------------------------------------------------------------------------
+-- Select array type: Lua tables or native (FFI) arrays
+------------------------------------------------------------------------------
+
+local darray, iarray -- Array constructors taking an element count; assigned by array_init().
+
+local function array_init() -- Selects FFI arrays when the JIT is enabled and require("ffi") succeeds, plain Lua tables otherwise.
+  if jit and jit.status and jit.status() then
+    local ok, ffi = pcall(require, "ffi")
+    if ok then
+      darray = ffi.typeof("double[?]")
+      iarray = ffi.typeof("int[?]")
+      return
+    end
+  end
+  function darray(n) return {} end -- Table fallback: the requested size is ignored.
+  iarray = darray
+end
+
+------------------------------------------------------------------------------
+-- This is a Lagged Fibonacci Pseudo-random Number Generator with
+-- j, k, M = 5, 17, 31. Pretty weak, but same as C/Java SciMark.
+------------------------------------------------------------------------------
+
+local rand, rand_init -- rand() is (re)defined by every rand_init(seed) call as a closure over fresh state.
+
+if jit and jit.status and jit.status() then
+  -- LJ2 has bit operations and zero-based arrays (internally).
+  local bit = require("bit")
+  local band, sar = bit.band, bit.arshift
+  function rand_init(seed) -- Needs iarray, so array_init() must have run first.
+    local Rm, Rj, Ri = iarray(17), 16, 11
+    for i=0,16 do Rm[i] = 0 end
+    for i=16,0,-1 do -- Fill the 17 state words from the seed, masking each to 31 bits.
+      seed = band(seed*9069, 0x7fffffff)
+      Rm[i] = seed
+    end
+    function rand()
+      local i = band(Ri+1, sar(Ri-16, 31)) -- Ri+1, or 0 once Ri has reached 16 (branch-free wrap-around).
+      local j = band(Rj+1, sar(Rj-16, 31))
+      Ri, Rj = i, j
+      local k = band(Rm[i] - Rm[j], 0x7fffffff)
+      Rm[j] = k
+      return k * (1.0/2147483647.0) -- Scale the 31-bit value by 1/(2^31-1).
+    end
+  end
+else
+  -- Better for standard Lua with one-based arrays and without bit operations.
+  function rand_init(seed)
+    local Rm, Rj = {}, 1
+    for i=1,17 do Rm[i] = 0 end
+    for i=17,1,-1 do
+      seed = (seed*9069) % (2^31)
+      Rm[i] = seed
+    end
+    function rand()
+      local j, m = Rj, Rm
+      local h = j - 5 -- Second index trails j by 5, wrapping within 1..17.
+      if h < 1 then h = h + 17 end
+      local k = m[h] - m[j]
+      if k < 0 then k = k + 2147483647 end
+      m[j] = k
+      if j < 17 then Rj = j + 1 else Rj = 1 end
+      return k * (1.0/2147483647.0)
+    end
+  end
+end
+
+local function random_vector(n) -- Returns an array with indices 1..n filled from rand().
+  local v = darray(n+1) -- n+1 elements so that index n is valid when darray is a zero-based FFI array.
+  for x=1,n do v[x] = rand() end
+  return v
+end
+
+local function random_matrix(m, n) -- Returns a Lua table of m rows; each row is a darray with indices 1..n filled from rand().
+  local a = {}
+  for y=1,m do
+    local v = darray(n+1) -- One extra element: rows are indexed from 1.
+    a[y] = v
+    for x=1,n do v[x] = rand() end
+  end
+  return a
+end
+
+------------------------------------------------------------------------------
+-- FFT: Fast Fourier Transform.
+------------------------------------------------------------------------------
+
+local function fft_bitreverse(v, n) -- In-place permutation of the n two-element pairs stored at v[1..2n]; j is maintained as the bit-reversed counterpart of i.
+  local j = 0
+  for i=0,2*n-4,2 do
+    if i < j then -- Swap each pair only once.
+      v[i+1], v[i+2], v[j+1], v[j+2] = v[j+1], v[j+2], v[i+1], v[i+2]
+    end
+    local k = n
+    while k <= j do j = j - k; k = k / 2 end
+    j = j + k
+  end
+end
+
+local function fft_transform(v, n, dir) -- In-place transform of n interleaved (re, im) pairs in v[1..2n]; dir sets the sign of the rotation angle (callers pass -1 and 1).
+  if n <= 1 then return end
+  fft_bitreverse(v, n)
+  local dual = 1
+  repeat -- One pass per stage; dual doubles until it reaches n.
+    local dual2 = 2*dual
+    for i=1,2*n-1,2*dual2 do -- Butterflies whose factor is 1 (no multiplication needed).
+      local j = i+dual2
+      local ir, ii = v[i], v[i+1]
+      local jr, ji = v[j], v[j+1]
+      v[j], v[j+1] = ir - jr, ii - ji
+      v[i], v[i+1] = ir + jr, ii + ji
+    end
+    local theta = dir * pi / dual
+    local s, s2 = sin(theta), 2.0 * sin(theta * 0.5)^2
+    local wr, wi = 1.0, 0.0
+    for a=3,dual2-1,2 do
+      wr, wi = wr - s*wi - s2*wr, wi + s*wr - s2*wi -- Advance the factor (wr, wi) by recurrence instead of calling sin/cos per step.
+      for i=a,a+2*(n-dual2),2*dual2 do
+	local j = i+dual2
+	local jr, ji = v[j], v[j+1]
+	local dr, di = wr*jr - wi*ji, wr*ji + wi*jr
+	local ir, ii = v[i], v[i+1]
+	v[j], v[j+1] = ir - dr, ii - di
+	v[i], v[i+1] = ir + dr, ii + di
+      end
+    end
+    dual = dual2
+  until dual >= n
+end
+
+function benchmarks.FFT(n) -- Builds the FFT benchmark for n complex points; returns function(cycles) that yields a flop count.
+  local l2n = log(n)/log(2)
+  if l2n % 1 ~= 0 then -- Exits the process with status 1 when n is not a power of two.
+    io.stderr:write("Error: FFT data length is not a power of 2\n")
+    os.exit(1)
+  end
+  local v = random_vector(n*2)
+  return function(cycles)
+    local norm = 1.0 / n
+    for p=1,cycles do -- Each cycle: transform with dir=-1, then dir=1, then scale by 1/n.
+      fft_transform(v, n, -1)
+      fft_transform(v, n, 1)
+      for i=1,n*2 do v[i] = v[i] * norm end
+    end
+    return ((5*n-2)*l2n + 2*(n+1)) * cycles
+  end
+end
+
+------------------------------------------------------------------------------
+-- SOR: Jacobi Successive Over-Relaxation.
+------------------------------------------------------------------------------
+
+local function sor_run(mat, m, n, cycles, omega) -- Runs `cycles` in-place relaxation sweeps over the interior of the m x n matrix `mat` with relaxation factor omega.
+  local om4, om1 = omega*0.25, 1.0-omega
+  m = m - 1 -- From here on m and n are the last interior row / column.
+  n = n - 1
+  for i=1,cycles do
+    for y=2,m do
+      local v, vp, vn = mat[y], mat[y-1], mat[y+1]
+      for x=2,n do
+	v[x] = om4*((vp[x]+vn[x])+(v[x-1]+v[x+1])) + om1*v[x]
+      end
+    end
+  end
+end
+
+function benchmarks.SOR(n) -- Builds the SOR benchmark on a random n x n matrix; returns function(cycles) that yields a flop count.
+  local mat = random_matrix(n, n)
+  return function(cycles)
+    sor_run(mat, n, n, cycles, 1.25)
+    return (n-1)*(n-1)*cycles*6
+  end
+end
+
+------------------------------------------------------------------------------
+-- MC: Monte Carlo Integration.
+------------------------------------------------------------------------------
+
+local function mc_integrate(cycles) -- Draws `cycles` points (x, y) from rand() and returns 4 times the fraction with x*x + y*y <= 1.
+  local under_curve = 0
+  local rand = rand -- Local copy of the generator upvalue as it is at call time.
+  for i=1,cycles do
+    local x = rand()
+    local y = rand()
+    if x*x + y*y <= 1.0 then under_curve = under_curve + 1 end
+  end
+  return (under_curve/cycles) * 4
+end
+
+function benchmarks.MC() -- Builds the Monte Carlo benchmark; returns function(cycles) that yields a flop count.
+  return function(cycles)
+    local res = mc_integrate(cycles)
+    assert(math.sqrt(cycles)*math.abs(res-math.pi) < 5.0, "bad MC result") -- The estimate must lie within 5/sqrt(cycles) of pi.
+    return cycles * 4 -- Way off, but same as SciMark in C/Java.
+  end
+end
+
+------------------------------------------------------------------------------
+-- Sparse Matrix Multiplication.
+------------------------------------------------------------------------------
+
+local function sparse_mult(n, cycles, vy, val, row, col, vx) -- Computes vy = A*vx `cycles` times; row r of A owns entries row[r]..row[r+1]-1 of val (values) and col (column indices).
+  for p=1,cycles do
+    for r=1,n do
+      local sum = 0
+      for i=row[r],row[r+1]-1 do sum = sum + vx[col[i]] * val[i] end
+      vy[r] = sum
+    end
+  end
+end
+
+function benchmarks.SPARSE(n, nz) -- Builds the sparse multiply benchmark for an n-row matrix with about nz entries; returns function(cycles) that yields a flop count.
+  local nr = floor(nz/n) -- Entries per row.
+  local anz = nr*n -- Entries actually stored.
+  local vx = random_vector(n)
+  local val = random_vector(anz)
+  local vy, col, row = darray(n+1), iarray(nz+1), iarray(n+2)
+  row[1] = 1
+  for r=1,n do
+    local step = floor(r/nr) -- Column stride for this row, at least 1.
+    if step < 1 then step = 1 end
+    local rr = row[r]
+    row[r+1] = rr+nr
+    for i=0,nr-1 do col[rr+i] = 1+i*step end
+  end
+  return function(cycles)
+    sparse_mult(n, cycles, vy, val, row, col, vx)
+    return anz*cycles*2
+  end
+end
+
+------------------------------------------------------------------------------
+-- LU: Dense Matrix Factorization.
+------------------------------------------------------------------------------
+
+local function lu_factor(a, pivot, m, n) -- In-place factorization of the m x n matrix `a` (table of rows) with row pivoting; pivot[j] receives the row chosen for column j. Raises "zero pivot".
+  local min_m_n = m < n and m or n
+  for j=1,min_m_n do
+    local jp, t = j, abs(a[j][j])
+    for i=j+1,m do -- Find the row at or below j with the largest absolute value in column j.
+      local ab = abs(a[i][j])
+      if ab > t then
+	jp = i
+	t = ab
+      end
+    end
+    pivot[j] = jp
+    if a[jp][j] == 0 then error("zero pivot") end
+    if jp ~= j then a[j], a[jp] = a[jp], a[j] end -- Rows are swapped by exchanging references.
+    if j < m then -- Scale column j below the diagonal by 1/a[j][j].
+      local recp = 1.0 / a[j][j]
+      for k=j+1,m do
+	local v = a[k]
+	v[j] = v[j] * recp
+      end
+    end
+    if j < min_m_n then -- Subtract eij times row j from each remaining row i, for columns after j.
+      for i=j+1,m do
+	local vi, vj = a[i], a[j]
+	local eij = vi[j]
+	for k=j+1,n do vi[k] = vi[k] - eij * vj[k] end
+      end
+    end
+  end
+end
+
+local function matrix_alloc(m, n) -- Returns a table of m rows, each a darray(n+1) so that indices 1..n are usable.
+  local a = {}
+  for y=1,m do a[y] = darray(n+1) end
+  return a
+end
+
+local function matrix_copy(dst, src, m, n) -- Copies elements [1..m][1..n] from src into the existing rows of dst.
+  for y=1,m do
+    local vd, vs = dst[y], src[y]
+    for x=1,n do vd[x] = vs[x] end
+  end
+end
+
+function benchmarks.LU(n) -- Builds the LU benchmark on a random n x n matrix; returns function(cycles) that yields a flop count.
+  local mat = random_matrix(n, n)
+  local tmp = matrix_alloc(n, n)
+  local pivot = iarray(n+1)
+  return function(cycles)
+    for i=1,cycles do
+      matrix_copy(tmp, mat, n, n) -- Restore the input first: lu_factor() overwrites its argument.
+      lu_factor(tmp, pivot, n, n)
+    end
+    return 2.0/3.0*n*n*n*cycles
+  end
+end
+
+------------------------------------------------------------------------------
+-- Main program.
+------------------------------------------------------------------------------
+
+local function printf(...) -- Formats the arguments with string.format and writes the result via io.write.
+  io.write(format(...))
+end
+
+local function fmtparams(p1, p2) -- Renders up to two benchmark parameters as "[p1, p2]", "[p1]" or "".
+  if p2 then return format("[%d, %d]", p1, p2)
+  elseif p1 then return format("[%d]", p1) end
+  return ""
+end
+
+local function measure(min_time, name, ...) -- Runs benchmark `name` built with the given parameters, doubling the cycle count until one run takes at least min_time seconds of os.clock() time; prints and returns the MFlops figure.
+  array_init()
+  rand_init(RANDOM_SEED) -- Reseed so every benchmark starts from the same generator state.
+  local run = benchmarks[name](...)
+  local cycles = 1
+  repeat
+    local tm = clock()
+    local flops = run(cycles, ...)
+    tm = clock() - tm
+    if tm >= min_time then
+      local res = flops / tm * 1.0e-6
+      -- Report MFlops; the parameters are forwarded to fmtparams() straight from the varargs.
+      printf("%-7s %8.2f  %s\n", name, res, fmtparams(...))
+      return res
+    end
+    cycles = cycles * 2
+  until false
+end
+
+printf("Lua SciMark %s based on SciMark 2.0a. %s.\n\n",
+       SCIMARK_VERSION, SCIMARK_COPYRIGHT)
+
+while arg and arg[1] do -- Consume command line arguments from the front.
+  local a = table.remove(arg, 1)
+  if a == "-noffi" then
+    package.preload.ffi = nil -- Presumably makes require("ffi") in array_init() fail so that tables are used -- confirm.
+  elseif a == "-small" then
+    SIZE_SELECT = "small"
+  elseif a == "-large" then
+    SIZE_SELECT = "large"
+  elseif benchmarks[a] then -- A benchmark name: run only that one, with optional numeric parameter overrides.
+    local p = benchmarks[SIZE_SELECT][a]
+    measure(MIN_TIME, a, tonumber(arg[1]) or p[1], tonumber(arg[2]) or p[2])
+    return -- Ends the script; the run-everything loop below is skipped.
+  else -- Anything else: print usage with the default parameters and exit with status 1.
+    printf("Usage: scimark [-noffi] [-small|-large] [BENCH params...]\n\n")
+    printf("BENCH   -small         -large\n")
+    printf("---------------------------------------\n")
+    for _,name in ipairs(benchmarks) do
+      printf("%-7s %-13s %s\n", name,
+	     fmtparams(unpack(benchmarks.small[name])),
+	     fmtparams(unpack(benchmarks.large[name])))
+    end
+    printf("\n")
+    os.exit(1)
+  end
+end
+
+local params = benchmarks[SIZE_SELECT]
+local sum = 0 -- Sum of the per-benchmark MFlops results.
+for _,name in ipairs(benchmarks) do
+  sum = sum + measure(MIN_TIME, name, unpack(params[name]))
+end
+printf("\nSciMark %8.2f  [%s problem sizes]\n", sum / #benchmarks, SIZE_SELECT) -- Composite score: arithmetic mean over the benchmarks.
+io.flush()
+
diff --git a/perf/LuaJIT-benches/scimark-fft.lua b/perf/LuaJIT-benches/scimark-fft.lua
new file mode 100644
index 00000000..c05bb69a
--- /dev/null
+++ b/perf/LuaJIT-benches/scimark-fft.lua
@@ -0,0 +1 @@
+require("scimark_lib").FFT(1024)(tonumber(arg and arg[1]) or 50000) -- Build the 1024-point FFT kernel and run it for arg[1] cycles (default 50000); the returned flop count is discarded.
diff --git a/perf/LuaJIT-benches/scimark-lu.lua b/perf/LuaJIT-benches/scimark-lu.lua
new file mode 100644
index 00000000..7636d994
--- /dev/null
+++ b/perf/LuaJIT-benches/scimark-lu.lua
@@ -0,0 +1 @@
+require("scimark_lib").LU(100)(tonumber(arg and arg[1]) or 5000) -- Build the 100 x 100 LU kernel and run it for arg[1] cycles (default 5000); the returned flop count is discarded.
diff --git a/perf/LuaJIT-benches/scimark-sor.lua b/perf/LuaJIT-benches/scimark-sor.lua
new file mode 100644
index 00000000..e537e986
--- /dev/null
+++ b/perf/LuaJIT-benches/scimark-sor.lua
@@ -0,0 +1 @@
+require("scimark_lib").SOR(100)(tonumber(arg and arg[1]) or 50000) -- Build the 100 x 100 SOR kernel and run it for arg[1] cycles (default 50000); the returned flop count is discarded.
diff --git a/perf/LuaJIT-benches/scimark-sparse.lua b/perf/LuaJIT-benches/scimark-sparse.lua
new file mode 100644
index 00000000..01a2258d
--- /dev/null
+++ b/perf/LuaJIT-benches/scimark-sparse.lua
@@ -0,0 +1 @@
+require("scimark_lib").SPARSE(1000, 5000)(tonumber(arg and arg[1]) or 150000) -- Build the sparse kernel (n=1000, nz=5000) and run it for arg[1] cycles (default 150000); the returned flop count is discarded.
diff --git a/perf/LuaJIT-benches/scimark_lib.lua b/perf/LuaJIT-benches/scimark_lib.lua
new file mode 100644
index 00000000..aeffd75a
--- /dev/null
+++ b/perf/LuaJIT-benches/scimark_lib.lua
@@ -0,0 +1,297 @@
+------------------------------------------------------------------------------
+-- Lua SciMark (2010-03-15).
+--
+-- A literal translation of SciMark 2.0a, written in Java and C.
+-- Credits go to the original authors Roldan Pozo and Bruce Miller.
+-- See: http://math.nist.gov/scimark2/
+------------------------------------------------------------------------------
+
+
+local SCIMARK_VERSION = "2010-03-15" -- Not referenced anywhere else in this file.
+
+local RANDOM_SEED = 101009 -- Must be odd.
+
+local abs, log, sin, floor = math.abs, math.log, math.sin, math.floor -- Cache library functions in locals.
+local pi, clock = math.pi, os.clock -- `clock` is not referenced anywhere else in this file.
+
+local benchmarks = {} -- Module table: maps a benchmark name to its constructor function.
+
+------------------------------------------------------------------------------
+-- This is a Lagged Fibonacci Pseudo-random Number Generator with
+-- j, k, M = 5, 17, 31. Pretty weak, but same as C/Java SciMark.
+------------------------------------------------------------------------------
+
+local rand, rand_init -- The generator state lives in upvalues shared by both functions; rand_init(seed) resets it.
+
+if jit and jit.status and jit.status() then
+  -- LJ2 has bit operations and zero-based arrays (internally).
+  local bit = require("bit")
+  local band, sar = bit.band, bit.arshift
+  local Rm, Rj, Ri = {}, 0, 0
+  for i=0,16 do Rm[i] = 0 end
+  function rand_init(seed)
+    Rj, Ri = 16, 11
+    for i=16,0,-1 do -- Fill the 17 state words from the seed, masking each to 31 bits.
+      seed = band(seed*9069, 0x7fffffff)
+      Rm[i] = seed
+    end
+  end
+  function rand()
+    local i = band(Ri+1, sar(Ri-16, 31)) -- Ri+1, or 0 once Ri has reached 16 (branch-free wrap-around).
+    local j = band(Rj+1, sar(Rj-16, 31))
+    Ri, Rj = i, j
+    local k = band(Rm[i] - Rm[j], 0x7fffffff)
+    Rm[j] = k
+    return k * (1.0/2147483647.0) -- Scale the 31-bit value by 1/(2^31-1).
+  end
+else
+  -- Better for standard Lua with one-based arrays and without bit operations.
+  local Rm, Rj = {}, 1
+  for i=1,17 do Rm[i] = 0 end
+  function rand_init(seed)
+    Rj = 1
+    for i=17,1,-1 do
+      seed = (seed*9069) % (2^31)
+      Rm[i] = seed
+    end
+  end
+  function rand()
+    local j, m = Rj, Rm
+    local h = j - 5 -- Second index trails j by 5, wrapping within 1..17.
+    if h < 1 then h = h + 17 end
+    local k = m[h] - m[j]
+    if k < 0 then k = k + 2147483647 end
+    m[j] = k
+    if j < 17 then Rj = j + 1 else Rj = 1 end
+    return k * (1.0/2147483647.0)
+  end
+end
+
+local function random_vector(n) -- Returns a new table with indices 1..n filled from rand().
+  local v = {}
+  for x=1,n do v[x] = rand() end
+  return v
+end
+
+local function random_matrix(m, n) -- Returns a table of m rows; each row is a table with indices 1..n filled from rand().
+  local a = {}
+  for y=1,m do
+    local v = {}
+    a[y] = v
+    for x=1,n do v[x] = rand() end
+  end
+  return a
+end
+
+------------------------------------------------------------------------------
+-- FFT: Fast Fourier Transform.
+------------------------------------------------------------------------------
+
+local function fft_bitreverse(v, n) -- In-place permutation of the n two-element pairs stored at v[1..2n]; j is maintained as the bit-reversed counterpart of i.
+  local j = 0
+  for i=0,2*n-4,2 do
+    if i < j then -- Swap each pair only once.
+      v[i+1], v[i+2], v[j+1], v[j+2] = v[j+1], v[j+2], v[i+1], v[i+2]
+    end
+    local k = n
+    while k <= j do j = j - k; k = k / 2 end
+    j = j + k
+  end
+end
+
+local function fft_transform(v, n, dir) -- In-place transform of n interleaved (re, im) pairs in v[1..2n]; dir sets the sign of the rotation angle (callers pass -1 and 1).
+  if n <= 1 then return end
+  fft_bitreverse(v, n)
+  local dual = 1
+  repeat -- One pass per stage; dual doubles until it reaches n.
+    local dual2 = 2*dual
+    for i=1,2*n-1,2*dual2 do -- Butterflies whose factor is 1 (no multiplication needed).
+      local j = i+dual2
+      local ir, ii = v[i], v[i+1]
+      local jr, ji = v[j], v[j+1]
+      v[j], v[j+1] = ir - jr, ii - ji
+      v[i], v[i+1] = ir + jr, ii + ji
+    end
+    local theta = dir * pi / dual
+    local s, s2 = sin(theta), 2.0 * sin(theta * 0.5)^2
+    local wr, wi = 1.0, 0.0
+    for a=3,dual2-1,2 do
+      wr, wi = wr - s*wi - s2*wr, wi + s*wr - s2*wi -- Advance the factor (wr, wi) by recurrence instead of calling sin/cos per step.
+      for i=a,a+2*(n-dual2),2*dual2 do
+	local j = i+dual2
+	local jr, ji = v[j], v[j+1]
+	local dr, di = wr*jr - wi*ji, wr*ji + wi*jr
+	local ir, ii = v[i], v[i+1]
+	v[j], v[j+1] = ir - dr, ii - di
+	v[i], v[i+1] = ir + dr, ii + di
+      end
+    end
+    dual = dual2
+  until dual >= n
+end
+
+function benchmarks.FFT(n) -- Builds the FFT kernel for n complex points; returns function(cycles) that yields a flop count.
+  local l2n = log(n)/log(2)
+  if l2n % 1 ~= 0 then -- Exits the process with status 1 when n is not a power of two.
+    io.stderr:write("Error: FFT data length is not a power of 2\n")
+    os.exit(1)
+  end
+  local v = random_vector(n*2)
+  return function(cycles)
+    local norm = 1.0 / n
+    for p=1,cycles do -- Each cycle: transform with dir=-1, then dir=1, then scale by 1/n.
+      fft_transform(v, n, -1)
+      fft_transform(v, n, 1)
+      for i=1,n*2 do v[i] = v[i] * norm end
+    end
+    return ((5*n-2)*l2n + 2*(n+1)) * cycles
+  end
+end
+
+------------------------------------------------------------------------------
+-- SOR: Jacobi Successive Over-Relaxation.
+------------------------------------------------------------------------------
+
+local function sor_run(mat, m, n, cycles, omega) -- Runs `cycles` in-place relaxation sweeps over the interior of the m x n matrix `mat` with relaxation factor omega.
+  local om4, om1 = omega*0.25, 1.0-omega
+  m = m - 1 -- From here on m and n are the last interior row / column.
+  n = n - 1
+  for i=1,cycles do
+    for y=2,m do
+      local v, vp, vn = mat[y], mat[y-1], mat[y+1]
+      for x=2,n do
+	v[x] = om4*((vp[x]+vn[x])+(v[x-1]+v[x+1])) + om1*v[x]
+      end
+    end
+  end
+end
+
+function benchmarks.SOR(n) -- Builds the SOR kernel on a random n x n matrix; returns function(cycles) that yields a flop count.
+  local mat = random_matrix(n, n)
+  return function(cycles)
+    sor_run(mat, n, n, cycles, 1.25)
+    return (n-1)*(n-1)*cycles*6
+  end
+end
+
+------------------------------------------------------------------------------
+-- MC: Monte Carlo Integration.
+------------------------------------------------------------------------------
+
+local function mc_integrate(cycles) -- Draws `cycles` points (x, y) from rand() and returns 4 times the fraction with x*x + y*y <= 1.
+  local under_curve = 0
+  local rand = rand -- Local copy of the generator for the loop.
+  for i=1,cycles do
+    local x = rand()
+    local y = rand()
+    if x*x + y*y <= 1.0 then under_curve = under_curve + 1 end
+  end
+  return (under_curve/cycles) * 4
+end
+
+function benchmarks.MC() -- Builds the Monte Carlo kernel; returns function(cycles) that yields a flop count.
+  return function(cycles)
+    local res = mc_integrate(cycles)
+    assert(math.sqrt(cycles)*math.abs(res-math.pi) < 5.0, "bad MC result") -- The estimate must lie within 5/sqrt(cycles) of pi.
+    return cycles * 4 -- Way off, but same as SciMark in C/Java.
+  end
+end
+
+------------------------------------------------------------------------------
+-- Sparse Matrix Multiplication.
+------------------------------------------------------------------------------
+
+local function sparse_mult(n, cycles, vy, val, row, col, vx) -- Computes vy = A*vx `cycles` times; row r of A owns entries row[r]..row[r+1]-1 of val (values) and col (column indices).
+  for p=1,cycles do
+    for r=1,n do
+      local sum = 0
+      for i=row[r],row[r+1]-1 do sum = sum + vx[col[i]] * val[i] end
+      vy[r] = sum
+    end
+  end
+end
+
+function benchmarks.SPARSE(n, nz) -- Builds the sparse multiply kernel for an n-row matrix with about nz entries; returns function(cycles) that yields a flop count.
+  local nr = floor(nz/n) -- Entries per row.
+  local anz = nr*n -- Entries actually stored.
+  local vx = random_vector(n)
+  local val = random_vector(anz)
+  local vy, col, row = {}, {}, {}
+  row[1] = 1
+  for r=1,n do
+    local step = floor(r/nr) -- Column stride for this row, at least 1.
+    if step < 1 then step = 1 end
+    local rr = row[r]
+    row[r+1] = rr+nr
+    for i=0,nr-1 do col[rr+i] = 1+i*step end
+  end
+  return function(cycles)
+    sparse_mult(n, cycles, vy, val, row, col, vx)
+    return anz*cycles*2
+  end
+end
+
+------------------------------------------------------------------------------
+-- LU: Dense Matrix Factorization.
+------------------------------------------------------------------------------
+
+local function lu_factor(a, pivot, m, n) -- In-place factorization of the m x n matrix `a` (table of rows) with row pivoting; pivot[j] receives the row chosen for column j. Raises "zero pivot".
+  local min_m_n = m < n and m or n
+  for j=1,min_m_n do
+    local jp, t = j, abs(a[j][j])
+    for i=j+1,m do -- Find the row at or below j with the largest absolute value in column j.
+      local ab = abs(a[i][j])
+      if ab > t then
+	jp = i
+	t = ab
+      end
+    end
+    pivot[j] = jp
+    if a[jp][j] == 0 then error("zero pivot") end
+    if jp ~= j then a[j], a[jp] = a[jp], a[j] end -- Rows are swapped by exchanging references.
+    if j < m then -- Scale column j below the diagonal by 1/a[j][j].
+      local recp = 1.0 / a[j][j]
+      for k=j+1,m do
+        local v = a[k]
+	v[j] = v[j] * recp
+      end
+    end
+    if j < min_m_n then -- Subtract eij times row j from each remaining row i, for columns after j.
+      for i=j+1,m do
+	local vi, vj = a[i], a[j]
+	local eij = vi[j]
+	for k=j+1,n do vi[k] = vi[k] - eij * vj[k] end
+      end
+    end
+  end
+end
+
+local function matrix_alloc(m, n) -- Returns a table of m empty row tables; n is accepted but not used here.
+  local a = {}
+  for y=1,m do a[y] = {} end
+  return a
+end
+
+local function matrix_copy(dst, src, m, n) -- Copies elements [1..m][1..n] from src into the existing rows of dst.
+  for y=1,m do
+    local vd, vs = dst[y], src[y]
+    for x=1,n do vd[x] = vs[x] end
+  end
+end
+
+function benchmarks.LU(n) -- Builds the LU kernel on a random n x n matrix; returns function(cycles) that yields a flop count.
+  local mat = random_matrix(n, n)
+  local tmp = matrix_alloc(n, n)
+  local pivot = {}
+  return function(cycles)
+    for i=1,cycles do
+      matrix_copy(tmp, mat, n, n) -- Restore the input first: lu_factor() overwrites its argument.
+      lu_factor(tmp, pivot, n, n)
+    end
+    return 2.0/3.0*n*n*n*cycles
+  end
+end
+
+rand_init(RANDOM_SEED) -- Seed the generator once, when the module is loaded.
+
+return benchmarks -- Module value: table of benchmark constructors (FFT, SOR, MC, SPARSE, LU).
diff --git a/perf/LuaJIT-benches/series.lua b/perf/LuaJIT-benches/series.lua
new file mode 100644
index 00000000..f766cb32
--- /dev/null
+++ b/perf/LuaJIT-benches/series.lua
@@ -0,0 +1,34 @@
+
+local function integrate(x0, x1, nsteps, omegan, f) -- Trapezoid-style sum of (x+1)^x * f(omegan*x) over [x0, x1] with step (x1-x0)/nsteps; both end points are weighted by 1/2.
+  local x, dx = x0, (x1-x0)/nsteps
+  local rvalue = ((x0+1)^x0 * f(omegan*x0)) / 2
+  for i=3,nsteps do -- Note: this visits nsteps-2 interior points.
+    x = x + dx
+    rvalue = rvalue + (x+1)^x * f(omegan*x)
+  end
+  return (rvalue + ((x1+1)^x1 * f(omegan*x1)) / 2) * dx
+end
+
+local function series(n) -- Returns a table t of 2*n numbers, each computed by integrate() over [0, 2] with 1000 steps.
+  local sin, cos = math.sin, math.cos
+  local omega = math.pi
+  local t = {}
+
+  t[1] = integrate(0, 2, 1000, 0, function() return 1 end) / 2 -- Constant term: the weight function is 1.
+  t[2] = 0
+
+  for i=2,n do -- Pair i: cosine-weighted value at t[2*i-1], sine-weighted value at t[2*i].
+    t[2*i-1] = integrate(0, 2, 1000, omega*i, cos)
+    t[2*i] = integrate(0, 2, 1000, omega*i, sin)
+  end
+
+  return t
+end
+
+local n = tonumber(arg and arg[1]) or 10000 -- Number of coefficient pairs: first CLI argument, or 10000.
+local tm = os.clock()
+local t = series(n)
+tm = os.clock() - tm -- CPU time spent in series(n).
+assert(math.abs(t[1]-2.87295) < 0.00001) -- Sanity check of the first coefficient against a known value.
+io.write(string.format("size %d, %.2f s, %.1f iterations/s\n",
+                       n, tm, (2*n-1)/tm)) -- 2*n-1 is the number of integrate() calls made by series(n).
diff --git a/perf/LuaJIT-benches/spectral-norm.lua b/perf/LuaJIT-benches/spectral-norm.lua
new file mode 100644
index 00000000..ecc80112
--- /dev/null
+++ b/perf/LuaJIT-benches/spectral-norm.lua
@@ -0,0 +1,40 @@
+
+local function A(i, j) -- Element (i, j) of the matrix, 1-based: 1 / ((i+j-1)*(i+j-2)/2 + i).
+  local ij = i+j-1
+  return 1.0 / (ij * (ij-1) * 0.5 + i)
+end
+
+local function Av(x, y, N) -- Stores the product A * x in y[1..N]; matrix elements are computed on the fly by A(i, j).
+  for i=1,N do
+    local a = 0
+    for j=1,N do a = a + x[j] * A(i, j) end
+    y[i] = a
+  end
+end
+
+local function Atv(x, y, N) -- Stores the product transpose(A) * x in y[1..N] (same as Av with the indices of A swapped).
+  for i=1,N do
+    local a = 0
+    for j=1,N do a = a + x[j] * A(j, i) end
+    y[i] = a
+  end
+end
+
+local function AtAv(x, y, t, N) -- Stores transpose(A) * (A * x) in y, using t as scratch space for the intermediate vector.
+  Av(x, t, N)
+  Atv(t, y, N)
+end
+
+local N = tonumber(arg and arg[1]) or 100 -- Vector length: first CLI argument, or 100.
+local u, v, t = {}, {}, {}
+for i=1,N do u[i] = 1 end -- Start from the all-ones vector.
+
+for i=1,10 do AtAv(u, v, t, N) AtAv(v, u, t, N) end -- 10 rounds, each applying AtAv twice (u -> v, then v -> u).
+
+local vBv, vv = 0, 0 -- Dot products u.v and v.v.
+for i=1,N do
+  local ui, vi = u[i], v[i]
+  vBv = vBv + ui*vi
+  vv = vv + vi*vi
+end
+io.write(string.format("%0.9f\n", math.sqrt(vBv / vv)))
diff --git a/perf/LuaJIT-benches/sum-file.lua b/perf/LuaJIT-benches/sum-file.lua
new file mode 100644
index 00000000..c9e618fd
--- /dev/null
+++ b/perf/LuaJIT-benches/sum-file.lua
@@ -0,0 +1,6 @@
+
+local sum = 0 -- Running total of all input lines.
+for line in io.lines() do -- Reads standard input line by line.
+  sum = sum + line -- Relies on Lua's implicit string-to-number coercion; a non-numeric line raises an error.
+end
+io.write(sum, "\n")
--------------2GliyEDk8CzVRwg37b51jDjr--