This patch introduces the LuaJIT-test-cleanup bench suite [1] into our LuaJIT fork source tree. To provide relatable reprodusible results several benchmarks need to be adjusted. However, to be sure we initially use the valid suite, everything in the directory is moved intact. [1]: https://github.com/LuaJIT/LuaJIT-test-cleanup/tree/014708b/bench --- .luacheckrc | 1 + perf/LuaJIT-benches/PARAM_arm.txt | 29 + perf/LuaJIT-benches/PARAM_mips.txt | 29 + perf/LuaJIT-benches/PARAM_ppc.txt | 29 + perf/LuaJIT-benches/PARAM_x86.txt | 29 + perf/LuaJIT-benches/SUMCOL_1.txt | 1000 ++++++++++++++++++++ perf/LuaJIT-benches/TEST_md5sum.txt | 20 + perf/LuaJIT-benches/array3d.lua | 59 ++ perf/LuaJIT-benches/binary-trees.lua | 47 + perf/LuaJIT-benches/chameneos.lua | 68 ++ perf/LuaJIT-benches/coroutine-ring.lua | 42 + perf/LuaJIT-benches/euler14-bit.lua | 22 + perf/LuaJIT-benches/fannkuch.lua | 50 + perf/LuaJIT-benches/fasta.lua | 95 ++ perf/LuaJIT-benches/k-nucleotide.lua | 58 ++ perf/LuaJIT-benches/life.lua | 111 +++ perf/LuaJIT-benches/mandelbrot-bit.lua | 33 + perf/LuaJIT-benches/mandelbrot.lua | 23 + perf/LuaJIT-benches/md5.lua | 183 ++++ perf/LuaJIT-benches/meteor.lua | 220 +++++ perf/LuaJIT-benches/nbody.lua | 119 +++ perf/LuaJIT-benches/nsieve-bit-fp.lua | 37 + perf/LuaJIT-benches/nsieve-bit.lua | 27 + perf/LuaJIT-benches/nsieve.lua | 21 + perf/LuaJIT-benches/partialsums.lua | 29 + perf/LuaJIT-benches/pidigits-nogmp.lua | 100 ++ perf/LuaJIT-benches/ray.lua | 135 +++ perf/LuaJIT-benches/recursive-ack.lua | 8 + perf/LuaJIT-benches/recursive-fib.lua | 7 + perf/LuaJIT-benches/revcomp.lua | 37 + perf/LuaJIT-benches/scimark-2010-12-20.lua | 400 ++++++++ perf/LuaJIT-benches/scimark-fft.lua | 1 + perf/LuaJIT-benches/scimark-lu.lua | 1 + perf/LuaJIT-benches/scimark-sor.lua | 1 + perf/LuaJIT-benches/scimark-sparse.lua | 1 + perf/LuaJIT-benches/scimark_lib.lua | 297 ++++++ perf/LuaJIT-benches/series.lua | 34 + perf/LuaJIT-benches/spectral-norm.lua | 40 + perf/LuaJIT-benches/sum-file.lua | 6 + 39 files changed, 3449 insertions(+) create mode 100644 perf/LuaJIT-benches/PARAM_arm.txt create mode 100644 perf/LuaJIT-benches/PARAM_mips.txt create mode 100644 perf/LuaJIT-benches/PARAM_ppc.txt create mode 100644 perf/LuaJIT-benches/PARAM_x86.txt create mode 100644 perf/LuaJIT-benches/SUMCOL_1.txt create mode 100644 perf/LuaJIT-benches/TEST_md5sum.txt create mode 100644 perf/LuaJIT-benches/array3d.lua create mode 100644 perf/LuaJIT-benches/binary-trees.lua create mode 100644 perf/LuaJIT-benches/chameneos.lua create mode 100644 perf/LuaJIT-benches/coroutine-ring.lua create mode 100644 perf/LuaJIT-benches/euler14-bit.lua create mode 100644 perf/LuaJIT-benches/fannkuch.lua create mode 100644 perf/LuaJIT-benches/fasta.lua create mode 100644 perf/LuaJIT-benches/k-nucleotide.lua create mode 100644 perf/LuaJIT-benches/life.lua create mode 100644 perf/LuaJIT-benches/mandelbrot-bit.lua create mode 100644 perf/LuaJIT-benches/mandelbrot.lua create mode 100644 perf/LuaJIT-benches/md5.lua create mode 100644 perf/LuaJIT-benches/meteor.lua create mode 100644 perf/LuaJIT-benches/nbody.lua create mode 100644 perf/LuaJIT-benches/nsieve-bit-fp.lua create mode 100644 perf/LuaJIT-benches/nsieve-bit.lua create mode 100644 perf/LuaJIT-benches/nsieve.lua create mode 100644 perf/LuaJIT-benches/partialsums.lua create mode 100644 perf/LuaJIT-benches/pidigits-nogmp.lua create mode 100644 perf/LuaJIT-benches/ray.lua create mode 100644 perf/LuaJIT-benches/recursive-ack.lua create mode 100644 perf/LuaJIT-benches/recursive-fib.lua create mode 100644 perf/LuaJIT-benches/revcomp.lua create mode 100644 perf/LuaJIT-benches/scimark-2010-12-20.lua create mode 100644 perf/LuaJIT-benches/scimark-fft.lua create mode 100644 perf/LuaJIT-benches/scimark-lu.lua create mode 100644 perf/LuaJIT-benches/scimark-sor.lua create mode 100644 perf/LuaJIT-benches/scimark-sparse.lua create mode 100644 perf/LuaJIT-benches/scimark_lib.lua create mode 100644 perf/LuaJIT-benches/series.lua create mode 100644 perf/LuaJIT-benches/spectral-norm.lua create mode 100644 perf/LuaJIT-benches/sum-file.lua diff --git a/.luacheckrc b/.luacheckrc index 19098dd9..35824875 100644 --- a/.luacheckrc +++ b/.luacheckrc @@ -16,6 +16,7 @@ files['test/tarantool-tests/'] = { -- test suites and need to be coherent with the upstream. exclude_files = { 'dynasm/', + 'perf/LuaJIT-benches/', 'src/', 'test/LuaJIT-tests/', 'test/PUC-Rio-Lua-5.1-tests/', diff --git a/perf/LuaJIT-benches/PARAM_arm.txt b/perf/LuaJIT-benches/PARAM_arm.txt new file mode 100644 index 00000000..a07fd010 --- /dev/null +++ b/perf/LuaJIT-benches/PARAM_arm.txt @@ -0,0 +1,29 @@ +array3d 200 +binary-trees 13 +chameneos 1e6 +coroutine-ring 3e6 +euler14-bit 5e6 +fannkuch 10 +fasta 2e6 +k-nucleotide 5e5 FASTA_500000 +life +mandelbrot 2000 +mandelbrot-bit 2000 +md5 5000 +nbody 1e6 +nsieve 9 +nsieve-bit 9 +nsieve-bit-fp 9 +partialsums 2e6 +pidigits-nogmp 2000 +ray 4 +recursive-ack 9 +recursive-fib 37 +revcomp 1e6 FASTA_1000000 +scimark-fft 2000 +scimark-lu 300 +scimark-sor 5000 +scimark-sparse 5e3 +series 1500 +spectral-norm 1000 +sum-file 1000 SUMCOL_1000 diff --git a/perf/LuaJIT-benches/PARAM_mips.txt b/perf/LuaJIT-benches/PARAM_mips.txt new file mode 100644 index 00000000..e6bcadba --- /dev/null +++ b/perf/LuaJIT-benches/PARAM_mips.txt @@ -0,0 +1,29 @@ +array3d 50 +binary-trees 10 +chameneos 5e4 +coroutine-ring 2e5 +euler14-bit 2e4 +fannkuch 8 +fasta 2e4 +k-nucleotide 1e4 FASTA_10000 +life +mandelbrot 150 +mandelbrot-bit 150 +md5 10 +nbody 1e4 +nsieve 4 +nsieve-bit 4 +nsieve-bit-fp 2 +partialsums 5e4 +pidigits-nogmp 150 +ray 2 +recursive-ack 7 +recursive-fib 29 +revcomp 5e4 FASTA_50000 +scimark-fft 20 +scimark-lu 3 +scimark-sor 40 +scimark-sparse 100 +series 50 +spectral-norm 100 +sum-file 100 SUMCOL_100 diff --git a/perf/LuaJIT-benches/PARAM_ppc.txt b/perf/LuaJIT-benches/PARAM_ppc.txt new file mode 100644 index 00000000..c8319a15 --- /dev/null +++ b/perf/LuaJIT-benches/PARAM_ppc.txt @@ -0,0 +1,29 @@ +array3d 200 +binary-trees 13 +chameneos 1e6 +coroutine-ring 4e6 +euler14-bit 1e6 +fannkuch 9 +fasta 5e5 +k-nucleotide 1e5 FASTA_100000 +life +mandelbrot 800 +mandelbrot-bit 800 +md5 500 +nbody 1e5 +nsieve 8 +nsieve-bit 8 +nsieve-bit-fp 8 +partialsums 5e5 +pidigits-nogmp 800 +ray 5 +recursive-ack 9 +recursive-fib 34 +revcomp 1e6 FASTA_1000000 +scimark-fft 500 +scimark-lu 100 +scimark-sor 1000 +scimark-sparse 3000 +series 1000 +spectral-norm 200 +sum-file 1000 SUMCOL_1000 diff --git a/perf/LuaJIT-benches/PARAM_x86.txt b/perf/LuaJIT-benches/PARAM_x86.txt new file mode 100644 index 00000000..87088d7b --- /dev/null +++ b/perf/LuaJIT-benches/PARAM_x86.txt @@ -0,0 +1,29 @@ +array3d 300 +binary-trees 16 +chameneos 1e7 +coroutine-ring 2e7 +euler14-bit 2e7 +fannkuch 11 +fasta 25e6 +k-nucleotide 5e6 FASTA_5000000 +life +mandelbrot 5000 +mandelbrot-bit 5000 +md5 20000 +nbody 5e6 +nsieve 12 +nsieve-bit 12 +nsieve-bit-fp 12 +partialsums 1e7 +pidigits-nogmp 5000 +ray 9 +recursive-ack 10 +recursive-fib 40 +revcomp 5e6 FASTA_5000000 +scimark-fft 50000 +scimark-lu 5000 +scimark-sor 50000 +scimark-sparse 15e4 +series 10000 +spectral-norm 3000 +sum-file 5000 SUMCOL_5000 diff --git a/perf/LuaJIT-benches/SUMCOL_1.txt b/perf/LuaJIT-benches/SUMCOL_1.txt new file mode 100644 index 00000000..956aba14 --- /dev/null +++ b/perf/LuaJIT-benches/SUMCOL_1.txt @@ -0,0 +1,1000 @@ +276 +498 +-981 +770 +-401 +702 +966 +950 +-853 +-53 +-293 +604 +288 +892 +-697 +204 +96 +408 +880 +-7 +-817 +422 +-261 +-485 +-77 +826 +184 +864 +-751 +626 +812 +-369 +-353 +-371 +488 +-83 +-659 +24 +524 +-21 +840 +-757 +-17 +-973 +-843 +260 +858 +-389 +-521 +-99 +482 +-561 +-213 +630 +766 +932 +112 +-419 +-877 +762 +266 +-837 +170 +834 +746 +764 +922 +-89 +576 +-63 +90 +684 +316 +506 +-959 +708 +70 +252 +-747 +342 +-593 +-895 +-937 +-707 +350 +588 +-201 +-683 +-113 +-511 +-867 +322 +202 +472 +150 +-9 +-643 +28 +336 +86 +-925 +836 +-473 +-451 +-971 +-805 +-619 +84 +-67 +806 +270 +366 +334 +-555 +-557 +-331 +-409 +-553 +-145 +-71 +528 +490 +492 +828 +628 +-961 +536 +-859 +-271 +974 +-671 +-749 +414 +-257 +778 +56 +598 +-437 +-899 +-785 +-987 +32 +-999 +132 +-821 +-209 +402 +-543 +194 +-967 +294 +-943 +-285 +-483 +-97 +660 +-481 +-829 +-309 +-597 +-855 +80 +-355 +192 +-823 +436 +916 +282 +-629 +612 +-329 +-535 +780 +-47 +706 +110 +756 +-857 +-933 +-345 +-523 +718 +-31 +902 +678 +540 +698 +456 +-399 +126 +412 +-563 +-321 +-487 +-641 +-195 +-199 +-955 +772 +570 +18 +-217 +886 +984 +-721 +-995 +46 +-989 +946 +64 +716 +-719 +-869 +-579 +776 +450 +936 +980 +-439 +-977 +-455 +-997 +6 +268 +-269 +-421 +328 +352 +578 +-575 +476 +976 +-57 +-469 +544 +582 +-43 +510 +-939 +-581 +-337 +-203 +-737 +-827 +852 +-279 +-803 +-911 +-865 +548 +48 +-75 +416 +-275 +688 +-255 +-687 +-461 +-233 +420 +912 +-901 +-299 +12 +568 +694 +-411 +-883 +-327 +-361 +-339 +646 +-137 +-905 +670 +686 +-131 +-849 +-825 +256 +228 +-841 +68 +368 +-909 +242 +298 +118 +10 +222 +954 +-493 +-459 +-445 +608 +-765 +34 +468 +-715 +690 +-185 +-551 +-571 +-241 +292 +92 +768 +-923 +956 +614 +8 +730 +208 +-417 +300 +136 +-59 +-251 +-539 +166 +798 +866 +454 +-391 +-317 +668 +502 +-15 +994 +854 +-189 +666 +446 +-565 +-5 +42 +-227 +-87 +-779 +26 +312 +354 +754 +396 +-515 +220 +872 +654 +88 +-667 +250 +572 +952 +72 +982 +972 +-529 +-471 +-533 +-427 +538 +154 +-457 +-819 +750 +152 +452 +-41 +838 +-489 +418 +-649 +-637 +-197 +74 +394 +-653 +-727 +-435 +-23 +348 +638 +-611 +914 +-357 +-743 +-685 +580 +-247 +-577 +54 +-931 +-3 +558 +-793 +-443 +-759 +162 +-811 +384 +720 +-117 +900 +-519 +-39 +744 +432 +286 +-873 +380 +-167 +-283 +430 +-155 +-755 +206 +100 +364 +-677 +332 +-567 +382 +-605 +-181 +676 +-475 +-845 +910 +546 +14 +398 +616 +-769 +424 +992 +-235 +-239 +774 +478 +-919 +168 +-771 +-773 +-69 +-509 +930 +550 +-463 +178 +-861 +-761 +-795 +234 +-831 +-61 +-979 +-851 +-665 +-709 +896 +742 +-123 +590 +-693 +-887 +-379 +144 +-717 +20 +174 +82 +464 +30 +-969 +-349 +-531 +-799 +-661 +-647 +-623 +878 +148 +-545 +238 +-259 +554 +726 +-37 +-797 +98 +78 +-591 +-975 +962 +120 +906 +-207 +656 +-171 +652 +188 +672 +-133 +-91 +224 +818 +-333 +-839 +-499 +22 +-739 +142 +378 +-403 +-315 +370 +284 +122 +230 +-527 +-127 +442 +534 +160 +722 +262 +-657 +304 +258 +-103 +960 +-495 +-265 +634 +-101 +480 +-363 +308 +76 +-949 +-585 +904 +146 +-703 +164 +850 +246 +732 +-725 +566 +274 +-163 +-935 +-681 +-229 +254 +-733 +-547 +-273 +-903 +736 +-711 +794 +392 +-655 +-549 +808 +-429 +484 +-701 +-617 +804 +36 +-775 +-335 +-927 +714 +-177 +-325 +-413 +-963 +114 +-253 +-789 +-645 +40 +434 +898 +924 +-19 +738 +788 +280 +-121 +594 +-913 +426 +816 +-373 +-45 +340 +-109 +-323 +58 +-249 +940 +-297 +988 +998 +-607 +-745 +-633 +-115 +996 +-893 +696 +400 +848 +500 +-263 +562 +-807 +-105 +-603 +658 +-73 +-863 +448 +680 +-157 +-161 +728 +814 +-477 +-375 +1000 +-631 +-991 +362 +156 +-187 +-705 +-917 +-449 +-741 +556 +440 +-589 +-11 +-359 +-891 +-801 +-153 +-381 +938 +-173 +-243 +618 +-599 +-497 +486 +128 +790 +460 +-27 +-305 +-205 +-215 +324 +-341 +50 +458 +52 +-621 +874 +386 +560 +-569 +-51 +802 +786 +920 +-425 +466 +444 +-507 +-915 +346 +622 +-679 +784 +-689 +388 +508 +-613 +-313 +-447 +564 +-897 +-211 +-225 +-615 +-367 +186 +894 +-65 +-453 +-245 +602 +496 +-651 +-601 +820 +226 +-695 +-119 +372 +180 +94 +214 +542 +648 +-871 +592 +584 +824 +796 +374 +-945 +-311 +516 +942 +-221 +-433 +200 +-465 +-953 +870 +868 +-879 +518 +356 +-223 +682 +990 +-191 +-541 +-951 +-921 +-319 +-169 +-291 +-289 +792 +876 +306 +-491 +326 +-885 +62 +514 +-929 +318 +-231 +632 +44 +-107 +644 +-267 +-343 +-847 +934 +734 +-505 +-351 +574 +-627 +636 +-93 +-431 +-835 +428 +-183 +-151 +2 +-813 +-595 +958 +-141 +692 +-385 +610 +-179 +376 +948 +198 +-675 +964 +-907 +918 +-165 +-1 +406 +748 +-111 +532 +-55 +-281 +740 +504 +236 +-29 +662 +-713 +-537 +196 +-587 +822 +-135 +700 +-35 +674 +-407 +240 +-673 +-669 +-393 +470 +-525 +-875 +-383 +-625 +296 +-85 +-147 +-277 +800 +-691 +-143 +16 +-983 +-303 +290 +-139 +172 +320 +512 +596 +640 +664 +-791 +-783 +-387 +-735 +-467 +-301 +810 +134 +216 +278 +176 +606 +140 +-787 +978 +586 +890 +882 +-753 +-13 +970 +-941 +-175 +-777 +-809 +-441 +-347 +-377 +390 +-423 +842 +642 +190 +302 +438 +704 +310 +-49 +124 +-781 +-287 +724 +-767 +830 +620 +-295 +244 +-159 +-307 +-397 +66 +-237 +314 +-79 +624 +710 +272 +-365 +928 +856 +138 +-479 +520 +832 +862 +760 +846 +-81 +106 +-513 +-193 +650 +782 +-517 +944 +218 +712 +-663 +-559 +462 +-635 +-25 +182 +530 +844 +330 +-833 +102 +-881 +108 +-947 +-763 +-405 +232 +410 +104 +-729 +-149 +-889 +888 +360 +968 +908 +116 +-815 +-129 +522 +-723 +-993 +860 +-503 +926 +-219 +-415 +60 +158 +-609 +-501 +986 +-699 +-583 +884 +212 +210 +-957 +526 +-985 +552 +344 +-395 +-95 +338 +248 +494 +130 +404 +358 +600 +-639 +-125 +-33 +-965 +752 +474 +-731 +758 +-573 +4 +38 +264 diff --git a/perf/LuaJIT-benches/TEST_md5sum.txt b/perf/LuaJIT-benches/TEST_md5sum.txt new file mode 100644 index 00000000..15aa8a1c --- /dev/null +++ b/perf/LuaJIT-benches/TEST_md5sum.txt @@ -0,0 +1,20 @@ +binarytrees 10 7202f4e13df7abc5ad8c07f05fe9d644 +chameneos 1e5 a629ce12f63050c6656bce175258cf8f +cheapconcr 1000 d29799d1e263810a4db7bbf43ca66499 +cheapconcw 1000 d29799d1e263810a4db7bbf43ca66499 +fannkuch 8 51e5e372cbc5471ea8940b20ad782319 +fasta 1e5 78cd327de6f0a5667da0aa9349888279 +knucleotide x 88efb24c1fed533959ed84bb32c88142 = 0 and x < self.nx, "x outside PA") + assert(y >= 0 and y < self.ny, "y outside PA") + assert(z >= 0 and z < self.nz, "z outside PA") + local pos = (z*self.ny + y)*self.nx + x + local image = self.image + if self.packed then + local maxv = self.max_voltage + if p > maxv then self.max_voltage = p*2.0 end + local oldp = image[pos] or 0.0 -- Works with uninitialized table, too + if oldp > maxv then p = p + maxv*2.0 end + image[pos] = p + else + image[pos] = p + end + self.changed = true + self.changed_recently = true +end + +local function array_points(self) + local y, z = 0, 0 + return function(self, x) + x = x + 1 + if x >= self.nx then + x = 0 + y = y + 1 + if y >= self.ny then + y = 0 + z = z + 1 + if z >= self.nz then + return nil, nil, nil + end + end + end + return x, y, z + end, self, 0 +end + +local function array_new(nx, ny, nz, packed) + return { + nx = nx, ny = ny, nz = nz, + packed = packed, max_voltage = 0.0, + changed = false, changed_recently = false, + image = {}, -- Preferably use a fixed-type, pre-sized array here. + set = array_set, + points = array_points, + } +end + +local dim = tonumber(arg and arg[1]) or 300 -- Array dimension dim^3 +local packed = arg and arg[2] == "packed" -- Packed image or flat +local arr = array_new(dim, dim, dim, packed) + +for x,y,z in arr:points() do + arr:set(x, y, z, x*x) +end +assert(arr.image[dim^3-1] == (dim-1)^2) + diff --git a/perf/LuaJIT-benches/binary-trees.lua b/perf/LuaJIT-benches/binary-trees.lua new file mode 100644 index 00000000..bf040466 --- /dev/null +++ b/perf/LuaJIT-benches/binary-trees.lua @@ -0,0 +1,47 @@ + +local function BottomUpTree(item, depth) + if depth > 0 then + local i = item + item + depth = depth - 1 + local left, right = BottomUpTree(i-1, depth), BottomUpTree(i, depth) + return { item, left, right } + else + return { item } + end +end + +local function ItemCheck(tree) + if tree[2] then + return tree[1] + ItemCheck(tree[2]) - ItemCheck(tree[3]) + else + return tree[1] + end +end + +local N = tonumber(arg and arg[1]) or 0 +local mindepth = 4 +local maxdepth = mindepth + 2 +if maxdepth < N then maxdepth = N end + +do + local stretchdepth = maxdepth + 1 + local stretchtree = BottomUpTree(0, stretchdepth) + io.write(string.format("stretch tree of depth %d\t check: %d\n", + stretchdepth, ItemCheck(stretchtree))) +end + +local longlivedtree = BottomUpTree(0, maxdepth) + +for depth=mindepth,maxdepth,2 do + local iterations = 2 ^ (maxdepth - depth + mindepth) + local check = 0 + for i=1,iterations do + check = check + ItemCheck(BottomUpTree(1, depth)) + + ItemCheck(BottomUpTree(-1, depth)) + end + io.write(string.format("%d\t trees of depth %d\t check: %d\n", + iterations*2, depth, check)) +end + +io.write(string.format("long lived tree of depth %d\t check: %d\n", + maxdepth, ItemCheck(longlivedtree))) diff --git a/perf/LuaJIT-benches/chameneos.lua b/perf/LuaJIT-benches/chameneos.lua new file mode 100644 index 00000000..78b64c3f --- /dev/null +++ b/perf/LuaJIT-benches/chameneos.lua @@ -0,0 +1,68 @@ + +local co = coroutine +local create, resume, yield = co.create, co.resume, co.yield + +local N = tonumber(arg and arg[1]) or 10 +local first, second + +-- Meet another creature. +local function meet(me) + while second do yield() end -- Wait until meeting place clears. + local other = first + if other then -- Hey, I found a new friend! + first = nil + second = me + else -- Sniff, nobody here (yet). + local n = N - 1 + if n < 0 then return end -- Uh oh, the mall is closed. + N = n + first = me + repeat yield(); other = second until other -- Wait for another creature. + second = nil + yield() -- Be nice and let others meet up. + end + return other +end + +-- Create a very social creature. +local function creature(color) + return create(function() + local me = color + for met=0,1000000000 do + local other = meet(me) + if not other then return met end + if me ~= other then + if me == "blue" then me = other == "red" and "yellow" or "red" + elseif me == "red" then me = other == "blue" and "yellow" or "blue" + else me = other == "blue" and "red" or "blue" end + end + end + end) +end + +-- Trivial round-robin scheduler. +local function schedule(threads) + local resume = resume + local nthreads, meetings = #threads, 0 + repeat + for i=1,nthreads do + local thr = threads[i] + if not thr then return meetings end + local ok, met = resume(thr) + if met then + meetings = meetings + met + threads[i] = nil + end + end + until false +end + +-- A bunch of colorful creatures. +local threads = { + creature("blue"), + creature("red"), + creature("yellow"), + creature("blue"), +} + +io.write(schedule(threads), "\n") diff --git a/perf/LuaJIT-benches/coroutine-ring.lua b/perf/LuaJIT-benches/coroutine-ring.lua new file mode 100644 index 00000000..1e8c5ef6 --- /dev/null +++ b/perf/LuaJIT-benches/coroutine-ring.lua @@ -0,0 +1,42 @@ +-- The Computer Language Benchmarks Game +-- http://shootout.alioth.debian.org/ +-- contributed by Sam Roberts +-- reviewed by Bruno Massa + +local n = tonumber(arg and arg[1]) or 2e7 + +-- fixed size pool +local poolsize = 503 +local threads = {} + +-- cache these to avoid global environment lookups +local create = coroutine.create +local resume = coroutine.resume +local yield = coroutine.yield + +local id = 1 +local token = 0 +local ok + +local body = function(token) + while true do + token = yield(token + 1) + end +end + +-- create all threads +for id = 1, poolsize do + threads[id] = create(body) +end + +-- send the token +repeat + if id == poolsize then + id = 1 + else + id = id + 1 + end + ok, token = resume(threads[id], token) +until token == n + +io.write(id, "\n") diff --git a/perf/LuaJIT-benches/euler14-bit.lua b/perf/LuaJIT-benches/euler14-bit.lua new file mode 100644 index 00000000..537f2bf3 --- /dev/null +++ b/perf/LuaJIT-benches/euler14-bit.lua @@ -0,0 +1,22 @@ + +local bit = require("bit") +local bnot, bor, band = bit.bnot, bit.bor, bit.band +local shl, shr = bit.lshift, bit.rshift + +local N = tonumber(arg and arg[1]) or 10000000 +local cache, m, n = { 1 }, 1, 1 +if arg and arg[2] then cache = nil end +for i=2,N do + local j = i + for len=1,1000000000 do + j = bor(band(shr(j,1), band(j,1)-1), band(shl(j,1)+j+1, bnot(band(j,1)-1))) + if cache then + local x = cache[j]; if x then j = x+len; break end + elseif j == 1 then + j = len+1; break + end + end + if cache then cache[i] = j end + if j > m then m, n = j, i end +end +io.write("Found ", n, " (chain length: ", m, ")\n") diff --git a/perf/LuaJIT-benches/fannkuch.lua b/perf/LuaJIT-benches/fannkuch.lua new file mode 100644 index 00000000..2a4cd426 --- /dev/null +++ b/perf/LuaJIT-benches/fannkuch.lua @@ -0,0 +1,50 @@ + +local function fannkuch(n) + local p, q, s, odd, check, maxflips = {}, {}, {}, true, 0, 0 + for i=1,n do p[i] = i; q[i] = i; s[i] = i end + repeat + -- Print max. 30 permutations. + if check < 30 then + if not p[n] then return maxflips end -- Catch n = 0, 1, 2. + io.write(unpack(p)); io.write("\n") + check = check + 1 + end + -- Copy and flip. + local q1 = p[1] -- Cache 1st element. + if p[n] ~= n and q1 ~= 1 then -- Avoid useless work. + for i=2,n do q[i] = p[i] end -- Work on a copy. + local flips = 1 -- Flip ... + while true do + local qq = q[q1] + if qq == 1 then -- ... until 1st element is 1. + if flips > maxflips then maxflips = flips end -- New maximum? + break + end + q[q1] = q1 + if q1 >= 4 then + local i, j = 2, q1 - 1 + repeat q[i], q[j] = q[j], q[i]; i = i + 1; j = j - 1; until i >= j + end + q1 = qq + flips=flips+1 + end + end + -- Permute. + if odd then + p[2], p[1] = p[1], p[2]; odd = false -- Rotate 1<-2. + else + p[2], p[3] = p[3], p[2]; odd = true -- Rotate 1<-2 and 1<-2<-3. + for i=3,n do + local sx = s[i] + if sx ~= 1 then s[i] = sx-1; break end + if i == n then return maxflips end -- Out of permutations. + s[i] = i + -- Rotate 1<-...<-i+1. + local t=p[1]; for j=i+1,1,-1 do p[j],t=t,p[j] end + end + end + until false +end + +local n = tonumber(arg and arg[1]) or 1 +io.write("Pfannkuchen(", n, ") = ", fannkuch(n), "\n") diff --git a/perf/LuaJIT-benches/fasta.lua b/perf/LuaJIT-benches/fasta.lua new file mode 100644 index 00000000..7ce60804 --- /dev/null +++ b/perf/LuaJIT-benches/fasta.lua @@ -0,0 +1,95 @@ + +local Last = 42 +local function random(max) + local y = (Last * 3877 + 29573) % 139968 + Last = y + return (max * y) / 139968 +end + +local function make_repeat_fasta(id, desc, s, n) + local write, sub = io.write, string.sub + write(">", id, " ", desc, "\n") + local p, sn, s2 = 1, #s, s..s + for i=60,n,60 do + write(sub(s2, p, p + 59), "\n") + p = p + 60; if p > sn then p = p - sn end + end + local tail = n % 60 + if tail > 0 then write(sub(s2, p, p + tail-1), "\n") end +end + +local function make_random_fasta(id, desc, bs, n) + io.write(">", id, " ", desc, "\n") + loadstring([=[ + local write, char, unpack, n, random = io.write, string.char, unpack, ... + local buf, p = {}, 1 + for i=60,n,60 do + for j=p,p+59 do ]=]..bs..[=[ end + buf[p+60] = 10; p = p + 61 + if p >= 2048 then write(char(unpack(buf, 1, p-1))); p = 1 end + end + local tail = n % 60 + if tail > 0 then + for j=p,p+tail-1 do ]=]..bs..[=[ end + p = p + tail; buf[p] = 10; p = p + 1 + end + write(char(unpack(buf, 1, p-1))) + ]=], desc)(n, random) +end + +local function bisect(c, p, lo, hi) + local n = hi - lo + if n == 0 then return "buf[j] = "..c[hi].."\n" end + local mid = math.floor(n / 2) + return "if r < "..p[lo+mid].." then\n"..bisect(c, p, lo, lo+mid).. + "else\n"..bisect(c, p, lo+mid+1, hi).."end\n" +end + +local function make_bisect(tab) + local c, p, sum = {}, {}, 0 + for i,row in ipairs(tab) do + c[i] = string.byte(row[1]) + sum = sum + row[2] + p[i] = sum + end + return "local r = random(1)\n"..bisect(c, p, 1, #tab) +end + +local alu = + "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG".. + "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA".. + "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT".. + "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA".. + "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG".. + "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC".. + "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA" + +local iub = make_bisect{ + { "a", 0.27 }, + { "c", 0.12 }, + { "g", 0.12 }, + { "t", 0.27 }, + { "B", 0.02 }, + { "D", 0.02 }, + { "H", 0.02 }, + { "K", 0.02 }, + { "M", 0.02 }, + { "N", 0.02 }, + { "R", 0.02 }, + { "S", 0.02 }, + { "V", 0.02 }, + { "W", 0.02 }, + { "Y", 0.02 }, +} + +local homosapiens = make_bisect{ + { "a", 0.3029549426680 }, + { "c", 0.1979883004921 }, + { "g", 0.1975473066391 }, + { "t", 0.3015094502008 }, +} + +local N = tonumber(arg and arg[1]) or 1000 +make_repeat_fasta('ONE', 'Homo sapiens alu', alu, N*2) +make_random_fasta('TWO', 'IUB ambiguity codes', iub, N*3) +make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, N*5) diff --git a/perf/LuaJIT-benches/k-nucleotide.lua b/perf/LuaJIT-benches/k-nucleotide.lua new file mode 100644 index 00000000..0bfb41be --- /dev/null +++ b/perf/LuaJIT-benches/k-nucleotide.lua @@ -0,0 +1,58 @@ + +local function kfrequency(seq, freq, k, frame) + local sub = string.sub + local k1 = k - 1 + for i=frame,#seq-k1,k do + local c = sub(seq, i, i+k1) + freq[c] = (freq[c] or 0) + 1 + end +end + +local function count(seq, frag) + local k = #frag + local freq = {} + for frame=1,k do kfrequency(seq, freq, k, frame) end + io.write(freq[frag] or 0, "\t", frag, "\n") +end + +local function frequency(seq, k) + local freq = {} + for frame=1,k do kfrequency(seq, freq, k, frame) end + local sfreq, sn, sum = {}, 1, 0 + for c,v in pairs(freq) do sfreq[sn] = c; sn = sn + 1; sum = sum + v end + table.sort(sfreq, function(a, b) + local fa, fb = freq[a], freq[b] + return fa == fb and a > b or fa > fb + end) + for _,c in ipairs(sfreq) do + io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum)) + end + io.write("\n") +end + +local function readseq() + local sub = string.sub + for line in io.lines() do + if sub(line, 1, 1) == ">" and sub(line, 2, 6) == "THREE" then break end + end + local lines, ln = {}, 0 + for line in io.lines() do + local c = sub(line, 1, 1) + if c == ">" then + break + elseif c ~= ";" then + ln = ln + 1 + lines[ln] = line + end + end + return string.upper(table.concat(lines, "", 1, ln)) +end + +local seq = readseq() +frequency(seq, 1) +frequency(seq, 2) +count(seq, "GGT") +count(seq, "GGTA") +count(seq, "GGTATT") +count(seq, "GGTATTTTAATT") +count(seq, "GGTATTTTAATTTATAGT") diff --git a/perf/LuaJIT-benches/life.lua b/perf/LuaJIT-benches/life.lua new file mode 100644 index 00000000..911d9fe1 --- /dev/null +++ b/perf/LuaJIT-benches/life.lua @@ -0,0 +1,111 @@ +-- life.lua +-- original by Dave Bollinger posted to lua-l +-- modified to use ANSI terminal escape sequences +-- modified to use for instead of while + +local write=io.write + +ALIVE="¥" DEAD="þ" +ALIVE="O" DEAD="-" + +function delay() -- NOTE: SYSTEM-DEPENDENT, adjust as necessary + for i=1,10000 do end + -- local i=os.clock()+1 while(os.clock() 0 do + local xm1,x,xp1,xi=self.w-1,self.w,1,self.w + while xi > 0 do + local sum = self[ym1][xm1] + self[ym1][x] + self[ym1][xp1] + + self[y][xm1] + self[y][xp1] + + self[yp1][xm1] + self[yp1][x] + self[yp1][xp1] + next[y][x] = ((sum==2) and self[y][x]) or ((sum==3) and 1) or 0 + xm1,x,xp1,xi = x,xp1,xp1+1,xi-1 + end + ym1,y,yp1,yi = y,yp1,yp1+1,yi-1 + end +end + +-- output the array to screen +function _CELLS:draw() + local out="" -- accumulate to reduce flicker + for y=1,self.h do + for x=1,self.w do + out=out..(((self[y][x]>0) and ALIVE) or DEAD) + end + out=out.."\n" + end + write(out) +end + +-- constructor +function CELLS(w,h) + local c = ARRAY2D(w,h) + c.spawn = _CELLS.spawn + c.evolve = _CELLS.evolve + c.draw = _CELLS.draw + return c +end + +-- +-- shapes suitable for use with spawn() above +-- +HEART = { 1,0,1,1,0,1,1,1,1; w=3,h=3 } +GLIDER = { 0,0,1,1,0,1,0,1,1; w=3,h=3 } +EXPLODE = { 0,1,0,1,1,1,1,0,1,0,1,0; w=3,h=4 } +FISH = { 0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,1,0; w=5,h=4 } +BUTTERFLY = { 1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1; w=5,h=5 } + +-- the main routine +function LIFE(w,h) + -- create two arrays + local thisgen = CELLS(w,h) + local nextgen = CELLS(w,h) + + -- create some life + -- about 1000 generations of fun, then a glider steady-state + thisgen:spawn(GLIDER,5,4) + thisgen:spawn(EXPLODE,25,10) + thisgen:spawn(FISH,4,12) + + -- run until break + local gen=1 + write("\027[2J") -- ANSI clear screen + while 1 do + thisgen:evolve(nextgen) + thisgen,nextgen = nextgen,thisgen + write("\027[H") -- ANSI home cursor + thisgen:draw() + write("Life - generation ",gen,"\n") + gen=gen+1 + if gen>2000 then break end + --delay() -- no delay + end +end + +LIFE(40,20) diff --git a/perf/LuaJIT-benches/mandelbrot-bit.lua b/perf/LuaJIT-benches/mandelbrot-bit.lua new file mode 100644 index 00000000..91d96975 --- /dev/null +++ b/perf/LuaJIT-benches/mandelbrot-bit.lua @@ -0,0 +1,33 @@ + +local bit = require("bit") +local bor, band = bit.bor, bit.band +local shl, shr, rol = bit.lshift, bit.rshift, bit.rol +local write, char, unpack = io.write, string.char, unpack +local N = tonumber(arg and arg[1]) or 100 +local M, buf = 2/N, {} +write("P4\n", N, " ", N, "\n") +for y=0,N-1 do + local Ci, b, p = y*M-1, -16777216, 0 + local Ciq = Ci*Ci + for x=0,N-1,2 do + local Cr, Cr2 = x*M-1.5, (x+1)*M-1.5 + local Zr, Zi, Zrq, Ziq = Cr, Ci, Cr*Cr, Ciq + local Zr2, Zi2, Zrq2, Ziq2 = Cr2, Ci, Cr2*Cr2, Ciq + b = rol(b, 2) + for i=1,49 do + Zi = Zr*Zi*2 + Ci; Zi2 = Zr2*Zi2*2 + Ci + Zr = Zrq-Ziq + Cr; Zr2 = Zrq2-Ziq2 + Cr2 + Ziq = Zi*Zi; Ziq2 = Zi2*Zi2 + Zrq = Zr*Zr; Zrq2 = Zr2*Zr2 + if band(b, 2) ~= 0 and Zrq+Ziq > 4.0 then b = band(b, -3) end + if band(b, 1) ~= 0 and Zrq2+Ziq2 > 4.0 then b = band(b, -2) end + if band(b, 3) == 0 then break end + end + if b >= 0 then p = p + 1; buf[p] = b; b = -16777216; end + end + if b ~= -16777216 then + if band(N, 1) ~= 0 then b = shr(b, 1) end + p = p + 1; buf[p] = shl(b, 8-band(N, 7)) + end + write(char(unpack(buf, 1, p))) +end diff --git a/perf/LuaJIT-benches/mandelbrot.lua b/perf/LuaJIT-benches/mandelbrot.lua new file mode 100644 index 00000000..0ef595a2 --- /dev/null +++ b/perf/LuaJIT-benches/mandelbrot.lua @@ -0,0 +1,23 @@ + +local write, char, unpack = io.write, string.char, unpack +local N = tonumber(arg and arg[1]) or 100 +local M, ba, bb, buf = 2/N, 2^(N%8+1)-1, 2^(8-N%8), {} +write("P4\n", N, " ", N, "\n") +for y=0,N-1 do + local Ci, b, p = y*M-1, 1, 0 + for x=0,N-1 do + local Cr = x*M-1.5 + local Zr, Zi, Zrq, Ziq = Cr, Ci, Cr*Cr, Ci*Ci + b = b + b + for i=1,49 do + Zi = Zr*Zi*2 + Ci + Zr = Zrq-Ziq + Cr + Ziq = Zi*Zi + Zrq = Zr*Zr + if Zrq+Ziq > 4.0 then b = b + 1; break; end + end + if b >= 256 then p = p + 1; buf[p] = 511 - b; b = 1; end + end + if b ~= 1 then p = p + 1; buf[p] = (ba-b)*bb; end + write(char(unpack(buf, 1, p))) +end diff --git a/perf/LuaJIT-benches/md5.lua b/perf/LuaJIT-benches/md5.lua new file mode 100644 index 00000000..fdf6b4a7 --- /dev/null +++ b/perf/LuaJIT-benches/md5.lua @@ -0,0 +1,183 @@ + +local bit = require("bit") +local tobit, tohex, bnot = bit.tobit or bit.cast, bit.tohex, bit.bnot +local bor, band, bxor = bit.bor, bit.band, bit.bxor +local lshift, rshift, rol, bswap = bit.lshift, bit.rshift, bit.rol, bit.bswap +local byte, char, sub, rep = string.byte, string.char, string.sub, string.rep + +if not rol then -- Replacement function if rotates are missing. + local bor, shl, shr = bit.bor, bit.lshift, bit.rshift + function rol(a, b) return bor(shl(a, b), shr(a, 32-b)) end +end + +if not bswap then -- Replacement function if bswap is missing. + local bor, band, shl, shr = bit.bor, bit.band, bit.lshift, bit.rshift + function bswap(a) + return bor(shr(a, 24), band(shr(a, 8), 0xff00), + shl(band(a, 0xff00), 8), shl(a, 24)); + end +end + +if not tohex then -- (Unreliable) replacement function if tohex is missing. + function tohex(a) + return string.sub(string.format("%08x", a), -8) + end +end + +local function tr_f(a, b, c, d, x, s) + return rol(bxor(d, band(b, bxor(c, d))) + a + x, s) + b +end + +local function tr_g(a, b, c, d, x, s) + return rol(bxor(c, band(d, bxor(b, c))) + a + x, s) + b +end + +local function tr_h(a, b, c, d, x, s) + return rol(bxor(b, c, d) + a + x, s) + b +end + +local function tr_i(a, b, c, d, x, s) + return rol(bxor(c, bor(b, bnot(d))) + a + x, s) + b +end + +local function transform(x, a1, b1, c1, d1) + local a, b, c, d = a1, b1, c1, d1 + + a = tr_f(a, b, c, d, x[ 1] + 0xd76aa478, 7) + d = tr_f(d, a, b, c, x[ 2] + 0xe8c7b756, 12) + c = tr_f(c, d, a, b, x[ 3] + 0x242070db, 17) + b = tr_f(b, c, d, a, x[ 4] + 0xc1bdceee, 22) + a = tr_f(a, b, c, d, x[ 5] + 0xf57c0faf, 7) + d = tr_f(d, a, b, c, x[ 6] + 0x4787c62a, 12) + c = tr_f(c, d, a, b, x[ 7] + 0xa8304613, 17) + b = tr_f(b, c, d, a, x[ 8] + 0xfd469501, 22) + a = tr_f(a, b, c, d, x[ 9] + 0x698098d8, 7) + d = tr_f(d, a, b, c, x[10] + 0x8b44f7af, 12) + c = tr_f(c, d, a, b, x[11] + 0xffff5bb1, 17) + b = tr_f(b, c, d, a, x[12] + 0x895cd7be, 22) + a = tr_f(a, b, c, d, x[13] + 0x6b901122, 7) + d = tr_f(d, a, b, c, x[14] + 0xfd987193, 12) + c = tr_f(c, d, a, b, x[15] + 0xa679438e, 17) + b = tr_f(b, c, d, a, x[16] + 0x49b40821, 22) + + a = tr_g(a, b, c, d, x[ 2] + 0xf61e2562, 5) + d = tr_g(d, a, b, c, x[ 7] + 0xc040b340, 9) + c = tr_g(c, d, a, b, x[12] + 0x265e5a51, 14) + b = tr_g(b, c, d, a, x[ 1] + 0xe9b6c7aa, 20) + a = tr_g(a, b, c, d, x[ 6] + 0xd62f105d, 5) + d = tr_g(d, a, b, c, x[11] + 0x02441453, 9) + c = tr_g(c, d, a, b, x[16] + 0xd8a1e681, 14) + b = tr_g(b, c, d, a, x[ 5] + 0xe7d3fbc8, 20) + a = tr_g(a, b, c, d, x[10] + 0x21e1cde6, 5) + d = tr_g(d, a, b, c, x[15] + 0xc33707d6, 9) + c = tr_g(c, d, a, b, x[ 4] + 0xf4d50d87, 14) + b = tr_g(b, c, d, a, x[ 9] + 0x455a14ed, 20) + a = tr_g(a, b, c, d, x[14] + 0xa9e3e905, 5) + d = tr_g(d, a, b, c, x[ 3] + 0xfcefa3f8, 9) + c = tr_g(c, d, a, b, x[ 8] + 0x676f02d9, 14) + b = tr_g(b, c, d, a, x[13] + 0x8d2a4c8a, 20) + + a = tr_h(a, b, c, d, x[ 6] + 0xfffa3942, 4) + d = tr_h(d, a, b, c, x[ 9] + 0x8771f681, 11) + c = tr_h(c, d, a, b, x[12] + 0x6d9d6122, 16) + b = tr_h(b, c, d, a, x[15] + 0xfde5380c, 23) + a = tr_h(a, b, c, d, x[ 2] + 0xa4beea44, 4) + d = tr_h(d, a, b, c, x[ 5] + 0x4bdecfa9, 11) + c = tr_h(c, d, a, b, x[ 8] + 0xf6bb4b60, 16) + b = tr_h(b, c, d, a, x[11] + 0xbebfbc70, 23) + a = tr_h(a, b, c, d, x[14] + 0x289b7ec6, 4) + d = tr_h(d, a, b, c, x[ 1] + 0xeaa127fa, 11) + c = tr_h(c, d, a, b, x[ 4] + 0xd4ef3085, 16) + b = tr_h(b, c, d, a, x[ 7] + 0x04881d05, 23) + a = tr_h(a, b, c, d, x[10] + 0xd9d4d039, 4) + d = tr_h(d, a, b, c, x[13] + 0xe6db99e5, 11) + c = tr_h(c, d, a, b, x[16] + 0x1fa27cf8, 16) + b = tr_h(b, c, d, a, x[ 3] + 0xc4ac5665, 23) + + a = tr_i(a, b, c, d, x[ 1] + 0xf4292244, 6) + d = tr_i(d, a, b, c, x[ 8] + 0x432aff97, 10) + c = tr_i(c, d, a, b, x[15] + 0xab9423a7, 15) + b = tr_i(b, c, d, a, x[ 6] + 0xfc93a039, 21) + a = tr_i(a, b, c, d, x[13] + 0x655b59c3, 6) + d = tr_i(d, a, b, c, x[ 4] + 0x8f0ccc92, 10) + c = tr_i(c, d, a, b, x[11] + 0xffeff47d, 15) + b = tr_i(b, c, d, a, x[ 2] + 0x85845dd1, 21) + a = tr_i(a, b, c, d, x[ 9] + 0x6fa87e4f, 6) + d = tr_i(d, a, b, c, x[16] + 0xfe2ce6e0, 10) + c = tr_i(c, d, a, b, x[ 7] + 0xa3014314, 15) + b = tr_i(b, c, d, a, x[14] + 0x4e0811a1, 21) + a = tr_i(a, b, c, d, x[ 5] + 0xf7537e82, 6) + d = tr_i(d, a, b, c, x[12] + 0xbd3af235, 10) + c = tr_i(c, d, a, b, x[ 3] + 0x2ad7d2bb, 15) + b = tr_i(b, c, d, a, x[10] + 0xeb86d391, 21) + + return tobit(a+a1), tobit(b+b1), tobit(c+c1), tobit(d+d1) +end + +-- Note: this is copying the original string and NOT particularly fast. +-- A library for struct unpacking would make this task much easier. +local function md5(msg) + local len = #msg + msg = msg.."\128"..rep("\0", 63 - band(len + 8, 63)) + ..char(band(lshift(len, 3), 255), band(rshift(len, 5), 255), + band(rshift(len, 13), 255), band(rshift(len, 21), 255)) + .."\0\0\0\0" + local a, b, c, d = 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 + local x, k = {}, 1 + for i=1,#msg,4 do + local m0, m1, m2, m3 = byte(msg, i, i+3) + x[k] = bor(m0, lshift(m1, 8), lshift(m2, 16), lshift(m3, 24)) + if k == 16 then + a, b, c, d = transform(x, a, b, c, d) + k = 1 + else + k = k + 1 + end + end + return tohex(bswap(a))..tohex(bswap(b))..tohex(bswap(c))..tohex(bswap(d)) +end + +assert(md5('') == 'd41d8cd98f00b204e9800998ecf8427e') +assert(md5('a') == '0cc175b9c0f1b6a831c399e269772661') +assert(md5('abc') == '900150983cd24fb0d6963f7d28e17f72') +assert(md5('message digest') == 'f96b697d7cb7938d525a2f31aaf161d0') +assert(md5('abcdefghijklmnopqrstuvwxyz') == 'c3fcd3d76192e4007dfb496cca67e13b') +assert(md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789') == + 'd174ab98d277d9f5a5611c2c9f419d9f') +assert(md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890') == + '57edf4a22be3c955ac49da2e2107b67a') + +local N = tonumber(arg and arg[1]) or 10000 + + -- Credits: William Shakespeare, Romeo and Juliet +local txt = [[Rebellious subjects, enemies to peace, +Profaners of this neighbour-stained steel,-- +Will they not hear? What, ho! you men, you beasts, +That quench the fire of your pernicious rage +With purple fountains issuing from your veins, +On pain of torture, from those bloody hands +Throw your mistemper'd weapons to the ground, +And hear the sentence of your moved prince. +Three civil brawls, bred of an airy word, +By thee, old Capulet, and Montague, +Have thrice disturb'd the quiet of our streets, +And made Verona's ancient citizens +Cast by their grave beseeming ornaments, +To wield old partisans, in hands as old, +Canker'd with peace, to part your canker'd hate: +If ever you disturb our streets again, +Your lives shall pay the forfeit of the peace. +For this time, all the rest depart away: +You Capulet; shall go along with me: +And, Montague, come you this afternoon, +To know our further pleasure in this case, +To old Free-town, our common judgment-place. +Once more, on pain of death, all men depart.]] + txt = txt..txt..txt..txt + txt = txt..txt..txt..txt + +for i=1,N do + res = md5(txt) +end +assert(res == 'a831e91e0f70eddcb70dc61c6f82f6cd') + diff --git a/perf/LuaJIT-benches/meteor.lua b/perf/LuaJIT-benches/meteor.lua new file mode 100644 index 00000000..80588ab5 --- /dev/null +++ b/perf/LuaJIT-benches/meteor.lua @@ -0,0 +1,220 @@ + +-- Generate a decision tree based solver for the meteor puzzle. +local function generatesolver(countinit) + local pairs, ipairs, format = pairs, ipairs, string.format + local byte, min, sort = string.byte, math.min, table.sort + + -- Cached position to distance lookup. + local dist = setmetatable({}, { __index = function(t, xy) + local x = xy%10; local y = (xy-x)/10 + if (x+y)%2 == 1 then y = y + 1; x = 10 - x end + local d = xy + 256*x*x + 1024*y*y; t[xy] = d; return d + end}) + + -- Lookup table to validate a cell and to find its successor. + local ok = {} + for i=0,150 do ok[i] = false end + for i=99,0,-1 do + local x = i%10 + if ((i-x)/10+x)%2 == 0 then + ok[i] = i + (ok[i+1] and 1 or (ok[i+2] and 2 or 3)) + end + end + + -- Temporary board state for the island checks. + local islands, slide = {}, {20,22,24,26,28,31,33,35,37,39} + local bbc, bb = 0, {} + for i=0,19 do bb[i] = false; bb[i+80] = false end + for i=20,79 do bb[i] = ok[i] end + + -- Recursive flood fill algorithm. + local function fill(bb, p) + bbc = bbc + 1 + local n = p+2; if bb[n] then bb[n] = false; fill(bb, n) end + n = p-2; if bb[n] then bb[n] = false; fill(bb, n) end + n = p-9; if bb[n] then bb[n] = false; fill(bb, n) end + n = p-11; if bb[n] then bb[n] = false; fill(bb, n) end + n = p+9; if bb[n] then bb[n] = false; fill(bb, n) end + n = p+11; if bb[n] then bb[n] = false; fill(bb, n) end + end + + -- Generate pruned, sliding decision trees. + local dtrees = {{}, {}, {}, {}, {}, {}, {}, {}, {}, {}} + local rot = { nil, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} } + for k=0,9 do + -- Generate 10 initial pieces from line noise. :-) + local t = { 60, 62, byte("@BMBIK@KT@GPIKR@IKIKT@GK@KM@BG", k*3+1, k*3+3) } + rot[1] = t + for i,xy in ipairs(t) do + local x = xy%10; local y = (xy-x-60)/10 + -- Add 11 more variations by rotating and flipping. + for j=2,12 do + if j == 7 then y = -y else x,y = (x+3*y)/2, (y-x)/2 end + rot[j][i] = x+10*y + end + end + for r,v in ipairs(rot) do + -- Exploit symmetry and leave out half of the orientations of one piece. + -- The selected piece gives the best reduction of the solution space. + if k ~= 3 or r%2 == 0 then + -- Normalize to origin, add distance, sort by distance from origin. + local m = min(v[1], v[2], v[3], v[4], v[5]) + for i=1,5 do v[i] = dist[v[i]-m] end + sort(v) + local v2, v3, v4, v5 = v[2]%256, v[3]%256, v[4]%256, v[5]%256 + -- Slide the piece across 2 rows, prune the tree, check for islands. + for j,p in ipairs(slide) do + bb[p] = false + if ok[p+v2] and ok[p+v3] and ok[p+v4] and ok[p+v5] then -- Prune. + for i=p+1,79 do bb[i] = ok[i] end -- Clear remaining board. + bb[p+v2] = false; bb[p+v3] = false -- Add piece. + bb[p+v4] = false; bb[p+v5] = false + bbc = j -- Flood fill and count the filled positions. + if bb[71] then bb[71] = false; fill(bb, 71) end -- Lower left. + if bb[79] then bb[79] = false; fill(bb, 79) end -- Lower right. + local di = 0 + if bbc < 22 then bbc = 26 + elseif bbc < 26 then -- Island found, locate it, fill from above. + for i=p+2,79 do if bb[i] then di = i-p; break end end + for i=p-9,p-1 do if ok[i] then fill(bb, i) bbc = bbc - 1 end end + end + if bbc == 26 then -- Prune boards with static islands. + local tb = dtrees[j] -- Build decision tree in distance order. + local ta = tb[v2]; if not ta then ta = {}; tb[v2] = ta end + tb = ta[v3]; if not tb then tb = {}; ta[v3] = tb end + ta = tb[v4]; if not ta then ta = {}; tb[v4] = ta; islands[ta] = di + elseif islands[ta] ~= di then islands[ta] = 0 end + ta[v5] = di*10+k -- Leaves hold island check and piece number. + end + end + end + end + end + end + + local s = "local u0,u1,u2,u3,u4,u5,u6,u7,u8,u9" -- Piece use flags. + for p=0,99 do if ok[p] then s = s..",b"..p end end -- Board cells. + s = s.."\n"..[[ +local countinit = ... +local count = countinit +local bmin, bmax, pcs = 9, 0, {} +local smin, smax +local write, reverse = io.write, string.reverse + +-- Print min/max boards. +local function printboard(s) + local flip = true + for x in string.gmatch(string.gsub(s, ".", "%1 "), "..........") do + write(x, flip and "\n " or "\n") + flip = not flip + end + write("\n") +end + +-- Print result. +local function printresult() + write(countinit-count, " solutions found\n\n") + printboard(smin) + printboard(smax) +end + +-- Generate piece lookup array from the order of use. +local function genp() + local p = pcs + p[u0] = "0" p[u1] = "1" p[u2] = "2" p[u3] = "3" p[u4] = "4" + p[u5] = "5" p[u6] = "6" p[u7] = "7" p[u8] = "8" p[u9] = "9" + return p +end + +-- Goal function. +local function f91(k) + if k ~= 10 then return end + count = count - 2 -- Need to count the symmetric solution, too. + repeat + -- Quick precheck before constructing the string. + local b0, b99 = b0, b99 + if b0 <= bmin then bmin = b0 elseif b0 >= bmax then bmax = b0 + elseif b99 <= bmin then bmin = b99 elseif b99 >= bmax then bmax = b99 + else break end + -- Translate the filled board to a string. + local p = genp() + local s = p[b0] ]] + for p=2,99 do if ok[p] then s = s.."..p[b"..p.."]" end end + s = s..[[ + -- Remember min/max boards, dito for the symmetric board. + if not smin then smin = s; smax = s + elseif s < smin then smin = s elseif s > smax then smax = s end + s = reverse(s) + if s < smin then smin = s elseif s > smax then smax = s end + until true + if count <= 0 then error() end -- Early abort if max count given. +end +local f93 = f91 +]] + + -- Recursively convert the decision tree to Lua code. + local function codetree(tree, d, p, pn) + local found, s = false, "" + d = d + 1 + for a,t in pairs(tree) do + local b = p+a + if b < 100 then -- Prune the tree at the lower border. + local pp = b ~= pn and pn or ok[b] -- Find maximum successor function. + if d >= 5 then -- Try to place the last cell of a piece and advance. + found = true + local u = t%10 + local di = (t-u)/10 + if di ~= 0 and d == 5 then + di = di + p; if pp == di then pp = ok[di] end + s = format("%sif b%d and not u%d and not b%d then b%d=k u%d=k f%d(k) u%d=N b%d=N end\n", + s, di, u, b, b, u, pp, u, b) + else + s = format("%sif not u%d and not b%d then b%d=k u%d=k f%d(k) u%d=N b%d=N end\n", + s, u, b, b, u, pp, u, b) + end + else -- Try to place an intermediate cell. + local di = d ~= 4 and 0 or islands[t] + if di == 0 then + local st = codetree(t, d, p, pp) + if st then + found = true + s = format("%sif not b%d then b%d=k\n%sb%d=N end\n", s, b, b, st, b) + end + else -- Combine island checks. + di = di + p; if pp == di then pp = ok[di] end + local st = codetree(t, 6, p, pp) + if st then + found = true + s = format("%sif b%d and not b%d then b%d=k\n%sb%d=N end\n", s, di, b, b, st, b) + end + end + end + end + end + return found and s + end + + -- Embed the decision tree into a function hierarchy. + local j = 5 + for p=88,0,-1 do + local pn = ok[p] + if pn then + s = format("%slocal function f%d(k)\nlocal N if b%d then return f%d(k) end k=k+1 b%d=k\n%sb%d=N end\n", + s, p, p, pn, p, codetree(dtrees[j], 1, p, pn), p) + j = j - 1; if j == 0 then j = 10 end + end + end + + -- Compile and return solver function and result getter. + return loadstring(s.."return f0, printresult\n", "solver")(countinit) +end + +-- Generate the solver function hierarchy. +local solver, printresult = generatesolver(tonumber(arg and arg[1]) or 10000) + +-- The optimizer for LuaJIT 1.1.x is not helpful here, so turn it off. +if jit and jit.opt and jit.version_num < 10200 then jit.opt.start(0) end + +-- Run the solver protected to get partial results (max count or ctrl-c). +pcall(solver, 0) +printresult() diff --git a/perf/LuaJIT-benches/nbody.lua b/perf/LuaJIT-benches/nbody.lua new file mode 100644 index 00000000..e0ff8f77 --- /dev/null +++ b/perf/LuaJIT-benches/nbody.lua @@ -0,0 +1,119 @@ + +local sqrt = math.sqrt + +local PI = 3.141592653589793 +local SOLAR_MASS = 4 * PI * PI +local DAYS_PER_YEAR = 365.24 +local bodies = { + { -- Sun + x = 0, + y = 0, + z = 0, + vx = 0, + vy = 0, + vz = 0, + mass = SOLAR_MASS + }, + { -- Jupiter + x = 4.84143144246472090e+00, + y = -1.16032004402742839e+00, + z = -1.03622044471123109e-01, + vx = 1.66007664274403694e-03 * DAYS_PER_YEAR, + vy = 7.69901118419740425e-03 * DAYS_PER_YEAR, + vz = -6.90460016972063023e-05 * DAYS_PER_YEAR, + mass = 9.54791938424326609e-04 * SOLAR_MASS + }, + { -- Saturn + x = 8.34336671824457987e+00, + y = 4.12479856412430479e+00, + z = -4.03523417114321381e-01, + vx = -2.76742510726862411e-03 * DAYS_PER_YEAR, + vy = 4.99852801234917238e-03 * DAYS_PER_YEAR, + vz = 2.30417297573763929e-05 * DAYS_PER_YEAR, + mass = 2.85885980666130812e-04 * SOLAR_MASS + }, + { -- Uranus + x = 1.28943695621391310e+01, + y = -1.51111514016986312e+01, + z = -2.23307578892655734e-01, + vx = 2.96460137564761618e-03 * DAYS_PER_YEAR, + vy = 2.37847173959480950e-03 * DAYS_PER_YEAR, + vz = -2.96589568540237556e-05 * DAYS_PER_YEAR, + mass = 4.36624404335156298e-05 * SOLAR_MASS + }, + { -- Neptune + x = 1.53796971148509165e+01, + y = -2.59193146099879641e+01, + z = 1.79258772950371181e-01, + vx = 2.68067772490389322e-03 * DAYS_PER_YEAR, + vy = 1.62824170038242295e-03 * DAYS_PER_YEAR, + vz = -9.51592254519715870e-05 * DAYS_PER_YEAR, + mass = 5.15138902046611451e-05 * SOLAR_MASS + } +} + +local function advance(bodies, nbody, dt) + for i=1,nbody do + local bi = bodies[i] + local bix, biy, biz, bimass = bi.x, bi.y, bi.z, bi.mass + local bivx, bivy, bivz = bi.vx, bi.vy, bi.vz + for j=i+1,nbody do + local bj = bodies[j] + local dx, dy, dz = bix-bj.x, biy-bj.y, biz-bj.z + local mag = sqrt(dx*dx + dy*dy + dz*dz) + mag = dt / (mag * mag * mag) + local bm = bj.mass*mag + bivx = bivx - (dx * bm) + bivy = bivy - (dy * bm) + bivz = bivz - (dz * bm) + bm = bimass*mag + bj.vx = bj.vx + (dx * bm) + bj.vy = bj.vy + (dy * bm) + bj.vz = bj.vz + (dz * bm) + end + bi.vx = bivx + bi.vy = bivy + bi.vz = bivz + bi.x = bix + dt * bivx + bi.y = biy + dt * bivy + bi.z = biz + dt * bivz + end +end + +local function energy(bodies, nbody) + local e = 0 + for i=1,nbody do + local bi = bodies[i] + local vx, vy, vz, bim = bi.vx, bi.vy, bi.vz, bi.mass + e = e + (0.5 * bim * (vx*vx + vy*vy + vz*vz)) + for j=i+1,nbody do + local bj = bodies[j] + local dx, dy, dz = bi.x-bj.x, bi.y-bj.y, bi.z-bj.z + local distance = sqrt(dx*dx + dy*dy + dz*dz) + e = e - ((bim * bj.mass) / distance) + end + end + return e +end + +local function offsetMomentum(b, nbody) + local px, py, pz = 0, 0, 0 + for i=1,nbody do + local bi = b[i] + local bim = bi.mass + px = px + (bi.vx * bim) + py = py + (bi.vy * bim) + pz = pz + (bi.vz * bim) + end + b[1].vx = -px / SOLAR_MASS + b[1].vy = -py / SOLAR_MASS + b[1].vz = -pz / SOLAR_MASS +end + +local N = tonumber(arg and arg[1]) or 1000 +local nbody = #bodies + +offsetMomentum(bodies, nbody) +io.write( string.format("%0.9f",energy(bodies, nbody)), "\n") +for i=1,N do advance(bodies, nbody, 0.01) end +io.write( string.format("%0.9f",energy(bodies, nbody)), "\n") diff --git a/perf/LuaJIT-benches/nsieve-bit-fp.lua b/perf/LuaJIT-benches/nsieve-bit-fp.lua new file mode 100644 index 00000000..3971ec1f --- /dev/null +++ b/perf/LuaJIT-benches/nsieve-bit-fp.lua @@ -0,0 +1,37 @@ + +local floor, ceil = math.floor, math.ceil + +local precision = 50 -- Maximum precision of lua_Number (minus safety margin). +local onebits = (2^precision)-1 + +local function nsieve(p, m) + local cm = ceil(m/precision) + do local onebits = onebits; for i=0,cm do p[i] = onebits end end + local count, idx, bit = 0, 2, 2 + for i=2,m do + local r = p[idx] / bit + if r - floor(r) >= 0.5 then -- Bit set? + local kidx, kbit = idx, bit + for k=i+i,m,i do + kidx = kidx + i + while kidx >= cm do kidx = kidx - cm; kbit = kbit + kbit end + local x = p[kidx] + local r = x / kbit + if r - floor(r) >= 0.5 then p[kidx] = x - kbit*0.5 end -- Clear bit. + end + count = count + 1 + end + idx = idx + 1 + if idx >= cm then idx = 0; bit = bit + bit end + end + return count +end + +local N = tonumber(arg and arg[1]) or 1 +if N < 2 then N = 2 end +local primes = {} + +for i=0,2 do + local m = (2^(N-i))*10000 + io.write(string.format("Primes up to %8d %8d\n", m, nsieve(primes, m))) +end diff --git a/perf/LuaJIT-benches/nsieve-bit.lua b/perf/LuaJIT-benches/nsieve-bit.lua new file mode 100644 index 00000000..820a3726 --- /dev/null +++ b/perf/LuaJIT-benches/nsieve-bit.lua @@ -0,0 +1,27 @@ + +local bit = require("bit") +local band, bxor, rshift, rol = bit.band, bit.bxor, bit.rshift, bit.rol + +local function nsieve(p, m) + local count = 0 + for i=0,rshift(m, 5) do p[i] = -1 end + for i=2,m do + if band(rshift(p[rshift(i, 5)], i), 1) ~= 0 then + count = count + 1 + for j=i+i,m,i do + local jx = rshift(j, 5) + p[jx] = band(p[jx], rol(-2, j)) + end + end + end + return count +end + +local N = tonumber(arg and arg[1]) or 1 +if N < 2 then N = 2 end +local primes = {} + +for i=0,2 do + local m = (2^(N-i))*10000 + io.write(string.format("Primes up to %8d %8d\n", m, nsieve(primes, m))) +end diff --git a/perf/LuaJIT-benches/nsieve.lua b/perf/LuaJIT-benches/nsieve.lua new file mode 100644 index 00000000..6de0524f --- /dev/null +++ b/perf/LuaJIT-benches/nsieve.lua @@ -0,0 +1,21 @@ + +local function nsieve(p, m) + for i=2,m do p[i] = true end + local count = 0 + for i=2,m do + if p[i] then + for k=i+i,m,i do p[k] = false end + count = count + 1 + end + end + return count +end + +local N = tonumber(arg and arg[1]) or 1 +if N < 2 then N = 2 end +local primes = {} + +for i=0,2 do + local m = (2^(N-i))*10000 + io.write(string.format("Primes up to %8d %8d\n", m, nsieve(primes, m))) +end diff --git a/perf/LuaJIT-benches/partialsums.lua b/perf/LuaJIT-benches/partialsums.lua new file mode 100644 index 00000000..46bb9da3 --- /dev/null +++ b/perf/LuaJIT-benches/partialsums.lua @@ -0,0 +1,29 @@ + +local n = tonumber(arg[1]) +local function pr(fmt, x) io.write(string.format(fmt, x)) end + +local a1, a2, a3, a4, a5, a6, a7, a8, a9, alt = 1, 0, 0, 0, 0, 0, 0, 0, 0, 1 +local sqrt, sin, cos = math.sqrt, math.sin, math.cos +for k=1,n do + local k2, sk, ck = k*k, sin(k), cos(k) + local k3 = k2*k + a1 = a1 + (2/3)^k + a2 = a2 + 1/sqrt(k) + a3 = a3 + 1/(k2+k) + a4 = a4 + 1/(k3*sk*sk) + a5 = a5 + 1/(k3*ck*ck) + a6 = a6 + 1/k + a7 = a7 + 1/k2 + a8 = a8 + alt/k + a9 = a9 + alt/(k+k-1) + alt = -alt +end +pr("%.9f\t(2/3)^k\n", a1) +pr("%.9f\tk^-0.5\n", a2) +pr("%.9f\t1/k(k+1)\n", a3) +pr("%.9f\tFlint Hills\n", a4) +pr("%.9f\tCookson Hills\n", a5) +pr("%.9f\tHarmonic\n", a6) +pr("%.9f\tRiemann Zeta\n", a7) +pr("%.9f\tAlternating Harmonic\n", a8) +pr("%.9f\tGregory\n", a9) diff --git a/perf/LuaJIT-benches/pidigits-nogmp.lua b/perf/LuaJIT-benches/pidigits-nogmp.lua new file mode 100644 index 00000000..63a1cb0e --- /dev/null +++ b/perf/LuaJIT-benches/pidigits-nogmp.lua @@ -0,0 +1,100 @@ + +-- Start of dynamically compiled chunk. +local chunk = [=[ + +-- Factory function for multi-precision number (mpn) operations. +local function fmm(fa, fb) + return loadstring([[ + return function(y, a, ka, b, kb) + local carry, n = 0, #a ]]..(fb == 0 and "" or [[ + local na, nb = n, #b -- Need to adjust lengths. 1 element suffices here. + if na > nb then b[na] = 0 elseif na < nb then a[nb] = 0; n = nb end + ]])..[[ + for i=1,n do -- Sum up all elements and propagate carry. + local x = a[i] ]]..(fa == 2 and "*ka" or "").. + (fb == 2 and "+b[i]*kb" or (fb == 1 and "+b[i]" or ""))..[[ + carry + if x < RADIX and x >= 0 then carry = 0; y[i] = x -- Check for overflow. + else local d = x % RADIX; carry = (x-d) / RADIX; y[i] = d end + end + y[n+1] = nil -- Truncate target. 1 element suffices here. + if carry == 0 then while n > 0 and y[n] == 0 do y[n] = nil end + elseif carry == -1 then y[n] = y[n] - RADIX else y[n+1] = carry end + ]]..(fb == 0 and "" or [[ -- Undo length adjustment. + if na > nb then b[na] = nil elseif na < nb and y ~= a then a[nb] = nil end + ]])..[[ + return y + end]])() +end + +-- Generate needed mpn functions. +local mm_kk, mm_k1, mm_k0, mm_11 = fmm(2, 2), fmm(2, 1), fmm(2, 0), fmm(1, 1) + +-- Choose the most efficient mpn function for y = a*ka + b*kb at run-time. +local function mm(y, a, ka, b, kb) + local f = mm_kk + if kb == 0 or #b == 0 then if ka == 1 then return a else f = mm_k0 end + elseif kb == 1 then if ka == 1 then f = mm_11 else f = mm_k1 end end + return f(y, a, ka, b, kb) +end + +-- Compose matrix with numbers on the right. +local function compose_r(aq,ar,as,at, bq,br,bs,bt) + mm(ar, ar,bq, at,br) mm(at, at,bt, ar,bs) + mm(as, as,bt, aq,bs) mm(aq, aq,bq, nil,0) +end + +-- Compose matrix with numbers on the left. +local function compose_l(aq,ar,as,at, bq,br,bs,bt) + mm(ar, ar,bt, aq,br) mm(at, at,bt, as,br) + mm(as, as,bq, at,bs) mm(aq, aq,bq, nil,0) +end + +-- Extract one digit. +local u, v, jj = {}, {}, 0 +local function extract(q,r,s,t, j) + local u = j == jj + 1 and mm(u, u,1, q,1) or mm(u, q,j, r,1); jj = j + local v = mm(v, t,1, s,j) + local nu, nv, y = #u, #v + if nu == nv then + if nu == 1 then y = u[1] / v[1] + else y = (u[nu]*RADIX + u[nu-1]) / (v[nv]*RADIX + v[nv-1]) end + elseif nu == nv+1 then y = (u[nu]*RADIX + u[nv]) / v[nv] + else return 0 end + return math.floor(y) +end + +-- Coroutine which yields successive digits of PI. +return coroutine.wrap(function() + local q, r, s, t, k = {1}, {}, {}, {1}, 1 + repeat + local y = extract(q,r,s,t, 3) + if y == extract(q,r,s,t, 4) then + coroutine.yield(y) + compose_r(q,r,s,t, 10, -10*y, 0, 1) + else + compose_l(q,r,s,t, k, 4*k+2, 0, 2*k+1) + k = k + 1 + end + until false +end) + +]=] -- End of dynamically compiled chunk. + +local N = tonumber(arg and arg[1]) or 27 +local RADIX = N < 6500 and 2^36 or 2^32 -- Avoid overflow. + +-- Substitute radix and compile chunk. +local pidigit = loadstring(string.gsub(chunk, "RADIX", tostring(RADIX)))() + +-- Print lines with 10 digits. +for i=10,N,10 do + for j=1,10 do io.write(pidigit()) end + io.write("\t:", i, "\n") +end + +-- Print remaining digits (if any). +local n10 = N % 10 +if n10 ~= 0 then + for i=1,n10 do io.write(pidigit()) end + io.write(string.rep(" ", 10-n10), "\t:", N, "\n") +end diff --git a/perf/LuaJIT-benches/ray.lua b/perf/LuaJIT-benches/ray.lua new file mode 100644 index 00000000..2acc24c0 --- /dev/null +++ b/perf/LuaJIT-benches/ray.lua @@ -0,0 +1,135 @@ +local sqrt = math.sqrt +local huge = math.huge + +local delta = 1 +while delta * delta + 1 ~= 1 do + delta = delta * 0.5 +end + +local function length(x, y, z) return sqrt(x*x + y*y + z*z) end +local function vlen(v) return length(v[1], v[2], v[3]) end +local function mul(c, x, y, z) return c*x, c*y, c*z end +local function unitise(x, y, z) return mul(1/length(x, y, z), x, y, z) end +local function dot(x1, y1, z1, x2, y2, z2) + return x1*x2 + y1*y2 + z1*z2 +end + +local function vsub(a, b) return a[1] - b[1], a[2] - b[2], a[3] - b[3] end +local function vdot(a, b) return dot(a[1], a[2], a[3], b[1], b[2], b[3]) end + + +local sphere = {} +function sphere:new(centre, radius) + self.__index = self + return setmetatable({centre=centre, radius=radius}, self) +end + +local function sphere_distance(self, origin, dir) + local vx, vy, vz = vsub(self.centre, origin) + local b = dot(vx, vy, vz, dir[1], dir[2], dir[3]) + local r = self.radius + local disc = r*r + b*b - vx*vx-vy*vy-vz*vz + if disc < 0 then return huge end + local d = sqrt(disc) + local t2 = b + d + if t2 < 0 then return huge end + local t1 = b - d + return t1 > 0 and t1 or t2 +end + +function sphere:intersect(origin, dir, best) + local lambda = sphere_distance(self, origin, dir) + if lambda < best[1] then + local c = self.centre + best[1] = lambda + local b2 = best[2] + b2[1], b2[2], b2[3] = + unitise( + origin[1] - c[1] + lambda * dir[1], + origin[2] - c[2] + lambda * dir[2], + origin[3] - c[3] + lambda * dir[3]) + end +end + +local group = {} +function group:new(bound) + self.__index = self + return setmetatable({bound=bound, children={}}, self) +end + +function group:add(s) + self.children[#self.children+1] = s +end + +function group:intersect(origin, dir, best) + local lambda = sphere_distance(self.bound, origin, dir) + if lambda < best[1] then + for _, c in ipairs(self.children) do + c:intersect(origin, dir, best) + end + end +end + +local hit = { 0, 0, 0 } +local ilight +local best = { huge, { 0, 0, 0 } } + +local function ray_trace(light, camera, dir, scene) + best[1] = huge + scene:intersect(camera, dir, best) + local b1 = best[1] + if b1 == huge then return 0 end + local b2 = best[2] + local g = vdot(b2, light) + if g >= 0 then return 0 end + hit[1] = camera[1] + b1*dir[1] + delta*b2[1] + hit[2] = camera[2] + b1*dir[2] + delta*b2[2] + hit[3] = camera[3] + b1*dir[3] + delta*b2[3] + best[1] = huge + scene:intersect(hit, ilight, best) + if best[1] == huge then + return -g + else + return 0 + end +end + +local function create(level, centre, radius) + local s = sphere:new(centre, radius) + if level == 1 then return s end + local gr = group:new(sphere:new(centre, 3*radius)) + gr:add(s) + local rn = 3*radius/sqrt(12) + for dz = -1,1,2 do + for dx = -1,1,2 do + gr:add(create(level-1, { centre[1] + rn*dx, centre[2] + rn, centre[3] + rn*dz }, radius*0.5)) + end + end + return gr +end + + +local level, n, ss = tonumber(arg[1]) or 9, tonumber(arg[2]) or 256, 4 +local iss = 1/ss +local gf = 255/(ss*ss) + +io.write(("P5\n%d %d\n255\n"):format(n, n)) +local light = { unitise(-1, -3, 2) } +ilight = { -light[1], -light[2], -light[3] } +local camera = { 0, 0, -4 } +local dir = { 0, 0, 0 } + +local scene = create(level, {0, -1, 0}, 1) + +for y = n/2-1, -n/2, -1 do + for x = -n/2, n/2-1 do + local g = 0 + for d = y, y+.99, iss do + for e = x, x+.99, iss do + dir[1], dir[2], dir[3] = unitise(e, d, n) + g = g + ray_trace(light, camera, dir, scene) + end + end + io.write(string.char(math.floor(0.5 + g*gf))) + end +end diff --git a/perf/LuaJIT-benches/recursive-ack.lua b/perf/LuaJIT-benches/recursive-ack.lua new file mode 100644 index 00000000..fad30589 --- /dev/null +++ b/perf/LuaJIT-benches/recursive-ack.lua @@ -0,0 +1,8 @@ +local function Ack(m, n) + if m == 0 then return n+1 end + if n == 0 then return Ack(m-1, 1) end + return Ack(m-1, (Ack(m, n-1))) -- The parentheses are deliberate. +end + +local N = tonumber(arg and arg[1]) or 10 +io.write("Ack(3,", N ,"): ", Ack(3,N), "\n") diff --git a/perf/LuaJIT-benches/recursive-fib.lua b/perf/LuaJIT-benches/recursive-fib.lua new file mode 100644 index 00000000..ef9950de --- /dev/null +++ b/perf/LuaJIT-benches/recursive-fib.lua @@ -0,0 +1,7 @@ +local function fib(n) + if n < 2 then return 1 end + return fib(n-2) + fib(n-1) +end + +local n = tonumber(arg[1]) or 10 +io.write(string.format("Fib(%d): %d\n", n, fib(n))) diff --git a/perf/LuaJIT-benches/revcomp.lua b/perf/LuaJIT-benches/revcomp.lua new file mode 100644 index 00000000..34fe347b --- /dev/null +++ b/perf/LuaJIT-benches/revcomp.lua @@ -0,0 +1,37 @@ + +local sub = string.sub +iubc = setmetatable({ + A="T", C="G", B="V", D="H", K="M", R="Y", + a="T", c="G", b="V", d="H", k="M", r="Y", + T="A", G="C", V="B", H="D", M="K", Y="R", U="A", + t="A", g="C", v="B", h="D", m="K", y="R", u="A", + N="N", S="S", W="W", n="N", s="S", w="W", +}, { __index = function(t, s) + local r = t[sub(s, 2)]..t[sub(s, 1, 1)]; t[s] = r; return r end }) + +local wcode = [=[ +return function(t, n) + if n == 1 then return end + local iubc, sub, write = iubc, string.sub, io.write + local s = table.concat(t, "", 1, n-1) + for i=#s-59,1,-60 do + write(]=] +for i=59,3,-4 do wcode = wcode.."iubc[sub(s, i+"..(i-3)..", i+"..i..")], " end +wcode = wcode..[=["\n") + end + local r = #s % 60 + if r ~= 0 then + for i=r,1,-4 do write(iubc[sub(s, i-3 < 1 and 1 or i-3, i)]) end + write("\n") + end +end +]=] +local writerev = loadstring(wcode)() + +local t, n = {}, 1 +for line in io.lines() do + local c = sub(line, 1, 1) + if c == ">" then writerev(t, n); io.write(line, "\n"); n = 1 + elseif c ~= ";" then t[n] = line; n = n + 1 end +end +writerev(t, n) diff --git a/perf/LuaJIT-benches/scimark-2010-12-20.lua b/perf/LuaJIT-benches/scimark-2010-12-20.lua new file mode 100644 index 00000000..353acb7c --- /dev/null +++ b/perf/LuaJIT-benches/scimark-2010-12-20.lua @@ -0,0 +1,400 @@ +------------------------------------------------------------------------------ +-- Lua SciMark (2010-12-20). +-- +-- A literal translation of SciMark 2.0a, written in Java and C. +-- Credits go to the original authors Roldan Pozo and Bruce Miller. +-- See: http://math.nist.gov/scimark2/ +------------------------------------------------------------------------------ + +local SCIMARK_VERSION = "2010-12-10" +local SCIMARK_COPYRIGHT = "Copyright (C) 2006-2010 Mike Pall" + +local MIN_TIME = 2.0 +local RANDOM_SEED = 101009 -- Must be odd. +local SIZE_SELECT = "small" + +local benchmarks = { + "FFT", "SOR", "MC", "SPARSE", "LU", + small = { + FFT = { 1024 }, + SOR = { 100 }, + MC = { }, + SPARSE = { 1000, 5000 }, + LU = { 100 }, + }, + large = { + FFT = { 1048576 }, + SOR = { 1000 }, + MC = { }, + SPARSE = { 100000, 1000000 }, + LU = { 1000 }, + }, +} + +local abs, log, sin, floor = math.abs, math.log, math.sin, math.floor +local pi, clock = math.pi, os.clock +local format = string.format + +------------------------------------------------------------------------------ +-- Select array type: Lua tables or native (FFI) arrays +------------------------------------------------------------------------------ + +local darray, iarray + +local function array_init() + if jit and jit.status and jit.status() then + local ok, ffi = pcall(require, "ffi") + if ok then + darray = ffi.typeof("double[?]") + iarray = ffi.typeof("int[?]") + return + end + end + function darray(n) return {} end + iarray = darray +end + +------------------------------------------------------------------------------ +-- This is a Lagged Fibonacci Pseudo-random Number Generator with +-- j, k, M = 5, 17, 31. Pretty weak, but same as C/Java SciMark. +------------------------------------------------------------------------------ + +local rand, rand_init + +if jit and jit.status and jit.status() then + -- LJ2 has bit operations and zero-based arrays (internally). + local bit = require("bit") + local band, sar = bit.band, bit.arshift + function rand_init(seed) + local Rm, Rj, Ri = iarray(17), 16, 11 + for i=0,16 do Rm[i] = 0 end + for i=16,0,-1 do + seed = band(seed*9069, 0x7fffffff) + Rm[i] = seed + end + function rand() + local i = band(Ri+1, sar(Ri-16, 31)) + local j = band(Rj+1, sar(Rj-16, 31)) + Ri, Rj = i, j + local k = band(Rm[i] - Rm[j], 0x7fffffff) + Rm[j] = k + return k * (1.0/2147483647.0) + end + end +else + -- Better for standard Lua with one-based arrays and without bit operations. + function rand_init(seed) + local Rm, Rj = {}, 1 + for i=1,17 do Rm[i] = 0 end + for i=17,1,-1 do + seed = (seed*9069) % (2^31) + Rm[i] = seed + end + function rand() + local j, m = Rj, Rm + local h = j - 5 + if h < 1 then h = h + 17 end + local k = m[h] - m[j] + if k < 0 then k = k + 2147483647 end + m[j] = k + if j < 17 then Rj = j + 1 else Rj = 1 end + return k * (1.0/2147483647.0) + end + end +end + +local function random_vector(n) + local v = darray(n+1) + for x=1,n do v[x] = rand() end + return v +end + +local function random_matrix(m, n) + local a = {} + for y=1,m do + local v = darray(n+1) + a[y] = v + for x=1,n do v[x] = rand() end + end + return a +end + +------------------------------------------------------------------------------ +-- FFT: Fast Fourier Transform. +------------------------------------------------------------------------------ + +local function fft_bitreverse(v, n) + local j = 0 + for i=0,2*n-4,2 do + if i < j then + v[i+1], v[i+2], v[j+1], v[j+2] = v[j+1], v[j+2], v[i+1], v[i+2] + end + local k = n + while k <= j do j = j - k; k = k / 2 end + j = j + k + end +end + +local function fft_transform(v, n, dir) + if n <= 1 then return end + fft_bitreverse(v, n) + local dual = 1 + repeat + local dual2 = 2*dual + for i=1,2*n-1,2*dual2 do + local j = i+dual2 + local ir, ii = v[i], v[i+1] + local jr, ji = v[j], v[j+1] + v[j], v[j+1] = ir - jr, ii - ji + v[i], v[i+1] = ir + jr, ii + ji + end + local theta = dir * pi / dual + local s, s2 = sin(theta), 2.0 * sin(theta * 0.5)^2 + local wr, wi = 1.0, 0.0 + for a=3,dual2-1,2 do + wr, wi = wr - s*wi - s2*wr, wi + s*wr - s2*wi + for i=a,a+2*(n-dual2),2*dual2 do + local j = i+dual2 + local jr, ji = v[j], v[j+1] + local dr, di = wr*jr - wi*ji, wr*ji + wi*jr + local ir, ii = v[i], v[i+1] + v[j], v[j+1] = ir - dr, ii - di + v[i], v[i+1] = ir + dr, ii + di + end + end + dual = dual2 + until dual >= n +end + +function benchmarks.FFT(n) + local l2n = log(n)/log(2) + if l2n % 1 ~= 0 then + io.stderr:write("Error: FFT data length is not a power of 2\n") + os.exit(1) + end + local v = random_vector(n*2) + return function(cycles) + local norm = 1.0 / n + for p=1,cycles do + fft_transform(v, n, -1) + fft_transform(v, n, 1) + for i=1,n*2 do v[i] = v[i] * norm end + end + return ((5*n-2)*l2n + 2*(n+1)) * cycles + end +end + +------------------------------------------------------------------------------ +-- SOR: Jacobi Successive Over-Relaxation. +------------------------------------------------------------------------------ + +local function sor_run(mat, m, n, cycles, omega) + local om4, om1 = omega*0.25, 1.0-omega + m = m - 1 + n = n - 1 + for i=1,cycles do + for y=2,m do + local v, vp, vn = mat[y], mat[y-1], mat[y+1] + for x=2,n do + v[x] = om4*((vp[x]+vn[x])+(v[x-1]+v[x+1])) + om1*v[x] + end + end + end +end + +function benchmarks.SOR(n) + local mat = random_matrix(n, n) + return function(cycles) + sor_run(mat, n, n, cycles, 1.25) + return (n-1)*(n-1)*cycles*6 + end +end + +------------------------------------------------------------------------------ +-- MC: Monte Carlo Integration. +------------------------------------------------------------------------------ + +local function mc_integrate(cycles) + local under_curve = 0 + local rand = rand + for i=1,cycles do + local x = rand() + local y = rand() + if x*x + y*y <= 1.0 then under_curve = under_curve + 1 end + end + return (under_curve/cycles) * 4 +end + +function benchmarks.MC() + return function(cycles) + local res = mc_integrate(cycles) + assert(math.sqrt(cycles)*math.abs(res-math.pi) < 5.0, "bad MC result") + return cycles * 4 -- Way off, but same as SciMark in C/Java. + end +end + +------------------------------------------------------------------------------ +-- Sparse Matrix Multiplication. +------------------------------------------------------------------------------ + +local function sparse_mult(n, cycles, vy, val, row, col, vx) + for p=1,cycles do + for r=1,n do + local sum = 0 + for i=row[r],row[r+1]-1 do sum = sum + vx[col[i]] * val[i] end + vy[r] = sum + end + end +end + +function benchmarks.SPARSE(n, nz) + local nr = floor(nz/n) + local anz = nr*n + local vx = random_vector(n) + local val = random_vector(anz) + local vy, col, row = darray(n+1), iarray(nz+1), iarray(n+2) + row[1] = 1 + for r=1,n do + local step = floor(r/nr) + if step < 1 then step = 1 end + local rr = row[r] + row[r+1] = rr+nr + for i=0,nr-1 do col[rr+i] = 1+i*step end + end + return function(cycles) + sparse_mult(n, cycles, vy, val, row, col, vx) + return anz*cycles*2 + end +end + +------------------------------------------------------------------------------ +-- LU: Dense Matrix Factorization. +------------------------------------------------------------------------------ + +local function lu_factor(a, pivot, m, n) + local min_m_n = m < n and m or n + for j=1,min_m_n do + local jp, t = j, abs(a[j][j]) + for i=j+1,m do + local ab = abs(a[i][j]) + if ab > t then + jp = i + t = ab + end + end + pivot[j] = jp + if a[jp][j] == 0 then error("zero pivot") end + if jp ~= j then a[j], a[jp] = a[jp], a[j] end + if j < m then + local recp = 1.0 / a[j][j] + for k=j+1,m do + local v = a[k] + v[j] = v[j] * recp + end + end + if j < min_m_n then + for i=j+1,m do + local vi, vj = a[i], a[j] + local eij = vi[j] + for k=j+1,n do vi[k] = vi[k] - eij * vj[k] end + end + end + end +end + +local function matrix_alloc(m, n) + local a = {} + for y=1,m do a[y] = darray(n+1) end + return a +end + +local function matrix_copy(dst, src, m, n) + for y=1,m do + local vd, vs = dst[y], src[y] + for x=1,n do vd[x] = vs[x] end + end +end + +function benchmarks.LU(n) + local mat = random_matrix(n, n) + local tmp = matrix_alloc(n, n) + local pivot = iarray(n+1) + return function(cycles) + for i=1,cycles do + matrix_copy(tmp, mat, n, n) + lu_factor(tmp, pivot, n, n) + end + return 2.0/3.0*n*n*n*cycles + end +end + +------------------------------------------------------------------------------ +-- Main program. +------------------------------------------------------------------------------ + +local function printf(...) + io.write(format(...)) +end + +local function fmtparams(p1, p2) + if p2 then return format("[%d, %d]", p1, p2) + elseif p1 then return format("[%d]", p1) end + return "" +end + +local function measure(min_time, name, ...) + array_init() + rand_init(RANDOM_SEED) + local run = benchmarks[name](...) + local cycles = 1 + repeat + local tm = clock() + local flops = run(cycles, ...) + tm = clock() - tm + if tm >= min_time then + local res = flops / tm * 1.0e-6 + local p1, p2 = ... + printf("%-7s %8.2f %s\n", name, res, fmtparams(...)) + return res + end + cycles = cycles * 2 + until false +end + +printf("Lua SciMark %s based on SciMark 2.0a. %s.\n\n", + SCIMARK_VERSION, SCIMARK_COPYRIGHT) + +while arg and arg[1] do + local a = table.remove(arg, 1) + if a == "-noffi" then + package.preload.ffi = nil + elseif a == "-small" then + SIZE_SELECT = "small" + elseif a == "-large" then + SIZE_SELECT = "large" + elseif benchmarks[a] then + local p = benchmarks[SIZE_SELECT][a] + measure(MIN_TIME, a, tonumber(arg[1]) or p[1], tonumber(arg[2]) or p[2]) + return + else + printf("Usage: scimark [-noffi] [-small|-large] [BENCH params...]\n\n") + printf("BENCH -small -large\n") + printf("---------------------------------------\n") + for _,name in ipairs(benchmarks) do + printf("%-7s %-13s %s\n", name, + fmtparams(unpack(benchmarks.small[name])), + fmtparams(unpack(benchmarks.large[name]))) + end + printf("\n") + os.exit(1) + end +end + +local params = benchmarks[SIZE_SELECT] +local sum = 0 +for _,name in ipairs(benchmarks) do + sum = sum + measure(MIN_TIME, name, unpack(params[name])) +end +printf("\nSciMark %8.2f [%s problem sizes]\n", sum / #benchmarks, SIZE_SELECT) +io.flush() + diff --git a/perf/LuaJIT-benches/scimark-fft.lua b/perf/LuaJIT-benches/scimark-fft.lua new file mode 100644 index 00000000..c05bb69a --- /dev/null +++ b/perf/LuaJIT-benches/scimark-fft.lua @@ -0,0 +1 @@ +require("scimark_lib").FFT(1024)(tonumber(arg and arg[1]) or 50000) diff --git a/perf/LuaJIT-benches/scimark-lu.lua b/perf/LuaJIT-benches/scimark-lu.lua new file mode 100644 index 00000000..7636d994 --- /dev/null +++ b/perf/LuaJIT-benches/scimark-lu.lua @@ -0,0 +1 @@ +require("scimark_lib").LU(100)(tonumber(arg and arg[1]) or 5000) diff --git a/perf/LuaJIT-benches/scimark-sor.lua b/perf/LuaJIT-benches/scimark-sor.lua new file mode 100644 index 00000000..e537e986 --- /dev/null +++ b/perf/LuaJIT-benches/scimark-sor.lua @@ -0,0 +1 @@ +require("scimark_lib").SOR(100)(tonumber(arg and arg[1]) or 50000) diff --git a/perf/LuaJIT-benches/scimark-sparse.lua b/perf/LuaJIT-benches/scimark-sparse.lua new file mode 100644 index 00000000..01a2258d --- /dev/null +++ b/perf/LuaJIT-benches/scimark-sparse.lua @@ -0,0 +1 @@ +require("scimark_lib").SPARSE(1000, 5000)(tonumber(arg and arg[1]) or 150000) diff --git a/perf/LuaJIT-benches/scimark_lib.lua b/perf/LuaJIT-benches/scimark_lib.lua new file mode 100644 index 00000000..aeffd75a --- /dev/null +++ b/perf/LuaJIT-benches/scimark_lib.lua @@ -0,0 +1,297 @@ +------------------------------------------------------------------------------ +-- Lua SciMark (2010-03-15). +-- +-- A literal translation of SciMark 2.0a, written in Java and C. +-- Credits go to the original authors Roldan Pozo and Bruce Miller. +-- See: http://math.nist.gov/scimark2/ +------------------------------------------------------------------------------ + + +local SCIMARK_VERSION = "2010-03-15" + +local RANDOM_SEED = 101009 -- Must be odd. + +local abs, log, sin, floor = math.abs, math.log, math.sin, math.floor +local pi, clock = math.pi, os.clock + +local benchmarks = {} + +------------------------------------------------------------------------------ +-- This is a Lagged Fibonacci Pseudo-random Number Generator with +-- j, k, M = 5, 17, 31. Pretty weak, but same as C/Java SciMark. +------------------------------------------------------------------------------ + +local rand, rand_init + +if jit and jit.status and jit.status() then + -- LJ2 has bit operations and zero-based arrays (internally). + local bit = require("bit") + local band, sar = bit.band, bit.arshift + local Rm, Rj, Ri = {}, 0, 0 + for i=0,16 do Rm[i] = 0 end + function rand_init(seed) + Rj, Ri = 16, 11 + for i=16,0,-1 do + seed = band(seed*9069, 0x7fffffff) + Rm[i] = seed + end + end + function rand() + local i = band(Ri+1, sar(Ri-16, 31)) + local j = band(Rj+1, sar(Rj-16, 31)) + Ri, Rj = i, j + local k = band(Rm[i] - Rm[j], 0x7fffffff) + Rm[j] = k + return k * (1.0/2147483647.0) + end +else + -- Better for standard Lua with one-based arrays and without bit operations. + local Rm, Rj = {}, 1 + for i=1,17 do Rm[i] = 0 end + function rand_init(seed) + Rj = 1 + for i=17,1,-1 do + seed = (seed*9069) % (2^31) + Rm[i] = seed + end + end + function rand() + local j, m = Rj, Rm + local h = j - 5 + if h < 1 then h = h + 17 end + local k = m[h] - m[j] + if k < 0 then k = k + 2147483647 end + m[j] = k + if j < 17 then Rj = j + 1 else Rj = 1 end + return k * (1.0/2147483647.0) + end +end + +local function random_vector(n) + local v = {} + for x=1,n do v[x] = rand() end + return v +end + +local function random_matrix(m, n) + local a = {} + for y=1,m do + local v = {} + a[y] = v + for x=1,n do v[x] = rand() end + end + return a +end + +------------------------------------------------------------------------------ +-- FFT: Fast Fourier Transform. +------------------------------------------------------------------------------ + +local function fft_bitreverse(v, n) + local j = 0 + for i=0,2*n-4,2 do + if i < j then + v[i+1], v[i+2], v[j+1], v[j+2] = v[j+1], v[j+2], v[i+1], v[i+2] + end + local k = n + while k <= j do j = j - k; k = k / 2 end + j = j + k + end +end + +local function fft_transform(v, n, dir) + if n <= 1 then return end + fft_bitreverse(v, n) + local dual = 1 + repeat + local dual2 = 2*dual + for i=1,2*n-1,2*dual2 do + local j = i+dual2 + local ir, ii = v[i], v[i+1] + local jr, ji = v[j], v[j+1] + v[j], v[j+1] = ir - jr, ii - ji + v[i], v[i+1] = ir + jr, ii + ji + end + local theta = dir * pi / dual + local s, s2 = sin(theta), 2.0 * sin(theta * 0.5)^2 + local wr, wi = 1.0, 0.0 + for a=3,dual2-1,2 do + wr, wi = wr - s*wi - s2*wr, wi + s*wr - s2*wi + for i=a,a+2*(n-dual2),2*dual2 do + local j = i+dual2 + local jr, ji = v[j], v[j+1] + local dr, di = wr*jr - wi*ji, wr*ji + wi*jr + local ir, ii = v[i], v[i+1] + v[j], v[j+1] = ir - dr, ii - di + v[i], v[i+1] = ir + dr, ii + di + end + end + dual = dual2 + until dual >= n +end + +function benchmarks.FFT(n) + local l2n = log(n)/log(2) + if l2n % 1 ~= 0 then + io.stderr:write("Error: FFT data length is not a power of 2\n") + os.exit(1) + end + local v = random_vector(n*2) + return function(cycles) + local norm = 1.0 / n + for p=1,cycles do + fft_transform(v, n, -1) + fft_transform(v, n, 1) + for i=1,n*2 do v[i] = v[i] * norm end + end + return ((5*n-2)*l2n + 2*(n+1)) * cycles + end +end + +------------------------------------------------------------------------------ +-- SOR: Jacobi Successive Over-Relaxation. +------------------------------------------------------------------------------ + +local function sor_run(mat, m, n, cycles, omega) + local om4, om1 = omega*0.25, 1.0-omega + m = m - 1 + n = n - 1 + for i=1,cycles do + for y=2,m do + local v, vp, vn = mat[y], mat[y-1], mat[y+1] + for x=2,n do + v[x] = om4*((vp[x]+vn[x])+(v[x-1]+v[x+1])) + om1*v[x] + end + end + end +end + +function benchmarks.SOR(n) + local mat = random_matrix(n, n) + return function(cycles) + sor_run(mat, n, n, cycles, 1.25) + return (n-1)*(n-1)*cycles*6 + end +end + +------------------------------------------------------------------------------ +-- MC: Monte Carlo Integration. +------------------------------------------------------------------------------ + +local function mc_integrate(cycles) + local under_curve = 0 + local rand = rand + for i=1,cycles do + local x = rand() + local y = rand() + if x*x + y*y <= 1.0 then under_curve = under_curve + 1 end + end + return (under_curve/cycles) * 4 +end + +function benchmarks.MC() + return function(cycles) + local res = mc_integrate(cycles) + assert(math.sqrt(cycles)*math.abs(res-math.pi) < 5.0, "bad MC result") + return cycles * 4 -- Way off, but same as SciMark in C/Java. + end +end + +------------------------------------------------------------------------------ +-- Sparse Matrix Multiplication. +------------------------------------------------------------------------------ + +local function sparse_mult(n, cycles, vy, val, row, col, vx) + for p=1,cycles do + for r=1,n do + local sum = 0 + for i=row[r],row[r+1]-1 do sum = sum + vx[col[i]] * val[i] end + vy[r] = sum + end + end +end + +function benchmarks.SPARSE(n, nz) + local nr = floor(nz/n) + local anz = nr*n + local vx = random_vector(n) + local val = random_vector(anz) + local vy, col, row = {}, {}, {} + row[1] = 1 + for r=1,n do + local step = floor(r/nr) + if step < 1 then step = 1 end + local rr = row[r] + row[r+1] = rr+nr + for i=0,nr-1 do col[rr+i] = 1+i*step end + end + return function(cycles) + sparse_mult(n, cycles, vy, val, row, col, vx) + return anz*cycles*2 + end +end + +------------------------------------------------------------------------------ +-- LU: Dense Matrix Factorization. +------------------------------------------------------------------------------ + +local function lu_factor(a, pivot, m, n) + local min_m_n = m < n and m or n + for j=1,min_m_n do + local jp, t = j, abs(a[j][j]) + for i=j+1,m do + local ab = abs(a[i][j]) + if ab > t then + jp = i + t = ab + end + end + pivot[j] = jp + if a[jp][j] == 0 then error("zero pivot") end + if jp ~= j then a[j], a[jp] = a[jp], a[j] end + if j < m then + local recp = 1.0 / a[j][j] + for k=j+1,m do + local v = a[k] + v[j] = v[j] * recp + end + end + if j < min_m_n then + for i=j+1,m do + local vi, vj = a[i], a[j] + local eij = vi[j] + for k=j+1,n do vi[k] = vi[k] - eij * vj[k] end + end + end + end +end + +local function matrix_alloc(m, n) + local a = {} + for y=1,m do a[y] = {} end + return a +end + +local function matrix_copy(dst, src, m, n) + for y=1,m do + local vd, vs = dst[y], src[y] + for x=1,n do vd[x] = vs[x] end + end +end + +function benchmarks.LU(n) + local mat = random_matrix(n, n) + local tmp = matrix_alloc(n, n) + local pivot = {} + return function(cycles) + for i=1,cycles do + matrix_copy(tmp, mat, n, n) + lu_factor(tmp, pivot, n, n) + end + return 2.0/3.0*n*n*n*cycles + end +end + +rand_init(RANDOM_SEED) + +return benchmarks diff --git a/perf/LuaJIT-benches/series.lua b/perf/LuaJIT-benches/series.lua new file mode 100644 index 00000000..f766cb32 --- /dev/null +++ b/perf/LuaJIT-benches/series.lua @@ -0,0 +1,34 @@ + +local function integrate(x0, x1, nsteps, omegan, f) + local x, dx = x0, (x1-x0)/nsteps + local rvalue = ((x0+1)^x0 * f(omegan*x0)) / 2 + for i=3,nsteps do + x = x + dx + rvalue = rvalue + (x+1)^x * f(omegan*x) + end + return (rvalue + ((x1+1)^x1 * f(omegan*x1)) / 2) * dx +end + +local function series(n) + local sin, cos = math.sin, math.cos + local omega = math.pi + local t = {} + + t[1] = integrate(0, 2, 1000, 0, function() return 1 end) / 2 + t[2] = 0 + + for i=2,n do + t[2*i-1] = integrate(0, 2, 1000, omega*i, cos) + t[2*i] = integrate(0, 2, 1000, omega*i, sin) + end + + return t +end + +local n = tonumber(arg and arg[1]) or 10000 +local tm = os.clock() +local t = series(n) +tm = os.clock() - tm +assert(math.abs(t[1]-2.87295) < 0.00001) +io.write(string.format("size %d, %.2f s, %.1f iterations/s\n", + n, tm, (2*n-1)/tm)) diff --git a/perf/LuaJIT-benches/spectral-norm.lua b/perf/LuaJIT-benches/spectral-norm.lua new file mode 100644 index 00000000..ecc80112 --- /dev/null +++ b/perf/LuaJIT-benches/spectral-norm.lua @@ -0,0 +1,40 @@ + +local function A(i, j) + local ij = i+j-1 + return 1.0 / (ij * (ij-1) * 0.5 + i) +end + +local function Av(x, y, N) + for i=1,N do + local a = 0 + for j=1,N do a = a + x[j] * A(i, j) end + y[i] = a + end +end + +local function Atv(x, y, N) + for i=1,N do + local a = 0 + for j=1,N do a = a + x[j] * A(j, i) end + y[i] = a + end +end + +local function AtAv(x, y, t, N) + Av(x, t, N) + Atv(t, y, N) +end + +local N = tonumber(arg and arg[1]) or 100 +local u, v, t = {}, {}, {} +for i=1,N do u[i] = 1 end + +for i=1,10 do AtAv(u, v, t, N) AtAv(v, u, t, N) end + +local vBv, vv = 0, 0 +for i=1,N do + local ui, vi = u[i], v[i] + vBv = vBv + ui*vi + vv = vv + vi*vi +end +io.write(string.format("%0.9f\n", math.sqrt(vBv / vv))) diff --git a/perf/LuaJIT-benches/sum-file.lua b/perf/LuaJIT-benches/sum-file.lua new file mode 100644 index 00000000..c9e618fd --- /dev/null +++ b/perf/LuaJIT-benches/sum-file.lua @@ -0,0 +1,6 @@ + +local sum = 0 +for line in io.lines() do + sum = sum + line +end +io.write(sum, "\n") -- 2.51.0