[Tarantool-patches] [PATCH luajit 02/19] test: introduce mcode generator for tests
Maxim Kokryashkin
m.kokryashkin at tarantool.org
Tue Aug 15 13:14:18 MSK 2023
Hi, Sergey!
Thanks for the patch!
Please consider my comments below.
On Wed, Aug 09, 2023 at 06:35:51PM +0300, Sergey Kaplun via Tarantool-patches wrote:
> The test <test/tarantool-tests/gh-6098-fix-side-exit-patching-on-arm64>
> depends on a particular offset of the side trace's mcode relative to
> the parent trace. Before this commit, the test just ran some number of
> functions to generate traces to fill the required mcode range.
> Unfortunately, this approach is not robust, since sometimes a trace is
> not recorded due to "leaving loop in root trace" errors caused by
> hotcount collisions.
>
> This patch introduces the following helpers:
> * `frontend.gettraceno(func)` -- returns the traceno for the given
> function, assuming that there is a compiled trace for its prototype
> (i.e. the 0th bytecode is JFUNC).
> * `jit.generators.fillmcode(traceno, size)` fills mcode area of the
> given size from the given trace. It is useful to generate some mcode
> to test jumps to side traces remote enough from the parent.
> ---
> ...8-fix-side-exit-patching-on-arm64.test.lua | 78 ++----------
> test/tarantool-tests/utils/frontend.lua | 24 ++++
> test/tarantool-tests/utils/jit/generators.lua | 115 ++++++++++++++++++
> 3 files changed, 150 insertions(+), 67 deletions(-)
> create mode 100644 test/tarantool-tests/utils/jit/generators.lua
>
> diff --git a/test/tarantool-tests/gh-6098-fix-side-exit-patching-on-arm64.test.lua b/test/tarantool-tests/gh-6098-fix-side-exit-patching-on-arm64.test.lua
> index 93db3041..678ac914 100644
> --- a/test/tarantool-tests/gh-6098-fix-side-exit-patching-on-arm64.test.lua
> +++ b/test/tarantool-tests/gh-6098-fix-side-exit-patching-on-arm64.test.lua
> @@ -1,8 +1,12 @@
> local tap = require('tap')
> local test = tap.test('gh-6098-fix-side-exit-patching-on-arm64'):skipcond({
> ['Test requires JIT enabled'] = not jit.status(),
> + ['Disabled on *BSD due to #4819'] = jit.os == 'BSD',
> })
>
> +local generators = require('utils').jit.generators
> +local frontend = require('utils').frontend
> +
> test:plan(1)
>
> -- The function to be tested for side exit patching:
> @@ -20,52 +24,6 @@ local function cbool(cond)
> end
> end
>
> --- XXX: Function template below produces 8Kb mcode for ARM64, so
> --- we need to compile at least 128 traces to exceed 1Mb delta
> --- between <cbool> root trace side exit and <cbool> side trace.
> --- Unfortunately, we have no other option for extending this jump
> --- delta, since the base of the current mcode area (J->mcarea) is
> --- used as a hint for mcode allocator (see lj_mcode.c for info).
> -local FUNCS = 128
> -local recfuncs = { }
> -for i = 1, FUNCS do
> - -- This is a quite heavy workload (though it doesn't look like
> - -- one at first). Each load from a table is type guarded. Each
> - -- table lookup (for both stores and loads) is guarded for table
> - -- <hmask> value and metatable presence. The code below results
> - -- to 8Kb of mcode for ARM64 in practice.
> - recfuncs[i] = assert(load(([[
> - return function(src)
> - local p = %d
> - local tmp = { }
> - local dst = { }
> - for i = 1, 3 do
> - tmp.a = src.a * p tmp.j = src.j * p tmp.s = src.s * p
> - tmp.b = src.b * p tmp.k = src.k * p tmp.t = src.t * p
> - tmp.c = src.c * p tmp.l = src.l * p tmp.u = src.u * p
> - tmp.d = src.d * p tmp.m = src.m * p tmp.v = src.v * p
> - tmp.e = src.e * p tmp.n = src.n * p tmp.w = src.w * p
> - tmp.f = src.f * p tmp.o = src.o * p tmp.x = src.x * p
> - tmp.g = src.g * p tmp.p = src.p * p tmp.y = src.y * p
> - tmp.h = src.h * p tmp.q = src.q * p tmp.z = src.z * p
> - tmp.i = src.i * p tmp.r = src.r * p
> -
> - dst.a = tmp.z + p dst.j = tmp.q + p dst.s = tmp.h + p
> - dst.b = tmp.y + p dst.k = tmp.p + p dst.t = tmp.g + p
> - dst.c = tmp.x + p dst.l = tmp.o + p dst.u = tmp.f + p
> - dst.d = tmp.w + p dst.m = tmp.n + p dst.v = tmp.e + p
> - dst.e = tmp.v + p dst.n = tmp.m + p dst.w = tmp.d + p
> - dst.f = tmp.u + p dst.o = tmp.l + p dst.x = tmp.c + p
> - dst.g = tmp.t + p dst.p = tmp.k + p dst.y = tmp.b + p
> - dst.h = tmp.s + p dst.q = tmp.j + p dst.z = tmp.a + p
> - dst.i = tmp.r + p dst.r = tmp.i + p
> - end
> - dst.tmp = tmp
> - return dst
> - end
> - ]]):format(i)), ('Syntax error in function recfuncs[%d]'):format(i))()
> -end
> -
> -- Make compiler work hard:
> -- * No optimizations at all to produce more mcode.
> -- * Try to compile all compiled paths as early as JIT can.
> @@ -78,27 +36,13 @@ cbool(true)
> -- a root trace for <cbool>.
> cbool(true)
>
> -for i = 1, FUNCS do
> - -- XXX: FNEW is NYI, hence loop recording fails at this point.
> - -- The recording is aborted on purpose: we are going to record
> - -- <FUNCS> number of traces for functions in <recfuncs>.
> - -- Otherwise, loop recording might lead to a very long trace
> - -- error (via return to a lower frame), or a trace with lots of
> - -- side traces. We need neither of this, but just bunch of
> - -- traces filling the available mcode area.
> - local function tnew(p)
> - return {
> - a = p + 1, f = p + 6, k = p + 11, p = p + 16, u = p + 21, z = p + 26,
> - b = p + 2, g = p + 7, l = p + 12, q = p + 17, v = p + 22,
> - c = p + 3, h = p + 8, m = p + 13, r = p + 18, w = p + 23,
> - d = p + 4, i = p + 9, n = p + 14, s = p + 19, x = p + 24,
> - e = p + 5, j = p + 10, o = p + 15, t = p + 20, y = p + 25,
> - }
> - end
> - -- Each function call produces a trace (see the template for the
> - -- function definition above).
> - recfuncs[i](tnew(i))
> -end
> +local cbool_traceno = frontend.gettraceno(cbool)
> +
> +-- XXX: Unfortunately, we have no other option for extending
> +-- this jump delta, since the base of the current mcode area
> +-- (J->mcarea) is used as a hint for mcode allocator (see
> +-- lj_mcode.c for info).
> +generators.fillmcode(cbool_traceno, 1024 * 1024)
>
> -- XXX: I tried to make the test in pure Lua, but I failed to
> -- implement the robust solution. As a result I've implemented a
> diff --git a/test/tarantool-tests/utils/frontend.lua b/test/tarantool-tests/utils/frontend.lua
> index 2afebbb2..414257fd 100644
> --- a/test/tarantool-tests/utils/frontend.lua
> +++ b/test/tarantool-tests/utils/frontend.lua
> @@ -1,6 +1,10 @@
> local M = {}
>
> local bc = require('jit.bc')
> +local jutil = require('jit.util')
> +local vmdef = require('jit.vmdef')
> +local bcnames = vmdef.bcnames
> +local band, rshift = bit.band, bit.rshift
>
> function M.hasbc(f, bytecode)
> assert(type(f) == 'function', 'argument #1 should be a function')
> @@ -22,4 +26,24 @@ function M.hasbc(f, bytecode)
> return hasbc
> end
>
> +-- Get traceno of the trace associated with the given function.
> +function M.gettraceno(func)
> + assert(type(func) == 'function', 'argument #1 should be a function')
> +
> + -- The 0th BC is the header.
> + local func_ins = jutil.funcbc(func, 0)
> + local BC_NAME_LENGTH = 6
> + local RD_SHIFT = 16
> +
> + -- Calculate index in `bcnames` string.
> + local op_idx = BC_NAME_LENGTH * band(func_ins, 0xff)
> + -- Get the name of the operation.
> + local op_name = string.sub(bcnames, op_idx + 1, op_idx + BC_NAME_LENGTH)
> + assert(op_name:match('JFUNC'),
> + 'The given function has non-jitted header: ' .. op_name)
> +
> + -- RD contains the traceno.
> + return rshift(func_ins, RD_SHIFT)
> +end
> +
> return M
> diff --git a/test/tarantool-tests/utils/jit/generators.lua b/test/tarantool-tests/utils/jit/generators.lua
> new file mode 100644
> index 00000000..62b6e0ef
> --- /dev/null
> +++ b/test/tarantool-tests/utils/jit/generators.lua
> @@ -0,0 +1,115 @@
> +local M = {}
> +
> +local jutil = require('jit.util')
> +
> +local function getlast_traceno()
> + return misc.getmetrics().jit_trace_num
> +end
> +
> +-- Convert addr to positive value if needed.
> +local function canonize_address(addr)
Nit: the `canonize` variant is mostly used in theological materials, while
`canonicalize` is the more common term in software development.
Feel free to ignore.
> + if addr < 0 then addr = addr + 2 ^ 32 end
> + return addr
> +end
> +
> +-- Need some storage to avoid functions and traces to be
> +-- collected.
Typo: s/Need/We need/ or s/Need some storage/Some storage is needed/
Typo: s/to be collected/being collected/
> +local recfuncs = {}
> +local last_i = 0
> +-- This function generates a table of functions with heavy mcode
> +-- payload with tab arithmetics to fill the mcode area from the
> +-- one trace mcode by the some given size. This size is usually
Typo: s/by the some/by some/
> +-- big enough, because we want to check long jump side exits from
> +-- some traces.
> +-- Assumes, that maxmcode and maxtrace options are set to be sure,
Typo: s/that/that the/
> +-- that we can produce such amount of mcode.
> +function M.fillmcode(trace_from, size)
> + local mcode, addr_from = jutil.tracemc(trace_from)
> + assert(mcode, 'the #1 argument should be an existed trace number')
Typo: s/existed/existing/
> + addr_from = canonize_address(addr_from)
> + local required_diff = size + #mcode
> +
> + -- Marker to check that traces are not flushed.
> + local maxtraceno = getlast_traceno()
> + local FLUSH_ERR = 'Traces are flushed, check your maxtrace, maxmcode options'
> +
> + local _, last_addr = jutil.tracemc(maxtraceno)
> + last_addr = canonize_address(last_addr)
> +
> + -- Addresses of traces may increase or decrease depending on OS,
> + -- so use absolute diff.
> + while math.abs(last_addr - addr_from) > required_diff do
> + last_i = last_i + 1
> + -- This is a quite heavy workload (though it doesn't look like
Typo: s/This is a quite/This is quite a/
> + -- one at first). Each load from a table is type guarded. Each
> + -- table lookup (for both stores and loads) is guarded for
> + -- table <hmask> value and presence of the metatable. The code
Typo: s/and presence/and the presence/
> + -- below results to ~8Kb of mcode for ARM64 and MIPS64 in
Typo: s/results to/results in/
> + -- practice.
> + local fname = ('fillmcode[%d]'):format(last_i)
> + recfuncs[last_i] = assert(loadstring(([[
> + return function(src)
> + local p = %d
Nit: Poor naming; a more descriptive name would be preferable.
> + local tmp = { }
> + local dst = { }
> + -- XXX: use 5 as stop index to reduce LLEAVE (leaving loop
Typo: s/as stop/as a stop/
> + -- in root trace) errors due to hotcount collisions.
> + for i = 1, 5 do
> + tmp.a = src.a * p tmp.j = src.j * p tmp.s = src.s * p
> + tmp.b = src.b * p tmp.k = src.k * p tmp.t = src.t * p
> + tmp.c = src.c * p tmp.l = src.l * p tmp.u = src.u * p
> + tmp.d = src.d * p tmp.m = src.m * p tmp.v = src.v * p
> + tmp.e = src.e * p tmp.n = src.n * p tmp.w = src.w * p
> + tmp.f = src.f * p tmp.o = src.o * p tmp.x = src.x * p
> + tmp.g = src.g * p tmp.p = src.p * p tmp.y = src.y * p
> + tmp.h = src.h * p tmp.q = src.q * p tmp.z = src.z * p
> + tmp.i = src.i * p tmp.r = src.r * p
> +
> + dst.a = tmp.z + p dst.j = tmp.q + p dst.s = tmp.h + p
> + dst.b = tmp.y + p dst.k = tmp.p + p dst.t = tmp.g + p
> + dst.c = tmp.x + p dst.l = tmp.o + p dst.u = tmp.f + p
> + dst.d = tmp.w + p dst.m = tmp.n + p dst.v = tmp.e + p
> + dst.e = tmp.v + p dst.n = tmp.m + p dst.w = tmp.d + p
> + dst.f = tmp.u + p dst.o = tmp.l + p dst.x = tmp.c + p
> + dst.g = tmp.t + p dst.p = tmp.k + p dst.y = tmp.b + p
> + dst.h = tmp.s + p dst.q = tmp.j + p dst.z = tmp.a + p
> + dst.i = tmp.r + p dst.r = tmp.i + p
> + end
> + dst.tmp = tmp
> + return dst
> + end
> + ]]):format(last_i), fname), ('Syntax error in function %s'):format(fname))()
> + -- XXX: FNEW is NYI, hence loop recording fails at this point.
> + -- The recording is aborted on purpose: the whole loop
> + -- recording might lead to a very long trace error (via return
> + -- to a lower frame), or a trace with lots of side traces. We
> + -- need neither of this, but just a bunch of traces filling
> + -- the available mcode area.
> + local function tnew(p)
Nit: same issue with naming.
> + return {
> + a = p + 1, f = p + 6, k = p + 11, p = p + 16, u = p + 21, z = p + 26,
> + b = p + 2, g = p + 7, l = p + 12, q = p + 17, v = p + 22,
> + c = p + 3, h = p + 8, m = p + 13, r = p + 18, w = p + 23,
> + d = p + 4, i = p + 9, n = p + 14, s = p + 19, x = p + 24,
> + e = p + 5, j = p + 10, o = p + 15, t = p + 20, y = p + 25,
> + }
> + end
> + -- Each function call produces a trace (see the template for
> + -- the function definition above).
> + recfuncs[last_i](tnew(last_i))
> + local last_traceno = getlast_traceno()
> + if last_traceno < maxtraceno then
> + error(FLUSH_ERR)
> + end
> +
> + -- Calculate the address of the last trace start.
> + maxtraceno = last_traceno
> + _, last_addr = jutil.tracemc(last_traceno)
> + if not last_addr then
> + error(FLUSH_ERR)
> + end
> + last_addr = canonize_address(last_addr)
> + end
> +end
> +
> +return M
> --
> 2.41.0
Best regards,
Maxim Kokryashkin
>
More information about the Tarantool-patches
mailing list