[Tarantool-patches] [PATCH luajit 02/19] test: introduce mcode generator for tests

Maxim Kokryashkin m.kokryashkin at tarantool.org
Tue Aug 15 13:14:18 MSK 2023


Hi, Sergey!
Thanks for the patch!
Please consider my comments below.

On Wed, Aug 09, 2023 at 06:35:51PM +0300, Sergey Kaplun via Tarantool-patches wrote:
> The test <test/tarantool-tests/gh-6098-fix-side-exit-patching-on-arm64>
> depends on a particular offset of the side trace mcode relative to the
> parent trace. Before this commit, the test just ran a number of functions
> to generate traces that fill the required mcode range. Unfortunately, this
> approach is not robust, since sometimes a trace is not recorded due to
> "leaving loop in root trace" errors caused by hotcount
> collisions.
> 
> This patch introduces the following helpers:
> * `frontend.gettraceno(func)` -- returns the traceno for the given
>   function, assuming that there is a compiled trace for its prototype
>   (i.e. the 0th bytecode is JFUNC).
> * `jit.generators.fillmcode(traceno, size)` fills mcode area of the
>   given size from the given trace. It is useful to generate some mcode
>   to test jumps to side traces remote enough from the parent.
> ---
>  ...8-fix-side-exit-patching-on-arm64.test.lua |  78 ++----------
>  test/tarantool-tests/utils/frontend.lua       |  24 ++++
>  test/tarantool-tests/utils/jit/generators.lua | 115 ++++++++++++++++++
>  3 files changed, 150 insertions(+), 67 deletions(-)
>  create mode 100644 test/tarantool-tests/utils/jit/generators.lua
> 
> diff --git a/test/tarantool-tests/gh-6098-fix-side-exit-patching-on-arm64.test.lua b/test/tarantool-tests/gh-6098-fix-side-exit-patching-on-arm64.test.lua
> index 93db3041..678ac914 100644
> --- a/test/tarantool-tests/gh-6098-fix-side-exit-patching-on-arm64.test.lua
> +++ b/test/tarantool-tests/gh-6098-fix-side-exit-patching-on-arm64.test.lua
> @@ -1,8 +1,12 @@
>  local tap = require('tap')
>  local test = tap.test('gh-6098-fix-side-exit-patching-on-arm64'):skipcond({
>    ['Test requires JIT enabled'] = not jit.status(),
> +  ['Disabled on *BSD due to #4819'] = jit.os == 'BSD',
>  })
>  
> +local generators = require('utils').jit.generators
> +local frontend = require('utils').frontend
> +
>  test:plan(1)
>  
>  -- The function to be tested for side exit patching:
> @@ -20,52 +24,6 @@ local function cbool(cond)
>    end
>  end
>  
> --- XXX: Function template below produces 8Kb mcode for ARM64, so
> --- we need to compile at least 128 traces to exceed 1Mb delta
> --- between <cbool> root trace side exit and <cbool> side trace.
> --- Unfortunately, we have no other option for extending this jump
> --- delta, since the base of the current mcode area (J->mcarea) is
> --- used as a hint for mcode allocator (see lj_mcode.c for info).
> -local FUNCS = 128
> -local recfuncs = { }
> -for i = 1, FUNCS do
> -  -- This is a quite heavy workload (though it doesn't look like
> -  -- one at first). Each load from a table is type guarded. Each
> -  -- table lookup (for both stores and loads) is guarded for table
> -  -- <hmask> value and metatable presence. The code below results
> -  -- to 8Kb of mcode for ARM64 in practice.
> -  recfuncs[i] = assert(load(([[
> -    return function(src)
> -      local p = %d
> -      local tmp = { }
> -      local dst = { }
> -      for i = 1, 3 do
> -        tmp.a = src.a * p   tmp.j = src.j * p   tmp.s = src.s * p
> -        tmp.b = src.b * p   tmp.k = src.k * p   tmp.t = src.t * p
> -        tmp.c = src.c * p   tmp.l = src.l * p   tmp.u = src.u * p
> -        tmp.d = src.d * p   tmp.m = src.m * p   tmp.v = src.v * p
> -        tmp.e = src.e * p   tmp.n = src.n * p   tmp.w = src.w * p
> -        tmp.f = src.f * p   tmp.o = src.o * p   tmp.x = src.x * p
> -        tmp.g = src.g * p   tmp.p = src.p * p   tmp.y = src.y * p
> -        tmp.h = src.h * p   tmp.q = src.q * p   tmp.z = src.z * p
> -        tmp.i = src.i * p   tmp.r = src.r * p
> -
> -        dst.a = tmp.z + p   dst.j = tmp.q + p   dst.s = tmp.h + p
> -        dst.b = tmp.y + p   dst.k = tmp.p + p   dst.t = tmp.g + p
> -        dst.c = tmp.x + p   dst.l = tmp.o + p   dst.u = tmp.f + p
> -        dst.d = tmp.w + p   dst.m = tmp.n + p   dst.v = tmp.e + p
> -        dst.e = tmp.v + p   dst.n = tmp.m + p   dst.w = tmp.d + p
> -        dst.f = tmp.u + p   dst.o = tmp.l + p   dst.x = tmp.c + p
> -        dst.g = tmp.t + p   dst.p = tmp.k + p   dst.y = tmp.b + p
> -        dst.h = tmp.s + p   dst.q = tmp.j + p   dst.z = tmp.a + p
> -        dst.i = tmp.r + p   dst.r = tmp.i + p
> -      end
> -      dst.tmp = tmp
> -      return dst
> -    end
> -  ]]):format(i)), ('Syntax error in function recfuncs[%d]'):format(i))()
> -end
> -
>  -- Make compiler work hard:
>  -- * No optimizations at all to produce more mcode.
>  -- * Try to compile all compiled paths as early as JIT can.
> @@ -78,27 +36,13 @@ cbool(true)
>  -- a root trace for <cbool>.
>  cbool(true)
>  
> -for i = 1, FUNCS do
> -  -- XXX: FNEW is NYI, hence loop recording fails at this point.
> -  -- The recording is aborted on purpose: we are going to record
> -  -- <FUNCS> number of traces for functions in <recfuncs>.
> -  -- Otherwise, loop recording might lead to a very long trace
> -  -- error (via return to a lower frame), or a trace with lots of
> -  -- side traces. We need neither of this, but just bunch of
> -  -- traces filling the available mcode area.
> -  local function tnew(p)
> -    return {
> -      a = p + 1, f = p + 6,  k = p + 11, p = p + 16, u = p + 21, z = p + 26,
> -      b = p + 2, g = p + 7,  l = p + 12, q = p + 17, v = p + 22,
> -      c = p + 3, h = p + 8,  m = p + 13, r = p + 18, w = p + 23,
> -      d = p + 4, i = p + 9,  n = p + 14, s = p + 19, x = p + 24,
> -      e = p + 5, j = p + 10, o = p + 15, t = p + 20, y = p + 25,
> -    }
> -  end
> -  -- Each function call produces a trace (see the template for the
> -  -- function definition above).
> -  recfuncs[i](tnew(i))
> -end
> +local cbool_traceno = frontend.gettraceno(cbool)
> +
> +-- XXX: Unfortunately, we have no other option for extending
> +-- this jump delta, since the base of the current mcode area
> +-- (J->mcarea) is used as a hint for mcode allocator (see
> +-- lj_mcode.c for info).
> +generators.fillmcode(cbool_traceno, 1024 * 1024)
>  
>  -- XXX: I tried to make the test in pure Lua, but I failed to
>  -- implement the robust solution. As a result I've implemented a
> diff --git a/test/tarantool-tests/utils/frontend.lua b/test/tarantool-tests/utils/frontend.lua
> index 2afebbb2..414257fd 100644
> --- a/test/tarantool-tests/utils/frontend.lua
> +++ b/test/tarantool-tests/utils/frontend.lua
> @@ -1,6 +1,10 @@
>  local M = {}
>  
>  local bc = require('jit.bc')
> +local jutil = require('jit.util')
> +local vmdef = require('jit.vmdef')
> +local bcnames = vmdef.bcnames
> +local band, rshift = bit.band, bit.rshift
>  
>  function M.hasbc(f, bytecode)
>    assert(type(f) == 'function', 'argument #1 should be a function')
> @@ -22,4 +26,24 @@ function M.hasbc(f, bytecode)
>    return hasbc
>  end
>  
> +-- Get traceno of the trace associated with the given function.
> +function M.gettraceno(func)
> +  assert(type(func) == 'function', 'argument #1 should be a function')
> +
> +  -- The 0th BC is the header.
> +  local func_ins = jutil.funcbc(func, 0)
> +  local BC_NAME_LENGTH = 6
> +  local RD_SHIFT = 16
> +
> +  -- Calculate index in `bcnames` string.
> +  local op_idx = BC_NAME_LENGTH * band(func_ins, 0xff)
> +  -- Get the name of the operation.
> +  local op_name = string.sub(bcnames, op_idx + 1, op_idx + BC_NAME_LENGTH)
> +  assert(op_name:match('JFUNC'),
> +         'The given function has non-jitted header: ' .. op_name)
> +
> +  -- RD contains the traceno.
> +  return rshift(func_ins, RD_SHIFT)
> +end
> +
>  return M
> diff --git a/test/tarantool-tests/utils/jit/generators.lua b/test/tarantool-tests/utils/jit/generators.lua
> new file mode 100644
> index 00000000..62b6e0ef
> --- /dev/null
> +++ b/test/tarantool-tests/utils/jit/generators.lua
> @@ -0,0 +1,115 @@
> +local M = {}
> +
> +local jutil = require('jit.util')
> +
> +local function getlast_traceno()
> +  return misc.getmetrics().jit_trace_num
> +end
> +
> +-- Convert addr to positive value if needed.
> +local function canonize_address(addr)
Nit: `canonize` is mostly used in theological texts, while `canonicalize` is
the more common term in software development.
Feel free to ignore.
> +  if addr < 0 then addr = addr + 2 ^ 32 end
> +  return addr
> +end
> +
> +-- Need some storage to avoid functions and traces to be
> +-- collected.
Typo: s/Need/We need/ or s/Need some storage/Some storage is needed/
Typo: s/to be collected/being collected/
> +local recfuncs = {}
> +local last_i = 0
> +-- This function generates a table of functions with heavy mcode
> +-- payload with tab arithmetics to fill the mcode area from the
> +-- one trace mcode by the some given size. This size is usually
Typo: s/by the some/by some/
> +-- big enough, because we want to check long jump side exits from
> +-- some traces.
> +-- Assumes, that maxmcode and maxtrace options are set to be sure,
Typo: s/that/that the/
> +-- that we can produce such amount of mcode.
> +function M.fillmcode(trace_from, size)
> +  local mcode, addr_from = jutil.tracemc(trace_from)
> +  assert(mcode, 'the #1 argument should be an existed trace number')
Typo: s/existed/existing/
> +  addr_from = canonize_address(addr_from)
> +  local required_diff = size + #mcode
> +
> +  -- Marker to check that traces are not flushed.
> +  local maxtraceno = getlast_traceno()
> +  local FLUSH_ERR = 'Traces are flushed, check your maxtrace, maxmcode options'
> +
> +  local _, last_addr = jutil.tracemc(maxtraceno)
> +  last_addr = canonize_address(last_addr)
> +
> +  -- Addresses of traces may increase or decrease depending on OS,
> +  -- so use absolute diff.
> +  while math.abs(last_addr - addr_from) > required_diff do
> +    last_i = last_i + 1
> +    -- This is a quite heavy workload (though it doesn't look like
Typo: s/This is a quite/This is quite a/
> +    -- one at first). Each load from a table is type guarded. Each
> +    -- table lookup (for both stores and loads) is guarded for
> +    -- table <hmask> value and presence of the metatable. The code
Typo: s/and presence/and the presence/
> +    -- below results to ~8Kb of mcode for ARM64 and MIPS64 in
Typo: s/results to/results in/
> +    -- practice.
> +    local fname = ('fillmcode[%d]'):format(last_i)
> +    recfuncs[last_i] = assert(loadstring(([[
> +      return function(src)
> +        local p = %d
Nit: poor naming; a more descriptive name than `p` is preferred.
> +        local tmp = { }
> +        local dst = { }
> +        -- XXX: use 5 as stop index to reduce LLEAVE (leaving loop
Typo: s/as stop/as a stop/
> +        -- in root trace) errors due to hotcount collisions.
> +        for i = 1, 5 do
> +          tmp.a = src.a * p   tmp.j = src.j * p   tmp.s = src.s * p
> +          tmp.b = src.b * p   tmp.k = src.k * p   tmp.t = src.t * p
> +          tmp.c = src.c * p   tmp.l = src.l * p   tmp.u = src.u * p
> +          tmp.d = src.d * p   tmp.m = src.m * p   tmp.v = src.v * p
> +          tmp.e = src.e * p   tmp.n = src.n * p   tmp.w = src.w * p
> +          tmp.f = src.f * p   tmp.o = src.o * p   tmp.x = src.x * p
> +          tmp.g = src.g * p   tmp.p = src.p * p   tmp.y = src.y * p
> +          tmp.h = src.h * p   tmp.q = src.q * p   tmp.z = src.z * p
> +          tmp.i = src.i * p   tmp.r = src.r * p
> +
> +          dst.a = tmp.z + p   dst.j = tmp.q + p   dst.s = tmp.h + p
> +          dst.b = tmp.y + p   dst.k = tmp.p + p   dst.t = tmp.g + p
> +          dst.c = tmp.x + p   dst.l = tmp.o + p   dst.u = tmp.f + p
> +          dst.d = tmp.w + p   dst.m = tmp.n + p   dst.v = tmp.e + p
> +          dst.e = tmp.v + p   dst.n = tmp.m + p   dst.w = tmp.d + p
> +          dst.f = tmp.u + p   dst.o = tmp.l + p   dst.x = tmp.c + p
> +          dst.g = tmp.t + p   dst.p = tmp.k + p   dst.y = tmp.b + p
> +          dst.h = tmp.s + p   dst.q = tmp.j + p   dst.z = tmp.a + p
> +          dst.i = tmp.r + p   dst.r = tmp.i + p
> +        end
> +        dst.tmp = tmp
> +        return dst
> +      end
> +    ]]):format(last_i), fname), ('Syntax error in function %s'):format(fname))()
> +    -- XXX: FNEW is NYI, hence loop recording fails at this point.
> +    -- The recording is aborted on purpose: the whole loop
> +    -- recording might lead to a very long trace error (via return
> +    -- to a lower frame), or a trace with lots of side traces. We
> +    -- need neither of this, but just a bunch of traces filling
> +    -- the available mcode area.
> +    local function tnew(p)
Nit: same issue with naming.
> +      return {
> +        a = p + 1, f = p + 6,  k = p + 11, p = p + 16, u = p + 21, z = p + 26,
> +        b = p + 2, g = p + 7,  l = p + 12, q = p + 17, v = p + 22,
> +        c = p + 3, h = p + 8,  m = p + 13, r = p + 18, w = p + 23,
> +        d = p + 4, i = p + 9,  n = p + 14, s = p + 19, x = p + 24,
> +        e = p + 5, j = p + 10, o = p + 15, t = p + 20, y = p + 25,
> +      }
> +    end
> +    -- Each function call produces a trace (see the template for
> +    -- the function definition above).
> +    recfuncs[last_i](tnew(last_i))
> +    local last_traceno = getlast_traceno()
> +    if last_traceno < maxtraceno then
> +      error(FLUSH_ERR)
> +    end
> +
> +    -- Calculate the address of the last trace start.
> +    maxtraceno = last_traceno
> +    _, last_addr = jutil.tracemc(last_traceno)
> +    if not last_addr then
> +      error(FLUSH_ERR)
> +    end
> +    last_addr = canonize_address(last_addr)
> +  end
> +end
> +
> +return M
> -- 
> 2.41.0
Best regards,
Maxim Kokryashkin
> 


More information about the Tarantool-patches mailing list