[Tarantool-patches] [PATCH luajit 2/2] Fix base register coalescing in side trace.

Maxim Kokryashkin m.kokryashkin at tarantool.org
Fri Oct 13 08:59:16 MSK 2023


Hi, Sergey!
Thanks for the patch!
LGTM, except for a few comments below.

On Wed, Oct 11, 2023 at 06:04:10PM +0300, Sergey Kaplun wrote:
> From: Mike Pall <mike>
> 
> Thanks to Sergey Kaplun, NiLuJe and Peter Cawley.
> 
> (cherry-picked from commit aa2db7ebd1267836af5221336ccb4e9b4aa8372d)
> 
> The previous patch fixed just part of the problem with the register
> coalesing. For example, the parent base register may be used inside the
> parent or child register sets when it shouldn't. This leads to incorrect
> register allocations, which may lead to crashes or undefined behaviour.
> This patch fixes it by excluding the parent base register from both
> register sets.
> 
> The test case for this patch doesn't fail before the commit since it
> requires specific register allocation, which is hard to construct and
> very fragile. Due to the lack of ideal sync with the upstream
> repository, the test is passed before the patch.
> It should become correct in future patches.
Typo: s/in future patches/after backporting future patches/
> 
> Resolves tarantool/tarantool#8767
How can I reassure that, since we don't have any reproducer?
> Part of tarantool/tarantool#9145
> 
> Sergey Kaplun:
> * added the description and the test for the problem
> ---
>  src/lj_asm.c                                  |   7 +-
>  src/lj_asm_arm.h                              |   7 +-
>  src/lj_asm_arm64.h                            |   7 +-
>  src/lj_asm_mips.h                             |   8 +-
>  src/lj_asm_ppc.h                              |   8 +-
>  src/lj_asm_x86.h                              |   8 +-
>  .../lj-1031-asm-head-side-base-reg.test.lua   | 139 ++++++++++++++++++
>  7 files changed, 163 insertions(+), 21 deletions(-)
>  create mode 100644 test/tarantool-tests/lj-1031-asm-head-side-base-reg.test.lua
> 
> diff --git a/src/lj_asm.c b/src/lj_asm.c
> index ca06860a..5137d05c 100644
> --- a/src/lj_asm.c
> +++ b/src/lj_asm.c
> @@ -1860,6 +1860,7 @@ static void asm_head_side(ASMState *as)
>    RegSet allow = RSET_ALL;  /* Inverse of all coalesced registers. */
>    RegSet live = RSET_EMPTY;  /* Live parent registers. */
>    RegSet pallow = RSET_GPR;  /* Registers needed by the parent stack check. */
> +  Reg pbase;
>    IRIns *irp = &as->parent->ir[REF_BASE];  /* Parent base. */
>    int32_t spadj, spdelta;
>    int pass2 = 0;
> @@ -1870,7 +1871,11 @@ static void asm_head_side(ASMState *as)
>      /* Force snap #0 alloc to prevent register overwrite in stack check. */
>      asm_snap_alloc(as, 0);
>    }
> -  allow = asm_head_side_base(as, irp, allow);
> +  pbase = asm_head_side_base(as, irp);
> +  if (pbase != RID_NONE) {
> +    rset_clear(allow, pbase);
> +    rset_clear(pallow, pbase);
> +  }
>  
>    /* Scan all parent SLOADs and collect register dependencies. */
>    for (i = as->stopins; i > REF_BASE; i--) {
> diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
> index 47564d2e..5a0f925f 100644
> --- a/src/lj_asm_arm.h
> +++ b/src/lj_asm_arm.h
> @@ -2107,7 +2107,7 @@ static void asm_head_root_base(ASMState *as)
>  }
>  
>  /* Coalesce BASE register for a side trace. */
> -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> +static Reg asm_head_side_base(ASMState *as, IRIns *irp)
>  {
>    IRIns *ir;
>    asm_head_lreg(as);
> @@ -2115,16 +2115,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
>    if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
>      ra_spill(as, ir);
>    if (ra_hasspill(irp->s)) {
> -    rset_clear(allow, ra_dest(as, ir, allow));
> +    return ra_dest(as, ir, RSET_GPR);
>    } else {
>      Reg r = irp->r;
>      lj_assertA(ra_hasreg(r), "base reg lost");
> -    rset_clear(allow, r);
>      if (r != ir->r && !rset_test(as->freeset, r))
>        ra_restore(as, regcost_ref(as->cost[r]));
>      ra_destreg(as, ir, r);
> +    return r;
>    }
> -  return allow;
>  }
>  
>  /* -- Tail of trace ------------------------------------------------------- */
> diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
> index d1d4237b..88b47ceb 100644
> --- a/src/lj_asm_arm64.h
> +++ b/src/lj_asm_arm64.h
> @@ -1873,7 +1873,7 @@ static void asm_head_root_base(ASMState *as)
>  }
>  
>  /* Coalesce BASE register for a side trace. */
> -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> +static Reg asm_head_side_base(ASMState *as, IRIns *irp)
>  {
>    IRIns *ir;
>    asm_head_lreg(as);
> @@ -1881,16 +1881,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
>    if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
>      ra_spill(as, ir);
>    if (ra_hasspill(irp->s)) {
> -    rset_clear(allow, ra_dest(as, ir, allow));
> +    return ra_dest(as, ir, RSET_GPR);
>    } else {
>      Reg r = irp->r;
>      lj_assertA(ra_hasreg(r), "base reg lost");
> -    rset_clear(allow, r);
>      if (r != ir->r && !rset_test(as->freeset, r))
>        ra_restore(as, regcost_ref(as->cost[r]));
>      ra_destreg(as, ir, r);
> +    return r;
>    }
> -  return allow;
>  }
>  
>  /* -- Tail of trace ------------------------------------------------------- */
> diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
> index ac9090f2..597c6d62 100644
> --- a/src/lj_asm_mips.h
> +++ b/src/lj_asm_mips.h
> @@ -2595,7 +2595,7 @@ static void asm_head_root_base(ASMState *as)
>  }
>  
>  /* Coalesce BASE register for a side trace. */
> -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> +static Reg asm_head_side_base(ASMState *as, IRIns *irp)
>  {
>    IRIns *ir = IR(REF_BASE);
>    Reg r = ir->r;
> @@ -2605,15 +2605,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
>      if (rset_test(as->modset, r) || irt_ismarked(ir->t))
>        ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
>      if (irp->r == r) {
> -      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
> +      return r;  /* Same BASE register already coalesced. */
>      } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
> -      rset_clear(allow, irp->r);
>        emit_move(as, r, irp->r);  /* Move from coalesced parent reg. */
> +      return irp->r;
>      } else {
>        emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
>      }
>    }
> -  return allow;
> +  return RID_NONE;
>  }
>  
>  /* -- Tail of trace ------------------------------------------------------- */
> diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
> index 971dcc88..7bba71b3 100644
> --- a/src/lj_asm_ppc.h
> +++ b/src/lj_asm_ppc.h
> @@ -2130,7 +2130,7 @@ static void asm_head_root_base(ASMState *as)
>  }
>  
>  /* Coalesce BASE register for a side trace. */
> -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> +static Reg asm_head_side_base(ASMState *as, IRIns *irp)
>  {
>    IRIns *ir = IR(REF_BASE);
>    Reg r = ir->r;
> @@ -2139,15 +2139,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
>      if (rset_test(as->modset, r) || irt_ismarked(ir->t))
>        ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
>      if (irp->r == r) {
> -      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
> +      return r;  /* Same BASE register already coalesced. */
>      } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
> -      rset_clear(allow, irp->r);
>        emit_mr(as, r, irp->r);  /* Move from coalesced parent reg. */
> +      return irp->r;
>      } else {
>        emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
>      }
>    }
> -  return allow;
> +  return RID_NONE;
>  }
>  
>  /* -- Tail of trace ------------------------------------------------------- */
> diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
> index 2b810c8d..86ce3937 100644
> --- a/src/lj_asm_x86.h
> +++ b/src/lj_asm_x86.h
> @@ -2856,7 +2856,7 @@ static void asm_head_root_base(ASMState *as)
>  }
>  
>  /* Coalesce or reload BASE register for a side trace. */
> -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> +static Reg asm_head_side_base(ASMState *as, IRIns *irp)
>  {
>    IRIns *ir = IR(REF_BASE);
>    Reg r = ir->r;
> @@ -2865,16 +2865,16 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
>      if (rset_test(as->modset, r) || irt_ismarked(ir->t))
>        ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
>      if (irp->r == r) {
> -      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
> +      return r;  /* Same BASE register already coalesced. */
>      } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
>        /* Move from coalesced parent reg. */
> -      rset_clear(allow, irp->r);
>        emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
> +      return irp->r;
>      } else {
>        emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
>      }
>    }
> -  return allow;
> +  return RID_NONE;
>  }
>  
>  /* -- Tail of trace ------------------------------------------------------- */
> diff --git a/test/tarantool-tests/lj-1031-asm-head-side-base-reg.test.lua b/test/tarantool-tests/lj-1031-asm-head-side-base-reg.test.lua
> new file mode 100644
> index 00000000..fc5efaaa
> --- /dev/null
> +++ b/test/tarantool-tests/lj-1031-asm-head-side-base-reg.test.lua
> @@ -0,0 +1,139 @@
> +local tap = require('tap')
> +-- Test file to demonstrate incorrect side trace head assembling
> +-- when use parent trace register holding base (`RID_BASE`).
> +-- See also, https://github.com/LuaJIT/LuaJIT/issues/1031.
> +--
> +-- XXX: For now, the test doesn't fail even on arm64 due to the
> +-- different from upstream register allocation. Nevertheless, this
> +-- issue should be gone with future backporting, so just leave the
> +-- test case as is.
> +local test = tap.test('lj-1031-asm-head-side-base-reg'):skipcond({
> +  ['Test requires JIT enabled'] = not jit.status(),
> +})
> +
> +local ffi = require 'ffi'
Please use parantheses here too.
> +local int64_t = ffi.typeof('int64_t')
> +
> +test:plan(1)
> +
> +-- To reproduce the issue (reproduced only on arm64) we need a
> +-- little bit of a tricky structure for the traces:
> +-- +-> + -- start TRACE 1
> +-- |   |    ...
> +-- |   |---> + -- start TRACE 3 (side trace for the 1st)
> +-- |   |     | ...
> +-- |   |     v (link with TRACE 2)
> +-- |   | +-> + -- start TRACE 2 (some loop that wasn't recorded
> +-- |   | |   |                   before due to the loop limit)
> +-- |   | +---+
> +-- +<--+
> +-- Also, we need high register pressure to be sure that register
> +-- holding value of the base will be spilled.
> +-- So, during the assembly of "TRACE 3" (#6 in our test),
> +-- `RID_BASE` is spilled and restored via 32-bit fpr. This leads
> +-- to an incorrect value because it contains a 64-bit value.
> +-- See the original ticket for the details.
> +
> +-- XXX: Reduce amount of IR.
> +local tonumber = tonumber
> +local ffi_new = ffi.new
> +
> +-- Resulting slots for values calculated on trace.
> +-- luacheck: ignore
> +local r1
> +local r2
> +local r3
> +local r4
> +local r5
> +local r6
> +local r7
> +local r8
> +local r9
> +local r10
> +local r11
> +local r12
> +local r13
> +local r14
> +local r15
> +local r16
> +local r17
> +local r18
> +local r19
> +
> +local ARR_SIZE = 1e2
Maybe it is better to just write it as 100?
> +local lim_arr = {}
> +
> +-- XXX: Prevent irrelevant output in jit.dump().
> +jit.off()
> +
> +local INNER_TRACE_LIMIT = 20
> +local INNER_LIMIT1 = 1
> +local INNER_LIMIT2 = 4
Please drop a comment with explanation of why those numbers were chosen.
> +for i = 1, INNER_TRACE_LIMIT do lim_arr[i] = INNER_LIMIT1 end
> +for i = INNER_TRACE_LIMIT + 1, ARR_SIZE + 1 do lim_arr[i] = INNER_LIMIT2 end
These loops are hardly readable, it would be nice to make the multiline.
> +
> +-- Enable compilation back.
> +jit.on()
> +
> +jit.opt.start('hotloop=1', 'hotexit=2')
> +
> +-- XXX: Trace numbers are given with the respect of using
> +-- `jit.dump`.
> +-- Start TRACE 2 (1 is inside dump bc).
> +local k = 0
> +while k < ARR_SIZE do
> +  k = k + 1
> +  -- Forcify register pressure.
> +  local l1  = ffi_new(int64_t, k + 1)
> +  local l2  = ffi_new(int64_t, k + 2)
> +  local l3  = ffi_new(int64_t, k + 3)
> +  local l4  = ffi_new(int64_t, k + 4)
> +  local l5  = ffi_new(int64_t, k + 5)
> +  local l6  = ffi_new(int64_t, k + 6)
> +  local l7  = ffi_new(int64_t, k + 7)
> +  local l8  = ffi_new(int64_t, k + 8)
> +  local l9  = ffi_new(int64_t, k + 9)
> +  local l10 = ffi_new(int64_t, k + 10)
> +  local l11 = ffi_new(int64_t, k + 11)
> +  local l12 = ffi_new(int64_t, k + 12)
> +  local l13 = ffi_new(int64_t, k + 13)
> +  local l14 = ffi_new(int64_t, k + 14)
> +  local l15 = ffi_new(int64_t, k + 15)
> +  local l16 = ffi_new(int64_t, k + 16)
> +  local l17 = ffi_new(int64_t, k + 17)
> +  local l18 = ffi_new(int64_t, k + 18)
> +  local l19 = ffi_new(int64_t, k + 19)
> +  -- Side exit for TRACE 6 start 2/1.
> +  -- luacheck: ignore
> +  if k > 55 then else end
Please drop a comment and explain why it is 55, for some people
that is not obvious.
> +  r1  = tonumber(l1)
> +  r2  = tonumber(l2)
> +  r3  = tonumber(l3)
> +  r4  = tonumber(l4)
> +  r5  = tonumber(l5)
> +  r6  = tonumber(l6)
> +  r7  = tonumber(l7)
> +  r8  = tonumber(l8)
> +  r9  = tonumber(l9)
> +  r10 = tonumber(l10)
> +  r11 = tonumber(l11)
> +  r12 = tonumber(l12)
> +  r13 = tonumber(l13)
> +  r14 = tonumber(l14)
> +  r15 = tonumber(l15)
> +  r15 = tonumber(l15)
> +  r16 = tonumber(l16)
> +  r17 = tonumber(l17)
> +  r18 = tonumber(l18)
> +  r19 = tonumber(l19)
> +  local lim_inner = lim_arr[k]
> +  -- TRACE 3. Loop is not taken at the moment of recording TRACE 2
> +  -- (lim_inner == 1).
> +  for _ = 1, lim_inner do
> +    -- TRACE 6 stop -> 3.
> +  end
> +end
> +
> +test:ok(true, 'correct side trace head assembling')
> +
> +test:done(true)
> -- 
> 2.42.0
> 


More information about the Tarantool-patches mailing list