[Tarantool-patches] [PATCH luajit 2/2] Fix base register coalescing in side trace.
Maxim Kokryashkin
m.kokryashkin at tarantool.org
Fri Oct 13 08:59:16 MSK 2023
Hi, Sergey!
Thanks for the patch!
LGTM, except for a few comments below.
On Wed, Oct 11, 2023 at 06:04:10PM +0300, Sergey Kaplun wrote:
> From: Mike Pall <mike>
>
> Thanks to Sergey Kaplun, NiLuJe and Peter Cawley.
>
> (cherry-picked from commit aa2db7ebd1267836af5221336ccb4e9b4aa8372d)
>
> The previous patch fixed just part of the problem with the register
> coalesing. For example, the parent base register may be used inside the
> parent or child register sets when it shouldn't. This leads to incorrect
> register allocations, which may lead to crashes or undefined behaviour.
> This patch fixes it by excluding the parent base register from both
> register sets.
>
> The test case for this patch doesn't fail before the commit since it
> requires specific register allocation, which is hard to construct and
> very fragile. Due to the lack of ideal sync with the upstream
> repository, the test is passed before the patch.
> It should become correct in future patches.
Typo: s/in future patches/after backporting future patches/
>
> Resolves tarantool/tarantool#8767
How can I reassure that, since we don't have any reproducer?
> Part of tarantool/tarantool#9145
>
> Sergey Kaplun:
> * added the description and the test for the problem
> ---
> src/lj_asm.c | 7 +-
> src/lj_asm_arm.h | 7 +-
> src/lj_asm_arm64.h | 7 +-
> src/lj_asm_mips.h | 8 +-
> src/lj_asm_ppc.h | 8 +-
> src/lj_asm_x86.h | 8 +-
> .../lj-1031-asm-head-side-base-reg.test.lua | 139 ++++++++++++++++++
> 7 files changed, 163 insertions(+), 21 deletions(-)
> create mode 100644 test/tarantool-tests/lj-1031-asm-head-side-base-reg.test.lua
>
> diff --git a/src/lj_asm.c b/src/lj_asm.c
> index ca06860a..5137d05c 100644
> --- a/src/lj_asm.c
> +++ b/src/lj_asm.c
> @@ -1860,6 +1860,7 @@ static void asm_head_side(ASMState *as)
> RegSet allow = RSET_ALL; /* Inverse of all coalesced registers. */
> RegSet live = RSET_EMPTY; /* Live parent registers. */
> RegSet pallow = RSET_GPR; /* Registers needed by the parent stack check. */
> + Reg pbase;
> IRIns *irp = &as->parent->ir[REF_BASE]; /* Parent base. */
> int32_t spadj, spdelta;
> int pass2 = 0;
> @@ -1870,7 +1871,11 @@ static void asm_head_side(ASMState *as)
> /* Force snap #0 alloc to prevent register overwrite in stack check. */
> asm_snap_alloc(as, 0);
> }
> - allow = asm_head_side_base(as, irp, allow);
> + pbase = asm_head_side_base(as, irp);
> + if (pbase != RID_NONE) {
> + rset_clear(allow, pbase);
> + rset_clear(pallow, pbase);
> + }
>
> /* Scan all parent SLOADs and collect register dependencies. */
> for (i = as->stopins; i > REF_BASE; i--) {
> diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
> index 47564d2e..5a0f925f 100644
> --- a/src/lj_asm_arm.h
> +++ b/src/lj_asm_arm.h
> @@ -2107,7 +2107,7 @@ static void asm_head_root_base(ASMState *as)
> }
>
> /* Coalesce BASE register for a side trace. */
> -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> +static Reg asm_head_side_base(ASMState *as, IRIns *irp)
> {
> IRIns *ir;
> asm_head_lreg(as);
> @@ -2115,16 +2115,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
> ra_spill(as, ir);
> if (ra_hasspill(irp->s)) {
> - rset_clear(allow, ra_dest(as, ir, allow));
> + return ra_dest(as, ir, RSET_GPR);
> } else {
> Reg r = irp->r;
> lj_assertA(ra_hasreg(r), "base reg lost");
> - rset_clear(allow, r);
> if (r != ir->r && !rset_test(as->freeset, r))
> ra_restore(as, regcost_ref(as->cost[r]));
> ra_destreg(as, ir, r);
> + return r;
> }
> - return allow;
> }
>
> /* -- Tail of trace ------------------------------------------------------- */
> diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
> index d1d4237b..88b47ceb 100644
> --- a/src/lj_asm_arm64.h
> +++ b/src/lj_asm_arm64.h
> @@ -1873,7 +1873,7 @@ static void asm_head_root_base(ASMState *as)
> }
>
> /* Coalesce BASE register for a side trace. */
> -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> +static Reg asm_head_side_base(ASMState *as, IRIns *irp)
> {
> IRIns *ir;
> asm_head_lreg(as);
> @@ -1881,16 +1881,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
> ra_spill(as, ir);
> if (ra_hasspill(irp->s)) {
> - rset_clear(allow, ra_dest(as, ir, allow));
> + return ra_dest(as, ir, RSET_GPR);
> } else {
> Reg r = irp->r;
> lj_assertA(ra_hasreg(r), "base reg lost");
> - rset_clear(allow, r);
> if (r != ir->r && !rset_test(as->freeset, r))
> ra_restore(as, regcost_ref(as->cost[r]));
> ra_destreg(as, ir, r);
> + return r;
> }
> - return allow;
> }
>
> /* -- Tail of trace ------------------------------------------------------- */
> diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
> index ac9090f2..597c6d62 100644
> --- a/src/lj_asm_mips.h
> +++ b/src/lj_asm_mips.h
> @@ -2595,7 +2595,7 @@ static void asm_head_root_base(ASMState *as)
> }
>
> /* Coalesce BASE register for a side trace. */
> -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> +static Reg asm_head_side_base(ASMState *as, IRIns *irp)
> {
> IRIns *ir = IR(REF_BASE);
> Reg r = ir->r;
> @@ -2605,15 +2605,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> if (rset_test(as->modset, r) || irt_ismarked(ir->t))
> ir->r = RID_INIT; /* No inheritance for modified BASE register. */
> if (irp->r == r) {
> - rset_clear(allow, r); /* Mark same BASE register as coalesced. */
> + return r; /* Same BASE register already coalesced. */
> } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
> - rset_clear(allow, irp->r);
> emit_move(as, r, irp->r); /* Move from coalesced parent reg. */
> + return irp->r;
> } else {
> emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
> }
> }
> - return allow;
> + return RID_NONE;
> }
>
> /* -- Tail of trace ------------------------------------------------------- */
> diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
> index 971dcc88..7bba71b3 100644
> --- a/src/lj_asm_ppc.h
> +++ b/src/lj_asm_ppc.h
> @@ -2130,7 +2130,7 @@ static void asm_head_root_base(ASMState *as)
> }
>
> /* Coalesce BASE register for a side trace. */
> -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> +static Reg asm_head_side_base(ASMState *as, IRIns *irp)
> {
> IRIns *ir = IR(REF_BASE);
> Reg r = ir->r;
> @@ -2139,15 +2139,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> if (rset_test(as->modset, r) || irt_ismarked(ir->t))
> ir->r = RID_INIT; /* No inheritance for modified BASE register. */
> if (irp->r == r) {
> - rset_clear(allow, r); /* Mark same BASE register as coalesced. */
> + return r; /* Same BASE register already coalesced. */
> } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
> - rset_clear(allow, irp->r);
> emit_mr(as, r, irp->r); /* Move from coalesced parent reg. */
> + return irp->r;
> } else {
> emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
> }
> }
> - return allow;
> + return RID_NONE;
> }
>
> /* -- Tail of trace ------------------------------------------------------- */
> diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
> index 2b810c8d..86ce3937 100644
> --- a/src/lj_asm_x86.h
> +++ b/src/lj_asm_x86.h
> @@ -2856,7 +2856,7 @@ static void asm_head_root_base(ASMState *as)
> }
>
> /* Coalesce or reload BASE register for a side trace. */
> -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> +static Reg asm_head_side_base(ASMState *as, IRIns *irp)
> {
> IRIns *ir = IR(REF_BASE);
> Reg r = ir->r;
> @@ -2865,16 +2865,16 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
> if (rset_test(as->modset, r) || irt_ismarked(ir->t))
> ir->r = RID_INIT; /* No inheritance for modified BASE register. */
> if (irp->r == r) {
> - rset_clear(allow, r); /* Mark same BASE register as coalesced. */
> + return r; /* Same BASE register already coalesced. */
> } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
> /* Move from coalesced parent reg. */
> - rset_clear(allow, irp->r);
> emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
> + return irp->r;
> } else {
> emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
> }
> }
> - return allow;
> + return RID_NONE;
> }
>
> /* -- Tail of trace ------------------------------------------------------- */
> diff --git a/test/tarantool-tests/lj-1031-asm-head-side-base-reg.test.lua b/test/tarantool-tests/lj-1031-asm-head-side-base-reg.test.lua
> new file mode 100644
> index 00000000..fc5efaaa
> --- /dev/null
> +++ b/test/tarantool-tests/lj-1031-asm-head-side-base-reg.test.lua
> @@ -0,0 +1,139 @@
> +local tap = require('tap')
> +-- Test file to demonstrate incorrect side trace head assembling
> +-- when use parent trace register holding base (`RID_BASE`).
> +-- See also, https://github.com/LuaJIT/LuaJIT/issues/1031.
> +--
> +-- XXX: For now, the test doesn't fail even on arm64 due to the
> +-- different from upstream register allocation. Nevertheless, this
> +-- issue should be gone with future backporting, so just leave the
> +-- test case as is.
> +local test = tap.test('lj-1031-asm-head-side-base-reg'):skipcond({
> + ['Test requires JIT enabled'] = not jit.status(),
> +})
> +
> +local ffi = require 'ffi'
Please use parantheses here too.
> +local int64_t = ffi.typeof('int64_t')
> +
> +test:plan(1)
> +
> +-- To reproduce the issue (reproduced only on arm64) we need a
> +-- little bit of a tricky structure for the traces:
> +-- +-> + -- start TRACE 1
> +-- | | ...
> +-- | |---> + -- start TRACE 3 (side trace for the 1st)
> +-- | | | ...
> +-- | | v (link with TRACE 2)
> +-- | | +-> + -- start TRACE 2 (some loop that wasn't recorded
> +-- | | | | before due to the loop limit)
> +-- | | +---+
> +-- +<--+
> +-- Also, we need high register pressure to be sure that register
> +-- holding value of the base will be spilled.
> +-- So, during the assembly of "TRACE 3" (#6 in our test),
> +-- `RID_BASE` is spilled and restored via 32-bit fpr. This leads
> +-- to an incorrect value because it contains a 64-bit value.
> +-- See the original ticket for the details.
> +
> +-- XXX: Reduce amount of IR.
> +local tonumber = tonumber
> +local ffi_new = ffi.new
> +
> +-- Resulting slots for values calculated on trace.
> +-- luacheck: ignore
> +local r1
> +local r2
> +local r3
> +local r4
> +local r5
> +local r6
> +local r7
> +local r8
> +local r9
> +local r10
> +local r11
> +local r12
> +local r13
> +local r14
> +local r15
> +local r16
> +local r17
> +local r18
> +local r19
> +
> +local ARR_SIZE = 1e2
Maybe it is better to just write it as 100?
> +local lim_arr = {}
> +
> +-- XXX: Prevent irrelevant output in jit.dump().
> +jit.off()
> +
> +local INNER_TRACE_LIMIT = 20
> +local INNER_LIMIT1 = 1
> +local INNER_LIMIT2 = 4
Please drop a comment with explanation of why those numbers were chosen.
> +for i = 1, INNER_TRACE_LIMIT do lim_arr[i] = INNER_LIMIT1 end
> +for i = INNER_TRACE_LIMIT + 1, ARR_SIZE + 1 do lim_arr[i] = INNER_LIMIT2 end
These loops are hardly readable, it would be nice to make the multiline.
> +
> +-- Enable compilation back.
> +jit.on()
> +
> +jit.opt.start('hotloop=1', 'hotexit=2')
> +
> +-- XXX: Trace numbers are given with the respect of using
> +-- `jit.dump`.
> +-- Start TRACE 2 (1 is inside dump bc).
> +local k = 0
> +while k < ARR_SIZE do
> + k = k + 1
> + -- Forcify register pressure.
> + local l1 = ffi_new(int64_t, k + 1)
> + local l2 = ffi_new(int64_t, k + 2)
> + local l3 = ffi_new(int64_t, k + 3)
> + local l4 = ffi_new(int64_t, k + 4)
> + local l5 = ffi_new(int64_t, k + 5)
> + local l6 = ffi_new(int64_t, k + 6)
> + local l7 = ffi_new(int64_t, k + 7)
> + local l8 = ffi_new(int64_t, k + 8)
> + local l9 = ffi_new(int64_t, k + 9)
> + local l10 = ffi_new(int64_t, k + 10)
> + local l11 = ffi_new(int64_t, k + 11)
> + local l12 = ffi_new(int64_t, k + 12)
> + local l13 = ffi_new(int64_t, k + 13)
> + local l14 = ffi_new(int64_t, k + 14)
> + local l15 = ffi_new(int64_t, k + 15)
> + local l16 = ffi_new(int64_t, k + 16)
> + local l17 = ffi_new(int64_t, k + 17)
> + local l18 = ffi_new(int64_t, k + 18)
> + local l19 = ffi_new(int64_t, k + 19)
> + -- Side exit for TRACE 6 start 2/1.
> + -- luacheck: ignore
> + if k > 55 then else end
Please drop a comment and explain why it is 55, for some people
that is not obvious.
> + r1 = tonumber(l1)
> + r2 = tonumber(l2)
> + r3 = tonumber(l3)
> + r4 = tonumber(l4)
> + r5 = tonumber(l5)
> + r6 = tonumber(l6)
> + r7 = tonumber(l7)
> + r8 = tonumber(l8)
> + r9 = tonumber(l9)
> + r10 = tonumber(l10)
> + r11 = tonumber(l11)
> + r12 = tonumber(l12)
> + r13 = tonumber(l13)
> + r14 = tonumber(l14)
> + r15 = tonumber(l15)
> + r15 = tonumber(l15)
> + r16 = tonumber(l16)
> + r17 = tonumber(l17)
> + r18 = tonumber(l18)
> + r19 = tonumber(l19)
> + local lim_inner = lim_arr[k]
> + -- TRACE 3. Loop is not taken at the moment of recording TRACE 2
> + -- (lim_inner == 1).
> + for _ = 1, lim_inner do
> + -- TRACE 6 stop -> 3.
> + end
> +end
> +
> +test:ok(true, 'correct side trace head assembling')
> +
> +test:done(true)
> --
> 2.42.0
>
More information about the Tarantool-patches
mailing list