[Tarantool-patches] [PATCH luajit 2/2] Fix base register coalescing in side trace.

Sergey Kaplun skaplun at tarantool.org
Wed Oct 11 18:04:10 MSK 2023


From: Mike Pall <mike>

Thanks to Sergey Kaplun, NiLuJe and Peter Cawley.

(cherry-picked from commit aa2db7ebd1267836af5221336ccb4e9b4aa8372d)

The previous patch fixed just part of the problem with the register
coalesing. For example, the parent base register may be used inside the
parent or child register sets when it shouldn't. This leads to incorrect
register allocations, which may lead to crashes or undefined behaviour.
This patch fixes it by excluding the parent base register from both
register sets.

The test case for this patch doesn't fail before the commit since it
requires specific register allocation, which is hard to construct and
very fragile. Due to the lack of ideal sync with the upstream
repository, the test is passed before the patch.
It should become correct in future patches.

Resolves tarantool/tarantool#8767
Part of tarantool/tarantool#9145

Sergey Kaplun:
* added the description and the test for the problem
---
 src/lj_asm.c                                  |   7 +-
 src/lj_asm_arm.h                              |   7 +-
 src/lj_asm_arm64.h                            |   7 +-
 src/lj_asm_mips.h                             |   8 +-
 src/lj_asm_ppc.h                              |   8 +-
 src/lj_asm_x86.h                              |   8 +-
 .../lj-1031-asm-head-side-base-reg.test.lua   | 139 ++++++++++++++++++
 7 files changed, 163 insertions(+), 21 deletions(-)
 create mode 100644 test/tarantool-tests/lj-1031-asm-head-side-base-reg.test.lua

diff --git a/src/lj_asm.c b/src/lj_asm.c
index ca06860a..5137d05c 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1860,6 +1860,7 @@ static void asm_head_side(ASMState *as)
   RegSet allow = RSET_ALL;  /* Inverse of all coalesced registers. */
   RegSet live = RSET_EMPTY;  /* Live parent registers. */
   RegSet pallow = RSET_GPR;  /* Registers needed by the parent stack check. */
+  Reg pbase;
   IRIns *irp = &as->parent->ir[REF_BASE];  /* Parent base. */
   int32_t spadj, spdelta;
   int pass2 = 0;
@@ -1870,7 +1871,11 @@ static void asm_head_side(ASMState *as)
     /* Force snap #0 alloc to prevent register overwrite in stack check. */
     asm_snap_alloc(as, 0);
   }
-  allow = asm_head_side_base(as, irp, allow);
+  pbase = asm_head_side_base(as, irp);
+  if (pbase != RID_NONE) {
+    rset_clear(allow, pbase);
+    rset_clear(pallow, pbase);
+  }
 
   /* Scan all parent SLOADs and collect register dependencies. */
   for (i = as->stopins; i > REF_BASE; i--) {
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 47564d2e..5a0f925f 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -2107,7 +2107,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir;
   asm_head_lreg(as);
@@ -2115,16 +2115,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
     ra_spill(as, ir);
   if (ra_hasspill(irp->s)) {
-    rset_clear(allow, ra_dest(as, ir, allow));
+    return ra_dest(as, ir, RSET_GPR);
   } else {
     Reg r = irp->r;
     lj_assertA(ra_hasreg(r), "base reg lost");
-    rset_clear(allow, r);
     if (r != ir->r && !rset_test(as->freeset, r))
       ra_restore(as, regcost_ref(as->cost[r]));
     ra_destreg(as, ir, r);
+    return r;
   }
-  return allow;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index d1d4237b..88b47ceb 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -1873,7 +1873,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir;
   asm_head_lreg(as);
@@ -1881,16 +1881,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
     ra_spill(as, ir);
   if (ra_hasspill(irp->s)) {
-    rset_clear(allow, ra_dest(as, ir, allow));
+    return ra_dest(as, ir, RSET_GPR);
   } else {
     Reg r = irp->r;
     lj_assertA(ra_hasreg(r), "base reg lost");
-    rset_clear(allow, r);
     if (r != ir->r && !rset_test(as->freeset, r))
       ra_restore(as, regcost_ref(as->cost[r]));
     ra_destreg(as, ir, r);
+    return r;
   }
-  return allow;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index ac9090f2..597c6d62 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -2595,7 +2595,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
@@ -2605,15 +2605,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
-      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+      return r;  /* Same BASE register already coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
-      rset_clear(allow, irp->r);
       emit_move(as, r, irp->r);  /* Move from coalesced parent reg. */
+      return irp->r;
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
   }
-  return allow;
+  return RID_NONE;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index 971dcc88..7bba71b3 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -2130,7 +2130,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
@@ -2139,15 +2139,15 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
-      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+      return r;  /* Same BASE register already coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
-      rset_clear(allow, irp->r);
       emit_mr(as, r, irp->r);  /* Move from coalesced parent reg. */
+      return irp->r;
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
   }
-  return allow;
+  return RID_NONE;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 2b810c8d..86ce3937 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -2856,7 +2856,7 @@ static void asm_head_root_base(ASMState *as)
 }
 
 /* Coalesce or reload BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
@@ -2865,16 +2865,16 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
-      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+      return r;  /* Same BASE register already coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
       /* Move from coalesced parent reg. */
-      rset_clear(allow, irp->r);
       emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
+      return irp->r;
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
   }
-  return allow;
+  return RID_NONE;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */
diff --git a/test/tarantool-tests/lj-1031-asm-head-side-base-reg.test.lua b/test/tarantool-tests/lj-1031-asm-head-side-base-reg.test.lua
new file mode 100644
index 00000000..fc5efaaa
--- /dev/null
+++ b/test/tarantool-tests/lj-1031-asm-head-side-base-reg.test.lua
@@ -0,0 +1,139 @@
+local tap = require('tap')
+-- Test file to demonstrate incorrect side trace head assembling
+-- when use parent trace register holding base (`RID_BASE`).
+-- See also, https://github.com/LuaJIT/LuaJIT/issues/1031.
+--
+-- XXX: For now, the test doesn't fail even on arm64 due to the
+-- different from upstream register allocation. Nevertheless, this
+-- issue should be gone with future backporting, so just leave the
+-- test case as is.
+local test = tap.test('lj-1031-asm-head-side-base-reg'):skipcond({
+  ['Test requires JIT enabled'] = not jit.status(),
+})
+
+local ffi = require 'ffi'
+local int64_t = ffi.typeof('int64_t')
+
+test:plan(1)
+
+-- To reproduce the issue (reproduced only on arm64) we need a
+-- little bit of a tricky structure for the traces:
+-- +-> + -- start TRACE 1
+-- |   |    ...
+-- |   |---> + -- start TRACE 3 (side trace for the 1st)
+-- |   |     | ...
+-- |   |     v (link with TRACE 2)
+-- |   | +-> + -- start TRACE 2 (some loop that wasn't recorded
+-- |   | |   |                   before due to the loop limit)
+-- |   | +---+
+-- +<--+
+-- Also, we need high register pressure to be sure that register
+-- holding value of the base will be spilled.
+-- So, during the assembly of "TRACE 3" (#6 in our test),
+-- `RID_BASE` is spilled and restored via 32-bit fpr. This leads
+-- to an incorrect value because it contains a 64-bit value.
+-- See the original ticket for the details.
+
+-- XXX: Reduce amount of IR.
+local tonumber = tonumber
+local ffi_new = ffi.new
+
+-- Resulting slots for values calculated on trace.
+-- luacheck: ignore
+local r1
+local r2
+local r3
+local r4
+local r5
+local r6
+local r7
+local r8
+local r9
+local r10
+local r11
+local r12
+local r13
+local r14
+local r15
+local r16
+local r17
+local r18
+local r19
+
+local ARR_SIZE = 1e2
+local lim_arr = {}
+
+-- XXX: Prevent irrelevant output in jit.dump().
+jit.off()
+
+local INNER_TRACE_LIMIT = 20
+local INNER_LIMIT1 = 1
+local INNER_LIMIT2 = 4
+for i = 1, INNER_TRACE_LIMIT do lim_arr[i] = INNER_LIMIT1 end
+for i = INNER_TRACE_LIMIT + 1, ARR_SIZE + 1 do lim_arr[i] = INNER_LIMIT2 end
+
+-- Enable compilation back.
+jit.on()
+
+jit.opt.start('hotloop=1', 'hotexit=2')
+
+-- XXX: Trace numbers are given with the respect of using
+-- `jit.dump`.
+-- Start TRACE 2 (1 is inside dump bc).
+local k = 0
+while k < ARR_SIZE do
+  k = k + 1
+  -- Forcify register pressure.
+  local l1  = ffi_new(int64_t, k + 1)
+  local l2  = ffi_new(int64_t, k + 2)
+  local l3  = ffi_new(int64_t, k + 3)
+  local l4  = ffi_new(int64_t, k + 4)
+  local l5  = ffi_new(int64_t, k + 5)
+  local l6  = ffi_new(int64_t, k + 6)
+  local l7  = ffi_new(int64_t, k + 7)
+  local l8  = ffi_new(int64_t, k + 8)
+  local l9  = ffi_new(int64_t, k + 9)
+  local l10 = ffi_new(int64_t, k + 10)
+  local l11 = ffi_new(int64_t, k + 11)
+  local l12 = ffi_new(int64_t, k + 12)
+  local l13 = ffi_new(int64_t, k + 13)
+  local l14 = ffi_new(int64_t, k + 14)
+  local l15 = ffi_new(int64_t, k + 15)
+  local l16 = ffi_new(int64_t, k + 16)
+  local l17 = ffi_new(int64_t, k + 17)
+  local l18 = ffi_new(int64_t, k + 18)
+  local l19 = ffi_new(int64_t, k + 19)
+  -- Side exit for TRACE 6 start 2/1.
+  -- luacheck: ignore
+  if k > 55 then else end
+  r1  = tonumber(l1)
+  r2  = tonumber(l2)
+  r3  = tonumber(l3)
+  r4  = tonumber(l4)
+  r5  = tonumber(l5)
+  r6  = tonumber(l6)
+  r7  = tonumber(l7)
+  r8  = tonumber(l8)
+  r9  = tonumber(l9)
+  r10 = tonumber(l10)
+  r11 = tonumber(l11)
+  r12 = tonumber(l12)
+  r13 = tonumber(l13)
+  r14 = tonumber(l14)
+  r15 = tonumber(l15)
+  r15 = tonumber(l15)
+  r16 = tonumber(l16)
+  r17 = tonumber(l17)
+  r18 = tonumber(l18)
+  r19 = tonumber(l19)
+  local lim_inner = lim_arr[k]
+  -- TRACE 3. Loop is not taken at the moment of recording TRACE 2
+  -- (lim_inner == 1).
+  for _ = 1, lim_inner do
+    -- TRACE 6 stop -> 3.
+  end
+end
+
+test:ok(true, 'correct side trace head assembling')
+
+test:done(true)
-- 
2.42.0



More information about the Tarantool-patches mailing list