Hi, Sergey! thanks for the patch! LGTM On 10.01.2025 16:07, Sergey Kaplun wrote: > From: Mike Pall > > Reported by Peter Cawley. > > (cherry picked from commit 644723649ea04cb23b72c814b88b72a29e4afed4) > > Load fusing optimization doesn't take into account the presence of the > `IR_NEWREF` which may cause rehashing and deallocation of the array part > of the table. This may lead to the incorrect stores if the fusing > optimization occurs across this IR, leading to inconsistent behaviour > between the JIT and the VM. > > This patch adds the corresponding check by the refactoring of the > `noconflict()` function -- it now accepts the mask of the `check` as the > last argument. The first bit stands for the `IR_NEWREF` check, the > second for the multiple reference of the given instruction. > Unfortunately, this commit misses the check for the `table.clear()` > introduced for the preprevious patch. Thus, the corresponding test fails > again. This will be fixed in the next commit. > > Sergey Kaplun: > * added the description and the test for the problem > > Part of tarantool/tarantool#10709 > --- > src/lj_asm_x86.h | 17 +++--- > .../lj-1117-fuse-across-newref.test.lua | 52 +++++++++++++++++++ > 2 files changed, 61 insertions(+), 8 deletions(-) > create mode 100644 test/tarantool-tests/lj-1117-fuse-across-newref.test.lua > > diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h > index cba7ba80..d77087d6 100644 > --- a/src/lj_asm_x86.h > +++ b/src/lj_asm_x86.h > @@ -109,7 +109,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) > /* Check if there's no conflicting instruction between curins and ref. > ** Also avoid fusing loads if there are multiple references. > */ > -static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload) > +static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check) > { > IRIns *ir = as->ir; > IRRef i = as->curins; > @@ -118,7 +118,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload) > while (--i > ref) { > if (ir[i].o == conflict) > return 0; /* Conflict found. */ > - else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref)) > + else if ((check & 1) && ir[i].o == IR_NEWREF) > + return 0; > + else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref)) > return 0; > } > return 1; /* Ok, no conflict. */ > @@ -134,7 +136,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) > lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY"); > /* We can avoid the FLOAD of t->array for colocated arrays. */ > if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && > - !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) { > + !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) { > as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */ > return irb->op1; /* Table obj. */ > } > @@ -448,7 +450,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) > RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; > if (ir->o == IR_SLOAD) { > if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && > - noconflict(as, ref, IR_RETF, 0) && > + noconflict(as, ref, IR_RETF, 2) && > !(LJ_GC64 && irt_isaddr(ir->t))) { > as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); > as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + > @@ -459,13 +461,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) > } else if (ir->o == IR_FLOAD) { > /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */ > if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) && > - noconflict(as, ref, IR_FSTORE, 0)) { > + noconflict(as, ref, IR_FSTORE, 2)) { > asm_fusefref(as, ir, xallow); > return RID_MRM; > } > } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { > - if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) && > - noconflict(as, ref, IR_CALLS, 1) && /* Don't cross table.clear. */ > + if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) && > !(LJ_GC64 && irt_isaddr(ir->t))) { > asm_fuseahuref(as, ir->op1, xallow); > return RID_MRM; > @@ -475,7 +476,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) > ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). > */ > if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) && > - noconflict(as, ref, IR_XSTORE, 0)) { > + noconflict(as, ref, IR_XSTORE, 2)) { > asm_fusexref(as, ir->op1, xallow); > return RID_MRM; > } > diff --git a/test/tarantool-tests/lj-1117-fuse-across-newref.test.lua b/test/tarantool-tests/lj-1117-fuse-across-newref.test.lua > new file mode 100644 > index 00000000..4b8772bf > --- /dev/null > +++ b/test/tarantool-tests/lj-1117-fuse-across-newref.test.lua > @@ -0,0 +1,52 @@ > +local tap = require('tap') > +-- Test file to demonstrate LuaJIT's incorrect fusion across > +-- `IR_NEWREF`. > +-- See also:https://github.com/LuaJIT/LuaJIT/issues/1117. > +local test = tap.test('lj-1117-fuse-across-newref'):skipcond({ > + ['Test requires JIT enabled'] = not jit.status(), > +}) > + > +local ffi = require('ffi') > + > +test:plan(1) > + > +-- Table with content. > +local tab = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 42} > +-- Use the alias to trick the code flow analysis. > +local tab_alias = tab > +local result_tab = {} > + > +-- Need to start recording trace at the 16th iteration to avoid > +-- rehashing of the `t` and `result_tab` before the `if` > +-- condition below on the 32nd iteration. Also, the inner loop > +-- isn't recorded this way. After rehashing in the NEWREF, the > +-- fusion will use the wrong address, which leads to the dirty > +-- reads visible (always, not flaky) under Valgrind with the > +-- `--free-fill` option set. > +jit.opt.start('hotloop=16') > + > +-- The amount of iterations required for the rehashing of the > +-- table. > +for i = 1, 33 do > + -- ALOAD to be fused. > + local value = tab[16] > + -- NEWREF instruction. > + tab_alias[{}] = 100 > + -- Need this CONV cast to trigger load fusion. See `asm_comp()` > + -- for the details. Before the patch, this fusion takes the > + -- incorrect address of the already deallocated array part of > + -- the table, which leads to the incorrect result. > + result_tab[i] = ffi.cast('int64_t', value) > + if i == 32 then > + -- Clear the array part. > + for j = 1, 15 do > + tab[j] = nil > + end > + -- Next rehash of the `tab`/`tab_alias` will dealloc the array > + -- part. > + end > +end > + > +test:samevalues(result_tab, 'no fusion across NEWREF') > + > +test:done(true)