Hi, Sergey, thanks for the patch! LGTM Sergey On 3/31/26 10:54, Sergey Kaplun wrote: > From: Mike Pall > > Thanks to Sergey Kaplun. > > (cherry picked from commit ab834de8b6a963a83046a72b5a7751dcd6cdcff0) > > If the VM event contains a trace, it may cause several inconsistencies > during the recording of another trace: > > - If there is an exit from the trace in the VM event for the 'trace > start' VM event, the JIT engine converts the newly recorded trace to > the "side trace". So, when this side exit is taken, the JIT returns > from the VM event in the middle of another frame. > > - Stitching semantics are broken for the VM events due to an > inconsistent frame link chain. > > This patch fixes these issues by forbidding stitching in the VM event > and saving the context of the JIT engine at the VM event for trace > start. > > Sergey Kaplun: > * added the description and the test for the problem > > Part of tarantool/tarantool#12134 > --- > > Branch: https://github.com/tarantool/luajit/tree/skaplun/lj-1429-1434-recording-interference > Related issues: > * https://github.com/LuaJIT/LuaJIT/issues/1429 > * https://github.com/LuaJIT/LuaJIT/issues/1434 > * https://github.com/tarantool/tarantool/issues/12134 > > src/lj_dispatch.c | 22 +++++----- > src/lj_trace.c | 11 ++++- > .../lj-1429-stitching-to-vm-event.test.lua | 35 ++++++++++++++++ > .../lj-1434-trace-start-interference.test.lua | 40 +++++++++++++++++++ > 4 files changed, 97 insertions(+), 11 deletions(-) > create mode 100644 test/tarantool-tests/lj-1429-stitching-to-vm-event.test.lua > create mode 100644 test/tarantool-tests/lj-1434-trace-start-interference.test.lua > > diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c > index 431cb3c2..e5f876b1 100644 > --- a/src/lj_dispatch.c > +++ b/src/lj_dispatch.c > @@ -523,16 +523,18 @@ out: > /* Stitch a new trace. 
*/ > void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc) > { > - ERRNO_SAVE > - lua_State *L = J->L; > - void *cf = cframe_raw(L->cframe); > - const BCIns *oldpc = cframe_pc(cf); > - setcframe_pc(cf, pc); > - /* Before dispatch, have to bias PC by 1. */ > - L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf)); > - lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */ > - setcframe_pc(cf, oldpc); > - ERRNO_RESTORE > + if (!(J2G(J)->hookmask & HOOK_VMEVENT)) { > + ERRNO_SAVE > + lua_State *L = J->L; > + void *cf = cframe_raw(L->cframe); > + const BCIns *oldpc = cframe_pc(cf); > + setcframe_pc(cf, pc); > + /* Before dispatch, have to bias PC by 1. */ > + L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf)); > + lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */ > + setcframe_pc(cf, oldpc); > + ERRNO_RESTORE > + } > } > #endif > > diff --git a/src/lj_trace.c b/src/lj_trace.c > index 0dfbfa9f..7ed4c588 100644 > --- a/src/lj_trace.c > +++ b/src/lj_trace.c > @@ -459,7 +459,11 @@ static void trace_start(jit_State *J) > J->ktrace = 0; > setgcref(J->cur.startpt, obj2gco(J->pt)); > > - lj_vmevent_send(J2G(J), TRACE, > + lj_vmevent_send_(J2G(J), TRACE, > + TValue savetv = J2G(J)->tmptv; > + TValue savetv2 = J2G(J)->tmptv2; > + TraceNo parent = J->parent; > + ExitNo exitno = J->exitno; > setstrV(V, V->top++, lj_str_newlit(V, "start")); > setintV(V->top++, traceno); > setfuncV(V, V->top++, J->fn); > @@ -474,6 +478,11 @@ static void trace_start(jit_State *J) > setintV(V->top++, -1); > } > } > + , > + J2G(J)->tmptv = savetv; > + J2G(J)->tmptv2 = savetv2; > + J->parent = parent; > + J->exitno = exitno; > ); > lj_record_setup(J); > } > diff --git a/test/tarantool-tests/lj-1429-stitching-to-vm-event.test.lua b/test/tarantool-tests/lj-1429-stitching-to-vm-event.test.lua > new file mode 100644 > index 00000000..9ad6922e > --- /dev/null > +++ b/test/tarantool-tests/lj-1429-stitching-to-vm-event.test.lua > @@ 
-0,0 +1,35 @@ > +local tap = require('tap') > + > +-- The test file to demonstrate the incorrect recording of the > +-- trace when stitching in the VM event. > +-- See also https://github.com/LuaJIT/LuaJIT/issues/1429. > + > +local test = tap.test('lj-1429-stitching-to-vm-event'):skipcond({ > + ['Test requires JIT enabled'] = not jit.status(), > +}) > + > +test:plan(1) > + > +local function always_number(val) > + return tonumber(val) or 1 > +end > + > +-- This handler leads to stitching in the VM event. > +local function hdl() > + always_number('') > +end > + > +jit.opt.start('hotloop=1', 'hotexit=1') > + > +jit.attach(hdl, 'trace') > + > +coroutine.wrap(function() > + always_number('') > + always_number('') > + always_number(0) -- Start side trace, invoke handler. > + -- This breaks the recording semantics before the patch. > +end)() > + > +test:ok(true, 'no assertion failure') > + > +test:done(true) > diff --git a/test/tarantool-tests/lj-1434-trace-start-interference.test.lua b/test/tarantool-tests/lj-1434-trace-start-interference.test.lua > new file mode 100644 > index 00000000..c4dfbfdc > --- /dev/null > +++ b/test/tarantool-tests/lj-1434-trace-start-interference.test.lua > @@ -0,0 +1,40 @@ > +local tap = require('tap') > + > +-- The test file to demonstrate the incorrect recording of the > +-- trace when facing the trace exit in the VM event (start). > +-- See also https://github.com/LuaJIT/LuaJIT/issues/1434. > + > +local test = tap.test('lj-1434-trace-start-interference'):skipcond({ > + ['Test requires JIT enabled'] = not jit.status(), > +}) > + > +test:plan(1) > + > +local function call(self) > + return self > +end > + > +local function cb() > + -- Side exit for trace 1. > + call(nil) > +end > + > +jit.opt.start('hotloop=1', 'hotexit=1'); > + > +jit.attach(cb, 'trace') > + > +coroutine.wrap(function() > + for i = 1, 4 do > + -- Record trace 1. > + call(call(i)) > + -- Start trace 2. 
Side exit from trace 1 in the 'trace start' > + -- VM event converts the second trace to the "side trace". > + -- After that the VM assertion `lj_assert_bad_for_arg_type()` > + -- fails, since we return from the VM event in the middle of > + -- another frame. > + end > +end)() > + > +test:ok(true, 'no assertion failure') > + > +test:done(true)