Hi, Sergey, thanks for the patch! LGTM On 10.09.2024 17:05, Sergey Kaplun wrote: > From: Mike Pall > > (cherry picked from commit 204cee2c917f55f288c0b166742e56c134fe578c) > > It is possible that a snapshot topslot is less than the possible topslot > of the Lua stack. In that case, if the Lua stack overflows in > `lj_vmevent_prepare()`, the error is raised inside > `lj_vm_exit_handler()`, which has no corresponding DWARF eh_frame [1], > so it leads to the crash. > > This patch fixes up the topslot of the snapshot on trace exit to the > maximum possible one. > > Sergey Kaplun: > * added the description and the test for the problem > > [1]:https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html > > Part of tarantool/tarantool#10199 > --- > > Branch:https://github.com/tarantool/luajit/tree/skaplun/fix-stack-alloc-on-trace > Issue:https://github.com/tarantool/tarantool/issues/10199 > > src/lj_trace.c | 6 ++- > .../fix-stack-alloc-on-trace-exit.test.lua | 53 +++++++++++++++++++ > 2 files changed, 58 insertions(+), 1 deletion(-) > create mode 100644 test/tarantool-tests/fix-stack-alloc-on-trace-exit.test.lua > > diff --git a/src/lj_trace.c b/src/lj_trace.c > index 20014ecb..94cb27e5 100644 > --- a/src/lj_trace.c > +++ b/src/lj_trace.c > @@ -522,7 +522,11 @@ static void trace_stop(jit_State *J) > lj_assertJ(J->parent != 0 && J->cur.root != 0, "not a side trace"); > lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode); > /* Avoid compiling a side trace twice (stack resizing uses parent exit). */ > - traceref(J, J->parent)->snap[J->exitno].count = SNAPCOUNT_DONE; > + { > + SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; > + snap->count = SNAPCOUNT_DONE; > + if (J->cur.topslot > snap->topslot) snap->topslot = J->cur.topslot; > + } > /* Add to side trace chain in root trace. 
*/ > { > GCtrace *root = traceref(J, J->cur.root); > diff --git a/test/tarantool-tests/fix-stack-alloc-on-trace-exit.test.lua b/test/tarantool-tests/fix-stack-alloc-on-trace-exit.test.lua > new file mode 100644 > index 00000000..ca04e54e > --- /dev/null > +++ b/test/tarantool-tests/fix-stack-alloc-on-trace-exit.test.lua > @@ -0,0 +1,53 @@ > +local tap = require('tap') > + > +-- Test file to demonstrate incorrect Lua stack restoration on > +-- exit from trace by the stack overflow. > + > +local test = tap.test('fix-stack-alloc-on-trace-exit'):skipcond({ > + ['Test requires JIT enabled'] = not jit.status(), > +}) > + > +local jit_dump = require('jit.dump') > + > +test:plan(2) > + > +-- Before the patch, it is possible that a snapshot topslot is > +-- less than the possible topslot of the Lua stack. In that case, > +-- if the Lua stack overflows in `lj_vmevent_prepare()`, the error > +-- is raised inside `lj_vm_exit_handler()`, which has no > +-- corresponding DWARF eh_frame, so it leads to the crash. > + > +-- Needed for the stack to grow in `lj_vmevent_prepare()`. > +jit_dump.start('x', '/dev/null') > + > +-- Create a coroutine with a fixed stack size. > +local coro = coroutine.create(function() > + jit.opt.start('hotloop=1', 'hotexit=1', 'callunroll=1') > + > + -- `math.modf` recording is NYI. > + -- Local `math_modf` simplifies `jit.dump()` output. > + local math_modf = math.modf > + > + local function trace(n) > + n = n + 1 > + -- luacheck: ignore > + -- Start a side trace here. > + if n % 2 == 0 then end > + -- Stop the recording of the side trace and a main trace, > + -- stitching. > + math_modf(1, 1) > + -- Grow stack, avoid tail calls. > + local unused = trace(n) > + return unused > + end > + > + local n = 0 > + trace(n) > +end) > + > +local result, errmsg = coroutine.resume(coro) > + > +test:ok(not result, 'correct status and no crash') > +test:like(errmsg, 'stack overflow', 'correct error message') > + > +test:done(true)