Hi, Sergey, thanks for the patch! LGTM Sergey On 6/12/25 12:12, Sergey Kaplun wrote: > From: Mike Pall > > Reported by Sergey Kaplun. > > (cherry picked from commit cd4af8ad80bb6430ad2e547f7af236268c9be7d9) > > When restoring a snapshot because of exiting by the check of the stack, > the snapshot from the parent trace is used. For the correct error > message, the snapshot map uses the next bytecode after the stored one. > In case, when the PC in the snapshot is BC_RET* (the last bytecode in > the prototype), there is no next value to be referenced, so this > approach leads to the assertion failure in `lj_debug_framepc()`. > > This patch prevents this behaviour by checking the original bytecode and > avoiding PC adjusting in the case of the last bytecode in the prototype. > > Sergey Kaplun: > * added the description and the test for the problem > > Part of tarantool/tarantool#11278 > --- > > Related issues: > *https://github.com/LuaJIT/LuaJIT/issues/1359 > *https://github.com/tarantool/tarantool/issues/11278 > Branch:https://github.com/tarantool/luajit/tree/skaplun/lj-1359-bad-pc-on-snap-restore-stackov > > src/lj_bc.h | 5 ++ > src/lj_parse.c | 14 +---- > src/lj_snap.c | 6 ++- > ...59-bad-pc-on-snap-restore-stackov.test.lua | 51 +++++++++++++++++++ > 4 files changed, 61 insertions(+), 15 deletions(-) > create mode 100644 test/tarantool-tests/lj-1359-bad-pc-on-snap-restore-stackov.test.lua > > diff --git a/src/lj_bc.h b/src/lj_bc.h > index 69a45f28..2196dbed 100644 > --- a/src/lj_bc.h > +++ b/src/lj_bc.h > @@ -259,6 +259,11 @@ static LJ_AINLINE int bc_isret(BCOp op) > return (op == BC_RETM || op == BC_RET || op == BC_RET0 || op == BC_RET1); > } > > +static LJ_AINLINE int bc_isret_or_tail(BCOp op) > +{ > + return (op == BC_CALLMT || op == BC_CALLT || bc_isret(op)); > +} > + > LJ_DATA const uint16_t lj_bc_mode[]; > LJ_DATA const uint16_t lj_bc_ofs[]; > > diff --git a/src/lj_parse.c b/src/lj_parse.c > index ec85ac9b..88108c01 100644 > --- a/src/lj_parse.c > +++ b/src/lj_parse.c > @@ -1519,23 +1519,11 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar) > > #endif > > -/* Check if bytecode op returns. */ > -static int bcopisret(BCOp op) > -{ > - switch (op) { > - case BC_CALLMT: case BC_CALLT: > - case BC_RETM: case BC_RET: case BC_RET0: case BC_RET1: > - return 1; > - default: > - return 0; > - } > -} > - > /* Fixup return instruction for prototype. */ > static void fs_fixup_ret(FuncState *fs) > { > BCPos lastpc = fs->pc; > - if (lastpc <= fs->lasttarget || !bcopisret(bc_op(fs->bcbase[lastpc-1].ins))) { > + if (lastpc <= fs->lasttarget || !bc_isret_or_tail(bc_op(fs->bcbase[lastpc-1].ins))) { > if ((fs->bl->flags & FSCOPE_UPVAL)) > bcemit_AJ(fs, BC_UCLO, 0, 0); > bcemit_AD(fs, BC_RET0, 0, 1); /* Need final return. */ > diff --git a/src/lj_snap.c b/src/lj_snap.c > index 4cfae579..3542a46c 100644 > --- a/src/lj_snap.c > +++ b/src/lj_snap.c > @@ -954,8 +954,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) > const BCIns *pc = snap_pc(&map[nent]); > lua_State *L = J->L; > > - /* Set interpreter PC to the next PC to get correct error messages. */ > - setcframe_pc(L->cframe, pc+1); > + /* Set interpreter PC to the next PC to get correct error messages. > + ** But not for returns or tail calls, since pc+1 may be out-of-range. > + */ > + setcframe_pc(L->cframe, bc_isret_or_tail(bc_op(*pc)) ? pc : pc+1); > setcframe_pc(cframe_raw(cframe_prev(L->cframe)), pc); > > /* Make sure the stack is big enough for the slots from the snapshot. */ > diff --git a/test/tarantool-tests/lj-1359-bad-pc-on-snap-restore-stackov.test.lua b/test/tarantool-tests/lj-1359-bad-pc-on-snap-restore-stackov.test.lua > new file mode 100644 > index 00000000..d09b5c42 > --- /dev/null > +++ b/test/tarantool-tests/lj-1359-bad-pc-on-snap-restore-stackov.test.lua > @@ -0,0 +1,51 @@ > +local tap = require('tap') > + > +-- The test file to demonstrate the LuaJIT misbehaviour during > +-- stack overflow on snapshot restoration for the last bytecode in > +-- the prototype. > +-- See also:https://github.com/LuaJIT/LuaJIT/issues/1359. > + > +local test = tap.test('lj-1359-bad-pc-on-snap-restore-stackov'):skipcond({ > + ['Test requires JIT enabled'] = not jit.status(), > +}) > + > +test:plan(2) > + > +jit.opt.start('hotloop=1') > + > +-- When restoring a snapshot because of exiting by the check of > +-- the stack, the snapshot from the parent trace is used. For the > +-- correct error message, the snapshot map uses the next bytecode > +-- after the stored one. In case, when the PC in the snapshot is > +-- BC_RET* (the last bytecode in the prototype), there is no next > +-- value to be referenced, so this approach leads to the assertion > +-- failure in lj_debug_framepc(). > + > +local counter = 0 > +-- The second trace started from the side exit by the counter. > +-- It ends by entering the first trace. > +local function func_side_trace() > + if counter > 5 then > + -- This RET0 BC is the first recorded for the second trace. > + -- The stack check for the child trace uses the first snapshot > + -- from the parent trace. > + return > + end > + counter = counter + 1; > +end > + > +-- The trace with up-recursion for the function that causes stack > +-- overflow. It is recorded first and inlines func_side_trace. > +local function stackov_f() > + local r = stackov_f(func_side_trace()) > + return r > +end > + > +local result, errmsg = pcall(stackov_f) > + > +-- There is no assertion failure if we are here. > +-- Just sanity checks. > +test:ok(not result, 'correct status') > +test:like(errmsg, 'stack overflow', 'correct error message') > + > +test:done(true)