Hello, Sergey! Thanks for the patch! See a comment below. Sergey On 6/10/25 13:28, Sergey Kaplun wrote: > From: Mike Pall > > Reported by Junlong Li. Fixed by Peter Cawley. > > (cherry picked from commit 86e7123bb1782a5f200ba5e83b8c4f3fbad4f7bc) > > This patch is a follow-up to the previous commit, which leads to a dirty > read of the pseudo-valid PC set for the cframe on snapshot restoration. > To avoid these dirty reads, this patch sets the PC to the outer frame > as well before possible error throwing. > > Sergey Kaplun: > * added the description and the test for the problem > > Part of tarantool/tarantool#11278 > --- > src/lj_snap.c | 3 +- > src/lj_trace.c | 4 +- > ...-1196-stack-overflow-snap-restore.test.lua | 65 +++++++++++++++++++ > 3 files changed, 68 insertions(+), 4 deletions(-) > create mode 100644 test/tarantool-tests/lj-1196-stack-overflow-snap-restore.test.lua > > diff --git a/src/lj_snap.c b/src/lj_snap.c > index 8d7bd868..4cfae579 100644 > --- a/src/lj_snap.c > +++ b/src/lj_snap.c > @@ -955,7 +955,8 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) > lua_State *L = J->L; > > /* Set interpreter PC to the next PC to get correct error messages. */ > - setcframe_pc(cframe_raw(L->cframe), pc+1); > + setcframe_pc(L->cframe, pc+1); > + setcframe_pc(cframe_raw(cframe_prev(L->cframe)), pc); > > /* Make sure the stack is big enough for the slots from the snapshot. */ > if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) { > diff --git a/src/lj_trace.c b/src/lj_trace.c > index 8a18d3cf..0d1d233a 100644 > --- a/src/lj_trace.c > +++ b/src/lj_trace.c > @@ -909,10 +909,8 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) > exd.J = J; > exd.exptr = exptr; > errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp); > - if (errcode) { > - setcframe_pc(cframe_raw(L->cframe), L); /* Point to any valid memory. */ > + if (errcode) > return -errcode; /* Return negated error code. 
*/ > - } > > if (exitcode) copyTV(L, L->top++, &exiterr); /* Anchor the error object. */ > > diff --git a/test/tarantool-tests/lj-1196-stack-overflow-snap-restore.test.lua b/test/tarantool-tests/lj-1196-stack-overflow-snap-restore.test.lua > new file mode 100644 > index 00000000..942d1f82 > --- /dev/null > +++ b/test/tarantool-tests/lj-1196-stack-overflow-snap-restore.test.lua > @@ -0,0 +1,65 @@ > +local tap = require('tap') > + > +-- Test file to demonstrate LuaJIT dirty reads after stack > +-- overflow during restoration from the snapshot. > +-- The test fails before the patch under Valgrind. Please specify the Valgrind options that are required to reproduce the bug. I cannot reproduce it with the command line below with the patch reverted: VALGRIND_OPTS="--leak-check=no --malloc-fill=0x00 --free-fill=0x00" ctest -V -R test/tarantool-tests/lj-1196-partial-snap-restore.test.lua -V > +-- > +-- luacheck: push no max_comment_line_length > +-- > +-- See also: https://github.com/LuaJIT/LuaJIT/issues/1196, > +-- https://www.freelists.org/post/luajit/Invalid-read-found-by-valgrind. > +-- > +-- luacheck: pop > + > +local test = tap.test('lj-1196-stack-overflow-snap-restore') > + > +test:plan(4) > + > +-- XXX: This file has the same tests as the > +-- <lj-1196-partial-snap-restore.test.lua>, but without disabling the > +-- compilation for the given functions. Hence, the check here is > +-- less strict -- we just check that there are no dirty reads, > +-- uninitialized memory access, etc. > + > +local function recursive_f_noarg() > + recursive_f_noarg() > +end > + > +local function recursive_one_arg(argument) > + recursive_one_arg(argument) > +end > + > +local function recursive_f_vararg(...) > + recursive_f_vararg(1, ...) > +end > + > +local function recursive_f_vararg_tail(...) > + return recursive_f_vararg_tail(1, ...) > +end > + > +-- Use `coroutine.wrap()`, for independent stack sizes. 
> +-- The invalid read is done by the error handler > +-- `debug.traceback()`, since it observes the pseudo PC (`L`) and > +-- reads the memory by `L - 4` address before the patch. > + > +coroutine.wrap(function() > + local status = xpcall(recursive_f_noarg, debug.traceback) > +test:ok(not status, 'correct status, recursive no arguments') > +end)() > + > +coroutine.wrap(function() > + local status = xpcall(recursive_one_arg, debug.traceback, 1) > +test:ok(not status, 'correct status, recursive one argument') > +end)() > + > +coroutine.wrap(function() > + local status = xpcall(recursive_f_vararg, debug.traceback, 1) > +test:ok(not status, 'correct status, recursive vararg') > +end)() > + > +coroutine.wrap(function() > + local status = xpcall(recursive_f_vararg_tail, debug.traceback, 1) > +test:ok(not status, 'correct status, recursive vararg tail') > +end)() > + > +test:done(true)