[Tarantool-patches] [PATCH luajit] Rework stack overflow handling.
Sergey Bronnikov
sergeyb at tarantool.org
Fri Jun 6 15:54:02 MSK 2025
Hello, Sergey,
thanks for the patch! See my comments below.
Sergey
On 4/25/25 16:42, Sergey Kaplun wrote:
> From: Mike Pall <mike>
>
> Reported by pwnhacker0x18. Fixed by Peter Cawley.
>
> (cherry picked from commit defe61a56751a0db5f00ff3ab7b8f45436ba74c8)
>
> In case of the Lua stack overflow error, LuaJIT restores the `L->top`
> value and pushes the error message above. It is possible that the
> restored value is greater than `L->maxstack`, so pushing the error
> message causes dirty write out-of-bounds.
>
> This patch prevents it by overwriting stack overflow handling machinery.
> Now, in the aforementioned case, the last frame is replaced with a dummy
> frame to avoid dirty writes.
>
> Sergey Kaplun:
> * added the description and the test for the problem
>
> Part of tarantool/tarantool#11278
> ---
>
> Related issues:
> *https://github.com/LuaJIT/LuaJIT/issues/1152
> *https://github.com/tarantool/tarantool/issues/11278
> Branch:https://github.com/tarantool/luajit/tree/skaplun/lj-1152-stack-buffer-overflow-on-error
>
> The CI is red due to CMake minimum required version in c-dt module in
> Tarantool.
These problems are already fixed, please re-push the branch.
>
> The cherry-pick also included a part fixed in the merge commit:
> https://github.com/LuaJIT/LuaJIT/commit/0d313b243
>
> src/lj_debug.c | 1 +
> src/lj_err.c | 23 +++++-
> src/lj_err.h | 1 +
> src/lj_state.c | 58 +++++++++-----
> test/LuaJIT-tests/lang/stackov.lua | 8 +-
> ...52-stack-buffer-overflow-on-error.test.lua | 79 +++++++++++++++++++
> test/tarantool-tests/utils/CMakeLists.txt | 1 +
> 7 files changed, 148 insertions(+), 23 deletions(-)
> create mode 100644 test/tarantool-tests/lj-1152-stack-buffer-overflow-on-error.test.lua
>
> diff --git a/src/lj_debug.c b/src/lj_debug.c
> index 107f464c..76e48aca 100644
> --- a/src/lj_debug.c
> +++ b/src/lj_debug.c
> @@ -64,6 +64,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
> if (cf == NULL || (char *)cframe_pc(cf) == (char *)cframe_L(cf))
> return NO_BCPOS;
> ins = cframe_pc(cf); /* Only happens during error/hook handling. */
> + if (!ins) return NO_BCPOS;
> } else {
> if (frame_islua(nextframe)) {
> ins = frame_pc(nextframe);
> diff --git a/src/lj_err.c b/src/lj_err.c
> index 1a9a2f2b..80dca847 100644
> --- a/src/lj_err.c
> +++ b/src/lj_err.c
> @@ -784,7 +784,14 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
> TValue *base = tvref(G(L)->jit_base);
> if (base) L->base = base;
> }
> - if (curr_funcisL(L)) L->top = curr_topL(L);
> + if (curr_funcisL(L)) {
> + L->top = curr_topL(L);
> + if (LJ_UNLIKELY(L->top > tvref(L->maxstack))) {
> + /* The current Lua frame violates the stack. Replace it with a dummy. */
> + L->top = L->base;
> + setframe_gc(L->base - 1 - LJ_FR2, obj2gco(L), LJ_TTHREAD);
> + }
> + }
> setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRMEM));
> lj_err_throw(L, LUA_ERRMEM);
> }
> @@ -845,9 +852,11 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
> {
> ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L);
> if (ef) {
> - TValue *errfunc = restorestack(L, ef);
> - TValue *top = L->top;
> + TValue *errfunc, *top;
> + lj_state_checkstack(L, LUA_MINSTACK * 2); /* Might raise new error. */
> lj_trace_abort(G(L));
> + errfunc = restorestack(L, ef);
> + top = L->top;
> if (!tvisfunc(errfunc) || L->status == LUA_ERRERR) {
> setstrV(L, top-1, lj_err_str(L, LJ_ERR_ERRERR));
> lj_err_throw(L, LUA_ERRERR);
> @@ -862,7 +871,15 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
> lj_err_throw(L, LUA_ERRRUN);
> }
>
> +/* Stack overflow error. */
> +void LJ_FASTCALL lj_err_stkov(lua_State *L)
> +{
> + lj_debug_addloc(L, err2msg(LJ_ERR_STKOV), L->base-1, NULL);
> + lj_err_run(L);
> +}
> +
> #if LJ_HASJIT
> +/* Rethrow error after doing a trace exit. */
> LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode)
> {
> if (errcode == LUA_ERRRUN)
> diff --git a/src/lj_err.h b/src/lj_err.h
> index b0c72c24..56bd0375 100644
> --- a/src/lj_err.h
> +++ b/src/lj_err.h
> @@ -23,6 +23,7 @@ LJ_DATA const char *lj_err_allmsg;
> LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em);
> LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode);
> LJ_FUNC_NORET void lj_err_mem(lua_State *L);
> +LJ_FUNC_NORET void LJ_FASTCALL lj_err_stkov(lua_State *L);
> LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
> #if LJ_HASJIT
> LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode);
> diff --git a/src/lj_state.c b/src/lj_state.c
> index 5a920102..053e5ec9 100644
> --- a/src/lj_state.c
> +++ b/src/lj_state.c
> @@ -120,27 +120,49 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
> /* Try to grow stack. */
> void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
> {
> - MSize n;
> - if (L->stacksize >= LJ_STACK_MAXEX) {
> - /* 4. Throw 'error in error handling' when we are _over_ the limit. */
> - if (L->stacksize > LJ_STACK_MAXEX)
> + MSize n = L->stacksize + need;
> + if (LJ_LIKELY(n < LJ_STACK_MAX)) { /* The stack can grow as requested. */
> + if (n < 2 * L->stacksize) { /* Try to double the size. */
> + n = 2 * L->stacksize;
> + if (n > LJ_STACK_MAX)
> + n = LJ_STACK_MAX;
> + }
> + resizestack(L, n);
> + } else { /* Request would overflow. Raise a stack overflow error. */
> + if (LJ_HASJIT) {
> + TValue *base = tvref(G(L)->jit_base);
> + if (base) L->base = base;
> + }
> + if (curr_funcisL(L)) {
> + L->top = curr_topL(L);
> + if (L->top > tvref(L->maxstack)) {
> + /* The current Lua frame violates the stack, so replace it with a
> + ** dummy. This can happen when BC_IFUNCF is trying to grow the stack.
> + */
> + L->top = L->base;
> + setframe_gc(L->base - 1 - LJ_FR2, obj2gco(L), LJ_TTHREAD);
> + }
> + }
> + if (L->stacksize <= LJ_STACK_MAXEX) {
> + /* An error handler might want to inspect the stack overflow error, but
> + ** will need some stack space to run in. We give it a stack size beyond
> + ** the normal limit in order to do so, then rely on lj_state_relimitstack
> + ** calls during unwinding to bring us back to a convential stack size.
> + ** The + 1 is space for the error message, and 2 * LUA_MINSTACK is for
> + ** the lj_state_checkstack() call in lj_err_run().
> + */
> + resizestack(L, LJ_STACK_MAX + 1 + 2 * LUA_MINSTACK);
> + lj_err_stkov(L); /* May invoke an error handler. */
> + } else {
> + /* If we're here, then the stack overflow error handler is requesting
> + ** to grow the stack even further. We have no choice but to abort the
> + ** error handler.
> + */
> + GCstr *em = lj_err_str(L, LJ_ERR_STKOV); /* Might OOM. */
> + setstrV(L, L->top++, em); /* There is always space to push an error. */
> lj_err_throw(L, LUA_ERRERR); /* Does not invoke an error handler. */
> - /* 1. We are _at_ the limit after the last growth. */
> - if (L->status < LUA_ERRRUN) { /* 2. Throw 'stack overflow'. */
> - L->status = LUA_ERRRUN; /* Prevent ending here again for pushed msg. */
> - lj_err_msg(L, LJ_ERR_STKOV); /* May invoke an error handler. */
> }
> - /* 3. Add space (over the limit) for pushed message and error handler. */
> - }
> - n = L->stacksize + need;
> - if (n > LJ_STACK_MAX) {
> - n += 2*LUA_MINSTACK;
> - } else if (n < 2*L->stacksize) {
> - n = 2*L->stacksize;
> - if (n >= LJ_STACK_MAX)
> - n = LJ_STACK_MAX;
> }
> - resizestack(L, n);
> }
>
> void LJ_FASTCALL lj_state_growstack1(lua_State *L)
> diff --git a/test/LuaJIT-tests/lang/stackov.lua b/test/LuaJIT-tests/lang/stackov.lua
> index 8afa86b4..b052ad80 100644
> --- a/test/LuaJIT-tests/lang/stackov.lua
> +++ b/test/LuaJIT-tests/lang/stackov.lua
I don't get why we need this change. With reverted patch this test is
passed.
> @@ -31,13 +31,17 @@ end
> do --- Base test.
> local err, s = xpcall(f, debug.traceback)
> assert(err == false)
> - test_error_msg(f, s)
> + -- There is no place on the stack to invoke the handler.
> + -- Just test the error reason.
> + assert(string.match(s, "stack overflow"))
> end
>
> do --- Stack overflow with non-empty arg list.
> local err, s = xpcall(g, debug.traceback, 1)
> assert(err == false)
> - test_error_msg(g, s)
> + -- There is no place on the stack to invoke the handler.
> + -- Just test the error reason.
> + assert(string.match(s, "stack overflow"))
> end
>
> do --- Vararg tail call with non-empty arg list. +slow
> diff --git a/test/tarantool-tests/lj-1152-stack-buffer-overflow-on-error.test.lua b/test/tarantool-tests/lj-1152-stack-buffer-overflow-on-error.test.lua
> new file mode 100644
> index 00000000..08e344de
> --- /dev/null
> +++ b/test/tarantool-tests/lj-1152-stack-buffer-overflow-on-error.test.lua
> @@ -0,0 +1,79 @@
> +local tap = require('tap')
> +local allocinject = require('allocinject')
> +
> +local test = tap.test('lj-1152-stack-buffer-overflow-on-error')
> +test:plan(6)
> +
> +local LJ_MAX_LOCVAR = 200
> +
> +-- Generate the following Lua chunk:
> +-- local function recursive_f()
> +-- local _
> +-- ...
> +-- recursive_f()
> +-- end
> +-- return recursive_f
> +local function generate_recursive_f(n_locals)
> + local chunk = 'local function recursive_f()\n'
> + for _ = 1, n_locals do
> + chunk = chunk .. 'local _\n'
> + end
> + chunk = chunk .. [[
> + recursive_f()
> + end
> + return recursive_f
> + ]]
> + local f = assert(loadstring(chunk))
> + return f()
> +end
> +
> +-- Use `coroutine.wrap()` for functions to use newly created stack
> +-- with fixed number of stack slots.
> +
> +-- Check that we still got the correct error message in case of
> +-- the unsafe error handler function.
> +coroutine.wrap(function()
> + -- XXX: Use different recursive functions as callee and handler
> + -- to be sure to get the invalid stack value instead of `nil`.
> + local function recursive() recursive() end
> + local function bad_errfunc() bad_errfunc() end
> + local r, e = xpcall(recursive, bad_errfunc)
> + -- XXX: Don't create a constant string that is anchored to the
> + -- prototype. It is necessary to make the error message freed by
> + -- the GC and OOM raising in the last test.
> + local EXPECTED_MSG = 'stack ' .. 'overflow'
> +test:ok(not r, 'correct status')
> +test:like(e, EXPECTED_MSG, 'correct error message')
> +end)()
> +
> +coroutine.wrap(function()
> + -- Collect all strings including the possibly-existed string
> + -- with the 'stack overflow' error message.
> + collectgarbage()
> + local function recursive() recursive() end
> + -- Avoid trace recording. A trace can't be allocated anyway.
> + jit.off(recursive)
> +
> + -- Check the case when the error
> + allocinject.enable_null_alloc()
> + local r, e = pcall(recursive)
> + allocinject.disable()
> +
> +test:ok(not r, 'correct status')
> +test:like(e, 'not enough memory', 'correct error message')
> +end)()
> +
> +-- Check overflow of the buffer related to the Lua stack.
> +-- This test fails under ASAN without the patch.
> +local recursive_f = generate_recursive_f(LJ_MAX_LOCVAR)
> +coroutine.wrap(function()
> + local r, e = pcall(recursive_f)
> +test:ok(not r, 'correct status')
> + -- XXX: Don't create a constant string that is anchored to the
> + -- prototype. It is necessary to make the error message freed by
> + -- the GC and OOM raising in the last test.
> + local EXPECTED_MSG = 'stack ' .. 'overflow'
> +test:like(e, EXPECTED_MSG, 'correct error message')
> +end)()
> +
> +test:done(true)
> diff --git a/test/tarantool-tests/utils/CMakeLists.txt b/test/tarantool-tests/utils/CMakeLists.txt
> index 15871934..e74e184b 100644
> --- a/test/tarantool-tests/utils/CMakeLists.txt
> +++ b/test/tarantool-tests/utils/CMakeLists.txt
> @@ -1,4 +1,5 @@
> list(APPEND tests
> + lj-1152-stack-buffer-overflow-on-error.test.lua
> lj-1166-error-stitch-oom-ir-buff.test.lua
> lj-1166-error-stitch-oom-snap-buff.test.lua
> lj-1247-fin-tab-rehashing-on-trace.test.lua
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.tarantool.org/pipermail/tarantool-patches/attachments/20250606/f3e6ab1d/attachment.htm>
More information about the Tarantool-patches
mailing list