[Tarantool-patches] [PATCH luajit] Rework stack overflow handling.

Sergey Bronnikov sergeyb at tarantool.org
Fri Jun 6 15:54:02 MSK 2025


Hello, Sergey,

thanks for the patch! See my comments below.

Sergey

On 4/25/25 16:42, Sergey Kaplun wrote:
> From: Mike Pall <mike>
>
> Reported by pwnhacker0x18. Fixed by Peter Cawley.
>
> (cherry picked from commit defe61a56751a0db5f00ff3ab7b8f45436ba74c8)
>
> In case of the Lua stack overflow error, LuaJIT restores the `L->top`
> value and pushes the error message above. It is possible that the
> restored value is greater than `L->maxstack`, so pushing the error
> message causes dirty write out-of-bounds.
>
> This patch prevents it by overwriting stack overflow handling machinery.
> Now, in the aforementioned case, the last frame is replaced with a dummy
> frame to avoid dirty writes.
>
> Sergey Kaplun:
> * added the description and the test for the problem
>
> Part of tarantool/tarantool#11278
> ---
>
> Related issues:
> *https://github.com/LuaJIT/LuaJIT/issues/1152
> *https://github.com/tarantool/tarantool/issues/11278
> Branch:https://github.com/tarantool/luajit/tree/skaplun/lj-1152-stack-buffer-overflow-on-error
>
> The CI is red due to CMake minimum required version in c-dt module in
> Tarantool.
These problems are already fixed, please re-push the branch.
>
> The cherry-pick also included a part fixed in the merge commit:
> https://github.com/LuaJIT/LuaJIT/commit/0d313b243
>
>   src/lj_debug.c                                |  1 +
>   src/lj_err.c                                  | 23 +++++-
>   src/lj_err.h                                  |  1 +
>   src/lj_state.c                                | 58 +++++++++-----
>   test/LuaJIT-tests/lang/stackov.lua            |  8 +-
>   ...52-stack-buffer-overflow-on-error.test.lua | 79 +++++++++++++++++++
>   test/tarantool-tests/utils/CMakeLists.txt     |  1 +
>   7 files changed, 148 insertions(+), 23 deletions(-)
>   create mode 100644 test/tarantool-tests/lj-1152-stack-buffer-overflow-on-error.test.lua
>
> diff --git a/src/lj_debug.c b/src/lj_debug.c
> index 107f464c..76e48aca 100644
> --- a/src/lj_debug.c
> +++ b/src/lj_debug.c
> @@ -64,6 +64,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
>       if (cf == NULL || (char *)cframe_pc(cf) == (char *)cframe_L(cf))
>         return NO_BCPOS;
>       ins = cframe_pc(cf);  /* Only happens during error/hook handling. */
> +    if (!ins) return NO_BCPOS;
>     } else {
>       if (frame_islua(nextframe)) {
>         ins = frame_pc(nextframe);
> diff --git a/src/lj_err.c b/src/lj_err.c
> index 1a9a2f2b..80dca847 100644
> --- a/src/lj_err.c
> +++ b/src/lj_err.c
> @@ -784,7 +784,14 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
>       TValue *base = tvref(G(L)->jit_base);
>       if (base) L->base = base;
>     }
> -  if (curr_funcisL(L)) L->top = curr_topL(L);
> +  if (curr_funcisL(L)) {
> +    L->top = curr_topL(L);
> +    if (LJ_UNLIKELY(L->top > tvref(L->maxstack))) {
> +      /* The current Lua frame violates the stack. Replace it with a dummy. */
> +      L->top = L->base;
> +      setframe_gc(L->base - 1 - LJ_FR2, obj2gco(L), LJ_TTHREAD);
> +    }
> +  }
>     setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRMEM));
>     lj_err_throw(L, LUA_ERRMEM);
>   }
> @@ -845,9 +852,11 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
>   {
>     ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L);
>     if (ef) {
> -    TValue *errfunc = restorestack(L, ef);
> -    TValue *top = L->top;
> +    TValue *errfunc, *top;
> +    lj_state_checkstack(L, LUA_MINSTACK * 2);  /* Might raise new error. */
>       lj_trace_abort(G(L));
> +    errfunc = restorestack(L, ef);
> +    top = L->top;
>       if (!tvisfunc(errfunc) || L->status == LUA_ERRERR) {
>         setstrV(L, top-1, lj_err_str(L, LJ_ERR_ERRERR));
>         lj_err_throw(L, LUA_ERRERR);
> @@ -862,7 +871,15 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
>     lj_err_throw(L, LUA_ERRRUN);
>   }
>   
> +/* Stack overflow error. */
> +void LJ_FASTCALL lj_err_stkov(lua_State *L)
> +{
> +  lj_debug_addloc(L, err2msg(LJ_ERR_STKOV), L->base-1, NULL);
> +  lj_err_run(L);
> +}
> +
>   #if LJ_HASJIT
> +/* Rethrow error after doing a trace exit. */
>   LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode)
>   {
>     if (errcode == LUA_ERRRUN)
> diff --git a/src/lj_err.h b/src/lj_err.h
> index b0c72c24..56bd0375 100644
> --- a/src/lj_err.h
> +++ b/src/lj_err.h
> @@ -23,6 +23,7 @@ LJ_DATA const char *lj_err_allmsg;
>   LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em);
>   LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode);
>   LJ_FUNC_NORET void lj_err_mem(lua_State *L);
> +LJ_FUNC_NORET void LJ_FASTCALL lj_err_stkov(lua_State *L);
>   LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
>   #if LJ_HASJIT
>   LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode);
> diff --git a/src/lj_state.c b/src/lj_state.c
> index 5a920102..053e5ec9 100644
> --- a/src/lj_state.c
> +++ b/src/lj_state.c
> @@ -120,27 +120,49 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
>   /* Try to grow stack. */
>   void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
>   {
> -  MSize n;
> -  if (L->stacksize >= LJ_STACK_MAXEX) {
> -    /* 4. Throw 'error in error handling' when we are _over_ the limit. */
> -    if (L->stacksize > LJ_STACK_MAXEX)
> +  MSize n = L->stacksize + need;
> +  if (LJ_LIKELY(n < LJ_STACK_MAX)) {  /* The stack can grow as requested. */
> +    if (n < 2 * L->stacksize) {  /* Try to double the size. */
> +      n = 2 * L->stacksize;
> +      if (n > LJ_STACK_MAX)
> +	n = LJ_STACK_MAX;
> +    }
> +    resizestack(L, n);
> +  } else {  /* Request would overflow. Raise a stack overflow error. */
> +    if (LJ_HASJIT) {
> +      TValue *base = tvref(G(L)->jit_base);
> +      if (base) L->base = base;
> +    }
> +    if (curr_funcisL(L)) {
> +      L->top = curr_topL(L);
> +      if (L->top > tvref(L->maxstack)) {
> +	/* The current Lua frame violates the stack, so replace it with a
> +	** dummy. This can happen when BC_IFUNCF is trying to grow the stack.
> +	*/
> +	L->top = L->base;
> +	setframe_gc(L->base - 1 - LJ_FR2, obj2gco(L), LJ_TTHREAD);
> +      }
> +    }
> +    if (L->stacksize <= LJ_STACK_MAXEX) {
> +      /* An error handler might want to inspect the stack overflow error, but
> +      ** will need some stack space to run in. We give it a stack size beyond
> +      ** the normal limit in order to do so, then rely on lj_state_relimitstack
> +      ** calls during unwinding to bring us back to a convential stack size.
> +      ** The + 1 is space for the error message, and 2 * LUA_MINSTACK is for
> +      ** the lj_state_checkstack() call in lj_err_run().
> +      */
> +      resizestack(L, LJ_STACK_MAX + 1 + 2 * LUA_MINSTACK);
> +      lj_err_stkov(L);  /* May invoke an error handler. */
> +    } else {
> +      /* If we're here, then the stack overflow error handler is requesting
> +      ** to grow the stack even further. We have no choice but to abort the
> +      ** error handler.
> +      */
> +      GCstr *em = lj_err_str(L, LJ_ERR_STKOV);  /* Might OOM. */
> +      setstrV(L, L->top++, em);  /* There is always space to push an error. */
>         lj_err_throw(L, LUA_ERRERR);  /* Does not invoke an error handler. */
> -    /* 1. We are _at_ the limit after the last growth. */
> -    if (L->status < LUA_ERRRUN) {  /* 2. Throw 'stack overflow'. */
> -      L->status = LUA_ERRRUN;  /* Prevent ending here again for pushed msg. */
> -      lj_err_msg(L, LJ_ERR_STKOV);  /* May invoke an error handler. */
>       }
> -    /* 3. Add space (over the limit) for pushed message and error handler. */
> -  }
> -  n = L->stacksize + need;
> -  if (n > LJ_STACK_MAX) {
> -    n += 2*LUA_MINSTACK;
> -  } else if (n < 2*L->stacksize) {
> -    n = 2*L->stacksize;
> -    if (n >= LJ_STACK_MAX)
> -      n = LJ_STACK_MAX;
>     }
> -  resizestack(L, n);
>   }
>   
>   void LJ_FASTCALL lj_state_growstack1(lua_State *L)
> diff --git a/test/LuaJIT-tests/lang/stackov.lua b/test/LuaJIT-tests/lang/stackov.lua
> index 8afa86b4..b052ad80 100644
> --- a/test/LuaJIT-tests/lang/stackov.lua
> +++ b/test/LuaJIT-tests/lang/stackov.lua
I don't get why we need this change. With reverted patch this test is 
passed.
> @@ -31,13 +31,17 @@ end
>   do --- Base test.
>     local err, s = xpcall(f, debug.traceback)
>     assert(err == false)
> -  test_error_msg(f, s)
> +  -- There is no place on the stack to invoke the handler.
> +  -- Just test the error reason.
> +  assert(string.match(s, "stack overflow"))
>   end
>   
>   do --- Stack overflow with non-empty arg list.
>     local err, s = xpcall(g, debug.traceback, 1)
>     assert(err == false)
> -  test_error_msg(g, s)
> +  -- There is no place on the stack to invoke the handler.
> +  -- Just test the error reason.
> +  assert(string.match(s, "stack overflow"))
>   end
>   
>   do --- Vararg tail call with non-empty arg list. +slow
> diff --git a/test/tarantool-tests/lj-1152-stack-buffer-overflow-on-error.test.lua b/test/tarantool-tests/lj-1152-stack-buffer-overflow-on-error.test.lua
> new file mode 100644
> index 00000000..08e344de
> --- /dev/null
> +++ b/test/tarantool-tests/lj-1152-stack-buffer-overflow-on-error.test.lua
> @@ -0,0 +1,79 @@
> +local tap = require('tap')
> +local allocinject = require('allocinject')
> +
> +local test = tap.test('lj-1152-stack-buffer-overflow-on-error')
> +test:plan(6)
> +
> +local LJ_MAX_LOCVAR = 200
> +
> +-- Generate the following Lua chunk:
> +--   local function recursive_f()
> +--     local _
> +--     ...
> +--     recursive_f()
> +--   end
> +--   return recursive_f
> +local function generate_recursive_f(n_locals)
> +  local chunk = 'local function recursive_f()\n'
> +  for _ = 1, n_locals do
> +    chunk = chunk .. 'local _\n'
> +  end
> +  chunk = chunk .. [[
> +      recursive_f()
> +    end
> +    return recursive_f
> +  ]]
> +  local f = assert(loadstring(chunk))
> +  return f()
> +end
> +
> +-- Use `coroutine.wrap()` for functions to use newly created stack
> +-- with fixed number of stack slots.
> +
> +-- Check that we still got the correct error message in case of
> +-- the unsafe error handler function.
> +coroutine.wrap(function()
> +  -- XXX: Use different recursive functions as callee and handler
> +  -- to be sure to get the invalid stack value instead of `nil`.
> +  local function recursive() recursive() end
> +  local function bad_errfunc() bad_errfunc() end
> +  local r, e = xpcall(recursive, bad_errfunc)
> +  -- XXX: Don't create a constant string that is anchored to the
> +  -- prototype. It is necessary to make the error message freed by
> +  -- the GC and OOM raising in the last test.
> +  local EXPECTED_MSG = 'stack ' .. 'overflow'
> +test:ok(not r, 'correct status')
> +test:like(e, EXPECTED_MSG, 'correct error message')
> +end)()
> +
> +coroutine.wrap(function()
> +  -- Collect all strings including the possibly-existed string
> +  -- with the 'stack overflow' error message.
> +  collectgarbage()
> +  local function recursive() recursive() end
> +  -- Avoid trace recording. A trace can't be allocated anyway.
> +  jit.off(recursive)
> +
> +  -- Check the case when the error
> +  allocinject.enable_null_alloc()
> +  local r, e = pcall(recursive)
> +  allocinject.disable()
> +
> +test:ok(not r, 'correct status')
> +test:like(e, 'not enough memory', 'correct error message')
> +end)()
> +
> +-- Check overflow of the buffer related to the Lua stack.
> +-- This test fails under ASAN without the patch.
> +local recursive_f = generate_recursive_f(LJ_MAX_LOCVAR)
> +coroutine.wrap(function()
> +  local r, e = pcall(recursive_f)
> +test:ok(not r, 'correct status')
> +  -- XXX: Don't create a constant string that is anchored to the
> +  -- prototype. It is necessary to make the error message freed by
> +  -- the GC and OOM raising in the last test.
> +  local EXPECTED_MSG = 'stack ' .. 'overflow'
> +test:like(e, EXPECTED_MSG, 'correct error message')
> +end)()
> +
> +test:done(true)
> diff --git a/test/tarantool-tests/utils/CMakeLists.txt b/test/tarantool-tests/utils/CMakeLists.txt
> index 15871934..e74e184b 100644
> --- a/test/tarantool-tests/utils/CMakeLists.txt
> +++ b/test/tarantool-tests/utils/CMakeLists.txt
> @@ -1,4 +1,5 @@
>   list(APPEND tests
> +  lj-1152-stack-buffer-overflow-on-error.test.lua
>     lj-1166-error-stitch-oom-ir-buff.test.lua
>     lj-1166-error-stitch-oom-snap-buff.test.lua
>     lj-1247-fin-tab-rehashing-on-trace.test.lua
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.tarantool.org/pipermail/tarantool-patches/attachments/20250606/f3e6ab1d/attachment.htm>


More information about the Tarantool-patches mailing list