[Tarantool-patches] [PATCH luajit] FFI: Drop finalizer table rehash after GC cycle.
Sergey Kaplun
skaplun at tarantool.org
Mon Sep 2 15:54:21 MSK 2024
From: Mike Pall <mike>
Reported by Sergey Kaplun.
(cherry picked from commit fb22d0f80f291827a4004e16bc589b54bcc4a3c7)
The raising of the OOM error when rehashing the finalizer table (when we
can't allocate a new hash part) leads to crashes in either
`lj_trace_exit()` or `lj_trace_unwind()` due to unprotected error
raising, which either has no DWARF eh_frame or loses the context of the
JIT compiler.
This patch drops rehashing of the finalizer table to avoid these
crashes.
Sergey Kaplun:
* added the description and the test for the problem
Part of tarantool/tarantool#10199
Resolves tarantool/tarantool#10290
---
Branch: https://github.com/tarantool/luajit/tree/skaplun/lj-1247-fin-tab-rehashing-on-trace
Related Issues:
* https://github.com/tarantool/tarantool/issues/10290
* https://github.com/LuaJIT/LuaJIT/issues/1247
* https://github.com/tarantool/tarantool/issues/10199
src/lj_gc.c | 7 -
src/lj_obj.h | 2 +-
test/tarantool-tests/CMakeLists.txt | 1 +
...j-1247-fin-tab-rehashing-on-trace.test.lua | 127 ++++++++++++++++++
.../CMakeLists.txt | 1 +
.../lj_1247_allocinject.c | 49 +++++++
6 files changed, 179 insertions(+), 8 deletions(-)
create mode 100644 test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace.test.lua
create mode 100644 test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/CMakeLists.txt
create mode 100644 test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/lj_1247_allocinject.c
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 4c222f21..a2fc93a0 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -548,7 +548,6 @@ static void gc_finalize(lua_State *L)
setcdataV(L, &tmp, gco2cd(o));
tv = lj_tab_set(L, tabref(g->gcroot[GCROOT_FFI_FIN]), &tmp);
if (!tvisnil(tv)) {
- g->gc.nocdatafin = 0;
copyTV(L, &tmp, tv);
setnilV(tv); /* Clear entry in finalizer table. */
gc_call_finalizer(g, L, &tmp, o);
@@ -694,9 +693,6 @@ static size_t gc_onestep(lua_State *L)
lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
if (gcref(g->gc.mmudata)) { /* Need any finalizations? */
g->gc.state = GCSfinalize;
-#if LJ_HASFFI
- g->gc.nocdatafin = 1;
-#endif
} else { /* Otherwise skip this phase to help the JIT. */
g->gc.state = GCSpause; /* End of GC cycle. */
g->gc.debt = 0;
@@ -713,9 +709,6 @@ static size_t gc_onestep(lua_State *L)
g->gc.estimate -= GCFINALIZECOST;
return GCFINALIZECOST;
}
-#if LJ_HASFFI
- if (!g->gc.nocdatafin) lj_tab_rehash(L, tabref(g->gcroot[GCROOT_FFI_FIN]));
-#endif
g->gc.state = GCSpause; /* End of GC cycle. */
g->gc.debt = 0;
return 0;
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 06ea0cd0..ff22e5f8 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -611,7 +611,7 @@ typedef struct GCState {
GCSize threshold; /* Memory threshold. */
uint8_t currentwhite; /* Current white color. */
uint8_t state; /* GC state. */
- uint8_t nocdatafin; /* No cdata finalizer called. */
+ uint8_t unused0;
#if LJ_64
uint8_t lightudnum; /* Number of lightuserdata segments - 1. */
#else
diff --git a/test/tarantool-tests/CMakeLists.txt b/test/tarantool-tests/CMakeLists.txt
index e3750bf3..e5d5a470 100644
--- a/test/tarantool-tests/CMakeLists.txt
+++ b/test/tarantool-tests/CMakeLists.txt
@@ -37,6 +37,7 @@ add_subdirectory(lj-flush-on-trace)
add_subdirectory(lj-1004-oom-error-frame)
add_subdirectory(lj-1066-fix-cur_L-after-coroutine-resume)
add_subdirectory(lj-1166-error-stitch)
+add_subdirectory(lj-1247-fin-tab-rehashing-on-trace)
# The part of the memory profiler toolchain is located in tools
# directory, jit, profiler, and bytecode toolchains are located
diff --git a/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace.test.lua b/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace.test.lua
new file mode 100644
index 00000000..308043a2
--- /dev/null
+++ b/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace.test.lua
@@ -0,0 +1,127 @@
+local tap = require('tap')
+
+-- The test file to demonstrate the incorrect JIT behaviour during
+-- OOM on the finalizer table rehashing in the context of the JIT
+-- trace.
+-- See also:
+-- * https://github.com/LuaJIT/LuaJIT/issues/1247,
+-- * https://github.com/tarantool/tarantool/issues/10290.
+
+local test = tap.test('lj-1247-fin-tab-rehashing-on-trace'):skipcond({
+ ['Broken unwiding in tarantool_panic_handler'] = _TARANTOOL and
+ (jit.os == 'OSX'),
+ ['Disabled on MacOS due to #8652'] = jit.os == 'OSX',
+ ['Test requires JIT enabled'] = not jit.status(),
+})
+
+-- XXX: The original issue has 2 ways to crash:
+-- 1) in `lj_trace_unwind()`
+-- 2) in `lj_trace_exit()`
+-- But since requiring the `tap` module adds extra GC pressure,
+-- the second case needs an impossibly big `gcstepmul` value to
+-- reproduce the issue. So, since the root issue is the same and
+-- rehashing of the finalizer table is now omitted, we test only
+-- the first case.
+test:plan(2)
+
+local allocinject = require('lj_1247_allocinject')
+
+local ffi = require('ffi')
+ffi.cdef[[
+ struct test {int a;};
+]]
+
+local N_GC_STEPS = 100
+local N_GC_FINALIZERS = 100
+
+local function empty() end
+
+-- Create a chunk like the following:
+--[[
+ local tostring = tostring
+ local r = ...
+ for _ = 1, 4 do
+ r[1] = tostring(1)
+ -- ...
+ r[N_GC_STEPS] = tostring(N_GC_STEPS)
+ end
+--]]
+local function create_chunk(n_steps)
+ local chunk = 'local tostring = tostring\n'
+ chunk = chunk .. ('local r = ...\n')
+ chunk = chunk .. 'for _ = 1, 4 do\n'
+ for i = 1, n_steps do
+ chunk = chunk .. (' r[%d] = tostring(%d)\n'):format(i, i)
+ end
+ chunk = chunk .. 'end\n'
+ chunk = chunk .. 'return r\n'
+ return chunk
+end
+
+local function add_more_garbage(size)
+ return ffi.new('char[?]', size)
+end
+
+-- Helper to skip the atomic phase.
+local function skip_atomic()
+ local first_gc_called = false
+ local function mark_fin() first_gc_called = true end
+ jit.off(mark_fin)
+ debug.getmetatable(newproxy(true)).__gc = mark_fin
+
+ -- Skip the atomic phase.
+ jit.off()
+ while not first_gc_called do collectgarbage('step') end
+ jit.on()
+end
+
+local function crash_on_trace_unwind_gc_setup()
+ skip_atomic()
+ collectgarbage('setstepmul', 1000)
+ add_more_garbage(1024 * 1024)
+end
+
+local f = assert(loadstring(create_chunk(N_GC_STEPS)))
+
+-- Create a really long trace.
+jit.flush()
+jit.opt.start('hotloop=2', 'maxirconst=5000', 'maxrecord=10000', 'maxsnap=1000',
+ '-fold')
+
+-- luacheck: no unused
+local gc_anchor = {}
+local function anchor_finalizer(i)
+ gc_anchor[i] = ffi.gc(ffi.new('struct test', i), empty)
+end
+
+for i = 1, N_GC_FINALIZERS do
+ anchor_finalizer(i)
+end
+
+-- Record the trace first.
+f({})
+
+-- The table for anchoring the strings produced by `f`.
+local res_tab = {}
+
+collectgarbage()
+collectgarbage()
+collectgarbage('setpause', 0)
+collectgarbage('setstepmul', 1)
+
+gc_anchor = nil
+
+crash_on_trace_unwind_gc_setup()
+
+-- OOM on every allocation (i.e., on finalizer table rehashing
+-- too).
+allocinject.enable()
+
+local r, err = pcall(f, res_tab)
+
+allocinject.disable()
+
+test:ok(not r, 'correct status')
+test:like(err, 'not enough memory', 'correct error message')
+
+test:done(true)
diff --git a/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/CMakeLists.txt b/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/CMakeLists.txt
new file mode 100644
index 00000000..c3742e45
--- /dev/null
+++ b/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/CMakeLists.txt
@@ -0,0 +1 @@
+BuildTestCLib(lj_1247_allocinject lj_1247_allocinject.c)
diff --git a/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/lj_1247_allocinject.c b/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/lj_1247_allocinject.c
new file mode 100644
index 00000000..81aea60b
--- /dev/null
+++ b/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/lj_1247_allocinject.c
@@ -0,0 +1,49 @@
+#include "lua.h"
+#include "lauxlib.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+static lua_Alloc old_allocf = NULL;
+static void *old_alloc_state = NULL;
+
+/* Function to be used instead of the default allocator. */
+static void *allocf_with_injection(void *ud, void *ptr, size_t osize,
+ size_t nsize)
+{
+ /* Always OOM on allocation (not on realloc). */
+ if (ptr == NULL)
+ return NULL;
+ else
+ return old_allocf(ud, ptr, osize, nsize);
+}
+
+static int enable(lua_State *L)
+{
+ assert(old_allocf == NULL);
+ old_allocf = lua_getallocf(L, &old_alloc_state);
+ lua_setallocf(L, allocf_with_injection, old_alloc_state);
+ return 0;
+}
+
+static int disable(lua_State *L)
+{
+ assert(old_allocf != NULL);
+ assert(old_allocf != allocf_with_injection);
+ lua_setallocf(L, old_allocf, old_alloc_state);
+ old_allocf = NULL;
+ old_alloc_state = NULL;
+ return 0;
+}
+
+static const struct luaL_Reg allocinject[] = {
+ {"enable", enable},
+ {"disable", disable},
+ {NULL, NULL}
+};
+
+LUA_API int luaopen_lj_1247_allocinject(lua_State *L)
+{
+ luaL_register(L, "lj_1247_allocinject", allocinject);
+ return 1;
+}
--
2.46.0
More information about the Tarantool-patches
mailing list