[Tarantool-patches] [PATCH luajit] FFI: Drop finalizer table rehash after GC cycle.

Sergey Kaplun skaplun at tarantool.org
Mon Sep 2 15:54:21 MSK 2024


From: Mike Pall <mike>

Reported by Sergey Kaplun.

(cherry picked from commit fb22d0f80f291827a4004e16bc589b54bcc4a3c7)

Raising an OOM error while rehashing the finalizer table (when we can't
allocate a new hash part) leads to crashes in either `lj_trace_exit()`
or `lj_trace_unwind()`. The error is raised unprotected, from a context
that either has no DWARF eh_frame or loses the context of the JIT
compiler.

This patch drops rehashing of the finalizer table to avoid these
crashes.

Sergey Kaplun:
* added the description and the test for the problem

Part of tarantool/tarantool#10199
Resolves tarantool/tarantool#10290
---

Branch: https://github.com/tarantool/luajit/tree/skaplun/lj-1247-fin-tab-rehashing-on-trace
Related Issues:
* https://github.com/tarantool/tarantool/issues/10290
* https://github.com/LuaJIT/LuaJIT/issues/1247
* https://github.com/tarantool/tarantool/issues/10199

 src/lj_gc.c                                   |   7 -
 src/lj_obj.h                                  |   2 +-
 test/tarantool-tests/CMakeLists.txt           |   1 +
 ...j-1247-fin-tab-rehashing-on-trace.test.lua | 127 ++++++++++++++++++
 .../CMakeLists.txt                            |   1 +
 .../lj_1247_allocinject.c                     |  49 +++++++
 6 files changed, 179 insertions(+), 8 deletions(-)
 create mode 100644 test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace.test.lua
 create mode 100644 test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/CMakeLists.txt
 create mode 100644 test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/lj_1247_allocinject.c

diff --git a/src/lj_gc.c b/src/lj_gc.c
index 4c222f21..a2fc93a0 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -548,7 +548,6 @@ static void gc_finalize(lua_State *L)
     setcdataV(L, &tmp, gco2cd(o));
     tv = lj_tab_set(L, tabref(g->gcroot[GCROOT_FFI_FIN]), &tmp);
     if (!tvisnil(tv)) {
-      g->gc.nocdatafin = 0;
       copyTV(L, &tmp, tv);
       setnilV(tv);  /* Clear entry in finalizer table. */
       gc_call_finalizer(g, L, &tmp, o);
@@ -694,9 +693,6 @@ static size_t gc_onestep(lua_State *L)
 	lj_str_resize(L, g->strmask >> 1);  /* Shrink string table. */
       if (gcref(g->gc.mmudata)) {  /* Need any finalizations? */
 	g->gc.state = GCSfinalize;
-#if LJ_HASFFI
-	g->gc.nocdatafin = 1;
-#endif
       } else {  /* Otherwise skip this phase to help the JIT. */
 	g->gc.state = GCSpause;  /* End of GC cycle. */
 	g->gc.debt = 0;
@@ -713,9 +709,6 @@ static size_t gc_onestep(lua_State *L)
 	g->gc.estimate -= GCFINALIZECOST;
       return GCFINALIZECOST;
     }
-#if LJ_HASFFI
-    if (!g->gc.nocdatafin) lj_tab_rehash(L, tabref(g->gcroot[GCROOT_FFI_FIN]));
-#endif
     g->gc.state = GCSpause;  /* End of GC cycle. */
     g->gc.debt = 0;
     return 0;
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 06ea0cd0..ff22e5f8 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -611,7 +611,7 @@ typedef struct GCState {
   GCSize threshold;	/* Memory threshold. */
   uint8_t currentwhite;	/* Current white color. */
   uint8_t state;	/* GC state. */
-  uint8_t nocdatafin;	/* No cdata finalizer called. */
+  uint8_t unused0;
 #if LJ_64
   uint8_t lightudnum;	/* Number of lightuserdata segments - 1. */
 #else
diff --git a/test/tarantool-tests/CMakeLists.txt b/test/tarantool-tests/CMakeLists.txt
index e3750bf3..e5d5a470 100644
--- a/test/tarantool-tests/CMakeLists.txt
+++ b/test/tarantool-tests/CMakeLists.txt
@@ -37,6 +37,7 @@ add_subdirectory(lj-flush-on-trace)
 add_subdirectory(lj-1004-oom-error-frame)
 add_subdirectory(lj-1066-fix-cur_L-after-coroutine-resume)
 add_subdirectory(lj-1166-error-stitch)
+add_subdirectory(lj-1247-fin-tab-rehashing-on-trace)
 
 # The part of the memory profiler toolchain is located in tools
 # directory, jit, profiler, and bytecode toolchains are located
diff --git a/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace.test.lua b/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace.test.lua
new file mode 100644
index 00000000..308043a2
--- /dev/null
+++ b/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace.test.lua
@@ -0,0 +1,127 @@
+local tap = require('tap')
+
+-- The test file to demonstrate the incorrect JIT behaviour during
+-- OOM on the finalizer table rehashing in the context of the JIT
+-- trace.
+-- See also:
+-- * https://github.com/LuaJIT/LuaJIT/issues/1247,
+-- * https://github.com/tarantool/tarantool/issues/10290.
+
+local test = tap.test('lj-1247-fin-tab-rehashing-on-trace'):skipcond({
+  ['Broken unwiding in tarantool_panic_handler'] = _TARANTOOL and
+                                                   (jit.os == 'OSX'),
+  ['Disabled on MacOS due to #8652'] = jit.os == 'OSX',
+  ['Test requires JIT enabled'] = not jit.status(),
+})
+
+-- XXX: The original issue has 2 ways to crash:
+-- 1) in `lj_trace_unwind()`
+-- 2) in `lj_trace_exit()`
+-- But, since we have additional GC pressure due to requiring the
+-- `tap` module, the second case needs an impossibly big
+-- `gcstepmul` value to reproduce the issue. So, since the root
+-- issue is the same and rehashing of the finalizer table is now
+-- omitted, we test only the first case.
+test:plan(2)
+
+local allocinject = require('lj_1247_allocinject')
+
+local ffi = require('ffi')
+ffi.cdef[[
+  struct test {int a;};
+]]
+
+local N_GC_STEPS = 100
+local N_GC_FINALIZERS = 100
+
+local function empty() end
+
+-- Create a chunk like the following:
+--[[
+  local tostring = tostring
+  local r = ...
+  for _ = 1, 4 do
+    r[1] = tostring(1)
+    -- ...
+    r[N_GC_STEPS] = tostring(N_GC_STEPS)
+  end
+--]]
+local function create_chunk(n_steps)
+  local chunk = 'local tostring = tostring\n'
+  chunk = chunk .. ('local r = ...\n')
+  chunk = chunk .. 'for _ = 1, 4 do\n'
+  for i = 1, n_steps do
+    chunk = chunk .. ('  r[%d] = tostring(%d)\n'):format(i, i)
+  end
+  chunk = chunk .. 'end\n'
+  chunk = chunk .. 'return r\n'
+  return chunk
+end
+
+local function add_more_garbage(size)
+  return ffi.new('char[?]', size)
+end
+
+-- Helper to skip the atomic phase.
+local function skip_atomic()
+  local first_gc_called = false
+  local function mark_fin() first_gc_called = true end
+  jit.off(mark_fin)
+  debug.getmetatable(newproxy(true)).__gc = mark_fin
+
+  -- Skip the atomic phase.
+  jit.off()
+  while not first_gc_called do collectgarbage('step') end
+  jit.on()
+end
+
+local function crash_on_trace_unwind_gc_setup()
+  skip_atomic()
+  collectgarbage('setstepmul', 1000)
+  add_more_garbage(1024 * 1024)
+end
+
+local f = assert(loadstring(create_chunk(N_GC_STEPS)))
+
+-- Create a really long trace.
+jit.flush()
+jit.opt.start('hotloop=2', 'maxirconst=5000', 'maxrecord=10000', 'maxsnap=1000',
+              '-fold')
+
+-- luacheck: no unused
+local gc_anchor = {}
+local function anchor_finalizer(i)
+  gc_anchor[i] = ffi.gc(ffi.new('struct test', i), empty)
+end
+
+for i = 1, N_GC_FINALIZERS do
+  anchor_finalizer(i)
+end
+
+-- Record the trace first.
+f({})
+
+-- The table for anchoring the strings created by the chunk.
+local res_tab = {}
+
+collectgarbage()
+collectgarbage()
+collectgarbage('setpause', 0)
+collectgarbage('setstepmul', 1)
+
+gc_anchor = nil
+
+crash_on_trace_unwind_gc_setup()
+
+-- OOM on every allocation (i.e., on finalizer table rehashing
+-- too).
+allocinject.enable()
+
+local r, err = pcall(f, res_tab)
+
+allocinject.disable()
+
+test:ok(not r, 'correct status')
+test:like(err, 'not enough memory', 'correct error message')
+
+test:done(true)
diff --git a/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/CMakeLists.txt b/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/CMakeLists.txt
new file mode 100644
index 00000000..c3742e45
--- /dev/null
+++ b/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/CMakeLists.txt
@@ -0,0 +1 @@
+BuildTestCLib(lj_1247_allocinject lj_1247_allocinject.c)
diff --git a/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/lj_1247_allocinject.c b/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/lj_1247_allocinject.c
new file mode 100644
index 00000000..81aea60b
--- /dev/null
+++ b/test/tarantool-tests/lj-1247-fin-tab-rehashing-on-trace/lj_1247_allocinject.c
@@ -0,0 +1,49 @@
+#include "lua.h"
+#include "lauxlib.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+static lua_Alloc old_allocf = NULL;
+static void *old_alloc_state = NULL;
+
+/* Function to be used instead of the default allocator. */
+static void *allocf_with_injection(void *ud, void *ptr, size_t osize,
+				   size_t nsize)
+{
+	/* Always OOM on allocation (not on realloc). */
+	if (ptr == NULL)
+		return NULL;
+	else
+		return old_allocf(ud, ptr, osize, nsize);
+}
+
+static int enable(lua_State *L)
+{
+	assert(old_allocf == NULL);
+	old_allocf = lua_getallocf(L, &old_alloc_state);
+	lua_setallocf(L, allocf_with_injection, old_alloc_state);
+	return 0;
+}
+
+static int disable(lua_State *L)
+{
+	assert(old_allocf != NULL);
+	assert(old_allocf != allocf_with_injection);
+	lua_setallocf(L, old_allocf, old_alloc_state);
+	old_allocf = NULL;
+	old_alloc_state = NULL;
+	return 0;
+}
+
+static const struct luaL_Reg allocinject[] = {
+	{"enable", enable},
+	{"disable", disable},
+	{NULL, NULL}
+};
+
+LUA_API int luaopen_lj_1247_allocinject(lua_State *L)
+{
+	luaL_register(L, "lj_1247_allocinject", allocinject);
+	return 1;
+}
-- 
2.46.0



More information about the Tarantool-patches mailing list