Hi, Sergey! thanks for the patch with fix! LGTM Sergey On 20.03.2025 19:20, Sergey Kaplun wrote: > This is a follow-up to the commits > 2115828bc6fae911215cd07593df2ae0a83a0f42 ("FFI: Drop finalizer table > rehash after GC cycle.") and bfcbaa70e7b3e720578c05db3f66d832419e8566 > ("Drop unused function wrapper."). After them, the rehashing of the cdata > finalizer table at the end of the GC cycle is dropped. Without > reshashing of this table, the table increases the estimated amount of > memory for the GC. Hence, with the bigger `estimate`, the threshold > before starting the GC cycle is increased too. This allows allocating > more cdata objects and increasing the size of the finalizer table again. > This increases the memory estimate again and so on. As a result, we have > unlimited memory growth without rehashing of the table for the > cdata-intensive workloads. > > This patch reverts back the code changes (but not the test) of the > aforementioned commits. Also, it fixes the possible crash after > rehashing of the cdata finalizers table by adding the protected call to > the GC steps on the trace itself and on the trace exit. > --- > > Branch:https://github.com/tarantool/luajit/tree/refs/heads/skaplun/gh-noticket-fix-gc-finalizer-pressure > Related issue:https://github.com/LuaJIT/LuaJIT/issues/1350 > > See also: > *https://github.com/LuaJIT/LuaJIT/issues/1247 > *https://github.com/LuaJIT/LuaJIT/pull/946 > > src/lj_gc.c | 31 +++++++++- > src/lj_obj.h | 2 +- > src/lj_tab.c | 7 +++ > src/lj_tab.h | 3 + > src/lj_trace.c | 20 +++++- > ...lj-1350-fix-gc-finalizer-pressure.test.lua | 61 +++++++++++++++++++ > 6 files changed, 119 insertions(+), 5 deletions(-) > create mode 100644 test/tarantool-tests/lj-1350-fix-gc-finalizer-pressure.test.lua > > diff --git a/src/lj_gc.c b/src/lj_gc.c > index a2fc93a0..f455b55b 100644 > --- a/src/lj_gc.c > +++ b/src/lj_gc.c > @@ -548,6 +548,7 @@ static void gc_finalize(lua_State *L) > setcdataV(L, &tmp, gco2cd(o)); > tv = lj_tab_set(L, tabref(g->gcroot[GCROOT_FFI_FIN]), &tmp); > if (!tvisnil(tv)) { > + g->gc.nocdatafin = 0; > copyTV(L, &tmp, tv); > setnilV(tv); /* Clear entry in finalizer table. */ > gc_call_finalizer(g, L, &tmp, o); > @@ -693,6 +694,9 @@ static size_t gc_onestep(lua_State *L) > lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */ > if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ > g->gc.state = GCSfinalize; > +#if LJ_HASFFI > + g->gc.nocdatafin = 1; > +#endif > } else { /* Otherwise skip this phase to help the JIT. */ > g->gc.state = GCSpause; /* End of GC cycle. */ > g->gc.debt = 0; > @@ -709,6 +713,9 @@ static size_t gc_onestep(lua_State *L) > g->gc.estimate -= GCFINALIZECOST; > return GCFINALIZECOST; > } > +#if LJ_HASFFI > + if (!g->gc.nocdatafin) lj_tab_rehash(L, tabref(g->gcroot[GCROOT_FFI_FIN])); > +#endif > g->gc.state = GCSpause; /* End of GC cycle. */ > g->gc.debt = 0; > return 0; > @@ -757,15 +764,35 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L) > lj_gc_step(L); > } > > +/* Need to protect lj_gc_step because it may throw. */ > +static TValue *gc_step_jit_cp(lua_State *L, lua_CFunction dummy, void *ud) > +{ > + MSize steps = (MSize)(uintptr_t)ud; > + UNUSED(dummy); > + > + /* Always catch error here and don't call error function. */ > + cframe_errfunc(L->cframe) = 0; > + cframe_nres(L->cframe) = -2*LUAI_MAXSTACK*(int)sizeof(TValue); > + > + while (steps-- > 0 && lj_gc_step(L) == 0) > + ; > + > + return NULL; > +} > + > #if LJ_HASJIT > /* Perform multiple GC steps. Called from JIT-compiled code. */ > int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) > { > lua_State *L = gco2th(gcref(g->cur_L)); > + int32_t ostate = g->vmstate; > + int errcode; > L->base = tvref(G(L)->jit_base); > L->top = curr_topL(L); > - while (steps-- > 0 && lj_gc_step(L) == 0) > - ; > + errcode = lj_vm_cpcall(L, NULL, (void *)(uintptr_t)steps, gc_step_jit_cp); > + g->vmstate = ostate; > + if (errcode) > + lj_err_throw(L, errcode); /* Propagate errors. */ > /* Return 1 to force a trace exit. */ > return (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize); > } > diff --git a/src/lj_obj.h b/src/lj_obj.h > index ff22e5f8..06ea0cd0 100644 > --- a/src/lj_obj.h > +++ b/src/lj_obj.h > @@ -611,7 +611,7 @@ typedef struct GCState { > GCSize threshold; /* Memory threshold. */ > uint8_t currentwhite; /* Current white color. */ > uint8_t state; /* GC state. */ > - uint8_t unused0; > + uint8_t nocdatafin; /* No cdata finalizer called. */ > #if LJ_64 > uint8_t lightudnum; /* Number of lightuserdata segments - 1. */ > #else > diff --git a/src/lj_tab.c b/src/lj_tab.c > index c99c6521..1d6a4b7f 100644 > --- a/src/lj_tab.c > +++ b/src/lj_tab.c > @@ -388,6 +388,13 @@ static void rehashtab(lua_State *L, GCtab *t, cTValue *ek) > lj_tab_resize(L, t, asize, hsize2hbits(total)); > } > > +#if LJ_HASFFI > +void lj_tab_rehash(lua_State *L, GCtab *t) > +{ > + rehashtab(L, t, niltv(L)); > +} > +#endif > + > void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) > { > lj_tab_resize(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0); > diff --git a/src/lj_tab.h b/src/lj_tab.h > index 8575e7bd..71e34945 100644 > --- a/src/lj_tab.h > +++ b/src/lj_tab.h > @@ -41,6 +41,9 @@ LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); > LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); > LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t); > LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); > +#if LJ_HASFFI > +LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t); > +#endif > LJ_FUNC void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits); > LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); > > diff --git a/src/lj_trace.c b/src/lj_trace.c > index f9b8ff00..0d1d233a 100644 > --- a/src/lj_trace.c > +++ b/src/lj_trace.c > @@ -823,6 +823,18 @@ static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud) > return NULL; > } > > +/* Need to protect lj_gc_step because it may throw. */ > +static TValue *trace_exit_gc_cp(lua_State *L, lua_CFunction dummy, void *unused) > +{ > + /* Always catch error here and don't call error function. */ > + cframe_errfunc(L->cframe) = 0; > + cframe_nres(L->cframe) = -2*LUAI_MAXSTACK*(int)sizeof(TValue); > + lj_gc_step(L); > + UNUSED(dummy); > + UNUSED(unused); > + return NULL; > +} > + > #ifndef LUAJIT_DISABLE_VMEVENT > /* Push all registers from exit state. */ > static void trace_exit_regs(lua_State *L, ExitState *ex) > @@ -918,8 +930,12 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) > } else if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) { > /* Just exit to interpreter. */ > } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { > - if (!(G(L)->hookmask & HOOK_GC)) > - lj_gc_step(L); /* Exited because of GC: drive GC forward. */ > + if (!(G(L)->hookmask & HOOK_GC)) { > + /* Exited because of GC: drive GC forward. */ > + errcode = lj_vm_cpcall(L, NULL, NULL, trace_exit_gc_cp); > + if (errcode) > + return -errcode; /* Return negated error code. */ > + } > } else if ((J->flags & JIT_F_ON)) { > trace_hotside(J, pc); > } > diff --git a/test/tarantool-tests/lj-1350-fix-gc-finalizer-pressure.test.lua b/test/tarantool-tests/lj-1350-fix-gc-finalizer-pressure.test.lua > new file mode 100644 > index 00000000..3b2660cd > --- /dev/null > +++ b/test/tarantool-tests/lj-1350-fix-gc-finalizer-pressure.test.lua > @@ -0,0 +1,61 @@ > +local tap = require('tap') > +local ffi = require('ffi') > + > +-- Test file to demonstrate too fast memory growth without > +-- shrinking the finalizer table for cdata at the end of the GC > +-- cycle. > +-- See also:https://github.com/LuaJIT/LuaJIT/issues/1350. > + > +local test = tap.test('lj-1350-fix-gc-finalizer-pressure'):skipcond({ > + -- Tarantool has its own cdata objects. This makes the test > + -- unpredictable. Disable it. > + ['Disable test for Tarantool'] = _TARANTOOL, > + ['Test requires GC64 mode enabled'] = not ffi.abi('gc64'), > +}) > + > +test:plan(1) > + > +-- This stress test shows the memory overconsumption if LuaJIT > +-- does not rehash the table with cdata finalizers at the end of > +-- the GC cycle. Without reshashing of this table, it increases > +-- the estimated amount of memory for the GC. Hence, with the > +-- bigger `estimate`, the threshold before starting the GC cycle > +-- is increased too. This allows allocating more cdata objects and > +-- increasing the size of the finalizer table again. This > +-- increases the memory estimate again and so on. As a result, we > +-- have unlimited memory growth without rehashing of the table for > +-- the cdata-intensive workloads. > + > +local cnt = 0 > +-- Finalizer function for cdata object. > +local function dec() cnt = cnt - 1 end > + > +local function new_obj() > + -- Use an array type to make it possible to set a GC finalizer. > + local obj = ffi.new('uint64_t [1]') > + -- Insert the object into the finalizer table. > + ffi.gc(obj, dec) > + cnt = cnt + 1 > + -- The empirical assertion check. Without rehashing, there are > + -- more cdata objects than the mentioned limit. > + assert(cnt < 10e6, 'too much cdata alive at the moment') > + return obj > +end > + > +-- Reset the GC. > +collectgarbage('collect') > + > +local t = {} > +-- Don't use too huge limit to avoid the test run being too long. > +-- But it is big enough to trigger the assertion above without > +-- cdata finalizer table rehashing at the end of the GC cycle. > +for i = 1, 3e7 do > + table.insert(t, new_obj()) > + -- Reset the table. Now cdata objects are collectable. > + if i % 3.5e6 == 0 then > + t = {} > + end > +end > + > +test:ok(true, 'not too much cdata alive at the moment') > +test:done(true)