From: Mike Pall <mike> (cherry picked from e613105ca92fe25e7bd63031b409faa8c908ac35) Child function inherits parents upvalues. Assume parent function is marked first (all closed upvalues and function are colored to black), and then `debug.setupvalue()`/`lua_setupvalue()` is called for an unmarked child function with inherited upvalues. The barrier is tried to move forward (but not actually move, due to the colors of operands) for a non-marked function (instead marked upvalue). Now black upvalue refers to a white object. Black objects can't refer white objects due to GC invariant, so the invariant is violated. This patch changes a function object to an upvalue for barrier movement. Sergey Kaplun: * added the description and the test for the problem Part of tarantool/tarantool#6548 --- Related issue: https://github.com/tarantool/tarantool/issues/6548 Branch: https://github.com/tarantool/luajit/tree/skaplun/gh-noticket-fix-gc-setupvalue-full-ci Tarantool branch: https://github.com/tarantool/tarantool/tree/skaplun/gh-noticket-fix-gc-setupvalue-full-ci Note: CI is red. But this job is red on master too... src/lj_api.c | 8 ++- src/lj_debug.c | 7 ++- src/lj_debug.h | 3 +- .../fix-gc-setupvalue.test.lua | 60 +++++++++++++++++++ test/tarantool-tests/utils.lua | 32 ++++++++++ 5 files changed, 104 insertions(+), 6 deletions(-) create mode 100644 test/tarantool-tests/fix-gc-setupvalue.test.lua diff --git a/src/lj_api.c b/src/lj_api.c index c7a0b327..ba38881f 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -943,7 +943,8 @@ LUA_API int lua_next(lua_State *L, int idx) LUA_API const char *lua_getupvalue(lua_State *L, int idx, int n) { TValue *val; - const char *name = lj_debug_uvnamev(index2adr(L, idx), (uint32_t)(n-1), &val); + GCobj *o; + const char *name = lj_debug_uvnamev(index2adr(L, idx), (uint32_t)(n-1), &val, &o); if (name) { copyTV(L, L->top, val); incr_top(L); @@ -1129,13 +1130,14 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n) { cTValue *f = index2adr(L, idx); TValue *val; + GCobj *o; const char *name; api_checknelems(L, 1); - name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val); + name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o); if (name) { L->top--; copyTV(L, val, L->top); - lj_gc_barrier(L, funcV(f), L->top); + lj_gc_barrier(L, o, L->top); } return name; } diff --git a/src/lj_debug.c b/src/lj_debug.c index bb9ab288..8eb5983b 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -221,19 +221,22 @@ const char *lj_debug_uvname(GCproto *pt, uint32_t idx) } /* Get name and value of upvalue. */ -const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp) +const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp, GCobj **op) { if (tvisfunc(o)) { GCfunc *fn = funcV(o); if (isluafunc(fn)) { GCproto *pt = funcproto(fn); if (idx < pt->sizeuv) { - *tvp = uvval(&gcref(fn->l.uvptr[idx])->uv); + GCobj *uvo = gcref(fn->l.uvptr[idx]); + *tvp = uvval(&uvo->uv); + *op = uvo; return lj_debug_uvname(pt, idx); } } else { if (idx < fn->c.nupvalues) { *tvp = &fn->c.upvalue[idx]; + *op = obj2gco(fn); return ""; } } diff --git a/src/lj_debug.h b/src/lj_debug.h index a157d284..e037728a 100644 --- a/src/lj_debug.h +++ b/src/lj_debug.h @@ -29,7 +29,8 @@ typedef struct lj_Debug { LJ_FUNC cTValue *lj_debug_frame(lua_State *L, int level, int *size); LJ_FUNC BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc); LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx); -LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp); +LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp, + GCobj **op); LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, BCReg slot, const char **name); LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame, diff --git a/test/tarantool-tests/fix-gc-setupvalue.test.lua b/test/tarantool-tests/fix-gc-setupvalue.test.lua new file mode 100644 index 00000000..8d83ee6e --- /dev/null +++ b/test/tarantool-tests/fix-gc-setupvalue.test.lua @@ -0,0 +1,60 @@ +local tap = require('tap') +local utils = require('utils') + +local test = tap.test('fix-gc-setupvalue') +test:plan(1) + +-- Test file to demonstrate LuaJIT GC invariant violation +-- for inherited upvalues. + +-- The bug is about the situation, when black upvalue refers to +-- a white object. This happens due to parent function is marked +-- first (all closed upvalues and function are colored to black), +-- and then `debug.setupvalue()` is called for a child function +-- with inherited upvalues. The barrier is move forward for a +-- non-marked function (instead upvalue) and invariant is +-- violated. + +-- Create to functions with closed upvalue. +do + local uv = 1 + local function f_parent() + local function f() + return uv + 1 + end + _G.f = f + return uv + 1 + end + -- Set up `f()`. + f_parent() + _G.f_parent = f_parent +end + +-- Set GC on start. +collectgarbage() +-- Set minimally possible stepmul. +-- 1024/10 * stepmul == 10 < sizeof(GCfuncL), so it guarantees, +-- that 2 functions will be marked in different time. +local oldstepmul = collectgarbage('setstepmul', 1) + +-- `f_parent()` function is marked before `f()`, so wait until +-- it becomes black and proceed with the test. +while not utils.gc_isblack(_G.f_parent) do + collectgarbage('step') +end + +-- Set created string (white) for the upvalue. +debug.setupvalue(_G.f, 1, '4'..'1') +_G.f = nil + +-- Lets finish it faster. +collectgarbage('setstepmul', oldstepmul) +-- Finish GC cycle to be sure that the object is collected. +while not collectgarbage('step') do end + +-- Generate some garbage to reuse freed memory. +for i = 1, 1e2 do local _ = {string.rep('0', i)} end + +test:ok(_G.f_parent() == 42, 'correct set up of upvalue') + +os.exit(test:check() and 0 or 1) diff --git a/test/tarantool-tests/utils.lua b/test/tarantool-tests/utils.lua index 5bd42b30..68781f28 100644 --- a/test/tarantool-tests/utils.lua +++ b/test/tarantool-tests/utils.lua @@ -3,11 +3,43 @@ local M = {} local ffi = require('ffi') local tap = require('tap') local bc = require('jit.bc') +local bit = require('bit') + +local GCRef = ffi.abi('gc64') and 'uint64_t' or 'uint32_t' +local LJ_GC_BLACK = 0x04 ffi.cdef([[ int setenv(const char *name, const char *value, int overwrite); + typedef struct { + ]]..GCRef..[[ nextgc; + uint8_t marked; + uint8_t gct; + /* Need this fields for correct alignment and sizeof. */ + uint8_t misc1; + uint8_t misc2; + } GCHeader; ]]) +function M.gc_isblack(obj) + local objtype = type(obj) + assert(objtype ~= 'number' and objtype ~= 'boolean', + 'can proceed only with GC objects') + local address + if objtype == 'string' then + -- XXX: get strdata first and go back to GCHeader. + address = ffi.cast('char *', obj) + address = address - (ffi.sizeof('GCHeader') + 8) + else + -- XXX: FFI ABI forbids to cast functions objects + -- to non-functional pointers, but we can get their address + -- via tostring. + local str_address = tostring(obj):gsub(objtype .. ': ', '') + address = tonumber(str_address) + end + local marked = ffi.cast('GCHeader *', address).marked + return bit.band(marked, LJ_GC_BLACK) == LJ_GC_BLACK +end + local function luacmd(args) -- arg[-1] is guaranteed to be not nil. local idx = -2 -- 2.34.1
Hi! Thanks for the patch! LGTM with minor updates to the description. Sergos > On 15 Dec 2021, at 13:17, Sergey Kaplun <skaplun@tarantool.org> wrote: > > From: Mike Pall <mike> > > (cherry picked from e613105ca92fe25e7bd63031b409faa8c908ac35) > > Child function inherits parents upvalues. Assume parent function is parents’ upvalues. > marked first (all closed upvalues and function are colored to black), I believe coloring should be described, or at least GC mentioned > and then `debug.setupvalue()`/`lua_setupvalue()` is called for an > unmarked child function with inherited upvalues. The barrier is tried to Not quite clear which way it tries to move - mark the upvalue white? > move forward (but not actually move, due to the colors of operands) for > a non-marked function (instead marked upvalue). Now black upvalue refers > to a white object. Black objects can't refer white objects due to GC > invariant, so the invariant is violated. > > This patch changes a function object to an upvalue for barrier movement. > > Sergey Kaplun: > * added the description and the test for the problem > > Part of tarantool/tarantool#6548 > --- > Related issue: https://github.com/tarantool/tarantool/issues/6548 > Branch: https://github.com/tarantool/luajit/tree/skaplun/gh-noticket-fix-gc-setupvalue-full-ci > Tarantool branch: https://github.com/tarantool/tarantool/tree/skaplun/gh-noticket-fix-gc-setupvalue-full-ci > > Note: CI is red. But this job is red on master too... > > src/lj_api.c | 8 ++- > src/lj_debug.c | 7 ++- > src/lj_debug.h | 3 +- > .../fix-gc-setupvalue.test.lua | 60 +++++++++++++++++++ > test/tarantool-tests/utils.lua | 32 ++++++++++ > 5 files changed, 104 insertions(+), 6 deletions(-) > create mode 100644 test/tarantool-tests/fix-gc-setupvalue.test.lua > > diff --git a/src/lj_api.c b/src/lj_api.c > index c7a0b327..ba38881f 100644 > --- a/src/lj_api.c > +++ b/src/lj_api.c > @@ -943,7 +943,8 @@ LUA_API int lua_next(lua_State *L, int idx) > LUA_API const char *lua_getupvalue(lua_State *L, int idx, int n) > { > TValue *val; > - const char *name = lj_debug_uvnamev(index2adr(L, idx), (uint32_t)(n-1), &val); > + GCobj *o; > + const char *name = lj_debug_uvnamev(index2adr(L, idx), (uint32_t)(n-1), &val, &o); > if (name) { > copyTV(L, L->top, val); > incr_top(L); > @@ -1129,13 +1130,14 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n) > { > cTValue *f = index2adr(L, idx); > TValue *val; > + GCobj *o; > const char *name; > api_checknelems(L, 1); > - name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val); > + name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o); > if (name) { > L->top--; > copyTV(L, val, L->top); > - lj_gc_barrier(L, funcV(f), L->top); > + lj_gc_barrier(L, o, L->top); > } > return name; > } > diff --git a/src/lj_debug.c b/src/lj_debug.c > index bb9ab288..8eb5983b 100644 > --- a/src/lj_debug.c > +++ b/src/lj_debug.c > @@ -221,19 +221,22 @@ const char *lj_debug_uvname(GCproto *pt, uint32_t idx) > } > > /* Get name and value of upvalue. */ > -const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp) > +const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp, GCobj **op) > { > if (tvisfunc(o)) { > GCfunc *fn = funcV(o); > if (isluafunc(fn)) { > GCproto *pt = funcproto(fn); > if (idx < pt->sizeuv) { > - *tvp = uvval(&gcref(fn->l.uvptr[idx])->uv); > + GCobj *uvo = gcref(fn->l.uvptr[idx]); > + *tvp = uvval(&uvo->uv); > + *op = uvo; > return lj_debug_uvname(pt, idx); > } > } else { > if (idx < fn->c.nupvalues) { > *tvp = &fn->c.upvalue[idx]; > + *op = obj2gco(fn); > return ""; > } > } > diff --git a/src/lj_debug.h b/src/lj_debug.h > index a157d284..e037728a 100644 > --- a/src/lj_debug.h > +++ b/src/lj_debug.h > @@ -29,7 +29,8 @@ typedef struct lj_Debug { > LJ_FUNC cTValue *lj_debug_frame(lua_State *L, int level, int *size); > LJ_FUNC BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc); > LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx); > -LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp); > +LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp, > + GCobj **op); > LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, > BCReg slot, const char **name); > LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame, > diff --git a/test/tarantool-tests/fix-gc-setupvalue.test.lua b/test/tarantool-tests/fix-gc-setupvalue.test.lua > new file mode 100644 > index 00000000..8d83ee6e > --- /dev/null > +++ b/test/tarantool-tests/fix-gc-setupvalue.test.lua > @@ -0,0 +1,60 @@ > +local tap = require('tap') > +local utils = require('utils') > + > +local test = tap.test('fix-gc-setupvalue') > +test:plan(1) > + > +-- Test file to demonstrate LuaJIT GC invariant violation > +-- for inherited upvalues. > + > +-- The bug is about the situation, when black upvalue refers to > +-- a white object. This happens due to parent function is marked > +-- first (all closed upvalues and function are colored to black), > +-- and then `debug.setupvalue()` is called for a child function > +-- with inherited upvalues. The barrier is move forward for a > +-- non-marked function (instead upvalue) and invariant is > +-- violated. > + > +-- Create to functions with closed upvalue. > +do > + local uv = 1 > + local function f_parent() > + local function f() > + return uv + 1 > + end > + _G.f = f > + return uv + 1 > + end > + -- Set up `f()`. > + f_parent() > + _G.f_parent = f_parent > +end > + > +-- Set GC on start. > +collectgarbage() > +-- Set minimally possible stepmul. > +-- 1024/10 * stepmul == 10 < sizeof(GCfuncL), so it guarantees, > +-- that 2 functions will be marked in different time. > +local oldstepmul = collectgarbage('setstepmul', 1) > + > +-- `f_parent()` function is marked before `f()`, so wait until > +-- it becomes black and proceed with the test. > +while not utils.gc_isblack(_G.f_parent) do > + collectgarbage('step') > +end > + > +-- Set created string (white) for the upvalue. > +debug.setupvalue(_G.f, 1, '4'..'1') > +_G.f = nil > + > +-- Lets finish it faster. > +collectgarbage('setstepmul', oldstepmul) > +-- Finish GC cycle to be sure that the object is collected. > +while not collectgarbage('step') do end > + If I’m not missing the point, the below will never be executed since invariant is violated. > +-- Generate some garbage to reuse freed memory. > +for i = 1, 1e2 do local _ = {string.rep('0', i)} end > + > +test:ok(_G.f_parent() == 42, 'correct set up of upvalue') > + > +os.exit(test:check() and 0 or 1) > diff --git a/test/tarantool-tests/utils.lua b/test/tarantool-tests/utils.lua > index 5bd42b30..68781f28 100644 > --- a/test/tarantool-tests/utils.lua > +++ b/test/tarantool-tests/utils.lua > @@ -3,11 +3,43 @@ local M = {} > local ffi = require('ffi') > local tap = require('tap') > local bc = require('jit.bc') > +local bit = require('bit') > + > +local GCRef = ffi.abi('gc64') and 'uint64_t' or 'uint32_t' > +local LJ_GC_BLACK = 0x04 > > ffi.cdef([[ > int setenv(const char *name, const char *value, int overwrite); > + typedef struct { > + ]]..GCRef..[[ nextgc; > + uint8_t marked; > + uint8_t gct; > + /* Need this fields for correct alignment and sizeof. */ > + uint8_t misc1; > + uint8_t misc2; > + } GCHeader; > ]]) > > +function M.gc_isblack(obj) > + local objtype = type(obj) > + assert(objtype ~= 'number' and objtype ~= 'boolean', > + 'can proceed only with GC objects') > + local address > + if objtype == 'string' then > + -- XXX: get strdata first and go back to GCHeader. > + address = ffi.cast('char *', obj) > + address = address - (ffi.sizeof('GCHeader') + 8) > + else > + -- XXX: FFI ABI forbids to cast functions objects > + -- to non-functional pointers, but we can get their address > + -- via tostring. > + local str_address = tostring(obj):gsub(objtype .. ': ', '') > + address = tonumber(str_address) > + end > + local marked = ffi.cast('GCHeader *', address).marked > + return bit.band(marked, LJ_GC_BLACK) == LJ_GC_BLACK > +end > + > local function luacmd(args) > -- arg[-1] is guaranteed to be not nil. > local idx = -2 > -- > 2.34.1 >
Hi, Sergos! Thanks for the review. I updated commit message considerring your comments. The branch is force-pushed. | Fix write barrier for lua_setupvalue() and debug.setupvalue(). | | (cherry picked from e613105ca92fe25e7bd63031b409faa8c908ac35) | | At the first GC state (`GCSpause`) all objects should be marked with the | current white (`LJ_GC_WHITE0` or `LJ_GC_WHITE1`). The main lua_State, | globals table, registry, and metatables are marked gray and added to the | gray list. The state now changes to `GCSpropagate`. | | At `GCSpropagate` each object in the gray list is removed and marked | black (`LJ_GC_BLACK`), then any white (type 0 or 1) objects it references are marked | gray and added to the gray list. Once the gray list is empty the current | white is switched to the other white. All objects marked with the old | white type are now dead objects. | | Child function inherits parents' upvalues. Assume parent function is | marked first (all closed upvalues and function are colored to black), | and then `debug.setupvalue()`/`lua_setupvalue()` is called for an | unmarked child function with inherited upvalues. The barrier is tried to | move forward by marking object gray (but not actually move, due to the | colors of operands) for a non-marked function (instead marked upvalue). | Now black upvalue refers to a white object. Black objects can't refer | white objects due to GC invariant, so the invariant is violated. | | This patch changes a function object to an upvalue for barrier movement. | | Sergey Kaplun: | * added the description and the test for the problem | | Part of tarantool/tarantool#6548 On 06.06.22, sergos wrote: > Hi! > > Thanks for the patch! > > LGTM with minor updates to the description. > > Sergos > > > On 15 Dec 2021, at 13:17, Sergey Kaplun <skaplun@tarantool.org> wrote: > > > > From: Mike Pall <mike> > > > > (cherry picked from e613105ca92fe25e7bd63031b409faa8c908ac35) > > > > Child function inherits parents upvalues. Assume parent function is > > parents’ upvalues. > > > marked first (all closed upvalues and function are colored to black), > > I believe coloring should be described, or at least GC mentioned > > > and then `debug.setupvalue()`/`lua_setupvalue()` is called for an > > unmarked child function with inherited upvalues. The barrier is tried to > > Not quite clear which way it tries to move - mark the upvalue white? > > > move forward (but not actually move, due to the colors of operands) for > > a non-marked function (instead marked upvalue). Now black upvalue refers > > to a white object. Black objects can't refer white objects due to GC > > invariant, so the invariant is violated. > > > > This patch changes a function object to an upvalue for barrier movement. > > > > Sergey Kaplun: > > * added the description and the test for the problem > > > > Part of tarantool/tarantool#6548 > > --- > > Related issue: https://github.com/tarantool/tarantool/issues/6548 > > Branch: https://github.com/tarantool/luajit/tree/skaplun/gh-noticket-fix-gc-setupvalue-full-ci > > Tarantool branch: https://github.com/tarantool/tarantool/tree/skaplun/gh-noticket-fix-gc-setupvalue-full-ci > > <snipped> > > diff --git a/test/tarantool-tests/fix-gc-setupvalue.test.lua b/test/tarantool-tests/fix-gc-setupvalue.test.lua > > new file mode 100644 > > index 00000000..8d83ee6e > > --- /dev/null > > +++ b/test/tarantool-tests/fix-gc-setupvalue.test.lua > > @@ -0,0 +1,60 @@ > > +local tap = require('tap') > > +local utils = require('utils') > > + > > +local test = tap.test('fix-gc-setupvalue') > > +test:plan(1) > > + > > +-- Test file to demonstrate LuaJIT GC invariant violation > > +-- for inherited upvalues. > > + > > +-- The bug is about the situation, when black upvalue refers to > > +-- a white object. This happens due to parent function is marked > > +-- first (all closed upvalues and function are colored to black), > > +-- and then `debug.setupvalue()` is called for a child function > > +-- with inherited upvalues. The barrier is move forward for a > > +-- non-marked function (instead upvalue) and invariant is > > +-- violated. > > + <snipped> > > +-- Set created string (white) for the upvalue. > > +debug.setupvalue(_G.f, 1, '4'..'1') > > +_G.f = nil > > + > > +-- Lets finish it faster. > > +collectgarbage('setstepmul', oldstepmul) > > +-- Finish GC cycle to be sure that the object is collected. > > +while not collectgarbage('step') do end > > + > > If I’m not missing the point, the below will never be executed since > invariant is violated. GC invariant is violated, but there is no defined behaviour in this case (the object may be remarked later and assertion isn't failed). So the simplest way is to create some other objects to reuse memory pointed by dead object. > > > +-- Generate some garbage to reuse freed memory. > > +for i = 1, 1e2 do local _ = {string.rep('0', i)} end > > + > > +test:ok(_G.f_parent() == 42, 'correct set up of upvalue') > > + > > +os.exit(test:check() and 0 or 1) > > diff --git a/test/tarantool-tests/utils.lua b/test/tarantool-tests/utils.lua > > index 5bd42b30..68781f28 100644 > > --- a/test/tarantool-tests/utils.lua > > +++ b/test/tarantool-tests/utils.lua <snipped> > > -- > > 2.34.1 > > > -- Best regards, Sergey Kaplun
Sergey, Thanks for the patch! LGTM, considering all fixes after Sergos review. -- Best regards, IM
Sergey, I've checked the patch into all long-term branches in tarantool/luajit and bumped a new version in master, 2.10 and 1.10. On 15.12.21, Sergey Kaplun wrote: > From: Mike Pall <mike> > > (cherry picked from e613105ca92fe25e7bd63031b409faa8c908ac35) > > Child function inherits parents upvalues. Assume parent function is > marked first (all closed upvalues and function are colored to black), > and then `debug.setupvalue()`/`lua_setupvalue()` is called for an > unmarked child function with inherited upvalues. The barrier is tried to > move forward (but not actually move, due to the colors of operands) for > a non-marked function (instead marked upvalue). Now black upvalue refers > to a white object. Black objects can't refer white objects due to GC > invariant, so the invariant is violated. > > This patch changes a function object to an upvalue for barrier movement. > > Sergey Kaplun: > * added the description and the test for the problem > > Part of tarantool/tarantool#6548 > --- > Related issue: https://github.com/tarantool/tarantool/issues/6548 > Branch: https://github.com/tarantool/luajit/tree/skaplun/gh-noticket-fix-gc-setupvalue-full-ci > Tarantool branch: https://github.com/tarantool/tarantool/tree/skaplun/gh-noticket-fix-gc-setupvalue-full-ci > > Note: CI is red. But this job is red on master too... > > src/lj_api.c | 8 ++- > src/lj_debug.c | 7 ++- > src/lj_debug.h | 3 +- > .../fix-gc-setupvalue.test.lua | 60 +++++++++++++++++++ > test/tarantool-tests/utils.lua | 32 ++++++++++ > 5 files changed, 104 insertions(+), 6 deletions(-) > create mode 100644 test/tarantool-tests/fix-gc-setupvalue.test.lua > <snipped> > -- > 2.34.1 > -- Best regards, IM