From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpng1.m.smailru.net (smtpng1.m.smailru.net [94.100.181.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 878DC46970E for ; Wed, 5 Feb 2020 19:24:47 +0300 (MSK) From: Igor Munkin Date: Wed, 5 Feb 2020 19:22:27 +0300 Message-Id: In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH v2 luajit 1/3] gdb: introduce luajit-gdb extension List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org The provided luajit-gdb extension contains the following additional commands: * lj-arch -- dumps values of LJ_64 and LJ_GC64 macro definitions * lj-tv -- dumps the type and some GCobj info related to the given TValue * lj-str -- dumps the contents of the given GCstr * lj-tab -- dumps the contents of the given GCtab * lj-stack -- dumps Lua stack of the given lua_State * lj-state -- shows current VM, GC and JIT states * lj-gc -- shows current GC stats Currently the extension supports only x64 builds but respects the LJ_GC64 value Signed-off-by: Igor Munkin --- src/luajit-gdb.py | 684 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 684 insertions(+) create mode 100644 src/luajit-gdb.py diff --git a/src/luajit-gdb.py b/src/luajit-gdb.py new file mode 100644 index 0000000..77da5e6 --- /dev/null +++ b/src/luajit-gdb.py @@ -0,0 +1,684 @@ +import re +import gdb + +gtype_cache = {} + +def gtype(typestr): + global gtype_cache + if typestr in gtype_cache: + return gtype_cache[typestr] + + m = re.match(r'((?:(?:struct|union) )?\S*)\s*[*]', typestr) + + gtype = gdb.lookup_type(typestr) if m is None \ + else gdb.lookup_type(m.group(1)).pointer() + + gtype_cache[typestr] = gtype + return gtype + +def cast(typestr, val): + return gdb.Value(val).cast(gtype(typestr)) +
+def lookup(symbol): + variable, _ = gdb.lookup_symbol(symbol) + return variable.value() if variable else None + +def parse_arg(arg): + if not arg: + return None + + ret = gdb.parse_and_eval(arg) + + if not ret: + raise gdb.GdbError('table argument empty') + + return ret + +def tou64(val): + return cast('uint64_t', val) & 0xFFFFFFFFFFFFFFFF + +def tou32(val): + return cast('uint32_t', val) & 0xFFFFFFFF + +def i2notu64(val): + return ~int(val) & 0xFFFFFFFFFFFFFFFF + +def i2notu32(val): + return ~int(val) & 0xFFFFFFFF + +def strx64(val): + return hex(cast('uint64_t', val) & 0xFFFFFFFFFFFFFFFF) + +# Types {{{ + +LJ_T = { + 'NIL' : i2notu32(0), + 'FALSE' : i2notu32(1), + 'TRUE' : i2notu32(2), + 'LIGHTUD' : i2notu32(3), + 'STR' : i2notu32(4), + 'UPVAL' : i2notu32(5), + 'THREAD' : i2notu32(6), + 'PROTO' : i2notu32(7), + 'FUNC' : i2notu32(8), + 'TRACE' : i2notu32(9), + 'CDATA' : i2notu32(10), + 'TAB' : i2notu32(11), + 'UDATA' : i2notu32(12), + 'NUMX' : i2notu32(13), +} + +def typenames(value): + return { + LJ_T[k]: 'LJ_T' + k for k in LJ_T.keys() + }.get(int(value), 'LJ_TINVALID') + +# }}} + +# Frames {{{ + +FRAME_TYPE = 0x3 +FRAME_P = 0x4 +FRAME_TYPEP = FRAME_TYPE | FRAME_P + +FRAME = { + 'LUA': 0x0, + 'C': 0x1, + 'CONT': 0x2, + 'VARG': 0x3, + 'LUAP': 0x4, + 'CP': 0x5, + 'PCALL': 0x6, + 'PCALLH': 0x7, +} + +def frametypes(ft): + return { + FRAME['LUA'] : 'L', + FRAME['C'] : 'C', + FRAME['CONT'] : 'M', + FRAME['VARG'] : 'V', + }.get(ft, '?') + +def bc_a(ins): + return (ins >> 8) & 0xff + +def frame_ftsz(framelink): + return cast('ptrdiff_t', framelink['ftsz'] if LJ_FR2 \ + else framelink['fr']['tp']['ftsz']) + +def frame_pc(framelink): + return cast('BCIns *', frame_ftsz(framelink)) if LJ_FR2 \ + else mref('BCIns *', framelink['fr']['tp']['pcr']) + +def frame_prevl(framelink): + return framelink - (1 + LJ_FR2 + bc_a(frame_pc(framelink)[-1])) + +def frame_ispcall(framelink): + return (frame_ftsz(framelink) & FRAME['PCALL']) == FRAME['PCALL'] + +def frame_sized(framelink): 
+ return (frame_ftsz(framelink) & ~FRAME_TYPEP) + +def frame_prevd(framelink): + return cast('TValue *', cast('char *', framelink) - frame_sized(framelink)) + +def frame_type(framelink): + return frame_ftsz(framelink) & FRAME_TYPE + +def frame_typep(framelink): + return frame_ftsz(framelink) & FRAME_TYPEP + +def frame_islua(framelink): + return frametypes(int(frame_type(framelink))) == 'L' \ + and int(frame_ftsz(framelink)) > 0 + +def frame_prev(framelink): + return frame_prevl(framelink) if frame_islua(framelink) \ + else frame_prevd(framelink) + +# }}} + +# Const {{{ + +LJ_64 = str(gdb.parse_and_eval('IRT_PTR')) == 'IRT_P64' + +LJ_GC64 = str(gdb.parse_and_eval('IRT_PGC')) == 'IRT_P64' + +LJ_FR2 = LJ_GC64 + +LJ_GCVMASK = ((1 << 47) - 1) + +PADDING = ' ' * len(':' + hex((1 << (47 if LJ_GC64 else 32)) - 1)) + +# }}} + +def itype(o): + return cast('uint32_t', o['it64'] >> 47) if LJ_GC64 else o['it'] + +def mref(typename, obj): + return cast(typename, obj['ptr64'] if LJ_GC64 else obj['ptr32']) + +def gcref(obj): + return cast('GCobj *', obj['gcptr64'] if LJ_GC64 + else cast('uintptr_t', obj['gcptr32'])) + +def gcval(obj): + return cast('GCobj *', obj['gcptr64'] & LJ_GCVMASK if LJ_GC64 + else cast('uintptr_t', obj['gcptr32'])) + +def L(L=None): + # lookup a symbol for the main coroutine considering the host app + for l in (L, *map(lambda l: lookup(l), ( + # LuaJIT main coro (see luajit/src/luajit.c) + 'globalL', + # Tarantool main coro (see tarantool/src/lua/init.h) + 'tarantool_L', + # TODO: Add more + ))): + if l: + return cast('lua_State *', l) + +def G(L): + return mref('global_State *', L['glref']) + +def J(g): + typeGG = gtype('GG_State') + + return cast('jit_State *', int(cast('char *', g)) + - int(typeGG['g'].bitpos / 8) + + int(typeGG['J'].bitpos / 8) + ) + +def vm_state(g): + return { + i2notu64(0): 'INTERP', + i2notu64(1): 'C', + i2notu64(2): 'GC', + i2notu64(3): 'EXIT', + i2notu64(4): 'RECORD', + i2notu64(5): 'OPT', + i2notu64(6): 'ASM', + 
}.get(int(tou64(g['vmstate'])), 'TRACE') + +def gc_state(g): + return { + 0: 'PAUSE', + 1: 'PROPAGATE', + 2: 'ATOMIC', + 3: 'SWEEPSTRING', + 4: 'SWEEP', + 5: 'FINALIZE', + 6: 'LAST', + }.get(int(g['gc']['state']), 'INVALID') + +def jit_state(g): + return { + 0: 'IDLE', + 0x10: 'ACTIVE', + 0x11: 'RECORD', + 0x12: 'START', + 0x13: 'END', + 0x14: 'ASM', + 0x15: 'ERR', + }.get(int(J(g)['state']), 'INVALID') + +def tvisnumber(o): + return itype(o) <= (0xfffeffff if LJ_64 and not LJ_GC64 else LJ_T['NUMX']) + +def tvislightud(o): + if LJ_64 and not LJ_GC64: + return (cast('int32_t', itype(o)) >> 15) == -2 + else: + return itype(o) == LJ_T['LIGHTUD'] + +def strdata(obj): + # String is printed with pointer to it, thanks to gdb. Just strip it. + return str(cast('char *', cast('GCstr *', obj) + 1))[len(PADDING):] + +def itypemap(o): + if LJ_64 and not LJ_GC64: + return LJ_T['NUMX'] if tvisnumber(o) \ + else LJ_T['LIGHTUD'] if tvislightud(o) \ + else itype(o) + else: + return LJ_T['NUMX'] if tvisnumber(o) else itype(o) + +def funcproto(func): + assert(func['ffid'] == 0) + + return cast('GCproto *', + mref('char *', func['pc']) - gdb.lookup_type('GCproto').sizeof) + +def gclistlen(root): + count = 0 + while(gcref(root)): + count += 1 + root = gcref(root)['gch']['nextgc'] + return count + +# Dumpers {{{ + +def dump_lj_tnil(tv): + return 'nil' + +def dump_lj_tfalse(tv): + return 'false' + +def dump_lj_ttrue(tv): + return 'true' + +def dump_lj_tlightud(tv): + return 'light userdata @ {}'.format(strx64(gcval(tv['gcr']))) + +def dump_lj_tstr(tv): + return 'string {body} @ {address}'.format( + body = strdata(gcval(tv['gcr'])), + address = strx64(gcval(tv['gcr'])) + ) + +def dump_lj_tupval(tv): + return 'upvalue @ {}'.format(strx64(gcval(tv['gcr']))) + +def dump_lj_tthread(tv): + return 'thread @ {}'.format(strx64(gcval(tv['gcr']))) + +def dump_lj_tproto(tv): + return 'proto @ {}'.format(strx64(gcval(tv['gcr']))) + +def dump_lj_tfunc(tv): + func = cast('struct GCfuncC *', 
gcval(tv['gcr'])) + ffid = func['ffid'] + + if ffid == 0: + pt = funcproto(func) + return 'Lua function @ {addr}, {nupvals} upvalues, {chunk}:{line}'.format( + addr = strx64(func), + nupvals = int(func['nupvalues']), + chunk = strdata(cast('GCstr *', gcval(pt['chunkname']))), + line = pt['firstline'] + ) + elif ffid == 1: + return 'C function @ {}'.format(strx64(func['f'])) + else: + return 'fast function #{}'.format(int(ffid)) + +def dump_lj_ttrace(tv): + trace = cast('struct GCtrace *', gcval(tv['gcr'])) + return 'trace {traceno} @ {addr}'.format( + traceno = strx64(trace['traceno']), + addr = strx64(trace) + ) + +def dump_lj_tcdata(tv): + return 'cdata @ {}'.format(strx64(gcval(tv['gcr']))) + +def dump_lj_ttab(tv): + table = cast('GCtab *', gcval(tv['gcr'])) + return 'table @ {gcr} (asize: {asize}, hmask: {hmask})'.format( + gcr = strx64(table), + asize = table['asize'], + hmask = strx64(table['hmask']), + ) + +def dump_lj_tudata(tv): + return 'userdata @ {}'.format(strx64(gcval(tv['gcr']))) + +def dump_lj_tnumx(tv): + return 'number {}'.format(cast('double', tv['n'])) + +def dump_lj_invalid(tv): + return 'not valid type @ {}'.format(strx64(gcval(tv['gcr']))) + +# }}} + +dumpers = { + 'LJ_TNIL': dump_lj_tnil, + 'LJ_TFALSE': dump_lj_tfalse, + 'LJ_TTRUE': dump_lj_ttrue, + 'LJ_TLIGHTUD': dump_lj_tlightud, + 'LJ_TSTR': dump_lj_tstr, + 'LJ_TUPVAL': dump_lj_tupval, + 'LJ_TTHREAD': dump_lj_tthread, + 'LJ_TPROTO': dump_lj_tproto, + 'LJ_TFUNC': dump_lj_tfunc, + 'LJ_TTRACE': dump_lj_ttrace, + 'LJ_TCDATA': dump_lj_tcdata, + 'LJ_TTAB': dump_lj_ttab, + 'LJ_TUDATA': dump_lj_tudata, + 'LJ_TNUMX': dump_lj_tnumx, +} + +def dump_tvalue(tvalue): + return dumpers.get(typenames(itypemap(tvalue)), dump_lj_invalid)(tvalue) + +def dump_framelink(L, fr): + fr2 = fr + LJ_FR2 + + return '{fr}{padding} [ ] FRAME: [{pp}] delta={d}, {f}\n'.format( + fr = fr, + padding = ':{fr2}'.format(fr2 = fr2) if LJ_FR2 else PADDING, + pp = 'PP' if frame_ispcall(fr2) else '{frname}{p}'.format( + frname = 
frametypes(int(frame_type(fr2))), + p = 'P' if frame_typep(fr2) & FRAME_P else '' + ), + d = cast('TValue *', fr2) - cast('TValue *', frame_prev(fr2)), + f = dump_lj_tfunc(fr), + ) + +def dump_stack_slot(L, slot, base=None, top=None): + base = base or L['base'] + top = top or L['top'] + + return '{addr}{padding} [ {B}{T}{M}] VALUE: {value}\n'.format( + addr = strx64(slot), + padding = PADDING, + B = 'B' if slot == base else ' ', + T = 'T' if slot == top else ' ', + M = 'M' if slot == mref('TValue *', L['maxstack']) else ' ', + value = dump_tvalue(slot), + ) + +def dump_stack(L, base=None, top=None): + base = base or L['base'] + top = top or L['top'] + maxstack = mref('TValue *', L['maxstack']) + red = 5 + 2 * LJ_FR2 + + dump = '\n'.join([ + '{start}:{end} [ ] {n} slots: Red zone'.format( + start = strx64(maxstack + 1), + end = strx64(maxstack + red), + n = red, + ), + '{maxstack}{padding} [ M]'.format( + maxstack = strx64(maxstack), + padding = PADDING, + ), + '{start}:{end} [ ] {nfreeslots} slots: Free stack slots'.format( + start = strx64(top + 1), + end = strx64(maxstack - 1), + nfreeslots = int((tou64(maxstack) - tou64(top) - 8) >> 3), + ), + '{top}{padding} [ T ]'.format( + top = strx64(top), + padding = PADDING, + ) + ]) + '\n' + + slot = top - 1 + framelink = base - (1 + LJ_FR2) + + while framelink > mref('TValue *', L['stack']): + while slot > framelink + LJ_FR2: + dump += dump_stack_slot(L, slot, base, top) + slot -= 1 + dump += dump_framelink(L, framelink) + framelink = frame_prev(framelink + LJ_FR2) - LJ_FR2 + slot -= 1 + LJ_FR2 + + dump += '{fr}{padding} [S ] FRAME: dummy L'.format( + fr = slot, + padding = ':{nilslot}'.format(nilslot = slot + 1) if LJ_FR2 else PADDING + ) + + return dump + +def dump_gc(g): + gc = g['gc'] + stats = [ '{key}: {value}'.format(key = f, value = gc[f]) for f in ( + 'total', 'threshold', 'debt', 'estimate', 'stepmul', 'pause' + ) ] + + stats += [ 'sweepstr: {sweepstr}/{strmask}'.format( + sweepstr = gc['sweepstr'], + # String 
hash mask (size of hash table - 1). + strmask = g['strmask'] + 1, + ) ] + + stats += [ '{key}: {number} objects'.format( + key = f, + number = gclistlen(gc[f]), + ) for f in ('root', 'gray', 'grayagain', 'weak') ] + + # TODO: mmudata + + return '\n'.join(map(lambda s: '\t' + s, stats)) + +class LJDumpArch(gdb.Command): + ''' +lj-arch + +The command requires no args and dumps values of LJ_64 and LJ_GC64 +compile-time flags. These values define the sizes of host and GC +pointers respectively. + ''' + + def __init__(self): + super(LJDumpArch, self).__init__( + 'lj-arch', gdb.COMMAND_DATA + ) + + def invoke(self, arg, from_tty): + gdb.write('LJ_64: {LJ_64}, LJ_GC64: {LJ_GC64}\n'.format( + LJ_64 = LJ_64, + LJ_GC64 = LJ_GC64 + )) + +LJDumpArch() + +class LJDumpTValue(gdb.Command): + ''' +lj-tv + +The command receives a pointer to a TValue (TValue address) and dumps +the type and some info related to it. + +* LJ_TNIL: nil +* LJ_TFALSE: false +* LJ_TTRUE: true +* LJ_TLIGHTUD: light userdata @ +* LJ_TSTR: string @ +* LJ_TUPVAL: upvalue @ +* LJ_TTHREAD: thread @ +* LJ_TPROTO: proto @ +* LJ_TFUNC: + : Lua function @ , upvalues, + : C function + : fast function # +* LJ_TTRACE: trace @ +* LJ_TCDATA: cdata @ +* LJ_TTAB: table @ (asize: , hmask: ) +* LJ_TUDATA: userdata @ +* LJ_TNUMX: number + +If the type of the given address differs from the ones listed above, an +error message is printed. + ''' + + def __init__(self): + super(LJDumpTValue, self).__init__( + 'lj-tv', gdb.COMMAND_DATA + ) + + def invoke(self, arg, from_tty): + tv = cast('TValue *', parse_arg(arg)) + gdb.write('{}\n'.format(dump_tvalue(tv))) + +LJDumpTValue() + +class LJDumpString(gdb.Command): + ''' +lj-str + +The command receives the address of the corresponding GCstr object and dumps +the payload, size in bytes and hash. + +*Caveat*: Since Python 2 provides no native Unicode support, the payload +is replaced with the corresponding error when decoding fails.
+ ''' + + def __init__(self): + super(LJDumpString, self).__init__( + 'lj-str', gdb.COMMAND_DATA + ) + + def invoke(self, arg, from_tty): + string = cast('GCstr *', parse_arg(arg)) + gdb.write("String: {body} [{len} bytes] with hash {hash}\n".format( + body = strdata(string), + hash = strx64(string['hash']), + len = string['len'], + )) + + +LJDumpString() + +class LJDumpTable(gdb.Command): + ''' +lj-tab + +The command receives a GCtab address and dumps the table contents: +* Metatable address if one is set +* Array part slots: + : []: +* Hash part nodes: + : { } => { }; next = + ''' + + def __init__(self): + super(LJDumpTable, self).__init__( + 'lj-tab', gdb.COMMAND_DATA) + + def invoke(self, arg, from_tty): + t = cast('GCtab *', parse_arg(arg)) + array = mref('TValue *', t['array']) + nodes = mref('struct Node *', t['node']) + mt = gcval(t['metatable']) + capacity = { + 'apart': int(t['asize']), + 'hpart': int(t['hmask'] + 1) if t['hmask'] > 0 else 0 + } + + if mt != 0: + gdb.write('Metatable detected: {}\n'.format(strx64(mt))) + + gdb.write('Array part: {} slots\n'.format(capacity['apart'])) + for i in range(capacity['apart']): + slot = array + i + gdb.write('{ptr}: [{index}]: {value}\n'.format( + ptr = slot, + index = i, + value = dump_tvalue(slot) + )) + + gdb.write('Hash part: {} nodes\n'.format(capacity['hpart'])) + # See hmask comment in lj_obj.h + for i in range(capacity['hpart']): + node = nodes + i + gdb.write('{ptr}: {{ {key} }} => {{ {val} }}; next = {n}\n'.format( + ptr = node, + key = dump_tvalue(node['key']), + val= dump_tvalue(node['val']), + n = mref('struct Node *', node['next']) + )) + +LJDumpTable() + +class LJDumpStack(gdb.Command): + ''' +lj-stack [] + +The command receives a lua_State address and dumps the given Lua +coroutine guest stack: + + [] + +* : guest stack slot address +* : + - S: Bottom of the stack (the slot L->stack points to) + - B: Base of the current guest frame (the slot L->base points to) + - T: Top of the current
guest frame (the slot L->top points to) + - M: Last slot of the stack (the slot L->maxstack points to) +* : see help lj-tv for more info +* : framelink slot differs from the value slot: it contains info + related to the function being executed within this guest frame, its + type and link to the parent guest frame + [] delta=, + - : + + L: VM performs a call as a result of bytecode execution + + C: VM performs a call as a result of lj_vm_call + + M: VM performs a call to a metamethod as a result of bytecode + execution + + V: Variable-length frame for storing arguments of a variadic + function + + CP: Protected C frame + + PP: VM performs a call as a result of executing pcall or xpcall + +If L is omitted, the main coroutine is used. + ''' + + def __init__(self): + super(LJDumpStack, self).__init__( + 'lj-stack', gdb.COMMAND_DATA) + + def invoke(self, arg, from_tty): + gdb.write('{}\n'.format(dump_stack(L(parse_arg(arg))))) + +LJDumpStack() + +class LJState(gdb.Command): + ''' +lj-state +The command requires no args and dumps current VM, GC and JIT states +* VM state: +* GC state: +* JIT state: + ''' + + def __init__(self): + super(LJState, self).__init__( + 'lj-state', gdb.COMMAND_DATA) + + def invoke(self, arg, from_tty): + g = G(L(None)) + gdb.write('{}\n'.format('\n'.join( + map(lambda t: '{} state: {}'.format(*t), { + 'VM': vm_state(g), + 'GC': gc_state(g), + 'JIT': jit_state(g), + }.items()) + ))) + +LJState() + +class LJGC(gdb.Command): + ''' +lj-gc + +The command requires no args and dumps current GC stats: +* total: +* threshold: +* debt: +* estimate: +* stepmul: +* pause: +* sweepstr: +* root: +* gray: +* grayagain: +* weak: + ''' + + def __init__(self): + super(LJGC, self).__init__( + 'lj-gc', gdb.COMMAND_DATA) + + def invoke(self, arg, from_tty): + g = G(L(None)) + gdb.write('GC stats: {state}\n{stats}\n'.format( + state = gc_state(g), + stats = dump_gc(g) + )) + +LJGC() -- 2.24.0