"""GDB extension for inspecting LuaJIT runtime structures.

Loading this module registers the following GDB commands:

* lj-arch  -- dumps values of LJ_64 and LJ_GC64 macro definitions
* lj-tv    -- dumps the type and some GCobj info related to the given TValue
* lj-str   -- dumps the contents of the given GCstr
* lj-tab   -- dumps the contents of the given GCtab
* lj-stack -- dumps Lua stack of the given lua_State
* lj-state -- shows current VM and GC states

Currently the extension supports only x64 builds but respects the LJ_GC64
value.
"""

import re
import gdb

# Resolved gdb.Type objects, keyed by the type string passed to gtype().
gtype_cache = {}


def gtype(typestr):
    """Return the gdb.Type described by typestr, memoized in gtype_cache.

    When typestr has a '*' after the type name (e.g. 'TValue *' or
    'struct Node *'), the name in front of the asterisk is looked up and a
    single pointer to it is returned. Any other string is handed to
    gdb.lookup_type() unchanged.
    """
    if typestr in gtype_cache:
        return gtype_cache[typestr]

    m = re.match(r'((?:(?:struct|union) )?\S*)\s*[*]', typestr)

    if m is None:
        resolved = gdb.lookup_type(typestr)
    else:
        resolved = gdb.lookup_type(m.group(1)).pointer()

    gtype_cache[typestr] = resolved
    return resolved


def cast(typestr, val):
    """Wrap val in a gdb.Value and cast it to the type named by typestr."""
    return gdb.Value(val).cast(gtype(typestr))


def tou64(val):
    """Cast val to uint64_t and mask the result to 64 bits."""
    return cast('uint64_t', val) & 0xFFFFFFFFFFFFFFFF


def tou32(val):
    """Cast val to uint32_t and mask the result to 32 bits."""
    return cast('uint32_t', val) & 0xFFFFFFFF


def i2notu64(val):
    """Return the bitwise NOT of int(val), truncated to 64 bits."""
    return ~int(val) & 0xFFFFFFFFFFFFFFFF


def i2notu32(val):
    """Return the bitwise NOT of int(val), truncated to 32 bits."""
    return ~int(val) & 0xFFFFFFFF


def strx64(val):
    """Return val, cast to uint64_t, formatted as a hexadecimal string."""
    # int() makes the conversion explicit instead of relying on hex()
    # accepting a gdb.Value directly.
    return hex(int(cast('uint64_t', val) & 0xFFFFFFFFFFFFFFFF))


# Types {{{

# Type tags: each one is the bitwise NOT of a small index, truncated to
# 32 bits.
LJ_T = {
    'NIL': i2notu32(0),
    'FALSE': i2notu32(1),
    'TRUE': i2notu32(2),
    'LIGHTUD': i2notu32(3),
    'STR': i2notu32(4),
    'UPVAL': i2notu32(5),
    'THREAD': i2notu32(6),
    'PROTO': i2notu32(7),
    'FUNC': i2notu32(8),
    'TRACE': i2notu32(9),
    'CDATA': i2notu32(10),
    'TAB': i2notu32(11),
    'UDATA': i2notu32(12),
    'NUMX': i2notu32(13),
}

# Reverse lookup for typenames(): tag value -> 'LJ_T<NAME>'. Built once
# instead of on every call.
_LJ_T_NAMES = {tag: 'LJ_T' + name for name, tag in LJ_T.items()}


def typenames(value):
    """Return the 'LJ_T...' name for a type tag, or 'LJ_TINVALID'."""
    return _LJ_T_NAMES.get(int(value), 'LJ_TINVALID')

# }}}

# Frames {{{

# Masks applied to the ftsz word of a frame link.
FRAME_TYPE = 0x3
FRAME_P = 0x4
FRAME_TYPEP = FRAME_TYPE | FRAME_P

FRAME = {
    'LUA': 0x0,
    'C': 0x1,
    'CONT': 0x2,
    'VARG': 0x3,
    'LUAP': 0x4,
    'CP': 0x5,
    'PCALL': 0x6,
    'PCALLH': 0x7,
}

# One-letter labels used in stack dumps for the basic frame types.
_FRAME_TYPE_CHARS = {
    FRAME['LUA']: 'L',
    FRAME['C']: 'C',
    FRAME['CONT']: 'M',
    FRAME['VARG']: 'V',
}


def frametypes(ft):
    """Return the one-letter label for frame type ft, or '?' if unknown."""
    return _FRAME_TYPE_CHARS.get(ft, '?')


def bc_a(ins):
    """Return bits 8..15 of the bytecode instruction word ins."""
    return (ins >> 8) & 0xff


def frame_ftsz(framelink):
    """Return the ftsz word of framelink, cast to ptrdiff_t.

    With LJ_FR2 the field is read straight from the slot; otherwise it is
    read through the slot's fr.tp member.
    """
    if LJ_FR2:
        return cast('ptrdiff_t', framelink['ftsz'])
    return cast('ptrdiff_t', framelink['fr']['tp']['ftsz'])


def frame_pc(framelink):
    """Return the 'BCIns *' stored in framelink.

    With LJ_FR2 this is the ftsz word reinterpreted as a pointer; otherwise
    it is the fr.tp.pcr memory reference.
    """
    if LJ_FR2:
        return cast('BCIns *', frame_ftsz(framelink))
    return mref('BCIns *', framelink['fr']['tp']['pcr'])


def frame_prevl(framelink):
    """Return the previous frame link, computed from the saved pc.

    The distance in slots is 1 + LJ_FR2 plus the bc_a() field of the
    instruction just before the saved pc.
    """
    return framelink - (1 + LJ_FR2 + bc_a(frame_pc(framelink)[-1]))


def frame_ispcall(framelink):
    """Return whether all FRAME['PCALL'] bits are set in the ftsz word."""
    return (frame_ftsz(framelink) & FRAME['PCALL']) == FRAME['PCALL']


def frame_sized(framelink):
    """Return the ftsz word with the FRAME_TYPEP bits cleared."""
    return frame_ftsz(framelink) & ~FRAME_TYPEP


def frame_prevd(framelink):
    """Return the previous frame link, computed from the frame size.

    frame_sized() is applied as a byte offset (the subtraction is done on
    a 'char *').
    """
    return cast(
        'TValue *', cast('char *', framelink) - frame_sized(framelink)
    )


def frame_type(framelink):
    """Return the FRAME_TYPE bits of the ftsz word."""
    return frame_ftsz(framelink) & FRAME_TYPE


def frame_typep(framelink):
    """Return the FRAME_TYPEP bits of the ftsz word."""
    return frame_ftsz(framelink) & FRAME_TYPEP


def frame_islua(framelink):
    """Return whether the frame type is LUA and the ftsz word is positive."""
    return frametypes(int(frame_type(framelink))) == 'L' \
        and int(frame_ftsz(framelink)) > 0


def frame_prev(framelink):
    """Return the previous frame link, picking the pc- or size-based walk."""
    if frame_islua(framelink):
        return frame_prevl(framelink)
    return frame_prevd(framelink)

# }}}

# Const {{{

# Build flavour, detected from how the debuggee defines IRT_PTR / IRT_PGC.
LJ_64 = str(gdb.parse_and_eval('IRT_PTR')) == 'IRT_P64'

LJ_GC64 = str(gdb.parse_and_eval('IRT_PGC')) == 'IRT_P64'

LJ_FR2 = LJ_GC64

# Low 47 bits; gcval() applies it to gcptr64 on GC64 builds.
LJ_GCVMASK = ((1 << 47) - 1)

# Blank filler as wide as ':' followed by the largest printed address.
PADDING = ' ' * len(':' + hex((1 << (47 if LJ_GC64 else 32)) - 1))

# }}}


def itype(o):
    """Return the type tag of TValue o.

    On GC64 builds it is it64 >> 47 cast to uint32_t, otherwise the 'it'
    field.
    """
    return cast('uint32_t', o['it64'] >> 47) if LJ_GC64 else o['it']


def mref(typename, obj):
    """Cast the ptr64 (GC64) or ptr32 field of obj to typename."""
    return cast(typename, obj['ptr64'] if LJ_GC64 else obj['ptr32'])


def gcref(obj):
    """Return the 'GCobj *' held in the gcptr64/gcptr32 field of obj."""
    if LJ_GC64:
        return cast('GCobj *', obj['gcptr64'])
    return cast('GCobj *', cast('uintptr_t', obj['gcptr32']))


def gcval(obj):
    """Return the 'GCobj *' held in obj, masked by LJ_GCVMASK on GC64."""
    if LJ_GC64:
        return cast('GCobj *', obj['gcptr64'] & LJ_GCVMASK)
    return cast('GCobj *', cast('uintptr_t', obj['gcptr32']))


def G(L):
    """Return the 'global_State *' referenced by L['glref']."""
    return mref('global_State *', L['glref'])


# vmstate values with a fixed name; vm_state() reports anything else as
# 'TRACE'.
_VM_STATES = {
    i2notu64(0): 'INTERP',
    i2notu64(1): 'C',
    i2notu64(2): 'GC',
    i2notu64(3): 'EXIT',
    i2notu64(4): 'RECORD',
    i2notu64(5): 'OPT',
    i2notu64(6): 'ASM',
}

_GC_STATES = {
    0: 'PAUSE',
    1: 'PROPAGATE',
    2: 'ATOMIC',
    3: 'SWEEPSTRING',
    4: 'SWEEP',
    5: 'FINALIZE',
    6: 'LAST',
}


def vm_state(L):
    """Return the name of the VM state stored in G(L)['vmstate']."""
    return _VM_STATES.get(int(tou64(G(L)['vmstate'])), 'TRACE')


def gc_state(L):
    """Return the name of G(L)['gc']['state'], or 'INVALID' if unknown."""
    return _GC_STATES.get(int(G(L)['gc']['state']), 'INVALID')


def tvisnumber(o):
    """Return whether the type tag of o is in the number range."""
    limit = 0xfffeffff if LJ_64 and not LJ_GC64 else LJ_T['NUMX']
    return itype(o) <= limit


def tvislightud(o):
    """Return whether o holds a light userdata."""
    if LJ_64 and not LJ_GC64:
        return (cast('int32_t', itype(o)) >> 15) == -2
    else:
        return itype(o) == LJ_T['LIGHTUD']


def strdata(obj):
    """Return the printed payload that directly follows the GCstr header."""
    # String is printed with pointer to it, thanks to gdb. Just strip it.
    return str(cast('char *', cast('GCstr *', obj) + 1))[len(PADDING):]


def itypemap(o):
    """Return the type tag of o, normalized for the dumpers table.

    Every number maps to LJ_T['NUMX']. On 64-bit non-GC64 builds every
    light userdata maps to LJ_T['LIGHTUD']. Anything else keeps its raw
    tag.
    """
    if tvisnumber(o):
        return LJ_T['NUMX']
    if LJ_64 and not LJ_GC64 and tvislightud(o):
        return LJ_T['LIGHTUD']
    return itype(o)


def funcproto(func):
    """Return the 'GCproto *' located sizeof(GCproto) bytes before func's pc.

    Only meaningful for functions whose ffid is 0.
    """
    assert func['ffid'] == 0

    return cast(
        'GCproto *',
        mref('char *', func['pc']) - gdb.lookup_type('GCproto').sizeof
    )


# Dumpers {{{

def _gcaddr(tv):
    """Return the hex address of the GC object referenced by tv['gcr']."""
    return strx64(gcval(tv['gcr']))


def dump_lj_tnil(tv):
    """Describe a nil TValue."""
    return 'nil'


def dump_lj_tfalse(tv):
    """Describe a false TValue."""
    return 'false'


def dump_lj_ttrue(tv):
    """Describe a true TValue."""
    return 'true'


def dump_lj_tlightud(tv):
    """Describe a light userdata TValue by address."""
    return 'light userdata @ {}'.format(_gcaddr(tv))


def dump_lj_tstr(tv):
    """Describe a string TValue by contents and address."""
    return 'string {body} @ {address}'.format(
        body=strdata(gcval(tv['gcr'])),
        address=_gcaddr(tv)
    )


def dump_lj_tupval(tv):
    """Describe an upvalue TValue by address."""
    return 'upvalue @ {}'.format(_gcaddr(tv))


def dump_lj_tthread(tv):
    """Describe a thread TValue by address."""
    return 'thread @ {}'.format(_gcaddr(tv))


def dump_lj_tproto(tv):
    """Describe a prototype TValue by address."""
    return 'proto @ {}'.format(_gcaddr(tv))


def dump_lj_tfunc(tv):
    """Describe a function TValue.

    ffid 0 is reported as a Lua function with its upvalue count, chunk
    name and first line. ffid 1 is reported as a C function with the
    address of its 'f' field. Any other ffid is reported as a fast
    function.
    """
    func = cast('struct GCfuncC *', gcval(tv['gcr']))
    ffid = func['ffid']

    if ffid == 0:
        pt = funcproto(func)
        # String will be printed with pointer to that string, thanks to gdb.
        return (
            'Lua function @ {addr}, {nupvals} upvalues, {chunk}:{line}'
        ).format(
            addr=strx64(func),
            nupvals=int(func['nupvalues']),
            chunk=strdata(cast('GCstr *', gcval(pt['chunkname']))),
            line=pt['firstline']
        )
    elif ffid == 1:
        return 'C function @ {}'.format(strx64(func['f']))
    else:
        return 'fast function #{}'.format(int(ffid))


def dump_lj_ttrace(tv):
    """Describe a trace TValue by trace number and address."""
    trace = cast('struct GCtrace *', gcval(tv['gcr']))
    return 'trace {traceno} @ {addr}'.format(
        traceno=strx64(trace['traceno']),
        addr=strx64(trace)
    )


def dump_lj_tcdata(tv):
    """Describe a cdata TValue by address."""
    return 'cdata @ {}'.format(_gcaddr(tv))


def dump_lj_ttab(tv):
    """Describe a table TValue by address."""
    return 'table @ {}'.format(_gcaddr(tv))


def dump_lj_tudata(tv):
    """Describe a userdata TValue by address."""
    return 'userdata @ {}'.format(_gcaddr(tv))


def dump_lj_tnumx(tv):
    """Describe a number TValue by its value as a double."""
    return 'number {}'.format(cast('double', tv['n']))


def dump_lj_invalid(tv):
    """Describe a TValue whose type tag has no dumper."""
    return 'not valid type @ {}'.format(_gcaddr(tv))

# }}}


# Dumper lookup, keyed by the names typenames() returns.
dumpers = {
    'LJ_TNIL': dump_lj_tnil,
    'LJ_TFALSE': dump_lj_tfalse,
    'LJ_TTRUE': dump_lj_ttrue,
    'LJ_TLIGHTUD': dump_lj_tlightud,
    'LJ_TSTR': dump_lj_tstr,
    'LJ_TUPVAL': dump_lj_tupval,
    'LJ_TTHREAD': dump_lj_tthread,
    'LJ_TPROTO': dump_lj_tproto,
    'LJ_TFUNC': dump_lj_tfunc,
    'LJ_TTRACE': dump_lj_ttrace,
    'LJ_TCDATA': dump_lj_tcdata,
    'LJ_TTAB': dump_lj_ttab,
    'LJ_TUDATA': dump_lj_tudata,
    'LJ_TNUMX': dump_lj_tnumx,
}


def dump_tvalue(tvalue):
    """Return a one-line description of tvalue.

    The dumper is chosen by the normalized type tag; unknown tags go to
    dump_lj_invalid().
    """
    dumper = dumpers.get(typenames(itypemap(tvalue)), dump_lj_invalid)
    return dumper(tvalue)


def dump_framelink(L, fr):
    """Return the stack-dump row (newline-terminated) for frame link fr.

    L is accepted for symmetry with dump_stack_slot() and is not read.
    """
    # With LJ_FR2 the ftsz word lives one slot above the function slot.
    fr2 = fr + LJ_FR2

    if frame_ispcall(fr2):
        kind = 'PP'
    else:
        kind = '{frname}{p}'.format(
            frname=frametypes(int(frame_type(fr2))),
            p='P' if frame_typep(fr2) & FRAME_P else ''
        )

    # The flag field is four columns wide so that it lines up with the
    # '[ BTM]' field printed by dump_stack_slot().
    return '{fr}{padding} [    ] FRAME: [{pp}] delta={d}, {f}\n'.format(
        fr=fr,
        padding=':{fr2}'.format(fr2=fr2) if LJ_FR2 else PADDING,
        pp=kind,
        d=cast('TValue *', fr2) - cast('TValue *', frame_prev(fr2)),
        f=dump_lj_tfunc(fr),
    )


def dump_stack_slot(L, slot, base=None, top=None):
    """Return the stack-dump row (newline-terminated) for one value slot.

    The flag field marks the slot with B, T or M when it equals base, top
    or L's maxstack respectively. base and top fall back to L['base'] and
    L['top'] when not given.
    """
    base = base or L['base']
    top = top or L['top']

    return '{addr}{padding} [ {B}{T}{M}] VALUE: {value}\n'.format(
        addr=strx64(slot),
        padding=PADDING,
        B='B' if slot == base else ' ',
        T='T' if slot == top else ' ',
        M='M' if slot == mref('TValue *', L['maxstack']) else ' ',
        value=dump_tvalue(slot),
    )


def dump_stack(L, base=None, top=None):
    """Return a multi-line dump of the Lua stack of L, from the top down.

    The dump starts with the red zone, maxstack, free-slot and top marker
    rows. It then walks the frame links from base down to L['stack'],
    printing each frame's value slots followed by its frame row, and ends
    with the dummy frame row (no trailing newline). base and top fall back
    to L['base'] and L['top'] when not given.
    """
    base = base or L['base']
    top = top or L['top']
    maxstack = mref('TValue *', L['maxstack'])
    stack = mref('TValue *', L['stack'])
    red = 5 + 2 * LJ_FR2

    # Rows are collected and joined once instead of growing a string with
    # += inside the loops. All flag fields are four columns wide.
    parts = ['\n'.join([
        '{start}:{end} [    ] {n} slots: Red zone'.format(
            start=strx64(maxstack + 1),
            end=strx64(maxstack + red),
            n=red,
        ),
        '{maxstack}{padding} [   M]'.format(
            maxstack=strx64(maxstack),
            padding=PADDING,
        ),
        '{start}:{end} [    ] {nfreeslots} slots: Free stack slots'.format(
            start=strx64(top + 1),
            end=strx64(maxstack - 1),
            # Byte distance between the two pointers, minus one 8-byte
            # slot, converted to a slot count.
            nfreeslots=int((tou64(maxstack) - tou64(top) - 8) >> 3),
        ),
        '{top}{padding} [  T ]'.format(
            top=strx64(top),
            padding=PADDING,
        )
    ]) + '\n']

    slot = top - 1
    framelink = base - (1 + LJ_FR2)

    while framelink > stack:
        while slot > framelink + LJ_FR2:
            parts.append(dump_stack_slot(L, slot, base, top))
            slot -= 1
        parts.append(dump_framelink(L, framelink))
        framelink = frame_prev(framelink + LJ_FR2) - LJ_FR2
        slot -= 1 + LJ_FR2

    parts.append('{fr}{padding} [S   ] FRAME: dummy L'.format(
        fr=slot,
        padding=':{nilslot}'.format(nilslot=slot + 1) if LJ_FR2 else PADDING
    ))

    return ''.join(parts)


def parse_arg(arg):
    """Evaluate a command's argument string and return the gdb.Value.

    Raises gdb.GdbError when no argument was given, or when the expression
    evaluates to a value that tests false.
    """
    argv = gdb.string_to_argv(arg)

    if not argv:
        raise gdb.GdbError(
            "Wrong number of arguments. "
            "Use 'help COMMAND' to get more info."
        )

    ret = gdb.parse_and_eval(arg)

    if not ret:
        # Shared by every lj-* command that takes an address, so the
        # message does not name a particular object kind.
        raise gdb.GdbError('argument is NULL or zero')

    return ret


# NOTE: GDB takes a command's help text from its class docstring, so the
# docstrings of the command classes below are user-visible output.

class LJDumpArch(gdb.Command):
    '''
lj-arch
Dumps compile-time information
    '''

    def __init__(self):
        super(LJDumpArch, self).__init__(
            'lj-arch', gdb.COMMAND_DATA
        )

    def invoke(self, arg, from_tty):
        """Write the detected LJ_64 and LJ_GC64 values."""
        gdb.write('LJ_64: {LJ_64}, LJ_GC64: {LJ_GC64}\n'.format(
            LJ_64=LJ_64,
            LJ_GC64=LJ_GC64
        ))


LJDumpArch()


class LJDumpTValue(gdb.Command):
    '''
lj-tv address
Dumps the contents of the TValue at address.
    '''

    def __init__(self):
        super(LJDumpTValue, self).__init__(
            'lj-tv', gdb.COMMAND_DATA
        )

    def invoke(self, arg, from_tty):
        """Evaluate arg as a 'TValue *' and write its description."""
        tv = cast('TValue *', parse_arg(arg))
        gdb.write('{}\n'.format(dump_tvalue(tv)))


LJDumpTValue()


class LJDumpString(gdb.Command):
    '''
lj-str address
Dumps the contents of the GCstr at address.
    '''

    def __init__(self):
        super(LJDumpString, self).__init__(
            'lj-str', gdb.COMMAND_DATA
        )

    def invoke(self, arg, from_tty):
        """Evaluate arg as a 'GCstr *' and write its body, length and hash."""
        string = cast('GCstr *', parse_arg(arg))
        gdb.write("String: {body} [{len} bytes] with hash {hash}\n".format(
            body=strdata(string),
            hash=strx64(string['hash']),
            len=string['len'],
        ))


LJDumpString()


class LJDumpTable(gdb.Command):
    '''
lj-tab address
Dumps the contents of the GCtab at address.
    '''

    def __init__(self):
        super(LJDumpTable, self).__init__(
            'lj-tab', gdb.COMMAND_DATA)

    def invoke(self, arg, from_tty):
        """Evaluate arg as a 'GCtab *' and write its array and hash parts."""
        t = cast('GCtab *', parse_arg(arg))
        array = mref('TValue *', t['array'])
        nodes = mref('struct Node *', t['node'])
        mt = gcval(t['metatable'])

        if mt != 0:
            gdb.write('Metatable detected: {}\n'.format(strx64(mt)))

        gdb.write('Array part:\n')
        for i in range(int(t['asize'])):
            gdb.write('[{index}] {value}\n'.format(
                index=i,
                value=dump_tvalue(array + i)
            ))

        gdb.write('Hash part:\n')
        # See hmask comment in lj_obj.h
        for i in range(int(t['hmask'] + 1)):
            node = nodes + i
            gdb.write('{{ {key} }} => {{ {value} }} next = {next}\n'.format(
                key=dump_tvalue(node['key']),
                value=dump_tvalue(node['val']),
                next=mref('struct Node *', node['next'])
            ))


LJDumpTable()


class LJDumpStack(gdb.Command):
    '''
lj-stack L
Dumps Lua stack of the given coroutine L.
    '''

    def __init__(self):
        super(LJDumpStack, self).__init__(
            'lj-stack', gdb.COMMAND_DATA)

    def invoke(self, arg, from_tty):
        """Evaluate arg as a 'lua_State *' and write its stack dump."""
        L = cast('lua_State *', parse_arg(arg))
        gdb.write('{}\n'.format(dump_stack(L)))


LJDumpStack()


class LJState(gdb.Command):
    '''
lj-state L
Show current VM and GC state.
    '''

    def __init__(self):
        super(LJState, self).__init__(
            'lj-state', gdb.COMMAND_DATA)

    def invoke(self, arg, from_tty):
        """Evaluate arg as a 'lua_State *' and write its VM and GC states."""
        L = cast('lua_State *', parse_arg(arg))
        gdb.write('VM state: {}\n'.format(vm_state(L)))
        gdb.write('GC state: {}\n'.format(gc_state(L)))


LJState()