From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 885596F154; Tue, 6 Sep 2022 15:55:19 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 885596F154 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1662468919; bh=x/cFEURFYalieJ8rHiHW1Fs1qRqnHNzRnQ0Ga9Wb9vY=; h=To:Cc:Date:Subject:List-Id:List-Unsubscribe:List-Archive: List-Post:List-Help:List-Subscribe:From:Reply-To:From; b=Im/b+WwA5QuNsbg1FbDCYLwXsQvWo9ao/G9f54w9WWDS7FsbgYqWpqiL27KTEFsjG WFy3lJUQBMvTpW9X+TduM16W/XgB1nHz6znqWnELE6Rfq+EFR5bebIDY4dEP4Awnd3 dtn82XKjtNyyQVhR1NfjnDfE6gKBzQCo+qYrn8fQ= Received: from mail-lj1-f175.google.com (mail-lj1-f175.google.com [209.85.208.175]) (using TLSv1.3 with cipher TLS_AES_128_GCM_SHA256 (128/128 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id F1C3D6F153 for ; Tue, 6 Sep 2022 15:55:17 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org F1C3D6F153 Received: by mail-lj1-f175.google.com with SMTP id s15so12192069ljp.5 for ; Tue, 06 Sep 2022 05:55:17 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:message-id:date:subject:cc :to:from:x-gm-message-state:from:to:cc:subject:date; bh=oDNw17f+qBorYf3GCF68PKfo9TbzaS7mlrVKS/RRKmQ=; b=EoS7xX3XdYxO4z1dqYvM968oogytnSFNB2ebKcYytc7Iu+AtykOnRpWii5YyLAISDd dMHgST/2MliCVXpYMqC8YQj7Hg5ZIKbYmKEQOEWqCySK15ZQ3mTXY6V56DPjFd+0yoQt 9k+yNG/yt+HIN86Qqy7AcaTzdNbckR3OOgqbkFMWkrq36n7rBX1e4EwT4bD+e/ZhChth KGZOw4Xk/rLSQ6wtJAyoiQUK5zz6PE9XtApRt+x3ixtsppz9Pu9GwPf1qBhksplPBOpX 6fTqu5r49euacHfEnPsE/Gp5Eh4YfLEpRJ3EWuxKhvUTpNzxaXztkh1ORwu/x2TdIwOs K3ug== X-Gm-Message-State: ACgBeo2XyM4flimb7bfdRSpyRYePWVJnq0iqpDkp0pQJDLzZWMsLauPL Yub3LFCrKzsn9CYs4cx36AX2ACvxTwXnuw== X-Google-Smtp-Source: 
AA6agR7Z/hCS1lG8+PwbOh8uvHXk8I3N5tk66uFoyfUr0D9YDoZ00PCCeAfe6ziM6I4uuq+Ixd2hBQ== X-Received: by 2002:a2e:83c9:0:b0:24d:a95d:7b7a with SMTP id s9-20020a2e83c9000000b0024da95d7b7amr15883472ljh.254.1662468916608; Tue, 06 Sep 2022 05:55:16 -0700 (PDT) Received: from localhost.localdomain ([185.6.247.97]) by smtp.gmail.com with ESMTPSA id w12-20020a05651234cc00b00489d1896c06sm1718249lfr.125.2022.09.06.05.55.15 (version=TLS1_3 cipher=TLS_CHACHA20_POLY1305_SHA256 bits=256/256); Tue, 06 Sep 2022 05:55:16 -0700 (PDT) To: tarantool-discussions@dev.tarantool.org, imun@tarantool.org, skaplun@tarantool.org Cc: "m.kokryashkin" Date: Tue, 6 Sep 2022 15:55:12 +0300 Message-Id: <20220906125512.57456-1-max.kokryashkin@gmail.com> X-Mailer: git-send-email 2.32.1 (Apple Git-133) MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-discussions] [PATCH luajit] lldb: introduce luajit-lldb X-BeenThere: tarantool-discussions@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development process List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Maksim Kokryashkin via Tarantool-discussions Reply-To: Maksim Kokryashkin Errors-To: tarantool-discussions-bounces@dev.tarantool.org Sender: "Tarantool-discussions" From: "m.kokryashkin" It is impossible to run gdb on M1 devices, the only available debugger is lldb. The luajit-gdb extension doesn't work with lldb, so this patch introduces the luajit-lldb extension, which re-implements exactly the same functionality. 
Part of tarantool/tarantool#4808 --- Issue: https://github.com/tarantool/tarantool/issues/4808 Branch: https://github.com/tarantool/luajit/tree/gh-fckxorg/luajit-lldb src/luajit_lldb.py | 1034 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1034 insertions(+) create mode 100644 src/luajit_lldb.py diff --git a/src/luajit_lldb.py b/src/luajit_lldb.py new file mode 100644 index 00000000..69b50232 --- /dev/null +++ b/src/luajit_lldb.py @@ -0,0 +1,1034 @@ +import abc +import argparse +import re +import shlex + +import lldb + +LJ_64 = None +LJ_GC64 = None +LJ_FR2 = None +LJ_DUALNUM = None +PADDING = None + +# Constants +IRT_P64 = 9 +LJ_GCVMASK = ((1 << 47) - 1) +LJ_TISNUM = None + +# Global +target = None + +class Ptr: + def __init__(self, value, normal_type): + self.value = value + self.normal_type = normal_type + + @property + def deref(self): + return self.normal_type(self.value.Dereference()) + + def __add__(self, other): + assert isinstance(other, int) + return self.__class__(cast(self.normal_type.__name__ + ' *', cast('uintptr_t', self.value.unsigned + other * self.value.deref.size))) + + def __sub__(self, other): + assert isinstance(other, int) or isinstance(other, Ptr) + if isinstance(other, int): + return self.__add__(-other) + else: + return self.value.unsigned - other.value.unsigned + + + def __eq__(self, other): + assert isinstance(other, Ptr) or (isinstance(other, int) and other >= 0) + if isinstance(other, Ptr): + return self.value.unsigned == other.value.unsigned + else: + return self.value.unsigned == other + + def __ne__(self, other): + return not self == other + + def __gt__(self, other): + assert isinstance(other, Ptr) + return self.value.unsigned > other.value.unsigned + + def __bool__(self): + return self.value.unsigned != 0 + + def __int__(self): + return self.value.unsigned + + def __str__(self): + return self.value.value + +class MetaStruct(type): + def __init__(cls, name, bases, nmspc): + super(MetaStruct, cls).__init__(name, bases, 
nmspc) + + def make_general(field, tp): + extras = { + 'raw' : None, + 'uints': 'unsigned', + 'ints': 'signed', + 'strings': 'value', + } + if tp in extras.keys(): + tp = extras[tp] + if tp is None: + return lambda self: self[field] + else: + return lambda self: getattr(self[field], tp) + else: + return lambda self: globals()[tp](self[field]) + + if hasattr(cls, 'metainfo'): + for tp in cls.metainfo.keys(): + if tp == 'custom': + for field, value in cls.metainfo['custom'].items(): + setattr(cls, field, value) + else: + for field in cls.metainfo[tp]: + setattr(cls, field, property(make_general(field, tp))) + +class Struct(metaclass=MetaStruct): + def __init__(self, value): + self.value = value + + def __getitem__(self, name): + return self.value.GetChildMemberWithName(name) + + @property + def addr(self): + return self.value.address_of + +c_structs = { + 'MRef': { + 'custom': { + 'ptr': property(lambda self: self['ptr64'] if LJ_GC64 else self['ptr32']) + } + }, + 'GCRef': { + 'custom': { + 'gcptr': property(lambda self: self['gcptr64'] if LJ_GC64 else self['gcptr32']) + } + }, + 'TValue': { + 'GCRef': ['gcr'], + 'uints': ['it', 'i'], + 'ints': ['it64'], + 'strings': ['n'], + 'custom': { + 'ftsz': property(lambda self: self['ftsz'].signed if LJ_GC64 else None), + 'fr': property(lambda self: FR(self['fr']) if not LJ_GC64 else None) + } + }, + 'GCState': { + 'GCRef': ['root', 'gray', 'grayagain', 'weak', 'mmudata'], + 'uints': ['state', 'total', 'threshold', 'debt', 'estimate', + 'stepmul', 'pause', 'sweepstr'] + }, + 'lua_State': { + 'MRef': ['glref', 'stack', 'maxstack'], + 'TValuePtr': ['top', 'base'] + }, + 'global_State': { + 'GCState': ['gc'], + 'uints': ['vmstate', 'strmask'] + }, + 'jit_State': { + 'uints': ['state'] + }, + 'GChead': { + 'GCRef': ['nextgc'] + }, + 'GCobj': { + 'GChead': ['gch'] + }, + 'GCstr': { + 'uints': ['hash', 'len'] + }, + 'FrameLink': { + 'MRef': ['pcr'], + 'ints': ['ftsz'] + }, + 'FR': { + 'FrameLink': ['tp'] + }, + 'GCfuncC': { + 
'MRef': ['pc'], + 'uints': ['ffid', 'nupvalues'], + 'raw': ['f'] + }, + 'GCtab': { + 'MRef': ['array', 'node'], + 'GCRef': ['metatable'], + 'uints': ['asize', 'hmask'] + }, + 'GCproto': { + 'GCRef': ['chunkname'], + 'raw': ['firstline'] + }, + 'GCtrace': { + 'uints': ['traceno'] + }, + 'Node': { + 'TValue': ['key', 'val'], + 'MRef': ['next'] + }, + 'BCIns': {} +} + +for cls in c_structs.keys(): + globals()[cls] = type(cls, (Struct, ), {'metainfo': c_structs[cls]} ) + +for cls in Struct.__subclasses__(): + ptr_name = cls.__name__ + 'Ptr' + def make_ptr_init(nm, cls): + return type( + nm, + (Ptr,), + { + '__init__': lambda self, value: super(type(self), self).__init__(value, cls) + } + ) + + globals()[ptr_name] = make_ptr_init(ptr_name, cls) + + +class Command(object): + def __init__(self, debugger, unused): + pass + + def get_short_help(self): + return self.__doc__.splitlines()[0] + + def get_long_help(self): + return self.__doc__ + + def __call__(self, debugger, command, exe_ctx, result): + try: + args = self.argument_parser.parse_args(shlex.split(command)) + self.execute(debugger, args, result) + except Exception as e: + msg = u'Failed to execute command `{}`: {}'.format(self.command, e) + result.SetError(msg) + + @property + def argument_parser(self): + return argparse.ArgumentParser( + prog=self.command, + description=self.get_long_help(), + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + @abc.abstractproperty + def command(self): + """Command name. + This name will be used by LLDB in order to uniquely identify an + implementation that should be executed when a command is run + in the REPL. + """ + + @abc.abstractmethod + def execute(self, debugger, args, result): + """Implementation of the command. + Subclasses override this method to implement the logic of a given + command, e.g. printing a stacktrace. The command output should be + communicated back via the provided result object, so that it's + properly routed to LLDB frontend. 
Any unhandled exception will be + automatically transformed into proper errors. + Args: + debugger: lldb.SBDebugger: the primary interface to LLDB scripting + args: argparse.Namespace: an object holding parsed command arguments + result: lldb.SBCommandReturnObject: a container which holds the + result from command execution + """ + +def gcval(obj): + return cast(GCobjPtr, cast('uintptr_t', obj.gcptr.unsigned & LJ_GCVMASK) if LJ_GC64 + else cast('uintptr_t', obj.gcptr)) + +def gcref(obj): + return cast(GCobjPtr, obj.gcptr if LJ_GC64 + else cast('uintptr_t', obj.gcptr)) + +def gcnext(obj): + return gcref(obj).deref.gch.nextgc + +def gclistlen(root, end=0x0): + count = 0 + while(gcref(root) != end): + count += 1 + root = gcnext(root) + return count + +def gcringlen(root): + if not gcref(root): + return 0 + elif gcref(root) == gcref(gcnext(root)): + return 1 + else: + return 1 + gclistlen(gcnext(root), gcref(root).deref) + +gclen = { + 'root': gclistlen, + 'gray': gclistlen, + 'grayagain': gclistlen, + 'weak': gclistlen, + # XXX: gc.mmudata is a ring-list. + 'mmudata': gcringlen, +} + +def dump_gc(g): + gc = g.gc + stats = [ '{key}: {value}'.format(key = f, value = getattr(gc, f)) for f in ( + 'total', 'threshold', 'debt', 'estimate', 'stepmul', 'pause' + ) ] + + stats += [ 'sweepstr: {sweepstr}/{strmask}'.format( + sweepstr = gc.sweepstr, + # String hash mask (size of hash table - 1). 
+ strmask = g.strmask + 1, + ) ] + + stats += [ '{key}: {number} objects'.format( + key = stat, + number = handler(getattr(gc, stat)) + ) for stat, handler in gclen.items() ] + return '\n'.join(map(lambda s: '\t' + s, stats)) + +def cast(typename, value): + pointer_type = False + name = None + if isinstance(value, Struct) or isinstance(value, Ptr): + value = value.value + if isinstance(typename, type): + name = typename.__name__ + if name.endswith('Ptr'): + pointer_type = True + name = name[:-3] + else: + name = typename + if name[-1] == '*': + name = name[:-1].strip() + pointer_type = True + t = target.FindFirstType(name) + if pointer_type: + t = t.GetPointerType() + + if isinstance(value, int): + if pointer_type: + return target.CreateValueFromAddress('value', lldb.SBAddress(value, target), t.GetPointeeType()).address_of + else: + return target.CreateValueFromData(name = 'value', data = lldb.SBData.CreateDataFromInt(value), type=t) + + if isinstance(typename, type): + return typename(value.Cast(t)) + else: + return value.Cast(t) + +def lookup_global(name): + global target + return target.FindFirstGlobalVariable(name) + +def mref(typename, obj): + return cast(typename, obj.ptr) + +def type_member(type_obj, name): + return next((x for x in type_obj.members if x.name == name), None) + +def J(g): + global target + typeGG = target.FindFirstType('GG_State') + g_member = type_member(typeGG, 'g') + j_member = type_member(typeGG, 'J') + jtype = target.FindFirstType('jit_State').GetPointerType() + + return jit_State(lldb.SBValue().CreateValueFromData(name = 'jit_state_ptr', data = lldb.SBData.CreateDataFromInt(cast('char *', g).unsigned + - g_member.GetOffsetInBytes() + + j_member.GetOffsetInBytes()), type=jtype)) + +def G(L): + return mref(global_StatePtr, L.glref).deref + +def L(L=None): + # lookup a symbol for the main coroutine considering the host app + # XXX Fragile: though the loop initialization looks like a crap but it + # respects both Python 2 and Python 3. 
+ for l in [ L ] + list(map(lambda l: lookup_global(l), ( + # LuaJIT main coro (see luajit/src/luajit.c) + 'globalL', + # Tarantool main coro (see tarantool/src/lua/init.h) + 'tarantool_L', + # TODO: Add more + ))): + if l: + return lua_State(l) + +def tou32(val): + return val & 0xFFFFFFFF + +def i2notu32(val): + return ~int(val) & 0xFFFFFFFF + +def vm_state(g): + return { + i2notu32(0): 'INTERP', + i2notu32(1): 'LFUNC', + i2notu32(2): 'FFUNC', + i2notu32(3): 'CFUNC', + i2notu32(4): 'GC', + i2notu32(5): 'EXIT', + i2notu32(6): 'RECORD', + i2notu32(7): 'OPT', + i2notu32(8): 'ASM', + }.get(int(tou32(g.vmstate)), 'TRACE') + +def gc_state(g): + return { + 0: 'PAUSE', + 1: 'PROPAGATE', + 2: 'ATOMIC', + 3: 'SWEEPSTRING', + 4: 'SWEEP', + 5: 'FINALIZE', + 6: 'LAST', + }.get(g.gc.state, 'INVALID') + +def jit_state(g): + return { + 0: 'IDLE', + 0x10: 'ACTIVE', + 0x11: 'RECORD', + 0x12: 'START', + 0x13: 'END', + 0x14: 'ASM', + 0x15: 'ERR', + }.get(J(g).state, 'INVALID') + +def strx64(val): + return re.sub('L?$', '', + hex(int(val) & 0xFFFFFFFFFFFFFFFF)) + +def funcproto(func): + assert(func.ffid == 0) + + type_proto = target.FindFirstType('GCproto') + type_proto_size = type_proto.GetByteSize() + value = cast('uintptr_t', mref('char *', func.pc).unsigned - type_proto_size) + return cast(GCprotoPtr, value) + +def strdata(obj): + process = target.GetProcess() + thread = process.GetSelectedThread() + frame = thread.GetSelectedFrame() + try: + ptr = cast('char *', obj + 1) + return ptr.summary + except UnicodeEncodeError: + return "" + +def itype(o): + return tou32(o.it64 >> 47) if LJ_GC64 else o.it + +def tvisint(o): + return LJ_DUALNUM and itype(o) == LJ_TISNUM + +def tvislightud(o): + if LJ_64 and not LJ_GC64: + return (cast('int32_t', itype(o)) >> 15) == -2 + else: + return itype(o) == LJ_T['LIGHTUD'] + +def tvisnumber(o): + return itype(o) <= LJ_TISNUM + +def dump_lj_tnil(tv): + return 'nil' + +def dump_lj_tfalse(tv): + return 'false' + +def dump_lj_ttrue(tv): + return 'true' 
+ +def dump_lj_tlightud(tv): + return 'light userdata @ {}'.format(strx64(gcval(tv.deref.gcr))) + +def dump_lj_tstr(tv): + return 'string {body} @ {address}'.format( + body = strdata(cast(GCstrPtr, gcval(tv.deref.gcr))), + address = strx64(gcval(tv.deref.gcr)) + ) + +def dump_lj_tupval(tv): + return 'upvalue @ {}'.format(strx64(gcval(tv.deref.gcr))) + +def dump_lj_tthread(tv): + return 'thread @ {}'.format(strx64(gcval(tv.deref.gcr))) + +def dump_lj_tproto(tv): + return 'proto @ {}'.format(strx64(gcval(tv.deref.gcr))) + +def dump_lj_tfunc(tv): + func = cast(GCfuncCPtr, gcval(tv.deref.gcr)) + ffid = func.deref.ffid + + if ffid == 0: + pt = funcproto(func.deref).deref + return 'Lua function @ {addr}, {nupvals} upvalues, {chunk}:{line}'.format( + addr = strx64(func), + nupvals = func.deref.nupvalues, + chunk = strdata(cast(GCstrPtr, gcval(pt.chunkname))), + line = pt.firstline + ) + elif ffid == 1: + return 'C function @ {}'.format(strx64(func.deref.f.unsigned)) + else: + return 'fast function #{}'.format(ffid) + +def dump_lj_ttrace(tv): + trace = cast(GCtracePtr, gcval(tv.deref.gcr)) + return 'trace {traceno} @ {addr}'.format( + traceno = strx64(trace.deref.traceno), + addr = strx64(trace) + ) + +def dump_lj_tcdata(tv): + return 'cdata @ {}'.format(strx64(gcval(tv.deref.gcr))) + +def dump_lj_ttab(tv): + table = cast(GCtabPtr, gcval(tv.deref.gcr)) + return 'table @ {gcr} (asize: {asize}, hmask: {hmask})'.format( + gcr = strx64(table), + asize = table.deref.asize, + hmask = strx64(table.deref.hmask), + ) + +def dump_lj_tudata(tv): + return 'userdata @ {}'.format(strx64(gcval(tv.deref.gcr))) + +def dump_lj_tnumx(tv): + if tvisint(tv.deref): + return 'integer {}'.format(cast('int32_t', tv.deref.i)) + else: + return 'number {}'.format(tv.deref.n) + +def dump_lj_invalid(tv): + return 'not valid type @ {}'.format(strx64(gcval(tv.deref.gcr))) + +dumpers = { + 'LJ_TNIL': dump_lj_tnil, + 'LJ_TFALSE': dump_lj_tfalse, + 'LJ_TTRUE': dump_lj_ttrue, + 'LJ_TLIGHTUD': 
dump_lj_tlightud, + 'LJ_TSTR': dump_lj_tstr, + 'LJ_TUPVAL': dump_lj_tupval, + 'LJ_TTHREAD': dump_lj_tthread, + 'LJ_TPROTO': dump_lj_tproto, + 'LJ_TFUNC': dump_lj_tfunc, + 'LJ_TTRACE': dump_lj_ttrace, + 'LJ_TCDATA': dump_lj_tcdata, + 'LJ_TTAB': dump_lj_ttab, + 'LJ_TUDATA': dump_lj_tudata, + 'LJ_TNUMX': dump_lj_tnumx, +} + +LJ_T = { + 'NIL' : i2notu32(0), + 'FALSE' : i2notu32(1), + 'TRUE' : i2notu32(2), + 'LIGHTUD' : i2notu32(3), + 'STR' : i2notu32(4), + 'UPVAL' : i2notu32(5), + 'THREAD' : i2notu32(6), + 'PROTO' : i2notu32(7), + 'FUNC' : i2notu32(8), + 'TRACE' : i2notu32(9), + 'CDATA' : i2notu32(10), + 'TAB' : i2notu32(11), + 'UDATA' : i2notu32(12), + 'NUMX' : i2notu32(13), +} + +def itypemap(o): + if LJ_64 and not LJ_GC64: + return LJ_T['NUMX'] if tvisnumber(o) \ + else LJ_T['LIGHTUD'] if tvislightud(o) else itype(o) + else: + return LJ_T['NUMX'] if tvisnumber(o) else itype(o) + +def typenames(value): + return { + LJ_T[k]: 'LJ_T' + k for k in LJ_T.keys() + }.get(int(value), 'LJ_TINVALID') + +def dump_tvalue(tvptr): + return dumpers.get(typenames(itypemap(tvptr.deref)), dump_lj_invalid)(tvptr) + +FRAME_TYPE = 0x3 +FRAME_P = 0x4 +FRAME_TYPEP = FRAME_TYPE | FRAME_P + +FRAME = { + 'LUA': 0x0, + 'C': 0x1, + 'CONT': 0x2, + 'VARG': 0x3, + 'LUAP': 0x4, + 'CP': 0x5, + 'PCALL': 0x6, + 'PCALLH': 0x7, +} + +def frametypes(ft): + return { + FRAME['LUA'] : 'L', + FRAME['C'] : 'C', + FRAME['CONT'] : 'M', + FRAME['VARG'] : 'V', + }.get(ft, '?') + +def bc_a(ins): + return (ins >> 8) & 0xff + +def frame_ftsz(framelink): + return cast('ptrdiff_t', framelink.ftsz if LJ_FR2 \ + else framelink.fr.tp.ftsz) + +def frame_pc(framelink): + return cast(BCInsPtr, frame_ftsz(framelink.deref)) if LJ_FR2 \ + else mref(BCInsPtr, framelink.fr.tp.pcr) + +def frame_prevl(framelink): + process = target.GetProcess() + thread = process.GetSelectedThread() + frame = thread.GetSelectedFrame() + return framelink - (1 + LJ_FR2 + bc_a(frame.EvaluateExpression('((BCIns *)' + str(frame_pc(framelink)) + 
')[-1]').unsigned)) + +def frame_ispcall(framelink): + return (frame_ftsz(framelink).unsigned & FRAME['PCALL']) == FRAME['PCALL'] + +def frame_sized(framelink): + return (frame_ftsz(framelink).unsigned & ~FRAME_TYPEP) + +def frame_prevd(framelink): + return framelink - frame_sized(framelink.deref) + +def frame_type(framelink): + return frame_ftsz(framelink).unsigned & FRAME_TYPE + +def frame_typep(framelink): + return frame_ftsz(framelink).unsigned & FRAME_TYPEP + +def frame_islua(framelink): + return frametypes(frame_type(framelink)) == 'L' \ + and frame_ftsz(framelink).unsigned > 0 + +def frame_prev(framelink): + return frame_prevl(framelink) if frame_islua(framelink.deref) \ + else frame_prevd(framelink) + +def dump_framelink(L, fr): + fr2 = fr + LJ_FR2 + + return '{fr}{padding} [ ] FRAME: [{pp}] delta={d}, {f}\n'.format( + fr = strx64(fr), + padding = ':{fr2: <{width}}'.format(fr2 = strx64(fr2), width=len(PADDING) - 1) if LJ_FR2 else PADDING, + pp = 'PP' if frame_ispcall(fr2.deref) else '{frname}{p}'.format( + frname = frametypes(frame_type(fr2.deref)), + p = 'P' if frame_typep(fr2.deref) & FRAME_P else '' + ), + d = fr2 - frame_prev(fr2), + f = dump_lj_tfunc(fr), + ) + +def dump_stack_slot(L, slot, base=None, top=None, eol='\n'): + base = base or L.base + top = top or L.top + + return '{addr}{padding} [ {B}{T}{M}] VALUE: {value}{eol}'.format( + addr = strx64(slot), + padding = PADDING, + B = 'B' if slot == base else ' ', + T = 'T' if slot == top else ' ', + M = 'M' if slot == mref(TValuePtr, L.maxstack) else ' ', + value = dump_tvalue(slot), + eol = eol, + ) + +def dump_stack(L, base=None, top=None): + base = base or L.base + top = top or L.top + stack = mref(TValuePtr, L.stack) + maxstack = mref(TValuePtr, L.maxstack) + red = 5 + 2 * LJ_FR2 + + dump = '\n'.join([ + '{padding} Red zone: {nredslots: >2} slots {padding}'.format( + padding = '-' * len(PADDING), + nredslots = red, + ), + *( + dump_stack_slot(L, maxstack + offset, base, top, '') + for offset in 
range(red, 0, -1) + ), + '{padding} Stack: {nstackslots: >5} slots {padding}'.format( + padding = '-' * len(PADDING), + nstackslots = int((maxstack - stack) >> 3), + ), + dump_stack_slot(L, maxstack, base, top, ''), + '{start}:{end: <{width}} [ ] {nfreeslots} slots: Free stack slots'.format( + start = strx64(top + 1), + end = strx64(maxstack - 1), + width = len(PADDING) - 1, + nfreeslots = int((maxstack - top - 8) >> 3), + ), + ]) + '\n' + + slot = top + framelink = base - (1 + LJ_FR2) + + # XXX: Lua stack unwinding algorithm consists of the following steps: + # 1. dump all data slots in the (framelink, top) interval + # 2. check whether there are remaining frames + # 3. if there are no slots further, stop the unwinding loop + # 4. otherwise, resolve the next framelink and top and go to (1) + # + # A postcondition (i.e. do-while) loop is the most fitting idiom for such a + # case, but Python doesn't provide such a lexical construction. Hence step (1) + # is unrolled for the topmost stack frame. + while slot > framelink + LJ_FR2: + dump += dump_stack_slot(L, slot, base, top) + slot -= 1 + + while framelink > stack: + assert slot == framelink + LJ_FR2, "Invalid slot during frame unwind" + dump += dump_framelink(L, framelink) + framelink = frame_prev(framelink + LJ_FR2) - LJ_FR2 + slot -= 1 + LJ_FR2 + while slot > framelink + LJ_FR2: + dump += dump_stack_slot(L, slot, base, top) + slot -= 1 + + assert slot == framelink + LJ_FR2, "Invalid slot after frame unwind" + # Skip a nil slot for the last frame for 2-slot frames. + slot -= LJ_FR2 + + dump += '{fr}{padding} [S ] FRAME: dummy L'.format( + fr = strx64(slot), + padding = ':{nilslot: <{offset}}'.format(nilslot = strx64(slot + 1), offset=len(PADDING) - 1) if LJ_FR2 else PADDING + ) + + return dump + + + +class LJDumpTValue(Command): + ''' +lj-tv + +The command receives a pointer to a TValue (TValue address) and dumps +the type and some info related to it. 
+ +* LJ_TNIL: nil +* LJ_TFALSE: false +* LJ_TTRUE: true +* LJ_TLIGHTUD: light userdata @ +* LJ_TSTR: string @ +* LJ_TUPVAL: upvalue @ +* LJ_TTHREAD: thread @ +* LJ_TPROTO: proto @ +* LJ_TFUNC: + : Lua function @ , upvalues, + : C function + : fast function # +* LJ_TTRACE: trace @ +* LJ_TCDATA: cdata @ +* LJ_TTAB: table @ (asize: , hmask: ) +* LJ_TUDATA: userdata @ +* LJ_TNUMX: number + +Whether the type of the given address differs from the listed above, then +error message occurs. + ''' + command = 'lj-tv' + + @property + def argument_parser(self): + parser = super(LJDumpTValue, self).argument_parser + + parser.add_argument('tv', nargs=1, type=str, default=None) + + return parser + + + def execute(self, debugger, args, result): + global target + expr = args.tv[0] + target = debugger.GetSelectedTarget() + process = target.GetProcess() + thread = process.GetSelectedThread() + frame = thread.GetSelectedFrame() + + tvptr = TValuePtr(frame.EvaluateExpression(expr)) + print('{}\n'.format(dump_tvalue(tvptr))) + + + +class LJState(Command): + ''' +lj-state +The command requires no args and dumps current VM and GC states +* VM state: +* GC state: +* JIT state: + ''' + + command = 'lj-state' + + def execute(self, debugger, args, result): + global target + target = debugger.GetSelectedTarget() + g = G(L(None)) + print('{}\n'.format('\n'.join( + map(lambda t: '{} state: {}'.format(*t), { + 'VM': vm_state(g), + 'GC': gc_state(g), + 'JIT': jit_state(g), + }.items()) + ))) + +class LJDumpArch(Command): + ''' +lj-arch + +The command requires no args and dumps values of LJ_64 and LJ_GC64 +compile-time flags. These values define the sizes of host and GC +pointers respectively. 
+ ''' + command = 'lj-arch' + + def execute(self, debugger, args, result): + print( + 'LJ_64: {LJ_64}, LJ_GC64: {LJ_GC64}, LJ_DUALNUM: {LJ_DUALNUM}\n' + .format( + LJ_64 = LJ_64, + LJ_GC64 = LJ_GC64, + LJ_DUALNUM = LJ_DUALNUM + ) + ) + +class LJGC(Command): + ''' +lj-gc + +The command requires no args and dumps current GC stats: +* total: +* threshold: +* debt: +* estimate: +* stepmul: +* pause: +* sweepstr: +* root: +* gray: +* grayagain: +* weak: +* mmudata: + ''' + command = 'lj-gc' + + def execute(self, debugger, args, result): + global target + target = debugger.GetSelectedTarget() + g = G(L(None)) + print('GC stats: {state}\n{stats}\n'.format( + state = gc_state(g), + stats = dump_gc(g) + )) + +class LJDumpString(Command): + ''' +lj-str + +The command receives the address of a GCstr object and dumps +the payload, size in bytes and hash. + +*Caveat*: Since Python 2 provides no native Unicode support, the payload +is replaced with the corresponding error when decoding fails. + ''' + command = 'lj-str' + + @property + def argument_parser(self): + parser = super(LJDumpString, self).argument_parser + parser.add_argument('gcr', nargs=1, type=str, default=None) + return parser + + def execute(self, debugger, args, result): + global target + expr = args.gcr[0] + target = debugger.GetSelectedTarget() + process = target.GetProcess() + thread = process.GetSelectedThread() + frame = thread.GetSelectedFrame() + + string_ptr = GCstrPtr(frame.EvaluateExpression(expr)) + print("String: {body} [{len} bytes] with hash {hash}\n".format( + body = strdata(string_ptr), + hash = strx64(string_ptr.deref.hash), + len = string_ptr.deref.len, + )) + +class LJDumpTable(Command): + ''' +lj-tab + +The command receives a GCtab address and dumps the table contents: +* Metatable address if one is set +* Array part slots: + : []: +* Hash part nodes: + : { } => { }; next = + ''' + command = 'lj-tab' + + @property + def argument_parser(self): + parser = super(LJDumpTable, 
self).argument_parser + parser.add_argument('gctab', nargs=1, type=str, default=None) + return parser + + def execute(self, debugger, args, result): + global target + expr = args.gctab[0] + target = debugger.GetSelectedTarget() + process = target.GetProcess() + thread = process.GetSelectedThread() + frame = thread.GetSelectedFrame() + + t = GCtabPtr(frame.EvaluateExpression(expr)) + array = mref(TValuePtr, t.deref.array) + nodes = mref(NodePtr, t.deref.node) + mt = gcval(t.deref.metatable) + capacity = { + 'apart': int(t.deref.asize), + 'hpart': int(t.deref.hmask + 1) if t.deref.hmask > 0 else 0 + } + + if mt: + print('Metatable detected: {}\n'.format(strx64(mt))) + + print('Array part: {} slots\n'.format(capacity['apart'])) + for i in range(capacity['apart']): + slot = array + i + print('{ptr}: [{index}]: {value}\n'.format( + ptr = strx64(slot), + index = i, + value = dump_tvalue(slot) + )) + + print('Hash part: {} nodes\n'.format(capacity['hpart'])) + # See hmask comment in lj_obj.h + for i in range(capacity['hpart']): + node = nodes + i + print('{ptr}: {{ {key} }} => {{ {val} }}; next = {n}\n'.format( + ptr = strx64(node), + key = dump_tvalue(TValuePtr(node.deref.key.addr)), + val= dump_tvalue(TValuePtr(node.deref.val.addr)), + n = strx64(mref(NodePtr, node.deref.next)) + )) + +class LJDumpStack(Command): + ''' +lj-stack [] + +The command receives a lua_State address and dumps the given Lua +coroutine guest stack: + + [] + +* : guest stack slot address +* : + - S: Bottom of the stack (the slot L->stack points to) + - B: Base of the current guest frame (the slot L->base points to) + - T: Top of the current guest frame (the slot L->top points to) + - M: Last slot of the stack (the slot L->maxstack points to) +* : see help lj-tv for more info +* : framelink slot differs from the value slot: it contains info + related to the function being executed within this guest frame, its + type and link to the parent guest frame + [] delta=, + - : + + L: VM performs a call as 
a result of bytecode execution + + C: VM performs a call as a result of lj_vm_call + + M: VM performs a call to a metamethod as a result of bytecode + execution + + V: Variable-length frame for storing arguments of a variadic + function + + CP: Protected C frame + + PP: VM performs a call as a result of executing pcall or xpcall + +If L is omitted, the main coroutine is used. + ''' + command = 'lj-stack' + + @property + def argument_parser(self): + parser = super(LJDumpStack, self).argument_parser + parser.add_argument('lua_State', nargs='?', type=str, default=[None]) + return parser + + def execute(self, debugger, args, result): + global target + expr = args.lua_State[0] + target = debugger.GetSelectedTarget() + process = target.GetProcess() + thread = process.GetSelectedThread() + frame = thread.GetSelectedFrame() + + l = frame.EvaluateExpression(expr) if expr else None + + print('{}\n'.format(dump_stack(L(l)))) + + + + + +def register_commands(debugger): + for cls in Command.__subclasses__(): + debugger.HandleCommand( + 'command script add -c luajit_lldb.{cls} {command}'.format( + cls=cls.__name__, + command=cls.command, + ) + ) + +def configure(debugger): + global LJ_64, LJ_GC64, LJ_FR2, LJ_DUALNUM, PADDING, LJ_TISNUM + target = debugger.GetSelectedTarget() + module = target.modules[0] + LJ_DUALNUM = module.FindSymbol('lj_lib_checknumber') != None + + try: + irtype_enum = target.FindFirstType('IRType').enum_members + for member in irtype_enum: + if member.name == 'IRT_PTR': + LJ_64 = member.unsigned & 0x1f == IRT_P64 + if member.name == 'IRT_PGC': + LJ_FR2 = LJ_GC64 = member.unsigned & 0x1f == IRT_P64 + except: + print('luajit-lldb.py failed to load: ' + 'no debugging symbols found for libluajit\n') + + PADDING = ' ' * len(':' + hex((1 << (47 if LJ_GC64 else 32)) - 1)) + LJ_TISNUM = 0xfffeffff if LJ_64 and not LJ_GC64 else LJ_T['NUMX'] + + + +def __lldb_init_module(debugger, internal_dict): + configure(debugger) + register_commands(debugger) -- 2.32.1 (Apple 
Git-133)