[Tarantool-discussions] [PATCH luajit] lldb: introduce luajit-lldb

Maksim Kokryashkin max.kokryashkin at gmail.com
Tue Sep 6 15:55:12 MSK 2022


From: "m.kokryashkin" <m.kokryashkin at m-kokryashkin.local>

It is impossible to run gdb on M1 devices; the only available
debugger there is lldb. The luajit-gdb extension doesn't work with
lldb, so this patch introduces the luajit-lldb extension,
which re-implements exactly the same functionality.

Part of tarantool/tarantool#4808
---
Issue: https://github.com/tarantool/tarantool/issues/4808
Branch: https://github.com/tarantool/luajit/tree/gh-fckxorg/luajit-lldb

 src/luajit_lldb.py | 1034 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1034 insertions(+)
 create mode 100644 src/luajit_lldb.py

diff --git a/src/luajit_lldb.py b/src/luajit_lldb.py
new file mode 100644
index 00000000..69b50232
--- /dev/null
+++ b/src/luajit_lldb.py
@@ -0,0 +1,1034 @@
+import abc
+import argparse
+import re
+import shlex
+
+import lldb
+
+# Build-configuration flags of the inspected LuaJIT. All of them are
+# placeholders here and get their real values in configure().
+LJ_64 = None
+LJ_GC64 = None
+LJ_FR2 = None
+LJ_DUALNUM = None
+# Whitespace filler used to align columns of the stack dump; assigned in
+# configure().
+PADDING = None
+
+# Constants
+# Value the low five bits of the IRT_PTR/IRT_PGC enum members are compared
+# against in configure() to derive LJ_64 and LJ_GC64.
+IRT_P64 = 9
+# Mask applied to a GC reference in gcval() when LJ_GC64 is set.
+LJ_GCVMASK = ((1 << 47) - 1)
+# Assigned in configure().
+LJ_TISNUM = None
+
+# Global
+# Currently selected lldb target; assigned by the commands that need it.
+target = None
+
+class Ptr:
+    """Typed pointer wrapper around an lldb value.
+
+    `value` is the underlying lldb value holding the pointer and
+    `normal_type` is the wrapper class used for the pointee.
+    """
+
+    def __init__(self, value, normal_type):
+        self.value = value
+        self.normal_type = normal_type
+
+    @property
+    def deref(self):
+        """Pointee wrapped into `normal_type`."""
+        pointee = self.value.Dereference()
+        return self.normal_type(pointee)
+
+    def __add__(self, other):
+        """Return a new pointer advanced by `other` elements."""
+        assert isinstance(other, int)
+        pointer_typename = self.normal_type.__name__ + ' *'
+        byte_offset = other * self.value.deref.size
+        raw_address = cast('uintptr_t', self.value.unsigned + byte_offset)
+        return self.__class__(cast(pointer_typename, raw_address))
+
+    def __sub__(self, other):
+        """Step back by an int, or return the byte distance to a Ptr."""
+        assert isinstance(other, (int, Ptr))
+        if not isinstance(other, int):
+            return self.value.unsigned - other.value.unsigned
+        return self.__add__(-other)
+
+    def __eq__(self, other):
+        """Compare the address with another Ptr or a non-negative int."""
+        assert isinstance(other, Ptr) or (isinstance(other, int) and other >= 0)
+        address = other.value.unsigned if isinstance(other, Ptr) else other
+        return self.value.unsigned == address
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __gt__(self, other):
+        assert isinstance(other, Ptr)
+        mine, theirs = self.value.unsigned, other.value.unsigned
+        return mine > theirs
+
+    def __bool__(self):
+        # A null pointer is falsy.
+        return self.value.unsigned != 0
+
+    def __int__(self):
+        return self.value.unsigned
+
+    def __str__(self):
+        return self.value.value
+
+class MetaStruct(type):
+    """Metaclass that turns a class' `metainfo` into accessor attributes.
+
+    `metainfo` maps a category to field names. The 'custom' category maps
+    attribute names to ready-made objects installed as is; 'raw', 'uints',
+    'ints' and 'strings' read the member (optionally through one of its
+    attributes); any other category is looked up in globals() and used to
+    wrap the member.
+    """
+    def __init__(cls, name, bases, nmspc):
+        super(MetaStruct, cls).__init__(name, bases, nmspc)
+
+        # Category -> attribute of the raw member to return; None means the
+        # raw member itself is returned.
+        extras = {
+            'raw': None,
+            'uints': 'unsigned',
+            'ints': 'signed',
+            'strings': 'value',
+        }
+
+        def make_general(field, tp):
+            if tp not in extras:
+                def wrapped_getter(self):
+                    return globals()[tp](self[field])
+                return wrapped_getter
+
+            attr = extras[tp]
+            if attr is None:
+                def raw_getter(self):
+                    return self[field]
+                return raw_getter
+
+            def attr_getter(self):
+                return getattr(self[field], attr)
+            return attr_getter
+
+        if not hasattr(cls, 'metainfo'):
+            return
+
+        for tp, fields in cls.metainfo.items():
+            if tp == 'custom':
+                for field, value in fields.items():
+                    setattr(cls, field, value)
+                continue
+            for field in fields:
+                setattr(cls, field, property(make_general(field, tp)))
+
+class Struct(metaclass=MetaStruct):
+    """Base wrapper around an lldb value; members are read by name."""
+
+    def __init__(self, value):
+        self.value = value
+
+    def __getitem__(self, name):
+        """Return the member of the wrapped value called `name`."""
+        member = self.value.GetChildMemberWithName(name)
+        return member
+
+    @property
+    def addr(self):
+        """`address_of` of the wrapped value."""
+        wrapped = self.value
+        return wrapped.address_of
+
+# Description of the wrapped C structures: structure name -> metainfo
+# consumed by MetaStruct. Each metainfo maps a category to the fields
+# exposed as attributes: 'uints'/'ints'/'strings'/'raw' read the member
+# directly, 'custom' holds ready-made properties, and any other key names
+# the wrapper class the member is wrapped into.
+c_structs = {
+    'MRef': {
+        'custom': {
+            'ptr': property(lambda self: self['ptr64'] if LJ_GC64 else self['ptr32'])
+        }
+    },
+    'GCRef': {
+        'custom': {
+            'gcptr': property(lambda self: self['gcptr64'] if LJ_GC64 else self['gcptr32'])
+        }
+    },
+    'TValue': {
+        'GCRef': ['gcr'],
+        'uints': ['it', 'i'],
+        'ints': ['it64'],
+        'strings': ['n'],
+        'custom': {
+            # 'ftsz' is only read when LJ_GC64 is set, 'fr' only when it
+            # is not; the other one evaluates to None.
+            'ftsz': property(lambda self: self['ftsz'].signed if LJ_GC64 else None),
+            'fr': property(lambda self: FR(self['fr']) if not LJ_GC64 else None)
+        }
+    },
+    'GCState': {
+        'GCRef': ['root', 'gray', 'grayagain', 'weak', 'mmudata'],
+        'uints': ['state', 'total', 'threshold', 'debt', 'estimate',
+            'stepmul', 'pause', 'sweepstr']
+    },
+    'lua_State': {
+        'MRef': ['glref', 'stack', 'maxstack'],
+        'TValuePtr': ['top', 'base']
+    },
+    'global_State': {
+        'GCState': ['gc'],
+        'uints': ['vmstate', 'strmask']
+    },
+    'jit_State': {
+        'uints': ['state']
+    },
+    'GChead': {
+        'GCRef': ['nextgc']
+    },
+    'GCobj': {
+        'GChead': ['gch']
+    },
+    'GCstr': {
+        'uints': ['hash', 'len']
+    },
+    'FrameLink': {
+        'MRef': ['pcr'],
+        'ints': ['ftsz']
+    },
+    'FR': {
+        'FrameLink': ['tp']
+    },
+    'GCfuncC': {
+        'MRef': ['pc'],
+        'uints': ['ffid', 'nupvalues'],
+        'raw': ['f']
+    },
+    'GCtab': {
+        'MRef': ['array', 'node'],
+        'GCRef': ['metatable'],
+        'uints': ['asize', 'hmask']
+    },
+    'GCproto': {
+        'GCRef': ['chunkname'],
+        'raw': ['firstline']
+    },
+    'GCtrace': {
+        'uints': ['traceno']
+    },
+    'Node': {
+        'TValue': ['key', 'val'],
+        'MRef': ['next']
+    },
+    'BCIns': {}
+}
+
+# Create a Struct subclass for every entry of c_structs and publish it
+# under the structure's name in the module namespace.
+for cls in c_structs.keys():
+    globals()[cls] = type(cls, (Struct, ), {'metainfo': c_structs[cls]} )
+
+# For every Struct subclass create the matching '<name>Ptr' subclass of
+# Ptr whose constructor takes only the value and fixes the pointee type.
+for cls in Struct.__subclasses__():
+    ptr_name = cls.__name__ + 'Ptr'
+    # The factory function binds `cls` per iteration, so every generated
+    # __init__ refers to its own pointee class.
+    def make_ptr_init(nm, cls):
+        return type(
+                nm,
+                (Ptr,),
+                {
+                    '__init__': lambda self, value: super(type(self), self).__init__(value, cls)
+                }
+            )
+
+    globals()[ptr_name] = make_ptr_init(ptr_name, cls)
+
+
+class Command(object):
+    """Base class of the luajit-lldb commands.
+
+    A subclass provides the `command` name, a docstring used as the help
+    text and an `execute` implementation; argument parsing and error
+    reporting are handled here.
+    """
+    def __init__(self, debugger, unused):
+        pass
+
+    def get_short_help(self):
+        """Return the first non-empty line of the command docstring.
+
+        The command docstrings open with a line break, so the very first
+        line of `__doc__` is empty and can't be used as the summary.
+        """
+        for line in (self.__doc__ or '').splitlines():
+            summary = line.strip()
+            if summary:
+                return summary
+        return ''
+
+    def get_long_help(self):
+        """Return the whole command docstring."""
+        return self.__doc__
+
+    def __call__(self, debugger, command, exe_ctx, result):
+        """Parse the command line, run the command and report any failure
+        through `result`."""
+        try:
+            args = self.argument_parser.parse_args(shlex.split(command))
+            self.execute(debugger, args, result)
+        except Exception as e:
+            msg = u'Failed to execute command `{}`: {}'.format(self.command, e)
+            result.SetError(msg)
+
+    @property
+    def argument_parser(self):
+        """A fresh parser named after the command; subclasses extend it
+        with their own arguments."""
+        return argparse.ArgumentParser(
+            prog=self.command,
+            description=self.get_long_help(),
+            formatter_class=argparse.RawDescriptionHelpFormatter
+        )
+
+    @abc.abstractproperty
+    def command(self):
+        """Command name.
+        This name will be used by LLDB in order to uniquely identify an
+        implementation that should be executed when a command is run
+        in the REPL.
+        """
+
+    @abc.abstractmethod
+    def execute(self, debugger, args, result):
+        """Implementation of the command.
+        Subclasses override this method to implement the logic of a given
+        command, e.g. printing a stacktrace. The command output should be
+        communicated back via the provided result object, so that it's
+        properly routed to LLDB frontend. Any unhandled exception will be
+        automatically transformed into proper errors.
+        Args:
+            debugger: lldb.SBDebugger: the primary interface to LLDB scripting
+            args: argparse.Namespace: an object holding parsed command arguments
+            result: lldb.SBCommandReturnObject: a container which holds the
+                    result from command execution
+        """
+
+def gcval(obj):
+    """Return the GC object pointer held by the GC reference `obj`.
+
+    With LJ_GC64 the reference is masked with LJ_GCVMASK first.
+    """
+    if LJ_GC64:
+        address = cast('uintptr_t', obj.gcptr.unsigned & LJ_GCVMASK)
+    else:
+        address = cast('uintptr_t', obj.gcptr)
+    return cast(GCobjPtr, address)
+
+def gcref(obj):
+    """Return the GC reference `obj` as a GC object pointer (unmasked)."""
+    if LJ_GC64:
+        reference = obj.gcptr
+    else:
+        reference = cast('uintptr_t', obj.gcptr)
+    return cast(GCobjPtr, reference)
+
+def gcnext(obj):
+    """Return the `nextgc` reference of the object `obj` refers to."""
+    header = gcref(obj).deref.gch
+    return header.nextgc
+
+def gclistlen(root, end=0x0):
+    """Count the objects chained from `root` until `end` is reached."""
+    length = 0
+    while gcref(root) != end:
+        length += 1
+        root = gcnext(root)
+    return length
+
+def gcringlen(root):
+    """Count the objects of the ring-list `root` refers to.
+
+    Returns 0 for an empty ring, 1 when the object links to itself, and
+    otherwise walks the ring until the starting object is met again.
+    """
+    if not gcref(root):
+        return 0
+    elif gcref(root) == gcref(gcnext(root)):
+        return 1
+    else:
+        # The end marker has to be a pointer: gclistlen() compares it
+        # with gcref() results, and pointers can be compared only with
+        # other pointers or plain integers.
+        return 1 + gclistlen(gcnext(root), gcref(root))
+
+# GCState field name -> function counting the objects in that list; used
+# by dump_gc().
+gclen = {
+    'root': gclistlen,
+    'gray': gclistlen,
+    'grayagain': gclistlen,
+    'weak': gclistlen,
+    # XXX: gc.mmudata is a ring-list.
+    'mmudata': gcringlen,
+}
+
+def dump_gc(g):
+    """Return the tab-indented, line-per-item GC statistics of `g`."""
+    gc = g.gc
+    plain_fields = (
+        'total', 'threshold', 'debt', 'estimate', 'stepmul', 'pause'
+    )
+    stats = [
+        '{key}: {value}'.format(key = name, value = getattr(gc, name))
+        for name in plain_fields
+    ]
+
+    # String hash mask (size of hash table - 1).
+    stats.append('sweepstr: {sweepstr}/{strmask}'.format(
+        sweepstr = gc.sweepstr,
+        strmask = g.strmask + 1,
+    ))
+
+    for stat, handler in gclen.items():
+        stats.append('{key}: {number} objects'.format(
+            key = stat,
+            number = handler(getattr(gc, stat)),
+        ))
+
+    return '\n'.join('\t' + line for line in stats)
+
+def cast(typename, value):
+    """Convert `value` to the type described by `typename`.
+
+    `typename` is either a type name string (a trailing '*' requests a
+    pointer type) or a wrapper class (a name ending with 'Ptr' requests a
+    pointer to the class named by the rest). `value` is a wrapper
+    (Struct/Ptr), a raw lldb value or a Python int.
+
+    Returns the wrapper class instance when `typename` is a class and
+    `value` is not an int; a raw lldb value otherwise.
+    """
+    pointer_type = False
+    name = None
+    # Unwrap the helper classes down to the raw lldb value.
+    if isinstance(value, Struct) or isinstance(value, Ptr):
+        value = value.value
+    if isinstance(typename, type):
+        name = typename.__name__
+        if name.endswith('Ptr'):
+            pointer_type = True
+            name = name[:-3]
+    else:
+        name = typename
+        if name[-1] == '*':
+            name = name[:-1].strip()
+            pointer_type = True
+    t = target.FindFirstType(name)
+    if pointer_type:
+        t = t.GetPointerType()
+
+    # A plain integer has no lldb value to cast, so a fresh one is built:
+    # for pointer types the integer is taken as the address of the pointee,
+    # otherwise as the data of the value itself.
+    if isinstance(value, int):
+        if pointer_type:
+            return target.CreateValueFromAddress('value', lldb.SBAddress(value, target), t.GetPointeeType()).address_of
+        else:
+            return target.CreateValueFromData(name = 'value', data = lldb.SBData.CreateDataFromInt(value), type=t)
+
+    if isinstance(typename, type):
+        return typename(value.Cast(t))
+    else:
+        return value.Cast(t)
+
+def lookup_global(name):
+    """Look up the first global variable called `name` in the target."""
+    global target
+    found = target.FindFirstGlobalVariable(name)
+    return found
+
+def mref(typename, obj):
+    """Return the pointer held by the memory reference `obj` converted to
+    `typename`."""
+    raw_pointer = obj.ptr
+    return cast(typename, raw_pointer)
+
+def type_member(type_obj, name):
+    """Return the first member of `type_obj` called `name`, or None."""
+    for member in type_obj.members:
+        if member.name == name:
+            return member
+    return None
+
+def J(g):
+    """Return the jit_State that shares a GG_State with `g`.
+
+    `g` is the global_State structure wrapper. The jit_State address is
+    derived from the address of `g` and the offsets of the 'g' and 'J'
+    members of GG_State.
+    """
+    global target
+    typeGG = target.FindFirstType('GG_State')
+    g_offset = type_member(typeGG, 'g').GetOffsetInBytes()
+    j_offset = type_member(typeGG, 'J').GetOffsetInBytes()
+    jtype = target.FindFirstType('jit_State')
+
+    # `g` wraps the structure by value, so its address has to be taken
+    # explicitly: casting the structure to a pointer type would only
+    # reinterpret its leading bytes as an address.
+    address = g.addr.unsigned - g_offset + j_offset
+
+    return jit_State(target.CreateValueFromAddress(
+        'jit_state', lldb.SBAddress(address, target), jtype))
+
+def G(L):
+    """Return the global_State structure `L.glref` refers to."""
+    global_state_ptr = mref(global_StatePtr, L.glref)
+    return global_state_ptr.deref
+
+def L(L=None):
+    """Return the lua_State wrapper for `L` or, if it is not given, for
+    the first known main coroutine symbol found in the target."""
+    # Symbols holding the main coroutine, considering the host app.
+    main_coroutine_symbols = (
+        # LuaJIT main coro (see luajit/src/luajit.c)
+        'globalL',
+        # Tarantool main coro (see tarantool/src/lua/init.h)
+        'tarantool_L',
+        # TODO: Add more
+    )
+    # The explicitly given value always takes precedence.
+    candidates = [ L ] + [
+        lookup_global(symbol) for symbol in main_coroutine_symbols
+    ]
+    for l in candidates:
+        if l:
+            return lua_State(l)
+
+def tou32(val):
+    """Keep only the low 32 bits of `val`."""
+    low32_mask = 0xFFFFFFFF
+    return val & low32_mask
+
+def i2notu32(val):
+    """Return the bitwise NOT of `val` truncated to 32 bits."""
+    inverted = ~int(val)
+    return inverted & 0xFFFFFFFF
+
+def vm_state(g):
+    """Return the name of the VM state stored in `g.vmstate`; any value
+    outside the known set is reported as 'TRACE'."""
+    names = (
+        'INTERP', 'LFUNC', 'FFUNC', 'CFUNC', 'GC',
+        'EXIT', 'RECORD', 'OPT', 'ASM',
+    )
+    # The i-th state is keyed by the 32-bit complement of i.
+    states = {i2notu32(index): name for index, name in enumerate(names)}
+    return states.get(int(tou32(g.vmstate)), 'TRACE')
+
+def gc_state(g):
+    """Return the name of the GC state stored in `g.gc.state`, or
+    'INVALID' for an unknown value."""
+    names = (
+        'PAUSE', 'PROPAGATE', 'ATOMIC', 'SWEEPSTRING',
+        'SWEEP', 'FINALIZE', 'LAST',
+    )
+    states = dict(enumerate(names))
+    return states.get(g.gc.state, 'INVALID')
+
+def jit_state(g):
+    """Return the name of the JIT state found via `J(g)`, or 'INVALID'
+    for an unknown value."""
+    active_names = ('ACTIVE', 'RECORD', 'START', 'END', 'ASM', 'ERR')
+    states = {0: 'IDLE'}
+    # The non-idle states are numbered consecutively from 0x10.
+    for offset, name in enumerate(active_names):
+        states[0x10 + offset] = name
+    return states.get(J(g).state, 'INVALID')
+
+def strx64(val):
+    """Format `val` as a hexadecimal 64-bit unsigned number."""
+    as_u64 = int(val) & 0xFFFFFFFFFFFFFFFF
+    # Drop the trailing 'L' if hex() emits one.
+    return re.sub('L?$', '', hex(as_u64))
+
+def funcproto(func):
+    """Return the pointer to the GCproto of the function `func`.
+
+    The prototype address is the address `func.pc` refers to minus the
+    size of GCproto. `func.ffid` must be 0.
+    """
+    assert(func.ffid == 0)
+
+    proto_size = target.FindFirstType('GCproto').GetByteSize()
+    bytecode_address = mref('char *', func.pc).unsigned
+    proto_address = cast('uintptr_t', bytecode_address - proto_size)
+    return cast(GCprotoPtr, proto_address)
+
+def strdata(obj):
+    """Return the lldb summary of the string payload of `obj`.
+
+    `obj` is a GCstr pointer; the payload is read from the address right
+    after the GCstr object. If rendering fails with UnicodeEncodeError, an
+    error message is returned instead.
+    """
+    try:
+        ptr = cast('char *', obj + 1)
+        return ptr.summary
+    except UnicodeEncodeError:
+        return "<luajit-lldb: error occured while rendering non-ascii slot>"
+
+def itype(o):
+    """Return the internal type tag of the TValue `o`."""
+    if not LJ_GC64:
+        return o.it
+    # With LJ_GC64 the tag sits above the 47 payload bits.
+    return tou32(o.it64 >> 47)
+
+def tvisint(o):
+    """Tell whether the TValue `o` holds an integer (LJ_DUALNUM only)."""
+    if not LJ_DUALNUM:
+        return LJ_DUALNUM
+    return itype(o) == LJ_TISNUM
+
+def tvislightud(o):
+    """Tell whether the TValue `o` holds a light userdata."""
+    if LJ_64 and not LJ_GC64:
+        # The tag has to be treated as a signed 32-bit number. The
+        # conversion is done on the Python integer, since the values
+        # produced by cast() don't support arithmetic operators.
+        tag = itype(o) & 0xFFFFFFFF
+        if tag & 0x80000000:
+            tag -= 1 << 32
+        return (tag >> 15) == -2
+    else:
+        return itype(o) == LJ_T['LIGHTUD']
+
+def tvisnumber(o):
+    """Tell whether the type tag of the TValue `o` denotes a number."""
+    tag = itype(o)
+    return tag <= LJ_TISNUM
+
+def dump_lj_tnil(tv):
+    # Dumper for LJ_TNIL: the slot contents are not inspected.
+    return 'nil'
+
+def dump_lj_tfalse(tv):
+    # Dumper for LJ_TFALSE: the slot contents are not inspected.
+    return 'false'
+
+def dump_lj_ttrue(tv):
+    # Dumper for LJ_TTRUE: the slot contents are not inspected.
+    return 'true'
+
+def dump_lj_tlightud(tv):
+    """Describe the light userdata slot `tv` points to."""
+    address = strx64(gcval(tv.deref.gcr))
+    return 'light userdata @ {}'.format(address)
+
+def dump_lj_tstr(tv):
+    """Describe the string slot `tv` points to: payload and address."""
+    payload = strdata(cast(GCstrPtr, gcval(tv.deref.gcr)))
+    location = strx64(gcval(tv.deref.gcr))
+    return 'string {body} @ {address}'.format(
+        body = payload,
+        address = location,
+    )
+
+def dump_lj_tupval(tv):
+    """Describe the upvalue slot `tv` points to."""
+    address = strx64(gcval(tv.deref.gcr))
+    return 'upvalue @ {}'.format(address)
+
+def dump_lj_tthread(tv):
+    """Describe the thread slot `tv` points to."""
+    address = strx64(gcval(tv.deref.gcr))
+    return 'thread @ {}'.format(address)
+
+def dump_lj_tproto(tv):
+    """Describe the prototype slot `tv` points to."""
+    address = strx64(gcval(tv.deref.gcr))
+    return 'proto @ {}'.format(address)
+
+def dump_lj_tfunc(tv):
+    """Describe the function slot `tv` points to.
+
+    The `ffid` of the function selects the form: 0 is a Lua function
+    (address, number of upvalues, chunk name and first line), 1 is a
+    C function (address of the C implementation), anything else is
+    a fast function identified by its `ffid`.
+    """
+    func = cast(GCfuncCPtr, gcval(tv.deref.gcr))
+    ffid = func.deref.ffid
+
+    if ffid == 0:
+        pt = funcproto(func.deref).deref
+        return 'Lua function @ {addr}, {nupvals} upvalues, {chunk}:{line}'.format(
+            addr = strx64(func),
+            nupvals = func.deref.nupvalues,
+            chunk = strdata(cast(GCstrPtr, gcval(pt.chunkname))),
+            # `firstline` is exposed as a raw lldb value, whose string
+            # form is a whole description of the value rather than the
+            # number itself.
+            line = pt.firstline.signed
+        )
+    elif ffid == 1:
+        return 'C function @ {}'.format(strx64(func.deref.f.unsigned))
+    else:
+        return 'fast function #{}'.format(ffid)
+
+def dump_lj_ttrace(tv):
+    """Describe the trace slot `tv` points to: trace number and address."""
+    trace = cast(GCtracePtr, gcval(tv.deref.gcr))
+    number = strx64(trace.deref.traceno)
+    location = strx64(trace)
+    return 'trace {traceno} @ {addr}'.format(traceno = number, addr = location)
+
+def dump_lj_tcdata(tv):
+    """Describe the cdata slot `tv` points to."""
+    address = strx64(gcval(tv.deref.gcr))
+    return 'cdata @ {}'.format(address)
+
+def dump_lj_ttab(tv):
+    """Describe the table slot `tv` points to: address, array size and
+    hash mask."""
+    table = cast(GCtabPtr, gcval(tv.deref.gcr))
+    location = strx64(table)
+    array_size = table.deref.asize
+    hash_mask = strx64(table.deref.hmask)
+    return 'table @ {gcr} (asize: {asize}, hmask: {hmask})'.format(
+        gcr = location,
+        asize = array_size,
+        hmask = hash_mask,
+    )
+
+def dump_lj_tudata(tv):
+    """Describe the userdata slot `tv` points to."""
+    address = strx64(gcval(tv.deref.gcr))
+    return 'userdata @ {}'.format(address)
+
+def dump_lj_tnumx(tv):
+    """Describe the numeric slot `tv` points to as an integer or a
+    number."""
+    if tvisint(tv.deref):
+        # The payload is read as an unsigned value; convert it to
+        # a signed 32-bit number here. Formatting the result of cast()
+        # would print the lldb description of the value, not the number.
+        payload = tv.deref.i & 0xFFFFFFFF
+        if payload & 0x80000000:
+            payload -= 1 << 32
+        return 'integer {}'.format(payload)
+    else:
+        return 'number {}'.format(tv.deref.n)
+
+def dump_lj_invalid(tv):
+    """Fallback description for a slot with an unknown type tag."""
+    address = strx64(gcval(tv.deref.gcr))
+    return 'not valid type @ {}'.format(address)
+
+# Type name (as produced by typenames()) -> function rendering a slot of
+# that type; used by dump_tvalue().
+dumpers = {
+    'LJ_TNIL': dump_lj_tnil,
+    'LJ_TFALSE': dump_lj_tfalse,
+    'LJ_TTRUE': dump_lj_ttrue,
+    'LJ_TLIGHTUD': dump_lj_tlightud,
+    'LJ_TSTR': dump_lj_tstr,
+    'LJ_TUPVAL': dump_lj_tupval,
+    'LJ_TTHREAD': dump_lj_tthread,
+    'LJ_TPROTO': dump_lj_tproto,
+    'LJ_TFUNC': dump_lj_tfunc,
+    'LJ_TTRACE': dump_lj_ttrace,
+    'LJ_TCDATA': dump_lj_tcdata,
+    'LJ_TTAB': dump_lj_ttab,
+    'LJ_TUDATA': dump_lj_tudata,
+    'LJ_TNUMX': dump_lj_tnumx,
+}
+
+# Internal type tags: the tag of the i-th type is the 32-bit complement
+# of i.
+LJ_T = {
+    'NIL'     : i2notu32(0),
+    'FALSE'   : i2notu32(1),
+    'TRUE'    : i2notu32(2),
+    'LIGHTUD' : i2notu32(3),
+    'STR'     : i2notu32(4),
+    'UPVAL'   : i2notu32(5),
+    'THREAD'  : i2notu32(6),
+    'PROTO'   : i2notu32(7),
+    'FUNC'    : i2notu32(8),
+    'TRACE'   : i2notu32(9),
+    'CDATA'   : i2notu32(10),
+    'TAB'     : i2notu32(11),
+    'UDATA'   : i2notu32(12),
+    'NUMX'    : i2notu32(13),
+}
+
+def itypemap(o):
+    """Map the TValue `o` to one of the LJ_T tags.
+
+    Every number is reported as NUMX; on LJ_64 without LJ_GC64 a light
+    userdata is additionally normalized to LIGHTUD.
+    """
+    if tvisnumber(o):
+        return LJ_T['NUMX']
+    if LJ_64 and not LJ_GC64 and tvislightud(o):
+        return LJ_T['LIGHTUD']
+    return itype(o)
+
+def typenames(value):
+    """Return the 'LJ_T<NAME>' name of the tag `value`, or 'LJ_TINVALID'
+    for an unknown tag."""
+    names_by_tag = {tag: 'LJ_T' + name for name, tag in LJ_T.items()}
+    return names_by_tag.get(int(value), 'LJ_TINVALID')
+
+def dump_tvalue(tvptr):
+    """Describe the TValue `tvptr` points to with the dumper matching its
+    type; unknown types go to dump_lj_invalid."""
+    typename = typenames(itypemap(tvptr.deref))
+    dumper = dumpers.get(typename, dump_lj_invalid)
+    return dumper(tvptr)
+
+# Masks applied to the frame 'ftsz' value: the frame type bits, the
+# 'P' flag bit and both of them combined.
+FRAME_TYPE = 0x3
+FRAME_P = 0x4
+FRAME_TYPEP = FRAME_TYPE | FRAME_P
+
+# Frame type codes; values from 0x4 up have the FRAME_P bit set.
+FRAME = {
+    'LUA': 0x0,
+    'C': 0x1,
+    'CONT': 0x2,
+    'VARG': 0x3,
+    'LUAP': 0x4,
+    'CP': 0x5,
+    'PCALL': 0x6,
+    'PCALLH': 0x7,
+}
+
+def frametypes(ft):
+    """Return the one-letter mark of the frame type `ft`, or '?' for an
+    unknown one."""
+    marks = (('LUA', 'L'), ('C', 'C'), ('CONT', 'M'), ('VARG', 'V'))
+    letters = {FRAME[kind]: letter for kind, letter in marks}
+    return letters.get(ft, '?')
+
+def bc_a(ins):
+    """Extract bits 8..15 of the bytecode instruction `ins`."""
+    shifted = ins >> 8
+    return shifted & 0xff
+
+def frame_ftsz(framelink):
+    """Return the 'ftsz' value of the frame slot `framelink` converted to
+    ptrdiff_t."""
+    if LJ_FR2:
+        raw = framelink.ftsz
+    else:
+        raw = framelink.fr.tp.ftsz
+    return cast('ptrdiff_t', raw)
+
+def frame_pc(framelink):
+    """Return the bytecode pointer stored in the frame slot `framelink`
+    points to."""
+    # `framelink` is a pointer to the slot, so it has to be dereferenced
+    # before its fields are read, whatever the frame layout is.
+    slot = framelink.deref
+    return cast(BCInsPtr, frame_ftsz(slot)) if LJ_FR2 \
+        else mref(BCInsPtr, slot.fr.tp.pcr)
+
+def frame_prevl(framelink):
+    """Return the previous frame link of a Lua frame.
+
+    The distance is taken from the operand A of the instruction that
+    precedes the one the frame's bytecode pointer refers to.
+    """
+    frame = target.GetProcess().GetSelectedThread().GetSelectedFrame()
+    expression = '((BCIns *)' + str(frame_pc(framelink)) + ')[-1]'
+    previous_ins = frame.EvaluateExpression(expression).unsigned
+    return framelink - (1 + LJ_FR2 + bc_a(previous_ins))
+
+def frame_ispcall(framelink):
+    """Tell whether all the PCALL bits are set in the frame's 'ftsz'."""
+    pcall_bits = FRAME['PCALL']
+    return (frame_ftsz(framelink).unsigned & pcall_bits) == pcall_bits
+
+def frame_sized(framelink):
+    """Return the frame's 'ftsz' with the type and 'P' bits cleared."""
+    ftsz = frame_ftsz(framelink).unsigned
+    return ftsz & ~FRAME_TYPEP
+
+def frame_prevd(framelink):
+    """Return the previous frame link of a frame whose size is stored in
+    its 'ftsz'."""
+    delta = frame_sized(framelink.deref)
+    return framelink - delta
+
+def frame_type(framelink):
+    """Return the type bits of the frame's 'ftsz'."""
+    ftsz = frame_ftsz(framelink).unsigned
+    return ftsz & FRAME_TYPE
+
+def frame_typep(framelink):
+    """Return the type and 'P' bits of the frame's 'ftsz'."""
+    ftsz = frame_ftsz(framelink).unsigned
+    return ftsz & FRAME_TYPEP
+
+def frame_islua(framelink):
+    """Tell whether the frame is a Lua frame with a non-zero 'ftsz'."""
+    if frametypes(frame_type(framelink)) != 'L':
+        return False
+    return frame_ftsz(framelink).unsigned > 0
+
+def frame_prev(framelink):
+    """Return the previous frame link, resolved according to the kind of
+    the frame `framelink` points to."""
+    if frame_islua(framelink.deref):
+        return frame_prevl(framelink)
+    return frame_prevd(framelink)
+
+def dump_framelink(L, fr):
+    """Return the stack dump line describing the frame link slot `fr`:
+    its address(es), frame type, size in slots and function."""
+    fr2 = fr + LJ_FR2
+
+    return '{fr}{padding} [    ] FRAME: [{pp}] delta={d}, {f}\n'.format(
+        fr = strx64(fr),
+        padding = ':{fr2: <{width}}'.format(fr2 = strx64(fr2), width=len(PADDING) - 1) if LJ_FR2 else PADDING,
+        pp = 'PP' if frame_ispcall(fr2.deref) else '{frname}{p}'.format(
+            frname = frametypes(frame_type(fr2.deref)),
+            p = 'P' if frame_typep(fr2.deref) & FRAME_P else ''
+        ),
+        # Subtracting two pointers yields the distance in bytes; convert
+        # it to slots the same way dump_stack() does (8 bytes per slot).
+        d = (fr2 - frame_prev(fr2)) >> 3,
+        f = dump_lj_tfunc(fr),
+    )
+
+def dump_stack_slot(L, slot, base=None, top=None, eol='\n'):
+    """Return the stack dump line of the value slot `slot`.
+
+    The slot is marked with B, T and M when it is the base, the top and
+    the `L.maxstack` slot respectively; `base` and `top` default to those
+    of `L`.
+    """
+    base = base or L.base
+    top = top or L.top
+
+    address = strx64(slot)
+    base_mark = 'B' if slot == base else ' '
+    top_mark = 'T' if slot == top else ' '
+    max_mark = 'M' if slot == mref(TValuePtr, L.maxstack) else ' '
+
+    return '{addr}{padding} [ {B}{T}{M}] VALUE: {value}{eol}'.format(
+        addr = address,
+        padding = PADDING,
+        B = base_mark,
+        T = top_mark,
+        M = max_mark,
+        value = dump_tvalue(slot),
+        eol = eol,
+    )
+
+def dump_stack(L, base=None, top=None):
+    """Return the dump of the guest stack of the coroutine `L`.
+
+    The dump starts with the red zone slots and the stack size summary,
+    then lists the slots from `top` downwards, frame by frame, and ends
+    with the dummy frame line. `base` and `top` default to those of `L`.
+    """
+    base = base or L.base
+    top = top or L.top
+    stack = mref(TValuePtr, L.stack)
+    maxstack = mref(TValuePtr, L.maxstack)
+    # Number of the red zone slots dumped above maxstack.
+    red = 5 + 2 * LJ_FR2
+
+    dump = '\n'.join([
+        '{padding} Red zone: {nredslots: >2} slots {padding}'.format(
+            padding = '-' * len(PADDING),
+            nredslots = red,
+        ),
+        *(
+            dump_stack_slot(L, maxstack + offset, base, top, '')
+                for offset in range(red, 0, -1)
+        ),
+        '{padding} Stack: {nstackslots: >5} slots {padding}'.format(
+            padding = '-' * len(PADDING),
+            # Pointer difference is in bytes; a slot takes 8 of them.
+            nstackslots = int((maxstack - stack) >> 3),
+        ),
+        dump_stack_slot(L, maxstack, base, top, ''),
+        '{start}:{end: <{width}} [    ] {nfreeslots} slots: Free stack slots'.format(
+            start = strx64(top + 1),
+            end = strx64(maxstack - 1),
+            width = len(PADDING) - 1,
+            nfreeslots = int((maxstack - top - 8) >> 3),
+        ),
+    ]) + '\n'
+
+    slot = top
+    framelink = base - (1 + LJ_FR2)
+
+    # XXX: Lua stack unwinding algorithm consists of the following steps:
+    # 1. dump all data slots in the (framelink, top) interval
+    # 2. check whether there are remaining frames
+    # 3. if there are no slots further, stop the unwinding loop
+    # 4. otherwise, resolve the next framelink and top and go to (1)
+    #
+    # Postcondition (i.e. do-while) loops is the most fitting idiom for such
+    # case, but Python doesn't provide such lexical construction. Hence step (1)
+    # is unrolled for the topmost stack frame.
+    while slot > framelink + LJ_FR2:
+        dump += dump_stack_slot(L, slot, base, top)
+        slot -= 1
+
+    while framelink > stack:
+        assert slot == framelink + LJ_FR2, "Invalid slot during frame unwind"
+        dump += dump_framelink(L, framelink)
+        framelink = frame_prev(framelink + LJ_FR2) - LJ_FR2
+        # Step over the frame link slot(s) that have just been dumped.
+        slot -= 1 + LJ_FR2
+        while slot > framelink + LJ_FR2:
+            dump += dump_stack_slot(L, slot, base, top)
+            slot -= 1
+
+    assert slot == framelink + LJ_FR2, "Invalid slot after frame unwind"
+    # Skip a nil slot for the last frame for 2-slot frames.
+    slot -= LJ_FR2
+
+    dump += '{fr}{padding} [S   ] FRAME: dummy L'.format(
+        fr = strx64(slot),
+        padding = ':{nilslot: <{offset}}'.format(nilslot = strx64(slot + 1), offset=len(PADDING) - 1) if LJ_FR2 else PADDING
+    )
+
+    return dump
+
+
+
+class LJDumpTValue(Command):
+    '''
+lj-tv <TValue *>
+
+The command receives a pointer to <tv> (TValue address) and dumps
+the type and some info related to it.
+
+* LJ_TNIL: nil
+* LJ_TFALSE: false
+* LJ_TTRUE: true
+* LJ_TLIGHTUD: light userdata @ <gcr>
+* LJ_TSTR: string <string payload> @ <gcr>
+* LJ_TUPVAL: upvalue @ <gcr>
+* LJ_TTHREAD: thread @ <gcr>
+* LJ_TPROTO: proto @ <gcr>
+* LJ_TFUNC: <LFUNC|CFUNC|FFUNC>
+  <LFUNC>: Lua function @ <gcr>, <nupvals> upvalues, <chunk:line>
+  <CFUNC>: C function <mcode address>
+  <FFUNC>: fast function #<ffid>
+* LJ_TTRACE: trace <traceno> @ <gcr>
+* LJ_TCDATA: cdata @ <gcr>
+* LJ_TTAB: table @ <gcr> (asize: <asize>, hmask: <hmask>)
+* LJ_TUDATA: userdata @ <gcr>
+* LJ_TNUMX: number <numeric payload>
+
+Whether the type of the given address differs from the listed above, then
+error message occurs.
+    '''
+    command = 'lj-tv'
+
+    @property
+    def argument_parser(self):
+        # The only (mandatory) argument is the expression yielding the
+        # TValue pointer.
+        tv_parser = super(LJDumpTValue, self).argument_parser
+        tv_parser.add_argument('tv', nargs=1, type=str, default=None)
+        return tv_parser
+
+    def execute(self, debugger, args, result):
+        # Evaluate the expression in the selected frame and dump the
+        # TValue it points to.
+        global target
+        expr = args.tv[0]
+        target = debugger.GetSelectedTarget()
+        frame = target.GetProcess().GetSelectedThread().GetSelectedFrame()
+
+        tvptr = TValuePtr(frame.EvaluateExpression(expr))
+        description = dump_tvalue(tvptr)
+        print('{}\n'.format(description))
+
+
+
+class LJState(Command):
+    '''
+lj-state
+The command requires no args and dumps current VM and GC states
+* VM state: <INTERP|C|GC|EXIT|RECORD|OPT|ASM|TRACE>
+* GC state: <PAUSE|PROPAGATE|ATOMIC|SWEEPSTRING|SWEEP|FINALIZE|LAST>
+* JIT state: <IDLE|ACTIVE|RECORD|START|END|ASM|ERR>
+    '''
+
+    command = 'lj-state'
+
+    def execute(self, debugger, args, result):
+        # Print one '<kind> state: <name>' line per state kind for the
+        # main coroutine's global state.
+        global target
+        target = debugger.GetSelectedTarget()
+        g = G(L(None))
+        states = {
+            'VM': vm_state(g),
+            'GC': gc_state(g),
+            'JIT': jit_state(g),
+        }
+        lines = [
+            '{} state: {}'.format(kind, name) for kind, name in states.items()
+        ]
+        print('{}\n'.format('\n'.join(lines)))
+
+class LJDumpArch(Command):
+    '''
+lj-arch
+
+The command requires no args and dumps values of LJ_64 and LJ_GC64
+compile-time flags. These values define the sizes of host and GC
+pointers respectively.
+    '''
+    command = 'lj-arch'
+
+    def execute(self, debugger, args, result):
+        # The flags are module-level values; nothing is read from the
+        # debugger here.
+        template = 'LJ_64: {LJ_64}, LJ_GC64: {LJ_GC64}, LJ_DUALNUM: {LJ_DUALNUM}\n'
+        flags = {
+            'LJ_64': LJ_64,
+            'LJ_GC64': LJ_GC64,
+            'LJ_DUALNUM': LJ_DUALNUM,
+        }
+        print(template.format(**flags))
+
+class LJGC(Command):
+    '''
+lj-gc
+
+The command requires no args and dumps current GC stats:
+* total: <total number of allocated bytes in GC area>
+* threshold: <limit when gc step is triggered>
+* debt: <how much GC is behind schedule>
+* estimate: <estimate of memory actually in use>
+* stepmul: <incremental GC step granularity>
+* pause: <pause between successive GC cycles>
+* sweepstr: <sweep position in string table>
+* root: <number of all collectable objects>
+* gray: <number of gray objects>
+* grayagain: <number of objects for atomic traversal>
+* weak: <number of weak tables (to be cleared)>
+* mmudata: <number of udata|cdata to be finalized>
+    '''
+    command = 'lj-gc'
+
+    def execute(self, debugger, args, result):
+        # Print the GC state name followed by the statistics of the main
+        # coroutine's global state.
+        global target
+        target = debugger.GetSelectedTarget()
+        g = G(L(None))
+        state_name = gc_state(g)
+        statistics = dump_gc(g)
+        print('GC stats: {state}\n{stats}\n'.format(
+            state = state_name,
+            stats = statistics,
+        ))
+
+class LJDumpString(Command):
+    '''
+lj-str <GCstr *>
+
+The command receives a <gcr> of the corresponding GCstr object and dumps
+the payload, size in bytes and hash.
+
+*Caveat*: Since Python 2 provides no native Unicode support, the payload
+is replaced with the corresponding error when decoding fails.
+    '''
+    command = 'lj-str'
+
+    @property
+    def argument_parser(self):
+        # The only (mandatory) argument is the expression yielding the
+        # GCstr pointer.
+        str_parser = super(LJDumpString, self).argument_parser
+        str_parser.add_argument('gcr', nargs=1, type=str, default=None)
+        return str_parser
+
+    def execute(self, debugger, args, result):
+        # Evaluate the expression in the selected frame and print the
+        # payload, length and hash of the string it points to.
+        global target
+        expr = args.gcr[0]
+        target = debugger.GetSelectedTarget()
+        frame = target.GetProcess().GetSelectedThread().GetSelectedFrame()
+
+        string_ptr = GCstrPtr(frame.EvaluateExpression(expr))
+        payload = strdata(string_ptr)
+        string_hash = strx64(string_ptr.deref.hash)
+        length = string_ptr.deref.len
+        print("String: {body} [{len} bytes] with hash {hash}\n".format(
+            body = payload,
+            hash = string_hash,
+            len = length,
+        ))
+
+class LJDumpTable(Command):
+    '''
+lj-tab <GCtab *>
+
+The command receives a GCtab adress and dumps the table contents:
+* Metatable address whether the one is set
+* Array part <asize> slots:
+  <aslot ptr>: [<index>]: <tv>
+* Hash part <hsize> nodes:
+  <hnode ptr>: { <tv> } => { <tv> }; next = <next hnode ptr>
+    '''
+    command = 'lj-tab'
+
+    @property
+    def argument_parser(self):
+        # The only (mandatory) argument is the expression yielding the
+        # GCtab pointer.
+        tab_parser = super(LJDumpTable, self).argument_parser
+        tab_parser.add_argument('gctab', nargs=1, type=str, default=None)
+        return tab_parser
+
+    def execute(self, debugger, args, result):
+        # Evaluate the expression in the selected frame and print the
+        # metatable (if any), the array part and the hash part.
+        global target
+        expr = args.gctab[0]
+        target = debugger.GetSelectedTarget()
+        frame = target.GetProcess().GetSelectedThread().GetSelectedFrame()
+
+        t = GCtabPtr(frame.EvaluateExpression(expr))
+        array = mref(TValuePtr, t.deref.array)
+        nodes = mref(NodePtr, t.deref.node)
+        mt = gcval(t.deref.metatable)
+        array_slots = int(t.deref.asize)
+        if t.deref.hmask > 0:
+            hash_nodes = int(t.deref.hmask + 1)
+        else:
+            hash_nodes = 0
+
+        if mt:
+            print('Metatable detected: {}\n'.format(strx64(mt)))
+
+        print('Array part: {} slots\n'.format(array_slots))
+        for index in range(array_slots):
+            slot = array + index
+            print('{ptr}: [{index}]: {value}\n'.format(
+                ptr = strx64(slot),
+                index = index,
+                value = dump_tvalue(slot)
+            ))
+
+        print('Hash part: {} nodes\n'.format(hash_nodes))
+        # See hmask comment in lj_obj.h
+        for index in range(hash_nodes):
+            node = nodes + index
+            entry = node.deref
+            print('{ptr}: {{ {key} }} => {{ {val} }}; next = {n}\n'.format(
+                ptr = strx64(node),
+                key = dump_tvalue(TValuePtr(entry.key.addr)),
+                val= dump_tvalue(TValuePtr(entry.val.addr)),
+                n = strx64(mref(NodePtr, entry.next))
+            ))
+
+class LJDumpStack(Command):
+    '''
+lj-stack [<lua_State *>]
+
+The command receives a lua_State address and dumps the given Lua
+coroutine guest stack:
+
+<slot ptr> [<slot attributes>] <VALUE|FRAME>
+
+* <slot ptr>: guest stack slot address
+* <slot attributes>:
+  - S: Bottom of the stack (the slot L->stack points to)
+  - B: Base of the current guest frame (the slot L->base points to)
+  - T: Top of the current guest frame (the slot L->top points to)
+  - M: Last slot of the stack (the slot L->maxstack points to)
+* <VALUE>: see help lj-tv for more info
+* <FRAME>: framelink slot differs from the value slot: it contains info
+  related to the function being executed within this guest frame, its
+  type and link to the parent guest frame
+  [<frame type>] delta=<slots in frame>, <lj-tv for LJ_TFUNC slot>
+  - <frame type>:
+    + L:  VM performs a call as a result of bytecode execution
+    + C:  VM performs a call as a result of lj_vm_call
+    + M:  VM performs a call to a metamethod as a result of bytecode
+          execution
+    + V:  Variable-length frame for storing arguments of a variadic
+          function
+    + CP: Protected C frame
+    + PP: VM performs a call as a result of executing pcall or xpcall
+
+If L is omitted the main coroutine is used.
+    '''
+    command = 'lj-stack'
+
+    @property
+    def argument_parser(self):
+        parser = super(LJDumpStack, self).argument_parser
+        # With nargs='?' the argument is stored as a plain string rather
+        # than a list, hence the default is a plain None too.
+        parser.add_argument('lua_State', nargs='?', type=str, default=None)
+        return parser
+
+    def execute(self, debugger, args, result):
+        # Dump the stack of the given coroutine, or of the main one when
+        # no expression is given.
+        global target
+        # Take the whole expression: indexing the string would leave only
+        # its first character.
+        expr = args.lua_State
+        target = debugger.GetSelectedTarget()
+        process = target.GetProcess()
+        thread = process.GetSelectedThread()
+        frame = thread.GetSelectedFrame()
+
+        l = frame.EvaluateExpression(expr) if expr else None
+
+        print('{}\n'.format(dump_stack(L(l))))
+
+
+
+
+
+def register_commands(debugger):
+    """Register every direct subclass of Command in `debugger` under its
+    `command` name."""
+    template = 'command script add -c luajit_lldb.{cls} {command}'
+    for command_class in Command.__subclasses__():
+        registration = template.format(
+            cls=command_class.__name__,
+            command=command_class.command,
+        )
+        debugger.HandleCommand(registration)
+
+def configure(debugger):
+    """Detect the build configuration of the inspected LuaJIT.
+
+    Sets the module-level LJ_64, LJ_GC64, LJ_FR2 and LJ_DUALNUM flags
+    as well as the PADDING and LJ_TISNUM values derived from them.
+    A message is printed when the flags can't be determined.
+    """
+    global LJ_64, LJ_GC64, LJ_FR2, LJ_DUALNUM, PADDING, LJ_TISNUM
+    target = debugger.GetSelectedTarget()
+    module = target.modules[0]
+    # FindSymbol() yields an invalid symbol object (never None) when
+    # nothing is found, so the validity has to be checked explicitly.
+    LJ_DUALNUM = module.FindSymbol('lj_lib_checknumber').IsValid()
+
+    try:
+        irtype_enum = target.FindFirstType('IRType').enum_members
+        for member in irtype_enum:
+            if member.name == 'IRT_PTR':
+                LJ_64 = member.unsigned & 0x1f == IRT_P64
+            if member.name == 'IRT_PGC':
+                LJ_FR2 = LJ_GC64 = member.unsigned & 0x1f == IRT_P64
+    except Exception:
+        # Drop partial results, so the failure is reported below.
+        LJ_64 = LJ_GC64 = LJ_FR2 = None
+
+    # A missing IRType yields no enum members instead of an error, hence
+    # the outcome is checked rather than only the exception.
+    if LJ_64 is None or LJ_GC64 is None:
+        print('luajit-lldb.py failed to load: '
+              'no debugging symbols found for libluajit\n')
+
+    PADDING = ' ' * len(':' + hex((1 << (47 if LJ_GC64 else 32)) - 1))
+    LJ_TISNUM = 0xfffeffff if LJ_64 and not LJ_GC64 else LJ_T['NUMX']
+
+
+
+def __lldb_init_module(debugger, internal_dict):
+    # Module initialization hook: detect the LuaJIT build configuration
+    # first, then register the commands.
+    configure(debugger)
+    register_commands(debugger)
-- 
2.32.1 (Apple Git-133)



More information about the Tarantool-discussions mailing list