Hi, Sergey,

thanks for the patch! LGTM with minor comments.

Sergey

On 6/25/26 23:29, Sergey Kaplun wrote:
> This patch adds dumpers for a single IR instruction (`lj-ir`), as well
> as for all bytecodes inside one trace (`lj-trace`). Its dump is quite
> similar to the -jdump flag but also reports types of register operands
> (`ref`, `lit`, `cst`) and operation mode (`N`, `A`, `W`, etc.).
> The `lj-trace` command accepts optional /rs flags to dump registers
> associated with IR and snapshots for the trace correspondingly.
> The `lj-ir` command can be used for dumping IR constants as well.
> The `lj-jslots` command dumps the content of `J->slot`. It is useful to
> simplify debugging of `rec_check_slots()` assertion failures.
>
> For LLDB value, the `__getitem__` metamethod now accepts bool keys.
> Also, `__index__` is set to allow lldb.value to be used as an index
> without explicit conversion to int. Old GDB versions (below 7.12) are
> not supported because of the gdb.Value lacks the `__index__` metamethod
> and can't be monkey-patched. The support for these versions may be added
> by demand.
>
> Part of tarantool/tarantool#4808
> ---
>   src/luajit_dbg.py                             | 1216 ++++++++++++++++-
>   .../debug-extension-tests.py                  |  365 +++++
>   2 files changed, 1570 insertions(+), 11 deletions(-)
>
> diff --git a/src/luajit_dbg.py b/src/luajit_dbg.py
> index 2edb199a..fd6ca8a5 100644
> --- a/src/luajit_dbg.py
> +++ b/src/luajit_dbg.py
> @@ -58,6 +58,26 @@ class Debugger(object):
>               self.LLDB = True
>               return super(Debugger, self).__new__(_LLDBDebugger)
>   
> +    def parse_flags(self, raw_flags, permitted_flags):
> +        flags = {}
> +        for flag in raw_flags:
> +            if flag not in permitted_flags:
> +                raise self.error('Unrecongnized option: "{}"'.format(flag))
typo: s/Unrecongnized/Unrecognized/
> +            flags[flag] = True
> +        return flags
> +
> +    def extract_flags(self, arg, permitted_flags):
> +        if not arg:
> +            return None, None
> +        flags = {}
> +        if arg.startswith('/'):
> +            match = re.match(r'/(\S*)\s+(.*)$', arg)
> +            if not match:
> +                return arg, flags
> +            raw_flags, arg = match.group(1, 2)
> +            flags = self.parse_flags(raw_flags, permitted_flags)
> +        return arg, flags
> +
>       def configure(self):
>           global PADDING, LJ_TISNUM
>           if not self.check_libluajit():
> @@ -70,6 +90,17 @@ class Debugger(object):
>               self.write('luajit_dbg.py failed to load: '
>                          'no debugging symbols found for libluajit\n')
>               return False
> +
> +        # Setup arch.
> +        try:
> +            self.arch = str(self.eval('LJ_ARCH_NAME')).split('"')[1]
> +        except Exception:
> +            try:
> +                self.arch = self.detect_arch()
> +            except Exception:
> +                # Setup on demand if necessary.
> +                pass
> +
>           return True
>   
>       def initialize_extension(self, commands):
> @@ -99,21 +130,42 @@ class Debugger(object):
>           '''Return the content of the string by the given pointer.'''
>           pass
>   
> +    @abc.abstractmethod
> +    def address(self, obj):
> +        '''Return the address in memory of the given object.'''
> +        pass
> +
>       @abc.abstractmethod
>       def lookup_global(self, symbol):
>           '''Look up the global C symbol by the given name.'''
>           pass
>   
> +    @abc.abstractmethod
> +    def member_by_offset(self, typename, offset, prev_name=None):
> +        '''Look up the global C symbol by the given name.'''
> +        pass
> +
>       @abc.abstractmethod
>       def eval(self, command):
>           '''Parse and evaluate the given debugger command.'''
>           pass
>   
> +    @abc.abstractmethod
> +    def detect_arch(self):
> +        '''Detect the CPU architecture and canonicalize it to the LuaJIT
> +        notation.'''
> +        pass
> +
>       @abc.abstractmethod
>       def write(self, msg):
>           '''Print the message.'''
>           pass
>   
> +    @abc.abstractmethod
> +    def error(self, msg):
> +        '''Create the error object with message.'''
> +        pass
> +
>       @abc.abstractmethod
>       def check_libluajit(self):
>           '''Check that libluajit is loaded.
> @@ -172,10 +224,50 @@ class _GDBDebugger(Debugger):
>           # A string is printed with a pointer to it. Just strip it.
>           return re.sub(r'^0x[a-f0-9]+\s+(?=")', '', str(strptr))
>   
> +    def address(self, obj):
> +        return obj.address
> +
>       def lookup_global(self, symbol):
>           variable, _ = gdb.lookup_symbol(symbol)
>           return variable.value() if variable else None
>   
> +    def member_by_offset(self, tp, offset, prev_name=None):
> +        if isinstance(tp, str):
> +            tp = self._dbgtype(tp)
> +        assert offset < tp.sizeof, 'offset is bigger than object size'
> +        if tp.code == gdb.TYPE_CODE_TYPEDEF:
> +            tp = tp.strip_typedefs()
> +        if tp.code == gdb.TYPE_CODE_STRUCT:
> +            fields = tp.fields()
> +            for n_field in range(len(fields)):
> +                islast = n_field == (len(fields) - 1)
> +                field = fields[n_field]
> +                start_field = field.bitpos / 8
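Maybe use `//` (floor division) here and in the `end_field` computation below? With Python 3, `/` yields a float, so `start_field`, `end_field` and the offset passed to the recursive call become floats.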
> +                end_field = fields[n_field + 1].bitpos / 8 if not islast \
> +                    else tp.sizeof
> +                if start_field <= offset and offset < end_field:
> +                    next_name = self.member_by_offset(
> +                        field.type,
> +                        offset - start_field,
> +                        prev_name=field.name
> +                    )
> +                    return '.{field}{suffix}'.format(
> +                        field=field.name,
> +                        suffix=next_name if next_name else ''
> +                    )
> +        elif tp.code == gdb.TYPE_CODE_ARRAY:
> +            # Get array field type.
> +            target = tp.target()
> +            tsize = target.sizeof
> +            idx = int(offset // tsize)
> +            next_name = self.member_by_offset(target, offset - idx * tsize)
> +            idxname = idx_name(prev_name)
> +            if idxname and idx in idxname:
> +                idx = idxname[idx]
> +            return '[{}]{}'.format(idx, next_name if next_name else '')
> +        else:
> +            return None
> +
>       def eval(self, command):
>           if not command:
>               return None
> @@ -185,9 +277,23 @@ class _GDBDebugger(Debugger):
>               raise gdb.GdbError('table argument empty')
>           return ret
>   
> +    def detect_arch(self):
> +        if hasattr(self, 'arch'):
> +            return self.arch
> +        target = str(gdb.execute('info target', False, True))
> +        if re.match('.*x86-64.*', target, flags=re.DOTALL):
> +            return 'x64'
> +        elif re.match('.*aarch64.*', target, flags=re.DOTALL):
> +            return 'arm64'
> +        else:
> +            return ''
> +
>       def write(self, msg):
>           gdb.write(msg)
>   
> +    def error(self, errmsg):
> +        return gdb.GdbError(errmsg)
> +
>       def check_libluajit(self):
>           # XXX Fragile: Though connecting the callback looks bad,
>           # it respects both Python 2 and Python 3 (see #4828).
> @@ -322,8 +428,26 @@ class _LLDBDebugger(Debugger):
>           def lldb__getitem__(lldbval, key):
>               if type(key) is lldb.value:
>                   key = int(key)
> +            if type(key) is bool:
> +                key = int(key)
>               if type(key) is int:
>                   # Allow array access.
> +                ltp = lldbval.sbvalue.GetType()
> +                # XXX: LLDB in versions 17 - 19 can't use an array
> +                # object as the initializer for `lldb.value` since
> +                # `GetValue()` for it returns `None` leading to
> +                # the invalid result. See
> +                #https://github.com/llvm/llvm-project/pull/90144.
> +                if (self.version < 17 or self.version > 19) or \
> +                   ltp.GetTypeClass() != lldb.eTypeClassArray:
> +                    pass
Probably it is better to invert the condition and remove the branch that contains only `pass`.
<snipped>
> +
> +
> +def ir_kptr(ir):
> +    irname = IRS[ir['o']]
> +    assert irname == 'KPTR' or irname == 'KKPTR', 'wrong IR for ir_iptr()'
typo: s/ir_iptr()/ir_kptr() or ir_kkptr()/
> +    return mref('void *', dbg.cast('IRIns *', dbg.address(ir))[LJ_GC64]['ptr'])
> +
> +
> +def ir_kgc(ir):
> +    irname = IRS[ir['o']]
> +    assert irname == 'KGC', 'wrong IR for ir_kgc()'
> +    return gcref(dbg.cast('IRIns *', dbg.address(ir))[LJ_GC64]['gcr'])
> +
> +
> +def ir_knum(ir):
> +    irname = IRS[ir['o']]
> +    assert irname == 'KNUM', 'wrong IR for ir_knum()'
> +    return dbg.address(dbg.cast('IRIns *', dbg.address(ir))[1]['tv'])
> +
> +
> +def ir_kint64(ir):
> +    irname = IRS[ir['o']]
> +    assert irname == 'KINT64', 'wrong IR for ir_knum()'
typo: s/ir_knum/ir_kint64/
> +    return dbg.address(dbg.cast('IRIns *', dbg.address(ir))[1]['tv'])
> +
> +
>   # Dumpers.
>   
>   # GCobj dumpers.
> @@ -1467,6 +2281,325 @@ def dump_func(func):
>           return 'fast function #{}\n'.format(int(ffid))
>   
>   
> +# JIT dumpers.
> +
> +
> +def dump_call_func(trace, callop):
> +    ctype = ''
> +    if callop > 0:
> +        ir = trace['ir'][REF_BIAS + callop]
> +        if IRTYPES[irt_type(ir['t'])] == 'nil':  # nil == CARG(func, ctype)
> +            callop = int(ir['op1']) - REF_BIAS
> +            cdt_idx_irk = trace['ir'][ir['op2']]
> +            assert IRS[cdt_idx_irk['o']] == 'KINT', \
> +                   'unexpected IR for ctype storage'
> +            ctype_idx = cdt_idx_irk['i']
> +            ctype = 'ctype: {}'.format(ctype_idx)
> +
> +    func_str = ''
> +    if callop < 0:
> +        irk = trace['ir'][REF_BIAS + callop]
> +        assert IRS[irk['o']] == 'KINT64', \
> +               'unexpected IR for FFI function storage'
> +        func_addr = int(ir_kint64(irk)['u64'])
> +        # TODO: Symbol demangling.
> +        func_str = '[{:#x}]'.format(func_addr)
> +    else:
> +        func_str = '[{:04d}]'.format(callop)
> +
> +    return func_str, ctype
> +
> +
> +def dump_call_args(trace, ins):
> +    if ins < 0:
> +        return '{{{}}}'.format(dump_irk(trace, ins))
> +    else:
> +        ir = trace['ir'][REF_BIAS + ins]
> +        irname = IRS[ir['o']]
> +        if irname == 'CARG':
> +            last_arg = ''
> +            args = dump_call_args(trace, int(ir['op1']) - REF_BIAS)
> +            op2 = int(ir['op2']) - REF_BIAS
> +            if op2 < 0:
> +                last_arg = '{{{}}}'.format(dump_irk(trace, op2))
> +            else:
> +                last_arg = '{{{:04d}}}'.format(op2)
> +            return args + ', ' + last_arg
> +        else:
> +            return '{{{:04d}}}'.format(ins)
> +
> +
> +# Special FP constant.
> +CONST_BIAS = 2 ** 52 + 2 ** 51
> +
> +
> +def dump_irk(trace, idx):
> +    ref = idx + REF_BIAS
> +    assert ref >= trace['nk'] and ref < REF_BIAS, 'bad constant in IR dump'
> +    irins = trace['ir'][ref]
> +    irname = IRS[irins['o']]
> +    slot = ''
> +    if irname == 'KSLOT':
> +        slot = ' KSLOT: @{}'.format(int(irins['op2']))
> +        irins = trace['ir'][irins['op1']]
> +        irname = IRS[irins['o']]
> +
> +    irtype = irins['t']
> +    if irname == 'KPRI':
> +        typename = typenames(irt_toitype(irtype))
> +        # Trivial dump for primitives.
> +        irk = tv_dumpers.get(
> +            typename, dump_lj_tv_invalid  # noqa: F821 # Generated.
> +        )(0)
> +    elif irname == 'KINT':
> +        irk = 'integer {}'.format(dbg.cast('int32_t', irins['i']))
> +    elif irname == 'KGC':
> +        typename = typenames(irt_toitype(irtype))
> +        irk = gco_dumpers.get(typename, dump_lj_gco_invalid)(ir_kgc(irins))
> +    elif irname == 'KKPTR':
> +        addr = ir_kptr(irins)
> +        if addr == dbg.address(G(L())['nilnode']):
> +            return '[g->nilnode]' + slot
> +        irk = '[{}]'.format(strx64(addr))
> +    elif irname == 'KPTR':
> +        irk = '[{}]'.format(strx64(ir_kptr(irins)))
> +    elif irname == 'KNULL':
> +        irk = 'NULL'
> +    elif irname == 'KNUM':
> +        tv_num = ir_knum(irins)
> +        if float(tv_num['n']) == CONST_BIAS:
> +            return 'bias'
> +        irk = dump_lj_tv_numx(tv_num)
> +    elif irname == 'KINT64':
> +        irk = 'int64_t {}'.format(dbg.cast(
> +            'int64_t', int(ir_kint64(irins)['u64'])
> +        ))
> +    else:
> +        return 'Unknown IRK: ' + irname
> +    return irk + slot
> +
> +
<snipped>
> +
> +def dump_snap(trace, snapno, snap):
> +    dump = 'SNAP   #{:<3d} ['.format(snapno)
> +    snap_map = dbg.address(trace['snapmap'][snap['mapofs']])
> +    snap_entry_num = 0
> +    for slot in range(0, snap['nslots']):
> +        dump += ' '
> +        snap_entry = int(snap_map[snap_entry_num])
> +        if snap_entry_num < snap['nent'] and snap_entry >> TREF_SHIFT == slot:
> +            snap_entry_num += 1
> +            ref = int((snap_entry & TREF_REFMASK) - REF_BIAS)
> +            if ref < 0:
> +                if int(snap_entry) == 0x1057fff:
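Magic number: please give `0x1057fff` a named constant, or at least add a comment explaining what this value stands for.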
<snipped>
> +# Assume not cross-platform debugging.
> +machine = os.uname().machine
> +if machine == 'x86_64':
> +    RX_GPR = r'r\w\w'
> +    RX_FPR = r'xmm\d+'
> +elif machine == 'arm64' or machine == 'aarch64':
> +    RX_GPR = r'x\d+'
> +    RX_FPR = r'd\d+'
> +else:
> +    raise Exception('Unknown archeticture in testing')
typo: s/archeticture/architecture/
<snipped>
> +
> +class TestLJIRConst(TestCaseBase):
> +    location = 'trace_stop'
> +
> +    # No narrowing of 42.
> +    if IS_DUALNUM:
> +        # KNUM occupies 2 slots.
> +        _knum_irnum = '6'
> +        _kgc_irnum = '8' if IS_GC64 else '7'
> +        _kptr_irnum = '10' if IS_GC64 else '8'
> +    else:
> +        # KNUM occupies 2 slots.
> +        _knum_irnum = '8'
> +        _kgc_irnum = '10' if IS_GC64 else '9'
> +        _kptr_irnum = '12' if IS_GC64 else '10'
Both branches contain the same comment ("KNUM occupies 2 slots."). Is it a typo or not?
> <snipped>
>