Hi, Sergey, thanks for the patch! LGTM with minor comments. Sergey On 6/25/26 23:29, Sergey Kaplun wrote: > This patch adds dumpers for a single IR instruction (`lj-ir`), as well > as for all bytecodes inside one trace (`lj-trace`). Its dump is quite > similar to the -jdump flag but also reports types of register operands > (`ref`, `lit`, `cst`) and operation mode (`N`, `A`, `W`, etc.). > The `lj-trace` command accepts optional /rs flags to dump registers > associated with IR and snapshots for the trace correspondingly. > The `lj-ir` command can be used for dumping IR constants as well. > The `lj-jslots` command dumps the content of `J->slot`. It is useful to > simplify debugging of `rec_check_slots()` assertion failures. > > For LLDB value, the `__getitem__` metamethod now accepts bool keys. > Also, `__index__` is set to allow lldb.value to be used as an index > without explicit conversion to int. Old GDB versions (below 7.12) are > not supported because of the gdb.Value lacks the `__index__` metamethod > and can't be monkey-patched. The support for these versions may be added > by demand. 
> > Part of tarantool/tarantool#4808 > --- > src/luajit_dbg.py | 1216 ++++++++++++++++- > .../debug-extension-tests.py | 365 +++++ > 2 files changed, 1570 insertions(+), 11 deletions(-) > > diff --git a/src/luajit_dbg.py b/src/luajit_dbg.py > index 2edb199a..fd6ca8a5 100644 > --- a/src/luajit_dbg.py > +++ b/src/luajit_dbg.py > @@ -58,6 +58,26 @@ class Debugger(object): > self.LLDB = True > return super(Debugger, self).__new__(_LLDBDebugger) > > + def parse_flags(self, raw_flags, permitted_flags): > + flags = {} > + for flag in raw_flags: > + if flag not in permitted_flags: > + raise self.error('Unrecongnized option: "{}"'.format(flag)) typo: s/Unrecongnized/Unrecognized/ > + flags[flag] = True > + return flags > + > + def extract_flags(self, arg, permitted_flags): > + if not arg: > + return None, None > + flags = {} > + if arg.startswith('/'): > + match = re.match(r'/(\S*)\s+(.*)$', arg) > + if not match: > + return arg, flags > + raw_flags, arg = match.group(1, 2) > + flags = self.parse_flags(raw_flags, permitted_flags) > + return arg, flags > + > def configure(self): > global PADDING, LJ_TISNUM > if not self.check_libluajit(): > @@ -70,6 +90,17 @@ class Debugger(object): > self.write('luajit_dbg.py failed to load: ' > 'no debugging symbols found for libluajit\n') > return False > + > + # Setup arch. > + try: > + self.arch = str(self.eval('LJ_ARCH_NAME')).split('"')[1] > + except Exception: > + try: > + self.arch = self.detect_arch() > + except Exception: > + # Setup on demand if necessary. 
> + pass > + > return True > > def initialize_extension(self, commands): > @@ -99,21 +130,42 @@ class Debugger(object): > '''Return the content of the string by the given pointer.''' > pass > > + @abc.abstractmethod > + def address(self, obj): > + '''Return the address in memory of the given object.''' > + pass > + > @abc.abstractmethod > def lookup_global(self, symbol): > '''Look up the global C symbol by the given name.''' > pass > > + @abc.abstractmethod > + def member_by_offset(self, typename, offset, prev_name=None): > + '''Look up the global C symbol by the given name.''' > + pass > + > @abc.abstractmethod > def eval(self, command): > '''Parse and evaluate the given debugger command.''' > pass > > + @abc.abstractmethod > + def detect_arch(self): > + '''Detect the CPU architecture and canonicalize it to the LuaJIT > + notation.''' > + pass > + > @abc.abstractmethod > def write(self, msg): > '''Print the message.''' > pass > > + @abc.abstractmethod > + def error(self, msg): > + '''Create the error object with message.''' > + pass > + > @abc.abstractmethod > def check_libluajit(self): > '''Check that libluajit is loaded. > @@ -172,10 +224,50 @@ class _GDBDebugger(Debugger): > # A string is printed with a pointer to it. Just strip it. > return re.sub(r'^0x[a-f0-9]+\s+(?=")', '', str(strptr)) > > + def address(self, obj): > + return obj.address > + > def lookup_global(self, symbol): > variable, _ = gdb.lookup_symbol(symbol) > return variable.value() if variable else None > > + def member_by_offset(self, tp, offset, prev_name=None): > + if isinstance(tp, str): > + tp = self._dbgtype(tp) > + assert offset < tp.sizeof, 'offset is bigger than object size' > + if tp.code == gdb.TYPE_CODE_TYPEDEF: > + tp = tp.strip_typedefs() > + if tp.code == gdb.TYPE_CODE_STRUCT: > + fields = tp.fields() > + for n_field in range(len(fields)): > + islast = n_field == (len(fields) - 1) > + field = fields[n_field] > + start_field = field.bitpos / 8 Maybe use `//` (floor division) instead of `/` here?
> + end_field = fields[n_field + 1].bitpos / 8 if not islast \ > + else tp.sizeof > + if start_field <= offset and offset < end_field: > + next_name = self.member_by_offset( > + field.type, > + offset - start_field, > + prev_name=field.name > + ) > + return '.{field}{suffix}'.format( > + field=field.name, > + suffix=next_name if next_name else '' > + ) > + elif tp.code == gdb.TYPE_CODE_ARRAY: > + # Get array field type. > + target = tp.target() > + tsize = target.sizeof > + idx = int(offset // tsize) > + next_name = self.member_by_offset(target, offset - idx * tsize) > + idxname = idx_name(prev_name) > + if idxname and idx in idxname: > + idx = idxname[idx] > + return '[{}]{}'.format(idx, next_name if next_name else '') > + else: > + return None > + > def eval(self, command): > if not command: > return None > @@ -185,9 +277,23 @@ class _GDBDebugger(Debugger): > raise gdb.GdbError('table argument empty') > return ret > > + def detect_arch(self): > + if hasattr(self, 'arch'): > + return self.arch > + target = str(gdb.execute('info target', False, True)) > + if re.match('.*x86-64.*', target, flags=re.DOTALL): > + return 'x64' > + elif re.match('.*aarch64.*', target, flags=re.DOTALL): > + return 'arm64' > + else: > + return '' > + > def write(self, msg): > gdb.write(msg) > > + def error(self, errmsg): > + return gdb.GdbError(errmsg) > + > def check_libluajit(self): > # XXX Fragile: Though connecting the callback looks bad, > # it respects both Python 2 and Python 3 (see #4828). > @@ -322,8 +428,26 @@ class _LLDBDebugger(Debugger): > def lldb__getitem__(lldbval, key): > if type(key) is lldb.value: > key = int(key) > + if type(key) is bool: > + key = int(key) > if type(key) is int: > # Allow array access. > + ltp = lldbval.sbvalue.GetType() > + # XXX: LLDB in versions 17 - 19 can't use an array > + # object as the initializer for `lldb.value` since > + # `GetValue()` for it returns `None` leading to > + # the invalid result. 
See > + #https://github.com/llvm/llvm-project/pull/90144. > + if (self.version < 17 or self.version > 19) or \ > + ltp.GetTypeClass() != lldb.eTypeClassArray: > + pass probably it is better to invert condition and remove section with "pass" > + > + > +def ir_kptr(ir): > + irname = IRS[ir['o']] > + assert irname == 'KPTR' or irname == 'KKPTR', 'wrong IR for ir_iptr()' typo: s/ir_iptr()/ir_kptr() or ir_kkptr()/ > + return mref('void *', dbg.cast('IRIns *', dbg.address(ir))[LJ_GC64]['ptr']) > + > + > +def ir_kgc(ir): > + irname = IRS[ir['o']] > + assert irname == 'KGC', 'wrong IR for ir_kgc()' > + return gcref(dbg.cast('IRIns *', dbg.address(ir))[LJ_GC64]['gcr']) > + > + > +def ir_knum(ir): > + irname = IRS[ir['o']] > + assert irname == 'KNUM', 'wrong IR for ir_knum()' > + return dbg.address(dbg.cast('IRIns *', dbg.address(ir))[1]['tv']) > + > + > +def ir_kint64(ir): > + irname = IRS[ir['o']] > + assert irname == 'KINT64', 'wrong IR for ir_knum()' typo: s/ir_knum/ir_kint64/ > + return dbg.address(dbg.cast('IRIns *', dbg.address(ir))[1]['tv']) > + > + > # Dumpers. > > # GCobj dumpers. > @@ -1467,6 +2281,325 @@ def dump_func(func): > return 'fast function #{}\n'.format(int(ffid)) > > > +# JIT dumpers. > + > + > +def dump_call_func(trace, callop): > + ctype = '' > + if callop > 0: > + ir = trace['ir'][REF_BIAS + callop] > + if IRTYPES[irt_type(ir['t'])] == 'nil': # nil == CARG(func, ctype) > + callop = int(ir['op1']) - REF_BIAS > + cdt_idx_irk = trace['ir'][ir['op2']] > + assert IRS[cdt_idx_irk['o']] == 'KINT', \ > + 'unexpected IR for ctype storage' > + ctype_idx = cdt_idx_irk['i'] > + ctype = 'ctype: {}'.format(ctype_idx) > + > + func_str = '' > + if callop < 0: > + irk = trace['ir'][REF_BIAS + callop] > + assert IRS[irk['o']] == 'KINT64', \ > + 'unexpected IR for FFI function storage' > + func_addr = int(ir_kint64(irk)['u64']) > + # TODO: Symbol demangling. 
> + func_str = '[{:#x}]'.format(func_addr) > + else: > + func_str = '[{:04d}]'.format(callop) > + > + return func_str, ctype > + > + > +def dump_call_args(trace, ins): > + if ins < 0: > + return '{{{}}}'.format(dump_irk(trace, ins)) > + else: > + ir = trace['ir'][REF_BIAS + ins] > + irname = IRS[ir['o']] > + if irname == 'CARG': > + last_arg = '' > + args = dump_call_args(trace, int(ir['op1']) - REF_BIAS) > + op2 = int(ir['op2']) - REF_BIAS > + if op2 < 0: > + last_arg = '{{{}}}'.format(dump_irk(trace, op2)) > + else: > + last_arg = '{{{:04d}}}'.format(op2) > + return args + ', ' + last_arg > + else: > + return '{{{:04d}}}'.format(ins) > + > + > +# Special FP constant. > +CONST_BIAS = 2 ** 52 + 2 ** 51 > + > + > +def dump_irk(trace, idx): > + ref = idx + REF_BIAS > + assert ref >= trace['nk'] and ref < REF_BIAS, 'bad constant in IR dump' > + irins = trace['ir'][ref] > + irname = IRS[irins['o']] > + slot = '' > + if irname == 'KSLOT': > + slot = ' KSLOT: @{}'.format(int(irins['op2'])) > + irins = trace['ir'][irins['op1']] > + irname = IRS[irins['o']] > + > + irtype = irins['t'] > + if irname == 'KPRI': > + typename = typenames(irt_toitype(irtype)) > + # Trivial dump for primitives. > + irk = tv_dumpers.get( > + typename, dump_lj_tv_invalid # noqa: F821 # Generated. 
> + )(0) > + elif irname == 'KINT': > + irk = 'integer {}'.format(dbg.cast('int32_t', irins['i'])) > + elif irname == 'KGC': > + typename = typenames(irt_toitype(irtype)) > + irk = gco_dumpers.get(typename, dump_lj_gco_invalid)(ir_kgc(irins)) > + elif irname == 'KKPTR': > + addr = ir_kptr(irins) > + if addr == dbg.address(G(L())['nilnode']): > + return '[g->nilnode]' + slot > + irk = '[{}]'.format(strx64(addr)) > + elif irname == 'KPTR': > + irk = '[{}]'.format(strx64(ir_kptr(irins))) > + elif irname == 'KNULL': > + irk = 'NULL' > + elif irname == 'KNUM': > + tv_num = ir_knum(irins) > + if float(tv_num['n']) == CONST_BIAS: > + return 'bias' > + irk = dump_lj_tv_numx(tv_num) > + elif irname == 'KINT64': > + irk = 'int64_t {}'.format(dbg.cast( > + 'int64_t', int(ir_kint64(irins)['u64']) > + )) > + else: > + return 'Unknown IRK: ' + irname > + return irk + slot > + > + > + > +def dump_snap(trace, snapno, snap): > + dump = 'SNAP #{:<3d} ['.format(snapno) > + snap_map = dbg.address(trace['snapmap'][snap['mapofs']]) > + snap_entry_num = 0 > + for slot in range(0, snap['nslots']): > + dump += ' ' > + snap_entry = int(snap_map[snap_entry_num]) > + if snap_entry_num < snap['nent'] and snap_entry >> TREF_SHIFT == slot: > + snap_entry_num += 1 > + ref = int((snap_entry & TREF_REFMASK) - REF_BIAS) > + if ref < 0: > + if int(snap_entry) == 0x1057fff: magic number > +# Assume not cross-platform debugging. > +machine = os.uname().machine > +if machine == 'x86_64': > + RX_GPR = r'r\w\w' > + RX_FPR = r'xmm\d+' > +elif machine == 'arm64' or machine == 'aarch64': > + RX_GPR = r'x\d+' > + RX_FPR = r'd\d+' > +else: > + raise Exception('Unknown archeticture in testing') typo: s/archeticture/architecture/ > + > +class TestLJIRConst(TestCaseBase): > + location = 'trace_stop' > + > + # No narrowing of 42. > + if IS_DUALNUM: > + # KNUM occupies 2 slots. 
> + _knum_irnum = '6' > + _kgc_irnum = '8' if IS_GC64 else '7' > + _kptr_irnum = '10' if IS_GC64 else '8' > + else: > + # KNUM occupies 2 slots. > + _knum_irnum = '8' > + _kgc_irnum = '10' if IS_GC64 else '9' > + _kptr_irnum = '12' if IS_GC64 else '10' Both branches contain the same comment; is it a typo or not? > >