Hi, Sergey,
thanks for the patch! LGTM with minor comments.
Sergey
This patch adds dumpers for a single IR instruction (`lj-ir`), as well
as for all bytecodes inside one trace (`lj-trace`). Its dump is quite
similar to the -jdump flag but also reports types of register operands
(`ref`, `lit`, `cst`) and operation mode (`N`, `A`, `W`, etc.).
The `lj-trace` command accepts optional /rs flags to dump registers
associated with IR and snapshots for the trace correspondingly.
The `lj-ir` command can be used for dumping IR constants as well.
The `lj-jslots` command dumps the content of `J->slot`. It is useful to
simplify debugging of `rec_check_slots()` assertion failures.
For LLDB value, the `__getitem__` metamethod now accepts bool keys.
Also, `__index__` is set to allow lldb.value to be used as an index
without explicit conversion to int. Old GDB versions (below 7.12) are
not supported because their gdb.Value lacks the `__index__` metamethod
and can't be monkey-patched. Support for these versions may be added
on demand.
Part of tarantool/tarantool#4808
---
src/luajit_dbg.py | 1216 ++++++++++++++++-
.../debug-extension-tests.py | 365 +++++
2 files changed, 1570 insertions(+), 11 deletions(-)
diff --git a/src/luajit_dbg.py b/src/luajit_dbg.py
index 2edb199a..fd6ca8a5 100644
--- a/src/luajit_dbg.py
+++ b/src/luajit_dbg.py
@@ -58,6 +58,26 @@ class Debugger(object):
self.LLDB = True
return super(Debugger, self).__new__(_LLDBDebugger)
+ def parse_flags(self, raw_flags, permitted_flags):
+ flags = {}
+ for flag in raw_flags:
+ if flag not in permitted_flags:
+ raise self.error('Unrecongnized option: "{}"'.format(flag))
typo: s/Unrecongnized/Unrecognized/
+ flags[flag] = True
+ return flags
+
+ def extract_flags(self, arg, permitted_flags):
+ if not arg:
+ return None, None
+ flags = {}
+ if arg.startswith('/'):
+ match = re.match(r'/(\S*)\s+(.*)$', arg)
+ if not match:
+ return arg, flags
+ raw_flags, arg = match.group(1, 2)
+ flags = self.parse_flags(raw_flags, permitted_flags)
+ return arg, flags
+
def configure(self):
global PADDING, LJ_TISNUM
if not self.check_libluajit():
@@ -70,6 +90,17 @@ class Debugger(object):
self.write('luajit_dbg.py failed to load: '
'no debugging symbols found for libluajit\n')
return False
+
+ # Setup arch.
+ try:
+ self.arch = str(self.eval('LJ_ARCH_NAME')).split('"')[1]
+ except Exception:
+ try:
+ self.arch = self.detect_arch()
+ except Exception:
+ # Setup on demand if necessary.
+ pass
+
return True
def initialize_extension(self, commands):
@@ -99,21 +130,42 @@ class Debugger(object):
'''Return the content of the string by the given pointer.'''
pass
+ @abc.abstractmethod
+ def address(self, obj):
+ '''Return the address in memory of the given object.'''
+ pass
+
@abc.abstractmethod
def lookup_global(self, symbol):
'''Look up the global C symbol by the given name.'''
pass
+ @abc.abstractmethod
+ def member_by_offset(self, typename, offset, prev_name=None):
+ '''Look up the global C symbol by the given name.'''
+ pass
+
@abc.abstractmethod
def eval(self, command):
'''Parse and evaluate the given debugger command.'''
pass
+ @abc.abstractmethod
+ def detect_arch(self):
+ '''Detect the CPU architecture and canonicalize it to the LuaJIT
+ notation.'''
+ pass
+
@abc.abstractmethod
def write(self, msg):
'''Print the message.'''
pass
+ @abc.abstractmethod
+ def error(self, msg):
+ '''Create the error object with message.'''
+ pass
+
@abc.abstractmethod
def check_libluajit(self):
'''Check that libluajit is loaded.
@@ -172,10 +224,50 @@ class _GDBDebugger(Debugger):
# A string is printed with a pointer to it. Just strip it.
return re.sub(r'^0x[a-f0-9]+\s+(?=")', '', str(strptr))
+ def address(self, obj):
+ return obj.address
+
def lookup_global(self, symbol):
variable, _ = gdb.lookup_symbol(symbol)
return variable.value() if variable else None
+ def member_by_offset(self, tp, offset, prev_name=None):
+ if isinstance(tp, str):
+ tp = self._dbgtype(tp)
+ assert offset < tp.sizeof, 'offset is bigger than object size'
+ if tp.code == gdb.TYPE_CODE_TYPEDEF:
+ tp = tp.strip_typedefs()
+ if tp.code == gdb.TYPE_CODE_STRUCT:
+ fields = tp.fields()
+ for n_field in range(len(fields)):
+ islast = n_field == (len(fields) - 1)
+ field = fields[n_field]
+ start_field = field.bitpos / 8
Maybe use `//` (integer division) here instead of `/`?
+ end_field = fields[n_field + 1].bitpos / 8 if not islast \
+ else tp.sizeof
+ if start_field <= offset and offset < end_field:
+ next_name = self.member_by_offset(
+ field.type,
+ offset - start_field,
+ prev_name=field.name
+ )
+ return '.{field}{suffix}'.format(
+ field=field.name,
+ suffix=next_name if next_name else ''
+ )
+ elif tp.code == gdb.TYPE_CODE_ARRAY:
+ # Get array field type.
+ target = tp.target()
+ tsize = target.sizeof
+ idx = int(offset // tsize)
+ next_name = self.member_by_offset(target, offset - idx * tsize)
+ idxname = idx_name(prev_name)
+ if idxname and idx in idxname:
+ idx = idxname[idx]
+ return '[{}]{}'.format(idx, next_name if next_name else '')
+ else:
+ return None
+
def eval(self, command):
if not command:
return None
@@ -185,9 +277,23 @@ class _GDBDebugger(Debugger):
raise gdb.GdbError('table argument empty')
return ret
+ def detect_arch(self):
+ if hasattr(self, 'arch'):
+ return self.arch
+ target = str(gdb.execute('info target', False, True))
+ if re.match('.*x86-64.*', target, flags=re.DOTALL):
+ return 'x64'
+ elif re.match('.*aarch64.*', target, flags=re.DOTALL):
+ return 'arm64'
+ else:
+ return ''
+
def write(self, msg):
gdb.write(msg)
+ def error(self, errmsg):
+ return gdb.GdbError(errmsg)
+
def check_libluajit(self):
# XXX Fragile: Though connecting the callback looks bad,
# it respects both Python 2 and Python 3 (see #4828).
@@ -322,8 +428,26 @@ class _LLDBDebugger(Debugger):
def lldb__getitem__(lldbval, key):
if type(key) is lldb.value:
key = int(key)
+ if type(key) is bool:
+ key = int(key)
if type(key) is int:
# Allow array access.
+ ltp = lldbval.sbvalue.GetType()
+ # XXX: LLDB in versions 17 - 19 can't use an array
+ # object as the initializer for `lldb.value` since
+ # `GetValue()` for it returns `None` leading to
+ # the invalid result. See
+ # https://github.com/llvm/llvm-project/pull/90144.
+ if (self.version < 17 or self.version > 19) or \
+ ltp.GetTypeClass() != lldb.eTypeClassArray:
+ pass
Probably it is better to invert the condition and remove the branch
with "pass".
<snipped>
typo: s/ir_iptr()/ir_kptr() or ir_kkptr()/
+
+
+def ir_kptr(ir):
+ irname = IRS[ir['o']]
+ assert irname == 'KPTR' or irname == 'KKPTR', 'wrong IR for ir_iptr()'
+ return mref('void *', dbg.cast('IRIns *', dbg.address(ir))[LJ_GC64]['ptr'])
+
+
+def ir_kgc(ir):
+ irname = IRS[ir['o']]
+ assert irname == 'KGC', 'wrong IR for ir_kgc()'
+ return gcref(dbg.cast('IRIns *', dbg.address(ir))[LJ_GC64]['gcr'])
+
+
+def ir_knum(ir):
+ irname = IRS[ir['o']]
+ assert irname == 'KNUM', 'wrong IR for ir_knum()'
+ return dbg.address(dbg.cast('IRIns *', dbg.address(ir))[1]['tv'])
+
+
+def ir_kint64(ir):
+ irname = IRS[ir['o']]
+ assert irname == 'KINT64', 'wrong IR for ir_knum()'
typo: s/ir_knum/ir_kint64/
+ return dbg.address(dbg.cast('IRIns *', dbg.address(ir))[1]['tv'])
+
+
# Dumpers.
# GCobj dumpers.
@@ -1467,6 +2281,325 @@ def dump_func(func):
return 'fast function #{}\n'.format(int(ffid))
+# JIT dumpers.
+
+
+def dump_call_func(trace, callop):
+ ctype = ''
+ if callop > 0:
+ ir = trace['ir'][REF_BIAS + callop]
+ if IRTYPES[irt_type(ir['t'])] == 'nil': # nil == CARG(func, ctype)
+ callop = int(ir['op1']) - REF_BIAS
+ cdt_idx_irk = trace['ir'][ir['op2']]
+ assert IRS[cdt_idx_irk['o']] == 'KINT', \
+ 'unexpected IR for ctype storage'
+ ctype_idx = cdt_idx_irk['i']
+ ctype = 'ctype: {}'.format(ctype_idx)
+
+ func_str = ''
+ if callop < 0:
+ irk = trace['ir'][REF_BIAS + callop]
+ assert IRS[irk['o']] == 'KINT64', \
+ 'unexpected IR for FFI function storage'
+ func_addr = int(ir_kint64(irk)['u64'])
+ # TODO: Symbol demangling.
+ func_str = '[{:#x}]'.format(func_addr)
+ else:
+ func_str = '[{:04d}]'.format(callop)
+
+ return func_str, ctype
+
+
+def dump_call_args(trace, ins):
+ if ins < 0:
+ return '{{{}}}'.format(dump_irk(trace, ins))
+ else:
+ ir = trace['ir'][REF_BIAS + ins]
+ irname = IRS[ir['o']]
+ if irname == 'CARG':
+ last_arg = ''
+ args = dump_call_args(trace, int(ir['op1']) - REF_BIAS)
+ op2 = int(ir['op2']) - REF_BIAS
+ if op2 < 0:
+ last_arg = '{{{}}}'.format(dump_irk(trace, op2))
+ else:
+ last_arg = '{{{:04d}}}'.format(op2)
+ return args + ', ' + last_arg
+ else:
+ return '{{{:04d}}}'.format(ins)
+
+
+# Special FP constant.
+CONST_BIAS = 2 ** 52 + 2 ** 51
+
+
+def dump_irk(trace, idx):
+ ref = idx + REF_BIAS
+ assert ref >= trace['nk'] and ref < REF_BIAS, 'bad constant in IR dump'
+ irins = trace['ir'][ref]
+ irname = IRS[irins['o']]
+ slot = ''
+ if irname == 'KSLOT':
+ slot = ' KSLOT: @{}'.format(int(irins['op2']))
+ irins = trace['ir'][irins['op1']]
+ irname = IRS[irins['o']]
+
+ irtype = irins['t']
+ if irname == 'KPRI':
+ typename = typenames(irt_toitype(irtype))
+ # Trivial dump for primitives.
+ irk = tv_dumpers.get(
+ typename, dump_lj_tv_invalid # noqa: F821 # Generated.
+ )(0)
+ elif irname == 'KINT':
+ irk = 'integer {}'.format(dbg.cast('int32_t', irins['i']))
+ elif irname == 'KGC':
+ typename = typenames(irt_toitype(irtype))
+ irk = gco_dumpers.get(typename, dump_lj_gco_invalid)(ir_kgc(irins))
+ elif irname == 'KKPTR':
+ addr = ir_kptr(irins)
+ if addr == dbg.address(G(L())['nilnode']):
+ return '[g->nilnode]' + slot
+ irk = '[{}]'.format(strx64(addr))
+ elif irname == 'KPTR':
+ irk = '[{}]'.format(strx64(ir_kptr(irins)))
+ elif irname == 'KNULL':
+ irk = 'NULL'
+ elif irname == 'KNUM':
+ tv_num = ir_knum(irins)
+ if float(tv_num['n']) == CONST_BIAS:
+ return 'bias'
+ irk = dump_lj_tv_numx(tv_num)
+ elif irname == 'KINT64':
+ irk = 'int64_t {}'.format(dbg.cast(
+ 'int64_t', int(ir_kint64(irins)['u64'])
+ ))
+ else:
+ return 'Unknown IRK: ' + irname
+ return irk + slot
+
+
<snipped>
+
+def dump_snap(trace, snapno, snap):
+ dump = 'SNAP #{:<3d} ['.format(snapno)
+ snap_map = dbg.address(trace['snapmap'][snap['mapofs']])
+ snap_entry_num = 0
+ for slot in range(0, snap['nslots']):
+ dump += ' '
+ snap_entry = int(snap_map[snap_entry_num])
+ if snap_entry_num < snap['nent'] and snap_entry >> TREF_SHIFT == slot:
+ snap_entry_num += 1
+ ref = int((snap_entry & TREF_REFMASK) - REF_BIAS)
+ if ref < 0:
+ if int(snap_entry) == 0x1057fff:
Magic number: please use a named constant or add a comment explaining it.
<snipped>
+# Assume not cross-platform debugging.
+machine = os.uname().machine
+if machine == 'x86_64':
+ RX_GPR = r'r\w\w'
+ RX_FPR = r'xmm\d+'
+elif machine == 'arm64' or machine == 'aarch64':
+ RX_GPR = r'x\d+'
+ RX_FPR = r'd\d+'
+else:
+ raise Exception('Unknown archeticture in testing')
typo: s/archeticture/architecture/
<snipped>
Both branches contain the same comment; is it a typo or not?
+
+class TestLJIRConst(TestCaseBase):
+ location = 'trace_stop'
+
+ # No narrowing of 42.
+ if IS_DUALNUM:
+ # KNUM occupies 2 slots.
+ _knum_irnum = '6'
+ _kgc_irnum = '8' if IS_GC64 else '7'
+ _kptr_irnum = '10' if IS_GC64 else '8'
+ else:
+ # KNUM occupies 2 slots.
+ _knum_irnum = '8'
+ _kgc_irnum = '10' if IS_GC64 else '9'
+ _kptr_irnum = '12' if IS_GC64 else '10'
<snipped>