From: Sergey Kaplun via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Sergey Bronnikov <sergeyb@tarantool.org>,
Evgeniy Temirgaleev <e.temirgaleev@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH luajit 4/4] dbg: introduce lj-bc, lj-func and lj-proto dumpers
Date: Thu, 4 Jun 2026 12:30:52 +0300 [thread overview]
Message-ID: <20260604093052.2221827-5-skaplun@tarantool.org> (raw)
In-Reply-To: <20260604093052.2221827-1-skaplun@tarantool.org>
This patch adds dumpers for a single bytecode instruction (`lj-bc`), as
well as for all bytecodes inside one function (`lj-func`) or prototype
(`lj-proto`). The dump is similar to the output of the `-bl` flag, but
also reports the types of register operands (`jump`, `dst`, `str`, etc.).
For LLDB, the result of the `lookup_global()` method is now wrapped in an
`lldb.value` object to make it more convenient to use.
Part of tarantool/tarantool#4808
---
src/luajit_dbg.py | 416 +++++++++++++++++-
.../debug-extension-tests.py | 79 ++++
2 files changed, 490 insertions(+), 5 deletions(-)
diff --git a/src/luajit_dbg.py b/src/luajit_dbg.py
index f5868e61..60308179 100644
--- a/src/luajit_dbg.py
+++ b/src/luajit_dbg.py
@@ -475,7 +475,7 @@ class _LLDBDebugger(Debugger):
return strptr.sbvalue.summary
def lookup_global(self, symbol):
- return self.target.FindFirstGlobalVariable(symbol)
+ return lldb.value(self.target.FindFirstGlobalVariable(symbol))
def eval(self, command):
if not command:
@@ -648,6 +648,202 @@ def itypemap(o):
return LJ_T['NUMX'] if tvisnumber(o) else itype(o)
+# Bytecode.
+
+def bc_op(ins):
+ return int(ins) & 0xff
+
+
+def bc_a(ins):
+ return (int(ins) >> 8) & 0xff
+
+
+def bc_b(ins):
+ return int(ins) >> 24
+
+
+def bc_c(ins):
+ return (int(ins) >> 16) & 0xff
+
+
+def bc_d(ins):
+ return int(ins) >> 16
+
+
+BCMODE = [
+ 'none', 'dst', 'base', 'var', 'rbase', 'uv',
+ 'lit', 'lits', 'pri', 'num', 'str', 'tab', 'func', 'jump', 'cdata',
+]
+
+
+lj_bc_mode_ = None
+
+
+def lj_bc_mode():
+ global lj_bc_mode_
+ if lj_bc_mode_:
+ return lj_bc_mode_
+ lj_bc_mode_ = dbg.lookup_global('lj_bc_mode')
+ return lj_bc_mode_
+
+
+def bcmode_a(op):
+ return int(lj_bc_mode()[op] & 7)
+
+
+def bcmode_b(op):
+ return int((lj_bc_mode()[op] >> 3) & 15)
+
+
+def bcmode_cd(op):
+ return int((lj_bc_mode()[op] >> 7) & 15)
+
+
+# Unfortunately, there is no place in the VM except the generated
+# Lua table, where the bytecode names are stored. So duplicate
+# them here.
+BYTECODES = [
+ # Comparison ops. ORDER OPR.
+ 'ISLT',
+ 'ISGE',
+ 'ISLE',
+ 'ISGT',
+
+ 'ISEQV',
+ 'ISNEV',
+ 'ISEQS',
+ 'ISNES',
+ 'ISEQN',
+ 'ISNEN',
+ 'ISEQP',
+ 'ISNEP',
+
+ # Unary test and copy ops.
+ 'ISTC',
+ 'ISFC',
+ 'IST',
+ 'ISF',
+ 'ISTYPE',
+ 'ISNUM',
+ 'MOV',
+ 'NOT',
+ 'UNM',
+ 'LEN',
+ 'ADDVN',
+ 'SUBVN',
+ 'MULVN',
+ 'DIVVN',
+ 'MODVN',
+
+ # Binary ops. ORDER OPR.
+ 'ADDNV',
+ 'SUBNV',
+ 'MULNV',
+ 'DIVNV',
+ 'MODNV',
+
+ 'ADDVV',
+ 'SUBVV',
+ 'MULVV',
+ 'DIVVV',
+ 'MODVV',
+
+ 'POW',
+ 'CAT',
+
+ # Constant ops.
+ 'KSTR',
+ 'KCDATA',
+ 'KSHORT',
+ 'KNUM',
+ 'KPRI',
+ 'KNIL',
+
+ # Upvalue and function ops.
+ 'UGET',
+ 'USETV',
+ 'USETS',
+ 'USETN',
+ 'USETP',
+ 'UCLO',
+ 'FNEW',
+
+ # Table ops.
+ 'TNEW',
+ 'TDUP',
+ 'GGET',
+ 'GSET',
+ 'TGETV',
+ 'TGETS',
+ 'TGETB',
+ 'TGETR',
+ 'TSETV',
+ 'TSETS',
+ 'TSETB',
+ 'TSETM',
+ 'TSETR',
+
+ # Calls and vararg handling. T = tail call.
+ 'CALLM',
+ 'CALL',
+ 'CALLMT',
+ 'CALLT',
+ 'ITERC',
+ 'ITERN',
+ 'VARG',
+ 'ISNEXT',
+
+ # Returns.
+ 'RETM',
+ 'RET',
+ 'RET0',
+ 'RET1',
+
+ # Loops and branches. I/J = interp/JIT.
+ # I/C/L = init/call/loop.
+ 'FORI',
+ 'JFORI',
+
+ 'FORL',
+ 'IFORL',
+ 'JFORL',
+
+ 'ITERL',
+ 'IITERL',
+ 'JITERL',
+
+ 'LOOP',
+ 'ILOOP',
+ 'JLOOP',
+
+ 'JMP',
+
+ # Function headers. I/J = interp/JIT.
+ # F/V/C = fixarg/vararg/C func.
+ 'FUNCF',
+ 'IFUNCF',
+ 'JFUNCF',
+ 'FUNCV',
+ 'IFUNCV',
+ 'JFUNCV',
+ 'FUNCC',
+ 'FUNCCW',
+]
+
+
+def proto_bc(proto):
+ return dbg.cast('BCIns *',
+ dbg.cast('char *', proto) + dbg.sizeof('GCproto'))
+
+
+def proto_kgc(pt, idx):
+ return gcref(mref('GCRef *', pt['k'])[idx])
+
+
+def proto_knumtv(pt, idx):
+ return mref('TValue *', pt['k'])[idx]
+
+
# Frames.
@@ -676,10 +872,6 @@ def frametypes(ft):
}.get(ft, '?')
-def bc_a(ins):
- return (ins >> 8) & 0xff
-
-
def frame_ftsz(framelink):
return dbg.cast('ptrdiff_t', framelink['ftsz'] if LJ_FR2
else framelink['fr']['tp']['ftsz'])
@@ -1129,6 +1321,137 @@ def dump_gc(g):
return '\n'.join(map(lambda s: '\t' + s, stats))
+def proto_loc(proto):
+ return '{chunk}:{firstline}'.format(
+ chunk=strdata(dbg.cast('GCstr *', gcval(proto['chunkname']))),
+ firstline=proto['firstline'],
+ )
+
+
+def funck(pt, idx):
+ if idx >= 0:
+ assert idx < pt['sizekn'], 'invalid idx for numeric constant in proto'
+ tv = proto_knumtv(pt, idx)
+ return dump_tvalue(tv)
+ else:
+ assert ~idx < pt['sizekgc'], 'invalid idx for GC constant in proto'
+ gcobj = proto_kgc(pt, idx)
+ if typenames(i2notu32(gcobj['gch']['gct'])) == 'LJ_TPROTO':
+ return proto_loc(dbg.cast('GCproto *', gcobj))
+ return dump_gcobj(gcobj)
+
+
+def funcuvname(pt, idx):
+ assert idx < pt['sizeuv'], 'invalid idx for upvalue in proto'
+ uvinfo = mref('uint8_t *', pt['uvinfo'])
+ if not uvinfo:
+ return ''
+
+ # if (idx) while (*uvinfo++ || --idx);
+ while idx > 0:
+ while uvinfo[0]:
+ uvinfo += 1
+ uvinfo += 1
+ idx -= 1
+
+ return 'upvalue {name} @ {addr}'.format(
+ name=dbg.cstr(dbg.cast('char *', uvinfo)),
+ addr=strx64(uvinfo)
+ )
+
+
+def dump_reg(rtype, value, jmp_format=None, jmp_ctx=None):
+ if rtype == 'jump':
+ # Jump destination is biased by BCBIAS_J (0x8000) relative to the
+ # next instruction, so 0x7fff gives the delta from this one.
+ delta = value - 0x7fff
+ if jmp_format:
+ value = jmp_format(jmp_ctx, delta)
+ else:
+ prefix = '+' if delta >= 0 else ''
+ value = prefix + str(delta)
+ else:
+ value = '{:3d}'.format(value)
+
+ return '{rtype:6} {value}'.format(
+ rtype=rtype + ':',
+ value=value,
+ )
+
+
+def dump_kc(rtype, value, proto):
+ kc = ''
+ if proto:
+ if rtype == 'str' or rtype == 'func':
+ kc = funck(proto, ~value)
+ elif rtype == 'num':
+ kc = funck(proto, value)
+ elif rtype == 'uv':
+ kc = funcuvname(proto, value)
+
+ if kc != '':
+ kc = ' ; ' + kc
+ return kc
+
+
+def dump_bc(ins, jmp_format=None, jmp_ctx=None, proto=None):
+ op = bc_op(ins)
+ if op >= len(BYTECODES):
+ return 'INVALID'
+
+ bcname = BYTECODES[op]
+ bcma = bcmode_a(op)
+ bcmb = bcmode_b(op)
+ bcmcd = bcmode_cd(op)
+
+ kca = dump_kc(BCMODE[bcma], bc_a(ins), proto) if bcma else ''
+ kcc = dump_kc(
+ BCMODE[bcmcd], bc_c(ins) if bcmb else bc_d(ins), proto
+ ) if bcmcd else ''
+
+ return '{name:6} {ra}{rb}{rcd}{kc}'.format(
+ name=bcname,
+ ra=dump_reg(BCMODE[bcma], bc_a(ins)) + ' ' if bcma else '',
+ rb=dump_reg(BCMODE[bcmb], bc_b(ins)) + ' ' if bcmb else '',
+ rcd=dump_reg(
+ BCMODE[bcmcd], bc_c(ins) if bcmb else bc_d(ins),
+ jmp_format=jmp_format, jmp_ctx=jmp_ctx
+ ) if bcmcd else '',
+ kc=kca + kcc
+ )
+
+
+def dump_proto(proto):
+ startbc = proto_bc(proto)
+ func_loc = proto_loc(proto)
+ # Location has the following format: '{chunk}:{firstline}'.
+ dump = '{func_loc}-{lastline}\n'.format(
+ func_loc=func_loc,
+ lastline=proto['firstline'] + proto['numline'],
+ )
+
+ def jmp_format(npc_from, delta):
+ return '=> ' + str(npc_from + delta).zfill(4)
+
+ for bcnum in range(0, int(proto['sizebc'])):
+ dump += (str(bcnum).zfill(4) + ' ' + dump_bc(
+ startbc[bcnum], jmp_format=jmp_format, jmp_ctx=bcnum, proto=proto,
+ ) + '\n')
+ return dump
+
+
+def dump_func(func):
+ ffid = func['ffid']
+
+ if ffid == 0:
+ pt = funcproto(func)
+ return dump_proto(pt)
+ elif ffid == 1:
+ return 'C function @ {}\n'.format(strx64(func['f']))
+ else:
+ return 'fast function #{}\n'.format(int(ffid))
+
+
# Extension commands. ############################################
@@ -1152,6 +1475,59 @@ pointers, respectively.
)
+class LJDumpBC(dbg.LJBase):
+ '''
+lj-bc <BCIns *>
+
+The command receives a pointer to a bytecode instruction and dumps
+the type of the instruction and the values of RA, RB, and RC (or RD)
+virtual registers and their modes (operand types):
+
+<BCNAME> <modeA>: <RA>
+<BCNAME> <modeA>: <RA> <modeB>: <RB> <modeC>: <RC> ; <const> ; <uvname>
+<BCNAME> <modeA>: <RA> <modeD>: <RD>
+
+<BCNAME>: Name of the bytecode instruction
+<R[ABCD]>: The value of the R[ABCD] virtual register operand
+<mode[ABCD]>: The operand type for the R[ABCD] register
+<const>: The value of the constant associated with the operand, if any
+<uvname>: The name of the upvalue, if any
+
+For the list of bytecode names and modes (operand types), see:
+https://github.com/tarantool/tarantool/wiki/LuaJIT-Bytecodes.
+ '''
+
+ def execute(self, arg):
+ dbg.write('{}\n'.format(
+ dump_bc(dbg.cast('BCIns *', dbg.eval(arg))[0])
+ ))
+
+
+class LJDumpFunc(dbg.LJBase):
+ '''
+lj-func <GCfunc *>
+
+The command receives a pointer to the corresponding GCfunc object and dumps
+the chunk name, where the corresponding function is defined, the
+corresponding range of lines, and a list of bytecodes related to this
+function:
+
+<file>:<start>-<end>
+<bcnum> <BC>
+...
+<bcnum> <BC>
+
+<file>: The location of the corresponding function definition
+<start>: The number of the line where the function starts
+<end>: The number of the line where the function ends
+<bcnum>: The sequential number of the bytecode instruction
+<BC>: The decoded bytecode instruction. Type "help lj-bc" for details.
+ '''
+
+ def execute(self, arg):
+ dbg.write('{}'.format(dump_func(dbg.cast('GCfuncC *', dbg.eval(arg)))))
+
+
class LJGC(dbg.LJBase):
'''
lj-gc
@@ -1208,6 +1584,33 @@ error message occurs.
dbg.write('{}\n'.format(dump_gcobj(gcobj)))
+class LJDumpProto(dbg.LJBase):
+ '''
+lj-proto <GCproto *>
+
+The command receives a pointer to the corresponding GCproto object and dumps
+the chunk name, where the corresponding function is defined, the
+corresponding range of lines, and a list of bytecodes related to this
+function:
+
+<file>:<start>-<end>
+<bcnum> <BC>
+...
+<bcnum> <BC>
+
+<file>: The location of the corresponding function definition
+<start>: The number of the line where the function starts
+<end>: The number of the line where the function ends
+<bcnum>: The sequential number of the bytecode instruction
+<BC>: The decoded bytecode instruction. Type "help lj-bc" for details.
+ '''
+
+ def execute(self, arg):
+ dbg.write('{}'.format(
+ dump_proto(dbg.cast('GCproto *', dbg.eval(arg)))
+ ))
+
+
class LJDumpStack(dbg.LJBase):
'''
lj-stack [<lua_State *>]
@@ -1368,8 +1771,11 @@ error message occurs.
def load(event=None):
dbg.initialize_extension({
'lj-arch': LJDumpArch,
+ 'lj-bc': LJDumpBC,
+ 'lj-func': LJDumpFunc,
'lj-gc': LJGC,
'lj-gco': LJDumpGCobj,
+ 'lj-proto': LJDumpProto,
'lj-stack': LJDumpStack,
'lj-state': LJState,
'lj-str': LJDumpString,
diff --git a/test/tarantool-debugger-tests/debug-extension-tests.py b/test/tarantool-debugger-tests/debug-extension-tests.py
index 7e2b5ac4..b677942c 100644
--- a/test/tarantool-debugger-tests/debug-extension-tests.py
+++ b/test/tarantool-debugger-tests/debug-extension-tests.py
@@ -45,6 +45,7 @@ else:
RX_ADDR = r'0x[a-f0-9]+'
RX_HASH = RX_ADDR # The same pattern for hexademic values.
+RX_BCN = r'00\d\d'
RX_FRAME = r'\[(S|\s)(B|\s)(T|\s)(M|\s)\]'
@@ -149,14 +150,25 @@ def gcval(arg):
return 'gcval(' + arg + ')'
+def mref(arg, tp):
+ if sys.platform == 'darwin':
+ # Assume GC64 build only.
+ return '((' + tp + '*)(' + arg + ').ptr64)'
+ else:
+ return 'mref(' + arg + ', ' + tp + ')'
+
+
class TestLoad(TestCaseBase):
extension_cmds = ''
location = 'lj_cf_print'
lua_script = 'print(1)'
pattern = (
r'lj-arch command initialized\n'
+ r'lj-bc command initialized\n'
+ r'lj-func command initialized\n'
r'lj-gc command initialized\n'
r'lj-gco command initialized\n'
+ r'lj-proto command initialized\n'
r'lj-stack command initialized\n'
r'lj-state command initialized\n'
r'lj-str command initialized\n'
@@ -359,6 +371,73 @@ class TestLJGCo(TestCaseBase):
pattern = GCO_RX
+PROTO_FUNC_SCRIPT = (
+ 'local uvname = false\n'
+ 'local function testf(...)\n'
+ ' local a = ...\n'
+ ' local s1 = a + 42\n'
+ ' uvname = "conststr"\n'
+ ' if a >= 42 then\n'
+ ' return a - s1\n'
+ ' end\n'
+ 'end\n'
+ 'print(testf)\n'
+)
+
+
+PROTO_FUNC_BC_RX = (
+ RX_BCN + r' FUNCV rbase: \d\s*\n' +
+ RX_BCN + r' VARG base: \d lit: \d lit: \d\s*\n' +
+ RX_BCN + r' ADDVN dst: \d var: \d num: +\d' +
+ r' ; ' + RX_INT + r' 42\s*\n' +
+ RX_BCN + r' USETS uv: \d str: \d' +
+ r' ; upvalue "uvname" @ ' + RX_ADDR +
+ r' ; string "conststr" @ ' + RX_ADDR + r'\s*\n' +
+ RX_BCN + r' KSHORT dst: \d lits: 42\s*\n' +
+ RX_BCN + r' ISGT var: \d var: \d\s*\n' +
+ RX_BCN + r' JMP rbase: \d jump: => ' + RX_BCN + r'\s*\n' +
+ RX_BCN + r' SUBVV dst: \d var: \d var: \d\s*\n' +
+ RX_BCN + r' RET1 rbase: \d lit: \d\s*\n' +
+ RX_BCN + r' RET0 rbase: \d lit: \d\s*\n'
+)
+
+
+class TestLJFunc(TestCaseBase):
+ location = 'lj_cf_print'
+ extension_cmds = 'lj-func ' + gcval('L->base')
+ lua_script = PROTO_FUNC_SCRIPT
+ pattern = PROTO_FUNC_BC_RX
+
+
+class TestLJProto(TestCaseBase):
+ location = 'lj_cf_print'
+ extension_cmds = (
+ 'lj-proto '
+ ' ((char *) ' + mref(
+ '((GCfuncL *)' + gcval('L->base') + ')->pc', 'char'
+ ) + ') - sizeof(GCproto)\n'
+ )
+ lua_script = PROTO_FUNC_SCRIPT
+ pattern = PROTO_FUNC_BC_RX
+
+
+class TestLJBC(TestCaseBase):
+ location = 'lj_cf_print'
+ extension_cmds = (
+ 'lj-bc ' + mref(
+ '((GCfuncL *)' + gcval('L->base') + ')->pc', 'BCIns'
+ ) + '\n'
+ 'lj-bc ' + mref(
+ '((GCfuncL *)' + gcval('L->base') + ')->pc', 'BCIns'
+ ) + ' + 6\n'
+ )
+ lua_script = PROTO_FUNC_SCRIPT
+ pattern = (
+ r'FUNCV rbase: \d\s*\n'
+ r'JMP rbase: \d jump: \+\d\n'
+ )
+
+
for test_cls in TestCaseBase.__subclasses__():
test_cls.test = lambda self: self.check()
--
2.54.0
next prev parent reply other threads:[~2026-06-04 9:33 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-04 9:30 [Tarantool-patches] [PATCH luajit 0/4] Introduce dumpers for bytecodes in debuggers Sergey Kaplun via Tarantool-patches
2026-06-04 9:30 ` [Tarantool-patches] [PATCH luajit 1/4] dbg: fix lj-stack command for LLDB Sergey Kaplun via Tarantool-patches
2026-06-05 14:55 ` Sergey Bronnikov via Tarantool-patches
2026-06-04 9:30 ` [Tarantool-patches] [PATCH luajit 2/4] dbg: fix DUALNUM detection " Sergey Kaplun via Tarantool-patches
2026-06-05 14:57 ` Sergey Bronnikov via Tarantool-patches
2026-06-05 16:01 ` Sergey Kaplun via Tarantool-patches
2026-06-04 9:30 ` [Tarantool-patches] [PATCH luajit 3/4] dbg: introduce lj-gco command Sergey Kaplun via Tarantool-patches
2026-06-05 15:02 ` Sergey Bronnikov via Tarantool-patches
2026-06-04 9:30 ` Sergey Kaplun via Tarantool-patches [this message]
2026-06-05 15:07 ` [Tarantool-patches] [PATCH luajit 4/4] dbg: introduce lj-bc, lj-func and lj-proto dumpers Sergey Bronnikov via Tarantool-patches
2026-06-05 16:10 ` Sergey Kaplun via Tarantool-patches
2026-06-05 14:55 ` [Tarantool-patches] [PATCH luajit 0/4] Introduce dumpers for bytecodes in debuggers Sergey Bronnikov via Tarantool-patches
2026-06-05 16:03 ` [Tarantool-patches] [PATCH luajit 3/5] dbg: update help for the lj-arch command Sergey Kaplun via Tarantool-patches
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260604093052.2221827-5-skaplun@tarantool.org \
--to=tarantool-patches@dev.tarantool.org \
--cc=e.temirgaleev@tarantool.org \
--cc=sergeyb@tarantool.org \
--cc=skaplun@tarantool.org \
--subject='Re: [Tarantool-patches] [PATCH luajit 4/4] dbg: introduce lj-bc, lj-func and lj-proto dumpers' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox