Tarantool development patches archive
 help / color / mirror / Atom feed
From: Sergey Kaplun via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Sergey Bronnikov <sergeyb@tarantool.org>,
	Evgeniy Temirgaleev <e.temirgaleev@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH luajit 4/4] dbg: introduce lj-bc, lj-func and lj-proto dumpers
Date: Thu,  4 Jun 2026 12:30:52 +0300	[thread overview]
Message-ID: <20260604093052.2221827-5-skaplun@tarantool.org> (raw)
In-Reply-To: <20260604093052.2221827-1-skaplun@tarantool.org>

This patch adds dumpers for a single bytecode instruction (`lj-bc`), as
well as for all bytecodes inside one function (`lj-func`) or prototype
(`lj-proto`). The output is quite similar to that of the -bl flag, but
it also reports the types of register operands (`jump`, `dst`, `str`,
etc.).

For LLDB, the result of the `lookup_global()` method is now wrapped in
an `lldb.value` object to make it more convenient to use.

Part of tarantool/tarantool#4808
---
 src/luajit_dbg.py                             | 416 +++++++++++++++++-
 .../debug-extension-tests.py                  |  79 ++++
 2 files changed, 490 insertions(+), 5 deletions(-)

diff --git a/src/luajit_dbg.py b/src/luajit_dbg.py
index f5868e61..60308179 100644
--- a/src/luajit_dbg.py
+++ b/src/luajit_dbg.py
@@ -475,7 +475,7 @@ class _LLDBDebugger(Debugger):
         return strptr.sbvalue.summary
 
     def lookup_global(self, symbol):
-        return self.target.FindFirstGlobalVariable(symbol)
+        return lldb.value(self.target.FindFirstGlobalVariable(symbol))
 
     def eval(self, command):
         if not command:
@@ -648,6 +648,202 @@ def itypemap(o):
         return LJ_T['NUMX'] if tvisnumber(o) else itype(o)
 
 
+# Bytecode.
+
+def bc_op(ins):
+    return int(ins) & 0xff
+
+
+def bc_a(ins):
+    return (int(ins) >> 8) & 0xff
+
+
+def bc_b(ins):
+    return int(ins) >> 24
+
+
+def bc_c(ins):
+    return (int(ins) >> 16) & 0xff
+
+
+def bc_d(ins):
+    return int(ins) >> 16
+
+
+BCMODE = [
+    'none', 'dst', 'base', 'var', 'rbase', 'uv',
+    'lit', 'lits', 'pri', 'num', 'str', 'tab', 'func', 'jump', 'cdata',
+]
+
+
+lj_bc_mode_ = None
+
+
+def lj_bc_mode():
+    global lj_bc_mode_
+    if lj_bc_mode_:
+        return lj_bc_mode_
+    lj_bc_mode_ = dbg.lookup_global('lj_bc_mode')
+    return lj_bc_mode_
+
+
+def bcmode_a(op):
+    return int(lj_bc_mode()[op] & 7)
+
+
+def bcmode_b(op):
+    return int((lj_bc_mode()[op] >> 3) & 15)
+
+
+def bcmode_cd(op):
+    return int((lj_bc_mode()[op] >> 7) & 15)
+
+
+# Unfortunately, there is no place in the VM except the generated
+# Lua table, where the bytecode names are stored. So duplicate
+# them here.
+BYTECODES = [
+    # Comparison ops. ORDER OPR.
+    'ISLT',
+    'ISGE',
+    'ISLE',
+    'ISGT',
+
+    'ISEQV',
+    'ISNEV',
+    'ISEQS',
+    'ISNES',
+    'ISEQN',
+    'ISNEN',
+    'ISEQP',
+    'ISNEP',
+
+    # Unary test and copy ops.
+    'ISTC',
+    'ISFC',
+    'IST',
+    'ISF',
+    'ISTYPE',
+    'ISNUM',
+    'MOV',
+    'NOT',
+    'UNM',
+    'LEN',
+    'ADDVN',
+    'SUBVN',
+    'MULVN',
+    'DIVVN',
+    'MODVN',
+
+    # Binary ops. ORDER OPR.
+    'ADDNV',
+    'SUBNV',
+    'MULNV',
+    'DIVNV',
+    'MODNV',
+
+    'ADDVV',
+    'SUBVV',
+    'MULVV',
+    'DIVVV',
+    'MODVV',
+
+    'POW',
+    'CAT',
+
+    # Constant ops.
+    'KSTR',
+    'KCDATA',
+    'KSHORT',
+    'KNUM',
+    'KPRI',
+    'KNIL',
+
+    # Upvalue and function ops.
+    'UGET',
+    'USETV',
+    'USETS',
+    'USETN',
+    'USETP',
+    'UCLO',
+    'FNEW',
+
+    # Table ops.
+    'TNEW',
+    'TDUP',
+    'GGET',
+    'GSET',
+    'TGETV',
+    'TGETS',
+    'TGETB',
+    'TGETR',
+    'TSETV',
+    'TSETS',
+    'TSETB',
+    'TSETM',
+    'TSETR',
+
+    # Calls and vararg handling. T = tail call.
+    'CALLM',
+    'CALL',
+    'CALLMT',
+    'CALLT',
+    'ITERC',
+    'ITERN',
+    'VARG',
+    'ISNEXT',
+
+    # Returns.
+    'RETM',
+    'RET',
+    'RET0',
+    'RET1',
+
+    # Loops and branches. I/J = interp/JIT.
+    # I/C/L = init/call/loop.
+    'FORI',
+    'JFORI',
+
+    'FORL',
+    'IFORL',
+    'JFORL',
+
+    'ITERL',
+    'IITERL',
+    'JITERL',
+
+    'LOOP',
+    'ILOOP',
+    'JLOOP',
+
+    'JMP',
+
+    # Function headers. I/J = interp/JIT.
+    # F/V/C = fixarg/vararg/C func.
+    'FUNCF',
+    'IFUNCF',
+    'JFUNCF',
+    'FUNCV',
+    'IFUNCV',
+    'JFUNCV',
+    'FUNCC',
+    'FUNCCW',
+]
+
+
+def proto_bc(proto):
+    return dbg.cast('BCIns *',
+                    dbg.cast('char *', proto) + dbg.sizeof('GCproto'))
+
+
+def proto_kgc(pt, idx):
+    return gcref(mref('GCRef *', pt['k'])[idx])
+
+
+def proto_knumtv(pt, idx):
+    return mref('TValue *', pt['k'])[idx]
+
+
 # Frames.
 
 
@@ -676,10 +872,6 @@ def frametypes(ft):
     }.get(ft, '?')
 
 
-def bc_a(ins):
-    return (ins >> 8) & 0xff
-
-
 def frame_ftsz(framelink):
     return dbg.cast('ptrdiff_t', framelink['ftsz'] if LJ_FR2
                     else framelink['fr']['tp']['ftsz'])
@@ -1129,6 +1321,137 @@ def dump_gc(g):
     return '\n'.join(map(lambda s: '\t' + s, stats))
 
 
+def proto_loc(proto):
+    return '{chunk}:{firstline}'.format(
+        chunk=strdata(dbg.cast('GCstr *', gcval(proto['chunkname']))),
+        firstline=proto['firstline'],
+    )
+
+
+def funck(pt, idx):
+    if idx >= 0:
+        assert idx < pt['sizekn'], 'invalid idx for numeric constant in proto'
+        tv = proto_knumtv(pt, idx)
+        return dump_tvalue(tv)
+    else:
+        assert ~idx < pt['sizekgc'], 'invalid idx for GC constant in proto'
+        gcobj = proto_kgc(pt, idx)
+        if typenames(i2notu32(gcobj['gch']['gct'])) == 'LJ_TPROTO':
+            return proto_loc(dbg.cast('GCproto *', gcobj))
+        return dump_gcobj(gcobj)
+
+
+def funcuvname(pt, idx):
+    assert idx < pt['sizeuv'], 'invalid idx for upvalue in proto'
+    uvinfo = mref('uint8_t *', pt['uvinfo'])
+    if not uvinfo:
+        return ''
+
+    # if (idx) while (*uvinfo++ || --idx);
+    while idx > 0:
+        while uvinfo[0]:
+            uvinfo += 1
+        uvinfo += 1
+        idx -= 1
+
+    return 'upvalue {name} @ {addr}'.format(
+        name=dbg.cstr(dbg.cast('char *', uvinfo)),
+        addr=strx64(uvinfo)
+    )
+
+
+def dump_reg(rtype, value, jmp_format=None, jmp_ctx=None):
+    if rtype == 'jump':
+        # Destination of jump instruction encoded as offset from
+        # BCBIAS_J.
+        delta = value - 0x7fff
+        if jmp_format:
+            value = jmp_format(jmp_ctx, delta)
+        else:
+            prefix = '+' if delta >= 0 else ''
+            value = prefix + str(delta)
+    else:
+        value = '{:3d}'.format(value)
+
+    return '{rtype:6} {value}'.format(
+        rtype=rtype + ':',
+        value=value,
+    )
+
+
+def dump_kc(rtype, value, proto):
+    kc = ''
+    if proto:
+        if rtype == 'str' or rtype == 'func':
+            kc = funck(proto, ~value)
+        elif rtype == 'num':
+            kc = funck(proto, value)
+        elif rtype == 'uv':
+            kc = funcuvname(proto, value)
+
+        if kc != '':
+            kc = ' ; ' + kc
+    return kc
+
+
+def dump_bc(ins, jmp_format=None, jmp_ctx=None, proto=None):
+    op = bc_op(ins)
+    if op >= len(BYTECODES):
+        return 'INVALID'
+
+    bcname = BYTECODES[op]
+    bcma = bcmode_a(op)
+    bcmb = bcmode_b(op)
+    bcmcd = bcmode_cd(op)
+
+    kca = dump_kc(BCMODE[bcma], bc_a(ins), proto) if bcma else ''
+    kcc = dump_kc(
+        BCMODE[bcmcd], bc_c(ins) if bcmb else bc_d(ins), proto
+    ) if bcmcd else ''
+
+    return '{name:6} {ra}{rb}{rcd}{kc}'.format(
+        name=bcname,
+        ra=dump_reg(BCMODE[bcma], bc_a(ins)) + ' ' if bcma else '',
+        rb=dump_reg(BCMODE[bcmb], bc_b(ins)) + ' ' if bcmb else '',
+        rcd=dump_reg(
+            BCMODE[bcmcd], bc_c(ins) if bcmb else bc_d(ins),
+            jmp_format=jmp_format, jmp_ctx=jmp_ctx
+        ) if bcmcd else '',
+        kc=kca + kcc
+    )
+
+
+def dump_proto(proto):
+    startbc = proto_bc(proto)
+    func_loc = proto_loc(proto)
+    # Location has the following format: '{chunk}:{firstline}'.
+    dump = '{func_loc}-{lastline}\n'.format(
+        func_loc=func_loc,
+        lastline=proto['firstline'] + proto['numline'],
+    )
+
+    def jmp_format(npc_from, delta):
+        return '=> ' + str(npc_from + delta).zfill(4)
+
+    for bcnum in range(0, int(proto['sizebc'])):
+        dump += (str(bcnum).zfill(4) + ' ' + dump_bc(
+            startbc[bcnum], jmp_format=jmp_format, jmp_ctx=bcnum, proto=proto,
+        ) + '\n')
+    return dump
+
+
+def dump_func(func):
+    ffid = func['ffid']
+
+    if ffid == 0:
+        pt = funcproto(func)
+        return dump_proto(pt)
+    elif ffid == 1:
+        return 'C function @ {}\n'.format(strx64(func['f']))
+    else:
+        return 'fast function #{}\n'.format(int(ffid))
+
+
 # Extension commands. ############################################
 
 
@@ -1152,6 +1475,59 @@ pointers, respectively.
         )
 
 
+class LJDumpBC(dbg.LJBase):
+    '''
+lj-bc <BCIns *>
+
+The command receives a pointer to a bytecode instruction and dumps
+the type of the instruction and the values of RA, RB, and RC (or RD)
+virtual registers and their modes (operand types):
+
+<BCNAME>  <modeA>: <RA>
+<BCNAME>  <modeA>: <RA>  <modeB>: <RB>  <modeC>: <RC> ; <const> ; <uvname>
+<BCNAME>  <modeA>: <RA>  <modeD>: <RD>
+
+<BCNAME>: Name of the bytecode instruction
+<R[ABCD]>: The value of the R[ABCD] virtual register operand
+<mode[ABCD]>: The operand type for the R[ABCD] register
+<const>: The value of the constant associated with the operand, if any
+<uvname>: The name of the upvalue, if any
+
+For the list of bytecode names and modes (operand types), see:
+https://github.com/tarantool/tarantool/wiki/LuaJIT-Bytecodes.
+    '''
+
+    def execute(self, arg):
+        dbg.write('{}\n'.format(
+            dump_bc(dbg.cast('BCIns *', dbg.eval(arg))[0])
+        ))
+
+
+class LJDumpFunc(dbg.LJBase):
+    '''
+lj-func <GCfunc *>
+
+The command receives a <gcr> of the corresponding GCfunc object and dumps
+the chunk name, where the corresponding function is defined, the
+corresponding range of lines, and a list of bytecodes related to this
+function:
+
+<file>:<start>-<end>
+<bcnum>  <BC>
+...
+<bcnum>  <BC>
+
+<file>: The location of the corresponding function definition
+<start>: The number of the line where the function starts
+<end>: The number of the line where the function ends
+<bcnum>: The sequential number of the bytecode instruction
+<BC>: The encoded bytecode instruction. Type "help lj-bc" for details.
+    '''
+
+    def execute(self, arg):
+        dbg.write('{}'.format(dump_func(dbg.cast('GCfuncC *', dbg.eval(arg)))))
+
+
 class LJGC(dbg.LJBase):
     '''
 lj-gc
@@ -1208,6 +1584,33 @@ error message occurs.
         dbg.write('{}\n'.format(dump_gcobj(gcobj)))
 
 
+class LJDumpProto(dbg.LJBase):
+    '''
+lj-proto <GCproto *>
+
+The command receives a <gcr> of the corresponding GCproto object and dumps
+the chunk name, where the corresponding function is defined, the
+corresponding range of lines, and a list of bytecodes related to this
+function:
+
+<file>:<start>-<end>
+<bcnum>  <BC>
+...
+<bcnum>  <BC>
+
+<file>: The location of the corresponding function definition
+<start>: The number of the line where the function starts
+<end>: The number of the line where the function ends
+<bcnum>: The sequential number of the bytecode instruction
+<BC>: The encoded bytecode instruction. Type "help lj-bc" for details.
+    '''
+
+    def execute(self, arg):
+        dbg.write('{}'.format(
+            dump_proto(dbg.cast('GCproto *', dbg.eval(arg)))
+        ))
+
+
 class LJDumpStack(dbg.LJBase):
     '''
 lj-stack [<lua_State *>]
@@ -1368,8 +1771,11 @@ error message occurs.
 def load(event=None):
     dbg.initialize_extension({
         'lj-arch':  LJDumpArch,
+        'lj-bc':    LJDumpBC,
+        'lj-func':  LJDumpFunc,
         'lj-gc':    LJGC,
         'lj-gco':   LJDumpGCobj,
+        'lj-proto': LJDumpProto,
         'lj-stack': LJDumpStack,
         'lj-state': LJState,
         'lj-str':   LJDumpString,
diff --git a/test/tarantool-debugger-tests/debug-extension-tests.py b/test/tarantool-debugger-tests/debug-extension-tests.py
index 7e2b5ac4..b677942c 100644
--- a/test/tarantool-debugger-tests/debug-extension-tests.py
+++ b/test/tarantool-debugger-tests/debug-extension-tests.py
@@ -45,6 +45,7 @@ else:
 
 RX_ADDR = r'0x[a-f0-9]+'
 RX_HASH = RX_ADDR  # The same pattern for hexademic values.
+RX_BCN = r'00\d\d'
 RX_FRAME = r'\[(S|\s)(B|\s)(T|\s)(M|\s)\]'
 
 
@@ -149,14 +150,25 @@ def gcval(arg):
         return 'gcval(' + arg + ')'
 
 
+def mref(arg, tp):
+    if sys.platform == 'darwin':
+        # Assume GC64 build only.
+        return '((' + tp + '*)(' + arg + ').ptr64)'
+    else:
+        return 'mref(' + arg + ', ' + tp + ')'
+
+
 class TestLoad(TestCaseBase):
     extension_cmds = ''
     location = 'lj_cf_print'
     lua_script = 'print(1)'
     pattern = (
         r'lj-arch command initialized\n'
+        r'lj-bc command initialized\n'
+        r'lj-func command initialized\n'
         r'lj-gc command initialized\n'
         r'lj-gco command initialized\n'
+        r'lj-proto command initialized\n'
         r'lj-stack command initialized\n'
         r'lj-state command initialized\n'
         r'lj-str command initialized\n'
@@ -359,6 +371,73 @@ class TestLJGCo(TestCaseBase):
     pattern = GCO_RX
 
 
+PROTO_FUNC_SCRIPT = (
+    'local uvname = false\n'
+    'local function testf(...)\n'
+    '  local a = ...\n'
+    '  local s1 = a + 42\n'
+    '  uvname = "conststr"\n'
+    '  if a >= 42 then\n'
+    '    return a - s1\n'
+    '  end\n'
+    'end\n'
+    'print(testf)\n'
+)
+
+
+PROTO_FUNC_BC_RX = (
+    RX_BCN + r' FUNCV  rbase:   \d\s*\n' +
+    RX_BCN + r' VARG   base:    \d lit:     \d lit:     \d\s*\n' +
+    RX_BCN + r' ADDVN  dst:     \d var:     \d num: +\d' +
+             r' ; ' + RX_INT + r' 42\s*\n' +
+    RX_BCN + r' USETS  uv:      \d str:     \d' +
+             r' ; upvalue "uvname" @ ' + RX_ADDR +
+             r' ; string "conststr" @ ' + RX_ADDR + r'\s*\n' +
+    RX_BCN + r' KSHORT dst:     \d lits:   42\s*\n' +
+    RX_BCN + r' ISGT   var:     \d var:     \d\s*\n' +
+    RX_BCN + r' JMP    rbase:   \d jump:  => ' + RX_BCN + r'\s*\n' +
+    RX_BCN + r' SUBVV  dst:     \d var:     \d var:     \d\s*\n' +
+    RX_BCN + r' RET1   rbase:   \d lit:     \d\s*\n' +
+    RX_BCN + r' RET0   rbase:   \d lit:     \d\s*\n'
+)
+
+
+class TestLJFunc(TestCaseBase):
+    location = 'lj_cf_print'
+    extension_cmds = 'lj-func ' + gcval('L->base')
+    lua_script = PROTO_FUNC_SCRIPT
+    pattern = PROTO_FUNC_BC_RX
+
+
+class TestLJProto(TestCaseBase):
+    location = 'lj_cf_print'
+    extension_cmds = (
+        'lj-proto '
+        '  ((char *) ' + mref(
+            '((GCfuncL *)' + gcval('L->base') + ')->pc', 'char'
+        ) + ') - sizeof(GCproto)\n'
+    )
+    lua_script = PROTO_FUNC_SCRIPT
+    pattern = PROTO_FUNC_BC_RX
+
+
+class TestLJBC(TestCaseBase):
+    location = 'lj_cf_print'
+    extension_cmds = (
+        'lj-bc ' + mref(
+            '((GCfuncL *)' + gcval('L->base') + ')->pc', 'BCIns'
+        ) + '\n'
+        'lj-bc ' + mref(
+            '((GCfuncL *)' + gcval('L->base') + ')->pc', 'BCIns'
+        ) + ' + 6\n'
+    )
+    lua_script = PROTO_FUNC_SCRIPT
+    pattern = (
+        r'FUNCV  rbase:   \d\s*\n'
+        r'JMP    rbase:   \d jump:  \+\d\n'
+    )
+
+
 for test_cls in TestCaseBase.__subclasses__():
     test_cls.test = lambda self: self.check()
 
-- 
2.54.0


  parent reply	other threads:[~2026-06-04  9:33 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-04  9:30 [Tarantool-patches] [PATCH luajit 0/4] Introduce dumpers for bytecodes in debuggers Sergey Kaplun via Tarantool-patches
2026-06-04  9:30 ` [Tarantool-patches] [PATCH luajit 1/4] dbg: fix lj-stack command for LLDB Sergey Kaplun via Tarantool-patches
2026-06-05 14:55   ` Sergey Bronnikov via Tarantool-patches
2026-06-04  9:30 ` [Tarantool-patches] [PATCH luajit 2/4] dbg: fix DUALNUM detection " Sergey Kaplun via Tarantool-patches
2026-06-05 14:57   ` Sergey Bronnikov via Tarantool-patches
2026-06-05 16:01     ` Sergey Kaplun via Tarantool-patches
2026-06-04  9:30 ` [Tarantool-patches] [PATCH luajit 3/4] dbg: introduce lj-gco command Sergey Kaplun via Tarantool-patches
2026-06-05 15:02   ` Sergey Bronnikov via Tarantool-patches
2026-06-04  9:30 ` Sergey Kaplun via Tarantool-patches [this message]
2026-06-05 15:07   ` [Tarantool-patches] [PATCH luajit 4/4] dbg: introduce lj-bc, lj-func and lj-proto dumpers Sergey Bronnikov via Tarantool-patches
2026-06-05 16:10     ` Sergey Kaplun via Tarantool-patches
2026-06-05 14:55 ` [Tarantool-patches] [PATCH luajit 0/4] Introduce dumpers for bytecodes in debuggers Sergey Bronnikov via Tarantool-patches
2026-06-05 16:03 ` [Tarantool-patches] [PATCH luajit 3/5] dbg: update help for the lj-arch command Sergey Kaplun via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260604093052.2221827-5-skaplun@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=e.temirgaleev@tarantool.org \
    --cc=sergeyb@tarantool.org \
    --cc=skaplun@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit 4/4] dbg: introduce lj-bc, lj-func and lj-proto dumpers' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox