Tarantool development patches archive
 help / color / mirror / Atom feed
From: Sergey Kaplun via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Maxim Kokryashkin <m.kokryashkin@tarantool.org>,
	Igor Munkin <imun@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH luajit 2/2] gdb: introduce lj-bc, lj-func and lj-proto dumpers
Date: Thu,  9 Jun 2022 13:11:14 +0300	[thread overview]
Message-ID: <6ef8aa43d1a571fef477550885e24faf1e4b962b.1654767443.git.skaplun@tarantool.org> (raw)
In-Reply-To: <cover.1654767443.git.skaplun@tarantool.org>

This patch adds dumpers both for a single bytecode instruction (`lj-bc`)
and for all bytecodes inside one function (`lj-func`) or prototype
(`lj-proto`). The dump is quite similar to the output of the -bl flag, but
also reports the types of the register operands (`jump`, `dst`, `str`, etc.).
---
 src/luajit-gdb.py | 324 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 323 insertions(+), 1 deletion(-)

diff --git a/src/luajit-gdb.py b/src/luajit-gdb.py
index 779a25f8..339b57ed 100644
--- a/src/luajit-gdb.py
+++ b/src/luajit-gdb.py
@@ -113,8 +113,162 @@ def frametypes(ft):
         FRAME['VARG'] : 'V',
     }.get(ft, '?')
 
+def bc_op(ins):
+    return int(ins) & 0xff
+
 def bc_a(ins):
-    return (ins >> 8) & 0xff
+    return (int(ins) >> 8) & 0xff
+
+def bc_b(ins):
+    return int(ins) >> 24
+
+def bc_c(ins):
+    return (int(ins) >> 16) & 0xff
+
+def bc_d(ins):
+    return int(ins) >> 16
+
+___ = 0
+BC_NAME = 0
+BC_A = 1
+BC_B = 2
+BC_CD = 3
+BC_MM = 4
+
+BYTECODES = [
+    # Comparison ops. ORDER OPR.
+    ['ISLT',   'var',  ___,     'var',    'lt'],
+    ['ISGE',   'var',  ___,     'var',    'lt'],
+    ['ISLE',   'var',  ___,     'var',    'le'],
+    ['ISGT',   'var',  ___,     'var',    'le'],
+
+    ['ISEQV',  'var',  ___,     'var',    'eq'],
+    ['ISNEV',  'var',  ___,     'var',    'eq'],
+    ['ISEQS',  'var',  ___,     'str',    'eq'],
+    ['ISNES',  'var',  ___,     'str',    'eq'],
+    ['ISEQN',  'var',  ___,     'num',    'eq'],
+    ['ISNEN',  'var',  ___,     'num',    'eq'],
+    ['ISEQP',  'var',  ___,     'pri',    'eq'],
+    ['ISNEP',  'var',  ___,     'pri',    'eq'],
+
+    # Unary test and copy ops.
+    ['ISTC',   'dst',  ___,     'var',    ___],
+    ['ISFC',   'dst',  ___,     'var',    ___],
+    ['IST',    ___,    ___,     'var',    ___],
+    ['ISF',    ___,    ___,     'var',    ___],
+    ['ISTYPE', 'var',  ___,     'lit',    ___],
+    ['ISNUM',  'var',  ___,     'lit',    ___],
+    ['MOV',    'dst',  ___,     'var',    ___],
+    ['NOT',    'dst',  ___,     'var',    ___],
+    ['UNM',    'dst',  ___,     'var',    'unm'],
+    ['LEN',    'dst',  ___,     'var',    'len'],
+    ['ADDVN',  'dst',  'var',   'num',    'add'],
+    ['SUBVN',  'dst',  'var',   'num',    'sub'],
+    ['MULVN',  'dst',  'var',   'num',    'mul'],
+    ['DIVVN',  'dst',  'var',   'num',    'div'],
+    ['MODVN',  'dst',  'var',   'num',    'mod'],
+
+    # Binary ops. ORDER OPR.
+    ['ADDNV',  'dst',  'var',   'num',    'add'],
+    ['SUBNV',  'dst',  'var',   'num',    'sub'],
+    ['MULNV',  'dst',  'var',   'num',    'mul'],
+    ['DIVNV',  'dst',  'var',   'num',    'div'],
+    ['MODNV',  'dst',  'var',   'num',    'mod'],
+
+    ['ADDVV',  'dst',  'var',   'var',    'add'],
+    ['SUBVV',  'dst',  'var',   'var',    'sub'],
+    ['MULVV',  'dst',  'var',   'var',    'mul'],
+    ['DIVVV',  'dst',  'var',   'var',    'div'],
+    ['MODVV',  'dst',  'var',   'var',    'mod'],
+
+    ['POW',    'dst',  'var',   'var',    'pow'],
+    ['CAT',    'dst',  'rbase', 'rbase',  'concat'],
+
+    # Constant ops.
+    ['KSTR',   'dst',   ___,    'str',    ___],
+    ['KCDATA', 'dst',   ___,    'cdata',  ___],
+    ['KSHORT', 'dst',   ___,    'lits',   ___],
+    ['KNUM',   'dst',   ___,    'num',    ___],
+    ['KPRI',   'dst',   ___,    'pri',    ___],
+    ['KNIL',   'base',  ___,    'base',   ___],
+
+    # Upvalue and function ops.
+    ['UGET',   'dst',    ___,    'uv',     ___],
+    ['USETV',  'uv',     ___,    'var',    ___],
+    ['USETS',  'uv',     ___,    'str',    ___],
+    ['USETN',  'uv',     ___,    'num',    ___],
+    ['USETP',  'uv',     ___,    'pri',    ___],
+    ['UCLO',   'rbase',  ___,    'jump',   ___],
+    ['FNEW',   'dst',    ___,    'func',   ___],
+
+    # Table ops.
+    ['TNEW',   'dst',   ___,    'lit',    ___],
+    ['TDUP',   'dst',   ___,    'tab',    ___],
+    ['GGET',   'dst',   ___,    'str',    'index'],
+    ['GSET',   'var',   ___,    'str',    'newindex'],
+    ['TGETV',  'dst',   'var',  'var',    'index'],
+    ['TGETS',  'dst',   'var',  'str',    'index'],
+    ['TGETB',  'dst',   'var',  'lit',    'index'],
+    ['TGETR',  'dst',   'var',  'var',    'index'],
+    ['TSETV',  'var',   'var',  'var',    'newindex'],
+    ['TSETS',  'var',   'var',  'str',    'newindex'],
+    ['TSETB',  'var',   'var',  'lit',    'newindex'],
+    ['TSETM',  'base',  ___,    'num',    'newindex'],
+    ['TSETR',  'var',   'var',  'var',    'newindex'],
+
+    # Calls and vararg handling. T = tail call.
+    ['CALLM',  'base',  'lit',  'lit',    'call'],
+    ['CALL',   'base',  'lit',  'lit',    'call'],
+    ['CALLMT', 'base',  ___,    'lit',    'call'],
+    ['CALLT',  'base',  ___,    'lit',    'call'],
+    ['ITERC',  'base',  'lit',  'lit',    'call'],
+    ['ITERN',  'base',  'lit',  'lit',    'call'],
+    ['VARG',   'base',  'lit',  'lit',    ___],
+    ['ISNEXT', 'base',  ___,    'jump',   ___],
+
+    # Returns.
+    ['RETM',   'base',  ___,    'lit',    ___],
+    ['RET',    'rbase', ___,    'lit',    ___],
+    ['RET0',   'rbase', ___,    'lit',    ___],
+    ['RET1',   'rbase', ___,    'lit',    ___],
+
+    # Loops and branches. I/J = interp/JIT, I/C/L = init/call/loop.
+    ['FORI',   'base',  ___,    'jump',   ___],
+    ['JFORI',  'base',  ___,    'jump',   ___],
+
+    ['FORL',   'base',  ___,    'jump',   ___],
+    ['IFORL',  'base',  ___,    'jump',   ___],
+    ['JFORL',  'base',  ___,    'lit',    ___],
+
+    ['ITERL',  'base',  ___,    'jump',   ___],
+    ['IITERL', 'base',  ___,    'jump',   ___],
+    ['JITERL', 'base',  ___,    'lit',    ___],
+
+    ['LOOP',   'rbase', ___,    'jump',   ___],
+    ['ILOOP',  'rbase', ___,    'jump',   ___],
+    ['JLOOP',  'rbase', ___,    'lit',    ___],
+
+    ['JMP',    'rbase', ___,    'jump',   ___],
+
+    # Function headers. I/J = interp/JIT, F/V/C = fixarg/vararg/C func.
+    ['FUNCF',  'rbase', ___,    ___,      ___],
+    ['IFUNCF', 'rbase', ___,    ___,      ___],
+    ['JFUNCF', 'rbase', ___,    'lit',    ___],
+    ['FUNCV',  'rbase', ___,    ___,      ___],
+    ['IFUNCV', 'rbase', ___,    ___,      ___],
+    ['JFUNCV', 'rbase', ___,    'lit',    ___],
+    ['FUNCC',  'rbase', ___,    ___,      ___],
+    ['FUNCCW', 'rbase', ___,    ___,      ___],
+]
+
+def proto_bc(proto):
+    return cast('BCIns *', cast('char *', proto) + gdb.lookup_type('GCproto').sizeof)
+
+def proto_kgc(pt, idx):
+    return gcref(mref('GCRef *', pt['k'])[idx])
+
+def proto_knumtv(pt, idx):
+    return mref('TValue *', pt['k'])[idx]
 
 def frame_ftsz(framelink):
     return cast('ptrdiff_t', framelink['ftsz'] if LJ_FR2 \
@@ -561,6 +715,132 @@ def dump_gc(g):
 
     return '\n'.join(map(lambda s: '\t' + s, stats))
 
+def proto_loc(proto):
+    return '{chunk}:{firstline}'.format(
+        chunk = strdata(cast('GCstr *', gcval(proto['chunkname']))),
+        firstline = proto['firstline'],
+    )
+
+def funck(pt, idx):
+    if idx >= 0:
+        assert idx < pt['sizekn'], 'invalid idx for numeric constant in proto'
+        tv = proto_knumtv(pt, idx)
+        return dump_tvalue(tv)
+    else:
+        assert ~idx < pt['sizekgc'], 'invalid idx for GC constant in proto'
+        gcobj = proto_kgc(pt, idx)
+        if typenames(i2notu32(gcobj['gch']['gct'])) == 'LJ_TPROTO':
+            return proto_loc(cast('GCproto *', gcobj))
+        return dump_gcobj(gcobj)
+
+def funcuvname(pt, idx):
+    assert idx < pt['sizeuv'], 'invalid idx for upvalue in proto'
+    uvinfo = mref('uint8_t *', pt['uvinfo'])
+    if not uvinfo:
+        return ''
+
+    # if (idx) while (*uvinfo++ || --idx);
+    while idx > 0:
+        while uvinfo[0]:
+            uvinfo += 1
+        uvinfo += 1
+        idx -= 1
+
+    return str(cast('char *', uvinfo))
+
+def dump_reg(bc, reg, value, jmp_format=None, jmp_ctx=None):
+    # Render one operand of BYTECODES entry `bc` as '<type>:<pad> <value>'.
+    rtype = bc[reg]
+    padding = ':' + ' ' * (5 - len(rtype))
+
+    if rtype == 'jump':
+        # The jump destination is stored in D with the BCBIAS_J bias;
+        # subtracting 0x7fff gives the offset from this instruction's index.
+        delta = value - 0x7fff
+        if jmp_format:
+            value = jmp_format(jmp_ctx, delta)
+        else:
+            value = ('+' if delta >= 0 else '') + str(delta)
+    else:
+        # 'lits' is a signed 16-bit literal (e.g. KSHORT): sign-extend it.
+        if rtype == 'lits' and value > 0x7fff:
+            value -= 0x10000
+        value = '{:3d}'.format(value)
+
+    return '{rtype}{padding} {value}'.format(
+        rtype = rtype, padding = padding, value = value)
+
+def dump_kc(bc, reg, value, proto):
+    rtype = bc[reg]
+    kc = ''
+    if proto:
+        if rtype == 'str' or rtype == 'func':
+            kc = funck(proto, ~value)
+        elif rtype == 'num':
+            kc = funck(proto, value)
+        elif rtype == 'uv':
+            kc = funcuvname(proto, value)
+
+        if kc != '':
+            kc = ' ; ' + kc
+    return kc
+
+def dump_bc(ins, jmp_format=None, jmp_ctx=None, proto=None):
+    op = bc_op(ins)
+    if op >= len(BYTECODES):
+        return 'INVALID'
+
+    bc = BYTECODES[op]
+    bc_name = bc[BC_NAME]
+    name_padding = ' ' * (6 - len(bc_name))
+
+    bc_hasa = bc[BC_A]
+    bc_hasb = bc[BC_B]
+
+    kca = dump_kc(bc, BC_A, bc_a(ins), proto) if bc_hasa else ''
+    kcc = dump_kc(bc, BC_CD, bc_c(ins) if bc_hasb else bc_d(ins), proto) if bc[BC_CD] else ''
+
+    return '{name}{npad} {ra}{rb}{rcd}{kc}'.format(
+        name = bc_name,
+        npad = name_padding,
+        ra = dump_reg(bc, BC_A, bc_a(ins)) + ' ' if bc_hasa else '',
+        rb = dump_reg(bc, BC_B, bc_b(ins)) + ' ' if bc_hasb else '',
+        rcd = dump_reg(
+            bc, BC_CD, bc_c(ins) if bc_hasb else bc_d(ins),
+            jmp_format=jmp_format, jmp_ctx=jmp_ctx
+        ) if bc[BC_CD] else '',
+        kc=kca+kcc
+    )
+
+def dump_proto(proto):
+    startbc = proto_bc(proto)
+    func_loc = proto_loc(proto)
+    # Location has the following format: '{chunk}:{firstline}'.
+    dump = '{func_loc}-{lastline}\n'.format(
+        func_loc = func_loc,
+        lastline = proto['firstline'] + proto['numline'],
+    )
+
+    def jmp_format(npc_from, delta):
+        return '=> ' + str(npc_from + delta).zfill(4)
+
+    for bcnum in range(0, proto['sizebc']):
+        dump += (str(bcnum).zfill(4) + ' ' + dump_bc(
+            startbc[bcnum], jmp_format=jmp_format, jmp_ctx=bcnum,
+            proto = proto,
+        ) + '\n')
+    return dump
+
+def dump_func(func):
+    ffid = func['ffid']
+
+    if ffid == 0:
+        pt = funcproto(func)
+        return dump_proto(pt)
+    elif ffid == 1:
+        return 'C function @ {}'.format(strx64(func['f']))
+    else:
+        return 'fast function #{}'.format(int(ffid))
 
 class LJBase(gdb.Command):
 
@@ -767,6 +1047,45 @@ The command requires no args and dumps current GC stats:
             stats = dump_gc(g)
         ))
 
+class LJDumpBC(LJBase):
+    '''
+lj-bc <BCIns *>
+
+The command receives a pointer to a bytecode instruction and dumps
+the instruction type and the values of its RA, RB and RC (or RD) registers.
+    '''
+
+    def invoke(self, arg, from_tty):
+        gdb.write('{}\n'.format(dump_bc(cast("BCIns *", parse_arg(arg))[0])))
+
+class LJDumpProto(LJBase):
+    '''
+lj-proto <GCproto *>
+
+The command receives a <gcr> of the corresponding GCproto object and dumps
+the chunk name, where the corresponding function is defined, corresponding
+range of lines and a list of bytecodes related to this function.
+
+The constants or upvalues of the prototype are decoded after ';'.
+    '''
+
+    def invoke(self, arg, from_tty):
+        gdb.write('{}'.format(dump_proto(cast("GCproto *", parse_arg(arg)))))
+
+class LJDumpFunc(LJBase):
+    '''
+lj-func <GCfunc *>
+
+The command receives a <gcr> of the corresponding GCfunc object and dumps
+the chunk name, where the corresponding function is defined, corresponding
+range of lines and a list of bytecodes related to this function.
+
+The constants or upvalues of the function are decoded after ';'.
+    '''
+
+    def invoke(self, arg, from_tty):
+        gdb.write('{}'.format(dump_func(cast("GCfuncC *", parse_arg(arg)))))
+
 def init(commands):
     global LJ_64, LJ_GC64, LJ_FR2, LJ_DUALNUM, LJ_TISNUM, PADDING
 
@@ -832,6 +1151,9 @@ def load(event=None):
         'lj-stack': LJDumpStack,
         'lj-state': LJState,
         'lj-gc': LJGC,
+        'lj-bc': LJDumpBC,
+        'lj-proto': LJDumpProto,
+        'lj-func': LJDumpFunc,
     })
 
 load(None)
-- 
2.34.1


  parent reply	other threads:[~2022-06-09 10:14 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-09 10:11 [Tarantool-patches] [PATCH luajit 0/2] Introduce dumpers for bytecodes in gdb Sergey Kaplun via Tarantool-patches
2022-06-09 10:11 ` [Tarantool-patches] [PATCH luajit 1/2] gdb: introduce dumpers for GCobj Sergey Kaplun via Tarantool-patches
2022-07-04 15:24   ` Maxim Kokryashkin via Tarantool-patches
2022-07-14 12:08     ` Sergey Kaplun via Tarantool-patches
2022-07-19  8:39       ` Maxim Kokryashkin via Tarantool-patches
2022-06-09 10:11 ` Sergey Kaplun via Tarantool-patches [this message]
2022-07-04 16:10   ` [Tarantool-patches] [PATCH luajit 2/2] gdb: introduce lj-bc, lj-func and lj-proto dumpers Maxim Kokryashkin via Tarantool-patches
2022-07-14 15:41     ` Sergey Kaplun via Tarantool-patches
2022-07-19  8:40       ` Maxim Kokryashkin via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6ef8aa43d1a571fef477550885e24faf1e4b962b.1654767443.git.skaplun@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=imun@tarantool.org \
    --cc=m.kokryashkin@tarantool.org \
    --cc=skaplun@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit 2/2] gdb: introduce lj-bc, lj-func and lj-proto dumpers' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox