From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id D9A496ECF8; Thu, 4 Jun 2026 12:33:33 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org D9A496ECF8 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1780565614; bh=yUxxI1YG6oI4B6o7O8IQc9XY/DkfW6SknOo4S9q9QYI=; h=To:Date:In-Reply-To:References:Subject:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To:Cc: From; b=vIfoJfFrAQGv3DjOLeuUqZFKagwyrGF5etq/F2RJPdSV1Pq3bi8dpnT4K1dtCeadO R/jYRhx3WLgi9hxU3E2MAHuRDD2uTN8X0zodZwR9nrUbg86kKVaGoecuPfJttmmagw GNkGjj36XLEtURNCXD2bYPazkLN2wlN0QsDCZen4= Received: from send81.i.mail.ru (send81.i.mail.ru [89.221.237.176]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id A9D716ECF8 for ; Thu, 4 Jun 2026 12:31:34 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org A9D716ECF8 Received: by exim-smtp-5b85998476-kbmmh with esmtpa (envelope-from ) id 1wV4QH-00000000I8o-2F1S; Thu, 04 Jun 2026 12:31:34 +0300 To: Sergey Bronnikov , Evgeniy Temirgaleev Date: Thu, 4 Jun 2026 12:30:52 +0300 Message-ID: <20260604093052.2221827-5-skaplun@tarantool.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260604093052.2221827-1-skaplun@tarantool.org> References: <20260604093052.2221827-1-skaplun@tarantool.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Mailru-Src: smtp X-4EC0790: 10 X-7564579A: 646B95376F6C166E X-77F55803: 4F1203BC0FB41BD9FAD06046D747065BA79DE29C7F6902231ABDD09BBCC3CDE2182A05F538085040F6C41414CC95B7D23DE06ABAFEAF67055F59BAC7243D15BC91590B24FD7EC5C2CAABEEF29940EAE0 X-7FA49CB5: 
FF5795518A3D127A4AD6D5ED66289B5278DA827A17800CE7AC4684DF4EC4B256EA1F7E6F0F101C67BD4B6F7A4D31EC0BCC500DACC3FED6E28638F802B75D45FF8AA50765F7900637AC83A81C8FD4AD23D82A6BABE6F325AC2E85FA5F3EDFCBAA7353EFBB5533756660A56BB7F6F20B60E7836A0BA8B9D6606F911E34BE6722F3CCB9591D5FF5AC3F389733CBF5DBD5E913377AFFFEAFD269176DF2183F8FC7C078FCF50C7EAF9C588941B15DA834481FCF19DD082D7633A0EF3E4896CB9E6436389733CBF5DBD5E9D5E8D9A59859A8B65FF0BFC5AEE34BE6CC7F00164DA146DA6F5DAA56C3B73B237318B6A418E8EAB86D1867E19FE14079C09775C1D3CA48CF17B107DEF921CE791DD303D21008E298D5E8D9A59859A8B6B372FE9A2E580EFC725E5C173C3A84C37727919777A35F2B35872C767BF85DA2F004C90652538430E4A6367B16DE6309 X-C1DE0DAB: 0D63561A33F958A515B1CEBDF28A4C7E5002B1117B3ED696B037467BAB692C731E49B01306B5E3AD823CB91A9FED034534781492E4B8EEAD0605949680455D49C79554A2A72441328621D336A7BC284946AD531847A6065A535571D14F44ED41 X-C8649E89: 1C3962B70DF3F0AD73CAD6646DEDE191716CD42B3DD1D34CAB70F9BE574AE9C625B6776AC983F447FC0B9F89525902EE6F57B2FD27647F25E66C117BDB76D65932720DEC7E91B3939803000C3EB946B9615BB4ACB5BD2CCB4964F656EB783F7EB0751E929E958070B8341EE9D5BE9A0A567EF7813A02B178EF77CD829DB9C1E01EB65843CABF88E0C7CEAA0681F5848F4C41F94D744909CECFA6C6B0C050A61A8CAF69B82BA93681CD72808BE417F3B9E0E7457915DAA85F X-D57D3AED: 3ZO7eAau8CL7WIMRKs4sN3D3tLDjz0dLbV79QFUyzQ2Ujvy7cMT6pYYqY16iZVKkSc3dCLJ7zSJH7+u4VD18S7Vl4ZUrpaVfd2+vE6kuoey4m4VkSEu53w8ahmwBjZKM/YPHZyZHvz5uv+WouB9+ObcCpyrx6l7KImUglyhkEat/+ysWwi0gdhEs0JGjl6ggRWTy1haxBpVdbIX1nthFXMZebaIdHP2ghjoIc/363UZI6Kf1ptIMVRI2994ruhLUheLUiGjSS9Q= X-DA7885C5: 06F970EE72A4778BF255D290C0D534F98D567B4BA99E4E71EE97DA585A1A5ECC5BD6B145813641365B1A4C17EAA7BC4BEF2421ABFA55128DAF83EF9164C44C7E X-Mailru-Sender: 689FA8AB762F7393520AF17B8A65FDE2BFCF09240C7B46256034FF430E69FBFF34BC2643AECE2681E49D44BB4BD9522A059A1ED8796F048DB274557F927329BE89D5A3BC2B10C37545BD1C3CC395C826B4A721A3011E896F X-Mras: Ok Subject: [Tarantool-patches] [PATCH luajit 4/4] dbg: introduce lj-bc, lj-func and lj-proto dumpers X-BeenThere: 
tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Sergey Kaplun via Tarantool-patches Reply-To: Sergey Kaplun Cc: tarantool-patches@dev.tarantool.org Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" This patch adds dumpers for a single bytecode instruction (`lj-bc`), as well as for all bytecodes inside one function (`lj-func`) or prototype (`lj-proto`). The dump is similar to the output of the -bl flag, but it also reports the types of register operands (`jump`, `dst`, `str`, etc.). For LLDB, the result of the `lookup_global()` method is now wrapped in an `lldb.value` object to make it more convenient to use. Part of tarantool/tarantool#4808 --- src/luajit_dbg.py | 416 +++++++++++++++++- .../debug-extension-tests.py | 79 ++++ 2 files changed, 490 insertions(+), 5 deletions(-) diff --git a/src/luajit_dbg.py b/src/luajit_dbg.py index f5868e61..60308179 100644 --- a/src/luajit_dbg.py +++ b/src/luajit_dbg.py @@ -475,7 +475,7 @@ class _LLDBDebugger(Debugger): return strptr.sbvalue.summary def lookup_global(self, symbol): - return self.target.FindFirstGlobalVariable(symbol) + return lldb.value(self.target.FindFirstGlobalVariable(symbol)) def eval(self, command): if not command: @@ -648,6 +648,202 @@ def itypemap(o): return LJ_T['NUMX'] if tvisnumber(o) else itype(o) +# Bytecode. 
+ +def bc_op(ins): + return int(ins) & 0xff + + +def bc_a(ins): + return (int(ins) >> 8) & 0xff + + +def bc_b(ins): + return int(ins) >> 24 + + +def bc_c(ins): + return (int(ins) >> 16) & 0xff + + +def bc_d(ins): + return int(ins) >> 16 + + +BCMODE = [ + 'none', 'dst', 'base', 'var', 'rbase', 'uv', + 'lit', 'lits', 'pri', 'num', 'str', 'tab', 'func', 'jump', 'cdata', +] + + +lj_bc_mode_ = None + + +def lj_bc_mode(): + global lj_bc_mode_ + if lj_bc_mode_: + return lj_bc_mode_ + lj_bc_mode_ = dbg.lookup_global('lj_bc_mode') + return lj_bc_mode_ + + +def bcmode_a(op): + return int(lj_bc_mode()[op] & 7) + + +def bcmode_b(op): + return int((lj_bc_mode()[op] >> 3) & 15) + + +def bcmode_cd(op): + return int((lj_bc_mode()[op] >> 7) & 15) + + +# Unfortunately, there is no place in the VM except the generated +# Lua table, where the bytecode names are stored. So duplicate +# them here. +BYTECODES = [ + # Comparison ops. ORDER OPR. + 'ISLT', + 'ISGE', + 'ISLE', + 'ISGT', + + 'ISEQV', + 'ISNEV', + 'ISEQS', + 'ISNES', + 'ISEQN', + 'ISNEN', + 'ISEQP', + 'ISNEP', + + # Unary test and copy ops. + 'ISTC', + 'ISFC', + 'IST', + 'ISF', + 'ISTYPE', + 'ISNUM', + 'MOV', + 'NOT', + 'UNM', + 'LEN', + 'ADDVN', + 'SUBVN', + 'MULVN', + 'DIVVN', + 'MODVN', + + # Binary ops. ORDER OPR. + 'ADDNV', + 'SUBNV', + 'MULNV', + 'DIVNV', + 'MODNV', + + 'ADDVV', + 'SUBVV', + 'MULVV', + 'DIVVV', + 'MODVV', + + 'POW', + 'CAT', + + # Constant ops. + 'KSTR', + 'KCDATA', + 'KSHORT', + 'KNUM', + 'KPRI', + 'KNIL', + + # Upvalue and function ops. + 'UGET', + 'USETV', + 'USETS', + 'USETN', + 'USETP', + 'UCLO', + 'FNEW', + + # Table ops. + 'TNEW', + 'TDUP', + 'GGET', + 'GSET', + 'TGETV', + 'TGETS', + 'TGETB', + 'TGETR', + 'TSETV', + 'TSETS', + 'TSETB', + 'TSETM', + 'TSETR', + + # Calls and vararg handling. T = tail call. + 'CALLM', + 'CALL', + 'CALLMT', + 'CALLT', + 'ITERC', + 'ITERN', + 'VARG', + 'ISNEXT', + + # Returns. + 'RETM', + 'RET', + 'RET0', + 'RET1', + + # Loops and branches. I/J = interp/JIT. 
+ # I/C/L = init/call/loop. + 'FORI', + 'JFORI', + + 'FORL', + 'IFORL', + 'JFORL', + + 'ITERL', + 'IITERL', + 'JITERL', + + 'LOOP', + 'ILOOP', + 'JLOOP', + + 'JMP', + + # Function headers. I/J = interp/JIT. + # F/V/C = fixarg/vararg/C func. + 'FUNCF', + 'IFUNCF', + 'JFUNCF', + 'FUNCV', + 'IFUNCV', + 'JFUNCV', + 'FUNCC', + 'FUNCCW', +] + + +def proto_bc(proto): + return dbg.cast('BCIns *', + dbg.cast('char *', proto) + dbg.sizeof('GCproto')) + + +def proto_kgc(pt, idx): + return gcref(mref('GCRef *', pt['k'])[idx]) + + +def proto_knumtv(pt, idx): + return mref('TValue *', pt['k'])[idx] + + # Frames. @@ -676,10 +872,6 @@ def frametypes(ft): }.get(ft, '?') -def bc_a(ins): - return (ins >> 8) & 0xff - - def frame_ftsz(framelink): return dbg.cast('ptrdiff_t', framelink['ftsz'] if LJ_FR2 else framelink['fr']['tp']['ftsz']) @@ -1129,6 +1321,137 @@ def dump_gc(g): return '\n'.join(map(lambda s: '\t' + s, stats)) +def proto_loc(proto): + return '{chunk}:{firstline}'.format( + chunk=strdata(dbg.cast('GCstr *', gcval(proto['chunkname']))), + firstline=proto['firstline'], + ) + + +def funck(pt, idx): + if idx >= 0: + assert idx < pt['sizekn'], 'invalid idx for numeric constant in proto' + tv = proto_knumtv(pt, idx) + return dump_tvalue(tv) + else: + assert ~idx < pt['sizekgc'], 'invalid idx for GC constant in proto' + gcobj = proto_kgc(pt, idx) + if typenames(i2notu32(gcobj['gch']['gct'])) == 'LJ_TPROTO': + return proto_loc(dbg.cast('GCproto *', gcobj)) + return dump_gcobj(gcobj) + + +def funcuvname(pt, idx): + assert idx < pt['sizeuv'], 'invalid idx for upvalue in proto' + uvinfo = mref('uint8_t *', pt['uvinfo']) + if not uvinfo: + return '' + + # if (idx) while (*uvinfo++ || --idx); + while idx > 0: + while uvinfo[0]: + uvinfo += 1 + uvinfo += 1 + idx -= 1 + + return 'upvalue {name} @ {addr}'.format( + name=dbg.cstr(dbg.cast('char *', uvinfo)), + addr=strx64(uvinfo) + ) + + +def dump_reg(rtype, value, jmp_format=None, jmp_ctx=None): + if rtype == 'jump': + # Destination of 
jump instruction encoded as offset from + # BCBIAS_J. + delta = value - 0x7fff + if jmp_format: + value = jmp_format(jmp_ctx, delta) + else: + prefix = '+' if delta >= 0 else '' + value = prefix + str(delta) + else: + value = '{:3d}'.format(value) + + return '{rtype:6} {value}'.format( + rtype=rtype + ':', + value=value, + ) + + +def dump_kc(rtype, value, proto): + kc = '' + if proto: + if rtype == 'str' or rtype == 'func': + kc = funck(proto, ~value) + elif rtype == 'num': + kc = funck(proto, value) + elif rtype == 'uv': + kc = funcuvname(proto, value) + + if kc != '': + kc = ' ; ' + kc + return kc + + +def dump_bc(ins, jmp_format=None, jmp_ctx=None, proto=None): + op = bc_op(ins) + if op >= len(BYTECODES): + return 'INVALID' + + bcname = BYTECODES[op] + bcma = bcmode_a(op) + bcmb = bcmode_b(op) + bcmcd = bcmode_cd(op) + + kca = dump_kc(BCMODE[bcma], bc_a(ins), proto) if bcma else '' + kcc = dump_kc( + BCMODE[bcmcd], bc_c(ins) if bcmb else bc_d(ins), proto + ) if bcmcd else '' + + return '{name:6} {ra}{rb}{rcd}{kc}'.format( + name=bcname, + ra=dump_reg(BCMODE[bcma], bc_a(ins)) + ' ' if bcma else '', + rb=dump_reg(BCMODE[bcmb], bc_b(ins)) + ' ' if bcmb else '', + rcd=dump_reg( + BCMODE[bcmcd], bc_c(ins) if bcmb else bc_d(ins), + jmp_format=jmp_format, jmp_ctx=jmp_ctx + ) if bcmcd else '', + kc=kca + kcc + ) + + +def dump_proto(proto): + startbc = proto_bc(proto) + func_loc = proto_loc(proto) + # Location has the following format: '{chunk}:{firstline}'. 
+ dump = '{func_loc}-{lastline}\n'.format( + func_loc=func_loc, + lastline=proto['firstline'] + proto['numline'], + ) + + def jmp_format(npc_from, delta): + return '=> ' + str(npc_from + delta).zfill(4) + + for bcnum in range(0, int(proto['sizebc'])): + dump += (str(bcnum).zfill(4) + ' ' + dump_bc( + startbc[bcnum], jmp_format=jmp_format, jmp_ctx=bcnum, proto=proto, + ) + '\n') + return dump + + +def dump_func(func): + ffid = func['ffid'] + + if ffid == 0: + pt = funcproto(func) + return dump_proto(pt) + elif ffid == 1: + return 'C function @ {}\n'.format(strx64(func['f'])) + else: + return 'fast function #{}\n'.format(int(ffid)) + + # Extension commands. ############################################ @@ -1152,6 +1475,59 @@ pointers, respectively. ) +class LJDumpBC(dbg.LJBase): + ''' +lj-bc + +The command receives a pointer to a bytecode instruction and dumps +the type of the instruction and the values of RA, RB, and RC (or RD) +virtual registers and their modes (operand types): + + : + : : : ; ; + : : + +: Name of the bytecode instruction +: The value of the R[ABCD] virtual register operand +: The operand type for the R[ABCD] register +: The value of the constant associated with the operand, if any +: The name of the upvalue, if any + +For the list of bytecode names and modes (operand types), see: +https://github.com/tarantool/tarantool/wiki/LuaJIT-Bytecodes. + ''' + + def execute(self, arg): + dbg.write('{}\n'.format( + dump_bc(dbg.cast('BCIns *', dbg.eval(arg))[0]) + )) + + +class LJDumpFunc(dbg.LJBase): + ''' +lj-func + +The command receives a of the corresponding GCfunc object and dumps +the chunk name, where the corresponding function is defined, the +corresponding range of lines, and a list of bytecodes related to this +function: + +:- + +... 
+ + +: The location of the corresponding function definition +: The number of the line where the function starts +: The number of the line where the function ends +: The sequential number of the bytecode instruction +: The encoded bytecode instruction. Type "help lj-bc" for details. + ''' + + def execute(self, arg): + dbg.write('{}'.format(dump_func(dbg.cast('GCfuncC *', dbg.eval(arg))))) + + class LJGC(dbg.LJBase): ''' lj-gc @@ -1208,6 +1584,33 @@ error message occurs. dbg.write('{}\n'.format(dump_gcobj(gcobj))) +class LJDumpProto(dbg.LJBase): + ''' +lj-proto + +The command receives a of the corresponding GCproto object and dumps +the chunk name, where the corresponding function is defined, the +corresponding range of lines, and a list of bytecodes related to this +function: + +:- + +... + + +: The location of the corresponding function definition +: The number of the line where the function starts +: The number of the line where the function ends +: The sequential number of the bytecode instruction +: The encoded bytecode instruction. Type "help lj-bc" for details. + ''' + + def execute(self, arg): + dbg.write('{}'.format( + dump_proto(dbg.cast('GCproto *', dbg.eval(arg))) + )) + + class LJDumpStack(dbg.LJBase): ''' lj-stack [] @@ -1368,8 +1771,11 @@ error message occurs. def load(event=None): dbg.initialize_extension({ 'lj-arch': LJDumpArch, + 'lj-bc': LJDumpBC, + 'lj-func': LJDumpFunc, 'lj-gc': LJGC, 'lj-gco': LJDumpGCobj, + 'lj-proto': LJDumpProto, 'lj-stack': LJDumpStack, 'lj-state': LJState, 'lj-str': LJDumpString, diff --git a/test/tarantool-debugger-tests/debug-extension-tests.py b/test/tarantool-debugger-tests/debug-extension-tests.py index 7e2b5ac4..b677942c 100644 --- a/test/tarantool-debugger-tests/debug-extension-tests.py +++ b/test/tarantool-debugger-tests/debug-extension-tests.py @@ -45,6 +45,7 @@ else: RX_ADDR = r'0x[a-f0-9]+' RX_HASH = RX_ADDR # The same pattern for hexademic values. 
+RX_BCN = r'00\d\d' RX_FRAME = r'\[(S|\s)(B|\s)(T|\s)(M|\s)\]' @@ -149,14 +150,25 @@ def gcval(arg): return 'gcval(' + arg + ')' +def mref(arg, tp): + if sys.platform == 'darwin': + # Assume GC64 build only. + return '((' + tp + '*)(' + arg + ').ptr64)' + else: + return 'mref(' + arg + ', ' + tp + ')' + + class TestLoad(TestCaseBase): extension_cmds = '' location = 'lj_cf_print' lua_script = 'print(1)' pattern = ( r'lj-arch command initialized\n' + r'lj-bc command initialized\n' + r'lj-func command initialized\n' r'lj-gc command initialized\n' r'lj-gco command initialized\n' + r'lj-proto command initialized\n' r'lj-stack command initialized\n' r'lj-state command initialized\n' r'lj-str command initialized\n' @@ -359,6 +371,73 @@ class TestLJGCo(TestCaseBase): pattern = GCO_RX +PROTO_FUNC_SCRIPT = ( + 'local uvname = false\n' + 'local function testf(...)\n' + ' local a = ...\n' + ' local s1 = a + 42\n' + ' uvname = "conststr"\n' + ' if a >= 42 then\n' + ' return a - s1\n' + ' end\n' + 'end\n' + 'print(testf)\n' +) + + +PROTO_FUNC_BC_RX = ( + RX_BCN + r' FUNCV rbase: \d\s*\n' + + RX_BCN + r' VARG base: \d lit: \d lit: \d\s*\n' + + RX_BCN + r' ADDVN dst: \d var: \d num: +\d' + + r' ; ' + RX_INT + r' 42\s*\n' + + RX_BCN + r' USETS uv: \d str: \d' + + r' ; upvalue "uvname" @ ' + RX_ADDR + + r' ; string "conststr" @ ' + RX_ADDR + r'\s*\n' + + RX_BCN + r' KSHORT dst: \d lits: 42\s*\n' + + RX_BCN + r' ISGT var: \d var: \d\s*\n' + + RX_BCN + r' JMP rbase: \d jump: => ' + RX_BCN + r'\s*\n' + + RX_BCN + r' SUBVV dst: \d var: \d var: \d\s*\n' + + RX_BCN + r' RET1 rbase: \d lit: \d\s*\n' + + RX_BCN + r' RET0 rbase: \d lit: \d\s*\n' +) + + +class TestLJFunc(TestCaseBase): + location = 'lj_cf_print' + extension_cmds = 'lj-func ' + gcval('L->base') + lua_script = PROTO_FUNC_SCRIPT + pattern = PROTO_FUNC_BC_RX + + +class TestLJProto(TestCaseBase): + location = 'lj_cf_print' + extension_cmds = ( + 'lj-proto ' + ' ((char *) ' + mref( + '((GCfuncL *)' + gcval('L->base') + ')->pc', 
'char' + ) + ') - sizeof(GCproto)\n' + ) + lua_script = PROTO_FUNC_SCRIPT + pattern = PROTO_FUNC_BC_RX + + +class TestLJBC(TestCaseBase): + location = 'lj_cf_print' + extension_cmds = ( + 'lj-bc ' + mref( + '((GCfuncL *)' + gcval('L->base') + ')->pc', 'BCIns' + ) + '\n' + 'lj-bc ' + mref( + '((GCfuncL *)' + gcval('L->base') + ')->pc', 'BCIns' + ) + ' + 6\n' + ) + lua_script = PROTO_FUNC_SCRIPT + pattern = ( + r'FUNCV rbase: \d\s*\n' + r'JMP rbase: \d jump: \+\d\n' + ) + + for test_cls in TestCaseBase.__subclasses__(): test_cls.test = lambda self: self.check() -- 2.54.0