From: Mikhail Shishatskiy via Tarantool-patches <tarantool-patches@dev.tarantool.org> To: tarantool-patches@dev.tarantool.org, imun@tarantool.org, skaplun@tarantool.org Subject: [Tarantool-patches] [PATCH luajit v4 0/4] memprof: group allocations on traces by traceno Date: Wed, 29 Sep 2021 23:07:54 +0300 [thread overview] Message-ID: <20210929200758.149446-1-m.shishatskiy@tarantool.org> (raw) In-Reply-To: <20210820070546.115293-1-m.shishatskiy@tarantool.org> New patch series v4 with fixes as per review by Igor Munkin: Changes in v4: * refactored the modules in order not to violate MVC: now process module uses the symtab.id(loc) as a key (but not the fully demangled name) * reorganized patches: now the first important patch has the location dumping and simple parsing without the trace start info; the second patch adds the information about the trace's start and extends the symtab. @ChangeLog ====================================================================== ##feature/luajit * Now memory profiler records allocations from traces. 
The memory profiler parser can display a new type of allocation source in the following format: ``` | TRACE [<trace-no>] <trace-addr> started at @<sym-chunk>:<sym-line> ``` ====================================================================== Mikhail Shishatskiy (4): test: separate memprof Lua API tests into subtests memprof: refactor location parsing memprof: group allocations on traces by traceno memprof: add info about trace start to symtab src/Makefile.dep.original | 3 +- src/lj_memprof.c | 79 +++++- src/lj_memprof.h | 22 +- .../misclib-memprof-lapi.test.lua | 261 ++++++++++++------ tools/memprof.lua | 4 +- tools/memprof/humanize.lua | 30 +- tools/memprof/parse.lua | 39 +-- tools/memprof/process.lua | 9 +- tools/utils/symtab.lua | 49 +++- 9 files changed, 364 insertions(+), 132 deletions(-) diff --git a/src/Makefile.dep.original b/src/Makefile.dep.original index ee6bafb2..faa44a0b 100644 --- a/src/Makefile.dep.original +++ b/src/Makefile.dep.original @@ -146,7 +146,8 @@ lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h lj_memprof.o: lj_memprof.c lj_arch.h lua.h luaconf.h lj_memprof.h \ - lj_def.h lj_wbuf.h lj_obj.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h + lj_def.h lj_wbuf.h lj_obj.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \ + lj_jit.h lj_ir.h lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \ lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h diff --git a/src/lj_debug.c b/src/lj_debug.c index 58b75dae..bb9ab288 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -110,7 +110,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) /* -- Line numbers -------------------------------------------------------- */ /* Get line number for a bytecode position. 
*/ -BCLine LJ_FASTCALL lj_debug_line(const GCproto *pt, BCPos pc) +BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc) { const void *lineinfo = proto_lineinfo(pt); if (pc <= pt->sizebc && lineinfo) { diff --git a/src/lj_debug.h b/src/lj_debug.h index 7814c588..a157d284 100644 --- a/src/lj_debug.h +++ b/src/lj_debug.h @@ -27,7 +27,7 @@ typedef struct lj_Debug { } lj_Debug; LJ_FUNC cTValue *lj_debug_frame(lua_State *L, int level, int *size); -LJ_FUNC BCLine LJ_FASTCALL lj_debug_line(const GCproto *pt, BCPos pc); +LJ_FUNC BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc); LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx); LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp); LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, diff --git a/src/lj_memprof.c b/src/lj_memprof.c index fc5bc301..e8b2ebbc 100644 --- a/src/lj_memprof.c +++ b/src/lj_memprof.c @@ -32,8 +32,8 @@ static const unsigned char ljs_header[] = {'l', 'j', 's', LJS_CURRENT_VERSION, static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace) { - const GCproto *pt = &gcref(trace->startpt)->pt; - BCLine lineno = -1; + GCproto *pt = &gcref(trace->startpt)->pt; + BCLine lineno = 0; const BCIns *startpc = mref(trace->startpc, const BCIns); lua_assert(startpc >= proto_bc(pt) && @@ -43,11 +43,14 @@ static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace) lua_assert(lineno >= 0); lj_wbuf_addbyte(out, SYMTAB_TRACE); - lj_wbuf_addu64(out, (uint64_t)trace->mcode); lj_wbuf_addu64(out, (uint64_t)trace->traceno); + lj_wbuf_addu64(out, (uint64_t)trace->mcode); /* - ** All the existing prototypes have already been dumped, so we do not - ** need to repeat their dump for trace locations. + ** The information about the prototype, associated with the + ** trace's start has already been dumped, as it is anchored + ** via the trace and is not collected while the trace is alive. 
+ ** For this reason, we do not need to repeat dumping the chunk + ** name for the prototype. */ lj_wbuf_addu64(out, (uintptr_t)pt); lj_wbuf_addu64(out, (uint64_t)lineno); @@ -200,8 +203,8 @@ static void memprof_write_trace(struct memprof *mp, uint8_t aevent) const TraceNo traceno = g->vmstate; const GCtrace *trace = traceref(J, traceno); lj_wbuf_addbyte(out, aevent | ASOURCE_TRACE); - lj_wbuf_addu64(out, (uintptr_t)trace->mcode); lj_wbuf_addu64(out, (uint64_t)traceno); + lj_wbuf_addu64(out, (uintptr_t)trace->mcode); } #else @@ -238,8 +241,9 @@ static const memprof_writer memprof_writers[] = { ** behaviour of Lua and JITted code must match 1:1 in terms of allocations, ** which makes using memprof with enabled JIT virtually redundant. ** But if one wants to investigate allocations with JIT enabled, - ** memprof_write_trace() dumps trace number to the binary output. - ** It can be useful to compare with with jit.v or jit.dump outputs. + ** memprof_write_trace() dumps trace number and mcode starting address + ** to the binary output. It can be useful to compare with with jit.v or + ** jit.dump outputs. 
*/ memprof_write_trace /* LJ_VMST_TRACE */ }; diff --git a/src/lj_memprof.h b/src/lj_memprof.h index e72dadf7..395fb429 100644 --- a/src/lj_memprof.h +++ b/src/lj_memprof.h @@ -27,14 +27,14 @@ ** reserved := <BYTE> <BYTE> <BYTE> ** sym := sym-lua | sym-trace | sym-final ** sym-lua := sym-header sym-addr sym-chunk sym-line -** sym-trace := sym-header trace-addr trace-no sym-addr sym-line +** sym-trace := sym-header trace-no trace-addr sym-addr sym-line ** sym-header := <BYTE> ** sym-addr := <ULEB128> ** sym-chunk := string ** sym-line := <ULEB128> ** sym-final := sym-header -** trace-addr := <ULEB128> ** trace-no := <ULEB128> +** trace-addr := <ULEB128> ** string := string-len string-payload ** string-len := <ULEB128> ** string-payload := <BYTE> {string-len} @@ -76,11 +76,11 @@ ** loc := loc-lua | loc-c | loc-trace ** loc-lua := sym-addr line-no ** loc-c := sym-addr -** loc-trace := trace-addr trace-no +** loc-trace := trace-no trace-addr ** sym-addr := <ULEB128> ** line-no := <ULEB128> -** trace-addr := <ULEB128> ** trace-no := <ULEB128> +** trace-addr := <ULEB128> ** oaddr := <ULEB128> ** naddr := <ULEB128> ** osize := <ULEB128> diff --git a/test/tarantool-tests/misclib-memprof-lapi.test.lua b/test/tarantool-tests/misclib-memprof-lapi.test.lua index f84b6df0..b9edb80d 100644 --- a/test/tarantool-tests/misclib-memprof-lapi.test.lua +++ b/test/tarantool-tests/misclib-memprof-lapi.test.lua @@ -79,15 +79,18 @@ local function generate_parsed_output(payload) end local function fill_ev_type(events, symbols, event_type) - local ev_type = {} + local ev_type = { + line = {}, + trace = {}, + } for _, event in pairs(events[event_type]) do local addr = event.loc.addr local traceno = event.loc.traceno if traceno ~= 0 and symbols.trace[traceno] then - local trace_loc = symbols.trace[traceno].sym_loc + local trace_loc = symbols.trace[traceno].start addr = trace_loc.addr - ev_type[trace_loc.line] = { + ev_type.trace[traceno] = { name = string.format("TRACE [%d] %s:%d", traceno, 
symbols.lfunc[addr].source, symbols.lfunc[addr].linedefined ), @@ -97,9 +100,9 @@ local function fill_ev_type(events, symbols, event_type) ev_type.INTERNAL = { name = "INTERNAL", num = event.num, - } + } elseif symbols.lfunc[addr] then - ev_type[event.loc.line] = { + ev_type.line[event.loc.line] = { name = string.format( "%s:%d", symbols.lfunc[addr].source, symbols.lfunc[addr].linedefined ), @@ -115,14 +118,14 @@ local function form_source_line(line) end local function check_alloc_report(alloc, traceno, line, function_line, nevents) - assert(alloc[line], ("no event on line %d"):format(line)) - local event = alloc[line] - local expected_name + local expected_name, event if traceno ~= 0 then expected_name = string.format("TRACE [%d] ", traceno).. form_source_line(function_line) + event = alloc.trace[traceno] else expected_name = form_source_line(function_line) + event = alloc.line[line] end assert(expected_name == event.name, ("got='%s', expected='%s'"):format( event.name, @@ -136,7 +139,7 @@ local function check_alloc_report(alloc, traceno, line, function_line, nevents) end -- Test profiler API. -test:test("base", function(subtest) +test:test("smoke", function(subtest) subtest:plan(6) -- Not a directory. @@ -169,12 +172,12 @@ test:test("output", function(subtest) local alloc = fill_ev_type(events, symbols, "alloc") local free = fill_ev_type(events, symbols, "free") - -- Check allocation reports. The second argument is a line number - -- of the allocation event itself. The third is a line number of - -- the corresponding function definition. The last one is - -- the number of allocations. - -- 1 event - alocation of table by itself + 1 allocation - -- of array part as far it is bigger than LJ_MAX_COLOSIZE (16). + -- Check allocation reports. The second argument is a line + -- number of the allocation event itself. The third is a line + -- number of the corresponding function definition. The last + -- one is the number of allocations. 
1 event - alocation of + -- table by itself + 1 allocation of array part as far it is + -- bigger than LJ_MAX_COLOSIZE (16). subtest:ok(check_alloc_report(alloc, 0, 34, 32, 2)) -- 20 strings allocations. subtest:ok(check_alloc_report(alloc, 0, 39, 32, 20)) @@ -202,8 +205,8 @@ test:test("stack-resize", function(subtest) -- We need to cause stack resize for local variables at function -- call. Let's create a new coroutine (all slots are free). -- It has 1 slot for dummy frame + 39 free slots + 5 extra slots - -- (so-called red zone) + 2 * LJ_FR2 slots. So 50 local variables - -- is enough. + -- (so-called red zone) + 2 * LJ_FR2 slots. So 50 local + -- variables is enough. local payload_str = "" for i = 1, 50 do payload_str = payload_str..("local v%d = %d\n"):format(i, i) diff --git a/tools/memprof.lua b/tools/memprof.lua index 18b44fdd..760122fc 100644 --- a/tools/memprof.lua +++ b/tools/memprof.lua @@ -104,8 +104,8 @@ local function dump(inputfile) if not leak_only then view.profile_info(events, symbols) end - local dheap = process.form_heap_delta(events, symbols) - view.leak_info(dheap) + local dheap = process.form_heap_delta(events) + view.leak_info(dheap, symbols) os.exit(0) end diff --git a/tools/memprof/humanize.lua b/tools/memprof/humanize.lua index 800a465e..7d30f976 100644 --- a/tools/memprof/humanize.lua +++ b/tools/memprof/humanize.lua @@ -7,6 +7,23 @@ local symtab = require "utils.symtab" local M = {} +function M.describe_location(symbols, loc) + if loc.traceno == 0 then + return symtab.demangle(symbols, loc) + end + + local trace = symbols.trace[loc.traceno] + + -- If trace, which was remembered in the symtab, has not + -- been flushed, assotiate it with a proto, where trace + -- recording started. + if trace and trace.addr == loc.addr then + return symtab.demangle(symbols, loc).." started at ".. 
+ symtab.demangle(symbols, trace.start) + end + return symtab.demangle(symbols, loc) +end + function M.render(events, symbols) local ids = {} @@ -56,13 +73,16 @@ function M.profile_info(events, symbols) print("") end -function M.leak_info(dheap) +function M.leak_info(dheap, symbols) local leaks = {} - for line, info in pairs(dheap) do + for _, info in pairs(dheap) do -- Report "INTERNAL" events inconsistencies for profiling -- with enabled jit. if info.dbytes > 0 then - table.insert(leaks, {line = line, dbytes = info.dbytes}) + table.insert(leaks, { + line = M.describe_location(symbols, info.loc), + dbytes = info.dbytes + }) end end @@ -81,21 +101,4 @@ function M.leak_info(dheap) print("") end -function M.describe_location(symbols, loc) - if loc.traceno == 0 then - return symtab.demangle(symbols, loc) - end - - local trace = symbols.trace[loc.traceno] - - -- If trace, which was remembered in the symtab, has not - -- been flushed, assotiate it with a proto, where trace - -- recording started. - if trace and trace.addr == loc.addr then - return symtab.demangle(symbols, loc).." started at ".. 
- symtab.demangle(symbols, trace.sym_loc) - end - return symtab.demangle(symbols, loc) -end - return M diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua index adc7c072..968fd90e 100644 --- a/tools/memprof/parse.lua +++ b/tools/memprof/parse.lua @@ -8,6 +8,8 @@ local bit = require "bit" local band = bit.band local lshift = bit.lshift +local symtab = require "utils.symtab" + local string_format = string.format local LJM_MAGIC = "ljm" @@ -62,25 +64,24 @@ local function link_to_previous(heap_chunk, e, nsize) end end -local function id_location(addr, line, traceno) - return string_format("f%#xl%dt%d", addr, line, traceno), { - addr = addr, - line = line, - traceno = traceno, - } -end - local function parse_location(reader, asource) - if asource == ASOURCE_INT then - return id_location(0, 0, 0) - elseif asource == ASOURCE_CFUNC then - return id_location(reader:read_uleb128(), 0, 0) + local loc = { + addr = 0, + line = 0, + traceno = 0, + } + if asource == ASOURCE_CFUNC then + loc.addr = reader:read_uleb128() elseif asource == ASOURCE_LFUNC then - return id_location(reader:read_uleb128(), reader:read_uleb128(), 0) + loc.addr = reader:read_uleb128() + loc.line = reader:read_uleb128() elseif asource == ASOURCE_TRACE then - return id_location(reader:read_uleb128(), 0, reader:read_uleb128()) + loc.traceno = reader:read_uleb128() + loc.addr = reader:read_uleb128() + elseif asource ~= ASOURCE_INT then + error("Unknown asource "..asource) end - error("Unknown asource "..asource) + return symtab.id(loc), loc end local function parse_alloc(reader, asource, events, heap) diff --git a/tools/memprof/process.lua b/tools/memprof/process.lua index f277ed84..360f6cc4 100644 --- a/tools/memprof/process.lua +++ b/tools/memprof/process.lua @@ -2,9 +2,9 @@ local M = {} -local humanize = require "memprof.humanize" +local symtab = require "utils.symtab" -function M.form_heap_delta(events, symbols) +function M.form_heap_delta(events) -- Auto resurrects source event lines for 
counting/reporting. local dheap = setmetatable({}, {__index = function(t, line) rawset(t, line, { @@ -17,11 +17,12 @@ function M.form_heap_delta(events, symbols) for _, event in pairs(events.alloc) do if event.loc then - local ev_line = humanize.describe_location(symbols, event.loc) + local ev_line = symtab.id(event.loc) if (event.alloc > 0) then dheap[ev_line].dbytes = dheap[ev_line].dbytes + event.alloc dheap[ev_line].nalloc = dheap[ev_line].nalloc + event.num + dheap[ev_line].loc = event.loc end end end @@ -37,16 +38,18 @@ function M.form_heap_delta(events, symbols) -- that references the table with memory changed -- (may be empty). for _, heap_chunk in pairs(event.primary) do - local ev_line = humanize.describe_location(symbols, heap_chunk.loc) + local ev_line = symtab.id(heap_chunk.loc) if (heap_chunk.alloced > 0) then dheap[ev_line].dbytes = dheap[ev_line].dbytes + heap_chunk.alloced dheap[ev_line].nalloc = dheap[ev_line].nalloc + heap_chunk.count + dheap[ev_line].loc = heap_chunk.loc end if (heap_chunk.freed > 0) then dheap[ev_line].dbytes = dheap[ev_line].dbytes - heap_chunk.freed dheap[ev_line].nfree = dheap[ev_line].nfree + heap_chunk.count + dheap[ev_line].loc = heap_chunk.loc end end end diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua index 0e742ee1..496d8480 100644 --- a/tools/utils/symtab.lua +++ b/tools/utils/symtab.lua @@ -32,14 +32,14 @@ local function parse_sym_lfunc(reader, symtab) end local function parse_sym_trace(reader, symtab) - local trace_addr = reader:read_uleb128() local traceno = reader:read_uleb128() + local trace_addr = reader:read_uleb128() local sym_addr = reader:read_uleb128() local sym_line = reader:read_uleb128() symtab.trace[traceno] = { addr = trace_addr, - sym_loc = { + start = { addr = sym_addr, line = sym_line, traceno = 0, @@ -94,19 +94,23 @@ function M.parse(reader) return symtab end +function M.id(loc) + return string_format("f%#xl%dt%d", loc.addr, loc.line, loc.traceno) +end + local function 
demangle_lfunc(symtab, loc) local addr = loc.addr if addr == 0 then return "INTERNAL" elseif symtab.lfunc[addr] then - return string_format("%s:%d", symtab.lfunc[loc.addr].source, loc.line) + return string_format("%s:%d", symtab.lfunc[addr].source, loc.line) end return string_format("CFUNC %#x", addr) end local function demangle_trace(loc) - return string_format("TRACE [%d] 0x%x", loc.traceno, loc.addr) + return string_format("TRACE [%d] %#x", loc.traceno, loc.addr) end function M.demangle(symtab, loc) -- 2.33.0
next prev parent reply other threads:[~2021-09-29 20:08 UTC|newest] Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-08-20 7:05 [Tarantool-patches] [PATCH luajit v3 0/4] memprof: group allocations on traces by trace number Mikhail Shishatskiy via Tarantool-patches 2021-08-20 7:05 ` [Tarantool-patches] [PATCH luajit v3 1/5] core: add const to lj_debug_line proto parameter Mikhail Shishatskiy via Tarantool-patches 2021-09-16 15:29 ` Igor Munkin via Tarantool-patches 2021-08-20 7:05 ` [Tarantool-patches] [PATCH luajit v3 2/5] test: separate memprof Lua API tests into subtests Mikhail Shishatskiy via Tarantool-patches 2021-09-16 15:29 ` Igor Munkin via Tarantool-patches 2021-08-20 7:05 ` [Tarantool-patches] [PATCH luajit v3 3/5] memprof: dump traceno if allocate from trace Mikhail Shishatskiy via Tarantool-patches 2021-09-16 15:32 ` Igor Munkin via Tarantool-patches 2021-09-29 19:21 ` Mikhail Shishatskiy via Tarantool-patches 2021-08-20 7:05 ` [Tarantool-patches] [PATCH luajit v3 4/5] memprof: extend symtab with info about traces Mikhail Shishatskiy via Tarantool-patches 2021-09-16 15:32 ` Igor Munkin via Tarantool-patches 2021-09-29 19:21 ` Mikhail Shishatskiy via Tarantool-patches 2021-08-20 7:05 ` [Tarantool-patches] [PATCH luajit v3 5/5] luajit: change order of modules Mikhail Shishatskiy via Tarantool-patches 2021-09-16 15:32 ` Igor Munkin via Tarantool-patches 2021-09-29 20:07 ` Mikhail Shishatskiy via Tarantool-patches [this message] 2021-09-29 20:07 ` [Tarantool-patches] [PATCH luajit v4 1/4] test: separate memprof Lua API tests into subtests Mikhail Shishatskiy via Tarantool-patches 2021-10-27 13:56 ` Igor Munkin via Tarantool-patches 2021-10-27 15:07 ` Sergey Kaplun via Tarantool-patches 2021-09-29 20:07 ` [Tarantool-patches] [PATCH luajit v4 2/4] memprof: refactor location parsing Mikhail Shishatskiy via Tarantool-patches 2021-10-27 13:56 ` Igor Munkin via Tarantool-patches [not found] ` 
<20211104130010.mcvnra6e4yl5moo2@surf.localdomain> 2021-11-10 15:38 ` Igor Munkin via Tarantool-patches 2021-09-29 20:07 ` [Tarantool-patches] [PATCH luajit v4 3/4] memprof: group allocations on traces by traceno Mikhail Shishatskiy via Tarantool-patches 2021-10-27 13:56 ` Igor Munkin via Tarantool-patches [not found] ` <20211104130156.f2botlihlfhwd3yh@surf.localdomain> 2021-11-11 15:34 ` Igor Munkin via Tarantool-patches 2021-09-29 20:07 ` [Tarantool-patches] [PATCH luajit v4 4/4] memprof: add info about trace start to symtab Mikhail Shishatskiy via Tarantool-patches 2021-11-01 16:31 ` Igor Munkin via Tarantool-patches [not found] ` <20211104130228.x6qcne5xeh544hm7@surf.localdomain> 2021-11-12 13:34 ` Igor Munkin via Tarantool-patches 2021-11-17 8:17 ` Sergey Kaplun via Tarantool-patches 2021-11-22 15:11 ` Mikhail Shishatskiy via Tarantool-patches 2021-11-24 12:42 ` Mikhail Shishatskiy via Tarantool-patches 2021-11-24 16:44 ` Igor Munkin via Tarantool-patches 2022-01-27 23:29 ` [Tarantool-patches] [PATCH luajit v4 0/4] memprof: group allocations on traces by traceno Igor Munkin via Tarantool-patches
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210929200758.149446-1-m.shishatskiy@tarantool.org \ --to=tarantool-patches@dev.tarantool.org \ --cc=imun@tarantool.org \ --cc=m.shishatskiy@tarantool.org \ --cc=skaplun@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH luajit v4 0/4] memprof: group allocations on traces by traceno' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.