[Tarantool-patches] [PATCH luajit v4 0/4] memprof: group allocations on traces by traceno
Mikhail Shishatskiy
m.shishatskiy at tarantool.org
Wed Sep 29 23:07:54 MSK 2021
New patch series v4 with fixes as per review by Igor Munkin:
Changes in v4:
* refactored the modules in order not to violate MVC: now the
process module uses symtab.id(loc) as a key (rather than the
fully demangled name)
* reorganized patches: now the first important patch has the
location dumping and simple parsing without the trace start
info; the second patch adds the information about the trace's
start and extends the symtab.
@ChangeLog
======================================================================
##feature/luajit
* Now the memory profiler records allocations from traces. The memory
profiler parser can display a new type of allocation source in
the following format:
```
| TRACE [<trace-no>] <trace-addr> started at @<sym-chunk>:<sym-line>
```
======================================================================
Mikhail Shishatskiy (4):
test: separate memprof Lua API tests into subtests
memprof: refactor location parsing
memprof: group allocations on traces by traceno
memprof: add info about trace start to symtab
src/Makefile.dep.original | 3 +-
src/lj_memprof.c | 79 +++++-
src/lj_memprof.h | 22 +-
.../misclib-memprof-lapi.test.lua | 261 ++++++++++++------
tools/memprof.lua | 4 +-
tools/memprof/humanize.lua | 30 +-
tools/memprof/parse.lua | 39 +--
tools/memprof/process.lua | 9 +-
tools/utils/symtab.lua | 49 +++-
9 files changed, 364 insertions(+), 132 deletions(-)
diff --git a/src/Makefile.dep.original b/src/Makefile.dep.original
index ee6bafb2..faa44a0b 100644
--- a/src/Makefile.dep.original
+++ b/src/Makefile.dep.original
@@ -146,7 +146,8 @@ lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
lj_memprof.o: lj_memprof.c lj_arch.h lua.h luaconf.h lj_memprof.h \
- lj_def.h lj_wbuf.h lj_obj.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h
+ lj_def.h lj_wbuf.h lj_obj.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
+ lj_jit.h lj_ir.h
lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
diff --git a/src/lj_debug.c b/src/lj_debug.c
index 58b75dae..bb9ab288 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -110,7 +110,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
/* -- Line numbers -------------------------------------------------------- */
/* Get line number for a bytecode position. */
-BCLine LJ_FASTCALL lj_debug_line(const GCproto *pt, BCPos pc)
+BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc)
{
const void *lineinfo = proto_lineinfo(pt);
if (pc <= pt->sizebc && lineinfo) {
diff --git a/src/lj_debug.h b/src/lj_debug.h
index 7814c588..a157d284 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -27,7 +27,7 @@ typedef struct lj_Debug {
} lj_Debug;
LJ_FUNC cTValue *lj_debug_frame(lua_State *L, int level, int *size);
-LJ_FUNC BCLine LJ_FASTCALL lj_debug_line(const GCproto *pt, BCPos pc);
+LJ_FUNC BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc);
LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx);
LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp);
LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
diff --git a/src/lj_memprof.c b/src/lj_memprof.c
index fc5bc301..e8b2ebbc 100644
--- a/src/lj_memprof.c
+++ b/src/lj_memprof.c
@@ -32,8 +32,8 @@ static const unsigned char ljs_header[] = {'l', 'j', 's', LJS_CURRENT_VERSION,
static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
{
- const GCproto *pt = &gcref(trace->startpt)->pt;
- BCLine lineno = -1;
+ GCproto *pt = &gcref(trace->startpt)->pt;
+ BCLine lineno = 0;
const BCIns *startpc = mref(trace->startpc, const BCIns);
lua_assert(startpc >= proto_bc(pt) &&
@@ -43,11 +43,14 @@ static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
lua_assert(lineno >= 0);
lj_wbuf_addbyte(out, SYMTAB_TRACE);
- lj_wbuf_addu64(out, (uint64_t)trace->mcode);
lj_wbuf_addu64(out, (uint64_t)trace->traceno);
+ lj_wbuf_addu64(out, (uint64_t)trace->mcode);
/*
- ** All the existing prototypes have already been dumped, so we do not
- ** need to repeat their dump for trace locations.
+ ** The information about the prototype associated with the
+ ** trace's start has already been dumped, as it is anchored
+ ** via the trace and is not collected while the trace is alive.
+ ** For this reason, we do not need to repeat dumping the chunk
+ ** name for the prototype.
*/
lj_wbuf_addu64(out, (uintptr_t)pt);
lj_wbuf_addu64(out, (uint64_t)lineno);
@@ -200,8 +203,8 @@ static void memprof_write_trace(struct memprof *mp, uint8_t aevent)
const TraceNo traceno = g->vmstate;
const GCtrace *trace = traceref(J, traceno);
lj_wbuf_addbyte(out, aevent | ASOURCE_TRACE);
- lj_wbuf_addu64(out, (uintptr_t)trace->mcode);
lj_wbuf_addu64(out, (uint64_t)traceno);
+ lj_wbuf_addu64(out, (uintptr_t)trace->mcode);
}
#else
@@ -238,8 +241,9 @@ static const memprof_writer memprof_writers[] = {
** behaviour of Lua and JITted code must match 1:1 in terms of allocations,
** which makes using memprof with enabled JIT virtually redundant.
** But if one wants to investigate allocations with JIT enabled,
- ** memprof_write_trace() dumps trace number to the binary output.
- ** It can be useful to compare with with jit.v or jit.dump outputs.
+ ** memprof_write_trace() dumps trace number and mcode starting address
+ ** to the binary output. It can be useful to compare with jit.v or
+ ** jit.dump outputs.
*/
memprof_write_trace /* LJ_VMST_TRACE */
};
diff --git a/src/lj_memprof.h b/src/lj_memprof.h
index e72dadf7..395fb429 100644
--- a/src/lj_memprof.h
+++ b/src/lj_memprof.h
@@ -27,14 +27,14 @@
** reserved := <BYTE> <BYTE> <BYTE>
** sym := sym-lua | sym-trace | sym-final
** sym-lua := sym-header sym-addr sym-chunk sym-line
-** sym-trace := sym-header trace-addr trace-no sym-addr sym-line
+** sym-trace := sym-header trace-no trace-addr sym-addr sym-line
** sym-header := <BYTE>
** sym-addr := <ULEB128>
** sym-chunk := string
** sym-line := <ULEB128>
** sym-final := sym-header
-** trace-addr := <ULEB128>
** trace-no := <ULEB128>
+** trace-addr := <ULEB128>
** string := string-len string-payload
** string-len := <ULEB128>
** string-payload := <BYTE> {string-len}
@@ -76,11 +76,11 @@
** loc := loc-lua | loc-c | loc-trace
** loc-lua := sym-addr line-no
** loc-c := sym-addr
-** loc-trace := trace-addr trace-no
+** loc-trace := trace-no trace-addr
** sym-addr := <ULEB128>
** line-no := <ULEB128>
-** trace-addr := <ULEB128>
** trace-no := <ULEB128>
+** trace-addr := <ULEB128>
** oaddr := <ULEB128>
** naddr := <ULEB128>
** osize := <ULEB128>
diff --git a/test/tarantool-tests/misclib-memprof-lapi.test.lua b/test/tarantool-tests/misclib-memprof-lapi.test.lua
index f84b6df0..b9edb80d 100644
--- a/test/tarantool-tests/misclib-memprof-lapi.test.lua
+++ b/test/tarantool-tests/misclib-memprof-lapi.test.lua
@@ -79,15 +79,18 @@ local function generate_parsed_output(payload)
end
local function fill_ev_type(events, symbols, event_type)
- local ev_type = {}
+ local ev_type = {
+ line = {},
+ trace = {},
+ }
for _, event in pairs(events[event_type]) do
local addr = event.loc.addr
local traceno = event.loc.traceno
if traceno ~= 0 and symbols.trace[traceno] then
- local trace_loc = symbols.trace[traceno].sym_loc
+ local trace_loc = symbols.trace[traceno].start
addr = trace_loc.addr
- ev_type[trace_loc.line] = {
+ ev_type.trace[traceno] = {
name = string.format("TRACE [%d] %s:%d",
traceno, symbols.lfunc[addr].source, symbols.lfunc[addr].linedefined
),
@@ -97,9 +100,9 @@ local function fill_ev_type(events, symbols, event_type)
ev_type.INTERNAL = {
name = "INTERNAL",
num = event.num,
- }
+ }
elseif symbols.lfunc[addr] then
- ev_type[event.loc.line] = {
+ ev_type.line[event.loc.line] = {
name = string.format(
"%s:%d", symbols.lfunc[addr].source, symbols.lfunc[addr].linedefined
),
@@ -115,14 +118,14 @@ local function form_source_line(line)
end
local function check_alloc_report(alloc, traceno, line, function_line, nevents)
- assert(alloc[line], ("no event on line %d"):format(line))
- local event = alloc[line]
- local expected_name
+ local expected_name, event
if traceno ~= 0 then
expected_name = string.format("TRACE [%d] ", traceno)..
form_source_line(function_line)
+ event = alloc.trace[traceno]
else
expected_name = form_source_line(function_line)
+ event = alloc.line[line]
end
assert(expected_name == event.name, ("got='%s', expected='%s'"):format(
event.name,
@@ -136,7 +139,7 @@ local function check_alloc_report(alloc, traceno, line, function_line, nevents)
end
-- Test profiler API.
-test:test("base", function(subtest)
+test:test("smoke", function(subtest)
subtest:plan(6)
-- Not a directory.
@@ -169,12 +172,12 @@ test:test("output", function(subtest)
local alloc = fill_ev_type(events, symbols, "alloc")
local free = fill_ev_type(events, symbols, "free")
- -- Check allocation reports. The second argument is a line number
- -- of the allocation event itself. The third is a line number of
- -- the corresponding function definition. The last one is
- -- the number of allocations.
- -- 1 event - alocation of table by itself + 1 allocation
- -- of array part as far it is bigger than LJ_MAX_COLOSIZE (16).
+ -- Check allocation reports. The second argument is a line
+ -- number of the allocation event itself. The third is a line
+ -- number of the corresponding function definition. The last
+ -- one is the number of allocations. 1 event - allocation of
+ -- table by itself + 1 allocation of array part since it is
+ -- bigger than LJ_MAX_COLOSIZE (16).
subtest:ok(check_alloc_report(alloc, 0, 34, 32, 2))
-- 20 strings allocations.
subtest:ok(check_alloc_report(alloc, 0, 39, 32, 20))
@@ -202,8 +205,8 @@ test:test("stack-resize", function(subtest)
-- We need to cause stack resize for local variables at function
-- call. Let's create a new coroutine (all slots are free).
-- It has 1 slot for dummy frame + 39 free slots + 5 extra slots
- -- (so-called red zone) + 2 * LJ_FR2 slots. So 50 local variables
- -- is enough.
+ -- (so-called red zone) + 2 * LJ_FR2 slots. So 50 local
+ -- variables is enough.
local payload_str = ""
for i = 1, 50 do
payload_str = payload_str..("local v%d = %d\n"):format(i, i)
diff --git a/tools/memprof.lua b/tools/memprof.lua
index 18b44fdd..760122fc 100644
--- a/tools/memprof.lua
+++ b/tools/memprof.lua
@@ -104,8 +104,8 @@ local function dump(inputfile)
if not leak_only then
view.profile_info(events, symbols)
end
- local dheap = process.form_heap_delta(events, symbols)
- view.leak_info(dheap)
+ local dheap = process.form_heap_delta(events)
+ view.leak_info(dheap, symbols)
os.exit(0)
end
diff --git a/tools/memprof/humanize.lua b/tools/memprof/humanize.lua
index 800a465e..7d30f976 100644
--- a/tools/memprof/humanize.lua
+++ b/tools/memprof/humanize.lua
@@ -7,6 +7,23 @@ local symtab = require "utils.symtab"
local M = {}
+function M.describe_location(symbols, loc)
+ if loc.traceno == 0 then
+ return symtab.demangle(symbols, loc)
+ end
+
+ local trace = symbols.trace[loc.traceno]
+
+ -- If trace, which was remembered in the symtab, has not
+ -- been flushed, associate it with a proto, where trace
+ -- recording started.
+ if trace and trace.addr == loc.addr then
+ return symtab.demangle(symbols, loc).." started at "..
+ symtab.demangle(symbols, trace.start)
+ end
+ return symtab.demangle(symbols, loc)
+end
+
function M.render(events, symbols)
local ids = {}
@@ -56,13 +73,16 @@ function M.profile_info(events, symbols)
print("")
end
-function M.leak_info(dheap)
+function M.leak_info(dheap, symbols)
local leaks = {}
- for line, info in pairs(dheap) do
+ for _, info in pairs(dheap) do
-- Report "INTERNAL" events inconsistencies for profiling
-- with enabled jit.
if info.dbytes > 0 then
- table.insert(leaks, {line = line, dbytes = info.dbytes})
+ table.insert(leaks, {
+ line = M.describe_location(symbols, info.loc),
+ dbytes = info.dbytes
+ })
end
end
@@ -81,21 +101,4 @@ function M.leak_info(dheap)
print("")
end
-function M.describe_location(symbols, loc)
- if loc.traceno == 0 then
- return symtab.demangle(symbols, loc)
- end
-
- local trace = symbols.trace[loc.traceno]
-
- -- If trace, which was remembered in the symtab, has not
- -- been flushed, assotiate it with a proto, where trace
- -- recording started.
- if trace and trace.addr == loc.addr then
- return symtab.demangle(symbols, loc).." started at "..
- symtab.demangle(symbols, trace.sym_loc)
- end
- return symtab.demangle(symbols, loc)
-end
-
return M
diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
index adc7c072..968fd90e 100644
--- a/tools/memprof/parse.lua
+++ b/tools/memprof/parse.lua
@@ -8,6 +8,8 @@ local bit = require "bit"
local band = bit.band
local lshift = bit.lshift
+local symtab = require "utils.symtab"
+
local string_format = string.format
local LJM_MAGIC = "ljm"
@@ -62,25 +64,24 @@ local function link_to_previous(heap_chunk, e, nsize)
end
end
-local function id_location(addr, line, traceno)
- return string_format("f%#xl%dt%d", addr, line, traceno), {
- addr = addr,
- line = line,
- traceno = traceno,
- }
-end
-
local function parse_location(reader, asource)
- if asource == ASOURCE_INT then
- return id_location(0, 0, 0)
- elseif asource == ASOURCE_CFUNC then
- return id_location(reader:read_uleb128(), 0, 0)
+ local loc = {
+ addr = 0,
+ line = 0,
+ traceno = 0,
+ }
+ if asource == ASOURCE_CFUNC then
+ loc.addr = reader:read_uleb128()
elseif asource == ASOURCE_LFUNC then
- return id_location(reader:read_uleb128(), reader:read_uleb128(), 0)
+ loc.addr = reader:read_uleb128()
+ loc.line = reader:read_uleb128()
elseif asource == ASOURCE_TRACE then
- return id_location(reader:read_uleb128(), 0, reader:read_uleb128())
+ loc.traceno = reader:read_uleb128()
+ loc.addr = reader:read_uleb128()
+ elseif asource ~= ASOURCE_INT then
+ error("Unknown asource "..asource)
end
- error("Unknown asource "..asource)
+ return symtab.id(loc), loc
end
local function parse_alloc(reader, asource, events, heap)
diff --git a/tools/memprof/process.lua b/tools/memprof/process.lua
index f277ed84..360f6cc4 100644
--- a/tools/memprof/process.lua
+++ b/tools/memprof/process.lua
@@ -2,9 +2,9 @@
local M = {}
-local humanize = require "memprof.humanize"
+local symtab = require "utils.symtab"
-function M.form_heap_delta(events, symbols)
+function M.form_heap_delta(events)
-- Auto resurrects source event lines for counting/reporting.
local dheap = setmetatable({}, {__index = function(t, line)
rawset(t, line, {
@@ -17,11 +17,12 @@ function M.form_heap_delta(events, symbols)
for _, event in pairs(events.alloc) do
if event.loc then
- local ev_line = humanize.describe_location(symbols, event.loc)
+ local ev_line = symtab.id(event.loc)
if (event.alloc > 0) then
dheap[ev_line].dbytes = dheap[ev_line].dbytes + event.alloc
dheap[ev_line].nalloc = dheap[ev_line].nalloc + event.num
+ dheap[ev_line].loc = event.loc
end
end
end
@@ -37,16 +38,18 @@ function M.form_heap_delta(events, symbols)
-- that references the table with memory changed
-- (may be empty).
for _, heap_chunk in pairs(event.primary) do
- local ev_line = humanize.describe_location(symbols, heap_chunk.loc)
+ local ev_line = symtab.id(heap_chunk.loc)
if (heap_chunk.alloced > 0) then
dheap[ev_line].dbytes = dheap[ev_line].dbytes + heap_chunk.alloced
dheap[ev_line].nalloc = dheap[ev_line].nalloc + heap_chunk.count
+ dheap[ev_line].loc = heap_chunk.loc
end
if (heap_chunk.freed > 0) then
dheap[ev_line].dbytes = dheap[ev_line].dbytes - heap_chunk.freed
dheap[ev_line].nfree = dheap[ev_line].nfree + heap_chunk.count
+ dheap[ev_line].loc = heap_chunk.loc
end
end
end
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
index 0e742ee1..496d8480 100644
--- a/tools/utils/symtab.lua
+++ b/tools/utils/symtab.lua
@@ -32,14 +32,14 @@ local function parse_sym_lfunc(reader, symtab)
end
local function parse_sym_trace(reader, symtab)
- local trace_addr = reader:read_uleb128()
local traceno = reader:read_uleb128()
+ local trace_addr = reader:read_uleb128()
local sym_addr = reader:read_uleb128()
local sym_line = reader:read_uleb128()
symtab.trace[traceno] = {
addr = trace_addr,
- sym_loc = {
+ start = {
addr = sym_addr,
line = sym_line,
traceno = 0,
@@ -94,19 +94,23 @@ function M.parse(reader)
return symtab
end
+function M.id(loc)
+ return string_format("f%#xl%dt%d", loc.addr, loc.line, loc.traceno)
+end
+
local function demangle_lfunc(symtab, loc)
local addr = loc.addr
if addr == 0 then
return "INTERNAL"
elseif symtab.lfunc[addr] then
- return string_format("%s:%d", symtab.lfunc[loc.addr].source, loc.line)
+ return string_format("%s:%d", symtab.lfunc[addr].source, loc.line)
end
return string_format("CFUNC %#x", addr)
end
local function demangle_trace(loc)
- return string_format("TRACE [%d] 0x%x", loc.traceno, loc.addr)
+ return string_format("TRACE [%d] %#x", loc.traceno, loc.addr)
end
function M.demangle(symtab, loc)
--
2.33.0
More information about the Tarantool-patches
mailing list