[Tarantool-patches] [PATCH luajit v3 4/5] memprof: extend symtab with info about traces
Mikhail Shishatskiy
m.shishatskiy at tarantool.org
Fri Aug 20 10:05:45 MSK 2021
This patch extends the memprof symtab by adding information
about traces, which are present at the start of profiling.
The symtab format changed with adding <sym-trace> entry,
which consists of a trace address, trace number, address
of function proto, and line, where trace recording started:
| sym-trace := sym-header trace-addr trace-no sym-addr sym-line
| trace-addr := <ULEB128>
| trace-no := <ULEB128>
Also, the memprof parser is adjusted to recognize new
symtab entries and associate them with allocation events
from traces.
With adding traces to the symbol table the API of <utils/symtab.lua>
changed: now table with symbols contains two tables: `lfunc` for
Lua functions symbols and `trace` for trace entries.
As trace can be associated with a function proto, <utils/humanize.lua>
module extended with a function `describe_location` intended
to be used instead of symtab.demangle. The reason is that the
location name can be more than demangled `loc`: for example,
when getting trace location name, we want to assotiate the
trace with a function proto, where trace recording started.
On the one hand, we want to get a verbose trace description
with demangled proto location:
| TRACE [<trace-no>] <trace-addr> started at @<proto-name>:<proto-line>
On the other hand, idiomatically, this should be done not by
the demangler module but by the humanizer module.
Resolves tarantool/tarantool#5814
---
Issue: https://github.com/tarantool/tarantool/issues/5814
Luajit branch: https://github.com/tarantool/luajit/tree/shishqa/gh-5814-group-allocations-on-trace-by-trace-number
tarantool branch: https://github.com/tarantool/tarantool/tree/shishqa/gh-5814-group-allocations-on-trace-by-trace-number
src/lj_memprof.c | 40 ++++++
src/lj_memprof.h | 7 +-
.../misclib-memprof-lapi.test.lua | 135 +++++++++++++-----
tools/memprof/humanize.lua | 21 ++-
tools/memprof/process.lua | 6 +-
tools/utils/symtab.lua | 45 ++++--
6 files changed, 206 insertions(+), 48 deletions(-)
diff --git a/src/lj_memprof.c b/src/lj_memprof.c
index fb99829d..fc5bc301 100644
--- a/src/lj_memprof.c
+++ b/src/lj_memprof.c
@@ -28,6 +28,42 @@
static const unsigned char ljs_header[] = {'l', 'j', 's', LJS_CURRENT_VERSION,
0x0, 0x0, 0x0};
+#if LJ_HASJIT
+
+static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
+{
+ const GCproto *pt = &gcref(trace->startpt)->pt;
+ BCLine lineno = -1;
+
+ const BCIns *startpc = mref(trace->startpc, const BCIns);
+ lua_assert(startpc >= proto_bc(pt) &&
+ startpc < proto_bc(pt) + pt->sizebc);
+
+ lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
+ lua_assert(lineno >= 0);
+
+ lj_wbuf_addbyte(out, SYMTAB_TRACE);
+ lj_wbuf_addu64(out, (uint64_t)trace->mcode);
+ lj_wbuf_addu64(out, (uint64_t)trace->traceno);
+ /*
+ ** All the existing prototypes have already been dumped, so we do not
+ ** need to repeat their dump for trace locations.
+ */
+ lj_wbuf_addu64(out, (uintptr_t)pt);
+ lj_wbuf_addu64(out, (uint64_t)lineno);
+}
+
+#else
+
+static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
+{
+ UNUSED(out);
+ UNUSED(trace);
+ lua_assert(0);
+}
+
+#endif
+
static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
{
const GCRef *iter = &g->gc.root;
@@ -47,6 +83,10 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
lj_wbuf_addu64(out, (uint64_t)pt->firstline);
break;
}
+ case (~LJ_TTRACE): {
+ dump_symtab_trace(out, gco2trace(o));
+ break;
+ }
default:
break;
}
diff --git a/src/lj_memprof.h b/src/lj_memprof.h
index 6a35385d..e72dadf7 100644
--- a/src/lj_memprof.h
+++ b/src/lj_memprof.h
@@ -16,7 +16,7 @@
#include "lj_def.h"
#include "lj_wbuf.h"
-#define LJS_CURRENT_VERSION 0x1
+#define LJS_CURRENT_VERSION 0x2
/*
** symtab format:
@@ -25,13 +25,16 @@
** prologue := 'l' 'j' 's' version reserved
** version := <BYTE>
** reserved := <BYTE> <BYTE> <BYTE>
-** sym := sym-lua | sym-final
+** sym := sym-lua | sym-trace | sym-final
** sym-lua := sym-header sym-addr sym-chunk sym-line
+** sym-trace := sym-header trace-addr trace-no sym-addr sym-line
** sym-header := <BYTE>
** sym-addr := <ULEB128>
** sym-chunk := string
** sym-line := <ULEB128>
** sym-final := sym-header
+** trace-addr := <ULEB128>
+** trace-no := <ULEB128>
** string := string-len string-payload
** string-len := <ULEB128>
** string-payload := <BYTE> {string-len}
diff --git a/test/tarantool-tests/misclib-memprof-lapi.test.lua b/test/tarantool-tests/misclib-memprof-lapi.test.lua
index dbf384ed..f84b6df0 100644
--- a/test/tarantool-tests/misclib-memprof-lapi.test.lua
+++ b/test/tarantool-tests/misclib-memprof-lapi.test.lua
@@ -7,7 +7,14 @@ require("utils").skipcond(
local tap = require("tap")
local test = tap.test("misc-memprof-lapi")
-test:plan(3)
+test:plan(4)
+
+local jit_opt_default = {
+ 3, -- level
+ "hotloop=56",
+ "hotexit=10",
+ "minstitch=0",
+}
jit.off()
jit.flush()
@@ -22,12 +29,12 @@ local symtab = require "utils.symtab"
local TMP_BINFILE = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%.%1.memprofdata.tmp.bin")
local BAD_PATH = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%1/memprofdata.tmp.bin")
-local function payload()
+local function default_payload()
-- Preallocate table to avoid table array part reallocations.
- local _ = table_new(100, 0)
+ local _ = table_new(20, 0)
- -- Want too see 100 objects here.
- for i = 1, 100 do
+ -- Want too see 20 objects here.
+ for i = 1, 20 do
-- Try to avoid crossing with "test" module objects.
_[i] = "memprof-str-"..i
end
@@ -37,7 +44,7 @@ local function payload()
collectgarbage()
end
-local function generate_output(filename)
+local function generate_output(filename, payload)
-- Clean up all garbage to avoid pollution of free.
collectgarbage()
@@ -52,19 +59,49 @@ local function generate_output(filename)
assert(res, err)
end
+local function generate_parsed_output(payload)
+ local res, err = pcall(generate_output, TMP_BINFILE, payload)
+
+ -- Want to cleanup carefully if something went wrong.
+ if not res then
+ os.remove(TMP_BINFILE)
+ error(err)
+ end
+
+ local reader = bufread.new(TMP_BINFILE)
+ local symbols = symtab.parse(reader)
+ local events = memprof.parse(reader)
+
+ -- We don't need it any more.
+ os.remove(TMP_BINFILE)
+
+ return symbols, events
+end
+
local function fill_ev_type(events, symbols, event_type)
local ev_type = {}
for _, event in pairs(events[event_type]) do
local addr = event.loc.addr
- if addr == 0 then
+ local traceno = event.loc.traceno
+
+ if traceno ~= 0 and symbols.trace[traceno] then
+ local trace_loc = symbols.trace[traceno].sym_loc
+ addr = trace_loc.addr
+ ev_type[trace_loc.line] = {
+ name = string.format("TRACE [%d] %s:%d",
+ traceno, symbols.lfunc[addr].source, symbols.lfunc[addr].linedefined
+ ),
+ num = event.num,
+ }
+ elseif addr == 0 then
ev_type.INTERNAL = {
name = "INTERNAL",
num = event.num,
}
- elseif symbols[addr] then
+ elseif symbols.lfunc[addr] then
ev_type[event.loc.line] = {
name = string.format(
- "%s:%d", symbols[addr].source, symbols[addr].linedefined
+ "%s:%d", symbols.lfunc[addr].source, symbols.lfunc[addr].linedefined
),
num = event.num,
}
@@ -77,10 +114,22 @@ local function form_source_line(line)
return string.format("@%s:%d", arg[0], line)
end
-local function check_alloc_report(alloc, line, function_line, nevents)
- assert(form_source_line(function_line) == alloc[line].name)
- assert(alloc[line].num == nevents, ("got=%d, expected=%d"):format(
- alloc[line].num,
+local function check_alloc_report(alloc, traceno, line, function_line, nevents)
+ assert(alloc[line], ("no event on line %d"):format(line))
+ local event = alloc[line]
+ local expected_name
+ if traceno ~= 0 then
+ expected_name = string.format("TRACE [%d] ", traceno)..
+ form_source_line(function_line)
+ else
+ expected_name = form_source_line(function_line)
+ end
+ assert(expected_name == event.name, ("got='%s', expected='%s'"):format(
+ event.name,
+ expected_name
+ ))
+ assert(event.num == nevents, ("got=%d, expected=%d"):format(
+ event.num,
nevents
))
return true
@@ -115,20 +164,7 @@ end)
test:test("output", function(subtest)
subtest:plan(7)
- local res, err = pcall(generate_output, TMP_BINFILE)
-
- -- Want to cleanup carefully if something went wrong.
- if not res then
- os.remove(TMP_BINFILE)
- error(err)
- end
-
- local reader = bufread.new(TMP_BINFILE)
- local symbols = symtab.parse(reader)
- local events = memprof.parse(reader, symbols)
-
- -- We don't need it any more.
- os.remove(TMP_BINFILE)
+ local symbols, events = generate_parsed_output(default_payload)
local alloc = fill_ev_type(events, symbols, "alloc")
local free = fill_ev_type(events, symbols, "free")
@@ -139,18 +175,18 @@ test:test("output", function(subtest)
-- the number of allocations.
-- 1 event - alocation of table by itself + 1 allocation
-- of array part as far it is bigger than LJ_MAX_COLOSIZE (16).
- subtest:ok(check_alloc_report(alloc, 27, 25, 2))
- -- 100 strings allocations.
- subtest:ok(check_alloc_report(alloc, 32, 25, 100))
+ subtest:ok(check_alloc_report(alloc, 0, 34, 32, 2))
+ -- 20 strings allocations.
+ subtest:ok(check_alloc_report(alloc, 0, 39, 32, 20))
-- Collect all previous allocated objects.
- subtest:ok(free.INTERNAL.num == 102)
+ subtest:ok(free.INTERNAL.num == 22)
-- Tests for leak-only option.
-- See also https://github.com/tarantool/tarantool/issues/5812.
local heap_delta = process.form_heap_delta(events, symbols)
- local tab_alloc_stats = heap_delta[form_source_line(27)]
- local str_alloc_stats = heap_delta[form_source_line(32)]
+ local tab_alloc_stats = heap_delta[form_source_line(34)]
+ local str_alloc_stats = heap_delta[form_source_line(39)]
subtest:ok(tab_alloc_stats.nalloc == tab_alloc_stats.nfree)
subtest:ok(tab_alloc_stats.dbytes == 0)
subtest:ok(str_alloc_stats.nalloc == str_alloc_stats.nfree)
@@ -179,5 +215,38 @@ test:test("stack-resize", function(subtest)
misc.memprof.stop()
end)
+-- Test profiler with enabled JIT.
jit.on()
+
+test:test("jit-output", function(subtest)
+ -- Disabled on *BSD due to #4819.
+ if jit.os == 'BSD' then
+ subtest:plan(1)
+ subtest:skip('Disabled due to #4819')
+ return
+ end
+
+ subtest:plan(3)
+
+ jit.opt.start(3, "hotloop=10")
+ jit.flush()
+
+ -- Pregenerate traces to fill symtab entries in the next run.
+ default_payload()
+
+ local symbols, events = generate_parsed_output(default_payload)
+
+ local alloc = fill_ev_type(events, symbols, "alloc")
+ local free = fill_ev_type(events, symbols, "free")
+
+ -- We expect, that loop will be compiled into a trace.
+ subtest:ok(check_alloc_report(alloc, 1, 37, 32, 20))
+ -- See same checks with jit.off().
+ subtest:ok(check_alloc_report(alloc, 0, 34, 32, 2))
+ subtest:ok(free.INTERNAL.num == 22)
+
+ -- Restore default JIT settings.
+ jit.opt.start(unpack(jit_opt_default))
+end)
+
os.exit(test:check() and 0 or 1)
diff --git a/tools/memprof/humanize.lua b/tools/memprof/humanize.lua
index 7771005d..800a465e 100644
--- a/tools/memprof/humanize.lua
+++ b/tools/memprof/humanize.lua
@@ -21,7 +21,7 @@ function M.render(events, symbols)
for i = 1, #ids do
local event = events[ids[i]]
print(string.format("%s: %d events\t+%d bytes\t-%d bytes",
- symtab.demangle(symbols, event.loc),
+ M.describe_location(symbols, event.loc),
event.num,
event.alloc,
event.free
@@ -29,7 +29,7 @@ function M.render(events, symbols)
local prim_loc = {}
for _, heap_chunk in pairs(event.primary) do
- table.insert(prim_loc, symtab.demangle(symbols, heap_chunk.loc))
+ table.insert(prim_loc, M.describe_location(symbols, heap_chunk.loc))
end
if #prim_loc ~= 0 then
table.sort(prim_loc)
@@ -81,4 +81,21 @@ function M.leak_info(dheap)
print("")
end
+function M.describe_location(symbols, loc)
+ if loc.traceno == 0 then
+ return symtab.demangle(symbols, loc)
+ end
+
+ local trace = symbols.trace[loc.traceno]
+
+ -- If trace, which was remembered in the symtab, has not
+ -- been flushed, assotiate it with a proto, where trace
+ -- recording started.
+ if trace and trace.addr == loc.addr then
+ return symtab.demangle(symbols, loc).." started at "..
+ symtab.demangle(symbols, trace.sym_loc)
+ end
+ return symtab.demangle(symbols, loc)
+end
+
return M
diff --git a/tools/memprof/process.lua b/tools/memprof/process.lua
index 0bcb965b..f277ed84 100644
--- a/tools/memprof/process.lua
+++ b/tools/memprof/process.lua
@@ -2,7 +2,7 @@
local M = {}
-local symtab = require "utils.symtab"
+local humanize = require "memprof.humanize"
function M.form_heap_delta(events, symbols)
-- Auto resurrects source event lines for counting/reporting.
@@ -17,7 +17,7 @@ function M.form_heap_delta(events, symbols)
for _, event in pairs(events.alloc) do
if event.loc then
- local ev_line = symtab.demangle(symbols, event.loc)
+ local ev_line = humanize.describe_location(symbols, event.loc)
if (event.alloc > 0) then
dheap[ev_line].dbytes = dheap[ev_line].dbytes + event.alloc
@@ -37,7 +37,7 @@ function M.form_heap_delta(events, symbols)
-- that references the table with memory changed
-- (may be empty).
for _, heap_chunk in pairs(event.primary) do
- local ev_line = symtab.demangle(symbols, heap_chunk.loc)
+ local ev_line = humanize.describe_location(symbols, heap_chunk.loc)
if (heap_chunk.alloced > 0) then
dheap[ev_line].dbytes = dheap[ev_line].dbytes + heap_chunk.alloced
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
index 3ed1dd13..0e742ee1 100644
--- a/tools/utils/symtab.lua
+++ b/tools/utils/symtab.lua
@@ -10,11 +10,12 @@ local band = bit.band
local string_format = string.format
local LJS_MAGIC = "ljs"
-local LJS_CURRENT_VERSION = 1
+local LJS_CURRENT_VERSION = 0x2
local LJS_EPILOGUE_HEADER = 0x80
local LJS_SYMTYPE_MASK = 0x03
local SYMTAB_LFUNC = 0
+local SYMTAB_TRACE = 1
local M = {}
@@ -24,18 +25,38 @@ local function parse_sym_lfunc(reader, symtab)
local sym_chunk = reader:read_string()
local sym_line = reader:read_uleb128()
- symtab[sym_addr] = {
+ symtab.lfunc[sym_addr] = {
source = sym_chunk,
linedefined = sym_line,
}
end
+local function parse_sym_trace(reader, symtab)
+ local trace_addr = reader:read_uleb128()
+ local traceno = reader:read_uleb128()
+ local sym_addr = reader:read_uleb128()
+ local sym_line = reader:read_uleb128()
+
+ symtab.trace[traceno] = {
+ addr = trace_addr,
+ sym_loc = {
+ addr = sym_addr,
+ line = sym_line,
+ traceno = 0,
+ },
+ }
+end
+
local parsers = {
[SYMTAB_LFUNC] = parse_sym_lfunc,
+ [SYMTAB_TRACE] = parse_sym_trace,
}
function M.parse(reader)
- local symtab = {}
+ local symtab = {
+ lfunc = {},
+ trace = {},
+ }
local magic = reader:read_octets(3)
local version = reader:read_octets(1)
@@ -73,18 +94,26 @@ function M.parse(reader)
return symtab
end
-function M.demangle(symtab, loc)
+local function demangle_lfunc(symtab, loc)
local addr = loc.addr
if addr == 0 then
return "INTERNAL"
+ elseif symtab.lfunc[addr] then
+ return string_format("%s:%d", symtab.lfunc[loc.addr].source, loc.line)
end
+ return string_format("CFUNC %#x", addr)
+end
- if symtab[addr] then
- return string_format("%s:%d", symtab[addr].source, loc.line)
- end
+local function demangle_trace(loc)
+ return string_format("TRACE [%d] 0x%x", loc.traceno, loc.addr)
+end
- return string_format("CFUNC %#x", addr)
+function M.demangle(symtab, loc)
+ if loc.traceno ~= 0 then
+ return demangle_trace(loc)
+ end
+ return demangle_lfunc(symtab, loc)
end
return M
--
2.32.0
More information about the Tarantool-patches
mailing list