[Tarantool-patches] [PATCH luajit v1 1/4] memprof: enrich symtab when meeting new prototype
Mikhail Shishatskiy
m.shishatskiy at tarantool.org
Tue Aug 24 17:16:36 MSK 2021
Sorry, I forgot about LJ_HASMEMPROF in some files:
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index 692e1a9d..242ea465 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -353,7 +353,9 @@ GCproto *lj_bcread_proto(LexState *ls)
pt->sizeuv = (uint8_t)sizeuv;
pt->flags = (uint8_t)flags;
pt->trace = 0;
+#if LJ_HASMEMPROF
pt->is_streamed = 0;
+#endif
setgcref(pt->chunkname, obj2gco(ls->chunkname));
/* Close potentially uninitialized gap between bc and kgc. */
diff --git a/src/lj_parse.c b/src/lj_parse.c
index 1a540610..746c6e33 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -1577,7 +1577,9 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
pt->flags = (uint8_t)(fs->flags & ~(PROTO_HAS_RETURN|PROTO_FIXUP_RETURN));
pt->numparams = fs->numparams;
pt->framesize = fs->framesize;
+#if LJ_HASMEMPROF
pt->is_streamed = 0;
+#endif
setgcref(pt->chunkname, obj2gco(ls->chunkname));
/* Close potentially uninitialized gap between bc and kgc. */
On 21.08.2021 19:49, Mikhail Shishatskiy wrote:
>Previously, the profiler dumped all the function prototypes only
>at the start of profiling. It was enough for most cases, but
>sometimes we may want to investigate the memory profile of module
>loading. In such a case, we expect memprof to dump new prototypes
>from parsed source code.
>
>This patch extends memprof's streaming format with entries providing
>extra information about allocation source's symbols if they were not
>streamed at the start of profiling.
>
>| loc := loc-lua | loc-lua-sym | loc-c | loc-trace
>| loc-lua-sym := sym-addr sym-chunk sym-line line-no
>
>The `sym-chunk` and `sym-line` are the prototype's chunk name and line
>where it was defined.
>
>To recognize entries with new symbols, event-header format changed:
>
>| event-header: [FUTSSSEE]
>| * T : 1 bit indicating if additional symtab entry provided or not
>
>Also, the profiler parser is adjusted to recognize entries described
>above and enrich the symtab on the fly while parsing events. For this
>reason, <utils/symtab.lua> API has been changed: function
>`parse_sym_lfunc` became public and now returns sym-addr to be used
>in the event parser module.
>
>Resolves tarantool/tarantool#5815
>---
>
>Issue: https://github.com/tarantool/tarantool/issues/5815
>Branch: https://github.com/tarantool/luajit/tree/shishqa/gh-5815-enrich-symtab-when-prototype-is-allocated
>Tarantool branch: https://github.com/tarantool/tarantool/tree/shishqa/gh-5815-enrich-symtab-when-prototype-is-allocated
>
> src/lj_bcread.c | 1 +
> src/lj_memprof.c | 19 +++-
> src/lj_memprof.h | 16 ++-
> src/lj_obj.h | 7 ++
> src/lj_parse.c | 1 +
> .../misclib-memprof-lapi.test.lua | 102 +++++++++++++-----
> tools/memprof/parse.lua | 55 +++++++---
> tools/utils/symtab.lua | 6 +-
> 8 files changed, 160 insertions(+), 47 deletions(-)
>
>diff --git a/src/lj_bcread.c b/src/lj_bcread.c
>index 48c5e7c7..692e1a9d 100644
>--- a/src/lj_bcread.c
>+++ b/src/lj_bcread.c
>@@ -353,6 +353,7 @@ GCproto *lj_bcread_proto(LexState *ls)
> pt->sizeuv = (uint8_t)sizeuv;
> pt->flags = (uint8_t)flags;
> pt->trace = 0;
>+ pt->is_streamed = 0;
> setgcref(pt->chunkname, obj2gco(ls->chunkname));
>
> /* Close potentially uninitialized gap between bc and kgc. */
>diff --git a/src/lj_memprof.c b/src/lj_memprof.c
>index 14e4e67b..fb217a2c 100644
>--- a/src/lj_memprof.c
>+++ b/src/lj_memprof.c
>@@ -67,7 +67,7 @@ static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
> static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
> {
> const GCRef *iter = &g->gc.root;
>- const GCobj *o;
>+ GCobj *o;
> const size_t ljs_header_len = sizeof(ljs_header) / sizeof(ljs_header[0]);
>
> /* Write prologue. */
>@@ -76,11 +76,12 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
> while ((o = gcref(*iter)) != NULL) {
> switch (o->gch.gct) {
> case (~LJ_TPROTO): {
>- const GCproto *pt = gco2pt(o);
>+ GCproto *pt = gco2pt(o);
> lj_wbuf_addbyte(out, SYMTAB_LFUNC);
> lj_wbuf_addu64(out, (uintptr_t)pt);
> lj_wbuf_addstring(out, proto_chunknamestr(pt));
> lj_wbuf_addu64(out, (uint64_t)pt->firstline);
>+ pt->is_streamed = 1;
> break;
> }
> case (~LJ_TTRACE): {
>@@ -138,6 +139,7 @@ static void memprof_write_lfunc(struct lj_wbuf *out, uint8_t aevent,
> ** -DLUAJIT_DISABLE_DEBUGINFO flag.
> */
> const BCLine line = lj_debug_frameline(L, fn, nextframe);
>+ GCproto *pt = funcproto(fn);
>
> if (line < 0) {
> /*
>@@ -149,9 +151,20 @@ static void memprof_write_lfunc(struct lj_wbuf *out, uint8_t aevent,
> ** We report such allocations as internal in order not to confuse users.
> */
> lj_wbuf_addbyte(out, aevent | ASOURCE_INT);
>+
>+ } else if (LJ_UNLIKELY(!pt->is_streamed)) {
>+
>+ lj_wbuf_addbyte(out, aevent | ASOURCE_LFUNC | LJM_SYMTAB);
>+ lj_wbuf_addu64(out, (uintptr_t)pt);
>+ lj_wbuf_addstring(out, proto_chunknamestr(pt));
>+ lj_wbuf_addu64(out, (uint64_t)pt->firstline);
>+ lj_wbuf_addu64(out, (uint64_t)line);
>+ pt->is_streamed = 1;
>+
> } else {
>+
> lj_wbuf_addbyte(out, aevent | ASOURCE_LFUNC);
>- lj_wbuf_addu64(out, (uintptr_t)funcproto(fn));
>+ lj_wbuf_addu64(out, (uintptr_t)pt);
> lj_wbuf_addu64(out, (uint64_t)line);
> }
> }
>diff --git a/src/lj_memprof.h b/src/lj_memprof.h
>index e72dadf7..0f5b4c6d 100644
>--- a/src/lj_memprof.h
>+++ b/src/lj_memprof.h
>@@ -57,7 +57,7 @@
> #define SYMTAB_TRACE ((uint8_t)1)
> #define SYMTAB_FINAL ((uint8_t)0x80)
>
>-#define LJM_CURRENT_FORMAT_VERSION 0x02
>+#define LJM_CURRENT_FORMAT_VERSION 0x03
>
> /*
> ** Event stream format:
>@@ -73,11 +73,14 @@
> ** event-realloc := event-header loc? oaddr osize naddr nsize
> ** event-free := event-header loc? oaddr osize
> ** event-header := <BYTE>
>-** loc := loc-lua | loc-c | loc-trace
>+** loc := loc-lua | loc-lua-sym | loc-c | loc-trace
> ** loc-lua := sym-addr line-no
>+** loc-lua-sym := sym-addr sym-chunk sym-line line-no
> ** loc-c := sym-addr
> ** loc-trace := trace-addr trace-no
> ** sym-addr := <ULEB128>
>+** sym-chunk := string
>+** sym-line := <ULEB128>
> ** line-no := <ULEB128>
> ** trace-addr := <ULEB128>
> ** trace-no := <ULEB128>
>@@ -85,6 +88,9 @@
> ** naddr := <ULEB128>
> ** osize := <ULEB128>
> ** nsize := <ULEB128>
>+** string := string-len string-payload
>+** string-len := <ULEB128>
>+** string-payload := <BYTE> {string-len}
> ** epilogue := event-header
> **
> ** <BYTE> : A single byte (no surprises here)
>@@ -95,10 +101,11 @@
> ** version: [VVVVVVVV]
> ** * VVVVVVVV: Byte interpreted as a plain integer version number
> **
>-** event-header: [FUUSSSEE]
>+** event-header: [FUTSSSEE]
> ** * EE : 2 bits for representing allocation event type (AEVENT_*)
> ** * SSS : 3 bits for representing allocation source type (ASOURCE_*)
>-** * UU : 2 unused bits
>+** * T : 1 bit indicating if additional symtab entry provided or not
>+** * U : 1 unused bit
> ** * F : 0 for regular events, 1 for epilogue's *F*inal header
> ** (if F is set to 1, all other bits are currently ignored)
> */
>@@ -114,6 +121,7 @@
> #define ASOURCE_CFUNC ((uint8_t)(3 << 2))
> #define ASOURCE_TRACE ((uint8_t)(4 << 2))
>
>+#define LJM_SYMTAB 0x20
> #define LJM_EPILOGUE_HEADER 0x80
>
> /* Profiler public API. */
>diff --git a/src/lj_obj.h b/src/lj_obj.h
>index d26e60be..c8f6b13a 100644
>--- a/src/lj_obj.h
>+++ b/src/lj_obj.h
>@@ -385,6 +385,13 @@ typedef struct GCproto {
> MRef lineinfo; /* Compressed map from bytecode ins. to source line. */
> MRef uvinfo; /* Upvalue names. */
> MRef varinfo; /* Names and compressed extents of local variables. */
>+#if LJ_HASMEMPROF
>+ /*
>+ ** Flag, indicating, if this proto has been streamed to the
>+ ** memprof's binary output.
>+ */
>+ uint8_t is_streamed;
>+#endif
> } GCproto;
>
> /* Flags for prototype. */
>diff --git a/src/lj_parse.c b/src/lj_parse.c
>index a6325a76..1a540610 100644
>--- a/src/lj_parse.c
>+++ b/src/lj_parse.c
>@@ -1577,6 +1577,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
> pt->flags = (uint8_t)(fs->flags & ~(PROTO_HAS_RETURN|PROTO_FIXUP_RETURN));
> pt->numparams = fs->numparams;
> pt->framesize = fs->framesize;
>+ pt->is_streamed = 0;
> setgcref(pt->chunkname, obj2gco(ls->chunkname));
>
> /* Close potentially uninitialized gap between bc and kgc. */
>diff --git a/test/tarantool-tests/misclib-memprof-lapi.test.lua b/test/tarantool-tests/misclib-memprof-lapi.test.lua
>index 17bcb2f1..f4666da5 100644
>--- a/test/tarantool-tests/misclib-memprof-lapi.test.lua
>+++ b/test/tarantool-tests/misclib-memprof-lapi.test.lua
>@@ -7,7 +7,7 @@ require("utils").skipcond(
> local tap = require("tap")
>
> local test = tap.test("misc-memprof-lapi")
>-test:plan(4)
>+test:plan(5)
>
> local jit_opt_default = {
> 3, -- level
>@@ -70,7 +70,7 @@ local function generate_parsed_output(payload)
>
> local reader = bufread.new(TMP_BINFILE)
> local symbols = symtab.parse(reader)
>- local events = memprof.parse(reader)
>+ local events = memprof.parse(reader, symbols)
>
> -- We don't need it any more.
> os.remove(TMP_BINFILE)
>@@ -79,7 +79,9 @@ local function generate_parsed_output(payload)
> end
>
> local function fill_ev_type(events, symbols, event_type)
>- local ev_type = {}
>+ local ev_type = {
>+ trace = {},
>+ }
> for _, event in pairs(events[event_type]) do
> local addr = event.loc.addr
> local traceno = event.loc.traceno
>@@ -87,7 +89,7 @@ local function fill_ev_type(events, symbols, event_type)
> if traceno ~= 0 and symbols.trace[traceno] then
> local trace_loc = symbols.trace[traceno].sym_loc
> addr = trace_loc.addr
>- ev_type[trace_loc.line] = {
>+ ev_type.trace[traceno] = {
> name = string.format("TRACE [%d] %s:%d",
> traceno, symbols.lfunc[addr].source, symbols.lfunc[addr].linedefined
> ),
>@@ -99,9 +101,13 @@ local function fill_ev_type(events, symbols, event_type)
> num = event.num,
> }
> elseif symbols.lfunc[addr] then
>- ev_type[event.loc.line] = {
>+ local source = symbols.lfunc[addr].source
>+ if ev_type[source] == nil then
>+ ev_type[source] = {}
>+ end
>+ ev_type[source][event.loc.line] = {
> name = string.format(
>- "%s:%d", symbols.lfunc[addr].source, symbols.lfunc[addr].linedefined
>+ "%s:%d", source, symbols.lfunc[addr].linedefined
> ),
> num = event.num,
> }
>@@ -110,23 +116,35 @@ local function fill_ev_type(events, symbols, event_type)
> return ev_type
> end
>
>-local function form_source_line(line)
>- return string.format("@%s:%d", arg[0], line)
>+local function form_source_line(loc)
>+ return string.format("%s:%d", loc.source, loc.function_line)
> end
>
>-local function check_alloc_report(alloc, traceno, line, function_line, nevents)
>- assert(alloc[line], ("no event on line %d"):format(line))
>- local event = alloc[line]
>- local expected_name
>- if traceno ~= 0 then
>- expected_name = string.format("TRACE [%d] ", traceno)..
>- form_source_line(function_line)
>+local function get_event_name_trace(loc, alloc)
>+ local event = alloc.trace[loc.traceno]
>+ return event, string.format("TRACE [%d] ", loc.traceno)..form_source_line(loc)
>+end
>+
>+local function get_event_name_lua(loc, alloc)
>+ local event = alloc[loc.source][loc.line]
>+ return event, form_source_line(loc)
>+end
>+
>+local function check_alloc_report(loc, alloc, nevents)
>+ if loc.source == nil then
>+ loc.source = '@'..arg[0]
>+ end
>+
>+ local event, name
>+ if loc.traceno ~= nil then
>+ event, name = get_event_name_trace(loc, alloc)
> else
>- expected_name = form_source_line(function_line)
>+ event, name = get_event_name_lua(loc, alloc)
> end
>- assert(expected_name == event.name, ("got='%s', expected='%s'"):format(
>+
>+ assert(name == event.name, ("got='%s', expected='%s'"):format(
> event.name,
>- expected_name
>+ name
> ))
> assert(event.num == nevents, ("got=%d, expected=%d"):format(
> event.num,
>@@ -175,9 +193,9 @@ test:test("output", function(subtest)
> -- the number of allocations.
> -- 1 event - alocation of table by itself + 1 allocation
> -- of array part as far it is bigger than LJ_MAX_COLOSIZE (16).
>- subtest:ok(check_alloc_report(alloc, 0, 34, 32, 2))
>+ subtest:ok(check_alloc_report({line = 34, function_line = 32}, alloc, 2))
> -- 20 strings allocations.
>- subtest:ok(check_alloc_report(alloc, 0, 39, 32, 20))
>+ subtest:ok(check_alloc_report({line = 39, function_line = 32}, alloc, 20))
>
> -- Collect all previous allocated objects.
> subtest:ok(free.INTERNAL.num == 22)
>@@ -185,8 +203,8 @@ test:test("output", function(subtest)
> -- Tests for leak-only option.
> -- See also https://github.com/tarantool/tarantool/issues/5812.
> local heap_delta = process.form_heap_delta(events, symbols)
>- local tab_alloc_stats = heap_delta[form_source_line(34)]
>- local str_alloc_stats = heap_delta[form_source_line(39)]
>+ local tab_alloc_stats = heap_delta[form_source_line({ function_line = 34 })]
>+ local str_alloc_stats = heap_delta[form_source_line({ function_line = 39 })]
> subtest:ok(tab_alloc_stats.nalloc == tab_alloc_stats.nfree)
> subtest:ok(tab_alloc_stats.dbytes == 0)
> subtest:ok(str_alloc_stats.nalloc == str_alloc_stats.nfree)
>@@ -215,6 +233,38 @@ test:test("stack-resize", function(subtest)
> misc.memprof.stop()
> end)
>
>+-- Test for extending symtab with function prototypes
>+-- while profiler is running.
>+test:test("symtab-enriching", function(subtest)
>+ subtest:plan(2)
>+
>+ local payload_str = [[
>+ local M = {
>+ tmp = string.rep("1", 100) -- line 2.
>+ }
>+
>+ function M.payload()
>+ local str = string.rep("42", 100) -- line 6.
>+ end
>+
>+ return M
>+ ]]
>+
>+ local symbols, events = generate_parsed_output(function()
>+ local str_chunk = assert(loadstring(payload_str, 'str_chunk'))()
>+ str_chunk.payload()
>+ end)
>+
>+ local alloc = fill_ev_type(events, symbols, "alloc")
>+
>+ subtest:ok(check_alloc_report(
>+ {source = 'str_chunk', line = 6, function_line = 5}, alloc, 1)
>+ )
>+ subtest:ok(check_alloc_report(
>+ {source = 'str_chunk', line = 2, function_line = 0}, alloc, 1)
>+ )
>+end)
>+
> -- Test profiler with enabled JIT.
> jit.on()
>
>@@ -247,9 +297,13 @@ test:test("jit-output", function(subtest)
> alloc = fill_ev_type(events, symbols, "alloc")
>
> -- We expect, that loop will be compiled into a trace.
>- subtest:ok(check_alloc_report(alloc, 1, 37, 32, 20))
>+ subtest:ok(check_alloc_report(
>+ { traceno = 1, line = 37, function_line = 32 }, alloc, 20
>+ ))
> -- See same checks with jit.off().
>- subtest:ok(check_alloc_report(alloc, 0, 34, 32, 2))
>+ subtest:ok(check_alloc_report(
>+ { line = 34, function_line = 32 }, alloc, 2
>+ ))
>
> -- Restore default JIT settings.
> jit.opt.start(unpack(jit_opt_default))
>diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
>index adc7c072..888c6636 100644
>--- a/tools/memprof/parse.lua
>+++ b/tools/memprof/parse.lua
>@@ -8,11 +8,14 @@ local bit = require "bit"
> local band = bit.band
> local lshift = bit.lshift
>
>+local symtab = require "utils.symtab"
>+
> local string_format = string.format
>
> local LJM_MAGIC = "ljm"
>-local LJM_CURRENT_VERSION = 0x02
>+local LJM_CURRENT_VERSION = 0x03
>
>+local LJM_SYMTAB = 0x20
> local LJM_EPILOGUE_HEADER = 0x80
>
> local AEVENT_ALLOC = 1
>@@ -26,9 +29,10 @@ local ASOURCE_LFUNC = lshift(2, 2)
> local ASOURCE_CFUNC = lshift(3, 2)
> local ASOURCE_TRACE = lshift(4, 2)
>
>-local ASOURCE_MASK = lshift(0x7, 2)
>+local ASOURCE_MASK = lshift(0xF, 2)
>+local ASOURCE_TYPE_MASK = lshift(0x7, 2)
>
>-local EV_HEADER_MAX = ASOURCE_TRACE + AEVENT_REALLOC
>+local EV_HEADER_MAX = LJM_SYMTAB + ASOURCE_TRACE + AEVENT_REALLOC
>
> local M = {}
>
>@@ -70,7 +74,18 @@ local function id_location(addr, line, traceno)
> }
> end
>
>-local function parse_location(reader, asource)
>+local function parse_location_symbols(reader, asource, symbols)
>+ if asource == ASOURCE_LFUNC then
>+ return id_location(
>+ symtab.parse_sym_lfunc(reader, symbols),
>+ reader:read_uleb128(),
>+ 0
>+ )
>+ end
>+ error("Unknown asource "..asource)
>+end
>+
>+local function parse_location_common(reader, asource)
> if asource == ASOURCE_INT then
> return id_location(0, 0, 0)
> elseif asource == ASOURCE_CFUNC then
>@@ -83,8 +98,20 @@ local function parse_location(reader, asource)
> error("Unknown asource "..asource)
> end
>
>-local function parse_alloc(reader, asource, events, heap)
>- local id, loc = parse_location(reader, asource)
>+local function parse_location(reader, asource, symbols)
>+ local has_symbols = band(asource, LJM_SYMTAB)
>+ local asource_type = band(asource, ASOURCE_TYPE_MASK)
>+ local id, loc
>+ if has_symbols == LJM_SYMTAB then
>+ id, loc = parse_location_symbols(reader, asource_type, symbols)
>+ else
>+ id, loc = parse_location_common(reader, asource_type)
>+ end
>+ return id, loc
>+end
>+
>+local function parse_alloc(reader, asource, events, heap, symbols)
>+ local id, loc = parse_location(reader, asource, symbols)
>
> local naddr = reader:read_uleb128()
> local nsize = reader:read_uleb128()
>@@ -99,8 +126,8 @@ local function parse_alloc(reader, asource, events, heap)
> heap[naddr] = {nsize, id, loc}
> end
>
>-local function parse_realloc(reader, asource, events, heap)
>- local id, loc = parse_location(reader, asource)
>+local function parse_realloc(reader, asource, events, heap, symbols)
>+ local id, loc = parse_location(reader, asource, symbols)
>
> local oaddr = reader:read_uleb128()
> local osize = reader:read_uleb128()
>@@ -121,8 +148,8 @@ local function parse_realloc(reader, asource, events, heap)
> heap[naddr] = {nsize, id, loc}
> end
>
>-local function parse_free(reader, asource, events, heap)
>- local id, loc = parse_location(reader, asource)
>+local function parse_free(reader, asource, events, heap, symbols)
>+ local id, loc = parse_location(reader, asource, symbols)
>
> local oaddr = reader:read_uleb128()
> local osize = reader:read_uleb128()
>@@ -155,7 +182,7 @@ local function ev_header_split(evh)
> return band(evh, AEVENT_MASK), band(evh, ASOURCE_MASK)
> end
>
>-local function parse_event(reader, events)
>+local function parse_event(reader, events, symbols)
> local ev_header = reader:read_octet()
>
> assert(ev_header_is_valid(ev_header), "Bad ev_header "..ev_header)
>@@ -169,12 +196,12 @@ local function parse_event(reader, events)
>
> assert(parser, "Bad aevent "..aevent)
>
>- parser.parse(reader, asource, events[parser.evname], events.heap)
>+ parser.parse(reader, asource, events[parser.evname], events.heap, symbols)
>
> return true
> end
>
>-function M.parse(reader)
>+function M.parse(reader, symbols)
> local events = {
> alloc = {},
> realloc = {},
>@@ -199,7 +226,7 @@ function M.parse(reader)
> ))
> end
>
>- while parse_event(reader, events) do
>+ while parse_event(reader, events, symbols) do
> -- Empty body.
> end
>
>diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
>index 0e742ee1..601c9563 100644
>--- a/tools/utils/symtab.lua
>+++ b/tools/utils/symtab.lua
>@@ -20,7 +20,7 @@ local SYMTAB_TRACE = 1
> local M = {}
>
> -- Parse a single entry in a symtab: lfunc symbol.
>-local function parse_sym_lfunc(reader, symtab)
>+function M.parse_sym_lfunc(reader, symtab)
> local sym_addr = reader:read_uleb128()
> local sym_chunk = reader:read_string()
> local sym_line = reader:read_uleb128()
>@@ -29,6 +29,8 @@ local function parse_sym_lfunc(reader, symtab)
> source = sym_chunk,
> linedefined = sym_line,
> }
>+
>+ return sym_addr
> end
>
> local function parse_sym_trace(reader, symtab)
>@@ -48,7 +50,7 @@ local function parse_sym_trace(reader, symtab)
> end
>
> local parsers = {
>- [SYMTAB_LFUNC] = parse_sym_lfunc,
>+ [SYMTAB_LFUNC] = M.parse_sym_lfunc,
> [SYMTAB_TRACE] = parse_sym_trace,
> }
>
>--
>2.32.0
>
More information about the Tarantool-patches
mailing list