[Tarantool-patches] [PATCH luajit v1 1/4] memprof: enrich symtab when meeting new prototype

Mikhail Shishatskiy m.shishatskiy at tarantool.org
Sat Aug 21 15:49:59 MSK 2021


Previously, the profiler dumped all the function prototypes only
at the start of profiling. It was enough for most cases, but
sometimes we may want to investigate the memory profile of module
loading. In such a case, we expect memprof to dump new prototypes
from parsed source code.

This patch extends memprof's streaming format with entries providing
extra information about allocation source's symbols if they were not
streamed at the start of profiling.

| loc         := loc-lua | loc-lua-sym | loc-c | loc-trace
| loc-lua-sym := sym-addr sym-chunk sym-line line-no

The `sym-chunk` and `sym-line` are the prototype's chunk name and line
where it was defined.

To recognize entries with new symbols, event-header format changed:

| event-header: [FUTSSSEE]
| * T : 1 bit indicating if additional symtab entry provided or not

Also, the profiler parser is adjusted to recognize entries described
above and enrich the symtab on the fly while parsing events. For this
reason, <utils/symtab.lua> API has been changed: function
`parse_sym_lfunc` became public and now returns sym-addr to be used
in the event parser module.

Resolves tarantool/tarantool#5815
---

Issue: https://github.com/tarantool/tarantool/issues/5815
Branch: https://github.com/tarantool/luajit/tree/shishqa/gh-5815-enrich-symtab-when-prototype-is-allocated
Tarantool branch: https://github.com/tarantool/tarantool/tree/shishqa/gh-5815-enrich-symtab-when-prototype-is-allocated

 src/lj_bcread.c                               |   1 +
 src/lj_memprof.c                              |  19 +++-
 src/lj_memprof.h                              |  16 ++-
 src/lj_obj.h                                  |   7 ++
 src/lj_parse.c                                |   1 +
 .../misclib-memprof-lapi.test.lua             | 102 +++++++++++++-----
 tools/memprof/parse.lua                       |  55 +++++++---
 tools/utils/symtab.lua                        |   6 +-
 8 files changed, 160 insertions(+), 47 deletions(-)

diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index 48c5e7c7..692e1a9d 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -353,6 +353,7 @@ GCproto *lj_bcread_proto(LexState *ls)
   pt->sizeuv = (uint8_t)sizeuv;
   pt->flags = (uint8_t)flags;
   pt->trace = 0;
+  pt->is_streamed = 0;
   setgcref(pt->chunkname, obj2gco(ls->chunkname));
 
   /* Close potentially uninitialized gap between bc and kgc. */
diff --git a/src/lj_memprof.c b/src/lj_memprof.c
index 14e4e67b..fb217a2c 100644
--- a/src/lj_memprof.c
+++ b/src/lj_memprof.c
@@ -67,7 +67,7 @@ static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
 static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
 {
   const GCRef *iter = &g->gc.root;
-  const GCobj *o;
+  GCobj *o;
   const size_t ljs_header_len = sizeof(ljs_header) / sizeof(ljs_header[0]);
 
   /* Write prologue. */
@@ -76,11 +76,12 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
   while ((o = gcref(*iter)) != NULL) {
     switch (o->gch.gct) {
     case (~LJ_TPROTO): {
-      const GCproto *pt = gco2pt(o);
+      GCproto *pt = gco2pt(o);
       lj_wbuf_addbyte(out, SYMTAB_LFUNC);
       lj_wbuf_addu64(out, (uintptr_t)pt);
       lj_wbuf_addstring(out, proto_chunknamestr(pt));
       lj_wbuf_addu64(out, (uint64_t)pt->firstline);
+      pt->is_streamed = 1;
       break;
     }
     case (~LJ_TTRACE): {
@@ -138,6 +139,7 @@ static void memprof_write_lfunc(struct lj_wbuf *out, uint8_t aevent,
   ** -DLUAJIT_DISABLE_DEBUGINFO flag.
   */
   const BCLine line = lj_debug_frameline(L, fn, nextframe);
+  GCproto *pt = funcproto(fn);
 
   if (line < 0) {
     /*
@@ -149,9 +151,20 @@ static void memprof_write_lfunc(struct lj_wbuf *out, uint8_t aevent,
     ** We report such allocations as internal in order not to confuse users.
     */
     lj_wbuf_addbyte(out, aevent | ASOURCE_INT);
+
+  } else if (LJ_UNLIKELY(!pt->is_streamed)) {
+
+    lj_wbuf_addbyte(out, aevent | ASOURCE_LFUNC | LJM_SYMTAB);
+    lj_wbuf_addu64(out, (uintptr_t)pt);
+    lj_wbuf_addstring(out, proto_chunknamestr(pt));
+    lj_wbuf_addu64(out, (uint64_t)pt->firstline);
+    lj_wbuf_addu64(out, (uint64_t)line);
+    pt->is_streamed = 1;
+
   } else {
+
     lj_wbuf_addbyte(out, aevent | ASOURCE_LFUNC);
-    lj_wbuf_addu64(out, (uintptr_t)funcproto(fn));
+    lj_wbuf_addu64(out, (uintptr_t)pt);
     lj_wbuf_addu64(out, (uint64_t)line);
   }
 }
diff --git a/src/lj_memprof.h b/src/lj_memprof.h
index e72dadf7..0f5b4c6d 100644
--- a/src/lj_memprof.h
+++ b/src/lj_memprof.h
@@ -57,7 +57,7 @@
 #define SYMTAB_TRACE ((uint8_t)1)
 #define SYMTAB_FINAL ((uint8_t)0x80)
 
-#define LJM_CURRENT_FORMAT_VERSION 0x02
+#define LJM_CURRENT_FORMAT_VERSION 0x03
 
 /*
 ** Event stream format:
@@ -73,11 +73,14 @@
 ** event-realloc  := event-header loc? oaddr osize naddr nsize
 ** event-free     := event-header loc? oaddr osize
 ** event-header   := <BYTE>
-** loc            := loc-lua | loc-c | loc-trace
+** loc            := loc-lua | loc-lua-sym | loc-c | loc-trace
 ** loc-lua        := sym-addr line-no
+** loc-lua-sym    := sym-addr sym-chunk sym-line line-no
 ** loc-c          := sym-addr
 ** loc-trace      := trace-addr trace-no
 ** sym-addr       := <ULEB128>
+** sym-chunk      := string
+** sym-line       := <ULEB128>
 ** line-no        := <ULEB128>
 ** trace-addr     := <ULEB128>
 ** trace-no       := <ULEB128>
@@ -85,6 +88,9 @@
 ** naddr          := <ULEB128>
 ** osize          := <ULEB128>
 ** nsize          := <ULEB128>
+** string         := string-len string-payload
+** string-len     := <ULEB128>
+** string-payload := <BYTE> {string-len}
 ** epilogue       := event-header
 **
 ** <BYTE>   :  A single byte (no surprises here)
@@ -95,10 +101,11 @@
 ** version: [VVVVVVVV]
 **  * VVVVVVVV: Byte interpreted as a plain integer version number
 **
-** event-header: [FUUSSSEE]
+** event-header: [FUTSSSEE]
 **  * EE   : 2 bits for representing allocation event type (AEVENT_*)
 **  * SSS  : 3 bits for representing allocation source type (ASOURCE_*)
-**  * UU   : 2 unused bits
+**  * T    : 1 bit indicating if additional symtab entry provided or not
+**  * U    : 1 unused bit
 **  * F    : 0 for regular events, 1 for epilogue's *F*inal header
 **           (if F is set to 1, all other bits are currently ignored)
 */
@@ -114,6 +121,7 @@
 #define ASOURCE_CFUNC ((uint8_t)(3 << 2))
 #define ASOURCE_TRACE ((uint8_t)(4 << 2))
 
+#define LJM_SYMTAB          0x20
 #define LJM_EPILOGUE_HEADER 0x80
 
 /* Profiler public API. */
diff --git a/src/lj_obj.h b/src/lj_obj.h
index d26e60be..c8f6b13a 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -385,6 +385,13 @@ typedef struct GCproto {
   MRef lineinfo;	/* Compressed map from bytecode ins. to source line. */
   MRef uvinfo;		/* Upvalue names. */
   MRef varinfo;		/* Names and compressed extents of local variables. */
+#if LJ_HASMEMPROF
+  /*
+  ** Flag, indicating, if this proto has been streamed to the
+  ** memprof's binary output.
+  */
+  uint8_t is_streamed;
+#endif
 } GCproto;
 
 /* Flags for prototype. */
diff --git a/src/lj_parse.c b/src/lj_parse.c
index a6325a76..1a540610 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -1577,6 +1577,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
   pt->flags = (uint8_t)(fs->flags & ~(PROTO_HAS_RETURN|PROTO_FIXUP_RETURN));
   pt->numparams = fs->numparams;
   pt->framesize = fs->framesize;
+  pt->is_streamed = 0;
   setgcref(pt->chunkname, obj2gco(ls->chunkname));
 
   /* Close potentially uninitialized gap between bc and kgc. */
diff --git a/test/tarantool-tests/misclib-memprof-lapi.test.lua b/test/tarantool-tests/misclib-memprof-lapi.test.lua
index 17bcb2f1..f4666da5 100644
--- a/test/tarantool-tests/misclib-memprof-lapi.test.lua
+++ b/test/tarantool-tests/misclib-memprof-lapi.test.lua
@@ -7,7 +7,7 @@ require("utils").skipcond(
 local tap = require("tap")
 
 local test = tap.test("misc-memprof-lapi")
-test:plan(4)
+test:plan(5)
 
 local jit_opt_default = {
     3, -- level
@@ -70,7 +70,7 @@ local function generate_parsed_output(payload)
 
   local reader = bufread.new(TMP_BINFILE)
   local symbols = symtab.parse(reader)
-  local events = memprof.parse(reader)
+  local events = memprof.parse(reader, symbols)
 
   -- We don't need it any more.
   os.remove(TMP_BINFILE)
@@ -79,7 +79,9 @@ local function generate_parsed_output(payload)
 end
 
 local function fill_ev_type(events, symbols, event_type)
-  local ev_type = {}
+  local ev_type = {
+    trace = {},
+  }
   for _, event in pairs(events[event_type]) do
     local addr = event.loc.addr
     local traceno = event.loc.traceno
@@ -87,7 +89,7 @@ local function fill_ev_type(events, symbols, event_type)
     if traceno ~= 0 and symbols.trace[traceno] then
       local trace_loc = symbols.trace[traceno].sym_loc
       addr = trace_loc.addr
-      ev_type[trace_loc.line] = {
+      ev_type.trace[traceno] = {
         name = string.format("TRACE [%d] %s:%d",
           traceno, symbols.lfunc[addr].source, symbols.lfunc[addr].linedefined
         ),
@@ -99,9 +101,13 @@ local function fill_ev_type(events, symbols, event_type)
         num = event.num,
     }
     elseif symbols.lfunc[addr] then
-      ev_type[event.loc.line] = {
+      local source = symbols.lfunc[addr].source
+      if ev_type[source] == nil then
+        ev_type[source] = {}
+      end
+      ev_type[source][event.loc.line] = {
         name = string.format(
-          "%s:%d", symbols.lfunc[addr].source, symbols.lfunc[addr].linedefined
+          "%s:%d", source, symbols.lfunc[addr].linedefined
         ),
         num = event.num,
       }
@@ -110,23 +116,35 @@ local function fill_ev_type(events, symbols, event_type)
   return ev_type
 end
 
-local function form_source_line(line)
-  return string.format("@%s:%d", arg[0], line)
+local function form_source_line(loc)
+  return string.format("%s:%d", loc.source, loc.function_line)
 end
 
-local function check_alloc_report(alloc, traceno, line, function_line, nevents)
-  assert(alloc[line], ("no event on line %d"):format(line))
-  local event = alloc[line]
-  local expected_name
-  if traceno ~= 0 then
-    expected_name = string.format("TRACE [%d] ", traceno)..
-                    form_source_line(function_line)
+local function get_event_name_trace(loc, alloc)
+  local event = alloc.trace[loc.traceno]
+  return event, string.format("TRACE [%d] ", loc.traceno)..form_source_line(loc)
+end
+
+local function get_event_name_lua(loc, alloc)
+  local event = alloc[loc.source][loc.line]
+  return event, form_source_line(loc)
+end
+
+local function check_alloc_report(loc, alloc, nevents)
+  if loc.source == nil then
+    loc.source = '@'..arg[0]
+  end
+
+  local event, name
+  if loc.traceno ~= nil then
+    event, name = get_event_name_trace(loc, alloc)
   else
-    expected_name = form_source_line(function_line)
+    event, name = get_event_name_lua(loc, alloc)
   end
-  assert(expected_name == event.name, ("got='%s', expected='%s'"):format(
+
+  assert(name == event.name, ("got='%s', expected='%s'"):format(
     event.name,
-    expected_name
+    name
   ))
   assert(event.num == nevents, ("got=%d, expected=%d"):format(
     event.num,
@@ -175,9 +193,9 @@ test:test("output", function(subtest)
   -- the number of allocations.
   -- 1 event - alocation of table by itself + 1 allocation
   -- of array part as far it is bigger than LJ_MAX_COLOSIZE (16).
-  subtest:ok(check_alloc_report(alloc, 0, 34, 32, 2))
+  subtest:ok(check_alloc_report({line = 34, function_line = 32}, alloc, 2))
   -- 20 strings allocations.
-  subtest:ok(check_alloc_report(alloc, 0, 39, 32, 20))
+  subtest:ok(check_alloc_report({line = 39, function_line = 32}, alloc, 20))
 
   -- Collect all previous allocated objects.
   subtest:ok(free.INTERNAL.num == 22)
@@ -185,8 +203,8 @@ test:test("output", function(subtest)
   -- Tests for leak-only option.
   -- See also https://github.com/tarantool/tarantool/issues/5812.
   local heap_delta = process.form_heap_delta(events, symbols)
-  local tab_alloc_stats = heap_delta[form_source_line(34)]
-  local str_alloc_stats = heap_delta[form_source_line(39)]
+  local tab_alloc_stats = heap_delta[form_source_line({ function_line = 34 })]
+  local str_alloc_stats = heap_delta[form_source_line({ function_line = 39 })]
   subtest:ok(tab_alloc_stats.nalloc == tab_alloc_stats.nfree)
   subtest:ok(tab_alloc_stats.dbytes == 0)
   subtest:ok(str_alloc_stats.nalloc == str_alloc_stats.nfree)
@@ -215,6 +233,38 @@ test:test("stack-resize", function(subtest)
   misc.memprof.stop()
 end)
 
+-- Test for extending symtab with function prototypes
+-- while profiler is running.
+test:test("symtab-enriching", function(subtest)
+  subtest:plan(2)
+
+  local payload_str = [[
+  local M = {
+    tmp = string.rep("1", 100)  -- line 2.
+  }
+
+  function M.payload()
+    local str = string.rep("42", 100)  -- line 6.
+  end
+
+  return M
+  ]]
+
+  local symbols, events = generate_parsed_output(function()
+    local str_chunk = assert(loadstring(payload_str, 'str_chunk'))()
+    str_chunk.payload()
+  end)
+
+  local alloc = fill_ev_type(events, symbols, "alloc")
+
+  subtest:ok(check_alloc_report(
+    {source = 'str_chunk', line = 6, function_line = 5}, alloc, 1)
+  )
+  subtest:ok(check_alloc_report(
+    {source = 'str_chunk', line = 2, function_line = 0}, alloc, 1)
+  )
+end)
+
 -- Test profiler with enabled JIT.
 jit.on()
 
@@ -247,9 +297,13 @@ test:test("jit-output", function(subtest)
   alloc = fill_ev_type(events, symbols, "alloc")
 
   -- We expect, that loop will be compiled into a trace.
-  subtest:ok(check_alloc_report(alloc, 1, 37, 32, 20))
+  subtest:ok(check_alloc_report(
+    { traceno = 1, line = 37, function_line = 32 }, alloc, 20
+  ))
   -- See same checks with jit.off().
-  subtest:ok(check_alloc_report(alloc, 0, 34, 32, 2))
+  subtest:ok(check_alloc_report(
+    { line = 34, function_line = 32 }, alloc, 2
+  ))
 
   -- Restore default JIT settings.
   jit.opt.start(unpack(jit_opt_default))
diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
index adc7c072..888c6636 100644
--- a/tools/memprof/parse.lua
+++ b/tools/memprof/parse.lua
@@ -8,11 +8,14 @@ local bit = require "bit"
 local band = bit.band
 local lshift = bit.lshift
 
+local symtab = require "utils.symtab"
+
 local string_format = string.format
 
 local LJM_MAGIC = "ljm"
-local LJM_CURRENT_VERSION = 0x02
+local LJM_CURRENT_VERSION = 0x03
 
+local LJM_SYMTAB          = 0x20
 local LJM_EPILOGUE_HEADER = 0x80
 
 local AEVENT_ALLOC = 1
@@ -26,9 +29,10 @@ local ASOURCE_LFUNC = lshift(2, 2)
 local ASOURCE_CFUNC = lshift(3, 2)
 local ASOURCE_TRACE = lshift(4, 2)
 
-local ASOURCE_MASK = lshift(0x7, 2)
+local ASOURCE_MASK      = lshift(0xF, 2)
+local ASOURCE_TYPE_MASK = lshift(0x7, 2)
 
-local EV_HEADER_MAX = ASOURCE_TRACE + AEVENT_REALLOC
+local EV_HEADER_MAX = LJM_SYMTAB + ASOURCE_TRACE + AEVENT_REALLOC
 
 local M = {}
 
@@ -70,7 +74,18 @@ local function id_location(addr, line, traceno)
   }
 end
 
-local function parse_location(reader, asource)
+local function parse_location_symbols(reader, asource, symbols)
+  if asource == ASOURCE_LFUNC then
+    return id_location(
+      symtab.parse_sym_lfunc(reader, symbols),
+      reader:read_uleb128(),
+      0
+    )
+  end
+  error("Unknown asource "..asource)
+end
+
+local function parse_location_common(reader, asource)
   if asource == ASOURCE_INT then
     return id_location(0, 0, 0)
   elseif asource == ASOURCE_CFUNC then
@@ -83,8 +98,20 @@ local function parse_location(reader, asource)
   error("Unknown asource "..asource)
 end
 
-local function parse_alloc(reader, asource, events, heap)
-  local id, loc = parse_location(reader, asource)
+local function parse_location(reader, asource, symbols)
+  local has_symbols  = band(asource, LJM_SYMTAB)
+  local asource_type = band(asource, ASOURCE_TYPE_MASK)
+  local id, loc
+  if has_symbols == LJM_SYMTAB then
+    id, loc = parse_location_symbols(reader, asource_type, symbols)
+  else
+    id, loc = parse_location_common(reader, asource_type)
+  end
+  return id, loc
+end
+
+local function parse_alloc(reader, asource, events, heap, symbols)
+  local id, loc = parse_location(reader, asource, symbols)
 
   local naddr = reader:read_uleb128()
   local nsize = reader:read_uleb128()
@@ -99,8 +126,8 @@ local function parse_alloc(reader, asource, events, heap)
   heap[naddr] = {nsize, id, loc}
 end
 
-local function parse_realloc(reader, asource, events, heap)
-  local id, loc = parse_location(reader, asource)
+local function parse_realloc(reader, asource, events, heap, symbols)
+  local id, loc = parse_location(reader, asource, symbols)
 
   local oaddr = reader:read_uleb128()
   local osize = reader:read_uleb128()
@@ -121,8 +148,8 @@ local function parse_realloc(reader, asource, events, heap)
   heap[naddr] = {nsize, id, loc}
 end
 
-local function parse_free(reader, asource, events, heap)
-  local id, loc = parse_location(reader, asource)
+local function parse_free(reader, asource, events, heap, symbols)
+  local id, loc = parse_location(reader, asource, symbols)
 
   local oaddr = reader:read_uleb128()
   local osize = reader:read_uleb128()
@@ -155,7 +182,7 @@ local function ev_header_split(evh)
   return band(evh, AEVENT_MASK), band(evh, ASOURCE_MASK)
 end
 
-local function parse_event(reader, events)
+local function parse_event(reader, events, symbols)
   local ev_header = reader:read_octet()
 
   assert(ev_header_is_valid(ev_header), "Bad ev_header "..ev_header)
@@ -169,12 +196,12 @@ local function parse_event(reader, events)
 
   assert(parser, "Bad aevent "..aevent)
 
-  parser.parse(reader, asource, events[parser.evname], events.heap)
+  parser.parse(reader, asource, events[parser.evname], events.heap, symbols)
 
   return true
 end
 
-function M.parse(reader)
+function M.parse(reader, symbols)
   local events = {
     alloc = {},
     realloc = {},
@@ -199,7 +226,7 @@ function M.parse(reader)
     ))
   end
 
-  while parse_event(reader, events) do
+  while parse_event(reader, events, symbols) do
     -- Empty body.
   end
 
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
index 0e742ee1..601c9563 100644
--- a/tools/utils/symtab.lua
+++ b/tools/utils/symtab.lua
@@ -20,7 +20,7 @@ local SYMTAB_TRACE = 1
 local M = {}
 
 -- Parse a single entry in a symtab: lfunc symbol.
-local function parse_sym_lfunc(reader, symtab)
+function M.parse_sym_lfunc(reader, symtab)
   local sym_addr = reader:read_uleb128()
   local sym_chunk = reader:read_string()
   local sym_line = reader:read_uleb128()
@@ -29,6 +29,8 @@ local function parse_sym_lfunc(reader, symtab)
     source = sym_chunk,
     linedefined = sym_line,
   }
+
+  return sym_addr
 end
 
 local function parse_sym_trace(reader, symtab)
@@ -48,7 +50,7 @@ local function parse_sym_trace(reader, symtab)
 end
 
 local parsers = {
-  [SYMTAB_LFUNC] = parse_sym_lfunc,
+  [SYMTAB_LFUNC] = M.parse_sym_lfunc,
   [SYMTAB_TRACE] = parse_sym_trace,
 }
 
-- 
2.32.0



More information about the Tarantool-patches mailing list