[Tarantool-patches] [PATCH luajit v3 4/5] memprof: extend symtab with info about traces

Mikhail Shishatskiy m.shishatskiy at tarantool.org
Fri Aug 20 10:05:45 MSK 2021


This patch extends the memprof symtab by adding information
about traces, which are present at the start of profiling.

The symtab format changed with adding <sym-trace> entry,
which consists of a trace address, trace number, address
of function proto, and line, where trace recording started:

| sym-trace  := sym-header trace-addr trace-no sym-addr sym-line
| trace-addr := <ULEB128>
| trace-no   := <ULEB128>

Also, the memprof parser is adjusted to recognize new
symtab entries and associate them with allocation events
from traces.

With adding traces to the symbol table the API of <utils/symtab.lua>
changed: now table with symbols contains two tables: `lfunc` for
Lua functions symbols and `trace` for trace entries.

As trace can be associated with a function proto, <utils/humanize.lua>
module extended with a function `describe_location` intended
to be used instead of symtab.demangle. The reason is that the
location name can be more than demangled `loc`: for example,
when getting trace location name, we want to assotiate the
trace with a function proto, where trace recording started.
On the one hand, we want to get a verbose trace description
with demangled proto location:

| TRACE [<trace-no>] <trace-addr> started at @<proto-name>:<proto-line>

On the other hand, idiomatically, this should be done not by
the demangler module but by the humanizer module.

Resolves tarantool/tarantool#5814
---

Issue: https://github.com/tarantool/tarantool/issues/5814
Luajit branch: https://github.com/tarantool/luajit/tree/shishqa/gh-5814-group-allocations-on-trace-by-trace-number
tarantool branch: https://github.com/tarantool/tarantool/tree/shishqa/gh-5814-group-allocations-on-trace-by-trace-number

 src/lj_memprof.c                              |  40 ++++++
 src/lj_memprof.h                              |   7 +-
 .../misclib-memprof-lapi.test.lua             | 135 +++++++++++++-----
 tools/memprof/humanize.lua                    |  21 ++-
 tools/memprof/process.lua                     |   6 +-
 tools/utils/symtab.lua                        |  45 ++++--
 6 files changed, 206 insertions(+), 48 deletions(-)

diff --git a/src/lj_memprof.c b/src/lj_memprof.c
index fb99829d..fc5bc301 100644
--- a/src/lj_memprof.c
+++ b/src/lj_memprof.c
@@ -28,6 +28,42 @@
 static const unsigned char ljs_header[] = {'l', 'j', 's', LJS_CURRENT_VERSION,
 					   0x0, 0x0, 0x0};
 
+#if LJ_HASJIT
+
+static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
+{
+  const GCproto *pt = &gcref(trace->startpt)->pt;
+  BCLine lineno = -1;
+
+  const BCIns *startpc = mref(trace->startpc, const BCIns);
+  lua_assert(startpc >= proto_bc(pt) &&
+             startpc < proto_bc(pt) + pt->sizebc);
+
+  lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
+  lua_assert(lineno >= 0);
+
+  lj_wbuf_addbyte(out, SYMTAB_TRACE);
+  lj_wbuf_addu64(out, (uint64_t)trace->mcode);
+  lj_wbuf_addu64(out, (uint64_t)trace->traceno);
+  /*
+  ** All the existing prototypes have already been dumped, so we do not
+  ** need to repeat their dump for trace locations.
+  */
+  lj_wbuf_addu64(out, (uintptr_t)pt);
+  lj_wbuf_addu64(out, (uint64_t)lineno);
+}
+
+#else
+
+static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
+{
+  UNUSED(out);
+  UNUSED(trace);
+  lua_assert(0);
+}
+
+#endif
+
 static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
 {
   const GCRef *iter = &g->gc.root;
@@ -47,6 +83,10 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
       lj_wbuf_addu64(out, (uint64_t)pt->firstline);
       break;
     }
+    case (~LJ_TTRACE): {
+      dump_symtab_trace(out, gco2trace(o));
+      break;
+    }
     default:
       break;
     }
diff --git a/src/lj_memprof.h b/src/lj_memprof.h
index 6a35385d..e72dadf7 100644
--- a/src/lj_memprof.h
+++ b/src/lj_memprof.h
@@ -16,7 +16,7 @@
 #include "lj_def.h"
 #include "lj_wbuf.h"
 
-#define LJS_CURRENT_VERSION 0x1
+#define LJS_CURRENT_VERSION 0x2
 
 /*
 ** symtab format:
@@ -25,13 +25,16 @@
 ** prologue       := 'l' 'j' 's' version reserved
 ** version        := <BYTE>
 ** reserved       := <BYTE> <BYTE> <BYTE>
-** sym            := sym-lua | sym-final
+** sym            := sym-lua | sym-trace | sym-final
 ** sym-lua        := sym-header sym-addr sym-chunk sym-line
+** sym-trace      := sym-header trace-addr trace-no sym-addr sym-line
 ** sym-header     := <BYTE>
 ** sym-addr       := <ULEB128>
 ** sym-chunk      := string
 ** sym-line       := <ULEB128>
 ** sym-final      := sym-header
+** trace-addr     := <ULEB128>
+** trace-no       := <ULEB128>
 ** string         := string-len string-payload
 ** string-len     := <ULEB128>
 ** string-payload := <BYTE> {string-len}
diff --git a/test/tarantool-tests/misclib-memprof-lapi.test.lua b/test/tarantool-tests/misclib-memprof-lapi.test.lua
index dbf384ed..f84b6df0 100644
--- a/test/tarantool-tests/misclib-memprof-lapi.test.lua
+++ b/test/tarantool-tests/misclib-memprof-lapi.test.lua
@@ -7,7 +7,14 @@ require("utils").skipcond(
 local tap = require("tap")
 
 local test = tap.test("misc-memprof-lapi")
-test:plan(3)
+test:plan(4)
+
+local jit_opt_default = {
+    3, -- level
+    "hotloop=56",
+    "hotexit=10",
+    "minstitch=0",
+}
 
 jit.off()
 jit.flush()
@@ -22,12 +29,12 @@ local symtab = require "utils.symtab"
 local TMP_BINFILE = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%.%1.memprofdata.tmp.bin")
 local BAD_PATH = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%1/memprofdata.tmp.bin")
 
-local function payload()
+local function default_payload()
   -- Preallocate table to avoid table array part reallocations.
-  local _ = table_new(100, 0)
+  local _ = table_new(20, 0)
 
-  -- Want too see 100 objects here.
-  for i = 1, 100 do
+  -- Want too see 20 objects here.
+  for i = 1, 20 do
     -- Try to avoid crossing with "test" module objects.
     _[i] = "memprof-str-"..i
   end
@@ -37,7 +44,7 @@ local function payload()
   collectgarbage()
 end
 
-local function generate_output(filename)
+local function generate_output(filename, payload)
   -- Clean up all garbage to avoid pollution of free.
   collectgarbage()
 
@@ -52,19 +59,49 @@ local function generate_output(filename)
   assert(res, err)
 end
 
+local function generate_parsed_output(payload)
+  local res, err = pcall(generate_output, TMP_BINFILE, payload)
+
+  -- Want to cleanup carefully if something went wrong.
+  if not res then
+    os.remove(TMP_BINFILE)
+    error(err)
+  end
+
+  local reader = bufread.new(TMP_BINFILE)
+  local symbols = symtab.parse(reader)
+  local events = memprof.parse(reader)
+
+  -- We don't need it any more.
+  os.remove(TMP_BINFILE)
+
+  return symbols, events
+end
+
 local function fill_ev_type(events, symbols, event_type)
   local ev_type = {}
   for _, event in pairs(events[event_type]) do
     local addr = event.loc.addr
-    if addr == 0 then
+    local traceno = event.loc.traceno
+
+    if traceno ~= 0 and symbols.trace[traceno] then
+      local trace_loc = symbols.trace[traceno].sym_loc
+      addr = trace_loc.addr
+      ev_type[trace_loc.line] = {
+        name = string.format("TRACE [%d] %s:%d",
+          traceno, symbols.lfunc[addr].source, symbols.lfunc[addr].linedefined
+        ),
+        num = event.num,
+      }
+    elseif addr == 0 then
       ev_type.INTERNAL = {
         name = "INTERNAL",
         num = event.num,
     }
-    elseif symbols[addr] then
+    elseif symbols.lfunc[addr] then
       ev_type[event.loc.line] = {
         name = string.format(
-          "%s:%d", symbols[addr].source, symbols[addr].linedefined
+          "%s:%d", symbols.lfunc[addr].source, symbols.lfunc[addr].linedefined
         ),
         num = event.num,
       }
@@ -77,10 +114,22 @@ local function form_source_line(line)
   return string.format("@%s:%d", arg[0], line)
 end
 
-local function check_alloc_report(alloc, line, function_line, nevents)
-  assert(form_source_line(function_line) == alloc[line].name)
-  assert(alloc[line].num == nevents, ("got=%d, expected=%d"):format(
-    alloc[line].num,
+local function check_alloc_report(alloc, traceno, line, function_line, nevents)
+  assert(alloc[line], ("no event on line %d"):format(line))
+  local event = alloc[line]
+  local expected_name
+  if traceno ~= 0 then
+    expected_name = string.format("TRACE [%d] ", traceno)..
+                    form_source_line(function_line)
+  else
+    expected_name = form_source_line(function_line)
+  end
+  assert(expected_name == event.name, ("got='%s', expected='%s'"):format(
+    event.name,
+    expected_name
+  ))
+  assert(event.num == nevents, ("got=%d, expected=%d"):format(
+    event.num,
     nevents
   ))
   return true
@@ -115,20 +164,7 @@ end)
 test:test("output", function(subtest)
   subtest:plan(7)
 
-  local res, err = pcall(generate_output, TMP_BINFILE)
-
-  -- Want to cleanup carefully if something went wrong.
-  if not res then
-    os.remove(TMP_BINFILE)
-    error(err)
-  end
-
-  local reader = bufread.new(TMP_BINFILE)
-  local symbols = symtab.parse(reader)
-  local events = memprof.parse(reader, symbols)
-
-  -- We don't need it any more.
-  os.remove(TMP_BINFILE)
+  local symbols, events = generate_parsed_output(default_payload)
 
   local alloc = fill_ev_type(events, symbols, "alloc")
   local free = fill_ev_type(events, symbols, "free")
@@ -139,18 +175,18 @@ test:test("output", function(subtest)
   -- the number of allocations.
   -- 1 event - alocation of table by itself + 1 allocation
   -- of array part as far it is bigger than LJ_MAX_COLOSIZE (16).
-  subtest:ok(check_alloc_report(alloc, 27, 25, 2))
-  -- 100 strings allocations.
-  subtest:ok(check_alloc_report(alloc, 32, 25, 100))
+  subtest:ok(check_alloc_report(alloc, 0, 34, 32, 2))
+  -- 20 strings allocations.
+  subtest:ok(check_alloc_report(alloc, 0, 39, 32, 20))
 
   -- Collect all previous allocated objects.
-  subtest:ok(free.INTERNAL.num == 102)
+  subtest:ok(free.INTERNAL.num == 22)
 
   -- Tests for leak-only option.
   -- See also https://github.com/tarantool/tarantool/issues/5812.
   local heap_delta = process.form_heap_delta(events, symbols)
-  local tab_alloc_stats = heap_delta[form_source_line(27)]
-  local str_alloc_stats = heap_delta[form_source_line(32)]
+  local tab_alloc_stats = heap_delta[form_source_line(34)]
+  local str_alloc_stats = heap_delta[form_source_line(39)]
   subtest:ok(tab_alloc_stats.nalloc == tab_alloc_stats.nfree)
   subtest:ok(tab_alloc_stats.dbytes == 0)
   subtest:ok(str_alloc_stats.nalloc == str_alloc_stats.nfree)
@@ -179,5 +215,38 @@ test:test("stack-resize", function(subtest)
   misc.memprof.stop()
 end)
 
+-- Test profiler with enabled JIT.
 jit.on()
+
+test:test("jit-output", function(subtest)
+  -- Disabled on *BSD due to #4819.
+  if jit.os == 'BSD' then
+    subtest:plan(1)
+    subtest:skip('Disabled due to #4819')
+    return
+  end
+
+  subtest:plan(3)
+
+  jit.opt.start(3, "hotloop=10")
+  jit.flush()
+
+  -- Pregenerate traces to fill symtab entries in the next run.
+  default_payload()
+
+  local symbols, events = generate_parsed_output(default_payload)
+
+  local alloc = fill_ev_type(events, symbols, "alloc")
+  local free = fill_ev_type(events, symbols, "free")
+
+  -- We expect, that loop will be compiled into a trace.
+  subtest:ok(check_alloc_report(alloc, 1, 37, 32, 20))
+  -- See same checks with jit.off().
+  subtest:ok(check_alloc_report(alloc, 0, 34, 32, 2))
+  subtest:ok(free.INTERNAL.num == 22)
+
+  -- Restore default JIT settings.
+  jit.opt.start(unpack(jit_opt_default))
+end)
+
 os.exit(test:check() and 0 or 1)
diff --git a/tools/memprof/humanize.lua b/tools/memprof/humanize.lua
index 7771005d..800a465e 100644
--- a/tools/memprof/humanize.lua
+++ b/tools/memprof/humanize.lua
@@ -21,7 +21,7 @@ function M.render(events, symbols)
   for i = 1, #ids do
     local event = events[ids[i]]
     print(string.format("%s: %d events\t+%d bytes\t-%d bytes",
-      symtab.demangle(symbols, event.loc),
+      M.describe_location(symbols, event.loc),
       event.num,
       event.alloc,
       event.free
@@ -29,7 +29,7 @@ function M.render(events, symbols)
 
     local prim_loc = {}
     for _, heap_chunk in pairs(event.primary) do
-      table.insert(prim_loc, symtab.demangle(symbols, heap_chunk.loc))
+      table.insert(prim_loc, M.describe_location(symbols, heap_chunk.loc))
     end
     if #prim_loc ~= 0 then
       table.sort(prim_loc)
@@ -81,4 +81,21 @@ function M.leak_info(dheap)
   print("")
 end
 
+function M.describe_location(symbols, loc)
+  if loc.traceno == 0 then
+    return symtab.demangle(symbols, loc)
+  end
+
+  local trace = symbols.trace[loc.traceno]
+
+  -- If trace, which was remembered in the symtab, has not
+  -- been flushed, assotiate it with a proto, where trace
+  -- recording started.
+  if trace and trace.addr == loc.addr then
+    return symtab.demangle(symbols, loc).." started at "..
+           symtab.demangle(symbols, trace.sym_loc)
+  end
+  return symtab.demangle(symbols, loc)
+end
+
 return M
diff --git a/tools/memprof/process.lua b/tools/memprof/process.lua
index 0bcb965b..f277ed84 100644
--- a/tools/memprof/process.lua
+++ b/tools/memprof/process.lua
@@ -2,7 +2,7 @@
 
 local M = {}
 
-local symtab = require "utils.symtab"
+local humanize = require "memprof.humanize"
 
 function M.form_heap_delta(events, symbols)
   -- Auto resurrects source event lines for counting/reporting.
@@ -17,7 +17,7 @@ function M.form_heap_delta(events, symbols)
 
   for _, event in pairs(events.alloc) do
     if event.loc then
-      local ev_line = symtab.demangle(symbols, event.loc)
+      local ev_line = humanize.describe_location(symbols, event.loc)
 
       if (event.alloc > 0) then
         dheap[ev_line].dbytes = dheap[ev_line].dbytes + event.alloc
@@ -37,7 +37,7 @@ function M.form_heap_delta(events, symbols)
       -- that references the table with memory changed
       -- (may be empty).
       for _, heap_chunk in pairs(event.primary) do
-        local ev_line = symtab.demangle(symbols, heap_chunk.loc)
+        local ev_line = humanize.describe_location(symbols, heap_chunk.loc)
 
         if (heap_chunk.alloced > 0) then
           dheap[ev_line].dbytes = dheap[ev_line].dbytes + heap_chunk.alloced
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
index 3ed1dd13..0e742ee1 100644
--- a/tools/utils/symtab.lua
+++ b/tools/utils/symtab.lua
@@ -10,11 +10,12 @@ local band = bit.band
 local string_format = string.format
 
 local LJS_MAGIC = "ljs"
-local LJS_CURRENT_VERSION = 1
+local LJS_CURRENT_VERSION = 0x2
 local LJS_EPILOGUE_HEADER = 0x80
 local LJS_SYMTYPE_MASK = 0x03
 
 local SYMTAB_LFUNC = 0
+local SYMTAB_TRACE = 1
 
 local M = {}
 
@@ -24,18 +25,38 @@ local function parse_sym_lfunc(reader, symtab)
   local sym_chunk = reader:read_string()
   local sym_line = reader:read_uleb128()
 
-  symtab[sym_addr] = {
+  symtab.lfunc[sym_addr] = {
     source = sym_chunk,
     linedefined = sym_line,
   }
 end
 
+local function parse_sym_trace(reader, symtab)
+  local trace_addr = reader:read_uleb128()
+  local traceno = reader:read_uleb128()
+  local sym_addr = reader:read_uleb128()
+  local sym_line = reader:read_uleb128()
+
+  symtab.trace[traceno] = {
+    addr = trace_addr,
+    sym_loc = {
+      addr = sym_addr,
+      line = sym_line,
+      traceno = 0,
+    },
+  }
+end
+
 local parsers = {
   [SYMTAB_LFUNC] = parse_sym_lfunc,
+  [SYMTAB_TRACE] = parse_sym_trace,
 }
 
 function M.parse(reader)
-  local symtab = {}
+  local symtab = {
+    lfunc = {},
+    trace = {},
+  }
   local magic = reader:read_octets(3)
   local version = reader:read_octets(1)
 
@@ -73,18 +94,26 @@ function M.parse(reader)
   return symtab
 end
 
-function M.demangle(symtab, loc)
+local function demangle_lfunc(symtab, loc)
   local addr = loc.addr
 
   if addr == 0 then
     return "INTERNAL"
+  elseif symtab.lfunc[addr] then
+    return string_format("%s:%d", symtab.lfunc[loc.addr].source, loc.line)
   end
+  return string_format("CFUNC %#x", addr)
+end
 
-  if symtab[addr] then
-    return string_format("%s:%d", symtab[addr].source, loc.line)
-  end
+local function demangle_trace(loc)
+  return string_format("TRACE [%d] 0x%x", loc.traceno, loc.addr)
+end
 
-  return string_format("CFUNC %#x", addr)
+function M.demangle(symtab, loc)
+  if loc.traceno ~= 0 then
+    return demangle_trace(loc)
+  end
+  return demangle_lfunc(symtab, loc)
 end
 
 return M
-- 
2.32.0



More information about the Tarantool-patches mailing list