Tarantool development patches archive
 help / color / mirror / Atom feed
From: Mikhail Shishatskiy via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: tarantool-patches@dev.tarantool.org, imun@tarantool.org,
	skaplun@tarantool.org
Subject: [Tarantool-patches] [PATCH luajit v2 1/3] memprof: add symbol epochs
Date: Thu,  2 Dec 2021 14:03:27 +0300	[thread overview]
Message-ID: <20211202110329.664738-2-m.shishatskiy@tarantool.org> (raw)
In-Reply-To: <20211202110329.664738-1-m.shishatskiy@tarantool.org>

There may be situations, when new prototype or trace has the same
address than the previously dumped one. In this case, the new symtab
entry will shadow the preceeding, and if we demangle symbols when all
the events are parsed, we can get irrelevant information.

For this reason, the symtab now stores not a single entry, but array of
entries for each location. The structure <loc> was extended by the field
<epoch> representing the generation of symtab entry associated with this
particular location entry. Also, the <utils/symtab.lua> API has extended
by function symtab.new_loc(symbols, addr, line, traceno) so that it can
assign an epoch for a new location.

Part of tarantool/tarantool#5815
---

Issue: https://github.com/tarantool/tarantool/issues/5815
Branch: https://github.com/tarantool/luajit/tree/shishqa/gh-5815-enrich-symtab-when-prototype-is-allocated-v2
Tarantool branch: https://github.com/tarantool/tarantool/tree/shishqa/gh-5815-enrich-symtab-when-prototype-is-allocated

 .../misclib-memprof-lapi.test.lua             | 12 ++--
 tools/memprof/parse.lua                       | 41 +++++++-------
 tools/utils/symtab.lua                        | 55 +++++++++++++------
 3 files changed, 67 insertions(+), 41 deletions(-)

diff --git a/test/tarantool-tests/misclib-memprof-lapi.test.lua b/test/tarantool-tests/misclib-memprof-lapi.test.lua
index 7919c61f..dd973f2a 100644
--- a/test/tarantool-tests/misclib-memprof-lapi.test.lua
+++ b/test/tarantool-tests/misclib-memprof-lapi.test.lua
@@ -70,7 +70,7 @@ local function generate_parsed_output(payload)
 
   local reader = bufread.new(TMP_BINFILE)
   local symbols = symtab.parse(reader)
-  local events = memprof.parse(reader)
+  local events = memprof.parse(reader, symbols)
 
   -- We don't need it any more.
   os.remove(TMP_BINFILE)
@@ -86,13 +86,15 @@ local function fill_ev_type(events, symbols, event_type)
   for _, event in pairs(events[event_type]) do
     local addr = event.loc.addr
     local traceno = event.loc.traceno
+    local epoch = event.loc.epoch
 
     if traceno ~= 0 and symbols.trace[traceno] then
-      local trace_loc = symbols.trace[traceno].start
+      local trace_loc = symbols.trace[traceno][epoch].start
       addr = trace_loc.addr
+      epoch = trace_loc.epoch
       ev_type.trace[traceno] = {
         name = string.format("TRACE [%d] %s:%d",
-          traceno, symbols.lfunc[addr].source, trace_loc.line
+          traceno, symbols.lfunc[addr][epoch].source, trace_loc.line
         ),
         num = event.num,
       }
@@ -104,7 +106,9 @@ local function fill_ev_type(events, symbols, event_type)
     elseif symbols.lfunc[addr] then
       ev_type.line[event.loc.line] = {
         name = string.format(
-          "%s:%d", symbols.lfunc[addr].source, symbols.lfunc[addr].linedefined
+          "%s:%d",
+          symbols.lfunc[addr][epoch].source,
+          symbols.lfunc[addr][epoch].linedefined
         ),
         num = event.num,
       }
diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
index 47dbaee4..be5844a4 100644
--- a/tools/memprof/parse.lua
+++ b/tools/memprof/parse.lua
@@ -64,29 +64,28 @@ local function link_to_previous(heap_chunk, e, nsize)
   end
 end
 
-local function parse_location(reader, asource)
-  local loc = {
-    addr = 0,
-    line = 0,
-    traceno = 0,
-  }
+local function parse_location(reader, asource, symbols)
+  local addr = 0
+  local line = 0
+  local traceno = 0
   if asource == ASOURCE_INT then -- luacheck: ignore
   elseif asource == ASOURCE_CFUNC then
-    loc.addr = reader:read_uleb128()
+    addr = reader:read_uleb128()
   elseif asource == ASOURCE_LFUNC then
-    loc.addr = reader:read_uleb128()
-    loc.line = reader:read_uleb128()
+    addr = reader:read_uleb128()
+    line = reader:read_uleb128()
   elseif asource == ASOURCE_TRACE then
-    loc.traceno = reader:read_uleb128()
-    loc.addr = reader:read_uleb128()
+    traceno = reader:read_uleb128()
+    addr = reader:read_uleb128()
   else
     error("Unknown asource "..asource)
   end
+  local loc = symtab.new_loc(symbols, addr, line, traceno)
   return symtab.id(loc), loc
 end
 
-local function parse_alloc(reader, asource, events, heap)
-  local id, loc = parse_location(reader, asource)
+local function parse_alloc(reader, asource, events, heap, symbols)
+  local id, loc = parse_location(reader, asource, symbols)
 
   local naddr = reader:read_uleb128()
   local nsize = reader:read_uleb128()
@@ -101,8 +100,8 @@ local function parse_alloc(reader, asource, events, heap)
   heap[naddr] = {nsize, id, loc}
 end
 
-local function parse_realloc(reader, asource, events, heap)
-  local id, loc = parse_location(reader, asource)
+local function parse_realloc(reader, asource, events, heap, symbols)
+  local id, loc = parse_location(reader, asource, symbols)
 
   local oaddr = reader:read_uleb128()
   local osize = reader:read_uleb128()
@@ -123,8 +122,8 @@ local function parse_realloc(reader, asource, events, heap)
   heap[naddr] = {nsize, id, loc}
 end
 
-local function parse_free(reader, asource, events, heap)
-  local id, loc = parse_location(reader, asource)
+local function parse_free(reader, asource, events, heap, symbols)
+  local id, loc = parse_location(reader, asource, symbols)
 
   local oaddr = reader:read_uleb128()
   local osize = reader:read_uleb128()
@@ -157,7 +156,7 @@ local function ev_header_split(evh)
   return band(evh, AEVENT_MASK), band(evh, ASOURCE_MASK)
 end
 
-local function parse_event(reader, events)
+local function parse_event(reader, events, symbols)
   local ev_header = reader:read_octet()
 
   assert(ev_header_is_valid(ev_header), "Bad ev_header "..ev_header)
@@ -171,12 +170,12 @@ local function parse_event(reader, events)
 
   assert(parser, "Bad aevent "..aevent)
 
-  parser.parse(reader, asource, events[parser.evname], events.heap)
+  parser.parse(reader, asource, events[parser.evname], events.heap, symbols)
 
   return true
 end
 
-function M.parse(reader)
+function M.parse(reader, symbols)
   local events = {
     alloc = {},
     realloc = {},
@@ -201,7 +200,7 @@ function M.parse(reader)
     ))
   end
 
-  while parse_event(reader, events) do
+  while parse_event(reader, events, symbols) do
     -- Empty body.
   end
 
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
index c7fcf77c..c758b67e 100644
--- a/tools/utils/symtab.lua
+++ b/tools/utils/symtab.lua
@@ -19,16 +19,37 @@ local SYMTAB_TRACE = 1
 
 local M = {}
 
+function M.new_loc(symtab, addr, line, traceno)
+  local loc = {
+    addr = addr,
+    line = line,
+    traceno = traceno,
+    epoch = 1,
+  }
+
+  if traceno ~= 0 and symtab.trace[traceno] then
+    loc.epoch = #symtab.trace[traceno]
+  elseif symtab.lfunc[addr] then
+    loc.epoch = #symtab.lfunc[addr]
+  end
+
+  return loc
+end
+
 -- Parse a single entry in a symtab: lfunc symbol.
 local function parse_sym_lfunc(reader, symtab)
   local sym_addr = reader:read_uleb128()
   local sym_chunk = reader:read_string()
   local sym_line = reader:read_uleb128()
 
-  symtab.lfunc[sym_addr] = {
+  if not symtab.lfunc[sym_addr] then
+    symtab.lfunc[sym_addr] = {}
+  end
+
+  table.insert(symtab.lfunc[sym_addr], {
     source = sym_chunk,
     linedefined = sym_line,
-  }
+  })
 end
 
 local function parse_sym_trace(reader, symtab)
@@ -37,17 +58,14 @@ local function parse_sym_trace(reader, symtab)
   local sym_addr = reader:read_uleb128()
   local sym_line = reader:read_uleb128()
 
-  symtab.trace[traceno] = {
+  if not symtab.trace[traceno] then
+    symtab.trace[traceno] = {}
+  end
+
+  table.insert(symtab.trace[traceno], {
     addr = trace_addr,
-    -- The structure <start> is the same as the one
-    -- yielded from the <parse_location> function
-    -- in the <memprof/parse.lua> module.
-    start = {
-      addr = sym_addr,
-      line = sym_line,
-      traceno = 0,
-    },
-  }
+    start = M.new_loc(symtab, sym_addr, sym_line, 0)
+  })
 end
 
 local parsers = {
@@ -98,7 +116,9 @@ function M.parse(reader)
 end
 
 function M.id(loc)
-  return string_format("f%#xl%dt%d", loc.addr, loc.line, loc.traceno)
+  return string_format(
+    "f%#xl%dt%de%d", loc.addr, loc.line, loc.traceno, loc.epoch
+  )
 end
 
 local function demangle_trace(symtab, loc)
@@ -108,7 +128,8 @@ local function demangle_trace(symtab, loc)
   assert(traceno ~= 0, "Location is a trace")
 
   local trace_str = string_format("TRACE [%d] %#x", traceno, addr)
-  local trace = symtab.trace[traceno]
+  local epochs = symtab.trace[traceno]
+  local trace = epochs and epochs[loc.epoch]
 
   -- If trace, which was remembered in the symtab, has not
   -- been flushed, associate it with a proto, where trace
@@ -131,8 +152,10 @@ function M.demangle(symtab, loc)
     return "INTERNAL"
   end
 
-  if symtab.lfunc[addr] then
-    return string_format("%s:%d", symtab.lfunc[addr].source, loc.line)
+  if symtab.lfunc[addr] and symtab.lfunc[addr][loc.epoch] then
+    return string_format(
+      "%s:%d", symtab.lfunc[addr][loc.epoch].source, loc.line
+    )
   end
 
   return string_format("CFUNC %#x", addr)
-- 
2.33.1


  reply	other threads:[~2021-12-02 11:04 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-02 11:03 [Tarantool-patches] [PATCH luajit v2 0/3] memprof: enrich symtab when meeting new prototype Mikhail Shishatskiy via Tarantool-patches
2021-12-02 11:03 ` Mikhail Shishatskiy via Tarantool-patches [this message]
2022-03-15  7:56   ` [Tarantool-patches] [PATCH luajit v2 1/3] memprof: add symbol epochs Sergey Kaplun via Tarantool-patches
2021-12-02 11:03 ` [Tarantool-patches] [PATCH luajit v2 2/3] memprof: enrich symtab when meeting new prototype Mikhail Shishatskiy via Tarantool-patches
2022-01-19 16:56   ` Mikhail Shishatskiy via Tarantool-patches
2022-01-25  8:14     ` Sergey Kaplun via Tarantool-patches
2021-12-02 11:03 ` [Tarantool-patches] [PATCH luajit v2 3/3] memprof: substitute long proto names with aliases Mikhail Shishatskiy via Tarantool-patches
2022-01-25 10:12   ` Sergey Kaplun via Tarantool-patches
2022-04-12 14:30 ` [Tarantool-patches] [PATCH luajit v2 0/3] memprof: enrich symtab when meeting new prototype Igor Munkin via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211202110329.664738-2-m.shishatskiy@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=imun@tarantool.org \
    --cc=m.shishatskiy@tarantool.org \
    --cc=skaplun@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit v2 1/3] memprof: add symbol epochs' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox