[Tarantool-patches] [PATCH v2 luajit] tools: introduce --leak-only memprof parser option

Sergey Ostanevich sergos at tarantool.org
Thu Apr 8 15:49:47 MSK 2021


Hi!

Just a couple of nits, LGTM.

Sergos

> On 31 Mar 2021, at 20:29, Sergey Kaplun <skaplun at tarantool.org> wrote:
> 
> This patch indtroduces new memprof parser module <process.lua> to
	     introduces
> post-process memory events.
> 
> Memprof parser now adds postamble with the source lines of Lua chunks
> (or "INTERNAL") that allocate and do not free some amount of bytes, when
> profiler finishes. The parser also reports the number of allocation and
> deallocation events related to each line.
> 
> Also, this patch adds a new --leak-only memory profiler parser option.
> When the parser runs with that option, it reports only leak
> information.
> 
> Resolves tarantool/tarantool#5812
> ---
> Changes in v2:
> * introduce new memprof's <process.lua> module to post-process parsed
>  events
> * add tests
> 
> ChangeLog entry (and postamble too Tarantool bump commit message):
                                 ^^^^^^^ I failed to parse, typo?
> 
> ===================================================================
> ##feature/luajit
> 
> * Now memory profiler parser reports heap difference occurring during
>  the measurement interval. New memory profiler's option `--leak-only`
>  to show only heap difference is introduced. New built-in module
    shows
>  `memprof.process` is introduced to perform memory events
>  post-processing and aggregation. Now to launch memory profiler
>  via Tarantool user should use the following command:
>  `tarantool -e 'require("memprof")(arg)' - --leak-only /tmp/memprof.bin`

> ===================================================================
> 
> Branch with tests and added the corresponding built-in:
> * https://github.com/tarantool/tarantool/tree/skaplun/gh-5812-memprof-memleaks-option
> LuaJIT branch:
> * https://github.com/tarantool/luajit/tree/skaplun/gh-5812-memprof-memleaks-option
> Issue: https://github.com/tarantool/tarantool/issues/5812
> 
> .../misclib-memprof-lapi.test.lua             | 21 +++++--
> tools/memprof.lua                             | 33 ++++++-----
> tools/memprof/humanize.lua                    | 43 +++++++++++++-
> tools/memprof/parse.lua                       | 20 +++++--
> tools/memprof/process.lua                     | 59 +++++++++++++++++++
> 5 files changed, 151 insertions(+), 25 deletions(-)
> create mode 100644 tools/memprof/process.lua
> 
> diff --git a/test/tarantool-tests/misclib-memprof-lapi.test.lua b/test/tarantool-tests/misclib-memprof-lapi.test.lua
> index cb63e1b8..9affc2fe 100644
> --- a/test/tarantool-tests/misclib-memprof-lapi.test.lua
> +++ b/test/tarantool-tests/misclib-memprof-lapi.test.lua
> @@ -1,7 +1,7 @@
> local tap = require("tap")
> 
> local test = tap.test("misc-memprof-lapi")
> -test:plan(9)
> +test:plan(13)
> 
> jit.off()
> jit.flush()
> @@ -10,6 +10,7 @@ local table_new = require "table.new"
> 
> local bufread = require "utils.bufread"
> local memprof = require "memprof.parse"
> +local process = require "memprof.process"
> local symtab = require "utils.symtab"
> 
> local TMP_BINFILE = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%.%1.memprofdata.tmp.bin")
> @@ -66,8 +67,12 @@ local function fill_ev_type(events, symbols, event_type)
>   return ev_type
> end
> 
> +local function form_source_line(line)
> +  return string.format("@%s:%d", arg[0], line)
> +end
> +
> local function check_alloc_report(alloc, line, function_line, nevents)
> -  assert(string.format("@%s:%d", arg[0], function_line) == alloc[line].name)
> +  assert(form_source_line(function_line) == alloc[line].name)
>   assert(alloc[line].num == nevents, ("got=%d, expected=%d"):format(
>     alloc[line].num,
>     nevents
> @@ -120,13 +125,21 @@ local free = fill_ev_type(events, symbols, "free")
> -- the number of allocations.
> -- 1 event - alocation of table by itself + 1 allocation
	       allocation
> -- of array part as far it is bigger than LJ_MAX_COLOSIZE (16).
> -test:ok(check_alloc_report(alloc, 20, 18, 2))
> +test:ok(check_alloc_report(alloc, 21, 19, 2))
> -- 100 strings allocations.
> -test:ok(check_alloc_report(alloc, 25, 18, 100))
> +test:ok(check_alloc_report(alloc, 26, 19, 100))
> 
> -- Collect all previous allocated objects.
> test:ok(free.INTERNAL.num == 102)
> 
> +local heap_diff = process.form_heap_diff(events, symbols)
> +local tab_alloc_source = heap_diff[form_source_line(21)]
> +local str_alloc_source = heap_diff[form_source_line(26)]
> +test:ok(tab_alloc_source.cnt_alloc == tab_alloc_source.cnt_free)
> +test:ok(tab_alloc_source.size_diff == 0)
> +test:ok(str_alloc_source.cnt_alloc == str_alloc_source.cnt_free)
> +test:ok(str_alloc_source.size_diff == 0)
> +
> -- Test for https://github.com/tarantool/tarantool/issues/5842.
> -- We are not interested in this report.
> misc.memprof.start("/dev/null")
> diff --git a/tools/memprof.lua b/tools/memprof.lua
> index 9f962085..c6c5f587 100644
> --- a/tools/memprof.lua
> +++ b/tools/memprof.lua
> @@ -12,6 +12,7 @@
> 
> local bufread = require "utils.bufread"
> local memprof = require "memprof.parse"
> +local process = require "memprof.process"
> local symtab = require "utils.symtab"
> local view = require "memprof.humanize"
> 
> @@ -33,10 +34,16 @@ luajit-parse-memprof [options] memprof.bin
> Supported options are:
> 
>   --help                            Show this help and exit
> +  --leak-only                       Report only leaks information
> ]]
>   os.exit(0)
> end
> 
> +local leak_only = false
> +opt_map["leak-only"] = function()
> +  leak_only = true
> +end
> +
> -- Print error and exit with error status.
> local function opterror(...)
>   stderr:write("luajit-parse-memprof.lua: ERROR: ", ...)
> @@ -94,26 +101,22 @@ local function dump(inputfile)
>   local reader = bufread.new(inputfile)
>   local symbols = symtab.parse(reader)
>   local events = memprof.parse(reader, symbols)
> -
> -  stdout:write("ALLOCATIONS", "\n")
> -  view.render(events.alloc, symbols)
> -  stdout:write("\n")
> -
> -  stdout:write("REALLOCATIONS", "\n")
> -  view.render(events.realloc, symbols)
> -  stdout:write("\n")
> -
> -  stdout:write("DEALLOCATIONS", "\n")
> -  view.render(events.free, symbols)
> -  stdout:write("\n")
> -
> +  if not leak_only then
> +    view.profile_info(events, symbols)
> +  end
> +  local heap_diff = process.form_heap_diff(events, symbols)
> +  view.leak_only(heap_diff)

The name of the function is confusing: you dump the whole data if _not_
leak-only, and then the leaks data unconditionally. So it behaves like
‘always’ rather than ‘only’; I propose naming it just ‘leaks’. Then the
logic will be ‘dump all’ or ‘leaks’ only.

>   os.exit(0)
> end
> 
> +local function dump_wrapped(...)
> +  return dump(parseargs(...))
> +end
> +
> -- FIXME: this script should be application-independent.
> local args = {...}
> if #args == 1 and args[1] == "memprof" then
> -  return dump
> +  return dump_wrapped
> else
> -  dump(parseargs(args))
> +  dump_wrapped(args)
> end
> diff --git a/tools/memprof/humanize.lua b/tools/memprof/humanize.lua
> index 2d5814c6..6afd3ff1 100644
> --- a/tools/memprof/humanize.lua
> +++ b/tools/memprof/humanize.lua
> @@ -28,8 +28,8 @@ function M.render(events, symbols)
>     ))
> 
>     local prim_loc = {}
> -    for _, loc in pairs(event.primary) do
> -      table.insert(prim_loc, symtab.demangle(symbols, loc))
> +    for _, heap_chunk in pairs(event.primary) do
> +      table.insert(prim_loc, symtab.demangle(symbols, heap_chunk.loc))
>     end
>     if #prim_loc ~= 0 then
>       table.sort(prim_loc)
> @@ -42,4 +42,43 @@ function M.render(events, symbols)
>   end
> end
> 
> +function M.profile_info(events, symbols)
> +  print("ALLOCATIONS")
> +  M.render(events.alloc, symbols)
> +  print("")
> +
> +  print("REALLOCATIONS")
> +  M.render(events.realloc, symbols)
> +  print("")
> +
> +  print("DEALLOCATIONS")
> +  M.render(events.free, symbols)
> +  print("")
> +end
> +
> +function M.leak_only(heap_diff)
> +  local rest_heap = {}
> +  for line, info in pairs(heap_diff) do
> +    -- Report "INTERNAL" events inconsistencies for profiling
> +    -- with enabled jit.
> +    if info.size_diff > 0 then
> +      table.insert(rest_heap, {line = line, hold_bytes = info.size_diff})
> +    end
> +  end
> +
> +  table.sort(rest_heap, function(h1, h2)
> +    return h1.hold_bytes > h2.hold_bytes
> +  end)
> +
> +  print("HEAP SUMMARY:")
> +  for _, h in pairs(rest_heap) do
> +    print(string.format(
> +      "%s holds %d bytes: %d allocs, %d frees",
> +      h.line, h.hold_bytes, heap_diff[h.line].cnt_alloc,
> +      heap_diff[h.line].cnt_free
> +    ))
> +  end
> +  print("")
> +end
> +
> return M
> diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
> index 6dae22d5..df10a45f 100644
> --- a/tools/memprof/parse.lua
> +++ b/tools/memprof/parse.lua
> @@ -39,11 +39,23 @@ local function new_event(loc)
>   }
> end
> 
> -local function link_to_previous(heap_chunk, e)
> +local function link_to_previous(heap_chunk, e, nsize)
>   -- Memory at this chunk was allocated before we start tracking.
>   if heap_chunk then
>     -- Save Lua code location (line) by address (id).
> -    e.primary[heap_chunk[2]] = heap_chunk[3]
> +    if not e.primary[heap_chunk[2]] then
> +      e.primary[heap_chunk[2]] = {
> +        loc = heap_chunk[3],
> +        alloced = 0,
> +        freed = 0,
> +        cnt = 0,
> +      }
> +    end
> +    -- Save memory diff heap information.
> +    local location_data = e.primary[heap_chunk[2]]
> +    location_data.alloced = location_data.alloced + nsize
> +    location_data.freed = location_data.freed + heap_chunk[1]
> +    location_data.cnt = location_data.cnt + 1
>   end
> end
> 
> @@ -97,7 +109,7 @@ local function parse_realloc(reader, asource, events, heap)
>   e.free = e.free + osize
>   e.alloc = e.alloc + nsize
> 
> -  link_to_previous(heap[oaddr], e)
> +  link_to_previous(heap[oaddr], e, nsize)
> 
>   heap[oaddr] = nil
>   heap[naddr] = {nsize, id, loc}
> @@ -116,7 +128,7 @@ local function parse_free(reader, asource, events, heap)
>   e.num = e.num + 1
>   e.free = e.free + osize
> 
> -  link_to_previous(heap[oaddr], e)
> +  link_to_previous(heap[oaddr], e, 0)
> 
>   heap[oaddr] = nil
> end
> diff --git a/tools/memprof/process.lua b/tools/memprof/process.lua
> new file mode 100644
> index 00000000..94be187e
> --- /dev/null
> +++ b/tools/memprof/process.lua
> @@ -0,0 +1,59 @@
> +-- LuaJIT's memory profile post-processing module.
> +
> +local M = {}
> +
> +local symtab = require "utils.symtab"
> +
> +function M.form_heap_diff(events, symbols)
> +  -- Auto resurrects source event lines for counting/reporting.
> +  local heap = setmetatable({}, {__index = function(t, line)
> +    t[line] = {
> +      size_diff = 0,
> +      cnt_alloc = 0,
> +      cnt_free = 0,
> +    }
> +    return t[line]
> +  end})
> +
> +  for _, event in pairs(events.alloc) do
> +    if event.loc then
> +      local ev_line = symtab.demangle(symbols, event.loc)
> +
> +      if (event.alloc > 0) then
> +        heap[ev_line].size_diff = heap[ev_line].size_diff + event.alloc
> +        heap[ev_line].cnt_alloc = heap[ev_line].cnt_alloc + event.num
> +      end
> +    end
> +  end
> +
> +  -- Realloc and free events are pretty the same.
> +  -- We aren't interested in aggregated alloc/free sizes for
> +  -- the event, but only for new and old size values inside
> +  -- alloc-realloc-free chain. Assuming that we have
> +  -- no collisions between different object addresses.
> +  local function process_non_alloc_events(events_by_type)
> +    for _, event in pairs(events_by_type) do
> +      -- Realloc and free events always have "primary" key
                                               a
> +      -- that references table with rewrited memory
                          the          rewritten, although I’d use a different verb
                                       and put at the end: ‘memory changed’?
> +      -- (may be empty).
> +      for _, heap_chunk in pairs(event.primary) do
> +        local ev_line = symtab.demangle(symbols, heap_chunk.loc)
> +
> +        if (heap_chunk.alloced > 0) then
> +          heap[ev_line].size_diff = heap[ev_line].size_diff + heap_chunk.alloced
> +          heap[ev_line].cnt_alloc = heap[ev_line].cnt_alloc + heap_chunk.cnt
> +        end
> +
> +        if (heap_chunk.freed > 0) then
> +          heap[ev_line].size_diff = heap[ev_line].size_diff - heap_chunk.freed
> +          heap[ev_line].cnt_free = heap[ev_line].cnt_free + heap_chunk.cnt
> +        end
> +      end
> +    end
> +  end
> +  process_non_alloc_events(events.realloc)
> +  process_non_alloc_events(events.free)
> +  return heap
> +end
> +
> +return M
> -- 
> 2.31.0
> 



More information about the Tarantool-patches mailing list