[Tarantool-patches] [PATCH v2 luajit] tools: introduce --leak-only memprof parser option
Sergey Ostanevich
sergos at tarantool.org
Thu Apr 8 15:49:47 MSK 2021
Hi!
Just a couple of nits, LGTM.
Sergos
> On 31 Mar 2021, at 20:29, Sergey Kaplun <skaplun at tarantool.org> wrote:
>
> This patch indtroduces new memprof parser module <process.lua> to
introduces
> post-process memory events.
>
> Memprof parser now adds postamble with the source lines of Lua chunks
> (or "INTERNAL") that allocate and do not free some amount of bytes, when
> profiler finishes. The parser also reports the number of allocation and
> deallocation events related to each line.
>
> Also, this patch adds a new --leak-only memory profiler parser option.
> When the parser runs with that option, it reports only leak
> information.
>
> Resolves tarantool/tarantool#5812
> ---
> Changes in v2:
> * introduce new memprof's <process.lua> module to post-process parsed
> events
> * add tests
>
> ChangeLog entry (and postamble too Tarantool bump commit message):
^^^^^^^ I failed to parse, typo?
>
> ===================================================================
> ##feature/luajit
>
> * Now memory profiler parser reports heap difference occurring during
> the measurement interval. New memory profiler's option `--leak-only`
> to show only heap difference is introduced. New built-in module
shows
> `memprof.process` is introduced to perform memory events
> post-processing and aggregation. Now to launch memory profiler
> via Tarantool user should use the following command:
> `tarantool -e 'require("memprof")(arg)' - --leak-only /tmp/memprof.bin`
> ===================================================================
>
> Branch with tests and added the corresponding built-in:
> * https://github.com/tarantool/tarantool/tree/skaplun/gh-5812-memprof-memleaks-option
> LuaJIT branch:
> * https://github.com/tarantool/luajit/tree/skaplun/gh-5812-memprof-memleaks-option
> Issue: https://github.com/tarantool/tarantool/issues/5812
>
> .../misclib-memprof-lapi.test.lua | 21 +++++--
> tools/memprof.lua | 33 ++++++-----
> tools/memprof/humanize.lua | 43 +++++++++++++-
> tools/memprof/parse.lua | 20 +++++--
> tools/memprof/process.lua | 59 +++++++++++++++++++
> 5 files changed, 151 insertions(+), 25 deletions(-)
> create mode 100644 tools/memprof/process.lua
>
> diff --git a/test/tarantool-tests/misclib-memprof-lapi.test.lua b/test/tarantool-tests/misclib-memprof-lapi.test.lua
> index cb63e1b8..9affc2fe 100644
> --- a/test/tarantool-tests/misclib-memprof-lapi.test.lua
> +++ b/test/tarantool-tests/misclib-memprof-lapi.test.lua
> @@ -1,7 +1,7 @@
> local tap = require("tap")
>
> local test = tap.test("misc-memprof-lapi")
> -test:plan(9)
> +test:plan(13)
>
> jit.off()
> jit.flush()
> @@ -10,6 +10,7 @@ local table_new = require "table.new"
>
> local bufread = require "utils.bufread"
> local memprof = require "memprof.parse"
> +local process = require "memprof.process"
> local symtab = require "utils.symtab"
>
> local TMP_BINFILE = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%.%1.memprofdata.tmp.bin")
> @@ -66,8 +67,12 @@ local function fill_ev_type(events, symbols, event_type)
> return ev_type
> end
>
> +local function form_source_line(line)
> + return string.format("@%s:%d", arg[0], line)
> +end
> +
> local function check_alloc_report(alloc, line, function_line, nevents)
> - assert(string.format("@%s:%d", arg[0], function_line) == alloc[line].name)
> + assert(form_source_line(function_line) == alloc[line].name)
> assert(alloc[line].num == nevents, ("got=%d, expected=%d"):format(
> alloc[line].num,
> nevents
> @@ -120,13 +125,21 @@ local free = fill_ev_type(events, symbols, "free")
> -- the number of allocations.
> -- 1 event - alocation of table by itself + 1 allocation
allocation
> -- of array part as far it is bigger than LJ_MAX_COLOSIZE (16).
> -test:ok(check_alloc_report(alloc, 20, 18, 2))
> +test:ok(check_alloc_report(alloc, 21, 19, 2))
> -- 100 strings allocations.
> -test:ok(check_alloc_report(alloc, 25, 18, 100))
> +test:ok(check_alloc_report(alloc, 26, 19, 100))
>
> -- Collect all previous allocated objects.
> test:ok(free.INTERNAL.num == 102)
>
> +local heap_diff = process.form_heap_diff(events, symbols)
> +local tab_alloc_source = heap_diff[form_source_line(21)]
> +local str_alloc_source = heap_diff[form_source_line(26)]
> +test:ok(tab_alloc_source.cnt_alloc == tab_alloc_source.cnt_free)
> +test:ok(tab_alloc_source.size_diff == 0)
> +test:ok(str_alloc_source.cnt_alloc == str_alloc_source.cnt_free)
> +test:ok(str_alloc_source.size_diff == 0)
> +
> -- Test for https://github.com/tarantool/tarantool/issues/5842.
> -- We are not interested in this report.
> misc.memprof.start("/dev/null")
> diff --git a/tools/memprof.lua b/tools/memprof.lua
> index 9f962085..c6c5f587 100644
> --- a/tools/memprof.lua
> +++ b/tools/memprof.lua
> @@ -12,6 +12,7 @@
>
> local bufread = require "utils.bufread"
> local memprof = require "memprof.parse"
> +local process = require "memprof.process"
> local symtab = require "utils.symtab"
> local view = require "memprof.humanize"
>
> @@ -33,10 +34,16 @@ luajit-parse-memprof [options] memprof.bin
> Supported options are:
>
> --help Show this help and exit
> + --leak-only Report only leaks information
> ]]
> os.exit(0)
> end
>
> +local leak_only = false
> +opt_map["leak-only"] = function()
> + leak_only = true
> +end
> +
> -- Print error and exit with error status.
> local function opterror(...)
> stderr:write("luajit-parse-memprof.lua: ERROR: ", ...)
> @@ -94,26 +101,22 @@ local function dump(inputfile)
> local reader = bufread.new(inputfile)
> local symbols = symtab.parse(reader)
> local events = memprof.parse(reader, symbols)
> -
> - stdout:write("ALLOCATIONS", "\n")
> - view.render(events.alloc, symbols)
> - stdout:write("\n")
> -
> - stdout:write("REALLOCATIONS", "\n")
> - view.render(events.realloc, symbols)
> - stdout:write("\n")
> -
> - stdout:write("DEALLOCATIONS", "\n")
> - view.render(events.free, symbols)
> - stdout:write("\n")
> -
> + if not leak_only then
> + view.profile_info(events, symbols)
> + end
> + local heap_diff = process.form_heap_diff(events, symbols)
> + view.leak_only(heap_diff)
The name of the function is confusing: you dump the whole data if the mode
is _not_ leak-only, and then, unconditionally, the leaks data. So it sounds
like ‘always’, but I propose to name it just ‘leaks’. Then the logic will be
‘dump all’ (unless leak-only) followed by ‘leaks’.
> os.exit(0)
> end
>
> +local function dump_wrapped(...)
> + return dump(parseargs(...))
> +end
> +
> -- FIXME: this script should be application-independent.
> local args = {...}
> if #args == 1 and args[1] == "memprof" then
> - return dump
> + return dump_wrapped
> else
> - dump(parseargs(args))
> + dump_wrapped(args)
> end
> diff --git a/tools/memprof/humanize.lua b/tools/memprof/humanize.lua
> index 2d5814c6..6afd3ff1 100644
> --- a/tools/memprof/humanize.lua
> +++ b/tools/memprof/humanize.lua
> @@ -28,8 +28,8 @@ function M.render(events, symbols)
> ))
>
> local prim_loc = {}
> - for _, loc in pairs(event.primary) do
> - table.insert(prim_loc, symtab.demangle(symbols, loc))
> + for _, heap_chunk in pairs(event.primary) do
> + table.insert(prim_loc, symtab.demangle(symbols, heap_chunk.loc))
> end
> if #prim_loc ~= 0 then
> table.sort(prim_loc)
> @@ -42,4 +42,43 @@ function M.render(events, symbols)
> end
> end
>
> +function M.profile_info(events, symbols)
> + print("ALLOCATIONS")
> + M.render(events.alloc, symbols)
> + print("")
> +
> + print("REALLOCATIONS")
> + M.render(events.realloc, symbols)
> + print("")
> +
> + print("DEALLOCATIONS")
> + M.render(events.free, symbols)
> + print("")
> +end
> +
> +function M.leak_only(heap_diff)
> + local rest_heap = {}
> + for line, info in pairs(heap_diff) do
> + -- Report "INTERNAL" events inconsistencies for profiling
> + -- with enabled jit.
> + if info.size_diff > 0 then
> + table.insert(rest_heap, {line = line, hold_bytes = info.size_diff})
> + end
> + end
> +
> + table.sort(rest_heap, function(h1, h2)
> + return h1.hold_bytes > h2.hold_bytes
> + end)
> +
> + print("HEAP SUMMARY:")
> + for _, h in pairs(rest_heap) do
> + print(string.format(
> + "%s holds %d bytes: %d allocs, %d frees",
> + h.line, h.hold_bytes, heap_diff[h.line].cnt_alloc,
> + heap_diff[h.line].cnt_free
> + ))
> + end
> + print("")
> +end
> +
> return M
> diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
> index 6dae22d5..df10a45f 100644
> --- a/tools/memprof/parse.lua
> +++ b/tools/memprof/parse.lua
> @@ -39,11 +39,23 @@ local function new_event(loc)
> }
> end
>
> -local function link_to_previous(heap_chunk, e)
> +local function link_to_previous(heap_chunk, e, nsize)
> -- Memory at this chunk was allocated before we start tracking.
> if heap_chunk then
> -- Save Lua code location (line) by address (id).
> - e.primary[heap_chunk[2]] = heap_chunk[3]
> + if not e.primary[heap_chunk[2]] then
> + e.primary[heap_chunk[2]] = {
> + loc = heap_chunk[3],
> + alloced = 0,
> + freed = 0,
> + cnt = 0,
> + }
> + end
> + -- Save memory diff heap information.
> + local location_data = e.primary[heap_chunk[2]]
> + location_data.alloced = location_data.alloced + nsize
> + location_data.freed = location_data.freed + heap_chunk[1]
> + location_data.cnt = location_data.cnt + 1
> end
> end
>
> @@ -97,7 +109,7 @@ local function parse_realloc(reader, asource, events, heap)
> e.free = e.free + osize
> e.alloc = e.alloc + nsize
>
> - link_to_previous(heap[oaddr], e)
> + link_to_previous(heap[oaddr], e, nsize)
>
> heap[oaddr] = nil
> heap[naddr] = {nsize, id, loc}
> @@ -116,7 +128,7 @@ local function parse_free(reader, asource, events, heap)
> e.num = e.num + 1
> e.free = e.free + osize
>
> - link_to_previous(heap[oaddr], e)
> + link_to_previous(heap[oaddr], e, 0)
>
> heap[oaddr] = nil
> end
> diff --git a/tools/memprof/process.lua b/tools/memprof/process.lua
> new file mode 100644
> index 00000000..94be187e
> --- /dev/null
> +++ b/tools/memprof/process.lua
> @@ -0,0 +1,59 @@
> +-- LuaJIT's memory profile post-processing module.
> +
> +local M = {}
> +
> +local symtab = require "utils.symtab"
> +
> +function M.form_heap_diff(events, symbols)
> + -- Auto resurrects source event lines for counting/reporting.
> + local heap = setmetatable({}, {__index = function(t, line)
> + t[line] = {
> + size_diff = 0,
> + cnt_alloc = 0,
> + cnt_free = 0,
> + }
> + return t[line]
> + end})
> +
> + for _, event in pairs(events.alloc) do
> + if event.loc then
> + local ev_line = symtab.demangle(symbols, event.loc)
> +
> + if (event.alloc > 0) then
> + heap[ev_line].size_diff = heap[ev_line].size_diff + event.alloc
> + heap[ev_line].cnt_alloc = heap[ev_line].cnt_alloc + event.num
> + end
> + end
> + end
> +
> + -- Realloc and free events are pretty the same.
> + -- We aren't interested in aggregated alloc/free sizes for
> + -- the event, but only for new and old size values inside
> + -- alloc-realloc-free chain. Assuming that we have
> + -- no collisions between different object addresses.
> + local function process_non_alloc_events(events_by_type)
> + for _, event in pairs(events_by_type) do
> + -- Realloc and free events always have "primary" key
a
> + -- that references table with rewrited memory
the rewritten, although I’d use a different verb
and put it at the end: ‘memory changed’?
> + -- (may be empty).
> + for _, heap_chunk in pairs(event.primary) do
> + local ev_line = symtab.demangle(symbols, heap_chunk.loc)
> +
> + if (heap_chunk.alloced > 0) then
> + heap[ev_line].size_diff = heap[ev_line].size_diff + heap_chunk.alloced
> + heap[ev_line].cnt_alloc = heap[ev_line].cnt_alloc + heap_chunk.cnt
> + end
> +
> + if (heap_chunk.freed > 0) then
> + heap[ev_line].size_diff = heap[ev_line].size_diff - heap_chunk.freed
> + heap[ev_line].cnt_free = heap[ev_line].cnt_free + heap_chunk.cnt
> + end
> + end
> + end
> + end
> + process_non_alloc_events(events.realloc)
> + process_non_alloc_events(events.free)
> + return heap
> +end
> +
> +return M
> --
> 2.31.0
>
More information about the Tarantool-patches
mailing list