From: Maxim Kokryashkin via Tarantool-patches <tarantool-patches@dev.tarantool.org> To: tarantool-patches@dev.tarantool.org, skaplun@tarantool.org, sergos@tarantool.org Subject: [Tarantool-patches] [PATCH luajit] sysprof: improve parser's memory footprint Date: Thu, 18 May 2023 14:49:27 +0300 [thread overview] Message-ID: <20230518114927.277888-1-m.kokryashkin@tarantool.org> (raw) This patch reduces sysprof's parser memory footprint, by avoiding reading all callchains before collapsing them. Instead of it, parser merges stacks immediately after reading them and stores counts in a lua table. Also, it fixes a bug in the AVL-tree implementation, which produced unnecessary inserts of values into nodes. --- Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-noticket-sysprof-parser-refactoring PR: https://github.com/tarantool/tarantool/pull/8670 NB: CI is red in LuaJIT repo because this patch requires changes in the tarantool repo, so please refer to CI runs in PR. tools/CMakeLists.txt | 2 - tools/sysprof.lua | 27 +------- tools/sysprof/collapse.lua | 124 ------------------------------------ tools/sysprof/parse.lua | 125 ++++++++++++++++++++++++++----------- tools/utils/avl.lua | 2 +- tools/utils/symtab.lua | 2 +- 6 files changed, 95 insertions(+), 187 deletions(-) delete mode 100755 tools/sysprof/collapse.lua diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index dd7ec6bd..3a919433 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -112,7 +112,6 @@ else() add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS luajit-parse-sysprof sysprof/parse.lua - sysprof/collapse.lua sysprof.lua utils/bufread.lua utils/symtab.lua @@ -121,7 +120,6 @@ else() install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua - ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof PERMISSIONS OWNER_READ OWNER_WRITE diff --git a/tools/sysprof.lua b/tools/sysprof.lua index 1afab195..be2a0565 100644 --- a/tools/sysprof.lua +++ b/tools/sysprof.lua @@ -1,13 +1,10 @@ local bufread = require "utils.bufread" local sysprof = require "sysprof.parse" local symtab = require "utils.symtab" -local misc = require "sysprof.collapse" local stdout, stderr = io.stdout, io.stderr local match, gmatch = string.match, string.gmatch -local split_by_vmstate = false - -- Program options. local opt_map = {} @@ -28,10 +25,6 @@ Supported options are: os.exit(0) end -function opt_map.split() - split_by_vmstate = true -end - -- Print error and exit with error status. local function opterror(...) stderr:write("luajit-parse-sysprof.lua: ERROR: ", ...) @@ -85,28 +78,14 @@ local function parseargs(args) return args[args.argn] end -local function traverse_calltree(node, prefix) - if node.is_leaf then - print(prefix..' '..node.count) - end - - local sep_prefix = #prefix == 0 and prefix or prefix..';' - - for name,child in pairs(node.children) do - traverse_calltree(child, sep_prefix..name) - end -end - local function dump(inputfile) local reader = bufread.new(inputfile) - local symbols = symtab.parse(reader) - local events = sysprof.parse(reader, symbols) - local calltree = misc.collapse(events, symbols, split_by_vmstate) - - traverse_calltree(calltree, '') + for stack, count in pairs(events) do + print(stack, count) + end os.exit(0) end diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua deleted file mode 100755 index 3d83d5ea..00000000 --- a/tools/sysprof/collapse.lua +++ /dev/null @@ -1,124 +0,0 @@ -local parse = require "sysprof.parse" -local vmdef = require "jit.vmdef" -local symtab = require "utils.symtab" - -local VMST_NAMES = { - [parse.VMST.INTERP] = "VMST_INTERP", - [parse.VMST.LFUNC] = "VMST_LFUNC", - [parse.VMST.FFUNC] = "VMST_FFUNC", - [parse.VMST.CFUNC] = "VMST_CFUNC", - [parse.VMST.GC] = "VMST_GC", - [parse.VMST.EXIT] = "VMST_EXIT", - [parse.VMST.RECORD] = "VMST_RECORD", - [parse.VMST.OPT] = "VMST_OPT", - [parse.VMST.ASM] = "VMST_ASM", - [parse.VMST.TRACE] = "VMST_TRACE", -} - -local M = {} - -local function new_node(name, is_leaf) - return { - name = name, - count = 0, - is_leaf = is_leaf, - children = {} - } -end - --- insert new child into a node (or increase counter in existing one) -local function insert(name, node, is_leaf) - if node.children[name] == nil then - node.children[name] = new_node(name, is_leaf) - end - - local child = node.children[name] - child.count = child.count + 1 - - return child -end - -local function insert_lua_callchain(chain, lua, symbols) - local ins_cnt = 0 - for _,fr in pairs(lua.callchain) do - local name_lua - - ins_cnt = ins_cnt + 1 - if fr.type == parse.FRAME.FFUNC then - name_lua = vmdef.ffnames[fr.ffid] - else - name_lua = symtab.demangle(symbols, { - addr = fr.addr, - line = fr.line, - gen = fr.gen - }) - if lua.trace.traceno ~= nil and lua.trace.addr == fr.addr and - lua.trace.line == fr.line then - name_lua = symtab.demangle(symbols, { - addr = fr.addr, - traceno = lua.trace.traceno, - gen = fr.gen - }) - end - - if fr.type == parse.FRAME.CFUNC then - -- C function encountered, the next chunk - -- of frames is located on the C stack. - break - end - end - - table.insert(chain, 1, { name = name_lua }) - end - table.remove(lua.callchain, ins_cnt) -end - --- merge lua and host callchains into one callchain representing --- transfer of control -local function merge(event, symbols, sep_vmst) - local cc = {} - - for _,h_fr in pairs(event.host.callchain) do - local name_host = symtab.demangle(symbols, { - addr = h_fr.addr, - gen = h_fr.gen - }) - table.insert(cc, 1, { name = name_host }) - - if string.match(name_host, '^lua_cpcall') ~= nil then - -- Any C function is present on both the C and the Lua - -- stacks. It is more convenient to get its info from the - -- host stack, since it has information about child frames. - table.remove(event.lua.callchain, 1) - end - - if string.match(name_host, '^lua_p?call') ~= nil then - insert_lua_callchain(cc, event.lua, symbols) - end - - end - - if sep_vmst == true then - table.insert(cc, { name = VMST_NAMES[event.lua.vmstate] }) - end - - return cc -end - --- Collapse all the events into call tree -function M.collapse(events, symbols, sep_vmst) - local root = new_node('root', false) - - for _,ev in pairs(events) do - local callchain = merge(ev, symbols, sep_vmst) - local curr_node = root - for i=#callchain,1,-1 do - curr_node = insert(callchain[i].name, curr_node, false) - end - insert('', curr_node, true) - end - - return root -end - -return M diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua index 5b52f104..3db36472 100755 --- a/tools/sysprof/parse.lua +++ b/tools/sysprof/parse.lua @@ -2,6 +2,7 @@ -- The format spec can be found in <src/lj_sysprof.h>. local symtab = require "utils.symtab" +local vmdef = require "jit.vmdef" local string_format = string.format @@ -10,7 +11,7 @@ local LJP_CURRENT_VERSION = 2 local M = {} -M.VMST = { +local VMST = { INTERP = 0, LFUNC = 1, FFUNC = 2, @@ -25,13 +26,14 @@ M.VMST = { } -M.FRAME = { +local FRAME = { LFUNC = 1, CFUNC = 2, FFUNC = 3, BOTTOM = 0x80 } + local STREAM_END = 0x80 local SYMTAB_LFUNC_EVENT = 10 local SYMTAB_CFUNC_EVENT = 11 @@ -54,42 +56,40 @@ local function new_event() } end -local function parse_lfunc(reader, event, symbols) +local function parse_lfunc(reader, symbols) local addr = reader:read_uleb128() local line = reader:read_uleb128() local loc = symtab.loc(symbols, { addr = addr, line = line }) - loc.type = M.FRAME.LFUNC - table.insert(event.lua.callchain, 1, loc) + loc.type = FRAME.LFUNC + return symtab.demangle(symbols, loc) end -local function parse_ffunc(reader, event, _) +local function parse_ffunc(reader, _) local ffid = reader:read_uleb128() - table.insert(event.lua.callchain, 1, { - type = M.FRAME.FFUNC, - ffid = ffid, - }) + return vmdef.ffnames[ffid] end -local function parse_cfunc(reader, event, symbols) +local function parse_cfunc(reader, symbols) local addr = reader:read_uleb128() local loc = symtab.loc(symbols, { addr = addr }) - loc.type = M.FRAME.CFUNC - table.insert(event.lua.callchain, 1, loc) + loc.type = FRAME.CFUNC + return symtab.demangle(symbols, loc) end local frame_parsers = { - [M.FRAME.LFUNC] = parse_lfunc, - [M.FRAME.FFUNC] = parse_ffunc, - [M.FRAME.CFUNC] = parse_cfunc + [FRAME.LFUNC] = parse_lfunc, + [FRAME.FFUNC] = parse_ffunc, + [FRAME.CFUNC] = parse_cfunc } local function parse_lua_callchain(reader, event, symbols) while true do local frame_header = reader:read_octet() - if frame_header == M.FRAME.BOTTOM then + if frame_header == FRAME.BOTTOM then break end - frame_parsers[frame_header](reader, event, symbols) + local name = frame_parsers[frame_header](reader, symbols) + table.insert(event.lua.callchain, 1, {name=name, type=frame_header}) end end @@ -100,7 +100,7 @@ local function parse_host_callchain(reader, event, symbols) while addr ~= 0 do local loc = symtab.loc(symbols, { addr = addr }) - table.insert(event.host.callchain, 1, loc) + table.insert(event.host.callchain, 1, symtab.demangle(symbols, loc)) addr = reader:read_uleb128() end end @@ -108,10 +108,20 @@ end --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- local function parse_trace_callchain(reader, event, symbols) - event.lua.trace.traceno = reader:read_uleb128() - event.lua.trace.addr = reader:read_uleb128() - event.lua.trace.line = reader:read_uleb128() - event.lua.trace.gen = symtab.loc(symbols, event.lua.trace).gen + local loc = { + traceno = reader:read_uleb128(), + addr = reader:read_uleb128(), + line = reader:read_uleb128() + } + local gen = symtab.loc(symbols, loc).gen + local name_lua = symtab.demangle(symbols, { + addr = loc.addr, + traceno = loc.traceno, + gen = gen + }) + event.lua.trace = loc + event.lua.trace.gen = gen + event.lua.trace.name = name_lua end --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- @@ -143,18 +153,60 @@ local function parse_symtab(reader, symbols, vmstate) end local event_parsers = { - [M.VMST.INTERP] = parse_host_only, - [M.VMST.LFUNC] = parse_lua_host, - [M.VMST.FFUNC] = parse_lua_host, - [M.VMST.CFUNC] = parse_lua_host, - [M.VMST.GC] = parse_host_only, - [M.VMST.EXIT] = parse_host_only, - [M.VMST.RECORD] = parse_host_only, - [M.VMST.OPT] = parse_host_only, - [M.VMST.ASM] = parse_host_only, - [M.VMST.TRACE] = parse_trace, + [VMST.INTERP] = parse_host_only, + [VMST.LFUNC] = parse_lua_host, + [VMST.FFUNC] = parse_lua_host, + [VMST.CFUNC] = parse_lua_host, + [VMST.GC] = parse_host_only, + [VMST.EXIT] = parse_host_only, + [VMST.RECORD] = parse_host_only, + [VMST.OPT] = parse_host_only, + [VMST.ASM] = parse_host_only, + [VMST.TRACE] = parse_trace, } +local function insert_lua_callchain(chain, lua) + local ins_cnt = 0 + local name_lua + for _, fr in ipairs(lua.callchain) do + ins_cnt = ins_cnt + 1 + if fr.type == FRAME.CFUNC then + -- C function encountered, the next chunk + -- of frames is located on the C stack. + break + end + name_lua = fr.name + + if fr.type == FRAME.LFUNC and lua.trace.traceno ~= nil and + lua.trace.addr == fr.addr and lua.trace.line == fr.line then + name_lua = lua.trace.name + end + + table.insert(chain, name_lua) + end + table.remove(lua.callchain, ins_cnt) +end + +local function merge(event) + local cc = {} + + for _, name_host in ipairs(event.host.callchain) do + table.insert(cc, name_host) + if string.match(name_host, '^lua_cpcall') ~= nil then + -- Any C function is present on both the C and the Lua + -- stacks. It is more convenient to get its info from the + -- host stack, since it has information about child frames. + table.remove(event.lua.callchain) + end + + if string.match(name_host, '^lua_p?call') ~= nil then + insert_lua_callchain(cc, event.lua) + end + + end + return cc +end + local function parse_event(reader, events, symbols) local event = new_event() @@ -171,8 +223,10 @@ local function parse_event(reader, events, symbols) event.lua.vmstate = vmstate event_parsers[vmstate](reader, event, symbols) - - table.insert(events, event) + local cc = merge(event) + local cc_str = table.concat(cc, ';') .. ';' + local cur_cnt = events[cc_str] + events[cc_str] = (cur_cnt or 0) + 1 return true end @@ -203,4 +257,5 @@ function M.parse(reader, symbols) return events end + return M diff --git a/tools/utils/avl.lua b/tools/utils/avl.lua index d5baa534..098f58ec 100644 --- a/tools/utils/avl.lua +++ b/tools/utils/avl.lua @@ -86,7 +86,7 @@ function M.insert(node, key, value) elseif key > node.key then node.right = M.insert(node.right, key, value) else - table.insert(node.value, value) + node.value = value end update_height(node) diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua index c26a9e8c..7f6c78f0 100644 --- a/tools/utils/symtab.lua +++ b/tools/utils/symtab.lua @@ -176,7 +176,7 @@ function M.demangle(symtab, loc) local key, value = avl.floor(symtab.cfunc, addr) if key then - return string_format("%s:%#x", value[gen].name, key) + return string_format("%s:%#x", value.name, key) end return string_format("CFUNC %#x", addr) -- 2.40.1
next reply other threads:[~2023-05-18 11:49 UTC|newest] Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-05-18 11:49 Maxim Kokryashkin via Tarantool-patches [this message] 2023-05-22 9:24 ` Sergey Kaplun via Tarantool-patches 2023-05-22 9:26 ` Sergey Kaplun via Tarantool-patches 2023-05-23 12:36 ` Sergey Kaplun via Tarantool-patches
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20230518114927.277888-1-m.kokryashkin@tarantool.org \ --to=tarantool-patches@dev.tarantool.org \ --cc=max.kokryashkin@gmail.com \ --cc=sergos@tarantool.org \ --cc=skaplun@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH luajit] sysprof: improve parser'\''s memory footprint' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox