Hi! Forgot to include my answers to questions.   > Also, it fixes a bug in the AVL-tree implementation, > which produced unnecessary inserts of values into nodes.   Q: Should it be any test for this? Also, may be this should be done in the separate commit (not patch?).   A: It is such a silly bug, that no, I don't believe it's needed. I've literlly changed the line from `table.insert(node.value, value)` to `node.value = value`. And we still have tests for the AVL tree itself, so we know for sure that this change doesn't break anything.   NB: CI is red in LuaJIT repo because this patch requires changes in the tarantool repo, so please refer to CI runs in PR.   -- Best regards, Maxim Kokryashkin     >Среда, 24 мая 2023, 23:35 +03:00 от Maxim Kokryashkin : >  >This patch reduces sysprof's parser memory footprint, >by avoiding reading all callchains before collapsing them. >Instead of it, parser merges stacks immediately after >reading them and stores counts in a lua table. > >Also, it fixes a bug in the AVL-tree implementation, >which produced unnecessary inserts of values into nodes. >--- > >Changes in v2: >- Fixed comments as per review by Sergey. > >Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-8700-sysprof-parser-refactoring >PR: https://github.com/tarantool/tarantool/pull/8703 > > tools/CMakeLists.txt | 2 - > tools/sysprof.lua | 27 +------- > tools/sysprof/collapse.lua | 124 ------------------------------------ > tools/sysprof/parse.lua | 127 +++++++++++++++++++++++++++---------- > tools/utils/avl.lua | 2 +- > tools/utils/symtab.lua | 2 +- > 6 files changed, 97 insertions(+), 187 deletions(-) > delete mode 100755 tools/sysprof/collapse.lua > >diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt >index dd7ec6bd..3a919433 100644 >--- a/tools/CMakeLists.txt >+++ b/tools/CMakeLists.txt >@@ -112,7 +112,6 @@ else() >   add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS >     luajit-parse-sysprof >     sysprof/parse.lua >- sysprof/collapse.lua >     sysprof.lua >     utils/bufread.lua >     utils/symtab.lua >@@ -121,7 +120,6 @@ else() >  >   install(FILES >       ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua >- ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua >     DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof >     PERMISSIONS >       OWNER_READ OWNER_WRITE >diff --git a/tools/sysprof.lua b/tools/sysprof.lua >index 1afab195..be2a0565 100644 >--- a/tools/sysprof.lua >+++ b/tools/sysprof.lua >@@ -1,13 +1,10 @@ > local bufread = require "utils.bufread" > local sysprof = require "sysprof.parse" > local symtab = require "utils.symtab" >-local misc = require "sysprof.collapse" >  > local stdout, stderr = io.stdout, io.stderr > local match, gmatch = string.match, string.gmatch >  >-local split_by_vmstate = false >- > -- Program options. > local opt_map = {} >  >@@ -28,10 +25,6 @@ Supported options are: >   os.exit(0) > end >  >-function opt_map.split() >- split_by_vmstate = true >-end >- > -- Print error and exit with error status. > local function opterror(...) >   stderr:write("luajit-parse-sysprof.lua: ERROR: ", ...) >@@ -85,28 +78,14 @@ local function parseargs(args) >   return args[args.argn] > end >  >-local function traverse_calltree(node, prefix) >- if node.is_leaf then >- print(prefix..' '..node.count) >- end >- >- local sep_prefix = #prefix == 0 and prefix or prefix..';' >- >- for name,child in pairs(node.children) do >- traverse_calltree(child, sep_prefix..name) >- end >-end >- > local function dump(inputfile) >   local reader = bufread.new(inputfile) >- >   local symbols = symtab.parse(reader) >- >   local events = sysprof.parse(reader, symbols) >- local calltree = misc.collapse(events, symbols, split_by_vmstate) >- >- traverse_calltree(calltree, '') >  >+ for stack, count in pairs(events) do >+ print(stack, count) >+ end >   os.exit(0) > end >  >diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua >deleted file mode 100755 >index 3d83d5ea..00000000 >--- a/tools/sysprof/collapse.lua >+++ /dev/null >@@ -1,124 +0,0 @@ >-local parse = require "sysprof.parse" >-local vmdef = require "jit.vmdef" >-local symtab = require "utils.symtab" >- >-local VMST_NAMES = { >- [parse.VMST.INTERP] = "VMST_INTERP", >- [parse.VMST.LFUNC] = "VMST_LFUNC", >- [parse.VMST.FFUNC] = "VMST_FFUNC", >- [parse.VMST.CFUNC] = "VMST_CFUNC", >- [parse.VMST.GC] = "VMST_GC", >- [parse.VMST.EXIT] = "VMST_EXIT", >- [parse.VMST.RECORD] = "VMST_RECORD", >- [parse.VMST.OPT] = "VMST_OPT", >- [parse.VMST.ASM] = "VMST_ASM", >- [parse.VMST.TRACE] = "VMST_TRACE", >-} >- >-local M = {} >- >-local function new_node(name, is_leaf) >- return { >- name = name, >- count = 0, >- is_leaf = is_leaf, >- children = {} >- } >-end >- >--- insert new child into a node (or increase counter in existing one) >-local function insert(name, node, is_leaf) >- if node.children[name] == nil then >- node.children[name] = new_node(name, is_leaf) >- end >- >- local child = node.children[name] >- child.count = child.count + 1 >- >- return child >-end >- >-local function insert_lua_callchain(chain, lua, symbols) >- local ins_cnt = 0 >- for _,fr in pairs(lua.callchain) do >- local name_lua >- >- ins_cnt = ins_cnt + 1 >- if fr.type == parse.FRAME.FFUNC then >- name_lua = vmdef.ffnames[fr.ffid] >- else >- name_lua = symtab.demangle(symbols, { >- addr = fr.addr, >- line = fr.line, >- gen = fr.gen >- }) >- if lua.trace.traceno ~= nil and lua.trace.addr == fr.addr and >- lua.trace.line == fr.line then >- name_lua = symtab.demangle(symbols, { >- addr = fr.addr, >- traceno = lua.trace.traceno, >- gen = fr.gen >- }) >- end >- >- if fr.type == parse.FRAME.CFUNC then >- -- C function encountered, the next chunk >- -- of frames is located on the C stack. >- break >- end >- end >- >- table.insert(chain, 1, { name = name_lua }) >- end >- table.remove(lua.callchain, ins_cnt) >-end >- >--- merge lua and host callchains into one callchain representing >--- transfer of control >-local function merge(event, symbols, sep_vmst) >- local cc = {} >- >- for _,h_fr in pairs(event.host.callchain) do >- local name_host = symtab.demangle(symbols, { >- addr = h_fr.addr, >- gen = h_fr.gen >- }) >- table.insert(cc, 1, { name = name_host }) >- >- if string.match(name_host, '^lua_cpcall') ~= nil then >- -- Any C function is present on both the C and the Lua >- -- stacks. It is more convenient to get its info from the >- -- host stack, since it has information about child frames. >- table.remove(event.lua.callchain, 1) >- end >- >- if string.match(name_host, '^lua_p?call') ~= nil then >- insert_lua_callchain(cc, event.lua, symbols) >- end >- >- end >- >- if sep_vmst == true then >- table.insert(cc, { name = VMST_NAMES[event.lua.vmstate] }) >- end >- >- return cc >-end >- >--- Collapse all the events into call tree >-function M.collapse(events, symbols, sep_vmst) >- local root = new_node('root', false) >- >- for _,ev in pairs(events) do >- local callchain = merge(ev, symbols, sep_vmst) >- local curr_node = root >- for i=#callchain,1,-1 do >- curr_node = insert(callchain[i].name, curr_node, false) >- end >- insert('', curr_node, true) >- end >- >- return root >-end >- >-return M >diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua >index 5b52f104..806297eb 100755 >--- a/tools/sysprof/parse.lua >+++ b/tools/sysprof/parse.lua >@@ -2,6 +2,7 @@ > -- The format spec can be found in . >  > local symtab = require "utils.symtab" >+local vmdef = require "jit.vmdef" >  > local string_format = string.format >  >@@ -10,7 +11,7 @@ local LJP_CURRENT_VERSION = 2 >  > local M = {} >  >-M.VMST = { >+local VMST = { >   INTERP = 0, >   LFUNC = 1, >   FFUNC = 2, >@@ -25,13 +26,14 @@ M.VMST = { > } >  >  >-M.FRAME = { >+local FRAME = { >   LFUNC = 1, >   CFUNC = 2, >   FFUNC = 3, >   BOTTOM = 0x80 > } >  >+ > local STREAM_END = 0x80 > local SYMTAB_LFUNC_EVENT = 10 > local SYMTAB_CFUNC_EVENT = 11 >@@ -54,42 +56,40 @@ local function new_event() >   } > end >  >-local function parse_lfunc(reader, event, symbols) >+local function parse_lfunc(reader, symbols) >   local addr = reader:read_uleb128() >   local line = reader:read_uleb128() >   local loc = symtab.loc(symbols, { addr = addr, line = line }) >- loc.type = M.FRAME.LFUNC >- table.insert(event.lua.callchain, 1, loc) >+ loc.type = FRAME.LFUNC >+ return symtab.demangle(symbols, loc) > end >  >-local function parse_ffunc(reader, event, _) >+local function parse_ffunc(reader, _) >   local ffid = reader:read_uleb128() >- table.insert(event.lua.callchain, 1, { >- type = M.FRAME.FFUNC, >- ffid = ffid, >- }) >+ return vmdef.ffnames[ffid] > end >  >-local function parse_cfunc(reader, event, symbols) >+local function parse_cfunc(reader, symbols) >   local addr = reader:read_uleb128() >   local loc = symtab.loc(symbols, { addr = addr }) >- loc.type = M.FRAME.CFUNC >- table.insert(event.lua.callchain, 1, loc) >+ loc.type = FRAME.CFUNC >+ return symtab.demangle(symbols, loc) > end >  > local frame_parsers = { >- [M.FRAME.LFUNC] = parse_lfunc, >- [M.FRAME.FFUNC] = parse_ffunc, >- [M.FRAME.CFUNC] = parse_cfunc >+ [FRAME.LFUNC] = parse_lfunc, >+ [FRAME.FFUNC] = parse_ffunc, >+ [FRAME.CFUNC] = parse_cfunc > } >  > local function parse_lua_callchain(reader, event, symbols) >   while true do >     local frame_header = reader:read_octet() >- if frame_header == M.FRAME.BOTTOM then >+ if frame_header == FRAME.BOTTOM then >       break >     end >- frame_parsers[frame_header](reader, event, symbols) >+ local name = frame_parsers[frame_header](reader, symbols) >+ table.insert(event.lua.callchain, 1, {name = name, type = frame_header}) >   end > end >  >@@ -100,7 +100,7 @@ local function parse_host_callchain(reader, event, symbols) >  >   while addr ~= 0 do >     local loc = symtab.loc(symbols, { addr = addr }) >- table.insert(event.host.callchain, 1, loc) >+ table.insert(event.host.callchain, 1, symtab.demangle(symbols, loc)) >     addr = reader:read_uleb128() >   end > end >@@ -108,10 +108,20 @@ end > --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- >  > local function parse_trace_callchain(reader, event, symbols) >- event.lua.trace.traceno = reader:read_uleb128() >- event.lua.trace.addr = reader:read_uleb128() >- event.lua.trace.line = reader:read_uleb128() >- event.lua.trace.gen = symtab.loc(symbols, event.lua.trace).gen >+ local loc = {} >+ loc.traceno = reader:read_uleb128() >+ loc.addr = reader:read_uleb128() >+ loc.line = reader:read_uleb128() >+ >+ local gen = symtab.loc(symbols, loc).gen >+ local name_lua = symtab.demangle(symbols, { >+ addr = loc.addr, >+ traceno = loc.traceno, >+ gen = gen >+ }) >+ event.lua.trace = loc >+ event.lua.trace.gen = gen >+ event.lua.trace.name = name_lua > end >  > --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- >@@ -143,18 +153,63 @@ local function parse_symtab(reader, symbols, vmstate) > end >  > local event_parsers = { >- [M.VMST.INTERP] = parse_host_only, >- [M.VMST.LFUNC] = parse_lua_host, >- [M.VMST.FFUNC] = parse_lua_host, >- [M.VMST.CFUNC] = parse_lua_host, >- [M.VMST.GC] = parse_host_only, >- [M.VMST.EXIT] = parse_host_only, >- [M.VMST.RECORD] = parse_host_only, >- [M.VMST.OPT] = parse_host_only, >- [M.VMST.ASM] = parse_host_only, >- [M.VMST.TRACE] = parse_trace, >+ [VMST.INTERP] = parse_host_only, >+ [VMST.LFUNC] = parse_lua_host, >+ [VMST.FFUNC] = parse_lua_host, >+ [VMST.CFUNC] = parse_lua_host, >+ [VMST.GC] = parse_host_only, >+ [VMST.EXIT] = parse_host_only, >+ [VMST.RECORD] = parse_host_only, >+ [VMST.OPT] = parse_host_only, >+ [VMST.ASM] = parse_host_only, >+ [VMST.TRACE] = parse_trace, > } >  >+local function insert_lua_callchain(chain, lua) >+ local ins_cnt = 0 >+ local name_lua >+ for _, fr in ipairs(lua.callchain) do >+ ins_cnt = ins_cnt + 1 >+ if fr.type == FRAME.CFUNC then >+ -- C function encountered, the next chunk >+ -- of frames is located on the C stack. >+ break >+ end >+ name_lua = fr.name >+ >+ if fr.type == FRAME.LFUNC >+ and lua.trace.traceno ~= nil >+ and lua.trace.addr == fr.addr >+ and lua.trace.line == fr.line >+ then >+ name_lua = lua.trace.name >+ end >+ >+ table.insert(chain, name_lua) >+ end >+ table.remove(lua.callchain, ins_cnt) >+end >+ >+local function merge(event) >+ local callchain = {} >+ >+ for _, name_host in ipairs(event.host.callchain) do >+ table.insert(callchain, name_host) >+ if string.match(name_host, '^lua_cpcall') ~= nil then >+ -- Any C function is present on both the C and the Lua >+ -- stacks. It is more convenient to get its info from the >+ -- host stack, since it has information about child frames. >+ table.remove(event.lua.callchain) >+ end >+ >+ if string.match(name_host, '^lua_p?call') ~= nil then >+ insert_lua_callchain(callchain, event.lua) >+ end >+ >+ end >+ return table.concat(callchain, ';') .. ';' >+end >+ > local function parse_event(reader, events, symbols) >   local event = new_event() >  >@@ -171,8 +226,9 @@ local function parse_event(reader, events, symbols) >   event.lua.vmstate = vmstate >  >   event_parsers[vmstate](reader, event, symbols) >- >- table.insert(events, event) >+ local callchain = merge(event) >+ local cur_cnt = events[callchain] >+ events[callchain] = (cur_cnt or 0) + 1 >   return true > end >  >@@ -203,4 +259,5 @@ function M.parse(reader, symbols) >   return events > end >  >+ > return M >diff --git a/tools/utils/avl.lua b/tools/utils/avl.lua >index d5baa534..098f58ec 100644 >--- a/tools/utils/avl.lua >+++ b/tools/utils/avl.lua >@@ -86,7 +86,7 @@ function M.insert(node, key, value) >   elseif key > node.key then >     node.right = M.insert(node.right, key, value) >   else >- table.insert(node.value, value) >+ node.value = value >   end >  >   update_height(node) >diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua >index c26a9e8c..7f6c78f0 100644 >--- a/tools/utils/symtab.lua >+++ b/tools/utils/symtab.lua >@@ -176,7 +176,7 @@ function M.demangle(symtab, loc) >   local key, value = avl.floor(symtab.cfunc, addr) >  >   if key then >- return string_format("%s:%#x", value[gen].name, key) >+ return string_format("%s:%#x", value.name, key) >   end >  >   return string_format("CFUNC %#x", addr) >-- >2.40.1