<HTML><BODY><div>Hi!</div><div>Forgot to include my answers to questions.</div><div> </div><div><div><div>> Also, it fixes a bug in the AVL-tree implementation,</div><div>> which produced unnecessary inserts of values into nodes.</div></div><div> </div><div><div>Q: Should it be any test for this?</div><div>Also, may be this should be done in the separate commit (not patch?).</div></div><div> </div><div><div>A: It is such a silly bug, that no, I don't believe it's needed. I've literlly</div><div>changed the line from `table.insert(node.value, value)` to</div><div>`node.value = value`. And we still have tests for the AVL tree itself, so</div><div>we know for sure that this change doesn't break anything.</div></div><div> </div><div><div>NB: CI is red in LuaJIT repo because this patch requires changes in the</div><div>tarantool repo, so please refer to CI runs in PR.</div></div></div><div> </div><div data-signature-widget="container"><div data-signature-widget="content"><div>--<br>Best regards,</div><div>Maxim Kokryashkin</div></div></div><div> </div><div> </div><blockquote style="border-left:1px solid #0857A6; margin:10px; padding:0 0 0 10px;">Среда, 24 мая 2023, 23:35 +03:00 от Maxim Kokryashkin <max.kokryashkin@gmail.com>:<br> <div id=""><div class="js-helper js-readmsg-msg"><div><div id="style_16849605191397823776_BODY">This patch reduces sysprof's parser memory footprint,<br>by avoiding reading all callchains before collapsing them.<br>Instead of it, parser merges stacks immediately after<br>reading them and stores counts in a lua table.<br><br>Also, it fixes a bug in the AVL-tree implementation,<br>which produced unnecessary inserts of values into nodes.<br>---<br><br>Changes in v2:<br>- Fixed comments as per review by Sergey.<br><br>Branch: <a href="https://github.com/tarantool/luajit/tree/fckxorg/gh-8700-sysprof-parser-refactoring" target="_blank">https://github.com/tarantool/luajit/tree/fckxorg/gh-8700-sysprof-parser-refactoring</a><br>PR: <a href="https://github.com/tarantool/tarantool/pull/8703" target="_blank">https://github.com/tarantool/tarantool/pull/8703</a><br><br> tools/CMakeLists.txt | 2 -<br> tools/sysprof.lua | 27 +-------<br> tools/sysprof/collapse.lua | 124 ------------------------------------<br> tools/sysprof/parse.lua | 127 +++++++++++++++++++++++++++----------<br> tools/utils/avl.lua | 2 +-<br> tools/utils/symtab.lua | 2 +-<br> 6 files changed, 97 insertions(+), 187 deletions(-)<br> delete mode 100755 tools/sysprof/collapse.lua<br><br>diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt<br>index dd7ec6bd..3a919433 100644<br>--- a/tools/CMakeLists.txt<br>+++ b/tools/CMakeLists.txt<br>@@ -112,7 +112,6 @@ else()<br> add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS<br> luajit-parse-sysprof<br> sysprof/parse.lua<br>- sysprof/collapse.lua<br> sysprof.lua<br> utils/bufread.lua<br> utils/symtab.lua<br>@@ -121,7 +120,6 @@ else()<br> <br> install(FILES<br> ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua<br>- ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua<br> DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof<br> PERMISSIONS<br> OWNER_READ OWNER_WRITE<br>diff --git a/tools/sysprof.lua b/tools/sysprof.lua<br>index 1afab195..be2a0565 100644<br>--- a/tools/sysprof.lua<br>+++ b/tools/sysprof.lua<br>@@ -1,13 +1,10 @@<br> local bufread = require "utils.bufread"<br> local sysprof = require "sysprof.parse"<br> local symtab = require "utils.symtab"<br>-local misc = require "sysprof.collapse"<br> <br> local stdout, stderr = io.stdout, io.stderr<br> local match, gmatch = string.match, string.gmatch<br> <br>-local split_by_vmstate = false<br>-<br> -- Program options.<br> local opt_map = {}<br> <br>@@ -28,10 +25,6 @@ Supported options are:<br> os.exit(0)<br> end<br> <br>-function opt_map.split()<br>- split_by_vmstate = true<br>-end<br>-<br> -- Print error and exit with error status.<br> local function opterror(...)<br> stderr:write("luajit-parse-sysprof.lua: ERROR: ", ...)<br>@@ -85,28 +78,14 @@ local function parseargs(args)<br> return args[args.argn]<br> end<br> <br>-local function traverse_calltree(node, prefix)<br>- if node.is_leaf then<br>- print(prefix..' '..node.count)<br>- end<br>-<br>- local sep_prefix = #prefix == 0 and prefix or prefix..';'<br>-<br>- for name,child in pairs(node.children) do<br>- traverse_calltree(child, sep_prefix..name)<br>- end<br>-end<br>-<br> local function dump(inputfile)<br> local reader = bufread.new(inputfile)<br>-<br> local symbols = symtab.parse(reader)<br>-<br> local events = sysprof.parse(reader, symbols)<br>- local calltree = misc.collapse(events, symbols, split_by_vmstate)<br>-<br>- traverse_calltree(calltree, '')<br> <br>+ for stack, count in pairs(events) do<br>+ print(stack, count)<br>+ end<br> os.exit(0)<br> end<br> <br>diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua<br>deleted file mode 100755<br>index 3d83d5ea..00000000<br>--- a/tools/sysprof/collapse.lua<br>+++ /dev/null<br>@@ -1,124 +0,0 @@<br>-local parse = require "sysprof.parse"<br>-local vmdef = require "jit.vmdef"<br>-local symtab = require "utils.symtab"<br>-<br>-local VMST_NAMES = {<br>- [parse.VMST.INTERP] = "VMST_INTERP",<br>- [parse.VMST.LFUNC] = "VMST_LFUNC",<br>- [parse.VMST.FFUNC] = "VMST_FFUNC",<br>- [parse.VMST.CFUNC] = "VMST_CFUNC",<br>- [parse.VMST.GC] = "VMST_GC",<br>- [parse.VMST.EXIT] = "VMST_EXIT",<br>- [parse.VMST.RECORD] = "VMST_RECORD",<br>- [parse.VMST.OPT] = "VMST_OPT",<br>- [parse.VMST.ASM] = "VMST_ASM",<br>- [parse.VMST.TRACE] = "VMST_TRACE",<br>-}<br>-<br>-local M = {}<br>-<br>-local function new_node(name, is_leaf)<br>- return {<br>- name = name,<br>- count = 0,<br>- is_leaf = is_leaf,<br>- children = {}<br>- }<br>-end<br>-<br>--- insert new child into a node (or increase counter in existing one)<br>-local function insert(name, node, is_leaf)<br>- if node.children[name] == nil then<br>- node.children[name] = new_node(name, is_leaf)<br>- end<br>-<br>- local child = node.children[name]<br>- child.count = child.count + 1<br>-<br>- return child<br>-end<br>-<br>-local function insert_lua_callchain(chain, lua, symbols)<br>- local ins_cnt = 0<br>- for _,fr in pairs(lua.callchain) do<br>- local name_lua<br>-<br>- ins_cnt = ins_cnt + 1<br>- if fr.type == parse.FRAME.FFUNC then<br>- name_lua = vmdef.ffnames[fr.ffid]<br>- else<br>- name_lua = symtab.demangle(symbols, {<br>- addr = fr.addr,<br>- line = fr.line,<br>- gen = fr.gen<br>- })<br>- if lua.trace.traceno ~= nil and lua.trace.addr == fr.addr and<br>- lua.trace.line == fr.line then<br>- name_lua = symtab.demangle(symbols, {<br>- addr = fr.addr,<br>- traceno = lua.trace.traceno,<br>- gen = fr.gen<br>- })<br>- end<br>-<br>- if fr.type == parse.FRAME.CFUNC then<br>- -- C function encountered, the next chunk<br>- -- of frames is located on the C stack.<br>- break<br>- end<br>- end<br>-<br>- table.insert(chain, 1, { name = name_lua })<br>- end<br>- table.remove(lua.callchain, ins_cnt)<br>-end<br>-<br>--- merge lua and host callchains into one callchain representing<br>--- transfer of control<br>-local function merge(event, symbols, sep_vmst)<br>- local cc = {}<br>-<br>- for _,h_fr in pairs(event.host.callchain) do<br>- local name_host = symtab.demangle(symbols, {<br>- addr = h_fr.addr,<br>- gen = h_fr.gen<br>- })<br>- table.insert(cc, 1, { name = name_host })<br>-<br>- if string.match(name_host, '^lua_cpcall') ~= nil then<br>- -- Any C function is present on both the C and the Lua<br>- -- stacks. It is more convenient to get its info from the<br>- -- host stack, since it has information about child frames.<br>- table.remove(event.lua.callchain, 1)<br>- end<br>-<br>- if string.match(name_host, '^lua_p?call') ~= nil then<br>- insert_lua_callchain(cc, event.lua, symbols)<br>- end<br>-<br>- end<br>-<br>- if sep_vmst == true then<br>- table.insert(cc, { name = VMST_NAMES[event.lua.vmstate] })<br>- end<br>-<br>- return cc<br>-end<br>-<br>--- Collapse all the events into call tree<br>-function M.collapse(events, symbols, sep_vmst)<br>- local root = new_node('root', false)<br>-<br>- for _,ev in pairs(events) do<br>- local callchain = merge(ev, symbols, sep_vmst)<br>- local curr_node = root<br>- for i=#callchain,1,-1 do<br>- curr_node = insert(callchain[i].name, curr_node, false)<br>- end<br>- insert('', curr_node, true)<br>- end<br>-<br>- return root<br>-end<br>-<br>-return M<br>diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua<br>index 5b52f104..806297eb 100755<br>--- a/tools/sysprof/parse.lua<br>+++ b/tools/sysprof/parse.lua<br>@@ -2,6 +2,7 @@<br> -- The format spec can be found in <src/lj_sysprof.h>.<br> <br> local symtab = require "utils.symtab"<br>+local vmdef = require "jit.vmdef"<br> <br> local string_format = string.format<br> <br>@@ -10,7 +11,7 @@ local LJP_CURRENT_VERSION = 2<br> <br> local M = {}<br> <br>-M.VMST = {<br>+local VMST = {<br> INTERP = 0,<br> LFUNC = 1,<br> FFUNC = 2,<br>@@ -25,13 +26,14 @@ M.VMST = {<br> }<br> <br> <br>-M.FRAME = {<br>+local FRAME = {<br> LFUNC = 1,<br> CFUNC = 2,<br> FFUNC = 3,<br> BOTTOM = 0x80<br> }<br> <br>+<br> local STREAM_END = 0x80<br> local SYMTAB_LFUNC_EVENT = 10<br> local SYMTAB_CFUNC_EVENT = 11<br>@@ -54,42 +56,40 @@ local function new_event()<br> }<br> end<br> <br>-local function parse_lfunc(reader, event, symbols)<br>+local function parse_lfunc(reader, symbols)<br> local addr = reader:read_uleb128()<br> local line = reader:read_uleb128()<br> local loc = symtab.loc(symbols, { addr = addr, line = line })<br>- loc.type = M.FRAME.LFUNC<br>- table.insert(event.lua.callchain, 1, loc)<br>+ loc.type = FRAME.LFUNC<br>+ return symtab.demangle(symbols, loc)<br> end<br> <br>-local function parse_ffunc(reader, event, _)<br>+local function parse_ffunc(reader, _)<br> local ffid = reader:read_uleb128()<br>- table.insert(event.lua.callchain, 1, {<br>- type = M.FRAME.FFUNC,<br>- ffid = ffid,<br>- })<br>+ return vmdef.ffnames[ffid]<br> end<br> <br>-local function parse_cfunc(reader, event, symbols)<br>+local function parse_cfunc(reader, symbols)<br> local addr = reader:read_uleb128()<br> local loc = symtab.loc(symbols, { addr = addr })<br>- loc.type = M.FRAME.CFUNC<br>- table.insert(event.lua.callchain, 1, loc)<br>+ loc.type = FRAME.CFUNC<br>+ return symtab.demangle(symbols, loc)<br> end<br> <br> local frame_parsers = {<br>- [M.FRAME.LFUNC] = parse_lfunc,<br>- [M.FRAME.FFUNC] = parse_ffunc,<br>- [M.FRAME.CFUNC] = parse_cfunc<br>+ [FRAME.LFUNC] = parse_lfunc,<br>+ [FRAME.FFUNC] = parse_ffunc,<br>+ [FRAME.CFUNC] = parse_cfunc<br> }<br> <br> local function parse_lua_callchain(reader, event, symbols)<br> while true do<br> local frame_header = reader:read_octet()<br>- if frame_header == M.FRAME.BOTTOM then<br>+ if frame_header == FRAME.BOTTOM then<br> break<br> end<br>- frame_parsers[frame_header](reader, event, symbols)<br>+ local name = frame_parsers[frame_header](reader, symbols)<br>+ table.insert(event.lua.callchain, 1, {name = name, type = frame_header})<br> end<br> end<br> <br>@@ -100,7 +100,7 @@ local function parse_host_callchain(reader, event, symbols)<br> <br> while addr ~= 0 do<br> local loc = symtab.loc(symbols, { addr = addr })<br>- table.insert(event.host.callchain, 1, loc)<br>+ table.insert(event.host.callchain, 1, symtab.demangle(symbols, loc))<br> addr = reader:read_uleb128()<br> end<br> end<br>@@ -108,10 +108,20 @@ end<br> --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~--<br> <br> local function parse_trace_callchain(reader, event, symbols)<br>- event.lua.trace.traceno = reader:read_uleb128()<br>- event.lua.trace.addr = reader:read_uleb128()<br>- event.lua.trace.line = reader:read_uleb128()<br>- event.lua.trace.gen = symtab.loc(symbols, event.lua.trace).gen<br>+ local loc = {}<br>+ loc.traceno = reader:read_uleb128()<br>+ loc.addr = reader:read_uleb128()<br>+ loc.line = reader:read_uleb128()<br>+<br>+ local gen = symtab.loc(symbols, loc).gen<br>+ local name_lua = symtab.demangle(symbols, {<br>+ addr = loc.addr,<br>+ traceno = loc.traceno,<br>+ gen = gen<br>+ })<br>+ event.lua.trace = loc<br>+ event.lua.trace.gen = gen<br>+ event.lua.trace.name = name_lua<br> end<br> <br> --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~--<br>@@ -143,18 +153,63 @@ local function parse_symtab(reader, symbols, vmstate)<br> end<br> <br> local event_parsers = {<br>- [M.VMST.INTERP] = parse_host_only,<br>- [M.VMST.LFUNC] = parse_lua_host,<br>- [M.VMST.FFUNC] = parse_lua_host,<br>- [M.VMST.CFUNC] = parse_lua_host,<br>- [M.VMST.GC] = parse_host_only,<br>- [M.VMST.EXIT] = parse_host_only,<br>- [M.VMST.RECORD] = parse_host_only,<br>- [M.VMST.OPT] = parse_host_only,<br>- [M.VMST.ASM] = parse_host_only,<br>- [M.VMST.TRACE] = parse_trace,<br>+ [VMST.INTERP] = parse_host_only,<br>+ [VMST.LFUNC] = parse_lua_host,<br>+ [VMST.FFUNC] = parse_lua_host,<br>+ [VMST.CFUNC] = parse_lua_host,<br>+ [VMST.GC] = parse_host_only,<br>+ [VMST.EXIT] = parse_host_only,<br>+ [VMST.RECORD] = parse_host_only,<br>+ [VMST.OPT] = parse_host_only,<br>+ [VMST.ASM] = parse_host_only,<br>+ [VMST.TRACE] = parse_trace,<br> }<br> <br>+local function insert_lua_callchain(chain, lua)<br>+ local ins_cnt = 0<br>+ local name_lua<br>+ for _, fr in ipairs(lua.callchain) do<br>+ ins_cnt = ins_cnt + 1<br>+ if fr.type == FRAME.CFUNC then<br>+ -- C function encountered, the next chunk<br>+ -- of frames is located on the C stack.<br>+ break<br>+ end<br>+ name_lua = fr.name<br>+<br>+ if fr.type == FRAME.LFUNC<br>+ and lua.trace.traceno ~= nil<br>+ and lua.trace.addr == fr.addr<br>+ and lua.trace.line == fr.line<br>+ then<br>+ name_lua = lua.trace.name<br>+ end<br>+<br>+ table.insert(chain, name_lua)<br>+ end<br>+ table.remove(lua.callchain, ins_cnt)<br>+end<br>+<br>+local function merge(event)<br>+ local callchain = {}<br>+<br>+ for _, name_host in ipairs(event.host.callchain) do<br>+ table.insert(callchain, name_host)<br>+ if string.match(name_host, '^lua_cpcall') ~= nil then<br>+ -- Any C function is present on both the C and the Lua<br>+ -- stacks. It is more convenient to get its info from the<br>+ -- host stack, since it has information about child frames.<br>+ table.remove(event.lua.callchain)<br>+ end<br>+<br>+ if string.match(name_host, '^lua_p?call') ~= nil then<br>+ insert_lua_callchain(callchain, event.lua)<br>+ end<br>+<br>+ end<br>+ return table.concat(callchain, ';') .. ';'<br>+end<br>+<br> local function parse_event(reader, events, symbols)<br> local event = new_event()<br> <br>@@ -171,8 +226,9 @@ local function parse_event(reader, events, symbols)<br> event.lua.vmstate = vmstate<br> <br> event_parsers[vmstate](reader, event, symbols)<br>-<br>- table.insert(events, event)<br>+ local callchain = merge(event)<br>+ local cur_cnt = events[callchain]<br>+ events[callchain] = (cur_cnt or 0) + 1<br> return true<br> end<br> <br>@@ -203,4 +259,5 @@ function M.parse(reader, symbols)<br> return events<br> end<br> <br>+<br> return M<br>diff --git a/tools/utils/avl.lua b/tools/utils/avl.lua<br>index d5baa534..098f58ec 100644<br>--- a/tools/utils/avl.lua<br>+++ b/tools/utils/avl.lua<br>@@ -86,7 +86,7 @@ function M.insert(node, key, value)<br> elseif key > node.key then<br> node.right = M.insert(node.right, key, value)<br> else<br>- table.insert(node.value, value)<br>+ node.value = value<br> end<br> <br> update_height(node)<br>diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua<br>index c26a9e8c..7f6c78f0 100644<br>--- a/tools/utils/symtab.lua<br>+++ b/tools/utils/symtab.lua<br>@@ -176,7 +176,7 @@ function M.demangle(symtab, loc)<br> local key, value = avl.floor(symtab.cfunc, addr)<br> <br> if key then<br>- return string_format("%s:%#x", value[gen].name, key)<br>+ return string_format("%s:%#x", value.name, key)<br> end<br> <br> return string_format("CFUNC %#x", addr)<br>--<br>2.40.1</div></div></div></div></blockquote><div> </div></BODY></HTML>