* [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser's memory footprint @ 2023-05-24 20:35 Maxim Kokryashkin via Tarantool-patches 2023-05-24 20:36 ` Maxim Kokryashkin via Tarantool-patches 2023-07-18 8:33 ` Igor Munkin via Tarantool-patches 0 siblings, 2 replies; 4+ messages in thread From: Maxim Kokryashkin via Tarantool-patches @ 2023-05-24 20:35 UTC (permalink / raw) To: tarantool-patches, skaplun, sergos This patch reduces sysprof's parser memory footprint, by avoiding reading all callchains before collapsing them. Instead of it, parser merges stacks immediately after reading them and stores counts in a lua table. Also, it fixes a bug in the AVL-tree implementation, which produced unnecessary inserts of values into nodes. --- Changes in v2: - Fixed comments as per review by Sergey. Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-8700-sysprof-parser-refactoring PR: https://github.com/tarantool/tarantool/pull/8703 tools/CMakeLists.txt | 2 - tools/sysprof.lua | 27 +------- tools/sysprof/collapse.lua | 124 ------------------------------------ tools/sysprof/parse.lua | 127 +++++++++++++++++++++++++++---------- tools/utils/avl.lua | 2 +- tools/utils/symtab.lua | 2 +- 6 files changed, 97 insertions(+), 187 deletions(-) delete mode 100755 tools/sysprof/collapse.lua diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index dd7ec6bd..3a919433 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -112,7 +112,6 @@ else() add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS luajit-parse-sysprof sysprof/parse.lua - sysprof/collapse.lua sysprof.lua utils/bufread.lua utils/symtab.lua @@ -121,7 +120,6 @@ else() install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua - ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof PERMISSIONS OWNER_READ OWNER_WRITE diff --git a/tools/sysprof.lua b/tools/sysprof.lua index 1afab195..be2a0565 100644 --- a/tools/sysprof.lua +++ b/tools/sysprof.lua @@ -1,13 +1,10 @@ local bufread = require "utils.bufread" local sysprof = require "sysprof.parse" local symtab = require "utils.symtab" -local misc = require "sysprof.collapse" local stdout, stderr = io.stdout, io.stderr local match, gmatch = string.match, string.gmatch -local split_by_vmstate = false - -- Program options. local opt_map = {} @@ -28,10 +25,6 @@ Supported options are: os.exit(0) end -function opt_map.split() - split_by_vmstate = true -end - -- Print error and exit with error status. local function opterror(...) stderr:write("luajit-parse-sysprof.lua: ERROR: ", ...) @@ -85,28 +78,14 @@ local function parseargs(args) return args[args.argn] end -local function traverse_calltree(node, prefix) - if node.is_leaf then - print(prefix..' '..node.count) - end - - local sep_prefix = #prefix == 0 and prefix or prefix..';' - - for name,child in pairs(node.children) do - traverse_calltree(child, sep_prefix..name) - end -end - local function dump(inputfile) local reader = bufread.new(inputfile) - local symbols = symtab.parse(reader) - local events = sysprof.parse(reader, symbols) - local calltree = misc.collapse(events, symbols, split_by_vmstate) - - traverse_calltree(calltree, '') + for stack, count in pairs(events) do + print(stack, count) + end os.exit(0) end diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua deleted file mode 100755 index 3d83d5ea..00000000 --- a/tools/sysprof/collapse.lua +++ /dev/null @@ -1,124 +0,0 @@ -local parse = require "sysprof.parse" -local vmdef = require "jit.vmdef" -local symtab = require "utils.symtab" - -local VMST_NAMES = { - [parse.VMST.INTERP] = "VMST_INTERP", - [parse.VMST.LFUNC] = "VMST_LFUNC", - [parse.VMST.FFUNC] = "VMST_FFUNC", - [parse.VMST.CFUNC] = "VMST_CFUNC", - [parse.VMST.GC] = "VMST_GC", - [parse.VMST.EXIT] = "VMST_EXIT", - [parse.VMST.RECORD] = "VMST_RECORD", - [parse.VMST.OPT] = "VMST_OPT", - [parse.VMST.ASM] = "VMST_ASM", - [parse.VMST.TRACE] = "VMST_TRACE", -} - -local M = {} - -local function new_node(name, is_leaf) - return { - name = name, - count = 0, - is_leaf = is_leaf, - children = {} - } -end - --- insert new child into a node (or increase counter in existing one) -local function insert(name, node, is_leaf) - if node.children[name] == nil then - node.children[name] = new_node(name, is_leaf) - end - - local child = node.children[name] - child.count = child.count + 1 - - return child -end - -local function insert_lua_callchain(chain, lua, symbols) - local ins_cnt = 0 - for _,fr in pairs(lua.callchain) do - local name_lua - - ins_cnt = ins_cnt + 1 - if fr.type == parse.FRAME.FFUNC then - name_lua = vmdef.ffnames[fr.ffid] - else - name_lua = symtab.demangle(symbols, { - addr = fr.addr, - line = fr.line, - gen = fr.gen - }) - if lua.trace.traceno ~= nil and lua.trace.addr == fr.addr and - lua.trace.line == fr.line then - name_lua = symtab.demangle(symbols, { - addr = fr.addr, - traceno = lua.trace.traceno, - gen = fr.gen - }) - end - - if fr.type == parse.FRAME.CFUNC then - -- C function encountered, the next chunk - -- of frames is located on the C stack. - break - end - end - - table.insert(chain, 1, { name = name_lua }) - end - table.remove(lua.callchain, ins_cnt) -end - --- merge lua and host callchains into one callchain representing --- transfer of control -local function merge(event, symbols, sep_vmst) - local cc = {} - - for _,h_fr in pairs(event.host.callchain) do - local name_host = symtab.demangle(symbols, { - addr = h_fr.addr, - gen = h_fr.gen - }) - table.insert(cc, 1, { name = name_host }) - - if string.match(name_host, '^lua_cpcall') ~= nil then - -- Any C function is present on both the C and the Lua - -- stacks. It is more convenient to get its info from the - -- host stack, since it has information about child frames. - table.remove(event.lua.callchain, 1) - end - - if string.match(name_host, '^lua_p?call') ~= nil then - insert_lua_callchain(cc, event.lua, symbols) - end - - end - - if sep_vmst == true then - table.insert(cc, { name = VMST_NAMES[event.lua.vmstate] }) - end - - return cc -end - --- Collapse all the events into call tree -function M.collapse(events, symbols, sep_vmst) - local root = new_node('root', false) - - for _,ev in pairs(events) do - local callchain = merge(ev, symbols, sep_vmst) - local curr_node = root - for i=#callchain,1,-1 do - curr_node = insert(callchain[i].name, curr_node, false) - end - insert('', curr_node, true) - end - - return root -end - -return M diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua index 5b52f104..806297eb 100755 --- a/tools/sysprof/parse.lua +++ b/tools/sysprof/parse.lua @@ -2,6 +2,7 @@ -- The format spec can be found in <src/lj_sysprof.h>. local symtab = require "utils.symtab" +local vmdef = require "jit.vmdef" local string_format = string.format @@ -10,7 +11,7 @@ local LJP_CURRENT_VERSION = 2 local M = {} -M.VMST = { +local VMST = { INTERP = 0, LFUNC = 1, FFUNC = 2, @@ -25,13 +26,14 @@ M.VMST = { } -M.FRAME = { +local FRAME = { LFUNC = 1, CFUNC = 2, FFUNC = 3, BOTTOM = 0x80 } + local STREAM_END = 0x80 local SYMTAB_LFUNC_EVENT = 10 local SYMTAB_CFUNC_EVENT = 11 @@ -54,42 +56,40 @@ local function new_event() } end -local function parse_lfunc(reader, event, symbols) +local function parse_lfunc(reader, symbols) local addr = reader:read_uleb128() local line = reader:read_uleb128() local loc = symtab.loc(symbols, { addr = addr, line = line }) - loc.type = M.FRAME.LFUNC - table.insert(event.lua.callchain, 1, loc) + loc.type = FRAME.LFUNC + return symtab.demangle(symbols, loc) end -local function parse_ffunc(reader, event, _) +local function parse_ffunc(reader, _) local ffid = reader:read_uleb128() - table.insert(event.lua.callchain, 1, { - type = M.FRAME.FFUNC, - ffid = ffid, - }) + return vmdef.ffnames[ffid] end -local function parse_cfunc(reader, event, symbols) +local function parse_cfunc(reader, symbols) local addr = reader:read_uleb128() local loc = symtab.loc(symbols, { addr = addr }) - loc.type = M.FRAME.CFUNC - table.insert(event.lua.callchain, 1, loc) + loc.type = FRAME.CFUNC + return symtab.demangle(symbols, loc) end local frame_parsers = { - [M.FRAME.LFUNC] = parse_lfunc, - [M.FRAME.FFUNC] = parse_ffunc, - [M.FRAME.CFUNC] = parse_cfunc + [FRAME.LFUNC] = parse_lfunc, + [FRAME.FFUNC] = parse_ffunc, + [FRAME.CFUNC] = parse_cfunc } local function parse_lua_callchain(reader, event, symbols) while true do local frame_header = reader:read_octet() - if frame_header == M.FRAME.BOTTOM then + if frame_header == FRAME.BOTTOM then break end - frame_parsers[frame_header](reader, event, symbols) + local name = frame_parsers[frame_header](reader, symbols) + table.insert(event.lua.callchain, 1, {name = name, type = frame_header}) end end @@ -100,7 +100,7 @@ local function parse_host_callchain(reader, event, symbols) while addr ~= 0 do local loc = symtab.loc(symbols, { addr = addr }) - table.insert(event.host.callchain, 1, loc) + table.insert(event.host.callchain, 1, symtab.demangle(symbols, loc)) addr = reader:read_uleb128() end end @@ -108,10 +108,20 @@ end --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- local function parse_trace_callchain(reader, event, symbols) - event.lua.trace.traceno = reader:read_uleb128() - event.lua.trace.addr = reader:read_uleb128() - event.lua.trace.line = reader:read_uleb128() - event.lua.trace.gen = symtab.loc(symbols, event.lua.trace).gen + local loc = {} + loc.traceno = reader:read_uleb128() + loc.addr = reader:read_uleb128() + loc.line = reader:read_uleb128() + + local gen = symtab.loc(symbols, loc).gen + local name_lua = symtab.demangle(symbols, { + addr = loc.addr, + traceno = loc.traceno, + gen = gen + }) + event.lua.trace = loc + event.lua.trace.gen = gen + event.lua.trace.name = name_lua end --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- @@ -143,18 +153,63 @@ local function parse_symtab(reader, symbols, vmstate) end local event_parsers = { - [M.VMST.INTERP] = parse_host_only, - [M.VMST.LFUNC] = parse_lua_host, - [M.VMST.FFUNC] = parse_lua_host, - [M.VMST.CFUNC] = parse_lua_host, - [M.VMST.GC] = parse_host_only, - [M.VMST.EXIT] = parse_host_only, - [M.VMST.RECORD] = parse_host_only, - [M.VMST.OPT] = parse_host_only, - [M.VMST.ASM] = parse_host_only, - [M.VMST.TRACE] = parse_trace, + [VMST.INTERP] = parse_host_only, + [VMST.LFUNC] = parse_lua_host, + [VMST.FFUNC] = parse_lua_host, + [VMST.CFUNC] = parse_lua_host, + [VMST.GC] = parse_host_only, + [VMST.EXIT] = parse_host_only, + [VMST.RECORD] = parse_host_only, + [VMST.OPT] = parse_host_only, + [VMST.ASM] = parse_host_only, + [VMST.TRACE] = parse_trace, } +local function insert_lua_callchain(chain, lua) + local ins_cnt = 0 + local name_lua + for _, fr in ipairs(lua.callchain) do + ins_cnt = ins_cnt + 1 + if fr.type == FRAME.CFUNC then + -- C function encountered, the next chunk + -- of frames is located on the C stack. + break + end + name_lua = fr.name + + if fr.type == FRAME.LFUNC + and lua.trace.traceno ~= nil + and lua.trace.addr == fr.addr + and lua.trace.line == fr.line + then + name_lua = lua.trace.name + end + + table.insert(chain, name_lua) + end + table.remove(lua.callchain, ins_cnt) +end + +local function merge(event) + local callchain = {} + + for _, name_host in ipairs(event.host.callchain) do + table.insert(callchain, name_host) + if string.match(name_host, '^lua_cpcall') ~= nil then + -- Any C function is present on both the C and the Lua + -- stacks. It is more convenient to get its info from the + -- host stack, since it has information about child frames. + table.remove(event.lua.callchain) + end + + if string.match(name_host, '^lua_p?call') ~= nil then + insert_lua_callchain(callchain, event.lua) + end + + end + return table.concat(callchain, ';') .. ';' +end + local function parse_event(reader, events, symbols) local event = new_event() @@ -171,8 +226,9 @@ local function parse_event(reader, events, symbols) event.lua.vmstate = vmstate event_parsers[vmstate](reader, event, symbols) - - table.insert(events, event) + local callchain = merge(event) + local cur_cnt = events[callchain] + events[callchain] = (cur_cnt or 0) + 1 return true end @@ -203,4 +259,5 @@ function M.parse(reader, symbols) return events end + return M diff --git a/tools/utils/avl.lua b/tools/utils/avl.lua index d5baa534..098f58ec 100644 --- a/tools/utils/avl.lua +++ b/tools/utils/avl.lua @@ -86,7 +86,7 @@ function M.insert(node, key, value) elseif key > node.key then node.right = M.insert(node.right, key, value) else - table.insert(node.value, value) + node.value = value end update_height(node) diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua index c26a9e8c..7f6c78f0 100644 --- a/tools/utils/symtab.lua +++ b/tools/utils/symtab.lua @@ -176,7 +176,7 @@ function M.demangle(symtab, loc) local key, value = avl.floor(symtab.cfunc, addr) if key then - return string_format("%s:%#x", value[gen].name, key) + return string_format("%s:%#x", value.name, key) end return string_format("CFUNC %#x", addr) -- 2.40.1 ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser's memory footprint 2023-05-24 20:35 [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser's memory footprint Maxim Kokryashkin via Tarantool-patches @ 2023-05-24 20:36 ` Maxim Kokryashkin via Tarantool-patches 2023-05-25 9:58 ` Sergey Kaplun via Tarantool-patches 2023-07-18 8:33 ` Igor Munkin via Tarantool-patches 1 sibling, 1 reply; 4+ messages in thread From: Maxim Kokryashkin via Tarantool-patches @ 2023-05-24 20:36 UTC (permalink / raw) To: Maxim Kokryashkin; +Cc: tarantool-patches [-- Attachment #1: Type: text/plain, Size: 14476 bytes --] Hi! Forgot to include my answers to questions. > Also, it fixes a bug in the AVL-tree implementation, > which produced unnecessary inserts of values into nodes. Q: Should it be any test for this? Also, may be this should be done in the separate commit (not patch?). A: It is such a silly bug, that no, I don't believe it's needed. I've literlly changed the line from `table.insert(node.value, value)` to `node.value = value`. And we still have tests for the AVL tree itself, so we know for sure that this change doesn't break anything. NB: CI is red in LuaJIT repo because this patch requires changes in the tarantool repo, so please refer to CI runs in PR. -- Best regards, Maxim Kokryashkin >Среда, 24 мая 2023, 23:35 +03:00 от Maxim Kokryashkin <max.kokryashkin@gmail.com>: > >This patch reduces sysprof's parser memory footprint, >by avoiding reading all callchains before collapsing them. >Instead of it, parser merges stacks immediately after >reading them and stores counts in a lua table. > >Also, it fixes a bug in the AVL-tree implementation, >which produced unnecessary inserts of values into nodes. >--- > >Changes in v2: >- Fixed comments as per review by Sergey. > >Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-8700-sysprof-parser-refactoring >PR: https://github.com/tarantool/tarantool/pull/8703 > > tools/CMakeLists.txt | 2 - > tools/sysprof.lua | 27 +------- > tools/sysprof/collapse.lua | 124 ------------------------------------ > tools/sysprof/parse.lua | 127 +++++++++++++++++++++++++++---------- > tools/utils/avl.lua | 2 +- > tools/utils/symtab.lua | 2 +- > 6 files changed, 97 insertions(+), 187 deletions(-) > delete mode 100755 tools/sysprof/collapse.lua > >diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt >index dd7ec6bd..3a919433 100644 >--- a/tools/CMakeLists.txt >+++ b/tools/CMakeLists.txt >@@ -112,7 +112,6 @@ else() > add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS > luajit-parse-sysprof > sysprof/parse.lua >- sysprof/collapse.lua > sysprof.lua > utils/bufread.lua > utils/symtab.lua >@@ -121,7 +120,6 @@ else() > > install(FILES > ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua >- ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua > DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof > PERMISSIONS > OWNER_READ OWNER_WRITE >diff --git a/tools/sysprof.lua b/tools/sysprof.lua >index 1afab195..be2a0565 100644 >--- a/tools/sysprof.lua >+++ b/tools/sysprof.lua >@@ -1,13 +1,10 @@ > local bufread = require "utils.bufread" > local sysprof = require "sysprof.parse" > local symtab = require "utils.symtab" >-local misc = require "sysprof.collapse" > > local stdout, stderr = io.stdout, io.stderr > local match, gmatch = string.match, string.gmatch > >-local split_by_vmstate = false >- > -- Program options. > local opt_map = {} > >@@ -28,10 +25,6 @@ Supported options are: > os.exit(0) > end > >-function opt_map.split() >- split_by_vmstate = true >-end >- > -- Print error and exit with error status. > local function opterror(...) > stderr:write("luajit-parse-sysprof.lua: ERROR: ", ...) >@@ -85,28 +78,14 @@ local function parseargs(args) > return args[args.argn] > end > >-local function traverse_calltree(node, prefix) >- if node.is_leaf then >- print(prefix..' '..node.count) >- end >- >- local sep_prefix = #prefix == 0 and prefix or prefix..';' >- >- for name,child in pairs(node.children) do >- traverse_calltree(child, sep_prefix..name) >- end >-end >- > local function dump(inputfile) > local reader = bufread.new(inputfile) >- > local symbols = symtab.parse(reader) >- > local events = sysprof.parse(reader, symbols) >- local calltree = misc.collapse(events, symbols, split_by_vmstate) >- >- traverse_calltree(calltree, '') > >+ for stack, count in pairs(events) do >+ print(stack, count) >+ end > os.exit(0) > end > >diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua >deleted file mode 100755 >index 3d83d5ea..00000000 >--- a/tools/sysprof/collapse.lua >+++ /dev/null >@@ -1,124 +0,0 @@ >-local parse = require "sysprof.parse" >-local vmdef = require "jit.vmdef" >-local symtab = require "utils.symtab" >- >-local VMST_NAMES = { >- [parse.VMST.INTERP] = "VMST_INTERP", >- [parse.VMST.LFUNC] = "VMST_LFUNC", >- [parse.VMST.FFUNC] = "VMST_FFUNC", >- [parse.VMST.CFUNC] = "VMST_CFUNC", >- [parse.VMST.GC] = "VMST_GC", >- [parse.VMST.EXIT] = "VMST_EXIT", >- [parse.VMST.RECORD] = "VMST_RECORD", >- [parse.VMST.OPT] = "VMST_OPT", >- [parse.VMST.ASM] = "VMST_ASM", >- [parse.VMST.TRACE] = "VMST_TRACE", >-} >- >-local M = {} >- >-local function new_node(name, is_leaf) >- return { >- name = name, >- count = 0, >- is_leaf = is_leaf, >- children = {} >- } >-end >- >--- insert new child into a node (or increase counter in existing one) >-local function insert(name, node, is_leaf) >- if node.children[name] == nil then >- node.children[name] = new_node(name, is_leaf) >- end >- >- local child = node.children[name] >- child.count = child.count + 1 >- >- return child >-end >- >-local function insert_lua_callchain(chain, lua, symbols) >- local ins_cnt = 0 >- for _,fr in pairs(lua.callchain) do >- local name_lua >- >- ins_cnt = ins_cnt + 1 >- if fr.type == parse.FRAME.FFUNC then >- name_lua = vmdef.ffnames[fr.ffid] >- else >- name_lua = symtab.demangle(symbols, { >- addr = fr.addr, >- line = fr.line, >- gen = fr.gen >- }) >- if lua.trace.traceno ~= nil and lua.trace.addr == fr.addr and >- lua.trace.line == fr.line then >- name_lua = symtab.demangle(symbols, { >- addr = fr.addr, >- traceno = lua.trace.traceno, >- gen = fr.gen >- }) >- end >- >- if fr.type == parse.FRAME.CFUNC then >- -- C function encountered, the next chunk >- -- of frames is located on the C stack. >- break >- end >- end >- >- table.insert(chain, 1, { name = name_lua }) >- end >- table.remove(lua.callchain, ins_cnt) >-end >- >--- merge lua and host callchains into one callchain representing >--- transfer of control >-local function merge(event, symbols, sep_vmst) >- local cc = {} >- >- for _,h_fr in pairs(event.host.callchain) do >- local name_host = symtab.demangle(symbols, { >- addr = h_fr.addr, >- gen = h_fr.gen >- }) >- table.insert(cc, 1, { name = name_host }) >- >- if string.match(name_host, '^lua_cpcall') ~= nil then >- -- Any C function is present on both the C and the Lua >- -- stacks. It is more convenient to get its info from the >- -- host stack, since it has information about child frames. >- table.remove(event.lua.callchain, 1) >- end >- >- if string.match(name_host, '^lua_p?call') ~= nil then >- insert_lua_callchain(cc, event.lua, symbols) >- end >- >- end >- >- if sep_vmst == true then >- table.insert(cc, { name = VMST_NAMES[event.lua.vmstate] }) >- end >- >- return cc >-end >- >--- Collapse all the events into call tree >-function M.collapse(events, symbols, sep_vmst) >- local root = new_node('root', false) >- >- for _,ev in pairs(events) do >- local callchain = merge(ev, symbols, sep_vmst) >- local curr_node = root >- for i=#callchain,1,-1 do >- curr_node = insert(callchain[i].name, curr_node, false) >- end >- insert('', curr_node, true) >- end >- >- return root >-end >- >-return M >diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua >index 5b52f104..806297eb 100755 >--- a/tools/sysprof/parse.lua >+++ b/tools/sysprof/parse.lua >@@ -2,6 +2,7 @@ > -- The format spec can be found in <src/lj_sysprof.h>. > > local symtab = require "utils.symtab" >+local vmdef = require "jit.vmdef" > > local string_format = string.format > >@@ -10,7 +11,7 @@ local LJP_CURRENT_VERSION = 2 > > local M = {} > >-M.VMST = { >+local VMST = { > INTERP = 0, > LFUNC = 1, > FFUNC = 2, >@@ -25,13 +26,14 @@ M.VMST = { > } > > >-M.FRAME = { >+local FRAME = { > LFUNC = 1, > CFUNC = 2, > FFUNC = 3, > BOTTOM = 0x80 > } > >+ > local STREAM_END = 0x80 > local SYMTAB_LFUNC_EVENT = 10 > local SYMTAB_CFUNC_EVENT = 11 >@@ -54,42 +56,40 @@ local function new_event() > } > end > >-local function parse_lfunc(reader, event, symbols) >+local function parse_lfunc(reader, symbols) > local addr = reader:read_uleb128() > local line = reader:read_uleb128() > local loc = symtab.loc(symbols, { addr = addr, line = line }) >- loc.type = M.FRAME.LFUNC >- table.insert(event.lua.callchain, 1, loc) >+ loc.type = FRAME.LFUNC >+ return symtab.demangle(symbols, loc) > end > >-local function parse_ffunc(reader, event, _) >+local function parse_ffunc(reader, _) > local ffid = reader:read_uleb128() >- table.insert(event.lua.callchain, 1, { >- type = M.FRAME.FFUNC, >- ffid = ffid, >- }) >+ return vmdef.ffnames[ffid] > end > >-local function parse_cfunc(reader, event, symbols) >+local function parse_cfunc(reader, symbols) > local addr = reader:read_uleb128() > local loc = symtab.loc(symbols, { addr = addr }) >- loc.type = M.FRAME.CFUNC >- table.insert(event.lua.callchain, 1, loc) >+ loc.type = FRAME.CFUNC >+ return symtab.demangle(symbols, loc) > end > > local frame_parsers = { >- [M.FRAME.LFUNC] = parse_lfunc, >- [M.FRAME.FFUNC] = parse_ffunc, >- [M.FRAME.CFUNC] = parse_cfunc >+ [FRAME.LFUNC] = parse_lfunc, >+ [FRAME.FFUNC] = parse_ffunc, >+ [FRAME.CFUNC] = parse_cfunc > } > > local function parse_lua_callchain(reader, event, symbols) > while true do > local frame_header = reader:read_octet() >- if frame_header == M.FRAME.BOTTOM then >+ if frame_header == FRAME.BOTTOM then > break > end >- frame_parsers[frame_header](reader, event, symbols) >+ local name = frame_parsers[frame_header](reader, symbols) >+ table.insert(event.lua.callchain, 1, {name = name, type = frame_header}) > end > end > >@@ -100,7 +100,7 @@ local function parse_host_callchain(reader, event, symbols) > > while addr ~= 0 do > local loc = symtab.loc(symbols, { addr = addr }) >- table.insert(event.host.callchain, 1, loc) >+ table.insert(event.host.callchain, 1, symtab.demangle(symbols, loc)) > addr = reader:read_uleb128() > end > end >@@ -108,10 +108,20 @@ end > --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- > > local function parse_trace_callchain(reader, event, symbols) >- event.lua.trace.traceno = reader:read_uleb128() >- event.lua.trace.addr = reader:read_uleb128() >- event.lua.trace.line = reader:read_uleb128() >- event.lua.trace.gen = symtab.loc(symbols, event.lua.trace).gen >+ local loc = {} >+ loc.traceno = reader:read_uleb128() >+ loc.addr = reader:read_uleb128() >+ loc.line = reader:read_uleb128() >+ >+ local gen = symtab.loc(symbols, loc).gen >+ local name_lua = symtab.demangle(symbols, { >+ addr = loc.addr, >+ traceno = loc.traceno, >+ gen = gen >+ }) >+ event.lua.trace = loc >+ event.lua.trace.gen = gen >+ event.lua.trace.name = name_lua > end > > --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- >@@ -143,18 +153,63 @@ local function parse_symtab(reader, symbols, vmstate) > end > > local event_parsers = { >- [M.VMST.INTERP] = parse_host_only, >- [M.VMST.LFUNC] = parse_lua_host, >- [M.VMST.FFUNC] = parse_lua_host, >- [M.VMST.CFUNC] = parse_lua_host, >- [M.VMST.GC] = parse_host_only, >- [M.VMST.EXIT] = parse_host_only, >- [M.VMST.RECORD] = parse_host_only, >- [M.VMST.OPT] = parse_host_only, >- [M.VMST.ASM] = parse_host_only, >- [M.VMST.TRACE] = parse_trace, >+ [VMST.INTERP] = parse_host_only, >+ [VMST.LFUNC] = parse_lua_host, >+ [VMST.FFUNC] = parse_lua_host, >+ [VMST.CFUNC] = parse_lua_host, >+ [VMST.GC] = parse_host_only, >+ [VMST.EXIT] = parse_host_only, >+ [VMST.RECORD] = parse_host_only, >+ [VMST.OPT] = parse_host_only, >+ [VMST.ASM] = parse_host_only, >+ [VMST.TRACE] = parse_trace, > } > >+local function insert_lua_callchain(chain, lua) >+ local ins_cnt = 0 >+ local name_lua >+ for _, fr in ipairs(lua.callchain) do >+ ins_cnt = ins_cnt + 1 >+ if fr.type == FRAME.CFUNC then >+ -- C function encountered, the next chunk >+ -- of frames is located on the C stack. >+ break >+ end >+ name_lua = fr.name >+ >+ if fr.type == FRAME.LFUNC >+ and lua.trace.traceno ~= nil >+ and lua.trace.addr == fr.addr >+ and lua.trace.line == fr.line >+ then >+ name_lua = lua.trace.name >+ end >+ >+ table.insert(chain, name_lua) >+ end >+ table.remove(lua.callchain, ins_cnt) >+end >+ >+local function merge(event) >+ local callchain = {} >+ >+ for _, name_host in ipairs(event.host.callchain) do >+ table.insert(callchain, name_host) >+ if string.match(name_host, '^lua_cpcall') ~= nil then >+ -- Any C function is present on both the C and the Lua >+ -- stacks. It is more convenient to get its info from the >+ -- host stack, since it has information about child frames. >+ table.remove(event.lua.callchain) >+ end >+ >+ if string.match(name_host, '^lua_p?call') ~= nil then >+ insert_lua_callchain(callchain, event.lua) >+ end >+ >+ end >+ return table.concat(callchain, ';') .. ';' >+end >+ > local function parse_event(reader, events, symbols) > local event = new_event() > >@@ -171,8 +226,9 @@ local function parse_event(reader, events, symbols) > event.lua.vmstate = vmstate > > event_parsers[vmstate](reader, event, symbols) >- >- table.insert(events, event) >+ local callchain = merge(event) >+ local cur_cnt = events[callchain] >+ events[callchain] = (cur_cnt or 0) + 1 > return true > end > >@@ -203,4 +259,5 @@ function M.parse(reader, symbols) > return events > end > >+ > return M >diff --git a/tools/utils/avl.lua b/tools/utils/avl.lua >index d5baa534..098f58ec 100644 >--- a/tools/utils/avl.lua >+++ b/tools/utils/avl.lua >@@ -86,7 +86,7 @@ function M.insert(node, key, value) > elseif key > node.key then > node.right = M.insert(node.right, key, value) > else >- table.insert(node.value, value) >+ node.value = value > end > > update_height(node) >diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua >index c26a9e8c..7f6c78f0 100644 >--- a/tools/utils/symtab.lua >+++ b/tools/utils/symtab.lua >@@ -176,7 +176,7 @@ function M.demangle(symtab, loc) > local key, value = avl.floor(symtab.cfunc, addr) > > if key then >- return string_format("%s:%#x", value[gen].name, key) >+ return string_format("%s:%#x", value.name, key) > end > > return string_format("CFUNC %#x", addr) >-- >2.40.1 [-- Attachment #2: Type: text/html, Size: 17436 bytes --] ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser's memory footprint 2023-05-24 20:36 ` Maxim Kokryashkin via Tarantool-patches @ 2023-05-25 9:58 ` Sergey Kaplun via Tarantool-patches 0 siblings, 0 replies; 4+ messages in thread From: Sergey Kaplun via Tarantool-patches @ 2023-05-25 9:58 UTC (permalink / raw) To: Maxim Kokryashkin; +Cc: Maxim Kokryashkin, tarantool-patches Hi, Maxim! Thanks for the explanations and fixes! LGTM! -- Best regards, Sergey Kaplun ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser's memory footprint 2023-05-24 20:35 [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser's memory footprint Maxim Kokryashkin via Tarantool-patches 2023-05-24 20:36 ` Maxim Kokryashkin via Tarantool-patches @ 2023-07-18 8:33 ` Igor Munkin via Tarantool-patches 1 sibling, 0 replies; 4+ messages in thread From: Igor Munkin via Tarantool-patches @ 2023-07-18 8:33 UTC (permalink / raw) To: Maxim Kokryashkin; +Cc: tarantool-patches Max, Thanks for the patch! My general comment relates to integrational testing. I'd rather left the stub for tools/sysprof/collapse.lua and remove it in a separate series. E.g. we update integrational workflows the same way, however hard update is also an option. On 24.05.23, Maxim Kokryashkin via Tarantool-patches wrote: > This patch reduces sysprof's parser memory footprint, > by avoiding reading all callchains before collapsing them. > Instead of it, parser merges stacks immediately after > reading them and stores counts in a lua table. > > Also, it fixes a bug in the AVL-tree implementation, > which produced unnecessary inserts of values into nodes. Minor: I guess it's better to split this patch into two: one fixing the bug in AVL tree, and another one for reducing memory footprint. AFAIU, you stop using tables to store nodes and this decision affects several parts of the sources, making review a bit harder. Hence, it's only partial for now and I'm waiting for v3 series. Side note: Honestly, I'd rather split this into three patches: 1. AVL-related fixes 2. Reducing memory footprint 3. Purging collapse.lua The order doesn't matter (until it follows the common sense and breaks nothing). > --- > > Changes in v2: > - Fixed comments as per review by Sergey. > > Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-8700-sysprof-parser-refactoring > PR: https://github.com/tarantool/tarantool/pull/8703 > > tools/CMakeLists.txt | 2 - > tools/sysprof.lua | 27 +------- > tools/sysprof/collapse.lua | 124 ------------------------------------ > tools/sysprof/parse.lua | 127 +++++++++++++++++++++++++++---------- > tools/utils/avl.lua | 2 +- > tools/utils/symtab.lua | 2 +- > 6 files changed, 97 insertions(+), 187 deletions(-) > delete mode 100755 tools/sysprof/collapse.lua <snipped> > diff --git a/tools/sysprof.lua b/tools/sysprof.lua > index 1afab195..be2a0565 100644 > --- a/tools/sysprof.lua > +++ b/tools/sysprof.lua <snipped> > > + for stack, count in pairs(events) do > + print(stack, count) Minor: Previous implementation was space separated, the current on is tab separated. Dunno, whether this is OK. > + end > os.exit(0) > end <snipped> > -- > 2.40.1 > -- Best regards, IM ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2023-07-18 8:44 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2023-05-24 20:35 [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser's memory footprint Maxim Kokryashkin via Tarantool-patches 2023-05-24 20:36 ` Maxim Kokryashkin via Tarantool-patches 2023-05-25 9:58 ` Sergey Kaplun via Tarantool-patches 2023-07-18 8:33 ` Igor Munkin via Tarantool-patches
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox