From: Maxim Kokryashkin via Tarantool-patches <tarantool-patches@dev.tarantool.org> To: "Maxim Kokryashkin" <max.kokryashkin@gmail.com> Cc: tarantool-patches@dev.tarantool.org Subject: Re: [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser's memory footprint Date: Wed, 24 May 2023 23:36:02 +0300 [thread overview] Message-ID: <1684960562.12973543@f416.i.mail.ru> (raw) In-Reply-To: <20230524203513.246808-1-m.kokryashkin@tarantool.org> [-- Attachment #1: Type: text/plain, Size: 14476 bytes --] Hi! Forgot to include my answers to questions. > Also, it fixes a bug in the AVL-tree implementation, > which produced unnecessary inserts of values into nodes. Q: Should it be any test for this? Also, may be this should be done in the separate commit (not patch?). A: It is such a silly bug, that no, I don't believe it's needed. I've literlly changed the line from `table.insert(node.value, value)` to `node.value = value`. And we still have tests for the AVL tree itself, so we know for sure that this change doesn't break anything. NB: CI is red in LuaJIT repo because this patch requires changes in the tarantool repo, so please refer to CI runs in PR. -- Best regards, Maxim Kokryashkin >Среда, 24 мая 2023, 23:35 +03:00 от Maxim Kokryashkin <max.kokryashkin@gmail.com>: > >This patch reduces sysprof's parser memory footprint, >by avoiding reading all callchains before collapsing them. >Instead of it, parser merges stacks immediately after >reading them and stores counts in a lua table. > >Also, it fixes a bug in the AVL-tree implementation, >which produced unnecessary inserts of values into nodes. >--- > >Changes in v2: >- Fixed comments as per review by Sergey. > >Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-8700-sysprof-parser-refactoring >PR: https://github.com/tarantool/tarantool/pull/8703 > > tools/CMakeLists.txt | 2 - > tools/sysprof.lua | 27 +------- > tools/sysprof/collapse.lua | 124 ------------------------------------ > tools/sysprof/parse.lua | 127 +++++++++++++++++++++++++++---------- > tools/utils/avl.lua | 2 +- > tools/utils/symtab.lua | 2 +- > 6 files changed, 97 insertions(+), 187 deletions(-) > delete mode 100755 tools/sysprof/collapse.lua > >diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt >index dd7ec6bd..3a919433 100644 >--- a/tools/CMakeLists.txt >+++ b/tools/CMakeLists.txt >@@ -112,7 +112,6 @@ else() > add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS > luajit-parse-sysprof > sysprof/parse.lua >- sysprof/collapse.lua > sysprof.lua > utils/bufread.lua > utils/symtab.lua >@@ -121,7 +120,6 @@ else() > > install(FILES > ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua >- ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua > DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof > PERMISSIONS > OWNER_READ OWNER_WRITE >diff --git a/tools/sysprof.lua b/tools/sysprof.lua >index 1afab195..be2a0565 100644 >--- a/tools/sysprof.lua >+++ b/tools/sysprof.lua >@@ -1,13 +1,10 @@ > local bufread = require "utils.bufread" > local sysprof = require "sysprof.parse" > local symtab = require "utils.symtab" >-local misc = require "sysprof.collapse" > > local stdout, stderr = io.stdout, io.stderr > local match, gmatch = string.match, string.gmatch > >-local split_by_vmstate = false >- > -- Program options. > local opt_map = {} > >@@ -28,10 +25,6 @@ Supported options are: > os.exit(0) > end > >-function opt_map.split() >- split_by_vmstate = true >-end >- > -- Print error and exit with error status. > local function opterror(...) > stderr:write("luajit-parse-sysprof.lua: ERROR: ", ...) >@@ -85,28 +78,14 @@ local function parseargs(args) > return args[args.argn] > end > >-local function traverse_calltree(node, prefix) >- if node.is_leaf then >- print(prefix..' '..node.count) >- end >- >- local sep_prefix = #prefix == 0 and prefix or prefix..';' >- >- for name,child in pairs(node.children) do >- traverse_calltree(child, sep_prefix..name) >- end >-end >- > local function dump(inputfile) > local reader = bufread.new(inputfile) >- > local symbols = symtab.parse(reader) >- > local events = sysprof.parse(reader, symbols) >- local calltree = misc.collapse(events, symbols, split_by_vmstate) >- >- traverse_calltree(calltree, '') > >+ for stack, count in pairs(events) do >+ print(stack, count) >+ end > os.exit(0) > end > >diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua >deleted file mode 100755 >index 3d83d5ea..00000000 >--- a/tools/sysprof/collapse.lua >+++ /dev/null >@@ -1,124 +0,0 @@ >-local parse = require "sysprof.parse" >-local vmdef = require "jit.vmdef" >-local symtab = require "utils.symtab" >- >-local VMST_NAMES = { >- [parse.VMST.INTERP] = "VMST_INTERP", >- [parse.VMST.LFUNC] = "VMST_LFUNC", >- [parse.VMST.FFUNC] = "VMST_FFUNC", >- [parse.VMST.CFUNC] = "VMST_CFUNC", >- [parse.VMST.GC] = "VMST_GC", >- [parse.VMST.EXIT] = "VMST_EXIT", >- [parse.VMST.RECORD] = "VMST_RECORD", >- [parse.VMST.OPT] = "VMST_OPT", >- [parse.VMST.ASM] = "VMST_ASM", >- [parse.VMST.TRACE] = "VMST_TRACE", >-} >- >-local M = {} >- >-local function new_node(name, is_leaf) >- return { >- name = name, >- count = 0, >- is_leaf = is_leaf, >- children = {} >- } >-end >- >--- insert new child into a node (or increase counter in existing one) >-local function insert(name, node, is_leaf) >- if node.children[name] == nil then >- node.children[name] = new_node(name, is_leaf) >- end >- >- local child = node.children[name] >- child.count = child.count + 1 >- >- return child >-end >- >-local function insert_lua_callchain(chain, lua, symbols) >- local ins_cnt = 0 >- for _,fr in pairs(lua.callchain) do >- local name_lua >- >- ins_cnt = ins_cnt + 1 >- if fr.type == parse.FRAME.FFUNC then >- name_lua = vmdef.ffnames[fr.ffid] >- else >- name_lua = symtab.demangle(symbols, { >- addr = fr.addr, >- line = fr.line, >- gen = fr.gen >- }) >- if lua.trace.traceno ~= nil and lua.trace.addr == fr.addr and >- lua.trace.line == fr.line then >- name_lua = symtab.demangle(symbols, { >- addr = fr.addr, >- traceno = lua.trace.traceno, >- gen = fr.gen >- }) >- end >- >- if fr.type == parse.FRAME.CFUNC then >- -- C function encountered, the next chunk >- -- of frames is located on the C stack. >- break >- end >- end >- >- table.insert(chain, 1, { name = name_lua }) >- end >- table.remove(lua.callchain, ins_cnt) >-end >- >--- merge lua and host callchains into one callchain representing >--- transfer of control >-local function merge(event, symbols, sep_vmst) >- local cc = {} >- >- for _,h_fr in pairs(event.host.callchain) do >- local name_host = symtab.demangle(symbols, { >- addr = h_fr.addr, >- gen = h_fr.gen >- }) >- table.insert(cc, 1, { name = name_host }) >- >- if string.match(name_host, '^lua_cpcall') ~= nil then >- -- Any C function is present on both the C and the Lua >- -- stacks. It is more convenient to get its info from the >- -- host stack, since it has information about child frames. >- table.remove(event.lua.callchain, 1) >- end >- >- if string.match(name_host, '^lua_p?call') ~= nil then >- insert_lua_callchain(cc, event.lua, symbols) >- end >- >- end >- >- if sep_vmst == true then >- table.insert(cc, { name = VMST_NAMES[event.lua.vmstate] }) >- end >- >- return cc >-end >- >--- Collapse all the events into call tree >-function M.collapse(events, symbols, sep_vmst) >- local root = new_node('root', false) >- >- for _,ev in pairs(events) do >- local callchain = merge(ev, symbols, sep_vmst) >- local curr_node = root >- for i=#callchain,1,-1 do >- curr_node = insert(callchain[i].name, curr_node, false) >- end >- insert('', curr_node, true) >- end >- >- return root >-end >- >-return M >diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua >index 5b52f104..806297eb 100755 >--- a/tools/sysprof/parse.lua >+++ b/tools/sysprof/parse.lua >@@ -2,6 +2,7 @@ > -- The format spec can be found in <src/lj_sysprof.h>. > > local symtab = require "utils.symtab" >+local vmdef = require "jit.vmdef" > > local string_format = string.format > >@@ -10,7 +11,7 @@ local LJP_CURRENT_VERSION = 2 > > local M = {} > >-M.VMST = { >+local VMST = { > INTERP = 0, > LFUNC = 1, > FFUNC = 2, >@@ -25,13 +26,14 @@ M.VMST = { > } > > >-M.FRAME = { >+local FRAME = { > LFUNC = 1, > CFUNC = 2, > FFUNC = 3, > BOTTOM = 0x80 > } > >+ > local STREAM_END = 0x80 > local SYMTAB_LFUNC_EVENT = 10 > local SYMTAB_CFUNC_EVENT = 11 >@@ -54,42 +56,40 @@ local function new_event() > } > end > >-local function parse_lfunc(reader, event, symbols) >+local function parse_lfunc(reader, symbols) > local addr = reader:read_uleb128() > local line = reader:read_uleb128() > local loc = symtab.loc(symbols, { addr = addr, line = line }) >- loc.type = M.FRAME.LFUNC >- table.insert(event.lua.callchain, 1, loc) >+ loc.type = FRAME.LFUNC >+ return symtab.demangle(symbols, loc) > end > >-local function parse_ffunc(reader, event, _) >+local function parse_ffunc(reader, _) > local ffid = reader:read_uleb128() >- table.insert(event.lua.callchain, 1, { >- type = M.FRAME.FFUNC, >- ffid = ffid, >- }) >+ return vmdef.ffnames[ffid] > end > >-local function parse_cfunc(reader, event, symbols) >+local function parse_cfunc(reader, symbols) > local addr = reader:read_uleb128() > local loc = symtab.loc(symbols, { addr = addr }) >- loc.type = M.FRAME.CFUNC >- table.insert(event.lua.callchain, 1, loc) >+ loc.type = FRAME.CFUNC >+ return symtab.demangle(symbols, loc) > end > > local frame_parsers = { >- [M.FRAME.LFUNC] = parse_lfunc, >- [M.FRAME.FFUNC] = parse_ffunc, >- [M.FRAME.CFUNC] = parse_cfunc >+ [FRAME.LFUNC] = parse_lfunc, >+ [FRAME.FFUNC] = parse_ffunc, >+ [FRAME.CFUNC] = parse_cfunc > } > > local function parse_lua_callchain(reader, event, symbols) > while true do > local frame_header = reader:read_octet() >- if frame_header == M.FRAME.BOTTOM then >+ if frame_header == FRAME.BOTTOM then > break > end >- frame_parsers[frame_header](reader, event, symbols) >+ local name = frame_parsers[frame_header](reader, symbols) >+ table.insert(event.lua.callchain, 1, {name = name, type = frame_header}) > end > end > >@@ -100,7 +100,7 @@ local function parse_host_callchain(reader, event, symbols) > > while addr ~= 0 do > local loc = symtab.loc(symbols, { addr = addr }) >- table.insert(event.host.callchain, 1, loc) >+ table.insert(event.host.callchain, 1, symtab.demangle(symbols, loc)) > addr = reader:read_uleb128() > end > end >@@ -108,10 +108,20 @@ end > --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- > > local function parse_trace_callchain(reader, event, symbols) >- event.lua.trace.traceno = reader:read_uleb128() >- event.lua.trace.addr = reader:read_uleb128() >- event.lua.trace.line = reader:read_uleb128() >- event.lua.trace.gen = symtab.loc(symbols, event.lua.trace).gen >+ local loc = {} >+ loc.traceno = reader:read_uleb128() >+ loc.addr = reader:read_uleb128() >+ loc.line = reader:read_uleb128() >+ >+ local gen = symtab.loc(symbols, loc).gen >+ local name_lua = symtab.demangle(symbols, { >+ addr = loc.addr, >+ traceno = loc.traceno, >+ gen = gen >+ }) >+ event.lua.trace = loc >+ event.lua.trace.gen = gen >+ event.lua.trace.name = name_lua > end > > --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- >@@ -143,18 +153,63 @@ local function parse_symtab(reader, symbols, vmstate) > end > > local event_parsers = { >- [M.VMST.INTERP] = parse_host_only, >- [M.VMST.LFUNC] = parse_lua_host, >- [M.VMST.FFUNC] = parse_lua_host, >- [M.VMST.CFUNC] = parse_lua_host, >- [M.VMST.GC] = parse_host_only, >- [M.VMST.EXIT] = parse_host_only, >- [M.VMST.RECORD] = parse_host_only, >- [M.VMST.OPT] = parse_host_only, >- [M.VMST.ASM] = parse_host_only, >- [M.VMST.TRACE] = parse_trace, >+ [VMST.INTERP] = parse_host_only, >+ [VMST.LFUNC] = parse_lua_host, >+ [VMST.FFUNC] = parse_lua_host, >+ [VMST.CFUNC] = parse_lua_host, >+ [VMST.GC] = parse_host_only, >+ [VMST.EXIT] = parse_host_only, >+ [VMST.RECORD] = parse_host_only, >+ [VMST.OPT] = parse_host_only, >+ [VMST.ASM] = parse_host_only, >+ [VMST.TRACE] = parse_trace, > } > >+local function insert_lua_callchain(chain, lua) >+ local ins_cnt = 0 >+ local name_lua >+ for _, fr in ipairs(lua.callchain) do >+ ins_cnt = ins_cnt + 1 >+ if fr.type == FRAME.CFUNC then >+ -- C function encountered, the next chunk >+ -- of frames is located on the C stack. >+ break >+ end >+ name_lua = fr.name >+ >+ if fr.type == FRAME.LFUNC >+ and lua.trace.traceno ~= nil >+ and lua.trace.addr == fr.addr >+ and lua.trace.line == fr.line >+ then >+ name_lua = lua.trace.name >+ end >+ >+ table.insert(chain, name_lua) >+ end >+ table.remove(lua.callchain, ins_cnt) >+end >+ >+local function merge(event) >+ local callchain = {} >+ >+ for _, name_host in ipairs(event.host.callchain) do >+ table.insert(callchain, name_host) >+ if string.match(name_host, '^lua_cpcall') ~= nil then >+ -- Any C function is present on both the C and the Lua >+ -- stacks. It is more convenient to get its info from the >+ -- host stack, since it has information about child frames. >+ table.remove(event.lua.callchain) >+ end >+ >+ if string.match(name_host, '^lua_p?call') ~= nil then >+ insert_lua_callchain(callchain, event.lua) >+ end >+ >+ end >+ return table.concat(callchain, ';') .. ';' >+end >+ > local function parse_event(reader, events, symbols) > local event = new_event() > >@@ -171,8 +226,9 @@ local function parse_event(reader, events, symbols) > event.lua.vmstate = vmstate > > event_parsers[vmstate](reader, event, symbols) >- >- table.insert(events, event) >+ local callchain = merge(event) >+ local cur_cnt = events[callchain] >+ events[callchain] = (cur_cnt or 0) + 1 > return true > end > >@@ -203,4 +259,5 @@ function M.parse(reader, symbols) > return events > end > >+ > return M >diff --git a/tools/utils/avl.lua b/tools/utils/avl.lua >index d5baa534..098f58ec 100644 >--- a/tools/utils/avl.lua >+++ b/tools/utils/avl.lua >@@ -86,7 +86,7 @@ function M.insert(node, key, value) > elseif key > node.key then > node.right = M.insert(node.right, key, value) > else >- table.insert(node.value, value) >+ node.value = value > end > > update_height(node) >diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua >index c26a9e8c..7f6c78f0 100644 >--- a/tools/utils/symtab.lua >+++ b/tools/utils/symtab.lua >@@ -176,7 +176,7 @@ function M.demangle(symtab, loc) > local key, value = avl.floor(symtab.cfunc, addr) > > if key then >- return string_format("%s:%#x", value[gen].name, key) >+ return string_format("%s:%#x", value.name, key) > end > > return string_format("CFUNC %#x", addr) >-- >2.40.1 [-- Attachment #2: Type: text/html, Size: 17436 bytes --]
next prev parent reply other threads:[~2023-05-24 20:36 UTC|newest] Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-05-24 20:35 Maxim Kokryashkin via Tarantool-patches 2023-05-24 20:36 ` Maxim Kokryashkin via Tarantool-patches [this message] 2023-05-25 9:58 ` Sergey Kaplun via Tarantool-patches 2023-07-18 8:33 ` Igor Munkin via Tarantool-patches
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=1684960562.12973543@f416.i.mail.ru \ --to=tarantool-patches@dev.tarantool.org \ --cc=m.kokryashkin@tarantool.org \ --cc=max.kokryashkin@gmail.com \ --subject='Re: [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser'\''s memory footprint' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox