[Tarantool-patches] [PATCH luajit] sysprof: improve parser's memory footprint

Maxim Kokryashkin max.kokryashkin at gmail.com
Thu May 18 14:49:27 MSK 2023


This patch reduces sysprof's parser memory footprint,
by avoiding reading all callchains before collapsing them.
Instead of it, parser merges stacks immediately after
reading them and stores counts in a lua table.

Also, it fixes a bug in the AVL-tree implementation,
which produced unnecessary inserts of values into nodes.
---
Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-noticket-sysprof-parser-refactoring
PR: https://github.com/tarantool/tarantool/pull/8670

NB: CI is red in LuaJIT repo because this patch requires changes in the
tarantool repo, so please refer to CI runs in PR.

 tools/CMakeLists.txt       |   2 -
 tools/sysprof.lua          |  27 +-------
 tools/sysprof/collapse.lua | 124 ------------------------------------
 tools/sysprof/parse.lua    | 125 ++++++++++++++++++++++++++-----------
 tools/utils/avl.lua        |   2 +-
 tools/utils/symtab.lua     |   2 +-
 6 files changed, 95 insertions(+), 187 deletions(-)
 delete mode 100755 tools/sysprof/collapse.lua

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index dd7ec6bd..3a919433 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -112,7 +112,6 @@ else()
   add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS
     luajit-parse-sysprof
     sysprof/parse.lua
-    sysprof/collapse.lua
     sysprof.lua
     utils/bufread.lua
     utils/symtab.lua
@@ -121,7 +120,6 @@ else()
 
   install(FILES
       ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua
-      ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua
     DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof
     PERMISSIONS
       OWNER_READ OWNER_WRITE
diff --git a/tools/sysprof.lua b/tools/sysprof.lua
index 1afab195..be2a0565 100644
--- a/tools/sysprof.lua
+++ b/tools/sysprof.lua
@@ -1,13 +1,10 @@
 local bufread = require "utils.bufread"
 local sysprof = require "sysprof.parse"
 local symtab = require "utils.symtab"
-local misc = require "sysprof.collapse"
 
 local stdout, stderr = io.stdout, io.stderr
 local match, gmatch = string.match, string.gmatch
 
-local split_by_vmstate = false
-
 -- Program options.
 local opt_map = {}
 
@@ -28,10 +25,6 @@ Supported options are:
   os.exit(0)
 end
 
-function opt_map.split()
-  split_by_vmstate = true
-end
-
 -- Print error and exit with error status.
 local function opterror(...)
   stderr:write("luajit-parse-sysprof.lua: ERROR: ", ...)
@@ -85,28 +78,14 @@ local function parseargs(args)
   return args[args.argn]
 end
 
-local function traverse_calltree(node, prefix)
-  if node.is_leaf then
-    print(prefix..' '..node.count)
-  end
-
-  local sep_prefix = #prefix == 0 and prefix or prefix..';'
-
-  for name,child in pairs(node.children) do
-    traverse_calltree(child, sep_prefix..name)
-  end
-end
-
 local function dump(inputfile)
   local reader = bufread.new(inputfile)
-
   local symbols = symtab.parse(reader)
-
   local events = sysprof.parse(reader, symbols)
-  local calltree = misc.collapse(events, symbols, split_by_vmstate)
-
-  traverse_calltree(calltree, '')
 
+  for stack, count in pairs(events) do
+    print(stack, count)
+  end
   os.exit(0)
 end
 
diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua
deleted file mode 100755
index 3d83d5ea..00000000
--- a/tools/sysprof/collapse.lua
+++ /dev/null
@@ -1,124 +0,0 @@
-local parse = require "sysprof.parse"
-local vmdef = require "jit.vmdef"
-local symtab = require "utils.symtab"
-
-local VMST_NAMES = {
-  [parse.VMST.INTERP] = "VMST_INTERP",
-  [parse.VMST.LFUNC]  = "VMST_LFUNC",
-  [parse.VMST.FFUNC]  = "VMST_FFUNC",
-  [parse.VMST.CFUNC]  = "VMST_CFUNC",
-  [parse.VMST.GC]     = "VMST_GC",
-  [parse.VMST.EXIT]   = "VMST_EXIT",
-  [parse.VMST.RECORD] = "VMST_RECORD",
-  [parse.VMST.OPT]    = "VMST_OPT",
-  [parse.VMST.ASM]    = "VMST_ASM",
-  [parse.VMST.TRACE]  = "VMST_TRACE",
-}
-
-local M = {}
-
-local function new_node(name, is_leaf)
-  return {
-    name = name,
-    count = 0,
-    is_leaf = is_leaf,
-    children = {}
-  }
-end
-
--- insert new child into a node (or increase counter in existing one)
-local function insert(name, node, is_leaf)
-  if node.children[name] == nil then
-    node.children[name] = new_node(name, is_leaf)
-  end
-
-  local child = node.children[name]
-  child.count = child.count + 1
-
-  return child
-end
-
-local function insert_lua_callchain(chain, lua, symbols)
-  local ins_cnt = 0
-  for _,fr in pairs(lua.callchain) do
-    local name_lua
-
-    ins_cnt = ins_cnt + 1
-    if fr.type == parse.FRAME.FFUNC then
-      name_lua = vmdef.ffnames[fr.ffid]
-    else
-      name_lua = symtab.demangle(symbols, {
-        addr = fr.addr,
-        line = fr.line,
-        gen = fr.gen
-      })
-      if lua.trace.traceno ~= nil and lua.trace.addr == fr.addr and
-          lua.trace.line == fr.line then
-        name_lua = symtab.demangle(symbols, {
-          addr = fr.addr,
-          traceno = lua.trace.traceno,
-          gen = fr.gen
-        })
-      end
-
-      if fr.type == parse.FRAME.CFUNC then
-        -- C function encountered, the next chunk
-        -- of frames is located on the C stack.
-        break
-      end
-    end
-
-    table.insert(chain, 1, { name = name_lua })
-  end
-  table.remove(lua.callchain, ins_cnt)
-end
-
--- merge lua and host callchains into one callchain representing
--- transfer of control
-local function merge(event, symbols, sep_vmst)
-  local cc = {}
-
-  for _,h_fr in pairs(event.host.callchain) do
-    local name_host = symtab.demangle(symbols, {
-      addr = h_fr.addr,
-      gen = h_fr.gen
-    })
-    table.insert(cc, 1, { name = name_host })
-
-    if string.match(name_host, '^lua_cpcall') ~= nil then
-      -- Any C function is present on both the C and the Lua
-      -- stacks. It is more convenient to get its info from the
-      -- host stack, since it has information about child frames.
-      table.remove(event.lua.callchain, 1)
-    end
-
-    if string.match(name_host, '^lua_p?call') ~= nil then
-      insert_lua_callchain(cc, event.lua, symbols)
-    end
-
-  end
-
-  if sep_vmst == true then
-    table.insert(cc, { name = VMST_NAMES[event.lua.vmstate] })
-  end
-
-  return cc
-end
-
--- Collapse all the events into call tree
-function M.collapse(events, symbols, sep_vmst)
-  local root = new_node('root', false)
-
-  for _,ev in pairs(events) do
-    local callchain = merge(ev, symbols, sep_vmst)
-    local curr_node = root
-    for i=#callchain,1,-1 do
-      curr_node = insert(callchain[i].name, curr_node, false)
-    end
-    insert('', curr_node, true)
-  end
-
-  return root
-end
-
-return M
diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua
index 5b52f104..3db36472 100755
--- a/tools/sysprof/parse.lua
+++ b/tools/sysprof/parse.lua
@@ -2,6 +2,7 @@
 -- The format spec can be found in <src/lj_sysprof.h>.
 
 local symtab = require "utils.symtab"
+local vmdef = require "jit.vmdef"
 
 local string_format = string.format
 
@@ -10,7 +11,7 @@ local LJP_CURRENT_VERSION = 2
 
 local M = {}
 
-M.VMST = {
+local VMST = {
   INTERP = 0,
   LFUNC  = 1,
   FFUNC  = 2,
@@ -25,13 +26,14 @@ M.VMST = {
 }
 
 
-M.FRAME = {
+local FRAME = {
   LFUNC  = 1,
   CFUNC  = 2,
   FFUNC  = 3,
   BOTTOM = 0x80
 }
 
+
 local STREAM_END = 0x80
 local SYMTAB_LFUNC_EVENT = 10
 local SYMTAB_CFUNC_EVENT = 11
@@ -54,42 +56,40 @@ local function new_event()
   }
 end
 
-local function parse_lfunc(reader, event, symbols)
+local function parse_lfunc(reader, symbols)
   local addr = reader:read_uleb128()
   local line = reader:read_uleb128()
   local loc = symtab.loc(symbols, { addr = addr, line = line })
-  loc.type = M.FRAME.LFUNC
-  table.insert(event.lua.callchain, 1, loc)
+  loc.type = FRAME.LFUNC
+  return symtab.demangle(symbols, loc)
 end
 
-local function parse_ffunc(reader, event, _)
+local function parse_ffunc(reader, _)
   local ffid = reader:read_uleb128()
-  table.insert(event.lua.callchain, 1, {
-    type = M.FRAME.FFUNC,
-    ffid = ffid,
-  })
+  return vmdef.ffnames[ffid]
 end
 
-local function parse_cfunc(reader, event, symbols)
+local function parse_cfunc(reader, symbols)
   local addr = reader:read_uleb128()
   local loc = symtab.loc(symbols, { addr = addr })
-  loc.type = M.FRAME.CFUNC
-  table.insert(event.lua.callchain, 1, loc)
+  loc.type = FRAME.CFUNC
+  return symtab.demangle(symbols, loc)
 end
 
 local frame_parsers = {
-  [M.FRAME.LFUNC] = parse_lfunc,
-  [M.FRAME.FFUNC] = parse_ffunc,
-  [M.FRAME.CFUNC] = parse_cfunc
+  [FRAME.LFUNC] = parse_lfunc,
+  [FRAME.FFUNC] = parse_ffunc,
+  [FRAME.CFUNC] = parse_cfunc
 }
 
 local function parse_lua_callchain(reader, event, symbols)
   while true do
     local frame_header = reader:read_octet()
-    if frame_header == M.FRAME.BOTTOM then
+    if frame_header == FRAME.BOTTOM then
       break
     end
-    frame_parsers[frame_header](reader, event, symbols)
+    local name = frame_parsers[frame_header](reader, symbols)
+    table.insert(event.lua.callchain, 1, {name=name, type=frame_header})
   end
 end
 
@@ -100,7 +100,7 @@ local function parse_host_callchain(reader, event, symbols)
 
   while addr ~= 0 do
     local loc = symtab.loc(symbols, { addr = addr })
-    table.insert(event.host.callchain, 1, loc)
+    table.insert(event.host.callchain, 1, symtab.demangle(symbols, loc))
     addr = reader:read_uleb128()
   end
 end
@@ -108,10 +108,20 @@ end
 --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~--
 
 local function parse_trace_callchain(reader, event, symbols)
-  event.lua.trace.traceno  = reader:read_uleb128()
-  event.lua.trace.addr = reader:read_uleb128()
-  event.lua.trace.line = reader:read_uleb128()
-  event.lua.trace.gen = symtab.loc(symbols, event.lua.trace).gen
+  local loc = {
+    traceno  = reader:read_uleb128(),
+    addr = reader:read_uleb128(),
+    line = reader:read_uleb128()
+  }
+  local gen = symtab.loc(symbols, loc).gen
+  local name_lua = symtab.demangle(symbols, {
+    addr = loc.addr,
+    traceno = loc.traceno,
+    gen = gen
+  })
+  event.lua.trace = loc
+  event.lua.trace.gen = gen
+  event.lua.trace.name = name_lua
 end
 
 --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~--
@@ -143,18 +153,60 @@ local function parse_symtab(reader, symbols, vmstate)
 end
 
 local event_parsers = {
-  [M.VMST.INTERP] = parse_host_only,
-  [M.VMST.LFUNC]  = parse_lua_host,
-  [M.VMST.FFUNC]  = parse_lua_host,
-  [M.VMST.CFUNC]  = parse_lua_host,
-  [M.VMST.GC]     = parse_host_only,
-  [M.VMST.EXIT]   = parse_host_only,
-  [M.VMST.RECORD] = parse_host_only,
-  [M.VMST.OPT]    = parse_host_only,
-  [M.VMST.ASM]    = parse_host_only,
-  [M.VMST.TRACE]  = parse_trace,
+  [VMST.INTERP] = parse_host_only,
+  [VMST.LFUNC]  = parse_lua_host,
+  [VMST.FFUNC]  = parse_lua_host,
+  [VMST.CFUNC]  = parse_lua_host,
+  [VMST.GC]     = parse_host_only,
+  [VMST.EXIT]   = parse_host_only,
+  [VMST.RECORD] = parse_host_only,
+  [VMST.OPT]    = parse_host_only,
+  [VMST.ASM]    = parse_host_only,
+  [VMST.TRACE]  = parse_trace,
 }
 
+local function insert_lua_callchain(chain, lua)
+  local ins_cnt = 0
+  local name_lua
+  for _, fr in ipairs(lua.callchain) do
+    ins_cnt = ins_cnt + 1
+    if fr.type == FRAME.CFUNC then
+      -- C function encountered, the next chunk
+      -- of frames is located on the C stack.
+      break
+    end
+    name_lua = fr.name
+
+    if fr.type == FRAME.LFUNC and lua.trace.traceno ~= nil and
+        lua.trace.addr == fr.addr and lua.trace.line == fr.line then
+            name_lua = lua.trace.name
+    end
+
+    table.insert(chain, name_lua)
+  end
+  table.remove(lua.callchain, ins_cnt)
+end
+
+local function merge(event)
+  local cc = {}
+
+  for _, name_host in ipairs(event.host.callchain) do
+    table.insert(cc, name_host)
+    if string.match(name_host, '^lua_cpcall') ~= nil then
+      -- Any C function is present on both the C and the Lua
+      -- stacks. It is more convenient to get its info from the
+      -- host stack, since it has information about child frames.
+      table.remove(event.lua.callchain)
+    end
+
+    if string.match(name_host, '^lua_p?call') ~= nil then
+      insert_lua_callchain(cc, event.lua)
+    end
+
+  end
+  return cc
+end
+
 local function parse_event(reader, events, symbols)
   local event = new_event()
 
@@ -171,8 +223,10 @@ local function parse_event(reader, events, symbols)
   event.lua.vmstate = vmstate
 
   event_parsers[vmstate](reader, event, symbols)
-
-  table.insert(events, event)
+  local cc = merge(event)
+  local cc_str = table.concat(cc, ';') .. ';'
+  local cur_cnt = events[cc_str]
+  events[cc_str] = (cur_cnt or 0) + 1
   return true
 end
 
@@ -203,4 +257,5 @@ function M.parse(reader, symbols)
   return events
 end
 
+
 return M
diff --git a/tools/utils/avl.lua b/tools/utils/avl.lua
index d5baa534..098f58ec 100644
--- a/tools/utils/avl.lua
+++ b/tools/utils/avl.lua
@@ -86,7 +86,7 @@ function M.insert(node, key, value)
   elseif key > node.key then
     node.right = M.insert(node.right, key, value)
   else
-    table.insert(node.value, value)
+    node.value = value
   end
 
   update_height(node)
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
index c26a9e8c..7f6c78f0 100644
--- a/tools/utils/symtab.lua
+++ b/tools/utils/symtab.lua
@@ -176,7 +176,7 @@ function M.demangle(symtab, loc)
   local key, value = avl.floor(symtab.cfunc, addr)
 
   if key then
-    return string_format("%s:%#x", value[gen].name, key)
+    return string_format("%s:%#x", value.name, key)
   end
 
   return string_format("CFUNC %#x", addr)
-- 
2.40.1



More information about the Tarantool-patches mailing list