Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser's memory footprint
@ 2023-05-24 20:35 Maxim Kokryashkin via Tarantool-patches
  2023-05-24 20:36 ` Maxim Kokryashkin via Tarantool-patches
  0 siblings, 1 reply; 3+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2023-05-24 20:35 UTC (permalink / raw)
  To: tarantool-patches, skaplun, sergos

This patch reduces sysprof's parser memory footprint,
by avoiding reading all callchains before collapsing them.
Instead of it, parser merges stacks immediately after
reading them and stores counts in a lua table.

Also, it fixes a bug in the AVL-tree implementation,
which produced unnecessary inserts of values into nodes.
---

Changes in v2:
- Fixed comments as per review by Sergey.

Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-8700-sysprof-parser-refactoring
PR: https://github.com/tarantool/tarantool/pull/8703

 tools/CMakeLists.txt       |   2 -
 tools/sysprof.lua          |  27 +-------
 tools/sysprof/collapse.lua | 124 ------------------------------------
 tools/sysprof/parse.lua    | 127 +++++++++++++++++++++++++++----------
 tools/utils/avl.lua        |   2 +-
 tools/utils/symtab.lua     |   2 +-
 6 files changed, 97 insertions(+), 187 deletions(-)
 delete mode 100755 tools/sysprof/collapse.lua

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index dd7ec6bd..3a919433 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -112,7 +112,6 @@ else()
   add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS
     luajit-parse-sysprof
     sysprof/parse.lua
-    sysprof/collapse.lua
     sysprof.lua
     utils/bufread.lua
     utils/symtab.lua
@@ -121,7 +120,6 @@ else()
 
   install(FILES
       ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua
-      ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua
     DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof
     PERMISSIONS
       OWNER_READ OWNER_WRITE
diff --git a/tools/sysprof.lua b/tools/sysprof.lua
index 1afab195..be2a0565 100644
--- a/tools/sysprof.lua
+++ b/tools/sysprof.lua
@@ -1,13 +1,10 @@
 local bufread = require "utils.bufread"
 local sysprof = require "sysprof.parse"
 local symtab = require "utils.symtab"
-local misc = require "sysprof.collapse"
 
 local stdout, stderr = io.stdout, io.stderr
 local match, gmatch = string.match, string.gmatch
 
-local split_by_vmstate = false
-
 -- Program options.
 local opt_map = {}
 
@@ -28,10 +25,6 @@ Supported options are:
   os.exit(0)
 end
 
-function opt_map.split()
-  split_by_vmstate = true
-end
-
 -- Print error and exit with error status.
 local function opterror(...)
   stderr:write("luajit-parse-sysprof.lua: ERROR: ", ...)
@@ -85,28 +78,14 @@ local function parseargs(args)
   return args[args.argn]
 end
 
-local function traverse_calltree(node, prefix)
-  if node.is_leaf then
-    print(prefix..' '..node.count)
-  end
-
-  local sep_prefix = #prefix == 0 and prefix or prefix..';'
-
-  for name,child in pairs(node.children) do
-    traverse_calltree(child, sep_prefix..name)
-  end
-end
-
 local function dump(inputfile)
   local reader = bufread.new(inputfile)
-
   local symbols = symtab.parse(reader)
-
   local events = sysprof.parse(reader, symbols)
-  local calltree = misc.collapse(events, symbols, split_by_vmstate)
-
-  traverse_calltree(calltree, '')
 
+  for stack, count in pairs(events) do
+    print(stack, count)
+  end
   os.exit(0)
 end
 
diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua
deleted file mode 100755
index 3d83d5ea..00000000
--- a/tools/sysprof/collapse.lua
+++ /dev/null
@@ -1,124 +0,0 @@
-local parse = require "sysprof.parse"
-local vmdef = require "jit.vmdef"
-local symtab = require "utils.symtab"
-
-local VMST_NAMES = {
-  [parse.VMST.INTERP] = "VMST_INTERP",
-  [parse.VMST.LFUNC]  = "VMST_LFUNC",
-  [parse.VMST.FFUNC]  = "VMST_FFUNC",
-  [parse.VMST.CFUNC]  = "VMST_CFUNC",
-  [parse.VMST.GC]     = "VMST_GC",
-  [parse.VMST.EXIT]   = "VMST_EXIT",
-  [parse.VMST.RECORD] = "VMST_RECORD",
-  [parse.VMST.OPT]    = "VMST_OPT",
-  [parse.VMST.ASM]    = "VMST_ASM",
-  [parse.VMST.TRACE]  = "VMST_TRACE",
-}
-
-local M = {}
-
-local function new_node(name, is_leaf)
-  return {
-    name = name,
-    count = 0,
-    is_leaf = is_leaf,
-    children = {}
-  }
-end
-
--- insert new child into a node (or increase counter in existing one)
-local function insert(name, node, is_leaf)
-  if node.children[name] == nil then
-    node.children[name] = new_node(name, is_leaf)
-  end
-
-  local child = node.children[name]
-  child.count = child.count + 1
-
-  return child
-end
-
-local function insert_lua_callchain(chain, lua, symbols)
-  local ins_cnt = 0
-  for _,fr in pairs(lua.callchain) do
-    local name_lua
-
-    ins_cnt = ins_cnt + 1
-    if fr.type == parse.FRAME.FFUNC then
-      name_lua = vmdef.ffnames[fr.ffid]
-    else
-      name_lua = symtab.demangle(symbols, {
-        addr = fr.addr,
-        line = fr.line,
-        gen = fr.gen
-      })
-      if lua.trace.traceno ~= nil and lua.trace.addr == fr.addr and
-          lua.trace.line == fr.line then
-        name_lua = symtab.demangle(symbols, {
-          addr = fr.addr,
-          traceno = lua.trace.traceno,
-          gen = fr.gen
-        })
-      end
-
-      if fr.type == parse.FRAME.CFUNC then
-        -- C function encountered, the next chunk
-        -- of frames is located on the C stack.
-        break
-      end
-    end
-
-    table.insert(chain, 1, { name = name_lua })
-  end
-  table.remove(lua.callchain, ins_cnt)
-end
-
--- merge lua and host callchains into one callchain representing
--- transfer of control
-local function merge(event, symbols, sep_vmst)
-  local cc = {}
-
-  for _,h_fr in pairs(event.host.callchain) do
-    local name_host = symtab.demangle(symbols, {
-      addr = h_fr.addr,
-      gen = h_fr.gen
-    })
-    table.insert(cc, 1, { name = name_host })
-
-    if string.match(name_host, '^lua_cpcall') ~= nil then
-      -- Any C function is present on both the C and the Lua
-      -- stacks. It is more convenient to get its info from the
-      -- host stack, since it has information about child frames.
-      table.remove(event.lua.callchain, 1)
-    end
-
-    if string.match(name_host, '^lua_p?call') ~= nil then
-      insert_lua_callchain(cc, event.lua, symbols)
-    end
-
-  end
-
-  if sep_vmst == true then
-    table.insert(cc, { name = VMST_NAMES[event.lua.vmstate] })
-  end
-
-  return cc
-end
-
--- Collapse all the events into call tree
-function M.collapse(events, symbols, sep_vmst)
-  local root = new_node('root', false)
-
-  for _,ev in pairs(events) do
-    local callchain = merge(ev, symbols, sep_vmst)
-    local curr_node = root
-    for i=#callchain,1,-1 do
-      curr_node = insert(callchain[i].name, curr_node, false)
-    end
-    insert('', curr_node, true)
-  end
-
-  return root
-end
-
-return M
diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua
index 5b52f104..806297eb 100755
--- a/tools/sysprof/parse.lua
+++ b/tools/sysprof/parse.lua
@@ -2,6 +2,7 @@
 -- The format spec can be found in <src/lj_sysprof.h>.
 
 local symtab = require "utils.symtab"
+local vmdef = require "jit.vmdef"
 
 local string_format = string.format
 
@@ -10,7 +11,7 @@ local LJP_CURRENT_VERSION = 2
 
 local M = {}
 
-M.VMST = {
+local VMST = {
   INTERP = 0,
   LFUNC  = 1,
   FFUNC  = 2,
@@ -25,13 +26,14 @@ M.VMST = {
 }
 
 
-M.FRAME = {
+local FRAME = {
   LFUNC  = 1,
   CFUNC  = 2,
   FFUNC  = 3,
   BOTTOM = 0x80
 }
 
+
 local STREAM_END = 0x80
 local SYMTAB_LFUNC_EVENT = 10
 local SYMTAB_CFUNC_EVENT = 11
@@ -54,42 +56,40 @@ local function new_event()
   }
 end
 
-local function parse_lfunc(reader, event, symbols)
+local function parse_lfunc(reader, symbols)
   local addr = reader:read_uleb128()
   local line = reader:read_uleb128()
   local loc = symtab.loc(symbols, { addr = addr, line = line })
-  loc.type = M.FRAME.LFUNC
-  table.insert(event.lua.callchain, 1, loc)
+  loc.type = FRAME.LFUNC
+  return symtab.demangle(symbols, loc)
 end
 
-local function parse_ffunc(reader, event, _)
+local function parse_ffunc(reader, _)
   local ffid = reader:read_uleb128()
-  table.insert(event.lua.callchain, 1, {
-    type = M.FRAME.FFUNC,
-    ffid = ffid,
-  })
+  return vmdef.ffnames[ffid]
 end
 
-local function parse_cfunc(reader, event, symbols)
+local function parse_cfunc(reader, symbols)
   local addr = reader:read_uleb128()
   local loc = symtab.loc(symbols, { addr = addr })
-  loc.type = M.FRAME.CFUNC
-  table.insert(event.lua.callchain, 1, loc)
+  loc.type = FRAME.CFUNC
+  return symtab.demangle(symbols, loc)
 end
 
 local frame_parsers = {
-  [M.FRAME.LFUNC] = parse_lfunc,
-  [M.FRAME.FFUNC] = parse_ffunc,
-  [M.FRAME.CFUNC] = parse_cfunc
+  [FRAME.LFUNC] = parse_lfunc,
+  [FRAME.FFUNC] = parse_ffunc,
+  [FRAME.CFUNC] = parse_cfunc
 }
 
 local function parse_lua_callchain(reader, event, symbols)
   while true do
     local frame_header = reader:read_octet()
-    if frame_header == M.FRAME.BOTTOM then
+    if frame_header == FRAME.BOTTOM then
       break
     end
-    frame_parsers[frame_header](reader, event, symbols)
+    local name = frame_parsers[frame_header](reader, symbols)
+    table.insert(event.lua.callchain, 1, {name = name, type = frame_header})
   end
 end
 
@@ -100,7 +100,7 @@ local function parse_host_callchain(reader, event, symbols)
 
   while addr ~= 0 do
     local loc = symtab.loc(symbols, { addr = addr })
-    table.insert(event.host.callchain, 1, loc)
+    table.insert(event.host.callchain, 1, symtab.demangle(symbols, loc))
     addr = reader:read_uleb128()
   end
 end
@@ -108,10 +108,20 @@ end
 --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~--
 
 local function parse_trace_callchain(reader, event, symbols)
-  event.lua.trace.traceno  = reader:read_uleb128()
-  event.lua.trace.addr = reader:read_uleb128()
-  event.lua.trace.line = reader:read_uleb128()
-  event.lua.trace.gen = symtab.loc(symbols, event.lua.trace).gen
+  local loc = {}
+  loc.traceno = reader:read_uleb128()
+  loc.addr = reader:read_uleb128()
+  loc.line = reader:read_uleb128()
+
+  local gen = symtab.loc(symbols, loc).gen
+  local name_lua = symtab.demangle(symbols, {
+    addr = loc.addr,
+    traceno = loc.traceno,
+    gen = gen
+  })
+  event.lua.trace = loc
+  event.lua.trace.gen = gen
+  event.lua.trace.name = name_lua
 end
 
 --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~--
@@ -143,18 +153,63 @@ local function parse_symtab(reader, symbols, vmstate)
 end
 
 local event_parsers = {
-  [M.VMST.INTERP] = parse_host_only,
-  [M.VMST.LFUNC]  = parse_lua_host,
-  [M.VMST.FFUNC]  = parse_lua_host,
-  [M.VMST.CFUNC]  = parse_lua_host,
-  [M.VMST.GC]     = parse_host_only,
-  [M.VMST.EXIT]   = parse_host_only,
-  [M.VMST.RECORD] = parse_host_only,
-  [M.VMST.OPT]    = parse_host_only,
-  [M.VMST.ASM]    = parse_host_only,
-  [M.VMST.TRACE]  = parse_trace,
+  [VMST.INTERP] = parse_host_only,
+  [VMST.LFUNC]  = parse_lua_host,
+  [VMST.FFUNC]  = parse_lua_host,
+  [VMST.CFUNC]  = parse_lua_host,
+  [VMST.GC]     = parse_host_only,
+  [VMST.EXIT]   = parse_host_only,
+  [VMST.RECORD] = parse_host_only,
+  [VMST.OPT]    = parse_host_only,
+  [VMST.ASM]    = parse_host_only,
+  [VMST.TRACE]  = parse_trace,
 }
 
+local function insert_lua_callchain(chain, lua)
+  local ins_cnt = 0
+  local name_lua
+  for _, fr in ipairs(lua.callchain) do
+    ins_cnt = ins_cnt + 1
+    if fr.type == FRAME.CFUNC then
+      -- C function encountered, the next chunk
+      -- of frames is located on the C stack.
+      break
+    end
+    name_lua = fr.name
+
+    if fr.type == FRAME.LFUNC
+    and lua.trace.traceno ~= nil
+    and lua.trace.addr == fr.addr
+    and lua.trace.line == fr.line
+    then
+            name_lua = lua.trace.name
+    end
+
+    table.insert(chain, name_lua)
+  end
+  table.remove(lua.callchain, ins_cnt)
+end
+
+local function merge(event)
+  local callchain = {}
+
+  for _, name_host in ipairs(event.host.callchain) do
+    table.insert(callchain, name_host)
+    if string.match(name_host, '^lua_cpcall') ~= nil then
+      -- Any C function is present on both the C and the Lua
+      -- stacks. It is more convenient to get its info from the
+      -- host stack, since it has information about child frames.
+      table.remove(event.lua.callchain)
+    end
+
+    if string.match(name_host, '^lua_p?call') ~= nil then
+      insert_lua_callchain(callchain, event.lua)
+    end
+
+  end
+  return table.concat(callchain, ';') .. ';'
+end
+
 local function parse_event(reader, events, symbols)
   local event = new_event()
 
@@ -171,8 +226,9 @@ local function parse_event(reader, events, symbols)
   event.lua.vmstate = vmstate
 
   event_parsers[vmstate](reader, event, symbols)
-
-  table.insert(events, event)
+  local callchain = merge(event)
+  local cur_cnt = events[callchain]
+  events[callchain] = (cur_cnt or 0) + 1
   return true
 end
 
@@ -203,4 +259,5 @@ function M.parse(reader, symbols)
   return events
 end
 
+
 return M
diff --git a/tools/utils/avl.lua b/tools/utils/avl.lua
index d5baa534..098f58ec 100644
--- a/tools/utils/avl.lua
+++ b/tools/utils/avl.lua
@@ -86,7 +86,7 @@ function M.insert(node, key, value)
   elseif key > node.key then
     node.right = M.insert(node.right, key, value)
   else
-    table.insert(node.value, value)
+    node.value = value
   end
 
   update_height(node)
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
index c26a9e8c..7f6c78f0 100644
--- a/tools/utils/symtab.lua
+++ b/tools/utils/symtab.lua
@@ -176,7 +176,7 @@ function M.demangle(symtab, loc)
   local key, value = avl.floor(symtab.cfunc, addr)
 
   if key then
-    return string_format("%s:%#x", value[gen].name, key)
+    return string_format("%s:%#x", value.name, key)
   end
 
   return string_format("CFUNC %#x", addr)
-- 
2.40.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Tarantool-patches]  [PATCH luajit v2] sysprof: improve parser's memory footprint
  2023-05-24 20:35 [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser's memory footprint Maxim Kokryashkin via Tarantool-patches
@ 2023-05-24 20:36 ` Maxim Kokryashkin via Tarantool-patches
  2023-05-25  9:58   ` Sergey Kaplun via Tarantool-patches
  0 siblings, 1 reply; 3+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2023-05-24 20:36 UTC (permalink / raw)
  To: Maxim Kokryashkin; +Cc: tarantool-patches

[-- Attachment #1: Type: text/plain, Size: 14476 bytes --]


Hi!
Forgot to include my answers to questions.
 
> Also, it fixes a bug in the AVL-tree implementation,
> which produced unnecessary inserts of values into nodes.
 
Q: Should it be any test for this?
Also, may be this should be done in the separate commit (not patch?).
 
A: It is such a silly bug, that no, I don't believe it's needed. I've literlly
changed the line from `table.insert(node.value, value)` to
`node.value = value`. And we still have tests for the AVL tree itself, so
we know for sure that this change doesn't break anything.
 
NB: CI is red in LuaJIT repo because this patch requires changes in the
tarantool repo, so please refer to CI runs in PR.
 
--
Best regards,
Maxim Kokryashkin
 
  
>Среда, 24 мая 2023, 23:35 +03:00 от Maxim Kokryashkin <max.kokryashkin@gmail.com>:
> 
>This patch reduces sysprof's parser memory footprint,
>by avoiding reading all callchains before collapsing them.
>Instead of it, parser merges stacks immediately after
>reading them and stores counts in a lua table.
>
>Also, it fixes a bug in the AVL-tree implementation,
>which produced unnecessary inserts of values into nodes.
>---
>
>Changes in v2:
>- Fixed comments as per review by Sergey.
>
>Branch:  https://github.com/tarantool/luajit/tree/fckxorg/gh-8700-sysprof-parser-refactoring
>PR:  https://github.com/tarantool/tarantool/pull/8703
>
> tools/CMakeLists.txt | 2 -
> tools/sysprof.lua | 27 +-------
> tools/sysprof/collapse.lua | 124 ------------------------------------
> tools/sysprof/parse.lua | 127 +++++++++++++++++++++++++++----------
> tools/utils/avl.lua | 2 +-
> tools/utils/symtab.lua | 2 +-
> 6 files changed, 97 insertions(+), 187 deletions(-)
> delete mode 100755 tools/sysprof/collapse.lua
>
>diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
>index dd7ec6bd..3a919433 100644
>--- a/tools/CMakeLists.txt
>+++ b/tools/CMakeLists.txt
>@@ -112,7 +112,6 @@ else()
>   add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS
>     luajit-parse-sysprof
>     sysprof/parse.lua
>- sysprof/collapse.lua
>     sysprof.lua
>     utils/bufread.lua
>     utils/symtab.lua
>@@ -121,7 +120,6 @@ else()
> 
>   install(FILES
>       ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua
>- ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua
>     DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof
>     PERMISSIONS
>       OWNER_READ OWNER_WRITE
>diff --git a/tools/sysprof.lua b/tools/sysprof.lua
>index 1afab195..be2a0565 100644
>--- a/tools/sysprof.lua
>+++ b/tools/sysprof.lua
>@@ -1,13 +1,10 @@
> local bufread = require "utils.bufread"
> local sysprof = require "sysprof.parse"
> local symtab = require "utils.symtab"
>-local misc = require "sysprof.collapse"
> 
> local stdout, stderr = io.stdout, io.stderr
> local match, gmatch = string.match, string.gmatch
> 
>-local split_by_vmstate = false
>-
> -- Program options.
> local opt_map = {}
> 
>@@ -28,10 +25,6 @@ Supported options are:
>   os.exit(0)
> end
> 
>-function opt_map.split()
>- split_by_vmstate = true
>-end
>-
> -- Print error and exit with error status.
> local function opterror(...)
>   stderr:write("luajit-parse-sysprof.lua: ERROR: ", ...)
>@@ -85,28 +78,14 @@ local function parseargs(args)
>   return args[args.argn]
> end
> 
>-local function traverse_calltree(node, prefix)
>- if node.is_leaf then
>- print(prefix..' '..node.count)
>- end
>-
>- local sep_prefix = #prefix == 0 and prefix or prefix..';'
>-
>- for name,child in pairs(node.children) do
>- traverse_calltree(child, sep_prefix..name)
>- end
>-end
>-
> local function dump(inputfile)
>   local reader = bufread.new(inputfile)
>-
>   local symbols = symtab.parse(reader)
>-
>   local events = sysprof.parse(reader, symbols)
>- local calltree = misc.collapse(events, symbols, split_by_vmstate)
>-
>- traverse_calltree(calltree, '')
> 
>+ for stack, count in pairs(events) do
>+ print(stack, count)
>+ end
>   os.exit(0)
> end
> 
>diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua
>deleted file mode 100755
>index 3d83d5ea..00000000
>--- a/tools/sysprof/collapse.lua
>+++ /dev/null
>@@ -1,124 +0,0 @@
>-local parse = require "sysprof.parse"
>-local vmdef = require "jit.vmdef"
>-local symtab = require "utils.symtab"
>-
>-local VMST_NAMES = {
>- [parse.VMST.INTERP] = "VMST_INTERP",
>- [parse.VMST.LFUNC] = "VMST_LFUNC",
>- [parse.VMST.FFUNC] = "VMST_FFUNC",
>- [parse.VMST.CFUNC] = "VMST_CFUNC",
>- [parse.VMST.GC] = "VMST_GC",
>- [parse.VMST.EXIT] = "VMST_EXIT",
>- [parse.VMST.RECORD] = "VMST_RECORD",
>- [parse.VMST.OPT] = "VMST_OPT",
>- [parse.VMST.ASM] = "VMST_ASM",
>- [parse.VMST.TRACE] = "VMST_TRACE",
>-}
>-
>-local M = {}
>-
>-local function new_node(name, is_leaf)
>- return {
>- name = name,
>- count = 0,
>- is_leaf = is_leaf,
>- children = {}
>- }
>-end
>-
>--- insert new child into a node (or increase counter in existing one)
>-local function insert(name, node, is_leaf)
>- if node.children[name] == nil then
>- node.children[name] = new_node(name, is_leaf)
>- end
>-
>- local child = node.children[name]
>- child.count = child.count + 1
>-
>- return child
>-end
>-
>-local function insert_lua_callchain(chain, lua, symbols)
>- local ins_cnt = 0
>- for _,fr in pairs(lua.callchain) do
>- local name_lua
>-
>- ins_cnt = ins_cnt + 1
>- if fr.type == parse.FRAME.FFUNC then
>- name_lua = vmdef.ffnames[fr.ffid]
>- else
>- name_lua = symtab.demangle(symbols, {
>- addr = fr.addr,
>- line = fr.line,
>- gen = fr.gen
>- })
>- if lua.trace.traceno ~= nil and lua.trace.addr == fr.addr and
>- lua.trace.line == fr.line then
>- name_lua = symtab.demangle(symbols, {
>- addr = fr.addr,
>- traceno = lua.trace.traceno,
>- gen = fr.gen
>- })
>- end
>-
>- if fr.type == parse.FRAME.CFUNC then
>- -- C function encountered, the next chunk
>- -- of frames is located on the C stack.
>- break
>- end
>- end
>-
>- table.insert(chain, 1, { name = name_lua })
>- end
>- table.remove(lua.callchain, ins_cnt)
>-end
>-
>--- merge lua and host callchains into one callchain representing
>--- transfer of control
>-local function merge(event, symbols, sep_vmst)
>- local cc = {}
>-
>- for _,h_fr in pairs(event.host.callchain) do
>- local name_host = symtab.demangle(symbols, {
>- addr = h_fr.addr,
>- gen = h_fr.gen
>- })
>- table.insert(cc, 1, { name = name_host })
>-
>- if string.match(name_host, '^lua_cpcall') ~= nil then
>- -- Any C function is present on both the C and the Lua
>- -- stacks. It is more convenient to get its info from the
>- -- host stack, since it has information about child frames.
>- table.remove(event.lua.callchain, 1)
>- end
>-
>- if string.match(name_host, '^lua_p?call') ~= nil then
>- insert_lua_callchain(cc, event.lua, symbols)
>- end
>-
>- end
>-
>- if sep_vmst == true then
>- table.insert(cc, { name = VMST_NAMES[event.lua.vmstate] })
>- end
>-
>- return cc
>-end
>-
>--- Collapse all the events into call tree
>-function M.collapse(events, symbols, sep_vmst)
>- local root = new_node('root', false)
>-
>- for _,ev in pairs(events) do
>- local callchain = merge(ev, symbols, sep_vmst)
>- local curr_node = root
>- for i=#callchain,1,-1 do
>- curr_node = insert(callchain[i].name, curr_node, false)
>- end
>- insert('', curr_node, true)
>- end
>-
>- return root
>-end
>-
>-return M
>diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua
>index 5b52f104..806297eb 100755
>--- a/tools/sysprof/parse.lua
>+++ b/tools/sysprof/parse.lua
>@@ -2,6 +2,7 @@
> -- The format spec can be found in <src/lj_sysprof.h>.
> 
> local symtab = require "utils.symtab"
>+local vmdef = require "jit.vmdef"
> 
> local string_format = string.format
> 
>@@ -10,7 +11,7 @@ local LJP_CURRENT_VERSION = 2
> 
> local M = {}
> 
>-M.VMST = {
>+local VMST = {
>   INTERP = 0,
>   LFUNC = 1,
>   FFUNC = 2,
>@@ -25,13 +26,14 @@ M.VMST = {
> }
> 
> 
>-M.FRAME = {
>+local FRAME = {
>   LFUNC = 1,
>   CFUNC = 2,
>   FFUNC = 3,
>   BOTTOM = 0x80
> }
> 
>+
> local STREAM_END = 0x80
> local SYMTAB_LFUNC_EVENT = 10
> local SYMTAB_CFUNC_EVENT = 11
>@@ -54,42 +56,40 @@ local function new_event()
>   }
> end
> 
>-local function parse_lfunc(reader, event, symbols)
>+local function parse_lfunc(reader, symbols)
>   local addr = reader:read_uleb128()
>   local line = reader:read_uleb128()
>   local loc = symtab.loc(symbols, { addr = addr, line = line })
>- loc.type = M.FRAME.LFUNC
>- table.insert(event.lua.callchain, 1, loc)
>+ loc.type = FRAME.LFUNC
>+ return symtab.demangle(symbols, loc)
> end
> 
>-local function parse_ffunc(reader, event, _)
>+local function parse_ffunc(reader, _)
>   local ffid = reader:read_uleb128()
>- table.insert(event.lua.callchain, 1, {
>- type = M.FRAME.FFUNC,
>- ffid = ffid,
>- })
>+ return vmdef.ffnames[ffid]
> end
> 
>-local function parse_cfunc(reader, event, symbols)
>+local function parse_cfunc(reader, symbols)
>   local addr = reader:read_uleb128()
>   local loc = symtab.loc(symbols, { addr = addr })
>- loc.type = M.FRAME.CFUNC
>- table.insert(event.lua.callchain, 1, loc)
>+ loc.type = FRAME.CFUNC
>+ return symtab.demangle(symbols, loc)
> end
> 
> local frame_parsers = {
>- [M.FRAME.LFUNC] = parse_lfunc,
>- [M.FRAME.FFUNC] = parse_ffunc,
>- [M.FRAME.CFUNC] = parse_cfunc
>+ [FRAME.LFUNC] = parse_lfunc,
>+ [FRAME.FFUNC] = parse_ffunc,
>+ [FRAME.CFUNC] = parse_cfunc
> }
> 
> local function parse_lua_callchain(reader, event, symbols)
>   while true do
>     local frame_header = reader:read_octet()
>- if frame_header == M.FRAME.BOTTOM then
>+ if frame_header == FRAME.BOTTOM then
>       break
>     end
>- frame_parsers[frame_header](reader, event, symbols)
>+ local name = frame_parsers[frame_header](reader, symbols)
>+ table.insert(event.lua.callchain, 1, {name = name, type = frame_header})
>   end
> end
> 
>@@ -100,7 +100,7 @@ local function parse_host_callchain(reader, event, symbols)
> 
>   while addr ~= 0 do
>     local loc = symtab.loc(symbols, { addr = addr })
>- table.insert(event.host.callchain, 1, loc)
>+ table.insert(event.host.callchain, 1, symtab.demangle(symbols, loc))
>     addr = reader:read_uleb128()
>   end
> end
>@@ -108,10 +108,20 @@ end
> --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~--
> 
> local function parse_trace_callchain(reader, event, symbols)
>- event.lua.trace.traceno = reader:read_uleb128()
>- event.lua.trace.addr = reader:read_uleb128()
>- event.lua.trace.line = reader:read_uleb128()
>- event.lua.trace.gen = symtab.loc(symbols, event.lua.trace).gen
>+ local loc = {}
>+ loc.traceno = reader:read_uleb128()
>+ loc.addr = reader:read_uleb128()
>+ loc.line = reader:read_uleb128()
>+
>+ local gen = symtab.loc(symbols, loc).gen
>+ local name_lua = symtab.demangle(symbols, {
>+ addr = loc.addr,
>+ traceno = loc.traceno,
>+ gen = gen
>+ })
>+ event.lua.trace = loc
>+ event.lua.trace.gen = gen
>+ event.lua.trace.name = name_lua
> end
> 
> --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~--
>@@ -143,18 +153,63 @@ local function parse_symtab(reader, symbols, vmstate)
> end
> 
> local event_parsers = {
>- [M.VMST.INTERP] = parse_host_only,
>- [M.VMST.LFUNC] = parse_lua_host,
>- [M.VMST.FFUNC] = parse_lua_host,
>- [M.VMST.CFUNC] = parse_lua_host,
>- [M.VMST.GC] = parse_host_only,
>- [M.VMST.EXIT] = parse_host_only,
>- [M.VMST.RECORD] = parse_host_only,
>- [M.VMST.OPT] = parse_host_only,
>- [M.VMST.ASM] = parse_host_only,
>- [M.VMST.TRACE] = parse_trace,
>+ [VMST.INTERP] = parse_host_only,
>+ [VMST.LFUNC] = parse_lua_host,
>+ [VMST.FFUNC] = parse_lua_host,
>+ [VMST.CFUNC] = parse_lua_host,
>+ [VMST.GC] = parse_host_only,
>+ [VMST.EXIT] = parse_host_only,
>+ [VMST.RECORD] = parse_host_only,
>+ [VMST.OPT] = parse_host_only,
>+ [VMST.ASM] = parse_host_only,
>+ [VMST.TRACE] = parse_trace,
> }
> 
>+local function insert_lua_callchain(chain, lua)
>+ local ins_cnt = 0
>+ local name_lua
>+ for _, fr in ipairs(lua.callchain) do
>+ ins_cnt = ins_cnt + 1
>+ if fr.type == FRAME.CFUNC then
>+ -- C function encountered, the next chunk
>+ -- of frames is located on the C stack.
>+ break
>+ end
>+ name_lua = fr.name
>+
>+ if fr.type == FRAME.LFUNC
>+ and lua.trace.traceno ~= nil
>+ and lua.trace.addr == fr.addr
>+ and lua.trace.line == fr.line
>+ then
>+ name_lua = lua.trace.name
>+ end
>+
>+ table.insert(chain, name_lua)
>+ end
>+ table.remove(lua.callchain, ins_cnt)
>+end
>+
>+local function merge(event)
>+ local callchain = {}
>+
>+ for _, name_host in ipairs(event.host.callchain) do
>+ table.insert(callchain, name_host)
>+ if string.match(name_host, '^lua_cpcall') ~= nil then
>+ -- Any C function is present on both the C and the Lua
>+ -- stacks. It is more convenient to get its info from the
>+ -- host stack, since it has information about child frames.
>+ table.remove(event.lua.callchain)
>+ end
>+
>+ if string.match(name_host, '^lua_p?call') ~= nil then
>+ insert_lua_callchain(callchain, event.lua)
>+ end
>+
>+ end
>+ return table.concat(callchain, ';') .. ';'
>+end
>+
> local function parse_event(reader, events, symbols)
>   local event = new_event()
> 
>@@ -171,8 +226,9 @@ local function parse_event(reader, events, symbols)
>   event.lua.vmstate = vmstate
> 
>   event_parsers[vmstate](reader, event, symbols)
>-
>- table.insert(events, event)
>+ local callchain = merge(event)
>+ local cur_cnt = events[callchain]
>+ events[callchain] = (cur_cnt or 0) + 1
>   return true
> end
> 
>@@ -203,4 +259,5 @@ function M.parse(reader, symbols)
>   return events
> end
> 
>+
> return M
>diff --git a/tools/utils/avl.lua b/tools/utils/avl.lua
>index d5baa534..098f58ec 100644
>--- a/tools/utils/avl.lua
>+++ b/tools/utils/avl.lua
>@@ -86,7 +86,7 @@ function M.insert(node, key, value)
>   elseif key > node.key then
>     node.right = M.insert(node.right, key, value)
>   else
>- table.insert(node.value, value)
>+ node.value = value
>   end
> 
>   update_height(node)
>diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
>index c26a9e8c..7f6c78f0 100644
>--- a/tools/utils/symtab.lua
>+++ b/tools/utils/symtab.lua
>@@ -176,7 +176,7 @@ function M.demangle(symtab, loc)
>   local key, value = avl.floor(symtab.cfunc, addr)
> 
>   if key then
>- return string_format("%s:%#x", value[gen].name, key)
>+ return string_format("%s:%#x", value.name, key)
>   end
> 
>   return string_format("CFUNC %#x", addr)
>--
>2.40.1
 

[-- Attachment #2: Type: text/html, Size: 17436 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser's memory footprint
  2023-05-24 20:36 ` Maxim Kokryashkin via Tarantool-patches
@ 2023-05-25  9:58   ` Sergey Kaplun via Tarantool-patches
  0 siblings, 0 replies; 3+ messages in thread
From: Sergey Kaplun via Tarantool-patches @ 2023-05-25  9:58 UTC (permalink / raw)
  To: Maxim Kokryashkin; +Cc: Maxim Kokryashkin, tarantool-patches

Hi, Maxim!
Thanks for the explanations and fixes!
LGTM!

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-05-25 10:02 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-24 20:35 [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser's memory footprint Maxim Kokryashkin via Tarantool-patches
2023-05-24 20:36 ` Maxim Kokryashkin via Tarantool-patches
2023-05-25  9:58   ` Sergey Kaplun via Tarantool-patches

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox