From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 5621F48853E; Wed, 24 May 2023 23:35:20 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 5621F48853E DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1684960520; bh=eqOWRHcUX/V0dnAJkHZBbWHxJtdX0JFIoYk34whDiWM=; h=To:Date:Subject:List-Id:List-Unsubscribe:List-Archive:List-Post: List-Help:List-Subscribe:From:Reply-To:From; b=SNk/jz4mNiXBm+MX41LZc27KHOwqL32JW4iZytvUvCUmARwOZFoRSgcmhxR9IotH+ X3mukoWOayk2gUrDAR0t0j3PpJPuU6bc/L0IujEEkN+SzG/TtKnS+9JMMmXvv3esJh akujhtLAgY5c9lB6Vcm67EA7PQWCEOU5M1etMZoA= Received: from mail-lj1-f177.google.com (mail-lj1-f177.google.com [209.85.208.177]) (using TLSv1.3 with cipher TLS_AES_128_GCM_SHA256 (128/128 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 1948C29EACD for ; Wed, 24 May 2023 23:35:19 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 1948C29EACD Received: by mail-lj1-f177.google.com with SMTP id 38308e7fff4ca-2af1a7d2f6aso17412491fa.1 for ; Wed, 24 May 2023 13:35:19 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20221208; t=1684960518; x=1687552518; h=content-transfer-encoding:mime-version:message-id:date:subject:cc :to:from:x-gm-message-state:from:to:cc:subject:date:message-id :reply-to; bh=ZlT7W1ii77gdNkFEZYd4hXaxfBv/vLN8dYKGaWhanGk=; b=dnEPpHJAtS/OMinBiO4eSZfbm53hLwMl2bQb/Rx9yWGO3YF6sH2ICuKPvEZJzajVzS sKMggEIk4SDyeqrm4zq4jxHbEiaI16RaNeTCuj5YRb8c0JcfngvqJqzL8Wt1xZPGoraB gNlHgGNMVq1acYGiyw3DCCmmsJu2NuN+i/F5oVdQ7ZYCyXAcXlV6adEMMdb+KREIhj1e UdqoOWrzU+jv8K0+TPCDgzc0ohAsCZpbc1RF3RyL2eqbEjBqYPHyHrn7AylopvSo+Po1 fVuWn8L+MoEw93pXO2NtskRj/UKztGeLubvFpYzY0Ld2qv3duI//N6oYWIGg3H/5cv+K 2Pbg== X-Gm-Message-State: AC+VfDywHmvjHElibO30IuVVOFDiqbgeQYe11flpcdPl9ZljXnVluxXh bhDcgd81Kz+GLag3lSsSw0EX4sB4KuDzJw== X-Google-Smtp-Source: ACHHUZ57owUIgfRcW9ASRy1Xu0UromtcPpLmi1zxTypHbX5BMeg1hn2FZxglJzDMhWhUeGKYDjmwZw== X-Received: by 2002:a2e:818c:0:b0:2ac:7ff1:8470 with SMTP id e12-20020a2e818c000000b002ac7ff18470mr317425ljg.43.1684960517559; Wed, 24 May 2023 13:35:17 -0700 (PDT) Received: from localhost.localdomain (89-179-106-78.broadband.corbina.ru. [89.179.106.78]) by smtp.gmail.com with ESMTPSA id m5-20020a2e9105000000b002a8ac166e55sm2235388ljg.44.2023.05.24.13.35.16 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 24 May 2023 13:35:16 -0700 (PDT) X-Google-Original-From: Maxim Kokryashkin To: tarantool-patches@dev.tarantool.org, skaplun@tarantool.org, sergos@tarantool.org Date: Wed, 24 May 2023 23:35:13 +0300 Message-Id: <20230524203513.246808-1-m.kokryashkin@tarantool.org> X-Mailer: git-send-email 2.40.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH luajit v2] sysprof: improve parser's memory footprint X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Maxim Kokryashkin via Tarantool-patches Reply-To: Maxim Kokryashkin Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" This patch reduces sysprof's parser memory footprint, by avoiding reading all callchains before collapsing them. Instead of it, parser merges stacks immediately after reading them and stores counts in a lua table. Also, it fixes a bug in the AVL-tree implementation, which produced unnecessary inserts of values into nodes. --- Changes in v2: - Fixed comments as per review by Sergey. Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-8700-sysprof-parser-refactoring PR: https://github.com/tarantool/tarantool/pull/8703 tools/CMakeLists.txt | 2 - tools/sysprof.lua | 27 +------- tools/sysprof/collapse.lua | 124 ------------------------------------ tools/sysprof/parse.lua | 127 +++++++++++++++++++++++++++---------- tools/utils/avl.lua | 2 +- tools/utils/symtab.lua | 2 +- 6 files changed, 97 insertions(+), 187 deletions(-) delete mode 100755 tools/sysprof/collapse.lua diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index dd7ec6bd..3a919433 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -112,7 +112,6 @@ else() add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS luajit-parse-sysprof sysprof/parse.lua - sysprof/collapse.lua sysprof.lua utils/bufread.lua utils/symtab.lua @@ -121,7 +120,6 @@ else() install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua - ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof PERMISSIONS OWNER_READ OWNER_WRITE diff --git a/tools/sysprof.lua b/tools/sysprof.lua index 1afab195..be2a0565 100644 --- a/tools/sysprof.lua +++ b/tools/sysprof.lua @@ -1,13 +1,10 @@ local bufread = require "utils.bufread" local sysprof = require "sysprof.parse" local symtab = require "utils.symtab" -local misc = require "sysprof.collapse" local stdout, stderr = io.stdout, io.stderr local match, gmatch = string.match, string.gmatch -local split_by_vmstate = false - -- Program options. local opt_map = {} @@ -28,10 +25,6 @@ Supported options are: os.exit(0) end -function opt_map.split() - split_by_vmstate = true -end - -- Print error and exit with error status. local function opterror(...) stderr:write("luajit-parse-sysprof.lua: ERROR: ", ...) @@ -85,28 +78,14 @@ local function parseargs(args) return args[args.argn] end -local function traverse_calltree(node, prefix) - if node.is_leaf then - print(prefix..' '..node.count) - end - - local sep_prefix = #prefix == 0 and prefix or prefix..';' - - for name,child in pairs(node.children) do - traverse_calltree(child, sep_prefix..name) - end -end - local function dump(inputfile) local reader = bufread.new(inputfile) - local symbols = symtab.parse(reader) - local events = sysprof.parse(reader, symbols) - local calltree = misc.collapse(events, symbols, split_by_vmstate) - - traverse_calltree(calltree, '') + for stack, count in pairs(events) do + print(stack, count) + end os.exit(0) end diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua deleted file mode 100755 index 3d83d5ea..00000000 --- a/tools/sysprof/collapse.lua +++ /dev/null @@ -1,124 +0,0 @@ -local parse = require "sysprof.parse" -local vmdef = require "jit.vmdef" -local symtab = require "utils.symtab" - -local VMST_NAMES = { - [parse.VMST.INTERP] = "VMST_INTERP", - [parse.VMST.LFUNC] = "VMST_LFUNC", - [parse.VMST.FFUNC] = "VMST_FFUNC", - [parse.VMST.CFUNC] = "VMST_CFUNC", - [parse.VMST.GC] = "VMST_GC", - [parse.VMST.EXIT] = "VMST_EXIT", - [parse.VMST.RECORD] = "VMST_RECORD", - [parse.VMST.OPT] = "VMST_OPT", - [parse.VMST.ASM] = "VMST_ASM", - [parse.VMST.TRACE] = "VMST_TRACE", -} - -local M = {} - -local function new_node(name, is_leaf) - return { - name = name, - count = 0, - is_leaf = is_leaf, - children = {} - } -end - --- insert new child into a node (or increase counter in existing one) -local function insert(name, node, is_leaf) - if node.children[name] == nil then - node.children[name] = new_node(name, is_leaf) - end - - local child = node.children[name] - child.count = child.count + 1 - - return child -end - -local function insert_lua_callchain(chain, lua, symbols) - local ins_cnt = 0 - for _,fr in pairs(lua.callchain) do - local name_lua - - ins_cnt = ins_cnt + 1 - if fr.type == parse.FRAME.FFUNC then - name_lua = vmdef.ffnames[fr.ffid] - else - name_lua = symtab.demangle(symbols, { - addr = fr.addr, - line = fr.line, - gen = fr.gen - }) - if lua.trace.traceno ~= nil and lua.trace.addr == fr.addr and - lua.trace.line == fr.line then - name_lua = symtab.demangle(symbols, { - addr = fr.addr, - traceno = lua.trace.traceno, - gen = fr.gen - }) - end - - if fr.type == parse.FRAME.CFUNC then - -- C function encountered, the next chunk - -- of frames is located on the C stack. - break - end - end - - table.insert(chain, 1, { name = name_lua }) - end - table.remove(lua.callchain, ins_cnt) -end - --- merge lua and host callchains into one callchain representing --- transfer of control -local function merge(event, symbols, sep_vmst) - local cc = {} - - for _,h_fr in pairs(event.host.callchain) do - local name_host = symtab.demangle(symbols, { - addr = h_fr.addr, - gen = h_fr.gen - }) - table.insert(cc, 1, { name = name_host }) - - if string.match(name_host, '^lua_cpcall') ~= nil then - -- Any C function is present on both the C and the Lua - -- stacks. It is more convenient to get its info from the - -- host stack, since it has information about child frames. - table.remove(event.lua.callchain, 1) - end - - if string.match(name_host, '^lua_p?call') ~= nil then - insert_lua_callchain(cc, event.lua, symbols) - end - - end - - if sep_vmst == true then - table.insert(cc, { name = VMST_NAMES[event.lua.vmstate] }) - end - - return cc -end - --- Collapse all the events into call tree -function M.collapse(events, symbols, sep_vmst) - local root = new_node('root', false) - - for _,ev in pairs(events) do - local callchain = merge(ev, symbols, sep_vmst) - local curr_node = root - for i=#callchain,1,-1 do - curr_node = insert(callchain[i].name, curr_node, false) - end - insert('', curr_node, true) - end - - return root -end - -return M diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua index 5b52f104..806297eb 100755 --- a/tools/sysprof/parse.lua +++ b/tools/sysprof/parse.lua @@ -2,6 +2,7 @@ -- The format spec can be found in . local symtab = require "utils.symtab" +local vmdef = require "jit.vmdef" local string_format = string.format @@ -10,7 +11,7 @@ local LJP_CURRENT_VERSION = 2 local M = {} -M.VMST = { +local VMST = { INTERP = 0, LFUNC = 1, FFUNC = 2, @@ -25,13 +26,14 @@ M.VMST = { } -M.FRAME = { +local FRAME = { LFUNC = 1, CFUNC = 2, FFUNC = 3, BOTTOM = 0x80 } + local STREAM_END = 0x80 local SYMTAB_LFUNC_EVENT = 10 local SYMTAB_CFUNC_EVENT = 11 @@ -54,42 +56,40 @@ local function new_event() } end -local function parse_lfunc(reader, event, symbols) +local function parse_lfunc(reader, symbols) local addr = reader:read_uleb128() local line = reader:read_uleb128() local loc = symtab.loc(symbols, { addr = addr, line = line }) - loc.type = M.FRAME.LFUNC - table.insert(event.lua.callchain, 1, loc) + loc.type = FRAME.LFUNC + return symtab.demangle(symbols, loc) end -local function parse_ffunc(reader, event, _) +local function parse_ffunc(reader, _) local ffid = reader:read_uleb128() - table.insert(event.lua.callchain, 1, { - type = M.FRAME.FFUNC, - ffid = ffid, - }) + return vmdef.ffnames[ffid] end -local function parse_cfunc(reader, event, symbols) +local function parse_cfunc(reader, symbols) local addr = reader:read_uleb128() local loc = symtab.loc(symbols, { addr = addr }) - loc.type = M.FRAME.CFUNC - table.insert(event.lua.callchain, 1, loc) + loc.type = FRAME.CFUNC + return symtab.demangle(symbols, loc) end local frame_parsers = { - [M.FRAME.LFUNC] = parse_lfunc, - [M.FRAME.FFUNC] = parse_ffunc, - [M.FRAME.CFUNC] = parse_cfunc + [FRAME.LFUNC] = parse_lfunc, + [FRAME.FFUNC] = parse_ffunc, + [FRAME.CFUNC] = parse_cfunc } local function parse_lua_callchain(reader, event, symbols) while true do local frame_header = reader:read_octet() - if frame_header == M.FRAME.BOTTOM then + if frame_header == FRAME.BOTTOM then break end - frame_parsers[frame_header](reader, event, symbols) + local name = frame_parsers[frame_header](reader, symbols) + table.insert(event.lua.callchain, 1, {name = name, type = frame_header}) end end @@ -100,7 +100,7 @@ local function parse_host_callchain(reader, event, symbols) while addr ~= 0 do local loc = symtab.loc(symbols, { addr = addr }) - table.insert(event.host.callchain, 1, loc) + table.insert(event.host.callchain, 1, symtab.demangle(symbols, loc)) addr = reader:read_uleb128() end end @@ -108,10 +108,20 @@ end --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- local function parse_trace_callchain(reader, event, symbols) - event.lua.trace.traceno = reader:read_uleb128() - event.lua.trace.addr = reader:read_uleb128() - event.lua.trace.line = reader:read_uleb128() - event.lua.trace.gen = symtab.loc(symbols, event.lua.trace).gen + local loc = {} + loc.traceno = reader:read_uleb128() + loc.addr = reader:read_uleb128() + loc.line = reader:read_uleb128() + + local gen = symtab.loc(symbols, loc).gen + local name_lua = symtab.demangle(symbols, { + addr = loc.addr, + traceno = loc.traceno, + gen = gen + }) + event.lua.trace = loc + event.lua.trace.gen = gen + event.lua.trace.name = name_lua end --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- @@ -143,18 +153,63 @@ local function parse_symtab(reader, symbols, vmstate) end local event_parsers = { - [M.VMST.INTERP] = parse_host_only, - [M.VMST.LFUNC] = parse_lua_host, - [M.VMST.FFUNC] = parse_lua_host, - [M.VMST.CFUNC] = parse_lua_host, - [M.VMST.GC] = parse_host_only, - [M.VMST.EXIT] = parse_host_only, - [M.VMST.RECORD] = parse_host_only, - [M.VMST.OPT] = parse_host_only, - [M.VMST.ASM] = parse_host_only, - [M.VMST.TRACE] = parse_trace, + [VMST.INTERP] = parse_host_only, + [VMST.LFUNC] = parse_lua_host, + [VMST.FFUNC] = parse_lua_host, + [VMST.CFUNC] = parse_lua_host, + [VMST.GC] = parse_host_only, + [VMST.EXIT] = parse_host_only, + [VMST.RECORD] = parse_host_only, + [VMST.OPT] = parse_host_only, + [VMST.ASM] = parse_host_only, + [VMST.TRACE] = parse_trace, } +local function insert_lua_callchain(chain, lua) + local ins_cnt = 0 + local name_lua + for _, fr in ipairs(lua.callchain) do + ins_cnt = ins_cnt + 1 + if fr.type == FRAME.CFUNC then + -- C function encountered, the next chunk + -- of frames is located on the C stack. + break + end + name_lua = fr.name + + if fr.type == FRAME.LFUNC + and lua.trace.traceno ~= nil + and lua.trace.addr == fr.addr + and lua.trace.line == fr.line + then + name_lua = lua.trace.name + end + + table.insert(chain, name_lua) + end + table.remove(lua.callchain, ins_cnt) +end + +local function merge(event) + local callchain = {} + + for _, name_host in ipairs(event.host.callchain) do + table.insert(callchain, name_host) + if string.match(name_host, '^lua_cpcall') ~= nil then + -- Any C function is present on both the C and the Lua + -- stacks. It is more convenient to get its info from the + -- host stack, since it has information about child frames. + table.remove(event.lua.callchain) + end + + if string.match(name_host, '^lua_p?call') ~= nil then + insert_lua_callchain(callchain, event.lua) + end + + end + return table.concat(callchain, ';') .. ';' +end + local function parse_event(reader, events, symbols) local event = new_event() @@ -171,8 +226,9 @@ local function parse_event(reader, events, symbols) event.lua.vmstate = vmstate event_parsers[vmstate](reader, event, symbols) - - table.insert(events, event) + local callchain = merge(event) + local cur_cnt = events[callchain] + events[callchain] = (cur_cnt or 0) + 1 return true end @@ -203,4 +259,5 @@ function M.parse(reader, symbols) return events end + return M diff --git a/tools/utils/avl.lua b/tools/utils/avl.lua index d5baa534..098f58ec 100644 --- a/tools/utils/avl.lua +++ b/tools/utils/avl.lua @@ -86,7 +86,7 @@ function M.insert(node, key, value) elseif key > node.key then node.right = M.insert(node.right, key, value) else - table.insert(node.value, value) + node.value = value end update_height(node) diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua index c26a9e8c..7f6c78f0 100644 --- a/tools/utils/symtab.lua +++ b/tools/utils/symtab.lua @@ -176,7 +176,7 @@ function M.demangle(symtab, loc) local key, value = avl.floor(symtab.cfunc, addr) if key then - return string_format("%s:%#x", value[gen].name, key) + return string_format("%s:%#x", value.name, key) end return string_format("CFUNC %#x", addr) -- 2.40.1