From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id C47AB481862; Thu, 18 May 2023 14:49:34 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org C47AB481862 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1684410574; bh=EdnWJTWH8Zx4eXPVvtGlichXSVOaQt0LwMyLzhH4F1Q=; h=To:Date:Subject:List-Id:List-Unsubscribe:List-Archive:List-Post: List-Help:List-Subscribe:From:Reply-To:From; b=LKePU+ITYtC4fwO4zBbaP96bICL+NI6g4rytORyDGuehmugYUWPFnmzDwFyZ/yayE zLVxjwNKbJ5phM+9oIgxOj7+Xc2aZr9SH2O4fRhIQf9xiowDEfhKWCyv4XEcklNdr9 JAt2qeLKteg6DCP6rGLbGVhf3mE9N7BRpaf0gT2I= Received: from mail-lj1-f174.google.com (mail-lj1-f174.google.com [209.85.208.174]) (using TLSv1.3 with cipher TLS_AES_128_GCM_SHA256 (128/128 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 4F660290BE0 for ; Thu, 18 May 2023 14:49:33 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 4F660290BE0 Received: by mail-lj1-f174.google.com with SMTP id 38308e7fff4ca-2af1ae3a21fso3819281fa.0 for ; Thu, 18 May 2023 04:49:33 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20221208; t=1684410572; x=1687002572; h=content-transfer-encoding:mime-version:message-id:date:subject:cc :to:from:x-gm-message-state:from:to:cc:subject:date:message-id :reply-to; bh=UPLaOOyPlpHvyddn1mzt0wP8pDWucMsYRfnh5Wc4vZo=; b=LXpg2q8a9CoIs4yfLYVvD4KmtF1paGRQPYfAKcThys5Qtjwu3TnqQFOI9zd6YR+A1A frVACAWNSEAmn7yybHeNz4+ST4eRYriUjH+redGHKzQ8jX88VBQY1LHEwswsCvDJkOEx 7EkApS3b6RBYPv7H0jdgefLWpNT9vib2/lQCLqGS8z9DpWqQ0f5CqW6l4v+TYYDBVyBZ AOQXJVTvOfell08aDqlMEYt/MvU/N3HODPUx2JcuVA0oyhvZtR98dw+10Hstjw+AP+P4 f+YVJFB2IQHJ1HkALHq93SYit7KdNmWc9IDzzOaS3VVj6wd5qQsQrIguRVqxdmAInjy8 kJ7Q== X-Gm-Message-State: AC+VfDwRJIEtvglyFT5P24lqTmXzb+FroRnbjgVNH6ZXGESO3uhOTRfL 
zkDBHGORrdN+azCUwzwTVfvbOaGrJo//aQ== X-Google-Smtp-Source: ACHHUZ7ierlt3tnkm675Pzcnob6B+aCPPDmo1AHvt4kBJ4S17s2AhuGp+twEZjXS+0TfnlCQpjIciQ== X-Received: by 2002:a2e:9c18:0:b0:2ad:bedc:995b with SMTP id s24-20020a2e9c18000000b002adbedc995bmr8670753lji.16.1684410571839; Thu, 18 May 2023 04:49:31 -0700 (PDT) Received: from fckxorg.mail.msk ([2a00:1148:b0ba:16:a3e8:bdc1:dbed:dbc8]) by smtp.gmail.com with ESMTPSA id r15-20020a2e80cf000000b002a784085edbsm255906ljg.99.2023.05.18.04.49.30 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Thu, 18 May 2023 04:49:31 -0700 (PDT) X-Google-Original-From: Maxim Kokryashkin To: tarantool-patches@dev.tarantool.org, skaplun@tarantool.org, sergos@tarantool.org Date: Thu, 18 May 2023 14:49:27 +0300 Message-Id: <20230518114927.277888-1-m.kokryashkin@tarantool.org> X-Mailer: git-send-email 2.40.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH luajit] sysprof: improve parser's memory footprint X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Maxim Kokryashkin via Tarantool-patches Reply-To: Maxim Kokryashkin Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" This patch reduces sysprof's parser memory footprint by avoiding reading all callchains before collapsing them. Instead, the parser merges stacks immediately after reading them and stores the counts in a Lua table. Also, it fixes a bug in the AVL-tree implementation, which produced unnecessary inserts of values into nodes. --- Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-noticket-sysprof-parser-refactoring PR: https://github.com/tarantool/tarantool/pull/8670 NB: CI is red in the LuaJIT repo because this patch requires changes in the tarantool repo, so please refer to the CI runs in the PR. 
tools/CMakeLists.txt | 2 - tools/sysprof.lua | 27 +------- tools/sysprof/collapse.lua | 124 ------------------------------------ tools/sysprof/parse.lua | 125 ++++++++++++++++++++++++++----------- tools/utils/avl.lua | 2 +- tools/utils/symtab.lua | 2 +- 6 files changed, 95 insertions(+), 187 deletions(-) delete mode 100755 tools/sysprof/collapse.lua diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index dd7ec6bd..3a919433 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -112,7 +112,6 @@ else() add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS luajit-parse-sysprof sysprof/parse.lua - sysprof/collapse.lua sysprof.lua utils/bufread.lua utils/symtab.lua @@ -121,7 +120,6 @@ else() install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua - ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof PERMISSIONS OWNER_READ OWNER_WRITE diff --git a/tools/sysprof.lua b/tools/sysprof.lua index 1afab195..be2a0565 100644 --- a/tools/sysprof.lua +++ b/tools/sysprof.lua @@ -1,13 +1,10 @@ local bufread = require "utils.bufread" local sysprof = require "sysprof.parse" local symtab = require "utils.symtab" -local misc = require "sysprof.collapse" local stdout, stderr = io.stdout, io.stderr local match, gmatch = string.match, string.gmatch -local split_by_vmstate = false - -- Program options. local opt_map = {} @@ -28,10 +25,6 @@ Supported options are: os.exit(0) end -function opt_map.split() - split_by_vmstate = true -end - -- Print error and exit with error status. local function opterror(...) stderr:write("luajit-parse-sysprof.lua: ERROR: ", ...) @@ -85,28 +78,14 @@ local function parseargs(args) return args[args.argn] end -local function traverse_calltree(node, prefix) - if node.is_leaf then - print(prefix..' 
'..node.count) - end - - local sep_prefix = #prefix == 0 and prefix or prefix..';' - - for name,child in pairs(node.children) do - traverse_calltree(child, sep_prefix..name) - end -end - local function dump(inputfile) local reader = bufread.new(inputfile) - local symbols = symtab.parse(reader) - local events = sysprof.parse(reader, symbols) - local calltree = misc.collapse(events, symbols, split_by_vmstate) - - traverse_calltree(calltree, '') + for stack, count in pairs(events) do + print(stack, count) + end os.exit(0) end diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua deleted file mode 100755 index 3d83d5ea..00000000 --- a/tools/sysprof/collapse.lua +++ /dev/null @@ -1,124 +0,0 @@ -local parse = require "sysprof.parse" -local vmdef = require "jit.vmdef" -local symtab = require "utils.symtab" - -local VMST_NAMES = { - [parse.VMST.INTERP] = "VMST_INTERP", - [parse.VMST.LFUNC] = "VMST_LFUNC", - [parse.VMST.FFUNC] = "VMST_FFUNC", - [parse.VMST.CFUNC] = "VMST_CFUNC", - [parse.VMST.GC] = "VMST_GC", - [parse.VMST.EXIT] = "VMST_EXIT", - [parse.VMST.RECORD] = "VMST_RECORD", - [parse.VMST.OPT] = "VMST_OPT", - [parse.VMST.ASM] = "VMST_ASM", - [parse.VMST.TRACE] = "VMST_TRACE", -} - -local M = {} - -local function new_node(name, is_leaf) - return { - name = name, - count = 0, - is_leaf = is_leaf, - children = {} - } -end - --- insert new child into a node (or increase counter in existing one) -local function insert(name, node, is_leaf) - if node.children[name] == nil then - node.children[name] = new_node(name, is_leaf) - end - - local child = node.children[name] - child.count = child.count + 1 - - return child -end - -local function insert_lua_callchain(chain, lua, symbols) - local ins_cnt = 0 - for _,fr in pairs(lua.callchain) do - local name_lua - - ins_cnt = ins_cnt + 1 - if fr.type == parse.FRAME.FFUNC then - name_lua = vmdef.ffnames[fr.ffid] - else - name_lua = symtab.demangle(symbols, { - addr = fr.addr, - line = fr.line, - gen = fr.gen - }) - if 
lua.trace.traceno ~= nil and lua.trace.addr == fr.addr and - lua.trace.line == fr.line then - name_lua = symtab.demangle(symbols, { - addr = fr.addr, - traceno = lua.trace.traceno, - gen = fr.gen - }) - end - - if fr.type == parse.FRAME.CFUNC then - -- C function encountered, the next chunk - -- of frames is located on the C stack. - break - end - end - - table.insert(chain, 1, { name = name_lua }) - end - table.remove(lua.callchain, ins_cnt) -end - --- merge lua and host callchains into one callchain representing --- transfer of control -local function merge(event, symbols, sep_vmst) - local cc = {} - - for _,h_fr in pairs(event.host.callchain) do - local name_host = symtab.demangle(symbols, { - addr = h_fr.addr, - gen = h_fr.gen - }) - table.insert(cc, 1, { name = name_host }) - - if string.match(name_host, '^lua_cpcall') ~= nil then - -- Any C function is present on both the C and the Lua - -- stacks. It is more convenient to get its info from the - -- host stack, since it has information about child frames. - table.remove(event.lua.callchain, 1) - end - - if string.match(name_host, '^lua_p?call') ~= nil then - insert_lua_callchain(cc, event.lua, symbols) - end - - end - - if sep_vmst == true then - table.insert(cc, { name = VMST_NAMES[event.lua.vmstate] }) - end - - return cc -end - --- Collapse all the events into call tree -function M.collapse(events, symbols, sep_vmst) - local root = new_node('root', false) - - for _,ev in pairs(events) do - local callchain = merge(ev, symbols, sep_vmst) - local curr_node = root - for i=#callchain,1,-1 do - curr_node = insert(callchain[i].name, curr_node, false) - end - insert('', curr_node, true) - end - - return root -end - -return M diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua index 5b52f104..3db36472 100755 --- a/tools/sysprof/parse.lua +++ b/tools/sysprof/parse.lua @@ -2,6 +2,7 @@ -- The format spec can be found in . 
local symtab = require "utils.symtab" +local vmdef = require "jit.vmdef" local string_format = string.format @@ -10,7 +11,7 @@ local LJP_CURRENT_VERSION = 2 local M = {} -M.VMST = { +local VMST = { INTERP = 0, LFUNC = 1, FFUNC = 2, @@ -25,13 +26,14 @@ M.VMST = { } -M.FRAME = { +local FRAME = { LFUNC = 1, CFUNC = 2, FFUNC = 3, BOTTOM = 0x80 } + local STREAM_END = 0x80 local SYMTAB_LFUNC_EVENT = 10 local SYMTAB_CFUNC_EVENT = 11 @@ -54,42 +56,40 @@ local function new_event() } end -local function parse_lfunc(reader, event, symbols) +local function parse_lfunc(reader, symbols) local addr = reader:read_uleb128() local line = reader:read_uleb128() local loc = symtab.loc(symbols, { addr = addr, line = line }) - loc.type = M.FRAME.LFUNC - table.insert(event.lua.callchain, 1, loc) + loc.type = FRAME.LFUNC + return symtab.demangle(symbols, loc) end -local function parse_ffunc(reader, event, _) +local function parse_ffunc(reader, _) local ffid = reader:read_uleb128() - table.insert(event.lua.callchain, 1, { - type = M.FRAME.FFUNC, - ffid = ffid, - }) + return vmdef.ffnames[ffid] end -local function parse_cfunc(reader, event, symbols) +local function parse_cfunc(reader, symbols) local addr = reader:read_uleb128() local loc = symtab.loc(symbols, { addr = addr }) - loc.type = M.FRAME.CFUNC - table.insert(event.lua.callchain, 1, loc) + loc.type = FRAME.CFUNC + return symtab.demangle(symbols, loc) end local frame_parsers = { - [M.FRAME.LFUNC] = parse_lfunc, - [M.FRAME.FFUNC] = parse_ffunc, - [M.FRAME.CFUNC] = parse_cfunc + [FRAME.LFUNC] = parse_lfunc, + [FRAME.FFUNC] = parse_ffunc, + [FRAME.CFUNC] = parse_cfunc } local function parse_lua_callchain(reader, event, symbols) while true do local frame_header = reader:read_octet() - if frame_header == M.FRAME.BOTTOM then + if frame_header == FRAME.BOTTOM then break end - frame_parsers[frame_header](reader, event, symbols) + local name = frame_parsers[frame_header](reader, symbols) + table.insert(event.lua.callchain, 1, {name=name, 
type=frame_header}) end end @@ -100,7 +100,7 @@ local function parse_host_callchain(reader, event, symbols) while addr ~= 0 do local loc = symtab.loc(symbols, { addr = addr }) - table.insert(event.host.callchain, 1, loc) + table.insert(event.host.callchain, 1, symtab.demangle(symbols, loc)) addr = reader:read_uleb128() end end @@ -108,10 +108,20 @@ end --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- local function parse_trace_callchain(reader, event, symbols) - event.lua.trace.traceno = reader:read_uleb128() - event.lua.trace.addr = reader:read_uleb128() - event.lua.trace.line = reader:read_uleb128() - event.lua.trace.gen = symtab.loc(symbols, event.lua.trace).gen + local loc = { + traceno = reader:read_uleb128(), + addr = reader:read_uleb128(), + line = reader:read_uleb128() + } + local gen = symtab.loc(symbols, loc).gen + local name_lua = symtab.demangle(symbols, { + addr = loc.addr, + traceno = loc.traceno, + gen = gen + }) + event.lua.trace = loc + event.lua.trace.gen = gen + event.lua.trace.name = name_lua end --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- @@ -143,18 +153,60 @@ local function parse_symtab(reader, symbols, vmstate) end local event_parsers = { - [M.VMST.INTERP] = parse_host_only, - [M.VMST.LFUNC] = parse_lua_host, - [M.VMST.FFUNC] = parse_lua_host, - [M.VMST.CFUNC] = parse_lua_host, - [M.VMST.GC] = parse_host_only, - [M.VMST.EXIT] = parse_host_only, - [M.VMST.RECORD] = parse_host_only, - [M.VMST.OPT] = parse_host_only, - [M.VMST.ASM] = parse_host_only, - [M.VMST.TRACE] = parse_trace, + [VMST.INTERP] = parse_host_only, + [VMST.LFUNC] = parse_lua_host, + [VMST.FFUNC] = parse_lua_host, + [VMST.CFUNC] = parse_lua_host, + [VMST.GC] = parse_host_only, + [VMST.EXIT] = parse_host_only, + [VMST.RECORD] = parse_host_only, + [VMST.OPT] = parse_host_only, + [VMST.ASM] = parse_host_only, + [VMST.TRACE] = parse_trace, } +local function insert_lua_callchain(chain, lua) + local ins_cnt = 0 + local 
name_lua + for _, fr in ipairs(lua.callchain) do + ins_cnt = ins_cnt + 1 + if fr.type == FRAME.CFUNC then + -- C function encountered, the next chunk + -- of frames is located on the C stack. + break + end + name_lua = fr.name + + if fr.type == FRAME.LFUNC and lua.trace.traceno ~= nil and + lua.trace.addr == fr.addr and lua.trace.line == fr.line then + name_lua = lua.trace.name + end + + table.insert(chain, name_lua) + end + table.remove(lua.callchain, ins_cnt) +end + +local function merge(event) + local cc = {} + + for _, name_host in ipairs(event.host.callchain) do + table.insert(cc, name_host) + if string.match(name_host, '^lua_cpcall') ~= nil then + -- Any C function is present on both the C and the Lua + -- stacks. It is more convenient to get its info from the + -- host stack, since it has information about child frames. + table.remove(event.lua.callchain) + end + + if string.match(name_host, '^lua_p?call') ~= nil then + insert_lua_callchain(cc, event.lua) + end + + end + return cc +end + local function parse_event(reader, events, symbols) local event = new_event() @@ -171,8 +223,10 @@ local function parse_event(reader, events, symbols) event.lua.vmstate = vmstate event_parsers[vmstate](reader, event, symbols) - - table.insert(events, event) + local cc = merge(event) + local cc_str = table.concat(cc, ';') .. 
';' + local cur_cnt = events[cc_str] + events[cc_str] = (cur_cnt or 0) + 1 return true end @@ -203,4 +257,5 @@ function M.parse(reader, symbols) return events end + return M diff --git a/tools/utils/avl.lua b/tools/utils/avl.lua index d5baa534..098f58ec 100644 --- a/tools/utils/avl.lua +++ b/tools/utils/avl.lua @@ -86,7 +86,7 @@ function M.insert(node, key, value) elseif key > node.key then node.right = M.insert(node.right, key, value) else - table.insert(node.value, value) + node.value = value end update_height(node) diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua index c26a9e8c..7f6c78f0 100644 --- a/tools/utils/symtab.lua +++ b/tools/utils/symtab.lua @@ -176,7 +176,7 @@ function M.demangle(symtab, loc) local key, value = avl.floor(symtab.cfunc, addr) if key then - return string_format("%s:%#x", value[gen].name, key) + return string_format("%s:%#x", value.name, key) end return string_format("CFUNC %#x", addr) -- 2.40.1