From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 389C7595685; Mon, 21 Aug 2023 13:07:42 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 389C7595685 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1692612462; bh=I17dl0cC8kJjJgH+2Hj0f2ncfiQUkZ+ICMpxXS03NOc=; h=To:Date:In-Reply-To:References:Subject:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=GU51XQq4LYf+lMo3LarXtMFkWNZA+n/ysMyapxM77uZ+4zq85GIsyi/EwFxwklrpK E8VJWPc1hJEvoq/y4Ybw7n4/Vi+qF7Im9GhVjp1WQXzMa2eHA7M/dy6p2toAISKCpR pZm9E/FQ5u9t9Wln9xSHbcR/QUe2w4Vgy/111if8= Received: from mail-lj1-f178.google.com (mail-lj1-f178.google.com [209.85.208.178]) (using TLSv1.3 with cipher TLS_AES_128_GCM_SHA256 (128/128 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 39EED595682 for ; Mon, 21 Aug 2023 13:06:18 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 39EED595682 Received: by mail-lj1-f178.google.com with SMTP id 38308e7fff4ca-2bcc4347d2dso4503941fa.0 for ; Mon, 21 Aug 2023 03:06:18 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20221208; t=1692612377; x=1693217177; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=zXSdlsphCLFem0F2d0lyLM3RuknqM8o8Ei6BsfLLocM=; b=gOwKTVEHSBmbD3Hv2wN1bnpfsS777/0ticZtqW+j9kptXRYi2l2kN/olhhVP7cdWdB NrljzlmzN/nQl6lZpA5pY9v06xB3O3T4My2hBm1cTVTm2U0yIQroIfitUwTfKSaNA2y5 SH8iKs0pr2UIVQG5F6NVQvQBn3qLcmCXzq5a+9Cn4htnvbT98FXmBp9eIDGjeGq8A3Lx d989TP6gG1EK/bCHV3Z7pvstV5tSJc8aHg2DsTSkOlnr0FllJZAAGc5Ve/W9ppL8Q+7p T6jw72lBzaSutdD5B9qTNhqvJI5yO9jEDFey4B/GecGdZRjeL51X64V77aQ3OW5rDn4g XYMg== X-Gm-Message-State: AOJu0Yzw0lPJhGrdAZrjyB6pCabCkHKLmtcTipWlbNPfIEHs+K5e+J5p rCnUQnUuCIiJVDV15OyZxSVZO9CGb0Iw8r1u X-Google-Smtp-Source: AGHT+IEgF4hxPCeoi2mdVpCDIJvV1Sf+OhLENwYGKalZCtlHYAFs8qqeOmMiu0snJZMQc94IQrRJXA== X-Received: by 2002:a2e:4949:0:b0:2bc:beac:53e7 with SMTP id b9-20020a2e4949000000b002bcbeac53e7mr1509620ljd.14.1692612377086; Mon, 21 Aug 2023 03:06:17 -0700 (PDT) Received: from localhost.localdomain ([94.25.172.123]) by smtp.gmail.com with ESMTPSA id b18-20020a2e8952000000b002b657f10b78sm2216360ljk.58.2023.08.21.03.06.16 (version=TLS1_3 cipher=TLS_CHACHA20_POLY1305_SHA256 bits=256/256); Mon, 21 Aug 2023 03:06:16 -0700 (PDT) To: tarantool-patches@dev.tarantool.org, imun@tarantool.org, skaplun@tarantool.org, m.kokryashkin@tarantool.org Date: Mon, 21 Aug 2023 13:06:04 +0300 Message-Id: <20230821100604.61363-4-max.kokryashkin@gmail.com> X-Mailer: git-send-email 2.39.2 (Apple Git-143) In-Reply-To: <20230821100604.61363-1-max.kokryashkin@gmail.com> References: <20230821100604.61363-1-max.kokryashkin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH luajit v4 3/3] sysprof: improve parser's memory footprint X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Maksim Kokryashkin via Tarantool-patches Reply-To: Maksim Kokryashkin Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" From: Maxim Kokryashkin This patch reduces sysprof's parser memory footprint, by avoiding reading all callchains before collapsing them. Instead of it, parser merges stacks immediately after reading them atnd stores counts in a lua table. The `collapse.lua` module is purged as a result of the patch, but it is left as a stub to keep the integrational testing intact. This stub should be removed in the next series. Resolves tarantool/tarantool#8700 --- Note: the assertion, proposed by Igor wasn't added, since it breaks integrational testing. tools/CMakeLists.txt | 4 ++ tools/sysprof.lua | 21 +------ tools/sysprof/collapse.lua | 123 +----------------------------------- tools/sysprof/parse.lua | 125 ++++++++++++++++++++++++++----------- 4 files changed, 100 insertions(+), 173 deletions(-) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index dd7ec6bd..51ee3877 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -112,6 +112,8 @@ else() add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS luajit-parse-sysprof sysprof/parse.lua + # TODO: This line is not deleted only for the sake of integrational + # testing. It should be deleted in the next series. sysprof/collapse.lua sysprof.lua utils/bufread.lua @@ -121,6 +123,8 @@ else() install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua + # TODO: This line is not deleted only for the sake of integrational + # testing. It should be deleted in the next series. ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof PERMISSIONS diff --git a/tools/sysprof.lua b/tools/sysprof.lua index 22c724e9..8e110a04 100644 --- a/tools/sysprof.lua +++ b/tools/sysprof.lua @@ -1,7 +1,6 @@ local bufread = require "utils.bufread" local sysprof = require "sysprof.parse" local symtab = require "utils.symtab" -local misc = require "sysprof.collapse" local stdout, stderr = io.stdout, io.stderr local match, gmatch = string.match, string.gmatch @@ -78,28 +77,14 @@ local function parseargs(args) return args[args.argn] end -local function traverse_calltree(node, prefix) - if node.is_leaf then - print(prefix..' '..node.count) - end - - local sep_prefix = #prefix == 0 and prefix or prefix..';' - - for name,child in pairs(node.children) do - traverse_calltree(child, sep_prefix..name) - end -end - local function dump(inputfile) local reader = bufread.new(inputfile) - local symbols = symtab.parse(reader) - local events = sysprof.parse(reader, symbols) - local calltree = misc.collapse(events, symbols) - - traverse_calltree(calltree, '') + for stack, count in pairs(events) do + print(stack, count) + end os.exit(0) end diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua index ac5269ea..cac92f1a 100755 --- a/tools/sysprof/collapse.lua +++ b/tools/sysprof/collapse.lua @@ -1,120 +1,3 @@ -local parse = require "sysprof.parse" -local vmdef = require "jit.vmdef" -local symtab = require "utils.symtab" - -local VMST_NAMES = { - [parse.VMST.INTERP] = "VMST_INTERP", - [parse.VMST.LFUNC] = "VMST_LFUNC", - [parse.VMST.FFUNC] = "VMST_FFUNC", - [parse.VMST.CFUNC] = "VMST_CFUNC", - [parse.VMST.GC] = "VMST_GC", - [parse.VMST.EXIT] = "VMST_EXIT", - [parse.VMST.RECORD] = "VMST_RECORD", - [parse.VMST.OPT] = "VMST_OPT", - [parse.VMST.ASM] = "VMST_ASM", - [parse.VMST.TRACE] = "VMST_TRACE", -} - -local M = {} - -local function new_node(name, is_leaf) - return { - name = name, - count = 0, - is_leaf = is_leaf, - children = {} - } -end - --- insert new child into a node (or increase counter in existing one) -local function insert(name, node, is_leaf) - if node.children[name] == nil then - node.children[name] = new_node(name, is_leaf) - end - - local child = node.children[name] - child.count = child.count + 1 - - return child -end - -local function insert_lua_callchain(chain, lua, symbols) - local ins_cnt = 0 - for _,fr in pairs(lua.callchain) do - local name_lua - - ins_cnt = ins_cnt + 1 - if fr.type == parse.FRAME.FFUNC then - name_lua = vmdef.ffnames[fr.ffid] - else - name_lua = symtab.demangle(symbols, { - addr = fr.addr, - line = fr.line, - gen = fr.gen - }) - if lua.trace.traceno ~= nil and lua.trace.addr == fr.addr and - lua.trace.line == fr.line then - name_lua = symtab.demangle(symbols, { - addr = fr.addr, - traceno = lua.trace.traceno, - gen = fr.gen - }) - end - - if fr.type == parse.FRAME.CFUNC then - -- C function encountered, the next chunk - -- of frames is located on the C stack. - break - end - end - - table.insert(chain, 1, { name = name_lua }) - end - table.remove(lua.callchain, ins_cnt) -end - --- merge lua and host callchains into one callchain representing --- transfer of control -local function merge(event, symbols) - local cc = {} - - for _,h_fr in pairs(event.host.callchain) do - local name_host = symtab.demangle(symbols, { - addr = h_fr.addr, - gen = h_fr.gen - }) - table.insert(cc, 1, { name = name_host }) - - if string.match(name_host, '^lua_cpcall') ~= nil then - -- Any C function is present on both the C and the Lua - -- stacks. It is more convenient to get its info from the - -- host stack, since it has information about child frames. - table.remove(event.lua.callchain, 1) - end - - if string.match(name_host, '^lua_p?call') ~= nil then - insert_lua_callchain(cc, event.lua, symbols) - end - - end - - return cc -end - --- Collapse all the events into call tree -function M.collapse(events, symbols) - local root = new_node('root', false) - - for _,ev in pairs(events) do - local callchain = merge(ev, symbols) - local curr_node = root - for i=#callchain,1,-1 do - curr_node = insert(callchain[i].name, curr_node, false) - end - insert('', curr_node, true) - end - - return root -end - -return M +-- FIXME: This file is not deleted only for the sake of +-- integrational testing. It should be deleted in the +-- next series. diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua index 5b52f104..92e475a6 100755 --- a/tools/sysprof/parse.lua +++ b/tools/sysprof/parse.lua @@ -2,6 +2,7 @@ -- The format spec can be found in . local symtab = require "utils.symtab" +local vmdef = require "jit.vmdef" local string_format = string.format @@ -10,7 +11,7 @@ local LJP_CURRENT_VERSION = 2 local M = {} -M.VMST = { +local VMST = { INTERP = 0, LFUNC = 1, FFUNC = 2, @@ -25,13 +26,14 @@ M.VMST = { } -M.FRAME = { +local FRAME = { LFUNC = 1, CFUNC = 2, FFUNC = 3, BOTTOM = 0x80 } + local STREAM_END = 0x80 local SYMTAB_LFUNC_EVENT = 10 local SYMTAB_CFUNC_EVENT = 11 @@ -54,42 +56,40 @@ local function new_event() } end -local function parse_lfunc(reader, event, symbols) +local function parse_lfunc(reader, symbols) local addr = reader:read_uleb128() local line = reader:read_uleb128() local loc = symtab.loc(symbols, { addr = addr, line = line }) - loc.type = M.FRAME.LFUNC - table.insert(event.lua.callchain, 1, loc) + loc.type = FRAME.LFUNC + return symtab.demangle(symbols, loc) end -local function parse_ffunc(reader, event, _) +local function parse_ffunc(reader, _) local ffid = reader:read_uleb128() - table.insert(event.lua.callchain, 1, { - type = M.FRAME.FFUNC, - ffid = ffid, - }) + return vmdef.ffnames[ffid] end -local function parse_cfunc(reader, event, symbols) +local function parse_cfunc(reader, symbols) local addr = reader:read_uleb128() local loc = symtab.loc(symbols, { addr = addr }) - loc.type = M.FRAME.CFUNC - table.insert(event.lua.callchain, 1, loc) + loc.type = FRAME.CFUNC + return symtab.demangle(symbols, loc) end local frame_parsers = { - [M.FRAME.LFUNC] = parse_lfunc, - [M.FRAME.FFUNC] = parse_ffunc, - [M.FRAME.CFUNC] = parse_cfunc + [FRAME.LFUNC] = parse_lfunc, + [FRAME.FFUNC] = parse_ffunc, + [FRAME.CFUNC] = parse_cfunc } local function parse_lua_callchain(reader, event, symbols) while true do local frame_header = reader:read_octet() - if frame_header == M.FRAME.BOTTOM then + if frame_header == FRAME.BOTTOM then break end - frame_parsers[frame_header](reader, event, symbols) + local name = frame_parsers[frame_header](reader, symbols) + table.insert(event.lua.callchain, 1, {name = name, type = frame_header}) end end @@ -100,7 +100,7 @@ local function parse_host_callchain(reader, event, symbols) while addr ~= 0 do local loc = symtab.loc(symbols, { addr = addr }) - table.insert(event.host.callchain, 1, loc) + table.insert(event.host.callchain, 1, symtab.demangle(symbols, loc)) addr = reader:read_uleb128() end end @@ -108,10 +108,20 @@ end --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- local function parse_trace_callchain(reader, event, symbols) - event.lua.trace.traceno = reader:read_uleb128() - event.lua.trace.addr = reader:read_uleb128() - event.lua.trace.line = reader:read_uleb128() - event.lua.trace.gen = symtab.loc(symbols, event.lua.trace).gen + local loc = {} + loc.traceno = reader:read_uleb128() + loc.addr = reader:read_uleb128() + loc.line = reader:read_uleb128() + + local gen = symtab.loc(symbols, loc).gen + local name_lua = symtab.demangle(symbols, { + addr = loc.addr, + traceno = loc.traceno, + gen = gen + }) + event.lua.trace = loc + event.lua.trace.gen = gen + event.lua.trace.name = name_lua end --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- @@ -143,18 +153,63 @@ local function parse_symtab(reader, symbols, vmstate) end local event_parsers = { - [M.VMST.INTERP] = parse_host_only, - [M.VMST.LFUNC] = parse_lua_host, - [M.VMST.FFUNC] = parse_lua_host, - [M.VMST.CFUNC] = parse_lua_host, - [M.VMST.GC] = parse_host_only, - [M.VMST.EXIT] = parse_host_only, - [M.VMST.RECORD] = parse_host_only, - [M.VMST.OPT] = parse_host_only, - [M.VMST.ASM] = parse_host_only, - [M.VMST.TRACE] = parse_trace, + [VMST.INTERP] = parse_host_only, + [VMST.LFUNC] = parse_lua_host, + [VMST.FFUNC] = parse_lua_host, + [VMST.CFUNC] = parse_lua_host, + [VMST.GC] = parse_host_only, + [VMST.EXIT] = parse_host_only, + [VMST.RECORD] = parse_host_only, + [VMST.OPT] = parse_host_only, + [VMST.ASM] = parse_host_only, + [VMST.TRACE] = parse_trace, } +local function insert_lua_callchain(chain, lua) + local ins_cnt = 0 + local name_lua + for _, fr in ipairs(lua.callchain) do + ins_cnt = ins_cnt + 1 + if fr.type == FRAME.CFUNC then + -- C function encountered, the next chunk + -- of frames is located on the C stack. + break + end + name_lua = fr.name + + if fr.type == FRAME.LFUNC + and lua.trace.traceno ~= nil + and lua.trace.addr == fr.addr + and lua.trace.line == fr.line + then + name_lua = lua.trace.name + end + + table.insert(chain, name_lua) + end + table.remove(lua.callchain, ins_cnt) +end + +local function merge(event) + local callchain = {} + + for _, name_host in ipairs(event.host.callchain) do + table.insert(callchain, name_host) + if string.match(name_host, '^lua_cpcall') ~= nil then + -- Any C function is present on both the C and the Lua + -- stacks. It is more convenient to get its info from the + -- host stack, since it has information about child frames. + table.remove(event.lua.callchain) + end + + if string.match(name_host, '^lua_p?call') ~= nil then + insert_lua_callchain(callchain, event.lua) + end + + end + return table.concat(callchain, ';') .. ';' +end + local function parse_event(reader, events, symbols) local event = new_event() @@ -171,8 +226,8 @@ local function parse_event(reader, events, symbols) event.lua.vmstate = vmstate event_parsers[vmstate](reader, event, symbols) - - table.insert(events, event) + local callchain = merge(event) + events[callchain] = (events[callchain] or 0) + 1 return true end -- 2.39.2 (Apple Git-143)