From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 9E1DB560EB6; Mon, 31 Jul 2023 23:32:14 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 9E1DB560EB6 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1690835534; bh=OwY14gy51NM0lJMEs2JcS8LwQ1tBuNxwJsX9YB1OyUs=; h=To:Date:In-Reply-To:References:Subject:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=Rgs9Zsb4Va68zNxltKSwfvxR0ITeQiVmGorvSxSRASmdKyKavx/uDID1zdVOuC9sf 4UuRfWznbkcKGnFaXITiHTup+UJM35oyoXW7fjc2nZjSOJqLDuKqauF28558Ro5pet TmhLEZFkanei9qf7ZHUeZ1A72pUvopQ7sXjF7ims= Received: from mail-lf1-f43.google.com (mail-lf1-f43.google.com [209.85.167.43]) (using TLSv1.3 with cipher TLS_AES_128_GCM_SHA256 (128/128 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 47466560EB6 for ; Mon, 31 Jul 2023 23:30:24 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 47466560EB6 Received: by mail-lf1-f43.google.com with SMTP id 2adb3069b0e04-4fe0d5f719dso8184479e87.2 for ; Mon, 31 Jul 2023 13:30:24 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20221208; t=1690835423; x=1691440223; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=AYSWu3ZVgKuIccm9U4Qrm8ePks+LicbQC68l8GA1+rI=; b=fYVvtEDfflzPBy9E+QJvAeK1eZ3EQzwLnjMAdjo2Eva2WtZ3YorVdyrWZCd1Vub1Mm RvCBeakIRPTNYJPKSwEgDikOQ+VO12/WwIqqw55ORisYJNkQcuAWISHBxMZNsB5PHcep gzH1PIrPbXTaIaUSUoeXLIaedd5X7OBWh1vi/i+lpFWyc+Ees1Rj34cxYdgHVwE5V3B/ 8zuXnkYuukZnTzU79tVdvBY5YxopzMkPiqIkgYGygDCX4bhj7jazJw8NdQk1q+G58yNs 5p+jPo2eUWYI7Eq6CSqVc+C4E6JY4aKldrYiTaGCjBU2OknsY4UH0ysYNRJo+rN68h6N FaBw== X-Gm-Message-State: 
ABy/qLY95nf5Ng2LNMYy7nJcmqR0vjioS8/TBN6a86/sdrKkyRV4dx2m t3f5xFEGutG0T8J2XzIS70jd6LH2FdAJlg== X-Google-Smtp-Source: APBJJlE0fSX4+n+uakaSJOutPQ///HUKO2pt2ywwfoNCI63zKTz1Bdhs2uqB8Ove2X7J5t+gJA9vHA== X-Received: by 2002:ac2:44d4:0:b0:4fd:f80e:4977 with SMTP id d20-20020ac244d4000000b004fdf80e4977mr639140lfm.19.1690835423267; Mon, 31 Jul 2023 13:30:23 -0700 (PDT) Received: from localhost.localdomain (95-24-1-220.broadband.corbina.ru. [95.24.1.220]) by smtp.gmail.com with ESMTPSA id r25-20020ac24d19000000b004fe1f1c0ee4sm1814906lfi.82.2023.07.31.13.30.22 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Mon, 31 Jul 2023 13:30:22 -0700 (PDT) X-Google-Original-From: Maxim Kokryashkin To: tarantool-patches@dev.tarantool.org, skaplun@tarantool.org, imun@tarantool.org Date: Mon, 31 Jul 2023 23:30:06 +0300 Message-ID: X-Mailer: git-send-email 2.41.0 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH luajit v3 4/4] sysprof: improve parser's memory footprint X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Maxim Kokryashkin via Tarantool-patches Reply-To: Maxim Kokryashkin Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" This patch reduces the memory footprint of sysprof's parser by no longer reading all callchains before collapsing them. Instead, the parser merges each stack immediately after reading it and stores the counts in a Lua table. The `collapse.lua` module is purged as a result of the patch, but it is left as a stub to keep the integration testing intact. This stub should be removed in the next series. 
Resolves tarantool/tarantool#8700 --- tools/CMakeLists.txt | 4 ++ tools/sysprof.lua | 21 +------ tools/sysprof/collapse.lua | 123 +----------------------------------- tools/sysprof/parse.lua | 126 ++++++++++++++++++++++++++----------- 4 files changed, 101 insertions(+), 173 deletions(-) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index dd7ec6bd..1ae559ee 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -112,6 +112,8 @@ else() add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS luajit-parse-sysprof sysprof/parse.lua + # FIXME: This line is not deleted only for the sake of integrational + # testing. It should be deleted in the next series. sysprof/collapse.lua sysprof.lua utils/bufread.lua @@ -121,6 +123,8 @@ else() install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua + # FIXME: This line is not deleted only for the sake of integrational + # testing. It should be deleted in the next series. ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof PERMISSIONS diff --git a/tools/sysprof.lua b/tools/sysprof.lua index 22c724e9..8e110a04 100644 --- a/tools/sysprof.lua +++ b/tools/sysprof.lua @@ -1,7 +1,6 @@ local bufread = require "utils.bufread" local sysprof = require "sysprof.parse" local symtab = require "utils.symtab" -local misc = require "sysprof.collapse" local stdout, stderr = io.stdout, io.stderr local match, gmatch = string.match, string.gmatch @@ -78,28 +77,14 @@ local function parseargs(args) return args[args.argn] end -local function traverse_calltree(node, prefix) - if node.is_leaf then - print(prefix..' 
'..node.count) - end - - local sep_prefix = #prefix == 0 and prefix or prefix..';' - - for name,child in pairs(node.children) do - traverse_calltree(child, sep_prefix..name) - end -end - local function dump(inputfile) local reader = bufread.new(inputfile) - local symbols = symtab.parse(reader) - local events = sysprof.parse(reader, symbols) - local calltree = misc.collapse(events, symbols) - - traverse_calltree(calltree, '') + for stack, count in pairs(events) do + print(stack, count) + end os.exit(0) end diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua index ac5269ea..9e815e0d 100755 --- a/tools/sysprof/collapse.lua +++ b/tools/sysprof/collapse.lua @@ -1,120 +1,3 @@ -local parse = require "sysprof.parse" -local vmdef = require "jit.vmdef" -local symtab = require "utils.symtab" - -local VMST_NAMES = { - [parse.VMST.INTERP] = "VMST_INTERP", - [parse.VMST.LFUNC] = "VMST_LFUNC", - [parse.VMST.FFUNC] = "VMST_FFUNC", - [parse.VMST.CFUNC] = "VMST_CFUNC", - [parse.VMST.GC] = "VMST_GC", - [parse.VMST.EXIT] = "VMST_EXIT", - [parse.VMST.RECORD] = "VMST_RECORD", - [parse.VMST.OPT] = "VMST_OPT", - [parse.VMST.ASM] = "VMST_ASM", - [parse.VMST.TRACE] = "VMST_TRACE", -} - -local M = {} - -local function new_node(name, is_leaf) - return { - name = name, - count = 0, - is_leaf = is_leaf, - children = {} - } -end - --- insert new child into a node (or increase counter in existing one) -local function insert(name, node, is_leaf) - if node.children[name] == nil then - node.children[name] = new_node(name, is_leaf) - end - - local child = node.children[name] - child.count = child.count + 1 - - return child -end - -local function insert_lua_callchain(chain, lua, symbols) - local ins_cnt = 0 - for _,fr in pairs(lua.callchain) do - local name_lua - - ins_cnt = ins_cnt + 1 - if fr.type == parse.FRAME.FFUNC then - name_lua = vmdef.ffnames[fr.ffid] - else - name_lua = symtab.demangle(symbols, { - addr = fr.addr, - line = fr.line, - gen = fr.gen - }) - if lua.trace.traceno 
~= nil and lua.trace.addr == fr.addr and - lua.trace.line == fr.line then - name_lua = symtab.demangle(symbols, { - addr = fr.addr, - traceno = lua.trace.traceno, - gen = fr.gen - }) - end - - if fr.type == parse.FRAME.CFUNC then - -- C function encountered, the next chunk - -- of frames is located on the C stack. - break - end - end - - table.insert(chain, 1, { name = name_lua }) - end - table.remove(lua.callchain, ins_cnt) -end - --- merge lua and host callchains into one callchain representing --- transfer of control -local function merge(event, symbols) - local cc = {} - - for _,h_fr in pairs(event.host.callchain) do - local name_host = symtab.demangle(symbols, { - addr = h_fr.addr, - gen = h_fr.gen - }) - table.insert(cc, 1, { name = name_host }) - - if string.match(name_host, '^lua_cpcall') ~= nil then - -- Any C function is present on both the C and the Lua - -- stacks. It is more convenient to get its info from the - -- host stack, since it has information about child frames. - table.remove(event.lua.callchain, 1) - end - - if string.match(name_host, '^lua_p?call') ~= nil then - insert_lua_callchain(cc, event.lua, symbols) - end - - end - - return cc -end - --- Collapse all the events into call tree -function M.collapse(events, symbols) - local root = new_node('root', false) - - for _,ev in pairs(events) do - local callchain = merge(ev, symbols) - local curr_node = root - for i=#callchain,1,-1 do - curr_node = insert(callchain[i].name, curr_node, false) - end - insert('', curr_node, true) - end - - return root -end - -return M +-- FIXME: This line is not deleted only for the sake of +-- integrational testing. It should be deleted in the +-- next series. diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua index 5b52f104..19add4f3 100755 --- a/tools/sysprof/parse.lua +++ b/tools/sysprof/parse.lua @@ -2,6 +2,7 @@ -- The format spec can be found in . 
local symtab = require "utils.symtab" +local vmdef = require "jit.vmdef" local string_format = string.format @@ -10,7 +11,7 @@ local LJP_CURRENT_VERSION = 2 local M = {} -M.VMST = { +local VMST = { INTERP = 0, LFUNC = 1, FFUNC = 2, @@ -25,13 +26,14 @@ M.VMST = { } -M.FRAME = { +local FRAME = { LFUNC = 1, CFUNC = 2, FFUNC = 3, BOTTOM = 0x80 } + local STREAM_END = 0x80 local SYMTAB_LFUNC_EVENT = 10 local SYMTAB_CFUNC_EVENT = 11 @@ -54,42 +56,40 @@ local function new_event() } end -local function parse_lfunc(reader, event, symbols) +local function parse_lfunc(reader, symbols) local addr = reader:read_uleb128() local line = reader:read_uleb128() local loc = symtab.loc(symbols, { addr = addr, line = line }) - loc.type = M.FRAME.LFUNC - table.insert(event.lua.callchain, 1, loc) + loc.type = FRAME.LFUNC + return symtab.demangle(symbols, loc) end -local function parse_ffunc(reader, event, _) +local function parse_ffunc(reader, _) local ffid = reader:read_uleb128() - table.insert(event.lua.callchain, 1, { - type = M.FRAME.FFUNC, - ffid = ffid, - }) + return vmdef.ffnames[ffid] end -local function parse_cfunc(reader, event, symbols) +local function parse_cfunc(reader, symbols) local addr = reader:read_uleb128() local loc = symtab.loc(symbols, { addr = addr }) - loc.type = M.FRAME.CFUNC - table.insert(event.lua.callchain, 1, loc) + loc.type = FRAME.CFUNC + return symtab.demangle(symbols, loc) end local frame_parsers = { - [M.FRAME.LFUNC] = parse_lfunc, - [M.FRAME.FFUNC] = parse_ffunc, - [M.FRAME.CFUNC] = parse_cfunc + [FRAME.LFUNC] = parse_lfunc, + [FRAME.FFUNC] = parse_ffunc, + [FRAME.CFUNC] = parse_cfunc } local function parse_lua_callchain(reader, event, symbols) while true do local frame_header = reader:read_octet() - if frame_header == M.FRAME.BOTTOM then + if frame_header == FRAME.BOTTOM then break end - frame_parsers[frame_header](reader, event, symbols) + local name = frame_parsers[frame_header](reader, symbols) + table.insert(event.lua.callchain, 1, {name = name, 
type = frame_header}) end end @@ -100,7 +100,7 @@ local function parse_host_callchain(reader, event, symbols) while addr ~= 0 do local loc = symtab.loc(symbols, { addr = addr }) - table.insert(event.host.callchain, 1, loc) + table.insert(event.host.callchain, 1, symtab.demangle(symbols, loc)) addr = reader:read_uleb128() end end @@ -108,10 +108,20 @@ end --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- local function parse_trace_callchain(reader, event, symbols) - event.lua.trace.traceno = reader:read_uleb128() - event.lua.trace.addr = reader:read_uleb128() - event.lua.trace.line = reader:read_uleb128() - event.lua.trace.gen = symtab.loc(symbols, event.lua.trace).gen + local loc = {} + loc.traceno = reader:read_uleb128() + loc.addr = reader:read_uleb128() + loc.line = reader:read_uleb128() + + local gen = symtab.loc(symbols, loc).gen + local name_lua = symtab.demangle(symbols, { + addr = loc.addr, + traceno = loc.traceno, + gen = gen + }) + event.lua.trace = loc + event.lua.trace.gen = gen + event.lua.trace.name = name_lua end --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-- @@ -143,18 +153,63 @@ local function parse_symtab(reader, symbols, vmstate) end local event_parsers = { - [M.VMST.INTERP] = parse_host_only, - [M.VMST.LFUNC] = parse_lua_host, - [M.VMST.FFUNC] = parse_lua_host, - [M.VMST.CFUNC] = parse_lua_host, - [M.VMST.GC] = parse_host_only, - [M.VMST.EXIT] = parse_host_only, - [M.VMST.RECORD] = parse_host_only, - [M.VMST.OPT] = parse_host_only, - [M.VMST.ASM] = parse_host_only, - [M.VMST.TRACE] = parse_trace, + [VMST.INTERP] = parse_host_only, + [VMST.LFUNC] = parse_lua_host, + [VMST.FFUNC] = parse_lua_host, + [VMST.CFUNC] = parse_lua_host, + [VMST.GC] = parse_host_only, + [VMST.EXIT] = parse_host_only, + [VMST.RECORD] = parse_host_only, + [VMST.OPT] = parse_host_only, + [VMST.ASM] = parse_host_only, + [VMST.TRACE] = parse_trace, } +local function insert_lua_callchain(chain, lua) + local ins_cnt = 0 
+ local name_lua + for _, fr in ipairs(lua.callchain) do + ins_cnt = ins_cnt + 1 + if fr.type == FRAME.CFUNC then + -- C function encountered, the next chunk + -- of frames is located on the C stack. + break + end + name_lua = fr.name + + if fr.type == FRAME.LFUNC + and lua.trace.traceno ~= nil + and lua.trace.addr == fr.addr + and lua.trace.line == fr.line + then + name_lua = lua.trace.name + end + + table.insert(chain, name_lua) + end + table.remove(lua.callchain, ins_cnt) +end + +local function merge(event) + local callchain = {} + + for _, name_host in ipairs(event.host.callchain) do + table.insert(callchain, name_host) + if string.match(name_host, '^lua_cpcall') ~= nil then + -- Any C function is present on both the C and the Lua + -- stacks. It is more convenient to get its info from the + -- host stack, since it has information about child frames. + table.remove(event.lua.callchain) + end + + if string.match(name_host, '^lua_p?call') ~= nil then + insert_lua_callchain(callchain, event.lua) + end + + end + return table.concat(callchain, ';') .. ';' +end + local function parse_event(reader, events, symbols) local event = new_event() @@ -171,8 +226,9 @@ local function parse_event(reader, events, symbols) event.lua.vmstate = vmstate event_parsers[vmstate](reader, event, symbols) - - table.insert(events, event) + local callchain = merge(event) + local cur_cnt = events[callchain] + events[callchain] = (cur_cnt or 0) + 1 return true end -- 2.41.0