From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 228026FC83; Fri, 20 Aug 2021 14:11:42 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 228026FC83 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1629457902; bh=Kydn0k21jl1Lh9dCOxNRx69TM2G27bQqjuLxQGVykA4=; h=To:Date:In-Reply-To:References:Subject:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=maqSe5x985Ri83fHs1BbuiYzMxqeioYCJoZBY+a+nHyBnoED20LGP9AGdCul4/a5a lYVNIOa4f65vpsrGtAhpHXtfXeAe6y+deoYOmXyI/dBk+ORfRwhGAZ10I+gcYNgt28 AO9AxwIsWtfu4dtNORTv+gQK832Qi+ZTI6VwAjHo= Received: from mail-lj1-f180.google.com (mail-lj1-f180.google.com [209.85.208.180]) (using TLSv1.3 with cipher TLS_AES_128_GCM_SHA256 (128/128 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 37D206DB03 for ; Fri, 20 Aug 2021 14:10:45 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 37D206DB03 Received: by mail-lj1-f180.google.com with SMTP id y6so16716932lje.2 for ; Fri, 20 Aug 2021 04:10:45 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=KF4UPT+r320R8nkISwbE/dF082IH/RAVIn+59Pc+K0g=; b=PXuy+cpWnoI52WCQGSnbbTLLtWxv/mCVrdXyGQ77ohX3U8AoGdp+K8DfA49QyLjkzr Gv+5pPUifASiJhxRFndrJa3gC+RHdjY3qJIZKtNMbPqw4kRsfZlLXQ6VAzIO8WHEIX7T nQAN6zgjueSL+Eaf4f9FUuuoWU+qCvlQKsgDaqrLH9ArS2pPXVaH/mgs82laNdQI0QPl 5gTYFH+ZWRJyoChvU4cwDCmiN6l2uGYZjDDwfZPXJvkkoiZx7IgES1rA7HB9R80/yAB4 UdgFa3hpTWrr/ZU1nWsYY2QzK96WLocROKFZWkCooxxGHJ72BheNYG7yrbDoY8jAO+kM qVzQ== X-Gm-Message-State: AOAM532yfenHg83H4+74Pv+XHBJN4aC/Lo/aFBioLjp3s8F7NvPWkaPE sTixM9mMK4gL9R0YUP3gTJTmvvT7EdhUogLg 
X-Google-Smtp-Source: ABdhPJxFI7xauqKSGDglfP+SiA+Btcxupu6uEOz9YzXSmBTnNIYFgYzSFLiRyAv2sF/PwDbcfacQDA== X-Received: by 2002:a2e:4c1a:: with SMTP id z26mr15760239lja.9.1629457844433; Fri, 20 Aug 2021 04:10:44 -0700 (PDT) Received: from localhost.localdomain ([93.175.2.170]) by smtp.gmail.com with ESMTPSA id k15sm183951lfv.141.2021.08.20.04.10.43 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Fri, 20 Aug 2021 04:10:43 -0700 (PDT) X-Google-Original-From: Maxim Kokryashkin To: tarantool-patches@dev.tarantool.org, imun@tarantool.org, skaplun@tarantool.org Date: Fri, 20 Aug 2021 14:10:34 +0300 Message-Id: <296488fa8dd8b16401d828b26e32d70c9e07b275.1629457244.git.m.kokryashkin@tarantool.org> X-Mailer: git-send-email 2.32.0 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH luajit v2 3/3] memprof: update memprof parser X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Maxim Kokryashkin via Tarantool-patches Reply-To: Maxim Kokryashkin Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" This commit introduces demangling of C symbols to the memprof parser. As the symbol table format has changed, the parser needed to be updated too. Now the parser supports new symbol table entries containing data about the shared objects that were loaded at the moment of data collection. Secondly, we needed a way to verify that the .so libraries we encounter during parsing are the same libraries we encountered during data collection. A full CRC32 hash of .so file contents is used for that purpose. C symbol resolution is implemented with dladdr, which resolves symbols well only if they are loaded into the dynamic symbol table. 
Hence, LuaJIT now has to be compiled with -rdynamic option, which forces the dynamic linker to add all the symbols from loaded .so libraries to the dynamic symbol table. Closes tarantool/tarantool#5813 --- CMakeLists.txt | 2 + .../misclib-memprof-lapi.test.lua | 5 +- tools/CMakeLists.txt | 2 + tools/utils/hash.lua | 99 +++++++++++++++ tools/utils/symtab.lua | 114 +++++++++++++++++- 5 files changed, 214 insertions(+), 8 deletions(-) create mode 100644 tools/utils/hash.lua diff --git a/CMakeLists.txt b/CMakeLists.txt index 5348e043..08243f11 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,6 +81,8 @@ endif() # --- Compilation flags setup -------------------------------------------------- +AppendFlags(TARGET_C_FLAGS -rdynamic) + if(NOT CMAKE_INSTALL_PREFIX STREQUAL "/usr/local") AppendFlags(TARGET_C_FLAGS -DLUA_ROOT='"${CMAKE_INSTALL_PREFIX}"') endif() diff --git a/test/tarantool-tests/misclib-memprof-lapi.test.lua b/test/tarantool-tests/misclib-memprof-lapi.test.lua index 06d96b3b..65b6afa6 100644 --- a/test/tarantool-tests/misclib-memprof-lapi.test.lua +++ b/test/tarantool-tests/misclib-memprof-lapi.test.lua @@ -53,6 +53,7 @@ local function generate_output(filename) end local function fill_ev_type(events, symbols, event_type) + local SYMTAB_LFUNC = 0 local ev_type = {} for _, event in pairs(events[event_type]) do local addr = event.loc.addr @@ -61,10 +62,10 @@ local function fill_ev_type(events, symbols, event_type) name = "INTERNAL", num = event.num, } - elseif symbols[addr] then + elseif symbols[SYMTAB_LFUNC][addr] then ev_type[event.loc.line] = { name = string.format( - "%s:%d", symbols[addr].source, symbols[addr].linedefined + "%s:%d", symbols[SYMTAB_LFUNC][addr].source, symbols[SYMTAB_LFUNC][addr].linedefined ), num = event.num, } diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 61830e44..a70be4ea 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -32,6 +32,7 @@ else() memprof.lua utils/bufread.lua utils/symtab.lua + 
utils/hash.lua ) list(APPEND LUAJIT_TOOLS_DEPS tools-parse-memprof) @@ -48,6 +49,7 @@ else() install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/utils/bufread.lua ${CMAKE_CURRENT_SOURCE_DIR}/utils/symtab.lua + ${CMAKE_CURRENT_SOURCE_DIR}/utils/hash.lua DESTINATION ${LUAJIT_DATAROOTDIR}/utils PERMISSIONS OWNER_READ OWNER_WRITE diff --git a/tools/utils/hash.lua b/tools/utils/hash.lua new file mode 100644 index 00000000..2724e2bb --- /dev/null +++ b/tools/utils/hash.lua @@ -0,0 +1,99 @@ +local bit = require "bit" + +local M = {} + +local CRC32 = { + 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, + 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, + 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, + 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, + 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, + 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, + 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, + 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, + 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, + 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, + 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, + 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, + 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, + 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, + 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, + 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, + 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, + 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, + 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, + 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, + 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, + 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, + 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, + 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, + 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, + 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, + 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, + 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, + 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, + 
0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, + 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, + 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, + 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, + 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, + 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, + 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, + 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, + 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, + 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, + 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, + 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, + 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, + 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, + 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, + 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, + 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, + 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, + 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, + 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, + 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, + 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, + 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, + 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, + 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, + 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, + 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, + 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, + 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, + 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, + 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, + 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, + 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, + 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, + 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 +} + +local xor = bit.bxor +local lshift = bit.lshift +local rshift = bit.rshift +local band = bit.band + +function M.crc32(str) + str = tostring(str) + local len = string.len(str) + local crc = 0xffffffff + local i = 1 + + while len > 0 do + local 
byte = string.byte(str, i) + i = i + 1 + len = len - 1 + + local tab_idx = band(xor(rshift(crc, 24), byte), 0xFF) + 1 + crc = xor(band(lshift(crc, 8), 2^32 - 1), CRC32[tab_idx]) + end + + -- dirty hack for bitop return number < 0 + if crc < 0 then crc = crc + 2 ^ 32 end + + return crc +end + + +return M diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua index 3ed1dd13..cb14b516 100644 --- a/tools/utils/symtab.lua +++ b/tools/utils/symtab.lua @@ -5,16 +5,57 @@ -- Copyright (C) 2015-2019 IPONWEB Ltd. local bit = require "bit" +local io = require "io" +local hash = require "utils.hash" local band = bit.band local string_format = string.format +local ffi = require "ffi" +local dl = ffi.load "dl" + +ffi.cdef[[ + typedef struct { + const char *dli_fname; + void *dli_fbase; + const char *dli_sname; + void *dli_saddr; + } Dl_info; + + typedef struct + { + uint32_t p_type; + uint32_t p_flags; + uint64_t p_offset; + uint64_t p_vaddr; + uint64_t p_paddr; + uint64_t p_filesz; + uint64_t p_memsz; + uint64_t p_align; + } Elf64_Phdr; + + struct dl_phdr_info { + uint64_t dlpi_addr; + const char *dlpi_name; + const Elf64_Phdr *dlpi_phdr; + uint16_t dlpi_phnum; + }; + + void *dlopen(const char *filename, int flags); + int dlclose(void *handle); + int dladdr(const void *addr, Dl_info *info); + int dl_iterate_phdr(int (*callback) (struct dl_phdr_info *info, size_t size, void *data), void *data); +]] + local LJS_MAGIC = "ljs" -local LJS_CURRENT_VERSION = 1 +local LJS_CURRENT_VERSION = 2 local LJS_EPILOGUE_HEADER = 0x80 local LJS_SYMTYPE_MASK = 0x03 local SYMTAB_LFUNC = 0 +local SYMTAB_SO = 1 + +local RTLD_NOW = 0x00002 local M = {} @@ -24,18 +65,59 @@ local function parse_sym_lfunc(reader, symtab) local sym_chunk = reader:read_string() local sym_line = reader:read_uleb128() - symtab[sym_addr] = { + symtab[SYMTAB_LFUNC][sym_addr] = { source = sym_chunk, linedefined = sym_line, } end +-- Parse a single entry in a symtab: .so library +local function parse_sym_so(reader, symtab) 
+ local path = reader:read_string() + local addr = reader:read_uleb128() + local so_hash = reader:read_uleb128() + + local handle = dl.dlopen(path, RTLD_NOW); + + if handle == nil then + return + end + + local file = io.open(path, "rb") + + if file == nil then + dl.dlclose(handle); + return + end + + local content = file:read("*a") + local size = string.len(content) + file:close() + + if hash.crc32(content) ~= so_hash then + dl.dlclose(handle) + return + end + + symtab[SYMTAB_SO][path] = { + handle = handle, + size = size, + old_addr = addr, + new_addr = ffi.cast("void*", ffi.cast("uint64_t*", handle)[0]) + } +end + local parsers = { [SYMTAB_LFUNC] = parse_sym_lfunc, + [SYMTAB_SO] = parse_sym_so } function M.parse(reader) - local symtab = {} + local symtab = { + [SYMTAB_LFUNC] = {}, + [SYMTAB_SO] = {} + } + local magic = reader:read_octets(3) local version = reader:read_octets(1) @@ -69,7 +151,6 @@ function M.parse(reader) parsers[sym_type](reader, symtab) end end - return symtab end @@ -80,8 +161,29 @@ function M.demangle(symtab, loc) return "INTERNAL" end - if symtab[addr] then - return string_format("%s:%d", symtab[addr].source, loc.line) + if symtab[SYMTAB_LFUNC][addr] then + return string_format("%s:%d", symtab[SYMTAB_LFUNC][addr].source, loc.line) + end + + local pDl_info = ffi.new'Dl_info[1]' + + for _, info in pairs(symtab[SYMTAB_SO]) do + local offset = addr - info.old_addr + + if offset <= info.size and offset > 0 then + local ptr = ffi.cast("void*", offset + ffi.cast('uintptr_t', info.new_addr)) + + if 0 ~= dl.dladdr(ptr, pDl_info) then + local file = ffi.string(pDl_info[0].dli_fname) + + local symbol = 'unresolved symbol' + if pDl_info[0].dli_sname ~= nil then + symbol = ffi.string(pDl_info[0].dli_sname) + end + + return string_format("%s:%s %#x", file, symbol, addr) + end + end end return string_format("CFUNC %#x", addr) -- 2.32.0