From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 045F66EC55; Fri, 23 Jul 2021 15:41:00 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 045F66EC55 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1627044060; bh=nu4ucOhzronBHxqNGpMf5E+0RDvcPzOr57YXhtK8oDE=; h=To:Date:In-Reply-To:References:Subject:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=UeMhRp77G79SVsPX6Thv2fmXFiMrn0qI7FnIk9nsAI7zBBdLCp2t6bIFv2YWPebR5 zwcO9QGBAMjq790pjcg/jQX8nzmC+/RwNRsmPwDQ157aWtoUMkna9tEMbnVMbtxsHz CRAKvUQw5bunBimxqnhQ344b2+saPAhDAEvaYYp0= Received: from mail-lj1-f171.google.com (mail-lj1-f171.google.com [209.85.208.171]) (using TLSv1.3 with cipher TLS_AES_128_GCM_SHA256 (128/128 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 4F3FD6EC5C for ; Fri, 23 Jul 2021 15:40:00 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 4F3FD6EC5C Received: by mail-lj1-f171.google.com with SMTP id m9so1508896ljp.7 for ; Fri, 23 Jul 2021 05:40:00 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=PIAv1VlDtx6HqISCuRRaGJcqo6pCXTK2m8m3/sR9xCQ=; b=CwdTEsDanyqQ+C4w7oBGFZuXspByd+4zjx2NAIHSSXQKzxGzxy+v7iHLCB1Hb50vf6 QLmrLlOMdEi8HmnS5gxKhlRonyBwOYop43D1227JHkuxskOL8gXJiiUsfkysL28snV23 ya4Q/R2seqEkVBkXTyjPRTHK0GxRsob6KWnwBaJFqXtxPEN0Dw5CRX95l3vG8zfKdOqp wAk6WJhoS6+zJyydyWb4YRSCuvjRwFG+AQ424JdJ+Z0DTalNgO63LJBZTFbXotx+957Z sPQItFZGmJpJMhbIaHi05z/GZ7t0qXR1OzhkTZuk5y7spkT4b9V5ZLbAtR5Jjc27yIYW DYfA== X-Gm-Message-State: AOAM530s+qRjXoyy4xGqPXnts2Qe/FqiZmz3zBa1M04kZFaF7BcfXBKv 8HGrH2VHVgRpk/ReJQ07C82TMZZhofwCo/iW 
X-Google-Smtp-Source: ABdhPJz0ueHR4qXQSGhOOoAYHh/spbS188qSLPCVznD1iu/deptezEXn79YSwcAKEtDttoQMN28ZNQ== X-Received: by 2002:a2e:b0f6:: with SMTP id h22mr3391397ljl.274.1627043999620; Fri, 23 Jul 2021 05:39:59 -0700 (PDT) Received: from localhost.localdomain ([2a00:1370:8131:3d05:9ca1:57e9:6bb:c500]) by smtp.gmail.com with ESMTPSA id z23sm2224545lfg.153.2021.07.23.05.39.58 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Fri, 23 Jul 2021 05:39:59 -0700 (PDT) X-Google-Original-From: Maxim Kokryashkin To: tarantool-patches@dev.tarantool.org, imun@tarantool.org, skaplun@tarantool.org Date: Fri, 23 Jul 2021 15:39:51 +0300 Message-Id: <8cf56790469a71a3735edbd9116a12538781cb95.1627043674.git.m.kokryashkin@tarantool.org> X-Mailer: git-send-email 2.32.0 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH 2/3] memprof: update memprof parser X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Maxim Kokryashkin via Tarantool-patches Reply-To: Maxim Kokryashkin Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" This commit introduces demangling of C symbols to the memprof parser. As the symbol table format has changed, the parser needed to be updated too. Now the parser supports new symbol table entries containing data about shared objects that were loaded at the moment of data collection. Secondly, we needed a way to verify that .so libraries that we encounter during the process of parsing are the same libraries we have encountered during the process of data collection. A full CRC32 hash of .so file contents is used for that purpose. C symbol resolution is implemented with dladdr, which resolves symbols well only if they are loaded into the dynamic symbol table. 
Hence, LuaJIT now has to be compiled with -rdynamic option, which forces the dynamic linker to add all the symbols from loaded .so libraries to the dynamic symbol table. Part of tarantool/tarantool#5813 --- CMakeLists.txt | 2 + tools/CMakeLists.txt | 2 + tools/utils/hash.lua | 99 ++++++++++++++++++++++++++++++++++++ tools/utils/symtab.lua | 112 +++++++++++++++++++++++++++++++++++++++-- 4 files changed, 210 insertions(+), 5 deletions(-) create mode 100644 tools/utils/hash.lua diff --git a/CMakeLists.txt b/CMakeLists.txt index 5348e043..08243f11 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,6 +81,8 @@ endif() # --- Compilation flags setup -------------------------------------------------- +AppendFlags(TARGET_C_FLAGS -rdynamic) + if(NOT CMAKE_INSTALL_PREFIX STREQUAL "/usr/local") AppendFlags(TARGET_C_FLAGS -DLUA_ROOT='"${CMAKE_INSTALL_PREFIX}"') endif() diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 61830e44..a70be4ea 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -32,6 +32,7 @@ else() memprof.lua utils/bufread.lua utils/symtab.lua + utils/hash.lua ) list(APPEND LUAJIT_TOOLS_DEPS tools-parse-memprof) @@ -48,6 +49,7 @@ else() install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/utils/bufread.lua ${CMAKE_CURRENT_SOURCE_DIR}/utils/symtab.lua + ${CMAKE_CURRENT_SOURCE_DIR}/utils/hash.lua DESTINATION ${LUAJIT_DATAROOTDIR}/utils PERMISSIONS OWNER_READ OWNER_WRITE diff --git a/tools/utils/hash.lua b/tools/utils/hash.lua new file mode 100644 index 00000000..b6abc89c --- /dev/null +++ b/tools/utils/hash.lua @@ -0,0 +1,99 @@ +local bit = require "bit" + +local M = {} + +local CRC32 = { + 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, + 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, + 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, + 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, + 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, + 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, + 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, + 0x791d4014, 
0x7ddc5da3, 0x709f7b7a, 0x745e66cd, + 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, + 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, + 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, + 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, + 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, + 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, + 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, + 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, + 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, + 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, + 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, + 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, + 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, + 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, + 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, + 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, + 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, + 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, + 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, + 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, + 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, + 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, + 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, + 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, + 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, + 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, + 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, + 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, + 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, + 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, + 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, + 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, + 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, + 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, + 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, + 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, + 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, + 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, + 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, + 0x8832161a, 
0x8cf30bad, 0x81b02d74, 0x857130c3, + 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, + 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, + 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, + 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, + 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, + 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, + 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, + 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, + 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, + 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, + 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, + 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, + 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, + 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, + 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, + 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 +} + +local xor = bit.bxor +local lshift = bit.lshift +local rshift = bit.rshift +local band = bit.band + +function M.crc32(str) + str = tostring(str) + local len = string.len(str) + local crc = 0 + local i = 1 + + while len > 0 do + local byte = string.byte(str, i) + i = i + 1 + len = len - 1 + + local tab_idx = band(xor(rshift(crc, 24), byte), 0xFF) + 1 + crc = xor(band(lshift(crc, 8), 2^32 - 1), CRC32[tab_idx]) + end + + -- dirty hack for bitop return number < 0 + if crc < 0 then crc = crc + 2 ^ 32 end + + return crc +end + + +return M diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua index 3ed1dd13..c5724d80 100644 --- a/tools/utils/symtab.lua +++ b/tools/utils/symtab.lua @@ -5,16 +5,57 @@ -- Copyright (C) 2015-2019 IPONWEB Ltd. 
local bit = require "bit" +local io = require "io" +local hash = require "utils.hash" local band = bit.band local string_format = string.format +local ffi = require "ffi" +local dl = ffi.load "dl" + +ffi.cdef[[ + typedef struct { + const char *dli_fname; + void *dli_fbase; + const char *dli_sname; + void *dli_saddr; + } Dl_info; + + typedef struct + { + uint32_t p_type; + uint32_t p_flags; + uint64_t p_offset; + uint64_t p_vaddr; + uint64_t p_paddr; + uint64_t p_filesz; + uint64_t p_memsz; + uint64_t p_align; + } Elf64_Phdr; + + struct dl_phdr_info { + uint64_t dlpi_addr; + const char *dlpi_name; + const Elf64_Phdr *dlpi_phdr; + uint16_t dlpi_phnum; + }; + + void *dlopen(const char *filename, int flags); + int dlclose(void *handle); + int dladdr(const void *addr, Dl_info *info); + int dl_iterate_phdr(int (*callback) (struct dl_phdr_info *info, size_t size, void *data), void *data); +]] + local LJS_MAGIC = "ljs" local LJS_CURRENT_VERSION = 1 local LJS_EPILOGUE_HEADER = 0x80 local LJS_SYMTYPE_MASK = 0x03 local SYMTAB_LFUNC = 0 +local SYMTAB_SO = 1 + +local RTLD_NOW = 0x00002 local M = {} @@ -24,18 +65,59 @@ local function parse_sym_lfunc(reader, symtab) local sym_chunk = reader:read_string() local sym_line = reader:read_uleb128() - symtab[sym_addr] = { + symtab[SYMTAB_LFUNC][sym_addr] = { source = sym_chunk, linedefined = sym_line, } end +-- Parse a single entry in a symtab: .so library +local function parse_sym_so(reader, symtab) + local path = reader:read_string() + local addr = reader:read_uleb128() + local so_hash = reader:read_uleb128() + + local handle = dl.dlopen(path, RTLD_NOW); + + if handle == nil then + return + end + + local file = io.open(path, "rb") + + if file == nil then + dl.dlclose(handle); + return + end + + local content = file:read("*a") + local size = string.len(content) + file:close() + + if hash.crc32(content) ~= so_hash then + dl.dlclose(handle) + return + end + + symtab[SYMTAB_SO][path] = { + handle = handle, + size = size, + old_addr = 
addr, + new_addr = ffi.cast("void*", ffi.cast("uint64_t*", handle)[0]) + } +end + local parsers = { [SYMTAB_LFUNC] = parse_sym_lfunc, + [SYMTAB_SO] = parse_sym_so } function M.parse(reader) - local symtab = {} + local symtab = { + [SYMTAB_LFUNC] = {}, + [SYMTAB_SO] = {} + } + local magic = reader:read_octets(3) local version = reader:read_octets(1) @@ -69,7 +151,6 @@ function M.parse(reader) parsers[sym_type](reader, symtab) end end - return symtab end @@ -80,8 +161,29 @@ function M.demangle(symtab, loc) return "INTERNAL" end - if symtab[addr] then - return string_format("%s:%d", symtab[addr].source, loc.line) + if symtab[SYMTAB_LFUNC][addr] then + return string_format("%s:%d", symtab[SYMTAB_LFUNC][addr].source, loc.line) + end + + local pDl_info = ffi.new'Dl_info[1]' + + for _, info in pairs(symtab[SYMTAB_SO]) do + local offset = addr - info.old_addr + + if offset <= info.size and offset > 0 then + local ptr = ffi.cast("void*", offset + ffi.cast('uintptr_t', info.new_addr)) + + if 0 ~= dl.dladdr(ptr, pDl_info) then + local file = ffi.string(pDl_info[0].dli_fname) + + local symbol = 'unresolved symbol' + if pDl_info[0].dli_sname ~= nil then + symbol = ffi.string(pDl_info[0].dli_sname) + end + + return string_format("%s:%s %#x", file, symbol, addr) + end + end end return string_format("CFUNC %#x", addr) -- 2.32.0