[Tarantool-patches] [PATCH 2/3] memprof: update memprof parser

Maxim Kokryashkin max.kokryashkin at gmail.com
Fri Jul 23 15:39:51 MSK 2021


This commit introduces demangling of C symbols to memprof parser.

As symbol table format has changed, parser needed to be updated too. Now
the parser supports new symbol table entries, containing data about shared
objects, that were loaded at the moment of data collection.

Secondly, we needed a way to verify, that .so libraries that we encounter
during the process of parsing are the same libraires we have encountered during
the process of a data collection. A full CRC32 hash of .so file contents is used
for that purpose.

C symbols resolution is implemented with dladdr, which resolves symbols
well only if they are loaded into the dynamic symbol table. Hence, LuaJIT now has to be
compiled with -rdynamic option, which forces the dynamic linker to add
all the symbols from loaded .so libraries to the dynamic symbol table.

Part of tarantool/tarantool#5813
---
 CMakeLists.txt         |   2 +
 tools/CMakeLists.txt   |   2 +
 tools/utils/hash.lua   |  99 ++++++++++++++++++++++++++++++++++++
 tools/utils/symtab.lua | 112 +++++++++++++++++++++++++++++++++++++++--
 4 files changed, 210 insertions(+), 5 deletions(-)
 create mode 100644 tools/utils/hash.lua

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5348e043..08243f11 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -81,6 +81,8 @@ endif()
 
 # --- Compilation flags setup --------------------------------------------------
 
+AppendFlags(TARGET_C_FLAGS -rdynamic)
+
 if(NOT CMAKE_INSTALL_PREFIX STREQUAL "/usr/local")
   AppendFlags(TARGET_C_FLAGS -DLUA_ROOT='"${CMAKE_INSTALL_PREFIX}"')
 endif()
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 61830e44..a70be4ea 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -32,6 +32,7 @@ else()
     memprof.lua
     utils/bufread.lua
     utils/symtab.lua
+    utils/hash.lua
   )
   list(APPEND LUAJIT_TOOLS_DEPS tools-parse-memprof)
 
@@ -48,6 +49,7 @@ else()
   install(FILES
       ${CMAKE_CURRENT_SOURCE_DIR}/utils/bufread.lua
       ${CMAKE_CURRENT_SOURCE_DIR}/utils/symtab.lua
+      ${CMAKE_CURRENT_SOURCE_DIR}/utils/hash.lua
     DESTINATION ${LUAJIT_DATAROOTDIR}/utils
     PERMISSIONS
       OWNER_READ OWNER_WRITE
diff --git a/tools/utils/hash.lua b/tools/utils/hash.lua
new file mode 100644
index 00000000..b6abc89c
--- /dev/null
+++ b/tools/utils/hash.lua
@@ -0,0 +1,99 @@
+local bit = require "bit"
+
+local M = {}
+
+local CRC32 = {
+  0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9,
+  0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005,
+  0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61,
+  0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd,
+  0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9,
+  0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75,
+  0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011,
+  0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd,
+  0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039,
+  0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5,
+  0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81,
+  0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d,
+  0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49,
+  0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95,
+  0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
+  0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d,
+  0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae,
+  0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072,
+  0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16,
+  0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca,
+  0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde,
+  0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02,
+  0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066,
+  0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
+  0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e,
+  0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692,
+  0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6,
+  0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a,
+  0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e,
+  0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2,
+  0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686,
+  0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a,
+  0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637,
+  0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb,
+  0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f,
+  0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53,
+  0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47,
+  0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b,
+  0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff,
+  0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623,
+  0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7,
+  0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
+  0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f,
+  0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3,
+  0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7,
+  0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b,
+  0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f,
+  0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3,
+  0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640,
+  0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c,
+  0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8,
+  0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24,
+  0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30,
+  0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
+  0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088,
+  0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654,
+  0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0,
+  0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c,
+  0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18,
+  0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
+  0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0,
+  0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c,
+  0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668,
+  0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4
+}
+
+local xor = bit.bxor
+local lshift = bit.lshift
+local rshift = bit.rshift
+local band = bit.band
+
+function M.crc32(str)
+    str = tostring(str)
+    local len = string.len(str)
+    local crc = 0
+    local i = 1
+
+    while len > 0 do
+        local byte = string.byte(str, i)
+        i = i + 1
+        len = len - 1
+
+        local tab_idx = band(xor(rshift(crc, 24), byte), 0xFF) + 1
+        crc = xor(band(lshift(crc, 8), 2^32 - 1), CRC32[tab_idx])
+    end
+
+    -- dirty hack for bitop return number < 0
+    if crc < 0 then crc = crc + 2 ^ 32 end
+
+    return crc
+end
+
+
+return M
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
index 3ed1dd13..c5724d80 100644
--- a/tools/utils/symtab.lua
+++ b/tools/utils/symtab.lua
@@ -5,16 +5,57 @@
 -- Copyright (C) 2015-2019 IPONWEB Ltd.
 
 local bit = require "bit"
+local io = require "io"
+local hash = require "utils.hash"
 
 local band = bit.band
 local string_format = string.format
 
+local ffi = require "ffi"
+local dl = ffi.load "dl"
+
+ffi.cdef[[
+  typedef struct {
+    const char *dli_fname;
+    void *dli_fbase;
+    const char *dli_sname;
+    void *dli_saddr;
+  } Dl_info;
+
+  typedef struct
+  {
+    uint32_t        p_type;
+    uint32_t        p_flags;
+    uint64_t        p_offset;
+    uint64_t        p_vaddr;
+    uint64_t        p_paddr;
+    uint64_t        p_filesz;
+    uint64_t        p_memsz;
+    uint64_t        p_align;
+  } Elf64_Phdr;
+
+  struct dl_phdr_info {
+    uint64_t dlpi_addr;
+    const char *dlpi_name;
+    const Elf64_Phdr *dlpi_phdr;
+    uint16_t dlpi_phnum;
+  };
+
+  void *dlopen(const char *filename, int flags);
+  int dlclose(void *handle);
+  int dladdr(const void *addr, Dl_info *info);
+  int dl_iterate_phdr(int (*callback) (struct dl_phdr_info *info, size_t size, void *data), void *data);
+]]
+
 local LJS_MAGIC = "ljs"
 local LJS_CURRENT_VERSION = 1
 local LJS_EPILOGUE_HEADER = 0x80
 local LJS_SYMTYPE_MASK = 0x03
 
 local SYMTAB_LFUNC = 0
+local SYMTAB_SO = 1
+
+local RTLD_NOW = 0x00002
 
 local M = {}
 
@@ -24,18 +65,59 @@ local function parse_sym_lfunc(reader, symtab)
   local sym_chunk = reader:read_string()
   local sym_line = reader:read_uleb128()
 
-  symtab[sym_addr] = {
+  symtab[SYMTAB_LFUNC][sym_addr] = {
     source = sym_chunk,
     linedefined = sym_line,
   }
 end
 
+-- Parse a single entry in a symtab: .so library
+local function parse_sym_so(reader, symtab)
+  local path = reader:read_string()
+  local addr = reader:read_uleb128()
+  local so_hash = reader:read_uleb128()
+
+  local handle = dl.dlopen(path, RTLD_NOW);
+
+  if handle == nil then
+    return
+  end
+
+  local file = io.open(path, "rb")
+
+  if file == nil then
+    dl.dlclose(handle);
+    return
+  end
+
+  local content = file:read("*a")
+  local size = string.len(content)
+  file:close()
+
+  if hash.crc32(content) ~= so_hash then
+    dl.dlclose(handle)
+    return
+  end
+
+  symtab[SYMTAB_SO][path] = {
+    handle = handle,
+    size = size,
+    old_addr = addr,
+    new_addr = ffi.cast("void*", ffi.cast("uint64_t*", handle)[0])
+  }
+end
+
 local parsers = {
   [SYMTAB_LFUNC] = parse_sym_lfunc,
+  [SYMTAB_SO] = parse_sym_so
 }
 
 function M.parse(reader)
-  local symtab = {}
+  local symtab = {
+    [SYMTAB_LFUNC] = {},
+    [SYMTAB_SO] = {}
+  }
+
   local magic = reader:read_octets(3)
   local version = reader:read_octets(1)
 
@@ -69,7 +151,6 @@ function M.parse(reader)
       parsers[sym_type](reader, symtab)
     end
   end
-
   return symtab
 end
 
@@ -80,8 +161,29 @@ function M.demangle(symtab, loc)
     return "INTERNAL"
   end
 
-  if symtab[addr] then
-    return string_format("%s:%d", symtab[addr].source, loc.line)
+  if symtab[SYMTAB_LFUNC][addr] then
+    return string_format("%s:%d", symtab[SYMTAB_LFUNC][addr].source, loc.line)
+  end
+
+  local pDl_info = ffi.new'Dl_info[1]'
+
+  for _, info in pairs(symtab[SYMTAB_SO]) do
+    local offset = addr - info.old_addr
+
+    if offset <= info.size and offset > 0 then
+      local ptr = ffi.cast("void*", offset + ffi.cast('uintptr_t', info.new_addr))
+
+      if 0 ~= dl.dladdr(ptr, pDl_info) then
+        local file = ffi.string(pDl_info[0].dli_fname)
+
+        local symbol = 'unresolved symbol'
+        if pDl_info[0].dli_sname ~= nil then
+          symbol = ffi.string(pDl_info[0].dli_sname)
+        end
+
+        return string_format("%s:%s %#x", file, symbol, addr)
+      end
+    end
   end
 
   return string_format("CFUNC %#x", addr)
-- 
2.32.0



More information about the Tarantool-patches mailing list