[Tarantool-patches] [PATCH luajit v7 2/2] memprof: enrich symtab with newly loaded symbols

Maxim Kokryashkin max.kokryashkin at gmail.com
Wed Apr 6 16:16:56 MSK 2022


This commit lets memprof extend its symtab when new
C-symbols appear after dlopen. The following data is
stored in event stream for each newly loaded symbol:

| (AEVENT_SYMTAB | ASOURCE_CFUNC) | symbol address | symbol name |
              1 byte                   8 bytes
              magic
              number

Every time an allocation occurs inside a C-function, memprof calls
the `dl_iterate_phdr` to check if the `dlpi_adds` has increased since
the last C-function call. If it has, memprof dumps symbols for last
`dlpi_adds` - `prev_dlpi_adds` libraries. Sometimes it may dump a
library that was already dumped, however, that's a rare case and
the parser handles it well.

Resolves tarantool/tarantool#5813
---
>> static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void *data)
>> @@ -350,23 +358,46 @@ static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void
>> struct symbol_resolver_conf *conf = data;
>> struct lj_wbuf *buf = conf->buf;
>> lua_State *L = conf->L;
>> + const uint8_t header = conf->header;
>> +
>> + const uint32_t lib_cnt_prev = *conf->lib_cnt;
>> + uint32_t lib_cnt = 0;

> Nit: I think that we can just declare `lib_cnt`, wo initialization.
I always initialize variables when declaring them. It's a good habit,
that sometimes really helps during the process of debugging. Ignoring


> Also, I noticed that this test is __really__ long now. May be we should
> use smaller amount of allocations?
Well, it takes a long time not because of the number of allocations, but because it
takes a long time to insert all of the C symbols into the AVL tree.
Anyway, I made the number of allocations less.

> Side note:
> Did you rebase you branch on Mikhail's one with enriched symtab for Lua
> functions and traces? It helps to avoid double review of the same things
> (and possible conflicts) in memprof.h, for example.
> Also, those commits should resolve address clashing when a different
> function allocates add the same address as the old deleted one.
No, I didn't because it is still not merged into master.
Also, it will be nice to add the following test:

> 0) Load 1 library, during memprof running, use functions from it with
>    allocations
> 1) Unload it, load the second one, use functions from it with
>    allocations
> 2) Load the 1st lib again, use functions again
> 3) Check that profile is correct
As Mike says in this[1] letter, it's impossible to unload a library
from Lua side. The only possible way for this to happen is when a
C-library for Lua loads and unloads another library. That case seems to
be really rare, so I don't think we need to test it.

[1]: https://www.freelists.org/post/luajit/BUG-Assertion-failures-when-unloading-and-reloading-the-ffi-package,1

 src/lj_memprof.c                              | 110 ++++++++++++++----
 src/lj_memprof.h                              |   8 +-
 .../gh-5813-resolving-of-c-symbols.test.lua   |  12 +-
 tools/memprof.lua                             |   5 +
 tools/memprof/parse.lua                       |  15 +++
 tools/utils/symtab.lua                        |   4 +
 6 files changed, 128 insertions(+), 26 deletions(-)

diff --git a/src/lj_memprof.c b/src/lj_memprof.c
index 74f9b810..2c6f67e7 100644
--- a/src/lj_memprof.c
+++ b/src/lj_memprof.c
@@ -122,7 +122,7 @@ static uint32_t ghashtab_size(ElfW(Addr) ghashtab)
 }
 
 static void write_c_symtab (ElfW(Sym *) sym, char *strtab, ElfW(Addr) so_addr,
-    size_t sym_cnt, struct lj_wbuf *buf)
+    size_t sym_cnt, const uint8_t header, struct lj_wbuf *buf)
 {
   /*
   ** Index 0 in ELF symtab is used to represent undefined symbols. Hence, we
@@ -142,7 +142,7 @@ static void write_c_symtab (ElfW(Sym *) sym, char *strtab, ElfW(Addr) so_addr,
     if (ELF32_ST_TYPE(sym[sym_index].st_info) == STT_FUNC &&
         sym[sym_index].st_name != 0) {
       char *sym_name = &strtab[sym[sym_index].st_name];
-      lj_wbuf_addbyte(buf, SYMTAB_CFUNC);
+      lj_wbuf_addbyte(buf, header);
       lj_wbuf_addu64(buf, sym[sym_index].st_value + so_addr);
       lj_wbuf_addstring(buf, sym_name);
     }
@@ -150,7 +150,7 @@ static void write_c_symtab (ElfW(Sym *) sym, char *strtab, ElfW(Addr) so_addr,
 }
 
 static int dump_sht_symtab(const char *elf_name, struct lj_wbuf *buf,
-    lua_State *L, const ElfW(Addr) so_addr)
+    lua_State *L, const uint8_t header, const ElfW(Addr) so_addr)
 {
   int status = 0;
 
@@ -244,7 +244,7 @@ static int dump_sht_symtab(const char *elf_name, struct lj_wbuf *buf,
       sizeof(char) * strtab_size && ferror(elf_file) != 0)
     goto error;
 
-  write_c_symtab(sym, strtab, so_addr, sym_cnt, buf);
+  write_c_symtab(sym, strtab, so_addr, sym_cnt, header, buf);
 
   goto end;
 
@@ -264,7 +264,8 @@ static int dump_sht_symtab(const char *elf_name, struct lj_wbuf *buf,
   return status;
 }
 
-static int dump_dyn_symtab(struct dl_phdr_info *info, struct lj_wbuf *buf)
+static int dump_dyn_symtab
+(struct dl_phdr_info *info, const uint8_t header, struct lj_wbuf *buf)
 {
   for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) {
     if (info->dlpi_phdr[header_index].p_type == PT_DYNAMIC) {
@@ -326,7 +327,7 @@ static int dump_dyn_symtab(struct dl_phdr_info *info, struct lj_wbuf *buf)
       ** For more, see https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html
       */
       sym_cnt = ghashtab == 0 ? hashtab[1] : ghashtab_size(ghashtab);
-      write_c_symtab(sym, strtab, info->dlpi_addr, sym_cnt, buf);
+      write_c_symtab(sym, strtab, info->dlpi_addr, sym_cnt, header, buf);
       return 0;
     }
   }
@@ -335,8 +336,13 @@ static int dump_dyn_symtab(struct dl_phdr_info *info, struct lj_wbuf *buf)
 }
 
 struct symbol_resolver_conf {
-  struct lj_wbuf *buf;
-  lua_State *L;
+  struct lj_wbuf *buf; /* Output buffer. */
+  lua_State *L; /* Current Lua state. */
+  const uint8_t header; /* Header for symbol entries to write. */
+
+  uint32_t cur_lib; /* Index of the lib currently dumped. */
+  uint32_t to_dump_cnt; /* Amount of libs to dump. */
+  uint32_t *lib_adds; /* Memprof's counter of libs. */
 };
 
 static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size,
@@ -345,23 +351,48 @@ static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size,
   struct symbol_resolver_conf *conf = data;
   struct lj_wbuf *buf = conf->buf;
   lua_State *L = conf->L;
+  const uint8_t header = conf->header;
+
+  uint32_t lib_cnt = 0;
+
+  /*
+  ** Check that dlpi_adds and dlpi_subs fields are available.
+  ** Assertion was taken from the GLIBC tests:
+  ** https://code.woboq.org/userspace/glibc/elf/tst-dlmodcount.c.html#37
+  */
+  lua_assert(info_size > offsetof(struct dl_phdr_info, dlpi_subs)
+      + sizeof(info->dlpi_subs));
 
-  UNUSED(info_size);
+  lib_cnt = info->dlpi_adds - *conf->lib_adds;
 
   /* Skip vDSO library. */
   if (info->dlpi_addr == getauxval(AT_SYSINFO_EHDR))
     return 0;
 
+  if ((conf->to_dump_cnt = info->dlpi_adds - *conf->lib_adds) == 0)
+    /* No new libraries, stop resolver. */
+    return 1;
+
+  if (conf->cur_lib < lib_cnt - conf->to_dump_cnt) {
+    /* That lib is already dumped, skip it. */
+    ++conf->cur_lib;
+    return 0;
+  }
+
+  if (conf->cur_lib == lib_cnt - conf->to_dump_cnt - 1)
+    /* Last library, update memrpof's lib counter. */
+    *conf->lib_adds = info->dlpi_adds;
+
   /*
   ** Main way: try to open ELF and read SHT_SYMTAB, SHT_STRTAB and SHT_HASH
   ** sections from it.
   */
-  if (dump_sht_symtab(info->dlpi_name, buf, L, info->dlpi_addr) == 0) {
-    /* Empty body. */
+  if (dump_sht_symtab(info->dlpi_name, buf, L, header, info->dlpi_addr) == 0) {
+    ++conf->cur_lib;
   }
   /* First fallback: dump functions only from PT_DYNAMIC segment. */
-  else if(dump_dyn_symtab(info, buf) == 0) {
-    /* Empty body. */
+  else if(dump_dyn_symtab(info, header, buf) == 0) {
+    ++conf->cur_lib;
   }
   /*
   ** Last resort: dump ELF size and address to show .so name for its functions
@@ -371,6 +402,7 @@ static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size,
     lj_wbuf_addbyte(buf, SYMTAB_CFUNC);
     lj_wbuf_addu64(buf, info->dlpi_addr);
     lj_wbuf_addstring(buf, info->dlpi_name);
+    ++conf->cur_lib;
   }
 
   return 0;
@@ -378,7 +410,8 @@ static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size,
 
 #endif /* LUAJIT_OS != LUAJIT_OS_OSX */
 
-static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
+static void dump_symtab(struct lj_wbuf *out, const struct global_State *g,
+    uint32_t *lib_adds)
 {
   const GCRef *iter = &g->gc.root;
   const GCobj *o;
@@ -387,8 +420,14 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
 #if LUAJIT_OS != LUAJIT_OS_OSX
   struct symbol_resolver_conf conf = {
     out,
-    gco2th(gcref(g->cur_L))
+    gco2th(gcref(g->cur_L)),
+    SYMTAB_CFUNC,
+    0,
+    0,
+    lib_adds
   };
+#else
+  UNUSED(lib_adds);
 #endif
 
   /* Write prologue. */
@@ -447,6 +486,7 @@ struct memprof {
   struct alloc orig_alloc; /* Original allocator. */
   struct lj_memprof_options opt; /* Profiling options. */
   int saved_errno; /* Saved errno when profiler deinstrumented. */
+  uint32_t lib_adds; /* Number of libs loaded. Monotonic. */
 };
 
 static struct memprof memprof = {0};
@@ -482,15 +522,43 @@ static void memprof_write_lfunc(struct lj_wbuf *out, uint8_t aevent,
 }
 
 static void memprof_write_cfunc(struct lj_wbuf *out, uint8_t aevent,
-				const GCfunc *fn)
+				const GCfunc *fn, lua_State *L, uint32_t *lib_adds)
 {
+#if LUAJIT_OS != LUAJIT_OS_OSX
+  /* Check if there are any new libs. */
+  struct symbol_resolver_conf conf = {
+    out,
+    L,
+    AEVENT_SYMTAB | ASOURCE_CFUNC,
+    0,
+    0,
+    lib_adds
+  };
+
+  /* XXX: Leaving the `vmstate` unchanged leads to an infinite
+  ** recursion, because allocations inside ELF parser are treated
+  ** as C-side allocations by memrpof. Setting the `vmstate` to
+  ** LJ_VMST_INTERP solves the issue.
+  */
+  global_State *g = G(L);
+  const uint32_t ostate = g->vmstate;
+  g->vmstate = ~LJ_VMST_INTERP;
+
+  dl_iterate_phdr(resolve_symbolnames, &conf);
+
+  /* Restore vmstate. */
+  g->vmstate = ostate;
+#else
+  UNUSED(lib_adds);
+#endif
+
   lj_wbuf_addbyte(out, aevent | ASOURCE_CFUNC);
   lj_wbuf_addu64(out, (uintptr_t)fn->c.f);
 }
 
 static void memprof_write_ffunc(struct lj_wbuf *out, uint8_t aevent,
 				GCfunc *fn, struct lua_State *L,
-				cTValue *frame)
+				cTValue *frame, uint32_t *lib_adds)
 {
   cTValue *pframe = frame_prev(frame);
   GCfunc *pfn = frame_func(pframe);
@@ -503,7 +571,7 @@ static void memprof_write_ffunc(struct lj_wbuf *out, uint8_t aevent,
   if (pfn != NULL && isluafunc(pfn))
     memprof_write_lfunc(out, aevent, pfn, L, frame);
   else
-    memprof_write_cfunc(out, aevent, fn);
+    memprof_write_cfunc(out, aevent, fn, L, lib_adds);
 }
 
 static void memprof_write_func(struct memprof *mp, uint8_t aevent)
@@ -516,9 +584,9 @@ static void memprof_write_func(struct memprof *mp, uint8_t aevent)
   if (isluafunc(fn))
     memprof_write_lfunc(out, aevent, fn, L, NULL);
   else if (isffunc(fn))
-    memprof_write_ffunc(out, aevent, fn, L, frame);
+    memprof_write_ffunc(out, aevent, fn, L, frame, &mp->lib_adds);
   else if (iscfunc(fn))
-    memprof_write_cfunc(out, aevent, fn);
+    memprof_write_cfunc(out, aevent, fn, L, &mp->lib_adds);
   else
     lua_assert(0);
 }
@@ -654,7 +722,7 @@ int lj_memprof_start(struct lua_State *L, const struct lj_memprof_options *opt)
 
   /* Init output. */
   lj_wbuf_init(&mp->out, mp_opt->writer, mp_opt->ctx, mp_opt->buf, mp_opt->len);
-  dump_symtab(&mp->out, mp->g);
+  dump_symtab(&mp->out, mp->g, &mp->lib_adds);
 
   /* Write prologue. */
   lj_wbuf_addn(&mp->out, ljm_header, ljm_header_len);
diff --git a/src/lj_memprof.h b/src/lj_memprof.h
index e7eae4c6..75d9f438 100644
--- a/src/lj_memprof.h
+++ b/src/lj_memprof.h
@@ -71,10 +71,11 @@
 ** prologue       := 'l' 'j' 'm' version reserved
 ** version        := <BYTE>
 ** reserved       := <BYTE> <BYTE> <BYTE>
-** event          := event-alloc | event-realloc | event-free
+** event          := event-alloc | event-realloc | event-free | event-symtab
 ** event-alloc    := event-header loc? naddr nsize
 ** event-realloc  := event-header loc? oaddr osize naddr nsize
 ** event-free     := event-header loc? oaddr osize
+** event-symtab   := event-header sym-addr sym-name
 ** event-header   := <BYTE>
 ** loc            := loc-lua | loc-c | loc-trace
 ** loc-lua        := sym-addr line-no
@@ -88,7 +89,11 @@
 ** naddr          := <ULEB128>
 ** osize          := <ULEB128>
 ** nsize          := <ULEB128>
+** sym-name       := string
 ** epilogue       := event-header
+** string         := string-len string-payload
+** string-len     := <ULEB128>
+** string-payload := <BYTE> {string-len}
 **
 ** <BYTE>   :  A single byte (no surprises here)
 ** <ULEB128>:  Unsigned integer represented in ULEB128 encoding
@@ -107,6 +112,7 @@
 */
 
 /* Allocation events. */
+#define AEVENT_SYMTAB  ((uint8_t)0)
 #define AEVENT_ALLOC   ((uint8_t)1)
 #define AEVENT_FREE    ((uint8_t)2)
 #define AEVENT_REALLOC ((uint8_t)(AEVENT_ALLOC | AEVENT_FREE))
diff --git a/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua b/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
index b28a3948..e071d405 100644
--- a/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
+++ b/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
@@ -18,6 +18,7 @@ local symtab = require "utils.symtab"
 local testboth = require "resboth"
 local testhash = require "reshash"
 local testgnuhash = require "resgnuhash"
+local memprof = require "memprof.parse"
 
 local TMP_BINFILE = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%.%1.memprofdata.tmp.bin")
 
@@ -51,13 +52,16 @@ misc.memprof.stop()
 
 local reader = bufread.new(TMP_BINFILE)
 local symbols = symtab.parse(reader)
+local events = memprof.parse(reader)
 
+for addr, event in pairs(events.symtab) do
+  symtab.add_cfunc(symbols, addr, event.name)
+end
+
+-- Static symbol resolution.
 test:ok(tree_contains(symbols.cfunc, "luaopen_os"))
 
--- Dynamic symbols resolution.
-generate_output(teststripped.allocate_string)
-reader = bufread.new(TMP_BINFILE)
-symbols = symtab.parse(reader)
+-- Dynamic symbol resolution. Newly loaded symbol resolution.
 test:ok(tree_contains(symbols.cfunc, "allocate_string"))
 
 -- .hash style symbol table.
diff --git a/tools/memprof.lua b/tools/memprof.lua
index 18b44fdd..805b7e74 100644
--- a/tools/memprof.lua
+++ b/tools/memprof.lua
@@ -101,6 +101,11 @@ local function dump(inputfile)
   local reader = bufread.new(inputfile)
   local symbols = symtab.parse(reader)
   local events = memprof.parse(reader, symbols)
+
+  for addr, event in pairs(events.symtab) do
+    symtab.add_cfunc(symbols, addr, event.name)
+  end
+
   if not leak_only then
     view.profile_info(events, symbols)
   end
diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
index 47dbaee4..181a7e92 100644
--- a/tools/memprof/parse.lua
+++ b/tools/memprof/parse.lua
@@ -17,6 +17,7 @@ local LJM_CURRENT_VERSION = 0x02
 
 local LJM_EPILOGUE_HEADER = 0x80
 
+local AEVENT_SYMTAB = 0
 local AEVENT_ALLOC = 1
 local AEVENT_FREE = 2
 local AEVENT_REALLOC = 3
@@ -41,6 +42,7 @@ local function new_event(loc)
     free = 0,
     alloc = 0,
     primary = {},
+    name = nil
   }
 end
 
@@ -85,6 +87,17 @@ local function parse_location(reader, asource)
   return symtab.id(loc), loc
 end
 
+local function parse_symtab(reader, _, events, _)
+  local id = reader:read_uleb128()
+  local name = reader:read_string()
+
+  if not events[id] then
+    events[id] = new_event(0)
+  end
+
+  events[id].name = name
+end
+
 local function parse_alloc(reader, asource, events, heap)
   local id, loc = parse_location(reader, asource)
 
@@ -142,6 +155,7 @@ local function parse_free(reader, asource, events, heap)
 end
 
 local parsers = {
+  [AEVENT_SYMTAB] = {evname = "symtab", parse = parse_symtab},
   [AEVENT_ALLOC] = {evname = "alloc", parse = parse_alloc},
   [AEVENT_FREE] = {evname = "free", parse = parse_free},
   [AEVENT_REALLOC] = {evname = "realloc", parse = parse_realloc},
@@ -182,6 +196,7 @@ function M.parse(reader)
     realloc = {},
     free = {},
     heap = {},
+    symtab = {}
   }
 
   local magic = reader:read_octets(3)
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
index aedb8da0..5197d956 100644
--- a/tools/utils/symtab.lua
+++ b/tools/utils/symtab.lua
@@ -159,4 +159,8 @@ function M.demangle(symtab, loc)
   return string_format("CFUNC %#x", addr)
 end
 
+function M.add_cfunc(symtab, addr, name)
+  symtab.cfunc = avl.insert(symtab.cfunc, addr, {name = name})
+end
+
 return M
-- 
2.35.1



More information about the Tarantool-patches mailing list