[Tarantool-patches] [PATCH luajit v6 2/2] memprof: enrich symtab with newly loaded symbols

Sergey Kaplun skaplun at tarantool.org
Mon Apr 4 11:44:14 MSK 2022


Hi, Maxim!

Thanks for the patch!

Please consider my comments below.

Side note:
Did you rebase you branch on Mikhail's one with enriched symtab for Lua
functions and traces? It helps to avoid double review of the same things
(and possible conflicts) in memprof.h, for example.
Also, those commits should resolve address clashing when a different
function allocates add the same address as the old deleted one.

On 22.03.22, Maxim Kokryashkin wrote:
> This commit lets memprof extend its symtab when new
> C-symbols appear after dlopen. The following data is
> stored in event stream for each newly loaded symbol:
> 
> | (AEVENT_SYMTAB | ASOURCE_CFUNC) | symbol address | symbol name |
>               1 byte                   8 bytes
>               magic
>               number

Please, make commit message more specific. At least add information
about __how exactly__ C library is **detected**, considered as new and
dumped.

> 
> Resolves tarantool/tarantool#5813
> ---
>  src/lj_memprof.c                              | 103 ++++++++++++++----
>  src/lj_memprof.h                              |   8 +-
>  .../gh-5813-resolving-of-c-symbols.test.lua   |  23 ++--
>  tools/memprof.lua                             |   5 +
>  tools/memprof/parse.lua                       |  19 ++++
>  tools/utils/symtab.lua                        |   4 +
>  6 files changed, 130 insertions(+), 32 deletions(-)
> 
> diff --git a/src/lj_memprof.c b/src/lj_memprof.c
> index b1634f91..bd6f94f5 100644
> --- a/src/lj_memprof.c
> +++ b/src/lj_memprof.c

<snipped>

> @@ -343,6 +345,12 @@ static int dump_dyn_symtab(struct dl_phdr_info *info, struct lj_wbuf *buf)
>  struct symbol_resolver_conf {
>    struct lj_wbuf *buf;
>    lua_State *L;
> +  const uint8_t header;
> +
> +  uint32_t cur_lib;
> +  uint32_t lib_cnt_prev;
> +  uint32_t to_dump_cnt;
> +  uint32_t *lib_cnt;

Please add verbose comments for what each field states for.
Is `lib_cnt` incremented monotonically?

>  };
>  
>  static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void *data)
> @@ -350,23 +358,46 @@ static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void
>    struct symbol_resolver_conf *conf = data;
>    struct lj_wbuf *buf = conf->buf;
>    lua_State *L = conf->L;
> +  const uint8_t header = conf->header;
> +
> +  const uint32_t lib_cnt_prev = *conf->lib_cnt;
> +  uint32_t lib_cnt = 0;

Nit: I think that we can just declare `lib_cnt`, wo initialization.

> +
> +  /*
> +  ** Check that dlpi_adds and dlpi_subs fields are available.
> +  ** Assertion was taken from the GLIBC tests:
> +  ** https://code.woboq.org/userspace/glibc/elf/tst-dlmodcount.c.html#37
> +  */
> +  lua_assert(info_size > offsetof(struct dl_phdr_info, dlpi_subs)
> +      + sizeof(info->dlpi_subs));
>  
> -  UNUSED(info_size);
> +  lib_cnt = info->dlpi_adds - info->dlpi_subs;
> +  conf->lib_cnt_prev = *conf->lib_cnt;

Please, add a comment about detection new loads here. I don't get this
part.

>  
>    /* Skip vDSO library. */
>    if (info->dlpi_addr == getauxval(AT_SYSINFO_EHDR))
>      return 0;
>  
> +  if ((conf->to_dump_cnt = info->dlpi_adds - lib_cnt_prev) == 0)
> +    /* No new libraries, stop resolver. */
> +    return 1;
> +
> +  if (conf->cur_lib < lib_cnt - conf->to_dump_cnt) {
> +    /* That lib is already dumped, skip it. */
> +    ++conf->cur_lib;
> +    return 0;
> +  }
> +
>    /*
>    ** Main way: try to open ELF and read SHT_SYMTAB, SHT_STRTAB and SHT_HASH
>    ** sections from it.
>    */

<snipped>

> @@ -383,7 +415,7 @@ static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void
>  
>  #endif /* LUAJIT_OS != LUAJIT_OS_OSX */
>  
> -static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
> +static void dump_symtab(struct lj_wbuf *out, const struct global_State *g, uint32_t *lib_cnt)

Nit: Linewidth is more than 120 symbols.

>  {
>    const GCRef *iter = &g->gc.root;
>    const GCobj *o;
> @@ -392,8 +424,15 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)

<snipped>

> @@ -452,6 +491,7 @@ struct memprof {
>    struct alloc orig_alloc; /* Original allocator. */
>    struct lj_memprof_options opt; /* Profiling options. */
>    int saved_errno; /* Saved errno when profiler deinstrumented. */
> +  uint32_t lib_cnt; /* Number of currently loaded libs. */

Side note: Does it always increase, like `dlpi_adds`?

>  };
>  
>  static struct memprof memprof = {0};
> @@ -487,15 +527,40 @@ static void memprof_write_lfunc(struct lj_wbuf *out, uint8_t aevent,
>  }
>  
>  static void memprof_write_cfunc(struct lj_wbuf *out, uint8_t aevent,
> -				const GCfunc *fn)
> +				const GCfunc *fn, lua_State *L, uint32_t *lib_cnt)
>  {
> +#if LUAJIT_OS != LUAJIT_OS_OSX
> +  /* Check if there are any new libs. */
> +  struct symbol_resolver_conf conf = {
> +    out,
> +    L,
> +    AEVENT_SYMTAB | ASOURCE_CFUNC,
> +    0,
> +    *lib_cnt,
> +    0,
> +    lib_cnt
> +  };
> +
> +  /* Preserve old vmstate. */
> +  global_State *g = G(L);
> +  const uint32_t ostate = g->vmstate;
> +  g->vmstate = ~LJ_VMST_INTERP;

Nit: Please, add a comment why this is necessary.

> +
> +  dl_iterate_phdr(resolve_symbolnames, &conf);
> +
> +  /* Restore vmstate. */
> +  g->vmstate = ostate;
> +#else
> +  UNUSED(lib_cnt);
> +#endif
> +
>    lj_wbuf_addbyte(out, aevent | ASOURCE_CFUNC);
>    lj_wbuf_addu64(out, (uintptr_t)fn->c.f);
>  }

<snipped>

> diff --git a/src/lj_memprof.h b/src/lj_memprof.h
> index 0327a205..ea8f2362 100644
> --- a/src/lj_memprof.h
> +++ b/src/lj_memprof.h

<snipped>

> diff --git a/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua b/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
> index 8f20511c..5831a4f0 100644
> --- a/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
> +++ b/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
> @@ -14,6 +14,7 @@ jit.flush()
>  
>  local bufread = require "utils.bufread"
>  local symtab = require "utils.symtab"
> +local memprof = require "memprof.parse"
>  
>  local TMP_BINFILE = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%.%1.memprofdata.tmp.bin")
>  
> @@ -30,25 +31,23 @@ end
>  -- Static symbols resolution.
>  local res, err = misc.memprof.start(TMP_BINFILE)
>  assert(res, err)
> --- That Lua module is required here to trigger the `luaopen_os`, which is not
> --- stripped in the debug build.
> +
>  local testlib = require "testresolving"
> +for _=1, 1e5 do testlib.allocate_string() end

Typo: s/_=1/_ = 1/
Also it better to use names for such constant like: `NALLOCS`.

> +
>  misc.memprof.stop()
>  
>  local reader = bufread.new(TMP_BINFILE)
>  local symbols = symtab.parse(reader)
> +local events = memprof.parse(reader)
>  
> -test:ok(tree_contains(symbols.cfunc, "luaopen_os"))
> -
> --- Dynamic symbols resolution.
> -res, err = misc.memprof.start(TMP_BINFILE)
> -assert(res, err)
> -for _=1, 1e5 do testlib.allocate_string() end
> -misc.memprof.stop()
> -
> -reader = bufread.new(TMP_BINFILE)
> -symbols = symtab.parse(reader)
> +for addr, event in pairs(events.symtab) do
> +  symtab.add_cfunc(symbols, addr, event.name)
> +end
>  
> +-- Static symbol resolution. `luaopen_os` is not stripped in the debug build.
> +test:ok(tree_contains(symbols.cfunc, "luaopen_os"))

Don't get this comment. Should test fail in Release build? (Now it
doesn't).

Also, I suggest to use another one test for newly loaded library, not
change the existing one.

Also, it will be nice to add the following test:

0) Load 1 library, during memprof running, use functions from it with
   allocations
1) Unload it, load the second one, use functions from it with
   allocations
2) Load the 1st lib again, use functions again
3) Check that profile is correct

> +-- Dynamic symbol resolution. Newly loaded symbol resolution.
>  test:ok(tree_contains(symbols.cfunc, "allocate_string"))
>  
>  -- FIXME: There is one case that is not tested -- shared objects, which

Also, I noticed that this test is __really__ long now. May be we should
use smaller amount of allocations?


> diff --git a/tools/memprof.lua b/tools/memprof.lua
> index 18b44fdd..805b7e74 100644
> --- a/tools/memprof.lua
> +++ b/tools/memprof.lua

<snipped>

> diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
> index 47dbaee4..36343e4a 100644
> --- a/tools/memprof/parse.lua
> +++ b/tools/memprof/parse.lua
> @@ -17,6 +17,7 @@ local LJM_CURRENT_VERSION = 0x02
>  
>  local LJM_EPILOGUE_HEADER = 0x80
>  
> +local AEVENT_SYMTAB = 0
>  local AEVENT_ALLOC = 1
>  local AEVENT_FREE = 2
>  local AEVENT_REALLOC = 3
> @@ -41,6 +42,7 @@ local function new_event(loc)
>      free = 0,
>      alloc = 0,
>      primary = {},
> +    name = nil
>    }
>  end
>  
> @@ -85,6 +87,21 @@ local function parse_location(reader, asource)
>    return symtab.id(loc), loc
>  end
>  
> +local function parse_symtab(reader, asource, events, heap)
> +  -- That instruction supresses unused variable warning
> +  -- from luacheck.
> +  local _ = asource or heap

I suggest the following diff to make luacheck happy:

===================================================================
diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
index 36343e4a..181a7e92 100644
--- a/tools/memprof/parse.lua
+++ b/tools/memprof/parse.lua
@@ -87,11 +87,7 @@ local function parse_location(reader, asource)
   return symtab.id(loc), loc
 end
 
-local function parse_symtab(reader, asource, events, heap)
-  -- That instruction supresses unused variable warning
-  -- from luacheck.
-  local _ = asource or heap
-
+local function parse_symtab(reader, _, events, _)
   local id = reader:read_uleb128()
   local name = reader:read_string()
===================================================================

> +
> +  local id = reader:read_uleb128()
> +  local name = reader:read_string()
> +
> +  if not events[id] then
> +    events[id] = new_event(0)
> +  end
> +
> +  events[id].name = name
> +end
> +
>  local function parse_alloc(reader, asource, events, heap)
>    local id, loc = parse_location(reader, asource)
>  
> @@ -142,6 +159,7 @@ local function parse_free(reader, asource, events, heap)

<snipped>

> diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
> index 6f1685f6..e5647711 100644
> --- a/tools/utils/symtab.lua
> +++ b/tools/utils/symtab.lua

<snipped>

> -- 
> 2.35.1
> 

-- 
Best regards,
Sergey Kaplun


More information about the Tarantool-patches mailing list