Tarantool development patches archive
 help / color / mirror / Atom feed
From: Sergey Kaplun via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Maxim Kokryashkin <max.kokryashkin@gmail.com>
Cc: tarantool-patches@dev.tarantool.org
Subject: Re: [Tarantool-patches] [PATCH luajit v6 2/2] memprof: enrich symtab with newly loaded symbols
Date: Mon, 4 Apr 2022 11:44:14 +0300	[thread overview]
Message-ID: <Ykqv3qYdDLJGZbOy@root> (raw)
In-Reply-To: <3449fe89891797d3948b134fafe81a5575489b0c.1647972832.git.m.kokryashkin@tarantool.org>

Hi, Maxim!

Thanks for the patch!

Please consider my comments below.

Side note:
Did you rebase you branch on Mikhail's one with enriched symtab for Lua
functions and traces? It helps to avoid double review of the same things
(and possible conflicts) in memprof.h, for example.
Also, those commits should resolve address clashing when a different
function allocates add the same address as the old deleted one.

On 22.03.22, Maxim Kokryashkin wrote:
> This commit lets memprof extend its symtab when new
> C-symbols appear after dlopen. The following data is
> stored in event stream for each newly loaded symbol:
> 
> | (AEVENT_SYMTAB | ASOURCE_CFUNC) | symbol address | symbol name |
>               1 byte                   8 bytes
>               magic
>               number

Please, make commit message more specific. At least add information
about __how exactly__ C library is **detected**, considered as new and
dumped.

> 
> Resolves tarantool/tarantool#5813
> ---
>  src/lj_memprof.c                              | 103 ++++++++++++++----
>  src/lj_memprof.h                              |   8 +-
>  .../gh-5813-resolving-of-c-symbols.test.lua   |  23 ++--
>  tools/memprof.lua                             |   5 +
>  tools/memprof/parse.lua                       |  19 ++++
>  tools/utils/symtab.lua                        |   4 +
>  6 files changed, 130 insertions(+), 32 deletions(-)
> 
> diff --git a/src/lj_memprof.c b/src/lj_memprof.c
> index b1634f91..bd6f94f5 100644
> --- a/src/lj_memprof.c
> +++ b/src/lj_memprof.c

<snipped>

> @@ -343,6 +345,12 @@ static int dump_dyn_symtab(struct dl_phdr_info *info, struct lj_wbuf *buf)
>  struct symbol_resolver_conf {
>    struct lj_wbuf *buf;
>    lua_State *L;
> +  const uint8_t header;
> +
> +  uint32_t cur_lib;
> +  uint32_t lib_cnt_prev;
> +  uint32_t to_dump_cnt;
> +  uint32_t *lib_cnt;

Please add verbose comments for what each field states for.
Is `lib_cnt` incremented monotonically?

>  };
>  
>  static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void *data)
> @@ -350,23 +358,46 @@ static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void
>    struct symbol_resolver_conf *conf = data;
>    struct lj_wbuf *buf = conf->buf;
>    lua_State *L = conf->L;
> +  const uint8_t header = conf->header;
> +
> +  const uint32_t lib_cnt_prev = *conf->lib_cnt;
> +  uint32_t lib_cnt = 0;

Nit: I think that we can just declare `lib_cnt`, wo initialization.

> +
> +  /*
> +  ** Check that dlpi_adds and dlpi_subs fields are available.
> +  ** Assertion was taken from the GLIBC tests:
> +  ** https://code.woboq.org/userspace/glibc/elf/tst-dlmodcount.c.html#37
> +  */
> +  lua_assert(info_size > offsetof(struct dl_phdr_info, dlpi_subs)
> +      + sizeof(info->dlpi_subs));
>  
> -  UNUSED(info_size);
> +  lib_cnt = info->dlpi_adds - info->dlpi_subs;
> +  conf->lib_cnt_prev = *conf->lib_cnt;

Please, add a comment about detection new loads here. I don't get this
part.

>  
>    /* Skip vDSO library. */
>    if (info->dlpi_addr == getauxval(AT_SYSINFO_EHDR))
>      return 0;
>  
> +  if ((conf->to_dump_cnt = info->dlpi_adds - lib_cnt_prev) == 0)
> +    /* No new libraries, stop resolver. */
> +    return 1;
> +
> +  if (conf->cur_lib < lib_cnt - conf->to_dump_cnt) {
> +    /* That lib is already dumped, skip it. */
> +    ++conf->cur_lib;
> +    return 0;
> +  }
> +
>    /*
>    ** Main way: try to open ELF and read SHT_SYMTAB, SHT_STRTAB and SHT_HASH
>    ** sections from it.
>    */

<snipped>

> @@ -383,7 +415,7 @@ static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void
>  
>  #endif /* LUAJIT_OS != LUAJIT_OS_OSX */
>  
> -static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
> +static void dump_symtab(struct lj_wbuf *out, const struct global_State *g, uint32_t *lib_cnt)

Nit: Linewidth is more than 120 symbols.

>  {
>    const GCRef *iter = &g->gc.root;
>    const GCobj *o;
> @@ -392,8 +424,15 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)

<snipped>

> @@ -452,6 +491,7 @@ struct memprof {
>    struct alloc orig_alloc; /* Original allocator. */
>    struct lj_memprof_options opt; /* Profiling options. */
>    int saved_errno; /* Saved errno when profiler deinstrumented. */
> +  uint32_t lib_cnt; /* Number of currently loaded libs. */

Side note: Does it always increase, like `dlpi_adds`?

>  };
>  
>  static struct memprof memprof = {0};
> @@ -487,15 +527,40 @@ static void memprof_write_lfunc(struct lj_wbuf *out, uint8_t aevent,
>  }
>  
>  static void memprof_write_cfunc(struct lj_wbuf *out, uint8_t aevent,
> -				const GCfunc *fn)
> +				const GCfunc *fn, lua_State *L, uint32_t *lib_cnt)
>  {
> +#if LUAJIT_OS != LUAJIT_OS_OSX
> +  /* Check if there are any new libs. */
> +  struct symbol_resolver_conf conf = {
> +    out,
> +    L,
> +    AEVENT_SYMTAB | ASOURCE_CFUNC,
> +    0,
> +    *lib_cnt,
> +    0,
> +    lib_cnt
> +  };
> +
> +  /* Preserve old vmstate. */
> +  global_State *g = G(L);
> +  const uint32_t ostate = g->vmstate;
> +  g->vmstate = ~LJ_VMST_INTERP;

Nit: Please, add a comment why this is necessary.

> +
> +  dl_iterate_phdr(resolve_symbolnames, &conf);
> +
> +  /* Restore vmstate. */
> +  g->vmstate = ostate;
> +#else
> +  UNUSED(lib_cnt);
> +#endif
> +
>    lj_wbuf_addbyte(out, aevent | ASOURCE_CFUNC);
>    lj_wbuf_addu64(out, (uintptr_t)fn->c.f);
>  }

<snipped>

> diff --git a/src/lj_memprof.h b/src/lj_memprof.h
> index 0327a205..ea8f2362 100644
> --- a/src/lj_memprof.h
> +++ b/src/lj_memprof.h

<snipped>

> diff --git a/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua b/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
> index 8f20511c..5831a4f0 100644
> --- a/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
> +++ b/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
> @@ -14,6 +14,7 @@ jit.flush()
>  
>  local bufread = require "utils.bufread"
>  local symtab = require "utils.symtab"
> +local memprof = require "memprof.parse"
>  
>  local TMP_BINFILE = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%.%1.memprofdata.tmp.bin")
>  
> @@ -30,25 +31,23 @@ end
>  -- Static symbols resolution.
>  local res, err = misc.memprof.start(TMP_BINFILE)
>  assert(res, err)
> --- That Lua module is required here to trigger the `luaopen_os`, which is not
> --- stripped in the debug build.
> +
>  local testlib = require "testresolving"
> +for _=1, 1e5 do testlib.allocate_string() end

Typo: s/_=1/_ = 1/
Also it better to use names for such constant like: `NALLOCS`.

> +
>  misc.memprof.stop()
>  
>  local reader = bufread.new(TMP_BINFILE)
>  local symbols = symtab.parse(reader)
> +local events = memprof.parse(reader)
>  
> -test:ok(tree_contains(symbols.cfunc, "luaopen_os"))
> -
> --- Dynamic symbols resolution.
> -res, err = misc.memprof.start(TMP_BINFILE)
> -assert(res, err)
> -for _=1, 1e5 do testlib.allocate_string() end
> -misc.memprof.stop()
> -
> -reader = bufread.new(TMP_BINFILE)
> -symbols = symtab.parse(reader)
> +for addr, event in pairs(events.symtab) do
> +  symtab.add_cfunc(symbols, addr, event.name)
> +end
>  
> +-- Static symbol resolution. `luaopen_os` is not stripped in the debug build.
> +test:ok(tree_contains(symbols.cfunc, "luaopen_os"))

Don't get this comment. Should test fail in Release build? (Now it
doesn't).

Also, I suggest to use another one test for newly loaded library, not
change the existing one.

Also, it will be nice to add the following test:

0) Load 1 library, during memprof running, use functions from it with
   allocations
1) Unload it, load the second one, use functions from it with
   allocations
2) Load the 1st lib again, use functions again
3) Check that profile is correct

> +-- Dynamic symbol resolution. Newly loaded symbol resolution.
>  test:ok(tree_contains(symbols.cfunc, "allocate_string"))
>  
>  -- FIXME: There is one case that is not tested -- shared objects, which

Also, I noticed that this test is __really__ long now. May be we should
use smaller amount of allocations?


> diff --git a/tools/memprof.lua b/tools/memprof.lua
> index 18b44fdd..805b7e74 100644
> --- a/tools/memprof.lua
> +++ b/tools/memprof.lua

<snipped>

> diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
> index 47dbaee4..36343e4a 100644
> --- a/tools/memprof/parse.lua
> +++ b/tools/memprof/parse.lua
> @@ -17,6 +17,7 @@ local LJM_CURRENT_VERSION = 0x02
>  
>  local LJM_EPILOGUE_HEADER = 0x80
>  
> +local AEVENT_SYMTAB = 0
>  local AEVENT_ALLOC = 1
>  local AEVENT_FREE = 2
>  local AEVENT_REALLOC = 3
> @@ -41,6 +42,7 @@ local function new_event(loc)
>      free = 0,
>      alloc = 0,
>      primary = {},
> +    name = nil
>    }
>  end
>  
> @@ -85,6 +87,21 @@ local function parse_location(reader, asource)
>    return symtab.id(loc), loc
>  end
>  
> +local function parse_symtab(reader, asource, events, heap)
> +  -- That instruction supresses unused variable warning
> +  -- from luacheck.
> +  local _ = asource or heap

I suggest the following diff to make luacheck happy:

===================================================================
diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
index 36343e4a..181a7e92 100644
--- a/tools/memprof/parse.lua
+++ b/tools/memprof/parse.lua
@@ -87,11 +87,7 @@ local function parse_location(reader, asource)
   return symtab.id(loc), loc
 end
 
-local function parse_symtab(reader, asource, events, heap)
-  -- That instruction supresses unused variable warning
-  -- from luacheck.
-  local _ = asource or heap
-
+local function parse_symtab(reader, _, events, _)
   local id = reader:read_uleb128()
   local name = reader:read_string()
===================================================================

> +
> +  local id = reader:read_uleb128()
> +  local name = reader:read_string()
> +
> +  if not events[id] then
> +    events[id] = new_event(0)
> +  end
> +
> +  events[id].name = name
> +end
> +
>  local function parse_alloc(reader, asource, events, heap)
>    local id, loc = parse_location(reader, asource)
>  
> @@ -142,6 +159,7 @@ local function parse_free(reader, asource, events, heap)

<snipped>

> diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
> index 6f1685f6..e5647711 100644
> --- a/tools/utils/symtab.lua
> +++ b/tools/utils/symtab.lua

<snipped>

> -- 
> 2.35.1
> 

-- 
Best regards,
Sergey Kaplun

  reply	other threads:[~2022-04-04  8:46 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-22 18:31 [Tarantool-patches] [PATCH luajit v6 0/2] memprof: C-symbols resolving Maxim Kokryashkin via Tarantool-patches
2022-03-22 18:31 ` [Tarantool-patches] [PATCH luajit v6 1/2] memprof: extend symtab with C-symbols Maxim Kokryashkin via Tarantool-patches
2022-04-04  8:43   ` Sergey Kaplun via Tarantool-patches
2022-04-04 11:34     ` Sergey Kaplun via Tarantool-patches
2022-03-22 18:31 ` [Tarantool-patches] [PATCH luajit v6 2/2] memprof: enrich symtab with newly loaded symbols Maxim Kokryashkin via Tarantool-patches
2022-04-04  8:44   ` Sergey Kaplun via Tarantool-patches [this message]
2022-04-04  8:45 ` [Tarantool-patches] [PATCH luajit v6 0/2] memprof: C-symbols resolving Sergey Kaplun via Tarantool-patches
2022-04-22 13:22 ` Igor Munkin via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Ykqv3qYdDLJGZbOy@root \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=max.kokryashkin@gmail.com \
    --cc=skaplun@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit v6 2/2] memprof: enrich symtab with newly loaded symbols' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox