From: Maxim Kokryashkin via Tarantool-patches <tarantool-patches@dev.tarantool.org> To: tarantool-patches@dev.tarantool.org, imun@tarantool.org, skaplun@tarantool.org Subject: [Tarantool-patches] [PATCH luajit v5 2/2] memprof: enrich symtab with newly loaded symbols Date: Fri, 4 Mar 2022 22:22:19 +0300 [thread overview] Message-ID: <20220304192219.1266071-3-m.kokryashkin@tarantool.org> (raw) In-Reply-To: <20220304192219.1266071-1-m.kokryashkin@tarantool.org> This commit lets memprof extend its symtab when new C-symbols appear after dlopen. The following data is stored in the event stream for each newly loaded symbol: | (AEVENT_SYMTAB | ASOURCE_CFUNC) [1 byte, magic number] | symbol address [8 bytes] | symbol name | Resolves tarantool/tarantool#5813 --- src/lj_memprof.c | 70 +++++++++++++++++++++++++++++++++++------ src/lj_memprof.h | 8 ++++- tools/memprof.lua | 5 +++ tools/memprof/parse.lua | 19 +++++++++++ tools/utils/symtab.lua | 4 +++ 5 files changed, 96 insertions(+), 10 deletions(-) diff --git a/src/lj_memprof.c b/src/lj_memprof.c index 71c1da7f..2b609dfa 100644 --- a/src/lj_memprof.c +++ b/src/lj_memprof.c @@ -128,6 +128,11 @@ uint32_t ghashtab_size(ElfW(Addr) ghashtab) struct symbol_resolver_conf { struct lj_wbuf *buf; const uint8_t header; + + uint32_t cur_lib; + uint32_t lib_cnt_prev; + uint32_t to_dump_cnt; + uint32_t *lib_cnt; }; void write_c_symtab(ElfW(Sym*) sym, char *strtab, ElfW(Addr) so_addr, @@ -353,22 +358,46 @@ int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void *data) const uint8_t header = conf->header; struct lj_wbuf *buf = conf->buf; - UNUSED(info_size); + const uint32_t lib_cnt_prev = *conf->lib_cnt; + uint32_t lib_cnt = 0; + + /* + ** Check that dlpi_adds and dlpi_subs fields are available. 
+ ** Assertion was taken from the GLIBC tests: + ** https://code.woboq.org/userspace/glibc/elf/tst-dlmodcount.c.html#37 + */ + assert(info_size > offsetof(struct dl_phdr_info, dlpi_subs) + + sizeof(info->dlpi_subs)); + + lib_cnt = info->dlpi_adds - info->dlpi_subs; + conf->lib_cnt_prev = *conf->lib_cnt; - /* Skip vDSO library. */ + /* Skip vDSO library. */ if (info->dlpi_addr == getauxval(AT_SYSINFO_EHDR)) return 0; + if ((conf->to_dump_cnt = info->dlpi_adds - lib_cnt_prev) == 0) + /* No new libraries, stop resolver. */ + return 1; + + if (conf->cur_lib < lib_cnt - conf->to_dump_cnt) { + /* That lib is already dumped, skip it. */ + ++conf->cur_lib; + return 0; + } + /* ** Main way: try to open ELF and read SHT_SYMTAB, SHT_STRTAB and SHT_HASH ** sections from it. */ if (dump_sht_symtab(info->dlpi_name, buf, header, info->dlpi_addr) == 0) { + ++conf->cur_lib; return 0; } /* First fallback: dump functions only from PT_DYNAMIC segment. */ if(dump_dyn_symtab(info, header, buf) == 0) { + ++conf->cur_lib; return 0; } @@ -380,12 +409,13 @@ int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void *data) lj_wbuf_addu64(buf, info->dlpi_addr); lj_wbuf_addstring(buf, info->dlpi_name); + ++conf->cur_lib; return 0; } #endif -static void dump_symtab(struct lj_wbuf *out, const struct global_State *g) +static void dump_symtab(struct lj_wbuf *out, const struct global_State *g, uint32_t *lib_cnt) { const GCRef *iter = &g->gc.root; const GCobj *o; @@ -395,7 +425,13 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g) struct symbol_resolver_conf conf = { out, SYMTAB_CFUNC, + 0, + *lib_cnt, + 0, + lib_cnt }; +#else + UNUSED(lib_cnt); #endif /* Write prologue. */ @@ -454,6 +490,7 @@ struct memprof { struct alloc orig_alloc; /* Original allocator. */ struct lj_memprof_options opt; /* Profiling options. */ int saved_errno; /* Saved errno when profiler deinstrumented. */ + uint32_t lib_cnt; /* Number of currently loaded libs. 
*/ }; static struct memprof memprof = {0}; @@ -489,15 +526,30 @@ static void memprof_write_lfunc(struct lj_wbuf *out, uint8_t aevent, } static void memprof_write_cfunc(struct lj_wbuf *out, uint8_t aevent, - const GCfunc *fn) + const GCfunc *fn, uint32_t *lib_cnt) { +#if LUAJIT_OS != LUAJIT_OS_OSX + /* Check if there are any new libs. */ + struct symbol_resolver_conf conf = { + out, + AEVENT_SYMTAB | ASOURCE_CFUNC, + 0, + *lib_cnt, + 0, + lib_cnt + }; + dl_iterate_phdr(resolve_symbolnames, &conf); +#else + UNUSED(lib_cnt); +#endif + lj_wbuf_addbyte(out, aevent | ASOURCE_CFUNC); lj_wbuf_addu64(out, (uintptr_t)fn->c.f); } static void memprof_write_ffunc(struct lj_wbuf *out, uint8_t aevent, GCfunc *fn, struct lua_State *L, - cTValue *frame) + cTValue *frame, uint32_t *lib_cnt) { cTValue *pframe = frame_prev(frame); GCfunc *pfn = frame_func(pframe); @@ -510,7 +562,7 @@ static void memprof_write_ffunc(struct lj_wbuf *out, uint8_t aevent, if (pfn != NULL && isluafunc(pfn)) memprof_write_lfunc(out, aevent, pfn, L, frame); else - memprof_write_cfunc(out, aevent, fn); + memprof_write_cfunc(out, aevent, fn, lib_cnt); } static void memprof_write_func(struct memprof *mp, uint8_t aevent) @@ -523,9 +575,9 @@ static void memprof_write_func(struct memprof *mp, uint8_t aevent) if (isluafunc(fn)) memprof_write_lfunc(out, aevent, fn, L, NULL); else if (isffunc(fn)) - memprof_write_ffunc(out, aevent, fn, L, frame); + memprof_write_ffunc(out, aevent, fn, L, frame, &mp->lib_cnt); else if (iscfunc(fn)) - memprof_write_cfunc(out, aevent, fn); + memprof_write_cfunc(out, aevent, fn, &mp->lib_cnt); else lua_assert(0); } @@ -661,7 +713,7 @@ int lj_memprof_start(struct lua_State *L, const struct lj_memprof_options *opt) /* Init output. */ lj_wbuf_init(&mp->out, mp_opt->writer, mp_opt->ctx, mp_opt->buf, mp_opt->len); - dump_symtab(&mp->out, mp->g); + dump_symtab(&mp->out, mp->g, &mp->lib_cnt); /* Write prologue. 
*/ lj_wbuf_addn(&mp->out, ljm_header, ljm_header_len); diff --git a/src/lj_memprof.h b/src/lj_memprof.h index 0327a205..ea8f2362 100644 --- a/src/lj_memprof.h +++ b/src/lj_memprof.h @@ -71,10 +71,11 @@ ** prologue := 'l' 'j' 'm' version reserved ** version := <BYTE> ** reserved := <BYTE> <BYTE> <BYTE> -** event := event-alloc | event-realloc | event-free +** event := event-alloc | event-realloc | event-free | event-symtab ** event-alloc := event-header loc? naddr nsize ** event-realloc := event-header loc? oaddr osize naddr nsize ** event-free := event-header loc? oaddr osize +** event-symtab := event-header sym-addr sym-name ** event-header := <BYTE> ** loc := loc-lua | loc-c | loc-trace ** loc-lua := sym-addr line-no @@ -88,7 +89,11 @@ ** naddr := <ULEB128> ** osize := <ULEB128> ** nsize := <ULEB128> +** sym-name := string ** epilogue := event-header +** string := string-len string-payload +** string-len := <ULEB128> +** string-payload := <BYTE> {string-len} ** ** <BYTE> : A single byte (no surprises here) ** <ULEB128>: Unsigned integer represented in ULEB128 encoding @@ -107,6 +112,7 @@ */ /* Allocation events. 
*/ +#define AEVENT_SYMTAB ((uint8_t)0) #define AEVENT_ALLOC ((uint8_t)1) #define AEVENT_FREE ((uint8_t)2) #define AEVENT_REALLOC ((uint8_t)(AEVENT_ALLOC | AEVENT_FREE)) diff --git a/tools/memprof.lua b/tools/memprof.lua index 18b44fdd..805b7e74 100644 --- a/tools/memprof.lua +++ b/tools/memprof.lua @@ -101,6 +101,11 @@ local function dump(inputfile) local reader = bufread.new(inputfile) local symbols = symtab.parse(reader) local events = memprof.parse(reader, symbols) + + for addr, event in pairs(events.symtab) do + symtab.add_cfunc(symbols, addr, event.name) + end + if not leak_only then view.profile_info(events, symbols) end diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua index 47dbaee4..36343e4a 100644 --- a/tools/memprof/parse.lua +++ b/tools/memprof/parse.lua @@ -17,6 +17,7 @@ local LJM_CURRENT_VERSION = 0x02 local LJM_EPILOGUE_HEADER = 0x80 +local AEVENT_SYMTAB = 0 local AEVENT_ALLOC = 1 local AEVENT_FREE = 2 local AEVENT_REALLOC = 3 @@ -41,6 +42,7 @@ local function new_event(loc) free = 0, alloc = 0, primary = {}, + name = nil } end @@ -85,6 +87,21 @@ local function parse_location(reader, asource) return symtab.id(loc), loc end +local function parse_symtab(reader, asource, events, heap) + -- That instruction suppresses unused variable warning + -- from luacheck. 
+ local _ = asource or heap + + local id = reader:read_uleb128() + local name = reader:read_string() + + if not events[id] then + events[id] = new_event(0) + end + + events[id].name = name +end + local function parse_alloc(reader, asource, events, heap) local id, loc = parse_location(reader, asource) @@ -142,6 +159,7 @@ local function parse_free(reader, asource, events, heap) end local parsers = { + [AEVENT_SYMTAB] = {evname = "symtab", parse = parse_symtab}, [AEVENT_ALLOC] = {evname = "alloc", parse = parse_alloc}, [AEVENT_FREE] = {evname = "free", parse = parse_free}, [AEVENT_REALLOC] = {evname = "realloc", parse = parse_realloc}, @@ -182,6 +200,7 @@ function M.parse(reader) realloc = {}, free = {}, heap = {}, + symtab = {} } local magic = reader:read_octets(3) diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua index aa66269c..75b0285b 100644 --- a/tools/utils/symtab.lua +++ b/tools/utils/symtab.lua @@ -158,4 +158,8 @@ function M.demangle(symtab, loc) return string_format("CFUNC %#x", addr) end +function M.add_cfunc(symtab, addr, name) + symtab.cfunc = avl.insert(symtab.cfunc, addr, {name = name}) +end + return M -- 2.35.1
next prev parent reply other threads:[~2022-03-04 19:24 UTC|newest] Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top 2022-03-04 19:22 [Tarantool-patches] [PATCH luajit v5 0/2] memprof: add resolving capabilities Maxim Kokryashkin via Tarantool-patches 2022-03-04 19:22 ` [Tarantool-patches] [PATCH luajit v5 1/2] memprof: extend symtab with C-symbols Maxim Kokryashkin via Tarantool-patches 2022-03-18 6:48 ` Sergey Kaplun via Tarantool-patches 2022-03-19 13:08 ` Maxim Kokryashkin via Tarantool-patches 2022-03-04 19:22 ` Maxim Kokryashkin via Tarantool-patches [this message] 2022-03-04 19:24 ` [Tarantool-patches] [PATCH luajit v5 0/2] memprof: add resolving capabilities Maxim Kokryashkin via Tarantool-patches
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20220304192219.1266071-3-m.kokryashkin@tarantool.org \ --to=tarantool-patches@dev.tarantool.org \ --cc=imun@tarantool.org \ --cc=max.kokryashkin@gmail.com \ --cc=skaplun@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH luajit v5 2/2] memprof: enrich symtab with newly loaded symbols' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.