[Tarantool-patches] [PATCH luajit v6 2/2] memprof: enrich symtab with newly loaded symbols
Maxim Kokryashkin
max.kokryashkin at gmail.com
Tue Mar 22 21:31:25 MSK 2022
This commit lets memprof extend its symtab when new
C symbols appear after dlopen. The following data is
stored in the event stream for each newly loaded symbol:
| (AEVENT_SYMTAB | ASOURCE_CFUNC) | symbol address | symbol name |
  1 byte (magic number)             8 bytes
Resolves tarantool/tarantool#5813
---
src/lj_memprof.c | 103 ++++++++++++++----
src/lj_memprof.h | 8 +-
.../gh-5813-resolving-of-c-symbols.test.lua | 23 ++--
tools/memprof.lua | 5 +
tools/memprof/parse.lua | 19 ++++
tools/utils/symtab.lua | 4 +
6 files changed, 130 insertions(+), 32 deletions(-)
diff --git a/src/lj_memprof.c b/src/lj_memprof.c
index b1634f91..bd6f94f5 100644
--- a/src/lj_memprof.c
+++ b/src/lj_memprof.c
@@ -125,7 +125,7 @@ static uint32_t ghashtab_size(ElfW(Addr) ghashtab)
static void write_c_symtab
(ElfW(Sym*) sym, char *strtab, ElfW(Addr) so_addr,
-size_t sym_cnt, struct lj_wbuf *buf)
+size_t sym_cnt, const uint8_t header, struct lj_wbuf *buf)
{
/*
** Index 0 in ELF symtab is used to represent undefined symbols. Hence, we
@@ -144,7 +144,7 @@ size_t sym_cnt, struct lj_wbuf *buf)
*/
if (ELF32_ST_TYPE(sym[sym_index].st_info) == STT_FUNC && sym[sym_index].st_name != 0) {
char *sym_name = &strtab[sym[sym_index].st_name];
- lj_wbuf_addbyte(buf, SYMTAB_CFUNC);
+ lj_wbuf_addbyte(buf, header);
lj_wbuf_addu64(buf, sym[sym_index].st_value + so_addr);
lj_wbuf_addstring(buf, sym_name);
}
@@ -152,7 +152,8 @@ size_t sym_cnt, struct lj_wbuf *buf)
}
static int dump_sht_symtab
-(const char *elf_name, struct lj_wbuf *buf, lua_State *L, const ElfW(Addr) so_addr)
+(const char *elf_name, struct lj_wbuf *buf, lua_State *L,
+const uint8_t header, const ElfW(Addr) so_addr)
{
int status = 0;
@@ -249,7 +250,7 @@ static int dump_sht_symtab
ferror(elf_file) != 0)
goto error;
- write_c_symtab(sym, strtab, so_addr, sym_cnt, buf);
+ write_c_symtab(sym, strtab, so_addr, sym_cnt, header, buf);
goto end;
@@ -269,7 +270,8 @@ static int dump_sht_symtab
return status;
}
-static int dump_dyn_symtab(struct dl_phdr_info *info, struct lj_wbuf *buf)
+static int dump_dyn_symtab
+(struct dl_phdr_info *info, const uint8_t header, struct lj_wbuf *buf)
{
for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) {
if (info->dlpi_phdr[header_index].p_type == PT_DYNAMIC) {
@@ -332,7 +334,7 @@ static int dump_dyn_symtab(struct dl_phdr_info *info, struct lj_wbuf *buf)
** For more, see https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html
*/
sym_cnt = ghashtab == 0 ? hashtab[1] : ghashtab_size(ghashtab);
- write_c_symtab(sym, strtab, info->dlpi_addr, sym_cnt, buf);
+ write_c_symtab(sym, strtab, info->dlpi_addr, sym_cnt, header, buf);
return 0;
}
}
@@ -343,6 +345,12 @@ static int dump_dyn_symtab(struct dl_phdr_info *info, struct lj_wbuf *buf)
struct symbol_resolver_conf {
struct lj_wbuf *buf;
lua_State *L;
+ const uint8_t header;
+
+ uint32_t cur_lib;
+ uint32_t lib_cnt_prev;
+ uint32_t to_dump_cnt;
+ uint32_t *lib_cnt;
};
static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void *data)
@@ -350,23 +358,46 @@ static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void
struct symbol_resolver_conf *conf = data;
struct lj_wbuf *buf = conf->buf;
lua_State *L = conf->L;
+ const uint8_t header = conf->header;
+
+ const uint32_t lib_cnt_prev = *conf->lib_cnt;
+ uint32_t lib_cnt = 0;
+
+ /*
+ ** Check that dlpi_adds and dlpi_subs fields are available.
+ ** Assertion was taken from the GLIBC tests:
+ ** https://code.woboq.org/userspace/glibc/elf/tst-dlmodcount.c.html#37
+ */
+ lua_assert(info_size > offsetof(struct dl_phdr_info, dlpi_subs)
+ + sizeof(info->dlpi_subs));
- UNUSED(info_size);
+ lib_cnt = info->dlpi_adds - info->dlpi_subs;
+ conf->lib_cnt_prev = *conf->lib_cnt;
/* Skip vDSO library. */
if (info->dlpi_addr == getauxval(AT_SYSINFO_EHDR))
return 0;
+ if ((conf->to_dump_cnt = info->dlpi_adds - lib_cnt_prev) == 0)
+ /* No new libraries, stop resolver. */
+ return 1;
+
+ if (conf->cur_lib < lib_cnt - conf->to_dump_cnt) {
+ /* That lib is already dumped, skip it. */
+ ++conf->cur_lib;
+ return 0;
+ }
+
/*
** Main way: try to open ELF and read SHT_SYMTAB, SHT_STRTAB and SHT_HASH
** sections from it.
*/
- if (dump_sht_symtab(info->dlpi_name, buf, L, info->dlpi_addr) == 0) {
- /* Empty body. */
+ if (dump_sht_symtab(info->dlpi_name, buf, L, header, info->dlpi_addr) == 0) {
+ ++conf->cur_lib;
}
/* First fallback: dump functions only from PT_DYNAMIC segment. */
- if(dump_dyn_symtab(info, buf) == 0) {
- /* Empty body. */
+ if(dump_dyn_symtab(info, header, buf) == 0) {
+ ++conf->cur_lib;
}
/*
** Last resort: dump ELF size and address to show .so name for its functions
@@ -376,6 +407,7 @@ static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void
lj_wbuf_addbyte(buf, SYMTAB_CFUNC);
lj_wbuf_addu64(buf, info->dlpi_addr);
lj_wbuf_addstring(buf, info->dlpi_name);
+ ++conf->cur_lib;
}
return 0;
@@ -383,7 +415,7 @@ static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void
#endif /* LUAJIT_OS != LUAJIT_OS_OSX */
-static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
+static void dump_symtab(struct lj_wbuf *out, const struct global_State *g, uint32_t *lib_cnt)
{
const GCRef *iter = &g->gc.root;
const GCobj *o;
@@ -392,8 +424,15 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
#if LUAJIT_OS != LUAJIT_OS_OSX
struct symbol_resolver_conf conf = {
out,
- gco2th(gcref(g->cur_L))
+ gco2th(gcref(g->cur_L)),
+ SYMTAB_CFUNC,
+ 0,
+ *lib_cnt,
+ 0,
+ lib_cnt
};
+#else
+ UNUSED(lib_cnt);
#endif
/* Write prologue. */
@@ -452,6 +491,7 @@ struct memprof {
struct alloc orig_alloc; /* Original allocator. */
struct lj_memprof_options opt; /* Profiling options. */
int saved_errno; /* Saved errno when profiler deinstrumented. */
+ uint32_t lib_cnt; /* Number of currently loaded libs. */
};
static struct memprof memprof = {0};
@@ -487,15 +527,40 @@ static void memprof_write_lfunc(struct lj_wbuf *out, uint8_t aevent,
}
static void memprof_write_cfunc(struct lj_wbuf *out, uint8_t aevent,
- const GCfunc *fn)
+ const GCfunc *fn, lua_State *L, uint32_t *lib_cnt)
{
+#if LUAJIT_OS != LUAJIT_OS_OSX
+ /* Check if there are any new libs. */
+ struct symbol_resolver_conf conf = {
+ out,
+ L,
+ AEVENT_SYMTAB | ASOURCE_CFUNC,
+ 0,
+ *lib_cnt,
+ 0,
+ lib_cnt
+ };
+
+ /* Preserve old vmstate. */
+ global_State *g = G(L);
+ const uint32_t ostate = g->vmstate;
+ g->vmstate = ~LJ_VMST_INTERP;
+
+ dl_iterate_phdr(resolve_symbolnames, &conf);
+
+ /* Restore vmstate. */
+ g->vmstate = ostate;
+#else
+ UNUSED(lib_cnt);
+#endif
+
lj_wbuf_addbyte(out, aevent | ASOURCE_CFUNC);
lj_wbuf_addu64(out, (uintptr_t)fn->c.f);
}
static void memprof_write_ffunc(struct lj_wbuf *out, uint8_t aevent,
GCfunc *fn, struct lua_State *L,
- cTValue *frame)
+ cTValue *frame, uint32_t *lib_cnt)
{
cTValue *pframe = frame_prev(frame);
GCfunc *pfn = frame_func(pframe);
@@ -508,7 +573,7 @@ static void memprof_write_ffunc(struct lj_wbuf *out, uint8_t aevent,
if (pfn != NULL && isluafunc(pfn))
memprof_write_lfunc(out, aevent, pfn, L, frame);
else
- memprof_write_cfunc(out, aevent, fn);
+ memprof_write_cfunc(out, aevent, fn, L, lib_cnt);
}
static void memprof_write_func(struct memprof *mp, uint8_t aevent)
@@ -521,9 +586,9 @@ static void memprof_write_func(struct memprof *mp, uint8_t aevent)
if (isluafunc(fn))
memprof_write_lfunc(out, aevent, fn, L, NULL);
else if (isffunc(fn))
- memprof_write_ffunc(out, aevent, fn, L, frame);
+ memprof_write_ffunc(out, aevent, fn, L, frame, &mp->lib_cnt);
else if (iscfunc(fn))
- memprof_write_cfunc(out, aevent, fn);
+ memprof_write_cfunc(out, aevent, fn, L, &mp->lib_cnt);
else
lua_assert(0);
}
@@ -659,7 +724,7 @@ int lj_memprof_start(struct lua_State *L, const struct lj_memprof_options *opt)
/* Init output. */
lj_wbuf_init(&mp->out, mp_opt->writer, mp_opt->ctx, mp_opt->buf, mp_opt->len);
- dump_symtab(&mp->out, mp->g);
+ dump_symtab(&mp->out, mp->g, &mp->lib_cnt);
/* Write prologue. */
lj_wbuf_addn(&mp->out, ljm_header, ljm_header_len);
diff --git a/src/lj_memprof.h b/src/lj_memprof.h
index 0327a205..ea8f2362 100644
--- a/src/lj_memprof.h
+++ b/src/lj_memprof.h
@@ -71,10 +71,11 @@
** prologue := 'l' 'j' 'm' version reserved
** version := <BYTE>
** reserved := <BYTE> <BYTE> <BYTE>
-** event := event-alloc | event-realloc | event-free
+** event := event-alloc | event-realloc | event-free | event-symtab
** event-alloc := event-header loc? naddr nsize
** event-realloc := event-header loc? oaddr osize naddr nsize
** event-free := event-header loc? oaddr osize
+** event-symtab := event-header sym-addr sym-name
** event-header := <BYTE>
** loc := loc-lua | loc-c | loc-trace
** loc-lua := sym-addr line-no
@@ -88,7 +89,11 @@
** naddr := <ULEB128>
** osize := <ULEB128>
** nsize := <ULEB128>
+** sym-name := string
** epilogue := event-header
+** string := string-len string-payload
+** string-len := <ULEB128>
+** string-payload := <BYTE> {string-len}
**
** <BYTE> : A single byte (no surprises here)
** <ULEB128>: Unsigned integer represented in ULEB128 encoding
@@ -107,6 +112,7 @@
*/
/* Allocation events. */
+#define AEVENT_SYMTAB ((uint8_t)0)
#define AEVENT_ALLOC ((uint8_t)1)
#define AEVENT_FREE ((uint8_t)2)
#define AEVENT_REALLOC ((uint8_t)(AEVENT_ALLOC | AEVENT_FREE))
diff --git a/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua b/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
index 8f20511c..5831a4f0 100644
--- a/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
+++ b/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
@@ -14,6 +14,7 @@ jit.flush()
local bufread = require "utils.bufread"
local symtab = require "utils.symtab"
+local memprof = require "memprof.parse"
local TMP_BINFILE = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%.%1.memprofdata.tmp.bin")
@@ -30,25 +31,23 @@ end
-- Static symbols resolution.
local res, err = misc.memprof.start(TMP_BINFILE)
assert(res, err)
--- That Lua module is required here to trigger the `luaopen_os`, which is not
--- stripped in the debug build.
+
local testlib = require "testresolving"
+for _=1, 1e5 do testlib.allocate_string() end
+
misc.memprof.stop()
local reader = bufread.new(TMP_BINFILE)
local symbols = symtab.parse(reader)
+local events = memprof.parse(reader)
-test:ok(tree_contains(symbols.cfunc, "luaopen_os"))
-
--- Dynamic symbols resolution.
-res, err = misc.memprof.start(TMP_BINFILE)
-assert(res, err)
-for _=1, 1e5 do testlib.allocate_string() end
-misc.memprof.stop()
-
-reader = bufread.new(TMP_BINFILE)
-symbols = symtab.parse(reader)
+for addr, event in pairs(events.symtab) do
+ symtab.add_cfunc(symbols, addr, event.name)
+end
+-- Static symbol resolution. `luaopen_os` is not stripped in the debug build.
+test:ok(tree_contains(symbols.cfunc, "luaopen_os"))
+-- Dynamic symbol resolution. Newly loaded symbol resolution.
test:ok(tree_contains(symbols.cfunc, "allocate_string"))
-- FIXME: There is one case that is not tested -- shared objects, which
diff --git a/tools/memprof.lua b/tools/memprof.lua
index 18b44fdd..805b7e74 100644
--- a/tools/memprof.lua
+++ b/tools/memprof.lua
@@ -101,6 +101,11 @@ local function dump(inputfile)
local reader = bufread.new(inputfile)
local symbols = symtab.parse(reader)
local events = memprof.parse(reader, symbols)
+
+ for addr, event in pairs(events.symtab) do
+ symtab.add_cfunc(symbols, addr, event.name)
+ end
+
if not leak_only then
view.profile_info(events, symbols)
end
diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
index 47dbaee4..36343e4a 100644
--- a/tools/memprof/parse.lua
+++ b/tools/memprof/parse.lua
@@ -17,6 +17,7 @@ local LJM_CURRENT_VERSION = 0x02
local LJM_EPILOGUE_HEADER = 0x80
+local AEVENT_SYMTAB = 0
local AEVENT_ALLOC = 1
local AEVENT_FREE = 2
local AEVENT_REALLOC = 3
@@ -41,6 +42,7 @@ local function new_event(loc)
free = 0,
alloc = 0,
primary = {},
+ name = nil
}
end
@@ -85,6 +87,21 @@ local function parse_location(reader, asource)
return symtab.id(loc), loc
end
+local function parse_symtab(reader, asource, events, heap)
+ -- asource and heap are unused here; referencing them suppresses
+ -- the unused variable warning from luacheck.
+ local _ = asource or heap
+
+ local id = reader:read_uleb128()
+ local name = reader:read_string()
+
+ if not events[id] then
+ events[id] = new_event(0)
+ end
+
+ events[id].name = name
+end
+
local function parse_alloc(reader, asource, events, heap)
local id, loc = parse_location(reader, asource)
@@ -142,6 +159,7 @@ local function parse_free(reader, asource, events, heap)
end
local parsers = {
+ [AEVENT_SYMTAB] = {evname = "symtab", parse = parse_symtab},
[AEVENT_ALLOC] = {evname = "alloc", parse = parse_alloc},
[AEVENT_FREE] = {evname = "free", parse = parse_free},
[AEVENT_REALLOC] = {evname = "realloc", parse = parse_realloc},
@@ -182,6 +200,7 @@ function M.parse(reader)
realloc = {},
free = {},
heap = {},
+ symtab = {}
}
local magic = reader:read_octets(3)
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
index 6f1685f6..e5647711 100644
--- a/tools/utils/symtab.lua
+++ b/tools/utils/symtab.lua
@@ -159,4 +159,8 @@ function M.demangle(symtab, loc)
return string_format("CFUNC %#x", addr)
end
+function M.add_cfunc(symtab, addr, name)
+ symtab.cfunc = avl.insert(symtab.cfunc, addr, {name = name})
+end
+
return M
--
2.35.1
More information about the Tarantool-patches
mailing list