[Tarantool-patches] [PATCH luajit v6 1/2] memprof: extend symtab with C-symbols
Sergey Kaplun
skaplun at tarantool.org
Mon Apr 4 11:43:09 MSK 2022
Hi, Maxim!
Thanks for the patch!
Please consider my comments below.
On 22.03.22, Maxim Kokryashkin wrote:
> This commit enriches memprof's symbol table with information
> about C-symbols. The parser is updated correspondingly.
>
> If there is .symtab section or at least .dynsym segment in
> a shared library, then the following data is stored in symtab
> for each symbol:
>
> | SYMTAB_CFUNC | symbol address | symbol name |
> 1 byte 8 bytes
> magic
> number
>
> If none of those are present, then instead of a symbol name
> and its address there will be name and address of a
> shared library containing that symbol.
>
> Part of tarantool/tarantool#5813
> ---
> Makefile.original | 2 +-
> src/lj_memprof.c | 328 ++++++++++++++++++
> src/lj_memprof.h | 7 +-
> test/tarantool-tests/CMakeLists.txt | 1 +
> .../gh-5813-resolving-of-c-symbols.test.lua | 61 ++++
> .../CMakeLists.txt | 2 +
> .../testresolving.c | 19 +
> test/tarantool-tests/tools-utils-avl.test.lua | 54 +++
> tools/CMakeLists.txt | 2 +
> tools/utils/avl.lua | 114 ++++++
> tools/utils/symtab.lua | 23 +-
> 11 files changed, 609 insertions(+), 4 deletions(-)
> create mode 100644 test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
> create mode 100644 test/tarantool-tests/gh-5813-resolving-of-c-symbols/CMakeLists.txt
> create mode 100644 test/tarantool-tests/gh-5813-resolving-of-c-symbols/testresolving.c
> create mode 100644 test/tarantool-tests/tools-utils-avl.test.lua
> create mode 100644 tools/utils/avl.lua
>
> diff --git a/Makefile.original b/Makefile.original
> index 33dc2ed5..0c92df9e 100644
> --- a/Makefile.original
> +++ b/Makefile.original
<snipped>
> diff --git a/src/lj_memprof.c b/src/lj_memprof.c
> index 2d779983..b1634f91 100644
> --- a/src/lj_memprof.c
> +++ b/src/lj_memprof.c
> @@ -8,16 +8,27 @@
> #define lj_memprof_c
> #define LUA_CORE
>
> +#define _GNU_SOURCE
> +
> #include <errno.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
Nit: <string.h> and <stdlib.h> are included in <lj_def.h>, there is no
need to include them again.
>
> #include "lj_arch.h"
> #include "lj_memprof.h"
>
> +#if LUAJIT_OS != LUAJIT_OS_OSX
> +#include <elf.h>
> +#include <link.h>
> +#include <sys/auxv.h>
> +#endif
> #if LJ_HASMEMPROF
>
> #include "lj_obj.h"
> #include "lj_frame.h"
> #include "lj_debug.h"
> +#include "lj_gc.h"
Nit: This include looks excess.
>
> #if LJ_HASJIT
> #include "lj_dispatch.h"
> @@ -66,12 +77,325 @@ static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
>
> #endif
>
> +#if LUAJIT_OS != LUAJIT_OS_OSX
> +
> +struct ghashtab_header {
> + uint32_t nbuckets;
> + uint32_t symoffset;
> + uint32_t bloom_size;
> + uint32_t bloom_shift;
> +};
> +
> +static uint32_t ghashtab_size(ElfW(Addr) ghashtab)
> +{
> + /*
> + ** There is no easy way to get count of symbols in GNU hashtable, so the
> + ** only way to do this is to take highest possible non-empty bucket and
> + ** iterate through its symbols until the last chain is over.
> + */
> + uint32_t last_entry = 0;
> +
> + const uint32_t *chain = NULL;
> + struct ghashtab_header *header = (struct ghashtab_header*)ghashtab;
> + /*
> + ** sizeof(size_t) returns 8, if compiled with 64-bit compiler, and 4 if
> + ** compiled with 32-bit compiler. It is the best option to determine which
> + ** kind of CPU we are running on.
> + */
> + const char *buckets = (char *)ghashtab + sizeof(struct ghashtab_header) +
> + sizeof(size_t) * header->bloom_size;
> +
> + uint32_t *cur_bucket = (uint32_t*)buckets;
Nit: use (uint32_t *) instead (uint32_t*) for casts. Same for other
structures.
Here and below.
> + for (uint32_t i = 0; i < header->nbuckets; ++i) {
> + if (last_entry < *cur_bucket)
> + last_entry = *cur_bucket;
> + cur_bucket++;
> + }
> +
> + if (last_entry < header->symoffset)
> + return header->symoffset;
> +
> + chain = (uint32_t*)(buckets + sizeof(uint32_t) * header->nbuckets);
> + /* The chain ends with the lowest bit set to 1. */
> + while (!(chain[last_entry - header->symoffset] & 1))
> + last_entry++;
> +
> + return ++last_entry;
> +}
> +
> +static void write_c_symtab
> +(ElfW(Sym*) sym, char *strtab, ElfW(Addr) so_addr,
Something strange with alignment here.
> +size_t sym_cnt, struct lj_wbuf *buf)
> +{
> + /*
> + ** Index 0 in ELF symtab is used to represent undefined symbols. Hence, we
> + ** can just start with index 1.
> + **
> + ** For more information, see:
> + ** https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-79797.html
> + */
> +
> + for (ElfW(Word) sym_index = 1; sym_index < sym_cnt; sym_index++) {
> + /*
> + ** ELF32_ST_TYPE and ELF64_ST_TYPE are the same, so we can use
> + ** ELF32_ST_TYPE for both 64-bit and 32-bit ELFs.
> + **
> + ** For more, see https://github.com/torvalds/linux/blob/9137eda53752ef73148e42b0d7640a00f1bc96b1/include/uapi/linux/elf.h#L135
> + */
> + if (ELF32_ST_TYPE(sym[sym_index].st_info) == STT_FUNC && sym[sym_index].st_name != 0) {
Nit: Linewidth is more than 120 symbols.
> + char *sym_name = &strtab[sym[sym_index].st_name];
> + lj_wbuf_addbyte(buf, SYMTAB_CFUNC);
> + lj_wbuf_addu64(buf, sym[sym_index].st_value + so_addr);
> + lj_wbuf_addstring(buf, sym_name);
> + }
> + }
> +}
> +
> +static int dump_sht_symtab
> +(const char *elf_name, struct lj_wbuf *buf, lua_State *L, const ElfW(Addr) so_addr)
Something strange with alibnment here.
Nit: Linewidth is more than 120 symbols.
> +{
> + int status = 0;
> +
> + char *strtab = NULL;
> + ElfW(Shdr*) section_headers = NULL;
> + ElfW(Sym*) sym = NULL;
> + ElfW(Ehdr) elf_header = {};
> +
> + ElfW(Off) sym_off = 0;
> + ElfW(Off) strtab_off = 0;
> +
> + size_t sym_cnt = 0;
> + size_t symtab_size = 0;
> + size_t strtab_size = 0;
> + size_t strtab_index = 0;
> +
> + size_t shoff = 0; /* Section headers offset. */
> + size_t shnum = 0; /* Section headers number. */
> + size_t shentsize = 0; /* Section header entry size. */
> +
> + FILE *elf_file = fopen(elf_name, "rb");
> +
> + if (elf_file == NULL)
> + return -1;
> +
> + if(fread(&elf_header, sizeof(elf_header), 1, elf_file) != sizeof(elf_header) &&
> + ferror(elf_file) != 0)
> + goto error;
> + if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0)
> + /* Not a valid ELF file. */
> + goto error;
> +
> + shoff = elf_header.e_shoff;
> + shnum = elf_header.e_shnum;
> + shentsize = elf_header.e_shentsize;
> +
> + if (shoff == 0 || shnum == 0 || shentsize == 0)
> + /* No sections in ELF. */
> + goto error;
> +
> + /*
> + ** Memory occupied by section headers is unlikely to be more than 160B, but
> + ** 32-bit and 64-bit ELF files may have sections of different sizes and some
> + ** of the sections may duiplicate, so we need to take that into account.
> + */
> + section_headers = lj_mem_new(L, shnum * shentsize);
> + if (section_headers == NULL)
> + goto error;
Nit: it may be rewritten as the following:
| if (!(section_headers = lj_mem_new(L, shnum * shentsize)))
| goto error;
Feel free to ignore.
Same for other initialization.
> +
> + if (fseek(elf_file, shoff, SEEK_SET) != 0)
> + goto error;
> +
> + if(fread(section_headers, shentsize, shnum, elf_file) != shentsize * shnum &&
> + ferror(elf_file) != 0)
> + goto error;
> +
> + for (size_t header_index = 0; header_index < shnum; ++header_index) {
> + if(section_headers[header_index].sh_type == SHT_SYMTAB) {
> + ElfW(Shdr) sym_hdr = section_headers[header_index];
> +
> + sym_off = sym_hdr.sh_offset;
> + symtab_size = sym_hdr.sh_size;
Nit: This variable is used only in this scope and can be declared here.
> + sym_cnt = symtab_size / sym_hdr.sh_entsize;
> +
> + ElfW(Shdr) strtab_index = sym_hdr.sh_link;
Nit: This variable is used only in this scope and can be declared here.
Moreover we can declare variable.
| strtab_hdr = section_headers[sym_hdr.sh_link];
and use it below instead.
Please check lexical scopes for other variables too.
> +
> + strtab_off = section_headers[strtab_index].sh_offset;
> + strtab_size = section_headers[strtab_index].sh_size;
> + break;
> + }
> + }
> +
> + if (sym_off == 0 || strtab_off == 0 || sym_cnt == 0)
> + goto error;
> +
> + /* Load symtab into memory. */
> + sym = lj_mem_new(L, sym_cnt * sizeof(ElfW(Sym)));
> + if (sym == NULL)
> + goto error;
> + if (fseek(elf_file, sym_off, SEEK_SET) != 0)
> + goto error;
> + if (fread(sym, sizeof(ElfW(Sym)), sym_cnt, elf_file) != sizeof(ElfW(Sym)) * sym_cnt &&
Nit: linewidth more than 120.
> + ferror(elf_file) != 0)
> + goto error;
> +
> +
> + /* Load strtab into memory. */
> + strtab = lj_mem_new(L, strtab_size * sizeof(char));
> + if (strtab == NULL)
> + goto error;
> + if (fseek(elf_file, strtab_off, SEEK_SET) != 0)
> + goto error;
> + if (fread(strtab, sizeof(char), strtab_size, elf_file) != sizeof(char) * strtab_size &&
Ditto.
> + ferror(elf_file) != 0)
> + goto error;
> +
> + write_c_symtab(sym, strtab, so_addr, sym_cnt, buf);
> +
> + goto end;
> +
> + error:
> + status = -1;
> +
> + end:
> + if (sym != NULL)
> + lj_mem_free(G(L), sym, sym_cnt * sizeof(ElfW(Sym)));
> + if(strtab != NULL)
> + lj_mem_free(G(L), strtab, strtab_size * sizeof(char));
> + if(section_headers != NULL)
> + lj_mem_free(G(L), section_headers, shnum * shentsize);
> +
> + fclose(elf_file);
> +
> + return status;
> +}
> +
> +static int dump_dyn_symtab(struct dl_phdr_info *info, struct lj_wbuf *buf)
> +{
> + for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) {
> + if (info->dlpi_phdr[header_index].p_type == PT_DYNAMIC) {
> + ElfW(Dyn*) dyn = NULL;
> + ElfW(Sym*) sym = NULL;
> + ElfW(Word*) hashtab = NULL;
> + ElfW(Addr) ghashtab = 0;
> + ElfW(Word) sym_cnt = 0;
> +
> + char *strtab = 0;
> +
> + dyn = (ElfW(Dyn)*)(info->dlpi_addr + info->dlpi_phdr[header_index].p_vaddr);
Why do you initialize variable here, not above?
> +
<snipped>
> + return 0;
> + }
> + }
> +
> + return 1;
> +}
> +
> +struct symbol_resolver_conf {
> + struct lj_wbuf *buf;
> + lua_State *L;
> +};
We can pass struct memprof instead custom one -- it already has global state to
determine lua_State and struct lj_wbuf.
| lua_State *L = gco2th(gcref(mp->g->mem_L));
> +
> +static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void *data)
Line width is more than 120 symbols.
> +{
> + struct symbol_resolver_conf *conf = data;
> + struct lj_wbuf *buf = conf->buf;
> + lua_State *L = conf->L;
> +
> + UNUSED(info_size);
> +
> + /* Skip vDSO library. */
> + if (info->dlpi_addr == getauxval(AT_SYSINFO_EHDR))
> + return 0;
> +
> + /*
> + ** Main way: try to open ELF and read SHT_SYMTAB, SHT_STRTAB and SHT_HASH
> + ** sections from it.
> + */
> + if (dump_sht_symtab(info->dlpi_name, buf, L, info->dlpi_addr) == 0) {
> + /* Empty body. */
> + }
> + /* First fallback: dump functions only from PT_DYNAMIC segment. */
> + if(dump_dyn_symtab(info, buf) == 0) {
Nit: Why it's `if` and not `else if`?
> + /* Empty body. */
> + }
> + /*
> + ** Last resort: dump ELF size and address to show .so name for its functions
> + ** in memprof output.
> + */
> + else {
> + lj_wbuf_addbyte(buf, SYMTAB_CFUNC);
> + lj_wbuf_addu64(buf, info->dlpi_addr);
> + lj_wbuf_addstring(buf, info->dlpi_name);
> + }
> +
> + return 0;
> +}
> +
> +#endif /* LUAJIT_OS != LUAJIT_OS_OSX */
> +
> static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
> {
> const GCRef *iter = &g->gc.root;
> const GCobj *o;
> const size_t ljs_header_len = sizeof(ljs_header) / sizeof(ljs_header[0]);
>
> +#if LUAJIT_OS != LUAJIT_OS_OSX
> + struct symbol_resolver_conf conf = {
> + out,
> + gco2th(gcref(g->cur_L))
> + };
> +#endif
> +
> /* Write prologue. */
> lj_wbuf_addn(out, ljs_header, ljs_header_len);
>
> @@ -95,6 +419,10 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
<snipped>
> diff --git a/src/lj_memprof.h b/src/lj_memprof.h
> index 395fb429..0327a205 100644
> --- a/src/lj_memprof.h
> +++ b/src/lj_memprof.h
> @@ -25,13 +25,15 @@
> ** prologue := 'l' 'j' 's' version reserved
> ** version := <BYTE>
> ** reserved := <BYTE> <BYTE> <BYTE>
> -** sym := sym-lua | sym-trace | sym-final
> +** sym := sym-lua | sym-cfunc | sym-trace | sym-final
> ** sym-lua := sym-header sym-addr sym-chunk sym-line
> ** sym-trace := sym-header trace-no trace-addr sym-addr sym-line
> ** sym-header := <BYTE>
> ** sym-addr := <ULEB128>
> ** sym-chunk := string
> ** sym-line := <ULEB128>
> +** sym-cfunc := sym-header sym-addr sym-name
> +** sym-name := string
> ** sym-final := sym-header
> ** trace-no := <ULEB128>
> ** trace-addr := <ULEB128>
I suggest to bump LJS_CURRENT_VERSION as far as this commit change its
structure.
> @@ -54,7 +56,8 @@
> */
>
> #define SYMTAB_LFUNC ((uint8_t)0)
> -#define SYMTAB_TRACE ((uint8_t)1)
> +#define SYMTAB_CFUNC ((uint8_t)1)
> +#define SYMTAB_TRACE ((uint8_t)2)
> #define SYMTAB_FINAL ((uint8_t)0x80)
>
> #define LJM_CURRENT_FORMAT_VERSION 0x02
> diff --git a/test/tarantool-tests/CMakeLists.txt b/test/tarantool-tests/CMakeLists.txt
> index b21500a0..28c41f33 100644
> --- a/test/tarantool-tests/CMakeLists.txt
> +++ b/test/tarantool-tests/CMakeLists.txt
<snipped>
> diff --git a/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua b/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
> new file mode 100644
> index 00000000..8f20511c
> --- /dev/null
> +++ b/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua
> @@ -0,0 +1,61 @@
> +-- Memprof is implemented for x86 and x64 architectures only.
> +require("utils").skipcond(
> + (jit.arch ~= "x86" and jit.arch ~= "x64")
> + or jit.os == "OSX",
> + jit.arch.." architecture is NIY for memprof"
Looks like it should be:
| jit.arch.." architecture or "..jit.os.." OS is NIY for memprof c
| symbols resolving"
> +)
> +
> +local tap = require("tap")
> +local test = tap.test("gh-5813-resolving-of-c-symbols")
> +test:plan(2)
> +
> +jit.off()
> +jit.flush()
> +
> +local bufread = require "utils.bufread"
> +local symtab = require "utils.symtab"
> +
> +local TMP_BINFILE = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%.%1.memprofdata.tmp.bin")
> +
> +local function tree_contains(node, name)
> + if node == nil then
> + return false
> + elseif node.value.name == name then
> + return true
> + else
> + return tree_contains(node.left, name) or tree_contains(node.right, name)
> + end
> +end
> +
> +-- Static symbols resolution.
> +local res, err = misc.memprof.start(TMP_BINFILE)
> +assert(res, err)
> +-- That Lua module is required here to trigger the `luaopen_os`, which is not
> +-- stripped in the debug build.
> +local testlib = require "testresolving"
> +misc.memprof.stop()
> +
> +local reader = bufread.new(TMP_BINFILE)
> +local symbols = symtab.parse(reader)
> +
> +test:ok(tree_contains(symbols.cfunc, "luaopen_os"))
> +
> +-- Dynamic symbols resolution.
> +res, err = misc.memprof.start(TMP_BINFILE)
> +assert(res, err)
> +for _=1, 1e5 do testlib.allocate_string() end
Typo: s/for _=1/for _ = 1/
> +misc.memprof.stop()
> +
> +reader = bufread.new(TMP_BINFILE)
> +symbols = symtab.parse(reader)
> +
> +test:ok(tree_contains(symbols.cfunc, "allocate_string"))
> +
> +-- FIXME: There is one case that is not tested -- shared objects, which
> +-- have neither .symtab section nor .dynsym segment. It is unclear how to
> +-- perform a test in that case, since it is impossible to load Lua module
> +-- written in C if it doesn't have a .dynsym segment.
> +
> +os.remove(TMP_BINFILE)
> +os.exit(test:check() and 0 or 1)
> +
This empty line is excess.
> diff --git a/test/tarantool-tests/gh-5813-resolving-of-c-symbols/CMakeLists.txt b/test/tarantool-tests/gh-5813-resolving-of-c-symbols/CMakeLists.txt
> new file mode 100644
> index 00000000..e467f0e9
> --- /dev/null
> +++ b/test/tarantool-tests/gh-5813-resolving-of-c-symbols/CMakeLists.txt
> @@ -0,0 +1,2 @@
> +BuildTestCLib(testresolving testresolving.c)
> +target_link_options(testresolving PRIVATE "-s")
And what about different type of hashes?
Please create 3 different .so libs with .gnu_hash, just .hash and both.
> diff --git a/test/tarantool-tests/gh-5813-resolving-of-c-symbols/testresolving.c b/test/tarantool-tests/gh-5813-resolving-of-c-symbols/testresolving.c
> new file mode 100644
> index 00000000..e0b38151
> --- /dev/null
> +++ b/test/tarantool-tests/gh-5813-resolving-of-c-symbols/testresolving.c
> @@ -0,0 +1,19 @@
> +#include <lua.h>
> +#include <lauxlib.h>
> +
> +#define TEST_STRING "test string"
Looks like this define is excess :). We just use it once.
> +
> +int allocate_string(lua_State *L) {
> + lua_pushstring(L, TEST_STRING);
> + return 1;
> +}
> +
> +static const struct luaL_Reg testresolving [] = {
> + {"allocate_string", allocate_string},
> + {NULL, NULL}
> +};
> +
> +int luaopen_testresolving(lua_State *L) {
> + luaL_register(L, "testresolving", testresolving);
> + return 1;
> +}
> diff --git a/test/tarantool-tests/tools-utils-avl.test.lua b/test/tarantool-tests/tools-utils-avl.test.lua
> new file mode 100644
> index 00000000..b49af3f7
> --- /dev/null
> +++ b/test/tarantool-tests/tools-utils-avl.test.lua
> @@ -0,0 +1,54 @@
<snipped>
> +local function compare(arr1, arr2)
> + for i, v in pairs(arr1) do
Typo: tab instead spaces.
> + if v ~= arr2[i] then
> + return false
> + end
> + end
> + return true
> +end
> +
> +-- 1L rotation test.
<snipped>
> +os.exit(test:check() and 0 or 1)
> +
> +
Nit: This 2 empty lines are excess.
> diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
> index 61830e44..c6803d00 100644
> --- a/tools/CMakeLists.txt
> +++ b/tools/CMakeLists.txt
<snipped>
> diff --git a/tools/utils/avl.lua b/tools/utils/avl.lua
> new file mode 100644
> index 00000000..2e168c36
> --- /dev/null
> +++ b/tools/utils/avl.lua
<snipped>
> +return M
> +
Nit: This empty line is excess.
> diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
> index c7fcf77c..6f1685f6 100644
> --- a/tools/utils/symtab.lua
> +++ b/tools/utils/symtab.lua
<snipped>
> --
> 2.35.1
>
--
Best regards,
Sergey Kaplun
More information about the Tarantool-patches
mailing list