Tarantool development patches archive
 help / color / mirror / Atom feed
From: Maxim Kokryashkin via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: tarantool-patches@dev.tarantool.org, imun@tarantool.org,
	skaplun@tarantool.org
Subject: [Tarantool-patches] [PATCH luajit v3 1/2] memprof: extend symtab with C-symbols
Date: Wed, 15 Sep 2021 20:19:00 +0300	[thread overview]
Message-ID: <41ec703406d4cd7e572ea41616c841ee0c028e0d.1631725806.git.m.kokryashkin@tarantool.org> (raw)
In-Reply-To: <cover.1631725806.git.m.kokryashkin@tarantool.org>

This commit extends memprof's symbol table and event stream with
information about C symbols. With that information the parser can map
the addresses of C functions to their names (demangling).

The following data is stored in the symtab for each symbol:
| SYMTAB_CFUNC | symbol address | symbol name |
  1 byte            8 bytes
  magic
  number

The following data is written to the event stream for each newly loaded symbol:
| (AEVENT_SYMTAB | ASOURCE_CFUNC) | symbol address | symbol name |
              1 byte                   8 bytes
              magic
              number

Part of tarantool/tarantool#5813
---
 src/lj_memprof.c | 154 ++++++++++++++++++++++++++++++++++++++++++++---
 src/lj_memprof.h |  17 ++++--
 2 files changed, 160 insertions(+), 11 deletions(-)

diff --git a/src/lj_memprof.c b/src/lj_memprof.c
index 2c1ef3b8..17f97cc9 100644
--- a/src/lj_memprof.c
+++ b/src/lj_memprof.c
@@ -5,10 +5,16 @@
 ** Copyright (C) 2015-2019 IPONWEB Ltd.
 */
 
+#define _GNU_SOURCE
+#include <elf.h>
+
 #define lj_memprof_c
 #define LUA_CORE
 
 #include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <link.h>
 
 #include "lj_arch.h"
 #include "lj_memprof.h"
@@ -24,7 +30,118 @@
 static const unsigned char ljs_header[] = {'l', 'j', 's', LJS_CURRENT_VERSION,
 					   0x0, 0x0, 0x0};
 
-static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
+typedef struct {
+    uint32_t nbuckets;
+    uint32_t symoffset;
+    uint32_t bloom_size;
+    uint32_t bloom_shift;
+} ghashtab_header;
+
+uint32_t ghashtab_size(ElfW(Addr) ghashtab)
+{
+    uint32_t last_entry = 0;
+    uint32_t* cur_bucket = NULL;
+    uint32_t* entry = NULL;
+
+    const void* chain_address = NULL;
+    ghashtab_header* header = (ghashtab_header*)ghashtab;
+    const void* buckets = (void*)ghashtab + sizeof(ghashtab_header) + (sizeof(uint64_t) * header->bloom_size);
+
+    cur_bucket = (uint32_t*)buckets;
+    for (uint32_t i = 0; i < header->nbuckets; ++i) {
+        if (last_entry < *cur_bucket)
+            last_entry = *cur_bucket;
+        cur_bucket++;
+    }
+
+    if (last_entry < header->symoffset)
+        return header->symoffset;
+
+    chain_address = buckets + (sizeof(uint32_t) * header->nbuckets);
+    do {
+        entry = (uint32_t*)(chain_address + (last_entry - header->symoffset) * sizeof(uint32_t));
+        last_entry++;
+    } while (!(*entry & 1));
+
+    return last_entry;
+}
+
+struct symbol_resolver_conf {
+  struct lj_wbuf *buf;
+  const uint8_t header;
+
+  uint32_t cur_lib;
+  uint32_t lib_cnt_prev;
+  uint32_t to_dump_cnt;
+  uint32_t *lib_cnt;
+};
+
+int resolve_symbolnames(struct dl_phdr_info* info, size_t info_size, void* data) 
+{
+  if(strcmp(info->dlpi_name, "linux-vdso.so.1") == 0) {
+    return 0;
+  }
+
+  ElfW(Dyn*) dyn = NULL;
+  ElfW(Sym*) sym = NULL;
+  ElfW(Word*) hashtab = NULL;
+  ElfW(Word) sym_cnt = 0;
+
+  char* strtab = 0;
+  char* sym_name = 0;
+
+  struct symbol_resolver_conf *conf = data;
+  const uint8_t header = conf->header;
+  struct lj_wbuf *buf = conf->buf;
+
+  conf->lib_cnt_prev = *conf->lib_cnt;
+  uint32_t lib_cnt_prev = conf->lib_cnt_prev;
+
+  if((conf->to_dump_cnt = info->dlpi_adds - lib_cnt_prev) == 0) {
+    /* No new libraries, stop resolver. */
+    return 1;
+  }
+
+  uint32_t lib_cnt = info->dlpi_adds - info->dlpi_subs;
+  if(conf->cur_lib < lib_cnt - conf->to_dump_cnt) {
+    /* That lib is already dumped, skip it. */
+    ++conf->cur_lib;
+    return 0;
+  }
+
+  for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) {
+    if (info->dlpi_phdr[header_index].p_type == PT_DYNAMIC) {
+      dyn = (ElfW(Dyn)*)(info->dlpi_addr +  info->dlpi_phdr[header_index].p_vaddr);
+
+      while(dyn->d_tag != DT_NULL) {
+        if (dyn->d_tag == DT_HASH) {
+          hashtab = (ElfW(Word*))dyn->d_un.d_ptr;
+          sym_cnt = hashtab[1];
+        }
+        else if (dyn->d_tag == DT_GNU_HASH && sym_cnt == 0)
+          sym_cnt = ghashtab_size(dyn->d_un.d_ptr);
+        else if (dyn->d_tag == DT_STRTAB)
+          strtab = (char*)dyn->d_un.d_ptr;
+        else if (dyn->d_tag == DT_SYMTAB) {
+          sym = (ElfW(Sym*))dyn->d_un.d_ptr;
+
+          for (ElfW(Word) sym_index = 0; sym_index < sym_cnt; sym_index++) {
+              sym_name = &strtab[sym[sym_index].st_name];
+              lj_wbuf_addbyte(buf, header);
+              lj_wbuf_addu64(buf, sym[sym_index].st_value + info->dlpi_addr);
+              lj_wbuf_addstring(buf, sym_name);
+          }
+        }
+        dyn++;
+      }
+    }
+  }
+
+  ++conf->cur_lib;
+  return 0;
+}
+
+static void dump_symtab(struct lj_wbuf *out, const struct global_State *g, uint32_t *lib_cnt)
 {
   const GCRef *iter = &g->gc.root;
   const GCobj *o;
@@ -49,6 +166,17 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
     iter = &o->gch.nextgc;
   }
 
+  /* Write symbols. */
+  struct symbol_resolver_conf conf = {
+    /* buf: */ out,
+    /* header: */ SYMTAB_CFUNC,
+    /* cur_lib: */ 0,
+    /* lib_cnt_prev: */ *lib_cnt,
+    /* to_dump_cnt: */ 0,
+    /* lib_cnt: */ lib_cnt
+  };
+  dl_iterate_phdr(resolve_symbolnames, &conf);
+
   lj_wbuf_addbyte(out, SYMTAB_FINAL);
 }
 
@@ -78,6 +206,7 @@ struct memprof {
   struct alloc orig_alloc; /* Original allocator. */
   struct lj_memprof_options opt; /* Profiling options. */
   int saved_errno; /* Saved errno when profiler deinstrumented. */
+  uint32_t lib_cnt; /* Number of currently loaded libs. */
 };
 
 static struct memprof memprof = {0};
@@ -105,15 +234,26 @@ static void memprof_write_lfunc(struct lj_wbuf *out, uint8_t aevent,
 }
 
 static void memprof_write_cfunc(struct lj_wbuf *out, uint8_t aevent,
-				const GCfunc *fn)
+				const GCfunc *fn, uint32_t *lib_cnt)
 {
+  /* Check if there are any new libs. */
+  struct symbol_resolver_conf conf = {
+    /* buf: */ out,
+    /* header: */ AEVENT_SYMTAB | ASOURCE_CFUNC,
+    /* cur_lib: */ 0,
+    /* lib_cnt_prev: */ *lib_cnt,
+    /* to_dump_cnt: */ 0,
+    /* lib_cnt: */ lib_cnt
+  };
+  dl_iterate_phdr(resolve_symbolnames, &conf);
+
   lj_wbuf_addbyte(out, aevent | ASOURCE_CFUNC);
   lj_wbuf_addu64(out, (uintptr_t)fn->c.f);
 }
 
 static void memprof_write_ffunc(struct lj_wbuf *out, uint8_t aevent,
 				GCfunc *fn, struct lua_State *L,
-				cTValue *frame)
+				cTValue *frame, uint32_t *lib_cnt)
 {
   cTValue *pframe = frame_prev(frame);
   GCfunc *pfn = frame_func(pframe);
@@ -126,7 +266,7 @@ static void memprof_write_ffunc(struct lj_wbuf *out, uint8_t aevent,
   if (pfn != NULL && isluafunc(pfn))
     memprof_write_lfunc(out, aevent, pfn, L, frame);
   else
-    memprof_write_cfunc(out, aevent, fn);
+    memprof_write_cfunc(out, aevent, fn, lib_cnt);
 }
 
 static void memprof_write_func(struct memprof *mp, uint8_t aevent)
@@ -139,9 +279,9 @@ static void memprof_write_func(struct memprof *mp, uint8_t aevent)
   if (isluafunc(fn))
     memprof_write_lfunc(out, aevent, fn, L, NULL);
   else if (isffunc(fn))
-    memprof_write_ffunc(out, aevent, fn, L, frame);
+    memprof_write_ffunc(out, aevent, fn, L, frame, &mp->lib_cnt);
   else if (iscfunc(fn))
-    memprof_write_cfunc(out, aevent, fn);
+    memprof_write_cfunc(out, aevent, fn, &mp->lib_cnt);
   else
     lua_assert(0);
 }
@@ -249,7 +389,7 @@ int lj_memprof_start(struct lua_State *L, const struct lj_memprof_options *opt)
 
   /* Init output. */
   lj_wbuf_init(&mp->out, mp_opt->writer, mp_opt->ctx, mp_opt->buf, mp_opt->len);
-  dump_symtab(&mp->out, mp->g);
+  dump_symtab(&mp->out, mp->g, &mp->lib_cnt);
 
   /* Write prologue. */
   lj_wbuf_addn(&mp->out, ljm_header, ljm_header_len);
diff --git a/src/lj_memprof.h b/src/lj_memprof.h
index 3417475d..337fa76a 100644
--- a/src/lj_memprof.h
+++ b/src/lj_memprof.h
@@ -16,7 +16,7 @@
 #include "lj_def.h"
 #include "lj_wbuf.h"
 
-#define LJS_CURRENT_VERSION 0x1
+#define LJS_CURRENT_VERSION 0x2
 
 /*
 ** symtab format:
@@ -25,12 +25,14 @@
 ** prologue       := 'l' 'j' 's' version reserved
 ** version        := <BYTE>
 ** reserved       := <BYTE> <BYTE> <BYTE>
-** sym            := sym-lua | sym-final
+** sym            := sym-lua | sym-cfunc | sym-final
 ** sym-lua        := sym-header sym-addr sym-chunk sym-line
 ** sym-header     := <BYTE>
 ** sym-addr       := <ULEB128>
 ** sym-chunk      := string
 ** sym-line       := <ULEB128>
+** sym-cfunc      := sym-header sym-addr sym-name
+** sym-name       := string
 ** sym-final      := sym-header
 ** string         := string-len string-payload
 ** string-len     := <ULEB128>
@@ -51,9 +53,10 @@
 */
 
 #define SYMTAB_LFUNC ((uint8_t)0)
+#define SYMTAB_CFUNC ((uint8_t)1)
 #define SYMTAB_FINAL ((uint8_t)0x80)
 
-#define LJM_CURRENT_FORMAT_VERSION 0x01
+#define LJM_CURRENT_FORMAT_VERSION 0x02
 
 /*
 ** Event stream format:
@@ -64,10 +67,11 @@
 ** prologue       := 'l' 'j' 'm' version reserved
 ** version        := <BYTE>
 ** reserved       := <BYTE> <BYTE> <BYTE>
-** event          := event-alloc | event-realloc | event-free
+** event          := event-alloc | event-realloc | event-free | event-symtab
 ** event-alloc    := event-header loc? naddr nsize
 ** event-realloc  := event-header loc? oaddr osize naddr nsize
 ** event-free     := event-header loc? oaddr osize
+** event-symtab   := event-header sym-addr sym-name
 ** event-header   := <BYTE>
 ** loc            := loc-lua | loc-c
 ** loc-lua        := sym-addr line-no
@@ -78,7 +82,11 @@
 ** naddr          := <ULEB128>
 ** osize          := <ULEB128>
 ** nsize          := <ULEB128>
+** sym-name       := string
 ** epilogue       := event-header
+** string         := string-len string-payload
+** string-len     := <ULEB128>
+** string-payload := <BYTE> {string-len}
 **
 ** <BYTE>   :  A single byte (no surprises here)
 ** <ULEB128>:  Unsigned integer represented in ULEB128 encoding
@@ -97,6 +105,7 @@
 */
 
 /* Allocation events. */
+#define AEVENT_SYMTAB  ((uint8_t)0)
 #define AEVENT_ALLOC   ((uint8_t)1)
 #define AEVENT_FREE    ((uint8_t)2)
 #define AEVENT_REALLOC ((uint8_t)(AEVENT_ALLOC | AEVENT_FREE))
-- 
2.33.0


  reply	other threads:[~2021-09-15 17:19 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-15 17:18 [Tarantool-patches] [PATCH luajit v3 0/2] memprof: add demangling capabilities for C functions Maxim Kokryashkin via Tarantool-patches
2021-09-15 17:19 ` Maxim Kokryashkin via Tarantool-patches [this message]
2021-09-15 17:19 ` [Tarantool-patches] [PATCH luajit v3 2/2] memprof: update memprof parser Maxim Kokryashkin via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=41ec703406d4cd7e572ea41616c841ee0c028e0d.1631725806.git.m.kokryashkin@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=imun@tarantool.org \
    --cc=max.kokryashkin@gmail.com \
    --cc=skaplun@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit v3 1/2] memprof: extend symtab with C-symbols' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox