Tarantool development patches archive
 help / color / mirror / Atom feed
From: Maxim Kokryashkin via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: tarantool-patches@dev.tarantool.org, imun@tarantool.org,
	skaplun@tarantool.org
Subject: [Tarantool-patches] [PATCH 1/3] memprof: extend memprof symbol table
Date: Fri, 23 Jul 2021 15:39:50 +0300	[thread overview]
Message-ID: <613dd34e9f2ecf2faec5da20a6232b7df31c6e7c.1627043674.git.m.kokryashkin@tarantool.org> (raw)
In-Reply-To: <cover.1627043674.git.m.kokryashkin@tarantool.org>

This commit enriches memprof's symbol table with information about loaded
".so" files. That information will provide demangling capabilities to
the parser.

The following data is stored for each ".so" library:
| SYMTAB_SO | path to .so | it's address |  CRC32 Hash  |
  1 byte                       8 bytes       8 bytes
  magic
  number

C symbol address from profile events with an address of the ".so" file will
give us an offset, and it can be used to get the symbol in that file.

CRC32 hash will be used by parser to determine whether the shared
object that was used during the process profiing and during the process
of parsing is the same.

The CRC32 hash implementation was taken from gcc libberty, which is
distributed under GNU GPL license

Part of tarantool/tarantool#5813
---
 src/CMakeLists.txt |   1 +
 src/lj_hash.c      | 136 +++++++++++++++++++++++++++++++++++++++++++++
 src/lj_hash.h      |  28 ++++++++++
 src/lj_memprof.c   |  27 ++++++++-
 src/lj_memprof.h   |   1 +
 5 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 src/lj_hash.c
 create mode 100644 src/lj_hash.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 809aac68..feefb412 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -63,6 +63,7 @@ make_source_list(SOURCES_UTILS
     lj_utils_leb128.c
     lj_vmmath.c
     lj_wbuf.c
+    lj_hash.c
 )
 
 make_source_list(SOURCES_PROFILER
diff --git a/src/lj_hash.c b/src/lj_hash.c
new file mode 100644
index 00000000..88c41a4b
--- /dev/null
+++ b/src/lj_hash.c
@@ -0,0 +1,136 @@
+/*
+   Copyright (C) 2009-2021 Free Software Foundation, Inc.
+   This file is part of the libiberty library.
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+   In addition to the permissions in the GNU General Public License, the
+   Free Software Foundation gives you unlimited permission to link the
+   compiled version of this file into combinations with other programs,
+   and to distribute those combinations without any restriction coming
+   from the use of this file.  (The General Public License restrictions
+   do apply in other respects; for example, they cover modification of
+   the file, and distribution when not linked into a combined
+   executable.)
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "lj_hash.h"
+
+static const uint32_t crc32_table[] =
+{
+  0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9,
+  0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005,
+  0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61,
+  0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd,
+  0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9,
+  0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75,
+  0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011,
+  0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd,
+  0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039,
+  0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5,
+  0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81,
+  0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d,
+  0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49,
+  0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95,
+  0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
+  0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d,
+  0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae,
+  0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072,
+  0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16,
+  0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca,
+  0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde,
+  0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02,
+  0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066,
+  0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
+  0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e,
+  0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692,
+  0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6,
+  0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a,
+  0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e,
+  0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2,
+  0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686,
+  0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a,
+  0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637,
+  0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb,
+  0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f,
+  0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53,
+  0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47,
+  0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b,
+  0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff,
+  0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623,
+  0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7,
+  0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
+  0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f,
+  0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3,
+  0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7,
+  0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b,
+  0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f,
+  0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3,
+  0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640,
+  0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c,
+  0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8,
+  0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24,
+  0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30,
+  0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
+  0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088,
+  0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654,
+  0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0,
+  0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c,
+  0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18,
+  0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
+  0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0,
+  0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c,
+  0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668,
+  0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4
+};
+
+uint32_t lj_crc32 (const char *buf, size_t len, uint32_t init)
+{
+  uint32_t crc = init;
+  while (len--)
+    {
+      crc = (crc << 8) ^ crc32_table[((crc >> 24) ^ *buf) & 255];
+      buf++;
+    }
+  return crc;
+}
+
+uint32_t lj_get_file_crc32(const char* name) {
+  FILE* so = fopen(name, "r");
+  if(!so) {
+    return 0;
+  }
+
+  fseek(so, 0, SEEK_END);
+  size_t size = ftell(so);
+  rewind(so);
+
+  char* buf = calloc(size, sizeof(char));
+  if(!buf) {
+    fclose(so);
+    return 0;
+  }
+
+  if(size != fread(buf, sizeof(char), size, so)) {
+    free(buf);
+    fclose(so);
+    return 0;
+  }
+
+  uint32_t hash = lj_crc32(buf, size, 0);
+  free(buf);
+  fclose(so);
+  return hash;
+}
+
diff --git a/src/lj_hash.h b/src/lj_hash.h
new file mode 100644
index 00000000..aca24e97
--- /dev/null
+++ b/src/lj_hash.h
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <sys/types.h>
+
+/*
+@deftypefn Extension {unsigned int} crc32 (const unsigned char *@var{buf}, @
+  int @var{len}, unsigned int @var{init})
+Compute the 32-bit CRC of @var{buf} which has length @var{len}.  The
+starting value is @var{init}; this may be used to compute the CRC of
+data split across multiple buffers by passing the return value of each
+call as the @var{init} parameter of the next.
+This is used by the @command{gdb} remote protocol for the @samp{qCRC}
+command.  In order to get the same results as gdb for a block of data,
+you must pass the first CRC parameter as @code{0xffffffff}.
+This CRC can be specified as:
+  Width  : 32
+  Poly   : 0x04c11db7
+  Init   : parameter, typically 0xffffffff
+  RefIn  : false
+  RefOut : false
+  XorOut : 0
+This differs from the "standard" CRC-32 algorithm in that the values
+are not reflected, and there is no final XOR value.  These differences
+make it easy to compose the values of multiple blocks.
+@end deftypefn
+*/
+
+uint32_t lj_crc32(const char *buf, size_t len, uint32_t init);
+uint32_t lj_get_file_crc32(const char* name);
diff --git a/src/lj_memprof.c b/src/lj_memprof.c
index 2c1ef3b8..4e8183aa 100644
--- a/src/lj_memprof.c
+++ b/src/lj_memprof.c
@@ -5,13 +5,19 @@
 ** Copyright (C) 2015-2019 IPONWEB Ltd.
 */
 
+#define _GNU_SOURCE
+#include <link.h>
+
+#include "lj_wbuf.h"
 #define lj_memprof_c
 #define LUA_CORE
-
 #include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
 
 #include "lj_arch.h"
 #include "lj_memprof.h"
+#include "lj_hash.h"
 
 #if LJ_HASMEMPROF
 
@@ -24,6 +30,22 @@
 static const unsigned char ljs_header[] = {'l', 'j', 's', LJS_CURRENT_VERSION,
 					   0x0, 0x0, 0x0};
 
+int write_shared_obj(struct dl_phdr_info *info, size_t size, void *data)
+{
+  struct lj_wbuf *buf = data;
+
+  if(info->dlpi_name && strlen(info->dlpi_name)) {
+    lj_wbuf_addbyte(buf, SYMTAB_SO);
+    lj_wbuf_addstring(buf, info->dlpi_name);
+    lj_wbuf_addu64(buf, info->dlpi_addr);
+
+    uint64_t so_hash = lj_get_file_crc32(info->dlpi_name);
+    lj_wbuf_addu64(buf, so_hash);
+  }
+
+  return 0;
+}
+
 static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
 {
   const GCRef *iter = &g->gc.root;
@@ -49,6 +71,9 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
     iter = &o->gch.nextgc;
   }
 
+  /* Write shared libraries */
+  dl_iterate_phdr(write_shared_obj, out);
+
   lj_wbuf_addbyte(out, SYMTAB_FINAL);
 }
 
diff --git a/src/lj_memprof.h b/src/lj_memprof.h
index 3417475d..d53f77bc 100644
--- a/src/lj_memprof.h
+++ b/src/lj_memprof.h
@@ -51,6 +51,7 @@
 */
 
 #define SYMTAB_LFUNC ((uint8_t)0)
+#define SYMTAB_SO ((uint8_t)1)
 #define SYMTAB_FINAL ((uint8_t)0x80)
 
 #define LJM_CURRENT_FORMAT_VERSION 0x01
-- 
2.32.0


  reply	other threads:[~2021-07-23 12:40 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-23 12:39 [Tarantool-patches] [PATCH 0/3] memprof: add demangling capabilities for C functions Maxim Kokryashkin via Tarantool-patches
2021-07-23 12:39 ` Maxim Kokryashkin via Tarantool-patches [this message]
2021-07-28  8:16   ` [Tarantool-patches] [PATCH 1/3] memprof: extend memprof symbol table Sergey Kaplun via Tarantool-patches
2021-07-23 12:39 ` [Tarantool-patches] [PATCH 2/3] memprof: update memprof parser Maxim Kokryashkin via Tarantool-patches
2021-07-23 12:39 ` [Tarantool-patches] [PATCH 3/3] test: adapt memprof tests to new symbol table format Maxim Kokryashkin via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=613dd34e9f2ecf2faec5da20a6232b7df31c6e7c.1627043674.git.m.kokryashkin@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=imun@tarantool.org \
    --cc=max.kokryashkin@gmail.com \
    --cc=skaplun@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH 1/3] memprof: extend memprof symbol table' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox