From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id D6C3F70358; Wed, 15 Sep 2021 20:19:38 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org D6C3F70358 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1631726378; bh=s4pvlKM1uSqKDRYeJyKHAnfYhyzz2D83Ot78oxF6dFY=; h=To:Date:In-Reply-To:References:Subject:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=g/tucRRLMANVPVZGvGj0U7ACfXVWnq/aYGA9K26kPbG+G0yAWTMAbra43UED13oK9 bfUrJsYRaeuGOmDRZDBbuOdpPZu0yJDiLeD80Iubjjc70jsQzsY8ItDwjNOiQ16afn KSQtBqol5fOBblcO8ncVlYaY+bj4kkd9niBK8PSU= Received: from mail-lj1-f173.google.com (mail-lj1-f173.google.com [209.85.208.173]) (using TLSv1.3 with cipher TLS_AES_128_GCM_SHA256 (128/128 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 8D0C370CA8 for ; Wed, 15 Sep 2021 20:19:07 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 8D0C370CA8 Received: by mail-lj1-f173.google.com with SMTP id s3so2324333ljp.11 for ; Wed, 15 Sep 2021 10:19:07 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=27I9V1sxvqiEyd1GhqfsE3j1B8fcuU4a0bJdS2raOfE=; b=xAdQHCHPlOcLnYNk1qkPUPpk3XrbX4ioS1dDYDr0ponBNJe+OEVYGODJdJJDqXh87H adA95jsesTwed+qPmsVRptwkzxJAPVIkMpoHdtQ/0GPU627sPzELWkNTF4i1GIAl5SdX aKLRu9Zrg9h1q8ogUHy8q7WghxUS55ZuKIr7EwKkf/gNcy9v8RzYz3tXZOf3s7KScqvP j8ugwzmwgTlv2D83JzlGSiYSuyG47EmtwPO66ZGjVoFq1SStPuM/rBiqCeiogk0Mt8gL oHTXoA90pNJjBVCbOsY15mvo5t0s2NSKfn7GucXplZj/ioAgD+kqveqDw7DTS6QHMQP2 KF0g== X-Gm-Message-State: AOAM533WXObHazkoo+dnR38d4YkuoOq54Tiumy8n2Tj3bvH2W4streY5 /360Iy3na7kxWF+aM/zuQ0SBO/wMtu5nQuBO 
X-Google-Smtp-Source: ABdhPJzikO3Q4P9EOry/Nrl7ytqw862b9m4QmqKp2nMSWzStqcm+54eg8zGpWnaxMIMedoA4zFbIGQ== X-Received: by 2002:a2e:8881:: with SMTP id k1mr962915lji.443.1631726346690; Wed, 15 Sep 2021 10:19:06 -0700 (PDT) Received: from localhost.localdomain ([93.175.11.199]) by smtp.gmail.com with ESMTPSA id v5sm60271ljg.117.2021.09.15.10.19.05 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 15 Sep 2021 10:19:06 -0700 (PDT) X-Google-Original-From: Maxim Kokryashkin To: tarantool-patches@dev.tarantool.org, imun@tarantool.org, skaplun@tarantool.org Date: Wed, 15 Sep 2021 20:19:00 +0300 Message-Id: <41ec703406d4cd7e572ea41616c841ee0c028e0d.1631725806.git.m.kokryashkin@tarantool.org> X-Mailer: git-send-email 2.33.0 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH luajit v3 1/2] memprof: extend symtab with C-symbols X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Maxim Kokryashkin via Tarantool-patches Reply-To: Maxim Kokryashkin Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches"

This commit extends memprof's symbol table and event stream with information about C symbols. This information lets the parser demangle C functions, i.e. resolve their addresses into symbol names.
The following data is stored in the symtab for each symbol:

| SYMTAB_CFUNC | symbol address | symbol name |
     1 byte         8 bytes
  magic number

The following data is stored in the event stream for each newly loaded symbol:

| (AEVENT_SYMTAB | ASOURCE_CFUNC) | symbol address | symbol name |
              1 byte                   8 bytes
           magic number

Part of tarantool/tarantool#5813
--- src/lj_memprof.c | 154 ++++++++++++++++++++++++++++++++++++++++++++--- src/lj_memprof.h | 17 ++++-- 2 files changed, 160 insertions(+), 11 deletions(-) diff --git a/src/lj_memprof.c b/src/lj_memprof.c index 2c1ef3b8..17f97cc9 100644 --- a/src/lj_memprof.c +++ b/src/lj_memprof.c @@ -5,10 +5,16 @@ ** Copyright (C) 2015-2019 IPONWEB Ltd. */ +#define _GNU_SOURCE +#include + #define lj_memprof_c #define LUA_CORE #include +#include +#include +#include #include "lj_arch.h" #include "lj_memprof.h" @@ -24,7 +30,118 @@ static const unsigned char ljs_header[] = {'l', 'j', 's', LJS_CURRENT_VERSION, 0x0, 0x0, 0x0}; -static void dump_symtab(struct lj_wbuf *out, const struct global_State *g) +typedef struct { + uint32_t nbuckets; + uint32_t symoffset; + uint32_t bloom_size; + uint32_t bloom_shift; +} ghashtab_header; + +uint32_t ghashtab_size(ElfW(Addr) ghashtab) +{ + uint32_t last_entry = 0; + uint32_t* cur_bucket = NULL; + uint32_t* entry = NULL; + + const void* chain_address = NULL; + ghashtab_header* header = (ghashtab_header*)ghashtab; + const void* buckets = (void*)ghashtab + sizeof(ghashtab_header) + (sizeof(uint64_t) * header->bloom_size); + + cur_bucket = (uint32_t*)buckets; + for (uint32_t i = 0; i < header->nbuckets; ++i) { + if (last_entry < *cur_bucket) + last_entry = *cur_bucket; + cur_bucket++; + } + + if (last_entry < header->symoffset) + return header->symoffset; + + chain_address = buckets + (sizeof(uint32_t) * header->nbuckets); + do { + entry = (uint32_t*)(chain_address + (last_entry - header->symoffset) * sizeof(uint32_t)); + last_entry++; + } while (!(*entry & 1)); + + return last_entry; +} + +struct symbol_resolver_conf { + struct 
lj_wbuf *buf; + const uint8_t header; + + uint32_t cur_lib; + uint32_t lib_cnt_prev; + uint32_t to_dump_cnt; + uint32_t *lib_cnt; +}; + +int resolve_symbolnames(struct dl_phdr_info* info, size_t info_size, void* data) +{ + if(strcmp(info->dlpi_name, "linux-vdso.so.1") == 0) { + return 0; + } + + ElfW(Dyn*) dyn = NULL; + ElfW(Sym*) sym = NULL; + ElfW(Word*) hashtab = NULL; + ElfW(Word) sym_cnt = 0; + + char* strtab = 0; + char* sym_name = 0; + + struct symbol_resolver_conf *conf = data; + const uint8_t header = conf->header; + struct lj_wbuf *buf = conf->buf; + + conf->lib_cnt_prev = *conf->lib_cnt; + uint32_t lib_cnt_prev = conf->lib_cnt_prev; + + if((conf->to_dump_cnt = info->dlpi_adds - lib_cnt_prev) == 0) { + /* No new libraries, stop resolver. */ + return 1; + } + + uint32_t lib_cnt = info->dlpi_adds - info->dlpi_subs; + if(conf->cur_lib < lib_cnt - conf->to_dump_cnt) { + /* That lib is already dumped, skip it. */ + ++conf->cur_lib; + return 0; + } + + for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { + if (info->dlpi_phdr[header_index].p_type == PT_DYNAMIC) { + dyn = (ElfW(Dyn)*)(info->dlpi_addr + info->dlpi_phdr[header_index].p_vaddr); + + while(dyn->d_tag != DT_NULL) { + if (dyn->d_tag == DT_HASH) { + hashtab = (ElfW(Word*))dyn->d_un.d_ptr; + sym_cnt = hashtab[1]; + } + else if (dyn->d_tag == DT_GNU_HASH && sym_cnt == 0) + sym_cnt = ghashtab_size(dyn->d_un.d_ptr); + else if (dyn->d_tag == DT_STRTAB) + strtab = (char*)dyn->d_un.d_ptr; + else if (dyn->d_tag == DT_SYMTAB) { + sym = (ElfW(Sym*))dyn->d_un.d_ptr; + + for (ElfW(Word) sym_index = 0; sym_index < sym_cnt; sym_index++) { + sym_name = &strtab[sym[sym_index].st_name]; + lj_wbuf_addbyte(buf, header); + lj_wbuf_addu64(buf, sym[sym_index].st_value + info->dlpi_addr); + lj_wbuf_addstring(buf, sym_name); + } + } + dyn++; + } + } + } + + ++conf->cur_lib; + return 0; +} + +static void dump_symtab(struct lj_wbuf *out, const struct global_State *g, uint32_t *lib_cnt) { const GCRef 
*iter = &g->gc.root; const GCobj *o; @@ -49,6 +166,17 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g) iter = &o->gch.nextgc; } + /* Write symbols. */ + struct symbol_resolver_conf conf = { + /* buf: */ out, + /* header: */ SYMTAB_CFUNC, + /* cur_lib: */ 0, + /* lib_cnt_prev: */ *lib_cnt, + /* to_dump_cnt: */ 0, + /* lib_cnt: */ lib_cnt + }; + dl_iterate_phdr(resolve_symbolnames, &conf); + lj_wbuf_addbyte(out, SYMTAB_FINAL); } @@ -78,6 +206,7 @@ struct memprof { struct alloc orig_alloc; /* Original allocator. */ struct lj_memprof_options opt; /* Profiling options. */ int saved_errno; /* Saved errno when profiler deinstrumented. */ + uint32_t lib_cnt; /* Number of currently loaded libs. */ }; static struct memprof memprof = {0}; @@ -105,15 +234,26 @@ static void memprof_write_lfunc(struct lj_wbuf *out, uint8_t aevent, } static void memprof_write_cfunc(struct lj_wbuf *out, uint8_t aevent, - const GCfunc *fn) + const GCfunc *fn, uint32_t *lib_cnt) { + /* Check if there are any new libs. 
*/ + struct symbol_resolver_conf conf = { + /* buf: */ out, + /* header: */ AEVENT_SYMTAB | ASOURCE_CFUNC, + /* cur_lib: */ 0, + /* lib_cnt_prev: */ *lib_cnt, + /* to_dump_cnt: */ 0, + /* lib_cnt: */ lib_cnt + }; + dl_iterate_phdr(resolve_symbolnames, &conf); + lj_wbuf_addbyte(out, aevent | ASOURCE_CFUNC); lj_wbuf_addu64(out, (uintptr_t)fn->c.f); } static void memprof_write_ffunc(struct lj_wbuf *out, uint8_t aevent, GCfunc *fn, struct lua_State *L, - cTValue *frame) + cTValue *frame, uint32_t *lib_cnt) { cTValue *pframe = frame_prev(frame); GCfunc *pfn = frame_func(pframe); @@ -126,7 +266,7 @@ static void memprof_write_ffunc(struct lj_wbuf *out, uint8_t aevent, if (pfn != NULL && isluafunc(pfn)) memprof_write_lfunc(out, aevent, pfn, L, frame); else - memprof_write_cfunc(out, aevent, fn); + memprof_write_cfunc(out, aevent, fn, lib_cnt); } static void memprof_write_func(struct memprof *mp, uint8_t aevent) @@ -139,9 +279,9 @@ static void memprof_write_func(struct memprof *mp, uint8_t aevent) if (isluafunc(fn)) memprof_write_lfunc(out, aevent, fn, L, NULL); else if (isffunc(fn)) - memprof_write_ffunc(out, aevent, fn, L, frame); + memprof_write_ffunc(out, aevent, fn, L, frame, &mp->lib_cnt); else if (iscfunc(fn)) - memprof_write_cfunc(out, aevent, fn); + memprof_write_cfunc(out, aevent, fn, &mp->lib_cnt); else lua_assert(0); } @@ -249,7 +389,7 @@ int lj_memprof_start(struct lua_State *L, const struct lj_memprof_options *opt) /* Init output. */ lj_wbuf_init(&mp->out, mp_opt->writer, mp_opt->ctx, mp_opt->buf, mp_opt->len); - dump_symtab(&mp->out, mp->g); + dump_symtab(&mp->out, mp->g, &mp->lib_cnt); /* Write prologue. 
*/ lj_wbuf_addn(&mp->out, ljm_header, ljm_header_len); diff --git a/src/lj_memprof.h b/src/lj_memprof.h index 3417475d..337fa76a 100644 --- a/src/lj_memprof.h +++ b/src/lj_memprof.h @@ -16,7 +16,7 @@ #include "lj_def.h" #include "lj_wbuf.h" -#define LJS_CURRENT_VERSION 0x1 +#define LJS_CURRENT_VERSION 0x2 /* ** symtab format: @@ -25,12 +25,14 @@ ** prologue := 'l' 'j' 's' version reserved ** version := <BYTE> ** reserved := <BYTE> <BYTE> <BYTE> -** sym := sym-lua | sym-final +** sym := sym-lua | sym-cfunc | sym-final ** sym-lua := sym-header sym-addr sym-chunk sym-line ** sym-header := <BYTE> ** sym-addr := <ULEB128> ** sym-chunk := string ** sym-line := <ULEB128> +** sym-cfunc := sym-header sym-addr sym-name +** sym-name := string ** sym-final := sym-header ** string := string-len string-payload ** string-len := <ULEB128> @@ -51,9 +53,10 @@ */ #define SYMTAB_LFUNC ((uint8_t)0) +#define SYMTAB_CFUNC ((uint8_t)1) #define SYMTAB_FINAL ((uint8_t)0x80) -#define LJM_CURRENT_FORMAT_VERSION 0x01 +#define LJM_CURRENT_FORMAT_VERSION 0x02 /* ** Event stream format: @@ -64,10 +67,11 @@ ** prologue := 'l' 'j' 'm' version reserved ** version := <BYTE> ** reserved := <BYTE> <BYTE> <BYTE> -** event := event-alloc | event-realloc | event-free +** event := event-alloc | event-realloc | event-free | event-symtab ** event-alloc := event-header loc? naddr nsize ** event-realloc := event-header loc? oaddr osize naddr nsize ** event-free := event-header loc? oaddr osize +** event-symtab := event-header sym-addr sym-name ** event-header := <BYTE> ** loc := loc-lua | loc-c ** loc-lua := sym-addr line-no @@ -78,7 +82,11 @@ ** naddr := <ULEB128> ** osize := <ULEB128> ** nsize := <ULEB128> +** sym-name := string ** epilogue := event-header +** string := string-len string-payload +** string-len := <ULEB128> +** string-payload := <BYTE> {string-len} ** ** <BYTE> : A single byte (no surprises here) ** <ULEB128>: Unsigned integer represented in ULEB128 encoding @@ -97,6 +105,7 @@ */ /* Allocation events. 
*/ +#define AEVENT_SYMTAB ((uint8_t)0) #define AEVENT_ALLOC ((uint8_t)1) #define AEVENT_FREE ((uint8_t)2) #define AEVENT_REALLOC ((uint8_t)(AEVENT_ALLOC | AEVENT_FREE)) -- 2.33.0