From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 2710C6ECDB; Tue, 22 Mar 2022 21:32:04 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 2710C6ECDB DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1647973924; bh=Qar06Y11INIZbCD4Ou+IG1kHZy7ABMczt5bUFVBytpk=; h=To:Date:In-Reply-To:References:Subject:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=gPoTb10kv/hdYIFnV+gb3SadBMFI9icvORUyGxinzcbs5nydHaBpD+Bdt4a7gbzj3 H750GU9Syb/9LPV4i80JGgJMc9CfKKlhWQ65j2MSpDFhYvUCX3KN52tmQC5c/zvZho 4pnbBj8b+9+OgQ7dmMIgEGpC7TMKYXROtH91VFAY= Received: from mail-lf1-f48.google.com (mail-lf1-f48.google.com [209.85.167.48]) (using TLSv1.3 with cipher TLS_AES_128_GCM_SHA256 (128/128 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id B234B6ECFE for ; Tue, 22 Mar 2022 21:31:32 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org B234B6ECFE Received: by mail-lf1-f48.google.com with SMTP id e16so17062363lfc.13 for ; Tue, 22 Mar 2022 11:31:32 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=rX5Ax+WWcyMZuDiPvYCOy/6IuURkJ/fIJkrgJITaTgM=; b=akB+uTMHtnwyZjSnYMtzp/bU8WYCZTALlkx6s1QB97s7Fhs8lqbSD+LAeVs65AkVIK earevDEyTqMlXTqmw/VcIjyc6rhEQ4h5Fzx+i5c/gqDznEeCCPl0T9ncHaAl0sSrn310 LSwC0iPFVHXgm6ofdDBzEQe1ZG2O9XKM6pOSSYRU6/Kxp6DTv/ZdyVSBa41scnQmqCcz Lw0dpthNTrzFIA7XDljnJY8OUX0PDz29bordc88ihXqa6NgOvhZftUtGnI0AAFQ9GrwX cS/IrAvuM1vTKWHzgG+tmQRJlen3wu75W1GqZyCbwNfh+YwWNPHHRSO8YvvwdBHsYFFL ntEQ== X-Gm-Message-State: AOAM533HR55KPxPJW5nd1OCHJnCLiC4AYlFXTpfMjr/lXSyGcfFw3fkN wj8uiKz01c03FupH8r8ORXMgdASdlqoYLQ== X-Google-Smtp-Source: ABdhPJwDLWxqwNFa64CGuqwXX+wMq8HeqnfoTUcJOEVt/+mia6LXKTq6gaghDc89Ooi1bVcMgNtLPw== X-Received: by 2002:a05:6512:3a91:b0:448:58a8:3e52 with SMTP id q17-20020a0565123a9100b0044858a83e52mr18991750lfu.350.1647973891473; Tue, 22 Mar 2022 11:31:31 -0700 (PDT) Received: from localhost.localdomain ([93.175.11.199]) by smtp.gmail.com with ESMTPSA id p6-20020a056512234600b0044a0a2b061esm1772996lfu.82.2022.03.22.11.31.30 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 22 Mar 2022 11:31:30 -0700 (PDT) X-Google-Original-From: Maxim Kokryashkin To: tarantool-patches@dev.tarantool.org, imun@tarantool.org, skaplun@tarantool.org Date: Tue, 22 Mar 2022 21:31:24 +0300 Message-Id: <8bff86d09e351abbb3fb92b370bca72bfabf7369.1647972832.git.m.kokryashkin@tarantool.org> X-Mailer: git-send-email 2.35.1 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH luajit v6 1/2] memprof: extend symtab with C-symbols X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Maxim Kokryashkin via Tarantool-patches Reply-To: Maxim Kokryashkin Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" This commit enriches memprof's symbol table with information about C-symbols. The parser is updated correspondingly. If there is .symtab section or at least .dynsym segment in a shared library, then the following data is stored in symtab for each symbol: | SYMTAB_CFUNC | symbol address | symbol name | 1 byte 8 bytes magic number If none of those are present, then instead of a symbol name and its address there will be name and address of a shared library containing that symbol. Part of tarantool/tarantool#5813 --- Makefile.original | 2 +- src/lj_memprof.c | 328 ++++++++++++++++++ src/lj_memprof.h | 7 +- test/tarantool-tests/CMakeLists.txt | 1 + .../gh-5813-resolving-of-c-symbols.test.lua | 61 ++++ .../CMakeLists.txt | 2 + .../testresolving.c | 19 + test/tarantool-tests/tools-utils-avl.test.lua | 54 +++ tools/CMakeLists.txt | 2 + tools/utils/avl.lua | 114 ++++++ tools/utils/symtab.lua | 23 +- 11 files changed, 609 insertions(+), 4 deletions(-) create mode 100644 test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua create mode 100644 test/tarantool-tests/gh-5813-resolving-of-c-symbols/CMakeLists.txt create mode 100644 test/tarantool-tests/gh-5813-resolving-of-c-symbols/testresolving.c create mode 100644 test/tarantool-tests/tools-utils-avl.test.lua create mode 100644 tools/utils/avl.lua diff --git a/Makefile.original b/Makefile.original index 33dc2ed5..0c92df9e 100644 --- a/Makefile.original +++ b/Makefile.original @@ -100,7 +100,7 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ dis_mips64.lua dis_mips64el.lua vmdef.lua -FILES_UTILSLIB= bufread.lua symtab.lua +FILES_UTILSLIB= avl.lua bufread.lua symtab.lua FILES_MEMPROFLIB= parse.lua humanize.lua FILES_TOOLSLIB= memprof.lua FILE_TMEMPROF= luajit-parse-memprof diff --git a/src/lj_memprof.c b/src/lj_memprof.c index 2d779983..b1634f91 100644 --- a/src/lj_memprof.c +++ b/src/lj_memprof.c @@ -8,16 +8,27 @@ #define lj_memprof_c #define LUA_CORE +#define _GNU_SOURCE + #include +#include +#include +#include #include "lj_arch.h" #include "lj_memprof.h" +#if LUAJIT_OS != LUAJIT_OS_OSX +#include +#include +#include +#endif #if LJ_HASMEMPROF #include "lj_obj.h" #include "lj_frame.h" #include "lj_debug.h" +#include "lj_gc.h" #if LJ_HASJIT #include "lj_dispatch.h" @@ -66,12 +77,325 @@ static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace) #endif +#if LUAJIT_OS != LUAJIT_OS_OSX + +struct ghashtab_header { + uint32_t nbuckets; + uint32_t symoffset; + uint32_t bloom_size; + uint32_t bloom_shift; +}; + +static uint32_t ghashtab_size(ElfW(Addr) ghashtab) +{ + /* + ** There is no easy way to get count of symbols in GNU hashtable, so the + ** only way to do this is to take highest possible non-empty bucket and + ** iterate through its symbols until the last chain is over. + */ + uint32_t last_entry = 0; + + const uint32_t *chain = NULL; + struct ghashtab_header *header = (struct ghashtab_header*)ghashtab; + /* + ** sizeof(size_t) returns 8, if compiled with 64-bit compiler, and 4 if + ** compiled with 32-bit compiler. It is the best option to determine which + ** kind of CPU we are running on. + */ + const char *buckets = (char *)ghashtab + sizeof(struct ghashtab_header) + + sizeof(size_t) * header->bloom_size; + + uint32_t *cur_bucket = (uint32_t*)buckets; + for (uint32_t i = 0; i < header->nbuckets; ++i) { + if (last_entry < *cur_bucket) + last_entry = *cur_bucket; + cur_bucket++; + } + + if (last_entry < header->symoffset) + return header->symoffset; + + chain = (uint32_t*)(buckets + sizeof(uint32_t) * header->nbuckets); + /* The chain ends with the lowest bit set to 1. */ + while (!(chain[last_entry - header->symoffset] & 1)) + last_entry++; + + return ++last_entry; +} + +static void write_c_symtab +(ElfW(Sym*) sym, char *strtab, ElfW(Addr) so_addr, +size_t sym_cnt, struct lj_wbuf *buf) +{ + /* + ** Index 0 in ELF symtab is used to represent undefined symbols. Hence, we + ** can just start with index 1. + ** + ** For more information, see: + ** https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-79797.html + */ + + for (ElfW(Word) sym_index = 1; sym_index < sym_cnt; sym_index++) { + /* + ** ELF32_ST_TYPE and ELF64_ST_TYPE are the same, so we can use + ** ELF32_ST_TYPE for both 64-bit and 32-bit ELFs. + ** + ** For more, see https://github.com/torvalds/linux/blob/9137eda53752ef73148e42b0d7640a00f1bc96b1/include/uapi/linux/elf.h#L135 + */ + if (ELF32_ST_TYPE(sym[sym_index].st_info) == STT_FUNC && sym[sym_index].st_name != 0) { + char *sym_name = &strtab[sym[sym_index].st_name]; + lj_wbuf_addbyte(buf, SYMTAB_CFUNC); + lj_wbuf_addu64(buf, sym[sym_index].st_value + so_addr); + lj_wbuf_addstring(buf, sym_name); + } + } +} + +static int dump_sht_symtab +(const char *elf_name, struct lj_wbuf *buf, lua_State *L, const ElfW(Addr) so_addr) +{ + int status = 0; + + char *strtab = NULL; + ElfW(Shdr*) section_headers = NULL; + ElfW(Sym*) sym = NULL; + ElfW(Ehdr) elf_header = {}; + + ElfW(Off) sym_off = 0; + ElfW(Off) strtab_off = 0; + + size_t sym_cnt = 0; + size_t symtab_size = 0; + size_t strtab_size = 0; + size_t strtab_index = 0; + + size_t shoff = 0; /* Section headers offset. */ + size_t shnum = 0; /* Section headers number. */ + size_t shentsize = 0; /* Section header entry size. */ + + FILE *elf_file = fopen(elf_name, "rb"); + + if (elf_file == NULL) + return -1; + + if(fread(&elf_header, sizeof(elf_header), 1, elf_file) != sizeof(elf_header) && + ferror(elf_file) != 0) + goto error; + if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) + /* Not a valid ELF file. */ + goto error; + + shoff = elf_header.e_shoff; + shnum = elf_header.e_shnum; + shentsize = elf_header.e_shentsize; + + if (shoff == 0 || shnum == 0 || shentsize == 0) + /* No sections in ELF. */ + goto error; + + /* + ** Memory occupied by section headers is unlikely to be more than 160B, but + ** 32-bit and 64-bit ELF files may have sections of different sizes and some + ** of the sections may duiplicate, so we need to take that into account. + */ + section_headers = lj_mem_new(L, shnum * shentsize); + if (section_headers == NULL) + goto error; + + if (fseek(elf_file, shoff, SEEK_SET) != 0) + goto error; + + if(fread(section_headers, shentsize, shnum, elf_file) != shentsize * shnum && + ferror(elf_file) != 0) + goto error; + + for (size_t header_index = 0; header_index < shnum; ++header_index) { + if(section_headers[header_index].sh_type == SHT_SYMTAB) { + ElfW(Shdr) sym_hdr = section_headers[header_index]; + + sym_off = sym_hdr.sh_offset; + symtab_size = sym_hdr.sh_size; + sym_cnt = symtab_size / sym_hdr.sh_entsize; + + strtab_index = sym_hdr.sh_link; + + strtab_off = section_headers[strtab_index].sh_offset; + strtab_size = section_headers[strtab_index].sh_size; + break; + } + } + + if (sym_off == 0 || strtab_off == 0 || sym_cnt == 0) + goto error; + + /* Load symtab into memory. */ + sym = lj_mem_new(L, sym_cnt * sizeof(ElfW(Sym))); + if (sym == NULL) + goto error; + if (fseek(elf_file, sym_off, SEEK_SET) != 0) + goto error; + if (fread(sym, sizeof(ElfW(Sym)), sym_cnt, elf_file) != sizeof(ElfW(Sym)) * sym_cnt && + ferror(elf_file) != 0) + goto error; + + + /* Load strtab into memory. */ + strtab = lj_mem_new(L, strtab_size * sizeof(char)); + if (strtab == NULL) + goto error; + if (fseek(elf_file, strtab_off, SEEK_SET) != 0) + goto error; + if (fread(strtab, sizeof(char), strtab_size, elf_file) != sizeof(char) * strtab_size && + ferror(elf_file) != 0) + goto error; + + write_c_symtab(sym, strtab, so_addr, sym_cnt, buf); + + goto end; + + error: + status = -1; + + end: + if (sym != NULL) + lj_mem_free(G(L), sym, sym_cnt * sizeof(ElfW(Sym))); + if(strtab != NULL) + lj_mem_free(G(L), strtab, strtab_size * sizeof(char)); + if(section_headers != NULL) + lj_mem_free(G(L), section_headers, shnum * shentsize); + + fclose(elf_file); + + return status; +} + +static int dump_dyn_symtab(struct dl_phdr_info *info, struct lj_wbuf *buf) +{ + for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { + if (info->dlpi_phdr[header_index].p_type == PT_DYNAMIC) { + ElfW(Dyn*) dyn = NULL; + ElfW(Sym*) sym = NULL; + ElfW(Word*) hashtab = NULL; + ElfW(Addr) ghashtab = 0; + ElfW(Word) sym_cnt = 0; + + char *strtab = 0; + + dyn = (ElfW(Dyn)*)(info->dlpi_addr + info->dlpi_phdr[header_index].p_vaddr); + + for(; dyn->d_tag != DT_NULL; dyn++) { + switch(dyn->d_tag) { + case DT_HASH: + hashtab = (ElfW(Word*))dyn->d_un.d_ptr; + break; + case DT_GNU_HASH: + ghashtab = dyn->d_un.d_ptr; + break; + case DT_STRTAB: + strtab = (char*)dyn->d_un.d_ptr; + break; + case DT_SYMTAB: + sym = (ElfW(Sym*))dyn->d_un.d_ptr; + break; + default: + break; + } + } + + if ((hashtab == NULL && ghashtab == 0) || strtab == NULL || sym == NULL) + /* Not enough data to resolve symbols. */ + return 1; + + /* + ** A hash table consists of Elf32_Word or Elf64_Word objects that provide for + ** symbol table access. Hash table has the following organization: + ** +-------------------+ + ** | nbucket | + ** +-------------------+ + ** | nchain | + ** +-------------------+ + ** | bucket[0] | + ** | ... | + ** | bucket[nbucket-1] | + ** +-------------------+ + ** | chain[0] | + ** | ... | + ** | chain[nchain-1] | + ** +-------------------+ + ** Chain table entries parallel the symbol table. The number of symbol + ** table entries should equal nchain, so symbol table indexes also select + ** chain table entries. Since the chain array values are indexes for not only + ** the chain array itself, but also for the symbol table, the chain array must + ** be the same size as the symbol table. This makes nchain equal to the length + ** of the symbol table. + ** + ** For more, see https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html + */ + sym_cnt = ghashtab == 0 ? hashtab[1] : ghashtab_size(ghashtab); + write_c_symtab(sym, strtab, info->dlpi_addr, sym_cnt, buf); + return 0; + } + } + + return 1; +} + +struct symbol_resolver_conf { + struct lj_wbuf *buf; + lua_State *L; +}; + +static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size, void *data) +{ + struct symbol_resolver_conf *conf = data; + struct lj_wbuf *buf = conf->buf; + lua_State *L = conf->L; + + UNUSED(info_size); + + /* Skip vDSO library. */ + if (info->dlpi_addr == getauxval(AT_SYSINFO_EHDR)) + return 0; + + /* + ** Main way: try to open ELF and read SHT_SYMTAB, SHT_STRTAB and SHT_HASH + ** sections from it. + */ + if (dump_sht_symtab(info->dlpi_name, buf, L, info->dlpi_addr) == 0) { + /* Empty body. */ + } + /* First fallback: dump functions only from PT_DYNAMIC segment. */ + if(dump_dyn_symtab(info, buf) == 0) { + /* Empty body. */ + } + /* + ** Last resort: dump ELF size and address to show .so name for its functions + ** in memprof output. + */ + else { + lj_wbuf_addbyte(buf, SYMTAB_CFUNC); + lj_wbuf_addu64(buf, info->dlpi_addr); + lj_wbuf_addstring(buf, info->dlpi_name); + } + + return 0; +} + +#endif /* LUAJIT_OS != LUAJIT_OS_OSX */ + static void dump_symtab(struct lj_wbuf *out, const struct global_State *g) { const GCRef *iter = &g->gc.root; const GCobj *o; const size_t ljs_header_len = sizeof(ljs_header) / sizeof(ljs_header[0]); +#if LUAJIT_OS != LUAJIT_OS_OSX + struct symbol_resolver_conf conf = { + out, + gco2th(gcref(g->cur_L)) + }; +#endif + /* Write prologue. */ lj_wbuf_addn(out, ljs_header, ljs_header_len); @@ -95,6 +419,10 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g) iter = &o->gch.nextgc; } +#if LUAJIT_OS != LUAJIT_OS_OSX + /* Write C symbols. */ + dl_iterate_phdr(resolve_symbolnames, &conf); +#endif lj_wbuf_addbyte(out, SYMTAB_FINAL); } diff --git a/src/lj_memprof.h b/src/lj_memprof.h index 395fb429..0327a205 100644 --- a/src/lj_memprof.h +++ b/src/lj_memprof.h @@ -25,13 +25,15 @@ ** prologue := 'l' 'j' 's' version reserved ** version := ** reserved := -** sym := sym-lua | sym-trace | sym-final +** sym := sym-lua | sym-cfunc | sym-trace | sym-final ** sym-lua := sym-header sym-addr sym-chunk sym-line ** sym-trace := sym-header trace-no trace-addr sym-addr sym-line ** sym-header := ** sym-addr := ** sym-chunk := string ** sym-line := +** sym-cfunc := sym-header sym-addr sym-name +** sym-name := string ** sym-final := sym-header ** trace-no := ** trace-addr := @@ -54,7 +56,8 @@ */ #define SYMTAB_LFUNC ((uint8_t)0) -#define SYMTAB_TRACE ((uint8_t)1) +#define SYMTAB_CFUNC ((uint8_t)1) +#define SYMTAB_TRACE ((uint8_t)2) #define SYMTAB_FINAL ((uint8_t)0x80) #define LJM_CURRENT_FORMAT_VERSION 0x02 diff --git a/test/tarantool-tests/CMakeLists.txt b/test/tarantool-tests/CMakeLists.txt index b21500a0..28c41f33 100644 --- a/test/tarantool-tests/CMakeLists.txt +++ b/test/tarantool-tests/CMakeLists.txt @@ -57,6 +57,7 @@ macro(BuildTestCLib lib sources) endmacro() add_subdirectory(gh-4427-ffi-sandwich) +add_subdirectory(gh-5813-resolving-of-c-symbols) add_subdirectory(gh-6098-fix-side-exit-patching-on-arm64) add_subdirectory(gh-6189-cur_L) add_subdirectory(lj-49-bad-lightuserdata) diff --git a/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua b/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua new file mode 100644 index 00000000..8f20511c --- /dev/null +++ b/test/tarantool-tests/gh-5813-resolving-of-c-symbols.test.lua @@ -0,0 +1,61 @@ +-- Memprof is implemented for x86 and x64 architectures only. +require("utils").skipcond( + (jit.arch ~= "x86" and jit.arch ~= "x64") + or jit.os == "OSX", + jit.arch.." architecture is NIY for memprof" +) + +local tap = require("tap") +local test = tap.test("gh-5813-resolving-of-c-symbols") +test:plan(2) + +jit.off() +jit.flush() + +local bufread = require "utils.bufread" +local symtab = require "utils.symtab" + +local TMP_BINFILE = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%.%1.memprofdata.tmp.bin") + +local function tree_contains(node, name) + if node == nil then + return false + elseif node.value.name == name then + return true + else + return tree_contains(node.left, name) or tree_contains(node.right, name) + end +end + +-- Static symbols resolution. +local res, err = misc.memprof.start(TMP_BINFILE) +assert(res, err) +-- That Lua module is required here to trigger the `luaopen_os`, which is not +-- stripped in the debug build. +local testlib = require "testresolving" +misc.memprof.stop() + +local reader = bufread.new(TMP_BINFILE) +local symbols = symtab.parse(reader) + +test:ok(tree_contains(symbols.cfunc, "luaopen_os")) + +-- Dynamic symbols resolution. +res, err = misc.memprof.start(TMP_BINFILE) +assert(res, err) +for _=1, 1e5 do testlib.allocate_string() end +misc.memprof.stop() + +reader = bufread.new(TMP_BINFILE) +symbols = symtab.parse(reader) + +test:ok(tree_contains(symbols.cfunc, "allocate_string")) + +-- FIXME: There is one case that is not tested -- shared objects, which +-- have neither .symtab section nor .dynsym segment. It is unclear how to +-- perform a test in that case, since it is impossible to load Lua module +-- written in C if it doesn't have a .dynsym segment. + +os.remove(TMP_BINFILE) +os.exit(test:check() and 0 or 1) + diff --git a/test/tarantool-tests/gh-5813-resolving-of-c-symbols/CMakeLists.txt b/test/tarantool-tests/gh-5813-resolving-of-c-symbols/CMakeLists.txt new file mode 100644 index 00000000..e467f0e9 --- /dev/null +++ b/test/tarantool-tests/gh-5813-resolving-of-c-symbols/CMakeLists.txt @@ -0,0 +1,2 @@ +BuildTestCLib(testresolving testresolving.c) +target_link_options(testresolving PRIVATE "-s") diff --git a/test/tarantool-tests/gh-5813-resolving-of-c-symbols/testresolving.c b/test/tarantool-tests/gh-5813-resolving-of-c-symbols/testresolving.c new file mode 100644 index 00000000..e0b38151 --- /dev/null +++ b/test/tarantool-tests/gh-5813-resolving-of-c-symbols/testresolving.c @@ -0,0 +1,19 @@ +#include +#include + +#define TEST_STRING "test string" + +int allocate_string(lua_State *L) { + lua_pushstring(L, TEST_STRING); + return 1; +} + +static const struct luaL_Reg testresolving [] = { + {"allocate_string", allocate_string}, + {NULL, NULL} +}; + +int luaopen_testresolving(lua_State *L) { + luaL_register(L, "testresolving", testresolving); + return 1; +} diff --git a/test/tarantool-tests/tools-utils-avl.test.lua b/test/tarantool-tests/tools-utils-avl.test.lua new file mode 100644 index 00000000..b49af3f7 --- /dev/null +++ b/test/tarantool-tests/tools-utils-avl.test.lua @@ -0,0 +1,54 @@ +local avl = require('utils.avl') +local tap = require('tap') + +local test = tap.test('tools-utils-avl') +test:plan(7) + +local function traverse(node, result) + if node ~= nil then + table.insert(result, node.key) + traverse(node.left, result) + traverse(node.right, result) + end + return result +end + +local function batch_insert(root, values) + for i = 1, #values do + root = avl.insert(root, values[i]) + end + + return root +end + +local function compare(arr1, arr2) + for i, v in pairs(arr1) do + if v ~= arr2[i] then + return false + end + end + return true +end + +-- 1L rotation test. +local root = batch_insert(nil, {1, 2, 3}) +test:ok(compare(traverse(root, {}), {2, 1, 3})) +-- 1R rotation test. +root = batch_insert(nil, {3, 2, 1}) +test:ok(compare(traverse(root, {}), {2, 1, 3})) +-- 2L rotation test. +root = batch_insert(nil, {1, 3, 2}) +test:ok(compare(traverse(root, {}), {2, 1, 3})) +-- 2R rotation test. +root = batch_insert(nil, {3, 1, 2}) +test:ok(compare(traverse(root, {}), {2, 1, 3})) +-- Exact upper bound. +test:ok(avl.upper_bound(root, 1) == 1) +-- No upper bound. +test:ok(avl.upper_bound(root, -10) == nil) +-- Not exact upper bound. +test:ok(avl.upper_bound(root, 2.75) == 2) + +os.exit(test:check() and 0 or 1) + + diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 61830e44..c6803d00 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -30,6 +30,7 @@ else() memprof/humanize.lua memprof/parse.lua memprof.lua + utils/avl.lua utils/bufread.lua utils/symtab.lua ) @@ -46,6 +47,7 @@ else() COMPONENT tools-parse-memprof ) install(FILES + ${CMAKE_CURRENT_SOURCE_DIR}/utils/avl.lua ${CMAKE_CURRENT_SOURCE_DIR}/utils/bufread.lua ${CMAKE_CURRENT_SOURCE_DIR}/utils/symtab.lua DESTINATION ${LUAJIT_DATAROOTDIR}/utils diff --git a/tools/utils/avl.lua b/tools/utils/avl.lua new file mode 100644 index 00000000..2e168c36 --- /dev/null +++ b/tools/utils/avl.lua @@ -0,0 +1,114 @@ +local M = {} +local max = math.max + +local function create_node(key, value) + return { + key = key, + value = value, + left = nil, + right = nil, + height = 1, + } +end + +local function height(node) + if node == nil then + return 0 + end + return node.height +end + +local function update_height(node) + node.height = 1 + max(height(node.left), height(node.right)) +end + +local function get_balance(node) + if node == nil then + return 0 + end + return height(node.left) - height(node.right) +end + +local function rotate_left(node) + local r_subtree = node.right; + local rl_subtree = r_subtree.left; + + r_subtree.left = node; + node.right = rl_subtree; + + update_height(node) + update_height(r_subtree) + + return r_subtree; +end + +local function rotate_right(node) + local l_subtree = node.left + local lr_subtree = l_subtree.right; + + l_subtree.right = node; + node.left = lr_subtree; + + update_height(node) + update_height(l_subtree) + + return l_subtree; +end + +local function rebalance(node, key) + local balance = get_balance(node) + + if -1 <= balance and balance <=1 then + return node + end + + if balance > 1 and key < node.left.key then + return rotate_right(node) + elseif balance < -1 and key > node.right.key then + return rotate_left(node) + elseif balance > 1 and key > node.left.key then + node.left = rotate_left(node.left) + return rotate_right(node) + elseif balance < -1 and key < node.right.key then + node.right = rotate_right(node.right) + return rotate_left(node) + end +end + +function M.insert(node, key, value) + if node == nil then + return create_node(key, value) + end + + if key < node.key then + node.left = M.insert(node.left, key, value) + elseif key > node.key then + node.right = M.insert(node.right, key, value) + else + node.key = key + node.value = value + end + + update_height(node) + return rebalance(node, key) +end + +function M.upper_bound(node, key) + if node == nil then + return nil, nil + end + -- Explicit match. + if key == node.key then + return node.key, node.value + elseif key < node.key then + return M.upper_bound(node.left, key) + elseif key > node.key then + local right_key, value = M.upper_bound(node.right, key) + right_key = right_key or node.key + value = value or node.value + return right_key, value + end +end + +return M + diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua index c7fcf77c..6f1685f6 100644 --- a/tools/utils/symtab.lua +++ b/tools/utils/symtab.lua @@ -6,6 +6,8 @@ local bit = require "bit" +local avl = require "utils.avl" + local band = bit.band local string_format = string.format @@ -15,7 +17,8 @@ local LJS_EPILOGUE_HEADER = 0x80 local LJS_SYMTYPE_MASK = 0x03 local SYMTAB_LFUNC = 0 -local SYMTAB_TRACE = 1 +local SYMTAB_CFUNC = 1 +local SYMTAB_TRACE = 2 local M = {} @@ -50,15 +53,27 @@ local function parse_sym_trace(reader, symtab) } end +-- Parse a single entry in a symtab: .so library +local function parse_sym_cfunc(reader, symtab) + local addr = reader:read_uleb128() + local name = reader:read_string() + + symtab.cfunc = avl.insert(symtab.cfunc, addr, { + name = name + }) +end + local parsers = { [SYMTAB_LFUNC] = parse_sym_lfunc, [SYMTAB_TRACE] = parse_sym_trace, + [SYMTAB_CFUNC] = parse_sym_cfunc } function M.parse(reader) local symtab = { lfunc = {}, trace = {}, + cfunc = nil, } local magic = reader:read_octets(3) local version = reader:read_octets(1) @@ -135,6 +150,12 @@ function M.demangle(symtab, loc) return string_format("%s:%d", symtab.lfunc[addr].source, loc.line) end + local key, value = avl.upper_bound(symtab.cfunc, addr) + + if key then + return string_format("%s:%#x", value.name, key) + end + return string_format("CFUNC %#x", addr) end -- 2.35.1