From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp41.i.mail.ru (smtp41.i.mail.ru [94.100.177.101]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id B95074765E7 for ; Fri, 25 Dec 2020 18:27:08 +0300 (MSK) From: Sergey Kaplun Date: Fri, 25 Dec 2020 18:26:09 +0300 Message-Id: <03ac70e3bfb9bf7061ad71c1bac50ed3f8e853fc.1608907726.git.skaplun@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH luajit v2 7/7] tools: introduce a memory profile parser List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Igor Munkin , Sergey Ostanevich Cc: tarantool-patches@dev.tarantool.org This patch adds a parser for binary data dumped via the memory profiler. It is a set of the following Lua modules: * utils/bufread.lua: read binary data from a file * utils/symtab.lua: symbol table decode functions * memprof/parse.lua: decode the memory profiler event stream * memprof/humanize.lua: display decoded data in human-readable format * memprof.lua: Lua script to display data There is also a stand-alone bash script that prints the parsed data in human-readable form to stdout. It runs memprof.lua with the corresponding LUA_PATH. Part of tarantool/tarantool#5442 Part of tarantool/tarantool#5490 --- Changes in v2: - Add (un)?install sections in Makefile. - Modify bash script correspondingly. - Change Lua modules layout. - Adjusted test: added a check that errno is returned in case of error. - Code cleanup. 
Makefile | 39 +++++- test/misclib-memprof-lapi.test.lua | 135 +++++++++++++++++++++ tools/luajit-parse-memprof | 9 ++ tools/memprof.lua | 109 +++++++++++++++++ tools/memprof/humanize.lua | 45 +++++++ tools/memprof/parse.lua | 188 +++++++++++++++++++++++++++++ tools/utils/bufread.lua | 147 ++++++++++++++++++++++ tools/utils/symtab.lua | 89 ++++++++++++++ 8 files changed, 757 insertions(+), 4 deletions(-) create mode 100755 test/misclib-memprof-lapi.test.lua create mode 100755 tools/luajit-parse-memprof create mode 100644 tools/memprof.lua create mode 100644 tools/memprof/humanize.lua create mode 100644 tools/memprof/parse.lua create mode 100644 tools/utils/bufread.lua create mode 100644 tools/utils/symtab.lua diff --git a/Makefile b/Makefile index 4a56917..ba4aa2f 100644 --- a/Makefile +++ b/Makefile @@ -37,6 +37,9 @@ INSTALL_INC= $(DPREFIX)/include/luajit-$(MAJVER).$(MINVER) INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(VERSION) INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit +INSTALL_UTILSLIB= $(INSTALL_LJLIBD)/utils +INSTALL_MEMPROFLIB= $(INSTALL_LJLIBD)/memprof +INSTALL_TOOLSLIB= $(INSTALL_LJLIBD) INSTALL_LMODD= $(INSTALL_SHARE)/lua INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER) INSTALL_CMODD= $(INSTALL_LIB)/lua @@ -54,6 +57,8 @@ INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).$(MINVER).$(RELVER).dylib INSTALL_PCNAME= luajit.pc +INSTALL_TMEMPROFNAME= luajit-$(VERSION)-parse-memprof +INSTALL_TMEMPROFSYMNAME= luajit-parse-memprof INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME) INSTALL_DYN= $(INSTALL_LIB)/$(INSTALL_SONAME) @@ -62,11 +67,15 @@ INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_SOSHORT2) INSTALL_T= $(INSTALL_BIN)/$(INSTALL_TNAME) INSTALL_TSYM= $(INSTALL_BIN)/$(INSTALL_TSYMNAME) INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME) +INSTALL_TMEMPROF= $(INSTALL_BIN)/$(INSTALL_TMEMPROFNAME) +INSTALL_TMEMPROFSYM= $(INSTALL_BIN)/$(INSTALL_TMEMPROFSYMNAME) INSTALL_DIRS= $(INSTALL_BIN) 
$(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \ - $(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD) + $(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD) \ + $(INSTALL_UTILSLIB) $(INSTALL_MEMPROFLIB) $(INSTALL_TOOLSLIB) UNINSTALL_DIRS= $(INSTALL_JITLIB) $(INSTALL_LJLIBD) $(INSTALL_INC) \ - $(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD) + $(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD) \ + $(INSTALL_UTILSLIB) $(INSTALL_MEMPROFLIB) $(INSTALL_TOOLSLIB) RM= rm -f MKDIR= mkdir -p @@ -78,6 +87,8 @@ UNINSTALL= $(RM) LDCONFIG= ldconfig -n SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \ -e "s|^multilib=.*|multilib=$(MULTILIB)|" +SED_TMEMPROF= sed -e "s|^TOOL_DIR=.*|TOOL_DIR=$(INSTALL_TOOLSLIB)|" \ + -e "s|^LUAJIT_BIN=.*|LUAJIT_BIN=$(INSTALL_T)|" FILE_T= luajit FILE_A= libluajit.a @@ -89,6 +100,10 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ dis_mips64.lua dis_mips64el.lua vmdef.lua +FILES_UTILSLIB= bufread.lua symtab.lua +FILES_MEMPROFLIB= parse.lua humanize.lua +FILES_TOOLSLIB= memprof.lua +FILE_TMEMPROF= luajit-parse-memprof ifeq (,$(findstring Windows,$(OS))) HOST_SYS:= $(shell uname -s) @@ -130,21 +145,37 @@ install: $(INSTALL_DEP) $(RM) $(FILE_PC).tmp cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) + cd tools/utils && $(INSTALL_F) $(FILES_UTILSLIB) $(INSTALL_UTILSLIB) + cd tools/memprof && $(INSTALL_F) $(FILES_MEMPROFLIB) $(INSTALL_MEMPROFLIB) + cd tools && $(INSTALL_F) $(FILES_TOOLSLIB) $(INSTALL_TOOLSLIB) + cd tools && $(SED_TMEMPROF) $(FILE_TMEMPROF) > $(FILE_TMEMPROF).tmp && \ + $(INSTALL_X) $(FILE_TMEMPROF).tmp $(INSTALL_TMEMPROF) && \ + $(RM) $(FILE_TMEMPROF).tmp @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" @echo "" @echo "Note: the development releases deliberately 
do NOT install a symlink for luajit" - @echo "You can do this now by running this command (with sudo):" + @echo "You can do this now by running these commands (with sudo):" @echo "" @echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)" + @echo " $(SYMLINK) $(INSTALL_TMEMPROFNAME) $(INSTALL_TMEMPROFSYM)" @echo "" uninstall: @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" - $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) + $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) $(INSTALL_TMEMPROF) for file in $(FILES_JITLIB); do \ $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ done + for file in $(FILES_UTILSLIB); do \ + $(UNINSTALL) $(INSTALL_UTILSLIB)/$$file; \ + done + for file in $(FILES_MEMPROFLIB); do \ + $(UNINSTALL) $(INSTALL_MEMPROFLIB)/$$file; \ + done + for file in $(FILES_TOOLSLIB); do \ + $(UNINSTALL) $(INSTALL_TOOLSLIB)/$$file; \ + done for file in $(FILES_INC); do \ $(UNINSTALL) $(INSTALL_INC)/$$file; \ done diff --git a/test/misclib-memprof-lapi.test.lua b/test/misclib-memprof-lapi.test.lua new file mode 100755 index 0000000..e02c6fa --- /dev/null +++ b/test/misclib-memprof-lapi.test.lua @@ -0,0 +1,135 @@ +#!/usr/bin/env tarantool + +local tap = require('tap') + +local test = tap.test("misc-memprof-lapi") +test:plan(9) + +jit.off() +jit.flush() + +-- FIXME: Launch tests with LUA_PATH enviroment variable. 
+local path = arg[0]:gsub('/[^/]+%.test%.lua', '')
+local path_suffix = '../tools/?.lua;'
+package.path = ('%s/%s;'):format(path, path_suffix)..package.path
+
+local table_new = require "table.new"
+
+local bufread = require "utils.bufread"
+local memprof = require "memprof.parse"
+local symtab = require "utils.symtab"
+
+local TMP_BINFILE = arg[0]:gsub('[^/]+%.test%.lua', '%.%1.memprofdata.tmp.bin')
+local BAD_PATH = arg[0]:gsub('[^/]+%.test%.lua', '%1/memprofdata.tmp.bin')
+
+local function payload()
+  -- Preallocate table to avoid array part reallocations.
+  local _ = table_new(100, 0)
+
+  -- Want to see 100 objects here.
+  for i = 1, 100 do
+    -- Try to avoid crossing with "test" module objects.
+    _[i] = "memprof-str-"..i
+  end
+
+  _ = nil
+  -- VMSTATE == GC, reported as INTERNAL.
+  collectgarbage()
+end
+
+-- Runs payload() with the memory profiler writing to filename.
+-- Raises (via assert) if the profiler fails to start or to stop.
+local function generate_output(filename)
+  -- Clean up all garbage to avoid pollution of free.
+  collectgarbage()
+
+  local res, err = misc.memprof.start(filename)
+  -- Should start successfully.
+  assert(res, err)
+
+  payload()
+
+  res, err = misc.memprof.stop()
+  -- Should stop successfully.
+  assert(res, err)
+end
+
+-- Flattens events[event_type] into a table of {name, num} records.
+-- Events with a zero address are stored under the INTERNAL key,
+-- events whose address is present in symbols are keyed by the
+-- line of the event location; all other events are dropped.
+local function fill_ev_type(events, symbols, event_type)
+  local ev_type = {}
+  for _, event in pairs(events[event_type]) do
+    local addr = event.loc.addr
+    if addr == 0 then
+      ev_type.INTERNAL = {
+        name = "INTERNAL",
+        num = event.num,
+      }
+    elseif symbols[addr] then
+      ev_type[event.loc.line] = {
+        name = symbols[addr].name,
+        num = event.num,
+      }
+    end
+  end
+  return ev_type
+end
+
+-- Asserts that the record reported for the given line belongs to
+-- the function of this very file (arg[0]) defined at function_line
+-- and that it counts exactly nevents events. Returns true so that
+-- the result can be passed straight to test:ok().
+local function check_alloc_report(alloc, line, function_line, nevents)
+  assert(string.format("@%s:%d", arg[0], function_line) == alloc[line].name)
+  assert(alloc[line].num == nevents, ("got=%d, ecpected=%d"):format(
+    alloc[line].num,
+    nevents
+  ))
+  return true
+end
+
+-- Not a directory.
+local res, err, errno = misc.memprof.start(BAD_PATH)
+test:ok(res == nil and err:match("Not a directory"))
+test:ok(type(errno) == "number")
+
+-- Profiler is running.
+res, err = misc.memprof.start(TMP_BINFILE)
+assert(res, err)
+res, err, errno = misc.memprof.start(TMP_BINFILE)
+test:ok(res == nil and err:match("profiler is running already"))
+test:ok(type(errno) == "number")
+
+res, err = misc.memprof.stop()
+assert(res, err)
+
+-- Profiler is not running.
+res, err, errno = misc.memprof.stop()
+test:ok(res == nil and err:match("profiler is not running"))
+test:ok(type(errno) == "number")
+
+-- Test profiler output and parse.
+res, err = pcall(generate_output, TMP_BINFILE)
+
+-- Want to clean up carefully if something went wrong.
+if not res then
+  os.remove(TMP_BINFILE)
+  error(err)
+end
+
+local reader = bufread.new(TMP_BINFILE)
+local symbols = symtab.parse(reader)
+local events = memprof.parse(reader, symbols)
+
+-- We don't need it any more.
+os.remove(TMP_BINFILE)
+
+local alloc = fill_ev_type(events, symbols, "alloc")
+local free = fill_ev_type(events, symbols, "free")
+
+-- Check allocation reports. The second argument is a line number
+-- of the allocation event itself. The third is a line number of
+-- the corresponding function definition. The last one is
+-- the number of allocations.
+-- NB: these are line numbers within this very file, so adding or
+-- removing lines above or inside payload() requires updating them.
+-- 1 event - allocation of the table itself + 1 allocation
+-- of the array part as it is bigger than LJ_MAX_COLOSIZE (16).
+test:ok(check_alloc_report(alloc, 27, 25, 2))
+-- 100 string allocations.
+test:ok(check_alloc_report(alloc, 32, 25, 100))
+
+-- Collect all previously allocated objects.
+test:ok(free.INTERNAL.num == 102)
+
+os.exit(test:check() and 0 or 1)
diff --git a/tools/luajit-parse-memprof b/tools/luajit-parse-memprof
new file mode 100755
index 0000000..c814301
--- /dev/null
+++ b/tools/luajit-parse-memprof
@@ -0,0 +1,9 @@
+#!/bin/bash
+#
+# Launcher for memprof parser.
+
+# These two variables are replaced on installing.
+TOOL_DIR=$(dirname "$(readlink -f "$0")")
+LUAJIT_BIN=$TOOL_DIR/../src/luajit
+
+# Quote every expansion so that paths and arguments containing
+# spaces or glob characters reach the parser intact.
+LUA_PATH="$TOOL_DIR/?.lua;;" "$LUAJIT_BIN" "$TOOL_DIR/memprof.lua" "$@"
diff --git a/tools/memprof.lua b/tools/memprof.lua
new file mode 100644
index 0000000..7476757
--- /dev/null
+++ b/tools/memprof.lua
@@ -0,0 +1,109 @@
+-- A tool for parsing and visualisation of LuaJIT's memory
+-- profiler output.
+--
+-- TODO:
+-- * Think about callgraph memory profiling for complex
+--   table reallocations
+-- * Nicer output, probably an HTML view
+-- * Demangling of C symbols
+--
+-- Major portions taken verbatim or adapted from the LuaVela.
+-- Copyright (C) 2015-2019 IPONWEB Ltd.
+
+local bufread = require "utils.bufread"
+local memprof = require "memprof.parse"
+local symtab = require "utils.symtab"
+local view = require "memprof.humanize"
+
+local stdout, stderr = io.stdout, io.stderr
+local match, gmatch = string.match, string.gmatch
+
+local USAGE = [[
+luajit-parse-memprof - parser of the memory usage profile collected
+                       with LuaJIT's memprof.
+
+SYNOPSIS
+
+luajit-parse-memprof [options] memprof.bin
+
+Supported options are:
+
+  --help                            Show this help and exit
+]]
+
+-- Write the usage text to the given stream and exit with status.
+local function usage(stream, status)
+  stream:write(USAGE)
+  os.exit(status)
+end
+
+-- Program options: maps an option name (without leading dashes)
+-- to its handler. Handlers are called with the argument table.
+local opt_map = {}
+
+-- Explicitly requested help: print to stdout, exit successfully.
+function opt_map.help()
+  usage(stdout, 0)
+end
+
+-- Print error and exit with error status.
+-- The terminating newline is appended here, callers must not
+-- pass their own.
+local function opterror(...)
+  stderr:write("luajit-parse-memprof.lua: ERROR: ", ...)
+  stderr:write("\n")
+  os.exit(1)
+end
+
+-- Parse single option: look up its handler in opt_map and run it.
+-- Unknown options terminate the program via opterror().
+local function parseopt(opt, args)
+  local opt_current = #opt == 1 and "-"..opt or "--"..opt
+  local f = opt_map[opt]
+  if not f then
+    opterror("unrecognized option `", opt_current, "'. Try `--help'.")
+  end
+  f(args)
+end
+
+-- Parse arguments.
+-- Consumes leading options (args.argn tracks the current index)
+-- and returns the single remaining positional argument: the name
+-- of the input file. Any other number of positional arguments is
+-- a usage error.
+local function parseargs(args)
+  -- Process all option arguments.
+  args.argn = 1
+  repeat
+    local a = args[args.argn]
+    if not a then
+      break
+    end
+    local lopt, opt = match(a, "^%-(%-?)(.+)")
+    if not opt then
+      break
+    end
+    args.argn = args.argn + 1
+    if lopt == "" then
+      -- Loop through short options.
+      for o in gmatch(opt, ".") do
+        parseopt(o, args)
+      end
+    else
+      -- Long option.
+      parseopt(opt, args)
+    end
+  until false
+
+  -- Check for proper number of arguments.
+  local nargs = #args - args.argn + 1
+  if nargs ~= 1 then
+    -- Misuse, unlike `--help': report on stderr with a non-zero
+    -- status so that calling scripts can detect the failure.
+    usage(stderr, 1)
+  end
+
+  -- Translate a single input file.
+  -- TODO: Handle multiple files?
+  return args[args.argn]
+end
+
+local inputfile = parseargs{...}
+
+-- The symbol table precedes the event stream in the dump, so
+-- both parsers consume the same reader one after another.
+local reader = bufread.new(inputfile)
+local symbols = symtab.parse(reader)
+local events = memprof.parse(reader, symbols)
+
+stdout:write("ALLOCATIONS", "\n")
+view.render(events.alloc, symbols)
+stdout:write("\n")
+
+stdout:write("REALLOCATIONS", "\n")
+view.render(events.realloc, symbols)
+stdout:write("\n")
+
+stdout:write("DEALLOCATIONS", "\n")
+view.render(events.free, symbols)
+stdout:write("\n")
diff --git a/tools/memprof/humanize.lua b/tools/memprof/humanize.lua
new file mode 100644
index 0000000..109a39d
--- /dev/null
+++ b/tools/memprof/humanize.lua
@@ -0,0 +1,45 @@
+-- Simple human-readable renderer of LuaJIT's memprof profile.
+--
+-- Major portions taken verbatim or adapted from the LuaVela.
+-- Copyright (C) 2015-2019 IPONWEB Ltd.
+
+local symtab = require "utils.symtab"
+
+local M = {}
+
+-- Prints the given event table (id -> event record) to stdout,
+-- most frequent events first. Each line shows the demangled
+-- location followed by the number of events and the allocated and
+-- freed byte totals. Locations stored in event.primary, if any,
+-- are listed below under "Overrides:".
+function M.render(events, symbols)
+  local ids = {}
+
+  for id, _ in pairs(events) do
+    table.insert(ids, id)
+  end
+
+  table.sort(ids, function(id1, id2)
+    return events[id1].num > events[id2].num
+  end)
+
+  for i = 1, #ids do
+    local event = events[ids[i]]
+    print(string.format("%s: %d\t%d\t%d",
+      symtab.demangle(symbols, event.loc),
+      event.num,
+      event.alloc,
+      event.free
+    ))
+
+    local prim_loc = {}
+    for _, loc in pairs(event.primary) do
+      table.insert(prim_loc, symtab.demangle(symbols, loc))
+    end
+    if #prim_loc ~= 0 then
+      table.sort(prim_loc)
+      print("\tOverrides:")
+      for j = 1, #prim_loc do
+        print(string.format("\t\t%s", prim_loc[j]))
+      end
+      print("")
+    end
+  end
+end
+
+return M
diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
new file mode 100644
index 0000000..f4996f4
--- /dev/null
+++ b/tools/memprof/parse.lua
@@ -0,0 +1,188 @@
+-- Parser of LuaJIT's memprof binary stream.
+-- The format spec can be found in src/lj_memprof.h.
+--
+-- Major portions taken verbatim or adapted from the LuaVela.
+-- Copyright (C) 2015-2019 IPONWEB Ltd.
+
+local bit = require "bit"
+local band = bit.band
+local lshift = bit.lshift
+
+local string_format = string.format
+
+local LJM_MAGIC = "ljm"
+local LJM_CURRENT_VERSION = 1
+
+local LJM_EPILOGUE_HEADER = 0x80
+
+local AEVENT_ALLOC = 1
+local AEVENT_FREE = 2
+local AEVENT_REALLOC = 3
+
+local ASOURCE_INT = lshift(1, 2)
+local ASOURCE_LFUNC = lshift(2, 2)
+local ASOURCE_CFUNC = lshift(3, 2)
+
+local M = {}
+
+-- Creates an empty per-location event record:
+--   loc     - location table {addr, line};
+--   num     - number of events seen at this location;
+--   free    - total number of bytes freed;
+--   alloc   - total number of bytes allocated;
+--   primary - id -> location of the allocations this location
+--             has later reallocated or freed.
+local function new_event(loc)
+  return {
+    loc = loc,
+    num = 0,
+    free = 0,
+    alloc = 0,
+    primary = {},
+  }
+end
+
+-- Remembers in e.primary where the chunk at oaddr was allocated.
+local function link_to_previous(heap, e, oaddr)
+  local heap_chunk = heap[oaddr]
+  -- No entry means the memory at oaddr was allocated before we
+  -- started tracking, so there is nothing to link to.
+  if heap_chunk then
+    -- Save Lua code location (line) by address (id).
+    e.primary[heap_chunk[2]] = heap_chunk[3]
+  end
+end
+
+-- Returns a string id unique for the (addr, line) pair and the
+-- location table itself.
+local function id_location(addr, line)
+  return string_format("f%#xl%d", addr, line), {
+    addr = addr,
+    line = line,
+  }
+end
+
+-- Reads the location part of an event according to its source.
+-- Returns the same pair as id_location(); raises on an unknown
+-- source.
+local function parse_location(reader, asource)
+  if asource == ASOURCE_INT then
+    return id_location(0, 0)
+  elseif asource == ASOURCE_CFUNC then
+    return id_location(reader:read_uleb128(), 0)
+  elseif asource == ASOURCE_LFUNC then
+    -- Read into locals: the stream order (address, then line)
+    -- must not depend on the argument evaluation order.
+    local addr = reader:read_uleb128()
+    local line = reader:read_uleb128()
+    return id_location(addr, line)
+  end
+  error("Unknown asource "..asource)
+end
+
+-- Returns the record for the given id, creating it on first use.
+local function get_event(events, id, loc)
+  local e = events[id]
+  if not e then
+    e = new_event(loc)
+    events[id] = e
+  end
+  return e
+end
+
+-- Allocation event: location, new address, new size.
+local function parse_alloc(reader, asource, events, heap)
+  local id, loc = parse_location(reader, asource)
+
+  local naddr = reader:read_uleb128()
+  local nsize = reader:read_uleb128()
+
+  local e = get_event(events, id, loc)
+  e.num = e.num + 1
+  e.alloc = e.alloc + nsize
+
+  -- Track the live chunk: {size, id and location of the allocator}.
+  heap[naddr] = {nsize, id, loc}
+end
+
+-- Reallocation event: location, old address, old size,
+-- new address, new size.
+local function parse_realloc(reader, asource, events, heap)
+  local id, loc = parse_location(reader, asource)
+
+  local oaddr = reader:read_uleb128()
+  local osize = reader:read_uleb128()
+  local naddr = reader:read_uleb128()
+  local nsize = reader:read_uleb128()
+
+  local e = get_event(events, id, loc)
+  e.num = e.num + 1
+  e.free = e.free + osize
+  e.alloc = e.alloc + nsize
+
+  link_to_previous(heap, e, oaddr)
+
+  -- Drop the old chunk first: oaddr and naddr may coincide.
+  heap[oaddr] = nil
+  heap[naddr] = {nsize, id, loc}
+end
+
+-- Deallocation event: location, old address, old size.
+local function parse_free(reader, asource, events, heap)
+  local id, loc = parse_location(reader, asource)
+
+  local oaddr = reader:read_uleb128()
+  local osize = reader:read_uleb128()
+
+  local e = get_event(events, id, loc)
+  e.num = e.num + 1
+  e.free = e.free + osize
+
+  link_to_previous(heap, e, oaddr)
+
+  heap[oaddr] = nil
+end
+
+-- Event type -> name of the result table and the payload parser.
+local parsers = {
+  [AEVENT_ALLOC] = {evname = "alloc", parse = parse_alloc},
+  [AEVENT_FREE] = {evname = "free", parse = parse_free},
+  [AEVENT_REALLOC] = {evname = "realloc", parse = parse_realloc},
+}
+
+-- A header is either the epilogue mark or fits the lower 4 bits.
+local function ev_header_is_valid(evh)
+  return evh <= 0x0f or evh == LJM_EPILOGUE_HEADER
+end
+
+-- Splits event header into event type (aka aevent = allocation
+-- event) and event source (aka asource = allocation source).
+local function ev_header_split(evh)
+  return band(evh, 0x3), band(evh, lshift(0x3, 2))
+end
+
+-- Parses a single event and accounts it in events.
+-- Returns false when the epilogue is reached, true otherwise.
+-- Raises on a truncated stream or a malformed header.
+local function parse_event(reader, events)
+  local ev_header = reader:read_octet()
+
+  -- read_octet() yields nil when the data ends before the epilogue.
+  if ev_header == nil then
+    error("Unexpected end of the event stream: no epilogue found", 0)
+  end
+
+  -- This runs once per event, so the messages are built only on
+  -- failure. Level 0 keeps them free of a position prefix.
+  if not ev_header_is_valid(ev_header) then
+    error("Bad ev_header "..ev_header, 0)
+  end
+
+  if ev_header == LJM_EPILOGUE_HEADER then
+    return false
+  end
+
+  local aevent, asource = ev_header_split(ev_header)
+  local parser = parsers[aevent]
+
+  if not parser then
+    error("Bad aevent "..aevent, 0)
+  end
+
+  parser.parse(reader, asource, events[parser.evname], events.heap)
+
+  return true
+end
+
+-- Parses the memprof event stream the reader is positioned at.
+-- Returns a table with the alloc, realloc and free event tables
+-- (id -> event record) and heap, the chunks that are still
+-- tracked when the stream ends (address -> {size, id, location}).
+-- Raises on a bad prologue, a version mismatch or a malformed
+-- stream.
+function M.parse(reader)
+  local events = {
+    alloc = {},
+    realloc = {},
+    free = {},
+    heap = {},
+  }
+
+  local magic = reader:read_octets(3)
+  local version = reader:read_octets(1)
+  -- Dummy-consume reserved bytes.
+  local reserved = reader:read_octets(3)
+
+  -- read_octets() yields nil when the data is shorter than asked.
+  if magic == nil or version == nil or reserved == nil then
+    error("Bad LJM format prologue: unexpected end of data")
+  end
+
+  if magic ~= LJM_MAGIC then
+    error("Bad LJM format prologue: "..magic)
+  end
+
+  if string.byte(version) ~= LJM_CURRENT_VERSION then
+    error(string_format(
+      "LJM format version mismatch: the tool expects %d, but your data is %d",
+      LJM_CURRENT_VERSION,
+      string.byte(version)
+    ))
+  end
+
+  while parse_event(reader, events) do
+    -- Empty body.
+  end
+
+  return events
+end
+
+return M
diff --git a/tools/utils/bufread.lua b/tools/utils/bufread.lua
new file mode 100644
index 0000000..873e06a
--- /dev/null
+++ b/tools/utils/bufread.lua
@@ -0,0 +1,147 @@
+-- An implementation of buffered reading data from
+-- an arbitrary binary file.
+--
+-- Major portions taken verbatim or adapted from the LuaVela.
+-- Copyright (C) 2015-2019 IPONWEB Ltd.
+
+local assert = assert
+
+local ffi = require "ffi"
+local bit = require "bit"
+
+local ffi_C = ffi.C
+local band = bit.band
+
+local LINK_BIT = 0x80
+local PAYLOAD_MASK = 0x7f
+local SHIFT_STEP = 7
+-- A 64-bit value takes at most 10 groups: shifts 0, 7, ..., 63.
+local SHIFT_MAX = 63
+
+-- 10 Mb.
+local BUFFER_SIZE = 10 * 1024 * 1024
+
+local M = {}
+
+ffi.cdef[[
+  void *memcpy(void *, const void *, size_t);
+  void *memmove(void *, const void *, size_t);
+
+  typedef struct FILE_ FILE;
+  FILE *fopen(const char *, const char *);
+  size_t fread(void *, size_t, size_t, FILE *);
+  int feof(FILE *);
+  int fclose(FILE *);
+]]
+
+-- Makes sure that at least n unread bytes are available at
+-- reader._buf + reader._pos, refilling the buffer from the file
+-- if necessary. Returns true on success and false if the file
+-- ends earlier. n must not exceed BUFFER_SIZE.
+local function _read_stream(reader, n)
+  local tail_size = reader._end - reader._pos
+
+  if tail_size >= n then
+    -- Enough data to satisfy the request of n bytes.
+    return true
+  end
+
+  -- A request bigger than the whole buffer can never be satisfied.
+  assert(n <= BUFFER_SIZE, "Request fits into the internal buffer")
+
+  -- Carry tail_size unread bytes from the end of the buffer to its
+  -- start. The regions overlap when tail_size > reader._pos, hence
+  -- memmove; nothing has to be moved when reader._pos is 0.
+  if tail_size ~= 0 and reader._pos ~= 0 then
+    ffi_C.memmove(reader._buf, reader._buf + reader._pos, tail_size)
+  end
+
+  -- Fill up everything behind the tail with fresh data, never
+  -- writing past the end of the buffer. fread() returns size_t,
+  -- which LuaJIT may hand over as a boxed 64-bit cdata: convert it
+  -- so that _pos and _end stay plain Lua numbers.
+  local bytes_read = tonumber(ffi_C.fread(
+    reader._buf + tail_size, 1, BUFFER_SIZE - tail_size, reader._file
+  ))
+
+  reader._pos = 0
+  reader._end = tail_size + bytes_read
+
+  return reader._end >= n
+end
+
+-- Reads one byte. Returns it as a number or nil at end of data.
+function M.read_octet(reader)
+  if not _read_stream(reader, 1) then
+    return nil
+  end
+
+  local oct = reader._buf[reader._pos]
+  reader._pos = reader._pos + 1
+  return oct
+end
+
+-- Reads n bytes. Returns them as a Lua string or nil if fewer
+-- than n bytes are left.
+function M.read_octets(reader, n)
+  if not _read_stream(reader, n) then
+    return nil
+  end
+
+  local octets = ffi.string(reader._buf + reader._pos, n)
+  reader._pos = reader._pos + n
+  return octets
+end
+
+-- Reads an unsigned LEB128 value and returns it as a Lua number.
+-- Raises if the data ends in the middle of the value or if the
+-- value is longer than a 64-bit integer allows.
+function M.read_uleb128(reader)
+  local value = ffi.new("uint64_t", 0)
+  local shift = 0
+
+  repeat
+    local oct = M.read_octet(reader)
+
+    if oct == nil then
+      error(string.format("fread, errno: %d", ffi.errno()))
+    end
+
+    if shift > SHIFT_MAX then
+      error("Malformed ULEB128 value: longer than 64 bits")
+    end
+
+    -- Alas, bit library works only with 32-bit arguments.
+    local oct_u64 = ffi.new("uint64_t", band(oct, PAYLOAD_MASK))
+    value = value + oct_u64 * (2 ^ shift)
+    shift = shift + SHIFT_STEP
+
+  until band(oct, LINK_BIT) == 0
+
+  return tonumber(value)
+end
+
+-- Reads a string stored as its ULEB128 length followed by the
+-- bytes themselves. Returns nil if the bytes are cut short.
+function M.read_string(reader)
+  local len = M.read_uleb128(reader)
+  return M.read_octets(reader, len)
+end
+
+-- Returns true when both the file and the buffer are exhausted.
+function M.eof(reader)
+  local sys_feof = ffi_C.feof(reader._file)
+  if sys_feof == 0 then
+    return false
+  end
+  -- Otherwise return true only if we have reached
+  -- the end of the buffer.
+  return reader._pos == reader._end
+end
+
+-- Opens fname for binary reading and returns a reader object with
+-- the functions of this module available as methods. The buffer
+-- is filled right away. Raises if the file cannot be opened.
+function M.new(fname)
+  local file = ffi_C.fopen(fname, "rb")
+  if file == nil then
+    error(string.format("fopen, errno: %d", ffi.errno()))
+  end
+
+  local reader = setmetatable({
+    -- Close the file when the reader is collected. The result of
+    -- fclose() is deliberately ignored: the file is read-only, and
+    -- an error raised from a finalizer would surface at whatever
+    -- unrelated point happened to trigger the collection.
+    _file = ffi.gc(file, ffi_C.fclose),
+    _buf = ffi.new("uint8_t[?]", BUFFER_SIZE),
+    _pos = 0,
+    _end = 0,
+  }, {__index = M})
+
+  _read_stream(reader, BUFFER_SIZE)
+
+  return reader
+end
+
+return M
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
new file mode 100644
index 0000000..f3e5e31
--- /dev/null
+++ b/tools/utils/symtab.lua
@@ -0,0 +1,89 @@
+-- Parser of LuaJIT's symtab binary stream.
+-- The format spec can be found in src/lj_memprof.h.
+--
+-- Major portions taken verbatim or adapted from the LuaVela.
+-- Copyright (C) 2015-2019 IPONWEB Ltd.
+
+local bit = require "bit"
+
+local band = bit.band
+local string_format = string.format
+
+local LJS_MAGIC = "ljs"
+local LJS_CURRENT_VERSION = 1
+local LJS_EPILOGUE_HEADER = 0x80
+local LJS_SYMTYPE_MASK = 0x03
+
+local SYMTAB_LFUNC = 0
+
+local M = {}
+
+-- Parse a single entry in a symtab: lfunc symbol.
+-- The entry is read as a ULEB128 address, a string chunk name and
+-- a ULEB128 line number; it is stored in symtab by the address
+-- with the name formatted as "chunk:line".
+local function parse_sym_lfunc(reader, symtab)
+  local sym_addr = reader:read_uleb128()
+  local sym_chunk = reader:read_string()
+  local sym_line = reader:read_uleb128()
+
+  symtab[sym_addr] = {
+    name = string_format("%s:%d", sym_chunk, sym_line),
+  }
+end
+
+-- Symbol type -> parser of the entry payload.
+local parsers = {
+  [SYMTAB_LFUNC] = parse_sym_lfunc,
+}
+
+-- Parses the symbol table the reader is positioned at and returns
+-- it as a table: address -> {name = "chunk:line"}. The reader is
+-- left right behind the epilogue header.
+-- Raises on a bad prologue, a version mismatch, an unknown entry
+-- type or data that ends in the middle of the table.
+function M.parse(reader)
+  local symtab = {}
+  local magic = reader:read_octets(3)
+  local version = reader:read_octets(1)
+
+  -- Dummy-consume reserved bytes.
+  local reserved = reader:read_octets(3)
+
+  -- read_octets() yields nil when the data is shorter than asked.
+  if magic == nil or version == nil or reserved == nil then
+    error("Bad LJS format prologue: unexpected end of data")
+  end
+
+  if magic ~= LJS_MAGIC then
+    error("Bad LJS format prologue: "..magic)
+  end
+
+  if string.byte(version) ~= LJS_CURRENT_VERSION then
+    error(string_format(
+      "LJS format version mismatch: "..
+      "the tool expects %d, but your data is %d",
+      LJS_CURRENT_VERSION,
+      string.byte(version)
+    ))
+  end
+
+  while not reader:eof() do
+    local header = reader:read_octet()
+
+    if header == nil then
+      error("Unexpected end of the symtab stream")
+    end
+
+    local is_final = band(header, LJS_EPILOGUE_HEADER) ~= 0
+
+    if is_final then
+      break
+    end
+
+    local sym_type = band(header, LJS_SYMTYPE_MASK)
+    local parse_sym = parsers[sym_type]
+
+    -- The payload length of an unknown entry is unknown as well,
+    -- so skipping it would leave the reader in the middle of the
+    -- entry and turn the rest of the stream into garbage.
+    if not parse_sym then
+      error(string_format("Unknown symtab entry type %d", sym_type))
+    end
+
+    parse_sym(reader, symtab)
+  end
+
+  return symtab
+end
+
+-- Returns a human-readable description of the location loc
+-- ({addr, line}): "INTERNAL" for a zero address, "name, line N"
+-- for an address found in symtab and "CFUNC 0x..." otherwise.
+function M.demangle(symtab, loc)
+  local addr = loc.addr
+
+  if addr == 0 then
+    return "INTERNAL"
+  end
+
+  if symtab[addr] then
+    return string_format("%s, line %d", symtab[addr].name, loc.line)
+  end
+
+  return string_format("CFUNC %#x", addr)
+end
+
+return M
-- 
2.28.0