[Tarantool-patches] [PATCH luajit v2 7/7] tools: introduce a memory profile parser

Sergey Kaplun skaplun at tarantool.org
Fri Dec 25 18:26:09 MSK 2020


This patch adds a parser for binary data dumped via the memory profiler. It is
a set of the following Lua modules:
* utils/bufread.lua: read binary data from a file.
* utils/symtab.lua: symbol table decode functions
* memprof/parse.lua: decode the memory profiler event stream
* memprof/humanize.lua: display decoded data in human readable format
* memprof.lua: Lua script to display data

There is also a stand-alone bash script <luajit-parse-memprof> that prints
the parsed data in human-readable form to stdout. It runs <memprof.lua> with
the corresponding LUA_PATH.

Part of tarantool/tarantool#5442
Part of tarantool/tarantool#5490
---

Changes in v2:
  - Add (un)?install sections in Makefile
  - Modify bash script correspondingly.
  - Change Lua modules layout.
  - Adjust the test: add a check that errno is returned in case of an error.
  - Code clean up.

 Makefile                           |  39 +++++-
 test/misclib-memprof-lapi.test.lua | 135 +++++++++++++++++++++
 tools/luajit-parse-memprof         |   9 ++
 tools/memprof.lua                  | 109 +++++++++++++++++
 tools/memprof/humanize.lua         |  45 +++++++
 tools/memprof/parse.lua            | 188 +++++++++++++++++++++++++++++
 tools/utils/bufread.lua            | 147 ++++++++++++++++++++++
 tools/utils/symtab.lua             |  89 ++++++++++++++
 8 files changed, 757 insertions(+), 4 deletions(-)
 create mode 100755 test/misclib-memprof-lapi.test.lua
 create mode 100755 tools/luajit-parse-memprof
 create mode 100644 tools/memprof.lua
 create mode 100644 tools/memprof/humanize.lua
 create mode 100644 tools/memprof/parse.lua
 create mode 100644 tools/utils/bufread.lua
 create mode 100644 tools/utils/symtab.lua

diff --git a/Makefile b/Makefile
index 4a56917..ba4aa2f 100644
--- a/Makefile
+++ b/Makefile
@@ -37,6 +37,9 @@ INSTALL_INC=   $(DPREFIX)/include/luajit-$(MAJVER).$(MINVER)
 
 INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(VERSION)
 INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit
+INSTALL_UTILSLIB= $(INSTALL_LJLIBD)/utils
+INSTALL_MEMPROFLIB= $(INSTALL_LJLIBD)/memprof
+INSTALL_TOOLSLIB= $(INSTALL_LJLIBD)
 INSTALL_LMODD= $(INSTALL_SHARE)/lua
 INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER)
 INSTALL_CMODD= $(INSTALL_LIB)/lua
@@ -54,6 +57,8 @@ INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib
 INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib
 INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).$(MINVER).$(RELVER).dylib
 INSTALL_PCNAME= luajit.pc
+INSTALL_TMEMPROFNAME= luajit-$(VERSION)-parse-memprof
+INSTALL_TMEMPROFSYMNAME= luajit-parse-memprof
 
 INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME)
 INSTALL_DYN= $(INSTALL_LIB)/$(INSTALL_SONAME)
@@ -62,11 +67,15 @@ INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_SOSHORT2)
 INSTALL_T= $(INSTALL_BIN)/$(INSTALL_TNAME)
 INSTALL_TSYM= $(INSTALL_BIN)/$(INSTALL_TSYMNAME)
 INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME)
+INSTALL_TMEMPROF= $(INSTALL_BIN)/$(INSTALL_TMEMPROFNAME)
+INSTALL_TMEMPROFSYM= $(INSTALL_BIN)/$(INSTALL_TMEMPROFSYMNAME)
 
 INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \
-  $(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD)
+  $(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD) \
+  $(INSTALL_UTILSLIB) $(INSTALL_MEMPROFLIB) $(INSTALL_TOOLSLIB)
 UNINSTALL_DIRS= $(INSTALL_JITLIB) $(INSTALL_LJLIBD) $(INSTALL_INC) \
-  $(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD)
+  $(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD) \
+  $(INSTALL_UTILSLIB) $(INSTALL_MEMPROFLIB) $(INSTALL_TOOLSLIB)
 
 RM= rm -f
 MKDIR= mkdir -p
@@ -78,6 +87,8 @@ UNINSTALL= $(RM)
 LDCONFIG= ldconfig -n
 SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \
             -e "s|^multilib=.*|multilib=$(MULTILIB)|"
+SED_TMEMPROF= sed -e "s|^TOOL_DIR=.*|TOOL_DIR=$(INSTALL_TOOLSLIB)|" \
+                  -e "s|^LUAJIT_BIN=.*|LUAJIT_BIN=$(INSTALL_T)|"
 
 FILE_T= luajit
 FILE_A= libluajit.a
@@ -89,6 +100,10 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
 	      dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
 	      dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
 	      dis_mips64.lua dis_mips64el.lua vmdef.lua
+FILES_UTILSLIB= bufread.lua symtab.lua
+FILES_MEMPROFLIB= parse.lua humanize.lua
+FILES_TOOLSLIB= memprof.lua
+FILE_TMEMPROF= luajit-parse-memprof
 
 ifeq (,$(findstring Windows,$(OS)))
   HOST_SYS:= $(shell uname -s)
@@ -130,21 +145,37 @@ install: $(INSTALL_DEP)
 	  $(RM) $(FILE_PC).tmp
 	cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
 	cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
+	cd tools/utils && $(INSTALL_F) $(FILES_UTILSLIB) $(INSTALL_UTILSLIB)
+	cd tools/memprof && $(INSTALL_F) $(FILES_MEMPROFLIB) $(INSTALL_MEMPROFLIB)
+	cd tools && $(INSTALL_F) $(FILES_TOOLSLIB) $(INSTALL_TOOLSLIB)
+	cd tools && $(SED_TMEMPROF) $(FILE_TMEMPROF) > $(FILE_TMEMPROF).tmp && \
+	  $(INSTALL_X) $(FILE_TMEMPROF).tmp $(INSTALL_TMEMPROF) && \
+	  $(RM) $(FILE_TMEMPROF).tmp
 	@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
 	@echo ""
 	@echo "Note: the development releases deliberately do NOT install a symlink for luajit"
-	@echo "You can do this now by running this command (with sudo):"
+	@echo "You can do this now by running these commands (with sudo):"
 	@echo ""
 	@echo "  $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
+	@echo "  $(SYMLINK) $(INSTALL_TMEMPROFNAME) $(INSTALL_TMEMPROFSYM)"
 	@echo ""
 
 
 uninstall:
 	@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
-	$(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
+	$(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) $(INSTALL_TMEMPROF)
 	for file in $(FILES_JITLIB); do \
 	  $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
 	  done
+	for file in $(FILES_UTILSLIB); do \
+	  $(UNINSTALL) $(INSTALL_UTILSLIB)/$$file; \
+	  done
+	for file in $(FILES_MEMPROFLIB); do \
+	  $(UNINSTALL) $(INSTALL_MEMPROFLIB)/$$file; \
+	  done
+	for file in $(FILES_TOOLSLIB); do \
+	  $(UNINSTALL) $(INSTALL_TOOLSLIB)/$$file; \
+	  done
 	for file in $(FILES_INC); do \
 	  $(UNINSTALL) $(INSTALL_INC)/$$file; \
 	  done
diff --git a/test/misclib-memprof-lapi.test.lua b/test/misclib-memprof-lapi.test.lua
new file mode 100755
index 0000000..e02c6fa
--- /dev/null
+++ b/test/misclib-memprof-lapi.test.lua
@@ -0,0 +1,135 @@
+#!/usr/bin/env tarantool
+
+local tap = require('tap')
+
+local test = tap.test("misc-memprof-lapi")
+test:plan(9)
+
+jit.off()
+jit.flush()
+
+-- FIXME: Launch tests with LUA_PATH environment variable.
+local path = arg[0]:gsub('/[^/]+%.test%.lua', '')
+local path_suffix = '../tools/?.lua;'
+package.path = ('%s/%s;'):format(path, path_suffix)..package.path
+
+local table_new = require "table.new"
+
+local bufread = require "utils.bufread"
+local memprof = require "memprof.parse"
+local symtab = require "utils.symtab"
+
+local TMP_BINFILE = arg[0]:gsub('[^/]+%.test%.lua', '%.%1.memprofdata.tmp.bin')
+local BAD_PATH = arg[0]:gsub('[^/]+%.test%.lua', '%1/memprofdata.tmp.bin')
+
+local function payload()
+  -- Preallocate table to avoid array part reallocations.
+  local _ = table_new(100, 0)
+
+  -- Want to see 100 objects here.
+  for i = 1, 100 do
+    -- Try to avoid crossing with "test" module objects.
+    _[i] = "memprof-str-"..i
+  end
+
+  _ = nil
+  -- VMSTATE == GC, reported as INTERNAL.
+  collectgarbage()
+end
+
+local function generate_output(filename)
+  -- Clean up all garbage to avoid pollution of free events.
+  collectgarbage()
+
+  local res, err = misc.memprof.start(filename)
+  -- Should start successfully.
+  assert(res, err)
+
+  payload()
+
+  res, err = misc.memprof.stop()
+  -- Should stop successfully.
+  assert(res, err)
+end
+
+-- Index events of `event_type` by source line ("INTERNAL" for address 0),
+-- keeping the symbol name and event count; unknown addresses are skipped.
+local function fill_ev_type(events, symbols, event_type)
+  local report = {}
+  for _, ev in pairs(events[event_type]) do
+    local loc = ev.loc
+    local sym = symbols[loc.addr]
+    if loc.addr == 0 then
+      report.INTERNAL = {name = "INTERNAL", num = ev.num}
+    elseif sym then
+      report[loc.line] = {
+        name = sym.name,
+        num = ev.num,
+      }
+    end
+  end
+  return report
+end
+
+-- Assert that alloc[line] names "@<arg[0]>:<function_line>" with nevents events.
+local function check_alloc_report(alloc, line, function_line, nevents)
+  local report = alloc[line]
+  assert(string.format("@%s:%d", arg[0], function_line) == report.name)
+  assert(report.num == nevents,
+         ("got=%d, expected=%d"):format(report.num, nevents))
+  return true
+end
+
+-- Not a directory.
+local res, err, errno = misc.memprof.start(BAD_PATH)
+test:ok(res == nil and err:match("Not a directory"))
+test:ok(type(errno) == "number")
+
+-- Profiler is running.
+res, err = misc.memprof.start(TMP_BINFILE)
+assert(res, err)
+res, err, errno = misc.memprof.start(TMP_BINFILE)
+test:ok(res == nil and err:match("profiler is running already"))
+test:ok(type(errno) == "number")
+
+res, err = misc.memprof.stop()
+assert(res, err)
+
+-- Profiler is not running.
+res, err, errno = misc.memprof.stop()
+test:ok(res == nil and err:match("profiler is not running"))
+test:ok(type(errno) == "number")
+
+-- Test profiler output and parse.
+res, err = pcall(generate_output, TMP_BINFILE)
+
+-- Want to cleanup carefully if something went wrong.
+if not res then
+  os.remove(TMP_BINFILE)
+  error(err)
+end
+
+local reader = bufread.new(TMP_BINFILE)
+local symbols = symtab.parse(reader)
+local events = memprof.parse(reader, symbols)
+
+-- We don't need it any more.
+os.remove(TMP_BINFILE)
+
+local alloc = fill_ev_type(events, symbols, "alloc")
+local free = fill_ev_type(events, symbols, "free")
+
+-- Check allocation reports. The second argument is a line number
+-- of the allocation event itself. The third is a line number of
+-- the corresponding function definition. The last one is
+-- the number of allocations.
+-- 1 event - allocation of the table itself + 1 allocation
+-- of the array part, as it is bigger than LJ_MAX_COLOSIZE (16).
+test:ok(check_alloc_report(alloc, 27, 25, 2))
+-- 100 strings allocations.
+test:ok(check_alloc_report(alloc, 32, 25, 100))
+
+-- Collect all previous allocated objects.
+test:ok(free.INTERNAL.num == 102)
+
+os.exit(test:check() and 0 or 1)
diff --git a/tools/luajit-parse-memprof b/tools/luajit-parse-memprof
new file mode 100755
index 0000000..c814301
--- /dev/null
+++ b/tools/luajit-parse-memprof
@@ -0,0 +1,9 @@
+#!/bin/bash
+#
+# Launcher for memprof parser.
+
+# This two variables are replaced on installing.
+TOOL_DIR=$(dirname `readlink -f $0`)
+LUAJIT_BIN=$TOOL_DIR/../src/luajit
+
+LUA_PATH="$TOOL_DIR/?.lua;;" $LUAJIT_BIN $TOOL_DIR/memprof.lua $@
diff --git a/tools/memprof.lua b/tools/memprof.lua
new file mode 100644
index 0000000..7476757
--- /dev/null
+++ b/tools/memprof.lua
@@ -0,0 +1,109 @@
+-- A tool for parsing and visualisation of LuaJIT's memory
+-- profiler output.
+--
+-- TODO:
+-- * Think about callgraph memory profiling for complex
+--   table reallocations
+-- * Nicer output, probably an HTML view
+-- * Demangling of C symbols
+--
+-- Major portions taken verbatim or adapted from the LuaVela.
+-- Copyright (C) 2015-2019 IPONWEB Ltd.
+
+local bufread = require "utils.bufread"
+local memprof = require "memprof.parse"
+local symtab  = require "utils.symtab"
+local view    = require "memprof.humanize"
+
+local stdout, stderr = io.stdout, io.stderr
+local match, gmatch = string.match, string.gmatch
+
+-- Program options.
+local opt_map = {}
+
+function opt_map.help()
+  stdout:write [[
+luajit-parse-memprof - parser of the memory usage profile collected
+                       with LuaJIT's memprof.
+
+SYNOPSIS
+
+luajit-parse-memprof [options] memprof.bin
+
+Supported options are:
+
+  --help                            Show this help and exit
+]]
+  os.exit(0)
+end
+
+-- Print error and exit with error status.
+local function opterror(...)
+  stderr:write("luajit-parse-memprof.lua: ERROR: ", ...)
+  stderr:write("\n")
+  os.exit(1)
+end
+
+-- Run the opt_map handler for `opt`; opterror() adds the newline and exits.
+local function parseopt(opt, args)
+  local handler = opt_map[opt]
+  if not handler then
+    local shown = (#opt == 1 and "-" or "--")..opt
+    opterror("unrecognized option `", shown, "'. Try `--help'.")
+  end
+  handler(args)
+end
+
+-- Parse arguments.
+local function parseargs(args)
+  -- Process all option arguments.
+  args.argn = 1
+  repeat
+    local a = args[args.argn]
+    if not a then
+      break
+    end
+    local lopt, opt = match(a, "^%-(%-?)(.+)")
+    if not opt then
+      break
+    end
+    args.argn = args.argn + 1
+    if lopt == "" then
+      -- Loop through short options.
+      for o in gmatch(opt, ".") do
+        parseopt(o, args)
+      end
+    else
+      -- Long option.
+      parseopt(opt, args)
+    end
+  until false
+
+  -- Check for proper number of arguments.
+  local nargs = #args - args.argn + 1
+  if nargs ~= 1 then
+    opt_map.help()
+  end
+
+  -- Translate a single input file.
+  -- TODO: Handle multiple files?
+  return args[args.argn]
+end
+
+local inputfile = parseargs{...}
+
+local reader  = bufread.new(inputfile)
+local symbols = symtab.parse(reader)
+local events  = memprof.parse(reader, symbols)
+
+stdout:write("ALLOCATIONS", "\n")
+view.render(events.alloc, symbols)
+stdout:write("\n")
+
+stdout:write("REALLOCATIONS", "\n")
+view.render(events.realloc, symbols)
+stdout:write("\n")
+
+stdout:write("DEALLOCATIONS", "\n")
+view.render(events.free, symbols)
+stdout:write("\n")
diff --git a/tools/memprof/humanize.lua b/tools/memprof/humanize.lua
new file mode 100644
index 0000000..109a39d
--- /dev/null
+++ b/tools/memprof/humanize.lua
@@ -0,0 +1,45 @@
+-- Simple human-readable renderer of LuaJIT's memprof profile.
+--
+-- Major portions taken verbatim or adapted from the LuaVela.
+-- Copyright (C) 2015-2019 IPONWEB Ltd.
+
+local symtab = require "utils.symtab"
+
+local M = {}
+
+-- Print every event, most frequent first, as "<location>: num alloc free"
+-- (tab-separated counters), then the sorted locations from its `primary`
+-- table under an "Overrides:" heading when that table is not empty.
+function M.render(events, symbols)
+  local order = {}
+  for id in pairs(events) do
+    order[#order + 1] = id
+  end
+
+  -- Descending by the number of events.
+  table.sort(order, function(a, b)
+    return events[a].num > events[b].num
+  end)
+
+  for _, id in ipairs(order) do
+    local ev = events[id]
+    local where = symtab.demangle(symbols, ev.loc)
+    print(string.format("%s: %d\t%d\t%d", where, ev.num, ev.alloc, ev.free))
+
+    local overrides = {}
+    for _, loc in pairs(ev.primary) do
+      overrides[#overrides + 1] = symtab.demangle(symbols, loc)
+    end
+
+    if #overrides > 0 then
+      table.sort(overrides)
+      print("\tOverrides:")
+      for _, text in ipairs(overrides) do
+        print(string.format("\t\t%s", text))
+      end
+      print("")
+    end
+  end
+end
+
+return M
diff --git a/tools/memprof/parse.lua b/tools/memprof/parse.lua
new file mode 100644
index 0000000..f4996f4
--- /dev/null
+++ b/tools/memprof/parse.lua
@@ -0,0 +1,188 @@
+-- Parser of LuaJIT's memprof binary stream.
+-- The format spec can be found in <src/lj_memprof.h>.
+--
+-- Major portions taken verbatim or adapted from the LuaVela.
+-- Copyright (C) 2015-2019 IPONWEB Ltd.
+
+local bit = require "bit"
+local band = bit.band
+local lshift = bit.lshift
+
+local string_format = string.format
+
+local LJM_MAGIC = "ljm"
+local LJM_CURRENT_VERSION = 1
+
+local LJM_EPILOGUE_HEADER = 0x80
+
+local AEVENT_ALLOC = 1
+local AEVENT_FREE = 2
+local AEVENT_REALLOC = 3
+
+local ASOURCE_INT = lshift(1, 2)
+local ASOURCE_LFUNC = lshift(2, 2)
+local ASOURCE_CFUNC = lshift(3, 2)
+
+local M = {}
+
+local function new_event(loc)
+  return {
+    loc = loc,
+    num = 0,
+    free = 0,
+    alloc = 0,
+    primary = {},
+  }
+end
+
+local function link_to_previous(heap, e, oaddr)
+  -- No entry means memory at oaddr was allocated before we started tracking.
+  local heap_chunk = heap[oaddr]
+  if heap_chunk then
+    -- Chunks are {size, id, loc}: save the allocating location under its id.
+    e.primary[heap_chunk[2]] = heap_chunk[3]
+  end
+end
+
+-- Build the event id string and the {addr, line} location for a source.
+local function id_location(addr, line)
+  local id = string_format("f%#xl%d", addr, line)
+  local loc = {addr = addr, line = line}
+  return id, loc
+end
+
+local function parse_location(reader, asource)
+  if asource == ASOURCE_INT then
+    return id_location(0, 0)
+  elseif asource == ASOURCE_CFUNC then
+    return id_location(reader:read_uleb128(), 0)
+  elseif asource == ASOURCE_LFUNC then
+    return id_location(reader:read_uleb128(), reader:read_uleb128())
+  end
+  error("Unknown asource "..asource)
+end
+
+-- Allocation event: count it for its location and track the chunk in heap.
+local function parse_alloc(reader, asource, events, heap)
+  local id, loc = parse_location(reader, asource)
+  local naddr = reader:read_uleb128()
+  local nsize = reader:read_uleb128()
+
+  local ev = events[id]
+  if ev == nil then
+    ev = new_event(loc)
+    events[id] = ev
+  end
+  ev.num, ev.alloc = ev.num + 1, ev.alloc + nsize
+
+  heap[naddr] = {nsize, id, loc}
+end
+
+-- Reallocation event: account the old size as freed and the new one as
+-- allocated, link to the chunk tracked at oaddr, then update heap.
+local function parse_realloc(reader, asource, events, heap)
+  local id, loc = parse_location(reader, asource)
+  local oaddr = reader:read_uleb128()
+  local osize = reader:read_uleb128()
+  local naddr = reader:read_uleb128()
+  local nsize = reader:read_uleb128()
+
+  local ev = events[id]
+  if ev == nil then
+    ev = new_event(loc)
+    events[id] = ev
+  end
+  ev.num, ev.free = ev.num + 1, ev.free + osize
+  ev.alloc = ev.alloc + nsize
+
+  link_to_previous(heap, ev, oaddr)
+  heap[oaddr] = nil
+  heap[naddr] = {nsize, id, loc}
+end
+
+-- Deallocation event: account the freed size, link to the chunk tracked
+-- at oaddr and drop it from heap.
+local function parse_free(reader, asource, events, heap)
+  local id, loc = parse_location(reader, asource)
+  local oaddr = reader:read_uleb128()
+  local osize = reader:read_uleb128()
+
+  local ev = events[id]
+  if ev == nil then
+    ev = new_event(loc)
+    events[id] = ev
+  end
+  ev.num, ev.free = ev.num + 1, ev.free + osize
+
+  link_to_previous(heap, ev, oaddr)
+  heap[oaddr] = nil
+end
+
+local parsers = {
+  [AEVENT_ALLOC] = {evname = "alloc", parse = parse_alloc},
+  [AEVENT_FREE] = {evname = "free", parse = parse_free},
+  [AEVENT_REALLOC] = {evname = "realloc", parse = parse_realloc},
+}
+
+local function ev_header_is_valid(evh)
+  return evh <= 0x0f or evh == LJM_EPILOGUE_HEADER
+end
+
+-- Splits event header into event type (aka aevent = allocation
+-- event) and event source (aka asource = allocation source).
+local function ev_header_split(evh)
+  return band(evh, 0x3), band(evh, lshift(0x3, 2))
+end
+
+-- Parse one event; return false on the epilogue header, true otherwise.
+local function parse_event(reader, events)
+  local ev_header = reader:read_octet()
+  -- read_octet() returns nil if the stream ends before the epilogue.
+  assert(ev_header ~= nil, "Unexpected end of memprof event stream")
+  assert(ev_header_is_valid(ev_header), "Bad ev_header "..ev_header)
+
+  if ev_header == LJM_EPILOGUE_HEADER then
+    return false
+  end
+
+  local aevent, asource = ev_header_split(ev_header)
+  local parser = parsers[aevent]
+  assert(parser, "Bad aevent "..aevent)
+
+  parser.parse(reader, asource, events[parser.evname], events.heap)
+  return true
+end
+
+-- Parse the memprof event stream; raises on a bad or truncated prologue.
+function M.parse(reader)
+  local events = {alloc = {}, realloc = {}, free = {}, heap = {}}
+
+  local magic = reader:read_octets(3)
+  local version = reader:read_octets(1)
+  -- Reserved bytes carry no data; they are read to skip them.
+  local reserved = reader:read_octets(3)
+  -- read_octets() returns nil when the file ends inside the prologue.
+  if not (magic and version and reserved) then
+    error("Truncated LJM format prologue")
+  end
+
+  if magic ~= LJM_MAGIC then
+    error("Bad LJM format prologue: "..magic)
+  end
+
+  if string.byte(version) ~= LJM_CURRENT_VERSION then
+    error(string_format(
+      "LJM format version mismatch: the tool expects %d, but your data is %d",
+      LJM_CURRENT_VERSION,
+      string.byte(version)
+    ))
+  end
+
+  while parse_event(reader, events) do
+    -- Empty body.
+  end
+
+  return events
+end
+
+return M
diff --git a/tools/utils/bufread.lua b/tools/utils/bufread.lua
new file mode 100644
index 0000000..873e06a
--- /dev/null
+++ b/tools/utils/bufread.lua
@@ -0,0 +1,147 @@
+-- An implementation of buffered reading data from
+-- an arbitrary binary file.
+--
+-- Major portions taken verbatim or adapted from the LuaVela.
+-- Copyright (C) 2015-2019 IPONWEB Ltd.
+
+local assert = assert
+
+local ffi = require "ffi"
+local bit = require "bit"
+
+local ffi_C = ffi.C
+local band = bit.band
+
+local LINK_BIT = 0x80
+local PAYLOAD_MASK = 0x7f
+local SHIFT_STEP = 7
+
+-- 10 Mb.
+local BUFFER_SIZE = 10 * 1024 * 1024
+
+local M = {}
+
+ffi.cdef[[
+  void *memcpy(void *, const void *, size_t);
+
+  typedef struct FILE_ FILE;
+  FILE *fopen(const char *, const char *);
+  size_t fread(void *, size_t, size_t, FILE *);
+  int feof(FILE *);
+  int fclose(FILE *);
+]]
+
+local function _read_stream(reader, n)
+  local tail_size = reader._end - reader._pos
+
+  if tail_size >= n then
+    -- Enough data to satisfy the request of n bytes.
+    return true
+  end
+
+  -- Otherwise carry tail_size bytes from the end of the buffer
+  -- to the start and fill up free_size bytes with fresh data.
+  -- tail_size < n <= free_size (see assert below) ensures that
+  -- we don't copy overlapping memory regions.
+  -- reader._pos == 0 means filling buffer for the first time.
+
+  local free_size = reader._pos > 0 and reader._pos or n
+
+  assert(n <= free_size, "Internal buffer is large enough")
+
+  if tail_size ~= 0 then
+    ffi_C.memcpy(reader._buf, reader._buf + reader._pos, tail_size)
+  end
+
+  local bytes_read = ffi_C.fread(
+    reader._buf + tail_size, 1, free_size, reader._file
+  )
+
+  reader._pos = 0
+  reader._end = tail_size + bytes_read
+
+  return reader._end - reader._pos >= n
+end
+
+-- Return the next byte of the stream, or nil when no more data is available.
+function M.read_octet(reader)
+  if _read_stream(reader, 1) then
+    local pos = reader._pos
+    reader._pos = pos + 1
+    return reader._buf[pos]
+  end
+  return nil
+end
+
+-- Return the next n bytes as a Lua string, or nil when fewer are available.
+function M.read_octets(reader, n)
+  if _read_stream(reader, n) then
+    local pos = reader._pos
+    reader._pos = pos + n
+    return ffi.string(reader._buf + pos, n)
+  end
+  return nil
+end
+
+function M.read_uleb128(reader)
+  local value = ffi.new("uint64_t", 0)
+  local shift = 0
+
+  repeat
+    local oct = M.read_octet(reader)
+
+    if oct == nil then
+      error(string.format("fread, errno: %d", ffi.errno()))
+    end
+
+    -- Alas, bit library works only with 32-bit arguments.
+    local oct_u64 = ffi.new("uint64_t", band(oct, PAYLOAD_MASK))
+    value = value + oct_u64 * (2 ^ shift)
+    shift = shift + SHIFT_STEP
+
+  until band(oct, LINK_BIT) == 0
+
+  return tonumber(value)
+end
+
+function M.read_string(reader)
+  local len = M.read_uleb128(reader)
+  return M.read_octets(reader, len)
+end
+
+function M.eof(reader)
+  local sys_feof = ffi_C.feof(reader._file)
+  if sys_feof == 0 then
+    return false
+  end
+  -- The file is at EOF: return true only if we have also
+  -- reached the end of the buffered data.
+  return reader._pos == reader._end
+end
+
+function M.new(fname)
+  local file = ffi_C.fopen(fname, "rb")
+  if file == nil then
+    error(string.format("fopen, errno: %d", ffi.errno()))
+  end
+
+  local finalizer = function(f)
+    if ffi_C.fclose(f) ~= 0 then
+      error(string.format("fclose, errno: %d", ffi.errno()))
+    end
+    ffi.gc(f, nil)
+  end
+
+  local reader = setmetatable({
+    _file = ffi.gc(file, finalizer),
+    _buf = ffi.new("uint8_t[?]", BUFFER_SIZE),
+    _pos = 0,
+    _end = 0,
+  }, {__index = M})
+
+  _read_stream(reader, BUFFER_SIZE)
+
+  return reader
+end
+
+return M
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
new file mode 100644
index 0000000..f3e5e31
--- /dev/null
+++ b/tools/utils/symtab.lua
@@ -0,0 +1,89 @@
+-- Parser of LuaJIT's symtab binary stream.
+-- The format spec can be found in <src/lj_symtab.h>.
+--
+-- Major portions taken verbatim or adapted from the LuaVela.
+-- Copyright (C) 2015-2019 IPONWEB Ltd.
+
+local bit = require "bit"
+
+local band = bit.band
+local string_format = string.format
+
+local LJS_MAGIC = "ljs"
+local LJS_CURRENT_VERSION = 1
+local LJS_EPILOGUE_HEADER = 0x80
+local LJS_SYMTYPE_MASK = 0x03
+
+local SYMTAB_LFUNC = 0
+
+local M = {}
+
+-- Read one lfunc symbol (address, chunk name, line) and store its
+-- "<chunk>:<line>" name in symtab under the address.
+local function parse_sym_lfunc(reader, symtab)
+  local addr = reader:read_uleb128()
+  local chunk = reader:read_string()
+  local line = reader:read_uleb128()
+  local name = string_format("%s:%d", chunk, line)
+
+  symtab[addr] = {name = name}
+end
+
+local parsers = {
+  [SYMTAB_LFUNC] = parse_sym_lfunc,
+}
+
+-- Parse the symtab into a table keyed by address; raises on a bad prologue.
+function M.parse(reader)
+  local symtab = {}
+  local magic = reader:read_octets(3)
+  local version = reader:read_octets(1)
+  -- Reserved bytes carry no data; they are read to skip them.
+  local reserved = reader:read_octets(3)
+  -- read_octets() returns nil when the file ends inside the prologue.
+  if not (magic and version and reserved) then
+    error("Truncated LJS format prologue")
+  end
+
+  if magic ~= LJS_MAGIC then
+    error("Bad LJS format prologue: "..magic)
+  end
+
+  if string.byte(version) ~= LJS_CURRENT_VERSION then
+    error(string_format(
+      "LJS format version mismatch: the tool expects %d, but your data is %d",
+      LJS_CURRENT_VERSION,
+      string.byte(version)
+    ))
+  end
+
+  while not reader:eof() do
+    local header = reader:read_octet()
+    -- A nil header means the data ran out; stop as on the epilogue.
+    if header == nil or band(header, LJS_EPILOGUE_HEADER) ~= 0 then
+      break
+    end
+    local parser = parsers[band(header, LJS_SYMTYPE_MASK)]
+    if parser then
+      parser(reader, symtab)
+    end
+  end
+
+  return symtab
+end
+
+-- Render a location: "INTERNAL" for address 0, "<name>, line <n>" for a
+-- known symbol, "CFUNC <addr>" otherwise.
+function M.demangle(symtab, loc)
+  local addr = loc.addr
+  local sym = symtab[addr]
+
+  if addr == 0 then
+    return "INTERNAL"
+  elseif sym then
+    return string_format("%s, line %d", sym.name, loc.line)
+  end
+  return string_format("CFUNC %#x", addr)
+end
+
+return M
-- 
2.28.0



More information about the Tarantool-patches mailing list