[Tarantool-patches] [PATCH luajit v2 5/7] core: introduce memory profiler

Sergey Kaplun skaplun at tarantool.org
Fri Dec 25 18:26:07 MSK 2020


This patch introduces memory profiler for Lua machine.

First of all profiler dumps the definitions of all loaded Lua functions
(symtab) via the write buffer introduced in one of the previous patches.

Profiler replaces the old allocation function with the instrumented one
after symtab is dumped. This new function reports all allocation,
reallocation and deallocation events via the write buffer during
profiling. Subsequent content depends on the function's type (LFUNC,
FFUNC or CFUNC).

To aggregate all traces into a single vmstate while profiling, a special
macro LJ_VMST_TRACE equal to LJ_VMST__MAX is introduced.

When profiling is over, a special epilogue event header is written and
the old allocation function is restored back.

This change also makes debug_frameline function LuaJIT-wide visible to
be used in the memory profiler.

For more information, see <lj_memprof.h>.

Part of tarantool/tarantool#5442
---

Changes in v2:
  - Merged with debug-to-public commit and symtab.
  - Drop [T]imer bit description.

 src/Makefile     |   8 +-
 src/Makefile.dep |  31 ++--
 src/lj_arch.h    |  22 +++
 src/lj_debug.c   |   8 +-
 src/lj_debug.h   |   3 +
 src/lj_memprof.c | 430 +++++++++++++++++++++++++++++++++++++++++++++++
 src/lj_memprof.h | 165 ++++++++++++++++++
 src/lj_obj.h     |   8 +
 src/lj_state.c   |   8 +
 src/ljamalg.c    |   1 +
 10 files changed, 665 insertions(+), 19 deletions(-)
 create mode 100644 src/lj_memprof.c
 create mode 100644 src/lj_memprof.h

diff --git a/src/Makefile b/src/Makefile
index 384b590..3218dfd 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -113,6 +113,12 @@ XCFLAGS=
 # Enable GC64 mode for x64.
 #XCFLAGS+= -DLUAJIT_ENABLE_GC64
 #
+# Disable the memory profiler.
+#XCFLAGS+= -DLUAJIT_DISABLE_MEMPROF
+#
+# Disable thread safety of the memory profiler.
+#XCFLAGS+= -DLUAJIT_DISABLE_THREAD_SAFE
+#
 ##############################################################################
 
 ##############################################################################
@@ -489,7 +495,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o lj_wbuf.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
 	  lj_strfmt.o lj_strfmt_num.o lj_api.o lj_mapi.o lj_profile.o \
-	  lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
+	  lj_memprof.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
 	  lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 59ed450..8ae14a5 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -147,6 +147,9 @@ lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
  lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
+lj_memprof.o: lj_memprof.c lj_memprof.h lj_def.h lua.h luaconf.h \
+ lj_obj.h lj_arch.h lj_frame.h lj_bc.h lj_jit.h lj_ir.h lj_gc.h lj_debug.h \
+ lj_wbuf.h
 lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
 lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_ir.h lj_jit.h lj_iropt.h
@@ -220,20 +223,20 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
  lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_wbuf.c lj_wbuf.h lj_utils.h \
  lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \
  lj_debug.c lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c \
- lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c \
- lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_api.c lj_mapi.c lmisclib.h \
- lj_profile.c lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \
- lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c \
- lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
- lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
- lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
- lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
- lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
- lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
- lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
- lj_utils_leb128.c lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c \
- lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
- lib_ffi.c lib_misc.c lib_init.c
+ lj_ccallback.h lj_profile.h lj_memprof.h lj_vmevent.c lj_vmevent.h \
+ lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_api.c lj_mapi.c \
+ lmisclib.h lj_profile.c lj_memprof.c lj_lex.c lualib.h lj_parse.h lj_parse.c \
+ lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c \
+ lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h \
+ lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h \
+ lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h lj_opt_mem.c \
+ lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c \
+ lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c \
+ lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h \
+ lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c \
+ lj_alloc.c lj_utils_leb128.c lib_aux.c lib_base.c lj_libdef.h lib_math.c \
+ lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c \
+ lib_bit.c lib_jit.c lib_ffi.c lib_misc.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
 host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
  lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
diff --git a/src/lj_arch.h b/src/lj_arch.h
index c8d7138..5967849 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -213,6 +213,8 @@
 #define LJ_ARCH_VERSION		50
 #endif
 
+#define LJ_ARCH_NOMEMPROF	1
+
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
 
 #define LJ_ARCH_BITS		64
@@ -234,6 +236,8 @@
 
 #define LJ_ARCH_VERSION		80
 
+#define LJ_ARCH_NOMEMPROF	1
+
 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
 
 #ifndef LJ_ARCH_ENDIAN
@@ -299,6 +303,8 @@
 #define LJ_ARCH_XENON		1
 #endif
 
+#define LJ_ARCH_NOMEMPROF	1
+
 #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
 
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
@@ -358,6 +364,8 @@
 #define LJ_ARCH_VERSION		10
 #endif
 
+#define LJ_ARCH_NOMEMPROF	1
+
 #else
 #error "No target architecture defined"
 #endif
@@ -564,4 +572,18 @@
 #define LJ_52			0
 #endif
 
+/* Disable or enable the memory profiler. */
+#if defined(LUAJIT_DISABLE_MEMPROF) || defined(LJ_ARCH_NOMEMPROF) || LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN || LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_XBOX360
+#define LJ_HASMEMPROF		0
+#else
+#define LJ_HASMEMPROF		1
+#endif
+
+/* Disable or enable the memory profiler's locking (used by lj_memprof.c). */
+#if defined(LUAJIT_DISABLE_THREAD_SAFE) || LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360
+#define LJ_IS_THREAD_SAFE	0
+#else
+#define LJ_IS_THREAD_SAFE	1
+#endif
+
 #endif
diff --git a/src/lj_debug.c b/src/lj_debug.c
index 73bd196..bb9ab28 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -128,7 +128,7 @@ BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc)
 }
 
 /* Get line number for function/frame. */
-static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
+BCLine lj_debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
 {
   BCPos pc = debug_framepc(L, fn, nextframe);
   if (pc != NO_BCPOS) {
@@ -353,7 +353,7 @@ void lj_debug_addloc(lua_State *L, const char *msg,
   if (frame) {
     GCfunc *fn = frame_func(frame);
     if (isluafunc(fn)) {
-      BCLine line = debug_frameline(L, fn, nextframe);
+      BCLine line = lj_debug_frameline(L, fn, nextframe);
       if (line >= 0) {
 	GCproto *pt = funcproto(fn);
 	char buf[LUA_IDSIZE];
@@ -470,7 +470,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
 	ar->what = "C";
       }
     } else if (*what == 'l') {
-      ar->currentline = frame ? debug_frameline(L, fn, nextframe) : -1;
+      ar->currentline = frame ? lj_debug_frameline(L, fn, nextframe) : -1;
     } else if (*what == 'u') {
       ar->nups = fn->c.nupvalues;
       if (ext) {
@@ -616,7 +616,7 @@ void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
 	    GCproto *pt = funcproto(fn);
 	    if (debug_putchunkname(sb, pt, pathstrip)) {
 	      /* Regular Lua function. */
-	      BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) :
+	      BCLine line = c == 'l' ? lj_debug_frameline(L, fn, nextframe) :
 				       pt->firstline;
 	      lj_buf_putb(sb, ':');
 	      lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline);
diff --git a/src/lj_debug.h b/src/lj_debug.h
index 5917c00..a157d28 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -40,6 +40,9 @@ LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
 LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
 LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar,
 			     int ext);
+#if LJ_HASMEMPROF
+LJ_FUNC BCLine lj_debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe);
+#endif
 #if LJ_HASPROFILE
 LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt,
 				int depth);
diff --git a/src/lj_memprof.c b/src/lj_memprof.c
new file mode 100644
index 0000000..e0df057
--- /dev/null
+++ b/src/lj_memprof.c
@@ -0,0 +1,430 @@
+/*
+** Implementation of memory profiler.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#define lj_memprof_c
+#define LUA_CORE
+
+#include <errno.h>
+
+#include "lj_memprof.h"
+#include "lj_def.h"
+#include "lj_arch.h"
+
+#if LJ_HASMEMPROF
+
+#if LJ_IS_THREAD_SAFE
+#include <pthread.h>
+#endif
+
+#include "lua.h"
+
+#include "lj_obj.h"
+#include "lj_frame.h"
+#include "lj_debug.h"
+#include "lj_gc.h"
+#include "lj_wbuf.h"
+
+/* --------------------------------- Symtab --------------------------------- */
+
+static const unsigned char ljs_header[] = {'l', 'j', 's', LJS_CURRENT_VERSION,
+					   0x0, 0x0, 0x0};
+
+static void symtab_write_prologue(struct lj_wbuf *out)
+{
+  /* ljs_header is a byte array, so its size is also its element count. */
+  lj_wbuf_addn(out, ljs_header, sizeof(ljs_header));
+}
+
+static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
+{
+  const GCRef *iter = &g->gc.root;
+  const GCobj *o;
+
+  symtab_write_prologue(out);
+
+  while (NULL != (o = gcref(*iter))) {
+    switch (o->gch.gct) {
+    case (~LJ_TPROTO): {
+      const GCproto *pt = gco2pt(o);
+      lj_wbuf_addbyte(out, SYMTAB_LFUNC);
+      lj_wbuf_addu64(out, (uintptr_t)pt);
+      lj_wbuf_addstring(out, proto_chunknamestr(pt));
+      lj_wbuf_addu64(out, (uint64_t)pt->firstline);
+      break;
+    }
+    default:
+      break;
+    }
+    iter = &o->gch.nextgc;
+  }
+
+  lj_wbuf_addbyte(out, SYMTAB_FINAL);
+}
+
+/* ---------------------------- Memory profiler ----------------------------- */
+
+enum memprof_state {
+  /* memprof is not running. */
+  MPS_IDLE,
+  /* memprof is running. */
+  MPS_PROFILE,
+  /*
+  ** Stopped in case of stopped stream.
+  ** Saved errno is returned to user at memprof_stop.
+  */
+  MPS_HALT
+};
+
+struct alloc {
+  lua_Alloc allocf; /* Allocating function. */
+  void *state; /* Opaque allocator's state. */
+};
+
+struct memprof {
+  global_State *g; /* Profiled VM. */
+  enum memprof_state state; /* Internal state. */
+  struct lj_wbuf out; /* Output accumulator. */
+  struct alloc orig_alloc; /* Original allocator. */
+  struct lua_Prof_options opt; /* Profiling options. */
+  int saved_errno; /* Saved errno when profiler deinstrumented. */
+};
+
+#if LJ_IS_THREAD_SAFE
+
+static pthread_mutex_t memprof_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static LJ_AINLINE int memprof_lock(void)
+{
+  return pthread_mutex_lock(&memprof_mutex);
+}
+
+static LJ_AINLINE int memprof_unlock(void)
+{
+  return pthread_mutex_unlock(&memprof_mutex);
+}
+
+#else /* LJ_IS_THREAD_SAFE */
+
+#define memprof_lock()
+#define memprof_unlock()
+
+#endif /* LJ_IS_THREAD_SAFE */
+
+static struct memprof memprof = {0};
+
+static const unsigned char ljm_header[] = {
+  'l', 'j', 'm', LJM_CURRENT_FORMAT_VERSION, 0x0, 0x0, 0x0};
+
+static void memprof_write_lfunc(struct lj_wbuf *out, uint8_t header,
+				GCfunc *fn, struct lua_State *L,
+				cTValue *nextframe)
+{
+  const BCLine line = lj_debug_frameline(L, fn, nextframe);
+  lj_wbuf_addbyte(out, header | ASOURCE_LFUNC);
+  lj_wbuf_addu64(out, (uintptr_t)funcproto(fn));
+  lj_wbuf_addu64(out, line >= 0 ? (uintptr_t)line : 0);
+}
+
+static void memprof_write_cfunc(struct lj_wbuf *out, uint8_t header,
+				const GCfunc *fn)
+{
+  lj_wbuf_addbyte(out, header | ASOURCE_CFUNC);
+  lj_wbuf_addu64(out, (uintptr_t)fn->c.f);
+}
+
+static void memprof_write_ffunc(struct lj_wbuf *out, uint8_t header,
+				GCfunc *fn, struct lua_State *L,
+				cTValue *frame)
+{
+  cTValue *pframe = frame_prev(frame);
+  GCfunc *pfn = frame_func(pframe);
+
+  /*
+  ** XXX: If a fast function is called by a Lua function, report the
+  ** Lua function for more meaningful output. Otherwise report the fast
+  ** function as a C function.
+  */
+  if (pfn != NULL && isluafunc(pfn))
+    memprof_write_lfunc(out, header, pfn, L, frame);
+  else
+    memprof_write_cfunc(out, header, fn);
+}
+
+static void memprof_write_func(struct memprof *mp, uint8_t header)
+{
+  struct lj_wbuf *out = &mp->out;
+  lua_State *L = gco2th(gcref(mp->g->mem_L));
+  cTValue *frame = L->base - 1;
+  GCfunc *fn;
+
+  fn = frame_func(frame);
+
+  if (isluafunc(fn))
+    memprof_write_lfunc(out, header, fn, L, NULL);
+  else if (isffunc(fn))
+    memprof_write_ffunc(out, header, fn, L, frame);
+  else if (iscfunc(fn))
+    memprof_write_cfunc(out, header, fn);
+  else
+    lua_assert(0);
+}
+
+static void memprof_write_hvmstate(struct memprof *mp, uint8_t header)
+{
+  lj_wbuf_addbyte(&mp->out, header | ASOURCE_INT);
+}
+
+/*
+** XXX: In ideal world, we should report allocations from traces as well.
+** But since traces must follow the semantics of the original code, behaviour of
+** Lua and JITted code must match 1:1 in terms of allocations, which makes
+** using memprof with enabled JIT virtually redundant. Hence the stub below.
+*/
+static void memprof_write_trace(struct memprof *mp, uint8_t header)
+{
+  lj_wbuf_addbyte(&mp->out, header | ASOURCE_INT);
+}
+
+typedef void (*memprof_writer)(struct memprof *mp, uint8_t header);
+
+static const memprof_writer memprof_writers[] = {
+  memprof_write_hvmstate, /* LJ_VMST_INTERP */
+  memprof_write_func, /* LJ_VMST_LFUNC */
+  memprof_write_func, /* LJ_VMST_FFUNC */
+  memprof_write_func, /* LJ_VMST_CFUNC */
+  memprof_write_hvmstate, /* LJ_VMST_GC */
+  memprof_write_hvmstate, /* LJ_VMST_EXIT */
+  memprof_write_hvmstate, /* LJ_VMST_RECORD */
+  memprof_write_hvmstate, /* LJ_VMST_OPT */
+  memprof_write_hvmstate, /* LJ_VMST_ASM */
+  memprof_write_trace /* LJ_VMST_TRACE */
+};
+
+static void memprof_write_caller(struct memprof *mp, uint8_t aevent)
+{
+  const global_State *g = mp->g;
+  const uint32_t _vmstate = (uint32_t)~g->vmstate;
+  const uint32_t vmstate = _vmstate < LJ_VMST_TRACE ? _vmstate : LJ_VMST_TRACE;
+  const uint8_t header = aevent;
+
+  memprof_writers[vmstate](mp, header);
+}
+
+static int memprof_stop(const struct lua_State *L);
+
+static void *memprof_allocf(void *ud, void *ptr, size_t osize, size_t nsize)
+{
+  struct memprof *mp = &memprof;
+  struct alloc *oalloc = &mp->orig_alloc;
+  struct lj_wbuf *out = &mp->out;
+  void *nptr;
+
+  lua_assert(MPS_PROFILE == mp->state);
+  lua_assert(oalloc->allocf != memprof_allocf);
+  lua_assert(oalloc->allocf != NULL);
+  lua_assert(ud == oalloc->state);
+
+  nptr = oalloc->allocf(ud, ptr, osize, nsize);
+
+  if (nsize == 0) {
+    memprof_write_caller(mp, AEVENT_FREE);
+    lj_wbuf_addu64(out, (uintptr_t)ptr);
+    lj_wbuf_addu64(out, (uint64_t)osize);
+  } else if (ptr == NULL) {
+    memprof_write_caller(mp, AEVENT_ALLOC);
+    lj_wbuf_addu64(out, (uintptr_t)nptr);
+    lj_wbuf_addu64(out, (uint64_t)nsize);
+  } else {
+    memprof_write_caller(mp, AEVENT_REALLOC);
+    lj_wbuf_addu64(out, (uintptr_t)ptr);
+    lj_wbuf_addu64(out, (uint64_t)osize);
+    lj_wbuf_addu64(out, (uintptr_t)nptr);
+    lj_wbuf_addu64(out, (uint64_t)nsize);
+  }
+
+  /* Deinstrument memprof if required. */
+  if (LJ_UNLIKELY(lj_wbuf_test_flag(out, STREAM_STOP)))
+    memprof_stop(NULL);
+
+  return nptr;
+}
+
+static void memprof_write_prologue(struct lj_wbuf *out)
+{
+  /* ljm_header is a byte array, so its size is also its element count. */
+  lj_wbuf_addn(out, ljm_header, sizeof(ljm_header));
+}
+
+int lj_memprof_start(struct lua_State *L, const struct lua_Prof_options *opt)
+{
+  struct memprof *mp = &memprof;
+  struct alloc *oalloc = &mp->orig_alloc;
+
+  lua_assert(opt->writer != NULL && opt->on_stop != NULL);
+  lua_assert(opt->buf != NULL && opt->len != 0);
+
+  memprof_lock();
+
+  if (mp->state != MPS_IDLE) {
+    memprof_unlock();
+    return PROFILE_ERRRUN;
+  }
+
+  /* Discard possible old errno. */
+  mp->saved_errno = 0;
+
+  /* Init options: */
+  memcpy(&mp->opt, opt, sizeof(*opt));
+
+  /* Init general fields: */
+  mp->g = G(L);
+  mp->state = MPS_PROFILE;
+
+  /* Init output: */
+  lj_wbuf_init(&mp->out, mp->opt.writer, mp->opt.ctx, mp->opt.buf,
+	       mp->opt.len);
+  dump_symtab(&mp->out, mp->g);
+  memprof_write_prologue(&mp->out);
+
+  if (LJ_UNLIKELY(lj_wbuf_test_flag(&mp->out, STREAM_ERR_IO) ||
+		  lj_wbuf_test_flag(&mp->out, STREAM_STOP))) {
+    /* on_stop call may change errno value. */
+    int saved_errno = lj_wbuf_errno(&mp->out);
+    mp->opt.on_stop(mp->opt.ctx, mp->opt.buf);
+    lj_wbuf_terminate(&mp->out);
+    mp->state = MPS_IDLE;
+    memprof_unlock();
+    errno = saved_errno;
+    return PROFILE_ERRIO;
+  }
+
+  /* Override allocating function: */
+  oalloc->allocf = lua_getallocf(L, &oalloc->state);
+  lua_assert(oalloc->allocf != NULL);
+  lua_assert(oalloc->allocf != memprof_allocf);
+  lua_assert(oalloc->state != NULL);
+  lua_setallocf(L, memprof_allocf, oalloc->state);
+
+  memprof_unlock();
+  return PROFILE_SUCCESS;
+}
+
+static int memprof_stop(const struct lua_State *L)
+{
+  struct memprof *mp = &memprof;
+  struct alloc *oalloc = &mp->orig_alloc;
+  struct lj_wbuf *out = &mp->out;
+  int return_status = PROFILE_SUCCESS;
+  int saved_errno = 0;
+  struct lua_State *main_L;
+  int cb_status;
+  /* L == NULL stops whichever VM is profiled; else L must belong to it. */
+  memprof_lock();
+
+  if (mp->state == MPS_HALT) { /* Already stopped by a stream failure. */
+    errno = mp->saved_errno;
+    mp->state = MPS_IDLE;
+    memprof_unlock();
+    return PROFILE_ERRIO;
+  }
+
+  if (mp->state != MPS_PROFILE) { /* Nothing to stop. */
+    memprof_unlock();
+    return PROFILE_ERRRUN;
+  }
+
+  if (L != NULL && mp->g != G(L)) { /* A different VM is being profiled. */
+    memprof_unlock();
+    return PROFILE_ERR;
+  }
+
+  mp->state = MPS_IDLE;
+
+  lua_assert(mp->g != NULL);
+  main_L = mainthread(mp->g);
+
+  lua_assert(memprof_allocf == lua_getallocf(main_L, NULL));
+  lua_assert(oalloc->allocf != NULL);
+  lua_assert(oalloc->state != NULL);
+  lua_setallocf(main_L, oalloc->allocf, oalloc->state); /* Deinstrument. */
+
+  if (LJ_UNLIKELY(lj_wbuf_test_flag(out, STREAM_STOP))) {
+    lua_assert(lj_wbuf_test_flag(out, STREAM_ERR_IO));
+    mp->state = MPS_HALT;
+    /* on_stop call may change errno value. */
+    mp->saved_errno = lj_wbuf_errno(out);
+    /* Ignore possible errors. mp->opt.buf == NULL here. */
+    mp->opt.on_stop(mp->opt.ctx, mp->opt.buf);
+    lj_wbuf_terminate(out);
+    memprof_unlock();
+    return PROFILE_ERRIO;
+  }
+  lj_wbuf_addbyte(out, LJM_EPILOGUE_HEADER);
+
+  lj_wbuf_flush(out);
+
+  cb_status = mp->opt.on_stop(mp->opt.ctx, mp->opt.buf);
+  if (LJ_UNLIKELY(lj_wbuf_test_flag(out, STREAM_ERR_IO) || cb_status != 0)) {
+    saved_errno = lj_wbuf_errno(out);
+    return_status = PROFILE_ERRIO;
+  }
+
+  lj_wbuf_terminate(out);
+
+  memprof_unlock();
+  errno = saved_errno; /* Set last: earlier calls may clobber errno. */
+  return return_status;
+}
+
+int lj_memprof_stop(void)
+{
+  return memprof_stop(NULL);
+}
+
+int lj_memprof_stop_vm(const struct lua_State *L)
+{
+  return memprof_stop(L);
+}
+
+int lj_memprof_is_running(void)
+{
+  int is_profiling;
+
+  /* Read the profiler state between memprof_lock()/memprof_unlock(). */
+  memprof_lock();
+  is_profiling = (MPS_PROFILE == memprof.state);
+  memprof_unlock();
+
+  return is_profiling;
+}
+
+#else /* LJ_HASMEMPROF */
+
+int lj_memprof_start(struct lua_State *L, const struct lua_Prof_options *opt)
+{
+  UNUSED(L);
+  UNUSED(opt);
+  return PROFILE_ERR;
+}
+
+int lj_memprof_stop(void)
+{
+  return PROFILE_ERR;
+}
+
+int lj_memprof_stop_vm(const struct lua_State *L)
+{
+  UNUSED(L);
+  return PROFILE_ERR;
+}
+
+int lj_memprof_is_running(void)
+{
+  return 0;
+}
+
+#endif /* LJ_HASMEMPROF */
diff --git a/src/lj_memprof.h b/src/lj_memprof.h
new file mode 100644
index 0000000..a96b72f
--- /dev/null
+++ b/src/lj_memprof.h
@@ -0,0 +1,165 @@
+/*
+** Memory profiler.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#ifndef _LJ_MEMPROF_H
+#define _LJ_MEMPROF_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+#define LJS_CURRENT_VERSION 0x1
+
+/*
+** symtab format:
+**
+** symtab         := prologue sym*
+** prologue       := 'l' 'j' 's' version reserved
+** version        := <BYTE>
+** reserved       := <BYTE> <BYTE> <BYTE>
+** sym            := sym-lua | sym-final
+** sym-lua        := sym-header sym-addr sym-chunk sym-line
+** sym-header     := <BYTE>
+** sym-addr       := <ULEB128>
+** sym-chunk      := string
+** sym-line       := <ULEB128>
+** sym-final      := sym-header
+** string         := string-len string-payload
+** string-len     := <ULEB128>
+** string-payload := <BYTE> {string-len}
+**
+** <BYTE>   :  A single byte (no surprises here)
+** <ULEB128>:  Unsigned integer represented in ULEB128 encoding
+**
+** (Order of bits below is hi -> lo)
+**
+** version: [VVVVVVVV]
+**  * VVVVVVVV: Byte interpreted as a plain numeric version number
+**
+** sym-header: [FUUUUUTT]
+**  * TT    : 2 bits for representing symbol type
+**  * UUUUU : 5 unused bits
+**  * F     : 1 bit marking the end of the symtab (final symbol)
+*/
+
+#define SYMTAB_LFUNC ((uint8_t)0)
+#define SYMTAB_FFUNC ((uint8_t)1)
+#define SYMTAB_CFUNC ((uint8_t)2)
+#define SYMTAB_TRACE ((uint8_t)3)
+#define SYMTAB_FINAL ((uint8_t)0x80)
+
+#define LJM_CURRENT_FORMAT_VERSION 0x01
+
+/*
+** Event stream format:
+**
+** stream         := symtab memprof
+** symtab         := see symtab description
+** memprof        := prologue event* epilogue
+** prologue       := 'l' 'j' 'm' version reserved
+** version        := <BYTE>
+** reserved       := <BYTE> <BYTE> <BYTE>
+** event          := event-alloc | event-realloc | event-free
+** event-alloc    := event-header loc? naddr nsize
+** event-realloc  := event-header loc? oaddr osize naddr nsize
+** event-free     := event-header loc? oaddr osize
+** event-header   := <BYTE>
+** loc            := loc-lua | loc-c
+** loc-lua        := sym-addr line-no
+** loc-c          := sym-addr
+** sym-addr       := <ULEB128>
+** line-no        := <ULEB128>
+** oaddr          := <ULEB128>
+** naddr          := <ULEB128>
+** osize          := <ULEB128>
+** nsize          := <ULEB128>
+** epilogue       := event-header
+**
+** <BYTE>   :  A single byte (no surprises here)
+** <ULEB128>:  Unsigned integer represented in ULEB128 encoding
+**
+** (Order of bits below is hi -> lo)
+**
+** version: [VVVVVVVV]
+**  * VVVVVVVV: Byte interpreted as a plain integer version number
+**
+** event-header: [FUUUSSEE]
+**  * EE   : 2 bits for representing allocation event type (AEVENT_*)
+**  * SS   : 2 bits for representing allocation source type (ASOURCE_*)
+**  * UUU  : 3 unused bits
+**  * F    : 0 for regular events, 1 for epilogue's *F*inal header
+**           (if F is set to 1, all other bits are currently ignored)
+*/
+
+/* Allocation events: */
+#define AEVENT_ALLOC   ((uint8_t)1)
+#define AEVENT_FREE    ((uint8_t)2)
+#define AEVENT_REALLOC ((uint8_t)(AEVENT_ALLOC | AEVENT_FREE))
+
+/* Allocation sources: */
+#define ASOURCE_INT   ((uint8_t)(1 << 2))
+#define ASOURCE_LFUNC ((uint8_t)(2 << 2))
+#define ASOURCE_CFUNC ((uint8_t)(3 << 2))
+
+#define LJM_EPILOGUE_HEADER 0x80
+
+/* Profiler public API. */
+#define PROFILE_SUCCESS 0
+#define PROFILE_ERR     1
+#define PROFILE_ERRRUN  2
+#define PROFILE_ERRMEM  3
+#define PROFILE_ERRIO   4
+
+/* Profiler options. */
+struct lua_Prof_options {
+  /* Context for the profile writer and final callback. */
+  void *ctx;
+  /* Custom buffer to write data. */
+  uint8_t *buf;
+  /* The buffer's size. */
+  size_t len;
+  /*
+  ** Writer function for profile events.
+  ** Should return amount of written bytes on success or zero in case of error.
+  ** Setting *data to NULL means end of profiling.
+  */
+  size_t (*writer)(const void **data, size_t len, void *ctx);
+  /*
+  ** Callback on profiler stopping. Required for correct cleanup
+  ** at VM shutdown when the profiler is still running.
+  ** Returns zero on success.
+  */
+  int (*on_stop)(void *ctx, uint8_t *buf);
+};
+
+/* Avoid pulling in additional interfaces described in other headers. */
+struct lua_State;
+
+/*
+** Starts profiling. Returns PROFILE_SUCCESS on success and one of
+** PROFILE_ERR* codes otherwise. The on_stop() callback is called in case
+** of PROFILE_ERRIO.
+*/
+int lj_memprof_start(struct lua_State *L, const struct lua_Prof_options *opt);
+
+/*
+** Stops profiling. Returns PROFILE_SUCCESS on success and one of
+** PROFILE_ERR* codes otherwise. If the writer() function returns zero
+** when called at buffer flush, the profiled stream stops, or the on_stop()
+** callback returns a non-zero value, returns PROFILE_ERRIO.
+*/
+int lj_memprof_stop(void);
+
+/*
+** If the VM owning L is the one being profiled, behaves exactly as
+** lj_memprof_stop(). If another VM is profiled, returns PROFILE_ERR.
+*/
+int lj_memprof_stop_vm(const struct lua_State *L);
+
+/* Check that profiler is running. */
+int lj_memprof_is_running(void);
+
+#endif
diff --git a/src/lj_obj.h b/src/lj_obj.h
index c94617d..c94b0bb 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -523,6 +523,14 @@ enum {
   LJ_VMST__MAX
 };
 
+/*
+** PROFILER HACK: VM is inside a trace. This is a pseudo-state used by profiler.
+** In fact, when VM executes a trace, vmstate is set to the trace number, but
+** we aggregate all such cases into one VM state during per-VM state profiling.
+*/
+
+#define LJ_VMST_TRACE		(LJ_VMST__MAX)
+
 #define setvmstate(g, st)	((g)->vmstate = ~LJ_VMST_##st)
 
 /* Metamethods. ORDER MM */
diff --git a/src/lj_state.c b/src/lj_state.c
index 1d9c628..6c46e3d 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -29,6 +29,10 @@
 #include "lj_alloc.h"
 #include "luajit.h"
 
+#if LJ_HASMEMPROF
+#include "lj_memprof.h"
+#endif
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Stack sizes. */
@@ -243,6 +247,10 @@ LUA_API void lua_close(lua_State *L)
   global_State *g = G(L);
   int i;
   L = mainthread(g);  /* Only the main thread can be closed. */
+#if LJ_HASMEMPROF
+  if (lj_memprof_is_running())
+    lj_memprof_stop();
+#endif
 #if LJ_HASPROFILE
   luaJIT_profile_stop(L);
 #endif
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 705e296..3f7e686 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -51,6 +51,7 @@
 #include "lj_api.c"
 #include "lj_mapi.c"
 #include "lj_profile.c"
+#include "lj_memprof.c"
 #include "lj_lex.c"
 #include "lj_parse.c"
 #include "lj_bcread.c"
-- 
2.28.0



More information about the Tarantool-patches mailing list