[Tarantool-patches] [PATCH luajit v3 1/2] core: introduce memory profiler

Sergey Kaplun skaplun at tarantool.org
Mon Dec 28 04:57:30 MSK 2020


This patch introduces a memory profiler for the Lua machine.

To determine the currently allocating coroutine (which may differ from
the currently executing one), a new field mem_L is added to the
global_State structure. This field is set on each allocation event and
stores the address of the coroutine that performs the allocation.

First of all, the profiler dumps the definitions of all loaded Lua
functions (symtab) via the write buffer introduced in one of the
previous patches.

After the symtab is dumped, the profiler replaces the old allocation
function with an instrumented one. This new function reports all
allocation, reallocation and deallocation events via the write buffer
during profiling. The content that follows each event header depends on
the type of the allocating function (LFUNC, FFUNC or CFUNC).

When profiling is over, a special epilogue event header is written and
the old allocation function is restored.

This change also makes the debug_frameline function visible LuaJIT-wide
(renamed to lj_debug_frameline) so that it can be used in the memory
profiler.

For more information, see <lj_memprof.h>.

Part of tarantool/tarantool#5442
---

Changes in v3:
  * Fixed invalid pointer usage at on_stop cb.
  * Dropped thread safe logic.
  * Dropped unused functions.
  * Added assertion to memprof_write_lfunc.
  * Codestyle fixes.

 src/Makefile     |   5 +-
 src/Makefile.dep |  30 +++--
 src/lj_arch.h    |  15 +++
 src/lj_debug.c   |   8 +-
 src/lj_debug.h   |   3 +
 src/lj_gc.c      |   7 +-
 src/lj_gc.h      |   1 +
 src/lj_memprof.c | 344 +++++++++++++++++++++++++++++++++++++++++++++++
 src/lj_memprof.h | 159 ++++++++++++++++++++++
 src/lj_obj.h     |   1 +
 src/lj_state.c   |   7 +
 src/ljamalg.c    |   1 +
 12 files changed, 561 insertions(+), 20 deletions(-)
 create mode 100644 src/lj_memprof.c
 create mode 100644 src/lj_memprof.h

diff --git a/src/Makefile b/src/Makefile
index 936dcbb..825b01c 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -113,6 +113,9 @@ XCFLAGS=
 # Enable GC64 mode for x64.
 #XCFLAGS+= -DLUAJIT_ENABLE_GC64
 #
+# Disable the memory profiler.
+#XCFLAGS+= -DLUAJIT_DISABLE_MEMPROF
+#
 ##############################################################################
 
 ##############################################################################
@@ -488,7 +491,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o lj_wbuf.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
 	  lj_strfmt.o lj_strfmt_num.o lj_api.o lj_mapi.o lj_profile.o \
-	  lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
+	  lj_memprof.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
 	  lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 59ed450..6813bc8 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -144,6 +144,8 @@ lj_mapi.o: lj_mapi.c lua.h luaconf.h lmisclib.h lj_obj.h lj_def.h lj_arch.h \
 lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
  lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
+lj_memprof.o: lj_memprof.c lj_arch.h lua.h luaconf.h lj_memprof.h \
+ lj_def.h lj_wbuf.h lj_obj.h lj_frame.h lj_bc.h lj_debug.h
 lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
  lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
@@ -220,20 +222,20 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
  lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_wbuf.c lj_wbuf.h lj_utils.h \
  lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \
  lj_debug.c lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c \
- lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c \
- lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_api.c lj_mapi.c lmisclib.h \
- lj_profile.c lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \
- lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c \
- lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
- lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
- lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
- lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
- lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
- lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
- lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
- lj_utils_leb128.c lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c \
- lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
- lib_ffi.c lib_misc.c lib_init.c
+ lj_ccallback.h lj_profile.h lj_memprof.h lj_vmevent.c lj_vmevent.h \
+ lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_api.c lj_mapi.c \
+ lmisclib.h lj_profile.c lj_memprof.c lj_lex.c lualib.h lj_parse.h lj_parse.c \
+ lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c \
+ lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h \
+ lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h \
+ lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h lj_opt_mem.c \
+ lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c \
+ lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c \
+ lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h \
+ lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c \
+ lj_alloc.c lj_utils_leb128.c lib_aux.c lib_base.c lj_libdef.h lib_math.c \
+ lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c \
+ lib_bit.c lib_jit.c lib_ffi.c lib_misc.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
 host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
  lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
diff --git a/src/lj_arch.h b/src/lj_arch.h
index c8d7138..d8676e9 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -213,6 +213,8 @@
 #define LJ_ARCH_VERSION		50
 #endif
 
+#define LJ_ARCH_NOMEMPROF	1
+
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
 
 #define LJ_ARCH_BITS		64
@@ -234,6 +236,8 @@
 
 #define LJ_ARCH_VERSION		80
 
+#define LJ_ARCH_NOMEMPROF	1
+
 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
 
 #ifndef LJ_ARCH_ENDIAN
@@ -299,6 +303,8 @@
 #define LJ_ARCH_XENON		1
 #endif
 
+#define LJ_ARCH_NOMEMPROF	1
+
 #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
 
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
@@ -358,6 +364,8 @@
 #define LJ_ARCH_VERSION		10
 #endif
 
+#define LJ_ARCH_NOMEMPROF	1
+
 #else
 #error "No target architecture defined"
 #endif
@@ -564,4 +572,11 @@
 #define LJ_52			0
 #endif
 
+/* Disable or enable the memory profiler. */
+#if defined(LUAJIT_DISABLE_MEMPROF) || defined(LJ_ARCH_NOMEMPROF) || LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN || LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_XBOX360
+#define LJ_HASMEMPROF		0
+#else
+#define LJ_HASMEMPROF		1
+#endif
+
 #endif
diff --git a/src/lj_debug.c b/src/lj_debug.c
index 73bd196..bb9ab28 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -128,7 +128,7 @@ BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc)
 }
 
 /* Get line number for function/frame. */
-static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
+BCLine lj_debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
 {
   BCPos pc = debug_framepc(L, fn, nextframe);
   if (pc != NO_BCPOS) {
@@ -353,7 +353,7 @@ void lj_debug_addloc(lua_State *L, const char *msg,
   if (frame) {
     GCfunc *fn = frame_func(frame);
     if (isluafunc(fn)) {
-      BCLine line = debug_frameline(L, fn, nextframe);
+      BCLine line = lj_debug_frameline(L, fn, nextframe);
       if (line >= 0) {
 	GCproto *pt = funcproto(fn);
 	char buf[LUA_IDSIZE];
@@ -470,7 +470,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
 	ar->what = "C";
       }
     } else if (*what == 'l') {
-      ar->currentline = frame ? debug_frameline(L, fn, nextframe) : -1;
+      ar->currentline = frame ? lj_debug_frameline(L, fn, nextframe) : -1;
     } else if (*what == 'u') {
       ar->nups = fn->c.nupvalues;
       if (ext) {
@@ -616,7 +616,7 @@ void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
 	    GCproto *pt = funcproto(fn);
 	    if (debug_putchunkname(sb, pt, pathstrip)) {
 	      /* Regular Lua function. */
-	      BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) :
+	      BCLine line = c == 'l' ? lj_debug_frameline(L, fn, nextframe) :
 				       pt->firstline;
 	      lj_buf_putb(sb, ':');
 	      lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline);
diff --git a/src/lj_debug.h b/src/lj_debug.h
index 5917c00..a157d28 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -40,6 +40,9 @@ LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
 LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
 LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar,
 			     int ext);
+#if LJ_HASMEMPROF
+LJ_FUNC BCLine lj_debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe);
+#endif
 #if LJ_HASPROFILE
 LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt,
 				int depth);
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 44c8aa1..7f0ec89 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -852,6 +852,8 @@ void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz)
 {
   global_State *g = G(L);
   lua_assert((osz == 0) == (p == NULL));
+
+  setgcref(g->mem_L, obj2gco(L));
   p = g->allocf(g->allocd, p, osz, nsz);
   if (p == NULL && nsz > 0)
     lj_err_mem(L);
@@ -867,7 +869,10 @@ void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz)
 void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size)
 {
   global_State *g = G(L);
-  GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size);
+  GCobj *o;
+
+  setgcref(g->mem_L, obj2gco(L));
+  o = (GCobj *)g->allocf(g->allocd, NULL, 0, size);
   if (o == NULL)
     lj_err_mem(L);
   lua_assert(checkptrGC(o));
diff --git a/src/lj_gc.h b/src/lj_gc.h
index 2051220..40b02cb 100644
--- a/src/lj_gc.h
+++ b/src/lj_gc.h
@@ -113,6 +113,7 @@ static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize)
 {
   g->gc.total -= (GCSize)osize;
   g->gc.freed += osize;
+  /* All deallocations are reported as internal. Not necessary to set mem_L. */
   g->allocf(g->allocd, p, osize, 0);
 }
 
diff --git a/src/lj_memprof.c b/src/lj_memprof.c
new file mode 100644
index 0000000..4994de5
--- /dev/null
+++ b/src/lj_memprof.c
@@ -0,0 +1,344 @@
+/*
+** Implementation of memory profiler.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#define lj_memprof_c
+#define LUA_CORE
+
+#include <errno.h>
+
+#include "lj_arch.h"
+#include "lj_memprof.h"
+
+#if LJ_HASMEMPROF
+
+#include "lj_obj.h"
+#include "lj_frame.h"
+#include "lj_debug.h"
+
+/* --------------------------------- Symtab --------------------------------- */
+
+static const unsigned char ljs_header[] = {'l', 'j', 's', LJS_CURRENT_VERSION,
+					   0x0, 0x0, 0x0}; /* Reserved bytes. */
+
+static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
+{ /* Dumps the symtab: prologue, one entry per prototype, final marker. */
+  const GCRef *iter = &g->gc.root;
+  const GCobj *o;
+  const size_t ljs_header_len = sizeof(ljs_header) / sizeof(ljs_header[0]);
+
+  /* Write the symtab prologue. */
+  lj_wbuf_addn(out, ljs_header, ljs_header_len);
+
+  while ((o = gcref(*iter)) != NULL) { /* Walk the list from gc.root. */
+    switch (o->gch.gct) {
+    case (~LJ_TPROTO): { /* Entry: header, address, chunk name, first line. */
+      const GCproto *pt = gco2pt(o);
+      lj_wbuf_addbyte(out, SYMTAB_LFUNC);
+      lj_wbuf_addu64(out, (uintptr_t)pt);
+      lj_wbuf_addstring(out, proto_chunknamestr(pt));
+      lj_wbuf_addu64(out, (uint64_t)pt->firstline);
+      break;
+    }
+    default: /* Objects of other types are not dumped. */
+      break;
+    }
+    iter = &o->gch.nextgc;
+  }
+
+  lj_wbuf_addbyte(out, SYMTAB_FINAL); /* Marks the end of the symtab. */
+}
+
+/* ---------------------------- Memory profiler ----------------------------- */
+
+enum memprof_state {
+  /* The memory profiler is not running. */
+  MPS_IDLE,
+  /* The memory profiler is running. */
+  MPS_PROFILE,
+  /*
+  ** Profiling was halted because the output stream stopped.
+  ** lj_memprof_stop then restores the saved errno for the user.
+  */
+  MPS_HALT /* NOTE(review): tested, but never assigned in this file. */
+};
+
+struct alloc { /* An allocator: the function plus its opaque state. */
+  lua_Alloc allocf; /* Allocator function. */
+  void *state; /* Opaque state passed to allocf as its first argument. */
+};
+
+struct memprof { /* Complete state of the memory profiler. */
+  global_State *g; /* VM being profiled. */
+  enum memprof_state state; /* Current profiler state. */
+  struct lj_wbuf out; /* Write buffer accumulating the output stream. */
+  struct alloc orig_alloc; /* Allocator that is replaced while profiling. */
+  struct lj_memprof_options opt; /* Copy of the options passed at start. */
+  int saved_errno; /* errno saved at deinstrumentation; reset at start. */
+};
+
+static struct memprof memprof = {0}; /* The only profiler instance. */
+
+static const unsigned char ljm_header[] = { /* Event stream prologue. */
+  'l', 'j', 'm', LJM_CURRENT_FORMAT_VERSION, 0x0, 0x0, 0x0};
+
+static void memprof_write_lfunc(struct lj_wbuf *out, uint8_t aevent,
+				GCfunc *fn, struct lua_State *L,
+				cTValue *nextframe)
+{ /* Writes a Lua-function event: header, prototype address, line. */
+  const BCLine line = lj_debug_frameline(L, fn, nextframe);
+  /*
+  ** The line is always >= 0 if we are inside a Lua function.
+  ** It equals zero when LuaJIT is built with the
+  ** -DLUAJIT_DISABLE_DEBUGINFO flag.
+  */
+  lua_assert(line >= 0);
+  lj_wbuf_addbyte(out, aevent | ASOURCE_LFUNC);
+  lj_wbuf_addu64(out, (uintptr_t)funcproto(fn));
+  lj_wbuf_addu64(out, (uint64_t)line);
+}
+
+static void memprof_write_cfunc(struct lj_wbuf *out, uint8_t aevent,
+				const GCfunc *fn)
+{ /* Writes a C-function event: header, then the address of fn->c.f. */
+  lj_wbuf_addbyte(out, aevent | ASOURCE_CFUNC);
+  lj_wbuf_addu64(out, (uintptr_t)fn->c.f);
+}
+
+static void memprof_write_ffunc(struct lj_wbuf *out, uint8_t aevent,
+				GCfunc *fn, struct lua_State *L,
+				cTValue *frame)
+{ /* Writes the event for an allocation made inside a fast function. */
+  cTValue *pframe = frame_prev(frame);
+  GCfunc *pfn = frame_func(pframe);
+
+  /*
+  ** XXX: If the fast function was called by a Lua function, report that
+  ** Lua function for more meaningful output. Otherwise report the fast
+  ** function as a C function.
+  */
+  if (pfn != NULL && isluafunc(pfn))
+    memprof_write_lfunc(out, aevent, pfn, L, frame);
+  else
+    memprof_write_cfunc(out, aevent, fn);
+}
+
+static void memprof_write_func(struct memprof *mp, uint8_t aevent)
+{ /* Reports the function found in the frame below L->base of mem_L. */
+  struct lj_wbuf *out = &mp->out;
+  lua_State *L = gco2th(gcref(mp->g->mem_L)); /* Allocating coroutine. */
+  cTValue *frame = L->base - 1;
+  GCfunc *fn = frame_func(frame);
+
+  if (isluafunc(fn))
+    memprof_write_lfunc(out, aevent, fn, L, NULL);
+  else if (isffunc(fn))
+    memprof_write_ffunc(out, aevent, fn, L, frame);
+  else if (iscfunc(fn))
+    memprof_write_cfunc(out, aevent, fn);
+  else
+    lua_assert(0); /* No other kind of function is expected here. */
+}
+
+static void memprof_write_hvmstate(struct memprof *mp, uint8_t aevent)
+{ /* Internal allocation source: only the header, no location follows. */
+  lj_wbuf_addbyte(&mp->out, aevent | ASOURCE_INT);
+}
+
+typedef void (*memprof_writer)(struct memprof *mp, uint8_t aevent);
+
+static const memprof_writer memprof_writers[] = { /* Indexed by VM state. */
+  memprof_write_hvmstate, /* LJ_VMST_INTERP */
+  memprof_write_func, /* LJ_VMST_LFUNC */
+  memprof_write_func, /* LJ_VMST_FFUNC */
+  memprof_write_func, /* LJ_VMST_CFUNC */
+  memprof_write_hvmstate, /* LJ_VMST_GC */
+  memprof_write_hvmstate, /* LJ_VMST_EXIT */
+  memprof_write_hvmstate, /* LJ_VMST_RECORD */
+  memprof_write_hvmstate, /* LJ_VMST_OPT */
+  memprof_write_hvmstate, /* LJ_VMST_ASM */
+  /*
+  ** XXX: In an ideal world, allocations from traces would be reported too.
+  ** But since traces must follow the semantics of the original code, the
+  ** behaviour of Lua and JITted code must match 1:1 in terms of allocations,
+  ** which makes using memprof with the JIT enabled virtually redundant.
+  ** Hence use the stub below.
+  */
+  memprof_write_hvmstate /* LJ_VMST_TRACE */
+};
+
+static void memprof_write_caller(struct memprof *mp, uint8_t aevent)
+{ /* Picks the writer by VM state; values >= LJ_VMST_TRACE map to TRACE. */
+  const global_State *g = mp->g;
+  const uint32_t _vmstate = (uint32_t)~g->vmstate;
+  const uint32_t vmstate = _vmstate < LJ_VMST_TRACE ? _vmstate : LJ_VMST_TRACE;
+
+  memprof_writers[vmstate](mp, aevent);
+}
+
+static void *memprof_allocf(void *ud, void *ptr, size_t osize, size_t nsize)
+{ /* Instrumented allocator: calls the original one, then logs the event. */
+  struct memprof *mp = &memprof;
+  const struct alloc *oalloc = &mp->orig_alloc;
+  struct lj_wbuf *out = &mp->out;
+  void *nptr;
+
+  lua_assert(MPS_PROFILE == mp->state);
+  lua_assert(oalloc->allocf != memprof_allocf);
+  lua_assert(oalloc->allocf != NULL);
+  lua_assert(ud == oalloc->state);
+
+  nptr = oalloc->allocf(ud, ptr, osize, nsize);
+
+  if (nsize == 0) { /* Deallocation: old address and old size. */
+    memprof_write_caller(mp, AEVENT_FREE);
+    lj_wbuf_addu64(out, (uintptr_t)ptr);
+    lj_wbuf_addu64(out, (uint64_t)osize);
+  } else if (ptr == NULL) { /* Allocation: new address and new size. */
+    memprof_write_caller(mp, AEVENT_ALLOC);
+    lj_wbuf_addu64(out, (uintptr_t)nptr);
+    lj_wbuf_addu64(out, (uint64_t)nsize);
+  } else { /* Reallocation: old address/size, then new address/size. */
+    memprof_write_caller(mp, AEVENT_REALLOC);
+    lj_wbuf_addu64(out, (uintptr_t)ptr);
+    lj_wbuf_addu64(out, (uint64_t)osize);
+    lj_wbuf_addu64(out, (uintptr_t)nptr);
+    lj_wbuf_addu64(out, (uint64_t)nsize);
+  }
+
+  /* Stop profiling and restore the allocator if the stream has stopped. */
+  if (LJ_UNLIKELY(lj_wbuf_test_flag(out, STREAM_STOP)))
+    lj_memprof_stop(mainthread(mp->g));
+
+  return nptr;
+}
+
+int lj_memprof_start(struct lua_State *L, const struct lj_memprof_options *opt)
+{
+  struct memprof *mp = &memprof;
+  struct lj_memprof_options *mp_opt = &mp->opt;
+  struct alloc *oalloc = &mp->orig_alloc;
+  const size_t ljm_header_len = sizeof(ljm_header) / sizeof(ljm_header[0]);
+
+  lua_assert(opt->writer != NULL);
+  lua_assert(opt->on_stop != NULL);
+  lua_assert(opt->buf != NULL);
+  lua_assert(opt->len != 0);
+
+  if (mp->state != MPS_IDLE)
+    return PROFILE_ERRRUN; /* The profiler is not idle. */
+
+  /* Discard a possibly stale errno. */
+  mp->saved_errno = 0;
+
+  /* Keep a private copy of the options. */
+  memcpy(mp_opt, opt, sizeof(*opt));
+
+  /* Init general fields. */
+  mp->g = G(L);
+  mp->state = MPS_PROFILE;
+
+  /* Init the output buffer and dump the symtab first. */
+  lj_wbuf_init(&mp->out, mp_opt->writer, mp_opt->ctx, mp_opt->buf, mp_opt->len);
+  dump_symtab(&mp->out, mp->g);
+
+  /* Write the event stream prologue. */
+  lj_wbuf_addn(&mp->out, ljm_header, ljm_header_len);
+
+  if (LJ_UNLIKELY(lj_wbuf_test_flag(&mp->out, STREAM_ERRIO|STREAM_STOP))) {
+    /* The on_stop call may change the errno value. */
+    int saved_errno = lj_wbuf_errno(&mp->out);
+    /* Ignore possible errors. mp->out.buf may be NULL here. */
+    mp_opt->on_stop(mp_opt->ctx, mp->out.buf);
+    lj_wbuf_terminate(&mp->out);
+    mp->state = MPS_IDLE;
+    errno = saved_errno;
+    return PROFILE_ERRIO;
+  }
+
+  /* Replace the allocating function with the instrumented one. */
+  oalloc->allocf = lua_getallocf(L, &oalloc->state);
+  lua_assert(oalloc->allocf != NULL);
+  lua_assert(oalloc->allocf != memprof_allocf);
+  lua_assert(oalloc->state != NULL);
+  lua_setallocf(L, memprof_allocf, oalloc->state);
+
+  return PROFILE_SUCCESS;
+}
+
+int lj_memprof_stop(struct lua_State *L)
+{
+  struct memprof *mp = &memprof;
+  struct lj_memprof_options *mp_opt = &mp->opt;
+  struct alloc *oalloc = &mp->orig_alloc;
+  struct lj_wbuf *out = &mp->out;
+  int cb_status;
+
+  if (mp->state == MPS_HALT) {
+    errno = mp->saved_errno;
+    mp->state = MPS_IDLE;
+    /* The wbuf has already been terminated. */
+    return PROFILE_ERRIO;
+  }
+
+  if (mp->state != MPS_PROFILE)
+    return PROFILE_ERRRUN; /* The profiler is not running. */
+
+  if (mp->g != G(L))
+    return PROFILE_ERRUSE; /* L does not belong to the profiled VM. */
+
+  mp->state = MPS_IDLE;
+
+  lua_assert(mp->g != NULL);
+
+  lua_assert(memprof_allocf == lua_getallocf(L, NULL));
+  lua_assert(oalloc->allocf != NULL);
+  lua_assert(oalloc->state != NULL);
+  lua_setallocf(L, oalloc->allocf, oalloc->state); /* Restore allocator. */
+
+  if (LJ_UNLIKELY(lj_wbuf_test_flag(out, STREAM_STOP))) {
+    /* The on_stop call may change the errno value. */
+    int saved_errno = lj_wbuf_errno(out);
+    /* Ignore possible errors. out->buf may be NULL here. */
+    mp_opt->on_stop(mp_opt->ctx, out->buf);
+    errno = saved_errno;
+    goto errio;
+  }
+
+  lj_wbuf_addbyte(out, LJM_EPILOGUE_HEADER);
+
+  lj_wbuf_flush(out);
+
+  cb_status = mp_opt->on_stop(mp_opt->ctx, out->buf);
+  if (LJ_UNLIKELY(lj_wbuf_test_flag(out, STREAM_ERRIO|STREAM_STOP) ||
+		  cb_status != 0)) {
+    errno = lj_wbuf_errno(out);
+    goto errio;
+  }
+
+  lj_wbuf_terminate(out);
+  return PROFILE_SUCCESS;
+errio: /* Common I/O error exit: terminate the wbuf and report ERRIO. */
+  lj_wbuf_terminate(out);
+  return PROFILE_ERRIO;
+}
+
+#else /* LJ_HASMEMPROF */
+
+int lj_memprof_start(struct lua_State *L, const struct lj_memprof_options *opt)
+{ /* Stub used when LJ_HASMEMPROF is 0. */
+  UNUSED(L);
+  UNUSED(opt);
+  return PROFILE_ERRUSE;
+}
+
+int lj_memprof_stop(struct lua_State *L)
+{ /* Stub used when LJ_HASMEMPROF is 0. */
+  UNUSED(L);
+  return PROFILE_ERRUSE;
+}
+
+#endif /* LJ_HASMEMPROF */
diff --git a/src/lj_memprof.h b/src/lj_memprof.h
new file mode 100644
index 0000000..3417475
--- /dev/null
+++ b/src/lj_memprof.h
@@ -0,0 +1,159 @@
+/*
+** Memory profiler.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+/*
+** XXX: The memory profiler is not thread-safe. Do not use it in
+** several VMs simultaneously: only one VM can be profiled at a time.
+*/
+
+#ifndef _LJ_MEMPROF_H
+#define _LJ_MEMPROF_H
+
+#include "lj_def.h"
+#include "lj_wbuf.h"
+
+#define LJS_CURRENT_VERSION 0x1
+
+/*
+** symtab format:
+**
+** symtab         := prologue sym*
+** prologue       := 'l' 'j' 's' version reserved
+** version        := <BYTE>
+** reserved       := <BYTE> <BYTE> <BYTE>
+** sym            := sym-lua | sym-final
+** sym-lua        := sym-header sym-addr sym-chunk sym-line
+** sym-header     := <BYTE>
+** sym-addr       := <ULEB128>
+** sym-chunk      := string
+** sym-line       := <ULEB128>
+** sym-final      := sym-header
+** string         := string-len string-payload
+** string-len     := <ULEB128>
+** string-payload := <BYTE> {string-len}
+**
+** <BYTE>   :  A single byte (no surprises here)
+** <ULEB128>:  Unsigned integer represented in ULEB128 encoding
+**
+** (Order of bits below is hi -> lo)
+**
+** version: [VVVVVVVV]
+**  * VVVVVVVV: Byte interpreted as a plain numeric version number
+**
+** sym-header: [FUUUUUTT]
+**  * TT    : 2 bits for representing symbol type
+**  * UUUUU : 5 unused bits
+**  * F     : 1 bit marking the end of the symtab (final symbol)
+*/
+
+#define SYMTAB_LFUNC ((uint8_t)0)
+#define SYMTAB_FINAL ((uint8_t)0x80)
+
+#define LJM_CURRENT_FORMAT_VERSION 0x01
+
+/*
+** Event stream format:
+**
+** stream         := symtab memprof
+** symtab         := see symtab description
+** memprof        := prologue event* epilogue
+** prologue       := 'l' 'j' 'm' version reserved
+** version        := <BYTE>
+** reserved       := <BYTE> <BYTE> <BYTE>
+** event          := event-alloc | event-realloc | event-free
+** event-alloc    := event-header loc? naddr nsize
+** event-realloc  := event-header loc? oaddr osize naddr nsize
+** event-free     := event-header loc? oaddr osize
+** event-header   := <BYTE>
+** loc            := loc-lua | loc-c
+** loc-lua        := sym-addr line-no
+** loc-c          := sym-addr
+** sym-addr       := <ULEB128>
+** line-no        := <ULEB128>
+** oaddr          := <ULEB128>
+** naddr          := <ULEB128>
+** osize          := <ULEB128>
+** nsize          := <ULEB128>
+** epilogue       := event-header
+**
+** <BYTE>   :  A single byte (no surprises here)
+** <ULEB128>:  Unsigned integer represented in ULEB128 encoding
+**
+** (Order of bits below is hi -> lo)
+**
+** version: [VVVVVVVV]
+**  * VVVVVVVV: Byte interpreted as a plain integer version number
+**
+** event-header: [FUUUSSEE]
+**  * EE   : 2 bits for representing allocation event type (AEVENT_*)
+**  * SS   : 2 bits for representing allocation source type (ASOURCE_*)
+**  * UUU  : 3 unused bits
+**  * F    : 0 for regular events, 1 for epilogue's *F*inal header
+**           (if F is set to 1, all other bits are currently ignored)
+*/
+
+/* Allocation events. */
+#define AEVENT_ALLOC   ((uint8_t)1)
+#define AEVENT_FREE    ((uint8_t)2)
+#define AEVENT_REALLOC ((uint8_t)(AEVENT_ALLOC | AEVENT_FREE))
+
+/* Allocation sources. */
+#define ASOURCE_INT   ((uint8_t)(1 << 2))
+#define ASOURCE_LFUNC ((uint8_t)(2 << 2))
+#define ASOURCE_CFUNC ((uint8_t)(3 << 2))
+
+#define LJM_EPILOGUE_HEADER 0x80
+
+/* Profiler public API. */
+#define PROFILE_SUCCESS 0
+#define PROFILE_ERRUSE  1
+#define PROFILE_ERRRUN  2
+#define PROFILE_ERRMEM  3
+#define PROFILE_ERRIO   4
+
+/* Profiler options. */
+struct lj_memprof_options {
+  /* Context passed to both the profile writer and the on_stop callback. */
+  void *ctx;
+  /* Caller-provided buffer used to accumulate the written data. */
+  uint8_t *buf;
+  /* The size of the buffer. */
+  size_t len;
+  /*
+  ** Writer function for profile events.
+  ** Should return the number of bytes written on success or zero on error.
+  ** Setting *data to NULL means the end of profiling.
+  ** For details see <lj_wbuf.h>.
+  */
+  lj_wbuf_writer writer;
+  /*
+  ** Callback invoked when the profiler stops. Required for correct cleanup
+  ** at VM finalization while the profiler is still running.
+  ** Returns zero on success.
+  */
+  int (*on_stop)(void *ctx, uint8_t *buf);
+};
+
+/* Forward declaration to avoid exposing interfaces from other headers. */
+struct lua_State;
+
+/*
+** Starts profiling. Returns PROFILE_SUCCESS on success and one of the
+** PROFILE_ERR* codes otherwise. The on_stop() callback is called in
+** case of PROFILE_ERRIO.
+*/
+int lj_memprof_start(struct lua_State *L, const struct lj_memprof_options *opt);
+
+/*
+** Stops profiling. Returns PROFILE_SUCCESS on success and one of the
+** PROFILE_ERR* codes otherwise. Returns PROFILE_ERRIO if the writer()
+** function returns zero at buffer flush, if the profiled stream has
+** stopped, or if the on_stop() callback returns a non-zero value.
+*/
+int lj_memprof_stop(struct lua_State *L);
+
+#endif
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 1a0b1f6..4a4d77f 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -656,6 +656,7 @@ typedef struct global_State {
   BCIns bc_cfunc_int;	/* Bytecode for internal C function calls. */
   BCIns bc_cfunc_ext;	/* Bytecode for external C function calls. */
   GCRef cur_L;		/* Currently executing lua_State. */
+  GCRef mem_L;		/* Currently allocating lua_State. */
   MRef jit_base;	/* Current JIT code L->base or NULL. */
   MRef ctype_state;	/* Pointer to C type state. */
   GCRef gcroot[GCROOT_MAX];  /* GC roots. */
diff --git a/src/lj_state.c b/src/lj_state.c
index 1d9c628..1ed79a5 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -29,6 +29,10 @@
 #include "lj_alloc.h"
 #include "luajit.h"
 
+#if LJ_HASMEMPROF
+#include "lj_memprof.h"
+#endif
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Stack sizes. */
@@ -243,6 +247,9 @@ LUA_API void lua_close(lua_State *L)
   global_State *g = G(L);
   int i;
   L = mainthread(g);  /* Only the main thread can be closed. */
+#if LJ_HASMEMPROF
+  lj_memprof_stop(L);
+#endif
 #if LJ_HASPROFILE
   luaJIT_profile_stop(L);
 #endif
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 705e296..3f7e686 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -51,6 +51,7 @@
 #include "lj_api.c"
 #include "lj_mapi.c"
 #include "lj_profile.c"
+#include "lj_memprof.c"
 #include "lj_lex.c"
 #include "lj_parse.c"
 #include "lj_bcread.c"
-- 
2.28.0



More information about the Tarantool-patches mailing list