Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH luajit v3 0/7] introduce platform profiler
@ 2022-04-06 12:49 Maxim Kokryashkin via Tarantool-patches
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 1/7] vm: save topframe info into global_State Maxim Kokryashkin via Tarantool-patches
                   ` (6 more replies)
  0 siblings, 7 replies; 9+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2022-04-06 12:49 UTC (permalink / raw)
  To: tarantool-patches, imun, skaplun

Changes in v3:
- Fixed comments as per review by Sergey
- Rebased onto new master

Maxim Kokryashkin (4):
  memprof: move symtab to a separate module
  memprof: add profile common section
  sysprof: introduce Lua API
  tools: introduce parsers for sysprof

Mikhail Shishatskiy (3):
  vm: save topframe info into global_State
  core: separate the profiling timer from lj_profile
  core: introduce lua and platform profiler

 .gitignore                                    |   1 +
 CMakeLists.txt                                |   6 +
 src/CMakeLists.txt                            |   3 +
 src/Makefile.dep.original                     |  47 +-
 src/lib_misc.c                                | 268 +++++++++-
 src/lj_arch.h                                 |  11 +
 src/lj_errmsg.h                               |   2 +-
 src/lj_mapi.c                                 |  26 +
 src/lj_memprof.c                              |  82 +--
 src/lj_memprof.h                              |  41 --
 src/lj_obj.h                                  |  12 +
 src/lj_profile.c                              | 176 +------
 src/lj_profile_timer.c                        | 133 +++++
 src/lj_profile_timer.h                        |  83 +++
 src/lj_state.c                                |   7 +
 src/lj_symtab.c                               |  84 +++
 src/lj_symtab.h                               |  65 +++
 src/lj_sysprof.c                              | 483 ++++++++++++++++++
 src/lj_sysprof.h                              |  94 ++++
 src/ljamalg.c                                 |   2 +
 src/lmisclib.h                                |  93 ++++
 src/vm_x64.dasc                               |  52 +-
 src/vm_x86.dasc                               |  52 +-
 test/tarantool-tests/CMakeLists.txt           |   1 +
 .../misclib-sysprof-capi.test.lua             |  53 ++
 .../misclib-sysprof-capi/CMakeLists.txt       |   1 +
 .../misclib-sysprof-capi/testsysprof.c        | 269 ++++++++++
 .../misclib-sysprof-lapi.test.lua             | 120 +++++
 tools/CMakeLists.txt                          |  83 +++
 tools/luajit-parse-sysprof.in                 |   6 +
 tools/sysprof.lua                             | 119 +++++
 tools/sysprof/collapse.lua                    | 113 ++++
 tools/sysprof/parse.lua                       | 188 +++++++
 tools/utils/symtab.lua                        |   2 +-
 34 files changed, 2430 insertions(+), 348 deletions(-)
 create mode 100644 src/lj_profile_timer.c
 create mode 100644 src/lj_profile_timer.h
 create mode 100644 src/lj_symtab.c
 create mode 100644 src/lj_symtab.h
 create mode 100644 src/lj_sysprof.c
 create mode 100644 src/lj_sysprof.h
 create mode 100644 test/tarantool-tests/misclib-sysprof-capi.test.lua
 create mode 100644 test/tarantool-tests/misclib-sysprof-capi/CMakeLists.txt
 create mode 100644 test/tarantool-tests/misclib-sysprof-capi/testsysprof.c
 create mode 100644 test/tarantool-tests/misclib-sysprof-lapi.test.lua
 create mode 100644 tools/luajit-parse-sysprof.in
 create mode 100644 tools/sysprof.lua
 create mode 100755 tools/sysprof/collapse.lua
 create mode 100755 tools/sysprof/parse.lua

--
2.35.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Tarantool-patches] [PATCH luajit v3 1/7] vm: save topframe info into global_State
  2022-04-06 12:49 [Tarantool-patches] [PATCH luajit v3 0/7] introduce platform profiler Maxim Kokryashkin via Tarantool-patches
@ 2022-04-06 12:49 ` Maxim Kokryashkin via Tarantool-patches
  2022-04-07  9:47   ` Sergey Kaplun via Tarantool-patches
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 2/7] core: separate the profiling timer from lj_profile Maxim Kokryashkin via Tarantool-patches
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 9+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2022-04-06 12:49 UTC (permalink / raw)
  To: tarantool-patches, imun, skaplun

From: Mikhail Shishatskiy <m.shishatskiy@tarantool.org>

Since commit 111d377d524e54e02187148a1832683291d620b2
('vm: introduce VM states for Lua and fast functions')
the VM has LFUNC and FFUNC states. The upcoming sampling
profiler uses these vmstates to determine if the guest
stack is valid or not. So, we need to provide the profiler
with the top frame info saved in global_State. There is an
inconsistent behavior of the VM when the Lua stack is not
valid, but the state
is set to LFUNC. This patch is just a gross hack with which
the profiler works fine. The problem is to be investigated
more deeply :(
---
 src/lj_obj.h    | 12 ++++++++++++
 src/vm_x64.dasc | 52 +++++++++++++++++++++++++++++++++++++++----------
 src/vm_x86.dasc | 52 +++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 96 insertions(+), 20 deletions(-)

diff --git a/src/lj_obj.h b/src/lj_obj.h
index d26e60be..b76c3155 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -514,6 +514,17 @@ typedef struct GCtab {
 #define setfreetop(t, n, v)	(setmref((n)->freetop, (v)))
 #endif
 
+/* -- Misc objects -------------------------------------------------------- */
+
+struct lj_sysprof_topframe {
+  uint8_t ffid;          /* FFUNC: fast function id. */
+  union {
+    uint64_t raw;        /* Raw value for context save/restore. */
+    TValue *interp_base; /* LFUNC: Base of the executed coroutine. */
+    lua_CFunction cf;    /* CFUNC: Address of the C function. */
+  } guesttop;
+};
+
 /* -- State objects ------------------------------------------------------- */
 
 /* VM states. */
@@ -674,6 +685,7 @@ typedef struct global_State {
   MRef jit_base;	/* Current JIT code L->base or NULL. */
   MRef ctype_state;	/* Pointer to C type state. */
   GCRef gcroot[GCROOT_MAX];  /* GC roots. */
+  struct lj_sysprof_topframe top_frame;  /* Top frame for sysprof */
 } global_State;
 
 #define mainthread(g)	(&gcref(g->mainthref)->th)
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 974047d3..c4beb5e7 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -345,6 +345,35 @@
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
 |.endmacro
 |
+|// Stash interpreter's internal base and enter LFUNC VM state.
+|// PROFILER: Each time profiler sees LFUNC state, it will inspect [BASE-1]
+|// expecting to see a valid framelink there. So enter this state only when
+|// BASE is stable and slots are not moved on the stack.
+|.macro set_vmstate_lfunc
+|  set_vmstate INTERP // Guard for non-atomic VM context restoration
+|  mov dword [DISPATCH+DISPATCH_GL(top_frame.guesttop)], BASE
+|  set_vmstate LFUNC
+|.endmacro
+|
+|// Stash ID of the fast function about to be executed and enter FFUNC VM state.
+|// PROFILER: Each time profiler sees FFUNC state, it will write ffid
+|// to the profile stream.
+|.macro set_vmstate_ffunc
+|  set_vmstate INTERP // Guard for non-atomic VM context restoration
+|  mov XCHGd, dword [BASE-8]
+|  mov dword [DISPATCH+DISPATCH_GL(top_frame.ffid)], XCHGd
+|  set_vmstate FFUNC
+|.endmacro
+|
+|// Stash address of the C function about to be executed and enter CFUNC VM state.
+|// PROFILER: Each time profiler sees CFUNC state, it will write this address
+|// to the profile stream.
+|.macro set_vmstate_cfunc
+|  set_vmstate INTERP // Guard for non-atomic VM context restoration
+|  mov dword [DISPATCH+DISPATCH_GL(top_frame.guesttop)], BASE
+|  set_vmstate CFUNC
+|.endmacro
+|
 |// Uses TMPRd (r10d).
 |.macro save_vmstate
 |.if not WIN
@@ -435,7 +464,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  jnz ->vm_returnp
   |
   |  // Return to C.
-  |  set_vmstate CFUNC
+  |  set_vmstate_cfunc
   |  and PC, -8
   |  sub PC, BASE
   |  neg PC				// Previous base = BASE - delta.
@@ -467,6 +496,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  xor eax, eax			// Ok return status for vm_pcall.
   |
   |->vm_leave_unw:
+  |  set_vmstate INTERP // Guard for non-atomic VM context restoration
+  |  mov XCHGd, L:RBa->base
+  |  mov dword [DISPATCH+DISPATCH_GL(top_frame.guesttop)], XCHGd
   |  // DISPATCH required to set properly.
   |  restore_vmstate			// Caveat: uses TMPRd (r10d).
   |  restoreregs
@@ -725,7 +757,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  cleartp LFUNC:KBASE
   |  mov KBASE, LFUNC:KBASE->pc
   |  mov KBASE, [KBASE+PC2PROTO(k)]
-  |  set_vmstate LFUNC			// LFUNC after KBASE restoration.
+  |  set_vmstate_lfunc			// LFUNC after KBASE restoration.
   |  // BASE = base, RC = result, RB = meta base
   |  jmp RA				// Jump to continuation.
   |
@@ -1166,7 +1198,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc, name
   |->ff_ .. name:
-  |  set_vmstate FFUNC
+  |  set_vmstate_ffunc
   |.endmacro
   |
   |.macro .ffunc_1, name
@@ -1748,7 +1780,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  movzx RAd, PC_RA
   |  neg RA
   |  lea BASE, [BASE+RA*8-16]		// base = base - (RA+2)*8
-  |  set_vmstate LFUNC			// LFUNC state after BASE restoration.
+  |  set_vmstate_lfunc			// LFUNC state after BASE restoration.
   |  ins_next
   |
   |6:  // Fill up results with nil.
@@ -2513,7 +2545,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  mov L:RB->base, BASE
   |  mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
-  |  set_vmstate LFUNC			// LFUNC after BASE & KBASE restoration.
+  |  set_vmstate_lfunc			// LFUNC after BASE & KBASE restoration.
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  mov RCd, [PC]
   |  movzx RAd, RCH
@@ -2730,7 +2762,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  call extern lj_ccallback_enter	// (CTState *cts, void *cf)
   |  // lua_State * returned in eax (RD).
   |  mov BASE, L:RD->base
-  |  set_vmstate LFUNC			// LFUNC after BASE restoration.
+  |  set_vmstate_lfunc			// LFUNC after BASE restoration.
   |  mov RD, L:RD->top
   |  sub RD, BASE
   |  mov LFUNC:RB, [BASE-16]
@@ -4299,7 +4331,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov KBASE, LFUNC:KBASE->pc
     |  mov KBASE, [KBASE+PC2PROTO(k)]
     |  // LFUNC after the old BASE & KBASE is restored.
-    |  set_vmstate LFUNC
+    |  set_vmstate_lfunc
     |  ins_next
     |
     |6:  // Fill up results with nil.
@@ -4591,7 +4623,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
     |  mov KBASE, [PC-4+PC2PROTO(k)]
     |  mov L:RB, SAVE_L
-    |  set_vmstate LFUNC		// LFUNC after KBASE restoration.
+    |  set_vmstate_lfunc		// LFUNC after KBASE restoration.
     |  lea RA, [BASE+RA*8]		// Top of frame.
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_f
@@ -4629,7 +4661,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov [RD-8], RB			// Store delta + FRAME_VARG.
     |  mov [RD-16], LFUNC:KBASE		// Store copy of LFUNC.
     |  mov L:RB, SAVE_L
-    |  set_vmstate LFUNC		// LFUNC after KBASE restoration.
+    |  set_vmstate_lfunc		// LFUNC after KBASE restoration.
     |  lea RA, [RD+RA*8]
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_v		// Need to grow stack.
@@ -4685,7 +4717,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |  mov CARG1, L:RB		// Caveat: CARG1 may be RA.
     }
     |  ja ->vm_growstack_c		// Need to grow stack.
-    |  set_vmstate CFUNC		// CFUNC before entering C function.
+    |  set_vmstate_cfunc		// CFUNC before entering C function.
     if (op == BC_FUNCC) {
       |  call KBASE			// (lua_State *L)
     } else {
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index ab8e6f27..222754fe 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -443,6 +443,35 @@
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
 |.endmacro
 |
+|// Stash interpreter's internal base and enter LFUNC VM state.
+|// PROFILER: Each time profiler sees LFUNC state, it will inspect [BASE-1]
+|// expecting to see a valid framelink there. So enter this state only when
+|// BASE is stable and slots are not moved on the stack.
+|.macro set_vmstate_lfunc
+|  set_vmstate INTERP // Guard for non-atomic VM context restoration
+|  mov dword [DISPATCH+DISPATCH_GL(top_frame.guesttop)], BASE
+|  set_vmstate LFUNC
+|.endmacro
+|
+|// Stash ID of the fast function about to be executed and enter FFUNC VM state.
+|// PROFILER: Each time profiler sees FFUNC state, it will write ffid
+|// to the profile stream.
+|.macro set_vmstate_ffunc
+|  set_vmstate INTERP // Guard for non-atomic VM context restoration
+|  mov XCHGd, dword [BASE-8]
+|  mov dword [DISPATCH+DISPATCH_GL(top_frame.ffid)], XCHGd
+|  set_vmstate FFUNC
+|.endmacro
+|
+|// Stash address of the C function about to be executed and enter CFUNC VM state.
+|// PROFILER: Each time profiler sees CFUNC state, it will write this address
+|// to the profile stream.
+|.macro set_vmstate_cfunc
+|  set_vmstate INTERP // Guard for non-atomic VM context restoration
+|  mov dword [DISPATCH+DISPATCH_GL(top_frame.guesttop)], BASE
+|  set_vmstate CFUNC
+|.endmacro
+|
 |// Uses spilled ecx on x86 or XCHGd (r11d) on x64.
 |.macro save_vmstate
 |.if not WIN
@@ -560,7 +589,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  jnz ->vm_returnp
   |
   |  // Return to C.
-  |  set_vmstate CFUNC
+  |  set_vmstate_cfunc
   |  and PC, -8
   |  sub PC, BASE
   |  neg PC				// Previous base = BASE - delta.
@@ -599,6 +628,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  xor eax, eax			// Ok return status for vm_pcall.
   |
   |->vm_leave_unw:
+  |  set_vmstate INTERP // Guard for non-atomic VM context restoration
+  |  mov XCHGd, L:RBa->base
+  |  mov dword [DISPATCH+DISPATCH_GL(top_frame.guesttop)], XCHGd
   |  // DISPATCH required to set properly.
   |  restore_vmstate			// Caveat: on x64 uses XCHGd (r11d).
   |  restoreregs
@@ -934,7 +966,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, LFUNC:KBASE->pc
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  // BASE = base, RC = result, RB = meta base
-  |  set_vmstate LFUNC			// LFUNC after KBASE restoration.
+  |  set_vmstate_lfunc			// LFUNC after KBASE restoration.
   |  jmp RAa				// Jump to continuation.
   |
   |.if FFI
@@ -1459,7 +1491,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc, name
   |->ff_ .. name:
-  |  set_vmstate FFUNC
+  |  set_vmstate_ffunc
   |.endmacro
   |
   |.macro .ffunc_1, name
@@ -2141,7 +2173,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  movzx RA, PC_RA
   |  not RAa				// Note: ~RA = -(RA+1)
   |  lea BASE, [BASE+RA*8]		// base = base - (RA+1)*8
-  |  set_vmstate LFUNC			// LFUNC state after BASE restoration.
+  |  set_vmstate_lfunc			// LFUNC state after BASE restoration.
   |  ins_next
   |
   |6:  // Fill up results with nil.
@@ -2986,7 +3018,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  mov L:RB->base, BASE
   |  mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
-  |  set_vmstate LFUNC			// LFUNC after BASE & KBASE restoration.
+  |  set_vmstate_lfunc			// LFUNC after BASE & KBASE restoration.
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  mov RC, [PC]
   |  movzx RA, RCH
@@ -3257,7 +3289,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  call extern lj_ccallback_enter@8	// (CTState *cts, void *cf)
   |  // lua_State * returned in eax (RD).
   |  mov BASE, L:RD->base
-  |  set_vmstate LFUNC			// LFUNC after BASE restoration.
+  |  set_vmstate_lfunc			// LFUNC after BASE restoration.
   |  mov RD, L:RD->top
   |  sub RD, BASE
   |  mov LFUNC:RB, [BASE-8]
@@ -5103,7 +5135,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov KBASE, LFUNC:KBASE->pc
     |  mov KBASE, [KBASE+PC2PROTO(k)]
     |  // LFUNC after the old BASE & KBASE is restored.
-    |  set_vmstate LFUNC
+    |  set_vmstate_lfunc
     |  ins_next
     |
     |6:  // Fill up results with nil.
@@ -5391,7 +5423,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
     |  mov KBASE, [PC-4+PC2PROTO(k)]
     |  mov L:RB, SAVE_L
-    |  set_vmstate LFUNC		// LFUNC after KBASE restoration.
+    |  set_vmstate_lfunc		// LFUNC after KBASE restoration.
     |  lea RA, [BASE+RA*8]		// Top of frame.
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_f
@@ -5429,7 +5461,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov [RD-4], RB			// Store delta + FRAME_VARG.
     |  mov [RD-8], LFUNC:KBASE		// Store copy of LFUNC.
     |  mov L:RB, SAVE_L
-    |  set_vmstate LFUNC		// LFUNC after KBASE restoration.
+    |  set_vmstate_lfunc		// LFUNC after KBASE restoration.
     |  lea RA, [RD+RA*8]
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_v		// Need to grow stack.
@@ -5494,7 +5526,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |.endif
     }
     |  ja ->vm_growstack_c		// Need to grow stack.
-    |  set_vmstate CFUNC		// CFUNC before entering C function.
+    |  set_vmstate_cfunc		// CFUNC before entering C function.
     if (op == BC_FUNCC) {
       |  call KBASEa			// (lua_State *L)
     } else {
-- 
2.35.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Tarantool-patches] [PATCH luajit v3 2/7] core: separate the profiling timer from lj_profile
  2022-04-06 12:49 [Tarantool-patches] [PATCH luajit v3 0/7] introduce platform profiler Maxim Kokryashkin via Tarantool-patches
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 1/7] vm: save topframe info into global_State Maxim Kokryashkin via Tarantool-patches
@ 2022-04-06 12:49 ` Maxim Kokryashkin via Tarantool-patches
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 3/7] memprof: move symtab to a separate module Maxim Kokryashkin via Tarantool-patches
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2022-04-06 12:49 UTC (permalink / raw)
  To: tarantool-patches, imun, skaplun

From: Mikhail Shishatskiy <m.shishatskiy@tarantool.org>

This patch makes timer machinery in lj_profile self-reliant by
introducing lj_profile_timer structure and start/stop interface
for it. This timer is useful for other sampling profiling
features.

Part of tarantool/tarantool#781
---
 src/CMakeLists.txt        |   1 +
 src/Makefile.dep.original |  35 ++++----
 src/lj_profile.c          | 176 +++-----------------------------------
 src/lj_profile_timer.c    | 133 ++++++++++++++++++++++++++++
 src/lj_profile_timer.h    |  83 ++++++++++++++++++
 src/ljamalg.c             |   1 +
 6 files changed, 249 insertions(+), 180 deletions(-)
 create mode 100644 src/lj_profile_timer.c
 create mode 100644 src/lj_profile_timer.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 809aac68..c92d78cc 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -69,6 +69,7 @@ make_source_list(SOURCES_PROFILER
   SOURCES
     lj_memprof.c
     lj_profile.c
+    lj_profile_timer.c
 )
 
 # Lua standard library + extensions by LuaJIT.
diff --git a/src/Makefile.dep.original b/src/Makefile.dep.original
index faa44a0b..fc0fb5d2 100644
--- a/src/Makefile.dep.original
+++ b/src/Makefile.dep.original
@@ -178,7 +178,10 @@ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_vm.h lj_vmevent.h
 lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
- lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
+ lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h \
+ lj_profile_timer.h luajit.h
+lj_profile_timer.o: lj_profile_timer.c lj_profile_timer.h lj_def.h lua.h \
+ luaconf.h lj_arch.h
 lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
  lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
@@ -221,21 +224,23 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
  lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
  lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \
  lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \
- lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_wbuf.c lj_wbuf.h lj_utils.h \
- lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \
- lj_debug.c lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c \
- lj_ccallback.h lj_profile.h lj_memprof.h lj_vmevent.c lj_vmevent.h \
+ lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_wbuf.c lj_wbuf.h \
+ lj_utils.h lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \
+ lj_lib.h lj_debug.c lj_state.c lj_lex.h lj_alloc.h luajit.h lj_memprof.h \
+ lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \
  lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_api.c lj_mapi.c \
- lmisclib.h lj_profile.c lj_memprof.c lj_lex.c lualib.h lj_parse.h lj_parse.c \
- lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c \
- lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h \
- lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h \
- lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h lj_opt_mem.c \
- lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c \
- lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c \
- lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h \
- lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c \
- lj_alloc.c lj_utils_leb128.c lib_aux.c lib_base.c lj_libdef.h lib_math.c \
+ lmisclib.h lj_profile.c lj_profile_timer.h lj_profile_timer.c \
+ lj_memprof.c lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c \
+ lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h \
+ lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h \
+ lj_target_x86.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h \
+ lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h \
+ lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
+ lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \
+ lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
+ lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_x86.h \
+ lj_asm_x86.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
+ lj_utils_leb128.c lib_aux.c lib_base.c lj_libdef.h lib_math.c \
  lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c \
  lib_bit.c lib_jit.c lib_ffi.c lib_misc.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
diff --git a/src/lj_profile.c b/src/lj_profile.c
index 7b09a63a..4412d68b 100644
--- a/src/lj_profile.c
+++ b/src/lj_profile.c
@@ -19,66 +19,19 @@
 #include "lj_trace.h"
 #endif
 #include "lj_profile.h"
+#include "lj_profile_timer.h"
 
 #include "luajit.h"
 
-#if LJ_PROFILE_SIGPROF
-
-#include <sys/time.h>
-#include <signal.h>
-#define profile_lock(ps)	UNUSED(ps)
-#define profile_unlock(ps)	UNUSED(ps)
-
-#elif LJ_PROFILE_PTHREAD
-
-#include <pthread.h>
-#include <time.h>
-#if LJ_TARGET_PS3
-#include <sys/timer.h>
-#endif
-#define profile_lock(ps)	pthread_mutex_lock(&ps->lock)
-#define profile_unlock(ps)	pthread_mutex_unlock(&ps->lock)
-
-#elif LJ_PROFILE_WTHREAD
-
-#define WIN32_LEAN_AND_MEAN
-#if LJ_TARGET_XBOX360
-#include <xtl.h>
-#include <xbox.h>
-#else
-#include <windows.h>
-#endif
-typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int);
-#define profile_lock(ps)	EnterCriticalSection(&ps->lock)
-#define profile_unlock(ps)	LeaveCriticalSection(&ps->lock)
-
-#endif
-
 /* Profiler state. */
 typedef struct ProfileState {
   global_State *g;		/* VM state that started the profiler. */
   luaJIT_profile_callback cb;	/* Profiler callback. */
   void *data;			/* Profiler callback data. */
   SBuf sb;			/* String buffer for stack dumps. */
-  int interval;			/* Sample interval in milliseconds. */
   int samples;			/* Number of samples for next callback. */
   int vmstate;			/* VM state when profile timer triggered. */
-#if LJ_PROFILE_SIGPROF
-  struct sigaction oldsa;	/* Previous SIGPROF state. */
-#elif LJ_PROFILE_PTHREAD
-  pthread_mutex_t lock;		/* g->hookmask update lock. */
-  pthread_t thread;		/* Timer thread. */
-  int abort;			/* Abort timer thread. */
-#elif LJ_PROFILE_WTHREAD
-#if LJ_TARGET_WINDOWS
-  HINSTANCE wmm;		/* WinMM library handle. */
-  WMM_TPFUNC wmm_tbp;		/* WinMM timeBeginPeriod function. */
-  WMM_TPFUNC wmm_tep;		/* WinMM timeEndPeriod function. */
-#endif
-  CRITICAL_SECTION lock;	/* g->hookmask update lock. */
-  HANDLE thread;		/* Timer thread. */
-  int abort;			/* Abort timer thread. */
-#endif
+  lj_profile_timer timer;	/* Profiling timer */
 } ProfileState;
 
 /* Sadly, we have to use a static profiler state.
@@ -168,129 +121,21 @@ static void profile_trigger(ProfileState *ps)
   profile_unlock(ps);
 }
 
-/* -- OS-specific profile timer handling ---------------------------------- */
-
 #if LJ_PROFILE_SIGPROF
 
-/* SIGPROF handler. */
-static void profile_signal(int sig)
+static void profile_handler(int sig, siginfo_t *info, void *ctx)
 {
   UNUSED(sig);
+  UNUSED(info);
+  UNUSED(ctx);
   profile_trigger(&profile_state);
 }
 
-/* Start profiling timer. */
-static void profile_timer_start(ProfileState *ps)
-{
-  int interval = ps->interval;
-  struct itimerval tm;
-  struct sigaction sa;
-  tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
-  tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
-  setitimer(ITIMER_PROF, &tm, NULL);
-  sa.sa_flags = SA_RESTART;
-  sa.sa_handler = profile_signal;
-  sigemptyset(&sa.sa_mask);
-  sigaction(SIGPROF, &sa, &ps->oldsa);
-}
-
-/* Stop profiling timer. */
-static void profile_timer_stop(ProfileState *ps)
-{
-  struct itimerval tm;
-  tm.it_value.tv_sec = tm.it_interval.tv_sec = 0;
-  tm.it_value.tv_usec = tm.it_interval.tv_usec = 0;
-  setitimer(ITIMER_PROF, &tm, NULL);
-  sigaction(SIGPROF, &ps->oldsa, NULL);
-}
-
-#elif LJ_PROFILE_PTHREAD
-
-/* POSIX timer thread. */
-static void *profile_thread(ProfileState *ps)
-{
-  int interval = ps->interval;
-#if !LJ_TARGET_PS3
-  struct timespec ts;
-  ts.tv_sec = interval / 1000;
-  ts.tv_nsec = (interval % 1000) * 1000000;
-#endif
-  while (1) {
-#if LJ_TARGET_PS3
-    sys_timer_usleep(interval * 1000);
 #else
-    nanosleep(&ts, NULL);
-#endif
-    if (ps->abort) break;
-    profile_trigger(ps);
-  }
-  return NULL;
-}
-
-/* Start profiling timer thread. */
-static void profile_timer_start(ProfileState *ps)
-{
-  pthread_mutex_init(&ps->lock, 0);
-  ps->abort = 0;
-  pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps);
-}
-
-/* Stop profiling timer thread. */
-static void profile_timer_stop(ProfileState *ps)
-{
-  ps->abort = 1;
-  pthread_join(ps->thread, NULL);
-  pthread_mutex_destroy(&ps->lock);
-}
-
-#elif LJ_PROFILE_WTHREAD
-
-/* Windows timer thread. */
-static DWORD WINAPI profile_thread(void *psx)
-{
-  ProfileState *ps = (ProfileState *)psx;
-  int interval = ps->interval;
-#if LJ_TARGET_WINDOWS
-  ps->wmm_tbp(interval);
-#endif
-  while (1) {
-    Sleep(interval);
-    if (ps->abort) break;
-    profile_trigger(ps);
-  }
-#if LJ_TARGET_WINDOWS
-  ps->wmm_tep(interval);
-#endif
-  return 0;
-}
-
-/* Start profiling timer thread. */
-static void profile_timer_start(ProfileState *ps)
-{
-#if LJ_TARGET_WINDOWS
-  if (!ps->wmm) {  /* Load WinMM library on-demand. */
-    ps->wmm = LoadLibraryExA("winmm.dll", NULL, 0);
-    if (ps->wmm) {
-      ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod");
-      ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod");
-      if (!ps->wmm_tbp || !ps->wmm_tep) {
-	ps->wmm = NULL;
-	return;
-      }
-    }
-  }
-#endif
-  InitializeCriticalSection(&ps->lock);
-  ps->abort = 0;
-  ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL);
-}
 
-/* Stop profiling timer thread. */
-static void profile_timer_stop(ProfileState *ps)
+static void profile_handler()
 {
-  ps->abort = 1;
-  WaitForSingleObject(ps->thread, INFINITE);
-  DeleteCriticalSection(&ps->lock);
+  profile_trigger(&profile_state);
 }
 
 #endif
@@ -327,12 +172,13 @@ LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
     if (ps->g) return;  /* Profiler in use by another VM. */
   }
   ps->g = G(L);
-  ps->interval = interval;
   ps->cb = cb;
   ps->data = data;
   ps->samples = 0;
   lj_buf_init(L, &ps->sb);
-  profile_timer_start(ps);
+  ps->timer.opt.interval_msec = interval;
+  ps->timer.opt.handler = profile_handler;
+  lj_profile_timer_start(&ps->timer);
 }
 
 /* Stop profiling. */
@@ -341,7 +187,7 @@ LUA_API void luaJIT_profile_stop(lua_State *L)
   ProfileState *ps = &profile_state;
   global_State *g = ps->g;
   if (G(L) == g) {  /* Only stop profiler if started by this VM. */
-    profile_timer_stop(ps);
+    lj_profile_timer_stop(&ps->timer);
     g->hookmask &= ~HOOK_PROFILE;
     lj_dispatch_update(g);
 #if LJ_HASJIT
diff --git a/src/lj_profile_timer.c b/src/lj_profile_timer.c
new file mode 100644
index 00000000..056fd1f7
--- /dev/null
+++ b/src/lj_profile_timer.c
@@ -0,0 +1,133 @@
+/*
+** Simple profiling timer.
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_profile_timer_c
+#define LUA_CORE
+
+#include "lj_profile_timer.h"
+
+#if LJ_HASPROFILE
+
+#if LJ_PROFILE_SIGPROF
+
+/* Start profiling timer. */
+void lj_profile_timer_start(lj_profile_timer *timer)
+{
+  const int interval = timer->opt.interval_msec;
+  struct itimerval tm;
+  struct sigaction sa;
+  tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
+  tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
+  setitimer(ITIMER_PROF, &tm, NULL);
+  sa.sa_flags = SA_RESTART | SA_SIGINFO;
+  sa.sa_sigaction = timer->opt.handler;
+  sigemptyset(&sa.sa_mask);
+  sigaction(SIGPROF, &sa, &timer->oldsa);
+}
+
+/* Stop profiling timer. */
+void lj_profile_timer_stop(lj_profile_timer *timer)
+{
+  struct itimerval tm;
+  tm.it_value.tv_sec = tm.it_interval.tv_sec = 0;
+  tm.it_value.tv_usec = tm.it_interval.tv_usec = 0;
+  setitimer(ITIMER_PROF, &tm, NULL);
+  sigaction(SIGPROF, &timer->oldsa, NULL);
+}
+
+#elif LJ_PROFILE_PTHREAD
+
+/* POSIX timer thread. */
+static void *timer_thread(lj_profile_timer *timer)
+{
+  int interval = timer->opt.interval_msec;
+#if !LJ_TARGET_PS3
+  struct timespec ts;
+  ts.tv_sec = interval / 1000;
+  ts.tv_nsec = (interval % 1000) * 1000000;
+#endif
+  while (1) {
+#if LJ_TARGET_PS3
+    sys_timer_usleep(interval * 1000);
+#else
+    nanosleep(&ts, NULL);
+#endif
+    if (timer->abort) break;
+    timer->opt.handler();
+  }
+  return NULL;
+}
+
+/* Start profiling timer thread. */
+void lj_profile_timer_start(lj_profile_timer *timer)
+{
+  pthread_mutex_init(&timer->lock, 0);
+  timer->abort = 0;
+  pthread_create(&timer->thread, NULL, (void *(*)(void *))timer_thread,
+		 timer);
+}
+
+/* Stop profiling timer thread. */
+void lj_profile_timer_stop(lj_profile_timer *timer)
+{
+  timer->abort = 1;
+  pthread_join(timer->thread, NULL);
+  pthread_mutex_destroy(&timer->lock);
+}
+
+#elif LJ_PROFILE_WTHREAD
+
+/* Windows timer thread. */
+static DWORD WINAPI timer_thread(void *timerx)
+{
+  lj_profile_timer *timer = (lj_profile_timer *)timerx;
+  int interval = timer->opt.interval_msec;
+#if LJ_TARGET_WINDOWS
+  timer->wmm_tbp(interval);
+#endif
+  while (1) {
+    Sleep(interval);
+    if (timer->abort) break;
+    timer->opt.handler();
+  }
+#if LJ_TARGET_WINDOWS
+  timer->wmm_tep(interval);
+#endif
+  return 0;
+}
+
+/* Start profiling timer thread. */
+void lj_profile_timer_start(lj_profile_timer *timer)
+{
+#if LJ_TARGET_WINDOWS
+  if (!timer->wmm) { /* Load WinMM library on-demand. */
+    timer->wmm = LoadLibraryExA("winmm.dll", NULL, 0);
+    if (timer->wmm) {
+      timer->wmm_tbp =
+	(WMM_TPFUNC)GetProcAddress(timer->wmm, "timeBeginPeriod");
+      timer->wmm_tep = (WMM_TPFUNC)GetProcAddress(timer->wmm, "timeEndPeriod");
+      if (!timer->wmm_tbp || !timer->wmm_tep) {
+	timer->wmm = NULL;
+	return;
+      }
+    }
+  }
+#endif
+  InitializeCriticalSection(&timer->lock);
+  timer->abort = 0;
+  timer->thread = CreateThread(NULL, 0, timer_thread, timer, 0, NULL);
+}
+
+/* Stop profiling timer thread. */
+void lj_profile_timer_stop(lj_profile_timer *timer)
+{
+  timer->abort = 1;
+  WaitForSingleObject(timer->thread, INFINITE);
+  DeleteCriticalSection(&timer->lock);
+}
+
+#endif
+
+#endif  /* LJ_HASPROFILE */
diff --git a/src/lj_profile_timer.h b/src/lj_profile_timer.h
new file mode 100644
index 00000000..1deeea53
--- /dev/null
+++ b/src/lj_profile_timer.h
@@ -0,0 +1,83 @@
+/*
+** Simple profiling timer.
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_PROFILE_TIMER_H
+#define _LJ_PROFILE_TIMER_H
+
+#include "lj_def.h"
+#include "lj_arch.h"
+
+#if LJ_HASPROFILE
+
+#if LJ_PROFILE_SIGPROF
+
+#include <sys/time.h>
+#include <signal.h>
+#define profile_lock(ps)	UNUSED(ps)
+#define profile_unlock(ps)	UNUSED(ps)
+
+#elif LJ_PROFILE_PTHREAD
+
+#include <pthread.h>
+#include <time.h>
+#if LJ_TARGET_PS3
+#include <sys/timer.h>
+#endif
+#define profile_lock(ps)	pthread_mutex_lock(&ps->lock)
+#define profile_unlock(ps)	pthread_mutex_unlock(&ps->lock)
+
+#elif LJ_PROFILE_WTHREAD
+
+#define WIN32_LEAN_AND_MEAN
+#if LJ_TARGET_XBOX360
+#include <xtl.h>
+#include <xbox.h>
+#else
+#include <windows.h>
+#endif
+typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int);
+#define profile_lock(ps)	EnterCriticalSection(&ps->lock)
+#define profile_unlock(ps)	LeaveCriticalSection(&ps->lock)
+
+#endif
+
+typedef struct {  /* Options supplied by the timer's user. */
+#if LJ_PROFILE_SIGPROF
+  void (*handler)(int, siginfo_t*, void*);  /* Three-argument signal handler. */
+#else
+  void (*handler)(void);  /* Called by the timer thread after each sleep. */
+#endif
+  uint32_t interval_msec;  /* Tick interval, in milliseconds. */
+} lj_profile_timer_opt;
+
+typedef struct {  /* Timer state; the field set depends on the backend. */
+  lj_profile_timer_opt opt;	/* Handler and interval for this timer. */
+#if LJ_PROFILE_SIGPROF
+  struct sigaction oldsa;	/* Previous SIGPROF state. */
+#elif LJ_PROFILE_PTHREAD
+  pthread_mutex_t lock;		/* g->hookmask update lock. */
+  pthread_t thread;		/* Timer thread. */
+  int abort;			/* Set to 1 by timer stop to end the thread. */
+#elif LJ_PROFILE_WTHREAD
+#if LJ_TARGET_WINDOWS
+  HINSTANCE wmm;		/* WinMM library handle. */
+  WMM_TPFUNC wmm_tbp;		/* WinMM timeBeginPeriod function. */
+  WMM_TPFUNC wmm_tep;		/* WinMM timeEndPeriod function. */
+#endif
+  CRITICAL_SECTION lock;	/* g->hookmask update lock. */
+  HANDLE thread;		/* Timer thread. */
+  int abort;			/* Set to 1 by timer stop to end the thread. */
+#endif
+} lj_profile_timer;
+
+/* Start profiling timer; the handler and interval are read from timer->opt. */
+void lj_profile_timer_start(lj_profile_timer *timer);
+
+/* Stop profiling timer (the thread backends wait for the thread to exit). */
+void lj_profile_timer_stop(lj_profile_timer *timer);
+
+#endif  /* LJ_HASPROFILE */
+
+#endif
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 3f7e6860..ce7a0d6c 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -51,6 +51,7 @@
 #include "lj_api.c"
 #include "lj_mapi.c"
 #include "lj_profile.c"
+#include "lj_profile_timer.c"
 #include "lj_memprof.c"
 #include "lj_lex.c"
 #include "lj_parse.c"
-- 
2.35.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Tarantool-patches] [PATCH luajit v3 3/7] memprof: move symtab to a separate module
  2022-04-06 12:49 [Tarantool-patches] [PATCH luajit v3 0/7] introuduce platform profiler Maxim Kokryashkin via Tarantool-patches
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 1/7] vm: save topframe info into global_State Maxim Kokryashkin via Tarantool-patches
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 2/7] core: separate the profiling timer from lj_profile Maxim Kokryashkin via Tarantool-patches
@ 2022-04-06 12:49 ` Maxim Kokryashkin via Tarantool-patches
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 4/7] core: introduce lua and platform profiler Maxim Kokryashkin via Tarantool-patches
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2022-04-06 12:49 UTC (permalink / raw)
  To: tarantool-patches, imun, skaplun

Considering the symbol table format, it is obvious that it is suitable
not only for memprof, but for other profiling modules too. This commit
moves the symbol table to a separate module, so modules other than
memprof will be able to access it.

Part of tarantool/tarantool#781
---
 src/CMakeLists.txt        |  1 +
 src/Makefile.dep.original |  4 +-
 src/lj_memprof.c          | 82 +-------------------------------------
 src/lj_memprof.h          | 41 -------------------
 src/lj_symtab.c           | 84 +++++++++++++++++++++++++++++++++++++++
 src/lj_symtab.h           | 65 ++++++++++++++++++++++++++++++
 src/ljamalg.c             |  1 +
 7 files changed, 156 insertions(+), 122 deletions(-)
 create mode 100644 src/lj_symtab.c
 create mode 100644 src/lj_symtab.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c92d78cc..b4ce407b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -70,6 +70,7 @@ make_source_list(SOURCES_PROFILER
     lj_memprof.c
     lj_profile.c
     lj_profile_timer.c
+    lj_symtab.c
 )
 
 # Lua standard library + extensions by LuaJIT.
diff --git a/src/Makefile.dep.original b/src/Makefile.dep.original
index fc0fb5d2..efe39e84 100644
--- a/src/Makefile.dep.original
+++ b/src/Makefile.dep.original
@@ -147,7 +147,7 @@ lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
 lj_memprof.o: lj_memprof.c lj_arch.h lua.h luaconf.h lj_memprof.h \
  lj_def.h lj_wbuf.h lj_obj.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
- lj_jit.h lj_ir.h
+ lj_jit.h lj_ir.h lj_symtab.h
 lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
  lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
@@ -203,6 +203,8 @@ lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h
 lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_char.h lj_strscan.h
+lj_symtab.o: lj_symtab.c lj_symtab.h lj_wbuf.h lj_def.h lua.h luaconf.h \
+ lj_obj.h lj_arch.h
 lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_tab.h
 lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
diff --git a/src/lj_memprof.c b/src/lj_memprof.c
index 2d779983..e6ebf009 100644
--- a/src/lj_memprof.c
+++ b/src/lj_memprof.c
@@ -19,86 +19,8 @@
 #include "lj_frame.h"
 #include "lj_debug.h"
 
-#if LJ_HASJIT
-#include "lj_dispatch.h"
-#endif
-
-/* --------------------------------- Symtab --------------------------------- */
-
-static const unsigned char ljs_header[] = {'l', 'j', 's', LJS_CURRENT_VERSION,
-					   0x0, 0x0, 0x0};
-
-#if LJ_HASJIT
-
-static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
-{
-  GCproto *pt = &gcref(trace->startpt)->pt;
-  BCLine lineno = 0;
-
-  const BCIns *startpc = mref(trace->startpc, const BCIns);
-  lua_assert(startpc >= proto_bc(pt) &&
-             startpc < proto_bc(pt) + pt->sizebc);
-
-  lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
-
-  lj_wbuf_addbyte(out, SYMTAB_TRACE);
-  lj_wbuf_addu64(out, (uint64_t)trace->traceno);
-  lj_wbuf_addu64(out, (uint64_t)trace->mcode);
-  /*
-  ** The information about the prototype, associated with the
-  ** trace's start has already been dumped, as it is anchored
-  ** via the trace and is not collected while the trace is alive.
-  ** For this reason, we do not need to repeat dumping the chunk
-  ** name for the prototype.
-  */
-  lj_wbuf_addu64(out, (uintptr_t)pt);
-  lj_wbuf_addu64(out, (uint64_t)lineno);
-}
-
-#else
-
-static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
-{
-  UNUSED(out);
-  UNUSED(trace);
-  lua_assert(0);
-}
-
-#endif
-
-static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
-{
-  const GCRef *iter = &g->gc.root;
-  const GCobj *o;
-  const size_t ljs_header_len = sizeof(ljs_header) / sizeof(ljs_header[0]);
-
-  /* Write prologue. */
-  lj_wbuf_addn(out, ljs_header, ljs_header_len);
-
-  while ((o = gcref(*iter)) != NULL) {
-    switch (o->gch.gct) {
-    case (~LJ_TPROTO): {
-      const GCproto *pt = gco2pt(o);
-      lj_wbuf_addbyte(out, SYMTAB_LFUNC);
-      lj_wbuf_addu64(out, (uintptr_t)pt);
-      lj_wbuf_addstring(out, proto_chunknamestr(pt));
-      lj_wbuf_addu64(out, (uint64_t)pt->firstline);
-      break;
-    }
-    case (~LJ_TTRACE): {
-      dump_symtab_trace(out, gco2trace(o));
-      break;
-    }
-    default:
-      break;
-    }
-    iter = &o->gch.nextgc;
-  }
-
-  lj_wbuf_addbyte(out, SYMTAB_FINAL);
-}
-
 /* ---------------------------- Memory profiler ----------------------------- */
+#include "lj_symtab.h"
 
 enum memprof_state {
   /* Memory profiler is not running. */
@@ -331,7 +253,7 @@ int lj_memprof_start(struct lua_State *L, const struct lj_memprof_options *opt)
 
   /* Init output. */
   lj_wbuf_init(&mp->out, mp_opt->writer, mp_opt->ctx, mp_opt->buf, mp_opt->len);
-  dump_symtab(&mp->out, mp->g);
+  lj_symtab_dump(&mp->out, mp->g);
 
   /* Write prologue. */
   lj_wbuf_addn(&mp->out, ljm_header, ljm_header_len);
diff --git a/src/lj_memprof.h b/src/lj_memprof.h
index 395fb429..7fe17af4 100644
--- a/src/lj_memprof.h
+++ b/src/lj_memprof.h
@@ -16,47 +16,6 @@
 #include "lj_def.h"
 #include "lj_wbuf.h"
 
-#define LJS_CURRENT_VERSION 0x2
-
-/*
-** symtab format:
-**
-** symtab         := prologue sym*
-** prologue       := 'l' 'j' 's' version reserved
-** version        := <BYTE>
-** reserved       := <BYTE> <BYTE> <BYTE>
-** sym            := sym-lua | sym-trace | sym-final
-** sym-lua        := sym-header sym-addr sym-chunk sym-line
-** sym-trace      := sym-header trace-no trace-addr sym-addr sym-line
-** sym-header     := <BYTE>
-** sym-addr       := <ULEB128>
-** sym-chunk      := string
-** sym-line       := <ULEB128>
-** sym-final      := sym-header
-** trace-no       := <ULEB128>
-** trace-addr     := <ULEB128>
-** string         := string-len string-payload
-** string-len     := <ULEB128>
-** string-payload := <BYTE> {string-len}
-**
-** <BYTE>   :  A single byte (no surprises here)
-** <ULEB128>:  Unsigned integer represented in ULEB128 encoding
-**
-** (Order of bits below is hi -> lo)
-**
-** version: [VVVVVVVV]
-**  * VVVVVVVV: Byte interpreted as a plain numeric version number
-**
-** sym-header: [FUUUUUTT]
-**  * TT    : 2 bits for representing symbol type
-**  * UUUUU : 5 unused bits
-**  * F     : 1 bit marking the end of the symtab (final symbol)
-*/
-
-#define SYMTAB_LFUNC ((uint8_t)0)
-#define SYMTAB_TRACE ((uint8_t)1)
-#define SYMTAB_FINAL ((uint8_t)0x80)
-
 #define LJM_CURRENT_FORMAT_VERSION 0x02
 
 /*
diff --git a/src/lj_symtab.c b/src/lj_symtab.c
new file mode 100644
index 00000000..0cd3e8fc
--- /dev/null
+++ b/src/lj_symtab.c
@@ -0,0 +1,84 @@
+/*
+** Implementation of symbol table for profilers.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#define lj_symtab_c
+#define LUA_CORE
+
+#include "lj_symtab.h"
+
+static const unsigned char ljs_header[] = {'l', 'j', 's', LJS_CURRENT_VERSION,
+                                          0x0, 0x0, 0x0}; /* Symtab prologue; the last three bytes are reserved. */
+
+#if LJ_HASJIT
+
+static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
+{  /* Write one sym-trace entry for the given trace into out. */
+  GCproto *pt = &gcref(trace->startpt)->pt;
+  BCLine lineno = 0;
+
+  const BCIns *startpc = mref(trace->startpc, const BCIns);
+  lua_assert(startpc >= proto_bc(pt) &&
+             startpc < proto_bc(pt) + pt->sizebc);
+
+  lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
+
+  lj_wbuf_addbyte(out, SYMTAB_TRACE);  /* sym-header */
+  lj_wbuf_addu64(out, (uint64_t)trace->traceno);  /* trace-no */
+  lj_wbuf_addu64(out, (uint64_t)trace->mcode);  /* trace-addr */
+  /*
+  ** The information about the prototype associated with the
+  ** trace's start has already been dumped, as it is anchored
+  ** via the trace and is not collected while the trace is alive.
+  ** For this reason, we do not need to repeat dumping the chunk
+  ** name for the prototype.
+  */
+  lj_wbuf_addu64(out, (uintptr_t)pt);  /* sym-addr */
+  lj_wbuf_addu64(out, (uint64_t)lineno);  /* sym-line */
+}
+
+#else
+
+static void dump_symtab_trace(struct lj_wbuf *out, const GCtrace *trace)
+{  /* Stub for builds without the JIT: writes nothing. */
+  UNUSED(out);
+  UNUSED(trace);
+  lua_assert(0);  /* Presumably unreachable without the JIT; confirm. */
+}
+
+#endif
+
+void lj_symtab_dump(struct lj_wbuf *out, const struct global_State *g)
+{  /* Write the prologue, one entry per proto/trace object, then sym-final. */
+  const GCRef *iter = &g->gc.root;  /* Start of the nextgc-linked chain. */
+  const GCobj *o;
+  const size_t ljs_header_len = sizeof(ljs_header) / sizeof(ljs_header[0]);
+
+  /* Write prologue. */
+  lj_wbuf_addn(out, ljs_header, ljs_header_len);
+
+  while ((o = gcref(*iter)) != NULL) {  /* Follow nextgc until a NULL ref. */
+    switch (o->gch.gct) {
+    case (~LJ_TPROTO): {  /* sym-lua: header, address, chunk name, line. */
+      const GCproto *pt = gco2pt(o);
+      lj_wbuf_addbyte(out, SYMTAB_LFUNC);
+      lj_wbuf_addu64(out, (uintptr_t)pt);
+      lj_wbuf_addstring(out, proto_chunknamestr(pt));
+      lj_wbuf_addu64(out, (uint64_t)pt->firstline);
+      break;
+    }
+    case (~LJ_TTRACE): {  /* sym-trace, written by the helper. */
+      dump_symtab_trace(out, gco2trace(o));
+      break;
+    }
+    default:  /* Other object types get no symtab entry. */
+      break;
+    }
+    iter = &o->gch.nextgc;
+  }
+
+  lj_wbuf_addbyte(out, SYMTAB_FINAL);  /* sym-final ends the table. */
+}
diff --git a/src/lj_symtab.h b/src/lj_symtab.h
new file mode 100644
index 00000000..03384b1f
--- /dev/null
+++ b/src/lj_symtab.h
@@ -0,0 +1,65 @@
+/*
+** Symbol table for profilers.
+**
+** Major portions taken verbatim or adapted from the LuaVela.
+** Copyright (C) 2015-2019 IPONWEB Ltd.
+*/
+
+#ifndef LJ_SYMTAB_H
+#define LJ_SYMTAB_H
+
+#include "lj_wbuf.h"
+#include "lj_obj.h"
+#include "lj_debug.h"
+
+#if LJ_HASJIT
+#include "lj_dispatch.h"
+#endif
+
+#define LJS_CURRENT_VERSION 0x2
+
+/*
+** symtab format:
+**
+** symtab         := prologue sym*
+** prologue       := 'l' 'j' 's' version reserved
+** version        := <BYTE>
+** reserved       := <BYTE> <BYTE> <BYTE>
+** sym            := sym-lua | sym-trace | sym-final
+** sym-lua        := sym-header sym-addr sym-chunk sym-line
+** sym-trace      := sym-header trace-no trace-addr sym-addr sym-line
+** sym-header     := <BYTE>
+** sym-addr       := <ULEB128>
+** sym-chunk      := string
+** sym-line       := <ULEB128>
+** sym-final      := sym-header
+** trace-no       := <ULEB128>
+** trace-addr     := <ULEB128>
+** string         := string-len string-payload
+** string-len     := <ULEB128>
+** string-payload := <BYTE> {string-len}
+**
+** <BYTE>   :  A single byte (no surprises here)
+** <ULEB128>:  Unsigned integer represented in ULEB128 encoding
+**
+** (Order of bits below is hi -> lo)
+**
+** version: [VVVVVVVV]
+**  * VVVVVVVV: Byte interpreted as a plain numeric version number
+**
+** sym-header: [FUUUUUTT]
+**  * TT    : 2 bits for representing symbol type
+**  * UUUUU : 5 unused bits
+**  * F     : 1 bit marking the end of the symtab (final symbol)
+*/
+
+#define SYMTAB_LFUNC ((uint8_t)0)
+#define SYMTAB_TRACE ((uint8_t)1)
+#define SYMTAB_FINAL ((uint8_t)0x80)
+
+/*
+** Dumps the symbol table (Lua function prototypes and traces) into a buffer.
+*/
+void lj_symtab_dump(struct lj_wbuf *out, const struct global_State *g);
+
+#endif
diff --git a/src/ljamalg.c b/src/ljamalg.c
index ce7a0d6c..6ad5289c 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -52,6 +52,7 @@
 #include "lj_mapi.c"
 #include "lj_profile.c"
 #include "lj_profile_timer.c"
+#include "lj_symtab.c"
 #include "lj_memprof.c"
 #include "lj_lex.c"
 #include "lj_parse.c"
-- 
2.35.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Tarantool-patches] [PATCH luajit v3 4/7] core: introduce lua and platform profiler
  2022-04-06 12:49 [Tarantool-patches] [PATCH luajit v3 0/7] introuduce platform profiler Maxim Kokryashkin via Tarantool-patches
                   ` (2 preceding siblings ...)
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 3/7] memprof: move symtab to a separate module Maxim Kokryashkin via Tarantool-patches
@ 2022-04-06 12:49 ` Maxim Kokryashkin via Tarantool-patches
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 5/7] memprof: add profile common section Maxim Kokryashkin via Tarantool-patches
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2022-04-06 12:49 UTC (permalink / raw)
  To: tarantool-patches, imun, skaplun

From: Mikhail Shishatskiy <m.shishatskiy@tarantool.org>

This patch introduces a sampling platform profiler for
the Lua machine.

The profiler uses the signal sampling backend from the
low-level profiler built in vanilla LuaJIT, which was put
into a separate module in one of the previous patches.
Thus, one cannot use both profilers at the same time.

First of all, the profiler dumps the definitions of all
loaded Lua functions and all loaded shared libraries
(symtab) via the write buffer introduced in one of
the previous patches.

As the profiling signal may occur at any time, we need
to provide some guarantees for the sampling profiler
to unwind consistent stack frames. So, the VM is adjusted
to save stack's current top frame into a dedicated
variable `guesttop` in the virtual machine global state.

When a signal occurs, the profiler dumps the current VM state
and additional info about stacks that can be
considered consistent in a given state. Also, profiling
can be done without writing to a file: vmstate counters
are accumulated in static memory and can be received via
the `luaM_sysprof_report` function. For more details see
the <lmisclib.h> header file.

When profiling is over, the old signal handler is restored,
and a special epilogue header is written.

Part of tarantool/tarantool#781
---
 CMakeLists.txt                                |   6 +
 src/CMakeLists.txt                            |   1 +
 src/Makefile.dep.original                     |   8 +-
 src/lj_arch.h                                 |  11 +
 src/lj_errmsg.h                               |   2 +-
 src/lj_mapi.c                                 |  26 +
 src/lj_state.c                                |   7 +
 src/lj_sysprof.c                              | 483 ++++++++++++++++++
 src/lj_sysprof.h                              |  94 ++++
 src/lmisclib.h                                |  93 ++++
 test/tarantool-tests/CMakeLists.txt           |   1 +
 .../misclib-sysprof-capi.test.lua             |  53 ++
 .../misclib-sysprof-capi/CMakeLists.txt       |   1 +
 .../misclib-sysprof-capi/testsysprof.c        | 269 ++++++++++
 14 files changed, 1052 insertions(+), 3 deletions(-)
 create mode 100644 src/lj_sysprof.c
 create mode 100644 src/lj_sysprof.h
 create mode 100644 test/tarantool-tests/misclib-sysprof-capi.test.lua
 create mode 100644 test/tarantool-tests/misclib-sysprof-capi/CMakeLists.txt
 create mode 100644 test/tarantool-tests/misclib-sysprof-capi/testsysprof.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5348e043..0c702093 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -184,6 +184,12 @@ if(LUAJIT_DISABLE_MEMPROF)
   AppendFlags(TARGET_C_FLAGS -DLUAJIT_DISABLE_MEMPROF)
 endif()
 
+# Disable platform and lua profiler.
+option(LUAJIT_DISABLE_SYSPROF "LuaJIT platform and lua profiler support" OFF)
+if(LUAJIT_DISABLE_SYSPROF)
+  AppendFlags(TARGET_C_FLAGS -DLUAJIT_DISABLE_SYSPROF)
+endif()
+
 # Switch to harder (and slower) hash function when a collision
 # chain in the string hash table exceeds a certain length.
 option(LUAJIT_SMART_STRINGS "Harder string hashing function" ON)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b4ce407b..1a3f106a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -71,6 +71,7 @@ make_source_list(SOURCES_PROFILER
     lj_profile.c
     lj_profile_timer.c
     lj_symtab.c
+    lj_sysprof.c
 )
 
 # Lua standard library + extensions by LuaJIT.
diff --git a/src/Makefile.dep.original b/src/Makefile.dep.original
index efe39e84..158e702c 100644
--- a/src/Makefile.dep.original
+++ b/src/Makefile.dep.original
@@ -140,8 +140,8 @@ lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
 lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
  lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
-lj_mapi.o: lj_mapi.c lua.h luaconf.h lmisclib.h lj_obj.h lj_def.h lj_arch.h \
- lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h
+lj_mapi.o: lj_mapi.c lua.h luaconf.h lmisclib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h lj_sysprof.h
 lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
  lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
@@ -205,6 +205,10 @@ lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_char.h lj_strscan.h
 lj_symtab.o: lj_symtab.c lj_symtab.h lj_wbuf.h lj_def.h lua.h luaconf.h \
  lj_obj.h lj_arch.h
+lj_sysprof.o: lj_sysprof.c lj_arch.h lua.h luaconf.h lj_sysprof.h \
+ lj_obj.h lj_def.h lmisclib.h lj_debug.h lj_dispatch.h lj_bc.h lj_jit.h \
+ lj_ir.h lj_frame.h lj_trace.h lj_traceerr.h lj_wbuf.h lj_profile_timer.h \
+ lj_symtab.h
 lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_tab.h
 lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 5bf0afb8..f0b60092 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -220,6 +220,7 @@
 #endif
 
 #define LJ_ARCH_NOMEMPROF	1
+#define LJ_ARCH_NOSYSPROF	1
 
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
 
@@ -243,6 +244,7 @@
 #define LJ_ARCH_VERSION		80
 
 #define LJ_ARCH_NOMEMPROF	1
+#define LJ_ARCH_NOSYSPROF	1
 
 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
 
@@ -310,6 +312,7 @@
 #endif
 
 #define LJ_ARCH_NOMEMPROF	1
+#define LJ_ARCH_NOSYSPROF	1
 
 #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
 
@@ -371,6 +374,7 @@
 #endif
 
 #define LJ_ARCH_NOMEMPROF	1
+#define LJ_ARCH_NOSYSPROF	1
 
 #else
 #error "No target architecture defined"
@@ -585,4 +589,11 @@
 #define LJ_HASMEMPROF		1
 #endif
 
+/* Disable or enable the platform and lua profiler. */
+#if defined(LUAJIT_DISABLE_SYSPROF) || defined(LJ_ARCH_NOSYSPROF) || LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN || LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_XBOX360
+#define LJ_HASSYSPROF		0
+#else
+#define LJ_HASSYSPROF		1
+#endif
+
 #endif
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
index ae0a18c0..77a08cb0 100644
--- a/src/lj_errmsg.h
+++ b/src/lj_errmsg.h
@@ -187,7 +187,7 @@ ERRDEF(FFI_NYICALL,	"NYI: cannot call this C function (yet)")
 
 /* Profiler errors. */
 ERRDEF(PROF_MISUSE,	"profiler misuse")
-#if LJ_HASMEMPROF
+#if LJ_HASMEMPROF || LJ_HASSYSPROF
 ERRDEF(PROF_ISRUNNING,	"profiler is running already")
 ERRDEF(PROF_NOTRUNNING,	"profiler is not running")
 #endif
diff --git a/src/lj_mapi.c b/src/lj_mapi.c
index b2b35a17..0b2284f0 100644
--- a/src/lj_mapi.c
+++ b/src/lj_mapi.c
@@ -18,6 +18,8 @@
 #include "lj_jit.h"
 #endif
 
+#include "lj_sysprof.h"
+
 LUAMISC_API void luaM_metrics(lua_State *L, struct luam_Metrics *metrics)
 {
   global_State *g = G(L);
@@ -63,3 +65,27 @@ LUAMISC_API void luaM_metrics(lua_State *L, struct luam_Metrics *metrics)
   metrics->jit_trace_num = 0;
 #endif
 }
+
+/* --- Platform and Lua profiler ------------------------------------------ */
+
+LUAMISC_API int luaM_sysprof_configure(const struct luam_sysprof_config *config)
+{  /* Public wrapper: forwards to lj_sysprof_configure(). */
+  return lj_sysprof_configure(config);
+}
+
+LUAMISC_API int luaM_sysprof_start(lua_State *L,
+                                   const struct luam_sysprof_options *opt)
+{  /* Public wrapper: forwards to lj_sysprof_start(). */
+  return lj_sysprof_start(L, opt);
+}
+
+/* Stop profiling. Public wrapper: forwards to lj_sysprof_stop(). */
+LUAMISC_API int luaM_sysprof_stop(lua_State *L)
+{
+  return lj_sysprof_stop(L);
+}
+
+LUAMISC_API int luaM_sysprof_report(struct luam_sysprof_counters *counters)
+{  /* Public wrapper: forwards to lj_sysprof_report(). */
+  return lj_sysprof_report(counters);
+}
diff --git a/src/lj_state.c b/src/lj_state.c
index f82b1b5b..cc6f92f1 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -33,6 +33,10 @@
 #include "lj_memprof.h"
 #endif
 
+#if LJ_HASSYSPROF
+#include "lj_sysprof.h"
+#endif
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Stack sizes. */
@@ -267,6 +271,9 @@ LUA_API void lua_close(lua_State *L)
 #if LJ_HASMEMPROF
   lj_memprof_stop(L);
 #endif
+#if LJ_HASSYSPROF
+  lj_sysprof_stop(L);
+#endif
 #if LJ_HASPROFILE
   luaJIT_profile_stop(L);
 #endif
diff --git a/src/lj_sysprof.c b/src/lj_sysprof.c
new file mode 100644
index 00000000..c0c83fa9
--- /dev/null
+++ b/src/lj_sysprof.c
@@ -0,0 +1,483 @@
+#define lj_sysprof_c
+#define LUA_CORE
+
+#include "lj_arch.h"
+#include "lj_sysprof.h"
+
+#if LJ_HASSYSPROF
+
+#include "lj_obj.h"
+#include "lj_debug.h"
+#include "lj_dispatch.h"
+#include "lj_frame.h"
+
+#if LJ_HASJIT
+#include "lj_jit.h"
+#include "lj_trace.h"
+#endif
+
+#include "lj_wbuf.h"
+#include "lj_profile_timer.h"
+#include "lj_symtab.h"
+
+#include <pthread.h>
+#include <errno.h>
+#include <execinfo.h>
+
+#define SYSPROF_HANDLER_STACK_DEPTH 4
+#define SYSPROF_BACKTRACE_BUF_SIZE 4096
+
+enum sysprof_state {
+  /* Profiler needs to be configured (first enumerator, i.e. value 0). */
+  SPS_UNCONFIGURED,
+  /* Profiler is configured but not running. */
+  SPS_IDLE,
+  /* Profiler is running; the signal handler records samples. */
+  SPS_PROFILE,
+  /*
+  ** The output stream was stopped or failed; the handler records nothing.
+  ** Saved errno is set at luaM_sysprof_stop.
+  */
+  SPS_HALT
+};
+
+struct sysprof {
+  global_State *g; /* Profiled VM. */
+  pthread_t thread; /* Profiled thread (the one that initialized profiling). */
+  enum sysprof_state state; /* Internal state. */
+  struct lj_wbuf out; /* Output accumulator. */
+  struct luam_sysprof_counters counters; /* Profiling counters. */
+  struct luam_sysprof_options opt; /* Profiling options (copied at init). */
+  struct luam_sysprof_config config; /* Profiler configurations. */
+  lj_profile_timer timer; /* Profiling timer. */
+  int saved_errno; /* Saved errno when profiler failed. */
+};
+
+static struct sysprof sysprof = {0}; /* The only profiler instance in this file. */
+
+/* --- Stream ------------------------------------------------------------- */
+
+static const uint8_t ljp_header[] = {'l', 'j', 'p', LJP_FORMAT_VERSION,
+                                     0x0, 0x0, 0x0}; /* Stream prologue: magic, version, three zero bytes. */
+
+static int stream_is_needed(struct sysprof *sp)
+{  /* Non-zero for every mode except LUAM_SYSPROF_DEFAULT. */
+  return LUAM_SYSPROF_DEFAULT != sp->opt.mode;
+}
+
+static void stream_prologue(struct sysprof *sp)
+{  /* Start of the stream: symbol table first, then the profile header. */
+  lj_symtab_dump(&sp->out, sp->g);
+  lj_wbuf_addn(&sp->out, ljp_header, sizeof(ljp_header));
+}
+
+static void stream_epilogue(struct sysprof *sp)
+{  /* End of the stream: a single LJP_EPILOGUE_BYTE. */
+  lj_wbuf_addbyte(&sp->out, LJP_EPILOGUE_BYTE);
+}
+
+static void stream_lfunc(struct lj_wbuf *buf, GCfunc *func)
+{  /* Lua function frame: tag byte, prototype address, first line. */
+  GCproto *pt = funcproto(func);
+  lua_assert(pt != NULL);
+  lj_wbuf_addbyte(buf, LJP_FRAME_LFUNC);
+  lj_wbuf_addu64(buf, (uintptr_t)pt);
+  lj_wbuf_addu64(buf, (uint64_t)pt->firstline);
+}
+
+static void stream_cfunc(struct lj_wbuf *buf, GCfunc *func)
+{  /* C function frame: tag byte, then the address of the C function. */
+  lj_wbuf_addbyte(buf, LJP_FRAME_CFUNC);
+  lj_wbuf_addu64(buf, (uintptr_t)func->c.f);
+}
+
+static void stream_ffunc(struct lj_wbuf *buf, GCfunc *func)
+{  /* Fast function frame: tag byte, then the function's ffid. */
+  lj_wbuf_addbyte(buf, LJP_FRAME_FFUNC);
+  lj_wbuf_addu64(buf, func->c.ffid);
+}
+
+static void stream_frame_lua(struct lj_wbuf *buf, cTValue *frame)
+{  /* Write one frame record, picking the writer by the function's kind. */
+  GCfunc *func = frame_func(frame);
+  lua_assert(NULL != func);
+  if (isluafunc(func)) {
+    stream_lfunc(buf, func);
+  } else if (isffunc(func)) {
+    stream_ffunc(buf, func);
+  } else {  /* Neither a Lua nor a fast function: written as a C function. */
+    stream_cfunc(buf, func);
+  }
+}
+
+static void stream_backtrace_lua(struct sysprof *sp)
+{  /* Write the Lua frames from the top frame downwards, then a terminator. */
+  global_State *g = sp->g;
+  struct lj_wbuf *buf = &sp->out;
+  cTValue *top_frame = NULL, *frame = NULL, *bot = NULL;
+  lua_State *L = NULL;
+
+  lua_assert(g != NULL);
+  L = gco2th(gcref(g->cur_L));  /* The coroutine recorded in g->cur_L. */
+  lua_assert(L != NULL);
+
+  top_frame = sp->g->top_frame.guesttop.interp_base - 1;  /* Slot below base. */
+  bot = tvref(L->stack) + LJ_FR2;  /* Walk stops once frame <= bot. */
+  /* Traverse frames backwards */
+  for (frame = top_frame; frame > bot; frame = frame_prev(frame)) {
+    if (frame_gc(frame) == obj2gco(L)) {
+      continue;  /* Skip dummy frames. See lj_err_optype_call(). */
+    }
+    stream_frame_lua(buf, frame);
+  }
+
+  lj_wbuf_addbyte(buf, LJP_FRAME_LUA_LAST);  /* Marks the end of Lua frames. */
+}
+
+static void *stream_frame_host(int frame_no, void *addr)
+{  /* Writer given to the backtracer: may write addr; NULL means "stop". */
+  struct sysprof *sp = &sysprof;
+
+  if (LJ_UNLIKELY(frame_no <= SYSPROF_HANDLER_STACK_DEPTH)) {
+    /*
+    ** We don't want the profiler stack to be streamed, as it will
+    ** burden the profile with unnecessary information.
+    */
+    return addr;  /* Nothing is written for these frames. */
+  } else if (LJ_UNLIKELY(sp->opt.mode == LUAM_SYSPROF_LEAF &&
+                         frame_no > SYSPROF_HANDLER_STACK_DEPTH + 1)) {
+    return NULL;  /* Leaf mode: at most one frame past the skipped ones. */
+  }
+
+  lj_wbuf_addu64(&sp->out, (uintptr_t)addr);
+  return addr;
+}
+
+static void default_backtrace_host(void *(writer)(int frame_no, void *addr))
+{  /* Default host backtracer: collects frames with backtrace() and feeds writer. */
+  static void* backtrace_buf[SYSPROF_BACKTRACE_BUF_SIZE] = {0};  /* ISO C: no empty braces. */
+
+  struct sysprof *sp = &sysprof;
+  const int depth = backtrace(backtrace_buf,  /* Leaf mode caps the capture size. */
+                              sp->opt.mode == LUAM_SYSPROF_LEAF
+                              ? SYSPROF_HANDLER_STACK_DEPTH + 1
+                              : SYSPROF_BACKTRACE_BUF_SIZE);
+  lua_assert(depth >= SYSPROF_HANDLER_STACK_DEPTH);
+
+  for (int i = SYSPROF_HANDLER_STACK_DEPTH; i < depth; ++i) {  /* Skip first entries. */
+    if (!writer(i - SYSPROF_HANDLER_STACK_DEPTH + 1, backtrace_buf[i])) {  /* frame_no starts at 1; NOTE(review): stream_frame_host skips frame_no <= depth again, confirm. */
+      return;  /* The writer asked to stop. */
+    }
+  }
+}
+
+static void stream_backtrace_host(struct sysprof *sp)
+{  /* Run the configured backtracer, then write the host-stack terminator. */
+  lua_assert(sp->config.backtracer != NULL);
+  sp->config.backtracer(stream_frame_host);
+  lj_wbuf_addu64(&sp->out, (uintptr_t)LJP_FRAME_HOST_LAST);
+}
+
+static void stream_trace(struct sysprof *sp)
+{  /* Payload for LJ_VMST_TRACE: trace number, J->prev_pt, J->prev_line. */
+  struct lj_wbuf *out = &sp->out;
+
+  uint32_t traceno = sp->g->vmstate;  /* vmstate is used as the trace number. */
+  jit_State *J = G2J(sp->g);
+
+  lj_wbuf_addu64(out, traceno);
+  lj_wbuf_addu64(out, (uintptr_t)J->prev_pt);
+  lj_wbuf_addu64(out, J->prev_line);
+}
+
+static void stream_guest(struct sysprof *sp)
+{  /* Payload for guest states: Lua backtrace first, then host backtrace. */
+  stream_backtrace_lua(sp);
+  stream_backtrace_host(sp);
+}
+
+static void stream_host(struct sysprof *sp)
+{  /* Payload for host-only states: just the host backtrace. */
+  stream_backtrace_host(sp);
+}
+
+typedef void (*event_streamer)(struct sysprof *sp);  /* Per-vmstate payload writer. */
+
+static event_streamer event_streamers[] = {  /* Indexed by vmstate in stream_event(). */
+  /* XXX: order is important */
+  stream_host,  /* LJ_VMST_INTERP */
+  stream_guest, /* LJ_VMST_LFUNC */
+  stream_guest, /* LJ_VMST_FFUNC */
+  stream_guest, /* LJ_VMST_CFUNC */
+  stream_host,  /* LJ_VMST_GC */
+  stream_host,  /* LJ_VMST_EXIT */
+  stream_host,  /* LJ_VMST_RECORD */
+  stream_host,  /* LJ_VMST_OPT */
+  stream_host,  /* LJ_VMST_ASM */
+  stream_trace  /* LJ_VMST_TRACE */
+};
+
+static void stream_event(struct sysprof *sp, uint32_t vmstate)
+{  /* Write the one-byte event header, then the payload for this vmstate. */
+  event_streamer stream = NULL;
+
+  /* Check that vmstate fits in 4 bits (see streaming format) */
+  lua_assert((vmstate & ~(uint32_t)((1 << 4) - 1)) == 0);
+  lj_wbuf_addbyte(&sp->out, (uint8_t)vmstate);
+
+  stream = event_streamers[vmstate];  /* vmstate is the table index. */
+  lua_assert(NULL != stream);
+  stream(sp);
+}
+
+/* -- Signal handler ------------------------------------------------------ */
+
+static void sysprof_record_sample(struct sysprof *sp, siginfo_t *info)
+{  /* Account one sample in the counters and, if streaming, write its event. */
+  global_State *g = sp->g;
+  uint32_t _vmstate = ~(uint32_t)(g->vmstate);
+  uint32_t vmstate = _vmstate < LJ_VMST_TRACE ? _vmstate : LJ_VMST_TRACE;  /* Clamp to TRACE. */
+
+  lua_assert(pthread_self() == sp->thread);
+
+  /* Caveat: order of counters must match vmstate order in <lj_obj.h>. */
+  ((uint64_t *)&sp->counters)[vmstate]++;  /* Counters struct viewed as a uint64_t array. */
+
+  sp->counters.samples++;
+  sp->counters.overruns += info->si_overrun;
+
+  if (stream_is_needed(sp)) {
+    stream_event(sp, vmstate);
+    if (LJ_UNLIKELY(lj_wbuf_test_flag(&sp->out, STREAM_ERRIO|STREAM_STOP))) {
+      sp->saved_errno = lj_wbuf_errno(&sp->out);  /* Read before terminating the buffer. */
+      lj_wbuf_terminate(&sp->out);
+      sp->state = SPS_HALT;  /* The handler ignores later signals in this state. */
+    }
+  }
+}
+
+static void sysprof_signal_handler(int sig, siginfo_t *info, void *ctx)
+{  /* Signal entry point: records a sample only in the SPS_PROFILE state. */
+  struct sysprof *sp = &sysprof;
+  UNUSED(sig);
+  UNUSED(ctx);
+
+  switch (sp->state) {
+    case SPS_PROFILE:
+      sysprof_record_sample(sp, info);
+      break;
+
+    case SPS_IDLE:
+    case SPS_HALT:
+      /* noop */
+      break;
+
+    default:  /* Includes SPS_UNCONFIGURED: asserts in assert-enabled builds. */
+      lua_assert(0);
+      break;
+  }
+}
+
+/* -- Internal ------------------------------------------------------------ */
+
+static int sysprof_validate(struct sysprof *sp,
+                            const struct luam_sysprof_options *opt)
+{  /* Check the profiler state and the options before a start. */
+  switch (sp->state) {
+    case SPS_UNCONFIGURED:
+      return SYSPROF_ERRUSE;
+
+    case SPS_IDLE: {
+      if (opt->mode > LUAM_SYSPROF_CALLGRAPH) {  /* Mode above the last one checked. */
+        return SYSPROF_ERRUSE;
+      } else if (opt->mode != LUAM_SYSPROF_DEFAULT &&  /* Streaming modes need I/O setup. */
+                 (opt->buf == NULL || opt->len == 0 ||
+                  sp->config.writer == NULL || sp->config.on_stop == NULL)) {
+        return SYSPROF_ERRUSE;
+      } else if (opt->interval == 0) {  /* A zero interval is rejected. */
+        return SYSPROF_ERRUSE;
+      }
+      break;
+    }
+
+    case SPS_PROFILE:
+    case SPS_HALT:
+      return SYSPROF_ERRRUN;
+
+    default:
+      lua_assert(0);  /* NOTE(review): with asserts off this falls through to SUCCESS. */
+  }
+
+  return SYSPROF_SUCCESS;
+}
+
+/*
+** Validate `opt` and, on success, prepare `sp` for a new profiling run
+** of the VM that owns `L`. A failed validation status is returned as is
+** and leaves `sp` untouched.
+*/
+static int sysprof_init(struct sysprof *sp, lua_State *L,
+                        const struct luam_sysprof_options *opt)
+{
+  const int rc = sysprof_validate(sp, opt);
+  if (rc != SYSPROF_SUCCESS)
+    return rc;
+
+  /* Keep a private copy of the validated options. */
+  memcpy(&sp->opt, opt, sizeof(sp->opt));
+
+  /* Remember the profiled VM and the thread that started the run. */
+  sp->thread = pthread_self();
+  sp->g = G(L);
+
+  /* Begin with clean statistics and no pending I/O error. */
+  sp->saved_errno = 0;
+  memset(&sp->counters, 0, sizeof(sp->counters));
+
+  /* The write buffer is only set up for the streaming modes. */
+  if (stream_is_needed(sp))
+    lj_wbuf_init(&sp->out, sp->config.writer, opt->ctx, opt->buf, opt->len);
+
+  return SYSPROF_SUCCESS;
+}
+
+/* -- Public profiling API ------------------------------------------------ */
+
+/*
+** Store a copy of `config` in the profiler and move it to the idle
+** state. Allowed only while the profiler is unconfigured or idle;
+** otherwise SYSPROF_ERRUSE is returned and nothing is changed.
+*/
+int lj_sysprof_configure(const struct luam_sysprof_config *config)
+{
+  struct sysprof *prof = &sysprof;
+  lua_assert(config != NULL);
+
+  if (!(prof->state == SPS_UNCONFIGURED || prof->state == SPS_IDLE))
+    return SYSPROF_ERRUSE;
+
+  memcpy(&prof->config, config, sizeof(*config));
+
+  /* Fall back to the built-in host backtracer when none is supplied. */
+  if (prof->config.backtracer == NULL)
+    prof->config.backtracer = default_backtrace_host;
+
+  prof->state = SPS_IDLE;
+  return SYSPROF_SUCCESS;
+}
+
+/*
+** Start profiling the VM that owns `L` with the given options.
+** Returns SYSPROF_SUCCESS, the status reported by sysprof_init() on a
+** failed initialization, or SYSPROF_ERRIO (with errno set) if the stream
+** prologue could not be written. On both failure paths the configured
+** on_stop callback, if any, is invoked with the caller's context.
+*/
+int lj_sysprof_start(lua_State *L, const struct luam_sysprof_options *opt)
+{
+  struct sysprof *sp = &sysprof;
+
+  int status = sysprof_init(sp, L, opt);
+  if (SYSPROF_SUCCESS != status) {
+    if (NULL != sp->config.on_stop) {
+      /*
+      ** Initialization may fail in case of unconfigured sysprof,
+      ** so we cannot guarantee cleaning up resources in this case.
+      */
+      sp->config.on_stop(opt->ctx, opt->buf);
+    }
+    return status;
+  }
+
+  sp->state = SPS_PROFILE;
+
+  if (stream_is_needed(sp)) {
+    stream_prologue(sp);
+    if (LJ_UNLIKELY(lj_wbuf_test_flag(&sp->out, STREAM_ERRIO|STREAM_STOP))) {
+      /* on_stop call may change errno value. */
+      int saved_errno = lj_wbuf_errno(&sp->out);
+      /* Ignore possible errors. sp->out.buf may be NULL here. */
+      sp->config.on_stop(opt->ctx, sp->out.buf);
+      lj_wbuf_terminate(&sp->out);
+      sp->state = SPS_IDLE;
+      errno = saved_errno;
+      return SYSPROF_ERRIO;
+    }
+  }
+
+  /* Arm the sampling timer only after the state and the stream are ready. */
+  sp->timer.opt.interval_msec = opt->interval;
+  sp->timer.opt.handler = sysprof_signal_handler;
+  lj_profile_timer_start(&sp->timer);
+
+  return SYSPROF_SUCCESS;
+}
+
+/*
+** Stop profiling. Returns SYSPROF_ERRSTOP if the profiler is idle,
+** SYSPROF_ERRUSE if `L` belongs to another VM than the one recorded at
+** start, SYSPROF_ERRIO (with errno set) on a stream error, and
+** SYSPROF_SUCCESS otherwise.
+*/
+int lj_sysprof_stop(lua_State *L)
+{
+  struct sysprof *sp = &sysprof;
+  global_State *g = sp->g;
+  struct lj_wbuf *out = &sp->out;
+
+  if (SPS_IDLE == sp->state) {
+    return SYSPROF_ERRSTOP;
+  } else if (G(L) != g) {
+    return SYSPROF_ERRUSE;
+  }
+
+  lj_profile_timer_stop(&sp->timer);
+
+  if (SPS_HALT == sp->state) {
+    errno = sp->saved_errno;
+    sp->state = SPS_IDLE;
+    /* wbuf was terminated when error occurred. */
+    /*
+    ** NOTE(review): on_stop is not invoked on this path, so whatever the
+    ** callback is supposed to release for this run looks like it is
+    ** never released after a sampling-time I/O error -- confirm.
+    */
+    return SYSPROF_ERRIO;
+  }
+
+  sp->state = SPS_IDLE;
+
+  if (stream_is_needed(sp)) {
+    int cb_status = 0;
+
+    stream_epilogue(sp);
+    lj_wbuf_flush(out);
+
+    /* A failed flush and a failed on_stop are both reported as I/O errors. */
+    cb_status = sp->config.on_stop(sp->opt.ctx, out->buf);
+    if (LJ_UNLIKELY(lj_wbuf_test_flag(out, STREAM_ERRIO|STREAM_STOP)) ||
+        0 != cb_status) {
+      errno = lj_wbuf_errno(out);
+      lj_wbuf_terminate(out);
+      return SYSPROF_ERRIO;
+    }
+
+    lj_wbuf_terminate(out);
+  }
+
+  return SYSPROF_SUCCESS;
+}
+
+/*
+** Copy the counters of the last profiling run into `counters`.
+** Available only while the profiler is idle; SYSPROF_ERRUSE otherwise.
+*/
+int lj_sysprof_report(struct luam_sysprof_counters *counters)
+{
+  const struct sysprof *prof = &sysprof;
+
+  if (prof->state == SPS_IDLE) {
+    memcpy(counters, &prof->counters, sizeof(prof->counters));
+    return SYSPROF_SUCCESS;
+  }
+
+  return SYSPROF_ERRUSE;
+}
+
+#else /* LJ_HASSYSPROF */
+
+/*
+** Stubs for builds without sysprof support: every entry point only
+** reports SYSPROF_ERRUSE and has no side effects.
+*/
+
+int lj_sysprof_configure(const struct luam_sysprof_config *config)
+{
+  UNUSED(config);
+  return SYSPROF_ERRUSE;
+}
+
+int lj_sysprof_start(lua_State *L, const struct luam_sysprof_options *opt)
+{
+  /*
+  ** The on_stop callback lives in struct luam_sysprof_config, not in the
+  ** options, and nothing can be configured in this build, so there is
+  ** no callback to invoke here.
+  */
+  UNUSED(L);
+  UNUSED(opt);
+  return SYSPROF_ERRUSE;
+}
+
+int lj_sysprof_stop(lua_State *L)
+{
+  UNUSED(L);
+  return SYSPROF_ERRUSE;
+}
+
+int lj_sysprof_report(struct luam_sysprof_counters *counters)
+{
+  UNUSED(counters);
+  return SYSPROF_ERRUSE;
+}
+
+#endif /* LJ_HASSYSPROF */
+
diff --git a/src/lj_sysprof.h b/src/lj_sysprof.h
new file mode 100644
index 00000000..515bc08c
--- /dev/null
+++ b/src/lj_sysprof.h
@@ -0,0 +1,94 @@
+/*
+** Sysprof - platform and Lua profiler
+*/
+
+/*
+** XXX: Platform profiler is not thread safe. Please, don't try to
+** use it in several VMs at once: only one VM can be profiled at a time.
+*/
+
+/*
+** XXX: Platform profiler uses the same signal backend as lj_profile. Please,
+** don't use both at the same time.
+*/
+
+#ifndef _LJ_SYSPROF_H
+#define _LJ_SYSPROF_H
+
+#include "lj_obj.h"
+#include "lmisclib.h"
+
+#define LJP_FORMAT_VERSION 0x1
+
+/*
+** Event stream format:
+**
+** stream          := symtab sysprof
+** symtab          := see symtab description
+** sysprof         := prologue sample* epilogue
+** prologue        := 'l' 'j' 'p' version reserved
+** version         := <BYTE>
+** reserved        := <BYTE> <BYTE> <BYTE>
+** sample          := sample-guest | sample-host | sample-trace
+** sample-guest    := sample-header stack-lua stack-host
+** sample-host     := sample-header stack-host
+** sample-trace    := sample-header traceno sym-addr line-no
+** sample-header   := <BYTE>
+** stack-lua       := frame-lua* frame-lua-last
+** stack-host      := frame-host* frame-host-last
+** frame-lua       := frame-lfunc | frame-cfunc | frame-ffunc
+** frame-lfunc     := frame-header sym-addr line-no
+** frame-cfunc     := frame-header exec-addr
+** frame-ffunc     := frame-header ffid
+** frame-lua-last  := frame-header
+** frame-header    := <BYTE>
+** frame-host      := exec-addr
+** frame-host-last := <ULEB128>
+** line-no         := <ULEB128>
+** traceno         := <ULEB128>
+** ffid            := <ULEB128>
+** sym-addr        := <ULEB128>
+** exec-addr       := <ULEB128>
+** epilogue        := sample-header
+**
+** <BYTE>   :  A single byte (no surprises here)
+** <ULEB128>:  Unsigned integer represented in ULEB128 encoding
+**
+** (Order of bits below is hi -> lo)
+**
+** version: [VVVVVVVV]
+**  * VVVVVVVV: Byte interpreted as a plain integer version number
+**
+** sample-header: [FUUUUEEE]
+**  * EEE  : 3 bits for representing vmstate (LJ_VMST_*)
+**  * UUUU : 4 unused bits
+**  * F    : 0 for regular samples, 1 for epilogue's Final header
+**           (if F is set to 1, all other bits are currently ignored)
+**
+** frame-header: [FUUUUUEE]
+**  * EE    : 2 bits for representing frame type (FRAME_*)
+**  * UUUUU : 5 unused bits
+**  * F     : 0 for regular frames, 1 for final frame
+**            (if F is set to 1, all other bits are currently ignored)
+**
+** frame-host-last = NULL
+*/
+
+/* Frame type tags stored in the frame-header byte (see the format above). */
+#define LJP_FRAME_LFUNC ((uint8_t)1)
+#define LJP_FRAME_CFUNC ((uint8_t)2)
+#define LJP_FRAME_FFUNC ((uint8_t)3)
+/* Frame-header with the F bit set: terminates the Lua stack. */
+#define LJP_FRAME_LUA_LAST  0x80
+/* Terminator of the host stack. */
+#define LJP_FRAME_HOST_LAST NULL
+
+/* Sample-header with the F bit set: terminates the event stream. */
+#define LJP_EPILOGUE_BYTE 0x80
+
+/* Store the configuration; the profiler becomes idle and ready to start. */
+int lj_sysprof_configure(const struct luam_sysprof_config *config);
+
+/* Start profiling the VM that owns L with the given options. */
+int lj_sysprof_start(lua_State *L, const struct luam_sysprof_options *opt);
+
+/* Stop profiling; L must belong to the VM passed to lj_sysprof_start. */
+int lj_sysprof_stop(lua_State *L);
+
+/* Copy the counters of the last run; valid only while the profiler is idle. */
+int lj_sysprof_report(struct luam_sysprof_counters *counters);
+
+#endif
+
diff --git a/src/lmisclib.h b/src/lmisclib.h
index 0c07707e..3545ff47 100644
--- a/src/lmisclib.h
+++ b/src/lmisclib.h
@@ -60,6 +60,99 @@ struct luam_Metrics {
 
 LUAMISC_API void luaM_metrics(lua_State *L, struct luam_Metrics *metrics);
 
+/* --- Sysprof - platform and lua profiler -------------------------------- */
+
+/* Profiler configuration: callbacks shared by all profiling runs. */
+struct luam_sysprof_config {
+  /*
+  ** Writer function for profile events.
+  ** Should return the number of bytes written on success or zero in case
+  ** of an error. Setting *data to NULL means end of profiling.
+  ** For details see <lj_wbuf.h>.
+  ** Required for every mode except LUAM_SYSPROF_DEFAULT.
+  */
+  size_t (*writer)(const void **data, size_t len, void *ctx);
+  /*
+  ** Callback on profiler stopping. Required for correct cleanup
+  ** at VM finalization when the profiler is still running.
+  ** Receives the ctx given in the options and the stream buffer.
+  ** Returns zero on success.
+  ** Required for every mode except LUAM_SYSPROF_DEFAULT.
+  */
+  int (*on_stop)(void *ctx, uint8_t *buf);
+  /*
+  ** Backtracing function for the host stack. Should call `frame_writer` on
+  ** each frame in the stack in the order from the stack top to the stack
+  ** bottom. The `frame_writer` function is implemented inside the sysprof
+  ** and will be passed to the `backtracer` function. If `frame_writer` returns
+  ** NULL, backtracing should be stopped. If `frame_writer` returns not NULL,
+  ** the backtracing should be continued if there are frames left.
+  ** May be NULL: the built-in backtracer is used in that case.
+  */
+  void (*backtracer)(void *(*frame_writer)(int frame_no, void *addr));
+};
+
+/* Profiling modes, ordered from the least to the most detailed one. */
+enum luam_sysprof_mode {
+  /*
+  ** DEFAULT mode collects only data for luam_sysprof_counters, which is stored
+  ** in memory and can be collected with luaM_sysprof_report after profiler
+  ** stops.
+  */
+  LUAM_SYSPROF_DEFAULT,
+  /*
+  ** LEAF mode = DEFAULT + streams samples with only top frames of host and
+  ** guest stacks in format described in <lj_sysprof.h>
+  */
+  LUAM_SYSPROF_LEAF,
+  /*
+  ** CALLGRAPH mode = DEFAULT + streams samples with full callchains of host
+  ** and guest stacks in format described in <lj_sysprof.h>
+  */
+  LUAM_SYSPROF_CALLGRAPH
+};
+
+/*
+** Sampling statistics of a profiling run.
+** The vmst_* fields count the samples taken in the corresponding VM
+** state. The profiler indexes them as an array of uint64_t by the VM
+** state number, so they must stay first, contiguous and in this order.
+*/
+struct luam_sysprof_counters {
+  uint64_t vmst_interp;
+  uint64_t vmst_lfunc;
+  uint64_t vmst_ffunc;
+  uint64_t vmst_cfunc;
+  uint64_t vmst_gc;
+  uint64_t vmst_exit;
+  uint64_t vmst_record;
+  uint64_t vmst_opt;
+  uint64_t vmst_asm;
+  uint64_t vmst_trace;
+  /* XXX: order of vmst counters is important */
+  /* Total number of samples taken. */
+  uint64_t samples;
+  /* Sum of the timer overrun counts reported with the samples. */
+  uint64_t overruns;
+};
+
+/* Profiler options: parameters of a single profiling run. */
+struct luam_sysprof_options {
+  /* Profiling mode */
+  enum luam_sysprof_mode mode;
+  /* Sampling interval in msec. Must not be zero. */
+  uint64_t interval;
+  /* Custom buffer to write data. Required unless mode is DEFAULT. */
+  uint8_t *buf;
+  /* The buffer's size. Must not be zero unless mode is DEFAULT. */
+  size_t len;
+  /* Context for the profile writer and final callback. */
+  void *ctx;
+};
+
+/* Status codes returned by the luaM_sysprof_* functions. */
+/* The call succeeded. */
+#define SYSPROF_SUCCESS (0)
+/* Misuse: bad options, unconfigured profiler or wrong profiler state. */
+#define SYSPROF_ERRUSE  (1)
+/* Start was requested while a profiling run is already in progress. */
+#define SYSPROF_ERRRUN  (2)
+/* Stop was requested while the profiler is not running. */
+#define SYSPROF_ERRSTOP (3)
+/* Writing the profile stream failed; errno holds the details. */
+#define SYSPROF_ERRIO   (4)
+
+/* Set the profiler callbacks. Must be called before the first start. */
+LUAMISC_API int luaM_sysprof_configure(const struct luam_sysprof_config *config);
+
+/* Start profiling the VM that owns L. */
+LUAMISC_API int luaM_sysprof_start(lua_State *L,
+                                   const struct luam_sysprof_options *opt);
+
+/* Stop profiling started for the VM that owns L. */
+LUAMISC_API int luaM_sysprof_stop(lua_State *L);
+
+/* Fetch the counters collected by the last profiling run. */
+LUAMISC_API int luaM_sysprof_report(struct luam_sysprof_counters *counters);
+
+
 #define LUAM_MISCLIBNAME "misc"
 LUALIB_API int luaopen_misc(lua_State *L);
 
diff --git a/test/tarantool-tests/CMakeLists.txt b/test/tarantool-tests/CMakeLists.txt
index b21500a0..9435b667 100644
--- a/test/tarantool-tests/CMakeLists.txt
+++ b/test/tarantool-tests/CMakeLists.txt
@@ -63,6 +63,7 @@ add_subdirectory(lj-49-bad-lightuserdata)
 add_subdirectory(lj-601-fix-gc-finderrfunc)
 add_subdirectory(lj-flush-on-trace)
 add_subdirectory(misclib-getmetrics-capi)
+add_subdirectory(misclib-sysprof-capi)
 
 # The part of the memory profiler toolchain is located in tools
 # directory, jit, profiler, and bytecode toolchains are located
diff --git a/test/tarantool-tests/misclib-sysprof-capi.test.lua b/test/tarantool-tests/misclib-sysprof-capi.test.lua
new file mode 100644
index 00000000..d468572d
--- /dev/null
+++ b/test/tarantool-tests/misclib-sysprof-capi.test.lua
@@ -0,0 +1,53 @@
+-- Sysprof is implemented for x86 and x64 architectures only.
+require("utils").skipcond(
+  jit.arch ~= "x86" and jit.arch ~= "x64",
+  jit.arch.." architecture is NIY for sysprof"
+)
+
+local testsysprof = require("testsysprof")
+
+local tap = require("tap")
+
+local test = tap.test("clib-misc-sysprof")
+test:plan(4)
+
+-- API shape and option validation checks implemented in C.
+test:ok(testsysprof.base())
+test:ok(testsysprof.validation())
+
+-- Recursive Lua workload (naive Fibonacci).
+local function lua_payload(n)
+  if n <= 1 then
+    return n
+  end
+  return lua_payload(n - 1) + lua_payload(n - 2)
+end
+
+-- Alternates C and Lua workloads inside a coroutine that yields after
+-- each step.
+local function payload()
+  local n_iterations = 500000
+
+  local co = coroutine.create(function ()
+    for i = 1, n_iterations do
+      if i % 2 == 0 then
+        testsysprof.c_payload(10)
+      else
+        lua_payload(10)
+      end
+      coroutine.yield()
+    end
+  end)
+
+  for _ = 1, n_iterations do
+    coroutine.resume(co)
+  end
+end
+
+local jit = require('jit')
+
+-- First run: JIT compiler is off.
+jit.off()
+
+test:ok(testsysprof.profile_func(payload))
+
+-- Second run: JIT compiler is on, starting with no compiled traces.
+jit.on()
+jit.flush()
+
+test:ok(testsysprof.profile_func(payload))
+
diff --git a/test/tarantool-tests/misclib-sysprof-capi/CMakeLists.txt b/test/tarantool-tests/misclib-sysprof-capi/CMakeLists.txt
new file mode 100644
index 00000000..d9fb1a1a
--- /dev/null
+++ b/test/tarantool-tests/misclib-sysprof-capi/CMakeLists.txt
@@ -0,0 +1 @@
+BuildTestCLib(testsysprof testsysprof.c)
diff --git a/test/tarantool-tests/misclib-sysprof-capi/testsysprof.c b/test/tarantool-tests/misclib-sysprof-capi/testsysprof.c
new file mode 100644
index 00000000..46970a72
--- /dev/null
+++ b/test/tarantool-tests/misclib-sysprof-capi/testsysprof.c
@@ -0,0 +1,269 @@
+#include <lua.h>
+#include <luajit.h>
+#include <lauxlib.h>
+
+#include <lmisclib.h>
+
+#include <stdlib.h>
+#include <errno.h>
+
+#undef NDEBUG
+#include <assert.h>
+
+
+/* --- utils -------------------------------------------------------------- */
+
+#define SYSPROF_INTERVAL_DEFAULT 11
+
+/*
+** Yep, 8Mb. Tuned in order not to bother the platform with too often flushes.
+*/
+#define STREAM_BUFFER_SIZE (8 * 1024 * 1024)
+
+/* Structure given as ctx to sysprof writer and on_stop callback. */
+struct sysprof_ctx {
+  /* Output file stream for data. */
+  FILE *stream;
+  /* Buffer for data. */
+  uint8_t buf[STREAM_BUFFER_SIZE];
+};
+
+/*
+** Default buffer writer function.
+** Just call fwrite to the corresponding FILE until all `len` bytes are
+** written. Returns the number of bytes written. On success *buf_addr is
+** left pointing to the start of the buffer; on a failure other than
+** EINTR it is set to NULL.
+*/
+static size_t buffer_writer_default(const void **buf_addr, size_t len,
+                                    void *opt)
+{
+  struct sysprof_ctx *ctx = opt;
+  FILE *stream = ctx->stream;
+  const void * const buf_start = *buf_addr;
+  const void *data = *buf_addr;
+  size_t write_total = 0;
+
+  assert(len <= STREAM_BUFFER_SIZE);
+
+  for (;;) {
+    const size_t written = fwrite(data, 1, len - write_total, stream);
+
+    /* A zero-length write is treated as a failure. */
+    if (written == 0) {
+      /* Re-tries write in case of EINTR. */
+      if (errno != EINTR) {
+        /* Will be freed as whole chunk later. */
+        *buf_addr = NULL;
+        return write_total;
+      }
+      /* Reset errno so that a stale EINTR cannot trigger another retry. */
+      errno = 0;
+      continue;
+    }
+
+    write_total += written;
+    assert(write_total <= len);
+
+    if (write_total == len)
+      break;
+
+    /* Partial write: continue right after the bytes already written. */
+    data = (uint8_t *)data + (ptrdiff_t)written;
+  }
+
+  *buf_addr = buf_start;
+  return write_total;
+}
+
+/*
+** Default on_stop callback: release the context and close its stream.
+** Returns the result of fclose(). The buffer is a part of the context,
+** so `buf` itself needs no handling.
+*/
+static int on_stop_cb_default(void *opt, uint8_t *buf)
+{
+  struct sysprof_ctx *state = opt;
+  /* Fetch the stream first: the context is gone after free(). */
+  FILE *out = state->stream;
+  (void)buf;
+  free(state);
+  return fclose(out);
+}
+
+/*
+** Allocate a stream context writing to /dev/null and attach it, together
+** with its buffer, to `opt`. Returns SYSPROF_ERRIO if either the
+** allocation or fopen() fails; `opt` is not modified in that case.
+*/
+static int stream_init(struct luam_sysprof_options *opt)
+{
+  struct sysprof_ctx *state = calloc(1, sizeof(struct sysprof_ctx));
+  FILE *sink = NULL;
+
+  if (state == NULL)
+    return SYSPROF_ERRIO;
+
+  sink = fopen("/dev/null", "wb");
+  if (sink == NULL) {
+    free(state);
+    return SYSPROF_ERRIO;
+  }
+  state->stream = sink;
+
+  /* The context is released by the on_stop callback. */
+  opt->len = STREAM_BUFFER_SIZE;
+  opt->buf = state->buf;
+  opt->ctx = state;
+
+  return SYSPROF_SUCCESS;
+}
+
+/* --- Payload ------------------------------------------------------------ */
+
+/*
+** Naive recursive Fibonacci. Deliberately slow: it is used as the C
+** workload of the profiled payload.
+*/
+static double fib(double n)
+{
+  return (n <= 1) ? n : fib(n - 1) + fib(n - 2);
+}
+
+/*
+** C workload for the profiled payload: computes fib() of the first
+** argument and discards the result. Returns no values to Lua.
+*/
+static int c_payload(lua_State *L)
+{
+  fib(luaL_checknumber(L, 1));
+  /* A lua_CFunction must report how many results it left on the stack. */
+  return 0;
+}
+
+/* --- sysprof C API tests ------------------------------------------------ */
+
+/*
+** Compile-time check of the public API shape: every documented field of
+** the sysprof structures is referenced once. The status returned by
+** luaM_sysprof_report() is deliberately ignored. Pushes true.
+*/
+static int base(lua_State *L)
+{
+  struct luam_sysprof_config config = {0};
+  struct luam_sysprof_options opt = {0};
+  struct luam_sysprof_counters cnt = {0};
+
+  /* Configuration callbacks. */
+  (void)config.writer;
+  (void)config.on_stop;
+  (void)config.backtracer;
+
+  /* Run options. */
+  (void)opt.interval;
+  (void)opt.mode;
+  (void)opt.ctx;
+  (void)opt.buf;
+  (void)opt.len;
+
+  luaM_sysprof_report(&cnt);
+
+  /* Totals and per-vmstate counters. */
+  (void)cnt.samples;
+  (void)cnt.overruns;
+  (void)cnt.vmst_interp;
+  (void)cnt.vmst_lfunc;
+  (void)cnt.vmst_ffunc;
+  (void)cnt.vmst_cfunc;
+  (void)cnt.vmst_gc;
+  (void)cnt.vmst_exit;
+  (void)cnt.vmst_record;
+  (void)cnt.vmst_opt;
+  (void)cnt.vmst_asm;
+  (void)cnt.vmst_trace;
+
+  lua_pushboolean(L, 1);
+  return 1;
+}
+
+/*
+** Checks the statuses returned for invalid options and for start/stop
+** calls made in a wrong profiler state. The steps depend on each other
+** and must run in this order. Pushes true.
+*/
+static int validation(lua_State *L)
+{
+  struct luam_sysprof_config config = {};
+  struct luam_sysprof_options opt = {};
+  int status = SYSPROF_SUCCESS;
+
+  /* An all-NULL configuration is accepted: DEFAULT mode needs no callbacks. */
+  status = luaM_sysprof_configure(&config);
+  assert(SYSPROF_SUCCESS == status);
+
+  /* Unknown mode */
+  opt.mode = 0x40;
+  status = luaM_sysprof_start(L, &opt);
+  assert(SYSPROF_ERRUSE == status);
+
+  /* Buffer not configured */
+  opt.mode = LUAM_SYSPROF_CALLGRAPH;
+  opt.buf = NULL;
+  status = luaM_sysprof_start(L, &opt);
+  assert(SYSPROF_ERRUSE == status);
+
+  /* Bad interval */
+  opt.mode = LUAM_SYSPROF_DEFAULT;
+  opt.interval = 0;
+  status = luaM_sysprof_start(L, &opt);
+  assert(SYSPROF_ERRUSE == status);
+
+  /* Check if profiling started */
+  opt.mode = LUAM_SYSPROF_DEFAULT;
+  opt.interval = SYSPROF_INTERVAL_DEFAULT;
+  status = luaM_sysprof_start(L, &opt);
+  assert(SYSPROF_SUCCESS == status);
+
+  /* Already running */
+  status = luaM_sysprof_start(L, &opt);
+  assert(SYSPROF_ERRRUN == status);
+
+  /* Profiler stopping */
+  status = luaM_sysprof_stop(L);
+  assert(SYSPROF_SUCCESS == status);
+
+  /* Stopping profiler which is not running */
+  status = luaM_sysprof_stop(L);
+  assert(SYSPROF_ERRSTOP == status);
+
+  lua_pushboolean(L, 1);
+  return 1;
+}
+
+/*
+** Profile the Lua function given as the only argument in CALLGRAPH mode
+** and check the consistency of the collected counters. Pushes true on
+** success; raises a Lua error on a bad argument, on a stream
+** initialization failure or if the payload itself fails.
+*/
+static int profile_func(lua_State *L)
+{
+  struct luam_sysprof_config config = {};
+  struct luam_sysprof_options opt = {};
+  struct luam_sysprof_counters cnt = {};
+  int status = SYSPROF_ERRUSE;
+
+  int n = lua_gettop(L);
+  if (n != 1 || !lua_isfunction(L, 1)) {
+    luaL_error(L, "incorrect argument: 1 function is required");
+  }
+
+  /*
+  ** Since all the other modes functionality is the
+  ** subset of CALLGRAPH mode, run this mode to test
+  ** the profiler's behavior.
+  */
+  opt.mode = LUAM_SYSPROF_CALLGRAPH;
+  opt.interval = SYSPROF_INTERVAL_DEFAULT;
+  /* Report a broken environment instead of tripping over the asserts below. */
+  if (stream_init(&opt) != SYSPROF_SUCCESS) {
+    luaL_error(L, "cannot initialize the profile output stream");
+  }
+
+  config.on_stop = on_stop_cb_default;
+  config.writer = buffer_writer_default;
+  status = luaM_sysprof_configure(&config);
+  assert(SYSPROF_SUCCESS == status);
+
+  status = luaM_sysprof_start(L, &opt);
+  assert(SYSPROF_SUCCESS == status);
+
+  /* Run payload. */
+  if (lua_pcall(L, 0, 0, 0) != 0) {
+    /*
+    ** Stop the profiler before raising the error: otherwise it stays
+    ** running and the stream context is never released by on_stop.
+    */
+    (void)luaM_sysprof_stop(L);
+    luaL_error(L, "error running payload: %s", lua_tostring(L, -1));
+  }
+
+  status = luaM_sysprof_stop(L);
+  assert(SYSPROF_SUCCESS == status);
+
+  status = luaM_sysprof_report(&cnt);
+  assert(SYSPROF_SUCCESS == status);
+
+  /* Every sample must be accounted in exactly one vmstate counter. */
+  assert(cnt.samples > 1);
+  assert(cnt.samples == cnt.vmst_asm +
+                        cnt.vmst_cfunc +
+                        cnt.vmst_exit +
+                        cnt.vmst_ffunc +
+                        cnt.vmst_gc +
+                        cnt.vmst_interp +
+                        cnt.vmst_lfunc +
+                        cnt.vmst_opt +
+                        cnt.vmst_record +
+                        cnt.vmst_trace);
+
+  lua_pushboolean(L, 1);
+  return 1;
+}
+
+/* Functions exported to Lua by the "testsysprof" module. */
+static const struct luaL_Reg testsysprof[] = {
+  {"c_payload", c_payload},
+  {"base", base},
+  {"validation", validation},
+  {"profile_func", profile_func},
+  {NULL, NULL}
+};
+
+/* Module entry point: registers the functions above as "testsysprof". */
+LUA_API int luaopen_testsysprof(lua_State *L)
+{
+  luaL_register(L, "testsysprof", testsysprof);
+  return 1;
+}
-- 
2.35.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Tarantool-patches] [PATCH luajit v3 5/7] memprof: add profile common section
  2022-04-06 12:49 [Tarantool-patches] [PATCH luajit v3 0/7] introuduce platform profiler Maxim Kokryashkin via Tarantool-patches
                   ` (3 preceding siblings ...)
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 4/7] core: introduce lua and platform profiler Maxim Kokryashkin via Tarantool-patches
@ 2022-04-06 12:49 ` Maxim Kokryashkin via Tarantool-patches
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 6/7] sysprof: introduce Lua API Maxim Kokryashkin via Tarantool-patches
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 7/7] tools: introduce parsers for sysprof Maxim Kokryashkin via Tarantool-patches
  6 siblings, 0 replies; 9+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2022-04-06 12:49 UTC (permalink / raw)
  To: tarantool-patches, imun, skaplun

A Lua API for sysprof needs to be introduced, and sysprof's C API is
quite similar to memprof's. Considering this, some structures and
functions should be shared between the memprof and sysprof
implementations of the Lua API to avoid duplication.

Part of tarantool/tarantool#781
---
 src/lib_misc.c | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/src/lib_misc.c b/src/lib_misc.c
index 1dab08cc..22d29d78 100644
--- a/src/lib_misc.c
+++ b/src/lib_misc.c
@@ -8,8 +8,9 @@
 #define lib_misc_c
 #define LUA_LIB
 
-#include <stdio.h>
 #include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
 
 #include "lua.h"
 #include "lmisclib.h"
@@ -75,9 +76,7 @@ LJLIB_CF(misc_getmetrics)
 
 #include "lj_libdef.h"
 
-/* ----- misc.memprof module ---------------------------------------------- */
-
-#define LJLIB_MODULE_misc_memprof
+/* --------- profile common section --------------------------------------- */
 
 /*
 ** Yep, 8Mb. Tuned in order not to bother the platform with too often flushes.
@@ -85,9 +84,9 @@ LJLIB_CF(misc_getmetrics)
 #define STREAM_BUFFER_SIZE (8 * 1024 * 1024)
 
 /* Structure given as ctx to memprof writer and on_stop callback. */
-struct memprof_ctx {
-  /* Output file stream for data. */
-  FILE *stream;
+struct profile_ctx {
+  /* Output file descriptor for data. */
+  int fd;
   /* Profiled global_State for lj_mem_free at on_stop callback. */
   global_State *g;
   /* Buffer for data. */
@@ -96,13 +95,13 @@ struct memprof_ctx {
 
 /*
 ** Default buffer writer function.
-** Just call fwrite to the corresponding FILE.
+** Just call write to the corresponding descriptor.
 */
 static size_t buffer_writer_default(const void **buf_addr, size_t len,
 				    void *opt)
 {
-  struct memprof_ctx *ctx = opt;
-  FILE *stream = ctx->stream;
+  struct profile_ctx *ctx = opt;
+  const int fd = ctx->fd;
   const void * const buf_start = *buf_addr;
   const void *data = *buf_addr;
   size_t write_total = 0;
@@ -110,9 +109,9 @@ static size_t buffer_writer_default(const void **buf_addr, size_t len,
   lua_assert(len <= STREAM_BUFFER_SIZE);
 
   for (;;) {
-    const size_t written = fwrite(data, 1, len - write_total, stream);
+    const size_t written = write(fd, data, len - write_total);
 
-    if (LJ_UNLIKELY(written == 0)) {
+    if (LJ_UNLIKELY(written == -1)) {
       /* Re-tries write in case of EINTR. */
       if (errno != EINTR) {
 	/* Will be freed as whole chunk later. */
@@ -137,22 +136,25 @@ static size_t buffer_writer_default(const void **buf_addr, size_t len,
   return write_total;
 }
 
-/* Default on stop callback. Just close the corresponding stream. */
+/* Default on stop callback. Just close the corresponding descriptor. */
 static int on_stop_cb_default(void *opt, uint8_t *buf)
 {
-  struct memprof_ctx *ctx = opt;
-  FILE *stream = ctx->stream;
+  struct profile_ctx *ctx = opt;
+  const int fd = ctx->fd;
   UNUSED(buf);
   lj_mem_free(ctx->g, ctx, sizeof(*ctx));
-  return fclose(stream);
+  return close(fd);
 }
 
+/* ----- misc.memprof module ---------------------------------------------- */
+
+#define LJLIB_MODULE_misc_memprof
 /* local started, err, errno = misc.memprof.start(fname) */
 LJLIB_CF(misc_memprof_start)
 {
   struct lj_memprof_options opt = {0};
   const char *fname = strdata(lj_lib_checkstr(L, 1));
-  struct memprof_ctx *ctx;
+  struct profile_ctx *ctx;
   int memprof_status;
 
   /*
-- 
2.35.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Tarantool-patches] [PATCH luajit v3 6/7] sysprof: introduce Lua API
  2022-04-06 12:49 [Tarantool-patches] [PATCH luajit v3 0/7] introuduce platform profiler Maxim Kokryashkin via Tarantool-patches
                   ` (4 preceding siblings ...)
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 5/7] memprof: add profile common section Maxim Kokryashkin via Tarantool-patches
@ 2022-04-06 12:49 ` Maxim Kokryashkin via Tarantool-patches
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 7/7] tools: introduce parsers for sysprof Maxim Kokryashkin via Tarantool-patches
  6 siblings, 0 replies; 9+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2022-04-06 12:49 UTC (permalink / raw)
  To: tarantool-patches, imun, skaplun

This commit introduces Lua API for sysprof, so it is now possible to
call it from anywhere in a Lua script. All of the parameters are passed
inside a single Lua table.
The following options are supported:
- mode:     "D" (Default mode, only virtual machine state counters)
            "L" (Leaf mode, shows the last frame on the stack)
            "C" (Callgraph mode, full stack dump)
- interval: time between samples in milliseconds
- path:     Path to file in which profiling data should be stored

Part of tarantool/tarantool#781
---
 src/lib_misc.c                                | 238 +++++++++++++++++-
 .../misclib-sysprof-lapi.test.lua             | 118 +++++++++
 2 files changed, 348 insertions(+), 8 deletions(-)
 create mode 100644 test/tarantool-tests/misclib-sysprof-lapi.test.lua

diff --git a/src/lib_misc.c b/src/lib_misc.c
index 22d29d78..e93cf55b 100644
--- a/src/lib_misc.c
+++ b/src/lib_misc.c
@@ -109,14 +109,14 @@ static size_t buffer_writer_default(const void **buf_addr, size_t len,
   lua_assert(len <= STREAM_BUFFER_SIZE);
 
   for (;;) {
-    const size_t written = write(fd, data, len - write_total);
+    const ssize_t written = write(fd, data, len - write_total);
 
     if (LJ_UNLIKELY(written == -1)) {
       /* Re-tries write in case of EINTR. */
       if (errno != EINTR) {
-	/* Will be freed as whole chunk later. */
-	*buf_addr = NULL;
-	return write_total;
+  /* Will be freed as whole chunk later. */
+  *buf_addr = NULL;
+  return write_total;
       }
 
       errno = 0;
@@ -139,16 +139,236 @@ static size_t buffer_writer_default(const void **buf_addr, size_t len,
 /* Default on stop callback. Just close the corresponding descriptor. */
 static int on_stop_cb_default(void *opt, uint8_t *buf)
 {
-  struct profile_ctx *ctx = opt;
-  const int fd = ctx->fd;
+  struct profile_ctx *ctx = NULL;
+  int fd = 0;
+
+  if (opt == NULL) {
+    /* Nothing to do. */
+    return 0;
+  }
+
+  ctx = opt;
+  fd = ctx->fd;
   UNUSED(buf);
   lj_mem_free(ctx->g, ctx, sizeof(*ctx));
   return close(fd);
 }
 
+/* ----- misc.sysprof module ---------------------------------------------- */
+
+#define LJLIB_MODULE_misc_sysprof
+
+/* The default profiling interval equals to 10 ms. */
+#define SYSPROF_DEFAULT_INTERVAL 10
+#define SYSPROF_DEFAULT_OUTPUT "sysprof.bin"
+
+/*
+** Open (create or truncate) the output file `path` and store its
+** descriptor in the profile context attached to `opt`, which must have
+** been set by the caller. Returns SYSPROF_ERRIO with errno set by
+** open() on failure, SYSPROF_SUCCESS otherwise.
+** File-local helper, hence the internal linkage.
+*/
+static int set_output_path(const char *path, struct luam_sysprof_options *opt)
+{
+  struct profile_ctx *ctx = opt->ctx;
+  int fd = 0;
+  lua_assert(path != NULL);
+  fd = open(path, O_CREAT | O_WRONLY | O_TRUNC, 0644);
+  if (fd == -1) {
+    return SYSPROF_ERRIO;
+  }
+  ctx->fd = fd;
+  return SYSPROF_SUCCESS;
+}
+
+/*
+** Fill `opt` from the Lua options table at stack index `idx`:
+**  - mode:     required, a one-character string: "D", "L" or "C";
+**  - interval: optional number >= 1, SYSPROF_DEFAULT_INTERVAL otherwise;
+**  - path:     optional string, used only by the streaming modes,
+**              SYSPROF_DEFAULT_OUTPUT otherwise.
+** For the streaming modes a profile context is allocated and the output
+** file is opened. Returns SYSPROF_ERRUSE on malformed options and
+** SYSPROF_ERRIO (errno is set) if the output file cannot be opened; no
+** context is left attached to `opt` in these cases.
+** File-local helper, hence the internal linkage.
+*/
+static int parse_sysprof_opts(lua_State *L, struct luam_sysprof_options *opt,
+                              int idx)
+{
+  GCtab *options = lj_lib_checktab(L, idx);
+
+  /* Get profiling mode. */
+  {
+    const char *mode = NULL;
+
+    cTValue *mode_opt = lj_tab_getstr(options, lj_str_newlit(L, "mode"));
+    if (!mode_opt || !tvisstr(mode_opt)) {
+      return SYSPROF_ERRUSE;
+    }
+
+    mode = strVdata(mode_opt);
+    /* Exactly one character is expected; never look past an empty string. */
+    if (mode[0] == '\0' || mode[1] != '\0')
+      return SYSPROF_ERRUSE;
+
+    switch (*mode) {
+      case 'D':
+        opt->mode = LUAM_SYSPROF_DEFAULT;
+        break;
+      case 'L':
+        opt->mode = LUAM_SYSPROF_LEAF;
+        break;
+      case 'C':
+        opt->mode = LUAM_SYSPROF_CALLGRAPH;
+        break;
+      default:
+        return SYSPROF_ERRUSE;
+    }
+  }
+
+  /* Get profiling interval. */
+  {
+    cTValue *interval = lj_tab_getstr(options, lj_str_newlit(L, "interval"));
+    opt->interval = SYSPROF_DEFAULT_INTERVAL;
+    /*
+    ** Accept both number representations: numberVint() handles integer
+    ** and floating point values alike.
+    */
+    if (interval && tvisnumber(interval)) {
+      int32_t signed_interval = numberVint(interval);
+      if (signed_interval < 1)
+        return SYSPROF_ERRUSE;
+      opt->interval = signed_interval;
+    }
+  }
+
+  /* Get output path. */
+  if (opt->mode != LUAM_SYSPROF_DEFAULT)
+  {
+    const char *path = NULL;
+    struct profile_ctx *ctx = NULL;
+    int status = 0;
+
+    cTValue *pathtv = lj_tab_getstr(options, lj_str_newlit(L, "path"));
+    if (!pathtv)
+      path = SYSPROF_DEFAULT_OUTPUT;
+    else if (!tvisstr(pathtv))
+      return SYSPROF_ERRUSE;
+    else
+      path = strVdata(pathtv);
+
+    ctx = lj_mem_new(L, sizeof(*ctx));
+    ctx->g = G(L);
+    opt->ctx = ctx;
+    opt->buf = ctx->buf;
+    opt->len = STREAM_BUFFER_SIZE;
+
+    status = set_output_path(path, opt);
+    if (status != SYSPROF_SUCCESS) {
+      /* The allocator may clobber errno, which the caller reports. */
+      const int saved_errno = errno;
+      lj_mem_free(ctx->g, ctx, sizeof(*ctx));
+      /* Do not leave dangling pointers to the released context. */
+      opt->ctx = NULL;
+      opt->buf = NULL;
+      opt->len = 0;
+      errno = saved_errno;
+      return status;
+    }
+  }
+
+  return SYSPROF_SUCCESS;
+}
+
+/*
+** Check that the function was called with exactly one table argument
+** and parse it into `opt`. Returns SYSPROF_ERRUSE on a wrong call shape,
+** the result of parse_sysprof_opts() otherwise.
+** File-local helper, hence the internal linkage.
+*/
+static int parse_options(lua_State *L, struct luam_sysprof_options *opt)
+{
+  if (lua_gettop(L) != 1)
+    return SYSPROF_ERRUSE;
+
+  if (!lua_istable(L, 1))
+    return SYSPROF_ERRUSE;
+
+  return parse_sysprof_opts(L, opt, 1);
+}
+
+/*
+** Push the Lua-side representation of a failed sysprof status:
+** nil, an error message and an errno value. Returns the number of pushed
+** values. SYSPROF_ERRIO is reported via luaL_fileresult(), i.e. from the
+** current errno; all the other statuses are reported as EINVAL.
+** File-local helper, hence the internal linkage.
+*/
+static int sysprof_error(lua_State *L, int status)
+{
+  const char *msg = NULL;
+
+  switch (status) {
+    case SYSPROF_ERRUSE:
+      msg = err2msg(LJ_ERR_PROF_MISUSE);
+      break;
+    case SYSPROF_ERRRUN:
+      msg = err2msg(LJ_ERR_PROF_ISRUNNING);
+      break;
+    case SYSPROF_ERRSTOP:
+      msg = err2msg(LJ_ERR_PROF_NOTRUNNING);
+      break;
+    case SYSPROF_ERRIO:
+      return luaL_fileresult(L, 0, NULL);
+    default:
+      lua_assert(0);
+      return 0;
+  }
+
+  lua_pushnil(L);
+  lua_pushstring(L, msg);
+  lua_pushinteger(L, EINVAL);
+  return 3;
+}
+
+/*
+** local res, err, errno = sysprof.start(options)
+**
+** Parses the options table, configures the profiler with the default
+** writer and on_stop callbacks and starts it. Returns true on success or
+** nil, an error message and an errno value on failure.
+*/
+LJLIB_CF(misc_sysprof_start)
+{
+  int status = SYSPROF_SUCCESS;
+
+  struct luam_sysprof_options opt = {0};
+  struct luam_sysprof_config conf = {0};
+
+  status = parse_options(L, &opt);
+  if (LJ_UNLIKELY(status != SYSPROF_SUCCESS)) {
+    return sysprof_error(L, status);
+  }
+
+  conf.writer = buffer_writer_default;
+  conf.on_stop = on_stop_cb_default;
+  /* NULL selects the built-in host backtracer. */
+  conf.backtracer = NULL;
+
+  status = luaM_sysprof_configure(&conf);
+  if (LJ_UNLIKELY(status != SYSPROF_SUCCESS)) {
+    /* Nothing was started: release the context allocated by the parser. */
+    on_stop_cb_default(opt.ctx, opt.buf);
+    return sysprof_error(L, status);
+  }
+
+  status = luaM_sysprof_start(L, &opt);
+  if (LJ_UNLIKELY(status != SYSPROF_SUCCESS))
+    /* Allocated memory will be freed in on_stop callback. */
+    return sysprof_error(L, status);
+
+  lua_pushboolean(L, 1);
+  return 1;
+}
+
+/*
+** local res, err, errno = sysprof.stop()
+**
+** Stops the profiler. Returns true on success or nil, an error message
+** and an errno value on failure.
+*/
+LJLIB_CF(misc_sysprof_stop)
+{
+  int status = luaM_sysprof_stop(L);
+  if (LJ_UNLIKELY(status != SYSPROF_SUCCESS))
+    return sysprof_error(L, status);
+
+  lua_pushboolean(L, 1);
+  return 1;
+}
+
+/*
+** local counters, err, errno = sysprof.report()
+**
+** Returns a table with the fields `samples` and `overruns` and a nested
+** `vmstate` table holding the per-vmstate sample counters, or nil, an
+** error message and an errno value on failure.
+*/
+LJLIB_CF(misc_sysprof_report)
+{
+  struct luam_sysprof_counters counters = {};
+  GCtab *data_tab = NULL;
+  GCtab *count_tab = NULL;
+
+  int status = luaM_sysprof_report(&counters);
+  if (status != SYSPROF_SUCCESS)
+    return sysprof_error(L, status);
+
+  /* The result table: two totals and the nested vmstate table. */
+  lua_createtable(L, 0, 3);
+  data_tab = tabV(L->top - 1);
+
+  setnumfield(L, data_tab, "samples", counters.samples);
+  setnumfield(L, data_tab, "overruns", counters.overruns);
+
+  /* The nested table is left on the stack top until lua_setfield() below. */
+  lua_createtable(L, 0, LJ_VMST__MAX + 1);
+  count_tab = tabV(L->top - 1);
+
+  setnumfield(L, count_tab, "INTERP", counters.vmst_interp);
+  setnumfield(L, count_tab, "LFUNC",  counters.vmst_lfunc);
+  setnumfield(L, count_tab, "FFUNC",  counters.vmst_ffunc);
+  setnumfield(L, count_tab, "CFUNC",  counters.vmst_cfunc);
+  setnumfield(L, count_tab, "GC",     counters.vmst_gc);
+  setnumfield(L, count_tab, "EXIT",   counters.vmst_exit);
+  setnumfield(L, count_tab, "RECORD", counters.vmst_record);
+  setnumfield(L, count_tab, "OPT",    counters.vmst_opt);
+  setnumfield(L, count_tab, "ASM",    counters.vmst_asm);
+  setnumfield(L, count_tab, "TRACE",  counters.vmst_trace);
+
+  /* Store the counters as data_tab.vmstate; this pops the nested table. */
+  lua_setfield(L, -2, "vmstate");
+
+  return 1;
+}
+
 /* ----- misc.memprof module ---------------------------------------------- */
 
 #define LJLIB_MODULE_misc_memprof
+
 /* local started, err, errno = misc.memprof.start(fname) */
 LJLIB_CF(misc_memprof_start)
 {
@@ -169,9 +389,9 @@ LJLIB_CF(misc_memprof_start)
   opt.len = STREAM_BUFFER_SIZE;
 
   ctx->g = G(L);
-  ctx->stream = fopen(fname, "wb");
+  ctx->fd = open(fname, O_CREAT | O_WRONLY | O_TRUNC);
 
-  if (ctx->stream == NULL) {
+  if (ctx->fd == -1) {
     lj_mem_free(ctx->g, ctx, sizeof(*ctx));
     return luaL_fileresult(L, 0, fname);
   }
@@ -240,5 +460,7 @@ LUALIB_API int luaopen_misc(struct lua_State *L)
 {
   LJ_LIB_REG(L, LUAM_MISCLIBNAME, misc);
   LJ_LIB_REG(L, LUAM_MISCLIBNAME ".memprof", misc_memprof);
+  LJ_LIB_REG(L, LUAM_MISCLIBNAME ".sysprof", misc_sysprof);
+
   return 1;
 }
diff --git a/test/tarantool-tests/misclib-sysprof-lapi.test.lua b/test/tarantool-tests/misclib-sysprof-lapi.test.lua
new file mode 100644
index 00000000..b4c9cb7b
--- /dev/null
+++ b/test/tarantool-tests/misclib-sysprof-lapi.test.lua
@@ -0,0 +1,118 @@
+-- Sysprof is implemented for x86 and x64 architectures only.
+require("utils").skipcond(
+  jit.arch ~= "x86" and jit.arch ~= "x64",
+  jit.arch.." architecture is NIY for sysprof"
+)
+
+local tap = require("tap")
+
+local test = tap.test("misc-sysprof-lapi")
+test:plan(14)
+
+jit.off()
+jit.flush()
+
+local bufread = require("utils.bufread")
+local symtab = require("utils.symtab")
+
+local TMP_BINFILE = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%.%1.sysprofdata.tmp.bin")
+local BAD_PATH = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%1/sysprofdata.tmp.bin")
+
+local function payload()
+  local function fib(n)
+    if n <= 1 then
+      return n
+    end
+    return fib(n - 1) + fib(n - 2)
+  end
+  return fib(32)
+end
+
+local function generate_output(opts)
+  local res, err = misc.sysprof.start(opts)
+  assert(res, err)
+
+  payload()
+
+  res,err = misc.sysprof.stop()
+  assert(res, err)
+end
+
+local function check_mode(mode, interval)
+  local res = pcall(
+    generate_output,
+    { mode = mode, interval = interval, path = TMP_BINFILE }
+  )
+
+  if not res then
+    test:fail(mode .. ' mode with interval ' .. interval)
+    os.remove(TMP_BINFILE)
+  end
+
+  local reader = bufread.new(TMP_BINFILE)
+  symtab.parse(reader)
+end
+
+-- GENERAL
+
+-- Wrong profiling mode.
+local res, err, errno = misc.sysprof.start{ mode = "A" }
+test:ok(res == nil and err:match("profiler misuse"))
+test:ok(type(errno) == "number")
+
+-- Already running.
+res, err = misc.sysprof.start{ mode = "D" }
+assert(res, err)
+
+res, err, errno = misc.sysprof.start{ mode = "D" }
+test:ok(res == nil and err:match("profiler misuse"))
+test:ok(type(errno) == "number")
+
+res, err = misc.sysprof.stop()
+assert(res, err)
+
+-- Not running.
+res, err, errno = misc.sysprof.stop()
+test:ok(res == nil and err:match("profiler is not running"))
+test:ok(type(errno) == "number")
+
+-- Bad path.
+res, err, errno = misc.sysprof.start({ mode = "C", path = BAD_PATH })
+test:ok(res == nil and err:match("No such file or directory"))
+test:ok(type(errno) == "number")
+
+-- Bad interval.
+res, err, errno = misc.sysprof.start{ mode = "C", interval = -1 }
+test:ok(res == nil and err:match("profiler misuse"))
+test:ok(type(errno) == "number")
+
+-- DEFAULT MODE
+
+if not pcall(generate_output, { mode = "D", interval = 11 }) then
+  test:fail('`default` mode with interval 11')
+end
+
+local report = misc.sysprof.report()
+
+test:ok(report.samples > 0)
+test:ok(report.vmstate.LFUNC > 0)
+test:ok(report.vmstate.TRACE == 0)
+
+-- With very big interval.
+if not pcall(generate_output, { mode = "D", interval = 1000 }) then
+  test:fail('`default` mode with interval 1000')
+end
+
+report = misc.sysprof.report()
+test:ok(report.samples == 0)
+
+-- LEAF MODE
+check_mode("L", 11)
+
+-- CALL MODE
+check_mode("C", 11)
+
+os.remove(TMP_BINFILE)
+
+jit.on()
+os.exit(test:check() and 0 or 1)
-- 
2.35.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Tarantool-patches] [PATCH luajit v3 7/7] tools: introduce parsers for sysprof
  2022-04-06 12:49 [Tarantool-patches] [PATCH luajit v3 0/7] introuduce platform profiler Maxim Kokryashkin via Tarantool-patches
                   ` (5 preceding siblings ...)
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 6/7] sysprof: introduce Lua API Maxim Kokryashkin via Tarantool-patches
@ 2022-04-06 12:49 ` Maxim Kokryashkin via Tarantool-patches
  6 siblings, 0 replies; 9+ messages in thread
From: Maxim Kokryashkin via Tarantool-patches @ 2022-04-06 12:49 UTC (permalink / raw)
  To: tarantool-patches, imun, skaplun

Since sysprof's binary output is not human-readable, there is a
demand for a parser. The parser provided by this commit converts
sysprof's event stream into a format that flamegraph.pl can
process.

The sysprof parser machinery uses the same symtab module as memprof's
parser since the format is the same.

Part of tarantool/tarantool#781
---
 .gitignore                                    |   1 +
 .../misclib-sysprof-lapi.test.lua             |   2 +
 tools/CMakeLists.txt                          |  83 ++++++++
 tools/luajit-parse-sysprof.in                 |   6 +
 tools/sysprof.lua                             | 119 +++++++++++
 tools/sysprof/collapse.lua                    | 113 +++++++++++
 tools/sysprof/parse.lua                       | 188 ++++++++++++++++++
 tools/utils/symtab.lua                        |   2 +-
 8 files changed, 513 insertions(+), 1 deletion(-)
 create mode 100644 tools/luajit-parse-sysprof.in
 create mode 100644 tools/sysprof.lua
 create mode 100755 tools/sysprof/collapse.lua
 create mode 100755 tools/sysprof/parse.lua

diff --git a/.gitignore b/.gitignore
index 2103a30f..099df060 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,4 +19,5 @@ cmake_uninstall.cmake
 compile_commands.json
 install_manifest.txt
 luajit-parse-memprof
+luajit-parse-sysprof
 luajit.pc
diff --git a/test/tarantool-tests/misclib-sysprof-lapi.test.lua b/test/tarantool-tests/misclib-sysprof-lapi.test.lua
index b4c9cb7b..01403d18 100644
--- a/test/tarantool-tests/misclib-sysprof-lapi.test.lua
+++ b/test/tarantool-tests/misclib-sysprof-lapi.test.lua
@@ -14,6 +14,7 @@ jit.flush()
 
 local bufread = require("utils.bufread")
 local symtab = require("utils.symtab")
+local sysprof = require("sysprof.parse")
 
 local TMP_BINFILE = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%.%1.sysprofdata.tmp.bin")
 local BAD_PATH = arg[0]:gsub(".+/([^/]+)%.test%.lua$", "%1/sysprofdata.tmp.bin")
@@ -51,6 +52,7 @@ local function check_mode(mode, interval)
 
   local reader = bufread.new(TMP_BINFILE)
   symtab.parse(reader)
+  sysprof.parse(reader)
 end
 
 -- GENERAL
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 61830e44..93a2f763 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -87,4 +87,87 @@ else()
   )
 endif()
 
+
+
+if(LUAJIT_DISABLE_SYSPROF)
+  message(STATUS "LuaJIT system profiler support is disabled")
+else()
+  # XXX: Can't use genex here since the value needs to be
+  # evaluated at the configuration phase. Fortunately, we know the
+  # exact path where the LuaJIT binary is located.
+  set(LUAJIT_TOOLS_BIN ${LUAJIT_BINARY_DIR}/${LUAJIT_CLI_NAME})
+  set(LUAJIT_TOOLS_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+  # XXX: Unfortunately, there is no convenient way to set
+  # particular permissions to the output file via CMake.
+  # Furthermore, I even failed to copy the given file to the same
+  # path to change its permissions. After looking at the docs, I
+  # realized that the valid solution would be too monstrous for
+  # such a simple task. As a result I've made the template itself
+  # executable, so the issue is resolved.
+  configure_file(luajit-parse-sysprof.in luajit-parse-sysprof @ONLY ESCAPE_QUOTES)
+
+
+  add_custom_target(tools-parse-sysprof EXCLUDE_FROM_ALL DEPENDS
+    luajit-parse-sysprof
+    sysprof/parse.lua
+    sysprof/collapse.lua
+    sysprof.lua
+    utils/bufread.lua
+    utils/symtab.lua
+  )
+  list(APPEND LUAJIT_TOOLS_DEPS tools-parse-sysprof)
+
+  install(FILES
+      ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/parse.lua
+      ${CMAKE_CURRENT_SOURCE_DIR}/sysprof/collapse.lua
+    DESTINATION ${LUAJIT_DATAROOTDIR}/sysprof
+    PERMISSIONS
+      OWNER_READ OWNER_WRITE
+      GROUP_READ
+      WORLD_READ
+    COMPONENT tools-parse-sysprof
+  )
+  install(FILES
+      ${CMAKE_CURRENT_SOURCE_DIR}/utils/bufread.lua
+      ${CMAKE_CURRENT_SOURCE_DIR}/utils/symtab.lua
+    DESTINATION ${LUAJIT_DATAROOTDIR}/utils
+    PERMISSIONS
+      OWNER_READ OWNER_WRITE
+      GROUP_READ
+      WORLD_READ
+    COMPONENT tools-parse-sysprof
+  )
+  install(FILES
+      ${CMAKE_CURRENT_SOURCE_DIR}/sysprof.lua
+    DESTINATION ${LUAJIT_DATAROOTDIR}
+    PERMISSIONS
+      OWNER_READ OWNER_WRITE
+      GROUP_READ
+      WORLD_READ
+    COMPONENT tools-parse-sysprof
+  )
+  install(CODE
+    # XXX: The auxiliary script needs to be configured to be
+    # used in the repository directly. Furthermore, it needs to be
+    # reconfigured prior to its installation. The temporary
+    # <configure_file> output is stored in the project build
+    # directory and removed later after being installed. This
+    # script will be gone once the following issue is resolved:
+    # https://github.com/tarantool/tarantool/issues/5688.
+    "
+      set(LUAJIT_TOOLS_BIN ${CMAKE_INSTALL_PREFIX}/bin/${LUAJIT_CLI_NAME})
+      set(LUAJIT_TOOLS_DIR ${CMAKE_INSTALL_PREFIX}/${LUAJIT_DATAROOTDIR})
+      configure_file(${CMAKE_CURRENT_SOURCE_DIR}/luajit-parse-sysprof.in
+        ${PROJECT_BINARY_DIR}/luajit-parse-sysprof @ONLY ESCAPE_QUOTES)
+      file(INSTALL ${PROJECT_BINARY_DIR}/luajit-parse-sysprof
+        DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
+        USE_SOURCE_PERMISSIONS
+      )
+      file(REMOVE ${PROJECT_BINARY_DIR}/luajit-parse-sysprof)
+    "
+    COMPONENT tools-parse-sysprof
+  )
+endif()
+
 add_custom_target(LuaJIT-tools DEPENDS ${LUAJIT_TOOLS_DEPS})
+
diff --git a/tools/luajit-parse-sysprof.in b/tools/luajit-parse-sysprof.in
new file mode 100644
index 00000000..2be25eb3
--- /dev/null
+++ b/tools/luajit-parse-sysprof.in
@@ -0,0 +1,6 @@
+#!/bin/bash
+#
+# Launcher for sysprof parser.
+
+LUA_PATH="@LUAJIT_TOOLS_DIR@/?.lua;;" \
+  @LUAJIT_TOOLS_BIN@ @LUAJIT_TOOLS_DIR@/sysprof.lua $@
diff --git a/tools/sysprof.lua b/tools/sysprof.lua
new file mode 100644
index 00000000..e6d8cc34
--- /dev/null
+++ b/tools/sysprof.lua
@@ -0,0 +1,119 @@
+local bufread = require "utils.bufread"
+local sysprof = require "sysprof.parse"
+local symtab = require "utils.symtab"
+local misc = require "sysprof.collapse"
+
+local stdout, stderr = io.stdout, io.stderr
+local match, gmatch = string.match, string.gmatch
+
+local split_by_vmstate = false
+
+-- Program options.
+local opt_map = {}
+
+function opt_map.help()
+  stdout:write [[
+luajit-parse-sysprof - parser of the profile collected
+                       with LuaJIT's sysprof.
+
+SYNOPSIS
+
+luajit-parse-sysprof [options] sysprof.bin
+
+Supported options are:
+
+  --help                            Show this help and exit
+  --split                           Split callchains by vmstate
+]]
+  os.exit(0)
+end
+
+function opt_map.split()
+  split_by_vmstate = true
+end
+
+-- Print error and exit with error status.
+local function opterror(...)
+  stderr:write("luajit-parse-sysprof.lua: ERROR: ", ...)
+  stderr:write("\n")
+  os.exit(1)
+end
+
+-- Parse single option.
+local function parseopt(opt, args)
+  local opt_current = #opt == 1 and "-"..opt or "--"..opt
+  local f = opt_map[opt]
+  if not f then
+    opterror("unrecognized option `", opt_current, "'. Try `--help'.\n")
+  end
+  f(args)
+end
+
+-- Parse arguments.
+local function parseargs(args)
+  -- Process all option arguments.
+  args.argn = 1
+  repeat
+    local a = args[args.argn]
+    if not a then
+      break
+    end
+    local lopt, opt = match(a, "^%-(%-?)(.+)")
+    if not opt then
+      break
+    end
+    args.argn = args.argn + 1
+    if lopt == "" then
+      -- Loop through short options.
+      for o in gmatch(opt, ".") do
+        parseopt(o, args)
+      end
+    else
+      -- Long option.
+      parseopt(opt, args)
+    end
+  until false
+
+  -- Check for proper number of arguments.
+  local nargs = #args - args.argn + 1
+  if nargs ~= 1 then
+    opt_map.help()
+  end
+
+  -- Translate a single input file.
+  -- TODO: Handle multiple files?
+  return args[args.argn]
+end
+
+local function traverse_calltree(node, prefix)
+  if node.is_leaf then
+    print(prefix..' '..node.count)
+  end
+
+  local sep_prefix = #prefix == 0 and prefix or prefix..';'
+
+  for name,child in pairs(node.children) do
+    traverse_calltree(child, sep_prefix..name)
+  end
+end
+
+local function dump(inputfile)
+  local reader = bufread.new(inputfile)
+
+  local symbols = symtab.parse(reader)
+
+  local events = sysprof.parse(reader)
+  local calltree = misc.collapse(events, symbols, split_by_vmstate)
+
+  traverse_calltree(calltree, '')
+
+  os.exit(0)
+end
+
+-- FIXME: this script should be application-independent.
+local args = {...}
+if #args == 1 and args[1] == "sysprof" then
+  return dump
+else
+  dump(parseargs(args))
+end
diff --git a/tools/sysprof/collapse.lua b/tools/sysprof/collapse.lua
new file mode 100755
index 00000000..a123e6bd
--- /dev/null
+++ b/tools/sysprof/collapse.lua
@@ -0,0 +1,113 @@
+local parse = require "sysprof.parse"
+local vmdef = require "jit.vmdef"
+local symtab = require "utils.symtab"
+
+local VMST_NAMES = {
+  [parse.VMST.INTERP] = "VMST_INTERP",
+  [parse.VMST.LFUNC]  = "VMST_LFUNC",
+  [parse.VMST.FFUNC]  = "VMST_FFUNC",
+  [parse.VMST.CFUNC]  = "VMST_CFUNC",
+  [parse.VMST.GC]     = "VMST_GC",
+  [parse.VMST.EXIT]   = "VMST_EXIT",
+  [parse.VMST.RECORD] = "VMST_RECORD",
+  [parse.VMST.OPT]    = "VMST_OPT",
+  [parse.VMST.ASM]    = "VMST_ASM",
+  [parse.VMST.TRACE]  = "VMST_TRACE",
+}
+
+local M = {}
+
+local function new_node(name, is_leaf)
+  return {
+    name = name,
+    count = 0,
+    is_leaf = is_leaf,
+    children = {}
+  }
+end
+
+-- insert new child into a node (or increase counter in existing one)
+local function insert(name, node, is_leaf)
+  if node.children[name] == nil then
+    node.children[name] = new_node(name, is_leaf)
+  end
+
+  local child = node.children[name]
+  child.count = child.count + 1
+
+  return child
+end
+
+local function insert_lua_callchain(chain, lua, symbols)
+  for _,fr in pairs(lua.callchain) do
+    local name_lua
+
+    if fr.type == parse.FRAME.FFUNC then
+      name_lua = vmdef.ffnames[fr.ffid]
+    else
+      name_lua = symtab.demangle(symbols, {
+        addr = fr.addr,
+        line = fr.line
+      })
+      if lua.trace.id ~= nil and lua.trace.addr == fr.addr and
+          lua.trace.line == fr.line then
+        name_lua = symtab.demangle(symbols, {
+          addr = fr.addr,
+          traceno = lua.trace.id
+        })
+      end
+    end
+
+    table.insert(chain, { name = name_lua })
+  end
+end
+
+-- merge lua and host callchains into one callchain representing
+-- transfer of control
+local function merge(event, symbols, sep_vmst)
+  local cc = {}
+  local lua_inserted = false
+
+  for _,h_fr in pairs(event.host.callchain) do
+    local name_host = symtab.demangle(symbols, { addr = h_fr.addr })
+
+    -- We assume that usually the transfer of control
+    -- looks like:
+    --    HOST -> LUA -> HOST
+    -- so for now, lua callchain starts from lua_pcall() call
+    if name_host == 'lua_pcall' then
+      insert_lua_callchain(cc, event.lua, symbols)
+      lua_inserted = true
+    end
+
+    table.insert(cc, { name = name_host })
+  end
+
+  if lua_inserted == false then
+    insert_lua_callchain(cc, event.lua, symbols)
+  end
+
+  if sep_vmst == true then
+    table.insert(cc, { name = VMST_NAMES[event.lua.vmstate] })
+  end
+
+  return cc
+end
+
+-- Collapse all the events into call tree
+function M.collapse(events, symbols, sep_vmst)
+  local root = new_node('root', false)
+
+  for _,ev in pairs(events) do
+    local callchain = merge(ev, symbols, sep_vmst)
+    local curr_node = root
+    for i=#callchain,1,-1 do
+      curr_node = insert(callchain[i].name, curr_node, false)
+    end
+    insert('', curr_node, true)
+  end
+
+  return root
+end
+
+return M
diff --git a/tools/sysprof/parse.lua b/tools/sysprof/parse.lua
new file mode 100755
index 00000000..766b0c99
--- /dev/null
+++ b/tools/sysprof/parse.lua
@@ -0,0 +1,188 @@
+-- Parser of LuaJIT's sysprof binary stream.
+-- The format spec can be found in <src/lj_sysprof.h>.
+
+local string_format = string.format
+
+local LJP_MAGIC = "ljp"
+local LJP_CURRENT_VERSION = 1
+
+local M = {}
+
+M.VMST = {
+  INTERP = 0,
+  LFUNC  = 1,
+  FFUNC  = 2,
+  CFUNC  = 3,
+  GC     = 4,
+  EXIT   = 5,
+  RECORD = 6,
+  OPT    = 7,
+  ASM    = 8,
+  TRACE  = 9,
+}
+
+
+M.FRAME = {
+  LFUNC  = 1,
+  CFUNC  = 2,
+  FFUNC  = 3,
+  BOTTOM = 0x80
+}
+
+local STREAM_END = 0x80
+
+local function new_event()
+  return {
+    lua = {
+      vmstate = 0,
+      callchain = {},
+      trace = {
+        id = nil,
+        addr = 0,
+        line = 0
+      }
+    },
+    host = {
+      callchain = {}
+    }
+  }
+end
+
+local function parse_lfunc(reader, event)
+  local addr = reader:read_uleb128()
+  local line = reader:read_uleb128()
+  table.insert(event.lua.callchain, {
+    type = M.FRAME.LFUNC,
+    addr = addr,
+    line = line
+  })
+end
+
+local function parse_ffunc(reader, event)
+  local ffid = reader:read_uleb128()
+  table.insert(event.lua.callchain, {
+    type = M.FRAME.FFUNC,
+    ffid = ffid,
+  })
+end
+
+local function parse_cfunc(reader, event)
+  local addr = reader:read_uleb128()
+  table.insert(event.lua.callchain, {
+    type = M.FRAME.CFUNC,
+    addr = addr
+  })
+end
+
+local frame_parsers = {
+  [M.FRAME.LFUNC] = parse_lfunc,
+  [M.FRAME.FFUNC] = parse_ffunc,
+  [M.FRAME.CFUNC] = parse_cfunc
+}
+
+local function parse_lua_callchain(reader, event)
+  while true do
+    local frame_header = reader:read_octet()
+    if frame_header == M.FRAME.BOTTOM then
+      break
+    end
+    frame_parsers[frame_header](reader, event)
+  end
+end
+
+--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~--
+
+local function parse_host_callchain(reader, event)
+  local addr = reader:read_uleb128()
+
+  while addr ~= 0 do
+    table.insert(event.host.callchain, {
+      addr = addr
+    })
+    addr = reader:read_uleb128()
+  end
+end
+
+--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~--
+
+local function parse_trace_callchain(reader, event)
+  event.lua.trace.id   = reader:read_uleb128()
+  event.lua.trace.addr = reader:read_uleb128()
+  event.lua.trace.line = reader:read_uleb128()
+end
+
+--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~--
+
+local function parse_host_only(reader, event)
+  parse_host_callchain(reader, event)
+end
+
+local function parse_lua_host(reader, event)
+  parse_lua_callchain(reader, event)
+  parse_host_callchain(reader, event)
+end
+
+local function parse_trace(reader, event)
+  parse_trace_callchain(reader, event)
+  -- parse_lua_callchain(reader, event)
+end
+
+local event_parsers = {
+  [M.VMST.INTERP] = parse_host_only,
+  [M.VMST.LFUNC]  = parse_lua_host,
+  [M.VMST.FFUNC]  = parse_lua_host,
+  [M.VMST.CFUNC]  = parse_lua_host,
+  [M.VMST.GC]     = parse_host_only,
+  [M.VMST.EXIT]   = parse_host_only,
+  [M.VMST.RECORD] = parse_host_only,
+  [M.VMST.OPT]    = parse_host_only,
+  [M.VMST.ASM]    = parse_host_only,
+  [M.VMST.TRACE]  = parse_trace
+}
+
+local function parse_event(reader, events)
+  local event = new_event()
+
+  local vmstate = reader:read_octet()
+  if vmstate == STREAM_END then
+    -- TODO: samples & overruns
+    return false
+  end
+
+  assert(0 <= vmstate and vmstate <= 9, "Vmstate "..vmstate.." is not valid")
+  event.lua.vmstate = vmstate
+
+  event_parsers[vmstate](reader, event)
+
+  table.insert(events, event)
+  return true
+end
+
+function M.parse(reader)
+  local events = {}
+
+  local magic = reader:read_octets(3)
+  local version = reader:read_octets(1)
+  -- Dummy-consume reserved bytes.
+  local _ = reader:read_octets(3)
+
+  if magic ~= LJP_MAGIC then
+    error("Bad LJP format prologue: "..magic)
+  end
+
+  if string.byte(version) ~= LJP_CURRENT_VERSION then
+    error(string_format(
+      "LJP format version mismatch: the tool expects %d, but your data is %d",
+      LJP_CURRENT_VERSION,
+      string.byte(version)
+    ))
+  end
+
+  while parse_event(reader, events) do
+    -- Empty body.
+  end
+
+  return events
+end
+
+return M
diff --git a/tools/utils/symtab.lua b/tools/utils/symtab.lua
index c7fcf77c..cf27d70d 100644
--- a/tools/utils/symtab.lua
+++ b/tools/utils/symtab.lua
@@ -121,7 +121,7 @@ local function demangle_trace(symtab, loc)
 end
 
 function M.demangle(symtab, loc)
-  if loc.traceno ~= 0 then
+  if loc.traceno ~= 0 and loc.traceno ~= nil then
     return demangle_trace(symtab, loc)
   end
 
-- 
2.35.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit v3 1/7] vm: save topframe info into global_State
  2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 1/7] vm: save topframe info into global_State Maxim Kokryashkin via Tarantool-patches
@ 2022-04-07  9:47   ` Sergey Kaplun via Tarantool-patches
  0 siblings, 0 replies; 9+ messages in thread
From: Sergey Kaplun via Tarantool-patches @ 2022-04-07  9:47 UTC (permalink / raw)
  To: Maxim Kokryashkin; +Cc: tarantool-patches

Hi, Maxim!

Thanks for the patch!

Please consider my review comments below.

However, they are almost the same as for the previous (v1) version, since
there has been neither feedback nor related changes.

On 06.04.22, Maxim Kokryashkin wrote:
> From: Mikhail Shishatskiy <m.shishatskiy@tarantool.org>
> 
> Since commit 111d377d524e54e02187148a1832683291d620b2
> ('vm: introduce VM states for Lua and fast functions')
> the VM has LFUNC and FFUNC states. The upcoming sampling
> profiler uses these vmstates to determine if the guest
> stack is valid or not. So, we need to provide a There is an inconsistent behavior
> of the VM when the Lua stack is not valid, but the state
> is set to LFUNC. This patch is just a gross hack with which
> the profiler works fine.

What hack are you talking about?

>                          The problem is to be investigated
> more deeply :(

Typo: s/ :(/./.

Minor: I suggest to drop this line.

Minor: the lines of the commit message are not filled well (except for
the one line that is filled to the brim :).

> ---
>  src/lj_obj.h    | 12 ++++++++++++
>  src/vm_x64.dasc | 52 +++++++++++++++++++++++++++++++++++++++----------
>  src/vm_x86.dasc | 52 +++++++++++++++++++++++++++++++++++++++----------
>  3 files changed, 96 insertions(+), 20 deletions(-)
> 
> diff --git a/src/lj_obj.h b/src/lj_obj.h
> index d26e60be..b76c3155 100644
> --- a/src/lj_obj.h
> +++ b/src/lj_obj.h
> @@ -514,6 +514,17 @@ typedef struct GCtab {
>  #define setfreetop(t, n, v)	(setmref((n)->freetop, (v)))
>  #endif
>  
> +/* -- Misc objects -------------------------------------------------------- */

Minor: I suggest not Misc, but Profiler.

> +
> +struct lj_sysprof_topframe {
> +  uint8_t ffid;          /* FFUNC: fast function id. */

Why can't this field be a part of the union?
Since the vmstate is set, we always know the necessary union "subtype".

> +  union {
> +    uint64_t raw;        /* Raw value for context save/restore. */
> +    TValue *interp_base; /* LFUNC: Base of the executed coroutine. */
> +    lua_CFunction cf;    /* CFUNC: Address of the C function. */


Nit: please use tabs instead of spaces for comment alignment, as is
done for other structures in this header.

> +  } guesttop;
> +};
> +
>  /* -- State objects ------------------------------------------------------- */
>  
>  /* VM states. */
> @@ -674,6 +685,7 @@ typedef struct global_State {
>    MRef jit_base;	/* Current JIT code L->base or NULL. */
>    MRef ctype_state;	/* Pointer to C type state. */
>    GCRef gcroot[GCROOT_MAX];  /* GC roots. */
> +  struct lj_sysprof_topframe top_frame;  /* Top frame for sysprof */

Nit: I suppose that this structure should be introduced only if sysprof
is enabled. OTOH, I see no reason to hide this structure, so I suggest
leaving it as is for now.

Side note: Also, I'm not sure that this field has addressable offset for
ARM architecture (for DynASM).

Typo: s/for sysprof/for sysprof./

>  } global_State;
>  
>  #define mainthread(g)	(&gcref(g->mainthref)->th)
> diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
> index 974047d3..c4beb5e7 100644
> --- a/src/vm_x64.dasc
> +++ b/src/vm_x64.dasc
> @@ -345,6 +345,35 @@
>  |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
>  |.endmacro
>  |
> +|// Stash interpreter's internal base and enter LFUNC VM state.
> +|// PROFILER: Each time profiler sees LFUNC state, it will inspect [BASE-1]

I suppose, that this is not valid for GC64. LuaJIT uses 2-slot frame
info here. The func value is in BASE - 2 slot (see <src/lj_frame.h> for
details). So this part should be adjusted.

| (gdb) f 0
| #0  lj_cf_print (L=0x7ffff7c83378) at src/lib_base.c:486
| 486       ptrdiff_t i, nargs = L->top - L->base;
|
| (gdb) lj-arch
| LJ_64: True, LJ_GC64: True, LJ_DUALNUM: False
| (gdb) p gcval(L->base - 2)->fn.l.ffid
| $11 = 29 '\035' # print
| (gdb) p gcval(L->base - 1)->fn.l.ffid
| $12 = 200 '\310'

Minor: please use well-known special comments instead of PROFILER.
XXX [1] looks good here, IMO.
Here and below.
| Use XXX in a comment to flag something that is bogus but works.

> +|// expecting to see a valid framelink there. So enter this state only when
> +|// BASE is stable and slots are not moved on the stack.
> +|.macro set_vmstate_lfunc
> +|  set_vmstate INTERP // Guard for non-atomic VM context restoration

Nit: missing dot at the end of the sentence.
Here and below.

> +|  mov dword [DISPATCH+DISPATCH_GL(top_frame.guesttop)], BASE
> +|  set_vmstate LFUNC
> +|.endmacro
> +|
> +|// Stash ID of the fast function about to be executed and enter FFUNC VM state.
> +|// PROFILER: Each time profiler sees FFUNC state, it will write ffid
> +|// to the profile stream.
> +|.macro set_vmstate_ffunc
> +|  set_vmstate INTERP // Guard for non-atomic VM context restoration
> +|  mov XCHGd, dword [BASE-8]

The XCHGd register is not defined in <vm_x64.dasc>.

| /home/burii/reviews/luajit/sysprof/src/vm_x64.dasc:501: error: bad operand mode in `mov i?,i?':
|  |  mov XCHGd, L:RBa->base
| ...

Also, BASE stands for a 64-bit register (*).
| ...
| /home/burii/reviews/luajit/sysprof/src/vm_x64.dasc:467: error: mixed operand size in `mov xd,rq':
|   |  set_vmstate_cfunc
|   |    mov dword [DISPATCH+DISPATCH_GL(top_frame.guesttop)], BASE       [MACRO set_vmstate_cfunc (0)]

> +|  mov dword [DISPATCH+DISPATCH_GL(top_frame.ffid)], XCHGd

Don't get it. Why is it `dword` instead of `byte`?

> +|  set_vmstate FFUNC
> +|.endmacro
> +|
> +|// Stash address of the C function about to be executed and enter CFUNC VM state.

Nit: Line width is more than 80 symbols.

> +|// PROFILER: Each time profiler sees CFUNC state, it will write this address
> +|// to the profile stream.
> +|.macro set_vmstate_cfunc
> +|  set_vmstate INTERP // Guard for non-atomic VM context restoration
> +|  mov dword [DISPATCH+DISPATCH_GL(top_frame.guesttop)], BASE

Ditto (*).

> +|  set_vmstate CFUNC
> +|.endmacro
> +|
>  |// Uses TMPRd (r10d).
>  |.macro save_vmstate
>  |.if not WIN
> @@ -435,7 +464,7 @@ static void build_subroutines(BuildCtx *ctx)
>    |  jnz ->vm_returnp
>    |
>    |  // Return to C.
> -  |  set_vmstate CFUNC
> +  |  set_vmstate_cfunc
>    |  and PC, -8
>    |  sub PC, BASE
>    |  neg PC				// Previous base = BASE - delta.
> @@ -467,6 +496,9 @@ static void build_subroutines(BuildCtx *ctx)
>    |  xor eax, eax			// Ok return status for vm_pcall.
>    |
>    |->vm_leave_unw:
> +  |  set_vmstate INTERP // Guard for non-atomic VM context restoration
> +  |  mov XCHGd, L:RBa->base

The RBa register is not defined in <vm_x64.dasc>.

> +  |  mov dword [DISPATCH+DISPATCH_GL(top_frame.guesttop)], XCHGd
>    |  // DISPATCH required to set properly.
>    |  restore_vmstate			// Caveat: uses TMPRd (r10d).
>    |  restoreregs
> @@ -725,7 +757,7 @@ static void build_subroutines(BuildCtx *ctx)

<snipped>

> diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
> index ab8e6f27..222754fe 100644
> --- a/src/vm_x86.dasc
> +++ b/src/vm_x86.dasc
> @@ -443,6 +443,35 @@
>  |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
>  |.endmacro
>  |
> +|// Stash interpreter's internal base and enter LFUNC VM state.
> +|// PROFILER: Each time profiler sees LFUNC state, it will inspect [BASE-1]
> +|// expecting to see a valid framelink there. So enter this state only when
> +|// BASE is stable and slots are not moved on the stack.
> +|.macro set_vmstate_lfunc
> +|  set_vmstate INTERP // Guard for non-atomic VM context restoration
> +|  mov dword [DISPATCH+DISPATCH_GL(top_frame.guesttop)], BASE
> +|  set_vmstate LFUNC
> +|.endmacro
> +|
> +|// Stash ID of the fast function about to be executed and enter FFUNC VM state.
> +|// PROFILER: Each time profiler sees FFUNC state, it will write ffid
> +|// to the profile stream.
> +|.macro set_vmstate_ffunc
> +|  set_vmstate INTERP // Guard for non-atomic VM context restoration
> +|  mov XCHGd, dword [BASE-8]

This register is defined only for the x64 architecture.
The error occurs when building with the following command:

| $ make CC="gcc -m32" -f Makefile.original -j
| ...
| DYNASM    host/buildvm_arch.h
| vm_x86.dasc:632: error: bad operand mode in `mov i?,x?':
|   |  mov XCHGd, L:RBa->base
| vm_x86.dasc:1544: error: bad operand mode in `mov i?,xd':
|   |.ffunc_1 assert
|   |    mov XCHGd, dword [BASE-8]        [MACRO set_vmstate_ffunc (0)]
| vm_x86.dasc:1572: error: bad operand mode in `mov i?,xd':
|   |.ffunc_1 type
|   |    mov XCHGd, dword [BASE-8]        [MACRO set_vmstate_ffunc (0)]
| vm_x86.dasc:1599: error: bad operand mode in `mov i?,xd':
|   |.ffunc_1 getmetatable
|   |    mov XCHGd, dword [BASE-8]        [MACRO set_vmstate_ffunc (0)]

Side note: Unfortunately, it's impossible for now to force an x32 build
via cmake. It requires proxying CMAKE_C_FLAGS to a macro in LuaJITUtils, IINM.

> +|  mov dword [DISPATCH+DISPATCH_GL(top_frame.ffid)], XCHGd

Please clarify the following things:

1) IINM, we save not the ffid but the GCref for this function (since we
   do not load the ffid field from the function).
2) Why do we store 64 bytes instead of 8? Yes, the struct is not packed
   and there is a hole in it, so it works correctly. But it is a little
   bit confusing. Also, why can't we write BASE here too and inspect
   ffid from this base later?

> +|  set_vmstate FFUNC
> +|.endmacro
> +|
> +|// Stash address of the C function about to be executed and enter CFUNC VM state.
> +|// PROFILER: Each time profiler sees CFUNC state, it will write this address
> +|// to the profile stream.
> +|.macro set_vmstate_cfunc
> +|  set_vmstate INTERP // Guard for non-atomic VM context restoration
> +|  mov dword [DISPATCH+DISPATCH_GL(top_frame.guesttop)], BASE
> +|  set_vmstate CFUNC
> +|.endmacro
> +|
>  |// Uses spilled ecx on x86 or XCHGd (r11d) on x64.
>  |.macro save_vmstate
>  |.if not WIN
> @@ -560,7 +589,7 @@ static void build_subroutines(BuildCtx *ctx)

<snipped>

> @@ -599,6 +628,9 @@ static void build_subroutines(BuildCtx *ctx)
>    |  xor eax, eax			// Ok return status for vm_pcall.
>    |
>    |->vm_leave_unw:
> +  |  set_vmstate INTERP // Guard for non-atomic VM context restoration
> +  |  mov XCHGd, L:RBa->base

AFAICS, there is no guarantee that L:RBa is set up to `lua_State *`.

For example, during `vm_unwind_c_eh` RB register is set to
`global_State *`.

|->vm_unwind_c_eh:			// Landing pad for external unwinder.
|  mov L:DISPATCH, SAVE_L
|  mov GL:RB, L:DISPATCH->glref
|  mov dword GL:RB->cur_L, L:DISPATCH
|  mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC
|  mov DISPATCH, L:DISPATCH->glref	// Setup pointer to dispatch table.
|  add DISPATCH, GG_G2DISP
|  jmp ->vm_leave_unw

> +  |  mov dword [DISPATCH+DISPATCH_GL(top_frame.guesttop)], XCHGd
>    |  // DISPATCH required to set properly.
>    |  restore_vmstate			// Caveat: on x64 uses XCHGd (r11d).
>    |  restoreregs
> @@ -934,7 +966,7 @@ static void build_subroutines(BuildCtx *ctx)

<snipped>

> -- 
> 2.35.1
> 

[1]: https://www.oracle.com/java/technologies/javase/codeconventions-programmingpractices.html

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-04-07  9:49 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-06 12:49 [Tarantool-patches] [PATCH luajit v3 0/7] introuduce platform profiler Maxim Kokryashkin via Tarantool-patches
2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 1/7] vm: save topframe info into global_State Maxim Kokryashkin via Tarantool-patches
2022-04-07  9:47   ` Sergey Kaplun via Tarantool-patches
2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 2/7] core: separate the profiling timer from lj_profile Maxim Kokryashkin via Tarantool-patches
2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 3/7] memprof: move symtab to a separate module Maxim Kokryashkin via Tarantool-patches
2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 4/7] core: introduce lua and platform profiler Maxim Kokryashkin via Tarantool-patches
2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 5/7] memprof: add profile common section Maxim Kokryashkin via Tarantool-patches
2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 6/7] sysprof: introduce Lua API Maxim Kokryashkin via Tarantool-patches
2022-04-06 12:49 ` [Tarantool-patches] [PATCH luajit v3 7/7] tools: introduce parsers for sysprof Maxim Kokryashkin via Tarantool-patches

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox