[Tarantool-patches] [PATCH luajit] GC64: enable sysprof support
Maxim Kokryashkin
max.kokryashkin at gmail.com
Fri Nov 11 13:19:48 MSK 2022
This patch reverts changes introduced via commit
71e7020637bb15024ceb7e93ce66b61870567339 ('GC64: disable
sysprof support'), fixing the incorrect behavior in vm
by adding the `LJ_HASSYSPROF` flag to the DynASM flags.
Resolves tarantool/tarantool#7919
Related to tarantool/tarantool#781
---
Issue: https://github.com/tarantool/tarantool/issues/7919
Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-7919-sysprof-gc64
PR: https://github.com/tarantool/tarantool/pull/7922
cmake/SetDynASMFlags.cmake | 5 +++
src/lj_arch.h | 2 +-
src/lj_obj.h | 2 +
src/vm_x64.dasc | 42 ++++++++++++++-----
src/vm_x86.dasc | 8 ++++
.../misclib-sysprof-capi.test.lua | 7 ++--
.../misclib-sysprof-lapi.test.lua | 5 +--
7 files changed, 52 insertions(+), 19 deletions(-)
diff --git a/cmake/SetDynASMFlags.cmake b/cmake/SetDynASMFlags.cmake
index 8e10758f..142d7e64 100644
--- a/cmake/SetDynASMFlags.cmake
+++ b/cmake/SetDynASMFlags.cmake
@@ -113,6 +113,11 @@ if(NOT FOUND EQUAL -1)
AppendFlags(HOST_C_FLAGS -DLUAJIT_NO_UNWIND)
endif()
+string(FIND "${TESTARCH}" "LJ_HASSYSPROF 1" FOUND)
+if(NOT FOUND EQUAL -1)
+ list(APPEND DYNASM_FLAGS -D LJ_HASSYSPROF)
+endif()
+
string(REGEX MATCH "LJ_ARCH_VERSION ([0-9]+)" LUAJIT_ARCH_VERSION ${TESTARCH})
list(APPEND DYNASM_FLAGS -D VER=${CMAKE_MATCH_1})
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 40129d9e..cd02fee1 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -595,7 +595,7 @@
#endif
/* Disable or enable the platform and Lua profiler. */
-#if defined(LUAJIT_DISABLE_SYSPROF) || defined(LJ_ARCH_NOSYSPROF) || !LJ_TARGET_LINUX || LJ_GC64
+#if defined(LUAJIT_DISABLE_SYSPROF) || defined(LJ_ARCH_NOSYSPROF) || !LJ_TARGET_LINUX
#define LJ_HASSYSPROF 0
#else
#define LJ_HASSYSPROF 1
diff --git a/src/lj_obj.h b/src/lj_obj.h
index d1451c3a..45507e0d 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -674,7 +674,9 @@ typedef struct global_State {
MRef jit_base; /* Current JIT code L->base or NULL. */
MRef ctype_state; /* Pointer to C type state. */
GCRef gcroot[GCROOT_MAX]; /* GC roots. */
+#ifdef LJ_HASSYSPROF
TValue *top_frame; /* Top frame for sysprof. */
+#endif
} global_State;
#define mainthread(g) (&gcref(g->mainthref)->th)
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 974047d3..59f117ba 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -345,6 +345,22 @@
| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
|.endmacro
|
+|// Stash interpreter's internal base and set current VM state.
+|// XXX: Each time profiler sees LFUNC, CFUNC or FFUNC state, it tries
+|// to dump Lua calling stack, so it needs a stack base pointer.
+|// If the sampling signal arriving during the execution of the VM code,
+|// base pointer stored in the current lua_State can be irrelevant, as
+|// it syncs with the BASE register only when the control is passed to
+|// user code. So we need to sync the BASE on each vmstate change to
+|// keep it consistent.
+|.macro set_vmstate_sync_base, st
+|.if LJ_HASSYSPROF
+| set_vmstate INTERP // Guard for non-atomic VM context restoration
+| mov qword [DISPATCH+DISPATCH_GL(top_frame)], BASE
+|.endif
+| set_vmstate st
+|.endmacro
+|
|// Uses TMPRd (r10d).
|.macro save_vmstate
|.if not WIN
@@ -356,6 +372,12 @@
|// Uses r10d.
|.macro restore_vmstate
|.if not WIN
+|.if LJ_HASSYSPROF
+| set_vmstate INTERP
+| mov TMPR, SAVE_L
+| mov TMPR, L:TMPR->base
+| mov qword [DISPATCH+DISPATCH_GL(top_frame)], TMPR
+|.endif
| mov TMPRd, SAVE_VMSTATE
| mov dword [DISPATCH+DISPATCH_GL(vmstate)], TMPRd
|.endif // WIN
@@ -435,7 +457,7 @@ static void build_subroutines(BuildCtx *ctx)
| jnz ->vm_returnp
|
| // Return to C.
- | set_vmstate CFUNC
+ | set_vmstate_sync_base CFUNC
| and PC, -8
| sub PC, BASE
| neg PC // Previous base = BASE - delta.
@@ -725,7 +747,7 @@ static void build_subroutines(BuildCtx *ctx)
| cleartp LFUNC:KBASE
| mov KBASE, LFUNC:KBASE->pc
| mov KBASE, [KBASE+PC2PROTO(k)]
- | set_vmstate LFUNC // LFUNC after KBASE restoration.
+ | set_vmstate_sync_base LFUNC // LFUNC after KBASE restoration.
| // BASE = base, RC = result, RB = meta base
| jmp RA // Jump to continuation.
|
@@ -1166,7 +1188,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|.macro .ffunc, name
|->ff_ .. name:
- | set_vmstate FFUNC
+ | set_vmstate_sync_base FFUNC
|.endmacro
|
|.macro .ffunc_1, name
@@ -1748,7 +1770,7 @@ static void build_subroutines(BuildCtx *ctx)
| movzx RAd, PC_RA
| neg RA
| lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
- | set_vmstate LFUNC // LFUNC state after BASE restoration.
+ | set_vmstate_sync_base LFUNC // LFUNC state after BASE restoration.
| ins_next
|
|6: // Fill up results with nil.
@@ -2513,7 +2535,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov KBASE, [KBASE+PC2PROTO(k)]
| mov L:RB->base, BASE
| mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
- | set_vmstate LFUNC // LFUNC after BASE & KBASE restoration.
+ | set_vmstate_sync_base LFUNC // LFUNC after BASE & KBASE restoration.
| // Modified copy of ins_next which handles function header dispatch, too.
| mov RCd, [PC]
| movzx RAd, RCH
@@ -2730,7 +2752,7 @@ static void build_subroutines(BuildCtx *ctx)
| call extern lj_ccallback_enter // (CTState *cts, void *cf)
| // lua_State * returned in eax (RD).
| mov BASE, L:RD->base
- | set_vmstate LFUNC // LFUNC after BASE restoration.
+ | set_vmstate_sync_base LFUNC // LFUNC after BASE restoration.
| mov RD, L:RD->top
| sub RD, BASE
| mov LFUNC:RB, [BASE-16]
@@ -4299,7 +4321,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov KBASE, LFUNC:KBASE->pc
| mov KBASE, [KBASE+PC2PROTO(k)]
| // LFUNC after the old BASE & KBASE is restored.
- | set_vmstate LFUNC
+ | set_vmstate_sync_base LFUNC
| ins_next
|
|6: // Fill up results with nil.
@@ -4591,7 +4613,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_AD // BASE = new base, RA = framesize, RD = nargs+1
| mov KBASE, [PC-4+PC2PROTO(k)]
| mov L:RB, SAVE_L
- | set_vmstate LFUNC // LFUNC after KBASE restoration.
+ | set_vmstate_sync_base LFUNC // LFUNC after KBASE restoration.
| lea RA, [BASE+RA*8] // Top of frame.
| cmp RA, L:RB->maxstack
| ja ->vm_growstack_f
@@ -4629,7 +4651,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov [RD-8], RB // Store delta + FRAME_VARG.
| mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
| mov L:RB, SAVE_L
- | set_vmstate LFUNC // LFUNC after KBASE restoration.
+ | set_vmstate_sync_base LFUNC // LFUNC after KBASE restoration.
| lea RA, [RD+RA*8]
| cmp RA, L:RB->maxstack
| ja ->vm_growstack_v // Need to grow stack.
@@ -4685,7 +4707,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov CARG1, L:RB // Caveat: CARG1 may be RA.
}
| ja ->vm_growstack_c // Need to grow stack.
- | set_vmstate CFUNC // CFUNC before entering C function.
+ | set_vmstate_sync_base CFUNC // CFUNC before entering C function.
if (op == BC_FUNCC) {
| call KBASE // (lua_State *L)
} else {
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 22f69edf..f7ffe5d2 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -452,8 +452,10 @@
|// user code. So we need to sync the BASE on each vmstate change to
|// keep it consistent.
|.macro set_vmstate_sync_base, st
+|.if LJ_HASSYSPROF
| set_vmstate INTERP // Guard for non-atomic VM context restoration
| mov dword [DISPATCH+DISPATCH_GL(top_frame)], BASE
+|.endif
| set_vmstate st
|.endmacro
|
@@ -475,19 +477,25 @@
|// Uses spilled ecx on x86 or XCHGd (r11d) on x64.
|.macro restore_vmstate
|.if not WIN
+|.if LJ_HASSYSPROF
| set_vmstate INTERP
+|.endif
|.if not X64
| mov SPILLECX, ecx
+|.if LJ_HASSYSPROF
| mov ecx, SAVE_L
| mov ecx, L:ecx->base
| mov dword [DISPATCH+DISPATCH_GL(top_frame)], ecx
+|.endif
| mov ecx, SAVE_VMSTATE
| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ecx
| mov ecx, SPILLECX
|.else // X64
+|.if LJ_HASSYSPROF
| mov XCHGd, SAVE_L
| mov XCHGd, L:XCHGd->base
| mov dword [DISPATCH+DISPATCH_GL(top_frame)], XCHGd
+|.endif
| mov XCHGd, SAVE_VMSTATE
| mov dword [DISPATCH+DISPATCH_GL(vmstate)], XCHGd
|.endif // X64
diff --git a/test/tarantool-tests/misclib-sysprof-capi.test.lua b/test/tarantool-tests/misclib-sysprof-capi.test.lua
index afb99cf2..dad0fe4a 100644
--- a/test/tarantool-tests/misclib-sysprof-capi.test.lua
+++ b/test/tarantool-tests/misclib-sysprof-capi.test.lua
@@ -1,8 +1,7 @@
-- Sysprof is implemented for x86 and x64 architectures only.
-local ffi = require("ffi")
-require("utils").skipcond(
- jit.arch ~= "x86" and jit.arch ~= "x64" or jit.os ~= "Linux"
- or ffi.abi("gc64"),
+local utils = require("utils")
+utils.skipcond(
+ jit.arch ~= "x86" and jit.arch ~= "x64" or jit.os ~= "Linux",
jit.arch.." architecture or "..jit.os..
" OS is NIY for sysprof"
)
diff --git a/test/tarantool-tests/misclib-sysprof-lapi.test.lua b/test/tarantool-tests/misclib-sysprof-lapi.test.lua
index dbff41b0..4bf10e8d 100644
--- a/test/tarantool-tests/misclib-sysprof-lapi.test.lua
+++ b/test/tarantool-tests/misclib-sysprof-lapi.test.lua
@@ -1,10 +1,7 @@
-- Sysprof is implemented for x86 and x64 architectures only.
-local ffi = require("ffi")
local utils = require("utils")
-
utils.skipcond(
- jit.arch ~= "x86" and jit.arch ~= "x64" or jit.os ~= "Linux"
- or ffi.abi("gc64"),
+ jit.arch ~= "x86" and jit.arch ~= "x64" or jit.os ~= "Linux",
jit.arch.." architecture or "..jit.os..
" OS is NIY for sysprof"
)
--
2.38.0
More information about the Tarantool-patches
mailing list