[Tarantool-patches] [PATCH luajit] GC64: enable sysprof support

sergos sergos at tarantool.org
Wed Nov 30 19:04:26 MSK 2022


Hi!

Thanks for the patch!

LGTM.

Sergos

> On 11 Nov 2022, at 13:19, Maxim Kokryashkin <max.kokryashkin at gmail.com> wrote:
> 
> This patch reverts changes introduced via commit
> 71e7020637bb15024ceb7e93ce66b61870567339 ('GC64: disable
> sysprof support'), fixing the incorrect behavior in vm
> by adding the `LJ_HASSYSPROF` flag to the DynASM flags.
> 
> Resolves tarantool/tarantool#7919
> Related to tarantool/tarantool#781
> ---
> Issue: https://github.com/tarantool/tarantool/issues/7919
> Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-7919-sysprof-gc64
> PR: https://github.com/tarantool/tarantool/pull/7922
> 
> cmake/SetDynASMFlags.cmake                    |  5 +++
> src/lj_arch.h                                 |  2 +-
> src/lj_obj.h                                  |  2 +
> src/vm_x64.dasc                               | 42 ++++++++++++++-----
> src/vm_x86.dasc                               |  8 ++++
> .../misclib-sysprof-capi.test.lua             |  7 ++--
> .../misclib-sysprof-lapi.test.lua             |  5 +--
> 7 files changed, 52 insertions(+), 19 deletions(-)
> 
> diff --git a/cmake/SetDynASMFlags.cmake b/cmake/SetDynASMFlags.cmake
> index 8e10758f..142d7e64 100644
> --- a/cmake/SetDynASMFlags.cmake
> +++ b/cmake/SetDynASMFlags.cmake
> @@ -113,6 +113,11 @@ if(NOT FOUND EQUAL -1)
>   AppendFlags(HOST_C_FLAGS -DLUAJIT_NO_UNWIND)
> endif()
> 
> +string(FIND "${TESTARCH}" "LJ_HASSYSPROF 1" FOUND)
> +if(NOT FOUND EQUAL -1)
> +  list(APPEND DYNASM_FLAGS -D LJ_HASSYSPROF)
> +endif()
> +
> string(REGEX MATCH "LJ_ARCH_VERSION ([0-9]+)" LUAJIT_ARCH_VERSION ${TESTARCH})
> list(APPEND DYNASM_FLAGS -D VER=${CMAKE_MATCH_1})
> 
> diff --git a/src/lj_arch.h b/src/lj_arch.h
> index 40129d9e..cd02fee1 100644
> --- a/src/lj_arch.h
> +++ b/src/lj_arch.h
> @@ -595,7 +595,7 @@
> #endif
> 
> /* Disable or enable the platform and Lua profiler. */
> -#if defined(LUAJIT_DISABLE_SYSPROF) || defined(LJ_ARCH_NOSYSPROF) || !LJ_TARGET_LINUX || LJ_GC64
> +#if defined(LUAJIT_DISABLE_SYSPROF) || defined(LJ_ARCH_NOSYSPROF) || !LJ_TARGET_LINUX
> #define LJ_HASSYSPROF		0
> #else
> #define LJ_HASSYSPROF		1
> diff --git a/src/lj_obj.h b/src/lj_obj.h
> index d1451c3a..45507e0d 100644
> --- a/src/lj_obj.h
> +++ b/src/lj_obj.h
> @@ -674,7 +674,9 @@ typedef struct global_State {
>   MRef jit_base;	/* Current JIT code L->base or NULL. */
>   MRef ctype_state;	/* Pointer to C type state. */
>   GCRef gcroot[GCROOT_MAX];  /* GC roots. */
> +#ifdef LJ_HASSYSPROF
>   TValue *top_frame;	/* Top frame for sysprof. */
> +#endif
> } global_State;
> 
> #define mainthread(g)	(&gcref(g->mainthref)->th)
> diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
> index 974047d3..59f117ba 100644
> --- a/src/vm_x64.dasc
> +++ b/src/vm_x64.dasc
> @@ -345,6 +345,22 @@
> |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
> |.endmacro
> |
> +|// Stash interpreter's internal base and set current VM state.
> +|// XXX: Each time profiler sees LFUNC, CFUNC or FFUNC state, it tries
> +|// to dump Lua calling stack, so it needs a stack base pointer.
> +|// If the sampling signal arriving during the execution of the VM code,
> +|// base pointer stored in the current lua_State can be irrelevant, as
> +|// it syncs with the BASE register only when the control is passed to
> +|// user code. So we need to sync the BASE on each vmstate change to
> +|// keep it consistent.
> +|.macro set_vmstate_sync_base, st
> +|.if LJ_HASSYSPROF
> +|  set_vmstate INTERP  // Guard for non-atomic VM context restoration
> +|  mov qword [DISPATCH+DISPATCH_GL(top_frame)], BASE
> +|.endif
> +|  set_vmstate st
> +|.endmacro
> +|
> |// Uses TMPRd (r10d).
> |.macro save_vmstate
> |.if not WIN
> @@ -356,6 +372,12 @@
> |// Uses r10d.
> |.macro restore_vmstate
> |.if not WIN
> +|.if LJ_HASSYSPROF
> +|  set_vmstate INTERP
> +|  mov TMPR, SAVE_L
> +|  mov TMPR, L:TMPR->base
> +|  mov qword [DISPATCH+DISPATCH_GL(top_frame)], TMPR
> +|.endif
> |  mov TMPRd, SAVE_VMSTATE
> |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], TMPRd
> |.endif // WIN
> @@ -435,7 +457,7 @@ static void build_subroutines(BuildCtx *ctx)
>   |  jnz ->vm_returnp
>   |
>   |  // Return to C.
> -  |  set_vmstate CFUNC
> +  |  set_vmstate_sync_base CFUNC
>   |  and PC, -8
>   |  sub PC, BASE
>   |  neg PC				// Previous base = BASE - delta.
> @@ -725,7 +747,7 @@ static void build_subroutines(BuildCtx *ctx)
>   |  cleartp LFUNC:KBASE
>   |  mov KBASE, LFUNC:KBASE->pc
>   |  mov KBASE, [KBASE+PC2PROTO(k)]
> -  |  set_vmstate LFUNC			// LFUNC after KBASE restoration.
> +  |  set_vmstate_sync_base LFUNC	// LFUNC after KBASE restoration.
>   |  // BASE = base, RC = result, RB = meta base
>   |  jmp RA				// Jump to continuation.
>   |
> @@ -1166,7 +1188,7 @@ static void build_subroutines(BuildCtx *ctx)
>   |
>   |.macro .ffunc, name
>   |->ff_ .. name:
> -  |  set_vmstate FFUNC
> +  |  set_vmstate_sync_base FFUNC
>   |.endmacro
>   |
>   |.macro .ffunc_1, name
> @@ -1748,7 +1770,7 @@ static void build_subroutines(BuildCtx *ctx)
>   |  movzx RAd, PC_RA
>   |  neg RA
>   |  lea BASE, [BASE+RA*8-16]		// base = base - (RA+2)*8
> -  |  set_vmstate LFUNC			// LFUNC state after BASE restoration.
> +  |  set_vmstate_sync_base LFUNC	// LFUNC state after BASE restoration.
>   |  ins_next
>   |
>   |6:  // Fill up results with nil.
> @@ -2513,7 +2535,7 @@ static void build_subroutines(BuildCtx *ctx)
>   |  mov KBASE, [KBASE+PC2PROTO(k)]
>   |  mov L:RB->base, BASE
>   |  mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
> -  |  set_vmstate LFUNC			// LFUNC after BASE & KBASE restoration.
> +  |  set_vmstate_sync_base LFUNC	// LFUNC after BASE & KBASE restoration.
>   |  // Modified copy of ins_next which handles function header dispatch, too.
>   |  mov RCd, [PC]
>   |  movzx RAd, RCH
> @@ -2730,7 +2752,7 @@ static void build_subroutines(BuildCtx *ctx)
>   |  call extern lj_ccallback_enter	// (CTState *cts, void *cf)
>   |  // lua_State * returned in eax (RD).
>   |  mov BASE, L:RD->base
> -  |  set_vmstate LFUNC			// LFUNC after BASE restoration.
> +  |  set_vmstate_sync_base LFUNC	// LFUNC after BASE restoration.
>   |  mov RD, L:RD->top
>   |  sub RD, BASE
>   |  mov LFUNC:RB, [BASE-16]
> @@ -4299,7 +4321,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>     |  mov KBASE, LFUNC:KBASE->pc
>     |  mov KBASE, [KBASE+PC2PROTO(k)]
>     |  // LFUNC after the old BASE & KBASE is restored.
> -    |  set_vmstate LFUNC
> +    |  set_vmstate_sync_base LFUNC
>     |  ins_next
>     |
>     |6:  // Fill up results with nil.
> @@ -4591,7 +4613,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>     |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
>     |  mov KBASE, [PC-4+PC2PROTO(k)]
>     |  mov L:RB, SAVE_L
> -    |  set_vmstate LFUNC		// LFUNC after KBASE restoration.
> +    |  set_vmstate_sync_base LFUNC	// LFUNC after KBASE restoration.
>     |  lea RA, [BASE+RA*8]		// Top of frame.
>     |  cmp RA, L:RB->maxstack
>     |  ja ->vm_growstack_f
> @@ -4629,7 +4651,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>     |  mov [RD-8], RB			// Store delta + FRAME_VARG.
>     |  mov [RD-16], LFUNC:KBASE		// Store copy of LFUNC.
>     |  mov L:RB, SAVE_L
> -    |  set_vmstate LFUNC		// LFUNC after KBASE restoration.
> +    |  set_vmstate_sync_base LFUNC	// LFUNC after KBASE restoration.
>     |  lea RA, [RD+RA*8]
>     |  cmp RA, L:RB->maxstack
>     |  ja ->vm_growstack_v		// Need to grow stack.
> @@ -4685,7 +4707,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>       |  mov CARG1, L:RB		// Caveat: CARG1 may be RA.
>     }
>     |  ja ->vm_growstack_c		// Need to grow stack.
> -    |  set_vmstate CFUNC		// CFUNC before entering C function.
> +    |  set_vmstate_sync_base CFUNC	// CFUNC before entering C function.
>     if (op == BC_FUNCC) {
>       |  call KBASE			// (lua_State *L)
>     } else {
> diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
> index 22f69edf..f7ffe5d2 100644
> --- a/src/vm_x86.dasc
> +++ b/src/vm_x86.dasc
> @@ -452,8 +452,10 @@
> |// user code. So we need to sync the BASE on each vmstate change to
> |// keep it consistent.
> |.macro set_vmstate_sync_base, st
> +|.if LJ_HASSYSPROF
> |  set_vmstate INTERP  // Guard for non-atomic VM context restoration
> |  mov dword [DISPATCH+DISPATCH_GL(top_frame)], BASE
> +|.endif
> |  set_vmstate st
> |.endmacro
> |
> @@ -475,19 +477,25 @@
> |// Uses spilled ecx on x86 or XCHGd (r11d) on x64.
> |.macro restore_vmstate
> |.if not WIN
> +|.if LJ_HASSYSPROF
> |  set_vmstate INTERP
> +|.endif
> |.if not X64
> |  mov SPILLECX, ecx
> +|.if LJ_HASSYSPROF
> |  mov ecx, SAVE_L
> |  mov ecx, L:ecx->base
> |  mov dword [DISPATCH+DISPATCH_GL(top_frame)], ecx
> +|.endif
> |  mov ecx, SAVE_VMSTATE
> |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ecx
> |  mov ecx, SPILLECX
> |.else // X64
> +|.if LJ_HASSYSPROF
> |  mov XCHGd, SAVE_L
> |  mov XCHGd, L:XCHGd->base
> |  mov dword [DISPATCH+DISPATCH_GL(top_frame)], XCHGd
> +|.endif
> |  mov XCHGd, SAVE_VMSTATE
> |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], XCHGd
> |.endif // X64
> diff --git a/test/tarantool-tests/misclib-sysprof-capi.test.lua b/test/tarantool-tests/misclib-sysprof-capi.test.lua
> index afb99cf2..dad0fe4a 100644
> --- a/test/tarantool-tests/misclib-sysprof-capi.test.lua
> +++ b/test/tarantool-tests/misclib-sysprof-capi.test.lua
> @@ -1,8 +1,7 @@
> -- Sysprof is implemented for x86 and x64 architectures only.
> -local ffi = require("ffi")
> -require("utils").skipcond(
> -  jit.arch ~= "x86" and jit.arch ~= "x64" or jit.os ~= "Linux"
> -    or ffi.abi("gc64"),
> +local utils = require("utils")
> +utils.skipcond(
> +  jit.arch ~= "x86" and jit.arch ~= "x64" or jit.os ~= "Linux",
>   jit.arch.." architecture or "..jit.os..
>   " OS is NIY for sysprof"
> )
> diff --git a/test/tarantool-tests/misclib-sysprof-lapi.test.lua b/test/tarantool-tests/misclib-sysprof-lapi.test.lua
> index dbff41b0..4bf10e8d 100644
> --- a/test/tarantool-tests/misclib-sysprof-lapi.test.lua
> +++ b/test/tarantool-tests/misclib-sysprof-lapi.test.lua
> @@ -1,10 +1,7 @@
> -- Sysprof is implemented for x86 and x64 architectures only.
> -local ffi = require("ffi")
> local utils = require("utils")
> -
> utils.skipcond(
> -  jit.arch ~= "x86" and jit.arch ~= "x64" or jit.os ~= "Linux"
> -    or ffi.abi("gc64"),
> +  jit.arch ~= "x86" and jit.arch ~= "x64" or jit.os ~= "Linux",
>   jit.arch.." architecture or "..jit.os..
>   " OS is NIY for sysprof"
> )
> -- 
> 2.38.0
> 



More information about the Tarantool-patches mailing list