[Tarantool-patches] [PATCH luajit v2 3/7] vm: introduce VM states for Lua and fast functions
Sergey Ostanevich
sergos at tarantool.org
Sat Dec 26 22:07:19 MSK 2020
Hi!
LGTM.
Sergos
> On 25 Dec 2020, at 18:26, Sergey Kaplun <skaplun at tarantool.org> wrote:
>
> This patch introduces LJ_VMST_LFUNC and LJ_VMST_FFUNC VM states
> separated from LJ_VMST_INTERP. New VM states allow determining the
> context of Lua VM execution for x86 and x64 arches. Also, LJ_VMST_C is
> renamed to LJ_VMST_CFUNC for naming consistency with new VM states.
>
> Also, this patch adjusts the stack layout for x86 and x64 arches to save
> the VM state, keeping it consistent during stack unwinding when an error
> is raised.
>
> Part of tarantool/tarantool#5442
> ---
>
> Changes in v2:
> - Moved `.if not WIN` macro check inside (save|restore)_vmstate_through
> - Fixed naming: SAVE_UNUSED\d -> UNUSED\d
>
> src/lj_frame.h | 18 +++----
> src/lj_obj.h | 4 +-
> src/lj_profile.c | 5 +-
> src/luajit-gdb.py | 14 ++---
> src/vm_arm.dasc | 6 +--
> src/vm_arm64.dasc | 6 +--
> src/vm_mips.dasc | 6 +--
> src/vm_mips64.dasc | 6 +--
> src/vm_ppc.dasc | 6 +--
> src/vm_x64.dasc | 93 ++++++++++++++++++++++----------
> src/vm_x86.dasc | 131 +++++++++++++++++++++++++++++----------------
> 11 files changed, 188 insertions(+), 107 deletions(-)
>
> diff --git a/src/lj_frame.h b/src/lj_frame.h
> index 19c49a4..2e693f9 100644
> --- a/src/lj_frame.h
> +++ b/src/lj_frame.h
> @@ -127,13 +127,13 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
> #define CFRAME_SIZE (16*4)
> #define CFRAME_SHIFT_MULTRES 0
> #else
> -#define CFRAME_OFS_ERRF (15*4)
> -#define CFRAME_OFS_NRES (14*4)
> -#define CFRAME_OFS_PREV (13*4)
> -#define CFRAME_OFS_L (12*4)
> +#define CFRAME_OFS_ERRF (19*4)
> +#define CFRAME_OFS_NRES (18*4)
> +#define CFRAME_OFS_PREV (17*4)
> +#define CFRAME_OFS_L (16*4)
> #define CFRAME_OFS_PC (6*4)
> #define CFRAME_OFS_MULTRES (5*4)
> -#define CFRAME_SIZE (12*4)
> +#define CFRAME_SIZE (16*4)
> #define CFRAME_SHIFT_MULTRES 0
> #endif
> #elif LJ_TARGET_X64
> @@ -152,11 +152,11 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
> #define CFRAME_OFS_NRES (22*4)
> #define CFRAME_OFS_MULTRES (21*4)
> #endif
> -#define CFRAME_SIZE (10*8)
> +#define CFRAME_SIZE (12*8)
> #define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8)
> #define CFRAME_SHIFT_MULTRES 0
> #else
> -#define CFRAME_OFS_PREV (4*8)
> +#define CFRAME_OFS_PREV (6*8)
> #if LJ_GC64
> #define CFRAME_OFS_PC (3*8)
> #define CFRAME_OFS_L (2*8)
> @@ -171,9 +171,9 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
> #define CFRAME_OFS_MULTRES (1*4)
> #endif
> #if LJ_NO_UNWIND
> -#define CFRAME_SIZE (12*8)
> +#define CFRAME_SIZE (14*8)
> #else
> -#define CFRAME_SIZE (10*8)
> +#define CFRAME_SIZE (12*8)
> #endif
> #define CFRAME_SIZE_JIT (CFRAME_SIZE + 16)
> #define CFRAME_SHIFT_MULTRES 0
> diff --git a/src/lj_obj.h b/src/lj_obj.h
> index 927b347..7fb715e 100644
> --- a/src/lj_obj.h
> +++ b/src/lj_obj.h
> @@ -512,7 +512,9 @@ typedef struct GCtab {
> /* VM states. */
> enum {
> LJ_VMST_INTERP, /* Interpreter. */
> - LJ_VMST_C, /* C function. */
> + LJ_VMST_LFUNC, /* Lua function. */
> + LJ_VMST_FFUNC, /* Fast function. */
> + LJ_VMST_CFUNC, /* C function. */
> LJ_VMST_GC, /* Garbage collector. */
> LJ_VMST_EXIT, /* Trace exit handler. */
> LJ_VMST_RECORD, /* Trace recorder. */
> diff --git a/src/lj_profile.c b/src/lj_profile.c
> index 116998e..637e03c 100644
> --- a/src/lj_profile.c
> +++ b/src/lj_profile.c
> @@ -157,7 +157,10 @@ static void profile_trigger(ProfileState *ps)
> int st = g->vmstate;
> ps->vmstate = st >= 0 ? 'N' :
> st == ~LJ_VMST_INTERP ? 'I' :
> - st == ~LJ_VMST_C ? 'C' :
> + st == ~LJ_VMST_CFUNC ? 'C' :
> + /* Stubs for profiler hooks. */
> + st == ~LJ_VMST_FFUNC ? 'I' :
> + st == ~LJ_VMST_LFUNC ? 'I' :
> st == ~LJ_VMST_GC ? 'G' : 'J';
> g->hookmask = (mask | HOOK_PROFILE);
> lj_dispatch_update(g);
> diff --git a/src/luajit-gdb.py b/src/luajit-gdb.py
> index 652c560..f1fd623 100644
> --- a/src/luajit-gdb.py
> +++ b/src/luajit-gdb.py
> @@ -206,12 +206,14 @@ def J(g):
> def vm_state(g):
> return {
> i2notu32(0): 'INTERP',
> - i2notu32(1): 'C',
> - i2notu32(2): 'GC',
> - i2notu32(3): 'EXIT',
> - i2notu32(4): 'RECORD',
> - i2notu32(5): 'OPT',
> - i2notu32(6): 'ASM',
> + i2notu32(1): 'LFUNC',
> + i2notu32(2): 'FFUNC',
> + i2notu32(3): 'CFUNC',
> + i2notu32(4): 'GC',
> + i2notu32(5): 'EXIT',
> + i2notu32(6): 'RECORD',
> + i2notu32(7): 'OPT',
> + i2notu32(8): 'ASM',
> }.get(int(tou32(g['vmstate'])), 'TRACE')
>
> def gc_state(g):
> diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
> index d4cdaf5..ae2efdf 100644
> --- a/src/vm_arm.dasc
> +++ b/src/vm_arm.dasc
> @@ -287,7 +287,7 @@ static void build_subroutines(BuildCtx *ctx)
> |
> | str RB, L->base
> | ldr KBASE, SAVE_NRES
> - | mv_vmstate CARG4, C
> + | mv_vmstate CARG4, CFUNC
> | sub BASE, BASE, #8
> | subs CARG3, RC, #8
> | lsl KBASE, KBASE, #3 // KBASE = (nresults_wanted+1)*8
> @@ -348,7 +348,7 @@ static void build_subroutines(BuildCtx *ctx)
> | mov CRET1, CARG2
> |->vm_unwind_c_eh: // Landing pad for external unwinder.
> | ldr L, SAVE_L
> - | mv_vmstate CARG4, C
> + | mv_vmstate CARG4, CFUNC
> | ldr GL:CARG3, L->glref
> | str CARG4, GL:CARG3->vmstate
> | str L, GL:CARG3->cur_L
> @@ -4487,7 +4487,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> if (op == BC_FUNCCW) {
> | ldr CARG2, CFUNC:CARG3->f
> }
> - | mv_vmstate CARG3, C
> + | mv_vmstate CARG3, CFUNC
> | mov CARG1, L
> | bhi ->vm_growstack_c // Need to grow stack.
> | st_vmstate CARG3
> diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
> index 3eaf376..f783428 100644
> --- a/src/vm_arm64.dasc
> +++ b/src/vm_arm64.dasc
> @@ -332,7 +332,7 @@ static void build_subroutines(BuildCtx *ctx)
> |
> | str RB, L->base
> | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1.
> - | mv_vmstate TMP0w, C
> + | mv_vmstate TMP0w, CFUNC
> | sub BASE, BASE, #16
> | subs TMP2, RC, #8
> | st_vmstate TMP0w
> @@ -391,7 +391,7 @@ static void build_subroutines(BuildCtx *ctx)
> | mov CRET1, CARG2
> |->vm_unwind_c_eh: // Landing pad for external unwinder.
> | ldr L, SAVE_L
> - | mv_vmstate TMP0w, C
> + | mv_vmstate TMP0w, CFUNC
> | ldr GL, L->glref
> | st_vmstate TMP0w
> | b ->vm_leave_unw
> @@ -3816,7 +3816,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> if (op == BC_FUNCCW) {
> | ldr CARG2, CFUNC:CARG3->f
> }
> - | mv_vmstate TMP0w, C
> + | mv_vmstate TMP0w, CFUNC
> | mov CARG1, L
> | bhi ->vm_growstack_c // Need to grow stack.
> | st_vmstate TMP0w
> diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
> index 1afd611..ec57d78 100644
> --- a/src/vm_mips.dasc
> +++ b/src/vm_mips.dasc
> @@ -403,7 +403,7 @@ static void build_subroutines(BuildCtx *ctx)
> |
> | addiu TMP1, RD, -8
> | sw TMP2, L->base
> - | li_vmstate C
> + | li_vmstate CFUNC
> | lw TMP2, SAVE_NRES
> | addiu BASE, BASE, -8
> | st_vmstate
> @@ -473,7 +473,7 @@ static void build_subroutines(BuildCtx *ctx)
> | move CRET1, CARG2
> |->vm_unwind_c_eh: // Landing pad for external unwinder.
> | lw L, SAVE_L
> - | li TMP0, ~LJ_VMST_C
> + | li TMP0, ~LJ_VMST_CFUNC
> | lw GL:TMP1, L->glref
> | b ->vm_leave_unw
> |. sw TMP0, GL:TMP1->vmstate
> @@ -5085,7 +5085,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | sw BASE, L->base
> | sltu AT, TMP2, TMP1
> | sw RC, L->top
> - | li_vmstate C
> + | li_vmstate CFUNC
> if (op == BC_FUNCCW) {
> | lw CARG2, CFUNC:RB->f
> }
> diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
> index c06270a..9a749f9 100644
> --- a/src/vm_mips64.dasc
> +++ b/src/vm_mips64.dasc
> @@ -449,7 +449,7 @@ static void build_subroutines(BuildCtx *ctx)
> |
> | addiu TMP1, RD, -8
> | sd TMP2, L->base
> - | li_vmstate C
> + | li_vmstate CFUNC
> | lw TMP2, SAVE_NRES
> | daddiu BASE, BASE, -16
> | st_vmstate
> @@ -517,7 +517,7 @@ static void build_subroutines(BuildCtx *ctx)
> | move CRET1, CARG2
> |->vm_unwind_c_eh: // Landing pad for external unwinder.
> | ld L, SAVE_L
> - | li TMP0, ~LJ_VMST_C
> + | li TMP0, ~LJ_VMST_CFUNC
> | ld GL:TMP1, L->glref
> | b ->vm_leave_unw
> |. sw TMP0, GL:TMP1->vmstate
> @@ -4952,7 +4952,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | sd BASE, L->base
> | sltu AT, TMP2, TMP1
> | sd RC, L->top
> - | li_vmstate C
> + | li_vmstate CFUNC
> if (op == BC_FUNCCW) {
> | ld CARG2, CFUNC:RB->f
> }
> diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
> index b4260eb..62e9b68 100644
> --- a/src/vm_ppc.dasc
> +++ b/src/vm_ppc.dasc
> @@ -520,7 +520,7 @@ static void build_subroutines(BuildCtx *ctx)
> | // TMP0 = PC & FRAME_TYPE
> | cmpwi TMP0, FRAME_C
> | rlwinm TMP2, PC, 0, 0, 28
> - | li_vmstate C
> + | li_vmstate CFUNC
> | sub TMP2, BASE, TMP2 // TMP2 = previous base.
> | bney ->vm_returnp
> |
> @@ -596,7 +596,7 @@ static void build_subroutines(BuildCtx *ctx)
> |->vm_unwind_c_eh: // Landing pad for external unwinder.
> | lwz L, SAVE_L
> | .toc ld TOCREG, SAVE_TOC
> - | li TMP0, ~LJ_VMST_C
> + | li TMP0, ~LJ_VMST_CFUNC
> | lwz GL:TMP1, L->glref
> | stw TMP0, GL:TMP1->vmstate
> | b ->vm_leave_unw
> @@ -5060,7 +5060,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | stp BASE, L->base
> | cmplw TMP1, TMP2
> | stp RC, L->top
> - | li_vmstate C
> + | li_vmstate CFUNC
> |.if TOC
> | mtctr TMP3
> |.else
> diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
> index 80753e0..83cc3e1 100644
> --- a/src/vm_x64.dasc
> +++ b/src/vm_x64.dasc
> @@ -140,7 +140,7 @@
> |//-----------------------------------------------------------------------
> |.else // x64/POSIX stack layout
> |
> -|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
> +|.define CFRAME_SPACE, qword*7 // Delta for rsp (see <--).
> |.macro saveregs_
> | push rbx; push r15; push r14
> |.if NO_UNWIND
> @@ -161,26 +161,29 @@
> |
> |//----- 16 byte aligned,
> |.if NO_UNWIND
> -|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
> -|.define SAVE_R4, aword [rsp+aword*10]
> -|.define SAVE_R3, aword [rsp+aword*9]
> -|.define SAVE_R2, aword [rsp+aword*8]
> -|.define SAVE_R1, aword [rsp+aword*7]
> -|.define SAVE_RU2, aword [rsp+aword*6]
> -|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
> +|.define SAVE_RET, qword [rsp+qword*13] //<-- rsp entering interpreter.
> +|.define SAVE_R4, qword [rsp+qword*12]
> +|.define SAVE_R3, qword [rsp+qword*11]
> +|.define SAVE_R2, qword [rsp+qword*10]
> +|.define SAVE_R1, qword [rsp+qword*9]
> +|.define SAVE_RU2, qword [rsp+qword*8]
> +|.define SAVE_RU1, qword [rsp+qword*7] //<-- rsp after register saves.
> |.else
> -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
> -|.define SAVE_R4, aword [rsp+aword*8]
> -|.define SAVE_R3, aword [rsp+aword*7]
> -|.define SAVE_R2, aword [rsp+aword*6]
> -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
> +|.define SAVE_RET, qword [rsp+qword*11] //<-- rsp entering interpreter.
> +|.define SAVE_R4, qword [rsp+qword*10]
> +|.define SAVE_R3, qword [rsp+qword*9]
> +|.define SAVE_R2, qword [rsp+qword*8]
> +|.define SAVE_R1, qword [rsp+qword*7] //<-- rsp after register saves.
> |.endif
> -|.define SAVE_CFRAME, aword [rsp+aword*4]
> -|.define SAVE_PC, aword [rsp+aword*3]
> -|.define SAVE_L, aword [rsp+aword*2]
> +|.define SAVE_CFRAME, qword [rsp+qword*6]
> +|.define UNUSED2, qword [rsp+qword*5]
> +|.define UNUSED1, dword [rsp+dword*8]
> +|.define SAVE_VMSTATE, dword [rsp+dword*8]
> +|.define SAVE_PC, qword [rsp+qword*3]
> +|.define SAVE_L, qword [rsp+qword*2]
> |.define SAVE_ERRF, dword [rsp+dword*3]
> |.define SAVE_NRES, dword [rsp+dword*2]
> -|.define TMP1, aword [rsp] //<-- rsp while in interpreter.
> +|.define TMP1, qword [rsp] //<-- rsp while in interpreter.
> |//----- 16 byte aligned
> |
> |.define TMP1d, dword [rsp]
> @@ -342,6 +345,22 @@
> | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
> |.endmacro
> |
> +|// Save vmstate through register.
> +|.macro save_vmstate_through, reg
> +|.if not WIN
> +| mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)]
> +| mov SAVE_VMSTATE, reg
> +|.endif // WIN
> +|.endmacro
> +|
> +|// Restore vmstate through register.
> +|.macro restore_vmstate_through, reg
> +|.if not WIN
> +| mov reg, SAVE_VMSTATE
> +| mov dword [DISPATCH+DISPATCH_GL(vmstate)], reg
> +|.endif // WIN
> +|.endmacro
> +|
> |.macro fpop1; fstp st1; .endmacro
> |
> |// Synthesize SSE FP constants.
> @@ -416,7 +435,7 @@ static void build_subroutines(BuildCtx *ctx)
> | jnz ->vm_returnp
> |
> | // Return to C.
> - | set_vmstate C
> + | set_vmstate CFUNC
> | and PC, -8
> | sub PC, BASE
> | neg PC // Previous base = BASE - delta.
> @@ -448,6 +467,8 @@ static void build_subroutines(BuildCtx *ctx)
> | xor eax, eax // Ok return status for vm_pcall.
> |
> |->vm_leave_unw:
> + | // DISPATCH required to set properly.
> + | restore_vmstate_through RAd
> | restoreregs
> | ret
> |
> @@ -493,7 +514,9 @@ static void build_subroutines(BuildCtx *ctx)
> | mov L:DISPATCH, SAVE_L
> | mov GL:RB, L:DISPATCH->glref
> | mov GL:RB->cur_L, L:DISPATCH
> - | mov dword GL:RB->vmstate, ~LJ_VMST_C
> + | mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC
> + | mov DISPATCH, L:DISPATCH->glref // Setup pointer to dispatch table.
> + | add DISPATCH, GG_G2DISP
> | jmp ->vm_leave_unw
> |
> |->vm_unwind_rethrow:
> @@ -521,7 +544,7 @@ static void build_subroutines(BuildCtx *ctx)
> | mov [BASE-16], RA // Prepend false to error message.
> | mov [BASE-8], RB
> | mov RA, -16 // Results start at BASE+RA = BASE-16.
> - | set_vmstate INTERP
> + | set_vmstate INTERP // INTERP until jump to BC_RET* or return to C
> | jmp ->vm_returnc // Increments RD/MULTRES and returns.
> |
> |//-----------------------------------------------------------------------
> @@ -575,6 +598,7 @@ static void build_subroutines(BuildCtx *ctx)
> | lea KBASE, [esp+CFRAME_RESUME]
> | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
> | add DISPATCH, GG_G2DISP
> + | save_vmstate_through TMPRd
> | mov SAVE_PC, RD // Any value outside of bytecode is ok.
> | mov SAVE_CFRAME, RD
> | mov SAVE_NRES, RDd
> @@ -585,7 +609,7 @@ static void build_subroutines(BuildCtx *ctx)
> |
> | // Resume after yield (like a return).
> | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> - | set_vmstate INTERP
> + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
> | mov byte L:RB->status, RDL
> | mov BASE, L:RB->base
> | mov RD, L:RB->top
> @@ -622,11 +646,12 @@ static void build_subroutines(BuildCtx *ctx)
> | mov SAVE_CFRAME, KBASE
> | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
> | add DISPATCH, GG_G2DISP
> + | save_vmstate_through RDd
> | mov L:RB->cframe, rsp
> |
> |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
> | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> - | set_vmstate INTERP
> + | set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC*
> | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
> | add PC, RA
> | sub PC, BASE // PC = frame delta + frame type
> @@ -658,6 +683,7 @@ static void build_subroutines(BuildCtx *ctx)
> | mov SAVE_ERRF, 0 // No error function.
> | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame.
> | add DISPATCH, GG_G2DISP
> + | save_vmstate_through KBASEd
> | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
> |
> | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
> @@ -697,6 +723,7 @@ static void build_subroutines(BuildCtx *ctx)
> | cleartp LFUNC:KBASE
> | mov KBASE, LFUNC:KBASE->pc
> | mov KBASE, [KBASE+PC2PROTO(k)]
> + | set_vmstate LFUNC // LFUNC after KBASE restoration
> | // BASE = base, RC = result, RB = meta base
> | jmp RA // Jump to continuation.
> |
> @@ -1137,15 +1164,16 @@ static void build_subroutines(BuildCtx *ctx)
> |
> |.macro .ffunc, name
> |->ff_ .. name:
> + | set_vmstate FFUNC
> |.endmacro
> |
> |.macro .ffunc_1, name
> - |->ff_ .. name:
> + | .ffunc name
> | cmp NARGS:RDd, 1+1; jb ->fff_fallback
> |.endmacro
> |
> |.macro .ffunc_2, name
> - |->ff_ .. name:
> + | .ffunc name
> | cmp NARGS:RDd, 2+1; jb ->fff_fallback
> |.endmacro
> |
> @@ -1578,7 +1606,7 @@ static void build_subroutines(BuildCtx *ctx)
> | mov L:PC, TMP1
> | mov BASE, L:RB->base
> | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> - | set_vmstate INTERP
> + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
> |
> | cmp eax, LUA_YIELD
> | ja >8
> @@ -1717,6 +1745,7 @@ static void build_subroutines(BuildCtx *ctx)
> | movzx RAd, PC_RA
> | neg RA
> | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
> + | set_vmstate LFUNC // LFUNC state after BASE restoration
> | ins_next
> |
> |6: // Fill up results with nil.
> @@ -2481,7 +2510,7 @@ static void build_subroutines(BuildCtx *ctx)
> | mov KBASE, [KBASE+PC2PROTO(k)]
> | mov L:RB->base, BASE
> | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
> - | set_vmstate INTERP
> + | set_vmstate LFUNC // LFUNC after BASE & KBASE restoration
> | // Modified copy of ins_next which handles function header dispatch, too.
> | mov RCd, [PC]
> | movzx RAd, RCH
> @@ -2697,8 +2726,8 @@ static void build_subroutines(BuildCtx *ctx)
> | mov CARG1, CTSTATE
> | call extern lj_ccallback_enter // (CTState *cts, void *cf)
> | // lua_State * returned in eax (RD).
> - | set_vmstate INTERP
> | mov BASE, L:RD->base
> + | set_vmstate LFUNC // LFUNC after BASE restoration
> | mov RD, L:RD->top
> | sub RD, BASE
> | mov LFUNC:RB, [BASE-16]
> @@ -3974,6 +4003,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>
> case BC_CALL: case BC_CALLM:
> | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
> + | set_vmstate INTERP // INTERP until a new BASE is setup
> if (op == BC_CALLM) {
> | add NARGS:RDd, MULTRES
> }
> @@ -3995,6 +4025,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | mov LFUNC:RB, [RA-16]
> | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
> |->BC_CALLT_Z:
> + | set_vmstate INTERP // INTERP until a new BASE is setup
> | mov PC, [BASE-8]
> | test PCd, FRAME_TYPE
> | jnz >7
> @@ -4219,6 +4250,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | shl RAd, 3
> }
> |1:
> + | set_vmstate INTERP // INTERP until the old BASE & KBASE is restored
> | mov PC, [BASE-8]
> | mov MULTRES, RDd // Save nresults+1.
> | test PCd, FRAME_TYPE // Check frame type marker.
> @@ -4260,6 +4292,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | cleartp LFUNC:KBASE
> | mov KBASE, LFUNC:KBASE->pc
> | mov KBASE, [KBASE+PC2PROTO(k)]
> + | set_vmstate LFUNC // LFUNC after the old BASE & KBASE is restored
> | ins_next
> |
> |6: // Fill up results with nil.
> @@ -4551,6 +4584,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
> | mov KBASE, [PC-4+PC2PROTO(k)]
> | mov L:RB, SAVE_L
> + | set_vmstate LFUNC // LFUNC after KBASE restoration
> | lea RA, [BASE+RA*8] // Top of frame.
> | cmp RA, L:RB->maxstack
> | ja ->vm_growstack_f
> @@ -4588,6 +4622,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | mov [RD-8], RB // Store delta + FRAME_VARG.
> | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
> | mov L:RB, SAVE_L
> + | set_vmstate LFUNC // LFUNC after KBASE restoration
> | lea RA, [RD+RA*8]
> | cmp RA, L:RB->maxstack
> | ja ->vm_growstack_v // Need to grow stack.
> @@ -4643,7 +4678,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | mov CARG1, L:RB // Caveat: CARG1 may be RA.
> }
> | ja ->vm_growstack_c // Need to grow stack.
> - | set_vmstate C
> + | set_vmstate CFUNC // CFUNC before entering C function
> if (op == BC_FUNCC) {
> | call KBASE // (lua_State *L)
> } else {
> @@ -4653,7 +4688,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | // nresults returned in eax (RD).
> | mov BASE, L:RB->base
> | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> - | set_vmstate INTERP
> + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
> | lea RA, [BASE+RD*8]
> | neg RA
> | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
> diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
> index d76fbe3..b9dffa9 100644
> --- a/src/vm_x86.dasc
> +++ b/src/vm_x86.dasc
> @@ -140,7 +140,7 @@
> |
> |.else
> |
> -|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
> +|.define CFRAME_SPACE, dword*11 // Delta for esp (see <--).
> |.macro saveregs_
> | push edi; push esi; push ebx
> | sub esp, CFRAME_SPACE
> @@ -183,25 +183,30 @@
> |.define ARG1, aword [esp] //<-- esp while in interpreter.
> |//----- 16 byte aligned, ^^^ arguments for C callee
> |.else
> -|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
> -|.define SAVE_NRES, aword [esp+aword*14]
> -|.define SAVE_CFRAME, aword [esp+aword*13]
> -|.define SAVE_L, aword [esp+aword*12]
> +|.define SAVE_ERRF, dword [esp+dword*19] // vm_pcall/vm_cpcall only.
> +|.define SAVE_NRES, dword [esp+dword*18]
> +|.define SAVE_CFRAME, dword [esp+dword*17]
> +|.define SAVE_L, dword [esp+dword*16]
> |//----- 16 byte aligned, ^^^ arguments from C caller
> -|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
> -|.define SAVE_R4, aword [esp+aword*10]
> -|.define SAVE_R3, aword [esp+aword*9]
> -|.define SAVE_R2, aword [esp+aword*8]
> +|.define SAVE_RET, dword [esp+dword*15] //<-- esp entering interpreter.
> +|.define SAVE_R4, dword [esp+dword*14]
> +|.define SAVE_R3, dword [esp+dword*13]
> +|.define SAVE_R2, dword [esp+dword*12]
> |//----- 16 byte aligned
> -|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
> -|.define SAVE_PC, aword [esp+aword*6]
> -|.define TMP2, aword [esp+aword*5]
> -|.define TMP1, aword [esp+aword*4]
> +|.define UNUSED3, dword [esp+dword*11]
> +|.define UNUSED2, dword [esp+dword*10]
> +|.define UNUSED1, dword [esp+dword*9]
> +|.define SAVE_VMSTATE, dword [esp+dword*8]
> |//----- 16 byte aligned
> -|.define ARG4, aword [esp+aword*3]
> -|.define ARG3, aword [esp+aword*2]
> -|.define ARG2, aword [esp+aword*1]
> -|.define ARG1, aword [esp] //<-- esp while in interpreter.
> +|.define SAVE_R1, dword [esp+dword*7] //<-- esp after register saves.
> +|.define SAVE_PC, dword [esp+dword*6]
> +|.define TMP2, dword [esp+dword*5]
> +|.define TMP1, dword [esp+dword*4]
> +|//----- 16 byte aligned
> +|.define ARG4, dword [esp+dword*3]
> +|.define ARG3, dword [esp+dword*2]
> +|.define ARG2, dword [esp+dword*1]
> +|.define ARG1, dword [esp] //<-- esp while in interpreter.
> |//----- 16 byte aligned, ^^^ arguments for C callee
> |.endif
> |
> @@ -269,7 +274,7 @@
> |//-----------------------------------------------------------------------
> |.else // x64/POSIX stack layout
> |
> -|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
> +|.define CFRAME_SPACE, qword*7 // Delta for rsp (see <--).
> |.macro saveregs_
> | push rbx; push r15; push r14
> |.if NO_UNWIND
> @@ -290,33 +295,35 @@
> |
> |//----- 16 byte aligned,
> |.if NO_UNWIND
> -|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
> -|.define SAVE_R4, aword [rsp+aword*10]
> -|.define SAVE_R3, aword [rsp+aword*9]
> -|.define SAVE_R2, aword [rsp+aword*8]
> -|.define SAVE_R1, aword [rsp+aword*7]
> -|.define SAVE_RU2, aword [rsp+aword*6]
> -|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
> +|.define SAVE_RET, qword [rsp+qword*13] //<-- rsp entering interpreter.
> +|.define SAVE_R4, qword [rsp+qword*12]
> +|.define SAVE_R3, qword [rsp+qword*11]
> +|.define SAVE_R2, qword [rsp+qword*10]
> +|.define SAVE_R1, qword [rsp+qword*9]
> +|.define SAVE_RU2, qword [rsp+qword*8]
> +|.define SAVE_RU1, qword [rsp+qword*7] //<-- rsp after register saves.
> |.else
> -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
> -|.define SAVE_R4, aword [rsp+aword*8]
> -|.define SAVE_R3, aword [rsp+aword*7]
> -|.define SAVE_R2, aword [rsp+aword*6]
> -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
> +|.define SAVE_RET, qword [rsp+qword*11] //<-- rsp entering interpreter.
> +|.define SAVE_R4, qword [rsp+qword*10]
> +|.define SAVE_R3, qword [rsp+qword*9]
> +|.define SAVE_R2, qword [rsp+qword*8]
> +|.define SAVE_R1, qword [rsp+qword*7] //<-- rsp after register saves.
> |.endif
> -|.define SAVE_CFRAME, aword [rsp+aword*4]
> +|.define SAVE_CFRAME, qword [rsp+qword*6]
> +|.define UNUSED1, qword [rsp+qword*5]
> +|.define SAVE_VMSTATE, dword [rsp+dword*8]
> |.define SAVE_PC, dword [rsp+dword*7]
> |.define SAVE_L, dword [rsp+dword*6]
> |.define SAVE_ERRF, dword [rsp+dword*5]
> |.define SAVE_NRES, dword [rsp+dword*4]
> -|.define TMPa, aword [rsp+aword*1]
> +|.define TMPa, qword [rsp+qword*1]
> |.define TMP2, dword [rsp+dword*1]
> |.define TMP1, dword [rsp] //<-- rsp while in interpreter.
> |//----- 16 byte aligned
> |
> |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
> |.define TMPQ, qword [rsp]
> -|.define TMP3, dword [rsp+aword*1]
> +|.define TMP3, dword [rsp+qword*1]
> |.define MULTRES, TMP2
> |
> |.endif
> @@ -433,6 +440,22 @@
> | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
> |.endmacro
> |
> +|// Save vmstate through register.
> +|.macro save_vmstate_through, reg
> +|.if not WIN
> +| mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)]
> +| mov SAVE_VMSTATE, reg
> +|.endif // WIN
> +|.endmacro
> +|
> +|// Restore vmstate through register.
> +|.macro restore_vmstate_through, reg
> +|.if not WIN
> +| mov reg, SAVE_VMSTATE
> +| mov dword [DISPATCH+DISPATCH_GL(vmstate)], reg
> +|.endif // WIN
> +|.endmacro
> +|
> |// x87 compares.
> |.macro fcomparepp // Compare and pop st0 >< st1.
> | fucomip st1
> @@ -520,7 +543,7 @@ static void build_subroutines(BuildCtx *ctx)
> | jnz ->vm_returnp
> |
> | // Return to C.
> - | set_vmstate C
> + | set_vmstate CFUNC
> | and PC, -8
> | sub PC, BASE
> | neg PC // Previous base = BASE - delta.
> @@ -559,6 +582,8 @@ static void build_subroutines(BuildCtx *ctx)
> | xor eax, eax // Ok return status for vm_pcall.
> |
> |->vm_leave_unw:
> + | // DISPATCH required to set properly.
> + | restore_vmstate_through RA
> | restoreregs
> | ret
> |
> @@ -613,7 +638,9 @@ static void build_subroutines(BuildCtx *ctx)
> | mov L:DISPATCH, SAVE_L
> | mov GL:RB, L:DISPATCH->glref
> | mov dword GL:RB->cur_L, L:DISPATCH
> - | mov dword GL:RB->vmstate, ~LJ_VMST_C
> + | mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC
> + | mov DISPATCH, L:DISPATCH->glref // Setup pointer to dispatch table.
> + | add DISPATCH, GG_G2DISP
> | jmp ->vm_leave_unw
> |
> |->vm_unwind_rethrow:
> @@ -647,7 +674,7 @@ static void build_subroutines(BuildCtx *ctx)
> | mov PC, [BASE-4] // Fetch PC of previous frame.
> | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
> | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> - | set_vmstate INTERP
> + | set_vmstate INTERP // INTERP until jump to BC_RET* or return to C
> | jmp ->vm_returnc // Increments RD/MULTRES and returns.
> |
> |.if WIN and not X64
> @@ -714,10 +741,11 @@ static void build_subroutines(BuildCtx *ctx)
> | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
> |.endif
> | mov PC, FRAME_CP
> - | xor RD, RD
> | lea KBASEa, [esp+CFRAME_RESUME]
> | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
> | add DISPATCH, GG_G2DISP
> + | save_vmstate_through RD
> + | xor RD, RD
> | mov SAVE_PC, RD // Any value outside of bytecode is ok.
> | mov SAVE_CFRAME, RDa
> |.if X64
> @@ -730,7 +758,7 @@ static void build_subroutines(BuildCtx *ctx)
> |
> | // Resume after yield (like a return).
> | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> - | set_vmstate INTERP
> + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
> | mov byte L:RB->status, RDL
> | mov BASE, L:RB->base
> | mov RD, L:RB->top
> @@ -774,6 +802,7 @@ static void build_subroutines(BuildCtx *ctx)
> | mov SAVE_CFRAME, KBASEa
> | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
> | add DISPATCH, GG_G2DISP
> + | save_vmstate_through RD
> |.if X64
> | mov L:RB->cframe, rsp
> |.else
> @@ -782,7 +811,7 @@ static void build_subroutines(BuildCtx *ctx)
> |
> |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
> | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> - | set_vmstate INTERP
> + | set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC*
> | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
> | add PC, RA
> | sub PC, BASE // PC = frame delta + frame type
> @@ -823,6 +852,7 @@ static void build_subroutines(BuildCtx *ctx)
> | mov SAVE_ERRF, 0 // No error function.
> | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
> | add DISPATCH, GG_G2DISP
> + | save_vmstate_through KBASE
> | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
> |
> |.if X64
> @@ -885,6 +915,7 @@ static void build_subroutines(BuildCtx *ctx)
> | mov KBASE, LFUNC:KBASE->pc
> | mov KBASE, [KBASE+PC2PROTO(k)]
> | // BASE = base, RC = result, RB = meta base
> + | set_vmstate LFUNC // LFUNC after KBASE restoration
> | jmp RAa // Jump to continuation.
> |
> |.if FFI
> @@ -1409,15 +1440,16 @@ static void build_subroutines(BuildCtx *ctx)
> |
> |.macro .ffunc, name
> |->ff_ .. name:
> + | set_vmstate FFUNC
> |.endmacro
> |
> |.macro .ffunc_1, name
> - |->ff_ .. name:
> + | .ffunc name
> | cmp NARGS:RD, 1+1; jb ->fff_fallback
> |.endmacro
> |
> |.macro .ffunc_2, name
> - |->ff_ .. name:
> + | .ffunc name
> | cmp NARGS:RD, 2+1; jb ->fff_fallback
> |.endmacro
> |
> @@ -1924,7 +1956,7 @@ static void build_subroutines(BuildCtx *ctx)
> |.endif
> | mov BASE, L:RB->base
> | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> - | set_vmstate INTERP
> + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
> |
> | cmp eax, LUA_YIELD
> | ja >8
> @@ -2089,6 +2121,7 @@ static void build_subroutines(BuildCtx *ctx)
> | movzx RA, PC_RA
> | not RAa // Note: ~RA = -(RA+1)
> | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
> + | set_vmstate LFUNC // LFUNC state after BASE restoration
> | ins_next
> |
> |6: // Fill up results with nil.
> @@ -2933,7 +2966,7 @@ static void build_subroutines(BuildCtx *ctx)
> | mov KBASE, [KBASE+PC2PROTO(k)]
> | mov L:RB->base, BASE
> | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
> - | set_vmstate INTERP
> + | set_vmstate LFUNC // LFUNC after BASE & KBASE restoration
> | // Modified copy of ins_next which handles function header dispatch, too.
> | mov RC, [PC]
> | movzx RA, RCH
> @@ -3203,8 +3236,8 @@ static void build_subroutines(BuildCtx *ctx)
> | mov FCARG1, CTSTATE
> | call extern lj_ccallback_enter at 8 // (CTState *cts, void *cf)
> | // lua_State * returned in eax (RD).
> - | set_vmstate INTERP
> | mov BASE, L:RD->base
> + | set_vmstate LFUNC // LFUNC after BASE restoration
> | mov RD, L:RD->top
> | sub RD, BASE
> | mov LFUNC:RB, [BASE-8]
> @@ -4683,6 +4716,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>
> case BC_CALL: case BC_CALLM:
> | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
> + | set_vmstate INTERP // INTERP until a new BASE is setup
> if (op == BC_CALLM) {
> | add NARGS:RD, MULTRES
> }
> @@ -4706,6 +4740,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | cmp dword [RA-4], LJ_TFUNC
> | jne ->vmeta_call
> |->BC_CALLT_Z:
> + | set_vmstate INTERP // INTERP until a new BASE is setup
> | mov PC, [BASE-4]
> | test PC, FRAME_TYPE
> | jnz >7
> @@ -4989,6 +5024,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | shl RA, 3
> }
> |1:
> + | set_vmstate INTERP // INTERP until the old BASE & KBASE is restored
> | mov PC, [BASE-4]
> | mov MULTRES, RD // Save nresults+1.
> | test PC, FRAME_TYPE // Check frame type marker.
> @@ -5043,6 +5079,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | mov LFUNC:KBASE, [BASE-8]
> | mov KBASE, LFUNC:KBASE->pc
> | mov KBASE, [KBASE+PC2PROTO(k)]
> + | set_vmstate LFUNC // LFUNC after the old BASE & KBASE is restored
> | ins_next
> |
> |6: // Fill up results with nil.
> @@ -5330,6 +5367,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
> | mov KBASE, [PC-4+PC2PROTO(k)]
> | mov L:RB, SAVE_L
> + | set_vmstate LFUNC // LFUNC after KBASE restoration
> | lea RA, [BASE+RA*8] // Top of frame.
> | cmp RA, L:RB->maxstack
> | ja ->vm_growstack_f
> @@ -5367,6 +5405,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | mov [RD-4], RB // Store delta + FRAME_VARG.
> | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC.
> | mov L:RB, SAVE_L
> + | set_vmstate LFUNC // LFUNC after KBASE restoration
> | lea RA, [RD+RA*8]
> | cmp RA, L:RB->maxstack
> | ja ->vm_growstack_v // Need to grow stack.
> @@ -5431,7 +5470,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> |.endif
> }
> | ja ->vm_growstack_c // Need to grow stack.
> - | set_vmstate C
> + | set_vmstate CFUNC // CFUNC before entering C function
> if (op == BC_FUNCC) {
> | call KBASEa // (lua_State *L)
> } else {
> @@ -5441,7 +5480,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> | // nresults returned in eax (RD).
> | mov BASE, L:RB->base
> | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> - | set_vmstate INTERP
> + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
> | lea RA, [BASE+RD*8]
> | neg RA
> | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
> --
> 2.28.0
>
More information about the Tarantool-patches
mailing list