From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp54.i.mail.ru (smtp54.i.mail.ru [217.69.128.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 32BE945C305 for ; Wed, 16 Dec 2020 22:14:38 +0300 (MSK) From: Sergey Kaplun Date: Wed, 16 Dec 2020 22:13:40 +0300 Message-Id: <333d99a8e4406e8c03cba132f9b50435f6d643bd.1608142899.git.skaplun@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH luajit v1 05/11] vm: introduce LFUNC and FFUNC vmstates List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Igor Munkin , Sergey Ostanevich Cc: tarantool-patches@dev.tarantool.org This patch splits LJ_VMST_LFUNC and LJ_VMST_FFUNC off from LJ_VMST_INTERP to allow determining the context of VM execution for x86/x64 arches. Also, LJ_VMST_C is renamed to LJ_VMST_CFUNC for naming consistency with the newer vmstates. Also, this patch adjusts the stack layout for x86/x64 arches to save the vmstate, in order to avoid an inconsistent state during stack unwinding when an error is raised. Part of tarantool/tarantool#5442 --- src/lj_frame.h | 18 +++--- src/lj_obj.h | 4 +- src/lj_profile.c | 5 +- src/luajit-gdb.py | 14 +++-- src/vm_arm.dasc | 6 +- src/vm_arm64.dasc | 6 +- src/vm_mips.dasc | 6 +- src/vm_mips64.dasc | 6 +- src/vm_ppc.dasc | 6 +- src/vm_x64.dasc | 99 ++++++++++++++++++++++---------- src/vm_x86.dasc | 137 ++++++++++++++++++++++++++++++--------------- 11 files changed, 200 insertions(+), 107 deletions(-) diff --git a/src/lj_frame.h b/src/lj_frame.h index 19c49a4..2e693f9 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -127,13 +127,13 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. 
*/ #define CFRAME_SIZE (16*4) #define CFRAME_SHIFT_MULTRES 0 #else -#define CFRAME_OFS_ERRF (15*4) -#define CFRAME_OFS_NRES (14*4) -#define CFRAME_OFS_PREV (13*4) -#define CFRAME_OFS_L (12*4) +#define CFRAME_OFS_ERRF (19*4) +#define CFRAME_OFS_NRES (18*4) +#define CFRAME_OFS_PREV (17*4) +#define CFRAME_OFS_L (16*4) #define CFRAME_OFS_PC (6*4) #define CFRAME_OFS_MULTRES (5*4) -#define CFRAME_SIZE (12*4) +#define CFRAME_SIZE (16*4) #define CFRAME_SHIFT_MULTRES 0 #endif #elif LJ_TARGET_X64 @@ -152,11 +152,11 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ #define CFRAME_OFS_NRES (22*4) #define CFRAME_OFS_MULTRES (21*4) #endif -#define CFRAME_SIZE (10*8) +#define CFRAME_SIZE (12*8) #define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) #define CFRAME_SHIFT_MULTRES 0 #else -#define CFRAME_OFS_PREV (4*8) +#define CFRAME_OFS_PREV (6*8) #if LJ_GC64 #define CFRAME_OFS_PC (3*8) #define CFRAME_OFS_L (2*8) @@ -171,9 +171,9 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ #define CFRAME_OFS_MULTRES (1*4) #endif #if LJ_NO_UNWIND -#define CFRAME_SIZE (12*8) +#define CFRAME_SIZE (14*8) #else -#define CFRAME_SIZE (10*8) +#define CFRAME_SIZE (12*8) #endif #define CFRAME_SIZE_JIT (CFRAME_SIZE + 16) #define CFRAME_SHIFT_MULTRES 0 diff --git a/src/lj_obj.h b/src/lj_obj.h index 927b347..7fb715e 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -512,7 +512,9 @@ typedef struct GCtab { /* VM states. */ enum { LJ_VMST_INTERP, /* Interpreter. */ - LJ_VMST_C, /* C function. */ + LJ_VMST_LFUNC, /* Lua function. */ + LJ_VMST_FFUNC, /* Fast function. */ + LJ_VMST_CFUNC, /* C function. */ LJ_VMST_GC, /* Garbage collector. */ LJ_VMST_EXIT, /* Trace exit handler. */ LJ_VMST_RECORD, /* Trace recorder. */ diff --git a/src/lj_profile.c b/src/lj_profile.c index 116998e..637e03c 100644 --- a/src/lj_profile.c +++ b/src/lj_profile.c @@ -157,7 +157,10 @@ static void profile_trigger(ProfileState *ps) int st = g->vmstate; ps->vmstate = st >= 0 ? 
'N' : st == ~LJ_VMST_INTERP ? 'I' : - st == ~LJ_VMST_C ? 'C' : + st == ~LJ_VMST_CFUNC ? 'C' : + /* Stubs for profiler hooks. */ + st == ~LJ_VMST_FFUNC ? 'I' : + st == ~LJ_VMST_LFUNC ? 'I' : st == ~LJ_VMST_GC ? 'G' : 'J'; g->hookmask = (mask | HOOK_PROFILE); lj_dispatch_update(g); diff --git a/src/luajit-gdb.py b/src/luajit-gdb.py index 652c560..f1fd623 100644 --- a/src/luajit-gdb.py +++ b/src/luajit-gdb.py @@ -206,12 +206,14 @@ def J(g): def vm_state(g): return { i2notu32(0): 'INTERP', - i2notu32(1): 'C', - i2notu32(2): 'GC', - i2notu32(3): 'EXIT', - i2notu32(4): 'RECORD', - i2notu32(5): 'OPT', - i2notu32(6): 'ASM', + i2notu32(1): 'LFUNC', + i2notu32(2): 'FFUNC', + i2notu32(3): 'CFUNC', + i2notu32(4): 'GC', + i2notu32(5): 'EXIT', + i2notu32(6): 'RECORD', + i2notu32(7): 'OPT', + i2notu32(8): 'ASM', }.get(int(tou32(g['vmstate'])), 'TRACE') def gc_state(g): diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index d4cdaf5..ae2efdf 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -287,7 +287,7 @@ static void build_subroutines(BuildCtx *ctx) | | str RB, L->base | ldr KBASE, SAVE_NRES - | mv_vmstate CARG4, C + | mv_vmstate CARG4, CFUNC | sub BASE, BASE, #8 | subs CARG3, RC, #8 | lsl KBASE, KBASE, #3 // KBASE = (nresults_wanted+1)*8 @@ -348,7 +348,7 @@ static void build_subroutines(BuildCtx *ctx) | mov CRET1, CARG2 |->vm_unwind_c_eh: // Landing pad for external unwinder. | ldr L, SAVE_L - | mv_vmstate CARG4, C + | mv_vmstate CARG4, CFUNC | ldr GL:CARG3, L->glref | str CARG4, GL:CARG3->vmstate | str L, GL:CARG3->cur_L @@ -4487,7 +4487,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) if (op == BC_FUNCCW) { | ldr CARG2, CFUNC:CARG3->f } - | mv_vmstate CARG3, C + | mv_vmstate CARG3, CFUNC | mov CARG1, L | bhi ->vm_growstack_c // Need to grow stack. 
| st_vmstate CARG3 diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 3eaf376..f783428 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -332,7 +332,7 @@ static void build_subroutines(BuildCtx *ctx) | | str RB, L->base | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1. - | mv_vmstate TMP0w, C + | mv_vmstate TMP0w, CFUNC | sub BASE, BASE, #16 | subs TMP2, RC, #8 | st_vmstate TMP0w @@ -391,7 +391,7 @@ static void build_subroutines(BuildCtx *ctx) | mov CRET1, CARG2 |->vm_unwind_c_eh: // Landing pad for external unwinder. | ldr L, SAVE_L - | mv_vmstate TMP0w, C + | mv_vmstate TMP0w, CFUNC | ldr GL, L->glref | st_vmstate TMP0w | b ->vm_leave_unw @@ -3816,7 +3816,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) if (op == BC_FUNCCW) { | ldr CARG2, CFUNC:CARG3->f } - | mv_vmstate TMP0w, C + | mv_vmstate TMP0w, CFUNC | mov CARG1, L | bhi ->vm_growstack_c // Need to grow stack. | st_vmstate TMP0w diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 1afd611..ec57d78 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -403,7 +403,7 @@ static void build_subroutines(BuildCtx *ctx) | | addiu TMP1, RD, -8 | sw TMP2, L->base - | li_vmstate C + | li_vmstate CFUNC | lw TMP2, SAVE_NRES | addiu BASE, BASE, -8 | st_vmstate @@ -473,7 +473,7 @@ static void build_subroutines(BuildCtx *ctx) | move CRET1, CARG2 |->vm_unwind_c_eh: // Landing pad for external unwinder. | lw L, SAVE_L - | li TMP0, ~LJ_VMST_C + | li TMP0, ~LJ_VMST_CFUNC | lw GL:TMP1, L->glref | b ->vm_leave_unw |. 
sw TMP0, GL:TMP1->vmstate @@ -5085,7 +5085,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sw BASE, L->base | sltu AT, TMP2, TMP1 | sw RC, L->top - | li_vmstate C + | li_vmstate CFUNC if (op == BC_FUNCCW) { | lw CARG2, CFUNC:RB->f } diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index c06270a..9a749f9 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc @@ -449,7 +449,7 @@ static void build_subroutines(BuildCtx *ctx) | | addiu TMP1, RD, -8 | sd TMP2, L->base - | li_vmstate C + | li_vmstate CFUNC | lw TMP2, SAVE_NRES | daddiu BASE, BASE, -16 | st_vmstate @@ -517,7 +517,7 @@ static void build_subroutines(BuildCtx *ctx) | move CRET1, CARG2 |->vm_unwind_c_eh: // Landing pad for external unwinder. | ld L, SAVE_L - | li TMP0, ~LJ_VMST_C + | li TMP0, ~LJ_VMST_CFUNC | ld GL:TMP1, L->glref | b ->vm_leave_unw |. sw TMP0, GL:TMP1->vmstate @@ -4952,7 +4952,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sd BASE, L->base | sltu AT, TMP2, TMP1 | sd RC, L->top - | li_vmstate C + | li_vmstate CFUNC if (op == BC_FUNCCW) { | ld CARG2, CFUNC:RB->f } diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index b4260eb..62e9b68 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -520,7 +520,7 @@ static void build_subroutines(BuildCtx *ctx) | // TMP0 = PC & FRAME_TYPE | cmpwi TMP0, FRAME_C | rlwinm TMP2, PC, 0, 0, 28 - | li_vmstate C + | li_vmstate CFUNC | sub TMP2, BASE, TMP2 // TMP2 = previous base. | bney ->vm_returnp | @@ -596,7 +596,7 @@ static void build_subroutines(BuildCtx *ctx) |->vm_unwind_c_eh: // Landing pad for external unwinder. 
| lwz L, SAVE_L | .toc ld TOCREG, SAVE_TOC - | li TMP0, ~LJ_VMST_C + | li TMP0, ~LJ_VMST_CFUNC | lwz GL:TMP1, L->glref | stw TMP0, GL:TMP1->vmstate | b ->vm_leave_unw @@ -5060,7 +5060,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | stp BASE, L->base | cmplw TMP1, TMP2 | stp RC, L->top - | li_vmstate C + | li_vmstate CFUNC |.if TOC | mtctr TMP3 |.else diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 80753e0..d4d3a1d 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -140,7 +140,7 @@ |//----------------------------------------------------------------------- |.else // x64/POSIX stack layout | -|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). +|.define CFRAME_SPACE, qword*7 // Delta for rsp (see <--). |.macro saveregs_ | push rbx; push r15; push r14 |.if NO_UNWIND @@ -161,26 +161,29 @@ | |//----- 16 byte aligned, |.if NO_UNWIND -|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*10] -|.define SAVE_R3, aword [rsp+aword*9] -|.define SAVE_R2, aword [rsp+aword*8] -|.define SAVE_R1, aword [rsp+aword*7] -|.define SAVE_RU2, aword [rsp+aword*6] -|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. +|.define SAVE_RET, qword [rsp+qword*13] //<-- rsp entering interpreter. +|.define SAVE_R4, qword [rsp+qword*12] +|.define SAVE_R3, qword [rsp+qword*11] +|.define SAVE_R2, qword [rsp+qword*10] +|.define SAVE_R1, qword [rsp+qword*9] +|.define SAVE_RU2, qword [rsp+qword*8] +|.define SAVE_RU1, qword [rsp+qword*7] //<-- rsp after register saves. |.else -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*8] -|.define SAVE_R3, aword [rsp+aword*7] -|.define SAVE_R2, aword [rsp+aword*6] -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. +|.define SAVE_RET, qword [rsp+qword*11] //<-- rsp entering interpreter. 
+|.define SAVE_R4, qword [rsp+qword*10] +|.define SAVE_R3, qword [rsp+qword*9] +|.define SAVE_R2, qword [rsp+qword*8] +|.define SAVE_R1, qword [rsp+qword*7] //<-- rsp after register saves. |.endif -|.define SAVE_CFRAME, aword [rsp+aword*4] -|.define SAVE_PC, aword [rsp+aword*3] -|.define SAVE_L, aword [rsp+aword*2] +|.define SAVE_CFRAME, qword [rsp+qword*6] +|.define SAVE_UNUSED2, qword [rsp+qword*5] +|.define SAVE_UNUSED1, dword [rsp+dword*8] +|.define SAVE_VMSTATE, dword [rsp+dword*8] +|.define SAVE_PC, qword [rsp+qword*3] +|.define SAVE_L, qword [rsp+qword*2] |.define SAVE_ERRF, dword [rsp+dword*3] |.define SAVE_NRES, dword [rsp+dword*2] -|.define TMP1, aword [rsp] //<-- rsp while in interpreter. +|.define TMP1, qword [rsp] //<-- rsp while in interpreter. |//----- 16 byte aligned | |.define TMP1d, dword [rsp] @@ -342,6 +345,20 @@ | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st |.endmacro | +|.if not WIN +|// Save vmstate through register. +|.macro save_vmstate_through, reg +| mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)] +| mov SAVE_VMSTATE, reg +|.endmacro +| +|// Restore vmstate through register. +|.macro restore_vmstate_through, reg +| mov reg, SAVE_VMSTATE +| mov dword [DISPATCH+DISPATCH_GL(vmstate)], reg +|.endmacro +|.endif // WIN +| |.macro fpop1; fstp st1; .endmacro | |// Synthesize SSE FP constants. @@ -416,7 +433,7 @@ static void build_subroutines(BuildCtx *ctx) | jnz ->vm_returnp | | // Return to C. - | set_vmstate C + | set_vmstate CFUNC | and PC, -8 | sub PC, BASE | neg PC // Previous base = BASE - delta. @@ -448,6 +465,10 @@ static void build_subroutines(BuildCtx *ctx) | xor eax, eax // Ok return status for vm_pcall. | |->vm_leave_unw: + |.if not WIN + | // DISPATCH required to set properly. 
+ | restore_vmstate_through RAd + |.endif | restoreregs | ret | @@ -493,7 +514,9 @@ static void build_subroutines(BuildCtx *ctx) | mov L:DISPATCH, SAVE_L | mov GL:RB, L:DISPATCH->glref | mov GL:RB->cur_L, L:DISPATCH - | mov dword GL:RB->vmstate, ~LJ_VMST_C + | mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC + | mov DISPATCH, L:DISPATCH->glref // Setup pointer to dispatch table. + | add DISPATCH, GG_G2DISP | jmp ->vm_leave_unw | |->vm_unwind_rethrow: @@ -521,7 +544,7 @@ static void build_subroutines(BuildCtx *ctx) | mov [BASE-16], RA // Prepend false to error message. | mov [BASE-8], RB | mov RA, -16 // Results start at BASE+RA = BASE-16. - | set_vmstate INTERP + | set_vmstate INTERP // INTERP until jump to BC_RET* or return to C | jmp ->vm_returnc // Increments RD/MULTRES and returns. | |//----------------------------------------------------------------------- @@ -575,6 +598,9 @@ static void build_subroutines(BuildCtx *ctx) | lea KBASE, [esp+CFRAME_RESUME] | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | add DISPATCH, GG_G2DISP + |.if not WIN + | save_vmstate_through TMPRd + |.endif | mov SAVE_PC, RD // Any value outside of bytecode is ok. | mov SAVE_CFRAME, RD | mov SAVE_NRES, RDd @@ -585,7 +611,7 @@ static void build_subroutines(BuildCtx *ctx) | | // Resume after yield (like a return). | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return | mov byte L:RB->status, RDL | mov BASE, L:RB->base | mov RD, L:RB->top @@ -622,11 +648,14 @@ static void build_subroutines(BuildCtx *ctx) | mov SAVE_CFRAME, KBASE | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. | add DISPATCH, GG_G2DISP + |.if not WIN + | save_vmstate_through RDd + |.endif | mov L:RB->cframe, rsp | |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). 
| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP + | set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC* | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). | add PC, RA | sub PC, BASE // PC = frame delta + frame type @@ -658,6 +687,9 @@ static void build_subroutines(BuildCtx *ctx) | mov SAVE_ERRF, 0 // No error function. | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame. | add DISPATCH, GG_G2DISP + |.if not WIN + | save_vmstate_through KBASEd + |.endif | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). | | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. @@ -697,6 +729,7 @@ static void build_subroutines(BuildCtx *ctx) | cleartp LFUNC:KBASE | mov KBASE, LFUNC:KBASE->pc | mov KBASE, [KBASE+PC2PROTO(k)] + | set_vmstate LFUNC // LFUNC after KBASE restoration | // BASE = base, RC = result, RB = meta base | jmp RA // Jump to continuation. | @@ -1137,15 +1170,16 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc, name |->ff_ .. name: + | set_vmstate FFUNC |.endmacro | |.macro .ffunc_1, name - |->ff_ .. name: + | .ffunc name | cmp NARGS:RDd, 1+1; jb ->fff_fallback |.endmacro | |.macro .ffunc_2, name - |->ff_ .. name: + | .ffunc name | cmp NARGS:RDd, 2+1; jb ->fff_fallback |.endmacro | @@ -1578,7 +1612,7 @@ static void build_subroutines(BuildCtx *ctx) | mov L:PC, TMP1 | mov BASE, L:RB->base | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return | | cmp eax, LUA_YIELD | ja >8 @@ -1717,6 +1751,7 @@ static void build_subroutines(BuildCtx *ctx) | movzx RAd, PC_RA | neg RA | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8 + | set_vmstate LFUNC // LFUNC state after BASE restoration | ins_next | |6: // Fill up results with nil. 
@@ -2481,7 +2516,7 @@ static void build_subroutines(BuildCtx *ctx) | mov KBASE, [KBASE+PC2PROTO(k)] | mov L:RB->base, BASE | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 - | set_vmstate INTERP + | set_vmstate LFUNC // LFUNC after BASE & KBASE restoration | // Modified copy of ins_next which handles function header dispatch, too. | mov RCd, [PC] | movzx RAd, RCH @@ -2697,8 +2732,8 @@ static void build_subroutines(BuildCtx *ctx) | mov CARG1, CTSTATE | call extern lj_ccallback_enter // (CTState *cts, void *cf) | // lua_State * returned in eax (RD). - | set_vmstate INTERP | mov BASE, L:RD->base + | set_vmstate LFUNC // LFUNC after BASE restoration | mov RD, L:RD->top | sub RD, BASE | mov LFUNC:RB, [BASE-16] @@ -3974,6 +4009,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_CALL: case BC_CALLM: | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs + | set_vmstate INTERP // INTERP until a new BASE is setup if (op == BC_CALLM) { | add NARGS:RDd, MULTRES } @@ -3995,6 +4031,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov LFUNC:RB, [RA-16] | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call |->BC_CALLT_Z: + | set_vmstate INTERP // INTERP until a new BASE is setup | mov PC, [BASE-8] | test PCd, FRAME_TYPE | jnz >7 @@ -4219,6 +4256,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | shl RAd, 3 } |1: + | set_vmstate INTERP // INTERP until the old BASE & KBASE is restored | mov PC, [BASE-8] | mov MULTRES, RDd // Save nresults+1. | test PCd, FRAME_TYPE // Check frame type marker. @@ -4260,6 +4298,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cleartp LFUNC:KBASE | mov KBASE, LFUNC:KBASE->pc | mov KBASE, [KBASE+PC2PROTO(k)] + | set_vmstate LFUNC // LFUNC after the old BASE & KBASE is restored | ins_next | |6: // Fill up results with nil. 
@@ -4551,6 +4590,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 | mov KBASE, [PC-4+PC2PROTO(k)] | mov L:RB, SAVE_L + | set_vmstate LFUNC // LFUNC after KBASE restoration | lea RA, [BASE+RA*8] // Top of frame. | cmp RA, L:RB->maxstack | ja ->vm_growstack_f @@ -4588,6 +4628,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov [RD-8], RB // Store delta + FRAME_VARG. | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC. | mov L:RB, SAVE_L + | set_vmstate LFUNC // LFUNC after KBASE restoration | lea RA, [RD+RA*8] | cmp RA, L:RB->maxstack | ja ->vm_growstack_v // Need to grow stack. @@ -4643,7 +4684,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov CARG1, L:RB // Caveat: CARG1 may be RA. } | ja ->vm_growstack_c // Need to grow stack. - | set_vmstate C + | set_vmstate CFUNC // CFUNC before entering C function if (op == BC_FUNCC) { | call KBASE // (lua_State *L) } else { @@ -4653,7 +4694,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // nresults returned in eax (RD). | mov BASE, L:RB->base | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return | lea RA, [BASE+RD*8] | neg RA | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index d76fbe3..939c43f 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -140,7 +140,7 @@ | |.else | -|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). +|.define CFRAME_SPACE, dword*11 // Delta for esp (see <--). |.macro saveregs_ | push edi; push esi; push ebx | sub esp, CFRAME_SPACE @@ -183,25 +183,30 @@ |.define ARG1, aword [esp] //<-- esp while in interpreter. |//----- 16 byte aligned, ^^^ arguments for C callee |.else -|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. 
-|.define SAVE_NRES, aword [esp+aword*14] -|.define SAVE_CFRAME, aword [esp+aword*13] -|.define SAVE_L, aword [esp+aword*12] +|.define SAVE_ERRF, dword [esp+dword*19] // vm_pcall/vm_cpcall only. +|.define SAVE_NRES, dword [esp+dword*18] +|.define SAVE_CFRAME, dword [esp+dword*17] +|.define SAVE_L, dword [esp+dword*16] |//----- 16 byte aligned, ^^^ arguments from C caller -|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter. -|.define SAVE_R4, aword [esp+aword*10] -|.define SAVE_R3, aword [esp+aword*9] -|.define SAVE_R2, aword [esp+aword*8] +|.define SAVE_RET, dword [esp+dword*15] //<-- esp entering interpreter. +|.define SAVE_R4, dword [esp+dword*14] +|.define SAVE_R3, dword [esp+dword*13] +|.define SAVE_R2, dword [esp+dword*12] |//----- 16 byte aligned -|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves. -|.define SAVE_PC, aword [esp+aword*6] -|.define TMP2, aword [esp+aword*5] -|.define TMP1, aword [esp+aword*4] +|.define SAVE_UNUSED3, dword [esp+dword*11] +|.define SAVE_UNUSED2, dword [esp+dword*10] +|.define SAVE_UNUSED1, dword [esp+dword*9] +|.define SAVE_VMSTATE, dword [esp+dword*8] |//----- 16 byte aligned -|.define ARG4, aword [esp+aword*3] -|.define ARG3, aword [esp+aword*2] -|.define ARG2, aword [esp+aword*1] -|.define ARG1, aword [esp] //<-- esp while in interpreter. +|.define SAVE_R1, dword [esp+dword*7] //<-- esp after register saves. +|.define SAVE_PC, dword [esp+dword*6] +|.define TMP2, dword [esp+dword*5] +|.define TMP1, dword [esp+dword*4] +|//----- 16 byte aligned +|.define ARG4, dword [esp+dword*3] +|.define ARG3, dword [esp+dword*2] +|.define ARG2, dword [esp+dword*1] +|.define ARG1, dword [esp] //<-- esp while in interpreter. |//----- 16 byte aligned, ^^^ arguments for C callee |.endif | @@ -269,7 +274,7 @@ |//----------------------------------------------------------------------- |.else // x64/POSIX stack layout | -|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). 
+|.define CFRAME_SPACE, qword*7 // Delta for rsp (see <--). |.macro saveregs_ | push rbx; push r15; push r14 |.if NO_UNWIND @@ -290,33 +295,35 @@ | |//----- 16 byte aligned, |.if NO_UNWIND -|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*10] -|.define SAVE_R3, aword [rsp+aword*9] -|.define SAVE_R2, aword [rsp+aword*8] -|.define SAVE_R1, aword [rsp+aword*7] -|.define SAVE_RU2, aword [rsp+aword*6] -|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. +|.define SAVE_RET, qword [rsp+qword*13] //<-- rsp entering interpreter. +|.define SAVE_R4, qword [rsp+qword*12] +|.define SAVE_R3, qword [rsp+qword*11] +|.define SAVE_R2, qword [rsp+qword*10] +|.define SAVE_R1, qword [rsp+qword*9] +|.define SAVE_RU2, qword [rsp+qword*8] +|.define SAVE_RU1, qword [rsp+qword*7] //<-- rsp after register saves. |.else -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*8] -|.define SAVE_R3, aword [rsp+aword*7] -|.define SAVE_R2, aword [rsp+aword*6] -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. +|.define SAVE_RET, qword [rsp+qword*11] //<-- rsp entering interpreter. +|.define SAVE_R4, qword [rsp+qword*10] +|.define SAVE_R3, qword [rsp+qword*9] +|.define SAVE_R2, qword [rsp+qword*8] +|.define SAVE_R1, qword [rsp+qword*7] //<-- rsp after register saves. |.endif -|.define SAVE_CFRAME, aword [rsp+aword*4] +|.define SAVE_CFRAME, qword [rsp+qword*6] +|.define SAVE_UNUSED1, qword [rsp+qword*5] +|.define SAVE_VMSTATE, dword [rsp+dword*8] |.define SAVE_PC, dword [rsp+dword*7] |.define SAVE_L, dword [rsp+dword*6] |.define SAVE_ERRF, dword [rsp+dword*5] |.define SAVE_NRES, dword [rsp+dword*4] -|.define TMPa, aword [rsp+aword*1] +|.define TMPa, qword [rsp+qword*1] |.define TMP2, dword [rsp+dword*1] |.define TMP1, dword [rsp] //<-- rsp while in interpreter. |//----- 16 byte aligned | |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). 
|.define TMPQ, qword [rsp] -|.define TMP3, dword [rsp+aword*1] +|.define TMP3, dword [rsp+qword*1] |.define MULTRES, TMP2 | |.endif @@ -433,6 +440,20 @@ | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st |.endmacro | +|.if not WIN +|// Save vmstate through register. +|.macro save_vmstate_through, reg +| mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)] +| mov SAVE_VMSTATE, reg +|.endmacro +| +|// Restore vmstate through register. +|.macro restore_vmstate_through, reg +| mov reg, SAVE_VMSTATE +| mov dword [DISPATCH+DISPATCH_GL(vmstate)], reg +|.endmacro +|.endif // WIN +| |// x87 compares. |.macro fcomparepp // Compare and pop st0 >< st1. | fucomip st1 @@ -520,7 +541,7 @@ static void build_subroutines(BuildCtx *ctx) | jnz ->vm_returnp | | // Return to C. - | set_vmstate C + | set_vmstate CFUNC | and PC, -8 | sub PC, BASE | neg PC // Previous base = BASE - delta. @@ -559,6 +580,10 @@ static void build_subroutines(BuildCtx *ctx) | xor eax, eax // Ok return status for vm_pcall. | |->vm_leave_unw: + |.if not WIN + | // DISPATCH required to set properly. + | restore_vmstate_through RA + |.endif | restoreregs | ret | @@ -613,7 +638,9 @@ static void build_subroutines(BuildCtx *ctx) | mov L:DISPATCH, SAVE_L | mov GL:RB, L:DISPATCH->glref | mov dword GL:RB->cur_L, L:DISPATCH - | mov dword GL:RB->vmstate, ~LJ_VMST_C + | mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC + | mov DISPATCH, L:DISPATCH->glref // Setup pointer to dispatch table. + | add DISPATCH, GG_G2DISP | jmp ->vm_leave_unw | |->vm_unwind_rethrow: @@ -647,7 +674,7 @@ static void build_subroutines(BuildCtx *ctx) | mov PC, [BASE-4] // Fetch PC of previous frame. | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message. | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP + | set_vmstate INTERP // INTERP until jump to BC_RET* or return to C | jmp ->vm_returnc // Increments RD/MULTRES and returns. 
| |.if WIN and not X64 @@ -714,10 +741,13 @@ static void build_subroutines(BuildCtx *ctx) | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! |.endif | mov PC, FRAME_CP - | xor RD, RD | lea KBASEa, [esp+CFRAME_RESUME] | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | add DISPATCH, GG_G2DISP + |.if not WIN + | save_vmstate_through RD + |.endif + | xor RD, RD | mov SAVE_PC, RD // Any value outside of bytecode is ok. | mov SAVE_CFRAME, RDa |.if X64 @@ -730,7 +760,7 @@ static void build_subroutines(BuildCtx *ctx) | | // Resume after yield (like a return). | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return | mov byte L:RB->status, RDL | mov BASE, L:RB->base | mov RD, L:RB->top @@ -774,6 +804,9 @@ static void build_subroutines(BuildCtx *ctx) | mov SAVE_CFRAME, KBASEa | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. | add DISPATCH, GG_G2DISP + |.if not WIN + | save_vmstate_through RD + |.endif |.if X64 | mov L:RB->cframe, rsp |.else @@ -782,7 +815,7 @@ static void build_subroutines(BuildCtx *ctx) | |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP + | set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC* | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). | add PC, RA | sub PC, BASE // PC = frame delta + frame type @@ -823,6 +856,9 @@ static void build_subroutines(BuildCtx *ctx) | mov SAVE_ERRF, 0 // No error function. | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. | add DISPATCH, GG_G2DISP + |.if not WIN + | save_vmstate_through KBASE + |.endif | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). 
| |.if X64 @@ -885,6 +921,7 @@ static void build_subroutines(BuildCtx *ctx) | mov KBASE, LFUNC:KBASE->pc | mov KBASE, [KBASE+PC2PROTO(k)] | // BASE = base, RC = result, RB = meta base + | set_vmstate LFUNC // LFUNC after KBASE restoration | jmp RAa // Jump to continuation. | |.if FFI @@ -1409,15 +1446,16 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc, name |->ff_ .. name: + | set_vmstate FFUNC |.endmacro | |.macro .ffunc_1, name - |->ff_ .. name: + | .ffunc name | cmp NARGS:RD, 1+1; jb ->fff_fallback |.endmacro | |.macro .ffunc_2, name - |->ff_ .. name: + | .ffunc name | cmp NARGS:RD, 2+1; jb ->fff_fallback |.endmacro | @@ -1924,7 +1962,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | mov BASE, L:RB->base | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return | | cmp eax, LUA_YIELD | ja >8 @@ -2089,6 +2127,7 @@ static void build_subroutines(BuildCtx *ctx) | movzx RA, PC_RA | not RAa // Note: ~RA = -(RA+1) | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 + | set_vmstate LFUNC // LFUNC state after BASE restoration | ins_next | |6: // Fill up results with nil. @@ -2933,7 +2972,7 @@ static void build_subroutines(BuildCtx *ctx) | mov KBASE, [KBASE+PC2PROTO(k)] | mov L:RB->base, BASE | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 - | set_vmstate INTERP + | set_vmstate LFUNC // LFUNC after BASE & KBASE restoration | // Modified copy of ins_next which handles function header dispatch, too. | mov RC, [PC] | movzx RA, RCH @@ -3203,8 +3242,8 @@ static void build_subroutines(BuildCtx *ctx) | mov FCARG1, CTSTATE | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf) | // lua_State * returned in eax (RD). 
- | set_vmstate INTERP | mov BASE, L:RD->base + | set_vmstate LFUNC // LFUNC after BASE restoration | mov RD, L:RD->top | sub RD, BASE | mov LFUNC:RB, [BASE-8] @@ -4683,6 +4722,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_CALL: case BC_CALLM: | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs + | set_vmstate INTERP // INTERP until a new BASE is setup if (op == BC_CALLM) { | add NARGS:RD, MULTRES } @@ -4706,6 +4746,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmp dword [RA-4], LJ_TFUNC | jne ->vmeta_call |->BC_CALLT_Z: + | set_vmstate INTERP // INTERP until a new BASE is setup | mov PC, [BASE-4] | test PC, FRAME_TYPE | jnz >7 @@ -4989,6 +5030,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | shl RA, 3 } |1: + | set_vmstate INTERP // INTERP until the old BASE & KBASE is restored | mov PC, [BASE-4] | mov MULTRES, RD // Save nresults+1. | test PC, FRAME_TYPE // Check frame type marker. @@ -5043,6 +5085,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov LFUNC:KBASE, [BASE-8] | mov KBASE, LFUNC:KBASE->pc | mov KBASE, [KBASE+PC2PROTO(k)] + | set_vmstate LFUNC // LFUNC after the old BASE & KBASE is restored | ins_next | |6: // Fill up results with nil. @@ -5330,6 +5373,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 | mov KBASE, [PC-4+PC2PROTO(k)] | mov L:RB, SAVE_L + | set_vmstate LFUNC // LFUNC after KBASE restoration | lea RA, [BASE+RA*8] // Top of frame. | cmp RA, L:RB->maxstack | ja ->vm_growstack_f @@ -5367,6 +5411,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov [RD-4], RB // Store delta + FRAME_VARG. | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC. | mov L:RB, SAVE_L + | set_vmstate LFUNC // LFUNC after KBASE restoration | lea RA, [RD+RA*8] | cmp RA, L:RB->maxstack | ja ->vm_growstack_v // Need to grow stack. 
@@ -5431,7 +5476,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif } | ja ->vm_growstack_c // Need to grow stack. - | set_vmstate C + | set_vmstate CFUNC // CFUNC before entering C function if (op == BC_FUNCC) { | call KBASEa // (lua_State *L) } else { @@ -5441,7 +5486,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // nresults returned in eax (RD). | mov BASE, L:RB->base | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return | lea RA, [BASE+RD*8] | neg RA | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 -- 2.28.0