From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp55.i.mail.ru (smtp55.i.mail.ru [217.69.128.35]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id D66614765E0 for ; Sat, 26 Dec 2020 22:07:22 +0300 (MSK) Content-Type: text/plain; charset=us-ascii Mime-Version: 1.0 (Mac OS X Mail 13.4 \(3608.120.23.2.4\)) From: Sergey Ostanevich In-Reply-To: Date: Sat, 26 Dec 2020 22:07:19 +0300 Content-Transfer-Encoding: quoted-printable Message-Id: References: Subject: Re: [Tarantool-patches] [PATCH luajit v2 3/7] vm: introduce VM states for Lua and fast functions List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Sergey Kaplun Cc: tarantool-patches@dev.tarantool.org Hi! LGTM. Sergos > On 25 Dec 2020, at 18:26, Sergey Kaplun wrote: >=20 > This patch introduces LJ_VMST_LFUNC and LJ_VMST_FFUNC VM states > separated from LJ_VMST_INTERP. New VM states allow determining the > context of Lua VM execution for x86 and x64 arches. Also, LJ_VMST_C is > renamed to LJ_VMST_CFUNC for naming consistency with new VM states. >=20 > Also, this patch adjusts stack layout for x86 and x64 arches to save VM > state to keep it consistent during stack unwinding when an error is raised. 
>=20 > Part of tarantool/tarantool#5442 > --- >=20 > Changes in v2: > - Moved `.if not WIN` macro check inside = (save|restore)_vmstate_through > - Fixed naming: SAVE_UNUSED\d -> UNUSED\d >=20 > src/lj_frame.h | 18 +++---- > src/lj_obj.h | 4 +- > src/lj_profile.c | 5 +- > src/luajit-gdb.py | 14 ++--- > src/vm_arm.dasc | 6 +-- > src/vm_arm64.dasc | 6 +-- > src/vm_mips.dasc | 6 +-- > src/vm_mips64.dasc | 6 +-- > src/vm_ppc.dasc | 6 +-- > src/vm_x64.dasc | 93 ++++++++++++++++++++++---------- > src/vm_x86.dasc | 131 +++++++++++++++++++++++++++++---------------- > 11 files changed, 188 insertions(+), 107 deletions(-) >=20 > diff --git a/src/lj_frame.h b/src/lj_frame.h > index 19c49a4..2e693f9 100644 > --- a/src/lj_frame.h > +++ b/src/lj_frame.h > @@ -127,13 +127,13 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; = /* Special continuations. */ > #define CFRAME_SIZE (16*4) > #define CFRAME_SHIFT_MULTRES 0 > #else > -#define CFRAME_OFS_ERRF (15*4) > -#define CFRAME_OFS_NRES (14*4) > -#define CFRAME_OFS_PREV (13*4) > -#define CFRAME_OFS_L (12*4) > +#define CFRAME_OFS_ERRF (19*4) > +#define CFRAME_OFS_NRES (18*4) > +#define CFRAME_OFS_PREV (17*4) > +#define CFRAME_OFS_L (16*4) > #define CFRAME_OFS_PC (6*4) > #define CFRAME_OFS_MULTRES (5*4) > -#define CFRAME_SIZE (12*4) > +#define CFRAME_SIZE (16*4) > #define CFRAME_SHIFT_MULTRES 0 > #endif > #elif LJ_TARGET_X64 > @@ -152,11 +152,11 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; = /* Special continuations. */ > #define CFRAME_OFS_NRES (22*4) > #define CFRAME_OFS_MULTRES (21*4) > #endif > -#define CFRAME_SIZE (10*8) > +#define CFRAME_SIZE (12*8) > #define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) > #define CFRAME_SHIFT_MULTRES 0 > #else > -#define CFRAME_OFS_PREV (4*8) > +#define CFRAME_OFS_PREV (6*8) > #if LJ_GC64 > #define CFRAME_OFS_PC (3*8) > #define CFRAME_OFS_L (2*8) > @@ -171,9 +171,9 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; = /* Special continuations. 
*/ > #define CFRAME_OFS_MULTRES (1*4) > #endif > #if LJ_NO_UNWIND > -#define CFRAME_SIZE (12*8) > +#define CFRAME_SIZE (14*8) > #else > -#define CFRAME_SIZE (10*8) > +#define CFRAME_SIZE (12*8) > #endif > #define CFRAME_SIZE_JIT (CFRAME_SIZE + 16) > #define CFRAME_SHIFT_MULTRES 0 > diff --git a/src/lj_obj.h b/src/lj_obj.h > index 927b347..7fb715e 100644 > --- a/src/lj_obj.h > +++ b/src/lj_obj.h > @@ -512,7 +512,9 @@ typedef struct GCtab { > /* VM states. */ > enum { > LJ_VMST_INTERP, /* Interpreter. */ > - LJ_VMST_C, /* C function. */ > + LJ_VMST_LFUNC, /* Lua function. */ > + LJ_VMST_FFUNC, /* Fast function. */ > + LJ_VMST_CFUNC, /* C function. */ > LJ_VMST_GC, /* Garbage collector. */ > LJ_VMST_EXIT, /* Trace exit handler. */ > LJ_VMST_RECORD, /* Trace recorder. */ > diff --git a/src/lj_profile.c b/src/lj_profile.c > index 116998e..637e03c 100644 > --- a/src/lj_profile.c > +++ b/src/lj_profile.c > @@ -157,7 +157,10 @@ static void profile_trigger(ProfileState *ps) > int st =3D g->vmstate; > ps->vmstate =3D st >=3D 0 ? 'N' : > st =3D=3D ~LJ_VMST_INTERP ? 'I' : > - st =3D=3D ~LJ_VMST_C ? 'C' : > + st =3D=3D ~LJ_VMST_CFUNC ? 'C' : > + /* Stubs for profiler hooks. */ > + st =3D=3D ~LJ_VMST_FFUNC ? 'I' : > + st =3D=3D ~LJ_VMST_LFUNC ? 'I' : > st =3D=3D ~LJ_VMST_GC ? 
'G' : 'J'; > g->hookmask =3D (mask | HOOK_PROFILE); > lj_dispatch_update(g); > diff --git a/src/luajit-gdb.py b/src/luajit-gdb.py > index 652c560..f1fd623 100644 > --- a/src/luajit-gdb.py > +++ b/src/luajit-gdb.py > @@ -206,12 +206,14 @@ def J(g): > def vm_state(g): > return { > i2notu32(0): 'INTERP', > - i2notu32(1): 'C', > - i2notu32(2): 'GC', > - i2notu32(3): 'EXIT', > - i2notu32(4): 'RECORD', > - i2notu32(5): 'OPT', > - i2notu32(6): 'ASM', > + i2notu32(1): 'LFUNC', > + i2notu32(2): 'FFUNC', > + i2notu32(3): 'CFUNC', > + i2notu32(4): 'GC', > + i2notu32(5): 'EXIT', > + i2notu32(6): 'RECORD', > + i2notu32(7): 'OPT', > + i2notu32(8): 'ASM', > }.get(int(tou32(g['vmstate'])), 'TRACE') >=20 > def gc_state(g): > diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc > index d4cdaf5..ae2efdf 100644 > --- a/src/vm_arm.dasc > +++ b/src/vm_arm.dasc > @@ -287,7 +287,7 @@ static void build_subroutines(BuildCtx *ctx) > | > | str RB, L->base > | ldr KBASE, SAVE_NRES > - | mv_vmstate CARG4, C > + | mv_vmstate CARG4, CFUNC > | sub BASE, BASE, #8 > | subs CARG3, RC, #8 > | lsl KBASE, KBASE, #3 // KBASE =3D = (nresults_wanted+1)*8 > @@ -348,7 +348,7 @@ static void build_subroutines(BuildCtx *ctx) > | mov CRET1, CARG2 > |->vm_unwind_c_eh: // Landing pad for external = unwinder. > | ldr L, SAVE_L > - | mv_vmstate CARG4, C > + | mv_vmstate CARG4, CFUNC > | ldr GL:CARG3, L->glref > | str CARG4, GL:CARG3->vmstate > | str L, GL:CARG3->cur_L > @@ -4487,7 +4487,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > if (op =3D=3D BC_FUNCCW) { > | ldr CARG2, CFUNC:CARG3->f > } > - | mv_vmstate CARG3, C > + | mv_vmstate CARG3, CFUNC > | mov CARG1, L > | bhi ->vm_growstack_c // Need to grow stack. 
> | st_vmstate CARG3 > diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc > index 3eaf376..f783428 100644 > --- a/src/vm_arm64.dasc > +++ b/src/vm_arm64.dasc > @@ -332,7 +332,7 @@ static void build_subroutines(BuildCtx *ctx) > | > | str RB, L->base > | ldrsw CARG2, SAVE_NRES // CARG2 =3D nresults+1. > - | mv_vmstate TMP0w, C > + | mv_vmstate TMP0w, CFUNC > | sub BASE, BASE, #16 > | subs TMP2, RC, #8 > | st_vmstate TMP0w > @@ -391,7 +391,7 @@ static void build_subroutines(BuildCtx *ctx) > | mov CRET1, CARG2 > |->vm_unwind_c_eh: // Landing pad for external = unwinder. > | ldr L, SAVE_L > - | mv_vmstate TMP0w, C > + | mv_vmstate TMP0w, CFUNC > | ldr GL, L->glref > | st_vmstate TMP0w > | b ->vm_leave_unw > @@ -3816,7 +3816,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > if (op =3D=3D BC_FUNCCW) { > | ldr CARG2, CFUNC:CARG3->f > } > - | mv_vmstate TMP0w, C > + | mv_vmstate TMP0w, CFUNC > | mov CARG1, L > | bhi ->vm_growstack_c // Need to grow stack. > | st_vmstate TMP0w > diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc > index 1afd611..ec57d78 100644 > --- a/src/vm_mips.dasc > +++ b/src/vm_mips.dasc > @@ -403,7 +403,7 @@ static void build_subroutines(BuildCtx *ctx) > | > | addiu TMP1, RD, -8 > | sw TMP2, L->base > - | li_vmstate C > + | li_vmstate CFUNC > | lw TMP2, SAVE_NRES > | addiu BASE, BASE, -8 > | st_vmstate > @@ -473,7 +473,7 @@ static void build_subroutines(BuildCtx *ctx) > | move CRET1, CARG2 > |->vm_unwind_c_eh: // Landing pad for external = unwinder. > | lw L, SAVE_L > - | li TMP0, ~LJ_VMST_C > + | li TMP0, ~LJ_VMST_CFUNC > | lw GL:TMP1, L->glref > | b ->vm_leave_unw > |. 
sw TMP0, GL:TMP1->vmstate > @@ -5085,7 +5085,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | sw BASE, L->base > | sltu AT, TMP2, TMP1 > | sw RC, L->top > - | li_vmstate C > + | li_vmstate CFUNC > if (op =3D=3D BC_FUNCCW) { > | lw CARG2, CFUNC:RB->f > } > diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc > index c06270a..9a749f9 100644 > --- a/src/vm_mips64.dasc > +++ b/src/vm_mips64.dasc > @@ -449,7 +449,7 @@ static void build_subroutines(BuildCtx *ctx) > | > | addiu TMP1, RD, -8 > | sd TMP2, L->base > - | li_vmstate C > + | li_vmstate CFUNC > | lw TMP2, SAVE_NRES > | daddiu BASE, BASE, -16 > | st_vmstate > @@ -517,7 +517,7 @@ static void build_subroutines(BuildCtx *ctx) > | move CRET1, CARG2 > |->vm_unwind_c_eh: // Landing pad for external = unwinder. > | ld L, SAVE_L > - | li TMP0, ~LJ_VMST_C > + | li TMP0, ~LJ_VMST_CFUNC > | ld GL:TMP1, L->glref > | b ->vm_leave_unw > |. sw TMP0, GL:TMP1->vmstate > @@ -4952,7 +4952,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | sd BASE, L->base > | sltu AT, TMP2, TMP1 > | sd RC, L->top > - | li_vmstate C > + | li_vmstate CFUNC > if (op =3D=3D BC_FUNCCW) { > | ld CARG2, CFUNC:RB->f > } > diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc > index b4260eb..62e9b68 100644 > --- a/src/vm_ppc.dasc > +++ b/src/vm_ppc.dasc > @@ -520,7 +520,7 @@ static void build_subroutines(BuildCtx *ctx) > | // TMP0 =3D PC & FRAME_TYPE > | cmpwi TMP0, FRAME_C > | rlwinm TMP2, PC, 0, 0, 28 > - | li_vmstate C > + | li_vmstate CFUNC > | sub TMP2, BASE, TMP2 // TMP2 =3D previous base. > | bney ->vm_returnp > | > @@ -596,7 +596,7 @@ static void build_subroutines(BuildCtx *ctx) > |->vm_unwind_c_eh: // Landing pad for external = unwinder. 
> | lwz L, SAVE_L > | .toc ld TOCREG, SAVE_TOC > - | li TMP0, ~LJ_VMST_C > + | li TMP0, ~LJ_VMST_CFUNC > | lwz GL:TMP1, L->glref > | stw TMP0, GL:TMP1->vmstate > | b ->vm_leave_unw > @@ -5060,7 +5060,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | stp BASE, L->base > | cmplw TMP1, TMP2 > | stp RC, L->top > - | li_vmstate C > + | li_vmstate CFUNC > |.if TOC > | mtctr TMP3 > |.else > diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc > index 80753e0..83cc3e1 100644 > --- a/src/vm_x64.dasc > +++ b/src/vm_x64.dasc > @@ -140,7 +140,7 @@ > = |//-----------------------------------------------------------------------= > |.else // x64/POSIX stack layout > | > -|.define CFRAME_SPACE, aword*5 // Delta for rsp = (see <--). > +|.define CFRAME_SPACE, qword*7 // Delta for rsp = (see <--). > |.macro saveregs_ > | push rbx; push r15; push r14 > |.if NO_UNWIND > @@ -161,26 +161,29 @@ > | > |//----- 16 byte aligned, > |.if NO_UNWIND > -|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering = interpreter. > -|.define SAVE_R4, aword [rsp+aword*10] > -|.define SAVE_R3, aword [rsp+aword*9] > -|.define SAVE_R2, aword [rsp+aword*8] > -|.define SAVE_R1, aword [rsp+aword*7] > -|.define SAVE_RU2, aword [rsp+aword*6] > -|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register = saves. > +|.define SAVE_RET, qword [rsp+qword*13] //<-- rsp entering = interpreter. > +|.define SAVE_R4, qword [rsp+qword*12] > +|.define SAVE_R3, qword [rsp+qword*11] > +|.define SAVE_R2, qword [rsp+qword*10] > +|.define SAVE_R1, qword [rsp+qword*9] > +|.define SAVE_RU2, qword [rsp+qword*8] > +|.define SAVE_RU1, qword [rsp+qword*7] //<-- rsp after register = saves. > |.else > -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering = interpreter. > -|.define SAVE_R4, aword [rsp+aword*8] > -|.define SAVE_R3, aword [rsp+aword*7] > -|.define SAVE_R2, aword [rsp+aword*6] > -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register = saves. 
> +|.define SAVE_RET, qword [rsp+qword*11] //<-- rsp entering = interpreter. > +|.define SAVE_R4, qword [rsp+qword*10] > +|.define SAVE_R3, qword [rsp+qword*9] > +|.define SAVE_R2, qword [rsp+qword*8] > +|.define SAVE_R1, qword [rsp+qword*7] //<-- rsp after register = saves. > |.endif > -|.define SAVE_CFRAME, aword [rsp+aword*4] > -|.define SAVE_PC, aword [rsp+aword*3] > -|.define SAVE_L, aword [rsp+aword*2] > +|.define SAVE_CFRAME, qword [rsp+qword*6] > +|.define UNUSED2, qword [rsp+qword*5] > +|.define UNUSED1, dword [rsp+dword*8] > +|.define SAVE_VMSTATE, dword [rsp+dword*8] > +|.define SAVE_PC, qword [rsp+qword*3] > +|.define SAVE_L, qword [rsp+qword*2] > |.define SAVE_ERRF, dword [rsp+dword*3] > |.define SAVE_NRES, dword [rsp+dword*2] > -|.define TMP1, aword [rsp] //<-- rsp while = in interpreter. > +|.define TMP1, qword [rsp] //<-- rsp while = in interpreter. > |//----- 16 byte aligned > | > |.define TMP1d, dword [rsp] > @@ -342,6 +345,22 @@ > | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st > |.endmacro > | > +|// Save vmstate through register. > +|.macro save_vmstate_through, reg > +|.if not WIN > +| mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)] > +| mov SAVE_VMSTATE, reg > +|.endif // WIN > +|.endmacro > +| > +|// Restore vmstate through register. > +|.macro restore_vmstate_through, reg > +|.if not WIN > +| mov reg, SAVE_VMSTATE > +| mov dword [DISPATCH+DISPATCH_GL(vmstate)], reg > +|.endif // WIN > +|.endmacro > +| > |.macro fpop1; fstp st1; .endmacro > | > |// Synthesize SSE FP constants. > @@ -416,7 +435,7 @@ static void build_subroutines(BuildCtx *ctx) > | jnz ->vm_returnp > | > | // Return to C. > - | set_vmstate C > + | set_vmstate CFUNC > | and PC, -8 > | sub PC, BASE > | neg PC // Previous base =3D BASE - = delta. > @@ -448,6 +467,8 @@ static void build_subroutines(BuildCtx *ctx) > | xor eax, eax // Ok return status for = vm_pcall. > | > |->vm_leave_unw: > + | // DISPATCH required to set properly. 
> + | restore_vmstate_through RAd > | restoreregs > | ret > | > @@ -493,7 +514,9 @@ static void build_subroutines(BuildCtx *ctx) > | mov L:DISPATCH, SAVE_L > | mov GL:RB, L:DISPATCH->glref > | mov GL:RB->cur_L, L:DISPATCH > - | mov dword GL:RB->vmstate, ~LJ_VMST_C > + | mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC > + | mov DISPATCH, L:DISPATCH->glref // Setup pointer to dispatch = table. > + | add DISPATCH, GG_G2DISP > | jmp ->vm_leave_unw > | > |->vm_unwind_rethrow: > @@ -521,7 +544,7 @@ static void build_subroutines(BuildCtx *ctx) > | mov [BASE-16], RA // Prepend false to = error message. > | mov [BASE-8], RB > | mov RA, -16 // Results start at BASE+RA =3D = BASE-16. > - | set_vmstate INTERP > + | set_vmstate INTERP // INTERP until jump to BC_RET* or return to = C > | jmp ->vm_returnc // Increments RD/MULTRES and = returns. > | > = |//-----------------------------------------------------------------------= > @@ -575,6 +598,7 @@ static void build_subroutines(BuildCtx *ctx) > | lea KBASE, [esp+CFRAME_RESUME] > | mov DISPATCH, L:RB->glref // Setup pointer to = dispatch table. > | add DISPATCH, GG_G2DISP > + | save_vmstate_through TMPRd > | mov SAVE_PC, RD // Any value outside of bytecode = is ok. > | mov SAVE_CFRAME, RD > | mov SAVE_NRES, RDd > @@ -585,7 +609,7 @@ static void build_subroutines(BuildCtx *ctx) > | > | // Resume after yield (like a return). > | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB > - | set_vmstate INTERP > + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return > | mov byte L:RB->status, RDL > | mov BASE, L:RB->base > | mov RD, L:RB->top > @@ -622,11 +646,12 @@ static void build_subroutines(BuildCtx *ctx) > | mov SAVE_CFRAME, KBASE > | mov SAVE_PC, L:RB // Any value outside of = bytecode is ok. > | add DISPATCH, GG_G2DISP > + | save_vmstate_through RDd > | mov L:RB->cframe, rsp > | > |2: // Entry point for vm_resume/vm_cpcall (RA =3D base, RB =3D L, = PC =3D ftype). 
> | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB > - | set_vmstate INTERP > + | set_vmstate INTERP // vm_resume: INTERP until executing = BC_IFUNC* > | mov BASE, L:RB->base // BASE =3D old base (used in = vmeta_call). > | add PC, RA > | sub PC, BASE // PC =3D frame delta + frame = type > @@ -658,6 +683,7 @@ static void build_subroutines(BuildCtx *ctx) > | mov SAVE_ERRF, 0 // No error function. > | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o = frame. > | add DISPATCH, GG_G2DISP > + | save_vmstate_through KBASEd > | // Handler may change cframe_nres(L->cframe) or = cframe_errfunc(L->cframe). > | > | mov KBASE, L:RB->cframe // Add our C frame to cframe = chain. > @@ -697,6 +723,7 @@ static void build_subroutines(BuildCtx *ctx) > | cleartp LFUNC:KBASE > | mov KBASE, LFUNC:KBASE->pc > | mov KBASE, [KBASE+PC2PROTO(k)] > + | set_vmstate LFUNC // LFUNC after KBASE = restoration > | // BASE =3D base, RC =3D result, RB =3D meta base > | jmp RA // Jump to continuation. > | > @@ -1137,15 +1164,16 @@ static void build_subroutines(BuildCtx *ctx) > | > |.macro .ffunc, name > |->ff_ .. name: > + | set_vmstate FFUNC > |.endmacro > | > |.macro .ffunc_1, name > - |->ff_ .. name: > + | .ffunc name > | cmp NARGS:RDd, 1+1; jb ->fff_fallback > |.endmacro > | > |.macro .ffunc_2, name > - |->ff_ .. name: > + | .ffunc name > | cmp NARGS:RDd, 2+1; jb ->fff_fallback > |.endmacro > | > @@ -1578,7 +1606,7 @@ static void build_subroutines(BuildCtx *ctx) > | mov L:PC, TMP1 > | mov BASE, L:RB->base > | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB > - | set_vmstate INTERP > + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return > | > | cmp eax, LUA_YIELD > | ja >8 > @@ -1717,6 +1745,7 @@ static void build_subroutines(BuildCtx *ctx) > | movzx RAd, PC_RA > | neg RA > | lea BASE, [BASE+RA*8-16] // base =3D base - (RA+2)*8 > + | set_vmstate LFUNC // LFUNC state after = BASE restoration > | ins_next > | > |6: // Fill up results with nil. 
> @@ -2481,7 +2510,7 @@ static void build_subroutines(BuildCtx *ctx) > | mov KBASE, [KBASE+PC2PROTO(k)] > | mov L:RB->base, BASE > | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 > - | set_vmstate INTERP > + | set_vmstate LFUNC // LFUNC after BASE & = KBASE restoration > | // Modified copy of ins_next which handles function header = dispatch, too. > | mov RCd, [PC] > | movzx RAd, RCH > @@ -2697,8 +2726,8 @@ static void build_subroutines(BuildCtx *ctx) > | mov CARG1, CTSTATE > | call extern lj_ccallback_enter // (CTState *cts, void *cf) > | // lua_State * returned in eax (RD). > - | set_vmstate INTERP > | mov BASE, L:RD->base > + | set_vmstate LFUNC // LFUNC after BASE = restoration > | mov RD, L:RD->top > | sub RD, BASE > | mov LFUNC:RB, [BASE-16] > @@ -3974,6 +4003,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) >=20 > case BC_CALL: case BC_CALLM: > | ins_A_C // RA =3D base, (RB =3D nresults+1,) RC =3D = nargs+1 | extra_nargs > + | set_vmstate INTERP // INTERP until a new BASE is = setup > if (op =3D=3D BC_CALLM) { > | add NARGS:RDd, MULTRES > } > @@ -3995,6 +4025,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | mov LFUNC:RB, [RA-16] > | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call > |->BC_CALLT_Z: > + | set_vmstate INTERP // INTERP until a new BASE is = setup > | mov PC, [BASE-8] > | test PCd, FRAME_TYPE > | jnz >7 > @@ -4219,6 +4250,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | shl RAd, 3 > } > |1: > + | set_vmstate INTERP // INTERP until the old BASE & KBASE is = restored > | mov PC, [BASE-8] > | mov MULTRES, RDd // Save nresults+1. > | test PCd, FRAME_TYPE // Check frame type marker. > @@ -4260,6 +4292,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | cleartp LFUNC:KBASE > | mov KBASE, LFUNC:KBASE->pc > | mov KBASE, [KBASE+PC2PROTO(k)] > + | set_vmstate LFUNC // LFUNC after the old BASE & KBASE is = restored > | ins_next > | > |6: // Fill up results with nil. 
> @@ -4551,6 +4584,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | ins_AD // BASE =3D new base, RA =3D framesize, RD =3D nargs+1 > | mov KBASE, [PC-4+PC2PROTO(k)] > | mov L:RB, SAVE_L > + | set_vmstate LFUNC // LFUNC after KBASE restoration > | lea RA, [BASE+RA*8] // Top of frame. > | cmp RA, L:RB->maxstack > | ja ->vm_growstack_f > @@ -4588,6 +4622,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | mov [RD-8], RB // Store delta + FRAME_VARG. > | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC. > | mov L:RB, SAVE_L > + | set_vmstate LFUNC // LFUNC after KBASE restoration > | lea RA, [RD+RA*8] > | cmp RA, L:RB->maxstack > | ja ->vm_growstack_v // Need to grow stack. > @@ -4643,7 +4678,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | mov CARG1, L:RB // Caveat: CARG1 may be RA. > } > | ja ->vm_growstack_c // Need to grow stack. > - | set_vmstate C > + | set_vmstate CFUNC // CFUNC before entering C = function > if (op =3D=3D BC_FUNCC) { > | call KBASE // (lua_State *L) > } else { > @@ -4653,7 +4688,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | // nresults returned in eax (RD). > | mov BASE, L:RB->base > | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB > - | set_vmstate INTERP > + | set_vmstate INTERP // INTERP until jump to BC_RET* or = vm_return > | lea RA, [BASE+RD*8] > | neg RA > | add RA, L:RB->top // RA =3D = (L->top-(L->base+nresults))*8 > diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc > index d76fbe3..b9dffa9 100644 > --- a/src/vm_x86.dasc > +++ b/src/vm_x86.dasc > @@ -140,7 +140,7 @@ > | > |.else > | > -|.define CFRAME_SPACE, aword*7 // Delta for esp = (see <--). > +|.define CFRAME_SPACE, dword*11 // Delta = for esp (see <--). > |.macro saveregs_ > | push edi; push esi; push ebx > | sub esp, CFRAME_SPACE > @@ -183,25 +183,30 @@ > |.define ARG1, aword [esp] //<-- esp while = in interpreter. 
> |//----- 16 byte aligned, ^^^ arguments for C callee > |.else > -|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall = only. > -|.define SAVE_NRES, aword [esp+aword*14] > -|.define SAVE_CFRAME, aword [esp+aword*13] > -|.define SAVE_L, aword [esp+aword*12] > +|.define SAVE_ERRF, dword [esp+dword*19] // vm_pcall/vm_cpcall = only. > +|.define SAVE_NRES, dword [esp+dword*18] > +|.define SAVE_CFRAME, dword [esp+dword*17] > +|.define SAVE_L, dword [esp+dword*16] > |//----- 16 byte aligned, ^^^ arguments from C caller > -|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering = interpreter. > -|.define SAVE_R4, aword [esp+aword*10] > -|.define SAVE_R3, aword [esp+aword*9] > -|.define SAVE_R2, aword [esp+aword*8] > +|.define SAVE_RET, dword [esp+dword*15] //<-- esp entering = interpreter. > +|.define SAVE_R4, dword [esp+dword*14] > +|.define SAVE_R3, dword [esp+dword*13] > +|.define SAVE_R2, dword [esp+dword*12] > |//----- 16 byte aligned > -|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register = saves. > -|.define SAVE_PC, aword [esp+aword*6] > -|.define TMP2, aword [esp+aword*5] > -|.define TMP1, aword [esp+aword*4] > +|.define UNUSED3, dword [esp+dword*11] > +|.define UNUSED2, dword [esp+dword*10] > +|.define UNUSED1, dword [esp+dword*9] > +|.define SAVE_VMSTATE, dword [esp+dword*8] > |//----- 16 byte aligned > -|.define ARG4, aword [esp+aword*3] > -|.define ARG3, aword [esp+aword*2] > -|.define ARG2, aword [esp+aword*1] > -|.define ARG1, aword [esp] //<-- esp while = in interpreter. > +|.define SAVE_R1, dword [esp+dword*7] //<-- esp after register = saves. > +|.define SAVE_PC, dword [esp+dword*6] > +|.define TMP2, dword [esp+dword*5] > +|.define TMP1, dword [esp+dword*4] > +|//----- 16 byte aligned > +|.define ARG4, dword [esp+dword*3] > +|.define ARG3, dword [esp+dword*2] > +|.define ARG2, dword [esp+dword*1] > +|.define ARG1, dword [esp] //<-- esp while = in interpreter. 
> |//----- 16 byte aligned, ^^^ arguments for C callee > |.endif > | > @@ -269,7 +274,7 @@ > = |//-----------------------------------------------------------------------= > |.else // x64/POSIX stack layout > | > -|.define CFRAME_SPACE, aword*5 // Delta for rsp = (see <--). > +|.define CFRAME_SPACE, qword*7 // Delta for rsp = (see <--). > |.macro saveregs_ > | push rbx; push r15; push r14 > |.if NO_UNWIND > @@ -290,33 +295,35 @@ > | > |//----- 16 byte aligned, > |.if NO_UNWIND > -|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering = interpreter. > -|.define SAVE_R4, aword [rsp+aword*10] > -|.define SAVE_R3, aword [rsp+aword*9] > -|.define SAVE_R2, aword [rsp+aword*8] > -|.define SAVE_R1, aword [rsp+aword*7] > -|.define SAVE_RU2, aword [rsp+aword*6] > -|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register = saves. > +|.define SAVE_RET, qword [rsp+qword*13] //<-- rsp entering = interpreter. > +|.define SAVE_R4, qword [rsp+qword*12] > +|.define SAVE_R3, qword [rsp+qword*11] > +|.define SAVE_R2, qword [rsp+qword*10] > +|.define SAVE_R1, qword [rsp+qword*9] > +|.define SAVE_RU2, qword [rsp+qword*8] > +|.define SAVE_RU1, qword [rsp+qword*7] //<-- rsp after register = saves. > |.else > -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering = interpreter. > -|.define SAVE_R4, aword [rsp+aword*8] > -|.define SAVE_R3, aword [rsp+aword*7] > -|.define SAVE_R2, aword [rsp+aword*6] > -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register = saves. > +|.define SAVE_RET, qword [rsp+qword*11] //<-- rsp entering = interpreter. > +|.define SAVE_R4, qword [rsp+qword*10] > +|.define SAVE_R3, qword [rsp+qword*9] > +|.define SAVE_R2, qword [rsp+qword*8] > +|.define SAVE_R1, qword [rsp+qword*7] //<-- rsp after register = saves. 
> |.endif > -|.define SAVE_CFRAME, aword [rsp+aword*4] > +|.define SAVE_CFRAME, qword [rsp+qword*6] > +|.define UNUSED1, qword [rsp+qword*5] > +|.define SAVE_VMSTATE, dword [rsp+dword*8] > |.define SAVE_PC, dword [rsp+dword*7] > |.define SAVE_L, dword [rsp+dword*6] > |.define SAVE_ERRF, dword [rsp+dword*5] > |.define SAVE_NRES, dword [rsp+dword*4] > -|.define TMPa, aword [rsp+aword*1] > +|.define TMPa, qword [rsp+qword*1] > |.define TMP2, dword [rsp+dword*1] > |.define TMP1, dword [rsp] //<-- rsp while = in interpreter. > |//----- 16 byte aligned > | > |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). > |.define TMPQ, qword [rsp] > -|.define TMP3, dword [rsp+aword*1] > +|.define TMP3, dword [rsp+qword*1] > |.define MULTRES, TMP2 > | > |.endif > @@ -433,6 +440,22 @@ > | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st > |.endmacro > | > +|// Save vmstate through register. > +|.macro save_vmstate_through, reg > +|.if not WIN > +| mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)] > +| mov SAVE_VMSTATE, reg > +|.endif // WIN > +|.endmacro > +| > +|// Restore vmstate through register. > +|.macro restore_vmstate_through, reg > +|.if not WIN > +| mov reg, SAVE_VMSTATE > +| mov dword [DISPATCH+DISPATCH_GL(vmstate)], reg > +|.endif // WIN > +|.endmacro > +| > |// x87 compares. > |.macro fcomparepp // Compare and pop st0 >< st1. > | fucomip st1 > @@ -520,7 +543,7 @@ static void build_subroutines(BuildCtx *ctx) > | jnz ->vm_returnp > | > | // Return to C. > - | set_vmstate C > + | set_vmstate CFUNC > | and PC, -8 > | sub PC, BASE > | neg PC // Previous base =3D BASE - = delta. > @@ -559,6 +582,8 @@ static void build_subroutines(BuildCtx *ctx) > | xor eax, eax // Ok return status for = vm_pcall. > | > |->vm_leave_unw: > + | // DISPATCH required to set properly. 
> + | restore_vmstate_through RA > | restoreregs > | ret > | > @@ -613,7 +638,9 @@ static void build_subroutines(BuildCtx *ctx) > | mov L:DISPATCH, SAVE_L > | mov GL:RB, L:DISPATCH->glref > | mov dword GL:RB->cur_L, L:DISPATCH > - | mov dword GL:RB->vmstate, ~LJ_VMST_C > + | mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC > + | mov DISPATCH, L:DISPATCH->glref // Setup pointer to dispatch = table. > + | add DISPATCH, GG_G2DISP > | jmp ->vm_leave_unw > | > |->vm_unwind_rethrow: > @@ -647,7 +674,7 @@ static void build_subroutines(BuildCtx *ctx) > | mov PC, [BASE-4] // Fetch PC of previous frame. > | mov dword [BASE-4], LJ_TFALSE // Prepend false to error = message. > | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB > - | set_vmstate INTERP > + | set_vmstate INTERP // INTERP until jump to BC_RET* or return to = C > | jmp ->vm_returnc // Increments RD/MULTRES and = returns. > | > |.if WIN and not X64 > @@ -714,10 +741,11 @@ static void build_subroutines(BuildCtx *ctx) > | mov RA, INARG_BASE // Caveat: overlaps = SAVE_CFRAME! > |.endif > | mov PC, FRAME_CP > - | xor RD, RD > | lea KBASEa, [esp+CFRAME_RESUME] > | mov DISPATCH, L:RB->glref // Setup pointer to = dispatch table. > | add DISPATCH, GG_G2DISP > + | save_vmstate_through RD > + | xor RD, RD > | mov SAVE_PC, RD // Any value outside of bytecode = is ok. > | mov SAVE_CFRAME, RDa > |.if X64 > @@ -730,7 +758,7 @@ static void build_subroutines(BuildCtx *ctx) > | > | // Resume after yield (like a return). > | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB > - | set_vmstate INTERP > + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return > | mov byte L:RB->status, RDL > | mov BASE, L:RB->base > | mov RD, L:RB->top > @@ -774,6 +802,7 @@ static void build_subroutines(BuildCtx *ctx) > | mov SAVE_CFRAME, KBASEa > | mov SAVE_PC, L:RB // Any value outside of = bytecode is ok. 
> | add DISPATCH, GG_G2DISP > + | save_vmstate_through RD > |.if X64 > | mov L:RB->cframe, rsp > |.else > @@ -782,7 +811,7 @@ static void build_subroutines(BuildCtx *ctx) > | > |2: // Entry point for vm_resume/vm_cpcall (RA =3D base, RB =3D L, = PC =3D ftype). > | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB > - | set_vmstate INTERP > + | set_vmstate INTERP // vm_resume: INTERP until executing = BC_IFUNC* > | mov BASE, L:RB->base // BASE =3D old base (used in = vmeta_call). > | add PC, RA > | sub PC, BASE // PC =3D frame delta + frame = type > @@ -823,6 +852,7 @@ static void build_subroutines(BuildCtx *ctx) > | mov SAVE_ERRF, 0 // No error function. > | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o = frame. > | add DISPATCH, GG_G2DISP > + | save_vmstate_through KBASE > | // Handler may change cframe_nres(L->cframe) or = cframe_errfunc(L->cframe). > | > |.if X64 > @@ -885,6 +915,7 @@ static void build_subroutines(BuildCtx *ctx) > | mov KBASE, LFUNC:KBASE->pc > | mov KBASE, [KBASE+PC2PROTO(k)] > | // BASE =3D base, RC =3D result, RB =3D meta base > + | set_vmstate LFUNC // LFUNC after KBASE = restoration > | jmp RAa // Jump to continuation. > | > |.if FFI > @@ -1409,15 +1440,16 @@ static void build_subroutines(BuildCtx *ctx) > | > |.macro .ffunc, name > |->ff_ .. name: > + | set_vmstate FFUNC > |.endmacro > | > |.macro .ffunc_1, name > - |->ff_ .. name: > + | .ffunc name > | cmp NARGS:RD, 1+1; jb ->fff_fallback > |.endmacro > | > |.macro .ffunc_2, name > - |->ff_ .. 
name: > + | .ffunc name > | cmp NARGS:RD, 2+1; jb ->fff_fallback > |.endmacro > | > @@ -1924,7 +1956,7 @@ static void build_subroutines(BuildCtx *ctx) > |.endif > | mov BASE, L:RB->base > | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB > - | set_vmstate INTERP > + | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return > | > | cmp eax, LUA_YIELD > | ja >8 > @@ -2089,6 +2121,7 @@ static void build_subroutines(BuildCtx *ctx) > | movzx RA, PC_RA > | not RAa // Note: ~RA =3D -(RA+1) > | lea BASE, [BASE+RA*8] // base =3D base - (RA+1)*8 > + | set_vmstate LFUNC // LFUNC state after = BASE restoration > | ins_next > | > |6: // Fill up results with nil. > @@ -2933,7 +2966,7 @@ static void build_subroutines(BuildCtx *ctx) > | mov KBASE, [KBASE+PC2PROTO(k)] > | mov L:RB->base, BASE > | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 > - | set_vmstate INTERP > + | set_vmstate LFUNC // LFUNC after BASE & = KBASE restoration > | // Modified copy of ins_next which handles function header = dispatch, too. > | mov RC, [PC] > | movzx RA, RCH > @@ -3203,8 +3236,8 @@ static void build_subroutines(BuildCtx *ctx) > | mov FCARG1, CTSTATE > | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf) > | // lua_State * returned in eax (RD). 
> - | set_vmstate INTERP > | mov BASE, L:RD->base > + | set_vmstate LFUNC // LFUNC after BASE = restoration > | mov RD, L:RD->top > | sub RD, BASE > | mov LFUNC:RB, [BASE-8] > @@ -4683,6 +4716,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) >=20 > case BC_CALL: case BC_CALLM: > | ins_A_C // RA =3D base, (RB =3D nresults+1,) RC =3D = nargs+1 | extra_nargs > + | set_vmstate INTERP // INTERP until a new BASE is = setup > if (op =3D=3D BC_CALLM) { > | add NARGS:RD, MULTRES > } > @@ -4706,6 +4740,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | cmp dword [RA-4], LJ_TFUNC > | jne ->vmeta_call > |->BC_CALLT_Z: > + | set_vmstate INTERP // INTERP until a new BASE is = setup > | mov PC, [BASE-4] > | test PC, FRAME_TYPE > | jnz >7 > @@ -4989,6 +5024,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | shl RA, 3 > } > |1: > + | set_vmstate INTERP // INTERP until the old BASE & KBASE is = restored > | mov PC, [BASE-4] > | mov MULTRES, RD // Save nresults+1. > | test PC, FRAME_TYPE // Check frame type marker. > @@ -5043,6 +5079,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | mov LFUNC:KBASE, [BASE-8] > | mov KBASE, LFUNC:KBASE->pc > | mov KBASE, [KBASE+PC2PROTO(k)] > + | set_vmstate LFUNC // LFUNC after the old BASE & KBASE is = restored > | ins_next > | > |6: // Fill up results with nil. > @@ -5330,6 +5367,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | ins_AD // BASE =3D new base, RA =3D framesize, RD =3D nargs+1 > | mov KBASE, [PC-4+PC2PROTO(k)] > | mov L:RB, SAVE_L > + | set_vmstate LFUNC // LFUNC after KBASE restoration > | lea RA, [BASE+RA*8] // Top of frame. > | cmp RA, L:RB->maxstack > | ja ->vm_growstack_f > @@ -5367,6 +5405,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | mov [RD-4], RB // Store delta + FRAME_VARG. > | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC. 
> | mov L:RB, SAVE_L > + | set_vmstate LFUNC // LFUNC after KBASE restoration > | lea RA, [RD+RA*8] > | cmp RA, L:RB->maxstack > | ja ->vm_growstack_v // Need to grow stack. > @@ -5431,7 +5470,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > |.endif > } > | ja ->vm_growstack_c // Need to grow stack. > - | set_vmstate C > + | set_vmstate CFUNC // CFUNC before entering C = function > if (op =3D=3D BC_FUNCC) { > | call KBASEa // (lua_State *L) > } else { > @@ -5441,7 +5480,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, = int defop) > | // nresults returned in eax (RD). > | mov BASE, L:RB->base > | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB > - | set_vmstate INTERP > + | set_vmstate INTERP // INTERP until jump to BC_RET* or = vm_return > | lea RA, [BASE+RD*8] > | neg RA > | add RA, L:RB->top // RA =3D = (L->top-(L->base+nresults))*8 > --=20 > 2.28.0 >=20