[Tarantool-patches] [PATCH luajit v1 05/11] vm: introduce LFUNC and FFUNC vmstates
Sergey Kaplun
skaplun at tarantool.org
Wed Dec 16 22:13:40 MSK 2020
This patch splits LJ_VMST_LFUNC and LJ_VMST_FFUNC off from LJ_VMST_INTERP
to make it possible to determine the context of VM execution on x86/x64
arches. Also, LJ_VMST_C is renamed to LJ_VMST_CFUNC for naming consistency
with the newer vmstates.
Also, this patch adjusts the stack layout for x86/x64 arches to save the
vmstate, so that it does not become inconsistent during stack unwinding
when an error is raised.
Part of tarantool/tarantool#5442
---
src/lj_frame.h | 18 +++---
src/lj_obj.h | 4 +-
src/lj_profile.c | 5 +-
src/luajit-gdb.py | 14 +++--
src/vm_arm.dasc | 6 +-
src/vm_arm64.dasc | 6 +-
src/vm_mips.dasc | 6 +-
src/vm_mips64.dasc | 6 +-
src/vm_ppc.dasc | 6 +-
src/vm_x64.dasc | 99 ++++++++++++++++++++++----------
src/vm_x86.dasc | 137 ++++++++++++++++++++++++++++++---------------
11 files changed, 200 insertions(+), 107 deletions(-)
diff --git a/src/lj_frame.h b/src/lj_frame.h
index 19c49a4..2e693f9 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -127,13 +127,13 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_SIZE (16*4)
#define CFRAME_SHIFT_MULTRES 0
#else
-#define CFRAME_OFS_ERRF (15*4)
-#define CFRAME_OFS_NRES (14*4)
-#define CFRAME_OFS_PREV (13*4)
-#define CFRAME_OFS_L (12*4)
+#define CFRAME_OFS_ERRF (19*4)
+#define CFRAME_OFS_NRES (18*4)
+#define CFRAME_OFS_PREV (17*4)
+#define CFRAME_OFS_L (16*4)
#define CFRAME_OFS_PC (6*4)
#define CFRAME_OFS_MULTRES (5*4)
-#define CFRAME_SIZE (12*4)
+#define CFRAME_SIZE (16*4)
#define CFRAME_SHIFT_MULTRES 0
#endif
#elif LJ_TARGET_X64
@@ -152,11 +152,11 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_OFS_NRES (22*4)
#define CFRAME_OFS_MULTRES (21*4)
#endif
-#define CFRAME_SIZE (10*8)
+#define CFRAME_SIZE (12*8)
#define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8)
#define CFRAME_SHIFT_MULTRES 0
#else
-#define CFRAME_OFS_PREV (4*8)
+#define CFRAME_OFS_PREV (6*8)
#if LJ_GC64
#define CFRAME_OFS_PC (3*8)
#define CFRAME_OFS_L (2*8)
@@ -171,9 +171,9 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_OFS_MULTRES (1*4)
#endif
#if LJ_NO_UNWIND
-#define CFRAME_SIZE (12*8)
+#define CFRAME_SIZE (14*8)
#else
-#define CFRAME_SIZE (10*8)
+#define CFRAME_SIZE (12*8)
#endif
#define CFRAME_SIZE_JIT (CFRAME_SIZE + 16)
#define CFRAME_SHIFT_MULTRES 0
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 927b347..7fb715e 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -512,7 +512,9 @@ typedef struct GCtab {
/* VM states. */
enum {
LJ_VMST_INTERP, /* Interpreter. */
- LJ_VMST_C, /* C function. */
+ LJ_VMST_LFUNC, /* Lua function. */
+ LJ_VMST_FFUNC, /* Fast function. */
+ LJ_VMST_CFUNC, /* C function. */
LJ_VMST_GC, /* Garbage collector. */
LJ_VMST_EXIT, /* Trace exit handler. */
LJ_VMST_RECORD, /* Trace recorder. */
diff --git a/src/lj_profile.c b/src/lj_profile.c
index 116998e..637e03c 100644
--- a/src/lj_profile.c
+++ b/src/lj_profile.c
@@ -157,7 +157,10 @@ static void profile_trigger(ProfileState *ps)
int st = g->vmstate;
ps->vmstate = st >= 0 ? 'N' :
st == ~LJ_VMST_INTERP ? 'I' :
- st == ~LJ_VMST_C ? 'C' :
+ st == ~LJ_VMST_CFUNC ? 'C' :
+ /* Stubs for profiler hooks. */
+ st == ~LJ_VMST_FFUNC ? 'I' :
+ st == ~LJ_VMST_LFUNC ? 'I' :
st == ~LJ_VMST_GC ? 'G' : 'J';
g->hookmask = (mask | HOOK_PROFILE);
lj_dispatch_update(g);
diff --git a/src/luajit-gdb.py b/src/luajit-gdb.py
index 652c560..f1fd623 100644
--- a/src/luajit-gdb.py
+++ b/src/luajit-gdb.py
@@ -206,12 +206,14 @@ def J(g):
def vm_state(g):
return {
i2notu32(0): 'INTERP',
- i2notu32(1): 'C',
- i2notu32(2): 'GC',
- i2notu32(3): 'EXIT',
- i2notu32(4): 'RECORD',
- i2notu32(5): 'OPT',
- i2notu32(6): 'ASM',
+ i2notu32(1): 'LFUNC',
+ i2notu32(2): 'FFUNC',
+ i2notu32(3): 'CFUNC',
+ i2notu32(4): 'GC',
+ i2notu32(5): 'EXIT',
+ i2notu32(6): 'RECORD',
+ i2notu32(7): 'OPT',
+ i2notu32(8): 'ASM',
}.get(int(tou32(g['vmstate'])), 'TRACE')
def gc_state(g):
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index d4cdaf5..ae2efdf 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -287,7 +287,7 @@ static void build_subroutines(BuildCtx *ctx)
|
| str RB, L->base
| ldr KBASE, SAVE_NRES
- | mv_vmstate CARG4, C
+ | mv_vmstate CARG4, CFUNC
| sub BASE, BASE, #8
| subs CARG3, RC, #8
| lsl KBASE, KBASE, #3 // KBASE = (nresults_wanted+1)*8
@@ -348,7 +348,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov CRET1, CARG2
|->vm_unwind_c_eh: // Landing pad for external unwinder.
| ldr L, SAVE_L
- | mv_vmstate CARG4, C
+ | mv_vmstate CARG4, CFUNC
| ldr GL:CARG3, L->glref
| str CARG4, GL:CARG3->vmstate
| str L, GL:CARG3->cur_L
@@ -4487,7 +4487,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
if (op == BC_FUNCCW) {
| ldr CARG2, CFUNC:CARG3->f
}
- | mv_vmstate CARG3, C
+ | mv_vmstate CARG3, CFUNC
| mov CARG1, L
| bhi ->vm_growstack_c // Need to grow stack.
| st_vmstate CARG3
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index 3eaf376..f783428 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -332,7 +332,7 @@ static void build_subroutines(BuildCtx *ctx)
|
| str RB, L->base
| ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1.
- | mv_vmstate TMP0w, C
+ | mv_vmstate TMP0w, CFUNC
| sub BASE, BASE, #16
| subs TMP2, RC, #8
| st_vmstate TMP0w
@@ -391,7 +391,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov CRET1, CARG2
|->vm_unwind_c_eh: // Landing pad for external unwinder.
| ldr L, SAVE_L
- | mv_vmstate TMP0w, C
+ | mv_vmstate TMP0w, CFUNC
| ldr GL, L->glref
| st_vmstate TMP0w
| b ->vm_leave_unw
@@ -3816,7 +3816,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
if (op == BC_FUNCCW) {
| ldr CARG2, CFUNC:CARG3->f
}
- | mv_vmstate TMP0w, C
+ | mv_vmstate TMP0w, CFUNC
| mov CARG1, L
| bhi ->vm_growstack_c // Need to grow stack.
| st_vmstate TMP0w
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 1afd611..ec57d78 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -403,7 +403,7 @@ static void build_subroutines(BuildCtx *ctx)
|
| addiu TMP1, RD, -8
| sw TMP2, L->base
- | li_vmstate C
+ | li_vmstate CFUNC
| lw TMP2, SAVE_NRES
| addiu BASE, BASE, -8
| st_vmstate
@@ -473,7 +473,7 @@ static void build_subroutines(BuildCtx *ctx)
| move CRET1, CARG2
|->vm_unwind_c_eh: // Landing pad for external unwinder.
| lw L, SAVE_L
- | li TMP0, ~LJ_VMST_C
+ | li TMP0, ~LJ_VMST_CFUNC
| lw GL:TMP1, L->glref
| b ->vm_leave_unw
|. sw TMP0, GL:TMP1->vmstate
@@ -5085,7 +5085,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sw BASE, L->base
| sltu AT, TMP2, TMP1
| sw RC, L->top
- | li_vmstate C
+ | li_vmstate CFUNC
if (op == BC_FUNCCW) {
| lw CARG2, CFUNC:RB->f
}
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index c06270a..9a749f9 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -449,7 +449,7 @@ static void build_subroutines(BuildCtx *ctx)
|
| addiu TMP1, RD, -8
| sd TMP2, L->base
- | li_vmstate C
+ | li_vmstate CFUNC
| lw TMP2, SAVE_NRES
| daddiu BASE, BASE, -16
| st_vmstate
@@ -517,7 +517,7 @@ static void build_subroutines(BuildCtx *ctx)
| move CRET1, CARG2
|->vm_unwind_c_eh: // Landing pad for external unwinder.
| ld L, SAVE_L
- | li TMP0, ~LJ_VMST_C
+ | li TMP0, ~LJ_VMST_CFUNC
| ld GL:TMP1, L->glref
| b ->vm_leave_unw
|. sw TMP0, GL:TMP1->vmstate
@@ -4952,7 +4952,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sd BASE, L->base
| sltu AT, TMP2, TMP1
| sd RC, L->top
- | li_vmstate C
+ | li_vmstate CFUNC
if (op == BC_FUNCCW) {
| ld CARG2, CFUNC:RB->f
}
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index b4260eb..62e9b68 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -520,7 +520,7 @@ static void build_subroutines(BuildCtx *ctx)
| // TMP0 = PC & FRAME_TYPE
| cmpwi TMP0, FRAME_C
| rlwinm TMP2, PC, 0, 0, 28
- | li_vmstate C
+ | li_vmstate CFUNC
| sub TMP2, BASE, TMP2 // TMP2 = previous base.
| bney ->vm_returnp
|
@@ -596,7 +596,7 @@ static void build_subroutines(BuildCtx *ctx)
|->vm_unwind_c_eh: // Landing pad for external unwinder.
| lwz L, SAVE_L
| .toc ld TOCREG, SAVE_TOC
- | li TMP0, ~LJ_VMST_C
+ | li TMP0, ~LJ_VMST_CFUNC
| lwz GL:TMP1, L->glref
| stw TMP0, GL:TMP1->vmstate
| b ->vm_leave_unw
@@ -5060,7 +5060,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| stp BASE, L->base
| cmplw TMP1, TMP2
| stp RC, L->top
- | li_vmstate C
+ | li_vmstate CFUNC
|.if TOC
| mtctr TMP3
|.else
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 80753e0..d4d3a1d 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -140,7 +140,7 @@
|//-----------------------------------------------------------------------
|.else // x64/POSIX stack layout
|
-|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
+|.define CFRAME_SPACE, qword*7 // Delta for rsp (see <--).
|.macro saveregs_
| push rbx; push r15; push r14
|.if NO_UNWIND
@@ -161,26 +161,29 @@
|
|//----- 16 byte aligned,
|.if NO_UNWIND
-|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
-|.define SAVE_R4, aword [rsp+aword*10]
-|.define SAVE_R3, aword [rsp+aword*9]
-|.define SAVE_R2, aword [rsp+aword*8]
-|.define SAVE_R1, aword [rsp+aword*7]
-|.define SAVE_RU2, aword [rsp+aword*6]
-|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
+|.define SAVE_RET, qword [rsp+qword*13] //<-- rsp entering interpreter.
+|.define SAVE_R4, qword [rsp+qword*12]
+|.define SAVE_R3, qword [rsp+qword*11]
+|.define SAVE_R2, qword [rsp+qword*10]
+|.define SAVE_R1, qword [rsp+qword*9]
+|.define SAVE_RU2, qword [rsp+qword*8]
+|.define SAVE_RU1, qword [rsp+qword*7] //<-- rsp after register saves.
|.else
-|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
-|.define SAVE_R4, aword [rsp+aword*8]
-|.define SAVE_R3, aword [rsp+aword*7]
-|.define SAVE_R2, aword [rsp+aword*6]
-|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
+|.define SAVE_RET, qword [rsp+qword*11] //<-- rsp entering interpreter.
+|.define SAVE_R4, qword [rsp+qword*10]
+|.define SAVE_R3, qword [rsp+qword*9]
+|.define SAVE_R2, qword [rsp+qword*8]
+|.define SAVE_R1, qword [rsp+qword*7] //<-- rsp after register saves.
|.endif
-|.define SAVE_CFRAME, aword [rsp+aword*4]
-|.define SAVE_PC, aword [rsp+aword*3]
-|.define SAVE_L, aword [rsp+aword*2]
+|.define SAVE_CFRAME, qword [rsp+qword*6]
+|.define SAVE_UNUSED2, qword [rsp+qword*5]
+|.define SAVE_UNUSED1, dword [rsp+dword*9] // Padding; dword*8 is SAVE_VMSTATE.
+|.define SAVE_VMSTATE, dword [rsp+dword*8]
+|.define SAVE_PC, qword [rsp+qword*3]
+|.define SAVE_L, qword [rsp+qword*2]
|.define SAVE_ERRF, dword [rsp+dword*3]
|.define SAVE_NRES, dword [rsp+dword*2]
-|.define TMP1, aword [rsp] //<-- rsp while in interpreter.
+|.define TMP1, qword [rsp] //<-- rsp while in interpreter.
|//----- 16 byte aligned
|
|.define TMP1d, dword [rsp]
@@ -342,6 +345,20 @@
| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
|.endmacro
|
+|.if not WIN
+|// Save vmstate through register.
+|.macro save_vmstate_through, reg
+| mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)]
+| mov SAVE_VMSTATE, reg
+|.endmacro
+|
+|// Restore vmstate through register.
+|.macro restore_vmstate_through, reg
+| mov reg, SAVE_VMSTATE
+| mov dword [DISPATCH+DISPATCH_GL(vmstate)], reg
+|.endmacro
+|.endif // WIN
+|
|.macro fpop1; fstp st1; .endmacro
|
|// Synthesize SSE FP constants.
@@ -416,7 +433,7 @@ static void build_subroutines(BuildCtx *ctx)
| jnz ->vm_returnp
|
| // Return to C.
- | set_vmstate C
+ | set_vmstate CFUNC
| and PC, -8
| sub PC, BASE
| neg PC // Previous base = BASE - delta.
@@ -448,6 +465,10 @@ static void build_subroutines(BuildCtx *ctx)
| xor eax, eax // Ok return status for vm_pcall.
|
|->vm_leave_unw:
+ |.if not WIN
+ | // DISPATCH must be set up properly here: restore_vmstate_through uses it.
+ | restore_vmstate_through RAd
+ |.endif
| restoreregs
| ret
|
@@ -493,7 +514,9 @@ static void build_subroutines(BuildCtx *ctx)
| mov L:DISPATCH, SAVE_L
| mov GL:RB, L:DISPATCH->glref
| mov GL:RB->cur_L, L:DISPATCH
- | mov dword GL:RB->vmstate, ~LJ_VMST_C
+ | mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC
+ | mov DISPATCH, L:DISPATCH->glref // Setup pointer to dispatch table.
+ | add DISPATCH, GG_G2DISP
| jmp ->vm_leave_unw
|
|->vm_unwind_rethrow:
@@ -521,7 +544,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov [BASE-16], RA // Prepend false to error message.
| mov [BASE-8], RB
| mov RA, -16 // Results start at BASE+RA = BASE-16.
- | set_vmstate INTERP
+ | set_vmstate INTERP // INTERP until jump to BC_RET* or return to C
| jmp ->vm_returnc // Increments RD/MULTRES and returns.
|
|//-----------------------------------------------------------------------
@@ -575,6 +598,9 @@ static void build_subroutines(BuildCtx *ctx)
| lea KBASE, [esp+CFRAME_RESUME]
| mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
| add DISPATCH, GG_G2DISP
+ |.if not WIN
+ | save_vmstate_through TMPRd
+ |.endif
| mov SAVE_PC, RD // Any value outside of bytecode is ok.
| mov SAVE_CFRAME, RD
| mov SAVE_NRES, RDd
@@ -585,7 +611,7 @@ static void build_subroutines(BuildCtx *ctx)
|
| // Resume after yield (like a return).
| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
- | set_vmstate INTERP
+ | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
| mov byte L:RB->status, RDL
| mov BASE, L:RB->base
| mov RD, L:RB->top
@@ -622,11 +648,14 @@ static void build_subroutines(BuildCtx *ctx)
| mov SAVE_CFRAME, KBASE
| mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
| add DISPATCH, GG_G2DISP
+ |.if not WIN
+ | save_vmstate_through RDd
+ |.endif
| mov L:RB->cframe, rsp
|
|2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
- | set_vmstate INTERP
+ | set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC*
| mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
| add PC, RA
| sub PC, BASE // PC = frame delta + frame type
@@ -658,6 +687,9 @@ static void build_subroutines(BuildCtx *ctx)
| mov SAVE_ERRF, 0 // No error function.
| mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame.
| add DISPATCH, GG_G2DISP
+ |.if not WIN
+ | save_vmstate_through KBASEd
+ |.endif
| // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
|
| mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
@@ -697,6 +729,7 @@ static void build_subroutines(BuildCtx *ctx)
| cleartp LFUNC:KBASE
| mov KBASE, LFUNC:KBASE->pc
| mov KBASE, [KBASE+PC2PROTO(k)]
+ | set_vmstate LFUNC // LFUNC after KBASE restoration
| // BASE = base, RC = result, RB = meta base
| jmp RA // Jump to continuation.
|
@@ -1137,15 +1170,16 @@ static void build_subroutines(BuildCtx *ctx)
|
|.macro .ffunc, name
|->ff_ .. name:
+ | set_vmstate FFUNC
|.endmacro
|
|.macro .ffunc_1, name
- |->ff_ .. name:
+ | .ffunc name
| cmp NARGS:RDd, 1+1; jb ->fff_fallback
|.endmacro
|
|.macro .ffunc_2, name
- |->ff_ .. name:
+ | .ffunc name
| cmp NARGS:RDd, 2+1; jb ->fff_fallback
|.endmacro
|
@@ -1578,7 +1612,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov L:PC, TMP1
| mov BASE, L:RB->base
| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
- | set_vmstate INTERP
+ | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
|
| cmp eax, LUA_YIELD
| ja >8
@@ -1717,6 +1751,7 @@ static void build_subroutines(BuildCtx *ctx)
| movzx RAd, PC_RA
| neg RA
| lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
+ | set_vmstate LFUNC // LFUNC state after BASE restoration
| ins_next
|
|6: // Fill up results with nil.
@@ -2481,7 +2516,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov KBASE, [KBASE+PC2PROTO(k)]
| mov L:RB->base, BASE
| mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
- | set_vmstate INTERP
+ | set_vmstate LFUNC // LFUNC after BASE & KBASE restoration
| // Modified copy of ins_next which handles function header dispatch, too.
| mov RCd, [PC]
| movzx RAd, RCH
@@ -2697,8 +2732,8 @@ static void build_subroutines(BuildCtx *ctx)
| mov CARG1, CTSTATE
| call extern lj_ccallback_enter // (CTState *cts, void *cf)
| // lua_State * returned in eax (RD).
- | set_vmstate INTERP
| mov BASE, L:RD->base
+ | set_vmstate LFUNC // LFUNC after BASE restoration
| mov RD, L:RD->top
| sub RD, BASE
| mov LFUNC:RB, [BASE-16]
@@ -3974,6 +4009,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_CALL: case BC_CALLM:
| ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+ | set_vmstate INTERP // INTERP until a new BASE is setup
if (op == BC_CALLM) {
| add NARGS:RDd, MULTRES
}
@@ -3995,6 +4031,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov LFUNC:RB, [RA-16]
| checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
|->BC_CALLT_Z:
+ | set_vmstate INTERP // INTERP until a new BASE is setup
| mov PC, [BASE-8]
| test PCd, FRAME_TYPE
| jnz >7
@@ -4219,6 +4256,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| shl RAd, 3
}
|1:
+ | set_vmstate INTERP // INTERP until the old BASE & KBASE is restored
| mov PC, [BASE-8]
| mov MULTRES, RDd // Save nresults+1.
| test PCd, FRAME_TYPE // Check frame type marker.
@@ -4260,6 +4298,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| cleartp LFUNC:KBASE
| mov KBASE, LFUNC:KBASE->pc
| mov KBASE, [KBASE+PC2PROTO(k)]
+ | set_vmstate LFUNC // LFUNC after the old BASE & KBASE is restored
| ins_next
|
|6: // Fill up results with nil.
@@ -4551,6 +4590,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_AD // BASE = new base, RA = framesize, RD = nargs+1
| mov KBASE, [PC-4+PC2PROTO(k)]
| mov L:RB, SAVE_L
+ | set_vmstate LFUNC // LFUNC after KBASE restoration
| lea RA, [BASE+RA*8] // Top of frame.
| cmp RA, L:RB->maxstack
| ja ->vm_growstack_f
@@ -4588,6 +4628,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov [RD-8], RB // Store delta + FRAME_VARG.
| mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
| mov L:RB, SAVE_L
+ | set_vmstate LFUNC // LFUNC after KBASE restoration
| lea RA, [RD+RA*8]
| cmp RA, L:RB->maxstack
| ja ->vm_growstack_v // Need to grow stack.
@@ -4643,7 +4684,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov CARG1, L:RB // Caveat: CARG1 may be RA.
}
| ja ->vm_growstack_c // Need to grow stack.
- | set_vmstate C
+ | set_vmstate CFUNC // CFUNC before entering C function
if (op == BC_FUNCC) {
| call KBASE // (lua_State *L)
} else {
@@ -4653,7 +4694,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // nresults returned in eax (RD).
| mov BASE, L:RB->base
| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
- | set_vmstate INTERP
+ | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
| lea RA, [BASE+RD*8]
| neg RA
| add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index d76fbe3..939c43f 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -140,7 +140,7 @@
|
|.else
|
-|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
+|.define CFRAME_SPACE, dword*11 // Delta for esp (see <--).
|.macro saveregs_
| push edi; push esi; push ebx
| sub esp, CFRAME_SPACE
@@ -183,25 +183,30 @@
|.define ARG1, aword [esp] //<-- esp while in interpreter.
|//----- 16 byte aligned, ^^^ arguments for C callee
|.else
-|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
-|.define SAVE_NRES, aword [esp+aword*14]
-|.define SAVE_CFRAME, aword [esp+aword*13]
-|.define SAVE_L, aword [esp+aword*12]
+|.define SAVE_ERRF, dword [esp+dword*19] // vm_pcall/vm_cpcall only.
+|.define SAVE_NRES, dword [esp+dword*18]
+|.define SAVE_CFRAME, dword [esp+dword*17]
+|.define SAVE_L, dword [esp+dword*16]
|//----- 16 byte aligned, ^^^ arguments from C caller
-|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
-|.define SAVE_R4, aword [esp+aword*10]
-|.define SAVE_R3, aword [esp+aword*9]
-|.define SAVE_R2, aword [esp+aword*8]
+|.define SAVE_RET, dword [esp+dword*15] //<-- esp entering interpreter.
+|.define SAVE_R4, dword [esp+dword*14]
+|.define SAVE_R3, dword [esp+dword*13]
+|.define SAVE_R2, dword [esp+dword*12]
|//----- 16 byte aligned
-|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
-|.define SAVE_PC, aword [esp+aword*6]
-|.define TMP2, aword [esp+aword*5]
-|.define TMP1, aword [esp+aword*4]
+|.define SAVE_UNUSED3, dword [esp+dword*11]
+|.define SAVE_UNUSED2, dword [esp+dword*10]
+|.define SAVE_UNUSED1, dword [esp+dword*9]
+|.define SAVE_VMSTATE, dword [esp+dword*8]
|//----- 16 byte aligned
-|.define ARG4, aword [esp+aword*3]
-|.define ARG3, aword [esp+aword*2]
-|.define ARG2, aword [esp+aword*1]
-|.define ARG1, aword [esp] //<-- esp while in interpreter.
+|.define SAVE_R1, dword [esp+dword*7] //<-- esp after register saves.
+|.define SAVE_PC, dword [esp+dword*6]
+|.define TMP2, dword [esp+dword*5]
+|.define TMP1, dword [esp+dword*4]
+|//----- 16 byte aligned
+|.define ARG4, dword [esp+dword*3]
+|.define ARG3, dword [esp+dword*2]
+|.define ARG2, dword [esp+dword*1]
+|.define ARG1, dword [esp] //<-- esp while in interpreter.
|//----- 16 byte aligned, ^^^ arguments for C callee
|.endif
|
@@ -269,7 +274,7 @@
|//-----------------------------------------------------------------------
|.else // x64/POSIX stack layout
|
-|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
+|.define CFRAME_SPACE, qword*7 // Delta for rsp (see <--).
|.macro saveregs_
| push rbx; push r15; push r14
|.if NO_UNWIND
@@ -290,33 +295,35 @@
|
|//----- 16 byte aligned,
|.if NO_UNWIND
-|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
-|.define SAVE_R4, aword [rsp+aword*10]
-|.define SAVE_R3, aword [rsp+aword*9]
-|.define SAVE_R2, aword [rsp+aword*8]
-|.define SAVE_R1, aword [rsp+aword*7]
-|.define SAVE_RU2, aword [rsp+aword*6]
-|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
+|.define SAVE_RET, qword [rsp+qword*13] //<-- rsp entering interpreter.
+|.define SAVE_R4, qword [rsp+qword*12]
+|.define SAVE_R3, qword [rsp+qword*11]
+|.define SAVE_R2, qword [rsp+qword*10]
+|.define SAVE_R1, qword [rsp+qword*9]
+|.define SAVE_RU2, qword [rsp+qword*8]
+|.define SAVE_RU1, qword [rsp+qword*7] //<-- rsp after register saves.
|.else
-|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
-|.define SAVE_R4, aword [rsp+aword*8]
-|.define SAVE_R3, aword [rsp+aword*7]
-|.define SAVE_R2, aword [rsp+aword*6]
-|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
+|.define SAVE_RET, qword [rsp+qword*11] //<-- rsp entering interpreter.
+|.define SAVE_R4, qword [rsp+qword*10]
+|.define SAVE_R3, qword [rsp+qword*9]
+|.define SAVE_R2, qword [rsp+qword*8]
+|.define SAVE_R1, qword [rsp+qword*7] //<-- rsp after register saves.
|.endif
-|.define SAVE_CFRAME, aword [rsp+aword*4]
+|.define SAVE_CFRAME, qword [rsp+qword*6]
+|.define SAVE_UNUSED1, qword [rsp+qword*5]
+|.define SAVE_VMSTATE, dword [rsp+dword*8]
|.define SAVE_PC, dword [rsp+dword*7]
|.define SAVE_L, dword [rsp+dword*6]
|.define SAVE_ERRF, dword [rsp+dword*5]
|.define SAVE_NRES, dword [rsp+dword*4]
-|.define TMPa, aword [rsp+aword*1]
+|.define TMPa, qword [rsp+qword*1]
|.define TMP2, dword [rsp+dword*1]
|.define TMP1, dword [rsp] //<-- rsp while in interpreter.
|//----- 16 byte aligned
|
|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
|.define TMPQ, qword [rsp]
-|.define TMP3, dword [rsp+aword*1]
+|.define TMP3, dword [rsp+qword*1]
|.define MULTRES, TMP2
|
|.endif
@@ -433,6 +440,20 @@
| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
|.endmacro
|
+|.if not WIN
+|// Save vmstate through register.
+|.macro save_vmstate_through, reg
+| mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)]
+| mov SAVE_VMSTATE, reg
+|.endmacro
+|
+|// Restore vmstate through register.
+|.macro restore_vmstate_through, reg
+| mov reg, SAVE_VMSTATE
+| mov dword [DISPATCH+DISPATCH_GL(vmstate)], reg
+|.endmacro
+|.endif // WIN
+|
|// x87 compares.
|.macro fcomparepp // Compare and pop st0 >< st1.
| fucomip st1
@@ -520,7 +541,7 @@ static void build_subroutines(BuildCtx *ctx)
| jnz ->vm_returnp
|
| // Return to C.
- | set_vmstate C
+ | set_vmstate CFUNC
| and PC, -8
| sub PC, BASE
| neg PC // Previous base = BASE - delta.
@@ -559,6 +580,10 @@ static void build_subroutines(BuildCtx *ctx)
| xor eax, eax // Ok return status for vm_pcall.
|
|->vm_leave_unw:
+ |.if not WIN
+ | // DISPATCH must be set up properly here: restore_vmstate_through uses it.
+ | restore_vmstate_through RA
+ |.endif
| restoreregs
| ret
|
@@ -613,7 +638,9 @@ static void build_subroutines(BuildCtx *ctx)
| mov L:DISPATCH, SAVE_L
| mov GL:RB, L:DISPATCH->glref
| mov dword GL:RB->cur_L, L:DISPATCH
- | mov dword GL:RB->vmstate, ~LJ_VMST_C
+ | mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC
+ | mov DISPATCH, L:DISPATCH->glref // Setup pointer to dispatch table.
+ | add DISPATCH, GG_G2DISP
| jmp ->vm_leave_unw
|
|->vm_unwind_rethrow:
@@ -647,7 +674,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov PC, [BASE-4] // Fetch PC of previous frame.
| mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
- | set_vmstate INTERP
+ | set_vmstate INTERP // INTERP until jump to BC_RET* or return to C
| jmp ->vm_returnc // Increments RD/MULTRES and returns.
|
|.if WIN and not X64
@@ -714,10 +741,13 @@ static void build_subroutines(BuildCtx *ctx)
| mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
|.endif
| mov PC, FRAME_CP
- | xor RD, RD
| lea KBASEa, [esp+CFRAME_RESUME]
| mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
| add DISPATCH, GG_G2DISP
+ |.if not WIN
+ | save_vmstate_through RD
+ |.endif
+ | xor RD, RD
| mov SAVE_PC, RD // Any value outside of bytecode is ok.
| mov SAVE_CFRAME, RDa
|.if X64
@@ -730,7 +760,7 @@ static void build_subroutines(BuildCtx *ctx)
|
| // Resume after yield (like a return).
| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
- | set_vmstate INTERP
+ | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
| mov byte L:RB->status, RDL
| mov BASE, L:RB->base
| mov RD, L:RB->top
@@ -774,6 +804,9 @@ static void build_subroutines(BuildCtx *ctx)
| mov SAVE_CFRAME, KBASEa
| mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
| add DISPATCH, GG_G2DISP
+ |.if not WIN
+ | save_vmstate_through RD
+ |.endif
|.if X64
| mov L:RB->cframe, rsp
|.else
@@ -782,7 +815,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
- | set_vmstate INTERP
+ | set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC*
| mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
| add PC, RA
| sub PC, BASE // PC = frame delta + frame type
@@ -823,6 +856,9 @@ static void build_subroutines(BuildCtx *ctx)
| mov SAVE_ERRF, 0 // No error function.
| mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
| add DISPATCH, GG_G2DISP
+ |.if not WIN
+ | save_vmstate_through KBASE
+ |.endif
| // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
|
|.if X64
@@ -885,6 +921,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov KBASE, LFUNC:KBASE->pc
| mov KBASE, [KBASE+PC2PROTO(k)]
| // BASE = base, RC = result, RB = meta base
+ | set_vmstate LFUNC // LFUNC after KBASE restoration
| jmp RAa // Jump to continuation.
|
|.if FFI
@@ -1409,15 +1446,16 @@ static void build_subroutines(BuildCtx *ctx)
|
|.macro .ffunc, name
|->ff_ .. name:
+ | set_vmstate FFUNC
|.endmacro
|
|.macro .ffunc_1, name
- |->ff_ .. name:
+ | .ffunc name
| cmp NARGS:RD, 1+1; jb ->fff_fallback
|.endmacro
|
|.macro .ffunc_2, name
- |->ff_ .. name:
+ | .ffunc name
| cmp NARGS:RD, 2+1; jb ->fff_fallback
|.endmacro
|
@@ -1924,7 +1962,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| mov BASE, L:RB->base
| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
- | set_vmstate INTERP
+ | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
|
| cmp eax, LUA_YIELD
| ja >8
@@ -2089,6 +2127,7 @@ static void build_subroutines(BuildCtx *ctx)
| movzx RA, PC_RA
| not RAa // Note: ~RA = -(RA+1)
| lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
+ | set_vmstate LFUNC // LFUNC state after BASE restoration
| ins_next
|
|6: // Fill up results with nil.
@@ -2933,7 +2972,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov KBASE, [KBASE+PC2PROTO(k)]
| mov L:RB->base, BASE
| mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
- | set_vmstate INTERP
+ | set_vmstate LFUNC // LFUNC after BASE & KBASE restoration
| // Modified copy of ins_next which handles function header dispatch, too.
| mov RC, [PC]
| movzx RA, RCH
@@ -3203,8 +3242,8 @@ static void build_subroutines(BuildCtx *ctx)
| mov FCARG1, CTSTATE
| call extern lj_ccallback_enter@8 // (CTState *cts, void *cf)
| // lua_State * returned in eax (RD).
- | set_vmstate INTERP
| mov BASE, L:RD->base
+ | set_vmstate LFUNC // LFUNC after BASE restoration
| mov RD, L:RD->top
| sub RD, BASE
| mov LFUNC:RB, [BASE-8]
@@ -4683,6 +4722,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_CALL: case BC_CALLM:
| ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+ | set_vmstate INTERP // INTERP until a new BASE is setup
if (op == BC_CALLM) {
| add NARGS:RD, MULTRES
}
@@ -4706,6 +4746,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| cmp dword [RA-4], LJ_TFUNC
| jne ->vmeta_call
|->BC_CALLT_Z:
+ | set_vmstate INTERP // INTERP until a new BASE is setup
| mov PC, [BASE-4]
| test PC, FRAME_TYPE
| jnz >7
@@ -4989,6 +5030,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| shl RA, 3
}
|1:
+ | set_vmstate INTERP // INTERP until the old BASE & KBASE is restored
| mov PC, [BASE-4]
| mov MULTRES, RD // Save nresults+1.
| test PC, FRAME_TYPE // Check frame type marker.
@@ -5043,6 +5085,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov LFUNC:KBASE, [BASE-8]
| mov KBASE, LFUNC:KBASE->pc
| mov KBASE, [KBASE+PC2PROTO(k)]
+ | set_vmstate LFUNC // LFUNC after the old BASE & KBASE is restored
| ins_next
|
|6: // Fill up results with nil.
@@ -5330,6 +5373,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_AD // BASE = new base, RA = framesize, RD = nargs+1
| mov KBASE, [PC-4+PC2PROTO(k)]
| mov L:RB, SAVE_L
+ | set_vmstate LFUNC // LFUNC after KBASE restoration
| lea RA, [BASE+RA*8] // Top of frame.
| cmp RA, L:RB->maxstack
| ja ->vm_growstack_f
@@ -5367,6 +5411,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov [RD-4], RB // Store delta + FRAME_VARG.
| mov [RD-8], LFUNC:KBASE // Store copy of LFUNC.
| mov L:RB, SAVE_L
+ | set_vmstate LFUNC // LFUNC after KBASE restoration
| lea RA, [RD+RA*8]
| cmp RA, L:RB->maxstack
| ja ->vm_growstack_v // Need to grow stack.
@@ -5431,7 +5476,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif
}
| ja ->vm_growstack_c // Need to grow stack.
- | set_vmstate C
+ | set_vmstate CFUNC // CFUNC before entering C function
if (op == BC_FUNCC) {
| call KBASEa // (lua_State *L)
} else {
@@ -5441,7 +5486,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // nresults returned in eax (RD).
| mov BASE, L:RB->base
| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
- | set_vmstate INTERP
+ | set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
| lea RA, [BASE+RD*8]
| neg RA
| add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
--
2.28.0
More information about the Tarantool-patches
mailing list