[Tarantool-patches] [PATCH luajit v1 05/11] vm: introduce LFUNC and FFUNC vmstates

Sergey Kaplun skaplun at tarantool.org
Wed Dec 16 22:13:40 MSK 2020


This patch splits LJ_VMST_LFUNC and LJ_VMST_FFUNC off from LJ_VMST_INTERP
to make it possible to determine the context of VM execution for x86/x64
arches. Also, LJ_VMST_C is renamed to LJ_VMST_CFUNC for naming consistency
with the newer vmstates.

In addition, this patch adjusts the stack layout for x86/x64 arches so
that the vmstate is saved on the stack, which avoids an inconsistent state
during stack unwinding when an error is raised.

Part of tarantool/tarantool#5442
---
 src/lj_frame.h     |  18 +++---
 src/lj_obj.h       |   4 +-
 src/lj_profile.c   |   5 +-
 src/luajit-gdb.py  |  14 +++--
 src/vm_arm.dasc    |   6 +-
 src/vm_arm64.dasc  |   6 +-
 src/vm_mips.dasc   |   6 +-
 src/vm_mips64.dasc |   6 +-
 src/vm_ppc.dasc    |   6 +-
 src/vm_x64.dasc    |  99 ++++++++++++++++++++++----------
 src/vm_x86.dasc    | 137 ++++++++++++++++++++++++++++++---------------
 11 files changed, 200 insertions(+), 107 deletions(-)

diff --git a/src/lj_frame.h b/src/lj_frame.h
index 19c49a4..2e693f9 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -127,13 +127,13 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 #define CFRAME_SIZE		(16*4)
 #define CFRAME_SHIFT_MULTRES	0
 #else
-#define CFRAME_OFS_ERRF		(15*4)
-#define CFRAME_OFS_NRES		(14*4)
-#define CFRAME_OFS_PREV		(13*4)
-#define CFRAME_OFS_L		(12*4)
+#define CFRAME_OFS_ERRF		(19*4)
+#define CFRAME_OFS_NRES		(18*4)
+#define CFRAME_OFS_PREV		(17*4)
+#define CFRAME_OFS_L		(16*4)
 #define CFRAME_OFS_PC		(6*4)
 #define CFRAME_OFS_MULTRES	(5*4)
-#define CFRAME_SIZE		(12*4)
+#define CFRAME_SIZE		(16*4)
 #define CFRAME_SHIFT_MULTRES	0
 #endif
 #elif LJ_TARGET_X64
@@ -152,11 +152,11 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 #define CFRAME_OFS_NRES		(22*4)
 #define CFRAME_OFS_MULTRES	(21*4)
 #endif
-#define CFRAME_SIZE		(10*8)
+#define CFRAME_SIZE		(12*8)
 #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 9*16 + 4*8)
 #define CFRAME_SHIFT_MULTRES	0
 #else
-#define CFRAME_OFS_PREV		(4*8)
+#define CFRAME_OFS_PREV		(6*8)
 #if LJ_GC64
 #define CFRAME_OFS_PC		(3*8)
 #define CFRAME_OFS_L		(2*8)
@@ -171,9 +171,9 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 #define CFRAME_OFS_MULTRES	(1*4)
 #endif
 #if LJ_NO_UNWIND
-#define CFRAME_SIZE		(12*8)
+#define CFRAME_SIZE		(14*8)
 #else
-#define CFRAME_SIZE		(10*8)
+#define CFRAME_SIZE		(12*8)
 #endif
 #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 16)
 #define CFRAME_SHIFT_MULTRES	0
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 927b347..7fb715e 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -512,7 +512,9 @@ typedef struct GCtab {
 /* VM states. */
 enum {
   LJ_VMST_INTERP,	/* Interpreter. */
-  LJ_VMST_C,		/* C function. */
+  LJ_VMST_LFUNC,	/* Lua function. */
+  LJ_VMST_FFUNC,	/* Fast function. */
+  LJ_VMST_CFUNC,	/* C function. */
   LJ_VMST_GC,		/* Garbage collector. */
   LJ_VMST_EXIT,		/* Trace exit handler. */
   LJ_VMST_RECORD,	/* Trace recorder. */
diff --git a/src/lj_profile.c b/src/lj_profile.c
index 116998e..637e03c 100644
--- a/src/lj_profile.c
+++ b/src/lj_profile.c
@@ -157,7 +157,10 @@ static void profile_trigger(ProfileState *ps)
     int st = g->vmstate;
     ps->vmstate = st >= 0 ? 'N' :
 		  st == ~LJ_VMST_INTERP ? 'I' :
-		  st == ~LJ_VMST_C ? 'C' :
+		  st == ~LJ_VMST_CFUNC ? 'C' :
+		  /* Stubs for profiler hooks. */
+		  st == ~LJ_VMST_FFUNC ? 'I' :
+		  st == ~LJ_VMST_LFUNC ? 'I' :
 		  st == ~LJ_VMST_GC ? 'G' : 'J';
     g->hookmask = (mask | HOOK_PROFILE);
     lj_dispatch_update(g);
diff --git a/src/luajit-gdb.py b/src/luajit-gdb.py
index 652c560..f1fd623 100644
--- a/src/luajit-gdb.py
+++ b/src/luajit-gdb.py
@@ -206,12 +206,14 @@ def J(g):
 def vm_state(g):
     return {
         i2notu32(0): 'INTERP',
-        i2notu32(1): 'C',
-        i2notu32(2): 'GC',
-        i2notu32(3): 'EXIT',
-        i2notu32(4): 'RECORD',
-        i2notu32(5): 'OPT',
-        i2notu32(6): 'ASM',
+        i2notu32(1): 'LFUNC',
+        i2notu32(2): 'FFUNC',
+        i2notu32(3): 'CFUNC',
+        i2notu32(4): 'GC',
+        i2notu32(5): 'EXIT',
+        i2notu32(6): 'RECORD',
+        i2notu32(7): 'OPT',
+        i2notu32(8): 'ASM',
     }.get(int(tou32(g['vmstate'])), 'TRACE')
 
 def gc_state(g):
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index d4cdaf5..ae2efdf 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -287,7 +287,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  str RB, L->base
   |   ldr KBASE, SAVE_NRES
-  |    mv_vmstate CARG4, C
+  |    mv_vmstate CARG4, CFUNC
   |   sub BASE, BASE, #8
   |  subs CARG3, RC, #8
   |   lsl KBASE, KBASE, #3		// KBASE = (nresults_wanted+1)*8
@@ -348,7 +348,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov CRET1, CARG2
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  ldr L, SAVE_L
-  |   mv_vmstate CARG4, C
+  |   mv_vmstate CARG4, CFUNC
   |  ldr GL:CARG3, L->glref
   |   str CARG4, GL:CARG3->vmstate
   |   str L, GL:CARG3->cur_L
@@ -4487,7 +4487,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     if (op == BC_FUNCCW) {
       |  ldr CARG2, CFUNC:CARG3->f
     }
-    |    mv_vmstate CARG3, C
+    |    mv_vmstate CARG3, CFUNC
     |  mov CARG1, L
     |   bhi ->vm_growstack_c		// Need to grow stack.
     |    st_vmstate CARG3
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index 3eaf376..f783428 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -332,7 +332,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  str RB, L->base
   |   ldrsw CARG2, SAVE_NRES		// CARG2 = nresults+1.
-  |    mv_vmstate TMP0w, C
+  |    mv_vmstate TMP0w, CFUNC
   |   sub BASE, BASE, #16
   |  subs TMP2, RC, #8
   |    st_vmstate TMP0w
@@ -391,7 +391,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov CRET1, CARG2
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  ldr L, SAVE_L
-  |   mv_vmstate TMP0w, C
+  |   mv_vmstate TMP0w, CFUNC
   |  ldr GL, L->glref
   |   st_vmstate TMP0w
   |  b ->vm_leave_unw
@@ -3816,7 +3816,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     if (op == BC_FUNCCW) {
       |  ldr CARG2, CFUNC:CARG3->f
     }
-    |    mv_vmstate TMP0w, C
+    |    mv_vmstate TMP0w, CFUNC
     |  mov CARG1, L
     |   bhi ->vm_growstack_c		// Need to grow stack.
     |    st_vmstate TMP0w
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 1afd611..ec57d78 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -403,7 +403,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  addiu TMP1, RD, -8
   |   sw TMP2, L->base
-  |    li_vmstate C
+  |    li_vmstate CFUNC
   |   lw TMP2, SAVE_NRES
   |   addiu BASE, BASE, -8
   |    st_vmstate
@@ -473,7 +473,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  move CRET1, CARG2
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  lw L, SAVE_L
-  |   li TMP0, ~LJ_VMST_C
+  |   li TMP0, ~LJ_VMST_CFUNC
   |  lw GL:TMP1, L->glref
   |  b ->vm_leave_unw
   |.  sw TMP0, GL:TMP1->vmstate
@@ -5085,7 +5085,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  sw BASE, L->base
     |  sltu AT, TMP2, TMP1
     |   sw RC, L->top
-    |    li_vmstate C
+    |    li_vmstate CFUNC
     if (op == BC_FUNCCW) {
       |  lw CARG2, CFUNC:RB->f
     }
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index c06270a..9a749f9 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -449,7 +449,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  addiu TMP1, RD, -8
   |   sd TMP2, L->base
-  |    li_vmstate C
+  |    li_vmstate CFUNC
   |   lw TMP2, SAVE_NRES
   |   daddiu BASE, BASE, -16
   |    st_vmstate
@@ -517,7 +517,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  move CRET1, CARG2
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  ld L, SAVE_L
-  |   li TMP0, ~LJ_VMST_C
+  |   li TMP0, ~LJ_VMST_CFUNC
   |  ld GL:TMP1, L->glref
   |  b ->vm_leave_unw
   |.  sw TMP0, GL:TMP1->vmstate
@@ -4952,7 +4952,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  sd BASE, L->base
     |  sltu AT, TMP2, TMP1
     |   sd RC, L->top
-    |    li_vmstate C
+    |    li_vmstate CFUNC
     if (op == BC_FUNCCW) {
       |  ld CARG2, CFUNC:RB->f
     }
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index b4260eb..62e9b68 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -520,7 +520,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  // TMP0 = PC & FRAME_TYPE
   |  cmpwi TMP0, FRAME_C
   |   rlwinm TMP2, PC, 0, 0, 28
-  |    li_vmstate C
+  |    li_vmstate CFUNC
   |   sub TMP2, BASE, TMP2		// TMP2 = previous base.
   |  bney ->vm_returnp
   |
@@ -596,7 +596,7 @@ static void build_subroutines(BuildCtx *ctx)
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  lwz L, SAVE_L
   |  .toc ld TOCREG, SAVE_TOC
-  |   li TMP0, ~LJ_VMST_C
+  |   li TMP0, ~LJ_VMST_CFUNC
   |  lwz GL:TMP1, L->glref
   |   stw TMP0, GL:TMP1->vmstate
   |  b ->vm_leave_unw
@@ -5060,7 +5060,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   stp BASE, L->base
     |   cmplw TMP1, TMP2
     |    stp RC, L->top
-    |     li_vmstate C
+    |     li_vmstate CFUNC
     |.if TOC
     |  mtctr TMP3
     |.else
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 80753e0..d4d3a1d 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -140,7 +140,7 @@
 |//-----------------------------------------------------------------------
 |.else			// x64/POSIX stack layout
 |
-|.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
+|.define CFRAME_SPACE,	qword*7			// Delta for rsp (see <--).
 |.macro saveregs_
 |  push rbx; push r15; push r14
 |.if NO_UNWIND
@@ -161,26 +161,29 @@
 |
 |//----- 16 byte aligned,
 |.if NO_UNWIND
-|.define SAVE_RET,	aword [rsp+aword*11]	//<-- rsp entering interpreter.
-|.define SAVE_R4,	aword [rsp+aword*10]
-|.define SAVE_R3,	aword [rsp+aword*9]
-|.define SAVE_R2,	aword [rsp+aword*8]
-|.define SAVE_R1,	aword [rsp+aword*7]
-|.define SAVE_RU2,	aword [rsp+aword*6]
-|.define SAVE_RU1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.define SAVE_RET,	qword [rsp+qword*13]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	qword [rsp+qword*12]
+|.define SAVE_R3,	qword [rsp+qword*11]
+|.define SAVE_R2,	qword [rsp+qword*10]
+|.define SAVE_R1,	qword [rsp+qword*9]
+|.define SAVE_RU2,	qword [rsp+qword*8]
+|.define SAVE_RU1,	qword [rsp+qword*7]	//<-- rsp after register saves.
 |.else
-|.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
-|.define SAVE_R4,	aword [rsp+aword*8]
-|.define SAVE_R3,	aword [rsp+aword*7]
-|.define SAVE_R2,	aword [rsp+aword*6]
-|.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.define SAVE_RET,	qword [rsp+qword*11]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	qword [rsp+qword*10]
+|.define SAVE_R3,	qword [rsp+qword*9]
+|.define SAVE_R2,	qword [rsp+qword*8]
+|.define SAVE_R1,	qword [rsp+qword*7]	//<-- rsp after register saves.
 |.endif
-|.define SAVE_CFRAME,	aword [rsp+aword*4]
-|.define SAVE_PC,	aword [rsp+aword*3]
-|.define SAVE_L,	aword [rsp+aword*2]
+|.define SAVE_CFRAME,	qword [rsp+qword*6]
+|.define SAVE_UNUSED2,	qword [rsp+qword*5]
+|.define SAVE_UNUSED1,	dword [rsp+dword*9]	// Free dword next to SAVE_VMSTATE.
+|.define SAVE_VMSTATE,	dword [rsp+dword*8]
+|.define SAVE_PC,	qword [rsp+qword*3]
+|.define SAVE_L,	qword [rsp+qword*2]
 |.define SAVE_ERRF,	dword [rsp+dword*3]
 |.define SAVE_NRES,	dword [rsp+dword*2]
-|.define TMP1,		aword [rsp]		//<-- rsp while in interpreter.
+|.define TMP1,		qword [rsp]		//<-- rsp while in interpreter.
 |//----- 16 byte aligned
 |
 |.define TMP1d,		dword [rsp]
@@ -342,6 +345,20 @@
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
 |.endmacro
 |
+|.if not WIN
+|// Save vmstate through register.
+|.macro save_vmstate_through, reg
+|  mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)]
+|  mov SAVE_VMSTATE, reg
+|.endmacro
+|
+|// Restore vmstate through register.
+|.macro restore_vmstate_through, reg
+|  mov reg, SAVE_VMSTATE
+|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], reg
+|.endmacro
+|.endif // WIN
+|
 |.macro fpop1; fstp st1; .endmacro
 |
 |// Synthesize SSE FP constants.
@@ -416,7 +433,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  jnz ->vm_returnp
   |
   |  // Return to C.
-  |  set_vmstate C
+  |  set_vmstate CFUNC
   |  and PC, -8
   |  sub PC, BASE
   |  neg PC				// Previous base = BASE - delta.
@@ -448,6 +465,10 @@ static void build_subroutines(BuildCtx *ctx)
   |  xor eax, eax			// Ok return status for vm_pcall.
   |
   |->vm_leave_unw:
+  |.if not WIN
+  |  // DISPATCH must be set up properly here: the vmstate is restored via it.
+  |  restore_vmstate_through RAd
+  |.endif
   |  restoreregs
   |  ret
   |
@@ -493,7 +514,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov L:DISPATCH, SAVE_L
   |  mov GL:RB, L:DISPATCH->glref
   |  mov GL:RB->cur_L, L:DISPATCH
-  |  mov dword GL:RB->vmstate, ~LJ_VMST_C
+  |  mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC
+  |  mov DISPATCH, L:DISPATCH->glref	// Setup pointer to dispatch table.
+  |  add DISPATCH, GG_G2DISP
   |  jmp ->vm_leave_unw
   |
   |->vm_unwind_rethrow:
@@ -521,7 +544,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov [BASE-16], RA			// Prepend false to error message.
   |  mov [BASE-8], RB
   |  mov RA, -16			// Results start at BASE+RA = BASE-16.
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // INTERP until jump to BC_RET* or return to C
   |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
   |
   |//-----------------------------------------------------------------------
@@ -575,6 +598,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  lea KBASE, [esp+CFRAME_RESUME]
   |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
   |  add DISPATCH, GG_G2DISP
+  |.if not WIN
+  |  save_vmstate_through TMPRd
+  |.endif
   |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
   |  mov SAVE_CFRAME, RD
   |  mov SAVE_NRES, RDd
@@ -585,7 +611,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  // Resume after yield (like a return).
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
   |  mov byte L:RB->status, RDL
   |  mov BASE, L:RB->base
   |  mov RD, L:RB->top
@@ -622,11 +648,14 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov SAVE_CFRAME, KBASE
   |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
   |  add DISPATCH, GG_G2DISP
+  |.if not WIN
+  |  save_vmstate_through RDd
+  |.endif
   |  mov L:RB->cframe, rsp
   |
   |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC*
   |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
   |  add PC, RA
   |  sub PC, BASE			// PC = frame delta + frame type
@@ -658,6 +687,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov SAVE_ERRF, 0			// No error function.
   |  mov SAVE_NRES, KBASEd		// Neg. delta means cframe w/o frame.
   |   add DISPATCH, GG_G2DISP
+  |.if not WIN
+  |  save_vmstate_through KBASEd
+  |.endif
   |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
   |
   |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.
@@ -697,6 +729,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  cleartp LFUNC:KBASE
   |  mov KBASE, LFUNC:KBASE->pc
   |  mov KBASE, [KBASE+PC2PROTO(k)]
+  |  set_vmstate LFUNC			// LFUNC after KBASE restoration
   |  // BASE = base, RC = result, RB = meta base
   |  jmp RA				// Jump to continuation.
   |
@@ -1137,15 +1170,16 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc, name
   |->ff_ .. name:
+  |  set_vmstate FFUNC
   |.endmacro
   |
   |.macro .ffunc_1, name
-  |->ff_ .. name:
+  |  .ffunc name
   |  cmp NARGS:RDd, 1+1;  jb ->fff_fallback
   |.endmacro
   |
   |.macro .ffunc_2, name
-  |->ff_ .. name:
+  |  .ffunc name
   |  cmp NARGS:RDd, 2+1;  jb ->fff_fallback
   |.endmacro
   |
@@ -1578,7 +1612,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov L:PC, TMP1
   |  mov BASE, L:RB->base
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
   |
   |  cmp eax, LUA_YIELD
   |  ja >8
@@ -1717,6 +1751,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  movzx RAd, PC_RA
   |  neg RA
   |  lea BASE, [BASE+RA*8-16]		// base = base - (RA+2)*8
+  |  set_vmstate LFUNC			// LFUNC state after BASE restoration
   |  ins_next
   |
   |6:  // Fill up results with nil.
@@ -2481,7 +2516,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  mov L:RB->base, BASE
   |  mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
-  |  set_vmstate INTERP
+  |  set_vmstate LFUNC			// LFUNC after BASE & KBASE restoration
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  mov RCd, [PC]
   |  movzx RAd, RCH
@@ -2697,8 +2732,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov CARG1, CTSTATE
   |  call extern lj_ccallback_enter	// (CTState *cts, void *cf)
   |  // lua_State * returned in eax (RD).
-  |  set_vmstate INTERP
   |  mov BASE, L:RD->base
+  |  set_vmstate LFUNC			// LFUNC after BASE restoration
   |  mov RD, L:RD->top
   |  sub RD, BASE
   |  mov LFUNC:RB, [BASE-16]
@@ -3974,6 +4009,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
 
   case BC_CALL: case BC_CALLM:
     |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+    |  set_vmstate INTERP		// INTERP until a new BASE is setup
     if (op == BC_CALLM) {
       |  add NARGS:RDd, MULTRES
     }
@@ -3995,6 +4031,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov LFUNC:RB, [RA-16]
     |  checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
     |->BC_CALLT_Z:
+    |  set_vmstate INTERP		// INTERP until a new BASE is setup
     |  mov PC, [BASE-8]
     |  test PCd, FRAME_TYPE
     |  jnz >7
@@ -4219,6 +4256,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |  shl RAd, 3
     }
     |1:
+    |  set_vmstate INTERP // INTERP until the old BASE & KBASE is restored
     |  mov PC, [BASE-8]
     |  mov MULTRES, RDd			// Save nresults+1.
     |  test PCd, FRAME_TYPE		// Check frame type marker.
@@ -4260,6 +4298,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  cleartp LFUNC:KBASE
     |  mov KBASE, LFUNC:KBASE->pc
     |  mov KBASE, [KBASE+PC2PROTO(k)]
+    |  set_vmstate LFUNC // LFUNC after the old BASE & KBASE is restored
     |  ins_next
     |
     |6:  // Fill up results with nil.
@@ -4551,6 +4590,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
     |  mov KBASE, [PC-4+PC2PROTO(k)]
     |  mov L:RB, SAVE_L
+    |  set_vmstate LFUNC		// LFUNC after KBASE restoration
     |  lea RA, [BASE+RA*8]		// Top of frame.
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_f
@@ -4588,6 +4628,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov [RD-8], RB			// Store delta + FRAME_VARG.
     |  mov [RD-16], LFUNC:KBASE		// Store copy of LFUNC.
     |  mov L:RB, SAVE_L
+    |  set_vmstate LFUNC		// LFUNC after KBASE restoration
     |  lea RA, [RD+RA*8]
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_v		// Need to grow stack.
@@ -4643,7 +4684,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |  mov CARG1, L:RB		// Caveat: CARG1 may be RA.
     }
     |  ja ->vm_growstack_c		// Need to grow stack.
-    |  set_vmstate C
+    |  set_vmstate CFUNC		// CFUNC before entering C function
     if (op == BC_FUNCC) {
       |  call KBASE			// (lua_State *L)
     } else {
@@ -4653,7 +4694,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // nresults returned in eax (RD).
     |  mov BASE, L:RB->base
     |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-    |  set_vmstate INTERP
+    |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
     |  lea RA, [BASE+RD*8]
     |  neg RA
     |  add RA, L:RB->top		// RA = (L->top-(L->base+nresults))*8
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index d76fbe3..939c43f 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -140,7 +140,7 @@
 |
 |.else
 |
-|.define CFRAME_SPACE,	aword*7			// Delta for esp (see <--).
+|.define CFRAME_SPACE,	dword*11			// Delta for esp (see <--).
 |.macro saveregs_
 |  push edi; push esi; push ebx
 |  sub esp, CFRAME_SPACE
@@ -183,25 +183,30 @@
 |.define ARG1,		aword [esp]		//<-- esp while in interpreter.
 |//----- 16 byte aligned, ^^^ arguments for C callee
 |.else
-|.define SAVE_ERRF,	aword [esp+aword*15]	// vm_pcall/vm_cpcall only.
-|.define SAVE_NRES,	aword [esp+aword*14]
-|.define SAVE_CFRAME,	aword [esp+aword*13]
-|.define SAVE_L,	aword [esp+aword*12]
+|.define SAVE_ERRF,	dword [esp+dword*19]	// vm_pcall/vm_cpcall only.
+|.define SAVE_NRES,	dword [esp+dword*18]
+|.define SAVE_CFRAME,	dword [esp+dword*17]
+|.define SAVE_L,	dword [esp+dword*16]
 |//----- 16 byte aligned, ^^^ arguments from C caller
-|.define SAVE_RET,	aword [esp+aword*11]	//<-- esp entering interpreter.
-|.define SAVE_R4,	aword [esp+aword*10]
-|.define SAVE_R3,	aword [esp+aword*9]
-|.define SAVE_R2,	aword [esp+aword*8]
+|.define SAVE_RET,	dword [esp+dword*15]	//<-- esp entering interpreter.
+|.define SAVE_R4,	dword [esp+dword*14]
+|.define SAVE_R3,	dword [esp+dword*13]
+|.define SAVE_R2,	dword [esp+dword*12]
 |//----- 16 byte aligned
-|.define SAVE_R1,	aword [esp+aword*7]	//<-- esp after register saves.
-|.define SAVE_PC,	aword [esp+aword*6]
-|.define TMP2,		aword [esp+aword*5]
-|.define TMP1,		aword [esp+aword*4]
+|.define SAVE_UNUSED3,	dword [esp+dword*11]
+|.define SAVE_UNUSED2,	dword [esp+dword*10]
+|.define SAVE_UNUSED1,	dword [esp+dword*9]
+|.define SAVE_VMSTATE,	dword [esp+dword*8]
 |//----- 16 byte aligned
-|.define ARG4,		aword [esp+aword*3]
-|.define ARG3,		aword [esp+aword*2]
-|.define ARG2,		aword [esp+aword*1]
-|.define ARG1,		aword [esp]		//<-- esp while in interpreter.
+|.define SAVE_R1,	dword [esp+dword*7]	//<-- esp after register saves.
+|.define SAVE_PC,	dword [esp+dword*6]
+|.define TMP2,		dword [esp+dword*5]
+|.define TMP1,		dword [esp+dword*4]
+|//----- 16 byte aligned
+|.define ARG4,		dword [esp+dword*3]
+|.define ARG3,		dword [esp+dword*2]
+|.define ARG2,		dword [esp+dword*1]
+|.define ARG1,		dword [esp]		//<-- esp while in interpreter.
 |//----- 16 byte aligned, ^^^ arguments for C callee
 |.endif
 |
@@ -269,7 +274,7 @@
 |//-----------------------------------------------------------------------
 |.else			// x64/POSIX stack layout
 |
-|.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
+|.define CFRAME_SPACE,	qword*7			// Delta for rsp (see <--).
 |.macro saveregs_
 |  push rbx; push r15; push r14
 |.if NO_UNWIND
@@ -290,33 +295,35 @@
 |
 |//----- 16 byte aligned,
 |.if NO_UNWIND
-|.define SAVE_RET,	aword [rsp+aword*11]	//<-- rsp entering interpreter.
-|.define SAVE_R4,	aword [rsp+aword*10]
-|.define SAVE_R3,	aword [rsp+aword*9]
-|.define SAVE_R2,	aword [rsp+aword*8]
-|.define SAVE_R1,	aword [rsp+aword*7]
-|.define SAVE_RU2,	aword [rsp+aword*6]
-|.define SAVE_RU1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.define SAVE_RET,	qword [rsp+qword*13]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	qword [rsp+qword*12]
+|.define SAVE_R3,	qword [rsp+qword*11]
+|.define SAVE_R2,	qword [rsp+qword*10]
+|.define SAVE_R1,	qword [rsp+qword*9]
+|.define SAVE_RU2,	qword [rsp+qword*8]
+|.define SAVE_RU1,	qword [rsp+qword*7]	//<-- rsp after register saves.
 |.else
-|.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
-|.define SAVE_R4,	aword [rsp+aword*8]
-|.define SAVE_R3,	aword [rsp+aword*7]
-|.define SAVE_R2,	aword [rsp+aword*6]
-|.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.define SAVE_RET,	qword [rsp+qword*11]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	qword [rsp+qword*10]
+|.define SAVE_R3,	qword [rsp+qword*9]
+|.define SAVE_R2,	qword [rsp+qword*8]
+|.define SAVE_R1,	qword [rsp+qword*7]	//<-- rsp after register saves.
 |.endif
-|.define SAVE_CFRAME,	aword [rsp+aword*4]
+|.define SAVE_CFRAME,	qword [rsp+qword*6]
+|.define SAVE_UNUSED1,	qword [rsp+qword*5]
+|.define SAVE_VMSTATE,	dword [rsp+dword*8]
 |.define SAVE_PC,	dword [rsp+dword*7]
 |.define SAVE_L,	dword [rsp+dword*6]
 |.define SAVE_ERRF,	dword [rsp+dword*5]
 |.define SAVE_NRES,	dword [rsp+dword*4]
-|.define TMPa,		aword [rsp+aword*1]
+|.define TMPa,		qword [rsp+qword*1]
 |.define TMP2,		dword [rsp+dword*1]
 |.define TMP1,		dword [rsp]		//<-- rsp while in interpreter.
 |//----- 16 byte aligned
 |
 |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
 |.define TMPQ,		qword [rsp]
-|.define TMP3,		dword [rsp+aword*1]
+|.define TMP3,		dword [rsp+qword*1]
 |.define MULTRES,	TMP2
 |
 |.endif
@@ -433,6 +440,20 @@
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
 |.endmacro
 |
+|.if not WIN
+|// Save vmstate through register.
+|.macro save_vmstate_through, reg
+|  mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)]
+|  mov SAVE_VMSTATE, reg
+|.endmacro
+|
+|// Restore vmstate through register.
+|.macro restore_vmstate_through, reg
+|  mov reg, SAVE_VMSTATE
+|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], reg
+|.endmacro
+|.endif // WIN
+|
 |// x87 compares.
 |.macro fcomparepp			// Compare and pop st0 >< st1.
 |  fucomip st1
@@ -520,7 +541,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  jnz ->vm_returnp
   |
   |  // Return to C.
-  |  set_vmstate C
+  |  set_vmstate CFUNC
   |  and PC, -8
   |  sub PC, BASE
   |  neg PC				// Previous base = BASE - delta.
@@ -559,6 +580,10 @@ static void build_subroutines(BuildCtx *ctx)
   |  xor eax, eax			// Ok return status for vm_pcall.
   |
   |->vm_leave_unw:
+  |.if not WIN
+  |  // DISPATCH must be set up properly here: the vmstate is restored via it.
+  |  restore_vmstate_through RA
+  |.endif
   |  restoreregs
   |  ret
   |
@@ -613,7 +638,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov L:DISPATCH, SAVE_L
   |  mov GL:RB, L:DISPATCH->glref
   |  mov dword GL:RB->cur_L, L:DISPATCH
-  |  mov dword GL:RB->vmstate, ~LJ_VMST_C
+  |  mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC
+  |  mov DISPATCH, L:DISPATCH->glref	// Setup pointer to dispatch table.
+  |  add DISPATCH, GG_G2DISP
   |  jmp ->vm_leave_unw
   |
   |->vm_unwind_rethrow:
@@ -647,7 +674,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov PC, [BASE-4]			// Fetch PC of previous frame.
   |  mov dword [BASE-4], LJ_TFALSE	// Prepend false to error message.
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // INTERP until jump to BC_RET* or return to C
   |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
   |
   |.if WIN and not X64
@@ -714,10 +741,13 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov RA, INARG_BASE			// Caveat: overlaps SAVE_CFRAME!
   |.endif
   |  mov PC, FRAME_CP
-  |  xor RD, RD
   |  lea KBASEa, [esp+CFRAME_RESUME]
   |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
   |  add DISPATCH, GG_G2DISP
+  |.if not WIN
+  |  save_vmstate_through RD
+  |.endif
+  |  xor RD, RD
   |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
   |  mov SAVE_CFRAME, RDa
   |.if X64
@@ -730,7 +760,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  // Resume after yield (like a return).
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
   |  mov byte L:RB->status, RDL
   |  mov BASE, L:RB->base
   |  mov RD, L:RB->top
@@ -774,6 +804,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov SAVE_CFRAME, KBASEa
   |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
   |  add DISPATCH, GG_G2DISP
+  |.if not WIN
+  |  save_vmstate_through RD
+  |.endif
   |.if X64
   |  mov L:RB->cframe, rsp
   |.else
@@ -782,7 +815,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC*
   |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
   |  add PC, RA
   |  sub PC, BASE			// PC = frame delta + frame type
@@ -823,6 +856,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov SAVE_ERRF, 0			// No error function.
   |  mov SAVE_NRES, KBASE		// Neg. delta means cframe w/o frame.
   |   add DISPATCH, GG_G2DISP
+  |.if not WIN
+  |  save_vmstate_through KBASE
+  |.endif
   |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
   |
   |.if X64
@@ -885,6 +921,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, LFUNC:KBASE->pc
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  // BASE = base, RC = result, RB = meta base
+  |  set_vmstate LFUNC			// LFUNC after KBASE restoration
   |  jmp RAa				// Jump to continuation.
   |
   |.if FFI
@@ -1409,15 +1446,16 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc, name
   |->ff_ .. name:
+  |  set_vmstate FFUNC
   |.endmacro
   |
   |.macro .ffunc_1, name
-  |->ff_ .. name:
+  |  .ffunc name
   |  cmp NARGS:RD, 1+1;  jb ->fff_fallback
   |.endmacro
   |
   |.macro .ffunc_2, name
-  |->ff_ .. name:
+  |  .ffunc name
   |  cmp NARGS:RD, 2+1;  jb ->fff_fallback
   |.endmacro
   |
@@ -1924,7 +1962,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.endif
   |  mov BASE, L:RB->base
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
   |
   |  cmp eax, LUA_YIELD
   |  ja >8
@@ -2089,6 +2127,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  movzx RA, PC_RA
   |  not RAa				// Note: ~RA = -(RA+1)
   |  lea BASE, [BASE+RA*8]		// base = base - (RA+1)*8
+  |  set_vmstate LFUNC			// LFUNC state after BASE restoration
   |  ins_next
   |
   |6:  // Fill up results with nil.
@@ -2933,7 +2972,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  mov L:RB->base, BASE
   |  mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
-  |  set_vmstate INTERP
+  |  set_vmstate LFUNC			// LFUNC after BASE & KBASE restoration
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  mov RC, [PC]
   |  movzx RA, RCH
@@ -3203,8 +3242,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov FCARG1, CTSTATE
   |  call extern lj_ccallback_enter at 8	// (CTState *cts, void *cf)
   |  // lua_State * returned in eax (RD).
-  |  set_vmstate INTERP
   |  mov BASE, L:RD->base
+  |  set_vmstate LFUNC			// LFUNC after BASE restoration
   |  mov RD, L:RD->top
   |  sub RD, BASE
   |  mov LFUNC:RB, [BASE-8]
@@ -4683,6 +4722,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
 
   case BC_CALL: case BC_CALLM:
     |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+    |  set_vmstate INTERP		// INTERP until a new BASE is setup
     if (op == BC_CALLM) {
       |  add NARGS:RD, MULTRES
     }
@@ -4706,6 +4746,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  cmp dword [RA-4], LJ_TFUNC
     |  jne ->vmeta_call
     |->BC_CALLT_Z:
+    |  set_vmstate INTERP		// INTERP until a new BASE is setup
     |  mov PC, [BASE-4]
     |  test PC, FRAME_TYPE
     |  jnz >7
@@ -4989,6 +5030,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |  shl RA, 3
     }
     |1:
+    |  set_vmstate INTERP // INTERP until the old BASE & KBASE is restored
     |  mov PC, [BASE-4]
     |  mov MULTRES, RD			// Save nresults+1.
     |  test PC, FRAME_TYPE		// Check frame type marker.
@@ -5043,6 +5085,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov LFUNC:KBASE, [BASE-8]
     |  mov KBASE, LFUNC:KBASE->pc
     |  mov KBASE, [KBASE+PC2PROTO(k)]
+    |  set_vmstate LFUNC // LFUNC after the old BASE & KBASE is restored
     |  ins_next
     |
     |6:  // Fill up results with nil.
@@ -5330,6 +5373,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
     |  mov KBASE, [PC-4+PC2PROTO(k)]
     |  mov L:RB, SAVE_L
+    |  set_vmstate LFUNC		// LFUNC after KBASE restoration
     |  lea RA, [BASE+RA*8]		// Top of frame.
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_f
@@ -5367,6 +5411,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov [RD-4], RB			// Store delta + FRAME_VARG.
     |  mov [RD-8], LFUNC:KBASE		// Store copy of LFUNC.
     |  mov L:RB, SAVE_L
+    |  set_vmstate LFUNC		// LFUNC after KBASE restoration
     |  lea RA, [RD+RA*8]
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_v		// Need to grow stack.
@@ -5431,7 +5476,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |.endif
     }
     |  ja ->vm_growstack_c		// Need to grow stack.
-    |  set_vmstate C
+    |  set_vmstate CFUNC		// CFUNC before entering C function
     if (op == BC_FUNCC) {
       |  call KBASEa			// (lua_State *L)
     } else {
@@ -5441,7 +5486,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // nresults returned in eax (RD).
     |  mov BASE, L:RB->base
     |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-    |  set_vmstate INTERP
+    |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
     |  lea RA, [BASE+RD*8]
     |  neg RA
     |  add RA, L:RB->top		// RA = (L->top-(L->base+nresults))*8
-- 
2.28.0



More information about the Tarantool-patches mailing list