[Tarantool-patches] [PATCH luajit v3 3/7] vm: introduce VM states for Lua and fast functions

Sergey Kaplun skaplun at tarantool.org
Mon Dec 28 07:05:33 MSK 2020


This patch introduces the LJ_VMST_LFUNC and LJ_VMST_FFUNC VM states,
split out from LJ_VMST_INTERP. The new VM states make it possible to
determine the context of Lua VM execution on the x86 and x64 arches.
Also, LJ_VMST_C is renamed to LJ_VMST_CFUNC for naming consistency with
the new VM states.

Also, this patch adjusts the stack layout for the x86 and x64 arches to
save the VM state, so that it stays consistent during stack unwinding
when an error is raised.

To group all traces into a single vmstate, a special macro
LJ_VMST_TRACE, equal to LJ_VMST__MAX, is introduced.

Part of tarantool/tarantool#5442
---

Changes in v3:
 * Adjusted vmstate saving.
 * Fix Win X64.
 * Fix typos in frame layout.
 * Codestyle fixes.

 src/lj_frame.h     |  16 ++---
 src/lj_obj.h       |  11 +++-
 src/lj_profile.c   |   5 +-
 src/luajit-gdb.py  |  14 +++--
 src/vm_arm.dasc    |   6 +-
 src/vm_arm64.dasc  |   6 +-
 src/vm_mips.dasc   |   6 +-
 src/vm_mips64.dasc |   6 +-
 src/vm_ppc.dasc    |   6 +-
 src/vm_x64.dasc    |  93 +++++++++++++++++++++--------
 src/vm_x86.dasc    | 146 ++++++++++++++++++++++++++++++++-------------
 11 files changed, 218 insertions(+), 97 deletions(-)

diff --git a/src/lj_frame.h b/src/lj_frame.h
index 19c49a4..9fd63fa 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -127,13 +127,13 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 #define CFRAME_SIZE		(16*4)
 #define CFRAME_SHIFT_MULTRES	0
 #else
-#define CFRAME_OFS_ERRF		(15*4)
-#define CFRAME_OFS_NRES		(14*4)
-#define CFRAME_OFS_PREV		(13*4)
-#define CFRAME_OFS_L		(12*4)
+#define CFRAME_OFS_ERRF		(19*4)
+#define CFRAME_OFS_NRES		(18*4)
+#define CFRAME_OFS_PREV		(17*4)
+#define CFRAME_OFS_L		(16*4)
 #define CFRAME_OFS_PC		(6*4)
 #define CFRAME_OFS_MULTRES	(5*4)
-#define CFRAME_SIZE		(12*4)
+#define CFRAME_SIZE		(16*4)
 #define CFRAME_SHIFT_MULTRES	0
 #endif
 #elif LJ_TARGET_X64
@@ -156,7 +156,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 9*16 + 4*8)
 #define CFRAME_SHIFT_MULTRES	0
 #else
-#define CFRAME_OFS_PREV		(4*8)
+#define CFRAME_OFS_PREV		(6*8)
 #if LJ_GC64
 #define CFRAME_OFS_PC		(3*8)
 #define CFRAME_OFS_L		(2*8)
@@ -171,9 +171,9 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 #define CFRAME_OFS_MULTRES	(1*4)
 #endif
 #if LJ_NO_UNWIND
-#define CFRAME_SIZE		(12*8)
+#define CFRAME_SIZE		(14*8)
 #else
-#define CFRAME_SIZE		(10*8)
+#define CFRAME_SIZE		(12*8)
 #endif
 #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 16)
 #define CFRAME_SHIFT_MULTRES	0
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 927b347..1a0b1f6 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -512,7 +512,9 @@ typedef struct GCtab {
 /* VM states. */
 enum {
   LJ_VMST_INTERP,	/* Interpreter. */
-  LJ_VMST_C,		/* C function. */
+  LJ_VMST_LFUNC,	/* Lua function. */
+  LJ_VMST_FFUNC,	/* Fast function. */
+  LJ_VMST_CFUNC,	/* C function. */
   LJ_VMST_GC,		/* Garbage collector. */
   LJ_VMST_EXIT,		/* Trace exit handler. */
   LJ_VMST_RECORD,	/* Trace recorder. */
@@ -521,6 +523,13 @@ enum {
   LJ_VMST__MAX
 };
 
+/*
+** While the VM executes a trace, vmstate holds the trace number instead of
+** a ~LJ_VMST_* value; this boundary groups all traces into one pseudo-vmstate.
+*/
+
+#define LJ_VMST_TRACE		(LJ_VMST__MAX)
+
 #define setvmstate(g, st)	((g)->vmstate = ~LJ_VMST_##st)
 
 /* Metamethods. ORDER MM */
diff --git a/src/lj_profile.c b/src/lj_profile.c
index 116998e..7b09a63 100644
--- a/src/lj_profile.c
+++ b/src/lj_profile.c
@@ -157,7 +157,10 @@ static void profile_trigger(ProfileState *ps)
     int st = g->vmstate;
     ps->vmstate = st >= 0 ? 'N' :
 		  st == ~LJ_VMST_INTERP ? 'I' :
-		  st == ~LJ_VMST_C ? 'C' :
+		  /* Stubs for profiler hooks. */
+		  st == ~LJ_VMST_LFUNC ? 'I' :
+		  st == ~LJ_VMST_FFUNC ? 'I' :
+		  st == ~LJ_VMST_CFUNC ? 'C' :
 		  st == ~LJ_VMST_GC ? 'G' : 'J';
     g->hookmask = (mask | HOOK_PROFILE);
     lj_dispatch_update(g);
diff --git a/src/luajit-gdb.py b/src/luajit-gdb.py
index 652c560..f1fd623 100644
--- a/src/luajit-gdb.py
+++ b/src/luajit-gdb.py
@@ -206,12 +206,14 @@ def J(g):
 def vm_state(g):
     return {
         i2notu32(0): 'INTERP',
-        i2notu32(1): 'C',
-        i2notu32(2): 'GC',
-        i2notu32(3): 'EXIT',
-        i2notu32(4): 'RECORD',
-        i2notu32(5): 'OPT',
-        i2notu32(6): 'ASM',
+        i2notu32(1): 'LFUNC',
+        i2notu32(2): 'FFUNC',
+        i2notu32(3): 'CFUNC',
+        i2notu32(4): 'GC',
+        i2notu32(5): 'EXIT',
+        i2notu32(6): 'RECORD',
+        i2notu32(7): 'OPT',
+        i2notu32(8): 'ASM',
     }.get(int(tou32(g['vmstate'])), 'TRACE')
 
 def gc_state(g):
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index d4cdaf5..ae2efdf 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -287,7 +287,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  str RB, L->base
   |   ldr KBASE, SAVE_NRES
-  |    mv_vmstate CARG4, C
+  |    mv_vmstate CARG4, CFUNC
   |   sub BASE, BASE, #8
   |  subs CARG3, RC, #8
   |   lsl KBASE, KBASE, #3		// KBASE = (nresults_wanted+1)*8
@@ -348,7 +348,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov CRET1, CARG2
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  ldr L, SAVE_L
-  |   mv_vmstate CARG4, C
+  |   mv_vmstate CARG4, CFUNC
   |  ldr GL:CARG3, L->glref
   |   str CARG4, GL:CARG3->vmstate
   |   str L, GL:CARG3->cur_L
@@ -4487,7 +4487,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     if (op == BC_FUNCCW) {
       |  ldr CARG2, CFUNC:CARG3->f
     }
-    |    mv_vmstate CARG3, C
+    |    mv_vmstate CARG3, CFUNC
     |  mov CARG1, L
     |   bhi ->vm_growstack_c		// Need to grow stack.
     |    st_vmstate CARG3
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index 3eaf376..f783428 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -332,7 +332,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  str RB, L->base
   |   ldrsw CARG2, SAVE_NRES		// CARG2 = nresults+1.
-  |    mv_vmstate TMP0w, C
+  |    mv_vmstate TMP0w, CFUNC
   |   sub BASE, BASE, #16
   |  subs TMP2, RC, #8
   |    st_vmstate TMP0w
@@ -391,7 +391,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov CRET1, CARG2
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  ldr L, SAVE_L
-  |   mv_vmstate TMP0w, C
+  |   mv_vmstate TMP0w, CFUNC
   |  ldr GL, L->glref
   |   st_vmstate TMP0w
   |  b ->vm_leave_unw
@@ -3816,7 +3816,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     if (op == BC_FUNCCW) {
       |  ldr CARG2, CFUNC:CARG3->f
     }
-    |    mv_vmstate TMP0w, C
+    |    mv_vmstate TMP0w, CFUNC
     |  mov CARG1, L
     |   bhi ->vm_growstack_c		// Need to grow stack.
     |    st_vmstate TMP0w
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 1afd611..ec57d78 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -403,7 +403,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  addiu TMP1, RD, -8
   |   sw TMP2, L->base
-  |    li_vmstate C
+  |    li_vmstate CFUNC
   |   lw TMP2, SAVE_NRES
   |   addiu BASE, BASE, -8
   |    st_vmstate
@@ -473,7 +473,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  move CRET1, CARG2
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  lw L, SAVE_L
-  |   li TMP0, ~LJ_VMST_C
+  |   li TMP0, ~LJ_VMST_CFUNC
   |  lw GL:TMP1, L->glref
   |  b ->vm_leave_unw
   |.  sw TMP0, GL:TMP1->vmstate
@@ -5085,7 +5085,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  sw BASE, L->base
     |  sltu AT, TMP2, TMP1
     |   sw RC, L->top
-    |    li_vmstate C
+    |    li_vmstate CFUNC
     if (op == BC_FUNCCW) {
       |  lw CARG2, CFUNC:RB->f
     }
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index c06270a..9a749f9 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -449,7 +449,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  addiu TMP1, RD, -8
   |   sd TMP2, L->base
-  |    li_vmstate C
+  |    li_vmstate CFUNC
   |   lw TMP2, SAVE_NRES
   |   daddiu BASE, BASE, -16
   |    st_vmstate
@@ -517,7 +517,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  move CRET1, CARG2
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  ld L, SAVE_L
-  |   li TMP0, ~LJ_VMST_C
+  |   li TMP0, ~LJ_VMST_CFUNC
   |  ld GL:TMP1, L->glref
   |  b ->vm_leave_unw
   |.  sw TMP0, GL:TMP1->vmstate
@@ -4952,7 +4952,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  sd BASE, L->base
     |  sltu AT, TMP2, TMP1
     |   sd RC, L->top
-    |    li_vmstate C
+    |    li_vmstate CFUNC
     if (op == BC_FUNCCW) {
       |  ld CARG2, CFUNC:RB->f
     }
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index b4260eb..62e9b68 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -520,7 +520,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  // TMP0 = PC & FRAME_TYPE
   |  cmpwi TMP0, FRAME_C
   |   rlwinm TMP2, PC, 0, 0, 28
-  |    li_vmstate C
+  |    li_vmstate CFUNC
   |   sub TMP2, BASE, TMP2		// TMP2 = previous base.
   |  bney ->vm_returnp
   |
@@ -596,7 +596,7 @@ static void build_subroutines(BuildCtx *ctx)
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  lwz L, SAVE_L
   |  .toc ld TOCREG, SAVE_TOC
-  |   li TMP0, ~LJ_VMST_C
+  |   li TMP0, ~LJ_VMST_CFUNC
   |  lwz GL:TMP1, L->glref
   |   stw TMP0, GL:TMP1->vmstate
   |  b ->vm_leave_unw
@@ -5060,7 +5060,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   stp BASE, L->base
     |   cmplw TMP1, TMP2
     |    stp RC, L->top
-    |     li_vmstate C
+    |     li_vmstate CFUNC
     |.if TOC
     |  mtctr TMP3
     |.else
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 80753e0..974047d 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -140,7 +140,7 @@
 |//-----------------------------------------------------------------------
 |.else			// x64/POSIX stack layout
 |
-|.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
+|.define CFRAME_SPACE,	qword*7			// Delta for rsp (see <--).
 |.macro saveregs_
 |  push rbx; push r15; push r14
 |.if NO_UNWIND
@@ -161,26 +161,29 @@
 |
 |//----- 16 byte aligned,
 |.if NO_UNWIND
-|.define SAVE_RET,	aword [rsp+aword*11]	//<-- rsp entering interpreter.
-|.define SAVE_R4,	aword [rsp+aword*10]
-|.define SAVE_R3,	aword [rsp+aword*9]
-|.define SAVE_R2,	aword [rsp+aword*8]
-|.define SAVE_R1,	aword [rsp+aword*7]
-|.define SAVE_RU2,	aword [rsp+aword*6]
-|.define SAVE_RU1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.define SAVE_RET,	qword [rsp+qword*13]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	qword [rsp+qword*12]
+|.define SAVE_R3,	qword [rsp+qword*11]
+|.define SAVE_R2,	qword [rsp+qword*10]
+|.define SAVE_R1,	qword [rsp+qword*9]
+|.define SAVE_RU2,	qword [rsp+qword*8]
+|.define SAVE_RU1,	qword [rsp+qword*7]	//<-- rsp after register saves.
 |.else
-|.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
-|.define SAVE_R4,	aword [rsp+aword*8]
-|.define SAVE_R3,	aword [rsp+aword*7]
-|.define SAVE_R2,	aword [rsp+aword*6]
-|.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.define SAVE_RET,	qword [rsp+qword*11]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	qword [rsp+qword*10]
+|.define SAVE_R3,	qword [rsp+qword*9]
+|.define SAVE_R2,	qword [rsp+qword*8]
+|.define SAVE_R1,	qword [rsp+qword*7]	//<-- rsp after register saves.
 |.endif
-|.define SAVE_CFRAME,	aword [rsp+aword*4]
-|.define SAVE_PC,	aword [rsp+aword*3]
-|.define SAVE_L,	aword [rsp+aword*2]
+|.define SAVE_CFRAME,	qword [rsp+qword*6]
+|.define UNUSED2,	qword [rsp+qword*5]
+|.define UNUSED1,	dword [rsp+dword*9]
+|.define SAVE_VMSTATE,	dword [rsp+dword*8]
+|.define SAVE_PC,	qword [rsp+qword*3]
+|.define SAVE_L,	qword [rsp+qword*2]
 |.define SAVE_ERRF,	dword [rsp+dword*3]
 |.define SAVE_NRES,	dword [rsp+dword*2]
-|.define TMP1,		aword [rsp]		//<-- rsp while in interpreter.
+|.define TMP1,		qword [rsp]		//<-- rsp while in interpreter.
 |//----- 16 byte aligned
 |
 |.define TMP1d,		dword [rsp]
@@ -342,6 +345,22 @@
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
 |.endmacro
 |
+|// Uses TMPRd (r10d).
+|.macro save_vmstate
+|.if not WIN
+|  mov TMPRd, dword [DISPATCH+DISPATCH_GL(vmstate)]
+|  mov SAVE_VMSTATE, TMPRd
+|.endif // WIN
+|.endmacro
+|
+|// Uses TMPRd (r10d).
+|.macro restore_vmstate
+|.if not WIN
+|  mov TMPRd, SAVE_VMSTATE
+|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], TMPRd
+|.endif // WIN
+|.endmacro
+|
 |.macro fpop1; fstp st1; .endmacro
 |
 |// Synthesize SSE FP constants.
@@ -416,7 +435,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  jnz ->vm_returnp
   |
   |  // Return to C.
-  |  set_vmstate C
+  |  set_vmstate CFUNC
   |  and PC, -8
   |  sub PC, BASE
   |  neg PC				// Previous base = BASE - delta.
@@ -448,6 +467,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  xor eax, eax			// Ok return status for vm_pcall.
   |
   |->vm_leave_unw:
+  |  // DISPATCH must be set up properly at this point.
+  |  restore_vmstate			// Caveat: uses TMPRd (r10d).
   |  restoreregs
   |  ret
   |
@@ -493,7 +514,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov L:DISPATCH, SAVE_L
   |  mov GL:RB, L:DISPATCH->glref
   |  mov GL:RB->cur_L, L:DISPATCH
-  |  mov dword GL:RB->vmstate, ~LJ_VMST_C
+  |  mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC
+  |  mov DISPATCH, L:DISPATCH->glref	// Setup pointer to dispatch table.
+  |  add DISPATCH, GG_G2DISP
   |  jmp ->vm_leave_unw
   |
   |->vm_unwind_rethrow:
@@ -521,6 +544,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov [BASE-16], RA			// Prepend false to error message.
   |  mov [BASE-8], RB
   |  mov RA, -16			// Results start at BASE+RA = BASE-16.
+  |  // INTERP until jump to BC_RET* or return to C.
   |  set_vmstate INTERP
   |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
   |
@@ -575,6 +599,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  lea KBASE, [esp+CFRAME_RESUME]
   |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
   |  add DISPATCH, GG_G2DISP
+  |  save_vmstate			// Caveat: uses TMPRd (r10d).
   |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
   |  mov SAVE_CFRAME, RD
   |  mov SAVE_NRES, RDd
@@ -585,6 +610,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  // Resume after yield (like a return).
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+  |  // INTERP until jump to BC_RET* or vm_return.
   |  set_vmstate INTERP
   |  mov byte L:RB->status, RDL
   |  mov BASE, L:RB->base
@@ -622,11 +648,12 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov SAVE_CFRAME, KBASE
   |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
   |  add DISPATCH, GG_G2DISP
+  |  save_vmstate			// Caveat: uses TMPRd (r10d).
   |  mov L:RB->cframe, rsp
   |
   |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP			// INTERP until executing BC_IFUNC*.
   |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
   |  add PC, RA
   |  sub PC, BASE			// PC = frame delta + frame type
@@ -658,6 +685,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov SAVE_ERRF, 0			// No error function.
   |  mov SAVE_NRES, KBASEd		// Neg. delta means cframe w/o frame.
   |   add DISPATCH, GG_G2DISP
+  |  save_vmstate			// Caveat: uses TMPRd (r10d).
   |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
   |
   |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.
@@ -697,6 +725,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  cleartp LFUNC:KBASE
   |  mov KBASE, LFUNC:KBASE->pc
   |  mov KBASE, [KBASE+PC2PROTO(k)]
+  |  set_vmstate LFUNC			// LFUNC after KBASE restoration.
   |  // BASE = base, RC = result, RB = meta base
   |  jmp RA				// Jump to continuation.
   |
@@ -1137,15 +1166,16 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc, name
   |->ff_ .. name:
+  |  set_vmstate FFUNC
   |.endmacro
   |
   |.macro .ffunc_1, name
-  |->ff_ .. name:
+  |  .ffunc name
   |  cmp NARGS:RDd, 1+1;  jb ->fff_fallback
   |.endmacro
   |
   |.macro .ffunc_2, name
-  |->ff_ .. name:
+  |  .ffunc name
   |  cmp NARGS:RDd, 2+1;  jb ->fff_fallback
   |.endmacro
   |
@@ -1578,6 +1608,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov L:PC, TMP1
   |  mov BASE, L:RB->base
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+  |  // INTERP until jump to BC_RET* or vm_return.
   |  set_vmstate INTERP
   |
   |  cmp eax, LUA_YIELD
@@ -1717,6 +1748,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  movzx RAd, PC_RA
   |  neg RA
   |  lea BASE, [BASE+RA*8-16]		// base = base - (RA+2)*8
+  |  set_vmstate LFUNC			// LFUNC state after BASE restoration.
   |  ins_next
   |
   |6:  // Fill up results with nil.
@@ -2481,7 +2513,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  mov L:RB->base, BASE
   |  mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
-  |  set_vmstate INTERP
+  |  set_vmstate LFUNC			// LFUNC after BASE & KBASE restoration.
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  mov RCd, [PC]
   |  movzx RAd, RCH
@@ -2697,8 +2729,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov CARG1, CTSTATE
   |  call extern lj_ccallback_enter	// (CTState *cts, void *cf)
   |  // lua_State * returned in eax (RD).
-  |  set_vmstate INTERP
   |  mov BASE, L:RD->base
+  |  set_vmstate LFUNC			// LFUNC after BASE restoration.
   |  mov RD, L:RD->top
   |  sub RD, BASE
   |  mov LFUNC:RB, [BASE-16]
@@ -3974,6 +4006,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
 
   case BC_CALL: case BC_CALLM:
     |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+    |  // INTERP until a *FUNC* bytecode is entered and a new BASE is set up.
+    |  set_vmstate INTERP
     if (op == BC_CALLM) {
       |  add NARGS:RDd, MULTRES
     }
@@ -3995,6 +4029,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov LFUNC:RB, [RA-16]
     |  checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
     |->BC_CALLT_Z:
+    |  // INTERP until a *FUNC* bytecode is entered and a new BASE is set up.
+    |  set_vmstate INTERP
     |  mov PC, [BASE-8]
     |  test PCd, FRAME_TYPE
     |  jnz >7
@@ -4219,6 +4255,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |  shl RAd, 3
     }
     |1:
+    |  // INTERP until the old BASE & KBASE are restored.
+    |  set_vmstate INTERP
     |  mov PC, [BASE-8]
     |  mov MULTRES, RDd			// Save nresults+1.
     |  test PCd, FRAME_TYPE		// Check frame type marker.
@@ -4260,6 +4298,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  cleartp LFUNC:KBASE
     |  mov KBASE, LFUNC:KBASE->pc
     |  mov KBASE, [KBASE+PC2PROTO(k)]
+    |  // LFUNC after the old BASE & KBASE are restored.
+    |  set_vmstate LFUNC
     |  ins_next
     |
     |6:  // Fill up results with nil.
@@ -4551,6 +4591,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
     |  mov KBASE, [PC-4+PC2PROTO(k)]
     |  mov L:RB, SAVE_L
+    |  set_vmstate LFUNC		// LFUNC after KBASE restoration.
     |  lea RA, [BASE+RA*8]		// Top of frame.
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_f
@@ -4588,6 +4629,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov [RD-8], RB			// Store delta + FRAME_VARG.
     |  mov [RD-16], LFUNC:KBASE		// Store copy of LFUNC.
     |  mov L:RB, SAVE_L
+    |  set_vmstate LFUNC		// LFUNC after KBASE restoration.
     |  lea RA, [RD+RA*8]
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_v		// Need to grow stack.
@@ -4643,7 +4685,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |  mov CARG1, L:RB		// Caveat: CARG1 may be RA.
     }
     |  ja ->vm_growstack_c		// Need to grow stack.
-    |  set_vmstate C
+    |  set_vmstate CFUNC		// CFUNC before entering C function.
     if (op == BC_FUNCC) {
       |  call KBASE			// (lua_State *L)
     } else {
@@ -4653,6 +4695,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // nresults returned in eax (RD).
     |  mov BASE, L:RB->base
     |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+    |  // INTERP until jump to BC_RET* or vm_return.
     |  set_vmstate INTERP
     |  lea RA, [BASE+RD*8]
     |  neg RA
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index d76fbe3..ab8e6f2 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -99,6 +99,8 @@
 |.define CARG6d,	r9d
 |.define FCARG1,	CARG1d		// Simulate x86 fastcall.
 |.define FCARG2,	CARG2d
+|
+|.define XCHGd,		r11d		// TMP on x64, used for exchange.
 |.endif
 |
 |// Type definitions. Some of these are only used for documentation.
@@ -140,7 +142,7 @@
 |
 |.else
 |
-|.define CFRAME_SPACE,	aword*7			// Delta for esp (see <--).
+|.define CFRAME_SPACE,	dword*11			// Delta for esp (see <--).
 |.macro saveregs_
 |  push edi; push esi; push ebx
 |  sub esp, CFRAME_SPACE
@@ -183,25 +185,30 @@
 |.define ARG1,		aword [esp]		//<-- esp while in interpreter.
 |//----- 16 byte aligned, ^^^ arguments for C callee
 |.else
-|.define SAVE_ERRF,	aword [esp+aword*15]	// vm_pcall/vm_cpcall only.
-|.define SAVE_NRES,	aword [esp+aword*14]
-|.define SAVE_CFRAME,	aword [esp+aword*13]
-|.define SAVE_L,	aword [esp+aword*12]
+|.define SAVE_ERRF,	dword [esp+dword*19]	// vm_pcall/vm_cpcall only.
+|.define SAVE_NRES,	dword [esp+dword*18]
+|.define SAVE_CFRAME,	dword [esp+dword*17]
+|.define SAVE_L,	dword [esp+dword*16]
 |//----- 16 byte aligned, ^^^ arguments from C caller
-|.define SAVE_RET,	aword [esp+aword*11]	//<-- esp entering interpreter.
-|.define SAVE_R4,	aword [esp+aword*10]
-|.define SAVE_R3,	aword [esp+aword*9]
-|.define SAVE_R2,	aword [esp+aword*8]
+|.define SAVE_RET,	dword [esp+dword*15]	//<-- esp entering interpreter.
+|.define SAVE_R4,	dword [esp+dword*14]
+|.define SAVE_R3,	dword [esp+dword*13]
+|.define SAVE_R2,	dword [esp+dword*12]
 |//----- 16 byte aligned
-|.define SAVE_R1,	aword [esp+aword*7]	//<-- esp after register saves.
-|.define SAVE_PC,	aword [esp+aword*6]
-|.define TMP2,		aword [esp+aword*5]
-|.define TMP1,		aword [esp+aword*4]
+|.define UNUSED2,	dword [esp+dword*11]
+|.define UNUSED1,	dword [esp+dword*10]
+|.define SPILLECX,	dword [esp+dword*9]
+|.define SAVE_VMSTATE,	dword [esp+dword*8]
 |//----- 16 byte aligned
-|.define ARG4,		aword [esp+aword*3]
-|.define ARG3,		aword [esp+aword*2]
-|.define ARG2,		aword [esp+aword*1]
-|.define ARG1,		aword [esp]		//<-- esp while in interpreter.
+|.define SAVE_R1,	dword [esp+dword*7]	//<-- esp after register saves.
+|.define SAVE_PC,	dword [esp+dword*6]
+|.define TMP2,		dword [esp+dword*5]
+|.define TMP1,		dword [esp+dword*4]
+|//----- 16 byte aligned
+|.define ARG4,		dword [esp+dword*3]
+|.define ARG3,		dword [esp+dword*2]
+|.define ARG2,		dword [esp+dword*1]
+|.define ARG1,		dword [esp]		//<-- esp while in interpreter.
 |//----- 16 byte aligned, ^^^ arguments for C callee
 |.endif
 |
@@ -269,7 +276,7 @@
 |//-----------------------------------------------------------------------
 |.else			// x64/POSIX stack layout
 |
-|.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
+|.define CFRAME_SPACE,	qword*7			// Delta for rsp (see <--).
 |.macro saveregs_
 |  push rbx; push r15; push r14
 |.if NO_UNWIND
@@ -290,33 +297,36 @@
 |
 |//----- 16 byte aligned,
 |.if NO_UNWIND
-|.define SAVE_RET,	aword [rsp+aword*11]	//<-- rsp entering interpreter.
-|.define SAVE_R4,	aword [rsp+aword*10]
-|.define SAVE_R3,	aword [rsp+aword*9]
-|.define SAVE_R2,	aword [rsp+aword*8]
-|.define SAVE_R1,	aword [rsp+aword*7]
-|.define SAVE_RU2,	aword [rsp+aword*6]
-|.define SAVE_RU1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.define SAVE_RET,	qword [rsp+qword*13]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	qword [rsp+qword*12]
+|.define SAVE_R3,	qword [rsp+qword*11]
+|.define SAVE_R2,	qword [rsp+qword*10]
+|.define SAVE_R1,	qword [rsp+qword*9]
+|.define SAVE_RU2,	qword [rsp+qword*8]
+|.define SAVE_RU1,	qword [rsp+qword*7]	//<-- rsp after register saves.
 |.else
-|.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
-|.define SAVE_R4,	aword [rsp+aword*8]
-|.define SAVE_R3,	aword [rsp+aword*7]
-|.define SAVE_R2,	aword [rsp+aword*6]
-|.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.define SAVE_RET,	qword [rsp+qword*11]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	qword [rsp+qword*10]
+|.define SAVE_R3,	qword [rsp+qword*9]
+|.define SAVE_R2,	qword [rsp+qword*8]
+|.define SAVE_R1,	qword [rsp+qword*7]	//<-- rsp after register saves.
 |.endif
-|.define SAVE_CFRAME,	aword [rsp+aword*4]
+|.define SAVE_CFRAME,	qword [rsp+qword*6]
+|.define UNUSED2,	qword [rsp+qword*5]
+|.define UNUSED1,	dword [rsp+dword*9]
+|.define SAVE_VMSTATE,	dword [rsp+dword*8]
 |.define SAVE_PC,	dword [rsp+dword*7]
 |.define SAVE_L,	dword [rsp+dword*6]
 |.define SAVE_ERRF,	dword [rsp+dword*5]
 |.define SAVE_NRES,	dword [rsp+dword*4]
-|.define TMPa,		aword [rsp+aword*1]
+|.define TMPa,		qword [rsp+qword*1]
 |.define TMP2,		dword [rsp+dword*1]
 |.define TMP1,		dword [rsp]		//<-- rsp while in interpreter.
 |//----- 16 byte aligned
 |
 |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
 |.define TMPQ,		qword [rsp]
-|.define TMP3,		dword [rsp+aword*1]
+|.define TMP3,		dword [rsp+qword*1]
 |.define MULTRES,	TMP2
 |
 |.endif
@@ -433,6 +443,36 @@
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
 |.endmacro
 |
+|// Uses spilled ecx on x86 or XCHGd (r11d) on x64.
+|.macro save_vmstate
+|.if not WIN
+|.if not X64
+|  mov SPILLECX, ecx
+|  mov ecx, dword [DISPATCH+DISPATCH_GL(vmstate)]
+|  mov SAVE_VMSTATE, ecx
+|  mov ecx, SPILLECX
+|.else // X64
+|  mov XCHGd, dword [DISPATCH+DISPATCH_GL(vmstate)]
+|  mov SAVE_VMSTATE, XCHGd
+|.endif // X64
+|.endif // WIN
+|.endmacro
+|
+|// Uses spilled ecx on x86 or XCHGd (r11d) on x64.
+|.macro restore_vmstate
+|.if not WIN
+|.if not X64
+|  mov SPILLECX, ecx
+|  mov ecx, SAVE_VMSTATE
+|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ecx
+|  mov ecx, SPILLECX
+|.else // X64
+|  mov XCHGd, SAVE_VMSTATE
+|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], XCHGd
+|.endif // X64
+|.endif // WIN
+|.endmacro
+|
 |// x87 compares.
 |.macro fcomparepp			// Compare and pop st0 >< st1.
 |  fucomip st1
@@ -520,7 +560,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  jnz ->vm_returnp
   |
   |  // Return to C.
-  |  set_vmstate C
+  |  set_vmstate CFUNC
   |  and PC, -8
   |  sub PC, BASE
   |  neg PC				// Previous base = BASE - delta.
@@ -559,6 +599,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  xor eax, eax			// Ok return status for vm_pcall.
   |
   |->vm_leave_unw:
+  |  // DISPATCH must be set up properly at this point.
+  |  restore_vmstate			// Caveat: on x64 uses XCHGd (r11d).
   |  restoreregs
   |  ret
   |
@@ -613,7 +655,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov L:DISPATCH, SAVE_L
   |  mov GL:RB, L:DISPATCH->glref
   |  mov dword GL:RB->cur_L, L:DISPATCH
-  |  mov dword GL:RB->vmstate, ~LJ_VMST_C
+  |  mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC
+  |  mov DISPATCH, L:DISPATCH->glref	// Setup pointer to dispatch table.
+  |  add DISPATCH, GG_G2DISP
   |  jmp ->vm_leave_unw
   |
   |->vm_unwind_rethrow:
@@ -647,6 +691,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov PC, [BASE-4]			// Fetch PC of previous frame.
   |  mov dword [BASE-4], LJ_TFALSE	// Prepend false to error message.
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+  |  // INTERP until jump to BC_RET* or return to C.
   |  set_vmstate INTERP
   |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
   |
@@ -718,6 +763,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  lea KBASEa, [esp+CFRAME_RESUME]
   |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
   |  add DISPATCH, GG_G2DISP
+  |  save_vmstate			// Caveat: on x64 uses XCHGd (r11d).
   |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
   |  mov SAVE_CFRAME, RDa
   |.if X64
@@ -730,6 +776,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  // Resume after yield (like a return).
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+  |  // INTERP until jump to BC_RET* or vm_return.
   |  set_vmstate INTERP
   |  mov byte L:RB->status, RDL
   |  mov BASE, L:RB->base
@@ -774,6 +821,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov SAVE_CFRAME, KBASEa
   |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
   |  add DISPATCH, GG_G2DISP
+  |  save_vmstate			// Caveat: on x64 uses XCHGd (r11d).
   |.if X64
   |  mov L:RB->cframe, rsp
   |.else
@@ -782,7 +830,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // INTERP until executing BC_IFUNC*.
   |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
   |  add PC, RA
   |  sub PC, BASE			// PC = frame delta + frame type
@@ -823,6 +871,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov SAVE_ERRF, 0			// No error function.
   |  mov SAVE_NRES, KBASE		// Neg. delta means cframe w/o frame.
   |   add DISPATCH, GG_G2DISP
+  |  save_vmstate			// Caveat: on x64 uses XCHGd (r11d).
   |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
   |
   |.if X64
@@ -885,6 +934,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, LFUNC:KBASE->pc
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  // BASE = base, RC = result, RB = meta base
+  |  set_vmstate LFUNC			// LFUNC after KBASE restoration.
   |  jmp RAa				// Jump to continuation.
   |
   |.if FFI
@@ -1409,15 +1459,16 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc, name
   |->ff_ .. name:
+  |  set_vmstate FFUNC
   |.endmacro
   |
   |.macro .ffunc_1, name
-  |->ff_ .. name:
+  |  .ffunc name
   |  cmp NARGS:RD, 1+1;  jb ->fff_fallback
   |.endmacro
   |
   |.macro .ffunc_2, name
-  |->ff_ .. name:
+  |  .ffunc name
   |  cmp NARGS:RD, 2+1;  jb ->fff_fallback
   |.endmacro
   |
@@ -1924,6 +1975,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.endif
   |  mov BASE, L:RB->base
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+  |  // INTERP until jump to BC_RET* or vm_return.
   |  set_vmstate INTERP
   |
   |  cmp eax, LUA_YIELD
@@ -2089,6 +2141,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  movzx RA, PC_RA
   |  not RAa				// Note: ~RA = -(RA+1)
   |  lea BASE, [BASE+RA*8]		// base = base - (RA+1)*8
+  |  set_vmstate LFUNC			// LFUNC state after BASE restoration.
   |  ins_next
   |
   |6:  // Fill up results with nil.
@@ -2933,7 +2986,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  mov L:RB->base, BASE
   |  mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
-  |  set_vmstate INTERP
+  |  set_vmstate LFUNC			// LFUNC after BASE & KBASE restoration.
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  mov RC, [PC]
   |  movzx RA, RCH
@@ -3203,8 +3256,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov FCARG1, CTSTATE
   |  call extern lj_ccallback_enter at 8	// (CTState *cts, void *cf)
   |  // lua_State * returned in eax (RD).
-  |  set_vmstate INTERP
   |  mov BASE, L:RD->base
+  |  set_vmstate LFUNC			// LFUNC after BASE restoration.
   |  mov RD, L:RD->top
   |  sub RD, BASE
   |  mov LFUNC:RB, [BASE-8]
@@ -4683,6 +4736,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
 
   case BC_CALL: case BC_CALLM:
     |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+    |  // INTERP until a *FUNC* bytecode is entered and a new BASE is set up.
+    |  set_vmstate INTERP
     if (op == BC_CALLM) {
       |  add NARGS:RD, MULTRES
     }
@@ -4706,6 +4761,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  cmp dword [RA-4], LJ_TFUNC
     |  jne ->vmeta_call
     |->BC_CALLT_Z:
+    |  // INTERP until a *FUNC* bytecode is entered and a new BASE is set up.
+    |  set_vmstate INTERP
     |  mov PC, [BASE-4]
     |  test PC, FRAME_TYPE
     |  jnz >7
@@ -4989,6 +5046,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |  shl RA, 3
     }
     |1:
+    |  // INTERP until the old BASE & KBASE is restored.
+    |  set_vmstate INTERP
     |  mov PC, [BASE-4]
     |  mov MULTRES, RD			// Save nresults+1.
     |  test PC, FRAME_TYPE		// Check frame type marker.
@@ -5043,6 +5102,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov LFUNC:KBASE, [BASE-8]
     |  mov KBASE, LFUNC:KBASE->pc
     |  mov KBASE, [KBASE+PC2PROTO(k)]
+    |  // LFUNC after the old BASE & KBASE is restored.
+    |  set_vmstate LFUNC
     |  ins_next
     |
     |6:  // Fill up results with nil.
@@ -5330,6 +5391,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
     |  mov KBASE, [PC-4+PC2PROTO(k)]
     |  mov L:RB, SAVE_L
+    |  set_vmstate LFUNC		// LFUNC after KBASE restoration.
     |  lea RA, [BASE+RA*8]		// Top of frame.
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_f
@@ -5367,6 +5429,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov [RD-4], RB			// Store delta + FRAME_VARG.
     |  mov [RD-8], LFUNC:KBASE		// Store copy of LFUNC.
     |  mov L:RB, SAVE_L
+    |  set_vmstate LFUNC		// LFUNC after KBASE restoration.
     |  lea RA, [RD+RA*8]
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_v		// Need to grow stack.
@@ -5431,7 +5494,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |.endif
     }
     |  ja ->vm_growstack_c		// Need to grow stack.
-    |  set_vmstate C
+    |  set_vmstate CFUNC		// CFUNC before entering C function.
     if (op == BC_FUNCC) {
       |  call KBASEa			// (lua_State *L)
     } else {
@@ -5441,6 +5504,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // nresults returned in eax (RD).
     |  mov BASE, L:RB->base
     |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+    |  // INTERP until jump to BC_RET* or vm_return.
     |  set_vmstate INTERP
     |  lea RA, [BASE+RD*8]
     |  neg RA
-- 
2.28.0



More information about the Tarantool-patches mailing list