[Tarantool-patches] [PATCH luajit v2 3/7] vm: introduce VM states for Lua and fast functions

Sergey Kaplun skaplun at tarantool.org
Fri Dec 25 18:26:05 MSK 2020


This patch introduces the LJ_VMST_LFUNC and LJ_VMST_FFUNC VM states,
separated from LJ_VMST_INTERP. The new VM states make it possible to
determine the context of Lua VM execution on x86 and x64. Also, LJ_VMST_C
is renamed to LJ_VMST_CFUNC for naming consistency with the new VM states.

Also, this patch adjusts the stack layout for x86 and x64 to save the VM
state, so that it stays consistent during stack unwinding when an error
is raised.

Part of tarantool/tarantool#5442
---

Changes in v2:
 - Moved `.if not WIN` macro check inside (save|restore)_vmstate_through
 - Fixed naming: SAVE_UNUSED\d -> UNUSED\d

 src/lj_frame.h     |  18 +++----
 src/lj_obj.h       |   4 +-
 src/lj_profile.c   |   5 +-
 src/luajit-gdb.py  |  14 ++---
 src/vm_arm.dasc    |   6 +--
 src/vm_arm64.dasc  |   6 +--
 src/vm_mips.dasc   |   6 +--
 src/vm_mips64.dasc |   6 +--
 src/vm_ppc.dasc    |   6 +--
 src/vm_x64.dasc    |  93 ++++++++++++++++++++++----------
 src/vm_x86.dasc    | 131 +++++++++++++++++++++++++++++----------------
 11 files changed, 188 insertions(+), 107 deletions(-)

diff --git a/src/lj_frame.h b/src/lj_frame.h
index 19c49a4..2e693f9 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -127,13 +127,13 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 #define CFRAME_SIZE		(16*4)
 #define CFRAME_SHIFT_MULTRES	0
 #else
-#define CFRAME_OFS_ERRF		(15*4)
-#define CFRAME_OFS_NRES		(14*4)
-#define CFRAME_OFS_PREV		(13*4)
-#define CFRAME_OFS_L		(12*4)
+#define CFRAME_OFS_ERRF		(19*4)
+#define CFRAME_OFS_NRES		(18*4)
+#define CFRAME_OFS_PREV		(17*4)
+#define CFRAME_OFS_L		(16*4)
 #define CFRAME_OFS_PC		(6*4)
 #define CFRAME_OFS_MULTRES	(5*4)
-#define CFRAME_SIZE		(12*4)
+#define CFRAME_SIZE		(16*4)
 #define CFRAME_SHIFT_MULTRES	0
 #endif
 #elif LJ_TARGET_X64
@@ -152,11 +152,11 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 #define CFRAME_OFS_NRES		(22*4)
 #define CFRAME_OFS_MULTRES	(21*4)
 #endif
-#define CFRAME_SIZE		(10*8)
+#define CFRAME_SIZE		(12*8)
 #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 9*16 + 4*8)
 #define CFRAME_SHIFT_MULTRES	0
 #else
-#define CFRAME_OFS_PREV		(4*8)
+#define CFRAME_OFS_PREV		(6*8)
 #if LJ_GC64
 #define CFRAME_OFS_PC		(3*8)
 #define CFRAME_OFS_L		(2*8)
@@ -171,9 +171,9 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 #define CFRAME_OFS_MULTRES	(1*4)
 #endif
 #if LJ_NO_UNWIND
-#define CFRAME_SIZE		(12*8)
+#define CFRAME_SIZE		(14*8)
 #else
-#define CFRAME_SIZE		(10*8)
+#define CFRAME_SIZE		(12*8)
 #endif
 #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 16)
 #define CFRAME_SHIFT_MULTRES	0
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 927b347..7fb715e 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -512,7 +512,9 @@ typedef struct GCtab {
 /* VM states. */
 enum {
   LJ_VMST_INTERP,	/* Interpreter. */
-  LJ_VMST_C,		/* C function. */
+  LJ_VMST_LFUNC,	/* Lua function. */
+  LJ_VMST_FFUNC,	/* Fast function. */
+  LJ_VMST_CFUNC,	/* C function. */
   LJ_VMST_GC,		/* Garbage collector. */
   LJ_VMST_EXIT,		/* Trace exit handler. */
   LJ_VMST_RECORD,	/* Trace recorder. */
diff --git a/src/lj_profile.c b/src/lj_profile.c
index 116998e..637e03c 100644
--- a/src/lj_profile.c
+++ b/src/lj_profile.c
@@ -157,7 +157,10 @@ static void profile_trigger(ProfileState *ps)
     int st = g->vmstate;
     ps->vmstate = st >= 0 ? 'N' :
 		  st == ~LJ_VMST_INTERP ? 'I' :
-		  st == ~LJ_VMST_C ? 'C' :
+		  st == ~LJ_VMST_CFUNC ? 'C' :
+		  /* Stubs for profiler hooks. */
+		  st == ~LJ_VMST_FFUNC ? 'I' :
+		  st == ~LJ_VMST_LFUNC ? 'I' :
 		  st == ~LJ_VMST_GC ? 'G' : 'J';
     g->hookmask = (mask | HOOK_PROFILE);
     lj_dispatch_update(g);
diff --git a/src/luajit-gdb.py b/src/luajit-gdb.py
index 652c560..f1fd623 100644
--- a/src/luajit-gdb.py
+++ b/src/luajit-gdb.py
@@ -206,12 +206,14 @@ def J(g):
 def vm_state(g):
     return {
         i2notu32(0): 'INTERP',
-        i2notu32(1): 'C',
-        i2notu32(2): 'GC',
-        i2notu32(3): 'EXIT',
-        i2notu32(4): 'RECORD',
-        i2notu32(5): 'OPT',
-        i2notu32(6): 'ASM',
+        i2notu32(1): 'LFUNC',
+        i2notu32(2): 'FFUNC',
+        i2notu32(3): 'CFUNC',
+        i2notu32(4): 'GC',
+        i2notu32(5): 'EXIT',
+        i2notu32(6): 'RECORD',
+        i2notu32(7): 'OPT',
+        i2notu32(8): 'ASM',
     }.get(int(tou32(g['vmstate'])), 'TRACE')
 
 def gc_state(g):
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index d4cdaf5..ae2efdf 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -287,7 +287,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  str RB, L->base
   |   ldr KBASE, SAVE_NRES
-  |    mv_vmstate CARG4, C
+  |    mv_vmstate CARG4, CFUNC
   |   sub BASE, BASE, #8
   |  subs CARG3, RC, #8
   |   lsl KBASE, KBASE, #3		// KBASE = (nresults_wanted+1)*8
@@ -348,7 +348,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov CRET1, CARG2
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  ldr L, SAVE_L
-  |   mv_vmstate CARG4, C
+  |   mv_vmstate CARG4, CFUNC
   |  ldr GL:CARG3, L->glref
   |   str CARG4, GL:CARG3->vmstate
   |   str L, GL:CARG3->cur_L
@@ -4487,7 +4487,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     if (op == BC_FUNCCW) {
       |  ldr CARG2, CFUNC:CARG3->f
     }
-    |    mv_vmstate CARG3, C
+    |    mv_vmstate CARG3, CFUNC
     |  mov CARG1, L
     |   bhi ->vm_growstack_c		// Need to grow stack.
     |    st_vmstate CARG3
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index 3eaf376..f783428 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -332,7 +332,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  str RB, L->base
   |   ldrsw CARG2, SAVE_NRES		// CARG2 = nresults+1.
-  |    mv_vmstate TMP0w, C
+  |    mv_vmstate TMP0w, CFUNC
   |   sub BASE, BASE, #16
   |  subs TMP2, RC, #8
   |    st_vmstate TMP0w
@@ -391,7 +391,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov CRET1, CARG2
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  ldr L, SAVE_L
-  |   mv_vmstate TMP0w, C
+  |   mv_vmstate TMP0w, CFUNC
   |  ldr GL, L->glref
   |   st_vmstate TMP0w
   |  b ->vm_leave_unw
@@ -3816,7 +3816,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     if (op == BC_FUNCCW) {
       |  ldr CARG2, CFUNC:CARG3->f
     }
-    |    mv_vmstate TMP0w, C
+    |    mv_vmstate TMP0w, CFUNC
     |  mov CARG1, L
     |   bhi ->vm_growstack_c		// Need to grow stack.
     |    st_vmstate TMP0w
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 1afd611..ec57d78 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -403,7 +403,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  addiu TMP1, RD, -8
   |   sw TMP2, L->base
-  |    li_vmstate C
+  |    li_vmstate CFUNC
   |   lw TMP2, SAVE_NRES
   |   addiu BASE, BASE, -8
   |    st_vmstate
@@ -473,7 +473,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  move CRET1, CARG2
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  lw L, SAVE_L
-  |   li TMP0, ~LJ_VMST_C
+  |   li TMP0, ~LJ_VMST_CFUNC
   |  lw GL:TMP1, L->glref
   |  b ->vm_leave_unw
   |.  sw TMP0, GL:TMP1->vmstate
@@ -5085,7 +5085,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  sw BASE, L->base
     |  sltu AT, TMP2, TMP1
     |   sw RC, L->top
-    |    li_vmstate C
+    |    li_vmstate CFUNC
     if (op == BC_FUNCCW) {
       |  lw CARG2, CFUNC:RB->f
     }
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index c06270a..9a749f9 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -449,7 +449,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  addiu TMP1, RD, -8
   |   sd TMP2, L->base
-  |    li_vmstate C
+  |    li_vmstate CFUNC
   |   lw TMP2, SAVE_NRES
   |   daddiu BASE, BASE, -16
   |    st_vmstate
@@ -517,7 +517,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  move CRET1, CARG2
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  ld L, SAVE_L
-  |   li TMP0, ~LJ_VMST_C
+  |   li TMP0, ~LJ_VMST_CFUNC
   |  ld GL:TMP1, L->glref
   |  b ->vm_leave_unw
   |.  sw TMP0, GL:TMP1->vmstate
@@ -4952,7 +4952,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  sd BASE, L->base
     |  sltu AT, TMP2, TMP1
     |   sd RC, L->top
-    |    li_vmstate C
+    |    li_vmstate CFUNC
     if (op == BC_FUNCCW) {
       |  ld CARG2, CFUNC:RB->f
     }
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index b4260eb..62e9b68 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -520,7 +520,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  // TMP0 = PC & FRAME_TYPE
   |  cmpwi TMP0, FRAME_C
   |   rlwinm TMP2, PC, 0, 0, 28
-  |    li_vmstate C
+  |    li_vmstate CFUNC
   |   sub TMP2, BASE, TMP2		// TMP2 = previous base.
   |  bney ->vm_returnp
   |
@@ -596,7 +596,7 @@ static void build_subroutines(BuildCtx *ctx)
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  lwz L, SAVE_L
   |  .toc ld TOCREG, SAVE_TOC
-  |   li TMP0, ~LJ_VMST_C
+  |   li TMP0, ~LJ_VMST_CFUNC
   |  lwz GL:TMP1, L->glref
   |   stw TMP0, GL:TMP1->vmstate
   |  b ->vm_leave_unw
@@ -5060,7 +5060,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   stp BASE, L->base
     |   cmplw TMP1, TMP2
     |    stp RC, L->top
-    |     li_vmstate C
+    |     li_vmstate CFUNC
     |.if TOC
     |  mtctr TMP3
     |.else
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 80753e0..83cc3e1 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -140,7 +140,7 @@
 |//-----------------------------------------------------------------------
 |.else			// x64/POSIX stack layout
 |
-|.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
+|.define CFRAME_SPACE,	qword*7			// Delta for rsp (see <--).
 |.macro saveregs_
 |  push rbx; push r15; push r14
 |.if NO_UNWIND
@@ -161,26 +161,29 @@
 |
 |//----- 16 byte aligned,
 |.if NO_UNWIND
-|.define SAVE_RET,	aword [rsp+aword*11]	//<-- rsp entering interpreter.
-|.define SAVE_R4,	aword [rsp+aword*10]
-|.define SAVE_R3,	aword [rsp+aword*9]
-|.define SAVE_R2,	aword [rsp+aword*8]
-|.define SAVE_R1,	aword [rsp+aword*7]
-|.define SAVE_RU2,	aword [rsp+aword*6]
-|.define SAVE_RU1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.define SAVE_RET,	qword [rsp+qword*13]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	qword [rsp+qword*12]
+|.define SAVE_R3,	qword [rsp+qword*11]
+|.define SAVE_R2,	qword [rsp+qword*10]
+|.define SAVE_R1,	qword [rsp+qword*9]
+|.define SAVE_RU2,	qword [rsp+qword*8]
+|.define SAVE_RU1,	qword [rsp+qword*7]	//<-- rsp after register saves.
 |.else
-|.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
-|.define SAVE_R4,	aword [rsp+aword*8]
-|.define SAVE_R3,	aword [rsp+aword*7]
-|.define SAVE_R2,	aword [rsp+aword*6]
-|.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.define SAVE_RET,	qword [rsp+qword*11]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	qword [rsp+qword*10]
+|.define SAVE_R3,	qword [rsp+qword*9]
+|.define SAVE_R2,	qword [rsp+qword*8]
+|.define SAVE_R1,	qword [rsp+qword*7]	//<-- rsp after register saves.
 |.endif
-|.define SAVE_CFRAME,	aword [rsp+aword*4]
-|.define SAVE_PC,	aword [rsp+aword*3]
-|.define SAVE_L,	aword [rsp+aword*2]
+|.define SAVE_CFRAME,	qword [rsp+qword*6]
+|.define UNUSED2,	qword [rsp+qword*5]
+|.define UNUSED1,	dword [rsp+dword*9]	// Padding dword above SAVE_VMSTATE.
+|.define SAVE_VMSTATE,	dword [rsp+dword*8]
+|.define SAVE_PC,	qword [rsp+qword*3]
+|.define SAVE_L,	qword [rsp+qword*2]
 |.define SAVE_ERRF,	dword [rsp+dword*3]
 |.define SAVE_NRES,	dword [rsp+dword*2]
-|.define TMP1,		aword [rsp]		//<-- rsp while in interpreter.
+|.define TMP1,		qword [rsp]		//<-- rsp while in interpreter.
 |//----- 16 byte aligned
 |
 |.define TMP1d,		dword [rsp]
@@ -342,6 +345,22 @@
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
 |.endmacro
 |
+|// Save vmstate through register.
+|.macro save_vmstate_through, reg
+|.if not WIN
+|  mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)]
+|  mov SAVE_VMSTATE, reg
+|.endif // WIN
+|.endmacro
+|
+|// Restore vmstate through register.
+|.macro restore_vmstate_through, reg
+|.if not WIN
+|  mov reg, SAVE_VMSTATE
+|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], reg
+|.endif // WIN
+|.endmacro
+|
 |.macro fpop1; fstp st1; .endmacro
 |
 |// Synthesize SSE FP constants.
@@ -416,7 +435,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  jnz ->vm_returnp
   |
   |  // Return to C.
-  |  set_vmstate C
+  |  set_vmstate CFUNC
   |  and PC, -8
   |  sub PC, BASE
   |  neg PC				// Previous base = BASE - delta.
@@ -448,6 +467,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  xor eax, eax			// Ok return status for vm_pcall.
   |
   |->vm_leave_unw:
+  |  // DISPATCH must be set up properly here.
+  |  restore_vmstate_through RAd
   |  restoreregs
   |  ret
   |
@@ -493,7 +514,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov L:DISPATCH, SAVE_L
   |  mov GL:RB, L:DISPATCH->glref
   |  mov GL:RB->cur_L, L:DISPATCH
-  |  mov dword GL:RB->vmstate, ~LJ_VMST_C
+  |  mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC
+  |  mov DISPATCH, L:DISPATCH->glref	// Setup pointer to dispatch table.
+  |  add DISPATCH, GG_G2DISP
   |  jmp ->vm_leave_unw
   |
   |->vm_unwind_rethrow:
@@ -521,7 +544,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov [BASE-16], RA			// Prepend false to error message.
   |  mov [BASE-8], RB
   |  mov RA, -16			// Results start at BASE+RA = BASE-16.
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // INTERP until jump to BC_RET* or return to C
   |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
   |
   |//-----------------------------------------------------------------------
@@ -575,6 +598,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  lea KBASE, [esp+CFRAME_RESUME]
   |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
   |  add DISPATCH, GG_G2DISP
+  |  save_vmstate_through TMPRd
   |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
   |  mov SAVE_CFRAME, RD
   |  mov SAVE_NRES, RDd
@@ -585,7 +609,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  // Resume after yield (like a return).
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
   |  mov byte L:RB->status, RDL
   |  mov BASE, L:RB->base
   |  mov RD, L:RB->top
@@ -622,11 +646,12 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov SAVE_CFRAME, KBASE
   |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
   |  add DISPATCH, GG_G2DISP
+  |  save_vmstate_through RDd
   |  mov L:RB->cframe, rsp
   |
   |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC*
   |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
   |  add PC, RA
   |  sub PC, BASE			// PC = frame delta + frame type
@@ -658,6 +683,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov SAVE_ERRF, 0			// No error function.
   |  mov SAVE_NRES, KBASEd		// Neg. delta means cframe w/o frame.
   |   add DISPATCH, GG_G2DISP
+  |  save_vmstate_through KBASEd
   |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
   |
   |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.
@@ -697,6 +723,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  cleartp LFUNC:KBASE
   |  mov KBASE, LFUNC:KBASE->pc
   |  mov KBASE, [KBASE+PC2PROTO(k)]
+  |  set_vmstate LFUNC			// LFUNC after KBASE restoration
   |  // BASE = base, RC = result, RB = meta base
   |  jmp RA				// Jump to continuation.
   |
@@ -1137,15 +1164,16 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc, name
   |->ff_ .. name:
+  |  set_vmstate FFUNC
   |.endmacro
   |
   |.macro .ffunc_1, name
-  |->ff_ .. name:
+  |  .ffunc name
   |  cmp NARGS:RDd, 1+1;  jb ->fff_fallback
   |.endmacro
   |
   |.macro .ffunc_2, name
-  |->ff_ .. name:
+  |  .ffunc name
   |  cmp NARGS:RDd, 2+1;  jb ->fff_fallback
   |.endmacro
   |
@@ -1578,7 +1606,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov L:PC, TMP1
   |  mov BASE, L:RB->base
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
   |
   |  cmp eax, LUA_YIELD
   |  ja >8
@@ -1717,6 +1745,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  movzx RAd, PC_RA
   |  neg RA
   |  lea BASE, [BASE+RA*8-16]		// base = base - (RA+2)*8
+  |  set_vmstate LFUNC			// LFUNC state after BASE restoration
   |  ins_next
   |
   |6:  // Fill up results with nil.
@@ -2481,7 +2510,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  mov L:RB->base, BASE
   |  mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
-  |  set_vmstate INTERP
+  |  set_vmstate LFUNC			// LFUNC after BASE & KBASE restoration
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  mov RCd, [PC]
   |  movzx RAd, RCH
@@ -2697,8 +2726,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov CARG1, CTSTATE
   |  call extern lj_ccallback_enter	// (CTState *cts, void *cf)
   |  // lua_State * returned in eax (RD).
-  |  set_vmstate INTERP
   |  mov BASE, L:RD->base
+  |  set_vmstate LFUNC			// LFUNC after BASE restoration
   |  mov RD, L:RD->top
   |  sub RD, BASE
   |  mov LFUNC:RB, [BASE-16]
@@ -3974,6 +4003,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
 
   case BC_CALL: case BC_CALLM:
     |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+    |  set_vmstate INTERP		// INTERP until a new BASE is setup
     if (op == BC_CALLM) {
       |  add NARGS:RDd, MULTRES
     }
@@ -3995,6 +4025,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov LFUNC:RB, [RA-16]
     |  checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
     |->BC_CALLT_Z:
+    |  set_vmstate INTERP		// INTERP until a new BASE is setup
     |  mov PC, [BASE-8]
     |  test PCd, FRAME_TYPE
     |  jnz >7
@@ -4219,6 +4250,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |  shl RAd, 3
     }
     |1:
+    |  set_vmstate INTERP // INTERP until the old BASE & KBASE is restored
     |  mov PC, [BASE-8]
     |  mov MULTRES, RDd			// Save nresults+1.
     |  test PCd, FRAME_TYPE		// Check frame type marker.
@@ -4260,6 +4292,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  cleartp LFUNC:KBASE
     |  mov KBASE, LFUNC:KBASE->pc
     |  mov KBASE, [KBASE+PC2PROTO(k)]
+    |  set_vmstate LFUNC // LFUNC after the old BASE & KBASE is restored
     |  ins_next
     |
     |6:  // Fill up results with nil.
@@ -4551,6 +4584,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
     |  mov KBASE, [PC-4+PC2PROTO(k)]
     |  mov L:RB, SAVE_L
+    |  set_vmstate LFUNC		// LFUNC after KBASE restoration
     |  lea RA, [BASE+RA*8]		// Top of frame.
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_f
@@ -4588,6 +4622,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov [RD-8], RB			// Store delta + FRAME_VARG.
     |  mov [RD-16], LFUNC:KBASE		// Store copy of LFUNC.
     |  mov L:RB, SAVE_L
+    |  set_vmstate LFUNC		// LFUNC after KBASE restoration
     |  lea RA, [RD+RA*8]
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_v		// Need to grow stack.
@@ -4643,7 +4678,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |  mov CARG1, L:RB		// Caveat: CARG1 may be RA.
     }
     |  ja ->vm_growstack_c		// Need to grow stack.
-    |  set_vmstate C
+    |  set_vmstate CFUNC		// CFUNC before entering C function
     if (op == BC_FUNCC) {
       |  call KBASE			// (lua_State *L)
     } else {
@@ -4653,7 +4688,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // nresults returned in eax (RD).
     |  mov BASE, L:RB->base
     |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-    |  set_vmstate INTERP
+    |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
     |  lea RA, [BASE+RD*8]
     |  neg RA
     |  add RA, L:RB->top		// RA = (L->top-(L->base+nresults))*8
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index d76fbe3..b9dffa9 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -140,7 +140,7 @@
 |
 |.else
 |
-|.define CFRAME_SPACE,	aword*7			// Delta for esp (see <--).
+|.define CFRAME_SPACE,	dword*11			// Delta for esp (see <--).
 |.macro saveregs_
 |  push edi; push esi; push ebx
 |  sub esp, CFRAME_SPACE
@@ -183,25 +183,30 @@
 |.define ARG1,		aword [esp]		//<-- esp while in interpreter.
 |//----- 16 byte aligned, ^^^ arguments for C callee
 |.else
-|.define SAVE_ERRF,	aword [esp+aword*15]	// vm_pcall/vm_cpcall only.
-|.define SAVE_NRES,	aword [esp+aword*14]
-|.define SAVE_CFRAME,	aword [esp+aword*13]
-|.define SAVE_L,	aword [esp+aword*12]
+|.define SAVE_ERRF,	dword [esp+dword*19]	// vm_pcall/vm_cpcall only.
+|.define SAVE_NRES,	dword [esp+dword*18]
+|.define SAVE_CFRAME,	dword [esp+dword*17]
+|.define SAVE_L,	dword [esp+dword*16]
 |//----- 16 byte aligned, ^^^ arguments from C caller
-|.define SAVE_RET,	aword [esp+aword*11]	//<-- esp entering interpreter.
-|.define SAVE_R4,	aword [esp+aword*10]
-|.define SAVE_R3,	aword [esp+aword*9]
-|.define SAVE_R2,	aword [esp+aword*8]
+|.define SAVE_RET,	dword [esp+dword*15]	//<-- esp entering interpreter.
+|.define SAVE_R4,	dword [esp+dword*14]
+|.define SAVE_R3,	dword [esp+dword*13]
+|.define SAVE_R2,	dword [esp+dword*12]
 |//----- 16 byte aligned
-|.define SAVE_R1,	aword [esp+aword*7]	//<-- esp after register saves.
-|.define SAVE_PC,	aword [esp+aword*6]
-|.define TMP2,		aword [esp+aword*5]
-|.define TMP1,		aword [esp+aword*4]
+|.define UNUSED3,	dword [esp+dword*11]
+|.define UNUSED2,	dword [esp+dword*10]
+|.define UNUSED1,	dword [esp+dword*9]
+|.define SAVE_VMSTATE,	dword [esp+dword*8]
 |//----- 16 byte aligned
-|.define ARG4,		aword [esp+aword*3]
-|.define ARG3,		aword [esp+aword*2]
-|.define ARG2,		aword [esp+aword*1]
-|.define ARG1,		aword [esp]		//<-- esp while in interpreter.
+|.define SAVE_R1,	dword [esp+dword*7]	//<-- esp after register saves.
+|.define SAVE_PC,	dword [esp+dword*6]
+|.define TMP2,		dword [esp+dword*5]
+|.define TMP1,		dword [esp+dword*4]
+|//----- 16 byte aligned
+|.define ARG4,		dword [esp+dword*3]
+|.define ARG3,		dword [esp+dword*2]
+|.define ARG2,		dword [esp+dword*1]
+|.define ARG1,		dword [esp]		//<-- esp while in interpreter.
 |//----- 16 byte aligned, ^^^ arguments for C callee
 |.endif
 |
@@ -269,7 +274,7 @@
 |//-----------------------------------------------------------------------
 |.else			// x64/POSIX stack layout
 |
-|.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
+|.define CFRAME_SPACE,	qword*7			// Delta for rsp (see <--).
 |.macro saveregs_
 |  push rbx; push r15; push r14
 |.if NO_UNWIND
@@ -290,33 +295,35 @@
 |
 |//----- 16 byte aligned,
 |.if NO_UNWIND
-|.define SAVE_RET,	aword [rsp+aword*11]	//<-- rsp entering interpreter.
-|.define SAVE_R4,	aword [rsp+aword*10]
-|.define SAVE_R3,	aword [rsp+aword*9]
-|.define SAVE_R2,	aword [rsp+aword*8]
-|.define SAVE_R1,	aword [rsp+aword*7]
-|.define SAVE_RU2,	aword [rsp+aword*6]
-|.define SAVE_RU1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.define SAVE_RET,	qword [rsp+qword*13]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	qword [rsp+qword*12]
+|.define SAVE_R3,	qword [rsp+qword*11]
+|.define SAVE_R2,	qword [rsp+qword*10]
+|.define SAVE_R1,	qword [rsp+qword*9]
+|.define SAVE_RU2,	qword [rsp+qword*8]
+|.define SAVE_RU1,	qword [rsp+qword*7]	//<-- rsp after register saves.
 |.else
-|.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
-|.define SAVE_R4,	aword [rsp+aword*8]
-|.define SAVE_R3,	aword [rsp+aword*7]
-|.define SAVE_R2,	aword [rsp+aword*6]
-|.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves.
+|.define SAVE_RET,	qword [rsp+qword*11]	//<-- rsp entering interpreter.
+|.define SAVE_R4,	qword [rsp+qword*10]
+|.define SAVE_R3,	qword [rsp+qword*9]
+|.define SAVE_R2,	qword [rsp+qword*8]
+|.define SAVE_R1,	qword [rsp+qword*7]	//<-- rsp after register saves.
 |.endif
-|.define SAVE_CFRAME,	aword [rsp+aword*4]
+|.define SAVE_CFRAME,	qword [rsp+qword*6]
+|.define UNUSED1,	qword [rsp+qword*5]
+|.define SAVE_VMSTATE,	dword [rsp+dword*8]
 |.define SAVE_PC,	dword [rsp+dword*7]
 |.define SAVE_L,	dword [rsp+dword*6]
 |.define SAVE_ERRF,	dword [rsp+dword*5]
 |.define SAVE_NRES,	dword [rsp+dword*4]
-|.define TMPa,		aword [rsp+aword*1]
+|.define TMPa,		qword [rsp+qword*1]
 |.define TMP2,		dword [rsp+dword*1]
 |.define TMP1,		dword [rsp]		//<-- rsp while in interpreter.
 |//----- 16 byte aligned
 |
 |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
 |.define TMPQ,		qword [rsp]
-|.define TMP3,		dword [rsp+aword*1]
+|.define TMP3,		dword [rsp+qword*1]
 |.define MULTRES,	TMP2
 |
 |.endif
@@ -433,6 +440,22 @@
 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
 |.endmacro
 |
+|// Save vmstate through register.
+|.macro save_vmstate_through, reg
+|.if not WIN
+|  mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)]
+|  mov SAVE_VMSTATE, reg
+|.endif // WIN
+|.endmacro
+|
+|// Restore vmstate through register.
+|.macro restore_vmstate_through, reg
+|.if not WIN
+|  mov reg, SAVE_VMSTATE
+|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], reg
+|.endif // WIN
+|.endmacro
+|
 |// x87 compares.
 |.macro fcomparepp			// Compare and pop st0 >< st1.
 |  fucomip st1
@@ -520,7 +543,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  jnz ->vm_returnp
   |
   |  // Return to C.
-  |  set_vmstate C
+  |  set_vmstate CFUNC
   |  and PC, -8
   |  sub PC, BASE
   |  neg PC				// Previous base = BASE - delta.
@@ -559,6 +582,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  xor eax, eax			// Ok return status for vm_pcall.
   |
   |->vm_leave_unw:
+  |  // DISPATCH must be set up properly here.
+  |  restore_vmstate_through RA
   |  restoreregs
   |  ret
   |
@@ -613,7 +638,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov L:DISPATCH, SAVE_L
   |  mov GL:RB, L:DISPATCH->glref
   |  mov dword GL:RB->cur_L, L:DISPATCH
-  |  mov dword GL:RB->vmstate, ~LJ_VMST_C
+  |  mov dword GL:RB->vmstate, ~LJ_VMST_CFUNC
+  |  mov DISPATCH, L:DISPATCH->glref	// Setup pointer to dispatch table.
+  |  add DISPATCH, GG_G2DISP
   |  jmp ->vm_leave_unw
   |
   |->vm_unwind_rethrow:
@@ -647,7 +674,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov PC, [BASE-4]			// Fetch PC of previous frame.
   |  mov dword [BASE-4], LJ_TFALSE	// Prepend false to error message.
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // INTERP until jump to BC_RET* or return to C
   |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
   |
   |.if WIN and not X64
@@ -714,10 +741,11 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov RA, INARG_BASE			// Caveat: overlaps SAVE_CFRAME!
   |.endif
   |  mov PC, FRAME_CP
-  |  xor RD, RD
   |  lea KBASEa, [esp+CFRAME_RESUME]
   |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
   |  add DISPATCH, GG_G2DISP
+  |  save_vmstate_through RD
+  |  xor RD, RD
   |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
   |  mov SAVE_CFRAME, RDa
   |.if X64
@@ -730,7 +758,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  // Resume after yield (like a return).
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
   |  mov byte L:RB->status, RDL
   |  mov BASE, L:RB->base
   |  mov RD, L:RB->top
@@ -774,6 +802,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov SAVE_CFRAME, KBASEa
   |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
   |  add DISPATCH, GG_G2DISP
+  |  save_vmstate_through RD
   |.if X64
   |  mov L:RB->cframe, rsp
   |.else
@@ -782,7 +811,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC*
   |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
   |  add PC, RA
   |  sub PC, BASE			// PC = frame delta + frame type
@@ -823,6 +852,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov SAVE_ERRF, 0			// No error function.
   |  mov SAVE_NRES, KBASE		// Neg. delta means cframe w/o frame.
   |   add DISPATCH, GG_G2DISP
+  |  save_vmstate_through KBASE
   |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
   |
   |.if X64
@@ -885,6 +915,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, LFUNC:KBASE->pc
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  // BASE = base, RC = result, RB = meta base
+  |  set_vmstate LFUNC			// LFUNC after KBASE restoration
   |  jmp RAa				// Jump to continuation.
   |
   |.if FFI
@@ -1409,15 +1440,16 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc, name
   |->ff_ .. name:
+  |  set_vmstate FFUNC
   |.endmacro
   |
   |.macro .ffunc_1, name
-  |->ff_ .. name:
+  |  .ffunc name
   |  cmp NARGS:RD, 1+1;  jb ->fff_fallback
   |.endmacro
   |
   |.macro .ffunc_2, name
-  |->ff_ .. name:
+  |  .ffunc name
   |  cmp NARGS:RD, 2+1;  jb ->fff_fallback
   |.endmacro
   |
@@ -1924,7 +1956,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.endif
   |  mov BASE, L:RB->base
   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-  |  set_vmstate INTERP
+  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
   |
   |  cmp eax, LUA_YIELD
   |  ja >8
@@ -2089,6 +2121,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  movzx RA, PC_RA
   |  not RAa				// Note: ~RA = -(RA+1)
   |  lea BASE, [BASE+RA*8]		// base = base - (RA+1)*8
+  |  set_vmstate LFUNC			// LFUNC state after BASE restoration
   |  ins_next
   |
   |6:  // Fill up results with nil.
@@ -2933,7 +2966,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  mov L:RB->base, BASE
   |  mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
-  |  set_vmstate INTERP
+  |  set_vmstate LFUNC			// LFUNC after BASE & KBASE restoration
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  mov RC, [PC]
   |  movzx RA, RCH
@@ -3203,8 +3236,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov FCARG1, CTSTATE
   |  call extern lj_ccallback_enter at 8	// (CTState *cts, void *cf)
   |  // lua_State * returned in eax (RD).
-  |  set_vmstate INTERP
   |  mov BASE, L:RD->base
+  |  set_vmstate LFUNC			// LFUNC after BASE restoration
   |  mov RD, L:RD->top
   |  sub RD, BASE
   |  mov LFUNC:RB, [BASE-8]
@@ -4683,6 +4716,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
 
   case BC_CALL: case BC_CALLM:
     |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+    |  set_vmstate INTERP		// INTERP until a new BASE is setup
     if (op == BC_CALLM) {
       |  add NARGS:RD, MULTRES
     }
@@ -4706,6 +4740,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  cmp dword [RA-4], LJ_TFUNC
     |  jne ->vmeta_call
     |->BC_CALLT_Z:
+    |  set_vmstate INTERP		// INTERP until a new BASE is setup
     |  mov PC, [BASE-4]
     |  test PC, FRAME_TYPE
     |  jnz >7
@@ -4989,6 +5024,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |  shl RA, 3
     }
     |1:
+    |  set_vmstate INTERP // INTERP until the old BASE & KBASE is restored
     |  mov PC, [BASE-4]
     |  mov MULTRES, RD			// Save nresults+1.
     |  test PC, FRAME_TYPE		// Check frame type marker.
@@ -5043,6 +5079,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov LFUNC:KBASE, [BASE-8]
     |  mov KBASE, LFUNC:KBASE->pc
     |  mov KBASE, [KBASE+PC2PROTO(k)]
+    |  set_vmstate LFUNC // LFUNC after the old BASE & KBASE is restored
     |  ins_next
     |
     |6:  // Fill up results with nil.
@@ -5330,6 +5367,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
     |  mov KBASE, [PC-4+PC2PROTO(k)]
     |  mov L:RB, SAVE_L
+    |  set_vmstate LFUNC		// LFUNC after KBASE restoration
     |  lea RA, [BASE+RA*8]		// Top of frame.
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_f
@@ -5367,6 +5405,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  mov [RD-4], RB			// Store delta + FRAME_VARG.
     |  mov [RD-8], LFUNC:KBASE		// Store copy of LFUNC.
     |  mov L:RB, SAVE_L
+    |  set_vmstate LFUNC		// LFUNC after KBASE restoration
     |  lea RA, [RD+RA*8]
     |  cmp RA, L:RB->maxstack
     |  ja ->vm_growstack_v		// Need to grow stack.
@@ -5431,7 +5470,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
       |.endif
     }
     |  ja ->vm_growstack_c		// Need to grow stack.
-    |  set_vmstate C
+    |  set_vmstate CFUNC		// CFUNC before entering C function
     if (op == BC_FUNCC) {
       |  call KBASEa			// (lua_State *L)
     } else {
@@ -5441,7 +5480,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  // nresults returned in eax (RD).
     |  mov BASE, L:RB->base
     |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-    |  set_vmstate INTERP
+    |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
     |  lea RA, [BASE+RD*8]
     |  neg RA
     |  add RA, L:RB->top		// RA = (L->top-(L->base+nresults))*8
-- 
2.28.0



More information about the Tarantool-patches mailing list