[Tarantool-patches] [PATCH luajit v2 3/7] vm: introduce VM states for Lua and fast functions

Igor Munkin imun at tarantool.org
Mon Dec 28 02:48:26 MSK 2020


Sergey,

Thanks for the patch! Please consider the comments below.

On 25.12.20, Sergey Kaplun wrote:
> This patch introduces LJ_VMST_LFUNC and LJ_VMST_FFUNC VM states
> separated from LJ_VMST_INERP. New VM states allow to determine the
> context of Lua VM execution for x86 and x64 arches. Also, LJ_VMST_C is
> renamed to LJ_VMST_CFUNC for naming consistence with new VM states.
> 
> Also, this patch adjusts stack layout for x86 and x64 arches to save VM
> state for its consistency while stack unwinding when error is raised.
> 
> Part of tarantool/tarantool#5442
> ---
> 
> Changes in v2:
>  - Moved `.if not WIN` macro check inside (save|restore)_vmstate_through
>  - Fixed naming: SAVE_UNUSED\d -> UNUSED\d
> 
>  src/lj_frame.h     |  18 +++----
>  src/lj_obj.h       |   4 +-
>  src/lj_profile.c   |   5 +-
>  src/luajit-gdb.py  |  14 ++---
>  src/vm_arm.dasc    |   6 +--
>  src/vm_arm64.dasc  |   6 +--
>  src/vm_mips.dasc   |   6 +--
>  src/vm_mips64.dasc |   6 +--
>  src/vm_ppc.dasc    |   6 +--
>  src/vm_x64.dasc    |  93 ++++++++++++++++++++++----------
>  src/vm_x86.dasc    | 131 +++++++++++++++++++++++++++++----------------
>  11 files changed, 188 insertions(+), 107 deletions(-)
> 
> diff --git a/src/lj_frame.h b/src/lj_frame.h
> index 19c49a4..2e693f9 100644
> --- a/src/lj_frame.h
> +++ b/src/lj_frame.h

<snipped>

> @@ -152,11 +152,11 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
>  #define CFRAME_OFS_NRES		(22*4)
>  #define CFRAME_OFS_MULTRES	(21*4)
>  #endif
> -#define CFRAME_SIZE		(10*8)
> +#define CFRAME_SIZE		(12*8)

This change looks Windows-related, so is excess.

>  #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 9*16 + 4*8)

<snipped>

> diff --git a/src/lj_profile.c b/src/lj_profile.c
> index 116998e..637e03c 100644
> --- a/src/lj_profile.c
> +++ b/src/lj_profile.c
> @@ -157,7 +157,10 @@ static void profile_trigger(ProfileState *ps)
>      int st = g->vmstate;
>      ps->vmstate = st >= 0 ? 'N' :
>  		  st == ~LJ_VMST_INTERP ? 'I' :
> -		  st == ~LJ_VMST_C ? 'C' :
> +		  st == ~LJ_VMST_CFUNC ? 'C' :
> +		  /* Stubs for profiler hooks. */
> +		  st == ~LJ_VMST_FFUNC ? 'I' :
> +		  st == ~LJ_VMST_LFUNC ? 'I' :

Minor: Move this comparisons above to save the ordering from <lj_obj.h>.

>  		  st == ~LJ_VMST_GC ? 'G' : 'J';
>      g->hookmask = (mask | HOOK_PROFILE);
>      lj_dispatch_update(g);

<snipped>

> diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
> index 80753e0..83cc3e1 100644
> --- a/src/vm_x64.dasc
> +++ b/src/vm_x64.dasc

<snipped>

> @@ -161,26 +161,29 @@

<snipped>

> +|.define SAVE_CFRAME,	qword [rsp+qword*6]
> +|.define UNUSED2,	qword [rsp+qword*5]
> +|.define UNUSED1,	dword [rsp+dword*8]

UNUSED1 should alias <dword [rsp + dword*9]>

Minor: What about UNUSEDd and UNUSEDq instead? Feel free to ignore.

> +|.define SAVE_VMSTATE,	dword [rsp+dword*8]

<snipped>

> @@ -342,6 +345,22 @@
>  |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
>  |.endmacro
>  |
> +|// Save vmstate through register.
> +|.macro save_vmstate_through, reg
> +|.if not WIN
> +|  mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)]
> +|  mov SAVE_VMSTATE, reg

This is too complicated. VM is implemented as a file with 6kloc of
pseudo-assembly in it and I don't want to make its support harder. We've
recently found a bug with miscomparison that we coudn't catch for a
year. This is not a fun. This is not a kinda sport. This is damn 6k kloc
of assembly we ought to support. So I propose to make this macro work
with no args at all. Consider the following:
|.macro save_vmstate
|.if not WIN
|  mov TMPRd, dword [DISPATCH+DISPATCH_GL(vmstate)]	// TMPRd is r10d
|  mov SAVE_VMSTATE, TMPRd
|.endif // WIN
|
|.macro restore_vmstate
|.if not WIN
|  mov TMPRd, SAVE_VMSTATE	// XCHGd is r11d
|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], TMPRd
|.endif // WIN

I agree that TMPRd is used in other places within vm_x64.dasc but this
is temporary register and I failed to find any conflict. I left the
comments below to ease the check.

> +|.endif // WIN
> +|.endmacro

<snipped>

> @@ -448,6 +467,8 @@ static void build_subroutines(BuildCtx *ctx)
>    |  xor eax, eax			// Ok return status for vm_pcall.
>    |
>    |->vm_leave_unw:
> +  |  // DISPATCH required to set properly.
> +  |  restore_vmstate_through RAd

Re TMPR: The TMP register is not used, so there is no need to restore
vmstate via RAd. Everything is OK.

>    |  restoreregs
>    |  ret
>    |

<snipped>

> @@ -521,7 +544,7 @@ static void build_subroutines(BuildCtx *ctx)
>    |  mov [BASE-16], RA			// Prepend false to error message.
>    |  mov [BASE-8], RB
>    |  mov RA, -16			// Results start at BASE+RA = BASE-16.
> -  |  set_vmstate INTERP
> +  |  set_vmstate INTERP // INTERP until jump to BC_RET* or return to C

Typo: Please, adjust the comments considering the ones nearby.

>    |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
>    |
>    |//-----------------------------------------------------------------------
> @@ -575,6 +598,7 @@ static void build_subroutines(BuildCtx *ctx)
>    |  lea KBASE, [esp+CFRAME_RESUME]
>    |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
>    |  add DISPATCH, GG_G2DISP
> +  |  save_vmstate_through TMPRd

Re TMPR: It is manually used here. Everything is OK.

>    |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
>    |  mov SAVE_CFRAME, RD
>    |  mov SAVE_NRES, RDd
> @@ -585,7 +609,7 @@ static void build_subroutines(BuildCtx *ctx)
>    |
>    |  // Resume after yield (like a return).
>    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> -  |  set_vmstate INTERP
> +  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return

Typo: Please, adjust the comments considering the ones nearby.

>    |  mov byte L:RB->status, RDL
>    |  mov BASE, L:RB->base
>    |  mov RD, L:RB->top
> @@ -622,11 +646,12 @@ static void build_subroutines(BuildCtx *ctx)
>    |  mov SAVE_CFRAME, KBASE
>    |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
>    |  add DISPATCH, GG_G2DISP
> +  |  save_vmstate_through RDd

Re TMPR: The temporary register is not used prior to this line.
Everything is OK.

>    |  mov L:RB->cframe, rsp
>    |
>    |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
>    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> -  |  set_vmstate INTERP
> +  |  set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC*

This branch is also taken for lj_vm_pcall/lj_vm_call/lj_vm_cpcall.

Typo: Please, adjust the comments considering the ones nearby.

>    |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
>    |  add PC, RA
>    |  sub PC, BASE			// PC = frame delta + frame type
> @@ -658,6 +683,7 @@ static void build_subroutines(BuildCtx *ctx)
>    |  mov SAVE_ERRF, 0			// No error function.
>    |  mov SAVE_NRES, KBASEd		// Neg. delta means cframe w/o frame.
>    |   add DISPATCH, GG_G2DISP
> +  |  save_vmstate_through KBASEd

Re TMPR: The temporary register is not used prior to this line.
Everything is OK.

>    |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
>    |
>    |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.

<snipped>

> @@ -1578,7 +1606,7 @@ static void build_subroutines(BuildCtx *ctx)
>    |  mov L:PC, TMP1
>    |  mov BASE, L:RB->base
>    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> -  |  set_vmstate INTERP
> +  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return

Typo: Please, adjust the comments considering the ones nearby.

>    |
>    |  cmp eax, LUA_YIELD
>    |  ja >8

<snipped>

> @@ -3974,6 +4003,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>  
>    case BC_CALL: case BC_CALLM:
>      |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
> +    |  set_vmstate INTERP		// INTERP until a new BASE is setup

It looks like VM state is INTERP until the call enters *FUNC* bytecode.

>      if (op == BC_CALLM) {
>        |  add NARGS:RDd, MULTRES
>      }
> @@ -3995,6 +4025,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>      |  mov LFUNC:RB, [RA-16]
>      |  checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
>      |->BC_CALLT_Z:
> +    |  set_vmstate INTERP		// INTERP until a new BASE is setup

Ditto.

>      |  mov PC, [BASE-8]
>      |  test PCd, FRAME_TYPE
>      |  jnz >7
> @@ -4219,6 +4250,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>        |  shl RAd, 3
>      }
>      |1:
> +    |  set_vmstate INTERP // INTERP until the old BASE & KBASE is restored

Typo: Please, adjust the comments considering the ones nearby.

>      |  mov PC, [BASE-8]
>      |  mov MULTRES, RDd			// Save nresults+1.
>      |  test PCd, FRAME_TYPE		// Check frame type marker.

<snipped>

> @@ -4551,6 +4584,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>      |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
>      |  mov KBASE, [PC-4+PC2PROTO(k)]
>      |  mov L:RB, SAVE_L
> +    |  set_vmstate LFUNC		// LFUNC after KBASE restoration

This is OK, but this path is also taken for the JFUNC. Do we need to set
another VM state for this case (in scope of JLOOP)?

>      |  lea RA, [BASE+RA*8]		// Top of frame.
>      |  cmp RA, L:RB->maxstack
>      |  ja ->vm_growstack_f
> @@ -4588,6 +4622,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>      |  mov [RD-8], RB			// Store delta + FRAME_VARG.
>      |  mov [RD-16], LFUNC:KBASE		// Store copy of LFUNC.
>      |  mov L:RB, SAVE_L
> +    |  set_vmstate LFUNC		// LFUNC after KBASE restoration

Ditto.

>      |  lea RA, [RD+RA*8]
>      |  cmp RA, L:RB->maxstack
>      |  ja ->vm_growstack_v		// Need to grow stack.

<snipped>

> @@ -4653,7 +4688,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>      |  // nresults returned in eax (RD).
>      |  mov BASE, L:RB->base
>      |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> -    |  set_vmstate INTERP
> +    |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return

Typo: Please, adjust the comments considering the ones nearby.

>      |  lea RA, [BASE+RD*8]
>      |  neg RA
>      |  add RA, L:RB->top		// RA = (L->top-(L->base+nresults))*8
> diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
> index d76fbe3..b9dffa9 100644
> --- a/src/vm_x86.dasc
> +++ b/src/vm_x86.dasc

<snipped>

> @@ -290,33 +295,35 @@

<snipped>

> +|.define SAVE_CFRAME,	qword [rsp+qword*6]
> +|.define UNUSED1,	qword [rsp+qword*5]

There is lost unused dword right here.

> +|.define SAVE_VMSTATE,	dword [rsp+dword*8]

<snipped>

> @@ -433,6 +440,22 @@
>  |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
>  |.endmacro
>  |
> +|// Save vmstate through register.
> +|.macro save_vmstate_through, reg
> +|.if not WIN
> +|  mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)]
> +|  mov SAVE_VMSTATE, reg

See the rationale in the note to vm_x64.dasc. Unfortunately, for
vm_x86.dasc the macro has two branches, *but* I think it is much better
than managing free registers in *whole* VM. Consider the following:
|.macro save_vmstate
|.if not WIN
|.if not X64
|  mov UNUSED1, ecx	// Please rename this field (e.g SAVE_XCHG).
|  mov ecx, dword [DISPATCH+DISPATCH_GL(vmstate)]
|  mov SAVE_VMSTATE, ecx
|  mov ecx, UNUSED1
|.else // X64
|  mov XCHGd, dword [DISPATCH+DISPATCH_GL(vmstate)]	// XCHGd is r11d
|  mov SAVE_VMSTATE, XCHGd
|.endif // X64
|.endif // WIN
|
|.macro restore_vmstate
|.if not WIN
|.if not X64
|  mov UNUSED1, ecx	// Please rename this field (e.g SPILLECX).
|  mov ecx, SAVE_VMSTATE
|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ecx
|  mov ecx, UNUSED1
|.else // X64
|  mov XCHGd, SAVE_VMSTATE	// XCHGd is r11d
|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], XCHGd
|.endif // X64
|.endif // WIN

> +|.endif // WIN
> +|.endmacro

<snipped>

>    |->vm_unwind_rethrow:
> @@ -647,7 +674,7 @@ static void build_subroutines(BuildCtx *ctx)
>    |  mov PC, [BASE-4]			// Fetch PC of previous frame.
>    |  mov dword [BASE-4], LJ_TFALSE	// Prepend false to error message.
>    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> -  |  set_vmstate INTERP
> +  |  set_vmstate INTERP // INTERP until jump to BC_RET* or return to C

Typo: Please, adjust the comments considering the ones nearby.

>    |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
>    |
>    |.if WIN and not X64

<snipped>

> @@ -730,7 +758,7 @@ static void build_subroutines(BuildCtx *ctx)
>    |
>    |  // Resume after yield (like a return).
>    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> -  |  set_vmstate INTERP
> +  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return

Typo: Please, adjust the comments considering the ones nearby.

>    |  mov byte L:RB->status, RDL
>    |  mov BASE, L:RB->base
>    |  mov RD, L:RB->top

<snipped>

> @@ -782,7 +811,7 @@ static void build_subroutines(BuildCtx *ctx)
>    |
>    |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
>    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> -  |  set_vmstate INTERP
> +  |  set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC*

Typo: Please, adjust the comments considering the ones nearby.

>    |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
>    |  add PC, RA
>    |  sub PC, BASE			// PC = frame delta + frame type

<snipped>

> @@ -1924,7 +1956,7 @@ static void build_subroutines(BuildCtx *ctx)
>    |.endif
>    |  mov BASE, L:RB->base
>    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> -  |  set_vmstate INTERP
> +  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return

Typo: Please, adjust the comments considering the ones nearby.

>    |
>    |  cmp eax, LUA_YIELD
>    |  ja >8

<snipped>

> @@ -4683,6 +4716,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>  
>    case BC_CALL: case BC_CALLM:
>      |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
> +    |  set_vmstate INTERP		// INTERP until a new BASE is setup

It looks like VM state is INTERP until the call enters *FUNC* bytecode.

>      if (op == BC_CALLM) {
>        |  add NARGS:RD, MULTRES
>      }
> @@ -4706,6 +4740,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>      |  cmp dword [RA-4], LJ_TFUNC
>      |  jne ->vmeta_call
>      |->BC_CALLT_Z:
> +    |  set_vmstate INTERP		// INTERP until a new BASE is setup

Ditto.

>      |  mov PC, [BASE-4]
>      |  test PC, FRAME_TYPE
>      |  jnz >7
> @@ -4989,6 +5024,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>        |  shl RA, 3
>      }
>      |1:
> +    |  set_vmstate INTERP // INTERP until the old BASE & KBASE is restored

Typo: Please, adjust the comments considering the ones nearby.

>      |  mov PC, [BASE-4]
>      |  mov MULTRES, RD			// Save nresults+1.
>      |  test PC, FRAME_TYPE		// Check frame type marker.
> @@ -5043,6 +5079,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>      |  mov LFUNC:KBASE, [BASE-8]
>      |  mov KBASE, LFUNC:KBASE->pc
>      |  mov KBASE, [KBASE+PC2PROTO(k)]
> +    |  set_vmstate LFUNC // LFUNC after the old BASE & KBASE is restored

Typo: Please, adjust the comments considering the ones nearby.

>      |  ins_next
>      |
>      |6:  // Fill up results with nil.
> @@ -5330,6 +5367,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>      |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
>      |  mov KBASE, [PC-4+PC2PROTO(k)]
>      |  mov L:RB, SAVE_L
> +    |  set_vmstate LFUNC		// LFUNC after KBASE restoration

This is OK, but this path is also taken for the JFUNC. Do we need to set
another VM state for this case (in scope of JLOOP)?

>      |  lea RA, [BASE+RA*8]		// Top of frame.
>      |  cmp RA, L:RB->maxstack
>      |  ja ->vm_growstack_f
> @@ -5367,6 +5405,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>      |  mov [RD-4], RB			// Store delta + FRAME_VARG.
>      |  mov [RD-8], LFUNC:KBASE		// Store copy of LFUNC.
>      |  mov L:RB, SAVE_L
> +    |  set_vmstate LFUNC		// LFUNC after KBASE restoration

Ditto.

>      |  lea RA, [RD+RA*8]
>      |  cmp RA, L:RB->maxstack
>      |  ja ->vm_growstack_v		// Need to grow stack.

<snipped>

> @@ -5441,7 +5480,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>      |  // nresults returned in eax (RD).
>      |  mov BASE, L:RB->base
>      |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> -    |  set_vmstate INTERP
> +    |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return

Typo: Please, adjust the comments considering the ones nearby.

>      |  lea RA, [BASE+RD*8]
>      |  neg RA
>      |  add RA, L:RB->top		// RA = (L->top-(L->base+nresults))*8
> -- 
> 2.28.0
> 

-- 
Best regards,
IM


More information about the Tarantool-patches mailing list