[Tarantool-patches] [PATCH luajit v2 3/7] vm: introduce VM states for Lua and fast functions

Sergey Kaplun skaplun at tarantool.org
Mon Dec 28 06:54:36 MSK 2020


Hi, Igor!

Thanks for the review!

On 28.12.20, Igor Munkin wrote:
> Sergey,
> 
> Thanks for the patch! Please consider the comments below.
> 
> On 25.12.20, Sergey Kaplun wrote:
> > This patch introduces LJ_VMST_LFUNC and LJ_VMST_FFUNC VM states
> > separated from LJ_VMST_INERP. New VM states allow to determine the
> > context of Lua VM execution for x86 and x64 arches. Also, LJ_VMST_C is
> > renamed to LJ_VMST_CFUNC for naming consistence with new VM states.
> > 
> > Also, this patch adjusts stack layout for x86 and x64 arches to save VM
> > state for its consistency while stack unwinding when error is raised.
> > 
> > Part of tarantool/tarantool#5442
> > ---
> > 
> > Changes in v2:
> >  - Moved `.if not WIN` macro check inside (save|restore)_vmstate_through
> >  - Fixed naming: SAVE_UNUSED\d -> UNUSED\d
> > 
> >  src/lj_frame.h     |  18 +++----
> >  src/lj_obj.h       |   4 +-
> >  src/lj_profile.c   |   5 +-
> >  src/luajit-gdb.py  |  14 ++---
> >  src/vm_arm.dasc    |   6 +--
> >  src/vm_arm64.dasc  |   6 +--
> >  src/vm_mips.dasc   |   6 +--
> >  src/vm_mips64.dasc |   6 +--
> >  src/vm_ppc.dasc    |   6 +--
> >  src/vm_x64.dasc    |  93 ++++++++++++++++++++++----------
> >  src/vm_x86.dasc    | 131 +++++++++++++++++++++++++++++----------------
> >  11 files changed, 188 insertions(+), 107 deletions(-)
> > 
> > diff --git a/src/lj_frame.h b/src/lj_frame.h
> > index 19c49a4..2e693f9 100644
> > --- a/src/lj_frame.h
> > +++ b/src/lj_frame.h
> 
> <snipped>
> 
> > @@ -152,11 +152,11 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
> >  #define CFRAME_OFS_NRES		(22*4)
> >  #define CFRAME_OFS_MULTRES	(21*4)
> >  #endif
> > -#define CFRAME_SIZE		(10*8)
> > +#define CFRAME_SIZE		(12*8)
> 
> This change looks Windows-related, so is excess.

Reverted.

> 
> >  #define CFRAME_SIZE_JIT		(CFRAME_SIZE + 9*16 + 4*8)
> 
> <snipped>
> 
> > diff --git a/src/lj_profile.c b/src/lj_profile.c
> > index 116998e..637e03c 100644
> > --- a/src/lj_profile.c
> > +++ b/src/lj_profile.c
> > @@ -157,7 +157,10 @@ static void profile_trigger(ProfileState *ps)
> >      int st = g->vmstate;
> >      ps->vmstate = st >= 0 ? 'N' :
> >  		  st == ~LJ_VMST_INTERP ? 'I' :
> > -		  st == ~LJ_VMST_C ? 'C' :
> > +		  st == ~LJ_VMST_CFUNC ? 'C' :
> > +		  /* Stubs for profiler hooks. */
> > +		  st == ~LJ_VMST_FFUNC ? 'I' :
> > +		  st == ~LJ_VMST_LFUNC ? 'I' :
> 
> Minor: Move this comparisons above to save the ordering from <lj_obj.h>.

Fixed.

> 
> >  		  st == ~LJ_VMST_GC ? 'G' : 'J';
> >      g->hookmask = (mask | HOOK_PROFILE);
> >      lj_dispatch_update(g);
> 
> <snipped>
> 
> > diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
> > index 80753e0..83cc3e1 100644
> > --- a/src/vm_x64.dasc
> > +++ b/src/vm_x64.dasc
> 
> <snipped>
> 
> > @@ -161,26 +161,29 @@
> 
> <snipped>
> 
> > +|.define SAVE_CFRAME,	qword [rsp+qword*6]
> > +|.define UNUSED2,	qword [rsp+qword*5]
> > +|.define UNUSED1,	dword [rsp+dword*8]
> 
> UNUSED1 should alias <dword [rsp + dword*9]>

Fixed this typo.

> 
> Minor: What about UNUSEDd and UNUSEDq instead? Feel free to ignore.

Looks inconsistent with respect to original code style.

> 
> > +|.define SAVE_VMSTATE,	dword [rsp+dword*8]
> 
> <snipped>
> 
> > @@ -342,6 +345,22 @@
> >  |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
> >  |.endmacro
> >  |
> > +|// Save vmstate through register.
> > +|.macro save_vmstate_through, reg
> > +|.if not WIN
> > +|  mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)]
> > +|  mov SAVE_VMSTATE, reg
> 
> This is too complicated. VM is implemented as a file with 6kloc of
> pseudo-assembly in it and I don't want to make its support harder. We've
> recently found a bug with miscomparison that we coudn't catch for a
> year. This is not a fun. This is not a kinda sport. This is damn 6k kloc
> of assembly we ought to support. So I propose to make this macro work
> with no args at all. Consider the following:
> |.macro save_vmstate
> |.if not WIN
> |  mov TMPRd, dword [DISPATCH+DISPATCH_GL(vmstate)]	// TMPRd is r10d
> |  mov SAVE_VMSTATE, TMPRd
> |.endif // WIN
> |
> |.macro restore_vmstate
> |.if not WIN
> |  mov TMPRd, SAVE_VMSTATE	// XCHGd is r11d
> |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], TMPRd
> |.endif // WIN
> 
> I agree that TMPRd is used in other places within vm_x64.dasc but this
> is temporary register and I failed to find any conflict. I left the
> comments below to ease the check.

Agree, update in the next version.

> 
> > +|.endif // WIN
> > +|.endmacro
> 
> <snipped>
> 
> > @@ -448,6 +467,8 @@ static void build_subroutines(BuildCtx *ctx)
> >    |  xor eax, eax			// Ok return status for vm_pcall.
> >    |
> >    |->vm_leave_unw:
> > +  |  // DISPATCH required to set properly.
> > +  |  restore_vmstate_through RAd
> 
> Re TMPR: The TMP register is not used, so there is no need to restore
> vmstate via RAd. Everything is OK.

Thanks!

> 
> >    |  restoreregs
> >    |  ret
> >    |
> 
> <snipped>
> 
> > @@ -521,7 +544,7 @@ static void build_subroutines(BuildCtx *ctx)
> >    |  mov [BASE-16], RA			// Prepend false to error message.
> >    |  mov [BASE-8], RB
> >    |  mov RA, -16			// Results start at BASE+RA = BASE-16.
> > -  |  set_vmstate INTERP
> > +  |  set_vmstate INTERP // INTERP until jump to BC_RET* or return to C
> 
> Typo: Please, adjust the comments considering the ones nearby.

Fixed.

> 
> >    |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
> >    |
> >    |//-----------------------------------------------------------------------
> > @@ -575,6 +598,7 @@ static void build_subroutines(BuildCtx *ctx)
> >    |  lea KBASE, [esp+CFRAME_RESUME]
> >    |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
> >    |  add DISPATCH, GG_G2DISP
> > +  |  save_vmstate_through TMPRd
> 
> Re TMPR: It is manually used here. Everything is OK.

Yep.

> 
> >    |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
> >    |  mov SAVE_CFRAME, RD
> >    |  mov SAVE_NRES, RDd
> > @@ -585,7 +609,7 @@ static void build_subroutines(BuildCtx *ctx)
> >    |
> >    |  // Resume after yield (like a return).
> >    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> > -  |  set_vmstate INTERP
> > +  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
> 
> Typo: Please, adjust the comments considering the ones nearby.

Fixed.

> 
> >    |  mov byte L:RB->status, RDL
> >    |  mov BASE, L:RB->base
> >    |  mov RD, L:RB->top
> > @@ -622,11 +646,12 @@ static void build_subroutines(BuildCtx *ctx)
> >    |  mov SAVE_CFRAME, KBASE
> >    |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
> >    |  add DISPATCH, GG_G2DISP
> > +  |  save_vmstate_through RDd
> 
> Re TMPR: The temporary register is not used prior to this line.
> Everything is OK.

Agree.

> 
> >    |  mov L:RB->cframe, rsp
> >    |
> >    |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
> >    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> > -  |  set_vmstate INTERP
> > +  |  set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC*
> 
> This branch is also taken for lj_vm_pcall/lj_vm_call/lj_vm_cpcall.

Adjusted comment.

> 
> Typo: Please, adjust the comments considering the ones nearby.

Done.

> 
> >    |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
> >    |  add PC, RA
> >    |  sub PC, BASE			// PC = frame delta + frame type
> > @@ -658,6 +683,7 @@ static void build_subroutines(BuildCtx *ctx)
> >    |  mov SAVE_ERRF, 0			// No error function.
> >    |  mov SAVE_NRES, KBASEd		// Neg. delta means cframe w/o frame.
> >    |   add DISPATCH, GG_G2DISP
> > +  |  save_vmstate_through KBASEd
> 
> Re TMPR: The temporary register is not used prior to this line.
> Everything is OK.

Agree.

> 
> >    |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
> >    |
> >    |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.
> 
> <snipped>
> 
> > @@ -1578,7 +1606,7 @@ static void build_subroutines(BuildCtx *ctx)
> >    |  mov L:PC, TMP1
> >    |  mov BASE, L:RB->base
> >    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> > -  |  set_vmstate INTERP
> > +  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
> 
> Typo: Please, adjust the comments considering the ones nearby.

Done.

> 
> >    |
> >    |  cmp eax, LUA_YIELD
> >    |  ja >8
> 
> <snipped>
> 
> > @@ -3974,6 +4003,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> >  
> >    case BC_CALL: case BC_CALLM:
> >      |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
> > +    |  set_vmstate INTERP		// INTERP until a new BASE is setup
> 
> It looks like VM state is INTERP until the call enters *FUNC* bytecode.

Adjusted.

> 
> >      if (op == BC_CALLM) {
> >        |  add NARGS:RDd, MULTRES
> >      }
> > @@ -3995,6 +4025,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> >      |  mov LFUNC:RB, [RA-16]
> >      |  checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
> >      |->BC_CALLT_Z:
> > +    |  set_vmstate INTERP		// INTERP until a new BASE is setup
> 
> Ditto.

Adjusted.

> 
> >      |  mov PC, [BASE-8]
> >      |  test PCd, FRAME_TYPE
> >      |  jnz >7
> > @@ -4219,6 +4250,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> >        |  shl RAd, 3
> >      }
> >      |1:
> > +    |  set_vmstate INTERP // INTERP until the old BASE & KBASE is restored
> 
> Typo: Please, adjust the comments considering the ones nearby.

Fixed.

> 
> >      |  mov PC, [BASE-8]
> >      |  mov MULTRES, RDd			// Save nresults+1.
> >      |  test PCd, FRAME_TYPE		// Check frame type marker.
> 
> <snipped>
> 
> > @@ -4551,6 +4584,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> >      |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
> >      |  mov KBASE, [PC-4+PC2PROTO(k)]
> >      |  mov L:RB, SAVE_L
> > +    |  set_vmstate LFUNC		// LFUNC after KBASE restoration
> 
> This is OK, but this path is also taken for the JFUNC. Do we need to set
> another VM state for this case (in scope of JLOOP)?

AFAIK, it doesn't purpose. LFUNC is consistent since JIT follows
Lua semantics.

> 
> >      |  lea RA, [BASE+RA*8]		// Top of frame.
> >      |  cmp RA, L:RB->maxstack
> >      |  ja ->vm_growstack_f
> > @@ -4588,6 +4622,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> >      |  mov [RD-8], RB			// Store delta + FRAME_VARG.
> >      |  mov [RD-16], LFUNC:KBASE		// Store copy of LFUNC.
> >      |  mov L:RB, SAVE_L
> > +    |  set_vmstate LFUNC		// LFUNC after KBASE restoration
> 
> Ditto.

Ditto to ditto :).

> 
> >      |  lea RA, [RD+RA*8]
> >      |  cmp RA, L:RB->maxstack
> >      |  ja ->vm_growstack_v		// Need to grow stack.
> 
> <snipped>
> 
> > @@ -4653,7 +4688,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> >      |  // nresults returned in eax (RD).
> >      |  mov BASE, L:RB->base
> >      |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> > -    |  set_vmstate INTERP
> > +    |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
> 
> Typo: Please, adjust the comments considering the ones nearby.

Done.

> 
> >      |  lea RA, [BASE+RD*8]
> >      |  neg RA
> >      |  add RA, L:RB->top		// RA = (L->top-(L->base+nresults))*8
> > diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
> > index d76fbe3..b9dffa9 100644
> > --- a/src/vm_x86.dasc
> > +++ b/src/vm_x86.dasc
> 
> <snipped>
> 
> > @@ -290,33 +295,35 @@
> 
> <snipped>
> 
> > +|.define SAVE_CFRAME,	qword [rsp+qword*6]
> > +|.define UNUSED1,	qword [rsp+qword*5]
> 
> There is lost unused dword right here.

Thanks, fixed.

> 
> > +|.define SAVE_VMSTATE,	dword [rsp+dword*8]
> 
> <snipped>
> 
> > @@ -433,6 +440,22 @@
> >  |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
> >  |.endmacro
> >  |
> > +|// Save vmstate through register.
> > +|.macro save_vmstate_through, reg
> > +|.if not WIN
> > +|  mov reg, dword [DISPATCH+DISPATCH_GL(vmstate)]
> > +|  mov SAVE_VMSTATE, reg
> 
> See the rationale in the note to vm_x64.dasc. Unfortunately, for
> vm_x86.dasc the macro has two branches, *but* I think it is much better
> than managing free registers in *whole* VM. Consider the following:
> |.macro save_vmstate
> |.if not WIN
> |.if not X64
> |  mov UNUSED1, ecx	// Please rename this field (e.g SAVE_XCHG).
> |  mov ecx, dword [DISPATCH+DISPATCH_GL(vmstate)]
> |  mov SAVE_VMSTATE, ecx
> |  mov ecx, UNUSED1
> |.else // X64
> |  mov XCHGd, dword [DISPATCH+DISPATCH_GL(vmstate)]	// XCHGd is r11d
> |  mov SAVE_VMSTATE, XCHGd
> |.endif // X64
> |.endif // WIN
> |
> |.macro restore_vmstate
> |.if not WIN
> |.if not X64
> |  mov UNUSED1, ecx	// Please rename this field (e.g SPILLECX).
> |  mov ecx, SAVE_VMSTATE
> |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ecx
> |  mov ecx, UNUSED1
> |.else // X64
> |  mov XCHGd, SAVE_VMSTATE	// XCHGd is r11d
> |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], XCHGd
> |.endif // X64
> |.endif // WIN

Applied.

> 
> > +|.endif // WIN
> > +|.endmacro
> 
> <snipped>
> 
> >    |->vm_unwind_rethrow:
> > @@ -647,7 +674,7 @@ static void build_subroutines(BuildCtx *ctx)
> >    |  mov PC, [BASE-4]			// Fetch PC of previous frame.
> >    |  mov dword [BASE-4], LJ_TFALSE	// Prepend false to error message.
> >    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> > -  |  set_vmstate INTERP
> > +  |  set_vmstate INTERP // INTERP until jump to BC_RET* or return to C
> 
> Typo: Please, adjust the comments considering the ones nearby.

Done.

> 
> >    |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
> >    |
> >    |.if WIN and not X64
> 
> <snipped>
> 
> > @@ -730,7 +758,7 @@ static void build_subroutines(BuildCtx *ctx)
> >    |
> >    |  // Resume after yield (like a return).
> >    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> > -  |  set_vmstate INTERP
> > +  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
> 
> Typo: Please, adjust the comments considering the ones nearby.

Done.

> 
> >    |  mov byte L:RB->status, RDL
> >    |  mov BASE, L:RB->base
> >    |  mov RD, L:RB->top
> 
> <snipped>
> 
> > @@ -782,7 +811,7 @@ static void build_subroutines(BuildCtx *ctx)
> >    |
> >    |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
> >    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> > -  |  set_vmstate INTERP
> > +  |  set_vmstate INTERP // vm_resume: INTERP until executing BC_IFUNC*
> 
> Typo: Please, adjust the comments considering the ones nearby.
> 
> >    |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
> >    |  add PC, RA
> >    |  sub PC, BASE			// PC = frame delta + frame type
> 

Done.

> <snipped>
> 
> > @@ -1924,7 +1956,7 @@ static void build_subroutines(BuildCtx *ctx)
> >    |.endif
> >    |  mov BASE, L:RB->base
> >    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> > -  |  set_vmstate INTERP
> > +  |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
> 
> Typo: Please, adjust the comments considering the ones nearby.
> 

Done.

> >    |
> >    |  cmp eax, LUA_YIELD
> >    |  ja >8
> 
> <snipped>
> 
> > @@ -4683,6 +4716,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> >  
> >    case BC_CALL: case BC_CALLM:
> >      |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
> > +    |  set_vmstate INTERP		// INTERP until a new BASE is setup
> 
> It looks like VM state is INTERP until the call enters *FUNC* bytecode.

Adjusted comment.

> 
> >      if (op == BC_CALLM) {
> >        |  add NARGS:RD, MULTRES
> >      }
> > @@ -4706,6 +4740,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> >      |  cmp dword [RA-4], LJ_TFUNC
> >      |  jne ->vmeta_call
> >      |->BC_CALLT_Z:
> > +    |  set_vmstate INTERP		// INTERP until a new BASE is setup
> 
> Ditto.

Adjusted comment.

> 
> >      |  mov PC, [BASE-4]
> >      |  test PC, FRAME_TYPE
> >      |  jnz >7
> > @@ -4989,6 +5024,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> >        |  shl RA, 3
> >      }
> >      |1:
> > +    |  set_vmstate INTERP // INTERP until the old BASE & KBASE is restored
> 
> Typo: Please, adjust the comments considering the ones nearby.

Done.

> 
> >      |  mov PC, [BASE-4]
> >      |  mov MULTRES, RD			// Save nresults+1.
> >      |  test PC, FRAME_TYPE		// Check frame type marker.
> > @@ -5043,6 +5079,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> >      |  mov LFUNC:KBASE, [BASE-8]
> >      |  mov KBASE, LFUNC:KBASE->pc
> >      |  mov KBASE, [KBASE+PC2PROTO(k)]
> > +    |  set_vmstate LFUNC // LFUNC after the old BASE & KBASE is restored
> 
> Typo: Please, adjust the comments considering the ones nearby.

Done.

> 
> >      |  ins_next
> >      |
> >      |6:  // Fill up results with nil.
> > @@ -5330,6 +5367,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> >      |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
> >      |  mov KBASE, [PC-4+PC2PROTO(k)]
> >      |  mov L:RB, SAVE_L
> > +    |  set_vmstate LFUNC		// LFUNC after KBASE restoration
> 
> This is OK, but this path is also taken for the JFUNC. Do we need to set
> another VM state for this case (in scope of JLOOP)?

See the answer above.

> 
> >      |  lea RA, [BASE+RA*8]		// Top of frame.
> >      |  cmp RA, L:RB->maxstack
> >      |  ja ->vm_growstack_f
> > @@ -5367,6 +5405,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> >      |  mov [RD-4], RB			// Store delta + FRAME_VARG.
> >      |  mov [RD-8], LFUNC:KBASE		// Store copy of LFUNC.
> >      |  mov L:RB, SAVE_L
> > +    |  set_vmstate LFUNC		// LFUNC after KBASE restoration
> 
> Ditto.

See the answer above.

> 
> >      |  lea RA, [RD+RA*8]
> >      |  cmp RA, L:RB->maxstack
> >      |  ja ->vm_growstack_v		// Need to grow stack.
> 
> <snipped>
> 
> > @@ -5441,7 +5480,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
> >      |  // nresults returned in eax (RD).
> >      |  mov BASE, L:RB->base
> >      |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
> > -    |  set_vmstate INTERP
> > +    |  set_vmstate INTERP // INTERP until jump to BC_RET* or vm_return
> 
> Typo: Please, adjust the comments considering the ones nearby.

Done.

> 
> >      |  lea RA, [BASE+RD*8]
> >      |  neg RA
> >      |  add RA, L:RB->top		// RA = (L->top-(L->base+nresults))*8
> > -- 
> > 2.28.0
> > 
> 
> -- 
> Best regards,
> IM

I'll send v3 version soon.

-- 
Best regards,
Sergey Kaplun


More information about the Tarantool-patches mailing list