Tarantool development patches archive
 help / color / mirror / Atom feed
From: Sergey Bronnikov via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Sergey Kaplun <skaplun@tarantool.org>,
	Evgeniy Temirgaleev <e.temirgaleev@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: Re: [Tarantool-patches] [PATCH luajit 1/5] FFI: Unify stack setup for C calls in interpreter.
Date: Fri, 5 Jun 2026 17:18:02 +0300	[thread overview]
Message-ID: <22168fcd-7936-4234-9c3d-bf28443024d3@tarantool.org> (raw)
In-Reply-To: <20260530160409.4043089-2-skaplun@tarantool.org>

[-- Attachment #1: Type: text/plain, Size: 12494 bytes --]

Thanks for the patch! LGTM

On 5/30/26 19:04, Sergey Kaplun wrote:
> From: Mike Pall <mike>
>
> (cherry picked from commit cf903edb30e0cbd620ebd4bac02d4e2b4410fd02)
>
> This patch refactors the FFI CCallState structure. The `nsp` field now
> contains the number of bytes of the stack slot occupied. Also, it
> effectively decreased the number of arguments for the callee to 31.
> This patch is required for the next commit.
>
> Sergey Kaplun:
> * added the description for the patch
>
> Part of tarantool/tarantool#12480
> ---
>   src/lj_ccall.c     | 57 +++++++++++++++++++++++++---------------------
>   src/lj_ccall.h     |  7 +++---
>   src/vm_arm.dasc    |  8 +++----
>   src/vm_arm64.dasc  |  8 +++----
>   src/vm_mips.dasc   |  1 -
>   src/vm_mips64.dasc |  1 -
>   src/vm_ppc.dasc    |  3 +--
>   src/vm_x64.dasc    |  8 +++----
>   src/vm_x86.dasc    | 22 +++++++++++-------
>   9 files changed, 62 insertions(+), 53 deletions(-)
>
> diff --git a/src/lj_ccall.c b/src/lj_ccall.c
> index c3b27572..394255eb 100644
> --- a/src/lj_ccall.c
> +++ b/src/lj_ccall.c
> @@ -20,12 +20,15 @@
>   #if LJ_TARGET_X86
>   /* -- x86 calling conventions --------------------------------------------- */
>   
> +#define CCALL_PUSH(arg) \
> +  *(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR
> +
>   #if LJ_ABI_WIN
>   
>   #define CCALL_HANDLE_STRUCTRET \
>     /* Return structs bigger than 8 by reference (on stack only). */ \
>     cc->retref = (sz > 8); \
> -  if (cc->retref) cc->stack[nsp++] = (GPRArg)dp;
> +  if (cc->retref) CCALL_PUSH(dp);
>   
>   #define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
>   
> @@ -40,7 +43,7 @@
>       if (ngpr < maxgpr) \
>         cc->gpr[ngpr++] = (GPRArg)dp; \
>       else \
> -      cc->stack[nsp++] = (GPRArg)dp; \
> +      CCALL_PUSH(dp); \
>     } else {  /* Struct with single FP field ends up in FPR. */ \
>       cc->resx87 = ccall_classify_struct(cts, ctr); \
>     }
> @@ -56,7 +59,7 @@
>     if (ngpr < maxgpr) \
>       cc->gpr[ngpr++] = (GPRArg)dp; \
>     else \
> -    cc->stack[nsp++] = (GPRArg)dp;
> +    CCALL_PUSH(dp);
>   
>   #endif
>   
> @@ -67,7 +70,7 @@
>       if (ngpr < maxgpr) \
>         cc->gpr[ngpr++] = (GPRArg)dp; \
>       else \
> -      cc->stack[nsp++] = (GPRArg)dp; \
> +      CCALL_PUSH(dp); \
>     }
>   
>   #endif
> @@ -278,8 +281,8 @@
>     if (ngpr < maxgpr) { \
>       dp = &cc->gpr[ngpr]; \
>       if (ngpr + n > maxgpr) { \
> -      nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
> -      if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
> +      nsp += (ngpr + n - maxgpr) * CTSIZE_PTR;  /* Assumes contiguous gpr/stack fields. */ \
> +      if (nsp > CCALL_SIZE_STACK) goto err_nyi;  /* Too many arguments. */ \
>         ngpr = maxgpr; \
>       } else { \
>         ngpr += n; \
> @@ -471,8 +474,8 @@
>     if (ngpr < maxgpr) { \
>       dp = &cc->gpr[ngpr]; \
>       if (ngpr + n > maxgpr) { \
> -     nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
> -     if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
> +     nsp += (ngpr + n - maxgpr) * CTSIZE_PTR;  /* Assumes contiguous gpr/stack fields. */ \
> +     if (nsp > CCALL_SIZE_STACK) goto err_nyi;  /* Too many arguments. */ \
>        ngpr = maxgpr; \
>       } else { \
>        ngpr += n; \
> @@ -565,8 +568,8 @@
>     if (ngpr < maxgpr) { \
>       dp = &cc->gpr[ngpr]; \
>       if (ngpr + n > maxgpr) { \
> -      nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
> -      if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
> +      nsp += (ngpr + n - maxgpr) * CTSIZE_PTR;  /* Assumes contiguous gpr/stack fields. */ \
> +      if (nsp > CCALL_SIZE_STACK) goto err_nyi;  /* Too many arguments. */ \
>         ngpr = maxgpr; \
>       } else { \
>         ngpr += n; \
> @@ -698,10 +701,11 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
>     lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
>     if (ccall_struct_reg(cc, cts, dp, rcl)) {
>       /* Register overflow? Pass on stack. */
> -    MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1;
> -    if (nsp + n > CCALL_MAXSTACK) return 1;  /* Too many arguments. */
> -    cc->nsp = nsp + n;
> -    memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR);
> +    MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR;
> +    if (nsp + sz > CCALL_SIZE_STACK)
> +      return 1;  /* Too many arguments. */
> +    cc->nsp = nsp + sz;
> +    memcpy((uint8_t *)cc->stack + nsp, dp, sz);
>     }
>     return 0;  /* Ok. */
>   }
> @@ -1026,22 +1030,23 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
>       } else {
>         sz = CTSIZE_PTR;
>       }
> -    sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
> -    n = sz / CTSIZE_PTR;  /* Number of GPRs or stack slots needed. */
> +    n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR;  /* Number of GPRs or stack slots needed. */
>   
>       CCALL_HANDLE_REGARG  /* Handle register arguments. */
>   
>       /* Otherwise pass argument on stack. */
> -    if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
> -      MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
> -      nsp = (nsp + align) & ~align;  /* Align argument on stack. */
> +    if (CCALL_ALIGN_STACKARG) {  /* Align argument on stack. */
> +      MSize align = (1u << ctype_align(d->info)) - 1;
> +      if (rp)
> +	align = CTSIZE_PTR-1;
> +      nsp = (nsp + align) & ~align;
>       }
> -    if (nsp + n > CCALL_MAXSTACK) {  /* Too many arguments. */
> +    dp = ((uint8_t *)cc->stack) + nsp;
> +    nsp += n * CTSIZE_PTR;
> +    if (nsp > CCALL_SIZE_STACK) {  /* Too many arguments. */
>       err_nyi:
>         lj_err_caller(L, LJ_ERR_FFI_NYICALL);
>       }
> -    dp = &cc->stack[nsp];
> -    nsp += n;
>       isva = 0;
>   
>     done:
> @@ -1103,10 +1108,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
>   #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
>     cc->nfpr = nfpr;  /* Required for vararg functions. */
>   #endif
> -  cc->nsp = nsp;
> -  cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR;
> -  if (nsp > CCALL_SPS_FREE)
> -    cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u);
> +  cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
> +  cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR;
> +  if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR)
> +    cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u);
>     return gcsteps;
>   }
>   
> diff --git a/src/lj_ccall.h b/src/lj_ccall.h
> index 6efa48c7..af7a8e84 100644
> --- a/src/lj_ccall.h
> +++ b/src/lj_ccall.h
> @@ -152,14 +152,15 @@ typedef union FPRArg {
>   LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
>   LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
>   
> -#define CCALL_MAXSTACK		32
> +#define CCALL_NUM_STACK		31
> +#define CCALL_SIZE_STACK	(CCALL_NUM_STACK * CTSIZE_PTR)
>   
>   /* -- C call state -------------------------------------------------------- */
>   
>   typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
>     void (*func)(void);		/* Pointer to called function. */
>     uint32_t spadj;		/* Stack pointer adjustment. */
> -  uint8_t nsp;			/* Number of stack slots. */
> +  uint8_t nsp;			/* Number of bytes on stack. */
>     uint8_t retref;		/* Return value by reference. */
>   #if LJ_TARGET_X64
>     uint8_t ngpr;			/* Number of arguments in GPRs. */
> @@ -178,7 +179,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
>     FPRArg fpr[CCALL_NUM_FPR];	/* Arguments/results in FPRs. */
>   #endif
>     GPRArg gpr[CCALL_NUM_GPR];	/* Arguments/results in GPRs. */
> -  GPRArg stack[CCALL_MAXSTACK];	/* Stack slots. */
> +  GPRArg stack[CCALL_NUM_STACK];	/* Stack slots. */
>   } CCallState;
>   
>   /* -- C call handling ----------------------------------------------------- */
> diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
> index 628c1c24..7ed555f8 100644
> --- a/src/vm_arm.dasc
> +++ b/src/vm_arm.dasc
> @@ -2513,16 +2513,16 @@ static void build_subroutines(BuildCtx *ctx)
>     |.endif
>     |  mov r11, sp
>     |  sub sp, sp, CARG1			// Readjust stack.
> -  |   subs CARG2, CARG2, #1
> +  |   subs CARG2, CARG2, #4
>     |.if HFABI
>     |  vldm RB, {d0-d7}
>     |.endif
>     |    ldr RB, CCSTATE->func
>     |   bmi >2
>     |1:  // Copy stack slots.
> -  |  ldr CARG4, [CARG3, CARG2, lsl #2]
> -  |  str CARG4, [sp, CARG2, lsl #2]
> -  |  subs CARG2, CARG2, #1
> +  |  ldr CARG4, [CARG3, CARG2]
> +  |  str CARG4, [sp, CARG2]
> +  |  subs CARG2, CARG2, #4
>     |  bpl <1
>     |2:
>     |  ldrd CARG12, CCSTATE->gpr[0]
> diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
> index c35eaf12..57131140 100644
> --- a/src/vm_arm64.dasc
> +++ b/src/vm_arm64.dasc
> @@ -2142,14 +2142,14 @@ static void build_subroutines(BuildCtx *ctx)
>     |  ldr TMP0w,CCSTATE:x0->spadj
>     |   ldrb TMP1w, CCSTATE->nsp
>     |    add TMP2, CCSTATE, #offsetof(CCallState, stack)
> -  |   subs TMP1, TMP1, #1
> +  |   subs TMP1, TMP1, #8
>     |    ldr TMP3, CCSTATE->func
>     |  sub sp, sp, TMP0
>     |   bmi >2
>     |1:  // Copy stack slots
> -  |  ldr TMP0, [TMP2, TMP1, lsl #3]
> -  |  str TMP0, [sp, TMP1, lsl #3]
> -  |  subs TMP1, TMP1, #1
> +  |  ldr TMP0, [TMP2, TMP1]
> +  |  str TMP0, [sp, TMP1]
> +  |  subs TMP1, TMP1, #8
>     |  bpl <1
>     |2:
>     |  ldp x0, x1, CCSTATE->gpr[0]
> diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
> index 52366b88..4db9308f 100644
> --- a/src/vm_mips.dasc
> +++ b/src/vm_mips.dasc
> @@ -2836,7 +2836,6 @@ static void build_subroutines(BuildCtx *ctx)
>     |  move TMP2, sp
>     |  subu sp, sp, TMP1
>     |  sw ra, -4(TMP2)
> -  |   sll CARG2, CARG2, 2
>     |  sw r16, -8(TMP2)
>     |  sw CCSTATE, -12(TMP2)
>     |  move r16, TMP2
> diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
> index c41b27f4..87e240f7 100644
> --- a/src/vm_mips64.dasc
> +++ b/src/vm_mips64.dasc
> @@ -2963,7 +2963,6 @@ static void build_subroutines(BuildCtx *ctx)
>     |  move TMP2, sp
>     |  dsubu sp, sp, TMP1
>     |  sd ra, -8(TMP2)
> -  |   sll CARG2, CARG2, 3
>     |  sd r16, -16(TMP2)
>     |  sd CCSTATE, -24(TMP2)
>     |  move r16, TMP2
> diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
> index edae7b98..23d6e316 100644
> --- a/src/vm_ppc.dasc
> +++ b/src/vm_ppc.dasc
> @@ -3275,14 +3275,13 @@ static void build_subroutines(BuildCtx *ctx)
>     |    stw TMP0, 4(sp)
>     |   cmpwi cr1, CARG3, 0
>     |  mr TMP2, sp
> -  |   addic. CARG2, CARG2, -1
> +  |   addic. CARG2, CARG2, -4
>     |  stwux sp, sp, TMP1
>     |   crnot 4*cr1+eq, 4*cr1+eq		// For vararg calls.
>     |  stw r14, -4(TMP2)
>     |  stw CCSTATE, -8(TMP2)
>     |  mr r14, TMP2
>     |  la TMP1, CCSTATE->stack
> -  |   slwi CARG2, CARG2, 2
>     |   blty >2
>     |  la TMP2, 8(sp)
>     |1:
> diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
> index 6ac88d70..7f0da677 100644
> --- a/src/vm_x64.dasc
> +++ b/src/vm_x64.dasc
> @@ -2773,12 +2773,12 @@ static void build_subroutines(BuildCtx *ctx)
>     |
>     |  // Copy stack slots.
>     |  movzx ecx, byte CCSTATE->nsp
> -  |  sub ecx, 1
> +  |  sub ecx, 8
>     |  js >2
>     |1:
> -  |  mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
> -  |  mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
> -  |  sub ecx, 1
> +  |  mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
> +  |  mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
> +  |  sub ecx, 8
>     |  jns <1
>     |2:
>     |
> diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
> index d9234f3b..98546550 100644
> --- a/src/vm_x86.dasc
> +++ b/src/vm_x86.dasc
> @@ -3348,19 +3348,25 @@ static void build_subroutines(BuildCtx *ctx)
>     |
>     |  // Copy stack slots.
>     |  movzx ecx, byte CCSTATE->nsp
> -  |  sub ecx, 1
> +  |.if X64
> +  |  sub ecx, 8
>     |  js >2
>     |1:
> -  |.if X64
> -  |  mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
> -  |  mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
> +  |  mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
> +  |  mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
> +  |  sub ecx, 8
> +  |  jns <1
> +  |2:
>     |.else
> -  |  mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
> -  |  mov [esp+ecx*4], eax
> -  |.endif
> -  |  sub ecx, 1
> +  |  sub ecx, 4
> +  |  js >2
> +  |1:
> +  |  mov eax, [CCSTATE+ecx+offsetof(CCallState, stack)]
> +  |  mov [esp+ecx], eax
> +  |  sub ecx, 4
>     |  jns <1
>     |2:
> +  |.endif
>     |
>     |.if X64
>     |  movzx eax, byte CCSTATE->nfpr

[-- Attachment #2: Type: text/html, Size: 12465 bytes --]

  reply	other threads:[~2026-06-05 14:18 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-30 16:04 [Tarantool-patches] [PATCH luajit 0/5] Various FFI ABI calling conventions fixes Sergey Kaplun via Tarantool-patches
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 1/5] FFI: Unify stack setup for C calls in interpreter Sergey Kaplun via Tarantool-patches
2026-06-05 14:18   ` Sergey Bronnikov via Tarantool-patches [this message]
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 2/5] FFI/ARM64/OSX: Handle non-standard OSX C calling conventions Sergey Kaplun via Tarantool-patches
2026-06-01 11:40   ` Sergey Bronnikov via Tarantool-patches
2026-06-04  9:46     ` Sergey Kaplun via Tarantool-patches
2026-06-05 14:12       ` Sergey Bronnikov via Tarantool-patches
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 3/5] ARM64: Fix pass-by-value struct " Sergey Kaplun via Tarantool-patches
2026-06-01 12:27   ` Sergey Bronnikov via Tarantool-patches
2026-06-04 10:05     ` Sergey Kaplun via Tarantool-patches
2026-06-05 14:14       ` Sergey Bronnikov via Tarantool-patches
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 4/5] FFI: Various ABI and calling convention fixes Sergey Kaplun via Tarantool-patches
2026-06-01 13:02   ` Sergey Bronnikov via Tarantool-patches
2026-06-04 12:06     ` Sergey Kaplun via Tarantool-patches
2026-06-05 14:15       ` Sergey Bronnikov via Tarantool-patches
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 5/5] FFI/MacOS: Fix calling convention for enums Sergey Kaplun via Tarantool-patches
2026-06-01 13:07   ` Sergey Bronnikov via Tarantool-patches
2026-06-02 16:04 ` [Tarantool-patches] [PATCH luajit 0/5] Various FFI ABI calling conventions fixes Evgeniy Temirgaleev via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=22168fcd-7936-4234-9c3d-bf28443024d3@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=e.temirgaleev@tarantool.org \
    --cc=sergeyb@tarantool.org \
    --cc=skaplun@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit 1/5] FFI: Unify stack setup for C calls in interpreter.' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox