From: Sergey Bronnikov via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Sergey Kaplun <skaplun@tarantool.org>,
Evgeniy Temirgaleev <e.temirgaleev@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: Re: [Tarantool-patches] [PATCH luajit 1/5] FFI: Unify stack setup for C calls in interpreter.
Date: Fri, 5 Jun 2026 17:18:02 +0300 [thread overview]
Message-ID: <22168fcd-7936-4234-9c3d-bf28443024d3@tarantool.org> (raw)
In-Reply-To: <20260530160409.4043089-2-skaplun@tarantool.org>
[-- Attachment #1: Type: text/plain, Size: 12494 bytes --]
Thanks for the patch! LGTM
On 5/30/26 19:04, Sergey Kaplun wrote:
> From: Mike Pall <mike>
>
> (cherry picked from commit cf903edb30e0cbd620ebd4bac02d4e2b4410fd02)
>
> This patch refactors the FFI CCallState structure. The `nsp` field now
> contains the number of bytes of the stack slot occupied. Also, it
> effectively decreased the number of arguments for the callee to 31.
> This patch is required for the next commit.
>
> Sergey Kaplun:
> * added the description for the patch
>
> Part of tarantool/tarantool#12480
> ---
> src/lj_ccall.c | 57 +++++++++++++++++++++++++---------------------
> src/lj_ccall.h | 7 +++---
> src/vm_arm.dasc | 8 +++----
> src/vm_arm64.dasc | 8 +++----
> src/vm_mips.dasc | 1 -
> src/vm_mips64.dasc | 1 -
> src/vm_ppc.dasc | 3 +--
> src/vm_x64.dasc | 8 +++----
> src/vm_x86.dasc | 22 +++++++++++-------
> 9 files changed, 62 insertions(+), 53 deletions(-)
>
> diff --git a/src/lj_ccall.c b/src/lj_ccall.c
> index c3b27572..394255eb 100644
> --- a/src/lj_ccall.c
> +++ b/src/lj_ccall.c
> @@ -20,12 +20,15 @@
> #if LJ_TARGET_X86
> /* -- x86 calling conventions --------------------------------------------- */
>
> +#define CCALL_PUSH(arg) \
> + *(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR
> +
> #if LJ_ABI_WIN
>
> #define CCALL_HANDLE_STRUCTRET \
> /* Return structs bigger than 8 by reference (on stack only). */ \
> cc->retref = (sz > 8); \
> - if (cc->retref) cc->stack[nsp++] = (GPRArg)dp;
> + if (cc->retref) CCALL_PUSH(dp);
>
> #define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
>
> @@ -40,7 +43,7 @@
> if (ngpr < maxgpr) \
> cc->gpr[ngpr++] = (GPRArg)dp; \
> else \
> - cc->stack[nsp++] = (GPRArg)dp; \
> + CCALL_PUSH(dp); \
> } else { /* Struct with single FP field ends up in FPR. */ \
> cc->resx87 = ccall_classify_struct(cts, ctr); \
> }
> @@ -56,7 +59,7 @@
> if (ngpr < maxgpr) \
> cc->gpr[ngpr++] = (GPRArg)dp; \
> else \
> - cc->stack[nsp++] = (GPRArg)dp;
> + CCALL_PUSH(dp);
>
> #endif
>
> @@ -67,7 +70,7 @@
> if (ngpr < maxgpr) \
> cc->gpr[ngpr++] = (GPRArg)dp; \
> else \
> - cc->stack[nsp++] = (GPRArg)dp; \
> + CCALL_PUSH(dp); \
> }
>
> #endif
> @@ -278,8 +281,8 @@
> if (ngpr < maxgpr) { \
> dp = &cc->gpr[ngpr]; \
> if (ngpr + n > maxgpr) { \
> - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
> - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
> + nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
> + if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
> ngpr = maxgpr; \
> } else { \
> ngpr += n; \
> @@ -471,8 +474,8 @@
> if (ngpr < maxgpr) { \
> dp = &cc->gpr[ngpr]; \
> if (ngpr + n > maxgpr) { \
> - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
> - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
> + nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
> + if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
> ngpr = maxgpr; \
> } else { \
> ngpr += n; \
> @@ -565,8 +568,8 @@
> if (ngpr < maxgpr) { \
> dp = &cc->gpr[ngpr]; \
> if (ngpr + n > maxgpr) { \
> - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
> - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
> + nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
> + if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
> ngpr = maxgpr; \
> } else { \
> ngpr += n; \
> @@ -698,10 +701,11 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
> lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
> if (ccall_struct_reg(cc, cts, dp, rcl)) {
> /* Register overflow? Pass on stack. */
> - MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1;
> - if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */
> - cc->nsp = nsp + n;
> - memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR);
> + MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR;
> + if (nsp + sz > CCALL_SIZE_STACK)
> + return 1; /* Too many arguments. */
> + cc->nsp = nsp + sz;
> + memcpy((uint8_t *)cc->stack + nsp, dp, sz);
> }
> return 0; /* Ok. */
> }
> @@ -1026,22 +1030,23 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
> } else {
> sz = CTSIZE_PTR;
> }
> - sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
> - n = sz / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
> + n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
>
> CCALL_HANDLE_REGARG /* Handle register arguments. */
>
> /* Otherwise pass argument on stack. */
> - if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
> - MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
> - nsp = (nsp + align) & ~align; /* Align argument on stack. */
> + if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */
> + MSize align = (1u << ctype_align(d->info)) - 1;
> + if (rp)
> + align = CTSIZE_PTR-1;
> + nsp = (nsp + align) & ~align;
> }
> - if (nsp + n > CCALL_MAXSTACK) { /* Too many arguments. */
> + dp = ((uint8_t *)cc->stack) + nsp;
> + nsp += n * CTSIZE_PTR;
> + if (nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
> err_nyi:
> lj_err_caller(L, LJ_ERR_FFI_NYICALL);
> }
> - dp = &cc->stack[nsp];
> - nsp += n;
> isva = 0;
>
> done:
> @@ -1103,10 +1108,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
> #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
> cc->nfpr = nfpr; /* Required for vararg functions. */
> #endif
> - cc->nsp = nsp;
> - cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR;
> - if (nsp > CCALL_SPS_FREE)
> - cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u);
> + cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
> + cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR;
> + if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR)
> + cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u);
> return gcsteps;
> }
>
> diff --git a/src/lj_ccall.h b/src/lj_ccall.h
> index 6efa48c7..af7a8e84 100644
> --- a/src/lj_ccall.h
> +++ b/src/lj_ccall.h
> @@ -152,14 +152,15 @@ typedef union FPRArg {
> LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
> LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
>
> -#define CCALL_MAXSTACK 32
> +#define CCALL_NUM_STACK 31
> +#define CCALL_SIZE_STACK (CCALL_NUM_STACK * CTSIZE_PTR)
>
> /* -- C call state -------------------------------------------------------- */
>
> typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
> void (*func)(void); /* Pointer to called function. */
> uint32_t spadj; /* Stack pointer adjustment. */
> - uint8_t nsp; /* Number of stack slots. */
> + uint8_t nsp; /* Number of bytes on stack. */
> uint8_t retref; /* Return value by reference. */
> #if LJ_TARGET_X64
> uint8_t ngpr; /* Number of arguments in GPRs. */
> @@ -178,7 +179,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
> FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */
> #endif
> GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */
> - GPRArg stack[CCALL_MAXSTACK]; /* Stack slots. */
> + GPRArg stack[CCALL_NUM_STACK]; /* Stack slots. */
> } CCallState;
>
> /* -- C call handling ----------------------------------------------------- */
> diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
> index 628c1c24..7ed555f8 100644
> --- a/src/vm_arm.dasc
> +++ b/src/vm_arm.dasc
> @@ -2513,16 +2513,16 @@ static void build_subroutines(BuildCtx *ctx)
> |.endif
> | mov r11, sp
> | sub sp, sp, CARG1 // Readjust stack.
> - | subs CARG2, CARG2, #1
> + | subs CARG2, CARG2, #4
> |.if HFABI
> | vldm RB, {d0-d7}
> |.endif
> | ldr RB, CCSTATE->func
> | bmi >2
> |1: // Copy stack slots.
> - | ldr CARG4, [CARG3, CARG2, lsl #2]
> - | str CARG4, [sp, CARG2, lsl #2]
> - | subs CARG2, CARG2, #1
> + | ldr CARG4, [CARG3, CARG2]
> + | str CARG4, [sp, CARG2]
> + | subs CARG2, CARG2, #4
> | bpl <1
> |2:
> | ldrd CARG12, CCSTATE->gpr[0]
> diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
> index c35eaf12..57131140 100644
> --- a/src/vm_arm64.dasc
> +++ b/src/vm_arm64.dasc
> @@ -2142,14 +2142,14 @@ static void build_subroutines(BuildCtx *ctx)
> | ldr TMP0w,CCSTATE:x0->spadj
> | ldrb TMP1w, CCSTATE->nsp
> | add TMP2, CCSTATE, #offsetof(CCallState, stack)
> - | subs TMP1, TMP1, #1
> + | subs TMP1, TMP1, #8
> | ldr TMP3, CCSTATE->func
> | sub sp, sp, TMP0
> | bmi >2
> |1: // Copy stack slots
> - | ldr TMP0, [TMP2, TMP1, lsl #3]
> - | str TMP0, [sp, TMP1, lsl #3]
> - | subs TMP1, TMP1, #1
> + | ldr TMP0, [TMP2, TMP1]
> + | str TMP0, [sp, TMP1]
> + | subs TMP1, TMP1, #8
> | bpl <1
> |2:
> | ldp x0, x1, CCSTATE->gpr[0]
> diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
> index 52366b88..4db9308f 100644
> --- a/src/vm_mips.dasc
> +++ b/src/vm_mips.dasc
> @@ -2836,7 +2836,6 @@ static void build_subroutines(BuildCtx *ctx)
> | move TMP2, sp
> | subu sp, sp, TMP1
> | sw ra, -4(TMP2)
> - | sll CARG2, CARG2, 2
> | sw r16, -8(TMP2)
> | sw CCSTATE, -12(TMP2)
> | move r16, TMP2
> diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
> index c41b27f4..87e240f7 100644
> --- a/src/vm_mips64.dasc
> +++ b/src/vm_mips64.dasc
> @@ -2963,7 +2963,6 @@ static void build_subroutines(BuildCtx *ctx)
> | move TMP2, sp
> | dsubu sp, sp, TMP1
> | sd ra, -8(TMP2)
> - | sll CARG2, CARG2, 3
> | sd r16, -16(TMP2)
> | sd CCSTATE, -24(TMP2)
> | move r16, TMP2
> diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
> index edae7b98..23d6e316 100644
> --- a/src/vm_ppc.dasc
> +++ b/src/vm_ppc.dasc
> @@ -3275,14 +3275,13 @@ static void build_subroutines(BuildCtx *ctx)
> | stw TMP0, 4(sp)
> | cmpwi cr1, CARG3, 0
> | mr TMP2, sp
> - | addic. CARG2, CARG2, -1
> + | addic. CARG2, CARG2, -4
> | stwux sp, sp, TMP1
> | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls.
> | stw r14, -4(TMP2)
> | stw CCSTATE, -8(TMP2)
> | mr r14, TMP2
> | la TMP1, CCSTATE->stack
> - | slwi CARG2, CARG2, 2
> | blty >2
> | la TMP2, 8(sp)
> |1:
> diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
> index 6ac88d70..7f0da677 100644
> --- a/src/vm_x64.dasc
> +++ b/src/vm_x64.dasc
> @@ -2773,12 +2773,12 @@ static void build_subroutines(BuildCtx *ctx)
> |
> | // Copy stack slots.
> | movzx ecx, byte CCSTATE->nsp
> - | sub ecx, 1
> + | sub ecx, 8
> | js >2
> |1:
> - | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
> - | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
> - | sub ecx, 1
> + | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
> + | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
> + | sub ecx, 8
> | jns <1
> |2:
> |
> diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
> index d9234f3b..98546550 100644
> --- a/src/vm_x86.dasc
> +++ b/src/vm_x86.dasc
> @@ -3348,19 +3348,25 @@ static void build_subroutines(BuildCtx *ctx)
> |
> | // Copy stack slots.
> | movzx ecx, byte CCSTATE->nsp
> - | sub ecx, 1
> + |.if X64
> + | sub ecx, 8
> | js >2
> |1:
> - |.if X64
> - | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
> - | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
> + | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
> + | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
> + | sub ecx, 8
> + | jns <1
> + |2:
> |.else
> - | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
> - | mov [esp+ecx*4], eax
> - |.endif
> - | sub ecx, 1
> + | sub ecx, 4
> + | js >2
> + |1:
> + | mov eax, [CCSTATE+ecx+offsetof(CCallState, stack)]
> + | mov [esp+ecx], eax
> + | sub ecx, 4
> | jns <1
> |2:
> + |.endif
> |
> |.if X64
> | movzx eax, byte CCSTATE->nfpr
[-- Attachment #2: Type: text/html, Size: 12465 bytes --]
next prev parent reply other threads:[~2026-06-05 14:18 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-30 16:04 [Tarantool-patches] [PATCH luajit 0/5] Various FFI ABI calling conventions fixes Sergey Kaplun via Tarantool-patches
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 1/5] FFI: Unify stack setup for C calls in interpreter Sergey Kaplun via Tarantool-patches
2026-06-05 14:18 ` Sergey Bronnikov via Tarantool-patches [this message]
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 2/5] FFI/ARM64/OSX: Handle non-standard OSX C calling conventions Sergey Kaplun via Tarantool-patches
2026-06-01 11:40 ` Sergey Bronnikov via Tarantool-patches
2026-06-04 9:46 ` Sergey Kaplun via Tarantool-patches
2026-06-05 14:12 ` Sergey Bronnikov via Tarantool-patches
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 3/5] ARM64: Fix pass-by-value struct " Sergey Kaplun via Tarantool-patches
2026-06-01 12:27 ` Sergey Bronnikov via Tarantool-patches
2026-06-04 10:05 ` Sergey Kaplun via Tarantool-patches
2026-06-05 14:14 ` Sergey Bronnikov via Tarantool-patches
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 4/5] FFI: Various ABI and calling convention fixes Sergey Kaplun via Tarantool-patches
2026-06-01 13:02 ` Sergey Bronnikov via Tarantool-patches
2026-06-04 12:06 ` Sergey Kaplun via Tarantool-patches
2026-06-05 14:15 ` Sergey Bronnikov via Tarantool-patches
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 5/5] FFI/MacOS: Fix calling convention for enums Sergey Kaplun via Tarantool-patches
2026-06-01 13:07 ` Sergey Bronnikov via Tarantool-patches
2026-06-02 16:04 ` [Tarantool-patches] [PATCH luajit 0/5] Various FFI ABI calling conventions fixes Evgeniy Temirgaleev via Tarantool-patches
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=22168fcd-7936-4234-9c3d-bf28443024d3@tarantool.org \
--to=tarantool-patches@dev.tarantool.org \
--cc=e.temirgaleev@tarantool.org \
--cc=sergeyb@tarantool.org \
--cc=skaplun@tarantool.org \
--subject='Re: [Tarantool-patches] [PATCH luajit 1/5] FFI: Unify stack setup for C calls in interpreter.' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox