From: Sergey Kaplun via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Sergey Bronnikov <sergeyb@tarantool.org>,
Evgeniy Temirgaleev <e.temirgaleev@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH luajit 1/5] FFI: Unify stack setup for C calls in interpreter.
Date: Sat, 30 May 2026 19:04:05 +0300 [thread overview]
Message-ID: <20260530160409.4043089-2-skaplun@tarantool.org> (raw)
In-Reply-To: <20260530160409.4043089-1-skaplun@tarantool.org>
From: Mike Pall <mike>
(cherry picked from commit cf903edb30e0cbd620ebd4bac02d4e2b4410fd02)
This patch refactors the FFI CCallState structure. The `nsp` field now
contains the number of bytes of the stack slot occupied. Also, it
effectively decreased the number of arguments for the callee to 31.
This patch is required for the next commit.
Sergey Kaplun:
* added the description for the patch
Part of tarantool/tarantool#12480
---
src/lj_ccall.c | 57 +++++++++++++++++++++++++---------------------
src/lj_ccall.h | 7 +++---
src/vm_arm.dasc | 8 +++----
src/vm_arm64.dasc | 8 +++----
src/vm_mips.dasc | 1 -
src/vm_mips64.dasc | 1 -
src/vm_ppc.dasc | 3 +--
src/vm_x64.dasc | 8 +++----
src/vm_x86.dasc | 22 +++++++++++-------
9 files changed, 62 insertions(+), 53 deletions(-)
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index c3b27572..394255eb 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -20,12 +20,15 @@
#if LJ_TARGET_X86
/* -- x86 calling conventions --------------------------------------------- */
+#define CCALL_PUSH(arg) \
+ *(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR
+
#if LJ_ABI_WIN
#define CCALL_HANDLE_STRUCTRET \
/* Return structs bigger than 8 by reference (on stack only). */ \
cc->retref = (sz > 8); \
- if (cc->retref) cc->stack[nsp++] = (GPRArg)dp;
+ if (cc->retref) CCALL_PUSH(dp);
#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
@@ -40,7 +43,7 @@
if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \
else \
- cc->stack[nsp++] = (GPRArg)dp; \
+ CCALL_PUSH(dp); \
} else { /* Struct with single FP field ends up in FPR. */ \
cc->resx87 = ccall_classify_struct(cts, ctr); \
}
@@ -56,7 +59,7 @@
if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \
else \
- cc->stack[nsp++] = (GPRArg)dp;
+ CCALL_PUSH(dp);
#endif
@@ -67,7 +70,7 @@
if (ngpr < maxgpr) \
cc->gpr[ngpr++] = (GPRArg)dp; \
else \
- cc->stack[nsp++] = (GPRArg)dp; \
+ CCALL_PUSH(dp); \
}
#endif
@@ -278,8 +281,8 @@
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
- nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
- if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
+ nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
+ if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
@@ -471,8 +474,8 @@
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
- nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
- if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
+ nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
+ if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
@@ -565,8 +568,8 @@
if (ngpr < maxgpr) { \
dp = &cc->gpr[ngpr]; \
if (ngpr + n > maxgpr) { \
- nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
- if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
+ nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
+ if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
ngpr = maxgpr; \
} else { \
ngpr += n; \
@@ -698,10 +701,11 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
if (ccall_struct_reg(cc, cts, dp, rcl)) {
/* Register overflow? Pass on stack. */
- MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1;
- if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */
- cc->nsp = nsp + n;
- memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR);
+ MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR;
+ if (nsp + sz > CCALL_SIZE_STACK)
+ return 1; /* Too many arguments. */
+ cc->nsp = nsp + sz;
+ memcpy((uint8_t *)cc->stack + nsp, dp, sz);
}
return 0; /* Ok. */
}
@@ -1026,22 +1030,23 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
} else {
sz = CTSIZE_PTR;
}
- sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
- n = sz / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
+ n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
CCALL_HANDLE_REGARG /* Handle register arguments. */
/* Otherwise pass argument on stack. */
- if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
- MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
- nsp = (nsp + align) & ~align; /* Align argument on stack. */
+ if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */
+ MSize align = (1u << ctype_align(d->info)) - 1;
+ if (rp)
+ align = CTSIZE_PTR-1;
+ nsp = (nsp + align) & ~align;
}
- if (nsp + n > CCALL_MAXSTACK) { /* Too many arguments. */
+ dp = ((uint8_t *)cc->stack) + nsp;
+ nsp += n * CTSIZE_PTR;
+ if (nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
err_nyi:
lj_err_caller(L, LJ_ERR_FFI_NYICALL);
}
- dp = &cc->stack[nsp];
- nsp += n;
isva = 0;
done:
@@ -1103,10 +1108,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
cc->nfpr = nfpr; /* Required for vararg functions. */
#endif
- cc->nsp = nsp;
- cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR;
- if (nsp > CCALL_SPS_FREE)
- cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u);
+ cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
+ cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR;
+ if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR)
+ cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u);
return gcsteps;
}
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 6efa48c7..af7a8e84 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -152,14 +152,15 @@ typedef union FPRArg {
LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
-#define CCALL_MAXSTACK 32
+#define CCALL_NUM_STACK 31
+#define CCALL_SIZE_STACK (CCALL_NUM_STACK * CTSIZE_PTR)
/* -- C call state -------------------------------------------------------- */
typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
void (*func)(void); /* Pointer to called function. */
uint32_t spadj; /* Stack pointer adjustment. */
- uint8_t nsp; /* Number of stack slots. */
+ uint8_t nsp; /* Number of bytes on stack. */
uint8_t retref; /* Return value by reference. */
#if LJ_TARGET_X64
uint8_t ngpr; /* Number of arguments in GPRs. */
@@ -178,7 +179,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */
#endif
GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */
- GPRArg stack[CCALL_MAXSTACK]; /* Stack slots. */
+ GPRArg stack[CCALL_NUM_STACK]; /* Stack slots. */
} CCallState;
/* -- C call handling ----------------------------------------------------- */
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 628c1c24..7ed555f8 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -2513,16 +2513,16 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| mov r11, sp
| sub sp, sp, CARG1 // Readjust stack.
- | subs CARG2, CARG2, #1
+ | subs CARG2, CARG2, #4
|.if HFABI
| vldm RB, {d0-d7}
|.endif
| ldr RB, CCSTATE->func
| bmi >2
|1: // Copy stack slots.
- | ldr CARG4, [CARG3, CARG2, lsl #2]
- | str CARG4, [sp, CARG2, lsl #2]
- | subs CARG2, CARG2, #1
+ | ldr CARG4, [CARG3, CARG2]
+ | str CARG4, [sp, CARG2]
+ | subs CARG2, CARG2, #4
| bpl <1
|2:
| ldrd CARG12, CCSTATE->gpr[0]
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index c35eaf12..57131140 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -2142,14 +2142,14 @@ static void build_subroutines(BuildCtx *ctx)
| ldr TMP0w, CCSTATE:x0->spadj
| ldrb TMP1w, CCSTATE->nsp
| add TMP2, CCSTATE, #offsetof(CCallState, stack)
- | subs TMP1, TMP1, #1
+ | subs TMP1, TMP1, #8
| ldr TMP3, CCSTATE->func
| sub sp, sp, TMP0
| bmi >2
|1: // Copy stack slots
- | ldr TMP0, [TMP2, TMP1, lsl #3]
- | str TMP0, [sp, TMP1, lsl #3]
- | subs TMP1, TMP1, #1
+ | ldr TMP0, [TMP2, TMP1]
+ | str TMP0, [sp, TMP1]
+ | subs TMP1, TMP1, #8
| bpl <1
|2:
| ldp x0, x1, CCSTATE->gpr[0]
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 52366b88..4db9308f 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -2836,7 +2836,6 @@ static void build_subroutines(BuildCtx *ctx)
| move TMP2, sp
| subu sp, sp, TMP1
| sw ra, -4(TMP2)
- | sll CARG2, CARG2, 2
| sw r16, -8(TMP2)
| sw CCSTATE, -12(TMP2)
| move r16, TMP2
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index c41b27f4..87e240f7 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -2963,7 +2963,6 @@ static void build_subroutines(BuildCtx *ctx)
| move TMP2, sp
| dsubu sp, sp, TMP1
| sd ra, -8(TMP2)
- | sll CARG2, CARG2, 3
| sd r16, -16(TMP2)
| sd CCSTATE, -24(TMP2)
| move r16, TMP2
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index edae7b98..23d6e316 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -3275,14 +3275,13 @@ static void build_subroutines(BuildCtx *ctx)
| stw TMP0, 4(sp)
| cmpwi cr1, CARG3, 0
| mr TMP2, sp
- | addic. CARG2, CARG2, -1
+ | addic. CARG2, CARG2, -4
| stwux sp, sp, TMP1
| crnot 4*cr1+eq, 4*cr1+eq // For vararg calls.
| stw r14, -4(TMP2)
| stw CCSTATE, -8(TMP2)
| mr r14, TMP2
| la TMP1, CCSTATE->stack
- | slwi CARG2, CARG2, 2
| blty >2
| la TMP2, 8(sp)
|1:
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 6ac88d70..7f0da677 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -2773,12 +2773,12 @@ static void build_subroutines(BuildCtx *ctx)
|
| // Copy stack slots.
| movzx ecx, byte CCSTATE->nsp
- | sub ecx, 1
+ | sub ecx, 8
| js >2
|1:
- | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
- | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
- | sub ecx, 1
+ | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
+ | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
+ | sub ecx, 8
| jns <1
|2:
|
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index d9234f3b..98546550 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -3348,19 +3348,25 @@ static void build_subroutines(BuildCtx *ctx)
|
| // Copy stack slots.
| movzx ecx, byte CCSTATE->nsp
- | sub ecx, 1
+ |.if X64
+ | sub ecx, 8
| js >2
|1:
- |.if X64
- | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
- | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
+ | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
+ | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
+ | sub ecx, 8
+ | jns <1
+ |2:
|.else
- | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
- | mov [esp+ecx*4], eax
- |.endif
- | sub ecx, 1
+ | sub ecx, 4
+ | js >2
+ |1:
+ | mov eax, [CCSTATE+ecx+offsetof(CCallState, stack)]
+ | mov [esp+ecx], eax
+ | sub ecx, 4
| jns <1
|2:
+ |.endif
|
|.if X64
| movzx eax, byte CCSTATE->nfpr
--
2.54.0
next prev parent reply other threads:[~2026-05-30 16:05 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-30 16:04 [Tarantool-patches] [PATCH luajit 0/5] Various FFI ABI calling conventions fixes Sergey Kaplun via Tarantool-patches
2026-05-30 16:04 ` Sergey Kaplun via Tarantool-patches [this message]
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 2/5] FFI/ARM64/OSX: Handle non-standard OSX C calling conventions Sergey Kaplun via Tarantool-patches
2026-06-01 11:40 ` Sergey Bronnikov via Tarantool-patches
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 3/5] ARM64: Fix pass-by-value struct " Sergey Kaplun via Tarantool-patches
2026-06-01 12:27 ` Sergey Bronnikov via Tarantool-patches
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 4/5] FFI: Various ABI and calling convention fixes Sergey Kaplun via Tarantool-patches
2026-06-01 13:02 ` Sergey Bronnikov via Tarantool-patches
2026-05-30 16:04 ` [Tarantool-patches] [PATCH luajit 5/5] FFI/MacOS: Fix calling convention for enums Sergey Kaplun via Tarantool-patches
2026-06-01 13:07 ` Sergey Bronnikov via Tarantool-patches
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260530160409.4043089-2-skaplun@tarantool.org \
--to=tarantool-patches@dev.tarantool.org \
--cc=e.temirgaleev@tarantool.org \
--cc=sergeyb@tarantool.org \
--cc=skaplun@tarantool.org \
--subject='Re: [Tarantool-patches] [PATCH luajit 1/5] FFI: Unify stack setup for C calls in interpreter.' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox