From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id 126486ECCC; Sat, 30 May 2026 19:05:18 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 126486ECCC DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1780157118; bh=IvygSTxqGPcIF2WfwsL0YCO1JtS4/zWjmVH8G9B52zQ=; h=To:Date:In-Reply-To:References:Subject:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To:Cc: From; b=bJsK6rZ79k1TwUQRX7lk+UyIfGlauC7GPB8MjoIEjq2+rBq9BU5Qq6Hu/eoxX/axE XWhg0S8tDTlded4uoGn/E+lb83Z6PxeR3Pp7SgToxhbTCsBVpFeR2OEN6uijS3t6f0 GWhG6X6M6gutMmV5n/UiR3dCxcoCgwFNtpIvyI2w= Received: from send60.i.mail.ru (send60.i.mail.ru [89.221.237.155]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id A71836ECCC for ; Sat, 30 May 2026 19:04:46 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org A71836ECCC Received: by exim-smtp-5b85998476-w27zp with esmtpa (envelope-from ) id 1wTMB3-00000000NxS-3Nza; Sat, 30 May 2026 19:04:46 +0300 To: Sergey Bronnikov , Evgeniy Temirgaleev Date: Sat, 30 May 2026 19:04:05 +0300 Message-ID: <20260530160409.4043089-2-skaplun@tarantool.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260530160409.4043089-1-skaplun@tarantool.org> References: <20260530160409.4043089-1-skaplun@tarantool.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Mailru-Src: smtp X-4EC0790: 10 X-7564579A: B8F34718100C35BD X-77F55803: 4F1203BC0FB41BD9402BF17F4A9A44D6FE1CAFF6BCFE0381C93F0DC45E80EDCF00894C459B0CD1B93B15D3DF1F19C2B166FA44935DFD4124C41AD750323404C58063F32B492FEB8016B550D20AEFFB87 X-7FA49CB5: FF5795518A3D127A4AD6D5ED66289B5278DA827A17800CE792E16514AF283DFAEA1F7E6F0F101C67BD4B6F7A4D31EC0BCC500DACC3FED6E28638F802B75D45FF8AA50765F7900637AC83A81C8FD4AD23D82A6BABE6F325AC2E85FA5F3EDFCBAA7353EFBB55337566A66E13412CE89A70A3032E010E5B774B83B4A56FFE45714B0EF866B92581B910389733CBF5DBD5E913377AFFFEAFD269176DF2183F8FC7C04CF195F1528592878941B15DA834481FCF19DD082D7633A0EF3E4896CB9E6436389733CBF5DBD5E9D5E8D9A59859A8B6AEEA5BB16A939343CC7F00164DA146DA6F5DAA56C3B73B237318B6A418E8EAB86D1867E19FE14079C09775C1D3CA48CF3D321E7403792E342EB15956EA79C166A417C69337E82CC275ECD9A6C639B01B78DA827A17800CE71F4E386938175FB9731C566533BA786AA5CC5B56E945C8DA X-C1DE0DAB: 0D63561A33F958A529077B530104E94F5002B1117B3ED696E5CA9E49725FE132250A03108B67251B823CB91A9FED034534781492E4B8EEADA4009540AE3588E9C79554A2A72441328621D336A7BC284946AD531847A6065A535571D14F44ED41 X-C8649E89: 1C3962B70DF3F0AD73CAD6646DEDE191716CD42B3DD1D34CAB70F9BE574AE9C625B6776AC983F447FC0B9F89525902EE6F57B2FD27647F25E66C117BDB76D6590D7BA7CE5D83113EB0F423534A0A0ED40BAF34F3160FBB36B182CC036B4E2F3F41D2C63C5CCD737FB8341EE9D5BE9A0A0A53FABAC7FD4FC94EEFE4E6A881BF00CF080B56DFBCF2D3C7CEAA0681F5848F4C41F94D744909CECFA6C6B0C050A61A8CAF69B82BA93681CD72808BE417F3B9E0E7457915DAA85F X-D57D3AED: 3ZO7eAau8CL7WIMRKs4sN3D3tLDjz0dLbV79QFUyzQ2Ujvy7cMT6pYYqY16iZVKkSc3dCLJ7zSJH7+u4VD18S7Vl4ZUrpaVfd2+vE6kuoey4m4VkSEu53w8ahmwBjZKM/YPHZyZHvz5uv+WouB9+ObcCpyrx6l7KImUglyhkEat/+ysWwi0gdhEs0JGjl6ggRWTy1haxBpVdbIX1nthFXMZebaIdHP2ghjoIc/363UZI6Kf1ptIMVRI2994ruhLU7r9coS+3TCw= X-DA7885C5: 13857D450C638342F255D290C0D534F9A204E9A3BB3A31AEA341D30FBBF412530A7FA24AB565860D5B1A4C17EAA7BC4BEF2421ABFA55128DAF83EF9164C44C7E X-Mailru-Sender: 689FA8AB762F7393520AF17B8A65FDE21303CC4278704DE308715E8504D67BFA6C2A228F509489A0E49D44BB4BD9522A059A1ED8796F048DB274557F927329BE89D5A3BC2B10C37545BD1C3CC395C826B4A721A3011E896F X-Mras: Ok Subject: [Tarantool-patches] [PATCH luajit 1/5] FFI: Unify stack setup for C calls in interpreter. X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Sergey Kaplun via Tarantool-patches Reply-To: Sergey Kaplun Cc: tarantool-patches@dev.tarantool.org Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" From: Mike Pall (cherry picked from commit cf903edb30e0cbd620ebd4bac02d4e2b4410fd02) This patch refactors the FFI CCallState structure. The `nsp` field now contains the number of bytes of the stack slot occupied. Also, it effectively decreased the number of arguments for the callee to 31. This patch is required for the next commit. Sergey Kaplun: * added the description for the patch Part of tarantool/tarantool#12480 --- src/lj_ccall.c | 57 +++++++++++++++++++++++++--------------------- src/lj_ccall.h | 7 +++--- src/vm_arm.dasc | 8 +++---- src/vm_arm64.dasc | 8 +++---- src/vm_mips.dasc | 1 - src/vm_mips64.dasc | 1 - src/vm_ppc.dasc | 3 +-- src/vm_x64.dasc | 8 +++---- src/vm_x86.dasc | 22 +++++++++++------- 9 files changed, 62 insertions(+), 53 deletions(-) diff --git a/src/lj_ccall.c b/src/lj_ccall.c index c3b27572..394255eb 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -20,12 +20,15 @@ #if LJ_TARGET_X86 /* -- x86 calling conventions --------------------------------------------- */ +#define CCALL_PUSH(arg) \ + *(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR + #if LJ_ABI_WIN #define CCALL_HANDLE_STRUCTRET \ /* Return structs bigger than 8 by reference (on stack only). */ \ cc->retref = (sz > 8); \ - if (cc->retref) cc->stack[nsp++] = (GPRArg)dp; + if (cc->retref) CCALL_PUSH(dp); #define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET @@ -40,7 +43,7 @@ if (ngpr < maxgpr) \ cc->gpr[ngpr++] = (GPRArg)dp; \ else \ - cc->stack[nsp++] = (GPRArg)dp; \ + CCALL_PUSH(dp); \ } else { /* Struct with single FP field ends up in FPR. */ \ cc->resx87 = ccall_classify_struct(cts, ctr); \ } @@ -56,7 +59,7 @@ if (ngpr < maxgpr) \ cc->gpr[ngpr++] = (GPRArg)dp; \ else \ - cc->stack[nsp++] = (GPRArg)dp; + CCALL_PUSH(dp); #endif @@ -67,7 +70,7 @@ if (ngpr < maxgpr) \ cc->gpr[ngpr++] = (GPRArg)dp; \ else \ - cc->stack[nsp++] = (GPRArg)dp; \ + CCALL_PUSH(dp); \ } #endif @@ -278,8 +281,8 @@ if (ngpr < maxgpr) { \ dp = &cc->gpr[ngpr]; \ if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ + nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \ + if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \ ngpr = maxgpr; \ } else { \ ngpr += n; \ @@ -471,8 +474,8 @@ if (ngpr < maxgpr) { \ dp = &cc->gpr[ngpr]; \ if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ + nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \ + if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \ ngpr = maxgpr; \ } else { \ ngpr += n; \ @@ -565,8 +568,8 @@ if (ngpr < maxgpr) { \ dp = &cc->gpr[ngpr]; \ if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ + nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \ + if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \ ngpr = maxgpr; \ } else { \ ngpr += n; \ @@ -698,10 +701,11 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl, lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); if (ccall_struct_reg(cc, cts, dp, rcl)) { /* Register overflow? Pass on stack. */ - MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1; - if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */ - cc->nsp = nsp + n; - memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR); + MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR; + if (nsp + sz > CCALL_SIZE_STACK) + return 1; /* Too many arguments. */ + cc->nsp = nsp + sz; + memcpy((uint8_t *)cc->stack + nsp, dp, sz); } return 0; /* Ok. */ } @@ -1026,22 +1030,23 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, } else { sz = CTSIZE_PTR; } - sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); - n = sz / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */ + n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */ CCALL_HANDLE_REGARG /* Handle register arguments. */ /* Otherwise pass argument on stack. */ - if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) { - MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1; - nsp = (nsp + align) & ~align; /* Align argument on stack. */ + if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */ + MSize align = (1u << ctype_align(d->info)) - 1; + if (rp) + align = CTSIZE_PTR-1; + nsp = (nsp + align) & ~align; } - if (nsp + n > CCALL_MAXSTACK) { /* Too many arguments. */ + dp = ((uint8_t *)cc->stack) + nsp; + nsp += n * CTSIZE_PTR; + if (nsp > CCALL_SIZE_STACK) { /* Too many arguments. */ err_nyi: lj_err_caller(L, LJ_ERR_FFI_NYICALL); } - dp = &cc->stack[nsp]; - nsp += n; isva = 0; done: @@ -1103,10 +1108,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) cc->nfpr = nfpr; /* Required for vararg functions. */ #endif - cc->nsp = nsp; - cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR; - if (nsp > CCALL_SPS_FREE) - cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u); + cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); + cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR; + if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR) + cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u); return gcsteps; } diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 6efa48c7..af7a8e84 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -152,14 +152,15 @@ typedef union FPRArg { LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR); LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR); -#define CCALL_MAXSTACK 32 +#define CCALL_NUM_STACK 31 +#define CCALL_SIZE_STACK (CCALL_NUM_STACK * CTSIZE_PTR) /* -- C call state -------------------------------------------------------- */ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { void (*func)(void); /* Pointer to called function. */ uint32_t spadj; /* Stack pointer adjustment. */ - uint8_t nsp; /* Number of stack slots. */ + uint8_t nsp; /* Number of bytes on stack. */ uint8_t retref; /* Return value by reference. */ #if LJ_TARGET_X64 uint8_t ngpr; /* Number of arguments in GPRs. */ @@ -178,7 +179,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */ #endif GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */ - GPRArg stack[CCALL_MAXSTACK]; /* Stack slots. */ + GPRArg stack[CCALL_NUM_STACK]; /* Stack slots. */ } CCallState; /* -- C call handling ----------------------------------------------------- */ diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 628c1c24..7ed555f8 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -2513,16 +2513,16 @@ static void build_subroutines(BuildCtx *ctx) |.endif | mov r11, sp | sub sp, sp, CARG1 // Readjust stack. - | subs CARG2, CARG2, #1 + | subs CARG2, CARG2, #4 |.if HFABI | vldm RB, {d0-d7} |.endif | ldr RB, CCSTATE->func | bmi >2 |1: // Copy stack slots. - | ldr CARG4, [CARG3, CARG2, lsl #2] - | str CARG4, [sp, CARG2, lsl #2] - | subs CARG2, CARG2, #1 + | ldr CARG4, [CARG3, CARG2] + | str CARG4, [sp, CARG2] + | subs CARG2, CARG2, #4 | bpl <1 |2: | ldrd CARG12, CCSTATE->gpr[0] diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index c35eaf12..57131140 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -2142,14 +2142,14 @@ static void build_subroutines(BuildCtx *ctx) | ldr TMP0w, CCSTATE:x0->spadj | ldrb TMP1w, CCSTATE->nsp | add TMP2, CCSTATE, #offsetof(CCallState, stack) - | subs TMP1, TMP1, #1 + | subs TMP1, TMP1, #8 | ldr TMP3, CCSTATE->func | sub sp, sp, TMP0 | bmi >2 |1: // Copy stack slots - | ldr TMP0, [TMP2, TMP1, lsl #3] - | str TMP0, [sp, TMP1, lsl #3] - | subs TMP1, TMP1, #1 + | ldr TMP0, [TMP2, TMP1] + | str TMP0, [sp, TMP1] + | subs TMP1, TMP1, #8 | bpl <1 |2: | ldp x0, x1, CCSTATE->gpr[0] diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 52366b88..4db9308f 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -2836,7 +2836,6 @@ static void build_subroutines(BuildCtx *ctx) | move TMP2, sp | subu sp, sp, TMP1 | sw ra, -4(TMP2) - | sll CARG2, CARG2, 2 | sw r16, -8(TMP2) | sw CCSTATE, -12(TMP2) | move r16, TMP2 diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index c41b27f4..87e240f7 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc @@ -2963,7 +2963,6 @@ static void build_subroutines(BuildCtx *ctx) | move TMP2, sp | dsubu sp, sp, TMP1 | sd ra, -8(TMP2) - | sll CARG2, CARG2, 3 | sd r16, -16(TMP2) | sd CCSTATE, -24(TMP2) | move r16, TMP2 diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index edae7b98..23d6e316 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -3275,14 +3275,13 @@ static void build_subroutines(BuildCtx *ctx) | stw TMP0, 4(sp) | cmpwi cr1, CARG3, 0 | mr TMP2, sp - | addic. CARG2, CARG2, -1 + | addic. CARG2, CARG2, -4 | stwux sp, sp, TMP1 | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. | stw r14, -4(TMP2) | stw CCSTATE, -8(TMP2) | mr r14, TMP2 | la TMP1, CCSTATE->stack - | slwi CARG2, CARG2, 2 | blty >2 | la TMP2, 8(sp) |1: diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 6ac88d70..7f0da677 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -2773,12 +2773,12 @@ static void build_subroutines(BuildCtx *ctx) | | // Copy stack slots. | movzx ecx, byte CCSTATE->nsp - | sub ecx, 1 + | sub ecx, 8 | js >2 |1: - | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] - | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax - | sub ecx, 1 + | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)] + | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax + | sub ecx, 8 | jns <1 |2: | diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index d9234f3b..98546550 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -3348,19 +3348,25 @@ static void build_subroutines(BuildCtx *ctx) | | // Copy stack slots. | movzx ecx, byte CCSTATE->nsp - | sub ecx, 1 + |.if X64 + | sub ecx, 8 | js >2 |1: - |.if X64 - | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] - | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax + | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)] + | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax + | sub ecx, 8 + | jns <1 + |2: |.else - | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)] - | mov [esp+ecx*4], eax - |.endif - | sub ecx, 1 + | sub ecx, 4 + | js >2 + |1: + | mov eax, [CCSTATE+ecx+offsetof(CCallState, stack)] + | mov [esp+ecx], eax + | sub ecx, 4 | jns <1 |2: + |.endif | |.if X64 | movzx eax, byte CCSTATE->nfpr -- 2.54.0