Hi, Sergey!

Thanks for the patch! LGTM

Sergey

On 7/24/25 12:03, Sergey Kaplun wrote:
> From: Mike Pall
>
> (cherry picked from commit ebc4919affbc0f9e8adfb5dede378017c7dd1fdd)
>
> This patch improves the generation of immediates used for cdata
> alignment or as arguments to function calls. Before this patch, such
> immediates were rematerialized into registers as follows (if they could
> be encoded in the K13 format for logical data-processing instructions):
> | orr w1, wzr, 0x3
>
> instead of the recommended [1][2]:
> | mov x1, 0x3
>
> When a constant can be placed in a register with a single `mov` [3]
> instruction, it is preferable to emit the `mov` (which is an alias of
> `orr` in most cases).
>
> When the constant needs at least a `mov` plus `mov[kn]` instructions,
> it is still preferable to try the shorter encoding via `orr`, if
> possible.
>
> Sergey Kaplun:
> * added the description for the patch
>
> [1]: https://developer.arm.com/documentation/ddi0602/2025-06/Shared-Pseudocode/aarch64-functions-movwpreferred
> [2]: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/ORR--immediate---Bitwise-OR--immediate--
> [3]: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/MOV--register---Move-register-value--an-alias-of-ORR--shifted-register--
>
> Part of tarantool/tarantool#11691
> ---
>  src/lj_emit_arm64.h | 64 +++++++++++++++++++++++----------------------
>  1 file changed, 33 insertions(+), 31 deletions(-)
>
> diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
> index 5c1bc372..2bb93dd9 100644
> --- a/src/lj_emit_arm64.h
> +++ b/src/lj_emit_arm64.h
> @@ -194,39 +194,41 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
>  
>  static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
>  {
> -  uint32_t k13 = emit_isk13(u64, is64);
> -  if (k13) {  /* Can the constant be represented as a bitmask immediate? */
> -    emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
> -  } else {
> -    int i, zeros = 0, ones = 0, neg;
> -    if (!is64) u64 = (int64_t)(int32_t)u64;  /* Sign-extend. */
> -    /* Count homogeneous 16 bit fragments. */
> -    for (i = 0; i < 4; i++) {
> -      uint64_t frag = (u64 >> i*16) & 0xffff;
> -      zeros += (frag == 0);
> -      ones += (frag == 0xffff);
> +  int i, zeros = 0, ones = 0, neg;
> +  if (!is64) u64 = (int64_t)(int32_t)u64;  /* Sign-extend. */
> +  /* Count homogeneous 16 bit fragments. */
> +  for (i = 0; i < 4; i++) {
> +    uint64_t frag = (u64 >> i*16) & 0xffff;
> +    zeros += (frag == 0);
> +    ones += (frag == 0xffff);
> +  }
> +  neg = ones > zeros;  /* Use MOVN if it pays off. */
> +  if ((neg ? ones : zeros) < 3) {  /* Need 2+ ins. Try shorter K13 encoding. */
> +    uint32_t k13 = emit_isk13(u64, is64);
> +    if (k13) {
> +      emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
> +      return;
>      }
> -    neg = ones > zeros;  /* Use MOVN if it pays off. */
> -    if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
> -      int shift = 0, lshift = 0;
> -      uint64_t n64 = neg ? ~u64 : u64;
> -      if (n64 != 0) {
> -        /* Find first/last fragment to be filled. */
> -        shift = (63-emit_clz64(n64)) & ~15;
> -        lshift = emit_ctz64(n64) & ~15;
> -      }
> -      /* MOVK requires the original value (u64). */
> -      while (shift > lshift) {
> -        uint32_t u16 = (u64 >> shift) & 0xffff;
> -        /* Skip fragments that are correctly filled by MOVN/MOVZ. */
> -        if (u16 != (neg ? 0xffff : 0))
> -          emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
> -        shift -= 16;
> -      }
> -      /* But MOVN needs an inverted value (n64). */
> -      emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
> -                 A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
> +  }
> +  if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
> +    int shift = 0, lshift = 0;
> +    uint64_t n64 = neg ? ~u64 : u64;
> +    if (n64 != 0) {
> +      /* Find first/last fragment to be filled. */
> +      shift = (63-emit_clz64(n64)) & ~15;
> +      lshift = emit_ctz64(n64) & ~15;
> +    }
> +    /* MOVK requires the original value (u64). */
> +    while (shift > lshift) {
> +      uint32_t u16 = (u64 >> shift) & 0xffff;
> +      /* Skip fragments that are correctly filled by MOVN/MOVZ. */
> +      if (u16 != (neg ? 0xffff : 0))
> +        emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
> +      shift -= 16;
>      }
> +    /* But MOVN needs an inverted value (n64). */
> +    emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
> +               A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
>    }
>  }
>  
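
Side note for readers of the thread: below is a small standalone sketch of
the fragment-counting heuristic the reordered code relies on. It is an
illustration only, not part of the patch or of LuaJIT; the helper name
`movwk_count` and the sample constants are made up for the example.

/*
 * Count the all-zero and all-one 16-bit fragments of a 64-bit constant.
 * If MOVZ/MOVN alone covers 3+ fragments, a single MOV suffices; only
 * when 2+ MOVZ/MOVN/MOVK instructions would be needed is the bitmask
 * immediate (K13/ORR) encoding worth trying first.
 */
#include <stdint.h>
#include <stdio.h>

/* Number of MOVZ/MOVN + MOVK instructions needed to build u64. */
static int movwk_count(uint64_t u64)
{
  int i, zeros = 0, ones = 0;
  for (i = 0; i < 4; i++) {
    uint64_t frag = (u64 >> i*16) & 0xffff;
    zeros += (frag == 0);
    ones += (frag == 0xffff);
  }
  /* MOVN fills all-one fragments for free, MOVZ fills all-zero ones. */
  return 4 - (ones > zeros ? ones : zeros);
}

int main(void)
{
  static const uint64_t k[] = {
    0x3, 0xffffffffffff0003ULL, 0x0001000200030004ULL
  };
  unsigned i;
  for (i = 0; i < sizeof(k)/sizeof(k[0]); i++) {
    int n = movwk_count(k[i]);
    printf("0x%016llx needs %d MOVZ/MOVN/MOVK ins -> %s\n",
           (unsigned long long)k[i], n,
           n >= 2 ? "try the K13/ORR encoding first" : "emit a single MOV");
  }
  return 0;
}

For 0x3 this reports one instruction, which is exactly the case where the
patched emit_loadk() now skips the K13 check and emits `mov x1, 0x3`
instead of `orr w1, wzr, 0x3`.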