Hi, Sergey, thanks for the patch! LGTM Sergey On 6/11/25 19:01, Sergey Kaplun wrote: > From: Mike Pall > > Thanks to Zhongwei Yao. > > (cherry picked from commit 9493acc1a28f15b0ac4453e716f33436186c7acd) > > When fusing two LDR (STR) instructions to the single LDP (STP) > instruction, the arm64 emitter shifts the offset value to encode the > immediate. In the case when the offset is negative, the resulting field > value exceeds the 7-bit length of the immediate, see [1]. This results > in the invalid instruction decoding. > > This patch fixes this by masking the value with the 7-bit-width mask > `0x7f`. > > Sergey Kaplun: > * added the description and the test for the problem > > [1]: https://developer.arm.com/documentation/ddi0602/2025-03/Base-Instructions/LDP--Load-pair-of-registers- > > Part of tarantool/tarantool#11278 > --- > > Related issues: > * https://github.com/LuaJIT/LuaJIT/pull/1028 > * https://github.com/tarantool/tarantool/issues/11278 > Branch: https://github.com/tarantool/luajit/tree/skaplun/lj-1028-ldr-fusion-to-ldp-negative-offset > > src/lj_emit_arm64.h | 2 +- > ...ldr-fusion-to-ldp-negative-offset.test.lua | 45 +++++++++++++++++++ > 2 files changed, 46 insertions(+), 1 deletion(-) > create mode 100644 test/tarantool-tests/lj-1028-ldr-fusion-to-ldp-negative-offset.test.lua > > diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h > index e1a9d3e4..30cd3505 100644 > --- a/src/lj_emit_arm64.h > +++ b/src/lj_emit_arm64.h > @@ -143,7 +143,7 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) > goto nopair; > } > if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) { > - *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | > + *as->mcp = aip | A64F_N(rn) | (((ofsm >> sc) & 0x7f) << 15) | > (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 
0x50000000 : 0x90000000)); > return; > } > diff --git a/test/tarantool-tests/lj-1028-ldr-fusion-to-ldp-negative-offset.test.lua b/test/tarantool-tests/lj-1028-ldr-fusion-to-ldp-negative-offset.test.lua > new file mode 100644 > index 00000000..1ba28449 > --- /dev/null > +++ b/test/tarantool-tests/lj-1028-ldr-fusion-to-ldp-negative-offset.test.lua > @@ -0,0 +1,45 @@ > +local tap = require('tap') > +local ffi = require('ffi') > + > +-- This test demonstrates LuaJIT's incorrect emitting of LDP > +-- instruction with negative offset fused from LDR on arm64. > +-- See also https://github.com/LuaJIT/LuaJIT/pull/1028. > +local test = tap.test('lj-1028-ldr-fusion-to-ldp-negative-offset'):skipcond({ > + ['Test requires JIT enabled'] = not jit.status(), > +}) > + > +test:plan(1) > + > +-- Amount of iterations to compile and start the trace. > +local N_ITERATIONS = 4 > + > +ffi.cdef[[ > + typedef struct data { > + int32_t m1; > + int32_t m2; > + } data; > +]] > + > +local data_arr = ffi.new('data[' .. N_ITERATIONS .. ']') > + > +local const_data_ptr = ffi.typeof('const data *') > +local data = ffi.cast(const_data_ptr, data_arr) > + > +local results = {} > + > +jit.opt.start('hotloop=1') > + > +for i = 1, N_ITERATIONS do > + -- Pair loading from the negative offset generates an invalid > + -- instruction on AArch64 before this patch. > + local field = data[i - 1] > + local m1 = field.m1 > + local m2 = field.m2 > + > + -- Use loaded values to avoid DCE. > + results[i] = m1 + m2 > +end > + > +test:samevalues(results, 'no invalid instruction') > + > +test:done(true)