[Tarantool-patches] [PATCH luajit 2/3] ARM64: More improvements to the generation of immediates.
Sergey Kaplun
skaplun at tarantool.org
Thu Jul 24 12:03:59 MSK 2025
From: Mike Pall <mike>
(cherry picked from commit 69138082a3166105faa8cbb25fadb1e4298686c0)
This patch refactors the emitting of immediates for the arm64
architecture. The main changes are the following:
* Use `emit_getgl()` and `emit_setgl()` instead of `emit_lso()` where
possible, since this makes the code cleaner.
* `RID_GL` is allocated for `g` at the start of trace emission.
Also, this register is now considered as a candidate base register
for the N-step offset in `emit_kdelta()`.
* The address of `tmptv` is no longer rematerialized to the register
from the constant. It is now calculated by adding the corresponding
offset to `RID_GL`.
Sergey Kaplun:
* added the description for the patch
Part of tarantool/tarantool#11691
---
src/lj_asm.c | 3 +++
src/lj_asm_arm64.h | 23 ++++++++---------------
src/lj_emit_arm64.h | 2 +-
3 files changed, 12 insertions(+), 16 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 9e81dbc9..f163b2e3 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -2113,6 +2113,9 @@ static void asm_setup_regsp(ASMState *as)
#endif
ra_setup(as);
+#if LJ_TARGET_ARM64
+ ra_setkref(as, RID_GL, (intptr_t)J2G(as->J));
+#endif
/* Clear reg/sp for constants. */
for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index a7f059a2..5a6c60b7 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -690,7 +690,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
} else {
/* Otherwise use g->tmptv to hold the TValue. */
asm_tvstore64(as, dest, 0, ref);
- ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest);
+ emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL);
}
}
@@ -1269,17 +1269,13 @@ static void asm_tbar(ASMState *as, IRIns *ir)
{
Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
- Reg gr = ra_allock(as, i64ptr(J2G(as->J)),
- rset_exclude(rset_exclude(RSET_GPR, tab), link));
Reg mark = RID_TMP;
MCLabel l_end = emit_label(as);
emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
- emit_lso(as, A64I_STRx, tab, gr,
- (int32_t)offsetof(global_State, gc.grayagain));
+ emit_setgl(as, tab, gc.grayagain);
emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
- emit_lso(as, A64I_LDRx, link, gr,
- (int32_t)offsetof(global_State, gc.grayagain));
+ emit_getgl(as, link, gc.grayagain);
emit_cond_branch(as, CC_EQ, l_end);
emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
@@ -1299,7 +1295,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
args[0] = ASMREF_TMP1; /* global_State *g */
args[1] = ir->op1; /* TValue *tv */
asm_gencall(as, ci, args);
- ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) );
+ emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
obj = IR(ir->op1)->r;
tmp = ra_scratch(as, rset_exclude(allow, obj));
emit_cond_branch(as, CC_EQ, l_end);
@@ -1808,7 +1804,7 @@ static void asm_gc_check(ASMState *as)
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
IRRef args[2];
MCLabel l_end;
- Reg tmp1, tmp2;
+ Reg tmp2;
ra_evictset(as, RSET_SCRATCH);
l_end = emit_label(as);
/* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
@@ -1816,17 +1812,14 @@ static void asm_gc_check(ASMState *as)
args[0] = ASMREF_TMP1; /* global_State *g */
args[1] = ASMREF_TMP2; /* MSize steps */
asm_gencall(as, ci, args);
- tmp1 = ra_releasetmp(as, ASMREF_TMP1);
+ emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
tmp2 = ra_releasetmp(as, ASMREF_TMP2);
emit_loadi(as, tmp2, as->gcsteps);
/* Jump around GC step if GC total < GC threshold. */
emit_cond_branch(as, CC_LS, l_end);
emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
- emit_lso(as, A64I_LDRx, tmp2, tmp1,
- (int32_t)offsetof(global_State, gc.threshold));
- emit_lso(as, A64I_LDRx, RID_TMP, tmp1,
- (int32_t)offsetof(global_State, gc.total));
- ra_allockreg(as, i64ptr(J2G(as->J)), tmp1);
+ emit_getgl(as, tmp2, gc.threshold);
+ emit_getgl(as, RID_TMP, gc.total);
as->gcsteps = 0;
checkmclim(as);
}
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index 2bb93dd9..184a05ca 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -163,7 +163,7 @@ nopair:
/* Try to find an N-step delta relative to other consts with N < lim. */
static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
{
- RegSet work = ~as->freeset & RSET_GPR;
+ RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL);
if (lim <= 1) return 0; /* Can't beat that. */
while (work) {
Reg r = rset_picktop(work);
--
2.50.0
More information about the Tarantool-patches
mailing list