From: sergos via Tarantool-patches <tarantool-patches@dev.tarantool.org> To: Maksim Kokryashkin <max.kokryashkin@gmail.com> Cc: tarantool-patches@dev.tarantool.org Subject: Re: [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding. Date: Tue, 4 Oct 2022 18:48:59 +0300 [thread overview] Message-ID: <382D9CBF-CB51-4D2F-9B4A-FC20F206371E@tarantool.org> (raw) In-Reply-To: <725566e75d613842c9f80021e500cdb86a84b3fa.1664207262.git.max.kokryashkin@gmail.com> Hi! Thanks for the patch! I believe there should be a revamp of the message as per SergeyK comments to the previous patch - no direct links to LuaJIT repo. And frankly, the magic behind the reordering is not clear. Can you elaborate on the matter a little? Regards, Sergos > On 26 Sep 2022, at 18:55, Maksim Kokryashkin <max.kokryashkin@gmail.com> wrote: > > From: Mike Pall <mike> > > Reported by Yichun Zhang. Fixes LuaJIT/LuaJIT#722. > May help towards fixing LuaJIT/LuaJIT#698, too. > > (cherry picked from commit 421c4c798791d27b7f967df39891c4e4fa1d107c) > > The `_Unwind_Find_FDE` fails to find FDE (frame descriptor element) > for `lj_vm_ffi_call` in DWARF unwind info, despite the presence > of its data in `.debug_frame` section. > > Reordering of interpreter stack frame fixes the issue. > > For more info, see https://github.com/LuaJIT/LuaJIT/issues/722 > > Maxim Kokryashkin: > * added the description for the problem > > Needed for tarantool/tarantool#6096 > Needed for tarantool/tarantool#7230 > --- > src/lj_frame.h | 12 +-- > src/vm_arm64.dasc | 189 +++++++++++++++++++++++++++++++++++----------- > 2 files changed, 152 insertions(+), 49 deletions(-) > > diff --git a/src/lj_frame.h b/src/lj_frame.h > index 9fd63fa2..1e4adaa3 100644 > --- a/src/lj_frame.h > +++ b/src/lj_frame.h > @@ -192,12 +192,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ > #endif > #define CFRAME_SHIFT_MULTRES 3 > #elif LJ_TARGET_ARM64 > -#define CFRAME_OFS_ERRF 196 > -#define CFRAME_OFS_NRES 200 > -#define CFRAME_OFS_PREV 160 > -#define CFRAME_OFS_L 176 > -#define CFRAME_OFS_PC 168 > -#define CFRAME_OFS_MULTRES 192 > +#define CFRAME_OFS_ERRF 36 > +#define CFRAME_OFS_NRES 40 > +#define CFRAME_OFS_PREV 0 > +#define CFRAME_OFS_L 16 > +#define CFRAME_OFS_PC 8 > +#define CFRAME_OFS_MULTRES 32 > #define CFRAME_SIZE 208 > #define CFRAME_SHIFT_MULTRES 3 > #elif LJ_TARGET_PPC > diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc > index 313cc94f..ad57bca3 100644 > --- a/src/vm_arm64.dasc > +++ b/src/vm_arm64.dasc > @@ -81,47 +81,49 @@ > | > |.define CFRAME_SPACE, 208 > |//----- 16 byte aligned, <-- sp entering interpreter > -|// Unused [sp, #204] // 32 bit values > -|.define SAVE_NRES, [sp, #200] > -|.define SAVE_ERRF, [sp, #196] > -|.define SAVE_MULTRES, [sp, #192] > -|.define TMPD, [sp, #184] // 64 bit values > -|.define SAVE_L, [sp, #176] > -|.define SAVE_PC, [sp, #168] > -|.define SAVE_CFRAME, [sp, #160] > -|.define SAVE_FPR_, 96 // 96+8*8: 64 bit FPR saves > -|.define SAVE_GPR_, 16 // 16+10*8: 64 bit GPR saves > -|.define SAVE_LR, [sp, #8] > -|.define SAVE_FP, [sp] > +|.define SAVE_LR, [sp, #200] > +|.define SAVE_FP, [sp, #192] > +|.define SAVE_GPR_, 112 // 112+10*8: 64 bit GPR saves (Not for change, just random thoughts) Why it is not .define SAVE_GPR_, SAVE_FPR_+8*8 > +|.define SAVE_FPR_, 48 // 48+8*8: 64 bit FPR saves > +|// Unused [sp, #44] // 32 bit values > +|.define SAVE_NRES, [sp, #40] > +|.define SAVE_ERRF, [sp, #36] > +|.define SAVE_MULTRES, [sp, #32] > +|.define TMPD, [sp, #24] // 64 bit values > +|.define SAVE_L, [sp, #16] > +|.define SAVE_PC, [sp, #8] > +|.define SAVE_CFRAME, [sp, #0] > |//----- 16 byte aligned, <-- sp while in interpreter. > | > -|.define TMPDofs, #184 > +|.define TMPDofs, #24 > | > |.macro save_, gpr1, gpr2, fpr1, fpr2 > -| stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] > -| stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] > +| stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8] > +| stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8] (Not for change, just random thoughts) Should those 14 and 27 magic numbers be a define also, describing the idea behind them? > |.endmacro > |.macro rest_, gpr1, gpr2, fpr1, fpr2 > -| ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] > -| ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] > +| ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8] > +| ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8] > |.endmacro > | > |.macro saveregs > -| stp fp, lr, [sp, #-CFRAME_SPACE]! > +| sub sp, sp, # CFRAME_SPACE > +| stp fp, lr, SAVE_FP > | add fp, sp, #0 > -| stp x19, x20, [sp, # SAVE_GPR_] > +| stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8] > | save_ 21, 22, 8, 9 > | save_ 23, 24, 10, 11 > | save_ 25, 26, 12, 13 > | save_ 27, 28, 14, 15 > |.endmacro > |.macro restoreregs > -| ldp x19, x20, [sp, # SAVE_GPR_] > +| ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8] > | rest_ 21, 22, 8, 9 > | rest_ 23, 24, 10, 11 > | rest_ 25, 26, 12, 13 > | rest_ 27, 28, 14, 15 > -| ldp fp, lr, [sp], # CFRAME_SPACE > +| ldp fp, lr, SAVE_FP > +| add sp, sp, # CFRAME_SPACE > |.endmacro > | > |// Type definitions. Some of these are only used for documentation. > @@ -2125,9 +2127,9 @@ static void build_subroutines(BuildCtx *ctx) > | // Caveat: needs special frame unwinding, see below. > |.if FFI > | .type CCSTATE, CCallState, x19 > - | stp fp, lr, [sp, #-32]! > + | stp x20, CCSTATE, [sp, #-32]! > + | stp fp, lr, [sp, #16] > | add fp, sp, #0 > - | str CCSTATE, [sp, #16] > | mov CCSTATE, x0 > | ldr TMP0w, CCSTATE:x0->spadj > | ldrb TMP1w, CCSTATE->nsp > @@ -2156,8 +2158,8 @@ static void build_subroutines(BuildCtx *ctx) > | stp x0, x1, CCSTATE->gpr[0] > | stp d0, d1, CCSTATE->fpr[0] > | stp d2, d3, CCSTATE->fpr[2] > - | ldr CCSTATE, [sp, #16] > - | ldp fp, lr, [sp], #32 > + | ldp fp, lr, [sp, #16] > + | ldp x20, CCSTATE, [sp], #32 > | ret > |.endif > |// Note: vm_ffi_call must be the last function in this object file! > @@ -2887,7 +2889,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) > case BC_GGET: > | // RA = dst, RC = str_const (~) > case BC_GSET: > - | // RA = dst, RC = str_const (~) > + | // RA = src, RC = str_const (~) > | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] > | mvn RC, RC > | and LFUNC:CARG1, CARG1, #LJ_GCVMASK > @@ -3863,7 +3865,7 @@ static int build_backend(BuildCtx *ctx) > static void emit_asm_debug(BuildCtx *ctx) > { > int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); > - int i, cf = CFRAME_SIZE >> 3; > + int i; > switch (ctx->mode) { > case BUILD_elfasm: > fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); > @@ -3888,14 +3890,14 @@ static void emit_asm_debug(BuildCtx *ctx) > "\t.quad .Lbegin\n" > "\t.quad %d\n" > "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ > - "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ > - "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ > - fcofs, CFRAME_SIZE, cf, cf-1); > + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ > + "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ > + fcofs, CFRAME_SIZE); > for (i = 19; i <= 28; i++) /* offset x19-x28 */ > - fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); > + fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); > for (i = 8; i <= 15; i++) /* offset d8-d15 */ > fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", > - 64+i, cf-i-4); > + 64+i, i+(3+(28-19+1)-8)); > fprintf(ctx->fp, > "\t.align 3\n" > ".LEFDE0:\n\n"); > @@ -3908,9 +3910,11 @@ static void emit_asm_debug(BuildCtx *ctx) > "\t.quad lj_vm_ffi_call\n" > "\t.quad %d\n" > "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ > - "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ > - "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ > - "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ > + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ > + "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ > + "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ > + "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ > + "\t.byte 0xd\n\t.uleb128 0x1d\n" /* def_cfa_register fp */ > "\t.align 3\n" > ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); > #endif > @@ -3941,14 +3945,14 @@ static void emit_asm_debug(BuildCtx *ctx) > "\t.long %d\n" > "\t.uleb128 0\n" /* augmentation length */ > "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ > - "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ > - "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ > - fcofs, CFRAME_SIZE, cf, cf-1); > + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ > + "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ > + fcofs, CFRAME_SIZE); > for (i = 19; i <= 28; i++) /* offset x19-x28 */ > - fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); > + fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); > for (i = 8; i <= 15; i++) /* offset d8-d15 */ > fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", > - 64+i, cf-i-4); > + 64+i, i+(3+(28-19+1)-8)); > fprintf(ctx->fp, > "\t.align 3\n" > ".LEFDE2:\n\n"); > @@ -3977,13 +3981,112 @@ static void emit_asm_debug(BuildCtx *ctx) > "\t.long %d\n" > "\t.uleb128 0\n" /* augmentation length */ > "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ > - "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ > - "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ > - "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ > + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ > + "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ > + "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ > + "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ > + "\t.byte 0xd\n\t.uleb128 0x1d\n" /* def_cfa_register fp */ > "\t.align 3\n" > ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); > #endif > break; > +#if !LJ_NO_UNWIND > + case BUILD_machasm: { > +#if LJ_HASFFI > + int fcsize = 0; > +#endif > + int j; > + fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); > + fprintf(ctx->fp, > + "EH_frame1:\n" > + "\t.set L$set$x,LECIEX-LSCIEX\n" > + "\t.long L$set$x\n" > + "LSCIEX:\n" > + "\t.long 0\n" > + "\t.byte 0x1\n" > + "\t.ascii \"zPR\\0\"\n" > + "\t.byte 0x1\n" > + "\t.byte 128-8\n" > + "\t.byte 30\n" /* Return address is in lr. */ > + "\t.byte 6\n" /* augmentation length */ > + "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ > + "\t.long _lj_err_unwind_dwarf@GOTPCREL\n" > + "\t.byte 0x1b\n" /* pcrel|sdata4 */ > + "\t.byte 0xc\n\t.byte 31\n\t.byte 0\n" /* def_cfa sp */ > + "\t.align 3\n" > + "LECIEX:\n\n"); > + for (j = 0; j < ctx->nsym; j++) { > + const char *name = ctx->sym[j].name; > + int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs; > + if (size == 0) continue; > +#if LJ_HASFFI > + if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } > +#endif > + fprintf(ctx->fp, > + "%s.eh:\n" > + "LSFDE%d:\n" > + "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" > + "\t.long L$set$%d\n" > + "LASFDE%d:\n" > + "\t.long LASFDE%d-EH_frame1\n" > + "\t.long %s-.\n" > + "\t.long %d\n" > + "\t.byte 0\n" /* augmentation length */ > + "\t.byte 0xe\n\t.byte %d\n\t.byte 1\n" /* def_cfa_offset */ > + "\t.byte 0x9e\n\t.byte 1\n" /* offset lr */ > + "\t.byte 0x9d\n\t.byte 2\n", /* offset fp */ > + name, j, j, j, j, j, j, j, name, size, CFRAME_SIZE); > + for (i = 19; i <= 28; i++) /* offset x19-x28 */ > + fprintf(ctx->fp, "\t.byte 0x%x\n\t.byte %d\n", 0x80+i, i+(3-19)); > + for (i = 8; i <= 15; i++) /* offset d8-d15 */ > + fprintf(ctx->fp, "\t.byte 5\n\t.byte 0x%x\n\t.byte %d\n", > + 64+i, i+(3+(28-19+1)-8)); > + fprintf(ctx->fp, > + "\t.align 3\n" > + "LEFDE%d:\n\n", j); > + } > +#if LJ_HASFFI > + if (fcsize) { > + fprintf(ctx->fp, > + "EH_frame2:\n" > + "\t.set L$set$y,LECIEY-LSCIEY\n" > + "\t.long L$set$y\n" > + "LSCIEY:\n" > + "\t.long 0\n" > + "\t.byte 0x1\n" > + "\t.ascii \"zR\\0\"\n" > + "\t.byte 0x1\n" > + "\t.byte 128-8\n" > + "\t.byte 30\n" /* Return address is in lr. */ > + "\t.byte 1\n" /* augmentation length */ > + "\t.byte 0x1b\n" /* pcrel|sdata4 */ > + "\t.byte 0xc\n\t.byte 31\n\t.byte 0\n" /* def_cfa sp */ > + "\t.align 3\n" > + "LECIEY:\n\n"); > + fprintf(ctx->fp, > + "_lj_vm_ffi_call.eh:\n" > + "LSFDEY:\n" > + "\t.set L$set$yy,LEFDEY-LASFDEY\n" > + "\t.long L$set$yy\n" > + "LASFDEY:\n" > + "\t.long LASFDEY-EH_frame2\n" > + "\t.long _lj_vm_ffi_call-.\n" > + "\t.long %d\n" > + "\t.byte 0\n" /* augmentation length */ > + "\t.byte 0xe\n\t.byte 32\n" /* def_cfa_offset */ > + "\t.byte 0x9e\n\t.byte 1\n" /* offset lr */ > + "\t.byte 0x9d\n\t.byte 2\n" /* offset fp */ > + "\t.byte 0x93\n\t.byte 3\n" /* offset x19 */ > + "\t.byte 0x94\n\t.byte 4\n" /* offset x20 */ > + "\t.byte 0xd\n\t.uleb128 0x1d\n" /* def_cfa_register fp */ > + "\t.align 3\n" > + "LEFDEY:\n\n", fcsize); > + } > +#endif > + fprintf(ctx->fp, ".subsections_via_symbols\n"); > + } > + break; > +#endif > default: > break; > } > -- > 2.32.1 (Apple Git-133) >
next prev parent reply other threads:[~2022-10-04 15:49 UTC|newest] Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top 2022-09-26 15:54 [Tarantool-patches] [PATCH luajit 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches 2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches 2022-09-28 8:20 ` sergos via Tarantool-patches 2022-10-03 7:36 ` Sergey Kaplun via Tarantool-patches 2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches 2022-10-03 10:54 ` Sergey Kaplun via Tarantool-patches 2022-10-03 15:58 ` sergos via Tarantool-patches 2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches 2022-10-03 11:08 ` Sergey Kaplun via Tarantool-patches 2022-10-04 8:26 ` sergos via Tarantool-patches 2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding Maksim Kokryashkin via Tarantool-patches 2022-10-04 15:48 ` sergos via Tarantool-patches [this message] 2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 5/6] OSX/ARM64: Disable unwind info Maksim Kokryashkin via Tarantool-patches 2022-10-04 15:52 ` sergos via Tarantool-patches 2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 6/6] OSX/ARM64: Fix external unwinding Maksim Kokryashkin via Tarantool-patches 2022-10-04 16:05 ` sergos via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 4/6] ARM64: Reorder interpreter stack frame and fix unwinding Maksim Kokryashkin via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 5/6] OSX/ARM64: Disable unwind info Maksim Kokryashkin via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 6/6] OSX/ARM64: Fix external unwinding Maksim Kokryashkin via Tarantool-patches
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=382D9CBF-CB51-4D2F-9B4A-FC20F206371E@tarantool.org \ --to=tarantool-patches@dev.tarantool.org \ --cc=max.kokryashkin@gmail.com \ --cc=sergos@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding.' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox