From: Maksim Kokryashkin via Tarantool-patches <tarantool-patches@dev.tarantool.org> To: tarantool-patches@dev.tarantool.org, sergos@tarantool.org, skaplun@tarantool.org Subject: [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding. Date: Mon, 26 Sep 2022 18:55:01 +0300 [thread overview] Message-ID: <725566e75d613842c9f80021e500cdb86a84b3fa.1664207262.git.max.kokryashkin@gmail.com> (raw) In-Reply-To: <cover.1664207262.git.max.kokryashkin@gmail.com> From: Mike Pall <mike> Reported by Yichun Zhang. Fixes LuaJIT/LuaJIT#722. May help towards fixing LuaJIT/LuaJIT#698, too. (cherry picked from commit 421c4c798791d27b7f967df39891c4e4fa1d107c) The `_Unwind_Find_FDE` fails to find FDE (frame descriptor element) for `lj_vm_ffi_call` in DWARF unwind info, despite the presence of its data in `.debug_frame` section. Reordering of interpreter stack frame fixes the issue. For more info, see https://github.com/LuaJIT/LuaJIT/issues/722 Maxim Kokryashkin: * added the description for the problem Needed for tarantool/tarantool#6096 Needed for tarantool/tarantool#7230 --- src/lj_frame.h | 12 +-- src/vm_arm64.dasc | 189 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 152 insertions(+), 49 deletions(-) diff --git a/src/lj_frame.h b/src/lj_frame.h index 9fd63fa2..1e4adaa3 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -192,12 +192,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ #endif #define CFRAME_SHIFT_MULTRES 3 #elif LJ_TARGET_ARM64 -#define CFRAME_OFS_ERRF 196 -#define CFRAME_OFS_NRES 200 -#define CFRAME_OFS_PREV 160 -#define CFRAME_OFS_L 176 -#define CFRAME_OFS_PC 168 -#define CFRAME_OFS_MULTRES 192 +#define CFRAME_OFS_ERRF 36 +#define CFRAME_OFS_NRES 40 +#define CFRAME_OFS_PREV 0 +#define CFRAME_OFS_L 16 +#define CFRAME_OFS_PC 8 +#define CFRAME_OFS_MULTRES 32 #define CFRAME_SIZE 208 #define CFRAME_SHIFT_MULTRES 3 #elif LJ_TARGET_PPC diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 313cc94f..ad57bca3 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -81,47 +81,49 @@ | |.define CFRAME_SPACE, 208 |//----- 16 byte aligned, <-- sp entering interpreter -|// Unused [sp, #204] // 32 bit values -|.define SAVE_NRES, [sp, #200] -|.define SAVE_ERRF, [sp, #196] -|.define SAVE_MULTRES, [sp, #192] -|.define TMPD, [sp, #184] // 64 bit values -|.define SAVE_L, [sp, #176] -|.define SAVE_PC, [sp, #168] -|.define SAVE_CFRAME, [sp, #160] -|.define SAVE_FPR_, 96 // 96+8*8: 64 bit FPR saves -|.define SAVE_GPR_, 16 // 16+10*8: 64 bit GPR saves -|.define SAVE_LR, [sp, #8] -|.define SAVE_FP, [sp] +|.define SAVE_LR, [sp, #200] +|.define SAVE_FP, [sp, #192] +|.define SAVE_GPR_, 112 // 112+10*8: 64 bit GPR saves +|.define SAVE_FPR_, 48 // 48+8*8: 64 bit FPR saves +|// Unused [sp, #44] // 32 bit values +|.define SAVE_NRES, [sp, #40] +|.define SAVE_ERRF, [sp, #36] +|.define SAVE_MULTRES, [sp, #32] +|.define TMPD, [sp, #24] // 64 bit values +|.define SAVE_L, [sp, #16] +|.define SAVE_PC, [sp, #8] +|.define SAVE_CFRAME, [sp, #0] |//----- 16 byte aligned, <-- sp while in interpreter. | -|.define TMPDofs, #184 +|.define TMPDofs, #24 | |.macro save_, gpr1, gpr2, fpr1, fpr2 -| stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] -| stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] +| stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8] +| stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8] |.endmacro |.macro rest_, gpr1, gpr2, fpr1, fpr2 -| ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] -| ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] +| ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8] +| ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8] |.endmacro | |.macro saveregs -| stp fp, lr, [sp, #-CFRAME_SPACE]! +| sub sp, sp, # CFRAME_SPACE +| stp fp, lr, SAVE_FP | add fp, sp, #0 -| stp x19, x20, [sp, # SAVE_GPR_] +| stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8] | save_ 21, 22, 8, 9 | save_ 23, 24, 10, 11 | save_ 25, 26, 12, 13 | save_ 27, 28, 14, 15 |.endmacro |.macro restoreregs -| ldp x19, x20, [sp, # SAVE_GPR_] +| ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8] | rest_ 21, 22, 8, 9 | rest_ 23, 24, 10, 11 | rest_ 25, 26, 12, 13 | rest_ 27, 28, 14, 15 -| ldp fp, lr, [sp], # CFRAME_SPACE +| ldp fp, lr, SAVE_FP +| add sp, sp, # CFRAME_SPACE |.endmacro | |// Type definitions. Some of these are only used for documentation. @@ -2125,9 +2127,9 @@ static void build_subroutines(BuildCtx *ctx) | // Caveat: needs special frame unwinding, see below. |.if FFI | .type CCSTATE, CCallState, x19 - | stp fp, lr, [sp, #-32]! + | stp x20, CCSTATE, [sp, #-32]! + | stp fp, lr, [sp, #16] | add fp, sp, #0 - | str CCSTATE, [sp, #16] | mov CCSTATE, x0 | ldr TMP0w, CCSTATE:x0->spadj | ldrb TMP1w, CCSTATE->nsp @@ -2156,8 +2158,8 @@ static void build_subroutines(BuildCtx *ctx) | stp x0, x1, CCSTATE->gpr[0] | stp d0, d1, CCSTATE->fpr[0] | stp d2, d3, CCSTATE->fpr[2] - | ldr CCSTATE, [sp, #16] - | ldp fp, lr, [sp], #32 + | ldp fp, lr, [sp, #16] + | ldp x20, CCSTATE, [sp], #32 | ret |.endif |// Note: vm_ffi_call must be the last function in this object file! @@ -2887,7 +2889,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_GGET: | // RA = dst, RC = str_const (~) case BC_GSET: - | // RA = dst, RC = str_const (~) + | // RA = src, RC = str_const (~) | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] | mvn RC, RC | and LFUNC:CARG1, CARG1, #LJ_GCVMASK @@ -3863,7 +3865,7 @@ static int build_backend(BuildCtx *ctx) static void emit_asm_debug(BuildCtx *ctx) { int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i, cf = CFRAME_SIZE >> 3; + int i; switch (ctx->mode) { case BUILD_elfasm: fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); @@ -3888,14 +3890,14 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.quad .Lbegin\n" "\t.quad %d\n" "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ - fcofs, CFRAME_SIZE, cf, cf-1); + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ + fcofs, CFRAME_SIZE); for (i = 19; i <= 28; i++) /* offset x19-x28 */ - fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); + fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); for (i = 8; i <= 15; i++) /* offset d8-d15 */ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", - 64+i, cf-i-4); + 64+i, i+(3+(28-19+1)-8)); fprintf(ctx->fp, "\t.align 3\n" ".LEFDE0:\n\n"); @@ -3908,9 +3910,11 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.quad lj_vm_ffi_call\n" "\t.quad %d\n" "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ - "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ + "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ + "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ + "\t.byte 0xd\n\t.uleb128 0x1d\n" /* def_cfa_register fp */ "\t.align 3\n" ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); #endif @@ -3941,14 +3945,14 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long %d\n" "\t.uleb128 0\n" /* augmentation length */ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ - fcofs, CFRAME_SIZE, cf, cf-1); + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ + fcofs, CFRAME_SIZE); for (i = 19; i <= 28; i++) /* offset x19-x28 */ - fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); + fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); for (i = 8; i <= 15; i++) /* offset d8-d15 */ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", - 64+i, cf-i-4); + 64+i, i+(3+(28-19+1)-8)); fprintf(ctx->fp, "\t.align 3\n" ".LEFDE2:\n\n"); @@ -3977,13 +3981,112 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.long %d\n" "\t.uleb128 0\n" /* augmentation length */ "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ - "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ + "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ + "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ + "\t.byte 0xd\n\t.uleb128 0x1d\n" /* def_cfa_register fp */ "\t.align 3\n" ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); #endif break; +#if !LJ_NO_UNWIND + case BUILD_machasm: { +#if LJ_HASFFI + int fcsize = 0; +#endif + int j; + fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); + fprintf(ctx->fp, + "EH_frame1:\n" + "\t.set L$set$x,LECIEX-LSCIEX\n" + "\t.long L$set$x\n" + "LSCIEX:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.ascii \"zPR\\0\"\n" + "\t.byte 0x1\n" + "\t.byte 128-8\n" + "\t.byte 30\n" /* Return address is in lr. */ + "\t.byte 6\n" /* augmentation length */ + "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ + "\t.long _lj_err_unwind_dwarf@GOTPCREL\n" + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.byte 31\n\t.byte 0\n" /* def_cfa sp */ + "\t.align 3\n" + "LECIEX:\n\n"); + for (j = 0; j < ctx->nsym; j++) { + const char *name = ctx->sym[j].name; + int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs; + if (size == 0) continue; +#if LJ_HASFFI + if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } +#endif + fprintf(ctx->fp, + "%s.eh:\n" + "LSFDE%d:\n" + "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" + "\t.long L$set$%d\n" + "LASFDE%d:\n" + "\t.long LASFDE%d-EH_frame1\n" + "\t.long %s-.\n" + "\t.long %d\n" + "\t.byte 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.byte %d\n\t.byte 1\n" /* def_cfa_offset */ + "\t.byte 0x9e\n\t.byte 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.byte 2\n", /* offset fp */ + name, j, j, j, j, j, j, j, name, size, CFRAME_SIZE); + for (i = 19; i <= 28; i++) /* offset x19-x28 */ + fprintf(ctx->fp, "\t.byte 0x%x\n\t.byte %d\n", 0x80+i, i+(3-19)); + for (i = 8; i <= 15; i++) /* offset d8-d15 */ + fprintf(ctx->fp, "\t.byte 5\n\t.byte 0x%x\n\t.byte %d\n", + 64+i, i+(3+(28-19+1)-8)); + fprintf(ctx->fp, + "\t.align 3\n" + "LEFDE%d:\n\n", j); + } +#if LJ_HASFFI + if (fcsize) { + fprintf(ctx->fp, + "EH_frame2:\n" + "\t.set L$set$y,LECIEY-LSCIEY\n" + "\t.long L$set$y\n" + "LSCIEY:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.ascii \"zR\\0\"\n" + "\t.byte 0x1\n" + "\t.byte 128-8\n" + "\t.byte 30\n" /* Return address is in lr. */ + "\t.byte 1\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.byte 31\n\t.byte 0\n" /* def_cfa sp */ + "\t.align 3\n" + "LECIEY:\n\n"); + fprintf(ctx->fp, + "_lj_vm_ffi_call.eh:\n" + "LSFDEY:\n" + "\t.set L$set$yy,LEFDEY-LASFDEY\n" + "\t.long L$set$yy\n" + "LASFDEY:\n" + "\t.long LASFDEY-EH_frame2\n" + "\t.long _lj_vm_ffi_call-.\n" + "\t.long %d\n" + "\t.byte 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.byte 32\n" /* def_cfa_offset */ + "\t.byte 0x9e\n\t.byte 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.byte 2\n" /* offset fp */ + "\t.byte 0x93\n\t.byte 3\n" /* offset x19 */ + "\t.byte 0x94\n\t.byte 4\n" /* offset x20 */ + "\t.byte 0xd\n\t.uleb128 0x1d\n" /* def_cfa_register fp */ + "\t.align 3\n" + "LEFDEY:\n\n", fcsize); + } +#endif + fprintf(ctx->fp, ".subsections_via_symbols\n"); + } + break; +#endif default: break; } -- 2.32.1 (Apple Git-133)
next prev parent reply other threads:[~2022-09-26 15:57 UTC|newest] Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top 2022-09-26 15:54 [Tarantool-patches] [PATCH luajit 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches 2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches 2022-09-28 8:20 ` sergos via Tarantool-patches 2022-10-03 7:36 ` Sergey Kaplun via Tarantool-patches 2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches 2022-10-03 10:54 ` Sergey Kaplun via Tarantool-patches 2022-10-03 15:58 ` sergos via Tarantool-patches 2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches 2022-10-03 11:08 ` Sergey Kaplun via Tarantool-patches 2022-10-04 8:26 ` sergos via Tarantool-patches 2022-09-26 15:55 ` Maksim Kokryashkin via Tarantool-patches [this message] 2022-10-04 15:48 ` [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding sergos via Tarantool-patches 2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 5/6] OSX/ARM64: Disable unwind info Maksim Kokryashkin via Tarantool-patches 2022-10-04 15:52 ` sergos via Tarantool-patches 2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 6/6] OSX/ARM64: Fix external unwinding Maksim Kokryashkin via Tarantool-patches 2022-10-04 16:05 ` sergos via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 4/6] ARM64: Reorder interpreter stack frame and fix unwinding Maksim Kokryashkin via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 5/6] OSX/ARM64: Disable unwind info Maksim Kokryashkin via Tarantool-patches 2022-10-06 9:48 ` [Tarantool-patches] [PATCH luajit v2 6/6] OSX/ARM64: Fix external unwinding Maksim Kokryashkin via Tarantool-patches
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=725566e75d613842c9f80021e500cdb86a84b3fa.1664207262.git.max.kokryashkin@gmail.com \ --to=tarantool-patches@dev.tarantool.org \ --cc=max.kokryashkin@gmail.com \ --cc=sergos@tarantool.org \ --cc=skaplun@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding.' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox