Hi! Thanks for the review!   >  >>Hi, Maksim! >> >>Thanks for the patch! >> >>LGTM, but I have a bunch of questions to clarify it. >> >>On 28.10.22, Maksim Kokryashkin wrote: >>> Contributed by Edmund Kapusniak. For more info, >>> see #698 and #757. >>> >>> (cherry picked from commit c38747b626b978555324504ec29a110f6b04902f) >>> >>> To allow compiler generate compact unwind info generation >>> for Mach-O, fp must point to the saved fp, and the frame >>> must be specified relative to fp+16. >> >>Is there any link to documentation or source code to inspect this >>behaviour? >Unfortunately, there are no official docs for that. However, there is >a community effort to create one here[1]. Also, this header file from >the Apple’s sources is quite useful. Added both of them to commit >message for those who will get the masculine urge to dive into this. >> >>> ELF unwind info has >>> been updated to also use fp+16 rather than sp+CFRAME_SIZE. >> >>Also, I see that `.byte` notation is replaced with `.uleb128` or >>`.sleb128`. What is the reason? >>Same question for removing names of entries ("_lj_vm_ffi_call.eh:" for >>example). >IINM, it is something Apple expects from the compact unwind info[2]. >> >>> >>> Offset to pointer to personality routine specified as @GOT-. rather >>> than @GOTPCREL. >> >>Does it mean that we use incorrect encoded offset (I see encoding for >>offset is still the same) for our personality routine? >>If so, maybe the other changes are just refactoring? >No, that is not correct. Offset has changed, because any  >`func@GOT` expression is translated into offset. >Moreover, I doubt it is possible to fill in offset to GOT by hand. >What goes after the pointer to the personality routine >is LSDA, according to the Apple’s source[2]. @GOTPCREL and @GOT >are interchangeable most of the time, except for the cases when signed >32-bit RIP references are not enough for you, which seems to be the case here. >> >>> >>> Re-enabled LUAJIT_UNWIND_EXTERNAL by default on OSX. >>> >>> Maxim Kokryashkin: >>> * added the description for the issue and the test >>> >>> Resolves tarantool/tarantool#6096 >>> Part of tarantool/tarantool#7230 >>> --- >>> cmake/SetTargetFlags.cmake | 4 +- >>> src/Makefile.original | 5 +- >>> src/vm_arm64.dasc | 89 ++++++++----------- >>> ...-6096-external-unwinding-on-arm64.test.lua | 13 +++ >>> 4 files changed, 54 insertions(+), 57 deletions(-) >>> create mode 100644 test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua >>> >>> diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake >>> index 8abb6288..7cf26be1 100644 >>> --- a/cmake/SetTargetFlags.cmake >>> +++ b/cmake/SetTargetFlags.cmake >> >> >> >>> diff --git a/src/Makefile.original b/src/Makefile.original >>> index 2d014e43..813d9c12 100644 >>> --- a/src/Makefile.original >>> +++ b/src/Makefile.original >> >> >> >>> diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc >>> index ccfa72bd..f517a808 100644 >>> --- a/src/vm_arm64.dasc >>> +++ b/src/vm_arm64.dasc >> >> >> >>> @@ -504,8 +503,9 @@ static void build_subroutines(BuildCtx *ctx) >>> | ldr GL, L->glref // Setup pointer to global state. >>> | mov BASE, CARG2 >>> | str CARG1, SAVE_PC // Any value outside of bytecode is ok. >>> - | str RC, SAVE_CFRAME >>> - | str fp, L->cframe // Add our C frame to cframe chain. >>> + | add TMP0, sp, #0 >>> + | str RC, SAVE_CFRAME >>> + | str TMP0, L->cframe // Add our C frame to cframe chain. >> >>Why can't we just use the following? >>| str RC, SAVE_CFRAME >>| str sp, L->cframe // Add our C frame to cframe chain. >> >>> | >>> |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). >>> | str L, GL->cur_L >>> @@ -540,8 +540,9 @@ static void build_subroutines(BuildCtx *ctx) >>> | sub RA, RA, RB // Compute -savestack(L, L->top). >>> | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame. >>> | str wzr, SAVE_ERRF // No error function. >>> - | str RC, SAVE_CFRAME >>> - | str fp, L->cframe // Add our C frame to cframe chain. >>> + | add TMP0, sp, #0 >>> + | str RC, SAVE_CFRAME >>> + | str TMP0, L->cframe // Add our C frame to cframe chain. >> >>Ditto. >> >>> | str L, GL->cur_L >>> | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud) >>> | mov BASE, CRET1 >>> @@ -2129,14 +2130,14 @@ static void build_subroutines(BuildCtx *ctx) >> >> >> >>> @@ -3879,7 +3880,7 @@ static void emit_asm_debug(BuildCtx *ctx) >>> "\t.uleb128 0x1\n" >>> "\t.sleb128 -8\n" >>> "\t.byte 30\n" /* Return address is in lr. */ >>> - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ >>> + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ >>> "\t.align 3\n" >>> ".LECIE0:\n\n"); >>> fprintf(ctx->fp, >>> @@ -3889,10 +3890,9 @@ static void emit_asm_debug(BuildCtx *ctx) >>> "\t.long .Lframe0\n" >>> "\t.quad .Lbegin\n" >>> "\t.quad %d\n" >>> - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ >>> "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ >>> "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ >>> - fcofs, CFRAME_SIZE); >>> + fcofs); >>> for (i = 19; i <= 28; i++) /* offset x19-x28 */ >>> fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); >>> for (i = 8; i <= 15; i++) /* offset d8-d15 */ >>> @@ -3909,12 +3909,10 @@ static void emit_asm_debug(BuildCtx *ctx) >>> "\t.long .Lframe0\n" >>> "\t.quad lj_vm_ffi_call\n" >>> "\t.quad %d\n" >>> - "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ >>> "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ >>> "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ >>> "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ >>> "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ >>> - "\t.byte 0xd\n\t.uleb128 0x1d\n" /* def_cfa_register fp */ >>> "\t.align 3\n" >>> ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); >>> #endif >>> @@ -3933,7 +3931,7 @@ static void emit_asm_debug(BuildCtx *ctx) >>> "\t.byte 0x1b\n" /* pcrel|sdata4 */ >>> "\t.long lj_err_unwind_dwarf-.\n" >>> "\t.byte 0x1b\n" /* pcrel|sdata4 */ >>> - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ >>> + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ >>> "\t.align 3\n" >>> ".LECIE1:\n\n"); >>> fprintf(ctx->fp, >>> @@ -3944,10 +3942,9 @@ static void emit_asm_debug(BuildCtx *ctx) >>> "\t.long .Lbegin-.\n" >>> "\t.long %d\n" >>> "\t.uleb128 0\n" /* augmentation length */ >>> - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ >>> "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ >>> "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ >>> - fcofs, CFRAME_SIZE); >>> + fcofs); >>> for (i = 19; i <= 28; i++) /* offset x19-x28 */ >>> fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); >>> for (i = 8; i <= 15; i++) /* offset d8-d15 */ >>> @@ -3969,7 +3966,7 @@ static void emit_asm_debug(BuildCtx *ctx) >>> "\t.byte 30\n" /* Return address is in lr. */ >>> "\t.uleb128 1\n" /* augmentation length */ >>> "\t.byte 0x1b\n" /* pcrel|sdata4 */ >>> - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ >>> + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ >>> "\t.align 3\n" >>> ".LECIE2:\n\n"); >>> fprintf(ctx->fp, >>> @@ -3980,18 +3977,15 @@ static void emit_asm_debug(BuildCtx *ctx) >>> "\t.long lj_vm_ffi_call-.\n" >>> "\t.long %d\n" >>> "\t.uleb128 0\n" /* augmentation length */ >>> - "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ >>> "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ >>> "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ >>> "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ >>> "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ >>> - "\t.byte 0xd\n\t.uleb128 0x1d\n" /* def_cfa_register fp */ >>> "\t.align 3\n" >>> ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); >>> #endif >>> break; >>> - /* Disabled until someone finds a fix. See #698. */ >>> -#if !LJ_NO_UNWIND && 0 >>> +#if !LJ_NO_UNWIND >>> case BUILD_machasm: { >>> #if LJ_HASFFI >>> int fcsize = 0; >>> @@ -4006,14 +4000,14 @@ static void emit_asm_debug(BuildCtx *ctx) >>> "\t.long 0\n" >>> "\t.byte 0x1\n" >>> "\t.ascii \"zPR\\0\"\n" >>> - "\t.byte 0x1\n" >>> - "\t.byte 128-8\n" >>> + "\t.uleb128 0x1\n" >>> + "\t.sleb128 -8\n" >>> "\t.byte 30\n" /* Return address is in lr. */ >>> - "\t.byte 6\n" /* augmentation length */ >>> + "\t.uleb128 6\n" /* augmentation length */ >>> "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ >>> - "\t.long _lj_err_unwind_dwarf@GOTPCREL\n" >>> + "\t.long _lj_err_unwind_dwarf@GOT-.\n" >>> "\t.byte 0x1b\n" /* pcrel|sdata4 */ >>> - "\t.byte 0xc\n\t.byte 31\n\t.byte 0\n" /* def_cfa sp */ >>> + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ >>> "\t.align 3\n" >>> "LECIEX:\n\n"); >>> for (j = 0; j < ctx->nsym; j++) { >>> @@ -4024,7 +4018,6 @@ static void emit_asm_debug(BuildCtx *ctx) >>> if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } >>> #endif >>> fprintf(ctx->fp, >>> - "%s.eh:\n" >>> "LSFDE%d:\n" >>> "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" >>> "\t.long L$set$%d\n" >>> @@ -4032,15 +4025,14 @@ static void emit_asm_debug(BuildCtx *ctx) >>> "\t.long LASFDE%d-EH_frame1\n" >>> "\t.long %s-.\n" >>> "\t.long %d\n" >>> - "\t.byte 0\n" /* augmentation length */ >>> - "\t.byte 0xe\n\t.byte %d\n\t.byte 1\n" /* def_cfa_offset */ >>> - "\t.byte 0x9e\n\t.byte 1\n" /* offset lr */ >>> - "\t.byte 0x9d\n\t.byte 2\n", /* offset fp */ >>> - name, j, j, j, j, j, j, j, name, size, CFRAME_SIZE); >>> + "\t.uleb128 0\n" /* augmentation length */ >>> + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ >>> + "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ >>> + j, j, j, j, j, j, j, name, size); >>> for (i = 19; i <= 28; i++) /* offset x19-x28 */ >>> - fprintf(ctx->fp, "\t.byte 0x%x\n\t.byte %d\n", 0x80+i, i+(3-19)); >>> + fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); >>> for (i = 8; i <= 15; i++) /* offset d8-d15 */ >>> - fprintf(ctx->fp, "\t.byte 5\n\t.byte 0x%x\n\t.byte %d\n", >>> + fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", >>> 64+i, i+(3+(28-19+1)-8)); >>> fprintf(ctx->fp, >>> "\t.align 3\n" >>> @@ -4056,16 +4048,15 @@ static void emit_asm_debug(BuildCtx *ctx) >>> "\t.long 0\n" >>> "\t.byte 0x1\n" >>> "\t.ascii \"zR\\0\"\n" >>> - "\t.byte 0x1\n" >>> - "\t.byte 128-8\n" >>> + "\t.uleb128 0x1\n" >>> + "\t.sleb128 -8\n" >>> "\t.byte 30\n" /* Return address is in lr. */ >>> - "\t.byte 1\n" /* augmentation length */ >>> + "\t.uleb128 1\n" /* augmentation length */ >>> "\t.byte 0x1b\n" /* pcrel|sdata4 */ >>> - "\t.byte 0xc\n\t.byte 31\n\t.byte 0\n" /* def_cfa sp */ >>> + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ >>> "\t.align 3\n" >>> "LECIEY:\n\n"); >>> fprintf(ctx->fp, >>> - "_lj_vm_ffi_call.eh:\n" >>> "LSFDEY:\n" >>> "\t.set L$set$yy,LEFDEY-LASFDEY\n" >>> "\t.long L$set$yy\n" >>> @@ -4073,13 +4064,11 @@ static void emit_asm_debug(BuildCtx *ctx) >>> "\t.long LASFDEY-EH_frame2\n" >>> "\t.long _lj_vm_ffi_call-.\n" >>> "\t.long %d\n" >>> - "\t.byte 0\n" /* augmentation length */ >>> - "\t.byte 0xe\n\t.byte 32\n" /* def_cfa_offset */ >>> - "\t.byte 0x9e\n\t.byte 1\n" /* offset lr */ >>> - "\t.byte 0x9d\n\t.byte 2\n" /* offset fp */ >>> - "\t.byte 0x93\n\t.byte 3\n" /* offset x19 */ >>> - "\t.byte 0x94\n\t.byte 4\n" /* offset x20 */ >>> - "\t.byte 0xd\n\t.uleb128 0x1d\n" /* def_cfa_register fp */ >>> + "\t.uleb128 0\n" /* augmentation length */ >>> + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ >>> + "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ >>> + "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ >>> + "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ >>> "\t.align 3\n" >>> "LEFDEY:\n\n", fcsize); >>> } >>> diff --git a/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua b/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua >>> new file mode 100644 >>> index 00000000..cdeea441 >>> --- /dev/null >>> +++ b/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua >> >> >> >>> -- >>> 2.37.0 (Apple Git-136) >>> >> >>-- >>Best regards, >>Sergey Kaplun >[1]:  https://faultlore.com/blah/compact-unwinding/#unwinding-tables-dwarf-cfi >[2]:  https://opensource.apple.com/source/libunwind/libunwind-35.3/include/mach-o/compact_unwind_encoding.h >-- >Best regards, >Maxim Kokryashkin >