Hi!
Thanks for the review!
 
 
Hi, Maksim!

Thanks for the patch!

LGTM, but I have a bunch of questions to clarify it.

On 28.10.22, Maksim Kokryashkin wrote:
> Contributed by Edmund Kapusniak. For more info,
> see #698 and #757.
>
> (cherry picked from commit c38747b626b978555324504ec29a110f6b04902f)
>
> To allow compiler generate compact unwind info generation
> for Mach-O, fp must point to the saved fp, and the frame
> must be specified relative to fp+16.

Is there any link to documentation or source code to inspect this
behaviour?
Unfortunately, there are no official docs for that. However, there is
a community effort to create one here[1]. Also, this header file from
the Apple’s sources is quite useful. Added both of them to commit
message for those who will get the masculine urge to dive into this.

> ELF unwind info has
> been updated to also use fp+16 rather than sp+CFRAME_SIZE.

Also, I see that `.byte` notation is replaced with `.uleb128` or
`.sleb128`. What is the reason?
Same question for removing names of entries ("_lj_vm_ffi_call.eh:" for
example).
IINM, it is something Apple expects from the compact unwind info[2].

>
> Offset to pointer to personality routine specified as @GOT-. rather
> than @GOTPCREL.

Does it mean that we use incorrect encoded offset (I see encoding for
offset is still the same) for our personality routine?
If so, maybe the other changes are just refactoring?
No, that is not correct. Offset has changed, because any 
`func@GOT` expression is translated into offset.
Moreover, I doubt it is possible to fill in offset to GOT by hand.
What goes after the pointer to the personality routine
is LSDA, according to the Apple’s source[2]. @GOTPCREL and @GOT
are interchangeable most of the time, except for the cases when signed
32-bit RIP references are not enough for you, which seems to be the case here.

>
> Re-enabled LUAJIT_UNWIND_EXTERNAL by default on OSX.
>
> Maxim Kokryashkin:
> * added the description for the issue and the test
>
> Resolves tarantool/tarantool#6096
> Part of tarantool/tarantool#7230
> ---
> cmake/SetTargetFlags.cmake | 4 +-
> src/Makefile.original | 5 +-
> src/vm_arm64.dasc | 89 ++++++++-----------
> ...-6096-external-unwinding-on-arm64.test.lua | 13 +++
> 4 files changed, 54 insertions(+), 57 deletions(-)
> create mode 100644 test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua
>
> diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
> index 8abb6288..7cf26be1 100644
> --- a/cmake/SetTargetFlags.cmake
> +++ b/cmake/SetTargetFlags.cmake

<snipped>

> diff --git a/src/Makefile.original b/src/Makefile.original
> index 2d014e43..813d9c12 100644
> --- a/src/Makefile.original
> +++ b/src/Makefile.original

<snipped>

> diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
> index ccfa72bd..f517a808 100644
> --- a/src/vm_arm64.dasc
> +++ b/src/vm_arm64.dasc

<snipped>

> @@ -504,8 +503,9 @@ static void build_subroutines(BuildCtx *ctx)
> | ldr GL, L->glref // Setup pointer to global state.
> | mov BASE, CARG2
> | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
> - | str RC, SAVE_CFRAME
> - | str fp, L->cframe // Add our C frame to cframe chain.
> + | add TMP0, sp, #0
> + | str RC, SAVE_CFRAME
> + | str TMP0, L->cframe // Add our C frame to cframe chain.

Why can't we just use the following?
| str RC, SAVE_CFRAME
| str sp, L->cframe // Add our C frame to cframe chain.

> |
> |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
> | str L, GL->cur_L
> @@ -540,8 +540,9 @@ static void build_subroutines(BuildCtx *ctx)
> | sub RA, RA, RB // Compute -savestack(L, L->top).
> | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame.
> | str wzr, SAVE_ERRF // No error function.
> - | str RC, SAVE_CFRAME
> - | str fp, L->cframe // Add our C frame to cframe chain.
> + | add TMP0, sp, #0
> + | str RC, SAVE_CFRAME
> + | str TMP0, L->cframe // Add our C frame to cframe chain.

Ditto.

> | str L, GL->cur_L
> | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
> | mov BASE, CRET1
> @@ -2129,14 +2130,14 @@ static void build_subroutines(BuildCtx *ctx)

<snipped>

> @@ -3879,7 +3880,7 @@ static void emit_asm_debug(BuildCtx *ctx)
> "\t.uleb128 0x1\n"
> "\t.sleb128 -8\n"
> "\t.byte 30\n" /* Return address is in lr. */
> - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
> + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
> "\t.align 3\n"
> ".LECIE0:\n\n");
> fprintf(ctx->fp,
> @@ -3889,10 +3890,9 @@ static void emit_asm_debug(BuildCtx *ctx)
> "\t.long .Lframe0\n"
> "\t.quad .Lbegin\n"
> "\t.quad %d\n"
> - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
> "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
> "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */
> - fcofs, CFRAME_SIZE);
> + fcofs);
> for (i = 19; i <= 28; i++) /* offset x19-x28 */
> fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
> for (i = 8; i <= 15; i++) /* offset d8-d15 */
> @@ -3909,12 +3909,10 @@ static void emit_asm_debug(BuildCtx *ctx)
> "\t.long .Lframe0\n"
> "\t.quad lj_vm_ffi_call\n"
> "\t.quad %d\n"
> - "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
> "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
> "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */
> "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */
> "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */
> - "\t.byte 0xd\n\t.uleb128 0x1d\n" /* def_cfa_register fp */
> "\t.align 3\n"
> ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
> #endif
> @@ -3933,7 +3931,7 @@ static void emit_asm_debug(BuildCtx *ctx)
> "\t.byte 0x1b\n" /* pcrel|sdata4 */
> "\t.long lj_err_unwind_dwarf-.\n"
> "\t.byte 0x1b\n" /* pcrel|sdata4 */
> - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
> + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
> "\t.align 3\n"
> ".LECIE1:\n\n");
> fprintf(ctx->fp,
> @@ -3944,10 +3942,9 @@ static void emit_asm_debug(BuildCtx *ctx)
> "\t.long .Lbegin-.\n"
> "\t.long %d\n"
> "\t.uleb128 0\n" /* augmentation length */
> - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
> "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
> "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */
> - fcofs, CFRAME_SIZE);
> + fcofs);
> for (i = 19; i <= 28; i++) /* offset x19-x28 */
> fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
> for (i = 8; i <= 15; i++) /* offset d8-d15 */
> @@ -3969,7 +3966,7 @@ static void emit_asm_debug(BuildCtx *ctx)
> "\t.byte 30\n" /* Return address is in lr. */
> "\t.uleb128 1\n" /* augmentation length */
> "\t.byte 0x1b\n" /* pcrel|sdata4 */
> - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
> + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
> "\t.align 3\n"
> ".LECIE2:\n\n");
> fprintf(ctx->fp,
> @@ -3980,18 +3977,15 @@ static void emit_asm_debug(BuildCtx *ctx)
> "\t.long lj_vm_ffi_call-.\n"
> "\t.long %d\n"
> "\t.uleb128 0\n" /* augmentation length */
> - "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
> "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
> "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */
> "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */
> "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */
> - "\t.byte 0xd\n\t.uleb128 0x1d\n" /* def_cfa_register fp */
> "\t.align 3\n"
> ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
> #endif
> break;
> - /* Disabled until someone finds a fix. See #698. */
> -#if !LJ_NO_UNWIND && 0
> +#if !LJ_NO_UNWIND
> case BUILD_machasm: {
> #if LJ_HASFFI
> int fcsize = 0;
> @@ -4006,14 +4000,14 @@ static void emit_asm_debug(BuildCtx *ctx)
> "\t.long 0\n"
> "\t.byte 0x1\n"
> "\t.ascii \"zPR\\0\"\n"
> - "\t.byte 0x1\n"
> - "\t.byte 128-8\n"
> + "\t.uleb128 0x1\n"
> + "\t.sleb128 -8\n"
> "\t.byte 30\n" /* Return address is in lr. */
> - "\t.byte 6\n" /* augmentation length */
> + "\t.uleb128 6\n" /* augmentation length */
> "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
> - "\t.long _lj_err_unwind_dwarf@GOTPCREL\n"
> + "\t.long _lj_err_unwind_dwarf@GOT-.\n"
> "\t.byte 0x1b\n" /* pcrel|sdata4 */
> - "\t.byte 0xc\n\t.byte 31\n\t.byte 0\n" /* def_cfa sp */
> + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
> "\t.align 3\n"
> "LECIEX:\n\n");
> for (j = 0; j < ctx->nsym; j++) {
> @@ -4024,7 +4018,6 @@ static void emit_asm_debug(BuildCtx *ctx)
> if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
> #endif
> fprintf(ctx->fp,
> - "%s.eh:\n"
> "LSFDE%d:\n"
> "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
> "\t.long L$set$%d\n"
> @@ -4032,15 +4025,14 @@ static void emit_asm_debug(BuildCtx *ctx)
> "\t.long LASFDE%d-EH_frame1\n"
> "\t.long %s-.\n"
> "\t.long %d\n"
> - "\t.byte 0\n" /* augmentation length */
> - "\t.byte 0xe\n\t.byte %d\n\t.byte 1\n" /* def_cfa_offset */
> - "\t.byte 0x9e\n\t.byte 1\n" /* offset lr */
> - "\t.byte 0x9d\n\t.byte 2\n", /* offset fp */
> - name, j, j, j, j, j, j, j, name, size, CFRAME_SIZE);
> + "\t.uleb128 0\n" /* augmentation length */
> + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
> + "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */
> + j, j, j, j, j, j, j, name, size);
> for (i = 19; i <= 28; i++) /* offset x19-x28 */
> - fprintf(ctx->fp, "\t.byte 0x%x\n\t.byte %d\n", 0x80+i, i+(3-19));
> + fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
> for (i = 8; i <= 15; i++) /* offset d8-d15 */
> - fprintf(ctx->fp, "\t.byte 5\n\t.byte 0x%x\n\t.byte %d\n",
> + fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
> 64+i, i+(3+(28-19+1)-8));
> fprintf(ctx->fp,
> "\t.align 3\n"
> @@ -4056,16 +4048,15 @@ static void emit_asm_debug(BuildCtx *ctx)
> "\t.long 0\n"
> "\t.byte 0x1\n"
> "\t.ascii \"zR\\0\"\n"
> - "\t.byte 0x1\n"
> - "\t.byte 128-8\n"
> + "\t.uleb128 0x1\n"
> + "\t.sleb128 -8\n"
> "\t.byte 30\n" /* Return address is in lr. */
> - "\t.byte 1\n" /* augmentation length */
> + "\t.uleb128 1\n" /* augmentation length */
> "\t.byte 0x1b\n" /* pcrel|sdata4 */
> - "\t.byte 0xc\n\t.byte 31\n\t.byte 0\n" /* def_cfa sp */
> + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
> "\t.align 3\n"
> "LECIEY:\n\n");
> fprintf(ctx->fp,
> - "_lj_vm_ffi_call.eh:\n"
> "LSFDEY:\n"
> "\t.set L$set$yy,LEFDEY-LASFDEY\n"
> "\t.long L$set$yy\n"
> @@ -4073,13 +4064,11 @@ static void emit_asm_debug(BuildCtx *ctx)
> "\t.long LASFDEY-EH_frame2\n"
> "\t.long _lj_vm_ffi_call-.\n"
> "\t.long %d\n"
> - "\t.byte 0\n" /* augmentation length */
> - "\t.byte 0xe\n\t.byte 32\n" /* def_cfa_offset */
> - "\t.byte 0x9e\n\t.byte 1\n" /* offset lr */
> - "\t.byte 0x9d\n\t.byte 2\n" /* offset fp */
> - "\t.byte 0x93\n\t.byte 3\n" /* offset x19 */
> - "\t.byte 0x94\n\t.byte 4\n" /* offset x20 */
> - "\t.byte 0xd\n\t.uleb128 0x1d\n" /* def_cfa_register fp */
> + "\t.uleb128 0\n" /* augmentation length */
> + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
> + "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */
> + "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */
> + "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */
> "\t.align 3\n"
> "LEFDEY:\n\n", fcsize);
> }
> diff --git a/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua b/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua
> new file mode 100644
> index 00000000..cdeea441
> --- /dev/null
> +++ b/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua

<snipped>

> --
> 2.37.0 (Apple Git-136)
>

--
Best regards,
Sergey Kaplun
[1]: https://faultlore.com/blah/compact-unwinding/#unwinding-tables-dwarf-cfi
[2]: https://opensource.apple.com/source/libunwind/libunwind-35.3/include/mach-o/compact_unwind_encoding.h
--
Best regards,
Maxim Kokryashkin