[Tarantool-patches] [PATCH luajit v4 8/8] OSX/ARM64: Fix external unwinding.

Sergey Kaplun skaplun at tarantool.org
Thu Nov 24 16:10:42 MSK 2022


Hi, Maksim!

Thanks for the patch!

LGTM, but I have a bunch of questions to clarify it.

On 28.10.22, Maksim Kokryashkin wrote:
> Contributed by Edmund Kapusniak. For more info,
> see #698 and #757.
> 
> (cherry picked from commit c38747b626b978555324504ec29a110f6b04902f)
> 
> To allow compiler generate compact unwind info generation
> for Mach-O, fp must point to the saved fp, and the frame
> must be specified relative to fp+16.

Is there any link to documentation or source code to inspect this
behaviour?

>                                      ELF unwind info has
> been updated to also use fp+16 rather than sp+CFRAME_SIZE.

Also, I see that `.byte` notation is replaced with `.uleb128` or
`.sleb128`. What is the reason?
Same question for removing names of entries ("_lj_vm_ffi_call.eh:" for
example).

> 
> Offset to pointer to personality routine specified as @GOT-. rather
> than @GOTPCREL.

Does it mean that we use incorrect encoded offset (I see encoding for
offset is still the same) for our personality routine?
If so, maybe the other changes are just refactoring?

> 
> Re-enabled LUAJIT_UNWIND_EXTERNAL by default on OSX.
> 
> Maxim Kokryashkin:
> * added the description for the issue and the test
> 
> Resolves tarantool/tarantool#6096
> Part of tarantool/tarantool#7230
> ---
>  cmake/SetTargetFlags.cmake                    |  4 +-
>  src/Makefile.original                         |  5 +-
>  src/vm_arm64.dasc                             | 89 ++++++++-----------
>  ...-6096-external-unwinding-on-arm64.test.lua | 13 +++
>  4 files changed, 54 insertions(+), 57 deletions(-)
>  create mode 100644 test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua
> 
> diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
> index 8abb6288..7cf26be1 100644
> --- a/cmake/SetTargetFlags.cmake
> +++ b/cmake/SetTargetFlags.cmake

<snipped>

> diff --git a/src/Makefile.original b/src/Makefile.original
> index 2d014e43..813d9c12 100644
> --- a/src/Makefile.original
> +++ b/src/Makefile.original

<snipped>

> diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
> index ccfa72bd..f517a808 100644
> --- a/src/vm_arm64.dasc
> +++ b/src/vm_arm64.dasc

<snipped>

> @@ -504,8 +503,9 @@ static void build_subroutines(BuildCtx *ctx)
>    |    ldr GL, L->glref			// Setup pointer to global state.
>    |     mov BASE, CARG2
>    |   str CARG1, SAVE_PC		// Any value outside of bytecode is ok.
> -  |  str RC, SAVE_CFRAME
> -  |  str fp, L->cframe			// Add our C frame to cframe chain.
> +  |  add TMP0, sp, #0
> +  |   str RC, SAVE_CFRAME
> +  |  str TMP0, L->cframe		// Add our C frame to cframe chain.

Why can't we just use the following?
|  str RC, SAVE_CFRAME
|  str sp, L->cframe			// Add our C frame to cframe chain.

>    |
>    |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
>    |  str L, GL->cur_L
> @@ -540,8 +540,9 @@ static void build_subroutines(BuildCtx *ctx)
>    |   sub RA, RA, RB			// Compute -savestack(L, L->top).
>    |   str RAw, SAVE_NRES		// Neg. delta means cframe w/o frame.
>    |  str wzr, SAVE_ERRF			// No error function.
> -  |  str RC, SAVE_CFRAME
> -  |  str fp, L->cframe			// Add our C frame to cframe chain.
> +  |  add TMP0, sp, #0
> +  |   str RC, SAVE_CFRAME
> +  |  str TMP0, L->cframe		// Add our C frame to cframe chain.

Ditto.

>    |    str L, GL->cur_L
>    |  blr CARG4			// (lua_State *L, lua_CFunction func, void *ud)
>    |  mov BASE, CRET1
> @@ -2129,14 +2130,14 @@ static void build_subroutines(BuildCtx *ctx)

<snipped>

> @@ -3879,7 +3880,7 @@ static void emit_asm_debug(BuildCtx *ctx)
>  	"\t.uleb128 0x1\n"
>  	"\t.sleb128 -8\n"
>  	"\t.byte 30\n"				/* Return address is in lr. */
> -	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
> +	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
>  	"\t.align 3\n"
>  	".LECIE0:\n\n");
>      fprintf(ctx->fp,
> @@ -3889,10 +3890,9 @@ static void emit_asm_debug(BuildCtx *ctx)
>  	"\t.long .Lframe0\n"
>  	"\t.quad .Lbegin\n"
>  	"\t.quad %d\n"
> -	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
>  	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
>  	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
> -	fcofs, CFRAME_SIZE);
> +	fcofs);
>      for (i = 19; i <= 28; i++)  /* offset x19-x28 */
>        fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
>      for (i = 8; i <= 15; i++)  /* offset d8-d15 */
> @@ -3909,12 +3909,10 @@ static void emit_asm_debug(BuildCtx *ctx)
>  	"\t.long .Lframe0\n"
>  	"\t.quad lj_vm_ffi_call\n"
>  	"\t.quad %d\n"
> -	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
>  	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
>  	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
>  	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
>  	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
> -	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
>  	"\t.align 3\n"
>  	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
>  #endif
> @@ -3933,7 +3931,7 @@ static void emit_asm_debug(BuildCtx *ctx)
>  	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
>  	"\t.long lj_err_unwind_dwarf-.\n"
>  	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
> -	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
> +	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
>  	"\t.align 3\n"
>  	".LECIE1:\n\n");
>      fprintf(ctx->fp,
> @@ -3944,10 +3942,9 @@ static void emit_asm_debug(BuildCtx *ctx)
>  	"\t.long .Lbegin-.\n"
>  	"\t.long %d\n"
>  	"\t.uleb128 0\n"			/* augmentation length */
> -	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
>  	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
>  	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
> -	fcofs, CFRAME_SIZE);
> +	fcofs);
>      for (i = 19; i <= 28; i++)  /* offset x19-x28 */
>        fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
>      for (i = 8; i <= 15; i++)  /* offset d8-d15 */
> @@ -3969,7 +3966,7 @@ static void emit_asm_debug(BuildCtx *ctx)
>  	"\t.byte 30\n"				/* Return address is in lr. */
>  	"\t.uleb128 1\n"			/* augmentation length */
>  	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
> -	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
> +	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
>  	"\t.align 3\n"
>  	".LECIE2:\n\n");
>      fprintf(ctx->fp,
> @@ -3980,18 +3977,15 @@ static void emit_asm_debug(BuildCtx *ctx)
>  	"\t.long lj_vm_ffi_call-.\n"
>  	"\t.long %d\n"
>  	"\t.uleb128 0\n"			/* augmentation length */
> -	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
>  	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
>  	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
>  	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
>  	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
> -	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
>  	"\t.align 3\n"
>  	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
>  #endif
>      break;
> -    /* Disabled until someone finds a fix. See #698. */
> -#if !LJ_NO_UNWIND && 0
> +#if !LJ_NO_UNWIND
>    case BUILD_machasm: {
>  #if LJ_HASFFI
>      int fcsize = 0;
> @@ -4006,14 +4000,14 @@ static void emit_asm_debug(BuildCtx *ctx)
>  	"\t.long 0\n"
>  	"\t.byte 0x1\n"
>  	"\t.ascii \"zPR\\0\"\n"
> -	"\t.byte 0x1\n"
> -	"\t.byte 128-8\n"
> +	"\t.uleb128 0x1\n"
> +	"\t.sleb128 -8\n"
>  	"\t.byte 30\n"				/* Return address is in lr. */
> -	"\t.byte 6\n"				/* augmentation length */
> +	"\t.uleb128 6\n"			/* augmentation length */
>  	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
> -	"\t.long _lj_err_unwind_dwarf at GOTPCREL\n"
> +	"\t.long _lj_err_unwind_dwarf at GOT-.\n"
>  	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
> -	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
> +	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
>  	"\t.align 3\n"
>  	"LECIEX:\n\n");
>      for (j = 0; j < ctx->nsym; j++) {
> @@ -4024,7 +4018,6 @@ static void emit_asm_debug(BuildCtx *ctx)
>        if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
>  #endif
>        fprintf(ctx->fp,
> -	"%s.eh:\n"
>  	"LSFDE%d:\n"
>  	"\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
>  	"\t.long L$set$%d\n"
> @@ -4032,15 +4025,14 @@ static void emit_asm_debug(BuildCtx *ctx)
>  	"\t.long LASFDE%d-EH_frame1\n"
>  	"\t.long %s-.\n"
>  	"\t.long %d\n"
> -	"\t.byte 0\n"				/* augmentation length */
> -	"\t.byte 0xe\n\t.byte %d\n\t.byte 1\n"	/* def_cfa_offset */
> -	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
> -	"\t.byte 0x9d\n\t.byte 2\n",		/* offset fp */
> -	name, j, j, j, j, j, j, j, name, size, CFRAME_SIZE);
> +	"\t.uleb128 0\n"			/* augmentation length */
> +	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
> +	j, j, j, j, j, j, j, name, size);
>        for (i = 19; i <= 28; i++)  /* offset x19-x28 */
> -	fprintf(ctx->fp, "\t.byte 0x%x\n\t.byte %d\n", 0x80+i, i+(3-19));
> +	fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
>        for (i = 8; i <= 15; i++)  /* offset d8-d15 */
> -	fprintf(ctx->fp, "\t.byte 5\n\t.byte 0x%x\n\t.byte %d\n",
> +	fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
>  		64+i, i+(3+(28-19+1)-8));
>        fprintf(ctx->fp,
>  	"\t.align 3\n"
> @@ -4056,16 +4048,15 @@ static void emit_asm_debug(BuildCtx *ctx)
>  	"\t.long 0\n"
>  	"\t.byte 0x1\n"
>  	"\t.ascii \"zR\\0\"\n"
> -	"\t.byte 0x1\n"
> -	"\t.byte 128-8\n"
> +	"\t.uleb128 0x1\n"
> +	"\t.sleb128 -8\n"
>  	"\t.byte 30\n"				/* Return address is in lr. */
> -	"\t.byte 1\n"				/* augmentation length */
> +	"\t.uleb128 1\n"			/* augmentation length */
>  	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
> -	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
> +	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
>  	"\t.align 3\n"
>  	"LECIEY:\n\n");
>        fprintf(ctx->fp,
> -	"_lj_vm_ffi_call.eh:\n"
>  	"LSFDEY:\n"
>  	"\t.set L$set$yy,LEFDEY-LASFDEY\n"
>  	"\t.long L$set$yy\n"
> @@ -4073,13 +4064,11 @@ static void emit_asm_debug(BuildCtx *ctx)
>  	"\t.long LASFDEY-EH_frame2\n"
>  	"\t.long _lj_vm_ffi_call-.\n"
>  	"\t.long %d\n"
> -	"\t.byte 0\n"				/* augmentation length */
> -	"\t.byte 0xe\n\t.byte 32\n"		/* def_cfa_offset */
> -	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
> -	"\t.byte 0x9d\n\t.byte 2\n"		/* offset fp */
> -	"\t.byte 0x93\n\t.byte 3\n"		/* offset x19 */
> -	"\t.byte 0x94\n\t.byte 4\n"		/* offset x20 */
> -	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
> +	"\t.uleb128 0\n"			/* augmentation length */
> +	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
> +	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
> +	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
>  	"\t.align 3\n"
>  	"LEFDEY:\n\n", fcsize);
>      }
> diff --git a/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua b/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua
> new file mode 100644
> index 00000000..cdeea441
> --- /dev/null
> +++ b/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua

<snipped>

> -- 
> 2.37.0 (Apple Git-136)
> 

-- 
Best regards,
Sergey Kaplun


More information about the Tarantool-patches mailing list