[Tarantool-patches] [PATCH luajit v4 4/8] ARM64: Reorder interpreter stack frame and fix unwinding.

sergos sergos at tarantool.org
Tue Dec 6 00:42:16 MSK 2022


Hi!

Thanks for the patch!

LGTM with fixes after SergeyK review.

Sergos


> On 28 Oct 2022, at 12:26, Maksim Kokryashkin <max.kokryashkin at gmail.com> wrote:
> 
> From: Mike Pall <mike>
> 
> Reported by Yichun Zhang. Fixes #722.
> May help towards fixing #698, too.
> 
> (cherry picked from commit 421c4c798791d27b7f967df39891c4e4fa1d107c)
> 
> The `_Unwind_Find_FDE` fails to find the FDE (frame descriptor
> element) for `lj_vm_ffi_call` in DWARF unwind info, despite
> the presence of its data in the `.debug_frame` section.
> 
> LuaJIT emits its own DWARF entries for the CFI (call frame
> information, section 6.4.1 in DWARF standard)[1].The FP
> register value is vital to perform unwinding, and it is
> possible to restore that register using the Canonical
> Frame Address, or CFA. It can be obtained as `CFA - offset`.
> By default, the CFA register is SP, however, it can be
> changed to any other.
> 
> According to ARM's calling convention, the first eight
> arguments of a function must be passed in x0-x7 registers,
> and all the remaining must be passed on the stack. The
> latter fact is important because it affects the SP and,
> because of that, the CFA invalidates. This patch changes
> the CFA register to the FP for the lj_vm_ffi_call, which
> fixes the issue.
> 
> All the other changes are made just for refactoring purposes.
> 
> [1]: https://dwarfstd.org/doc/DWARF5.pdf
> 
> Maxim Kokryashkin:
> * added the description and the test case for the problem
> 
> Needed for tarantool/tarantool#6096
> Part of tarantool/tarantool#7230
> ---
> src/lj_frame.h                                |  12 +-
> src/vm_arm64.dasc                             | 189 ++++++++++++++----
> .../lj-698-arm-pcall-panic.test.lua           |  18 ++
> 3 files changed, 170 insertions(+), 49 deletions(-)
> create mode 100644 test/tarantool-tests/lj-698-arm-pcall-panic.test.lua
> 
> diff --git a/src/lj_frame.h b/src/lj_frame.h
> index 9fd63fa2..1e4adaa3 100644
> --- a/src/lj_frame.h
> +++ b/src/lj_frame.h
> @@ -192,12 +192,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
> #endif
> #define CFRAME_SHIFT_MULTRES	3
> #elif LJ_TARGET_ARM64
> -#define CFRAME_OFS_ERRF		196
> -#define CFRAME_OFS_NRES		200
> -#define CFRAME_OFS_PREV		160
> -#define CFRAME_OFS_L		176
> -#define CFRAME_OFS_PC		168
> -#define CFRAME_OFS_MULTRES	192
> +#define CFRAME_OFS_ERRF		36
> +#define CFRAME_OFS_NRES		40
> +#define CFRAME_OFS_PREV		0
> +#define CFRAME_OFS_L		16
> +#define CFRAME_OFS_PC		8
> +#define CFRAME_OFS_MULTRES	32
> #define CFRAME_SIZE		208
> #define CFRAME_SHIFT_MULTRES	3
> #elif LJ_TARGET_PPC
> diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
> index 313cc94f..ad57bca3 100644
> --- a/src/vm_arm64.dasc
> +++ b/src/vm_arm64.dasc
> @@ -81,47 +81,49 @@
> |
> |.define CFRAME_SPACE,	208
> |//----- 16 byte aligned, <-- sp entering interpreter
> -|// Unused		[sp, #204]	// 32 bit values
> -|.define SAVE_NRES,	[sp, #200]
> -|.define SAVE_ERRF,	[sp, #196]
> -|.define SAVE_MULTRES,	[sp, #192]
> -|.define TMPD,		[sp, #184]	// 64 bit values
> -|.define SAVE_L,	[sp, #176]
> -|.define SAVE_PC,	[sp, #168]
> -|.define SAVE_CFRAME,	[sp, #160]
> -|.define SAVE_FPR_,	96		// 96+8*8: 64 bit FPR saves
> -|.define SAVE_GPR_,	16		// 16+10*8: 64 bit GPR saves
> -|.define SAVE_LR,	[sp, #8]
> -|.define SAVE_FP,	[sp]
> +|.define SAVE_LR,	[sp, #200]
> +|.define SAVE_FP,	[sp, #192]
> +|.define SAVE_GPR_,	112		// 112+10*8: 64 bit GPR saves
> +|.define SAVE_FPR_,	48		// 48+8*8: 64 bit FPR saves
> +|// Unused		[sp, #44]	// 32 bit values
> +|.define SAVE_NRES,	[sp, #40]
> +|.define SAVE_ERRF,	[sp, #36]
> +|.define SAVE_MULTRES,	[sp, #32]
> +|.define TMPD,		[sp, #24]	// 64 bit values
> +|.define SAVE_L,	[sp, #16]
> +|.define SAVE_PC,	[sp, #8]
> +|.define SAVE_CFRAME,	[sp, #0]
> |//----- 16 byte aligned, <-- sp while in interpreter.
> |
> -|.define TMPDofs,	#184
> +|.define TMPDofs,	#24
> |
> |.macro save_, gpr1, gpr2, fpr1, fpr2
> -|  stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
> -|  stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
> +|  stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
> +|  stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
> |.endmacro
> |.macro rest_, gpr1, gpr2, fpr1, fpr2
> -|  ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
> -|  ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
> +|  ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
> +|  ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
> |.endmacro
> |
> |.macro saveregs
> -|  stp fp, lr, [sp, #-CFRAME_SPACE]!
> +|  sub sp, sp, # CFRAME_SPACE
> +|  stp fp, lr, SAVE_FP
> |  add fp, sp, #0
> -|  stp x19, x20, [sp, # SAVE_GPR_]
> +|  stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
> |  save_ 21, 22, 8, 9
> |  save_ 23, 24, 10, 11
> |  save_ 25, 26, 12, 13
> |  save_ 27, 28, 14, 15
> |.endmacro
> |.macro restoreregs
> -|  ldp x19, x20, [sp, # SAVE_GPR_]
> +|  ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
> |  rest_ 21, 22, 8, 9
> |  rest_ 23, 24, 10, 11
> |  rest_ 25, 26, 12, 13
> |  rest_ 27, 28, 14, 15
> -|  ldp fp, lr, [sp], # CFRAME_SPACE
> +|  ldp fp, lr, SAVE_FP
> +|  add sp, sp, # CFRAME_SPACE
> |.endmacro
> |
> |// Type definitions. Some of these are only used for documentation.
> @@ -2125,9 +2127,9 @@ static void build_subroutines(BuildCtx *ctx)
>   |  // Caveat: needs special frame unwinding, see below.
>   |.if FFI
>   |  .type CCSTATE, CCallState, x19
> -  |  stp fp, lr, [sp, #-32]!
> +  |  stp x20, CCSTATE, [sp, #-32]!
> +  |  stp fp, lr, [sp, #16]
>   |  add fp, sp, #0
> -  |  str CCSTATE, [sp, #16]
>   |  mov CCSTATE, x0
>   |  ldr TMP0w, CCSTATE:x0->spadj
>   |   ldrb TMP1w, CCSTATE->nsp
> @@ -2156,8 +2158,8 @@ static void build_subroutines(BuildCtx *ctx)
>   |  stp x0, x1, CCSTATE->gpr[0]
>   |   stp d0, d1, CCSTATE->fpr[0]
>   |   stp d2, d3, CCSTATE->fpr[2]
> -  |  ldr CCSTATE, [sp, #16]
> -  |  ldp fp, lr, [sp], #32
> +  |  ldp fp, lr, [sp, #16]
> +  |  ldp x20, CCSTATE, [sp], #32
>   |  ret
>   |.endif
>   |// Note: vm_ffi_call must be the last function in this object file!
> @@ -2887,7 +2889,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>   case BC_GGET:
>     |  // RA = dst, RC = str_const (~)
>   case BC_GSET:
> -    |  // RA = dst, RC = str_const (~)
> +    |  // RA = src, RC = str_const (~)
>     |  ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
>     |   mvn RC, RC
>     |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
> @@ -3863,7 +3865,7 @@ static int build_backend(BuildCtx *ctx)
> static void emit_asm_debug(BuildCtx *ctx)
> {
>   int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
> -  int i, cf = CFRAME_SIZE >> 3;
> +  int i;
>   switch (ctx->mode) {
>   case BUILD_elfasm:
>     fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
> @@ -3888,14 +3890,14 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.quad .Lbegin\n"
> 	"\t.quad %d\n"
> 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
> -	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
> -	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
> -	fcofs, CFRAME_SIZE, cf, cf-1);
> +	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
> +	fcofs, CFRAME_SIZE);
>     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
> -      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
> +      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
>     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
>       fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
> -	      64+i, cf-i-4);
> +	      64+i, i+(3+(28-19+1)-8));
>     fprintf(ctx->fp,
> 	"\t.align 3\n"
> 	".LEFDE0:\n\n");
> @@ -3908,9 +3910,11 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.quad lj_vm_ffi_call\n"
> 	"\t.quad %d\n"
> 	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
> -	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
> -	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
> -	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
> +	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
> +	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
> +	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
> +	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
> 	"\t.align 3\n"
> 	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
> #endif
> @@ -3941,14 +3945,14 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.long %d\n"
> 	"\t.uleb128 0\n"			/* augmentation length */
> 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
> -	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
> -	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
> -	fcofs, CFRAME_SIZE, cf, cf-1);
> +	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
> +	fcofs, CFRAME_SIZE);
>     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
> -      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
> +      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
>     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
>       fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
> -	      64+i, cf-i-4);
> +	      64+i, i+(3+(28-19+1)-8));
>     fprintf(ctx->fp,
> 	"\t.align 3\n"
> 	".LEFDE2:\n\n");
> @@ -3977,13 +3981,112 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.long %d\n"
> 	"\t.uleb128 0\n"			/* augmentation length */
> 	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
> -	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
> -	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
> -	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
> +	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
> +	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
> +	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
> +	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
> 	"\t.align 3\n"
> 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
> #endif
>     break;
> +#if !LJ_NO_UNWIND
> +  case BUILD_machasm: {
> +#if LJ_HASFFI
> +    int fcsize = 0;
> +#endif
> +    int j;
> +    fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
> +    fprintf(ctx->fp,
> +	"EH_frame1:\n"
> +	"\t.set L$set$x,LECIEX-LSCIEX\n"
> +	"\t.long L$set$x\n"
> +	"LSCIEX:\n"
> +	"\t.long 0\n"
> +	"\t.byte 0x1\n"
> +	"\t.ascii \"zPR\\0\"\n"
> +	"\t.byte 0x1\n"
> +	"\t.byte 128-8\n"
> +	"\t.byte 30\n"				/* Return address is in lr. */
> +	"\t.byte 6\n"				/* augmentation length */
> +	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
> +	"\t.long _lj_err_unwind_dwarf at GOTPCREL\n"
> +	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
> +	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
> +	"\t.align 3\n"
> +	"LECIEX:\n\n");
> +    for (j = 0; j < ctx->nsym; j++) {
> +      const char *name = ctx->sym[j].name;
> +      int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs;
> +      if (size == 0) continue;
> +#if LJ_HASFFI
> +      if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
> +#endif
> +      fprintf(ctx->fp,
> +	"%s.eh:\n"
> +	"LSFDE%d:\n"
> +	"\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
> +	"\t.long L$set$%d\n"
> +	"LASFDE%d:\n"
> +	"\t.long LASFDE%d-EH_frame1\n"
> +	"\t.long %s-.\n"
> +	"\t.long %d\n"
> +	"\t.byte 0\n"				/* augmentation length */
> +	"\t.byte 0xe\n\t.byte %d\n\t.byte 1\n"	/* def_cfa_offset */
> +	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.byte 2\n",		/* offset fp */
> +	name, j, j, j, j, j, j, j, name, size, CFRAME_SIZE);
> +      for (i = 19; i <= 28; i++)  /* offset x19-x28 */
> +	fprintf(ctx->fp, "\t.byte 0x%x\n\t.byte %d\n", 0x80+i, i+(3-19));
> +      for (i = 8; i <= 15; i++)  /* offset d8-d15 */
> +	fprintf(ctx->fp, "\t.byte 5\n\t.byte 0x%x\n\t.byte %d\n",
> +		64+i, i+(3+(28-19+1)-8));
> +      fprintf(ctx->fp,
> +	"\t.align 3\n"
> +	"LEFDE%d:\n\n", j);
> +    }
> +#if LJ_HASFFI
> +    if (fcsize) {
> +      fprintf(ctx->fp,
> +	"EH_frame2:\n"
> +	"\t.set L$set$y,LECIEY-LSCIEY\n"
> +	"\t.long L$set$y\n"
> +	"LSCIEY:\n"
> +	"\t.long 0\n"
> +	"\t.byte 0x1\n"
> +	"\t.ascii \"zR\\0\"\n"
> +	"\t.byte 0x1\n"
> +	"\t.byte 128-8\n"
> +	"\t.byte 30\n"				/* Return address is in lr. */
> +	"\t.byte 1\n"				/* augmentation length */
> +	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
> +	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
> +	"\t.align 3\n"
> +	"LECIEY:\n\n");
> +      fprintf(ctx->fp,
> +	"_lj_vm_ffi_call.eh:\n"
> +	"LSFDEY:\n"
> +	"\t.set L$set$yy,LEFDEY-LASFDEY\n"
> +	"\t.long L$set$yy\n"
> +	"LASFDEY:\n"
> +	"\t.long LASFDEY-EH_frame2\n"
> +	"\t.long _lj_vm_ffi_call-.\n"
> +	"\t.long %d\n"
> +	"\t.byte 0\n"				/* augmentation length */
> +	"\t.byte 0xe\n\t.byte 32\n"		/* def_cfa_offset */
> +	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.byte 2\n"		/* offset fp */
> +	"\t.byte 0x93\n\t.byte 3\n"		/* offset x19 */
> +	"\t.byte 0x94\n\t.byte 4\n"		/* offset x20 */
> +	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
> +	"\t.align 3\n"
> +	"LEFDEY:\n\n", fcsize);
> +    }
> +#endif
> +    fprintf(ctx->fp, ".subsections_via_symbols\n");
> +    }
> +    break;
> +#endif
>   default:
>     break;
>   }
> diff --git a/test/tarantool-tests/lj-698-arm-pcall-panic.test.lua b/test/tarantool-tests/lj-698-arm-pcall-panic.test.lua
> new file mode 100644
> index 00000000..88476d3e
> --- /dev/null
> +++ b/test/tarantool-tests/lj-698-arm-pcall-panic.test.lua
> @@ -0,0 +1,18 @@
> +local tap = require('tap')
> +
> +-- See also https://github.com/LuaJIT/LuaJIT/issues/698.
> +local test = tap.test('lj-418-arm-pcall-panic')
> +test:plan(1)
> +
> +local ffi = require('ffi')
> +-- The test case below was taken from the LuaJIT-tests
> +-- suite (lib/ffi/ffi_callback.lua), and should be removed
> +-- after the integration of the mentioned suite.
> +local runner = ffi.cast("int (*)(int, int, int, int, int, int, int, int, int)",
> +                        function() error("test") end
> +                      )
> +
> +local st = pcall(runner, 1, 1, 1, 1, 1, 1, 1, 1, 1)
> +test:ok(not st, 'error handling completed correctly')
> +
> +os.exit(test:check() and 0 or 1)
> -- 
> 2.37.0 (Apple Git-136)
> 



More information about the Tarantool-patches mailing list