[Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding.

Tarantool development patches archive
 help / color / mirror / Atom feed

From: Maksim Kokryashkin via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: tarantool-patches@dev.tarantool.org, sergos@tarantool.org,
	skaplun@tarantool.org
Subject: [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding.
Date: Mon, 26 Sep 2022 18:55:01 +0300	[thread overview]
Message-ID: <725566e75d613842c9f80021e500cdb86a84b3fa.1664207262.git.max.kokryashkin@gmail.com> (raw)
In-Reply-To: <cover.1664207262.git.max.kokryashkin@gmail.com>

From: Mike Pall <mike>

Reported by Yichun Zhang. Fixes LuaJIT/LuaJIT#722.
May help towards fixing LuaJIT/LuaJIT#698, too.

(cherry picked from commit 421c4c798791d27b7f967df39891c4e4fa1d107c)

The `_Unwind_Find_FDE` fails to find FDE (frame descriptor element)
for `lj_vm_ffi_call` in DWARF unwind info, despite the presence
of its data in `.debug_frame` section.

Reordering of interpreter stack frame fixes the issue.

For more info, see https://github.com/LuaJIT/LuaJIT/issues/722

Maxim Kokryashkin:
* added the description for the problem

Needed for tarantool/tarantool#6096
Needed for tarantool/tarantool#7230
---
 src/lj_frame.h    |  12 +--
 src/vm_arm64.dasc | 189 +++++++++++++++++++++++++++++++++++-----------
 2 files changed, 152 insertions(+), 49 deletions(-)

diff --git a/src/lj_frame.h b/src/lj_frame.h
index 9fd63fa2..1e4adaa3 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -192,12 +192,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 #endif
 #define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_ARM64
-#define CFRAME_OFS_ERRF		196
-#define CFRAME_OFS_NRES		200
-#define CFRAME_OFS_PREV		160
-#define CFRAME_OFS_L		176
-#define CFRAME_OFS_PC		168
-#define CFRAME_OFS_MULTRES	192
+#define CFRAME_OFS_ERRF		36
+#define CFRAME_OFS_NRES		40
+#define CFRAME_OFS_PREV		0
+#define CFRAME_OFS_L		16
+#define CFRAME_OFS_PC		8
+#define CFRAME_OFS_MULTRES	32
 #define CFRAME_SIZE		208
 #define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_PPC
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index 313cc94f..ad57bca3 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -81,47 +81,49 @@
 |
 |.define CFRAME_SPACE,	208
 |//----- 16 byte aligned, <-- sp entering interpreter
-|// Unused		[sp, #204]	// 32 bit values
-|.define SAVE_NRES,	[sp, #200]
-|.define SAVE_ERRF,	[sp, #196]
-|.define SAVE_MULTRES,	[sp, #192]
-|.define TMPD,		[sp, #184]	// 64 bit values
-|.define SAVE_L,	[sp, #176]
-|.define SAVE_PC,	[sp, #168]
-|.define SAVE_CFRAME,	[sp, #160]
-|.define SAVE_FPR_,	96		// 96+8*8: 64 bit FPR saves
-|.define SAVE_GPR_,	16		// 16+10*8: 64 bit GPR saves
-|.define SAVE_LR,	[sp, #8]
-|.define SAVE_FP,	[sp]
+|.define SAVE_LR,	[sp, #200]
+|.define SAVE_FP,	[sp, #192]
+|.define SAVE_GPR_,	112		// 112+10*8: 64 bit GPR saves
+|.define SAVE_FPR_,	48		// 48+8*8: 64 bit FPR saves
+|// Unused		[sp, #44]	// 32 bit values
+|.define SAVE_NRES,	[sp, #40]
+|.define SAVE_ERRF,	[sp, #36]
+|.define SAVE_MULTRES,	[sp, #32]
+|.define TMPD,		[sp, #24]	// 64 bit values
+|.define SAVE_L,	[sp, #16]
+|.define SAVE_PC,	[sp, #8]
+|.define SAVE_CFRAME,	[sp, #0]
 |//----- 16 byte aligned, <-- sp while in interpreter.
 |
-|.define TMPDofs,	#184
+|.define TMPDofs,	#24
 |
 |.macro save_, gpr1, gpr2, fpr1, fpr2
-|  stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
-|  stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
+|  stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
+|  stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
 |.endmacro
 |.macro rest_, gpr1, gpr2, fpr1, fpr2
-|  ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
-|  ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
+|  ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
+|  ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
 |.endmacro
 |
 |.macro saveregs
-|  stp fp, lr, [sp, #-CFRAME_SPACE]!
+|  sub sp, sp, # CFRAME_SPACE
+|  stp fp, lr, SAVE_FP
 |  add fp, sp, #0
-|  stp x19, x20, [sp, # SAVE_GPR_]
+|  stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
 |  save_ 21, 22, 8, 9
 |  save_ 23, 24, 10, 11
 |  save_ 25, 26, 12, 13
 |  save_ 27, 28, 14, 15
 |.endmacro
 |.macro restoreregs
-|  ldp x19, x20, [sp, # SAVE_GPR_]
+|  ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
 |  rest_ 21, 22, 8, 9
 |  rest_ 23, 24, 10, 11
 |  rest_ 25, 26, 12, 13
 |  rest_ 27, 28, 14, 15
-|  ldp fp, lr, [sp], # CFRAME_SPACE
+|  ldp fp, lr, SAVE_FP
+|  add sp, sp, # CFRAME_SPACE
 |.endmacro
 |
 |// Type definitions. Some of these are only used for documentation.
@@ -2125,9 +2127,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  // Caveat: needs special frame unwinding, see below.
   |.if FFI
   |  .type CCSTATE, CCallState, x19
-  |  stp fp, lr, [sp, #-32]!
+  |  stp x20, CCSTATE, [sp, #-32]!
+  |  stp fp, lr, [sp, #16]
   |  add fp, sp, #0
-  |  str CCSTATE, [sp, #16]
   |  mov CCSTATE, x0
   |  ldr TMP0w, CCSTATE:x0->spadj
   |   ldrb TMP1w, CCSTATE->nsp
@@ -2156,8 +2158,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  stp x0, x1, CCSTATE->gpr[0]
   |   stp d0, d1, CCSTATE->fpr[0]
   |   stp d2, d3, CCSTATE->fpr[2]
-  |  ldr CCSTATE, [sp, #16]
-  |  ldp fp, lr, [sp], #32
+  |  ldp fp, lr, [sp, #16]
+  |  ldp x20, CCSTATE, [sp], #32
   |  ret
   |.endif
   |// Note: vm_ffi_call must be the last function in this object file!
@@ -2887,7 +2889,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
   case BC_GGET:
     |  // RA = dst, RC = str_const (~)
   case BC_GSET:
-    |  // RA = dst, RC = str_const (~)
+    |  // RA = src, RC = str_const (~)
     |  ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
     |   mvn RC, RC
     |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
@@ -3863,7 +3865,7 @@ static int build_backend(BuildCtx *ctx)
 static void emit_asm_debug(BuildCtx *ctx)
 {
   int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
-  int i, cf = CFRAME_SIZE >> 3;
+  int i;
   switch (ctx->mode) {
   case BUILD_elfasm:
     fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
@@ -3888,14 +3890,14 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.quad .Lbegin\n"
 	"\t.quad %d\n"
 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
-	fcofs, CFRAME_SIZE, cf, cf-1);
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
+	fcofs, CFRAME_SIZE);
     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
-      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
+      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
       fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
-	      64+i, cf-i-4);
+	      64+i, i+(3+(28-19+1)-8));
     fprintf(ctx->fp,
 	"\t.align 3\n"
 	".LEFDE0:\n\n");
@@ -3908,9 +3910,11 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.quad lj_vm_ffi_call\n"
 	"\t.quad %d\n"
 	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
-	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
+	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
 	"\t.align 3\n"
 	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
 #endif
@@ -3941,14 +3945,14 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
-	fcofs, CFRAME_SIZE, cf, cf-1);
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
+	fcofs, CFRAME_SIZE);
     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
-      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
+      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
       fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
-	      64+i, cf-i-4);
+	      64+i, i+(3+(28-19+1)-8));
     fprintf(ctx->fp,
 	"\t.align 3\n"
 	".LEFDE2:\n\n");
@@ -3977,13 +3981,112 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
 	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
-	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
+	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
 	"\t.align 3\n"
 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
 #endif
     break;
+#if !LJ_NO_UNWIND
+  case BUILD_machasm: {
+#if LJ_HASFFI
+    int fcsize = 0;
+#endif
+    int j;
+    fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
+    fprintf(ctx->fp,
+	"EH_frame1:\n"
+	"\t.set L$set$x,LECIEX-LSCIEX\n"
+	"\t.long L$set$x\n"
+	"LSCIEX:\n"
+	"\t.long 0\n"
+	"\t.byte 0x1\n"
+	"\t.ascii \"zPR\\0\"\n"
+	"\t.byte 0x1\n"
+	"\t.byte 128-8\n"
+	"\t.byte 30\n"				/* Return address is in lr. */
+	"\t.byte 6\n"				/* augmentation length */
+	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
+	"\t.long _lj_err_unwind_dwarf@GOTPCREL\n"
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
+	"\t.align 3\n"
+	"LECIEX:\n\n");
+    for (j = 0; j < ctx->nsym; j++) {
+      const char *name = ctx->sym[j].name;
+      int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs;
+      if (size == 0) continue;
+#if LJ_HASFFI
+      if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
+#endif
+      fprintf(ctx->fp,
+	"%s.eh:\n"
+	"LSFDE%d:\n"
+	"\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
+	"\t.long L$set$%d\n"
+	"LASFDE%d:\n"
+	"\t.long LASFDE%d-EH_frame1\n"
+	"\t.long %s-.\n"
+	"\t.long %d\n"
+	"\t.byte 0\n"				/* augmentation length */
+	"\t.byte 0xe\n\t.byte %d\n\t.byte 1\n"	/* def_cfa_offset */
+	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.byte 2\n",		/* offset fp */
+	name, j, j, j, j, j, j, j, name, size, CFRAME_SIZE);
+      for (i = 19; i <= 28; i++)  /* offset x19-x28 */
+	fprintf(ctx->fp, "\t.byte 0x%x\n\t.byte %d\n", 0x80+i, i+(3-19));
+      for (i = 8; i <= 15; i++)  /* offset d8-d15 */
+	fprintf(ctx->fp, "\t.byte 5\n\t.byte 0x%x\n\t.byte %d\n",
+		64+i, i+(3+(28-19+1)-8));
+      fprintf(ctx->fp,
+	"\t.align 3\n"
+	"LEFDE%d:\n\n", j);
+    }
+#if LJ_HASFFI
+    if (fcsize) {
+      fprintf(ctx->fp,
+	"EH_frame2:\n"
+	"\t.set L$set$y,LECIEY-LSCIEY\n"
+	"\t.long L$set$y\n"
+	"LSCIEY:\n"
+	"\t.long 0\n"
+	"\t.byte 0x1\n"
+	"\t.ascii \"zR\\0\"\n"
+	"\t.byte 0x1\n"
+	"\t.byte 128-8\n"
+	"\t.byte 30\n"				/* Return address is in lr. */
+	"\t.byte 1\n"				/* augmentation length */
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
+	"\t.align 3\n"
+	"LECIEY:\n\n");
+      fprintf(ctx->fp,
+	"_lj_vm_ffi_call.eh:\n"
+	"LSFDEY:\n"
+	"\t.set L$set$yy,LEFDEY-LASFDEY\n"
+	"\t.long L$set$yy\n"
+	"LASFDEY:\n"
+	"\t.long LASFDEY-EH_frame2\n"
+	"\t.long _lj_vm_ffi_call-.\n"
+	"\t.long %d\n"
+	"\t.byte 0\n"				/* augmentation length */
+	"\t.byte 0xe\n\t.byte 32\n"		/* def_cfa_offset */
+	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.byte 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.byte 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.byte 4\n"		/* offset x20 */
+	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
+	"\t.align 3\n"
+	"LEFDEY:\n\n", fcsize);
+    }
+#endif
+    fprintf(ctx->fp, ".subsections_via_symbols\n");
+    }
+    break;
+#endif
   default:
     break;
   }
-- 
2.32.1 (Apple Git-133)

next prev parent reply	other threads:[~2022-09-26 15:57 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-26 15:54 [Tarantool-patches] [PATCH luajit 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches
2022-09-28  8:20   ` sergos via Tarantool-patches
2022-10-03  7:36   ` Sergey Kaplun via Tarantool-patches
2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches
2022-10-03 10:54   ` Sergey Kaplun via Tarantool-patches
2022-10-03 15:58   ` sergos via Tarantool-patches
2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches
2022-10-03 11:08   ` Sergey Kaplun via Tarantool-patches
2022-10-04  8:26   ` sergos via Tarantool-patches
2022-09-26 15:55 ` Maksim Kokryashkin via Tarantool-patches [this message]
2022-10-04 15:48   ` [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding sergos via Tarantool-patches
2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 5/6] OSX/ARM64: Disable unwind info Maksim Kokryashkin via Tarantool-patches
2022-10-04 15:52   ` sergos via Tarantool-patches
2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 6/6] OSX/ARM64: Fix external unwinding Maksim Kokryashkin via Tarantool-patches
2022-10-04 16:05   ` sergos via Tarantool-patches
2022-10-06  9:48 ` [Tarantool-patches] [PATCH luajit v2 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches
2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches
2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches
2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 4/6] ARM64: Reorder interpreter stack frame and fix unwinding Maksim Kokryashkin via Tarantool-patches
2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 5/6] OSX/ARM64: Disable unwind info Maksim Kokryashkin via Tarantool-patches
2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 6/6] OSX/ARM64: Fix external unwinding Maksim Kokryashkin via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=725566e75d613842c9f80021e500cdb86a84b3fa.1664207262.git.max.kokryashkin@gmail.com \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=max.kokryashkin@gmail.com \
    --cc=sergos@tarantool.org \
    --cc=skaplun@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding.' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox