Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH luajit 0/6] Fix external unwinding on M1
@ 2022-09-26 15:54 Maksim Kokryashkin via Tarantool-patches
  2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches
                   ` (6 more replies)
  0 siblings, 7 replies; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-09-26 15:54 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun; +Cc: Maksim Kokryashkin

This patch enables external unwinding on all platforms,
that produce unwind tables by default.

Follow-up patches fix external unwinding behavior on
arm64 MacOS machines.

PR: https://github.com/tarantool/tarantool/pull/7719
Branch: https://github.com/tarantool/luajit/tree/fckxorg/gh-6096-external-unwinding
Original issue: https://github.com/LuaJIT/LuaJIT/issues/698
Tarantool issue: https://github.com/tarantool/tarantool/issues/6096


Maksim Kokryashkin (3):
  Cleanup and enable external unwinding for more platforms.
  OSX: Fix build by hardcoding external frame unwinding.
  OSX/ARM64: Fix external unwinding.

Mike Pall (3):
  OSX/ARM64: Disable external unwinding for now.
  ARM64: Reorder interpreter stack frame and fix unwinding.
  OSX/ARM64: Disable unwind info.

 .github/workflows/macos-x86_64.yml            |  20 +-
 CMakeLists.txt                                |   3 +
 cmake/SetTargetFlags.cmake                    |  26 +-
 doc/extensions.html                           |  22 +-
 src/Makefile.original                         |  12 +-
 src/lj_arch.h                                 |  27 +-
 src/lj_err.c                                  | 321 +++++++++---------
 src/lj_frame.h                                |  12 +-
 src/vm_arm64.dasc                             | 209 ++++++++----
 ...-6096-external-unwinding-on-arm64.test.lua |  13 +
 10 files changed, 389 insertions(+), 276 deletions(-)
 create mode 100644 test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua

--
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Tarantool-patches] [PATCH luajit 1/6] Cleanup and enable external unwinding for more platforms.
  2022-09-26 15:54 [Tarantool-patches] [PATCH luajit 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
@ 2022-09-26 15:54 ` Maksim Kokryashkin via Tarantool-patches
  2022-09-28  8:20   ` sergos via Tarantool-patches
  2022-10-03  7:36   ` Sergey Kaplun via Tarantool-patches
  2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches
                   ` (5 subsequent siblings)
  6 siblings, 2 replies; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-09-26 15:54 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun; +Cc: Maksim Kokryashkin

(cherry picked from commit e131936133c58de4426c595db2341caf5a1665b5)

This commit enables external unwiding on all platforms, that
create unwind tables by default. Corresponding check is added
to the build routine.

Also, GC64 mode is now enabled on MacOS by default.

Maxim Kokryashkin:
* added the description for the patch

Needed for tarantool/tarantool#6096
Needed for tarantool/tarantool#7230
---
 CMakeLists.txt             |   3 +
 cmake/SetTargetFlags.cmake |  22 ++-
 doc/extensions.html        |  22 +--
 src/Makefile.original      |  11 +-
 src/lj_arch.h              |  27 ++--
 src/lj_err.c               | 321 +++++++++++++++++++------------------
 6 files changed, 213 insertions(+), 193 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8b49f9d7..c4f3ef62 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -176,6 +176,9 @@ endif()
 
 # Enable GC64 mode for x64.
 option(LUAJIT_ENABLE_GC64 "GC64 mode for x64" OFF)
+if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+  set(LUAJIT_ENABLE_GC64 ON)
+endif()
 if(LUAJIT_ENABLE_GC64)
   AppendFlags(TARGET_C_FLAGS -DLUAJIT_ENABLE_GC64)
 endif()
diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
index b544d2ac..36896aff 100644
--- a/cmake/SetTargetFlags.cmake
+++ b/cmake/SetTargetFlags.cmake
@@ -15,6 +15,22 @@ endif()
 LuaJITTestArch(TESTARCH "${TARGET_C_FLAGS}")
 LuaJITArch(LUAJIT_ARCH "${TESTARCH}")
 
+string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
+if(UNWIND_POS EQUAL -1)
+  execute_process(
+    COMMAND bash -c "exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | ${CMAKE_C_COMPILER} -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o"
+    WORKING_DIRECTORY ${LUAJIT_SOURCE_DIR}
+    OUTPUT_VARIABLE TESTUNWIND
+    RESULT_VARIABLE TESTUNWIND_RC
+  )
+  if(TESTUNWIND_RC EQUAL 0)
+    string(FIND "${TESTUNWIND}" "E" UNW_TEST_POS)
+    if(NOT UNW_TEST_POS EQUAL -1)
+      AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
+    endif()
+  endif()
+endif()
+
 # Target-specific compiler options.
 #
 # x86/x64 only: For GCC 4.2 or higher and if you don't intend to
@@ -25,12 +41,6 @@ if(LUAJIT_ARCH STREQUAL "x86")
 endif()
 
 if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
-  if(LUAJIT_ARCH STREQUAL "x64")
-    # XXX: Set -pagezero_size to hint <mmap> when allocating 32
-    # bit memory on OSX/x64, otherwise the lower 4GB are blocked.
-    AppendFlags(TARGET_BIN_FLAGS -pagezero_size 10000 -image_base 100000000)
-    AppendFlags(TARGET_SHARED_FLAGS -image_base 7fff04c4a000)
-  endif()
   AppendFlags(TARGET_SHARED_FLAGS -single_module -undefined dynamic_lookup)
 else() # Linux and FreeBSD.
   AppendFlags(TARGET_BIN_FLAGS -Wl,-E)
diff --git a/doc/extensions.html b/doc/extensions.html
index e0f136e2..777e9928 100644
--- a/doc/extensions.html
+++ b/doc/extensions.html
@@ -395,29 +395,19 @@ the toolchain used to compile LuaJIT:
 <td class="excinterop">Interoperability</td>
 </tr>
 <tr class="odd separate">
-<td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
-<td class="exccompiler">GCC 4.3+, Clang</td>
+<td class="excplatform">External frame unwinding</td>
+<td class="exccompiler">GCC, Clang, MSVC</td>
 <td class="excinterop"><b style="color: #00a000;">Full</b></td>
 </tr>
 <tr class="even">
-<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td>
-<td class="exccompiler">GCC, Clang</td>
-<td class="excinterop"><b style="color: #00a000;">Full</b></td>
-</tr>
-<tr class="odd">
-<td class="excplatform">Other platforms, DWARF2 unwinding</td>
+<td class="excplatform">Internal frame unwinding + DWARF2</td>
 <td class="exccompiler">GCC, Clang</td>
 <td class="excinterop"><b style="color: #c06000;">Limited</b></td>
 </tr>
-<tr class="even">
-<td class="excplatform">Windows/x64</td>
-<td class="exccompiler">MSVC or WinSDK</td>
-<td class="excinterop"><b style="color: #00a000;">Full</b></td>
-</tr>
 <tr class="odd">
-<td class="excplatform">Windows/x86</td>
-<td class="exccompiler">Any</td>
-<td class="excinterop"><b style="color: #00a000;">Full</b></td>
+<td class="excplatform">Windows 64 bit</td>
+<td class="exccompiler">non-MSVC</td>
+<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
 </tr>
 <tr class="even">
 <td class="excplatform">Other platforms</td>
diff --git a/src/Makefile.original b/src/Makefile.original
index dd1c6a76..c9609700 100644
--- a/src/Makefile.original
+++ b/src/Makefile.original
@@ -320,6 +320,13 @@ else
 ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
   TARGET_XCFLAGS+= -fno-stack-protector
 endif
+ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
+  # Find out whether the target toolchain always generates unwind tables.
+  TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o)
+  ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
+    TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+  endif
+endif
 ifeq (Darwin,$(TARGET_SYS))
   ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
     export MACOSX_DEPLOYMENT_TARGET=10.4
@@ -328,10 +335,6 @@ ifeq (Darwin,$(TARGET_SYS))
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
-  ifeq (x64,$(TARGET_LJARCH))
-    TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000
-    TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000
-  endif
 else
 ifeq (iOS,$(TARGET_SYS))
   TARGET_STRIP+= -x
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 40129d9e..730be5bf 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -152,11 +152,6 @@
 #define LJ_ARCH_NAME		"x86"
 #define LJ_ARCH_BITS		32
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
-#define LJ_ABI_WIN		1
-#else
-#define LJ_ABI_WIN		0
-#endif
 #define LJ_TARGET_X86		1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_EHRETREG	0
@@ -170,11 +165,6 @@
 #define LJ_ARCH_NAME		"x64"
 #define LJ_ARCH_BITS		64
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
-#define LJ_ABI_WIN		1
-#else
-#define LJ_ABI_WIN		0
-#endif
 #define LJ_TARGET_X64		1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_EHRETREG	0
@@ -185,6 +175,8 @@
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
 #ifdef LUAJIT_ENABLE_GC64
 #define LJ_TARGET_GC64		1
+#elif LJ_TARGET_OSX
+#error "macOS requires GC64 -- don't disable it"
 #endif
 
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
@@ -566,15 +558,22 @@
 #define LJ_NO_SYSTEM		1
 #endif
 
-#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
-/* NYI: no support for compact unwind specification, yet. */
-#define LUAJIT_NO_UNWIND	1
+#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
+#define LJ_ABI_WIN		1
+#else
+#define LJ_ABI_WIN		0
 #endif
 
-#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
+#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
 #define LJ_NO_UNWIND		1
 #endif
 
+#if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__))))
+#define LJ_UNWIND_EXT		1
+#else
+#define LJ_UNWIND_EXT		0
+#endif
+
 /* Compatibility with Lua 5.1 vs. 5.2. */
 #ifdef LUAJIT_ENABLE_LUA52COMPAT
 #define LJ_52			1
diff --git a/src/lj_err.c b/src/lj_err.c
index c310daf6..298e5434 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -29,12 +29,18 @@
 ** Pros and Cons:
 **
 ** - EXT requires unwind tables for *all* functions on the C stack between
-**   the pcall/catch and the error/throw. This is the default on x64,
-**   but needs to be manually enabled on x86/PPC for non-C++ code.
+**   the pcall/catch and the error/throw. C modules used by Lua code can
+**   throw errors, so these need to have unwind tables, too. Transitively
+**   this applies to all system libraries used by C modules -- at least
+**   when they have callbacks which may throw an error.
 **
-** - INT is faster when actually throwing errors (but this happens rarely).
+** - INT is faster when actually throwing errors, but this happens rarely.
 **   Setting up error handlers is zero-cost in any case.
 **
+** - INT needs to save *all* callee-saved registers when entering the
+**   interpreter. EXT only needs to save those actually used inside the
+**   interpreter. JIT-compiled code may need to save some more.
+**
 ** - EXT provides full interoperability with C++ exceptions. You can throw
 **   Lua errors or C++ exceptions through a mix of Lua frames and C++ frames.
 **   C++ destructors are called as needed. C++ exceptions caught by pcall
@@ -46,27 +52,33 @@
 **   the wrapper function feature. Lua errors thrown through C++ frames
 **   cannot be caught by C++ code and C++ destructors are not run.
 **
-** EXT is the default on x64 systems and on Windows, INT is the default on all
-** other systems.
+** EXT is the default on all systems where the toolchain produces unwind
+** tables by default (*). This is hard-coded and/or detected in src/Makefile.
+** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL
+**
+** INT is the default on all other systems.
+**
+** EXT can be manually enabled for toolchains that are able to produce
+** conforming unwind tables:
+**   "TARGET_XCFLAGS=-funwind-tables -DLUAJIT_UNWIND_EXTERNAL"
+** As explained above, *all* C code used directly or indirectly by LuaJIT
+** must be compiled with -funwind-tables (or -fexceptions). C++ code must
+** *not* be compiled with -fno-exceptions.
+**
+** If you're unsure whether error handling inside the VM works correctly,
+** try running this and check whether it prints "OK":
 **
-** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack
-** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled
-** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set
-** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules
-** and all C libraries that have callbacks which may be used to call back
-** into Lua. C++ code must *not* be compiled with -fno-exceptions.
+**   luajit -e "print(select(2, load('OK')):match('OK'))"
 **
-** EXT is mandatory on WIN64 since the calling convention has an abundance
-** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
-** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4).
+** (*) Originally, toolchains only generated unwind tables for C++ code. For
+** interoperability reasons, this can be manually enabled for plain C code,
+** too (with -funwind-tables). With the introduction of the x64 architecture,
+** the corresponding POSIX and Windows ABIs mandated unwind tables for all
+** code. Over the following years most desktop and server platforms have
+** enabled unwind tables by default on all architectures. OTOH mobile and
+** embedded platforms do not consistently mandate unwind tables.
 */
 
-#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
-#define LJ_UNWIND_EXT	1
-#elif LJ_TARGET_WINDOWS
-#define LJ_UNWIND_EXT	1
-#endif
-
 /* -- Error messages ------------------------------------------------------ */
 
 /* Error message strings. */
@@ -183,7 +195,125 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
 
 /* -- External frame unwinding -------------------------------------------- */
 
-#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN
+#if LJ_ABI_WIN
+
+/*
+** Someone in Redmond owes me several days of my life. A lot of this is
+** undocumented or just plain wrong on MSDN. Some of it can be gathered
+** from 3rd party docs or must be found by trial-and-error. They really
+** don't want you to write your own language-specific exception handler
+** or to interact gracefully with MSVC. :-(
+**
+** Apparently MSVC doesn't call C++ destructors for foreign exceptions
+** unless you compile your C++ code with /EHa. Unfortunately this means
+** catch (...) also catches things like access violations. The use of
+** _set_se_translator doesn't really help, because it requires /EHa, too.
+*/
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#if LJ_TARGET_X86
+typedef void *UndocumentedDispatcherContext;  /* Unused on x86. */
+#else
+/* Taken from: http://www.nynaeve.net/?p=99 */
+typedef struct UndocumentedDispatcherContext {
+  ULONG64 ControlPc;
+  ULONG64 ImageBase;
+  PRUNTIME_FUNCTION FunctionEntry;
+  ULONG64 EstablisherFrame;
+  ULONG64 TargetIp;
+  PCONTEXT ContextRecord;
+  void (*LanguageHandler)(void);
+  PVOID HandlerData;
+  PUNWIND_HISTORY_TABLE HistoryTable;
+  ULONG ScopeIndex;
+  ULONG Fill0;
+} UndocumentedDispatcherContext;
+#endif
+
+/* Another wild guess. */
+extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
+
+#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
+/* Workaround for broken MinGW64 declaration. */
+VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
+#define RtlUnwindEx RtlUnwindEx_FIXED
+#endif
+
+#define LJ_MSVC_EXCODE		((DWORD)0xe06d7363)
+#define LJ_GCC_EXCODE		((DWORD)0x20474343)
+
+#define LJ_EXCODE		((DWORD)0xe24c4a00)
+#define LJ_EXCODE_MAKE(c)	(LJ_EXCODE | (DWORD)(c))
+#define LJ_EXCODE_CHECK(cl)	(((cl) ^ LJ_EXCODE) <= 0xff)
+#define LJ_EXCODE_ERRCODE(cl)	((int)((cl) & 0xff))
+
+/* Windows exception handler for interpreter frame. */
+LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
+  void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
+{
+#if LJ_TARGET_X86
+  void *cf = (char *)f - CFRAME_OFS_SEH;
+#else
+  void *cf = f;
+#endif
+  lua_State *L = cframe_L(cf);
+  int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
+		LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
+  if ((rec->ExceptionFlags & 6)) {  /* EH_UNWINDING|EH_EXIT_UNWIND */
+    /* Unwind internal frames. */
+    err_unwind(L, cf, errcode);
+  } else {
+    void *cf2 = err_unwind(L, cf, 0);
+    if (cf2) {  /* We catch it, so start unwinding the upper frames. */
+      if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
+	  rec->ExceptionCode == LJ_GCC_EXCODE) {
+#if !LJ_TARGET_CYGWIN
+	__DestructExceptionObject(rec, 1);
+#endif
+	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
+      } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
+	/* Don't catch access violations etc. */
+	return 1;  /* ExceptionContinueSearch */
+      }
+#if LJ_TARGET_X86
+      UNUSED(ctx);
+      UNUSED(dispatch);
+      /* Call all handlers for all lower C frames (including ourselves) again
+      ** with EH_UNWINDING set. Then call the specified function, passing cf
+      ** and errcode.
+      */
+      lj_vm_rtlunwind(cf, (void *)rec,
+	(cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
+	(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
+      /* lj_vm_rtlunwind does not return. */
+#else
+      /* Unwind the stack and call all handlers for all lower C frames
+      ** (including ourselves) again with EH_UNWINDING set. Then set
+      ** stack pointer = cf, result = errcode and jump to the specified target.
+      */
+      RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
+			       lj_vm_unwind_ff_eh :
+			       lj_vm_unwind_c_eh),
+		  rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
+      /* RtlUnwindEx should never return. */
+#endif
+    }
+  }
+  return 1;  /* ExceptionContinueSearch */
+}
+
+/* Raise Windows exception. */
+static void err_raise_ext(global_State *g, int errcode)
+{
+#if LJ_HASJIT
+  setmref(g->jit_base, NULL);
+#endif
+  RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
+}
+
+#elif !LJ_NO_UNWIND && (defined(__GNUC__) || defined(__clang__))
 
 /*
 ** We have to use our own definitions instead of the mandatory (!) unwind.h,
@@ -232,7 +362,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
   lua_State *L;
   if (version != 1)
     return _URC_FATAL_PHASE1_ERROR;
-  UNUSED(uexclass);
   cf = (void *)_Unwind_GetCFA(ctx);
   L = cframe_L(cf);
   if ((actions & _UA_SEARCH_PHASE)) {
@@ -280,6 +409,9 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
     ** it on non-x64 because the interpreter restores all callee-saved regs.
     */
     lj_err_throw(L, errcode);
+#if LJ_TARGET_X64
+#error "Broken build system -- only use the provided Makefiles!"
+#endif
 #endif
   }
   return _URC_CONTINUE_UNWIND;
@@ -292,14 +424,6 @@ static _Unwind_Exception static_uex;
 #else
 static __thread _Unwind_Exception static_uex;
 #endif
-
-/* Raise DWARF2 exception. */
-static void err_raise_ext(int errcode)
-{
-  static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
-  static_uex.excleanup = NULL;
-  _Unwind_RaiseException(&static_uex);
-}
 #endif
 
 #else /* LJ_TARGET_ARM */
@@ -373,132 +497,22 @@ LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb,
 
 #if LJ_UNWIND_EXT
 static __thread _Unwind_Control_Block static_uex;
+#endif
+#endif /* LJ_TARGET_ARM */
 
-static void err_raise_ext(int errcode)
+#if LJ_UNWIND_EXT
+/* Raise external exception. */
+static void err_raise_ext(global_State *g, int errcode)
 {
+#if LJ_HASJIT
+  setmref(g->jit_base, NULL);
+#endif
   memset(&static_uex, 0, sizeof(static_uex));
   static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
   _Unwind_RaiseException(&static_uex);
 }
 #endif
 
-#endif /* LJ_TARGET_ARM */
-
-#elif LJ_ABI_WIN
-
-/*
-** Someone in Redmond owes me several days of my life. A lot of this is
-** undocumented or just plain wrong on MSDN. Some of it can be gathered
-** from 3rd party docs or must be found by trial-and-error. They really
-** don't want you to write your own language-specific exception handler
-** or to interact gracefully with MSVC. :-(
-**
-** Apparently MSVC doesn't call C++ destructors for foreign exceptions
-** unless you compile your C++ code with /EHa. Unfortunately this means
-** catch (...) also catches things like access violations. The use of
-** _set_se_translator doesn't really help, because it requires /EHa, too.
-*/
-
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-#if LJ_TARGET_X64
-/* Taken from: http://www.nynaeve.net/?p=99 */
-typedef struct UndocumentedDispatcherContext {
-  ULONG64 ControlPc;
-  ULONG64 ImageBase;
-  PRUNTIME_FUNCTION FunctionEntry;
-  ULONG64 EstablisherFrame;
-  ULONG64 TargetIp;
-  PCONTEXT ContextRecord;
-  void (*LanguageHandler)(void);
-  PVOID HandlerData;
-  PUNWIND_HISTORY_TABLE HistoryTable;
-  ULONG ScopeIndex;
-  ULONG Fill0;
-} UndocumentedDispatcherContext;
-#else
-typedef void *UndocumentedDispatcherContext;
-#endif
-
-/* Another wild guess. */
-extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
-
-#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
-/* Workaround for broken MinGW64 declaration. */
-VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
-#define RtlUnwindEx RtlUnwindEx_FIXED
-#endif
-
-#define LJ_MSVC_EXCODE		((DWORD)0xe06d7363)
-#define LJ_GCC_EXCODE		((DWORD)0x20474343)
-
-#define LJ_EXCODE		((DWORD)0xe24c4a00)
-#define LJ_EXCODE_MAKE(c)	(LJ_EXCODE | (DWORD)(c))
-#define LJ_EXCODE_CHECK(cl)	(((cl) ^ LJ_EXCODE) <= 0xff)
-#define LJ_EXCODE_ERRCODE(cl)	((int)((cl) & 0xff))
-
-/* Windows exception handler for interpreter frame. */
-LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
-  void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
-{
-#if LJ_TARGET_X64
-  void *cf = f;
-#else
-  void *cf = (char *)f - CFRAME_OFS_SEH;
-#endif
-  lua_State *L = cframe_L(cf);
-  int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
-		LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
-  if ((rec->ExceptionFlags & 6)) {  /* EH_UNWINDING|EH_EXIT_UNWIND */
-    /* Unwind internal frames. */
-    err_unwind(L, cf, errcode);
-  } else {
-    void *cf2 = err_unwind(L, cf, 0);
-    if (cf2) {  /* We catch it, so start unwinding the upper frames. */
-      if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
-	  rec->ExceptionCode == LJ_GCC_EXCODE) {
-#if LJ_TARGET_WINDOWS
-	__DestructExceptionObject(rec, 1);
-#endif
-	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
-      } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
-	/* Don't catch access violations etc. */
-	return 1;  /* ExceptionContinueSearch */
-      }
-#if LJ_TARGET_X64
-      /* Unwind the stack and call all handlers for all lower C frames
-      ** (including ourselves) again with EH_UNWINDING set. Then set
-      ** rsp = cf, rax = errcode and jump to the specified target.
-      */
-      RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
-			       lj_vm_unwind_ff_eh :
-			       lj_vm_unwind_c_eh),
-		  rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
-      /* RtlUnwindEx should never return. */
-#else
-      UNUSED(ctx);
-      UNUSED(dispatch);
-      /* Call all handlers for all lower C frames (including ourselves) again
-      ** with EH_UNWINDING set. Then call the specified function, passing cf
-      ** and errcode.
-      */
-      lj_vm_rtlunwind(cf, (void *)rec,
-	(cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
-	(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
-      /* lj_vm_rtlunwind does not return. */
-#endif
-    }
-  }
-  return 1;  /* ExceptionContinueSearch */
-}
-
-/* Raise Windows exception. */
-static void err_raise_ext(int errcode)
-{
-  RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
-}
-
 #endif
 
 /* -- Error handling ------------------------------------------------------ */
@@ -508,22 +522,23 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
 {
   global_State *g = G(L);
   lj_trace_abort(g);
-  setmref(g->jit_base, NULL);
   L->status = LUA_OK;
 #if LJ_UNWIND_EXT
-  err_raise_ext(errcode);
+  err_raise_ext(g, errcode);
   /*
   ** A return from this function signals a corrupt C stack that cannot be
   ** unwound. We have no choice but to call the panic function and exit.
   **
   ** Usually this is caused by a C function without unwind information.
-  ** This should never happen on x64, but may happen if you've manually
-  ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every*
-  ** non-C++ file with -funwind-tables.
+  ** This may happen if you've manually enabled LUAJIT_UNWIND_EXTERNAL
+  ** and forgot to recompile *every* non-C++ file with -funwind-tables.
   */
   if (G(L)->panic)
     G(L)->panic(L);
 #else
+#if LJ_HASJIT
+  setmref(g->jit_base, NULL);
+#endif
   {
     void *cf = err_unwind(L, NULL, errcode);
     if (cframe_unwind_ff(cf))
-- 
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Tarantool-patches] [PATCH luajit 2/6] OSX: Fix build by hardcoding external frame unwinding.
  2022-09-26 15:54 [Tarantool-patches] [PATCH luajit 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
  2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches
@ 2022-09-26 15:54 ` Maksim Kokryashkin via Tarantool-patches
  2022-10-03 10:54   ` Sergey Kaplun via Tarantool-patches
  2022-10-03 15:58   ` sergos via Tarantool-patches
  2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-09-26 15:54 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun; +Cc: Maksim Kokryashkin

Apparently they can't even get 'grep' right, let alone a keyboard.

(cherry picked from commit d4a554d6ee1507f7313641b26ed09bf1b518fa1f)

MacOS uses BSD grep, which is slightly different from GNU grep.
Because of that, the shell script determining whether external
unwinding is possible doesn't work right.

External unwinding is possible on MacOS, so this patch enables
it by default.

Maxim Kokryashkin:
* added the description for the problem

Needed for tarantool/tarantool#6096
Needed for tarantool/tarantool#7230
---
 .github/workflows/macos-x86_64.yml | 20 +-------------------
 cmake/SetTargetFlags.cmake         | 28 ++++++++++++++++------------
 src/Makefile.original              | 15 ++++++++-------
 3 files changed, 25 insertions(+), 38 deletions(-)

diff --git a/.github/workflows/macos-x86_64.yml b/.github/workflows/macos-x86_64.yml
index 840806e3..dafd1796 100644
--- a/.github/workflows/macos-x86_64.yml
+++ b/.github/workflows/macos-x86_64.yml
@@ -35,7 +35,7 @@ jobs:
       fail-fast: false
       matrix:
         BUILDTYPE: [Debug, Release]
-        GC64: [ON, OFF]
+        GC64: ON
         include:
           - BUILDTYPE: Debug
             CMAKEFLAGS: -DCMAKE_BUILD_TYPE=Debug -DLUA_USE_ASSERT=ON -DLUA_USE_APICHECK=ON
@@ -69,15 +69,6 @@ jobs:
       - name: test
         run: cmake --build . --parallel --target test
 
-  test-tarantool-debug-wo-GC64:
-    name: Tarantool Debug GC64:OFF
-    needs: test-luajit
-    uses: tarantool/tarantool/.github/workflows/luajit-integration.yml@master
-    with:
-      GC64: OFF
-      buildtype: Debug
-      host: macos-11
-      revision: ${{ github.sha }}
   test-tarantool-debug-w-GC64:
     name: Tarantool Debug GC64:ON
     needs: test-luajit
@@ -87,15 +78,6 @@ jobs:
       buildtype: Debug
       host: macos-11
       revision: ${{ github.sha }}
-  test-tarantool-release-wo-GC64:
-    name: Tarantool Release GC64:OFF
-    needs: test-luajit
-    uses: tarantool/tarantool/.github/workflows/luajit-integration.yml@master
-    with:
-      GC64: OFF
-      buildtype: RelWithDebInfo
-      host: macos-11
-      revision: ${{ github.sha }}
   test-tarantool-release-w-GC64:
     name: Tarantool Release GC64:ON
     needs: test-luajit
diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
index 36896aff..a5a3407f 100644
--- a/cmake/SetTargetFlags.cmake
+++ b/cmake/SetTargetFlags.cmake
@@ -15,18 +15,22 @@ endif()
 LuaJITTestArch(TESTARCH "${TARGET_C_FLAGS}")
 LuaJITArch(LUAJIT_ARCH "${TESTARCH}")
 
-string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
-if(UNWIND_POS EQUAL -1)
-  execute_process(
-    COMMAND bash -c "exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | ${CMAKE_C_COMPILER} -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o"
-    WORKING_DIRECTORY ${LUAJIT_SOURCE_DIR}
-    OUTPUT_VARIABLE TESTUNWIND
-    RESULT_VARIABLE TESTUNWIND_RC
-  )
-  if(TESTUNWIND_RC EQUAL 0)
-    string(FIND "${TESTUNWIND}" "E" UNW_TEST_POS)
-    if(NOT UNW_TEST_POS EQUAL -1)
-      AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
+if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+  AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
+else()
+  string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
+  if(UNWIND_POS EQUAL -1)
+    execute_process(
+      COMMAND bash -c "exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | ${CMAKE_C_COMPILER} -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o"
+      WORKING_DIRECTORY ${LUAJIT_SOURCE_DIR}
+      OUTPUT_VARIABLE TESTUNWIND
+      RESULT_VARIABLE TESTUNWIND_RC
+    )
+    if(TESTUNWIND_RC EQUAL 0)
+      string(FIND "${TESTUNWIND}" "E" UNW_TEST_POS)
+      if(NOT UNW_TEST_POS EQUAL -1)
+        AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
+      endif()
     endif()
   endif()
 endif()
diff --git a/src/Makefile.original b/src/Makefile.original
index c9609700..d1373b40 100644
--- a/src/Makefile.original
+++ b/src/Makefile.original
@@ -320,18 +320,12 @@ else
 ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
   TARGET_XCFLAGS+= -fno-stack-protector
 endif
-ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
-  # Find out whether the target toolchain always generates unwind tables.
-  TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o)
-  ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
-    TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
-  endif
-endif
 ifeq (Darwin,$(TARGET_SYS))
   ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
     export MACOSX_DEPLOYMENT_TARGET=10.4
   endif
   TARGET_STRIP+= -x
+  TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
@@ -345,6 +339,13 @@ ifeq (iOS,$(TARGET_SYS))
     TARGET_XCFLAGS+= -fno-omit-frame-pointer
   endif
 else
+  ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
+    # Find out whether the target toolchain always generates unwind tables.
+    TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o)
+    ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
+      TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+    endif
+  endif
   ifneq (SunOS,$(TARGET_SYS))
     ifneq (PS3,$(TARGET_SYS))
       TARGET_XLDFLAGS+= -Wl,-E
-- 
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Tarantool-patches] [PATCH luajit 3/6] OSX/ARM64: Disable external unwinding for now.
  2022-09-26 15:54 [Tarantool-patches] [PATCH luajit 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
  2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches
  2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches
@ 2022-09-26 15:55 ` Maksim Kokryashkin via Tarantool-patches
  2022-10-03 11:08   ` Sergey Kaplun via Tarantool-patches
  2022-10-04  8:26   ` sergos via Tarantool-patches
  2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding Maksim Kokryashkin via Tarantool-patches
                   ` (3 subsequent siblings)
  6 siblings, 2 replies; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-09-26 15:55 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun

From: Mike Pall <mike>

This reduces functionality, e.g. no handling of on-trace errors.
Someone with very deep knowledge about macOS and MACH-O/DWARF stack
unwinding internals is needed to fix this. See issue LuaJIT/LuaJIT#698.

(cherry picked from commit 27ee3bcd79b12a0c71f00427ee1a2e486c684486)

ARM64 interpreter doesn't have unwind info for Mach-O assembler
format, which makes unwinding impossible.

For more info, see https://github.com/LuaJIT/LuaJIT/issues/698

Maxim Kokryashkin:
* added the description for the problem

Needed for tarantool/tarantool#6096
Needed for tarantool/tarantool#7230
---
 cmake/SetTargetFlags.cmake | 4 +++-
 src/Makefile.original      | 5 ++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
index a5a3407f..d99e1f9a 100644
--- a/cmake/SetTargetFlags.cmake
+++ b/cmake/SetTargetFlags.cmake
@@ -16,7 +16,9 @@ LuaJITTestArch(TESTARCH "${TARGET_C_FLAGS}")
 LuaJITArch(LUAJIT_ARCH "${TESTARCH}")
 
 if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
-  AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
+  if(NOT LUAJIT_ARCH STREQUAL "arm64")
+    AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
+  endif()
 else()
   string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
   if(UNWIND_POS EQUAL -1)
diff --git a/src/Makefile.original b/src/Makefile.original
index d1373b40..5826a56a 100644
--- a/src/Makefile.original
+++ b/src/Makefile.original
@@ -325,7 +325,10 @@ ifeq (Darwin,$(TARGET_SYS))
     export MACOSX_DEPLOYMENT_TARGET=10.4
   endif
   TARGET_STRIP+= -x
-  TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+  # Ext. unwinding is broken on OSX/ARM64 until someone finds a fix. See #698.
+  ifneq (arm64,$(TARGET_LJARCH))
+    TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+  endif
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
-- 
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding.
  2022-09-26 15:54 [Tarantool-patches] [PATCH luajit 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
                   ` (2 preceding siblings ...)
  2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches
@ 2022-09-26 15:55 ` Maksim Kokryashkin via Tarantool-patches
  2022-10-04 15:48   ` sergos via Tarantool-patches
  2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 5/6] OSX/ARM64: Disable unwind info Maksim Kokryashkin via Tarantool-patches
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-09-26 15:55 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun

From: Mike Pall <mike>

Reported by Yichun Zhang. Fixes LuaJIT/LuaJIT#722.
May help towards fixing LuaJIT/LuaJIT#698, too.

(cherry picked from commit 421c4c798791d27b7f967df39891c4e4fa1d107c)

The `_Unwind_Find_FDE` fails to find FDE (frame descriptor element)
for `lj_vm_ffi_call` in DWARF unwind info, despite the presence
of its data in `.debug_frame` section.

Reordering of interpreter stack frame fixes the issue.

For more info, see https://github.com/LuaJIT/LuaJIT/issues/722

Maxim Kokryashkin:
* added the description for the problem

Needed for tarantool/tarantool#6096
Needed for tarantool/tarantool#7230
---
 src/lj_frame.h    |  12 +--
 src/vm_arm64.dasc | 189 +++++++++++++++++++++++++++++++++++-----------
 2 files changed, 152 insertions(+), 49 deletions(-)

diff --git a/src/lj_frame.h b/src/lj_frame.h
index 9fd63fa2..1e4adaa3 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -192,12 +192,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 #endif
 #define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_ARM64
-#define CFRAME_OFS_ERRF		196
-#define CFRAME_OFS_NRES		200
-#define CFRAME_OFS_PREV		160
-#define CFRAME_OFS_L		176
-#define CFRAME_OFS_PC		168
-#define CFRAME_OFS_MULTRES	192
+#define CFRAME_OFS_ERRF		36
+#define CFRAME_OFS_NRES		40
+#define CFRAME_OFS_PREV		0
+#define CFRAME_OFS_L		16
+#define CFRAME_OFS_PC		8
+#define CFRAME_OFS_MULTRES	32
 #define CFRAME_SIZE		208
 #define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_PPC
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index 313cc94f..ad57bca3 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -81,47 +81,49 @@
 |
 |.define CFRAME_SPACE,	208
 |//----- 16 byte aligned, <-- sp entering interpreter
-|// Unused		[sp, #204]	// 32 bit values
-|.define SAVE_NRES,	[sp, #200]
-|.define SAVE_ERRF,	[sp, #196]
-|.define SAVE_MULTRES,	[sp, #192]
-|.define TMPD,		[sp, #184]	// 64 bit values
-|.define SAVE_L,	[sp, #176]
-|.define SAVE_PC,	[sp, #168]
-|.define SAVE_CFRAME,	[sp, #160]
-|.define SAVE_FPR_,	96		// 96+8*8: 64 bit FPR saves
-|.define SAVE_GPR_,	16		// 16+10*8: 64 bit GPR saves
-|.define SAVE_LR,	[sp, #8]
-|.define SAVE_FP,	[sp]
+|.define SAVE_LR,	[sp, #200]
+|.define SAVE_FP,	[sp, #192]
+|.define SAVE_GPR_,	112		// 112+10*8: 64 bit GPR saves
+|.define SAVE_FPR_,	48		// 48+8*8: 64 bit FPR saves
+|// Unused		[sp, #44]	// 32 bit values
+|.define SAVE_NRES,	[sp, #40]
+|.define SAVE_ERRF,	[sp, #36]
+|.define SAVE_MULTRES,	[sp, #32]
+|.define TMPD,		[sp, #24]	// 64 bit values
+|.define SAVE_L,	[sp, #16]
+|.define SAVE_PC,	[sp, #8]
+|.define SAVE_CFRAME,	[sp, #0]
 |//----- 16 byte aligned, <-- sp while in interpreter.
 |
-|.define TMPDofs,	#184
+|.define TMPDofs,	#24
 |
 |.macro save_, gpr1, gpr2, fpr1, fpr2
-|  stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
-|  stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
+|  stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
+|  stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
 |.endmacro
 |.macro rest_, gpr1, gpr2, fpr1, fpr2
-|  ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
-|  ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
+|  ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
+|  ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
 |.endmacro
 |
 |.macro saveregs
-|  stp fp, lr, [sp, #-CFRAME_SPACE]!
+|  sub sp, sp, # CFRAME_SPACE
+|  stp fp, lr, SAVE_FP
 |  add fp, sp, #0
-|  stp x19, x20, [sp, # SAVE_GPR_]
+|  stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
 |  save_ 21, 22, 8, 9
 |  save_ 23, 24, 10, 11
 |  save_ 25, 26, 12, 13
 |  save_ 27, 28, 14, 15
 |.endmacro
 |.macro restoreregs
-|  ldp x19, x20, [sp, # SAVE_GPR_]
+|  ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
 |  rest_ 21, 22, 8, 9
 |  rest_ 23, 24, 10, 11
 |  rest_ 25, 26, 12, 13
 |  rest_ 27, 28, 14, 15
-|  ldp fp, lr, [sp], # CFRAME_SPACE
+|  ldp fp, lr, SAVE_FP
+|  add sp, sp, # CFRAME_SPACE
 |.endmacro
 |
 |// Type definitions. Some of these are only used for documentation.
@@ -2125,9 +2127,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  // Caveat: needs special frame unwinding, see below.
   |.if FFI
   |  .type CCSTATE, CCallState, x19
-  |  stp fp, lr, [sp, #-32]!
+  |  stp x20, CCSTATE, [sp, #-32]!
+  |  stp fp, lr, [sp, #16]
   |  add fp, sp, #0
-  |  str CCSTATE, [sp, #16]
   |  mov CCSTATE, x0
   |  ldr TMP0w, CCSTATE:x0->spadj
   |   ldrb TMP1w, CCSTATE->nsp
@@ -2156,8 +2158,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  stp x0, x1, CCSTATE->gpr[0]
   |   stp d0, d1, CCSTATE->fpr[0]
   |   stp d2, d3, CCSTATE->fpr[2]
-  |  ldr CCSTATE, [sp, #16]
-  |  ldp fp, lr, [sp], #32
+  |  ldp fp, lr, [sp, #16]
+  |  ldp x20, CCSTATE, [sp], #32
   |  ret
   |.endif
   |// Note: vm_ffi_call must be the last function in this object file!
@@ -2887,7 +2889,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
   case BC_GGET:
     |  // RA = dst, RC = str_const (~)
   case BC_GSET:
-    |  // RA = dst, RC = str_const (~)
+    |  // RA = src, RC = str_const (~)
     |  ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
     |   mvn RC, RC
     |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
@@ -3863,7 +3865,7 @@ static int build_backend(BuildCtx *ctx)
 static void emit_asm_debug(BuildCtx *ctx)
 {
   int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
-  int i, cf = CFRAME_SIZE >> 3;
+  int i;
   switch (ctx->mode) {
   case BUILD_elfasm:
     fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
@@ -3888,14 +3890,14 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.quad .Lbegin\n"
 	"\t.quad %d\n"
 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
-	fcofs, CFRAME_SIZE, cf, cf-1);
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
+	fcofs, CFRAME_SIZE);
     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
-      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
+      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
       fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
-	      64+i, cf-i-4);
+	      64+i, i+(3+(28-19+1)-8));
     fprintf(ctx->fp,
 	"\t.align 3\n"
 	".LEFDE0:\n\n");
@@ -3908,9 +3910,11 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.quad lj_vm_ffi_call\n"
 	"\t.quad %d\n"
 	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
-	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
+	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
 	"\t.align 3\n"
 	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
 #endif
@@ -3941,14 +3945,14 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
-	fcofs, CFRAME_SIZE, cf, cf-1);
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
+	fcofs, CFRAME_SIZE);
     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
-      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
+      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
       fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
-	      64+i, cf-i-4);
+	      64+i, i+(3+(28-19+1)-8));
     fprintf(ctx->fp,
 	"\t.align 3\n"
 	".LEFDE2:\n\n");
@@ -3977,13 +3981,112 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
 	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
-	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
+	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
 	"\t.align 3\n"
 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
 #endif
     break;
+#if !LJ_NO_UNWIND
+  case BUILD_machasm: {
+#if LJ_HASFFI
+    int fcsize = 0;
+#endif
+    int j;
+    fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
+    fprintf(ctx->fp,
+	"EH_frame1:\n"
+	"\t.set L$set$x,LECIEX-LSCIEX\n"
+	"\t.long L$set$x\n"
+	"LSCIEX:\n"
+	"\t.long 0\n"
+	"\t.byte 0x1\n"
+	"\t.ascii \"zPR\\0\"\n"
+	"\t.byte 0x1\n"
+	"\t.byte 128-8\n"
+	"\t.byte 30\n"				/* Return address is in lr. */
+	"\t.byte 6\n"				/* augmentation length */
+	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
+	"\t.long _lj_err_unwind_dwarf@GOTPCREL\n"
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
+	"\t.align 3\n"
+	"LECIEX:\n\n");
+    for (j = 0; j < ctx->nsym; j++) {
+      const char *name = ctx->sym[j].name;
+      int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs;
+      if (size == 0) continue;
+#if LJ_HASFFI
+      if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
+#endif
+      fprintf(ctx->fp,
+	"%s.eh:\n"
+	"LSFDE%d:\n"
+	"\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
+	"\t.long L$set$%d\n"
+	"LASFDE%d:\n"
+	"\t.long LASFDE%d-EH_frame1\n"
+	"\t.long %s-.\n"
+	"\t.long %d\n"
+	"\t.byte 0\n"				/* augmentation length */
+	"\t.byte 0xe\n\t.byte %d\n\t.byte 1\n"	/* def_cfa_offset */
+	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.byte 2\n",		/* offset fp */
+	name, j, j, j, j, j, j, j, name, size, CFRAME_SIZE);
+      for (i = 19; i <= 28; i++)  /* offset x19-x28 */
+	fprintf(ctx->fp, "\t.byte 0x%x\n\t.byte %d\n", 0x80+i, i+(3-19));
+      for (i = 8; i <= 15; i++)  /* offset d8-d15 */
+	fprintf(ctx->fp, "\t.byte 5\n\t.byte 0x%x\n\t.byte %d\n",
+		64+i, i+(3+(28-19+1)-8));
+      fprintf(ctx->fp,
+	"\t.align 3\n"
+	"LEFDE%d:\n\n", j);
+    }
+#if LJ_HASFFI
+    if (fcsize) {
+      fprintf(ctx->fp,
+	"EH_frame2:\n"
+	"\t.set L$set$y,LECIEY-LSCIEY\n"
+	"\t.long L$set$y\n"
+	"LSCIEY:\n"
+	"\t.long 0\n"
+	"\t.byte 0x1\n"
+	"\t.ascii \"zR\\0\"\n"
+	"\t.byte 0x1\n"
+	"\t.byte 128-8\n"
+	"\t.byte 30\n"				/* Return address is in lr. */
+	"\t.byte 1\n"				/* augmentation length */
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
+	"\t.align 3\n"
+	"LECIEY:\n\n");
+      fprintf(ctx->fp,
+	"_lj_vm_ffi_call.eh:\n"
+	"LSFDEY:\n"
+	"\t.set L$set$yy,LEFDEY-LASFDEY\n"
+	"\t.long L$set$yy\n"
+	"LASFDEY:\n"
+	"\t.long LASFDEY-EH_frame2\n"
+	"\t.long _lj_vm_ffi_call-.\n"
+	"\t.long %d\n"
+	"\t.byte 0\n"				/* augmentation length */
+	"\t.byte 0xe\n\t.byte 32\n"		/* def_cfa_offset */
+	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.byte 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.byte 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.byte 4\n"		/* offset x20 */
+	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
+	"\t.align 3\n"
+	"LEFDEY:\n\n", fcsize);
+    }
+#endif
+    fprintf(ctx->fp, ".subsections_via_symbols\n");
+    }
+    break;
+#endif
   default:
     break;
   }
-- 
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Tarantool-patches] [PATCH luajit 5/6] OSX/ARM64: Disable unwind info.
  2022-09-26 15:54 [Tarantool-patches] [PATCH luajit 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
                   ` (3 preceding siblings ...)
  2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding Maksim Kokryashkin via Tarantool-patches
@ 2022-09-26 15:55 ` Maksim Kokryashkin via Tarantool-patches
  2022-10-04 15:52   ` sergos via Tarantool-patches
  2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 6/6] OSX/ARM64: Fix external unwinding Maksim Kokryashkin via Tarantool-patches
  2022-10-06  9:48 ` [Tarantool-patches] [PATCH luajit v2 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
  6 siblings, 1 reply; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-09-26 15:55 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun

From: Mike Pall <mike>

See LuaJIT/LuaJIT#698.

(cherry picked from commit 78350a2565e1cf1102bcd25be406f02953d4dd3b)

External unwinding support is already disabled for OSX on ARM64
platfrom so there is no point in generation of incorrect unwind
info for it. This patch disables that generation.

Maxim Kokryashkin:
* added the description for the problem

Needed for tarantool/tarantool#6096
Needed for tarantool/tarantool#7230
---
 src/vm_arm64.dasc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index ad57bca3..ccfa72bd 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -3990,7 +3990,8 @@ static void emit_asm_debug(BuildCtx *ctx)
 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
 #endif
     break;
-#if !LJ_NO_UNWIND
+    /* Disabled until someone finds a fix. See #698. */
+#if !LJ_NO_UNWIND && 0
   case BUILD_machasm: {
 #if LJ_HASFFI
     int fcsize = 0;
-- 
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Tarantool-patches] [PATCH luajit 6/6] OSX/ARM64: Fix external unwinding.
  2022-09-26 15:54 [Tarantool-patches] [PATCH luajit 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
                   ` (4 preceding siblings ...)
  2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 5/6] OSX/ARM64: Disable unwind info Maksim Kokryashkin via Tarantool-patches
@ 2022-09-26 15:55 ` Maksim Kokryashkin via Tarantool-patches
  2022-10-04 16:05   ` sergos via Tarantool-patches
  2022-10-06  9:48 ` [Tarantool-patches] [PATCH luajit v2 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
  6 siblings, 1 reply; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-09-26 15:55 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun; +Cc: Maksim Kokryashkin

Contributed by Edmund Kapusniak. For more info,
see LuaJIT/LuaJIT#698 and LuaJIT/LuaJIT#757.

(cherry picked from commit c38747b626b978555324504ec29a110f6b04902f)

To allow compiler generate compact unwind info generation
for Mach-O, fp must point to the saved fp, and the frame
must be specified relative to fp+16. ELF unwind info has
been updated to also use fp+16 rather than sp+CFRAME_SIZE.

Offset to pointer to personality routine specified as @GOT-. rather
than @GOTPCREL.

Re-enabled LUAJIT_UNWIND_EXTERNAL by default on OSX.

Maxim Kokryashkin:
* added the description for the issue and the test

Resolves tarantool/tarantool#6096
Needed for tarantool/tarantool#7230
---
 cmake/SetTargetFlags.cmake                    |  4 +-
 src/Makefile.original                         |  5 +-
 src/vm_arm64.dasc                             | 89 ++++++++-----------
 ...-6096-external-unwinding-on-arm64.test.lua | 13 +++
 4 files changed, 54 insertions(+), 57 deletions(-)
 create mode 100644 test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua

diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
index d99e1f9a..a5a3407f 100644
--- a/cmake/SetTargetFlags.cmake
+++ b/cmake/SetTargetFlags.cmake
@@ -16,9 +16,7 @@ LuaJITTestArch(TESTARCH "${TARGET_C_FLAGS}")
 LuaJITArch(LUAJIT_ARCH "${TESTARCH}")
 
 if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
-  if(NOT LUAJIT_ARCH STREQUAL "arm64")
-    AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
-  endif()
+  AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
 else()
   string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
   if(UNWIND_POS EQUAL -1)
diff --git a/src/Makefile.original b/src/Makefile.original
index 5826a56a..d1373b40 100644
--- a/src/Makefile.original
+++ b/src/Makefile.original
@@ -325,10 +325,7 @@ ifeq (Darwin,$(TARGET_SYS))
     export MACOSX_DEPLOYMENT_TARGET=10.4
   endif
   TARGET_STRIP+= -x
-  # Ext. unwinding is broken on OSX/ARM64 until someone finds a fix. See #698.
-  ifneq (arm64,$(TARGET_LJARCH))
-    TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
-  endif
+  TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index ccfa72bd..f517a808 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -81,8 +81,7 @@
 |
 |.define CFRAME_SPACE,	208
 |//----- 16 byte aligned, <-- sp entering interpreter
-|.define SAVE_LR,	[sp, #200]
-|.define SAVE_FP,	[sp, #192]
+|.define SAVE_FP_LR_,	192
 |.define SAVE_GPR_,	112		// 112+10*8: 64 bit GPR saves
 |.define SAVE_FPR_,	48		// 48+8*8: 64 bit FPR saves
 |// Unused		[sp, #44]	// 32 bit values
@@ -108,8 +107,8 @@
 |
 |.macro saveregs
 |  sub sp, sp, # CFRAME_SPACE
-|  stp fp, lr, SAVE_FP
-|  add fp, sp, #0
+|  stp fp, lr, [sp, # SAVE_FP_LR_]
+|  add fp, sp, # SAVE_FP_LR_
 |  stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
 |  save_ 21, 22, 8, 9
 |  save_ 23, 24, 10, 11
@@ -122,7 +121,7 @@
 |  rest_ 23, 24, 10, 11
 |  rest_ 25, 26, 12, 13
 |  rest_ 27, 28, 14, 15
-|  ldp fp, lr, SAVE_FP
+|  ldp fp, lr, [sp, # SAVE_FP_LR_]
 |  add sp, sp, # CFRAME_SPACE
 |.endmacro
 |
@@ -504,8 +503,9 @@ static void build_subroutines(BuildCtx *ctx)
   |    ldr GL, L->glref			// Setup pointer to global state.
   |     mov BASE, CARG2
   |   str CARG1, SAVE_PC		// Any value outside of bytecode is ok.
-  |  str RC, SAVE_CFRAME
-  |  str fp, L->cframe			// Add our C frame to cframe chain.
+  |  add TMP0, sp, #0
+  |   str RC, SAVE_CFRAME
+  |  str TMP0, L->cframe		// Add our C frame to cframe chain.
   |
   |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
   |  str L, GL->cur_L
@@ -540,8 +540,9 @@ static void build_subroutines(BuildCtx *ctx)
   |   sub RA, RA, RB			// Compute -savestack(L, L->top).
   |   str RAw, SAVE_NRES		// Neg. delta means cframe w/o frame.
   |  str wzr, SAVE_ERRF			// No error function.
-  |  str RC, SAVE_CFRAME
-  |  str fp, L->cframe			// Add our C frame to cframe chain.
+  |  add TMP0, sp, #0
+  |   str RC, SAVE_CFRAME
+  |  str TMP0, L->cframe		// Add our C frame to cframe chain.
   |    str L, GL->cur_L
   |  blr CARG4			// (lua_State *L, lua_CFunction func, void *ud)
   |  mov BASE, CRET1
@@ -2129,14 +2130,14 @@ static void build_subroutines(BuildCtx *ctx)
   |  .type CCSTATE, CCallState, x19
   |  stp x20, CCSTATE, [sp, #-32]!
   |  stp fp, lr, [sp, #16]
-  |  add fp, sp, #0
+  |  add fp, sp, #16
   |  mov CCSTATE, x0
   |  ldr TMP0w, CCSTATE:x0->spadj
   |   ldrb TMP1w, CCSTATE->nsp
   |    add TMP2, CCSTATE, #offsetof(CCallState, stack)
   |   subs TMP1, TMP1, #1
   |    ldr TMP3, CCSTATE->func
-  |  sub sp, fp, TMP0
+  |  sub sp, sp, TMP0
   |   bmi >2
   |1:  // Copy stack slots
   |  ldr TMP0, [TMP2, TMP1, lsl #3]
@@ -2154,7 +2155,7 @@ static void build_subroutines(BuildCtx *ctx)
   |   ldp d6, d7, CCSTATE->fpr[6]
   |  ldr x8, CCSTATE->retp
   |  blr TMP3
-  |  mov sp, fp
+  |  sub sp, fp, #16
   |  stp x0, x1, CCSTATE->gpr[0]
   |   stp d0, d1, CCSTATE->fpr[0]
   |   stp d2, d3, CCSTATE->fpr[2]
@@ -3879,7 +3880,7 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.uleb128 0x1\n"
 	"\t.sleb128 -8\n"
 	"\t.byte 30\n"				/* Return address is in lr. */
-	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
 	"\t.align 3\n"
 	".LECIE0:\n\n");
     fprintf(ctx->fp,
@@ -3889,10 +3890,9 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long .Lframe0\n"
 	"\t.quad .Lbegin\n"
 	"\t.quad %d\n"
-	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
 	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
 	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
-	fcofs, CFRAME_SIZE);
+	fcofs);
     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
       fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
@@ -3909,12 +3909,10 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long .Lframe0\n"
 	"\t.quad lj_vm_ffi_call\n"
 	"\t.quad %d\n"
-	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
 	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
 	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
 	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
 	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
-	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
 	"\t.align 3\n"
 	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
 #endif
@@ -3933,7 +3931,7 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
 	"\t.long lj_err_unwind_dwarf-.\n"
 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
 	"\t.align 3\n"
 	".LECIE1:\n\n");
     fprintf(ctx->fp,
@@ -3944,10 +3942,9 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long .Lbegin-.\n"
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
-	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
 	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
 	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
-	fcofs, CFRAME_SIZE);
+	fcofs);
     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
       fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
@@ -3969,7 +3966,7 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.byte 30\n"				/* Return address is in lr. */
 	"\t.uleb128 1\n"			/* augmentation length */
 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
 	"\t.align 3\n"
 	".LECIE2:\n\n");
     fprintf(ctx->fp,
@@ -3980,18 +3977,15 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long lj_vm_ffi_call-.\n"
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
-	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
 	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
 	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
 	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
 	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
-	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
 	"\t.align 3\n"
 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
 #endif
     break;
-    /* Disabled until someone finds a fix. See #698. */
-#if !LJ_NO_UNWIND && 0
+#if !LJ_NO_UNWIND
   case BUILD_machasm: {
 #if LJ_HASFFI
     int fcsize = 0;
@@ -4006,14 +4000,14 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long 0\n"
 	"\t.byte 0x1\n"
 	"\t.ascii \"zPR\\0\"\n"
-	"\t.byte 0x1\n"
-	"\t.byte 128-8\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -8\n"
 	"\t.byte 30\n"				/* Return address is in lr. */
-	"\t.byte 6\n"				/* augmentation length */
+	"\t.uleb128 6\n"			/* augmentation length */
 	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
-	"\t.long _lj_err_unwind_dwarf@GOTPCREL\n"
+	"\t.long _lj_err_unwind_dwarf@GOT-.\n"
 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
 	"\t.align 3\n"
 	"LECIEX:\n\n");
     for (j = 0; j < ctx->nsym; j++) {
@@ -4024,7 +4018,6 @@ static void emit_asm_debug(BuildCtx *ctx)
       if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
 #endif
       fprintf(ctx->fp,
-	"%s.eh:\n"
 	"LSFDE%d:\n"
 	"\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
 	"\t.long L$set$%d\n"
@@ -4032,15 +4025,14 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long LASFDE%d-EH_frame1\n"
 	"\t.long %s-.\n"
 	"\t.long %d\n"
-	"\t.byte 0\n"				/* augmentation length */
-	"\t.byte 0xe\n\t.byte %d\n\t.byte 1\n"	/* def_cfa_offset */
-	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
-	"\t.byte 0x9d\n\t.byte 2\n",		/* offset fp */
-	name, j, j, j, j, j, j, j, name, size, CFRAME_SIZE);
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
+	j, j, j, j, j, j, j, name, size);
       for (i = 19; i <= 28; i++)  /* offset x19-x28 */
-	fprintf(ctx->fp, "\t.byte 0x%x\n\t.byte %d\n", 0x80+i, i+(3-19));
+	fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
       for (i = 8; i <= 15; i++)  /* offset d8-d15 */
-	fprintf(ctx->fp, "\t.byte 5\n\t.byte 0x%x\n\t.byte %d\n",
+	fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
 		64+i, i+(3+(28-19+1)-8));
       fprintf(ctx->fp,
 	"\t.align 3\n"
@@ -4056,16 +4048,15 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long 0\n"
 	"\t.byte 0x1\n"
 	"\t.ascii \"zR\\0\"\n"
-	"\t.byte 0x1\n"
-	"\t.byte 128-8\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -8\n"
 	"\t.byte 30\n"				/* Return address is in lr. */
-	"\t.byte 1\n"				/* augmentation length */
+	"\t.uleb128 1\n"			/* augmentation length */
 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
 	"\t.align 3\n"
 	"LECIEY:\n\n");
       fprintf(ctx->fp,
-	"_lj_vm_ffi_call.eh:\n"
 	"LSFDEY:\n"
 	"\t.set L$set$yy,LEFDEY-LASFDEY\n"
 	"\t.long L$set$yy\n"
@@ -4073,13 +4064,11 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long LASFDEY-EH_frame2\n"
 	"\t.long _lj_vm_ffi_call-.\n"
 	"\t.long %d\n"
-	"\t.byte 0\n"				/* augmentation length */
-	"\t.byte 0xe\n\t.byte 32\n"		/* def_cfa_offset */
-	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
-	"\t.byte 0x9d\n\t.byte 2\n"		/* offset fp */
-	"\t.byte 0x93\n\t.byte 3\n"		/* offset x19 */
-	"\t.byte 0x94\n\t.byte 4\n"		/* offset x20 */
-	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
 	"\t.align 3\n"
 	"LEFDEY:\n\n", fcsize);
     }
diff --git a/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua b/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua
new file mode 100644
index 00000000..cdeea441
--- /dev/null
+++ b/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua
@@ -0,0 +1,13 @@
+local tap = require('tap')
+
+-- Test file to check correctnes of external unwinding
+-- in LuaJIT.
+-- See also https://github.com/LuaJIT/LuaJIT/issues/698,
+-- https://github.com/LuaJIT/LuaJIT/pull/757.
+local test = tap.test('gh-6096-external-unwinding-on-arm64')
+test:plan(1)
+
+local res = pcall(require, 'not-existing-module')
+test:ok(res == false, 'successful unwinding in pcall')
+
+os.exit(test:check() and 0 or 1)
-- 
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit 1/6] Cleanup and enable external unwinding for more platforms.
  2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches
@ 2022-09-28  8:20   ` sergos via Tarantool-patches
  2022-10-03  7:36   ` Sergey Kaplun via Tarantool-patches
  1 sibling, 0 replies; 23+ messages in thread
From: sergos via Tarantool-patches @ 2022-09-28  8:20 UTC (permalink / raw)
  To: Maksim Kokryashkin; +Cc: tarantool-patches

Hi!

Thanks for the patch!

Just one nit in message.

Otherwise - LGTM.
Sergos


> On 26 Sep 2022, at 18:54, Maksim Kokryashkin <max.kokryashkin@gmail.com> wrote:
> 
> (cherry picked from commit e131936133c58de4426c595db2341caf5a1665b5)
> 
> This commit enables external unwiding on all platforms, that
                            ^^^^^^^^^
> create unwind tables by default. Corresponding check is added
> to the build routine.
> 
> Also, GC64 mode is now enabled on MacOS by default.
> 
> Maxim Kokryashkin:
> * added the description for the patch
> 
> Needed for tarantool/tarantool#6096
> Needed for tarantool/tarantool#7230
> ---
> CMakeLists.txt             |   3 +
> cmake/SetTargetFlags.cmake |  22 ++-
> doc/extensions.html        |  22 +--
> src/Makefile.original      |  11 +-
> src/lj_arch.h              |  27 ++--
> src/lj_err.c               | 321 +++++++++++++++++++------------------
> 6 files changed, 213 insertions(+), 193 deletions(-)
> 
> diff --git a/CMakeLists.txt b/CMakeLists.txt
> index 8b49f9d7..c4f3ef62 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -176,6 +176,9 @@ endif()
> 
> # Enable GC64 mode for x64.
> option(LUAJIT_ENABLE_GC64 "GC64 mode for x64" OFF)
> +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
> +  set(LUAJIT_ENABLE_GC64 ON)
> +endif()
> if(LUAJIT_ENABLE_GC64)
> AppendFlags(TARGET_C_FLAGS -DLUAJIT_ENABLE_GC64)
> endif()
> diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
> index b544d2ac..36896aff 100644
> --- a/cmake/SetTargetFlags.cmake
> +++ b/cmake/SetTargetFlags.cmake
> @@ -15,6 +15,22 @@ endif()
> LuaJITTestArch(TESTARCH "${TARGET_C_FLAGS}")
> LuaJITArch(LUAJIT_ARCH "${TESTARCH}")
> 
> +string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
> +if(UNWIND_POS EQUAL -1)
> +  execute_process(
> +    COMMAND bash -c "exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | ${CMAKE_C_COMPILER} -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o"
> +    WORKING_DIRECTORY ${LUAJIT_SOURCE_DIR}
> +    OUTPUT_VARIABLE TESTUNWIND
> +    RESULT_VARIABLE TESTUNWIND_RC
> +  )
> +  if(TESTUNWIND_RC EQUAL 0)
> +    string(FIND "${TESTUNWIND}" "E" UNW_TEST_POS)
> +    if(NOT UNW_TEST_POS EQUAL -1)
> +      AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
> +    endif()
> +  endif()
> +endif()
> +
> # Target-specific compiler options.
> #
> # x86/x64 only: For GCC 4.2 or higher and if you don't intend to
> @@ -25,12 +41,6 @@ if(LUAJIT_ARCH STREQUAL "x86")
> endif()
> 
> if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
> -  if(LUAJIT_ARCH STREQUAL "x64")
> -    # XXX: Set -pagezero_size to hint <mmap> when allocating 32
> -    # bit memory on OSX/x64, otherwise the lower 4GB are blocked.
> -    AppendFlags(TARGET_BIN_FLAGS -pagezero_size 10000 -image_base 100000000)
> -    AppendFlags(TARGET_SHARED_FLAGS -image_base 7fff04c4a000)
> -  endif()
> AppendFlags(TARGET_SHARED_FLAGS -single_module -undefined dynamic_lookup)
> else() # Linux and FreeBSD.
> AppendFlags(TARGET_BIN_FLAGS -Wl,-E)
> diff --git a/doc/extensions.html b/doc/extensions.html
> index e0f136e2..777e9928 100644
> --- a/doc/extensions.html
> +++ b/doc/extensions.html
> @@ -395,29 +395,19 @@ the toolchain used to compile LuaJIT:
> <td class="excinterop">Interoperability</td>
> </tr>
> <tr class="odd separate">
> -<td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
> -<td class="exccompiler">GCC 4.3+, Clang</td>
> +<td class="excplatform">External frame unwinding</td>
> +<td class="exccompiler">GCC, Clang, MSVC</td>
> <td class="excinterop"><b style="color: #00a000;">Full</b></td>
> </tr>
> <tr class="even">
> -<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td>
> -<td class="exccompiler">GCC, Clang</td>
> -<td class="excinterop"><b style="color: #00a000;">Full</b></td>
> -</tr>
> -<tr class="odd">
> -<td class="excplatform">Other platforms, DWARF2 unwinding</td>
> +<td class="excplatform">Internal frame unwinding + DWARF2</td>
> <td class="exccompiler">GCC, Clang</td>
> <td class="excinterop"><b style="color: #c06000;">Limited</b></td>
> </tr>
> -<tr class="even">
> -<td class="excplatform">Windows/x64</td>
> -<td class="exccompiler">MSVC or WinSDK</td>
> -<td class="excinterop"><b style="color: #00a000;">Full</b></td>
> -</tr>
> <tr class="odd">
> -<td class="excplatform">Windows/x86</td>
> -<td class="exccompiler">Any</td>
> -<td class="excinterop"><b style="color: #00a000;">Full</b></td>
> +<td class="excplatform">Windows 64 bit</td>
> +<td class="exccompiler">non-MSVC</td>
> +<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
> </tr>
> <tr class="even">
> <td class="excplatform">Other platforms</td>
> diff --git a/src/Makefile.original b/src/Makefile.original
> index dd1c6a76..c9609700 100644
> --- a/src/Makefile.original
> +++ b/src/Makefile.original
> @@ -320,6 +320,13 @@ else
> ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
> TARGET_XCFLAGS+= -fno-stack-protector
> endif
> +ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
> +  # Find out whether the target toolchain always generates unwind tables.
> +  TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o)
> +  ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
> +    TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
> +  endif
> +endif
> ifeq (Darwin,$(TARGET_SYS))
> ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
>  export MACOSX_DEPLOYMENT_TARGET=10.4
> @@ -328,10 +335,6 @@ ifeq (Darwin,$(TARGET_SYS))
> TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
> TARGET_DYNXLDOPTS=
> TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
> -  ifeq (x64,$(TARGET_LJARCH))
> -    TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000
> -    TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000
> -  endif
> else
> ifeq (iOS,$(TARGET_SYS))
> TARGET_STRIP+= -x
> diff --git a/src/lj_arch.h b/src/lj_arch.h
> index 40129d9e..730be5bf 100644
> --- a/src/lj_arch.h
> +++ b/src/lj_arch.h
> @@ -152,11 +152,6 @@
> #define LJ_ARCH_NAME		"x86"
> #define LJ_ARCH_BITS		32
> #define LJ_ARCH_ENDIAN		LUAJIT_LE
> -#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
> -#define LJ_ABI_WIN		1
> -#else
> -#define LJ_ABI_WIN		0
> -#endif
> #define LJ_TARGET_X86		1
> #define LJ_TARGET_X86ORX64	1
> #define LJ_TARGET_EHRETREG	0
> @@ -170,11 +165,6 @@
> #define LJ_ARCH_NAME		"x64"
> #define LJ_ARCH_BITS		64
> #define LJ_ARCH_ENDIAN		LUAJIT_LE
> -#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
> -#define LJ_ABI_WIN		1
> -#else
> -#define LJ_ABI_WIN		0
> -#endif
> #define LJ_TARGET_X64		1
> #define LJ_TARGET_X86ORX64	1
> #define LJ_TARGET_EHRETREG	0
> @@ -185,6 +175,8 @@
> #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
> #ifdef LUAJIT_ENABLE_GC64
> #define LJ_TARGET_GC64		1
> +#elif LJ_TARGET_OSX
> +#error "macOS requires GC64 -- don't disable it"
> #endif
> 
> #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
> @@ -566,15 +558,22 @@
> #define LJ_NO_SYSTEM		1
> #endif
> 
> -#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
> -/* NYI: no support for compact unwind specification, yet. */
> -#define LUAJIT_NO_UNWIND	1
> +#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
> +#define LJ_ABI_WIN		1
> +#else
> +#define LJ_ABI_WIN		0
> #endif
> 
> -#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
> +#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
> #define LJ_NO_UNWIND		1
> #endif
> 
> +#if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__))))
> +#define LJ_UNWIND_EXT		1
> +#else
> +#define LJ_UNWIND_EXT		0
> +#endif
> +
> /* Compatibility with Lua 5.1 vs. 5.2. */
> #ifdef LUAJIT_ENABLE_LUA52COMPAT
> #define LJ_52			1
> diff --git a/src/lj_err.c b/src/lj_err.c
> index c310daf6..298e5434 100644
> --- a/src/lj_err.c
> +++ b/src/lj_err.c
> @@ -29,12 +29,18 @@
> ** Pros and Cons:
> **
> ** - EXT requires unwind tables for *all* functions on the C stack between
> -**   the pcall/catch and the error/throw. This is the default on x64,
> -**   but needs to be manually enabled on x86/PPC for non-C++ code.
> +**   the pcall/catch and the error/throw. C modules used by Lua code can
> +**   throw errors, so these need to have unwind tables, too. Transitively
> +**   this applies to all system libraries used by C modules -- at least
> +**   when they have callbacks which may throw an error.
> **
> -** - INT is faster when actually throwing errors (but this happens rarely).
> +** - INT is faster when actually throwing errors, but this happens rarely.
> **   Setting up error handlers is zero-cost in any case.
> **
> +** - INT needs to save *all* callee-saved registers when entering the
> +**   interpreter. EXT only needs to save those actually used inside the
> +**   interpreter. JIT-compiled code may need to save some more.
> +**
> ** - EXT provides full interoperability with C++ exceptions. You can throw
> **   Lua errors or C++ exceptions through a mix of Lua frames and C++ frames.
> **   C++ destructors are called as needed. C++ exceptions caught by pcall
> @@ -46,27 +52,33 @@
> **   the wrapper function feature. Lua errors thrown through C++ frames
> **   cannot be caught by C++ code and C++ destructors are not run.
> **
> -** EXT is the default on x64 systems and on Windows, INT is the default on all
> -** other systems.
> +** EXT is the default on all systems where the toolchain produces unwind
> +** tables by default (*). This is hard-coded and/or detected in src/Makefile.
> +** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL
> +**
> +** INT is the default on all other systems.
> +**
> +** EXT can be manually enabled for toolchains that are able to produce
> +** conforming unwind tables:
> +**   "TARGET_XCFLAGS=-funwind-tables -DLUAJIT_UNWIND_EXTERNAL"
> +** As explained above, *all* C code used directly or indirectly by LuaJIT
> +** must be compiled with -funwind-tables (or -fexceptions). C++ code must
> +** *not* be compiled with -fno-exceptions.
> +**
> +** If you're unsure whether error handling inside the VM works correctly,
> +** try running this and check whether it prints "OK":
> **
> -** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack
> -** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled
> -** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set
> -** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules
> -** and all C libraries that have callbacks which may be used to call back
> -** into Lua. C++ code must *not* be compiled with -fno-exceptions.
> +**   luajit -e "print(select(2, load('OK')):match('OK'))"
> **
> -** EXT is mandatory on WIN64 since the calling convention has an abundance
> -** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
> -** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4).
> +** (*) Originally, toolchains only generated unwind tables for C++ code. For
> +** interoperability reasons, this can be manually enabled for plain C code,
> +** too (with -funwind-tables). With the introduction of the x64 architecture,
> +** the corresponding POSIX and Windows ABIs mandated unwind tables for all
> +** code. Over the following years most desktop and server platforms have
> +** enabled unwind tables by default on all architectures. OTOH mobile and
> +** embedded platforms do not consistently mandate unwind tables.
> */
> 
> -#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
> -#define LJ_UNWIND_EXT	1
> -#elif LJ_TARGET_WINDOWS
> -#define LJ_UNWIND_EXT	1
> -#endif
> -
> /* -- Error messages ------------------------------------------------------ */
> 
> /* Error message strings. */
> @@ -183,7 +195,125 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
> 
> /* -- External frame unwinding -------------------------------------------- */
> 
> -#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN
> +#if LJ_ABI_WIN
> +
> +/*
> +** Someone in Redmond owes me several days of my life. A lot of this is
> +** undocumented or just plain wrong on MSDN. Some of it can be gathered
> +** from 3rd party docs or must be found by trial-and-error. They really
> +** don't want you to write your own language-specific exception handler
> +** or to interact gracefully with MSVC. :-(
> +**
> +** Apparently MSVC doesn't call C++ destructors for foreign exceptions
> +** unless you compile your C++ code with /EHa. Unfortunately this means
> +** catch (...) also catches things like access violations. The use of
> +** _set_se_translator doesn't really help, because it requires /EHa, too.
> +*/
> +
> +#define WIN32_LEAN_AND_MEAN
> +#include <windows.h>
> +
> +#if LJ_TARGET_X86
> +typedef void *UndocumentedDispatcherContext;  /* Unused on x86. */
> +#else
> +/* Taken from: http://www.nynaeve.net/?p=99 */
> +typedef struct UndocumentedDispatcherContext {
> +  ULONG64 ControlPc;
> +  ULONG64 ImageBase;
> +  PRUNTIME_FUNCTION FunctionEntry;
> +  ULONG64 EstablisherFrame;
> +  ULONG64 TargetIp;
> +  PCONTEXT ContextRecord;
> +  void (*LanguageHandler)(void);
> +  PVOID HandlerData;
> +  PUNWIND_HISTORY_TABLE HistoryTable;
> +  ULONG ScopeIndex;
> +  ULONG Fill0;
> +} UndocumentedDispatcherContext;
> +#endif
> +
> +/* Another wild guess. */
> +extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
> +
> +#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
> +/* Workaround for broken MinGW64 declaration. */
> +VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
> +#define RtlUnwindEx RtlUnwindEx_FIXED
> +#endif
> +
> +#define LJ_MSVC_EXCODE		((DWORD)0xe06d7363)
> +#define LJ_GCC_EXCODE		((DWORD)0x20474343)
> +
> +#define LJ_EXCODE		((DWORD)0xe24c4a00)
> +#define LJ_EXCODE_MAKE(c)	(LJ_EXCODE | (DWORD)(c))
> +#define LJ_EXCODE_CHECK(cl)	(((cl) ^ LJ_EXCODE) <= 0xff)
> +#define LJ_EXCODE_ERRCODE(cl)	((int)((cl) & 0xff))
> +
> +/* Windows exception handler for interpreter frame. */
> +LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
> +  void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
> +{
> +#if LJ_TARGET_X86
> +  void *cf = (char *)f - CFRAME_OFS_SEH;
> +#else
> +  void *cf = f;
> +#endif
> +  lua_State *L = cframe_L(cf);
> +  int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
> +		LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
> +  if ((rec->ExceptionFlags & 6)) {  /* EH_UNWINDING|EH_EXIT_UNWIND */
> +    /* Unwind internal frames. */
> +    err_unwind(L, cf, errcode);
> +  } else {
> +    void *cf2 = err_unwind(L, cf, 0);
> +    if (cf2) {  /* We catch it, so start unwinding the upper frames. */
> +      if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
> +	  rec->ExceptionCode == LJ_GCC_EXCODE) {
> +#if !LJ_TARGET_CYGWIN
> +	__DestructExceptionObject(rec, 1);
> +#endif
> +	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
> +      } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
> +	/* Don't catch access violations etc. */
> +	return 1;  /* ExceptionContinueSearch */
> +      }
> +#if LJ_TARGET_X86
> +      UNUSED(ctx);
> +      UNUSED(dispatch);
> +      /* Call all handlers for all lower C frames (including ourselves) again
> +      ** with EH_UNWINDING set. Then call the specified function, passing cf
> +      ** and errcode.
> +      */
> +      lj_vm_rtlunwind(cf, (void *)rec,
> +	(cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
> +	(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
> +      /* lj_vm_rtlunwind does not return. */
> +#else
> +      /* Unwind the stack and call all handlers for all lower C frames
> +      ** (including ourselves) again with EH_UNWINDING set. Then set
> +      ** stack pointer = cf, result = errcode and jump to the specified target.
> +      */
> +      RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
> +			       lj_vm_unwind_ff_eh :
> +			       lj_vm_unwind_c_eh),
> +		  rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
> +      /* RtlUnwindEx should never return. */
> +#endif
> +    }
> +  }
> +  return 1;  /* ExceptionContinueSearch */
> +}
> +
> +/* Raise Windows exception. */
> +static void err_raise_ext(global_State *g, int errcode)
> +{
> +#if LJ_HASJIT
> +  setmref(g->jit_base, NULL);
> +#endif
> +  RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
> +}
> +
> +#elif !LJ_NO_UNWIND && (defined(__GNUC__) || defined(__clang__))
> 
> /*
> ** We have to use our own definitions instead of the mandatory (!) unwind.h,
> @@ -232,7 +362,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
> lua_State *L;
> if (version != 1)
>  return _URC_FATAL_PHASE1_ERROR;
> -  UNUSED(uexclass);
> cf = (void *)_Unwind_GetCFA(ctx);
> L = cframe_L(cf);
> if ((actions & _UA_SEARCH_PHASE)) {
> @@ -280,6 +409,9 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
>  ** it on non-x64 because the interpreter restores all callee-saved regs.
>  */
>  lj_err_throw(L, errcode);
> +#if LJ_TARGET_X64
> +#error "Broken build system -- only use the provided Makefiles!"
> +#endif
> #endif
> }
> return _URC_CONTINUE_UNWIND;
> @@ -292,14 +424,6 @@ static _Unwind_Exception static_uex;
> #else
> static __thread _Unwind_Exception static_uex;
> #endif
> -
> -/* Raise DWARF2 exception. */
> -static void err_raise_ext(int errcode)
> -{
> -  static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
> -  static_uex.excleanup = NULL;
> -  _Unwind_RaiseException(&static_uex);
> -}
> #endif
> 
> #else /* LJ_TARGET_ARM */
> @@ -373,132 +497,22 @@ LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb,
> 
> #if LJ_UNWIND_EXT
> static __thread _Unwind_Control_Block static_uex;
> +#endif
> +#endif /* LJ_TARGET_ARM */
> 
> -static void err_raise_ext(int errcode)
> +#if LJ_UNWIND_EXT
> +/* Raise external exception. */
> +static void err_raise_ext(global_State *g, int errcode)
> {
> +#if LJ_HASJIT
> +  setmref(g->jit_base, NULL);
> +#endif
> memset(&static_uex, 0, sizeof(static_uex));
> static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
> _Unwind_RaiseException(&static_uex);
> }
> #endif
> 
> -#endif /* LJ_TARGET_ARM */
> -
> -#elif LJ_ABI_WIN
> -
> -/*
> -** Someone in Redmond owes me several days of my life. A lot of this is
> -** undocumented or just plain wrong on MSDN. Some of it can be gathered
> -** from 3rd party docs or must be found by trial-and-error. They really
> -** don't want you to write your own language-specific exception handler
> -** or to interact gracefully with MSVC. :-(
> -**
> -** Apparently MSVC doesn't call C++ destructors for foreign exceptions
> -** unless you compile your C++ code with /EHa. Unfortunately this means
> -** catch (...) also catches things like access violations. The use of
> -** _set_se_translator doesn't really help, because it requires /EHa, too.
> -*/
> -
> -#define WIN32_LEAN_AND_MEAN
> -#include <windows.h>
> -
> -#if LJ_TARGET_X64
> -/* Taken from: http://www.nynaeve.net/?p=99 */
> -typedef struct UndocumentedDispatcherContext {
> -  ULONG64 ControlPc;
> -  ULONG64 ImageBase;
> -  PRUNTIME_FUNCTION FunctionEntry;
> -  ULONG64 EstablisherFrame;
> -  ULONG64 TargetIp;
> -  PCONTEXT ContextRecord;
> -  void (*LanguageHandler)(void);
> -  PVOID HandlerData;
> -  PUNWIND_HISTORY_TABLE HistoryTable;
> -  ULONG ScopeIndex;
> -  ULONG Fill0;
> -} UndocumentedDispatcherContext;
> -#else
> -typedef void *UndocumentedDispatcherContext;
> -#endif
> -
> -/* Another wild guess. */
> -extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
> -
> -#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
> -/* Workaround for broken MinGW64 declaration. */
> -VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
> -#define RtlUnwindEx RtlUnwindEx_FIXED
> -#endif
> -
> -#define LJ_MSVC_EXCODE		((DWORD)0xe06d7363)
> -#define LJ_GCC_EXCODE		((DWORD)0x20474343)
> -
> -#define LJ_EXCODE		((DWORD)0xe24c4a00)
> -#define LJ_EXCODE_MAKE(c)	(LJ_EXCODE | (DWORD)(c))
> -#define LJ_EXCODE_CHECK(cl)	(((cl) ^ LJ_EXCODE) <= 0xff)
> -#define LJ_EXCODE_ERRCODE(cl)	((int)((cl) & 0xff))
> -
> -/* Windows exception handler for interpreter frame. */
> -LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
> -  void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
> -{
> -#if LJ_TARGET_X64
> -  void *cf = f;
> -#else
> -  void *cf = (char *)f - CFRAME_OFS_SEH;
> -#endif
> -  lua_State *L = cframe_L(cf);
> -  int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
> -		LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
> -  if ((rec->ExceptionFlags & 6)) {  /* EH_UNWINDING|EH_EXIT_UNWIND */
> -    /* Unwind internal frames. */
> -    err_unwind(L, cf, errcode);
> -  } else {
> -    void *cf2 = err_unwind(L, cf, 0);
> -    if (cf2) {  /* We catch it, so start unwinding the upper frames. */
> -      if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
> -	  rec->ExceptionCode == LJ_GCC_EXCODE) {
> -#if LJ_TARGET_WINDOWS
> -	__DestructExceptionObject(rec, 1);
> -#endif
> -	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
> -      } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
> -	/* Don't catch access violations etc. */
> -	return 1;  /* ExceptionContinueSearch */
> -      }
> -#if LJ_TARGET_X64
> -      /* Unwind the stack and call all handlers for all lower C frames
> -      ** (including ourselves) again with EH_UNWINDING set. Then set
> -      ** rsp = cf, rax = errcode and jump to the specified target.
> -      */
> -      RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
> -			       lj_vm_unwind_ff_eh :
> -			       lj_vm_unwind_c_eh),
> -		  rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
> -      /* RtlUnwindEx should never return. */
> -#else
> -      UNUSED(ctx);
> -      UNUSED(dispatch);
> -      /* Call all handlers for all lower C frames (including ourselves) again
> -      ** with EH_UNWINDING set. Then call the specified function, passing cf
> -      ** and errcode.
> -      */
> -      lj_vm_rtlunwind(cf, (void *)rec,
> -	(cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
> -	(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
> -      /* lj_vm_rtlunwind does not return. */
> -#endif
> -    }
> -  }
> -  return 1;  /* ExceptionContinueSearch */
> -}
> -
> -/* Raise Windows exception. */
> -static void err_raise_ext(int errcode)
> -{
> -  RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
> -}
> -
> #endif
> 
> /* -- Error handling ------------------------------------------------------ */
> @@ -508,22 +522,23 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
> {
> global_State *g = G(L);
> lj_trace_abort(g);
> -  setmref(g->jit_base, NULL);
> L->status = LUA_OK;
> #if LJ_UNWIND_EXT
> -  err_raise_ext(errcode);
> +  err_raise_ext(g, errcode);
> /*
> ** A return from this function signals a corrupt C stack that cannot be
> ** unwound. We have no choice but to call the panic function and exit.
> **
> ** Usually this is caused by a C function without unwind information.
> -  ** This should never happen on x64, but may happen if you've manually
> -  ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every*
> -  ** non-C++ file with -funwind-tables.
> +  ** This may happen if you've manually enabled LUAJIT_UNWIND_EXTERNAL
> +  ** and forgot to recompile *every* non-C++ file with -funwind-tables.
> */
> if (G(L)->panic)
>  G(L)->panic(L);
> #else
> +#if LJ_HASJIT
> +  setmref(g->jit_base, NULL);
> +#endif
> {
>  void *cf = err_unwind(L, NULL, errcode);
>  if (cframe_unwind_ff(cf))
> -- 
> 2.32.1 (Apple Git-133)
> 


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit 1/6] Cleanup and enable external unwinding for more platforms.
  2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches
  2022-09-28  8:20   ` sergos via Tarantool-patches
@ 2022-10-03  7:36   ` Sergey Kaplun via Tarantool-patches
  1 sibling, 0 replies; 23+ messages in thread
From: Sergey Kaplun via Tarantool-patches @ 2022-10-03  7:36 UTC (permalink / raw)
  To: Maksim Kokryashkin; +Cc: tarantool-patches


Hi, Maksim!

Thanks for the patch!
LGTM, just a few nits and several small side notes below.

On 26.09.22, Maksim Kokryashkin wrote:
> (cherry picked from commit e131936133c58de4426c595db2341caf5a1665b5)
> 
> This commit enables external unwiding on all platforms, that

Typo: s/unwiding/unwinding/

> create unwind tables by default. Corresponding check is added
> to the build routine.
> 
> Also, GC64 mode is now enabled on MacOS by default.

Maybe we should notice, that non-GC64 mode is now forbidden for MacOs?

> 
> Maxim Kokryashkin:
> * added the description for the patch
> 
> Needed for tarantool/tarantool#6096
> Needed for tarantool/tarantool#7230
> ---
>  CMakeLists.txt             |   3 +
>  cmake/SetTargetFlags.cmake |  22 ++-
>  doc/extensions.html        |  22 +--
>  src/Makefile.original      |  11 +-
>  src/lj_arch.h              |  27 ++--
>  src/lj_err.c               | 321 +++++++++++++++++++------------------
>  6 files changed, 213 insertions(+), 193 deletions(-)
> 
> diff --git a/CMakeLists.txt b/CMakeLists.txt
> index 8b49f9d7..c4f3ef62 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -176,6 +176,9 @@ endif()
>  
>  # Enable GC64 mode for x64.
>  option(LUAJIT_ENABLE_GC64 "GC64 mode for x64" OFF)
> +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
> +  set(LUAJIT_ENABLE_GC64 ON)
> +endif()

Side note: So, we shouldn't forget to remove it additional set in the
Tarantool build system.

Also, I see no fortified enabled in the LuaJIT original commit. Is this
because this commit is later than enabling GC64 mode by default?

>  if(LUAJIT_ENABLE_GC64)
>    AppendFlags(TARGET_C_FLAGS -DLUAJIT_ENABLE_GC64)
>  endif()
> diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
> index b544d2ac..36896aff 100644
> --- a/cmake/SetTargetFlags.cmake
> +++ b/cmake/SetTargetFlags.cmake
> @@ -15,6 +15,22 @@ endif()
>  LuaJITTestArch(TESTARCH "${TARGET_C_FLAGS}")
>  LuaJITArch(LUAJIT_ARCH "${TESTARCH}")
>  
> +string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
> +if(UNWIND_POS EQUAL -1)
> +  execute_process(
> +    COMMAND bash -c "exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | ${CMAKE_C_COMPILER} -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o"

Side note: I'm OK with the same check as Mike did.
Does `check_c_compiler_flag(-funwind-tables TESTUNWIND)` check the same
thing, but in a more readable way?

> +    WORKING_DIRECTORY ${LUAJIT_SOURCE_DIR}
> +    OUTPUT_VARIABLE TESTUNWIND
> +    RESULT_VARIABLE TESTUNWIND_RC
> +  )
> +  if(TESTUNWIND_RC EQUAL 0)
> +    string(FIND "${TESTUNWIND}" "E" UNW_TEST_POS)
> +    if(NOT UNW_TEST_POS EQUAL -1)
> +      AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
> +    endif()
> +  endif()
> +endif()
> +
>  # Target-specific compiler options.
>  #
>  # x86/x64 only: For GCC 4.2 or higher and if you don't intend to
> @@ -25,12 +41,6 @@ if(LUAJIT_ARCH STREQUAL "x86")
>  endif()
>  

<snipped>

> diff --git a/doc/extensions.html b/doc/extensions.html
> index e0f136e2..777e9928 100644
> --- a/doc/extensions.html
> +++ b/doc/extensions.html

<snipped>

> diff --git a/src/Makefile.original b/src/Makefile.original
> index dd1c6a76..c9609700 100644
> --- a/src/Makefile.original
> +++ b/src/Makefile.original

<snipped>

> diff --git a/src/lj_arch.h b/src/lj_arch.h
> index 40129d9e..730be5bf 100644
> --- a/src/lj_arch.h
> +++ b/src/lj_arch.h

<snipped>

> diff --git a/src/lj_err.c b/src/lj_err.c
> index c310daf6..298e5434 100644
> --- a/src/lj_err.c
> +++ b/src/lj_err.c

<snipped>

> 

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit 2/6] OSX: Fix build by hardcoding external frame unwinding.
  2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches
@ 2022-10-03 10:54   ` Sergey Kaplun via Tarantool-patches
  2022-10-03 15:58   ` sergos via Tarantool-patches
  1 sibling, 0 replies; 23+ messages in thread
From: Sergey Kaplun via Tarantool-patches @ 2022-10-03 10:54 UTC (permalink / raw)
  To: Maksim Kokryashkin; +Cc: tarantool-patches

Hi, Maksim!

Thanks for the patch!
LGTM with bunch of comments below.

On 26.09.22, Maksim Kokryashkin wrote:
> Apparently they can't even get 'grep' right, let alone a keyboard.
> 
> (cherry picked from commit d4a554d6ee1507f7313641b26ed09bf1b518fa1f)
> 
> MacOS uses BSD grep, which is slightly different from GNU grep.
> Because of that, the shell script determining whether external
> unwinding is possible doesn't work right.

Nit: It detects, that unwinding is impossible, when it is possible
actually. So we can mention here, that scripts yields false negative
result for MacOS.

> 
> External unwinding is possible on MacOS, so this patch enables

I suggest to write here "always possible" to clarify the sentence.

> it by default.
> 
> Maxim Kokryashkin:
> * added the description for the problem
> 
> Needed for tarantool/tarantool#6096
> Needed for tarantool/tarantool#7230

I suggest to replace the line above with:

| Part of tarantool/tarantool#7230

(Forgot to mention this in the previous patch, so it should be done for
the previous commit too.)

> ---
>  .github/workflows/macos-x86_64.yml | 20 +-------------------

Looks like this fixes should be done in the previous commit.

>  cmake/SetTargetFlags.cmake         | 28 ++++++++++++++++------------
>  src/Makefile.original              | 15 ++++++++-------
>  3 files changed, 25 insertions(+), 38 deletions(-)
> 
> diff --git a/.github/workflows/macos-x86_64.yml b/.github/workflows/macos-x86_64.yml
> index 840806e3..dafd1796 100644
> --- a/.github/workflows/macos-x86_64.yml
> +++ b/.github/workflows/macos-x86_64.yml

<snipped>

> diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
> index 36896aff..a5a3407f 100644
> --- a/cmake/SetTargetFlags.cmake
> +++ b/cmake/SetTargetFlags.cmake

<snipped>

> diff --git a/src/Makefile.original b/src/Makefile.original
> index c9609700..d1373b40 100644
> --- a/src/Makefile.original
> +++ b/src/Makefile.original

<snipped>

> -- 
> 2.32.1 (Apple Git-133)
> 

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit 3/6] OSX/ARM64: Disable external unwinding for now.
  2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches
@ 2022-10-03 11:08   ` Sergey Kaplun via Tarantool-patches
  2022-10-04  8:26   ` sergos via Tarantool-patches
  1 sibling, 0 replies; 23+ messages in thread
From: Sergey Kaplun via Tarantool-patches @ 2022-10-03 11:08 UTC (permalink / raw)
  To: Maksim Kokryashkin; +Cc: tarantool-patches

Hi, Maksim!

Thanks for the patch!
LGTM, with the several nits below.

On 26.09.22, Maksim Kokryashkin wrote:
> From: Mike Pall <mike>
> 
> This reduces functionality, e.g. no handling of on-trace errors.
> Someone with very deep knowledge about macOS and MACH-O/DWARF stack
> unwinding internals is needed to fix this. See issue LuaJIT/LuaJIT#698.

Side note: I'm not sure that we should mention the LuaJIT issues in our
commit message -- every time you repush your branch, it adds another
mention in upstream (you can check :)). That's definitely not what Mike
wants :).

Perhaps we should just mention the issue by number without a direct link
to it.

> 
> (cherry picked from commit 27ee3bcd79b12a0c71f00427ee1a2e486c684486)
> 
> ARM64 interpreter doesn't have unwind info for Mach-O assembler
> format, which makes unwinding impossible.
> 
> For more info, see https://github.com/LuaJIT/LuaJIT/issues/698

Ditto.
Nit: missed dot in the end of the sentence.

> 
> Maxim Kokryashkin:
> * added the description for the problem
> 
> Needed for tarantool/tarantool#6096
> Needed for tarantool/tarantool#7230

Minor: s/Needed for/Part of/

> ---
>  cmake/SetTargetFlags.cmake | 4 +++-
>  src/Makefile.original      | 5 ++++-
>  2 files changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
> index a5a3407f..d99e1f9a 100644
> --- a/cmake/SetTargetFlags.cmake
> +++ b/cmake/SetTargetFlags.cmake

<snipped>

> diff --git a/src/Makefile.original b/src/Makefile.original
> index d1373b40..5826a56a 100644
> --- a/src/Makefile.original
> +++ b/src/Makefile.original

<snipped>

> -- 
> 2.32.1 (Apple Git-133)
> 

-- 
Best regards,
Sergey Kaplun

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit 2/6] OSX: Fix build by hardcoding external frame unwinding.
  2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches
  2022-10-03 10:54   ` Sergey Kaplun via Tarantool-patches
@ 2022-10-03 15:58   ` sergos via Tarantool-patches
  1 sibling, 0 replies; 23+ messages in thread
From: sergos via Tarantool-patches @ 2022-10-03 15:58 UTC (permalink / raw)
  To: Maksim Kokryashkin; +Cc: tarantool-patches

Hi!

Thanks for the patch!

I have two questions below and also there’s no mentioning in the message that
you removes the non-GC64 workflow for the Darwin platform. Why is it done 
at all? Previous commit just mentioned that GC64 is enabled by default.


Sergos


> On 26 Sep 2022, at 18:54, Maksim Kokryashkin <max.kokryashkin@gmail.com> wrote:
> 
> Apparently they can't even get 'grep' right, let alone a keyboard.
> 
> (cherry picked from commit d4a554d6ee1507f7313641b26ed09bf1b518fa1f)
> 
> MacOS uses BSD grep, which is slightly different from GNU grep.
> Because of that, the shell script determining whether external
> unwinding is possible doesn't work right.
> 
> External unwinding is possible on MacOS, so this patch enables
> it by default.
> 
> Maxim Kokryashkin:
> * added the description for the problem
> 
> Needed for tarantool/tarantool#6096
> Needed for tarantool/tarantool#7230
> ---
> .github/workflows/macos-x86_64.yml | 20 +-------------------
> cmake/SetTargetFlags.cmake         | 28 ++++++++++++++++------------
> src/Makefile.original              | 15 ++++++++-------
> 3 files changed, 25 insertions(+), 38 deletions(-)
> 
> diff --git a/.github/workflows/macos-x86_64.yml b/.github/workflows/macos-x86_64.yml
> index 840806e3..dafd1796 100644
> --- a/.github/workflows/macos-x86_64.yml
> +++ b/.github/workflows/macos-x86_64.yml
> @@ -35,7 +35,7 @@ jobs:
>       fail-fast: false
>       matrix:
>         BUILDTYPE: [Debug, Release]
> -        GC64: [ON, OFF]
> +        GC64: ON

1. how is it relvant to grep??

>         include:
>           - BUILDTYPE: Debug
>             CMAKEFLAGS: -DCMAKE_BUILD_TYPE=Debug -DLUA_USE_ASSERT=ON -DLUA_USE_APICHECK=ON
> @@ -69,15 +69,6 @@ jobs:
>       - name: test
>         run: cmake --build . --parallel --target test
> 
> -  test-tarantool-debug-wo-GC64:
> -    name: Tarantool Debug GC64:OFF
> -    needs: test-luajit
> -    uses: tarantool/tarantool/.github/workflows/luajit-integration.yml@master
> -    with:
> -      GC64: OFF
> -      buildtype: Debug
> -      host: macos-11
> -      revision: ${{ github.sha }}

(1)

>   test-tarantool-debug-w-GC64:
>     name: Tarantool Debug GC64:ON
>     needs: test-luajit
> @@ -87,15 +78,6 @@ jobs:
>       buildtype: Debug
>       host: macos-11
>       revision: ${{ github.sha }}
> -  test-tarantool-release-wo-GC64:
> -    name: Tarantool Release GC64:OFF
> -    needs: test-luajit
> -    uses: tarantool/tarantool/.github/workflows/luajit-integration.yml@master
> -    with:
> -      GC64: OFF
> -      buildtype: RelWithDebInfo
> -      host: macos-11
> -      revision: ${{ github.sha }}
(1)

>   test-tarantool-release-w-GC64:
>     name: Tarantool Release GC64:ON
>     needs: test-luajit
> diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
> index 36896aff..a5a3407f 100644
> --- a/cmake/SetTargetFlags.cmake
> +++ b/cmake/SetTargetFlags.cmake
> @@ -15,18 +15,22 @@ endif()
> LuaJITTestArch(TESTARCH "${TARGET_C_FLAGS}")
> LuaJITArch(LUAJIT_ARCH "${TESTARCH}")
> 
> -string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
> -if(UNWIND_POS EQUAL -1)
> -  execute_process(
> -    COMMAND bash -c "exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | ${CMAKE_C_COMPILER} -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o"

2. I don’t see any changes to the grep operation here and below - does the cherry-pick message is relevant at all?

> -    WORKING_DIRECTORY ${LUAJIT_SOURCE_DIR}
> -    OUTPUT_VARIABLE TESTUNWIND
> -    RESULT_VARIABLE TESTUNWIND_RC
> -  )
> -  if(TESTUNWIND_RC EQUAL 0)
> -    string(FIND "${TESTUNWIND}" "E" UNW_TEST_POS)
> -    if(NOT UNW_TEST_POS EQUAL -1)
> -      AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
> +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
> +  AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
> +else()
> +  string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
> +  if(UNWIND_POS EQUAL -1)
> +    execute_process(
> +      COMMAND bash -c "exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | ${CMAKE_C_COMPILER} -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o"
> +      WORKING_DIRECTORY ${LUAJIT_SOURCE_DIR}
> +      OUTPUT_VARIABLE TESTUNWIND
> +      RESULT_VARIABLE TESTUNWIND_RC
> +    )
> +    if(TESTUNWIND_RC EQUAL 0)
> +      string(FIND "${TESTUNWIND}" "E" UNW_TEST_POS)
> +      if(NOT UNW_TEST_POS EQUAL -1)
> +        AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
> +      endif()
>     endif()
>   endif()
> endif()
> diff --git a/src/Makefile.original b/src/Makefile.original
> index c9609700..d1373b40 100644
> --- a/src/Makefile.original
> +++ b/src/Makefile.original
> @@ -320,18 +320,12 @@ else
> ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
>   TARGET_XCFLAGS+= -fno-stack-protector
> endif
> -ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
> -  # Find out whether the target toolchain always generates unwind tables.
> -  TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o)
> -  ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
> -    TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
> -  endif
> -endif
> ifeq (Darwin,$(TARGET_SYS))
>   ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
>     export MACOSX_DEPLOYMENT_TARGET=10.4
>   endif
>   TARGET_STRIP+= -x
> +  TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
>   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
>   TARGET_DYNXLDOPTS=
>   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
> @@ -345,6 +339,13 @@ ifeq (iOS,$(TARGET_SYS))
>     TARGET_XCFLAGS+= -fno-omit-frame-pointer
>   endif
> else
> +  ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
> +    # Find out whether the target toolchain always generates unwind tables.
> +    TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o)
> +    ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
> +      TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
> +    endif
> +  endif
>   ifneq (SunOS,$(TARGET_SYS))
>     ifneq (PS3,$(TARGET_SYS))
>       TARGET_XLDFLAGS+= -Wl,-E
> -- 
> 2.32.1 (Apple Git-133)
> 


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit 3/6] OSX/ARM64: Disable external unwinding for now.
  2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches
  2022-10-03 11:08   ` Sergey Kaplun via Tarantool-patches
@ 2022-10-04  8:26   ` sergos via Tarantool-patches
  1 sibling, 0 replies; 23+ messages in thread
From: sergos via Tarantool-patches @ 2022-10-04  8:26 UTC (permalink / raw)
  To: Maksim Kokryashkin; +Cc: tarantool-patches

Hi!

Thanks for the patch!

I would rephrase the original message, so that it’ll be clear that changes
are arm64-related only. 


Otherwise LGTM.

Sergos

> On 26 Sep 2022, at 18:55, Maksim Kokryashkin <max.kokryashkin@gmail.com> wrote:
> 
> From: Mike Pall <mike>
> 
> This reduces functionality, e.g. no handling of on-trace errors.
> Someone with very deep knowledge about macOS and MACH-O/DWARF stack
> unwinding internals is needed to fix this. See issue LuaJIT/LuaJIT#698.
> 
> (cherry picked from commit 27ee3bcd79b12a0c71f00427ee1a2e486c684486)
> 
> ARM64 interpreter doesn't have unwind info for Mach-O assembler
> format, which makes unwinding impossible.
> 
> For more info, see https://github.com/LuaJIT/LuaJIT/issues/698
> 
> Maxim Kokryashkin:
> * added the description for the problem
> 
> Needed for tarantool/tarantool#6096
> Needed for tarantool/tarantool#7230
> ---
> cmake/SetTargetFlags.cmake | 4 +++-
> src/Makefile.original      | 5 ++++-
> 2 files changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
> index a5a3407f..d99e1f9a 100644
> --- a/cmake/SetTargetFlags.cmake
> +++ b/cmake/SetTargetFlags.cmake
> @@ -16,7 +16,9 @@ LuaJITTestArch(TESTARCH "${TARGET_C_FLAGS}")
> LuaJITArch(LUAJIT_ARCH "${TESTARCH}")
> 
> if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
> -  AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
> +  if(NOT LUAJIT_ARCH STREQUAL "arm64")
> +    AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
> +  endif()
> else()
>   string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
>   if(UNWIND_POS EQUAL -1)
> diff --git a/src/Makefile.original b/src/Makefile.original
> index d1373b40..5826a56a 100644
> --- a/src/Makefile.original
> +++ b/src/Makefile.original
> @@ -325,7 +325,10 @@ ifeq (Darwin,$(TARGET_SYS))
>     export MACOSX_DEPLOYMENT_TARGET=10.4
>   endif
>   TARGET_STRIP+= -x
> -  TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
> +  # Ext. unwinding is broken on OSX/ARM64 until someone finds a fix. See #698.
> +  ifneq (arm64,$(TARGET_LJARCH))
> +    TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
> +  endif
>   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
>   TARGET_DYNXLDOPTS=
>   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
> -- 
> 2.32.1 (Apple Git-133)
> 


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding.
  2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding Maksim Kokryashkin via Tarantool-patches
@ 2022-10-04 15:48   ` sergos via Tarantool-patches
  0 siblings, 0 replies; 23+ messages in thread
From: sergos via Tarantool-patches @ 2022-10-04 15:48 UTC (permalink / raw)
  To: Maksim Kokryashkin; +Cc: tarantool-patches

Hi!

Thanks for the patch!

I believe there should be a revamp of the message as per SergeyK comments to the 
previous patch - no direct links to LuaJIT repo. And frankly, the magic behind the
reordering is not clear. Can you elaborate on the matter a little?

Regards,
Sergos


> On 26 Sep 2022, at 18:55, Maksim Kokryashkin <max.kokryashkin@gmail.com> wrote:
> 
> From: Mike Pall <mike>
> 
> Reported by Yichun Zhang. Fixes LuaJIT/LuaJIT#722.
> May help towards fixing LuaJIT/LuaJIT#698, too.
> 
> (cherry picked from commit 421c4c798791d27b7f967df39891c4e4fa1d107c)
> 
> The `_Unwind_Find_FDE` fails to find FDE (frame descriptor element)
> for `lj_vm_ffi_call` in DWARF unwind info, despite the presence
> of its data in `.debug_frame` section.
> 
> Reordering of interpreter stack frame fixes the issue.
> 
> For more info, see https://github.com/LuaJIT/LuaJIT/issues/722
> 
> Maxim Kokryashkin:
> * added the description for the problem
> 
> Needed for tarantool/tarantool#6096
> Needed for tarantool/tarantool#7230
> ---
> src/lj_frame.h    |  12 +--
> src/vm_arm64.dasc | 189 +++++++++++++++++++++++++++++++++++-----------
> 2 files changed, 152 insertions(+), 49 deletions(-)
> 
> diff --git a/src/lj_frame.h b/src/lj_frame.h
> index 9fd63fa2..1e4adaa3 100644
> --- a/src/lj_frame.h
> +++ b/src/lj_frame.h
> @@ -192,12 +192,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
> #endif
> #define CFRAME_SHIFT_MULTRES	3
> #elif LJ_TARGET_ARM64
> -#define CFRAME_OFS_ERRF		196
> -#define CFRAME_OFS_NRES		200
> -#define CFRAME_OFS_PREV		160
> -#define CFRAME_OFS_L		176
> -#define CFRAME_OFS_PC		168
> -#define CFRAME_OFS_MULTRES	192
> +#define CFRAME_OFS_ERRF		36
> +#define CFRAME_OFS_NRES		40
> +#define CFRAME_OFS_PREV		0
> +#define CFRAME_OFS_L		16
> +#define CFRAME_OFS_PC		8
> +#define CFRAME_OFS_MULTRES	32
> #define CFRAME_SIZE		208
> #define CFRAME_SHIFT_MULTRES	3
> #elif LJ_TARGET_PPC
> diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
> index 313cc94f..ad57bca3 100644
> --- a/src/vm_arm64.dasc
> +++ b/src/vm_arm64.dasc
> @@ -81,47 +81,49 @@
> |
> |.define CFRAME_SPACE,	208
> |//----- 16 byte aligned, <-- sp entering interpreter
> -|// Unused		[sp, #204]	// 32 bit values
> -|.define SAVE_NRES,	[sp, #200]
> -|.define SAVE_ERRF,	[sp, #196]
> -|.define SAVE_MULTRES,	[sp, #192]
> -|.define TMPD,		[sp, #184]	// 64 bit values
> -|.define SAVE_L,	[sp, #176]
> -|.define SAVE_PC,	[sp, #168]
> -|.define SAVE_CFRAME,	[sp, #160]
> -|.define SAVE_FPR_,	96		// 96+8*8: 64 bit FPR saves

> -|.define SAVE_GPR_,	16		// 16+10*8: 64 bit GPR saves
> -|.define SAVE_LR,	[sp, #8]
> -|.define SAVE_FP,	[sp]
> +|.define SAVE_LR,	[sp, #200]
> +|.define SAVE_FP,	[sp, #192]
> +|.define SAVE_GPR_,	112		// 112+10*8: 64 bit GPR saves

(Not for change, just random thoughts)
Why it is not  .define SAVE_GPR_, SAVE_FPR_+8*8

> +|.define SAVE_FPR_,	48		// 48+8*8: 64 bit FPR saves
> +|// Unused		[sp, #44]	// 32 bit values
> +|.define SAVE_NRES,	[sp, #40]
> +|.define SAVE_ERRF,	[sp, #36]
> +|.define SAVE_MULTRES,	[sp, #32]
> +|.define TMPD,		[sp, #24]	// 64 bit values
> +|.define SAVE_L,	[sp, #16]
> +|.define SAVE_PC,	[sp, #8]
> +|.define SAVE_CFRAME,	[sp, #0]
> |//----- 16 byte aligned, <-- sp while in interpreter.
> |
> -|.define TMPDofs,	#184
> +|.define TMPDofs,	#24
> |
> |.macro save_, gpr1, gpr2, fpr1, fpr2
> -|  stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
> -|  stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
> +|  stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
> +|  stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]

(Not for change, just random thoughts)
Should those 14 and 27 magic numbers be a define also, describing the idea
behind them?

> |.endmacro
> |.macro rest_, gpr1, gpr2, fpr1, fpr2
> -|  ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
> -|  ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
> +|  ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
> +|  ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
> |.endmacro
> |
> |.macro saveregs
> -|  stp fp, lr, [sp, #-CFRAME_SPACE]!
> +|  sub sp, sp, # CFRAME_SPACE
> +|  stp fp, lr, SAVE_FP
> |  add fp, sp, #0
> -|  stp x19, x20, [sp, # SAVE_GPR_]
> +|  stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
> |  save_ 21, 22, 8, 9
> |  save_ 23, 24, 10, 11
> |  save_ 25, 26, 12, 13
> |  save_ 27, 28, 14, 15
> |.endmacro
> |.macro restoreregs
> -|  ldp x19, x20, [sp, # SAVE_GPR_]
> +|  ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
> |  rest_ 21, 22, 8, 9
> |  rest_ 23, 24, 10, 11
> |  rest_ 25, 26, 12, 13
> |  rest_ 27, 28, 14, 15
> -|  ldp fp, lr, [sp], # CFRAME_SPACE
> +|  ldp fp, lr, SAVE_FP
> +|  add sp, sp, # CFRAME_SPACE
> |.endmacro
> |
> |// Type definitions. Some of these are only used for documentation.
> @@ -2125,9 +2127,9 @@ static void build_subroutines(BuildCtx *ctx)
>   |  // Caveat: needs special frame unwinding, see below.
>   |.if FFI
>   |  .type CCSTATE, CCallState, x19
> -  |  stp fp, lr, [sp, #-32]!
> +  |  stp x20, CCSTATE, [sp, #-32]!
> +  |  stp fp, lr, [sp, #16]
>   |  add fp, sp, #0
> -  |  str CCSTATE, [sp, #16]
>   |  mov CCSTATE, x0
>   |  ldr TMP0w, CCSTATE:x0->spadj
>   |   ldrb TMP1w, CCSTATE->nsp
> @@ -2156,8 +2158,8 @@ static void build_subroutines(BuildCtx *ctx)
>   |  stp x0, x1, CCSTATE->gpr[0]
>   |   stp d0, d1, CCSTATE->fpr[0]
>   |   stp d2, d3, CCSTATE->fpr[2]
> -  |  ldr CCSTATE, [sp, #16]
> -  |  ldp fp, lr, [sp], #32
> +  |  ldp fp, lr, [sp, #16]
> +  |  ldp x20, CCSTATE, [sp], #32
>   |  ret
>   |.endif
>   |// Note: vm_ffi_call must be the last function in this object file!
> @@ -2887,7 +2889,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
>   case BC_GGET:
>     |  // RA = dst, RC = str_const (~)
>   case BC_GSET:
> -    |  // RA = dst, RC = str_const (~)
> +    |  // RA = src, RC = str_const (~)
>     |  ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
>     |   mvn RC, RC
>     |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
> @@ -3863,7 +3865,7 @@ static int build_backend(BuildCtx *ctx)
> static void emit_asm_debug(BuildCtx *ctx)
> {
>   int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
> -  int i, cf = CFRAME_SIZE >> 3;
> +  int i;
>   switch (ctx->mode) {
>   case BUILD_elfasm:
>     fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
> @@ -3888,14 +3890,14 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.quad .Lbegin\n"
> 	"\t.quad %d\n"
> 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
> -	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
> -	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
> -	fcofs, CFRAME_SIZE, cf, cf-1);
> +	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
> +	fcofs, CFRAME_SIZE);
>     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
> -      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
> +      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
>     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
>       fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
> -	      64+i, cf-i-4);
> +	      64+i, i+(3+(28-19+1)-8));
>     fprintf(ctx->fp,
> 	"\t.align 3\n"
> 	".LEFDE0:\n\n");
> @@ -3908,9 +3910,11 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.quad lj_vm_ffi_call\n"
> 	"\t.quad %d\n"
> 	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
> -	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
> -	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
> -	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
> +	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
> +	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
> +	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
> +	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
> 	"\t.align 3\n"
> 	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
> #endif
> @@ -3941,14 +3945,14 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.long %d\n"
> 	"\t.uleb128 0\n"			/* augmentation length */
> 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
> -	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
> -	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
> -	fcofs, CFRAME_SIZE, cf, cf-1);
> +	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
> +	fcofs, CFRAME_SIZE);
>     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
> -      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
> +      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
>     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
>       fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
> -	      64+i, cf-i-4);
> +	      64+i, i+(3+(28-19+1)-8));
>     fprintf(ctx->fp,
> 	"\t.align 3\n"
> 	".LEFDE2:\n\n");
> @@ -3977,13 +3981,112 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.long %d\n"
> 	"\t.uleb128 0\n"			/* augmentation length */
> 	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
> -	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
> -	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
> -	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
> +	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
> +	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
> +	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
> +	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
> 	"\t.align 3\n"
> 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
> #endif
>     break;
> +#if !LJ_NO_UNWIND
> +  case BUILD_machasm: {
> +#if LJ_HASFFI
> +    int fcsize = 0;
> +#endif
> +    int j;
> +    fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
> +    fprintf(ctx->fp,
> +	"EH_frame1:\n"
> +	"\t.set L$set$x,LECIEX-LSCIEX\n"
> +	"\t.long L$set$x\n"
> +	"LSCIEX:\n"
> +	"\t.long 0\n"
> +	"\t.byte 0x1\n"
> +	"\t.ascii \"zPR\\0\"\n"
> +	"\t.byte 0x1\n"
> +	"\t.byte 128-8\n"
> +	"\t.byte 30\n"				/* Return address is in lr. */
> +	"\t.byte 6\n"				/* augmentation length */
> +	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
> +	"\t.long _lj_err_unwind_dwarf@GOTPCREL\n"
> +	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
> +	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
> +	"\t.align 3\n"
> +	"LECIEX:\n\n");
> +    for (j = 0; j < ctx->nsym; j++) {
> +      const char *name = ctx->sym[j].name;
> +      int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs;
> +      if (size == 0) continue;
> +#if LJ_HASFFI
> +      if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
> +#endif
> +      fprintf(ctx->fp,
> +	"%s.eh:\n"
> +	"LSFDE%d:\n"
> +	"\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
> +	"\t.long L$set$%d\n"
> +	"LASFDE%d:\n"
> +	"\t.long LASFDE%d-EH_frame1\n"
> +	"\t.long %s-.\n"
> +	"\t.long %d\n"
> +	"\t.byte 0\n"				/* augmentation length */
> +	"\t.byte 0xe\n\t.byte %d\n\t.byte 1\n"	/* def_cfa_offset */
> +	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.byte 2\n",		/* offset fp */
> +	name, j, j, j, j, j, j, j, name, size, CFRAME_SIZE);
> +      for (i = 19; i <= 28; i++)  /* offset x19-x28 */
> +	fprintf(ctx->fp, "\t.byte 0x%x\n\t.byte %d\n", 0x80+i, i+(3-19));
> +      for (i = 8; i <= 15; i++)  /* offset d8-d15 */
> +	fprintf(ctx->fp, "\t.byte 5\n\t.byte 0x%x\n\t.byte %d\n",
> +		64+i, i+(3+(28-19+1)-8));
> +      fprintf(ctx->fp,
> +	"\t.align 3\n"
> +	"LEFDE%d:\n\n", j);
> +    }
> +#if LJ_HASFFI
> +    if (fcsize) {
> +      fprintf(ctx->fp,
> +	"EH_frame2:\n"
> +	"\t.set L$set$y,LECIEY-LSCIEY\n"
> +	"\t.long L$set$y\n"
> +	"LSCIEY:\n"
> +	"\t.long 0\n"
> +	"\t.byte 0x1\n"
> +	"\t.ascii \"zR\\0\"\n"
> +	"\t.byte 0x1\n"
> +	"\t.byte 128-8\n"
> +	"\t.byte 30\n"				/* Return address is in lr. */
> +	"\t.byte 1\n"				/* augmentation length */
> +	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
> +	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
> +	"\t.align 3\n"
> +	"LECIEY:\n\n");
> +      fprintf(ctx->fp,
> +	"_lj_vm_ffi_call.eh:\n"
> +	"LSFDEY:\n"
> +	"\t.set L$set$yy,LEFDEY-LASFDEY\n"
> +	"\t.long L$set$yy\n"
> +	"LASFDEY:\n"
> +	"\t.long LASFDEY-EH_frame2\n"
> +	"\t.long _lj_vm_ffi_call-.\n"
> +	"\t.long %d\n"
> +	"\t.byte 0\n"				/* augmentation length */
> +	"\t.byte 0xe\n\t.byte 32\n"		/* def_cfa_offset */
> +	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.byte 2\n"		/* offset fp */
> +	"\t.byte 0x93\n\t.byte 3\n"		/* offset x19 */
> +	"\t.byte 0x94\n\t.byte 4\n"		/* offset x20 */
> +	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
> +	"\t.align 3\n"
> +	"LEFDEY:\n\n", fcsize);
> +    }
> +#endif
> +    fprintf(ctx->fp, ".subsections_via_symbols\n");
> +    }
> +    break;
> +#endif
>   default:
>     break;
>   }
> -- 
> 2.32.1 (Apple Git-133)
> 


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit 5/6] OSX/ARM64: Disable unwind info.
  2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 5/6] OSX/ARM64: Disable unwind info Maksim Kokryashkin via Tarantool-patches
@ 2022-10-04 15:52   ` sergos via Tarantool-patches
  0 siblings, 0 replies; 23+ messages in thread
From: sergos via Tarantool-patches @ 2022-10-04 15:52 UTC (permalink / raw)
  To: Maksim Kokryashkin; +Cc: tarantool-patches

Hi!

Thanks for the patch!

Just one nit. LGTM.

Sergos


> On 26 Sep 2022, at 18:55, Maksim Kokryashkin <max.kokryashkin@gmail.com> wrote:
> 
> From: Mike Pall <mike>
> 
> See LuaJIT/LuaJIT#698.
> 
> (cherry picked from commit 78350a2565e1cf1102bcd25be406f02953d4dd3b)
> 
> External unwinding support is already disabled for OSX on ARM64
> platfrom so there is no point in generation of incorrect unwind
s/platfrom/platform/
> info for it. This patch disables that generation.
> 
> Maxim Kokryashkin:
> * added the description for the problem
> 
> Needed for tarantool/tarantool#6096
> Needed for tarantool/tarantool#7230
> ---
> src/vm_arm64.dasc | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
> index ad57bca3..ccfa72bd 100644
> --- a/src/vm_arm64.dasc
> +++ b/src/vm_arm64.dasc
> @@ -3990,7 +3990,8 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
> #endif
>     break;
> -#if !LJ_NO_UNWIND
> +    /* Disabled until someone finds a fix. See #698. */
> +#if !LJ_NO_UNWIND && 0
>   case BUILD_machasm: {
> #if LJ_HASFFI
>     int fcsize = 0;
> -- 
> 2.32.1 (Apple Git-133)
> 


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Tarantool-patches] [PATCH luajit 6/6] OSX/ARM64: Fix external unwinding.
  2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 6/6] OSX/ARM64: Fix external unwinding Maksim Kokryashkin via Tarantool-patches
@ 2022-10-04 16:05   ` sergos via Tarantool-patches
  0 siblings, 0 replies; 23+ messages in thread
From: sergos via Tarantool-patches @ 2022-10-04 16:05 UTC (permalink / raw)
  To: Maksim Kokryashkin; +Cc: tarantool-patches

Hi!

Thanks for the patch!

Ok, so you’re re-enable the generation back. Then in 5/6 message I would like to
see a mention that the disabling is just for the history match, will be superseded
with the following (this) patch.

Also, I did’t get the point behind the test: what will be it’s behavior with unwind
info generation turned off - which will be the behavior of luajit before this patch?


Regards,
Sergos

> On 26 Sep 2022, at 18:55, Maksim Kokryashkin <max.kokryashkin@gmail.com> wrote:
> 
> Contributed by Edmund Kapusniak. For more info,
> see LuaJIT/LuaJIT#698 and LuaJIT/LuaJIT#757.
> 
> (cherry picked from commit c38747b626b978555324504ec29a110f6b04902f)
> 
> To allow compiler generate compact unwind info generation
> for Mach-O, fp must point to the saved fp, and the frame
> must be specified relative to fp+16. ELF unwind info has
> been updated to also use fp+16 rather than sp+CFRAME_SIZE.
> 
> Offset to pointer to personality routine specified as @GOT-. rather
> than @GOTPCREL.
> 
> Re-enabled LUAJIT_UNWIND_EXTERNAL by default on OSX.
> 
> Maxim Kokryashkin:
> * added the description for the issue and the test
> 
> Resolves tarantool/tarantool#6096
> Needed for tarantool/tarantool#7230
> ---
> cmake/SetTargetFlags.cmake                    |  4 +-
> src/Makefile.original                         |  5 +-
> src/vm_arm64.dasc                             | 89 ++++++++-----------
> ...-6096-external-unwinding-on-arm64.test.lua | 13 +++
> 4 files changed, 54 insertions(+), 57 deletions(-)
> create mode 100644 test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua
> 
> diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
> index d99e1f9a..a5a3407f 100644
> --- a/cmake/SetTargetFlags.cmake
> +++ b/cmake/SetTargetFlags.cmake
> @@ -16,9 +16,7 @@ LuaJITTestArch(TESTARCH "${TARGET_C_FLAGS}")
> LuaJITArch(LUAJIT_ARCH "${TESTARCH}")
> 
> if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
> -  if(NOT LUAJIT_ARCH STREQUAL "arm64")
> -    AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
> -  endif()
> +  AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
> else()
>   string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
>   if(UNWIND_POS EQUAL -1)
> diff --git a/src/Makefile.original b/src/Makefile.original
> index 5826a56a..d1373b40 100644
> --- a/src/Makefile.original
> +++ b/src/Makefile.original
> @@ -325,10 +325,7 @@ ifeq (Darwin,$(TARGET_SYS))
>     export MACOSX_DEPLOYMENT_TARGET=10.4
>   endif
>   TARGET_STRIP+= -x
> -  # Ext. unwinding is broken on OSX/ARM64 until someone finds a fix. See #698.
> -  ifneq (arm64,$(TARGET_LJARCH))
> -    TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
> -  endif
> +  TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
>   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
>   TARGET_DYNXLDOPTS=
>   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
> diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
> index ccfa72bd..f517a808 100644
> --- a/src/vm_arm64.dasc
> +++ b/src/vm_arm64.dasc
> @@ -81,8 +81,7 @@
> |
> |.define CFRAME_SPACE,	208
> |//----- 16 byte aligned, <-- sp entering interpreter
> -|.define SAVE_LR,	[sp, #200]
> -|.define SAVE_FP,	[sp, #192]
> +|.define SAVE_FP_LR_,	192
> |.define SAVE_GPR_,	112		// 112+10*8: 64 bit GPR saves
> |.define SAVE_FPR_,	48		// 48+8*8: 64 bit FPR saves
> |// Unused		[sp, #44]	// 32 bit values
> @@ -108,8 +107,8 @@
> |
> |.macro saveregs
> |  sub sp, sp, # CFRAME_SPACE
> -|  stp fp, lr, SAVE_FP
> -|  add fp, sp, #0
> +|  stp fp, lr, [sp, # SAVE_FP_LR_]
> +|  add fp, sp, # SAVE_FP_LR_
> |  stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
> |  save_ 21, 22, 8, 9
> |  save_ 23, 24, 10, 11
> @@ -122,7 +121,7 @@
> |  rest_ 23, 24, 10, 11
> |  rest_ 25, 26, 12, 13
> |  rest_ 27, 28, 14, 15
> -|  ldp fp, lr, SAVE_FP
> +|  ldp fp, lr, [sp, # SAVE_FP_LR_]
> |  add sp, sp, # CFRAME_SPACE
> |.endmacro
> |
> @@ -504,8 +503,9 @@ static void build_subroutines(BuildCtx *ctx)
>   |    ldr GL, L->glref			// Setup pointer to global state.
>   |     mov BASE, CARG2
>   |   str CARG1, SAVE_PC		// Any value outside of bytecode is ok.
> -  |  str RC, SAVE_CFRAME
> -  |  str fp, L->cframe			// Add our C frame to cframe chain.
> +  |  add TMP0, sp, #0
> +  |   str RC, SAVE_CFRAME
> +  |  str TMP0, L->cframe		// Add our C frame to cframe chain.
>   |
>   |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
>   |  str L, GL->cur_L
> @@ -540,8 +540,9 @@ static void build_subroutines(BuildCtx *ctx)
>   |   sub RA, RA, RB			// Compute -savestack(L, L->top).
>   |   str RAw, SAVE_NRES		// Neg. delta means cframe w/o frame.
>   |  str wzr, SAVE_ERRF			// No error function.
> -  |  str RC, SAVE_CFRAME
> -  |  str fp, L->cframe			// Add our C frame to cframe chain.
> +  |  add TMP0, sp, #0
> +  |   str RC, SAVE_CFRAME
> +  |  str TMP0, L->cframe		// Add our C frame to cframe chain.
>   |    str L, GL->cur_L
>   |  blr CARG4			// (lua_State *L, lua_CFunction func, void *ud)
>   |  mov BASE, CRET1
> @@ -2129,14 +2130,14 @@ static void build_subroutines(BuildCtx *ctx)
>   |  .type CCSTATE, CCallState, x19
>   |  stp x20, CCSTATE, [sp, #-32]!
>   |  stp fp, lr, [sp, #16]
> -  |  add fp, sp, #0
> +  |  add fp, sp, #16
>   |  mov CCSTATE, x0
>   |  ldr TMP0w, CCSTATE:x0->spadj
>   |   ldrb TMP1w, CCSTATE->nsp
>   |    add TMP2, CCSTATE, #offsetof(CCallState, stack)
>   |   subs TMP1, TMP1, #1
>   |    ldr TMP3, CCSTATE->func
> -  |  sub sp, fp, TMP0
> +  |  sub sp, sp, TMP0
>   |   bmi >2
>   |1:  // Copy stack slots
>   |  ldr TMP0, [TMP2, TMP1, lsl #3]
> @@ -2154,7 +2155,7 @@ static void build_subroutines(BuildCtx *ctx)
>   |   ldp d6, d7, CCSTATE->fpr[6]
>   |  ldr x8, CCSTATE->retp
>   |  blr TMP3
> -  |  mov sp, fp
> +  |  sub sp, fp, #16
>   |  stp x0, x1, CCSTATE->gpr[0]
>   |   stp d0, d1, CCSTATE->fpr[0]
>   |   stp d2, d3, CCSTATE->fpr[2]
> @@ -3879,7 +3880,7 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.uleb128 0x1\n"
> 	"\t.sleb128 -8\n"
> 	"\t.byte 30\n"				/* Return address is in lr. */
> -	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
> +	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
> 	"\t.align 3\n"
> 	".LECIE0:\n\n");
>     fprintf(ctx->fp,
> @@ -3889,10 +3890,9 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.long .Lframe0\n"
> 	"\t.quad .Lbegin\n"
> 	"\t.quad %d\n"
> -	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
> 	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> 	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
> -	fcofs, CFRAME_SIZE);
> +	fcofs);
>     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
>       fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
>     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
> @@ -3909,12 +3909,10 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.long .Lframe0\n"
> 	"\t.quad lj_vm_ffi_call\n"
> 	"\t.quad %d\n"
> -	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
> 	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> 	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
> 	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
> 	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
> -	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
> 	"\t.align 3\n"
> 	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
> #endif
> @@ -3933,7 +3931,7 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
> 	"\t.long lj_err_unwind_dwarf-.\n"
> 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
> -	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
> +	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
> 	"\t.align 3\n"
> 	".LECIE1:\n\n");
>     fprintf(ctx->fp,
> @@ -3944,10 +3942,9 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.long .Lbegin-.\n"
> 	"\t.long %d\n"
> 	"\t.uleb128 0\n"			/* augmentation length */
> -	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
> 	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> 	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
> -	fcofs, CFRAME_SIZE);
> +	fcofs);
>     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
>       fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
>     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
> @@ -3969,7 +3966,7 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.byte 30\n"				/* Return address is in lr. */
> 	"\t.uleb128 1\n"			/* augmentation length */
> 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
> -	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
> +	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
> 	"\t.align 3\n"
> 	".LECIE2:\n\n");
>     fprintf(ctx->fp,
> @@ -3980,18 +3977,15 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.long lj_vm_ffi_call-.\n"
> 	"\t.long %d\n"
> 	"\t.uleb128 0\n"			/* augmentation length */
> -	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
> 	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> 	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
> 	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
> 	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
> -	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
> 	"\t.align 3\n"
> 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
> #endif
>     break;
> -    /* Disabled until someone finds a fix. See #698. */
> -#if !LJ_NO_UNWIND && 0
> +#if !LJ_NO_UNWIND
>   case BUILD_machasm: {
> #if LJ_HASFFI
>     int fcsize = 0;
> @@ -4006,14 +4000,14 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.long 0\n"
> 	"\t.byte 0x1\n"
> 	"\t.ascii \"zPR\\0\"\n"
> -	"\t.byte 0x1\n"
> -	"\t.byte 128-8\n"
> +	"\t.uleb128 0x1\n"
> +	"\t.sleb128 -8\n"
> 	"\t.byte 30\n"				/* Return address is in lr. */
> -	"\t.byte 6\n"				/* augmentation length */
> +	"\t.uleb128 6\n"			/* augmentation length */
> 	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
> -	"\t.long _lj_err_unwind_dwarf@GOTPCREL\n"
> +	"\t.long _lj_err_unwind_dwarf@GOT-.\n"
> 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
> -	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
> +	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
> 	"\t.align 3\n"
> 	"LECIEX:\n\n");
>     for (j = 0; j < ctx->nsym; j++) {
> @@ -4024,7 +4018,6 @@ static void emit_asm_debug(BuildCtx *ctx)
>       if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
> #endif
>       fprintf(ctx->fp,
> -	"%s.eh:\n"
> 	"LSFDE%d:\n"
> 	"\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
> 	"\t.long L$set$%d\n"
> @@ -4032,15 +4025,14 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.long LASFDE%d-EH_frame1\n"
> 	"\t.long %s-.\n"
> 	"\t.long %d\n"
> -	"\t.byte 0\n"				/* augmentation length */
> -	"\t.byte 0xe\n\t.byte %d\n\t.byte 1\n"	/* def_cfa_offset */
> -	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
> -	"\t.byte 0x9d\n\t.byte 2\n",		/* offset fp */
> -	name, j, j, j, j, j, j, j, name, size, CFRAME_SIZE);
> +	"\t.uleb128 0\n"			/* augmentation length */
> +	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
> +	j, j, j, j, j, j, j, name, size);
>       for (i = 19; i <= 28; i++)  /* offset x19-x28 */
> -	fprintf(ctx->fp, "\t.byte 0x%x\n\t.byte %d\n", 0x80+i, i+(3-19));
> +	fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
>       for (i = 8; i <= 15; i++)  /* offset d8-d15 */
> -	fprintf(ctx->fp, "\t.byte 5\n\t.byte 0x%x\n\t.byte %d\n",
> +	fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
> 		64+i, i+(3+(28-19+1)-8));
>       fprintf(ctx->fp,
> 	"\t.align 3\n"
> @@ -4056,16 +4048,15 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.long 0\n"
> 	"\t.byte 0x1\n"
> 	"\t.ascii \"zR\\0\"\n"
> -	"\t.byte 0x1\n"
> -	"\t.byte 128-8\n"
> +	"\t.uleb128 0x1\n"
> +	"\t.sleb128 -8\n"
> 	"\t.byte 30\n"				/* Return address is in lr. */
> -	"\t.byte 1\n"				/* augmentation length */
> +	"\t.uleb128 1\n"			/* augmentation length */
> 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
> -	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
> +	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
> 	"\t.align 3\n"
> 	"LECIEY:\n\n");
>       fprintf(ctx->fp,
> -	"_lj_vm_ffi_call.eh:\n"
> 	"LSFDEY:\n"
> 	"\t.set L$set$yy,LEFDEY-LASFDEY\n"
> 	"\t.long L$set$yy\n"
> @@ -4073,13 +4064,11 @@ static void emit_asm_debug(BuildCtx *ctx)
> 	"\t.long LASFDEY-EH_frame2\n"
> 	"\t.long _lj_vm_ffi_call-.\n"
> 	"\t.long %d\n"
> -	"\t.byte 0\n"				/* augmentation length */
> -	"\t.byte 0xe\n\t.byte 32\n"		/* def_cfa_offset */
> -	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
> -	"\t.byte 0x9d\n\t.byte 2\n"		/* offset fp */
> -	"\t.byte 0x93\n\t.byte 3\n"		/* offset x19 */
> -	"\t.byte 0x94\n\t.byte 4\n"		/* offset x20 */
> -	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
> +	"\t.uleb128 0\n"			/* augmentation length */
> +	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
> +	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
> +	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
> +	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
> 	"\t.align 3\n"
> 	"LEFDEY:\n\n", fcsize);
>     }
> diff --git a/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua b/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua
> new file mode 100644
> index 00000000..cdeea441
> --- /dev/null
> +++ b/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua
> @@ -0,0 +1,13 @@
> +local tap = require('tap')
> +
> +-- Test file to check correctnes of external unwinding
> +-- in LuaJIT.
> +-- See also https://github.com/LuaJIT/LuaJIT/issues/698,
> +-- https://github.com/LuaJIT/LuaJIT/pull/757.
> +local test = tap.test('gh-6096-external-unwinding-on-arm64')
> +test:plan(1)
> +
> +local res = pcall(require, 'not-existing-module')
> +test:ok(res == false, 'successful unwinding in pcall')
> +
> +os.exit(test:check() and 0 or 1)
> -- 
> 2.32.1 (Apple Git-133)
> 


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Tarantool-patches] [PATCH luajit v2 0/6] Fix external unwinding on M1
  2022-09-26 15:54 [Tarantool-patches] [PATCH luajit 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
                   ` (5 preceding siblings ...)
  2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 6/6] OSX/ARM64: Fix external unwinding Maksim Kokryashkin via Tarantool-patches
@ 2022-10-06  9:48 ` Maksim Kokryashkin via Tarantool-patches
  2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches
                     ` (5 more replies)
  6 siblings, 6 replies; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-10-06  9:48 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun; +Cc: Maksim Kokryashkin

Changes in v2:
- Fixed nits as per review by Sergey and Sergos.
- Elaborated on questions from reviewers.

Branch and PR are force-pushed.

Maksim Kokryashkin (3):
  Cleanup and enable external unwinding for more platforms.
  OSX: Fix build by hardcoding external frame unwinding.
  OSX/ARM64: Fix external unwinding.

Mike Pall (3):
  OSX/ARM64: Disable external unwinding for now.
  ARM64: Reorder interpreter stack frame and fix unwinding.
  OSX/ARM64: Disable unwind info.

 .github/workflows/macos-x86_64.yml            |  20 +-
 CMakeLists.txt                                |   3 +
 cmake/SetTargetFlags.cmake                    |  26 +-
 doc/extensions.html                           |  22 +-
 src/Makefile.original                         |  12 +-
 src/lj_arch.h                                 |  27 +-
 src/lj_err.c                                  | 321 +++++++++---------
 src/lj_frame.h                                |  12 +-
 src/vm_arm64.dasc                             | 209 ++++++++----
 ...-6096-external-unwinding-on-arm64.test.lua |  13 +
 10 files changed, 389 insertions(+), 276 deletions(-)
 create mode 100644 test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua

-- 
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Tarantool-patches] [PATCH luajit v2 1/6] Cleanup and enable external unwinding for more platforms.
  2022-10-06  9:48 ` [Tarantool-patches] [PATCH luajit v2 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
@ 2022-10-06  9:48   ` Maksim Kokryashkin via Tarantool-patches
  2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-10-06  9:48 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun; +Cc: Maksim Kokryashkin

(cherry picked from commit e131936133c58de4426c595db2341caf5a1665b5)

This commit enables external unwinding on all platforms, that
create unwind tables by default. Corresponding check is added
to the build routine.

Also, GC64 mode is now enabled on MacOS by default, as
non-GC64 mode is forbidden.

Maxim Kokryashkin:
* added the description for the patch

Needed for tarantool/tarantool#6096
Part of tarantool/tarantool#7230
---
>> +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
>> + set(LUAJIT_ENABLE_GC64 ON)
>> +endif()
>Also, I see no fortified enabled in the LuaJIT original commit. Is this
>because this commit is later than enabling GC64 mode by default?
Yes, exactly.

>> + execute_process(
>> + COMMAND bash -c "exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | ${CMAKE_C_COMPILER} -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o"
>
>Side note: I'm OK with the same check as Mike did.
>Does `check_c_compiler_flag(-funwind-tables TESTUNWIND)` check the same
>thing, but in a more readable way?
No, because it only checks that the flag is supported, but it
doesn’t check whether it is enabled by default. In our case,
it is vital to know that compiler enables that
option automatically for us.

 .github/workflows/macos-x86_64.yml |  20 +-
 CMakeLists.txt                     |   3 +
 cmake/SetTargetFlags.cmake         |  22 +-
 doc/extensions.html                |  22 +-
 src/Makefile.original              |  11 +-
 src/lj_arch.h                      |  27 ++-
 src/lj_err.c                       | 321 +++++++++++++++--------------
 7 files changed, 214 insertions(+), 212 deletions(-)

diff --git a/.github/workflows/macos-x86_64.yml b/.github/workflows/macos-x86_64.yml
index 840806e3..dafd1796 100644
--- a/.github/workflows/macos-x86_64.yml
+++ b/.github/workflows/macos-x86_64.yml
@@ -35,7 +35,7 @@ jobs:
       fail-fast: false
       matrix:
         BUILDTYPE: [Debug, Release]
-        GC64: [ON, OFF]
+        GC64: ON
         include:
           - BUILDTYPE: Debug
             CMAKEFLAGS: -DCMAKE_BUILD_TYPE=Debug -DLUA_USE_ASSERT=ON -DLUA_USE_APICHECK=ON
@@ -69,15 +69,6 @@ jobs:
       - name: test
         run: cmake --build . --parallel --target test

-  test-tarantool-debug-wo-GC64:
-    name: Tarantool Debug GC64:OFF
-    needs: test-luajit
-    uses: tarantool/tarantool/.github/workflows/luajit-integration.yml@master
-    with:
-      GC64: OFF
-      buildtype: Debug
-      host: macos-11
-      revision: ${{ github.sha }}
   test-tarantool-debug-w-GC64:
     name: Tarantool Debug GC64:ON
     needs: test-luajit
@@ -87,15 +78,6 @@ jobs:
       buildtype: Debug
       host: macos-11
       revision: ${{ github.sha }}
-  test-tarantool-release-wo-GC64:
-    name: Tarantool Release GC64:OFF
-    needs: test-luajit
-    uses: tarantool/tarantool/.github/workflows/luajit-integration.yml@master
-    with:
-      GC64: OFF
-      buildtype: RelWithDebInfo
-      host: macos-11
-      revision: ${{ github.sha }}
   test-tarantool-release-w-GC64:
     name: Tarantool Release GC64:ON
     needs: test-luajit
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8b49f9d7..c4f3ef62 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -176,6 +176,9 @@ endif()

 # Enable GC64 mode for x64.
 option(LUAJIT_ENABLE_GC64 "GC64 mode for x64" OFF)
+if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+  set(LUAJIT_ENABLE_GC64 ON)
+endif()
 if(LUAJIT_ENABLE_GC64)
   AppendFlags(TARGET_C_FLAGS -DLUAJIT_ENABLE_GC64)
 endif()
diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
index b544d2ac..36896aff 100644
--- a/cmake/SetTargetFlags.cmake
+++ b/cmake/SetTargetFlags.cmake
@@ -15,6 +15,22 @@ endif()
 LuaJITTestArch(TESTARCH "${TARGET_C_FLAGS}")
 LuaJITArch(LUAJIT_ARCH "${TESTARCH}")

+string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
+if(UNWIND_POS EQUAL -1)
+  execute_process(
+    COMMAND bash -c "exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | ${CMAKE_C_COMPILER} -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o"
+    WORKING_DIRECTORY ${LUAJIT_SOURCE_DIR}
+    OUTPUT_VARIABLE TESTUNWIND
+    RESULT_VARIABLE TESTUNWIND_RC
+  )
+  if(TESTUNWIND_RC EQUAL 0)
+    string(FIND "${TESTUNWIND}" "E" UNW_TEST_POS)
+    if(NOT UNW_TEST_POS EQUAL -1)
+      AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
+    endif()
+  endif()
+endif()
+
 # Target-specific compiler options.
 #
 # x86/x64 only: For GCC 4.2 or higher and if you don't intend to
@@ -25,12 +41,6 @@ if(LUAJIT_ARCH STREQUAL "x86")
 endif()

 if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
-  if(LUAJIT_ARCH STREQUAL "x64")
-    # XXX: Set -pagezero_size to hint <mmap> when allocating 32
-    # bit memory on OSX/x64, otherwise the lower 4GB are blocked.
-    AppendFlags(TARGET_BIN_FLAGS -pagezero_size 10000 -image_base 100000000)
-    AppendFlags(TARGET_SHARED_FLAGS -image_base 7fff04c4a000)
-  endif()
   AppendFlags(TARGET_SHARED_FLAGS -single_module -undefined dynamic_lookup)
 else() # Linux and FreeBSD.
   AppendFlags(TARGET_BIN_FLAGS -Wl,-E)
diff --git a/doc/extensions.html b/doc/extensions.html
index e0f136e2..777e9928 100644
--- a/doc/extensions.html
+++ b/doc/extensions.html
@@ -395,29 +395,19 @@ the toolchain used to compile LuaJIT:
 <td class="excinterop">Interoperability</td>
 </tr>
 <tr class="odd separate">
-<td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
-<td class="exccompiler">GCC 4.3+, Clang</td>
+<td class="excplatform">External frame unwinding</td>
+<td class="exccompiler">GCC, Clang, MSVC</td>
 <td class="excinterop"><b style="color: #00a000;">Full</b></td>
 </tr>
 <tr class="even">
-<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td>
-<td class="exccompiler">GCC, Clang</td>
-<td class="excinterop"><b style="color: #00a000;">Full</b></td>
-</tr>
-<tr class="odd">
-<td class="excplatform">Other platforms, DWARF2 unwinding</td>
+<td class="excplatform">Internal frame unwinding + DWARF2</td>
 <td class="exccompiler">GCC, Clang</td>
 <td class="excinterop"><b style="color: #c06000;">Limited</b></td>
 </tr>
-<tr class="even">
-<td class="excplatform">Windows/x64</td>
-<td class="exccompiler">MSVC or WinSDK</td>
-<td class="excinterop"><b style="color: #00a000;">Full</b></td>
-</tr>
 <tr class="odd">
-<td class="excplatform">Windows/x86</td>
-<td class="exccompiler">Any</td>
-<td class="excinterop"><b style="color: #00a000;">Full</b></td>
+<td class="excplatform">Windows 64 bit</td>
+<td class="exccompiler">non-MSVC</td>
+<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
 </tr>
 <tr class="even">
 <td class="excplatform">Other platforms</td>
diff --git a/src/Makefile.original b/src/Makefile.original
index dd1c6a76..c9609700 100644
--- a/src/Makefile.original
+++ b/src/Makefile.original
@@ -320,6 +320,13 @@ else
 ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
   TARGET_XCFLAGS+= -fno-stack-protector
 endif
+ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
+  # Find out whether the target toolchain always generates unwind tables.
+  TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o)
+  ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
+    TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+  endif
+endif
 ifeq (Darwin,$(TARGET_SYS))
   ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
     export MACOSX_DEPLOYMENT_TARGET=10.4
@@ -328,10 +335,6 @@ ifeq (Darwin,$(TARGET_SYS))
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
-  ifeq (x64,$(TARGET_LJARCH))
-    TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000
-    TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000
-  endif
 else
 ifeq (iOS,$(TARGET_SYS))
   TARGET_STRIP+= -x
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 40129d9e..730be5bf 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -152,11 +152,6 @@
 #define LJ_ARCH_NAME		"x86"
 #define LJ_ARCH_BITS		32
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
-#define LJ_ABI_WIN		1
-#else
-#define LJ_ABI_WIN		0
-#endif
 #define LJ_TARGET_X86		1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_EHRETREG	0
@@ -170,11 +165,6 @@
 #define LJ_ARCH_NAME		"x64"
 #define LJ_ARCH_BITS		64
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
-#define LJ_ABI_WIN		1
-#else
-#define LJ_ABI_WIN		0
-#endif
 #define LJ_TARGET_X64		1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_EHRETREG	0
@@ -185,6 +175,8 @@
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
 #ifdef LUAJIT_ENABLE_GC64
 #define LJ_TARGET_GC64		1
+#elif LJ_TARGET_OSX
+#error "macOS requires GC64 -- don't disable it"
 #endif

 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
@@ -566,15 +558,22 @@
 #define LJ_NO_SYSTEM		1
 #endif

-#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
-/* NYI: no support for compact unwind specification, yet. */
-#define LUAJIT_NO_UNWIND	1
+#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
+#define LJ_ABI_WIN		1
+#else
+#define LJ_ABI_WIN		0
 #endif

-#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
+#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
 #define LJ_NO_UNWIND		1
 #endif

+#if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__))))
+#define LJ_UNWIND_EXT		1
+#else
+#define LJ_UNWIND_EXT		0
+#endif
+
 /* Compatibility with Lua 5.1 vs. 5.2. */
 #ifdef LUAJIT_ENABLE_LUA52COMPAT
 #define LJ_52			1
diff --git a/src/lj_err.c b/src/lj_err.c
index c310daf6..298e5434 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -29,12 +29,18 @@
 ** Pros and Cons:
 **
 ** - EXT requires unwind tables for *all* functions on the C stack between
-**   the pcall/catch and the error/throw. This is the default on x64,
-**   but needs to be manually enabled on x86/PPC for non-C++ code.
+**   the pcall/catch and the error/throw. C modules used by Lua code can
+**   throw errors, so these need to have unwind tables, too. Transitively
+**   this applies to all system libraries used by C modules -- at least
+**   when they have callbacks which may throw an error.
 **
-** - INT is faster when actually throwing errors (but this happens rarely).
+** - INT is faster when actually throwing errors, but this happens rarely.
 **   Setting up error handlers is zero-cost in any case.
 **
+** - INT needs to save *all* callee-saved registers when entering the
+**   interpreter. EXT only needs to save those actually used inside the
+**   interpreter. JIT-compiled code may need to save some more.
+**
 ** - EXT provides full interoperability with C++ exceptions. You can throw
 **   Lua errors or C++ exceptions through a mix of Lua frames and C++ frames.
 **   C++ destructors are called as needed. C++ exceptions caught by pcall
@@ -46,27 +52,33 @@
 **   the wrapper function feature. Lua errors thrown through C++ frames
 **   cannot be caught by C++ code and C++ destructors are not run.
 **
-** EXT is the default on x64 systems and on Windows, INT is the default on all
-** other systems.
+** EXT is the default on all systems where the toolchain produces unwind
+** tables by default (*). This is hard-coded and/or detected in src/Makefile.
+** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL
+**
+** INT is the default on all other systems.
+**
+** EXT can be manually enabled for toolchains that are able to produce
+** conforming unwind tables:
+**   "TARGET_XCFLAGS=-funwind-tables -DLUAJIT_UNWIND_EXTERNAL"
+** As explained above, *all* C code used directly or indirectly by LuaJIT
+** must be compiled with -funwind-tables (or -fexceptions). C++ code must
+** *not* be compiled with -fno-exceptions.
+**
+** If you're unsure whether error handling inside the VM works correctly,
+** try running this and check whether it prints "OK":
 **
-** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack
-** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled
-** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set
-** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules
-** and all C libraries that have callbacks which may be used to call back
-** into Lua. C++ code must *not* be compiled with -fno-exceptions.
+**   luajit -e "print(select(2, load('OK')):match('OK'))"
 **
-** EXT is mandatory on WIN64 since the calling convention has an abundance
-** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
-** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4).
+** (*) Originally, toolchains only generated unwind tables for C++ code. For
+** interoperability reasons, this can be manually enabled for plain C code,
+** too (with -funwind-tables). With the introduction of the x64 architecture,
+** the corresponding POSIX and Windows ABIs mandated unwind tables for all
+** code. Over the following years most desktop and server platforms have
+** enabled unwind tables by default on all architectures. OTOH mobile and
+** embedded platforms do not consistently mandate unwind tables.
 */

-#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
-#define LJ_UNWIND_EXT	1
-#elif LJ_TARGET_WINDOWS
-#define LJ_UNWIND_EXT	1
-#endif
-
 /* -- Error messages ------------------------------------------------------ */

 /* Error message strings. */
@@ -183,7 +195,125 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)

 /* -- External frame unwinding -------------------------------------------- */

-#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN
+#if LJ_ABI_WIN
+
+/*
+** Someone in Redmond owes me several days of my life. A lot of this is
+** undocumented or just plain wrong on MSDN. Some of it can be gathered
+** from 3rd party docs or must be found by trial-and-error. They really
+** don't want you to write your own language-specific exception handler
+** or to interact gracefully with MSVC. :-(
+**
+** Apparently MSVC doesn't call C++ destructors for foreign exceptions
+** unless you compile your C++ code with /EHa. Unfortunately this means
+** catch (...) also catches things like access violations. The use of
+** _set_se_translator doesn't really help, because it requires /EHa, too.
+*/
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#if LJ_TARGET_X86
+typedef void *UndocumentedDispatcherContext;  /* Unused on x86. */
+#else
+/* Taken from: http://www.nynaeve.net/?p=99 */
+typedef struct UndocumentedDispatcherContext {
+  ULONG64 ControlPc;
+  ULONG64 ImageBase;
+  PRUNTIME_FUNCTION FunctionEntry;
+  ULONG64 EstablisherFrame;
+  ULONG64 TargetIp;
+  PCONTEXT ContextRecord;
+  void (*LanguageHandler)(void);
+  PVOID HandlerData;
+  PUNWIND_HISTORY_TABLE HistoryTable;
+  ULONG ScopeIndex;
+  ULONG Fill0;
+} UndocumentedDispatcherContext;
+#endif
+
+/* Another wild guess. */
+extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
+
+#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
+/* Workaround for broken MinGW64 declaration. */
+VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
+#define RtlUnwindEx RtlUnwindEx_FIXED
+#endif
+
+#define LJ_MSVC_EXCODE		((DWORD)0xe06d7363)
+#define LJ_GCC_EXCODE		((DWORD)0x20474343)
+
+#define LJ_EXCODE		((DWORD)0xe24c4a00)
+#define LJ_EXCODE_MAKE(c)	(LJ_EXCODE | (DWORD)(c))
+#define LJ_EXCODE_CHECK(cl)	(((cl) ^ LJ_EXCODE) <= 0xff)
+#define LJ_EXCODE_ERRCODE(cl)	((int)((cl) & 0xff))
+
+/* Windows exception handler for interpreter frame. */
+LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
+  void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
+{
+#if LJ_TARGET_X86
+  void *cf = (char *)f - CFRAME_OFS_SEH;
+#else
+  void *cf = f;
+#endif
+  lua_State *L = cframe_L(cf);
+  int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
+		LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
+  if ((rec->ExceptionFlags & 6)) {  /* EH_UNWINDING|EH_EXIT_UNWIND */
+    /* Unwind internal frames. */
+    err_unwind(L, cf, errcode);
+  } else {
+    void *cf2 = err_unwind(L, cf, 0);
+    if (cf2) {  /* We catch it, so start unwinding the upper frames. */
+      if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
+	  rec->ExceptionCode == LJ_GCC_EXCODE) {
+#if !LJ_TARGET_CYGWIN
+	__DestructExceptionObject(rec, 1);
+#endif
+	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
+      } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
+	/* Don't catch access violations etc. */
+	return 1;  /* ExceptionContinueSearch */
+      }
+#if LJ_TARGET_X86
+      UNUSED(ctx);
+      UNUSED(dispatch);
+      /* Call all handlers for all lower C frames (including ourselves) again
+      ** with EH_UNWINDING set. Then call the specified function, passing cf
+      ** and errcode.
+      */
+      lj_vm_rtlunwind(cf, (void *)rec,
+	(cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
+	(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
+      /* lj_vm_rtlunwind does not return. */
+#else
+      /* Unwind the stack and call all handlers for all lower C frames
+      ** (including ourselves) again with EH_UNWINDING set. Then set
+      ** stack pointer = cf, result = errcode and jump to the specified target.
+      */
+      RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
+			       lj_vm_unwind_ff_eh :
+			       lj_vm_unwind_c_eh),
+		  rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
+      /* RtlUnwindEx should never return. */
+#endif
+    }
+  }
+  return 1;  /* ExceptionContinueSearch */
+}
+
+/* Raise Windows exception. */
+static void err_raise_ext(global_State *g, int errcode)
+{
+#if LJ_HASJIT
+  setmref(g->jit_base, NULL);
+#endif
+  RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
+}
+
+#elif !LJ_NO_UNWIND && (defined(__GNUC__) || defined(__clang__))

 /*
 ** We have to use our own definitions instead of the mandatory (!) unwind.h,
@@ -232,7 +362,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
   lua_State *L;
   if (version != 1)
     return _URC_FATAL_PHASE1_ERROR;
-  UNUSED(uexclass);
   cf = (void *)_Unwind_GetCFA(ctx);
   L = cframe_L(cf);
   if ((actions & _UA_SEARCH_PHASE)) {
@@ -280,6 +409,9 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
     ** it on non-x64 because the interpreter restores all callee-saved regs.
     */
     lj_err_throw(L, errcode);
+#if LJ_TARGET_X64
+#error "Broken build system -- only use the provided Makefiles!"
+#endif
 #endif
   }
   return _URC_CONTINUE_UNWIND;
@@ -292,14 +424,6 @@ static _Unwind_Exception static_uex;
 #else
 static __thread _Unwind_Exception static_uex;
 #endif
-
-/* Raise DWARF2 exception. */
-static void err_raise_ext(int errcode)
-{
-  static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
-  static_uex.excleanup = NULL;
-  _Unwind_RaiseException(&static_uex);
-}
 #endif

 #else /* LJ_TARGET_ARM */
@@ -373,132 +497,22 @@ LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb,

 #if LJ_UNWIND_EXT
 static __thread _Unwind_Control_Block static_uex;
+#endif
+#endif /* LJ_TARGET_ARM */

-static void err_raise_ext(int errcode)
+#if LJ_UNWIND_EXT
+/* Raise external exception. */
+static void err_raise_ext(global_State *g, int errcode)
 {
+#if LJ_HASJIT
+  setmref(g->jit_base, NULL);
+#endif
   memset(&static_uex, 0, sizeof(static_uex));
   static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
   _Unwind_RaiseException(&static_uex);
 }
 #endif

-#endif /* LJ_TARGET_ARM */
-
-#elif LJ_ABI_WIN
-
-/*
-** Someone in Redmond owes me several days of my life. A lot of this is
-** undocumented or just plain wrong on MSDN. Some of it can be gathered
-** from 3rd party docs or must be found by trial-and-error. They really
-** don't want you to write your own language-specific exception handler
-** or to interact gracefully with MSVC. :-(
-**
-** Apparently MSVC doesn't call C++ destructors for foreign exceptions
-** unless you compile your C++ code with /EHa. Unfortunately this means
-** catch (...) also catches things like access violations. The use of
-** _set_se_translator doesn't really help, because it requires /EHa, too.
-*/
-
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-#if LJ_TARGET_X64
-/* Taken from: http://www.nynaeve.net/?p=99 */
-typedef struct UndocumentedDispatcherContext {
-  ULONG64 ControlPc;
-  ULONG64 ImageBase;
-  PRUNTIME_FUNCTION FunctionEntry;
-  ULONG64 EstablisherFrame;
-  ULONG64 TargetIp;
-  PCONTEXT ContextRecord;
-  void (*LanguageHandler)(void);
-  PVOID HandlerData;
-  PUNWIND_HISTORY_TABLE HistoryTable;
-  ULONG ScopeIndex;
-  ULONG Fill0;
-} UndocumentedDispatcherContext;
-#else
-typedef void *UndocumentedDispatcherContext;
-#endif
-
-/* Another wild guess. */
-extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
-
-#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
-/* Workaround for broken MinGW64 declaration. */
-VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
-#define RtlUnwindEx RtlUnwindEx_FIXED
-#endif
-
-#define LJ_MSVC_EXCODE		((DWORD)0xe06d7363)
-#define LJ_GCC_EXCODE		((DWORD)0x20474343)
-
-#define LJ_EXCODE		((DWORD)0xe24c4a00)
-#define LJ_EXCODE_MAKE(c)	(LJ_EXCODE | (DWORD)(c))
-#define LJ_EXCODE_CHECK(cl)	(((cl) ^ LJ_EXCODE) <= 0xff)
-#define LJ_EXCODE_ERRCODE(cl)	((int)((cl) & 0xff))
-
-/* Windows exception handler for interpreter frame. */
-LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
-  void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
-{
-#if LJ_TARGET_X64
-  void *cf = f;
-#else
-  void *cf = (char *)f - CFRAME_OFS_SEH;
-#endif
-  lua_State *L = cframe_L(cf);
-  int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
-		LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
-  if ((rec->ExceptionFlags & 6)) {  /* EH_UNWINDING|EH_EXIT_UNWIND */
-    /* Unwind internal frames. */
-    err_unwind(L, cf, errcode);
-  } else {
-    void *cf2 = err_unwind(L, cf, 0);
-    if (cf2) {  /* We catch it, so start unwinding the upper frames. */
-      if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
-	  rec->ExceptionCode == LJ_GCC_EXCODE) {
-#if LJ_TARGET_WINDOWS
-	__DestructExceptionObject(rec, 1);
-#endif
-	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
-      } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
-	/* Don't catch access violations etc. */
-	return 1;  /* ExceptionContinueSearch */
-      }
-#if LJ_TARGET_X64
-      /* Unwind the stack and call all handlers for all lower C frames
-      ** (including ourselves) again with EH_UNWINDING set. Then set
-      ** rsp = cf, rax = errcode and jump to the specified target.
-      */
-      RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
-			       lj_vm_unwind_ff_eh :
-			       lj_vm_unwind_c_eh),
-		  rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
-      /* RtlUnwindEx should never return. */
-#else
-      UNUSED(ctx);
-      UNUSED(dispatch);
-      /* Call all handlers for all lower C frames (including ourselves) again
-      ** with EH_UNWINDING set. Then call the specified function, passing cf
-      ** and errcode.
-      */
-      lj_vm_rtlunwind(cf, (void *)rec,
-	(cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
-	(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
-      /* lj_vm_rtlunwind does not return. */
-#endif
-    }
-  }
-  return 1;  /* ExceptionContinueSearch */
-}
-
-/* Raise Windows exception. */
-static void err_raise_ext(int errcode)
-{
-  RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
-}
-
 #endif

 /* -- Error handling ------------------------------------------------------ */
@@ -508,22 +522,23 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
 {
   global_State *g = G(L);
   lj_trace_abort(g);
-  setmref(g->jit_base, NULL);
   L->status = LUA_OK;
 #if LJ_UNWIND_EXT
-  err_raise_ext(errcode);
+  err_raise_ext(g, errcode);
   /*
   ** A return from this function signals a corrupt C stack that cannot be
   ** unwound. We have no choice but to call the panic function and exit.
   **
   ** Usually this is caused by a C function without unwind information.
-  ** This should never happen on x64, but may happen if you've manually
-  ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every*
-  ** non-C++ file with -funwind-tables.
+  ** This may happen if you've manually enabled LUAJIT_UNWIND_EXTERNAL
+  ** and forgot to recompile *every* non-C++ file with -funwind-tables.
   */
   if (G(L)->panic)
     G(L)->panic(L);
 #else
+#if LJ_HASJIT
+  setmref(g->jit_base, NULL);
+#endif
   {
     void *cf = err_unwind(L, NULL, errcode);
     if (cframe_unwind_ff(cf))
--
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Tarantool-patches] [PATCH luajit v2 2/6] OSX: Fix build by hardcoding external frame unwinding.
  2022-10-06  9:48 ` [Tarantool-patches] [PATCH luajit v2 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
  2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches
@ 2022-10-06  9:48   ` Maksim Kokryashkin via Tarantool-patches
  2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-10-06  9:48 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun; +Cc: Maksim Kokryashkin

Apparently they can't even get 'grep' right, let alone a keyboard.

(cherry picked from commit d4a554d6ee1507f7313641b26ed09bf1b518fa1f)

MacOS uses BSD grep, which is slightly different from GNU grep.
Because of that, the shell script determining whether external
unwinding is possible doesn't work right.

External unwinding is possible on MacOS, so this patch enables
it by default.

Maxim Kokryashkin:
* added the description for the problem

Needed for tarantool/tarantool#6096
Needed for tarantool/tarantool#7230
---
>I have two questions below and also there’s no mentioning in the message that
>you removes the non-GC64 workflow for the Darwin platform. Why is it done
>at all? Previous commit just mentioned that GC64 is enabled by default.
My bad! As Sergey mentioned, we needed to clarify that it is not only
enabled by default, it is now forbidden to run on MacOS in non-GC64
mode. Also, I moved those changes to the first patch, because it is
more logical in terms of structure.

>1. how is it relvant to grep??
>
>> include:
>> - BUILDTYPE: Debug
>> CMAKEFLAGS: -DCMAKE_BUILD_TYPE=Debug -DLUA_USE_ASSERT=ON -DLUA_USE_APICHECK=ON
>> @@ -69,15 +69,6 @@ jobs:
>> - name: test
>> run: cmake --build . --parallel --target test
>>
>> - test-tarantool-debug-wo-GC64:
>> - name: Tarantool Debug GC64:OFF
>> - needs: test-luajit
>> - uses: tarantool/tarantool/.github/workflows/luajit-integration.yml@master
>> - with:
>> - GC64: OFF
>> - buildtype: Debug
>> - host: macos-11
>> - revision: ${{ github.sha }}
>
>(1)
>
>> test-tarantool-debug-w-GC64:
>> name: Tarantool Debug GC64:ON
>> needs: test-luajit
>> @@ -87,15 +78,6 @@ jobs:
>> buildtype: Debug
>> host: macos-11
>> revision: ${{ github.sha }}
>> - test-tarantool-release-wo-GC64:
>> - name: Tarantool Release GC64:OFF
>> - needs: test-luajit
>> - uses: tarantool/tarantool/.github/workflows/luajit-integration.yml@master
>> - with:
>> - GC64: OFF
>> - buildtype: RelWithDebInfo
>> - host: macos-11
>> - revision: ${{ github.sha }}
>(1)
It is not relevant to grep in any way, see the comment above.

>> - COMMAND bash -c "exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | ${CMAKE_C_COMPILER} -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o"
>
>2. I don’t see any changes to the grep operation here and below - does the cherry-pick message is relevant at all?
Yes, there are no changes to grep, we just enable the external
unwinding on every Darwin system without any checks at all,
because of the issue with grep.
It is reliable, because, clang always generates unwind tables.

 cmake/SetTargetFlags.cmake | 28 ++++++++++++++++------------
 src/Makefile.original      | 15 ++++++++-------
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
index 36896aff..a5a3407f 100644
--- a/cmake/SetTargetFlags.cmake
+++ b/cmake/SetTargetFlags.cmake
@@ -15,18 +15,22 @@ endif()
 LuaJITTestArch(TESTARCH "${TARGET_C_FLAGS}")
 LuaJITArch(LUAJIT_ARCH "${TESTARCH}")

-string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
-if(UNWIND_POS EQUAL -1)
-  execute_process(
-    COMMAND bash -c "exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | ${CMAKE_C_COMPILER} -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o"
-    WORKING_DIRECTORY ${LUAJIT_SOURCE_DIR}
-    OUTPUT_VARIABLE TESTUNWIND
-    RESULT_VARIABLE TESTUNWIND_RC
-  )
-  if(TESTUNWIND_RC EQUAL 0)
-    string(FIND "${TESTUNWIND}" "E" UNW_TEST_POS)
-    if(NOT UNW_TEST_POS EQUAL -1)
-      AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
+if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+  AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
+else()
+  string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
+  if(UNWIND_POS EQUAL -1)
+    execute_process(
+      COMMAND bash -c "exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | ${CMAKE_C_COMPILER} -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o"
+      WORKING_DIRECTORY ${LUAJIT_SOURCE_DIR}
+      OUTPUT_VARIABLE TESTUNWIND
+      RESULT_VARIABLE TESTUNWIND_RC
+    )
+    if(TESTUNWIND_RC EQUAL 0)
+      string(FIND "${TESTUNWIND}" "E" UNW_TEST_POS)
+      if(NOT UNW_TEST_POS EQUAL -1)
+        AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
+      endif()
     endif()
   endif()
 endif()
diff --git a/src/Makefile.original b/src/Makefile.original
index c9609700..d1373b40 100644
--- a/src/Makefile.original
+++ b/src/Makefile.original
@@ -320,18 +320,12 @@ else
 ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
   TARGET_XCFLAGS+= -fno-stack-protector
 endif
-ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
-  # Find out whether the target toolchain always generates unwind tables.
-  TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o)
-  ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
-    TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
-  endif
-endif
 ifeq (Darwin,$(TARGET_SYS))
   ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
     export MACOSX_DEPLOYMENT_TARGET=10.4
   endif
   TARGET_STRIP+= -x
+  TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
@@ -345,6 +339,13 @@ ifeq (iOS,$(TARGET_SYS))
     TARGET_XCFLAGS+= -fno-omit-frame-pointer
   endif
 else
+  ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
+    # Find out whether the target toolchain always generates unwind tables.
+    TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && grep -qa -e eh_frame -e __unwind_info tmpunwind.o && echo E; rm -f tmpunwind.o)
+    ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
+      TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+    endif
+  endif
   ifneq (SunOS,$(TARGET_SYS))
     ifneq (PS3,$(TARGET_SYS))
       TARGET_XLDFLAGS+= -Wl,-E
--
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Tarantool-patches] [PATCH luajit v2 3/6] OSX/ARM64: Disable external unwinding for now.
  2022-10-06  9:48 ` [Tarantool-patches] [PATCH luajit v2 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
  2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches
  2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches
@ 2022-10-06  9:48   ` Maksim Kokryashkin via Tarantool-patches
  2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 4/6] ARM64: Reorder interpreter stack frame and fix unwinding Maksim Kokryashkin via Tarantool-patches
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-10-06  9:48 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun

From: Mike Pall <mike>

This reduces functionality, e.g. no handling of on-trace errors.
Someone with very deep knowledge about macOS and MACH-O/DWARF stack
unwinding internals is needed to fix this. See issue #698.

(cherry picked from commit 27ee3bcd79b12a0c71f00427ee1a2e486c684486)

ARM64 interpreter doesn't have unwind info for Mach-O assembler
format, which makes unwinding impossible. Because of that
external unwinding on ARM64 MacOS is temporarily disabled.

Maxim Kokryashkin:
* added the description for the problem

Needed for tarantool/tarantool#6096
Part of tarantool/tarantool#7230
---
 cmake/SetTargetFlags.cmake | 4 +++-
 src/Makefile.original      | 5 ++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
index a5a3407f..d99e1f9a 100644
--- a/cmake/SetTargetFlags.cmake
+++ b/cmake/SetTargetFlags.cmake
@@ -16,7 +16,9 @@ LuaJITTestArch(TESTARCH "${TARGET_C_FLAGS}")
 LuaJITArch(LUAJIT_ARCH "${TESTARCH}")
 
 if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
-  AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
+  if(NOT LUAJIT_ARCH STREQUAL "arm64")
+    AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
+  endif()
 else()
   string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
   if(UNWIND_POS EQUAL -1)
diff --git a/src/Makefile.original b/src/Makefile.original
index d1373b40..5826a56a 100644
--- a/src/Makefile.original
+++ b/src/Makefile.original
@@ -325,7 +325,10 @@ ifeq (Darwin,$(TARGET_SYS))
     export MACOSX_DEPLOYMENT_TARGET=10.4
   endif
   TARGET_STRIP+= -x
-  TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+  # Ext. unwinding is broken on OSX/ARM64 until someone finds a fix. See #698.
+  ifneq (arm64,$(TARGET_LJARCH))
+    TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+  endif
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
-- 
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Tarantool-patches] [PATCH luajit v2 4/6] ARM64: Reorder interpreter stack frame and fix unwinding.
  2022-10-06  9:48 ` [Tarantool-patches] [PATCH luajit v2 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
                     ` (2 preceding siblings ...)
  2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches
@ 2022-10-06  9:48   ` Maksim Kokryashkin via Tarantool-patches
  2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 5/6] OSX/ARM64: Disable unwind info Maksim Kokryashkin via Tarantool-patches
  2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 6/6] OSX/ARM64: Fix external unwinding Maksim Kokryashkin via Tarantool-patches
  5 siblings, 0 replies; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-10-06  9:48 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun

From: Mike Pall <mike>

Reported by Yichun Zhang. Fixes #722.
May help towards fixing #698, too.

(cherry picked from commit 421c4c798791d27b7f967df39891c4e4fa1d107c)

The `_Unwind_Find_FDE` fails to find the FDE (frame descriptor
element) for `lj_vm_ffi_call` in DWARF unwind info, despite
the presence of its data in the `.debug_frame` section.

LuaJIT emits its own DWARF entries for the CFI (call frame
information, section 6.4.1 in DWARF standard)[1]. Also,
DWARF has the augmentation mechanism, which you can use to
provide additional platform-specific info, which affects unwinder.
Normally, DWARF saves and restores the GPRs and the FPRs in the
natural order, starting with the GPRs. However, LuaJIT has
augmentation segment in emitted debug info, which specifies,
that it saves registers in different order and uses GOT in
PC-relative mode. Those debug entries had incorrect offsets.

Reordering of interpreter stack frame fixes the issue,
making offsets consistent with augmentation and the
DWARF-standard itself.

[1]: https://dwarfstd.org/doc/DWARF5.pdf

Maxim Kokryashkin:
* added the description for the problem

Needed for tarantool/tarantool#6096
Part of tarantool/tarantool#7230
---
>> +|.define SAVE_GPR_, 112 // 112+10*8: 64 bit GPR saves
>
>(Not for change, just random thoughts)
>Why it is not .define SAVE_GPR_, SAVE_FPR_+8*8
>
>> +|.define SAVE_FPR_, 48 // 48+8*8: 64 bit FPR saves
I have no clue whatsoever. It is Mike-way of doing things.
Furthermore, normally, FPRs are stored after the GPRs.

>> +| stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
>> +| stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
>
>(Not for change, just random thoughts)
>Should those 14 and 27 magic numbers be a define also, describing the idea
>behind them?
Those a just some register numbers (like x14 or x27). I don't
really think they should be defined. Libunwind has several similar
constructions in its source and nothing is defined there.

 src/lj_frame.h    |  12 +--
 src/vm_arm64.dasc | 189 +++++++++++++++++++++++++++++++++++-----------
 2 files changed, 152 insertions(+), 49 deletions(-)

diff --git a/src/lj_frame.h b/src/lj_frame.h
index 9fd63fa2..1e4adaa3 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -192,12 +192,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK };  /* Special continuations. */
 #endif
 #define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_ARM64
-#define CFRAME_OFS_ERRF		196
-#define CFRAME_OFS_NRES		200
-#define CFRAME_OFS_PREV		160
-#define CFRAME_OFS_L		176
-#define CFRAME_OFS_PC		168
-#define CFRAME_OFS_MULTRES	192
+#define CFRAME_OFS_ERRF		36
+#define CFRAME_OFS_NRES		40
+#define CFRAME_OFS_PREV		0
+#define CFRAME_OFS_L		16
+#define CFRAME_OFS_PC		8
+#define CFRAME_OFS_MULTRES	32
 #define CFRAME_SIZE		208
 #define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_PPC
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index 313cc94f..ad57bca3 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -81,47 +81,49 @@
 |
 |.define CFRAME_SPACE,	208
 |//----- 16 byte aligned, <-- sp entering interpreter
-|// Unused		[sp, #204]	// 32 bit values
-|.define SAVE_NRES,	[sp, #200]
-|.define SAVE_ERRF,	[sp, #196]
-|.define SAVE_MULTRES,	[sp, #192]
-|.define TMPD,		[sp, #184]	// 64 bit values
-|.define SAVE_L,	[sp, #176]
-|.define SAVE_PC,	[sp, #168]
-|.define SAVE_CFRAME,	[sp, #160]
-|.define SAVE_FPR_,	96		// 96+8*8: 64 bit FPR saves
-|.define SAVE_GPR_,	16		// 16+10*8: 64 bit GPR saves
-|.define SAVE_LR,	[sp, #8]
-|.define SAVE_FP,	[sp]
+|.define SAVE_LR,	[sp, #200]
+|.define SAVE_FP,	[sp, #192]
+|.define SAVE_GPR_,	112		// 112+10*8: 64 bit GPR saves
+|.define SAVE_FPR_,	48		// 48+8*8: 64 bit FPR saves
+|// Unused		[sp, #44]	// 32 bit values
+|.define SAVE_NRES,	[sp, #40]
+|.define SAVE_ERRF,	[sp, #36]
+|.define SAVE_MULTRES,	[sp, #32]
+|.define TMPD,		[sp, #24]	// 64 bit values
+|.define SAVE_L,	[sp, #16]
+|.define SAVE_PC,	[sp, #8]
+|.define SAVE_CFRAME,	[sp, #0]
 |//----- 16 byte aligned, <-- sp while in interpreter.
 |
-|.define TMPDofs,	#184
+|.define TMPDofs,	#24
 |
 |.macro save_, gpr1, gpr2, fpr1, fpr2
-|  stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
-|  stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
+|  stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
+|  stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
 |.endmacro
 |.macro rest_, gpr1, gpr2, fpr1, fpr2
-|  ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
-|  ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
+|  ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
+|  ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
 |.endmacro
 |
 |.macro saveregs
-|  stp fp, lr, [sp, #-CFRAME_SPACE]!
+|  sub sp, sp, # CFRAME_SPACE
+|  stp fp, lr, SAVE_FP
 |  add fp, sp, #0
-|  stp x19, x20, [sp, # SAVE_GPR_]
+|  stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
 |  save_ 21, 22, 8, 9
 |  save_ 23, 24, 10, 11
 |  save_ 25, 26, 12, 13
 |  save_ 27, 28, 14, 15
 |.endmacro
 |.macro restoreregs
-|  ldp x19, x20, [sp, # SAVE_GPR_]
+|  ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
 |  rest_ 21, 22, 8, 9
 |  rest_ 23, 24, 10, 11
 |  rest_ 25, 26, 12, 13
 |  rest_ 27, 28, 14, 15
-|  ldp fp, lr, [sp], # CFRAME_SPACE
+|  ldp fp, lr, SAVE_FP
+|  add sp, sp, # CFRAME_SPACE
 |.endmacro
 |
 |// Type definitions. Some of these are only used for documentation.
@@ -2125,9 +2127,9 @@ static void build_subroutines(BuildCtx *ctx)
   |  // Caveat: needs special frame unwinding, see below.
   |.if FFI
   |  .type CCSTATE, CCallState, x19
-  |  stp fp, lr, [sp, #-32]!
+  |  stp x20, CCSTATE, [sp, #-32]!
+  |  stp fp, lr, [sp, #16]
   |  add fp, sp, #0
-  |  str CCSTATE, [sp, #16]
   |  mov CCSTATE, x0
   |  ldr TMP0w, CCSTATE:x0->spadj
   |   ldrb TMP1w, CCSTATE->nsp
@@ -2156,8 +2158,8 @@ static void build_subroutines(BuildCtx *ctx)
   |  stp x0, x1, CCSTATE->gpr[0]
   |   stp d0, d1, CCSTATE->fpr[0]
   |   stp d2, d3, CCSTATE->fpr[2]
-  |  ldr CCSTATE, [sp, #16]
-  |  ldp fp, lr, [sp], #32
+  |  ldp fp, lr, [sp, #16]
+  |  ldp x20, CCSTATE, [sp], #32
   |  ret
   |.endif
   |// Note: vm_ffi_call must be the last function in this object file!
@@ -2887,7 +2889,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
   case BC_GGET:
     |  // RA = dst, RC = str_const (~)
   case BC_GSET:
-    |  // RA = dst, RC = str_const (~)
+    |  // RA = src, RC = str_const (~)
     |  ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
     |   mvn RC, RC
     |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
@@ -3863,7 +3865,7 @@ static int build_backend(BuildCtx *ctx)
 static void emit_asm_debug(BuildCtx *ctx)
 {
   int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
-  int i, cf = CFRAME_SIZE >> 3;
+  int i;
   switch (ctx->mode) {
   case BUILD_elfasm:
     fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
@@ -3888,14 +3890,14 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.quad .Lbegin\n"
 	"\t.quad %d\n"
 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
-	fcofs, CFRAME_SIZE, cf, cf-1);
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
+	fcofs, CFRAME_SIZE);
     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
-      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
+      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
       fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
-	      64+i, cf-i-4);
+	      64+i, i+(3+(28-19+1)-8));
     fprintf(ctx->fp,
 	"\t.align 3\n"
 	".LEFDE0:\n\n");
@@ -3908,9 +3910,11 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.quad lj_vm_ffi_call\n"
 	"\t.quad %d\n"
 	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
-	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
+	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
 	"\t.align 3\n"
 	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
 #endif
@@ -3941,14 +3945,14 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
-	fcofs, CFRAME_SIZE, cf, cf-1);
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
+	fcofs, CFRAME_SIZE);
     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
-      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
+      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
       fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
-	      64+i, cf-i-4);
+	      64+i, i+(3+(28-19+1)-8));
     fprintf(ctx->fp,
 	"\t.align 3\n"
 	".LEFDE2:\n\n");
@@ -3977,13 +3981,112 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
 	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
-	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
+	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
 	"\t.align 3\n"
 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
 #endif
     break;
+#if !LJ_NO_UNWIND
+  case BUILD_machasm: {
+#if LJ_HASFFI
+    int fcsize = 0;
+#endif
+    int j;
+    fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
+    fprintf(ctx->fp,
+	"EH_frame1:\n"
+	"\t.set L$set$x,LECIEX-LSCIEX\n"
+	"\t.long L$set$x\n"
+	"LSCIEX:\n"
+	"\t.long 0\n"
+	"\t.byte 0x1\n"
+	"\t.ascii \"zPR\\0\"\n"
+	"\t.byte 0x1\n"
+	"\t.byte 128-8\n"
+	"\t.byte 30\n"				/* Return address is in lr. */
+	"\t.byte 6\n"				/* augmentation length */
+	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
+	"\t.long _lj_err_unwind_dwarf@GOTPCREL\n"
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
+	"\t.align 3\n"
+	"LECIEX:\n\n");
+    for (j = 0; j < ctx->nsym; j++) {
+      const char *name = ctx->sym[j].name;
+      int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs;
+      if (size == 0) continue;
+#if LJ_HASFFI
+      if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
+#endif
+      fprintf(ctx->fp,
+	"%s.eh:\n"
+	"LSFDE%d:\n"
+	"\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
+	"\t.long L$set$%d\n"
+	"LASFDE%d:\n"
+	"\t.long LASFDE%d-EH_frame1\n"
+	"\t.long %s-.\n"
+	"\t.long %d\n"
+	"\t.byte 0\n"				/* augmentation length */
+	"\t.byte 0xe\n\t.byte %d\n\t.byte 1\n"	/* def_cfa_offset */
+	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.byte 2\n",		/* offset fp */
+	name, j, j, j, j, j, j, j, name, size, CFRAME_SIZE);
+      for (i = 19; i <= 28; i++)  /* offset x19-x28 */
+	fprintf(ctx->fp, "\t.byte 0x%x\n\t.byte %d\n", 0x80+i, i+(3-19));
+      for (i = 8; i <= 15; i++)  /* offset d8-d15 */
+	fprintf(ctx->fp, "\t.byte 5\n\t.byte 0x%x\n\t.byte %d\n",
+		64+i, i+(3+(28-19+1)-8));
+      fprintf(ctx->fp,
+	"\t.align 3\n"
+	"LEFDE%d:\n\n", j);
+    }
+#if LJ_HASFFI
+    if (fcsize) {
+      fprintf(ctx->fp,
+	"EH_frame2:\n"
+	"\t.set L$set$y,LECIEY-LSCIEY\n"
+	"\t.long L$set$y\n"
+	"LSCIEY:\n"
+	"\t.long 0\n"
+	"\t.byte 0x1\n"
+	"\t.ascii \"zR\\0\"\n"
+	"\t.byte 0x1\n"
+	"\t.byte 128-8\n"
+	"\t.byte 30\n"				/* Return address is in lr. */
+	"\t.byte 1\n"				/* augmentation length */
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
+	"\t.align 3\n"
+	"LECIEY:\n\n");
+      fprintf(ctx->fp,
+	"_lj_vm_ffi_call.eh:\n"
+	"LSFDEY:\n"
+	"\t.set L$set$yy,LEFDEY-LASFDEY\n"
+	"\t.long L$set$yy\n"
+	"LASFDEY:\n"
+	"\t.long LASFDEY-EH_frame2\n"
+	"\t.long _lj_vm_ffi_call-.\n"
+	"\t.long %d\n"
+	"\t.byte 0\n"				/* augmentation length */
+	"\t.byte 0xe\n\t.byte 32\n"		/* def_cfa_offset */
+	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.byte 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.byte 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.byte 4\n"		/* offset x20 */
+	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
+	"\t.align 3\n"
+	"LEFDEY:\n\n", fcsize);
+    }
+#endif
+    fprintf(ctx->fp, ".subsections_via_symbols\n");
+    }
+    break;
+#endif
   default:
     break;
   }
--
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Tarantool-patches] [PATCH luajit v2 5/6] OSX/ARM64: Disable unwind info.
  2022-10-06  9:48 ` [Tarantool-patches] [PATCH luajit v2 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
                     ` (3 preceding siblings ...)
  2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 4/6] ARM64: Reorder interpreter stack frame and fix unwinding Maksim Kokryashkin via Tarantool-patches
@ 2022-10-06  9:48   ` Maksim Kokryashkin via Tarantool-patches
  2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 6/6] OSX/ARM64: Fix external unwinding Maksim Kokryashkin via Tarantool-patches
  5 siblings, 0 replies; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-10-06  9:48 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun

From: Mike Pall <mike>

See #698.

(cherry picked from commit 78350a2565e1cf1102bcd25be406f02953d4dd3b)

External unwinding support is already disabled for OSX on ARM64
platform so there is no point in generation of incorrect unwind
info for it. This patch disables that generation for the commit
history match, and it will be re-enabled in the next commit,
which contains the fix for the issue.

Maxim Kokryashkin:
* added the description for the problem

Needed for tarantool/tarantool#6096
Part of tarantool/tarantool#7230
---
 src/vm_arm64.dasc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index ad57bca3..ccfa72bd 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -3990,7 +3990,8 @@ static void emit_asm_debug(BuildCtx *ctx)
 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
 #endif
     break;
-#if !LJ_NO_UNWIND
+    /* Disabled until someone finds a fix. See #698. */
+#if !LJ_NO_UNWIND && 0
   case BUILD_machasm: {
 #if LJ_HASFFI
     int fcsize = 0;
-- 
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Tarantool-patches] [PATCH luajit v2 6/6] OSX/ARM64: Fix external unwinding.
  2022-10-06  9:48 ` [Tarantool-patches] [PATCH luajit v2 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
                     ` (4 preceding siblings ...)
  2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 5/6] OSX/ARM64: Disable unwind info Maksim Kokryashkin via Tarantool-patches
@ 2022-10-06  9:48   ` Maksim Kokryashkin via Tarantool-patches
  5 siblings, 0 replies; 23+ messages in thread
From: Maksim Kokryashkin via Tarantool-patches @ 2022-10-06  9:48 UTC (permalink / raw)
  To: tarantool-patches, sergos, skaplun; +Cc: Maksim Kokryashkin

Contributed by Edmund Kapusniak. For more info,
see #698 and #757.

(cherry picked from commit c38747b626b978555324504ec29a110f6b04902f)

To allow compiler generate compact unwind info generation
for Mach-O, fp must point to the saved fp, and the frame
must be specified relative to fp+16. ELF unwind info has
been updated to also use fp+16 rather than sp+CFRAME_SIZE.

Offset to pointer to personality routine specified as @GOT-. rather
than @GOTPCREL.

Re-enabled LUAJIT_UNWIND_EXTERNAL by default on OSX.

Maxim Kokryashkin:
* added the description for the issue and the test

Resolves tarantool/tarantool#6096
Part of tarantool/tarantool#7230
---
>Also, I did’t get the point behind the test: what will be it’s behavior with unwind
>info generation turned off - which will be the behavior of luajit before this patch?

Oh, although the test itself is quite simple, the things that happen
under the hood are not so simple. If you want to really dive into the
process of exception handling, I recommend you to start from this[1]
diagram and explanation to it to get basic understanding of how exceptions
are handled (it is from Itanium docs, but whatever), then you can read
this[2] explanation of how it works in raptorjit. Finally, if you are
not overwhelmed yet, you can proceed with this talk[3] (again, it is
about Windows, but main concepts are essentially the same).

TL;DR:
Without that patch set `pcall` uses the internal unwinder, which has
several downsides[4] when compared to external unwinding.

After the first two patches, the external unwinding is enabled on
several platforms, including MacOS ARM64. However,  external unwinder
lacks the unwind info on the last-mentioned platform, so instead of
finding the so called `landing pad` on the C-side and safely catching
an exception, it causes panic.

Finally, the last patch fixes the unwind info generation,
and external unwinder starts to work just fine.

I think it is clear now, that the test just checks whether all
requirements needed for successful exception handling are
satisfied on all platforms, regardless of the unwinding type.

[1]: https://itanium-cxx-abi.github.io/cxx-abi/exceptions.pdf
[2]: https://github.com/raptorjit/raptorjit/issues/110
[3]: https://www.youtube.com/watch?v=COEv2kq_Ht8&t=25s&ab_channel=CppCon
[4]: https://github.com/tarantool/luajit/blob/0e1616485f2d841cfd392d9640ee894ba80f8f9b/src/lj_err.c#L22

 cmake/SetTargetFlags.cmake                    |  4 +-
 src/Makefile.original                         |  5 +-
 src/vm_arm64.dasc                             | 89 ++++++++-----------
 ...-6096-external-unwinding-on-arm64.test.lua | 13 +++
 4 files changed, 54 insertions(+), 57 deletions(-)
 create mode 100644 test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua

diff --git a/cmake/SetTargetFlags.cmake b/cmake/SetTargetFlags.cmake
index d99e1f9a..a5a3407f 100644
--- a/cmake/SetTargetFlags.cmake
+++ b/cmake/SetTargetFlags.cmake
@@ -16,9 +16,7 @@ LuaJITTestArch(TESTARCH "${TARGET_C_FLAGS}")
 LuaJITArch(LUAJIT_ARCH "${TESTARCH}")

 if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
-  if(NOT LUAJIT_ARCH STREQUAL "arm64")
-    AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
-  endif()
+  AppendFlags(TARGET_C_FLAGS -DLUAJIT_UNWIND_EXTERNAL)
 else()
   string(FIND ${TARGET_C_FLAGS} "LJ_NO_UNWIND 1" UNWIND_POS)
   if(UNWIND_POS EQUAL -1)
diff --git a/src/Makefile.original b/src/Makefile.original
index 5826a56a..d1373b40 100644
--- a/src/Makefile.original
+++ b/src/Makefile.original
@@ -325,10 +325,7 @@ ifeq (Darwin,$(TARGET_SYS))
     export MACOSX_DEPLOYMENT_TARGET=10.4
   endif
   TARGET_STRIP+= -x
-  # Ext. unwinding is broken on OSX/ARM64 until someone finds a fix. See #698.
-  ifneq (arm64,$(TARGET_LJARCH))
-    TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
-  endif
+  TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
   TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index ccfa72bd..f517a808 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -81,8 +81,7 @@
 |
 |.define CFRAME_SPACE,	208
 |//----- 16 byte aligned, <-- sp entering interpreter
-|.define SAVE_LR,	[sp, #200]
-|.define SAVE_FP,	[sp, #192]
+|.define SAVE_FP_LR_,	192
 |.define SAVE_GPR_,	112		// 112+10*8: 64 bit GPR saves
 |.define SAVE_FPR_,	48		// 48+8*8: 64 bit FPR saves
 |// Unused		[sp, #44]	// 32 bit values
@@ -108,8 +107,8 @@
 |
 |.macro saveregs
 |  sub sp, sp, # CFRAME_SPACE
-|  stp fp, lr, SAVE_FP
-|  add fp, sp, #0
+|  stp fp, lr, [sp, # SAVE_FP_LR_]
+|  add fp, sp, # SAVE_FP_LR_
 |  stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
 |  save_ 21, 22, 8, 9
 |  save_ 23, 24, 10, 11
@@ -122,7 +121,7 @@
 |  rest_ 23, 24, 10, 11
 |  rest_ 25, 26, 12, 13
 |  rest_ 27, 28, 14, 15
-|  ldp fp, lr, SAVE_FP
+|  ldp fp, lr, [sp, # SAVE_FP_LR_]
 |  add sp, sp, # CFRAME_SPACE
 |.endmacro
 |
@@ -504,8 +503,9 @@ static void build_subroutines(BuildCtx *ctx)
   |    ldr GL, L->glref			// Setup pointer to global state.
   |     mov BASE, CARG2
   |   str CARG1, SAVE_PC		// Any value outside of bytecode is ok.
-  |  str RC, SAVE_CFRAME
-  |  str fp, L->cframe			// Add our C frame to cframe chain.
+  |  add TMP0, sp, #0
+  |   str RC, SAVE_CFRAME
+  |  str TMP0, L->cframe		// Add our C frame to cframe chain.
   |
   |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
   |  str L, GL->cur_L
@@ -540,8 +540,9 @@ static void build_subroutines(BuildCtx *ctx)
   |   sub RA, RA, RB			// Compute -savestack(L, L->top).
   |   str RAw, SAVE_NRES		// Neg. delta means cframe w/o frame.
   |  str wzr, SAVE_ERRF			// No error function.
-  |  str RC, SAVE_CFRAME
-  |  str fp, L->cframe			// Add our C frame to cframe chain.
+  |  add TMP0, sp, #0
+  |   str RC, SAVE_CFRAME
+  |  str TMP0, L->cframe		// Add our C frame to cframe chain.
   |    str L, GL->cur_L
   |  blr CARG4			// (lua_State *L, lua_CFunction func, void *ud)
   |  mov BASE, CRET1
@@ -2129,14 +2130,14 @@ static void build_subroutines(BuildCtx *ctx)
   |  .type CCSTATE, CCallState, x19
   |  stp x20, CCSTATE, [sp, #-32]!
   |  stp fp, lr, [sp, #16]
-  |  add fp, sp, #0
+  |  add fp, sp, #16
   |  mov CCSTATE, x0
   |  ldr TMP0w, CCSTATE:x0->spadj
   |   ldrb TMP1w, CCSTATE->nsp
   |    add TMP2, CCSTATE, #offsetof(CCallState, stack)
   |   subs TMP1, TMP1, #1
   |    ldr TMP3, CCSTATE->func
-  |  sub sp, fp, TMP0
+  |  sub sp, sp, TMP0
   |   bmi >2
   |1:  // Copy stack slots
   |  ldr TMP0, [TMP2, TMP1, lsl #3]
@@ -2154,7 +2155,7 @@ static void build_subroutines(BuildCtx *ctx)
   |   ldp d6, d7, CCSTATE->fpr[6]
   |  ldr x8, CCSTATE->retp
   |  blr TMP3
-  |  mov sp, fp
+  |  sub sp, fp, #16
   |  stp x0, x1, CCSTATE->gpr[0]
   |   stp d0, d1, CCSTATE->fpr[0]
   |   stp d2, d3, CCSTATE->fpr[2]
@@ -3879,7 +3880,7 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.uleb128 0x1\n"
 	"\t.sleb128 -8\n"
 	"\t.byte 30\n"				/* Return address is in lr. */
-	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
 	"\t.align 3\n"
 	".LECIE0:\n\n");
     fprintf(ctx->fp,
@@ -3889,10 +3890,9 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long .Lframe0\n"
 	"\t.quad .Lbegin\n"
 	"\t.quad %d\n"
-	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
 	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
 	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
-	fcofs, CFRAME_SIZE);
+	fcofs);
     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
       fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
@@ -3909,12 +3909,10 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long .Lframe0\n"
 	"\t.quad lj_vm_ffi_call\n"
 	"\t.quad %d\n"
-	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
 	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
 	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
 	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
 	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
-	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
 	"\t.align 3\n"
 	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
 #endif
@@ -3933,7 +3931,7 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
 	"\t.long lj_err_unwind_dwarf-.\n"
 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
 	"\t.align 3\n"
 	".LECIE1:\n\n");
     fprintf(ctx->fp,
@@ -3944,10 +3942,9 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long .Lbegin-.\n"
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
-	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
 	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
 	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
-	fcofs, CFRAME_SIZE);
+	fcofs);
     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
       fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
@@ -3969,7 +3966,7 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.byte 30\n"				/* Return address is in lr. */
 	"\t.uleb128 1\n"			/* augmentation length */
 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
 	"\t.align 3\n"
 	".LECIE2:\n\n");
     fprintf(ctx->fp,
@@ -3980,18 +3977,15 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long lj_vm_ffi_call-.\n"
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
-	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
 	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
 	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
 	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
 	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
-	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
 	"\t.align 3\n"
 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
 #endif
     break;
-    /* Disabled until someone finds a fix. See #698. */
-#if !LJ_NO_UNWIND && 0
+#if !LJ_NO_UNWIND
   case BUILD_machasm: {
 #if LJ_HASFFI
     int fcsize = 0;
@@ -4006,14 +4000,14 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long 0\n"
 	"\t.byte 0x1\n"
 	"\t.ascii \"zPR\\0\"\n"
-	"\t.byte 0x1\n"
-	"\t.byte 128-8\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -8\n"
 	"\t.byte 30\n"				/* Return address is in lr. */
-	"\t.byte 6\n"				/* augmentation length */
+	"\t.uleb128 6\n"			/* augmentation length */
 	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
-	"\t.long _lj_err_unwind_dwarf@GOTPCREL\n"
+	"\t.long _lj_err_unwind_dwarf@GOT-.\n"
 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
 	"\t.align 3\n"
 	"LECIEX:\n\n");
     for (j = 0; j < ctx->nsym; j++) {
@@ -4024,7 +4018,6 @@ static void emit_asm_debug(BuildCtx *ctx)
       if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
 #endif
       fprintf(ctx->fp,
-	"%s.eh:\n"
 	"LSFDE%d:\n"
 	"\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
 	"\t.long L$set$%d\n"
@@ -4032,15 +4025,14 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long LASFDE%d-EH_frame1\n"
 	"\t.long %s-.\n"
 	"\t.long %d\n"
-	"\t.byte 0\n"				/* augmentation length */
-	"\t.byte 0xe\n\t.byte %d\n\t.byte 1\n"	/* def_cfa_offset */
-	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
-	"\t.byte 0x9d\n\t.byte 2\n",		/* offset fp */
-	name, j, j, j, j, j, j, j, name, size, CFRAME_SIZE);
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
+	j, j, j, j, j, j, j, name, size);
       for (i = 19; i <= 28; i++)  /* offset x19-x28 */
-	fprintf(ctx->fp, "\t.byte 0x%x\n\t.byte %d\n", 0x80+i, i+(3-19));
+	fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
       for (i = 8; i <= 15; i++)  /* offset d8-d15 */
-	fprintf(ctx->fp, "\t.byte 5\n\t.byte 0x%x\n\t.byte %d\n",
+	fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
 		64+i, i+(3+(28-19+1)-8));
       fprintf(ctx->fp,
 	"\t.align 3\n"
@@ -4056,16 +4048,15 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long 0\n"
 	"\t.byte 0x1\n"
 	"\t.ascii \"zR\\0\"\n"
-	"\t.byte 0x1\n"
-	"\t.byte 128-8\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -8\n"
 	"\t.byte 30\n"				/* Return address is in lr. */
-	"\t.byte 1\n"				/* augmentation length */
+	"\t.uleb128 1\n"			/* augmentation length */
 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.byte 31\n\t.byte 0\n"	/* def_cfa sp */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
 	"\t.align 3\n"
 	"LECIEY:\n\n");
       fprintf(ctx->fp,
-	"_lj_vm_ffi_call.eh:\n"
 	"LSFDEY:\n"
 	"\t.set L$set$yy,LEFDEY-LASFDEY\n"
 	"\t.long L$set$yy\n"
@@ -4073,13 +4064,11 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.long LASFDEY-EH_frame2\n"
 	"\t.long _lj_vm_ffi_call-.\n"
 	"\t.long %d\n"
-	"\t.byte 0\n"				/* augmentation length */
-	"\t.byte 0xe\n\t.byte 32\n"		/* def_cfa_offset */
-	"\t.byte 0x9e\n\t.byte 1\n"		/* offset lr */
-	"\t.byte 0x9d\n\t.byte 2\n"		/* offset fp */
-	"\t.byte 0x93\n\t.byte 3\n"		/* offset x19 */
-	"\t.byte 0x94\n\t.byte 4\n"		/* offset x20 */
-	"\t.byte 0xd\n\t.uleb128 0x1d\n"	/* def_cfa_register fp */
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
 	"\t.align 3\n"
 	"LEFDEY:\n\n", fcsize);
     }
diff --git a/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua b/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua
new file mode 100644
index 00000000..cdeea441
--- /dev/null
+++ b/test/tarantool-tests/gh-6096-external-unwinding-on-arm64.test.lua
@@ -0,0 +1,13 @@
+local tap = require('tap')
+
+-- Test file to check correctnes of external unwinding
+-- in LuaJIT.
+-- See also https://github.com/LuaJIT/LuaJIT/issues/698,
+-- https://github.com/LuaJIT/LuaJIT/pull/757.
+local test = tap.test('gh-6096-external-unwinding-on-arm64')
+test:plan(1)
+
+local res = pcall(require, 'not-existing-module')
+test:ok(res == false, 'successful unwinding in pcall')
+
+os.exit(test:check() and 0 or 1)
--
2.32.1 (Apple Git-133)


^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2022-10-06  9:52 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-26 15:54 [Tarantool-patches] [PATCH luajit 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches
2022-09-28  8:20   ` sergos via Tarantool-patches
2022-10-03  7:36   ` Sergey Kaplun via Tarantool-patches
2022-09-26 15:54 ` [Tarantool-patches] [PATCH luajit 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches
2022-10-03 10:54   ` Sergey Kaplun via Tarantool-patches
2022-10-03 15:58   ` sergos via Tarantool-patches
2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches
2022-10-03 11:08   ` Sergey Kaplun via Tarantool-patches
2022-10-04  8:26   ` sergos via Tarantool-patches
2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 4/6] ARM64: Reorder interpreter stack frame and fix unwinding Maksim Kokryashkin via Tarantool-patches
2022-10-04 15:48   ` sergos via Tarantool-patches
2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 5/6] OSX/ARM64: Disable unwind info Maksim Kokryashkin via Tarantool-patches
2022-10-04 15:52   ` sergos via Tarantool-patches
2022-09-26 15:55 ` [Tarantool-patches] [PATCH luajit 6/6] OSX/ARM64: Fix external unwinding Maksim Kokryashkin via Tarantool-patches
2022-10-04 16:05   ` sergos via Tarantool-patches
2022-10-06  9:48 ` [Tarantool-patches] [PATCH luajit v2 0/6] Fix external unwinding on M1 Maksim Kokryashkin via Tarantool-patches
2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 1/6] Cleanup and enable external unwinding for more platforms Maksim Kokryashkin via Tarantool-patches
2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 2/6] OSX: Fix build by hardcoding external frame unwinding Maksim Kokryashkin via Tarantool-patches
2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 3/6] OSX/ARM64: Disable external unwinding for now Maksim Kokryashkin via Tarantool-patches
2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 4/6] ARM64: Reorder interpreter stack frame and fix unwinding Maksim Kokryashkin via Tarantool-patches
2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 5/6] OSX/ARM64: Disable unwind info Maksim Kokryashkin via Tarantool-patches
2022-10-06  9:48   ` [Tarantool-patches] [PATCH luajit v2 6/6] OSX/ARM64: Fix external unwinding Maksim Kokryashkin via Tarantool-patches

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox