Tarantool development patches archive
 help / color / mirror / Atom feed
From: Sergey Kaplun via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Sergey Bronnikov <sergeyb@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH luajit 1/2] Cleanup CPU detection and tuning for old CPUs.
Date: Tue, 14 Jan 2025 14:06:57 +0300	[thread overview]
Message-ID: <f618e5abfe0cf7853176d2cc40f08347f6d7fc03.1736779534.git.skaplun@tarantool.org> (raw)
In-Reply-To: <cover.1736779534.git.skaplun@tarantool.org>

From: Mike Pall <mike>

(cherry picked from commit 0eddcbead2d67c16dcd4039a6765b9d2fc8ea631)

This patch does the following refactoring:
1) Drops optimizations for the Intel Atom CPU [1]: removes the
   `JIT_F_LEA_AGU` flag and related optimizations. The considerations
   for the use of LEA are complex and very CPU-specific, and depend
   mostly on the number of operands. In most cases, it isn't worth it
   due to the extra register pressure and/or extra instructions.
2) Drops optimizations for the AMD K8 and K10 CPUs [2][3]: removes the
   `JIT_F_PREFER_IMUL` flag and related optimizations.
3) Refactors the JIT flags defined in <lj_jit.h>. Now all CPU-specific
   JIT flags are defined as left shifts of `JIT_F_CPU` instead of
   hardcoded constants; the optimization flags are likewise defined as
   left shifts of `JIT_F_OPT`.
4) Adds detection of the ARMv8 CPU.
5) Drops the check for SSE2 since the VM already presumes that the CPU
   supports it.
6) Adds checks for the `__ARM_ARCH` [4] macro in <lj_arch.h>.
7) Drops the outdated comment in the amalgamation file about memory
   requirements.

Sergey Kaplun:
* added the description for the patch

[1]: https://en.wikipedia.org/wiki/Intel_Atom
[2]: https://en.wikipedia.org/wiki/AMD_K8
[3]: https://en.wikipedia.org/wiki/AMD_K10
[4]: https://developer.arm.com/documentation/dui0774/l/Other-Compiler-specific-Features/Predefined-macros

Part of tarantool/tarantool#10709
---
 src/Makefile.original |  1 -
 src/lib_jit.c         | 65 +++++++++++-------------------
 src/lj_arch.h         |  6 +--
 src/lj_asm_x86.h      | 33 +++++----------
 src/lj_dispatch.c     |  7 ----
 src/lj_emit_x86.h     |  5 +--
 src/lj_errmsg.h       |  4 --
 src/lj_jit.h          | 94 +++++++++++++++++++++++--------------------
 src/ljamalg.c         | 10 -----
 9 files changed, 87 insertions(+), 138 deletions(-)

diff --git a/src/Makefile.original b/src/Makefile.original
index 9f55fa32..8d925e3a 100644
--- a/src/Makefile.original
+++ b/src/Makefile.original
@@ -621,7 +621,6 @@ E= @echo
 default all:	$(TARGET_T)
 
 amalg:
-	@grep "^[+|]" ljamalg.c
 	$(MAKE) -f Makefile.original all "LJCORE_O=ljamalg.o"
 
 clean:
diff --git a/src/lib_jit.c b/src/lib_jit.c
index f705f334..9f870f68 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -104,8 +104,8 @@ LJLIB_CF(jit_status)
   jit_State *J = L2J(L);
   L->top = L->base;
   setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0);
-  flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING);
-  flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING);
+  flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING);
+  flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING);
   return (int)(L->top - L->base);
 #else
   setboolV(L->top++, 0);
@@ -467,7 +467,7 @@ static int jitopt_flag(jit_State *J, const char *str)
     str += str[2] == '-' ? 3 : 2;
     set = 0;
   }
-  for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) {
+  for (opt = JIT_F_OPT; ; opt <<= 1) {
     size_t len = *(const uint8_t *)lst;
     if (len == 0)
       break;
@@ -636,59 +636,41 @@ JIT_PARAMDEF(JIT_PARAMINIT)
 #undef JIT_PARAMINIT
   0
 };
-#endif
 
 #if LJ_TARGET_ARM && LJ_TARGET_LINUX
 #include <sys/utsname.h>
 #endif
 
-/* Arch-dependent CPU detection. */
-static uint32_t jit_cpudetect(lua_State *L)
+/* Arch-dependent CPU feature detection. */
+static uint32_t jit_cpudetect(void)
 {
   uint32_t flags = 0;
 #if LJ_TARGET_X86ORX64
+
   uint32_t vendor[4];
   uint32_t features[4];
   if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
-#if !LJ_HASJIT
-#define JIT_F_SSE2	2
-#endif
-    flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
-#if LJ_HASJIT
     flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
     flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
-    if (vendor[2] == 0x6c65746e) {  /* Intel. */
-      if ((features[0] & 0x0fff0ff0) == 0x000106c0)  /* Atom. */
-	flags |= JIT_F_LEA_AGU;
-    } else if (vendor[2] == 0x444d4163) {  /* AMD. */
-      uint32_t fam = (features[0] & 0x0ff00f00);
-      if (fam >= 0x00000f00)  /* K8, K10. */
-	flags |= JIT_F_PREFER_IMUL;
-    }
     if (vendor[0] >= 7) {
       uint32_t xfeatures[4];
       lj_vm_cpuid(7, xfeatures);
       flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
     }
-#endif
   }
-  /* Check for required instruction set support on x86 (unnecessary on x64). */
-#if LJ_TARGET_X86
-  if (!(flags & JIT_F_SSE2))
-    luaL_error(L, "CPU with SSE2 required");
-#endif
+  /* Don't bother checking for SSE2 -- the VM will crash before getting here. */
+
 #elif LJ_TARGET_ARM
-#if LJ_HASJIT
+
   int ver = LJ_ARCH_VERSION;  /* Compile-time ARM CPU detection. */
 #if LJ_TARGET_LINUX
   if (ver < 70) {  /* Runtime ARM CPU detection. */
     struct utsname ut;
     uname(&ut);
     if (strncmp(ut.machine, "armv", 4) == 0) {
-      if (ut.machine[4] >= '7')
-	ver = 70;
-      else if (ut.machine[4] == '6')
-	ver = 60;
+      if (ut.machine[4] >= '8') ver = 80;
+      else if (ut.machine[4] == '7') ver = 70;
+      else if (ut.machine[4] == '6') ver = 60;
     }
   }
 #endif
@@ -696,20 +678,22 @@ static uint32_t jit_cpudetect(lua_State *L)
 	   ver >= 61 ? JIT_F_ARMV6T2_ :
 	   ver >= 60 ? JIT_F_ARMV6_ : 0;
   flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
-#endif
+
 #elif LJ_TARGET_ARM64
+
   /* No optional CPU features to detect (for now). */
+
 #elif LJ_TARGET_PPC
-#if LJ_HASJIT
+
 #if LJ_ARCH_SQRT
   flags |= JIT_F_SQRT;
 #endif
 #if LJ_ARCH_ROUND
   flags |= JIT_F_ROUND;
 #endif
-#endif
+
 #elif LJ_TARGET_MIPS
-#if LJ_HASJIT
+
   /* Compile-time MIPS CPU detection. */
 #if LJ_ARCH_VERSION >= 20
   flags |= JIT_F_MIPSXXR2;
@@ -727,31 +711,28 @@ static uint32_t jit_cpudetect(lua_State *L)
     if (x) flags |= JIT_F_MIPSXXR2;  /* Either 0x80000000 (R2) or 0 (R1). */
   }
 #endif
-#endif
+
 #else
 #error "Missing CPU detection for this architecture"
 #endif
-  UNUSED(L);
   return flags;
 }
 
 /* Initialize JIT compiler. */
 static void jit_init(lua_State *L)
 {
-  uint32_t flags = jit_cpudetect(L);
-#if LJ_HASJIT
   jit_State *J = L2J(L);
-  J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
+  J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT;
   memcpy(J->param, jit_param_default, sizeof(J->param));
   lj_dispatch_update(G(L));
-#else
-  UNUSED(flags);
-#endif
 }
+#endif
 
 LUALIB_API int luaopen_jit(lua_State *L)
 {
+#if LJ_HASJIT
   jit_init(L);
+#endif
   lua_pushliteral(L, LJ_OS_NAME);
   lua_pushliteral(L, LJ_ARCH_NAME);
   lua_pushinteger(L, LUAJIT_VERSION_NUM);
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 3bdbe84e..e853c4a4 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -209,13 +209,13 @@
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
 
-#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__
+#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
 #define LJ_ARCH_VERSION		80
-#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
+#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
 #define LJ_ARCH_VERSION		70
 #elif __ARM_ARCH_6T2__
 #define LJ_ARCH_VERSION		61
-#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__
+#elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__
 #define LJ_ARCH_VERSION		60
 #else
 #define LJ_ARCH_VERSION		50
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 86ce3937..5819fa7a 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -1222,13 +1222,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
   } else {
     emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
-    if ((as->flags & JIT_F_PREFER_IMUL)) {
-      emit_i8(as, sizeof(Node));
-      emit_rr(as, XO_IMULi8, dest, dest);
-    } else {
-      emit_shifti(as, XOg_SHL, dest, 3);
-      emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
-    }
+    emit_shifti(as, XOg_SHL, dest, 3);
+    emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
     if (isk) {
       emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash);
       emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
@@ -1287,7 +1282,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
   if (ra_hasreg(dest)) {
     if (ofs != 0) {
-      if (dest == node && !(as->flags & JIT_F_LEA_AGU))
+      if (dest == node)
 	emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
       else
 	emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
@@ -2181,8 +2176,7 @@ static void asm_add(ASMState *as, IRIns *ir)
 {
   if (irt_isnum(ir->t))
     asm_fparith(as, ir, XO_ADDSD);
-  else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp ||
-	   irt_is64(ir->t) || !asm_lea(as, ir))
+  else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir))
     asm_intarith(as, ir, XOg_ADD);
 }
 
@@ -2887,7 +2881,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
   MCode *target, *q;
   int32_t spadj = as->T->spadjust;
   if (spadj == 0) {
-    p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0);
+    p -= LJ_64 ? 7 : 6;
   } else {
     MCode *p1;
     /* Patch stack adjustment. */
@@ -2899,20 +2893,11 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
       p1 = p-9;
       *(int32_t *)p1 = spadj;
     }
-    if ((as->flags & JIT_F_LEA_AGU)) {
-#if LJ_64
-      p1[-4] = 0x48;
-#endif
-      p1[-3] = (MCode)XI_LEA;
-      p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
-      p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
-    } else {
 #if LJ_64
-      p1[-3] = 0x48;
+    p1[-3] = 0x48;
 #endif
-      p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
-      p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
-    }
+    p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
+    p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
   }
   /* Patch exit branch. */
   target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
@@ -2943,7 +2928,7 @@ static void asm_tail_prep(ASMState *as)
     as->invmcp = as->mcp = p;
   } else {
     /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
-    as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6)  + (LJ_64 ? 1 : 0));
+    as->mcp = p - (LJ_64 ? 7 : 6);
     as->invmcp = NULL;
   }
 }
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
index ddee68de..a44a5adf 100644
--- a/src/lj_dispatch.c
+++ b/src/lj_dispatch.c
@@ -258,15 +258,8 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
     } else {
       if (!(mode & LUAJIT_MODE_ON))
 	G2J(g)->flags &= ~(uint32_t)JIT_F_ON;
-#if LJ_TARGET_X86ORX64
-      else if ((G2J(g)->flags & JIT_F_SSE2))
-	G2J(g)->flags |= (uint32_t)JIT_F_ON;
-      else
-	return 0;  /* Don't turn on JIT compiler without SSE2 support. */
-#else
       else
 	G2J(g)->flags |= (uint32_t)JIT_F_ON;
-#endif
       lj_dispatch_update(g);
     }
     break;
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index f4990151..85978027 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -561,10 +561,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
 {
   if (ofs) {
-    if ((as->flags & JIT_F_LEA_AGU))
-      emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs);
-    else
-      emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
+    emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
   }
 }
 
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
index 77a08cb0..19c41f0b 100644
--- a/src/lj_errmsg.h
+++ b/src/lj_errmsg.h
@@ -101,11 +101,7 @@ ERRDEF(STRGSRV,	"invalid replacement value (a %s)")
 ERRDEF(BADMODN,	"name conflict for module " LUA_QS)
 #if LJ_HASJIT
 ERRDEF(JITPROT,	"runtime code generation failed, restricted kernel?")
-#if LJ_TARGET_X86ORX64
-ERRDEF(NOJIT,	"JIT compiler disabled, CPU does not support SSE2")
-#else
 ERRDEF(NOJIT,	"JIT compiler disabled")
-#endif
 #elif defined(LJ_ARCH_NOJIT)
 ERRDEF(NOJIT,	"no JIT compiler for this architecture (yet)")
 #else
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 361570a0..47df85c6 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -9,47 +9,49 @@
 #include "lj_obj.h"
 #include "lj_ir.h"
 
-/* JIT engine flags. */
+/* -- JIT engine flags ---------------------------------------------------- */
+
+/* General JIT engine flags. 4 bits. */
 #define JIT_F_ON		0x00000001
 
-/* CPU-specific JIT engine flags. */
+/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */
+#define JIT_F_CPU		0x00000010
+
 #if LJ_TARGET_X86ORX64
-#define JIT_F_SSE2		0x00000010
-#define JIT_F_SSE3		0x00000020
-#define JIT_F_SSE4_1		0x00000040
-#define JIT_F_PREFER_IMUL	0x00000080
-#define JIT_F_LEA_AGU		0x00000100
-#define JIT_F_BMI2		0x00000200
-
-/* Names for the CPU-specific flags. Must match the order above. */
-#define JIT_F_CPU_FIRST		JIT_F_SSE2
-#define JIT_F_CPUSTRING		"\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2"
+
+#define JIT_F_SSE3		(JIT_F_CPU << 0)
+#define JIT_F_SSE4_1		(JIT_F_CPU << 1)
+#define JIT_F_BMI2		(JIT_F_CPU << 2)
+
+
+#define JIT_F_CPUSTRING		"\4SSE3\6SSE4.1\4BMI2"
+
 #elif LJ_TARGET_ARM
-#define JIT_F_ARMV6_		0x00000010
-#define JIT_F_ARMV6T2_		0x00000020
-#define JIT_F_ARMV7		0x00000040
-#define JIT_F_VFPV2		0x00000080
-#define JIT_F_VFPV3		0x00000100
-
-#define JIT_F_ARMV6		(JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7)
-#define JIT_F_ARMV6T2		(JIT_F_ARMV6T2_|JIT_F_ARMV7)
+
+#define JIT_F_ARMV6_		(JIT_F_CPU << 0)
+#define JIT_F_ARMV6T2_		(JIT_F_CPU << 1)
+#define JIT_F_ARMV7		(JIT_F_CPU << 2)
+#define JIT_F_ARMV8		(JIT_F_CPU << 3)
+#define JIT_F_VFPV2		(JIT_F_CPU << 4)
+#define JIT_F_VFPV3		(JIT_F_CPU << 5)
+
+#define JIT_F_ARMV6		(JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
+#define JIT_F_ARMV6T2		(JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
 #define JIT_F_VFP		(JIT_F_VFPV2|JIT_F_VFPV3)
 
-/* Names for the CPU-specific flags. Must match the order above. */
-#define JIT_F_CPU_FIRST		JIT_F_ARMV6_
-#define JIT_F_CPUSTRING		"\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3"
+#define JIT_F_CPUSTRING		"\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3"
+
 #elif LJ_TARGET_PPC
-#define JIT_F_SQRT		0x00000010
-#define JIT_F_ROUND		0x00000020
 
-/* Names for the CPU-specific flags. Must match the order above. */
-#define JIT_F_CPU_FIRST		JIT_F_SQRT
+#define JIT_F_SQRT		(JIT_F_CPU << 0)
+#define JIT_F_ROUND		(JIT_F_CPU << 1)
+
 #define JIT_F_CPUSTRING		"\4SQRT\5ROUND"
+
 #elif LJ_TARGET_MIPS
-#define JIT_F_MIPSXXR2		0x00000010
 
-/* Names for the CPU-specific flags. Must match the order above. */
-#define JIT_F_CPU_FIRST		JIT_F_MIPSXXR2
+#define JIT_F_MIPSXXR2		(JIT_F_CPU << 0)
+
 #if LJ_TARGET_MIPS32
 #if LJ_TARGET_MIPSR6
 #define JIT_F_CPUSTRING		"\010MIPS32R6"
@@ -63,27 +65,29 @@
 #define JIT_F_CPUSTRING		"\010MIPS64R2"
 #endif
 #endif
+
 #else
-#define JIT_F_CPU_FIRST		0
+
 #define JIT_F_CPUSTRING		""
+
 #endif
 
-/* Optimization flags. */
+/* Optimization flags. 12 bits. */
+#define JIT_F_OPT		0x00010000
 #define JIT_F_OPT_MASK		0x0fff0000
 
-#define JIT_F_OPT_FOLD		0x00010000
-#define JIT_F_OPT_CSE		0x00020000
-#define JIT_F_OPT_DCE		0x00040000
-#define JIT_F_OPT_FWD		0x00080000
-#define JIT_F_OPT_DSE		0x00100000
-#define JIT_F_OPT_NARROW	0x00200000
-#define JIT_F_OPT_LOOP		0x00400000
-#define JIT_F_OPT_ABC		0x00800000
-#define JIT_F_OPT_SINK		0x01000000
-#define JIT_F_OPT_FUSE		0x02000000
+#define JIT_F_OPT_FOLD		(JIT_F_OPT << 0)
+#define JIT_F_OPT_CSE		(JIT_F_OPT << 1)
+#define JIT_F_OPT_DCE		(JIT_F_OPT << 2)
+#define JIT_F_OPT_FWD		(JIT_F_OPT << 3)
+#define JIT_F_OPT_DSE		(JIT_F_OPT << 4)
+#define JIT_F_OPT_NARROW	(JIT_F_OPT << 5)
+#define JIT_F_OPT_LOOP		(JIT_F_OPT << 6)
+#define JIT_F_OPT_ABC		(JIT_F_OPT << 7)
+#define JIT_F_OPT_SINK		(JIT_F_OPT << 8)
+#define JIT_F_OPT_FUSE		(JIT_F_OPT << 9)
 
 /* Optimizations names for -O. Must match the order above. */
-#define JIT_F_OPT_FIRST		JIT_F_OPT_FOLD
 #define JIT_F_OPTSTRING	\
   "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"
 
@@ -95,6 +99,8 @@
   JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
 #define JIT_F_OPT_DEFAULT	JIT_F_OPT_3
 
+/* -- JIT engine parameters ----------------------------------------------- */
+
 #if LJ_TARGET_WINDOWS || LJ_64
 /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
 #define JIT_P_sizemcode_DEFAULT		64
@@ -137,6 +143,8 @@ JIT_PARAMDEF(JIT_PARAMENUM)
 #define JIT_PARAMSTR(len, name, value)	#len #name
 #define JIT_P_STRING	JIT_PARAMDEF(JIT_PARAMSTR)
 
+/* -- JIT engine data structures ------------------------------------------ */
+
 /* Trace compiler state. */
 typedef enum {
   LJ_TRACE_IDLE,	/* Trace compiler idle. */
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 0ffc7e81..63b4ec87 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -3,16 +3,6 @@
 ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
-/*
-+--------------------------------------------------------------------------+
-| WARNING: Compiling the amalgamation needs a lot of virtual memory        |
-| (around 300 MB with GCC 4.x)! If you don't have enough physical memory   |
-| your machine will start swapping to disk and the compile will not finish |
-| within a reasonable amount of time.                                      |
-| So either compile on a bigger machine or use the non-amalgamated build.  |
-+--------------------------------------------------------------------------+
-*/
-
 #define ljamalg_c
 #define LUA_CORE
 
-- 
2.47.1


  reply	other threads:[~2025-01-14 11:08 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-14 11:06 [Tarantool-patches] [PATCH luajit 0/2] Refactoring and FMA optimizations Sergey Kaplun via Tarantool-patches
2025-01-14 11:06 ` Sergey Kaplun via Tarantool-patches [this message]
2025-01-14 11:25   ` [Tarantool-patches] [PATCH luajit 1/2] Cleanup CPU detection and tuning for old CPUs Sergey Bronnikov via Tarantool-patches
2025-01-15 13:10     ` Sergey Kaplun via Tarantool-patches
2025-01-14 11:06 ` [Tarantool-patches] [PATCH luajit 2/2] Disable FMA by default. Use -Ofma or jit.opt.start("+fma") to enable Sergey Kaplun via Tarantool-patches
2025-01-14 12:45   ` Sergey Bronnikov via Tarantool-patches
2025-01-15 13:06     ` Sergey Kaplun via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f618e5abfe0cf7853176d2cc40f08347f6d7fc03.1736779534.git.skaplun@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=sergeyb@tarantool.org \
    --cc=skaplun@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit 1/2] Cleanup CPU detection and tuning for old CPUs.' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox