From: Sergey Kaplun via Tarantool-patches <tarantool-patches@dev.tarantool.org> To: Sergey Bronnikov <sergeyb@tarantool.org> Cc: tarantool-patches@dev.tarantool.org Subject: [Tarantool-patches] [PATCH luajit 1/2] Cleanup CPU detection and tuning for old CPUs. Date: Tue, 14 Jan 2025 14:06:57 +0300 [thread overview] Message-ID: <f618e5abfe0cf7853176d2cc40f08347f6d7fc03.1736779534.git.skaplun@tarantool.org> (raw) In-Reply-To: <cover.1736779534.git.skaplun@tarantool.org> From: Mike Pall <mike> (cherry picked from commit 0eddcbead2d67c16dcd4039a6765b9d2fc8ea631) This patch does the following refactoring: 1) Drops optimizations for the Intel Atom CPU [1]: removes the `JIT_F_LEA_AGU` flag and related optimizations. The considerations for the use of LEA are complex and very CPU-specific, mostly dependent on the number of operands. Mostly, it isn't worth it due to the extra register pressure and/or extra instructions. 2) Drops optimizations for the AMD K8 and K10 CPUs [2][3]: removes the `JIT_F_PREFER_IMUL` flag and related optimizations. 3) Refactors the JIT flags defined in <lj_jit.h>. Now all CPU-specific JIT flags are defined as left shifts of `JIT_F_CPU` instead of hardcoded constants; the optimization flags are likewise defined as left shifts of `JIT_F_OPT`. 4) Adds detection of the ARMv8 CPU. 5) Drops the check for SSE2 since the VM already presumes the CPU supports it. 6) Adds checks for the `__ARM_ARCH` macro [4] in <lj_arch.h>. 7) Drops the outdated comment in the amalgamation file about memory requirements. 
Sergey Kaplun: * added the description for the patch [1]: https://en.wikipedia.org/wiki/Intel_Atom [2]: https://en.wikipedia.org/wiki/AMD_K8 [3]: https://en.wikipedia.org/wiki/AMD_K10 [4]: https://developer.arm.com/documentation/dui0774/l/Other-Compiler-specific-Features/Predefined-macros Part of tarantool/tarantool#10709 --- src/Makefile.original | 1 - src/lib_jit.c | 65 +++++++++++------------------- src/lj_arch.h | 6 +-- src/lj_asm_x86.h | 33 +++++---------- src/lj_dispatch.c | 7 ---- src/lj_emit_x86.h | 5 +-- src/lj_errmsg.h | 4 -- src/lj_jit.h | 94 +++++++++++++++++++++++-------------------- src/ljamalg.c | 10 ----- 9 files changed, 87 insertions(+), 138 deletions(-) diff --git a/src/Makefile.original b/src/Makefile.original index 9f55fa32..8d925e3a 100644 --- a/src/Makefile.original +++ b/src/Makefile.original @@ -621,7 +621,6 @@ E= @echo default all: $(TARGET_T) amalg: - @grep "^[+|]" ljamalg.c $(MAKE) -f Makefile.original all "LJCORE_O=ljamalg.o" clean: diff --git a/src/lib_jit.c b/src/lib_jit.c index f705f334..9f870f68 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -104,8 +104,8 @@ LJLIB_CF(jit_status) jit_State *J = L2J(L); L->top = L->base; setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); - flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); - flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); + flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING); + flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING); return (int)(L->top - L->base); #else setboolV(L->top++, 0); @@ -467,7 +467,7 @@ static int jitopt_flag(jit_State *J, const char *str) str += str[2] == '-' ? 3 : 2; set = 0; } - for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { + for (opt = JIT_F_OPT; ; opt <<= 1) { size_t len = *(const uint8_t *)lst; if (len == 0) break; @@ -636,59 +636,41 @@ JIT_PARAMDEF(JIT_PARAMINIT) #undef JIT_PARAMINIT 0 }; -#endif #if LJ_TARGET_ARM && LJ_TARGET_LINUX #include <sys/utsname.h> #endif -/* Arch-dependent CPU detection. 
*/ -static uint32_t jit_cpudetect(lua_State *L) +/* Arch-dependent CPU feature detection. */ +static uint32_t jit_cpudetect(void) { uint32_t flags = 0; #if LJ_TARGET_X86ORX64 + uint32_t vendor[4]; uint32_t features[4]; if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { -#if !LJ_HASJIT -#define JIT_F_SSE2 2 -#endif - flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; -#if LJ_HASJIT flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; - if (vendor[2] == 0x6c65746e) { /* Intel. */ - if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ - flags |= JIT_F_LEA_AGU; - } else if (vendor[2] == 0x444d4163) { /* AMD. */ - uint32_t fam = (features[0] & 0x0ff00f00); - if (fam >= 0x00000f00) /* K8, K10. */ - flags |= JIT_F_PREFER_IMUL; - } if (vendor[0] >= 7) { uint32_t xfeatures[4]; lj_vm_cpuid(7, xfeatures); flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; } -#endif } - /* Check for required instruction set support on x86 (unnecessary on x64). */ -#if LJ_TARGET_X86 - if (!(flags & JIT_F_SSE2)) - luaL_error(L, "CPU with SSE2 required"); -#endif + /* Don't bother checking for SSE2 -- the VM will crash before getting here. */ + #elif LJ_TARGET_ARM -#if LJ_HASJIT + int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ #if LJ_TARGET_LINUX if (ver < 70) { /* Runtime ARM CPU detection. */ struct utsname ut; uname(&ut); if (strncmp(ut.machine, "armv", 4) == 0) { - if (ut.machine[4] >= '7') - ver = 70; - else if (ut.machine[4] == '6') - ver = 60; + if (ut.machine[4] >= '8') ver = 80; + else if (ut.machine[4] == '7') ver = 70; + else if (ut.machine[4] == '6') ver = 60; } } #endif @@ -696,20 +678,22 @@ static uint32_t jit_cpudetect(lua_State *L) ver >= 61 ? JIT_F_ARMV6T2_ : ver >= 60 ? JIT_F_ARMV6_ : 0; flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; -#endif + #elif LJ_TARGET_ARM64 + /* No optional CPU features to detect (for now). 
*/ + #elif LJ_TARGET_PPC -#if LJ_HASJIT + #if LJ_ARCH_SQRT flags |= JIT_F_SQRT; #endif #if LJ_ARCH_ROUND flags |= JIT_F_ROUND; #endif -#endif + #elif LJ_TARGET_MIPS -#if LJ_HASJIT + /* Compile-time MIPS CPU detection. */ #if LJ_ARCH_VERSION >= 20 flags |= JIT_F_MIPSXXR2; @@ -727,31 +711,28 @@ static uint32_t jit_cpudetect(lua_State *L) if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ } #endif -#endif + #else #error "Missing CPU detection for this architecture" #endif - UNUSED(L); return flags; } /* Initialize JIT compiler. */ static void jit_init(lua_State *L) { - uint32_t flags = jit_cpudetect(L); -#if LJ_HASJIT jit_State *J = L2J(L); - J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; + J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT; memcpy(J->param, jit_param_default, sizeof(J->param)); lj_dispatch_update(G(L)); -#else - UNUSED(flags); -#endif } +#endif LUALIB_API int luaopen_jit(lua_State *L) { +#if LJ_HASJIT jit_init(L); +#endif lua_pushliteral(L, LJ_OS_NAME); lua_pushliteral(L, LJ_ARCH_NAME); lua_pushinteger(L, LUAJIT_VERSION_NUM); diff --git a/src/lj_arch.h b/src/lj_arch.h index 3bdbe84e..e853c4a4 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -209,13 +209,13 @@ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. 
*/ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#if __ARM_ARCH_8__ || __ARM_ARCH_8A__ +#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ #define LJ_ARCH_VERSION 80 -#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ +#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ #define LJ_ARCH_VERSION 70 #elif __ARM_ARCH_6T2__ #define LJ_ARCH_VERSION 61 -#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ +#elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ #define LJ_ARCH_VERSION 60 #else #define LJ_ARCH_VERSION 50 diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 86ce3937..5819fa7a 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1222,13 +1222,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); } else { emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); - if ((as->flags & JIT_F_PREFER_IMUL)) { - emit_i8(as, sizeof(Node)); - emit_rr(as, XO_IMULi8, dest, dest); - } else { - emit_shifti(as, XOg_SHL, dest, 3); - emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); - } + emit_shifti(as, XOg_SHL, dest, 3); + emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); if (isk) { emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); @@ -1287,7 +1282,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (ra_hasreg(dest)) { if (ofs != 0) { - if (dest == node && !(as->flags & JIT_F_LEA_AGU)) + if (dest == node) emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); else emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); @@ -2181,8 +2176,7 @@ static void asm_add(ASMState *as, IRIns *ir) { if (irt_isnum(ir->t)) 
asm_fparith(as, ir, XO_ADDSD); - else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp || - irt_is64(ir->t) || !asm_lea(as, ir)) + else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir)) asm_intarith(as, ir, XOg_ADD); } @@ -2887,7 +2881,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) MCode *target, *q; int32_t spadj = as->T->spadjust; if (spadj == 0) { - p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); + p -= LJ_64 ? 7 : 6; } else { MCode *p1; /* Patch stack adjustment. */ @@ -2899,20 +2893,11 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) p1 = p-9; *(int32_t *)p1 = spadj; } - if ((as->flags & JIT_F_LEA_AGU)) { -#if LJ_64 - p1[-4] = 0x48; -#endif - p1[-3] = (MCode)XI_LEA; - p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); - p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); - } else { #if LJ_64 - p1[-3] = 0x48; + p1[-3] = 0x48; #endif - p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); - p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); - } + p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); + p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); } /* Patch exit branch. */ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; @@ -2943,7 +2928,7 @@ static void asm_tail_prep(ASMState *as) as->invmcp = as->mcp = p; } else { /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ - as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); + as->mcp = p - (LJ_64 ? 7 : 6); as->invmcp = NULL; } } diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index ddee68de..a44a5adf 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -258,15 +258,8 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) } else { if (!(mode & LUAJIT_MODE_ON)) G2J(g)->flags &= ~(uint32_t)JIT_F_ON; -#if LJ_TARGET_X86ORX64 - else if ((G2J(g)->flags & JIT_F_SSE2)) - G2J(g)->flags |= (uint32_t)JIT_F_ON; - else - return 0; /* Don't turn on JIT compiler without SSE2 support. 
*/ -#else else G2J(g)->flags |= (uint32_t)JIT_F_ON; -#endif lj_dispatch_update(g); } break; diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index f4990151..85978027 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -561,10 +561,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) { - if ((as->flags & JIT_F_LEA_AGU)) - emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs); - else - emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); } } diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index 77a08cb0..19c41f0b 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h @@ -101,11 +101,7 @@ ERRDEF(STRGSRV, "invalid replacement value (a %s)") ERRDEF(BADMODN, "name conflict for module " LUA_QS) #if LJ_HASJIT ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") -#if LJ_TARGET_X86ORX64 -ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2") -#else ERRDEF(NOJIT, "JIT compiler disabled") -#endif #elif defined(LJ_ARCH_NOJIT) ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") #else diff --git a/src/lj_jit.h b/src/lj_jit.h index 361570a0..47df85c6 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -9,47 +9,49 @@ #include "lj_obj.h" #include "lj_ir.h" -/* JIT engine flags. */ +/* -- JIT engine flags ---------------------------------------------------- */ + +/* General JIT engine flags. 4 bits. */ #define JIT_F_ON 0x00000001 -/* CPU-specific JIT engine flags. */ +/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */ +#define JIT_F_CPU 0x00000010 + #if LJ_TARGET_X86ORX64 -#define JIT_F_SSE2 0x00000010 -#define JIT_F_SSE3 0x00000020 -#define JIT_F_SSE4_1 0x00000040 -#define JIT_F_PREFER_IMUL 0x00000080 -#define JIT_F_LEA_AGU 0x00000100 -#define JIT_F_BMI2 0x00000200 - -/* Names for the CPU-specific flags. Must match the order above. 
*/ -#define JIT_F_CPU_FIRST JIT_F_SSE2 -#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" + +#define JIT_F_SSE3 (JIT_F_CPU << 0) +#define JIT_F_SSE4_1 (JIT_F_CPU << 1) +#define JIT_F_BMI2 (JIT_F_CPU << 2) + + +#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2" + #elif LJ_TARGET_ARM -#define JIT_F_ARMV6_ 0x00000010 -#define JIT_F_ARMV6T2_ 0x00000020 -#define JIT_F_ARMV7 0x00000040 -#define JIT_F_VFPV2 0x00000080 -#define JIT_F_VFPV3 0x00000100 - -#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) -#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) + +#define JIT_F_ARMV6_ (JIT_F_CPU << 0) +#define JIT_F_ARMV6T2_ (JIT_F_CPU << 1) +#define JIT_F_ARMV7 (JIT_F_CPU << 2) +#define JIT_F_ARMV8 (JIT_F_CPU << 3) +#define JIT_F_VFPV2 (JIT_F_CPU << 4) +#define JIT_F_VFPV3 (JIT_F_CPU << 5) + +#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) +#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) #define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_ARMV6_ -#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3" +#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3" + #elif LJ_TARGET_PPC -#define JIT_F_SQRT 0x00000010 -#define JIT_F_ROUND 0x00000020 -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_SQRT +#define JIT_F_SQRT (JIT_F_CPU << 0) +#define JIT_F_ROUND (JIT_F_CPU << 1) + #define JIT_F_CPUSTRING "\4SQRT\5ROUND" + #elif LJ_TARGET_MIPS -#define JIT_F_MIPSXXR2 0x00000010 -/* Names for the CPU-specific flags. Must match the order above. 
*/ -#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 +#define JIT_F_MIPSXXR2 (JIT_F_CPU << 0) + #if LJ_TARGET_MIPS32 #if LJ_TARGET_MIPSR6 #define JIT_F_CPUSTRING "\010MIPS32R6" @@ -63,27 +65,29 @@ #define JIT_F_CPUSTRING "\010MIPS64R2" #endif #endif + #else -#define JIT_F_CPU_FIRST 0 + #define JIT_F_CPUSTRING "" + #endif -/* Optimization flags. */ +/* Optimization flags. 12 bits. */ +#define JIT_F_OPT 0x00010000 #define JIT_F_OPT_MASK 0x0fff0000 -#define JIT_F_OPT_FOLD 0x00010000 -#define JIT_F_OPT_CSE 0x00020000 -#define JIT_F_OPT_DCE 0x00040000 -#define JIT_F_OPT_FWD 0x00080000 -#define JIT_F_OPT_DSE 0x00100000 -#define JIT_F_OPT_NARROW 0x00200000 -#define JIT_F_OPT_LOOP 0x00400000 -#define JIT_F_OPT_ABC 0x00800000 -#define JIT_F_OPT_SINK 0x01000000 -#define JIT_F_OPT_FUSE 0x02000000 +#define JIT_F_OPT_FOLD (JIT_F_OPT << 0) +#define JIT_F_OPT_CSE (JIT_F_OPT << 1) +#define JIT_F_OPT_DCE (JIT_F_OPT << 2) +#define JIT_F_OPT_FWD (JIT_F_OPT << 3) +#define JIT_F_OPT_DSE (JIT_F_OPT << 4) +#define JIT_F_OPT_NARROW (JIT_F_OPT << 5) +#define JIT_F_OPT_LOOP (JIT_F_OPT << 6) +#define JIT_F_OPT_ABC (JIT_F_OPT << 7) +#define JIT_F_OPT_SINK (JIT_F_OPT << 8) +#define JIT_F_OPT_FUSE (JIT_F_OPT << 9) /* Optimizations names for -O. Must match the order above. */ -#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD #define JIT_F_OPTSTRING \ "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" @@ -95,6 +99,8 @@ JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 +/* -- JIT engine parameters ----------------------------------------------- */ + #if LJ_TARGET_WINDOWS || LJ_64 /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ #define JIT_P_sizemcode_DEFAULT 64 @@ -137,6 +143,8 @@ JIT_PARAMDEF(JIT_PARAMENUM) #define JIT_PARAMSTR(len, name, value) #len #name #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) +/* -- JIT engine data structures ------------------------------------------ */ + /* Trace compiler state. 
*/ typedef enum { LJ_TRACE_IDLE, /* Trace compiler idle. */ diff --git a/src/ljamalg.c b/src/ljamalg.c index 0ffc7e81..63b4ec87 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c @@ -3,16 +3,6 @@ ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h */ -/* -+--------------------------------------------------------------------------+ -| WARNING: Compiling the amalgamation needs a lot of virtual memory | -| (around 300 MB with GCC 4.x)! If you don't have enough physical memory | -| your machine will start swapping to disk and the compile will not finish | -| within a reasonable amount of time. | -| So either compile on a bigger machine or use the non-amalgamated build. | -+--------------------------------------------------------------------------+ -*/ - #define ljamalg_c #define LUA_CORE -- 2.47.1
next prev parent reply other threads:[~2025-01-14 11:08 UTC|newest] Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top 2025-01-14 11:06 [Tarantool-patches] [PATCH luajit 0/2] Refactoring and FMA optimizations Sergey Kaplun via Tarantool-patches 2025-01-14 11:06 ` Sergey Kaplun via Tarantool-patches [this message] 2025-01-14 11:25 ` [Tarantool-patches] [PATCH luajit 1/2] Cleanup CPU detection and tuning for old CPUs Sergey Bronnikov via Tarantool-patches 2025-01-15 13:10 ` Sergey Kaplun via Tarantool-patches 2025-01-14 11:06 ` [Tarantool-patches] [PATCH luajit 2/2] Disable FMA by default. Use -Ofma or jit.opt.start("+fma") to enable Sergey Kaplun via Tarantool-patches 2025-01-14 12:45 ` Sergey Bronnikov via Tarantool-patches 2025-01-15 13:06 ` Sergey Kaplun via Tarantool-patches
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=f618e5abfe0cf7853176d2cc40f08347f6d7fc03.1736779534.git.skaplun@tarantool.org \ --to=tarantool-patches@dev.tarantool.org \ --cc=sergeyb@tarantool.org \ --cc=skaplun@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH luajit 1/2] Cleanup CPU detection and tuning for old CPUs.' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox