From: Sergey Bronnikov via Tarantool-patches <tarantool-patches@dev.tarantool.org> To: tarantool-patches@dev.tarantool.org, Sergey Kaplun <skaplun@tarantool.org>, Maxim Kokryashkin <m.kokryashkin@tarantool.org> Subject: [Tarantool-patches] [PATCH luajit 2/2] OSX/iOS: Always generate 64 bit non-FAT Mach-O object files. Date: Fri, 5 Jul 2024 16:10:40 +0300 [thread overview] Message-ID: <10ed208fcfacfa4c772f1cebe090595af3452ff3.1720182442.git.sergeyb@tarantool.org> (raw) In-Reply-To: <cover.1720182442.git.sergeyb@tarantool.org> Reported by Sergey Bronnikov. (cherry picked from commit 7110b935672489afd6ba3eef3e5139d2f3bd05b6) Previously, LuaJIT generated Mach-O FAT object files for ARM and ARM64 on macOS. The patch removes support for 32-bit ARM and FAT object files, and now LuaJIT generates Mach-O object files for ARM64. Sergey Bronnikov: * added the description and trimmed the test for the problem Part of tarantool/tarantool#10199 --- src/jit/bcsave.lua | 155 ++------- ...-865-cross-generation-mach-o-file.test.lua | 294 +++--------------- 2 files changed, 70 insertions(+), 379 deletions(-) diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index 26ec29c6..61953c2d 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -412,24 +412,12 @@ typedef struct { mach_header; uint32_t reserved; } mach_header_64; -typedef struct { - uint32_t cmd, cmdsize; - char segname[16]; - uint32_t vmaddr, vmsize, fileoff, filesize; - uint32_t maxprot, initprot, nsects, flags; -} mach_segment_command; typedef struct { uint32_t cmd, cmdsize; char segname[16]; uint64_t vmaddr, vmsize, fileoff, filesize; uint32_t maxprot, initprot, nsects, flags; } mach_segment_command_64; -typedef struct { - char sectname[16], segname[16]; - uint32_t addr, size; - uint32_t offset, align, reloff, nreloc, flags; - uint32_t reserved1, reserved2; -} mach_section; typedef struct { char sectname[16], segname[16]; uint64_t addr, size; @@ -439,133 +427,58 @@ typedef struct { typedef struct { uint32_t cmd,
cmdsize, symoff, nsyms, stroff, strsize; } mach_symtab_command; -typedef struct { - int32_t strx; - uint8_t type, sect; - int16_t desc; - uint32_t value; -} mach_nlist; typedef struct { int32_t strx; uint8_t type, sect; uint16_t desc; uint64_t value; } mach_nlist_64; -typedef struct -{ - int32_t magic, nfat_arch; -} mach_fat_header; -typedef struct -{ - int32_t cputype, cpusubtype, offset, size, align; -} mach_fat_arch; -typedef struct { - struct { - mach_header hdr; - mach_segment_command seg; - mach_section sec; - mach_symtab_command sym; - } arch[1]; - mach_nlist sym_entry; - uint8_t space[4096]; -} mach_obj; typedef struct { - struct { - mach_header_64 hdr; - mach_segment_command_64 seg; - mach_section_64 sec; - mach_symtab_command sym; - } arch[1]; + mach_header_64 hdr; + mach_segment_command_64 seg; + mach_section_64 sec; + mach_symtab_command sym; mach_nlist_64 sym_entry; uint8_t space[4096]; } mach_obj_64; -typedef struct { - mach_fat_header fat; - mach_fat_arch fat_arch[2]; - struct { - mach_header hdr; - mach_segment_command seg; - mach_section sec; - mach_symtab_command sym; - } arch[2]; - mach_nlist sym_entry; - uint8_t space[4096]; -} mach_fat_obj; -typedef struct { - mach_fat_header fat; - mach_fat_arch fat_arch[2]; - struct { - mach_header_64 hdr; - mach_segment_command_64 seg; - mach_section_64 sec; - mach_symtab_command sym; - } arch[2]; - mach_nlist_64 sym_entry; - uint8_t space[4096]; -} mach_fat_obj_64; ]] local symname = '_'..LJBC_PREFIX..ctx.modname - local isfat, is64, align, mobj = false, false, 4, "mach_obj" - if ctx.arch == "x64" then - is64, align, mobj = true, 8, "mach_obj_64" - elseif ctx.arch == "arm" then - isfat, mobj = true, "mach_fat_obj" - elseif ctx.arch == "arm64" then - is64, align, isfat, mobj = true, 8, true, "mach_fat_obj_64" - else - check(ctx.arch == "x86", "unsupported architecture for OSX") + local cputype, cpusubtype = 0x01000007, 3 + if ctx.arch ~= "x64" then + check(ctx.arch == "arm64", "unsupported architecture for 
OSX") + cputype, cpusubtype = 0x0100000c, 0 end local function aligned(v, a) return bit.band(v+a-1, -a) end - local be32 = bit.bswap -- Mach-O FAT is BE, supported archs are LE. -- Create Mach-O object and fill in header. - local o = ffi.new(mobj) - local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) - local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch] - local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch] - if isfat then - o.fat.magic = be32(0xcafebabe) - o.fat.nfat_arch = be32(#cpusubtype) - end + local o = ffi.new("mach_obj_64") + local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, 8) -- Fill in sections and symbols. - for i=0,#cpusubtype-1 do - local ofs = 0 - if isfat then - local a = o.fat_arch[i] - a.cputype = be32(cputype[i+1]) - a.cpusubtype = be32(cpusubtype[i+1]) - -- Subsequent slices overlap each other to share data. - ofs = ffi.offsetof(o, "arch") + i*ffi.sizeof(o.arch[0]) - a.offset = be32(ofs) - a.size = be32(mach_size-ofs+#s) - end - local a = o.arch[i] - a.hdr.magic = is64 and 0xfeedfacf or 0xfeedface - a.hdr.cputype = cputype[i+1] - a.hdr.cpusubtype = cpusubtype[i+1] - a.hdr.filetype = 1 - a.hdr.ncmds = 2 - a.hdr.sizeofcmds = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)+ffi.sizeof(a.sym) - a.seg.cmd = is64 and 0x19 or 0x1 - a.seg.cmdsize = ffi.sizeof(a.seg)+ffi.sizeof(a.sec) - a.seg.vmsize = #s - a.seg.fileoff = mach_size-ofs - a.seg.filesize = #s - a.seg.maxprot = 1 - a.seg.initprot = 1 - a.seg.nsects = 1 - ffi.copy(a.sec.sectname, "__data") - ffi.copy(a.sec.segname, "__DATA") - a.sec.size = #s - a.sec.offset = mach_size-ofs - a.sym.cmd = 2 - a.sym.cmdsize = ffi.sizeof(a.sym) - a.sym.symoff = ffi.offsetof(o, "sym_entry")-ofs - a.sym.nsyms = 1 - a.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)-ofs - a.sym.strsize = aligned(#symname+2, align) - end + o.hdr.magic = 0xfeedfacf + o.hdr.cputype = cputype + o.hdr.cpusubtype = cpusubtype + 
o.hdr.filetype = 1 + o.hdr.ncmds = 2 + o.hdr.sizeofcmds = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)+ffi.sizeof(o.sym) + o.seg.cmd = 0x19 + o.seg.cmdsize = ffi.sizeof(o.seg)+ffi.sizeof(o.sec) + o.seg.vmsize = #s + o.seg.fileoff = mach_size + o.seg.filesize = #s + o.seg.maxprot = 1 + o.seg.initprot = 1 + o.seg.nsects = 1 + ffi.copy(o.sec.sectname, "__data") + ffi.copy(o.sec.segname, "__DATA") + o.sec.size = #s + o.sec.offset = mach_size + o.sym.cmd = 2 + o.sym.cmdsize = ffi.sizeof(o.sym) + o.sym.symoff = ffi.offsetof(o, "sym_entry") + o.sym.nsyms = 1 + o.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry) + o.sym.strsize = aligned(#symname+2, 8) o.sym_entry.type = 0xf o.sym_entry.sect = 1 o.sym_entry.strx = 1 diff --git a/test/tarantool-tests/lj-865-cross-generation-mach-o-file.test.lua b/test/tarantool-tests/lj-865-cross-generation-mach-o-file.test.lua index f008f3bd..6a58de95 100644 --- a/test/tarantool-tests/lj-865-cross-generation-mach-o-file.test.lua +++ b/test/tarantool-tests/lj-865-cross-generation-mach-o-file.test.lua @@ -3,109 +3,11 @@ local test = tap.test('lj-865-cross-generation-mach-o-file') local utils = require('utils') local ffi = require('ffi') -test:plan(2) +test:plan(1) -- The test creates an object file in Mach-O format with LuaJIT -- bytecode and checks the validity of the object file fields. --- --- The original problem is reproduced with LuaJIT, which is built --- with enabled AVX512F instructions. The support for AVX512F --- could be checked in `/proc/cpuinfo` on Linux and --- `sysctl hw.optional.avx512f` on Mac. AVX512F must be --- implicitly enabled in a C compiler by passing a CPU codename. --- Please take a look at the GCC Online Documentation [1] for --- available CPU codenames. Also, see the Wikipedia for CPUs with --- AVX-512 support [2]. --- Execute command below to detect the CPU codename: --- `gcc -march=native -Q --help=target | grep march`. --- --- 1. https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html --- 2. 
https://en.wikipedia.org/wiki/AVX-512#CPUs_with_AVX-512 --- --- Manual steps for reproducing are the following: --- --- $ CC=gcc TARGET_CFLAGS='skylake-avx512' cmake -S . -B build --- $ cmake --build build --parallel --- $ echo > test.lua --- $ LUA_PATH="src/?.lua;;" luajit -b -o osx -a arm test.lua test.o --- $ file test.o --- empty.o: DOS executable (block device driver) --- LuaJIT can generate so called Universal Binary with Lua --- bytecode. The Universal Binary format is a format for --- executable files that run natively on hardware platforms with --- different hardware architectures. This concept is more --- generally known as a fat binary. --- --- The format of the Mach-O is described in the document --- "OS X ABI Mach-O File Format Reference", published by Apple --- company. The copy of the (now removed) official documentation --- can be found here [1]. Yet another source of truth is --- XNU headers, see the definition of C-structures in: --- [2] (`nlist_64`), [3] (`fat_arch` and `fat_header`). --- --- There is a good visual representation of Universal Binary --- in "Mac OS X Internals" book (pages 67-68) [5] and in the [6]. 
--- Below is the schematic structure of Universal Binary, which --- includes two executables for PowerPC and Intel i386 (omitted): --- --- 0x0000000 --------------------------------------- --- | --- struct | 0xcafebabe FAT_MAGIC magic --- fat_header | ------------------------------------- --- | 0x00000003 nfat_arch --- --------------------------------------- --- | 0x00000012 CPU_TYPE_POWERPC cputype --- | ------------------------------------- --- | 0x00000000 CPU_SUBTYPE_POWERPC_ALL cpusubtype --- struct | ------------------------------------- --- fat_arch | 0x00001000 4096 bytes offset --- | ------------------------------------- --- | 0x00004224 16932 bytes size --- | ------------------------------------- --- | 0x0000000c 2^12 = 4096 bytes align --- --------------------------------------- --- --------------------------------------- --- | 0x00000007 CPU_TYPE_I386 cputype --- | ------------------------------------- --- | 0x00000003 CPU_SUBTYPE_I386_ALL cpusubtype --- struct | ------------------------------------- --- fat_arch | 0x00006000 24576 bytes offset --- | ------------------------------------- --- | 0x0000292c 10540 bytes size --- | ------------------------------------- --- | 0x0000000c 2^12 = 4096 bytes align --- --------------------------------------- --- Unused --- 0x00001000 --------------------------------------- --- | 0xfeedface MH_MAGIC magic --- | ------------------------------------ --- | 0x00000012 CPU_TYPE_POWERPC cputype --- | ------------------------------------ --- struct | 0x00000000 CPU_SUBTYPE_POWERPC_ALL cpusubtype --- mach_header | ------------------------------------ --- | 0x00000002 MH_EXECUTE filetype --- | ------------------------------------ --- | 0x0000000b 10 load commands ncmds --- | ------------------------------------ --- | 0x00000574 1396 bytes sizeofcmds --- | ------------------------------------ --- | 0x00000085 DYLDLINK TWOLEVEL flags --- -------------------------------------- --- Load commands --- 
--------------------------------------- --- Data --- --------------------------------------- --- --- < x86 executable > --- --- 1. https://github.com/aidansteele/osx-abi-macho-file-format-reference --- 2. https://github.com/apple-oss-distributions/xnu/blob/xnu-10002.1.13/EXTERNAL_HEADERS/mach-o/nlist.h --- 3. https://github.com/apple-oss-distributions/xnu/blob/xnu-10002.1.13/EXTERNAL_HEADERS/mach-o/fat.h --- 4. https://developer.apple.com/documentation/apple-silicon/addressing-architectural-differences-in-your-macos-code --- 5. https://reverseengineering.stackexchange.com/a/6357/46029 --- 6. http://formats.kaitai.io/mach_o/index.html --- -- Using the same declarations as defined in <src/jit/bcsave.lua>. ffi.cdef[[ typedef struct @@ -118,13 +20,6 @@ typedef struct mach_header; uint32_t reserved; } mach_header_64; -typedef struct { - uint32_t cmd, cmdsize; - char segname[16]; - uint32_t vmaddr, vmsize, fileoff, filesize; - uint32_t maxprot, initprot, nsects, flags; -} mach_segment_command; - typedef struct { uint32_t cmd, cmdsize; char segname[16]; @@ -132,13 +27,6 @@ typedef struct { uint32_t maxprot, initprot, nsects, flags; } mach_segment_command_64; -typedef struct { - char sectname[16], segname[16]; - uint32_t addr, size; - uint32_t offset, align, reloff, nreloc, flags; - uint32_t reserved1, reserved2; -} mach_section; - typedef struct { char sectname[16], segname[16]; uint64_t addr, size; @@ -150,13 +38,6 @@ typedef struct { uint32_t cmd, cmdsize, symoff, nsyms, stroff, strsize; } mach_symtab_command; -typedef struct { - int32_t strx; - uint8_t type, sect; - int16_t desc; - uint32_t value; -} mach_nlist; - typedef struct { int32_t strx; uint8_t type, sect; @@ -164,41 +45,14 @@ typedef struct { uint64_t value; } mach_nlist_64; -typedef struct -{ - int32_t magic, nfat_arch; -} mach_fat_header; - -typedef struct -{ - int32_t cputype, cpusubtype, offset, size, align; -} mach_fat_arch; - -typedef struct { - mach_fat_header fat; - mach_fat_arch fat_arch[2]; - struct 
{ - mach_header hdr; - mach_segment_command seg; - mach_section sec; - mach_symtab_command sym; - } arch[2]; - mach_nlist sym_entry; - uint8_t space[4096]; -} mach_fat_obj; - typedef struct { - mach_fat_header fat; - mach_fat_arch fat_arch[2]; - struct { - mach_header_64 hdr; - mach_segment_command_64 seg; - mach_section_64 sec; - mach_symtab_command sym; - } arch[2]; + mach_header_64 hdr; + mach_segment_command_64 seg; + mach_section_64 sec; + mach_symtab_command sym; mach_nlist_64 sym_entry; uint8_t space[4096]; -} mach_fat_obj_64; +} mach_obj_64; ]] local function create_obj_file(name, arch) @@ -212,108 +66,37 @@ local function create_obj_file(name, arch) return mach_o_path end --- Parses a buffer in the Mach-O format and returns the FAT magic --- number and `nfat_arch`. +-- Parses a buffer in the Mach-O format and returns its fields +-- in a table. local function read_mach_o(buf, hw_arch) - local res = { - header = { - magic = 0, - nfat_arch = 0, - }, - fat_arch = {}, - } - local is64 = hw_arch == 'arm64' - -- Mach-O FAT object. - local mach_fat_obj_type = ffi.typeof(is64 and - 'mach_fat_obj_64 *' or - 'mach_fat_obj *') - local obj = ffi.cast(mach_fat_obj_type, buf) + -- Mach-O object. + local mach_obj_type = ffi.typeof(is64 and 'mach_obj_64 *') + local obj = ffi.cast(mach_obj_type, buf) - -- Mach-O FAT object header. - local mach_fat_header = obj.fat - -- Mach-O FAT is BE, target arch is LE. - local be32 = bit.bswap - res.header.magic = be32(mach_fat_header.magic) - res.header.nfat_arch = be32(mach_fat_header.nfat_arch) + -- Mach-O object header. + local mach_header = obj.hdr - -- Mach-O FAT object arches. 
- for i = 0, res.header.nfat_arch - 1 do - local fat_arch = obj.fat_arch[i] - local arch = { - cputype = be32(fat_arch.cputype), - cpusubtype = be32(fat_arch.cpusubtype), - } - table.insert(res.fat_arch, arch) - end - - return res + return { + header = { + magic = mach_header.magic, + cputype = mach_header.cputype, + cpusubtype = mach_header.cpusubtype, + filetype = mach_header.filetype, + ncmds = mach_header.ncmds, + }, + } end --- Universal Binary can contain executables for more than one --- CPU architecture. For simplicity, the test compares the *sum* --- of CPU types and CPU subtypes. --- --- <src/jit/bcsave.lua:bcsave_machobj> has the definitions of the --- numbers below. The original XNU source code may be found in --- <osfmk/mach/machine.h> [1]. --- --- 1. https://opensource.apple.com/source/xnu/xnu-4570.41.2/osfmk/mach/machine.h.auto.html --- -local SUM_CPUTYPE = { - -- x86 + arm. - arm = 7 + 12, - -- x64 + arm64. - arm64 = 0x01000007 + 0x0100000c, -} -local SUM_CPUSUBTYPE = { - -- x86 + arm. - arm = 3 + 9, - -- x64 + arm64. - arm64 = 3 + 0, -} - --- The function builds Mach-O FAT object file and retrieves --- its header fields (magic and nfat_arch) and fields of each arch --- (cputype, cpusubtype). --- --- The Mach-O FAT object header can be retrieved with `otool` on --- macOS: --- --- $ otool -f empty.o --- Fat headers --- fat_magic 0xcafebabe --- nfat_arch 2 --- <snipped> --- --- CPU type and subtype can be retrieved with `lipo` on macOS: --- --- $ luajit -b -o osx -a arm empty.lua empty.o --- $ lipo -archs empty.o --- i386 armv7 --- $ luajit -b -o osx -a arm64 empty.lua empty.o --- $ lipo -archs empty.o --- x86_64 arm64 +-- The function builds Mach-O object file and retrieves +-- its header fields. local function build_and_check_mach_o(subtest) local hw_arch = subtest.name - assert(hw_arch == 'arm' or hw_arch == 'arm64') + -- LuaJIT always generate 64-bit non-FAT Mach-O object files. 
+ assert(hw_arch == 'arm64') - subtest:plan(4) - -- FAT_MAGIC is an integer containing the value 0xCAFEBABE in - -- big-endian byte order format. On a big-endian host CPU, - -- this can be validated using the constant FAT_MAGIC; - -- on a little-endian host CPU, it can be validated using - -- the constant FAT_CIGAM. - -- - -- FAT_NARCH is an integer specifying the number of fat_arch - -- data structures that follow. This is the number of - -- architectures contained in this binary. - -- - -- See the aforementioned "OS X ABI Mach-O File Format - -- Reference". - local FAT_MAGIC = '0xffffffffcafebabe' - local FAT_NARCH = 2 + subtest:plan(5) local MODULE_NAME = 'lango_team' @@ -327,24 +110,19 @@ local function build_and_check_mach_o(subtest) assert(os.remove(mach_o_obj_path), 'remove an object file') local magic_str = string.format('%#x', mach_o.header.magic) - subtest:is(magic_str, FAT_MAGIC, - 'fat_magic is correct in Mach-O') - subtest:is(mach_o.header.nfat_arch, FAT_NARCH, - 'nfat_arch is correct in Mach-O') - - local total_cputype = 0 - local total_cpusubtype = 0 - for i = 1, FAT_NARCH do - total_cputype = total_cputype + mach_o.fat_arch[i].cputype - total_cpusubtype = total_cpusubtype + mach_o.fat_arch[i].cpusubtype - end - subtest:is(total_cputype, SUM_CPUTYPE[hw_arch], + subtest:is(magic_str, '0xfeedfacf', + 'magic is correct in Mach-O') + local cputype_str = string.format('%#x', mach_o.header.cputype) + subtest:is(cputype_str, '0x100000c', 'cputype is correct in Mach-O') - subtest:is(total_cpusubtype, SUM_CPUSUBTYPE[hw_arch], + subtest:is(mach_o.header.cpusubtype, 0, 'cpusubtype is correct in Mach-O') + subtest:is(mach_o.header.filetype, 1, + 'filetype is correct in Mach-O') + subtest:is(mach_o.header.ncmds, 2, + 'ncmds is correct in Mach-O') end -test:test('arm', build_and_check_mach_o) test:test('arm64', build_and_check_mach_o) test:done(true) -- 2.34.1
next prev parent reply other threads:[~2024-07-05 13:11 UTC|newest] Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top 2024-07-05 13:10 [Tarantool-patches] [PATCH luajit 0/2] Fixes in Mach-O generation code Sergey Bronnikov via Tarantool-patches 2024-07-05 13:10 ` [Tarantool-patches] [PATCH luajit 1/2] Fix typo Sergey Bronnikov via Tarantool-patches 2024-07-09 12:15 ` Sergey Kaplun via Tarantool-patches 2024-07-11 9:15 ` Maxim Kokryashkin via Tarantool-patches 2024-07-05 13:10 ` Sergey Bronnikov via Tarantool-patches [this message] 2024-07-09 13:03 ` [Tarantool-patches] [PATCH luajit 2/2] OSX/iOS: Always generate 64 bit non-FAT Mach-O object files Sergey Kaplun via Tarantool-patches 2024-07-10 12:43 ` Sergey Bronnikov via Tarantool-patches 2024-07-11 7:29 ` Sergey Kaplun via Tarantool-patches 2024-07-23 20:07 ` Sergey Bronnikov via Tarantool-patches 2024-08-19 9:45 ` Maxim Kokryashkin via Tarantool-patches 2024-08-20 8:57 ` Sergey Bronnikov via Tarantool-patches
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=10ed208fcfacfa4c772f1cebe090595af3452ff3.1720182442.git.sergeyb@tarantool.org \ --to=tarantool-patches@dev.tarantool.org \ --cc=estetus@gmail.com \ --cc=m.kokryashkin@tarantool.org \ --cc=skaplun@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH luajit 2/2] OSX/iOS: Always generate 64 bit non-FAT Mach-O object files.' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.