From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp1.mail.ru (smtp1.mail.ru [94.100.179.111]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 6E01042F4AD for ; Thu, 25 Jun 2020 12:45:17 +0300 (MSK) From: "Timur Safin" References: <20200312100549.31608-1-arkholga@tarantool.org> <20200616010232.3j2zvwo7z6lmnody@tkn_work_nb> In-Reply-To: <20200616010232.3j2zvwo7z6lmnody@tkn_work_nb> Date: Thu, 25 Jun 2020 12:45:14 +0300 Message-ID: <147101d64ad5$54139fb0$fc3adf10$@tarantool.org> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Content-Language: ru Subject: Re: [Tarantool-patches] [PATCH] cmake: add LTO support for building luajit List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: 'Alexander Turenko' , 'Olga Arkhangelskaia' Cc: tarantool-patches@dev.tarantool.org : From: Tarantool-patches = On : Subject: Re: [Tarantool-patches] [PATCH] cmake: add LTO support for : building luajit :=20 : I vote to use the same build tools and flags that CMake uses to build : files under its control, because otherwise we would write the same = logic : again. It may be not as accurate as one that CMake provides (see = example : below). It may become outdated in a future. And, last but not least, : duplicated code is painful to maintain. 
:=20 I've looked into the CMake IPO implementation for different compilers, and essentially they set these 5 variables when IPO is requested: CMAKE_${lang}_COMPILE_OPTIONS_IPO, CMAKE_${lang}_LINK_OPTIONS_IPO, CMAKE_${lang}_ARCHIVE_CREATE_IPO, CMAKE_${lang}_ARCHIVE_APPEND_IPO, CMAKE_${lang}_ARCHIVE_FINISH_IPO, with C, CXX and ASM as ${lang}. So indeed, propagating the relevant variables down to the luajit make variables seems an adequate approach, and one flexible enough for all compilers supported by CMake. : I think it would be good to expose related build tool names and flags : from cmake/lto.cmake and use them in cmake/luajit.cmake. I implemented : the former part (I would even left STATUS messages as is, they provide : useful information): :=20 : | diff --git a/cmake/lto.cmake b/cmake/lto.cmake : | index 95ade75f4..79b908e26 100644 : | --- a/cmake/lto.cmake : | +++ b/cmake/lto.cmake : | @@ -90,8 +90,40 @@ if (NOT TARGET_OS_DARWIN) : | endif() : | endif() : | : | -# gh-3742: investigate LTO warnings. : | -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} = -Wno-lto-type- : mismatch") : | +# {{{ Expose build tools and flags : | +# : | +# It is convenient for building non-cmake targets with the same : | +# flags as we use for sources under CMake control. : | +# : | +# It leans on undocumented variables that are set in the following : | +# CMake modules: Compiler/GNU.cmake and Compiler/Clang.cmake. : | + : | +# CFLAGS_LTO (list) : | +set(CFLAGS_LTO ${CMAKE_C_COMPILE_OPTIONS_IPO}) : | +message(STATUS "CFLAGS_LTO: ${CFLAGS_LTO}") : | : | +# LDFLAGS_LTO (list) : | +set(LDFLAGS_LTO ${CMAKE_C_LINK_OPTIONS_IPO}) : | +# FIXME: gh-3742: investigate LTO warnings. : | +list(APPEND LDFLAGS_LTO -Wno-lto-type-mismatch) : | +message(STATUS "LDFLAGS_LTO: ${LDFLAGS_LTO}") : | + : | +# AR_LTO (string) : | +# : | +# Note: Platform/Linux-Intel.cmake and Platform/Windows-MSVC.cmake : | +# set CMAKE_C_CREATE_STATIC_LIBRARY_IPO, but not : | +# CMAKE_C_ARCHIVE_CREATE_IPO. 
So this snippet is only for GCC and : | +# clang. : | +set(_ar_command ${CMAKE_C_ARCHIVE_CREATE_IPO}) : | +separate_arguments(_ar_command) : | +list(GET _ar_command 0 AR_LTO) : | +unset(_ar_command) : | +message(STATUS "AR_LTO: ${AR_LTO}") : | + : | +# }}} : | + : | +# Set build tools and flags for files that are built using CMake. : | +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} = -Wno-lto-type- : mismatch") : | set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) : | + : | message(STATUS "Enabling LTO: TRUE") :=20 : Olga, Igor, what do you think about this way? Looks like this is already applied to the branch, and it works as expected. Here is how to make sure that the .o files contain the information necessary for interprocedural optimizations: 1. For clang it's simple - the .o is not an ELF file, but rather an LLVM IR (bitcode) file ``` tsafin@M1BOOK6319:~/tarantool/build_lto$ file ./third_party/luajit/src/lj_opt_sink.o ./third_party/luajit/src/lj_opt_sink.o: LLVM IR bitcode ``` so we are OK here. 2. 
For gcc it's more complicated - .o file is still ELF file but should contain GIMPLE IR code in the special sections .gnu.lto.* ``` tsafin@M1BOOK6319:~/tarantool/build_lto$ file = ./third_party/luajit/src/lj_trace.o ./third_party/luajit/src/lj_trace.o: ELF 64-bit LSB relocatable, x86-64, = version 1 (SYSV), not stripped tsafin@M1BOOK6319:~/tarantool/build_lto$ readelf -a = ./third_party/luajit/src/lj_trace.o ELF Header: Magic: 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 Class: ELF64 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: REL (Relocatable file) Machine: Advanced Micro Devices X86-64 Version: 0x1 Entry point address: 0x0 Start of program headers: 0 (bytes into file) Start of section headers: 82944 (bytes into file) Flags: 0x0 Size of this header: 64 (bytes) Size of program headers: 0 (bytes) Number of program headers: 0 Size of section headers: 64 (bytes) Number of section headers: 48 Section header string table index: 47 Section Headers: [Nr] Name Type Address Offset Size EntSize Flags Link Info Align [ 0] NULL 0000000000000000 00000000 0000000000000000 0000000000000000 0 0 0 [ 1] .text PROGBITS 0000000000000000 00000040 0000000000000000 0000000000000000 AX 0 0 1 [ 2] .data PROGBITS 0000000000000000 00000040 0000000000000000 0000000000000000 WA 0 0 1 [ 3] .bss NOBITS 0000000000000000 00000040 0000000000000000 0000000000000000 WA 0 0 1 [ 4] .gnu.lto_.profile PROGBITS 0000000000000000 00000040 000000000000000e 0000000000000000 E 0 0 1 [ 5] .gnu.lto_.icf.bee PROGBITS 0000000000000000 0000004e 00000000000000ca 0000000000000000 E 0 0 1 [ 6] .gnu.lto_.jmpfunc PROGBITS 0000000000000000 00000118 00000000000004fb 0000000000000000 E 0 0 1 [ 7] .gnu.lto_.inline. 
PROGBITS 0000000000000000 00000613 0000000000000524 0000000000000000 E 0 0 1 [ 8] .gnu.lto_.purecon PROGBITS 0000000000000000 00000b37 000000000000004b 0000000000000000 E 0 0 1 [ 9] .gnu.lto_penalty_ PROGBITS 0000000000000000 00000b82 0000000000000684 0000000000000000 E 0 0 1 [10] .gnu.lto_trace_fi PROGBITS 0000000000000000 00001206 0000000000000595 0000000000000000 E 0 0 1 [11] .gnu.lto_trace_sa PROGBITS 0000000000000000 0000179b 0000000000000611 0000000000000000 E 0 0 1 [12] .gnu.lto_trace_st PROGBITS 0000000000000000 00001dac 0000000000000d49 0000000000000000 E 0 0 1 [13] .gnu.lto_trace_ex PROGBITS 0000000000000000 00002af5 0000000000000255 0000000000000000 E 0 0 1 [14] .gnu.lto_trace_un PROGBITS 0000000000000000 00002d4a 00000000000004f9 0000000000000000 E 0 0 1 [15] .gnu.lto_trace_fl PROGBITS 0000000000000000 00003243 000000000000050c 0000000000000000 E 0 0 1 [16] .gnu.lto_trace_ex PROGBITS 0000000000000000 0000374f 00000000000004cc 0000000000000000 E 0 0 1 [17] .gnu.lto_lj_trace PROGBITS 0000000000000000 00003c1b 000000000000031f 0000000000000000 E 0 0 1 [18] .gnu.lto_lj_trace PROGBITS 0000000000000000 00003f3a 0000000000000291 0000000000000000 E 0 0 1 [19] .gnu.lto_lj_trace PROGBITS 0000000000000000 000041cb 000000000000024a 0000000000000000 E 0 0 1 [20] .gnu.lto_lj_trace PROGBITS 0000000000000000 00004415 000000000000047e 0000000000000000 E 0 0 1 [21] .gnu.lto_lj_trace PROGBITS 0000000000000000 00004893 0000000000000429 0000000000000000 E 0 0 1 [22] .gnu.lto_lj_trace PROGBITS 0000000000000000 00004cbc 00000000000004a7 0000000000000000 E 0 0 1 [23] .gnu.lto_lj_trace PROGBITS 0000000000000000 00005163 00000000000001f3 0000000000000000 E 0 0 1 [24] .gnu.lto_lj_trace PROGBITS 0000000000000000 00005356 00000000000002a9 0000000000000000 E 0 0 1 [25] .gnu.lto_lj_trace PROGBITS 0000000000000000 000055ff 0000000000000840 0000000000000000 E 0 0 1 [26] .gnu.lto_trace_st PROGBITS 0000000000000000 00005e3f 0000000000000be5 0000000000000000 E 0 0 1 [27] .gnu.lto_trace_st 
PROGBITS 0000000000000000 00006a24 000000000000030c 0000000000000000 E 0 0 1 [28] .gnu.lto_trace_ab PROGBITS 0000000000000000 00006d30 0000000000001335 0000000000000000 E 0 0 1 [29] .gnu.lto_trace_st PROGBITS 0000000000000000 00008065 0000000000000f5b 0000000000000000 E 0 0 1 [30] .gnu.lto_lj_trace PROGBITS 0000000000000000 00008fc0 0000000000000387 0000000000000000 E 0 0 1 [31] .gnu.lto_lj_trace PROGBITS 0000000000000000 00009347 000000000000055c 0000000000000000 E 0 0 1 [32] .gnu.lto_lj_trace PROGBITS 0000000000000000 000098a3 00000000000003ed 0000000000000000 E 0 0 1 [33] .gnu.lto_trace_ho PROGBITS 0000000000000000 00009c90 000000000000051c 0000000000000000 E 0 0 1 [34] .gnu.lto_lj_trace PROGBITS 0000000000000000 0000a1ac 00000000000001ef 0000000000000000 E 0 0 1 [35] .gnu.lto_lj_trace PROGBITS 0000000000000000 0000a39b 000000000000041c 0000000000000000 E 0 0 1 [36] .gnu.lto_lj_trace PROGBITS 0000000000000000 0000a7b7 0000000000000251 0000000000000000 E 0 0 1 [37] .gnu.lto_lj_trace PROGBITS 0000000000000000 0000aa08 0000000000000f22 0000000000000000 E 0 0 1 [38] .gnu.lto_.symbol_ PROGBITS 0000000000000000 0000b92a 00000000000003cb 0000000000000000 E 0 0 1 [39] .gnu.lto_.refs.be PROGBITS 0000000000000000 0000bcf5 000000000000001b 0000000000000000 E 0 0 1 [40] .gnu.lto_.decls.b PROGBITS 0000000000000000 0000bd10 000000000000769a 0000000000000000 E 0 0 1 [41] .gnu.lto_.symtab. PROGBITS 0000000000000000 000133aa 0000000000000465 0000000000000000 E 0 0 1 [42] .gnu.lto_.opts PROGBITS 0000000000000000 0001380f 00000000000000d1 0000000000000000 E 0 0 1 ... ``` So the patch LGTM at the moment (once it has been rebased). Timur