01f435caf2306e9d8681f06bbcd909941acc9f99
[u/mrichter/AliRoot.git] / Vc / cmake / OptimizeForArchitecture.cmake
1 get_filename_component(_currentDir "${CMAKE_CURRENT_LIST_FILE}" PATH)
2 include("${_currentDir}/AddCompilerFlag.cmake")
3
# _my_find(<list-variable> <value> <result-variable>)
#
# Membership test for CMake lists: stores TRUE in <result-variable> when
# <value> is an element of the list named by <list-variable>, FALSE otherwise.
#
# This is a macro, so the scratch variable _found (the index reported by
# list(FIND)) is written in the caller's scope. Passing _found itself as the
# <result-variable> is fine: the index is evaluated before it is overwritten.
macro(_my_find _list _value _ret)
   list(FIND ${_list} "${_value}" _found)
   # list(FIND) reports -1 for "not present" and a non-negative index otherwise.
   if(_found LESS 0)
      set(${_ret} FALSE)
   else()
      set(${_ret} TRUE)
   endif()
endmacro()
12
# AutodetectHostArchitecture()
#
# Inspects the CPU of the machine that runs CMake and stores the name of the
# matching micro-architecture in TARGET_ARCHITECTURE. The value stays "generic"
# when the vendor/family/model combination is not recognized.
#
# This is a macro, so everything below is written in the caller's scope:
#   TARGET_ARCHITECTURE     detected architecture name (default "generic")
#   Vc_ARCHITECTURE_FLAGS   cleared
#   _vendor_id, _cpu_family, _cpu_model
#                           raw identification values read from the system
#   _cpu_flags              CPU feature flags (Linux and Darwin only, empty
#                           elsewhere)
#   _available_vector_units_list
#                           extended for Intel NetBurst CPUs, which have no
#                           dedicated TARGET_ARCHITECTURE name
#
# Data sources: /proc/cpuinfo on Linux, sysctl on Darwin and the registry on
# Windows. On any other system nothing is read and "generic" is reported.
macro(AutodetectHostArchitecture)
   set(TARGET_ARCHITECTURE "generic")
   set(Vc_ARCHITECTURE_FLAGS)
   set(_vendor_id)
   set(_cpu_family)
   set(_cpu_model)
   # Reset the flags as well so that no stale value survives on platforms that
   # do not provide them.
   set(_cpu_flags)
   if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
      file(READ "/proc/cpuinfo" _cpuinfo)
      # The leading ".*" is greedy, so the values of the last listed processor
      # are extracted. If a pattern does not match at all, REGEX REPLACE leaves
      # the complete file content in the variable, which simply fails every
      # comparison further down.
      string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}")
      string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}")
      string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}")
      # Capture the complete flags line, not just its first word; otherwise
      # later checks such as MATCHES "sse4_1" can never succeed. The newline in
      # front of "flags" keeps other keys that merely end in "flags" (some
      # kernels print a "vmx flags" line) from being picked up instead.
      string(REGEX REPLACE ".*\nflags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}")
   elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
      exec_program("/usr/sbin/sysctl -n machdep.cpu.vendor" OUTPUT_VARIABLE _vendor_id)
      exec_program("/usr/sbin/sysctl -n machdep.cpu.model"  OUTPUT_VARIABLE _cpu_model)
      exec_program("/usr/sbin/sysctl -n machdep.cpu.family" OUTPUT_VARIABLE _cpu_family)
      exec_program("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE _cpu_flags)
      # Normalize to the Linux spelling of the flags (lower case, "sse4_1"
      # instead of "SSE4.1").
      string(TOLOWER "${_cpu_flags}" _cpu_flags)
      string(REPLACE "." "_" _cpu_flags "${_cpu_flags}")
   elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
      get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE)
      get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE)
      mark_as_advanced(_vendor_id _cpu_id)
      string(REGEX REPLACE ".* Family ([0-9]+) .*" "\\1" _cpu_family "${_cpu_id}")
      string(REGEX REPLACE ".* Model ([0-9]+) .*" "\\1" _cpu_model "${_cpu_id}")
   endif()

   if(_vendor_id STREQUAL "GenuineIntel")
      if(_cpu_family EQUAL 6)
         # Any recent Intel CPU except NetBurst
         if(_cpu_model EQUAL 45 OR _cpu_model EQUAL 42)
            # 45: Xeon TNG, 42: Core TNG
            set(TARGET_ARCHITECTURE "sandy-bridge")
         elseif(_cpu_model EQUAL 44 OR _cpu_model EQUAL 37)
            # 44: Xeon 5600 series, 37: Core i7/i5/i3
            set(TARGET_ARCHITECTURE "westmere")
         elseif(_cpu_model EQUAL 46 OR _cpu_model EQUAL 31 OR _cpu_model EQUAL 30 OR _cpu_model EQUAL 26)
            # 46: Xeon 7500 series, 31/30: first generation Core i7/i5.
            # These are Nehalem parts: same vector units as Westmere, but
            # without the additional instructions a compiler may enable for
            # -march=westmere.
            set(TARGET_ARCHITECTURE "nehalem")
         elseif(_cpu_model EQUAL 29 OR _cpu_model EQUAL 23)
            set(TARGET_ARCHITECTURE "penryn")
         elseif(_cpu_model EQUAL 28)
            set(TARGET_ARCHITECTURE "atom")
         elseif(_cpu_model EQUAL 15)
            set(TARGET_ARCHITECTURE "merom")
         elseif(_cpu_model EQUAL 14)
            set(TARGET_ARCHITECTURE "core")
         elseif(_cpu_model LESS 14)
            message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the generic CPU settings with SSE2.")
            set(TARGET_ARCHITECTURE "generic")
         else()
            # Unknown, but newer than everything in the table above.
            message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.")
            set(TARGET_ARCHITECTURE "merom")
         endif()
      elseif(_cpu_family EQUAL 7) # Itanium (not supported)
         message(WARNING "Your CPU (Itanium: family ${_cpu_family}, model ${_cpu_model}) is not supported by OptimizeForArchitecture.cmake.")
      elseif(_cpu_family EQUAL 15) # NetBurst
         # TARGET_ARCHITECTURE stays "generic"; only the vector units are
         # announced to the caller.
         list(APPEND _available_vector_units_list "sse" "sse2")
         if(_cpu_model GREATER 2) # Not sure whether this must be 3 or even 4 instead
            list(APPEND _available_vector_units_list "sse3")
         endif()
      endif()
   elseif(_vendor_id STREQUAL "AuthenticAMD")
      if(_cpu_family EQUAL 21) # 15h
         set(TARGET_ARCHITECTURE "bulldozer")
      elseif(_cpu_family EQUAL 20) # 14h
         # no dedicated settings: stays "generic"
      elseif(_cpu_family EQUAL 18) # 12h
         # no dedicated settings: stays "generic"
      elseif(_cpu_family EQUAL 16) # 10h
         set(TARGET_ARCHITECTURE "barcelona")
      elseif(_cpu_family EQUAL 15)
         set(TARGET_ARCHITECTURE "k8")
         if(_cpu_model GREATER 64) # I don't know the right number to put here. This is just a guess from the hardware I have access to
            set(TARGET_ARCHITECTURE "k8-sse3")
         endif()
      endif()
   endif()
endmacro()
98
# Private helper of OptimizeForArchitecture: appends the compiler switch name
# <_unit> to _enable_vector_unit_list when the option <_option> is on and to
# _disable_vector_unit_list otherwise.
macro(_ofa_sort_vector_unit _option _unit)
   if(${_option})
      list(APPEND _enable_vector_unit_list "${_unit}")
   else()
      list(APPEND _disable_vector_unit_list "${_unit}")
   endif()
endmacro()

# OptimizeForArchitecture()
#
# Translates the cache variable TARGET_ARCHITECTURE into compiler flags.
#
#   1. TARGET_ARCHITECTURE ("auto" by default) is lower-cased; "auto" is
#      resolved through AutodetectHostArchitecture().
#   2. The architecture name is mapped to a list of -march candidates and to
#      the list of vector units the architecture provides.
#   3. The vector units become the defaults of the advanced cache options
#      USE_SSE2, USE_SSE3, USE_SSSE3, USE_SSE4_1, USE_SSE4_2, USE_SSE4a,
#      USE_AVX, USE_XOP and USE_FMA4. The defaults are only forced over the
#      cached values when TARGET_ARCHITECTURE differs from the value seen on
#      the previous run (remembered in the cache entry _last_target_arch).
#   4. Flags are handed to AddCompilerFlag() with Vc_ARCHITECTURE_FLAGS as the
#      CXX_FLAGS argument, using MSVC, ICC or GCC-style switches depending on
#      the compiler. For MSVC, __<UNIT>__ macros are additionally defined via
#      add_definitions().
#
# An unsupported TARGET_ARCHITECTURE value stops the configuration with a
# FATAL_ERROR. As this is a macro, all helper variables (names starting with
# an underscore, *_FOUND) are written in the caller's scope.
macro(OptimizeForArchitecture)
   set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used.\nSetting the value to \"auto\" will try to optimize for the architecture where cmake is called.\nOther supported values are: \"generic\", \"core\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\".")

   # _force is either empty or "FORCE"; it is appended to the USE_* cache
   # entries below so that they follow a changed target architecture.
   if(_last_target_arch STREQUAL "${TARGET_ARCHITECTURE}")
      set(_force)
   else()
      message(STATUS "target changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"")
      set(_force FORCE)
   endif()
   set(_last_target_arch "${TARGET_ARCHITECTURE}" CACHE STRING "" FORCE)
   mark_as_advanced(_last_target_arch)
   string(TOLOWER "${TARGET_ARCHITECTURE}" TARGET_ARCHITECTURE)

   set(_march_flag_list)
   set(_available_vector_units_list)

   if(TARGET_ARCHITECTURE STREQUAL "auto")
      AutodetectHostArchitecture()
      message(STATUS "Detected CPU: ${TARGET_ARCHITECTURE}")
   endif()

   # Architecture table. _march_flag_list is ordered from the most specific
   # -march value to the most widely understood fallback.
   if(TARGET_ARCHITECTURE STREQUAL "core")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
   elseif(TARGET_ARCHITECTURE STREQUAL "merom" OR TARGET_ARCHITECTURE STREQUAL "atom")
      # The architecture name doubles as the preferred -march value.
      list(APPEND _march_flag_list "${TARGET_ARCHITECTURE}" "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
   elseif(TARGET_ARCHITECTURE STREQUAL "penryn")
      list(APPEND _march_flag_list "penryn" "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
      message(STATUS "Sadly the Penryn architecture exists in variants with SSE4.1 and without SSE4.1.")
      if(_cpu_flags MATCHES "sse4_1")
         message(STATUS "SSE4.1: enabled (auto-detected from this computer's CPU flags)")
         list(APPEND _available_vector_units_list "sse4.1")
      else()
         message(STATUS "SSE4.1: disabled (auto-detected from this computer's CPU flags)")
      endif()
   elseif(TARGET_ARCHITECTURE STREQUAL "nehalem" OR TARGET_ARCHITECTURE STREQUAL "westmere")
      # The architecture name doubles as the preferred -march value.
      list(APPEND _march_flag_list "${TARGET_ARCHITECTURE}" "corei7" "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
   elseif(TARGET_ARCHITECTURE STREQUAL "sandy-bridge")
      list(APPEND _march_flag_list "sandybridge" "corei7-avx" "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx")
   elseif(TARGET_ARCHITECTURE STREQUAL "k8")
      list(APPEND _march_flag_list "k8")
      list(APPEND _available_vector_units_list "sse" "sse2")
   elseif(TARGET_ARCHITECTURE STREQUAL "k8-sse3")
      list(APPEND _march_flag_list "k8-sse3" "k8")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
   elseif(TARGET_ARCHITECTURE STREQUAL "interlagos" OR TARGET_ARCHITECTURE STREQUAL "bulldozer")
      list(APPEND _march_flag_list "bulldozer" "barcelona" "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4")
   elseif(TARGET_ARCHITECTURE STREQUAL "barcelona"
         OR TARGET_ARCHITECTURE STREQUAL "istanbul"
         OR TARGET_ARCHITECTURE STREQUAL "magny-cours")
      list(APPEND _march_flag_list "barcelona" "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
   elseif(TARGET_ARCHITECTURE STREQUAL "generic")
      list(APPEND _march_flag_list "generic")
   else()
      message(FATAL_ERROR "Unknown target architecture: \"${TARGET_ARCHITECTURE}\". Please set TARGET_ARCHITECTURE to a supported value.")
   endif()

   # Which vector units does the selected architecture offer?
   set(_disable_vector_unit_list)
   set(_enable_vector_unit_list)
   _my_find(_available_vector_units_list "sse2" SSE2_FOUND)
   _my_find(_available_vector_units_list "sse3" SSE3_FOUND)
   _my_find(_available_vector_units_list "ssse3" SSSE3_FOUND)
   _my_find(_available_vector_units_list "sse4.1" SSE4_1_FOUND)
   _my_find(_available_vector_units_list "sse4.2" SSE4_2_FOUND)
   _my_find(_available_vector_units_list "sse4a" SSE4a_FOUND)
   if(DEFINED Vc_AVX_INTRINSICS_BROKEN AND Vc_AVX_INTRINSICS_BROKEN)
      # AVX and the extensions handled together with it default to off when
      # the compiler was flagged as broken.
      UserWarning("AVX disabled per default because of old/broken compiler")
      set(AVX_FOUND false)
      set(XOP_FOUND false)
      set(FMA4_FOUND false)
   else()
      _my_find(_available_vector_units_list "avx" AVX_FOUND)
      _my_find(_available_vector_units_list "xop" XOP_FOUND)
      _my_find(_available_vector_units_list "fma4" FMA4_FOUND)
   endif()

   # Expose the result as (advanced) options the user may still override.
   set(USE_SSE2 ${SSE2_FOUND} CACHE BOOL
      "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." ${_force})
   set(USE_SSE3 ${SSE3_FOUND} CACHE BOOL
      "Use SSE3. If SSE3 instructions are not enabled they will be emulated." ${_force})
   set(USE_SSSE3 ${SSSE3_FOUND} CACHE BOOL
      "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." ${_force})
   set(USE_SSE4_1 ${SSE4_1_FOUND} CACHE BOOL
      "Use SSE4.1. If SSE4.1 instructions are not enabled they will be emulated." ${_force})
   set(USE_SSE4_2 ${SSE4_2_FOUND} CACHE BOOL
      "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." ${_force})
   set(USE_SSE4a ${SSE4a_FOUND} CACHE BOOL
      "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." ${_force})
   set(USE_AVX ${AVX_FOUND} CACHE BOOL
      "Use AVX. This will double some of the vector sizes relative to SSE." ${_force})
   set(USE_XOP ${XOP_FOUND} CACHE BOOL
      "Use XOP." ${_force})
   set(USE_FMA4 ${FMA4_FOUND} CACHE BOOL
      "Use FMA4." ${_force})
   mark_as_advanced(USE_SSE2 USE_SSE3 USE_SSSE3 USE_SSE4_1 USE_SSE4_2 USE_SSE4a USE_AVX USE_XOP USE_FMA4)

   # Sort every unit into the enable or the disable list. The order of the
   # calls determines the order of the generated compiler switches.
   _ofa_sort_vector_unit(USE_SSE2   "sse2")
   _ofa_sort_vector_unit(USE_SSE3   "sse3")
   _ofa_sort_vector_unit(USE_SSSE3  "ssse3")
   _ofa_sort_vector_unit(USE_SSE4_1 "sse4.1")
   _ofa_sort_vector_unit(USE_SSE4_2 "sse4.2")
   _ofa_sort_vector_unit(USE_SSE4a  "sse4a")
   _ofa_sort_vector_unit(USE_AVX    "avx")
   if(USE_AVX)
      # With AVX enabled the SSE intrinsics should be encoded with the VEX
      # prefix as well; integer operations (which rely on the older SSE
      # intrinsics) would otherwise always pay a large penalty.
      list(APPEND _enable_vector_unit_list "sse2avx")
   endif()
   _ofa_sort_vector_unit(USE_XOP    "xop")
   _ofa_sort_vector_unit(USE_FMA4   "fma4")

   if(MSVC)
      # MSVC on 32 bit can select /arch:SSE2 (since 2010 also /arch:AVX)
      # MSVC on 64 bit cannot select anything (should have changed with MSVC 2010)
      _my_find(_enable_vector_unit_list "avx" _avx)
      set(_avx_flag FALSE)
      if(_avx)
         AddCompilerFlag("/arch:AVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _avx_flag)
      endif()
      if(NOT _avx_flag)
         # /arch:AVX was not requested or not accepted: fall back to SSE2.
         _my_find(_enable_vector_unit_list "sse2" _found)
         if(_found)
            AddCompilerFlag("/arch:SSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
         endif()
      endif()
      # Define __<UNIT>__ for every enabled unit, e.g. "sse4.1" -> __SSE4_1__.
      foreach(_unit ${_enable_vector_unit_list})
         string(TOUPPER "__${_unit}__" _unit_macro)
         string(REPLACE "." "_" _unit_macro "${_unit_macro}")
         add_definitions("-D${_unit_macro}")
      endforeach()
   elseif(CMAKE_CXX_COMPILER MATCHES "/(icpc|icc)$") # ICC (on Linux)
      # Pick a single -x<LEVEL> switch for the best vector unit the
      # architecture offers. The switch suffix is the upper-cased unit name.
      foreach(_unit "avx" "sse4.2" "sse4.1" "ssse3" "sse3" "sse2")
         _my_find(_available_vector_units_list "${_unit}" _found)
         if(_found)
            string(TOUPPER "${_unit}" _icc_level)
            if(_unit STREQUAL "sse3")
               # If the target host is an AMD machine then we still want to
               # use -xSSE2 because the binary would refuse to run at all
               # otherwise.
               _my_find(_march_flag_list "barcelona" _amd_target)
               if(NOT _amd_target)
                  _my_find(_march_flag_list "k8-sse3" _amd_target)
               endif()
               if(_amd_target)
                  set(_icc_level "SSE2")
               endif()
            endif()
            AddCompilerFlag("-x${_icc_level}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
            break()
         endif()
      endforeach()
   else() # not MSVC and not ICC => GCC, Clang, Open64
      # The first -march candidate the compiler accepts is used.
      foreach(_march ${_march_flag_list})
         AddCompilerFlag("-march=${_march}" CXX_RESULT _good CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
         if(_good)
            break()
         endif()
      endforeach()
      # Switch every vector unit on or off explicitly.
      foreach(_unit ${_enable_vector_unit_list})
         AddCompilerFlag("-m${_unit}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
      endforeach()
      foreach(_unit ${_disable_vector_unit_list})
         AddCompilerFlag("-mno-${_unit}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
      endforeach()
   endif()
endmacro()