]> git.uio.no Git - u/mrichter/AliRoot.git/blame - Vc/cmake/OptimizeForArchitecture.cmake
Vc package added (version 0.6.79-dev)
[u/mrichter/AliRoot.git] / Vc / cmake / OptimizeForArchitecture.cmake
CommitLineData
# Directory that contains this module file; used to locate sibling modules.
get_filename_component(
  _currentDir
  "${CMAKE_CURRENT_LIST_FILE}"
  PATH
)

# AddCompilerFlag.cmake lives next to this file and provides the
# AddCompilerFlag() command used by OptimizeForArchitecture() below.
include("${_currentDir}/AddCompilerFlag.cmake")
3
# _my_find(list_var value result_var)
#
# Sets the variable named by result_var to TRUE when value is an element of
# the list stored in the variable named by list_var, and to FALSE otherwise.
#
#   list_var    name (not value) of the list variable to search
#   value       element to look for
#   result_var  name of the variable receiving TRUE / FALSE
#
# This is a macro, so the scratch variable `_found` is written in the caller's
# scope. After the call it holds the index reported by list(FIND) (-1 if the
# element is missing), unless result_var is itself `_found`, in which case it
# holds the boolean result.
macro(_my_find _list _value _ret)
  list(FIND ${_list} "${_value}" _found)
  if(_found GREATER -1)
    # list(FIND) reported a valid index: the element is present.
    set(${_ret} TRUE)
  else()
    set(${_ret} FALSE)
  endif()
endmacro()
12
# AutodetectHostArchitecture()
#
# Inspects the CPU of the machine CMake is running on and sets
# TARGET_ARCHITECTURE to one of the architecture names understood by
# OptimizeForArchitecture(). If the CPU is not recognized the value stays
# "generic".
#
# Because this is a macro, everything is written in the caller's scope:
#   TARGET_ARCHITECTURE           detected architecture name
#   Vc_ARCHITECTURE_FLAGS         reset to empty
#   _vendor_id, _cpu_family,
#   _cpu_model                    raw CPU identification values
#   _cpu_flags                    CPU feature flags as a space separated,
#                                 lower-case string using "_" instead of "."
#                                 (e.g. "sse4_1"); set on Linux and Darwin only
#   _available_vector_units_list  appended to for Intel NetBurst CPUs, which
#                                 have no architecture name of their own
#
# CPU information is read from /proc/cpuinfo on Linux, from sysctl on Darwin
# and from the registry on Windows. Other systems are left at "generic".
macro(AutodetectHostArchitecture)
  set(TARGET_ARCHITECTURE "generic")
  set(Vc_ARCHITECTURE_FLAGS)
  set(_vendor_id)
  set(_cpu_family)
  set(_cpu_model)
  set(_cpu_flags)

  if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    file(READ "/proc/cpuinfo" _cpuinfo)
    # The leading ".*" is greedy, so each expression picks up the entry of the
    # last processor listed. If a key is missing the input is returned
    # unchanged, which then simply fails the comparisons further down.
    string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}")
    string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}")
    string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}")
    # Capture the complete flags line, not just its first word; otherwise
    # later checks such as `_cpu_flags MATCHES "sse4_1"` can never succeed.
    # Requiring a newline directly before "flags" keeps lines like
    # "vmx flags : ..." from being picked up instead.
    string(REGEX REPLACE ".*\nflags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}")
  elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
    # execute_process() replaces the deprecated exec_program() command.
    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.vendor
                    OUTPUT_VARIABLE _vendor_id
                    OUTPUT_STRIP_TRAILING_WHITESPACE
                    ERROR_QUIET)
    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.model
                    OUTPUT_VARIABLE _cpu_model
                    OUTPUT_STRIP_TRAILING_WHITESPACE
                    ERROR_QUIET)
    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.family
                    OUTPUT_VARIABLE _cpu_family
                    OUTPUT_STRIP_TRAILING_WHITESPACE
                    ERROR_QUIET)
    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.features
                    OUTPUT_VARIABLE _cpu_flags
                    OUTPUT_STRIP_TRAILING_WHITESPACE
                    ERROR_QUIET)
    # Normalize to the spelling used in /proc/cpuinfo ("SSE4.1" -> "sse4_1").
    string(TOLOWER "${_cpu_flags}" _cpu_flags)
    string(REPLACE "." "_" _cpu_flags "${_cpu_flags}")
  elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
    # get_filename_component() is used here only to read registry values.
    get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE)
    get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE)
    mark_as_advanced(_vendor_id _cpu_id)
    string(REGEX REPLACE ".* Family ([0-9]+) .*" "\\1" _cpu_family "${_cpu_id}")
    string(REGEX REPLACE ".* Model ([0-9]+) .*" "\\1" _cpu_model "${_cpu_id}")
  endif()

  if(_vendor_id STREQUAL "GenuineIntel")
    if(_cpu_family EQUAL 6)
      # Any recent Intel CPU except NetBurst
      if(_cpu_model EQUAL 46) # Xeon 7500 series
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 45) # Xeon TNG
        set(TARGET_ARCHITECTURE "sandy-bridge")
      elseif(_cpu_model EQUAL 44) # Xeon 5600 series
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 42) # Core TNG
        set(TARGET_ARCHITECTURE "sandy-bridge")
      elseif(_cpu_model EQUAL 37) # Core i7/i5/i3
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 31) # Core i7/i5
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 30) # Core i7/i5
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 29)
        set(TARGET_ARCHITECTURE "penryn")
      elseif(_cpu_model EQUAL 28)
        set(TARGET_ARCHITECTURE "atom")
      elseif(_cpu_model EQUAL 26)
        set(TARGET_ARCHITECTURE "nehalem")
      elseif(_cpu_model EQUAL 23)
        set(TARGET_ARCHITECTURE "penryn")
      elseif(_cpu_model EQUAL 15)
        set(TARGET_ARCHITECTURE "merom")
      elseif(_cpu_model EQUAL 14)
        set(TARGET_ARCHITECTURE "core")
      elseif(_cpu_model LESS 14)
        message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the generic CPU settings with SSE2.")
        set(TARGET_ARCHITECTURE "generic")
      else()
        # Unknown model number above 14: fall back to the Merom settings.
        message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.")
        set(TARGET_ARCHITECTURE "merom")
      endif()
    elseif(_cpu_family EQUAL 7) # Itanium (not supported)
      message(WARNING "Your CPU (Itanium: family ${_cpu_family}, model ${_cpu_model}) is not supported by OptimizeForArchitecture.cmake.")
    elseif(_cpu_family EQUAL 15) # NetBurst
      # There is no architecture name for NetBurst: TARGET_ARCHITECTURE stays
      # "generic" and the vector units are announced directly instead.
      list(APPEND _available_vector_units_list "sse" "sse2")
      if(_cpu_model GREATER 2) # Not sure whether this must be 3 or even 4 instead
        list(APPEND _available_vector_units_list "sse3")
      endif()
    endif()
  elseif(_vendor_id STREQUAL "AuthenticAMD")
    if(_cpu_family EQUAL 21) # 15h
      set(TARGET_ARCHITECTURE "bulldozer")
    elseif(_cpu_family EQUAL 20) # 14h
      # No dedicated settings: stays "generic".
    elseif(_cpu_family EQUAL 18) # 12h
      # No dedicated settings: stays "generic".
    elseif(_cpu_family EQUAL 16) # 10h
      set(TARGET_ARCHITECTURE "barcelona")
    elseif(_cpu_family EQUAL 15)
      set(TARGET_ARCHITECTURE "k8")
      if(_cpu_model GREATER 64) # I don't know the right number to put here. This is just a guess from the hardware I have access to
        set(TARGET_ARCHITECTURE "k8-sse3")
      endif()
    endif()
  endif()
endmacro()
98
# OptimizeForArchitecture()
#
# Selects compiler flags for the CPU named by the TARGET_ARCHITECTURE cache
# entry ("auto" triggers AutodetectHostArchitecture()).
#
# Steps:
#   1. Map the architecture name to a list of candidate -march values
#      (_march_flag_list, most specific first) and to the vector units that
#      architecture provides (_available_vector_units_list).
#   2. Publish one USE_<unit> cache option per vector unit, defaulting to
#      whether the architecture provides it. The defaults overwrite existing
#      values only when TARGET_ARCHITECTURE changed since the last run.
#   3. Translate the USE_<unit> options into compiler flags, handed to
#      AddCompilerFlag() with Vc_ARCHITECTURE_FLAGS as its CXX_FLAGS argument:
#        MSVC  /arch:AVX or /arch:SSE2, plus __<UNIT>__ definitions
#        ICC   a single -x<level> option
#        other the first accepted -march=<name>, then -m<unit> / -mno-<unit>
#
# This is a macro: all helper variables are set in the caller's scope.
# An unknown architecture name aborts configuration with a FATAL_ERROR.
macro(OptimizeForArchitecture)
  set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used.\nSetting the value to \"auto\" will try to optimize for the architecture where cmake is called.\nOther supported values are: \"generic\", \"core\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\".")

  # _force is FORCE only when the requested target differs from the one seen
  # on the previous run, so that manual USE_* edits survive a plain re-run.
  set(_force)
  if(NOT _last_target_arch STREQUAL "${TARGET_ARCHITECTURE}")
    message(STATUS "target changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"")
    set(_force FORCE)
  endif()
  set(_last_target_arch "${TARGET_ARCHITECTURE}" CACHE STRING "" FORCE)
  mark_as_advanced(_last_target_arch)
  string(TOLOWER "${TARGET_ARCHITECTURE}" TARGET_ARCHITECTURE)

  set(_march_flag_list)
  set(_available_vector_units_list)

  if(TARGET_ARCHITECTURE STREQUAL "auto")
    # May also append to _available_vector_units_list (NetBurst CPUs).
    AutodetectHostArchitecture()
    message(STATUS "Detected CPU: ${TARGET_ARCHITECTURE}")
  endif()

  # --- 1. architecture name -> -march candidates and vector units ----------
  if(TARGET_ARCHITECTURE STREQUAL "core")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
  elseif(TARGET_ARCHITECTURE STREQUAL "merom")
    list(APPEND _march_flag_list "merom" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
  elseif(TARGET_ARCHITECTURE STREQUAL "penryn")
    list(APPEND _march_flag_list "penryn" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
    message(STATUS "Sadly the Penryn architecture exists in variants with SSE4.1 and without SSE4.1.")
    # _cpu_flags is filled in by AutodetectHostArchitecture().
    if(_cpu_flags MATCHES "sse4_1")
      message(STATUS "SSE4.1: enabled (auto-detected from this computer's CPU flags)")
      list(APPEND _available_vector_units_list "sse4.1")
    else()
      message(STATUS "SSE4.1: disabled (auto-detected from this computer's CPU flags)")
    endif()
  elseif(TARGET_ARCHITECTURE STREQUAL "nehalem")
    list(APPEND _march_flag_list "nehalem" "corei7" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
  elseif(TARGET_ARCHITECTURE STREQUAL "westmere")
    list(APPEND _march_flag_list "westmere" "corei7" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
  elseif(TARGET_ARCHITECTURE STREQUAL "sandy-bridge")
    list(APPEND _march_flag_list "sandybridge" "corei7-avx" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx")
  elseif(TARGET_ARCHITECTURE STREQUAL "atom")
    list(APPEND _march_flag_list "atom" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
  elseif(TARGET_ARCHITECTURE STREQUAL "k8")
    list(APPEND _march_flag_list "k8")
    list(APPEND _available_vector_units_list "sse" "sse2")
  elseif(TARGET_ARCHITECTURE STREQUAL "k8-sse3")
    list(APPEND _march_flag_list "k8-sse3" "k8")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
  elseif(TARGET_ARCHITECTURE STREQUAL "interlagos" OR TARGET_ARCHITECTURE STREQUAL "bulldozer")
    list(APPEND _march_flag_list "bulldozer" "barcelona" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4")
  elseif(TARGET_ARCHITECTURE STREQUAL "barcelona"
      OR TARGET_ARCHITECTURE STREQUAL "istanbul"
      OR TARGET_ARCHITECTURE STREQUAL "magny-cours")
    list(APPEND _march_flag_list "barcelona" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
  elseif(TARGET_ARCHITECTURE STREQUAL "generic")
    list(APPEND _march_flag_list "generic")
  else()
    message(FATAL_ERROR "Unknown target architecture: \"${TARGET_ARCHITECTURE}\". Please set TARGET_ARCHITECTURE to a supported value.")
  endif()

  # --- 2. per vector unit cache options --------------------------------------
  set(_disable_vector_unit_list)
  set(_enable_vector_unit_list)
  _my_find(_available_vector_units_list "sse2" SSE2_FOUND)
  _my_find(_available_vector_units_list "sse3" SSE3_FOUND)
  _my_find(_available_vector_units_list "ssse3" SSSE3_FOUND)
  _my_find(_available_vector_units_list "sse4.1" SSE4_1_FOUND)
  _my_find(_available_vector_units_list "sse4.2" SSE4_2_FOUND)
  _my_find(_available_vector_units_list "sse4a" SSE4a_FOUND)
  if(Vc_AVX_INTRINSICS_BROKEN)
    # The AVX based units default to off when the compiler cannot be trusted.
    UserWarning("AVX disabled per default because of old/broken compiler")
    set(AVX_FOUND false)
    set(XOP_FOUND false)
    set(FMA4_FOUND false)
  else()
    _my_find(_available_vector_units_list "avx" AVX_FOUND)
    _my_find(_available_vector_units_list "xop" XOP_FOUND)
    _my_find(_available_vector_units_list "fma4" FMA4_FOUND)
  endif()
  set(USE_SSE2 ${SSE2_FOUND} CACHE BOOL "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." ${_force})
  set(USE_SSE3 ${SSE3_FOUND} CACHE BOOL "Use SSE3. If SSE3 instructions are not enabled they will be emulated." ${_force})
  set(USE_SSSE3 ${SSSE3_FOUND} CACHE BOOL "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." ${_force})
  set(USE_SSE4_1 ${SSE4_1_FOUND} CACHE BOOL "Use SSE4.1. If SSE4.1 instructions are not enabled they will be emulated." ${_force})
  set(USE_SSE4_2 ${SSE4_2_FOUND} CACHE BOOL "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." ${_force})
  set(USE_SSE4a ${SSE4a_FOUND} CACHE BOOL "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." ${_force})
  set(USE_AVX ${AVX_FOUND} CACHE BOOL "Use AVX. This will double some of the vector sizes relative to SSE." ${_force})
  set(USE_XOP ${XOP_FOUND} CACHE BOOL "Use XOP." ${_force})
  set(USE_FMA4 ${FMA4_FOUND} CACHE BOOL "Use FMA4." ${_force})
  mark_as_advanced(USE_SSE2 USE_SSE3 USE_SSSE3 USE_SSE4_1 USE_SSE4_2 USE_SSE4a USE_AVX USE_XOP USE_FMA4)

  # Sort every unit into the enable or the disable list according to its
  # option. The order is kept stable because flags are emitted in list order.
  if(USE_SSE2)
    list(APPEND _enable_vector_unit_list "sse2")
  else()
    list(APPEND _disable_vector_unit_list "sse2")
  endif()
  if(USE_SSE3)
    list(APPEND _enable_vector_unit_list "sse3")
  else()
    list(APPEND _disable_vector_unit_list "sse3")
  endif()
  if(USE_SSSE3)
    list(APPEND _enable_vector_unit_list "ssse3")
  else()
    list(APPEND _disable_vector_unit_list "ssse3")
  endif()
  if(USE_SSE4_1)
    list(APPEND _enable_vector_unit_list "sse4.1")
  else()
    list(APPEND _disable_vector_unit_list "sse4.1")
  endif()
  if(USE_SSE4_2)
    list(APPEND _enable_vector_unit_list "sse4.2")
  else()
    list(APPEND _disable_vector_unit_list "sse4.2")
  endif()
  if(USE_SSE4a)
    list(APPEND _enable_vector_unit_list "sse4a")
  else()
    list(APPEND _disable_vector_unit_list "sse4a")
  endif()
  if(USE_AVX)
    list(APPEND _enable_vector_unit_list "avx")
    # we want SSE intrinsics to result in instructions using the VEX prefix.
    # Otherwise integer ops (which require the older SSE intrinsics) would
    # always have a large penalty.
    list(APPEND _enable_vector_unit_list "sse2avx")
  else()
    list(APPEND _disable_vector_unit_list "avx")
  endif()
  if(USE_XOP)
    list(APPEND _enable_vector_unit_list "xop")
  else()
    list(APPEND _disable_vector_unit_list "xop")
  endif()
  if(USE_FMA4)
    list(APPEND _enable_vector_unit_list "fma4")
  else()
    list(APPEND _disable_vector_unit_list "fma4")
  endif()

  # --- 3. compiler specific flags --------------------------------------------
  if(MSVC)
    # MSVC on 32 bit can select /arch:SSE2 (since 2010 also /arch:AVX)
    # MSVC on 64 bit cannot select anything (should have changed with MSVC 2010)
    _my_find(_enable_vector_unit_list "avx" _avx)
    set(_avx_flag FALSE)
    if(_avx)
      AddCompilerFlag("/arch:AVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _avx_flag)
    endif()
    if(NOT _avx_flag)
      # AVX not requested or /arch:AVX not accepted: fall back to SSE2.
      _my_find(_enable_vector_unit_list "sse2" _found)
      if(_found)
        AddCompilerFlag("/arch:SSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
      endif()
    endif()
    # Define a __<UNIT>__ macro for every enabled unit, e.g. "sse4.1" becomes
    # __SSE4_1__.
    foreach(_flag ${_enable_vector_unit_list})
      string(TOUPPER "${_flag}" _flag)
      string(REPLACE "." "_" _flag "__${_flag}__")
      add_definitions("-D${_flag}")
    endforeach()
  elseif(CMAKE_CXX_COMPILER MATCHES "/(icpc|icc)$") # ICC (on Linux)
    # A single -x<level> option is passed: the highest enabled unit wins. The
    # decision is based on the USE_* options (not on the raw list of units the
    # architecture offers) so that user overrides and the
    # Vc_AVX_INTRINSICS_BROKEN default are honoured, as for the other
    # compilers.
    if(USE_AVX)
      AddCompilerFlag("-xAVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
    elseif(USE_SSE4_2)
      AddCompilerFlag("-xSSE4.2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
    elseif(USE_SSE4_1)
      AddCompilerFlag("-xSSE4.1" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
    elseif(USE_SSSE3)
      AddCompilerFlag("-xSSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
    elseif(USE_SSE3)
      # If the target host is an AMD machine then we still want to use -xSSE2 because the binary would refuse to run at all otherwise
      _my_find(_march_flag_list "barcelona" _found)
      if(NOT _found)
        _my_find(_march_flag_list "k8-sse3" _found)
      endif()
      if(_found)
        AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
      else()
        AddCompilerFlag("-xSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
      endif()
    elseif(USE_SSE2)
      AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
    endif()
  else() # not MSVC and not ICC => GCC, Clang, Open64
    # Try the -march candidates from most to least specific and keep the first
    # one that is reported as working.
    foreach(_flag ${_march_flag_list})
      AddCompilerFlag("-march=${_flag}" CXX_RESULT _good CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
      if(_good)
        break()
      endif()
    endforeach()
    foreach(_flag ${_enable_vector_unit_list})
      AddCompilerFlag("-m${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
    endforeach()
    foreach(_flag ${_disable_vector_unit_list})
      AddCompilerFlag("-mno-${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
    endforeach()
  endif()
endmacro()