]> git.uio.no Git - u/mrichter/AliRoot.git/blame - Vc/cmake/OptimizeForArchitecture.cmake
update to Vc 0.7.3-dev
[u/mrichter/AliRoot.git] / Vc / cmake / OptimizeForArchitecture.cmake
CommitLineData
c017a39f 1#=============================================================================
2# Copyright 2010-2013 Matthias Kretz <kretz@kde.org>
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8# * Redistributions of source code must retain the above copyright notice,
9# this list of conditions and the following disclaimer.
10#
11# * Redistributions in binary form must reproduce the above copyright notice,
12# this list of conditions and the following disclaimer in the documentation
13# and/or other materials provided with the distribution.
14#
15# * The names of Kitware, Inc., the Insight Consortium, or the names of
16# any consortium members, or of any contributors, may not be used to
17# endorse or promote products derived from this software without
18# specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
21# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
24# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30#=============================================================================
31
f22341db 32get_filename_component(_currentDir "${CMAKE_CURRENT_LIST_FILE}" PATH)
33include("${_currentDir}/AddCompilerFlag.cmake")
c017a39f 34include(CheckIncludeFile)
f22341db 35
# _my_find(<list-variable-name> <value> <result-variable-name>)
#
# Sets <result-variable-name> to TRUE when <value> is an element of the list
# stored in the variable named by <list-variable-name>, and to FALSE otherwise.
#
# This is a macro, so the scratch variable `_found` is written in the caller's
# scope. Callers may pass `_found` itself as the result variable; it then ends
# up holding TRUE/FALSE rather than the list index.
macro(_my_find _list _value _ret)
  list(FIND ${_list} "${_value}" _found)
  if(NOT _found EQUAL -1)
    set(${_ret} TRUE)
  else()
    set(${_ret} FALSE)
  endif()
endmacro()
44
# AutodetectHostArchitecture()
#
# Probes the CPU of the machine that runs CMake and sets TARGET_ARCHITECTURE
# to the matching architecture name ("generic" when nothing more specific is
# recognised). Vc_ARCHITECTURE_FLAGS is cleared as a side effect.
#
# Because this is a macro, all scratch variables live in the caller's scope:
#   _vendor_id, _cpu_family, _cpu_model  - raw CPU identification
#   _cpu_flags                           - feature flag string (Linux/Darwin
#                                          only; left empty elsewhere)
# For Intel NetBurst CPUs no architecture name is chosen; instead the known
# vector units are appended directly to _available_vector_units_list, which
# the caller is expected to have initialised.
macro(AutodetectHostArchitecture)
  set(TARGET_ARCHITECTURE "generic")
  set(Vc_ARCHITECTURE_FLAGS)
  set(_vendor_id)
  set(_cpu_family)
  set(_cpu_model)
  # Reset the flags as well so that a stale value from an earlier run can
  # never be mistaken for freshly detected CPU features.
  set(_cpu_flags)

  # The *host* system is what gets probed, so key off CMAKE_HOST_SYSTEM_NAME
  # rather than the (possibly cross-compiled) target system name.
  if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux")
    # /proc may be unavailable (e.g. restricted containers); stay "generic"
    # instead of aborting the configure step in file(READ).
    if(EXISTS "/proc/cpuinfo")
      file(READ "/proc/cpuinfo" _cpuinfo)
      # The leading greedy ".*" makes each expression pick up the last
      # matching entry in the file. If a key is missing, REGEX REPLACE
      # returns the input unchanged, which matches none of the tests below.
      string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}")
      string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}")
      string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}")
      string(REGEX REPLACE ".*flags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}")
    endif()
  elseif(CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin")
    # execute_process() replaces the deprecated exec_program(). A failing
    # sysctl query simply leaves the variable empty.
    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.vendor
      OUTPUT_VARIABLE _vendor_id
      OUTPUT_STRIP_TRAILING_WHITESPACE
      ERROR_QUIET)
    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.model
      OUTPUT_VARIABLE _cpu_model
      OUTPUT_STRIP_TRAILING_WHITESPACE
      ERROR_QUIET)
    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.family
      OUTPUT_VARIABLE _cpu_family
      OUTPUT_STRIP_TRAILING_WHITESPACE
      ERROR_QUIET)
    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.features
      OUTPUT_VARIABLE _cpu_flags
      OUTPUT_STRIP_TRAILING_WHITESPACE
      ERROR_QUIET)
    # Normalise to the spelling tested elsewhere (e.g. "SSE4.1" -> "sse4_1").
    string(TOLOWER "${_cpu_flags}" _cpu_flags)
    string(REPLACE "." "_" _cpu_flags "${_cpu_flags}")
  elseif(CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
    # Read the CPU description of processor 0 from the registry.
    get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE)
    get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE)
    mark_as_advanced(_vendor_id _cpu_id)
    string(REGEX REPLACE ".* Family ([0-9]+) .*" "\\1" _cpu_family "${_cpu_id}")
    string(REGEX REPLACE ".* Model ([0-9]+) .*" "\\1" _cpu_model "${_cpu_id}")
  endif()

  if(_vendor_id STREQUAL "GenuineIntel")
    if(_cpu_family EQUAL 6)
      # Any recent Intel CPU except NetBurst
      if(_cpu_model EQUAL 58)
        set(TARGET_ARCHITECTURE "ivy-bridge")
      elseif(_cpu_model EQUAL 47) # Xeon E7 4860
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 46) # Xeon 7500 series
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 45) # Xeon TNG
        set(TARGET_ARCHITECTURE "sandy-bridge")
      elseif(_cpu_model EQUAL 44) # Xeon 5600 series
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 42) # Core TNG
        set(TARGET_ARCHITECTURE "sandy-bridge")
      elseif(_cpu_model EQUAL 37) # Core i7/i5/i3
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 31) # Core i7/i5
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 30) # Core i7/i5
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 29)
        set(TARGET_ARCHITECTURE "penryn")
      elseif(_cpu_model EQUAL 28)
        set(TARGET_ARCHITECTURE "atom")
      elseif(_cpu_model EQUAL 26)
        set(TARGET_ARCHITECTURE "nehalem")
      elseif(_cpu_model EQUAL 23)
        set(TARGET_ARCHITECTURE "penryn")
      elseif(_cpu_model EQUAL 15)
        set(TARGET_ARCHITECTURE "merom")
      elseif(_cpu_model EQUAL 14)
        set(TARGET_ARCHITECTURE "core")
      elseif(_cpu_model LESS 14)
        message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the generic CPU settings with SSE2.")
        set(TARGET_ARCHITECTURE "generic")
      else()
        # Unknown but newer than the oldest known model: fall back to merom.
        message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.")
        set(TARGET_ARCHITECTURE "merom")
      endif()
    elseif(_cpu_family EQUAL 7) # Itanium (not supported)
      message(WARNING "Your CPU (Itanium: family ${_cpu_family}, model ${_cpu_model}) is not supported by OptimizeForArchitecture.cmake.")
    elseif(_cpu_family EQUAL 15) # NetBurst
      # TARGET_ARCHITECTURE stays "generic"; only the vector units are recorded.
      list(APPEND _available_vector_units_list "sse" "sse2")
      if(_cpu_model GREATER 2) # Not sure whether this must be 3 or even 4 instead
        list(APPEND _available_vector_units_list "sse3")
      endif()
    endif()
  elseif(_vendor_id STREQUAL "AuthenticAMD")
    if(_cpu_family EQUAL 21) # 15h
      if(_cpu_model LESS 2)
        set(TARGET_ARCHITECTURE "bulldozer")
      else()
        set(TARGET_ARCHITECTURE "piledriver")
      endif()
    elseif(_cpu_family EQUAL 20) # 14h
      # No dedicated settings; TARGET_ARCHITECTURE stays "generic".
    elseif(_cpu_family EQUAL 18) # 12h
      # No dedicated settings; TARGET_ARCHITECTURE stays "generic".
    elseif(_cpu_family EQUAL 16) # 10h
      set(TARGET_ARCHITECTURE "barcelona")
    elseif(_cpu_family EQUAL 15)
      set(TARGET_ARCHITECTURE "k8")
      if(_cpu_model GREATER 64) # I don't know the right number to put here. This is just a guess from the hardware I have access to
        set(TARGET_ARCHITECTURE "k8-sse3")
      endif()
    endif()
  endif()
endmacro()
138
# OptimizeForArchitecture()
#
# Translates the TARGET_ARCHITECTURE cache entry into compiler flags, which are
# collected in Vc_ARCHITECTURE_FLAGS (and, for MSVC, into -D definitions).
#
# Steps:
#   1. Declare TARGET_ARCHITECTURE and detect whether it changed since the last
#      run; if so the USE_* cache options below are overwritten (FORCE).
#   2. "auto" is resolved through AutodetectHostArchitecture().
#   3. The architecture name is mapped to a list of candidate -march values
#      (_march_flag_list, most specific first) and to the vector units the CPU
#      offers (_available_vector_units_list).
#   4. Unless the architecture is "none", USE_<unit> cache options are derived
#      and turned into compiler-specific flags (MSVC, ICC, or GCC-like).
#
# AddCompilerFlag comes from the included AddCompilerFlag.cmake; UserWarning is
# expected to be provided by the including project. Being a macro, every
# helper variable used here is visible in the caller's scope.
macro(OptimizeForArchitecture)
  set(TARGET_ARCHITECTURE "none" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used.\nSetting the value to \"auto\" will try to optimize for the architecture where cmake is called.\nOther supported values are: \"none\", \"generic\", \"core\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"ivy-bridge\", \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\", \"piledriver\".")

  # _force becomes FORCE when the requested architecture differs from the one
  # remembered in the cache, so that the USE_* options get re-derived.
  set(_force)
  if(NOT _last_target_arch STREQUAL "${TARGET_ARCHITECTURE}")
    message(STATUS "target changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"")
    set(_force FORCE)
  endif()
  set(_last_target_arch "${TARGET_ARCHITECTURE}" CACHE STRING "" FORCE)
  mark_as_advanced(_last_target_arch)
  string(TOLOWER "${TARGET_ARCHITECTURE}" TARGET_ARCHITECTURE)

  set(_march_flag_list)
  set(_available_vector_units_list)

  if(TARGET_ARCHITECTURE STREQUAL "auto")
    AutodetectHostArchitecture()
    message(STATUS "Detected CPU: ${TARGET_ARCHITECTURE}")
  endif()

  # Map the architecture name to -march candidates and available vector units.
  if(TARGET_ARCHITECTURE STREQUAL "core")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
  elseif(TARGET_ARCHITECTURE STREQUAL "merom")
    list(APPEND _march_flag_list "merom" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
  elseif(TARGET_ARCHITECTURE STREQUAL "penryn")
    list(APPEND _march_flag_list "penryn" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
    message(STATUS "Sadly the Penryn architecture exists in variants with SSE4.1 and without SSE4.1.")
    # _cpu_flags is filled in by AutodetectHostArchitecture().
    if(_cpu_flags MATCHES "sse4_1")
      message(STATUS "SSE4.1: enabled (auto-detected from this computer's CPU flags)")
      list(APPEND _available_vector_units_list "sse4.1")
    else()
      message(STATUS "SSE4.1: disabled (auto-detected from this computer's CPU flags)")
    endif()
  elseif(TARGET_ARCHITECTURE STREQUAL "nehalem")
    list(APPEND _march_flag_list "nehalem" "corei7" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
  elseif(TARGET_ARCHITECTURE STREQUAL "westmere")
    list(APPEND _march_flag_list "westmere" "corei7" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
  elseif(TARGET_ARCHITECTURE STREQUAL "ivy-bridge")
    list(APPEND _march_flag_list "core-avx-i" "corei7-avx" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx" "rdrnd" "f16c")
  elseif(TARGET_ARCHITECTURE STREQUAL "sandy-bridge")
    list(APPEND _march_flag_list "sandybridge" "corei7-avx" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx")
  elseif(TARGET_ARCHITECTURE STREQUAL "atom")
    list(APPEND _march_flag_list "atom" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
  elseif(TARGET_ARCHITECTURE STREQUAL "k8")
    list(APPEND _march_flag_list "k8")
    list(APPEND _available_vector_units_list "sse" "sse2")
  elseif(TARGET_ARCHITECTURE STREQUAL "k8-sse3")
    list(APPEND _march_flag_list "k8-sse3" "k8")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
  elseif(TARGET_ARCHITECTURE STREQUAL "piledriver")
    list(APPEND _march_flag_list "bdver2" "bdver1" "bulldozer" "barcelona" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4" "fma" "f16c")
  elseif(TARGET_ARCHITECTURE STREQUAL "interlagos" OR TARGET_ARCHITECTURE STREQUAL "bulldozer")
    list(APPEND _march_flag_list "bdver1" "bulldozer" "barcelona" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4")
  elseif(TARGET_ARCHITECTURE STREQUAL "barcelona"
      OR TARGET_ARCHITECTURE STREQUAL "istanbul"
      OR TARGET_ARCHITECTURE STREQUAL "magny-cours")
    list(APPEND _march_flag_list "barcelona" "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
  elseif(TARGET_ARCHITECTURE STREQUAL "generic")
    list(APPEND _march_flag_list "generic")
  elseif(TARGET_ARCHITECTURE STREQUAL "none")
    # Recognised value that intentionally selects nothing; listed here only
    # to keep it out of the error branch below.
  else()
    message(FATAL_ERROR "Unknown target architecture: \"${TARGET_ARCHITECTURE}\". Please set TARGET_ARCHITECTURE to a supported value.")
  endif()

  if(NOT TARGET_ARCHITECTURE STREQUAL "none")
    set(_disable_vector_unit_list)
    set(_enable_vector_unit_list)

    # Which vector units does the selected architecture provide?
    _my_find(_available_vector_units_list "sse2" SSE2_FOUND)
    _my_find(_available_vector_units_list "sse3" SSE3_FOUND)
    _my_find(_available_vector_units_list "ssse3" SSSE3_FOUND)
    _my_find(_available_vector_units_list "sse4.1" SSE4_1_FOUND)
    _my_find(_available_vector_units_list "sse4.2" SSE4_2_FOUND)
    _my_find(_available_vector_units_list "sse4a" SSE4a_FOUND)
    if(Vc_AVX_INTRINSICS_BROKEN)
      # Without working AVX intrinsics XOP and FMA4 are switched off too.
      UserWarning("AVX disabled per default because of old/broken compiler")
      set(AVX_FOUND false)
      set(XOP_FOUND false)
      set(FMA4_FOUND false)
    else()
      _my_find(_available_vector_units_list "avx" AVX_FOUND)
      if(Vc_FMA4_INTRINSICS_BROKEN)
        UserWarning("FMA4 disabled per default because of old/broken compiler")
        set(FMA4_FOUND false)
      else()
        _my_find(_available_vector_units_list "fma4" FMA4_FOUND)
      endif()
      if(Vc_XOP_INTRINSICS_BROKEN)
        UserWarning("XOP disabled per default because of old/broken compiler")
        set(XOP_FOUND false)
      else()
        _my_find(_available_vector_units_list "xop" XOP_FOUND)
      endif()
    endif()

    # User-visible switches; their defaults follow the detection above and are
    # overwritten only when the target architecture changed (_force).
    set(USE_SSE2 ${SSE2_FOUND} CACHE BOOL "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." ${_force})
    set(USE_SSE3 ${SSE3_FOUND} CACHE BOOL "Use SSE3. If SSE3 instructions are not enabled they will be emulated." ${_force})
    set(USE_SSSE3 ${SSSE3_FOUND} CACHE BOOL "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." ${_force})
    set(USE_SSE4_1 ${SSE4_1_FOUND} CACHE BOOL "Use SSE4.1. If SSE4.1 instructions are not enabled they will be emulated." ${_force})
    set(USE_SSE4_2 ${SSE4_2_FOUND} CACHE BOOL "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." ${_force})
    set(USE_SSE4a ${SSE4a_FOUND} CACHE BOOL "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." ${_force})
    set(USE_AVX ${AVX_FOUND} CACHE BOOL "Use AVX. This will double some of the vector sizes relative to SSE." ${_force})
    set(USE_XOP ${XOP_FOUND} CACHE BOOL "Use XOP." ${_force})
    set(USE_FMA4 ${FMA4_FOUND} CACHE BOOL "Use FMA4." ${_force})
    mark_as_advanced(USE_SSE2 USE_SSE3 USE_SSSE3 USE_SSE4_1 USE_SSE4_2 USE_SSE4a USE_AVX USE_XOP USE_FMA4)

    # Sort every unit into the enable or the disable list.
    if(USE_SSE2)
      list(APPEND _enable_vector_unit_list "sse2")
    else()
      list(APPEND _disable_vector_unit_list "sse2")
    endif()
    if(USE_SSE3)
      list(APPEND _enable_vector_unit_list "sse3")
    else()
      list(APPEND _disable_vector_unit_list "sse3")
    endif()
    if(USE_SSSE3)
      list(APPEND _enable_vector_unit_list "ssse3")
    else()
      list(APPEND _disable_vector_unit_list "ssse3")
    endif()
    if(USE_SSE4_1)
      list(APPEND _enable_vector_unit_list "sse4.1")
    else()
      list(APPEND _disable_vector_unit_list "sse4.1")
    endif()
    if(USE_SSE4_2)
      list(APPEND _enable_vector_unit_list "sse4.2")
    else()
      list(APPEND _disable_vector_unit_list "sse4.2")
    endif()
    if(USE_SSE4a)
      list(APPEND _enable_vector_unit_list "sse4a")
    else()
      list(APPEND _disable_vector_unit_list "sse4a")
    endif()
    if(USE_AVX)
      # we want SSE intrinsics to result in instructions using the VEX prefix.
      # Otherwise integer ops (which require the older SSE intrinsics) would
      # always have a large penalty.
      list(APPEND _enable_vector_unit_list "avx" "sse2avx")
    else()
      list(APPEND _disable_vector_unit_list "avx")
    endif()
    if(USE_XOP)
      list(APPEND _enable_vector_unit_list "xop")
    else()
      list(APPEND _disable_vector_unit_list "xop")
    endif()
    if(USE_FMA4)
      list(APPEND _enable_vector_unit_list "fma4")
    else()
      list(APPEND _disable_vector_unit_list "fma4")
    endif()

    if(MSVC)
      # MSVC on 32 bit can select /arch:SSE2 (since 2010 also /arch:AVX)
      # MSVC on 64 bit cannot select anything (should have changed with MSVC 2010)
      _my_find(_enable_vector_unit_list "avx" _avx)
      set(_avx_flag FALSE)
      if(_avx)
        AddCompilerFlag("/arch:AVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _avx_flag)
      endif()
      # Fall back to /arch:SSE2 when /arch:AVX was not requested or rejected.
      if(NOT _avx_flag)
        _my_find(_enable_vector_unit_list "sse2" _found)
        if(_found)
          AddCompilerFlag("/arch:SSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
        endif()
      endif()
      # Define __SSE2__, __SSE4_1__, ... for every enabled unit.
      foreach(_flag ${_enable_vector_unit_list})
        string(TOUPPER "${_flag}" _flag)
        string(REPLACE "." "_" _flag "__${_flag}__")
        add_definitions("-D${_flag}")
      endforeach()
    elseif(CMAKE_CXX_COMPILER MATCHES "/(icpc|icc)$") # ICC (on Linux)
      # Pick the single -x option for the most capable unit the architecture
      # offers, probing from newest to oldest. This consults the list of
      # available units, not the USE_* selection.
      _my_find(_available_vector_units_list "avx2" _found)
      if(_found)
        AddCompilerFlag("-xCORE-AVX2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
      else()
        _my_find(_available_vector_units_list "f16c" _found)
        if(_found)
          AddCompilerFlag("-xCORE-AVX-I" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
        else()
          _my_find(_available_vector_units_list "avx" _found)
          if(_found)
            AddCompilerFlag("-xAVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
          else()
            _my_find(_available_vector_units_list "sse4.2" _found)
            if(_found)
              AddCompilerFlag("-xSSE4.2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
            else()
              _my_find(_available_vector_units_list "sse4.1" _found)
              if(_found)
                AddCompilerFlag("-xSSE4.1" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
              else()
                _my_find(_available_vector_units_list "ssse3" _found)
                if(_found)
                  AddCompilerFlag("-xSSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                else()
                  _my_find(_available_vector_units_list "sse3" _found)
                  if(_found)
                    # If the target host is an AMD machine then we still want to use -xSSE2 because the binary would refuse to run at all otherwise
                    _my_find(_march_flag_list "barcelona" _found)
                    if(NOT _found)
                      _my_find(_march_flag_list "k8-sse3" _found)
                    endif()
                    if(_found)
                      AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                    else()
                      AddCompilerFlag("-xSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                    endif()
                  else()
                    _my_find(_available_vector_units_list "sse2" _found)
                    if(_found)
                      AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                    endif()
                  endif()
                endif()
              endif()
            endif()
          endif()
        endif()
      endif()
    else() # not MSVC and not ICC => GCC, Clang, Open64
      # Use the first -march value the compiler accepts.
      foreach(_flag ${_march_flag_list})
        AddCompilerFlag("-march=${_flag}" CXX_RESULT _good CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
        if(_good)
          break()
        endif()
      endforeach()
      # Enable each requested unit, provided the compiler accepts the -m flag
      # and the matching intrinsics header (if any) can be included.
      foreach(_flag ${_enable_vector_unit_list})
        AddCompilerFlag("-m${_flag}" CXX_RESULT _result)
        if(_result)
          set(_header FALSE)
          if(_flag STREQUAL "sse3")
            set(_header "pmmintrin.h")
          elseif(_flag STREQUAL "ssse3")
            set(_header "tmmintrin.h")
          elseif(_flag STREQUAL "sse4.1" OR _flag STREQUAL "sse4.2")
            set(_header "smmintrin.h")
          elseif(_flag STREQUAL "sse4a")
            set(_header "ammintrin.h")
          elseif(_flag STREQUAL "avx")
            set(_header "immintrin.h")
          elseif(_flag STREQUAL "fma4" OR _flag STREQUAL "xop")
            set(_header "x86intrin.h")
          endif()
          # Name of the cache variable that holds the header check result.
          string(REPLACE "." "_" _resultVar "HAVE_${_header}")
          if(_header)
            CHECK_INCLUDE_FILE("${_header}" ${_resultVar} "-m${_flag}")
            if(NOT ${_resultVar})
              # Header missing: switch the unit off for this run.
              string(TOUPPER "USE_${_flag}" _useVar)
              string(REPLACE "." "_" _useVar "${_useVar}")
              message(STATUS "disabling ${_useVar} because ${_header} is missing")
              set(${_useVar} FALSE)
              list(APPEND _disable_vector_unit_list "${_flag}")
            endif()
          endif()
          if(NOT _header OR ${_resultVar})
            set(Vc_ARCHITECTURE_FLAGS "${Vc_ARCHITECTURE_FLAGS} -m${_flag}")
          endif()
        endif()
      endforeach()
      # Explicitly switch off everything that ended up disabled.
      foreach(_flag ${_disable_vector_unit_list})
        AddCompilerFlag("-mno-${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
      endforeach()
    endif()
  endif()
endmacro()