]> git.uio.no Git - u/mrichter/AliRoot.git/blame - Vc/cmake/OptimizeForArchitecture.cmake
comments on some sed magic
[u/mrichter/AliRoot.git] / Vc / cmake / OptimizeForArchitecture.cmake
CommitLineData
c017a39f 1#=============================================================================
2# Copyright 2010-2013 Matthias Kretz <kretz@kde.org>
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8# * Redistributions of source code must retain the above copyright notice,
9# this list of conditions and the following disclaimer.
10#
11# * Redistributions in binary form must reproduce the above copyright notice,
12# this list of conditions and the following disclaimer in the documentation
13# and/or other materials provided with the distribution.
14#
15# * The names of Kitware, Inc., the Insight Consortium, or the names of
16# any consortium members, or of any contributors, may not be used to
17# endorse or promote products derived from this software without
18# specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
21# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
24# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30#=============================================================================
31
f22341db 32get_filename_component(_currentDir "${CMAKE_CURRENT_LIST_FILE}" PATH)
33include("${_currentDir}/AddCompilerFlag.cmake")
c017a39f 34include(CheckIncludeFile)
f22341db 35
# _my_find(<list-variable-name> <value> <result-variable-name>)
#
# Sets <result-variable-name> to TRUE when <value> is an element of the list
# stored in <list-variable-name>, and to FALSE otherwise.
#
# This is a macro, so it runs in the caller's scope: the helper variable
# `_found` is overwritten there as well (callers may even pass `_found` as the
# result variable name).
macro(_my_find _list _value _ret)
  # list(FIND) stores the element index, or -1 when the value is absent.
  list(FIND ${_list} "${_value}" _found)
  if(_found GREATER -1)
    set(${_ret} TRUE)
  else()
    set(${_ret} FALSE)
  endif()
endmacro()
44
# AutodetectHostArchitecture()
#
# Inspects the CPU of the machine CMake is running on and sets
# TARGET_ARCHITECTURE to one of the architecture names understood by
# OptimizeForArchitecture(). If the CPU is not recognized the value stays at
# "generic" (or "merom" for unknown Intel family 6 models newer than model 14).
#
# Because this is a macro, everything is set in the caller's scope:
#   TARGET_ARCHITECTURE           - detected architecture name
#   Vc_ARCHITECTURE_FLAGS         - reset to empty
#   _vendor_id, _cpu_family,
#   _cpu_model, _cpu_flags        - raw CPU identification (may be empty)
#   _available_vector_units_list  - appended to for Intel NetBurst CPUs only
#
# Data sources: /proc/cpuinfo on Linux, sysctl on Darwin, the registry on
# Windows. On any other system nothing is probed.
macro(AutodetectHostArchitecture)
  set(TARGET_ARCHITECTURE "generic")
  set(Vc_ARCHITECTURE_FLAGS)
  set(_vendor_id)
  set(_cpu_family)
  set(_cpu_model)
  # Also clear the flags so that platforms that do not provide them (Windows)
  # never see a stale value from an earlier invocation.
  set(_cpu_flags)
  if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    file(READ "/proc/cpuinfo" _cpuinfo)
    # The leading greedy ".*" makes each expression pick up the last matching
    # entry in the file. If a key is missing altogether, REGEX REPLACE leaves
    # the whole file contents in the variable, which then simply fails the
    # vendor/family comparisons below.
    string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}")
    string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}")
    string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}")
    string(REGEX REPLACE ".*flags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}")
  elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
    # execute_process() replaces the deprecated exec_program(). The trailing
    # newline is stripped explicitly and sysctl errors (unknown keys) are
    # silenced; in that case the variable is left empty.
    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.vendor
      OUTPUT_VARIABLE _vendor_id
      OUTPUT_STRIP_TRAILING_WHITESPACE
      ERROR_QUIET)
    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.model
      OUTPUT_VARIABLE _cpu_model
      OUTPUT_STRIP_TRAILING_WHITESPACE
      ERROR_QUIET)
    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.family
      OUTPUT_VARIABLE _cpu_family
      OUTPUT_STRIP_TRAILING_WHITESPACE
      ERROR_QUIET)
    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.features
      OUTPUT_VARIABLE _cpu_flags
      OUTPUT_STRIP_TRAILING_WHITESPACE
      ERROR_QUIET)
    # Bring the feature names into the /proc/cpuinfo spelling
    # (lower case, "_" instead of "."), e.g. "sse4_1".
    string(TOLOWER "${_cpu_flags}" _cpu_flags)
    string(REPLACE "." "_" _cpu_flags "${_cpu_flags}")
  elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
    # get_filename_component() is used here only for its ability to read
    # registry values; the results are stored as (advanced) cache entries.
    get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE)
    get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE)
    mark_as_advanced(_vendor_id _cpu_id)
    string(REGEX REPLACE ".* Family ([0-9]+) .*" "\\1" _cpu_family "${_cpu_id}")
    string(REGEX REPLACE ".* Model ([0-9]+) .*" "\\1" _cpu_model "${_cpu_id}")
  endif()

  if(_vendor_id STREQUAL "GenuineIntel")
    if(_cpu_family EQUAL 6)
      # Any recent Intel CPU except NetBurst
      if(_cpu_model EQUAL 62)
        set(TARGET_ARCHITECTURE "ivy-bridge")
      elseif(_cpu_model EQUAL 58)
        set(TARGET_ARCHITECTURE "ivy-bridge")
      elseif(_cpu_model EQUAL 47) # Xeon E7 4860
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 46) # Xeon 7500 series
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 45) # Xeon TNG
        set(TARGET_ARCHITECTURE "sandy-bridge")
      elseif(_cpu_model EQUAL 44) # Xeon 5600 series
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 42) # Core TNG
        set(TARGET_ARCHITECTURE "sandy-bridge")
      elseif(_cpu_model EQUAL 37) # Core i7/i5/i3
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 31) # Core i7/i5
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 30) # Core i7/i5
        set(TARGET_ARCHITECTURE "westmere")
      elseif(_cpu_model EQUAL 29)
        set(TARGET_ARCHITECTURE "penryn")
      elseif(_cpu_model EQUAL 28)
        set(TARGET_ARCHITECTURE "atom")
      elseif(_cpu_model EQUAL 26)
        set(TARGET_ARCHITECTURE "nehalem")
      elseif(_cpu_model EQUAL 23)
        set(TARGET_ARCHITECTURE "penryn")
      elseif(_cpu_model EQUAL 15)
        set(TARGET_ARCHITECTURE "merom")
      elseif(_cpu_model EQUAL 14)
        set(TARGET_ARCHITECTURE "core")
      elseif(_cpu_model LESS 14)
        message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the generic CPU settings with SSE2.")
        set(TARGET_ARCHITECTURE "generic")
      else()
        # Unknown but newer than model 14: fall back to the Core 2 settings.
        message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.")
        set(TARGET_ARCHITECTURE "merom")
      endif()
    elseif(_cpu_family EQUAL 7) # Itanium (not supported)
      message(WARNING "Your CPU (Itanium: family ${_cpu_family}, model ${_cpu_model}) is not supported by OptimizeForArchitecture.cmake.")
    elseif(_cpu_family EQUAL 15) # NetBurst
      # TARGET_ARCHITECTURE stays "generic"; only the vector units are recorded.
      list(APPEND _available_vector_units_list "sse" "sse2")
      if(_cpu_model GREATER 2) # Not sure whether this must be 3 or even 4 instead
        list(APPEND _available_vector_units_list "sse3")
      endif()
    endif()
  elseif(_vendor_id STREQUAL "AuthenticAMD")
    if(_cpu_family EQUAL 22) # 16h
      set(TARGET_ARCHITECTURE "AMD 16h")
    elseif(_cpu_family EQUAL 21) # 15h
      if(_cpu_model LESS 2)
        set(TARGET_ARCHITECTURE "bulldozer")
      else()
        set(TARGET_ARCHITECTURE "piledriver")
      endif()
    elseif(_cpu_family EQUAL 20) # 14h
      set(TARGET_ARCHITECTURE "AMD 14h")
    elseif(_cpu_family EQUAL 18) # 12h
      # No dedicated setting for this family: TARGET_ARCHITECTURE stays "generic".
    elseif(_cpu_family EQUAL 16) # 10h
      set(TARGET_ARCHITECTURE "barcelona")
    elseif(_cpu_family EQUAL 15)
      set(TARGET_ARCHITECTURE "k8")
      if(_cpu_model GREATER 64) # I don't know the right number to put here. This is just a guess from the hardware I have access to
        set(TARGET_ARCHITECTURE "k8-sse3")
      endif()
    endif()
  endif()
endmacro()
143
# OptimizeForArchitecture()
#
# Translates the TARGET_ARCHITECTURE cache variable into compiler flags.
#
# Inputs (caller scope / cache):
#   TARGET_ARCHITECTURE           - architecture name, compared case-insensitively;
#                                   "auto" runs AutodetectHostArchitecture(),
#                                   "none" disables all flag handling.
#   Vc_AVX_INTRINSICS_BROKEN,
#   Vc_FMA4_INTRINSICS_BROKEN,
#   Vc_XOP_INTRINSICS_BROKEN      - optional; when set and true the matching
#                                   instruction sets are disabled by default.
#
# Outputs (this is a macro, so all of them land in the caller's scope):
#   USE_SSE2 ... USE_FMA4         - cache BOOLs; their defaults are derived from
#                                   the architecture and are forcibly reset
#                                   whenever TARGET_ARCHITECTURE changes.
#   Vc_ARCHITECTURE_FLAGS         - the compiler flags that were selected.
#   On MSVC additionally __SSE2__-style preprocessor definitions are added via
#   add_definitions().
#
# Relies on AddCompilerFlag() (AddCompilerFlag.cmake), CHECK_INCLUDE_FILE()
# (CheckIncludeFile) and UserWarning(), which is not defined in this file.
macro(OptimizeForArchitecture)
  set(TARGET_ARCHITECTURE "none" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used.\nSetting the value to \"auto\" will try to optimize for the architecture where cmake is called.\nOther supported values are: \"none\", \"generic\", \"core\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"ivy-bridge\", \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\", \"piledriver\", \"AMD 14h\", \"AMD 16h\".")

  # When the requested architecture differs from the one used in the previous
  # configure run, the USE_* cache entries below are overwritten (FORCE) with
  # the defaults of the new architecture.
  set(_force)
  if(NOT _last_target_arch STREQUAL "${TARGET_ARCHITECTURE}")
    message(STATUS "target changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"")
    set(_force FORCE)
  endif()
  set(_last_target_arch "${TARGET_ARCHITECTURE}" CACHE STRING "" FORCE)
  mark_as_advanced(_last_target_arch)
  # From here on a lower-cased normal variable shadows the cache entry.
  string(TOLOWER "${TARGET_ARCHITECTURE}" TARGET_ARCHITECTURE)

  set(_march_flag_list)
  set(_available_vector_units_list)

  if(TARGET_ARCHITECTURE STREQUAL "auto")
    AutodetectHostArchitecture()
    message(STATUS "Detected CPU: ${TARGET_ARCHITECTURE}")
  endif()

  # Map the architecture name to
  #  - _march_flag_list: -march= candidates, most specific first (the first one
  #    the compiler accepts is used), and
  #  - _available_vector_units_list: instruction set extensions of that CPU.
  if(TARGET_ARCHITECTURE STREQUAL "core")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
  elseif(TARGET_ARCHITECTURE STREQUAL "merom")
    list(APPEND _march_flag_list "merom")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
  elseif(TARGET_ARCHITECTURE STREQUAL "penryn")
    list(APPEND _march_flag_list "penryn")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
    message(STATUS "Sadly the Penryn architecture exists in variants with SSE4.1 and without SSE4.1.")
    # _cpu_flags is only filled in by AutodetectHostArchitecture().
    if(_cpu_flags MATCHES "sse4_1")
      message(STATUS "SSE4.1: enabled (auto-detected from this computer's CPU flags)")
      list(APPEND _available_vector_units_list "sse4.1")
    else()
      message(STATUS "SSE4.1: disabled (auto-detected from this computer's CPU flags)")
    endif()
  elseif(TARGET_ARCHITECTURE STREQUAL "nehalem")
    list(APPEND _march_flag_list "nehalem")
    list(APPEND _march_flag_list "corei7")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
  elseif(TARGET_ARCHITECTURE STREQUAL "westmere")
    list(APPEND _march_flag_list "westmere")
    list(APPEND _march_flag_list "corei7")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
  elseif(TARGET_ARCHITECTURE STREQUAL "ivy-bridge")
    list(APPEND _march_flag_list "core-avx-i")
    list(APPEND _march_flag_list "corei7-avx")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx" "rdrnd" "f16c")
  elseif(TARGET_ARCHITECTURE STREQUAL "sandy-bridge")
    list(APPEND _march_flag_list "sandybridge")
    list(APPEND _march_flag_list "corei7-avx")
    list(APPEND _march_flag_list "corei7")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx")
  elseif(TARGET_ARCHITECTURE STREQUAL "atom")
    list(APPEND _march_flag_list "atom")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
  elseif(TARGET_ARCHITECTURE STREQUAL "k8")
    list(APPEND _march_flag_list "k8")
    list(APPEND _available_vector_units_list "sse" "sse2")
  elseif(TARGET_ARCHITECTURE STREQUAL "k8-sse3")
    list(APPEND _march_flag_list "k8-sse3")
    list(APPEND _march_flag_list "k8")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
  # The two "AMD ..." names are accepted in both spellings: a user-supplied
  # value has been lower-cased above, whereas AutodetectHostArchitecture()
  # reports the mixed-case form.
  elseif(TARGET_ARCHITECTURE STREQUAL "amd 16h" OR TARGET_ARCHITECTURE STREQUAL "AMD 16h")
    list(APPEND _march_flag_list "btver2")
    list(APPEND _march_flag_list "btver1")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "f16c")
  elseif(TARGET_ARCHITECTURE STREQUAL "amd 14h" OR TARGET_ARCHITECTURE STREQUAL "AMD 14h")
    list(APPEND _march_flag_list "btver1")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a")
  elseif(TARGET_ARCHITECTURE STREQUAL "piledriver")
    list(APPEND _march_flag_list "bdver2")
    list(APPEND _march_flag_list "bdver1")
    list(APPEND _march_flag_list "bulldozer")
    list(APPEND _march_flag_list "barcelona")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4" "fma" "f16c")
  elseif(TARGET_ARCHITECTURE STREQUAL "interlagos")
    list(APPEND _march_flag_list "bdver1")
    list(APPEND _march_flag_list "bulldozer")
    list(APPEND _march_flag_list "barcelona")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4")
  elseif(TARGET_ARCHITECTURE STREQUAL "bulldozer")
    list(APPEND _march_flag_list "bdver1")
    list(APPEND _march_flag_list "bulldozer")
    list(APPEND _march_flag_list "barcelona")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4")
  elseif(TARGET_ARCHITECTURE STREQUAL "barcelona")
    list(APPEND _march_flag_list "barcelona")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
  elseif(TARGET_ARCHITECTURE STREQUAL "istanbul")
    list(APPEND _march_flag_list "barcelona")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
  elseif(TARGET_ARCHITECTURE STREQUAL "magny-cours")
    list(APPEND _march_flag_list "barcelona")
    list(APPEND _march_flag_list "core2")
    list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
  elseif(TARGET_ARCHITECTURE STREQUAL "generic")
    list(APPEND _march_flag_list "generic")
  elseif(TARGET_ARCHITECTURE STREQUAL "none")
    # add this clause to remove it from the else clause
  else()
    message(FATAL_ERROR "Unknown target architecture: \"${TARGET_ARCHITECTURE}\". Please set TARGET_ARCHITECTURE to a supported value.")
  endif()

  if(NOT TARGET_ARCHITECTURE STREQUAL "none")
    set(_disable_vector_unit_list)
    set(_enable_vector_unit_list)

    # Derive the defaults of the USE_* options from the CPU's capabilities.
    _my_find(_available_vector_units_list "sse2" SSE2_FOUND)
    _my_find(_available_vector_units_list "sse3" SSE3_FOUND)
    _my_find(_available_vector_units_list "ssse3" SSSE3_FOUND)
    _my_find(_available_vector_units_list "sse4.1" SSE4_1_FOUND)
    _my_find(_available_vector_units_list "sse4.2" SSE4_2_FOUND)
    _my_find(_available_vector_units_list "sse4a" SSE4a_FOUND)
    if(DEFINED Vc_AVX_INTRINSICS_BROKEN AND Vc_AVX_INTRINSICS_BROKEN)
      # Broken AVX support also rules out XOP and FMA4.
      UserWarning("AVX disabled per default because of old/broken compiler")
      set(AVX_FOUND false)
      set(XOP_FOUND false)
      set(FMA4_FOUND false)
    else()
      _my_find(_available_vector_units_list "avx" AVX_FOUND)
      if(DEFINED Vc_FMA4_INTRINSICS_BROKEN AND Vc_FMA4_INTRINSICS_BROKEN)
        UserWarning("FMA4 disabled per default because of old/broken compiler")
        set(FMA4_FOUND false)
      else()
        _my_find(_available_vector_units_list "fma4" FMA4_FOUND)
      endif()
      if(DEFINED Vc_XOP_INTRINSICS_BROKEN AND Vc_XOP_INTRINSICS_BROKEN)
        UserWarning("XOP disabled per default because of old/broken compiler")
        set(XOP_FOUND false)
      else()
        _my_find(_available_vector_units_list "xop" XOP_FOUND)
      endif()
    endif()

    # ${_force} is either empty or FORCE (see above).
    set(USE_SSE2 ${SSE2_FOUND} CACHE BOOL "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." ${_force})
    set(USE_SSE3 ${SSE3_FOUND} CACHE BOOL "Use SSE3. If SSE3 instructions are not enabled they will be emulated." ${_force})
    set(USE_SSSE3 ${SSSE3_FOUND} CACHE BOOL "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." ${_force})
    set(USE_SSE4_1 ${SSE4_1_FOUND} CACHE BOOL "Use SSE4.1. If SSE4.1 instructions are not enabled they will be emulated." ${_force})
    set(USE_SSE4_2 ${SSE4_2_FOUND} CACHE BOOL "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." ${_force})
    set(USE_SSE4a ${SSE4a_FOUND} CACHE BOOL "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." ${_force})
    set(USE_AVX ${AVX_FOUND} CACHE BOOL "Use AVX. This will double some of the vector sizes relative to SSE." ${_force})
    set(USE_XOP ${XOP_FOUND} CACHE BOOL "Use XOP." ${_force})
    set(USE_FMA4 ${FMA4_FOUND} CACHE BOOL "Use FMA4." ${_force})
    mark_as_advanced(USE_SSE2 USE_SSE3 USE_SSSE3 USE_SSE4_1 USE_SSE4_2 USE_SSE4a USE_AVX USE_XOP USE_FMA4)

    # Sort every instruction set into the enable or the disable list according
    # to the (possibly user-edited) USE_* options.
    if(USE_SSE2)
      list(APPEND _enable_vector_unit_list "sse2")
    else()
      list(APPEND _disable_vector_unit_list "sse2")
    endif()
    if(USE_SSE3)
      list(APPEND _enable_vector_unit_list "sse3")
    else()
      list(APPEND _disable_vector_unit_list "sse3")
    endif()
    if(USE_SSSE3)
      list(APPEND _enable_vector_unit_list "ssse3")
    else()
      list(APPEND _disable_vector_unit_list "ssse3")
    endif()
    if(USE_SSE4_1)
      list(APPEND _enable_vector_unit_list "sse4.1")
    else()
      list(APPEND _disable_vector_unit_list "sse4.1")
    endif()
    if(USE_SSE4_2)
      list(APPEND _enable_vector_unit_list "sse4.2")
    else()
      list(APPEND _disable_vector_unit_list "sse4.2")
    endif()
    if(USE_SSE4a)
      list(APPEND _enable_vector_unit_list "sse4a")
    else()
      list(APPEND _disable_vector_unit_list "sse4a")
    endif()
    if(USE_AVX)
      list(APPEND _enable_vector_unit_list "avx")
      # we want SSE intrinsics to result in instructions using the VEX prefix.
      # Otherwise integer ops (which require the older SSE intrinsics) would
      # always have a large penalty.
      list(APPEND _enable_vector_unit_list "sse2avx")
    else()
      list(APPEND _disable_vector_unit_list "avx")
    endif()
    if(USE_XOP)
      list(APPEND _enable_vector_unit_list "xop")
    else()
      list(APPEND _disable_vector_unit_list "xop")
    endif()
    if(USE_FMA4)
      list(APPEND _enable_vector_unit_list "fma4")
    else()
      list(APPEND _disable_vector_unit_list "fma4")
    endif()

    if(MSVC)
      # MSVC on 32 bit can select /arch:SSE2 (since 2010 also /arch:AVX)
      # MSVC on 64 bit cannot select anything (should have changed with MSVC 2010)
      _my_find(_enable_vector_unit_list "avx" _avx)
      set(_avx_flag FALSE)
      if(_avx)
        AddCompilerFlag("/arch:AVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _avx_flag)
      endif()
      if(NOT _avx_flag)
        # /arch:AVX was not requested or not accepted: fall back to /arch:SSE2.
        _my_find(_enable_vector_unit_list "sse2" _found)
        if(_found)
          AddCompilerFlag("/arch:SSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
        endif()
      endif()
      # Define a __<UNIT>__ macro for every enabled unit, e.g. "sse4.1" becomes
      # -D__SSE4_1__.
      foreach(_flag ${_enable_vector_unit_list})
        string(TOUPPER "${_flag}" _flag)
        string(REPLACE "." "_" _flag "__${_flag}__")
        add_definitions("-D${_flag}")
      endforeach()
    elseif(CMAKE_CXX_COMPILER MATCHES "/(icpc|icc)$") # ICC (on Linux)
      # Pick the single -x flag for the most capable instruction set in
      # _available_vector_units_list; the USE_* options are not consulted here.
      _my_find(_available_vector_units_list "avx2" _found)
      if(_found)
        AddCompilerFlag("-xCORE-AVX2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
      else()
        _my_find(_available_vector_units_list "f16c" _found)
        if(_found)
          AddCompilerFlag("-xCORE-AVX-I" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
        else()
          _my_find(_available_vector_units_list "avx" _found)
          if(_found)
            AddCompilerFlag("-xAVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
          else()
            _my_find(_available_vector_units_list "sse4.2" _found)
            if(_found)
              AddCompilerFlag("-xSSE4.2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
            else()
              _my_find(_available_vector_units_list "sse4.1" _found)
              if(_found)
                AddCompilerFlag("-xSSE4.1" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
              else()
                _my_find(_available_vector_units_list "ssse3" _found)
                if(_found)
                  AddCompilerFlag("-xSSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                else()
                  _my_find(_available_vector_units_list "sse3" _found)
                  if(_found)
                    # If the target host is an AMD machine then we still want to use -xSSE2 because the binary would refuse to run at all otherwise
                    _my_find(_march_flag_list "barcelona" _found)
                    if(NOT _found)
                      _my_find(_march_flag_list "k8-sse3" _found)
                    endif()
                    if(_found)
                      AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                    else()
                      AddCompilerFlag("-xSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                    endif()
                  else()
                    _my_find(_available_vector_units_list "sse2" _found)
                    if(_found)
                      AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                    endif()
                  endif()
                endif()
              endif()
            endif()
          endif()
        endif()
      endif()
    else() # not MSVC and not ICC => GCC, Clang, Open64
      # Use the first (most specific) -march= value the compiler accepts.
      foreach(_flag ${_march_flag_list})
        AddCompilerFlag("-march=${_flag}" CXX_RESULT _good CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
        if(_good)
          break()
        endif()
      endforeach()
      foreach(_flag ${_enable_vector_unit_list})
        AddCompilerFlag("-m${_flag}" CXX_RESULT _result)
        if(_result)
          # The compiler accepts -m<unit>. For units with a known intrinsics
          # header additionally require that the header can be included.
          set(_header FALSE)
          if(_flag STREQUAL "sse3")
            set(_header "pmmintrin.h")
          elseif(_flag STREQUAL "ssse3")
            set(_header "tmmintrin.h")
          elseif(_flag STREQUAL "sse4.1")
            set(_header "smmintrin.h")
          elseif(_flag STREQUAL "sse4.2")
            set(_header "smmintrin.h")
          elseif(_flag STREQUAL "sse4a")
            set(_header "ammintrin.h")
          elseif(_flag STREQUAL "avx")
            set(_header "immintrin.h")
          elseif(_flag STREQUAL "fma4")
            set(_header "x86intrin.h")
          elseif(_flag STREQUAL "xop")
            set(_header "x86intrin.h")
          endif()
          set(_resultVar "HAVE_${_header}")
          string(REPLACE "." "_" _resultVar "${_resultVar}")
          if(_header)
            CHECK_INCLUDE_FILE("${_header}" ${_resultVar} "-m${_flag}")
            if(NOT ${_resultVar})
              # Header missing: switch the unit off (normal variable only, the
              # cache entry is left alone) and request -mno-<unit> below.
              set(_useVar "USE_${_flag}")
              string(TOUPPER "${_useVar}" _useVar)
              string(REPLACE "." "_" _useVar "${_useVar}")
              message(STATUS "disabling ${_useVar} because ${_header} is missing")
              set(${_useVar} FALSE)
              list(APPEND _disable_vector_unit_list "${_flag}")
            endif()
          endif()
          if(NOT _header OR ${_resultVar})
            set(Vc_ARCHITECTURE_FLAGS "${Vc_ARCHITECTURE_FLAGS} -m${_flag}")
          endif()
        endif()
      endforeach()
      foreach(_flag ${_disable_vector_unit_list})
        AddCompilerFlag("-mno-${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
      endforeach()
    endif()
  endif()
endmacro()