added slewing correction by data
[u/mrichter/AliRoot.git] / Vc / cmake / OptimizeForArchitecture.cmake
1 #=============================================================================
2 # Copyright 2010-2013 Matthias Kretz <kretz@kde.org>
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 #  * Redistributions of source code must retain the above copyright notice,
9 #    this list of conditions and the following disclaimer.
10 #
11 #  * Redistributions in binary form must reproduce the above copyright notice,
12 #    this list of conditions and the following disclaimer in the documentation
13 #    and/or other materials provided with the distribution.
14 #
15 #  * The names of Kitware, Inc., the Insight Consortium, or the names of
16 #    any consortium members, or of any contributors, may not be used to
17 #    endorse or promote products derived from this software without
18 #    specific prior written permission.
19 #
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
24 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #=============================================================================
31
32 get_filename_component(_currentDir "${CMAKE_CURRENT_LIST_FILE}" PATH)
33 include("${_currentDir}/AddCompilerFlag.cmake")
34 include(CheckIncludeFile)
35
# _my_find(<list-variable-name> <value> <result-variable-name>)
#
# Membership test for CMake lists: stores TRUE in <result-variable-name> when
# <value> is an element of the list held by <list-variable-name>, and FALSE
# otherwise.
#
# Because this is a macro, the scratch variable _found is written in the
# caller's scope. Callers in this file pass "_found" as the result name, so
# the index must be evaluated before the result is assigned.
macro(_my_find _list _value _ret)
   # list(FIND) reports -1 for a missing element, otherwise its index.
   list(FIND ${_list} "${_value}" _found)
   if(_found GREATER -1)
      set(${_ret} TRUE)
   else()
      set(${_ret} FALSE)
   endif()
endmacro()
44
# AutodetectHostArchitecture()
#
# Guesses the micro-architecture of the CPU CMake is running on and stores
# its name in TARGET_ARCHITECTURE. The value stays "generic" when the CPU is
# not recognised or the platform is not handled.
#
# Variables written in the caller's scope (this is a macro):
#   TARGET_ARCHITECTURE           - detected name, e.g. "westmere"
#   Vc_ARCHITECTURE_FLAGS         - cleared
#   _vendor_id/_cpu_family/_cpu_model - raw identification values
#   _cpu_flags                    - CPU feature string (Linux and Darwin only)
#   _available_vector_units_list  - appended to for Intel NetBurst CPUs,
#                                   which have no architecture name here
#
# NOTE(review): the platform is selected via CMAKE_SYSTEM_NAME, i.e. the
# target system; when cross-compiling this is not the machine being probed.
macro(AutodetectHostArchitecture)
   set(TARGET_ARCHITECTURE "generic")
   set(Vc_ARCHITECTURE_FLAGS)
   set(_vendor_id)
   set(_cpu_family)
   set(_cpu_model)
   if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
      # Each REGEX REPLACE keeps only the captured value. If a field is
      # absent the regex does not match and the variable receives the whole
      # file, which then simply fails the vendor/family comparisons below.
      file(READ "/proc/cpuinfo" _cpuinfo)
      string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}")
      string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}")
      string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}")
      string(REGEX REPLACE ".*flags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}")
   elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
      # execute_process replaces the deprecated exec_program command.
      # ERROR_QUIET keeps a failing sysctl query out of the configure output;
      # the corresponding variable is then left empty.
      execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.vendor
         OUTPUT_VARIABLE _vendor_id
         OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET)
      execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.model
         OUTPUT_VARIABLE _cpu_model
         OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET)
      execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.family
         OUTPUT_VARIABLE _cpu_family
         OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET)
      execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.features
         OUTPUT_VARIABLE _cpu_flags
         OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET)
      # Normalise to the spelling used on Linux (e.g. "SSE4.1" -> "sse4_1").
      string(TOLOWER "${_cpu_flags}" _cpu_flags)
      string(REPLACE "." "_" _cpu_flags "${_cpu_flags}")
   elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
      # The registry values are read once and kept in the cache.
      get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE)
      get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE)
      mark_as_advanced(_vendor_id _cpu_id)
      string(REGEX REPLACE ".* Family ([0-9]+) .*" "\\1" _cpu_family "${_cpu_id}")
      string(REGEX REPLACE ".* Model ([0-9]+) .*" "\\1" _cpu_model "${_cpu_id}")
   endif()

   if(_vendor_id STREQUAL "GenuineIntel")
      if(_cpu_family EQUAL 6)
         # Any recent Intel CPU except NetBurst
         if(_cpu_model EQUAL 58)
            set(TARGET_ARCHITECTURE "ivy-bridge")
         elseif(_cpu_model EQUAL 45 OR _cpu_model EQUAL 42)
            # 45: Xeon TNG, 42: Core TNG
            set(TARGET_ARCHITECTURE "sandy-bridge")
         elseif(_cpu_model EQUAL 47 OR _cpu_model EQUAL 44 OR _cpu_model EQUAL 37)
            # 47: Xeon E7 4860, 44: Xeon 5600 series, 37: Core i7/i5/i3
            set(TARGET_ARCHITECTURE "westmere")
         elseif(_cpu_model EQUAL 46 OR _cpu_model EQUAL 31 OR _cpu_model EQUAL 30 OR _cpu_model EQUAL 26)
            # 46: Xeon 7500 series, 31/30: Core i7/i5, 26: first Core i7.
            # These are Nehalem generation parts; selecting "westmere" for
            # them would request -march=westmere, which implies instruction
            # set extensions (AES, PCLMUL) these CPUs do not implement.
            set(TARGET_ARCHITECTURE "nehalem")
         elseif(_cpu_model EQUAL 29 OR _cpu_model EQUAL 23)
            set(TARGET_ARCHITECTURE "penryn")
         elseif(_cpu_model EQUAL 28)
            set(TARGET_ARCHITECTURE "atom")
         elseif(_cpu_model EQUAL 15)
            set(TARGET_ARCHITECTURE "merom")
         elseif(_cpu_model EQUAL 14)
            set(TARGET_ARCHITECTURE "core")
         elseif(_cpu_model LESS 14)
            message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the generic CPU settings with SSE2.")
            set(TARGET_ARCHITECTURE "generic")
         else()
            message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.")
            set(TARGET_ARCHITECTURE "merom")
         endif()
      elseif(_cpu_family EQUAL 7) # Itanium (not supported)
         message(WARNING "Your CPU (Itanium: family ${_cpu_family}, model ${_cpu_model}) is not supported by OptimizeForArchitecture.cmake.")
      elseif(_cpu_family EQUAL 15) # NetBurst
         # TARGET_ARCHITECTURE stays "generic"; only the vector units are
         # reported to the caller.
         list(APPEND _available_vector_units_list "sse" "sse2")
         if(_cpu_model GREATER 2) # Not sure whether this must be 3 or even 4 instead
            # "sse" and "sse2" were already appended above.
            list(APPEND _available_vector_units_list "sse3")
         endif()
      endif()
   elseif(_vendor_id STREQUAL "AuthenticAMD")
      if(_cpu_family EQUAL 21) # 15h
         if(_cpu_model LESS 2)
            set(TARGET_ARCHITECTURE "bulldozer")
         else()
            set(TARGET_ARCHITECTURE "piledriver")
         endif()
      elseif(_cpu_family EQUAL 20) # 14h
         # no dedicated settings: stays "generic"
      elseif(_cpu_family EQUAL 18) # 12h
         # no dedicated settings: stays "generic"
      elseif(_cpu_family EQUAL 16) # 10h
         set(TARGET_ARCHITECTURE "barcelona")
      elseif(_cpu_family EQUAL 15)
         set(TARGET_ARCHITECTURE "k8")
         if(_cpu_model GREATER 64) # I don't know the right number to put here. This is just a guess from the hardware I have access to
            set(TARGET_ARCHITECTURE "k8-sse3")
         endif()
      endif()
   endif()
endmacro()
138
# OptimizeForArchitecture()
#
# Translates the cache option TARGET_ARCHITECTURE into compiler flags.
#
# Inputs:
#   TARGET_ARCHITECTURE (cache, default "none") - architecture name, or
#       "auto" to call AutodetectHostArchitecture(), or "none" to do nothing.
#   Vc_AVX_INTRINSICS_BROKEN / Vc_FMA4_INTRINSICS_BROKEN /
#   Vc_XOP_INTRINSICS_BROKEN - when set, the matching units default to off.
#
# Results (this is a macro, everything lands in the caller's scope):
#   USE_SSE2 ... USE_FMA4 (cache BOOL) - per-unit switches. Their defaults
#       are derived from the architecture and are only forced onto existing
#       cache entries when TARGET_ARCHITECTURE changed since the last run.
#   Vc_ARCHITECTURE_FLAGS - receives the selected compiler flags.
#   On MSVC additionally one -D__<UNIT>__ definition per enabled unit is
#   added via add_definitions().
#
# Relies on AddCompilerFlag() and UserWarning(), which are not defined in
# this file. AddCompilerFlag is presumably testing the flag and appending it
# to the variable named after CXX_FLAGS when accepted - verify against
# AddCompilerFlag.cmake.
macro(OptimizeForArchitecture)
   set(TARGET_ARCHITECTURE "none" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used.\nSetting the value to \"auto\" will try to optimize for the architecture where cmake is called.\nOther supported values are: \"none\", \"generic\", \"core\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"ivy-bridge\", \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\", \"piledriver\".")

   # _force becomes FORCE only when the user picked a different architecture
   # than in the previous configure run; only then are the USE_* cache
   # entries below overwritten with the new defaults.
   set(_force)
   if(NOT _last_target_arch STREQUAL "${TARGET_ARCHITECTURE}")
      message(STATUS "target changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"")
      set(_force FORCE)
   endif()
   set(_last_target_arch "${TARGET_ARCHITECTURE}" CACHE STRING "" FORCE)
   mark_as_advanced(_last_target_arch)
   # From here on a normal (non-cache) lower-case copy shadows the cache entry.
   string(TOLOWER "${TARGET_ARCHITECTURE}" TARGET_ARCHITECTURE)

   set(_march_flag_list)
   set(_available_vector_units_list)

   if(TARGET_ARCHITECTURE STREQUAL "auto")
      AutodetectHostArchitecture()
      message(STATUS "Detected CPU: ${TARGET_ARCHITECTURE}")
   endif()

   # Per architecture:
   #   _march_flag_list             - -march= candidates, most specific first
   #   _available_vector_units_list - instruction set extensions of the CPU
   if(TARGET_ARCHITECTURE STREQUAL "core")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
   elseif(TARGET_ARCHITECTURE STREQUAL "merom")
      list(APPEND _march_flag_list "merom")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
   elseif(TARGET_ARCHITECTURE STREQUAL "penryn")
      list(APPEND _march_flag_list "penryn")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
      message(STATUS "Sadly the Penryn architecture exists in variants with SSE4.1 and without SSE4.1.")
      # _cpu_flags is only filled by AutodetectHostArchitecture() on Linux
      # and Darwin; otherwise SSE4.1 is reported as disabled.
      if(_cpu_flags MATCHES "sse4_1")
         message(STATUS "SSE4.1: enabled (auto-detected from this computer's CPU flags)")
         list(APPEND _available_vector_units_list "sse4.1")
      else()
         message(STATUS "SSE4.1: disabled (auto-detected from this computer's CPU flags)")
      endif()
   elseif(TARGET_ARCHITECTURE STREQUAL "nehalem")
      list(APPEND _march_flag_list "nehalem")
      list(APPEND _march_flag_list "corei7")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
   elseif(TARGET_ARCHITECTURE STREQUAL "westmere")
      list(APPEND _march_flag_list "westmere")
      list(APPEND _march_flag_list "corei7")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
   elseif(TARGET_ARCHITECTURE STREQUAL "ivy-bridge")
      list(APPEND _march_flag_list "core-avx-i")
      list(APPEND _march_flag_list "corei7-avx")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx" "rdrnd" "f16c")
   elseif(TARGET_ARCHITECTURE STREQUAL "sandy-bridge")
      list(APPEND _march_flag_list "sandybridge")
      list(APPEND _march_flag_list "corei7-avx")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx")
   elseif(TARGET_ARCHITECTURE STREQUAL "atom")
      list(APPEND _march_flag_list "atom")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
   elseif(TARGET_ARCHITECTURE STREQUAL "k8")
      list(APPEND _march_flag_list "k8")
      list(APPEND _available_vector_units_list "sse" "sse2")
   elseif(TARGET_ARCHITECTURE STREQUAL "k8-sse3")
      list(APPEND _march_flag_list "k8-sse3")
      list(APPEND _march_flag_list "k8")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
   elseif(TARGET_ARCHITECTURE STREQUAL "piledriver")
      list(APPEND _march_flag_list "bdver2")
      list(APPEND _march_flag_list "bdver1")
      list(APPEND _march_flag_list "bulldozer")
      list(APPEND _march_flag_list "barcelona")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4" "fma" "f16c")
   elseif(TARGET_ARCHITECTURE STREQUAL "interlagos")
      list(APPEND _march_flag_list "bdver1")
      list(APPEND _march_flag_list "bulldozer")
      list(APPEND _march_flag_list "barcelona")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4")
   elseif(TARGET_ARCHITECTURE STREQUAL "bulldozer")
      list(APPEND _march_flag_list "bdver1")
      list(APPEND _march_flag_list "bulldozer")
      list(APPEND _march_flag_list "barcelona")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4")
   elseif(TARGET_ARCHITECTURE STREQUAL "barcelona")
      list(APPEND _march_flag_list "barcelona")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
   elseif(TARGET_ARCHITECTURE STREQUAL "istanbul")
      list(APPEND _march_flag_list "barcelona")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
   elseif(TARGET_ARCHITECTURE STREQUAL "magny-cours")
      list(APPEND _march_flag_list "barcelona")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
   elseif(TARGET_ARCHITECTURE STREQUAL "generic")
      list(APPEND _march_flag_list "generic")
   elseif(TARGET_ARCHITECTURE STREQUAL "none")
      # add this clause to remove it from the else clause
   else()
      message(FATAL_ERROR "Unknown target architecture: \"${TARGET_ARCHITECTURE}\". Please set TARGET_ARCHITECTURE to a supported value.")
   endif()

   if(NOT TARGET_ARCHITECTURE STREQUAL "none")
      set(_disable_vector_unit_list)
      set(_enable_vector_unit_list)

      # Defaults for the USE_* options, derived from the CPU's capabilities.
      _my_find(_available_vector_units_list "sse2" SSE2_FOUND)
      _my_find(_available_vector_units_list "sse3" SSE3_FOUND)
      _my_find(_available_vector_units_list "ssse3" SSSE3_FOUND)
      _my_find(_available_vector_units_list "sse4.1" SSE4_1_FOUND)
      _my_find(_available_vector_units_list "sse4.2" SSE4_2_FOUND)
      _my_find(_available_vector_units_list "sse4a" SSE4a_FOUND)
      if(DEFINED Vc_AVX_INTRINSICS_BROKEN AND Vc_AVX_INTRINSICS_BROKEN)
         # Without usable AVX intrinsics XOP and FMA4 are switched off too.
         UserWarning("AVX disabled per default because of old/broken compiler")
         set(AVX_FOUND false)
         set(XOP_FOUND false)
         set(FMA4_FOUND false)
      else()
         _my_find(_available_vector_units_list "avx" AVX_FOUND)
         if(DEFINED Vc_FMA4_INTRINSICS_BROKEN AND Vc_FMA4_INTRINSICS_BROKEN)
            UserWarning("FMA4 disabled per default because of old/broken compiler")
            set(FMA4_FOUND false)
         else()
            _my_find(_available_vector_units_list "fma4" FMA4_FOUND)
         endif()
         if(DEFINED Vc_XOP_INTRINSICS_BROKEN AND Vc_XOP_INTRINSICS_BROKEN)
            UserWarning("XOP disabled per default because of old/broken compiler")
            set(XOP_FOUND false)
         else()
            _my_find(_available_vector_units_list "xop" XOP_FOUND)
         endif()
      endif()

      # ${_force} is either empty or FORCE (see top of the macro).
      set(USE_SSE2   ${SSE2_FOUND}   CACHE BOOL "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." ${_force})
      set(USE_SSE3   ${SSE3_FOUND}   CACHE BOOL "Use SSE3. If SSE3 instructions are not enabled they will be emulated." ${_force})
      set(USE_SSSE3  ${SSSE3_FOUND}  CACHE BOOL "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." ${_force})
      set(USE_SSE4_1 ${SSE4_1_FOUND} CACHE BOOL "Use SSE4.1. If SSE4.1 instructions are not enabled they will be emulated." ${_force})
      set(USE_SSE4_2 ${SSE4_2_FOUND} CACHE BOOL "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." ${_force})
      set(USE_SSE4a  ${SSE4a_FOUND}  CACHE BOOL "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." ${_force})
      set(USE_AVX    ${AVX_FOUND}    CACHE BOOL "Use AVX. This will double some of the vector sizes relative to SSE." ${_force})
      set(USE_XOP    ${XOP_FOUND}    CACHE BOOL "Use XOP." ${_force})
      set(USE_FMA4   ${FMA4_FOUND}   CACHE BOOL "Use FMA4." ${_force})
      mark_as_advanced(USE_SSE2 USE_SSE3 USE_SSSE3 USE_SSE4_1 USE_SSE4_2 USE_SSE4a USE_AVX USE_XOP USE_FMA4)

      # Sort every unit into the enable or the disable list according to the
      # (possibly user-edited) USE_* options.
      if(USE_SSE2)
         list(APPEND _enable_vector_unit_list "sse2")
      else()
         list(APPEND _disable_vector_unit_list "sse2")
      endif()
      if(USE_SSE3)
         list(APPEND _enable_vector_unit_list "sse3")
      else()
         list(APPEND _disable_vector_unit_list "sse3")
      endif()
      if(USE_SSSE3)
         list(APPEND _enable_vector_unit_list "ssse3")
      else()
         list(APPEND _disable_vector_unit_list "ssse3")
      endif()
      if(USE_SSE4_1)
         list(APPEND _enable_vector_unit_list "sse4.1")
      else()
         list(APPEND _disable_vector_unit_list "sse4.1")
      endif()
      if(USE_SSE4_2)
         list(APPEND _enable_vector_unit_list "sse4.2")
      else()
         list(APPEND _disable_vector_unit_list "sse4.2")
      endif()
      if(USE_SSE4a)
         list(APPEND _enable_vector_unit_list "sse4a")
      else()
         list(APPEND _disable_vector_unit_list "sse4a")
      endif()
      if(USE_AVX)
         list(APPEND _enable_vector_unit_list "avx")
         # we want SSE intrinsics to result in instructions using the VEX prefix.
         # Otherwise integer ops (which require the older SSE intrinsics) would
         # always have a large penalty.
         list(APPEND _enable_vector_unit_list "sse2avx")
      else()
         list(APPEND _disable_vector_unit_list "avx")
      endif()
      if(USE_XOP)
         list(APPEND _enable_vector_unit_list "xop")
      else()
         list(APPEND _disable_vector_unit_list "xop")
      endif()
      if(USE_FMA4)
         list(APPEND _enable_vector_unit_list "fma4")
      else()
         list(APPEND _disable_vector_unit_list "fma4")
      endif()

      if(MSVC)
         # MSVC on 32 bit can select /arch:SSE2 (since 2010 also /arch:AVX)
         # MSVC on 64 bit cannot select anything (should have changed with MSVC 2010)
         _my_find(_enable_vector_unit_list "avx" _avx)
         set(_avx_flag FALSE)
         if(_avx)
            AddCompilerFlag("/arch:AVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _avx_flag)
         endif()
         if(NOT _avx_flag)
            # Fall back to /arch:SSE2 when AVX is off or /arch:AVX was rejected.
            _my_find(_enable_vector_unit_list "sse2" _found)
            if(_found)
               AddCompilerFlag("/arch:SSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
            endif()
         endif()
         # Define __SSE3__, __SSE4_1__, ... for every enabled unit.
         foreach(_flag ${_enable_vector_unit_list})
            string(TOUPPER "${_flag}" _flag)
            string(REPLACE "." "_" _flag "__${_flag}__")
            add_definitions("-D${_flag}")
         endforeach()
      elseif(CMAKE_CXX_COMPILER MATCHES "/(icpc|icc)$") # ICC (on Linux)
         # ICC takes one -x<level> flag; choose the highest level the CPU
         # offers. Note that this looks at the CPU's capabilities
         # (_available_vector_units_list), not at the USE_* selection.
         _my_find(_available_vector_units_list "avx2"   _icc_avx2)
         _my_find(_available_vector_units_list "f16c"   _icc_f16c)
         _my_find(_available_vector_units_list "avx"    _icc_avx)
         _my_find(_available_vector_units_list "sse4.2" _icc_sse4_2)
         _my_find(_available_vector_units_list "sse4.1" _icc_sse4_1)
         _my_find(_available_vector_units_list "ssse3"  _icc_ssse3)
         _my_find(_available_vector_units_list "sse3"   _icc_sse3)
         _my_find(_available_vector_units_list "sse2"   _icc_sse2)
         if(_icc_avx2)
            AddCompilerFlag("-xCORE-AVX2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
         elseif(_icc_f16c)
            AddCompilerFlag("-xCORE-AVX-I" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
         elseif(_icc_avx)
            AddCompilerFlag("-xAVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
         elseif(_icc_sse4_2)
            AddCompilerFlag("-xSSE4.2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
         elseif(_icc_sse4_1)
            AddCompilerFlag("-xSSE4.1" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
         elseif(_icc_ssse3)
            AddCompilerFlag("-xSSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
         elseif(_icc_sse3)
            # If the target host is an AMD machine then we still want to use -xSSE2 because the binary would refuse to run at all otherwise
            _my_find(_march_flag_list "barcelona" _icc_amd_target)
            if(NOT _icc_amd_target)
               _my_find(_march_flag_list "k8-sse3" _icc_amd_target)
            endif()
            if(_icc_amd_target)
               AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
            else()
               AddCompilerFlag("-xSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
            endif()
         elseif(_icc_sse2)
            AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
         endif()
      else() # not MSVC and not ICC => GCC, Clang, Open64
         # Use the first -march= candidate the compiler accepts.
         foreach(_flag ${_march_flag_list})
            AddCompilerFlag("-march=${_flag}" CXX_RESULT _good CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
            if(_good)
               break()
            endif()
         endforeach()
         foreach(_flag ${_enable_vector_unit_list})
            AddCompilerFlag("-m${_flag}" CXX_RESULT _result)
            if(_result)
               # Some units additionally need an intrinsics header; _header
               # stays FALSE for units without such a requirement.
               set(_header FALSE)
               if(_flag STREQUAL "sse3")
                  set(_header "pmmintrin.h")
               elseif(_flag STREQUAL "ssse3")
                  set(_header "tmmintrin.h")
               elseif(_flag STREQUAL "sse4.1")
                  set(_header "smmintrin.h")
               elseif(_flag STREQUAL "sse4.2")
                  set(_header "smmintrin.h")
               elseif(_flag STREQUAL "sse4a")
                  set(_header "ammintrin.h")
               elseif(_flag STREQUAL "avx")
                  set(_header "immintrin.h")
               elseif(_flag STREQUAL "fma4")
                  set(_header "x86intrin.h")
               elseif(_flag STREQUAL "xop")
                  set(_header "x86intrin.h")
               endif()
               set(_resultVar "HAVE_${_header}")
               string(REPLACE "." "_" _resultVar "${_resultVar}")
               if(_header)
                  check_include_file("${_header}" ${_resultVar} "-m${_flag}")
                  if(NOT ${_resultVar})
                     # Map the flag back to its USE_* option. Upper-casing
                     # works for all options except USE_SSE4a, whose name
                     # ends in a lower-case letter.
                     if(_flag STREQUAL "sse4a")
                        set(_useVar "USE_SSE4a")
                     else()
                        set(_useVar "USE_${_flag}")
                        string(TOUPPER "${_useVar}" _useVar)
                        string(REPLACE "." "_" _useVar "${_useVar}")
                     endif()
                     message(STATUS "disabling ${_useVar} because ${_header} is missing")
                     # Normal variable; shadows the cache entry for the
                     # remainder of this configure run.
                     set(${_useVar} FALSE)
                     list(APPEND _disable_vector_unit_list "${_flag}")
                  endif()
               endif()
               if(NOT _header OR ${_resultVar})
                  set(Vc_ARCHITECTURE_FLAGS "${Vc_ARCHITECTURE_FLAGS} -m${_flag}")
               endif()
            endif()
         endforeach()
         # Explicitly turn off everything that was deselected or found unusable.
         foreach(_flag ${_disable_vector_unit_list})
            AddCompilerFlag("-mno-${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
         endforeach()
      endif()
   endif()
endmacro()