]> git.uio.no Git - u/mrichter/AliRoot.git/blob - Vc/cmake/OptimizeForArchitecture.cmake
comments on some sed magic
[u/mrichter/AliRoot.git] / Vc / cmake / OptimizeForArchitecture.cmake
1 #=============================================================================
2 # Copyright 2010-2013 Matthias Kretz <kretz@kde.org>
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 #  * Redistributions of source code must retain the above copyright notice,
9 #    this list of conditions and the following disclaimer.
10 #
11 #  * Redistributions in binary form must reproduce the above copyright notice,
12 #    this list of conditions and the following disclaimer in the documentation
13 #    and/or other materials provided with the distribution.
14 #
15 #  * The names of Kitware, Inc., the Insight Consortium, or the names of
16 #    any consortium members, or of any contributors, may not be used to
17 #    endorse or promote products derived from this software without
18 #    specific prior written permission.
19 #
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
24 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #=============================================================================
31
32 get_filename_component(_currentDir "${CMAKE_CURRENT_LIST_FILE}" PATH)
33 include("${_currentDir}/AddCompilerFlag.cmake")
34 include(CheckIncludeFile)
35
# _my_find(<list-variable-name> <value> <result-variable-name>)
#
# Membership test for CMake lists: sets <result-variable-name> to TRUE when
# <value> is an element of the list held by <list-variable-name>, and to FALSE
# when it is not.
#
# This is a macro, so everything happens in the caller's scope; the scratch
# variable _found (the index reported by list(FIND), or -1) is left behind
# there as well unless it is itself used as the result variable.
macro(_my_find _list _value _ret)
   list(FIND ${_list} "${_value}" _found)
   if(NOT _found EQUAL -1)
      # list(FIND) reported a valid index => the value is present
      set(${_ret} TRUE)
   else()
      set(${_ret} FALSE)
   endif()
endmacro()
44
# AutodetectHostArchitecture()
#
# Guesses the micro-architecture of the CPU CMake is currently running on and
# stores the name in TARGET_ARCHITECTURE ("generic" when nothing more specific
# is recognized).
#
# This is a macro, so all variables below are set in the caller's scope:
#   TARGET_ARCHITECTURE          - detected architecture name
#   Vc_ARCHITECTURE_FLAGS        - cleared
#   _vendor_id, _cpu_family,
#   _cpu_model, _cpu_flags       - raw values read from the system; they are
#                                  reset first so stale values cannot leak in
#   _available_vector_units_list - appended to (not reset) for Intel NetBurst,
#                                  which has no architecture name of its own
#
# Data sources: /proc/cpuinfo on Linux, sysctl on Darwin, the registry on
# Windows. On any other system nothing is read and the result is "generic".
macro(AutodetectHostArchitecture)
   set(TARGET_ARCHITECTURE "generic")
   set(Vc_ARCHITECTURE_FLAGS)
   set(_vendor_id)
   set(_cpu_family)
   set(_cpu_model)
   set(_cpu_flags)
   if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
      file(READ "/proc/cpuinfo" _cpuinfo)
      # The leading ".*" is greedy, so each expression picks up the last
      # matching entry in the file. If an entry is missing the REPLACE leaves
      # the complete file contents in the variable, which simply fails all of
      # the comparisons further down.
      string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}")
      string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}")
      string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}")
      string(REGEX REPLACE ".*flags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}")
   elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
      # execute_process replaces the deprecated exec_program command. The
      # trailing newline of the sysctl output has to be stripped explicitly so
      # that the string/number comparisons below work.
      execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.vendor
         OUTPUT_VARIABLE _vendor_id ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
      execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.model
         OUTPUT_VARIABLE _cpu_model ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
      execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.family
         OUTPUT_VARIABLE _cpu_family ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
      execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.features
         OUTPUT_VARIABLE _cpu_flags ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
      # normalize to the spelling used in /proc/cpuinfo (e.g. "sse4_1")
      string(TOLOWER "${_cpu_flags}" _cpu_flags)
      string(REPLACE "." "_" _cpu_flags "${_cpu_flags}")
   elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
      get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE)
      get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE)
      mark_as_advanced(_vendor_id _cpu_id)
      string(REGEX REPLACE ".* Family ([0-9]+) .*" "\\1" _cpu_family "${_cpu_id}")
      string(REGEX REPLACE ".* Model ([0-9]+) .*" "\\1" _cpu_model "${_cpu_id}")
   endif()
   if(_vendor_id STREQUAL "GenuineIntel")
      if(_cpu_family EQUAL 6)
         # Any recent Intel CPU except NetBurst
         if(_cpu_model EQUAL 62)
            set(TARGET_ARCHITECTURE "ivy-bridge")
         elseif(_cpu_model EQUAL 58)
            set(TARGET_ARCHITECTURE "ivy-bridge")
         elseif(_cpu_model EQUAL 47) # Xeon E7 4860
            set(TARGET_ARCHITECTURE "westmere")
         elseif(_cpu_model EQUAL 46) # Xeon 7500 series
            set(TARGET_ARCHITECTURE "westmere")
         elseif(_cpu_model EQUAL 45) # Xeon TNG
            set(TARGET_ARCHITECTURE "sandy-bridge")
         elseif(_cpu_model EQUAL 44) # Xeon 5600 series
            set(TARGET_ARCHITECTURE "westmere")
         elseif(_cpu_model EQUAL 42) # Core TNG
            set(TARGET_ARCHITECTURE "sandy-bridge")
         elseif(_cpu_model EQUAL 37) # Core i7/i5/i3
            set(TARGET_ARCHITECTURE "westmere")
         elseif(_cpu_model EQUAL 31) # Core i7/i5
            set(TARGET_ARCHITECTURE "westmere")
         elseif(_cpu_model EQUAL 30) # Core i7/i5
            set(TARGET_ARCHITECTURE "westmere")
         elseif(_cpu_model EQUAL 29)
            set(TARGET_ARCHITECTURE "penryn")
         elseif(_cpu_model EQUAL 28)
            set(TARGET_ARCHITECTURE "atom")
         elseif(_cpu_model EQUAL 26)
            set(TARGET_ARCHITECTURE "nehalem")
         elseif(_cpu_model EQUAL 23)
            set(TARGET_ARCHITECTURE "penryn")
         elseif(_cpu_model EQUAL 15)
            set(TARGET_ARCHITECTURE "merom")
         elseif(_cpu_model EQUAL 14)
            set(TARGET_ARCHITECTURE "core")
         elseif(_cpu_model LESS 14)
            message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the generic CPU settings with SSE2.")
            set(TARGET_ARCHITECTURE "generic")
         else()
            # unknown model newer than anything listed above
            message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.")
            set(TARGET_ARCHITECTURE "merom")
         endif()
      elseif(_cpu_family EQUAL 7) # Itanium (not supported)
         message(WARNING "Your CPU (Itanium: family ${_cpu_family}, model ${_cpu_model}) is not supported by OptimizeForArchitecture.cmake.")
      elseif(_cpu_family EQUAL 15) # NetBurst
         # TARGET_ARCHITECTURE stays "generic"; only the vector units are recorded
         list(APPEND _available_vector_units_list "sse" "sse2")
         if(_cpu_model GREATER 2) # Not sure whether this must be 3 or even 4 instead
            list(APPEND _available_vector_units_list "sse3")
         endif()
      endif()
   elseif(_vendor_id STREQUAL "AuthenticAMD")
      if(_cpu_family EQUAL 22) # 16h
         set(TARGET_ARCHITECTURE "AMD 16h")
      elseif(_cpu_family EQUAL 21) # 15h
         if(_cpu_model LESS 2)
            set(TARGET_ARCHITECTURE "bulldozer")
         else()
            set(TARGET_ARCHITECTURE "piledriver")
         endif()
      elseif(_cpu_family EQUAL 20) # 14h
         set(TARGET_ARCHITECTURE "AMD 14h")
      elseif(_cpu_family EQUAL 18) # 12h
         # no dedicated settings: TARGET_ARCHITECTURE stays "generic"
      elseif(_cpu_family EQUAL 16) # 10h
         set(TARGET_ARCHITECTURE "barcelona")
      elseif(_cpu_family EQUAL 15)
         set(TARGET_ARCHITECTURE "k8")
         if(_cpu_model GREATER 64) # I don't know the right number to put here. This is just a guess from the hardware I have access to
            set(TARGET_ARCHITECTURE "k8-sse3")
         endif()
      endif()
   endif()
endmacro()
143
# OptimizeForArchitecture()
#
# Translates the TARGET_ARCHITECTURE cache option into compiler flags.
#
# Inputs (read from the caller's scope / the cache):
#   TARGET_ARCHITECTURE       - architecture name, "auto" or "none"; matched
#                               case-insensitively
#   Vc_AVX_INTRINSICS_BROKEN,
#   Vc_FMA4_INTRINSICS_BROKEN,
#   Vc_XOP_INTRINSICS_BROKEN  - when set, the respective unit defaults to off
#
# Results (this is a macro, everything is set in the caller's scope):
#   USE_SSE2 ... USE_FMA4     - cache options, one per vector unit; their
#                               defaults are overwritten (FORCE) whenever
#                               TARGET_ARCHITECTURE changed since the last run
#   Vc_ARCHITECTURE_FLAGS     - receives the selected compiler flags
#   With MSVC additionally a __<UNIT>__ definition per enabled vector unit is
#   added via add_definitions.
#
# AddCompilerFlag and UserWarning are not defined in this file; presumably
# AddCompilerFlag tests a flag with the compiler and, if it is accepted,
# appends it to the variable named after CXX_FLAGS - verify against
# AddCompilerFlag.cmake.
macro(OptimizeForArchitecture)
   set(TARGET_ARCHITECTURE "none" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used.\nSetting the value to \"auto\" will try to optimize for the architecture where cmake is called.\nOther supported values are: \"none\", \"generic\", \"core\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"ivy-bridge\", \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\", \"piledriver\", \"AMD 14h\", \"AMD 16h\".")
   # _force becomes FORCE when the user picked a different architecture than in
   # the previous run, so that the USE_* defaults below follow the new choice.
   set(_force)
   if(NOT _last_target_arch STREQUAL "${TARGET_ARCHITECTURE}")
      message(STATUS "target changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"")
      set(_force FORCE)
   endif()
   set(_last_target_arch "${TARGET_ARCHITECTURE}" CACHE STRING "" FORCE)
   mark_as_advanced(_last_target_arch)
   string(TOLOWER "${TARGET_ARCHITECTURE}" TARGET_ARCHITECTURE)

   set(_march_flag_list)
   set(_available_vector_units_list)

   if(TARGET_ARCHITECTURE STREQUAL "auto")
      AutodetectHostArchitecture()
      message(STATUS "Detected CPU: ${TARGET_ARCHITECTURE}")
      # The detected name may contain upper-case letters ("AMD 16h"). Normalize
      # it the same way as a user supplied value so that both take the same
      # branch below.
      string(TOLOWER "${TARGET_ARCHITECTURE}" TARGET_ARCHITECTURE)
   endif()

   # Per architecture: candidate -march values (most specific first, the first
   # one the compiler accepts wins) and the vector units the CPU provides.
   # All names compared here must be lower-case.
   if(TARGET_ARCHITECTURE STREQUAL "core")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
   elseif(TARGET_ARCHITECTURE STREQUAL "merom")
      list(APPEND _march_flag_list "merom")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
   elseif(TARGET_ARCHITECTURE STREQUAL "penryn")
      list(APPEND _march_flag_list "penryn")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
      message(STATUS "Sadly the Penryn architecture exists in variants with SSE4.1 and without SSE4.1.")
      # NOTE(review): within this file _cpu_flags is only filled in by
      # AutodetectHostArchitecture, i.e. when TARGET_ARCHITECTURE was "auto".
      if(_cpu_flags MATCHES "sse4_1")
         message(STATUS "SSE4.1: enabled (auto-detected from this computer's CPU flags)")
         list(APPEND _available_vector_units_list "sse4.1")
      else()
         message(STATUS "SSE4.1: disabled (auto-detected from this computer's CPU flags)")
      endif()
   elseif(TARGET_ARCHITECTURE STREQUAL "nehalem")
      list(APPEND _march_flag_list "nehalem")
      list(APPEND _march_flag_list "corei7")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
   elseif(TARGET_ARCHITECTURE STREQUAL "westmere")
      list(APPEND _march_flag_list "westmere")
      list(APPEND _march_flag_list "corei7")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
   elseif(TARGET_ARCHITECTURE STREQUAL "ivy-bridge")
      list(APPEND _march_flag_list "core-avx-i")
      list(APPEND _march_flag_list "corei7-avx")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx" "rdrnd" "f16c")
   elseif(TARGET_ARCHITECTURE STREQUAL "sandy-bridge")
      list(APPEND _march_flag_list "sandybridge")
      list(APPEND _march_flag_list "corei7-avx")
      list(APPEND _march_flag_list "corei7")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx")
   elseif(TARGET_ARCHITECTURE STREQUAL "atom")
      list(APPEND _march_flag_list "atom")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3")
   elseif(TARGET_ARCHITECTURE STREQUAL "k8")
      list(APPEND _march_flag_list "k8")
      list(APPEND _available_vector_units_list "sse" "sse2")
   elseif(TARGET_ARCHITECTURE STREQUAL "k8-sse3")
      list(APPEND _march_flag_list "k8-sse3")
      list(APPEND _march_flag_list "k8")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
   elseif(TARGET_ARCHITECTURE STREQUAL "amd 16h")
      list(APPEND _march_flag_list "btver2")
      list(APPEND _march_flag_list "btver1")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "f16c")
   elseif(TARGET_ARCHITECTURE STREQUAL "amd 14h")
      list(APPEND _march_flag_list "btver1")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a")
   elseif(TARGET_ARCHITECTURE STREQUAL "piledriver")
      list(APPEND _march_flag_list "bdver2")
      list(APPEND _march_flag_list "bdver1")
      list(APPEND _march_flag_list "bulldozer")
      list(APPEND _march_flag_list "barcelona")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4" "fma" "f16c")
   elseif(TARGET_ARCHITECTURE STREQUAL "interlagos")
      list(APPEND _march_flag_list "bdver1")
      list(APPEND _march_flag_list "bulldozer")
      list(APPEND _march_flag_list "barcelona")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4")
   elseif(TARGET_ARCHITECTURE STREQUAL "bulldozer")
      list(APPEND _march_flag_list "bdver1")
      list(APPEND _march_flag_list "bulldozer")
      list(APPEND _march_flag_list "barcelona")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4")
   elseif(TARGET_ARCHITECTURE STREQUAL "barcelona")
      list(APPEND _march_flag_list "barcelona")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
   elseif(TARGET_ARCHITECTURE STREQUAL "istanbul")
      list(APPEND _march_flag_list "barcelona")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
   elseif(TARGET_ARCHITECTURE STREQUAL "magny-cours")
      list(APPEND _march_flag_list "barcelona")
      list(APPEND _march_flag_list "core2")
      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
   elseif(TARGET_ARCHITECTURE STREQUAL "generic")
      list(APPEND _march_flag_list "generic")
   elseif(TARGET_ARCHITECTURE STREQUAL "none")
      # add this clause to remove it from the else clause
   else()
      message(FATAL_ERROR "Unknown target architecture: \"${TARGET_ARCHITECTURE}\". Please set TARGET_ARCHITECTURE to a supported value.")
   endif()

   if(NOT TARGET_ARCHITECTURE STREQUAL "none")
      set(_disable_vector_unit_list)
      set(_enable_vector_unit_list)

      # Step 1: which units does the selected architecture provide?
      _my_find(_available_vector_units_list "sse2" SSE2_FOUND)
      _my_find(_available_vector_units_list "sse3" SSE3_FOUND)
      _my_find(_available_vector_units_list "ssse3" SSSE3_FOUND)
      _my_find(_available_vector_units_list "sse4.1" SSE4_1_FOUND)
      _my_find(_available_vector_units_list "sse4.2" SSE4_2_FOUND)
      _my_find(_available_vector_units_list "sse4a" SSE4a_FOUND)
      if(DEFINED Vc_AVX_INTRINSICS_BROKEN AND Vc_AVX_INTRINSICS_BROKEN)
         # without AVX neither XOP nor FMA4 is offered
         UserWarning("AVX disabled per default because of old/broken compiler")
         set(AVX_FOUND false)
         set(XOP_FOUND false)
         set(FMA4_FOUND false)
      else()
         _my_find(_available_vector_units_list "avx" AVX_FOUND)
         if(DEFINED Vc_FMA4_INTRINSICS_BROKEN AND Vc_FMA4_INTRINSICS_BROKEN)
            UserWarning("FMA4 disabled per default because of old/broken compiler")
            set(FMA4_FOUND false)
         else()
            _my_find(_available_vector_units_list "fma4" FMA4_FOUND)
         endif()
         if(DEFINED Vc_XOP_INTRINSICS_BROKEN AND Vc_XOP_INTRINSICS_BROKEN)
            UserWarning("XOP disabled per default because of old/broken compiler")
            set(XOP_FOUND false)
         else()
            _my_find(_available_vector_units_list "xop" XOP_FOUND)
         endif()
      endif()

      # Step 2: expose the result as user-editable options. ${_force} is either
      # empty or FORCE (see the top of this macro).
      set(USE_SSE2   ${SSE2_FOUND}   CACHE BOOL "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." ${_force})
      set(USE_SSE3   ${SSE3_FOUND}   CACHE BOOL "Use SSE3. If SSE3 instructions are not enabled they will be emulated." ${_force})
      set(USE_SSSE3  ${SSSE3_FOUND}  CACHE BOOL "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." ${_force})
      set(USE_SSE4_1 ${SSE4_1_FOUND} CACHE BOOL "Use SSE4.1. If SSE4.1 instructions are not enabled they will be emulated." ${_force})
      set(USE_SSE4_2 ${SSE4_2_FOUND} CACHE BOOL "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." ${_force})
      set(USE_SSE4a  ${SSE4a_FOUND}  CACHE BOOL "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." ${_force})
      set(USE_AVX    ${AVX_FOUND}    CACHE BOOL "Use AVX. This will double some of the vector sizes relative to SSE." ${_force})
      set(USE_XOP    ${XOP_FOUND}    CACHE BOOL "Use XOP." ${_force})
      set(USE_FMA4   ${FMA4_FOUND}   CACHE BOOL "Use FMA4." ${_force})
      mark_as_advanced(USE_SSE2 USE_SSE3 USE_SSSE3 USE_SSE4_1 USE_SSE4_2 USE_SSE4a USE_AVX USE_XOP USE_FMA4)

      # Step 3: sort every unit into the enable or the disable list according
      # to the (possibly user-modified) options.
      if(USE_SSE2)
         list(APPEND _enable_vector_unit_list "sse2")
      else()
         list(APPEND _disable_vector_unit_list "sse2")
      endif()
      if(USE_SSE3)
         list(APPEND _enable_vector_unit_list "sse3")
      else()
         list(APPEND _disable_vector_unit_list "sse3")
      endif()
      if(USE_SSSE3)
         list(APPEND _enable_vector_unit_list "ssse3")
      else()
         list(APPEND _disable_vector_unit_list "ssse3")
      endif()
      if(USE_SSE4_1)
         list(APPEND _enable_vector_unit_list "sse4.1")
      else()
         list(APPEND _disable_vector_unit_list "sse4.1")
      endif()
      if(USE_SSE4_2)
         list(APPEND _enable_vector_unit_list "sse4.2")
      else()
         list(APPEND _disable_vector_unit_list "sse4.2")
      endif()
      if(USE_SSE4a)
         list(APPEND _enable_vector_unit_list "sse4a")
      else()
         list(APPEND _disable_vector_unit_list "sse4a")
      endif()
      if(USE_AVX)
         list(APPEND _enable_vector_unit_list "avx")
         # we want SSE intrinsics to result in instructions using the VEX prefix.
         # Otherwise integer ops (which require the older SSE intrinsics) would
         # always have a large penalty.
         list(APPEND _enable_vector_unit_list "sse2avx")
      else()
         list(APPEND _disable_vector_unit_list "avx")
      endif()
      if(USE_XOP)
         list(APPEND _enable_vector_unit_list "xop")
      else()
         list(APPEND _disable_vector_unit_list "xop")
      endif()
      if(USE_FMA4)
         list(APPEND _enable_vector_unit_list "fma4")
      else()
         list(APPEND _disable_vector_unit_list "fma4")
      endif()

      # Step 4: turn the lists into compiler specific flags.
      if(MSVC)
         # MSVC on 32 bit can select /arch:SSE2 (since 2010 also /arch:AVX)
         # MSVC on 64 bit cannot select anything (should have changed with MSVC 2010)
         _my_find(_enable_vector_unit_list "avx" _avx)
         set(_avx_flag FALSE)
         if(_avx)
            AddCompilerFlag("/arch:AVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _avx_flag)
         endif()
         if(NOT _avx_flag)
            # AVX not wanted or not accepted by the compiler: fall back to SSE2
            _my_find(_enable_vector_unit_list "sse2" _found)
            if(_found)
               AddCompilerFlag("/arch:SSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
            endif()
         endif()
         # define __SSE2__, __SSE4_1__, ... for every enabled unit
         foreach(_flag ${_enable_vector_unit_list})
            string(TOUPPER "${_flag}" _flag)
            string(REPLACE "." "_" _flag "__${_flag}__")
            add_definitions("-D${_flag}")
         endforeach()
      elseif(CMAKE_CXX_COMPILER MATCHES "/(icpc|icc)$") # ICC (on Linux)
         # ICC takes a single -x<level> option: pick the highest level the
         # architecture provides. Note that this looks at the available units,
         # not at the USE_* options.
         _my_find(_available_vector_units_list "avx2"    _found)
         if(_found)
            AddCompilerFlag("-xCORE-AVX2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
         else()
            _my_find(_available_vector_units_list "f16c"    _found)
            if(_found)
               AddCompilerFlag("-xCORE-AVX-I" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
            else()
               _my_find(_available_vector_units_list "avx"    _found)
               if(_found)
                  AddCompilerFlag("-xAVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
               else()
                  _my_find(_available_vector_units_list "sse4.2" _found)
                  if(_found)
                     AddCompilerFlag("-xSSE4.2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                  else()
                     _my_find(_available_vector_units_list "sse4.1" _found)
                     if(_found)
                        AddCompilerFlag("-xSSE4.1" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                     else()
                        _my_find(_available_vector_units_list "ssse3"  _found)
                        if(_found)
                           AddCompilerFlag("-xSSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                        else()
                           _my_find(_available_vector_units_list "sse3"   _found)
                           if(_found)
                              # If the target host is an AMD machine then we still want to use -xSSE2 because the binary would refuse to run at all otherwise
                              _my_find(_march_flag_list "barcelona" _found)
                              if(NOT _found)
                                 _my_find(_march_flag_list "k8-sse3" _found)
                              endif()
                              if(_found)
                                 AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                              else()
                                 AddCompilerFlag("-xSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                              endif()
                           else()
                              _my_find(_available_vector_units_list "sse2"   _found)
                              if(_found)
                                 AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                              endif()
                           endif()
                        endif()
                     endif()
                  endif()
               endif()
            endif()
         endif()
      else() # not MSVC and not ICC => GCC, Clang, Open64
         # use the first -march value the compiler knows
         foreach(_flag ${_march_flag_list})
            AddCompilerFlag("-march=${_flag}" CXX_RESULT _good CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
            if(_good)
               break()
            endif()
         endforeach()
         foreach(_flag ${_enable_vector_unit_list})
            AddCompilerFlag("-m${_flag}" CXX_RESULT _result)
            if(_result)
               # The compiler accepts -m<unit>. For units that come with their
               # own intrinsics header also require that header to be usable;
               # _header stays FALSE for units without such a check.
               set(_header FALSE)
               if(_flag STREQUAL "sse3")
                  set(_header "pmmintrin.h")
               elseif(_flag STREQUAL "ssse3")
                  set(_header "tmmintrin.h")
               elseif(_flag STREQUAL "sse4.1")
                  set(_header "smmintrin.h")
               elseif(_flag STREQUAL "sse4.2")
                  set(_header "smmintrin.h")
               elseif(_flag STREQUAL "sse4a")
                  set(_header "ammintrin.h")
               elseif(_flag STREQUAL "avx")
                  set(_header "immintrin.h")
               elseif(_flag STREQUAL "fma4")
                  set(_header "x86intrin.h")
               elseif(_flag STREQUAL "xop")
                  set(_header "x86intrin.h")
               endif()
               # e.g. HAVE_pmmintrin_h
               set(_resultVar "HAVE_${_header}")
               string(REPLACE "." "_" _resultVar "${_resultVar}")
               if(_header)
                  CHECK_INCLUDE_FILE("${_header}" ${_resultVar} "-m${_flag}")
                  if(NOT ${_resultVar})
                     # e.g. USE_SSE4_1
                     set(_useVar "USE_${_flag}")
                     string(TOUPPER "${_useVar}" _useVar)
                     string(REPLACE "." "_" _useVar "${_useVar}")
                     message(STATUS "disabling ${_useVar} because ${_header} is missing")
                     set(${_useVar} FALSE)
                     list(APPEND _disable_vector_unit_list "${_flag}")
                  endif()
               endif()
               if(NOT _header OR ${_resultVar})
                  set(Vc_ARCHITECTURE_FLAGS "${Vc_ARCHITECTURE_FLAGS} -m${_flag}")
               endif()
            endif()
         endforeach()
         # explicitly switch off everything -march may have enabled implicitly
         foreach(_flag ${_disable_vector_unit_list})
            AddCompilerFlag("-mno-${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
         endforeach()
      endif()
   endif()
endmacro()