git.uio.no Git - u/mrichter/AliRoot.git/blobdiff - Vc/cmake/OptimizeForArchitecture.cmake
added slewing correction by data
[u/mrichter/AliRoot.git] / Vc / cmake / OptimizeForArchitecture.cmake
index 81942830473e8a29b43e2e4f99ffc27d3b36e7d5..246889c4943207b05a52345742afa419d023160e 100644 (file)
@@ -1,5 +1,37 @@
+#=============================================================================
+# Copyright 2010-2013 Matthias Kretz <kretz@kde.org>
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#  * Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+#  * The names of Kitware, Inc., the Insight Consortium, or the names of
+#    any consortium members, or of any contributors, may not be used to
+#    endorse or promote products derived from this software without
+#    specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#=============================================================================
+
 get_filename_component(_currentDir "${CMAKE_CURRENT_LIST_FILE}" PATH)
 include("${_currentDir}/AddCompilerFlag.cmake")
+include(CheckIncludeFile)
 
 macro(_my_find _list _value _ret)
    list(FIND ${_list} "${_value}" _found)
@@ -21,7 +53,7 @@ macro(AutodetectHostArchitecture)
       string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}")
       string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}")
       string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}")
-      string(REGEX REPLACE ".*flags[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_flags "${_cpuinfo}")
+      string(REGEX REPLACE ".*flags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}")
    elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
       exec_program("/usr/sbin/sysctl -n machdep.cpu.vendor" OUTPUT_VARIABLE _vendor_id)
       exec_program("/usr/sbin/sysctl -n machdep.cpu.model"  OUTPUT_VARIABLE _cpu_model)
@@ -39,7 +71,11 @@ macro(AutodetectHostArchitecture)
    if(_vendor_id STREQUAL "GenuineIntel")
       if(_cpu_family EQUAL 6)
          # Any recent Intel CPU except NetBurst
-         if(_cpu_model EQUAL 46)     # Xeon 7500 series
+         if(_cpu_model EQUAL 58)
+            set(TARGET_ARCHITECTURE "ivy-bridge")
+         elseif(_cpu_model EQUAL 47) # Xeon E7 4860
+            set(TARGET_ARCHITECTURE "westmere")
+         elseif(_cpu_model EQUAL 46) # Xeon 7500 series
             set(TARGET_ARCHITECTURE "westmere")
          elseif(_cpu_model EQUAL 45) # Xeon TNG
             set(TARGET_ARCHITECTURE "sandy-bridge")
@@ -82,7 +118,11 @@ macro(AutodetectHostArchitecture)
       endif(_cpu_family EQUAL 6)
    elseif(_vendor_id STREQUAL "AuthenticAMD")
       if(_cpu_family EQUAL 21) # 15h
-         set(TARGET_ARCHITECTURE "bulldozer")
+         if(_cpu_model LESS 2)
+            set(TARGET_ARCHITECTURE "bulldozer")
+         else()
+            set(TARGET_ARCHITECTURE "piledriver")
+         endif()
       elseif(_cpu_family EQUAL 20) # 14h
       elseif(_cpu_family EQUAL 18) # 12h
       elseif(_cpu_family EQUAL 16) # 10h
@@ -97,7 +137,7 @@ macro(AutodetectHostArchitecture)
 endmacro()
 
 macro(OptimizeForArchitecture)
-   set(TARGET_ARCHITECTURE "none" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used.\nSetting the value to \"auto\" will try to optimize for the architecture where cmake is called.\nOther supported values are: \"none\", \"generic\", \"core\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\".")
+   set(TARGET_ARCHITECTURE "none" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used.\nSetting the value to \"auto\" will try to optimize for the architecture where cmake is called.\nOther supported values are: \"none\", \"generic\", \"core\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"ivy-bridge\", \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\", \"piledriver\".")
    set(_force)
    if(NOT _last_target_arch STREQUAL "${TARGET_ARCHITECTURE}")
       message(STATUS "target changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"")
@@ -143,6 +183,11 @@ macro(OptimizeForArchitecture)
       list(APPEND _march_flag_list "corei7")
       list(APPEND _march_flag_list "core2")
       list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2")
+   elseif(TARGET_ARCHITECTURE STREQUAL "ivy-bridge")
+      list(APPEND _march_flag_list "core-avx-i")
+      list(APPEND _march_flag_list "corei7-avx")
+      list(APPEND _march_flag_list "core2")
+      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx" "rdrnd" "f16c")
    elseif(TARGET_ARCHITECTURE STREQUAL "sandy-bridge")
       list(APPEND _march_flag_list "sandybridge")
       list(APPEND _march_flag_list "corei7-avx")
@@ -159,12 +204,21 @@ macro(OptimizeForArchitecture)
       list(APPEND _march_flag_list "k8-sse3")
       list(APPEND _march_flag_list "k8")
       list(APPEND _available_vector_units_list "sse" "sse2" "sse3")
+   elseif(TARGET_ARCHITECTURE STREQUAL "piledriver")
+      list(APPEND _march_flag_list "bdver2")
+      list(APPEND _march_flag_list "bdver1")
+      list(APPEND _march_flag_list "bulldozer")
+      list(APPEND _march_flag_list "barcelona")
+      list(APPEND _march_flag_list "core2")
+      list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4" "fma" "f16c")
    elseif(TARGET_ARCHITECTURE STREQUAL "interlagos")
+      list(APPEND _march_flag_list "bdver1")
       list(APPEND _march_flag_list "bulldozer")
       list(APPEND _march_flag_list "barcelona")
       list(APPEND _march_flag_list "core2")
       list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4")
    elseif(TARGET_ARCHITECTURE STREQUAL "bulldozer")
+      list(APPEND _march_flag_list "bdver1")
       list(APPEND _march_flag_list "bulldozer")
       list(APPEND _march_flag_list "barcelona")
       list(APPEND _march_flag_list "core2")
@@ -205,7 +259,12 @@ macro(OptimizeForArchitecture)
          set(FMA4_FOUND false)
       else()
          _my_find(_available_vector_units_list "avx" AVX_FOUND)
-         _my_find(_available_vector_units_list "fma4" FMA4_FOUND)
+         if(DEFINED Vc_FMA4_INTRINSICS_BROKEN AND Vc_FMA4_INTRINSICS_BROKEN)
+            UserWarning("FMA4 disabled per default because of old/broken compiler")
+            set(FMA4_FOUND false)
+         else()
+            _my_find(_available_vector_units_list "fma4" FMA4_FOUND)
+         endif()
          if(DEFINED Vc_XOP_INTRINSICS_BROKEN AND Vc_XOP_INTRINSICS_BROKEN)
             UserWarning("XOP disabled per default because of old/broken compiler")
             set(XOP_FOUND false)
@@ -292,38 +351,48 @@ macro(OptimizeForArchitecture)
             add_definitions("-D${_flag}")
          endforeach(_flag)
       elseif(CMAKE_CXX_COMPILER MATCHES "/(icpc|icc)$") # ICC (on Linux)
-         _my_find(_available_vector_units_list "avx"    _found)
+         _my_find(_available_vector_units_list "avx2"    _found)
          if(_found)
-            AddCompilerFlag("-xAVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+            AddCompilerFlag("-xCORE-AVX2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
          else(_found)
-            _my_find(_available_vector_units_list "sse4.2" _found)
+            _my_find(_available_vector_units_list "f16c"    _found)
             if(_found)
-               AddCompilerFlag("-xSSE4.2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+               AddCompilerFlag("-xCORE-AVX-I" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
             else(_found)
-               _my_find(_available_vector_units_list "sse4.1" _found)
+               _my_find(_available_vector_units_list "avx"    _found)
                if(_found)
-                  AddCompilerFlag("-xSSE4.1" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+                  AddCompilerFlag("-xAVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                else(_found)
-                  _my_find(_available_vector_units_list "ssse3"  _found)
+                  _my_find(_available_vector_units_list "sse4.2" _found)
                   if(_found)
-                     AddCompilerFlag("-xSSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+                     AddCompilerFlag("-xSSE4.2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                   else(_found)
-                     _my_find(_available_vector_units_list "sse3"   _found)
+                     _my_find(_available_vector_units_list "sse4.1" _found)
                      if(_found)
-                        # If the target host is an AMD machine then we still want to use -xSSE2 because the binary would refuse to run at all otherwise
-                        _my_find(_march_flag_list "barcelona" _found)
-                        if(NOT _found)
-                           _my_find(_march_flag_list "k8-sse3" _found)
-                        endif(NOT _found)
-                        if(_found)
-                           AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
-                        else(_found)
-                           AddCompilerFlag("-xSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
-                        endif(_found)
+                        AddCompilerFlag("-xSSE4.1" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                      else(_found)
-                        _my_find(_available_vector_units_list "sse2"   _found)
+                        _my_find(_available_vector_units_list "ssse3"  _found)
                         if(_found)
-                           AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+                           AddCompilerFlag("-xSSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+                        else(_found)
+                           _my_find(_available_vector_units_list "sse3"   _found)
+                           if(_found)
+                              # A binary built with -xSSE3 would refuse to run at all on an AMD machine, so if the target is AMD (barcelona or k8-sse3 is in the -march list) we still use -xSSE2
+                              _my_find(_march_flag_list "barcelona" _found)
+                              if(NOT _found)
+                                 _my_find(_march_flag_list "k8-sse3" _found)
+                              endif(NOT _found)
+                              if(_found)
+                                 AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+                              else(_found)
+                                 AddCompilerFlag("-xSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+                              endif(_found)
+                           else(_found)
+                              _my_find(_available_vector_units_list "sse2"   _found)
+                              if(_found)
+                                 AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+                              endif(_found)
+                           endif(_found)
                         endif(_found)
                      endif(_found)
                   endif(_found)
@@ -338,7 +407,43 @@ macro(OptimizeForArchitecture)
             endif(_good)
          endforeach(_flag)
          foreach(_flag ${_enable_vector_unit_list})
-            AddCompilerFlag("-m${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+            AddCompilerFlag("-m${_flag}" CXX_RESULT _result)
+            if(_result)
+               set(_header FALSE)
+               if(_flag STREQUAL "sse3")
+                  set(_header "pmmintrin.h")
+               elseif(_flag STREQUAL "ssse3")
+                  set(_header "tmmintrin.h")
+               elseif(_flag STREQUAL "sse4.1")
+                  set(_header "smmintrin.h")
+               elseif(_flag STREQUAL "sse4.2")
+                  set(_header "smmintrin.h")
+               elseif(_flag STREQUAL "sse4a")
+                  set(_header "ammintrin.h")
+               elseif(_flag STREQUAL "avx")
+                  set(_header "immintrin.h")
+               elseif(_flag STREQUAL "fma4")
+                  set(_header "x86intrin.h")
+               elseif(_flag STREQUAL "xop")
+                  set(_header "x86intrin.h")
+               endif()
+               set(_resultVar "HAVE_${_header}")
+               string(REPLACE "." "_" _resultVar "${_resultVar}")
+               if(_header)
+                  CHECK_INCLUDE_FILE("${_header}" ${_resultVar} "-m${_flag}")
+                  if(NOT ${_resultVar})
+                     set(_useVar "USE_${_flag}")
+                     string(TOUPPER "${_useVar}" _useVar)
+                     string(REPLACE "." "_" _useVar "${_useVar}")
+                     message(STATUS "disabling ${_useVar} because ${_header} is missing")
+                     set(${_useVar} FALSE)
+                     list(APPEND _disable_vector_unit_list "${_flag}")
+                  endif()
+               endif()
+               if(NOT _header OR ${_resultVar})
+                  set(Vc_ARCHITECTURE_FLAGS "${Vc_ARCHITECTURE_FLAGS} -m${_flag}")
+               endif()
+            endif()
          endforeach(_flag)
          foreach(_flag ${_disable_vector_unit_list})
             AddCompilerFlag("-mno-${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)