git.uio.no Git - u/mrichter/AliRoot.git/blobdiff - Vc/cmake/OptimizeForArchitecture.cmake
- support compilation with GCC 4.1 and 4.2
[u/mrichter/AliRoot.git] / Vc / cmake / OptimizeForArchitecture.cmake
index 01f435caf2306e9d8681f06bbcd909941acc9f99..81942830473e8a29b43e2e4f99ffc27d3b36e7d5 100644 (file)
@@ -97,7 +97,7 @@ macro(AutodetectHostArchitecture)
 endmacro()
 
 macro(OptimizeForArchitecture)
-   set(TARGET_ARCHITECTURE "auto" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used.\nSetting the value to \"auto\" will try to optimize for the architecture where cmake is called.\nOther supported values are: \"generic\", \"core\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\".")
+   set(TARGET_ARCHITECTURE "none" CACHE STRING "CPU architecture to optimize for. Using an incorrect setting here can result in crashes of the resulting binary because of invalid instructions used.\nSetting the value to \"auto\" will try to optimize for the architecture where cmake is called.\nOther supported values are: \"none\", \"generic\", \"core\", \"merom\" (65nm Core2), \"penryn\" (45nm Core2), \"nehalem\", \"westmere\", \"sandy-bridge\", \"atom\", \"k8\", \"k8-sse3\", \"barcelona\", \"istanbul\", \"magny-cours\", \"bulldozer\", \"interlagos\".")
    set(_force)
    if(NOT _last_target_arch STREQUAL "${TARGET_ARCHITECTURE}")
       message(STATUS "target changed from \"${_last_target_arch}\" to \"${TARGET_ARCHITECTURE}\"")
@@ -183,157 +183,166 @@ macro(OptimizeForArchitecture)
       list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a")
    elseif(TARGET_ARCHITECTURE STREQUAL "generic")
       list(APPEND _march_flag_list "generic")
+   elseif(TARGET_ARCHITECTURE STREQUAL "none")
+      # "none" is a valid value: this intentionally empty branch keeps it from reaching the FATAL_ERROR in the else clause
    else(TARGET_ARCHITECTURE STREQUAL "core")
       message(FATAL_ERROR "Unknown target architecture: \"${TARGET_ARCHITECTURE}\". Please set TARGET_ARCHITECTURE to a supported value.")
    endif(TARGET_ARCHITECTURE STREQUAL "core")
 
-   set(_disable_vector_unit_list)
-   set(_enable_vector_unit_list)
-   _my_find(_available_vector_units_list "sse2" SSE2_FOUND)
-   _my_find(_available_vector_units_list "sse3" SSE3_FOUND)
-   _my_find(_available_vector_units_list "ssse3" SSSE3_FOUND)
-   _my_find(_available_vector_units_list "sse4.1" SSE4_1_FOUND)
-   _my_find(_available_vector_units_list "sse4.2" SSE4_2_FOUND)
-   _my_find(_available_vector_units_list "sse4a" SSE4a_FOUND)
-   if(DEFINED Vc_AVX_INTRINSICS_BROKEN AND Vc_AVX_INTRINSICS_BROKEN)
-      UserWarning("AVX disabled per default because of old/broken compiler")
-      set(AVX_FOUND false)
-      set(XOP_FOUND false)
-      set(FMA4_FOUND false)
-   else()
-      _my_find(_available_vector_units_list "avx" AVX_FOUND)
-      _my_find(_available_vector_units_list "xop" XOP_FOUND)
-      _my_find(_available_vector_units_list "fma4" FMA4_FOUND)
-   endif()
-   set(USE_SSE2   ${SSE2_FOUND}   CACHE BOOL "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." ${_force})
-   set(USE_SSE3   ${SSE3_FOUND}   CACHE BOOL "Use SSE3. If SSE3 instructions are not enabled they will be emulated." ${_force})
-   set(USE_SSSE3  ${SSSE3_FOUND}  CACHE BOOL "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." ${_force})
-   set(USE_SSE4_1 ${SSE4_1_FOUND} CACHE BOOL "Use SSE4.1. If SSE4.1 instructions are not enabled they will be emulated." ${_force})
-   set(USE_SSE4_2 ${SSE4_2_FOUND} CACHE BOOL "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." ${_force})
-   set(USE_SSE4a  ${SSE4a_FOUND}  CACHE BOOL "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." ${_force})
-   set(USE_AVX    ${AVX_FOUND}    CACHE BOOL "Use AVX. This will double some of the vector sizes relative to SSE." ${_force})
-   set(USE_XOP    ${XOP_FOUND}    CACHE BOOL "Use XOP." ${_force})
-   set(USE_FMA4   ${FMA4_FOUND}   CACHE BOOL "Use FMA4." ${_force})
-   mark_as_advanced(USE_SSE2 USE_SSE3 USE_SSSE3 USE_SSE4_1 USE_SSE4_2 USE_SSE4a USE_AVX USE_XOP USE_FMA4)
-   if(USE_SSE2)
-      list(APPEND _enable_vector_unit_list "sse2")
-   else(USE_SSE2)
-      list(APPEND _disable_vector_unit_list "sse2")
-   endif(USE_SSE2)
-   if(USE_SSE3)
-      list(APPEND _enable_vector_unit_list "sse3")
-   else(USE_SSE3)
-      list(APPEND _disable_vector_unit_list "sse3")
-   endif(USE_SSE3)
-   if(USE_SSSE3)
-      list(APPEND _enable_vector_unit_list "ssse3")
-   else(USE_SSSE3)
-      list(APPEND _disable_vector_unit_list "ssse3")
-   endif(USE_SSSE3)
-   if(USE_SSE4_1)
-      list(APPEND _enable_vector_unit_list "sse4.1")
-   else(USE_SSE4_1)
-      list(APPEND _disable_vector_unit_list "sse4.1")
-   endif(USE_SSE4_1)
-   if(USE_SSE4_2)
-      list(APPEND _enable_vector_unit_list "sse4.2")
-   else(USE_SSE4_2)
-      list(APPEND _disable_vector_unit_list "sse4.2")
-   endif(USE_SSE4_2)
-   if(USE_SSE4a)
-      list(APPEND _enable_vector_unit_list "sse4a")
-   else(USE_SSE4a)
-      list(APPEND _disable_vector_unit_list "sse4a")
-   endif(USE_SSE4a)
-   if(USE_AVX)
-      list(APPEND _enable_vector_unit_list "avx")
-      # we want SSE intrinsics to result in instructions using the VEX prefix.
-      # Otherwise integer ops (which require the older SSE intrinsics) would
-      # always have a large penalty.
-      list(APPEND _enable_vector_unit_list "sse2avx")
-   else(USE_AVX)
-      list(APPEND _disable_vector_unit_list "avx")
-   endif(USE_AVX)
-   if(USE_XOP)
-      list(APPEND _enable_vector_unit_list "xop")
-   else()
-      list(APPEND _disable_vector_unit_list "xop")
-   endif()
-   if(USE_FMA4)
-      list(APPEND _enable_vector_unit_list "fma4")
-   else()
-      list(APPEND _disable_vector_unit_list "fma4")
-   endif()
-   if(MSVC)
-      # MSVC on 32 bit can select /arch:SSE2 (since 2010 also /arch:AVX)
-      # MSVC on 64 bit cannot select anything (should have changed with MSVC 2010)
-      _my_find(_enable_vector_unit_list "avx" _avx)
-      set(_avx_flag FALSE)
-      if(_avx)
-         AddCompilerFlag("/arch:AVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _avx_flag)
-      endif()
-      if(NOT _avx_flag)
-         _my_find(_enable_vector_unit_list "sse2" _found)
-         if(_found)
-            AddCompilerFlag("/arch:SSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+   if(NOT TARGET_ARCHITECTURE STREQUAL "none")
+      set(_disable_vector_unit_list)
+      set(_enable_vector_unit_list)
+      _my_find(_available_vector_units_list "sse2" SSE2_FOUND)
+      _my_find(_available_vector_units_list "sse3" SSE3_FOUND)
+      _my_find(_available_vector_units_list "ssse3" SSSE3_FOUND)
+      _my_find(_available_vector_units_list "sse4.1" SSE4_1_FOUND)
+      _my_find(_available_vector_units_list "sse4.2" SSE4_2_FOUND)
+      _my_find(_available_vector_units_list "sse4a" SSE4a_FOUND)
+      if(DEFINED Vc_AVX_INTRINSICS_BROKEN AND Vc_AVX_INTRINSICS_BROKEN)
+         UserWarning("AVX disabled per default because of old/broken compiler")
+         set(AVX_FOUND false)
+         set(XOP_FOUND false)
+         set(FMA4_FOUND false)
+      else()
+         _my_find(_available_vector_units_list "avx" AVX_FOUND)
+         _my_find(_available_vector_units_list "fma4" FMA4_FOUND)
+         if(DEFINED Vc_XOP_INTRINSICS_BROKEN AND Vc_XOP_INTRINSICS_BROKEN)
+            UserWarning("XOP disabled per default because of old/broken compiler")
+            set(XOP_FOUND false)
+         else()
+            _my_find(_available_vector_units_list "xop" XOP_FOUND)
          endif()
       endif()
-      foreach(_flag ${_enable_vector_unit_list})
-         string(TOUPPER "${_flag}" _flag)
-         string(REPLACE "." "_" _flag "__${_flag}__")
-         add_definitions("-D${_flag}")
-      endforeach(_flag)
-   elseif(CMAKE_CXX_COMPILER MATCHES "/(icpc|icc)$") # ICC (on Linux)
-      _my_find(_available_vector_units_list "avx"    _found)
-      if(_found)
-         AddCompilerFlag("-xAVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
-      else(_found)
-         _my_find(_available_vector_units_list "sse4.2" _found)
+      set(USE_SSE2   ${SSE2_FOUND}   CACHE BOOL "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." ${_force})
+      set(USE_SSE3   ${SSE3_FOUND}   CACHE BOOL "Use SSE3. If SSE3 instructions are not enabled they will be emulated." ${_force})
+      set(USE_SSSE3  ${SSSE3_FOUND}  CACHE BOOL "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." ${_force})
+      set(USE_SSE4_1 ${SSE4_1_FOUND} CACHE BOOL "Use SSE4.1. If SSE4.1 instructions are not enabled they will be emulated." ${_force})
+      set(USE_SSE4_2 ${SSE4_2_FOUND} CACHE BOOL "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." ${_force})
+      set(USE_SSE4a  ${SSE4a_FOUND}  CACHE BOOL "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." ${_force})
+      set(USE_AVX    ${AVX_FOUND}    CACHE BOOL "Use AVX. This will double some of the vector sizes relative to SSE." ${_force})
+      set(USE_XOP    ${XOP_FOUND}    CACHE BOOL "Use XOP." ${_force})
+      set(USE_FMA4   ${FMA4_FOUND}   CACHE BOOL "Use FMA4." ${_force})
+      mark_as_advanced(USE_SSE2 USE_SSE3 USE_SSSE3 USE_SSE4_1 USE_SSE4_2 USE_SSE4a USE_AVX USE_XOP USE_FMA4)
+      if(USE_SSE2)
+         list(APPEND _enable_vector_unit_list "sse2")
+      else(USE_SSE2)
+         list(APPEND _disable_vector_unit_list "sse2")
+      endif(USE_SSE2)
+      if(USE_SSE3)
+         list(APPEND _enable_vector_unit_list "sse3")
+      else(USE_SSE3)
+         list(APPEND _disable_vector_unit_list "sse3")
+      endif(USE_SSE3)
+      if(USE_SSSE3)
+         list(APPEND _enable_vector_unit_list "ssse3")
+      else(USE_SSSE3)
+         list(APPEND _disable_vector_unit_list "ssse3")
+      endif(USE_SSSE3)
+      if(USE_SSE4_1)
+         list(APPEND _enable_vector_unit_list "sse4.1")
+      else(USE_SSE4_1)
+         list(APPEND _disable_vector_unit_list "sse4.1")
+      endif(USE_SSE4_1)
+      if(USE_SSE4_2)
+         list(APPEND _enable_vector_unit_list "sse4.2")
+      else(USE_SSE4_2)
+         list(APPEND _disable_vector_unit_list "sse4.2")
+      endif(USE_SSE4_2)
+      if(USE_SSE4a)
+         list(APPEND _enable_vector_unit_list "sse4a")
+      else(USE_SSE4a)
+         list(APPEND _disable_vector_unit_list "sse4a")
+      endif(USE_SSE4a)
+      if(USE_AVX)
+         list(APPEND _enable_vector_unit_list "avx")
+         # we want SSE intrinsics to result in instructions using the VEX prefix.
+         # Otherwise integer ops (which require the older SSE intrinsics) would
+         # always have a large penalty.
+         list(APPEND _enable_vector_unit_list "sse2avx")
+      else(USE_AVX)
+         list(APPEND _disable_vector_unit_list "avx")
+      endif(USE_AVX)
+      if(USE_XOP)
+         list(APPEND _enable_vector_unit_list "xop")
+      else()
+         list(APPEND _disable_vector_unit_list "xop")
+      endif()
+      if(USE_FMA4)
+         list(APPEND _enable_vector_unit_list "fma4")
+      else()
+         list(APPEND _disable_vector_unit_list "fma4")
+      endif()
+      if(MSVC)
+         # MSVC on 32 bit can select /arch:SSE2 (since 2010 also /arch:AVX)
+         # MSVC on 64 bit cannot select anything (should have changed with MSVC 2010)
+         _my_find(_enable_vector_unit_list "avx" _avx)
+         set(_avx_flag FALSE)
+         if(_avx)
+            AddCompilerFlag("/arch:AVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _avx_flag)
+         endif()
+         if(NOT _avx_flag)
+            _my_find(_enable_vector_unit_list "sse2" _found)
+            if(_found)
+               AddCompilerFlag("/arch:SSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+            endif()
+         endif()
+         foreach(_flag ${_enable_vector_unit_list})
+            string(TOUPPER "${_flag}" _flag)
+            string(REPLACE "." "_" _flag "__${_flag}__")
+            add_definitions("-D${_flag}")
+         endforeach(_flag)
+      elseif(CMAKE_CXX_COMPILER MATCHES "/(icpc|icc)$") # ICC (on Linux)
+         _my_find(_available_vector_units_list "avx"    _found)
          if(_found)
-            AddCompilerFlag("-xSSE4.2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+            AddCompilerFlag("-xAVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
          else(_found)
-            _my_find(_available_vector_units_list "sse4.1" _found)
+            _my_find(_available_vector_units_list "sse4.2" _found)
             if(_found)
-               AddCompilerFlag("-xSSE4.1" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+               AddCompilerFlag("-xSSE4.2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
             else(_found)
-               _my_find(_available_vector_units_list "ssse3"  _found)
+               _my_find(_available_vector_units_list "sse4.1" _found)
                if(_found)
-                  AddCompilerFlag("-xSSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+                  AddCompilerFlag("-xSSE4.1" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                else(_found)
-                  _my_find(_available_vector_units_list "sse3"   _found)
+                  _my_find(_available_vector_units_list "ssse3"  _found)
                   if(_found)
-                     # If the target host is an AMD machine then we still want to use -xSSE2 because the binary would refuse to run at all otherwise
-                     _my_find(_march_flag_list "barcelona" _found)
-                     if(NOT _found)
-                        _my_find(_march_flag_list "k8-sse3" _found)
-                     endif(NOT _found)
-                     if(_found)
-                        AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
-                     else(_found)
-                        AddCompilerFlag("-xSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
-                     endif(_found)
+                     AddCompilerFlag("-xSSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
                   else(_found)
-                     _my_find(_available_vector_units_list "sse2"   _found)
+                     _my_find(_available_vector_units_list "sse3"   _found)
                      if(_found)
-                        AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+                        # If the target host is an AMD machine then we still want to use -xSSE2 because the binary would refuse to run at all otherwise
+                        _my_find(_march_flag_list "barcelona" _found)
+                        if(NOT _found)
+                           _my_find(_march_flag_list "k8-sse3" _found)
+                        endif(NOT _found)
+                        if(_found)
+                           AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+                        else(_found)
+                           AddCompilerFlag("-xSSE3" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+                        endif(_found)
+                     else(_found)
+                        _my_find(_available_vector_units_list "sse2"   _found)
+                        if(_found)
+                           AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+                        endif(_found)
                      endif(_found)
                   endif(_found)
                endif(_found)
             endif(_found)
          endif(_found)
-      endif(_found)
-   else() # not MSVC and not ICC => GCC, Clang, Open64
-      foreach(_flag ${_march_flag_list})
-         AddCompilerFlag("-march=${_flag}" CXX_RESULT _good CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
-         if(_good)
-            break()
-         endif(_good)
-      endforeach(_flag)
-      foreach(_flag ${_enable_vector_unit_list})
-         AddCompilerFlag("-m${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
-      endforeach(_flag)
-      foreach(_flag ${_disable_vector_unit_list})
-         AddCompilerFlag("-mno-${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
-      endforeach(_flag)
+      else() # not MSVC and not ICC => GCC, Clang, Open64
+         foreach(_flag ${_march_flag_list})
+            AddCompilerFlag("-march=${_flag}" CXX_RESULT _good CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+            if(_good)
+               break()
+            endif(_good)
+         endforeach(_flag)
+         foreach(_flag ${_enable_vector_unit_list})
+            AddCompilerFlag("-m${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+         endforeach(_flag)
+         foreach(_flag ${_disable_vector_unit_list})
+            AddCompilerFlag("-mno-${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS)
+         endforeach(_flag)
+      endif()
    endif()
 endmacro(OptimizeForArchitecture)