[u/mrichter/AliRoot.git] / Vc / include / Vc / global.h

/*  This file is part of the Vc library.

    Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>

    Vc is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as
    published by the Free Software Foundation, either version 3 of
    the License, or (at your option) any later version.

    Vc is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with Vc.  If not, see <http://www.gnu.org/licenses/>.

*/

#ifndef VC_GLOBAL_H
#define VC_GLOBAL_H

#ifndef DOXYGEN

// Compiler defines
//
// Exactly one of VC_ICC, VC_OPEN64, VC_CLANG, VC_GCC, VC_MSVC or VC_UNSUPPORTED_COMPILER is
// defined by this chain. The order of the tests matters: __GNUC__ is only looked at after the
// ICC, Open64 and Clang macros, so VC_GCC is never defined for one of those compilers even if
// it defines __GNUC__ as well.
//
// VC_CLANG and VC_GCC encode the compiler version as
//   major * 0x10000 + minor * 0x100 + patchlevel
// (e.g. 0x40502 for version 4.5.2), which is the format all version comparisons in this
// header rely on. VC_ICC holds the ICC build date and VC_MSVC the value of _MSC_FULL_VER.
#ifdef __INTEL_COMPILER
#define VC_ICC __INTEL_COMPILER_BUILD_DATE
#elif defined(__OPENCC__)
#define VC_OPEN64 1
#elif defined(__clang__)
#define VC_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
#elif defined(__GNUC__)
#define VC_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
#elif defined(_MSC_VER)
#define VC_MSVC _MSC_FULL_VER
#else
#define VC_UNSUPPORTED_COMPILER 1
#endif

// Features/Quirks defines
#if defined VC_MSVC && defined _WIN32
// the Win32 ABI can't handle function parameters with alignment >= 16
#define VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN 1
#endif
// VC_GNU_ASM tests __GNUC__ (not VC_GCC), so it is also set for other compilers that define
// __GNUC__. Define VC_NO_INLINE_ASM to opt out.
#if defined(__GNUC__) && !defined(VC_NO_INLINE_ASM)
#define VC_GNU_ASM 1
#endif
// Affected versions, using the VC_GCC encoding: everything up to 4.4.5 (0x40405) and
// 4.5.0 - 4.5.2 (0x40500 - 0x40502), except the 4.5.2 build identified by
// __GNUC_UBUNTU_VERSION__ == 0xb0408.
#if defined(VC_GCC) && (VC_GCC <= 0x40405 || (VC_GCC >= 0x40500 && VC_GCC <= 0x40502)) && !(VC_GCC == 0x40502 && defined(__GNUC_UBUNTU_VERSION__) && __GNUC_UBUNTU_VERSION__ == 0xb0408)
// GCC 4.6.0 / 4.5.3 / 4.4.6 switched to the interface as defined by ICC
// (Ubuntu 11.04 ships a GCC 4.5.2 with the new interface)
#define VC_MM256_MASKSTORE_WRONG_MASK_TYPE 1
#endif
// The error/warning function attributes are assumed to be available from GCC 4.3.0 (0x40300) on.
#if defined(VC_GCC) && VC_GCC >= 0x40300
#define VC_HAVE_ATTRIBUTE_ERROR 1
#define VC_HAVE_ATTRIBUTE_WARNING 1
#endif

// C++11 detection. VC_CXX11 is set when __cplusplus reports C++11 or later, or when GCC >= 4.6
// (0x40600) runs in its experimental C++0x mode (__GXX_EXPERIMENTAL_CXX0X__). For compilers
// other than GCC, VC_GCC is undefined and therefore evaluates to 0 inside #if, which makes the
// first alternative false.
#if (defined(__GXX_EXPERIMENTAL_CXX0X__) && VC_GCC >= 0x40600) || __cplusplus >= 201103
#  define VC_CXX11 1
#  ifdef VC_GCC
//     NOTE(review): the commented-out upper bound below is left over from the original code.
//     0x408000 does not match the VC_GCC version encoding (GCC 4.8.0 would be 0x40800).
#    if VC_GCC >= 0x40700 // && VC_GCC < 0x408000)
//     ::max_align_t was introduced with GCC 4.7. std::max_align_t took a bit longer.
#      define VC_HAVE_MAX_ALIGN_T 1
#    endif
#  elif !defined(VC_CLANG)
//   Any other compiler except Clang is assumed to provide std::max_align_t.
//   Clang is excluded because (according to the original author) it doesn't provide
//   max_align_t at all.
#    define VC_HAVE_STD_MAX_ALIGN_T 1
#  endif
#endif

// ICC ships the AVX2 intrinsics inside the AVX1 header.
// FIXME: the number 20120731 is too large, but I don't know which one is the right one
// The same macro is set for MSVC when _MSC_FULL_VER (VC_MSVC) is at least 170000000.
#if (defined(VC_ICC) && VC_ICC >= 20120731) || (defined(VC_MSVC) && VC_MSVC >= 170000000)
#define VC_UNCONDITIONAL_AVX2_INTRINSICS 1
#endif

/* Define the following names as unique integers, because integers are the only thing the
 * preprocessor can compare. This makes it possible to pass e.g. -DVC_IMPL=SSE3: the preprocessor
 * then considers VC_IMPL and SSE3 to be equal. Of course, it is important to undefine these
 * names again later on (done further down in this header)!
 *
 * Layout of a VC_IMPL value:
 *  - the upper 12 bits (IMPL_MASK) hold exactly one base implementation (Scalar ... AVX)
 *  - the lower 20 bits (EXT_MASK) hold a bit set of extensions (XOP ... FMA), which can be
 *    combined with a base implementation, e.g. -DVC_IMPL=AVX+FMA4
 */
#define Scalar 0x00100000
#define SSE    0x00200000
#define SSE2   0x00300000
#define SSE3   0x00400000
#define SSSE3  0x00500000
#define SSE4_1 0x00600000
#define SSE4_2 0x00700000
#define AVX    0x00800000

// extension bits, one bit per extension
#define XOP    0x00000001
#define FMA4   0x00000002
#define F16C   0x00000004
#define POPCNT 0x00000008
#define SSE4a  0x00000010
#define FMA    0x00000020

// EXT_MASK is the complement of IMPL_MASK; within this header it is only defined and
// undefined again, never tested.
#define IMPL_MASK 0xFFF00000
#define EXT_MASK  0x000FFFFF

// MSVC compatibility: provide the __SSE__ / __SSE2__ macros that the implementation selection
// below tests, derived from MSVC's own _M_IX86_FP and _M_AMD64 macros. Macros that are already
// defined are left alone.
#ifdef VC_MSVC
# ifdef _M_IX86_FP
// _M_IX86_FP >= 1 is mapped to SSE, >= 2 to SSE and SSE2
#  if _M_IX86_FP >= 1
#   ifndef __SSE__
#    define __SSE__ 1
#   endif
#  endif
#  if _M_IX86_FP >= 2
#   ifndef __SSE2__
#    define __SSE2__ 1
#   endif
#  endif
# elif defined(_M_AMD64)
// If the target is x86_64 then SSE2 is guaranteed
#  ifndef __SSE__
#   define __SSE__ 1
#  endif
#  ifndef __SSE2__
#   define __SSE2__ 1
#  endif
# endif
#endif

// Selection of the Vc implementation. The outcome is a set of VC_IMPL_* macros:
//  - base implementation: VC_IMPL_Scalar, VC_IMPL_AVX, or VC_IMPL_SSE together with the
//    VC_IMPL_SSE2 ... VC_IMPL_SSE4_2 macros for the enabled SSE levels
//  - extensions: VC_IMPL_XOP, VC_IMPL_FMA4, VC_IMPL_F16C, VC_IMPL_POPCNT, VC_IMPL_SSE4a,
//    VC_IMPL_FMA
// Without a user-supplied VC_IMPL everything is derived from the compiler's target macros;
// otherwise the VC_IMPL value is decoded.
#ifndef VC_IMPL

// --- automatic selection from the compiler's target macros ---
#  if defined(__AVX__)
#    define VC_IMPL_AVX 1
#  else
//   Each SSE level is tested on its own; the repeated definitions of VC_IMPL_SSE are identical
//   and therefore harmless.
#    if defined(__SSE4_2__)
#      define VC_IMPL_SSE 1
#      define VC_IMPL_SSE4_2 1
#    endif
#    if defined(__SSE4_1__)
#      define VC_IMPL_SSE 1
#      define VC_IMPL_SSE4_1 1
#    endif
#    if defined(__SSE3__)
#      define VC_IMPL_SSE 1
#      define VC_IMPL_SSE3 1
#    endif
#    if defined(__SSSE3__)
#      define VC_IMPL_SSE 1
#      define VC_IMPL_SSSE3 1
#    endif
#    if defined(__SSE2__)
#      define VC_IMPL_SSE 1
#      define VC_IMPL_SSE2 1
#    endif

//   __SSE__ alone is not tested above, so a target without at least SSE2 ends up as Scalar.
#    if defined(VC_IMPL_SSE)
       // nothing
#    else
#      define VC_IMPL_Scalar 1
#    endif
#  endif
//   Extensions are only picked up for the SIMD implementations, never for Scalar.
#  if defined(VC_IMPL_AVX) || defined(VC_IMPL_SSE)
#    ifdef __FMA4__
#      define VC_IMPL_FMA4 1
#    endif
#    ifdef __XOP__
#      define VC_IMPL_XOP 1
#    endif
#    ifdef __F16C__
#      define VC_IMPL_F16C 1
#    endif
#    ifdef __POPCNT__
#      define VC_IMPL_POPCNT 1
#    endif
#    ifdef __SSE4A__
#      define VC_IMPL_SSE4a 1
#    endif
#    ifdef __FMA__
#      define VC_IMPL_FMA 1
#    endif
#  endif

#else // VC_IMPL

// --- explicit selection: decode the user-supplied VC_IMPL value ---
// The base implementation is the part of VC_IMPL covered by IMPL_MASK. Requesting an SSE level
// enables all lower SSE levels as well.
#  if (VC_IMPL & IMPL_MASK) == AVX // AVX supersedes SSE
#    define VC_IMPL_AVX 1
#  elif (VC_IMPL & IMPL_MASK) == Scalar
#    define VC_IMPL_Scalar 1
#  elif (VC_IMPL & IMPL_MASK) == SSE4_2
#    define VC_IMPL_SSE4_2 1
#    define VC_IMPL_SSE4_1 1
#    define VC_IMPL_SSSE3 1
#    define VC_IMPL_SSE3 1
#    define VC_IMPL_SSE2 1
#    define VC_IMPL_SSE 1
#  elif (VC_IMPL & IMPL_MASK) == SSE4_1
#    define VC_IMPL_SSE4_1 1
#    define VC_IMPL_SSSE3 1
#    define VC_IMPL_SSE3 1
#    define VC_IMPL_SSE2 1
#    define VC_IMPL_SSE 1
#  elif (VC_IMPL & IMPL_MASK) == SSSE3
#    define VC_IMPL_SSSE3 1
#    define VC_IMPL_SSE3 1
#    define VC_IMPL_SSE2 1
#    define VC_IMPL_SSE 1
#  elif (VC_IMPL & IMPL_MASK) == SSE3
#    define VC_IMPL_SSE3 1
#    define VC_IMPL_SSE2 1
#    define VC_IMPL_SSE 1
#  elif (VC_IMPL & IMPL_MASK) == SSE2
#    define VC_IMPL_SSE2 1
#    define VC_IMPL_SSE 1
#  elif (VC_IMPL & IMPL_MASK) == SSE
//   Plain "SSE" request: the SSE level is taken from the compiler's target macros.
#    define VC_IMPL_SSE 1
#    if defined(__SSE4_2__)
#      define VC_IMPL_SSE4_2 1
#    endif
#    if defined(__SSE4_1__)
#      define VC_IMPL_SSE4_1 1
#    endif
#    if defined(__SSE3__)
#      define VC_IMPL_SSE3 1
#    endif
#    if defined(__SSSE3__)
#      define VC_IMPL_SSSE3 1
#    endif
#    if defined(__SSE2__)
#      define VC_IMPL_SSE2 1
#    endif
#  elif (VC_IMPL & IMPL_MASK) == 0 && (VC_IMPL & SSE4a)
     // this is for backward compatibility only where SSE4a was included in the main
     // line of available SIMD instruction sets
#    define VC_IMPL_SSE3 1
#    define VC_IMPL_SSE2 1
#    define VC_IMPL_SSE 1
#  endif
//   Extension bits are decoded independently of the base implementation.
#  if (VC_IMPL & XOP)
#    define VC_IMPL_XOP 1
#  endif
#  if (VC_IMPL & FMA4)
#    define VC_IMPL_FMA4 1
#  endif
#  if (VC_IMPL & F16C)
#    define VC_IMPL_F16C 1
#  endif
#  if (VC_IMPL & POPCNT)
#    define VC_IMPL_POPCNT 1
#  endif
#  if (VC_IMPL & SSE4a)
#    define VC_IMPL_SSE4a 1
#  endif
#  if (VC_IMPL & FMA)
#    define VC_IMPL_FMA 1
#  endif
//   The numeric VC_IMPL is dropped here; the name is defined again further down in this
//   header, then as an enumerator of Vc::Implementation.
#  undef VC_IMPL

#endif // VC_IMPL

// If AVX is enabled in the compiler it will use VEX coding for the SIMD instructions.
// This only depends on __AVX__, not on the implementation selected via VC_IMPL.
#ifdef __AVX__
#  define VC_USE_VEX_CODING 1
#endif

// GCC older than 4.3.0 (0x40300): discard whatever SIMD implementation and extensions were
// selected above and fall back to the Scalar implementation. A warning is printed unless
// VC_DONT_WARN_OLD_GCC is defined.
#if defined(VC_GCC) && VC_GCC < 0x40300 && !defined(VC_IMPL_Scalar)
#    ifndef VC_DONT_WARN_OLD_GCC
#      warning "GCC < 4.3 does not have full support for SSE2 intrinsics. Using scalar types/operations only. Define VC_DONT_WARN_OLD_GCC to silence this warning."
#    endif
#    undef VC_IMPL_SSE
#    undef VC_IMPL_SSE2
#    undef VC_IMPL_SSE3
#    undef VC_IMPL_SSE4_1
#    undef VC_IMPL_SSE4_2
#    undef VC_IMPL_SSSE3
#    undef VC_IMPL_AVX
#    undef VC_IMPL_FMA4
#    undef VC_IMPL_XOP
#    undef VC_IMPL_F16C
#    undef VC_IMPL_POPCNT
#    undef VC_IMPL_SSE4a
#    undef VC_IMPL_FMA
#    undef VC_USE_VEX_CODING
#    define VC_IMPL_Scalar 1
#endif

// Sanity checks on the selection result: some base implementation must have been chosen, and
// the SSE implementation is only accepted together with SSE2.
# if !defined(VC_IMPL_Scalar) && !defined(VC_IMPL_SSE) && !defined(VC_IMPL_AVX)
#  error "No suitable Vc implementation was selected! Probably VC_IMPL was set to an invalid value."
# elif defined(VC_IMPL_SSE) && !defined(VC_IMPL_SSE2)
#  error "SSE requested but no SSE2 support. Vc needs at least SSE2!"
# endif

// The short helper names were only needed to decode VC_IMPL. Remove them again so that they do
// not leak into code that includes this header.
#undef Scalar
#undef SSE
#undef SSE2
#undef SSE3
#undef SSSE3
#undef SSE4_1
#undef SSE4_2
#undef AVX

#undef XOP
#undef FMA4
#undef F16C
#undef POPCNT
#undef SSE4a
#undef FMA

#undef IMPL_MASK
#undef EXT_MASK

namespace AliRoot {
namespace Vc {
// Load/store flags. Every flag is an enum type of its own (instead of one enum with four
// enumerators), so that code can be overloaded or specialized on the flag type.
// The numeric values form a two-bit pattern: bit 0 = unaligned, bit 1 = streaming.
enum AlignedFlag               { Aligned               = 0 };
enum UnalignedFlag             { Unaligned             = 1 };
// Streaming on its own implies Aligned.
enum StreamingAndAlignedFlag   { Streaming             = 2 };
enum StreamingAndUnalignedFlag { StreamingAndUnaligned = 3 };
#endif // DOXYGEN

/**
 * \ingroup Utilities
 *
 * Selects the alignment and padding restrictions that Vc::malloc applies to an allocation.
 */
enum MallocAlignment {
    /**
     * Alignment on the boundary of vector sizes (e.g. 16 Bytes on SSE platforms). The
     * allocation is padded to allow vector access to the end, i.e. the allocated memory
     * contains a multiple of VectorAlignment bytes.
     */
    AlignOnVector = 0,
    /**
     * Alignment on the boundary of cache line sizes (e.g. 64 Bytes on x86). The allocation
     * is padded to allow full cache line access to the end, i.e. the allocated memory
     * contains a multiple of 64 bytes.
     */
    AlignOnCacheline = 1,
    /**
     * Alignment on the boundary of page sizes (e.g. 4096 Bytes on x86). The allocation is
     * padded to allow full page access to the end, i.e. the allocated memory contains a
     * multiple of 4096 bytes.
     */
    AlignOnPage = 2
};

// Vc_CONSTEXPR: declaration specifiers for the small flag helper functions in this header.
// With C++11 it expands to "static constexpr"; otherwise to "static inline" plus the strongest
// inlining hint the compiler offers. The macro is local to this header and is #undef'd again at
// its end.
// NOTE(review): this tests __cplusplus directly instead of VC_CXX11, so GCC 4.6 in C++0x mode
// (which sets VC_CXX11 but may not report __cplusplus >= 201103) takes the __GNUC__ branch.
#if __cplusplus >= 201103 /*C++11*/
#define Vc_CONSTEXPR static constexpr
#elif defined(__GNUC__)
#define Vc_CONSTEXPR static inline __attribute__((__always_inline__, __const__))
#elif defined(VC_MSVC)
#define Vc_CONSTEXPR static inline __forceinline
#else
#define Vc_CONSTEXPR static inline
#endif
// Combining the Streaming flag with an alignment flag. operator| and operator& are both
// provided and yield the same result; the operand order does not matter. The arguments only
// select the overload, their values are not inspected.

// Streaming combined with Unaligned -> StreamingAndUnaligned
Vc_CONSTEXPR StreamingAndUnalignedFlag operator|(UnalignedFlag, StreamingAndAlignedFlag)
{
    return StreamingAndUnaligned;
}
Vc_CONSTEXPR StreamingAndUnalignedFlag operator|(StreamingAndAlignedFlag, UnalignedFlag)
{
    return StreamingAndUnaligned;
}
Vc_CONSTEXPR StreamingAndUnalignedFlag operator&(UnalignedFlag, StreamingAndAlignedFlag)
{
    return StreamingAndUnaligned;
}
Vc_CONSTEXPR StreamingAndUnalignedFlag operator&(StreamingAndAlignedFlag, UnalignedFlag)
{
    return StreamingAndUnaligned;
}

// Streaming combined with Aligned -> Streaming (which already implies Aligned)
Vc_CONSTEXPR StreamingAndAlignedFlag operator|(AlignedFlag, StreamingAndAlignedFlag)
{
    return Streaming;
}
Vc_CONSTEXPR StreamingAndAlignedFlag operator|(StreamingAndAlignedFlag, AlignedFlag)
{
    return Streaming;
}
Vc_CONSTEXPR StreamingAndAlignedFlag operator&(AlignedFlag, StreamingAndAlignedFlag)
{
    return Streaming;
}
Vc_CONSTEXPR StreamingAndAlignedFlag operator&(StreamingAndAlignedFlag, AlignedFlag)
{
    return Streaming;
}

/**
 * \ingroup Utilities
 *
 * Identifies a certain SIMD instruction set.
 *
 * The currently active implementation is available as \ref VC_IMPL.
 *
 * \see ExtraInstructions
 */
enum Implementation {
    /// only fundamental types are used
    ScalarImpl = 0,
    /// x86 SSE + SSE2
    SSE2Impl = 1,
    /// x86 SSE + SSE2 + SSE3
    SSE3Impl = 2,
    /// x86 SSE + SSE2 + SSE3 + SSSE3
    SSSE3Impl = 3,
    /// x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
    SSE41Impl = 4,
    /// x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
    SSE42Impl = 5,
    /// x86 AVX
    AVXImpl = 6,
    /// x86 AVX + AVX2
    AVX2Impl = 7,
    /// bit mask that extracts the Implementation part of a combined feature value
    ImplementationMask = 0xfff
};

/**
 * \ingroup Utilities
 *
 * A linear list of instruction sets does not describe the available instructions completely.
 * On x86 each of the following instruction sets includes its predecessors:
 * SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2
 *
 * There are additional instructions, though, that are not necessarily implied by that list.
 * This enum covers them, one bit per extension.
 */
enum ExtraInstructions {
    /// Support for float16 conversions in hardware
    Float16cInstructions  = 0x00001000,
    /// Support for FMA4 instructions
    Fma4Instructions      = 0x00002000,
    /// Support for XOP instructions
    XopInstructions       = 0x00004000,
    /// Support for the population count instruction
    PopcntInstructions    = 0x00008000,
    /// Support for SSE4a instructions
    Sse4aInstructions     = 0x00010000,
    /// Support for FMA instructions (3 operand variant)
    FmaInstructions       = 0x00020000,
    // not covered (yet):
    // PclmulqdqInstructions,
    // AesInstructions,
    // RdrandInstructions
    ExtraInstructionsMask = 0xfffff000u
};

#ifndef DOXYGEN

// Define VC_IMPL as the Vc::Implementation enumerator that matches the VC_IMPL_* macros selected
// earlier in this header. Scalar and AVX are tested first; for SSE the highest enabled level
// wins. (Any numeric VC_IMPL supplied by the user has already been #undef'd at this point.)
#ifdef VC_IMPL_Scalar
#define VC_IMPL ::AliRoot::Vc::ScalarImpl
#elif defined(VC_IMPL_AVX)
#define VC_IMPL ::AliRoot::Vc::AVXImpl
#elif defined(VC_IMPL_SSE4_2)
#define VC_IMPL ::AliRoot::Vc::SSE42Impl
#elif defined(VC_IMPL_SSE4_1)
#define VC_IMPL ::AliRoot::Vc::SSE41Impl
#elif defined(VC_IMPL_SSSE3)
#define VC_IMPL ::AliRoot::Vc::SSSE3Impl
#elif defined(VC_IMPL_SSE3)
#define VC_IMPL ::AliRoot::Vc::SSE3Impl
#elif defined(VC_IMPL_SSE2)
#define VC_IMPL ::AliRoot::Vc::SSE2Impl
#endif

// Compile-time description of a feature set. Features is the sum of one Vc::Implementation
// enumerator and any number of Vc::ExtraInstructions flags; the nested enumerators split that
// value into its two parts again:
//   Value             - the unmodified Features value
//   Implementation    - the part selected by Vc::ImplementationMask
//   ExtraInstructions - the part selected by Vc::ExtraInstructionsMask
template<unsigned int Features>
struct ImplementationT
{
    enum _Value {
        Value             = Features,
        Implementation    = Features & Vc::ImplementationMask,
        ExtraInstructions = Features & Vc::ExtraInstructionsMask
    };
};

// CurrentImplementation: ImplementationT instantiated with the sum of the active
// Implementation enumerator and the ExtraInstructions flags of the enabled extensions.
typedef ImplementationT<
#ifdef VC_USE_VEX_CODING
    // everything will use VEX coding, so the system has to support AVX even if VC_IMPL_AVX is not set
    // but AFAIU the OSXSAVE and xgetbv tests do not have to be positive (unless, of course, the
    // compiler decides to insert an instruction that uses the full register size - so better be on
    // the safe side)
    AVXImpl
#else
    VC_IMPL
#endif
    // NOTE(review): the extension flags below are nested. XopInstructions is only added when
    // VC_IMPL_SSE4a is defined as well, and Fma4Instructions only when both VC_IMPL_SSE4a and
    // VC_IMPL_XOP are defined. VC_IMPL_F16C never contributes Float16cInstructions.
    // Confirm whether this is intentional before relying on these bits.
#ifdef VC_IMPL_SSE4a
    + Vc::Sse4aInstructions
#ifdef VC_IMPL_XOP
    + Vc::XopInstructions
#ifdef VC_IMPL_FMA4
    + Vc::Fma4Instructions
#endif
#endif
#endif
#ifdef VC_IMPL_POPCNT
    + Vc::PopcntInstructions
#endif
#ifdef VC_IMPL_FMA
    + Vc::FmaInstructions
#endif
    > CurrentImplementation;

namespace Internal {
    // HelperImpl is only declared here. Helper names the specialization for the
    // implementation selected via VC_IMPL.
    template<Implementation Impl> struct HelperImpl;
    typedef HelperImpl<VC_IMPL> Helper;

    // FlagObject<Flag>::the() returns the single enumerator of a load/store flag type.
    // The primary template is intentionally left undefined; only the four flag types below
    // are supported.
    template<typename A> struct FlagObject;

    template<> struct FlagObject<AlignedFlag>
    {
        Vc_CONSTEXPR AlignedFlag the() { return Aligned; }
    };

    template<> struct FlagObject<UnalignedFlag>
    {
        Vc_CONSTEXPR UnalignedFlag the() { return Unaligned; }
    };

    template<> struct FlagObject<StreamingAndAlignedFlag>
    {
        Vc_CONSTEXPR StreamingAndAlignedFlag the() { return Streaming; }
    };

    template<> struct FlagObject<StreamingAndUnalignedFlag>
    {
        Vc_CONSTEXPR StreamingAndUnalignedFlag the() { return StreamingAndUnaligned; }
    };
} // namespace Internal

namespace Warnings
{
    // Declaration only; no definition is visible in this header. When
    // VC_HAVE_ATTRIBUTE_WARNING is defined (set earlier in this header for GCC >= 4.3) the
    // function carries GCC's "warning" attribute, which makes the compiler print the message
    // below for calls to this function that are not optimized away.
    // NOTE(review): presumably called from Vc::Vector::operator[], as the message text
    // suggests - the call site is not part of this file.
    void _operator_bracket_warning()
#ifdef VC_HAVE_ATTRIBUTE_WARNING
        __attribute__((warning("\n\tUse of Vc::Vector::operator[] to modify scalar entries is known to miscompile with GCC 4.3.x.\n\tPlease upgrade to a more recent GCC or avoid operator[] altogether.\n\t(This warning adds an unnecessary function call to operator[] which should work around the problem at a little extra cost.)")))
#endif
        ;
} // namespace Warnings

namespace Error
{
    // Empty class template parameterized on two types. Nothing in this header uses it.
    // NOTE(review): judging by the name it serves as a marker for unsupported operand type
    // combinations - confirm against its users.
    template<typename L, typename R>
    struct invalid_operands_of_types
    {
    };
} // namespace Error

#endif // DOXYGEN
} // namespace Vc
} // namespace AliRoot

#undef Vc_CONSTEXPR
#include "version.h"

#endif // VC_GLOBAL_H
Commit	Line	Data
f22341db	1	/* This file is part of the Vc library.
	2
	3	Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
	4
	5	Vc is free software: you can redistribute it and/or modify
	6	it under the terms of the GNU Lesser General Public License as
	7	published by the Free Software Foundation, either version 3 of
	8	the License, or (at your option) any later version.
	9
	10	Vc is distributed in the hope that it will be useful, but
	11	WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	GNU Lesser General Public License for more details.
	14
	15	You should have received a copy of the GNU Lesser General Public
	16	License along with Vc. If not, see <http://www.gnu.org/licenses/>.
	17
	18	*/
	19
	20	#ifndef VC_GLOBAL_H
	21	#define VC_GLOBAL_H
	22
c017a39f	23	#ifndef DOXYGEN
c017a39f	24
f22341db	25	// Compiler defines
	26	#ifdef __INTEL_COMPILER
	27	#define VC_ICC __INTEL_COMPILER_BUILD_DATE
	28	#elif defined(__OPENCC__)
	29	#define VC_OPEN64 1
	30	#elif defined(__clang__)
	31	#define VC_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
	32	#elif defined(__GNUC__)
	33	#define VC_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
	34	#elif defined(_MSC_VER)
	35	#define VC_MSVC _MSC_FULL_VER
	36	#else
	37	#define VC_UNSUPPORTED_COMPILER 1
	38	#endif
	39
	40	// Features/Quirks defines
	41	#if defined VC_MSVC && defined _WIN32
	42	// the Win32 ABI can't handle function parameters with alignment >= 16
	43	#define VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN 1
	44	#endif
	45	#if defined(__GNUC__) && !defined(VC_NO_INLINE_ASM)
	46	#define VC_GNU_ASM 1
	47	#endif
	48	#if defined(VC_GCC) && (VC_GCC <= 0x40405 \|\| (VC_GCC >= 0x40500 && VC_GCC <= 0x40502)) && !(VC_GCC == 0x40502 && defined(__GNUC_UBUNTU_VERSION__) && __GNUC_UBUNTU_VERSION__ == 0xb0408)
	49	// GCC 4.6.0 / 4.5.3 / 4.4.6 switched to the interface as defined by ICC
	50	// (Ubuntu 11.04 ships a GCC 4.5.2 with the new interface)
	51	#define VC_MM256_MASKSTORE_WRONG_MASK_TYPE 1
	52	#endif
	53	#if defined(VC_GCC) && VC_GCC >= 0x40300
	54	#define VC_HAVE_ATTRIBUTE_ERROR 1
	55	#define VC_HAVE_ATTRIBUTE_WARNING 1
	56	#endif
	57
c017a39f	58	#if (defined(__GXX_EXPERIMENTAL_CXX0X__) && VC_GCC >= 0x40600) \|\| __cplusplus >= 201103
	59	# define VC_CXX11 1
	60	# ifdef VC_GCC
	61	# if VC_GCC >= 0x40700 // && VC_GCC < 0x408000)
	62	// ::max_align_t was introduced with GCC 4.7. std::max_align_t took a bit longer.
	63	# define VC_HAVE_MAX_ALIGN_T 1
	64	# endif
	65	# elif !defined(VC_CLANG)
	66	// Clang doesn't provide max_align_t at all
	67	# define VC_HAVE_STD_MAX_ALIGN_T 1
	68	# endif
	69	#endif
	70
	71	// ICC ships the AVX2 intrinsics inside the AVX1 header.
	72	// FIXME: the number 20120731 is too large, but I don't know which one is the right one
	73	#if (defined(VC_ICC) && VC_ICC >= 20120731) \|\| (defined(VC_MSVC) && VC_MSVC >= 170000000)
	74	#define VC_UNCONDITIONAL_AVX2_INTRINSICS 1
	75	#endif
	76
	77	/* Define the following strings to a unique integer, which is the only type the preprocessor can
	78	* compare. This allows to use -DVC_IMPL=SSE3. The preprocessor will then consider VC_IMPL and SSE3
	79	* to be equal. Of course, it is important to undefine the strings later on!
	80	*/
	81	#define Scalar 0x00100000
	82	#define SSE 0x00200000
	83	#define SSE2 0x00300000
	84	#define SSE3 0x00400000
	85	#define SSSE3 0x00500000
	86	#define SSE4_1 0x00600000
	87	#define SSE4_2 0x00700000
	88	#define AVX 0x00800000
	89
	90	#define XOP 0x00000001
	91	#define FMA4 0x00000002
	92	#define F16C 0x00000004
	93	#define POPCNT 0x00000008
	94	#define SSE4a 0x00000010
	95	#define FMA 0x00000020
	96
	97	#define IMPL_MASK 0xFFF00000
	98	#define EXT_MASK 0x000FFFFF
	99
	100	#ifdef VC_MSVC
	101	# ifdef _M_IX86_FP
	102	# if _M_IX86_FP >= 1
	103	# ifndef __SSE__
	104	# define __SSE__ 1
	105	# endif
	106	# endif
	107	# if _M_IX86_FP >= 2
	108	# ifndef __SSE2__
	109	# define __SSE2__ 1
	110	# endif
	111	# endif
	112	# elif defined(_M_AMD64)
	113	// If the target is x86_64 then SSE2 is guaranteed
f22341db	114	# ifndef __SSE__
	115	# define __SSE__ 1
	116	# endif
f22341db	117	# ifndef __SSE2__
	118	# define __SSE2__ 1
	119	# endif
	120	# endif
	121	#endif
	122
	123	#ifndef VC_IMPL
	124
	125	# if defined(__AVX__)
	126	# define VC_IMPL_AVX 1
	127	# else
f22341db	128	# if defined(__SSE4_2__)
	129	# define VC_IMPL_SSE 1
	130	# define VC_IMPL_SSE4_2 1
	131	# endif
	132	# if defined(__SSE4_1__)
	133	# define VC_IMPL_SSE 1
	134	# define VC_IMPL_SSE4_1 1
	135	# endif
	136	# if defined(__SSE3__)
	137	# define VC_IMPL_SSE 1
	138	# define VC_IMPL_SSE3 1
	139	# endif
	140	# if defined(__SSSE3__)
	141	# define VC_IMPL_SSE 1
	142	# define VC_IMPL_SSSE3 1
	143	# endif
	144	# if defined(__SSE2__)
	145	# define VC_IMPL_SSE 1
	146	# define VC_IMPL_SSE2 1
	147	# endif
	148
	149	# if defined(VC_IMPL_SSE)
	150	// nothing
	151	# else
	152	# define VC_IMPL_Scalar 1
	153	# endif
	154	# endif
c017a39f	155	# if defined(VC_IMPL_AVX) \|\| defined(VC_IMPL_SSE)
	156	# ifdef __FMA4__
	157	# define VC_IMPL_FMA4 1
	158	# endif
	159	# ifdef __XOP__
	160	# define VC_IMPL_XOP 1
	161	# endif
	162	# ifdef __F16C__
	163	# define VC_IMPL_F16C 1
	164	# endif
	165	# ifdef __POPCNT__
	166	# define VC_IMPL_POPCNT 1
	167	# endif
	168	# ifdef __SSE4A__
	169	# define VC_IMPL_SSE4a 1
	170	# endif
	171	# ifdef __FMA__
	172	# define VC_IMPL_FMA 1
	173	# endif
	174	# endif
f22341db	175
	176	#else // VC_IMPL
	177
c017a39f	178	# if (VC_IMPL & IMPL_MASK) == AVX // AVX supersedes SSE
f22341db	179	# define VC_IMPL_AVX 1
c017a39f	180	# elif (VC_IMPL & IMPL_MASK) == Scalar
f22341db	181	# define VC_IMPL_Scalar 1
c017a39f	182	# elif (VC_IMPL & IMPL_MASK) == SSE4_2
f22341db	183	# define VC_IMPL_SSE4_2 1
	184	# define VC_IMPL_SSE4_1 1
	185	# define VC_IMPL_SSSE3 1
	186	# define VC_IMPL_SSE3 1
	187	# define VC_IMPL_SSE2 1
	188	# define VC_IMPL_SSE 1
c017a39f	189	# elif (VC_IMPL & IMPL_MASK) == SSE4_1
f22341db	190	# define VC_IMPL_SSE4_1 1
	191	# define VC_IMPL_SSSE3 1
	192	# define VC_IMPL_SSE3 1
	193	# define VC_IMPL_SSE2 1
	194	# define VC_IMPL_SSE 1
c017a39f	195	# elif (VC_IMPL & IMPL_MASK) == SSSE3
f22341db	196	# define VC_IMPL_SSSE3 1
	197	# define VC_IMPL_SSE3 1
	198	# define VC_IMPL_SSE2 1
	199	# define VC_IMPL_SSE 1
c017a39f	200	# elif (VC_IMPL & IMPL_MASK) == SSE3
f22341db	201	# define VC_IMPL_SSE3 1
	202	# define VC_IMPL_SSE2 1
	203	# define VC_IMPL_SSE 1
c017a39f	204	# elif (VC_IMPL & IMPL_MASK) == SSE2
f22341db	205	# define VC_IMPL_SSE2 1
f22341db	206	# define VC_IMPL_SSE 1
c017a39f	207	# elif (VC_IMPL & IMPL_MASK) == SSE
f22341db	208	# define VC_IMPL_SSE 1
f22341db	209	# if defined(__SSE4_2__)
	210	# define VC_IMPL_SSE4_2 1
	211	# endif
	212	# if defined(__SSE4_1__)
	213	# define VC_IMPL_SSE4_1 1
	214	# endif
	215	# if defined(__SSE3__)
	216	# define VC_IMPL_SSE3 1
	217	# endif
	218	# if defined(__SSSE3__)
	219	# define VC_IMPL_SSSE3 1
	220	# endif
	221	# if defined(__SSE2__)
	222	# define VC_IMPL_SSE2 1
	223	# endif
c017a39f	224	# elif (VC_IMPL & IMPL_MASK) == 0 && (VC_IMPL & SSE4a)
	225	// this is for backward compatibility only where SSE4a was included in the main
	226	// line of available SIMD instruction sets
	227	# define VC_IMPL_SSE3 1
	228	# define VC_IMPL_SSE2 1
	229	# define VC_IMPL_SSE 1
	230	# endif
	231	# if (VC_IMPL & XOP)
	232	# define VC_IMPL_XOP 1
	233	# endif
	234	# if (VC_IMPL & FMA4)
	235	# define VC_IMPL_FMA4 1
	236	# endif
	237	# if (VC_IMPL & F16C)
	238	# define VC_IMPL_F16C 1
	239	# endif
	240	# if (VC_IMPL & POPCNT)
	241	# define VC_IMPL_POPCNT 1
	242	# endif
	243	# if (VC_IMPL & SSE4a)
	244	# define VC_IMPL_SSE4a 1
	245	# endif
	246	# if (VC_IMPL & FMA)
	247	# define VC_IMPL_FMA 1
f22341db	248	# endif
	249	# undef VC_IMPL
	250
	251	#endif // VC_IMPL
	252
	253	// If AVX is enabled in the compiler it will use VEX coding for the SIMD instructions.
	254	#ifdef __AVX__
	255	# define VC_USE_VEX_CODING 1
	256	#endif
	257
f22341db	258	#if defined(VC_GCC) && VC_GCC < 0x40300 && !defined(VC_IMPL_Scalar)
	259	# ifndef VC_DONT_WARN_OLD_GCC
	260	# warning "GCC < 4.3 does not have full support for SSE2 intrinsics. Using scalar types/operations only. Define VC_DONT_WARN_OLD_GCC to silence this warning."
	261	# endif
	262	# undef VC_IMPL_SSE
	263	# undef VC_IMPL_SSE2
	264	# undef VC_IMPL_SSE3
f22341db	265	# undef VC_IMPL_SSE4_1
	266	# undef VC_IMPL_SSE4_2
	267	# undef VC_IMPL_SSSE3
	268	# undef VC_IMPL_AVX
	269	# undef VC_IMPL_FMA4
	270	# undef VC_IMPL_XOP
c017a39f	271	# undef VC_IMPL_F16C
	272	# undef VC_IMPL_POPCNT
	273	# undef VC_IMPL_SSE4a
	274	# undef VC_IMPL_FMA
f22341db	275	# undef VC_USE_VEX_CODING
	276	# define VC_IMPL_Scalar 1
	277	#endif
	278
	279	# if !defined(VC_IMPL_Scalar) && !defined(VC_IMPL_SSE) && !defined(VC_IMPL_AVX)
	280	# error "No suitable Vc implementation was selected! Probably VC_IMPL was set to an invalid value."
	281	# elif defined(VC_IMPL_SSE) && !defined(VC_IMPL_SSE2)
	282	# error "SSE requested but no SSE2 support. Vc needs at least SSE2!"
	283	# endif
	284
c017a39f	285	#undef Scalar
f22341db	286	#undef SSE
	287	#undef SSE2
	288	#undef SSE3
	289	#undef SSSE3
	290	#undef SSE4_1
	291	#undef SSE4_2
f22341db	292	#undef AVX
f22341db	293
c017a39f	294	#undef XOP
	295	#undef FMA4
	296	#undef F16C
	297	#undef POPCNT
	298	#undef SSE4a
	299	#undef FMA
	300
	301	#undef IMPL_MASK
	302	#undef EXT_MASK
	303
	304	namespace AliRoot {
f22341db	305	namespace Vc {
	306	enum AlignedFlag {
	307	Aligned = 0
	308	};
	309	enum UnalignedFlag {
	310	Unaligned = 1
	311	};
	312	enum StreamingAndAlignedFlag { // implies Aligned
	313	Streaming = 2
	314	};
	315	enum StreamingAndUnalignedFlag {
	316	StreamingAndUnaligned = 3
	317	};
c017a39f	318	#endif // DOXYGEN
f22341db	319
	320	/**
	321	* \ingroup Utilities
	322	*
	323	* Enum that specifies the alignment and padding restrictions to use for memory allocation with
	324	* Vc::malloc.
	325	*/
	326	enum MallocAlignment {
	327	/**
	328	* Align on boundary of vector sizes (e.g. 16 Bytes on SSE platforms) and pad to allow
	329	* vector access to the end. Thus the allocated memory contains a multiple of
	330	* VectorAlignment bytes.
	331	*/
	332	AlignOnVector,
	333	/**
	334	* Align on boundary of cache line sizes (e.g. 64 Bytes on x86) and pad to allow
	335	* full cache line access to the end. Thus the allocated memory contains a multiple of
	336	* 64 bytes.
	337	*/
	338	AlignOnCacheline,
	339	/**
	340	* Align on boundary of page sizes (e.g. 4096 Bytes on x86) and pad to allow
	341	* full page access to the end. Thus the allocated memory contains a multiple of
	342	* 4096 bytes.
	343	*/
	344	AlignOnPage
	345	};
	346
c017a39f	347	#if __cplusplus >= 201103 /C++11/
	348	#define Vc_CONSTEXPR static constexpr
	349	#elif defined(__GNUC__)
	350	#define Vc_CONSTEXPR static inline __attribute__((__always_inline__, __const__))
	351	#elif defined(VC_MSVC)
	352	#define Vc_CONSTEXPR static inline __forceinline
	353	#else
	354	#define Vc_CONSTEXPR static inline
	355	#endif
	356	Vc_CONSTEXPR StreamingAndUnalignedFlag operator\|(UnalignedFlag, StreamingAndAlignedFlag) { return StreamingAndUnaligned; }
	357	Vc_CONSTEXPR StreamingAndUnalignedFlag operator\|(StreamingAndAlignedFlag, UnalignedFlag) { return StreamingAndUnaligned; }
	358	Vc_CONSTEXPR StreamingAndUnalignedFlag operator&(UnalignedFlag, StreamingAndAlignedFlag) { return StreamingAndUnaligned; }
	359	Vc_CONSTEXPR StreamingAndUnalignedFlag operator&(StreamingAndAlignedFlag, UnalignedFlag) { return StreamingAndUnaligned; }
f22341db	360
c017a39f	361	Vc_CONSTEXPR StreamingAndAlignedFlag operator\|(AlignedFlag, StreamingAndAlignedFlag) { return Streaming; }
	362	Vc_CONSTEXPR StreamingAndAlignedFlag operator\|(StreamingAndAlignedFlag, AlignedFlag) { return Streaming; }
	363	Vc_CONSTEXPR StreamingAndAlignedFlag operator&(AlignedFlag, StreamingAndAlignedFlag) { return Streaming; }
	364	Vc_CONSTEXPR StreamingAndAlignedFlag operator&(StreamingAndAlignedFlag, AlignedFlag) { return Streaming; }
f22341db	365
	366	/**
	367	* \ingroup Utilities
	368	*
	369	* Enum to identify a certain SIMD instruction set.
	370	*
	371	* You can use \ref VC_IMPL for the currently active implementation.
c017a39f	372	*
c017a39f	373	* \see ExtraInstructions
f22341db	374	*/
f22341db	375	enum Implementation {
c017a39f	376	/// uses only fundamental types
f22341db	377	ScalarImpl,
	378	/// x86 SSE + SSE2
	379	SSE2Impl,
	380	/// x86 SSE + SSE2 + SSE3
	381	SSE3Impl,
	382	/// x86 SSE + SSE2 + SSE3 + SSSE3
	383	SSSE3Impl,
	384	/// x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
	385	SSE41Impl,
	386	/// x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
	387	SSE42Impl,
f22341db	388	/// x86 AVX
f22341db	389	AVXImpl,
c017a39f	390	/// x86 AVX + AVX2
	391	AVX2Impl,
	392	ImplementationMask = 0xfff
f22341db	393	};
f22341db	394
f22341db	395	/**
	396	* \ingroup Utilities
	397	*
c017a39f	398	* The list of available instructions is not easily described by a linear list of instruction sets.
	399	* On x86 the following instruction sets always include their predecessors:
	400	* SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2
	401	*
	402	* But there are additional instructions that are not necessarily required by this list. These are
	403	* covered in this enum.
f22341db	404	*/
c017a39f	405	enum ExtraInstructions {
	406	//! Support for float16 conversions in hardware
	407	Float16cInstructions = 0x01000,
	408	//! Support for FMA4 instructions
	409	Fma4Instructions = 0x02000,
	410	//! Support for XOP instructions
	411	XopInstructions = 0x04000,
	412	//! Support for the population count instruction
	413	PopcntInstructions = 0x08000,
	414	//! Support for SSE4a instructions
	415	Sse4aInstructions = 0x10000,
	416	//! Support for FMA instructions (3 operand variant)
	417	FmaInstructions = 0x20000,
	418	// PclmulqdqInstructions,
	419	// AesInstructions,
	420	// RdrandInstructions
	421	ExtraInstructionsMask = 0xfffff000u
	422	};
	423
	424	#ifndef DOXYGEN
	425
	426	#ifdef VC_IMPL_Scalar
	427	#define VC_IMPL ::AliRoot::Vc::ScalarImpl
	428	#elif defined(VC_IMPL_AVX)
	429	#define VC_IMPL ::AliRoot::Vc::AVXImpl
	430	#elif defined(VC_IMPL_SSE4_2)
	431	#define VC_IMPL ::AliRoot::Vc::SSE42Impl
	432	#elif defined(VC_IMPL_SSE4_1)
	433	#define VC_IMPL ::AliRoot::Vc::SSE41Impl
	434	#elif defined(VC_IMPL_SSSE3)
	435	#define VC_IMPL ::AliRoot::Vc::SSSE3Impl
	436	#elif defined(VC_IMPL_SSE3)
	437	#define VC_IMPL ::AliRoot::Vc::SSE3Impl
	438	#elif defined(VC_IMPL_SSE2)
	439	#define VC_IMPL ::AliRoot::Vc::SSE2Impl
	440	#endif
	441
	442	template<unsigned int Features> struct ImplementationT { enum _Value {
	443	Value = Features,
	444	Implementation = Features & Vc::ImplementationMask,
	445	ExtraInstructions = Features & Vc::ExtraInstructionsMask
	446	}; };
	447
	448	typedef ImplementationT<
	449	#ifdef VC_USE_VEX_CODING
	450	// everything will use VEX coding, so the system has to support AVX even if VC_IMPL_AVX is not set
	451	// but AFAIU the OSXSAVE and xgetbv tests do not have to positive (unless, of course, the
	452	// compiler decides to insert an instruction that uses the full register size - so better be on
	453	// the safe side)
	454	AVXImpl
	455	#else
	456	VC_IMPL
	457	#endif
	458	#ifdef VC_IMPL_SSE4a
	459	+ Vc::Sse4aInstructions
	460	#ifdef VC_IMPL_XOP
	461	+ Vc::XopInstructions
	462	#ifdef VC_IMPL_FMA4
	463	+ Vc::Fma4Instructions
	464	#endif
	465	#endif
	466	#endif
	467	#ifdef VC_IMPL_POPCNT
	468	+ Vc::PopcntInstructions
469	#endif
470	#ifdef VC_IMPL_FMA
471	+ Vc::FmaInstructions
f22341db	472	#endif
c017a39f	473	> CurrentImplementation;
f22341db	474
	475	namespace Internal {
	476	template<Implementation Impl> struct HelperImpl;
	477	typedef HelperImpl<VC_IMPL> Helper;
	478
	479	template<typename A> struct FlagObject;
c017a39f	480	template<> struct FlagObject<AlignedFlag> { Vc_CONSTEXPR AlignedFlag the() { return Aligned; } };
	481	template<> struct FlagObject<UnalignedFlag> { Vc_CONSTEXPR UnalignedFlag the() { return Unaligned; } };
	482	template<> struct FlagObject<StreamingAndAlignedFlag> { Vc_CONSTEXPR StreamingAndAlignedFlag the() { return Streaming; } };
	483	template<> struct FlagObject<StreamingAndUnalignedFlag> { Vc_CONSTEXPR StreamingAndUnalignedFlag the() { return StreamingAndUnaligned; } };
f22341db	484	} // namespace Internal
	485
	486	namespace Warnings
	487	{
	488	void _operator_bracket_warning()
c017a39f	489	#ifdef VC_HAVE_ATTRIBUTE_WARNING
f22341db	490	__attribute__((warning("\n\tUse of Vc::Vector::operator[] to modify scalar entries is known to miscompile with GCC 4.3.x.\n\tPlease upgrade to a more recent GCC or avoid operator[] altogether.\n\t(This warning adds an unnecessary function call to operator[] which should work around the problem at a little extra cost.)")))
	491	#endif
	492	;
	493	} // namespace Warnings
	494
	495	namespace Error
	496	{
	497	template<typename L, typename R> struct invalid_operands_of_types {};
	498	} // namespace Error
	499
c017a39f	500	#endif // DOXYGEN
f22341db	501	} // namespace Vc
c017a39f	502	} // namespace AliRoot
f22341db	503
c017a39f	504	#undef Vc_CONSTEXPR
f22341db	505	#include "version.h"
	506
	507	#endif // VC_GLOBAL_H