1 /* This file is part of the Vc library.
3 Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
5 Vc is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation, either version 3 of
8 the License, or (at your option) any later version.
10 Vc is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with Vc. If not, see <http://www.gnu.org/licenses/>.
26 #ifdef __INTEL_COMPILER
27 #define VC_ICC __INTEL_COMPILER_BUILD_DATE
28 #elif defined(__OPENCC__)
30 #elif defined(__clang__)
31 #define VC_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
32 #elif defined(__GNUC__)
33 #define VC_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
34 #elif defined(_MSC_VER)
35 #define VC_MSVC _MSC_FULL_VER
37 #define VC_UNSUPPORTED_COMPILER 1
40 // Features/Quirks defines
41 #if defined VC_MSVC && defined _WIN32
42 // the Win32 ABI can't handle function parameters with alignment >= 16
43 #define VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN 1
45 #if defined(__GNUC__) && !defined(VC_NO_INLINE_ASM)
48 #if defined(VC_GCC) && (VC_GCC <= 0x40405 || (VC_GCC >= 0x40500 && VC_GCC <= 0x40502)) && !(VC_GCC == 0x40502 && defined(__GNUC_UBUNTU_VERSION__) && __GNUC_UBUNTU_VERSION__ == 0xb0408)
49 // GCC 4.6.0 / 4.5.3 / 4.4.6 switched to the interface as defined by ICC
50 // (Ubuntu 11.04 ships a GCC 4.5.2 with the new interface)
51 #define VC_MM256_MASKSTORE_WRONG_MASK_TYPE 1
53 #if defined(VC_GCC) && VC_GCC >= 0x40300
54 #define VC_HAVE_ATTRIBUTE_ERROR 1
55 #define VC_HAVE_ATTRIBUTE_WARNING 1
58 #if (defined(__GXX_EXPERIMENTAL_CXX0X__) && VC_GCC >= 0x40600) || __cplusplus >= 201103
61 # if VC_GCC >= 0x40700 // && VC_GCC < 0x408000)
62 // ::max_align_t was introduced with GCC 4.7. std::max_align_t took a bit longer.
63 # define VC_HAVE_MAX_ALIGN_T 1
65 # elif !defined(VC_CLANG)
66 // Clang doesn't provide max_align_t at all
67 # define VC_HAVE_STD_MAX_ALIGN_T 1
71 // ICC ships the AVX2 intrinsics inside the AVX1 header.
72 // FIXME: the number 20120731 is too large, but I don't know which one is the right one
73 #if (defined(VC_ICC) && VC_ICC >= 20120731) || (defined(VC_MSVC) && VC_MSVC >= 170000000)
74 #define VC_UNCONDITIONAL_AVX2_INTRINSICS 1
77 /* Define the following strings to a unique integer, which is the only type the preprocessor can
 * compare. This allows using -DVC_IMPL=SSE3. The preprocessor will then consider VC_IMPL and SSE3
79 * to be equal. Of course, it is important to undefine the strings later on!
// Temporary token-to-integer mapping so the preprocessor can compare a
// user-supplied -DVC_IMPL=<name> against these names (see the block comment
// above). These macros MUST be #undef'ed again after VC_IMPL is evaluated.
// Implementation selectors: exactly one of these occupies the upper 12 bits
// of VC_IMPL (see IMPL_MASK below); a higher value denotes a superset ISA.
#define Scalar 0x00100000
#define SSE 0x00200000
#define SSE2 0x00300000
#define SSE3 0x00400000
#define SSSE3 0x00500000
#define SSE4_1 0x00600000
#define SSE4_2 0x00700000
#define AVX 0x00800000
// Extra-instruction flags: independent single-bit options that can be OR'ed
// into the lower 20 bits of VC_IMPL alongside the selector above.
#define XOP 0x00000001
#define FMA4 0x00000002
#define F16C 0x00000004
#define POPCNT 0x00000008
#define SSE4a 0x00000010
#define FMA 0x00000020
// Masks separating the two fields of VC_IMPL: selector (upper 12 bits) and
// extra-instruction bits (lower 20 bits).
#define IMPL_MASK 0xFFF00000
#define EXT_MASK 0x000FFFFF
112 # elif defined(_M_AMD64)
113 // If the target is x86_64 then SSE2 is guaranteed
125 # if defined(__AVX__)
126 # define VC_IMPL_AVX 1
128 # if defined(__SSE4_2__)
129 # define VC_IMPL_SSE 1
130 # define VC_IMPL_SSE4_2 1
132 # if defined(__SSE4_1__)
133 # define VC_IMPL_SSE 1
134 # define VC_IMPL_SSE4_1 1
136 # if defined(__SSE3__)
137 # define VC_IMPL_SSE 1
138 # define VC_IMPL_SSE3 1
140 # if defined(__SSSE3__)
141 # define VC_IMPL_SSE 1
142 # define VC_IMPL_SSSE3 1
144 # if defined(__SSE2__)
145 # define VC_IMPL_SSE 1
146 # define VC_IMPL_SSE2 1
149 # if defined(VC_IMPL_SSE)
152 # define VC_IMPL_Scalar 1
155 # if defined(VC_IMPL_AVX) || defined(VC_IMPL_SSE)
157 # define VC_IMPL_FMA4 1
160 # define VC_IMPL_XOP 1
163 # define VC_IMPL_F16C 1
166 # define VC_IMPL_POPCNT 1
169 # define VC_IMPL_SSE4a 1
172 # define VC_IMPL_FMA 1
178 # if (VC_IMPL & IMPL_MASK) == AVX // AVX supersedes SSE
179 # define VC_IMPL_AVX 1
180 # elif (VC_IMPL & IMPL_MASK) == Scalar
181 # define VC_IMPL_Scalar 1
182 # elif (VC_IMPL & IMPL_MASK) == SSE4_2
183 # define VC_IMPL_SSE4_2 1
184 # define VC_IMPL_SSE4_1 1
185 # define VC_IMPL_SSSE3 1
186 # define VC_IMPL_SSE3 1
187 # define VC_IMPL_SSE2 1
188 # define VC_IMPL_SSE 1
189 # elif (VC_IMPL & IMPL_MASK) == SSE4_1
190 # define VC_IMPL_SSE4_1 1
191 # define VC_IMPL_SSSE3 1
192 # define VC_IMPL_SSE3 1
193 # define VC_IMPL_SSE2 1
194 # define VC_IMPL_SSE 1
195 # elif (VC_IMPL & IMPL_MASK) == SSSE3
196 # define VC_IMPL_SSSE3 1
197 # define VC_IMPL_SSE3 1
198 # define VC_IMPL_SSE2 1
199 # define VC_IMPL_SSE 1
200 # elif (VC_IMPL & IMPL_MASK) == SSE3
201 # define VC_IMPL_SSE3 1
202 # define VC_IMPL_SSE2 1
203 # define VC_IMPL_SSE 1
204 # elif (VC_IMPL & IMPL_MASK) == SSE2
205 # define VC_IMPL_SSE2 1
206 # define VC_IMPL_SSE 1
207 # elif (VC_IMPL & IMPL_MASK) == SSE
208 # define VC_IMPL_SSE 1
209 # if defined(__SSE4_2__)
210 # define VC_IMPL_SSE4_2 1
212 # if defined(__SSE4_1__)
213 # define VC_IMPL_SSE4_1 1
215 # if defined(__SSE3__)
216 # define VC_IMPL_SSE3 1
218 # if defined(__SSSE3__)
219 # define VC_IMPL_SSSE3 1
221 # if defined(__SSE2__)
222 # define VC_IMPL_SSE2 1
224 # elif (VC_IMPL & IMPL_MASK) == 0 && (VC_IMPL & SSE4a)
225 // this is for backward compatibility only where SSE4a was included in the main
226 // line of available SIMD instruction sets
227 # define VC_IMPL_SSE3 1
228 # define VC_IMPL_SSE2 1
229 # define VC_IMPL_SSE 1
232 # define VC_IMPL_XOP 1
234 # if (VC_IMPL & FMA4)
235 # define VC_IMPL_FMA4 1
237 # if (VC_IMPL & F16C)
238 # define VC_IMPL_F16C 1
240 # if (VC_IMPL & POPCNT)
241 # define VC_IMPL_POPCNT 1
243 # if (VC_IMPL & SSE4a)
244 # define VC_IMPL_SSE4a 1
247 # define VC_IMPL_FMA 1
253 // If AVX is enabled in the compiler it will use VEX coding for the SIMD instructions.
255 # define VC_USE_VEX_CODING 1
258 #if defined(VC_GCC) && VC_GCC < 0x40300 && !defined(VC_IMPL_Scalar)
259 # ifndef VC_DONT_WARN_OLD_GCC
260 # warning "GCC < 4.3 does not have full support for SSE2 intrinsics. Using scalar types/operations only. Define VC_DONT_WARN_OLD_GCC to silence this warning."
265 # undef VC_IMPL_SSE4_1
266 # undef VC_IMPL_SSE4_2
267 # undef VC_IMPL_SSSE3
272 # undef VC_IMPL_POPCNT
273 # undef VC_IMPL_SSE4a
275 # undef VC_USE_VEX_CODING
276 # define VC_IMPL_Scalar 1
279 # if !defined(VC_IMPL_Scalar) && !defined(VC_IMPL_SSE) && !defined(VC_IMPL_AVX)
280 # error "No suitable Vc implementation was selected! Probably VC_IMPL was set to an invalid value."
281 # elif defined(VC_IMPL_SSE) && !defined(VC_IMPL_SSE2)
282 # error "SSE requested but no SSE2 support. Vc needs at least SSE2!"
312 enum StreamingAndAlignedFlag { // implies Aligned
315 enum StreamingAndUnalignedFlag {
316 StreamingAndUnaligned = 3
323 * Enum that specifies the alignment and padding restrictions to use for memory allocation with
326 enum MallocAlignment {
328 * Align on boundary of vector sizes (e.g. 16 Bytes on SSE platforms) and pad to allow
329 * vector access to the end. Thus the allocated memory contains a multiple of
330 * VectorAlignment bytes.
334 * Align on boundary of cache line sizes (e.g. 64 Bytes on x86) and pad to allow
335 * full cache line access to the end. Thus the allocated memory contains a multiple of
340 * Align on boundary of page sizes (e.g. 4096 Bytes on x86) and pad to allow
341 * full page access to the end. Thus the allocated memory contains a multiple of
347 #if __cplusplus >= 201103 /*C++11*/
348 #define Vc_CONSTEXPR static constexpr
349 #elif defined(__GNUC__)
350 #define Vc_CONSTEXPR static inline __attribute__((__always_inline__, __const__))
351 #elif defined(VC_MSVC)
352 #define Vc_CONSTEXPR static inline __forceinline
354 #define Vc_CONSTEXPR static inline
// Combining load/store flag objects. Streaming implies Aligned (see the enum
// comments above), so:
//  - Streaming combined with an explicit Unaligned flag yields
//    StreamingAndUnaligned;
//  - Streaming combined with Aligned stays plain Streaming.
// Both operator| and operator& are overloaded to the same result so either
// spelling works at call sites; with C++11 these are constexpr (Vc_CONSTEXPR),
// otherwise force-inlined const functions that fold to a constant.
Vc_CONSTEXPR StreamingAndUnalignedFlag operator|(UnalignedFlag, StreamingAndAlignedFlag) { return StreamingAndUnaligned; }
Vc_CONSTEXPR StreamingAndUnalignedFlag operator|(StreamingAndAlignedFlag, UnalignedFlag) { return StreamingAndUnaligned; }
Vc_CONSTEXPR StreamingAndUnalignedFlag operator&(UnalignedFlag, StreamingAndAlignedFlag) { return StreamingAndUnaligned; }
Vc_CONSTEXPR StreamingAndUnalignedFlag operator&(StreamingAndAlignedFlag, UnalignedFlag) { return StreamingAndUnaligned; }
Vc_CONSTEXPR StreamingAndAlignedFlag operator|(AlignedFlag, StreamingAndAlignedFlag) { return Streaming; }
Vc_CONSTEXPR StreamingAndAlignedFlag operator|(StreamingAndAlignedFlag, AlignedFlag) { return Streaming; }
Vc_CONSTEXPR StreamingAndAlignedFlag operator&(AlignedFlag, StreamingAndAlignedFlag) { return Streaming; }
Vc_CONSTEXPR StreamingAndAlignedFlag operator&(StreamingAndAlignedFlag, AlignedFlag) { return Streaming; }
369 * Enum to identify a certain SIMD instruction set.
371 * You can use \ref VC_IMPL for the currently active implementation.
373 * \see ExtraInstructions
375 enum Implementation {
376 /// uses only fundamental types
380 /// x86 SSE + SSE2 + SSE3
382 /// x86 SSE + SSE2 + SSE3 + SSSE3
384 /// x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
386 /// x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
392 ImplementationMask = 0xfff
398 * The list of available instructions is not easily described by a linear list of instruction sets.
399 * On x86 the following instruction sets always include their predecessors:
400 * SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2
402 * But there are additional instructions that are not necessarily required by this list. These are
403 * covered in this enum.
405 enum ExtraInstructions {
406 //! Support for float16 conversions in hardware
407 Float16cInstructions = 0x01000,
408 //! Support for FMA4 instructions
409 Fma4Instructions = 0x02000,
410 //! Support for XOP instructions
411 XopInstructions = 0x04000,
412 //! Support for the population count instruction
413 PopcntInstructions = 0x08000,
414 //! Support for SSE4a instructions
415 Sse4aInstructions = 0x10000,
416 //! Support for FMA instructions (3 operand variant)
417 FmaInstructions = 0x20000,
418 // PclmulqdqInstructions,
420 // RdrandInstructions
421 ExtraInstructionsMask = 0xfffff000u
426 #ifdef VC_IMPL_Scalar
427 #define VC_IMPL ::AliRoot::Vc::ScalarImpl
428 #elif defined(VC_IMPL_AVX)
429 #define VC_IMPL ::AliRoot::Vc::AVXImpl
430 #elif defined(VC_IMPL_SSE4_2)
431 #define VC_IMPL ::AliRoot::Vc::SSE42Impl
432 #elif defined(VC_IMPL_SSE4_1)
433 #define VC_IMPL ::AliRoot::Vc::SSE41Impl
434 #elif defined(VC_IMPL_SSSE3)
435 #define VC_IMPL ::AliRoot::Vc::SSSE3Impl
436 #elif defined(VC_IMPL_SSE3)
437 #define VC_IMPL ::AliRoot::Vc::SSE3Impl
438 #elif defined(VC_IMPL_SSE2)
439 #define VC_IMPL ::AliRoot::Vc::SSE2Impl
442 template<unsigned int Features> struct ImplementationT { enum _Value {
444 Implementation = Features & Vc::ImplementationMask,
445 ExtraInstructions = Features & Vc::ExtraInstructionsMask
448 typedef ImplementationT<
449 #ifdef VC_USE_VEX_CODING
450 // everything will use VEX coding, so the system has to support AVX even if VC_IMPL_AVX is not set
// but AFAIU the OSXSAVE and xgetbv tests do not have to be positive (unless, of course, the
452 // compiler decides to insert an instruction that uses the full register size - so better be on
459 + Vc::Sse4aInstructions
461 + Vc::XopInstructions
463 + Vc::Fma4Instructions
467 #ifdef VC_IMPL_POPCNT
468 + Vc::PopcntInstructions
471 + Vc::FmaInstructions
473 > CurrentImplementation;
// Per-ISA helper: specialized elsewhere for each Implementation enumerator.
template<Implementation Impl> struct HelperImpl;
// Helper is the HelperImpl specialization matching the implementation this
// translation unit is compiled for (VC_IMPL expands to e.g.
// ::AliRoot::Vc::SSE2Impl, selected by the #ifdef chain above).
typedef HelperImpl<VC_IMPL> Helper;
// FlagObject maps a flag *type* back to its canonical enumerator value, so
// templated code that only deduced the flag type can materialize a flag object
// via FlagObject<A>::the().
template<typename A> struct FlagObject;
template<> struct FlagObject<AlignedFlag> { Vc_CONSTEXPR AlignedFlag the() { return Aligned; } };
template<> struct FlagObject<UnalignedFlag> { Vc_CONSTEXPR UnalignedFlag the() { return Unaligned; } };
template<> struct FlagObject<StreamingAndAlignedFlag> { Vc_CONSTEXPR StreamingAndAlignedFlag the() { return Streaming; } };
template<> struct FlagObject<StreamingAndUnalignedFlag> { Vc_CONSTEXPR StreamingAndUnalignedFlag the() { return StreamingAndUnaligned; } };
484 } // namespace Internal
488 void _operator_bracket_warning()
489 #ifdef VC_HAVE_ATTRIBUTE_WARNING
490 __attribute__((warning("\n\tUse of Vc::Vector::operator[] to modify scalar entries is known to miscompile with GCC 4.3.x.\n\tPlease upgrade to a more recent GCC or avoid operator[] altogether.\n\t(This warning adds an unnecessary function call to operator[] which should work around the problem at a little extra cost.)")))
493 } // namespace Warnings
497 template<typename L, typename R> struct invalid_operands_of_types {};
502 } // namespace AliRoot
507 #endif // VC_GLOBAL_H