]> git.uio.no Git - u/mrichter/AliRoot.git/blame - Vc/include/Vc/global.h
update to Vc 0.7.3-dev
[u/mrichter/AliRoot.git] / Vc / include / Vc / global.h
CommitLineData
/*  This file is part of the Vc library.

    Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>

    Vc is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as
    published by the Free Software Foundation, either version 3 of
    the License, or (at your option) any later version.

    Vc is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with Vc.  If not, see <http://www.gnu.org/licenses/>.

*/
19
20#ifndef VC_GLOBAL_H
21#define VC_GLOBAL_H
22
c017a39f 23#ifndef DOXYGEN
24
// Compiler defines
//
// Exactly one of VC_ICC, VC_OPEN64, VC_CLANG, VC_GCC, VC_MSVC or VC_UNSUPPORTED_COMPILER is
// defined by this chain. The first matching test wins, so VC_GCC is only set when none of the
// compilers tested before it was detected.
// VC_CLANG and VC_GCC encode the version as major * 0x10000 + minor * 0x100 + patchlevel, which
// allows comparisons such as VC_GCC >= 0x40300.
#ifdef __INTEL_COMPILER
#define VC_ICC __INTEL_COMPILER_BUILD_DATE
#elif defined(__OPENCC__)
#define VC_OPEN64 1
#elif defined(__clang__)
#define VC_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
#elif defined(__GNUC__)
#define VC_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
#elif defined(_MSC_VER)
#define VC_MSVC _MSC_FULL_VER
#else
#define VC_UNSUPPORTED_COMPILER 1
#endif
39
// Features/Quirks defines
#if defined VC_MSVC && defined _WIN32
// the Win32 ABI can't handle function parameters with alignment >= 16
#define VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN 1
#endif

// GNU-style inline assembly is enabled for every compiler that defines __GNUC__, unless the user
// opts out with VC_NO_INLINE_ASM.
#if defined(__GNUC__) && !defined(VC_NO_INLINE_ASM)
#define VC_GNU_ASM 1
#endif

// Affected: GCC <= 4.4.5 and GCC 4.5.0 - 4.5.2, except the GCC 4.5.2 build identified by
// __GNUC_UBUNTU_VERSION__ == 0xb0408.
#if defined(VC_GCC) && (VC_GCC <= 0x40405 || (VC_GCC >= 0x40500 && VC_GCC <= 0x40502)) && !(VC_GCC == 0x40502 && defined(__GNUC_UBUNTU_VERSION__) && __GNUC_UBUNTU_VERSION__ == 0xb0408)
// GCC 4.6.0 / 4.5.3 / 4.4.6 switched to the interface as defined by ICC
// (Ubuntu 11.04 ships a GCC 4.5.2 with the new interface)
#define VC_MM256_MASKSTORE_WRONG_MASK_TYPE 1
#endif

// The error/warning function attributes are assumed to be available starting with GCC 4.3.
#if defined(VC_GCC) && VC_GCC >= 0x40300
#define VC_HAVE_ATTRIBUTE_ERROR 1
#define VC_HAVE_ATTRIBUTE_WARNING 1
#endif
57
// C++11 detection: either the compiler reports a C++11 (or later) __cplusplus value, or it is
// GCC >= 4.6 in its experimental C++0x mode.
#if (defined(__GXX_EXPERIMENTAL_CXX0X__) && VC_GCC >= 0x40600) || __cplusplus >= 201103
#  define VC_CXX11 1
#  ifdef VC_GCC
#    if VC_GCC >= 0x40700 // && VC_GCC < 0x408000)
// ::max_align_t was introduced with GCC 4.7. std::max_align_t took a bit longer.
#      define VC_HAVE_MAX_ALIGN_T 1
#    endif
#  elif !defined(VC_CLANG)
// Clang doesn't provide max_align_t at all
#    define VC_HAVE_STD_MAX_ALIGN_T 1
#  endif
#endif
70
// ICC ships the AVX2 intrinsics inside the AVX1 header.
// FIXME: the number 20120731 is too large, but I don't know which one is the right one
// The same macro is also set for MSVC with _MSC_FULL_VER >= 170000000.
#if (defined(VC_ICC) && VC_ICC >= 20120731) || (defined(VC_MSVC) && VC_MSVC >= 170000000)
#define VC_UNCONDITIONAL_AVX2_INTRINSICS 1
#endif
76
/* Define the following strings to a unique integer, which is the only type the preprocessor can
 * compare. This allows to use -DVC_IMPL=SSE3. The preprocessor will then consider VC_IMPL and SSE3
 * to be equal. Of course, it is important to undefine the strings later on!
 */
// Base instruction sets: one value each, stored in the bits covered by IMPL_MASK.
#define Scalar 0x00100000
#define SSE    0x00200000
#define SSE2   0x00300000
#define SSE3   0x00400000
#define SSSE3  0x00500000
#define SSE4_1 0x00600000
#define SSE4_2 0x00700000
#define AVX    0x00800000

// Optional extensions: one bit each, stored in the bits covered by EXT_MASK, so that several of
// them can be combined with a base instruction set.
#define XOP    0x00000001
#define FMA4   0x00000002
#define F16C   0x00000004
#define POPCNT 0x00000008
#define SSE4a  0x00000010
#define FMA    0x00000020

#define IMPL_MASK 0xFFF00000
#define EXT_MASK  0x000FFFFF
99
// For MSVC builds, derive the __SSE__ / __SSE2__ macros (which the implementation selection tests)
// from _M_IX86_FP or _M_AMD64. Macros that are already defined are left untouched.
#ifdef VC_MSVC
#  ifdef _M_IX86_FP
#    if _M_IX86_FP >= 1
#      ifndef __SSE__
#        define __SSE__ 1
#      endif
#    endif
#    if _M_IX86_FP >= 2
#      ifndef __SSE2__
#        define __SSE2__ 1
#      endif
#    endif
#  elif defined(_M_AMD64)
// If the target is x86_64 then SSE2 is guaranteed
#    ifndef __SSE__
#      define __SSE__ 1
#    endif
#    ifndef __SSE2__
#      define __SSE2__ 1
#    endif
#  endif
#endif
122
// Implementation selection.
//
// Result: a set of VC_IMPL_* macros describing the SIMD implementation to use.
//  - Without a user-supplied VC_IMPL the choice follows the compiler's target macros
//    (__AVX__, __SSE4_2__, ..., __SSE2__), falling back to VC_IMPL_Scalar.
//  - With a user-supplied VC_IMPL (e.g. -DVC_IMPL=SSE3) the temporary integer ids defined earlier
//    in this file are used to decode it; VC_IMPL itself is undefined again afterwards.
#ifndef VC_IMPL

#  if defined(__AVX__)
#    define VC_IMPL_AVX 1
#  else
#    if defined(__SSE4_2__)
#      define VC_IMPL_SSE 1
#      define VC_IMPL_SSE4_2 1
#    endif
#    if defined(__SSE4_1__)
#      define VC_IMPL_SSE 1
#      define VC_IMPL_SSE4_1 1
#    endif
#    if defined(__SSE3__)
#      define VC_IMPL_SSE 1
#      define VC_IMPL_SSE3 1
#    endif
#    if defined(__SSSE3__)
#      define VC_IMPL_SSE 1
#      define VC_IMPL_SSSE3 1
#    endif
#    if defined(__SSE2__)
#      define VC_IMPL_SSE 1
#      define VC_IMPL_SSE2 1
#    endif

#    if defined(VC_IMPL_SSE)
       // nothing
#    else
#      define VC_IMPL_Scalar 1
#    endif
#  endif
   // Extensions are only picked up for a SIMD implementation, never for Scalar.
#  if defined(VC_IMPL_AVX) || defined(VC_IMPL_SSE)
#    ifdef __FMA4__
#      define VC_IMPL_FMA4 1
#    endif
#    ifdef __XOP__
#      define VC_IMPL_XOP 1
#    endif
#    ifdef __F16C__
#      define VC_IMPL_F16C 1
#    endif
#    ifdef __POPCNT__
#      define VC_IMPL_POPCNT 1
#    endif
#    ifdef __SSE4A__
#      define VC_IMPL_SSE4a 1
#    endif
#    ifdef __FMA__
#      define VC_IMPL_FMA 1
#    endif
#  endif

#else // VC_IMPL

   // Each explicitly requested SSE level also enables all lower SSE levels.
#  if (VC_IMPL & IMPL_MASK) == AVX // AVX supersedes SSE
#    define VC_IMPL_AVX 1
#  elif (VC_IMPL & IMPL_MASK) == Scalar
#    define VC_IMPL_Scalar 1
#  elif (VC_IMPL & IMPL_MASK) == SSE4_2
#    define VC_IMPL_SSE4_2 1
#    define VC_IMPL_SSE4_1 1
#    define VC_IMPL_SSSE3 1
#    define VC_IMPL_SSE3 1
#    define VC_IMPL_SSE2 1
#    define VC_IMPL_SSE 1
#  elif (VC_IMPL & IMPL_MASK) == SSE4_1
#    define VC_IMPL_SSE4_1 1
#    define VC_IMPL_SSSE3 1
#    define VC_IMPL_SSE3 1
#    define VC_IMPL_SSE2 1
#    define VC_IMPL_SSE 1
#  elif (VC_IMPL & IMPL_MASK) == SSSE3
#    define VC_IMPL_SSSE3 1
#    define VC_IMPL_SSE3 1
#    define VC_IMPL_SSE2 1
#    define VC_IMPL_SSE 1
#  elif (VC_IMPL & IMPL_MASK) == SSE3
#    define VC_IMPL_SSE3 1
#    define VC_IMPL_SSE2 1
#    define VC_IMPL_SSE 1
#  elif (VC_IMPL & IMPL_MASK) == SSE2
#    define VC_IMPL_SSE2 1
#    define VC_IMPL_SSE 1
#  elif (VC_IMPL & IMPL_MASK) == SSE
     // Plain "SSE": the concrete level is taken from the compiler's target macros.
#    define VC_IMPL_SSE 1
#    if defined(__SSE4_2__)
#      define VC_IMPL_SSE4_2 1
#    endif
#    if defined(__SSE4_1__)
#      define VC_IMPL_SSE4_1 1
#    endif
#    if defined(__SSE3__)
#      define VC_IMPL_SSE3 1
#    endif
#    if defined(__SSSE3__)
#      define VC_IMPL_SSSE3 1
#    endif
#    if defined(__SSE2__)
#      define VC_IMPL_SSE2 1
#    endif
#  elif (VC_IMPL & IMPL_MASK) == 0 && (VC_IMPL & SSE4a)
     // this is for backward compatibility only where SSE4a was included in the main
     // line of available SIMD instruction sets
#    define VC_IMPL_SSE3 1
#    define VC_IMPL_SSE2 1
#    define VC_IMPL_SSE 1
#  endif
   // Extension bits are decoded independently of the base implementation.
#  if (VC_IMPL & XOP)
#    define VC_IMPL_XOP 1
#  endif
#  if (VC_IMPL & FMA4)
#    define VC_IMPL_FMA4 1
#  endif
#  if (VC_IMPL & F16C)
#    define VC_IMPL_F16C 1
#  endif
#  if (VC_IMPL & POPCNT)
#    define VC_IMPL_POPCNT 1
#  endif
#  if (VC_IMPL & SSE4a)
#    define VC_IMPL_SSE4a 1
#  endif
#  if (VC_IMPL & FMA)
#    define VC_IMPL_FMA 1
#  endif
#  undef VC_IMPL

#endif // VC_IMPL

// If AVX is enabled in the compiler it will use VEX coding for the SIMD instructions.
#ifdef __AVX__
#  define VC_USE_VEX_CODING 1
#endif

// Old GCC: drop every SIMD selection made above and fall back to the scalar implementation.
#if defined(VC_GCC) && VC_GCC < 0x40300 && !defined(VC_IMPL_Scalar)
#  ifndef VC_DONT_WARN_OLD_GCC
#    warning "GCC < 4.3 does not have full support for SSE2 intrinsics. Using scalar types/operations only. Define VC_DONT_WARN_OLD_GCC to silence this warning."
#  endif
#  undef VC_IMPL_SSE
#  undef VC_IMPL_SSE2
#  undef VC_IMPL_SSE3
#  undef VC_IMPL_SSE4_1
#  undef VC_IMPL_SSE4_2
#  undef VC_IMPL_SSSE3
#  undef VC_IMPL_AVX
#  undef VC_IMPL_FMA4
#  undef VC_IMPL_XOP
#  undef VC_IMPL_F16C
#  undef VC_IMPL_POPCNT
#  undef VC_IMPL_SSE4a
#  undef VC_IMPL_FMA
#  undef VC_USE_VEX_CODING
#  define VC_IMPL_Scalar 1
#endif

// Sanity checks: some implementation must have been selected, and SSE requires at least SSE2.
#if !defined(VC_IMPL_Scalar) && !defined(VC_IMPL_SSE) && !defined(VC_IMPL_AVX)
#  error "No suitable Vc implementation was selected! Probably VC_IMPL was set to an invalid value."
#elif defined(VC_IMPL_SSE) && !defined(VC_IMPL_SSE2)
#  error "SSE requested but no SSE2 support. Vc needs at least SSE2!"
#endif
284
// Remove the temporary integer ids again. They were only needed to let the preprocessor decode a
// user-supplied VC_IMPL and must not leak such short names into code including this header.
#undef Scalar
#undef SSE
#undef SSE2
#undef SSE3
#undef SSSE3
#undef SSE4_1
#undef SSE4_2
#undef AVX

#undef XOP
#undef FMA4
#undef F16C
#undef POPCNT
#undef SSE4a
#undef FMA

#undef IMPL_MASK
#undef EXT_MASK
303
304namespace AliRoot {
f22341db 305namespace Vc {
// Alignment / streaming flags.
// Every flag is the single enumerator of its own enum type, so the flag that was passed can be
// distinguished by type at compile time. The numeric values are distinct (0..3).
enum AlignedFlag {
    Aligned = 0
};
enum UnalignedFlag {
    Unaligned = 1
};
enum StreamingAndAlignedFlag { // implies Aligned
    Streaming = 2
};
enum StreamingAndUnalignedFlag {
    StreamingAndUnaligned = 3
};
c017a39f 318#endif // DOXYGEN
f22341db 319
/**
 * \ingroup Utilities
 *
 * Enum that specifies the alignment and padding restrictions to use for memory allocation with
 * Vc::malloc.
 */
enum MallocAlignment {
    /**
     * Align on boundary of vector sizes (e.g. 16 Bytes on SSE platforms) and pad to allow
     * vector access to the end. Thus the allocated memory contains a multiple of
     * VectorAlignment bytes.
     */
    AlignOnVector,
    /**
     * Align on boundary of cache line sizes (e.g. 64 Bytes on x86) and pad to allow
     * full cache line access to the end. Thus the allocated memory contains a multiple of
     * 64 bytes.
     */
    AlignOnCacheline,
    /**
     * Align on boundary of page sizes (e.g. 4096 Bytes on x86) and pad to allow
     * full page access to the end. Thus the allocated memory contains a multiple of
     * 4096 bytes.
     */
    AlignOnPage
};
346
// Vc_CONSTEXPR: declaration prefix for the small helper functions in this header.
// It expands to `static constexpr` in C++11 and later; otherwise to the strongest available
// "static inline" variant of the compiler in use. The macro is undefined again at the end of
// this header.
#if __cplusplus >= 201103 /*C++11*/
#define Vc_CONSTEXPR static constexpr
#elif defined(__GNUC__)
#define Vc_CONSTEXPR static inline __attribute__((__always_inline__, __const__))
#elif defined(VC_MSVC)
#define Vc_CONSTEXPR static inline __forceinline
#else
#define Vc_CONSTEXPR static inline
#endif
356Vc_CONSTEXPR StreamingAndUnalignedFlag operator|(UnalignedFlag, StreamingAndAlignedFlag) { return StreamingAndUnaligned; }
357Vc_CONSTEXPR StreamingAndUnalignedFlag operator|(StreamingAndAlignedFlag, UnalignedFlag) { return StreamingAndUnaligned; }
358Vc_CONSTEXPR StreamingAndUnalignedFlag operator&(UnalignedFlag, StreamingAndAlignedFlag) { return StreamingAndUnaligned; }
359Vc_CONSTEXPR StreamingAndUnalignedFlag operator&(StreamingAndAlignedFlag, UnalignedFlag) { return StreamingAndUnaligned; }
f22341db 360
c017a39f 361Vc_CONSTEXPR StreamingAndAlignedFlag operator|(AlignedFlag, StreamingAndAlignedFlag) { return Streaming; }
362Vc_CONSTEXPR StreamingAndAlignedFlag operator|(StreamingAndAlignedFlag, AlignedFlag) { return Streaming; }
363Vc_CONSTEXPR StreamingAndAlignedFlag operator&(AlignedFlag, StreamingAndAlignedFlag) { return Streaming; }
364Vc_CONSTEXPR StreamingAndAlignedFlag operator&(StreamingAndAlignedFlag, AlignedFlag) { return Streaming; }
f22341db 365
/**
 * \ingroup Utilities
 *
 * Enum to identify a certain SIMD instruction set.
 *
 * You can use \ref VC_IMPL for the currently active implementation.
 *
 * \see ExtraInstructions
 */
enum Implementation {
    /// uses only fundamental types
    ScalarImpl,
    /// x86 SSE + SSE2
    SSE2Impl,
    /// x86 SSE + SSE2 + SSE3
    SSE3Impl,
    /// x86 SSE + SSE2 + SSE3 + SSSE3
    SSSE3Impl,
    /// x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
    SSE41Impl,
    /// x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
    SSE42Impl,
    /// x86 AVX
    AVXImpl,
    /// x86 AVX + AVX2
    AVX2Impl,
    /// bit mask that extracts the Implementation part of a combined feature value
    ImplementationMask = 0xfff
};
394
/**
 * \ingroup Utilities
 *
 * The list of available instructions is not easily described by a linear list of instruction sets.
 * On x86 the following instruction sets always include their predecessors:
 * SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2
 *
 * But there are additional instructions that are not necessarily required by this list. These are
 * covered in this enum.
 *
 * Every enumerator occupies its own bit at or above 0x1000, so the values can be added to an
 * Implementation value without overlapping ImplementationMask.
 */
enum ExtraInstructions {
    //! Support for float16 conversions in hardware
    Float16cInstructions  = 0x01000,
    //! Support for FMA4 instructions
    Fma4Instructions      = 0x02000,
    //! Support for XOP instructions
    XopInstructions       = 0x04000,
    //! Support for the population count instruction
    PopcntInstructions    = 0x08000,
    //! Support for SSE4a instructions
    Sse4aInstructions     = 0x10000,
    //! Support for FMA instructions (3 operand variant)
    FmaInstructions       = 0x20000,
    // PclmulqdqInstructions,
    // AesInstructions,
    // RdrandInstructions
    //! Bit mask that extracts the ExtraInstructions part of a combined feature value
    ExtraInstructionsMask = 0xfffff000u
};
423
424#ifndef DOXYGEN
425
// (Re)define VC_IMPL as the Implementation enumerator that matches the selected VC_IMPL_* macros.
// Scalar is tested first, then AVX, then the SSE levels from highest to lowest, so the most
// capable selected instruction set wins.
#ifdef VC_IMPL_Scalar
#define VC_IMPL ::AliRoot::Vc::ScalarImpl
#elif defined(VC_IMPL_AVX)
#define VC_IMPL ::AliRoot::Vc::AVXImpl
#elif defined(VC_IMPL_SSE4_2)
#define VC_IMPL ::AliRoot::Vc::SSE42Impl
#elif defined(VC_IMPL_SSE4_1)
#define VC_IMPL ::AliRoot::Vc::SSE41Impl
#elif defined(VC_IMPL_SSSE3)
#define VC_IMPL ::AliRoot::Vc::SSSE3Impl
#elif defined(VC_IMPL_SSE3)
#define VC_IMPL ::AliRoot::Vc::SSE3Impl
#elif defined(VC_IMPL_SSE2)
#define VC_IMPL ::AliRoot::Vc::SSE2Impl
#endif
441
442template<unsigned int Features> struct ImplementationT { enum _Value {
443 Value = Features,
444 Implementation = Features & Vc::ImplementationMask,
445 ExtraInstructions = Features & Vc::ExtraInstructionsMask
446}; };
447
448typedef ImplementationT<
449#ifdef VC_USE_VEX_CODING
450 // everything will use VEX coding, so the system has to support AVX even if VC_IMPL_AVX is not set
451 // but AFAIU the OSXSAVE and xgetbv tests do not have to positive (unless, of course, the
452 // compiler decides to insert an instruction that uses the full register size - so better be on
453 // the safe side)
454 AVXImpl
455#else
456 VC_IMPL
457#endif
458#ifdef VC_IMPL_SSE4a
459 + Vc::Sse4aInstructions
460#ifdef VC_IMPL_XOP
461 + Vc::XopInstructions
462#ifdef VC_IMPL_FMA4
463 + Vc::Fma4Instructions
464#endif
465#endif
466#endif
467#ifdef VC_IMPL_POPCNT
468 + Vc::PopcntInstructions
469#endif
470#ifdef VC_IMPL_FMA
471 + Vc::FmaInstructions
f22341db 472#endif
c017a39f 473 > CurrentImplementation;
f22341db 474
475namespace Internal {
476 template<Implementation Impl> struct HelperImpl;
477 typedef HelperImpl<VC_IMPL> Helper;
478
479 template<typename A> struct FlagObject;
c017a39f 480 template<> struct FlagObject<AlignedFlag> { Vc_CONSTEXPR AlignedFlag the() { return Aligned; } };
481 template<> struct FlagObject<UnalignedFlag> { Vc_CONSTEXPR UnalignedFlag the() { return Unaligned; } };
482 template<> struct FlagObject<StreamingAndAlignedFlag> { Vc_CONSTEXPR StreamingAndAlignedFlag the() { return Streaming; } };
483 template<> struct FlagObject<StreamingAndUnalignedFlag> { Vc_CONSTEXPR StreamingAndUnalignedFlag the() { return StreamingAndUnaligned; } };
f22341db 484} // namespace Internal
485
namespace Warnings
{
    // Declaration only. Where the compiler supports the `warning` function attribute
    // (VC_HAVE_ATTRIBUTE_WARNING), every call to this function emits the diagnostic below at
    // compile time.
    void _operator_bracket_warning()
#ifdef VC_HAVE_ATTRIBUTE_WARNING
        __attribute__((warning("\n\tUse of Vc::Vector::operator[] to modify scalar entries is known to miscompile with GCC 4.3.x.\n\tPlease upgrade to a more recent GCC or avoid operator[] altogether.\n\t(This warning adds an unnecessary function call to operator[] which should work around the problem at a little extra cost.)")))
#endif
        ;
} // namespace Warnings
494
namespace Error
{
    // Empty tag type parameterized on a left (L) and right (R) operand type.
    // NOTE(review): presumably used so that the offending operand types show up in compiler
    // diagnostics for unsupported operator combinations - confirm against the users of this type.
    template<typename L, typename R> struct invalid_operands_of_types {};
} // namespace Error
499
c017a39f 500#endif // DOXYGEN
f22341db 501} // namespace Vc
c017a39f 502} // namespace AliRoot
f22341db 503
c017a39f 504#undef Vc_CONSTEXPR
f22341db 505#include "version.h"
506
507#endif // VC_GLOBAL_H