#define SSE_MASK_H
#include "intrinsics.h"
+#include "macros.h"
+namespace AliRoot {
namespace Vc
{
namespace SSE
template<unsigned int Size1> struct MaskHelper;
// MaskHelper<Size> compares two SSE mask registers (per-lane all-ones/all-zero
// patterns) by reducing each operand to a movemask bit pattern whose width
// matches Size entries per __m128.
template<> struct MaskHelper<2> {
// 2 entries: one sign bit per 64-bit lane (movemask_pd).
- static inline bool cmpeq (_M128 k1, _M128 k2) { return _mm_movemask_pd(_mm_castps_pd(k1)) == _mm_movemask_pd(_mm_castps_pd(k2)); }
- static inline bool cmpneq(_M128 k1, _M128 k2) { return _mm_movemask_pd(_mm_castps_pd(k1)) != _mm_movemask_pd(_mm_castps_pd(k2)); }
+ static Vc_ALWAYS_INLINE Vc_CONST bool cmpeq (_M128 k1, _M128 k2) { return _mm_movemask_pd(_mm_castps_pd(k1)) == _mm_movemask_pd(_mm_castps_pd(k2)); }
+ static Vc_ALWAYS_INLINE Vc_CONST bool cmpneq(_M128 k1, _M128 k2) { return _mm_movemask_pd(_mm_castps_pd(k1)) != _mm_movemask_pd(_mm_castps_pd(k2)); }
};
template<> struct MaskHelper<4> {
// 4 entries: one sign bit per 32-bit lane (movemask_ps).
- static inline bool cmpeq (_M128 k1, _M128 k2) { return _mm_movemask_ps(k1) == _mm_movemask_ps(k2); }
- static inline bool cmpneq(_M128 k1, _M128 k2) { return _mm_movemask_ps(k1) != _mm_movemask_ps(k2); }
+ static Vc_ALWAYS_INLINE Vc_CONST bool cmpeq (_M128 k1, _M128 k2) { return _mm_movemask_ps(k1) == _mm_movemask_ps(k2); }
+ static Vc_ALWAYS_INLINE Vc_CONST bool cmpneq(_M128 k1, _M128 k2) { return _mm_movemask_ps(k1) != _mm_movemask_ps(k2); }
};
template<> struct MaskHelper<8> {
// 8 entries (16-bit lanes): movemask_epi8 yields two identical bits per lane,
// which is fine for an equality comparison of whole masks.
- static inline bool cmpeq (_M128 k1, _M128 k2) { return _mm_movemask_epi8(_mm_castps_si128(k1)) == _mm_movemask_epi8(_mm_castps_si128(k2)); }
- static inline bool cmpneq(_M128 k1, _M128 k2) { return _mm_movemask_epi8(_mm_castps_si128(k1)) != _mm_movemask_epi8(_mm_castps_si128(k2)); }
+ static Vc_ALWAYS_INLINE Vc_CONST bool cmpeq (_M128 k1, _M128 k2) { return _mm_movemask_epi8(_mm_castps_si128(k1)) == _mm_movemask_epi8(_mm_castps_si128(k2)); }
+ static Vc_ALWAYS_INLINE Vc_CONST bool cmpneq(_M128 k1, _M128 k2) { return _mm_movemask_epi8(_mm_castps_si128(k1)) != _mm_movemask_epi8(_mm_castps_si128(k2)); }
};
class Float8Mask;
typedef Mask<VectorSize> Argument;
#endif
// Mask<VectorSize> constructors. The mask is stored as a single __m128 (k);
// __m128d/__m128i inputs are bit-cast, never converted numerically.
- inline Mask() {}
- inline Mask(const __m128 &x) : k(x) {}
- inline Mask(const __m128d &x) : k(_mm_castpd_ps(x)) {}
- inline Mask(const __m128i &x) : k(_mm_castsi128_ps(x)) {}
- inline explicit Mask(VectorSpecialInitializerZero::ZEnum) : k(_mm_setzero_ps()) {}
- inline explicit Mask(VectorSpecialInitializerOne::OEnum) : k(_mm_setallone_ps()) {}
- inline explicit Mask(bool b) : k(b ? _mm_setallone_ps() : _mm_setzero_ps()) {}
- inline Mask(const Mask &rhs) : k(rhs.k) {}
- inline Mask(const Mask<VectorSize / 2> *a)
+ Vc_ALWAYS_INLINE Mask() {}
+ Vc_ALWAYS_INLINE Mask(const __m128 &x) : k(x) {}
+ Vc_ALWAYS_INLINE Mask(const __m128d &x) : k(_mm_castpd_ps(x)) {}
+ Vc_ALWAYS_INLINE Mask(const __m128i &x) : k(_mm_castsi128_ps(x)) {}
+ Vc_ALWAYS_INLINE explicit Mask(VectorSpecialInitializerZero::ZEnum) : k(_mm_setzero_ps()) {}
+ Vc_ALWAYS_INLINE explicit Mask(VectorSpecialInitializerOne::OEnum) : k(_mm_setallone_ps()) {}
+ Vc_ALWAYS_INLINE explicit Mask(bool b) : k(b ? _mm_setallone_ps() : _mm_setzero_ps()) {}
+ Vc_ALWAYS_INLINE Mask(const Mask &rhs) : k(rhs.k) {}
// Merge two half-size masks into one: packs_epi16 narrows each 16-bit mask
// entry of a[0] and a[1] to 8 bits and concatenates them.
+ Vc_ALWAYS_INLINE Mask(const Mask<VectorSize / 2> *a)
: k(_mm_castsi128_ps(_mm_packs_epi16(a[0].dataI(), a[1].dataI()))) {}
// Conversion from the 8-entry two-register float mask; defined out of class below.
- inline explicit Mask(const Float8Mask &m);
+ Vc_ALWAYS_INLINE explicit Mask(const Float8Mask &m);
// Conversion between masks of different entry counts; specializations defined below.
- template<unsigned int OtherSize> explicit Mask(const Mask<OtherSize> &x);
+ template<unsigned int OtherSize> Vc_ALWAYS_INLINE_L explicit Mask(const Mask<OtherSize> &x) Vc_ALWAYS_INLINE_R;
//X {
//X _M128I tmp = x.dataI();
//X if (OtherSize < VectorSize) {
//X k = _mm_castsi128_ps(tmp);
//X }
// Split this mask into two half-size masks (inverse of the merging constructor).
- void expand(Mask<VectorSize / 2> *x) const;
+ inline void expand(Mask<VectorSize / 2> *x) const;
// Whole-mask equality via the movemask reduction in MaskHelper.
- inline bool operator==(const Mask &rhs) const { return MaskHelper<VectorSize>::cmpeq (k, rhs.k); }
- inline bool operator!=(const Mask &rhs) const { return MaskHelper<VectorSize>::cmpneq(k, rhs.k); }
+ Vc_ALWAYS_INLINE Vc_PURE bool operator==(const Mask &rhs) const { return MaskHelper<VectorSize>::cmpeq (k, rhs.k); }
+ Vc_ALWAYS_INLINE Vc_PURE bool operator!=(const Mask &rhs) const { return MaskHelper<VectorSize>::cmpneq(k, rhs.k); }
// Binary/logical member operators removed in favor of the namespace-scope
// overloads later in this file (which also reject incompatible mask sizes).
- inline Mask operator&&(const Mask &rhs) const { return _mm_and_ps(k, rhs.k); }
- inline Mask operator& (const Mask &rhs) const { return _mm_and_ps(k, rhs.k); }
- inline Mask operator||(const Mask &rhs) const { return _mm_or_ps (k, rhs.k); }
- inline Mask operator| (const Mask &rhs) const { return _mm_or_ps (k, rhs.k); }
- inline Mask operator^ (const Mask &rhs) const { return _mm_xor_ps(k, rhs.k); }
// Complement: ~k computed as andnot(k, all-ones).
- inline Mask operator!() const { return _mm_andnot_si128(dataI(), _mm_setallone_si128()); }
+ Vc_ALWAYS_INLINE Vc_PURE Mask operator!() const { return _mm_andnot_si128(dataI(), _mm_setallone_si128()); }
- inline Mask &operator&=(const Mask &rhs) { k = _mm_and_ps(k, rhs.k); return *this; }
- inline Mask &operator|=(const Mask &rhs) { k = _mm_or_ps (k, rhs.k); return *this; }
+ Vc_ALWAYS_INLINE Mask &operator&=(const Mask &rhs) { k = _mm_and_ps(k, rhs.k); return *this; }
+ Vc_ALWAYS_INLINE Mask &operator|=(const Mask &rhs) { k = _mm_or_ps (k, rhs.k); return *this; }
// operator^= is new; completes the compound-assignment set (&=, |=, ^=).
+ Vc_ALWAYS_INLINE Mask &operator^=(const Mask &rhs) { k = _mm_xor_ps(k, rhs.k); return *this; }
// isFull: true iff every entry of the mask is set. With SSE4.1 PTEST this
// avoids the movemask round-trip through a GPR.
- inline bool isFull () const { return
+ Vc_ALWAYS_INLINE Vc_PURE bool isFull () const { return
#ifdef VC_USE_PTEST
_mm_testc_si128(dataI(), _mm_setallone_si128()); // return 1 if (0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff) == (~0 & k)
#else
_mm_movemask_epi8(dataI()) == 0xffff;
#endif
}
// isEmpty: true iff no entry of the mask is set.
- inline bool isEmpty() const { return
+ Vc_ALWAYS_INLINE Vc_PURE bool isEmpty() const { return
#ifdef VC_USE_PTEST
_mm_testz_si128(dataI(), dataI()); // return 1 if (0, 0, 0, 0) == (k & k)
#else
_mm_movemask_epi8(dataI()) == 0x0000;
#endif
}
// isMix: true iff the mask has both set and unset entries.
// NOTE(review): the non-PTEST (#else) branch is not visible in this chunk —
// confirm it returns !isFull() && !isEmpty() equivalent logic.
- inline bool isMix() const {
+ Vc_ALWAYS_INLINE Vc_PURE bool isMix() const {
#ifdef VC_USE_PTEST
return _mm_test_mix_ones_zeros(dataI(), _mm_setallone_si128());
#else
}
#ifndef VC_NO_AUTOMATIC_BOOL_FROM_MASK
// Implicit bool conversion: a mask is "true" only when ALL entries are set.
- inline operator bool() const { return isFull(); }
+ Vc_ALWAYS_INLINE Vc_PURE operator bool() const { return isFull(); }
#endif
// shiftMask: per-byte movemask bits (16 bits regardless of entry count);
// toInt: one bit per mask entry. Both defined out of class below.
- inline int CONST_L shiftMask() const CONST_R;
+ Vc_ALWAYS_INLINE_L Vc_PURE_L int shiftMask() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
- int CONST_L toInt() const CONST_R;
+ Vc_ALWAYS_INLINE_L Vc_PURE_L int toInt() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
// Raw register access in the three SSE register views (bit-casts only).
- inline _M128 data () const { return k; }
- inline _M128I dataI() const { return _mm_castps_si128(k); }
- inline _M128D dataD() const { return _mm_castps_pd(k); }
+ Vc_ALWAYS_INLINE Vc_PURE _M128 data () const { return k; }
+ Vc_ALWAYS_INLINE Vc_PURE _M128I dataI() const { return _mm_castps_si128(k); }
+ Vc_ALWAYS_INLINE Vc_PURE _M128D dataD() const { return _mm_castps_pd(k); }
// Reinterpret (not convert) this register as a mask of a different entry count.
- template<unsigned int OtherSize> inline Mask<OtherSize> cast() const { return Mask<OtherSize>(k); }
+ template<unsigned int OtherSize> Vc_ALWAYS_INLINE Vc_PURE Mask<OtherSize> cast() const { return Mask<OtherSize>(k); }
// Per-entry test and population count; specializations defined below.
- bool operator[](int index) const;
+ Vc_ALWAYS_INLINE_L Vc_PURE_L bool operator[](int index) const Vc_ALWAYS_INLINE_R Vc_PURE_R;
- int count() const;
+ Vc_ALWAYS_INLINE_L Vc_PURE_L int count() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
/**
 * Returns the index of the first one in the mask.
 *
 * The return value is undefined if the mask is empty.
 */
- int firstOne() const;
+ Vc_ALWAYS_INLINE_L Vc_PURE_L int firstOne() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
private:
+#ifdef VC_COMPILE_BENCHMARKS
+ public:
+#endif
// The mask register; each entry is all-ones (true) or all-zero (false).
_M128 k;
};
// ForeachHelper: loop driver for Vc_foreach_bit. The struct's declaration
// line is above this chunk. It iterates over the set bits of an integer mask.
{
_long mask;
bool brk;
- inline ForeachHelper(_long _mask) : mask(_mask), brk(false) {}
- inline bool outer() const { return mask != 0; }
- inline bool inner() { return (brk = !brk); }
- inline _long next() {
// outerBreak lets a user-level `break` in the inner loop terminate the outer
// loop too: next() sets it, and noBreak() (run only when the inner body
// completes normally) clears it again.
+ bool outerBreak;
+ Vc_ALWAYS_INLINE ForeachHelper(_long _mask) : mask(_mask), brk(false), outerBreak(false) {}
+ Vc_ALWAYS_INLINE bool outer() const { return (mask != 0) && !outerBreak; }
+ Vc_ALWAYS_INLINE bool inner() { return (brk = !brk); }
+ Vc_ALWAYS_INLINE void noBreak() { outerBreak = false; }
+ Vc_ALWAYS_INLINE _long next() {
+ outerBreak = true;
#ifdef VC_GNU_ASM
// Extract the lowest set bit index and clear that bit (btr) for the next pass.
const _long bit = __builtin_ctzl(mask);
__asm__("btr %1,%0" : "+r"(mask) : "r"(bit));
};
// Vc_foreach_bit(it, mask): binds `it` to each set-bit index of `mask` in turn.
// The helper object name is made unique so nested foreach loops don't collide.
#define Vc_foreach_bit(_it_, _mask_) \
- for (Vc::SSE::ForeachHelper _Vc_foreach_bit_helper((_mask_).toInt()); _Vc_foreach_bit_helper.outer(); ) \
- for (_it_ = _Vc_foreach_bit_helper.next(); _Vc_foreach_bit_helper.inner(); )
+ for (Vc::SSE::ForeachHelper Vc__make_unique(foreach_bit_obj)((_mask_).toInt()); Vc__make_unique(foreach_bit_obj).outer(); ) \
+ for (_it_ = Vc__make_unique(foreach_bit_obj).next(); Vc__make_unique(foreach_bit_obj).inner(); Vc__make_unique(foreach_bit_obj).noBreak())
// shiftMask: 16 bits, one per byte of the register, regardless of entry count.
-template<unsigned int Size> inline int Mask<Size>::shiftMask() const
+template<unsigned int Size> Vc_ALWAYS_INLINE Vc_PURE int Mask<Size>::shiftMask() const
{
    return _mm_movemask_epi8(dataI());
}
// Size conversions. Widening (fewer, larger entries) duplicates lanes via
// unpacklo; narrowing (more, smaller entries) saturating-packs them. Only the
// low entries of the source survive a widening conversion.
-template<> template<> inline Mask<2>::Mask(const Mask<4> &x) {
+template<> template<> Vc_ALWAYS_INLINE Mask<2>::Mask(const Mask<4> &x) {
    k = _mm_unpacklo_ps(x.data(), x.data());
}
-template<> template<> inline Mask<2>::Mask(const Mask<8> &x) {
+template<> template<> Vc_ALWAYS_INLINE Mask<2>::Mask(const Mask<8> &x) {
    _M128I tmp = _mm_unpacklo_epi16(x.dataI(), x.dataI());
    k = _mm_castsi128_ps(_mm_unpacklo_epi32(tmp, tmp));
}
-template<> template<> inline Mask<2>::Mask(const Mask<16> &x) {
+template<> template<> Vc_ALWAYS_INLINE Mask<2>::Mask(const Mask<16> &x) {
    _M128I tmp = _mm_unpacklo_epi8(x.dataI(), x.dataI());
    tmp = _mm_unpacklo_epi16(tmp, tmp);
    k = _mm_castsi128_ps(_mm_unpacklo_epi32(tmp, tmp));
}
-template<> template<> inline Mask<4>::Mask(const Mask<2> &x) {
+template<> template<> Vc_ALWAYS_INLINE Mask<4>::Mask(const Mask<2> &x) {
    k = _mm_castsi128_ps(_mm_packs_epi16(x.dataI(), _mm_setzero_si128()));
}
-template<> template<> inline Mask<4>::Mask(const Mask<8> &x) {
+template<> template<> Vc_ALWAYS_INLINE Mask<4>::Mask(const Mask<8> &x) {
    k = _mm_castsi128_ps(_mm_unpacklo_epi16(x.dataI(), x.dataI()));
}
-template<> template<> inline Mask<4>::Mask(const Mask<16> &x) {
+template<> template<> Vc_ALWAYS_INLINE Mask<4>::Mask(const Mask<16> &x) {
    _M128I tmp = _mm_unpacklo_epi8(x.dataI(), x.dataI());
    k = _mm_castsi128_ps(_mm_unpacklo_epi16(tmp, tmp));
}
-template<> template<> inline Mask<8>::Mask(const Mask<2> &x) {
+template<> template<> Vc_ALWAYS_INLINE Mask<8>::Mask(const Mask<2> &x) {
    _M128I tmp = _mm_packs_epi16(x.dataI(), x.dataI());
    k = _mm_castsi128_ps(_mm_packs_epi16(tmp, tmp));
}
-template<> template<> inline Mask<8>::Mask(const Mask<4> &x) {
+template<> template<> Vc_ALWAYS_INLINE Mask<8>::Mask(const Mask<4> &x) {
    k = _mm_castsi128_ps(_mm_packs_epi16(x.dataI(), x.dataI()));
}
-template<> template<> inline Mask<8>::Mask(const Mask<16> &x) {
+template<> template<> Vc_ALWAYS_INLINE Mask<8>::Mask(const Mask<16> &x) {
    k = _mm_castsi128_ps(_mm_unpacklo_epi8(x.dataI(), x.dataI()));
}
x[1].k = _mm_castsi128_ps(_mm_unpackhi_epi8 (dataI(), dataI()));
}
// toInt: one bit per mask entry, entry 0 in bit 0. Mask<8> must first pack
// its 16-bit entries down to bytes so movemask_epi8 yields one bit each.
-template<> inline int Mask< 2>::toInt() const { return _mm_movemask_pd(dataD()); }
-template<> inline int Mask< 4>::toInt() const { return _mm_movemask_ps(data ()); }
-template<> inline int Mask< 8>::toInt() const { return _mm_movemask_epi8(_mm_packs_epi16(dataI(), _mm_setzero_si128())); }
-template<> inline int Mask<16>::toInt() const { return _mm_movemask_epi8(dataI()); }
+template<> Vc_ALWAYS_INLINE Vc_PURE int Mask< 2>::toInt() const { return _mm_movemask_pd(dataD()); }
+template<> Vc_ALWAYS_INLINE Vc_PURE int Mask< 4>::toInt() const { return _mm_movemask_ps(data ()); }
+template<> Vc_ALWAYS_INLINE Vc_PURE int Mask< 8>::toInt() const { return _mm_movemask_epi8(_mm_packs_epi16(dataI(), _mm_setzero_si128())); }
+template<> Vc_ALWAYS_INLINE Vc_PURE int Mask<16>::toInt() const { return _mm_movemask_epi8(dataI()); }
// operator[]: tests entry `index`. Mask<8> uses shiftMask (2 bits per 16-bit
// entry), hence the `2 * index` bit position.
-template<> inline bool Mask< 2>::operator[](int index) const { return toInt() & (1 << index); }
-template<> inline bool Mask< 4>::operator[](int index) const { return toInt() & (1 << index); }
-template<> inline bool Mask< 8>::operator[](int index) const { return shiftMask() & (1 << 2 * index); }
-template<> inline bool Mask<16>::operator[](int index) const { return toInt() & (1 << index); }
+template<> Vc_ALWAYS_INLINE Vc_PURE bool Mask< 2>::operator[](int index) const { return toInt() & (1 << index); }
+template<> Vc_ALWAYS_INLINE Vc_PURE bool Mask< 4>::operator[](int index) const { return toInt() & (1 << index); }
+template<> Vc_ALWAYS_INLINE Vc_PURE bool Mask< 8>::operator[](int index) const { return shiftMask() & (1 << 2 * index); }
+template<> Vc_ALWAYS_INLINE Vc_PURE bool Mask<16>::operator[](int index) const { return toInt() & (1 << index); }
// count: number of set entries. The cpuid gate switches from the SSE4.2 macro
// to VC_IMPL_POPCNT, since _mm_popcnt_u32 needs the POPCNT feature bit, which
// is separate from SSE4.2.
-template<> inline int Mask<2>::count() const
+template<> Vc_ALWAYS_INLINE Vc_PURE int Mask<2>::count() const
{
    int mask = _mm_movemask_pd(dataD());
    return (mask & 1) + (mask >> 1);
}
-template<> inline int Mask<4>::count() const
+template<> Vc_ALWAYS_INLINE Vc_PURE int Mask<4>::count() const
{
-#ifdef VC_IMPL_SSE4_2
+#ifdef VC_IMPL_POPCNT
    return _mm_popcnt_u32(_mm_movemask_ps(data()));
//X     tmp = (tmp & 5) + ((tmp >> 1) & 5);
//X     return (tmp & 3) + ((tmp >> 2) & 3);
#endif
}
-template<> inline int Mask<8>::count() const
+template<> Vc_ALWAYS_INLINE Vc_PURE int Mask<8>::count() const
{
-#ifdef VC_IMPL_SSE4_2
+#ifdef VC_IMPL_POPCNT
    // 16-bit entries give two movemask bits each, hence the division by 2.
    return _mm_popcnt_u32(_mm_movemask_epi8(dataI())) / 2;
#else
//X     int tmp = _mm_movemask_epi8(dataI());
#endif
}
-template<> inline int Mask<16>::count() const
+template<> Vc_ALWAYS_INLINE Vc_PURE int Mask<16>::count() const
{
int tmp = _mm_movemask_epi8(dataI());
-#ifdef VC_IMPL_SSE4_2
+#ifdef VC_IMPL_POPCNT
return _mm_popcnt_u32(tmp);
#else
tmp = (tmp & 0x5555) + ((tmp >> 1) & 0x5555);
typedef Float8Mask Argument;
#endif
// Float8Mask: an 8-entry float mask stored as two __m128 registers (M256 k),
// k[0] holding entries 0-3 and k[1] entries 4-7.
- inline Float8Mask() {}
- inline Float8Mask(const M256 &x) : k(x) {}
- inline explicit Float8Mask(VectorSpecialInitializerZero::ZEnum) {
+ Vc_ALWAYS_INLINE Float8Mask() {}
+ Vc_ALWAYS_INLINE Float8Mask(const M256 &x) : k(x) {}
+ Vc_ALWAYS_INLINE explicit Float8Mask(VectorSpecialInitializerZero::ZEnum) {
k[0] = _mm_setzero_ps();
k[1] = _mm_setzero_ps();
}
- inline explicit Float8Mask(VectorSpecialInitializerOne::OEnum) {
+ Vc_ALWAYS_INLINE explicit Float8Mask(VectorSpecialInitializerOne::OEnum) {
k[0] = _mm_setallone_ps();
k[1] = _mm_setallone_ps();
}
- inline explicit Float8Mask(bool b) {
+ Vc_ALWAYS_INLINE explicit Float8Mask(bool b) {
const __m128 tmp = b ? _mm_setallone_ps() : _mm_setzero_ps();
k[0] = tmp;
k[1] = tmp;
}
// Widen an 8-entry 16-bit mask: unpack each 16-bit entry to 32 bits.
- inline Float8Mask(const Mask<VectorSize> &a) {
+ Vc_ALWAYS_INLINE Float8Mask(const Mask<VectorSize> &a) {
k[0] = _mm_castsi128_ps(_mm_unpacklo_epi16(a.dataI(), a.dataI()));
k[1] = _mm_castsi128_ps(_mm_unpackhi_epi16(a.dataI(), a.dataI()));
}
// Equal iff BOTH halves compare equal.
- inline bool operator==(const Float8Mask &rhs) const {
+ Vc_ALWAYS_INLINE Vc_PURE bool operator==(const Float8Mask &rhs) const {
return MaskHelper<PartialSize>::cmpeq (k[0], rhs.k[0])
&& MaskHelper<PartialSize>::cmpeq (k[1], rhs.k[1]);
}
// Not-equal iff EITHER half differs — the || fixes the previous &&, which
// wrongly required both halves to differ.
- inline bool operator!=(const Float8Mask &rhs) const {
+ Vc_ALWAYS_INLINE Vc_PURE bool operator!=(const Float8Mask &rhs) const {
return MaskHelper<PartialSize>::cmpneq(k[0], rhs.k[0])
- && MaskHelper<PartialSize>::cmpneq(k[1], rhs.k[1]);
+ || MaskHelper<PartialSize>::cmpneq(k[1], rhs.k[1]);
}
// Bitwise/logical combinations are applied independently to each half.
- inline Float8Mask operator&&(const Float8Mask &rhs) const {
+ Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator&&(const Float8Mask &rhs) const {
Float8Mask r;
r.k[0] = _mm_and_ps(k[0], rhs.k[0]);
r.k[1] = _mm_and_ps(k[1], rhs.k[1]);
return r;
}
- inline Float8Mask operator& (const Float8Mask &rhs) const {
+ Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator& (const Float8Mask &rhs) const {
Float8Mask r;
r.k[0] = _mm_and_ps(k[0], rhs.k[0]);
r.k[1] = _mm_and_ps(k[1], rhs.k[1]);
return r;
}
- inline Float8Mask operator||(const Float8Mask &rhs) const {
+ Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator||(const Float8Mask &rhs) const {
Float8Mask r;
r.k[0] = _mm_or_ps(k[0], rhs.k[0]);
r.k[1] = _mm_or_ps(k[1], rhs.k[1]);
return r;
}
- inline Float8Mask operator| (const Float8Mask &rhs) const {
+ Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator| (const Float8Mask &rhs) const {
Float8Mask r;
r.k[0] = _mm_or_ps(k[0], rhs.k[0]);
r.k[1] = _mm_or_ps(k[1], rhs.k[1]);
return r;
}
- inline Float8Mask operator^ (const Float8Mask &rhs) const {
+ Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator^ (const Float8Mask &rhs) const {
Float8Mask r;
r.k[0] = _mm_xor_ps(k[0], rhs.k[0]);
r.k[1] = _mm_xor_ps(k[1], rhs.k[1]);
return r;
}
- inline Float8Mask operator!() const {
+ Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator!() const {
Float8Mask r;
r.k[0] = _mm_andnot_ps(k[0], _mm_setallone_ps());
r.k[1] = _mm_andnot_ps(k[1], _mm_setallone_ps());
return r;
}
- inline Float8Mask &operator&=(const Float8Mask &rhs) {
+ Vc_ALWAYS_INLINE Float8Mask &operator&=(const Float8Mask &rhs) {
k[0] = _mm_and_ps(k[0], rhs.k[0]);
k[1] = _mm_and_ps(k[1], rhs.k[1]);
return *this;
}
- inline Float8Mask &operator|=(const Float8Mask &rhs) {
+ Vc_ALWAYS_INLINE Float8Mask &operator|=(const Float8Mask &rhs) {
k[0] = _mm_or_ps (k[0], rhs.k[0]);
k[1] = _mm_or_ps (k[1], rhs.k[1]);
return *this;
}
// operator^= is new; completes the compound-assignment set, mirroring Mask.
+ Vc_ALWAYS_INLINE Float8Mask &operator^=(const Float8Mask &rhs) {
+ k[0] = _mm_xor_ps(k[0], rhs.k[0]);
+ k[1] = _mm_xor_ps(k[1], rhs.k[1]);
+ return *this;
+ }
// isFull: all 8 entries set — AND the halves, then test for all-ones.
// NOTE(review): the non-PTEST branch is only partially visible in this chunk.
- inline bool isFull () const {
+ Vc_ALWAYS_INLINE Vc_PURE bool isFull () const {
const _M128 tmp = _mm_and_ps(k[0], k[1]);
#ifdef VC_USE_PTEST
return _mm_testc_si128(_mm_castps_si128(tmp), _mm_setallone_si128());
//_mm_movemask_ps(k[1]) == 0xf;
#endif
}
// isEmpty: no entry set — OR the halves, then test for all-zero.
- inline bool isEmpty() const {
+ Vc_ALWAYS_INLINE Vc_PURE bool isEmpty() const {
const _M128 tmp = _mm_or_ps(k[0], k[1]);
#ifdef VC_USE_PTEST
return _mm_testz_si128(_mm_castps_si128(tmp), _mm_castps_si128(tmp));
//_mm_movemask_ps(k[1]) == 0x0;
#endif
}
- inline bool isMix() const {
+ Vc_ALWAYS_INLINE Vc_PURE bool isMix() const {
+ // consider [1111 0000]
+ // solution:
+ // if k[0] != k[1] => return true
+ // if k[0] == k[1] => return k[0].isMix
#ifdef VC_USE_PTEST
// The old code ANDed the two per-half mix tests, so e.g. [1111 0000]
// (each half uniform but the whole mask mixed) was wrongly reported false.
- return _mm_test_mix_ones_zeros(_mm_castps_si128(k[0]), _mm_castps_si128(k[0])) &&
- _mm_test_mix_ones_zeros(_mm_castps_si128(k[1]), _mm_castps_si128(k[1]));
+ __m128i tmp = _mm_castps_si128(_mm_xor_ps(k[0], k[1]));
+ // tmp == 0 <=> k[0] == k[1]
+ return !_mm_testz_si128(tmp, tmp) ||
+ _mm_test_mix_ones_zeros(_mm_castps_si128(k[0]), _mm_setallone_si128());
#else
// Sum of the two 4-bit movemasks is 0 only when empty and 0x1e only when full.
const int tmp = _mm_movemask_ps(k[0]) + _mm_movemask_ps(k[1]);
return tmp > 0x0 && tmp < (0xf + 0xf);
}
#ifndef VC_NO_AUTOMATIC_BOOL_FROM_MASK
// Implicit bool conversion: true only when ALL entries are set.
- inline operator bool() const { return isFull(); }
+ Vc_ALWAYS_INLINE Vc_PURE operator bool() const { return isFull(); }
#endif
// 8-bit integer views of the mask: entry i in bit i, low half in bits 0-3.
- inline int shiftMask() const {
+ Vc_ALWAYS_INLINE Vc_PURE int shiftMask() const {
return (_mm_movemask_ps(k[1]) << 4) + _mm_movemask_ps(k[0]);
}
- inline int toInt() const { return (_mm_movemask_ps(k[1]) << 4) + _mm_movemask_ps(k[0]); }
+ Vc_ALWAYS_INLINE Vc_PURE int toInt() const { return (_mm_movemask_ps(k[1]) << 4) + _mm_movemask_ps(k[0]); }
- inline const M256 &data () const { return k; }
+ Vc_ALWAYS_INLINE Vc_PURE const M256 &data () const { return k; }
- inline bool operator[](int index) const {
+ Vc_ALWAYS_INLINE Vc_PURE bool operator[](int index) const {
return (toInt() & (1 << index)) != 0;
}
// count: number of set entries. Gated on POPCNT (the actual CPU feature for
// _mm_popcnt_u32) instead of SSE4.2.
- inline int count() const {
-#ifdef VC_IMPL_SSE4_2
+ Vc_ALWAYS_INLINE Vc_PURE int count() const {
+#ifdef VC_IMPL_POPCNT
return _mm_popcnt_u32(toInt());
#else
//X     int tmp1 = _mm_movemask_ps(k[0]);
#endif
}
// Index of the first set entry; undefined when the mask is empty.
- int firstOne() const;
+ Vc_ALWAYS_INLINE_L Vc_PURE_L int firstOne() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
private:
+#ifdef VC_COMPILE_BENCHMARKS
+ public:
+#endif
// Two __m128 registers: k[0] = entries 0-3, k[1] = entries 4-7.
M256 k;
};
// firstOne: index of the lowest set bit of toInt(); result is undefined for
// an empty mask (see the class-level doc comment).
-template<unsigned int Size> inline int Mask<Size>::firstOne() const
+template<unsigned int Size> Vc_ALWAYS_INLINE Vc_PURE int Mask<Size>::firstOne() const
{
    const int mask = toInt();
#ifdef _MSC_VER
#endif
    return bit;
}
-inline int Float8Mask::firstOne() const
+Vc_ALWAYS_INLINE Vc_PURE int Float8Mask::firstOne() const
{
    const int mask = toInt();
#ifdef _MSC_VER
}
// Mask<VectorSize> from Float8Mask: pack each 32-bit half-mask entry down to
// 16 bits and concatenate the halves into one register.
template<unsigned int VectorSize>
-inline Mask<VectorSize>::Mask(const Float8Mask &m)
+Vc_ALWAYS_INLINE Mask<VectorSize>::Mask(const Float8Mask &m)
    : k(_mm_castsi128_ps(_mm_packs_epi32(_mm_castps_si128(m.data()[0]), _mm_castps_si128(m.data()[1])))) {}
class Float8GatherMask
//X _sse_vector_foreach_it = _sse_bitscan_initialized(_sse_vector_foreach_it, mask.data()))
//X for (int _sse_vector_foreach_inner = 1, it = _sse_vector_foreach_it; _sse_vector_foreach_inner; --_sse_vector_foreach_inner)
+// Operators
+// let binary and/or/xor work for any combination of masks (as long as they have the same sizeof)
+template<unsigned int LSize, unsigned int RSize> Mask<LSize> operator& (const Mask<LSize> &lhs, const Mask<RSize> &rhs) { return _mm_and_ps(lhs.data(), rhs.data()); }
+template<unsigned int LSize, unsigned int RSize> Mask<LSize> operator| (const Mask<LSize> &lhs, const Mask<RSize> &rhs) { return _mm_or_ps (lhs.data(), rhs.data()); }
+template<unsigned int LSize, unsigned int RSize> Mask<LSize> operator^ (const Mask<LSize> &lhs, const Mask<RSize> &rhs) { return _mm_xor_ps(lhs.data(), rhs.data()); }
+
+// binary and/or/xor cannot work with one operand larger than the other
+// (void return, declared but never defined: any use fails at compile/link time)
+template<unsigned int Size> void operator& (const Mask<Size> &lhs, const Float8Mask &rhs);
+template<unsigned int Size> void operator| (const Mask<Size> &lhs, const Float8Mask &rhs);
+template<unsigned int Size> void operator^ (const Mask<Size> &lhs, const Float8Mask &rhs);
+template<unsigned int Size> void operator& (const Float8Mask &rhs, const Mask<Size> &lhs);
+template<unsigned int Size> void operator| (const Float8Mask &rhs, const Mask<Size> &lhs);
+template<unsigned int Size> void operator^ (const Float8Mask &rhs, const Mask<Size> &lhs);
+
+// disable logical and/or for incompatible masks
+template<unsigned int LSize, unsigned int RSize> void operator&&(const Mask<LSize> &lhs, const Mask<RSize> &rhs);
+template<unsigned int LSize, unsigned int RSize> void operator||(const Mask<LSize> &lhs, const Mask<RSize> &rhs);
+template<unsigned int Size> void operator&&(const Mask<Size> &lhs, const Float8Mask &rhs);
+template<unsigned int Size> void operator||(const Mask<Size> &lhs, const Float8Mask &rhs);
+template<unsigned int Size> void operator&&(const Float8Mask &rhs, const Mask<Size> &lhs);
+template<unsigned int Size> void operator||(const Float8Mask &rhs, const Mask<Size> &lhs);
+
+// logical and/or for compatible masks
+// (the more specialized same-Size overloads below win over the deleted pair above)
+template<unsigned int Size> Vc_ALWAYS_INLINE Vc_PURE Mask<Size> operator&&(const Mask<Size> &lhs, const Mask<Size> &rhs) { return _mm_and_ps(lhs.data(), rhs.data()); }
+template<unsigned int Size> Vc_ALWAYS_INLINE Vc_PURE Mask<Size> operator||(const Mask<Size> &lhs, const Mask<Size> &rhs) { return _mm_or_ps (lhs.data(), rhs.data()); }
+// Float8Mask mixed with Mask<8> is converted to Mask<8> and combined there.
+Vc_ALWAYS_INLINE Vc_PURE Mask<8> operator&&(const Float8Mask &rhs, const Mask<8> &lhs) { return static_cast<Mask<8> >(rhs) && lhs; }
+Vc_ALWAYS_INLINE Vc_PURE Mask<8> operator||(const Float8Mask &rhs, const Mask<8> &lhs) { return static_cast<Mask<8> >(rhs) || lhs; }
+Vc_ALWAYS_INLINE Vc_PURE Mask<8> operator&&(const Mask<8> &rhs, const Float8Mask &lhs) { return rhs && static_cast<Mask<8> >(lhs); }
+Vc_ALWAYS_INLINE Vc_PURE Mask<8> operator||(const Mask<8> &rhs, const Float8Mask &lhs) { return rhs || static_cast<Mask<8> >(lhs); }
+
} // namespace SSE
} // namespace Vc
+} // namespace AliRoot
+
+#include "undomacros.h"
#endif // SSE_MASK_H