]> git.uio.no Git - u/mrichter/AliRoot.git/blame - Vc/include/Vc/sse/vector.h
Vc package added (version 0.6.79-dev)
[u/mrichter/AliRoot.git] / Vc / include / Vc / sse / vector.h
CommitLineData
f22341db 1/* This file is part of the Vc library.
2
3 Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
4
5 Vc is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation, either version 3 of
8 the License, or (at your option) any later version.
9
10 Vc is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17
18*/
19
20#ifndef SSE_VECTOR_H
21#define SSE_VECTOR_H
22
23#include "intrinsics.h"
24#include "types.h"
25#include "vectorhelper.h"
26#include "mask.h"
27#include "../common/aliasingentryhelper.h"
28#include "../common/memoryfwd.h"
29#include <algorithm>
30#include <cmath>
31
32#include "macros.h"
33
34#ifdef isfinite
35#undef isfinite
36#endif
37#ifdef isnan
38#undef isnan
39#endif
40
41namespace Vc
42{
43namespace SSE
44{
45template<typename T>
46class WriteMaskedVector
47{
48 friend class Vector<T>;
49 typedef typename VectorTraits<T>::MaskType Mask;
50 typedef typename Vector<T>::EntryType EntryType;
51 public:
52 FREE_STORE_OPERATORS_ALIGNED(16)
53 //prefix
54 inline INTRINSIC Vector<T> &operator++() {
55 vec->data() = VectorHelper<T>::add(vec->data(),
56 VectorHelper<T>::notMaskedToZero(VectorHelper<T>::one(), mask.data())
57 );
58 return *vec;
59 }
60 inline INTRINSIC Vector<T> &operator--() {
61 vec->data() = VectorHelper<T>::sub(vec->data(),
62 VectorHelper<T>::notMaskedToZero(VectorHelper<T>::one(), mask.data())
63 );
64 return *vec;
65 }
66 //postfix
67 inline INTRINSIC Vector<T> operator++(int) {
68 Vector<T> ret(*vec);
69 vec->data() = VectorHelper<T>::add(vec->data(),
70 VectorHelper<T>::notMaskedToZero(VectorHelper<T>::one(), mask.data())
71 );
72 return ret;
73 }
74 inline INTRINSIC Vector<T> operator--(int) {
75 Vector<T> ret(*vec);
76 vec->data() = VectorHelper<T>::sub(vec->data(),
77 VectorHelper<T>::notMaskedToZero(VectorHelper<T>::one(), mask.data())
78 );
79 return ret;
80 }
81
82 inline INTRINSIC Vector<T> &operator+=(const Vector<T> &x) {
83 vec->data() = VectorHelper<T>::add(vec->data(), VectorHelper<T>::notMaskedToZero(x.data(), mask.data()));
84 return *vec;
85 }
86 inline INTRINSIC Vector<T> &operator-=(const Vector<T> &x) {
87 vec->data() = VectorHelper<T>::sub(vec->data(), VectorHelper<T>::notMaskedToZero(x.data(), mask.data()));
88 return *vec;
89 }
90 inline INTRINSIC Vector<T> &operator*=(const Vector<T> &x) {
91 vec->data() = VectorHelper<T>::mul(vec->data(), x.data(), mask.data());
92 return *vec;
93 }
94 inline INTRINSIC CONST Vector<T> &operator/=(const Vector<T> &x);
95
96 inline INTRINSIC Vector<T> &operator+=(EntryType x) {
97 return operator+=(Vector<T>(x));
98 }
99 inline INTRINSIC Vector<T> &operator-=(EntryType x) {
100 return operator-=(Vector<T>(x));
101 }
102 inline INTRINSIC Vector<T> &operator*=(EntryType x) {
103 return operator*=(Vector<T>(x));
104 }
105 inline INTRINSIC Vector<T> &operator/=(EntryType x) {
106 return operator/=(Vector<T>(x));
107 }
108
109 inline INTRINSIC Vector<T> &operator=(const Vector<T> &x) {
110 vec->assign(x, mask);
111 return *vec;
112 }
113
114 inline INTRINSIC Vector<T> &operator=(EntryType x) {
115 vec->assign(Vector<T>(x), mask);
116 return *vec;
117 }
118
119 template<typename F> inline void INTRINSIC call(const F &f) const {
120 return vec->call(f, mask);
121 }
122 template<typename F> inline void INTRINSIC call(F &f) const {
123 return vec->call(f, mask);
124 }
125 template<typename F> inline Vector<T> INTRINSIC apply(const F &f) const {
126 return vec->apply(f, mask);
127 }
128 template<typename F> inline Vector<T> INTRINSIC apply(F &f) const {
129 return vec->apply(f, mask);
130 }
131
132 private:
133 WriteMaskedVector(Vector<T> *v, const Mask &k) : vec(v), mask(k) {}
134 Vector<T> *const vec;
135 Mask mask;
136};
137
138template<typename T> class Vector
139{
140 friend class WriteMaskedVector<T>;
141 protected:
142 typedef typename VectorTraits<T>::StorageType StorageType;
143 StorageType d;
144 typedef typename VectorTraits<T>::GatherMaskType GatherMask;
145 typedef VectorHelper<typename VectorTraits<T>::VectorType> HV;
146 typedef VectorHelper<T> HT;
147 public:
148 FREE_STORE_OPERATORS_ALIGNED(16)
149
150 enum Constants { Size = VectorTraits<T>::Size };
151 typedef typename VectorTraits<T>::VectorType VectorType;
152 typedef typename VectorTraits<T>::EntryType EntryType;
153 typedef typename VectorTraits<T>::IndexType IndexType;
154 typedef typename VectorTraits<T>::MaskType Mask;
155 typedef typename Mask::Argument MaskArg;
156 typedef Vc::Memory<Vector<T>, Size> Memory;
157#ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
158 typedef const Vector<T> &AsArg;
159#else
160 typedef const Vector<T> AsArg;
161#endif
162
163 typedef T _T;
164
165 ///////////////////////////////////////////////////////////////////////////////////////////
166 // uninitialized
167 inline Vector() {}
168
169 ///////////////////////////////////////////////////////////////////////////////////////////
170 // constants
171 explicit inline INTRINSIC_L Vector(VectorSpecialInitializerZero::ZEnum) INTRINSIC_R;
172 explicit inline INTRINSIC_L Vector(VectorSpecialInitializerOne::OEnum) INTRINSIC_R;
173 explicit inline INTRINSIC_L Vector(VectorSpecialInitializerIndexesFromZero::IEnum) INTRINSIC_R;
174 static inline INTRINSIC_L Vector Zero() INTRINSIC_R;
175 static inline INTRINSIC_L Vector One() INTRINSIC_R;
176 static inline INTRINSIC_L Vector IndexesFromZero() INTRINSIC_R;
177 static inline INTRINSIC_L Vector Random() INTRINSIC_R;
178
179 ///////////////////////////////////////////////////////////////////////////////////////////
180 // internal: required to enable returning objects of VectorType
181 inline Vector(const VectorType &x) : d(x) {}
182
183 ///////////////////////////////////////////////////////////////////////////////////////////
184 // static_cast / copy ctor
185 template<typename OtherT> explicit inline INTRINSIC_L Vector(const Vector<OtherT> &x) INTRINSIC_R;
186
187 // implicit cast
188 template<typename OtherT> inline INTRINSIC_L Vector &operator=(const Vector<OtherT> &x) INTRINSIC_R;
189
190 // copy assignment
191 inline Vector &operator=(AsArg v) { d.v() = v.d.v(); return *this; }
192
193 ///////////////////////////////////////////////////////////////////////////////////////////
194 // broadcast
195 explicit Vector(EntryType a);
196 template<typename TT> inline INTRINSIC Vector(TT x, VC_EXACT_TYPE(TT, EntryType, void *) = 0) : d(HT::set(x)) {}
197 static inline Vector INTRINSIC broadcast4(const EntryType *x) { return Vector<T>(x); }
198 inline Vector &operator=(EntryType a) { d.v() = HT::set(a); return *this; }
199
200 ///////////////////////////////////////////////////////////////////////////////////////////
201 // load ctors
202 explicit inline INTRINSIC_L
203 Vector(const EntryType *x) INTRINSIC_R;
204 template<typename Alignment> inline INTRINSIC_L
205 Vector(const EntryType *x, Alignment align) INTRINSIC_R;
206 template<typename OtherT> explicit inline INTRINSIC_L
207 Vector(const OtherT *x) INTRINSIC_R;
208 template<typename OtherT, typename Alignment> inline INTRINSIC_L
209 Vector(const OtherT *x, Alignment align) INTRINSIC_R;
210
211 ///////////////////////////////////////////////////////////////////////////////////////////
212 // load member functions
213 inline INTRINSIC_L
214 void load(const EntryType *mem) INTRINSIC_R;
215 template<typename Alignment> inline INTRINSIC_L
216 void load(const EntryType *mem, Alignment align) INTRINSIC_R;
217 template<typename OtherT> inline INTRINSIC_L
218 void load(const OtherT *mem) INTRINSIC_R;
219 template<typename OtherT, typename Alignment> inline INTRINSIC_L
220 void load(const OtherT *mem, Alignment align) INTRINSIC_R;
221
222 ///////////////////////////////////////////////////////////////////////////////////////////
223 // expand 1 float_v to 2 double_v XXX rationale? remove it for release? XXX
224 explicit inline INTRINSIC_L Vector(const Vector<typename CtorTypeHelper<T>::Type> *a) INTRINSIC_R;
225 void expand(Vector<typename ExpandTypeHelper<T>::Type> *x) const;
226
227 ///////////////////////////////////////////////////////////////////////////////////////////
228 // zeroing
229 inline void INTRINSIC_L setZero() INTRINSIC_R;
230 inline void INTRINSIC_L setZero(const Mask &k) INTRINSIC_R;
231
232 inline void INTRINSIC_L setQnan() INTRINSIC_R;
233 inline void INTRINSIC_L setQnan(typename Mask::Argument k) INTRINSIC_R;
234
235 ///////////////////////////////////////////////////////////////////////////////////////////
236 // stores
237 inline void INTRINSIC_L store(EntryType *mem) const INTRINSIC_R;
238 inline void INTRINSIC_L store(EntryType *mem, const Mask &mask) const INTRINSIC_R;
239 template<typename A> inline void INTRINSIC_L store(EntryType *mem, A align) const INTRINSIC_R;
240 template<typename A> inline void INTRINSIC_L store(EntryType *mem, const Mask &mask, A align) const INTRINSIC_R;
241
242 ///////////////////////////////////////////////////////////////////////////////////////////
243 // swizzles
244 inline const Vector<T> INTRINSIC_L CONST_L &abcd() const INTRINSIC_R CONST_R;
245 inline const Vector<T> INTRINSIC_L CONST_L cdab() const INTRINSIC_R CONST_R;
246 inline const Vector<T> INTRINSIC_L CONST_L badc() const INTRINSIC_R CONST_R;
247 inline const Vector<T> INTRINSIC_L CONST_L aaaa() const INTRINSIC_R CONST_R;
248 inline const Vector<T> INTRINSIC_L CONST_L bbbb() const INTRINSIC_R CONST_R;
249 inline const Vector<T> INTRINSIC_L CONST_L cccc() const INTRINSIC_R CONST_R;
250 inline const Vector<T> INTRINSIC_L CONST_L dddd() const INTRINSIC_R CONST_R;
251 inline const Vector<T> INTRINSIC_L CONST_L bcad() const INTRINSIC_R CONST_R;
252 inline const Vector<T> INTRINSIC_L CONST_L bcda() const INTRINSIC_R CONST_R;
253 inline const Vector<T> INTRINSIC_L CONST_L dabc() const INTRINSIC_R CONST_R;
254 inline const Vector<T> INTRINSIC_L CONST_L acbd() const INTRINSIC_R CONST_R;
255 inline const Vector<T> INTRINSIC_L CONST_L dbca() const INTRINSIC_R CONST_R;
256 inline const Vector<T> INTRINSIC_L CONST_L dcba() const INTRINSIC_R CONST_R;
257
258 ///////////////////////////////////////////////////////////////////////////////////////////
259 // gathers
260 template<typename IndexT> Vector(const EntryType *mem, const IndexT *indexes);
261 template<typename IndexT> Vector(const EntryType *mem, const Vector<IndexT> indexes);
262 template<typename IndexT> Vector(const EntryType *mem, const IndexT *indexes, MaskArg mask);
263 template<typename IndexT> Vector(const EntryType *mem, const Vector<IndexT> indexes, MaskArg mask);
264 template<typename S1, typename IT> Vector(const S1 *array, const EntryType S1::* member1, const IT indexes);
265 template<typename S1, typename IT> Vector(const S1 *array, const EntryType S1::* member1, const IT indexes, MaskArg mask);
266 template<typename S1, typename S2, typename IT> Vector(const S1 *array, const S2 S1::* member1, const EntryType S2::* member2, const IT indexes);
267 template<typename S1, typename S2, typename IT> Vector(const S1 *array, const S2 S1::* member1, const EntryType S2::* member2, const IT indexes, MaskArg mask);
268 template<typename S1, typename IT1, typename IT2> Vector(const S1 *array, const EntryType *const S1::* ptrMember1, const IT1 outerIndexes, const IT2 innerIndexes);
269 template<typename S1, typename IT1, typename IT2> Vector(const S1 *array, const EntryType *const S1::* ptrMember1, const IT1 outerIndexes, const IT2 innerIndexes, MaskArg mask);
270 template<typename Index> void gather(const EntryType *mem, const Index indexes);
271 template<typename Index> void gather(const EntryType *mem, const Index indexes, MaskArg mask);
272#ifdef VC_USE_SET_GATHERS
273 template<typename IT> void gather(const EntryType *mem, Vector<IT> indexes, MaskArg mask);
274#endif
275 template<typename S1, typename IT> void gather(const S1 *array, const EntryType S1::* member1, const IT indexes);
276 template<typename S1, typename IT> void gather(const S1 *array, const EntryType S1::* member1, const IT indexes, MaskArg mask);
277 template<typename S1, typename S2, typename IT> void gather(const S1 *array, const S2 S1::* member1, const EntryType S2::* member2, const IT indexes);
278 template<typename S1, typename S2, typename IT> void gather(const S1 *array, const S2 S1::* member1, const EntryType S2::* member2, const IT indexes, MaskArg mask);
279 template<typename S1, typename IT1, typename IT2> void gather(const S1 *array, const EntryType *const S1::* ptrMember1, const IT1 outerIndexes, const IT2 innerIndexes);
280 template<typename S1, typename IT1, typename IT2> void gather(const S1 *array, const EntryType *const S1::* ptrMember1, const IT1 outerIndexes, const IT2 innerIndexes, MaskArg mask);
281
282 ///////////////////////////////////////////////////////////////////////////////////////////
283 // scatters
284 template<typename Index> void scatter(EntryType *mem, const Index indexes) const;
285 template<typename Index> void scatter(EntryType *mem, const Index indexes, MaskArg mask) const;
286 template<typename S1, typename IT> void scatter(S1 *array, EntryType S1::* member1, const IT indexes) const;
287 template<typename S1, typename IT> void scatter(S1 *array, EntryType S1::* member1, const IT indexes, MaskArg mask) const;
288 template<typename S1, typename S2, typename IT> void scatter(S1 *array, S2 S1::* member1, EntryType S2::* member2, const IT indexes) const;
289 template<typename S1, typename S2, typename IT> void scatter(S1 *array, S2 S1::* member1, EntryType S2::* member2, const IT indexes, MaskArg mask) const;
290 template<typename S1, typename IT1, typename IT2> void scatter(S1 *array, EntryType *S1::* ptrMember1, const IT1 outerIndexes, const IT2 innerIndexes) const;
291 template<typename S1, typename IT1, typename IT2> void scatter(S1 *array, EntryType *S1::* ptrMember1, const IT1 outerIndexes, const IT2 innerIndexes, MaskArg mask) const;
292
293 //prefix
294 inline Vector INTRINSIC &operator++() { data() = VectorHelper<T>::add(data(), VectorHelper<T>::one()); return *this; }
295 //postfix
296 inline Vector INTRINSIC operator++(int) { const Vector<T> r = *this; data() = VectorHelper<T>::add(data(), VectorHelper<T>::one()); return r; }
297
298 inline Common::AliasingEntryHelper<StorageType> INTRINSIC operator[](size_t index) {
299#if defined(VC_GCC) && VC_GCC >= 0x40300 && VC_GCC < 0x40400
300 ::Vc::Warnings::_operator_bracket_warning();
301#endif
302 return d.m(index);
303 }
304 inline EntryType INTRINSIC_L operator[](size_t index) const PURE INTRINSIC_R;
305
306 inline Vector PURE INTRINSIC operator~() const { return VectorHelper<VectorType>::andnot_(data(), VectorHelper<VectorType>::allone()); }
307 inline Vector<typename NegateTypeHelper<T>::Type> operator-() const;
308
309#define OP(symbol, fun) \
310 inline Vector INTRINSIC &operator symbol##=(const Vector<T> &x) { data() = VectorHelper<T>::fun(data(), x.data()); return *this; } \
311 inline Vector INTRINSIC &operator symbol##=(EntryType x) { return operator symbol##=(Vector<T>(x)); } \
312 inline Vector PURE INTRINSIC operator symbol(const Vector<T> &x) const { return HT::fun(data(), x.data()); } \
313 template<typename TT> inline VC_EXACT_TYPE(TT, EntryType, Vector) PURE INTRINSIC operator symbol(TT x) const { return operator symbol(Vector(x)); }
314
315 OP(+, add)
316 OP(-, sub)
317 OP(*, mul)
318#undef OP
319
320 inline INTRINSIC_L Vector &operator<<=(AsArg shift) INTRINSIC_R;
321 inline INTRINSIC_L Vector operator<< (AsArg shift) const INTRINSIC_R;
322 inline INTRINSIC_L Vector &operator<<=( int shift) INTRINSIC_R;
323 inline INTRINSIC_L Vector operator<< ( int shift) const INTRINSIC_R;
324 inline INTRINSIC_L Vector &operator>>=(AsArg shift) INTRINSIC_R;
325 inline INTRINSIC_L Vector operator>> (AsArg shift) const INTRINSIC_R;
326 inline INTRINSIC_L Vector &operator>>=( int shift) INTRINSIC_R;
327 inline INTRINSIC_L Vector operator>> ( int shift) const INTRINSIC_R;
328
329 inline INTRINSIC_L Vector &operator/=(const Vector<T> &x) INTRINSIC_R;
330 inline INTRINSIC_L Vector operator/ (const Vector<T> &x) const PURE INTRINSIC_R;
331 inline INTRINSIC_L Vector &operator/=(EntryType x) INTRINSIC_R;
332 template<typename TT> inline INTRINSIC_L VC_EXACT_TYPE(TT, typename DetermineEntryType<T>::Type, Vector<T>) operator/(TT x) const PURE INTRINSIC_R;
333
334#define OP(symbol, fun) \
335 inline Vector INTRINSIC_L &operator symbol##=(const Vector<T> &x) INTRINSIC_R; \
336 inline Vector INTRINSIC_L operator symbol(const Vector<T> &x) const PURE INTRINSIC_R; \
337 inline Vector INTRINSIC &operator symbol##=(EntryType x) { return operator symbol##=(Vector(x)); } \
338 template<typename TT> inline VC_EXACT_TYPE(TT, EntryType, Vector) PURE INTRINSIC operator symbol(TT x) const { return operator symbol(Vector(x)); }
339 OP(|, or_)
340 OP(&, and_)
341 OP(^, xor_)
342#undef OP
343#define OPcmp(symbol, fun) \
344 inline Mask PURE INTRINSIC operator symbol(const Vector<T> &x) const { return VectorHelper<T>::fun(data(), x.data()); } \
345 template<typename TT> inline VC_EXACT_TYPE(TT, EntryType, Mask) PURE INTRINSIC operator symbol(TT x) const { return operator symbol(Vector(x)); }
346
347 OPcmp(==, cmpeq)
348 OPcmp(!=, cmpneq)
349 OPcmp(>=, cmpnlt)
350 OPcmp(>, cmpnle)
351 OPcmp(<, cmplt)
352 OPcmp(<=, cmple)
353#undef OPcmp
354
355 inline void multiplyAndAdd(const Vector<T> &factor, const Vector<T> &summand) {
356 VectorHelper<T>::multiplyAndAdd(data(), factor, summand);
357 }
358
359 inline void assign( const Vector<T> &v, const Mask &mask ) {
360 const VectorType k = mm128_reinterpret_cast<VectorType>(mask.data());
361 data() = VectorHelper<VectorType>::blend(data(), v.data(), k);
362 }
363
364 template<typename V2> inline V2 staticCast() const { return StaticCastHelper<T, typename V2::_T>::cast(data()); }
365 template<typename V2> inline V2 reinterpretCast() const { return mm128_reinterpret_cast<typename V2::VectorType>(data()); }
366
367 inline WriteMaskedVector<T> INTRINSIC operator()(const Mask &k) { return WriteMaskedVector<T>(this, k); }
368
369 /**
370 * \return \p true This vector was completely filled. m2 might be 0 or != 0. You still have
371 * to test this.
372 * \p false This vector was not completely filled. m2 is all 0.
373 */
374 //inline bool pack(Mask &m1, Vector<T> &v2, Mask &m2) {
375 //return VectorHelper<T>::pack(data(), m1.data, v2.data(), m2.data);
376 //}
377
378 inline VectorType &data() { return d.v(); }
379 inline const VectorType &data() const { return d.v(); }
380
381 inline EntryType INTRINSIC min() const { return VectorHelper<T>::min(data()); }
382 inline EntryType INTRINSIC max() const { return VectorHelper<T>::max(data()); }
383 inline EntryType INTRINSIC product() const { return VectorHelper<T>::mul(data()); }
384 inline EntryType INTRINSIC sum() const { return VectorHelper<T>::add(data()); }
385 inline INTRINSIC_L EntryType min(MaskArg m) const INTRINSIC_R;
386 inline INTRINSIC_L EntryType max(MaskArg m) const INTRINSIC_R;
387 inline INTRINSIC_L EntryType product(MaskArg m) const INTRINSIC_R;
388 inline INTRINSIC_L EntryType sum(MaskArg m) const INTRINSIC_R;
389
390 inline Vector sorted() const { return SortHelper<VectorType, Size>::sort(data()); }
391
392 template<typename F> void callWithValuesSorted(F &f) {
393 EntryType value = d.m(0);
394 f(value);
395 for (int i = 1; i < Size; ++i) {
396 if (d.m(i) != value) {
397 value = d.m(i);
398 f(value);
399 }
400 }
401 }
402
403 template<typename F> inline void INTRINSIC call(const F &f) const {
404 for_all_vector_entries(i,
405 f(EntryType(d.m(i)));
406 );
407 }
408 template<typename F> inline void INTRINSIC call(F &f) const {
409 for_all_vector_entries(i,
410 f(EntryType(d.m(i)));
411 );
412 }
413
414 template<typename F> inline void INTRINSIC call(const F &f, const Mask &mask) const {
415 Vc_foreach_bit(size_t i, mask) {
416 f(EntryType(d.m(i)));
417 }
418 }
419 template<typename F> inline void INTRINSIC call(F &f, const Mask &mask) const {
420 Vc_foreach_bit(size_t i, mask) {
421 f(EntryType(d.m(i)));
422 }
423 }
424
425 template<typename F> inline Vector<T> INTRINSIC apply(const F &f) const {
426 Vector<T> r;
427 for_all_vector_entries(i,
428 r.d.m(i) = f(EntryType(d.m(i)));
429 );
430 return r;
431 }
432 template<typename F> inline Vector<T> INTRINSIC apply(F &f) const {
433 Vector<T> r;
434 for_all_vector_entries(i,
435 r.d.m(i) = f(EntryType(d.m(i)));
436 );
437 return r;
438 }
439
440 template<typename F> inline Vector<T> INTRINSIC apply(const F &f, const Mask &mask) const {
441 Vector<T> r(*this);
442 Vc_foreach_bit (size_t i, mask) {
443 r.d.m(i) = f(EntryType(r.d.m(i)));
444 }
445 return r;
446 }
447 template<typename F> inline Vector<T> INTRINSIC apply(F &f, const Mask &mask) const {
448 Vector<T> r(*this);
449 Vc_foreach_bit (size_t i, mask) {
450 r.d.m(i) = f(EntryType(r.d.m(i)));
451 }
452 return r;
453 }
454
455 template<typename IndexT> inline void INTRINSIC fill(EntryType (&f)(IndexT)) {
456 for_all_vector_entries(i,
457 d.m(i) = f(i);
458 );
459 }
460 inline void INTRINSIC fill(EntryType (&f)()) {
461 for_all_vector_entries(i,
462 d.m(i) = f();
463 );
464 }
465
466 inline INTRINSIC_L Vector copySign(typename Vector::AsArg reference) const INTRINSIC_R;
467 inline INTRINSIC_L Vector exponent() const INTRINSIC_R;
468};
469
470typedef Vector<double> double_v;
471typedef Vector<float> float_v;
472typedef Vector<float8> sfloat_v;
473typedef Vector<int> int_v;
474typedef Vector<unsigned int> uint_v;
475typedef Vector<short> short_v;
476typedef Vector<unsigned short> ushort_v;
477typedef double_v::Mask double_m;
478typedef float_v::Mask float_m;
479typedef sfloat_v::Mask sfloat_m;
480typedef int_v::Mask int_m;
481typedef uint_v::Mask uint_m;
482typedef short_v::Mask short_m;
483typedef ushort_v::Mask ushort_m;
484
485template<> inline Vector<float8> Vector<float8>::broadcast4(const float *x) {
486 const _M128 &v = VectorHelper<_M128>::load(x, Aligned);
487 return Vector<float8>(M256::create(v, v));
488}
489
490template<typename T> class SwizzledVector : public Vector<T> {};
491
492static inline int_v min(const int_v &x, const int_v &y) { return _mm_min_epi32(x.data(), y.data()); }
493static inline uint_v min(const uint_v &x, const uint_v &y) { return _mm_min_epu32(x.data(), y.data()); }
494static inline short_v min(const short_v &x, const short_v &y) { return _mm_min_epi16(x.data(), y.data()); }
495static inline ushort_v min(const ushort_v &x, const ushort_v &y) { return _mm_min_epu16(x.data(), y.data()); }
496static inline float_v min(const float_v &x, const float_v &y) { return _mm_min_ps(x.data(), y.data()); }
497static inline double_v min(const double_v &x, const double_v &y) { return _mm_min_pd(x.data(), y.data()); }
498static inline int_v max(const int_v &x, const int_v &y) { return _mm_max_epi32(x.data(), y.data()); }
499static inline uint_v max(const uint_v &x, const uint_v &y) { return _mm_max_epu32(x.data(), y.data()); }
500static inline short_v max(const short_v &x, const short_v &y) { return _mm_max_epi16(x.data(), y.data()); }
501static inline ushort_v max(const ushort_v &x, const ushort_v &y) { return _mm_max_epu16(x.data(), y.data()); }
502static inline float_v max(const float_v &x, const float_v &y) { return _mm_max_ps(x.data(), y.data()); }
503static inline double_v max(const double_v &x, const double_v &y) { return _mm_max_pd(x.data(), y.data()); }
504
505static inline sfloat_v min(const sfloat_v &x, const sfloat_v &y) {
506 return M256::create(_mm_min_ps(x.data()[0], y.data()[0]), _mm_min_ps(x.data()[1], y.data()[1]));
507}
508static inline sfloat_v max(const sfloat_v &x, const sfloat_v &y) {
509 return M256::create(_mm_max_ps(x.data()[0], y.data()[0]), _mm_max_ps(x.data()[1], y.data()[1]));
510}
511
512 template<typename T> static inline Vector<T> sqrt (const Vector<T> &x) { return VectorHelper<T>::sqrt(x.data()); }
513 template<typename T> static inline Vector<T> rsqrt(const Vector<T> &x) { return VectorHelper<T>::rsqrt(x.data()); }
514 template<typename T> static inline Vector<T> abs (const Vector<T> &x) { return VectorHelper<T>::abs(x.data()); }
515 template<typename T> static inline Vector<T> reciprocal(const Vector<T> &x) { return VectorHelper<T>::reciprocal(x.data()); }
516 template<typename T> static inline Vector<T> round(const Vector<T> &x) { return VectorHelper<T>::round(x.data()); }
517
518 template<typename T> static inline typename Vector<T>::Mask isfinite(const Vector<T> &x) { return VectorHelper<T>::isFinite(x.data()); }
519 template<typename T> static inline typename Vector<T>::Mask isnan(const Vector<T> &x) { return VectorHelper<T>::isNaN(x.data()); }
520
521#include "forceToRegisters.tcc"
// Specializations of forceToRegisters for Vector<float8>, whose data consists of two halves.
#ifdef VC_GNU_ASM
// Empty volatile asm statement that lists both halves as inputs with the "x" constraint
// (an SSE register on x86), so the compiler has to materialize them in registers.
template<>
inline void ALWAYS_INLINE forceToRegisters(const Vector<float8> &x1) {
    __asm__ __volatile__(""::"x"(x1.data()[0]), "x"(x1.data()[1]));
}
#elif defined(VC_MSVC)
#pragma optimize("g", off)
template<>
inline void ALWAYS_INLINE forceToRegisters(const Vector<float8> &/*x1*/) {
}
// Restore the optimization settings given on the command line. Without this, the "off"
// above stays in effect for every function defined after this header is included.
#pragma optimize("", on)
#endif
533} // namespace SSE
534} // namespace Vc
535
536#include "undomacros.h"
537#include "vector.tcc"
538#include "math.h"
539#endif // SSE_VECTOR_H