]>
Commit | Line | Data |
---|---|---|
f22341db | 1 | /* This file is part of the Vc library. |
2 | ||
3 | Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org> | |
4 | ||
5 | Vc is free software: you can redistribute it and/or modify | |
6 | it under the terms of the GNU Lesser General Public License as | |
7 | published by the Free Software Foundation, either version 3 of | |
8 | the License, or (at your option) any later version. | |
9 | ||
10 | Vc is distributed in the hope that it will be useful, but | |
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with Vc. If not, see <http://www.gnu.org/licenses/>. | |
17 | ||
18 | */ | |
19 | ||
20 | #ifndef SSE_VECTOR_H | |
21 | #define SSE_VECTOR_H | |
22 | ||
23 | #include "intrinsics.h" | |
24 | #include "types.h" | |
25 | #include "vectorhelper.h" | |
26 | #include "mask.h" | |
27 | #include "../common/aliasingentryhelper.h" | |
28 | #include "../common/memoryfwd.h" | |
29 | #include <algorithm> | |
30 | #include <cmath> | |
31 | ||
32 | #include "macros.h" | |
33 | ||
34 | #ifdef isfinite | |
35 | #undef isfinite | |
36 | #endif | |
37 | #ifdef isnan | |
38 | #undef isnan | |
39 | #endif | |
40 | ||
41 | namespace Vc | |
42 | { | |
43 | namespace SSE | |
44 | { | |
45 | template<typename T> | |
46 | class WriteMaskedVector | |
47 | { | |
48 | friend class Vector<T>; | |
49 | typedef typename VectorTraits<T>::MaskType Mask; | |
50 | typedef typename Vector<T>::EntryType EntryType; | |
51 | public: | |
52 | FREE_STORE_OPERATORS_ALIGNED(16) | |
53 | //prefix | |
54 | inline INTRINSIC Vector<T> &operator++() { | |
55 | vec->data() = VectorHelper<T>::add(vec->data(), | |
56 | VectorHelper<T>::notMaskedToZero(VectorHelper<T>::one(), mask.data()) | |
57 | ); | |
58 | return *vec; | |
59 | } | |
60 | inline INTRINSIC Vector<T> &operator--() { | |
61 | vec->data() = VectorHelper<T>::sub(vec->data(), | |
62 | VectorHelper<T>::notMaskedToZero(VectorHelper<T>::one(), mask.data()) | |
63 | ); | |
64 | return *vec; | |
65 | } | |
66 | //postfix | |
67 | inline INTRINSIC Vector<T> operator++(int) { | |
68 | Vector<T> ret(*vec); | |
69 | vec->data() = VectorHelper<T>::add(vec->data(), | |
70 | VectorHelper<T>::notMaskedToZero(VectorHelper<T>::one(), mask.data()) | |
71 | ); | |
72 | return ret; | |
73 | } | |
74 | inline INTRINSIC Vector<T> operator--(int) { | |
75 | Vector<T> ret(*vec); | |
76 | vec->data() = VectorHelper<T>::sub(vec->data(), | |
77 | VectorHelper<T>::notMaskedToZero(VectorHelper<T>::one(), mask.data()) | |
78 | ); | |
79 | return ret; | |
80 | } | |
81 | ||
82 | inline INTRINSIC Vector<T> &operator+=(const Vector<T> &x) { | |
83 | vec->data() = VectorHelper<T>::add(vec->data(), VectorHelper<T>::notMaskedToZero(x.data(), mask.data())); | |
84 | return *vec; | |
85 | } | |
86 | inline INTRINSIC Vector<T> &operator-=(const Vector<T> &x) { | |
87 | vec->data() = VectorHelper<T>::sub(vec->data(), VectorHelper<T>::notMaskedToZero(x.data(), mask.data())); | |
88 | return *vec; | |
89 | } | |
90 | inline INTRINSIC Vector<T> &operator*=(const Vector<T> &x) { | |
91 | vec->data() = VectorHelper<T>::mul(vec->data(), x.data(), mask.data()); | |
92 | return *vec; | |
93 | } | |
94 | inline INTRINSIC CONST Vector<T> &operator/=(const Vector<T> &x); | |
95 | ||
96 | inline INTRINSIC Vector<T> &operator+=(EntryType x) { | |
97 | return operator+=(Vector<T>(x)); | |
98 | } | |
99 | inline INTRINSIC Vector<T> &operator-=(EntryType x) { | |
100 | return operator-=(Vector<T>(x)); | |
101 | } | |
102 | inline INTRINSIC Vector<T> &operator*=(EntryType x) { | |
103 | return operator*=(Vector<T>(x)); | |
104 | } | |
105 | inline INTRINSIC Vector<T> &operator/=(EntryType x) { | |
106 | return operator/=(Vector<T>(x)); | |
107 | } | |
108 | ||
109 | inline INTRINSIC Vector<T> &operator=(const Vector<T> &x) { | |
110 | vec->assign(x, mask); | |
111 | return *vec; | |
112 | } | |
113 | ||
114 | inline INTRINSIC Vector<T> &operator=(EntryType x) { | |
115 | vec->assign(Vector<T>(x), mask); | |
116 | return *vec; | |
117 | } | |
118 | ||
119 | template<typename F> inline void INTRINSIC call(const F &f) const { | |
120 | return vec->call(f, mask); | |
121 | } | |
122 | template<typename F> inline void INTRINSIC call(F &f) const { | |
123 | return vec->call(f, mask); | |
124 | } | |
125 | template<typename F> inline Vector<T> INTRINSIC apply(const F &f) const { | |
126 | return vec->apply(f, mask); | |
127 | } | |
128 | template<typename F> inline Vector<T> INTRINSIC apply(F &f) const { | |
129 | return vec->apply(f, mask); | |
130 | } | |
131 | ||
132 | private: | |
133 | WriteMaskedVector(Vector<T> *v, const Mask &k) : vec(v), mask(k) {} | |
134 | Vector<T> *const vec; | |
135 | Mask mask; | |
136 | }; | |
137 | ||
138 | template<typename T> class Vector | |
139 | { | |
140 | friend class WriteMaskedVector<T>; | |
141 | protected: | |
142 | typedef typename VectorTraits<T>::StorageType StorageType; | |
143 | StorageType d; | |
144 | typedef typename VectorTraits<T>::GatherMaskType GatherMask; | |
145 | typedef VectorHelper<typename VectorTraits<T>::VectorType> HV; | |
146 | typedef VectorHelper<T> HT; | |
147 | public: | |
148 | FREE_STORE_OPERATORS_ALIGNED(16) | |
149 | ||
150 | enum Constants { Size = VectorTraits<T>::Size }; | |
151 | typedef typename VectorTraits<T>::VectorType VectorType; | |
152 | typedef typename VectorTraits<T>::EntryType EntryType; | |
153 | typedef typename VectorTraits<T>::IndexType IndexType; | |
154 | typedef typename VectorTraits<T>::MaskType Mask; | |
155 | typedef typename Mask::Argument MaskArg; | |
156 | typedef Vc::Memory<Vector<T>, Size> Memory; | |
157 | #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN | |
158 | typedef const Vector<T> &AsArg; | |
159 | #else | |
160 | typedef const Vector<T> AsArg; | |
161 | #endif | |
162 | ||
163 | typedef T _T; | |
164 | ||
165 | /////////////////////////////////////////////////////////////////////////////////////////// | |
166 | // uninitialized | |
167 | inline Vector() {} | |
168 | ||
169 | /////////////////////////////////////////////////////////////////////////////////////////// | |
170 | // constants | |
171 | explicit inline INTRINSIC_L Vector(VectorSpecialInitializerZero::ZEnum) INTRINSIC_R; | |
172 | explicit inline INTRINSIC_L Vector(VectorSpecialInitializerOne::OEnum) INTRINSIC_R; | |
173 | explicit inline INTRINSIC_L Vector(VectorSpecialInitializerIndexesFromZero::IEnum) INTRINSIC_R; | |
174 | static inline INTRINSIC_L Vector Zero() INTRINSIC_R; | |
175 | static inline INTRINSIC_L Vector One() INTRINSIC_R; | |
176 | static inline INTRINSIC_L Vector IndexesFromZero() INTRINSIC_R; | |
177 | static inline INTRINSIC_L Vector Random() INTRINSIC_R; | |
178 | ||
179 | /////////////////////////////////////////////////////////////////////////////////////////// | |
180 | // internal: required to enable returning objects of VectorType | |
181 | inline Vector(const VectorType &x) : d(x) {} | |
182 | ||
183 | /////////////////////////////////////////////////////////////////////////////////////////// | |
184 | // static_cast / copy ctor | |
185 | template<typename OtherT> explicit inline INTRINSIC_L Vector(const Vector<OtherT> &x) INTRINSIC_R; | |
186 | ||
187 | // implicit cast | |
188 | template<typename OtherT> inline INTRINSIC_L Vector &operator=(const Vector<OtherT> &x) INTRINSIC_R; | |
189 | ||
190 | // copy assignment | |
191 | inline Vector &operator=(AsArg v) { d.v() = v.d.v(); return *this; } | |
192 | ||
193 | /////////////////////////////////////////////////////////////////////////////////////////// | |
194 | // broadcast | |
195 | explicit Vector(EntryType a); | |
196 | template<typename TT> inline INTRINSIC Vector(TT x, VC_EXACT_TYPE(TT, EntryType, void *) = 0) : d(HT::set(x)) {} | |
197 | static inline Vector INTRINSIC broadcast4(const EntryType *x) { return Vector<T>(x); } | |
198 | inline Vector &operator=(EntryType a) { d.v() = HT::set(a); return *this; } | |
199 | ||
200 | /////////////////////////////////////////////////////////////////////////////////////////// | |
201 | // load ctors | |
202 | explicit inline INTRINSIC_L | |
203 | Vector(const EntryType *x) INTRINSIC_R; | |
204 | template<typename Alignment> inline INTRINSIC_L | |
205 | Vector(const EntryType *x, Alignment align) INTRINSIC_R; | |
206 | template<typename OtherT> explicit inline INTRINSIC_L | |
207 | Vector(const OtherT *x) INTRINSIC_R; | |
208 | template<typename OtherT, typename Alignment> inline INTRINSIC_L | |
209 | Vector(const OtherT *x, Alignment align) INTRINSIC_R; | |
210 | ||
211 | /////////////////////////////////////////////////////////////////////////////////////////// | |
212 | // load member functions | |
213 | inline INTRINSIC_L | |
214 | void load(const EntryType *mem) INTRINSIC_R; | |
215 | template<typename Alignment> inline INTRINSIC_L | |
216 | void load(const EntryType *mem, Alignment align) INTRINSIC_R; | |
217 | template<typename OtherT> inline INTRINSIC_L | |
218 | void load(const OtherT *mem) INTRINSIC_R; | |
219 | template<typename OtherT, typename Alignment> inline INTRINSIC_L | |
220 | void load(const OtherT *mem, Alignment align) INTRINSIC_R; | |
221 | ||
222 | /////////////////////////////////////////////////////////////////////////////////////////// | |
223 | // expand 1 float_v to 2 double_v XXX rationale? remove it for release? XXX | |
224 | explicit inline INTRINSIC_L Vector(const Vector<typename CtorTypeHelper<T>::Type> *a) INTRINSIC_R; | |
225 | void expand(Vector<typename ExpandTypeHelper<T>::Type> *x) const; | |
226 | ||
227 | /////////////////////////////////////////////////////////////////////////////////////////// | |
228 | // zeroing | |
229 | inline void INTRINSIC_L setZero() INTRINSIC_R; | |
230 | inline void INTRINSIC_L setZero(const Mask &k) INTRINSIC_R; | |
231 | ||
232 | inline void INTRINSIC_L setQnan() INTRINSIC_R; | |
233 | inline void INTRINSIC_L setQnan(typename Mask::Argument k) INTRINSIC_R; | |
234 | ||
235 | /////////////////////////////////////////////////////////////////////////////////////////// | |
236 | // stores | |
237 | inline void INTRINSIC_L store(EntryType *mem) const INTRINSIC_R; | |
238 | inline void INTRINSIC_L store(EntryType *mem, const Mask &mask) const INTRINSIC_R; | |
239 | template<typename A> inline void INTRINSIC_L store(EntryType *mem, A align) const INTRINSIC_R; | |
240 | template<typename A> inline void INTRINSIC_L store(EntryType *mem, const Mask &mask, A align) const INTRINSIC_R; | |
241 | ||
242 | /////////////////////////////////////////////////////////////////////////////////////////// | |
243 | // swizzles | |
244 | inline const Vector<T> INTRINSIC_L CONST_L &abcd() const INTRINSIC_R CONST_R; | |
245 | inline const Vector<T> INTRINSIC_L CONST_L cdab() const INTRINSIC_R CONST_R; | |
246 | inline const Vector<T> INTRINSIC_L CONST_L badc() const INTRINSIC_R CONST_R; | |
247 | inline const Vector<T> INTRINSIC_L CONST_L aaaa() const INTRINSIC_R CONST_R; | |
248 | inline const Vector<T> INTRINSIC_L CONST_L bbbb() const INTRINSIC_R CONST_R; | |
249 | inline const Vector<T> INTRINSIC_L CONST_L cccc() const INTRINSIC_R CONST_R; | |
250 | inline const Vector<T> INTRINSIC_L CONST_L dddd() const INTRINSIC_R CONST_R; | |
251 | inline const Vector<T> INTRINSIC_L CONST_L bcad() const INTRINSIC_R CONST_R; | |
252 | inline const Vector<T> INTRINSIC_L CONST_L bcda() const INTRINSIC_R CONST_R; | |
253 | inline const Vector<T> INTRINSIC_L CONST_L dabc() const INTRINSIC_R CONST_R; | |
254 | inline const Vector<T> INTRINSIC_L CONST_L acbd() const INTRINSIC_R CONST_R; | |
255 | inline const Vector<T> INTRINSIC_L CONST_L dbca() const INTRINSIC_R CONST_R; | |
256 | inline const Vector<T> INTRINSIC_L CONST_L dcba() const INTRINSIC_R CONST_R; | |
257 | ||
258 | /////////////////////////////////////////////////////////////////////////////////////////// | |
259 | // gathers | |
260 | template<typename IndexT> Vector(const EntryType *mem, const IndexT *indexes); | |
261 | template<typename IndexT> Vector(const EntryType *mem, const Vector<IndexT> indexes); | |
262 | template<typename IndexT> Vector(const EntryType *mem, const IndexT *indexes, MaskArg mask); | |
263 | template<typename IndexT> Vector(const EntryType *mem, const Vector<IndexT> indexes, MaskArg mask); | |
264 | template<typename S1, typename IT> Vector(const S1 *array, const EntryType S1::* member1, const IT indexes); | |
265 | template<typename S1, typename IT> Vector(const S1 *array, const EntryType S1::* member1, const IT indexes, MaskArg mask); | |
266 | template<typename S1, typename S2, typename IT> Vector(const S1 *array, const S2 S1::* member1, const EntryType S2::* member2, const IT indexes); | |
267 | template<typename S1, typename S2, typename IT> Vector(const S1 *array, const S2 S1::* member1, const EntryType S2::* member2, const IT indexes, MaskArg mask); | |
268 | template<typename S1, typename IT1, typename IT2> Vector(const S1 *array, const EntryType *const S1::* ptrMember1, const IT1 outerIndexes, const IT2 innerIndexes); | |
269 | template<typename S1, typename IT1, typename IT2> Vector(const S1 *array, const EntryType *const S1::* ptrMember1, const IT1 outerIndexes, const IT2 innerIndexes, MaskArg mask); | |
270 | template<typename Index> void gather(const EntryType *mem, const Index indexes); | |
271 | template<typename Index> void gather(const EntryType *mem, const Index indexes, MaskArg mask); | |
272 | #ifdef VC_USE_SET_GATHERS | |
273 | template<typename IT> void gather(const EntryType *mem, Vector<IT> indexes, MaskArg mask); | |
274 | #endif | |
275 | template<typename S1, typename IT> void gather(const S1 *array, const EntryType S1::* member1, const IT indexes); | |
276 | template<typename S1, typename IT> void gather(const S1 *array, const EntryType S1::* member1, const IT indexes, MaskArg mask); | |
277 | template<typename S1, typename S2, typename IT> void gather(const S1 *array, const S2 S1::* member1, const EntryType S2::* member2, const IT indexes); | |
278 | template<typename S1, typename S2, typename IT> void gather(const S1 *array, const S2 S1::* member1, const EntryType S2::* member2, const IT indexes, MaskArg mask); | |
279 | template<typename S1, typename IT1, typename IT2> void gather(const S1 *array, const EntryType *const S1::* ptrMember1, const IT1 outerIndexes, const IT2 innerIndexes); | |
280 | template<typename S1, typename IT1, typename IT2> void gather(const S1 *array, const EntryType *const S1::* ptrMember1, const IT1 outerIndexes, const IT2 innerIndexes, MaskArg mask); | |
281 | ||
282 | /////////////////////////////////////////////////////////////////////////////////////////// | |
283 | // scatters | |
284 | template<typename Index> void scatter(EntryType *mem, const Index indexes) const; | |
285 | template<typename Index> void scatter(EntryType *mem, const Index indexes, MaskArg mask) const; | |
286 | template<typename S1, typename IT> void scatter(S1 *array, EntryType S1::* member1, const IT indexes) const; | |
287 | template<typename S1, typename IT> void scatter(S1 *array, EntryType S1::* member1, const IT indexes, MaskArg mask) const; | |
288 | template<typename S1, typename S2, typename IT> void scatter(S1 *array, S2 S1::* member1, EntryType S2::* member2, const IT indexes) const; | |
289 | template<typename S1, typename S2, typename IT> void scatter(S1 *array, S2 S1::* member1, EntryType S2::* member2, const IT indexes, MaskArg mask) const; | |
290 | template<typename S1, typename IT1, typename IT2> void scatter(S1 *array, EntryType *S1::* ptrMember1, const IT1 outerIndexes, const IT2 innerIndexes) const; | |
291 | template<typename S1, typename IT1, typename IT2> void scatter(S1 *array, EntryType *S1::* ptrMember1, const IT1 outerIndexes, const IT2 innerIndexes, MaskArg mask) const; | |
292 | ||
293 | //prefix | |
294 | inline Vector INTRINSIC &operator++() { data() = VectorHelper<T>::add(data(), VectorHelper<T>::one()); return *this; } | |
295 | //postfix | |
296 | inline Vector INTRINSIC operator++(int) { const Vector<T> r = *this; data() = VectorHelper<T>::add(data(), VectorHelper<T>::one()); return r; } | |
297 | ||
298 | inline Common::AliasingEntryHelper<StorageType> INTRINSIC operator[](size_t index) { | |
299 | #if defined(VC_GCC) && VC_GCC >= 0x40300 && VC_GCC < 0x40400 | |
300 | ::Vc::Warnings::_operator_bracket_warning(); | |
301 | #endif | |
302 | return d.m(index); | |
303 | } | |
304 | inline EntryType INTRINSIC_L operator[](size_t index) const PURE INTRINSIC_R; | |
305 | ||
306 | inline Vector PURE INTRINSIC operator~() const { return VectorHelper<VectorType>::andnot_(data(), VectorHelper<VectorType>::allone()); } | |
307 | inline Vector<typename NegateTypeHelper<T>::Type> operator-() const; | |
308 | ||
309 | #define OP(symbol, fun) \ | |
310 | inline Vector INTRINSIC &operator symbol##=(const Vector<T> &x) { data() = VectorHelper<T>::fun(data(), x.data()); return *this; } \ | |
311 | inline Vector INTRINSIC &operator symbol##=(EntryType x) { return operator symbol##=(Vector<T>(x)); } \ | |
312 | inline Vector PURE INTRINSIC operator symbol(const Vector<T> &x) const { return HT::fun(data(), x.data()); } \ | |
313 | template<typename TT> inline VC_EXACT_TYPE(TT, EntryType, Vector) PURE INTRINSIC operator symbol(TT x) const { return operator symbol(Vector(x)); } | |
314 | ||
315 | OP(+, add) | |
316 | OP(-, sub) | |
317 | OP(*, mul) | |
318 | #undef OP | |
319 | ||
320 | inline INTRINSIC_L Vector &operator<<=(AsArg shift) INTRINSIC_R; | |
321 | inline INTRINSIC_L Vector operator<< (AsArg shift) const INTRINSIC_R; | |
322 | inline INTRINSIC_L Vector &operator<<=( int shift) INTRINSIC_R; | |
323 | inline INTRINSIC_L Vector operator<< ( int shift) const INTRINSIC_R; | |
324 | inline INTRINSIC_L Vector &operator>>=(AsArg shift) INTRINSIC_R; | |
325 | inline INTRINSIC_L Vector operator>> (AsArg shift) const INTRINSIC_R; | |
326 | inline INTRINSIC_L Vector &operator>>=( int shift) INTRINSIC_R; | |
327 | inline INTRINSIC_L Vector operator>> ( int shift) const INTRINSIC_R; | |
328 | ||
329 | inline INTRINSIC_L Vector &operator/=(const Vector<T> &x) INTRINSIC_R; | |
330 | inline INTRINSIC_L Vector operator/ (const Vector<T> &x) const PURE INTRINSIC_R; | |
331 | inline INTRINSIC_L Vector &operator/=(EntryType x) INTRINSIC_R; | |
332 | template<typename TT> inline INTRINSIC_L VC_EXACT_TYPE(TT, typename DetermineEntryType<T>::Type, Vector<T>) operator/(TT x) const PURE INTRINSIC_R; | |
333 | ||
334 | #define OP(symbol, fun) \ | |
335 | inline Vector INTRINSIC_L &operator symbol##=(const Vector<T> &x) INTRINSIC_R; \ | |
336 | inline Vector INTRINSIC_L operator symbol(const Vector<T> &x) const PURE INTRINSIC_R; \ | |
337 | inline Vector INTRINSIC &operator symbol##=(EntryType x) { return operator symbol##=(Vector(x)); } \ | |
338 | template<typename TT> inline VC_EXACT_TYPE(TT, EntryType, Vector) PURE INTRINSIC operator symbol(TT x) const { return operator symbol(Vector(x)); } | |
339 | OP(|, or_) | |
340 | OP(&, and_) | |
341 | OP(^, xor_) | |
342 | #undef OP | |
343 | #define OPcmp(symbol, fun) \ | |
344 | inline Mask PURE INTRINSIC operator symbol(const Vector<T> &x) const { return VectorHelper<T>::fun(data(), x.data()); } \ | |
345 | template<typename TT> inline VC_EXACT_TYPE(TT, EntryType, Mask) PURE INTRINSIC operator symbol(TT x) const { return operator symbol(Vector(x)); } | |
346 | ||
347 | OPcmp(==, cmpeq) | |
348 | OPcmp(!=, cmpneq) | |
349 | OPcmp(>=, cmpnlt) | |
350 | OPcmp(>, cmpnle) | |
351 | OPcmp(<, cmplt) | |
352 | OPcmp(<=, cmple) | |
353 | #undef OPcmp | |
354 | ||
355 | inline void multiplyAndAdd(const Vector<T> &factor, const Vector<T> &summand) { | |
356 | VectorHelper<T>::multiplyAndAdd(data(), factor, summand); | |
357 | } | |
358 | ||
359 | inline void assign( const Vector<T> &v, const Mask &mask ) { | |
360 | const VectorType k = mm128_reinterpret_cast<VectorType>(mask.data()); | |
361 | data() = VectorHelper<VectorType>::blend(data(), v.data(), k); | |
362 | } | |
363 | ||
364 | template<typename V2> inline V2 staticCast() const { return StaticCastHelper<T, typename V2::_T>::cast(data()); } | |
365 | template<typename V2> inline V2 reinterpretCast() const { return mm128_reinterpret_cast<typename V2::VectorType>(data()); } | |
366 | ||
367 | inline WriteMaskedVector<T> INTRINSIC operator()(const Mask &k) { return WriteMaskedVector<T>(this, k); } | |
368 | ||
369 | /** | |
370 | * \return \p true This vector was completely filled. m2 might be 0 or != 0. You still have | |
371 | * to test this. | |
372 | * \p false This vector was not completely filled. m2 is all 0. | |
373 | */ | |
374 | //inline bool pack(Mask &m1, Vector<T> &v2, Mask &m2) { | |
375 | //return VectorHelper<T>::pack(data(), m1.data, v2.data(), m2.data); | |
376 | //} | |
377 | ||
378 | inline VectorType &data() { return d.v(); } | |
379 | inline const VectorType &data() const { return d.v(); } | |
380 | ||
381 | inline EntryType INTRINSIC min() const { return VectorHelper<T>::min(data()); } | |
382 | inline EntryType INTRINSIC max() const { return VectorHelper<T>::max(data()); } | |
383 | inline EntryType INTRINSIC product() const { return VectorHelper<T>::mul(data()); } | |
384 | inline EntryType INTRINSIC sum() const { return VectorHelper<T>::add(data()); } | |
385 | inline INTRINSIC_L EntryType min(MaskArg m) const INTRINSIC_R; | |
386 | inline INTRINSIC_L EntryType max(MaskArg m) const INTRINSIC_R; | |
387 | inline INTRINSIC_L EntryType product(MaskArg m) const INTRINSIC_R; | |
388 | inline INTRINSIC_L EntryType sum(MaskArg m) const INTRINSIC_R; | |
389 | ||
390 | inline Vector sorted() const { return SortHelper<VectorType, Size>::sort(data()); } | |
391 | ||
392 | template<typename F> void callWithValuesSorted(F &f) { | |
393 | EntryType value = d.m(0); | |
394 | f(value); | |
395 | for (int i = 1; i < Size; ++i) { | |
396 | if (d.m(i) != value) { | |
397 | value = d.m(i); | |
398 | f(value); | |
399 | } | |
400 | } | |
401 | } | |
402 | ||
403 | template<typename F> inline void INTRINSIC call(const F &f) const { | |
404 | for_all_vector_entries(i, | |
405 | f(EntryType(d.m(i))); | |
406 | ); | |
407 | } | |
408 | template<typename F> inline void INTRINSIC call(F &f) const { | |
409 | for_all_vector_entries(i, | |
410 | f(EntryType(d.m(i))); | |
411 | ); | |
412 | } | |
413 | ||
414 | template<typename F> inline void INTRINSIC call(const F &f, const Mask &mask) const { | |
415 | Vc_foreach_bit(size_t i, mask) { | |
416 | f(EntryType(d.m(i))); | |
417 | } | |
418 | } | |
419 | template<typename F> inline void INTRINSIC call(F &f, const Mask &mask) const { | |
420 | Vc_foreach_bit(size_t i, mask) { | |
421 | f(EntryType(d.m(i))); | |
422 | } | |
423 | } | |
424 | ||
425 | template<typename F> inline Vector<T> INTRINSIC apply(const F &f) const { | |
426 | Vector<T> r; | |
427 | for_all_vector_entries(i, | |
428 | r.d.m(i) = f(EntryType(d.m(i))); | |
429 | ); | |
430 | return r; | |
431 | } | |
432 | template<typename F> inline Vector<T> INTRINSIC apply(F &f) const { | |
433 | Vector<T> r; | |
434 | for_all_vector_entries(i, | |
435 | r.d.m(i) = f(EntryType(d.m(i))); | |
436 | ); | |
437 | return r; | |
438 | } | |
439 | ||
440 | template<typename F> inline Vector<T> INTRINSIC apply(const F &f, const Mask &mask) const { | |
441 | Vector<T> r(*this); | |
442 | Vc_foreach_bit (size_t i, mask) { | |
443 | r.d.m(i) = f(EntryType(r.d.m(i))); | |
444 | } | |
445 | return r; | |
446 | } | |
447 | template<typename F> inline Vector<T> INTRINSIC apply(F &f, const Mask &mask) const { | |
448 | Vector<T> r(*this); | |
449 | Vc_foreach_bit (size_t i, mask) { | |
450 | r.d.m(i) = f(EntryType(r.d.m(i))); | |
451 | } | |
452 | return r; | |
453 | } | |
454 | ||
455 | template<typename IndexT> inline void INTRINSIC fill(EntryType (&f)(IndexT)) { | |
456 | for_all_vector_entries(i, | |
457 | d.m(i) = f(i); | |
458 | ); | |
459 | } | |
460 | inline void INTRINSIC fill(EntryType (&f)()) { | |
461 | for_all_vector_entries(i, | |
462 | d.m(i) = f(); | |
463 | ); | |
464 | } | |
465 | ||
466 | inline INTRINSIC_L Vector copySign(typename Vector::AsArg reference) const INTRINSIC_R; | |
467 | inline INTRINSIC_L Vector exponent() const INTRINSIC_R; | |
468 | }; | |
469 | ||
470 | typedef Vector<double> double_v; | |
471 | typedef Vector<float> float_v; | |
472 | typedef Vector<float8> sfloat_v; | |
473 | typedef Vector<int> int_v; | |
474 | typedef Vector<unsigned int> uint_v; | |
475 | typedef Vector<short> short_v; | |
476 | typedef Vector<unsigned short> ushort_v; | |
477 | typedef double_v::Mask double_m; | |
478 | typedef float_v::Mask float_m; | |
479 | typedef sfloat_v::Mask sfloat_m; | |
480 | typedef int_v::Mask int_m; | |
481 | typedef uint_v::Mask uint_m; | |
482 | typedef short_v::Mask short_m; | |
483 | typedef ushort_v::Mask ushort_m; | |
484 | ||
485 | template<> inline Vector<float8> Vector<float8>::broadcast4(const float *x) { | |
486 | const _M128 &v = VectorHelper<_M128>::load(x, Aligned); | |
487 | return Vector<float8>(M256::create(v, v)); | |
488 | } | |
489 | ||
490 | template<typename T> class SwizzledVector : public Vector<T> {}; | |
491 | ||
492 | static inline int_v min(const int_v &x, const int_v &y) { return _mm_min_epi32(x.data(), y.data()); } | |
493 | static inline uint_v min(const uint_v &x, const uint_v &y) { return _mm_min_epu32(x.data(), y.data()); } | |
494 | static inline short_v min(const short_v &x, const short_v &y) { return _mm_min_epi16(x.data(), y.data()); } | |
495 | static inline ushort_v min(const ushort_v &x, const ushort_v &y) { return _mm_min_epu16(x.data(), y.data()); } | |
496 | static inline float_v min(const float_v &x, const float_v &y) { return _mm_min_ps(x.data(), y.data()); } | |
497 | static inline double_v min(const double_v &x, const double_v &y) { return _mm_min_pd(x.data(), y.data()); } | |
498 | static inline int_v max(const int_v &x, const int_v &y) { return _mm_max_epi32(x.data(), y.data()); } | |
499 | static inline uint_v max(const uint_v &x, const uint_v &y) { return _mm_max_epu32(x.data(), y.data()); } | |
500 | static inline short_v max(const short_v &x, const short_v &y) { return _mm_max_epi16(x.data(), y.data()); } | |
501 | static inline ushort_v max(const ushort_v &x, const ushort_v &y) { return _mm_max_epu16(x.data(), y.data()); } | |
502 | static inline float_v max(const float_v &x, const float_v &y) { return _mm_max_ps(x.data(), y.data()); } | |
503 | static inline double_v max(const double_v &x, const double_v &y) { return _mm_max_pd(x.data(), y.data()); } | |
504 | ||
505 | static inline sfloat_v min(const sfloat_v &x, const sfloat_v &y) { | |
506 | return M256::create(_mm_min_ps(x.data()[0], y.data()[0]), _mm_min_ps(x.data()[1], y.data()[1])); | |
507 | } | |
508 | static inline sfloat_v max(const sfloat_v &x, const sfloat_v &y) { | |
509 | return M256::create(_mm_max_ps(x.data()[0], y.data()[0]), _mm_max_ps(x.data()[1], y.data()[1])); | |
510 | } | |
511 | ||
512 | template<typename T> static inline Vector<T> sqrt (const Vector<T> &x) { return VectorHelper<T>::sqrt(x.data()); } | |
513 | template<typename T> static inline Vector<T> rsqrt(const Vector<T> &x) { return VectorHelper<T>::rsqrt(x.data()); } | |
514 | template<typename T> static inline Vector<T> abs (const Vector<T> &x) { return VectorHelper<T>::abs(x.data()); } | |
515 | template<typename T> static inline Vector<T> reciprocal(const Vector<T> &x) { return VectorHelper<T>::reciprocal(x.data()); } | |
516 | template<typename T> static inline Vector<T> round(const Vector<T> &x) { return VectorHelper<T>::round(x.data()); } | |
517 | ||
518 | template<typename T> static inline typename Vector<T>::Mask isfinite(const Vector<T> &x) { return VectorHelper<T>::isFinite(x.data()); } | |
519 | template<typename T> static inline typename Vector<T>::Mask isnan(const Vector<T> &x) { return VectorHelper<T>::isNaN(x.data()); } | |
520 | ||
521 | #include "forceToRegisters.tcc" | |
// forceToRegisters specialization for a single Vector<float8> argument (the template being
// specialized is presumably declared in forceToRegisters.tcc, included just above).
#ifdef VC_GNU_ASM
// GNU-style inline asm: an empty asm statement that lists both parts of the float8 data as
// "x"-constrained inputs.
template<>
inline void ALWAYS_INLINE forceToRegisters(const Vector<float8> &x1) {
    __asm__ __volatile__(""::"x"(x1.data()[0]), "x"(x1.data()[1]));
}
#elif defined(VC_MSVC)
// MSVC: empty function compiled with the "g" optimizations switched off.
#pragma optimize("g", off)
template<>
inline void ALWAYS_INLINE forceToRegisters(const Vector<float8> &/*x1*/) {
}
// Reset the optimization settings to the ones given on the command line; otherwise the
// "off" state above stays in effect for every function defined after this header.
#pragma optimize("", on)
#endif
533 | } // namespace SSE | |
534 | } // namespace Vc | |
535 | ||
536 | #include "undomacros.h" | |
537 | #include "vector.tcc" | |
538 | #include "math.h" | |
539 | #endif // SSE_VECTOR_H |