]>
Commit | Line | Data |
---|---|---|
f22341db | 1 | /* This file is part of the Vc library. |
2 | ||
3 | Copyright (C) 2009-2011 Matthias Kretz <kretz@kde.org> | |
4 | ||
5 | Vc is free software: you can redistribute it and/or modify | |
6 | it under the terms of the GNU Lesser General Public License as | |
7 | published by the Free Software Foundation, either version 3 of | |
8 | the License, or (at your option) any later version. | |
9 | ||
10 | Vc is distributed in the hope that it will be useful, but | |
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with Vc. If not, see <http://www.gnu.org/licenses/>. | |
17 | ||
18 | */ | |
19 | ||
20 | #ifndef SSE_CASTS_H | |
21 | #define SSE_CASTS_H | |
22 | ||
23 | #include "intrinsics.h" | |
24 | #include "types.h" | |
25 | ||
c017a39f | 26 | namespace AliRoot { |
f22341db | 27 | namespace Vc |
28 | { | |
29 | namespace SSE | |
30 | { | |
c017a39f | 31 | template<typename To, typename From> static Vc_ALWAYS_INLINE To Vc_CONST mm128_reinterpret_cast(VC_ALIGNED_PARAMETER(From) v) { return v; } |
32 | template<> Vc_ALWAYS_INLINE _M128I Vc_CONST mm128_reinterpret_cast<_M128I, _M128 >(VC_ALIGNED_PARAMETER(_M128 ) v) { return _mm_castps_si128(v); } | |
33 | template<> Vc_ALWAYS_INLINE _M128I Vc_CONST mm128_reinterpret_cast<_M128I, _M128D>(VC_ALIGNED_PARAMETER(_M128D) v) { return _mm_castpd_si128(v); } | |
34 | template<> Vc_ALWAYS_INLINE _M128 Vc_CONST mm128_reinterpret_cast<_M128 , _M128D>(VC_ALIGNED_PARAMETER(_M128D) v) { return _mm_castpd_ps(v); } | |
35 | template<> Vc_ALWAYS_INLINE _M128 Vc_CONST mm128_reinterpret_cast<_M128 , _M128I>(VC_ALIGNED_PARAMETER(_M128I) v) { return _mm_castsi128_ps(v); } | |
36 | template<> Vc_ALWAYS_INLINE _M128D Vc_CONST mm128_reinterpret_cast<_M128D, _M128I>(VC_ALIGNED_PARAMETER(_M128I) v) { return _mm_castsi128_pd(v); } | |
37 | template<> Vc_ALWAYS_INLINE _M128D Vc_CONST mm128_reinterpret_cast<_M128D, _M128 >(VC_ALIGNED_PARAMETER(_M128 ) v) { return _mm_castps_pd(v); } | |
38 | template<typename To, typename From> static Vc_ALWAYS_INLINE To Vc_CONST sse_cast(VC_ALIGNED_PARAMETER(From) v) { return mm128_reinterpret_cast<To, From>(v); } | |
f22341db | 39 | |
40 | template<typename From, typename To> struct StaticCastHelper {}; | |
c017a39f | 41 | template<> struct StaticCastHelper<float , int > { static Vc_ALWAYS_INLINE _M128I cast(const _M128 &v) { return _mm_cvttps_epi32(v); } }; |
42 | template<> struct StaticCastHelper<double , int > { static Vc_ALWAYS_INLINE _M128I cast(const _M128D &v) { return _mm_cvttpd_epi32(v); } }; | |
43 | template<> struct StaticCastHelper<int , int > { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; | |
44 | template<> struct StaticCastHelper<unsigned int, int > { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; | |
45 | template<> struct StaticCastHelper<float , unsigned int> { static Vc_ALWAYS_INLINE _M128I cast(const _M128 &v) { | |
f22341db | 46 | return _mm_castps_si128(_mm_blendv_ps( |
47 | _mm_castsi128_ps(_mm_cvttps_epi32(v)), | |
48 | _mm_castsi128_ps(_mm_add_epi32(_mm_cvttps_epi32(_mm_sub_ps(v, _mm_set1_ps(1u << 31))), _mm_set1_epi32(1 << 31))), | |
49 | _mm_cmpge_ps(v, _mm_set1_ps(1u << 31)) | |
50 | )); | |
51 | ||
52 | } }; | |
c017a39f | 53 | template<> struct StaticCastHelper<double , unsigned int> { static Vc_ALWAYS_INLINE _M128I cast(const _M128D &v) { return _mm_cvttpd_epi32(v); } }; |
54 | template<> struct StaticCastHelper<int , unsigned int> { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; | |
55 | template<> struct StaticCastHelper<unsigned int, unsigned int> { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; | |
56 | template<> struct StaticCastHelper<float , float > { static Vc_ALWAYS_INLINE _M128 cast(const _M128 &v) { return v; } }; | |
57 | template<> struct StaticCastHelper<double , float > { static Vc_ALWAYS_INLINE _M128 cast(const _M128D &v) { return _mm_cvtpd_ps(v); } }; | |
58 | template<> struct StaticCastHelper<int , float > { static Vc_ALWAYS_INLINE _M128 cast(const _M128I &v) { return _mm_cvtepi32_ps(v); } }; | |
59 | template<> struct StaticCastHelper<unsigned int, float > { static Vc_ALWAYS_INLINE _M128 cast(const _M128I &v) { | |
f22341db | 60 | return _mm_blendv_ps( |
61 | _mm_cvtepi32_ps(v), | |
62 | _mm_add_ps(_mm_cvtepi32_ps(_mm_sub_epi32(v, _mm_set1_epi32(1 << 31))), _mm_set1_ps(1u << 31)), | |
63 | _mm_castsi128_ps(_mm_cmplt_epi32(v, _mm_setzero_si128())) | |
64 | ); | |
65 | } }; | |
c017a39f | 66 | template<> struct StaticCastHelper<float , double > { static Vc_ALWAYS_INLINE _M128D cast(const _M128 &v) { return _mm_cvtps_pd(v); } }; |
67 | template<> struct StaticCastHelper<double , double > { static Vc_ALWAYS_INLINE _M128D cast(const _M128D &v) { return v; } }; | |
68 | template<> struct StaticCastHelper<int , double > { static Vc_ALWAYS_INLINE _M128D cast(const _M128I &v) { return _mm_cvtepi32_pd(v); } }; | |
69 | template<> struct StaticCastHelper<unsigned int, double > { static Vc_ALWAYS_INLINE _M128D cast(const _M128I &v) { return _mm_cvtepi32_pd(v); } }; | |
f22341db | 70 | |
c017a39f | 71 | template<> struct StaticCastHelper<unsigned short, float8 > { static Vc_ALWAYS_INLINE M256 cast(const _M128I &v) { |
f22341db | 72 | return M256::create(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v, _mm_setzero_si128())), |
73 | _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, _mm_setzero_si128()))); | |
74 | } }; | |
c017a39f | 75 | template<> struct StaticCastHelper<short , float8 > { static Vc_ALWAYS_INLINE M256 cast(const _M128I &v) { |
f22341db | 76 | const _M128I neg = _mm_cmplt_epi16(v, _mm_setzero_si128()); |
77 | return M256::create(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v, neg)), | |
78 | _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, neg))); | |
79 | } }; | |
c017a39f | 80 | template<> struct StaticCastHelper<float8 , short > { static Vc_ALWAYS_INLINE _M128I cast(const M256 &v) { return _mm_packs_epi32(_mm_cvttps_epi32(v[0]), _mm_cvttps_epi32(v[1])); } }; |
f22341db | 81 | #ifdef VC_IMPL_SSE4_1 |
c017a39f | 82 | template<> struct StaticCastHelper<float8 , unsigned short> { static Vc_ALWAYS_INLINE _M128I cast(const M256 &v) { return _mm_packus_epi32(_mm_cvttps_epi32(v[0]), _mm_cvttps_epi32(v[1])); } }; |
f22341db | 83 | #else |
c017a39f | 84 | template<> struct StaticCastHelper<float8 , unsigned short> { static Vc_ALWAYS_INLINE _M128I cast(const M256 &v) { |
f22341db | 85 | return _mm_add_epi16(_mm_set1_epi16(-32768), |
86 | _mm_packs_epi32( | |
87 | _mm_add_epi32(_mm_set1_epi32(-32768), _mm_cvttps_epi32(v[0])), | |
88 | _mm_add_epi32(_mm_set1_epi32(-32768), _mm_cvttps_epi32(v[1])) | |
89 | ) | |
90 | ); | |
91 | } }; | |
92 | #endif | |
93 | ||
c017a39f | 94 | template<> struct StaticCastHelper<float , short > { static Vc_ALWAYS_INLINE _M128I cast(const _M128 &v) { return _mm_packs_epi32(_mm_cvttps_epi32(v), _mm_setzero_si128()); } }; |
95 | template<> struct StaticCastHelper<short , short > { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; | |
96 | template<> struct StaticCastHelper<unsigned short, short > { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; | |
97 | template<> struct StaticCastHelper<float , unsigned short> { static Vc_ALWAYS_INLINE _M128I cast(const _M128 &v) { return _mm_packs_epi32(_mm_cvttps_epi32(v), _mm_setzero_si128()); } }; | |
98 | template<> struct StaticCastHelper<short , unsigned short> { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; | |
99 | template<> struct StaticCastHelper<unsigned short, unsigned short> { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; | |
f22341db | 100 | } // namespace SSE |
101 | } // namespace Vc | |
c017a39f | 102 | } // namespace AliRoot |
f22341db | 103 | |
104 | #endif // SSE_CASTS_H |