]> git.uio.no Git - u/mrichter/AliRoot.git/blame - Vc/include/Vc/sse/casts.h
update to Vc 0.7.3-dev
[u/mrichter/AliRoot.git] / Vc / include / Vc / sse / casts.h
CommitLineData
f22341db 1/* This file is part of the Vc library.
2
3 Copyright (C) 2009-2011 Matthias Kretz <kretz@kde.org>
4
5 Vc is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation, either version 3 of
8 the License, or (at your option) any later version.
9
10 Vc is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17
18*/
19
20#ifndef SSE_CASTS_H
21#define SSE_CASTS_H
22
23#include "intrinsics.h"
24#include "types.h"
25
c017a39f 26namespace AliRoot {
f22341db 27namespace Vc
28{
29namespace SSE
30{
c017a39f 31 template<typename To, typename From> static Vc_ALWAYS_INLINE To Vc_CONST mm128_reinterpret_cast(VC_ALIGNED_PARAMETER(From) v) { return v; }
32 template<> Vc_ALWAYS_INLINE _M128I Vc_CONST mm128_reinterpret_cast<_M128I, _M128 >(VC_ALIGNED_PARAMETER(_M128 ) v) { return _mm_castps_si128(v); }
33 template<> Vc_ALWAYS_INLINE _M128I Vc_CONST mm128_reinterpret_cast<_M128I, _M128D>(VC_ALIGNED_PARAMETER(_M128D) v) { return _mm_castpd_si128(v); }
34 template<> Vc_ALWAYS_INLINE _M128 Vc_CONST mm128_reinterpret_cast<_M128 , _M128D>(VC_ALIGNED_PARAMETER(_M128D) v) { return _mm_castpd_ps(v); }
35 template<> Vc_ALWAYS_INLINE _M128 Vc_CONST mm128_reinterpret_cast<_M128 , _M128I>(VC_ALIGNED_PARAMETER(_M128I) v) { return _mm_castsi128_ps(v); }
36 template<> Vc_ALWAYS_INLINE _M128D Vc_CONST mm128_reinterpret_cast<_M128D, _M128I>(VC_ALIGNED_PARAMETER(_M128I) v) { return _mm_castsi128_pd(v); }
37 template<> Vc_ALWAYS_INLINE _M128D Vc_CONST mm128_reinterpret_cast<_M128D, _M128 >(VC_ALIGNED_PARAMETER(_M128 ) v) { return _mm_castps_pd(v); }
38 template<typename To, typename From> static Vc_ALWAYS_INLINE To Vc_CONST sse_cast(VC_ALIGNED_PARAMETER(From) v) { return mm128_reinterpret_cast<To, From>(v); }
f22341db 39
40 template<typename From, typename To> struct StaticCastHelper {};
c017a39f 41 template<> struct StaticCastHelper<float , int > { static Vc_ALWAYS_INLINE _M128I cast(const _M128 &v) { return _mm_cvttps_epi32(v); } };
42 template<> struct StaticCastHelper<double , int > { static Vc_ALWAYS_INLINE _M128I cast(const _M128D &v) { return _mm_cvttpd_epi32(v); } };
43 template<> struct StaticCastHelper<int , int > { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } };
44 template<> struct StaticCastHelper<unsigned int, int > { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } };
45 template<> struct StaticCastHelper<float , unsigned int> { static Vc_ALWAYS_INLINE _M128I cast(const _M128 &v) {
f22341db 46 return _mm_castps_si128(_mm_blendv_ps(
47 _mm_castsi128_ps(_mm_cvttps_epi32(v)),
48 _mm_castsi128_ps(_mm_add_epi32(_mm_cvttps_epi32(_mm_sub_ps(v, _mm_set1_ps(1u << 31))), _mm_set1_epi32(1 << 31))),
49 _mm_cmpge_ps(v, _mm_set1_ps(1u << 31))
50 ));
51
52 } };
c017a39f 53 template<> struct StaticCastHelper<double , unsigned int> { static Vc_ALWAYS_INLINE _M128I cast(const _M128D &v) { return _mm_cvttpd_epi32(v); } };
54 template<> struct StaticCastHelper<int , unsigned int> { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } };
55 template<> struct StaticCastHelper<unsigned int, unsigned int> { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } };
56 template<> struct StaticCastHelper<float , float > { static Vc_ALWAYS_INLINE _M128 cast(const _M128 &v) { return v; } };
57 template<> struct StaticCastHelper<double , float > { static Vc_ALWAYS_INLINE _M128 cast(const _M128D &v) { return _mm_cvtpd_ps(v); } };
58 template<> struct StaticCastHelper<int , float > { static Vc_ALWAYS_INLINE _M128 cast(const _M128I &v) { return _mm_cvtepi32_ps(v); } };
59 template<> struct StaticCastHelper<unsigned int, float > { static Vc_ALWAYS_INLINE _M128 cast(const _M128I &v) {
f22341db 60 return _mm_blendv_ps(
61 _mm_cvtepi32_ps(v),
62 _mm_add_ps(_mm_cvtepi32_ps(_mm_sub_epi32(v, _mm_set1_epi32(1 << 31))), _mm_set1_ps(1u << 31)),
63 _mm_castsi128_ps(_mm_cmplt_epi32(v, _mm_setzero_si128()))
64 );
65 } };
c017a39f 66 template<> struct StaticCastHelper<float , double > { static Vc_ALWAYS_INLINE _M128D cast(const _M128 &v) { return _mm_cvtps_pd(v); } };
67 template<> struct StaticCastHelper<double , double > { static Vc_ALWAYS_INLINE _M128D cast(const _M128D &v) { return v; } };
68 template<> struct StaticCastHelper<int , double > { static Vc_ALWAYS_INLINE _M128D cast(const _M128I &v) { return _mm_cvtepi32_pd(v); } };
69 template<> struct StaticCastHelper<unsigned int, double > { static Vc_ALWAYS_INLINE _M128D cast(const _M128I &v) { return _mm_cvtepi32_pd(v); } };
f22341db 70
c017a39f 71 template<> struct StaticCastHelper<unsigned short, float8 > { static Vc_ALWAYS_INLINE M256 cast(const _M128I &v) {
f22341db 72 return M256::create(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v, _mm_setzero_si128())),
73 _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, _mm_setzero_si128())));
74 } };
c017a39f 75 template<> struct StaticCastHelper<short , float8 > { static Vc_ALWAYS_INLINE M256 cast(const _M128I &v) {
f22341db 76 const _M128I neg = _mm_cmplt_epi16(v, _mm_setzero_si128());
77 return M256::create(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v, neg)),
78 _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, neg)));
79 } };
c017a39f 80 template<> struct StaticCastHelper<float8 , short > { static Vc_ALWAYS_INLINE _M128I cast(const M256 &v) { return _mm_packs_epi32(_mm_cvttps_epi32(v[0]), _mm_cvttps_epi32(v[1])); } };
f22341db 81#ifdef VC_IMPL_SSE4_1
c017a39f 82 template<> struct StaticCastHelper<float8 , unsigned short> { static Vc_ALWAYS_INLINE _M128I cast(const M256 &v) { return _mm_packus_epi32(_mm_cvttps_epi32(v[0]), _mm_cvttps_epi32(v[1])); } };
f22341db 83#else
c017a39f 84 template<> struct StaticCastHelper<float8 , unsigned short> { static Vc_ALWAYS_INLINE _M128I cast(const M256 &v) {
f22341db 85 return _mm_add_epi16(_mm_set1_epi16(-32768),
86 _mm_packs_epi32(
87 _mm_add_epi32(_mm_set1_epi32(-32768), _mm_cvttps_epi32(v[0])),
88 _mm_add_epi32(_mm_set1_epi32(-32768), _mm_cvttps_epi32(v[1]))
89 )
90 );
91 } };
92#endif
93
c017a39f 94 template<> struct StaticCastHelper<float , short > { static Vc_ALWAYS_INLINE _M128I cast(const _M128 &v) { return _mm_packs_epi32(_mm_cvttps_epi32(v), _mm_setzero_si128()); } };
95 template<> struct StaticCastHelper<short , short > { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } };
96 template<> struct StaticCastHelper<unsigned short, short > { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } };
97 template<> struct StaticCastHelper<float , unsigned short> { static Vc_ALWAYS_INLINE _M128I cast(const _M128 &v) { return _mm_packs_epi32(_mm_cvttps_epi32(v), _mm_setzero_si128()); } };
98 template<> struct StaticCastHelper<short , unsigned short> { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } };
99 template<> struct StaticCastHelper<unsigned short, unsigned short> { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } };
f22341db 100} // namespace SSE
101} // namespace Vc
c017a39f 102} // namespace AliRoot
f22341db 103
104#endif // SSE_CASTS_H