1 /* This file is part of the Vc library.
3 Copyright (C) 2009-2011 Matthias Kretz <kretz@kde.org>
5 Vc is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation, either version 3 of
8 the License, or (at your option) any later version.
10 Vc is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with Vc. If not, see <http://www.gnu.org/licenses/>.
23 #include "intrinsics.h"
30 template<typename To, typename From> static inline To CONST mm128_reinterpret_cast(From v) { return v; }
31 template<> inline _M128I CONST mm128_reinterpret_cast<_M128I, _M128 >(_M128 v) { return _mm_castps_si128(v); }
32 template<> inline _M128I CONST mm128_reinterpret_cast<_M128I, _M128D>(_M128D v) { return _mm_castpd_si128(v); }
33 template<> inline _M128 CONST mm128_reinterpret_cast<_M128 , _M128D>(_M128D v) { return _mm_castpd_ps(v); }
34 template<> inline _M128 CONST mm128_reinterpret_cast<_M128 , _M128I>(_M128I v) { return _mm_castsi128_ps(v); }
35 template<> inline _M128D CONST mm128_reinterpret_cast<_M128D, _M128I>(_M128I v) { return _mm_castsi128_pd(v); }
36 template<> inline _M128D CONST mm128_reinterpret_cast<_M128D, _M128 >(_M128 v) { return _mm_castps_pd(v); }
38 template<typename From, typename To> struct StaticCastHelper {};
39 template<> struct StaticCastHelper<float , int > { static _M128I cast(const _M128 &v) { return _mm_cvttps_epi32(v); } };
40 template<> struct StaticCastHelper<double , int > { static _M128I cast(const _M128D &v) { return _mm_cvttpd_epi32(v); } };
41 template<> struct StaticCastHelper<int , int > { static _M128I cast(const _M128I &v) { return v; } };
42 template<> struct StaticCastHelper<unsigned int, int > { static _M128I cast(const _M128I &v) { return v; } };
// float -> unsigned int.
// The visible expression builds two candidate results and a selection mask
// for _mm_blendv_ps:
//   1. the plain signed truncating conversion of v,
//   2. the truncating conversion of (v - 2^31) with 2^31 added back in the
//      integer domain,
//   mask: lanes where v >= 2^31 (these take candidate 2 per _mm_blendv_ps
//   semantics).
// NOTE(review): the closing lines of this definition are not visible in this
// excerpt.
43 template<> struct StaticCastHelper<float , unsigned int> { static _M128I cast(const _M128 &v) {
44 return _mm_castps_si128(_mm_blendv_ps(
// candidate 1: direct signed conversion
45 _mm_castsi128_ps(_mm_cvttps_epi32(v)),
// candidate 2: shift down by 2^31, convert, then add 2^31 back as an integer
46 _mm_castsi128_ps(_mm_add_epi32(_mm_cvttps_epi32(_mm_sub_ps(v, _mm_set1_ps(1u << 31))), _mm_set1_epi32(1 << 31))),
// selection mask: set for lanes that are >= 2^31
47 _mm_cmpge_ps(v, _mm_set1_ps(1u << 31))
51 template<> struct StaticCastHelper<double , unsigned int> { static _M128I cast(const _M128D &v) { return _mm_cvttpd_epi32(v); } };
52 template<> struct StaticCastHelper<int , unsigned int> { static _M128I cast(const _M128I &v) { return v; } };
53 template<> struct StaticCastHelper<unsigned int, unsigned int> { static _M128I cast(const _M128I &v) { return v; } };
54 template<> struct StaticCastHelper<float , float > { static _M128 cast(const _M128 &v) { return v; } };
55 template<> struct StaticCastHelper<double , float > { static _M128 cast(const _M128D &v) { return _mm_cvtpd_ps(v); } };
56 template<> struct StaticCastHelper<int , float > { static _M128 cast(const _M128I &v) { return _mm_cvtepi32_ps(v); } };
// unsigned int -> float.
// NOTE(review): the lines between the signature and the two operands below,
// as well as the closing lines, are not visible in this excerpt; presumably
// the operands feed a per-lane select analogous to the float -> unsigned int
// helper -- confirm against the complete file.
57 template<> struct StaticCastHelper<unsigned int, float > { static _M128 cast(const _M128I &v) {
// Operand for large values: subtract 2^31 in the integer domain, convert the
// result as a signed int, then add 2^31 back in the float domain.
60 _mm_add_ps(_mm_cvtepi32_ps(_mm_sub_epi32(v, _mm_set1_epi32(1 << 31))), _mm_set1_ps(1u << 31)),
// Mask operand: lanes that are negative when read as signed ints, i.e.
// unsigned values >= 2^31.
61 _mm_castsi128_ps(_mm_cmplt_epi32(v, _mm_setzero_si128()))
64 template<> struct StaticCastHelper<float , double > { static _M128D cast(const _M128 &v) { return _mm_cvtps_pd(v); } };
65 template<> struct StaticCastHelper<double , double > { static _M128D cast(const _M128D &v) { return v; } };
66 template<> struct StaticCastHelper<int , double > { static _M128D cast(const _M128I &v) { return _mm_cvtepi32_pd(v); } };
67 template<> struct StaticCastHelper<unsigned int, double > { static _M128D cast(const _M128I &v) { return _mm_cvtepi32_pd(v); } };
// unsigned short -> float8.
// Interleaves the 16-bit lanes of v with zero, which zero-extends them to
// 32-bit lanes (low half via unpacklo, high half via unpackhi), converts each
// half to float and hands both halves to M256::create.
// NOTE(review): the closing lines of this definition are not visible in this
// excerpt.
69 template<> struct StaticCastHelper<unsigned short, float8 > { static M256 cast(const _M128I &v) {
70 return M256::create(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v, _mm_setzero_si128())),
71 _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, _mm_setzero_si128())));
// short -> float8.
// Sign-extends the 16-bit lanes of v to 32-bit lanes, converts each half to
// float and hands both halves to M256::create.
// NOTE(review): the closing lines of this definition are not visible in this
// excerpt.
73 template<> struct StaticCastHelper<short , float8 > { static M256 cast(const _M128I &v) {
// neg is all-ones in every 16-bit lane where v is negative, zero elsewhere
74 const _M128I neg = _mm_cmplt_epi16(v, _mm_setzero_si128());
// interleaving v with neg supplies the upper 16 bits of each 32-bit lane,
// i.e. a sign extension
75 return M256::create(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v, neg)),
76 _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, neg)));
78 template<> struct StaticCastHelper<float8 , short > { static _M128I cast(const M256 &v) { return _mm_packs_epi32(_mm_cvttps_epi32(v[0]), _mm_cvttps_epi32(v[1])); } };
80 template<> struct StaticCastHelper<float8 , unsigned short> { static _M128I cast(const M256 &v) { return _mm_packus_epi32(_mm_cvttps_epi32(v[0]), _mm_cvttps_epi32(v[1])); } };
// float8 -> unsigned short, alternate implementation.
// NOTE(review): this is a second specialization for the same type pair;
// presumably the two are selected by a preprocessor conditional that is not
// visible in this excerpt. The call that consumes the two biased operands
// below and the closing lines are not visible either -- presumably a signed
// saturating pack; confirm against the complete file.
82 template<> struct StaticCastHelper<float8 , unsigned short> { static _M128I cast(const M256 &v) {
// -32768 is added again in 16-bit lanes to the result of the missing inner call
83 return _mm_add_epi16(_mm_set1_epi16(-32768),
// each half: truncate to 32-bit ints and bias by -32768
85 _mm_add_epi32(_mm_set1_epi32(-32768), _mm_cvttps_epi32(v[0])),
86 _mm_add_epi32(_mm_set1_epi32(-32768), _mm_cvttps_epi32(v[1]))
92 template<> struct StaticCastHelper<float , short > { static _M128I cast(const _M128 &v) { return _mm_packs_epi32(_mm_cvttps_epi32(v), _mm_setzero_si128()); } };
93 template<> struct StaticCastHelper<short , short > { static _M128I cast(const _M128I &v) { return v; } };
94 template<> struct StaticCastHelper<unsigned short, short > { static _M128I cast(const _M128I &v) { return v; } };
95 template<> struct StaticCastHelper<float , unsigned short> { static _M128I cast(const _M128 &v) { return _mm_packs_epi32(_mm_cvttps_epi32(v), _mm_setzero_si128()); } };
96 template<> struct StaticCastHelper<short , unsigned short> { static _M128I cast(const _M128I &v) { return v; } };
97 template<> struct StaticCastHelper<unsigned short, unsigned short> { static _M128I cast(const _M128I &v) { return v; } };
101 #endif // SSE_CASTS_H