1 /* This file is part of the Vc library.
3 Copyright (C) 2010-2011 Matthias Kretz <kretz@kde.org>
5 Vc is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation, either version 3 of
8 the License, or (at your option) any later version.
10 Vc is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with Vc. If not, see <http://www.gnu.org/licenses/>.
25 inline void deinterleave(Vector<float> &a, Vector<float> &b)
27 const _M128 tmp0 = _mm_unpacklo_ps(a.data(), b.data());
28 const _M128 tmp1 = _mm_unpackhi_ps(a.data(), b.data());
29 a.data() = _mm_unpacklo_ps(tmp0, tmp1);
30 b.data() = _mm_unpackhi_ps(tmp0, tmp1);
33 inline void deinterleave(Vector<float> &a, Vector<float> &b, Vector<short>::AsArg tmp)
35 a.data() = _mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(tmp.data(), 16), 16));
36 b.data() = _mm_cvtepi32_ps(_mm_srai_epi32(tmp.data(), 16));
39 inline void deinterleave(Vector<float> &a, Vector<float> &b, Vector<unsigned short>::AsArg tmp)
41 a.data() = _mm_cvtepi32_ps(_mm_srli_epi32(_mm_slli_epi32(tmp.data(), 16), 16));
42 b.data() = _mm_cvtepi32_ps(_mm_srli_epi32(tmp.data(), 16));
45 inline void deinterleave(Vector<float8> &a, Vector<float8> &b)
47 _M128 tmp0 = _mm_unpacklo_ps(a.data()[0], a.data()[1]);
48 _M128 tmp1 = _mm_unpackhi_ps(a.data()[0], a.data()[1]);
49 _M128 tmp2 = _mm_unpacklo_ps(b.data()[0], b.data()[1]);
50 _M128 tmp3 = _mm_unpackhi_ps(b.data()[0], b.data()[1]);
51 a.data()[0] = _mm_unpacklo_ps(tmp0, tmp1);
52 b.data()[0] = _mm_unpackhi_ps(tmp0, tmp1);
53 a.data()[1] = _mm_unpacklo_ps(tmp2, tmp3);
54 b.data()[1] = _mm_unpackhi_ps(tmp2, tmp3);
57 inline void deinterleave(Vector<float8> &a, Vector<float8> &b, Vector<short>::AsArg tmp0, Vector<short>::AsArg tmp1)
59 a.data()[0] = _mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(tmp0.data(), 16), 16));
60 b.data()[0] = _mm_cvtepi32_ps(_mm_srai_epi32(tmp0.data(), 16));
61 a.data()[1] = _mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(tmp1.data(), 16), 16));
62 b.data()[1] = _mm_cvtepi32_ps(_mm_srai_epi32(tmp1.data(), 16));
65 inline void deinterleave(Vector<float8> &a, Vector<float8> &b, Vector<unsigned short>::AsArg tmp0, Vector<unsigned short>::AsArg tmp1)
67 a.data()[0] = _mm_cvtepi32_ps(_mm_srli_epi32(_mm_slli_epi32(tmp0.data(), 16), 16));
68 b.data()[0] = _mm_cvtepi32_ps(_mm_srli_epi32(tmp0.data(), 16));
69 a.data()[1] = _mm_cvtepi32_ps(_mm_srli_epi32(_mm_slli_epi32(tmp1.data(), 16), 16));
70 b.data()[1] = _mm_cvtepi32_ps(_mm_srli_epi32(tmp1.data(), 16));
73 inline void deinterleave(Vector<double> &a, Vector<double> &b)
75 _M128D tmp = _mm_unpacklo_pd(a.data(), b.data());
76 b.data() = _mm_unpackhi_pd(a.data(), b.data());
80 inline void deinterleave(Vector<int> &a, Vector<int> &b)
82 const _M128I tmp0 = _mm_unpacklo_epi32(a.data(), b.data());
83 const _M128I tmp1 = _mm_unpackhi_epi32(a.data(), b.data());
84 a.data() = _mm_unpacklo_epi32(tmp0, tmp1);
85 b.data() = _mm_unpackhi_epi32(tmp0, tmp1);
88 inline void deinterleave(Vector<unsigned int> &a, Vector<unsigned int> &b)
90 const _M128I tmp0 = _mm_unpacklo_epi32(a.data(), b.data());
91 const _M128I tmp1 = _mm_unpackhi_epi32(a.data(), b.data());
92 a.data() = _mm_unpacklo_epi32(tmp0, tmp1);
93 b.data() = _mm_unpackhi_epi32(tmp0, tmp1);
96 inline void deinterleave(Vector<short> &a, Vector<short> &b)
98 _M128I tmp0 = _mm_unpacklo_epi16(a.data(), b.data()); // a0 a4 b0 b4 a1 a5 b1 b5
99 _M128I tmp1 = _mm_unpackhi_epi16(a.data(), b.data()); // a2 a6 b2 b6 a3 a7 b3 b7
100 _M128I tmp2 = _mm_unpacklo_epi16(tmp0, tmp1); // a0 a2 a4 a6 b0 b2 b4 b6
101 _M128I tmp3 = _mm_unpackhi_epi16(tmp0, tmp1); // a1 a3 a5 a7 b1 b3 b5 b7
102 a.data() = _mm_unpacklo_epi16(tmp2, tmp3);
103 b.data() = _mm_unpackhi_epi16(tmp2, tmp3);
106 inline void deinterleave(Vector<unsigned short> &a, Vector<unsigned short> &b)
108 _M128I tmp0 = _mm_unpacklo_epi16(a.data(), b.data()); // a0 a4 b0 b4 a1 a5 b1 b5
109 _M128I tmp1 = _mm_unpackhi_epi16(a.data(), b.data()); // a2 a6 b2 b6 a3 a7 b3 b7
110 _M128I tmp2 = _mm_unpacklo_epi16(tmp0, tmp1); // a0 a2 a4 a6 b0 b2 b4 b6
111 _M128I tmp3 = _mm_unpackhi_epi16(tmp0, tmp1); // a1 a3 a5 a7 b1 b3 b5 b7
112 a.data() = _mm_unpacklo_epi16(tmp2, tmp3);
113 b.data() = _mm_unpackhi_epi16(tmp2, tmp3);
116 inline void deinterleave(Vector<int> &a, Vector<int> &b, Vector<short>::AsArg tmp)
118 a.data() = _mm_srai_epi32(_mm_slli_epi32(tmp.data(), 16), 16);
119 b.data() = _mm_srai_epi32(tmp.data(), 16);
122 inline void deinterleave(Vector<unsigned int> &a, Vector<unsigned int> &b, Vector<unsigned short>::AsArg tmp)
124 a.data() = _mm_srli_epi32(_mm_slli_epi32(tmp.data(), 16), 16);
125 b.data() = _mm_srli_epi32(tmp.data(), 16);
134 template<typename A> inline void HelperImpl<Vc::SSE2Impl>::deinterleave(
135 float_v &a, float_v &b, const float *m, A align)
138 b.load(m + float_v::Size, align);
139 Vc::SSE::deinterleave(a, b);
142 template<typename A> inline void HelperImpl<Vc::SSE2Impl>::deinterleave(
143 float_v &a, float_v &b, const short *m, A align)
145 short_v tmp(m, align);
146 Vc::SSE::deinterleave(a, b, tmp);
149 template<typename A> inline void HelperImpl<Vc::SSE2Impl>::deinterleave(
150 float_v &a, float_v &b, const unsigned short *m, A align)
152 ushort_v tmp(m, align);
153 Vc::SSE::deinterleave(a, b, tmp);
156 template<typename A> inline void HelperImpl<Vc::SSE2Impl>::deinterleave(
157 sfloat_v &a, sfloat_v &b, const float *m, A align)
160 b.load(m + sfloat_v::Size, align);
161 Vc::SSE::deinterleave(a, b);
164 template<typename A> inline void HelperImpl<Vc::SSE2Impl>::deinterleave(
165 sfloat_v &a, sfloat_v &b, const short *m, A align)
167 short_v tmp0(m, align);
168 short_v tmp1(m + short_v::Size, align);
169 Vc::SSE::deinterleave(a, b, tmp0, tmp1);
172 template<typename A> inline void HelperImpl<Vc::SSE2Impl>::deinterleave(
173 sfloat_v &a, sfloat_v &b, const unsigned short *m, A align)
175 ushort_v tmp0(m, align);
176 ushort_v tmp1(m + short_v::Size, align);
177 Vc::SSE::deinterleave(a, b, tmp0, tmp1);
180 template<typename A> inline void HelperImpl<Vc::SSE2Impl>::deinterleave(
181 double_v &a, double_v &b, const double *m, A align)
184 b.load(m + double_v::Size, align);
185 Vc::SSE::deinterleave(a, b);
188 template<typename A> inline void HelperImpl<Vc::SSE2Impl>::deinterleave(
189 int_v &a, int_v &b, const int *m, A align)
192 b.load(m + int_v::Size, align);
193 Vc::SSE::deinterleave(a, b);
196 template<typename A> inline void HelperImpl<Vc::SSE2Impl>::deinterleave(
197 int_v &a, int_v &b, const short *m, A align)
199 short_v tmp(m, align);
200 Vc::SSE::deinterleave(a, b, tmp);
203 template<typename A> inline void HelperImpl<Vc::SSE2Impl>::deinterleave(
204 uint_v &a, uint_v &b, const unsigned int *m, A align)
207 b.load(m + uint_v::Size, align);
208 Vc::SSE::deinterleave(a, b);
211 template<typename A> inline void HelperImpl<Vc::SSE2Impl>::deinterleave(
212 uint_v &a, uint_v &b, const unsigned short *m, A align)
214 ushort_v tmp(m, align);
215 Vc::SSE::deinterleave(a, b, tmp);
218 template<typename A> inline void HelperImpl<Vc::SSE2Impl>::deinterleave(
219 short_v &a, short_v &b, const short *m, A align)
222 b.load(m + short_v::Size, align);
223 Vc::SSE::deinterleave(a, b);
226 template<typename A> inline void HelperImpl<Vc::SSE2Impl>::deinterleave(
227 ushort_v &a, ushort_v &b, const unsigned short *m, A align)
230 b.load(m + ushort_v::Size, align);
231 Vc::SSE::deinterleave(a, b);
234 } // namespace Internal