]>
Commit | Line | Data |
---|---|---|
f22341db | 1 | /* This file is part of the Vc library. |
2 | ||
3 | Copyright (C) 2009-2011 Matthias Kretz <kretz@kde.org> | |
4 | ||
5 | Vc is free software: you can redistribute it and/or modify | |
6 | it under the terms of the GNU Lesser General Public License as | |
7 | published by the Free Software Foundation, either version 3 of | |
8 | the License, or (at your option) any later version. | |
9 | ||
10 | Vc is distributed in the hope that it will be useful, but | |
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with Vc. If not, see <http://www.gnu.org/licenses/>. | |
17 | ||
18 | */ | |
19 | ||
20 | #include "casts.h" | |
21 | #include <cstdlib> | |
22 | ||
23 | namespace Vc | |
24 | { | |
25 | namespace SSE | |
26 | { | |
27 | ||
28 | //////////////////////////////////////////////////////////////////////////////////////////////////// | |
29 | // float_v | |
30 | template<> inline _M128 VectorHelper<_M128>::load(const float *x, AlignedFlag) | |
31 | { | |
32 | return _mm_load_ps(x); | |
33 | } | |
34 | ||
35 | template<> inline _M128 VectorHelper<_M128>::load(const float *x, UnalignedFlag) | |
36 | { | |
37 | return _mm_loadu_ps(x); | |
38 | } | |
39 | ||
40 | template<> inline _M128 VectorHelper<_M128>::load(const float *x, StreamingAndAlignedFlag) | |
41 | { | |
42 | return _mm_stream_load(x); | |
43 | } | |
44 | ||
45 | template<> inline _M128 VectorHelper<_M128>::load(const float *x, StreamingAndUnalignedFlag) | |
46 | { | |
47 | return load(x, Unaligned); | |
48 | } | |
49 | ||
50 | //////////////////////////////////////////////////////////////////////////////////////////////////// | |
51 | // stores | |
52 | inline void VectorHelper<_M128>::store(float *mem, const VectorType x, AlignedFlag) | |
53 | { | |
54 | _mm_store_ps(mem, x); | |
55 | } | |
56 | inline void VectorHelper<_M128>::store(float *mem, const VectorType x, UnalignedFlag) | |
57 | { | |
58 | _mm_storeu_ps(mem, x); | |
59 | } | |
60 | inline void VectorHelper<_M128>::store(float *mem, const VectorType x, StreamingAndAlignedFlag) | |
61 | { | |
62 | _mm_stream_ps(mem, x); | |
63 | } | |
64 | inline void VectorHelper<_M128>::store(float *mem, const VectorType x, StreamingAndUnalignedFlag) | |
65 | { | |
66 | _mm_maskmoveu_si128(_mm_castps_si128(x), _mm_setallone_si128(), reinterpret_cast<char *>(mem)); | |
67 | } | |
68 | inline void VectorHelper<_M128>::store(float *mem, const VectorType x, const VectorType m, AlignedFlag) | |
69 | { | |
70 | _mm_store_ps(mem, _mm_blendv_ps(_mm_load_ps(mem), x, m)); | |
71 | } | |
72 | inline void VectorHelper<_M128>::store(float *mem, const VectorType x, const VectorType m, UnalignedFlag) | |
73 | { | |
74 | _mm_storeu_ps(mem, _mm_blendv_ps(_mm_loadu_ps(mem), x, m)); | |
75 | } | |
76 | inline void VectorHelper<_M128>::store(float *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag) | |
77 | { | |
78 | _mm_maskmoveu_si128(_mm_castps_si128(x), _mm_castps_si128(m), reinterpret_cast<char *>(mem)); | |
79 | } | |
80 | inline void VectorHelper<_M128>::store(float *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag) | |
81 | { | |
82 | _mm_maskmoveu_si128(_mm_castps_si128(x), _mm_castps_si128(m), reinterpret_cast<char *>(mem)); | |
83 | } | |
84 | ||
85 | //////////////////////////////////////////////////////////////////////////////////////////////////// | |
86 | // sfloat_v | |
87 | template<> inline M256 VectorHelper<M256>::load(const float *x, AlignedFlag) | |
88 | { | |
89 | return VectorType::create(_mm_load_ps(x), _mm_load_ps(x + 4)); | |
90 | } | |
91 | ||
92 | template<> inline M256 VectorHelper<M256>::load(const float *x, UnalignedFlag) | |
93 | { | |
94 | return VectorType::create(_mm_loadu_ps(x), _mm_loadu_ps(x + 4)); | |
95 | } | |
96 | ||
97 | template<> inline M256 VectorHelper<M256>::load(const float *x, StreamingAndAlignedFlag) | |
98 | { | |
99 | return VectorType::create(_mm_stream_load(&x[0]), _mm_stream_load(&x[4])); | |
100 | } | |
101 | ||
102 | template<> inline M256 VectorHelper<M256>::load(const float *x, StreamingAndUnalignedFlag) | |
103 | { | |
104 | return load(x, Unaligned); | |
105 | } | |
106 | ||
107 | //////////////////////////////////////////////////////////////////////////////////////////////////// | |
108 | // stores | |
109 | inline void VectorHelper<M256>::store(float *mem, const VectorType &x, AlignedFlag) | |
110 | { | |
111 | _mm_store_ps(mem, x[0]); | |
112 | _mm_store_ps(mem + 4, x[1]); | |
113 | } | |
114 | inline void VectorHelper<M256>::store(float *mem, const VectorType &x, UnalignedFlag) | |
115 | { | |
116 | _mm_storeu_ps(mem, x[0]); | |
117 | _mm_storeu_ps(mem + 4, x[1]); | |
118 | } | |
119 | inline void VectorHelper<M256>::store(float *mem, const VectorType &x, StreamingAndAlignedFlag) | |
120 | { | |
121 | _mm_stream_ps(mem, x[0]); | |
122 | _mm_stream_ps(mem + 4, x[1]); | |
123 | } | |
124 | inline void VectorHelper<M256>::store(float *mem, const VectorType &x, StreamingAndUnalignedFlag) | |
125 | { | |
126 | _mm_maskmoveu_si128(_mm_castps_si128(x[0]), _mm_setallone_si128(), reinterpret_cast<char *>(mem)); | |
127 | _mm_maskmoveu_si128(_mm_castps_si128(x[1]), _mm_setallone_si128(), reinterpret_cast<char *>(mem + 4)); | |
128 | } | |
129 | inline void VectorHelper<M256>::store(float *mem, const VectorType &x, const VectorType &m, AlignedFlag) | |
130 | { | |
131 | _mm_store_ps(mem, _mm_blendv_ps(_mm_load_ps(mem), x[0], m[0])); | |
132 | _mm_store_ps(mem + 4, _mm_blendv_ps(_mm_load_ps(mem + 4), x[1], m[1])); | |
133 | } | |
134 | inline void VectorHelper<M256>::store(float *mem, const VectorType &x, const VectorType &m, UnalignedFlag) | |
135 | { | |
136 | _mm_storeu_ps(mem, _mm_blendv_ps(_mm_loadu_ps(mem), x[0], m[0])); | |
137 | _mm_storeu_ps(mem + 4, _mm_blendv_ps(_mm_loadu_ps(mem + 4), x[1], m[1])); | |
138 | } | |
139 | inline void VectorHelper<M256>::store(float *mem, const VectorType &x, const VectorType &m, StreamingAndAlignedFlag) | |
140 | { | |
141 | _mm_maskmoveu_si128(_mm_castps_si128(x[0]), _mm_castps_si128(m[0]), reinterpret_cast<char *>(mem)); | |
142 | _mm_maskmoveu_si128(_mm_castps_si128(x[1]), _mm_castps_si128(m[1]), reinterpret_cast<char *>(mem + 4)); | |
143 | } | |
144 | inline void VectorHelper<M256>::store(float *mem, const VectorType &x, const VectorType &m, StreamingAndUnalignedFlag) | |
145 | { | |
146 | _mm_maskmoveu_si128(_mm_castps_si128(x[0]), _mm_castps_si128(m[0]), reinterpret_cast<char *>(mem)); | |
147 | _mm_maskmoveu_si128(_mm_castps_si128(x[1]), _mm_castps_si128(m[1]), reinterpret_cast<char *>(mem + 4)); | |
148 | } | |
149 | ||
150 | //////////////////////////////////////////////////////////////////////////////////////////////////// | |
151 | // double_v | |
152 | template<> inline _M128D VectorHelper<_M128D>::load(const double *x, AlignedFlag) | |
153 | { | |
154 | return _mm_load_pd(x); | |
155 | } | |
156 | ||
157 | template<> inline _M128D VectorHelper<_M128D>::load(const double *x, UnalignedFlag) | |
158 | { | |
159 | return _mm_loadu_pd(x); | |
160 | } | |
161 | ||
162 | template<> inline _M128D VectorHelper<_M128D>::load(const double *x, StreamingAndAlignedFlag) | |
163 | { | |
164 | return _mm_stream_load(x); | |
165 | } | |
166 | ||
167 | template<> inline _M128D VectorHelper<_M128D>::load(const double *x, StreamingAndUnalignedFlag) | |
168 | { | |
169 | return load(x, Unaligned); | |
170 | } | |
171 | ||
172 | //////////////////////////////////////////////////////////////////////////////////////////////////// | |
173 | // stores | |
174 | inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, AlignedFlag) | |
175 | { | |
176 | _mm_store_pd(mem, x); | |
177 | } | |
178 | inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, UnalignedFlag) | |
179 | { | |
180 | _mm_storeu_pd(mem, x); | |
181 | } | |
182 | inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, StreamingAndAlignedFlag) | |
183 | { | |
184 | _mm_stream_pd(mem, x); | |
185 | } | |
186 | inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, StreamingAndUnalignedFlag) | |
187 | { | |
188 | _mm_maskmoveu_si128(_mm_castpd_si128(x), _mm_setallone_si128(), reinterpret_cast<char *>(mem)); | |
189 | } | |
190 | inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, const VectorType m, AlignedFlag) | |
191 | { | |
192 | _mm_store_pd(mem, _mm_blendv_pd(_mm_load_pd(mem), x, m)); | |
193 | } | |
194 | inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, const VectorType m, UnalignedFlag) | |
195 | { | |
196 | _mm_storeu_pd(mem, _mm_blendv_pd(_mm_loadu_pd(mem), x, m)); | |
197 | } | |
198 | inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag) | |
199 | { | |
200 | _mm_maskmoveu_si128(_mm_castpd_si128(x), _mm_castpd_si128(m), reinterpret_cast<char *>(mem)); | |
201 | } | |
202 | inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag) | |
203 | { | |
204 | _mm_maskmoveu_si128(_mm_castpd_si128(x), _mm_castpd_si128(m), reinterpret_cast<char *>(mem)); | |
205 | } | |
206 | ||
207 | //////////////////////////////////////////////////////////////////////////////////////////////////// | |
208 | // int_v, uint_v, short_v, ushort_v | |
209 | template<typename T> inline _M128I VectorHelper<_M128I>::load(const T *x, AlignedFlag) | |
210 | { | |
211 | return _mm_load_si128(reinterpret_cast<const VectorType *>(x)); | |
212 | } | |
213 | ||
214 | template<typename T> inline _M128I VectorHelper<_M128I>::load(const T *x, UnalignedFlag) | |
215 | { | |
216 | return _mm_loadu_si128(reinterpret_cast<const VectorType *>(x)); | |
217 | } | |
218 | ||
219 | template<typename T> inline _M128I VectorHelper<_M128I>::load(const T *x, StreamingAndAlignedFlag) | |
220 | { | |
221 | return _mm_stream_load(x); | |
222 | } | |
223 | ||
224 | template<typename T> inline _M128I VectorHelper<_M128I>::load(const T *x, StreamingAndUnalignedFlag) | |
225 | { | |
226 | return load(x, Unaligned); | |
227 | } | |
228 | ||
229 | //////////////////////////////////////////////////////////////////////////////////////////////////// | |
230 | // stores | |
231 | template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, AlignedFlag) | |
232 | { | |
233 | _mm_store_si128(reinterpret_cast<VectorType *>(mem), x); | |
234 | } | |
235 | template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, UnalignedFlag) | |
236 | { | |
237 | _mm_storeu_si128(reinterpret_cast<VectorType *>(mem), x); | |
238 | } | |
239 | template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, StreamingAndAlignedFlag) | |
240 | { | |
241 | _mm_stream_si128(reinterpret_cast<VectorType *>(mem), x); | |
242 | } | |
243 | template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, StreamingAndUnalignedFlag) | |
244 | { | |
245 | _mm_maskmoveu_si128(x, _mm_setallone_si128(), reinterpret_cast<char *>(mem)); | |
246 | } | |
247 | template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, const VectorType m, AlignedFlag align) | |
248 | { | |
249 | store(mem, _mm_blendv_epi8(load(mem, align), x, m), align); | |
250 | } | |
251 | template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, const VectorType m, UnalignedFlag align) | |
252 | { | |
253 | store(mem, _mm_blendv_epi8(load(mem, align), x, m), align); | |
254 | } | |
255 | template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag) | |
256 | { | |
257 | _mm_maskmoveu_si128(x, m, reinterpret_cast<char *>(mem)); | |
258 | } | |
259 | template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag) | |
260 | { | |
261 | _mm_maskmoveu_si128(x, m, reinterpret_cast<char *>(mem)); | |
262 | } | |
263 | ||
// Sorts the eight 16-bit entries of the register into ascending order using a
// fixed compare/exchange network built from signed min/max (_mm_min_epi16 /
// _mm_max_epi16).
// NOTE(review): the compares are signed; for ushort_v this assumes values fit
// the signed 16-bit range — confirm against the callers.
template<> inline _M128I SortHelper<_M128I, 8>::sort(_M128I x)
{
    _M128I lo, hi, y;
    // sort pairs: swap adjacent lanes (0<->1, 2<->3, ...), compare, and blend
    // so each even/odd lane pair is ordered (0xaa keeps hi in the odd lanes)
    y = _mm_shufflelo_epi16(_mm_shufflehi_epi16(x, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
    lo = _mm_min_epi16(x, y);
    hi = _mm_max_epi16(x, y);
    x = _mm_blend_epi16(lo, hi, 0xaa);

    // merge left and right quads: reverse each 4-lane group (compare lane i
    // with lane 3-i), then a second compare against the 1-lane shift finishes
    // sorting each quad
    y = _mm_shufflelo_epi16(_mm_shufflehi_epi16(x, _MM_SHUFFLE(0, 1, 2, 3)), _MM_SHUFFLE(0, 1, 2, 3));
    lo = _mm_min_epi16(x, y);
    hi = _mm_max_epi16(x, y);
    x = _mm_blend_epi16(lo, hi, 0xcc);
    y = _mm_srli_si128(x, 2);        // shift one 16-bit lane down
    lo = _mm_min_epi16(x, y);
    hi = _mm_max_epi16(x, y);
    x = _mm_blend_epi16(lo, _mm_slli_si128(hi, 2), 0xaa);

    // merge quads into octs: compare the low quad against the reversed high
    // quad, then two interleave-and-compare passes clean up the merge
    y = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2));
    y = _mm_shufflelo_epi16(y, _MM_SHUFFLE(0, 1, 2, 3));
    lo = _mm_min_epi16(x, y);
    hi = _mm_max_epi16(x, y);

    x = _mm_unpacklo_epi16(lo, hi);
    y = _mm_srli_si128(x, 8);
    lo = _mm_min_epi16(x, y);
    hi = _mm_max_epi16(x, y);

    x = _mm_unpacklo_epi16(lo, hi);
    y = _mm_srli_si128(x, 8);
    lo = _mm_min_epi16(x, y);
    hi = _mm_max_epi16(x, y);

    // final interleave produces the fully sorted vector
    return _mm_unpacklo_epi16(lo, hi);
}
// Sorts the four 32-bit entries of the register into ascending order with a
// compare/exchange network built from signed min/max (_mm_min_epi32 /
// _mm_max_epi32, SSE4.1).
template<> inline _M128I SortHelper<_M128I, 4>::sort(_M128I x)
{
/*
    // in 16,67% of the cases the merge can be replaced by an append

    // x = [a b c d]
    // y = [c d a b]
    _M128I y = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2));
    _M128I l = _mm_min_epi32(x, y); // min[ac bd ac bd]
    _M128I h = _mm_max_epi32(x, y); // max[ac bd ac bd]
    if (IS_UNLIKELY(_mm_cvtsi128_si32(h) <= l[1])) { // l[0] < h[0] < l[1] < h[1]
        return _mm_unpacklo_epi32(l, h);
    }
    // h[0] > l[1]
*/

    // sort pairs: swap lanes (0<->1, 2<->3) and compare; the unpacklo/unpackhi
    // below rebuilds x as the two ordered pairs and y as the high pair with its
    // lanes reversed, setting up the merge step
    _M128I y = _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1));
    _M128I l = _mm_min_epi32(x, y);
    _M128I h = _mm_max_epi32(x, y);
    x = _mm_unpacklo_epi32(l, h);
    y = _mm_unpackhi_epi32(h, l);   // note reversed operand order

    // sort quads: first compare pass across the two pairs, then one more
    // compare between the low and high halves finishes the merge
    l = _mm_min_epi32(x, y);
    h = _mm_max_epi32(x, y);
    x = _mm_unpacklo_epi32(l, h);
    y = _mm_unpackhi_epi64(x, x);   // move the upper half down

    l = _mm_min_epi32(x, y);
    h = _mm_max_epi32(x, y);
    return _mm_unpacklo_epi32(l, h);
}
// Sorts the four float entries of the register into ascending order — the same
// compare/exchange network as SortHelper<_M128I, 4>, using _mm_min_ps /
// _mm_max_ps.
// NOTE(review): min/max semantics with NaN inputs depend on the instruction's
// operand-order behavior — confirm if NaNs can occur here.
template<> inline _M128 SortHelper<_M128, 4>::sort(_M128 x)
{
    // sort pairs: swap lanes (0<->1, 2<->3) and compare; rebuild x as the
    // ordered pairs and y as the high pair reversed for the merge
    _M128 y = _mm_shuffle_ps(x, x, _MM_SHUFFLE(2, 3, 0, 1));
    _M128 l = _mm_min_ps(x, y);
    _M128 h = _mm_max_ps(x, y);
    x = _mm_unpacklo_ps(l, h);
    y = _mm_unpackhi_ps(h, l);      // note reversed operand order

    // merge the two sorted pairs: one compare pass, then a final compare of
    // the low half against the high half
    l = _mm_min_ps(x, y);
    h = _mm_max_ps(x, y);
    x = _mm_unpacklo_ps(l, h);
    y = _mm_movehl_ps(x, x);        // move the upper half down

    l = _mm_min_ps(x, y);
    h = _mm_max_ps(x, y);
    return _mm_unpacklo_ps(l, h);
    // Alternative blendv-based network kept for reference:
    //X _M128 k = _mm_cmpgt_ps(x, y);
    //X k = _mm_shuffle_ps(k, k, _MM_SHUFFLE(2, 2, 0, 0));
    //X x = _mm_blendv_ps(x, y, k);
    //X y = _mm_shuffle_ps(x, x, _MM_SHUFFLE(1, 0, 3, 2));
    //X k = _mm_cmpgt_ps(x, y);
    //X k = _mm_shuffle_ps(k, k, _MM_SHUFFLE(1, 0, 1, 0));
    //X x = _mm_blendv_ps(x, y, k);
    //X y = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 1, 2, 0));
    //X k = _mm_cmpgt_ps(x, y);
    //X k = _mm_shuffle_ps(k, k, _MM_SHUFFLE(0, 1, 1, 0));
    //X return _mm_blendv_ps(x, y, k);
}
// Sorts the eight float entries of an M256 (two _M128 halves): each half is
// sorted independently via SortHelper<_M128, 4>, then the two sorted quads are
// merged with three compare/interleave passes.
template<> inline M256 SortHelper<M256, 8>::sort(const M256 &_x)
{
    M256 x = _x;                    // local copy; the result is returned by value
    typedef SortHelper<_M128, 4> H;

    _M128 a, b, l, h;
    a = H::sort(x[0]);
    b = H::sort(x[1]);

    // merge: reverse the second sorted quad so a/b form one sequence that the
    // min/max passes below can merge
    b = _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 1, 2, 3));
    l = _mm_min_ps(a, b);
    h = _mm_max_ps(a, b);

    // two interleave-and-compare cleanup passes
    a = _mm_unpacklo_ps(l, h);
    b = _mm_unpackhi_ps(l, h);
    l = _mm_min_ps(a, b);
    h = _mm_max_ps(a, b);

    a = _mm_unpacklo_ps(l, h);
    b = _mm_unpackhi_ps(l, h);
    l = _mm_min_ps(a, b);
    h = _mm_max_ps(a, b);

    // final interleave: low half into x[0], high half into x[1]
    x[0] = _mm_unpacklo_ps(l, h);
    x[1] = _mm_unpackhi_ps(l, h);
    return x;
}
390 | template<> inline _M128D SortHelper<_M128D, 2>::sort(_M128D x) | |
391 | { | |
392 | const _M128D y = _mm_shuffle_pd(x, x, _MM_SHUFFLE2(0, 1)); | |
393 | return _mm_unpacklo_pd(_mm_min_sd(x, y), _mm_max_sd(x, y)); | |
394 | } | |
395 | ||
// Multiplies every entry of x by a compile-time constant. Since `constant` is
// a template parameter, the switch collapses at compile time: powers of two
// become a single shift, and (without SSE4.1, for 32-bit entries) many other
// constants become the sum of two shifts — encoding the factor in the
// instructions instead of loading a multiplier vector. Anything not matched
// falls through to a generic vector multiply.
template<typename IndexType, unsigned int constant> inline IndexType mulConst(const IndexType &x) {
    typedef VectorHelper<typename IndexType::EntryType> H;
    // powers of two: x * 2^k == x << k
    switch (constant) {
    case    0: return H::zero();
    case    1: return x;
    case    2: return H::slli(x.data(),  1);
    case    4: return H::slli(x.data(),  2);
    case    8: return H::slli(x.data(),  3);
    case   16: return H::slli(x.data(),  4);
    case   32: return H::slli(x.data(),  5);
    case   64: return H::slli(x.data(),  6);
    case  128: return H::slli(x.data(),  7);
    case  256: return H::slli(x.data(),  8);
    case  512: return H::slli(x.data(),  9);
    case 1024: return H::slli(x.data(), 10);
    case 2048: return H::slli(x.data(), 11);
    }
#ifndef VC_IMPL_SSE4_1
    // without SSE 4.1 int multiplication is not so nice (no single packed
    // 32-bit multiply), so constants of the form 2^a + 2^b are built from two
    // shifts and an add
    if (sizeof(typename IndexType::EntryType) == 4) {
        switch (constant) {
        // 1 + 2^b
        case    3: return H::add( x.data()            , H::slli(x.data(),  1));
        case    5: return H::add( x.data()            , H::slli(x.data(),  2));
        case    9: return H::add( x.data()            , H::slli(x.data(),  3));
        case   17: return H::add( x.data()            , H::slli(x.data(),  4));
        case   33: return H::add( x.data()            , H::slli(x.data(),  5));
        case   65: return H::add( x.data()            , H::slli(x.data(),  6));
        case  129: return H::add( x.data()            , H::slli(x.data(),  7));
        case  257: return H::add( x.data()            , H::slli(x.data(),  8));
        case  513: return H::add( x.data()            , H::slli(x.data(),  9));
        case 1025: return H::add( x.data()            , H::slli(x.data(), 10));
        case 2049: return H::add( x.data()            , H::slli(x.data(), 11));
        // 2^1 + 2^b
        case    6: return H::add(H::slli(x.data(),  1), H::slli(x.data(),  2));
        case   10: return H::add(H::slli(x.data(),  1), H::slli(x.data(),  3));
        case   18: return H::add(H::slli(x.data(),  1), H::slli(x.data(),  4));
        case   34: return H::add(H::slli(x.data(),  1), H::slli(x.data(),  5));
        case   66: return H::add(H::slli(x.data(),  1), H::slli(x.data(),  6));
        case  130: return H::add(H::slli(x.data(),  1), H::slli(x.data(),  7));
        case  258: return H::add(H::slli(x.data(),  1), H::slli(x.data(),  8));
        case  514: return H::add(H::slli(x.data(),  1), H::slli(x.data(),  9));
        case 1026: return H::add(H::slli(x.data(),  1), H::slli(x.data(), 10));
        case 2050: return H::add(H::slli(x.data(),  1), H::slli(x.data(), 11));
        // 2^2 + 2^b
        case   12: return H::add(H::slli(x.data(),  2), H::slli(x.data(),  3));
        case   20: return H::add(H::slli(x.data(),  2), H::slli(x.data(),  4));
        case   36: return H::add(H::slli(x.data(),  2), H::slli(x.data(),  5));
        case   68: return H::add(H::slli(x.data(),  2), H::slli(x.data(),  6));
        case  132: return H::add(H::slli(x.data(),  2), H::slli(x.data(),  7));
        case  260: return H::add(H::slli(x.data(),  2), H::slli(x.data(),  8));
        case  516: return H::add(H::slli(x.data(),  2), H::slli(x.data(),  9));
        case 1028: return H::add(H::slli(x.data(),  2), H::slli(x.data(), 10));
        case 2052: return H::add(H::slli(x.data(),  2), H::slli(x.data(), 11));
        // 2^3 + 2^b
        case   24: return H::add(H::slli(x.data(),  3), H::slli(x.data(),  4));
        case   40: return H::add(H::slli(x.data(),  3), H::slli(x.data(),  5));
        case   72: return H::add(H::slli(x.data(),  3), H::slli(x.data(),  6));
        case  136: return H::add(H::slli(x.data(),  3), H::slli(x.data(),  7));
        case  264: return H::add(H::slli(x.data(),  3), H::slli(x.data(),  8));
        case  520: return H::add(H::slli(x.data(),  3), H::slli(x.data(),  9));
        case 1032: return H::add(H::slli(x.data(),  3), H::slli(x.data(), 10));
        case 2056: return H::add(H::slli(x.data(),  3), H::slli(x.data(), 11));
        // 2^4 + 2^b
        case   48: return H::add(H::slli(x.data(),  4), H::slli(x.data(),  5));
        case   80: return H::add(H::slli(x.data(),  4), H::slli(x.data(),  6));
        case  144: return H::add(H::slli(x.data(),  4), H::slli(x.data(),  7));
        case  272: return H::add(H::slli(x.data(),  4), H::slli(x.data(),  8));
        case  528: return H::add(H::slli(x.data(),  4), H::slli(x.data(),  9));
        case 1040: return H::add(H::slli(x.data(),  4), H::slli(x.data(), 10));
        case 2064: return H::add(H::slli(x.data(),  4), H::slli(x.data(), 11));
        // 2^5 + 2^b
        case   96: return H::add(H::slli(x.data(),  5), H::slli(x.data(),  6));
        case  160: return H::add(H::slli(x.data(),  5), H::slli(x.data(),  7));
        case  288: return H::add(H::slli(x.data(),  5), H::slli(x.data(),  8));
        case  544: return H::add(H::slli(x.data(),  5), H::slli(x.data(),  9));
        case 1056: return H::add(H::slli(x.data(),  5), H::slli(x.data(), 10));
        case 2080: return H::add(H::slli(x.data(),  5), H::slli(x.data(), 11));
        // 2^6 + 2^b
        case  192: return H::add(H::slli(x.data(),  6), H::slli(x.data(),  7));
        case  320: return H::add(H::slli(x.data(),  6), H::slli(x.data(),  8));
        case  576: return H::add(H::slli(x.data(),  6), H::slli(x.data(),  9));
        case 1088: return H::add(H::slli(x.data(),  6), H::slli(x.data(), 10));
        case 2112: return H::add(H::slli(x.data(),  6), H::slli(x.data(), 11));
        // 2^7 + 2^b
        case  384: return H::add(H::slli(x.data(),  7), H::slli(x.data(),  8));
        case  640: return H::add(H::slli(x.data(),  7), H::slli(x.data(),  9));
        case 1152: return H::add(H::slli(x.data(),  7), H::slli(x.data(), 10));
        case 2176: return H::add(H::slli(x.data(),  7), H::slli(x.data(), 11));
        // 2^8 + 2^b
        case  768: return H::add(H::slli(x.data(),  8), H::slli(x.data(),  9));
        case 1280: return H::add(H::slli(x.data(),  8), H::slli(x.data(), 10));
        case 2304: return H::add(H::slli(x.data(),  8), H::slli(x.data(), 11));
        // 2^9 + 2^b
        case 1536: return H::add(H::slli(x.data(),  9), H::slli(x.data(), 10));
        case 2560: return H::add(H::slli(x.data(),  9), H::slli(x.data(), 11));
        // 2^10 + 2^11
        case 3072: return H::add(H::slli(x.data(), 10), H::slli(x.data(), 11));
        }
    }
#endif
    // generic fallback: full vector multiply against a broadcast constant
    return H::mul(x.data(), H::set(constant));
}
490 | } // namespace SSE | |
491 | } // namespace Vc |