]> git.uio.no Git - u/mrichter/AliRoot.git/blame - Vc/include/Vc/sse/vectorhelper.tcc
Vc package added (version 0.6.79-dev)
[u/mrichter/AliRoot.git] / Vc / include / Vc / sse / vectorhelper.tcc
CommitLineData
f22341db 1/* This file is part of the Vc library.
2
3 Copyright (C) 2009-2011 Matthias Kretz <kretz@kde.org>
4
5 Vc is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation, either version 3 of
8 the License, or (at your option) any later version.
9
10 Vc is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17
18*/
19
20#include "casts.h"
21#include <cstdlib>
22
23namespace Vc
24{
25namespace SSE
26{
27
28////////////////////////////////////////////////////////////////////////////////////////////////////
29// float_v
// Loads 4 floats from 16-byte aligned memory into a float_v register.
template<> inline _M128 VectorHelper<_M128>::load(const float *x, AlignedFlag)
{
    return _mm_load_ps(x);
}

// Loads 4 floats from arbitrarily aligned memory (slower than the aligned load).
template<> inline _M128 VectorHelper<_M128>::load(const float *x, UnalignedFlag)
{
    return _mm_loadu_ps(x);
}

// Non-temporal (cache-bypassing) aligned load. _mm_stream_load here is a Vc
// wrapper (see casts.h) -- NOTE(review): presumably it falls back to a plain
// load where SSE4.1 streaming loads are unavailable; confirm in the wrapper.
template<> inline _M128 VectorHelper<_M128>::load(const float *x, StreamingAndAlignedFlag)
{
    return _mm_stream_load(x);
}

// Streaming + unaligned: no unaligned streaming-load instruction exists, so
// this simply forwards to the unaligned load.
template<> inline _M128 VectorHelper<_M128>::load(const float *x, StreamingAndUnalignedFlag)
{
    return load(x, Unaligned);
}
49
50////////////////////////////////////////////////////////////////////////////////////////////////////
51// stores
// Stores x to 16-byte aligned memory.
inline void VectorHelper<_M128>::store(float *mem, const VectorType x, AlignedFlag)
{
    _mm_store_ps(mem, x);
}
// Stores x to arbitrarily aligned memory.
inline void VectorHelper<_M128>::store(float *mem, const VectorType x, UnalignedFlag)
{
    _mm_storeu_ps(mem, x);
}
// Non-temporal store: bypasses the cache; mem must be 16-byte aligned.
inline void VectorHelper<_M128>::store(float *mem, const VectorType x, StreamingAndAlignedFlag)
{
    _mm_stream_ps(mem, x);
}
// Streaming store to unaligned memory: implemented with a byte-masked
// non-temporal store; an all-ones mask writes the full 16 bytes.
inline void VectorHelper<_M128>::store(float *mem, const VectorType x, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(_mm_castps_si128(x), _mm_setallone_si128(), reinterpret_cast<char *>(mem));
}
// Masked store (aligned): lanes selected by m take x, other lanes keep the
// previous memory contents (read-blend-write).
inline void VectorHelper<_M128>::store(float *mem, const VectorType x, const VectorType m, AlignedFlag)
{
    _mm_store_ps(mem, _mm_blendv_ps(_mm_load_ps(mem), x, m));
}
// Masked store (unaligned): same read-blend-write with unaligned accesses.
inline void VectorHelper<_M128>::store(float *mem, const VectorType x, const VectorType m, UnalignedFlag)
{
    _mm_storeu_ps(mem, _mm_blendv_ps(_mm_loadu_ps(mem), x, m));
}
// Masked streaming store: maskmoveu writes only the mask-selected bytes and
// never reads memory, so unselected lanes are left untouched.
inline void VectorHelper<_M128>::store(float *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag)
{
    _mm_maskmoveu_si128(_mm_castps_si128(x), _mm_castps_si128(m), reinterpret_cast<char *>(mem));
}
// Same as the aligned variant: maskmoveu has no alignment requirement.
inline void VectorHelper<_M128>::store(float *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(_mm_castps_si128(x), _mm_castps_si128(m), reinterpret_cast<char *>(mem));
}
84
85////////////////////////////////////////////////////////////////////////////////////////////////////
86// sfloat_v
87template<> inline M256 VectorHelper<M256>::load(const float *x, AlignedFlag)
88{
89 return VectorType::create(_mm_load_ps(x), _mm_load_ps(x + 4));
90}
91
92template<> inline M256 VectorHelper<M256>::load(const float *x, UnalignedFlag)
93{
94 return VectorType::create(_mm_loadu_ps(x), _mm_loadu_ps(x + 4));
95}
96
97template<> inline M256 VectorHelper<M256>::load(const float *x, StreamingAndAlignedFlag)
98{
99 return VectorType::create(_mm_stream_load(&x[0]), _mm_stream_load(&x[4]));
100}
101
102template<> inline M256 VectorHelper<M256>::load(const float *x, StreamingAndUnalignedFlag)
103{
104 return load(x, Unaligned);
105}
106
107////////////////////////////////////////////////////////////////////////////////////////////////////
108// stores
// Stores both halves of the 8-float vector to aligned memory.
inline void VectorHelper<M256>::store(float *mem, const VectorType &x, AlignedFlag)
{
    _mm_store_ps(mem, x[0]);
    _mm_store_ps(mem + 4, x[1]);
}
// Stores both halves to arbitrarily aligned memory.
inline void VectorHelper<M256>::store(float *mem, const VectorType &x, UnalignedFlag)
{
    _mm_storeu_ps(mem, x[0]);
    _mm_storeu_ps(mem + 4, x[1]);
}
// Non-temporal store of both halves; mem must be 16-byte aligned.
inline void VectorHelper<M256>::store(float *mem, const VectorType &x, StreamingAndAlignedFlag)
{
    _mm_stream_ps(mem, x[0]);
    _mm_stream_ps(mem + 4, x[1]);
}
// Streaming store to unaligned memory via byte-masked non-temporal stores;
// an all-ones mask writes each full 16-byte half.
inline void VectorHelper<M256>::store(float *mem, const VectorType &x, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(_mm_castps_si128(x[0]), _mm_setallone_si128(), reinterpret_cast<char *>(mem));
    _mm_maskmoveu_si128(_mm_castps_si128(x[1]), _mm_setallone_si128(), reinterpret_cast<char *>(mem + 4));
}
// Masked store (aligned): lanes selected by m take x, others keep the old
// memory contents (read-blend-write per half).
inline void VectorHelper<M256>::store(float *mem, const VectorType &x, const VectorType &m, AlignedFlag)
{
    _mm_store_ps(mem, _mm_blendv_ps(_mm_load_ps(mem), x[0], m[0]));
    _mm_store_ps(mem + 4, _mm_blendv_ps(_mm_load_ps(mem + 4), x[1], m[1]));
}
// Masked store (unaligned): same read-blend-write with unaligned accesses.
inline void VectorHelper<M256>::store(float *mem, const VectorType &x, const VectorType &m, UnalignedFlag)
{
    _mm_storeu_ps(mem, _mm_blendv_ps(_mm_loadu_ps(mem), x[0], m[0]));
    _mm_storeu_ps(mem + 4, _mm_blendv_ps(_mm_loadu_ps(mem + 4), x[1], m[1]));
}
// Masked streaming store: writes only mask-selected bytes, no prior read.
inline void VectorHelper<M256>::store(float *mem, const VectorType &x, const VectorType &m, StreamingAndAlignedFlag)
{
    _mm_maskmoveu_si128(_mm_castps_si128(x[0]), _mm_castps_si128(m[0]), reinterpret_cast<char *>(mem));
    _mm_maskmoveu_si128(_mm_castps_si128(x[1]), _mm_castps_si128(m[1]), reinterpret_cast<char *>(mem + 4));
}
// Same as the aligned variant: maskmoveu has no alignment requirement.
inline void VectorHelper<M256>::store(float *mem, const VectorType &x, const VectorType &m, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(_mm_castps_si128(x[0]), _mm_castps_si128(m[0]), reinterpret_cast<char *>(mem));
    _mm_maskmoveu_si128(_mm_castps_si128(x[1]), _mm_castps_si128(m[1]), reinterpret_cast<char *>(mem + 4));
}
149
150////////////////////////////////////////////////////////////////////////////////////////////////////
151// double_v
// Loads 2 doubles from 16-byte aligned memory.
template<> inline _M128D VectorHelper<_M128D>::load(const double *x, AlignedFlag)
{
    return _mm_load_pd(x);
}

// Loads 2 doubles from arbitrarily aligned memory.
template<> inline _M128D VectorHelper<_M128D>::load(const double *x, UnalignedFlag)
{
    return _mm_loadu_pd(x);
}

// Non-temporal aligned load via the Vc _mm_stream_load wrapper (casts.h).
template<> inline _M128D VectorHelper<_M128D>::load(const double *x, StreamingAndAlignedFlag)
{
    return _mm_stream_load(x);
}

// Streaming + unaligned: forwarded to the unaligned load (no such instruction).
template<> inline _M128D VectorHelper<_M128D>::load(const double *x, StreamingAndUnalignedFlag)
{
    return load(x, Unaligned);
}
171
172////////////////////////////////////////////////////////////////////////////////////////////////////
173// stores
// Stores x to 16-byte aligned memory.
inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, AlignedFlag)
{
    _mm_store_pd(mem, x);
}
// Stores x to arbitrarily aligned memory.
inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, UnalignedFlag)
{
    _mm_storeu_pd(mem, x);
}
// Non-temporal store; mem must be 16-byte aligned.
inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, StreamingAndAlignedFlag)
{
    _mm_stream_pd(mem, x);
}
// Streaming store to unaligned memory via a byte-masked non-temporal store;
// an all-ones mask writes the full 16 bytes.
inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(_mm_castpd_si128(x), _mm_setallone_si128(), reinterpret_cast<char *>(mem));
}
// Masked store (aligned): lanes selected by m take x, others keep the old
// memory contents (read-blend-write).
inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, const VectorType m, AlignedFlag)
{
    _mm_store_pd(mem, _mm_blendv_pd(_mm_load_pd(mem), x, m));
}
// Masked store (unaligned): same read-blend-write with unaligned accesses.
inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, const VectorType m, UnalignedFlag)
{
    _mm_storeu_pd(mem, _mm_blendv_pd(_mm_loadu_pd(mem), x, m));
}
// Masked streaming store: writes only mask-selected bytes, no prior read.
inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag)
{
    _mm_maskmoveu_si128(_mm_castpd_si128(x), _mm_castpd_si128(m), reinterpret_cast<char *>(mem));
}
// Same as the aligned variant: maskmoveu has no alignment requirement.
inline void VectorHelper<_M128D>::store(double *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(_mm_castpd_si128(x), _mm_castpd_si128(m), reinterpret_cast<char *>(mem));
}
206
207////////////////////////////////////////////////////////////////////////////////////////////////////
208// int_v, uint_v, short_v, ushort_v
// Loads one 128-bit integer vector (int/uint/short/ushort lanes, depending on
// T) from 16-byte aligned memory.
template<typename T> inline _M128I VectorHelper<_M128I>::load(const T *x, AlignedFlag)
{
    return _mm_load_si128(reinterpret_cast<const VectorType *>(x));
}

// Loads one 128-bit integer vector from arbitrarily aligned memory.
template<typename T> inline _M128I VectorHelper<_M128I>::load(const T *x, UnalignedFlag)
{
    return _mm_loadu_si128(reinterpret_cast<const VectorType *>(x));
}

// Non-temporal aligned load. NOTE(review): passes the T* straight through
// while the other loads cast to VectorType*; presumably the Vc
// _mm_stream_load wrapper (casts.h) accepts any pointer type -- confirm.
template<typename T> inline _M128I VectorHelper<_M128I>::load(const T *x, StreamingAndAlignedFlag)
{
    return _mm_stream_load(x);
}

// Streaming + unaligned: forwarded to the unaligned load (no such instruction).
template<typename T> inline _M128I VectorHelper<_M128I>::load(const T *x, StreamingAndUnalignedFlag)
{
    return load(x, Unaligned);
}
228
229////////////////////////////////////////////////////////////////////////////////////////////////////
230// stores
// Stores the integer vector to 16-byte aligned memory.
template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, AlignedFlag)
{
    _mm_store_si128(reinterpret_cast<VectorType *>(mem), x);
}
// Stores the integer vector to arbitrarily aligned memory.
template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, UnalignedFlag)
{
    _mm_storeu_si128(reinterpret_cast<VectorType *>(mem), x);
}
// Non-temporal store; mem must be 16-byte aligned.
template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, StreamingAndAlignedFlag)
{
    _mm_stream_si128(reinterpret_cast<VectorType *>(mem), x);
}
// Streaming store to unaligned memory via a byte-masked non-temporal store;
// an all-ones mask writes the full 16 bytes.
template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(x, _mm_setallone_si128(), reinterpret_cast<char *>(mem));
}
// Masked store (aligned): byte-blends x over the current memory contents and
// writes the result back (delegates to the aligned load/store above).
template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, const VectorType m, AlignedFlag align)
{
    store(mem, _mm_blendv_epi8(load(mem, align), x, m), align);
}
// Masked store (unaligned): same read-blend-write with unaligned accesses.
template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, const VectorType m, UnalignedFlag align)
{
    store(mem, _mm_blendv_epi8(load(mem, align), x, m), align);
}
// Masked streaming store: writes only mask-selected bytes, no prior read.
template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag)
{
    _mm_maskmoveu_si128(x, m, reinterpret_cast<char *>(mem));
}
// Same as the aligned variant: maskmoveu has no alignment requirement.
template<typename T> inline void VectorHelper<_M128I>::store(T *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(x, m, reinterpret_cast<char *>(mem));
}
263
// Sorts the eight 16-bit lanes of x in ascending order using a branch-free
// sorting network. Uses _mm_min/max_epi16, i.e. lanes are compared as
// *signed* 16-bit integers.
template<> inline _M128I SortHelper<_M128I, 8>::sort(_M128I x)
{
    _M128I lo, hi, y;
    // sort pairs: swap neighbours within each pair, then keep min in the even
    // lane and max in the odd lane (blend mask 0xaa selects the odd lanes)
    y = _mm_shufflelo_epi16(_mm_shufflehi_epi16(x, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
    lo = _mm_min_epi16(x, y);
    hi = _mm_max_epi16(x, y);
    x = _mm_blend_epi16(lo, hi, 0xaa);

    // merge left and right quads: reverse each 4-lane quad, compare/exchange,
    // then a 1-lane shift stage finishes sorting within each quad
    y = _mm_shufflelo_epi16(_mm_shufflehi_epi16(x, _MM_SHUFFLE(0, 1, 2, 3)), _MM_SHUFFLE(0, 1, 2, 3));
    lo = _mm_min_epi16(x, y);
    hi = _mm_max_epi16(x, y);
    x = _mm_blend_epi16(lo, hi, 0xcc);
    y = _mm_srli_si128(x, 2);
    lo = _mm_min_epi16(x, y);
    hi = _mm_max_epi16(x, y);
    x = _mm_blend_epi16(lo, _mm_slli_si128(hi, 2), 0xaa);

    // merge quads into octs: compare each lane of the low quad against the
    // reversed high quad, then two interleave-and-exchange passes complete
    // the merge of the two sorted quads
    y = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2));
    y = _mm_shufflelo_epi16(y, _MM_SHUFFLE(0, 1, 2, 3));
    lo = _mm_min_epi16(x, y);
    hi = _mm_max_epi16(x, y);

    x = _mm_unpacklo_epi16(lo, hi);
    y = _mm_srli_si128(x, 8);
    lo = _mm_min_epi16(x, y);
    hi = _mm_max_epi16(x, y);

    x = _mm_unpacklo_epi16(lo, hi);
    y = _mm_srli_si128(x, 8);
    lo = _mm_min_epi16(x, y);
    hi = _mm_max_epi16(x, y);

    return _mm_unpacklo_epi16(lo, hi);
}
// Sorts the four 32-bit lanes of x in ascending order with a branch-free
// sorting network. Uses _mm_min/max_epi32 (SSE4.1), i.e. *signed* comparison.
template<> inline _M128I SortHelper<_M128I, 4>::sort(_M128I x)
{
    /*
    // in 16,67% of the cases the merge can be replaced by an append

    // x = [a b c d]
    // y = [c d a b]
    _M128I y = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2));
    _M128I l = _mm_min_epi32(x, y); // min[ac bd ac bd]
    _M128I h = _mm_max_epi32(x, y); // max[ac bd ac bd]
    if (IS_UNLIKELY(_mm_cvtsi128_si32(h) <= l[1])) { // l[0] < h[0] < l[1] < h[1]
        return _mm_unpacklo_epi32(l, h);
    }
    // h[0] > l[1]
    */

    // sort pairs: swap neighbours, compare/exchange, and rebuild x and y so
    // that x holds the two pair-minima interleaved and y the two pair-maxima
    _M128I y = _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1));
    _M128I l = _mm_min_epi32(x, y);
    _M128I h = _mm_max_epi32(x, y);
    x = _mm_unpacklo_epi32(l, h);
    y = _mm_unpackhi_epi32(h, l);

    // sort quads: merge the two sorted pairs; the final stage compares the
    // middle elements (y = high 64 bits of x)
    l = _mm_min_epi32(x, y);
    h = _mm_max_epi32(x, y);
    x = _mm_unpacklo_epi32(l, h);
    y = _mm_unpackhi_epi64(x, x);

    l = _mm_min_epi32(x, y);
    h = _mm_max_epi32(x, y);
    return _mm_unpacklo_epi32(l, h);
}
334 template<> inline _M128 SortHelper<_M128, 4>::sort(_M128 x)
335 {
336 _M128 y = _mm_shuffle_ps(x, x, _MM_SHUFFLE(2, 3, 0, 1));
337 _M128 l = _mm_min_ps(x, y);
338 _M128 h = _mm_max_ps(x, y);
339 x = _mm_unpacklo_ps(l, h);
340 y = _mm_unpackhi_ps(h, l);
341
342 l = _mm_min_ps(x, y);
343 h = _mm_max_ps(x, y);
344 x = _mm_unpacklo_ps(l, h);
345 y = _mm_movehl_ps(x, x);
346
347 l = _mm_min_ps(x, y);
348 h = _mm_max_ps(x, y);
349 return _mm_unpacklo_ps(l, h);
350//X _M128 k = _mm_cmpgt_ps(x, y);
351//X k = _mm_shuffle_ps(k, k, _MM_SHUFFLE(2, 2, 0, 0));
352//X x = _mm_blendv_ps(x, y, k);
353//X y = _mm_shuffle_ps(x, x, _MM_SHUFFLE(1, 0, 3, 2));
354//X k = _mm_cmpgt_ps(x, y);
355//X k = _mm_shuffle_ps(k, k, _MM_SHUFFLE(1, 0, 1, 0));
356//X x = _mm_blendv_ps(x, y, k);
357//X y = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 1, 2, 0));
358//X k = _mm_cmpgt_ps(x, y);
359//X k = _mm_shuffle_ps(k, k, _MM_SHUFFLE(0, 1, 1, 0));
360//X return _mm_blendv_ps(x, y, k);
361 }
// Sorts all eight float lanes of the two-register vector _x in ascending
// order: each 4-lane half is sorted independently, then the halves are
// merged with a branch-free bitonic-style merge network.
template<> inline M256 SortHelper<M256, 8>::sort(const M256 &_x)
{
    M256 x = _x; // work on a mutable copy; the result is returned by value
    typedef SortHelper<_M128, 4> H;

    _M128 a, b, l, h;
    a = H::sort(x[0]);
    b = H::sort(x[1]);

    // merge: reverse b so that (a, b) form a bitonic sequence, then three
    // compare/exchange + interleave passes produce the fully sorted result
    b = _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 1, 2, 3));
    l = _mm_min_ps(a, b);
    h = _mm_max_ps(a, b);

    a = _mm_unpacklo_ps(l, h);
    b = _mm_unpackhi_ps(l, h);
    l = _mm_min_ps(a, b);
    h = _mm_max_ps(a, b);

    a = _mm_unpacklo_ps(l, h);
    b = _mm_unpackhi_ps(l, h);
    l = _mm_min_ps(a, b);
    h = _mm_max_ps(a, b);

    x[0] = _mm_unpacklo_ps(l, h);
    x[1] = _mm_unpackhi_ps(l, h);
    return x;
}
390 template<> inline _M128D SortHelper<_M128D, 2>::sort(_M128D x)
391 {
392 const _M128D y = _mm_shuffle_pd(x, x, _MM_SHUFFLE2(0, 1));
393 return _mm_unpacklo_pd(_mm_min_sd(x, y), _mm_max_sd(x, y));
394 }
395
// Multiplies every lane of x with the compile-time constant `constant`.
// For special constants (powers of two, and -- without SSE4.1 -- sums of two
// powers of two) the multiplication is strength-reduced to shifts and adds,
// effectively encoding the factor in the instruction(s). Since `constant` is
// a template parameter, the switch folds away at compile time.
template<typename IndexType, unsigned int constant> inline IndexType mulConst(const IndexType &x) {
    typedef VectorHelper<typename IndexType::EntryType> H;
    // powers of two up to 2048: a single left shift
    switch (constant) {
    case 0: return H::zero();
    case 1: return x;
    case 2: return H::slli(x.data(), 1);
    case 4: return H::slli(x.data(), 2);
    case 8: return H::slli(x.data(), 3);
    case 16: return H::slli(x.data(), 4);
    case 32: return H::slli(x.data(), 5);
    case 64: return H::slli(x.data(), 6);
    case 128: return H::slli(x.data(), 7);
    case 256: return H::slli(x.data(), 8);
    case 512: return H::slli(x.data(), 9);
    case 1024: return H::slli(x.data(), 10);
    case 2048: return H::slli(x.data(), 11);
    }
#ifndef VC_IMPL_SSE4_1
    // without SSE 4.1 int multiplication is not so nice
    // (no _mm_mullo_epi32), so constants that are the sum of at most two
    // powers of two are handled with shift+add instead
    if (sizeof(typename IndexType::EntryType) == 4) {
        switch (constant) {
        case 3: return H::add( x.data() , H::slli(x.data(), 1));
        case 5: return H::add( x.data() , H::slli(x.data(), 2));
        case 9: return H::add( x.data() , H::slli(x.data(), 3));
        case 17: return H::add( x.data() , H::slli(x.data(), 4));
        case 33: return H::add( x.data() , H::slli(x.data(), 5));
        case 65: return H::add( x.data() , H::slli(x.data(), 6));
        case 129: return H::add( x.data() , H::slli(x.data(), 7));
        case 257: return H::add( x.data() , H::slli(x.data(), 8));
        case 513: return H::add( x.data() , H::slli(x.data(), 9));
        case 1025: return H::add( x.data() , H::slli(x.data(), 10));
        case 2049: return H::add( x.data() , H::slli(x.data(), 11));
        case 6: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 2));
        case 10: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 3));
        case 18: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 4));
        case 34: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 5));
        case 66: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 6));
        case 130: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 7));
        case 258: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 8));
        case 514: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 9));
        case 1026: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 10));
        case 2050: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 11));
        case 12: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 3));
        case 20: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 4));
        case 36: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 5));
        case 68: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 6));
        case 132: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 7));
        case 260: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 8));
        case 516: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 9));
        case 1028: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 10));
        case 2052: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 11));
        case 24: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 4));
        case 40: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 5));
        case 72: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 6));
        case 136: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 7));
        case 264: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 8));
        case 520: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 9));
        case 1032: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 10));
        case 2056: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 11));
        case 48: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 5));
        case 80: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 6));
        case 144: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 7));
        case 272: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 8));
        case 528: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 9));
        case 1040: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 10));
        case 2064: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 11));
        case 96: return H::add(H::slli(x.data(), 5), H::slli(x.data(), 6));
        case 160: return H::add(H::slli(x.data(), 5), H::slli(x.data(), 7));
        case 288: return H::add(H::slli(x.data(), 5), H::slli(x.data(), 8));
        case 544: return H::add(H::slli(x.data(), 5), H::slli(x.data(), 9));
        case 1056: return H::add(H::slli(x.data(), 5), H::slli(x.data(), 10));
        case 2080: return H::add(H::slli(x.data(), 5), H::slli(x.data(), 11));
        case 192: return H::add(H::slli(x.data(), 6), H::slli(x.data(), 7));
        case 320: return H::add(H::slli(x.data(), 6), H::slli(x.data(), 8));
        case 576: return H::add(H::slli(x.data(), 6), H::slli(x.data(), 9));
        case 1088: return H::add(H::slli(x.data(), 6), H::slli(x.data(), 10));
        case 2112: return H::add(H::slli(x.data(), 6), H::slli(x.data(), 11));
        case 384: return H::add(H::slli(x.data(), 7), H::slli(x.data(), 8));
        case 640: return H::add(H::slli(x.data(), 7), H::slli(x.data(), 9));
        case 1152: return H::add(H::slli(x.data(), 7), H::slli(x.data(), 10));
        case 2176: return H::add(H::slli(x.data(), 7), H::slli(x.data(), 11));
        case 768: return H::add(H::slli(x.data(), 8), H::slli(x.data(), 9));
        case 1280: return H::add(H::slli(x.data(), 8), H::slli(x.data(), 10));
        case 2304: return H::add(H::slli(x.data(), 8), H::slli(x.data(), 11));
        case 1536: return H::add(H::slli(x.data(), 9), H::slli(x.data(), 10));
        case 2560: return H::add(H::slli(x.data(), 9), H::slli(x.data(), 11));
        case 3072: return H::add(H::slli(x.data(),10), H::slli(x.data(), 11));
        }
    }
#endif
    // general case: plain vector multiplication with a broadcast constant
    return H::mul(x.data(), H::set(constant));
}
490} // namespace SSE
491} // namespace Vc