/* Vc/include/Vc/sse/vectorhelper.tcc */
/*  This file is part of the Vc library.

    Copyright (C) 2009-2011 Matthias Kretz <kretz@kde.org>

    Vc is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as
    published by the Free Software Foundation, either version 3 of
    the License, or (at your option) any later version.

    Vc is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with Vc.  If not, see <http://www.gnu.org/licenses/>.

*/
#include "casts.h"
#include <cstdlib>

namespace AliRoot {
namespace Vc
{
namespace SSE
{
28
29////////////////////////////////////////////////////////////////////////////////////////////////////
30// float_v
c017a39f 31template<> Vc_ALWAYS_INLINE Vc_PURE _M128 VectorHelper<_M128>::load(const float *x, AlignedFlag)
f22341db 32{
33 return _mm_load_ps(x);
34}
35
c017a39f 36template<> Vc_ALWAYS_INLINE Vc_PURE _M128 VectorHelper<_M128>::load(const float *x, UnalignedFlag)
f22341db 37{
38 return _mm_loadu_ps(x);
39}
40
c017a39f 41template<> Vc_ALWAYS_INLINE Vc_PURE _M128 VectorHelper<_M128>::load(const float *x, StreamingAndAlignedFlag)
f22341db 42{
43 return _mm_stream_load(x);
44}
45
c017a39f 46template<> Vc_ALWAYS_INLINE Vc_PURE _M128 VectorHelper<_M128>::load(const float *x, StreamingAndUnalignedFlag)
f22341db 47{
48 return load(x, Unaligned);
49}
50
51////////////////////////////////////////////////////////////////////////////////////////////////////
52// stores
c017a39f 53Vc_ALWAYS_INLINE void VectorHelper<_M128>::store(float *mem, const VectorType x, AlignedFlag)
f22341db 54{
55 _mm_store_ps(mem, x);
56}
c017a39f 57Vc_ALWAYS_INLINE void VectorHelper<_M128>::store(float *mem, const VectorType x, UnalignedFlag)
f22341db 58{
59 _mm_storeu_ps(mem, x);
60}
c017a39f 61Vc_ALWAYS_INLINE void VectorHelper<_M128>::store(float *mem, const VectorType x, StreamingAndAlignedFlag)
f22341db 62{
63 _mm_stream_ps(mem, x);
64}
c017a39f 65Vc_ALWAYS_INLINE void VectorHelper<_M128>::store(float *mem, const VectorType x, StreamingAndUnalignedFlag)
f22341db 66{
67 _mm_maskmoveu_si128(_mm_castps_si128(x), _mm_setallone_si128(), reinterpret_cast<char *>(mem));
68}
c017a39f 69Vc_ALWAYS_INLINE void VectorHelper<_M128>::store(float *mem, const VectorType x, const VectorType m, AlignedFlag)
f22341db 70{
71 _mm_store_ps(mem, _mm_blendv_ps(_mm_load_ps(mem), x, m));
72}
c017a39f 73Vc_ALWAYS_INLINE void VectorHelper<_M128>::store(float *mem, const VectorType x, const VectorType m, UnalignedFlag)
f22341db 74{
75 _mm_storeu_ps(mem, _mm_blendv_ps(_mm_loadu_ps(mem), x, m));
76}
c017a39f 77Vc_ALWAYS_INLINE void VectorHelper<_M128>::store(float *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag)
f22341db 78{
79 _mm_maskmoveu_si128(_mm_castps_si128(x), _mm_castps_si128(m), reinterpret_cast<char *>(mem));
80}
c017a39f 81Vc_ALWAYS_INLINE void VectorHelper<_M128>::store(float *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag)
f22341db 82{
83 _mm_maskmoveu_si128(_mm_castps_si128(x), _mm_castps_si128(m), reinterpret_cast<char *>(mem));
84}
85
86////////////////////////////////////////////////////////////////////////////////////////////////////
87// sfloat_v
c017a39f 88template<> Vc_ALWAYS_INLINE Vc_PURE M256 VectorHelper<M256>::load(const float *x, AlignedFlag)
f22341db 89{
90 return VectorType::create(_mm_load_ps(x), _mm_load_ps(x + 4));
91}
92
c017a39f 93template<> Vc_ALWAYS_INLINE Vc_PURE M256 VectorHelper<M256>::load(const float *x, UnalignedFlag)
f22341db 94{
95 return VectorType::create(_mm_loadu_ps(x), _mm_loadu_ps(x + 4));
96}
97
c017a39f 98template<> Vc_ALWAYS_INLINE Vc_PURE M256 VectorHelper<M256>::load(const float *x, StreamingAndAlignedFlag)
f22341db 99{
100 return VectorType::create(_mm_stream_load(&x[0]), _mm_stream_load(&x[4]));
101}
102
c017a39f 103template<> Vc_ALWAYS_INLINE Vc_PURE M256 VectorHelper<M256>::load(const float *x, StreamingAndUnalignedFlag)
f22341db 104{
105 return load(x, Unaligned);
106}
107
108////////////////////////////////////////////////////////////////////////////////////////////////////
109// stores
c017a39f 110Vc_ALWAYS_INLINE void VectorHelper<M256>::store(float *mem, VectorTypeArg x, AlignedFlag)
f22341db 111{
112 _mm_store_ps(mem, x[0]);
113 _mm_store_ps(mem + 4, x[1]);
114}
c017a39f 115Vc_ALWAYS_INLINE void VectorHelper<M256>::store(float *mem, VectorTypeArg x, UnalignedFlag)
f22341db 116{
117 _mm_storeu_ps(mem, x[0]);
118 _mm_storeu_ps(mem + 4, x[1]);
119}
c017a39f 120Vc_ALWAYS_INLINE void VectorHelper<M256>::store(float *mem, VectorTypeArg x, StreamingAndAlignedFlag)
f22341db 121{
122 _mm_stream_ps(mem, x[0]);
123 _mm_stream_ps(mem + 4, x[1]);
124}
c017a39f 125Vc_ALWAYS_INLINE void VectorHelper<M256>::store(float *mem, VectorTypeArg x, StreamingAndUnalignedFlag)
f22341db 126{
127 _mm_maskmoveu_si128(_mm_castps_si128(x[0]), _mm_setallone_si128(), reinterpret_cast<char *>(mem));
128 _mm_maskmoveu_si128(_mm_castps_si128(x[1]), _mm_setallone_si128(), reinterpret_cast<char *>(mem + 4));
129}
c017a39f 130Vc_ALWAYS_INLINE void VectorHelper<M256>::store(float *mem, VectorTypeArg x, VectorTypeArg m, AlignedFlag)
f22341db 131{
132 _mm_store_ps(mem, _mm_blendv_ps(_mm_load_ps(mem), x[0], m[0]));
133 _mm_store_ps(mem + 4, _mm_blendv_ps(_mm_load_ps(mem + 4), x[1], m[1]));
134}
c017a39f 135Vc_ALWAYS_INLINE void VectorHelper<M256>::store(float *mem, VectorTypeArg x, VectorTypeArg m, UnalignedFlag)
f22341db 136{
137 _mm_storeu_ps(mem, _mm_blendv_ps(_mm_loadu_ps(mem), x[0], m[0]));
138 _mm_storeu_ps(mem + 4, _mm_blendv_ps(_mm_loadu_ps(mem + 4), x[1], m[1]));
139}
c017a39f 140Vc_ALWAYS_INLINE void VectorHelper<M256>::store(float *mem, VectorTypeArg x, VectorTypeArg m, StreamingAndAlignedFlag)
f22341db 141{
142 _mm_maskmoveu_si128(_mm_castps_si128(x[0]), _mm_castps_si128(m[0]), reinterpret_cast<char *>(mem));
143 _mm_maskmoveu_si128(_mm_castps_si128(x[1]), _mm_castps_si128(m[1]), reinterpret_cast<char *>(mem + 4));
144}
c017a39f 145Vc_ALWAYS_INLINE void VectorHelper<M256>::store(float *mem, VectorTypeArg x, VectorTypeArg m, StreamingAndUnalignedFlag)
f22341db 146{
147 _mm_maskmoveu_si128(_mm_castps_si128(x[0]), _mm_castps_si128(m[0]), reinterpret_cast<char *>(mem));
148 _mm_maskmoveu_si128(_mm_castps_si128(x[1]), _mm_castps_si128(m[1]), reinterpret_cast<char *>(mem + 4));
149}
150
151////////////////////////////////////////////////////////////////////////////////////////////////////
152// double_v
c017a39f 153template<> Vc_ALWAYS_INLINE Vc_PURE _M128D VectorHelper<_M128D>::load(const double *x, AlignedFlag)
f22341db 154{
155 return _mm_load_pd(x);
156}
157
c017a39f 158template<> Vc_ALWAYS_INLINE Vc_PURE _M128D VectorHelper<_M128D>::load(const double *x, UnalignedFlag)
f22341db 159{
160 return _mm_loadu_pd(x);
161}
162
c017a39f 163template<> Vc_ALWAYS_INLINE Vc_PURE _M128D VectorHelper<_M128D>::load(const double *x, StreamingAndAlignedFlag)
f22341db 164{
165 return _mm_stream_load(x);
166}
167
c017a39f 168template<> Vc_ALWAYS_INLINE Vc_PURE _M128D VectorHelper<_M128D>::load(const double *x, StreamingAndUnalignedFlag)
f22341db 169{
170 return load(x, Unaligned);
171}
172
173////////////////////////////////////////////////////////////////////////////////////////////////////
174// stores
c017a39f 175Vc_ALWAYS_INLINE void VectorHelper<_M128D>::store(double *mem, const VectorType x, AlignedFlag)
f22341db 176{
177 _mm_store_pd(mem, x);
178}
c017a39f 179Vc_ALWAYS_INLINE void VectorHelper<_M128D>::store(double *mem, const VectorType x, UnalignedFlag)
f22341db 180{
181 _mm_storeu_pd(mem, x);
182}
c017a39f 183Vc_ALWAYS_INLINE void VectorHelper<_M128D>::store(double *mem, const VectorType x, StreamingAndAlignedFlag)
f22341db 184{
185 _mm_stream_pd(mem, x);
186}
c017a39f 187Vc_ALWAYS_INLINE void VectorHelper<_M128D>::store(double *mem, const VectorType x, StreamingAndUnalignedFlag)
f22341db 188{
189 _mm_maskmoveu_si128(_mm_castpd_si128(x), _mm_setallone_si128(), reinterpret_cast<char *>(mem));
190}
c017a39f 191Vc_ALWAYS_INLINE void VectorHelper<_M128D>::store(double *mem, const VectorType x, const VectorType m, AlignedFlag)
f22341db 192{
193 _mm_store_pd(mem, _mm_blendv_pd(_mm_load_pd(mem), x, m));
194}
c017a39f 195Vc_ALWAYS_INLINE void VectorHelper<_M128D>::store(double *mem, const VectorType x, const VectorType m, UnalignedFlag)
f22341db 196{
197 _mm_storeu_pd(mem, _mm_blendv_pd(_mm_loadu_pd(mem), x, m));
198}
c017a39f 199Vc_ALWAYS_INLINE void VectorHelper<_M128D>::store(double *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag)
f22341db 200{
201 _mm_maskmoveu_si128(_mm_castpd_si128(x), _mm_castpd_si128(m), reinterpret_cast<char *>(mem));
202}
c017a39f 203Vc_ALWAYS_INLINE void VectorHelper<_M128D>::store(double *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag)
f22341db 204{
205 _mm_maskmoveu_si128(_mm_castpd_si128(x), _mm_castpd_si128(m), reinterpret_cast<char *>(mem));
206}
207
208////////////////////////////////////////////////////////////////////////////////////////////////////
209// int_v, uint_v, short_v, ushort_v
c017a39f 210template<typename T> Vc_ALWAYS_INLINE Vc_PURE _M128I VectorHelper<_M128I>::load(const T *x, AlignedFlag)
f22341db 211{
212 return _mm_load_si128(reinterpret_cast<const VectorType *>(x));
213}
214
c017a39f 215template<typename T> Vc_ALWAYS_INLINE Vc_PURE _M128I VectorHelper<_M128I>::load(const T *x, UnalignedFlag)
f22341db 216{
217 return _mm_loadu_si128(reinterpret_cast<const VectorType *>(x));
218}
219
c017a39f 220template<typename T> Vc_ALWAYS_INLINE Vc_PURE _M128I VectorHelper<_M128I>::load(const T *x, StreamingAndAlignedFlag)
f22341db 221{
222 return _mm_stream_load(x);
223}
224
c017a39f 225template<typename T> Vc_ALWAYS_INLINE Vc_PURE _M128I VectorHelper<_M128I>::load(const T *x, StreamingAndUnalignedFlag)
f22341db 226{
227 return load(x, Unaligned);
228}
229
230////////////////////////////////////////////////////////////////////////////////////////////////////
231// stores
c017a39f 232template<typename T> Vc_ALWAYS_INLINE void VectorHelper<_M128I>::store(T *mem, const VectorType x, AlignedFlag)
f22341db 233{
234 _mm_store_si128(reinterpret_cast<VectorType *>(mem), x);
235}
c017a39f 236template<typename T> Vc_ALWAYS_INLINE void VectorHelper<_M128I>::store(T *mem, const VectorType x, UnalignedFlag)
f22341db 237{
238 _mm_storeu_si128(reinterpret_cast<VectorType *>(mem), x);
239}
c017a39f 240template<typename T> Vc_ALWAYS_INLINE void VectorHelper<_M128I>::store(T *mem, const VectorType x, StreamingAndAlignedFlag)
f22341db 241{
242 _mm_stream_si128(reinterpret_cast<VectorType *>(mem), x);
243}
c017a39f 244template<typename T> Vc_ALWAYS_INLINE void VectorHelper<_M128I>::store(T *mem, const VectorType x, StreamingAndUnalignedFlag)
f22341db 245{
246 _mm_maskmoveu_si128(x, _mm_setallone_si128(), reinterpret_cast<char *>(mem));
247}
c017a39f 248template<typename T> Vc_ALWAYS_INLINE void VectorHelper<_M128I>::store(T *mem, const VectorType x, const VectorType m, AlignedFlag align)
f22341db 249{
250 store(mem, _mm_blendv_epi8(load(mem, align), x, m), align);
251}
c017a39f 252template<typename T> Vc_ALWAYS_INLINE void VectorHelper<_M128I>::store(T *mem, const VectorType x, const VectorType m, UnalignedFlag align)
f22341db 253{
254 store(mem, _mm_blendv_epi8(load(mem, align), x, m), align);
255}
c017a39f 256template<typename T> Vc_ALWAYS_INLINE void VectorHelper<_M128I>::store(T *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag)
f22341db 257{
258 _mm_maskmoveu_si128(x, m, reinterpret_cast<char *>(mem));
259}
c017a39f 260template<typename T> Vc_ALWAYS_INLINE void VectorHelper<_M128I>::store(T *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag)
f22341db 261{
262 _mm_maskmoveu_si128(x, m, reinterpret_cast<char *>(mem));
263}
264
c017a39f 265 template<> inline Vc_CONST _M128I SortHelper<_M128I, 8>::sort(_M128I x)
f22341db 266 {
267 _M128I lo, hi, y;
268 // sort pairs
c017a39f 269 y = Mem::permute<X1, X0, X3, X2, X5, X4, X7, X6>(x);
f22341db 270 lo = _mm_min_epi16(x, y);
271 hi = _mm_max_epi16(x, y);
272 x = _mm_blend_epi16(lo, hi, 0xaa);
273
274 // merge left and right quads
c017a39f 275 y = Mem::permute<X3, X2, X1, X0, X7, X6, X5, X4>(x);
f22341db 276 lo = _mm_min_epi16(x, y);
277 hi = _mm_max_epi16(x, y);
278 x = _mm_blend_epi16(lo, hi, 0xcc);
279 y = _mm_srli_si128(x, 2);
280 lo = _mm_min_epi16(x, y);
281 hi = _mm_max_epi16(x, y);
282 x = _mm_blend_epi16(lo, _mm_slli_si128(hi, 2), 0xaa);
283
284 // merge quads into octs
285 y = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2));
286 y = _mm_shufflelo_epi16(y, _MM_SHUFFLE(0, 1, 2, 3));
287 lo = _mm_min_epi16(x, y);
288 hi = _mm_max_epi16(x, y);
289
290 x = _mm_unpacklo_epi16(lo, hi);
291 y = _mm_srli_si128(x, 8);
292 lo = _mm_min_epi16(x, y);
293 hi = _mm_max_epi16(x, y);
294
295 x = _mm_unpacklo_epi16(lo, hi);
296 y = _mm_srli_si128(x, 8);
297 lo = _mm_min_epi16(x, y);
298 hi = _mm_max_epi16(x, y);
299
300 return _mm_unpacklo_epi16(lo, hi);
301 }
c017a39f 302 template<> inline Vc_CONST _M128I SortHelper<_M128I, 4>::sort(_M128I x)
f22341db 303 {
304 /*
305 // in 16,67% of the cases the merge can be replaced by an append
306
307 // x = [a b c d]
308 // y = [c d a b]
309 _M128I y = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2));
310 _M128I l = _mm_min_epi32(x, y); // min[ac bd ac bd]
311 _M128I h = _mm_max_epi32(x, y); // max[ac bd ac bd]
312 if (IS_UNLIKELY(_mm_cvtsi128_si32(h) <= l[1])) { // l[0] < h[0] < l[1] < h[1]
313 return _mm_unpacklo_epi32(l, h);
314 }
315 // h[0] > l[1]
316 */
317
318 // sort pairs
319 _M128I y = _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1));
320 _M128I l = _mm_min_epi32(x, y);
321 _M128I h = _mm_max_epi32(x, y);
322 x = _mm_unpacklo_epi32(l, h);
323 y = _mm_unpackhi_epi32(h, l);
324
325 // sort quads
326 l = _mm_min_epi32(x, y);
327 h = _mm_max_epi32(x, y);
328 x = _mm_unpacklo_epi32(l, h);
329 y = _mm_unpackhi_epi64(x, x);
330
331 l = _mm_min_epi32(x, y);
332 h = _mm_max_epi32(x, y);
333 return _mm_unpacklo_epi32(l, h);
334 }
c017a39f 335 template<> inline Vc_CONST _M128 SortHelper<_M128, 4>::sort(_M128 x)
f22341db 336 {
337 _M128 y = _mm_shuffle_ps(x, x, _MM_SHUFFLE(2, 3, 0, 1));
338 _M128 l = _mm_min_ps(x, y);
339 _M128 h = _mm_max_ps(x, y);
340 x = _mm_unpacklo_ps(l, h);
341 y = _mm_unpackhi_ps(h, l);
342
343 l = _mm_min_ps(x, y);
344 h = _mm_max_ps(x, y);
345 x = _mm_unpacklo_ps(l, h);
346 y = _mm_movehl_ps(x, x);
347
348 l = _mm_min_ps(x, y);
349 h = _mm_max_ps(x, y);
350 return _mm_unpacklo_ps(l, h);
351//X _M128 k = _mm_cmpgt_ps(x, y);
352//X k = _mm_shuffle_ps(k, k, _MM_SHUFFLE(2, 2, 0, 0));
353//X x = _mm_blendv_ps(x, y, k);
354//X y = _mm_shuffle_ps(x, x, _MM_SHUFFLE(1, 0, 3, 2));
355//X k = _mm_cmpgt_ps(x, y);
356//X k = _mm_shuffle_ps(k, k, _MM_SHUFFLE(1, 0, 1, 0));
357//X x = _mm_blendv_ps(x, y, k);
358//X y = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 1, 2, 0));
359//X k = _mm_cmpgt_ps(x, y);
360//X k = _mm_shuffle_ps(k, k, _MM_SHUFFLE(0, 1, 1, 0));
361//X return _mm_blendv_ps(x, y, k);
362 }
c017a39f 363 template<> inline Vc_PURE M256 SortHelper<M256, 8>::sort(const M256 &_x)
f22341db 364 {
365 M256 x = _x;
366 typedef SortHelper<_M128, 4> H;
367
368 _M128 a, b, l, h;
369 a = H::sort(x[0]);
370 b = H::sort(x[1]);
371
372 // merge
373 b = _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 1, 2, 3));
374 l = _mm_min_ps(a, b);
375 h = _mm_max_ps(a, b);
376
377 a = _mm_unpacklo_ps(l, h);
378 b = _mm_unpackhi_ps(l, h);
379 l = _mm_min_ps(a, b);
380 h = _mm_max_ps(a, b);
381
382 a = _mm_unpacklo_ps(l, h);
383 b = _mm_unpackhi_ps(l, h);
384 l = _mm_min_ps(a, b);
385 h = _mm_max_ps(a, b);
386
387 x[0] = _mm_unpacklo_ps(l, h);
388 x[1] = _mm_unpackhi_ps(l, h);
389 return x;
390 }
c017a39f 391 template<> inline Vc_CONST _M128D SortHelper<_M128D, 2>::sort(_M128D x)
f22341db 392 {
393 const _M128D y = _mm_shuffle_pd(x, x, _MM_SHUFFLE2(0, 1));
394 return _mm_unpacklo_pd(_mm_min_sd(x, y), _mm_max_sd(x, y));
395 }
396
397 // can be used to multiply with a constant. For some special constants it doesn't need an extra
398 // vector but can use a shift instead, basically encoding the factor in the instruction.
c017a39f 399 template<typename IndexType, unsigned int constant> Vc_ALWAYS_INLINE Vc_CONST IndexType mulConst(const IndexType x) {
f22341db 400 typedef VectorHelper<typename IndexType::EntryType> H;
401 switch (constant) {
402 case 0: return H::zero();
403 case 1: return x;
404 case 2: return H::slli(x.data(), 1);
405 case 4: return H::slli(x.data(), 2);
406 case 8: return H::slli(x.data(), 3);
407 case 16: return H::slli(x.data(), 4);
408 case 32: return H::slli(x.data(), 5);
409 case 64: return H::slli(x.data(), 6);
410 case 128: return H::slli(x.data(), 7);
411 case 256: return H::slli(x.data(), 8);
412 case 512: return H::slli(x.data(), 9);
413 case 1024: return H::slli(x.data(), 10);
414 case 2048: return H::slli(x.data(), 11);
415 }
416#ifndef VC_IMPL_SSE4_1
417 // without SSE 4.1 int multiplication is not so nice
418 if (sizeof(typename IndexType::EntryType) == 4) {
419 switch (constant) {
420 case 3: return H::add( x.data() , H::slli(x.data(), 1));
421 case 5: return H::add( x.data() , H::slli(x.data(), 2));
422 case 9: return H::add( x.data() , H::slli(x.data(), 3));
423 case 17: return H::add( x.data() , H::slli(x.data(), 4));
424 case 33: return H::add( x.data() , H::slli(x.data(), 5));
425 case 65: return H::add( x.data() , H::slli(x.data(), 6));
426 case 129: return H::add( x.data() , H::slli(x.data(), 7));
427 case 257: return H::add( x.data() , H::slli(x.data(), 8));
428 case 513: return H::add( x.data() , H::slli(x.data(), 9));
429 case 1025: return H::add( x.data() , H::slli(x.data(), 10));
430 case 2049: return H::add( x.data() , H::slli(x.data(), 11));
431 case 6: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 2));
432 case 10: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 3));
433 case 18: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 4));
434 case 34: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 5));
435 case 66: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 6));
436 case 130: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 7));
437 case 258: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 8));
438 case 514: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 9));
439 case 1026: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 10));
440 case 2050: return H::add(H::slli(x.data(), 1), H::slli(x.data(), 11));
441 case 12: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 3));
442 case 20: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 4));
443 case 36: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 5));
444 case 68: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 6));
445 case 132: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 7));
446 case 260: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 8));
447 case 516: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 9));
448 case 1028: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 10));
449 case 2052: return H::add(H::slli(x.data(), 2), H::slli(x.data(), 11));
450 case 24: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 4));
451 case 40: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 5));
452 case 72: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 6));
453 case 136: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 7));
454 case 264: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 8));
455 case 520: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 9));
456 case 1032: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 10));
457 case 2056: return H::add(H::slli(x.data(), 3), H::slli(x.data(), 11));
458 case 48: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 5));
459 case 80: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 6));
460 case 144: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 7));
461 case 272: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 8));
462 case 528: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 9));
463 case 1040: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 10));
464 case 2064: return H::add(H::slli(x.data(), 4), H::slli(x.data(), 11));
465 case 96: return H::add(H::slli(x.data(), 5), H::slli(x.data(), 6));
466 case 160: return H::add(H::slli(x.data(), 5), H::slli(x.data(), 7));
467 case 288: return H::add(H::slli(x.data(), 5), H::slli(x.data(), 8));
468 case 544: return H::add(H::slli(x.data(), 5), H::slli(x.data(), 9));
469 case 1056: return H::add(H::slli(x.data(), 5), H::slli(x.data(), 10));
470 case 2080: return H::add(H::slli(x.data(), 5), H::slli(x.data(), 11));
471 case 192: return H::add(H::slli(x.data(), 6), H::slli(x.data(), 7));
472 case 320: return H::add(H::slli(x.data(), 6), H::slli(x.data(), 8));
473 case 576: return H::add(H::slli(x.data(), 6), H::slli(x.data(), 9));
474 case 1088: return H::add(H::slli(x.data(), 6), H::slli(x.data(), 10));
475 case 2112: return H::add(H::slli(x.data(), 6), H::slli(x.data(), 11));
476 case 384: return H::add(H::slli(x.data(), 7), H::slli(x.data(), 8));
477 case 640: return H::add(H::slli(x.data(), 7), H::slli(x.data(), 9));
478 case 1152: return H::add(H::slli(x.data(), 7), H::slli(x.data(), 10));
479 case 2176: return H::add(H::slli(x.data(), 7), H::slli(x.data(), 11));
480 case 768: return H::add(H::slli(x.data(), 8), H::slli(x.data(), 9));
481 case 1280: return H::add(H::slli(x.data(), 8), H::slli(x.data(), 10));
482 case 2304: return H::add(H::slli(x.data(), 8), H::slli(x.data(), 11));
483 case 1536: return H::add(H::slli(x.data(), 9), H::slli(x.data(), 10));
484 case 2560: return H::add(H::slli(x.data(), 9), H::slli(x.data(), 11));
485 case 3072: return H::add(H::slli(x.data(),10), H::slli(x.data(), 11));
486 }
487 }
488#endif
489 return H::mul(x.data(), H::set(constant));
490 }
} // namespace SSE
} // namespace Vc
} // namespace AliRoot