/*  This file is part of the Vc library.

    Copyright (C) 2009-2011 Matthias Kretz <kretz@kde.org>

    Vc is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as
    published by the Free Software Foundation, either version 3 of
    the License, or (at your option) any later version.

    Vc is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with Vc. If not, see <http://www.gnu.org/licenses/>.

*/

#include "casts.h"
#include <cstdlib>

namespace AliRoot {
namespace Vc
{
namespace AVX
{

////////////////////////////////////////////////////////////////////////////////////////////////////
// float_v
////////////////////////////////////////////////////////////////////////////////////////////////////
//// loads
template<> Vc_ALWAYS_INLINE Vc_PURE m256 VectorHelper<m256>::load(const float *m, AlignedFlag)
{
    return _mm256_load_ps(m);
}
template<> Vc_ALWAYS_INLINE Vc_PURE m256 VectorHelper<m256>::load(const float *m, UnalignedFlag)
{
    return _mm256_loadu_ps(m);
}
template<> Vc_ALWAYS_INLINE Vc_PURE m256 VectorHelper<m256>::load(const float *m, StreamingAndAlignedFlag)
{
    return avx_cast<m256>(concat(_mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<float *>(m))),
                                 _mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<float *>(&m[4])))));
}
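// Note: AVX without AVX2 provides no 256-bit streaming load (_mm256_stream_load_si256 requires
// AVX2), so the streaming aligned load above issues two 16-byte _mm_stream_load_si128 (MOVNTDQA)
// loads and concatenates them into a single 256-bit register.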
template<> Vc_ALWAYS_INLINE Vc_PURE m256
    VC_WARN("AVX does not support streaming unaligned loads. Will use non-streaming unaligned load instead.")
VectorHelper<m256>::load(const float *m, StreamingAndUnalignedFlag)
{
    return _mm256_loadu_ps(m);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
//// stores
Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, AlignedFlag)
{
    _mm256_store_ps(mem, x);
}
Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, UnalignedFlag)
{
    _mm256_storeu_ps(mem, x);
}
Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, StreamingAndAlignedFlag)
{
    _mm256_stream_ps(mem, x);
}
Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(avx_cast<m128i>(x), _mm_setallone_si128(), reinterpret_cast<char *>(mem));
    _mm_maskmoveu_si128(_mm256_extractf128_si256(avx_cast<m256i>(x), 1), _mm_setallone_si128(), reinterpret_cast<char *>(mem + 4));
}
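// Note: AVX has no unaligned 256-bit non-temporal store, so the streaming unaligned store above
// falls back to two _mm_maskmoveu_si128 (MASKMOVDQU) calls with an all-ones byte mask, one per
// 128-bit half; MASKMOVDQU writes with a non-temporal hint and tolerates unaligned addresses.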
Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, VTArg m, AlignedFlag)
{
    _mm256_maskstore(mem, m, x);
}
Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, VTArg m, UnalignedFlag)
{
    _mm256_maskstore(mem, m, x);
}
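// Note: _mm256_maskstore is not the raw intrinsic name here; it appears to be an overload set
// provided by Vc's AVX intrinsics layer that forwards to _mm256_maskstore_ps / _mm256_maskstore_pd
// (and an integer variant) depending on the operand types.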
Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, VTArg m, StreamingAndAlignedFlag)
{
    _mm_maskmoveu_si128(avx_cast<m128i>(x), avx_cast<m128i>(m), reinterpret_cast<char *>(mem));
    _mm_maskmoveu_si128(_mm256_extractf128_si256(avx_cast<m256i>(x), 1), _mm256_extractf128_si256(avx_cast<m256i>(m), 1), reinterpret_cast<char *>(mem + 4));
}
Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(avx_cast<m128i>(x), avx_cast<m128i>(m), reinterpret_cast<char *>(mem));
    _mm_maskmoveu_si128(_mm256_extractf128_si256(avx_cast<m256i>(x), 1), _mm256_extractf128_si256(avx_cast<m256i>(m), 1), reinterpret_cast<char *>(mem + 4));
}

////////////////////////////////////////////////////////////////////////////////////////////////////
// double_v
////////////////////////////////////////////////////////////////////////////////////////////////////
//// loads
template<> Vc_ALWAYS_INLINE Vc_PURE m256d VectorHelper<m256d>::load(const double *m, AlignedFlag)
{
    return _mm256_load_pd(m);
}
template<> Vc_ALWAYS_INLINE Vc_PURE m256d VectorHelper<m256d>::load(const double *m, UnalignedFlag)
{
    return _mm256_loadu_pd(m);
}
template<> Vc_ALWAYS_INLINE Vc_PURE m256d VectorHelper<m256d>::load(const double *m, StreamingAndAlignedFlag)
{
    return avx_cast<m256d>(concat(
                _mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<double *>(m))),
                _mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<double *>(&m[2])))));
}
template<> Vc_ALWAYS_INLINE Vc_PURE m256d
    VC_WARN("AVX does not support streaming unaligned loads. Will use non-streaming unaligned load instead.")
VectorHelper<m256d>::load(const double *m, StreamingAndUnalignedFlag)
{
    return _mm256_loadu_pd(m);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
//// stores
Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, AlignedFlag)
{
    _mm256_store_pd(mem, x);
}
Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, UnalignedFlag)
{
    _mm256_storeu_pd(mem, x);
}
Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, StreamingAndAlignedFlag)
{
    _mm256_stream_pd(mem, x);
}
Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(avx_cast<m128i>(x), _mm_setallone_si128(), reinterpret_cast<char *>(mem));
    _mm_maskmoveu_si128(avx_cast<m128i>(_mm256_extractf128_pd(x, 1)), _mm_setallone_si128(), reinterpret_cast<char *>(mem + 2));
}
Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, VTArg m, AlignedFlag)
{
    _mm256_maskstore(mem, m, x);
}
Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, VTArg m, UnalignedFlag)
{
    _mm256_maskstore(mem, m, x);
}
Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, VTArg m, StreamingAndAlignedFlag)
{
    _mm_maskmoveu_si128(avx_cast<m128i>(x), avx_cast<m128i>(m), reinterpret_cast<char *>(mem));
    _mm_maskmoveu_si128(avx_cast<m128i>(_mm256_extractf128_pd(x, 1)), avx_cast<m128i>(_mm256_extractf128_pd(m, 1)), reinterpret_cast<char *>(mem + 2));
}
Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(avx_cast<m128i>(x), avx_cast<m128i>(m), reinterpret_cast<char *>(mem));
    _mm_maskmoveu_si128(avx_cast<m128i>(_mm256_extractf128_pd(x, 1)), avx_cast<m128i>(_mm256_extractf128_pd(m, 1)), reinterpret_cast<char *>(mem + 2));
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// (u)int_v
////////////////////////////////////////////////////////////////////////////////////////////////////
//// loads
template<typename T> Vc_ALWAYS_INLINE Vc_PURE m256i VectorHelper<m256i>::load(const T *m, AlignedFlag)
{
    return _mm256_load_si256(reinterpret_cast<const __m256i *>(m));
}
template<typename T> Vc_ALWAYS_INLINE Vc_PURE m256i VectorHelper<m256i>::load(const T *m, UnalignedFlag)
{
    return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(m));
}
template<typename T> Vc_ALWAYS_INLINE Vc_PURE m256i VectorHelper<m256i>::load(const T *m, StreamingAndAlignedFlag)
{
    return concat(_mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<T *>(m))),
                  _mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<T *>(&m[4]))));
}
template<typename T> Vc_ALWAYS_INLINE Vc_PURE m256i
    VC_WARN("AVX does not support streaming unaligned loads. Will use non-streaming unaligned load instead.")
VectorHelper<m256i>::load(const T *m, StreamingAndUnalignedFlag)
{
    return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(m));
}
////////////////////////////////////////////////////////////////////////////////////////////////////
//// stores
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, AlignedFlag)
{
    _mm256_store_si256(reinterpret_cast<__m256i *>(mem), x);
}
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, UnalignedFlag)
{
    _mm256_storeu_si256(reinterpret_cast<__m256i *>(mem), x);
}
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, StreamingAndAlignedFlag)
{
    _mm256_stream_si256(reinterpret_cast<__m256i *>(mem), x);
}
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(avx_cast<m128i>(x), _mm_setallone_si128(), reinterpret_cast<char *>(mem));
    _mm_maskmoveu_si128(_mm256_extractf128_si256(x, 1), _mm_setallone_si128(), reinterpret_cast<char *>(mem + 4));
}
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, VTArg m, AlignedFlag)
{
    _mm256_maskstore(mem, m, x);
}
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, VTArg m, UnalignedFlag)
{
    _mm256_maskstore(mem, m, x);
}
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, VTArg m, StreamingAndAlignedFlag)
{
    _mm_maskmoveu_si128(lo128(x), lo128(m), reinterpret_cast<char *>(mem));
    _mm_maskmoveu_si128(hi128(x), hi128(m), reinterpret_cast<char *>(mem + 4));
}
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(lo128(x), lo128(m), reinterpret_cast<char *>(mem));
    _mm_maskmoveu_si128(hi128(x), hi128(m), reinterpret_cast<char *>(mem + 4));
}
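// Note: lo128() and hi128() are Vc helpers returning the lower and upper 128-bit lane of a
// 256-bit register, so each streaming masked store above issues one MASKMOVDQU per lane.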
////////////////////////////////////////////////////////////////////////////////////////////////////
// (u)short_v
////////////////////////////////////////////////////////////////////////////////////////////////////
//// loads
template<typename T> Vc_ALWAYS_INLINE Vc_PURE m128i VectorHelper<m128i>::load(const T *m, AlignedFlag)
{
    return _mm_load_si128(reinterpret_cast<const __m128i *>(m));
}
template<typename T> Vc_ALWAYS_INLINE Vc_PURE m128i VectorHelper<m128i>::load(const T *m, UnalignedFlag)
{
    return _mm_loadu_si128(reinterpret_cast<const __m128i *>(m));
}
template<typename T> Vc_ALWAYS_INLINE Vc_PURE m128i VectorHelper<m128i>::load(const T *m, StreamingAndAlignedFlag)
{
    return _mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<T *>(m)));
}
template<typename T> Vc_ALWAYS_INLINE Vc_PURE m128i
    VC_WARN("AVX does not support streaming unaligned loads. Will use non-streaming unaligned load instead.")
VectorHelper<m128i>::load(const T *m, StreamingAndUnalignedFlag)
{
    return _mm_loadu_si128(reinterpret_cast<const __m128i *>(m));
}
////////////////////////////////////////////////////////////////////////////////////////////////////
//// stores
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, AlignedFlag)
{
    _mm_store_si128(reinterpret_cast<__m128i *>(mem), x);
}
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, UnalignedFlag)
{
    _mm_storeu_si128(reinterpret_cast<__m128i *>(mem), x);
}
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, StreamingAndAlignedFlag)
{
    _mm_stream_si128(reinterpret_cast<__m128i *>(mem), x);
}
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(x, _mm_setallone_si128(), reinterpret_cast<char *>(mem));
}
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, VTArg m, AlignedFlag align)
{
    store(mem, _mm_blendv_epi8(load(mem, align), x, m), align);
}
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, VTArg m, UnalignedFlag align)
{
    store(mem, _mm_blendv_epi8(load(mem, align), x, m), align);
}
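// Note: SSE/AVX provide no ordinary (non-streaming) masked store for 16-bit elements, so the two
// overloads above emulate one as a read-modify-write: load the destination, blend in the selected
// elements with _mm_blendv_epi8, and store the result back.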
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, VTArg m, StreamingAndAlignedFlag)
{
    _mm_maskmoveu_si128(x, m, reinterpret_cast<char *>(mem));
}
template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag)
{
    _mm_maskmoveu_si128(x, m, reinterpret_cast<char *>(mem));
}

} // namespace AVX
} // namespace Vc
} // namespace AliRoot