1 /* This file is part of the Vc library.
3 Copyright (C) 2009-2011 Matthias Kretz <kretz@kde.org>
5 Vc is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation, either version 3 of
8 the License, or (at your option) any later version.
10 Vc is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with Vc. If not, see <http://www.gnu.org/licenses/>.
29 ////////////////////////////////////////////////////////////////////////////////////////////////////
31 ////////////////////////////////////////////////////////////////////////////////////////////////////
33 template<> Vc_ALWAYS_INLINE Vc_PURE m256 VectorHelper<m256>::load(const float *m, AlignedFlag)
35 return _mm256_load_ps(m);
37 template<> Vc_ALWAYS_INLINE Vc_PURE m256 VectorHelper<m256>::load(const float *m, UnalignedFlag)
39 return _mm256_loadu_ps(m);
41 template<> Vc_ALWAYS_INLINE Vc_PURE m256 VectorHelper<m256>::load(const float *m, StreamingAndAlignedFlag)
43 return avx_cast<m256>(concat(_mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<float *>(m))),
44 _mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<float *>(&m[4])))));
46 template<> Vc_ALWAYS_INLINE Vc_PURE m256
47 VC_WARN("AVX does not support streaming unaligned loads. Will use non-streaming unaligned load instead.")
48 VectorHelper<m256>::load(const float *m, StreamingAndUnalignedFlag)
50 return _mm256_loadu_ps(m);
52 ////////////////////////////////////////////////////////////////////////////////////////////////////
54 Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, AlignedFlag)
56 _mm256_store_ps(mem, x);
58 Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, UnalignedFlag)
60 _mm256_storeu_ps(mem, x);
62 Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, StreamingAndAlignedFlag)
64 _mm256_stream_ps(mem, x);
66 Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, StreamingAndUnalignedFlag)
68 _mm_maskmoveu_si128(avx_cast<m128i>(x), _mm_setallone_si128(), reinterpret_cast<char *>(mem));
69 _mm_maskmoveu_si128(_mm256_extractf128_si256(avx_cast<m256i>(x), 1), _mm_setallone_si128(), reinterpret_cast<char *>(mem + 4));
71 Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, VTArg m, AlignedFlag)
73 _mm256_maskstore(mem, m, x);
75 Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, VTArg m, UnalignedFlag)
77 _mm256_maskstore(mem, m, x);
79 Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, VTArg m, StreamingAndAlignedFlag)
81 _mm_maskmoveu_si128(avx_cast<m128i>(x), avx_cast<m128i>(m), reinterpret_cast<char *>(mem));
82 _mm_maskmoveu_si128(_mm256_extractf128_si256(avx_cast<m256i>(x), 1), _mm256_extractf128_si256(avx_cast<m256i>(m), 1), reinterpret_cast<char *>(mem + 4));
84 Vc_ALWAYS_INLINE void VectorHelper<m256>::store(float *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag)
86 _mm_maskmoveu_si128(avx_cast<m128i>(x), avx_cast<m128i>(m), reinterpret_cast<char *>(mem));
87 _mm_maskmoveu_si128(_mm256_extractf128_si256(avx_cast<m256i>(x), 1), _mm256_extractf128_si256(avx_cast<m256i>(m), 1), reinterpret_cast<char *>(mem + 4));
90 ////////////////////////////////////////////////////////////////////////////////////////////////////
92 ////////////////////////////////////////////////////////////////////////////////////////////////////
94 template<> Vc_ALWAYS_INLINE Vc_PURE m256d VectorHelper<m256d>::load(const double *m, AlignedFlag)
96 return _mm256_load_pd(m);
98 template<> Vc_ALWAYS_INLINE Vc_PURE m256d VectorHelper<m256d>::load(const double *m, UnalignedFlag)
100 return _mm256_loadu_pd(m);
102 template<> Vc_ALWAYS_INLINE Vc_PURE m256d VectorHelper<m256d>::load(const double *m, StreamingAndAlignedFlag)
104 return avx_cast<m256d>(concat(
105 _mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<double *>(m))),
106 _mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<double *>(&m[2])))));
108 template<> Vc_ALWAYS_INLINE Vc_PURE m256d
109 VC_WARN("AVX does not support streaming unaligned loads. Will use non-streaming unaligned load instead.")
110 VectorHelper<m256d>::load(const double *m, StreamingAndUnalignedFlag)
112 return _mm256_loadu_pd(m);
114 ////////////////////////////////////////////////////////////////////////////////////////////////////
116 Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, AlignedFlag)
118 _mm256_store_pd(mem, x);
120 Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, UnalignedFlag)
122 _mm256_storeu_pd(mem, x);
124 Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, StreamingAndAlignedFlag)
126 _mm256_stream_pd(mem, x);
128 Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, StreamingAndUnalignedFlag)
130 _mm_maskmoveu_si128(avx_cast<m128i>(x), _mm_setallone_si128(), reinterpret_cast<char *>(mem));
131 _mm_maskmoveu_si128(avx_cast<m128i>(_mm256_extractf128_pd(x, 1)), _mm_setallone_si128(), reinterpret_cast<char *>(mem + 2));
133 Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, VTArg m, AlignedFlag)
135 _mm256_maskstore(mem, m, x);
137 Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, VTArg m, UnalignedFlag)
139 _mm256_maskstore(mem, m, x);
141 Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, VTArg m, StreamingAndAlignedFlag)
143 _mm_maskmoveu_si128(avx_cast<m128i>(x), avx_cast<m128i>(m), reinterpret_cast<char *>(mem));
144 _mm_maskmoveu_si128(avx_cast<m128i>(_mm256_extractf128_pd(x, 1)), avx_cast<m128i>(_mm256_extractf128_pd(m, 1)), reinterpret_cast<char *>(mem + 2));
146 Vc_ALWAYS_INLINE void VectorHelper<m256d>::store(double *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag)
148 _mm_maskmoveu_si128(avx_cast<m128i>(x), avx_cast<m128i>(m), reinterpret_cast<char *>(mem));
149 _mm_maskmoveu_si128(avx_cast<m128i>(_mm256_extractf128_pd(x, 1)), avx_cast<m128i>(_mm256_extractf128_pd(m, 1)), reinterpret_cast<char *>(mem + 2));
151 ////////////////////////////////////////////////////////////////////////////////////////////////////
153 ////////////////////////////////////////////////////////////////////////////////////////////////////
155 template<typename T> Vc_ALWAYS_INLINE Vc_PURE m256i VectorHelper<m256i>::load(const T *m, AlignedFlag)
157 return _mm256_load_si256(reinterpret_cast<const __m256i *>(m));
159 template<typename T> Vc_ALWAYS_INLINE Vc_PURE m256i VectorHelper<m256i>::load(const T *m, UnalignedFlag)
161 return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(m));
163 template<typename T> Vc_ALWAYS_INLINE Vc_PURE m256i VectorHelper<m256i>::load(const T *m, StreamingAndAlignedFlag)
165 return concat(_mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<T *>(m))),
166 _mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<T *>(&m[4]))));
168 template<typename T> Vc_ALWAYS_INLINE Vc_PURE m256i
169 VC_WARN("AVX does not support streaming unaligned loads. Will use non-streaming unaligned load instead.")
170 VectorHelper<m256i>::load(const T *m, StreamingAndUnalignedFlag)
172 return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(m));
174 ////////////////////////////////////////////////////////////////////////////////////////////////////
176 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, AlignedFlag)
178 _mm256_store_si256(reinterpret_cast<__m256i *>(mem), x);
180 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, UnalignedFlag)
182 _mm256_storeu_si256(reinterpret_cast<__m256i *>(mem), x);
184 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, StreamingAndAlignedFlag)
186 _mm256_stream_si256(reinterpret_cast<__m256i *>(mem), x);
188 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, StreamingAndUnalignedFlag)
190 _mm_maskmoveu_si128(avx_cast<m128i>(x), _mm_setallone_si128(), reinterpret_cast<char *>(mem));
191 _mm_maskmoveu_si128(_mm256_extractf128_si256(x, 1), _mm_setallone_si128(), reinterpret_cast<char *>(mem + 4));
193 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, VTArg m, AlignedFlag)
195 _mm256_maskstore(mem, m, x);
197 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, VTArg m, UnalignedFlag)
199 _mm256_maskstore(mem, m, x);
201 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, VTArg m, StreamingAndAlignedFlag)
203 _mm_maskmoveu_si128(lo128(x), lo128(m), reinterpret_cast<char *>(mem));
204 _mm_maskmoveu_si128(hi128(x), hi128(m), reinterpret_cast<char *>(mem + 4));
206 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m256i>::store(T *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag)
208 _mm_maskmoveu_si128(lo128(x), lo128(m), reinterpret_cast<char *>(mem));
209 _mm_maskmoveu_si128(hi128(x), hi128(m), reinterpret_cast<char *>(mem + 4));
211 ////////////////////////////////////////////////////////////////////////////////////////////////////
213 ////////////////////////////////////////////////////////////////////////////////////////////////////
215 template<typename T> Vc_ALWAYS_INLINE Vc_PURE m128i VectorHelper<m128i>::load(const T *m, AlignedFlag)
217 return _mm_load_si128(reinterpret_cast<const __m128i *>(m));
219 template<typename T> Vc_ALWAYS_INLINE Vc_PURE m128i VectorHelper<m128i>::load(const T *m, UnalignedFlag)
221 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(m));
223 template<typename T> Vc_ALWAYS_INLINE Vc_PURE m128i VectorHelper<m128i>::load(const T *m, StreamingAndAlignedFlag)
225 return _mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<T *>(m)));
227 template<typename T> Vc_ALWAYS_INLINE Vc_PURE m128i
228 VC_WARN("AVX does not support streaming unaligned loads. Will use non-streaming unaligned load instead.")
229 VectorHelper<m128i>::load(const T *m, StreamingAndUnalignedFlag)
231 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(m));
233 ////////////////////////////////////////////////////////////////////////////////////////////////////
235 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, AlignedFlag)
237 _mm_store_si128(reinterpret_cast<__m128i *>(mem), x);
239 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, UnalignedFlag)
241 _mm_storeu_si128(reinterpret_cast<__m128i *>(mem), x);
243 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, StreamingAndAlignedFlag)
245 _mm_stream_si128(reinterpret_cast<__m128i *>(mem), x);
247 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, StreamingAndUnalignedFlag)
249 _mm_maskmoveu_si128(x, _mm_setallone_si128(), reinterpret_cast<char *>(mem));
251 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, VTArg m, AlignedFlag align)
253 store(mem, _mm_blendv_epi8(load(mem, align), x, m), align);
255 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, VTArg m, UnalignedFlag align)
257 store(mem, _mm_blendv_epi8(load(mem, align), x, m), align);
259 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, VTArg m, StreamingAndAlignedFlag)
261 _mm_maskmoveu_si128(x, m, reinterpret_cast<char *>(mem));
263 template<typename T> Vc_ALWAYS_INLINE void VectorHelper<m128i>::store(T *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag)
265 _mm_maskmoveu_si128(x, m, reinterpret_cast<char *>(mem));
270 } // namespace AliRoot