1 /* This file is part of the Vc library. {{{
3 Copyright (C) 2012 Matthias Kretz <kretz@kde.org>
5 Vc is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation, either version 3 of
8 the License, or (at your option) any later version.
10 Vc is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with Vc. If not, see <http://www.gnu.org/licenses/>.
20 #ifndef VC_COMMON_INTERLEAVEDMEMORY_H
21 #define VC_COMMON_INTERLEAVEDMEMORY_H
// Shared base of the interleaved-memory access proxies for vector type V. It is constructed
// from an index vector plus a data pointer and declares the deinterleave (read) and
// interleave (write) routines for 2 to 8 vectors.
34 template<typename V> struct InterleavedMemoryAccessBase
// T: entry type of V, I: index vector type of V, VArg: argument-passing type of V.
36 typedef typename V::EntryType T;
37 typedef typename V::IndexType I;
38 typedef typename V::AsArg VArg;
// Ta is T tagged with Vc_MAY_ALIAS (presumably a may-alias attribute macro defined
// elsewhere -- TODO confirm); it is the pointee type of the data pointer.
39 typedef T Ta Vc_MAY_ALIAS;
// Stores both arguments in the members m_indexes and m_data.
// \param indexes index vector used by the (de)interleave routines
// \param data    pointer to the memory that is accessed
43 Vc_ALWAYS_INLINE InterleavedMemoryAccessBase(typename I::AsArg indexes, Ta *data)
44 : m_indexes(indexes), m_data(data)
48 // implementations of the following are in {scalar,sse,avx}/interleavedmemory.tcc
// Read direction: const member functions that fill 2..8 output vectors passed by reference.
49 void deinterleave(V &v0, V &v1) const;
50 void deinterleave(V &v0, V &v1, V &v2) const;
51 void deinterleave(V &v0, V &v1, V &v2, V &v3) const;
52 void deinterleave(V &v0, V &v1, V &v2, V &v3, V &v4) const;
53 void deinterleave(V &v0, V &v1, V &v2, V &v3, V &v4, V &v5) const;
54 void deinterleave(V &v0, V &v1, V &v2, V &v3, V &v4, V &v5, V &v6) const;
55 void deinterleave(V &v0, V &v1, V &v2, V &v3, V &v4, V &v5, V &v6, V &v7) const;
// Write direction: non-const member functions that take 2..8 input vectors as VArg.
57 void interleave(VArg v0, VArg v1);
58 void interleave(VArg v0, VArg v1, VArg v2);
59 void interleave(VArg v0, VArg v1, VArg v2, VArg v3);
60 void interleave(VArg v0, VArg v1, VArg v2, VArg v3, VArg v4);
61 void interleave(VArg v0, VArg v1, VArg v2, VArg v3, VArg v4, VArg v5);
62 void interleave(VArg v0, VArg v1, VArg v2, VArg v3, VArg v4, VArg v5, VArg v6);
63 void interleave(VArg v0, VArg v1, VArg v2, VArg v3, VArg v4, VArg v5, VArg v6, VArg v7);
// Read-only access proxy derived from InterleavedMemoryAccessBase<V>.
// StructSize is the factor the index vector is multiplied with on construction.
69 // delay execution of the deinterleaving gather until operator=
70 template<size_t StructSize, typename V> struct InterleavedMemoryReadAccess : public InterleavedMemoryAccessBase<V>
// Re-export the base class types under their short names.
72 typedef InterleavedMemoryAccessBase<V> Base;
73 typedef typename Base::Ta Ta;
74 typedef typename Base::I I;
// Note the argument order (data, indexes) is swapped relative to Base(indexes, data).
// The indexes are multiplied by StructSize before being handed to Base (presumably turning
// struct indexes into entry offsets -- TODO confirm against the .tcc implementations).
76 Vc_ALWAYS_INLINE InterleavedMemoryReadAccess(Ta *data, typename I::AsArg indexes)
77 : Base(indexes * I(StructSize), data)
85 template<size_t StructSize, typename V> struct InterleavedMemoryAccess : public InterleavedMemoryReadAccess<StructSize, V>
87 typedef InterleavedMemoryAccessBase<V> Base;
88 typedef typename Base::Ta Ta;
89 typedef typename Base::I I;
91 Vc_ALWAYS_INLINE InterleavedMemoryAccess(Ta *data, typename I::AsArg indexes)
92 : InterleavedMemoryReadAccess<StructSize, V>(data, indexes)
96 #define _VC_SCATTER_ASSIGNMENT(LENGTH, parameters) \
97 Vc_ALWAYS_INLINE void operator=(const VectorTuple<LENGTH, V> &rhs) \
99 VC_STATIC_ASSERT(LENGTH <= StructSize, You_are_trying_to_scatter_more_data_into_the_struct_than_it_has); \
100 this->interleave parameters ; \
102 Vc_ALWAYS_INLINE void operator=(const VectorTuple<LENGTH, const V> &rhs) \
104 VC_STATIC_ASSERT(LENGTH <= StructSize, You_are_trying_to_scatter_more_data_into_the_struct_than_it_has); \
105 checkIndexesUnique(); \
106 this->interleave parameters ; \
108 _VC_SCATTER_ASSIGNMENT(2, (rhs.l, rhs.r))
109 _VC_SCATTER_ASSIGNMENT(3, (rhs.l.l, rhs.l.r, rhs.r));
110 _VC_SCATTER_ASSIGNMENT(4, (rhs.l.l.l, rhs.l.l.r, rhs.l.r, rhs.r));
111 _VC_SCATTER_ASSIGNMENT(5, (rhs.l.l.l.l, rhs.l.l.l.r, rhs.l.l.r, rhs.l.r, rhs.r));
112 _VC_SCATTER_ASSIGNMENT(6, (rhs.l.l.l.l.l, rhs.l.l.l.l.r, rhs.l.l.l.r, rhs.l.l.r, rhs.l.r, rhs.r));
113 _VC_SCATTER_ASSIGNMENT(7, (rhs.l.l.l.l.l.l, rhs.l.l.l.l.l.r, rhs.l.l.l.l.r, rhs.l.l.l.r, rhs.l.l.r, rhs.l.r, rhs.r));
114 _VC_SCATTER_ASSIGNMENT(8, (rhs.l.l.l.l.l.l.l, rhs.l.l.l.l.l.l.r, rhs.l.l.l.l.l.r, rhs.l.l.l.l.r, rhs.l.l.l.r, rhs.l.l.r, rhs.l.r, rhs.r));
115 #undef _VC_SCATTER_ASSIGNMENT
119 Vc_ALWAYS_INLINE void checkIndexesUnique() const {}
121 void checkIndexesUnique() const
123 const I test = Base::m_indexes.sorted();
124 VC_ASSERT(I::Size == 1 || (test == test.rotated(1)).isEmpty())
130 } // namespace Common
131 // in doxygen InterleavedMemoryWrapper should appear in the Vc namespace (see the using statement below)
136 * Wraps a pointer to memory with convenience functions to access it via vectors.
138 * \param S The type of the struct.
139 * \param V The type of the vector to be returned when read. This should reflect the type of the
140 * members inside the struct.
144 * \headerfile interleavedmemory.h <Vc/Memory>
// Wrapper around a pointer to structs of type S that hands out proxy objects for
// vector-wise (type V) gather and scatter access to the struct entries.
146 template<typename S, typename V> class InterleavedMemoryWrapper
// T: entry type of V, I: index vector type of V, VArg / IndexType: argument-passing types.
148 typedef typename V::EntryType T;
149 typedef typename V::IndexType I;
150 typedef typename V::AsArg VArg;
151 typedef typename I::AsArg IndexType;
// Both proxy types are instantiated with StructSize = sizeof(S) / sizeof(T), i.e. the number
// of T-sized entries in one struct S.
152 typedef InterleavedMemoryAccess<sizeof(S) / sizeof(T), V> Access;
153 typedef InterleavedMemoryReadAccess<sizeof(S) / sizeof(T), V> ReadAccess;
// T tagged with Vc_MAY_ALIAS; pointee type of the stored data pointer.
154 typedef T Ta Vc_MAY_ALIAS;
// Compile-time check that sizeof(S) is an exact multiple of sizeof(T); otherwise the integer
// division used for StructSize above would truncate.
157 VC_STATIC_ASSERT((sizeof(S) / sizeof(T)) * sizeof(T) == sizeof(S), InterleavedMemoryAccess_does_not_support_packed_structs);
161 * Constructs the wrapper object.
163 * \param s A pointer to a C-array.
// The struct pointer is reinterpreted as a pointer to the entry type Ta and kept in m_data.
165 Vc_ALWAYS_INLINE InterleavedMemoryWrapper(S *s)
166 : m_data(reinterpret_cast<Ta *>(s))
171 * Interleaved scatter/gather access.
173 * Assuming you have a struct of floats and a vector of \p indexes into the array, this function
174 * can be used to access the struct entries as vectors using the minimal number of store or load
177 * \param indexes Vector of indexes that determine the gather locations.
179 * \return A special (magic) object that executes the loads and deinterleave on assignment to a
188 * void fillWithBar(Foo *_data, uint_v indexes)
190 * Vc::InterleavedMemoryWrapper<Foo, float_v> data(_data);
191 * const float_v x = bar(1);
192 * const float_v y = bar(2);
193 * const float_v z = bar(3);
194 * data[indexes] = (x, y, z);
195 * // it's also possible to just store a subset at the front of the struct:
196 * data[indexes] = (x, y);
197 * // if you want to store a single entry, use scatter:
198 * z.scatter(_data, &Foo::x, indexes);
201 * float_v normalizeStuff(Foo *_data, uint_v indexes)
203 * Vc::InterleavedMemoryWrapper<Foo, float_v> data(_data);
205 * (x, y, z) = data[indexes];
206 * // it is also possible to just load a subset from the front of the struct:
207 * // (x, y) = data[indexes];
208 * return Vc::sqrt(x * x + y * y + z * z);
212 * You may think of the gather operation (or scatter as the inverse) like this:
214 Memory: {x0 y0 z0 x1 y1 z1 x2 y2 z2 x3 y3 z3 x4 y4 z4 x5 y5 z5 x6 y6 z6 x7 y7 z7 x8 y8 z8}
215 indexes: [5, 0, 1, 7]
216 Result in (x, y, z): ({x5 x0 x1 x7}, {y5 y0 y1 y7}, {z5 z0 z1 z7})
219 * \warning If \p indexes contains non-unique entries on scatter, the result is undefined. If
220 * \c NDEBUG is not defined the implementation will assert that the \p indexes entries are unique.
// Non-const access: returns the read/write proxy built from m_data and the given indexes.
222 Vc_ALWAYS_INLINE Access operator[](IndexType indexes)
224 return Access(m_data, indexes);
227 /// const overload (gathers only) of the above function
// Returns the read-only proxy type, so a const wrapper offers no scatter assignment.
228 Vc_ALWAYS_INLINE ReadAccess operator[](IndexType indexes) const
230 return ReadAccess(m_data, indexes);
233 /// alias of the above function
// Simply forwards to the const operator[].
234 Vc_ALWAYS_INLINE ReadAccess gather(IndexType indexes) const { return operator[](indexes); }
// Disabled declaration of a scatter counterpart; nothing is declared here.
236 //Vc_ALWAYS_INLINE Access scatter(I indexes, VArg v0, VArg v1);
239 } // namespace Common
241 using Common::InterleavedMemoryWrapper;
245 } // namespace AliRoot
247 #include "undomacros.h"
249 #endif // VC_COMMON_INTERLEAVEDMEMORY_H