--- /dev/null
+/* This file is part of the Vc library. {{{
+
+ Copyright (C) 2012 Matthias Kretz <kretz@kde.org>
+
+ Vc is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as
+ published by the Free Software Foundation, either version 3 of
+ the License, or (at your option) any later version.
+
+ Vc is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with Vc. If not, see <http://www.gnu.org/licenses/>.
+
+}}}*/
+
+#ifndef VC_COMMON_INTERLEAVEDMEMORY_H
+#define VC_COMMON_INTERLEAVEDMEMORY_H
+
+#include "macros.h"
+
+namespace AliRoot {
+namespace Vc
+{
+namespace Common
+{
+
+/**
+ * \internal
+ *
+ * Common base of the interleaved-memory access objects. It stores a vector of indexes together
+ * with a pointer to the data and declares the deinterleave (read) and interleave (write)
+ * overloads for 2 to 8 vectors.
+ *
+ * The indexes are stored exactly as they are passed to the constructor. The derived
+ * InterleavedMemoryReadAccess passes indexes that it has already multiplied by its StructSize
+ * template argument.
+ *
+ * \tparam V The vector type that is read from / written to memory.
+ */
+template<typename V> struct InterleavedMemoryAccessBase
+{
+ typedef typename V::EntryType T; // entry type of the vector type V
+ typedef typename V::IndexType I; // index vector type associated with V
+ typedef typename V::AsArg VArg; // type used below to pass a V into the interleave overloads
+ typedef T Ta Vc_MAY_ALIAS; // T decorated with Vc_MAY_ALIAS (presumably an aliasing attribute from macros.h - TODO confirm)
+ const I m_indexes; // index vector, stored unmodified
+ Ta *const m_data; // pointer to the data (presumably the base address the indexes are relative to)
+
+ Vc_ALWAYS_INLINE InterleavedMemoryAccessBase(typename I::AsArg indexes, Ta *data) // argument order is (indexes, data); the derived classes take (data, indexes)
+ : m_indexes(indexes), m_data(data)
+ {
+ }
+
+ // declared only: implementations of the following are in {scalar,sse,avx}/interleavedmemory.tcc
+ void deinterleave(V &v0, V &v1) const; // read overloads: 2 to 8 vectors, passed by non-const reference
+ void deinterleave(V &v0, V &v1, V &v2) const;
+ void deinterleave(V &v0, V &v1, V &v2, V &v3) const;
+ void deinterleave(V &v0, V &v1, V &v2, V &v3, V &v4) const;
+ void deinterleave(V &v0, V &v1, V &v2, V &v3, V &v4, V &v5) const;
+ void deinterleave(V &v0, V &v1, V &v2, V &v3, V &v4, V &v5, V &v6) const;
+ void deinterleave(V &v0, V &v1, V &v2, V &v3, V &v4, V &v5, V &v6, V &v7) const;
+
+ void interleave(VArg v0, VArg v1); // write overloads: 2 to 8 vectors, passed as VArg
+ void interleave(VArg v0, VArg v1, VArg v2);
+ void interleave(VArg v0, VArg v1, VArg v2, VArg v3);
+ void interleave(VArg v0, VArg v1, VArg v2, VArg v3, VArg v4);
+ void interleave(VArg v0, VArg v1, VArg v2, VArg v3, VArg v4, VArg v5);
+ void interleave(VArg v0, VArg v1, VArg v2, VArg v3, VArg v4, VArg v5, VArg v6);
+ void interleave(VArg v0, VArg v1, VArg v2, VArg v3, VArg v4, VArg v5, VArg v6, VArg v7);
+};
+
+/**
+ * \internal
+ *
+ * Read-only access object. Constructing it performs no memory access: the deinterleaving
+ * gather is delayed until the object is assigned to a vector tuple (operator=).
+ *
+ * \tparam StructSize Factor every index is multiplied with before it is handed to the base
+ *                    class. InterleavedMemoryWrapper instantiates it with
+ *                    sizeof(S) / sizeof(T), i.e. the number of entries per struct.
+ * \tparam V          The vector type the struct members are read into.
+ */
+template<size_t StructSize, typename V>
+struct InterleavedMemoryReadAccess : public InterleavedMemoryAccessBase<V>
+{
+    typedef InterleavedMemoryAccessBase<V> Base;
+    typedef typename Base::I I;
+    typedef typename Base::Ta Ta;
+
+    // Note the swapped argument order relative to Base: (data, indexes) here, (indexes, data)
+    // there. The indexes are scaled by StructSize on the way in.
+    Vc_ALWAYS_INLINE InterleavedMemoryReadAccess(Ta *data, typename I::AsArg indexes)
+        : Base(indexes * I(StructSize), data)
+    {}
+};
+
+/**
+ * \internal
+ *
+ * Read/write access object. On top of InterleavedMemoryReadAccess it can be assigned a tuple of
+ * 2 to 8 vectors, which writes the vectors to memory through Base::interleave.
+ *
+ * Unless \c NDEBUG is defined, every scatter assignment (for tuples of \c V as well as tuples
+ * of <tt>const V</tt>) first asserts that the indexes are unique.
+ *
+ * \tparam StructSize Number of entries per struct; a tuple with more vectors than this is
+ *                    rejected at compile time.
+ * \tparam V          The vector type that is written to the struct members.
+ */
+template<size_t StructSize, typename V> struct InterleavedMemoryAccess : public InterleavedMemoryReadAccess<StructSize, V>
+{
+    typedef InterleavedMemoryAccessBase<V> Base;
+    typedef typename Base::Ta Ta;
+    typedef typename Base::I I;
+
+    Vc_ALWAYS_INLINE InterleavedMemoryAccess(Ta *data, typename I::AsArg indexes)
+        : InterleavedMemoryReadAccess<StructSize, V>(data, indexes)
+    {
+    }
+
+    // Generates the pair of scatter assignment operators for a tuple of LENGTH vectors (one
+    // overload for tuples of V, one for tuples of const V). `parameters` is the parenthesized
+    // argument list that unpacks the left-nested tuple `rhs` into the matching
+    // interleave() overload. Both overloads perform the same checks.
+#define _VC_SCATTER_ASSIGNMENT(LENGTH, parameters) \
+    Vc_ALWAYS_INLINE void operator=(const VectorTuple<LENGTH, V> &rhs) \
+    { \
+        VC_STATIC_ASSERT(LENGTH <= StructSize, You_are_trying_to_scatter_more_data_into_the_struct_than_it_has); \
+        checkIndexesUnique(); \
+        this->interleave parameters ; \
+    } \
+    Vc_ALWAYS_INLINE void operator=(const VectorTuple<LENGTH, const V> &rhs) \
+    { \
+        VC_STATIC_ASSERT(LENGTH <= StructSize, You_are_trying_to_scatter_more_data_into_the_struct_than_it_has); \
+        checkIndexesUnique(); \
+        this->interleave parameters ; \
+    }
+    _VC_SCATTER_ASSIGNMENT(2, (rhs.l, rhs.r))
+    _VC_SCATTER_ASSIGNMENT(3, (rhs.l.l, rhs.l.r, rhs.r))
+    _VC_SCATTER_ASSIGNMENT(4, (rhs.l.l.l, rhs.l.l.r, rhs.l.r, rhs.r))
+    _VC_SCATTER_ASSIGNMENT(5, (rhs.l.l.l.l, rhs.l.l.l.r, rhs.l.l.r, rhs.l.r, rhs.r))
+    _VC_SCATTER_ASSIGNMENT(6, (rhs.l.l.l.l.l, rhs.l.l.l.l.r, rhs.l.l.l.r, rhs.l.l.r, rhs.l.r, rhs.r))
+    _VC_SCATTER_ASSIGNMENT(7, (rhs.l.l.l.l.l.l, rhs.l.l.l.l.l.r, rhs.l.l.l.l.r, rhs.l.l.l.r, rhs.l.l.r, rhs.l.r, rhs.r))
+    _VC_SCATTER_ASSIGNMENT(8, (rhs.l.l.l.l.l.l.l, rhs.l.l.l.l.l.l.r, rhs.l.l.l.l.l.r, rhs.l.l.l.l.r, rhs.l.l.l.r, rhs.l.l.r, rhs.l.r, rhs.r))
+#undef _VC_SCATTER_ASSIGNMENT
+
+private:
+#ifdef NDEBUG
+    // Release builds: the uniqueness check compiles to nothing.
+    Vc_ALWAYS_INLINE void checkIndexesUnique() const {}
+#else
+    // Debug builds: asserts that no index occurs twice. After sorting, equal indexes are
+    // neighbors, so comparing the sorted vector with itself rotated by one entry flags every
+    // duplicate. Vectors with a single entry are exempt from the comparison (presumably
+    // because rotating them yields the same vector, which would always compare equal).
+    void checkIndexesUnique() const
+    {
+        const I test = Base::m_indexes.sorted();
+        // The explicit ';' keeps this well-formed regardless of whether VC_ASSERT supplies one.
+        VC_ASSERT(I::Size == 1 || (test == test.rotated(1)).isEmpty());
+    }
+#endif
+};
+
+#ifdef DOXYGEN
+} // namespace Common
+// in doxygen InterleavedMemoryWrapper should appear in the Vc namespace (see the using statement
+// below)
+#endif
+
+/**
+ * Wraps a pointer to memory with convenience functions to access it via vectors.
+ *
+ * \param S The type of the struct.
+ * \param V The type of the vector to be returned when read. This should reflect the type of the
+ * members inside the struct.
+ *
+ * \see operator[]
+ * \ingroup Utilities
+ * \headerfile interleavedmemory.h <Vc/Memory>
+ */
+template<typename S, typename V> class InterleavedMemoryWrapper
+{
+    typedef typename V::EntryType T;
+    typedef typename V::IndexType I;
+    typedef typename V::AsArg VArg;
+    typedef typename I::AsArg IndexType;
+    // Both access types are parameterized with the number of T entries that make up one S.
+    typedef InterleavedMemoryAccess<sizeof(S) / sizeof(T), V> Access;
+    typedef InterleavedMemoryReadAccess<sizeof(S) / sizeof(T), V> ReadAccess;
+    typedef T Ta Vc_MAY_ALIAS;
+    Ta *const m_data;
+
+    // sizeof(S) must be a whole multiple of sizeof(T).
+    VC_STATIC_ASSERT(sizeof(S) % sizeof(T) == 0, InterleavedMemoryAccess_does_not_support_packed_structs);
+
+public:
+    /**
+     * Creates a wrapper for the memory at \p s. Only the pointer is stored.
+     *
+     * \param s A pointer to a C-array.
+     */
+    Vc_ALWAYS_INLINE InterleavedMemoryWrapper(S *s)
+        : m_data(reinterpret_cast<Ta *>(s))
+    {
+    }
+
+    /**
+     * Interleaved scatter/gather access.
+     *
+     * Given a struct of floats and a vector of \p indexes into the array of structs, this
+     * function gives access to the struct entries as vectors, using the minimal number of
+     * store or load instructions.
+     *
+     * \param indexes Vector of indexes that determine the gather/scatter locations.
+     *
+     * \return A special (magic) object. Assigning it to a vector tuple executes the loads and
+     *         the deinterleave; assigning a vector tuple to it executes the interleave and the
+     *         stores.
+     *
+     * Example:
+     * \code
+     * struct Foo {
+     *   float x, y, z;
+     * };
+     *
+     * void fillWithBar(Foo *_data, uint_v indexes)
+     * {
+     *   Vc::InterleavedMemoryWrapper<Foo, float_v> data(_data);
+     *   const float_v x = bar(1);
+     *   const float_v y = bar(2);
+     *   const float_v z = bar(3);
+     *   data[indexes] = (x, y, z);
+     *   // a subset at the front of the struct can be stored as well:
+     *   data[indexes] = (x, y);
+     *   // to store a single entry, use scatter instead:
+     *   z.scatter(_data, &Foo::z, indexes);
+     * }
+     *
+     * float_v normalizeStuff(Foo *_data, uint_v indexes)
+     * {
+     *   Vc::InterleavedMemoryWrapper<Foo, float_v> data(_data);
+     *   float_v x, y, z;
+     *   (x, y, z) = data[indexes];
+     *   // a subset at the front of the struct can be loaded as well:
+     *   // (x, y) = data[indexes];
+     *   return Vc::sqrt(x * x + y * y + z * z);
+     * }
+     * \endcode
+     *
+     * The gather operation (and scatter as its inverse) can be pictured like this:
+\verbatim
+             Memory: {x0 y0 z0 x1 y1 z1 x2 y2 z2 x3 y3 z3 x4 y4 z4 x5 y5 z5 x6 y6 z6 x7 y7 z7 x8 y8 z8}
+            indexes: [5, 0, 1, 7]
+Result in (x, y, z): ({x5 x0 x1 x7}, {y5 y0 y1 y7}, {z5 z0 z1 z7})
+\endverbatim
+     *
+     * \warning The result of a scatter is undefined if \p indexes contains non-unique entries.
+     * If \c NDEBUG is not defined the implementation will assert that the \p indexes entries
+     * are unique.
+     */
+    Vc_ALWAYS_INLINE Access operator[](IndexType indexes)
+    {
+        return Access(m_data, indexes);
+    }
+
+    /// const overload of the subscript operator; the returned object supports gathers only
+    Vc_ALWAYS_INLINE ReadAccess operator[](IndexType indexes) const
+    {
+        return ReadAccess(m_data, indexes);
+    }
+
+    /// named alias of the const subscript operator
+    Vc_ALWAYS_INLINE ReadAccess gather(IndexType indexes) const
+    {
+        return ReadAccess(m_data, indexes);
+    }
+
+    // Not implemented; declaration kept for reference:
+    //Vc_ALWAYS_INLINE Access scatter(I indexes, VArg v0, VArg v1);
+};
+#ifndef DOXYGEN
+} // namespace Common
+
+using Common::InterleavedMemoryWrapper;
+#endif
+
+} // namespace Vc
+} // namespace AliRoot
+
+#include "undomacros.h"
+
+#endif // VC_COMMON_INTERLEAVEDMEMORY_H