-/****************************************************************************
- * This file is property of and copyright by the ALICE HLT Project *
- * ALICE Experiment at CERN, All rights reserved. *
- * *
- * Copyright (C) 2009 Matthias Kretz <kretz@kde.org> *
- * for The ALICE HLT Project. *
- * *
- * Permission to use, copy, modify and distribute this software and its *
- * documentation strictly for non-commercial purposes is hereby granted *
- * without fee, provided that the above copyright notice appears in all *
- * copies and that both the copyright notice and this permission notice *
- * appear in the supporting documentation. The authors make no claims *
- * about the suitability of this software for any purpose. It is *
- * provided "as is" without express or implied warranty. *
- ***************************************************************************/
+// ****************************************************************************
+// * This file is property of and copyright by the ALICE HLT Project *
+// * ALICE Experiment at CERN, All rights reserved. *
+// * *
+// * Copyright (C) 2009 Matthias Kretz <kretz@kde.org> *
+// * for The ALICE HLT Project. *
+// * *
+// * Permission to use, copy, modify and distribute this software and its *
+// * documentation strictly for non-commercial purposes is hereby granted *
+// * without fee, provided that the above copyright notice appears in all *
+// * copies and that both the copyright notice and this permission notice *
+// * appear in the supporting documentation. The authors make no claims *
+// * about the suitability of this software for any purpose. It is *
+// * provided "as is" without express or implied warranty. *
+// ****************************************************************************
/**
* \file AliHLTArray.h
#include <assert.h>
#endif
-#if defined(__MMX__) || defined(__SSE__)
+#if (defined(__MMX__) || defined(__SSE__))
+#if defined(__GNUC__)
+#if __GNUC__ > 3
+#define USE_MM_MALLOC
+#endif
+#else // not gcc, assume it can use _mm_malloc since it supports MMX/SSE
+#define USE_MM_MALLOC
+#endif
+#endif
+
+#ifdef USE_MM_MALLOC
#include <mm_malloc.h>
#else
#include <cstdlib>
#endif
+enum { // sentinel value for the `alignment` template argument of the array classes
+ AliHLTFullyCacheLineAligned = -1 // selects CacheLineSizeHelper-padded elements and 128-byte aligned storage
+};
+
+#if defined(__CUDACC__) & 0
+#define ALIHLTARRAY_STATIC_ASSERT(a, b)
+#define ALIHLTARRAY_STATIC_ASSERT_NC(a, b)
+#else
namespace AliHLTArrayInternal
{
template<bool> class STATIC_ASSERT_FAILURE;
#define ALIHLTARRAY_STATIC_ASSERT_CONCAT_HELPER(a, b) a##b
#define ALIHLTARRAY_STATIC_ASSERT_CONCAT(a, b) ALIHLTARRAY_STATIC_ASSERT_CONCAT_HELPER(a, b)
-#define ALIHLTARRAY_STATIC_ASSERT(cond, msg) \
+#define ALIHLTARRAY_STATIC_ASSERT_NC(cond, msg) \
typedef AliHLTArrayInternal::STATIC_ASSERT_FAILURE<cond> ALIHLTARRAY_STATIC_ASSERT_CONCAT(_STATIC_ASSERTION_FAILED_##msg, __LINE__); \
- ALIHLTARRAY_STATIC_ASSERT_CONCAT(_STATIC_ASSERTION_FAILED_##msg, __LINE__) Error_##msg; \
- (void) Error_##msg
+ ALIHLTARRAY_STATIC_ASSERT_CONCAT(_STATIC_ASSERTION_FAILED_##msg, __LINE__) Error_##msg
+#define ALIHLTARRAY_STATIC_ASSERT(cond, msg) ALIHLTARRAY_STATIC_ASSERT_NC(cond, msg); (void) Error_##msg
+#endif
template<typename T, int Dim> class AliHLTArray;
namespace AliHLTInternal
{
+ template<unsigned int Size> struct Padding { char fPadding[Size]; }; // Size bytes of filler
+ template<> struct Padding<0> {}; // nothing to pad: avoids declaring a zero-length array
+ template<typename T> struct CacheLineSizeHelperData { T fData; }; // holds the wrapped value; used as a base class of CacheLineSizeHelper
+ template<typename T> struct CacheLineSizeHelperEnums { // compile-time arithmetic for padding T to a multiple of CacheLineSize
+ enum {
+ CacheLineSize = 64, // padding granularity in bytes
+ MaskedSize = sizeof( T ) & ( CacheLineSize - 1 ), // sizeof( T ) modulo CacheLineSize
+ RequiredSize = MaskedSize == 0 ? sizeof( T ) : sizeof( T ) + CacheLineSize - MaskedSize, // sizeof( T ) rounded up to a multiple of CacheLineSize
+ PaddingSize = RequiredSize - sizeof( T ) // number of filler bytes to append after T
+ };
+ };
+ template<typename T> class CacheLineSizeHelper : private CacheLineSizeHelperData<T>, private Padding<CacheLineSizeHelperEnums<T>::PaddingSize> // a T plus PaddingSize filler bytes
+ {
+ public:
+ operator T &() { return CacheLineSizeHelperData<T>::fData; } // implicit access to the wrapped value
+ operator const T &() const { return CacheLineSizeHelperData<T>::fData; } // read-only implicit access to the wrapped value
+ //const T &operator=( const T &rhs ) { CacheLineSizeHelperData<T>::fData = rhs; } // NOTE(review): disabled; has no return statement and needs one before it is re-enabled
+
+ private:
+ };
+ template<typename T, int alignment> struct TypeForAlignmentHelper { typedef T Type; }; // default: elements are stored as plain T
+ template<typename T> struct TypeForAlignmentHelper<T, AliHLTFullyCacheLineAligned> { typedef CacheLineSizeHelper<T> Type; }; // fully cache-line aligned: elements are stored as padded CacheLineSizeHelper<T>
+
// XXX
// The ArrayBoundsCheck and Allocator classes implement a virtual destructor only in order to
- // silence the -Weff-c++ warning. It really is not required for these classes to have a virtual
+ // silence the -Weffc++ warning. It really is not required for these classes to have a virtual
// dtor since polymorphism is not used (AliHLTResizableArray and AliHLTFixedArray are allocated on
// the stack only). The virtual dtor only adds an unnecessary vtable to the code.
#ifndef ENABLE_ARRAY_BOUNDS_CHECKING
inline bool IsInBounds( int ) const { return true; }
inline void SetBounds( int, int ) {}
inline void MoveBounds( int ) {}
+ inline void ReinterpretCast( const ArrayBoundsCheck &, int, int ) {} // no-op: without ENABLE_ARRAY_BOUNDS_CHECKING no bounds are kept
};
#define BOUNDS_CHECK(x, y)
#else
*/
inline void MoveBounds( int d ) { fStart += d; fEnd += d; }
+ /**
+ * Takes over the bounds of \p other for an array whose memory is viewed with a different
+ * element type.
+ *
+ * \param other the bounds of the array that is being reinterpreted
+ * \param sizeofOld size in bytes of one element of the original array
+ * \param sizeofNew size in bytes of one element of the reinterpreted array
+ *
+ * An index i of the old array starts at byte i * sizeofOld, which is index
+ * i * sizeofOld / sizeofNew of the new array; hence the scaling by sizeofOld / sizeofNew.
+ *
+ * NOTE(review): elsewhere SetBounds is called with ( 0, size - 1 ), which suggests fEnd is an
+ * inclusive index. If so, the scaled fEnd is a conservative (too small) bound when the new
+ * element type is smaller - confirm against IsInBounds.
+ */
+ inline void ReinterpretCast( const ArrayBoundsCheck &other, int sizeofOld, int sizeofNew ) {
+ fStart = other.fStart * sizeofOld / sizeofNew;
+ fEnd = other.fEnd * sizeofOld / sizeofNew;
+ }
+
private:
int fStart;
int fEnd;
#endif
template<typename T, int alignment> class Allocator
{
- protected:
- virtual inline ~Allocator() {}
-#if defined(__MMX__) || defined(__SSE__)
+ /**
+ * Allocation helper for arrays of T whose storage is aligned to \p alignment bytes.
+ *
+ * Alloc( s ) obtains aligned raw memory for s elements and constructs them in place.
+ * Free( p, size ) runs the destructor of the \p size elements at \p p and releases the memory;
+ * it must be given a pointer obtained from Alloc (or 0 together with size 0).
+ */
+ public:
+#ifdef USE_MM_MALLOC
static inline T *Alloc( int s ) { T *p = reinterpret_cast<T *>( _mm_malloc( s * sizeof( T ), alignment ) ); return new( p ) T[s]; }
- static inline void Free( const T *const p ) { /** p->~T(); */ _mm_free( p ); } // XXX: doesn't call dtor because it's an array
+ static inline void Free( T *const p, int size ) {
+ for ( int i = 0; i < size; ++i ) {
+ p[i].~T();
+ }
+ _mm_free( p );
+ }
#else
- static inline T *Alloc( int s ) { T *p; posix_memalign( &p, alignment, s * sizeof( T ) ); return new( p ) T[s]; }
+ static inline T *Alloc( int s ) {
+ // posix_memalign() wants a void ** and an alignment that is a multiple of sizeof( void * )
+ const size_t align = static_cast<size_t>( alignment ) < sizeof( void * ) ? sizeof( void * ) : static_cast<size_t>( alignment );
+ void *p = 0;
+ if ( posix_memalign( &p, align, s * sizeof( T ) ) != 0 ) {
+ return 0; // allocation failed: there is no memory to construct the elements in
+ }
+ return new( p ) T[s];
+ }
- static inline void Free( const T *const p ) { std::free( p ); } // XXX: doesn't call dtor because it's an array
+ static inline void Free( T *const p, int size ) {
+ for ( int i = 0; i < size; ++i ) {
+ p[i].~T();
+ }
+ std::free( p );
+ }
+#endif
+ };
+ /**
+ * Allocation helper for fully cache-line aligned arrays: the elements are stored as
+ * CacheLineSizeHelper<T> and the storage is requested with an alignment of 128 bytes.
+ *
+ * Alloc( s ) obtains the raw memory for s padded elements and constructs them in place.
+ * Free( p, size ) runs the destructor of the \p size elements at \p p and releases the memory.
+ */
+ template<typename T> class Allocator<T, AliHLTFullyCacheLineAligned>
+ {
+ public:
+ typedef CacheLineSizeHelper<T> T2;
+#ifdef USE_MM_MALLOC
+ static inline T2 *Alloc( int s ) { T2 *p = reinterpret_cast<T2 *>( _mm_malloc( s * sizeof( T2 ), 128 ) ); return new( p ) T2[s]; }
+ static inline void Free( T2 *const p, int size ) {
+ for ( int i = 0; i < size; ++i ) {
+ p[i].~T2();
+ }
+ _mm_free( p );
+ }
+#else
+ static inline T2 *Alloc( int s ) {
+ // posix_memalign() takes a void **, and its return value reports failure
+ void *p = 0;
+ if ( posix_memalign( &p, 128, s * sizeof( T2 ) ) != 0 ) {
+ return 0; // allocation failed: there is no memory to construct the elements in
+ }
+ return new( p ) T2[s];
+ }
+ static inline void Free( T2 *const p, int size ) {
+ for ( int i = 0; i < size; ++i ) {
+ p[i].~T2();
+ }
+ std::free( p );
+ }
#endif
};
template<typename T> class Allocator<T, 0>
{
- protected:
- virtual inline ~Allocator() {}
+ public: // alignment 0: plain new[] / delete[] without any alignment request
static inline T *Alloc( int s ) { return new T[s]; }
- static inline void Free( const T *const p ) { delete[] p; }
+ static inline void Free( const T *const p, int ) { delete[] p; } // the element count is not needed: delete[] runs the destructors itself
};
+
+ template<typename T> struct ReturnTypeHelper { typedef T Type; }; // default: element access yields the stored type itself
+ template<typename T> struct ReturnTypeHelper<CacheLineSizeHelper<T> > { typedef T Type; }; // padded elements are exposed as the wrapped T
/**
* Array base class for dimension dependent behavior
*/
{
friend class ArrayBase<T, 2>;
public:
+ ArrayBase() : fData( 0 ), fSize( 0 ) {} // XXX really shouldn't be done. But -Weffc++ wants it so
+ ArrayBase( const ArrayBase &rhs ) : ArrayBoundsCheck( rhs ), fData( rhs.fData ), fSize( rhs.fSize ) {} // XXX
+ /** shallow assignment: takes over the bounds, the data pointer and the element count of \p rhs */
+ ArrayBase &operator=( const ArrayBase &rhs ) { ArrayBoundsCheck::operator=( rhs ); fData = rhs.fData; fSize = rhs.fSize; return *this; } // XXX
+ typedef typename ReturnTypeHelper<T>::Type R;
/**
* return a reference to the value at the given index
*/
- inline T &operator[]( int x ) { BOUNDS_CHECK( x, fData[0] ); return fData[x]; }
+ inline R &operator[]( int x ) { BOUNDS_CHECK( x, fData[0] ); return fData[x]; }
/**
* return a const reference to the value at the given index
*/
- inline const T &operator[]( int x ) const { BOUNDS_CHECK( x, fData[0] ); return fData[x]; }
+ inline const R &operator[]( int x ) const { BOUNDS_CHECK( x, fData[0] ); return fData[x]; }
protected:
T *fData;
- inline void SetSize( int, int, int ) {}
+ int fSize;
+ inline void SetSize( int x, int, int ) { fSize = x; }
};
/**
{
friend class ArrayBase<T, 3>;
public:
+ ArrayBase() : fData( 0 ), fSize( 0 ), fStride( 0 ) {} // XXX really shouldn't be done. But -Weffc++ wants it so
+ ArrayBase( const ArrayBase &rhs ) : ArrayBoundsCheck( rhs ), fData( rhs.fData ), fSize( rhs.fSize ), fStride( rhs.fStride ) {} // XXX
+ ArrayBase &operator=( const ArrayBase &rhs ) { ArrayBoundsCheck::operator=( rhs ); fData = rhs.fData; fSize = rhs.fSize; fStride = rhs.fStride; return *this; } // XXX
+ typedef typename ReturnTypeHelper<T>::Type R;
/**
* return a reference to the value at the given indexes
*/
- inline T &operator()( int x, int y ) { BOUNDS_CHECK( x * fStride + y, fData[0] ); return fData[x * fStride + y]; }
+ inline R &operator()( int x, int y ) { BOUNDS_CHECK( x * fStride + y, fData[0] ); return fData[x * fStride + y]; }
/**
* return a const reference to the value at the given indexes
*/
- inline const T &operator()( int x, int y ) const { BOUNDS_CHECK( x * fStride + y, fData[0] ); return fData[x * fStride + y]; }
+ inline const R &operator()( int x, int y ) const { BOUNDS_CHECK( x * fStride + y, fData[0] ); return fData[x * fStride + y]; }
/**
* return a 1-dim array at the given index. This makes it behave like a 2-dim C-Array.
*/
protected:
T *fData;
+ int fSize;
int fStride;
- inline void SetSize( int, int y, int ) { fStride = y; }
+ inline void SetSize( int x, int y, int ) { fStride = y; fSize = x * y; }
};
/**
class ArrayBase<T, 3> : public ArrayBoundsCheck
{
public:
+ ArrayBase() : fData( 0 ), fSize( 0 ), fStrideX( 0 ), fStrideY( 0 ) {} // XXX really shouldn't be done. But -Weffc++ wants it so
+ ArrayBase( const ArrayBase &rhs ) : ArrayBoundsCheck( rhs ), fData( rhs.fData ), fSize( rhs.fSize ), fStrideX( rhs.fStrideX ), fStrideY( rhs.fStrideY ) {} // XXX
+ ArrayBase &operator=( const ArrayBase &rhs ) { ArrayBoundsCheck::operator=( rhs ); fData = rhs.fData; fSize = rhs.fSize; fStrideX = rhs.fStrideX; fStrideY = rhs.fStrideY; return *this; } // XXX
+ typedef typename ReturnTypeHelper<T>::Type R;
/**
* return a reference to the value at the given indexes
*/
- inline T &operator()( int x, int y, int z );
+ inline R &operator()( int x, int y, int z );
/**
* return a const reference to the value at the given indexes
*/
- inline const T &operator()( int x, int y, int z ) const;
+ inline const R &operator()( int x, int y, int z ) const;
/**
* return a 2-dim array at the given index. This makes it behave like a 3-dim C-Array.
*/
protected:
T *fData;
+ int fSize;
int fStrideX;
int fStrideY;
- inline void SetSize( int, int y, int z ) { fStrideX = y * z; fStrideY = z; }
+ inline void SetSize( int x, int y, int z ) { fStrideX = y * z; fStrideY = z; fSize = fStrideX * x; }
};
- // XXX AlignedData really is an internal struct, but the RuleChecker doesn't understand that
- template<typename T, unsigned int Size, int alignment> class AlignedData;
- template<typename T, unsigned int Size> class AlignedData<T, Size, 0>
- {
- protected:
- T d[Size];
- };
-#ifdef __GNUC__
-#define ALIGN(n) __attribute__((aligned(n)))
-#else
-#define ALIGN(n) __declspec(align(n))
-#endif
- template<typename T, unsigned int Size> class AlignedData<T, Size, 4>
- {
- protected:
- ALIGN( 4 ) T d[Size];
- };
- template<typename T, unsigned int Size> class AlignedData<T, Size, 8>
+ /**
+ * Storage for Size elements of T that is aligned by hand: the elements live inside a char
+ * buffer that is Alignment bytes larger than needed, starting at the first suitably aligned
+ * address behind the start of the buffer.
+ *
+ * \param _alignment requested alignment in bytes (a power of 2); AliHLTFullyCacheLineAligned
+ * is mapped to 128
+ */
+ template<typename T, unsigned int Size, int _alignment> class AlignedData
{
- protected:
- ALIGN( 8 ) T d[Size];
- };
- template<typename T, unsigned int Size> class AlignedData<T, Size, 16>
- {
- protected:
- ALIGN( 16 ) T d[Size];
- };
- template<typename T, unsigned int Size> class AlignedData<T, Size, 32>
- {
- protected:
- ALIGN( 32 ) T d[Size];
- };
- template<typename T, unsigned int Size> class AlignedData<T, Size, 64>
- {
- protected:
- ALIGN( 64 ) T d[Size];
+ public:
+ /** constructs the Size elements in the aligned part of the buffer and returns the first one */
+ T *ConstructAlignedData() {
+ return new( AlignedStorage() ) T[Size];
+ }
+ /**
+ * runs the destructor of every element
+ * NOTE(review): this relies on ConstructAlignedData() having been called exactly once before.
+ */
+ ~AlignedData() {
+ T *mem = reinterpret_cast<T *>( AlignedStorage() );
+ for ( unsigned int i = 0; i < Size; ++i ) {
+ mem[i].~T();
+ }
+ }
+ private:
+ enum {
+ Alignment = _alignment == AliHLTFullyCacheLineAligned ? 128 : _alignment,
+ PaddedSize = Size * sizeof( T ) + Alignment
+ };
+ ALIHLTARRAY_STATIC_ASSERT_NC( ( Alignment & ( Alignment - 1 ) ) == 0, alignment_needs_to_be_a_power_of_2 );
+
+ /**
+ * returns the aligned address inside fUnalignedArray where the elements are placed.
+ * size_t is used for the address arithmetic because unsigned long is narrower than a
+ * pointer on LLP64 platforms.
+ */
+ char *AlignedStorage() {
+ const size_t offset = reinterpret_cast<size_t>( &fUnalignedArray[0] ) & ( Alignment - 1 );
+ // an already aligned buffer (offset 0) is still advanced by Alignment; PaddedSize accounts for that
+ return &fUnalignedArray[0] + ( Alignment - offset );
+ }
+
+ char fUnalignedArray[PaddedSize];
};
- template<typename T, unsigned int Size> class AlignedData<T, Size, 128>
+ template<typename T, unsigned int Size> class AlignedData<T, Size, 0> // alignment 0: a plain member array, no manual alignment
{
- protected:
- ALIGN( 128 ) T d[Size];
+ public:
+ T *ConstructAlignedData() { return &fArray[0]; } // the elements already exist as a member array; just hand out the first one
+ private:
+ T fArray[Size];
};
-#undef ALIGN
} // namespace AliHLTInternal
/**
{
public:
typedef AliHLTInternal::ArrayBase<T, Dim> Parent;
+
+ /**
+ * Returns the number of elements in the array. If it is a multi-dimensional array the size is
+ * the multiplication of the dimensions ( e.g. a 10 x 20 array returns 200 as its size ).
+ */
+ inline int Size() const { return Parent::fSize; }
+
/**
* allows you to check for validity of the array by casting to bool
*/
* moves the array base pointer so that the data that was once at index 0 will then be at index x
*/
inline AliHLTArray operator-( int x ) const;
+
+#ifndef HLTCA_GPUCODE
+ /**
+ * returns an array that refers to the same memory, viewed as elements of type Other. The
+ * bounds (if bounds checking is enabled) are converted to the new element size.
+ *
+ * NOTE(review): only the data pointer and the bounds are set on the result; its size and
+ * strides keep the values of a default constructed array.
+ */
+ template<typename Other> inline AliHLTArray<Other, Dim> ReinterpretCast() const {
+ AliHLTArray<Other, Dim> r;
+ r.fData = reinterpret_cast<Other *>( Parent::fData );
+ r.ReinterpretCast( *this, sizeof( T ), sizeof( Other ) );
+ return r;
+ }
+#endif
};
/**
* \endcode
*/
template < typename T, int Dim = 1, int alignment = 0 >
-class AliHLTResizableArray : public AliHLTArray<T, Dim>, public AliHLTInternal::Allocator<T, alignment>
+class AliHLTResizableArray : public AliHLTArray<typename AliHLTInternal::TypeForAlignmentHelper<T, alignment>::Type, Dim>
{
public:
- typedef AliHLTInternal::ArrayBase<T, Dim> Parent;
+ typedef typename AliHLTInternal::TypeForAlignmentHelper<T, alignment>::Type T2;
+ typedef AliHLTInternal::ArrayBase<T2, Dim> Parent;
/**
* does not allocate any memory
*/
/**
* frees the data
*/
- inline ~AliHLTResizableArray() { AliHLTInternal::Allocator<T, alignment>::Free( Parent::fData ); }
+ inline ~AliHLTResizableArray() { AliHLTInternal::Allocator<T, alignment>::Free( Parent::fData, Parent::fSize ); }
/**
* use for 1-dim arrays: resizes the memory for the array to x * sizeof(T) bytes.
AliHLTResizableArray &operator=( const AliHLTResizableArray & );
};
+template < unsigned int x, unsigned int y = 0, unsigned int z = 0 > class AliHLTArraySize // compile-time extents of a fixed-size array; an extent of 0 marks an unused dimension
+{
+ public:
+ enum {
+ Size = y == 0 ? x : ( z == 0 ? x * y : x * y * z ), // total number of elements
+ Dim = y == 0 ? 1 : ( z == 0 ? 2 : 3 ), // number of dimensions in use
+ X = x, Y = y, Z = z // the individual extents as given
+ };
+};
+
/**
* Owns the data. When it goes out of scope the data is freed.
*
* \param Dim selects the operator[]/operator() behavior it should have. I.e. makes it behave like a
* 1-, 2- or 3-dim array. (defaults to 1)
*/
-template < typename T, unsigned int Size, int Dim = 1, int alignment = 0 >
-class AliHLTFixedArray : public AliHLTArray<T, Dim>
+// \param Size an AliHLTArraySize<x, y, z> type; it provides the number of elements ( Size::Size ),
+// the dimension ( Size::Dim ) and the extents ( Size::X, Size::Y, Size::Z )
+// \param alignment alignment of the storage in bytes, or AliHLTFullyCacheLineAligned
+template < typename T, typename Size, int alignment = 0 >
+class AliHLTFixedArray : public AliHLTArray<typename AliHLTInternal::TypeForAlignmentHelper<T, alignment>::Type, Size::Dim>
{
public:
- typedef AliHLTInternal::ArrayBase<T, Dim> Parent;
- inline AliHLTFixedArray() { Parent::fData = &fDataOnStack.d[0]; Parent::SetBounds( 0, Size - 1 ); }
+ typedef typename AliHLTInternal::TypeForAlignmentHelper<T, alignment>::Type T2;
+ typedef AliHLTInternal::ArrayBase<T2, Size::Dim> Parent;
+ /** constructs the elements inside the embedded storage and sets up bounds, size and strides */
+ inline AliHLTFixedArray() {
+ Parent::fData = fFixedArray.ConstructAlignedData();
+ Parent::SetBounds( 0, Size::Size - 1 );
+ // SetSize is a member of a dependent base class, so it has to be qualified to be found
+ Parent::SetSize( Size::X, Size::Y, Size::Z );
+ }
private:
+ AliHLTInternal::AlignedData<typename AliHLTInternal::TypeForAlignmentHelper<T, alignment>::Type, Size::Size, alignment> fFixedArray;
+
// disable allocation on the heap
void *operator new( size_t );
- AliHLTInternal::AlignedData<T, Size, alignment> fDataOnStack;
-
// disable copy
+#ifdef HLTCA_GPUCODE
+#else
AliHLTFixedArray( const AliHLTFixedArray & );
AliHLTFixedArray &operator=( const AliHLTFixedArray & );
+#endif
};
-
////////////////////////
//// implementation ////
////////////////////////
}
template<typename T>
- inline T &ArrayBase<T, 3>::operator()( int x, int y, int z )
+ inline typename AliHLTInternal::ReturnTypeHelper<T>::Type &ArrayBase<T, 3>::operator()( int x, int y, int z )
{
- BOUNDS_CHECK( x * fStrideX + y + fStrideY + z, fData[0] );
- return fData[x * fStrideX + y + fStrideY + z];
+ // SetSize stores fStrideX = y-extent * z-extent and fStrideY = z-extent, so the linear index
+ // is x * fStrideX + y * fStrideY + z (y has to be scaled by fStrideY, not added to it)
+ BOUNDS_CHECK( x * fStrideX + y * fStrideY + z, fData[0] );
+ return fData[x * fStrideX + y * fStrideY + z];
}
template<typename T>
- inline const T &ArrayBase<T, 3>::operator()( int x, int y, int z ) const
+ inline const typename AliHLTInternal::ReturnTypeHelper<T>::Type &ArrayBase<T, 3>::operator()( int x, int y, int z ) const
{
- BOUNDS_CHECK( x * fStrideX + y + fStrideY + z, fData[0] );
- return fData[x * fStrideX + y + fStrideY + z];
+ // same linear index as the non-const overload: x * fStrideX + y * fStrideY + z
+ BOUNDS_CHECK( x * fStrideX + y * fStrideY + z, fData[0] );
+ return fData[x * fStrideX + y * fStrideY + z];
inline AliHLTResizableArray<T, Dim, alignment>::AliHLTResizableArray()
{
Parent::fData = 0;
+ Parent::SetSize( 0, 0, 0 );
Parent::SetBounds( 0, -1 );
}
template<typename T, int Dim, int alignment>
{
ALIHLTARRAY_STATIC_ASSERT( Dim == 1, AliHLTResizableArray1_used_with_incorrect_dimension );
Parent::fData = AliHLTInternal::Allocator<T, alignment>::Alloc( x );
+ Parent::SetSize( x, 0, 0 );
Parent::SetBounds( 0, x - 1 );
}
template<typename T, int Dim, int alignment>
inline void AliHLTResizableArray<T, Dim, alignment>::Resize( int x )
{
ALIHLTARRAY_STATIC_ASSERT( Dim == 1, AliHLTResizableArray1_resize_used_with_incorrect_dimension );
- AliHLTInternal::Allocator<T, alignment>::Free( Parent::fData );
+ AliHLTInternal::Allocator<T, alignment>::Free( Parent::fData, Parent::fSize ); // fSize still holds the old element count here
Parent::fData = ( x == 0 ) ? 0 : AliHLTInternal::Allocator<T, alignment>::Alloc( x );
+ Parent::SetSize( x, 0, 0 ); // record the new element count
Parent::SetBounds( 0, x - 1 );
}
template<typename T, int Dim, int alignment>
inline void AliHLTResizableArray<T, Dim, alignment>::Resize( int x, int y )
{
ALIHLTARRAY_STATIC_ASSERT( Dim == 2, AliHLTResizableArray2_resize_used_with_incorrect_dimension );
- AliHLTInternal::Allocator<T, alignment>::Free( Parent::fData );
+ AliHLTInternal::Allocator<T, alignment>::Free( Parent::fData, Parent::fSize );
Parent::fData = ( x == 0 ) ? 0 : AliHLTInternal::Allocator<T, alignment>::Alloc( x * y );
Parent::SetSize( x, y, 0 );
Parent::SetBounds( 0, x * y - 1 );
inline void AliHLTResizableArray<T, Dim, alignment>::Resize( int x, int y, int z )
{
ALIHLTARRAY_STATIC_ASSERT( Dim == 3, AliHLTResizableArray3_resize_used_with_incorrect_dimension );
- AliHLTInternal::Allocator<T, alignment>::Free( Parent::fData );
+ AliHLTInternal::Allocator<T, alignment>::Free( Parent::fData, Parent::fSize );
Parent::fData = ( x == 0 ) ? 0 : AliHLTInternal::Allocator<T, alignment>::Alloc( x * y * z );
Parent::SetSize( x, y, z );
Parent::SetBounds( 0, x * y * z - 1 );