-/**************************************************************************
- * This file is property of and copyright by the ALICE HLT Project *
- * All rights reserved. *
- * *
- * Primary Authors: *
- * Copyright 2009 Matthias Kretz <kretz@kde.org> *
- * *
- * Permission to use, copy, modify and distribute this software and its *
- * documentation strictly for non-commercial purposes is hereby granted *
- * without fee, provided that the above copyright notice appears in all *
- * copies and that both the copyright notice and this permission notice *
- * appear in the supporting documentation. The authors make no claims *
- * about the suitability of this software for any purpose. It is *
- * provided "as is" without express or implied warranty. *
- **************************************************************************/
+// **************************************************************************
+// * This file is property of and copyright by the ALICE HLT Project *
+// * All rights reserved. *
+// * *
+// * Primary Authors: *
+// * Copyright 2009 Matthias Kretz <kretz@kde.org> *
+// * *
+// * Permission to use, copy, modify and distribute this software and its *
+// * documentation strictly for non-commercial purposes is hereby granted *
+// * without fee, provided that the above copyright notice appears in all *
+// * copies and that both the copyright notice and this permission notice *
+// * appear in the supporting documentation. The authors make no claims *
+// * about the suitability of this software for any purpose. It is *
+// * provided "as is" without express or implied warranty. *
+// **************************************************************************
#include "AliHLTTPCCASliceData.h"
#include "AliHLTTPCCAClusterData.h"
#include "AliHLTTPCCAHit.h"
#include "AliHLTTPCCAParam.h"
#include "MemoryAssignmentHelpers.h"
+#include "AliHLTTPCCAGPUConfig.h"
+#include "AliHLTTPCCAGPUTracker.h"
#include <iostream>
// calculates an approximation for 1/sqrt(x)
// Google for 0x5f3759df :)
static inline float fastInvSqrt( float _x )
{
+ // the function calculates fast inverse sqrt
+
union { float f; int i; } x = { _x };
const float xhalf = 0.5f * x.f;
x.i = 0x5f3759df - ( x.i >> 1 );
return x.f;
}
-inline void AliHLTTPCCASliceData::createGrid( AliHLTTPCCARow *row, const AliHLTTPCCAClusterData &data )
+inline void AliHLTTPCCASliceData::CreateGrid( AliHLTTPCCARow *row, const AliHLTTPCCAClusterData &data, int ClusterDataHitNumberOffset )
{
+ // grid creation
+
if ( row->NHits() <= 0 ) { // no hits or invalid data
// grid coordinates don't matter, since there are no hits
row->fGrid.CreateEmpty();
float yMax = -1.e3f;
float zMin = 1.e3f;
float zMax = -1.e3f;
- for ( int i = row->fHitNumberOffset; i < row->fHitNumberOffset + row->fNHits; ++i ) {
+ for ( int i = ClusterDataHitNumberOffset; i < ClusterDataHitNumberOffset + row->fNHits; ++i ) {
const float y = data.Y( i );
const float z = data.Z( i );
if ( yMax < y ) yMax = y;
CAMath::Max( ( zMax - zMin ) * norm, 2.f ) );
}
-inline void AliHLTTPCCASliceData::PackHitData( AliHLTTPCCARow *row, const AliHLTArray<AliHLTTPCCAHit> &binSortedHits )
+inline void AliHLTTPCCASliceData::PackHitData( AliHLTTPCCARow* const row, const AliHLTArray<AliHLTTPCCAHit> &binSortedHits )
{
+ // hit data packing
+
static const float shortPackingConstant = 1.f / 65535.f;
const float y0 = row->fGrid.YMin();
const float z0 = row->fGrid.ZMin();
std::cout << "!!!! hit packing error!!! " << xx << " " << yy << " " << std::endl;
}
// HitData is bin sorted
- fHitDataY[row->fHitNumberOffset + hitIndex] = xx;
- fHitDataZ[row->fHitNumberOffset + hitIndex] = yy;
+ fHitData[row->fHitNumberOffset + hitIndex].x = xx;
+ fHitData[row->fHitNumberOffset + hitIndex].y = yy;
}
}
void AliHLTTPCCASliceData::InitializeRows( const AliHLTTPCCAParam &p )
{
+ // Initialise the per-row geometry (fX, fMaxY) for the first p.NRows() rows, allocating the row array on first use.
+ if (!fRows) fRows = new AliHLTTPCCARow[HLTCA_ROW_COUNT + 1]; // allocate HLTCA_ROW_COUNT + 1 rows only when no row array is set yet
for ( int i = 0; i < p.NRows(); ++i ) { // NOTE(review): assumes p.NRows() <= HLTCA_ROW_COUNT + 1 - confirm
fRows[i].fX = p.RowX( i );
fRows[i].fMaxY = CAMath::Tan( p.DAlpha() / 2. ) * fRows[i].fX; // fMaxY = tan( DAlpha / 2 ) * fX
}
}
+#ifndef HLTCA_GPUCODE
+ AliHLTTPCCASliceData::~AliHLTTPCCASliceData()
+ {
+ //Standard destructor: frees fRows and fMemory unless fIsGpuSliceData is set, then resets both pointers to NULL.
+ if (fRows)
+ {
+ if (!fIsGpuSliceData) delete[] fRows; // not freed when fIsGpuSliceData is set; presumably the buffer is then externally owned (cf. SetGPUSliceDataMemory) - confirm
+ fRows = NULL;
+ }
+ if (fMemory)
+ {
+ if (!fIsGpuSliceData) delete[] fMemory; // same ownership condition as for fRows
+ fMemory = NULL;
+ }
+
+ }
+#endif
+
+GPUh() void AliHLTTPCCASliceData::SetGPUSliceDataMemory(void* const pSliceMemory, void* const pRowMemory)
+{
+ //Point fMemory and fRows at caller-supplied buffers; nothing is allocated, copied or freed here, and the previous pointer values are simply overwritten.
+ fMemory = (char*) pSliceMemory; // buffer from which SetPointers assigns the slice data arrays
+ fRows = (AliHLTTPCCARow*) pRowMemory; // storage for the row array
+}
+
+size_t AliHLTTPCCASliceData::SetPointers(const AliHLTTPCCAClusterData *data, bool allocate)
+{
+ //Set slice data internal pointers: compute the buffer size needed for the given cluster data, optionally (re)allocate fMemory, assign the internal array pointers from it, and return the number of bytes spanned by the assigned arrays.
+ int hitMemCount = 0;
+ for ( int rowIndex = data->FirstRow(); rowIndex <= data->LastRow(); ++rowIndex )
+ {
+ hitMemCount += NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(data->NumberOfClusters( rowIndex ));
+ }
+ //hitMemCount now holds the number of hit slots needed to store the hits of all rows, with each row's cluster count rounded up via NextMultipleOf
+
+ const int numberOfRows = data->LastRow() - data->FirstRow() + 1;
+ const unsigned int kVectorAlignment = 256 /*sizeof( uint4 )*/ ;
+ fNumberOfHitsPlusAlign = NextMultipleOf < ( kVectorAlignment > sizeof(HLTCA_GPU_ROWALIGNMENT) ? kVectorAlignment : sizeof(HLTCA_GPU_ROWALIGNMENT)) / sizeof( int ) > ( hitMemCount ); // rounding granularity: the larger of the two alignments, divided by sizeof( int )
+ fNumberOfHits = data->NumberOfClusters();
+ const int firstHitInBinSize = (23 + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(int)) * numberOfRows + 4 * fNumberOfHits + 3;
+ //FIXME: sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(int) * numberOfRows is way too big; it is only there to make sure enough memory is reserved for GPU alignment.
+ //It might be replaced by the correct value.
+
+ const int memorySize =
+ // LinkData, HitData
+ fNumberOfHitsPlusAlign * 4 * sizeof( short ) +
+ // FirstHitInBin
+ NextMultipleOf<kVectorAlignment>( ( firstHitInBinSize ) * sizeof( int ) ) +
+ // HitWeights, ClusterDataIndex
+ fNumberOfHitsPlusAlign * 2 * sizeof( int );
+
+ if ( 1||fMemorySize < memorySize ) { // NOTE: the leading 1|| makes this condition always true, so fMemorySize is updated on every call and, when allocate is set and this is not GPU slice data, the CPU buffer is released and reallocated every time
+ fMemorySize = memorySize;
+ if (allocate && !fIsGpuSliceData)
+ {
+ if (fMemory)
+ {
+ delete[] fMemory;
+ }
+ fMemory = new char[fMemorySize + 4];// kVectorAlignment];
+ }
+ }
+
+ char *mem = fMemory; // NOTE(review): fMemory is used as-is when allocate is false or fIsGpuSliceData is set - confirm callers provide a buffer of at least fMemorySize bytes
+ AssignMemory( fLinkUpData, mem, fNumberOfHitsPlusAlign );
+ AssignMemory( fLinkDownData, mem, fNumberOfHitsPlusAlign );
+ AssignMemory( fHitData, mem, fNumberOfHitsPlusAlign );
+ AssignMemory( fFirstHitInBin, mem, firstHitInBinSize );
+ fGpuMemorySize = mem - fMemory; // bytes spanned by the link, hit and first-hit-in-bin arrays assigned above
+
+ //The arrays assigned below are not copied to the GPU; they are initialised on the GPU itself and are therefore excluded from fGpuMemorySize.
+ AssignMemory( fHitWeights, mem, fNumberOfHitsPlusAlign );
+ AssignMemory( fClusterDataIndex, mem, fNumberOfHitsPlusAlign );
+ return(mem - fMemory); // total bytes spanned by all arrays assigned in this function
+}
+
void AliHLTTPCCASliceData::InitFromClusterData( const AliHLTTPCCAClusterData &data )
{
+ // initialisation from cluster data
+
////////////////////////////////////
// 1. prepare arrays
////////////////////////////////////
+ const int numberOfRows = data.LastRow() - data.FirstRow() + 1;
fNumberOfHits = data.NumberOfClusters();
/* TODO Vectorization
}
const int memorySize = fNumberOfHits * sizeof( short_v::Type )
*/
- const int numberOfRows = data.LastRow() - data.FirstRow();
- enum { VectorAlignment = sizeof( int ) };
- const int numberOfHitsPlusAlignment = NextMultipleOf < VectorAlignment / sizeof( int ) > ( fNumberOfHits );
- const int memorySize =
- // LinkData, HitData
- numberOfHitsPlusAlignment * 4 * sizeof( short ) +
- // FirstHitInBin
- NextMultipleOf<VectorAlignment>( ( 23 * numberOfRows + 4 * fNumberOfHits ) * sizeof( int ) ) +
- // HitWeights, ClusterDataIndex
- numberOfHitsPlusAlignment * 2 * sizeof( int );
-
- if ( fMemorySize < memorySize ) {
- fMemorySize = memorySize;
- delete[] fMemory;
- fMemory = new char[fMemorySize + 4];// VectorAlignment];
- }
-
- char *mem = fMemory;
- AssignMemory( fLinkUpData, mem, numberOfHitsPlusAlignment );
- AssignMemory( fLinkDownData, mem, numberOfHitsPlusAlignment );
- AssignMemory( fHitDataY, mem, numberOfHitsPlusAlignment );
- AssignMemory( fHitDataZ, mem, numberOfHitsPlusAlignment );
- AssignMemory( fFirstHitInBin, mem, 23 * numberOfRows + 4 * fNumberOfHits );
- AssignMemory( fHitWeights, mem, numberOfHitsPlusAlignment );
- AssignMemory( fClusterDataIndex, mem, numberOfHitsPlusAlignment );
+ SetPointers(&data, true);
////////////////////////////////////
// 2. fill HitData and FirstHitInBin
row.fHstepYi = 1.f;
row.fHstepZi = 1.f;
}
- for ( int rowIndex = data.LastRow() + 1; rowIndex < 160; ++rowIndex ) {
+ for ( int rowIndex = data.LastRow() + 1; rowIndex < HLTCA_ROW_COUNT + 1; ++rowIndex ) {
AliHLTTPCCARow &row = fRows[rowIndex];
row.fGrid.CreateEmpty();
row.fNHits = 0;
row.fHstepZi = 1.f;
}
- AliHLTResizableArray<AliHLTTPCCAHit> binSortedHits( fNumberOfHits );
+
+ AliHLTResizableArray<AliHLTTPCCAHit> binSortedHits( fNumberOfHits + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v) * numberOfRows + 1 );
int gridContentOffset = 0;
+ int hitOffset = 0;
int binCreationMemorySize = 103 * 2 + fNumberOfHits;
AliHLTResizableArray<unsigned short> binCreationMemory( binCreationMemorySize );
+ fGPUSharedDataReq = 0;
+
for ( int rowIndex = data.FirstRow(); rowIndex <= data.LastRow(); ++rowIndex ) {
AliHLTTPCCARow &row = fRows[rowIndex];
-
row.fNHits = data.NumberOfClusters( rowIndex );
assert( row.fNHits < ( 1 << sizeof( unsigned short ) * 8 ) );
- row.fHitNumberOffset = data.RowOffset( rowIndex );
+ row.fHitNumberOffset = hitOffset;
+ hitOffset += NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(data.NumberOfClusters( rowIndex ));
+
row.fFirstHitInBinOffset = gridContentOffset;
- createGrid( &row, data );
+ CreateGrid( &row, data, data.RowOffset( rowIndex ) );
const AliHLTTPCCAGrid &grid = row.fGrid;
const int numberOfBins = grid.N();
int binCreationMemorySizeNew;
- if ( ( binCreationMemorySizeNew = numberOfBins * 2 + 6 + row.fNHits ) > binCreationMemorySize ) {
+ if ( ( binCreationMemorySizeNew = numberOfBins * 2 + 6 + row.fNHits + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(unsigned short) * numberOfRows + 1) > binCreationMemorySize ) {
binCreationMemorySize = binCreationMemorySizeNew;
binCreationMemory.Resize( binCreationMemorySize );
}
}
for ( int hitIndex = 0; hitIndex < row.fNHits; ++hitIndex ) {
- const int globalHitIndex = row.fHitNumberOffset + hitIndex;
+ const int globalHitIndex = data.RowOffset( rowIndex ) + hitIndex;
const unsigned short bin = row.fGrid.GetBin( data.Y( globalHitIndex ), data.Z( globalHitIndex ) );
+
bins[hitIndex] = bin;
++filled[bin];
}
--filled[bin];
const unsigned short ind = c[bin] + filled[bin]; // generate an index for this hit that is >= c[bin] and < c[bin + 1]
const int globalBinsortedIndex = row.fHitNumberOffset + ind;
- const int globalHitIndex = row.fHitNumberOffset + hitIndex;
+ const int globalHitIndex = data.RowOffset( rowIndex ) + hitIndex;
// allows to find the global hit index / coordinates from a global bin sorted hit index
fClusterDataIndex[globalBinsortedIndex] = globalHitIndex;
// grid.N is <= row.fNHits
const int nn = numberOfBins + grid.Ny() + 3;
for ( int i = numberOfBins; i < nn; ++i ) {
- assert( row.fFirstHitInBinOffset + i < 23 * numberOfRows + 4 * fNumberOfHits );
+ assert( (signed) row.fFirstHitInBinOffset + i < 23 * numberOfRows + 4 * fNumberOfHits + 3 );
fFirstHitInBin[row.fFirstHitInBinOffset + i] = a;
}
row.fFullSize = nn;
gridContentOffset += nn;
+
+ if (NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.fNHits) + nn > (unsigned) fGPUSharedDataReq)
+ fGPUSharedDataReq = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.fNHits) + nn;
+
+ //Make pointer aligned
+ gridContentOffset = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(gridContentOffset);
}
#if 0
void AliHLTTPCCASliceData::ClearHitWeights()
{
+ // clear hit weights
+
#ifdef ENABLE_VECTORIZATION
const int_v v0( Zero );
const int *const end = fHitWeights + fNumberOfHits;
v0.store( mem );
}
#else
- for ( int i = 0; i < fNumberOfHits; ++i ) {
+ for ( int i = 0; i < fNumberOfHitsPlusAlign; ++i ) {
fHitWeights[i] = 0;
}
#endif
void AliHLTTPCCASliceData::ClearLinks()
{
+ // link cleaning
+
#ifdef ENABLE_VECTORIZATION
const short_v v0( -1 );
const short *const end1 = fLinkUpData + fNumberOfHits;