1 // **************************************************************************
2 // * This file is property of and copyright by the ALICE HLT Project *
3 // * All rights reserved. *
5 // * Primary Authors: *
6 // * Copyright 2009 Matthias Kretz <kretz@kde.org> *
8 // * Permission to use, copy, modify and distribute this software and its *
9 // * documentation strictly for non-commercial purposes is hereby granted *
10 // * without fee, provided that the above copyright notice appears in all *
11 // * copies and that both the copyright notice and this permission notice *
12 // * appear in the supporting documentation. The authors make no claims *
13 // * about the suitability of this software for any purpose. It is *
14 // * provided "as is" without express or implied warranty. *
15 // **************************************************************************
17 #include "AliHLTTPCCASliceData.h"
18 #include "AliHLTTPCCAClusterData.h"
19 #include "AliHLTTPCCAMath.h"
20 #include "AliHLTArray.h"
21 #include "AliHLTTPCCAHit.h"
22 #include "AliHLTTPCCAParam.h"
23 #include "AliHLTTPCCAGPUConfig.h"
24 #include "AliHLTTPCCAGPUTracker.h"
25 #include "MemoryAssignmentHelpers.h"
29 // calculates an approximation for 1/sqrt(x)
30 // Google for 0x5f3759df :)
31 static inline float fastInvSqrt( float _x )
33 // the function calculates fast inverse sqrt
35 union { float f; int i; } x = { _x };
36 const float xhalf = 0.5f * x.f;
37 x.i = 0x5f3759df - ( x.i >> 1 );
38 x.f = x.f * ( 1.5f - xhalf * x.f * x.f );
42 inline void AliHLTTPCCASliceData::CreateGrid( AliHLTTPCCARow *row, const float2* data, int ClusterDataHitNumberOffset )
46 if ( row->NHits() <= 0 ) { // no hits or invalid data
47 // grid coordinates don't matter, since there are no hits
48 row->fGrid.CreateEmpty();
56 for ( int i = ClusterDataHitNumberOffset; i < ClusterDataHitNumberOffset + row->fNHits; ++i ) {
57 const float y = data[i].x;
58 const float z = data[i].y;
59 if ( yMax < y ) yMax = y;
60 if ( yMin > y ) yMin = y;
61 if ( zMax < z ) zMax = z;
62 if ( zMin > z ) zMin = z;
65 const float norm = fastInvSqrt( row->fNHits );
66 row->fGrid.Create( yMin, yMax, zMin, zMax,
67 CAMath::Max( ( yMax - yMin ) * norm, 2.f ),
68 CAMath::Max( ( zMax - zMin ) * norm, 2.f ) );
71 inline void AliHLTTPCCASliceData::PackHitData( AliHLTTPCCARow* const row, const AliHLTArray<AliHLTTPCCAHit> &binSortedHits )
75 static const float shortPackingConstant = 1.f / 65535.f;
76 const float y0 = row->fGrid.YMin();
77 const float z0 = row->fGrid.ZMin();
78 const float stepY = ( row->fGrid.YMax() - y0 ) * shortPackingConstant;
79 const float stepZ = ( row->fGrid.ZMax() - z0 ) * shortPackingConstant;
80 const float stepYi = 1.f / stepY;
81 const float stepZi = 1.f / stepZ;
87 row->fHstepYi = stepYi;
88 row->fHstepZi = stepZi;
90 for ( int hitIndex = 0; hitIndex < row->fNHits; ++hitIndex ) {
92 const int globalHitIndex = row->fHitNumberOffset + hitIndex;
93 const AliHLTTPCCAHit &hh = binSortedHits[hitIndex];
94 const float xx = ( ( hh.Y() - y0 ) * stepYi ) + .5 ;
95 const float yy = ( ( hh.Z() - z0 ) * stepZi ) + .5 ;
96 if ( xx < 0 || yy < 0 || xx >= 65536 || yy >= 65536 ) {
97 std::cout << "!!!! hit packing error!!! " << xx << " " << yy << " " << std::endl;
99 // HitData is bin sorted
100 fHitData[globalHitIndex].x = (unsigned short) xx;
101 fHitData[globalHitIndex].y = (unsigned short) yy;
105 void AliHLTTPCCASliceData::Clear()
110 void AliHLTTPCCASliceData::InitializeRows( const AliHLTTPCCAParam &p )
112 // initialisation of rows
113 if (!fRows) fRows = new AliHLTTPCCARow[HLTCA_ROW_COUNT + 1];
114 for ( int i = 0; i < p.NRows(); ++i ) {
115 fRows[i].fX = p.RowX( i );
116 fRows[i].fMaxY = CAMath::Tan( p.DAlpha() / 2. ) * fRows[i].fX;
120 #ifndef HLTCA_GPUCODE
121 AliHLTTPCCASliceData::~AliHLTTPCCASliceData()
123 //Standard Destrcutor
126 if (!fIsGpuSliceData) delete[] fRows;
131 if (!fIsGpuSliceData) delete[] fMemory;
138 GPUh() void AliHLTTPCCASliceData::SetGPUSliceDataMemory(void* const pSliceMemory, void* const pRowMemory)
140 //Set Pointer to slice data memory to external memory
141 fMemory = (char*) pSliceMemory;
142 fRows = (AliHLTTPCCARow*) pRowMemory;
145 size_t AliHLTTPCCASliceData::SetPointers(const AliHLTTPCCAClusterData *data, bool allocate)
147 //Set slice data internal pointers
149 int hitMemCount = HLTCA_ROW_COUNT * (sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v) - 1) + data->NumberOfClusters();
150 //Calculate Memory needed to store hits in rows
152 const unsigned int kVectorAlignment = 256 /*sizeof( uint4 )*/ ;
153 fNumberOfHitsPlusAlign = NextMultipleOf < ( kVectorAlignment > sizeof(HLTCA_GPU_ROWALIGNMENT) ? kVectorAlignment : sizeof(HLTCA_GPU_ROWALIGNMENT)) / sizeof( int ) > ( hitMemCount );
154 fNumberOfHits = data->NumberOfClusters();
155 const int firstHitInBinSize = (23 + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(int)) * HLTCA_ROW_COUNT + 4 * fNumberOfHits + 3;
156 //FIXME: sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(int) * HLTCA_ROW_COUNT is way to big and only to ensure to reserve enough memory for GPU Alignment.
157 //Might be replaced by correct value
159 const int memorySize =
161 fNumberOfHitsPlusAlign * 4 * sizeof( short ) +
163 NextMultipleOf<kVectorAlignment>( ( firstHitInBinSize ) * sizeof( int ) ) +
164 // HitWeights, ClusterDataIndex
165 fNumberOfHitsPlusAlign * 2 * sizeof( int );
167 if ( 1 )// fMemorySize < memorySize ) { // release the memory on CPU
169 fMemorySize = memorySize + 4;
172 if (!fIsGpuSliceData)
178 fMemory = new char[fMemorySize];// kVectorAlignment];
182 if (fMemorySize > HLTCA_GPU_SLICE_DATA_MEMORY)
191 AssignMemory( fLinkUpData, mem, fNumberOfHitsPlusAlign );
192 AssignMemory( fLinkDownData, mem, fNumberOfHitsPlusAlign );
193 AssignMemory( fHitData, mem, fNumberOfHitsPlusAlign );
194 AssignMemory( fFirstHitInBin, mem, firstHitInBinSize );
195 fGpuMemorySize = mem - fMemory;
197 //Memory Allocated below will not be copied to GPU but instead be initialized on the gpu itself. Therefore it must not be copied to GPU!
198 AssignMemory( fHitWeights, mem, fNumberOfHitsPlusAlign );
199 AssignMemory( fClusterDataIndex, mem, fNumberOfHitsPlusAlign );
200 return(mem - fMemory);
203 void AliHLTTPCCASliceData::InitFromClusterData( const AliHLTTPCCAClusterData &data )
205 // initialisation from cluster data
207 ////////////////////////////////////
209 ////////////////////////////////////
211 fNumberOfHits = data.NumberOfClusters();
213 float2* YZData = new float2[fNumberOfHits];
214 int* tmpHitIndex = new int[fNumberOfHits];
216 int RowOffset[HLTCA_ROW_COUNT];
217 int NumberOfClustersInRow[HLTCA_ROW_COUNT];
218 memset(NumberOfClustersInRow, 0, HLTCA_ROW_COUNT * sizeof(int));
219 fFirstRow = HLTCA_ROW_COUNT;
222 for (int i = 0;i < fNumberOfHits;i++)
224 const int tmpRow = data.RowNumber(i);
225 NumberOfClustersInRow[tmpRow]++;
226 if (tmpRow > fLastRow) fLastRow = tmpRow;
227 if (tmpRow < fFirstRow) fFirstRow = tmpRow;
230 for (int i = fFirstRow;i <= fLastRow;i++)
232 RowOffset[i] = tmpOffset;
233 tmpOffset += NumberOfClustersInRow[i];
236 int RowsFilled[HLTCA_ROW_COUNT];
237 memset(RowsFilled, 0, HLTCA_ROW_COUNT * sizeof(int));
238 for (int i = 0;i < fNumberOfHits;i++)
243 int tmpRow = data.RowNumber(i);
244 int newIndex = RowOffset[tmpRow] + (RowsFilled[tmpRow])++;
245 YZData[newIndex] = tmp;
246 tmpHitIndex[newIndex] = i;
249 if (fFirstRow == HLTCA_ROW_COUNT) fFirstRow = 0;
251 ////////////////////////////////////
253 ////////////////////////////////////
255 const int numberOfRows = fLastRow - fFirstRow + 1;
257 if (SetPointers(&data, true) == 0)
260 delete[] tmpHitIndex;
264 ////////////////////////////////////
265 // 2. fill HitData and FirstHitInBin
266 ////////////////////////////////////
268 for ( int rowIndex = 0; rowIndex < fFirstRow; ++rowIndex ) {
269 AliHLTTPCCARow &row = fRows[rowIndex];
270 row.fGrid.CreateEmpty();
273 row.fHitNumberOffset = 0;
274 row.fFirstHitInBinOffset = 0;
283 for ( int rowIndex = fLastRow + 1; rowIndex < HLTCA_ROW_COUNT + 1; ++rowIndex ) {
284 AliHLTTPCCARow &row = fRows[rowIndex];
285 row.fGrid.CreateEmpty();
288 row.fHitNumberOffset = 0;
289 row.fFirstHitInBinOffset = 0;
300 AliHLTResizableArray<AliHLTTPCCAHit> binSortedHits( fNumberOfHits + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v));
302 int gridContentOffset = 0;
305 int binCreationMemorySize = 103 * 2 + fNumberOfHits;
306 AliHLTResizableArray<unsigned short> binCreationMemory( binCreationMemorySize );
308 fGPUSharedDataReq = 0;
310 for ( int rowIndex = fFirstRow; rowIndex <= fLastRow; ++rowIndex ) {
311 AliHLTTPCCARow &row = fRows[rowIndex];
312 row.fNHits = NumberOfClustersInRow[rowIndex];
313 assert( row.fNHits < ( 1 << sizeof( unsigned short ) * 8 ) );
314 row.fHitNumberOffset = hitOffset;
315 hitOffset += NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(NumberOfClustersInRow[rowIndex]);
317 row.fFirstHitInBinOffset = gridContentOffset;
319 CreateGrid( &row, YZData, RowOffset[rowIndex] );
320 const AliHLTTPCCAGrid &grid = row.fGrid;
321 const int numberOfBins = grid.N();
323 int binCreationMemorySizeNew;
324 if ( ( binCreationMemorySizeNew = numberOfBins * 2 + 6 + row.fNHits + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(unsigned short) * numberOfRows + 1) > binCreationMemorySize ) {
325 binCreationMemorySize = binCreationMemorySizeNew;
326 binCreationMemory.Resize( binCreationMemorySize );
329 AliHLTArray<unsigned short> c = binCreationMemory; // number of hits in all previous bins
330 AliHLTArray<unsigned short> bins = c + ( numberOfBins + 3 ); // cache for the bin index for every hit in this row
331 AliHLTArray<unsigned short> filled = bins + row.fNHits; // counts how many hits there are per bin
333 for ( unsigned int bin = 0; bin < row.fGrid.N() + 3; ++bin ) {
334 filled[bin] = 0; // initialize filled[] to 0
337 for ( int hitIndex = 0; hitIndex < row.fNHits; ++hitIndex ) {
338 const int globalHitIndex = RowOffset[rowIndex] + hitIndex;
339 const unsigned short bin = row.fGrid.GetBin( YZData[globalHitIndex].x, YZData[globalHitIndex].y );
341 bins[hitIndex] = bin;
345 unsigned short n = 0;
346 for ( int bin = 0; bin < numberOfBins + 3; ++bin ) {
351 for ( int hitIndex = 0; hitIndex < row.fNHits; ++hitIndex ) {
352 const unsigned short bin = bins[hitIndex];
354 const unsigned short ind = c[bin] + filled[bin]; // generate an index for this hit that is >= c[bin] and < c[bin + 1]
355 const int globalBinsortedIndex = row.fHitNumberOffset + ind;
356 const int globalHitIndex = RowOffset[rowIndex] + hitIndex;
358 // allows to find the global hit index / coordinates from a global bin sorted hit index
359 fClusterDataIndex[globalBinsortedIndex] = tmpHitIndex[globalHitIndex];
360 binSortedHits[ind].SetY( YZData[globalHitIndex].x );
361 binSortedHits[ind].SetZ( YZData[globalHitIndex].y );
364 PackHitData( &row, binSortedHits );
366 for ( int i = 0; i < numberOfBins; ++i ) {
367 fFirstHitInBin[row.fFirstHitInBinOffset + i] = c[i]; // global bin-sorted hit index
369 const unsigned short a = c[numberOfBins];
370 // grid.N is <= row.fNHits
371 const int nn = numberOfBins + grid.Ny() + 3;
372 for ( int i = numberOfBins; i < nn; ++i ) {
373 assert( (signed) row.fFirstHitInBinOffset + i < 23 * numberOfRows + 4 * fNumberOfHits + 3 );
374 fFirstHitInBin[row.fFirstHitInBinOffset + i] = a;
378 gridContentOffset += nn;
380 if (NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.fNHits) + nn > (unsigned) fGPUSharedDataReq)
381 fGPUSharedDataReq = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.fNHits) + nn;
383 //Make pointer aligned
384 gridContentOffset = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(gridContentOffset);
388 delete[] tmpHitIndex;
391 //SG cell finder - test code
393 if ( fTmpHitInputIDs ) delete[] fTmpHitInputIDs;
394 fTmpHitInputIDs = new int [NHits];
395 const float areaY = .5;
396 const float areaZ = .5;
397 int newRowNHitsTotal = 0;
398 bool *usedHits = new bool [NHits];
399 for ( int iHit = 0; iHit < NHits; iHit++ ) usedHits[iHit] = 0;
400 for ( int iRow = 0; iRow < fParam.NRows(); iRow++ ) {
401 rowHeaders[iRow*2 ] = newRowNHitsTotal; // new first hit
402 rowHeaders[iRow*2+1] = 0; // new N hits
404 int oldRowFirstHit = RowFirstHit[iRow];
405 int oldRowLastHit = oldRowFirstHit + RowNHits[iRow];
406 for ( int iHit = oldRowFirstHit; iHit < oldRowLastHit; iHit++ ) {
407 if ( usedHits[iHit] ) continue;
416 if ( 0 ) for ( int jHit = iHit + 1; jHit < oldRowLastHit; jHit++ ) {//SG!!!
417 //if( usedHits[jHit] ) continue;
418 float dy = Y[jHit] - y0;
419 float dz = Z[jHit] - z0;
420 if ( CAMath::Abs( dy ) < areaY && CAMath::Abs( dz ) < areaZ ) {
428 int id = newRowNHitsTotal + newRowNHits;
429 hitsXYZ[id*3+0 ] = cx / nclu;
430 hitsXYZ[id*3+1 ] = cy / nclu;
431 hitsXYZ[id*3+2 ] = cz / nclu;
432 fTmpHitInputIDs[id] = iHit;
435 rowHeaders[iRow*2+1] = newRowNHits;
436 newRowNHitsTotal += newRowNHits;
438 NHitsTotal() = newRowNHitsTotal;
439 reinterpret_cast<int*>( fInputEvent )[1+fParam.NRows()*2] = newRowNHitsTotal;
445 void AliHLTTPCCASliceData::ClearHitWeights()
449 #ifdef ENABLE_VECTORIZATION
450 const int_v v0( Zero );
451 const int *const end = fHitWeights + fNumberOfHits;
452 for ( int *mem = fHitWeights; mem < end; mem += v0.Size ) {
456 for ( int i = 0; i < fNumberOfHitsPlusAlign; ++i ) {
462 void AliHLTTPCCASliceData::ClearLinks()
466 #ifdef ENABLE_VECTORIZATION
467 const short_v v0( -1 );
468 const short *const end1 = fLinkUpData + fNumberOfHits;
469 for ( short *mem = fLinkUpData; mem < end; mem += v0.Size ) {
472 const short *const end2 = fLinkDownData + fNumberOfHits;
473 for ( short *mem = fLinkDownData; mem < end; mem += v0.Size ) {
477 for ( int i = 0; i < fNumberOfHits; ++i ) {
480 for ( int i = 0; i < fNumberOfHits; ++i ) {
481 fLinkDownData[i] = -1;