]> git.uio.no Git - u/mrichter/AliRoot.git/blame - HLT/TPCLib/tracking-ca/AliHLTTPCCASliceData.cxx
Merge branch 'master' of https://git.cern.ch/reps/AliRoot
[u/mrichter/AliRoot.git] / HLT / TPCLib / tracking-ca / AliHLTTPCCASliceData.cxx
CommitLineData
6de2bc40 1// **************************************************************************
2// * This file is property of and copyright by the ALICE HLT Project *
3// * All rights reserved. *
4// * *
5// * Primary Authors: *
6// * Copyright 2009 Matthias Kretz <kretz@kde.org> *
7// * *
8// * Permission to use, copy, modify and distribute this software and its *
9// * documentation strictly for non-commercial purposes is hereby granted *
10// * without fee, provided that the above copyright notice appears in all *
11// * copies and that both the copyright notice and this permission notice *
12// * appear in the supporting documentation. The authors make no claims *
13// * about the suitability of this software for any purpose. It is *
14// * provided "as is" without express or implied warranty. *
15// **************************************************************************
4acc2401 16
17#include "AliHLTTPCCASliceData.h"
18#include "AliHLTTPCCAClusterData.h"
19#include "AliHLTTPCCAMath.h"
20#include "AliHLTArray.h"
21#include "AliHLTTPCCAHit.h"
22#include "AliHLTTPCCAParam.h"
b22af1bf 23#include "AliHLTTPCCAGPUConfig.h"
24#include "AliHLTTPCCAGPUTracker.h"
1e63725a 25#include "MemoryAssignmentHelpers.h"
4acc2401 26#include <iostream>
e01a1f52 27#include <string.h>
4acc2401 28
29// calculates an approximation for 1/sqrt(x)
30// Google for 0x5f3759df :)
31static inline float fastInvSqrt( float _x )
32{
6de2bc40 33 // the function calculates fast inverse sqrt
34
4acc2401 35 union { float f; int i; } x = { _x };
36 const float xhalf = 0.5f * x.f;
37 x.i = 0x5f3759df - ( x.i >> 1 );
38 x.f = x.f * ( 1.5f - xhalf * x.f * x.f );
39 return x.f;
40}
41
e01a1f52 42inline void AliHLTTPCCASliceData::CreateGrid( AliHLTTPCCARow *row, const float2* data, int ClusterDataHitNumberOffset )
4acc2401 43{
6de2bc40 44 // grid creation
45
4acc2401 46 if ( row->NHits() <= 0 ) { // no hits or invalid data
47 // grid coordinates don't matter, since there are no hits
48 row->fGrid.CreateEmpty();
49 return;
50 }
51
52 float yMin = 1.e3f;
53 float yMax = -1.e3f;
54 float zMin = 1.e3f;
55 float zMax = -1.e3f;
b22af1bf 56 for ( int i = ClusterDataHitNumberOffset; i < ClusterDataHitNumberOffset + row->fNHits; ++i ) {
e01a1f52 57 const float y = data[i].x;
58 const float z = data[i].y;
4acc2401 59 if ( yMax < y ) yMax = y;
60 if ( yMin > y ) yMin = y;
61 if ( zMax < z ) zMax = z;
62 if ( zMin > z ) zMin = z;
63 }
64
65 const float norm = fastInvSqrt( row->fNHits );
66 row->fGrid.Create( yMin, yMax, zMin, zMax,
67 CAMath::Max( ( yMax - yMin ) * norm, 2.f ),
68 CAMath::Max( ( zMax - zMin ) * norm, 2.f ) );
69}
70
444e5682 71inline void AliHLTTPCCASliceData::PackHitData( AliHLTTPCCARow* const row, const AliHLTArray<AliHLTTPCCAHit> &binSortedHits )
4acc2401 72{
6de2bc40 73 // hit data packing
74
4acc2401 75 static const float shortPackingConstant = 1.f / 65535.f;
76 const float y0 = row->fGrid.YMin();
77 const float z0 = row->fGrid.ZMin();
78 const float stepY = ( row->fGrid.YMax() - y0 ) * shortPackingConstant;
79 const float stepZ = ( row->fGrid.ZMax() - z0 ) * shortPackingConstant;
80 const float stepYi = 1.f / stepY;
81 const float stepZi = 1.f / stepZ;
82
83 row->fHy0 = y0;
84 row->fHz0 = z0;
85 row->fHstepY = stepY;
86 row->fHstepZ = stepZ;
87 row->fHstepYi = stepYi;
88 row->fHstepZi = stepZi;
89
90 for ( int hitIndex = 0; hitIndex < row->fNHits; ++hitIndex ) {
91 // bin sorted index!
92 const int globalHitIndex = row->fHitNumberOffset + hitIndex;
e01a1f52 93 const AliHLTTPCCAHit &hh = binSortedHits[hitIndex];
4acc2401 94 const float xx = ( ( hh.Y() - y0 ) * stepYi ) + .5 ;
95 const float yy = ( ( hh.Z() - z0 ) * stepZi ) + .5 ;
96 if ( xx < 0 || yy < 0 || xx >= 65536 || yy >= 65536 ) {
97 std::cout << "!!!! hit packing error!!! " << xx << " " << yy << " " << std::endl;
98 }
99 // HitData is bin sorted
e01a1f52 100 fHitData[globalHitIndex].x = (unsigned short) xx;
101 fHitData[globalHitIndex].y = (unsigned short) yy;
4acc2401 102 }
103}
104
105void AliHLTTPCCASliceData::Clear()
106{
107 fNumberOfHits = 0;
108}
109
110void AliHLTTPCCASliceData::InitializeRows( const AliHLTTPCCAParam &p )
111{
6de2bc40 112 // initialisation of rows
b22af1bf 113 if (!fRows) fRows = new AliHLTTPCCARow[HLTCA_ROW_COUNT + 1];
4acc2401 114 for ( int i = 0; i < p.NRows(); ++i ) {
115 fRows[i].fX = p.RowX( i );
116 fRows[i].fMaxY = CAMath::Tan( p.DAlpha() / 2. ) * fRows[i].fX;
117 }
118}
119
b22af1bf 120#ifndef HLTCA_GPUCODE
121 AliHLTTPCCASliceData::~AliHLTTPCCASliceData()
122 {
123 //Standard Destrcutor
124 if (fRows)
125 {
126 if (!fIsGpuSliceData) delete[] fRows;
127 fRows = NULL;
128 }
129 if (fMemory)
130 {
131 if (!fIsGpuSliceData) delete[] fMemory;
132 fMemory = NULL;
133 }
134
135 }
136#endif
137
138GPUh() void AliHLTTPCCASliceData::SetGPUSliceDataMemory(void* const pSliceMemory, void* const pRowMemory)
4acc2401 139{
b22af1bf 140 //Set Pointer to slice data memory to external memory
141 fMemory = (char*) pSliceMemory;
142 fRows = (AliHLTTPCCARow*) pRowMemory;
7be9b0d7 143}
4acc2401 144
7be9b0d7 145size_t AliHLTTPCCASliceData::SetPointers(const AliHLTTPCCAClusterData *data, bool allocate)
146{
e01a1f52 147 //Set slice data internal pointers
148
149 int hitMemCount = HLTCA_ROW_COUNT * (sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v) - 1) + data->NumberOfClusters();
150 //Calculate Memory needed to store hits in rows
b22af1bf 151
a59a784e 152 const unsigned int kVectorAlignment = 256 /*sizeof( uint4 )*/ ;
153 fNumberOfHitsPlusAlign = NextMultipleOf < ( kVectorAlignment > sizeof(HLTCA_GPU_ROWALIGNMENT) ? kVectorAlignment : sizeof(HLTCA_GPU_ROWALIGNMENT)) / sizeof( int ) > ( hitMemCount );
b22af1bf 154 fNumberOfHits = data->NumberOfClusters();
e01a1f52 155 const int firstHitInBinSize = (23 + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(int)) * HLTCA_ROW_COUNT + 4 * fNumberOfHits + 3;
156 //FIXME: sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(int) * HLTCA_ROW_COUNT is way to big and only to ensure to reserve enough memory for GPU Alignment.
b22af1bf 157 //Might be replaced by correct value
158
4acc2401 159 const int memorySize =
160 // LinkData, HitData
b22af1bf 161 fNumberOfHitsPlusAlign * 4 * sizeof( short ) +
4acc2401 162 // FirstHitInBin
b22af1bf 163 NextMultipleOf<kVectorAlignment>( ( firstHitInBinSize ) * sizeof( int ) ) +
4acc2401 164 // HitWeights, ClusterDataIndex
b22af1bf 165 fNumberOfHitsPlusAlign * 2 * sizeof( int );
4acc2401 166
9a3194d4 167 if ( 1 )// fMemorySize < memorySize ) { // release the memory on CPU
168 {
169 fMemorySize = memorySize + 4;
170 if (allocate)
7be9b0d7 171 {
9a3194d4 172 if (!fIsGpuSliceData)
173 {
174 if (fMemory)
175 {
176 delete[] fMemory;
177 }
178 fMemory = new char[fMemorySize];// kVectorAlignment];
179 }
180 else
b22af1bf 181 {
9a3194d4 182 if (fMemorySize > HLTCA_GPU_SLICE_DATA_MEMORY)
183 {
184 return(0);
185 }
b22af1bf 186 }
7be9b0d7 187 }
4acc2401 188 }
189
190 char *mem = fMemory;
b22af1bf 191 AssignMemory( fLinkUpData, mem, fNumberOfHitsPlusAlign );
192 AssignMemory( fLinkDownData, mem, fNumberOfHitsPlusAlign );
193 AssignMemory( fHitData, mem, fNumberOfHitsPlusAlign );
194 AssignMemory( fFirstHitInBin, mem, firstHitInBinSize );
195 fGpuMemorySize = mem - fMemory;
196
197 //Memory Allocated below will not be copied to GPU but instead be initialized on the gpu itself. Therefore it must not be copied to GPU!
198 AssignMemory( fHitWeights, mem, fNumberOfHitsPlusAlign );
199 AssignMemory( fClusterDataIndex, mem, fNumberOfHitsPlusAlign );
7be9b0d7 200 return(mem - fMemory);
201}
202
203void AliHLTTPCCASliceData::InitFromClusterData( const AliHLTTPCCAClusterData &data )
204{
205 // initialisation from cluster data
206
207 ////////////////////////////////////
e01a1f52 208 // 0. sort rows
7be9b0d7 209 ////////////////////////////////////
210
7be9b0d7 211 fNumberOfHits = data.NumberOfClusters();
212
e01a1f52 213 float2* YZData = new float2[fNumberOfHits];
214 int* tmpHitIndex = new int[fNumberOfHits];
215
216 int RowOffset[HLTCA_ROW_COUNT];
217 int NumberOfClustersInRow[HLTCA_ROW_COUNT];
218 memset(NumberOfClustersInRow, 0, HLTCA_ROW_COUNT * sizeof(int));
219 fFirstRow = HLTCA_ROW_COUNT;
220 fLastRow = 0;
221
222 for (int i = 0;i < fNumberOfHits;i++)
223 {
224 const int tmpRow = data.RowNumber(i);
225 NumberOfClustersInRow[tmpRow]++;
226 if (tmpRow > fLastRow) fLastRow = tmpRow;
227 if (tmpRow < fFirstRow) fFirstRow = tmpRow;
228 }
229 int tmpOffset = 0;
230 for (int i = fFirstRow;i <= fLastRow;i++)
231 {
232 RowOffset[i] = tmpOffset;
233 tmpOffset += NumberOfClustersInRow[i];
234 }
235 {
236 int RowsFilled[HLTCA_ROW_COUNT];
237 memset(RowsFilled, 0, HLTCA_ROW_COUNT * sizeof(int));
238 for (int i = 0;i < fNumberOfHits;i++)
239 {
240 float2 tmp;
241 tmp.x = data.Y(i);
242 tmp.y = data.Z(i);
243 int tmpRow = data.RowNumber(i);
244 int newIndex = RowOffset[tmpRow] + (RowsFilled[tmpRow])++;
245 YZData[newIndex] = tmp;
246 tmpHitIndex[newIndex] = i;
247 }
248 }
249 if (fFirstRow == HLTCA_ROW_COUNT) fFirstRow = 0;
250
251 ////////////////////////////////////
252 // 1. prepare arrays
253 ////////////////////////////////////
254
255 const int numberOfRows = fLastRow - fFirstRow + 1;
256
257 if (SetPointers(&data, true) == 0)
258 {
259 delete[] YZData;
260 delete[] tmpHitIndex;
261 return;
7be9b0d7 262 }
4acc2401 263
264 ////////////////////////////////////
265 // 2. fill HitData and FirstHitInBin
266 ////////////////////////////////////
b8139972 267
e01a1f52 268 for ( int rowIndex = 0; rowIndex < fFirstRow; ++rowIndex ) {
4acc2401 269 AliHLTTPCCARow &row = fRows[rowIndex];
270 row.fGrid.CreateEmpty();
271 row.fNHits = 0;
272 row.fFullSize = 0;
273 row.fHitNumberOffset = 0;
274 row.fFirstHitInBinOffset = 0;
275
276 row.fHy0 = 0.f;
277 row.fHz0 = 0.f;
278 row.fHstepY = 1.f;
279 row.fHstepZ = 1.f;
280 row.fHstepYi = 1.f;
281 row.fHstepZi = 1.f;
282 }
e01a1f52 283 for ( int rowIndex = fLastRow + 1; rowIndex < HLTCA_ROW_COUNT + 1; ++rowIndex ) {
4acc2401 284 AliHLTTPCCARow &row = fRows[rowIndex];
285 row.fGrid.CreateEmpty();
286 row.fNHits = 0;
287 row.fFullSize = 0;
288 row.fHitNumberOffset = 0;
289 row.fFirstHitInBinOffset = 0;
290
291 row.fHy0 = 0.f;
292 row.fHz0 = 0.f;
293 row.fHstepY = 1.f;
294 row.fHstepZ = 1.f;
295 row.fHstepYi = 1.f;
296 row.fHstepZi = 1.f;
297 }
298
6de2bc40 299
e01a1f52 300 AliHLTResizableArray<AliHLTTPCCAHit> binSortedHits( fNumberOfHits + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v));
4acc2401 301
302 int gridContentOffset = 0;
b22af1bf 303 int hitOffset = 0;
4acc2401 304
305 int binCreationMemorySize = 103 * 2 + fNumberOfHits;
306 AliHLTResizableArray<unsigned short> binCreationMemory( binCreationMemorySize );
307
b22af1bf 308 fGPUSharedDataReq = 0;
309
e01a1f52 310 for ( int rowIndex = fFirstRow; rowIndex <= fLastRow; ++rowIndex ) {
4acc2401 311 AliHLTTPCCARow &row = fRows[rowIndex];
e01a1f52 312 row.fNHits = NumberOfClustersInRow[rowIndex];
4acc2401 313 assert( row.fNHits < ( 1 << sizeof( unsigned short ) * 8 ) );
b22af1bf 314 row.fHitNumberOffset = hitOffset;
e01a1f52 315 hitOffset += NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(NumberOfClustersInRow[rowIndex]);
b22af1bf 316
4acc2401 317 row.fFirstHitInBinOffset = gridContentOffset;
318
e01a1f52 319 CreateGrid( &row, YZData, RowOffset[rowIndex] );
4acc2401 320 const AliHLTTPCCAGrid &grid = row.fGrid;
321 const int numberOfBins = grid.N();
322
323 int binCreationMemorySizeNew;
b22af1bf 324 if ( ( binCreationMemorySizeNew = numberOfBins * 2 + 6 + row.fNHits + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(unsigned short) * numberOfRows + 1) > binCreationMemorySize ) {
4acc2401 325 binCreationMemorySize = binCreationMemorySizeNew;
326 binCreationMemory.Resize( binCreationMemorySize );
327 }
b8139972 328
4acc2401 329 AliHLTArray<unsigned short> c = binCreationMemory; // number of hits in all previous bins
330 AliHLTArray<unsigned short> bins = c + ( numberOfBins + 3 ); // cache for the bin index for every hit in this row
331 AliHLTArray<unsigned short> filled = bins + row.fNHits; // counts how many hits there are per bin
332
333 for ( unsigned int bin = 0; bin < row.fGrid.N() + 3; ++bin ) {
334 filled[bin] = 0; // initialize filled[] to 0
335 }
b8139972 336
4acc2401 337 for ( int hitIndex = 0; hitIndex < row.fNHits; ++hitIndex ) {
e01a1f52 338 const int globalHitIndex = RowOffset[rowIndex] + hitIndex;
339 const unsigned short bin = row.fGrid.GetBin( YZData[globalHitIndex].x, YZData[globalHitIndex].y );
b22af1bf 340
4acc2401 341 bins[hitIndex] = bin;
342 ++filled[bin];
343 }
344
345 unsigned short n = 0;
346 for ( int bin = 0; bin < numberOfBins + 3; ++bin ) {
347 c[bin] = n;
348 n += filled[bin];
349 }
350
351 for ( int hitIndex = 0; hitIndex < row.fNHits; ++hitIndex ) {
352 const unsigned short bin = bins[hitIndex];
353 --filled[bin];
354 const unsigned short ind = c[bin] + filled[bin]; // generate an index for this hit that is >= c[bin] and < c[bin + 1]
355 const int globalBinsortedIndex = row.fHitNumberOffset + ind;
e01a1f52 356 const int globalHitIndex = RowOffset[rowIndex] + hitIndex;
4acc2401 357
358 // allows to find the global hit index / coordinates from a global bin sorted hit index
e01a1f52 359 fClusterDataIndex[globalBinsortedIndex] = tmpHitIndex[globalHitIndex];
360 binSortedHits[ind].SetY( YZData[globalHitIndex].x );
361 binSortedHits[ind].SetZ( YZData[globalHitIndex].y );
4acc2401 362 }
363
364 PackHitData( &row, binSortedHits );
365
366 for ( int i = 0; i < numberOfBins; ++i ) {
367 fFirstHitInBin[row.fFirstHitInBinOffset + i] = c[i]; // global bin-sorted hit index
368 }
369 const unsigned short a = c[numberOfBins];
370 // grid.N is <= row.fNHits
b8139972 371 const int nn = numberOfBins + grid.Ny() + 3;
4acc2401 372 for ( int i = numberOfBins; i < nn; ++i ) {
b22af1bf 373 assert( (signed) row.fFirstHitInBinOffset + i < 23 * numberOfRows + 4 * fNumberOfHits + 3 );
4acc2401 374 fFirstHitInBin[row.fFirstHitInBinOffset + i] = a;
375 }
376
377 row.fFullSize = nn;
378 gridContentOffset += nn;
b22af1bf 379
380 if (NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.fNHits) + nn > (unsigned) fGPUSharedDataReq)
381 fGPUSharedDataReq = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.fNHits) + nn;
382
383 //Make pointer aligned
384 gridContentOffset = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(gridContentOffset);
4acc2401 385 }
386
e01a1f52 387 delete[] YZData;
388 delete[] tmpHitIndex;
389
4acc2401 390#if 0
391 //SG cell finder - test code
392
393 if ( fTmpHitInputIDs ) delete[] fTmpHitInputIDs;
394 fTmpHitInputIDs = new int [NHits];
395 const float areaY = .5;
396 const float areaZ = .5;
397 int newRowNHitsTotal = 0;
398 bool *usedHits = new bool [NHits];
399 for ( int iHit = 0; iHit < NHits; iHit++ ) usedHits[iHit] = 0;
400 for ( int iRow = 0; iRow < fParam.NRows(); iRow++ ) {
401 rowHeaders[iRow*2 ] = newRowNHitsTotal; // new first hit
402 rowHeaders[iRow*2+1] = 0; // new N hits
403 int newRowNHits = 0;
404 int oldRowFirstHit = RowFirstHit[iRow];
405 int oldRowLastHit = oldRowFirstHit + RowNHits[iRow];
406 for ( int iHit = oldRowFirstHit; iHit < oldRowLastHit; iHit++ ) {
407 if ( usedHits[iHit] ) continue;
408 float x0 = X[iHit];
409 float y0 = Y[iHit];
410 float z0 = Z[iHit];
411 float cx = x0;
412 float cy = y0;
413 float cz = z0;
414 int nclu = 1;
415 usedHits[iHit] = 1;
416 if ( 0 ) for ( int jHit = iHit + 1; jHit < oldRowLastHit; jHit++ ) {//SG!!!
417 //if( usedHits[jHit] ) continue;
418 float dy = Y[jHit] - y0;
419 float dz = Z[jHit] - z0;
420 if ( CAMath::Abs( dy ) < areaY && CAMath::Abs( dz ) < areaZ ) {
421 cx += X[jHit];
422 cy += Y[jHit];
423 cz += Z[jHit];
424 nclu++;
425 usedHits[jHit] = 1;
426 }
427 }
428 int id = newRowNHitsTotal + newRowNHits;
429 hitsXYZ[id*3+0 ] = cx / nclu;
430 hitsXYZ[id*3+1 ] = cy / nclu;
431 hitsXYZ[id*3+2 ] = cz / nclu;
432 fTmpHitInputIDs[id] = iHit;
433 newRowNHits++;
434 }
435 rowHeaders[iRow*2+1] = newRowNHits;
436 newRowNHitsTotal += newRowNHits;
437 }
438 NHitsTotal() = newRowNHitsTotal;
439 reinterpret_cast<int*>( fInputEvent )[1+fParam.NRows()*2] = newRowNHitsTotal;
440
441 delete[] usedHits;
442#endif
443}
444
445void AliHLTTPCCASliceData::ClearHitWeights()
446{
6de2bc40 447 // clear hit weights
448
4acc2401 449#ifdef ENABLE_VECTORIZATION
450 const int_v v0( Zero );
451 const int *const end = fHitWeights + fNumberOfHits;
452 for ( int *mem = fHitWeights; mem < end; mem += v0.Size ) {
453 v0.store( mem );
454 }
455#else
b22af1bf 456 for ( int i = 0; i < fNumberOfHitsPlusAlign; ++i ) {
4acc2401 457 fHitWeights[i] = 0;
458 }
459#endif
460}
461
462void AliHLTTPCCASliceData::ClearLinks()
463{
6de2bc40 464 // link cleaning
465
4acc2401 466#ifdef ENABLE_VECTORIZATION
467 const short_v v0( -1 );
468 const short *const end1 = fLinkUpData + fNumberOfHits;
469 for ( short *mem = fLinkUpData; mem < end; mem += v0.Size ) {
470 v0.store( mem );
471 }
472 const short *const end2 = fLinkDownData + fNumberOfHits;
473 for ( short *mem = fLinkDownData; mem < end; mem += v0.Size ) {
474 v0.store( mem );
475 }
476#else
477 for ( int i = 0; i < fNumberOfHits; ++i ) {
478 fLinkUpData[i] = -1;
479 }
480 for ( int i = 0; i < fNumberOfHits; ++i ) {
481 fLinkDownData[i] = -1;
482 }
483#endif
484}
485