// Origin: git.uio.no, u/mrichter/AliRoot.git (blame view),
//         HLT/TPCLib/tracking-ca/AliHLTTPCCASliceData.cxx
// Revision note: fix of segm. violation in the GPU tracker (by D.Rohr)
// **************************************************************************
// * This file is property of and copyright by the ALICE HLT Project        *
// * All rights reserved.                                                   *
// *                                                                        *
// * Primary Authors:                                                       *
// *     Copyright 2009       Matthias Kretz <kretz@kde.org>                *
// *                                                                        *
// * Permission to use, copy, modify and distribute this software and its   *
// * documentation strictly for non-commercial purposes is hereby granted   *
// * without fee, provided that the above copyright notice appears in all   *
// * copies and that both the copyright notice and this permission notice   *
// * appear in the supporting documentation. The authors make no claims     *
// * about the suitability of this software for any purpose. It is          *
// * provided "as is" without express or implied warranty.                  *
// **************************************************************************
4acc2401 16
#include "AliHLTTPCCASliceData.h"
#include "AliHLTTPCCAClusterData.h"
#include "AliHLTTPCCAMath.h"
#include "AliHLTArray.h"
#include "AliHLTTPCCAHit.h"
#include "AliHLTTPCCAParam.h"
#include "AliHLTTPCCAGPUConfig.h"
#include "AliHLTTPCCAGPUTracker.h"
#include "MemoryAssignmentHelpers.h"
#include <cstring>
#include <iostream>
27
// Calculates an approximation for 1/sqrt(x): a bit-level initial guess
// (magic constant 0x5f3759df) refined by one Newton-Raphson step.
// Google for 0x5f3759df :)
//
// _x is expected to be a positive, finite float. The bit manipulation
// relies on int and float having the same size.
static inline float fastInvSqrt( float _x )
{
  // the function calculates fast inverse sqrt

  const float xhalf = 0.5f * _x;

  // Reinterpret the float's bits as an integer. std::memcpy is used instead
  // of a union because reading the inactive member of a union is undefined
  // behaviour in C++.
  int bits;
  std::memcpy( &bits, &_x, sizeof( bits ) );
  bits = 0x5f3759df - ( bits >> 1 ); // initial guess

  float y;
  std::memcpy( &y, &bits, sizeof( y ) );

  // one Newton-Raphson iteration
  return y * ( 1.5f - xhalf * y * y );
}
40
b22af1bf 41inline void AliHLTTPCCASliceData::CreateGrid( AliHLTTPCCARow *row, const AliHLTTPCCAClusterData &data, int ClusterDataHitNumberOffset )
4acc2401 42{
6de2bc40 43 // grid creation
44
4acc2401 45 if ( row->NHits() <= 0 ) { // no hits or invalid data
46 // grid coordinates don't matter, since there are no hits
47 row->fGrid.CreateEmpty();
48 return;
49 }
50
51 float yMin = 1.e3f;
52 float yMax = -1.e3f;
53 float zMin = 1.e3f;
54 float zMax = -1.e3f;
b22af1bf 55 for ( int i = ClusterDataHitNumberOffset; i < ClusterDataHitNumberOffset + row->fNHits; ++i ) {
4acc2401 56 const float y = data.Y( i );
57 const float z = data.Z( i );
58 if ( yMax < y ) yMax = y;
59 if ( yMin > y ) yMin = y;
60 if ( zMax < z ) zMax = z;
61 if ( zMin > z ) zMin = z;
62 }
63
64 const float norm = fastInvSqrt( row->fNHits );
65 row->fGrid.Create( yMin, yMax, zMin, zMax,
66 CAMath::Max( ( yMax - yMin ) * norm, 2.f ),
67 CAMath::Max( ( zMax - zMin ) * norm, 2.f ) );
68}
69
444e5682 70inline void AliHLTTPCCASliceData::PackHitData( AliHLTTPCCARow* const row, const AliHLTArray<AliHLTTPCCAHit> &binSortedHits )
4acc2401 71{
6de2bc40 72 // hit data packing
73
4acc2401 74 static const float shortPackingConstant = 1.f / 65535.f;
75 const float y0 = row->fGrid.YMin();
76 const float z0 = row->fGrid.ZMin();
77 const float stepY = ( row->fGrid.YMax() - y0 ) * shortPackingConstant;
78 const float stepZ = ( row->fGrid.ZMax() - z0 ) * shortPackingConstant;
79 const float stepYi = 1.f / stepY;
80 const float stepZi = 1.f / stepZ;
81
82 row->fHy0 = y0;
83 row->fHz0 = z0;
84 row->fHstepY = stepY;
85 row->fHstepZ = stepZ;
86 row->fHstepYi = stepYi;
87 row->fHstepZi = stepZi;
88
89 for ( int hitIndex = 0; hitIndex < row->fNHits; ++hitIndex ) {
90 // bin sorted index!
91 const int globalHitIndex = row->fHitNumberOffset + hitIndex;
92 const AliHLTTPCCAHit &hh = binSortedHits[globalHitIndex];
93 const float xx = ( ( hh.Y() - y0 ) * stepYi ) + .5 ;
94 const float yy = ( ( hh.Z() - z0 ) * stepZi ) + .5 ;
95 if ( xx < 0 || yy < 0 || xx >= 65536 || yy >= 65536 ) {
96 std::cout << "!!!! hit packing error!!! " << xx << " " << yy << " " << std::endl;
97 }
98 // HitData is bin sorted
78c167cd 99 fHitData[row->fHitNumberOffset + hitIndex].x = (unsigned short) xx;
100 fHitData[row->fHitNumberOffset + hitIndex].y = (unsigned short) yy;
4acc2401 101 }
102}
103
104void AliHLTTPCCASliceData::Clear()
105{
106 fNumberOfHits = 0;
107}
108
109void AliHLTTPCCASliceData::InitializeRows( const AliHLTTPCCAParam &p )
110{
6de2bc40 111 // initialisation of rows
b22af1bf 112 if (!fRows) fRows = new AliHLTTPCCARow[HLTCA_ROW_COUNT + 1];
4acc2401 113 for ( int i = 0; i < p.NRows(); ++i ) {
114 fRows[i].fX = p.RowX( i );
115 fRows[i].fMaxY = CAMath::Tan( p.DAlpha() / 2. ) * fRows[i].fX;
116 }
117}
118
b22af1bf 119#ifndef HLTCA_GPUCODE
120 AliHLTTPCCASliceData::~AliHLTTPCCASliceData()
121 {
122 //Standard Destrcutor
123 if (fRows)
124 {
125 if (!fIsGpuSliceData) delete[] fRows;
126 fRows = NULL;
127 }
128 if (fMemory)
129 {
130 if (!fIsGpuSliceData) delete[] fMemory;
131 fMemory = NULL;
132 }
133
134 }
135#endif
136
137GPUh() void AliHLTTPCCASliceData::SetGPUSliceDataMemory(void* const pSliceMemory, void* const pRowMemory)
4acc2401 138{
b22af1bf 139 //Set Pointer to slice data memory to external memory
140 fMemory = (char*) pSliceMemory;
141 fRows = (AliHLTTPCCARow*) pRowMemory;
7be9b0d7 142}
4acc2401 143
7be9b0d7 144size_t AliHLTTPCCASliceData::SetPointers(const AliHLTTPCCAClusterData *data, bool allocate)
145{
b22af1bf 146 //Set slice data internal pointers
147 int hitMemCount = 0;
148 for ( int rowIndex = data->FirstRow(); rowIndex <= data->LastRow(); ++rowIndex )
149 {
150 hitMemCount += NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(data->NumberOfClusters( rowIndex ));
151 }
152 //Calculate Memory needed to store hits in rows
153
154 const int numberOfRows = data->LastRow() - data->FirstRow() + 1;
a59a784e 155 const unsigned int kVectorAlignment = 256 /*sizeof( uint4 )*/ ;
156 fNumberOfHitsPlusAlign = NextMultipleOf < ( kVectorAlignment > sizeof(HLTCA_GPU_ROWALIGNMENT) ? kVectorAlignment : sizeof(HLTCA_GPU_ROWALIGNMENT)) / sizeof( int ) > ( hitMemCount );
b22af1bf 157 fNumberOfHits = data->NumberOfClusters();
158 const int firstHitInBinSize = (23 + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(int)) * numberOfRows + 4 * fNumberOfHits + 3;
159 //FIXME: sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(int) * numberOfRows is way to big and only to ensure to reserve enough memory for GPU Alignment.
160 //Might be replaced by correct value
161
4acc2401 162 const int memorySize =
163 // LinkData, HitData
b22af1bf 164 fNumberOfHitsPlusAlign * 4 * sizeof( short ) +
4acc2401 165 // FirstHitInBin
b22af1bf 166 NextMultipleOf<kVectorAlignment>( ( firstHitInBinSize ) * sizeof( int ) ) +
4acc2401 167 // HitWeights, ClusterDataIndex
b22af1bf 168 fNumberOfHitsPlusAlign * 2 * sizeof( int );
4acc2401 169
9a3194d4 170 if ( 1 )// fMemorySize < memorySize ) { // release the memory on CPU
171 {
172 fMemorySize = memorySize + 4;
173 if (allocate)
7be9b0d7 174 {
9a3194d4 175 if (!fIsGpuSliceData)
176 {
177 if (fMemory)
178 {
179 delete[] fMemory;
180 }
181 fMemory = new char[fMemorySize];// kVectorAlignment];
182 }
183 else
b22af1bf 184 {
9a3194d4 185 if (fMemorySize > HLTCA_GPU_SLICE_DATA_MEMORY)
186 {
187 return(0);
188 }
b22af1bf 189 }
7be9b0d7 190 }
4acc2401 191 }
192
193 char *mem = fMemory;
b22af1bf 194 AssignMemory( fLinkUpData, mem, fNumberOfHitsPlusAlign );
195 AssignMemory( fLinkDownData, mem, fNumberOfHitsPlusAlign );
196 AssignMemory( fHitData, mem, fNumberOfHitsPlusAlign );
197 AssignMemory( fFirstHitInBin, mem, firstHitInBinSize );
198 fGpuMemorySize = mem - fMemory;
199
200 //Memory Allocated below will not be copied to GPU but instead be initialized on the gpu itself. Therefore it must not be copied to GPU!
201 AssignMemory( fHitWeights, mem, fNumberOfHitsPlusAlign );
202 AssignMemory( fClusterDataIndex, mem, fNumberOfHitsPlusAlign );
7be9b0d7 203 return(mem - fMemory);
204}
205
206void AliHLTTPCCASliceData::InitFromClusterData( const AliHLTTPCCAClusterData &data )
207{
208 // initialisation from cluster data
209
210 ////////////////////////////////////
211 // 1. prepare arrays
212 ////////////////////////////////////
213
b22af1bf 214 const int numberOfRows = data.LastRow() - data.FirstRow() + 1;
7be9b0d7 215 fNumberOfHits = data.NumberOfClusters();
216
217 /* TODO Vectorization
218 for ( int rowIndex = data.FirstRow(); rowIndex <= data.LastRow(); ++rowIndex ) {
219 int NumberOfClusters( int rowIndex ) const;
220 }
221 const int memorySize = fNumberOfHits * sizeof( short_v::Type )
222 */
9a3194d4 223 if (SetPointers(&data, true) == 0) return;
4acc2401 224
225 ////////////////////////////////////
226 // 2. fill HitData and FirstHitInBin
227 ////////////////////////////////////
b8139972 228
4acc2401 229 for ( int rowIndex = 0; rowIndex < data.FirstRow(); ++rowIndex ) {
230 AliHLTTPCCARow &row = fRows[rowIndex];
231 row.fGrid.CreateEmpty();
232 row.fNHits = 0;
233 row.fFullSize = 0;
234 row.fHitNumberOffset = 0;
235 row.fFirstHitInBinOffset = 0;
236
237 row.fHy0 = 0.f;
238 row.fHz0 = 0.f;
239 row.fHstepY = 1.f;
240 row.fHstepZ = 1.f;
241 row.fHstepYi = 1.f;
242 row.fHstepZi = 1.f;
243 }
b22af1bf 244 for ( int rowIndex = data.LastRow() + 1; rowIndex < HLTCA_ROW_COUNT + 1; ++rowIndex ) {
4acc2401 245 AliHLTTPCCARow &row = fRows[rowIndex];
246 row.fGrid.CreateEmpty();
247 row.fNHits = 0;
248 row.fFullSize = 0;
249 row.fHitNumberOffset = 0;
250 row.fFirstHitInBinOffset = 0;
251
252 row.fHy0 = 0.f;
253 row.fHz0 = 0.f;
254 row.fHstepY = 1.f;
255 row.fHstepZ = 1.f;
256 row.fHstepYi = 1.f;
257 row.fHstepZi = 1.f;
258 }
259
6de2bc40 260
b22af1bf 261 AliHLTResizableArray<AliHLTTPCCAHit> binSortedHits( fNumberOfHits + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v) * numberOfRows + 1 );
4acc2401 262
263 int gridContentOffset = 0;
b22af1bf 264 int hitOffset = 0;
4acc2401 265
266 int binCreationMemorySize = 103 * 2 + fNumberOfHits;
267 AliHLTResizableArray<unsigned short> binCreationMemory( binCreationMemorySize );
268
b22af1bf 269 fGPUSharedDataReq = 0;
270
4acc2401 271 for ( int rowIndex = data.FirstRow(); rowIndex <= data.LastRow(); ++rowIndex ) {
272 AliHLTTPCCARow &row = fRows[rowIndex];
4acc2401 273 row.fNHits = data.NumberOfClusters( rowIndex );
274 assert( row.fNHits < ( 1 << sizeof( unsigned short ) * 8 ) );
b22af1bf 275 row.fHitNumberOffset = hitOffset;
276 hitOffset += NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(data.NumberOfClusters( rowIndex ));
277
4acc2401 278 row.fFirstHitInBinOffset = gridContentOffset;
279
b22af1bf 280 CreateGrid( &row, data, data.RowOffset( rowIndex ) );
4acc2401 281 const AliHLTTPCCAGrid &grid = row.fGrid;
282 const int numberOfBins = grid.N();
283
284 int binCreationMemorySizeNew;
b22af1bf 285 if ( ( binCreationMemorySizeNew = numberOfBins * 2 + 6 + row.fNHits + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(unsigned short) * numberOfRows + 1) > binCreationMemorySize ) {
4acc2401 286 binCreationMemorySize = binCreationMemorySizeNew;
287 binCreationMemory.Resize( binCreationMemorySize );
288 }
b8139972 289
4acc2401 290 AliHLTArray<unsigned short> c = binCreationMemory; // number of hits in all previous bins
291 AliHLTArray<unsigned short> bins = c + ( numberOfBins + 3 ); // cache for the bin index for every hit in this row
292 AliHLTArray<unsigned short> filled = bins + row.fNHits; // counts how many hits there are per bin
293
294 for ( unsigned int bin = 0; bin < row.fGrid.N() + 3; ++bin ) {
295 filled[bin] = 0; // initialize filled[] to 0
296 }
b8139972 297
4acc2401 298 for ( int hitIndex = 0; hitIndex < row.fNHits; ++hitIndex ) {
b22af1bf 299 const int globalHitIndex = data.RowOffset( rowIndex ) + hitIndex;
4acc2401 300 const unsigned short bin = row.fGrid.GetBin( data.Y( globalHitIndex ), data.Z( globalHitIndex ) );
b22af1bf 301
4acc2401 302 bins[hitIndex] = bin;
303 ++filled[bin];
304 }
305
306 unsigned short n = 0;
307 for ( int bin = 0; bin < numberOfBins + 3; ++bin ) {
308 c[bin] = n;
309 n += filled[bin];
310 }
311
312 for ( int hitIndex = 0; hitIndex < row.fNHits; ++hitIndex ) {
313 const unsigned short bin = bins[hitIndex];
314 --filled[bin];
315 const unsigned short ind = c[bin] + filled[bin]; // generate an index for this hit that is >= c[bin] and < c[bin + 1]
316 const int globalBinsortedIndex = row.fHitNumberOffset + ind;
b22af1bf 317 const int globalHitIndex = data.RowOffset( rowIndex ) + hitIndex;
4acc2401 318
319 // allows to find the global hit index / coordinates from a global bin sorted hit index
320 fClusterDataIndex[globalBinsortedIndex] = globalHitIndex;
321 binSortedHits[globalBinsortedIndex].SetY( data.Y( globalHitIndex ) );
322 binSortedHits[globalBinsortedIndex].SetZ( data.Z( globalHitIndex ) );
323 }
324
325 PackHitData( &row, binSortedHits );
326
327 for ( int i = 0; i < numberOfBins; ++i ) {
328 fFirstHitInBin[row.fFirstHitInBinOffset + i] = c[i]; // global bin-sorted hit index
329 }
330 const unsigned short a = c[numberOfBins];
331 // grid.N is <= row.fNHits
b8139972 332 const int nn = numberOfBins + grid.Ny() + 3;
4acc2401 333 for ( int i = numberOfBins; i < nn; ++i ) {
b22af1bf 334 assert( (signed) row.fFirstHitInBinOffset + i < 23 * numberOfRows + 4 * fNumberOfHits + 3 );
4acc2401 335 fFirstHitInBin[row.fFirstHitInBinOffset + i] = a;
336 }
337
338 row.fFullSize = nn;
339 gridContentOffset += nn;
b22af1bf 340
341 if (NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.fNHits) + nn > (unsigned) fGPUSharedDataReq)
342 fGPUSharedDataReq = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.fNHits) + nn;
343
344 //Make pointer aligned
345 gridContentOffset = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(gridContentOffset);
4acc2401 346 }
347
348#if 0
349 //SG cell finder - test code
350
351 if ( fTmpHitInputIDs ) delete[] fTmpHitInputIDs;
352 fTmpHitInputIDs = new int [NHits];
353 const float areaY = .5;
354 const float areaZ = .5;
355 int newRowNHitsTotal = 0;
356 bool *usedHits = new bool [NHits];
357 for ( int iHit = 0; iHit < NHits; iHit++ ) usedHits[iHit] = 0;
358 for ( int iRow = 0; iRow < fParam.NRows(); iRow++ ) {
359 rowHeaders[iRow*2 ] = newRowNHitsTotal; // new first hit
360 rowHeaders[iRow*2+1] = 0; // new N hits
361 int newRowNHits = 0;
362 int oldRowFirstHit = RowFirstHit[iRow];
363 int oldRowLastHit = oldRowFirstHit + RowNHits[iRow];
364 for ( int iHit = oldRowFirstHit; iHit < oldRowLastHit; iHit++ ) {
365 if ( usedHits[iHit] ) continue;
366 float x0 = X[iHit];
367 float y0 = Y[iHit];
368 float z0 = Z[iHit];
369 float cx = x0;
370 float cy = y0;
371 float cz = z0;
372 int nclu = 1;
373 usedHits[iHit] = 1;
374 if ( 0 ) for ( int jHit = iHit + 1; jHit < oldRowLastHit; jHit++ ) {//SG!!!
375 //if( usedHits[jHit] ) continue;
376 float dy = Y[jHit] - y0;
377 float dz = Z[jHit] - z0;
378 if ( CAMath::Abs( dy ) < areaY && CAMath::Abs( dz ) < areaZ ) {
379 cx += X[jHit];
380 cy += Y[jHit];
381 cz += Z[jHit];
382 nclu++;
383 usedHits[jHit] = 1;
384 }
385 }
386 int id = newRowNHitsTotal + newRowNHits;
387 hitsXYZ[id*3+0 ] = cx / nclu;
388 hitsXYZ[id*3+1 ] = cy / nclu;
389 hitsXYZ[id*3+2 ] = cz / nclu;
390 fTmpHitInputIDs[id] = iHit;
391 newRowNHits++;
392 }
393 rowHeaders[iRow*2+1] = newRowNHits;
394 newRowNHitsTotal += newRowNHits;
395 }
396 NHitsTotal() = newRowNHitsTotal;
397 reinterpret_cast<int*>( fInputEvent )[1+fParam.NRows()*2] = newRowNHitsTotal;
398
399 delete[] usedHits;
400#endif
401}
402
403void AliHLTTPCCASliceData::ClearHitWeights()
404{
6de2bc40 405 // clear hit weights
406
4acc2401 407#ifdef ENABLE_VECTORIZATION
408 const int_v v0( Zero );
409 const int *const end = fHitWeights + fNumberOfHits;
410 for ( int *mem = fHitWeights; mem < end; mem += v0.Size ) {
411 v0.store( mem );
412 }
413#else
b22af1bf 414 for ( int i = 0; i < fNumberOfHitsPlusAlign; ++i ) {
4acc2401 415 fHitWeights[i] = 0;
416 }
417#endif
418}
419
420void AliHLTTPCCASliceData::ClearLinks()
421{
6de2bc40 422 // link cleaning
423
4acc2401 424#ifdef ENABLE_VECTORIZATION
425 const short_v v0( -1 );
426 const short *const end1 = fLinkUpData + fNumberOfHits;
427 for ( short *mem = fLinkUpData; mem < end; mem += v0.Size ) {
428 v0.store( mem );
429 }
430 const short *const end2 = fLinkDownData + fNumberOfHits;
431 for ( short *mem = fLinkDownData; mem < end; mem += v0.Size ) {
432 v0.store( mem );
433 }
434#else
435 for ( int i = 0; i < fNumberOfHits; ++i ) {
436 fLinkUpData[i] = -1;
437 }
438 for ( int i = 0; i < fNumberOfHits; ++i ) {
439 fLinkDownData[i] = -1;
440 }
441#endif
442}
443