1 // @(#) $Id: AliHLTTPCCATrackletConstructor.cxx 27042 2008-07-02 12:06:02Z richterm $
2 // **************************************************************************
3 // This file is property of and copyright by the ALICE HLT Project *
4 // ALICE Experiment at CERN, All rights reserved. *
6 // Primary Authors: Sergey Gorbunov <sergey.gorbunov@kip.uni-heidelberg.de> *
7 // Ivan Kisel <kisel@kip.uni-heidelberg.de> *
8 // for The ALICE HLT Project. *
10 // Permission to use, copy, modify and distribute this software and its *
11 // documentation strictly for non-commercial purposes is hereby granted *
12 // without fee, provided that the above copyright notice appears in all *
13 // copies and that both the copyright notice and this permission notice *
14 // appear in the supporting documentation. The authors make no claims *
15 // about the suitability of this software for any purpose. It is *
16 // provided "as is" without express or implied warranty. *
18 //***************************************************************************
20 #include "AliHLTTPCCATracker.h"
21 #include "AliHLTTPCCATrackParam.h"
22 #include "AliHLTTPCCATrackParam.h"
23 #include "AliHLTTPCCAGrid.h"
24 #include "AliHLTTPCCAMath.h"
25 #include "AliHLTTPCCADef.h"
26 #include "AliHLTTPCCATracklet.h"
27 #include "AliHLTTPCCATrackletConstructor.h"
31 GPUdi() void AliHLTTPCCATrackletConstructor::InitTracklet( AliHLTTPCCATrackParam &tParam )
33 //Initialize Tracklet Parameters using default values
38 GPUdi() void AliHLTTPCCATrackletConstructor::StoreTracklet
39 ( int /*nBlocks*/, int /*nThreads*/, int /*iBlock*/, int /*iThread*/,
40 AliHLTTPCCASharedMemory
41 #if defined(HLTCA_GPUCODE) | defined(EXTERN_ROW_HITS)
45 #endif //!HLTCA_GPUCODE
46 , AliHLTTPCCAThreadMemory &r, AliHLTTPCCATracker &tracker, AliHLTTPCCATrackParam &tParam )
48 // reconstruction of tracklets, tracklet store step
51 if ( r.fNHits < TRACKLET_SELECTOR_MIN_HITS ) {
57 if ( 1. / .5 < CAMath::Abs( tParam.QPt() ) ) { //SG!!!
65 const float *c = tParam.Cov();
66 for ( int i = 0; i < 15; i++ ) ok = ok && CAMath::Finite( c[i] );
67 for ( int i = 0; i < 5; i++ ) ok = ok && CAMath::Finite( tParam.Par()[i] );
68 ok = ok && ( tParam.X() > 50 );
70 if ( c[0] <= 0 || c[2] <= 0 || c[5] <= 0 || c[9] <= 0 || c[14] <= 0 ) ok = 0;
79 if ( !SAVE() ) return;
81 AliHLTTPCCATracklet &tracklet = tracker.Tracklets()[r.fItr];
83 tracklet.SetNHits( r.fNHits );
86 if ( CAMath::Abs( tParam.Par()[4] ) < 1.e-4 ) tParam.SetPar( 4, 1.e-4 );
87 if (r.fStartRow < r.fFirstRow) r.fFirstRow = r.fStartRow;
88 tracklet.SetFirstRow( r.fFirstRow );
89 tracklet.SetLastRow( r.fLastRow );
91 tracklet.SetParam( tParam.fParam );
93 tracklet.SetParam( tParam.GetParam() );
94 #endif //HLTCA_GPUCODE
95 int w = ( r.fNHits << 16 ) + r.fItr;
96 for ( int iRow = r.fFirstRow; iRow <= r.fLastRow; iRow++ ) {
97 #ifdef EXTERN_ROW_HITS
98 int ih = tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr];
100 int ih = tracklet.RowHit( iRow );
101 #endif //EXTERN_ROW_HITS
103 #if defined(HLTCA_GPUCODE)
104 tracker.MaximizeHitWeight( s.fRows[ iRow ], ih, w );
106 tracker.MaximizeHitWeight( tracker.Row( iRow ), ih, w );
107 #endif //HLTCA_GPUCODE
114 GPUdi() void AliHLTTPCCATrackletConstructor::UpdateTracklet
115 ( int /*nBlocks*/, int /*nThreads*/, int /*iBlock*/, int /*iThread*/,
116 AliHLTTPCCASharedMemory
117 #if defined(HLTCA_GPUCODE) | defined(EXTERN_ROW_HITS)
121 #endif //HLTCA_GPUCODE
122 , AliHLTTPCCAThreadMemory &r, AliHLTTPCCATracker &tracker, AliHLTTPCCATrackParam &tParam, int iRow )
124 // reconstruction of tracklets, tracklets update step
126 if ( !r.fGo ) return;
128 #ifndef EXTERN_ROW_HITS
129 AliHLTTPCCATracklet &tracklet = tracker.Tracklets()[r.fItr];
130 #endif //EXTERN_ROW_HITS
132 #if defined(HLTCA_GPUCODE)
133 const AliHLTTPCCARow &row = s.fRows[iRow];
135 const AliHLTTPCCARow &row = tracker.Row( iRow );
136 #endif //HLTCA_GPUCODE
138 float y0 = row.Grid().YMin();
139 float stepY = row.HstepY();
140 float z0 = row.Grid().ZMin();
141 float stepZ = row.HstepZ();
142 float stepYi = row.HstepYi();
143 float stepZi = row.HstepZi();
145 if ( r.fStage == 0 ) { // fitting part
148 if ( iRow < r.fStartRow || r.fCurrIH < 0 ) break;
149 if ( ( iRow - r.fStartRow ) % 2 != 0 )
151 #ifndef EXTERN_ROW_HITS
152 tracklet.SetRowHit(iRow, -1);
154 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
155 #endif //EXTERN_ROW_HITS
156 break; // SG!!! - jump over the row
161 #if defined(HLTCA_GPU_TEXTURE_FETCH)
162 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + r.fCurrIH);
164 hh = tracker.HitData(row)[r.fCurrIH];
165 #endif //HLTCA_GPU_TEXTURE_FETCH
167 int oldIH = r.fCurrIH;
168 #if defined(HLTCA_GPU_TEXTURE_FETCH)
169 r.fCurrIH = tex1Dfetch(gAliTexRefs, ((char*) tracker.Data().HitLinkUpData(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + r.fCurrIH);
171 r.fCurrIH = tracker.HitLinkUpData(row)[r.fCurrIH]; // read from linkup data
172 #endif //HLTCA_GPU_TEXTURE_FETCH
175 float y = y0 + hh.x * stepY;
176 float z = z0 + hh.y * stepZ;
178 if ( iRow == r.fStartRow ) {
187 float dx = x - tParam.X();
188 float dy = y - r.fLastY;//tParam.Y();
189 float dz = z - r.fLastZ;//tParam.Z();
193 float ri = 1. / CAMath::Sqrt( dx * dx + dy * dy );
194 if ( iRow == r.fStartRow + 2 ) { //SG!!! important - thanks to Matthias
195 tParam.SetSinPhi( dy*ri );
196 tParam.SetSignCosPhi( dx );
197 tParam.SetDzDs( dz*ri );
198 //std::cout << "Init. errors... " << r.fItr << std::endl;
199 tracker.GetErrors2( iRow, tParam, err2Y, err2Z );
200 //std::cout << "Init. errors = " << err2Y << " " << err2Z << std::endl;
201 tParam.SetCov( 0, err2Y );
202 tParam.SetCov( 2, err2Z );
204 float sinPhi, cosPhi;
205 if ( r.fNHits >= 10 && CAMath::Abs( tParam.SinPhi() ) < .99 ) {
206 sinPhi = tParam.SinPhi();
207 cosPhi = CAMath::Sqrt( 1 - sinPhi * sinPhi );
212 if ( !tParam.TransportToX( x, sinPhi, cosPhi, tracker.Param().ConstBz(), -1 ) ) {
213 #ifndef EXTERN_ROW_HITS
214 tracklet.SetRowHit( iRow, -1 );
216 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
217 #endif //EXTERN_ROW_HITS
220 tracker.GetErrors2( iRow, tParam.GetZ(), sinPhi, cosPhi, tParam.GetDzDs(), err2Y, err2Z );
222 if ( !tParam.Filter( y, z, err2Y, err2Z, .99 ) ) {
223 #ifndef EXTERN_ROW_HITS
224 tracklet.SetRowHit( iRow, -1 );
226 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
227 #endif //EXTERN_ROW_HITS
231 #ifndef EXTERN_ROW_HITS
232 tracklet.SetRowHit( iRow, oldIH );
234 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = oldIH;
235 #endif //!EXTERN_ROW_HITS
242 if ( r.fCurrIH < 0 ) {
244 if ( CAMath::Abs( tParam.SinPhi() ) > .999 ) {
245 r.fNHits = 0; r.fGo = 0;
247 //tParam.SetCosPhi( CAMath::Sqrt(1-tParam.SinPhi()*tParam.SinPhi()) );
250 } else { // forward/backward searching part
252 if ( r.fStage == 2 && ( ( iRow >= r.fEndRow ) ||
253 ( iRow >= r.fStartRow && ( iRow - r.fStartRow ) % 2 == 0 )
255 if ( r.fNMissed > kMaxRowGap ) {
263 if ( !tParam.TransportToX( x, tParam.SinPhi(), tParam.GetCosPhi(), tracker.Param().ConstBz(), .99 ) ) {
264 #ifndef EXTERN_ROW_HITS
265 tracklet.SetRowHit(iRow, -1);
267 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
268 #endif //!EXTERN_ROW_HITS
271 if ( row.NHits() < 1 ) {
273 #ifndef EXTERN_ROW_HITS
274 tracklet.SetRowHit(iRow, -1);
276 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
277 #endif //!EXTERN_ROW_HITS
281 #ifndef HLTCA_GPU_TEXTURE_FETCH
282 const ushort2 *hits = tracker.HitData(row);
283 #endif //!HLTCA_GPU_TEXTURE_FETCH
285 float fY = tParam.GetY();
286 float fZ = tParam.GetZ();
289 { // search for the closest hit
290 const int fIndYmin = row.Grid().GetBinBounded( fY - 1.f, fZ - 1.f );
291 assert( fIndYmin >= 0 );
294 int fY0 = ( int ) ( ( fY - y0 ) * stepYi );
295 int fZ0 = ( int ) ( ( fZ - z0 ) * stepZi );
296 int ds0 = ( ( ( int )1 ) << 30 );
299 unsigned int fHitYfst = 1, fHitYlst = 0, fHitYfst1 = 1, fHitYlst1 = 0;
302 int nY = row.Grid().Ny();
304 #ifndef HLTCA_GPU_TEXTURE_FETCH
305 const unsigned short *sGridP = tracker.FirstHitInBin(row);
306 #endif //!HLTCA_GPU_TEXTURE_FETCH
308 #ifdef HLTCA_GPU_TEXTURE_FETCH
309 fHitYfst = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin);
310 fHitYlst = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+2);
311 fHitYfst1 = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+nY);
312 fHitYlst1 = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+nY+2);
314 fHitYfst = sGridP[fIndYmin];
315 fHitYlst = sGridP[fIndYmin+2];
316 fHitYfst1 = sGridP[fIndYmin+nY];
317 fHitYlst1 = sGridP[fIndYmin+nY+2];
318 #endif //HLTCA_GPU_TEXTURE_FETCH
319 assert( (signed) fHitYfst <= row.NHits() );
320 assert( (signed) fHitYlst <= row.NHits() );
321 assert( (signed) fHitYfst1 <= row.NHits() );
322 assert( (signed) fHitYlst1 <= row.NHits() );
325 for ( unsigned int fIh = fHitYfst; fIh < fHitYlst; fIh++ ) {
326 assert( (signed) fIh < row.NHits() );
328 #if defined(HLTCA_GPU_TEXTURE_FETCH)
329 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + fIh);
332 #endif //HLTCA_GPU_TEXTURE_FETCH
333 int ddy = ( int )( hh.x ) - fY0;
334 int ddz = ( int )( hh.y ) - fZ0;
335 int dds = CAMath::Abs( ddy ) + CAMath::Abs( ddz );
342 for ( unsigned int fIh = fHitYfst1; fIh < fHitYlst1; fIh++ ) {
344 #if defined(HLTCA_GPU_TEXTURE_FETCH)
345 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + fIh);
348 #endif //HLTCA_GPU_TEXTURE_FETCH
349 int ddy = ( int )( hh.x ) - fY0;
350 int ddz = ( int )( hh.y ) - fZ0;
351 int dds = CAMath::Abs( ddy ) + CAMath::Abs( ddz );
357 }// end of search for the closest hit
361 #ifndef EXTERN_ROW_HITS
362 tracklet.SetRowHit(iRow, -1);
364 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
365 #endif //!EXTERN_ROW_HITS
370 #if defined(HLTCA_GPU_TEXTURE_FETCH)
371 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + best);
374 #endif //HLTCA_GPU_TEXTURE_FETCH
376 tracker.GetErrors2( iRow, *( ( AliHLTTPCCATrackParam* )&tParam ), err2Y, err2Z );
378 float y = y0 + hh.x * stepY;
379 float z = z0 + hh.y * stepZ;
383 const float kFactor = tracker.Param().HitPickUpFactor() * tracker.Param().HitPickUpFactor() * 3.5 * 3.5;
384 float sy2 = kFactor * ( tParam.GetErr2Y() + err2Y );
385 float sz2 = kFactor * ( tParam.GetErr2Z() + err2Z );
386 if ( sy2 > 2. ) sy2 = 2.;
387 if ( sz2 > 2. ) sz2 = 2.;
389 if ( CAMath::FMulRZ( dy, dy ) > sy2 || CAMath::FMulRZ( dz, dz ) > sz2 ) {
390 #ifndef EXTERN_ROW_HITS
391 tracklet.SetRowHit(iRow, -1);
393 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
394 #endif //!EXTERN_ROW_HITS
397 if ( !tParam.Filter( y, z, err2Y, err2Z, .99 ) ) {
400 #ifndef EXTERN_ROW_HITS
401 tracklet.SetRowHit( iRow, best );
403 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = best;
404 #endif //!EXTERN_ROW_HITS
407 if ( r.fStage == 1 ) r.fLastRow = iRow;
408 else r.fFirstRow = iRow;
414 GPUdi() void AliHLTTPCCATrackletConstructor::CopyTrackletTempData( AliHLTTPCCAThreadMemory &rMemSrc, AliHLTTPCCAThreadMemory &rMemDst, AliHLTTPCCATrackParam &tParamSrc, AliHLTTPCCATrackParam &tParamDst)
416 //Copy Temporary Tracklet data from registers to global mem and vice versa
417 rMemDst.fStartRow = rMemSrc.fStartRow;
418 rMemDst.fEndRow = rMemSrc.fEndRow;
419 rMemDst.fFirstRow = rMemSrc.fFirstRow;
420 rMemDst.fLastRow = rMemSrc.fLastRow;
421 rMemDst.fCurrIH = rMemSrc.fCurrIH;
422 rMemDst.fGo = rMemSrc.fGo;
423 rMemDst.fStage = rMemSrc.fStage;
424 rMemDst.fNHits = rMemSrc.fNHits;
425 rMemDst.fNMissed = rMemSrc.fNMissed;
426 rMemDst.fLastY = rMemSrc.fLastY;
427 rMemDst.fLastZ = rMemSrc.fLastZ;
429 tParamDst.SetSinPhi( tParamSrc.GetSinPhi() );
430 tParamDst.SetDzDs( tParamSrc.GetDzDs() );
431 tParamDst.SetQPt( tParamSrc.GetQPt() );
432 tParamDst.SetSignCosPhi( tParamSrc.GetSignCosPhi() );
433 tParamDst.SetChi2( tParamSrc.GetChi2() );
434 tParamDst.SetNDF( tParamSrc.GetNDF() );
435 tParamDst.SetCov( 0, tParamSrc.GetCov(0) );
436 tParamDst.SetCov( 1, tParamSrc.GetCov(1) );
437 tParamDst.SetCov( 2, tParamSrc.GetCov(2) );
438 tParamDst.SetCov( 3, tParamSrc.GetCov(3) );
439 tParamDst.SetCov( 4, tParamSrc.GetCov(4) );
440 tParamDst.SetCov( 5, tParamSrc.GetCov(5) );
441 tParamDst.SetCov( 6, tParamSrc.GetCov(6) );
442 tParamDst.SetCov( 7, tParamSrc.GetCov(7) );
443 tParamDst.SetCov( 8, tParamSrc.GetCov(8) );
444 tParamDst.SetCov( 9, tParamSrc.GetCov(9) );
445 tParamDst.SetCov( 10, tParamSrc.GetCov(10) );
446 tParamDst.SetCov( 11, tParamSrc.GetCov(11) );
447 tParamDst.SetCov( 12, tParamSrc.GetCov(12) );
448 tParamDst.SetCov( 13, tParamSrc.GetCov(13) );
449 tParamDst.SetCov( 14, tParamSrc.GetCov(14) );
450 tParamDst.SetX( tParamSrc.GetX() );
451 tParamDst.SetY( tParamSrc.GetY() );
452 tParamDst.SetZ( tParamSrc.GetZ() );
455 GPUdi() int AliHLTTPCCATrackletConstructor::FetchTracklet(AliHLTTPCCATracker &tracker, AliHLTTPCCASharedMemory &sMem, int Reverse, int RowBlock, int &mustInit)
457 //Fetch a new trackled to be processed by this thread
459 int nextTracketlFirstRun = sMem.fNextTrackletFirstRun;
460 if (threadIdx.x == 0)
462 sMem.fNTracklets = *tracker.NTracklets();
463 if (sMem.fNextTrackletFirstRun)
465 #ifdef HLTCA_GPU_SCHED_FIXED_START
466 const int iSlice = tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT;
467 const int nSliceBlockOffset = HLTCA_GPU_BLOCK_COUNT * iSlice / tracker.GPUParametersConst()->fGPUnSlices;
468 const uint2 &nTracklet = tracker.BlockStartingTracklet()[blockIdx.x - nSliceBlockOffset];
470 sMem.fNextTrackletCount = nTracklet.y;
471 if (sMem.fNextTrackletCount == 0)
473 sMem.fNextTrackletFirstRun = 0;
477 if (tracker.TrackletStartHits()[nTracklet.x].RowIndex() / HLTCA_GPU_SCHED_ROW_STEP != RowBlock)
479 sMem.fNextTrackletCount = 0;
483 sMem.fNextTrackletFirst = nTracklet.x;
486 #endif //HLTCA_GPU_SCHED_FIXED_START
490 const int nFetchTracks = CAMath::Max(CAMath::Min((*tracker.RowBlockPos(Reverse, RowBlock)).x - (*tracker.RowBlockPos(Reverse, RowBlock)).y, HLTCA_GPU_THREAD_COUNT), 0);
491 sMem.fNextTrackletCount = nFetchTracks;
492 const int nUseTrack = nFetchTracks ? CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).y, nFetchTracks) : 0;
493 sMem.fNextTrackletFirst = nUseTrack;
495 const int nFillTracks = CAMath::Min(nFetchTracks, nUseTrack + nFetchTracks - (*((volatile int2*) (tracker.RowBlockPos(Reverse, RowBlock)))).x);
498 const int nStartFillTrack = CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).x, nFillTracks);
499 if (nFillTracks + nStartFillTrack >= HLTCA_GPU_MAX_TRACKLETS)
501 tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_ROWBLOCK_TRACKLET_OVERFLOW;
503 for (int i = 0;i < nFillTracks;i++)
505 tracker.RowBlockTracklets(Reverse, RowBlock)[(nStartFillTrack + i) % HLTCA_GPU_MAX_TRACKLETS] = -3; //Dummy filling track
512 if (sMem.fNextTrackletCount == 0)
514 return(-2); //No more track in this RowBlock
516 else if (threadIdx.x >= sMem.fNextTrackletCount)
518 return(-1); //No track in this RowBlock for this thread
520 else if (nextTracketlFirstRun)
522 if (threadIdx.x == 0) sMem.fNextTrackletFirstRun = 0;
524 return(sMem.fNextTrackletFirst + threadIdx.x);
528 const int nTrackPos = sMem.fNextTrackletFirst + threadIdx.x;
529 mustInit = (nTrackPos < tracker.RowBlockPos(Reverse, RowBlock)->w);
530 volatile int* const ptrTracklet = &tracker.RowBlockTracklets(Reverse, RowBlock)[nTrackPos % HLTCA_GPU_MAX_TRACKLETS];
533 while ((nTracklet = *ptrTracklet) == -1)
535 for (int i = 0;i < 20000;i++)
536 sMem.fNextTrackletStupidDummy++;
540 tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_SCHEDULE_COLLISION;
548 GPUdi() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU(AliHLTTPCCATracker *pTracker)
550 //Main Tracklet construction function that calls the scheduled (FetchTracklet) and then Processes the tracklet (mainly UpdataTracklet) and at the end stores the tracklet.
551 //Can also dispatch a tracklet to be rescheduled
552 #ifdef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
553 pTracker[0].BlockStartingTracklet()[0].x = HLTCA_GPU_EMULATION_SINGLE_TRACKLET;
554 pTracker[0].BlockStartingTracklet()[0].y = 1;
555 for (int i = 1;i < HLTCA_GPU_BLOCK_COUNT;i++)
557 pTracker[0].BlockStartingTracklet()[i].x = pTracker[0].BlockStartingTracklet()[i].y = 0;
559 #endif //HLTCA_GPU_EMULATION_SINGLE_TRACKLET
561 GPUshared() AliHLTTPCCASharedMemory sMem;
563 #ifdef HLTCA_GPU_SCHED_FIXED_START
564 if (threadIdx.x == 0)
566 sMem.fNextTrackletFirstRun = 1;
569 #endif //HLTCA_GPU_SCHED_FIXED_START
571 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
572 if (threadIdx.x == 0)
577 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
579 for (int iReverse = 0;iReverse < 2;iReverse++)
581 for (volatile int iRowBlock = 0;iRowBlock < HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1;iRowBlock++)
583 #ifdef HLTCA_GPU_SCHED_FIXED_SLICE
584 int iSlice = pTracker[0].GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % pTracker[0].GPUParametersConst()->fGPUnSlices != 0 && pTracker[0].GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT;
586 for (int iSlice = 0;iSlice < pTracker[0].GPUParametersConst()->fGPUnSlices;iSlice++)
587 #endif //HLTCA_GPU_SCHED_FIXED_SLICE
589 AliHLTTPCCATracker &tracker = pTracker[iSlice];
590 if (sMem.fNextTrackletFirstRun && iSlice != tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT)
594 /*if (!sMem.fNextTrackletFirstRun && tracker.RowBlockPos(1, HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP)->x <= tracker.RowBlockPos(1, HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP)->y)
598 int sharedRowsInitialized = 0;
602 while ((iTracklet = FetchTracklet(tracker, sMem, iReverse, iRowBlock, mustInit)) != -2)
604 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
605 CAMath::AtomicMax(&sMem.fMaxSync, threadSync);
607 threadSync = CAMath::Min(sMem.fMaxSync, 100000000 / blockDim.x / gridDim.x);
608 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
609 if (!sharedRowsInitialized)
611 for (int i = threadIdx.x;i < HLTCA_ROW_COUNT * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
613 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker.SliceDataRows())[i];
615 sharedRowsInitialized = 1;
617 #ifdef HLTCA_GPU_RESCHED
618 short2 storeToRowBlock;
619 int storePosition = 0;
620 if (threadIdx.x < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
622 const int nReverse = threadIdx.x / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
623 const int nRowBlock = threadIdx.x % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
624 sMem.fTrackletStoreCount[nReverse][nRowBlock] = 0;
626 #endif //HLTCA_GPU_RESCHED
628 AliHLTTPCCATrackParam tParam;
629 AliHLTTPCCAThreadMemory rMem;
631 rMem.fCurrentData = 0;
633 #ifdef HLTCA_GPU_EMULATION_DEBUG_TRACKLET
634 if (iTracklet == HLTCA_GPU_EMULATION_DEBUG_TRACKLET)
636 tracker.GPUParameters()->fGPUError = 1;
638 #endif //HLTCA_GPU_EMULATION_DEBUG_TRACKLET
639 AliHLTTPCCAThreadMemory &rMemGlobal = tracker.GPUTrackletTemp()[iTracklet].fThreadMem;
640 AliHLTTPCCATrackParam &tParamGlobal = tracker.GPUTrackletTemp()[iTracklet].fParam;
643 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
645 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
646 rMem.fCurrIH = id.HitIndex();
651 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
653 else if (iTracklet >= 0)
655 CopyTrackletTempData( rMemGlobal, rMem, tParamGlobal, tParam );
657 rMem.fItr = iTracklet;
658 rMem.fGo = (iTracklet >= 0);
660 #ifdef HLTCA_GPU_RESCHED
661 storeToRowBlock.x = iRowBlock + 1;
662 storeToRowBlock.y = iReverse;
665 for (int j = HLTCA_ROW_COUNT - 1 - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP;j >= CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP);j--)
667 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
668 if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && !(j >= rMem.fEndRow || ( j >= rMem.fStartRow && j - rMem.fStartRow % 2 == 0)))
669 pTracker[0].StageAtSync()[threadSync++ * blockDim.x * gridDim.x + blockIdx.x * blockDim.x + threadIdx.x] = rMem.fStage + 1;
670 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
673 UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
674 if (rMem.fNMissed > kMaxRowGap && j <= rMem.fStartRow)
682 if (iTracklet >= 0 && (!rMem.fGo || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
684 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
689 for (int j = CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP);j < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT);j++)
691 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
692 if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && j >= rMem.fStartRow && (rMem.fStage > 0 || rMem.fCurrIH >= 0 || (j - rMem.fStartRow) % 2 == 0 ))
693 pTracker[0].StageAtSync()[threadSync++ * blockDim.x * gridDim.x + blockIdx.x * blockDim.x + threadIdx.x] = rMem.fStage + 1;
694 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
697 UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
698 //if (rMem.fNMissed > kMaxRowGap || rMem.fGo == 0) break; //DR!!! CUDA Crashes with this enabled
701 if (rMem.fGo && (rMem.fNMissed > kMaxRowGap || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
703 if ( !tParam.TransportToX( sMem.fRows[ rMem.fEndRow ].X(), tracker.Param().ConstBz(), .999 ) )
709 storeToRowBlock.x = (HLTCA_ROW_COUNT - rMem.fEndRow) / HLTCA_GPU_SCHED_ROW_STEP;
710 storeToRowBlock.y = 1;
716 if (iTracklet >= 0 && !rMem.fGo)
718 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
722 if (rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
724 CopyTrackletTempData( rMem, rMemGlobal, tParam, tParamGlobal );
725 storePosition = CAMath::AtomicAdd(&sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x], 1);
728 if (threadIdx.x % HLTCA_GPU_WARP_SIZE == 0)
730 sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE] = 160;
731 sMem.fEndRows[threadIdx.x / HLTCA_GPU_WARP_SIZE] = 0;
736 CAMath::AtomicMin(&sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE], rMem.fStartRow);
741 for (int j = sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE];j < HLTCA_ROW_COUNT;j++)
743 UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
744 if (!rMem.fGo) break;
751 if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) ) rMem.fGo = 0;
753 CAMath::AtomicMax(&sMem.fEndRows[threadIdx.x / HLTCA_GPU_WARP_SIZE], rMem.fEndRow);
759 for (int j = rMem.fEndRow;j >= 0;j--)
761 if (!rMem.fGo) break;
762 UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
765 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
767 #endif //HLTCA_GPU_RESCHED
769 #ifdef HLTCA_GPU_RESCHED
771 if (threadIdx.x < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
773 const int nReverse = threadIdx.x / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
774 const int nRowBlock = threadIdx.x % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
775 if (sMem.fTrackletStoreCount[nReverse][nRowBlock])
777 sMem.fTrackletStoreCount[nReverse][nRowBlock] = CAMath::AtomicAdd(&tracker.RowBlockPos(nReverse, nRowBlock)->x, sMem.fTrackletStoreCount[nReverse][nRowBlock]);
781 if (iTracklet >= 0 && rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
783 tracker.RowBlockTracklets(storeToRowBlock.y, storeToRowBlock.x)[sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x] + storePosition] = iTracklet;
786 #endif //HLTCA_GPU_RESCHED
793 GPUdi() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(int iTracklet, AliHLTTPCCATracker &tracker)
795 //Initialize Row Blocks
797 #ifndef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
798 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
799 #ifdef HLTCA_GPU_SCHED_FIXED_START
800 const int firstDynamicTracklet = tracker.GPUParameters()->fScheduleFirstDynamicTracklet;
801 if (iTracklet >= firstDynamicTracklet)
802 #endif //HLTCA_GPU_SCHED_FIXED_START
804 #ifdef HLTCA_GPU_SCHED_FIXED_START
805 const int firstTrackletInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[CAMath::Max(id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP * HLTCA_GPU_SCHED_ROW_STEP, 1)].z);
807 const int firstTrackletInRowBlock = tracker.RowStartHitCountOffset()[CAMath::Max(id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP * HLTCA_GPU_SCHED_ROW_STEP, 1)].z;
808 #endif //HLTCA_GPU_SCHED_FIXED_START
810 if (iTracklet == firstTrackletInRowBlock)
812 const int firstRowInNextBlock = (id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP + 1) * HLTCA_GPU_SCHED_ROW_STEP;
813 int trackletsInRowBlock;
814 if (firstRowInNextBlock >= HLTCA_ROW_COUNT - 3)
815 trackletsInRowBlock = *tracker.NTracklets() - firstTrackletInRowBlock;
817 #ifdef HLTCA_GPU_SCHED_FIXED_START
818 trackletsInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[firstRowInNextBlock].z) - firstTrackletInRowBlock;
820 trackletsInRowBlock = tracker.RowStartHitCountOffset()[firstRowInNextBlock].z - firstTrackletInRowBlock;
821 #endif //HLTCA_GPU_SCHED_FIXED_START
823 tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->x = trackletsInRowBlock;
824 tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->w = trackletsInRowBlock;
826 tracker.RowBlockTracklets(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)[iTracklet - firstTrackletInRowBlock] = iTracklet;
828 #endif //!HLTCA_GPU_EMULATION_SINGLE_TRACKLET
831 GPUg() void AliHLTTPCCATrackletConstructorInit(int iSlice)
833 //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit
834 AliHLTTPCCATracker &tracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker )[iSlice];
835 int i = blockIdx.x * blockDim.x + threadIdx.x;
836 if (i >= *tracker.NTracklets()) return;
837 AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(i, tracker);
840 GPUg() void AliHLTTPCCATrackletConstructorGPU()
842 //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU
843 AliHLTTPCCATracker *pTracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker );
844 AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU(pTracker);
847 GPUg() void AliHLTTPCCATrackletConstructorGPUPP(int firstSlice, int sliceCount)
849 if (blockIdx.x >= sliceCount) return;
850 AliHLTTPCCATracker *pTracker = &( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker )[firstSlice + blockIdx.x];
851 AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPUPP(pTracker);
854 GPUd() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPUPP(AliHLTTPCCATracker *tracker)
856 GPUshared() AliHLTTPCCASharedMemory sMem;
857 sMem.fNTracklets = *tracker->NTracklets();
859 for (int i = threadIdx.x;i < HLTCA_ROW_COUNT * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
861 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker->SliceDataRows())[i];
864 for (int iTracklet = threadIdx.x;iTracklet < (*tracker->NTracklets() / HLTCA_GPU_THREAD_COUNT + 1) * HLTCA_GPU_THREAD_COUNT;iTracklet += blockDim.x)
866 AliHLTTPCCATrackParam tParam;
867 AliHLTTPCCAThreadMemory rMem;
869 if (iTracklet < *tracker->NTracklets())
871 AliHLTTPCCAHitId id = tracker->TrackletTmpStartHits()[iTracklet];
873 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
874 rMem.fCurrIH = id.HitIndex();
879 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
881 rMem.fItr = iTracklet;
885 if (threadIdx.x % HLTCA_GPU_WARP_SIZE == 0)
887 sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE] = 160;
888 sMem.fEndRows[threadIdx.x / HLTCA_GPU_WARP_SIZE] = 0;
891 if (iTracklet < *tracker->NTracklets())
893 CAMath::AtomicMin(&sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE], rMem.fStartRow);
896 if (iTracklet < *tracker->NTracklets())
898 for (int j = sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE];j < HLTCA_ROW_COUNT;j++)
900 UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, *tracker, tParam, j);
901 if (!rMem.fGo) break;
908 if ( !tParam.TransportToX( tracker->Row( rMem.fEndRow ).X(), tracker->Param().ConstBz(), .999 ) ) rMem.fGo = 0;
910 CAMath::AtomicMax(&sMem.fEndRows[threadIdx.x / HLTCA_GPU_WARP_SIZE], rMem.fEndRow);
914 if (iTracklet < *tracker->NTracklets())
916 for (int j = rMem.fEndRow;j >= 0;j--)
918 if (!rMem.fGo) break;
919 UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, *tracker, tParam, j);
921 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, *tracker, tParam );
926 #else //HLTCA_GPUCODE
928 GPUdi() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorCPU(AliHLTTPCCATracker &tracker)
930 //Tracklet constructor simple CPU Function that does not neew a scheduler
931 GPUshared() AliHLTTPCCASharedMemory sMem;
932 sMem.fNTracklets = *tracker.NTracklets();
933 for (int iTracklet = 0;iTracklet < *tracker.NTracklets();iTracklet++)
935 AliHLTTPCCATrackParam tParam;
936 AliHLTTPCCAThreadMemory rMem;
938 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
940 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
941 rMem.fCurrIH = id.HitIndex();
946 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
948 rMem.fItr = iTracklet;
951 for (int j = rMem.fStartRow;j < tracker.Param().NRows();j++)
953 UpdateTracklet(1, 1, 0, iTracklet, sMem, rMem, tracker, tParam, j);
954 if (!rMem.fGo) break;
961 if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) ) rMem.fGo = 0;
964 for (int j = rMem.fEndRow;j >= 0;j--)
966 if (!rMem.fGo) break;
967 UpdateTracklet( 1, 1, 0, iTracklet, sMem, rMem, tracker, tParam, j);
970 StoreTracklet( 1, 1, 0, iTracklet, sMem, rMem, tracker, tParam );
973 #endif //HLTCA_GPUCODE