1 // @(#) $Id: AliHLTTPCCATrackletConstructor.cxx 27042 2008-07-02 12:06:02Z richterm $
2 // **************************************************************************
3 // This file is property of and copyright by the ALICE HLT Project *
4 // ALICE Experiment at CERN, All rights reserved. *
6 // Primary Authors: Sergey Gorbunov <sergey.gorbunov@kip.uni-heidelberg.de> *
7 // Ivan Kisel <kisel@kip.uni-heidelberg.de> *
8 // for The ALICE HLT Project. *
10 // Permission to use, copy, modify and distribute this software and its *
11 // documentation strictly for non-commercial purposes is hereby granted *
12 // without fee, provided that the above copyright notice appears in all *
13 // copies and that both the copyright notice and this permission notice *
14 // appear in the supporting documentation. The authors make no claims *
15 // about the suitability of this software for any purpose. It is *
16 // provided "as is" without express or implied warranty. *
18 //***************************************************************************
20 #include "AliHLTTPCCATracker.h"
21 #include "AliHLTTPCCATrackParam.h"
22 #include "AliHLTTPCCATrackParam.h"
23 #include "AliHLTTPCCAGrid.h"
24 #include "AliHLTTPCCAMath.h"
25 #include "AliHLTTPCCADef.h"
26 #include "AliHLTTPCCATracklet.h"
27 #include "AliHLTTPCCATrackletConstructor.h"
28 #include "MemoryAssignmentHelpers.h"
30 //#include "AliHLTTPCCAPerformance.h"
36 #include "AliHLTTPCCADisplay.h"
// Puts a track parameter object into the fixed starting state used before a
// tracklet is fitted. tParam is overwritten in place; nothing is returned.
// NOTE(review): this excerpt does not show every line of the function, so
// further initialising calls may exist between the visible ones.
41 GPUd() void AliHLTTPCCATrackletConstructor::InitTracklet( AliHLTTPCCATrackParam &tParam )
43 //Initialize Tracklet Parameters using default values
44 tParam.SetSinPhi( 0 );
47 tParam.SetSignCosPhi( 1 );
// Covariance: entries 0, 2, 5 and 9 are set to 1, entry 14 to 10, all other
// entries to 0.
// NOTE(review): 0, 2, 5, 9, 14 are presumably the diagonal of a packed
// lower-triangular 5x5 matrix - confirm against AliHLTTPCCATrackParam.
50 tParam.SetCov( 0, 1 );
51 tParam.SetCov( 1, 0 );
52 tParam.SetCov( 2, 1 );
53 tParam.SetCov( 3, 0 );
54 tParam.SetCov( 4, 0 );
55 tParam.SetCov( 5, 1 );
56 tParam.SetCov( 6, 0 );
57 tParam.SetCov( 7, 0 );
58 tParam.SetCov( 8, 0 );
59 tParam.SetCov( 9, 1 );
60 tParam.SetCov( 10, 0 );
61 tParam.SetCov( 11, 0 );
62 tParam.SetCov( 12, 0 );
63 tParam.SetCov( 13, 0 );
64 tParam.SetCov( 14, 10. );
// Copies the data of one row (iRow) from the tracker into the shared memory
// object s, the work being split over the calling threads by iThread.
// The parameter list is chosen by the preprocessor: without
// HLTCA_GPU_PREFETCHDATA all parameter names are commented out, so the
// arguments are unused in that configuration.
// NOTE(review): the #else and #endif lines, the braces and the end of the
// block comment opened further down are not visible in this excerpt.
67 GPUd() void AliHLTTPCCATrackletConstructor::ReadData
68 #ifndef HLTCA_GPU_PREFETCHDATA
69 ( int /*iThread*/, AliHLTTPCCASharedMemory& /*s*/, AliHLTTPCCAThreadMemory& /*r*/, AliHLTTPCCATracker& /*tracker*/, int /*iRow*/ )
71 //Prefetch Data to shared memory
73 ( int iThread, AliHLTTPCCASharedMemory& s, AliHLTTPCCAThreadMemory& r, AliHLTTPCCATracker& tracker, int iRow )
75 // reconstruction of tracklets, read data step
76 const AliHLTTPCCARow &row = tracker.Row( iRow );
77 //bool jr = !r.fCurrentData;
79 // copy hits, grid content and links
81 // FIXME: inefficient copy
82 //const int numberOfHitsAligned = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits());
// NOTE(review): jr and numberOfHitsAligned are declared only in the
// commented-out lines above, so the element-wise copies that follow are
// presumably inside a disabled region whose delimiters are not visible in
// this excerpt - confirm.
// Each loop starts at iThread and advances by TRACKLET_CONSTRUCTOR_NMEMTHREDS.
85 #ifdef HLTCA_GPU_REORDERHITDATA
86 ushort2 *sMem1 = reinterpret_cast<ushort2 *>( s.fData[jr] );
87 for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
88 sMem1[i].x = tracker.HitDataY( row, i );
89 sMem1[i].y = tracker.HitDataZ( row, i );
92 ushort_v *sMem1 = reinterpret_cast<ushort_v *>( s.fData[jr] );
93 for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
94 sMem1[i] = tracker.HitDataY( row, i );
97 ushort_v *sMem1a = reinterpret_cast<ushort_v *>( s.fData[jr] ) + numberOfHitsAligned;
98 for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
99 sMem1a[i] = tracker.HitDataZ( row, i );
103 short *sMem2 = reinterpret_cast<short *>( s.fData[jr] ) + 2 * numberOfHitsAligned;
104 for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
105 sMem2[i] = tracker.HitLinkUpData( row, i );
108 unsigned short *sMem3 = reinterpret_cast<unsigned short *>( s.fData[jr] ) + 3 * numberOfHitsAligned;
109 const int n = row.FullSize(); // + grid content size
110 for ( int i = iThread; i < n; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
111 sMem3[i] = tracker.FirstHitInBin( row, i );
114 /*for (int k = 0;k < 2;k++)
116 HLTCA_GPU_ROWCOPY* sharedMem;
117 const HLTCA_GPU_ROWCOPY* sourceMem;
122 sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitDataY(row) );
123 sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
124 copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
127 sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitDataZ(row) );
128 sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
129 copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
132 sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitLinkUpData(row) );
133 sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
134 copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
137 sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.FirstHitInBin(row) );
138 sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
139 copyCount = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.FullSize()) * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
142 for (int i = iThread;i < copyCount;i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
144 sharedMem[i] = sourceMem[i];
// Copy of the FirstHitInBin array of the row into s.fData[!r.fCurrentData],
// moved in units of HLTCA_GPU_ROWCOPY. The index !r.fCurrentData selects the
// buffer that r.fCurrentData does not point at.
// NOTE(review): the element count uses integer division, so a remainder of
// row.FullSize() * sizeof(ushort_v) that does not fill a whole
// HLTCA_GPU_ROWCOPY is not copied - confirm the sizes are multiples.
148 for (unsigned int i = iThread;i < row.FullSize() * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
150 reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[!r.fCurrentData] ))[i] = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.FirstHitInBin(row) )[i];
// Copy of the AliHLTTPCCARow object itself into s.fRow[!r.fCurrentData],
// again in HLTCA_GPU_ROWCOPY units; the same truncation caveat applies to
// sizeof(AliHLTTPCCARow).
153 const HLTCA_GPU_ROWCOPY* const sourceMem = (const HLTCA_GPU_ROWCOPY *) &row;
154 HLTCA_GPU_ROWCOPY* const sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> ( &s.fRow[!r.fCurrentData] );
155 for (unsigned int i = iThread;i < sizeof(AliHLTTPCCARow) / sizeof(HLTCA_GPU_ROWCOPY);i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
157 sharedMem[i] = sourceMem[i];
// Checks the tracklet described by r and tParam and stores it in
// tracker.Tracklets()[r.fItr]: hit count, first and last row and the track
// parameters. Afterwards tracker.MaximizeHitWeight is called for the hits of
// the rows fFirstRow .. fLastRow.
// The four leading int arguments are unnamed and unused.
// NOTE(review): this excerpt does not show every line of the function; in
// particular the statements executed when a quality cut fails are missing.
163 GPUd() void AliHLTTPCCATrackletConstructor::StoreTracklet
164 ( int /*nBlocks*/, int /*nThreads*/, int /*iBlock*/, int /*iThread*/,
165 AliHLTTPCCASharedMemory
171 , AliHLTTPCCAThreadMemory &r, AliHLTTPCCATracker &tracker, AliHLTTPCCATrackParam &tParam )
173 // reconstruction of tracklets, tracklet store step
175 //AliHLTTPCCAPerformance::Instance().HNHitsPerTrackCand()->Fill(r.fNHits);
179 //std::cout<<"tracklet to store: "<<r.fItr<<", nhits = "<<r.fNHits<<std::endl;
// Cut on the number of hits.
182 if ( r.fNHits < TRACKLET_SELECTOR_MIN_HITS ) {
// 1. / .5 evaluates to 2, so this tests for |QPt| above 2.
188 if ( 1. / .5 < CAMath::Abs( tParam.QPt() ) ) { //SG!!!
// Sanity checks: all 15 covariance entries and all 5 parameters must be
// finite, X must exceed 50 and covariance entries 0, 2, 5, 9 and 14 must be
// positive; otherwise ok is cleared.
196 const float *c = tParam.Cov();
197 for ( int i = 0; i < 15; i++ ) ok = ok && CAMath::Finite( c[i] );
198 for ( int i = 0; i < 5; i++ ) ok = ok && CAMath::Finite( tParam.Par()[i] );
199 ok = ok && ( tParam.X() > 50 );
201 if ( c[0] <= 0 || c[2] <= 0 || c[5] <= 0 || c[9] <= 0 || c[14] <= 0 ) ok = 0;
210 if ( !SAVE() ) return;
212 AliHLTTPCCATracklet &tracklet = tracker.Tracklets()[r.fItr];
214 tracklet.SetNHits( r.fNHits );
216 if ( r.fNHits > 0 ) {
219 std::cout << "store tracklet " << r.fItr << ", nhits = " << r.fNHits << std::endl;
220 if ( AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 1. ) ) {
221 AliHLTTPCCADisplay::Instance().Ask();
// A |Par[4]| below 1.e-4 is replaced by +1.e-4; the original sign is not kept.
225 if ( CAMath::Abs( tParam.Par()[4] ) < 1.e-4 ) tParam.SetPar( 4, 1.e-4 );
// Make sure the stored row range starts no later than the start row.
226 if (r.fStartRow < r.fFirstRow) r.fFirstRow = r.fStartRow;
227 tracklet.SetFirstRow( r.fFirstRow );
228 tracklet.SetLastRow( r.fLastRow );
229 tracklet.SetParam( tParam );
// Weight passed to MaximizeHitWeight: the hit count shifted left by 16 bits
// plus the tracklet index.
230 int w = ( r.fNHits << 16 ) + r.fItr;
231 for ( int iRow = r.fFirstRow; iRow <= r.fLastRow; iRow++ ) {
// The hit index of the row comes from the external array when
// EXTERN_ROW_HITS is defined, otherwise from the tracklet itself.
232 #ifdef EXTERN_ROW_HITS
233 int ih = tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr];
235 int ih = tracklet.RowHit( iRow );
// NOTE(review): the condition joins its terms with the bitwise & operator;
// every operand is 0 or 1 here, so the result equals that of &&.
238 #if defined(HLTCA_GPUCODE) & !defined(HLTCA_GPU_PREFETCHDATA) & !defined(HLTCA_GPU_PREFETCH_ROWBLOCK_ONLY)
239 tracker.MaximizeHitWeight( s.fRows[ iRow ], ih, w );
241 tracker.MaximizeHitWeight( tracker.Row( iRow ), ih, w );
// Processes one row (iRow) for the tracklet held in r and tParam.
// Returns at once when r.fGo is not set. With r.fStage == 0 the row is handled
// by the fitting part, which follows the chain of linked hits read from
// HitLinkUpData; otherwise by the forward/backward searching part, which
// transports the track to the row and looks for the closest hit in the grid.
// The hit chosen for the row (or -1 for none) is written to the tracklet or,
// with EXTERN_ROW_HITS, to tracker.TrackletRowHits().
// The four leading int arguments are unnamed and unused.
// NOTE(review): this excerpt does not show every line of the function (braces,
// #else and #endif lines and some statements are missing); the comments below
// describe only what the visible lines do.
249 GPUd() void AliHLTTPCCATrackletConstructor::UpdateTracklet
250 ( int /*nBlocks*/, int /*nThreads*/, int /*iBlock*/, int /*iThread*/,
251 AliHLTTPCCASharedMemory
257 , AliHLTTPCCAThreadMemory &r, AliHLTTPCCATracker &tracker, AliHLTTPCCATrackParam &tParam, int iRow )
259 // reconstruction of tracklets, tracklets update step
261 //std::cout<<"Update tracklet: "<<r.fItr<<" "<<r.fGo<<" "<<r.fStage<<" "<<iRow<<std::endl;
// Debug switches, all disabled here.
262 bool drawSearch = 0;//r.fItr==2;
263 bool drawFit = 0;//r.fItr==2;
264 bool drawFitted = drawFit ;//|| 1;//r.fItr==16;
266 if ( !r.fGo ) return;
268 #ifndef EXTERN_ROW_HITS
269 AliHLTTPCCATracklet &tracklet = tracker.Tracklets()[r.fItr];
// The row object is taken from shared memory or from the tracker, depending
// on the build configuration.
272 #ifdef HLTCA_GPU_PREFETCHDATA
273 const AliHLTTPCCARow &row = s.fRow[r.fCurrentData];
274 #elif defined(HLTCA_GPUCODE)
275 const AliHLTTPCCARow &row = s.fRows[iRow];
277 const AliHLTTPCCARow &row = tracker.Row( iRow );
// Row geometry: a stored hit hh is converted further down with
// y = y0 + hh.x * stepY and z = z0 + hh.y * stepZ.
280 float y0 = row.Grid().YMin();
281 float stepY = row.HstepY();
282 float z0 = row.Grid().ZMin();
283 float stepZ = row.HstepZ();
284 float stepYi = row.HstepYi();
285 float stepZi = row.HstepZi();
287 if ( r.fStage == 0 ) { // fitting part
290 if ( iRow < r.fStartRow || r.fCurrIH < 0 ) break;
// Only rows at an even distance from fStartRow are fitted; any other row is
// marked as having no hit (-1) and left.
292 if ( ( iRow - r.fStartRow ) % 2 != 0 )
294 #ifndef EXTERN_ROW_HITS
295 tracklet.SetRowHit(iRow, -1);
297 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
299 break; // SG!!! - jump over the row
302 //#ifdef HLTCA_GPU_PREFETCHDATA
303 // uint4 *tmpint4 = s.fData[r.fCurrentData];
306 //#ifdef HLTCA_GPU_REORDERHITDATA
307 // hh = reinterpret_cast<ushort2*>( tmpint4 )[r.fCurrIH];
309 //#ifdef HLTCA_GPU_PREFETCHDATA
310 // hh.x = reinterpret_cast<ushort_v*>( tmpint4 )[r.fCurrIH];
311 // hh.y = reinterpret_cast<ushort_v*>( tmpint4 )[NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits()) + r.fCurrIH];
// Load the current hit, through a texture fetch or directly from the hit data.
313 #if defined(HLTCA_GPU_TEXTURE_FETCH)
314 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + r.fCurrIH);
316 hh = tracker.HitData(row)[r.fCurrIH];
// Remember the hit used on this row, then move fCurrIH on to the hit it is
// linked to.
321 int oldIH = r.fCurrIH;
322 //#ifdef HLTCA_GPU_PREFETCHDATA
323 // r.fCurrIH = reinterpret_cast<short*>( tmpint4 )[2 * NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits()) + r.fCurrIH]; // read from linkup data
325 #if defined(HLTCA_GPU_TEXTURE_FETCH)
326 r.fCurrIH = tex1Dfetch(gAliTexRefs, ((char*) tracker.Data().HitLinkUpData(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + r.fCurrIH);
328 r.fCurrIH = tracker.HitLinkUpData(row)[r.fCurrIH]; // read from linkup data
333 float y = y0 + hh.x * stepY;
334 float z = z0 + hh.y * stepZ;
336 if ( drawFit ) std::cout << " fit tracklet: new hit " << oldIH << ", xyz=" << x << " " << y << " " << z << std::endl;
339 if ( iRow == r.fStartRow ) {
346 if ( drawFit ) std::cout << " fit tracklet " << r.fItr << ", row " << iRow << " first row" << std::endl;
// Offsets of the new hit from the track X and from the stored fLastY, fLastZ.
351 float dx = x - tParam.X();
352 float dy = y - r.fLastY;//tParam.Y();
353 float dz = z - r.fLastZ;//tParam.Z();
357 float ri = 1. / CAMath::Sqrt( dx * dx + dy * dy );
// On row fStartRow + 2 the direction (SinPhi, SignCosPhi, DzDs) is set from
// dx, dy, dz and covariance entries 0 and 2 are set from GetErrors2.
358 if ( iRow == r.fStartRow + 2 ) { //SG!!! important - thanks to Matthias
359 tParam.SetSinPhi( dy*ri );
360 tParam.SetSignCosPhi( dx );
361 tParam.SetDzDs( dz*ri );
362 //std::cout << "Init. errors... " << r.fItr << std::endl;
363 tracker.GetErrors2( iRow, tParam, err2Y, err2Z );
364 //std::cout << "Init. errors = " << err2Y << " " << err2Z << std::endl;
365 tParam.SetCov( 0, err2Y );
366 tParam.SetCov( 2, err2Z );
370 std::cout << " fit tracklet " << r.fItr << ", row " << iRow << " transporting.." << std::endl;
371 std::cout << " params before transport=" << std::endl;
// The track SinPhi is used for the transport once at least 10 hits are
// attached and |SinPhi| is below .99; the alternative branch is not visible
// in this excerpt.
375 float sinPhi, cosPhi;
376 if ( r.fNHits >= 10 && CAMath::Abs( tParam.SinPhi() ) < .99 ) {
377 sinPhi = tParam.SinPhi();
378 cosPhi = CAMath::Sqrt( 1 - sinPhi * sinPhi );
384 if ( drawFit ) std::cout << "sinPhi0 = " << sinPhi << ", cosPhi0 = " << cosPhi << std::endl;
// Transport to the row; on failure the row is marked as having no hit.
386 if ( !tParam.TransportToX( x, sinPhi, cosPhi, tracker.Param().ConstBz(), -1 ) ) {
388 if ( drawFit ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not transport!!" << std::endl;
390 #ifndef EXTERN_ROW_HITS
391 tracklet.SetRowHit( iRow, -1 );
393 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
397 //std::cout<<"mark1 "<<r.fItr<<std::endl;
399 tracker.GetErrors2( iRow, tParam.GetZ(), sinPhi, cosPhi, tParam.GetDzDs(), err2Y, err2Z );
400 //std::cout<<"mark2"<<std::endl;
404 std::cout << " params after transport=" << std::endl;
406 std::cout << "fit tracklet before filter: " << r.fItr << ", row " << iRow << " errs=" << err2Y << " " << err2Z << std::endl;
407 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 2., 1 );
408 AliHLTTPCCADisplay::Instance().Ask();
// Update the track with the hit; on failure the row is marked as having no
// hit, otherwise the hit index saved in oldIH is recorded for the row.
411 if ( !tParam.Filter( y, z, err2Y, err2Z, .99 ) ) {
413 if ( drawFit ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not filter!!" << std::endl;
415 #ifndef EXTERN_ROW_HITS
416 tracklet.SetRowHit( iRow, -1 );
418 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
423 #ifndef EXTERN_ROW_HITS
424 tracklet.SetRowHit( iRow, oldIH );
426 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = oldIH;
430 std::cout << "fit tracklet after filter " << r.fItr << ", row " << iRow << std::endl;
432 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kGreen, 2. );
433 AliHLTTPCCADisplay::Instance().Ask();
// A negative fCurrIH means there is no further linked hit. The tracklet is
// then dropped (fNHits = 0, fGo = 0) if it has fewer than 3 hits or if
// |SinPhi| exceeds .999.
442 if ( r.fCurrIH < 0 ) {
444 if ( drawFitted ) std::cout << "fitted tracklet " << r.fItr << ", nhits=" << r.fNHits << std::endl;
447 //AliHLTTPCCAPerformance::Instance().HNHitsPerSeed()->Fill(r.fNHits);
448 if ( r.fNHits < 3 ) { r.fNHits = 0; r.fGo = 0;}//SG!!!
449 if ( CAMath::Abs( tParam.SinPhi() ) > .999 ) {
451 if ( drawFitted ) std::cout << " fitted tracklet error: sinPhi=" << tParam.SinPhi() << std::endl;
453 r.fNHits = 0; r.fGo = 0;
455 //tParam.SetCosPhi( CAMath::Sqrt(1-tParam.SinPhi()*tParam.SinPhi()) );
459 std::cout << "fitted tracklet " << r.fItr << " miss=" << r.fNMissed << " go=" << r.fGo << std::endl;
461 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue );
462 AliHLTTPCCADisplay::Instance().Ask();
466 } else { // forward/backward searching part
470 std::cout << "search tracklet " << r.fItr << " row " << iRow << " miss=" << r.fNMissed << " go=" << r.fGo << " stage=" << r.fStage << std::endl;
// In stage 2 this condition matches rows at or beyond fEndRow and rows at an
// even distance at or above fStartRow (the ones the fitting part handles).
// NOTE(review): the action taken on a match is not visible in this excerpt.
474 if ( r.fStage == 2 && ( ( iRow >= r.fEndRow ) ||
475 ( iRow >= r.fStartRow && ( iRow - r.fStartRow ) % 2 == 0 )
// Test of the missed-row counter against kMaxRowGap.
477 if ( r.fNMissed > kMaxRowGap ) {
487 std::cout << "tracklet " << r.fItr << " before transport to row " << iRow << " : " << std::endl;
// Transport to the row; on failure, or when the row has no hits at all, the
// row is marked as having no hit.
491 if ( !tParam.TransportToX( x, tParam.SinPhi(), tParam.GetCosPhi(), tracker.Param().ConstBz(), .99 ) ) {
493 if ( drawSearch ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not transport!!" << std::endl;
495 #ifndef EXTERN_ROW_HITS
496 tracklet.SetRowHit(iRow, -1);
498 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
502 if ( row.NHits() < 1 ) {
504 #ifndef EXTERN_ROW_HITS
505 tracklet.SetRowHit(iRow, -1);
507 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
513 std::cout << "tracklet " << r.fItr << " after transport to row " << iRow << " : " << std::endl;
515 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 2., 1 );
516 AliHLTTPCCADisplay::Instance().Ask();
519 #ifdef HLTCA_GPU_PREFETCHDATA
520 uint4 *tmpint4 = s.fData[r.fCurrentData];
523 //#ifdef HLTCA_GPU_REORDERHITDATA
524 // const ushort2 *hits = reinterpret_cast<ushort2*>( tmpint4 );
526 //#ifdef HLTCA_GPU_PREFETCHDATA
527 // const ushort_v *hitsx = reinterpret_cast<ushort_v*>( tmpint4 );
528 // const ushort_v *hitsy = reinterpret_cast<ushort_v*>( tmpint4 ) + NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits());
530 #ifndef HLTCA_GPU_TEXTURE_FETCH
531 const ushort2 *hits = tracker.HitData(row);
// Track position on this row after the transport.
536 float fY = tParam.GetY();
537 float fZ = tParam.GetZ();
540 { // search for the closest hit
// Bin index returned by GetBinBounded for the point (fY - 1, fZ - 1).
541 const int fIndYmin = row.Grid().GetBinBounded( fY - 1.f, fZ - 1.f );
542 assert( fIndYmin >= 0 );
// Track position expressed in the integer units of the stored hit
// coordinates, and the initial best distance (1 shifted left by 30).
545 int fY0 = ( int ) ( ( fY - y0 ) * stepYi );
546 int fZ0 = ( int ) ( ( fZ - z0 ) * stepZi );
547 int ds0 = ( ( ( int )1 ) << 30 );
550 unsigned int fHitYfst = 1, fHitYlst = 0, fHitYfst1 = 1, fHitYlst1 = 0;
554 std::cout << " tracklet " << r.fItr << ", row " << iRow << ": grid N=" << row.Grid().N() << std::endl;
555 std::cout << " tracklet " << r.fItr << ", row " << iRow << ": minbin=" << fIndYmin << std::endl;
559 int nY = row.Grid().Ny();
561 //#ifdef HLTCA_GPU_PREFETCHDATA
562 // const unsigned short *sGridP = ( reinterpret_cast<unsigned short*>( tmpint4 ) );
564 #ifndef HLTCA_GPU_TEXTURE_FETCH
565 const unsigned short *sGridP = tracker.FirstHitInBin(row);
// Hit index ranges to scan. The entries i and i+1 of the FirstHitInBin array
// delimit the hits of bin i (see the debug loop below), so each first/last
// pair spans two consecutive bins: one pair starting at fIndYmin, the other
// at fIndYmin + nY.
569 #ifdef HLTCA_GPU_TEXTURE_FETCH
570 fHitYfst = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin);
571 fHitYlst = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+2);
572 fHitYfst1 = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+nY);
573 fHitYlst1 = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+nY+2);
575 fHitYfst = sGridP[fIndYmin];
576 fHitYlst = sGridP[fIndYmin+2];
577 fHitYfst1 = sGridP[fIndYmin+nY];
578 fHitYlst1 = sGridP[fIndYmin+nY+2];
580 assert( (signed) fHitYfst <= row.NHits() );
581 assert( (signed) fHitYlst <= row.NHits() );
582 assert( (signed) fHitYfst1 <= row.NHits() );
583 assert( (signed) fHitYlst1 <= row.NHits() );
586 std::cout << " Grid, row " << iRow << ": nHits=" << row.NHits() << ", grid n=" << row.Grid().N() << ", c[n]=" << sGridP[row.Grid().N()] << std::endl;
587 std::cout << "hit steps = " << stepY << " " << stepZ << std::endl;
588 std::cout << " Grid bins:" << std::endl;
589 for ( unsigned int i = 0; i < row.Grid().N(); i++ ) {
590 std::cout << " bin " << i << ": ";
591 for ( int j = sGridP[i]; j < sGridP[i+1]; j++ ) {
592 ushort2 hh = hits[j];
593 float y = y0 + hh.x * stepY;
594 float z = z0 + hh.y * stepZ;
595 std::cout << "[" << j << "|" << y << "," << z << "] ";
597 std::cout << std::endl;
602 if ( sGridP[row.Grid().N()] != row.NHits() ) {
603 std::cout << " grid, row " << iRow << ": nHits=" << row.NHits() << ", grid n=" << row.Grid().N() << ", c[n]=" << sGridP[row.Grid().N()] << std::endl;
610 std::cout << " tracklet " << r.fItr << ", row " << iRow << ", yz= " << fY << "," << fZ << ": search hits=" << fHitYfst << " " << fHitYlst << " / " << fHitYfst1 << " " << fHitYlst1 << std::endl;
611 std::cout << " hit search :" << std::endl;
// Scan of the first range. The distance measure is |ddy| + |ddz| in the
// integer units of the stored hit coordinates.
614 for ( unsigned int fIh = fHitYfst; fIh < fHitYlst; fIh++ ) {
615 assert( (signed) fIh < row.NHits() );
617 #if defined(HLTCA_GPU_TEXTURE_FETCH)
618 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + fIh);
622 int ddy = ( int )( hh.x ) - fY0;
623 int ddz = ( int )( hh.y ) - fZ0;
624 int dds = CAMath::Abs( ddy ) + CAMath::Abs( ddz );
627 std::cout << fIh << ": hityz= " << hh.x << " " << hh.y << "(" << hh.x*stepY << " " << hh.y*stepZ << "), trackyz=" << fY0 << " " << fZ0 << "(" << fY0*stepY << " " << fZ0*stepZ << "), dy,dz,ds= " << ddy << " " << ddz << " " << dds << "(" << ddy*stepY << " " << ddz*stepZ << std::endl;
// Scan of the second range, with the same distance measure.
636 for ( unsigned int fIh = fHitYfst1; fIh < fHitYlst1; fIh++ ) {
638 #if defined(HLTCA_GPU_TEXTURE_FETCH)
639 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + fIh);
643 int ddy = ( int )( hh.x ) - fY0;
644 int ddz = ( int )( hh.y ) - fZ0;
645 int dds = CAMath::Abs( ddy ) + CAMath::Abs( ddz );
648 std::cout << fIh << ": hityz= " << hh.x << " " << hh.y << "(" << hh.x*stepY << " " << hh.y*stepZ << "), trackyz=" << fY0 << " " << fZ0 << "(" << fY0*stepY << " " << fZ0*stepZ << "), dy,dz,ds= " << ddy << " " << ddz << " " << dds << "(" << ddy*stepY << " " << ddz*stepZ << std::endl;
656 }// end of search for the closest hit
660 #ifndef EXTERN_ROW_HITS
661 tracklet.SetRowHit(iRow, -1);
663 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
669 std::cout << "hit search " << r.fItr << ", row " << iRow << " hit " << best << " found" << std::endl;
670 AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best, kRed, 1. );
671 AliHLTTPCCADisplay::Instance().Ask();
672 AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best, kWhite, 1 );
673 AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best );
678 #if defined(HLTCA_GPU_TEXTURE_FETCH)
679 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + best);
684 //std::cout<<"mark 3, "<<r.fItr<<std::endl;
// NOTE(review): the cast below is to the declared type of tParam itself, so
// it appears to be redundant.
686 tracker.GetErrors2( iRow, *( ( AliHLTTPCCATrackParam* )&tParam ), err2Y, err2Z );
687 //std::cout<<"mark 4"<<std::endl;
689 float y = y0 + hh.x * stepY;
690 float z = z0 + hh.y * stepZ;
// Acceptance window: the summed squared errors of track and hit are scaled by
// the square of HitPickUpFactor times 3.5, and each result is capped at 2.
694 const float kFactor = tracker.Param().HitPickUpFactor() * tracker.Param().HitPickUpFactor() * 3.5 * 3.5;
695 float sy2 = kFactor * ( tParam.GetErr2Y() + err2Y );
696 float sz2 = kFactor * ( tParam.GetErr2Z() + err2Z );
697 if ( sy2 > 2. ) sy2 = 2.;
698 if ( sz2 > 2. ) sz2 = 2.;
702 std::cout << "dy,sy= " << dy << " " << CAMath::Sqrt( sy2 ) << ", dz,sz= " << dz << " " << CAMath::Sqrt( sz2 ) << std::endl;
703 std::cout << "dy,dz= " << dy << " " << dz << ", sy,sz= " << CAMath::Sqrt( sy2 ) << " " << CAMath::Sqrt( sz2 ) << ", sy,sz= " << CAMath::Sqrt( kFactor*( tParam.GetErr2Y() + err2Y ) ) << " " << CAMath::Sqrt( kFactor*( tParam.GetErr2Z() + err2Z ) ) << std::endl;
// A hit whose squared residual in y or in z exceeds the window is rejected
// and the row is marked as having no hit.
706 if ( CAMath::FMulRZ( dy, dy ) > sy2 || CAMath::FMulRZ( dz, dz ) > sz2 ) {
709 std::cout << "found hit is out of the chi2 window\n " << std::endl;
712 #ifndef EXTERN_ROW_HITS
713 tracklet.SetRowHit(iRow, -1);
715 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
720 //if( SAVE() ) hitstore[ iRow ] = best;
721 //std::cout<<"hit search before filter: "<<r.fItr<<", row "<<iRow<<std::endl;
722 //AliHLTTPCCADisplay::Instance().DrawTracklet(tParam, hitstore, kBlue);
723 //AliHLTTPCCADisplay::Instance().Ask();
// Update the track with the accepted hit and record it for the row.
725 if ( !tParam.Filter( y, z, err2Y, err2Z, .99 ) ) {
728 std::cout << "tracklet " << r.fItr << " at row " << iRow << " : can not filter!!!! " << std::endl;
733 #ifndef EXTERN_ROW_HITS
734 tracklet.SetRowHit( iRow, best );
736 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = best;
740 std::cout << "tracklet " << r.fItr << " after filter at row " << iRow << " : " << std::endl;
742 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kRed );
743 AliHLTTPCCADisplay::Instance().Ask();
// Record iRow as the new fLastRow in stage 1, as the new fFirstRow otherwise.
748 if ( r.fStage == 1 ) r.fLastRow = iRow;
749 else r.fFirstRow = iRow;
// Copies the per-tracklet working state field by field: the thread memory
// members from rMemSrc to rMemDst and the track parameters from tParamSrc to
// tParamDst. The source objects are only read.
// Among the visible lines, the thread memory members fItr and fCurrentData
// are not copied.
755 GPUd() void AliHLTTPCCATrackletConstructor::CopyTrackletTempData( AliHLTTPCCAThreadMemory &rMemSrc, AliHLTTPCCAThreadMemory &rMemDst, AliHLTTPCCATrackParam &tParamSrc, AliHLTTPCCATrackParam &tParamDst)
757 //Copy Temporary Tracklet data from registers to global mem and vice versa
// Row range, current hit index, state flags, counters and last hit position.
758 rMemDst.fStartRow = rMemSrc.fStartRow;
759 rMemDst.fEndRow = rMemSrc.fEndRow;
760 rMemDst.fFirstRow = rMemSrc.fFirstRow;
761 rMemDst.fLastRow = rMemSrc.fLastRow;
762 rMemDst.fCurrIH = rMemSrc.fCurrIH;
763 rMemDst.fGo = rMemSrc.fGo;
764 rMemDst.fStage = rMemSrc.fStage;
765 rMemDst.fNHits = rMemSrc.fNHits;
766 rMemDst.fNMissed = rMemSrc.fNMissed;
767 rMemDst.fLastY = rMemSrc.fLastY;
768 rMemDst.fLastZ = rMemSrc.fLastZ;
// Track parameters, transferred through the getter and setter pairs.
770 tParamDst.SetSinPhi( tParamSrc.GetSinPhi() );
771 tParamDst.SetDzDs( tParamSrc.GetDzDs() );
772 tParamDst.SetQPt( tParamSrc.GetQPt() );
773 tParamDst.SetSignCosPhi( tParamSrc.GetSignCosPhi() );
774 tParamDst.SetChi2( tParamSrc.GetChi2() );
775 tParamDst.SetNDF( tParamSrc.GetNDF() );
// All 15 covariance entries, 0 to 14.
776 tParamDst.SetCov( 0, tParamSrc.GetCov(0) );
777 tParamDst.SetCov( 1, tParamSrc.GetCov(1) );
778 tParamDst.SetCov( 2, tParamSrc.GetCov(2) );
779 tParamDst.SetCov( 3, tParamSrc.GetCov(3) );
780 tParamDst.SetCov( 4, tParamSrc.GetCov(4) );
781 tParamDst.SetCov( 5, tParamSrc.GetCov(5) );
782 tParamDst.SetCov( 6, tParamSrc.GetCov(6) );
783 tParamDst.SetCov( 7, tParamSrc.GetCov(7) );
784 tParamDst.SetCov( 8, tParamSrc.GetCov(8) );
785 tParamDst.SetCov( 9, tParamSrc.GetCov(9) );
786 tParamDst.SetCov( 10, tParamSrc.GetCov(10) );
787 tParamDst.SetCov( 11, tParamSrc.GetCov(11) );
788 tParamDst.SetCov( 12, tParamSrc.GetCov(12) );
789 tParamDst.SetCov( 13, tParamSrc.GetCov(13) );
790 tParamDst.SetCov( 14, tParamSrc.GetCov(14) );
// Position.
791 tParamDst.SetX( tParamSrc.GetX() );
792 tParamDst.SetY( tParamSrc.GetY() );
793 tParamDst.SetZ( tParamSrc.GetZ() );
// Scheduler step: determines which tracklet the calling thread processes next
// for the given direction (Reverse) and row block (RowBlock).
// Return values visible in this excerpt:
//   -2  sMem.fNextTrackletCount is 0, no more track in this row block
//   -1  no track in this row block for this thread
//   sMem.fNextTrackletFirst + threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS
//       on the first run
// mustInit is written on the path that reads the row block tracklet list.
// NOTE(review): this excerpt does not show every line of the function (braces,
// #else and #endif lines, synchronisation and the final return are missing).
796 GPUd() int AliHLTTPCCATrackletConstructor::FetchTracklet(AliHLTTPCCATracker &tracker, AliHLTTPCCASharedMemory &sMem, int Reverse, int RowBlock, int &mustInit)
798 //Fetch a new tracklet to be processed by this thread
// Snapshot of the first-run flag, taken before the flag may be changed below.
800 int nextTracketlFirstRun = sMem.fNextTrackletFirstRun;
// Only the thread with index TRACKLET_CONSTRUCTOR_NMEMTHREDS fills the shared
// scheduling fields.
801 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
803 sMem.fNTracklets = *tracker.NTracklets();
804 if (sMem.fNextTrackletFirstRun)
// First run: the start index and count for this block are read from
// BlockStartingTracklet.
806 #ifdef HLTCA_GPU_SCHED_FIXED_START
807 const int iSlice = tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT;
808 const int nSliceBlockOffset = HLTCA_GPU_BLOCK_COUNT * iSlice / tracker.GPUParametersConst()->fGPUnSlices;
809 const uint2 &nTracklet = tracker.BlockStartingTracklet()[blockIdx.x - nSliceBlockOffset];
811 sMem.fNextTrackletCount = nTracklet.y;
812 if (sMem.fNextTrackletCount == 0)
814 sMem.fNextTrackletFirstRun = 0;
// Nothing is handed out if the first tracklet does not start in this row block.
818 if (tracker.TrackletStartHits()[nTracklet.x].RowIndex() / HLTCA_GPU_SCHED_ROW_STEP != RowBlock)
820 sMem.fNextTrackletCount = 0;
824 sMem.fNextTrackletFirst = nTracklet.x;
825 sMem.fNextTrackletNoDummy = 1;
// Number of tracklets to take: x minus y of RowBlockPos, limited to
// HLTCA_GPU_THREAD_COUNT - TRACKLET_CONSTRUCTOR_NMEMTHREDS and clamped at 0.
832 const int nFetchTracks = CAMath::Max(CAMath::Min((*tracker.RowBlockPos(Reverse, RowBlock)).x - (*tracker.RowBlockPos(Reverse, RowBlock)).y, HLTCA_GPU_THREAD_COUNT - TRACKLET_CONSTRUCTOR_NMEMTHREDS), 0);
833 sMem.fNextTrackletCount = nFetchTracks;
// Atomically advance y by that number; nUseTrack receives the result of
// AtomicAdd (presumably the value before the addition - confirm).
834 const int nUseTrack = nFetchTracks ? CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).y, nFetchTracks) : 0;
835 sMem.fNextTrackletFirst = nUseTrack;
// Number of reserved positions that lie beyond the current x.
837 const int nFillTracks = CAMath::Min(nFetchTracks, nUseTrack + nFetchTracks - (*((volatile int2*) (tracker.RowBlockPos(Reverse, RowBlock)))).x);
// Advance x by that number and write dummy entries (-3) into the list; an
// error code is set if the list would exceed HLTCA_GPU_MAX_TRACKLETS.
840 const int nStartFillTrack = CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).x, nFillTracks);
841 if (nFillTracks + nStartFillTrack >= HLTCA_GPU_MAX_TRACKLETS)
843 tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_ROWBLOCK_TRACKLET_OVERFLOW;
845 for (int i = 0;i < nFillTracks;i++)
847 tracker.RowBlockTracklets(Reverse, RowBlock)[(nStartFillTrack + i) % HLTCA_GPU_MAX_TRACKLETS] = -3; //Dummy filling track
850 sMem.fNextTrackletNoDummy = 0;
855 if (sMem.fNextTrackletCount == 0)
857 return(-2); //No more track in this RowBlock
// NOTE(review): this condition tests HLTCA_GPU_TRACKLET_CONSTRUCTOR_NMEMTHREDS
// while the surrounding code uses TRACKLET_CONSTRUCTOR_NMEMTHREDS; an
// undefined macro evaluates to 0 in #if - confirm both names are defined.
859 #if HLTCA_GPU_TRACKLET_CONSTRUCTOR_NMEMTHREDS > 0
860 else if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS)
865 else if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS >= sMem.fNextTrackletCount)
867 return(-1); //No track in this RowBlock for this thread
// First run: the tracklet index follows directly from the thread index.
869 else if (nextTracketlFirstRun)
871 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS) sMem.fNextTrackletFirstRun = 0;
873 return(sMem.fNextTrackletFirst + threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS);
// Later runs: the tracklet id is read from the row block list at the position
// of this thread. mustInit is set when that position lies below the w member
// of RowBlockPos.
877 const int nTrackPos = sMem.fNextTrackletFirst + threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS;
878 mustInit = (nTrackPos < tracker.RowBlockPos(Reverse, RowBlock)->w);
879 volatile int* const ptrTracklet = &tracker.RowBlockTracklets(Reverse, RowBlock)[nTrackPos % HLTCA_GPU_MAX_TRACKLETS];
// Busy-wait while the entry still reads -1, presumably until another thread
// has stored the tracklet id there - confirm. The inner loop only increments
// a counter in shared memory.
882 while ((nTracklet = *ptrTracklet) == -1)
884 for (int i = 0;i < 10000;i++)
885 sMem.fNextTrackletStupidDummy++;
889 tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_SCHEDULE_COLLISION;
897 GPUd() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorNewGPU(AliHLTTPCCATracker *pTracker)
899 //Main Tracklet construction function that calls the scheduler (FetchTracklet), then processes the tracklet (mainly UpdateTracklet) and at the end stores the tracklet.
900 //Can also dispatch a tracklet to be rescheduled
901 #ifdef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
902 pTracker[0].BlockStartingTracklet()[0].x = HLTCA_GPU_EMULATION_SINGLE_TRACKLET;
903 pTracker[0].BlockStartingTracklet()[0].y = 1;
904 for (int i = 1;i < HLTCA_GPU_BLOCK_COUNT;i++)
906 pTracker[0].BlockStartingTracklet()[i].x = pTracker[0].BlockStartingTracklet()[i].y = 0;
910 GPUshared() AliHLTTPCCASharedMemory sMem;
912 #ifdef HLTCA_GPU_SCHED_FIXED_START
913 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
915 sMem.fNextTrackletFirstRun = 1;
920 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
921 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
928 for (int iReverse = 0;iReverse < 2;iReverse++)
930 for (int iRowBlock = 0;iRowBlock < HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1;iRowBlock++)
932 #ifdef HLTCA_GPU_SCHED_FIXED_SLICE
933 int iSlice = tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT;
935 for (int iSlice = 0;iSlice < pTracker[0].GPUParametersConst()->fGPUnSlices;iSlice++)
938 AliHLTTPCCATracker &tracker = pTracker[iSlice];
939 if (sMem.fNextTrackletFirstRun && iSlice != tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT)
943 /*if (!sMem.fNextTrackletFirstRun && tracker.RowBlockPos(1, HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP)->x <= tracker.RowBlockPos(1, HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP)->y)
947 int sharedRowsInitialized = 0;
951 while ((iTracklet = FetchTracklet(tracker, sMem, iReverse, iRowBlock, mustInit)) != -2)
953 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
954 CAMath::AtomicMax(&sMem.fMaxSync, threadSync);
956 threadSync = CAMath::Min(sMem.fMaxSync, 100000000 / blockDim.x / gridDim.x);
958 #ifndef HLTCA_GPU_PREFETCHDATA
959 if (!sharedRowsInitialized)
961 #ifdef HLTCA_GPU_PREFETCH_ROWBLOCK_ONLY
964 for (int i = CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP) * sizeof(AliHLTTPCCARow) / sizeof(int) + threadIdx.x;i < (HLTCA_ROW_COUNT - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP) * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
966 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker.SliceDataRows())[i];
971 for (int i = CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP) * sizeof(AliHLTTPCCARow) / sizeof(int) + threadIdx.x;i < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT) * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
973 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker.SliceDataRows())[i];
977 for (int i = threadIdx.x;i < HLTCA_ROW_COUNT * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
979 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker.SliceDataRows())[i];
982 sharedRowsInitialized = 1;
985 #ifdef HLTCA_GPU_RESCHED
986 short2 storeToRowBlock;
987 int storePosition = 0;
988 if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
990 const int nReverse = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
991 const int nRowBlock = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
992 sMem.fTrackletStoreCount[nReverse][nRowBlock] = 0;
996 AliHLTTPCCATrackParam tParam;
997 AliHLTTPCCAThreadMemory rMem;
999 rMem.fCurrentData = 0;
1001 #ifdef HLTCA_GPU_EMULATION_DEBUG_TRACKLET
1002 if (iTracklet == HLTCA_GPU_EMULATION_DEBUG_TRACKLET)
1004 tracker.GPUParameters()->fGPUError = 1;
1007 AliHLTTPCCAThreadMemory &rMemGlobal = tracker.GPUTrackletTemp()[iTracklet].fThreadMem;
1008 AliHLTTPCCATrackParam &tParamGlobal = tracker.GPUTrackletTemp()[iTracklet].fParam;
1011 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
1013 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
1014 rMem.fCurrIH = id.HitIndex();
1019 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
1021 else if (iTracklet >= 0)
1023 CopyTrackletTempData( rMemGlobal, rMem, tParamGlobal, tParam );
1025 #ifdef HLTCA_GPU_PREFETCHDATA
1026 else if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS)
1028 ReadData(threadIdx.x, sMem, rMem, tracker, iReverse ? (HLTCA_ROW_COUNT - 1 - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP) : (CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP)));
1031 rMem.fItr = iTracklet;
1032 rMem.fGo = (iTracklet >= 0);
1034 #ifdef HLTCA_GPU_RESCHED
1035 storeToRowBlock.x = iRowBlock + 1;
1036 storeToRowBlock.y = iReverse;
1037 #ifdef HLTCA_GPU_PREFETCHDATA
1038 rMem.fCurrentData ^= 1;
1043 for (int j = HLTCA_ROW_COUNT - 1 - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP;j >= CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP);j--)
1045 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1046 if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && !(j >= rMem.fEndRow || ( j >= rMem.fStartRow && j - rMem.fStartRow % 2 == 0)))
1047 pTracker[0].fStageAtSync[threadSync++ * blockDim.x * gridDim.x + blockIdx.x * blockDim.x + threadIdx.x] = rMem.fStage + 1;
1049 #ifdef HLTCA_GPU_PREFETCHDATA
1050 if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS && j > CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP))
1052 ReadData(threadIdx.x, sMem, rMem, tracker, j - 1);
1058 UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1059 if (rMem.fNMissed > kMaxRowGap)
1062 #ifndef HLTCA_GPU_PREFETCHDATA
1067 #ifdef HLTCA_GPU_PREFETCHDATA
1069 rMem.fCurrentData ^= 1;
1073 if (iTracklet >= 0 && (!rMem.fGo || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
1075 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1080 for (int j = CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP);j < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT);j++)
1082 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1083 if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && j >= rMem.fStartRow && (rMem.fStage > 0 || rMem.fCurrIH >= 0 || (j - rMem.fStartRow) % 2 == 0 ))
1084 pTracker[0].fStageAtSync[threadSync++ * blockDim.x * gridDim.x + blockIdx.x * blockDim.x + threadIdx.x] = rMem.fStage + 1;
1086 #ifdef HLTCA_GPU_PREFETCHDATA
1087 if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS && j < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT) - 1)
1089 ReadData(threadIdx.x, sMem, rMem, tracker, j + 1);
1095 UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1096 #ifndef HLTCA_GPU_PREFETCHDATA
1097 //if (rMem.fNMissed > kMaxRowGap || rMem.fGo == 0) break; //DR!!! CUDA Crashes with this enabled
1100 #ifdef HLTCA_GPU_PREFETCHDATA
1102 rMem.fCurrentData ^= 1;
1105 if (rMem.fGo && (rMem.fNMissed > kMaxRowGap || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
1107 #if defined(HLTCA_GPU_PREFETCHDATA) | !defined(HLTCA_GPU_PREFETCH_ROWBLOCK_ONLY)
1108 if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) )
1110 if ( !tParam.TransportToX( sMem.fRows[ rMem.fEndRow ].X(), tracker.Param().ConstBz(), .999 ) )
1117 storeToRowBlock.x = (HLTCA_ROW_COUNT - rMem.fEndRow) / HLTCA_GPU_SCHED_ROW_STEP;
1118 storeToRowBlock.y = 1;
1124 if (iTracklet >= 0 && !rMem.fGo)
1126 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1130 if (rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
1132 CopyTrackletTempData( rMem, rMemGlobal, tParam, tParamGlobal );
1133 storePosition = CAMath::AtomicAdd(&sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x], 1);
1138 for (int j = rMem.fStartRow;j < HLTCA_ROW_COUNT;j++)
1140 UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1141 if (!rMem.fGo) break;
1148 if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) ) rMem.fGo = 0;
1151 for (int j = rMem.fEndRow;j >= 0;j--)
1153 if (!rMem.fGo) break;
1154 UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1157 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1160 #ifdef HLTCA_GPU_RESCHED
1162 if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
1164 const int nReverse = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
1165 const int nRowBlock = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
1166 if (sMem.fTrackletStoreCount[nReverse][nRowBlock])
1168 sMem.fTrackletStoreCount[nReverse][nRowBlock] = CAMath::AtomicAdd(&tracker.RowBlockPos(nReverse, nRowBlock)->x, sMem.fTrackletStoreCount[nReverse][nRowBlock]);
1172 if (iTracklet >= 0 && rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
1174 tracker.RowBlockTracklets(storeToRowBlock.y, storeToRowBlock.x)[sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x] + storePosition] = iTracklet;
1182 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
// Fills the initial (forward, first index 0) row-block bookkeeping for one tracklet.
//   iTracklet - index into tracker.TrackletStartHits(); selects the start hit handled here
//   tracker   - tracker whose RowBlockPos / RowBlockTracklets tables are written
// A row block is identified by id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP.
// NOTE(review): the embedded line numbers jump (e.g. 1187 -> 1189), so brace / #else / #endif
// lines are not shown here; the exact nesting of the statements below must be confirmed
// against the full file.
1187 GPUd() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(int iTracklet, AliHLTTPCCATracker &tracker)
1189 //Initialize Row Blocks
1191 #ifndef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
// Start hit (row index + hit index) of this tracklet.
1192 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
1193 #ifdef HLTCA_GPU_SCHED_FIXED_START
// With fixed-start scheduling only tracklets at or above fScheduleFirstDynamicTracklet
// are entered into the row-block tables.
1194 const int firstDynamicTracklet = tracker.GPUParameters()->fScheduleFirstDynamicTracklet;
1195 if (iTracklet >= firstDynamicTracklet)
// The start row is rounded down to the first row of its block (integer divide, then multiply)
// and clamped to at least 1; the .z entry of RowStartHitCountOffset() for that row is treated
// as the index of the first tracklet of the block, but never below firstDynamicTracklet.
1198 const int firstTrackletInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[CAMath::Max(id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP * HLTCA_GPU_SCHED_ROW_STEP, 1)].z);
// Only the first tracklet of a block computes and publishes the block's tracklet count.
1199 if (iTracklet == firstTrackletInRowBlock)
1201 const int firstRowInNextBlock = (id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP + 1) * HLTCA_GPU_SCHED_ROW_STEP;
1202 int trackletsInRowBlock;
// Last block: count runs up to the total number of tracklets.
// NOTE(review): the "- 3" margin presumably reflects that no start hits exist in the last rows - TODO confirm.
1203 if (firstRowInNextBlock >= HLTCA_ROW_COUNT - 3)
1204 trackletsInRowBlock = *tracker.NTracklets() - firstTrackletInRowBlock;
// Otherwise (the connecting "else" is presumably on an omitted line): count is the distance
// to the first tracklet of the next block.
1206 trackletsInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[firstRowInNextBlock].z) - firstTrackletInRowBlock;
// Both the x and w components of the block position start out as the tracklet count.
1208 tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->x = trackletsInRowBlock;
1209 tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->w = trackletsInRowBlock;
// Store this tracklet's index at its offset within the block's tracklet list.
// NOTE(review): presumably executed for every tracklet, not only the first of the block - confirm nesting.
1211 tracker.RowBlockTracklets(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)[iTracklet - firstTrackletInRowBlock] = iTracklet;
// Kernel-side entry point for the row-block initialisation of one slice.
//   iSlice - index of the tracker inside the gAliHLTTPCCATracker array
// Each thread handles the tracklet whose index equals its global thread index.
1216 GPUg() void AliHLTTPCCATrackletConstructorInit(int iSlice)
1218 //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit
1219 AliHLTTPCCATracker &tracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker )[iSlice];
// Global thread index across all blocks of the launch.
1220 int i = blockIdx.x * blockDim.x + threadIdx.x;
// Threads beyond the number of tracklets have nothing to do.
1221 if (i >= *tracker.NTracklets()) return;
1222 AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(i, tracker);
// Kernel-side entry point for the scheduled tracklet constructor.
// Takes no arguments: it forwards the gAliHLTTPCCATracker pointer (cast to a tracker array)
// to the class member of the same name, which does all the work.
1225 GPUg() void AliHLTTPCCATrackletConstructorNewGPU()
1227 //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorNewGPU
1228 AliHLTTPCCATracker *pTracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker );
1229 AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorNewGPU(pTracker);
// Sequential tracklet construction: processes every tracklet of the given tracker one after
// another, each in a single pass (forward over the rows, then backward, then store).
//   tracker - tracker providing start hits, rows and parameters; receives the stored tracklets
// NOTE(review): the embedded line numbers jump (e.g. 1246 -> 1251, 1259 -> 1266), so some
// statements (braces, further rMem initialisation such as fGo) are not shown here; the
// comments below describe only the visible lines.
1233 GPUd() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorNewCPU(AliHLTTPCCATracker &tracker)
1235 //Tracklet constructor simple CPU Function that does not need a scheduler
1236 GPUshared() AliHLTTPCCASharedMemory sMem;
1237 sMem.fNTracklets = *tracker.NTracklets();
1238 for (int iTracklet = 0;iTracklet < *tracker.NTracklets();iTracklet++)
// Per-tracklet working state: fit parameters and bookkeeping.
1240 AliHLTTPCCATrackParam tParam;
1241 AliHLTTPCCAThreadMemory rMem;
1243 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
// All row markers start at the row of the start hit; the current hit is the start hit.
1245 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
1246 rMem.fCurrIH = id.HitIndex();
1251 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
1253 rMem.fItr = iTracklet;
// Forward pass from the start row up to the last row; stops as soon as fGo is cleared.
1256 for (int j = rMem.fStartRow;j < tracker.Param().NRows();j++)
// The leading arguments (1, 1, 0, iTracklet) stand in for the gridDim.x, blockDim.x,
// blockIdx.x, threadIdx.x values that the GPU variant in this file passes.
1258 UpdateTracklet(1, 1, 0, iTracklet, sMem, rMem, tracker, tParam, j);
1259 if (!rMem.fGo) break;
// Move the parameters to the x position of row fEndRow before going backward; a failed
// transport clears fGo.
// NOTE(review): .999 is presumably the sin(phi) limit of TransportToX - TODO confirm.
1266 if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) ) rMem.fGo = 0;
// Backward pass from fEndRow down to row 0; skipped/stopped when fGo is zero.
1269 for (int j = rMem.fEndRow;j >= 0;j--)
1271 if (!rMem.fGo) break;
1272 UpdateTracklet( 1, 1, 0, iTracklet, sMem, rMem, tracker, tParam, j);
// Write the result for this tracklet.
1275 StoreTracklet( 1, 1, 0, iTracklet, sMem, rMem, tracker, tParam );