1 // @(#) $Id: AliHLTTPCCATrackletConstructor.cxx 27042 2008-07-02 12:06:02Z richterm $
2 // **************************************************************************
3 // This file is property of and copyright by the ALICE HLT Project *
4 // ALICE Experiment at CERN, All rights reserved. *
6 // Primary Authors: Sergey Gorbunov <sergey.gorbunov@kip.uni-heidelberg.de> *
7 // Ivan Kisel <kisel@kip.uni-heidelberg.de> *
8 // for The ALICE HLT Project. *
10 // Permission to use, copy, modify and distribute this software and its *
11 // documentation strictly for non-commercial purposes is hereby granted *
12 // without fee, provided that the above copyright notice appears in all *
13 // copies and that both the copyright notice and this permission notice *
14 // appear in the supporting documentation. The authors make no claims *
15 // about the suitability of this software for any purpose. It is *
16 // provided "as is" without express or implied warranty. *
18 //***************************************************************************
20 #include "AliHLTTPCCATracker.h"
21 #include "AliHLTTPCCATrackParam.h"
22 #include "AliHLTTPCCATrackParam.h"
23 #include "AliHLTTPCCAGrid.h"
24 #include "AliHLTTPCCAMath.h"
25 #include "AliHLTTPCCADef.h"
26 #include "AliHLTTPCCATracklet.h"
27 #include "AliHLTTPCCATrackletConstructor.h"
28 #include "MemoryAssignmentHelpers.h"
30 //#include "AliHLTTPCCAPerformance.h"
36 #include "AliHLTTPCCADisplay.h"
41 GPUd() void AliHLTTPCCATrackletConstructor::InitTracklet( AliHLTTPCCATrackParam &tParam )
43 //Initialize Tracklet Parameters using default values
47 GPUd() void AliHLTTPCCATrackletConstructor::ReadData
48 #ifndef HLTCA_GPU_PREFETCHDATA
49 ( int /*iThread*/, AliHLTTPCCASharedMemory& /*s*/, AliHLTTPCCAThreadMemory& /*r*/, AliHLTTPCCATracker& /*tracker*/, int /*iRow*/ )
51 //Prefetch Data to shared memory
53 ( int iThread, AliHLTTPCCASharedMemory& s, AliHLTTPCCAThreadMemory& r, AliHLTTPCCATracker& tracker, int iRow )
55 // reconstruction of tracklets, read data step
56 const AliHLTTPCCARow &row = tracker.Row( iRow );
57 //bool jr = !r.fCurrentData;
59 // copy hits, grid content and links
61 // FIXME: inefficient copy
62 //const int numberOfHitsAligned = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits());
65 #ifdef HLTCA_GPU_REORDERHITDATA
66 ushort2 *sMem1 = reinterpret_cast<ushort2 *>( s.fData[jr] );
67 for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
68 sMem1[i].x = tracker.HitDataY( row, i );
69 sMem1[i].y = tracker.HitDataZ( row, i );
72 ushort_v *sMem1 = reinterpret_cast<ushort_v *>( s.fData[jr] );
73 for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
74 sMem1[i] = tracker.HitDataY( row, i );
77 ushort_v *sMem1a = reinterpret_cast<ushort_v *>( s.fData[jr] ) + numberOfHitsAligned;
78 for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
79 sMem1a[i] = tracker.HitDataZ( row, i );
81 #endif //HLTCA_GPU_REORDERHITDATA
83 short *sMem2 = reinterpret_cast<short *>( s.fData[jr] ) + 2 * numberOfHitsAligned;
84 for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
85 sMem2[i] = tracker.HitLinkUpData( row, i );
88 unsigned short *sMem3 = reinterpret_cast<unsigned short *>( s.fData[jr] ) + 3 * numberOfHitsAligned;
89 const int n = row.FullSize(); // + grid content size
90 for ( int i = iThread; i < n; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
91 sMem3[i] = tracker.FirstHitInBin( row, i );
94 /*for (int k = 0;k < 2;k++)
96 HLTCA_GPU_ROWCOPY* sharedMem;
97 const HLTCA_GPU_ROWCOPY* sourceMem;
102 sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitDataY(row) );
103 sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
104 copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
107 sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitDataZ(row) );
108 sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
109 copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
112 sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitLinkUpData(row) );
113 sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
114 copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
117 sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.FirstHitInBin(row) );
118 sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
119 copyCount = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.FullSize()) * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
122 for (int i = iThread;i < copyCount;i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
124 sharedMem[i] = sourceMem[i];
128 for (unsigned int i = iThread;i < row.FullSize() * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
130 reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[!r.fCurrentData] ))[i] = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.FirstHitInBin(row) )[i];
133 const HLTCA_GPU_ROWCOPY* const sourceMem = (const HLTCA_GPU_ROWCOPY *) &row;
134 HLTCA_GPU_ROWCOPY* const sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> ( &s.fRow[!r.fCurrentData] );
135 for (unsigned int i = iThread;i < sizeof(AliHLTTPCCARow) / sizeof(HLTCA_GPU_ROWCOPY);i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
137 sharedMem[i] = sourceMem[i];
139 #endif //!HLTCA_GPU_PREFETCHDATA
143 GPUd() void AliHLTTPCCATrackletConstructor::StoreTracklet
144 ( int /*nBlocks*/, int /*nThreads*/, int /*iBlock*/, int /*iThread*/,
145 AliHLTTPCCASharedMemory
150 #endif //!HLTCA_GPUCODE
151 , AliHLTTPCCAThreadMemory &r, AliHLTTPCCATracker &tracker, AliHLTTPCCATrackParam &tParam )
153 // reconstruction of tracklets, tracklet store step
155 //AliHLTTPCCAPerformance::Instance().HNHitsPerTrackCand()->Fill(r.fNHits);
159 //std::cout<<"tracklet to store: "<<r.fItr<<", nhits = "<<r.fNHits<<std::endl;
162 if ( r.fNHits < TRACKLET_SELECTOR_MIN_HITS ) {
168 if ( 1. / .5 < CAMath::Abs( tParam.QPt() ) ) { //SG!!!
176 const float *c = tParam.Cov();
177 for ( int i = 0; i < 15; i++ ) ok = ok && CAMath::Finite( c[i] );
178 for ( int i = 0; i < 5; i++ ) ok = ok && CAMath::Finite( tParam.Par()[i] );
179 ok = ok && ( tParam.X() > 50 );
181 if ( c[0] <= 0 || c[2] <= 0 || c[5] <= 0 || c[9] <= 0 || c[14] <= 0 ) ok = 0;
190 if ( !SAVE() ) return;
192 AliHLTTPCCATracklet &tracklet = tracker.Tracklets()[r.fItr];
194 tracklet.SetNHits( r.fNHits );
196 if ( r.fNHits > 0 ) {
199 std::cout << "store tracklet " << r.fItr << ", nhits = " << r.fNHits << std::endl;
200 if ( AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 1. ) ) {
201 AliHLTTPCCADisplay::Instance().Ask();
205 if ( CAMath::Abs( tParam.Par()[4] ) < 1.e-4 ) tParam.SetPar( 4, 1.e-4 );
206 if (r.fStartRow < r.fFirstRow) r.fFirstRow = r.fStartRow;
207 tracklet.SetFirstRow( r.fFirstRow );
208 tracklet.SetLastRow( r.fLastRow );
210 tracklet.SetParam( tParam.fParam );
212 tracklet.SetParam( tParam.GetParam() );
213 #endif //HLTCA_GPUCODE
214 int w = ( r.fNHits << 16 ) + r.fItr;
215 for ( int iRow = r.fFirstRow; iRow <= r.fLastRow; iRow++ ) {
216 #ifdef EXTERN_ROW_HITS
217 int ih = tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr];
219 int ih = tracklet.RowHit( iRow );
220 #endif //EXTERN_ROW_HITS
222 #if defined(HLTCA_GPUCODE) & !defined(HLTCA_GPU_PREFETCHDATA) & !defined(HLTCA_GPU_PREFETCH_ROWBLOCK_ONLY)
223 tracker.MaximizeHitWeight( s.fRows[ iRow ], ih, w );
225 tracker.MaximizeHitWeight( tracker.Row( iRow ), ih, w );
226 #endif //HLTCA_GPUCODE & !HLTCA_GPU_PREFETCHDATA & !HLTCA_GPU_PREFETCH_ROWBLOCK_ONLY
233 GPUd() void AliHLTTPCCATrackletConstructor::UpdateTracklet
234 ( int /*nBlocks*/, int /*nThreads*/, int /*iBlock*/, int /*iThread*/,
235 AliHLTTPCCASharedMemory
240 #endif //HLTCA_GPUCODE
241 , AliHLTTPCCAThreadMemory &r, AliHLTTPCCATracker &tracker, AliHLTTPCCATrackParam &tParam, int iRow )
243 // reconstruction of tracklets, tracklets update step
245 //std::cout<<"Update tracklet: "<<r.fItr<<" "<<r.fGo<<" "<<r.fStage<<" "<<iRow<<std::endl;
246 bool drawSearch = 0;//r.fItr==2;
247 bool drawFit = 0;//r.fItr==2;
248 bool drawFitted = drawFit ;//|| 1;//r.fItr==16;
250 if ( !r.fGo ) return;
252 #ifndef EXTERN_ROW_HITS
253 AliHLTTPCCATracklet &tracklet = tracker.Tracklets()[r.fItr];
254 #endif //EXTERN_ROW_HITS
256 #ifdef HLTCA_GPU_PREFETCHDATA
257 const AliHLTTPCCARow &row = s.fRow[r.fCurrentData];
258 #elif defined(HLTCA_GPUCODE)
259 const AliHLTTPCCARow &row = s.fRows[iRow];
261 const AliHLTTPCCARow &row = tracker.Row( iRow );
262 #endif //HLTCA_GPU_PREFETCHDATA
264 float y0 = row.Grid().YMin();
265 float stepY = row.HstepY();
266 float z0 = row.Grid().ZMin();
267 float stepZ = row.HstepZ();
268 float stepYi = row.HstepYi();
269 float stepZi = row.HstepZi();
271 if ( r.fStage == 0 ) { // fitting part
274 if ( iRow < r.fStartRow || r.fCurrIH < 0 ) break;
276 if ( ( iRow - r.fStartRow ) % 2 != 0 )
278 #ifndef EXTERN_ROW_HITS
279 tracklet.SetRowHit(iRow, -1);
281 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
282 #endif //EXTERN_ROW_HITS
283 break; // SG!!! - jump over the row
286 //#ifdef HLTCA_GPU_PREFETCHDATA
287 // uint4 *tmpint4 = s.fData[r.fCurrentData];
290 //#ifdef HLTCA_GPU_REORDERHITDATA
291 // hh = reinterpret_cast<ushort2*>( tmpint4 )[r.fCurrIH];
293 //#ifdef HLTCA_GPU_PREFETCHDATA
294 // hh.x = reinterpret_cast<ushort_v*>( tmpint4 )[r.fCurrIH];
295 // hh.y = reinterpret_cast<ushort_v*>( tmpint4 )[NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits()) + r.fCurrIH];
297 #if defined(HLTCA_GPU_TEXTURE_FETCH)
298 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + r.fCurrIH);
300 hh = tracker.HitData(row)[r.fCurrIH];
301 #endif //HLTCA_GPU_TEXTURE_FETCH
305 int oldIH = r.fCurrIH;
306 //#ifdef HLTCA_GPU_PREFETCHDATA
307 // r.fCurrIH = reinterpret_cast<short*>( tmpint4 )[2 * NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits()) + r.fCurrIH]; // read from linkup data
309 #if defined(HLTCA_GPU_TEXTURE_FETCH)
310 r.fCurrIH = tex1Dfetch(gAliTexRefs, ((char*) tracker.Data().HitLinkUpData(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + r.fCurrIH);
312 r.fCurrIH = tracker.HitLinkUpData(row)[r.fCurrIH]; // read from linkup data
313 #endif //HLTCA_GPU_TEXTURE_FETCH
317 float y = y0 + hh.x * stepY;
318 float z = z0 + hh.y * stepZ;
320 if ( drawFit ) std::cout << " fit tracklet: new hit " << oldIH << ", xyz=" << x << " " << y << " " << z << std::endl;
323 if ( iRow == r.fStartRow ) {
330 if ( drawFit ) std::cout << " fit tracklet " << r.fItr << ", row " << iRow << " first row" << std::endl;
335 float dx = x - tParam.X();
336 float dy = y - r.fLastY;//tParam.Y();
337 float dz = z - r.fLastZ;//tParam.Z();
341 float ri = 1. / CAMath::Sqrt( dx * dx + dy * dy );
342 if ( iRow == r.fStartRow + 2 ) { //SG!!! important - thanks to Matthias
343 tParam.SetSinPhi( dy*ri );
344 tParam.SetSignCosPhi( dx );
345 tParam.SetDzDs( dz*ri );
346 //std::cout << "Init. errors... " << r.fItr << std::endl;
347 tracker.GetErrors2( iRow, tParam, err2Y, err2Z );
348 //std::cout << "Init. errors = " << err2Y << " " << err2Z << std::endl;
349 tParam.SetCov( 0, err2Y );
350 tParam.SetCov( 2, err2Z );
354 std::cout << " fit tracklet " << r.fItr << ", row " << iRow << " transporting.." << std::endl;
355 std::cout << " params before transport=" << std::endl;
359 float sinPhi, cosPhi;
360 if ( r.fNHits >= 10 && CAMath::Abs( tParam.SinPhi() ) < .99 ) {
361 sinPhi = tParam.SinPhi();
362 cosPhi = CAMath::Sqrt( 1 - sinPhi * sinPhi );
368 if ( drawFit ) std::cout << "sinPhi0 = " << sinPhi << ", cosPhi0 = " << cosPhi << std::endl;
370 if ( !tParam.TransportToX( x, sinPhi, cosPhi, tracker.Param().ConstBz(), -1 ) ) {
372 if ( drawFit ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not transport!!" << std::endl;
374 #ifndef EXTERN_ROW_HITS
375 tracklet.SetRowHit( iRow, -1 );
377 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
378 #endif //EXTERN_ROW_HITS
381 //std::cout<<"mark1 "<<r.fItr<<std::endl;
383 tracker.GetErrors2( iRow, tParam.GetZ(), sinPhi, cosPhi, tParam.GetDzDs(), err2Y, err2Z );
384 //std::cout<<"mark2"<<std::endl;
388 std::cout << " params after transport=" << std::endl;
390 std::cout << "fit tracklet before filter: " << r.fItr << ", row " << iRow << " errs=" << err2Y << " " << err2Z << std::endl;
391 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 2., 1 );
392 AliHLTTPCCADisplay::Instance().Ask();
395 if ( !tParam.Filter( y, z, err2Y, err2Z, .99 ) ) {
397 if ( drawFit ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not filter!!" << std::endl;
399 #ifndef EXTERN_ROW_HITS
400 tracklet.SetRowHit( iRow, -1 );
402 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
403 #endif //EXTERN_ROW_HITS
407 #ifndef EXTERN_ROW_HITS
408 tracklet.SetRowHit( iRow, oldIH );
410 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = oldIH;
411 #endif //!EXTERN_ROW_HITS
414 std::cout << "fit tracklet after filter " << r.fItr << ", row " << iRow << std::endl;
416 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kGreen, 2. );
417 AliHLTTPCCADisplay::Instance().Ask();
426 if ( r.fCurrIH < 0 ) {
428 if ( drawFitted ) std::cout << "fitted tracklet " << r.fItr << ", nhits=" << r.fNHits << std::endl;
431 //AliHLTTPCCAPerformance::Instance().HNHitsPerSeed()->Fill(r.fNHits);
432 if ( r.fNHits < 3 ) { r.fNHits = 0; r.fGo = 0;}//SG!!!
433 if ( CAMath::Abs( tParam.SinPhi() ) > .999 ) {
435 if ( drawFitted ) std::cout << " fitted tracklet error: sinPhi=" << tParam.SinPhi() << std::endl;
437 r.fNHits = 0; r.fGo = 0;
439 //tParam.SetCosPhi( CAMath::Sqrt(1-tParam.SinPhi()*tParam.SinPhi()) );
443 std::cout << "fitted tracklet " << r.fItr << " miss=" << r.fNMissed << " go=" << r.fGo << std::endl;
445 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue );
446 AliHLTTPCCADisplay::Instance().Ask();
450 } else { // forward/backward searching part
454 std::cout << "search tracklet " << r.fItr << " row " << iRow << " miss=" << r.fNMissed << " go=" << r.fGo << " stage=" << r.fStage << std::endl;
458 if ( r.fStage == 2 && ( ( iRow >= r.fEndRow ) ||
459 ( iRow >= r.fStartRow && ( iRow - r.fStartRow ) % 2 == 0 )
461 if ( r.fNMissed > kMaxRowGap ) {
471 std::cout << "tracklet " << r.fItr << " before transport to row " << iRow << " : " << std::endl;
475 if ( !tParam.TransportToX( x, tParam.SinPhi(), tParam.GetCosPhi(), tracker.Param().ConstBz(), .99 ) ) {
477 if ( drawSearch ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not transport!!" << std::endl;
479 #ifndef EXTERN_ROW_HITS
480 tracklet.SetRowHit(iRow, -1);
482 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
483 #endif //!EXTERN_ROW_HITS
486 if ( row.NHits() < 1 ) {
488 #ifndef EXTERN_ROW_HITS
489 tracklet.SetRowHit(iRow, -1);
491 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
492 #endif //!EXTERN_ROW_HITS
497 std::cout << "tracklet " << r.fItr << " after transport to row " << iRow << " : " << std::endl;
499 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 2., 1 );
500 AliHLTTPCCADisplay::Instance().Ask();
503 #ifdef HLTCA_GPU_PREFETCHDATA
504 uint4 *tmpint4 = s.fData[r.fCurrentData];
505 #endif //HLTCA_GPU_PREFETCHDATA
507 //#ifdef HLTCA_GPU_REORDERHITDATA
508 // const ushort2 *hits = reinterpret_cast<ushort2*>( tmpint4 );
510 //#ifdef HLTCA_GPU_PREFETCHDATA
511 // const ushort_v *hitsx = reinterpret_cast<ushort_v*>( tmpint4 );
512 // const ushort_v *hitsy = reinterpret_cast<ushort_v*>( tmpint4 ) + NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits());
514 #ifndef HLTCA_GPU_TEXTURE_FETCH
515 const ushort2 *hits = tracker.HitData(row);
516 #endif //!HLTCA_GPU_TEXTURE_FETCH
520 float fY = tParam.GetY();
521 float fZ = tParam.GetZ();
524 { // search for the closest hit
525 const int fIndYmin = row.Grid().GetBinBounded( fY - 1.f, fZ - 1.f );
526 assert( fIndYmin >= 0 );
529 int fY0 = ( int ) ( ( fY - y0 ) * stepYi );
530 int fZ0 = ( int ) ( ( fZ - z0 ) * stepZi );
531 int ds0 = ( ( ( int )1 ) << 30 );
534 unsigned int fHitYfst = 1, fHitYlst = 0, fHitYfst1 = 1, fHitYlst1 = 0;
538 std::cout << " tracklet " << r.fItr << ", row " << iRow << ": grid N=" << row.Grid().N() << std::endl;
539 std::cout << " tracklet " << r.fItr << ", row " << iRow << ": minbin=" << fIndYmin << std::endl;
543 int nY = row.Grid().Ny();
545 //#ifdef HLTCA_GPU_PREFETCHDATA
546 // const unsigned short *sGridP = ( reinterpret_cast<unsigned short*>( tmpint4 ) );
548 #ifndef HLTCA_GPU_TEXTURE_FETCH
549 const unsigned short *sGridP = tracker.FirstHitInBin(row);
550 #endif //!HLTCA_GPU_TEXTURE_FETCH
553 #ifdef HLTCA_GPU_TEXTURE_FETCH
554 fHitYfst = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin);
555 fHitYlst = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+2);
556 fHitYfst1 = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+nY);
557 fHitYlst1 = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+nY+2);
559 fHitYfst = sGridP[fIndYmin];
560 fHitYlst = sGridP[fIndYmin+2];
561 fHitYfst1 = sGridP[fIndYmin+nY];
562 fHitYlst1 = sGridP[fIndYmin+nY+2];
563 #endif //HLTCA_GPU_TEXTURE_FETCH
564 assert( (signed) fHitYfst <= row.NHits() );
565 assert( (signed) fHitYlst <= row.NHits() );
566 assert( (signed) fHitYfst1 <= row.NHits() );
567 assert( (signed) fHitYlst1 <= row.NHits() );
570 std::cout << " Grid, row " << iRow << ": nHits=" << row.NHits() << ", grid n=" << row.Grid().N() << ", c[n]=" << sGridP[row.Grid().N()] << std::endl;
571 std::cout << "hit steps = " << stepY << " " << stepZ << std::endl;
572 std::cout << " Grid bins:" << std::endl;
573 for ( unsigned int i = 0; i < row.Grid().N(); i++ ) {
574 std::cout << " bin " << i << ": ";
575 for ( int j = sGridP[i]; j < sGridP[i+1]; j++ ) {
576 ushort2 hh = hits[j];
577 float y = y0 + hh.x * stepY;
578 float z = z0 + hh.y * stepZ;
579 std::cout << "[" << j << "|" << y << "," << z << "] ";
581 std::cout << std::endl;
586 if ( sGridP[row.Grid().N()] != row.NHits() ) {
587 std::cout << " grid, row " << iRow << ": nHits=" << row.NHits() << ", grid n=" << row.Grid().N() << ", c[n]=" << sGridP[row.Grid().N()] << std::endl;
594 std::cout << " tracklet " << r.fItr << ", row " << iRow << ", yz= " << fY << "," << fZ << ": search hits=" << fHitYfst << " " << fHitYlst << " / " << fHitYfst1 << " " << fHitYlst1 << std::endl;
595 std::cout << " hit search :" << std::endl;
598 for ( unsigned int fIh = fHitYfst; fIh < fHitYlst; fIh++ ) {
599 assert( (signed) fIh < row.NHits() );
601 #if defined(HLTCA_GPU_TEXTURE_FETCH)
602 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + fIh);
605 #endif //HLTCA_GPU_TEXTURE_FETCH
606 int ddy = ( int )( hh.x ) - fY0;
607 int ddz = ( int )( hh.y ) - fZ0;
608 int dds = CAMath::Abs( ddy ) + CAMath::Abs( ddz );
611 std::cout << fIh << ": hityz= " << hh.x << " " << hh.y << "(" << hh.x*stepY << " " << hh.y*stepZ << "), trackyz=" << fY0 << " " << fZ0 << "(" << fY0*stepY << " " << fZ0*stepZ << "), dy,dz,ds= " << ddy << " " << ddz << " " << dds << "(" << ddy*stepY << " " << ddz*stepZ << std::endl;
620 for ( unsigned int fIh = fHitYfst1; fIh < fHitYlst1; fIh++ ) {
622 #if defined(HLTCA_GPU_TEXTURE_FETCH)
623 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + fIh);
626 #endif //HLTCA_GPU_TEXTURE_FETCH
627 int ddy = ( int )( hh.x ) - fY0;
628 int ddz = ( int )( hh.y ) - fZ0;
629 int dds = CAMath::Abs( ddy ) + CAMath::Abs( ddz );
632 std::cout << fIh << ": hityz= " << hh.x << " " << hh.y << "(" << hh.x*stepY << " " << hh.y*stepZ << "), trackyz=" << fY0 << " " << fZ0 << "(" << fY0*stepY << " " << fZ0*stepZ << "), dy,dz,ds= " << ddy << " " << ddz << " " << dds << "(" << ddy*stepY << " " << ddz*stepZ << std::endl;
640 }// end of search for the closest hit
644 #ifndef EXTERN_ROW_HITS
645 tracklet.SetRowHit(iRow, -1);
647 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
648 #endif //!EXTERN_ROW_HITS
653 std::cout << "hit search " << r.fItr << ", row " << iRow << " hit " << best << " found" << std::endl;
654 AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best, kRed, 1. );
655 AliHLTTPCCADisplay::Instance().Ask();
656 AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best, kWhite, 1 );
657 AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best );
662 #if defined(HLTCA_GPU_TEXTURE_FETCH)
663 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + best);
666 #endif //HLTCA_GPU_TEXTURE_FETCH
668 //std::cout<<"mark 3, "<<r.fItr<<std::endl;
670 tracker.GetErrors2( iRow, *( ( AliHLTTPCCATrackParam* )&tParam ), err2Y, err2Z );
671 //std::cout<<"mark 4"<<std::endl;
673 float y = y0 + hh.x * stepY;
674 float z = z0 + hh.y * stepZ;
678 const float kFactor = tracker.Param().HitPickUpFactor() * tracker.Param().HitPickUpFactor() * 3.5 * 3.5;
679 float sy2 = kFactor * ( tParam.GetErr2Y() + err2Y );
680 float sz2 = kFactor * ( tParam.GetErr2Z() + err2Z );
681 if ( sy2 > 2. ) sy2 = 2.;
682 if ( sz2 > 2. ) sz2 = 2.;
686 std::cout << "dy,sy= " << dy << " " << CAMath::Sqrt( sy2 ) << ", dz,sz= " << dz << " " << CAMath::Sqrt( sz2 ) << std::endl;
687 std::cout << "dy,dz= " << dy << " " << dz << ", sy,sz= " << CAMath::Sqrt( sy2 ) << " " << CAMath::Sqrt( sz2 ) << ", sy,sz= " << CAMath::Sqrt( kFactor*( tParam.GetErr2Y() + err2Y ) ) << " " << CAMath::Sqrt( kFactor*( tParam.GetErr2Z() + err2Z ) ) << std::endl;
690 if ( CAMath::FMulRZ( dy, dy ) > sy2 || CAMath::FMulRZ( dz, dz ) > sz2 ) {
693 std::cout << "found hit is out of the chi2 window\n " << std::endl;
696 #ifndef EXTERN_ROW_HITS
697 tracklet.SetRowHit(iRow, -1);
699 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
700 #endif //!EXTERN_ROW_HITS
704 //if( SAVE() ) hitstore[ iRow ] = best;
705 //std::cout<<"hit search before filter: "<<r.fItr<<", row "<<iRow<<std::endl;
706 //AliHLTTPCCADisplay::Instance().DrawTracklet(tParam, hitstore, kBlue);
707 //AliHLTTPCCADisplay::Instance().Ask();
709 if ( !tParam.Filter( y, z, err2Y, err2Z, .99 ) ) {
712 std::cout << "tracklet " << r.fItr << " at row " << iRow << " : can not filter!!!! " << std::endl;
717 #ifndef EXTERN_ROW_HITS
718 tracklet.SetRowHit( iRow, best );
720 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = best;
721 #endif //!EXTERN_ROW_HITS
724 std::cout << "tracklet " << r.fItr << " after filter at row " << iRow << " : " << std::endl;
726 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kRed );
727 AliHLTTPCCADisplay::Instance().Ask();
732 if ( r.fStage == 1 ) r.fLastRow = iRow;
733 else r.fFirstRow = iRow;
739 GPUd() void AliHLTTPCCATrackletConstructor::CopyTrackletTempData( AliHLTTPCCAThreadMemory &rMemSrc, AliHLTTPCCAThreadMemory &rMemDst, AliHLTTPCCATrackParam &tParamSrc, AliHLTTPCCATrackParam &tParamDst)
741 //Copy Temporary Tracklet data from registers to global mem and vice versa
742 rMemDst.fStartRow = rMemSrc.fStartRow;
743 rMemDst.fEndRow = rMemSrc.fEndRow;
744 rMemDst.fFirstRow = rMemSrc.fFirstRow;
745 rMemDst.fLastRow = rMemSrc.fLastRow;
746 rMemDst.fCurrIH = rMemSrc.fCurrIH;
747 rMemDst.fGo = rMemSrc.fGo;
748 rMemDst.fStage = rMemSrc.fStage;
749 rMemDst.fNHits = rMemSrc.fNHits;
750 rMemDst.fNMissed = rMemSrc.fNMissed;
751 rMemDst.fLastY = rMemSrc.fLastY;
752 rMemDst.fLastZ = rMemSrc.fLastZ;
754 tParamDst.SetSinPhi( tParamSrc.GetSinPhi() );
755 tParamDst.SetDzDs( tParamSrc.GetDzDs() );
756 tParamDst.SetQPt( tParamSrc.GetQPt() );
757 tParamDst.SetSignCosPhi( tParamSrc.GetSignCosPhi() );
758 tParamDst.SetChi2( tParamSrc.GetChi2() );
759 tParamDst.SetNDF( tParamSrc.GetNDF() );
760 tParamDst.SetCov( 0, tParamSrc.GetCov(0) );
761 tParamDst.SetCov( 1, tParamSrc.GetCov(1) );
762 tParamDst.SetCov( 2, tParamSrc.GetCov(2) );
763 tParamDst.SetCov( 3, tParamSrc.GetCov(3) );
764 tParamDst.SetCov( 4, tParamSrc.GetCov(4) );
765 tParamDst.SetCov( 5, tParamSrc.GetCov(5) );
766 tParamDst.SetCov( 6, tParamSrc.GetCov(6) );
767 tParamDst.SetCov( 7, tParamSrc.GetCov(7) );
768 tParamDst.SetCov( 8, tParamSrc.GetCov(8) );
769 tParamDst.SetCov( 9, tParamSrc.GetCov(9) );
770 tParamDst.SetCov( 10, tParamSrc.GetCov(10) );
771 tParamDst.SetCov( 11, tParamSrc.GetCov(11) );
772 tParamDst.SetCov( 12, tParamSrc.GetCov(12) );
773 tParamDst.SetCov( 13, tParamSrc.GetCov(13) );
774 tParamDst.SetCov( 14, tParamSrc.GetCov(14) );
775 tParamDst.SetX( tParamSrc.GetX() );
776 tParamDst.SetY( tParamSrc.GetY() );
777 tParamDst.SetZ( tParamSrc.GetZ() );
780 GPUd() int AliHLTTPCCATrackletConstructor::FetchTracklet(AliHLTTPCCATracker &tracker, AliHLTTPCCASharedMemory &sMem, int Reverse, int RowBlock, int &mustInit)
782 //Fetch a new trackled to be processed by this thread
784 int nextTracketlFirstRun = sMem.fNextTrackletFirstRun;
785 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
787 sMem.fNTracklets = *tracker.NTracklets();
788 if (sMem.fNextTrackletFirstRun)
790 #ifdef HLTCA_GPU_SCHED_FIXED_START
791 const int iSlice = tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT;
792 const int nSliceBlockOffset = HLTCA_GPU_BLOCK_COUNT * iSlice / tracker.GPUParametersConst()->fGPUnSlices;
793 const uint2 &nTracklet = tracker.BlockStartingTracklet()[blockIdx.x - nSliceBlockOffset];
795 sMem.fNextTrackletCount = nTracklet.y;
796 if (sMem.fNextTrackletCount == 0)
798 sMem.fNextTrackletFirstRun = 0;
802 if (tracker.TrackletStartHits()[nTracklet.x].RowIndex() / HLTCA_GPU_SCHED_ROW_STEP != RowBlock)
804 sMem.fNextTrackletCount = 0;
808 sMem.fNextTrackletFirst = nTracklet.x;
809 sMem.fNextTrackletNoDummy = 1;
812 #endif //HLTCA_GPU_SCHED_FIXED_START
816 const int nFetchTracks = CAMath::Max(CAMath::Min((*tracker.RowBlockPos(Reverse, RowBlock)).x - (*tracker.RowBlockPos(Reverse, RowBlock)).y, HLTCA_GPU_THREAD_COUNT - TRACKLET_CONSTRUCTOR_NMEMTHREDS), 0);
817 sMem.fNextTrackletCount = nFetchTracks;
818 const int nUseTrack = nFetchTracks ? CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).y, nFetchTracks) : 0;
819 sMem.fNextTrackletFirst = nUseTrack;
821 const int nFillTracks = CAMath::Min(nFetchTracks, nUseTrack + nFetchTracks - (*((volatile int2*) (tracker.RowBlockPos(Reverse, RowBlock)))).x);
824 const int nStartFillTrack = CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).x, nFillTracks);
825 if (nFillTracks + nStartFillTrack >= HLTCA_GPU_MAX_TRACKLETS)
827 tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_ROWBLOCK_TRACKLET_OVERFLOW;
829 for (int i = 0;i < nFillTracks;i++)
831 tracker.RowBlockTracklets(Reverse, RowBlock)[(nStartFillTrack + i) % HLTCA_GPU_MAX_TRACKLETS] = -3; //Dummy filling track
834 sMem.fNextTrackletNoDummy = 0;
839 if (sMem.fNextTrackletCount == 0)
841 return(-2); //No more track in this RowBlock
843 #if HLTCA_GPU_TRACKLET_CONSTRUCTOR_NMEMTHREDS > 0
844 else if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS)
848 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_NMEMTHREDS > 0
849 else if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS >= sMem.fNextTrackletCount)
851 return(-1); //No track in this RowBlock for this thread
853 else if (nextTracketlFirstRun)
855 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS) sMem.fNextTrackletFirstRun = 0;
857 return(sMem.fNextTrackletFirst + threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS);
861 const int nTrackPos = sMem.fNextTrackletFirst + threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS;
862 mustInit = (nTrackPos < tracker.RowBlockPos(Reverse, RowBlock)->w);
863 volatile int* const ptrTracklet = &tracker.RowBlockTracklets(Reverse, RowBlock)[nTrackPos % HLTCA_GPU_MAX_TRACKLETS];
866 while ((nTracklet = *ptrTracklet) == -1)
868 for (int i = 0;i < 20000;i++)
869 sMem.fNextTrackletStupidDummy++;
873 tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_SCHEDULE_COLLISION;
881 GPUd() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorNewGPU(AliHLTTPCCATracker *pTracker)
883 //Main Tracklet construction function that calls the scheduled (FetchTracklet) and then Processes the tracklet (mainly UpdataTracklet) and at the end stores the tracklet.
884 //Can also dispatch a tracklet to be rescheduled
885 #ifdef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
886 pTracker[0].BlockStartingTracklet()[0].x = HLTCA_GPU_EMULATION_SINGLE_TRACKLET;
887 pTracker[0].BlockStartingTracklet()[0].y = 1;
888 for (int i = 1;i < HLTCA_GPU_BLOCK_COUNT;i++)
890 pTracker[0].BlockStartingTracklet()[i].x = pTracker[0].BlockStartingTracklet()[i].y = 0;
892 #endif //HLTCA_GPU_EMULATION_SINGLE_TRACKLET
894 GPUshared() AliHLTTPCCASharedMemory sMem;
896 #ifdef HLTCA_GPU_SCHED_FIXED_START
897 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
899 sMem.fNextTrackletFirstRun = 1;
902 #endif //HLTCA_GPU_SCHED_FIXED_START
904 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
905 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
910 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
912 for (int iReverse = 0;iReverse < 2;iReverse++)
914 for (int iRowBlock = 0;iRowBlock < HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1;iRowBlock++)
916 #ifdef HLTCA_GPU_SCHED_FIXED_SLICE
917 int iSlice = tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT;
919 for (int iSlice = 0;iSlice < pTracker[0].GPUParametersConst()->fGPUnSlices;iSlice++)
920 #endif //HLTCA_GPU_SCHED_FIXED_SLICE
922 AliHLTTPCCATracker &tracker = pTracker[iSlice];
923 if (sMem.fNextTrackletFirstRun && iSlice != tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT)
927 /*if (!sMem.fNextTrackletFirstRun && tracker.RowBlockPos(1, HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP)->x <= tracker.RowBlockPos(1, HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP)->y)
931 int sharedRowsInitialized = 0;
935 while ((iTracklet = FetchTracklet(tracker, sMem, iReverse, iRowBlock, mustInit)) != -2)
937 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
938 CAMath::AtomicMax(&sMem.fMaxSync, threadSync);
940 threadSync = CAMath::Min(sMem.fMaxSync, 100000000 / blockDim.x / gridDim.x);
941 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
942 #ifndef HLTCA_GPU_PREFETCHDATA
943 if (!sharedRowsInitialized)
945 #ifdef HLTCA_GPU_PREFETCH_ROWBLOCK_ONLY
948 for (int i = CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP) * sizeof(AliHLTTPCCARow) / sizeof(int) + threadIdx.x;i < (HLTCA_ROW_COUNT - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP) * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
950 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker.SliceDataRows())[i];
955 for (int i = CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP) * sizeof(AliHLTTPCCARow) / sizeof(int) + threadIdx.x;i < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT) * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
957 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker.SliceDataRows())[i];
961 for (int i = threadIdx.x;i < HLTCA_ROW_COUNT * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
963 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker.SliceDataRows())[i];
965 #endif //HLTCA_GPU_PREFETCH_ROWBLOCK_ONLY
966 sharedRowsInitialized = 1;
968 #endif //!HLTCA_GPU_PREFETCHDATA
969 #ifdef HLTCA_GPU_RESCHED
970 short2 storeToRowBlock;
971 int storePosition = 0;
972 if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
974 const int nReverse = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
975 const int nRowBlock = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
976 sMem.fTrackletStoreCount[nReverse][nRowBlock] = 0;
978 #endif //HLTCA_GPU_RESCHED
980 AliHLTTPCCATrackParam tParam;
981 AliHLTTPCCAThreadMemory rMem;
983 rMem.fCurrentData = 0;
985 #ifdef HLTCA_GPU_EMULATION_DEBUG_TRACKLET
986 if (iTracklet == HLTCA_GPU_EMULATION_DEBUG_TRACKLET)
988 tracker.GPUParameters()->fGPUError = 1;
990 #endif //HLTCA_GPU_EMULATION_DEBUG_TRACKLET
991 AliHLTTPCCAThreadMemory &rMemGlobal = tracker.GPUTrackletTemp()[iTracklet].fThreadMem;
992 AliHLTTPCCATrackParam &tParamGlobal = tracker.GPUTrackletTemp()[iTracklet].fParam;
995 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
997 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
998 rMem.fCurrIH = id.HitIndex();
1003 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
1005 else if (iTracklet >= 0)
1007 CopyTrackletTempData( rMemGlobal, rMem, tParamGlobal, tParam );
1009 #ifdef HLTCA_GPU_PREFETCHDATA
1010 else if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS)
1012 ReadData(threadIdx.x, sMem, rMem, tracker, iReverse ? (HLTCA_ROW_COUNT - 1 - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP) : (CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP)));
1014 #endif //HLTCA_GPU_PREFETCHDATA
1015 rMem.fItr = iTracklet;
1016 rMem.fGo = (iTracklet >= 0);
1018 #ifdef HLTCA_GPU_RESCHED
1019 storeToRowBlock.x = iRowBlock + 1;
1020 storeToRowBlock.y = iReverse;
1021 #ifdef HLTCA_GPU_PREFETCHDATA
1022 rMem.fCurrentData ^= 1;
1024 #endif //HLTCA_GPU_PREFETCHDATA
1027 for (int j = HLTCA_ROW_COUNT - 1 - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP;j >= CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP);j--)
1029 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1030 if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && !(j >= rMem.fEndRow || ( j >= rMem.fStartRow && j - rMem.fStartRow % 2 == 0)))
1031 pTracker[0].fStageAtSync[threadSync++ * blockDim.x * gridDim.x + blockIdx.x * blockDim.x + threadIdx.x] = rMem.fStage + 1;
1032 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1033 #ifdef HLTCA_GPU_PREFETCHDATA
1034 if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS && j > CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP))
1036 ReadData(threadIdx.x, sMem, rMem, tracker, j - 1);
1039 #endif //HLTCA_GPU_PREFETCHDATA
1042 UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1043 if (rMem.fNMissed > kMaxRowGap)
1046 #ifndef HLTCA_GPU_PREFETCHDATA
1048 #endif //!HLTCA_GPU_PREFETCHDATA
1051 #ifdef HLTCA_GPU_PREFETCHDATA
1053 rMem.fCurrentData ^= 1;
1054 #endif //HLTCA_GPU_PREFETCHDATA
1057 if (iTracklet >= 0 && (!rMem.fGo || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
1059 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1064 for (int j = CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP);j < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT);j++)
1066 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1067 if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && j >= rMem.fStartRow && (rMem.fStage > 0 || rMem.fCurrIH >= 0 || (j - rMem.fStartRow) % 2 == 0 ))
1068 pTracker[0].fStageAtSync[threadSync++ * blockDim.x * gridDim.x + blockIdx.x * blockDim.x + threadIdx.x] = rMem.fStage + 1;
1069 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1070 #ifdef HLTCA_GPU_PREFETCHDATA
1071 if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS && j < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT) - 1)
1073 ReadData(threadIdx.x, sMem, rMem, tracker, j + 1);
1076 #endif //HLTCA_GPU_PREFETCHDATA
1079 UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1080 #ifndef HLTCA_GPU_PREFETCHDATA
1081 //if (rMem.fNMissed > kMaxRowGap || rMem.fGo == 0) break; //DR!!! CUDA Crashes with this enabled
1082 #endif //!HLTCA_GPU_PREFETCHDATA
1084 #ifdef HLTCA_GPU_PREFETCHDATA
1086 rMem.fCurrentData ^= 1;
1087 #endif //HLTCA_GPU_PREFETCHDATA
1089 if (rMem.fGo && (rMem.fNMissed > kMaxRowGap || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
1091 #if defined(HLTCA_GPU_PREFETCHDATA) | !defined(HLTCA_GPU_PREFETCH_ROWBLOCK_ONLY)
1092 if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) )
1094 if ( !tParam.TransportToX( sMem.fRows[ rMem.fEndRow ].X(), tracker.Param().ConstBz(), .999 ) )
1095 #endif //HLTCA_GPU_PREFETCHDATA | !HLTCA_GPU_PREFETCH_ROWBLOCK_ONLY
1101 storeToRowBlock.x = (HLTCA_ROW_COUNT - rMem.fEndRow) / HLTCA_GPU_SCHED_ROW_STEP;
1102 storeToRowBlock.y = 1;
1108 if (iTracklet >= 0 && !rMem.fGo)
1110 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1114 if (rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
1116 CopyTrackletTempData( rMem, rMemGlobal, tParam, tParamGlobal );
1117 storePosition = CAMath::AtomicAdd(&sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x], 1);
1122 for (int j = rMem.fStartRow;j < HLTCA_ROW_COUNT;j++)
1124 UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1125 if (!rMem.fGo) break;
1132 if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) ) rMem.fGo = 0;
1135 for (int j = rMem.fEndRow;j >= 0;j--)
1137 if (!rMem.fGo) break;
1138 UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1141 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1143 #endif //HLTCA_GPU_RESCHED
1144 #ifdef HLTCA_GPU_RESCHED
1146 if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
1148 const int nReverse = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
1149 const int nRowBlock = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
1150 if (sMem.fTrackletStoreCount[nReverse][nRowBlock])
1152 sMem.fTrackletStoreCount[nReverse][nRowBlock] = CAMath::AtomicAdd(&tracker.RowBlockPos(nReverse, nRowBlock)->x, sMem.fTrackletStoreCount[nReverse][nRowBlock]);
1156 if (iTracklet >= 0 && rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
1158 tracker.RowBlockTracklets(storeToRowBlock.y, storeToRowBlock.x)[sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x] + storePosition] = iTracklet;
1161 #endif //HLTCA_GPU_RESCHED
1168 GPUd() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(int iTracklet, AliHLTTPCCATracker &tracker)
1170 //Initialize Row Blocks
1172 #ifndef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
1173 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
1174 #ifdef HLTCA_GPU_SCHED_FIXED_START
1175 const int firstDynamicTracklet = tracker.GPUParameters()->fScheduleFirstDynamicTracklet;
1176 if (iTracklet >= firstDynamicTracklet)
1177 #endif //HLTCA_GPU_SCHED_FIXED_START
1179 const int firstTrackletInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[CAMath::Max(id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP * HLTCA_GPU_SCHED_ROW_STEP, 1)].z);
1180 if (iTracklet == firstTrackletInRowBlock)
1182 const int firstRowInNextBlock = (id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP + 1) * HLTCA_GPU_SCHED_ROW_STEP;
1183 int trackletsInRowBlock;
1184 if (firstRowInNextBlock >= HLTCA_ROW_COUNT - 3)
1185 trackletsInRowBlock = *tracker.NTracklets() - firstTrackletInRowBlock;
1187 trackletsInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[firstRowInNextBlock].z) - firstTrackletInRowBlock;
1189 tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->x = trackletsInRowBlock;
1190 tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->w = trackletsInRowBlock;
1192 tracker.RowBlockTracklets(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)[iTracklet - firstTrackletInRowBlock] = iTracklet;
1194 #endif //!HLTCA_GPU_EMULATION_SINGLE_TRACKLET
1197 GPUg() void AliHLTTPCCATrackletConstructorInit(int iSlice)
1199 //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit
1200 AliHLTTPCCATracker &tracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker )[iSlice];
1201 int i = blockIdx.x * blockDim.x + threadIdx.x;
1202 if (i >= *tracker.NTracklets()) return;
1203 AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(i, tracker);
1206 GPUg() void AliHLTTPCCATrackletConstructorNewGPU()
1208 //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorNewGPU
1209 AliHLTTPCCATracker *pTracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker );
1210 AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorNewGPU(pTracker);
1213 #else //HLTCA_GPUCODE
1215 GPUd() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorNewCPU(AliHLTTPCCATracker &tracker)
1217 //Tracklet constructor simple CPU Function that does not neew a scheduler
1218 GPUshared() AliHLTTPCCASharedMemory sMem;
1219 sMem.fNTracklets = *tracker.NTracklets();
1220 for (int iTracklet = 0;iTracklet < *tracker.NTracklets();iTracklet++)
1222 AliHLTTPCCATrackParam tParam;
1223 AliHLTTPCCAThreadMemory rMem;
1225 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
1227 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
1228 rMem.fCurrIH = id.HitIndex();
1233 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
1235 rMem.fItr = iTracklet;
1238 for (int j = rMem.fStartRow;j < tracker.Param().NRows();j++)
1240 UpdateTracklet(1, 1, 0, iTracklet, sMem, rMem, tracker, tParam, j);
1241 if (!rMem.fGo) break;
1248 if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) ) rMem.fGo = 0;
1251 for (int j = rMem.fEndRow;j >= 0;j--)
1253 if (!rMem.fGo) break;
1254 UpdateTracklet( 1, 1, 0, iTracklet, sMem, rMem, tracker, tParam, j);
1257 StoreTracklet( 1, 1, 0, iTracklet, sMem, rMem, tracker, tParam );
1260 #endif //HLTCA_GPUCODE