1 // @(#) $Id: AliHLTTPCCATrackletConstructor.cxx 27042 2008-07-02 12:06:02Z richterm $
2 // **************************************************************************
3 // This file is property of and copyright by the ALICE HLT Project *
4 // ALICE Experiment at CERN, All rights reserved. *
6 // Primary Authors: Sergey Gorbunov <sergey.gorbunov@kip.uni-heidelberg.de> *
7 // Ivan Kisel <kisel@kip.uni-heidelberg.de> *
8 // for The ALICE HLT Project. *
10 // Permission to use, copy, modify and distribute this software and its *
11 // documentation strictly for non-commercial purposes is hereby granted *
12 // without fee, provided that the above copyright notice appears in all *
13 // copies and that both the copyright notice and this permission notice *
14 // appear in the supporting documentation. The authors make no claims *
15 // about the suitability of this software for any purpose. It is *
16 // provided "as is" without express or implied warranty. *
18 //***************************************************************************
20 #include "AliHLTTPCCATracker.h"
21 #include "AliHLTTPCCATrackParam.h"
22 #include "AliHLTTPCCATrackParam.h"
23 #include "AliHLTTPCCAGrid.h"
24 #include "AliHLTTPCCAMath.h"
25 #include "AliHLTTPCCADef.h"
26 #include "AliHLTTPCCATracklet.h"
27 #include "AliHLTTPCCATrackletConstructor.h"
28 #include "MemoryAssignmentHelpers.h"
30 //#include "AliHLTTPCCAPerformance.h"
36 #include "AliHLTTPCCADisplay.h"
41 GPUdi() void AliHLTTPCCATrackletConstructor::InitTracklet( AliHLTTPCCATrackParam &tParam )
43 //Initialize Tracklet Parameters using default values
47 GPUdi() void AliHLTTPCCATrackletConstructor::ReadData
48 #ifndef HLTCA_GPU_PREFETCHDATA
49 ( int /*iThread*/, AliHLTTPCCASharedMemory& /*s*/, AliHLTTPCCAThreadMemory& /*r*/, AliHLTTPCCATracker& /*tracker*/, int /*iRow*/ )
51 //Prefetch Data to shared memory
53 ( int iThread, AliHLTTPCCASharedMemory& s, AliHLTTPCCAThreadMemory& r, AliHLTTPCCATracker& tracker, int iRow )
55 // reconstruction of tracklets, read data step
56 const AliHLTTPCCARow &row = tracker.Row( iRow );
57 //bool jr = !r.fCurrentData;
59 // copy hits, grid content and links
61 // FIXME: inefficient copy
62 //const int numberOfHitsAligned = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits());
65 #ifdef HLTCA_GPU_REORDERHITDATA
66 ushort2 *sMem1 = reinterpret_cast<ushort2 *>( s.fData[jr] );
67 for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
68 sMem1[i].x = tracker.HitDataY( row, i );
69 sMem1[i].y = tracker.HitDataZ( row, i );
72 ushort_v *sMem1 = reinterpret_cast<ushort_v *>( s.fData[jr] );
73 for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
74 sMem1[i] = tracker.HitDataY( row, i );
77 ushort_v *sMem1a = reinterpret_cast<ushort_v *>( s.fData[jr] ) + numberOfHitsAligned;
78 for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
79 sMem1a[i] = tracker.HitDataZ( row, i );
81 #endif //HLTCA_GPU_REORDERHITDATA
83 short *sMem2 = reinterpret_cast<short *>( s.fData[jr] ) + 2 * numberOfHitsAligned;
84 for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
85 sMem2[i] = tracker.HitLinkUpData( row, i );
88 unsigned short *sMem3 = reinterpret_cast<unsigned short *>( s.fData[jr] ) + 3 * numberOfHitsAligned;
89 const int n = row.FullSize(); // + grid content size
90 for ( int i = iThread; i < n; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
91 sMem3[i] = tracker.FirstHitInBin( row, i );
94 /*for (int k = 0;k < 2;k++)
96 HLTCA_GPU_ROWCOPY* sharedMem;
97 const HLTCA_GPU_ROWCOPY* sourceMem;
102 sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitDataY(row) );
103 sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
104 copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
107 sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitDataZ(row) );
108 sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
109 copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
112 sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitLinkUpData(row) );
113 sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
114 copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
117 sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.FirstHitInBin(row) );
118 sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
119 copyCount = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.FullSize()) * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
122 for (int i = iThread;i < copyCount;i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
124 sharedMem[i] = sourceMem[i];
128 for (unsigned int i = iThread;i < row.FullSize() * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
130 reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[!r.fCurrentData] ))[i] = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.FirstHitInBin(row) )[i];
133 const HLTCA_GPU_ROWCOPY* const sourceMem = (const HLTCA_GPU_ROWCOPY *) &row;
134 HLTCA_GPU_ROWCOPY* const sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> ( &s.fRow[!r.fCurrentData] );
135 for (unsigned int i = iThread;i < sizeof(AliHLTTPCCARow) / sizeof(HLTCA_GPU_ROWCOPY);i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
137 sharedMem[i] = sourceMem[i];
139 #endif //!HLTCA_GPU_PREFETCHDATA
143 GPUdi() void AliHLTTPCCATrackletConstructor::StoreTracklet
144 ( int /*nBlocks*/, int /*nThreads*/, int /*iBlock*/, int /*iThread*/,
145 AliHLTTPCCASharedMemory
146 #if defined(HLTCA_GPUCODE) | defined(EXTERN_ROW_HITS)
150 #endif //!HLTCA_GPUCODE
151 , AliHLTTPCCAThreadMemory &r, AliHLTTPCCATracker &tracker, AliHLTTPCCATrackParam &tParam )
153 // reconstruction of tracklets, tracklet store step
155 //AliHLTTPCCAPerformance::Instance().HNHitsPerTrackCand()->Fill(r.fNHits);
159 //std::cout<<"tracklet to store: "<<r.fItr<<", nhits = "<<r.fNHits<<std::endl;
162 if ( r.fNHits < TRACKLET_SELECTOR_MIN_HITS ) {
168 if ( 1. / .5 < CAMath::Abs( tParam.QPt() ) ) { //SG!!!
176 const float *c = tParam.Cov();
177 for ( int i = 0; i < 15; i++ ) ok = ok && CAMath::Finite( c[i] );
178 for ( int i = 0; i < 5; i++ ) ok = ok && CAMath::Finite( tParam.Par()[i] );
179 ok = ok && ( tParam.X() > 50 );
181 if ( c[0] <= 0 || c[2] <= 0 || c[5] <= 0 || c[9] <= 0 || c[14] <= 0 ) ok = 0;
190 if ( !SAVE() ) return;
192 AliHLTTPCCATracklet &tracklet = tracker.Tracklets()[r.fItr];
194 tracklet.SetNHits( r.fNHits );
196 if ( r.fNHits > 0 ) {
199 std::cout << "store tracklet " << r.fItr << ", nhits = " << r.fNHits << std::endl;
200 if ( AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 1. ) ) {
201 AliHLTTPCCADisplay::Instance().Ask();
205 if ( CAMath::Abs( tParam.Par()[4] ) < 1.e-4 ) tParam.SetPar( 4, 1.e-4 );
206 if (r.fStartRow < r.fFirstRow) r.fFirstRow = r.fStartRow;
207 tracklet.SetFirstRow( r.fFirstRow );
208 tracklet.SetLastRow( r.fLastRow );
210 tracklet.SetParam( tParam.fParam );
212 tracklet.SetParam( tParam.GetParam() );
213 #endif //HLTCA_GPUCODE
214 int w = ( r.fNHits << 16 ) + r.fItr;
215 for ( int iRow = r.fFirstRow; iRow <= r.fLastRow; iRow++ ) {
216 #ifdef EXTERN_ROW_HITS
217 int ih = tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr];
219 int ih = tracklet.RowHit( iRow );
220 #endif //EXTERN_ROW_HITS
222 #if defined(HLTCA_GPUCODE) & !defined(HLTCA_GPU_PREFETCHDATA)
223 tracker.MaximizeHitWeight( s.fRows[ iRow ], ih, w );
225 tracker.MaximizeHitWeight( tracker.Row( iRow ), ih, w );
226 #endif //HLTCA_GPUCODE & !HLTCA_GPU_PREFETCHDATA
233 GPUdi() void AliHLTTPCCATrackletConstructor::UpdateTracklet
234 ( int /*nBlocks*/, int /*nThreads*/, int /*iBlock*/, int /*iThread*/,
235 AliHLTTPCCASharedMemory
236 #if defined(HLTCA_GPUCODE) | defined(EXTERN_ROW_HITS)
240 #endif //HLTCA_GPUCODE
241 , AliHLTTPCCAThreadMemory &r, AliHLTTPCCATracker &tracker, AliHLTTPCCATrackParam &tParam, int iRow )
243 // reconstruction of tracklets, tracklets update step
245 //std::cout<<"Update tracklet: "<<r.fItr<<" "<<r.fGo<<" "<<r.fStage<<" "<<iRow<<std::endl;
246 bool drawSearch = 0;//r.fItr==2;
247 bool drawFit = 0;//r.fItr==2;
248 bool drawFitted = drawFit ;//|| 1;//r.fItr==16;
250 if ( !r.fGo ) return;
252 #ifndef EXTERN_ROW_HITS
253 AliHLTTPCCATracklet &tracklet = tracker.Tracklets()[r.fItr];
254 #endif //EXTERN_ROW_HITS
256 #ifdef HLTCA_GPU_PREFETCHDATA
257 const AliHLTTPCCARow &row = s.fRow[r.fCurrentData];
258 #elif defined(HLTCA_GPUCODE)
259 const AliHLTTPCCARow &row = s.fRows[iRow];
261 const AliHLTTPCCARow &row = tracker.Row( iRow );
262 #endif //HLTCA_GPU_PREFETCHDATA
264 float y0 = row.Grid().YMin();
265 float stepY = row.HstepY();
266 float z0 = row.Grid().ZMin();
267 float stepZ = row.HstepZ();
268 float stepYi = row.HstepYi();
269 float stepZi = row.HstepZi();
271 if ( r.fStage == 0 ) { // fitting part
274 if ( iRow < r.fStartRow || r.fCurrIH < 0 ) break;
275 if ( ( iRow - r.fStartRow ) % 2 != 0 )
277 #ifndef EXTERN_ROW_HITS
278 tracklet.SetRowHit(iRow, -1);
280 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
281 #endif //EXTERN_ROW_HITS
282 break; // SG!!! - jump over the row
286 //#ifdef HLTCA_GPU_PREFETCHDATA
287 // uint4 *tmpint4 = s.fData[r.fCurrentData];
290 //#ifdef HLTCA_GPU_REORDERHITDATA
291 // hh = reinterpret_cast<ushort2*>( tmpint4 )[r.fCurrIH];
293 //#ifdef HLTCA_GPU_PREFETCHDATA
294 // hh.x = reinterpret_cast<ushort_v*>( tmpint4 )[r.fCurrIH];
295 // hh.y = reinterpret_cast<ushort_v*>( tmpint4 )[NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits()) + r.fCurrIH];
297 #if defined(HLTCA_GPU_TEXTURE_FETCH)
298 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + r.fCurrIH);
300 hh = tracker.HitData(row)[r.fCurrIH];
301 #endif //HLTCA_GPU_TEXTURE_FETCH
305 int oldIH = r.fCurrIH;
306 //#ifdef HLTCA_GPU_PREFETCHDATA
307 // r.fCurrIH = reinterpret_cast<short*>( tmpint4 )[2 * NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits()) + r.fCurrIH]; // read from linkup data
309 #if defined(HLTCA_GPU_TEXTURE_FETCH)
310 r.fCurrIH = tex1Dfetch(gAliTexRefs, ((char*) tracker.Data().HitLinkUpData(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + r.fCurrIH);
312 r.fCurrIH = tracker.HitLinkUpData(row)[r.fCurrIH]; // read from linkup data
313 #endif //HLTCA_GPU_TEXTURE_FETCH
317 float y = y0 + hh.x * stepY;
318 float z = z0 + hh.y * stepZ;
320 if ( drawFit ) std::cout << " fit tracklet: new hit " << oldIH << ", xyz=" << x << " " << y << " " << z << std::endl;
323 if ( iRow == r.fStartRow ) {
330 if ( drawFit ) std::cout << " fit tracklet " << r.fItr << ", row " << iRow << " first row" << std::endl;
335 float dx = x - tParam.X();
336 float dy = y - r.fLastY;//tParam.Y();
337 float dz = z - r.fLastZ;//tParam.Z();
341 float ri = 1. / CAMath::Sqrt( dx * dx + dy * dy );
342 if ( iRow == r.fStartRow + 2 ) { //SG!!! important - thanks to Matthias
343 tParam.SetSinPhi( dy*ri );
344 tParam.SetSignCosPhi( dx );
345 tParam.SetDzDs( dz*ri );
346 //std::cout << "Init. errors... " << r.fItr << std::endl;
347 tracker.GetErrors2( iRow, tParam, err2Y, err2Z );
348 //std::cout << "Init. errors = " << err2Y << " " << err2Z << std::endl;
349 tParam.SetCov( 0, err2Y );
350 tParam.SetCov( 2, err2Z );
354 std::cout << " fit tracklet " << r.fItr << ", row " << iRow << " transporting.." << std::endl;
355 std::cout << " params before transport=" << std::endl;
359 float sinPhi, cosPhi;
360 if ( r.fNHits >= 10 && CAMath::Abs( tParam.SinPhi() ) < .99 ) {
361 sinPhi = tParam.SinPhi();
362 cosPhi = CAMath::Sqrt( 1 - sinPhi * sinPhi );
368 if ( drawFit ) std::cout << "sinPhi0 = " << sinPhi << ", cosPhi0 = " << cosPhi << std::endl;
370 if ( !tParam.TransportToX( x, sinPhi, cosPhi, tracker.Param().ConstBz(), -1 ) ) {
372 if ( drawFit ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not transport!!" << std::endl;
374 #ifndef EXTERN_ROW_HITS
375 tracklet.SetRowHit( iRow, -1 );
377 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
378 #endif //EXTERN_ROW_HITS
381 //std::cout<<"mark1 "<<r.fItr<<std::endl;
383 tracker.GetErrors2( iRow, tParam.GetZ(), sinPhi, cosPhi, tParam.GetDzDs(), err2Y, err2Z );
384 //std::cout<<"mark2"<<std::endl;
388 std::cout << " params after transport=" << std::endl;
390 std::cout << "fit tracklet before filter: " << r.fItr << ", row " << iRow << " errs=" << err2Y << " " << err2Z << std::endl;
391 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 2., 1 );
392 AliHLTTPCCADisplay::Instance().Ask();
395 if ( !tParam.Filter( y, z, err2Y, err2Z, .99 ) ) {
397 if ( drawFit ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not filter!!" << std::endl;
399 #ifndef EXTERN_ROW_HITS
400 tracklet.SetRowHit( iRow, -1 );
402 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
403 #endif //EXTERN_ROW_HITS
407 #ifndef EXTERN_ROW_HITS
408 tracklet.SetRowHit( iRow, oldIH );
410 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = oldIH;
411 #endif //!EXTERN_ROW_HITS
414 std::cout << "fit tracklet after filter " << r.fItr << ", row " << iRow << std::endl;
416 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kGreen, 2. );
417 AliHLTTPCCADisplay::Instance().Ask();
426 if ( r.fCurrIH < 0 ) {
428 if ( drawFitted ) std::cout << "fitted tracklet " << r.fItr << ", nhits=" << r.fNHits << std::endl;
431 //AliHLTTPCCAPerformance::Instance().HNHitsPerSeed()->Fill(r.fNHits);
432 if ( CAMath::Abs( tParam.SinPhi() ) > .999 ) {
434 if ( drawFitted ) std::cout << " fitted tracklet error: sinPhi=" << tParam.SinPhi() << std::endl;
436 r.fNHits = 0; r.fGo = 0;
438 //tParam.SetCosPhi( CAMath::Sqrt(1-tParam.SinPhi()*tParam.SinPhi()) );
442 std::cout << "fitted tracklet " << r.fItr << " miss=" << r.fNMissed << " go=" << r.fGo << std::endl;
444 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue );
445 AliHLTTPCCADisplay::Instance().Ask();
449 } else { // forward/backward searching part
453 std::cout << "search tracklet " << r.fItr << " row " << iRow << " miss=" << r.fNMissed << " go=" << r.fGo << " stage=" << r.fStage << std::endl;
457 if ( r.fStage == 2 && ( ( iRow >= r.fEndRow ) ||
458 ( iRow >= r.fStartRow && ( iRow - r.fStartRow ) % 2 == 0 )
460 if ( r.fNMissed > kMaxRowGap ) {
470 std::cout << "tracklet " << r.fItr << " before transport to row " << iRow << " : " << std::endl;
474 if ( !tParam.TransportToX( x, tParam.SinPhi(), tParam.GetCosPhi(), tracker.Param().ConstBz(), .99 ) ) {
476 if ( drawSearch ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not transport!!" << std::endl;
478 #ifndef EXTERN_ROW_HITS
479 tracklet.SetRowHit(iRow, -1);
481 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
482 #endif //!EXTERN_ROW_HITS
485 if ( row.NHits() < 1 ) {
487 #ifndef EXTERN_ROW_HITS
488 tracklet.SetRowHit(iRow, -1);
490 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
491 #endif //!EXTERN_ROW_HITS
496 std::cout << "tracklet " << r.fItr << " after transport to row " << iRow << " : " << std::endl;
498 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 2., 1 );
499 AliHLTTPCCADisplay::Instance().Ask();
502 #ifdef HLTCA_GPU_PREFETCHDATA
503 uint4 *tmpint4 = s.fData[r.fCurrentData];
504 #endif //HLTCA_GPU_PREFETCHDATA
506 //#ifdef HLTCA_GPU_REORDERHITDATA
507 // const ushort2 *hits = reinterpret_cast<ushort2*>( tmpint4 );
509 //#ifdef HLTCA_GPU_PREFETCHDATA
510 // const ushort_v *hitsx = reinterpret_cast<ushort_v*>( tmpint4 );
511 // const ushort_v *hitsy = reinterpret_cast<ushort_v*>( tmpint4 ) + NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits());
513 #ifndef HLTCA_GPU_TEXTURE_FETCH
514 const ushort2 *hits = tracker.HitData(row);
515 #endif //!HLTCA_GPU_TEXTURE_FETCH
519 float fY = tParam.GetY();
520 float fZ = tParam.GetZ();
523 { // search for the closest hit
524 const int fIndYmin = row.Grid().GetBinBounded( fY - 1.f, fZ - 1.f );
525 assert( fIndYmin >= 0 );
528 int fY0 = ( int ) ( ( fY - y0 ) * stepYi );
529 int fZ0 = ( int ) ( ( fZ - z0 ) * stepZi );
530 int ds0 = ( ( ( int )1 ) << 30 );
533 unsigned int fHitYfst = 1, fHitYlst = 0, fHitYfst1 = 1, fHitYlst1 = 0;
537 std::cout << " tracklet " << r.fItr << ", row " << iRow << ": grid N=" << row.Grid().N() << std::endl;
538 std::cout << " tracklet " << r.fItr << ", row " << iRow << ": minbin=" << fIndYmin << std::endl;
542 int nY = row.Grid().Ny();
544 //#ifdef HLTCA_GPU_PREFETCHDATA
545 // const unsigned short *sGridP = ( reinterpret_cast<unsigned short*>( tmpint4 ) );
547 #ifndef HLTCA_GPU_TEXTURE_FETCH
548 const unsigned short *sGridP = tracker.FirstHitInBin(row);
549 #endif //!HLTCA_GPU_TEXTURE_FETCH
552 #ifdef HLTCA_GPU_TEXTURE_FETCH
553 fHitYfst = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin);
554 fHitYlst = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+2);
555 fHitYfst1 = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+nY);
556 fHitYlst1 = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+nY+2);
558 fHitYfst = sGridP[fIndYmin];
559 fHitYlst = sGridP[fIndYmin+2];
560 fHitYfst1 = sGridP[fIndYmin+nY];
561 fHitYlst1 = sGridP[fIndYmin+nY+2];
562 #endif //HLTCA_GPU_TEXTURE_FETCH
563 assert( (signed) fHitYfst <= row.NHits() );
564 assert( (signed) fHitYlst <= row.NHits() );
565 assert( (signed) fHitYfst1 <= row.NHits() );
566 assert( (signed) fHitYlst1 <= row.NHits() );
569 std::cout << " Grid, row " << iRow << ": nHits=" << row.NHits() << ", grid n=" << row.Grid().N() << ", c[n]=" << sGridP[row.Grid().N()] << std::endl;
570 std::cout << "hit steps = " << stepY << " " << stepZ << std::endl;
571 std::cout << " Grid bins:" << std::endl;
572 for ( unsigned int i = 0; i < row.Grid().N(); i++ ) {
573 std::cout << " bin " << i << ": ";
574 for ( int j = sGridP[i]; j < sGridP[i+1]; j++ ) {
575 ushort2 hh = hits[j];
576 float y = y0 + hh.x * stepY;
577 float z = z0 + hh.y * stepZ;
578 std::cout << "[" << j << "|" << y << "," << z << "] ";
580 std::cout << std::endl;
585 if ( sGridP[row.Grid().N()] != row.NHits() ) {
586 std::cout << " grid, row " << iRow << ": nHits=" << row.NHits() << ", grid n=" << row.Grid().N() << ", c[n]=" << sGridP[row.Grid().N()] << std::endl;
593 std::cout << " tracklet " << r.fItr << ", row " << iRow << ", yz= " << fY << "," << fZ << ": search hits=" << fHitYfst << " " << fHitYlst << " / " << fHitYfst1 << " " << fHitYlst1 << std::endl;
594 std::cout << " hit search :" << std::endl;
597 for ( unsigned int fIh = fHitYfst; fIh < fHitYlst; fIh++ ) {
598 assert( (signed) fIh < row.NHits() );
600 #if defined(HLTCA_GPU_TEXTURE_FETCH)
601 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + fIh);
604 #endif //HLTCA_GPU_TEXTURE_FETCH
605 int ddy = ( int )( hh.x ) - fY0;
606 int ddz = ( int )( hh.y ) - fZ0;
607 int dds = CAMath::Abs( ddy ) + CAMath::Abs( ddz );
610 std::cout << fIh << ": hityz= " << hh.x << " " << hh.y << "(" << hh.x*stepY << " " << hh.y*stepZ << "), trackyz=" << fY0 << " " << fZ0 << "(" << fY0*stepY << " " << fZ0*stepZ << "), dy,dz,ds= " << ddy << " " << ddz << " " << dds << "(" << ddy*stepY << " " << ddz*stepZ << std::endl;
619 for ( unsigned int fIh = fHitYfst1; fIh < fHitYlst1; fIh++ ) {
621 #if defined(HLTCA_GPU_TEXTURE_FETCH)
622 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + fIh);
625 #endif //HLTCA_GPU_TEXTURE_FETCH
626 int ddy = ( int )( hh.x ) - fY0;
627 int ddz = ( int )( hh.y ) - fZ0;
628 int dds = CAMath::Abs( ddy ) + CAMath::Abs( ddz );
631 std::cout << fIh << ": hityz= " << hh.x << " " << hh.y << "(" << hh.x*stepY << " " << hh.y*stepZ << "), trackyz=" << fY0 << " " << fZ0 << "(" << fY0*stepY << " " << fZ0*stepZ << "), dy,dz,ds= " << ddy << " " << ddz << " " << dds << "(" << ddy*stepY << " " << ddz*stepZ << std::endl;
639 }// end of search for the closest hit
643 #ifndef EXTERN_ROW_HITS
644 tracklet.SetRowHit(iRow, -1);
646 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
647 #endif //!EXTERN_ROW_HITS
652 std::cout << "hit search " << r.fItr << ", row " << iRow << " hit " << best << " found" << std::endl;
653 AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best, kRed, 1. );
654 AliHLTTPCCADisplay::Instance().Ask();
655 AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best, kWhite, 1 );
656 AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best );
661 #if defined(HLTCA_GPU_TEXTURE_FETCH)
662 hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + best);
665 #endif //HLTCA_GPU_TEXTURE_FETCH
667 //std::cout<<"mark 3, "<<r.fItr<<std::endl;
669 tracker.GetErrors2( iRow, *( ( AliHLTTPCCATrackParam* )&tParam ), err2Y, err2Z );
670 //std::cout<<"mark 4"<<std::endl;
672 float y = y0 + hh.x * stepY;
673 float z = z0 + hh.y * stepZ;
677 const float kFactor = tracker.Param().HitPickUpFactor() * tracker.Param().HitPickUpFactor() * 3.5 * 3.5;
678 float sy2 = kFactor * ( tParam.GetErr2Y() + err2Y );
679 float sz2 = kFactor * ( tParam.GetErr2Z() + err2Z );
680 if ( sy2 > 2. ) sy2 = 2.;
681 if ( sz2 > 2. ) sz2 = 2.;
685 std::cout << "dy,sy= " << dy << " " << CAMath::Sqrt( sy2 ) << ", dz,sz= " << dz << " " << CAMath::Sqrt( sz2 ) << std::endl;
686 std::cout << "dy,dz= " << dy << " " << dz << ", sy,sz= " << CAMath::Sqrt( sy2 ) << " " << CAMath::Sqrt( sz2 ) << ", sy,sz= " << CAMath::Sqrt( kFactor*( tParam.GetErr2Y() + err2Y ) ) << " " << CAMath::Sqrt( kFactor*( tParam.GetErr2Z() + err2Z ) ) << std::endl;
689 if ( CAMath::FMulRZ( dy, dy ) > sy2 || CAMath::FMulRZ( dz, dz ) > sz2 ) {
692 std::cout << "found hit is out of the chi2 window\n " << std::endl;
695 #ifndef EXTERN_ROW_HITS
696 tracklet.SetRowHit(iRow, -1);
698 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
699 #endif //!EXTERN_ROW_HITS
703 //if( SAVE() ) hitstore[ iRow ] = best;
704 //std::cout<<"hit search before filter: "<<r.fItr<<", row "<<iRow<<std::endl;
705 //AliHLTTPCCADisplay::Instance().DrawTracklet(tParam, hitstore, kBlue);
706 //AliHLTTPCCADisplay::Instance().Ask();
708 if ( !tParam.Filter( y, z, err2Y, err2Z, .99 ) ) {
711 std::cout << "tracklet " << r.fItr << " at row " << iRow << " : can not filter!!!! " << std::endl;
716 #ifndef EXTERN_ROW_HITS
717 tracklet.SetRowHit( iRow, best );
719 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = best;
720 #endif //!EXTERN_ROW_HITS
723 std::cout << "tracklet " << r.fItr << " after filter at row " << iRow << " : " << std::endl;
725 AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kRed );
726 AliHLTTPCCADisplay::Instance().Ask();
731 if ( r.fStage == 1 ) r.fLastRow = iRow;
732 else r.fFirstRow = iRow;
738 GPUdi() void AliHLTTPCCATrackletConstructor::CopyTrackletTempData( AliHLTTPCCAThreadMemory &rMemSrc, AliHLTTPCCAThreadMemory &rMemDst, AliHLTTPCCATrackParam &tParamSrc, AliHLTTPCCATrackParam &tParamDst)
740 //Copy Temporary Tracklet data from registers to global mem and vice versa
741 rMemDst.fStartRow = rMemSrc.fStartRow;
742 rMemDst.fEndRow = rMemSrc.fEndRow;
743 rMemDst.fFirstRow = rMemSrc.fFirstRow;
744 rMemDst.fLastRow = rMemSrc.fLastRow;
745 rMemDst.fCurrIH = rMemSrc.fCurrIH;
746 rMemDst.fGo = rMemSrc.fGo;
747 rMemDst.fStage = rMemSrc.fStage;
748 rMemDst.fNHits = rMemSrc.fNHits;
749 rMemDst.fNMissed = rMemSrc.fNMissed;
750 rMemDst.fLastY = rMemSrc.fLastY;
751 rMemDst.fLastZ = rMemSrc.fLastZ;
753 tParamDst.SetSinPhi( tParamSrc.GetSinPhi() );
754 tParamDst.SetDzDs( tParamSrc.GetDzDs() );
755 tParamDst.SetQPt( tParamSrc.GetQPt() );
756 tParamDst.SetSignCosPhi( tParamSrc.GetSignCosPhi() );
757 tParamDst.SetChi2( tParamSrc.GetChi2() );
758 tParamDst.SetNDF( tParamSrc.GetNDF() );
759 tParamDst.SetCov( 0, tParamSrc.GetCov(0) );
760 tParamDst.SetCov( 1, tParamSrc.GetCov(1) );
761 tParamDst.SetCov( 2, tParamSrc.GetCov(2) );
762 tParamDst.SetCov( 3, tParamSrc.GetCov(3) );
763 tParamDst.SetCov( 4, tParamSrc.GetCov(4) );
764 tParamDst.SetCov( 5, tParamSrc.GetCov(5) );
765 tParamDst.SetCov( 6, tParamSrc.GetCov(6) );
766 tParamDst.SetCov( 7, tParamSrc.GetCov(7) );
767 tParamDst.SetCov( 8, tParamSrc.GetCov(8) );
768 tParamDst.SetCov( 9, tParamSrc.GetCov(9) );
769 tParamDst.SetCov( 10, tParamSrc.GetCov(10) );
770 tParamDst.SetCov( 11, tParamSrc.GetCov(11) );
771 tParamDst.SetCov( 12, tParamSrc.GetCov(12) );
772 tParamDst.SetCov( 13, tParamSrc.GetCov(13) );
773 tParamDst.SetCov( 14, tParamSrc.GetCov(14) );
774 tParamDst.SetX( tParamSrc.GetX() );
775 tParamDst.SetY( tParamSrc.GetY() );
776 tParamDst.SetZ( tParamSrc.GetZ() );
779 GPUdi() int AliHLTTPCCATrackletConstructor::FetchTracklet(AliHLTTPCCATracker &tracker, AliHLTTPCCASharedMemory &sMem, int Reverse, int RowBlock, int &mustInit)
781 //Fetch a new trackled to be processed by this thread
783 int nextTracketlFirstRun = sMem.fNextTrackletFirstRun;
784 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
786 sMem.fNTracklets = *tracker.NTracklets();
787 if (sMem.fNextTrackletFirstRun)
789 #ifdef HLTCA_GPU_SCHED_FIXED_START
790 const int iSlice = tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT;
791 const int nSliceBlockOffset = HLTCA_GPU_BLOCK_COUNT * iSlice / tracker.GPUParametersConst()->fGPUnSlices;
792 const uint2 &nTracklet = tracker.BlockStartingTracklet()[blockIdx.x - nSliceBlockOffset];
794 sMem.fNextTrackletCount = nTracklet.y;
795 if (sMem.fNextTrackletCount == 0)
797 sMem.fNextTrackletFirstRun = 0;
801 if (tracker.TrackletStartHits()[nTracklet.x].RowIndex() / HLTCA_GPU_SCHED_ROW_STEP != RowBlock)
803 sMem.fNextTrackletCount = 0;
807 sMem.fNextTrackletFirst = nTracklet.x;
808 sMem.fNextTrackletNoDummy = 1;
811 #endif //HLTCA_GPU_SCHED_FIXED_START
815 const int nFetchTracks = CAMath::Max(CAMath::Min((*tracker.RowBlockPos(Reverse, RowBlock)).x - (*tracker.RowBlockPos(Reverse, RowBlock)).y, HLTCA_GPU_THREAD_COUNT - TRACKLET_CONSTRUCTOR_NMEMTHREDS), 0);
816 sMem.fNextTrackletCount = nFetchTracks;
817 const int nUseTrack = nFetchTracks ? CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).y, nFetchTracks) : 0;
818 sMem.fNextTrackletFirst = nUseTrack;
820 const int nFillTracks = CAMath::Min(nFetchTracks, nUseTrack + nFetchTracks - (*((volatile int2*) (tracker.RowBlockPos(Reverse, RowBlock)))).x);
823 const int nStartFillTrack = CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).x, nFillTracks);
824 if (nFillTracks + nStartFillTrack >= HLTCA_GPU_MAX_TRACKLETS)
826 tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_ROWBLOCK_TRACKLET_OVERFLOW;
828 for (int i = 0;i < nFillTracks;i++)
830 tracker.RowBlockTracklets(Reverse, RowBlock)[(nStartFillTrack + i) % HLTCA_GPU_MAX_TRACKLETS] = -3; //Dummy filling track
833 sMem.fNextTrackletNoDummy = 0;
838 if (sMem.fNextTrackletCount == 0)
840 return(-2); //No more track in this RowBlock
842 #if HLTCA_GPU_TRACKLET_CONSTRUCTOR_NMEMTHREDS > 0
843 else if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS)
847 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_NMEMTHREDS > 0
848 else if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS >= sMem.fNextTrackletCount)
850 return(-1); //No track in this RowBlock for this thread
852 else if (nextTracketlFirstRun)
854 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS) sMem.fNextTrackletFirstRun = 0;
856 return(sMem.fNextTrackletFirst + threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS);
860 const int nTrackPos = sMem.fNextTrackletFirst + threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS;
861 mustInit = (nTrackPos < tracker.RowBlockPos(Reverse, RowBlock)->w);
862 volatile int* const ptrTracklet = &tracker.RowBlockTracklets(Reverse, RowBlock)[nTrackPos % HLTCA_GPU_MAX_TRACKLETS];
865 while ((nTracklet = *ptrTracklet) == -1)
867 for (int i = 0;i < 20000;i++)
868 sMem.fNextTrackletStupidDummy++;
872 tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_SCHEDULE_COLLISION;
880 GPUdi() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU(AliHLTTPCCATracker *pTracker)
882 //Main Tracklet construction function that calls the scheduled (FetchTracklet) and then Processes the tracklet (mainly UpdataTracklet) and at the end stores the tracklet.
883 //Can also dispatch a tracklet to be rescheduled
884 #ifdef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
885 pTracker[0].BlockStartingTracklet()[0].x = HLTCA_GPU_EMULATION_SINGLE_TRACKLET;
886 pTracker[0].BlockStartingTracklet()[0].y = 1;
887 for (int i = 1;i < HLTCA_GPU_BLOCK_COUNT;i++)
889 pTracker[0].BlockStartingTracklet()[i].x = pTracker[0].BlockStartingTracklet()[i].y = 0;
891 #endif //HLTCA_GPU_EMULATION_SINGLE_TRACKLET
893 GPUshared() AliHLTTPCCASharedMemory sMem;
895 #ifdef HLTCA_GPU_SCHED_FIXED_START
896 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
898 sMem.fNextTrackletFirstRun = 1;
901 #endif //HLTCA_GPU_SCHED_FIXED_START
903 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
904 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
909 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
911 for (int iReverse = 0;iReverse < 2;iReverse++)
913 for (volatile int iRowBlock = 0;iRowBlock < HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1;iRowBlock++)
915 #ifdef HLTCA_GPU_SCHED_FIXED_SLICE
916 int iSlice = pTracker[0].GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % pTracker[0].GPUParametersConst()->fGPUnSlices != 0 && pTracker[0].GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT;
918 for (int iSlice = 0;iSlice < pTracker[0].GPUParametersConst()->fGPUnSlices;iSlice++)
919 #endif //HLTCA_GPU_SCHED_FIXED_SLICE
921 AliHLTTPCCATracker &tracker = pTracker[iSlice];
922 if (sMem.fNextTrackletFirstRun && iSlice != tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT)
926 /*if (!sMem.fNextTrackletFirstRun && tracker.RowBlockPos(1, HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP)->x <= tracker.RowBlockPos(1, HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP)->y)
930 int sharedRowsInitialized = 0;
934 while ((iTracklet = FetchTracklet(tracker, sMem, iReverse, iRowBlock, mustInit)) != -2)
936 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
937 CAMath::AtomicMax(&sMem.fMaxSync, threadSync);
939 threadSync = CAMath::Min(sMem.fMaxSync, 100000000 / blockDim.x / gridDim.x);
940 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
941 #ifndef HLTCA_GPU_PREFETCHDATA
942 if (!sharedRowsInitialized)
944 for (int i = threadIdx.x;i < HLTCA_ROW_COUNT * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
946 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker.SliceDataRows())[i];
948 sharedRowsInitialized = 1;
950 #endif //!HLTCA_GPU_PREFETCHDATA
951 #ifdef HLTCA_GPU_RESCHED
952 short2 storeToRowBlock;
953 int storePosition = 0;
954 if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
956 const int nReverse = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
957 const int nRowBlock = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
958 sMem.fTrackletStoreCount[nReverse][nRowBlock] = 0;
960 #endif //HLTCA_GPU_RESCHED
962 AliHLTTPCCATrackParam tParam;
963 AliHLTTPCCAThreadMemory rMem;
965 rMem.fCurrentData = 0;
967 #ifdef HLTCA_GPU_EMULATION_DEBUG_TRACKLET
968 if (iTracklet == HLTCA_GPU_EMULATION_DEBUG_TRACKLET)
970 tracker.GPUParameters()->fGPUError = 1;
972 #endif //HLTCA_GPU_EMULATION_DEBUG_TRACKLET
973 AliHLTTPCCAThreadMemory &rMemGlobal = tracker.GPUTrackletTemp()[iTracklet].fThreadMem;
974 AliHLTTPCCATrackParam &tParamGlobal = tracker.GPUTrackletTemp()[iTracklet].fParam;
977 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
979 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
980 rMem.fCurrIH = id.HitIndex();
985 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
987 else if (iTracklet >= 0)
989 CopyTrackletTempData( rMemGlobal, rMem, tParamGlobal, tParam );
991 #ifdef HLTCA_GPU_PREFETCHDATA
992 else if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS)
994 ReadData(threadIdx.x, sMem, rMem, tracker, iReverse ? (HLTCA_ROW_COUNT - 1 - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP) : (CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP)));
996 #endif //HLTCA_GPU_PREFETCHDATA
997 rMem.fItr = iTracklet;
998 rMem.fGo = (iTracklet >= 0);
1000 #ifdef HLTCA_GPU_RESCHED
1001 storeToRowBlock.x = iRowBlock + 1;
1002 storeToRowBlock.y = iReverse;
1003 #ifdef HLTCA_GPU_PREFETCHDATA
1004 rMem.fCurrentData ^= 1;
1006 #endif //HLTCA_GPU_PREFETCHDATA
1009 for (int j = HLTCA_ROW_COUNT - 1 - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP;j >= CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP);j--)
1011 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1012 if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && !(j >= rMem.fEndRow || ( j >= rMem.fStartRow && j - rMem.fStartRow % 2 == 0)))
1013 pTracker[0].StageAtSync()[threadSync++ * blockDim.x * gridDim.x + blockIdx.x * blockDim.x + threadIdx.x] = rMem.fStage + 1;
1014 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1015 #ifdef HLTCA_GPU_PREFETCHDATA
1016 if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS && j > CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP))
1018 ReadData(threadIdx.x, sMem, rMem, tracker, j - 1);
1021 #endif //HLTCA_GPU_PREFETCHDATA
1024 UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1025 if (rMem.fNMissed > kMaxRowGap && j <= rMem.fStartRow)
1028 #ifndef HLTCA_GPU_PREFETCHDATA
1030 #endif //!HLTCA_GPU_PREFETCHDATA
1033 #ifdef HLTCA_GPU_PREFETCHDATA
1035 rMem.fCurrentData ^= 1;
1036 #endif //HLTCA_GPU_PREFETCHDATA
1039 if (iTracklet >= 0 && (!rMem.fGo || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
1041 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1046 for (int j = CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP);j < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT);j++)
1048 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1049 if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && j >= rMem.fStartRow && (rMem.fStage > 0 || rMem.fCurrIH >= 0 || (j - rMem.fStartRow) % 2 == 0 ))
1050 pTracker[0].StageAtSync()[threadSync++ * blockDim.x * gridDim.x + blockIdx.x * blockDim.x + threadIdx.x] = rMem.fStage + 1;
1051 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1052 #ifdef HLTCA_GPU_PREFETCHDATA
1053 if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS && j < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT) - 1)
1055 ReadData(threadIdx.x, sMem, rMem, tracker, j + 1);
1058 #endif //HLTCA_GPU_PREFETCHDATA
1061 UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1062 #ifndef HLTCA_GPU_PREFETCHDATA
1063 //if (rMem.fNMissed > kMaxRowGap || rMem.fGo == 0) break; //DR!!! CUDA Crashes with this enabled
1064 #endif //!HLTCA_GPU_PREFETCHDATA
1066 #ifdef HLTCA_GPU_PREFETCHDATA
1068 rMem.fCurrentData ^= 1;
1069 #endif //HLTCA_GPU_PREFETCHDATA
1071 if (rMem.fGo && (rMem.fNMissed > kMaxRowGap || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
1073 #if defined(HLTCA_GPU_PREFETCHDATA)
1074 if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) )
1076 if ( !tParam.TransportToX( sMem.fRows[ rMem.fEndRow ].X(), tracker.Param().ConstBz(), .999 ) )
1077 #endif //HLTCA_GPU_PREFETCHDATA
1083 storeToRowBlock.x = (HLTCA_ROW_COUNT - rMem.fEndRow) / HLTCA_GPU_SCHED_ROW_STEP;
1084 storeToRowBlock.y = 1;
1090 if (iTracklet >= 0 && !rMem.fGo)
1092 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1096 if (rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
1098 CopyTrackletTempData( rMem, rMemGlobal, tParam, tParamGlobal );
1099 storePosition = CAMath::AtomicAdd(&sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x], 1);
1102 if (threadIdx.x % HLTCA_GPU_WARP_SIZE == 0)
1104 sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE] = 160;
1105 sMem.fEndRows[threadIdx.x / HLTCA_GPU_WARP_SIZE] = 0;
1110 CAMath::AtomicMin(&sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE], rMem.fStartRow);
1115 for (int j = sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE];j < HLTCA_ROW_COUNT;j++)
1117 UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1118 if (!rMem.fGo) break;
1125 if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) ) rMem.fGo = 0;
1127 CAMath::AtomicMax(&sMem.fEndRows[threadIdx.x / HLTCA_GPU_WARP_SIZE], rMem.fEndRow);
1133 for (int j = rMem.fEndRow;j >= 0;j--)
1135 if (!rMem.fGo) break;
1136 UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1139 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1141 #endif //HLTCA_GPU_RESCHED
1143 #ifdef HLTCA_GPU_RESCHED
1145 if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
1147 const int nReverse = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
1148 const int nRowBlock = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
1149 if (sMem.fTrackletStoreCount[nReverse][nRowBlock])
1151 sMem.fTrackletStoreCount[nReverse][nRowBlock] = CAMath::AtomicAdd(&tracker.RowBlockPos(nReverse, nRowBlock)->x, sMem.fTrackletStoreCount[nReverse][nRowBlock]);
1155 if (iTracklet >= 0 && rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
1157 tracker.RowBlockTracklets(storeToRowBlock.y, storeToRowBlock.x)[sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x] + storePosition] = iTracklet;
1160 #endif //HLTCA_GPU_RESCHED
1167 GPUdi() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(int iTracklet, AliHLTTPCCATracker &tracker)
1169 //Initialize Row Blocks
1171 #ifndef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
1172 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
1173 #ifdef HLTCA_GPU_SCHED_FIXED_START
1174 const int firstDynamicTracklet = tracker.GPUParameters()->fScheduleFirstDynamicTracklet;
1175 if (iTracklet >= firstDynamicTracklet)
1176 #endif //HLTCA_GPU_SCHED_FIXED_START
1178 #ifdef HLTCA_GPU_SCHED_FIXED_START
1179 const int firstTrackletInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[CAMath::Max(id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP * HLTCA_GPU_SCHED_ROW_STEP, 1)].z);
1181 const int firstTrackletInRowBlock = tracker.RowStartHitCountOffset()[CAMath::Max(id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP * HLTCA_GPU_SCHED_ROW_STEP, 1)].z;
1182 #endif //HLTCA_GPU_SCHED_FIXED_START
1184 if (iTracklet == firstTrackletInRowBlock)
1186 const int firstRowInNextBlock = (id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP + 1) * HLTCA_GPU_SCHED_ROW_STEP;
1187 int trackletsInRowBlock;
1188 if (firstRowInNextBlock >= HLTCA_ROW_COUNT - 3)
1189 trackletsInRowBlock = *tracker.NTracklets() - firstTrackletInRowBlock;
1191 #ifdef HLTCA_GPU_SCHED_FIXED_START
1192 trackletsInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[firstRowInNextBlock].z) - firstTrackletInRowBlock;
1194 trackletsInRowBlock = tracker.RowStartHitCountOffset()[firstRowInNextBlock].z - firstTrackletInRowBlock;
1195 #endif //HLTCA_GPU_SCHED_FIXED_START
1197 tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->x = trackletsInRowBlock;
1198 tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->w = trackletsInRowBlock;
1200 tracker.RowBlockTracklets(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)[iTracklet - firstTrackletInRowBlock] = iTracklet;
1202 #endif //!HLTCA_GPU_EMULATION_SINGLE_TRACKLET
1205 GPUg() void AliHLTTPCCATrackletConstructorInit(int iSlice)
1207 //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit
1208 AliHLTTPCCATracker &tracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker )[iSlice];
1209 int i = blockIdx.x * blockDim.x + threadIdx.x;
1210 if (i >= *tracker.NTracklets()) return;
1211 AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(i, tracker);
1214 GPUg() void AliHLTTPCCATrackletConstructorGPU()
1216 //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU
1217 AliHLTTPCCATracker *pTracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker );
1218 AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU(pTracker);
1221 GPUg() void AliHLTTPCCATrackletConstructorGPUPP(int firstSlice, int sliceCount)
1223 if (blockIdx.x >= sliceCount) return;
1224 AliHLTTPCCATracker *pTracker = &( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker )[firstSlice + blockIdx.x];
1225 AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPUPP(pTracker);
1228 GPUd() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPUPP(AliHLTTPCCATracker *tracker)
1230 GPUshared() AliHLTTPCCASharedMemory sMem;
1231 sMem.fNTracklets = *tracker->NTracklets();
1233 for (int i = threadIdx.x;i < HLTCA_ROW_COUNT * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
1235 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker->SliceDataRows())[i];
1238 for (int iTracklet = threadIdx.x;iTracklet < (*tracker->NTracklets() / HLTCA_GPU_THREAD_COUNT + 1) * HLTCA_GPU_THREAD_COUNT;iTracklet += blockDim.x)
1240 AliHLTTPCCATrackParam tParam;
1241 AliHLTTPCCAThreadMemory rMem;
1243 if (iTracklet < *tracker->NTracklets())
1245 AliHLTTPCCAHitId id = tracker->TrackletTmpStartHits()[iTracklet];
1247 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
1248 rMem.fCurrIH = id.HitIndex();
1253 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
1255 rMem.fItr = iTracklet;
1259 if (threadIdx.x % HLTCA_GPU_WARP_SIZE == 0)
1261 sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE] = 160;
1262 sMem.fEndRows[threadIdx.x / HLTCA_GPU_WARP_SIZE] = 0;
1265 if (iTracklet < *tracker->NTracklets())
1267 CAMath::AtomicMin(&sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE], rMem.fStartRow);
1270 if (iTracklet < *tracker->NTracklets())
1272 for (int j = sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE];j < HLTCA_ROW_COUNT;j++)
1274 UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, *tracker, tParam, j);
1275 if (!rMem.fGo) break;
1282 if ( !tParam.TransportToX( tracker->Row( rMem.fEndRow ).X(), tracker->Param().ConstBz(), .999 ) ) rMem.fGo = 0;
1284 CAMath::AtomicMax(&sMem.fEndRows[threadIdx.x / HLTCA_GPU_WARP_SIZE], rMem.fEndRow);
1288 if (iTracklet < *tracker->NTracklets())
1290 for (int j = rMem.fEndRow;j >= 0;j--)
1292 if (!rMem.fGo) break;
1293 UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, *tracker, tParam, j);
1295 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, *tracker, tParam );
1300 #else //HLTCA_GPUCODE
1302 GPUdi() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorCPU(AliHLTTPCCATracker &tracker)
1304 //Tracklet constructor simple CPU Function that does not neew a scheduler
1305 GPUshared() AliHLTTPCCASharedMemory sMem;
1306 sMem.fNTracklets = *tracker.NTracklets();
1307 for (int iTracklet = 0;iTracklet < *tracker.NTracklets();iTracklet++)
1309 AliHLTTPCCATrackParam tParam;
1310 AliHLTTPCCAThreadMemory rMem;
1312 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
1314 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
1315 rMem.fCurrIH = id.HitIndex();
1320 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
1322 rMem.fItr = iTracklet;
1325 for (int j = rMem.fStartRow;j < tracker.Param().NRows();j++)
1327 UpdateTracklet(1, 1, 0, iTracklet, sMem, rMem, tracker, tParam, j);
1328 if (!rMem.fGo) break;
1335 if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) ) rMem.fGo = 0;
1338 for (int j = rMem.fEndRow;j >= 0;j--)
1340 if (!rMem.fGo) break;
1341 UpdateTracklet( 1, 1, 0, iTracklet, sMem, rMem, tracker, tParam, j);
1344 StoreTracklet( 1, 1, 0, iTracklet, sMem, rMem, tracker, tParam );
1347 #endif //HLTCA_GPUCODE