]> git.uio.no Git - u/mrichter/AliRoot.git/blob - HLT/TPCLib/tracking-ca/AliHLTTPCCATrackletConstructor.cxx
b737ab7a181483d8190c7d76713ad00f89f2f454
[u/mrichter/AliRoot.git] / HLT / TPCLib / tracking-ca / AliHLTTPCCATrackletConstructor.cxx
1 // @(#) $Id: AliHLTTPCCATrackletConstructor.cxx 27042 2008-07-02 12:06:02Z richterm $
2 // **************************************************************************
3 // This file is property of and copyright by the ALICE HLT Project          *
4 // ALICE Experiment at CERN, All rights reserved.                           *
5 //                                                                          *
6 // Primary Authors: Sergey Gorbunov <sergey.gorbunov@kip.uni-heidelberg.de> *
7 //                  Ivan Kisel <kisel@kip.uni-heidelberg.de>                *
8 //                  for The ALICE HLT Project.                              *
9 //                                                                          *
10 // Permission to use, copy, modify and distribute this software and its     *
11 // documentation strictly for non-commercial purposes is hereby granted     *
12 // without fee, provided that the above copyright notice appears in all     *
13 // copies and that both the copyright notice and this permission notice     *
14 // appear in the supporting documentation. The authors make no claims       *
15 // about the suitability of this software for any purpose. It is            *
16 // provided "as is" without express or implied warranty.                    *
17 //                                                                          *
18 //***************************************************************************
19
20 #include "AliHLTTPCCATracker.h"
21 #include "AliHLTTPCCATrackParam.h"
22 #include "AliHLTTPCCATrackParam.h"
23 #include "AliHLTTPCCAGrid.h"
24 #include "AliHLTTPCCAMath.h"
25 #include "AliHLTTPCCADef.h"
26 #include "AliHLTTPCCATracklet.h"
27 #include "AliHLTTPCCATrackletConstructor.h"
28 #include "MemoryAssignmentHelpers.h"
29
30 //#include "AliHLTTPCCAPerformance.h"
31 //#include "TH1D.h"
32
33 //#define DRAW
34
35 #ifdef DRAW
36 #include "AliHLTTPCCADisplay.h"
37 #endif //DRAW
38
39 #define kMaxRowGap 4
40
41 GPUdi() void AliHLTTPCCATrackletConstructor::InitTracklet( AliHLTTPCCATrackParam &tParam )
42 {
43   //Initialize Tracklet Parameters using default values
44   tParam.InitParam();
45 }
46
47 GPUdi() void AliHLTTPCCATrackletConstructor::ReadData
48 #ifndef HLTCA_GPU_PREFETCHDATA
49 ( int /*iThread*/, AliHLTTPCCASharedMemory& /*s*/, AliHLTTPCCAThreadMemory& /*r*/, AliHLTTPCCATracker& /*tracker*/, int /*iRow*/ )
50 {
51         //Prefetch Data to shared memory
52 #else
53 ( int iThread, AliHLTTPCCASharedMemory& s, AliHLTTPCCAThreadMemory& r, AliHLTTPCCATracker& tracker, int iRow )
54 {
55   // reconstruction of tracklets, read data step
56     const AliHLTTPCCARow &row = tracker.Row( iRow );
57     //bool jr = !r.fCurrentData;
58
59     // copy hits, grid content and links
60
61     // FIXME: inefficient copy
62     //const int numberOfHitsAligned = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits());
63
64 /*      
65 #ifdef HLTCA_GPU_REORDERHITDATA
66     ushort2 *sMem1 = reinterpret_cast<ushort2 *>( s.fData[jr] );
67     for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
68       sMem1[i].x = tracker.HitDataY( row, i );
69       sMem1[i].y = tracker.HitDataZ( row, i );
70     }
71 #else
72     ushort_v *sMem1 = reinterpret_cast<ushort_v *>( s.fData[jr] );
73     for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
74       sMem1[i] = tracker.HitDataY( row, i );
75     }
76
77     ushort_v *sMem1a = reinterpret_cast<ushort_v *>( s.fData[jr] ) + numberOfHitsAligned;
78     for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
79       sMem1a[i] = tracker.HitDataZ( row, i );
80     }
81 #endif //HLTCA_GPU_REORDERHITDATA
82
83     short *sMem2 = reinterpret_cast<short *>( s.fData[jr] ) + 2 * numberOfHitsAligned;
84     for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
85       sMem2[i] = tracker.HitLinkUpData( row, i );
86     }
87         
88     unsigned short *sMem3 = reinterpret_cast<unsigned short *>( s.fData[jr] ) + 3 * numberOfHitsAligned;
89     const int n = row.FullSize(); // + grid content size
90     for ( int i = iThread; i < n; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
91       sMem3[i] = tracker.FirstHitInBin( row, i );
92     }*/
93
94         /*for (int k = 0;k < 2;k++)
95         {
96                 HLTCA_GPU_ROWCOPY* sharedMem;
97                 const HLTCA_GPU_ROWCOPY* sourceMem;
98                 int copyCount;
99                 switch (k)
100                 {
101                 case 0:
102                         sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitDataY(row) );
103                         sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
104                         copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
105                         break;
106                 case 1:
107                         sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitDataZ(row) );
108                         sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
109                         copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
110                         break;
111                 case 2:
112                         sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitLinkUpData(row) );
113                         sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
114                         copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
115                         break;
116                 case 1:
117                         sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.FirstHitInBin(row) );
118                         sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
119                         copyCount = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.FullSize()) * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
120                         break;
121                 }
122                 for (int i = iThread;i < copyCount;i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
123                 {
124                         sharedMem[i] = sourceMem[i];
125                 }
126         }*/
127
128         for (unsigned int i = iThread;i < row.FullSize() * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
129         {
130                 reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[!r.fCurrentData] ))[i] = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.FirstHitInBin(row) )[i];
131         }
132
133         const HLTCA_GPU_ROWCOPY* const sourceMem = (const HLTCA_GPU_ROWCOPY *) &row;
134         HLTCA_GPU_ROWCOPY* const sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> ( &s.fRow[!r.fCurrentData] );
135         for (unsigned int i = iThread;i < sizeof(AliHLTTPCCARow) / sizeof(HLTCA_GPU_ROWCOPY);i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
136         {
137                 sharedMem[i] = sourceMem[i];
138         }
139 #endif //!HLTCA_GPU_PREFETCHDATA
140 }
141
142
143 GPUdi() void AliHLTTPCCATrackletConstructor::StoreTracklet
144 ( int /*nBlocks*/, int /*nThreads*/, int /*iBlock*/, int /*iThread*/,
145   AliHLTTPCCASharedMemory
146 #if defined(HLTCA_GPUCODE) | defined(EXTERN_ROW_HITS)
147   &s
148 #else
149   &/*s*/
150 #endif  //!HLTCA_GPUCODE
151   , AliHLTTPCCAThreadMemory &r, AliHLTTPCCATracker &tracker, AliHLTTPCCATrackParam &tParam )
152 {
153   // reconstruction of tracklets, tracklet store step
154
155   //AliHLTTPCCAPerformance::Instance().HNHitsPerTrackCand()->Fill(r.fNHits);
156
157   do {
158     {
159         //std::cout<<"tracklet to store: "<<r.fItr<<", nhits = "<<r.fNHits<<std::endl;
160     }
161
162     if ( r.fNHits < TRACKLET_SELECTOR_MIN_HITS ) {
163       r.fNHits = 0;
164       break;
165     }
166
167     if ( 0 ) {
168       if ( 1. / .5 < CAMath::Abs( tParam.QPt() ) ) { //SG!!!
169         r.fNHits = 0;
170         break;
171       }
172     }
173
174     {
175       bool ok = 1;
176       const float *c = tParam.Cov();
177       for ( int i = 0; i < 15; i++ ) ok = ok && CAMath::Finite( c[i] );
178       for ( int i = 0; i < 5; i++ ) ok = ok && CAMath::Finite( tParam.Par()[i] );
179       ok = ok && ( tParam.X() > 50 );
180
181       if ( c[0] <= 0 || c[2] <= 0 || c[5] <= 0 || c[9] <= 0 || c[14] <= 0 ) ok = 0;
182
183       if ( !ok ) {
184         r.fNHits = 0;
185         break;
186       }
187     }
188   } while ( 0 );
189
190   if ( !SAVE() ) return;
191
192   AliHLTTPCCATracklet &tracklet = tracker.Tracklets()[r.fItr];
193
194   tracklet.SetNHits( r.fNHits );
195
196   if ( r.fNHits > 0 ) {
197 #ifdef DRAW
198     if ( 0 ) {
199       std::cout << "store tracklet " << r.fItr << ", nhits = " << r.fNHits << std::endl;
200       if ( AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 1. ) ) {
201         AliHLTTPCCADisplay::Instance().Ask();
202       }
203     }
204 #endif //DRAW
205     if ( CAMath::Abs( tParam.Par()[4] ) < 1.e-4 ) tParam.SetPar( 4, 1.e-4 );
206         if (r.fStartRow < r.fFirstRow) r.fFirstRow = r.fStartRow;
207         tracklet.SetFirstRow( r.fFirstRow );
208     tracklet.SetLastRow( r.fLastRow );
209 #ifdef HLTCA_GPUCODE
210     tracklet.SetParam( tParam.fParam );
211 #else
212     tracklet.SetParam( tParam.GetParam() );
213 #endif //HLTCA_GPUCODE
214     int w = ( r.fNHits << 16 ) + r.fItr;
215     for ( int iRow = r.fFirstRow; iRow <= r.fLastRow; iRow++ ) {
216 #ifdef EXTERN_ROW_HITS
217       int ih = tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr];
218 #else
219           int ih = tracklet.RowHit( iRow );
220 #endif //EXTERN_ROW_HITS
221       if ( ih >= 0 ) {
222 #if defined(HLTCA_GPUCODE) & !defined(HLTCA_GPU_PREFETCHDATA)
223             tracker.MaximizeHitWeight( s.fRows[ iRow ], ih, w );
224 #else
225             tracker.MaximizeHitWeight( tracker.Row( iRow ), ih, w );
226 #endif //HLTCA_GPUCODE & !HLTCA_GPU_PREFETCHDATA
227       }
228     }
229   }
230
231 }
232
233 GPUdi() void AliHLTTPCCATrackletConstructor::UpdateTracklet
234 ( int /*nBlocks*/, int /*nThreads*/, int /*iBlock*/, int /*iThread*/,
235   AliHLTTPCCASharedMemory 
236 #if defined(HLTCA_GPUCODE) | defined(EXTERN_ROW_HITS)
237   &s
238 #else
239   &/*s*/
240 #endif //HLTCA_GPUCODE
241   , AliHLTTPCCAThreadMemory &r, AliHLTTPCCATracker &tracker, AliHLTTPCCATrackParam &tParam, int iRow )
242 {
243   // reconstruction of tracklets, tracklets update step
244
245   //std::cout<<"Update tracklet: "<<r.fItr<<" "<<r.fGo<<" "<<r.fStage<<" "<<iRow<<std::endl;
246   bool drawSearch = 0;//r.fItr==2;
247   bool drawFit = 0;//r.fItr==2;
248   bool drawFitted = drawFit ;//|| 1;//r.fItr==16;
249
250   if ( !r.fGo ) return;
251
252 #ifndef EXTERN_ROW_HITS
253   AliHLTTPCCATracklet &tracklet = tracker.Tracklets()[r.fItr];
254 #endif //EXTERN_ROW_HITS
255
256 #ifdef HLTCA_GPU_PREFETCHDATA
257   const AliHLTTPCCARow &row = s.fRow[r.fCurrentData];
258 #elif defined(HLTCA_GPUCODE)
259   const AliHLTTPCCARow &row = s.fRows[iRow];
260 #else
261   const AliHLTTPCCARow &row = tracker.Row( iRow );
262 #endif //HLTCA_GPU_PREFETCHDATA
263
264   float y0 = row.Grid().YMin();
265   float stepY = row.HstepY();
266   float z0 = row.Grid().ZMin();
267   float stepZ = row.HstepZ();
268   float stepYi = row.HstepYi();
269   float stepZi = row.HstepZi();
270
271   if ( r.fStage == 0 ) { // fitting part
272     do {
273
274       if ( iRow < r.fStartRow || r.fCurrIH < 0  ) break;
275       if ( ( iRow - r.fStartRow ) % 2 != 0 )
276           {
277 #ifndef EXTERN_ROW_HITS
278                   tracklet.SetRowHit(iRow, -1);
279 #else
280                   tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
281 #endif //EXTERN_ROW_HITS
282                   break; // SG!!! - jump over the row
283           }
284
285
286 //#ifdef HLTCA_GPU_PREFETCHDATA
287 //      uint4 *tmpint4 = s.fData[r.fCurrentData];
288 //#endif
289           ushort2 hh;
290 //#ifdef HLTCA_GPU_REORDERHITDATA
291 //      hh = reinterpret_cast<ushort2*>( tmpint4 )[r.fCurrIH];
292 //#else
293 //#ifdef HLTCA_GPU_PREFETCHDATA
294 //        hh.x = reinterpret_cast<ushort_v*>( tmpint4 )[r.fCurrIH];
295 //        hh.y = reinterpret_cast<ushort_v*>( tmpint4 )[NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits()) + r.fCurrIH];
296 //#else
297 #if defined(HLTCA_GPU_TEXTURE_FETCH)
298           hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + r.fCurrIH);
299 #else
300           hh = tracker.HitData(row)[r.fCurrIH];
301 #endif //HLTCA_GPU_TEXTURE_FETCH
302 //#endif
303 //#endif
304
305       int oldIH = r.fCurrIH;
306 //#ifdef HLTCA_GPU_PREFETCHDATA
307 //      r.fCurrIH = reinterpret_cast<short*>( tmpint4 )[2 * NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits()) + r.fCurrIH]; // read from linkup data
308 //#else
309 #if defined(HLTCA_GPU_TEXTURE_FETCH)
310           r.fCurrIH = tex1Dfetch(gAliTexRefs, ((char*) tracker.Data().HitLinkUpData(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + r.fCurrIH);
311 #else
312           r.fCurrIH = tracker.HitLinkUpData(row)[r.fCurrIH]; // read from linkup data
313 #endif //HLTCA_GPU_TEXTURE_FETCH
314 //#endif
315
316       float x = row.X();
317       float y = y0 + hh.x * stepY;
318       float z = z0 + hh.y * stepZ;
319 #ifdef DRAW
320       if ( drawFit ) std::cout << " fit tracklet: new hit " << oldIH << ", xyz=" << x << " " << y << " " << z << std::endl;
321 #endif //DRAW
322
323       if ( iRow == r.fStartRow ) {
324         tParam.SetX( x );
325         tParam.SetY( y );
326         tParam.SetZ( z );
327         r.fLastY = y;
328         r.fLastZ = z;
329         #ifdef DRAW
330         if ( drawFit ) std::cout << " fit tracklet " << r.fItr << ", row " << iRow << " first row" << std::endl;
331         #endif //DRAW
332       } else {
333
334         float err2Y, err2Z;
335         float dx = x - tParam.X();
336         float dy = y - r.fLastY;//tParam.Y();
337         float dz = z - r.fLastZ;//tParam.Z();
338         r.fLastY = y;
339         r.fLastZ = z;
340
341         float ri = 1. / CAMath::Sqrt( dx * dx + dy * dy );
342         if ( iRow == r.fStartRow + 2 ) { //SG!!! important - thanks to Matthias
343           tParam.SetSinPhi( dy*ri );
344           tParam.SetSignCosPhi( dx );
345           tParam.SetDzDs( dz*ri );
346           //std::cout << "Init. errors... " << r.fItr << std::endl;
347           tracker.GetErrors2( iRow, tParam, err2Y, err2Z );
348           //std::cout << "Init. errors = " << err2Y << " " << err2Z << std::endl;
349           tParam.SetCov( 0, err2Y );
350           tParam.SetCov( 2, err2Z );
351         }
352         if ( drawFit ) {
353           #ifdef DRAW
354           std::cout << " fit tracklet " << r.fItr << ", row " << iRow << " transporting.." << std::endl;
355           std::cout << " params before transport=" << std::endl;
356           tParam.Print();
357           #endif //DRAW
358         }
359         float sinPhi, cosPhi;
360         if ( r.fNHits >= 10 && CAMath::Abs( tParam.SinPhi() ) < .99 ) {
361           sinPhi = tParam.SinPhi();
362           cosPhi = CAMath::Sqrt( 1 - sinPhi * sinPhi );
363         } else {
364           sinPhi = dy * ri;
365           cosPhi = dx * ri;
366         }
367         #ifdef DRAW
368         if ( drawFit ) std::cout << "sinPhi0 = " << sinPhi << ", cosPhi0 = " << cosPhi << std::endl;
369         #endif //DRAW
370         if ( !tParam.TransportToX( x, sinPhi, cosPhi, tracker.Param().ConstBz(), -1 ) ) {
371           #ifdef DRAW
372           if ( drawFit ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not transport!!" << std::endl;
373                   #endif //DRAW
374 #ifndef EXTERN_ROW_HITS
375           tracklet.SetRowHit( iRow, -1 );
376 #else
377                   tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
378 #endif //EXTERN_ROW_HITS
379           break;
380         }
381         //std::cout<<"mark1 "<<r.fItr<<std::endl;
382         //tParam.Print();
383         tracker.GetErrors2( iRow, tParam.GetZ(), sinPhi, cosPhi, tParam.GetDzDs(), err2Y, err2Z );
384         //std::cout<<"mark2"<<std::endl;
385
386         if ( drawFit ) {
387           #ifdef DRAW
388           std::cout << " params after transport=" << std::endl;
389           tParam.Print();
390           std::cout << "fit tracklet before filter: " << r.fItr << ", row " << iRow << " errs=" << err2Y << " " << err2Z << std::endl;
391           AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 2., 1 );
392           AliHLTTPCCADisplay::Instance().Ask();
393                   #endif //DRAW
394         }
395         if ( !tParam.Filter( y, z, err2Y, err2Z, .99 ) ) {
396           #ifdef DRAW
397           if ( drawFit ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not filter!!" << std::endl;
398           #endif //DRAW
399 #ifndef EXTERN_ROW_HITS
400           tracklet.SetRowHit( iRow, -1 );
401 #else
402                   tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
403 #endif //EXTERN_ROW_HITS
404           break;
405         }
406       }
407 #ifndef EXTERN_ROW_HITS
408       tracklet.SetRowHit( iRow, oldIH );
409 #else
410           tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = oldIH;
411 #endif //!EXTERN_ROW_HITS
412       if ( drawFit ) {
413         #ifdef DRAW
414         std::cout << "fit tracklet after filter " << r.fItr << ", row " << iRow << std::endl;
415         tParam.Print();
416         AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kGreen, 2. );
417         AliHLTTPCCADisplay::Instance().Ask();
418                 #endif //DRAW
419       }
420       r.fNHits++;
421       r.fLastRow = iRow;
422       r.fEndRow = iRow;
423       break;
424     } while ( 0 );
425
426     if ( r.fCurrIH < 0 ) {
427       #ifdef DRAW
428       if ( drawFitted ) std::cout << "fitted tracklet " << r.fItr << ", nhits=" << r.fNHits << std::endl;
429       #endif //DRAW
430       r.fStage = 1;
431       //AliHLTTPCCAPerformance::Instance().HNHitsPerSeed()->Fill(r.fNHits);
432       if ( CAMath::Abs( tParam.SinPhi() ) > .999 ) {
433         #ifdef DRAW
434         if ( drawFitted ) std::cout << " fitted tracklet  error: sinPhi=" << tParam.SinPhi() << std::endl;
435         #endif //DRAW
436         r.fNHits = 0; r.fGo = 0;
437       } else {
438         //tParam.SetCosPhi( CAMath::Sqrt(1-tParam.SinPhi()*tParam.SinPhi()) );
439       }
440       if ( drawFitted ) {
441         #ifdef DRAW
442         std::cout << "fitted tracklet " << r.fItr << " miss=" << r.fNMissed << " go=" << r.fGo << std::endl;
443         tParam.Print();
444         AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue );
445         AliHLTTPCCADisplay::Instance().Ask();
446                 #endif //DRAW
447       }
448     }
449   } else { // forward/backward searching part
450     do {
451       if ( drawSearch ) {
452         #ifdef DRAW
453         std::cout << "search tracklet " << r.fItr << " row " << iRow << " miss=" << r.fNMissed << " go=" << r.fGo << " stage=" << r.fStage << std::endl;
454         #endif //DRAW
455       }
456
457       if ( r.fStage == 2 && ( ( iRow >= r.fEndRow ) ||
458                               ( iRow >= r.fStartRow && ( iRow - r.fStartRow ) % 2 == 0 )
459                             ) ) break;
460       if ( r.fNMissed > kMaxRowGap  ) {
461         break;
462       }
463
464       r.fNMissed++;
465
466       float x = row.X();
467       float err2Y, err2Z;
468       if ( drawSearch ) {
469         #ifdef DRAW
470         std::cout << "tracklet " << r.fItr << " before transport to row " << iRow << " : " << std::endl;
471         tParam.Print();
472         #endif //DRAW
473       }
474       if ( !tParam.TransportToX( x, tParam.SinPhi(), tParam.GetCosPhi(), tracker.Param().ConstBz(), .99 ) ) {
475         #ifdef DRAW
476         if ( drawSearch ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not transport!!" << std::endl;
477         #endif //DRAW
478 #ifndef EXTERN_ROW_HITS
479                 tracklet.SetRowHit(iRow, -1);
480 #else
481                 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
482 #endif //!EXTERN_ROW_HITS
483         break;
484       }
485       if ( row.NHits() < 1 ) {
486         // skip empty row
487 #ifndef EXTERN_ROW_HITS
488                   tracklet.SetRowHit(iRow, -1);
489 #else
490                   tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
491 #endif //!EXTERN_ROW_HITS
492         break;
493       }
494       if ( drawSearch ) {
495                 #ifdef DRAW
496         std::cout << "tracklet " << r.fItr << " after transport to row " << iRow << " : " << std::endl;
497         tParam.Print();
498         AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 2., 1 );
499         AliHLTTPCCADisplay::Instance().Ask();
500                 #endif //DRAW
501       }
502 #ifdef HLTCA_GPU_PREFETCHDATA
503       uint4 *tmpint4 = s.fData[r.fCurrentData];
504 #endif //HLTCA_GPU_PREFETCHDATA
505
506 //#ifdef HLTCA_GPU_REORDERHITDATA
507 //      const ushort2 *hits = reinterpret_cast<ushort2*>( tmpint4 );
508 //#else
509 //#ifdef HLTCA_GPU_PREFETCHDATA
510 //        const ushort_v *hitsx = reinterpret_cast<ushort_v*>( tmpint4 );
511 //        const ushort_v *hitsy = reinterpret_cast<ushort_v*>( tmpint4 ) + NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits());
512 //#else
513 #ifndef HLTCA_GPU_TEXTURE_FETCH
514           const ushort2 *hits = tracker.HitData(row);
515 #endif //!HLTCA_GPU_TEXTURE_FETCH
516 //#endif
517 //#endif
518
519       float fY = tParam.GetY();
520       float fZ = tParam.GetZ();
521       int best = -1;
522
523       { // search for the closest hit
524         const int fIndYmin = row.Grid().GetBinBounded( fY - 1.f, fZ - 1.f );
525         assert( fIndYmin >= 0 );
526
527         int ds;
528         int fY0 = ( int ) ( ( fY - y0 ) * stepYi );
529         int fZ0 = ( int ) ( ( fZ - z0 ) * stepZi );
530         int ds0 = ( ( ( int )1 ) << 30 );
531         ds = ds0;
532
533         unsigned int fHitYfst = 1, fHitYlst = 0, fHitYfst1 = 1, fHitYlst1 = 0;
534
535         if ( drawSearch ) {
536 #ifdef DRAW
537           std::cout << " tracklet " << r.fItr << ", row " << iRow << ": grid N=" << row.Grid().N() << std::endl;
538           std::cout << " tracklet " << r.fItr << ", row " << iRow << ": minbin=" << fIndYmin << std::endl;
539 #endif //DRAW
540         }
541         {
542           int nY = row.Grid().Ny();
543
544 //#ifdef HLTCA_GPU_PREFETCHDATA
545 //                const unsigned short *sGridP = ( reinterpret_cast<unsigned short*>( tmpint4 ) );
546 //#else
547 #ifndef HLTCA_GPU_TEXTURE_FETCH
548                   const unsigned short *sGridP = tracker.FirstHitInBin(row);
549 #endif //!HLTCA_GPU_TEXTURE_FETCH
550 //#endif
551
552 #ifdef HLTCA_GPU_TEXTURE_FETCH
553                   fHitYfst = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin);
554                   fHitYlst = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+2);
555                   fHitYfst1 = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+nY);
556                   fHitYlst1 = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+nY+2);
557 #else
558           fHitYfst = sGridP[fIndYmin];
559           fHitYlst = sGridP[fIndYmin+2];
560           fHitYfst1 = sGridP[fIndYmin+nY];
561           fHitYlst1 = sGridP[fIndYmin+nY+2];
562 #endif //HLTCA_GPU_TEXTURE_FETCH
563           assert( (signed) fHitYfst <= row.NHits() );
564           assert( (signed) fHitYlst <= row.NHits() );
565           assert( (signed) fHitYfst1 <= row.NHits() );
566           assert( (signed) fHitYlst1 <= row.NHits() );
567           if ( drawSearch ) {
568 #ifdef DRAW
569             std::cout << " Grid, row " << iRow << ": nHits=" << row.NHits() << ", grid n=" << row.Grid().N() << ", c[n]=" << sGridP[row.Grid().N()] << std::endl;
570             std::cout << "hit steps = " << stepY << " " << stepZ << std::endl;
571             std::cout << " Grid bins:" << std::endl;
572             for ( unsigned int i = 0; i < row.Grid().N(); i++ ) {
573               std::cout << " bin " << i << ": ";
574               for ( int j = sGridP[i]; j < sGridP[i+1]; j++ ) {
575                 ushort2 hh = hits[j];
576                 float y = y0 + hh.x * stepY;
577                 float z = z0 + hh.y * stepZ;
578                 std::cout << "[" << j << "|" << y << "," << z << "] ";
579               }
580               std::cout << std::endl;
581             }
582 #endif //DRAW
583           }
584 #ifdef DRAW
585           if ( sGridP[row.Grid().N()] != row.NHits() ) {
586             std::cout << " grid, row " << iRow << ": nHits=" << row.NHits() << ", grid n=" << row.Grid().N() << ", c[n]=" << sGridP[row.Grid().N()] << std::endl;
587             //exit(0);
588           }
589 #endif //DRAW
590         }
591 #ifdef DRAW
592         if ( drawSearch ) {
593           std::cout << " tracklet " << r.fItr << ", row " << iRow << ", yz= " << fY << "," << fZ << ": search hits=" << fHitYfst << " " << fHitYlst << " / " << fHitYfst1 << " " << fHitYlst1 << std::endl;
594           std::cout << " hit search :" << std::endl;
595         }
596 #endif //DRAW
597         for ( unsigned int fIh = fHitYfst; fIh < fHitYlst; fIh++ ) {
598           assert( (signed) fIh < row.NHits() );
599           ushort2 hh;
600 #if defined(HLTCA_GPU_TEXTURE_FETCH)
601                  hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + fIh);
602 #else
603                   hh = hits[fIh];
604 #endif //HLTCA_GPU_TEXTURE_FETCH
605           int ddy = ( int )( hh.x ) - fY0;
606           int ddz = ( int )( hh.y ) - fZ0;
607           int dds = CAMath::Abs( ddy ) + CAMath::Abs( ddz );
608           if ( drawSearch ) {
609             #ifdef DRAW
610             std::cout << fIh << ": hityz= " << hh.x << " " << hh.y << "(" << hh.x*stepY << " " << hh.y*stepZ << "), trackyz=" << fY0 << " " << fZ0 << "(" << fY0*stepY << " " << fZ0*stepZ << "), dy,dz,ds= " << ddy << " " << ddz << " " << dds << "(" << ddy*stepY << " " << ddz*stepZ << std::endl;
611             #endif //DRAW
612           }
613           if ( dds < ds ) {
614             ds = dds;
615             best = fIh;
616           }
617         }
618
619                 for ( unsigned int fIh = fHitYfst1; fIh < fHitYlst1; fIh++ ) {
620           ushort2 hh;
621 #if defined(HLTCA_GPU_TEXTURE_FETCH)
622                   hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + fIh);
623 #else
624                   hh = hits[fIh];
625 #endif //HLTCA_GPU_TEXTURE_FETCH
626           int ddy = ( int )( hh.x ) - fY0;
627           int ddz = ( int )( hh.y ) - fZ0;
628           int dds = CAMath::Abs( ddy ) + CAMath::Abs( ddz );
629           if ( drawSearch ) {
630             #ifdef DRAW
631             std::cout << fIh << ": hityz= " << hh.x << " " << hh.y << "(" << hh.x*stepY << " " << hh.y*stepZ << "), trackyz=" << fY0 << " " << fZ0 << "(" << fY0*stepY << " " << fZ0*stepZ << "), dy,dz,ds= " << ddy << " " << ddz << " " << dds << "(" << ddy*stepY << " " << ddz*stepZ << std::endl;
632             #endif //DRAW
633           }
634           if ( dds < ds ) {
635             ds = dds;
636             best = fIh;
637           }
638         }
639       }// end of search for the closest hit
640
641       if ( best < 0 )
642           {
643 #ifndef EXTERN_ROW_HITS
644                   tracklet.SetRowHit(iRow, -1);
645 #else
646                   tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
647 #endif //!EXTERN_ROW_HITS
648                   break;
649           }
650       if ( drawSearch ) {
651         #ifdef DRAW
652         std::cout << "hit search " << r.fItr << ", row " << iRow << " hit " << best << " found" << std::endl;
653         AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best, kRed, 1. );
654         AliHLTTPCCADisplay::Instance().Ask();
655         AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best, kWhite, 1 );
656         AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best );
657                 #endif //DRAW
658       }
659
660       ushort2 hh;
661 #if defined(HLTCA_GPU_TEXTURE_FETCH)
662                  hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + best);
663 #else
664                   hh = hits[best];
665 #endif //HLTCA_GPU_TEXTURE_FETCH
666
667       //std::cout<<"mark 3, "<<r.fItr<<std::endl;
668       //tParam.Print();
669       tracker.GetErrors2( iRow, *( ( AliHLTTPCCATrackParam* )&tParam ), err2Y, err2Z );
670       //std::cout<<"mark 4"<<std::endl;
671
672       float y = y0 + hh.x * stepY;
673       float z = z0 + hh.y * stepZ;
674       float dy = y - fY;
675       float dz = z - fZ;
676
677       const float kFactor = tracker.Param().HitPickUpFactor() * tracker.Param().HitPickUpFactor() * 3.5 * 3.5;
678       float sy2 = kFactor * ( tParam.GetErr2Y() +  err2Y );
679       float sz2 = kFactor * ( tParam.GetErr2Z() +  err2Z );
680       if ( sy2 > 2. ) sy2 = 2.;
681       if ( sz2 > 2. ) sz2 = 2.;
682
683       if ( drawSearch ) {
684         #ifdef DRAW
685         std::cout << "dy,sy= " << dy << " " << CAMath::Sqrt( sy2 ) << ", dz,sz= " << dz << " " << CAMath::Sqrt( sz2 ) << std::endl;
686         std::cout << "dy,dz= " << dy << " " << dz << ", sy,sz= " << CAMath::Sqrt( sy2 ) << " " << CAMath::Sqrt( sz2 ) << ", sy,sz= " << CAMath::Sqrt( kFactor*( tParam.GetErr2Y() +  err2Y ) ) << " " << CAMath::Sqrt( kFactor*( tParam.GetErr2Z() +  err2Z ) ) << std::endl;
687         #endif //DRAW
688       }
689       if ( CAMath::FMulRZ( dy, dy ) > sy2 || CAMath::FMulRZ( dz, dz ) > sz2  ) {
690         if ( drawSearch ) {
691           #ifdef DRAW
692           std::cout << "found hit is out of the chi2 window\n " << std::endl;
693           #endif //DRAW
694         }
695 #ifndef EXTERN_ROW_HITS
696                 tracklet.SetRowHit(iRow, -1);
697 #else
698                 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
699 #endif //!EXTERN_ROW_HITS
700         break;
701       }
702 #ifdef DRAW
703       //if( SAVE() ) hitstore[ iRow ] = best;
704       //std::cout<<"hit search before filter: "<<r.fItr<<", row "<<iRow<<std::endl;
705       //AliHLTTPCCADisplay::Instance().DrawTracklet(tParam, hitstore, kBlue);
706       //AliHLTTPCCADisplay::Instance().Ask();
707 #endif //DRAW
708       if ( !tParam.Filter( y, z, err2Y, err2Z, .99 ) ) {
709         if ( drawSearch ) {
710           #ifdef DRAW
711           std::cout << "tracklet " << r.fItr << " at row " << iRow << " : can not filter!!!! " << std::endl;
712           #endif //DRAW
713         }
714         break;
715       }
716 #ifndef EXTERN_ROW_HITS
717           tracklet.SetRowHit( iRow, best );
718 #else
719           tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = best;
720 #endif //!EXTERN_ROW_HITS
721       if ( drawSearch ) {
722         #ifdef DRAW
723         std::cout << "tracklet " << r.fItr << " after filter at row " << iRow << " : " << std::endl;
724         tParam.Print();
725         AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kRed );
726         AliHLTTPCCADisplay::Instance().Ask();
727                 #endif //DRAW
728       }
729       r.fNHits++;
730       r.fNMissed = 0;
731       if ( r.fStage == 1 ) r.fLastRow = iRow;
732       else r.fFirstRow = iRow;
733     } while ( 0 );
734   }
735 }
736
737 #ifdef HLTCA_GPUCODE
738 GPUdi() void AliHLTTPCCATrackletConstructor::CopyTrackletTempData( AliHLTTPCCAThreadMemory &rMemSrc, AliHLTTPCCAThreadMemory &rMemDst, AliHLTTPCCATrackParam &tParamSrc, AliHLTTPCCATrackParam &tParamDst)
739 {
740         //Copy Temporary Tracklet data from registers to global mem and vice versa
741         rMemDst.fStartRow = rMemSrc.fStartRow;
742         rMemDst.fEndRow = rMemSrc.fEndRow;
743         rMemDst.fFirstRow = rMemSrc.fFirstRow;
744         rMemDst.fLastRow = rMemSrc.fLastRow;
745         rMemDst.fCurrIH =  rMemSrc.fCurrIH;
746         rMemDst.fGo = rMemSrc.fGo;
747         rMemDst.fStage = rMemSrc.fStage;
748         rMemDst.fNHits = rMemSrc.fNHits;
749         rMemDst.fNMissed = rMemSrc.fNMissed;
750         rMemDst.fLastY = rMemSrc.fLastY;
751         rMemDst.fLastZ = rMemSrc.fLastZ;
752
753         tParamDst.SetSinPhi( tParamSrc.GetSinPhi() );
754         tParamDst.SetDzDs( tParamSrc.GetDzDs() );
755         tParamDst.SetQPt( tParamSrc.GetQPt() );
756         tParamDst.SetSignCosPhi( tParamSrc.GetSignCosPhi() );
757         tParamDst.SetChi2( tParamSrc.GetChi2() );
758         tParamDst.SetNDF( tParamSrc.GetNDF() );
759         tParamDst.SetCov( 0, tParamSrc.GetCov(0) );
760         tParamDst.SetCov( 1, tParamSrc.GetCov(1) );
761         tParamDst.SetCov( 2, tParamSrc.GetCov(2) );
762         tParamDst.SetCov( 3, tParamSrc.GetCov(3) );
763         tParamDst.SetCov( 4, tParamSrc.GetCov(4) );
764         tParamDst.SetCov( 5, tParamSrc.GetCov(5) );
765         tParamDst.SetCov( 6, tParamSrc.GetCov(6) );
766         tParamDst.SetCov( 7, tParamSrc.GetCov(7) );
767         tParamDst.SetCov( 8, tParamSrc.GetCov(8) );
768         tParamDst.SetCov( 9, tParamSrc.GetCov(9) );
769         tParamDst.SetCov( 10, tParamSrc.GetCov(10) );
770         tParamDst.SetCov( 11, tParamSrc.GetCov(11) );
771         tParamDst.SetCov( 12, tParamSrc.GetCov(12) );
772         tParamDst.SetCov( 13, tParamSrc.GetCov(13) );
773         tParamDst.SetCov( 14, tParamSrc.GetCov(14) );
774         tParamDst.SetX( tParamSrc.GetX() );
775         tParamDst.SetY( tParamSrc.GetY() );
776         tParamDst.SetZ( tParamSrc.GetZ() );
777 }
778
779 GPUdi() int AliHLTTPCCATrackletConstructor::FetchTracklet(AliHLTTPCCATracker &tracker, AliHLTTPCCASharedMemory &sMem, int Reverse, int RowBlock, int &mustInit)
780 {
781         //Fetch a new trackled to be processed by this thread
782         __syncthreads();
783         int nextTracketlFirstRun = sMem.fNextTrackletFirstRun;
784         if (threadIdx.x  == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
785         {
786                 sMem.fNTracklets = *tracker.NTracklets();
787                 if (sMem.fNextTrackletFirstRun)
788                 {
789 #ifdef HLTCA_GPU_SCHED_FIXED_START
790                         const int iSlice = tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT;
791                         const int nSliceBlockOffset = HLTCA_GPU_BLOCK_COUNT * iSlice / tracker.GPUParametersConst()->fGPUnSlices;
792                         const uint2 &nTracklet = tracker.BlockStartingTracklet()[blockIdx.x - nSliceBlockOffset];
793
794                         sMem.fNextTrackletCount = nTracklet.y;
795                         if (sMem.fNextTrackletCount == 0)
796                         {
797                                 sMem.fNextTrackletFirstRun = 0;
798                         }
799                         else
800                         {
801                                 if (tracker.TrackletStartHits()[nTracklet.x].RowIndex() / HLTCA_GPU_SCHED_ROW_STEP != RowBlock)
802                                 {
803                                         sMem.fNextTrackletCount = 0;
804                                 }
805                                 else
806                                 {
807                                         sMem.fNextTrackletFirst = nTracklet.x;
808                                         sMem.fNextTrackletNoDummy = 1;
809                                 }
810                         }
811 #endif //HLTCA_GPU_SCHED_FIXED_START
812                 }
813                 else
814                 {
815                         const int nFetchTracks = CAMath::Max(CAMath::Min((*tracker.RowBlockPos(Reverse, RowBlock)).x - (*tracker.RowBlockPos(Reverse, RowBlock)).y, HLTCA_GPU_THREAD_COUNT - TRACKLET_CONSTRUCTOR_NMEMTHREDS), 0);
816                         sMem.fNextTrackletCount = nFetchTracks;
817                         const int nUseTrack = nFetchTracks ? CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).y, nFetchTracks) : 0;
818                         sMem.fNextTrackletFirst = nUseTrack;
819
820                         const int nFillTracks = CAMath::Min(nFetchTracks, nUseTrack + nFetchTracks - (*((volatile int2*) (tracker.RowBlockPos(Reverse, RowBlock)))).x);
821                         if (nFillTracks > 0)
822                         {
823                                 const int nStartFillTrack = CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).x, nFillTracks);
824                                 if (nFillTracks + nStartFillTrack >= HLTCA_GPU_MAX_TRACKLETS)
825                                 {
826                                         tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_ROWBLOCK_TRACKLET_OVERFLOW;
827                                 }
828                                 for (int i = 0;i < nFillTracks;i++)
829                                 {
830                                         tracker.RowBlockTracklets(Reverse, RowBlock)[(nStartFillTrack + i) % HLTCA_GPU_MAX_TRACKLETS] = -3;     //Dummy filling track
831                                 }
832                         }
833                         sMem.fNextTrackletNoDummy = 0;
834                 }
835         }
836         __syncthreads();
837         mustInit = 0;
838         if (sMem.fNextTrackletCount == 0)
839         {
840                 return(-2);             //No more track in this RowBlock
841         }
842 #if HLTCA_GPU_TRACKLET_CONSTRUCTOR_NMEMTHREDS > 0
843         else if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS)
844         {
845                 return(-1);
846         }
847 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_NMEMTHREDS > 0
848         else if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS >= sMem.fNextTrackletCount)
849         {
850                 return(-1);             //No track in this RowBlock for this thread
851         }
852         else if (nextTracketlFirstRun)
853         {
854                 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS) sMem.fNextTrackletFirstRun = 0;
855                 mustInit = 1;
856                 return(sMem.fNextTrackletFirst + threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS);
857         }
858         else
859         {
860                 const int nTrackPos = sMem.fNextTrackletFirst + threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS;
861                 mustInit = (nTrackPos < tracker.RowBlockPos(Reverse, RowBlock)->w);
862                 volatile int* const ptrTracklet = &tracker.RowBlockTracklets(Reverse, RowBlock)[nTrackPos % HLTCA_GPU_MAX_TRACKLETS];
863                 int nTracklet;
864                 int nTryCount = 0;
865                 while ((nTracklet = *ptrTracklet) == -1)
866                 {
867                         for (int i = 0;i < 20000;i++)
868                                 sMem.fNextTrackletStupidDummy++;
869                         nTryCount++;
870                         if (nTryCount > 30)
871                         {
872                                 tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_SCHEDULE_COLLISION;
873                                 return(-1);
874                         }
875                 };
876                 return(nTracklet);
877         }
878 }
879
880 GPUdi() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU(AliHLTTPCCATracker *pTracker)
881 {
882         //Main Tracklet construction function that calls the scheduled (FetchTracklet) and then Processes the tracklet (mainly UpdataTracklet) and at the end stores the tracklet.
883         //Can also dispatch a tracklet to be rescheduled
884 #ifdef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
885         pTracker[0].BlockStartingTracklet()[0].x = HLTCA_GPU_EMULATION_SINGLE_TRACKLET;
886         pTracker[0].BlockStartingTracklet()[0].y = 1;
887         for (int i = 1;i < HLTCA_GPU_BLOCK_COUNT;i++)
888         {
889                 pTracker[0].BlockStartingTracklet()[i].x = pTracker[0].BlockStartingTracklet()[i].y = 0;
890         }
891 #endif //HLTCA_GPU_EMULATION_SINGLE_TRACKLET
892
893         GPUshared() AliHLTTPCCASharedMemory sMem;
894
895 #ifdef HLTCA_GPU_SCHED_FIXED_START
896         if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
897         {
898                 sMem.fNextTrackletFirstRun = 1;
899         }
900         __syncthreads();
901 #endif //HLTCA_GPU_SCHED_FIXED_START
902
903 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
904         if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
905         {
906                 sMem.fMaxSync = 0;
907         }
908         int threadSync = 0;
909 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
910
911         for (int iReverse = 0;iReverse < 2;iReverse++)
912         {
913                 for (volatile int iRowBlock = 0;iRowBlock < HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1;iRowBlock++)
914                 {
915 #ifdef HLTCA_GPU_SCHED_FIXED_SLICE
916                         int iSlice = pTracker[0].GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % pTracker[0].GPUParametersConst()->fGPUnSlices != 0 && pTracker[0].GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT;
917 #else
918                         for (int iSlice = 0;iSlice < pTracker[0].GPUParametersConst()->fGPUnSlices;iSlice++)
919 #endif //HLTCA_GPU_SCHED_FIXED_SLICE
920                         {
921                                 AliHLTTPCCATracker &tracker = pTracker[iSlice];
922                                 if (sMem.fNextTrackletFirstRun && iSlice != tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT)
923                                 {
924                                         continue;
925                                 }
926                                 /*if (!sMem.fNextTrackletFirstRun && tracker.RowBlockPos(1, HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP)->x <= tracker.RowBlockPos(1, HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP)->y)
927                                 {
928                                         continue;
929                                 }*/
930                                 int sharedRowsInitialized = 0;
931
932                                 int iTracklet;
933                                 int mustInit;
934                                 while ((iTracklet = FetchTracklet(tracker, sMem, iReverse, iRowBlock, mustInit)) != -2)
935                                 {
936 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
937                                         CAMath::AtomicMax(&sMem.fMaxSync, threadSync);
938                                         __syncthreads();
939                                         threadSync = CAMath::Min(sMem.fMaxSync, 100000000 / blockDim.x / gridDim.x);
940 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
941 #ifndef HLTCA_GPU_PREFETCHDATA
942                                         if (!sharedRowsInitialized)
943                                         {
944                                                 for (int i = threadIdx.x;i < HLTCA_ROW_COUNT * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
945                                                 {
946                                                         reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker.SliceDataRows())[i];
947                                                 }
948                                                 sharedRowsInitialized = 1;
949                                         }
950 #endif //!HLTCA_GPU_PREFETCHDATA
951 #ifdef HLTCA_GPU_RESCHED
952                                         short2 storeToRowBlock;
953                                         int storePosition = 0;
954                                         if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
955                                         {
956                                                 const int nReverse = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
957                                                 const int nRowBlock = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
958                                                 sMem.fTrackletStoreCount[nReverse][nRowBlock] = 0;
959                                         }
960 #endif //HLTCA_GPU_RESCHED
961                                         __syncthreads();
962                                         AliHLTTPCCATrackParam tParam;
963                                         AliHLTTPCCAThreadMemory rMem;
964
965                                         rMem.fCurrentData = 0;
966
967 #ifdef HLTCA_GPU_EMULATION_DEBUG_TRACKLET
968                                         if (iTracklet == HLTCA_GPU_EMULATION_DEBUG_TRACKLET)
969                                         {
970                                                 tracker.GPUParameters()->fGPUError = 1;
971                                         }
972 #endif //HLTCA_GPU_EMULATION_DEBUG_TRACKLET
973                                         AliHLTTPCCAThreadMemory &rMemGlobal = tracker.GPUTrackletTemp()[iTracklet].fThreadMem;
974                                         AliHLTTPCCATrackParam &tParamGlobal = tracker.GPUTrackletTemp()[iTracklet].fParam;
975                                         if (mustInit)
976                                         {
977                                                 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
978
979                                                 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
980                                                 rMem.fCurrIH = id.HitIndex();
981                                                 rMem.fStage = 0;
982                                                 rMem.fNHits = 0;
983                                                 rMem.fNMissed = 0;
984
985                                                 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
986                                         }
987                                         else if (iTracklet >= 0)
988                                         {
989                                                 CopyTrackletTempData( rMemGlobal, rMem, tParamGlobal, tParam );
990                                         }
991 #ifdef HLTCA_GPU_PREFETCHDATA
992                                         else if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS)
993                                         {
994                                                 ReadData(threadIdx.x, sMem, rMem, tracker, iReverse ? (HLTCA_ROW_COUNT - 1 - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP) : (CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP)));
995                                         }
996 #endif //HLTCA_GPU_PREFETCHDATA
997                                         rMem.fItr = iTracklet;
998                                         rMem.fGo = (iTracklet >= 0);
999
1000 #ifdef HLTCA_GPU_RESCHED
1001                                         storeToRowBlock.x = iRowBlock + 1;
1002                                         storeToRowBlock.y = iReverse;
1003 #ifdef HLTCA_GPU_PREFETCHDATA
1004                                         rMem.fCurrentData ^= 1;
1005                                         __syncthreads();
1006 #endif //HLTCA_GPU_PREFETCHDATA
1007                                         if (iReverse)
1008                                         {
1009                                                 for (int j = HLTCA_ROW_COUNT - 1 - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP;j >= CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP);j--)
1010                                                 {
1011 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1012                                                         if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && !(j >= rMem.fEndRow || ( j >= rMem.fStartRow && j - rMem.fStartRow % 2 == 0)))
1013                                                                 pTracker[0].StageAtSync()[threadSync++ * blockDim.x * gridDim.x + blockIdx.x * blockDim.x + threadIdx.x] = rMem.fStage + 1;
1014 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1015 #ifdef HLTCA_GPU_PREFETCHDATA
1016                                                         if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS && j > CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP))
1017                                                         {
1018                                                                 ReadData(threadIdx.x, sMem, rMem, tracker, j - 1);
1019                                                         }
1020                                                         else
1021 #endif //HLTCA_GPU_PREFETCHDATA
1022                                                         if (iTracklet >= 0)
1023                                                         {
1024                                                                 UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1025                                                                 if (rMem.fNMissed > kMaxRowGap && j <= rMem.fStartRow)
1026                                                                 {
1027                                                                         rMem.fGo = 0;
1028 #ifndef HLTCA_GPU_PREFETCHDATA
1029                                                                         break;
1030 #endif //!HLTCA_GPU_PREFETCHDATA
1031                                                                 }
1032                                                         }
1033 #ifdef HLTCA_GPU_PREFETCHDATA
1034                                                         __syncthreads();
1035                                                         rMem.fCurrentData ^= 1;
1036 #endif //HLTCA_GPU_PREFETCHDATA
1037                                                 }
1038                                                         
1039                                                 if (iTracklet >= 0 && (!rMem.fGo || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
1040                                                 {
1041                                                         StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1042                                                 }
1043                                         }
1044                                         else
1045                                         {
1046                                                 for (int j = CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP);j < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT);j++)
1047                                                 {
1048 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1049                                                         if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && j >= rMem.fStartRow && (rMem.fStage > 0 || rMem.fCurrIH >= 0 || (j - rMem.fStartRow) % 2 == 0 ))
1050                                                                 pTracker[0].StageAtSync()[threadSync++ * blockDim.x * gridDim.x + blockIdx.x * blockDim.x + threadIdx.x] = rMem.fStage + 1;
1051 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1052 #ifdef HLTCA_GPU_PREFETCHDATA
1053                                                         if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS && j < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT) - 1)
1054                                                         {
1055                                                                 ReadData(threadIdx.x, sMem, rMem, tracker, j + 1);
1056                                                         }
1057                                                         else
1058 #endif  //HLTCA_GPU_PREFETCHDATA
1059                                                         if (iTracklet >= 0)
1060                                                         {
1061                                                                 UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1062 #ifndef HLTCA_GPU_PREFETCHDATA
1063                                                                 //if (rMem.fNMissed > kMaxRowGap || rMem.fGo == 0) break;       //DR!!! CUDA Crashes with this enabled
1064 #endif //!HLTCA_GPU_PREFETCHDATA
1065                                                         }
1066 #ifdef HLTCA_GPU_PREFETCHDATA
1067                                                         __syncthreads();
1068                                                         rMem.fCurrentData ^= 1;
1069 #endif //HLTCA_GPU_PREFETCHDATA
1070                                                 }
1071                                                 if (rMem.fGo && (rMem.fNMissed > kMaxRowGap || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
1072                                                 {
1073 #if defined(HLTCA_GPU_PREFETCHDATA)
1074                                                         if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) )
1075 #else
1076                                                         if ( !tParam.TransportToX( sMem.fRows[ rMem.fEndRow ].X(), tracker.Param().ConstBz(), .999 ) )
1077 #endif //HLTCA_GPU_PREFETCHDATA
1078                                                         {
1079                                                                 rMem.fGo = 0;
1080                                                         }
1081                                                         else
1082                                                         {
1083                                                                 storeToRowBlock.x = (HLTCA_ROW_COUNT - rMem.fEndRow) / HLTCA_GPU_SCHED_ROW_STEP;
1084                                                                 storeToRowBlock.y = 1;
1085                                                                 rMem.fNMissed = 0;
1086                                                                 rMem.fStage = 2;
1087                                                         }
1088                                                 }
1089
1090                                                 if (iTracklet >= 0 && !rMem.fGo)
1091                                                 {
1092                                                         StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1093                                                 }
1094                                         }
1095
1096                                         if (rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
1097                                         {
1098                                                 CopyTrackletTempData( rMem, rMemGlobal, tParam, tParamGlobal );
1099                                                 storePosition = CAMath::AtomicAdd(&sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x], 1);
1100                                         }
1101 #else
1102                                         if (threadIdx.x % HLTCA_GPU_WARP_SIZE == 0)
1103                                         {
1104                                                 sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE] = 160;
1105                                                 sMem.fEndRows[threadIdx.x / HLTCA_GPU_WARP_SIZE] = 0;
1106                                         }
1107                                         __syncthreads();
1108                                         if (iTracklet >= 0)
1109                                         {
1110                                                 CAMath::AtomicMin(&sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE], rMem.fStartRow);
1111                                         }
1112                                         __syncthreads();
1113                                         if (iTracklet >= 0)
1114                                         {
1115                                                 for (int j = sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE];j < HLTCA_ROW_COUNT;j++)
1116                                                 {
1117                                                         UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1118                                                         if (!rMem.fGo) break;
1119                                                 }
1120
1121                                                 rMem.fNMissed = 0;
1122                                                 rMem.fStage = 2;
1123                                                 if ( rMem.fGo )
1124                                                 {
1125                                                         if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) )  rMem.fGo = 0;
1126                                                 }
1127                                                 CAMath::AtomicMax(&sMem.fEndRows[threadIdx.x / HLTCA_GPU_WARP_SIZE], rMem.fEndRow);
1128                                         }
1129
1130                                         __syncthreads();
1131                                         if (iTracklet >= 0)
1132                                         {
1133                                                 for (int j = rMem.fEndRow;j >= 0;j--)
1134                                                 {
1135                                                         if (!rMem.fGo) break;
1136                                                         UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1137                                                 }
1138
1139                                                 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1140                                         }
1141 #endif //HLTCA_GPU_RESCHED
1142
1143 #ifdef HLTCA_GPU_RESCHED
1144                                         __syncthreads();
1145                                         if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
1146                                         {
1147                                                 const int nReverse = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
1148                                                 const int nRowBlock = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
1149                                                 if (sMem.fTrackletStoreCount[nReverse][nRowBlock])
1150                                                 {
1151                                                         sMem.fTrackletStoreCount[nReverse][nRowBlock] = CAMath::AtomicAdd(&tracker.RowBlockPos(nReverse, nRowBlock)->x, sMem.fTrackletStoreCount[nReverse][nRowBlock]);
1152                                                 }
1153                                         }
1154                                         __syncthreads();
1155                                         if (iTracklet >= 0 && rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
1156                                         {
1157                                                 tracker.RowBlockTracklets(storeToRowBlock.y, storeToRowBlock.x)[sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x] + storePosition] = iTracklet;
1158                                         }
1159                                         __syncthreads();
1160 #endif //HLTCA_GPU_RESCHED
1161                                 }
1162                         }
1163                 }
1164         }
1165 }
1166
1167 GPUdi() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(int iTracklet, AliHLTTPCCATracker &tracker)
1168 {
1169         //Initialize Row Blocks
1170
1171 #ifndef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
1172 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
1173 #ifdef HLTCA_GPU_SCHED_FIXED_START
1174         const int firstDynamicTracklet = tracker.GPUParameters()->fScheduleFirstDynamicTracklet;
1175         if (iTracklet >= firstDynamicTracklet)
1176 #endif //HLTCA_GPU_SCHED_FIXED_START
1177         {
1178 #ifdef HLTCA_GPU_SCHED_FIXED_START
1179                 const int firstTrackletInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[CAMath::Max(id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP * HLTCA_GPU_SCHED_ROW_STEP, 1)].z);
1180 #else
1181                 const int firstTrackletInRowBlock = tracker.RowStartHitCountOffset()[CAMath::Max(id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP * HLTCA_GPU_SCHED_ROW_STEP, 1)].z;
1182 #endif //HLTCA_GPU_SCHED_FIXED_START
1183
1184                 if (iTracklet == firstTrackletInRowBlock)
1185                 {
1186                         const int firstRowInNextBlock = (id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP + 1) * HLTCA_GPU_SCHED_ROW_STEP;
1187                         int trackletsInRowBlock;
1188                         if (firstRowInNextBlock >= HLTCA_ROW_COUNT - 3)
1189                                 trackletsInRowBlock = *tracker.NTracklets() - firstTrackletInRowBlock;
1190                         else
1191 #ifdef HLTCA_GPU_SCHED_FIXED_START
1192                                 trackletsInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[firstRowInNextBlock].z) - firstTrackletInRowBlock;
1193 #else
1194                                 trackletsInRowBlock = tracker.RowStartHitCountOffset()[firstRowInNextBlock].z - firstTrackletInRowBlock;
1195 #endif //HLTCA_GPU_SCHED_FIXED_START
1196
1197                         tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->x = trackletsInRowBlock;
1198                         tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->w = trackletsInRowBlock;
1199                 }
1200                 tracker.RowBlockTracklets(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)[iTracklet - firstTrackletInRowBlock] = iTracklet;
1201         }
1202 #endif //!HLTCA_GPU_EMULATION_SINGLE_TRACKLET
1203 }
1204
1205 GPUg() void AliHLTTPCCATrackletConstructorInit(int iSlice)
1206 {
1207         //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit
1208         AliHLTTPCCATracker &tracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker )[iSlice];
1209         int i = blockIdx.x * blockDim.x + threadIdx.x;
1210         if (i >= *tracker.NTracklets()) return;
1211         AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(i, tracker);
1212 }
1213
1214 GPUg() void AliHLTTPCCATrackletConstructorGPU()
1215 {
1216         //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU
1217         AliHLTTPCCATracker *pTracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker );
1218         AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU(pTracker);
1219 }
1220
1221 GPUg() void AliHLTTPCCATrackletConstructorGPUPP(int firstSlice, int sliceCount)
1222 {
1223         if (blockIdx.x >= sliceCount) return;
1224         AliHLTTPCCATracker *pTracker = &( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker )[firstSlice + blockIdx.x];
1225         AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPUPP(pTracker);
1226 }
1227
1228 GPUd() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPUPP(AliHLTTPCCATracker *tracker)
1229 {
1230         GPUshared() AliHLTTPCCASharedMemory sMem;
1231         sMem.fNTracklets = *tracker->NTracklets();
1232
1233         for (int i = threadIdx.x;i < HLTCA_ROW_COUNT * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
1234         {
1235                 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker->SliceDataRows())[i];
1236         }
1237
1238         for (int iTracklet = threadIdx.x;iTracklet < (*tracker->NTracklets() / HLTCA_GPU_THREAD_COUNT + 1) * HLTCA_GPU_THREAD_COUNT;iTracklet += blockDim.x)
1239         {
1240                 AliHLTTPCCATrackParam tParam;
1241                 AliHLTTPCCAThreadMemory rMem;
1242                 
1243                 if (iTracklet < *tracker->NTracklets())
1244                 {
1245                         AliHLTTPCCAHitId id = tracker->TrackletTmpStartHits()[iTracklet];
1246
1247                         rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
1248                         rMem.fCurrIH = id.HitIndex();
1249                         rMem.fStage = 0;
1250                         rMem.fNHits = 0;
1251                         rMem.fNMissed = 0;
1252
1253                         AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
1254
1255                         rMem.fItr = iTracklet;
1256                         rMem.fGo = 1;
1257                 }
1258
1259                 if (threadIdx.x % HLTCA_GPU_WARP_SIZE == 0)
1260                 {
1261                         sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE] = 160;
1262                         sMem.fEndRows[threadIdx.x / HLTCA_GPU_WARP_SIZE] = 0;
1263                 }
1264                 __syncthreads();
1265                 if (iTracklet < *tracker->NTracklets())
1266                 {
1267                         CAMath::AtomicMin(&sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE], rMem.fStartRow);
1268                 }
1269                 __syncthreads();
1270                 if (iTracklet < *tracker->NTracklets())
1271                 {
1272                         for (int j = sMem.fStartRows[threadIdx.x / HLTCA_GPU_WARP_SIZE];j < HLTCA_ROW_COUNT;j++)
1273                         {
1274                                 UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, *tracker, tParam, j);
1275                                 if (!rMem.fGo) break;
1276                         }
1277
1278                         rMem.fNMissed = 0;
1279                         rMem.fStage = 2;
1280                         if ( rMem.fGo )
1281                         {
1282                                 if ( !tParam.TransportToX( tracker->Row( rMem.fEndRow ).X(), tracker->Param().ConstBz(), .999 ) )  rMem.fGo = 0;
1283                         }
1284                         CAMath::AtomicMax(&sMem.fEndRows[threadIdx.x / HLTCA_GPU_WARP_SIZE], rMem.fEndRow);
1285                 }
1286
1287                 __syncthreads();
1288                 if (iTracklet < *tracker->NTracklets())
1289                 {
1290                         for (int j = rMem.fEndRow;j >= 0;j--)
1291                         {
1292                                 if (!rMem.fGo) break;
1293                                 UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, *tracker, tParam, j);
1294                         }
1295                         StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, *tracker, tParam );
1296                 }
1297         }
1298 }
1299
1300 #else //HLTCA_GPUCODE
1301
1302 GPUdi() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorCPU(AliHLTTPCCATracker &tracker)
1303 {
1304         //Tracklet constructor simple CPU Function that does not neew a scheduler
1305         GPUshared() AliHLTTPCCASharedMemory sMem;
1306         sMem.fNTracklets = *tracker.NTracklets();
1307         for (int iTracklet = 0;iTracklet < *tracker.NTracklets();iTracklet++)
1308         {
1309                 AliHLTTPCCATrackParam tParam;
1310                 AliHLTTPCCAThreadMemory rMem;
1311                 
1312                 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
1313
1314                 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
1315                 rMem.fCurrIH = id.HitIndex();
1316                 rMem.fStage = 0;
1317                 rMem.fNHits = 0;
1318                 rMem.fNMissed = 0;
1319
1320                 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
1321
1322                 rMem.fItr = iTracklet;
1323                 rMem.fGo = 1;
1324
1325                 for (int j = rMem.fStartRow;j < tracker.Param().NRows();j++)
1326                 {
1327                         UpdateTracklet(1, 1, 0, iTracklet, sMem, rMem, tracker, tParam, j);
1328                         if (!rMem.fGo) break;
1329                 }
1330
1331                 rMem.fNMissed = 0;
1332                 rMem.fStage = 2;
1333                 if ( rMem.fGo )
1334                 {
1335                         if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) ) rMem.fGo = 0;
1336                 }
1337
1338                 for (int j = rMem.fEndRow;j >= 0;j--)
1339                 {
1340                         if (!rMem.fGo) break;
1341                         UpdateTracklet( 1, 1, 0, iTracklet, sMem, rMem, tracker, tParam, j);
1342                 }
1343
1344                 StoreTracklet( 1, 1, 0, iTracklet, sMem, rMem, tracker, tParam );
1345         }
1346 }
1347 #endif //HLTCA_GPUCODE