]> git.uio.no Git - u/mrichter/AliRoot.git/blob - HLT/TPCLib/tracking-ca/AliHLTTPCCATrackletConstructor.cxx
cosmetic
[u/mrichter/AliRoot.git] / HLT / TPCLib / tracking-ca / AliHLTTPCCATrackletConstructor.cxx
1 // @(#) $Id: AliHLTTPCCATrackletConstructor.cxx 27042 2008-07-02 12:06:02Z richterm $
2 // **************************************************************************
3 // This file is property of and copyright by the ALICE HLT Project          *
4 // ALICE Experiment at CERN, All rights reserved.                           *
5 //                                                                          *
6 // Primary Authors: Sergey Gorbunov <sergey.gorbunov@kip.uni-heidelberg.de> *
7 //                  Ivan Kisel <kisel@kip.uni-heidelberg.de>                *
8 //                  for The ALICE HLT Project.                              *
9 //                                                                          *
10 // Permission to use, copy, modify and distribute this software and its     *
11 // documentation strictly for non-commercial purposes is hereby granted     *
12 // without fee, provided that the above copyright notice appears in all     *
13 // copies and that both the copyright notice and this permission notice     *
14 // appear in the supporting documentation. The authors make no claims       *
15 // about the suitability of this software for any purpose. It is            *
16 // provided "as is" without express or implied warranty.                    *
17 //                                                                          *
18 //***************************************************************************
19
20 #include "AliHLTTPCCATracker.h"
21 #include "AliHLTTPCCATrackParam.h"
22 #include "AliHLTTPCCATrackParam.h"
23 #include "AliHLTTPCCAGrid.h"
24 #include "AliHLTTPCCAMath.h"
25 #include "AliHLTTPCCADef.h"
26 #include "AliHLTTPCCATracklet.h"
27 #include "AliHLTTPCCATrackletConstructor.h"
28 #include "MemoryAssignmentHelpers.h"
29
30 //#include "AliHLTTPCCAPerformance.h"
31 //#include "TH1D.h"
32
33 //#define DRAW
34
35 #ifdef DRAW
36 #include "AliHLTTPCCADisplay.h"
37 #endif
38
39 #define kMaxRowGap 4
40
41 GPUd() void AliHLTTPCCATrackletConstructor::InitTracklet( AliHLTTPCCATrackParam &tParam )
42 {
43         //Initialize Tracklet Parameters using default values
44   tParam.SetSinPhi( 0 );
45   tParam.SetDzDs( 0 );
46   tParam.SetQPt( 0 );
47   tParam.SetSignCosPhi( 1 );
48   tParam.SetChi2( 0 );
49   tParam.SetNDF( -3 );
50   tParam.SetCov( 0, 1 );
51   tParam.SetCov( 1, 0 );
52   tParam.SetCov( 2, 1 );
53   tParam.SetCov( 3, 0 );
54   tParam.SetCov( 4, 0 );
55   tParam.SetCov( 5, 1 );
56   tParam.SetCov( 6, 0 );
57   tParam.SetCov( 7, 0 );
58   tParam.SetCov( 8, 0 );
59   tParam.SetCov( 9, 1 );
60   tParam.SetCov( 10, 0 );
61   tParam.SetCov( 11, 0 );
62   tParam.SetCov( 12, 0 );
63   tParam.SetCov( 13, 0 );
64   tParam.SetCov( 14, 10. );
65 }
66
67 GPUd() void AliHLTTPCCATrackletConstructor::ReadData
68 #ifndef HLTCA_GPU_PREFETCHDATA
69 ( int /*iThread*/, AliHLTTPCCASharedMemory& /*s*/, AliHLTTPCCAThreadMemory& /*r*/, AliHLTTPCCATracker& /*tracker*/, int /*iRow*/ )
70 {
71         //Prefetch Data to shared memory
72 #else
73 ( int iThread, AliHLTTPCCASharedMemory& s, AliHLTTPCCAThreadMemory& r, AliHLTTPCCATracker& tracker, int iRow )
74 {
75   // reconstruction of tracklets, read data step
76     const AliHLTTPCCARow &row = tracker.Row( iRow );
77     //bool jr = !r.fCurrentData;
78
79     // copy hits, grid content and links
80
81     // FIXME: inefficient copy
82     //const int numberOfHitsAligned = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits());
83
84 /*      
85 #ifdef HLTCA_GPU_REORDERHITDATA
86     ushort2 *sMem1 = reinterpret_cast<ushort2 *>( s.fData[jr] );
87     for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
88       sMem1[i].x = tracker.HitDataY( row, i );
89       sMem1[i].y = tracker.HitDataZ( row, i );
90     }
91 #else
92     ushort_v *sMem1 = reinterpret_cast<ushort_v *>( s.fData[jr] );
93     for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
94       sMem1[i] = tracker.HitDataY( row, i );
95     }
96
97     ushort_v *sMem1a = reinterpret_cast<ushort_v *>( s.fData[jr] ) + numberOfHitsAligned;
98     for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
99       sMem1a[i] = tracker.HitDataZ( row, i );
100     }
101 #endif
102
103     short *sMem2 = reinterpret_cast<short *>( s.fData[jr] ) + 2 * numberOfHitsAligned;
104     for ( int i = iThread; i < numberOfHitsAligned; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
105       sMem2[i] = tracker.HitLinkUpData( row, i );
106     }
107         
108     unsigned short *sMem3 = reinterpret_cast<unsigned short *>( s.fData[jr] ) + 3 * numberOfHitsAligned;
109     const int n = row.FullSize(); // + grid content size
110     for ( int i = iThread; i < n; i += TRACKLET_CONSTRUCTOR_NMEMTHREDS ) {
111       sMem3[i] = tracker.FirstHitInBin( row, i );
112     }*/
113
114         /*for (int k = 0;k < 2;k++)
115         {
116                 HLTCA_GPU_ROWCOPY* sharedMem;
117                 const HLTCA_GPU_ROWCOPY* sourceMem;
118                 int copyCount;
119                 switch (k)
120                 {
121                 case 0:
122                         sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitDataY(row) );
123                         sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
124                         copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
125                         break;
126                 case 1:
127                         sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitDataZ(row) );
128                         sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
129                         copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
130                         break;
131                 case 2:
132                         sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.HitLinkUpData(row) );
133                         sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
134                         copyCount = numberOfHitsAligned * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
135                         break;
136                 case 1:
137                         sourceMem = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.FirstHitInBin(row) );
138                         sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[jr] ) + k * numberOfHitsAligned);
139                         copyCount = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.FullSize()) * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);
140                         break;
141                 }
142                 for (int i = iThread;i < copyCount;i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
143                 {
144                         sharedMem[i] = sourceMem[i];
145                 }
146         }*/
147
148         for (unsigned int i = iThread;i < row.FullSize() * sizeof(ushort_v) / sizeof(HLTCA_GPU_ROWCOPY);i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
149         {
150                 reinterpret_cast<HLTCA_GPU_ROWCOPY *> (reinterpret_cast<ushort_v *>( s.fData[!r.fCurrentData] ))[i] = reinterpret_cast<const HLTCA_GPU_ROWCOPY *>( tracker.FirstHitInBin(row) )[i];
151         }
152
153         const HLTCA_GPU_ROWCOPY* const sourceMem = (const HLTCA_GPU_ROWCOPY *) &row;
154         HLTCA_GPU_ROWCOPY* const sharedMem = reinterpret_cast<HLTCA_GPU_ROWCOPY *> ( &s.fRow[!r.fCurrentData] );
155         for (unsigned int i = iThread;i < sizeof(AliHLTTPCCARow) / sizeof(HLTCA_GPU_ROWCOPY);i += TRACKLET_CONSTRUCTOR_NMEMTHREDS)
156         {
157                 sharedMem[i] = sourceMem[i];
158         }
159 #endif
160 }
161
162
163 GPUd() void AliHLTTPCCATrackletConstructor::StoreTracklet
164 ( int /*nBlocks*/, int /*nThreads*/, int /*iBlock*/, int /*iThread*/,
165   AliHLTTPCCASharedMemory
166 #ifdef HLTCA_GPUCODE
167   &s
168 #else
169   &/*s*/
170 #endif  
171   , AliHLTTPCCAThreadMemory &r, AliHLTTPCCATracker &tracker, AliHLTTPCCATrackParam &tParam )
172 {
173   // reconstruction of tracklets, tracklet store step
174
175   //AliHLTTPCCAPerformance::Instance().HNHitsPerTrackCand()->Fill(r.fNHits);
176
177   do {
178     {
179         //std::cout<<"tracklet to store: "<<r.fItr<<", nhits = "<<r.fNHits<<std::endl;
180     }
181
182     if ( r.fNHits < TRACKLET_SELECTOR_MIN_HITS ) {
183       r.fNHits = 0;
184       break;
185     }
186
187     if ( 0 ) {
188       if ( 1. / .5 < CAMath::Abs( tParam.QPt() ) ) { //SG!!!
189         r.fNHits = 0;
190         break;
191       }
192     }
193
194     {
195       bool ok = 1;
196       const float *c = tParam.Cov();
197       for ( int i = 0; i < 15; i++ ) ok = ok && CAMath::Finite( c[i] );
198       for ( int i = 0; i < 5; i++ ) ok = ok && CAMath::Finite( tParam.Par()[i] );
199       ok = ok && ( tParam.X() > 50 );
200
201       if ( c[0] <= 0 || c[2] <= 0 || c[5] <= 0 || c[9] <= 0 || c[14] <= 0 ) ok = 0;
202
203       if ( !ok ) {
204         r.fNHits = 0;
205         break;
206       }
207     }
208   } while ( 0 );
209
210   if ( !SAVE() ) return;
211
212   AliHLTTPCCATracklet &tracklet = tracker.Tracklets()[r.fItr];
213
214   tracklet.SetNHits( r.fNHits );
215
216   if ( r.fNHits > 0 ) {
217 #ifdef DRAW
218     if ( 0 ) {
219       std::cout << "store tracklet " << r.fItr << ", nhits = " << r.fNHits << std::endl;
220       if ( AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 1. ) ) {
221         AliHLTTPCCADisplay::Instance().Ask();
222       }
223     }
224 #endif
225     if ( CAMath::Abs( tParam.Par()[4] ) < 1.e-4 ) tParam.SetPar( 4, 1.e-4 );
226         if (r.fStartRow < r.fFirstRow) r.fFirstRow = r.fStartRow;
227         tracklet.SetFirstRow( r.fFirstRow );
228     tracklet.SetLastRow( r.fLastRow );
229     tracklet.SetParam( tParam );
230     int w = ( r.fNHits << 16 ) + r.fItr;
231     for ( int iRow = r.fFirstRow; iRow <= r.fLastRow; iRow++ ) {
232 #ifdef EXTERN_ROW_HITS
233       int ih = tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr];
234 #else
235           int ih = tracklet.RowHit( iRow );
236 #endif
237       if ( ih >= 0 ) {
238 #if defined(HLTCA_GPUCODE) & !defined(HLTCA_GPU_PREFETCHDATA) & !defined(HLTCA_GPU_PREFETCH_ROWBLOCK_ONLY)
239             tracker.MaximizeHitWeight( s.fRows[ iRow ], ih, w );
240 #else
241             tracker.MaximizeHitWeight( tracker.Row( iRow ), ih, w );
242 #endif
243       }
244     }
245   }
246
247 }
248
249 GPUd() void AliHLTTPCCATrackletConstructor::UpdateTracklet
250 ( int /*nBlocks*/, int /*nThreads*/, int /*iBlock*/, int /*iThread*/,
251   AliHLTTPCCASharedMemory 
252 #ifdef HLTCA_GPUCODE
253   &s
254 #else
255   &/*s*/
256 #endif
257   , AliHLTTPCCAThreadMemory &r, AliHLTTPCCATracker &tracker, AliHLTTPCCATrackParam &tParam, int iRow )
258 {
259   // reconstruction of tracklets, tracklets update step
260
261   //std::cout<<"Update tracklet: "<<r.fItr<<" "<<r.fGo<<" "<<r.fStage<<" "<<iRow<<std::endl;
262   bool drawSearch = 0;//r.fItr==2;
263   bool drawFit = 0;//r.fItr==2;
264   bool drawFitted = drawFit ;//|| 1;//r.fItr==16;
265
266   if ( !r.fGo ) return;
267
268 #ifndef EXTERN_ROW_HITS
269   AliHLTTPCCATracklet &tracklet = tracker.Tracklets()[r.fItr];
270 #endif
271
272 #ifdef HLTCA_GPU_PREFETCHDATA
273   const AliHLTTPCCARow &row = s.fRow[r.fCurrentData];
274 #elif defined(HLTCA_GPUCODE)
275   const AliHLTTPCCARow &row = s.fRows[iRow];
276 #else
277   const AliHLTTPCCARow &row = tracker.Row( iRow );
278 #endif
279
280   float y0 = row.Grid().YMin();
281   float stepY = row.HstepY();
282   float z0 = row.Grid().ZMin();
283   float stepZ = row.HstepZ();
284   float stepYi = row.HstepYi();
285   float stepZi = row.HstepZi();
286
287   if ( r.fStage == 0 ) { // fitting part
288     do {
289
290       if ( iRow < r.fStartRow || r.fCurrIH < 0  ) break;
291
292       if ( ( iRow - r.fStartRow ) % 2 != 0 )
293           {
294 #ifndef EXTERN_ROW_HITS
295                   tracklet.SetRowHit(iRow, -1);
296 #else
297                   tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
298 #endif
299                   break; // SG!!! - jump over the row
300           }
301
302 //#ifdef HLTCA_GPU_PREFETCHDATA
303 //      uint4 *tmpint4 = s.fData[r.fCurrentData];
304 //#endif
305           ushort2 hh;
306 //#ifdef HLTCA_GPU_REORDERHITDATA
307 //      hh = reinterpret_cast<ushort2*>( tmpint4 )[r.fCurrIH];
308 //#else
309 //#ifdef HLTCA_GPU_PREFETCHDATA
310 //        hh.x = reinterpret_cast<ushort_v*>( tmpint4 )[r.fCurrIH];
311 //        hh.y = reinterpret_cast<ushort_v*>( tmpint4 )[NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits()) + r.fCurrIH];
312 //#else
313 #if defined(HLTCA_GPU_TEXTURE_FETCH)
314           hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + r.fCurrIH);
315 #else
316           hh = tracker.HitData(row)[r.fCurrIH];
317 #endif
318 //#endif
319 //#endif
320
321       int oldIH = r.fCurrIH;
322 //#ifdef HLTCA_GPU_PREFETCHDATA
323 //      r.fCurrIH = reinterpret_cast<short*>( tmpint4 )[2 * NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits()) + r.fCurrIH]; // read from linkup data
324 //#else
325 #if defined(HLTCA_GPU_TEXTURE_FETCH)
326           r.fCurrIH = tex1Dfetch(gAliTexRefs, ((char*) tracker.Data().HitLinkUpData(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + r.fCurrIH);
327 #else
328           r.fCurrIH = tracker.HitLinkUpData(row)[r.fCurrIH]; // read from linkup data
329 #endif
330 //#endif
331
332       float x = row.X();
333       float y = y0 + hh.x * stepY;
334       float z = z0 + hh.y * stepZ;
335 #ifdef DRAW
336       if ( drawFit ) std::cout << " fit tracklet: new hit " << oldIH << ", xyz=" << x << " " << y << " " << z << std::endl;
337 #endif
338
339       if ( iRow == r.fStartRow ) {
340         tParam.SetX( x );
341         tParam.SetY( y );
342         tParam.SetZ( z );
343         r.fLastY = y;
344         r.fLastZ = z;
345         #ifdef DRAW
346         if ( drawFit ) std::cout << " fit tracklet " << r.fItr << ", row " << iRow << " first row" << std::endl;
347         #endif
348       } else {
349
350         float err2Y, err2Z;
351         float dx = x - tParam.X();
352         float dy = y - r.fLastY;//tParam.Y();
353         float dz = z - r.fLastZ;//tParam.Z();
354         r.fLastY = y;
355         r.fLastZ = z;
356
357         float ri = 1. / CAMath::Sqrt( dx * dx + dy * dy );
358         if ( iRow == r.fStartRow + 2 ) { //SG!!! important - thanks to Matthias
359           tParam.SetSinPhi( dy*ri );
360           tParam.SetSignCosPhi( dx );
361           tParam.SetDzDs( dz*ri );
362           //std::cout << "Init. errors... " << r.fItr << std::endl;
363           tracker.GetErrors2( iRow, tParam, err2Y, err2Z );
364           //std::cout << "Init. errors = " << err2Y << " " << err2Z << std::endl;
365           tParam.SetCov( 0, err2Y );
366           tParam.SetCov( 2, err2Z );
367         }
368         if ( drawFit ) {
369           #ifdef DRAW
370           std::cout << " fit tracklet " << r.fItr << ", row " << iRow << " transporting.." << std::endl;
371           std::cout << " params before transport=" << std::endl;
372           tParam.Print();
373           #endif
374         }
375         float sinPhi, cosPhi;
376         if ( r.fNHits >= 10 && CAMath::Abs( tParam.SinPhi() ) < .99 ) {
377           sinPhi = tParam.SinPhi();
378           cosPhi = CAMath::Sqrt( 1 - sinPhi * sinPhi );
379         } else {
380           sinPhi = dy * ri;
381           cosPhi = dx * ri;
382         }
383         #ifdef DRAW
384         if ( drawFit ) std::cout << "sinPhi0 = " << sinPhi << ", cosPhi0 = " << cosPhi << std::endl;
385         #endif
386         if ( !tParam.TransportToX( x, sinPhi, cosPhi, tracker.Param().ConstBz(), -1 ) ) {
387           #ifdef DRAW
388           if ( drawFit ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not transport!!" << std::endl;
389                   #endif
390 #ifndef EXTERN_ROW_HITS
391           tracklet.SetRowHit( iRow, -1 );
392 #else
393                   tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
394 #endif
395           break;
396         }
397         //std::cout<<"mark1 "<<r.fItr<<std::endl;
398         //tParam.Print();
399         tracker.GetErrors2( iRow, tParam.GetZ(), sinPhi, cosPhi, tParam.GetDzDs(), err2Y, err2Z );
400         //std::cout<<"mark2"<<std::endl;
401
402         if ( drawFit ) {
403           #ifdef DRAW
404           std::cout << " params after transport=" << std::endl;
405           tParam.Print();
406           std::cout << "fit tracklet before filter: " << r.fItr << ", row " << iRow << " errs=" << err2Y << " " << err2Z << std::endl;
407           AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 2., 1 );
408           AliHLTTPCCADisplay::Instance().Ask();
409                   #endif
410         }
411         if ( !tParam.Filter( y, z, err2Y, err2Z, .99 ) ) {
412           #ifdef DRAW
413           if ( drawFit ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not filter!!" << std::endl;
414           #endif
415 #ifndef EXTERN_ROW_HITS
416           tracklet.SetRowHit( iRow, -1 );
417 #else
418                   tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
419 #endif
420           break;
421         }
422       }
423 #ifndef EXTERN_ROW_HITS
424       tracklet.SetRowHit( iRow, oldIH );
425 #else
426           tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = oldIH;
427 #endif
428       if ( drawFit ) {
429         #ifdef DRAW
430         std::cout << "fit tracklet after filter " << r.fItr << ", row " << iRow << std::endl;
431         tParam.Print();
432         AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kGreen, 2. );
433         AliHLTTPCCADisplay::Instance().Ask();
434                 #endif
435       }
436       r.fNHits++;
437       r.fLastRow = iRow;
438       r.fEndRow = iRow;
439       break;
440     } while ( 0 );
441
442     if ( r.fCurrIH < 0 ) {
443       #ifdef DRAW
444       if ( drawFitted ) std::cout << "fitted tracklet " << r.fItr << ", nhits=" << r.fNHits << std::endl;
445       #endif
446       r.fStage = 1;
447       //AliHLTTPCCAPerformance::Instance().HNHitsPerSeed()->Fill(r.fNHits);
448       if ( r.fNHits < 3 ) { r.fNHits = 0; r.fGo = 0;}//SG!!!
449       if ( CAMath::Abs( tParam.SinPhi() ) > .999 ) {
450         #ifdef DRAW
451         if ( drawFitted ) std::cout << " fitted tracklet  error: sinPhi=" << tParam.SinPhi() << std::endl;
452         #endif
453         r.fNHits = 0; r.fGo = 0;
454       } else {
455         //tParam.SetCosPhi( CAMath::Sqrt(1-tParam.SinPhi()*tParam.SinPhi()) );
456       }
457       if ( drawFitted ) {
458         #ifdef DRAW
459         std::cout << "fitted tracklet " << r.fItr << " miss=" << r.fNMissed << " go=" << r.fGo << std::endl;
460         tParam.Print();
461         AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue );
462         AliHLTTPCCADisplay::Instance().Ask();
463                 #endif
464       }
465     }
466   } else { // forward/backward searching part
467     do {
468       if ( drawSearch ) {
469         #ifdef DRAW
470         std::cout << "search tracklet " << r.fItr << " row " << iRow << " miss=" << r.fNMissed << " go=" << r.fGo << " stage=" << r.fStage << std::endl;
471         #endif
472       }
473
474       if ( r.fStage == 2 && ( ( iRow >= r.fEndRow ) ||
475                               ( iRow >= r.fStartRow && ( iRow - r.fStartRow ) % 2 == 0 )
476                             ) ) break;
477       if ( r.fNMissed > kMaxRowGap  ) {
478         break;
479       }
480
481       r.fNMissed++;
482
483       float x = row.X();
484       float err2Y, err2Z;
485       if ( drawSearch ) {
486         #ifdef DRAW
487         std::cout << "tracklet " << r.fItr << " before transport to row " << iRow << " : " << std::endl;
488         tParam.Print();
489         #endif
490       }
491       if ( !tParam.TransportToX( x, tParam.SinPhi(), tParam.GetCosPhi(), tracker.Param().ConstBz(), .99 ) ) {
492         #ifdef DRAW
493         if ( drawSearch ) std::cout << " tracklet " << r.fItr << ", row " << iRow << ": can not transport!!" << std::endl;
494         #endif
495 #ifndef EXTERN_ROW_HITS
496                 tracklet.SetRowHit(iRow, -1);
497 #else
498                 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
499 #endif
500         break;
501       }
502       if ( row.NHits() < 1 ) {
503         // skip empty row
504 #ifndef EXTERN_ROW_HITS
505                   tracklet.SetRowHit(iRow, -1);
506 #else
507                   tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
508 #endif
509         break;
510       }
511       if ( drawSearch ) {
512                 #ifdef DRAW
513         std::cout << "tracklet " << r.fItr << " after transport to row " << iRow << " : " << std::endl;
514         tParam.Print();
515         AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kBlue, 2., 1 );
516         AliHLTTPCCADisplay::Instance().Ask();
517                 #endif
518       }
519 #ifdef HLTCA_GPU_PREFETCHDATA
520       uint4 *tmpint4 = s.fData[r.fCurrentData];
521 #endif
522
523 //#ifdef HLTCA_GPU_REORDERHITDATA
524 //      const ushort2 *hits = reinterpret_cast<ushort2*>( tmpint4 );
525 //#else
526 //#ifdef HLTCA_GPU_PREFETCHDATA
527 //        const ushort_v *hitsx = reinterpret_cast<ushort_v*>( tmpint4 );
528 //        const ushort_v *hitsy = reinterpret_cast<ushort_v*>( tmpint4 ) + NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.NHits());
529 //#else
530 #ifndef HLTCA_GPU_TEXTURE_FETCH
531           const ushort2 *hits = tracker.HitData(row);
532 #endif
533 //#endif
534 //#endif
535
536       float fY = tParam.GetY();
537       float fZ = tParam.GetZ();
538       int best = -1;
539
540       { // search for the closest hit
541         const int fIndYmin = row.Grid().GetBinBounded( fY - 1.f, fZ - 1.f );
542         assert( fIndYmin >= 0 );
543
544         int ds;
545         int fY0 = ( int ) ( ( fY - y0 ) * stepYi );
546         int fZ0 = ( int ) ( ( fZ - z0 ) * stepZi );
547         int ds0 = ( ( ( int )1 ) << 30 );
548         ds = ds0;
549
550         unsigned int fHitYfst = 1, fHitYlst = 0, fHitYfst1 = 1, fHitYlst1 = 0;
551
552         if ( drawSearch ) {
553 #ifdef DRAW
554           std::cout << " tracklet " << r.fItr << ", row " << iRow << ": grid N=" << row.Grid().N() << std::endl;
555           std::cout << " tracklet " << r.fItr << ", row " << iRow << ": minbin=" << fIndYmin << std::endl;
556 #endif
557         }
558         {
559           int nY = row.Grid().Ny();
560
561 //#ifdef HLTCA_GPU_PREFETCHDATA
562 //                const unsigned short *sGridP = ( reinterpret_cast<unsigned short*>( tmpint4 ) );
563 //#else
564 #ifndef HLTCA_GPU_TEXTURE_FETCH
565                   const unsigned short *sGridP = tracker.FirstHitInBin(row);
566 #endif
567 //#endif
568
569 #ifdef HLTCA_GPU_TEXTURE_FETCH
570                   fHitYfst = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin);
571                   fHitYlst = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+2);
572                   fHitYfst1 = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+nY);
573                   fHitYlst1 = tex1Dfetch(gAliTexRefu, ((char*) tracker.Data().FirstHitInBin(row) - tracker.Data().GPUTextureBase()) / sizeof(unsigned short) + fIndYmin+nY+2);
574 #else
575           fHitYfst = sGridP[fIndYmin];
576           fHitYlst = sGridP[fIndYmin+2];
577           fHitYfst1 = sGridP[fIndYmin+nY];
578           fHitYlst1 = sGridP[fIndYmin+nY+2];
579 #endif
580           assert( (signed) fHitYfst <= row.NHits() );
581           assert( (signed) fHitYlst <= row.NHits() );
582           assert( (signed) fHitYfst1 <= row.NHits() );
583           assert( (signed) fHitYlst1 <= row.NHits() );
584           if ( drawSearch ) {
585 #ifdef DRAW
586             std::cout << " Grid, row " << iRow << ": nHits=" << row.NHits() << ", grid n=" << row.Grid().N() << ", c[n]=" << sGridP[row.Grid().N()] << std::endl;
587             std::cout << "hit steps = " << stepY << " " << stepZ << std::endl;
588             std::cout << " Grid bins:" << std::endl;
589             for ( unsigned int i = 0; i < row.Grid().N(); i++ ) {
590               std::cout << " bin " << i << ": ";
591               for ( int j = sGridP[i]; j < sGridP[i+1]; j++ ) {
592                 ushort2 hh = hits[j];
593                 float y = y0 + hh.x * stepY;
594                 float z = z0 + hh.y * stepZ;
595                 std::cout << "[" << j << "|" << y << "," << z << "] ";
596               }
597               std::cout << std::endl;
598             }
599 #endif
600           }
601 #ifdef DRAW
602           if ( sGridP[row.Grid().N()] != row.NHits() ) {
603             std::cout << " grid, row " << iRow << ": nHits=" << row.NHits() << ", grid n=" << row.Grid().N() << ", c[n]=" << sGridP[row.Grid().N()] << std::endl;
604             //exit(0);
605           }
606 #endif
607         }
608 #ifdef DRAW
609         if ( drawSearch ) {
610           std::cout << " tracklet " << r.fItr << ", row " << iRow << ", yz= " << fY << "," << fZ << ": search hits=" << fHitYfst << " " << fHitYlst << " / " << fHitYfst1 << " " << fHitYlst1 << std::endl;
611           std::cout << " hit search :" << std::endl;
612         }
613 #endif
614         for ( unsigned int fIh = fHitYfst; fIh < fHitYlst; fIh++ ) {
615           assert( (signed) fIh < row.NHits() );
616           ushort2 hh;
617 #if defined(HLTCA_GPU_TEXTURE_FETCH)
618                  hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + fIh);
619 #else
620                   hh = hits[fIh];
621 #endif
622           int ddy = ( int )( hh.x ) - fY0;
623           int ddz = ( int )( hh.y ) - fZ0;
624           int dds = CAMath::Abs( ddy ) + CAMath::Abs( ddz );
625           if ( drawSearch ) {
626             #ifdef DRAW
627             std::cout << fIh << ": hityz= " << hh.x << " " << hh.y << "(" << hh.x*stepY << " " << hh.y*stepZ << "), trackyz=" << fY0 << " " << fZ0 << "(" << fY0*stepY << " " << fZ0*stepZ << "), dy,dz,ds= " << ddy << " " << ddz << " " << dds << "(" << ddy*stepY << " " << ddz*stepZ << std::endl;
628             #endif
629           }
630           if ( dds < ds ) {
631             ds = dds;
632             best = fIh;
633           }
634         }
635
636                 for ( unsigned int fIh = fHitYfst1; fIh < fHitYlst1; fIh++ ) {
637           ushort2 hh;
638 #if defined(HLTCA_GPU_TEXTURE_FETCH)
639                   hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + fIh);
640 #else
641                   hh = hits[fIh];
642 #endif
643           int ddy = ( int )( hh.x ) - fY0;
644           int ddz = ( int )( hh.y ) - fZ0;
645           int dds = CAMath::Abs( ddy ) + CAMath::Abs( ddz );
646           if ( drawSearch ) {
647             #ifdef DRAW
648             std::cout << fIh << ": hityz= " << hh.x << " " << hh.y << "(" << hh.x*stepY << " " << hh.y*stepZ << "), trackyz=" << fY0 << " " << fZ0 << "(" << fY0*stepY << " " << fZ0*stepZ << "), dy,dz,ds= " << ddy << " " << ddz << " " << dds << "(" << ddy*stepY << " " << ddz*stepZ << std::endl;
649             #endif
650           }
651           if ( dds < ds ) {
652             ds = dds;
653             best = fIh;
654           }
655         }
656       }// end of search for the closest hit
657
658       if ( best < 0 )
659           {
660 #ifndef EXTERN_ROW_HITS
661                   tracklet.SetRowHit(iRow, -1);
662 #else
663                   tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
664 #endif
665                   break;
666           }
667       if ( drawSearch ) {
668         #ifdef DRAW
669         std::cout << "hit search " << r.fItr << ", row " << iRow << " hit " << best << " found" << std::endl;
670         AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best, kRed, 1. );
671         AliHLTTPCCADisplay::Instance().Ask();
672         AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best, kWhite, 1 );
673         AliHLTTPCCADisplay::Instance().DrawSliceHit( iRow, best );
674                 #endif
675       }
676
677       ushort2 hh;
678 #if defined(HLTCA_GPU_TEXTURE_FETCH)
679                  hh = tex1Dfetch(gAliTexRefu2, ((char*) tracker.Data().HitData() - tracker.Data().GPUTextureBase()) / sizeof(ushort2) + row.HitNumberOffset() + best);
680 #else
681                   hh = hits[best];
682 #endif
683
684       //std::cout<<"mark 3, "<<r.fItr<<std::endl;
685       //tParam.Print();
686       tracker.GetErrors2( iRow, *( ( AliHLTTPCCATrackParam* )&tParam ), err2Y, err2Z );
687       //std::cout<<"mark 4"<<std::endl;
688
689       float y = y0 + hh.x * stepY;
690       float z = z0 + hh.y * stepZ;
691       float dy = y - fY;
692       float dz = z - fZ;
693
694       const float kFactor = tracker.Param().HitPickUpFactor() * tracker.Param().HitPickUpFactor() * 3.5 * 3.5;
695       float sy2 = kFactor * ( tParam.GetErr2Y() +  err2Y );
696       float sz2 = kFactor * ( tParam.GetErr2Z() +  err2Z );
697       if ( sy2 > 2. ) sy2 = 2.;
698       if ( sz2 > 2. ) sz2 = 2.;
699
700       if ( drawSearch ) {
701         #ifdef DRAW
702         std::cout << "dy,sy= " << dy << " " << CAMath::Sqrt( sy2 ) << ", dz,sz= " << dz << " " << CAMath::Sqrt( sz2 ) << std::endl;
703         std::cout << "dy,dz= " << dy << " " << dz << ", sy,sz= " << CAMath::Sqrt( sy2 ) << " " << CAMath::Sqrt( sz2 ) << ", sy,sz= " << CAMath::Sqrt( kFactor*( tParam.GetErr2Y() +  err2Y ) ) << " " << CAMath::Sqrt( kFactor*( tParam.GetErr2Z() +  err2Z ) ) << std::endl;
704         #endif
705       }
706       if ( CAMath::FMulRZ( dy, dy ) > sy2 || CAMath::FMulRZ( dz, dz ) > sz2  ) {
707         if ( drawSearch ) {
708           #ifdef DRAW
709           std::cout << "found hit is out of the chi2 window\n " << std::endl;
710           #endif
711         }
712 #ifndef EXTERN_ROW_HITS
713                 tracklet.SetRowHit(iRow, -1);
714 #else
715                 tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = -1;
716 #endif
717         break;
718       }
719 #ifdef DRAW
720       //if( SAVE() ) hitstore[ iRow ] = best;
721       //std::cout<<"hit search before filter: "<<r.fItr<<", row "<<iRow<<std::endl;
722       //AliHLTTPCCADisplay::Instance().DrawTracklet(tParam, hitstore, kBlue);
723       //AliHLTTPCCADisplay::Instance().Ask();
724 #endif
725       if ( !tParam.Filter( y, z, err2Y, err2Z, .99 ) ) {
726         if ( drawSearch ) {
727           #ifdef DRAW
728           std::cout << "tracklet " << r.fItr << " at row " << iRow << " : can not filter!!!! " << std::endl;
729           #endif
730         }
731         break;
732       }
733 #ifndef EXTERN_ROW_HITS
734           tracklet.SetRowHit( iRow, best );
735 #else
736           tracker.TrackletRowHits()[iRow * s.fNTracklets + r.fItr] = best;
737 #endif
738       if ( drawSearch ) {
739         #ifdef DRAW
740         std::cout << "tracklet " << r.fItr << " after filter at row " << iRow << " : " << std::endl;
741         tParam.Print();
742         AliHLTTPCCADisplay::Instance().DrawTracklet( tParam, hitstore, kRed );
743         AliHLTTPCCADisplay::Instance().Ask();
744                 #endif
745       }
746       r.fNHits++;
747       r.fNMissed = 0;
748       if ( r.fStage == 1 ) r.fLastRow = iRow;
749       else r.fFirstRow = iRow;
750     } while ( 0 );
751   }
752 }
753
754 #ifdef HLTCA_GPUCODE
755 GPUd() void AliHLTTPCCATrackletConstructor::CopyTrackletTempData( AliHLTTPCCAThreadMemory &rMemSrc, AliHLTTPCCAThreadMemory &rMemDst, AliHLTTPCCATrackParam &tParamSrc, AliHLTTPCCATrackParam &tParamDst)
756 {
757         //Copy Temporary Tracklet data from registers to global mem and vice versa
758         rMemDst.fStartRow = rMemSrc.fStartRow;
759         rMemDst.fEndRow = rMemSrc.fEndRow;
760         rMemDst.fFirstRow = rMemSrc.fFirstRow;
761         rMemDst.fLastRow = rMemSrc.fLastRow;
762         rMemDst.fCurrIH =  rMemSrc.fCurrIH;
763         rMemDst.fGo = rMemSrc.fGo;
764         rMemDst.fStage = rMemSrc.fStage;
765         rMemDst.fNHits = rMemSrc.fNHits;
766         rMemDst.fNMissed = rMemSrc.fNMissed;
767         rMemDst.fLastY = rMemSrc.fLastY;
768         rMemDst.fLastZ = rMemSrc.fLastZ;
769
770         tParamDst.SetSinPhi( tParamSrc.GetSinPhi() );
771         tParamDst.SetDzDs( tParamSrc.GetDzDs() );
772         tParamDst.SetQPt( tParamSrc.GetQPt() );
773         tParamDst.SetSignCosPhi( tParamSrc.GetSignCosPhi() );
774         tParamDst.SetChi2( tParamSrc.GetChi2() );
775         tParamDst.SetNDF( tParamSrc.GetNDF() );
776         tParamDst.SetCov( 0, tParamSrc.GetCov(0) );
777         tParamDst.SetCov( 1, tParamSrc.GetCov(1) );
778         tParamDst.SetCov( 2, tParamSrc.GetCov(2) );
779         tParamDst.SetCov( 3, tParamSrc.GetCov(3) );
780         tParamDst.SetCov( 4, tParamSrc.GetCov(4) );
781         tParamDst.SetCov( 5, tParamSrc.GetCov(5) );
782         tParamDst.SetCov( 6, tParamSrc.GetCov(6) );
783         tParamDst.SetCov( 7, tParamSrc.GetCov(7) );
784         tParamDst.SetCov( 8, tParamSrc.GetCov(8) );
785         tParamDst.SetCov( 9, tParamSrc.GetCov(9) );
786         tParamDst.SetCov( 10, tParamSrc.GetCov(10) );
787         tParamDst.SetCov( 11, tParamSrc.GetCov(11) );
788         tParamDst.SetCov( 12, tParamSrc.GetCov(12) );
789         tParamDst.SetCov( 13, tParamSrc.GetCov(13) );
790         tParamDst.SetCov( 14, tParamSrc.GetCov(14) );
791         tParamDst.SetX( tParamSrc.GetX() );
792         tParamDst.SetY( tParamSrc.GetY() );
793         tParamDst.SetZ( tParamSrc.GetZ() );
794 }
795
796 GPUd() int AliHLTTPCCATrackletConstructor::FetchTracklet(AliHLTTPCCATracker &tracker, AliHLTTPCCASharedMemory &sMem, int Reverse, int RowBlock, int &mustInit)
797 {
798         //Fetch a new trackled to be processed by this thread
799         __syncthreads();
800         int nextTracketlFirstRun = sMem.fNextTrackletFirstRun;
801         if (threadIdx.x  == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
802         {
803                 sMem.fNTracklets = *tracker.NTracklets();
804                 if (sMem.fNextTrackletFirstRun)
805                 {
806 #ifdef HLTCA_GPU_SCHED_FIXED_START
807                         const int iSlice = tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT;
808                         const int nSliceBlockOffset = HLTCA_GPU_BLOCK_COUNT * iSlice / tracker.GPUParametersConst()->fGPUnSlices;
809                         const uint2 &nTracklet = tracker.BlockStartingTracklet()[blockIdx.x - nSliceBlockOffset];
810
811                         sMem.fNextTrackletCount = nTracklet.y;
812                         if (sMem.fNextTrackletCount == 0)
813                         {
814                                 sMem.fNextTrackletFirstRun = 0;
815                         }
816                         else
817                         {
818                                 if (tracker.TrackletStartHits()[nTracklet.x].RowIndex() / HLTCA_GPU_SCHED_ROW_STEP != RowBlock)
819                                 {
820                                         sMem.fNextTrackletCount = 0;
821                                 }
822                                 else
823                                 {
824                                         sMem.fNextTrackletFirst = nTracklet.x;
825                                         sMem.fNextTrackletNoDummy = 1;
826                                 }
827                         }
828 #endif
829                 }
830                 else
831                 {
832                         const int nFetchTracks = CAMath::Max(CAMath::Min((*tracker.RowBlockPos(Reverse, RowBlock)).x - (*tracker.RowBlockPos(Reverse, RowBlock)).y, HLTCA_GPU_THREAD_COUNT - TRACKLET_CONSTRUCTOR_NMEMTHREDS), 0);
833                         sMem.fNextTrackletCount = nFetchTracks;
834                         const int nUseTrack = nFetchTracks ? CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).y, nFetchTracks) : 0;
835                         sMem.fNextTrackletFirst = nUseTrack;
836
837                         const int nFillTracks = CAMath::Min(nFetchTracks, nUseTrack + nFetchTracks - (*((volatile int2*) (tracker.RowBlockPos(Reverse, RowBlock)))).x);
838                         if (nFillTracks > 0)
839                         {
840                                 const int nStartFillTrack = CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).x, nFillTracks);
841                                 if (nFillTracks + nStartFillTrack >= HLTCA_GPU_MAX_TRACKLETS)
842                                 {
843                                         tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_ROWBLOCK_TRACKLET_OVERFLOW;
844                                 }
845                                 for (int i = 0;i < nFillTracks;i++)
846                                 {
847                                         tracker.RowBlockTracklets(Reverse, RowBlock)[(nStartFillTrack + i) % HLTCA_GPU_MAX_TRACKLETS] = -3;     //Dummy filling track
848                                 }
849                         }
850                         sMem.fNextTrackletNoDummy = 0;
851                 }
852         }
853         __syncthreads();
854         mustInit = 0;
855         if (sMem.fNextTrackletCount == 0)
856         {
857                 return(-2);             //No more track in this RowBlock
858         }
859 #if HLTCA_GPU_TRACKLET_CONSTRUCTOR_NMEMTHREDS > 0
860         else if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS)
861         {
862                 return(-1);
863         }
864 #endif
865         else if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS >= sMem.fNextTrackletCount)
866         {
867                 return(-1);             //No track in this RowBlock for this thread
868         }
869         else if (nextTracketlFirstRun)
870         {
871                 if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS) sMem.fNextTrackletFirstRun = 0;
872                 mustInit = 1;
873                 return(sMem.fNextTrackletFirst + threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS);
874         }
875         else
876         {
877                 const int nTrackPos = sMem.fNextTrackletFirst + threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS;
878                 mustInit = (nTrackPos < tracker.RowBlockPos(Reverse, RowBlock)->w);
879                 volatile int* const ptrTracklet = &tracker.RowBlockTracklets(Reverse, RowBlock)[nTrackPos % HLTCA_GPU_MAX_TRACKLETS];
880                 int nTracklet;
881                 int nTryCount = 0;
882                 while ((nTracklet = *ptrTracklet) == -1)
883                 {
884                         for (int i = 0;i < 10000;i++)
885                                 sMem.fNextTrackletStupidDummy++;
886                         nTryCount++;
887                         if (nTryCount > 20)
888                         {
889                                 tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_SCHEDULE_COLLISION;
890                                 return(-1);
891                         }
892                 };
893                 return(nTracklet);
894         }
895 }
896
897 GPUd() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorNewGPU(AliHLTTPCCATracker *pTracker)
898 {
899         //Main Tracklet construction function that calls the scheduled (FetchTracklet) and then Processes the tracklet (mainly UpdataTracklet) and at the end stores the tracklet.
900         //Can also dispatch a tracklet to be rescheduled
901 #ifdef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
902         pTracker[0].BlockStartingTracklet()[0].x = HLTCA_GPU_EMULATION_SINGLE_TRACKLET;
903         pTracker[0].BlockStartingTracklet()[0].y = 1;
904         for (int i = 1;i < HLTCA_GPU_BLOCK_COUNT;i++)
905         {
906                 pTracker[0].BlockStartingTracklet()[i].x = pTracker[0].BlockStartingTracklet()[i].y = 0;
907         }
908 #endif
909
910         GPUshared() AliHLTTPCCASharedMemory sMem;
911
912 #ifdef HLTCA_GPU_SCHED_FIXED_START
913         if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
914         {
915                 sMem.fNextTrackletFirstRun = 1;
916         }
917         __syncthreads();
918 #endif
919
920 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
921         if (threadIdx.x == TRACKLET_CONSTRUCTOR_NMEMTHREDS)
922         {
923                 sMem.fMaxSync = 0;
924         }
925         int threadSync = 0;
926 #endif
927
928         for (int iReverse = 0;iReverse < 2;iReverse++)
929         {
930                 for (int iRowBlock = 0;iRowBlock < HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1;iRowBlock++)
931                 {
932 #ifdef HLTCA_GPU_SCHED_FIXED_SLICE
933                         int iSlice = tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT;
934 #else
935                         for (int iSlice = 0;iSlice < pTracker[0].GPUParametersConst()->fGPUnSlices;iSlice++)
936 #endif
937                         {
938                                 AliHLTTPCCATracker &tracker = pTracker[iSlice];
939                                 if (sMem.fNextTrackletFirstRun && iSlice != tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + (HLTCA_GPU_BLOCK_COUNT % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (blockIdx.x + 1) % HLTCA_GPU_BLOCK_COUNT != 0)) / HLTCA_GPU_BLOCK_COUNT)
940                                 {
941                                         continue;
942                                 }
943                                 /*if (!sMem.fNextTrackletFirstRun && tracker.RowBlockPos(1, HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP)->x <= tracker.RowBlockPos(1, HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP)->y)
944                                 {
945                                         continue;
946                                 }*/
947                                 int sharedRowsInitialized = 0;
948
949                                 int iTracklet;
950                                 int mustInit;
951                                 while ((iTracklet = FetchTracklet(tracker, sMem, iReverse, iRowBlock, mustInit)) != -2)
952                                 {
953 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
954                                         CAMath::AtomicMax(&sMem.fMaxSync, threadSync);
955                                         __syncthreads();
956                                         threadSync = CAMath::Min(sMem.fMaxSync, 100000000 / blockDim.x / gridDim.x);
957 #endif
958 #ifndef HLTCA_GPU_PREFETCHDATA
959                                         if (!sharedRowsInitialized)
960                                         {
961 #ifdef HLTCA_GPU_PREFETCH_ROWBLOCK_ONLY
962                                                 if (iReverse)
963                                                 {
964                                                         for (int i = CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP) * sizeof(AliHLTTPCCARow) / sizeof(int) + threadIdx.x;i < (HLTCA_ROW_COUNT - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP) * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
965                                                         {
966                                                                 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker.SliceDataRows())[i];
967                                                         }
968                                                 }
969                                                 else
970                                                 {
971                                                         for (int i = CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP) * sizeof(AliHLTTPCCARow) / sizeof(int) + threadIdx.x;i < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT) * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
972                                                         {
973                                                                 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker.SliceDataRows())[i];
974                                                         }
975                                                 }
976 #else
977                                                 for (int i = threadIdx.x;i < HLTCA_ROW_COUNT * sizeof(AliHLTTPCCARow) / sizeof(int);i += blockDim.x)
978                                                 {
979                                                         reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker.SliceDataRows())[i];
980                                                 }
981 #endif
982                                                 sharedRowsInitialized = 1;
983                                         }
984 #endif
985 #ifdef HLTCA_GPU_RESCHED
986                                         short2 storeToRowBlock;
987                                         int storePosition = 0;
988                                         if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
989                                         {
990                                                 const int nReverse = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
991                                                 const int nRowBlock = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
992                                                 sMem.fTrackletStoreCount[nReverse][nRowBlock] = 0;
993                                         }
994 #endif
995                                         __syncthreads();
996                                         AliHLTTPCCATrackParam tParam;
997                                         AliHLTTPCCAThreadMemory rMem;
998
999                                         rMem.fCurrentData = 0;
1000
1001 #ifdef HLTCA_GPU_EMULATION_DEBUG_TRACKLET
1002                                         if (iTracklet == HLTCA_GPU_EMULATION_DEBUG_TRACKLET)
1003                                         {
1004                                                 tracker.GPUParameters()->fGPUError = 1;
1005                                         }
1006 #endif
1007                                         AliHLTTPCCAThreadMemory &rMemGlobal = tracker.GPUTrackletTemp()[iTracklet].fThreadMem;
1008                                         AliHLTTPCCATrackParam &tParamGlobal = tracker.GPUTrackletTemp()[iTracklet].fParam;
1009                                         if (mustInit)
1010                                         {
1011                                                 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
1012
1013                                                 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
1014                                                 rMem.fCurrIH = id.HitIndex();
1015                                                 rMem.fStage = 0;
1016                                                 rMem.fNHits = 0;
1017                                                 rMem.fNMissed = 0;
1018
1019                                                 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
1020                                         }
1021                                         else if (iTracklet >= 0)
1022                                         {
1023                                                 CopyTrackletTempData( rMemGlobal, rMem, tParamGlobal, tParam );
1024                                         }
1025 #ifdef HLTCA_GPU_PREFETCHDATA
1026                                         else if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS)
1027                                         {
1028                                                 ReadData(threadIdx.x, sMem, rMem, tracker, iReverse ? (HLTCA_ROW_COUNT - 1 - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP) : (CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP)));
1029                                         }
1030 #endif
1031                                         rMem.fItr = iTracklet;
1032                                         rMem.fGo = (iTracklet >= 0);
1033
1034 #ifdef HLTCA_GPU_RESCHED
1035                                         storeToRowBlock.x = iRowBlock + 1;
1036                                         storeToRowBlock.y = iReverse;
1037 #ifdef HLTCA_GPU_PREFETCHDATA
1038                                         rMem.fCurrentData ^= 1;
1039                                         __syncthreads();
1040 #endif
1041                                         if (iReverse)
1042                                         {
1043                                                 for (int j = HLTCA_ROW_COUNT - 1 - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP;j >= CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP);j--)
1044                                                 {
1045 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1046                                                         if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && !(j >= rMem.fEndRow || ( j >= rMem.fStartRow && j - rMem.fStartRow % 2 == 0)))
1047                                                                 pTracker[0].fStageAtSync[threadSync++ * blockDim.x * gridDim.x + blockIdx.x * blockDim.x + threadIdx.x] = rMem.fStage + 1;
1048 #endif
1049 #ifdef HLTCA_GPU_PREFETCHDATA
1050                                                         if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS && j > CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP))
1051                                                         {
1052                                                                 ReadData(threadIdx.x, sMem, rMem, tracker, j - 1);
1053                                                         }
1054                                                         else
1055 #endif
1056                                                         if (iTracklet >= 0)
1057                                                         {
1058                                                                 UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1059                                                                 if (rMem.fNMissed > kMaxRowGap)
1060                                                                 {
1061                                                                         rMem.fGo = 0;
1062 #ifndef HLTCA_GPU_PREFETCHDATA
1063                                                                         break;
1064 #endif
1065                                                                 }
1066                                                         }
1067 #ifdef HLTCA_GPU_PREFETCHDATA
1068                                                         __syncthreads();
1069                                                         rMem.fCurrentData ^= 1;
1070 #endif
1071                                                 }
1072                                                         
1073                                                 if (iTracklet >= 0 && (!rMem.fGo || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
1074                                                 {
1075                                                         StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1076                                                 }
1077                                         }
1078                                         else
1079                                         {
1080                                                 for (int j = CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP);j < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT);j++)
1081                                                 {
1082 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1083                                                         if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && j >= rMem.fStartRow && (rMem.fStage > 0 || rMem.fCurrIH >= 0 || (j - rMem.fStartRow) % 2 == 0 ))
1084                                                                 pTracker[0].fStageAtSync[threadSync++ * blockDim.x * gridDim.x + blockIdx.x * blockDim.x + threadIdx.x] = rMem.fStage + 1;
1085 #endif
1086 #ifdef HLTCA_GPU_PREFETCHDATA
1087                                                         if (threadIdx.x < TRACKLET_CONSTRUCTOR_NMEMTHREDS && j < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT) - 1)
1088                                                         {
1089                                                                 ReadData(threadIdx.x, sMem, rMem, tracker, j + 1);
1090                                                         }
1091                                                         else
1092 #endif  
1093                                                         if (iTracklet >= 0)
1094                                                         {
1095                                                                 UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1096 #ifndef HLTCA_GPU_PREFETCHDATA
1097                                                                 //if (rMem.fNMissed > kMaxRowGap || rMem.fGo == 0) break;       //DR!!! CUDA Crashes with this enabled
1098 #endif
1099                                                         }
1100 #ifdef HLTCA_GPU_PREFETCHDATA
1101                                                         __syncthreads();
1102                                                         rMem.fCurrentData ^= 1;
1103 #endif
1104                                                 }
1105                                                 if (rMem.fGo && (rMem.fNMissed > kMaxRowGap || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
1106                                                 {
1107 #if defined(HLTCA_GPU_PREFETCHDATA) | !defined(HLTCA_GPU_PREFETCH_ROWBLOCK_ONLY)
1108                                                         if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) )
1109 #else
1110                                                         if ( !tParam.TransportToX( sMem.fRows[ rMem.fEndRow ].X(), tracker.Param().ConstBz(), .999 ) )
1111 #endif
1112                                                         {
1113                                                                 rMem.fGo = 0;
1114                                                         }
1115                                                         else
1116                                                         {
1117                                                                 storeToRowBlock.x = (HLTCA_ROW_COUNT - rMem.fEndRow) / HLTCA_GPU_SCHED_ROW_STEP;
1118                                                                 storeToRowBlock.y = 1;
1119                                                                 rMem.fNMissed = 0;
1120                                                                 rMem.fStage = 2;
1121                                                         }
1122                                                 }
1123
1124                                                 if (iTracklet >= 0 && !rMem.fGo)
1125                                                 {
1126                                                         StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1127                                                 }
1128                                         }
1129
1130                                         if (rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
1131                                         {
1132                                                 CopyTrackletTempData( rMem, rMemGlobal, tParam, tParamGlobal );
1133                                                 storePosition = CAMath::AtomicAdd(&sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x], 1);
1134                                         }
1135 #else
1136                                         if (iTracklet >= 0)
1137                                         {
1138                                                 for (int j = rMem.fStartRow;j < HLTCA_ROW_COUNT;j++)
1139                                                 {
1140                                                         UpdateTracklet(gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1141                                                         if (!rMem.fGo) break;
1142                                                 }
1143
1144                                                 rMem.fNMissed = 0;
1145                                                 rMem.fStage = 2;
1146                                                 if ( rMem.fGo )
1147                                                 {
1148                                                         if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) )  rMem.fGo = 0;
1149                                                 }
1150
1151                                                 for (int j = rMem.fEndRow;j >= 0;j--)
1152                                                 {
1153                                                         if (!rMem.fGo) break;
1154                                                         UpdateTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam, j);
1155                                                 }
1156
1157                                                 StoreTracklet( gridDim.x, blockDim.x, blockIdx.x, threadIdx.x, sMem, rMem, tracker, tParam );
1158                                         }
1159 #endif
1160 #ifdef HLTCA_GPU_RESCHED
1161                                         __syncthreads();
1162                                         if (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
1163                                         {
1164                                                 const int nReverse = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
1165                                                 const int nRowBlock = (threadIdx.x - TRACKLET_CONSTRUCTOR_NMEMTHREDS) % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
1166                                                 if (sMem.fTrackletStoreCount[nReverse][nRowBlock])
1167                                                 {
1168                                                         sMem.fTrackletStoreCount[nReverse][nRowBlock] = CAMath::AtomicAdd(&tracker.RowBlockPos(nReverse, nRowBlock)->x, sMem.fTrackletStoreCount[nReverse][nRowBlock]);
1169                                                 }
1170                                         }
1171                                         __syncthreads();
1172                                         if (iTracklet >= 0 && rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
1173                                         {
1174                                                 tracker.RowBlockTracklets(storeToRowBlock.y, storeToRowBlock.x)[sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x] + storePosition] = iTracklet;
1175                                         }
1176                                         __syncthreads();
1177 #endif
1178                                 }
1179                         }
1180                 }
1181         }
1182 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
1183
1184 #endif
1185 }
1186
1187 GPUd() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(int iTracklet, AliHLTTPCCATracker &tracker)
1188 {
1189         //Initialize Row Blocks
1190
1191 #ifndef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
1192 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
1193 #ifdef HLTCA_GPU_SCHED_FIXED_START
1194         const int firstDynamicTracklet = tracker.GPUParameters()->fScheduleFirstDynamicTracklet;
1195         if (iTracklet >= firstDynamicTracklet)
1196 #endif
1197         {
1198                 const int firstTrackletInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[CAMath::Max(id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP * HLTCA_GPU_SCHED_ROW_STEP, 1)].z);
1199                 if (iTracklet == firstTrackletInRowBlock)
1200                 {
1201                         const int firstRowInNextBlock = (id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP + 1) * HLTCA_GPU_SCHED_ROW_STEP;
1202                         int trackletsInRowBlock;
1203                         if (firstRowInNextBlock >= HLTCA_ROW_COUNT - 3)
1204                                 trackletsInRowBlock = *tracker.NTracklets() - firstTrackletInRowBlock;
1205                         else
1206                                 trackletsInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[firstRowInNextBlock].z) - firstTrackletInRowBlock;
1207
1208                         tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->x = trackletsInRowBlock;
1209                         tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->w = trackletsInRowBlock;
1210                 }
1211                 tracker.RowBlockTracklets(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)[iTracklet - firstTrackletInRowBlock] = iTracklet;
1212         }
1213 #endif
1214 }
1215
1216 GPUg() void AliHLTTPCCATrackletConstructorInit(int iSlice)
1217 {
1218         //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit
1219         AliHLTTPCCATracker &tracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker )[iSlice];
1220         int i = blockIdx.x * blockDim.x + threadIdx.x;
1221         if (i >= *tracker.NTracklets()) return;
1222         AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(i, tracker);
1223 }
1224
1225 GPUg() void AliHLTTPCCATrackletConstructorNewGPU()
1226 {
1227         //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorNewGPU
1228         AliHLTTPCCATracker *pTracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker );
1229         AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorNewGPU(pTracker);
1230 }
1231
1232 #else
1233 GPUd() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorNewCPU(AliHLTTPCCATracker &tracker)
1234 {
1235         //Tracklet constructor simple CPU Function that does not neew a scheduler
1236         GPUshared() AliHLTTPCCASharedMemory sMem;
1237         sMem.fNTracklets = *tracker.NTracklets();
1238         for (int iTracklet = 0;iTracklet < *tracker.NTracklets();iTracklet++)
1239         {
1240                 AliHLTTPCCATrackParam tParam;
1241                 AliHLTTPCCAThreadMemory rMem;
1242                 
1243                 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
1244
1245                 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
1246                 rMem.fCurrIH = id.HitIndex();
1247                 rMem.fStage = 0;
1248                 rMem.fNHits = 0;
1249                 rMem.fNMissed = 0;
1250
1251                 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
1252
1253                 rMem.fItr = iTracklet;
1254                 rMem.fGo = 1;
1255
1256                 for (int j = rMem.fStartRow;j < tracker.Param().NRows();j++)
1257                 {
1258                         UpdateTracklet(1, 1, 0, iTracklet, sMem, rMem, tracker, tParam, j);
1259                         if (!rMem.fGo) break;
1260                 }
1261
1262                 rMem.fNMissed = 0;
1263                 rMem.fStage = 2;
1264                 if ( rMem.fGo )
1265                 {
1266                         if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) ) rMem.fGo = 0;
1267                 }
1268
1269                 for (int j = rMem.fEndRow;j >= 0;j--)
1270                 {
1271                         if (!rMem.fGo) break;
1272                         UpdateTracklet( 1, 1, 0, iTracklet, sMem, rMem, tracker, tParam, j);
1273                 }
1274
1275                 StoreTracklet( 1, 1, 0, iTracklet, sMem, rMem, tracker, tParam );
1276         }
1277 }
1278 #endif