]> git.uio.no Git - u/mrichter/AliRoot.git/blob - HLT/TPCLib/tracking-ca/cagpu/AliHLTTPCCATrackletConstructorGPU.h
Update NVIDIA GPU Tracking library to be compatible to AliRoot patch 64473, add preli...
[u/mrichter/AliRoot.git] / HLT / TPCLib / tracking-ca / cagpu / AliHLTTPCCATrackletConstructorGPU.h
1 #include "AliHLTTPCCAGPUConfig.h"
2
3 MEM_TEMPLATE4() GPUdi() void AliHLTTPCCATrackletConstructor::CopyTrackletTempData( MEM_TYPE(AliHLTTPCCAThreadMemory) &rMemSrc, MEM_TYPE2(AliHLTTPCCAThreadMemory) &rMemDst, MEM_TYPE3(AliHLTTPCCATrackParam) &tParamSrc, MEM_TYPE4(AliHLTTPCCATrackParam) &tParamDst)
4 {
5         //Copy Temporary Tracklet data from registers to global mem and vice versa
6         rMemDst.fStartRow = rMemSrc.fStartRow;
7         rMemDst.fEndRow = rMemSrc.fEndRow;
8         rMemDst.fFirstRow = rMemSrc.fFirstRow;
9         rMemDst.fLastRow = rMemSrc.fLastRow;
10         rMemDst.fCurrIH =  rMemSrc.fCurrIH;
11         rMemDst.fGo = rMemSrc.fGo;
12         rMemDst.fStage = rMemSrc.fStage;
13         rMemDst.fNHits = rMemSrc.fNHits;
14         rMemDst.fNMissed = rMemSrc.fNMissed;
15         rMemDst.fLastY = rMemSrc.fLastY;
16         rMemDst.fLastZ = rMemSrc.fLastZ;
17
18 #if defined(HLTCA_GPU_ALTERNATIVE_SCHEDULER) & !defined(HLTCA_GPU_ALTERNATIVE_SCHEDULER_SIMPLE)
19         rMemDst.fItr = rMemSrc.fItr;
20         rMemDst.fIRow = rMemSrc.fIRow;
21         rMemDst.fIRowEnd = rMemSrc.fIRowEnd;
22 #endif
23
24         tParamDst.SetSinPhi( tParamSrc.GetSinPhi() );
25         tParamDst.SetDzDs( tParamSrc.GetDzDs() );
26         tParamDst.SetQPt( tParamSrc.GetQPt() );
27         tParamDst.SetSignCosPhi( tParamSrc.GetSignCosPhi() );
28         tParamDst.SetChi2( tParamSrc.GetChi2() );
29         tParamDst.SetNDF( tParamSrc.GetNDF() );
30         tParamDst.SetCov( 0, tParamSrc.GetCov(0) );
31         tParamDst.SetCov( 1, tParamSrc.GetCov(1) );
32         tParamDst.SetCov( 2, tParamSrc.GetCov(2) );
33         tParamDst.SetCov( 3, tParamSrc.GetCov(3) );
34         tParamDst.SetCov( 4, tParamSrc.GetCov(4) );
35         tParamDst.SetCov( 5, tParamSrc.GetCov(5) );
36         tParamDst.SetCov( 6, tParamSrc.GetCov(6) );
37         tParamDst.SetCov( 7, tParamSrc.GetCov(7) );
38         tParamDst.SetCov( 8, tParamSrc.GetCov(8) );
39         tParamDst.SetCov( 9, tParamSrc.GetCov(9) );
40         tParamDst.SetCov( 10, tParamSrc.GetCov(10) );
41         tParamDst.SetCov( 11, tParamSrc.GetCov(11) );
42         tParamDst.SetCov( 12, tParamSrc.GetCov(12) );
43         tParamDst.SetCov( 13, tParamSrc.GetCov(13) );
44         tParamDst.SetCov( 14, tParamSrc.GetCov(14) );
45         tParamDst.SetX( tParamSrc.GetX() );
46         tParamDst.SetY( tParamSrc.GetY() );
47         tParamDst.SetZ( tParamSrc.GetZ() );
48 }
49
50 #ifndef HLTCA_GPU_ALTERNATIVE_SCHEDULER
51 GPUdi() int AliHLTTPCCATrackletConstructor::FetchTracklet(GPUconstant() MEM_CONSTANT(AliHLTTPCCATracker) &tracker, GPUshared() MEM_LOCAL(AliHLTTPCCASharedMemory) &sMem, int Reverse, int RowBlock, int &mustInit)
52 {
53         //Fetch a new trackled to be processed by this thread
54         GPUsync();
55         int nextTrackletFirstRun = sMem.fNextTrackletFirstRun;
56         if (get_local_id(0) == 0)
57         {
58                 sMem.fNTracklets = *tracker.NTracklets();
59                 if (sMem.fNextTrackletFirstRun)
60                 {
61 #ifdef HLTCA_GPU_SCHED_FIXED_START
62                         const int iSlice = tracker.GPUParametersConst()->fGPUnSlices * (get_group_id(0) + (get_num_groups(0) % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (get_group_id(0) + 1) % get_num_groups(0) != 0)) / get_num_groups(0);
63                         const int nSliceBlockOffset = get_num_groups(0) * iSlice / tracker.GPUParametersConst()->fGPUnSlices;
64                         const uint2 &nTracklet = tracker.BlockStartingTracklet()[get_group_id(0) - nSliceBlockOffset];
65
66                         sMem.fNextTrackletCount = nTracklet.y;
67                         if (sMem.fNextTrackletCount == 0)
68                         {
69                                 sMem.fNextTrackletFirstRun = 0;
70                         }
71                         else
72                         {
73                                 if (tracker.TrackletStartHits()[nTracklet.x].RowIndex() / HLTCA_GPU_SCHED_ROW_STEP != RowBlock)
74                                 {
75                                         sMem.fNextTrackletCount = 0;
76                                 }
77                                 else
78                                 {
79                                         sMem.fNextTrackletFirst = nTracklet.x;
80                                 }
81                         }
82 #endif //HLTCA_GPU_SCHED_FIXED_START
83                 }
84                 else
85                 {
86                         const int4 oldPos = *tracker.RowBlockPos(Reverse, RowBlock);
87                         const int nFetchTracks = CAMath::Max(CAMath::Min(oldPos.x - oldPos.y, HLTCA_GPU_THREAD_COUNT_CONSTRUCTOR), 0);
88                         sMem.fNextTrackletCount = nFetchTracks;
89                         const int nUseTrack = nFetchTracks ? CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).y, nFetchTracks) : 0;
90                         sMem.fNextTrackletFirst = nUseTrack;
91
92                         const int nFillTracks = CAMath::Min(nFetchTracks, nUseTrack + nFetchTracks - (*((volatile int2*) (tracker.RowBlockPos(Reverse, RowBlock)))).x);
93                         if (nFillTracks > 0)
94                         {
95                                 const int nStartFillTrack = CAMath::AtomicAdd(&(*tracker.RowBlockPos(Reverse, RowBlock)).x, nFillTracks);
96                                 if (nFillTracks + nStartFillTrack >= HLTCA_GPU_MAX_TRACKLETS)
97                                 {
98                                         tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_ROWBLOCK_TRACKLET_OVERFLOW;
99                                 }
100                                 for (int i = 0;i < nFillTracks;i++)
101                                 {
102                                         tracker.RowBlockTracklets(Reverse, RowBlock)[(nStartFillTrack + i) % HLTCA_GPU_MAX_TRACKLETS] = -(get_group_id(0) * 1000000 + nFetchTracks * 10000 + oldPos.x * 100 + oldPos.y);        //Dummy filling track
103                                 }
104                         }
105                 }
106         }
107         GPUsync();
108         mustInit = 0;
109         if (sMem.fNextTrackletCount == 0)
110         {
111                 return(-2);             //No more track in this RowBlock
112         }
113         else if (get_local_id(0) >= sMem.fNextTrackletCount)
114         {
115                 return(-1);             //No track in this RowBlock for this thread
116         }
117         else if (nextTrackletFirstRun)
118         {
119                 if (get_local_id(0) == 0) sMem.fNextTrackletFirstRun = 0;
120                 mustInit = 1;
121                 return(sMem.fNextTrackletFirst + get_local_id(0));
122         }
123         else
124         {
125                 const int nTrackPos = sMem.fNextTrackletFirst + get_local_id(0);
126                 mustInit = (nTrackPos < tracker.RowBlockPos(Reverse, RowBlock)->w);
127                 volatile int* const ptrTracklet = &tracker.RowBlockTracklets(Reverse, RowBlock)[nTrackPos % HLTCA_GPU_MAX_TRACKLETS];
128                 int nTracklet;
129                 int nTryCount = 0;
130                 while ((nTracklet = *ptrTracklet) == -1)
131                 {
132                         for (int i = 0;i < 20000;i++)
133                                 sMem.fNextTrackletStupidDummy++;
134                         nTryCount++;
135                         if (nTryCount > 30)
136                         {
137                                 tracker.GPUParameters()->fGPUError = HLTCA_GPU_ERROR_SCHEDULE_COLLISION;
138                                 return(-1);
139                         }
140                 };
141                 return(nTracklet);
142         }
143 }
144
145 MEM_CLASS_PRE2 GPUdi() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU(MEM_LG2(AliHLTTPCCATracker) *pTracker, GPUsharedref() AliHLTTPCCATrackletConstructor::MEM_LOCAL(AliHLTTPCCASharedMemory)& sMem)
146 {
147         //Main Tracklet construction function that calls the scheduled (FetchTracklet) and then Processes the tracklet (mainly UpdataTracklet) and at the end stores the tracklet.
148         //Can also dispatch a tracklet to be rescheduled
149 #ifdef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
150         pTracker[0].BlockStartingTracklet()[0].x = HLTCA_GPU_EMULATION_SINGLE_TRACKLET;
151         pTracker[0].BlockStartingTracklet()[0].y = 1;
152         for (int i = 1;i < get_num_groups(0);i++)
153         {
154                 pTracker[0].BlockStartingTracklet()[i].x = pTracker[0].BlockStartingTracklet()[i].y = 0;
155         }
156 #endif //HLTCA_GPU_EMULATION_SINGLE_TRACKLET
157
158         //GPUshared() AliHLTTPCCASharedMemory sMem;
159
160 #ifdef HLTCA_GPU_SCHED_FIXED_START
161         if (get_local_id(0) == 0)
162         {
163                 sMem.fNextTrackletFirstRun = 1;
164         }
165         GPUsync();
166 #endif //HLTCA_GPU_SCHED_FIXED_START
167
168 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
169         if (get_local_id(0) == 0)
170         {
171                 sMem.fMaxSync = 0;
172         }
173         int threadSync = 0;
174 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
175
176         for (int iReverse = 0;iReverse < 2;iReverse++)
177         {
178                 for (volatile int iRowBlock = 0;iRowBlock < HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1;iRowBlock++)
179                 {
180 #ifdef HLTCA_GPU_SCHED_FIXED_SLICE
181                         int iSlice = pTracker[0].GPUParametersConst()->fGPUnSlices * (get_group_id(0) + (get_num_groups(0) % pTracker[0].GPUParametersConst()->fGPUnSlices != 0 && pTracker[0].GPUParametersConst()->fGPUnSlices * (get_group_id(0) + 1) % get_num_groups(0) != 0)) / get_num_groups(0);
182 #else
183                         for (int iSlice = 0;iSlice < pTracker[0].GPUParametersConst()->fGPUnSlices;iSlice++)
184 #endif //HLTCA_GPU_SCHED_FIXED_SLICE
185                         {
186                                 AliHLTTPCCATracker &tracker = pTracker[iSlice];
187                                 if (get_group_id(0) != 7 && sMem.fNextTrackletFirstRun && iSlice != (tracker.GPUParametersConst()->fGPUnSlices > get_num_groups(0) ? get_group_id(0) : (tracker.GPUParametersConst()->fGPUnSlices * (get_group_id(0) + (get_num_groups(0) % tracker.GPUParametersConst()->fGPUnSlices != 0 && tracker.GPUParametersConst()->fGPUnSlices * (get_group_id(0) + 1) % get_num_groups(0) != 0)) / get_num_groups(0))))
188                                 {
189                                         continue;
190                                 }
191
192                                 int sharedRowsInitialized = 0;
193
194                                 int iTracklet;
195                                 int mustInit;
196                                 while ((iTracklet = FetchTracklet(tracker, sMem, iReverse, iRowBlock, mustInit)) != -2)
197                                 {
198 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
199                                         CAMath::AtomicMaxShared(&sMem.fMaxSync, threadSync);
200                                         GPUsync();
201                                         threadSync = CAMath::Min(sMem.fMaxSync, 100000000 / get_local_size(0) / get_num_groups(0));
202 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
203                                         if (!sharedRowsInitialized)
204                                         {
205                                                 for (int i = get_local_id(0);i < HLTCA_ROW_COUNT * sizeof(AliHLTTPCCARow) / sizeof(int);i += get_local_size(0))
206                                                 {
207                                                         reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker.SliceDataRows())[i];
208                                                 }
209                                                 sharedRowsInitialized = 1;
210                                         }
211 #ifdef HLTCA_GPU_RESCHED
212                                         short2 storeToRowBlock;
213                                         int storePosition = 0;
214                                         if (get_local_id(0) < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
215                                         {
216                                                 const int nReverse = get_local_id(0) / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
217                                                 const int nRowBlock = get_local_id(0) % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
218                                                 sMem.fTrackletStoreCount[nReverse][nRowBlock] = 0;
219                                         }
220 #else
221                                         mustInit = 1;
222 #endif //HLTCA_GPU_RESCHED
223                                         GPUsync();
224                                         AliHLTTPCCATrackParam tParam;
225                                         AliHLTTPCCAThreadMemory rMem;
226
227 #ifdef HLTCA_GPU_EMULATION_DEBUG_TRACKLET
228                                         if (iTracklet == HLTCA_GPU_EMULATION_DEBUG_TRACKLET)
229                                         {
230                                                 tracker.GPUParameters()->fGPUError = 1;
231                                         }
232 #endif //HLTCA_GPU_EMULATION_DEBUG_TRACKLET
233                                         AliHLTTPCCAThreadMemory &rMemGlobal = tracker.GPUTrackletTemp()[iTracklet].fThreadMem;
234                                         AliHLTTPCCATrackParam &tParamGlobal = tracker.GPUTrackletTemp()[iTracklet].fParam;
235                                         if (mustInit)
236                                         {
237                                                 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
238
239                                                 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
240                                                 rMem.fCurrIH = id.HitIndex();
241                                                 rMem.fStage = 0;
242                                                 rMem.fNHits = 0;
243                                                 rMem.fNMissed = 0;
244
245                                                 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
246                                         }
247                                         else if (iTracklet >= 0)
248                                         {
249                                                 CopyTrackletTempData( rMemGlobal, rMem, tParamGlobal, tParam );
250                                         }
251                                         rMem.fItr = iTracklet;
252                                         rMem.fGo = (iTracklet >= 0);
253
254 #ifdef HLTCA_GPU_RESCHED
255                                         storeToRowBlock.x = iRowBlock + 1;
256                                         storeToRowBlock.y = iReverse;
257                                         if (iReverse)
258                                         {
259                                                 for (int j = HLTCA_ROW_COUNT - 1 - iRowBlock * HLTCA_GPU_SCHED_ROW_STEP;j >= CAMath::Max(0, HLTCA_ROW_COUNT - (iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP);j--)
260                                                 {
261 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
262                                                         if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && !(j >= rMem.fEndRow || ( j >= rMem.fStartRow && j - rMem.fStartRow % 2 == 0)))
263                                                                 pTracker[0].StageAtSync()[threadSync++ * get_global_size(0) + get_global_id(0)] = rMem.fStage + 1;
264 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
265                                                         if (iTracklet >= 0)
266                                                         {
267                                                                 UpdateTracklet(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), sMem, rMem, tracker, tParam, j);
268                                                                 if (rMem.fNMissed > kMaxRowGap && j <= rMem.fStartRow)
269                                                                 {
270                                                                         rMem.fGo = 0;
271                                                                         break;
272                                                                 }
273                                                         }
274                                                 }
275                                                         
276                                                 if (iTracklet >= 0 && (!rMem.fGo || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
277                                                 {
278                                                         StoreTracklet( get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), sMem, rMem, tracker, tParam );
279                                                 }
280                                         }
281                                         else
282                                         {
283                                                 for (int j = CAMath::Max(1, iRowBlock * HLTCA_GPU_SCHED_ROW_STEP);j < CAMath::Min((iRowBlock + 1) * HLTCA_GPU_SCHED_ROW_STEP, HLTCA_ROW_COUNT);j++)
284                                                 {
285 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
286                                                         if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && j >= rMem.fStartRow && (rMem.fStage > 0 || rMem.fCurrIH >= 0 || (j - rMem.fStartRow) % 2 == 0 ))
287                                                                 pTracker[0].StageAtSync()[threadSync++ * get_global_size(0) + get_global_id(0)] = rMem.fStage + 1;
288 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
289                                                         if (iTracklet >= 0)
290                                                         {
291                                                                 UpdateTracklet( get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), sMem, rMem, tracker, tParam, j);
292                                                                 //if (rMem.fNMissed > kMaxRowGap || rMem.fGo == 0) break;       //DR!!! CUDA Crashes with this enabled
293                                                         }
294                                                 }
295                                                 if (rMem.fGo && (rMem.fNMissed > kMaxRowGap || iRowBlock == HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP))
296                                                 {
297                                                         if ( !tParam.TransportToX( sMem.fRows[ rMem.fEndRow ].X(), tracker.Param().ConstBz(), .999 ) )
298                                                         {
299                                                                 rMem.fGo = 0;
300                                                         }
301                                                         else
302                                                         {
303                                                                 storeToRowBlock.x = (HLTCA_ROW_COUNT - rMem.fEndRow) / HLTCA_GPU_SCHED_ROW_STEP;
304                                                                 storeToRowBlock.y = 1;
305                                                                 rMem.fNMissed = 0;
306                                                                 rMem.fStage = 2;
307                                                         }
308                                                 }
309
310                                                 if (iTracklet >= 0 && !rMem.fGo)
311                                                 {
312                                                         StoreTracklet( get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), sMem, rMem, tracker, tParam );
313                                                 }
314                                         }
315
316                                         if (rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
317                                         {
318                                                 CopyTrackletTempData( rMem, rMemGlobal, tParam, tParamGlobal );
319                                                 storePosition = CAMath::AtomicAddShared(&sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x], 1);
320                                         }
321
322                                         GPUsync();
323                                         if (get_local_id(0) < 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1))
324                                         {
325                                                 const int nReverse = get_local_id(0) / (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
326                                                 const int nRowBlock = get_local_id(0) % (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1);
327                                                 if (sMem.fTrackletStoreCount[nReverse][nRowBlock])
328                                                 {
329                                                         sMem.fTrackletStoreCount[nReverse][nRowBlock] = CAMath::AtomicAdd(&tracker.RowBlockPos(nReverse, nRowBlock)->x, sMem.fTrackletStoreCount[nReverse][nRowBlock]);
330                                                 }
331                                         }
332                                         GPUsync();
333                                         if (iTracklet >= 0 && rMem.fGo && (iRowBlock != HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP || iReverse == 0))
334                                         {
335                                                 tracker.RowBlockTracklets(storeToRowBlock.y, storeToRowBlock.x)[sMem.fTrackletStoreCount[storeToRowBlock.y][storeToRowBlock.x] + storePosition] = iTracklet;
336                                         }
337                                         GPUsync();
338 #else
339                                         if (get_local_id(0) % HLTCA_GPU_WARP_SIZE == 0)
340                                         {
341                                                 sMem.fStartRows[get_local_id(0) / HLTCA_GPU_WARP_SIZE] = 160;
342                                                 sMem.fEndRows[get_local_id(0) / HLTCA_GPU_WARP_SIZE] = 0;
343                                         }
344                                         GPUsync();
345                                         if (iTracklet >= 0)
346                                         {
347                                                 CAMath::AtomicMinShared(&sMem.fStartRows[get_local_id(0) / HLTCA_GPU_WARP_SIZE], rMem.fStartRow);
348                                         }
349                                         GPUsync();
350                                         if (iTracklet >= 0)
351                                         {
352                                                 for (int j = sMem.fStartRows[get_local_id(0) / HLTCA_GPU_WARP_SIZE];j < HLTCA_ROW_COUNT;j++)
353                                                 {
354                                                         UpdateTracklet(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), sMem, rMem, tracker, tParam, j);
355                                                         if (!rMem.fGo) break;
356                                                 }
357
358                                                 rMem.fNMissed = 0;
359                                                 rMem.fStage = 2;
360                                                 if ( rMem.fGo )
361                                                 {
362                                                         if ( !tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999 ) )  rMem.fGo = 0;
363                                                 }
364                                                 CAMath::AtomicMaxShared(&sMem.fEndRows[get_local_id(0) / HLTCA_GPU_WARP_SIZE], rMem.fEndRow);
365                                         }
366
367                                         GPUsync();
368                                         if (iTracklet >= 0)
369                                         {
370                                                 for (int j = rMem.fEndRow;j >= 0;j--)
371                                                 {
372                                                         if (!rMem.fGo) break;
373                                                         UpdateTracklet( get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), sMem, rMem, tracker, tParam, j);
374                                                 }
375
376                                                 StoreTracklet( get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), sMem, rMem, tracker, tParam );
377                                         }
378 #endif //HLTCA_GPU_RESCHED
379                                 }
380                         }
381                 }
382         }
383 }
384
385 GPUdi() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(int iTracklet, AliHLTTPCCATracker &tracker)
386 {
387         //Initialize Row Blocks
388
389 #ifndef HLTCA_GPU_EMULATION_SINGLE_TRACKLET
390 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[iTracklet];
391 #ifdef HLTCA_GPU_SCHED_FIXED_START
392         const int firstDynamicTracklet = tracker.GPUParameters()->fScheduleFirstDynamicTracklet;
393         if (iTracklet >= firstDynamicTracklet)
394 #endif //HLTCA_GPU_SCHED_FIXED_START
395         {
396 #ifdef HLTCA_GPU_SCHED_FIXED_START
397                 const int firstTrackletInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[CAMath::Max(id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP * HLTCA_GPU_SCHED_ROW_STEP, 1)].z);
398 #else
399                 const int firstTrackletInRowBlock = tracker.RowStartHitCountOffset()[CAMath::Max(id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP * HLTCA_GPU_SCHED_ROW_STEP, 1)].z;
400 #endif //HLTCA_GPU_SCHED_FIXED_START
401
402                 if (iTracklet == firstTrackletInRowBlock)
403                 {
404                         const int firstRowInNextBlock = (id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP + 1) * HLTCA_GPU_SCHED_ROW_STEP;
405                         int trackletsInRowBlock;
406                         if (firstRowInNextBlock >= HLTCA_ROW_COUNT - 3)
407                                 trackletsInRowBlock = *tracker.NTracklets() - firstTrackletInRowBlock;
408                         else
409 #ifdef HLTCA_GPU_SCHED_FIXED_START
410                                 trackletsInRowBlock = CAMath::Max(firstDynamicTracklet, tracker.RowStartHitCountOffset()[firstRowInNextBlock].z) - firstTrackletInRowBlock;
411 #else
412                                 trackletsInRowBlock = tracker.RowStartHitCountOffset()[firstRowInNextBlock].z - firstTrackletInRowBlock;
413 #endif //HLTCA_GPU_SCHED_FIXED_START
414
415                         tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->x = trackletsInRowBlock;
416                         tracker.RowBlockPos(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)->w = trackletsInRowBlock;
417                 }
418                 tracker.RowBlockTracklets(0, id.RowIndex() / HLTCA_GPU_SCHED_ROW_STEP)[iTracklet - firstTrackletInRowBlock] = iTracklet;
419         }
420 #endif //!HLTCA_GPU_EMULATION_SINGLE_TRACKLET
421 }
422
423 GPUg() void AliHLTTPCCATrackletConstructorInit(int iSlice)
424 {
425         //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit
426         AliHLTTPCCATracker &tracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker )[iSlice];
427         int i = get_global_id(0);
428         if (i >= *tracker.NTracklets()) return;
429         AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorInit(i, tracker);
430 }
431
432 #elif defined(HLTCA_GPU_ALTERNATIVE_SCHEDULER_SIMPLE)
433
434 GPUdi() int AliHLTTPCCATrackletConstructor::FetchTracklet(GPUconstant() MEM_CONSTANT(AliHLTTPCCATracker) &tracker, GPUsharedref() MEM_LOCAL(AliHLTTPCCASharedMemory) &sMem, AliHLTTPCCAThreadMemory& /*rMem*/, MEM_PLAIN(AliHLTTPCCATrackParam)& /*tParam*/)
435 {
436         const int nativeslice = get_group_id(0) % tracker.GPUParametersConst()->fGPUnSlices;
437         const int nTracklets = *tracker.NTracklets();
438         GPUsync();
439         if (sMem.fNextTrackletFirstRun == 1)
440         {
441                 if (get_local_id(0) == 0)
442                 {
443                         sMem.fNextTrackletFirst = (get_group_id(0) - nativeslice) / tracker.GPUParametersConst()->fGPUnSlices * HLTCA_GPU_THREAD_COUNT_CONSTRUCTOR;
444                         sMem.fNextTrackletFirstRun = 0;
445                 }
446         }
447         else
448         {
449                 if (get_local_id(0) == 0)
450                 {
451                         if (tracker.GPUParameters()->fNextTracklet < nTracklets)
452                         {
453                                 const int firstTracklet = CAMath::AtomicAdd(&tracker.GPUParameters()->fNextTracklet, HLTCA_GPU_THREAD_COUNT_CONSTRUCTOR);
454                                 if (firstTracklet < nTracklets) sMem.fNextTrackletFirst = firstTracklet;
455                                 else sMem.fNextTrackletFirst = -2;
456                         }
457                         else
458                         {
459                                 sMem.fNextTrackletFirst = -2;
460                         }
461                 }
462         }
463         GPUsync();
464         return (sMem.fNextTrackletFirst);
465 }
466
467 GPUdi() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU(GPUconstant() MEM_CONSTANT(AliHLTTPCCATracker) *pTracker, GPUsharedref() AliHLTTPCCATrackletConstructor::MEM_LOCAL(AliHLTTPCCASharedMemory)& sMem)
468 {
469         const int nSlices = pTracker[0].GPUParametersConst()->fGPUnSlices;
470         const int nativeslice = get_group_id(0) % nSlices;
471         int currentSlice = -1;
472
473         if (get_local_id(0))
474         {
475                 sMem.fNextTrackletFirstRun = 1;
476         }
477
478         for (int iSlice = 0;iSlice < nSlices;iSlice++)
479         {
480                 GPUconstant() MEM_CONSTANT(AliHLTTPCCATracker) &tracker = pTracker[(nativeslice + iSlice) % nSlices];
481                 int iRow, iRowEnd;
482
483                 MEM_PLAIN(AliHLTTPCCATrackParam) tParam;
484                 AliHLTTPCCAThreadMemory rMem;
485
486                 int tmpTracklet;
487                 while ((tmpTracklet = FetchTracklet(tracker, sMem, rMem, tParam)) != -2)
488                 {
489                         if (tmpTracklet >= 0)
490                         {
491                                 rMem.fItr = tmpTracklet + get_local_id(0);
492                         }
493                         else
494                         {
495                                 rMem.fItr = -1;
496                         }
497
498                         if (iSlice != currentSlice)
499                         {
500                                 if (get_local_id(0) == 0)
501                                 {
502                                         sMem.fNTracklets = *tracker.NTracklets();
503                                 }
504
505                                 for (int i = get_local_id(0);i < HLTCA_ROW_COUNT * sizeof(MEM_PLAIN(AliHLTTPCCARow)) / sizeof(int);i += get_local_size(0))
506                                 {
507                                         reinterpret_cast<GPUsharedref() int*>(&sMem.fRows)[i] = reinterpret_cast<GPUglobalref() int*>(tracker.SliceDataRows())[i];
508                                 }
509                                 currentSlice = iSlice;
510                                 GPUsync();
511                         }
512
513                         if (rMem.fItr < sMem.fNTracklets)
514                         {
515                                 AliHLTTPCCAHitId id = tracker.TrackletStartHits()[rMem.fItr];
516
517                                 rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
518                                 rMem.fCurrIH = id.HitIndex();
519                                 rMem.fStage = 0;
520                                 rMem.fNHits = 0;
521                                 rMem.fNMissed = 0;
522
523                                 AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
524
525                                 rMem.fGo = 1;
526
527
528                                 iRow = rMem.fStartRow;
529                                 iRowEnd = tracker.Param().NRows();
530                         }
531                         else
532                         {
533                                 rMem.fGo = 0;
534                                 rMem.fStartRow = rMem.fEndRow = 0;
535                                 iRow = iRowEnd = 0;
536                                 rMem.fStage = 0;
537                         }
538
539                         for (int k = 0;k < 2;k++)
540                         {
541                                 for (;iRow != iRowEnd;iRow += rMem.fStage == 2 ? -1 : 1)
542                                 {
543                                         UpdateTracklet(0, 0, 0, 0, sMem, rMem, tracker, tParam, iRow);
544                                 }
545
546                                 if (rMem.fStage == 2)
547                                 {
548                                         if (rMem.fItr < sMem.fNTracklets)
549                                         {
550                                                 StoreTracklet( 0, 0, 0, 0, sMem, rMem, tracker, tParam );
551                                         }
552                                 }
553                                 else
554                                 {
555                                         rMem.fNMissed = 0;
556                                         rMem.fStage = 2;
557                                         if (rMem.fGo) if (!tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999)) rMem.fGo = 0;
558                                         iRow = rMem.fEndRow;
559                                         iRowEnd = -1;
560                                 }
561                         }
562                 }
563         }
564 }
565  
566
567 #else //HLTCA_GPU_ALTERNATIVE_SCHEDULER
568
569 GPUdi() int AliHLTTPCCATrackletConstructor::FetchTracklet(GPUconstant() MEM_CONSTANT(AliHLTTPCCATracker) &tracker, GPUsharedref() MEM_LOCAL(AliHLTTPCCASharedMemory) &sMem, AliHLTTPCCAThreadMemory &rMem, MEM_PLAIN(AliHLTTPCCATrackParam) &tParam)
570 {
571         const int nativeslice = get_group_id(0) % tracker.GPUParametersConst()->fGPUnSlices;
572         const int nTracklets = *tracker.NTracklets();
573         GPUsync();
574         if (get_local_id(0) == 0) sMem.fTrackletStorePos = 0;
575         int nStorePos = -1;
576         if (sMem.fNextTrackletFirstRun == 1)
577         {
578                 if (get_local_id(0) == 0)
579                 {
580                         sMem.fNextTrackletFirst = (get_group_id(0) - nativeslice) / tracker.GPUParametersConst()->fGPUnSlices * HLTCA_GPU_THREAD_COUNT_CONSTRUCTOR;
581                         sMem.fNextTrackletFirstRun = 0;
582                         sMem.fNextTrackletCount = HLTCA_GPU_THREAD_COUNT_CONSTRUCTOR;
583                 }
584         }
585         else
586         {
587                 if (sMem.fNextTrackletCount < HLTCA_GPU_THREAD_COUNT_CONSTRUCTOR - HLTCA_GPU_ALTSCHED_MIN_THREADS)
588                 {
589                         if (get_local_id(0) == 0)
590                         {
591                                 sMem.fNextTrackletFirst = -1;
592                         }
593                 }
594                 else
595                 {
596                         GPUsync();
597                         if (rMem.fItr != -1)
598                         {
599                                 nStorePos = CAMath::AtomicAddShared(&sMem.fTrackletStorePos, 1);
600                                 CopyTrackletTempData(rMem, sMem.swapMemory[nStorePos].fThreadMem, tParam, sMem.swapMemory[nStorePos].fParam);
601                                 rMem.fItr = -1;
602                         }
603                         if (get_local_id(0) == 0)
604                         {
605                                 if (tracker.GPUParameters()->fNextTracklet >= nTracklets)
606                                 {
607                                         sMem.fNextTrackletFirst = -1;
608                                 }
609                                 else
610                                 {
611                                         const int firstTracklet = CAMath::AtomicAdd(&tracker.GPUParameters()->fNextTracklet, sMem.fNextTrackletCount);
612                                         if (firstTracklet >= nTracklets)
613                                         {
614                                                 sMem.fNextTrackletFirst = -1;
615                                         }
616                                         else
617                                         {
618                                                 sMem.fNextTrackletFirst = firstTracklet;
619                                         }
620                                 }
621                         }
622                 }
623         }
624
625         if (get_local_id(0) == 0)
626         {
627                 if (sMem.fNextTrackletFirst == -1 && sMem.fNextTrackletCount == HLTCA_GPU_THREAD_COUNT_CONSTRUCTOR)
628                 {
629                         sMem.fNextTrackletFirst = -2;
630                         sMem.fNextTrackletCount = HLTCA_GPU_THREAD_COUNT_CONSTRUCTOR;
631                 }
632                 else if (sMem.fNextTrackletFirst >= 0)
633                 {
634                         if (sMem.fNextTrackletFirst + sMem.fNextTrackletCount >= nTracklets)
635                         {
636                                 sMem.fNextTrackletCount = sMem.fNextTrackletFirst + sMem.fNextTrackletCount - nTracklets;
637                         }
638                         else
639                         {
640                                 sMem.fNextTrackletCount = 0;
641                         }
642                 }
643         }
644         GPUsync();
645         if (get_local_id(0) < sMem.fTrackletStorePos)
646         {
647                 CopyTrackletTempData(sMem.swapMemory[get_local_id(0)].fThreadMem, rMem, sMem.swapMemory[get_local_id(0)].fParam, tParam);
648         }
649         return (sMem.fNextTrackletFirst);
650 }
651
652 GPUdi() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU(GPUconstant() MEM_CONSTANT(AliHLTTPCCATracker) *pTracker, GPUsharedref() AliHLTTPCCATrackletConstructor::MEM_LOCAL(AliHLTTPCCASharedMemory)& sMem)
653 {
654         const int nSlices = pTracker[0].GPUParametersConst()->fGPUnSlices;
655         const int nativeslice = get_group_id(0) % nSlices;
656         //GPUshared() AliHLTTPCCASharedMemory sMem;
657         int currentSlice = -1;
658
659         if (get_local_id(0))
660         {
661                 sMem.fNextTrackletFirstRun = 1;
662         }
663
664 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
665         if (get_local_id(0) == 0)
666         {
667                 sMem.fMaxSync = 0;
668         }
669         int threadSync = 0;
670 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
671
672         for (int iSlice = 0;iSlice < nSlices;iSlice++)
673         {
674                 GPUconstant() MEM_CONSTANT(AliHLTTPCCATracker) &tracker = pTracker[(nativeslice + iSlice) % nSlices];
675
676                 MEM_PLAIN(AliHLTTPCCATrackParam) tParam;
677                 AliHLTTPCCAThreadMemory rMem;
678                 rMem.fItr = -1;
679
680                 int tmpTracklet;
681                 while ((tmpTracklet = FetchTracklet(tracker, sMem, rMem, tParam)) != -2)
682                 {
683
684 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
685                                         CAMath::AtomicMaxShared(&sMem.fMaxSync, threadSync);
686                                         GPUsync();
687                                         threadSync = CAMath::Min(sMem.fMaxSync, 100000000 / get_local_size(0) / get_num_groups(0));
688 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
689
690                         if (iSlice != currentSlice)
691                         {
692                                 if (get_local_id(0) == 0) sMem.fNTracklets = *tracker.NTracklets();
693
694                                 for (int i = get_local_id(0);i < HLTCA_ROW_COUNT * sizeof(MEM_PLAIN(AliHLTTPCCARow)) / sizeof(int);i += get_local_size(0))
695                                 {
696                                         reinterpret_cast<GPUsharedref() int*>(&sMem.fRows)[i] = reinterpret_cast<GPUglobalref() int*>(tracker.SliceDataRows())[i];
697                                 }
698                                 currentSlice = iSlice;
699                                 GPUsync();
700                         }
701
702                         if (tmpTracklet >= 0 && rMem.fItr < 0)
703                         {
704                                 rMem.fItr = tmpTracklet + (signed) get_local_id(0) - sMem.fTrackletStorePos;
705                                 if (rMem.fItr >= sMem.fNTracklets)
706                                 {
707                                         rMem.fItr = -1;
708                                 }
709                                 else
710                                 {
711                                         AliHLTTPCCAHitId id = tracker.TrackletStartHits()[rMem.fItr];
712
713                                         rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
714                                         rMem.fCurrIH = id.HitIndex();
715                                         rMem.fStage = 0;
716                                         rMem.fNHits = 0;
717                                         rMem.fNMissed = 0;
718
719                                         AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
720
721                                         rMem.fGo = 1;
722
723                                         rMem.fIRow = rMem.fStartRow;
724                                         rMem.fIRowEnd = tracker.Param().NRows();
725                                 }
726                         }
727
728                         if (rMem.fItr >= 0)
729                         {
730                                 for (int j = 0;j < HLTCA_GPU_ALTSCHED_STEPSIZE && rMem.fIRow != rMem.fIRowEnd;j++,rMem.fIRow += rMem.fStage == 2 ? -1 : 1)
731                                 {
732 #ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
733                                         if (rMem.fStage == 2)
734                                         {
735                                                 if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && !(rMem.fIRow >= rMem.fEndRow || ( rMem.fIRow >= rMem.fStartRow && rMem.fIRow - rMem.fStartRow % 2 == 0)))
736                                                         pTracker[0].StageAtSync()[threadSync++ * get_global_size(0) + get_global_id(0)] = rMem.fStage + 1;
737                                         }
738                                         else
739                                         {
740                                                 if (rMem.fNMissed <= kMaxRowGap && rMem.fGo && rMem.fIRow >= rMem.fStartRow && (rMem.fStage > 0 || rMem.fCurrIH >= 0 || (rMem.fIRow - rMem.fStartRow) % 2 == 0 ))
741                                                         pTracker[0].StageAtSync()[threadSync++ * get_global_size(0) + get_global_id(0)] = rMem.fStage + 1;
742                                         }
743 #endif //HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
744                                         UpdateTracklet(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), sMem, rMem, tracker, tParam, rMem.fIRow);
745                                 }
746
747                                 if (rMem.fIRow == rMem.fIRowEnd || rMem.fNMissed > kMaxRowGap)
748                                 {
749                                         if (rMem.fStage >= 2)
750                                         {
751                                                 rMem.fGo = 0;
752                                         }
753                                         else if (rMem.fGo)
754                                         {
755                                                 rMem.fNMissed = 0;
756                                                 rMem.fStage = 2;
757                                                 if (!tParam.TransportToX( tracker.Row( rMem.fEndRow ).X(), tracker.Param().ConstBz(), .999)) rMem.fGo = 0;
758                                                 rMem.fIRow = rMem.fEndRow;
759                                                 rMem.fIRowEnd = -1;
760                                         }
761                                 }
762
763                                 if (!rMem.fGo)
764                                 {
765                                         StoreTracklet( get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), sMem, rMem, tracker, tParam );
766                                         rMem.fItr = -1;
767                                         CAMath::AtomicAddShared(&sMem.fNextTrackletCount, 1);
768                                 }
769                         }
770                 }
771         }
772 }
773
774 #endif //HLTCA_GPU_ALTERNATIVE_SCHEDULER
775
776 #ifndef __OPENCL__
777 GPUg() void AliHLTTPCCATrackletConstructorGPU()
778 {
779         //GPU Wrapper for AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU
780         AliHLTTPCCATracker *pTracker = ( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker );
781         GPUshared() AliHLTTPCCATrackletConstructor::MEM_LOCAL(AliHLTTPCCASharedMemory) sMem;
782         AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPU(pTracker, sMem);
783 }
784
785 GPUg() void AliHLTTPCCATrackletConstructorGPUPP(int firstSlice, int sliceCount)
786 {
787         if (get_group_id(0) >= sliceCount) return;
788         AliHLTTPCCATracker *pTracker = &( ( AliHLTTPCCATracker* ) gAliHLTTPCCATracker )[firstSlice + get_group_id(0)];
789         AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPUPP(pTracker);
790 }
791
792 GPUd() void AliHLTTPCCATrackletConstructor::AliHLTTPCCATrackletConstructorGPUPP(AliHLTTPCCATracker *tracker)
793 {
794         GPUshared() AliHLTTPCCASharedMemory sMem;
795 #if defined(HLTCA_GPU_RESCHED) & !defined(HLTCA_GPU_ALTERNATIVE_SCHEDULER)
796 #define startRows sMem.fStartRows
797 #define endRows sMem.fEndRows
798 #else
799         GPUshared() int startRows[HLTCA_GPU_THREAD_COUNT_CONSTRUCTOR / HLTCA_GPU_WARP_SIZE + 1];
800         GPUshared() int endRows[HLTCA_GPU_THREAD_COUNT_CONSTRUCTOR / HLTCA_GPU_WARP_SIZE + 1];
801 #endif
802         sMem.fNTracklets = *tracker->NTracklets();
803
804         for (int i = get_local_id(0);i < HLTCA_ROW_COUNT * sizeof(AliHLTTPCCARow) / sizeof(int);i += get_local_size(0))
805         {
806                 reinterpret_cast<int*>(&sMem.fRows)[i] = reinterpret_cast<int*>(tracker->SliceDataRows())[i];
807         }
808
809         for (int iTracklet = get_local_id(0);iTracklet < (*tracker->NTracklets() / HLTCA_GPU_THREAD_COUNT_CONSTRUCTOR + 1) * HLTCA_GPU_THREAD_COUNT_CONSTRUCTOR;iTracklet += get_local_size(0))
810         {
811                 AliHLTTPCCATrackParam tParam;
812                 AliHLTTPCCAThreadMemory rMem;
813                 
814                 if (iTracklet < *tracker->NTracklets())
815                 {
816                         AliHLTTPCCAHitId id = tracker->TrackletTmpStartHits()[iTracklet];
817
818                         rMem.fStartRow = rMem.fEndRow = rMem.fFirstRow = rMem.fLastRow = id.RowIndex();
819                         rMem.fCurrIH = id.HitIndex();
820                         rMem.fStage = 0;
821                         rMem.fNHits = 0;
822                         rMem.fNMissed = 0;
823
824                         AliHLTTPCCATrackletConstructor::InitTracklet(tParam);
825
826                         rMem.fItr = iTracklet;
827                         rMem.fGo = 1;
828                 }
829
830                 if (get_local_id(0) % HLTCA_GPU_WARP_SIZE == 0)
831                 {
832                         startRows[get_local_id(0) / HLTCA_GPU_WARP_SIZE] = 160;
833                         endRows[get_local_id(0) / HLTCA_GPU_WARP_SIZE] = 0;
834                 }
835                 GPUsync();
836                 if (iTracklet < *tracker->NTracklets())
837                 {
838                         CAMath::AtomicMinShared(&startRows[get_local_id(0) / HLTCA_GPU_WARP_SIZE], rMem.fStartRow);
839                 }
840                 GPUsync();
841                 if (iTracklet < *tracker->NTracklets())
842                 {
843                         for (int j = startRows[get_local_id(0) / HLTCA_GPU_WARP_SIZE];j < HLTCA_ROW_COUNT;j++)
844                         {
845                                 UpdateTracklet(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), sMem, rMem, *tracker, tParam, j);
846                                 if (!rMem.fGo) break;
847                         }
848
849                         rMem.fNMissed = 0;
850                         rMem.fStage = 2;
851                         if ( rMem.fGo )
852                         {
853                                 if ( !tParam.TransportToX( tracker->Row( rMem.fEndRow ).X(), tracker->Param().ConstBz(), .999 ) )  rMem.fGo = 0;
854                         }
855                         CAMath::AtomicMaxShared(&endRows[get_local_id(0) / HLTCA_GPU_WARP_SIZE], rMem.fEndRow);
856                 }
857
858                 GPUsync();
859                 if (iTracklet < *tracker->NTracklets())
860                 {
861                         for (int j = rMem.fEndRow;j >= 0;j--)
862                         {
863                                 if (!rMem.fGo) break;
864                                 UpdateTracklet( get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), sMem, rMem, *tracker, tParam, j);
865                         }
866                         StoreTracklet( get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), sMem, rMem, *tracker, tParam );
867                 }
868         }
869 }
870
871 #endif