]>
Commit | Line | Data |
---|---|---|
d3821846 | 1 | // ************************************************************************** |
2 | // This file is property of and copyright by the ALICE HLT Project * | |
3 | // ALICE Experiment at CERN, All rights reserved. * | |
4 | // * | |
5 | // Primary Authors: Sergey Gorbunov <sergey.gorbunov@kip.uni-heidelberg.de> * | |
6 | // Ivan Kisel <kisel@kip.uni-heidelberg.de> * | |
7 | // David Rohr <drohr@kip.uni-heidelberg.de> * | |
8 | // for The ALICE HLT Project. * | |
9 | // * | |
10 | // Permission to use, copy, modify and distribute this software and its * | |
11 | // documentation strictly for non-commercial purposes is hereby granted * | |
12 | // without fee, provided that the above copyright notice appears in all * | |
13 | // copies and that both the copyright notice and this permission notice * | |
14 | // appear in the supporting documentation. The authors make no claims * | |
15 | // about the suitability of this software for any purpose. It is * | |
16 | // provided "as is" without express or implied warranty. * | |
17 | // * | |
18 | //*************************************************************************** | |
19 | ||
20 | #include <string.h> | |
21 | #ifndef _WIN32 | |
22 | #include <unistd.h> | |
23 | #endif | |
24 | #include "AliHLTTPCCAGPUTrackerBase.h" | |
25 | #include "AliHLTTPCCAClusterData.h" | |
26 | #include "AliHLTTPCCAGPUTrackerCommon.h" | |
27 | ||
28 | ClassImp( AliHLTTPCCAGPUTrackerBase ) | |
29 | ||
30 | int AliHLTTPCCAGPUTrackerBase::GlobalTracking(int iSlice, int threadId, AliHLTTPCCAGPUTrackerBase::helperParam* hParam) | |
31 | { | |
32 | if (fDebugLevel >= 3) {HLTDebug("GPU Tracker running Global Tracking for slice %d on thread %d\n", iSlice, threadId);} | |
33 | ||
34 | int sliceLeft = (iSlice + (fgkNSlices / 2 - 1)) % (fgkNSlices / 2); | |
35 | int sliceRight = (iSlice + 1) % (fgkNSlices / 2); | |
36 | if (iSlice >= fgkNSlices / 2) | |
37 | { | |
38 | sliceLeft += fgkNSlices / 2; | |
39 | sliceRight += fgkNSlices / 2; | |
40 | } | |
41 | while (fSliceOutputReady < iSlice || fSliceOutputReady < sliceLeft || fSliceOutputReady < sliceRight) | |
42 | { | |
43 | if (hParam != NULL && hParam->fReset) return(1); | |
44 | } | |
45 | ||
46 | pthread_mutex_lock(&((pthread_mutex_t*) fSliceGlobalMutexes)[sliceLeft]); | |
47 | pthread_mutex_lock(&((pthread_mutex_t*) fSliceGlobalMutexes)[sliceRight]); | |
48 | fSlaveTrackers[iSlice].PerformGlobalTracking(fSlaveTrackers[sliceLeft], fSlaveTrackers[sliceRight], HLTCA_GPU_MAX_TRACKS); | |
49 | pthread_mutex_unlock(&((pthread_mutex_t*) fSliceGlobalMutexes)[sliceLeft]); | |
50 | pthread_mutex_unlock(&((pthread_mutex_t*) fSliceGlobalMutexes)[sliceRight]); | |
51 | ||
52 | fSliceLeftGlobalReady[sliceLeft] = 1; | |
53 | fSliceRightGlobalReady[sliceRight] = 1; | |
54 | if (fDebugLevel >= 3) {HLTDebug("GPU Tracker finished Global Tracking for slice %d on thread %d\n", iSlice, threadId);} | |
55 | return(0); | |
56 | } | |
57 | ||
58 | void* AliHLTTPCCAGPUTrackerBase::helperWrapper(void* arg) | |
59 | { | |
60 | AliHLTTPCCAGPUTrackerBase::helperParam* par = (AliHLTTPCCAGPUTrackerBase::helperParam*) arg; | |
61 | AliHLTTPCCAGPUTrackerBase* cls = par->fCls; | |
62 | ||
63 | AliHLTTPCCATracker* tmpTracker = new AliHLTTPCCATracker; | |
64 | ||
65 | #ifdef HLTCA_STANDALONE | |
66 | if (cls->fDebugLevel >= 2) HLTInfo("\tHelper thread %d starting", par->fNum); | |
67 | #endif | |
68 | ||
69 | #if defined(HLTCA_STANDALONE) & !defined(_WIN32) | |
70 | cpu_set_t mask; | |
71 | CPU_ZERO(&mask); | |
72 | CPU_SET(par->fNum * 2 + 2, &mask); | |
73 | //sched_setaffinity(0, sizeof(mask), &mask); | |
74 | #endif | |
75 | ||
76 | while(pthread_mutex_lock(&((pthread_mutex_t*) par->fMutex)[0]) == 0 && par->fTerminate == false) | |
77 | { | |
78 | if (par->CPUTracker) | |
79 | { | |
80 | for (int i = 0;i < cls->fNSlicesPerCPUTracker;i++) | |
81 | { | |
82 | int myISlice = cls->fSliceCount - cls->fNCPUTrackers * cls->fNSlicesPerCPUTracker + (par->fNum - cls->fNHelperThreads) * cls->fNSlicesPerCPUTracker + i; | |
83 | #ifdef HLTCA_STANDALONE | |
84 | if (cls->fDebugLevel >= 3) HLTInfo("\tHelper Thread %d Doing full CPU tracking, Slice %d", par->fNum, myISlice); | |
85 | #endif | |
86 | if (myISlice >= 0) | |
87 | { | |
88 | tmpTracker->Initialize(cls->fSlaveTrackers[par->fFirstSlice + myISlice].Param()); | |
89 | tmpTracker->ReadEvent(&par->pClusterData[myISlice]); | |
90 | tmpTracker->DoTracking(); | |
91 | tmpTracker->SetOutput(&par->pOutput[myISlice]); | |
92 | pthread_mutex_lock((pthread_mutex_t*) cls->fHelperMemMutex); | |
93 | tmpTracker->WriteOutputPrepare(); | |
94 | pthread_mutex_unlock((pthread_mutex_t*) cls->fHelperMemMutex); | |
95 | tmpTracker->WriteOutput(); | |
96 | ||
97 | /*cls->fSlaveTrackers[par->fFirstSlice + myISlice].SetGPUSliceDataMemory((char*) new uint4[HLTCA_GPU_SLICE_DATA_MEMORY/sizeof(uint4)], (char*) new uint4[HLTCA_GPU_ROWS_MEMORY/sizeof(uint4)]); | |
98 | cls->fSlaveTrackers[par->fFirstSlice + myISlice].ReadEvent(&par->pClusterData[myISlice]); | |
99 | cls->fSlaveTrackers[par->fFirstSlice + myISlice].SetPointersTracklets(HLTCA_GPU_MAX_TRACKLETS); | |
100 | cls->fSlaveTrackers[par->fFirstSlice + myISlice].SetPointersHits(par->pClusterData[myISlice].NumberOfClusters()); | |
101 | cls->fSlaveTrackers[par->fFirstSlice + myISlice].SetPointersTracks(HLTCA_GPU_MAX_TRACKS, par->pClusterData[myISlice].NumberOfClusters()); | |
102 | cls->fSlaveTrackers[par->fFirstSlice + myISlice].SetGPUTrackerTrackletsMemory(reinterpret_cast<char*> ( new uint4 [ cls->fSlaveTrackers[par->fFirstSlice + myISlice].TrackletMemorySize()/sizeof( uint4 ) + 100] ), HLTCA_GPU_MAX_TRACKLETS, cls->fConstructorBlockCount); | |
103 | cls->fSlaveTrackers[par->fFirstSlice + myISlice].SetGPUTrackerHitsMemory(reinterpret_cast<char*> ( new uint4 [ cls->fSlaveTrackers[par->fFirstSlice + myISlice].HitMemorySize()/sizeof( uint4 ) + 100]), par->pClusterData[myISlice].NumberOfClusters()); | |
104 | cls->fSlaveTrackers[par->fFirstSlice + myISlice].SetGPUTrackerTracksMemory(reinterpret_cast<char*> ( new uint4 [ cls->fSlaveTrackers[par->fFirstSlice + myISlice].TrackMemorySize()/sizeof( uint4 ) + 100]), HLTCA_GPU_MAX_TRACKS, par->pClusterData[myISlice].NumberOfClusters()); | |
105 | cls->fSlaveTrackers[par->fFirstSlice + myISlice].DoTracking(); | |
106 | cls->WriteOutput(par->pOutput, par->fFirstSlice, myISlice, par->fNum + 1); | |
107 | delete[] cls->fSlaveTrackers[par->fFirstSlice + myISlice].HitMemory(); | |
108 | delete[] cls->fSlaveTrackers[par->fFirstSlice + myISlice].TrackletMemory(); | |
109 | delete[] cls->fSlaveTrackers[par->fFirstSlice + myISlice].TrackMemory();*/ | |
110 | } | |
111 | #ifdef HLTCA_STANDALONE | |
112 | if (cls->fDebugLevel >= 3) HLTInfo("\tHelper Thread %d Finished, Slice %d", par->fNum, myISlice); | |
113 | #endif | |
114 | } | |
115 | } | |
116 | else | |
117 | { | |
118 | int mustRunSlice19 = 0; | |
119 | for (int i = par->fNum + 1;i < par->fSliceCount;i += cls->fNHelperThreads + 1) | |
120 | { | |
121 | //if (cls->fDebugLevel >= 3) HLTInfo("\tHelper Thread %d Running, Slice %d+%d, Phase %d", par->fNum, par->fFirstSlice, i, par->fPhase); | |
122 | if (par->fPhase) | |
123 | { | |
124 | if (cls->fUseGlobalTracking) | |
125 | { | |
126 | int realSlice = i + 1; | |
127 | if (realSlice % (fgkNSlices / 2) < 1) realSlice -= fgkNSlices / 2; | |
128 | ||
129 | if (realSlice % (fgkNSlices / 2) != 1) | |
130 | { | |
131 | cls->GlobalTracking(realSlice, par->fNum + 1, par); | |
132 | } | |
133 | ||
134 | if (realSlice == 19) | |
135 | { | |
136 | mustRunSlice19 = 1; | |
137 | } | |
138 | else | |
139 | { | |
140 | while (cls->fSliceLeftGlobalReady[realSlice] == 0 || cls->fSliceRightGlobalReady[realSlice] == 0) | |
141 | { | |
142 | if (par->fReset) goto ResetHelperThread; | |
143 | } | |
144 | cls->WriteOutput(par->pOutput, par->fFirstSlice, realSlice, par->fNum + 1); | |
145 | } | |
146 | } | |
147 | else | |
148 | { | |
149 | while (cls->fSliceOutputReady < i) | |
150 | { | |
151 | if (par->fReset) goto ResetHelperThread; | |
152 | } | |
153 | cls->WriteOutput(par->pOutput, par->fFirstSlice, i, par->fNum + 1); | |
154 | } | |
155 | } | |
156 | else | |
157 | { | |
158 | cls->ReadEvent(par->pClusterData, par->fFirstSlice, i, par->fNum + 1); | |
159 | par->fDone = i + 1; | |
160 | } | |
161 | //if (cls->fDebugLevel >= 3) HLTInfo("\tHelper Thread %d Finished, Slice %d+%d, Phase %d", par->fNum, par->fFirstSlice, i, par->fPhase); | |
162 | } | |
163 | if (mustRunSlice19) | |
164 | { | |
165 | while (cls->fSliceLeftGlobalReady[19] == 0 || cls->fSliceRightGlobalReady[19] == 0) | |
166 | { | |
167 | if (par->fReset) goto ResetHelperThread; | |
168 | } | |
169 | cls->WriteOutput(par->pOutput, par->fFirstSlice, 19, par->fNum + 1); | |
170 | } | |
171 | } | |
172 | ResetHelperThread: | |
173 | cls->ResetThisHelperThread(par); | |
174 | } | |
175 | #ifdef HLTCA_STANDALONE | |
176 | if (cls->fDebugLevel >= 2) HLTInfo("\tHelper thread %d terminating", par->fNum); | |
177 | #endif | |
178 | delete tmpTracker; | |
179 | pthread_mutex_unlock(&((pthread_mutex_t*) par->fMutex)[1]); | |
180 | pthread_exit(NULL); | |
181 | return(NULL); | |
182 | } | |
183 | ||
184 | void AliHLTTPCCAGPUTrackerBase::ResetThisHelperThread(AliHLTTPCCAGPUTrackerBase::helperParam* par) | |
185 | { | |
186 | if (par->fReset) HLTImportant("GPU Helper Thread %d reseting", par->fNum); | |
187 | par->fReset = false; | |
188 | pthread_mutex_unlock(&((pthread_mutex_t*) par->fMutex)[1]); | |
189 | } | |
190 | ||
191 | #define SemLockName "AliceHLTTPCCAGPUTrackerInitLockSem" | |
192 | ||
193 | AliHLTTPCCAGPUTrackerBase::AliHLTTPCCAGPUTrackerBase() : | |
194 | fGpuTracker(NULL), | |
195 | fGPUMemory(NULL), | |
196 | fHostLockedMemory(NULL), | |
197 | fGPUMergerMemory(NULL), | |
198 | fGPUMergerHostMemory(NULL), | |
199 | fGPUMergerMaxMemory(0), | |
200 | fDebugLevel(0), | |
201 | fDebugMask(0xFFFFFFFF), | |
202 | fOutFile(NULL), | |
203 | fGPUMemSize(0), | |
204 | fSliceCount(HLTCA_GPU_DEFAULT_MAX_SLICE_COUNT), | |
205 | fCudaDevice(0), | |
206 | fOutputControl(NULL), | |
207 | fThreadId(0), | |
208 | fCudaInitialized(0), | |
209 | fPPMode(0), | |
210 | fSelfheal(0), | |
211 | fConstructorBlockCount(30), | |
212 | selectorBlockCount(30), | |
213 | fNHelperThreads(HLTCA_GPU_DEFAULT_HELPER_THREADS), | |
214 | fHelperParams(NULL), | |
215 | fHelperMemMutex(NULL), | |
216 | fSliceOutputReady(0), | |
217 | fSliceGlobalMutexes(NULL), | |
218 | fNCPUTrackers(0), | |
219 | fNSlicesPerCPUTracker(0), | |
220 | fGlobalTracking(0), | |
221 | fUseGlobalTracking(0), | |
222 | fNSlaveThreads(0) | |
223 | {} | |
224 | ||
225 | AliHLTTPCCAGPUTrackerBase::~AliHLTTPCCAGPUTrackerBase() | |
226 | { | |
227 | } | |
228 | ||
229 | void AliHLTTPCCAGPUTrackerBase::ReleaseGlobalLock(void* sem) | |
230 | { | |
231 | //Release the global named semaphore that locks GPU Initialization | |
232 | #ifdef R__WIN32 | |
233 | HANDLE* h = (HANDLE*) sem; | |
234 | ReleaseSemaphore(*h, 1, NULL); | |
235 | CloseHandle(*h); | |
236 | delete h; | |
237 | #else | |
238 | sem_t* pSem = (sem_t*) sem; | |
239 | sem_post(pSem); | |
240 | sem_unlink(SemLockName); | |
241 | #endif | |
242 | } | |
243 | ||
244 | int AliHLTTPCCAGPUTrackerBase::CheckMemorySizes(int sliceCount) | |
245 | { | |
246 | //Check constants for correct memory sizes | |
247 | if (sizeof(AliHLTTPCCATracker) * sliceCount > HLTCA_GPU_TRACKER_OBJECT_MEMORY) | |
248 | { | |
249 | HLTError("Insufficiant Tracker Object Memory for %d slices", sliceCount); | |
250 | return(1); | |
251 | } | |
252 | ||
253 | if (fgkNSlices * AliHLTTPCCATracker::CommonMemorySize() > HLTCA_GPU_COMMON_MEMORY) | |
254 | { | |
255 | HLTError("Insufficiant Common Memory"); | |
256 | return(1); | |
257 | } | |
258 | ||
259 | if (fgkNSlices * (HLTCA_ROW_COUNT + 1) * sizeof(AliHLTTPCCARow) > HLTCA_GPU_ROWS_MEMORY) | |
260 | { | |
261 | HLTError("Insufficiant Row Memory"); | |
262 | return(1); | |
263 | } | |
264 | ||
265 | if (fDebugLevel >= 3) | |
266 | { | |
267 | HLTInfo("Memory usage: Tracker Object %d / %d, Common Memory %d / %d, Row Memory %d / %d", (int) sizeof(AliHLTTPCCATracker) * sliceCount, HLTCA_GPU_TRACKER_OBJECT_MEMORY, (int) (fgkNSlices * AliHLTTPCCATracker::CommonMemorySize()), HLTCA_GPU_COMMON_MEMORY, (int) (fgkNSlices * (HLTCA_ROW_COUNT + 1) * sizeof(AliHLTTPCCARow)), HLTCA_GPU_ROWS_MEMORY); | |
268 | } | |
269 | return(0); | |
270 | } | |
271 | ||
272 | void AliHLTTPCCAGPUTrackerBase::SetDebugLevel(const int dwLevel, std::ostream* const NewOutFile) | |
273 | { | |
274 | //Set Debug Level and Debug output File if applicable | |
275 | fDebugLevel = dwLevel; | |
276 | if (NewOutFile) fOutFile = NewOutFile; | |
277 | } | |
278 | ||
279 | int AliHLTTPCCAGPUTrackerBase::SetGPUTrackerOption(char* OptionName, int OptionValue) | |
280 | { | |
281 | //Set a specific GPU Tracker Option | |
282 | if (strcmp(OptionName, "PPMode") == 0) | |
283 | { | |
284 | fPPMode = OptionValue; | |
285 | } | |
286 | else if (strcmp(OptionName, "DebugMask") == 0) | |
287 | { | |
288 | fDebugMask = OptionValue; | |
289 | } | |
290 | else if (strcmp(OptionName, "HelperThreads") == 0) | |
291 | { | |
292 | fNHelperThreads = OptionValue; | |
293 | } | |
294 | else if (strcmp(OptionName, "CPUTrackers") == 0) | |
295 | { | |
296 | fNCPUTrackers = OptionValue; | |
297 | } | |
298 | else if (strcmp(OptionName, "SlicesPerCPUTracker") == 0) | |
299 | { | |
300 | fNSlicesPerCPUTracker = OptionValue; | |
301 | } | |
302 | else if (strcmp(OptionName, "GlobalTracking") == 0) | |
303 | { | |
304 | fGlobalTracking = OptionValue; | |
305 | } | |
306 | else | |
307 | { | |
308 | HLTError("Unknown Option: %s", OptionName); | |
309 | return(1); | |
310 | } | |
311 | ||
312 | if (fNHelperThreads + fNCPUTrackers > fNSlaveThreads && fCudaInitialized) | |
313 | { | |
314 | HLTInfo("Insufficient Slave Threads available (%d), creating additional Slave Threads (%d+%d)\n", fNSlaveThreads, fNHelperThreads, fNCPUTrackers); | |
315 | StopHelperThreads(); | |
316 | StartHelperThreads(); | |
317 | } | |
318 | ||
319 | return(0); | |
320 | } | |
321 | ||
322 | #ifdef HLTCA_STANDALONE | |
323 | void AliHLTTPCCAGPUTrackerBase::StandalonePerfTime(int iSlice, int i) | |
324 | { | |
325 | //Run Performance Query for timer i of slice iSlice | |
326 | if (fDebugLevel >= 1) | |
327 | { | |
328 | AliHLTTPCCATracker::StandaloneQueryTime( fSlaveTrackers[iSlice].PerfTimer(i)); | |
329 | } | |
330 | } | |
331 | #else | |
332 | void AliHLTTPCCAGPUTrackerBase::StandalonePerfTime(int /*iSlice*/, int /*i*/) {} | |
333 | #endif | |
334 | ||
335 | int AliHLTTPCCAGPUTrackerBase::SelfHealReconstruct(AliHLTTPCCASliceOutput** pOutput, AliHLTTPCCAClusterData* pClusterData, int firstSlice, int sliceCountLocal) | |
336 | { | |
337 | if (!fSelfheal) | |
338 | { | |
339 | ReleaseThreadContext(); | |
340 | return(1); | |
341 | } | |
342 | static bool selfHealing = false; | |
343 | if (selfHealing) | |
344 | { | |
345 | HLTError("Selfhealing failed, giving up"); | |
346 | ReleaseThreadContext(); | |
347 | return(1); | |
348 | } | |
349 | else | |
350 | { | |
351 | HLTError("Unsolvable CUDA error occured, trying to reinitialize GPU"); | |
352 | } | |
353 | selfHealing = true; | |
354 | ExitGPU(); | |
355 | if (InitGPU(fSliceCount, fCudaDevice)) | |
356 | { | |
357 | HLTError("Could not reinitialize CUDA device, disabling GPU tracker"); | |
358 | ExitGPU(); | |
359 | return(1); | |
360 | } | |
361 | HLTInfo("GPU tracker successfully reinitialized, restarting tracking"); | |
362 | int retVal = Reconstruct(pOutput, pClusterData, firstSlice, sliceCountLocal); | |
363 | selfHealing = false; | |
364 | return(retVal); | |
365 | } | |
366 | ||
367 | void AliHLTTPCCAGPUTrackerBase::ReadEvent(AliHLTTPCCAClusterData* pClusterData, int firstSlice, int iSlice, int threadId) | |
368 | { | |
369 | fSlaveTrackers[firstSlice + iSlice].SetGPUSliceDataMemory(SliceDataMemory(fHostLockedMemory, iSlice), RowMemory(fHostLockedMemory, firstSlice + iSlice)); | |
370 | #ifdef HLTCA_GPU_TIME_PROFILE | |
371 | unsigned long long int a, b; | |
372 | AliHLTTPCCATracker::StandaloneQueryTime(&a); | |
373 | #endif | |
374 | fSlaveTrackers[firstSlice + iSlice].ReadEvent(&pClusterData[iSlice]); | |
375 | #ifdef HLTCA_GPU_TIME_PROFILE | |
376 | AliHLTTPCCATracker::StandaloneQueryTime(&b); | |
377 | HLTInfo("Read %d %f %f\n", threadId, ((double) b - (double) a) / (double) fProfTimeC, ((double) a - (double) fProfTimeD) / (double) fProfTimeC); | |
378 | #endif | |
379 | } | |
380 | ||
381 | void AliHLTTPCCAGPUTrackerBase::WriteOutput(AliHLTTPCCASliceOutput** pOutput, int firstSlice, int iSlice, int threadId) | |
382 | { | |
383 | if (fDebugLevel >= 3) {HLTDebug("GPU Tracker running WriteOutput for slice %d on thread %d\n", firstSlice + iSlice, threadId);} | |
384 | fSlaveTrackers[firstSlice + iSlice].SetOutput(&pOutput[iSlice]); | |
385 | #ifdef HLTCA_GPU_TIME_PROFILE | |
386 | unsigned long long int a, b; | |
387 | AliHLTTPCCATracker::StandaloneQueryTime(&a); | |
388 | #endif | |
389 | if (fNHelperThreads) pthread_mutex_lock((pthread_mutex_t*) fHelperMemMutex); | |
390 | fSlaveTrackers[firstSlice + iSlice].WriteOutputPrepare(); | |
391 | if (fNHelperThreads) pthread_mutex_unlock((pthread_mutex_t*) fHelperMemMutex); | |
392 | fSlaveTrackers[firstSlice + iSlice].WriteOutput(); | |
393 | #ifdef HLTCA_GPU_TIME_PROFILE | |
394 | AliHLTTPCCATracker::StandaloneQueryTime(&b); | |
395 | HLTInfo("Write %d %f %f\n", threadId, ((double) b - (double) a) / (double) fProfTimeC, ((double) a - (double) fProfTimeD) / (double) fProfTimeC); | |
396 | #endif | |
397 | if (fDebugLevel >= 3) {HLTDebug("GPU Tracker finished WriteOutput for slice %d on thread %d\n", firstSlice + iSlice, threadId);} | |
398 | } | |
399 | ||
400 | int AliHLTTPCCAGPUTrackerBase::InitializeSliceParam(int iSlice, AliHLTTPCCAParam ¶m) | |
401 | { | |
402 | //Initialize Slice Tracker Parameter for a slave tracker | |
403 | fSlaveTrackers[iSlice].Initialize(param); | |
404 | if (fSlaveTrackers[iSlice].Param().NRows() != HLTCA_ROW_COUNT) | |
405 | { | |
406 | HLTError("Error, Slice Tracker %d Row Count of %d exceeds Constant of %d", iSlice, fSlaveTrackers[iSlice].Param().NRows(), HLTCA_ROW_COUNT); | |
407 | return(1); | |
408 | } | |
409 | return(0); | |
410 | } | |
411 | ||
412 | void AliHLTTPCCAGPUTrackerBase::ResetHelperThreads(int helpers) | |
413 | { | |
414 | HLTImportant("Error occurred, GPU tracker helper threads will be reset (Number of threads %d/%d)", fNHelperThreads, fNCPUTrackers); | |
415 | SynchronizeGPU(); | |
416 | ReleaseThreadContext(); | |
417 | for (int i = 0;i < fNHelperThreads + fNCPUTrackers;i++) | |
418 | { | |
419 | fHelperParams[i].fReset = true; | |
420 | if (helpers || i >= fNHelperThreads) pthread_mutex_lock(&((pthread_mutex_t*) fHelperParams[i].fMutex)[1]); | |
421 | } | |
422 | HLTImportant("GPU Tracker helper threads have ben reset"); | |
423 | } | |
424 | ||
425 | int AliHLTTPCCAGPUTrackerBase::StartHelperThreads() | |
426 | { | |
427 | int nThreads = fNHelperThreads + fNCPUTrackers; | |
428 | if (nThreads) | |
429 | { | |
430 | fHelperParams = new helperParam[nThreads]; | |
431 | if (fHelperParams == NULL) | |
432 | { | |
433 | HLTError("Memory allocation error"); | |
434 | ExitGPU(); | |
435 | return(1); | |
436 | } | |
437 | for (int i = 0;i < nThreads;i++) | |
438 | { | |
439 | fHelperParams[i].fCls = this; | |
440 | fHelperParams[i].fTerminate = false; | |
441 | fHelperParams[i].fReset = false; | |
442 | fHelperParams[i].fNum = i; | |
443 | fHelperParams[i].fMutex = malloc(2 * sizeof(pthread_mutex_t)); | |
444 | if (fHelperParams[i].fMutex == NULL) | |
445 | { | |
446 | HLTError("Memory allocation error"); | |
447 | ExitGPU(); | |
448 | return(1); | |
449 | } | |
450 | for (int j = 0;j < 2;j++) | |
451 | { | |
452 | if (pthread_mutex_init(&((pthread_mutex_t*) fHelperParams[i].fMutex)[j], NULL)) | |
453 | { | |
454 | HLTError("Error creating pthread mutex"); | |
455 | ExitGPU(); | |
456 | return(1); | |
457 | } | |
458 | ||
459 | pthread_mutex_lock(&((pthread_mutex_t*) fHelperParams[i].fMutex)[j]); | |
460 | } | |
461 | fHelperParams[i].fThreadId = (void*) malloc(sizeof(pthread_t)); | |
462 | ||
463 | if (pthread_create((pthread_t*) fHelperParams[i].fThreadId, NULL, helperWrapper, &fHelperParams[i])) | |
464 | { | |
465 | HLTError("Error starting slave thread"); | |
466 | ExitGPU(); | |
467 | return(1); | |
468 | } | |
469 | } | |
470 | } | |
471 | fNSlaveThreads = nThreads; | |
472 | return(0); | |
473 | } | |
474 | ||
475 | int AliHLTTPCCAGPUTrackerBase::StopHelperThreads() | |
476 | { | |
477 | if (fNSlaveThreads) | |
478 | { | |
479 | for (int i = 0;i < fNSlaveThreads;i++) | |
480 | { | |
481 | fHelperParams[i].fTerminate = true; | |
482 | if (pthread_mutex_unlock(&((pthread_mutex_t*) fHelperParams[i].fMutex)[0])) | |
483 | { | |
484 | HLTError("Error unlocking mutex to terminate slave"); | |
485 | return(1); | |
486 | } | |
487 | if (pthread_mutex_lock(&((pthread_mutex_t*) fHelperParams[i].fMutex)[1])) | |
488 | { | |
489 | HLTError("Error locking mutex"); | |
490 | return(1); | |
491 | } | |
492 | if (pthread_join( *((pthread_t*) fHelperParams[i].fThreadId), NULL)) | |
493 | { | |
494 | HLTError("Error waiting for thread to terminate"); | |
495 | return(1); | |
496 | } | |
497 | free(fHelperParams[i].fThreadId); | |
498 | for (int j = 0;j < 2;j++) | |
499 | { | |
500 | if (pthread_mutex_unlock(&((pthread_mutex_t*) fHelperParams[i].fMutex)[j])) | |
501 | { | |
502 | HLTError("Error unlocking mutex before destroying"); | |
503 | return(1); | |
504 | } | |
505 | pthread_mutex_destroy(&((pthread_mutex_t*) fHelperParams[i].fMutex)[j]); | |
506 | } | |
507 | free(fHelperParams[i].fMutex); | |
508 | } | |
509 | delete[] fHelperParams; | |
510 | } | |
511 | fNSlaveThreads = 0; | |
512 | return(0); | |
513 | } | |
514 | ||
515 | void AliHLTTPCCAGPUTrackerBase::SetOutputControl( AliHLTTPCCASliceOutput::outputControlStruct* val) | |
516 | { | |
517 | //Set Output Control Pointers | |
518 | fOutputControl = val; | |
519 | for (int i = 0;i < fgkNSlices;i++) | |
520 | { | |
521 | fSlaveTrackers[i].SetOutputControl(val); | |
522 | } | |
523 | } | |
524 | ||
525 | int AliHLTTPCCAGPUTrackerBase::GetThread() | |
526 | { | |
527 | //Get Thread ID | |
528 | #ifdef R__WIN32 | |
529 | return((int) (size_t) GetCurrentThread()); | |
530 | #else | |
531 | return((int) syscall (SYS_gettid)); | |
532 | #endif | |
533 | } | |
534 | ||
535 | unsigned long long int* AliHLTTPCCAGPUTrackerBase::PerfTimer(int iSlice, unsigned int i) | |
536 | { | |
537 | //Returns pointer to PerfTimer i of slice iSlice | |
538 | return(fSlaveTrackers ? fSlaveTrackers[iSlice].PerfTimer(i) : NULL); | |
539 | } | |
540 | ||
541 | const AliHLTTPCCASliceOutput::outputControlStruct* AliHLTTPCCAGPUTrackerBase::OutputControl() const | |
542 | { | |
543 | //Return Pointer to Output Control Structure | |
544 | return fOutputControl; | |
545 | } | |
546 | ||
547 | int AliHLTTPCCAGPUTrackerBase::GetSliceCount() const | |
548 | { | |
549 | //Return max slice count processable | |
550 | return(fSliceCount); | |
551 | } | |
552 | ||
553 | char* AliHLTTPCCAGPUTrackerBase::MergerBaseMemory() | |
554 | { | |
555 | return(alignPointer((char*) fGPUMergerHostMemory, 1024 * 1024)); | |
556 | } | |
557 | ||
558 | int AliHLTTPCCAGPUTrackerBase::IsInitialized() | |
559 | { | |
560 | return(fCudaInitialized); | |
561 | } | |
562 | ||
563 | int AliHLTTPCCAGPUTrackerBase::InitGPU(int sliceCount, int forceDeviceID) | |
564 | { | |
565 | #if defined(HLTCA_STANDALONE) & !defined(_WIN32) | |
566 | cpu_set_t mask; | |
567 | CPU_ZERO(&mask); | |
568 | CPU_SET(0, &mask); | |
569 | //sched_setaffinity(0, sizeof(mask), &mask); | |
570 | #endif | |
571 | ||
572 | if (sliceCount == -1) sliceCount = fSliceCount; | |
573 | ||
574 | if (CheckMemorySizes(sliceCount)) return(1); | |
575 | ||
576 | #ifdef R__WIN32 | |
577 | HANDLE* semLock = new HANDLE; | |
578 | *semLock = CreateSemaphore(NULL, 1, 1, SemLockName); | |
579 | if (*semLock == NULL) | |
580 | { | |
581 | HLTError("Error creating GPUInit Semaphore"); | |
582 | return(1); | |
583 | } | |
584 | WaitForSingleObject(*semLock, INFINITE); | |
585 | #else | |
586 | sem_t* semLock = sem_open(SemLockName, O_CREAT, 0x01B6, 1); | |
587 | if (semLock == SEM_FAILED) | |
588 | { | |
589 | HLTError("Error creating GPUInit Semaphore"); | |
590 | return(1); | |
591 | } | |
592 | timespec semtime; | |
593 | clock_gettime(CLOCK_REALTIME, &semtime); | |
594 | semtime.tv_sec += 10; | |
595 | while (sem_timedwait(semLock, &semtime) != 0) | |
596 | { | |
597 | HLTError("Global Lock for GPU initialisation was not released for 10 seconds, assuming another thread died"); | |
598 | HLTWarning("Resetting the global lock"); | |
599 | sem_post(semLock); | |
600 | } | |
601 | #endif | |
602 | ||
603 | fThreadId = GetThread(); | |
604 | ||
605 | fGPUMemSize = HLTCA_GPU_ROWS_MEMORY + HLTCA_GPU_COMMON_MEMORY + sliceCount * (HLTCA_GPU_SLICE_DATA_MEMORY + HLTCA_GPU_GLOBAL_MEMORY); | |
606 | ||
607 | #ifdef HLTCA_GPU_MERGER | |
608 | fGPUMergerMaxMemory = 2000000 * 5 * sizeof(float); | |
609 | fGPUMemSize += fGPUMergerMaxMemory; | |
610 | #endif | |
611 | ||
612 | int retVal = InitGPU_Runtime(sliceCount, forceDeviceID); | |
613 | ReleaseGlobalLock(semLock); | |
614 | ||
615 | if (retVal) | |
616 | { | |
617 | HLTImportant("GPU Tracker initialization failed"); | |
618 | return(1); | |
619 | } | |
620 | ||
621 | fSliceCount = sliceCount; | |
622 | //Don't run constructor / destructor here, this will be just local memcopy of Tracker in GPU Memory | |
623 | fGpuTracker = (AliHLTTPCCATracker*) TrackerMemory(fHostLockedMemory, 0); | |
624 | ||
625 | for (int i = 0;i < fgkNSlices;i++) | |
626 | { | |
627 | fSlaveTrackers[i].SetGPUTracker(); | |
628 | fSlaveTrackers[i].SetGPUTrackerCommonMemory((char*) CommonMemory(fHostLockedMemory, i)); | |
629 | fSlaveTrackers[i].SetGPUSliceDataMemory(SliceDataMemory(fHostLockedMemory, i), RowMemory(fHostLockedMemory, i)); | |
630 | } | |
631 | ||
632 | if (StartHelperThreads()) return(1); | |
633 | ||
634 | fHelperMemMutex = malloc(sizeof(pthread_mutex_t)); | |
635 | if (fHelperMemMutex == NULL) | |
636 | { | |
637 | HLTError("Memory allocation error"); | |
638 | ExitGPU_Runtime(); | |
639 | return(1); | |
640 | } | |
641 | ||
642 | if (pthread_mutex_init((pthread_mutex_t*) fHelperMemMutex, NULL)) | |
643 | { | |
644 | HLTError("Error creating pthread mutex"); | |
645 | ExitGPU_Runtime(); | |
646 | free(fHelperMemMutex); | |
647 | return(1); | |
648 | } | |
649 | ||
650 | fSliceGlobalMutexes = malloc(sizeof(pthread_mutex_t) * fgkNSlices); | |
651 | if (fSliceGlobalMutexes == NULL) | |
652 | { | |
653 | HLTError("Memory allocation error"); | |
654 | ExitGPU_Runtime(); | |
655 | return(1); | |
656 | } | |
657 | for (int i = 0;i < fgkNSlices;i++) | |
658 | { | |
659 | if (pthread_mutex_init(&((pthread_mutex_t*) fSliceGlobalMutexes)[i], NULL)) | |
660 | { | |
661 | HLTError("Error creating pthread mutex"); | |
662 | ExitGPU_Runtime(); | |
663 | return(1); | |
664 | } | |
665 | } | |
666 | ||
667 | fCudaInitialized = 1; | |
668 | HLTImportant("GPU Tracker initialization successfull"); | |
669 | ||
670 | #if defined(HLTCA_STANDALONE) & !defined(CUDA_DEVICE_EMULATION) | |
671 | if (fDebugLevel < 2 && 0) | |
672 | { | |
673 | //Do one initial run for Benchmark reasons | |
674 | const int useDebugLevel = fDebugLevel; | |
675 | fDebugLevel = 0; | |
676 | AliHLTTPCCAClusterData* tmpCluster = new AliHLTTPCCAClusterData[sliceCount]; | |
677 | ||
678 | std::ifstream fin; | |
679 | ||
680 | AliHLTTPCCAParam tmpParam; | |
681 | AliHLTTPCCASliceOutput::outputControlStruct tmpOutputControl; | |
682 | ||
683 | fin.open("events/settings.dump"); | |
684 | int tmpCount; | |
685 | fin >> tmpCount; | |
686 | for (int i = 0;i < sliceCount;i++) | |
687 | { | |
688 | fSlaveTrackers[i].SetOutputControl(&tmpOutputControl); | |
689 | tmpParam.ReadSettings(fin); | |
690 | InitializeSliceParam(i, tmpParam); | |
691 | } | |
692 | fin.close(); | |
693 | ||
694 | fin.open("eventspbpbc/event.0.dump", std::ifstream::binary); | |
695 | for (int i = 0;i < sliceCount;i++) | |
696 | { | |
697 | tmpCluster[i].StartReading(i, 0); | |
698 | tmpCluster[i].ReadEvent(fin); | |
699 | } | |
700 | fin.close(); | |
701 | ||
702 | AliHLTTPCCASliceOutput **tmpOutput = new AliHLTTPCCASliceOutput*[sliceCount]; | |
703 | memset(tmpOutput, 0, sliceCount * sizeof(AliHLTTPCCASliceOutput*)); | |
704 | ||
705 | Reconstruct(tmpOutput, tmpCluster, 0, sliceCount); | |
706 | for (int i = 0;i < sliceCount;i++) | |
707 | { | |
708 | free(tmpOutput[i]); | |
709 | tmpOutput[i] = NULL; | |
710 | fSlaveTrackers[i].SetOutputControl(NULL); | |
711 | } | |
712 | delete[] tmpOutput; | |
713 | delete[] tmpCluster; | |
714 | fDebugLevel = useDebugLevel; | |
715 | } | |
716 | #endif | |
717 | ||
718 | return(retVal); | |
719 | } | |
720 | ||
721 | int AliHLTTPCCAGPUTrackerBase::ExitGPU() | |
722 | { | |
723 | if (StopHelperThreads()) return(1); | |
724 | pthread_mutex_destroy((pthread_mutex_t*) fHelperMemMutex); | |
725 | free(fHelperMemMutex); | |
726 | ||
727 | for (int i = 0;i < fgkNSlices;i++) pthread_mutex_destroy(&((pthread_mutex_t*) fSliceGlobalMutexes)[i]); | |
728 | free(fSliceGlobalMutexes); | |
729 | ||
730 | return(ExitGPU_Runtime()); | |
731 | } | |
732 | ||
733 | int AliHLTTPCCAGPUTrackerBase::Reconstruct_Base_FinishSlices(AliHLTTPCCASliceOutput** pOutput, int& iSlice, int& firstSlice) | |
734 | { | |
735 | fSlaveTrackers[firstSlice + iSlice].CommonMemory()->fNLocalTracks = fSlaveTrackers[firstSlice + iSlice].CommonMemory()->fNTracks; | |
736 | fSlaveTrackers[firstSlice + iSlice].CommonMemory()->fNLocalTrackHits = fSlaveTrackers[firstSlice + iSlice].CommonMemory()->fNTrackHits; | |
737 | if (fUseGlobalTracking) fSlaveTrackers[firstSlice + iSlice].CommonMemory()->fNTracklets = 1; | |
738 | ||
739 | if (fDebugLevel >= 3) HLTInfo("Data ready for slice %d, helper thread %d", iSlice, iSlice % (fNHelperThreads + 1)); | |
740 | fSliceOutputReady = iSlice; | |
741 | ||
742 | if (fUseGlobalTracking) | |
743 | { | |
744 | if (iSlice % (fgkNSlices / 2) == 2) | |
745 | { | |
746 | int tmpId = iSlice % (fgkNSlices / 2) - 1; | |
747 | if (iSlice >= fgkNSlices / 2) tmpId += fgkNSlices / 2; | |
748 | GlobalTracking(tmpId, 0, NULL); | |
749 | fGlobalTrackingDone[tmpId] = 1; | |
750 | } | |
751 | for (int tmpSlice3a = 0;tmpSlice3a < iSlice;tmpSlice3a += fNHelperThreads + 1) | |
752 | { | |
753 | int tmpSlice3 = tmpSlice3a + 1; | |
754 | if (tmpSlice3 % (fgkNSlices / 2) < 1) tmpSlice3 -= (fgkNSlices / 2); | |
755 | if (tmpSlice3 >= iSlice) break; | |
756 | ||
757 | int sliceLeft = (tmpSlice3 + (fgkNSlices / 2 - 1)) % (fgkNSlices / 2); | |
758 | int sliceRight = (tmpSlice3 + 1) % (fgkNSlices / 2); | |
759 | if (tmpSlice3 >= fgkNSlices / 2) | |
760 | { | |
761 | sliceLeft += fgkNSlices / 2; | |
762 | sliceRight += fgkNSlices / 2; | |
763 | } | |
764 | ||
765 | if (tmpSlice3 % (fgkNSlices / 2) != 1 && fGlobalTrackingDone[tmpSlice3] == 0 && sliceLeft < iSlice && sliceRight < iSlice) | |
766 | { | |
767 | GlobalTracking(tmpSlice3, 0, NULL); | |
768 | fGlobalTrackingDone[tmpSlice3] = 1; | |
769 | } | |
770 | ||
771 | if (fWriteOutputDone[tmpSlice3] == 0 && fSliceLeftGlobalReady[tmpSlice3] && fSliceRightGlobalReady[tmpSlice3]) | |
772 | { | |
773 | WriteOutput(pOutput, firstSlice, tmpSlice3, 0); | |
774 | fWriteOutputDone[tmpSlice3] = 1; | |
775 | } | |
776 | } | |
777 | } | |
778 | else | |
779 | { | |
780 | if (iSlice % (fNHelperThreads + 1) == 0) | |
781 | { | |
782 | WriteOutput(pOutput, firstSlice, iSlice, 0); | |
783 | } | |
784 | } | |
785 | return(0); | |
786 | } | |
787 | ||
788 | int AliHLTTPCCAGPUTrackerBase::Reconstruct_Base_Finalize(AliHLTTPCCASliceOutput** pOutput, char*& tmpMemoryGlobalTracking, int& firstSlice) | |
789 | { | |
790 | if (fUseGlobalTracking) | |
791 | { | |
792 | for (int tmpSlice3a = 0;tmpSlice3a < fgkNSlices;tmpSlice3a += fNHelperThreads + 1) | |
793 | { | |
794 | int tmpSlice3 = (tmpSlice3a + 1); | |
795 | if (tmpSlice3 % (fgkNSlices / 2) < 1) tmpSlice3 -= (fgkNSlices / 2); | |
796 | if (fGlobalTrackingDone[tmpSlice3] == 0) GlobalTracking(tmpSlice3, 0, NULL); | |
797 | } | |
798 | for (int tmpSlice3a = 0;tmpSlice3a < fgkNSlices;tmpSlice3a += fNHelperThreads + 1) | |
799 | { | |
800 | int tmpSlice3 = (tmpSlice3a + 1); | |
801 | if (tmpSlice3 % (fgkNSlices / 2) < 1) tmpSlice3 -= (fgkNSlices / 2); | |
802 | if (fWriteOutputDone[tmpSlice3] == 0) | |
803 | { | |
804 | while (fSliceLeftGlobalReady[tmpSlice3] == 0 || fSliceRightGlobalReady[tmpSlice3] == 0); | |
805 | WriteOutput(pOutput, firstSlice, tmpSlice3, 0); | |
806 | } | |
807 | } | |
808 | } | |
809 | ||
810 | for (int i = 0;i < fNHelperThreads + fNCPUTrackers;i++) | |
811 | { | |
812 | pthread_mutex_lock(&((pthread_mutex_t*) fHelperParams[i].fMutex)[1]); | |
813 | } | |
814 | ||
815 | if (fUseGlobalTracking) | |
816 | { | |
817 | free(tmpMemoryGlobalTracking); | |
818 | if (fDebugLevel >= 3) | |
819 | { | |
820 | for (int iSlice = 0;iSlice < fgkNSlices;iSlice++) | |
821 | { | |
822 | HLTDebug("Slice %d - Tracks: Local %d Global %d - Hits: Local %d Global %d\n", iSlice, fSlaveTrackers[iSlice].CommonMemory()->fNLocalTracks, fSlaveTrackers[iSlice].CommonMemory()->fNTracks, fSlaveTrackers[iSlice].CommonMemory()->fNLocalTrackHits, fSlaveTrackers[iSlice].CommonMemory()->fNTrackHits); | |
823 | } | |
824 | } | |
825 | } | |
826 | ||
827 | StandalonePerfTime(firstSlice, 10); | |
828 | ||
829 | if (fDebugLevel >= 3) HLTInfo("GPU Reconstruction finished"); | |
830 | return(0); | |
831 | } | |
832 | ||
833 | int AliHLTTPCCAGPUTrackerBase::Reconstruct_Base_StartGlobal(AliHLTTPCCASliceOutput** pOutput, char*& tmpMemoryGlobalTracking) | |
834 | { | |
835 | if (fUseGlobalTracking) | |
836 | { | |
837 | int tmpmemSize = sizeof(AliHLTTPCCATracklet) | |
838 | #ifdef EXTERN_ROW_HITS | |
839 | + HLTCA_ROW_COUNT * sizeof(int) | |
840 | #endif | |
841 | + 16; | |
842 | tmpMemoryGlobalTracking = (char*) malloc(tmpmemSize * fgkNSlices); | |
843 | for (int i = 0;i < fgkNSlices;i++) | |
844 | { | |
845 | fSliceLeftGlobalReady[i] = 0; | |
846 | fSliceRightGlobalReady[i] = 0; | |
847 | } | |
848 | memset(fGlobalTrackingDone, 0, fgkNSlices); | |
849 | memset(fWriteOutputDone, 0, fgkNSlices); | |
850 | ||
851 | for (int iSlice = 0;iSlice < fgkNSlices;iSlice++) | |
852 | { | |
853 | fSlaveTrackers[iSlice].SetGPUTrackerTrackletsMemory(tmpMemoryGlobalTracking + (tmpmemSize * iSlice), 1, fConstructorBlockCount); | |
854 | } | |
855 | } | |
856 | for (int i = 0;i < fNHelperThreads;i++) | |
857 | { | |
858 | fHelperParams[i].fPhase = 1; | |
859 | fHelperParams[i].pOutput = pOutput; | |
860 | pthread_mutex_unlock(&((pthread_mutex_t*) fHelperParams[i].fMutex)[0]); | |
861 | } | |
862 | return(0); | |
863 | } | |
864 | ||
865 | int AliHLTTPCCAGPUTrackerBase::Reconstruct_Base_SliceInit(AliHLTTPCCAClusterData* pClusterData, int& iSlice, int& firstSlice) | |
866 | { | |
867 | StandalonePerfTime(firstSlice + iSlice, 0); | |
868 | ||
869 | //Initialize GPU Slave Tracker | |
870 | if (fDebugLevel >= 3) HLTInfo("Creating Slice Data (Slice %d)", iSlice); | |
871 | if (iSlice % (fNHelperThreads + 1) == 0) | |
872 | { | |
873 | ReadEvent(pClusterData, firstSlice, iSlice, 0); | |
874 | } | |
875 | else | |
876 | { | |
877 | if (fDebugLevel >= 3) HLTInfo("Waiting for helper thread %d", iSlice % (fNHelperThreads + 1) - 1); | |
878 | while(fHelperParams[iSlice % (fNHelperThreads + 1) - 1].fDone < iSlice); | |
879 | } | |
880 | ||
881 | if (fDebugLevel >= 4) | |
882 | { | |
883 | #ifndef BITWISE_COMPATIBLE_DEBUG_OUTPUT | |
884 | *fOutFile << std::endl << std::endl << "Reconstruction: " << iSlice << "/" << sliceCountLocal << " Total Slice: " << fSlaveTrackers[firstSlice + iSlice].Param().ISlice() << " / " << fgkNSlices << std::endl; | |
885 | #endif | |
886 | if (fDebugMask & 1) fSlaveTrackers[firstSlice + iSlice].DumpSliceData(*fOutFile); | |
887 | } | |
888 | ||
889 | if (fSlaveTrackers[firstSlice + iSlice].Data().MemorySize() > HLTCA_GPU_SLICE_DATA_MEMORY RANDOM_ERROR) | |
890 | { | |
891 | HLTError("Insufficiant Slice Data Memory"); | |
892 | ResetHelperThreads(1); | |
893 | return(1); | |
894 | } | |
895 | ||
896 | if (fDebugLevel >= 3) | |
897 | { | |
898 | HLTInfo("GPU Slice Data Memory Used: %d/%d", (int) fSlaveTrackers[firstSlice + iSlice].Data().MemorySize(), HLTCA_GPU_SLICE_DATA_MEMORY); | |
899 | } | |
900 | return(0); | |
901 | } | |
902 | ||
903 | int AliHLTTPCCAGPUTrackerBase::Reconstruct_Base_Init(AliHLTTPCCASliceOutput** pOutput, AliHLTTPCCAClusterData* pClusterData, int& firstSlice, int& sliceCountLocal) | |
904 | { | |
905 | if (sliceCountLocal == -1) sliceCountLocal = fSliceCount; | |
906 | ||
907 | if (!fCudaInitialized) | |
908 | { | |
909 | HLTError("GPUTracker not initialized"); | |
910 | return(1); | |
911 | } | |
912 | if (sliceCountLocal > fSliceCount) | |
913 | { | |
914 | HLTError("GPU Tracker was initialized to run with %d slices but was called to process %d slices", fSliceCount, sliceCountLocal); | |
915 | return(1); | |
916 | } | |
917 | if (fThreadId != GetThread()) | |
918 | { | |
b19cbd6d | 919 | HLTDebug("CUDA thread changed, migrating context, Previous Thread: %d, New Thread: %d", fThreadId, GetThread()); |
d3821846 | 920 | fThreadId = GetThread(); |
921 | } | |
922 | ||
923 | if (fDebugLevel >= 2) HLTInfo("Running GPU Tracker (Slices %d to %d)", fSlaveTrackers[firstSlice].Param().ISlice(), fSlaveTrackers[firstSlice].Param().ISlice() + sliceCountLocal); | |
924 | ||
925 | if (sliceCountLocal * sizeof(AliHLTTPCCATracker) > HLTCA_GPU_TRACKER_CONSTANT_MEM) | |
926 | { | |
927 | HLTError("Insuffissant constant memory (Required %d, Available %d, Tracker %d, Param %d, SliceData %d)", sliceCountLocal * (int) sizeof(AliHLTTPCCATracker), (int) HLTCA_GPU_TRACKER_CONSTANT_MEM, (int) sizeof(AliHLTTPCCATracker), (int) sizeof(AliHLTTPCCAParam), (int) sizeof(AliHLTTPCCASliceData)); | |
928 | return(1); | |
929 | } | |
930 | ||
931 | ActivateThreadContext(); | |
932 | if (fPPMode) | |
933 | { | |
934 | int retVal = ReconstructPP(pOutput, pClusterData, firstSlice, sliceCountLocal); | |
935 | ReleaseThreadContext(); | |
936 | return(retVal); | |
937 | } | |
938 | ||
939 | for (int i = fNHelperThreads;i < fNCPUTrackers + fNHelperThreads;i++) | |
940 | { | |
941 | fHelperParams[i].CPUTracker = 1; | |
942 | fHelperParams[i].pClusterData = pClusterData; | |
943 | fHelperParams[i].pOutput = pOutput; | |
944 | fHelperParams[i].fSliceCount = sliceCountLocal; | |
945 | fHelperParams[i].fFirstSlice = firstSlice; | |
946 | pthread_mutex_unlock(&((pthread_mutex_t*) fHelperParams[i].fMutex)[0]); | |
947 | } | |
948 | sliceCountLocal -= fNCPUTrackers * fNSlicesPerCPUTracker; | |
949 | if (sliceCountLocal < 0) sliceCountLocal = 0; | |
950 | ||
951 | fUseGlobalTracking = fGlobalTracking && sliceCountLocal == fgkNSlices; | |
952 | ||
953 | memcpy(fGpuTracker, &fSlaveTrackers[firstSlice], sizeof(AliHLTTPCCATracker) * sliceCountLocal); | |
954 | ||
955 | if (fDebugLevel >= 3) HLTInfo("Allocating GPU Tracker memory and initializing constants"); | |
956 | ||
957 | #ifdef HLTCA_GPU_TIME_PROFILE | |
958 | AliHLTTPCCATracker::StandaloneQueryFreq(&fProfTimeC); | |
959 | AliHLTTPCCATracker::StandaloneQueryTime(&fProfTimeD); | |
960 | #endif | |
961 | ||
962 | for (int iSlice = 0;iSlice < sliceCountLocal;iSlice++) | |
963 | { | |
964 | //Make this a GPU Tracker | |
965 | fGpuTracker[iSlice].SetGPUTracker(); | |
966 | fGpuTracker[iSlice].SetGPUTrackerCommonMemory((char*) CommonMemory(fGPUMemory, iSlice)); | |
967 | fGpuTracker[iSlice].SetGPUSliceDataMemory(SliceDataMemory(fGPUMemory, iSlice), RowMemory(fGPUMemory, iSlice)); | |
968 | fGpuTracker[iSlice].SetPointersSliceData(&pClusterData[iSlice], false); | |
969 | fGpuTracker[iSlice].GPUParametersConst()->fGPUMem = (char*) fGPUMemory; | |
970 | ||
971 | //Set Pointers to GPU Memory | |
972 | char* tmpMem = (char*) GlobalMemory(fGPUMemory, iSlice); | |
973 | ||
974 | if (fDebugLevel >= 3) HLTInfo("Initialising GPU Hits Memory"); | |
975 | tmpMem = fGpuTracker[iSlice].SetGPUTrackerHitsMemory(tmpMem, pClusterData[iSlice].NumberOfClusters()); | |
976 | tmpMem = alignPointer(tmpMem, 1024 * 1024); | |
977 | ||
978 | if (fDebugLevel >= 3) HLTInfo("Initialising GPU Tracklet Memory"); | |
979 | tmpMem = fGpuTracker[iSlice].SetGPUTrackerTrackletsMemory(tmpMem, HLTCA_GPU_MAX_TRACKLETS, fConstructorBlockCount); | |
980 | tmpMem = alignPointer(tmpMem, 1024 * 1024); | |
981 | ||
982 | if (fDebugLevel >= 3) HLTInfo("Initialising GPU Track Memory"); | |
983 | tmpMem = fGpuTracker[iSlice].SetGPUTrackerTracksMemory(tmpMem, HLTCA_GPU_MAX_TRACKS, pClusterData[iSlice].NumberOfClusters()); | |
984 | tmpMem = alignPointer(tmpMem, 1024 * 1024); | |
985 | ||
986 | if (fGpuTracker[iSlice].TrackMemorySize() >= HLTCA_GPU_TRACKS_MEMORY RANDOM_ERROR) | |
987 | { | |
988 | HLTError("Insufficiant Track Memory"); | |
989 | ResetHelperThreads(0); | |
990 | return(1); | |
991 | } | |
992 | ||
993 | if (tmpMem - (char*) GlobalMemory(fGPUMemory, iSlice) > HLTCA_GPU_GLOBAL_MEMORY RANDOM_ERROR) | |
994 | { | |
995 | HLTError("Insufficiant Global Memory"); | |
996 | ResetHelperThreads(0); | |
997 | return(1); | |
998 | } | |
999 | ||
1000 | if (fDebugLevel >= 3) | |
1001 | { | |
1002 | HLTInfo("GPU Global Memory Used: %d/%d, Page Locked Tracks Memory used: %d / %d", (int) (tmpMem - (char*) GlobalMemory(fGPUMemory, iSlice)), HLTCA_GPU_GLOBAL_MEMORY, (int) fGpuTracker[iSlice].TrackMemorySize(), HLTCA_GPU_TRACKS_MEMORY); | |
1003 | } | |
1004 | ||
1005 | //Initialize Startup Constants | |
1006 | *fSlaveTrackers[firstSlice + iSlice].NTracklets() = 0; | |
1007 | *fSlaveTrackers[firstSlice + iSlice].NTracks() = 0; | |
1008 | *fSlaveTrackers[firstSlice + iSlice].NTrackHits() = 0; | |
1009 | fGpuTracker[iSlice].GPUParametersConst()->fGPUFixedBlockCount = sliceCountLocal > fConstructorBlockCount ? (iSlice < fConstructorBlockCount) : fConstructorBlockCount * (iSlice + 1) / sliceCountLocal - fConstructorBlockCount * (iSlice) / sliceCountLocal; | |
1010 | if (fDebugLevel >= 3) HLTInfo("Blocks for Slice %d: %d", iSlice, fGpuTracker[iSlice].GPUParametersConst()->fGPUFixedBlockCount); | |
1011 | fGpuTracker[iSlice].GPUParametersConst()->fGPUiSlice = iSlice; | |
1012 | fGpuTracker[iSlice].GPUParametersConst()->fGPUnSlices = sliceCountLocal; | |
1013 | fSlaveTrackers[firstSlice + iSlice].GPUParameters()->fGPUError = 0; | |
1014 | fSlaveTrackers[firstSlice + iSlice].GPUParameters()->fNextTracklet = (fConstructorBlockCount / sliceCountLocal + (fConstructorBlockCount % sliceCountLocal > iSlice)) * HLTCA_GPU_THREAD_COUNT_CONSTRUCTOR; | |
1015 | fGpuTracker[iSlice].SetGPUTextureBase(fGpuTracker[0].Data().Memory()); | |
1016 | } | |
1017 | ||
1018 | for (int i = 0;i < fNHelperThreads;i++) | |
1019 | { | |
1020 | fHelperParams[i].CPUTracker = 0; | |
1021 | fHelperParams[i].fDone = 0; | |
1022 | fHelperParams[i].fPhase = 0; | |
1023 | fHelperParams[i].pClusterData = pClusterData; | |
1024 | fHelperParams[i].fSliceCount = sliceCountLocal; | |
1025 | fHelperParams[i].fFirstSlice = firstSlice; | |
1026 | pthread_mutex_unlock(&((pthread_mutex_t*) fHelperParams[i].fMutex)[0]); | |
1027 | } | |
1028 | ||
1029 | return(0); | |
1030 | } |