#endif
#if defined(R__WIN32)
-#ifdef INTEL_RUNTIME
-#pragma warning(disable : 1786)
-#pragma warning(disable : 1478)
-#pragma warning(disable : 161)
-#endif
-
-#ifdef VSNET_RUNTIME
-#pragma warning(disable : 4616)
-#pragma warning(disable : 4996)
-#pragma warning(disable : 1684)
+#ifdef INTEL_RUNTIME\r
+#pragma warning(disable : 1786)\r
+#pragma warning(disable : 1478)\r
+#pragma warning(disable : 161)\r
+#endif\r
+\r
+#ifdef VSNET_RUNTIME\r
+#pragma warning(disable : 4616)\r
+#pragma warning(disable : 4996)\r
+#pragma warning(disable : 1684)\r
#endif
#endif
#define HLTCA_GPU_TEXTURE_FETCH
//#define HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
+//#define HLTCA_GPU_TIME_PROFILE
#define HLTCA_GPU_TRACKLET_SELECTOR_HITS_REG_SIZE 12
#define HLTCA_GPU_TRACKLET_SELECTOR_SLICE_COUNT 3 //Currently must be smaller than avaiable MultiProcessors on GPU or will result in wrong results
+++ /dev/null
-// **************************************************************************
-// This file is property of and copyright by the ALICE HLT Project *
-// ALICE Experiment at CERN, All rights reserved. *
-// *
-// Primary Authors: Sergey Gorbunov <sergey.gorbunov@kip.uni-heidelberg.de> *
-// Ivan Kisel <kisel@kip.uni-heidelberg.de> *
-// David Rohr <drohr@kip.uni-heidelberg.de> *
-// for The ALICE HLT Project. *
-// *
-// Permission to use, copy, modify and distribute this software and its *
-// documentation strictly for non-commercial purposes is hereby granted *
-// without fee, provided that the above copyright notice appears in all *
-// copies and that both the copyright notice and this permission notice *
-// appear in the supporting documentation. The authors make no claims *
-// about the suitability of this software for any purpose. It is *
-// provided "as is" without express or implied warranty. *
-// *
-//***************************************************************************
-
-#include "AliHLTTPCCADef.h"
-#include "AliHLTTPCCAGPUConfig.h"
-
-#include <sm_11_atomic_functions.h>
-#include <sm_12_atomic_functions.h>
-
-#include <iostream>
-
-//Disable assertions since they produce errors in GPU Code
-#ifdef assert
-#undef assert
-#endif
-#define assert(param)
-
-#include "AliHLTTPCCAGPUTracker.h"
-
-__constant__ float4 gAliHLTTPCCATracker[HLTCA_GPU_TRACKER_CONSTANT_MEM / sizeof( float4 )];
-#ifdef HLTCA_GPU_TEXTURE_FETCH
-texture<ushort2, 1, cudaReadModeElementType> gAliTexRefu2;
-texture<unsigned short, 1, cudaReadModeElementType> gAliTexRefu;
-texture<signed short, 1, cudaReadModeElementType> gAliTexRefs;
-#endif
-
-#include "AliHLTTPCCAHit.h"
-
-//Include CXX Files, GPUd() macro will then produce CUDA device code out of the tracker source code
-#include "AliHLTTPCCATrackParam.cxx"
-#include "AliHLTTPCCATrack.cxx"
-
-#include "AliHLTTPCCATrackletSelector.cxx"
-
-#include "AliHLTTPCCAHitArea.cxx"
-#include "AliHLTTPCCAGrid.cxx"
-#include "AliHLTTPCCARow.cxx"
-#include "AliHLTTPCCAParam.cxx"
-#include "AliHLTTPCCATracker.cxx"
-
-#include "AliHLTTPCCAOutTrack.cxx"
-
-#include "AliHLTTPCCAProcess.h"
-
-#include "AliHLTTPCCANeighboursFinder.cxx"
-
-#include "AliHLTTPCCANeighboursCleaner.cxx"
-#include "AliHLTTPCCAStartHitsFinder.cxx"
-#include "AliHLTTPCCAStartHitsSorter.cxx"
-#include "AliHLTTPCCATrackletConstructor.cxx"
-#include "AliHLTTPCCASliceOutput.cxx"
-
-#include "MemoryAssignmentHelpers.h"
-
-#ifndef HLTCA_STANDALONE
-#include "AliHLTDefinitions.h"
-#include "AliHLTSystem.h"
-#endif
-
-ClassImp( AliHLTTPCCAGPUTracker )
-
-int AliHLTTPCCAGPUTracker::InitGPU(int sliceCount, int forceDeviceID)
-{
- //Find best CUDA device, initialize and allocate memory
-
- cudaDeviceProp fCudaDeviceProp;
-
-#ifndef CUDA_DEVICE_EMULATION
- int count, bestDevice = -1, bestDeviceSpeed = 0;
- if (CudaFailedMsg(cudaGetDeviceCount(&count)))
- {
- HLTError("Error getting CUDA Device Count");
- return(1);
- }
- if (fDebugLevel >= 2) std::cout << "Available CUDA devices: ";
- for (int i = 0;i < count;i++)
- {
- cudaGetDeviceProperties(&fCudaDeviceProp, i);
- if (fDebugLevel >= 2) std::cout << fCudaDeviceProp.name << " (" << i << ") ";
- if (fCudaDeviceProp.major < 9 && !(fCudaDeviceProp.major < 1 || (fCudaDeviceProp.major == 1 && fCudaDeviceProp.minor < 2)) && fCudaDeviceProp.multiProcessorCount * fCudaDeviceProp.clockRate > bestDeviceSpeed)
- {
- bestDevice = i;
- bestDeviceSpeed = fCudaDeviceProp.multiProcessorCount * fCudaDeviceProp.clockRate;
- }
- }
- if (fDebugLevel >= 2) std::cout << std::endl;
-
- if (bestDevice == -1)
- {
- HLTWarning("No CUDA Device available, aborting CUDA Initialisation");
- return(1);
- }
-
- int cudaDevice;
- if (forceDeviceID == -1)
- cudaDevice = bestDevice;
- else
- cudaDevice = forceDeviceID;
-#else
- int cudaDevice = 0;
-#endif
-
- cudaGetDeviceProperties(&fCudaDeviceProp ,cudaDevice );
-
- if (fDebugLevel >= 1)
- {
- std::cout<<"CUDA Device Properties: "<<std::endl;
- std::cout<<"name = "<<fCudaDeviceProp.name<<std::endl;
- std::cout<<"totalGlobalMem = "<<fCudaDeviceProp.totalGlobalMem<<std::endl;
- std::cout<<"sharedMemPerBlock = "<<fCudaDeviceProp.sharedMemPerBlock<<std::endl;
- std::cout<<"regsPerBlock = "<<fCudaDeviceProp.regsPerBlock<<std::endl;
- std::cout<<"warpSize = "<<fCudaDeviceProp.warpSize<<std::endl;
- std::cout<<"memPitch = "<<fCudaDeviceProp.memPitch<<std::endl;
- std::cout<<"maxThreadsPerBlock = "<<fCudaDeviceProp.maxThreadsPerBlock<<std::endl;
- std::cout<<"maxThreadsDim = "<<fCudaDeviceProp.maxThreadsDim[0]<<" "<<fCudaDeviceProp.maxThreadsDim[1]<<" "<<fCudaDeviceProp.maxThreadsDim[2]<<std::endl;
- std::cout<<"maxGridSize = " <<fCudaDeviceProp.maxGridSize[0]<<" "<<fCudaDeviceProp.maxGridSize[1]<<" "<<fCudaDeviceProp.maxGridSize[2]<<std::endl;
- std::cout<<"totalConstMem = "<<fCudaDeviceProp.totalConstMem<<std::endl;
- std::cout<<"major = "<<fCudaDeviceProp.major<<std::endl;
- std::cout<<"minor = "<<fCudaDeviceProp.minor<<std::endl;
- std::cout<<"clockRate = "<<fCudaDeviceProp.clockRate<<std::endl;
- std::cout<<"textureAlignment = "<<fCudaDeviceProp.textureAlignment<<std::endl;
- }
-
- if (fCudaDeviceProp.major < 1 || (fCudaDeviceProp.major == 1 && fCudaDeviceProp.minor < 2))
- {
- HLTError( "Unsupported CUDA Device" );
- return(1);
- }
-
- if (CudaFailedMsg(cudaSetDevice(cudaDevice)))
- {
- HLTError("Could not set CUDA Device!");
- return(1);
- }
-
- if (fgkNSlices * AliHLTTPCCATracker::CommonMemorySize() > HLTCA_GPU_COMMON_MEMORY)
- {
- HLTError("Insufficiant Common Memory");
- cudaThreadExit();
- return(1);
- }
-
- if (fgkNSlices * (HLTCA_ROW_COUNT + 1) * sizeof(AliHLTTPCCARow) > HLTCA_GPU_ROWS_MEMORY)
- {
- HLTError("Insufficiant Row Memory");
- cudaThreadExit();
- return(1);
- }
-
- fGPUMemSize = HLTCA_GPU_ROWS_MEMORY + HLTCA_GPU_COMMON_MEMORY + sliceCount * (HLTCA_GPU_SLICE_DATA_MEMORY + HLTCA_GPU_GLOBAL_MEMORY);
- if (fGPUMemSize > fCudaDeviceProp.totalGlobalMem || CudaFailedMsg(cudaMalloc(&fGPUMemory, (size_t) fGPUMemSize)))
- {
- HLTError("CUDA Memory Allocation Error");
- cudaThreadExit();
- return(1);
- }
- if (fDebugLevel >= 1) HLTInfo("GPU Memory used: %d", (int) fGPUMemSize);
- int hostMemSize = HLTCA_GPU_ROWS_MEMORY + HLTCA_GPU_COMMON_MEMORY + sliceCount * (HLTCA_GPU_SLICE_DATA_MEMORY + HLTCA_GPU_TRACKS_MEMORY) + HLTCA_GPU_TRACKER_OBJECT_MEMORY;
- if (CudaFailedMsg(cudaMallocHost(&fHostLockedMemory, hostMemSize)))
- {
- cudaFree(fGPUMemory);\r
- cudaThreadExit();\r
- HLTError("Error allocating Page Locked Host Memory");
- return(1);
- }
- if (fDebugLevel >= 1) HLTInfo("Host Memory used: %d", hostMemSize);
-
- if (fDebugLevel >= 1)
- {
- CudaFailedMsg(cudaMemset(fGPUMemory, 143, (size_t) fGPUMemSize));
- }
- HLTInfo("CUDA Initialisation successfull");
-
- //Don't run constructor / destructor here, this will be just local memcopy of Tracker in GPU Memory
- if (sizeof(AliHLTTPCCATracker) * sliceCount > HLTCA_GPU_TRACKER_OBJECT_MEMORY)
- {
- HLTError("Insufficiant Tracker Object Memory");
- return(1);
- }
- fSliceCount = sliceCount;
- fGpuTracker = (AliHLTTPCCATracker*) TrackerMemory(fHostLockedMemory, 0);
-
- for (int i = 0;i < fgkNSlices;i++)
- {
- fSlaveTrackers[i].SetGPUTracker();
- fSlaveTrackers[i].SetGPUTrackerCommonMemory((char*) CommonMemory(fHostLockedMemory, i));
- fSlaveTrackers[i].pData()->SetGPUSliceDataMemory(SliceDataMemory(fHostLockedMemory, i), RowMemory(fHostLockedMemory, i));
- }
-
- fpCudaStreams = malloc(CAMath::Max(3, fSliceCount) * sizeof(cudaStream_t));
- cudaStream_t* const cudaStreams = (cudaStream_t*) fpCudaStreams;
- for (int i = 0;i < CAMath::Max(3, fSliceCount);i++)
- {
- if (CudaFailedMsg(cudaStreamCreate(&cudaStreams[i])))
- {
- HLTError("Error creating CUDA Stream");
- return(1);
- }
- }
-
-#if defined(HLTCA_STANDALONE) & !defined(CUDA_DEVICE_EMULATION)
- if (fDebugLevel < 2)
- {
- //Do one initial run for Benchmark reasons
- const int useDebugLevel = fDebugLevel;
- fDebugLevel = 0;
- AliHLTTPCCAClusterData tmpCluster;
- AliHLTTPCCASliceOutput *tmpOutput = NULL;
- AliHLTTPCCAParam tmpParam;
- tmpParam.SetNRows(HLTCA_ROW_COUNT);
- fSlaveTrackers[0].SetParam(tmpParam);
- Reconstruct(&tmpOutput, &tmpCluster, 0, 1);
- free(tmpOutput);
- fDebugLevel = useDebugLevel;
- }
-#endif
- return(0);
-}
-
-template <class T> inline T* AliHLTTPCCAGPUTracker::alignPointer(T* ptr, int alignment)
-{
- //Macro to align Pointers.
- //Will align to start at 1 MB segments, this should be consistent with every alignment in the tracker
- //(As long as every single data structure is <= 1 MB)
-
- size_t adr = (size_t) ptr;
- if (adr % alignment)
- {
- adr += alignment - (adr % alignment);
- }
- return((T*) adr);
-}
-
-bool AliHLTTPCCAGPUTracker::CudaFailedMsg(cudaError_t error)
-{
- //Check for CUDA Error and in the case of an error display the corresponding error string
- if (error == cudaSuccess) return(false);
- HLTWarning("CUDA Error: %d / %s", error, cudaGetErrorString(error));
- return(true);
-}
-
-int AliHLTTPCCAGPUTracker::CUDASync(char* state)
-{
- //Wait for CUDA-Kernel to finish and check for CUDA errors afterwards
-
- if (fDebugLevel == 0) return(0);
- cudaError cuErr;
- cuErr = cudaGetLastError();
- if (cuErr != cudaSuccess)
- {
- HLTError("Cuda Error %s while invoking kernel (%s)", cudaGetErrorString(cuErr), state);
- return(1);
- }
- if (CudaFailedMsg(cudaThreadSynchronize()))
- {
- HLTError("CUDA Error while synchronizing (%s)", state);
- return(1);
- }
- if (fDebugLevel >= 5) HLTInfo("CUDA Sync Done");
- return(0);
-}
-
-void AliHLTTPCCAGPUTracker::SetDebugLevel(const int dwLevel, std::ostream* const NewOutFile)
-{
- //Set Debug Level and Debug output File if applicable
- fDebugLevel = dwLevel;
- if (NewOutFile) fOutFile = NewOutFile;
-}
-
-int AliHLTTPCCAGPUTracker::SetGPUTrackerOption(char* OptionName, int OptionValue)
-{
- //Set a specific GPU Tracker Option
- {
- HLTError("Unknown Option: %s", OptionName);
- return(1);
- }
- //No Options used at the moment
- //return(0);
-}
-
-#ifdef HLTCA_STANDALONE
-void AliHLTTPCCAGPUTracker::StandalonePerfTime(int iSlice, int i)
-{
- //Run Performance Query for timer i of slice iSlice
- if (fDebugLevel >= 1)
- {
- AliHLTTPCCAStandaloneFramework::StandaloneQueryTime( fSlaveTrackers[iSlice].PerfTimer(i));
- }
-}
-#else
-void AliHLTTPCCAGPUTracker::StandalonePerfTime(int /*iSlice*/, int /*i*/) {}
-#endif
-
-void AliHLTTPCCAGPUTracker::DumpRowBlocks(AliHLTTPCCATracker* tracker, int iSlice, bool check)
-{
- //Dump Rowblocks to File
- if (fDebugLevel >= 4)
- {
- *fOutFile << "RowBlock Tracklets" << std::endl;
-
- int4* rowBlockPos = (int4*) malloc(sizeof(int4) * (tracker[iSlice].Param().NRows() / HLTCA_GPU_SCHED_ROW_STEP + 1) * 2);
- int* rowBlockTracklets = (int*) malloc(sizeof(int) * (tracker[iSlice].Param().NRows() / HLTCA_GPU_SCHED_ROW_STEP + 1) * HLTCA_GPU_MAX_TRACKLETS * 2);
- uint2* blockStartingTracklet = (uint2*) malloc(sizeof(uint2) * HLTCA_GPU_BLOCK_COUNT);
- CudaFailedMsg(cudaMemcpy(rowBlockPos, fGpuTracker[iSlice].RowBlockPos(), sizeof(int4) * (tracker[iSlice].Param().NRows() / HLTCA_GPU_SCHED_ROW_STEP + 1) * 2, cudaMemcpyDeviceToHost));
- CudaFailedMsg(cudaMemcpy(rowBlockTracklets, fGpuTracker[iSlice].RowBlockTracklets(), sizeof(int) * (tracker[iSlice].Param().NRows() / HLTCA_GPU_SCHED_ROW_STEP + 1) * HLTCA_GPU_MAX_TRACKLETS * 2, cudaMemcpyDeviceToHost));
- CudaFailedMsg(cudaMemcpy(blockStartingTracklet, fGpuTracker[iSlice].BlockStartingTracklet(), sizeof(uint2) * HLTCA_GPU_BLOCK_COUNT, cudaMemcpyDeviceToHost));
- CudaFailedMsg(cudaMemcpy(tracker[iSlice].CommonMemory(), fGpuTracker[iSlice].CommonMemory(), fGpuTracker[iSlice].CommonMemorySize(), cudaMemcpyDeviceToHost));
-
- int k = tracker[iSlice].GPUParameters()->fScheduleFirstDynamicTracklet;
- for (int i = 0; i < tracker[iSlice].Param().NRows() / HLTCA_GPU_SCHED_ROW_STEP + 1;i++)
- {
- *fOutFile << "Rowblock: " << i << ", up " << rowBlockPos[i].y << "/" << rowBlockPos[i].x << ", down " <<
- rowBlockPos[tracker[iSlice].Param().NRows() / HLTCA_GPU_SCHED_ROW_STEP + 1 + i].y << "/" << rowBlockPos[tracker[iSlice].Param().NRows() / HLTCA_GPU_SCHED_ROW_STEP + 1 + i].x << endl << "Phase 1: ";
- for (int j = 0;j < rowBlockPos[i].x;j++)
- {
- //Use Tracker Object to calculate Offset instead of fGpuTracker, since *fNTracklets of fGpuTracker points to GPU Mem!
- *fOutFile << rowBlockTracklets[(tracker[iSlice].RowBlockTracklets(0, i) - tracker[iSlice].RowBlockTracklets(0, 0)) + j] << ", ";
- if (check && rowBlockTracklets[(tracker[iSlice].RowBlockTracklets(0, i) - tracker[iSlice].RowBlockTracklets(0, 0)) + j] != k)
- {
- HLTError("Wrong starting Row Block %d, entry %d, is %d, should be %d", i, j, rowBlockTracklets[(tracker[iSlice].RowBlockTracklets(0, i) - tracker[iSlice].RowBlockTracklets(0, 0)) + j], k);
- }
- k++;
- if (rowBlockTracklets[(tracker[iSlice].RowBlockTracklets(0, i) - tracker[iSlice].RowBlockTracklets(0, 0)) + j] == -1)
- {
- HLTError("Error, -1 Tracklet found");
- }
- }
- *fOutFile << endl << "Phase 2: ";
- for (int j = 0;j < rowBlockPos[tracker[iSlice].Param().NRows() / HLTCA_GPU_SCHED_ROW_STEP + 1 + i].x;j++)
- {
- *fOutFile << rowBlockTracklets[(tracker[iSlice].RowBlockTracklets(1, i) - tracker[iSlice].RowBlockTracklets(0, 0)) + j] << ", ";
- }
- *fOutFile << endl;
- }
-
- if (check)
- {
- *fOutFile << "Starting Threads: (First Dynamic: " << tracker[iSlice].GPUParameters()->fScheduleFirstDynamicTracklet << ")" << std::endl;
- for (int i = 0;i < HLTCA_GPU_BLOCK_COUNT;i++)
- {
- *fOutFile << i << ": " << blockStartingTracklet[i].x << " - " << blockStartingTracklet[i].y << std::endl;
- }
- }
-
- free(rowBlockPos);
- free(rowBlockTracklets);
- free(blockStartingTracklet);
- }
-}
-
-__global__ void PreInitRowBlocks(int4* const RowBlockPos, int* const RowBlockTracklets, int* const SliceDataHitWeights, int nSliceDataHits)
-{
- //Initialize GPU RowBlocks and HitWeights
- int4* const rowBlockTracklets4 = (int4*) RowBlockTracklets;
- int4* const sliceDataHitWeights4 = (int4*) SliceDataHitWeights;
- const int stride = blockDim.x * gridDim.x;
- int4 i0, i1;
- i0.x = i0.y = i0.z = i0.w = 0;
- i1.x = i1.y = i1.z = i1.w = -1;
- for (int i = blockIdx.x * blockDim.x + threadIdx.x;i < sizeof(int4) * 2 * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1) / sizeof(int4);i += stride)
- RowBlockPos[i] = i0;
- for (int i = blockIdx.x * blockDim.x + threadIdx.x;i < sizeof(int) * (HLTCA_ROW_COUNT / HLTCA_GPU_SCHED_ROW_STEP + 1) * HLTCA_GPU_MAX_TRACKLETS * 2 / sizeof(int4);i += stride)
- rowBlockTracklets4[i] = i1;
- for (int i = blockIdx.x * blockDim.x + threadIdx.x;i < nSliceDataHits * sizeof(int) / sizeof(int4);i += stride)
- sliceDataHitWeights4[i] = i0;
-}
-
-int AliHLTTPCCAGPUTracker::Reconstruct(AliHLTTPCCASliceOutput** pOutput, AliHLTTPCCAClusterData* pClusterData, int firstSlice, int sliceCountLocal)
-{
- //Primary reconstruction function
- cudaStream_t* const cudaStreams = (cudaStream_t*) fpCudaStreams;
-
- if (sliceCountLocal == -1) sliceCountLocal = this->fSliceCount;
-
- if (sliceCountLocal * sizeof(AliHLTTPCCATracker) > HLTCA_GPU_TRACKER_CONSTANT_MEM)
- {
- HLTError("Insuffissant constant memory (Required %d, Available %d, Tracker %d, Param %d, SliceData %d)", sliceCountLocal * (int) sizeof(AliHLTTPCCATracker), (int) HLTCA_GPU_TRACKER_CONSTANT_MEM, (int) sizeof(AliHLTTPCCATracker), (int) sizeof(AliHLTTPCCAParam), (int) sizeof(AliHLTTPCCASliceData));
- return(1);
- }
-
- if (fDebugLevel >= 4)
- {
- for (int iSlice = 0;iSlice < sliceCountLocal;iSlice++)
- {
- *fOutFile << endl << endl << "Slice: " << fSlaveTrackers[firstSlice + iSlice].Param().ISlice() << endl;
- }
- }
-
- memcpy(fGpuTracker, &fSlaveTrackers[firstSlice], sizeof(AliHLTTPCCATracker) * sliceCountLocal);\r
-
- if (fDebugLevel >= 2) HLTInfo("Running GPU Tracker (Slices %d to %d)", fSlaveTrackers[firstSlice].Param().ISlice(), fSlaveTrackers[firstSlice + sliceCountLocal].Param().ISlice());
- if (fDebugLevel >= 5) HLTInfo("Allocating GPU Tracker memory and initializing constants");
-
- for (int iSlice = 0;iSlice < sliceCountLocal;iSlice++)
- {
- //Make this a GPU Tracker
- fGpuTracker[iSlice].SetGPUTracker();
- fGpuTracker[iSlice].SetGPUTrackerCommonMemory((char*) CommonMemory(fGPUMemory, iSlice));
- fGpuTracker[iSlice].pData()->SetGPUSliceDataMemory(SliceDataMemory(fGPUMemory, iSlice), RowMemory(fGPUMemory, iSlice));
- fGpuTracker[iSlice].pData()->SetPointers(&pClusterData[iSlice], false);
-
- //Set Pointers to GPU Memory
- char* tmpMem = (char*) GlobalMemory(fGPUMemory, iSlice);
-
- if (fDebugLevel >= 5) HLTInfo("Initialising GPU Hits Memory");
- tmpMem = fGpuTracker[iSlice].SetGPUTrackerHitsMemory(tmpMem, pClusterData[iSlice].NumberOfClusters());
- tmpMem = alignPointer(tmpMem, 1024 * 1024);
-
- if (fDebugLevel >= 5) HLTInfo("Initialising GPU Tracklet Memory");
- tmpMem = fGpuTracker[iSlice].SetGPUTrackerTrackletsMemory(tmpMem, HLTCA_GPU_MAX_TRACKLETS /* *fSlaveTrackers[firstSlice + iSlice].NTracklets()*/);
- tmpMem = alignPointer(tmpMem, 1024 * 1024);
-
- if (fDebugLevel >= 5) HLTInfo("Initialising GPU Track Memory");
- tmpMem = fGpuTracker[iSlice].SetGPUTrackerTracksMemory(tmpMem, HLTCA_GPU_MAX_TRACKS /* *fSlaveTrackers[firstSlice + iSlice].NTracklets()*/, pClusterData[iSlice].NumberOfClusters());
- tmpMem = alignPointer(tmpMem, 1024 * 1024);
-
- if (fGpuTracker[iSlice].TrackMemorySize() >= HLTCA_GPU_TRACKS_MEMORY)
- {
- HLTError("Insufficiant Track Memory");
- return(1);
- }
-
- if (tmpMem - (char*) GlobalMemory(fGPUMemory, iSlice) > HLTCA_GPU_GLOBAL_MEMORY)
- {
- HLTError("Insufficiant Global Memory");
- return(1);
- }
-
- //Initialize Startup Constants
- *fSlaveTrackers[firstSlice + iSlice].NTracklets() = 0;
- *fSlaveTrackers[firstSlice + iSlice].NTracks() = 0;
- *fSlaveTrackers[firstSlice + iSlice].NTrackHits() = 0;
- fGpuTracker[iSlice].GPUParametersConst()->fGPUFixedBlockCount = HLTCA_GPU_BLOCK_COUNT * (iSlice + 1) / sliceCountLocal - HLTCA_GPU_BLOCK_COUNT * (iSlice) / sliceCountLocal;
- if (fDebugLevel >= 5) HLTInfo("Blocks for Slice %d: %d", iSlice, fGpuTracker[iSlice].GPUParametersConst()->fGPUFixedBlockCount);
- fGpuTracker[iSlice].GPUParametersConst()->fGPUiSlice = iSlice;
- fGpuTracker[iSlice].GPUParametersConst()->fGPUnSlices = sliceCountLocal;
- fSlaveTrackers[firstSlice + iSlice].GPUParameters()->fGPUError = 0;
- fGpuTracker[iSlice].pData()->SetGPUTextureBase(fGpuTracker[0].Data().Memory());
- }
-
-#ifdef HLTCA_GPU_TEXTURE_FETCH
- cudaChannelFormatDesc channelDescu2 = cudaCreateChannelDesc<ushort2>();\r
- size_t offset;\r
- if (CudaFailedMsg(cudaBindTexture(&offset, &gAliTexRefu2, fGpuTracker[0].Data().Memory(), &channelDescu2, sliceCountLocal * HLTCA_GPU_SLICE_DATA_MEMORY)) || offset)\r
- {
- HLTError("Error binding CUDA Texture (Offset %d)", (int) offset);
- return(1);
- }
- cudaChannelFormatDesc channelDescu = cudaCreateChannelDesc<unsigned short>();
- if (CudaFailedMsg(cudaBindTexture(&offset, &gAliTexRefu, fGpuTracker[0].Data().Memory(), &channelDescu, sliceCountLocal * HLTCA_GPU_SLICE_DATA_MEMORY)) || offset)\r
- {
- HLTError("Error binding CUDA Texture (Offset %d)", (int) offset);
- return(1);
- }
- cudaChannelFormatDesc channelDescs = cudaCreateChannelDesc<signed short>();
- if (CudaFailedMsg(cudaBindTexture(&offset, &gAliTexRefs, fGpuTracker[0].Data().Memory(), &channelDescs, sliceCountLocal * HLTCA_GPU_SLICE_DATA_MEMORY)) || offset)\r
- {
- HLTError("Error binding CUDA Texture (Offset %d)", (int) offset);
- return(1);
- }
-#endif
-
- //Copy Tracker Object to GPU Memory
- if (fDebugLevel >= 5) HLTInfo("Copying Tracker objects to GPU");
-#ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
- if (CudaFailedMsg(cudaMalloc(&fGpuTracker[0].fStageAtSync, 100000000))) return(1);
- CudaFailedMsg(cudaMemset(fGpuTracker[0].fStageAtSync, 0, 100000000));
-#endif
- CudaFailedMsg(cudaMemcpyToSymbolAsync(gAliHLTTPCCATracker, fGpuTracker, sizeof(AliHLTTPCCATracker) * sliceCountLocal, 0, cudaMemcpyHostToDevice, cudaStreams[0]));
-
- for (int iSlice = 0;iSlice < sliceCountLocal;iSlice++)
- {
- StandalonePerfTime(firstSlice + iSlice, 0);
-
- //Initialize GPU Slave Tracker
- if (fDebugLevel >= 5) HLTInfo("Creating Slice Data");
- fSlaveTrackers[firstSlice + iSlice].pData()->SetGPUSliceDataMemory(SliceDataMemory(fHostLockedMemory, iSlice), RowMemory(fHostLockedMemory, firstSlice + iSlice));
- fSlaveTrackers[firstSlice + iSlice].ReadEvent(&pClusterData[iSlice]);
- if (fSlaveTrackers[firstSlice + iSlice].Data().MemorySize() > HLTCA_GPU_SLICE_DATA_MEMORY)
- {
- HLTError("Insufficiant Slice Data Memory");
- return(1);
- }
-
- /*if (fSlaveTrackers[firstSlice + iSlice].CheckEmptySlice())
- {
- if (fDebugLevel >= 5) HLTInfo("Slice Empty, not running GPU Tracker");
- if (sliceCountLocal == 1)
- return(0);
- }*/
-
- //Initialize temporary memory where needed
- if (fDebugLevel >= 5) HLTInfo("Copying Slice Data to GPU and initializing temporary memory");
- PreInitRowBlocks<<<30, 256, 0, cudaStreams[2]>>>(fGpuTracker[iSlice].RowBlockPos(), fGpuTracker[iSlice].RowBlockTracklets(), fGpuTracker[iSlice].Data().HitWeights(), fSlaveTrackers[firstSlice + iSlice].Data().NumberOfHitsPlusAlign());
-
- //Copy Data to GPU Global Memory
- CudaFailedMsg(cudaMemcpyAsync(fGpuTracker[iSlice].CommonMemory(), fSlaveTrackers[firstSlice + iSlice].CommonMemory(), fSlaveTrackers[firstSlice + iSlice].CommonMemorySize(), cudaMemcpyHostToDevice, cudaStreams[iSlice & 1]));
- CudaFailedMsg(cudaMemcpyAsync(fGpuTracker[iSlice].Data().Memory(), fSlaveTrackers[firstSlice + iSlice].Data().Memory(), fSlaveTrackers[firstSlice + iSlice].Data().GpuMemorySize(), cudaMemcpyHostToDevice, cudaStreams[iSlice & 1]));
- CudaFailedMsg(cudaMemcpyAsync(fGpuTracker[iSlice].SliceDataRows(), fSlaveTrackers[firstSlice + iSlice].SliceDataRows(), (HLTCA_ROW_COUNT + 1) * sizeof(AliHLTTPCCARow), cudaMemcpyHostToDevice, cudaStreams[iSlice & 1]));
-
- if (fDebugLevel >= 4)
- {
- if (fDebugLevel >= 5) HLTInfo("Allocating Debug Output Memory");
- fSlaveTrackers[firstSlice + iSlice].TrackletMemory() = reinterpret_cast<char*> ( new uint4 [ fGpuTracker[iSlice].TrackletMemorySize()/sizeof( uint4 ) + 100] );
- fSlaveTrackers[firstSlice + iSlice].SetPointersTracklets( HLTCA_GPU_MAX_TRACKLETS );
- fSlaveTrackers[firstSlice + iSlice].HitMemory() = reinterpret_cast<char*> ( new uint4 [ fGpuTracker[iSlice].HitMemorySize()/sizeof( uint4 ) + 100] );
- fSlaveTrackers[firstSlice + iSlice].SetPointersHits( pClusterData[iSlice].NumberOfClusters() );
- }
-
- if (CUDASync("Initialization")) return(1);
- StandalonePerfTime(firstSlice + iSlice, 1);
-
- if (fDebugLevel >= 5) HLTInfo("Running GPU Neighbours Finder");
- AliHLTTPCCAProcess<AliHLTTPCCANeighboursFinder> <<<fSlaveTrackers[firstSlice + iSlice].Param().NRows(), 256, 0, cudaStreams[iSlice & 1]>>>(iSlice);
-
- if (CUDASync("Neighbours finder")) return 1;
-
- StandalonePerfTime(firstSlice + iSlice, 2);
-
- if (fDebugLevel >= 4)
- {
- CudaFailedMsg(cudaMemcpy(fSlaveTrackers[firstSlice + iSlice].Data().Memory(), fGpuTracker[iSlice].Data().Memory(), fSlaveTrackers[firstSlice + iSlice].Data().GpuMemorySize(), cudaMemcpyDeviceToHost));
- fSlaveTrackers[firstSlice + iSlice].DumpLinks(*fOutFile);
- }
-
- if (fDebugLevel >= 5) HLTInfo("Running GPU Neighbours Cleaner");
- AliHLTTPCCAProcess<AliHLTTPCCANeighboursCleaner> <<<fSlaveTrackers[firstSlice + iSlice].Param().NRows()-2, 256, 0, cudaStreams[iSlice & 1]>>>(iSlice);
- if (CUDASync("Neighbours Cleaner")) return 1;
-
- StandalonePerfTime(firstSlice + iSlice, 3);
-
- if (fDebugLevel >= 4)
- {
- CudaFailedMsg(cudaMemcpy(fSlaveTrackers[firstSlice + iSlice].Data().Memory(), fGpuTracker[iSlice].Data().Memory(), fSlaveTrackers[firstSlice + iSlice].Data().GpuMemorySize(), cudaMemcpyDeviceToHost));
- fSlaveTrackers[firstSlice + iSlice].DumpLinks(*fOutFile);
- }
-
- if (fDebugLevel >= 5) HLTInfo("Running GPU Start Hits Finder");
- AliHLTTPCCAProcess<AliHLTTPCCAStartHitsFinder> <<<fSlaveTrackers[firstSlice + iSlice].Param().NRows()-6, 256, 0, cudaStreams[iSlice & 1]>>>(iSlice);
- if (CUDASync("Start Hits Finder")) return 1;
-
- StandalonePerfTime(firstSlice + iSlice, 4);
-
- if (fDebugLevel >= 5) HLTInfo("Running GPU Start Hits Sorter");
- AliHLTTPCCAProcess<AliHLTTPCCAStartHitsSorter> <<<30, 256, 0, cudaStreams[iSlice & 1]>>>(iSlice);
- if (CUDASync("Start Hits Sorter")) return 1;
-
- StandalonePerfTime(firstSlice + iSlice, 5);
-
- if (fDebugLevel >= 2)
- {
- CudaFailedMsg(cudaMemcpy(fSlaveTrackers[firstSlice + iSlice].CommonMemory(), fGpuTracker[iSlice].CommonMemory(), fGpuTracker[iSlice].CommonMemorySize(), cudaMemcpyDeviceToHost));
- if (fDebugLevel >= 5) HLTInfo("Obtaining Number of Start Hits from GPU: %d", *fSlaveTrackers[firstSlice + iSlice].NTracklets());
- if (*fSlaveTrackers[firstSlice + iSlice].NTracklets() > HLTCA_GPU_MAX_TRACKLETS)
- {
- HLTError("HLTCA_GPU_MAX_TRACKLETS constant insuffisant");
- return(1);
- }
- }
-
- if (fDebugLevel >= 4)
- {
- *fOutFile << "Temporary ";
- CudaFailedMsg(cudaMemcpy(fSlaveTrackers[firstSlice + iSlice].TrackletStartHits(), fGpuTracker[iSlice].TrackletTmpStartHits(), pClusterData[iSlice].NumberOfClusters() * sizeof(AliHLTTPCCAHitId), cudaMemcpyDeviceToHost));
- fSlaveTrackers[firstSlice + iSlice].DumpStartHits(*fOutFile);
- uint3* tmpMemory = (uint3*) malloc(sizeof(uint3) * fSlaveTrackers[firstSlice + iSlice].Param().NRows());
- CudaFailedMsg(cudaMemcpy(tmpMemory, fGpuTracker[iSlice].RowStartHitCountOffset(), fSlaveTrackers[firstSlice + iSlice].Param().NRows() * sizeof(uint3), cudaMemcpyDeviceToHost));
- *fOutFile << "Start Hits Sort Vector:" << std::endl;
- for (int i = 0;i < fSlaveTrackers[firstSlice + iSlice].Param().NRows();i++)
- {
- *fOutFile << "Row: " << i << ", Len: " << tmpMemory[i].x << ", Offset: " << tmpMemory[i].y << ", New Offset: " << tmpMemory[i].z << std::endl;
- }
- free(tmpMemory);
-
- CudaFailedMsg(cudaMemcpy(fSlaveTrackers[firstSlice + iSlice].HitMemory(), fGpuTracker[iSlice].HitMemory(), fSlaveTrackers[firstSlice + iSlice].HitMemorySize(), cudaMemcpyDeviceToHost));
- fSlaveTrackers[firstSlice + iSlice].DumpStartHits(*fOutFile);
- }
-
- StandalonePerfTime(firstSlice + iSlice, 6);
-
- fSlaveTrackers[firstSlice + iSlice].SetGPUTrackerTracksMemory((char*) TracksMemory(fHostLockedMemory, iSlice), HLTCA_GPU_MAX_TRACKS, pClusterData[iSlice].NumberOfClusters());
- }
-
- StandalonePerfTime(firstSlice, 7);
-#ifdef HLTCA_GPU_PREFETCHDATA
- for (int iSlice = 0;iSlice < sliceCountLocal;iSlice++)
- {
- if (fSlaveTrackers[firstSlice + iSlice].Data().GPUSharedDataReq() * sizeof(ushort_v) > ALIHLTTPCCATRACKLET_CONSTRUCTOR_TEMP_MEM / 4 * sizeof(uint4))
- {
- HLTError("Insufficiant GPU shared Memory, required: %d, available %d", fSlaveTrackers[firstSlice + iSlice].Data().GPUSharedDataReq() * sizeof(ushort_v), ALIHLTTPCCATRACKLET_CONSTRUCTOR_TEMP_MEM / 4 * sizeof(uint4));
- return(1);
- }
- if (fDebugLevel >= 1)
- {
- static int infoShown = 0;
- if (!infoShown)
- {
- HLTInfo("GPU Shared Memory Cache Size: %d", 2 * fSlaveTrackers[firstSlice + iSlice].Data().GPUSharedDataReq() * sizeof(ushort_v));
- infoShown = 1;
- }
- }
- }
-#endif
-
- if (fDebugLevel >= 5) HLTInfo("Initialising Tracklet Constructor Scheduler");
- for (int iSlice = 0;iSlice < sliceCountLocal;iSlice++)
- {
- AliHLTTPCCATrackletConstructorInit<<<HLTCA_GPU_MAX_TRACKLETS /* *fSlaveTrackers[firstSlice + iSlice].NTracklets() */ / HLTCA_GPU_THREAD_COUNT + 1, HLTCA_GPU_THREAD_COUNT>>>(iSlice);
- if (CUDASync("Tracklet Initializer")) return 1;
- DumpRowBlocks(fSlaveTrackers, iSlice);
- }
-
- if (fDebugLevel >= 5) HLTInfo("Running GPU Tracklet Constructor");
- AliHLTTPCCATrackletConstructorNewGPU<<<HLTCA_GPU_BLOCK_COUNT, HLTCA_GPU_THREAD_COUNT>>>();
- if (CUDASync("Tracklet Constructor (new)")) return 1;
-
- StandalonePerfTime(firstSlice, 8);
-
- if (fDebugLevel >= 4)
- {
- for (int iSlice = 0;iSlice < sliceCountLocal;iSlice++)
- {
- DumpRowBlocks(&fSlaveTrackers[firstSlice], iSlice, false);
- CudaFailedMsg(cudaMemcpy(fSlaveTrackers[firstSlice + iSlice].CommonMemory(), fGpuTracker[iSlice].CommonMemory(), fGpuTracker[iSlice].CommonMemorySize(), cudaMemcpyDeviceToHost));
- if (fDebugLevel >= 5)
- {
- HLTInfo("Obtained %d tracklets", *fSlaveTrackers[firstSlice + iSlice].NTracklets());
- }
- CudaFailedMsg(cudaMemcpy(fSlaveTrackers[firstSlice + iSlice].TrackletMemory(), fGpuTracker[iSlice].TrackletMemory(), fGpuTracker[iSlice].TrackletMemorySize(), cudaMemcpyDeviceToHost));
- CudaFailedMsg(cudaMemcpy(fSlaveTrackers[firstSlice + iSlice].HitMemory(), fGpuTracker[iSlice].HitMemory(), fGpuTracker[iSlice].HitMemorySize(), cudaMemcpyDeviceToHost));
- fSlaveTrackers[firstSlice + iSlice].DumpTrackletHits(*fOutFile);
- }
- }
-
- for (int iSlice = 0;iSlice < sliceCountLocal;iSlice += HLTCA_GPU_TRACKLET_SELECTOR_SLICE_COUNT)
- {
- if (fDebugLevel >= 5) HLTInfo("Running HLT Tracklet selector (Slice %d to %d)", iSlice, iSlice + HLTCA_GPU_TRACKLET_SELECTOR_SLICE_COUNT);
- AliHLTTPCCAProcessMulti<AliHLTTPCCATrackletSelector><<<HLTCA_GPU_BLOCK_COUNT, HLTCA_GPU_THREAD_COUNT, 0, cudaStreams[iSlice]>>>(iSlice, CAMath::Min(HLTCA_GPU_TRACKLET_SELECTOR_SLICE_COUNT, sliceCountLocal - iSlice));
- }
- if (CUDASync("Tracklet Selector")) return 1;
- StandalonePerfTime(firstSlice, 9);
-
- CudaFailedMsg(cudaMemcpyAsync(fSlaveTrackers[firstSlice + 0].CommonMemory(), fGpuTracker[0].CommonMemory(), fGpuTracker[0].CommonMemorySize(), cudaMemcpyDeviceToHost, cudaStreams[0]));
- for (int iSliceTmp = 0;iSliceTmp <= sliceCountLocal;iSliceTmp++)
- {
- if (iSliceTmp < sliceCountLocal)
- {
- int iSlice = iSliceTmp;
- if (fDebugLevel >= 5) HLTInfo("Transfering Tracks from GPU to Host");
- cudaStreamSynchronize(cudaStreams[iSlice]);
- CudaFailedMsg(cudaMemcpyAsync(fSlaveTrackers[firstSlice + iSlice].Tracks(), fGpuTracker[iSlice].Tracks(), sizeof(AliHLTTPCCATrack) * *fSlaveTrackers[firstSlice + iSlice].NTracks(), cudaMemcpyDeviceToHost, cudaStreams[iSlice]));
- CudaFailedMsg(cudaMemcpyAsync(fSlaveTrackers[firstSlice + iSlice].TrackHits(), fGpuTracker[iSlice].TrackHits(), sizeof(AliHLTTPCCAHitId) * *fSlaveTrackers[firstSlice + iSlice].NTrackHits(), cudaMemcpyDeviceToHost, cudaStreams[iSlice]));
- if (iSlice + 1 < sliceCountLocal)
- CudaFailedMsg(cudaMemcpyAsync(fSlaveTrackers[firstSlice + iSlice + 1].CommonMemory(), fGpuTracker[iSlice + 1].CommonMemory(), fGpuTracker[iSlice + 1].CommonMemorySize(), cudaMemcpyDeviceToHost, cudaStreams[iSlice + 1]));
- }
-
- if (iSliceTmp)
- {
- int iSlice = iSliceTmp - 1;
- cudaStreamSynchronize(cudaStreams[iSlice]);
-
- if (fDebugLevel >= 4)
- {
- CudaFailedMsg(cudaMemcpy(fSlaveTrackers[firstSlice + iSlice].Data().HitWeights(), fGpuTracker[iSlice].Data().HitWeights(), fSlaveTrackers[firstSlice + iSlice].Data().NumberOfHitsPlusAlign() * sizeof(int), cudaMemcpyDeviceToHost));
- fSlaveTrackers[firstSlice + iSlice].DumpHitWeights(*fOutFile);
- fSlaveTrackers[firstSlice + iSlice].DumpTrackHits(*fOutFile);
- }
-
- if (fSlaveTrackers[firstSlice + iSlice].GPUParameters()->fGPUError)
- {
- HLTError("GPU Tracker returned Error Code %d", fSlaveTrackers[firstSlice + iSlice].GPUParameters()->fGPUError);
- return(1);
- }
- if (fDebugLevel >= 5) HLTInfo("Tracks Transfered: %d / %d", *fSlaveTrackers[firstSlice + iSlice].NTracks(), *fSlaveTrackers[firstSlice + iSlice].NTrackHits());
-
- fSlaveTrackers[firstSlice + iSlice].SetOutput(&pOutput[iSlice]);
- fSlaveTrackers[firstSlice + iSlice].WriteOutput();\r
-
- if (fDebugLevel >= 4)
- {
- delete[] fSlaveTrackers[firstSlice + iSlice].HitMemory();
- delete[] fSlaveTrackers[firstSlice + iSlice].TrackletMemory();
- }
- }
- }
-
- StandalonePerfTime(firstSlice, 10);
-
- if (fDebugLevel >= 5) HLTInfo("GPU Reconstruction finished");
-
-#ifdef HLTCA_GPU_TRACKLET_CONSTRUCTOR_DO_PROFILE
- char* stageAtSync = (char*) malloc(100000000);
- CudaFailedMsg(cudaMemcpy(stageAtSync, fGpuTracker[0].fStageAtSync, 100 * 1000 * 1000, cudaMemcpyDeviceToHost));
- cudaFree(fGpuTracker[0].fStageAtSync);
-
- FILE* fp = fopen("profile.txt", "w+");
- FILE* fp2 = fopen("profile.bmp", "w+b");
- int nEmptySync = 0, fEmpty;
-
- const int bmpheight = 1000;
- BITMAPFILEHEADER bmpFH;
- BITMAPINFOHEADER bmpIH;
- ZeroMemory(&bmpFH, sizeof(bmpFH));
- ZeroMemory(&bmpIH, sizeof(bmpIH));
-
- bmpFH.bfType = 19778; //"BM"
- bmpFH.bfSize = sizeof(bmpFH) + sizeof(bmpIH) + (HLTCA_GPU_BLOCK_COUNT * HLTCA_GPU_THREAD_COUNT / 32 * 33 - 1) * bmpheight ;
- bmpFH.bfOffBits = sizeof(bmpFH) + sizeof(bmpIH);
-
- bmpIH.biSize = sizeof(bmpIH);
- bmpIH.biWidth = HLTCA_GPU_BLOCK_COUNT * HLTCA_GPU_THREAD_COUNT / 32 * 33 - 1;
- bmpIH.biHeight = bmpheight;
- bmpIH.biPlanes = 1;
- bmpIH.biBitCount = 32;
-
- fwrite(&bmpFH, 1, sizeof(bmpFH), fp2);
- fwrite(&bmpIH, 1, sizeof(bmpIH), fp2);
-
- for (int i = 0;i < bmpheight * HLTCA_GPU_BLOCK_COUNT * HLTCA_GPU_THREAD_COUNT;i += HLTCA_GPU_BLOCK_COUNT * HLTCA_GPU_THREAD_COUNT)
- {
- fEmpty = 1;
- for (int j = 0;j < HLTCA_GPU_BLOCK_COUNT * HLTCA_GPU_THREAD_COUNT;j++)
- {
- fprintf(fp, "%d\t", stageAtSync[i + j]);
- int color = 0;
- if (stageAtSync[i + j] == 1) color = RGB(255, 0, 0);
- if (stageAtSync[i + j] == 2) color = RGB(0, 255, 0);
- if (stageAtSync[i + j] == 3) color = RGB(0, 0, 255);
- if (stageAtSync[i + j] == 4) color = RGB(255, 255, 0);
- fwrite(&color, 1, sizeof(int), fp2);
- if (j > 0 && j % 32 == 0)
- {
- color = RGB(255, 255, 255);
- fwrite(&color, 1, 4, fp2);
- }
- if (stageAtSync[i + j]) fEmpty = 0;
- }
- fprintf(fp, "\n");
- if (fEmpty) nEmptySync++;
- else nEmptySync = 0;
- //if (nEmptySync == HLTCA_GPU_SCHED_ROW_STEP + 2) break;
- }
-
- fclose(fp);
- fclose(fp2);
- free(stageAtSync);
-#endif
-
- return(0);
-}
-
-int AliHLTTPCCAGPUTracker::InitializeSliceParam(int iSlice, AliHLTTPCCAParam ¶m)
-{
- //Initialize Slice Tracker Parameter for a slave tracker
- fSlaveTrackers[iSlice].Initialize(param);
- if (fSlaveTrackers[iSlice].Param().NRows() != HLTCA_ROW_COUNT)
- {
- HLTError("Error, Slice Tracker %d Row Count of %d exceeds Constant of %d", iSlice, fSlaveTrackers[iSlice].Param().NRows(), HLTCA_ROW_COUNT);
- return(1);
- }
- return(0);
-}
-
-int AliHLTTPCCAGPUTracker::ExitGPU()
-{
- //Uninitialize CUDA
- cudaThreadSynchronize();
- if (fGPUMemory)
- {
- cudaFree(fGPUMemory);
- fGPUMemory = NULL;
- }
- if (fHostLockedMemory)
- {
- for (int i = 0;i < CAMath::Max(3, fSliceCount);i++)
- {
- cudaStreamDestroy(((cudaStream_t*) fpCudaStreams)[i]);
- }
- free(fpCudaStreams);
- fGpuTracker = NULL;
- cudaFreeHost(fHostLockedMemory);
- }
-
- if (CudaFailedMsg(cudaThreadExit()))
- {
- HLTError("Could not uninitialize GPU");
- return(1);
- }
- HLTInfo("CUDA Uninitialized");
- return(0);
-}
+++ /dev/null
---- AliHLTTPCCAGPUTracker.cucpp 2009-05-28 12:14:09.000000000 +0200
-+++ release/x86_64-pc-linux-gnu/code/AliHLTTPCCAGPUTracker.cucpp 2009-05-28 12:10:25.000000000 +0200
-@@ -23186,7 +23186,7 @@
- static T2 *Alloc(int s) { auto T2 *p = (reinterpret_cast< T2 *>(_mm_malloc(s * sizeof(CacheLineSizeHelper< T> ), 128))); return new (p) T2 [s]; }
- static void Free(T2 *const p, int size) {
- for (int i = 0; i < size; ++i) {
--((p[i]).~CacheLineSizeHelper());
-+((p[i]).~T2());
- }
- _mm_free(p);
- }
//If not building GPU Code then build dummy functions to link against
#include "AliHLTTPCCAGPUTracker.h"
+#ifndef BUILD_GPU
int AliHLTTPCCAGPUTracker::InitGPU(int /*sliceCount*/, int /*forceDeviceID*/)
{
//Dummy init function if CUDA is not available
int AliHLTTPCCAGPUTracker::Reconstruct(AliHLTTPCCASliceOutput** /*pTracker*/, AliHLTTPCCAClusterData* /*pClusterData*/, int /*fFirstSlice*/, int /*fSliceCount*/) {return(1);}
int AliHLTTPCCAGPUTracker::ExitGPU() {return(0);}
int AliHLTTPCCAGPUTracker::InitializeSliceParam(int /*iSlice*/, AliHLTTPCCAParam& /*param*/) {}
-
+void AliHLTTPCCAGPUTracker::SetOutputControl( AliHLTTPCCASliceOutput::outputControlStruct* /*val*/) {};
+#endif
#include "AliHLTTPCCADef.h"
#include "AliHLTTPCCATracker.h"
#include "AliHLTLogging.h"
+#include "AliHLTTPCCASliceOutput.h"
class AliHLTTPCCARow;
fOutFile(NULL),
fGPUMemSize(0),
fpCudaStreams(NULL),
- fSliceCount(0)
+ fSliceCount(0),
+ fOutputControl(NULL)
{};
~AliHLTTPCCAGPUTracker() {};
- int InitGPU(int sliceCount = 1, int forceDeviceID = -1);
+ int InitGPU(int sliceCount = 12, int forceDeviceID = -1);
int Reconstruct(AliHLTTPCCASliceOutput** pOutput, AliHLTTPCCAClusterData* pClusterData, int fFirstSlice, int fSliceCount = -1);
int ExitGPU();
int InitializeSliceParam(int iSlice, AliHLTTPCCAParam ¶m);
+ const AliHLTTPCCASliceOutput::outputControlStruct* OutputControl() const { return fOutputControl; }
+ void SetOutputControl( AliHLTTPCCASliceOutput::outputControlStruct* val);
+
+ int GetSliceCount() const { return(fSliceCount); }
+
private:
static void* RowMemory(void* const BaseMemory, int iSlice) { return( ((char*) BaseMemory) + iSlice * sizeof(AliHLTTPCCARow) * (HLTCA_ROW_COUNT + 1) ); }
static void* CommonMemory(void* const BaseMemory, int iSlice) { return( ((char*) BaseMemory) + HLTCA_GPU_ROWS_MEMORY + iSlice * AliHLTTPCCATracker::CommonMemorySize() ); }
bool CudaFailedMsg(cudaError_t error);
#endif
+ AliHLTTPCCASliceOutput::outputControlStruct* fOutputControl;
+
+ static bool fgGPUUsed;
+
// disable copy
AliHLTTPCCAGPUTracker( const AliHLTTPCCAGPUTracker& );
AliHLTTPCCAGPUTracker &operator=( const AliHLTTPCCAGPUTracker& );
AliHLTTPCCASliceOutput *sliceOut = reinterpret_cast<AliHLTTPCCASliceOutput *>( block->fPtr );
sliceOut->SetPointers();
fGlobalMerger->SetSliceData( slice, sliceOut );
+
+ /*char filename[256];
+ sprintf(filename, "debug%d.out", slice);
+ FILE* fp = fopen(filename, "w+b");
+ if (fp == NULL) printf("Error!!!\n");
+ fwrite(sliceOut, 1, sliceOut->EstimateSize(sliceOut->NTracks(), sliceOut->NTrackClusters()), fp);
+ fclose(fp);*/
}
fGlobalMerger->Reconstruct();
mem = ( char * )( dst + count );
}
-void AliHLTTPCCASliceOutput::SetPointers(int nTracks, int nTrackClusters)
+void AliHLTTPCCASliceOutput::SetPointers(int nTracks, int nTrackClusters, const outputControlStruct* outputControl)
{
// set all pointers
if (nTracks == -1) nTracks = fNTracks;
if (nTrackClusters == -1) nTrackClusters = fNTrackClusters;
char *mem = fMemory;
- AssignNoAlignment( fTracks, mem, nTracks );
- AssignNoAlignment( fClusterUnpackedYZ, mem, nTrackClusters );
- AssignNoAlignment( fClusterUnpackedX, mem, nTrackClusters );
- AssignNoAlignment( fClusterId, mem, nTrackClusters );
- AssignNoAlignment( fClusterPackedYZ, mem, nTrackClusters );
- AssignNoAlignment( fClusterRow, mem, nTrackClusters );
- AssignNoAlignment( fClusterPackedAmp, mem, nTrackClusters );
- // memory for output tracks
-
- AssignMemory( fOutTracks, mem, nTracks );
-
- // arrays for track hits
-
- AssignMemory( fOutTrackHits, mem, nTrackClusters );
-
-
- fMemorySize = (mem - fMemory);
+ if (outputControl == NULL || outputControl->fDefaultOutput)
+ {
+ AssignNoAlignment( fTracks, mem, nTracks );
+ AssignNoAlignment( fClusterUnpackedYZ, mem, nTrackClusters );
+ AssignNoAlignment( fClusterUnpackedX, mem, nTrackClusters );
+ AssignNoAlignment( fClusterId, mem, nTrackClusters );
+ AssignNoAlignment( fClusterPackedYZ, mem, nTrackClusters );
+ AssignNoAlignment( fClusterRow, mem, nTrackClusters );
+ AssignNoAlignment( fClusterPackedAmp, mem, nTrackClusters );
+ }
+
+ if (outputControl == NULL || outputControl->fObsoleteOutput)
+ {
+ // memory for output tracks
+ AssignMemory( fOutTracks, mem, nTracks );
+ // arrays for track hits
+ AssignMemory( fOutTrackHits, mem, nTrackClusters );
+ }
+ if ((size_t) (mem - fMemory) + sizeof(AliHLTTPCCASliceOutput) > fMemorySize)
+ {
+ fMemorySize = NULL;
+ //printf("\nINTERNAL ERROR IN AliHLTTPCCASliceOutput MEMORY MANAGEMENT req: %d avail: %d\n", (int) ((size_t) (mem - fMemory) + sizeof(AliHLTTPCCASliceOutput)), (int) fMemorySize);
+ }
}
-void AliHLTTPCCASliceOutput::Allocate(AliHLTTPCCASliceOutput* &ptrOutput, int nTracks, int nTrackHits)
+void AliHLTTPCCASliceOutput::Allocate(AliHLTTPCCASliceOutput* &ptrOutput, int nTracks, int nTrackHits, outputControlStruct* outputControl)
{
//Allocate All memory needed for slice output
- if (ptrOutput) free(ptrOutput);
- ptrOutput = (AliHLTTPCCASliceOutput*) malloc(EstimateSize(nTracks, nTrackHits) + nTracks * sizeof(AliHLTTPCCAOutTrack) + nTrackHits * sizeof(int) + 1024);
- ptrOutput->SetPointers(nTracks, nTrackHits); // set pointers
+ const int memsize = (outputControl->fDefaultOutput ? EstimateSize(nTracks, nTrackHits) : sizeof(AliHLTTPCCASliceOutput)) +
+ (outputControl->fObsoleteOutput? (nTracks * sizeof(AliHLTTPCCAOutTrack) + nTrackHits * sizeof(int)) : 0);
+ if (outputControl->fOutputPtr)
+ {
+ if (outputControl->fOutputMaxSize < memsize)
+ {
+ ptrOutput = NULL;
+ return;
+ }
+ ptrOutput = (AliHLTTPCCASliceOutput*) outputControl->fOutputPtr;
+ outputControl->fOutputPtr += memsize;
+ outputControl->fOutputMaxSize -= memsize;
+ }
+ else
+ {
+ if (ptrOutput) free(ptrOutput);
+ ptrOutput = (AliHLTTPCCASliceOutput*) malloc(memsize);
+ }
+ ptrOutput->SetMemorySize(memsize);
+ ptrOutput->SetPointers(nTracks, nTrackHits, outputControl); // set pointers
}
#endif
class AliHLTTPCCASliceOutput
{
public:
+
+ struct outputControlStruct
+ {
+ outputControlStruct() : fObsoleteOutput( 1 ), fDefaultOutput( 1 ), fOutputPtr( NULL ), fOutputMaxSize ( 0 ) {}
+ int fObsoleteOutput; //Enable Obsolete Output
+ int fDefaultOutput; //Enable Default Output
+ char* fOutputPtr; //Pointer to Output Space, NULL to allocate output space
+ int fOutputMaxSize; //Max Size of Output Data if Pointer to output space is given
+ };
+
GPUhd() int NTracks() const { return fNTracks; }
GPUhd() int NTrackClusters() const { return fNTrackClusters; }
GPUhd() float ClusterUnpackedX ( int i ) const { return fClusterUnpackedX[i]; }
GPUhd() static int EstimateSize( int nOfTracks, int nOfTrackClusters );
- void SetPointers(int nTracks = -1, int nTrackClusters = -1);
- static void Allocate(AliHLTTPCCASliceOutput* &ptrOutput, int nTracks, int nTrackHits);
+ void SetPointers(int nTracks = -1, int nTrackClusters = -1, const outputControlStruct* outputControl = NULL);
+ static void Allocate(AliHLTTPCCASliceOutput* &ptrOutput, int nTracks, int nTrackHits, outputControlStruct* outputControl);
GPUhd() void SetNTracks ( int v ) { fNTracks = v; }
GPUhd() void SetNTrackClusters( int v ) { fNTrackClusters = v; }
GPUhd() void SetClusterUnpackedYZ( int i, float2 v ) { fClusterUnpackedYZ[i] = v; }
GPUhd() void SetClusterUnpackedX( int i, float v ) { fClusterUnpackedX[i] = v; }
+ GPUhd() size_t OutputMemorySize() const { return(fMemorySize); }
+
//Obsolete Output
GPUhd() int NOutTracks() const { return(fNOutTracks); }
const AliHLTTPCCASliceOutput& operator=( const AliHLTTPCCASliceOutput& ) const { return *this; }
AliHLTTPCCASliceOutput( const AliHLTTPCCASliceOutput& );
+ GPUh() void SetMemorySize(size_t val) { fMemorySize = val; }
+
int fNTracks; // number of reconstructed tracks
int fNTrackClusters; // total number of track clusters
AliHLTTPCCASliceTrack *fTracks; // pointer to reconstructed tracks
}
AliHLTTPCCAStandaloneFramework::AliHLTTPCCAStandaloneFramework()
- : fMerger(), fTracker(), fStatNEvents( 0 ), fDebugLevel(0)
+ : fMerger(), fOutputControl(), fTracker(), fStatNEvents( 0 ), fDebugLevel(0)
{
//* constructor
fStatTime[i] = 0;
}
for ( int i = 0;i < fgkNSlices;i++) fSliceOutput[i] = NULL;
+ fTracker.SetOutputControl(&fOutputControl);
}
AliHLTTPCCAStandaloneFramework::AliHLTTPCCAStandaloneFramework( const AliHLTTPCCAStandaloneFramework& )
- : fMerger(), fTracker(), fStatNEvents( 0 ), fDebugLevel(0)
+ : fMerger(), fOutputControl(), fTracker(), fStatNEvents( 0 ), fDebugLevel(0)
{
//* dummy
}
unsigned long long int cpuTimers[16], gpuTimers[16], tmpFreq;
StandaloneQueryFreq(&tmpFreq);
StandaloneQueryTime(&startTime);
+
+ fOutputControl.fObsoleteOutput = 0;
#endif
if (forceSingleSlice != -1)
fMerger.SetSliceParam( fTracker.Param(0) );
for ( int i = 0; i < fgkNSlices; i++ ) {
+ //printf("slice %d clusters %d tracks %d\n", i, fClusterData[i].NumberOfClusters(), fSliceOutput[i]->NTracks());
fMerger.SetSliceData( i, fSliceOutput[i] );
}
void SetGPUDebugLevel(int Level, std::ostream *OutFile = NULL, std::ostream *GPUOutFile = NULL) { fDebugLevel = Level; fTracker.SetGPUDebugLevel(Level, OutFile, GPUOutFile); }
int SetGPUTrackerOption(char* OptionName, int OptionValue) {return(fTracker.SetGPUTrackerOption(OptionName, OptionValue));}
int SetGPUTracker(bool enable) { return(fTracker.SetGPUTracker(enable)); }
- int GetGPUStatus() { return(fTracker.GetGPUStatus()); }
+ int GetGPUStatus() const { return(fTracker.GetGPUStatus()); }
+ int GetGPUMaxSliceCount() const { return(fTracker.MaxSliceCount()); }
int InitializeSliceParam(int iSlice, AliHLTTPCCAParam& param) { return(fTracker.InitializeSliceParam(iSlice, param)); }
AliHLTTPCCAMerger fMerger; //* global merger
AliHLTTPCCAClusterData fClusterData[fgkNSlices];
AliHLTTPCCASliceOutput* fSliceOutput[fgkNSlices];
+ AliHLTTPCCASliceOutput::outputControlStruct fOutputControl;
AliHLTTPCCATrackerFramework fTracker;
{
if ( NHitsTotal() < 1 ) {
{
- AliHLTTPCCASliceOutput::Allocate(*fOutput, 0, 0);
+ AliHLTTPCCASliceOutput::Allocate(*fOutput, 0, 0, fOutputControl);
AliHLTTPCCASliceOutput* useOutput = *fOutput;
+ if (fOutput == NULL) return(1);
useOutput->SetNTracks( 0 );
useOutput->SetNTrackClusters( 0 );
useOutput->SetNOutTracks(0);
//cout<<"output: nTracks = "<<*fNTracks<<", nHitsTotal="<<NHitsTotal()<<std::endl;
- AliHLTTPCCASliceOutput::Allocate(*fOutput, fCommonMem->fNTracks, fCommonMem->fNTrackHits);
+ if (fOutputControl == NULL) fOutputControl = new AliHLTTPCCASliceOutput::outputControlStruct;
+ AliHLTTPCCASliceOutput::Allocate(*fOutput, fCommonMem->fNTracks, fCommonMem->fNTrackHits, fOutputControl);
AliHLTTPCCASliceOutput* useOutput = *fOutput;
+ if (useOutput == NULL) return;
- useOutput->SetNTracks( fCommonMem->fNTracks );
- useOutput->SetNTrackClusters( fCommonMem->fNTrackHits );
-
- int nStoredHits = 0;
-
- for ( int iTr = 0; iTr < fCommonMem->fNTracks; iTr++ ) {
- AliHLTTPCCATrack &iTrack = fTracks[iTr];
-
- AliHLTTPCCASliceTrack out;
- out.SetFirstClusterRef( nStoredHits );
- out.SetNClusters( iTrack.NHits() );
- out.SetParam( iTrack.Param() );
-
- useOutput->SetTrack( iTr, out );
-
- int iID = iTrack.FirstHitID();
- for ( int ith = 0; ith < iTrack.NHits(); ith++ ) {
- const AliHLTTPCCAHitId &ic = fTrackHits[iID + ith];
- int iRow = ic.RowIndex();
- int ih = ic.HitIndex();
-
- const AliHLTTPCCARow &row = fData.Row( iRow );
-
- //float y0 = row.Grid().YMin();
- //float z0 = row.Grid().ZMin();
- //float stepY = row.HstepY();
- //float stepZ = row.HstepZ();
- //float x = row.X();
-
- //const uint4 *tmpint4 = RowData() + row.FullOffset();
- //const ushort2 *hits = reinterpret_cast<const ushort2*>(tmpint4);
- //ushort2 hh = hits[ih];
- //float y = y0 + hh.x*stepY;
- //float z = z0 + hh.y*stepZ;
-
- int clusterIndex = fData.ClusterDataIndex( row, ih );
- int clusterRowIndex = clusterIndex - fClusterData->RowOffset( iRow );
-
- if ( clusterIndex < 0 || clusterIndex >= fClusterData->NumberOfClusters() ) {
- //std::cout << inpIDtot << ", " << fClusterData->NumberOfClusters()
- //<< "; " << inpID << ", " << fClusterData->NumberOfClusters( iRow ) << std::endl;
- //abort();
- continue;
- }
- if ( clusterRowIndex < 0 || clusterRowIndex >= fClusterData->NumberOfClusters( iRow ) ) {
- //std::cout << inpIDtot << ", " << fClusterData->NumberOfClusters()
- //<< "; " << inpID << ", " << fClusterData->NumberOfClusters( iRow ) << std::endl;
- //abort();
- continue;
- }
-
- float origX = fClusterData->X( clusterIndex );
- float origY = fClusterData->Y( clusterIndex );
- float origZ = fClusterData->Z( clusterIndex );
-
-
- int id = fClusterData->Id( clusterIndex );
-
- unsigned short hPackedYZ = 0;
- UChar_t hPackedAmp = 0;
- float2 hUnpackedYZ;
- hUnpackedYZ.x = origY;
- hUnpackedYZ.y = origZ;
- float hUnpackedX = origX;
-
- useOutput->SetClusterId( nStoredHits, id );
- useOutput->SetClusterRow( nStoredHits, ( unsigned char ) iRow );
- useOutput->SetClusterPackedYZ( nStoredHits, hPackedYZ );
- useOutput->SetClusterPackedAmp( nStoredHits, hPackedAmp );
- useOutput->SetClusterUnpackedYZ( nStoredHits, hUnpackedYZ );
- useOutput->SetClusterUnpackedX( nStoredHits, hUnpackedX );
- nStoredHits++;
- }
+ if (fOutputControl->fDefaultOutput)
+ {
+ useOutput->SetNTracks( fCommonMem->fNTracks );
+ useOutput->SetNTrackClusters( fCommonMem->fNTrackHits );
+
+ int nStoredHits = 0;
+
+ for ( int iTr = 0; iTr < fCommonMem->fNTracks; iTr++ ) {
+ AliHLTTPCCATrack &iTrack = fTracks[iTr];
+
+ AliHLTTPCCASliceTrack out;
+ out.SetFirstClusterRef( nStoredHits );
+ out.SetNClusters( iTrack.NHits() );
+ out.SetParam( iTrack.Param() );
+
+ useOutput->SetTrack( iTr, out );
+
+ int iID = iTrack.FirstHitID();
+ for ( int ith = 0; ith < iTrack.NHits(); ith++ ) {
+ const AliHLTTPCCAHitId &ic = fTrackHits[iID + ith];
+ int iRow = ic.RowIndex();
+ int ih = ic.HitIndex();
+
+ const AliHLTTPCCARow &row = fData.Row( iRow );
+
+ //float y0 = row.Grid().YMin();
+ //float z0 = row.Grid().ZMin();
+ //float stepY = row.HstepY();
+ //float stepZ = row.HstepZ();
+ //float x = row.X();
+
+ //const uint4 *tmpint4 = RowData() + row.FullOffset();
+ //const ushort2 *hits = reinterpret_cast<const ushort2*>(tmpint4);
+ //ushort2 hh = hits[ih];
+ //float y = y0 + hh.x*stepY;
+ //float z = z0 + hh.y*stepZ;
+
+ int clusterIndex = fData.ClusterDataIndex( row, ih );
+ int clusterRowIndex = clusterIndex - fClusterData->RowOffset( iRow );
+
+ if ( clusterIndex < 0 || clusterIndex >= fClusterData->NumberOfClusters() ) {
+ //std::cout << inpIDtot << ", " << fClusterData->NumberOfClusters()
+ //<< "; " << inpID << ", " << fClusterData->NumberOfClusters( iRow ) << std::endl;
+ //abort();
+ continue;
+ }
+ if ( clusterRowIndex < 0 || clusterRowIndex >= fClusterData->NumberOfClusters( iRow ) ) {
+ //std::cout << inpIDtot << ", " << fClusterData->NumberOfClusters()
+ //<< "; " << inpID << ", " << fClusterData->NumberOfClusters( iRow ) << std::endl;
+ //abort();
+ continue;
+ }
+
+ float origX = fClusterData->X( clusterIndex );
+ float origY = fClusterData->Y( clusterIndex );
+ float origZ = fClusterData->Z( clusterIndex );
+
+
+ int id = fClusterData->Id( clusterIndex );
+
+ unsigned short hPackedYZ = 0;
+ UChar_t hPackedAmp = 0;
+ float2 hUnpackedYZ;
+ hUnpackedYZ.x = origY;
+ hUnpackedYZ.y = origZ;
+ float hUnpackedX = origX;
+
+ useOutput->SetClusterId( nStoredHits, id );
+ useOutput->SetClusterRow( nStoredHits, ( unsigned char ) iRow );
+ useOutput->SetClusterPackedYZ( nStoredHits, hPackedYZ );
+ useOutput->SetClusterPackedAmp( nStoredHits, hPackedAmp );
+ useOutput->SetClusterUnpackedYZ( nStoredHits, hUnpackedYZ );
+ useOutput->SetClusterUnpackedX( nStoredHits, hUnpackedX );
+ nStoredHits++;
+ }
+ }
}
// old stuff
-#ifndef HLTCA_STANDALONE
- useOutput->SetNOutTrackHits(0);
- useOutput->SetNOutTracks(0);
-
-
- for ( int iTr = 0; iTr < fCommonMem->fNTracks; iTr++ ) {
-
- const AliHLTTPCCATrack &iTrack = fTracks[iTr];
-
- //std::cout<<"iTr = "<<iTr<<", nHits="<<iTrack.NHits()<<std::endl;
-
- //if( !iTrack.Alive() ) continue;
- if ( iTrack.NHits() < 3 ) continue;
- AliHLTTPCCAOutTrack &out = useOutput->OutTracks()[useOutput->NOutTracks()];
- out.SetFirstHitRef( useOutput->NOutTrackHits() );
- out.SetNHits( 0 );
- out.SetOrigTrackID( iTr );
- out.SetStartPoint( iTrack.Param() );
- out.SetEndPoint( iTrack.Param() );
-
- int iID = iTrack.FirstHitID();
- int nOutTrackHitsOld = useOutput->NOutTrackHits();
-
- for ( int ith = 0; ith < iTrack.NHits(); ith++ ) {
- const AliHLTTPCCAHitId &ic = fTrackHits[iID + ith];
- const AliHLTTPCCARow &row = Row( ic );
- int ih = ic.HitIndex();
- useOutput->SetOutTrackHit(useOutput->NOutTrackHits(), HitInputID( row, ih ));
- useOutput->SetNOutTrackHits(useOutput->NOutTrackHits() + 1 );
- //std::cout<<"write i,row,hit,id="<<ith<<", "<<ID2IRow(ic)<<", "<<ih<<", "<<HitInputID( row, ih )<<std::endl;
- if ( useOutput->NOutTrackHits() >= 10*NHitsTotal() ) {
- std::cout << "fNOutTrackHits>NHitsTotal()" << std::endl;
- //exit(0);
- return;//SG!!!
- }
- out.SetNHits( out.NHits() + 1 );
- }
- if ( out.NHits() >= 2 ) {
- useOutput->SetNOutTracks(useOutput->NOutTracks() + 1);
- } else {
- useOutput->SetNOutTrackHits(nOutTrackHitsOld);
- }
+ if (fOutputControl->fObsoleteOutput)
+ {
+ useOutput->SetNOutTrackHits(0);
+ useOutput->SetNOutTracks(0);
+
+
+ for ( int iTr = 0; iTr < fCommonMem->fNTracks; iTr++ ) {
+
+ const AliHLTTPCCATrack &iTrack = fTracks[iTr];
+
+ //std::cout<<"iTr = "<<iTr<<", nHits="<<iTrack.NHits()<<std::endl;
+
+ //if( !iTrack.Alive() ) continue;
+ if ( iTrack.NHits() < 3 ) continue;
+ AliHLTTPCCAOutTrack &out = useOutput->OutTracks()[useOutput->NOutTracks()];
+ out.SetFirstHitRef( useOutput->NOutTrackHits() );
+ out.SetNHits( 0 );
+ out.SetOrigTrackID( iTr );
+ out.SetStartPoint( iTrack.Param() );
+ out.SetEndPoint( iTrack.Param() );
+
+ int iID = iTrack.FirstHitID();
+ int nOutTrackHitsOld = useOutput->NOutTrackHits();
+
+ for ( int ith = 0; ith < iTrack.NHits(); ith++ ) {
+ const AliHLTTPCCAHitId &ic = fTrackHits[iID + ith];
+ const AliHLTTPCCARow &row = Row( ic );
+ int ih = ic.HitIndex();
+ useOutput->SetOutTrackHit(useOutput->NOutTrackHits(), HitInputID( row, ih ));
+ useOutput->SetNOutTrackHits(useOutput->NOutTrackHits() + 1 );
+ //std::cout<<"write i,row,hit,id="<<ith<<", "<<ID2IRow(ic)<<", "<<ih<<", "<<HitInputID( row, ih )<<std::endl;
+ if ( useOutput->NOutTrackHits() >= 10*NHitsTotal() ) {
+ std::cout << "fNOutTrackHits>NHitsTotal()" << std::endl;
+ //exit(0);
+ return;//SG!!!
+ }
+ out.SetNHits( out.NHits() + 1 );
+ }
+ if ( out.NHits() >= 2 ) {
+ useOutput->SetNOutTracks(useOutput->NOutTracks() + 1);
+ } else {
+ useOutput->SetNOutTrackHits(nOutTrackHitsOld);
+ }
+ }
}
-#endif
timer.Stop();
fTimers[5] += timer.CpuTime();
GPUh() void AliHLTTPCCATracker::ReadTracks( std::istream &in )
{
//* Read tracks from file
- AliHLTTPCCASliceOutput::Allocate(*fOutput, 4096, 16384);//Just some max values
+ AliHLTTPCCASliceOutput::Allocate(*fOutput, 4096, 16384, fOutputControl);//Just some max values
AliHLTTPCCASliceOutput* useOutput = *fOutput;
int tmpval;
AliHLTTPCCATracker()
: fParam(),
+ fOutputControl(),
fClusterData( 0 ),
fData(),
fIsGPUTracker( false ),
struct StructGPUParameters
{
+ StructGPUParameters() : fScheduleFirstDynamicTracklet( 0 ), fGPUError( 0 ) {}
int fScheduleFirstDynamicTracklet; //Last Tracklet with fixed position in sheduling
int fGPUError; //Signalizes error on GPU during GPU Reconstruction, kind of return value
};
struct StructGPUParametersConst
{
+ StructGPUParametersConst() : fGPUFixedBlockCount( 0 ), fGPUiSlice( 0 ), fGPUnSlices( 0 ) {}
int fGPUFixedBlockCount; //Count of blocks that is used for this tracker in fixed schedule situations
int fGPUiSlice;
int fGPUnSlices;
struct commonMemoryStruct
{
+ commonMemoryStruct() : fNTracklets( 0 ), fNTracks( 0 ), fNTrackHits( 0 ), fGPUParameters() {}
int fNTracklets; // number of tracklets
int fNTracks; // number of reconstructed tracks
int fNTrackHits; // number of track hits
GPUhd() const AliHLTTPCCAParam &Param() const { return fParam; }
GPUhd() void SetParam( const AliHLTTPCCAParam &v ) { fParam = v; }
+ GPUhd() const AliHLTTPCCASliceOutput::outputControlStruct* OutputControl() const { return fOutputControl; }
+ GPUh() void SetOutputControl( AliHLTTPCCASliceOutput::outputControlStruct* val) { fOutputControl = val; }
+
GPUhd() AliHLTTPCCAClusterData *ClusterData() const { return fClusterData; }
GPUhd() const AliHLTTPCCASliceData &Data() const { return fData; }
GPUhd() AliHLTTPCCASliceData *pData() {return &fData; }
unsigned long long int fPerfTimers[16]; // running CPU time for different parts of the algorithm
void StandalonePerfTime(int i);
+ AliHLTTPCCASliceOutput::outputControlStruct* fOutputControl;
+
/** A pointer to the ClusterData object that the SliceData was created from. This can be used to
* merge clusters from inside the SliceTracker code and recreate the SliceData. */
AliHLTTPCCAClusterData *fClusterData; // ^
AliHLTTPCCATrackerComponent::AliHLTTPCCATrackerComponent()
:
fTracker( NULL ),
- fOutput( NULL ),
fSolenoidBz( 0 ),
fMinNTrackClusters( 0 ),
fClusterZCut( 500. ),
:
AliHLTProcessor(),
fTracker( NULL ),
- fOutput( NULL),
fSolenoidBz( 0 ),
fMinNTrackClusters( 30 ),
fClusterZCut( 500. ),
{
// see header file for class documentation
delete fTracker;
- if (fOutput) free(fOutput);
}
//
{
// Configure the CA tracker component
- if ( fTracker || fOutput ) return EINPROGRESS;
+ if ( fTracker ) return EINPROGRESS;
fTracker = new AliHLTTPCCATrackerFramework();
// see header file for class documentation
delete fTracker;
fTracker = NULL;
- free(fOutput);
- fOutput = NULL;
return 0;
}
// Determine the slice number
+ //Find min and max slice number with now slice missing in between (for default output)
+ int minslice = -1, maxslice = -1;
int slice = -1;
{
std::vector<int> slices;
if ( iter->fDataType != AliHLTTPCDefinitions::fgkClustersDataType ) continue;
slice = AliHLTTPCDefinitions::GetMinSliceNr( *iter );
+ if (slice < minslice || minslice == -1) minslice = slice;
+ if (slice > maxslice) maxslice = slice;
bool found = 0;
slCntIter = sliceCnts.begin();
} else *slCntIter++;
}
+ if ( slices.size() == 0 ) {
+ HLTWarning( "no slices found in event" );
+ return 0;
+ }
- // Determine slice number to really use.
+
+ // Determine slice number to really use. (for obsolete output)
if ( slices.size() > 1 ) {
- Logging( kHLTLogError, "HLT::TPCSliceTracker::DoEvent", "Multiple slices found in event",
+ Logging( fOutputTRAKSEGS ? kHLTLogError : kHLTLogDebug, "HLT::TPCSliceTracker::DoEvent", "Multiple slices found in event",
"Multiple slice numbers found in event 0x%08lX (%lu). Determining maximum occuring slice number...",
evtData.fEventID, evtData.fEventID );
unsigned maxCntSlice = 0;
slCntIter = sliceCnts.begin();
for ( slIter = slices.begin(); slIter != slices.end(); slIter++, slCntIter++ ) {
- Logging( kHLTLogError, "HLT::TPCSliceTracker::DoEvent", "Multiple slices found in event",
+ Logging( fOutputTRAKSEGS ? kHLTLogError : kHLTLogDebug, "HLT::TPCSliceTracker::DoEvent", "Multiple slices found in event",
"Slice %lu found %lu times.", *slIter, *slCntIter );
- if ( maxCntSlice < *slCntIter ) {
+ if ( fOutputTRAKSEGS && maxCntSlice < *slCntIter ) {
maxCntSlice = *slCntIter;
slice = *slIter;
}
}
- Logging( kHLTLogError, "HLT::TPCSliceTracker::DoEvent", "Multiple slices found in event",
+ if (fOutputTRAKSEGS)
+ {
+ Logging( kHLTLogError, "HLT::TPCSliceTracker::DoEvent", "Multiple slices found in event",
"Using slice %lu.", slice );
+ }
} else if ( slices.size() > 0 ) {
slice = *( slices.begin() );
}
- }
-
- if ( slice < 0 ) {
- HLTWarning( "no slices found in event" );
- return 0;
- }
-
- // Initialize the tracker
+ if (fOutputTRAKSEGS)
+ {
+ minslice = maxslice = slice;
+ }
+ else
+ {
+ for (int islice = minslice;islice <= maxslice;islice++)
+ {
+ bool found = false;
+ for(slIter = slices.begin(); slIter != slices.end();slIter++)
+ {
+ if (*slIter == islice)
+ {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ maxslice = islice - 1;
+ break;
+ }
+ }
+ }
+ }
+ if ( !fTracker ) fTracker = new AliHLTTPCCATrackerFramework;
+ int slicecount = maxslice + 1 - minslice;
+ if (slicecount > fTracker->MaxSliceCount())
{
- if ( !fTracker ) fTracker = new AliHLTTPCCATrackerFramework;
- int iSec = slice;
- float inRmin = 83.65;
- // float inRmax = 133.3;
- // float outRmin = 133.5;
- float outRmax = 247.7;
- float plusZmin = 0.0529937;
- float plusZmax = 249.778;
- float minusZmin = -249.645;
- float minusZmax = -0.0799937;
- float dalpha = 0.349066;
- float alpha = 0.174533 + dalpha * iSec;
-
- bool zPlus = ( iSec < 18 );
- float zMin = zPlus ? plusZmin : minusZmin;
- float zMax = zPlus ? plusZmax : minusZmax;
- //TPCZmin = -249.645, ZMax = 249.778
- // float rMin = inRmin;
- // float rMax = outRmax;
- int nRows = AliHLTTPCTransform::GetNRows();
-
- float padPitch = 0.4;
- float sigmaZ = 0.228808;
-
- float *rowX = new float [nRows];
- for ( int irow = 0; irow < nRows; irow++ ) {
- rowX[irow] = AliHLTTPCTransform::Row2X( irow );
- }
-
- AliHLTTPCCAParam param;
-
- param.Initialize( iSec, nRows, rowX, alpha, dalpha,
- inRmin, outRmax, zMin, zMax, padPitch, sigmaZ, fSolenoidBz );
- param.SetHitPickUpFactor( 2 );
- if( fNeighboursSearchArea>0 ) param.SetNeighboursSearchArea( fNeighboursSearchArea );
- if( fClusterErrorCorrectionY>1.e-4 ) param.SetClusterError2CorrectionY( fClusterErrorCorrectionY*fClusterErrorCorrectionY );
- if( fClusterErrorCorrectionZ>1.e-4 ) param.SetClusterError2CorrectionZ( fClusterErrorCorrectionZ*fClusterErrorCorrectionZ );
- param.Update();
- fTracker->InitializeSliceParam( slice, param );
- delete[] rowX;
+ maxslice = minslice + (slicecount = fTracker->MaxSliceCount());
}
+ int nClustersTotalSum = 0;
+ AliHLTTPCCAClusterData* clusterData = new AliHLTTPCCAClusterData[slicecount];
// min and max patch numbers and row numbers
-
- int row[2] = {0, 0};
- int minPatch = 100, maxPatch = -1;
-
- // total n Hits
-
- int nClustersTotal = 0;
-
- // sort patches
-
- std::vector<unsigned long> patchIndices;
-
- for ( ndx = 0; ndx < evtData.fBlockCnt; ndx++ ) {
- iter = blocks + ndx;
- if ( iter->fDataType != AliHLTTPCDefinitions::fgkClustersDataType ) continue;
- if ( slice != AliHLTTPCDefinitions::GetMinSliceNr( *iter ) ) continue;
- inPtrSP = ( AliHLTTPCClusterData* )( iter->fPtr );
- nClustersTotal += inPtrSP->fSpacePointCnt;
- int patch = AliHLTTPCDefinitions::GetMinPatchNr( *iter );
- if ( minPatch > patch ) {
- minPatch = patch;
- row[0] = AliHLTTPCTransform::GetFirstRow( patch );
- }
- if ( maxPatch < patch ) {
- maxPatch = patch;
- row[1] = AliHLTTPCTransform::GetLastRow( patch );
- }
- std::vector<unsigned long>::iterator pIter = patchIndices.begin();
- while ( pIter != patchIndices.end() && AliHLTTPCDefinitions::GetMinPatchNr( blocks[*pIter] ) < patch ) {
- pIter++;
- }
- patchIndices.insert( pIter, ndx );
+ int* slicerow = new int[slicecount * 2];
+ int* sliceminPatch = new int[slicecount];
+ int* slicemaxPatch = new int[slicecount];
+ memset(slicerow, 0, slicecount * 2 * sizeof(int));
+ for (int i = 0;i < slicecount;i++)
+ {
+ sliceminPatch[i] = 100;
+ slicemaxPatch[i] = -1;
}
+ //Prepare everything for all slices
- // pass event to CA Tracker
-
-
- Logging( kHLTLogDebug, "HLT::TPCCATracker::DoEvent", "Reading hits",
- "Total %d hits to read for slice %d", nClustersTotal, slice );
-
-
- AliHLTTPCCAClusterData clusterData;
- clusterData.StartReading( slice, nClustersTotal );
-
- for ( std::vector<unsigned long>::iterator pIter = patchIndices.begin(); pIter != patchIndices.end(); pIter++ ) {
- ndx = *pIter;
- iter = blocks + ndx;
-
- int patch = AliHLTTPCDefinitions::GetMinPatchNr( *iter );
- inPtrSP = ( AliHLTTPCClusterData* )( iter->fPtr );
+ for (int islice = 0;islice < slicecount;islice++)
+ {
+ slice = minslice + islice;
+
+ // Initialize the tracker slice
+ {
+ int iSec = slice;
+ float inRmin = 83.65;
+ // float inRmax = 133.3;
+ // float outRmin = 133.5;
+ float outRmax = 247.7;
+ float plusZmin = 0.0529937;
+ float plusZmax = 249.778;
+ float minusZmin = -249.645;
+ float minusZmax = -0.0799937;
+ float dalpha = 0.349066;
+ float alpha = 0.174533 + dalpha * iSec;
+
+ bool zPlus = ( iSec < 18 );
+ float zMin = zPlus ? plusZmin : minusZmin;
+ float zMax = zPlus ? plusZmax : minusZmax;
+ //TPCZmin = -249.645, ZMax = 249.778
+ // float rMin = inRmin;
+ // float rMax = outRmax;
+ int nRows = AliHLTTPCTransform::GetNRows();
+
+ float padPitch = 0.4;
+ float sigmaZ = 0.228808;
+
+ float *rowX = new float [nRows];
+ for ( int irow = 0; irow < nRows; irow++ ) {
+ rowX[irow] = AliHLTTPCTransform::Row2X( irow );
+ }
+
+ AliHLTTPCCAParam param;
+
+ param.Initialize( iSec, nRows, rowX, alpha, dalpha,
+ inRmin, outRmax, zMin, zMax, padPitch, sigmaZ, fSolenoidBz );
+ param.SetHitPickUpFactor( 2 );
+ if( fNeighboursSearchArea>0 ) param.SetNeighboursSearchArea( fNeighboursSearchArea );
+ if( fClusterErrorCorrectionY>1.e-4 ) param.SetClusterError2CorrectionY( fClusterErrorCorrectionY*fClusterErrorCorrectionY );
+ if( fClusterErrorCorrectionZ>1.e-4 ) param.SetClusterError2CorrectionZ( fClusterErrorCorrectionZ*fClusterErrorCorrectionZ );
+ param.Update();
+ fTracker->InitializeSliceParam( slice, param );
+ delete[] rowX;
+ }
+
+ // total n Hits
+ int nClustersTotal = 0;
+
+ // sort patches
+ std::vector<unsigned long> patchIndices;
+
+ for ( ndx = 0; ndx < evtData.fBlockCnt; ndx++ ) {
+ iter = blocks + ndx;
+ if ( iter->fDataType != AliHLTTPCDefinitions::fgkClustersDataType ) continue;
+ if ( slice != AliHLTTPCDefinitions::GetMinSliceNr( *iter ) ) continue;
+ inPtrSP = ( AliHLTTPCClusterData* )( iter->fPtr );
+ nClustersTotal += inPtrSP->fSpacePointCnt;
+ int patch = AliHLTTPCDefinitions::GetMinPatchNr( *iter );
+ if ( sliceminPatch[islice] > patch ) {
+ sliceminPatch[islice] = patch;
+ slicerow[2 * islice + 0] = AliHLTTPCTransform::GetFirstRow( patch );
+ }
+ if ( slicemaxPatch[islice] < patch ) {
+ slicemaxPatch[islice] = patch;
+ slicerow[2 * islice + 1] = AliHLTTPCTransform::GetLastRow( patch );
+ }
+ std::vector<unsigned long>::iterator pIter = patchIndices.begin();
+ while ( pIter != patchIndices.end() && AliHLTTPCDefinitions::GetMinPatchNr( blocks[*pIter] ) < patch ) {
+ pIter++;
+ }
+ patchIndices.insert( pIter, ndx );
+ }
+
+
+ // pass event to CA Tracker
+
+
+ Logging( kHLTLogDebug, "HLT::TPCCATracker::DoEvent", "Reading hits",
+ "Total %d hits to read for slice %d", nClustersTotal, slice );
+
+
+ clusterData[islice].StartReading( slice, nClustersTotal );
+
+ for ( std::vector<unsigned long>::iterator pIter = patchIndices.begin(); pIter != patchIndices.end(); pIter++ ) {
+ ndx = *pIter;
+ iter = blocks + ndx;
+
+ int patch = AliHLTTPCDefinitions::GetMinPatchNr( *iter );
+ inPtrSP = ( AliHLTTPCClusterData* )( iter->fPtr );
+
+ Logging( kHLTLogDebug, "HLT::TPCCATracker::DoEvent", "Reading hits",
+ "Reading %d hits for slice %d - patch %d", inPtrSP->fSpacePointCnt, slice, patch );
+
+ for ( unsigned int i = 0; i < inPtrSP->fSpacePointCnt; i++ ) {
+ AliHLTTPCSpacePointData *c = &( inPtrSP->fSpacePoints[i] );
+ if ( CAMath::Abs( c->fZ ) > fClusterZCut ) continue;
+ if ( c->fPadRow > 159 ) {
+ HLTError( "Wrong TPC cluster with row number %d received", c->fPadRow );
+ continue;
+ }
+ clusterData[islice].ReadCluster( c->fID, c->fPadRow, c->fX, c->fY, c->fZ, c->fCharge );
+ }
+ }
+
+ clusterData[islice].FinishReading();
+ nClustersTotalSum += nClustersTotal;
+ }
- Logging( kHLTLogDebug, "HLT::TPCCATracker::DoEvent", "Reading hits",
- "Reading %d hits for slice %d - patch %d", inPtrSP->fSpacePointCnt, slice, patch );
+ //Prepare Output
+ AliHLTTPCCASliceOutput::outputControlStruct outputControl;
+ //Set tracker output so tracker does not have to output both formats!
+ outputControl.fObsoleteOutput = fOutputTRAKSEGS;
+ outputControl.fDefaultOutput = !fOutputTRAKSEGS;
- for ( unsigned int i = 0; i < inPtrSP->fSpacePointCnt; i++ ) {
- AliHLTTPCSpacePointData *c = &( inPtrSP->fSpacePoints[i] );
- if ( CAMath::Abs( c->fZ ) > fClusterZCut ) continue;
- if ( c->fPadRow > 159 ) {
- HLTError( "Wrong TPC cluster with row number %d received", c->fPadRow );
- continue;
- }
- clusterData.ReadCluster( c->fID, c->fPadRow, c->fX, c->fY, c->fZ, c->fCharge );
- }
- }
+ //For new output we can write directly to output buffer
+ outputControl.fOutputPtr = fOutputTRAKSEGS ? NULL : (char*) outputPtr;
+ outputControl.fOutputMaxSize = maxBufferSize;
- clusterData.FinishReading();
+ AliHLTTPCCASliceOutput** sliceOutput = new AliHLTTPCCASliceOutput*[slicecount];
+ memset(sliceOutput, 0, slicecount * sizeof(AliHLTTPCCASliceOutput*));
// reconstruct the event
-
TStopwatch timerReco;
-
- fTracker->ProcessSlices(slice, 1, &clusterData, &fOutput);
-
+ fTracker->SetOutputControl(&outputControl);
+ fTracker->ProcessSlices(minslice, slicecount, clusterData, sliceOutput);
timerReco.Stop();
-
+
int ret = 0;
-
- Logging( kHLTLogDebug, "HLT::TPCCATracker::DoEvent", "Reconstruct",
- "%d tracks found for slice %d", fOutput->NOutTracks(), slice );
-
-
- // write reconstructed tracks
-
unsigned int mySize = 0;
- int ntracks = fOutput->NOutTracks();
-
-
- if ( fOutputTRAKSEGS ) {
-
- AliHLTTPCTrackletData* outPtr = ( AliHLTTPCTrackletData* )( outputPtr );
-
- AliHLTTPCTrackSegmentData* currOutTracklet = outPtr->fTracklets;
-
- mySize = ( ( AliHLTUInt8_t * )currOutTracklet ) - ( ( AliHLTUInt8_t * )outputPtr );
-
- outPtr->fTrackletCnt = 0;
-
- for ( int itr = 0; itr < ntracks; itr++ ) {
+ int ntracks;
+ int error = 0;
- AliHLTTPCCAOutTrack &t = fOutput->OutTracks()[itr];
-
- //Logging( kHLTLogDebug, "HLT::TPCCATracker::DoEvent", "Wrtite output","track %d with %d hits", itr, t.NHits());
-
- if ( t.NHits() < fMinNTrackClusters ) continue;
-
- // calculate output track size
+ for (int islice = 0;islice < slicecount;islice++)
+ {
+ slice = minslice + islice;
+
+ if (sliceOutput[islice])
+ {
+ Logging( kHLTLogDebug, "HLT::TPCCATracker::DoEvent", "Reconstruct",
+ "%d tracks found for slice %d", sliceOutput[islice]->NOutTracks(), slice );
- unsigned int dSize = sizeof( AliHLTTPCTrackSegmentData ) + t.NHits() * sizeof( unsigned int );
- if ( mySize + dSize > maxBufferSize ) {
- HLTWarning( "Output buffer size exceed (buffer size %d, current size %d), %d tracks are not stored", maxBufferSize, mySize, ntracks - itr + 1 );
- ret = -ENOSPC;
- break;
- }
+ // write reconstructed tracks
- // convert CA track parameters to HLT Track Segment
-
- int iFirstRow = 1000;
- int iLastRow = -1;
- int iFirstHit = fOutput->OutTrackHit(t.FirstHitRef());
- int iLastHit = iFirstHit;
- for ( int ih = 0; ih < t.NHits(); ih++ ) {
- int hitID = fOutput->OutTrackHit(t.FirstHitRef() + ih);
- int iRow = clusterData.RowNumber( hitID );
- if ( iRow < iFirstRow ) { iFirstRow = iRow; iFirstHit = hitID; }
- if ( iRow > iLastRow ) { iLastRow = iRow; iLastHit = hitID; }
- }
+ if ( fOutputTRAKSEGS ) {
- AliHLTTPCCATrackParam par = t.StartPoint();
-
- par.TransportToX( clusterData.X( iFirstHit ), .99 );
-
- AliExternalTrackParam tp;
- AliHLTTPCCATrackConvertor::GetExtParam( par, tp, 0 );
-
- currOutTracklet->fX = tp.GetX();
- currOutTracklet->fY = tp.GetY();
- currOutTracklet->fZ = tp.GetZ();
- currOutTracklet->fCharge = ( int ) tp.GetSign();
- currOutTracklet->fPt = TMath::Abs( tp.GetSignedPt() );
- float snp = tp.GetSnp() ;
- if ( snp > .999 ) snp = .999;
- if ( snp < -.999 ) snp = -.999;
- currOutTracklet->fPsi = TMath::ASin( snp );
- currOutTracklet->fTgl = tp.GetTgl();
-
- currOutTracklet->fY0err = tp.GetSigmaY2();
- currOutTracklet->fZ0err = tp.GetSigmaZ2();
- float h = -currOutTracklet->fPt * currOutTracklet->fPt;
- currOutTracklet->fPterr = h * h * tp.GetSigma1Pt2();
- h = 1. / TMath::Sqrt( 1 - snp * snp );
- currOutTracklet->fPsierr = h * h * tp.GetSigmaSnp2();
- currOutTracklet->fTglerr = tp.GetSigmaTgl2();
-
- if ( par.TransportToX( clusterData.X( iLastHit ), .99 ) ) {
- currOutTracklet->fLastX = par.GetX();
- currOutTracklet->fLastY = par.GetY();
- currOutTracklet->fLastZ = par.GetZ();
- } else {
- currOutTracklet->fLastX = clusterData.X( iLastHit );
- currOutTracklet->fLastY = clusterData.Y( iLastHit );
- currOutTracklet->fLastZ = clusterData.Z( iLastHit );
- }
- //if( currOutTracklet->fLastX<10. ) {
- //HLTError("CA last point: hitxyz=%f,%f,%f, track=%f,%f,%f, tracklet=%f,%f,%f, nhits=%d",clusterData.X( iLastHit ),clusterData.Y( iLastHit],clusterData.Z( iLastHit],
- //par.GetX(), par.GetY(),par.GetZ(),currOutTracklet->fLastX,currOutTracklet->fLastY ,currOutTracklet->fLastZ, t.NHits());
- //}
-#ifdef INCLUDE_TPC_HOUGH
-#ifdef ROWHOUGHPARAMS
- currOutTracklet->fTrackID = 0;
- currOutTracklet->fRowRange1 = clusterData.RowNumber( iFirstHit );
- currOutTracklet->fRowRange2 = clusterData.RowNumber( iLastHit );
- currOutTracklet->fSector = slice;
- currOutTracklet->fPID = 211;
-#endif
-#endif // INCLUDE_TPC_HOUGH
+ ntracks = sliceOutput[islice]->NOutTracks();
+ AliHLTTPCTrackletData* outPtr = ( AliHLTTPCTrackletData* )( outputPtr );
- currOutTracklet->fNPoints = t.NHits();
+ AliHLTTPCTrackSegmentData* currOutTracklet = outPtr->fTracklets;
- for ( int i = 0; i < t.NHits(); i++ ) {
- currOutTracklet->fPointIDs[i] = clusterData.Id( fOutput->OutTrackHit(t.FirstHitRef()+i) );
- }
-
- currOutTracklet = ( AliHLTTPCTrackSegmentData* )( ( Byte_t * )currOutTracklet + dSize );
- mySize += dSize;
- outPtr->fTrackletCnt++;
- }
-
- } else { // default output type
-
- mySize = fOutput->EstimateSize( fOutput->NTracks(),
- fOutput->NTrackClusters() );
- if ( mySize <= maxBufferSize ) {
- const AliHLTUInt8_t* outputevent = reinterpret_cast<const AliHLTUInt8_t*>( fOutput );
- for ( unsigned int i = 0; i < mySize; i++ ) outputPtr[i] = outputevent[i];
- } else {
- HLTWarning( "Output buffer size exceed (buffer size %d, current size %d), tracks are not stored", maxBufferSize, mySize );
- mySize = 0;
- ret = -ENOSPC;
- }
+ mySize = ( ( AliHLTUInt8_t * )currOutTracklet ) - ( ( AliHLTUInt8_t * )outputPtr );
+
+ outPtr->fTrackletCnt = 0;
+
+ for ( int itr = 0; itr < ntracks; itr++ ) {
+
+ AliHLTTPCCAOutTrack &t = sliceOutput[islice]->OutTracks()[itr];
+
+ //Logging( kHLTLogDebug, "HLT::TPCCATracker::DoEvent", "Wrtite output","track %d with %d hits", itr, t.NHits());
+
+ if ( t.NHits() < fMinNTrackClusters ) continue;
+
+ // calculate output track size
+
+ unsigned int dSize = sizeof( AliHLTTPCTrackSegmentData ) + t.NHits() * sizeof( unsigned int );
+
+ if ( mySize + dSize > maxBufferSize ) {
+ HLTWarning( "Output buffer size exceed (buffer size %d, current size %d), %d tracks are not stored", maxBufferSize, mySize, ntracks - itr + 1 );
+ ret = -ENOSPC;
+ error = 1;
+ break;
+ }
+
+ // convert CA track parameters to HLT Track Segment
+
+ int iFirstRow = 1000;
+ int iLastRow = -1;
+ int iFirstHit = sliceOutput[islice]->OutTrackHit(t.FirstHitRef());
+ int iLastHit = iFirstHit;
+ for ( int ih = 0; ih < t.NHits(); ih++ ) {
+ int hitID = sliceOutput[islice]->OutTrackHit(t.FirstHitRef() + ih);
+ int iRow = clusterData[islice].RowNumber( hitID );
+ if ( iRow < iFirstRow ) { iFirstRow = iRow; iFirstHit = hitID; }
+ if ( iRow > iLastRow ) { iLastRow = iRow; iLastHit = hitID; }
+ }
+
+ AliHLTTPCCATrackParam par = t.StartPoint();
+
+ par.TransportToX( clusterData[islice].X( iFirstHit ), .99 );
+
+ AliExternalTrackParam tp;
+ AliHLTTPCCATrackConvertor::GetExtParam( par, tp, 0 );
+
+ currOutTracklet->fX = tp.GetX();
+ currOutTracklet->fY = tp.GetY();
+ currOutTracklet->fZ = tp.GetZ();
+ currOutTracklet->fCharge = ( int ) tp.GetSign();
+ currOutTracklet->fPt = TMath::Abs( tp.GetSignedPt() );
+ float snp = tp.GetSnp() ;
+ if ( snp > .999 ) snp = .999;
+ if ( snp < -.999 ) snp = -.999;
+ currOutTracklet->fPsi = TMath::ASin( snp );
+ currOutTracklet->fTgl = tp.GetTgl();
+
+ currOutTracklet->fY0err = tp.GetSigmaY2();
+ currOutTracklet->fZ0err = tp.GetSigmaZ2();
+ float h = -currOutTracklet->fPt * currOutTracklet->fPt;
+ currOutTracklet->fPterr = h * h * tp.GetSigma1Pt2();
+ h = 1. / TMath::Sqrt( 1 - snp * snp );
+ currOutTracklet->fPsierr = h * h * tp.GetSigmaSnp2();
+ currOutTracklet->fTglerr = tp.GetSigmaTgl2();
+
+ if ( par.TransportToX( clusterData[islice].X( iLastHit ), .99 ) ) {
+ currOutTracklet->fLastX = par.GetX();
+ currOutTracklet->fLastY = par.GetY();
+ currOutTracklet->fLastZ = par.GetZ();
+ } else {
+ currOutTracklet->fLastX = clusterData[islice].X( iLastHit );
+ currOutTracklet->fLastY = clusterData[islice].Y( iLastHit );
+ currOutTracklet->fLastZ = clusterData[islice].Z( iLastHit );
+ }
+ //if( currOutTracklet->fLastX<10. ) {
+ //HLTError("CA last point: hitxyz=%f,%f,%f, track=%f,%f,%f, tracklet=%f,%f,%f, nhits=%d",clusterData[islice].X( iLastHit ),clusterData[islice].Y( iLastHit],clusterData[islice].Z( iLastHit],
+ //par.GetX(), par.GetY(),par.GetZ(),currOutTracklet->fLastX,currOutTracklet->fLastY ,currOutTracklet->fLastZ, t.NHits());
+ //}
+ #ifdef INCLUDE_TPC_HOUGH
+ #ifdef ROWHOUGHPARAMS
+ currOutTracklet->fTrackID = 0;
+ currOutTracklet->fRowRange1 = clusterData[islice].RowNumber( iFirstHit );
+ currOutTracklet->fRowRange2 = clusterData[islice].RowNumber( iLastHit );
+ currOutTracklet->fSector = slice;
+ currOutTracklet->fPID = 211;
+ #endif
+ #endif // INCLUDE_TPC_HOUGH
+
+
+ currOutTracklet->fNPoints = t.NHits();
+
+ for ( int i = 0; i < t.NHits(); i++ ) {
+ currOutTracklet->fPointIDs[i] = clusterData[islice].Id( sliceOutput[islice]->OutTrackHit(t.FirstHitRef()+i) );
+ }
+
+ currOutTracklet = ( AliHLTTPCTrackSegmentData* )( ( Byte_t * )currOutTracklet + dSize );
+ mySize += dSize;
+ outPtr->fTrackletCnt++;
+ }
+
+ } else { // default output type
+ mySize += sliceOutput[islice]->OutputMemorySize();
+ ntracks += sliceOutput[islice]->NTracks();
+ }
+ }
+ else
+ {
+ HLTWarning( "Output buffer size exceed (buffer size %d, current size %d), tracks are not stored", maxBufferSize, mySize );
+ mySize = 0;
+ ret = -ENOSPC;
+ ntracks = 0;
+ error = 1;
+ break;
+ }
}
- if ( mySize > 0 ) {
- AliHLTComponentBlockData bd;
- FillBlockData( bd );
- bd.fOffset = 0;
- bd.fSize = mySize;
- bd.fSpecification = AliHLTTPCDefinitions::EncodeDataSpecification( slice, slice, minPatch, maxPatch );
- bd.fDataType = GetOutputDataType();
- outputBlocks.push_back( bd );
+ size = 0;
+ if (error == 0)
+ {
+ for (int islice = 0;islice < slicecount;islice++)
+ {
+ slice = minslice + islice;
+ if (!fOutputTRAKSEGS) mySize = sliceOutput[islice]->OutputMemorySize();
+ if (mySize > 0)
+ {
+ AliHLTComponentBlockData bd;
+ FillBlockData( bd );
+ bd.fOffset = fOutputTRAKSEGS ? 0 : ((char*) sliceOutput[islice] - (char*) outputPtr);
+ bd.fSize = mySize;
+ bd.fSpecification = AliHLTTPCDefinitions::EncodeDataSpecification( slice, slice, sliceminPatch[islice], slicemaxPatch[islice] );
+ bd.fDataType = GetOutputDataType();
+ outputBlocks.push_back( bd );
+ size += mySize;
+ }
+ }
}
- size = mySize;
+
+ //No longer needed
+ delete[] clusterData;
+ //These are only temporary pointers to the output and no longer needed
+ delete[] sliceOutput;
timer.Stop();
// Set log level to "Warning" for on-line system monitoring
int hz = ( int ) ( fFullTime > 1.e-10 ? fNEvents / fFullTime : 100000 );
int hz1 = ( int ) ( fRecoTime > 1.e-10 ? fNEvents / fRecoTime : 100000 );
- HLTInfo( "CATracker slice %d: output %d tracks; input %d clusters, patches %d..%d, rows %d..%d; time: full %d / reco %d Hz",
- slice, ntracks, nClustersTotal, minPatch, maxPatch, row[0], row[1], hz, hz1 );
+ //Min and Max Patch are taken for first slice processed...
+ HLTInfo( "CATracker slices %d-%d: output %d tracks; input %d clusters, patches %d..%d, rows %d..%d; time: full %d / reco %d Hz",
+ minslice, maxslice, ntracks, nClustersTotalSum, sliceminPatch[0], slicemaxPatch[0], slicerow[0], slicerow[1], hz, hz1 );
return ret;
}
/** the tracker object */
AliHLTTPCCATrackerFramework* fTracker; //! transient
- AliHLTTPCCASliceOutput* fOutput;
/** magnetic field */
double fSolenoidBz; // see above
return(0);
}
+GPUhd() void AliHLTTPCCATrackerFramework::SetOutputControl( AliHLTTPCCASliceOutput::outputControlStruct* val)
+{
+ fOutputControl = val;
+ fGPUTracker.SetOutputControl(val);
+ for (int i = 0;i < fgkNSlices;i++)
+ {
+ fCPUTrackers[i].SetOutputControl(val);
+ }
+}
+
int AliHLTTPCCATrackerFramework::ProcessSlices(int firstSlice, int sliceCount, AliHLTTPCCAClusterData* pClusterData, AliHLTTPCCASliceOutput** pOutput)
{
//Process sliceCount slices starting from firstslice, in is pClusterData array, out pOutput array
else
{
#ifdef HLTCA_STANDALONE
+ if (fOutputControl->fOutputPtr && omp_get_max_threads() > 1)
+ {
+ printf("fOutputPtr must not be used with OpenMP\n");
+ return(1);
+ }
+
#pragma omp parallel for
#endif
for (int iSlice = 0;iSlice < CAMath::Min(sliceCount, fgkNSlices - firstSlice);iSlice++)
#include "AliHLTTPCCATracker.h"
#include "AliHLTTPCCAGPUTracker.h"
#include "AliHLTTPCCAParam.h"
+#include "AliHLTTPCCASliceOutput.h"
#include <iostream>
class AliHLTTPCCASliceOutput;
{
public:
AliHLTTPCCATrackerFramework() :
- fGPUTrackerAvailable(false), fUseGPUTracker(false), fGPUDebugLevel(0), fGPUSliceCount(0), fGPUTracker(), fCPUSliceCount(fgkNSlices)
+ fGPUTrackerAvailable(false), fUseGPUTracker(false), fGPUDebugLevel(0), fGPUSliceCount(0), fGPUTracker(), fOutputControl( NULL ), fCPUSliceCount(fgkNSlices)
{
- fGPUTrackerAvailable= (fGPUTracker.InitGPU(1, -1) == 0);
- fGPUSliceCount = fGPUTrackerAvailable;
- fUseGPUTracker = fGPUTrackerAvailable;
+ fUseGPUTracker = (fGPUTrackerAvailable= (fGPUTracker.InitGPU() == 0));
+ fGPUSliceCount = fGPUTrackerAvailable ? fGPUTracker.GetSliceCount() : 0;
}
- ~AliHLTTPCCATrackerFramework()
+ ~AliHLTTPCCATrackerFramework()
{}
int InitGPU(int sliceCount = 1, int forceDeviceID = -1);
int InitializeSliceParam(int iSlice, AliHLTTPCCAParam ¶m);
+ GPUhd() const AliHLTTPCCASliceOutput::outputControlStruct* OutputControl() const { return fOutputControl; }
+ GPUhd() void SetOutputControl( AliHLTTPCCASliceOutput::outputControlStruct* val);
+
int ProcessSlices(int firstSlice, int sliceCount, AliHLTTPCCAClusterData* pClusterData, AliHLTTPCCASliceOutput** pOutput);
unsigned long long int* PerfTimer(int GPU, int iSlice, int iTimer);
int fGPUSliceCount; //How many slices to process parallel
AliHLTTPCCAGPUTracker fGPUTracker;
+ AliHLTTPCCASliceOutput::outputControlStruct* fOutputControl;
+
AliHLTTPCCATracker fCPUTrackers[fgkNSlices];
int fCPUSliceCount;
ifdef ALIHLT_MLUCDIR
HLTDEFS += -Duse_logging
-EINCLUDE := $(ALIHLT_MLUCDIR)/include
-ELIBS := MLUC
-ELIBSDIR :=$(ALIHLT_MLUCDIR)/lib/tgt_$(ALICE_TARGET)
+EINCLUDE += $(ALIHLT_MLUCDIR)/include
+ELIBS += MLUC
+ELIBSDIR +=$(ALIHLT_MLUCDIR)/lib/tgt_$(ALICE_TARGET)
endif
#GPU Tracker Build for the libAliHLTTPC
ifdef NVCC
-ELIBS := cudart
-ELIBSDIR := $(NVCC:/bin/nvcc=/lib64)
-MODULE_CUHDRS := $(TRACKING_CA) tracking-ca/AliHLTTPCCAGPUTracker.h
-MODULE_CUSRCS := tracking-ca/AliHLTTPCCAGPUTracker.cu
-else
-MODULE_SRCS += tracking-ca/AliHLTTPCCAGPUTracker.cxx
+ELIBS += cudart
+ELIBSDIR += $(NVCC:/bin/nvcc=/lib64)
+MODULE_CUHDRS += $(TRACKING_CA) tracking-ca/AliHLTTPCCAGPUTracker.h
+MODULE_CUSRCS += tracking-ca/AliHLTTPCCAGPUTrackerNVCC.cu
+EDEFINE += -DBUILD_GPU
endif
CLASS_HDRS += tracking-ca/AliHLTTPCCAGPUTracker.h
include $(MODDIR)/libAliHLTTPC.conf
-MODULE_SRCS:= $(CLASS_HDRS:.h=.cxx) \
+MODULE_SRCS = $(CLASS_HDRS:.h=.cxx) \
AliHLTTPCLog.cxx
MODULE_HDRS:= $(CLASS_HDRS) \
DHDR:=$(patsubst %,TPCLib/%,$(MODULE_DHDR))
CINTAUTOLINK:= $(shell test "x$(MODULE_DHDR)" = "x" && echo 1)
-EDEFINE := ${HLTDEFS}
+EDEFINE += ${HLTDEFS}
PACKCXXFLAGS := ${HLTCXXFLAGS} -Wshadow
PACKCFLAGS := ${HLTCLFAGS}
PACKDCXXFLAGS:= ${HLTDCXXFLAGS}