#!/bin/bash
#include benchmark.config
+# blame: Mikolaj Krzewicki, mkrzewic@cern.ch
# this script runs the CPass0/CPass1 train
# produced OCDB updates are local
jobindex=${7}
shift 7
if ! parseConfig ${configFile} "$@"; then return 1; fi
-
+ echo Start: goCPass0
#record the working directory provided by the batch system
batchWorkingDirectory=${PWD}
#runCPassX/C expects the raw chunk to be linked in the run dir
#despite it being accessed by the full path
- ln -s ${infile} ${runpath}/${chunkName}
+ if [[ $copyInputData == 0 ]]; then
+ ln -s ${infile} ${runpath}/${chunkName}
+ else
+ copyFileToLocal ${infile} ${runpath}/${chunkName}
+ fi
#####MC
if [[ -n ${generateMC} ]]; then
fi
######
- if [[ ! -f ${inputList} && -z ${pretend} ]]; then
+ if [[ "${inputList}" == "${inputList%%://*}" && ! -f "${inputList}" && -z ${pretend} ]]; then
touch ${doneFileTmp}
echo "input file ${inputList} not found, exiting..." >> ${doneFileTmp}
cp "$doneFileTmp" "$doneFile" || rm -f "$doneFileTmp" "$doneFile"
echo
chmod u+x runCPass0.sh
+ #remove spaces from around arguments to root macros
+ #for example this sometimes fails:
+ # root 'macro.C(argument1, argument2)'
sed -i '/.*root .*\.C/ s|\s*,\s*|,|g' *.sh
if [[ -n ${postSetUpActionCPass0} ]]; then
fi
#run CPass0
- echo "${runpath}/runCPass0.sh ${infile} ${nEvents} ${runNumber} ${ocdbPath} ${recoTriggerOptions}"
+ echo "${runpath}/runCPass0.sh /${infile} ${nEvents} ${runNumber} ${ocdbPath} ${recoTriggerOptions}"
if [[ -n ${pretend} ]]; then
sleep ${pretendDelay}
touch AliESDs.root
touch rec.log
touch calib.log
else
- echo ./runCPass0.sh "${infile}" "${nEvents}" "${runNumber}" "${ocdbPath}" "${recoTriggerOptions}"
- ./runCPass0.sh "${infile}" "${nEvents}" "${runNumber}" "${ocdbPath}" "${recoTriggerOptions}"
+ #caveat: in the local case, first arg must start with a slash
+ ./runCPass0.sh "/${infile}" "${nEvents}" "${runNumber}" "${ocdbPath}" "${recoTriggerOptions}"
fi
#move stuff to final destination
#validate CPass0
cd ${outputDir}
- touch ${doneFileTmp}
- echo "dir ${outputDir}" >> ${doneFileTmp}
if summarizeLogs >> ${doneFileTmp}; then
[[ -f ${outputDirMC}/galice.root ]] && echo "sim ${outputDirMC}/galice.root" >> ${doneFileTmp}
[[ -f AliESDfriends_v1.root ]] && echo "calibfile ${outputDir}/AliESDfriends_v1.root" >> ${doneFileTmp}
[[ "${runpath}" != "${outputDir}" ]] && rm -rf ${runpath} && echo "removing ${runpath}"
cp "$doneFileTmp" "$doneFile" || rm -f "$doneFileTmp" "$doneFile"
[[ -n ${removeTMPdoneFile} ]] && rm -f ${doneFileTmp}
+ echo End: goCPass0
return 0
)
shift 7
extraOpts=("$@")
if ! parseConfig ${configFile} "$@"; then return 1; fi
+ echo Start: goCPass1
#record the working directory provided by the batch system
batchWorkingDirectory=${PWD}
#Packages= ;OutputDir= ;LPMPass= ;TriggerAlias= ;LPMRunNumber= ;LPMProductionType= ;LPMInteractionType= ;LPMProductionTag= ;LPMAnchorRun= ;LPMAnchorProduction= ;LPMAnchorYear=
export PRODUCTION_METADATA="OutputDir=cpass1"
- if [[ ! -f ${inputList} && -z ${pretend} ]]; then
+ if [[ "${inputList}" == "${inputList%%://*}" && ! -f "${inputList}" && -z ${pretend} ]]; then
touch ${doneFileTmp}
echo "input file ${inputList} not found, exiting..." >> ${doneFileTmp}
cp "$doneFileTmp" "$doneFile" || rm -f "$doneFileTmp" "$doneFile"
fi
#this is needed for runCPass1.sh
- ln -s ${infile} ${runpath}/${chunkName}
+ if [[ $copyInputData == 0 ]]; then
+ ln -s ${infile} ${runpath}/${chunkName}
+ else
+ copyFileToLocal ${infile} ${runpath}/${chunkName}
+ fi
logOutputDir=${runpath}
[[ -n ${logToFinalDestination} ]] && logOutputDir=${outputDir}
/bin/ls
echo
- #remove spaces around commas from calls to root
+ #remove spaces from around arguments to root macros
+ #for example this sometimes fails:
+ # root 'macro.C(argument1, argument2)'
sed -i '/.*root .*\.C/ s|\s*,\s*|,|g' *.sh
if [[ -n ${postSetUpActionCPass1} ]]; then
#run CPass1
chmod u+x runCPass1.sh
- echo "${runpath}/runCPass1.sh ${infile} ${nEvents} ${runNumber} ${ocdbPath} ${recoTriggerOptions}"
+ echo "${runpath}/runCPass1.sh /${infile} ${nEvents} ${runNumber} ${ocdbPath} ${recoTriggerOptions}"
if [[ -n ${pretend} ]]; then
sleep ${pretendDelay}
touch AliESDs_Barrel.root
touch qa.log
touch filtering.log FilterEvents_Trees.root
else
- ./runCPass1.sh "${infile}" "${nEvents}" "${runNumber}" "${ocdbPath}" "${recoTriggerOptions}"
+ #caveat: in the local case, first arg must start with a slash
+ ./runCPass1.sh "/${infile}" "${nEvents}" "${runNumber}" "${ocdbPath}" "${recoTriggerOptions}"
[[ ! -f AliESDs_Barrel.root && -f Barrel/AliESDs.root ]] && mv Barrel/AliESDs.root AliESDs_Barrel.root
[[ ! -f AliESDfriends_Barrel.root && -f Barrel/AliESDfriends.root ]] && mv Barrel/AliESDfriends.root AliESDfriends_Barrel.root
#validate CPass1
cd ${outputDir}
- touch ${doneFileTmp}
- echo "dir ${outputDir}" >> ${doneFileTmp}
if summarizeLogs >> ${doneFileTmp}; then
[[ -f AliESDs_Barrel.root ]] && echo "esd ${outputDir}/AliESDs_Barrel.root" >> ${doneFileTmp}
[[ -f AliESDfriends_v1.root ]] && echo "calibfile ${outputDir}/AliESDfriends_v1.root" >> ${doneFileTmp}
[[ "${runpath}" != "${outputDir}" ]] && rm -rf ${runpath}
cp "$doneFileTmp" "$doneFile" || rm -f "$doneFileTmp" "$doneFile"
[[ -n ${removeTMPdoneFile} ]] && rm -f ${doneFileTmp}
+ echo End: goCPass1
return 0
)
calibrationFilesToMerge=${5} #can be a non-existent file, will then be produced on the fly
shift 5
if ! parseConfig ${configFile} "$@"; then return 1; fi
+ echo Start: goMergeCPass0
#record the working directory provided by the batch system
batchWorkingDirectory=${PWD}
[[ ${file##*/} =~ .*\.sh ]] && chmod +x ${file##*/}
done
+ #remove spaces from around arguments to root macros
+ #for example this sometimes fails:
+ # root 'macro.C(argument1, argument2)'
sed -i '/.*root .*\.C/ s|\s*,\s*|,|g' *.sh
alirootInfo > ALICE_ROOT.log
/bin/ls -1 ${outputDir}/*/AliESDfriends_v1.root 2>/dev/null > ${calibrationFilesToMerge}
fi
- echo "${mergingScript} ${calibrationFilesToMerge} ${runNumber} local://./OCDB ${ocdbStorage}"
+ echo "${mergingScript} ${calibrationFilesToMerge} ${runNumber} local://./OCDB defaultOCDB=${ocdbStorage} fileAccessMethod=nocopy"
if [[ -n ${pretend} ]]; then
sleep ${pretendDelay}
touch CalibObjects.root
echo "some calibration" >> ./OCDB/TPC/Calib/TimeGain/someCalibObject_0-999999_cpass0.root
echo "some calibration" >> ./OCDB/TPC/Calib/TimeDrift/otherCalibObject_0-999999_cpass0.root
else
- ./${mergingScript} ${calibrationFilesToMerge} ${runNumber} "local://./OCDB" ${ocdbStorage} >> "mergeMakeOCDB.log"
+ ./${mergingScript} ${calibrationFilesToMerge} ${runNumber} "local://./OCDB" defaultOCDB=${ocdbStorage} fileAccessMethod=nocopy >> "mergeMakeOCDB.log"
#produce the calib trees for expert QA (dcsTime.root)
goMakeLocalOCDBaccessConfig ./OCDB
#validate merging cpass0
cd ${outputDir}
- touch ${doneFileTmp}
- echo "dir ${outputDir}" >> ${doneFileTmp}
if summarizeLogs >> ${doneFileTmp}; then
[[ -f CalibObjects.root ]] && echo "calibfile ${outputDir}/CalibObjects.root" >> ${doneFileTmp}
[[ -f dcsTime.root ]] && echo "dcsTree ${outputDir}/dcsTime.root" >> ${doneFileTmp}
[[ "${runpath}" != "${outputDir}" ]] && rm -rf ${runpath}
cp "$doneFileTmp" "$doneFile" || rm -f "$doneFileTmp" "$doneFile"
[[ -n ${removeTMPdoneFile} ]] && rm -f ${doneFileTmp}
+ echo End: goMergeCPass0
return 0
)
filteredFilesToMerge=${7}
shift 7
if ! parseConfig ${configFile} "$@"; then return 1; fi
+ echo Start: goMergeCPass1
#record the working directory provided by the batch system
batchWorkingDirectory=${PWD}
[[ ${file##*/} =~ .*\.sh ]] && chmod +x ${file##*/}
done
+ #remove spaces from around arguments to root macros
+ #for example this sometimes fails:
+ # root 'macro.C(argument1, argument2)'
sed -i '/.*root .*\.C/ s|\s*,\s*|,|g' *.sh
#configure local OCDB storage from CPass0 (creates the localOCDBaccessConfig.C script)
echo "/bin/ls -1 ${outputDir}/*/QAresults*.root | while read x; do echo ${x%/*}; done | sort | uniq > ${qaFilesToMerge}"
/bin/ls -1 ${outputDir}/*/QAresults*.root | while read x; do echo ${x%/*}; done | sort | uniq > ${qaFilesToMerge}
fi
-
- echo "${mergingScript} ${calibrationFilesToMerge} ${runNumber} local://./OCDB ${ocdbStorage}"
+
+ echo "${mergingScript} ${calibrationFilesToMerge} ${runNumber} local://./OCDB defaultOCDB=${ocdbStorage} fileAccessMethod=nocopy"
if [[ -n ${pretend} ]]; then
sleep ${pretendDelay}
touch ocdb.log
touch ${qaMergedOutputFileName}
mkdir -p OCDB
else
- ./${mergingScript} ${calibrationFilesToMerge} ${runNumber} "local://./OCDB" ${ocdbStorage}
+ ./${mergingScript} ${calibrationFilesToMerge} ${runNumber} "local://./OCDB" defaultOCDB=${ocdbStorage} fileAccessMethod=nocopy
#merge QA (and filtered trees)
[[ -n ${AliAnalysisTaskFilteredTree_fLowPtTrackDownscaligF} ]] && export AliAnalysisTaskFilteredTree_fLowPtTrackDownscaligF
#validate merge cpass1
cd ${outputDir}
- touch ${doneFileTmp}
- echo "dir ${outputDir}" >> ${doneFileTmp}
if summarizeLogs >> ${doneFileTmp}; then
[[ -f CalibObjects.root ]] && echo "calibfile ${outputDir}/CalibObjects.root" >> ${doneFileTmp}
[[ -f ${qaMergedOutputFileName} ]] && echo "qafile ${outputDir}/${qaMergedOutputFileName}" >> ${doneFileTmp}
[[ "${runpath}" != "${outputDir}" ]] && rm -rf ${runpath}
cp "$doneFileTmp" "$doneFile" || rm -f "$doneFileTmp" "$doneFile"
[[ -n ${removeTMPdoneFile} ]] && rm -f ${doneFileTmp}
+ echo End: goMergeCPass1
return 0
)
#if which greadlink; then self=$(greadlink -f "${0}"); fi
#for reference copy the setup to the output dir
- cp ${self} ${commonOutputPath}
- cp ${configFile} ${commonOutputPath}
- cp ${inputFileList} ${commonOutputPath}
+ paranoidCp ${self} ${commonOutputPath}
+ paranoidCp ${configFile} ${commonOutputPath}
+ paranoidCp ${inputFileList} ${commonOutputPath}
#submit - use makeflow if available, fall back to old stuff when makeflow not there
if which makeflow; then
else
echo "no makeflow!"
fi
+
+ #summarize the run based on the makeflow log
+ #and add it to the end of summary log
+ awk '/STARTED/ {startTime=$3}
+ /COMPLETED/ {endTime=$3}
+ END {print "makeflow running time: "(endTime-startTime)/1000000/3600" hours"}' \
+ benchmark.makeflow.makeflowlog | tee -a summary.log
+ paranoidCp summary.log ${commonOutputPath}
+
return 0
}
"stderr"
)
+ #put dir information in the output
+ echo "dir $PWD"
+
#check logs
local logstatus=0
for log in ${logFiles[*]}; do
fi
pwd
/bin/ls
- touch ${doneFile}
summarizeLogs >> ${doneFile}
#echo mv -f * ${outputDir}
echo "submit make a summary"
echo
- submit "${JOBID6}" 1 1 "${LASTJOB}" "${alirootEnv} ${self}" MakeSummary ${configFile}
+ [[ -z ${alirootEnvQA} ]] && alirootEnvQA=$(encSpaces "${alirootEnv}")
+ submit "${JOBID6}" 1 1 "${LASTJOB}" "${alirootEnvQA} ${self}" MakeSummary ${configFile} "commonOutputPath=${commonOutputPath}"
LASTJOB=${JOBID6}
#################################################################################
(
umask 0002
[[ $# -lt 3 ]] && echo "goWaitForOutput() wrong number of arguments, exiting.." && return 1
+ echo Start:goWaitForOutput
echo searchPath=${1}
echo fileName=${2}
echo numberOfFiles=${3}
sleep 60
done
echo "DONE! exiting..."
+ echo End:goWaitForOutput
return 0
)
stackTraceTree()
(
- # make stacktrace processing in case of standard root crash log
- # input is a (list of) text files with the stack trace (either gdb aoutput
- # produced with e.g. gdb --batch --quiet -ex "bt" -ex "quit" aliroot core, or the root crash log), output is a TTree formatted table.
-# example usage:
-# benchmark.sh stackTraceTree /foo/*/rec.log
+ if [[ $# -lt 1 ]]; then
+ echo 'make stacktrace processing in case of standard root crash log'
+ echo 'input is a (list of) text files with the stack trace (either gdb aoutput'
+ echo 'produced with e.g. gdb --batch --quiet -ex "bt" -ex "quit" aliroot core,'
+ echo 'or the root crash log), output is a TTree formatted table.'
+ echo 'example usage:'
+ echo 'benchmark.sh stackTraceTree /foo/*/rec.log'
+ echo 'benchmark.sh stackTraceTree $(cat file.list)'
+ echo 'benchmark.sh stackTraceTree `cat file.list`'
+ return 0
+ fi
gawk '
BEGIN {
print "frame/I:method/C:line/C:cpass/I:aliroot/I:file/C";
# will appear in the submission dir.
#some defaults:
log="summary.log"
+ jsonLog="summary.json"
productionID="qa"
configFile=${1}
#record the working directory provided by the batch system
batchWorkingDirectory=${PWD}
- logTmp=${batchWorkingDirectory}/${log}
- logDest=${commonOutputPath}/${log}
+ logTmp="${batchWorkingDirectory}/${log}"
+ jsonLogTmp="${batchWorkingDirectory}/${jsonLog}"
[[ -f ${alirootSource} && -z ${ALICE_ROOT} ]] && source ${alirootSource}
[[ -z ${commonOutputPath} ]] && commonOutputPath=${PWD}
#copy some useful stuff
- #and go to the commonOutputPath
- cp ${configFile} ${commonOutputPath}
+ [ -f "${commonOutputPath}/${configFile}" ] || paranoidCp "${configFile}" "${commonOutputPath}"
exec &> >(tee ${logTmp})
stackTraceTree ${commonOutputPath}/*/*/000*/cpass0/*/stacktrace* > stacktrace_cpass0.tree
stackTraceTree ${commonOutputPath}/*/*/000*/cpass1/*/stacktrace* > stacktrace_cpass1.tree
+ # json header: open array of objects
+ echo '[' > "${jsonLogTmp}"
+
echo total numbers for the production:
echo
awk 'BEGIN {nFiles=0;nCore=0;}
statusQA=$(awk '/mergeMakeOCDB.log/ {print $2}' ${x/cpass0/cpass1} 2>/dev/null)
printf "%s\t ocdb.log cpass0: %s\t ocdb.log cpass1: %s\tqa.log:%s\t| cpass0: rec:%s/%s stderr:%s/%s calib:%s/%s cpass1: rec:%s/%s stderr:%s/%s calib:%s/%s QAbarrel:%s/%s QAouter:%s/%s\n" ${runNumber} ${statusOCDBcpass0} ${statusOCDBcpass1} ${statusQA} ${statusCPass0[0]} ${statusCPass0[1]} ${statusCPass0[2]} ${statusCPass0[3]} ${statusCPass0[4]} ${statusCPass0[5]} ${statusCPass1[0]} ${statusCPass1[1]} ${statusCPass1[2]} ${statusCPass1[3]} ${statusCPass1[4]} ${statusCPass1[5]} ${statusCPass1[6]} ${statusCPass1[7]} ${statusCPass1[8]} ${statusCPass1[9]}
+
+ # produce json summary
+ statusOCDBcpass0json=false
+ statusOCDBcpass1json=false
+ statusQAjson=false
+ [[ "$statusOCDBcpass0" == 'OK' ]] && statusOCDBcpass0json=true
+ [[ "$statusOCDBcpass1" == 'OK' ]] && statusOCDBcpass1json=true
+ [[ "$statusQA" == 'OK' ]] && statusQAjson=true
+ cat >> "$jsonLogTmp" <<EOF
+ {
+ run: ${runNumber},
+ status: { ocdb_pass0: ${statusOCDBcpass0json}, ocdb_pass1: ${statusOCDBcpass1json}, qa: ${statusQAjson} },
+ cpass0: {
+ reco: { n_ok: ${statusCPass0[0]}, n_bad: ${statusCPass0[1]} },
+ stderr: { n_ok: ${statusCPass0[2]}, n_bad: ${statusCPass0[3]} },
+ calib: { n_ok: ${statusCPass0[4]}, n_bad: ${statusCPass0[5]} }
+ },
+ cpass1: {
+ reco: { n_ok: ${statusCPass1[0]}, n_bad: ${statusCPass1[1]} },
+ stderr: { n_ok: ${statusCPass1[2]}, n_bad: ${statusCPass1[3]} },
+ calib: { n_ok: ${statusCPass1[4]}, n_bad: ${statusCPass1[5]} },
+ qabarrel: { n_ok: ${statusCPass1[6]}, n_bad: ${statusCPass1[7]} },
+ qarouter: { n_ok: ${statusCPass1[8]}, n_bad: ${statusCPass1[9]} }
+ }
+ },
+EOF
+
done
+ # json footer: close array of objects
+ echo ']' >> "${jsonLogTmp}"
+
#make lists with output files - QA, trending, filtering and calibration
### wait for the merging of all runs to be over ###
rm -f qa.list
#if set, email the summary
[[ -n ${MAILTO} ]] && cat ${logTmp} | mail -s "benchmark ${productionID} done" ${MAILTO}
- # Copy log to destination (delete all on failure to signal error)
- cp "$logTmp" "$logDest" || rm -f "$logTmp" "$logDest"
+ #copy logs to destination
+ paranoidCp "$logTmp" "${commonOutputPath}"
+ paranoidCp "$jsonLogTmp" "${commonOutputPath}"
#copy output files
+ exec &> >(tee fileCopy.log)
paranoidCp QAplots ${commonOutputPath}
paranoidCp *.list ${commonOutputPath}
paranoidCp *.root ${commonOutputPath}
paranoidCp *.log ${commonOutputPath}
+ paranoidCp fileCopy.log ${commonOutputPath}
return 0
)
parseConfig()
{
+ echo Start: parseConfig
configFile=${1}
shift
args=("$@")
logToFinalDestination=1
ALIROOT_FORCE_COREDUMP=1
pretendDelay=0
+ copyInputData=0
#first, source the config file
if [ -f ${configFile} ]; then
#export the aliroot function if defined to override normal behaviour
[[ $(type -t aliroot) =~ "function" ]] && export -f aliroot && echo "exporting aliroot() function..."
-
+ echo End: parseConfig
return 0
}
return 0
}
+copyFileToLocal()
+(
+  # Copy a single file to a local destination, retrying on failure.
+  #
+  # Arguments:
+  #   $1 - source: either a plain local path, or a URL "proto://..."
+  #        (root is fetched with xrdcp, http and https with curl)
+  #   $2 - destination path on the local filesystem
+  # Globals:
+  #   maxCopyTries - maximum number of attempts (10 when unset or empty)
+  # Returns:
+  #   0 when a copy attempt succeeded
+  #   1 when all attempts failed, or when source/destination is missing
+  #   2 when the source protocol is not supported
+  #
+  # The body is a subshell, so variables assigned here (including the
+  # maxCopyTries default) do not leak into the caller.
+  src="$1"
+  dst="$2"
+  ok=0
+  [[ -z "${maxCopyTries}" ]] && maxCopyTries=10
+
+  # retrying cannot help when an argument is missing, so fail right away
+  if [[ -z "$src" || -z "$dst" ]]; then
+    echo "copy file to local dest FAILED: source and destination are required"
+    return 1
+  fi
+
+  # everything before the first "://" is the protocol; when src contains
+  # no "://" the expansion yields src itself, which marks a local path
+  proto="${src%%://*}"
+
+  echo "copy file to local dest started: $src -> $dst"
+
+  for (( i=1 ; i<=maxCopyTries ; i++ )) ; do
+
+    echo "...attempt $i of $maxCopyTries"
+    # drop leftovers of a previous (possibly partial) attempt
+    rm -f "$dst"
+
+    if [[ "$proto" == "$src" ]]; then
+      cp "$src" "$dst" && ok=1
+    else
+      case "$proto" in
+        root)
+          xrdcp -f "$src" "$dst" && ok=1
+          ;;
+        http|https)
+          # -o names the output file (-O takes no argument and would make
+          # curl treat $dst as a second URL); -f turns HTTP errors into a
+          # non-zero exit so that an error page is not taken for the file
+          curl -f -L -o "$dst" "$src" && ok=1
+          ;;
+        *)
+          echo "protocol not supported: $proto"
+          return 2
+          ;;
+      esac
+    fi
+
+    # stop here so that $i still holds the number of the good attempt
+    [[ "$ok" == 1 ]] && break
+
+  done
+
+  if [[ "$ok" == 1 ]] ; then
+    echo "copy file to local dest OK after $i attempt(s): $src -> $dst"
+    return 0
+  fi
+
+  echo "copy file to local dest FAILED after $maxCopyTries attempt(s): $src -> $dst"
+  return 1
+)
+
paranoidCp()
(
#recursively copy files and directories
(
#copy a single file to a target in an existing dir
#repeat a few times if copy fails
+ #returns 1 on failure, 0 on success
src="${1}"
dst="${2}"
+ ok=0
[[ -d "${dst}" ]] && dst="${dst}/${src##*/}"
- [[ -z "${maxCopyTries}" ]] && maxCopyTries=5
- echo "maxCopyTries=${maxCopyTries}"
- echo "cp ${src} ${dst}"
- cp "${src}" "${dst}"
- i=0
- until cmp -s "${src}" "${dst}"; do
- echo "try: ${i}"
- [[ -f "${dst}" ]] && rm "${dst}"
- cp "${src}" "${dst}"
- [[ ${i} -gt ${maxCopyTries} ]] && ret=1 && return 1
- (( i++ ))
+ [[ -z "${maxCopyTries}" ]] && maxCopyTries=10
+
+ echo "paranoid copy started: $src -> $dst"
+
+ for (( i=1 ; i<=maxCopyTries ; i++ )) ; do
+
+ echo "...attempt $i of $maxCopyTries"
+ rm -f "$dst"
+ cp "$src" "$dst"
+
+ cmp -s "$src" "$dst"
+ if [ $? == 0 ] ; then
+ ok=1
+ break
+ fi
+
done
- return 0
+
+ if [[ "$ok" == 1 ]] ; then
+ echo "paranoid copy OK after $i attempt(s): $src -> $dst"
+ return 0
+ fi
+
+ echo "paranoid copy FAILED after $maxCopyTries attempt(s): $src -> $dst"
+ return 1
)
guessRunData()