2 # process QA output into plots and trending
3 # run without arguments for examples
4 # origin: Mikolaj Krzewicki, mkrzewic@cern.ch
6 if [ ${BASH_VERSINFO} -lt 4 ]; then
7 echo "bash version >= 4 needed, you have ${BASH_VERSION}, exiting..."
15 echo " ${0##*/} option=value [option=value]"
16 echo " at least inputList should be specified, or configFile containing it:"
17 echo " ${0##*/} inputList=file.list"
18 echo " options override config file (if any), e.g.:"
19 echo " ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
20 echo "some expert options"
21 echo " inputListHighPtTrees=file.list - external list of filtered trees, requires inputList to be set"
22 echo " includeDetectors=TPC,V0,MU - only process those"
23 echo " excludeDetectors=EVS,TPC - skip processing of those"
24 echo " - see example config file for more"
28 if ! parseConfig "$@"; then
33 [[ -z $ALICE_ROOT ]] && echo "ALICE_ROOT not defined" && return 1
36 if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
37 alien-token-init ${alienUserName}
38 #this is a hack! alien-token init seems not enough
39 #but the gclient_env script messes up the LD_LIBRARY_PATH
42 done < <(grep -v "LD_LIBRARY_PATH" /tmp/gclient_env_${UID})
54 #be paranoid and make some full paths
55 [[ ! -f ${inputList} ]] && echo "no input list: ${inputList}" && return 1
56 inputList=$(get_realpath ${inputList})
57 mkdir -p ${workingDirectory}
58 #this is a trick to get the full path of workingDirectory
59 #(on a mac 'readlink -f' does not work...)
60 workingDirectory=$(workingDirectory=${workingDirectory%/}; cd ${workingDirectory%/*}; echo "${PWD}/${workingDirectory##*/}")
61 if [[ ! -d ${workingDirectory} ]]; then
62 echo "working dir $workingDirectory does not exist and cannot be created"
65 cd ${workingDirectory}
68 echo inputList=$inputList
69 echo outputDirectory=$outputDirectory
72 dateString=$(date +%Y-%m-%d-%H-%M-%S-%N)
73 echo "Start time QA process: $dateString"
76 mkdir -p $logDirectory
77 [[ ! -d $logDirectory ]] && echo "no log dir $logDirectory" && return 1
78 logFile="$logDirectory/${0##*/}.${dateString}.log"
# abort (reporting the log file path) if the log file does not exist at this point
80 [[ ! -f ${logFile} ]] && echo "cannot write logfile $logFile" && return 1
81 echo "logFile = $logFile"
84 lockFile=${workingDirectory}/runQA.lock
85 [[ -f ${lockFile} ]] && echo "lock ${lockFile} exists!" | tee ${logFile} && return 1
87 [[ ! -f ${lockFile} ]] && echo "cannot lock $lockFile" | tee ${logFile} && return 1
91 ################################################################
93 for detectorScript in $ALICE_ROOT/PWGPP/QA/detectorQAscripts/*; do
95 echo "##############################################"
98 [[ ! ${detectorScript} =~ .*\.sh$ ]] && continue
99 detector=${detectorScript%.sh}
100 detector=${detector##*/}
101 #by default we expect the container in the QA root file to be named like
103 detectorQAcontainerName=${detector}
106 if [[ "${excludeDetectors}" =~ ${detector} ]]; then
107 echo "${detector} is excluded in config, skipping..."
111 #if includeDetectors set, only process those detectors specified there
112 if [[ -n ${includeDetectors} && ! "${includeDetectors}" =~ ${detector} ]]; then
113 echo "${detector} not included in includeDetectors, skipping..."
117 logSummary=${logDirectory}/summary-${detector}-${dateString}.log
118 hostInfo >> ${logSummary}
119 outputDir=$(substituteDetectorName ${detector} ${outputDirectory})
120 tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}-${dateString}
121 if ! mkdir -p ${tmpDetectorRunDir}; then
122 echo "cannot create the temp dir $tmpDetectorRunDir"
125 cd ${tmpDetectorRunDir}
127 tmpPrefix=${tmpDetectorRunDir}/${outputDir}
128 echo "running QA for ${detector}"
129 echo " outputDir=$outputDir"
130 echo " tmpPrefix=$tmpPrefix"
132 #source the detector script
133 #unset the detector functions from previous iterations (detectors)
135 unset -f runLevelQAouter
136 unset -f periodLevelQA
137 unset -f runLevelEventStatQA
138 unset -f runLevelHighPtTreeQA
139 unset -f periodLevelHighPtTreeQA
140 source ${detectorScript}
142 #################################################################
143 #produce the QA and trending tree for each file (run)
144 unset arrOfTouchedProductions
145 declare -A arrOfTouchedProductions
146 while read inputFile; do
150 #first check if input file exists
151 [[ ! -f ${inputFile%\#*} ]] && echo "file ${inputFile%\#*} not accessible" && continue
153 if ! guessRunData ${inputFile}; then
154 echo "could not guess run data from ${inputFile}"
157 echo "anchorYear for ${originalPeriod} is: ${anchorYear}"
159 tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
160 tmpRunDir=${tmpProductionDir}/000${runNumber}
161 mkdir -p ${tmpRunDir}
164 #check what kind of input file we have, default is a zip archive
165 #set the inputs accordingly
170 #it is possible we get the highPt trees from somewhere else
171 #search the list of high pt trees for the proper run number
172 if [[ -n ${inputListHighPtTrees} ]]; then
173 highPtTree=$(egrep -m1 ${runNumber} ${inputListHighPtTrees})
174 echo "loaded the highPtTree ${highPtTree} from external file ${inputListHighPtTrees}"
176 #if we are explicit about the input file this takes precedence
177 #over earlier additions
178 [[ "${inputFile}" =~ QAresults.root$ ]] && qaFile=${inputFile}
179 [[ "${inputFile}" =~ QAresults_outer.root$ ]] && qaFileOuter=${inputFile}
180 [[ "${inputFile}" =~ FilterEvents_Trees.root$ ]] && highPtTree=${inputFile}
181 [[ "${inputFile}" =~ event_stat.root$ ]] && eventStatFile=${inputFile}
182 if [[ "${inputFile}" =~ \.zip$ ]]; then
183 [[ -z ${qaFile} ]] && qaFile=${inputFile}
184 [[ -z ${qaFileOuter} ]] && qaFileOuter=${inputFile}
185 [[ -z ${highPtTree} ]] && highPtTree=${inputFile}
186 [[ -z ${eventStatFile} ]] && eventStatFile=${inputFile}
189 #if we have zip archives in the input, extract the proper file name
190 #from the archive and append in a root-like fashion
191 if [[ "$qaFile" =~ .*.zip$ ]]; then
192 if unzip -l ${qaFile} | egrep "QAresults.root" &>/dev/null; then
193 qaFile+="#QAresults.root"
194 elif unzip -l ${qaFile} | egrep "QAresults_barrel.root" &>/dev/null; then
195 qaFile+="#QAresults_barrel.root"
200 if [[ "$qaFileOuter" =~ .*.zip$ ]]; then
201 if unzip -l ${qaFileOuter} | egrep "QAresults_outer.root" &>/dev/null; then
202 qaFileOuter+="#QAresults_outer.root"
207 if [[ "$highPtTree" =~ .*.zip$ ]]; then
208 if unzip -l ${highPtTree} | egrep "FilterEvents_Trees.root" &>/dev/null; then
209 highPtTree+="#FilterEvents_Trees.root"
214 if [[ "${eventStatFile}" =~ .*.zip$ ]]; then
215 if unzip -l ${eventStatFile} | egrep "event_stat.root" &>/dev/null; then
216 eventStatFile+="#event_stat.root"
217 elif unzip -l ${eventStatFile} | egrep "event_stat_barrel.root" &>/dev/null; then
218 eventStatFile+="#event_stat_barrel.root"
225 echo qaFileOuter=$qaFileOuter
226 echo highPtTree=$highPtTree
227 echo eventStatFile=$eventStatFile
228 echo ocdbStorage=${ocdbStorage}
231 #standard QA based on QAresults.root file (and variants)
232 if [[ -n ${qaFile} && $(type -t runLevelQA) =~ "function" ]]; then
233 echo running ${detector} runLevelQA for run ${runNumber} from ${qaFile}
234 ( runLevelQA "${qaFile}" ) &>> runLevelQA.log
235 #cache the touched production + an example file to guarantee consistent run data parsing
236 arrOfTouchedProductions[${tmpProductionDir}]="${inputFile%\#*}"
238 #standard QA based on QAresults_outer.root file (there in cpass, with different triggers)
239 if [[ -n ${qaFileOuter} && $(type -t runLevelQAouter) =~ "function" ]]; then
240 echo running ${detector} runLevelQAouter for run ${runNumber} from ${qaFileOuter}
241 ( runLevelQAouter "${qaFileOuter}" ) &>> runLevelQA.log
242 #cache the touched production + an example file to guarantee consistent run data parsing
243 arrOfTouchedProductions[${tmpProductionDir}]="${inputFile%\#*}"
245 #expert QA based on high pt trees
246 if [[ -n ${highPtTree} && $(type -t runLevelHighPtTreeQA) =~ "function" ]]; then
247 echo running ${detector} runLevelHighPtTreeQA for run ${runNumber} from ${highPtTree}
248 ( runLevelHighPtTreeQA "${highPtTree}" ) &>> runLevelQA.log
249 #cache the touched production + an example file to guarantee consistent run data parsing
250 arrOfTouchedProductions[${tmpProductionDir}]="${inputFile%\#*}"
252 #event stat QA based on event_stat.root file
253 if [[ -n ${eventStatFile} && $(type -t runLevelEventStatQA) =~ "function" ]]; then
254 echo running ${detector} runLevelEventStatQA for run ${runNumber} from ${eventStatFile}
255 ( runLevelEventStatQA "${eventStatFile}" ) &>> runLevelQA.log
256 #cache the touched production + an example file to guarantee consistent run data parsing
257 arrOfTouchedProductions[${tmpProductionDir}]="${inputFile%\#*}"
260 #perform some default actions:
261 #if trending.root not created, create a default one
262 if [[ ! -f trending.root ]]; then
263 aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detectorQAcontainerName}\",\"trending.root\",\"trending\",\"recreate\")" 2>&1 | tee -a runLevelQA.log
265 if [[ ! -f trending.root ]]; then
266 echo "trending.root not created"
269 cd ${tmpDetectorRunDir}
273 #################################################################
274 #cache which productions were (re)done
275 echo "list of processed productions:"
276 echo " ${!arrOfTouchedProductions[@]}"
279 #################################################################
280 #(re)do the merging/trending
281 for tmpProductionDir in ${!arrOfTouchedProductions[@]}; do
282 cd ${tmpProductionDir}
284 echo "running period level stuff in ${tmpProductionDir}"
287 productionDir=${outputDir}/${tmpProductionDir#${tmpPrefix}}
288 echo productionDir=${outputDir}/${tmpProductionDir#${tmpPrefix}}
290 mkdir -p ${productionDir}
291 if [[ ! -d ${productionDir} ]]; then
292 echo "cannot make productionDir $productionDir" && continue
295 #move runs to final destination
296 for dir in ${tmpProductionDir}/000*; do
298 oldRunDir=${outputDir}/${dir#${tmpPrefix}}
299 if ! guessRunData "${arrOfTouchedProductions[${tmpProductionDir}]}"; then
300 echo "could not guess run data from ${arrOfTouchedProductions[${tmpProductionDir}]}"
304 #before moving - VALIDATE!!!
305 if ! validate ${dir}; then
309 #moving a dir is an atomic operation, no locking necessary
310 if [[ -d ${oldRunDir} ]]; then
311 echo "removing old ${oldRunDir}"
314 echo "moving new ${runNumber} to ${productionDir}"
315 mv -f ${dir} ${productionDir}
318 #go to a temp dir to do the period level stuff in a completely clean dir
319 tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
321 echo tmpPeriodLevelQAdir="${tmpPeriodLevelQAdir}"
322 if ! mkdir -p ${tmpPeriodLevelQAdir}; then continue; fi
323 cd ${tmpPeriodLevelQAdir}
325 #link the final list of per-run dirs here, just the dirs
326 #to have a clean working directory
328 declare -a linkedStuff
329 for x in ${productionDir}/000*; do [[ -d $x ]] && ln -s $x && linkedStuff+=(${x##*/}); done
331 #merge trending files if any
332 if /bin/ls 000*/trending.root &>/dev/null; then
333 hadd trending.root 000*/trending.root &> periodLevelQA.log
336 #run the period level trending/QA
337 if [[ -f "trending.root" && $(type -t periodLevelQA) =~ "function" ]]; then
338 echo running ${detector} periodLevelQA for production ${period}/${pass}
339 ( periodLevelQA trending.root ) &>> periodLevelQA.log
341 echo "WARNING: not running ${detector} periodLevelQA for production ${period}/${pass}, no trending.root"
344 if ! validate ${PWD}; then continue; fi
346 #here we are validated so move the produced QA to the final place
347 #clean up linked stuff first
348 [[ -n ${linkedStuff[@]} ]] && rm ${linkedStuff[@]}
349 periodLevelLock=${productionDir}/runQA.lock
350 if [[ ! -f ${periodLevelLock} ]]; then
351 #some of the output could be a directory, so handle that
352 #TODO: maybe use rsync?
353 #lock to avoid conflicts:
354 echo "${HOSTNAME} ${dateString}" > ${periodLevelLock}
355 for x in ${tmpPeriodLevelQAdir}/*; do
356 if [[ -d ${x} ]]; then
357 echo "removing ${productionDir}/${x##*/}"
358 rm -rf ${productionDir}/${x##*/}
359 echo "moving ${x} to ${productionDir}"
360 mv ${x} ${productionDir}
362 if [[ -f ${x} ]]; then
363 echo "moving ${x} to ${productionDir}"
364 mv -f ${x} ${productionDir}
367 rm -f ${periodLevelLock}
369 rm -rf ${tmpPeriodLevelQAdir}
371 echo "ERROR: cannot move to destination" >> ${logSummary}
372 echo "production dir ${productionDir} locked!" >> ${logSummary}
373 echo "check and maybe manually do:" >> ${logSummary}
374 echo " rm ${periodLevelLock}" >> ${logSummary}
375 echo " rsync -av ${tmpPeriodLevelQAdir}/ ${productionDir}/" >> ${logSummary}
381 cd ${workingDirectory}
383 if [[ -z ${planB} ]]; then
385 echo removing ${tmpDetectorRunDir}
386 rm -rf ${tmpDetectorRunDir}
390 done #end of detector loop
399 #in case of emergency
400 #first check if we have the email of the detector expert defined,
401 #if yes, append to the mailing list
402 local mailTo=${MAILTO}
403 local detExpertEmailVar="MAILTO_${detector}"
404 [[ -n "${!detExpertEmailVar}" ]] && mailTo+=" ${!detExpertEmailVar}"
405 if [[ -n ${mailTo} ]]; then
407 echo "trouble detected, sending email to ${mailTo}"
408 cat ${logSummary} | mail -s "${detector} QA in need of assistance" ${mailTo}
415 summarizeLogs ${1} >> ${logSummary}
417 if [[ ${logStatus} -ne 0 ]]; then
418 echo "WARNING not validated: ${1}"
428 [[ ! -d ${dir} ]] && dir=${PWD}
430 #print a summary of logs
439 for log in ${dir}/${logFiles[*]}; do
440 [[ ! -f ${log} ]] && continue
441 errorSummary=$(validateLog ${log})
443 [[ validationStatus -ne 0 ]] && logstatus=1
444 if [[ ${validationStatus} -eq 0 ]]; then
445 #in pretend mode randomly report an error in rec.log some cases
446 if [[ -n ${pretend} && "${log}" == "rec.log" ]]; then
447 [[ $(( ${RANDOM}%2 )) -ge 1 ]] && echo "${log} BAD random error" || echo "${log} OK"
451 elif [[ ${validationStatus} -eq 1 ]]; then
452 echo "${log} BAD ${errorSummary}"
453 elif [[ ${validationStatus} -eq 2 ]]; then
454 echo "${log} OK MWAH ${errorSummary}"
462 gdb --batch --quiet -ex "bt" -ex "quit" aliroot ${x} > stacktrace_${x//\//_}.log
463 done < <(/bin/ls ${PWD}/*/core 2>/dev/null; /bin/ls ${PWD}/core 2>/dev/null)
474 'error while loading shared libraries'
477 'Thread [0-9]* (Thread'
480 '\.C.*error:.*\.h: No such file'
482 'Interpreter error recovered'
490 local errorSummary=""
491 local warningSummary=""
493 for ((i=0; i<${#errorConditions[@]};i++)); do
494 local tmp=$(grep -m1 -e "${errorConditions[${i}]}" ${log})
495 [[ -n ${tmp} ]] && tmp+=" : "
499 for ((i=0; i<${#warningConditions[@]};i++)); do
500 local tmp=$(grep -m1 -e "${warningConditions[${i}]}" ${log})
501 [[ -n ${tmp} ]] && tmp+=" : "
502 warningSummary+=${tmp}
505 if [[ -n ${errorSummary} ]]; then
506 echo "${errorSummary}"
510 if [[ -n ${warningSummary} ]]; then
511 echo "${warningSummary}"
524 #where to search for qa files
527 workingDirectory="${PWD}"
528 #where to place the final qa plots
529 #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
530 outputDirectory="${workingDirectory}/%DET"
531 #filter out detector option
532 excludeDetectors="EXAMPLE"
534 logDirectory=${workingDirectory}/logs
538 #MAILTO="fbellini@cern.ch"
546 #first, check if the config file is configured
547 #if yes - source it so that other options can override it
549 for opt in "${args[@]}"; do
550 if [[ ${opt} =~ configFile=.* ]]; then
552 [[ ! -f ${configFile} ]] && echo "configFile ${configFile} not found, exiting..." && return 1
553 echo "using config file: ${configFile}"
554 source "${configFile}"
559 #then, parse the options as they override the options from file
560 for opt in "${args[@]}"; do
561 if [[ ! "${opt}" =~ .*=.* ]]; then
# report the offending argument itself: ${var} is only assigned further down,
# so at this point it would be empty or hold the name from a previous iteration
562 echo "badly formatted option ${opt}, should be: option=value, stopping..."
565 local var="${opt%%=*}"
566 local value="${opt#*=}"
567 echo "${var}=${value}"
568 export ${var}="${value}"
575 #guess the period from the path, pick the rightmost one
580 legoTrainRunNumber=""
589 declare -a path=( $1 )
591 local dirDepth=$(( ${#path[*]}-1 ))
# Walk the elements of the path array from the last index (dirDepth) down to 0
# and classify each element by its shape. Every rule below simply overwrites
# its variable whenever it matches.
593 for ((x=${dirDepth};x>=0;x--)); do
# neighbours of the current element; fieldPrev is only (re)assigned when x-1 is a valid index
595 [[ $((x-1)) -ge 0 ]] && local fieldPrev=${path[$((x-1))]}
596 local field=${path[${x}]}
597 local fieldNext=${path[$((x+1))]}
# an all-digit element (the regex also accepts an empty one) directly followed by an
# element ending in .zip or .root is taken as the lego train run number
599 [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
# once a lego train number is known and pass is still empty, pass is taken from fieldPrev
600 [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
# LHC + two digits + lowercase letter: period is the element with everything from its
# last underscore removed, originalPeriod keeps the element unchanged
601 [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*} && originalPeriod=${field}
# "000" followed by exactly six digits: run number, stored without the leading 000
602 [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
# exactly six digits: short run number
603 [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
# four digits starting with 20: year
604 [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
# literal "sim" or "data": data type
605 [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
# drop the lego train number if it matches the short run number (regex match, unanchored)
609 [[ -n ${shortRunNumber} && "${legoTrainRunNumber}" =~ ${shortRunNumber} ]] && legoTrainRunNumber=""
# without a lego train number, pass is the element just before the last one
610 [[ -z ${legoTrainRunNumber} ]] && pass=${path[$((dirDepth-1))]}
# simulation: fixed pass name, run number taken from the short run number, originalPass cleared
611 [[ "${dataType}" =~ ^sim$ ]] && pass="passMC" && runNumber=${shortRunNumber} && originalPass="" #for MC not from lego, the runnumber is identified as lego train number, thus needs to be nulled
# a remaining lego train number is appended to pass as a "_lego<number>" suffix
612 [[ -n ${legoTrainRunNumber} ]] && pass+="_lego${legoTrainRunNumber}"
614 #modify the OCDB: set the year
615 if [[ ${dataType} =~ sim ]]; then
616 anchorYear=$(run2year $runNumber)
617 if [[ -z "${anchorYear}" ]]; then
618 echo "WARNING: anchorYear not available for this production: ${originalPeriod}, runNumber: ${runNumber}. Cannot set the OCDB."
621 ocdbStorage=$(setYear ${anchorYear} ${ocdbStorage})
623 ocdbStorage=$(setYear ${year} ${ocdbStorage})
626 #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber}} || -z ${pass} ]];
627 if [[ -z ${runNumber} ]]
639 #for a given run print the year.
640 #the run-year table is ${runMap} (a string)
641 #defined in the config file
642 #one line per year, format: year runMin runMax
644 [[ -z ${run} ]] && return 1
648 while read year runMin runMax; do
649 [[ -z ${year} || -z ${runMin} || -z ${runMax} ]] && continue
650 [[ ${run} -ge ${runMin} && ${run} -le ${runMax} ]] && echo ${year} && break
651 done < <(echo "${runMap}")
# Print the directory template ${dir} with the detector placeholder filled in:
#   %det -> ${det} converted to lower case (bash 4 ${var,,}); first occurrence only
#   %DET -> ${det} as it stands at that point (already lower-cased if the %det
#           line matched before); first occurrence only; det=${det} is a no-op
# The two lines below print nothing when ${dir} contains neither placeholder.
# NOTE(review): the assignments of ${det} and ${dir} are not visible here; judging by
# the call 'substituteDetectorName ${detector} ${outputDirectory}' they are
# presumably $1 and $2 - confirm.
655 substituteDetectorName()
659 [[ ${dir} =~ \%det ]] && det=${det,,} && echo ${dir/\%det/${det}}
660 [[ ${dir} =~ \%DET ]] && det=${det} && echo ${dir/\%DET/${det}}
669 if cd "$(echo "${1%/*}")" &>/dev/null
671 # file *may* not be local
672 # exception is ./file.ext
673 # try 'cd .; cd -;' *works!*
677 # file *must* be local
681 # file *cannot* exist
684 # reassemble realpath
685 echo "$tmppwd"/"${1##*/}"
692 # ${1} - year to be set
693 # ${2} - where to set the year
694 local year1=$(guessYear ${1})
695 local year2=$(guessYear ${2})
697 [[ ${year1} -ne ${year2} && -n ${year2} && -n ${year1} ]] && path=${2/\/${year2}\//\/${year1}\/}
704 #guess the year from the path, pick the rightmost one
706 declare -a pathArray=( ${1} )
709 for field in ${pathArray[@]}; do
710 [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
718 # Hello world - Print AliRoot/Root/Alien system info
724 echo --------------------------------------
728 echo HOSTINFO HOSTNAME" "$HOSTNAME
729 echo HOSTINFO DATE" "`date`
730 echo HOSTINFO gccpath" "`which gcc`
731 echo HOSTINFO gcc version" "`gcc --version | grep gcc`
732 echo --------------------------------------
737 echo --------------------------------------
741 echo ROOTINFO ROOT" "`which root`
742 echo ROOTINFO VERSION" "`root-config --version`
744 echo --------------------------------------
750 echo --------------------------------------
754 echo ALIROOTINFO ALIROOT" "`which aliroot`
755 echo ALIROOTINFO VERSION" "`echo $ALICE_LEVEL`
756 echo ALIROOTINFO TARGET" "`echo $ALICE_TARGET`
758 echo --------------------------------------
763 #echo --------------------------------------
766 #for a in `alien --printenv`; do echo ALIENINFO $a; done
768 #echo --------------------------------------