]> git.uio.no Git - u/mrichter/AliRoot.git/blame - PWGPP/QA/scripts/runQA.sh
remove debug message
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh
CommitLineData
f7707400 1#!/bin/bash
# Print the command line synopsis of this script on stdout.
_runQAprintUsage()
{
  echo "Usage: "
  echo " ${0##*/} option=value [option=value]"
  echo " at least inputList should be specified, or configFile containing it:"
  echo " ${0##*/} inputList=file.list"
  echo " options override config file (if any), e.g.:"
  echo " ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
}

# Entry point: check the arguments and the environment, then run updateQA.
# Arguments: option=value pairs, handed unchanged to parseConfig and updateQA
# Returns:   1 if no argument was given, if parseConfig fails or if
#            ALICE_ROOT is empty; otherwise the status of updateQA
main()
{
  if [[ -z $1 ]]; then
    _runQAprintUsage
    return 1
  fi

  # "$@" is quoted so that a value containing blanks stays one argument.
  if ! parseConfig "$@"; then
    # The usage text is printed directly; re-running ${0} only works when
    # the script is executable and reachable under the name in ${0}.
    _runQAprintUsage
    return 1
  fi

  if [[ -z ${ALICE_ROOT} ]]; then
    echo "ALICE_ROOT not defined"
    return 1
  fi

  # alien-token-init is run only when ocdbStorage contains raw://
  local ocdbregex='raw://'
  if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
    alien-token-init
  fi

  updateQA "$@"
}
28
# Run the QA of every detector script found in
# $ALICE_ROOT/PWGPP/QA/detectorQAscripts over the files named in inputList.
#
# For each detector script the per-run output is produced in a temporary
# directory below workingDirectory, validated with validate(), and moved
# to the final output directory; afterwards the period level QA is run
# for every production that was touched.
#
# Arguments: option=value pairs, handed to parseConfig
# Globals:   reads the configuration set by parseConfig; all working
#            variables are deliberately left global under their original
#            names, because the functions defined by the sourced detector
#            scripts run in this shell and can see them
# Returns:   1 when the input list, working directory, log file or lock
#            cannot be set up
# NOTE(review): after the lock is taken all output of the shell is
#            redirected to the log file by 'exec' and stays redirected.
# NOTE(review): commands run inside the 'while read' loop inherit the
#            input list as stdin - confirm none of them consumes it.
updateQA()
{
  umask 0002
  parseConfig "$@"

  #be paranoid and make some full paths
  if [[ ! -f ${inputList} ]]; then
    echo "no input list: ${inputList}"
    return 1
  fi
  inputList=$(get_realpath "${inputList}")
  mkdir -p "${workingDirectory}"
  # Resolve to an absolute path by entering the directory itself; going to
  # the parent and appending the last component fails for a relative
  # single-component name such as "work".
  workingDirectoryAbs=$(cd "${workingDirectory}" 2>/dev/null && echo "${PWD}")
  if [[ -z ${workingDirectoryAbs} || ! -d ${workingDirectoryAbs} ]]; then
    echo "working dir $workingDirectory does not exist and cannot be created"
    return 1
  fi
  workingDirectory=${workingDirectoryAbs}
  cd "${workingDirectory}" || return 1

  echo JOB config:
  echo "inputList=$inputList"
  echo "outputDirectory=$outputDirectory"
  echo

  dateString=$(date +%Y-%m-%d-%H-%M)
  echo "Start time QA process: $dateString"

  #logging
  mkdir -p "${logDirectory}"
  if [[ ! -d ${logDirectory} ]]; then
    echo "no log dir $logDirectory"
    return 1
  fi
  logFile="${logDirectory}/${0##*/}.${dateString}.log"
  touch "${logFile}"
  if [[ ! -f ${logFile} ]]; then
    echo "cannot write logfile $logFile"
    return 1
  fi
  echo "logFile = $logFile"

  #check lock
  lockFile=${logDirectory}/runQA.lock
  if [[ -f ${lockFile} ]]; then
    echo "lock ${lockFile} exists!" | tee "${logFile}"
    return 1
  fi
  touch "${lockFile}"
  if [[ ! -f ${lockFile} ]]; then
    echo "cannot lock $lockFile" | tee "${logFile}"
    return 1
  fi

  #from here on stdout and stderr go to the log file
  exec &>"${logFile}"

  ################################################################
  #ze detector loop
  for detectorScript in "${ALICE_ROOT}"/PWGPP/QA/detectorQAscripts/*; do
    unset planB
    [[ ! ${detectorScript} =~ .*\.sh$ ]] && continue
    detector=${detectorScript%.sh}
    detector=${detector##*/}

    #skip if excluded
    if [[ "${excludeDetectors}" =~ ${detector} ]]; then
      echo "${detector} is excluded in config, skipping..."
      continue
    fi

    #if includeDetectors set, only process those detectors specified there
    if [[ -n ${includeDetectors} && ! "${includeDetectors}" =~ ${detector} ]]; then
      echo "${detector} not included in includeDetectors, skipping..."
      continue
    fi

    logSummary=${logDirectory}/summary-${detector}-${dateString}.log
    outputDir=$(substituteDetectorName "${detector}" "${outputDirectory}")
    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}-${dateString}
    if ! mkdir -p "${tmpDetectorRunDir}"; then
      echo "cannot create the temp dir $tmpDetectorRunDir"
      continue
    fi
    cd "${tmpDetectorRunDir}" || continue

    tmpPrefix=${tmpDetectorRunDir}/${outputDir}
    echo
    echo "##############################################"
    echo "running QA for ${detector}"
    echo " outputDir=$outputDir"
    echo " tmpPrefix=$tmpPrefix"

    #forget the functions of the previous detector before loading this one
    unset -f runLevelQA
    unset -f periodLevelQA
    unset -f runLevelHighPtTreeQA
    unset -f periodLevelHighPtTreeQA
    source "${detectorScript}"

    #################################################################
    #produce the QA and trending tree for each file (run)
    unset arrOfTouchedProductions
    declare -A arrOfTouchedProductions
    while read -r qaFile; do
      echo

      if ! guessRunData "${qaFile}"; then
        echo "could not guess run data from ${qaFile}"
        continue
      fi

      tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
      tmpRunDir=${tmpProductionDir}/000${runNumber}
      mkdir -p "${tmpRunDir}"
      cd "${tmpRunDir}" || continue

      #by default we expect to have everything in the same archive
      highPtTree=${qaFile}

      #maybe the input is not an archive, but a file
      [[ "${qaFile}" =~ "QAresults.root" ]] && highPtTree=""
      [[ "${qaFile}" =~ "FilterEvents_Trees.root" ]] && qaFile=""

      #it is possible we get the highPt trees from somewhere else
      #search the list of high pt trees for the proper run number
      if [[ -n ${inputListHighPtTrees} ]]; then
        highPtTree=$(grep -E -m1 -- "${runNumber}" "${inputListHighPtTrees}")
        echo "loaded the highPtTree ${highPtTree} from external file ${inputListHighPtTrees}"
      fi

      echo "qaFile=$qaFile"
      echo "highPtTree=$highPtTree"

      #what if we have a zip archive?
      if [[ "$qaFile" =~ \.zip$ ]]; then
        if unzip -l "${qaFile}" | grep "QAresults.root" &>/dev/null; then
          qaFile="${qaFile}#QAresults.root"
        else
          qaFile=""
        fi
      fi
      if [[ "$highPtTree" =~ \.zip$ ]]; then
        if unzip -l "${highPtTree}" | grep "FilterEvents_Trees.root" &>/dev/null; then
          highPtTree="${highPtTree}#FilterEvents_Trees.root"
        else
          highPtTree=""
        fi
      fi

      if [[ -n ${qaFile} && $(type -t runLevelQA) =~ "function" ]]; then
        echo "running ${detector} runLevelQA for run ${runNumber} from ${qaFile}"
        runLevelQA "${qaFile}" &> runLevelQA.log
        #perform some default actions:
        #if trending.root not created, create a default one
        if [[ ! -f trending.root ]]; then
          aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" &>> runLevelQA.log
        fi
        arrOfTouchedProductions[${tmpProductionDir}]=1
      fi
      #expert QA based on high pt trees
      if [[ -n ${highPtTree} && $(type -t runLevelHighPtTreeQA) =~ "function" ]]; then
        echo "running ${detector} runLevelHighPtTreeQA for run ${runNumber} from ${highPtTree}"
        runLevelHighPtTreeQA "${highPtTree}" &> runLevelHighPtTreeQA.log
        arrOfTouchedProductions[${tmpProductionDir}]=1
      fi

      cd "${tmpDetectorRunDir}"

    done < "${inputList}"

    #################################################################
    #cache which productions were (re)done
    echo "list of processed productions:"
    echo " ${!arrOfTouchedProductions[@]}"
    echo

    #################################################################
    #(re)do the merging/trending
    for tmpProductionDir in "${!arrOfTouchedProductions[@]}"; do
      cd "${tmpProductionDir}" || continue
      echo
      echo "running period level stuff in ${tmpProductionDir}"

      #final location: same sub path as below tmpPrefix, but under outputDir
      productionDir=${outputDir}/${tmpProductionDir#"${tmpPrefix}"}
      echo "productionDir=${productionDir}"

      mkdir -p "${productionDir}"
      if [[ ! -d ${productionDir} ]]; then
        echo "cannot make productionDir $productionDir"
        continue
      fi

      #move runs to final destination
      for dir in "${tmpProductionDir}"/000*; do
        echo
        oldRunDir=${outputDir}/${dir#"${tmpPrefix}"}
        #the file name is a dummy, only the directory part carries run data
        if ! guessRunData "${dir}/dummyName"; then
          echo "could not guess run data from ${dir}"
          continue
        fi

        #before moving - VALIDATE!!!
        if ! validate "${dir}"; then
          continue
        fi

        if [[ -d ${oldRunDir} ]]; then
          echo "removing old ${oldRunDir}"
          rm -rf "${oldRunDir}"
        fi
        echo "moving new ${runNumber} to ${productionDir}"
        mv -f "${dir}" "${productionDir}"
      done

      #go to a temp dir to do the period level stuff
      tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
      echo
      echo "tmpPeriodLevelQAdir=${tmpPeriodLevelQAdir}"
      if ! mkdir -p "${tmpPeriodLevelQAdir}"; then continue; fi
      cd "${tmpPeriodLevelQAdir}" || continue

      #link the final list of per-run dirs here, just the dirs
      #to have a clean working directory
      unset linkedStuff
      declare -a linkedStuff
      for x in "${productionDir}"/000*; do
        [[ -d $x ]] && ln -s "$x" && linkedStuff+=("${x##*/}")
      done

      #merge trending files if any
      if /bin/ls 000*/trending.root &>/dev/null; then
        hadd trending.root 000*/trending.root &> periodLevelQA.log
      fi

      #run the period level trending/QA
      if [[ -f "trending.root" && $(type -t periodLevelQA) =~ "function" ]]; then
        echo "running ${detector} periodLevelQA for production ${period}/${pass}"
        periodLevelQA trending.root &>> periodLevelQA.log
      else
        echo "WARNING: not running ${detector} periodLevelQA for production ${period}/${pass}, no trending.root"
      fi

      if ! validate "${PWD}"; then continue; fi

      #here we are validated so move the produced QA to the final place
      #clean up linked stuff first
      (( ${#linkedStuff[@]} > 0 )) && rm -- "${linkedStuff[@]}"
      #some of the output could be a directory, so handle that
      #TODO: maybe use rsync?
      for x in "${tmpPeriodLevelQAdir}"/*; do
        if [[ -d ${x} ]]; then
          echo "removing ${productionDir}/${x##*/}"
          rm -rf "${productionDir}/${x##*/}"
          echo "moving ${x} to ${productionDir}"
          mv "${x}" "${productionDir}"
        fi
        if [[ -f ${x} ]]; then
          echo "moving ${x} to ${productionDir}"
          mv -f "${x}" "${productionDir}"
        fi
      done

      #remove the temp dir
      rm -rf "${tmpPeriodLevelQAdir}"

    done

    cd "${workingDirectory}"

    #planB is set by validate() when a log summary reported a problem;
    #in that case the temporary directory is kept
    if [[ -z ${planB} ]]; then
      echo
      echo "removing ${tmpDetectorRunDir}"
      rm -rf "${tmpDetectorRunDir}"
    else
      executePlanB
    fi
  done

  #remove lock
  rm -f "${lockFile}"
}
290
# In case of emergency: mail the log summary to the addresses in MAILTO.
# Globals:   MAILTO (read) - blank separated recipients; nothing is done
#            when it is empty
#            logSummary (read) - file whose content becomes the mail body
# Outputs:   a notice on stdout before the mail is sent
# Returns:   0 when MAILTO is empty, otherwise the status of mail
executePlanB()
{
  [[ -n ${MAILTO} ]] || return 0

  echo
  echo "trouble detected, sending email to ${MAILTO}"

  # Split the recipient list into words without exposing it to globbing.
  local -a recipients
  read -r -a recipients <<< "${MAILTO}"

  if [[ -f ${logSummary} && -r ${logSummary} ]]; then
    mail -s "qa in need of assistance" "${recipients[@]}" < "${logSummary}"
  else
    # Without this branch an empty logSummary would leave the mail body
    # to be read from the caller's stdin.
    echo "log summary '${logSummary}' is not available" \
      | mail -s "qa in need of assistance" "${recipients[@]}"
  fi
}
301
# Validate a directory by appending the output of summarizeLogs to the
# summary log.
# Arguments: $1 - directory to check
# Globals:   logSummary (read) - file the summary is appended to
#            planB (written) - set to 1 when the validation fails
# Returns:   0 when summarizeLogs (including the redirection) succeeded,
#            1 otherwise
validate()
{
  local logStatus=0
  # Both the argument and the redirect target are quoted so that paths
  # containing blanks are not split.
  summarizeLogs "${1}" >> "${logSummary}" || logStatus=$?

  if (( logStatus != 0 )); then
    echo "WARNING not validated: ${1}"
    planB=1
    return 1
  fi
  return 0
}
313
# Print one status line per log file found in a directory and create
# stack traces for core files.
# Arguments: $1 - directory to inspect; the current directory is used
#                 when it is not an existing directory
# Globals:   pretend (read)
# Outputs:   "<log> OK", "<log> BAD <errors>" or "<log> OK MWAH <warnings>"
#            on stdout, as classified by validateLog; the path of each
#            core file found
# Returns:   1 if validateLog returned non-zero for any log (errors and
#            warnings alike), 0 otherwise
summarizeLogs()
{
  local dir=$1
  [[ ! -d ${dir} ]] && dir=${PWD}

  #names/patterns of the logs to summarize, all relative to ${dir}
  local -a logFiles=(
    "*.log"
    "stdout"
    "stderr"
  )

  #check logs
  local logstatus=0
  local pattern log finallog errorSummary validationStatus
  for pattern in "${logFiles[@]}"; do
    # ${pattern} is left unquoted on purpose so that the glob expands;
    # every pattern gets the directory prefix, not only the first one.
    for log in "${dir}"/${pattern}; do
      [[ ! -f ${log} ]] && continue

      # report the full path; only a relative path needs the PWD prefix
      if [[ ${log} == /* ]]; then
        finallog=${log}
      else
        finallog=${PWD%/}/${log}
      fi

      errorSummary=$(validateLog "${log}")
      validationStatus=$?
      (( validationStatus != 0 )) && logstatus=1

      if (( validationStatus == 0 )); then
        #in pretend mode randomly report an error in rec.log some cases
        # NOTE(review): ${log} carries the directory prefix, so this
        # comparison only matches if the path is literally "rec.log".
        if [[ -n ${pretend} && "${log}" == "rec.log" ]]; then
          if (( RANDOM % 2 >= 1 )); then
            echo "${finallog} BAD random error"
          else
            echo "${finallog} OK"
          fi
        else
          echo "${finallog} OK"
        fi
      elif (( validationStatus == 1 )); then
        echo "${finallog} BAD ${errorSummary}"
      elif (( validationStatus == 2 )); then
        echo "${finallog} OK MWAH ${errorSummary}"
      fi
    done
  done

  #report core files: one level below the current directory first,
  #then the current directory itself
  local core
  for core in "${PWD}"/*/core "${PWD}"/core; do
    [[ -f ${core} ]] || continue
    echo "${core}"
    chmod 644 "${core}"
    #the trace is written to the current directory, slashes become "_"
    gdb --batch --quiet -ex "bt" -ex "quit" aliroot "${core}" > "stacktrace_${core//\//_}.log"
  done

  return ${logstatus}
}
357
# Search a log file for known error and warning patterns.
# Arguments: $1 - log file to scan
# Outputs:   for each matching pattern the first matching line followed
#            by " : ", all on one line; error matches are printed if
#            there are any, otherwise warning matches
# Returns:   1 if an error pattern matched, 2 if only a warning pattern
#            matched, 0 if nothing matched
validateLog()
{
  local log=${1}

  #basic regular expressions as understood by grep -e
  local -a errorConditions=(
    'There was a crash'
    'floating'
    'error while loading shared libraries'
    'std::bad_alloc'
    's_err_syswatch_'
    'Thread [0-9]* (Thread'
    'AliFatal'
    'core dumped'
    '\.C.*error:.*\.h: No such file'
    'segmentation'
    'Interpreter error recovered'
  )

  local -a warningConditions=(
    'This is serious'
  )

  local errorSummary=""
  local warningSummary=""
  local condition match

  for condition in "${errorConditions[@]}"; do
    #-m1: only the first matching line is of interest
    match=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${match} ]] && errorSummary+="${match} : "
  done

  for condition in "${warningConditions[@]}"; do
    match=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${match} ]] && warningSummary+="${match} : "
  done

  if [[ -n ${errorSummary} ]]; then
    echo "${errorSummary}"
    return 1
  fi

  if [[ -n ${warningSummary} ]]; then
    echo "${warningSummary}"
    return 2
  fi

  return 0
}
407
# Set the configuration variables: defaults first, then the config file
# (if one is named on the command line), then the command line options.
# Arguments: option=value pairs, each optionally prefixed with "--"
# Globals:   writes configFile, inputList, workingDirectory,
#            outputDirectory, excludeDetectors, logDirectory and every
#            variable named by an option or assigned in the config file
# Returns:   1 if the config file does not exist or an option is not of
#            the form name=value, 0 otherwise
# Values are assigned literally: they may contain blanks and are not
# evaluated as shell code.
parseConfig()
{
  #config file
  configFile=""
  #where to search for qa files
  inputList=file.list
  #working directory
  workingDirectory="${PWD}"
  #where to place the final qa plots
  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
  outputDirectory="${workingDirectory}/%DET"
  #filter out detector option
  excludeDetectors="EXAMPLE"
  #logs
  logDirectory=${workingDirectory}/logs
  #OCDB storage
  #ocdbStorage="raw://"
  #email to
  #MAILTO="fbellini@cern.ch"

  #scratch variables; unusual names so that no option can collide with them
  local _pcArg _pcAssignment _pcName

  #first, check if the config file is configured
  #if yes - source it so that other options can override it
  for _pcArg in "$@"; do
    _pcAssignment=${_pcArg#--}
    if [[ ${_pcAssignment} == configFile=* ]]; then
      configFile=${_pcAssignment#configFile=}
      if [[ ! -f ${configFile} ]]; then
        echo "configFile ${configFile} not found, exiting..."
        return 1
      fi
      source "${configFile}"
      break
    fi
  done

  #then, parse the options as they override the options from file
  for _pcArg in "$@"; do
    #an empty argument carries no option
    [[ -z ${_pcArg} ]] && continue
    _pcAssignment=${_pcArg#--}
    _pcName=${_pcAssignment%%=*}
    if [[ ${_pcAssignment} != *=* || ! ${_pcName} =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      echo "badly formatted option ${_pcAssignment}, should be: option=value, stopping..."
      return 1
    fi
    #assign without eval: the value is stored exactly as given
    printf -v "${_pcName}" '%s' "${_pcAssignment#*=}"
  done

  return 0
}
452
# Guess the run meta data from the components of a file path.
# Arguments: $1 - path, split at "/"
# Globals:   writes period, runNumber, year, pass, legoTrainRunNumber
#            and dataType (all reset to empty first)
# Returns:   0 if a run number was found, 1 otherwise
guessRunData()
{
  period=""
  runNumber=""
  year=""
  pass=""
  legoTrainRunNumber=""
  dataType=""

  local shortRunNumber=""
  #split without globbing; IFS is changed for the read only
  local -a path
  IFS=/ read -r -a path <<< "$1"
  local dirDepth=$(( ${#path[@]} - 1 ))

  #the path is walked from the last component to the first, so when a
  #pattern matches several components the leftmost match is kept
  local x field fieldNext
  local fieldPrev=""
  for (( x = dirDepth; x >= 0; x-- )); do
    (( x >= 1 )) && fieldPrev=${path[x-1]}
    field=${path[x]}
    fieldNext=${path[x+1]}

    #an all-digit directory directly above a .zip/.root file
    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
    #the pass is then the directory above that one
    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
    [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
  done

  #otherwise the pass is the directory that contains the file
  if [[ -z ${legoTrainRunNumber} ]] && (( dirDepth >= 1 )); then
    pass=${path[dirDepth-1]}
  fi
  #for sim the six digit directory is the run number
  if [[ "${dataType}" =~ ^sim$ ]]; then
    pass="passMC"
    runNumber=${shortRunNumber}
  fi

  #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber} || -z ${pass} ]];
  if [[ -z ${runNumber} ]]; then
    #error condition
    return 1
  fi
  #ALL OK
  return 0
}
496
# Fill the detector name into a directory template.
# Arguments: $1 - detector name
#            $2 - template; the first "%det" is replaced by the lower
#                 case name, the first "%DET" by the name as given
# Outputs:   the resulting path on stdout, exactly one line; a template
#            without a placeholder is printed unchanged
substituteDetectorName()
{
  local det=$1
  local dir=$2
  #the % is escaped because a leading % would anchor the pattern at the end
  dir=${dir/\%det/${det,,}}
  dir=${dir/\%DET/${det}}
  printf '%s\n' "${dir}"
}
504
# Print the absolute path of an existing regular file.
# Arguments: $1 - path of the file, absolute or relative
# Outputs:   "<absolute directory>/<file name>" on stdout
# Returns:   1 if $1 is not an existing regular file, 0 otherwise
# The directory is resolved in a subshell, so the working directory and
# OLDPWD of the caller are left alone.
get_realpath()
{
  # file *must* exist
  [[ -f "$1" ]] || return 1

  local dirPart absDir
  case "$1" in
    */*)
      dirPart=${1%/*}
      # a file directly below the root directory
      [[ -z ${dirPart} ]] && dirPart=/
      ;;
    *)
      # plain file name: the file is local
      dirPart=.
      ;;
  esac
  # an explicit ./ keeps cd from searching CDPATH for relative names
  [[ ${dirPart} != /* ]] && dirPart=./${dirPart}

  # fall back to the current directory if the directory cannot be entered
  absDir=$(cd -- "${dirPart}" &>/dev/null && printf '%s' "${PWD}") || absDir=${PWD}

  # reassemble realpath
  printf '%s\n' "${absDir%/}/${1##*/}"
  return 0
}
529
# Run the script; "$@" is quoted so that each argument reaches main intact.
main "$@"