handle bash args properly using arrays
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh
CommitLineData
f7707400 1#!/bin/bash
main()
{
  # Script entry point.
  # Arguments: one or more option=value pairs (see the usage text).
  # Returns:   1 when no arguments are given, when parseConfig fails or when
  #            ALICE_ROOT is not set; otherwise the status of updateQA.
  if [[ -z $1 ]]; then
    _runQA_printUsage
    return 1
  fi

  # "$@" keeps every option=value pair intact, even when a value has spaces
  if ! parseConfig "$@"; then
    # print the usage directly instead of re-executing ${0}, which fails
    # when the script is sourced or is not executable
    _runQA_printUsage
    return 1
  fi

  [[ -z ${ALICE_ROOT} ]] && echo "ALICE_ROOT not defined" && return 1

  ocdbregex='raw://'
  if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
    # pass the user name only when one is configured
    alien-token-init ${alienUserName:+"${alienUserName}"}
    #this is a hack! alien-token init seems not enough
    #but the gclient_env script messes up the LD_LIBRARY_PATH
    # NOTE(review): this evaluates a file with a predictable name in /tmp;
    # confirm that the file can only be written by the current user.
    local envLine
    while read -r envLine; do
      eval "${envLine}"
    done < <(grep -v "LD_LIBRARY_PATH" "/tmp/gclient_env_${UID}")
  fi

  updateQA "$@"
}

# Print the command line help on stdout.
_runQA_printUsage()
{
  echo "Usage: "
  echo " ${0##*/} option=value [option=value]"
  echo " at least inputList should be specified, or configFile containing it:"
  echo " ${0##*/} inputList=file.list"
  echo " options override config file (if any), e.g.:"
  echo " ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
}
33
updateQA()
{
  # Run the QA for every detector script found in
  # $ALICE_ROOT/PWGPP/QA/detectorQAscripts and move the validated output to
  # the output directory.
  # Arguments: the same option=value pairs accepted by parseConfig.
  # Returns:   1 when the input list, working directory, log directory, log
  #            file or lock cannot be set up; per detector/run problems are
  #            logged and skipped.
  # Most variables are deliberately left global: the sourced detector scripts
  # and the validate/executePlanB helpers read them.
  umask 0002
  parseConfig "$@"

  #be paranoid and make some full paths
  [[ ! -f ${inputList} ]] && echo "no input list: ${inputList}" && return 1
  inputList=$(get_realpath "${inputList}")
  # a leading ./ keeps CDPATH out of the lookup for relative paths
  local requestedWorkingDirectory=${workingDirectory}
  [[ ${requestedWorkingDirectory} != /* ]] && requestedWorkingDirectory="./${requestedWorkingDirectory}"
  mkdir -p "${requestedWorkingDirectory}"
  # resolve to an absolute path by entering the directory; this also works
  # for relative paths made of a single component
  workingDirectory=$(cd "${requestedWorkingDirectory}" 2>/dev/null && pwd)
  if [[ -z ${workingDirectory} || ! -d ${workingDirectory} ]]; then
    echo "working dir ${requestedWorkingDirectory} does not exist and cannot be created"
    return 1
  fi
  cd "${workingDirectory}" || return 1

  echo JOB config:
  echo "inputList=${inputList}"
  echo "outputDirectory=${outputDirectory}"
  echo

  dateString=$(date +%Y-%m-%d-%H-%M)
  echo "Start time QA process: $dateString"

  #logging
  mkdir -p "${logDirectory}"
  [[ ! -d ${logDirectory} ]] && echo "no log dir $logDirectory" && return 1
  logFile="${logDirectory}/${0##*/}.${dateString}.log"
  touch "${logFile}"
  [[ ! -f ${logFile} ]] && echo "cannot write logfile $logFile" && return 1
  echo "logFile = $logFile"

  #check lock
  lockFile=${logDirectory}/runQA.lock
  [[ -f ${lockFile} ]] && echo "lock ${lockFile} exists!" | tee "${logFile}" && return 1
  touch "${lockFile}"
  [[ ! -f ${lockFile} ]] && echo "cannot lock $lockFile" | tee "${logFile}" && return 1

  # from here on everything goes to the log file
  exec &>"${logFile}"

  ################################################################
  #ze detector loop
  for detectorScript in "${ALICE_ROOT}"/PWGPP/QA/detectorQAscripts/*; do
    echo
    echo "##############################################"
    date
    unset planB
    [[ ${detectorScript} == *.sh ]] || continue
    detector=${detectorScript%.sh}
    detector=${detector##*/}

    #skip if excluded
    if [[ "${excludeDetectors}" =~ ${detector} ]]; then
      echo "${detector} is excluded in config, skipping..."
      continue
    fi

    #if includeDetectors set, only process the detectors specified there
    if [[ -n ${includeDetectors} && ! "${includeDetectors}" =~ ${detector} ]]; then
      echo "${detector} not included in includeDetectors, skipping..."
      continue
    fi

    logSummary=${logDirectory}/summary-${detector}-${dateString}.log
    outputDir=$(substituteDetectorName "${detector}" "${outputDirectory}")
    # an empty outputDir would make the rm/mv below operate on paths under /
    if [[ -z ${outputDir} ]]; then
      echo "cannot determine the output dir for ${detector} from ${outputDirectory}"
      continue
    fi
    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}-${dateString}
    if ! mkdir -p "${tmpDetectorRunDir}"; then
      echo "cannot create the temp dir $tmpDetectorRunDir"
      continue
    fi
    cd "${tmpDetectorRunDir}" || continue

    tmpPrefix=${tmpDetectorRunDir}/${outputDir}
    echo "running QA for ${detector}"
    echo " outputDir=$outputDir"
    echo " tmpPrefix=$tmpPrefix"

    #unset the detector functions from previous iterations (detectors)
    unset -f runLevelQA
    unset -f periodLevelQA
    unset -f runLevelHighPtTreeQA
    unset -f periodLevelHighPtTreeQA
    source "${detectorScript}"

    #################################################################
    #produce the QA and trending tree for each file (run)
    unset arrOfTouchedProductions
    declare -A arrOfTouchedProductions
    while read -r qaFile; do
      echo
      date

      #first check if input file exists
      [[ ! -f ${qaFile%\#*} ]] && echo "file ${qaFile%\#*} not accessible" && continue

      if ! guessRunData "${qaFile}"; then
        echo "could not guess run data from ${qaFile}"
        continue
      fi

      tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
      tmpRunDir=${tmpProductionDir}/000${runNumber}
      mkdir -p "${tmpRunDir}"
      if ! cd "${tmpRunDir}"; then
        echo "cannot enter the temp run dir ${tmpRunDir}"
        continue
      fi

      #by default we expect to have everything in the same archive
      highPtTree=${qaFile}

      #maybe the input is not an archive, but a file
      [[ "${qaFile}" =~ QAresults.root$ ]] && highPtTree=""
      [[ "${qaFile}" =~ FilterEvents_Trees.root$ ]] && qaFile=""

      #it is possible we get the highPt trees from somewhere else
      #search the list of high pt trees for the proper run number
      if [[ -n ${inputListHighPtTrees} ]]; then
        highPtTree=$(grep -E -m1 -- "${runNumber}" "${inputListHighPtTrees}")
        echo "loaded the highPtTree ${highPtTree} from external file ${inputListHighPtTrees}"
      fi

      echo "qaFile=${qaFile}"
      echo "highPtTree=${highPtTree}"

      #what if we have a zip archive?
      if [[ "${qaFile}" =~ \.zip$ ]]; then
        if unzip -l "${qaFile}" | grep -E "QAresults.root" &>/dev/null; then
          qaFile="${qaFile}#QAresults.root"
        else
          qaFile=""
        fi
      fi
      if [[ "${highPtTree}" =~ \.zip$ ]]; then
        if unzip -l "${highPtTree}" | grep -E "FilterEvents_Trees.root" &>/dev/null; then
          highPtTree="${highPtTree}#FilterEvents_Trees.root"
        else
          highPtTree=""
        fi
      fi

      if [[ -n ${qaFile} && $(type -t runLevelQA) == "function" ]]; then
        echo "running ${detector} runLevelQA for run ${runNumber} from ${qaFile}"
        runLevelQA "${qaFile}" &> runLevelQA.log
        #perform some default actions:
        #if trending.root not created, create a default one
        if [[ ! -f trending.root ]]; then
          aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" 2>&1 | tee -a runLevelQA.log
        fi
        if [[ -f trending.root ]]; then
          arrOfTouchedProductions["${tmpProductionDir}"]=1
        else
          echo "trending.root not created"
        fi
      fi
      #expert QA based on high pt trees
      if [[ -n ${highPtTree} && $(type -t runLevelHighPtTreeQA) == "function" ]]; then
        echo "running ${detector} runLevelHighPtTreeQA for run ${runNumber} from ${highPtTree}"
        runLevelHighPtTreeQA "${highPtTree}" &> runLevelHighPtTreeQA.log
        arrOfTouchedProductions["${tmpProductionDir}"]=1
      fi

      cd "${tmpDetectorRunDir}"

    done < "${inputList}"

    #################################################################
    #cache which productions were (re)done
    echo "list of processed productions:"
    echo " ${!arrOfTouchedProductions[*]}"
    echo

    #################################################################
    #(re)do the merging/trending
    for tmpProductionDir in "${!arrOfTouchedProductions[@]}"; do
      cd "${tmpProductionDir}" || continue
      echo
      echo "running period level stuff in ${tmpProductionDir}"
      date

      # the prefix is quoted so that it is stripped literally, not as a glob
      productionDir=${outputDir}/${tmpProductionDir#"${tmpPrefix}"}
      echo "productionDir=${productionDir}"

      mkdir -p "${productionDir}"
      if [[ ! -d ${productionDir} ]]; then
        echo "cannot make productionDir $productionDir" && continue
      fi

      #move runs to final destination
      for dir in "${tmpProductionDir}"/000*; do
        # an unmatched glob stays literal, skip it
        [[ -d ${dir} ]] || continue
        echo
        oldRunDir=${outputDir}/${dir#"${tmpPrefix}"}
        if ! guessRunData "${dir}/dummyName"; then
          echo "could not guess run data from ${dir}"
          continue
        fi

        #before moving - VALIDATE!!!
        if ! validate "${dir}"; then
          continue
        fi

        if [[ -d ${oldRunDir} ]]; then
          echo "removing old ${oldRunDir}"
          rm -rf "${oldRunDir}"
        fi
        echo "moving new ${runNumber} to ${productionDir}"
        mv -f "${dir}" "${productionDir}"
      done

      #go to a temp dir to do the period level stuff in a completely clean dir
      tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
      echo
      echo "tmpPeriodLevelQAdir=${tmpPeriodLevelQAdir}"
      if ! mkdir -p "${tmpPeriodLevelQAdir}"; then continue; fi
      cd "${tmpPeriodLevelQAdir}" || continue

      #link the final list of per-run dirs here, just the dirs
      #to have a clean working directory
      unset linkedStuff
      declare -a linkedStuff
      for x in "${productionDir}"/000*; do
        [[ -d ${x} ]] && ln -s "${x}" && linkedStuff+=("${x##*/}")
      done

      #merge trending files if any
      if /bin/ls 000*/trending.root &>/dev/null; then
        hadd trending.root 000*/trending.root &> periodLevelQA.log
      fi

      #run the period level trending/QA
      if [[ -f "trending.root" && $(type -t periodLevelQA) == "function" ]]; then
        echo "running ${detector} periodLevelQA for production ${period}/${pass}"
        periodLevelQA trending.root &>> periodLevelQA.log
      else
        echo "WARNING: not running ${detector} periodLevelQA for production ${period}/${pass}, no trending.root"
      fi

      if ! validate "${PWD}"; then continue; fi

      #here we are validated so move the produced QA to the final place
      #clean up linked stuff first
      (( ${#linkedStuff[@]} > 0 )) && rm "${linkedStuff[@]}"
      #some of the output could be a directory, so handle that
      #TODO: maybe use rsync?
      for x in "${tmpPeriodLevelQAdir}"/*; do
        if [[ -d ${x} ]]; then
          echo "removing ${productionDir}/${x##*/}"
          rm -rf "${productionDir}/${x##*/}"
          echo "moving ${x} to ${productionDir}"
          mv "${x}" "${productionDir}"
        elif [[ -f ${x} ]]; then
          echo "moving ${x} to ${productionDir}"
          mv -f "${x}" "${productionDir}"
        fi
      done

      #remove the temp dir
      rm -rf "${tmpPeriodLevelQAdir}"

    done

    cd "${workingDirectory}"

    if [[ -z ${planB} ]]; then
      echo
      echo "removing ${tmpDetectorRunDir}"
      rm -rf "${tmpDetectorRunDir}"
    else
      executePlanB
    fi
  done #end of detector loop

  #remove lock
  rm -f "${lockFile}"
}
306
executePlanB()
{
  # In case of emergency: mail the BAD lines of ${logSummary} to ${MAILTO}.
  # Does nothing when MAILTO is empty.
  if [[ -n ${MAILTO} ]]; then
    echo
    echo "trouble detected, sending email to ${MAILTO}"

    # MAILTO may hold several whitespace separated addresses; split it into
    # an array so that each address is passed as its own argument without
    # exposing it to globbing
    local -a recipients
    read -r -a recipients <<< "${MAILTO}"

    # quoting the summary path keeps paths with spaces intact and stops
    # grep from reading stdin when the variable is empty
    grep BAD "${logSummary}" | mail -s "qa in need of assistance" "${recipients[@]}"
  fi
}
317
validate()
{
  # Summarize the logs of directory $1 into ${logSummary}.
  # Returns 0 when summarizeLogs succeeds; otherwise prints a warning,
  # sets the global planB=1 and returns 1.
  local logStatus
  summarizeLogs "${1}" >> "${logSummary}"
  logStatus=$?
  if [[ ${logStatus} -ne 0 ]]; then
    echo "WARNING not validated: ${1}"
    planB=1
    return 1
  fi
  return 0
}
329
summarizeLogs()
{
  # Print a one line status (OK / BAD / OK MWAH) for every log file found in
  # a directory and create gdb stack traces for core files below ${PWD}.
  # Arguments: $1 - directory to inspect; ${PWD} is used when it is not a
  #                 directory
  # Outputs:   the status lines on stdout
  # Returns:   1 when validateLog reports a non-zero status for any log,
  #            0 otherwise
  local dir=$1
  [[ ! -d ${dir} ]] && dir=${PWD}

  #print a summary of logs
  local -a logPatterns=(
    "*.log"
    "stdout"
    "stderr"
  )

  #check logs
  local logstatus=0
  local pattern log finallog errorSummary validationStatus
  for pattern in "${logPatterns[@]}"; do
    # every pattern is looked up inside ${dir}; the pattern itself is left
    # unquoted on purpose so that *.log is expanded
    for log in "${dir}"/${pattern}; do
      [[ ! -f ${log} ]] && continue
      # only relative paths need ${PWD} in front to become a full path
      if [[ ${log} == /* ]]; then
        finallog=${log}
      else
        finallog=${PWD%/}/${log}
      fi
      errorSummary=$(validateLog "${log}")
      validationStatus=$?
      [[ ${validationStatus} -ne 0 ]] && logstatus=1
      if [[ ${validationStatus} -eq 0 ]]; then
        #in pretend mode randomly report an error in rec.log some cases
        if [[ -n ${pretend} && "${log##*/}" == "rec.log" ]]; then
          if [[ $(( RANDOM % 2 )) -ge 1 ]]; then
            echo "${finallog} BAD random error"
          else
            echo "${finallog} OK"
          fi
        else
          echo "${finallog} OK"
        fi
      elif [[ ${validationStatus} -eq 1 ]]; then
        echo "${finallog} BAD ${errorSummary}"
      elif [[ ${validationStatus} -eq 2 ]]; then
        echo "${finallog} OK MWAH ${errorSummary}"
      fi
    done
  done

  #report core files
  local core
  for core in "${PWD}"/*/core "${PWD}"/core; do
    # unmatched globs stay literal, skip them
    [[ -f ${core} ]] || continue
    echo "${core}"
    chmod 644 "${core}"
    gdb --batch --quiet -ex "bt" -ex "quit" aliroot "${core}" > "stacktrace_${core//\//_}.log"
  done

  return ${logstatus}
}
373
validateLog()
{
  # Search a log file for known error and warning patterns.
  # Arguments: $1 - path of the log file
  # Outputs:   the first matching line of every matching pattern, each
  #            followed by " : ", on stdout
  # Returns:   1 when an error pattern matches, 2 when only a warning
  #            pattern matches, 0 otherwise
  local logPath=${1}
  local -a errorPatterns=(
    'There was a crash'
    'floating'
    'error while loading shared libraries'
    'std::bad_alloc'
    's_err_syswatch_'
    'Thread [0-9]* (Thread'
    'AliFatal'
    'core dumped'
    '\.C.*error:.*\.h: No such file'
    'segmentation'
    'Interpreter error recovered'
  )

  local -a warningPatterns=(
    'This is serious'
  )

  local errorSummary=""
  local warningSummary=""
  local pattern hit

  for pattern in "${errorPatterns[@]}"; do
    hit=$(grep -m1 -e "${pattern}" -- "${logPath}")
    [[ -n ${hit} ]] && errorSummary+="${hit} : "
  done

  for pattern in "${warningPatterns[@]}"; do
    hit=$(grep -m1 -e "${pattern}" -- "${logPath}")
    [[ -n ${hit} ]] && warningSummary+="${hit} : "
  done

  if [[ -n ${errorSummary} ]]; then
    echo "${errorSummary}"
    return 1
  fi

  if [[ -n ${warningSummary} ]]; then
    echo "${warningSummary}"
    return 2
  fi

  return 0
}
423
parseConfig()
{
  # Set the default configuration, source the config file when one is given
  # and apply the option=value pairs from the arguments on top of it.
  # Arguments: option=value pairs
  # Outputs:   the options that are applied, or an error message, on stdout
  # Returns:   1 when the config file is missing or an option is malformed,
  #            0 otherwise
  args=("$@")
  local opt var value

  #config file
  configFile=""
  #where to search for qa files
  inputList=file.list
  #working directory
  workingDirectory="${PWD}"
  #where to place the final qa plots
  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
  outputDirectory="${workingDirectory}/%DET"
  #filter out detector option
  excludeDetectors="EXAMPLE"
  #logs
  logDirectory=${workingDirectory}/logs
  #OCDB storage
  ocdbStorage="raw://"
  #email to
  #MAILTO="fbellini@cern.ch"

  #first, check if the config file is configured
  #if yes - source it so that other options can override it
  #if any
  for opt in "${args[@]}"; do
    if [[ ${opt} =~ ^configFile= ]]; then
      # plain assignment instead of eval: the argument is data, not code
      configFile="${opt#*=}"
      if [[ ! -f ${configFile} ]]; then
        echo "configFile ${configFile} not found, exiting..."
        return 1
      fi
      echo "using config file: ${configFile}"
      source "${configFile}"
      break
    fi
  done

  #then, parse the options as they override the options from file
  for opt in "${args[@]}"; do
    if [[ ! "${opt}" =~ .*=.* ]]; then
      echo "badly formatted option ${opt}, should be: option=value, stopping..."
      return 1
    fi
    var="${opt%%=*}"
    value="${opt#*=}"
    echo "${var} = ${value}"
    # export fails when the option name is not a valid variable name
    export "${var}=${value}" || return 1
  done
  return 0
}
471
guessRunData()
{
  # Guess the run data from the components of a file path.
  # Arguments: $1 - path to analyse
  # Globals written: period, runNumber, year, pass, legoTrainRunNumber,
  #                  dataType
  # Returns:   0 when a run number was found, 1 otherwise
  #guess the period from the path, pick the rightmost one
  period=""
  runNumber=""
  year=""
  pass=""
  legoTrainRunNumber=""
  dataType=""

  local shortRunNumber=""
  # split on / without exposing the path to glob expansion
  local -a path
  IFS="/" read -r -a path <<< "$1"
  local dirDepth=$(( ${#path[@]} - 1 ))
  local x field fieldPrev fieldNext
  for (( x = dirDepth; x >= 0; x-- )); do

    # there is no previous field for the first component, keep the last value
    (( x > 0 )) && fieldPrev=${path[x-1]}
    field=${path[x]}
    fieldNext=${path[x+1]}

    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
    [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
  done
  # without a lego train number the pass is the directory holding the file
  if [[ -z ${legoTrainRunNumber} ]] && (( dirDepth >= 1 )); then
    pass=${path[dirDepth-1]}
  fi
  if [[ "${dataType}" =~ ^sim$ ]]; then
    pass="passMC"
    # use the 6 digit run number when there is one, but do not throw away
    # a run number that was found in the 000NNNNNN form
    [[ -n ${shortRunNumber} ]] && runNumber=${shortRunNumber}
  fi

  #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber} || -z ${pass} ]];
  if [[ -z ${runNumber} ]]; then
    #error condition
    return 1
  fi
  #ALL OK
  return 0
}
515
substituteDetectorName()
{
  # Replace the detector placeholders in a path.
  # Arguments: $1 - detector name
  #            $2 - path; %det is replaced by the lower case detector name,
  #                 %DET by the detector name as given
  # Outputs:   the resulting path on stdout; a path without placeholders is
  #            printed unchanged
  # Returns:   0
  local det=$1
  local dir=$2
  local lowerDet=${det,,}
  # the backslash keeps % from being read as the "match at end" anchor
  dir=${dir//\%det/${lowerDet}}
  dir=${dir//\%DET/${det}}
  echo "${dir}"
  return 0
}
523
get_realpath()
{
  # Print the absolute path of an existing regular file.
  # Arguments: $1 - path of the file
  # Outputs:   the absolute directory (as reported by pwd) followed by the
  #            file name, on stdout
  # Returns:   1 when $1 is not an existing regular file or its directory
  #            cannot be entered, 0 otherwise
  # file *must* exist
  [[ -f "$1" ]] || return 1

  local file=$1
  local dirPart
  case "${file}" in
    /*/*) dirPart=${file%/*} ;;       # absolute path below some directory
    /*)   dirPart="/" ;;              # file directly in the root directory
    */*)  dirPart="./${file%/*}" ;;   # relative path, ./ keeps CDPATH out
    *)    dirPart="." ;;              # file *must* be local
  esac

  # resolve the directory in a subshell so that the working directory and
  # OLDPWD of the caller are left alone
  local absDir
  absDir=$(cd -- "${dirPart}" &>/dev/null && pwd) || return 1

  # reassemble realpath
  echo "${absDir%/}/${file##*/}"
  return 0 # success
}
548
# run the script; "$@" hands every command line argument to main unsplit
main "$@"