]> git.uio.no Git - u/mrichter/AliRoot.git/blame_incremental - PWGPP/QA/scripts/runQA.sh
if processing lego train output - add _lego${legoNumber} to pass
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh
... / ...
CommitLineData
1#!/bin/bash
#######################################
# Entry point of the script.
# Prints the usage text when called without arguments, parses the options,
# checks the environment and then hands over to updateQA.
# Globals:   ALICE_ROOT, ocdbStorage, alienUserName, UID (read)
# Arguments: option=value pairs, forwarded unchanged to parseConfig/updateQA
# Returns:   1 on usage, configuration or environment errors,
#            otherwise the exit status of updateQA
#######################################
main()
{
  if [[ -z $1 ]]; then
    echo "Usage: "
    echo " ${0##*/} option=value [option=value]"
    echo " at least inputList should be specified, or configFile containing it:"
    echo " ${0##*/} inputList=file.list"
    echo " options override config file (if any), e.g.:"
    echo " ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
    return 1
  fi

  if ! parseConfig "$@"; then
    #run ourselves without arguments, just to print the usage text
    "${0}"
    return 1
  fi

  [[ -z ${ALICE_ROOT} ]] && echo "ALICE_ROOT not defined" && return 1

  local ocdbregex='raw://'
  if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
    #alienUserName is deliberately unquoted: when it is empty no argument
    #at all is passed to alien-token-init
    alien-token-init ${alienUserName}
    #this is a hack! alien-token init seems not enough
    #but the gclient_env script messes up the LD_LIBRARY_PATH
    local envLine
    while IFS= read -r envLine; do
      #NOTE(review): eval of a file in /tmp - the content is trusted here
      eval "${envLine}"
    done < <(grep -v "LD_LIBRARY_PATH" "/tmp/gclient_env_${UID}")
  fi

  updateQA "$@"
}
33
#######################################
# Run the detector QA scripts on every file of the input list, validate
# the logs and move the results to the output directory.
# Steps: resolve paths, set up log and lock file, then for each script in
# $ALICE_ROOT/PWGPP/QA/detectorQAscripts: source it, run the run level
# functions per input file in a temporary directory, move validated runs
# to the production directory and run the period level functions there.
# Globals:   inputList, workingDirectory, outputDirectory, logDirectory,
#            excludeDetectors, includeDetectors, inputListHighPtTrees,
#            ocdbStorage, ALICE_ROOT (read); dateString, logFile, lockFile,
#            logSummary, detector, outputDir, planB and further helper
#            variables (written, visible to the sourced detector scripts)
# Arguments: option=value pairs, forwarded to parseConfig
# Outputs:   after the lock is taken stdout and stderr go to ${logFile}
# Returns:   1 if input list, working dir, log dir or lock cannot be set up
#######################################
updateQA()
{
  umask 0002
  parseConfig "$@"

  #be paranoid and make some full paths
  [[ ! -f ${inputList} ]] && echo "no input list: ${inputList}" && return 1
  inputList=$(get_realpath "${inputList}")
  mkdir -p "${workingDirectory}"
  workingDirectory=$(workingDirectory=${workingDirectory%/}; cd "${workingDirectory%/*}"; echo "${PWD}/${workingDirectory##*/}")
  if [[ ! -d ${workingDirectory} ]]; then
    echo "working dir $workingDirectory does not exist and cannot be created"
    return 1
  fi
  cd "${workingDirectory}" || return 1

  echo JOB config:
  echo "inputList=${inputList}"
  echo "outputDirectory=${outputDirectory}"
  echo

  dateString=$(date +%Y-%m-%d-%H-%M)
  echo "Start time QA process: $dateString"

  #logging
  mkdir -p "${logDirectory}"
  [[ ! -d ${logDirectory} ]] && echo "no log dir $logDirectory" && return 1
  logFile="$logDirectory/${0##*/}.${dateString}.log"
  touch "${logFile}"
  [[ ! -f ${logFile} ]] && echo "cannot write logfile $logFile" && return 1
  echo "logFile = $logFile"

  #check lock
  lockFile=${logDirectory}/runQA.lock
  [[ -f ${lockFile} ]] && echo "lock ${lockFile} exists!" | tee "${logFile}" && return 1
  touch "${lockFile}"
  [[ ! -f ${lockFile} ]] && echo "cannot lock $lockFile" | tee "${logFile}" && return 1

  #from here on everything is written to the log file
  exec &>"${logFile}"

  ################################################################
  #ze detector loop
  for detectorScript in "${ALICE_ROOT}"/PWGPP/QA/detectorQAscripts/*; do
    echo
    echo "##############################################"
    echo $(date)
    unset planB
    [[ ! ${detectorScript} =~ .*\.sh$ ]] && continue
    detector=${detectorScript%.sh}
    detector=${detector##*/}

    #skip if excluded
    if [[ "${excludeDetectors}" =~ ${detector} ]]; then
      echo "${detector} is excluded in config, skipping..."
      continue
    fi

    #if includeDetectors set, only process the detectors specified there
    if [[ -n ${includeDetectors} && ! "${includeDetectors}" =~ ${detector} ]]; then
      echo "${detector} not included in includeDetectors, skipping..."
      continue
    fi

    logSummary=${logDirectory}/summary-${detector}-${dateString}.log
    outputDir=$(substituteDetectorName "${detector}" "${outputDirectory}")
    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}-${dateString}
    if ! mkdir -p "${tmpDetectorRunDir}"; then
      echo "cannot create the temp dir $tmpDetectorRunDir"
      continue
    fi
    cd "${tmpDetectorRunDir}"

    tmpPrefix=${tmpDetectorRunDir}/${outputDir}
    echo "running QA for ${detector}"
    echo " outputDir=$outputDir"
    echo " tmpPrefix=$tmpPrefix"

    #unset the detector functions from previous iterations (detectors)
    unset -f runLevelQA
    unset -f periodLevelQA
    unset -f runLevelHighPtTreeQA
    unset -f periodLevelHighPtTreeQA
    source "${detectorScript}"

    #################################################################
    #produce the QA and trending tree for each file (run)
    unset arrOfTouchedProductions
    declare -A arrOfTouchedProductions
    while read -r qaFile; do
      echo
      echo $(date)

      #first check if input file exists
      [[ ! -f ${qaFile%\#*} ]] && echo "file ${qaFile%\#*} not accessible" && continue

      if ! guessRunData "${qaFile}"; then
        echo "could not guess run data from ${qaFile}"
        continue
      fi

      tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
      tmpRunDir=${tmpProductionDir}/000${runNumber}
      mkdir -p "${tmpRunDir}"
      cd "${tmpRunDir}"

      #by default we expect to have everything in the same archive
      highPtTree=${qaFile}

      #maybe the input is not an archive, but a file
      [[ "${qaFile}" =~ QAresults.root$ ]] && highPtTree=""
      [[ "${qaFile}" =~ FilterEvents_Trees.root$ ]] && qaFile=""

      #it is possible we get the highPt trees from somewhere else
      #search the list of high pt trees for the proper run number
      if [[ -n ${inputListHighPtTrees} ]]; then
        #pattern and file are quoted so that an empty run number can never
        #turn the file name into the pattern and make grep read our stdin
        highPtTree=$(grep -E -m1 -- "${runNumber}" "${inputListHighPtTrees}")
        echo "loaded the highPtTree ${highPtTree} from external file ${inputListHighPtTrees}"
      fi

      echo "qaFile=${qaFile}"
      echo "highPtTree=${highPtTree}"
      echo "ocdbStorage=${ocdbStorage}"
      echo

      #what if we have a zip archive?
      if [[ "$qaFile" =~ .*\.zip$ ]]; then
        if unzip -l "${qaFile}" | grep -E "QAresults.root" &>/dev/null; then
          qaFile="${qaFile}#QAresults.root"
        else
          qaFile=""
        fi
      fi
      if [[ "$highPtTree" =~ .*\.zip$ ]]; then
        if unzip -l "${highPtTree}" | grep -E "FilterEvents_Trees.root" &>/dev/null; then
          highPtTree="${highPtTree}#FilterEvents_Trees.root"
        else
          highPtTree=""
        fi
      fi

      if [[ -n ${qaFile} && $(type -t runLevelQA) =~ "function" ]]; then
        echo running ${detector} runLevelQA for run ${runNumber} from ${qaFile}
        runLevelQA "${qaFile}" &> runLevelQA.log
        #perform some default actions:
        #if trending.root not created, create a default one
        if [[ ! -f trending.root ]]; then
          aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" 2>&1 | tee -a runLevelQA.log
        fi
        if [[ -f trending.root ]]; then
          arrOfTouchedProductions[${tmpProductionDir}]=1
        else
          echo "trending.root not created"
        fi
      fi
      #expert QA based on high pt trees
      if [[ -n ${highPtTree} && $(type -t runLevelHighPtTreeQA) =~ "function" ]]; then
        echo running ${detector} runLevelHighPtTreeQA for run ${runNumber} from ${highPtTree}
        runLevelHighPtTreeQA "${highPtTree}" &> runLevelHighPtTreeQA.log
        arrOfTouchedProductions[${tmpProductionDir}]=1
      fi

      cd "${tmpDetectorRunDir}"

    done < "${inputList}"

    #################################################################
    #cache which productions were (re)done
    echo "list of processed productions:"
    echo " ${!arrOfTouchedProductions[@]}"
    echo

    #################################################################
    #(re)do the merging/trending
    for tmpProductionDir in "${!arrOfTouchedProductions[@]}"; do
      cd "${tmpProductionDir}"
      echo
      echo "running period level stuff in ${tmpProductionDir}"
      echo $(date)

      productionDir=${outputDir}/${tmpProductionDir#${tmpPrefix}}
      echo "productionDir=${productionDir}"

      mkdir -p "${productionDir}"
      if [[ ! -d ${productionDir} ]]; then
        echo "cannot make productionDir $productionDir" && continue
      fi

      #move runs to final destination
      for dir in "${tmpProductionDir}"/000*; do
        #an unmatched glob stays literal, there is nothing to move then
        [[ -d ${dir} ]] || continue
        echo
        oldRunDir=${outputDir}/${dir#${tmpPrefix}}
        if ! guessRunData "${dir}/dummyName"; then
          echo "could not guess run data from ${dir}"
          continue
        fi

        #before moving - VALIDATE!!!
        if ! validate "${dir}"; then
          continue
        fi

        #moving a dir is an atomic operation, no locking necessary
        if [[ -d ${oldRunDir} ]]; then
          echo "removing old ${oldRunDir}"
          rm -rf "${oldRunDir}"
        fi
        echo "moving new ${runNumber} to ${productionDir}"
        mv -f "${dir}" "${productionDir}"
      done

      #go to a temp dir to do the period level stuff in a completely clean dir
      tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
      echo
      echo "tmpPeriodLevelQAdir=${tmpPeriodLevelQAdir}"
      if ! mkdir -p "${tmpPeriodLevelQAdir}"; then continue; fi
      cd "${tmpPeriodLevelQAdir}"

      #link the final list of per-run dirs here, just the dirs
      #to have a clean working directory
      unset linkedStuff
      declare -a linkedStuff
      for x in "${productionDir}"/000*; do
        [[ -d ${x} ]] && ln -s "${x}" && linkedStuff+=("${x##*/}")
      done

      #merge trending files if any
      if /bin/ls 000*/trending.root &>/dev/null; then
        hadd trending.root 000*/trending.root &> periodLevelQA.log
      fi

      #run the period level trending/QA
      if [[ -f "trending.root" && $(type -t periodLevelQA) =~ "function" ]]; then
        echo running ${detector} periodLevelQA for production ${period}/${pass}
        periodLevelQA trending.root &>> periodLevelQA.log
      else
        echo "WARNING: not running ${detector} periodLevelQA for production ${period}/${pass}, no trending.root"
      fi

      if ! validate "${PWD}"; then continue; fi

      #here we are validated so move the produced QA to the final place
      #clean up linked stuff first
      (( ${#linkedStuff[@]} > 0 )) && rm "${linkedStuff[@]}"
      periodLevelLock=${productionDir}/runQA.lock
      if [[ ! -f ${periodLevelLock} ]]; then
        #some of the output could be a directory, so handle that
        #TODO: maybe use rsync?
        #lock to avoid conflicts:
        echo "${HOSTNAME} ${dateString}" > "${periodLevelLock}"
        for x in "${tmpPeriodLevelQAdir}"/*; do
          if [[ -d ${x} ]]; then
            echo "removing ${productionDir}/${x##*/}"
            rm -rf "${productionDir}/${x##*/}"
            echo "moving ${x} to ${productionDir}"
            mv "${x}" "${productionDir}"
          fi
          if [[ -f ${x} ]]; then
            echo "moving ${x} to ${productionDir}"
            mv -f "${x}" "${productionDir}"
          fi
        done
        rm -f "${periodLevelLock}"
        #remove the temp dir
        rm -rf "${tmpPeriodLevelQAdir}"
      else
        echo "ERROR: cannot move to destination" >> "${logSummary}"
        echo "production dir ${productionDir} locked!" >> "${logSummary}"
        echo "check and maybe manually do:" >> "${logSummary}"
        echo " rm ${periodLevelLock}" >> "${logSummary}"
        echo " rsync -av ${tmpPeriodLevelQAdir}/ ${productionDir}/" >> "${logSummary}"
        planB=1
      fi

    done

    cd "${workingDirectory}"

    if [[ -z ${planB} ]]; then
      echo
      echo "removing ${tmpDetectorRunDir}"
      rm -rf "${tmpDetectorRunDir}"
    else
      executePlanB
    fi
  done #end of detector loop

  #remove lock
  rm -f "${lockFile}"
}
321
#######################################
# Emergency handling: mail the BAD lines of the log summary.
# Does nothing when MAILTO is empty.
# Globals:   MAILTO, logSummary (read)
# Arguments: none
# Returns:   0 when MAILTO is empty, otherwise the status of the mail pipeline
#######################################
executePlanB()
{
  #in case of emergency
  if [[ -n ${MAILTO} ]]; then
    echo
    echo "trouble detected, sending email to ${MAILTO}"

    #MAILTO is deliberately unquoted so that it may hold several
    #space separated recipients
    grep BAD "${logSummary}" | mail -s "qa in need of assistance" ${MAILTO}
  fi
}
332
#######################################
# Validate the logs of a directory and record the result.
# The output of summarizeLogs is appended to the log summary file.
# Globals:   logSummary (read), logStatus (written), planB (set to 1 on failure)
# Arguments: $1 - directory to validate
# Outputs:   a warning on stdout when the validation fails
# Returns:   0 if summarizeLogs succeeded, 1 otherwise
#######################################
validate()
{
  summarizeLogs "${1}" >> "${logSummary}"
  logStatus=$?
  if [[ ${logStatus} -ne 0 ]]; then
    echo "WARNING not validated: ${1}"
    planB=1
    return 1
  fi
  return 0
}
344
#######################################
# Print a one-line verdict (OK / BAD / OK MWAH) for every log file of a
# directory and create gdb stack traces for core files.
# The files checked are *.log, stdout and stderr, all inside the directory.
# Globals:   pretend (read), PWD (read)
# Arguments: $1 - directory to check, falls back to ${PWD} if not a directory
# Outputs:   "<log> OK", "<log> BAD <errors>" or "<log> OK MWAH <warnings>"
#            per log file; the path of every core file found
# Returns:   1 if validateLog returned non-zero (error or warning) for any
#            log file, 0 otherwise
#######################################
summarizeLogs()
{
  local dir=$1
  [[ ! -d ${dir} ]] && dir=${PWD}

  #the logs to be summarized, as glob patterns relative to ${dir}
  local -a logFiles=(
    "*.log"
    "stdout"
    "stderr"
  )

  #check logs
  local logstatus=0
  local pattern log finallog errorSummary validationStatus
  for pattern in "${logFiles[@]}"; do
    #pattern is unquoted on purpose: it has to be expanded as a glob
    for log in "${dir}"/${pattern}; do
      [[ ! -f ${log} ]] && continue
      #report absolute paths as they are, relative ones with respect to PWD
      if [[ ${log} == /* ]]; then
        finallog=${log}
      else
        finallog=${PWD%/}/${log}
      fi
      errorSummary=$(validateLog "${log}")
      validationStatus=$?
      [[ ${validationStatus} -ne 0 ]] && logstatus=1
      if [[ ${validationStatus} -eq 0 ]]; then
        #in pretend mode randomly report an error in rec.log some cases
        if [[ -n ${pretend} && "${log##*/}" == "rec.log" ]]; then
          if (( RANDOM % 2 >= 1 )); then
            echo "${finallog} BAD random error"
          else
            echo "${finallog} OK"
          fi
        else
          echo "${finallog} OK"
        fi
      elif [[ ${validationStatus} -eq 1 ]]; then
        echo "${finallog} BAD ${errorSummary}"
      elif [[ ${validationStatus} -eq 2 ]]; then
        echo "${finallog} OK MWAH ${errorSummary}"
      fi
    done
  done

  #report core files (searched in the current directory and one level below)
  local core
  for core in "${PWD}"/*/core "${PWD}"/core; do
    [[ -f ${core} ]] || continue
    echo "${core}"
    chmod 644 "${core}"
    gdb --batch --quiet -ex "bt" -ex "quit" aliroot "${core}" > "stacktrace_${core//\//_}.log"
  done

  return ${logstatus}
}
388
#######################################
# Search a log file for known error and warning signatures.
# For each signature the first matching line (grep -m1) is collected.
# Arguments: $1 - log file to check
# Outputs:   the collected lines, each followed by " : ", on one line;
#            errors take precedence over warnings
# Returns:   1 if an error signature was found, 2 if only a warning
#            signature was found, 0 otherwise
#######################################
validateLog()
{
  local log=${1}
  local -a errorConditions=(
    'There was a crash'
    'floating'
    'error while loading shared libraries'
    'std::bad_alloc'
    's_err_syswatch_'
    'Thread [0-9]* (Thread'
    'AliFatal'
    'core dumped'
    '\.C.*error:.*\.h: No such file'
    'segmentation'
    'Interpreter error recovered'
  )

  local -a warningConditions=(
    'This is serious'
  )

  local errorSummary=""
  local warningSummary=""
  local condition
  local match

  for condition in "${errorConditions[@]}"; do
    match=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${match} ]] && errorSummary+="${match} : "
  done

  for condition in "${warningConditions[@]}"; do
    match=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${match} ]] && warningSummary+="${match} : "
  done

  if [[ -n ${errorSummary} ]]; then
    echo "${errorSummary}"
    return 1
  fi

  if [[ -n ${warningSummary} ]]; then
    echo "${warningSummary}"
    return 2
  fi

  return 0
}
438
#######################################
# Set the default configuration, source the config file (if one is given
# with configFile=...) and then apply the command line options, which
# override both the defaults and the config file.
# Globals:   configFile, inputList, workingDirectory, outputDirectory,
#            excludeDetectors, logDirectory, ocdbStorage (written);
#            every option=value argument is exported as variable "option"
# Arguments: option=value pairs
# Outputs:   the config file in use and every option=value that is applied
# Returns:   1 if the config file does not exist or an argument is not of
#            the form option=value
#######################################
parseConfig()
{
  local -a args=("$@")

  #config file
  configFile=""
  #where to search for qa files
  inputList=file.list
  #working directory
  workingDirectory="${PWD}"
  #where to place the final qa plots
  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
  outputDirectory="${workingDirectory}/%DET"
  #filter out detector option
  excludeDetectors="EXAMPLE"
  #logs
  logDirectory=${workingDirectory}/logs
  #OCDB storage
  ocdbStorage="raw://"
  #email to
  #MAILTO="fbellini@cern.ch"

  #first, check if the config file is configured
  #if yes - source it so that other options can override it
  #if any
  local opt
  for opt in "${args[@]}"; do
    if [[ ${opt} =~ ^configFile= ]]; then
      #plain assignment instead of eval: a value containing spaces or
      #shell syntax must never be executed
      configFile=${opt#configFile=}
      [[ ! -f ${configFile} ]] && echo "configFile ${configFile} not found, exiting..." && return 1
      echo "using config file: ${configFile}"
      source "${configFile}"
      break
    fi
  done

  #then, parse the options as they override the options from file
  local var
  local value
  for opt in "${args[@]}"; do
    if [[ ! "${opt}" =~ .*=.* ]]; then
      echo "badly formatted option ${opt}, should be: option=value, stopping..."
      return 1
    fi
    var="${opt%%=*}"
    value="${opt#*=}"
    echo "${var}=${value}"
    export "${var}=${value}"
  done
}
486
#######################################
# Guess the run meta data from the components of a file path.
# NOTE(review): the path is scanned from right to left and every match
# overwrites the previous one, so for period, runNumber, year and dataType
# the leftmost matching component wins.
# Globals:   period, runNumber, year, pass, legoTrainRunNumber, dataType
#            (written); ocdbStorage (its year is updated through setYear)
# Arguments: $1 - path of the input file
# Returns:   0 if a run number was found, 1 otherwise
#######################################
guessRunData()
{
  period=""
  runNumber=""
  year=""
  pass=""
  legoTrainRunNumber=""
  dataType=""

  local shortRunNumber=""
  #split the path at the slashes, without globbing its components
  local -a path
  IFS="/" read -r -a path <<< "$1"
  local dirDepth=$(( ${#path[@]} - 1 ))
  local x
  local field
  local fieldPrev
  local fieldNext
  for (( x = dirDepth; x >= 0; x-- )); do

    fieldPrev=""
    (( x > 0 )) && fieldPrev=${path[x-1]}
    field=${path[x]}
    fieldNext=${path[x+1]}

    #a purely numeric directory right above the zip/root file is taken
    #as the lego train number, the directory above it as the pass
    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
    [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
  done
  #without a lego train the pass is the directory holding the file
  [[ -z ${legoTrainRunNumber} ]] && (( dirDepth >= 1 )) && pass=${path[dirDepth-1]}
  [[ "${dataType}" =~ ^sim$ ]] && pass="passMC" && runNumber=${shortRunNumber}
  [[ -n ${legoTrainRunNumber} ]] && pass+="_lego${legoTrainRunNumber}"

  #modify the OCDB: set the year
  #(quoted, so that an empty year cannot shift the arguments)
  ocdbStorage=$(setYear "${year}" "${ocdbStorage}")

  #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber} || -z ${pass} ]];
  if [[ -z ${runNumber} ]]; then
    #error condition
    return 1
  fi
  #ALL OK
  return 0
}
536
#######################################
# Replace the detector placeholder in a path with the detector name:
# %det becomes the lower case name, %DET the name as given.
# A path without placeholder is printed unchanged.
# Arguments: $1 - detector name
#            $2 - path, possibly containing %det and/or %DET
# Outputs:   the resulting path on stdout (always exactly one line)
#######################################
substituteDetectorName()
{
  local det=$1
  local dir=$2
  #the backslash keeps % from being read as the "match at end" anchor
  dir=${dir/\%det/${det,,}}
  dir=${dir/\%DET/${det}}
  echo "${dir}"
}
544
#######################################
# Print the absolute path of an existing regular file.
# The directory part is resolved with cd in a subshell, so neither the
# working directory nor OLDPWD of the caller is touched.
# Arguments: $1 - path of the file, absolute or relative
# Outputs:   <absolute directory>/<file name> on stdout
# Returns:   0 on success, 1 if $1 is not an existing regular file
#######################################
get_realpath()
{
  # file *must* exist
  [[ -f "$1" ]] || return 1 # failure

  local parent
  case "$1" in
    /*/*) parent=${1%/*} ;;
    /*) parent="/" ;;          # file directly below the root directory
    */*) parent="./${1%/*}" ;; # "./" keeps CDPATH out of the game
    *) parent="." ;;           # file *must* be local
  esac

  local tmppwd
  tmppwd=$(cd "${parent}" &>/dev/null && printf '%s' "${PWD}") || tmppwd=${PWD}

  # reassemble realpath
  echo "${tmppwd%/}/${1##*/}"
  return 0 # success
}
569
#######################################
# Set the year in a path.
# The year component found in $2 (via guessYear) is replaced by the year
# found in $1; $2 is printed unchanged if either of them has no year or
# if both years are equal.
# Arguments: $1 - year to be set (or a path containing it)
#            $2 - where to set the year
# Outputs:   the resulting path on stdout
# Returns:   0
#######################################
setYear()
{
  local year1
  local year2
  year1=$(guessYear "${1}")
  year2=$(guessYear "${2}")
  local path=${2}
  if [[ -n ${year1} && -n ${year2} && ${year1} != "${year2}" ]]; then
    #only a complete path component /YYYY/ is replaced
    local from="/${year2}/"
    local to="/${year1}/"
    path=${2/"${from}"/${to}}
  fi
  echo "${path}"
  return 0
}
582
#######################################
# Guess the year from a path: the rightmost path component of the form
# 20YY is printed.
# Arguments: $1 - path (components separated by /)
# Outputs:   the year on stdout, an empty line if there is none
# Returns:   0
#######################################
guessYear()
{
  #split at the slashes, without globbing the components
  local -a pathArray
  IFS="/" read -r -a pathArray <<< "${1}"
  local field
  local year=""
  for field in "${pathArray[@]}"; do
    #later (further right) matches overwrite earlier ones
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
  done
  echo "${year}"
  return 0
}
596
597main "$@"