]> git.uio.no Git - u/mrichter/AliRoot.git/blob - PWGPP/QA/scripts/runQA.sh
Don't process if the detector container is not present in the QA file, to avoid overwriting...
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh
1 #!/bin/bash
# Print the command line help on stdout.
runQAusage()
{
  echo "Usage: "
  echo "  ${0##*/} option=value [option=value]"
  echo "  at least inputList should be specified, or configFile containing it:"
  echo "  ${0##*/} inputList=file.list"
  echo "  options override config file (if any), e.g.:"
  echo "  ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
}

# Entry point: validate the command line and the environment, then run the QA.
# Arguments: option=value pairs, forwarded untouched to parseConfig and updateQA
# Returns:   1 when no arguments are given, when parseConfig rejects the
#            arguments or when ALICE_ROOT is not set; otherwise the status
#            of updateQA
main()
{
  if [[ -z $1 ]]; then
    runQAusage
    return 1
  fi

  # print the help directly instead of re-executing ${0}: that only works
  # when the script is executable and reachable under the name in ${0}
  if ! parseConfig "$@"; then
    runQAusage
    return 1
  fi

  if [[ -z ${ALICE_ROOT} ]]; then
    echo "ALICE_ROOT not defined"
    return 1
  fi

  # a raw:// OCDB storage triggers alien-token-init before the QA starts
  local ocdbregex='raw://'
  if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
    alien-token-init
  fi

  # quoted so that option values containing whitespace stay one word
  updateQA "$@"
}
28
# Run the QA of every detector script found in
# ${ALICE_ROOT}/PWGPP/QA/detectorQAscripts and move the validated output to
# the output directory.
#
# For each detector script (*.sh) that is not excluded:
#   1. the script is sourced; it may define runLevelQA, periodLevelQA,
#      runLevelHighPtTreeQA and periodLevelHighPtTreeQA
#   2. every entry of ${inputList} is processed in a temporary run directory
#   3. for every production touched, the validated run directories are moved
#      to the output directory, the trending files are merged and
#      periodLevelQA is run in a clean temporary directory
#
# Arguments: option=value pairs, handed to parseConfig
# Globals:   reads the configuration set by parseConfig (inputList,
#            workingDirectory, outputDirectory, logDirectory,
#            excludeDetectors, includeDetectors, inputListHighPtTrees),
#            ALICE_ROOT and the run data set by guessRunData; planB is set
#            by validate
# Outputs:   after the lock is taken, stdout and stderr of the whole shell are
#            redirected to the log file
# Returns:   1 on setup problems (input list, working/log directory, lock)
updateQA()
{
  umask 0002
  parseConfig "$@"

  #be paranoid and make some full paths
  if [[ ! -f ${inputList} ]]; then
    echo "no input list: ${inputList}"
    return 1
  fi
  inputList=$(get_realpath "${inputList}")
  local requestedWorkingDirectory=${workingDirectory}
  mkdir -p "${workingDirectory}"
  # resolve by entering the directory itself; entering the parent and
  # appending the last component gives <dir>/<dir> for a relative
  # single-component path
  workingDirectory=$(cd "${workingDirectory}" 2>/dev/null && echo "${PWD}")
  if [[ -z ${workingDirectory} || ! -d ${workingDirectory} ]]; then
    echo "working dir ${requestedWorkingDirectory} does not exist and cannot be created"
    return 1
  fi
  cd "${workingDirectory}" || return 1

  echo JOB config:
  echo "inputList=${inputList}"
  echo "outputDirectory=${outputDirectory}"
  echo

  dateString=$(date +%Y-%m-%d-%H-%M)
  echo "Start time QA process: $dateString"

  #logging
  mkdir -p "${logDirectory}"
  [[ ! -d ${logDirectory} ]] && echo "no log dir $logDirectory" && return 1
  logFile="${logDirectory}/${0##*/}.${dateString}.log"
  touch "${logFile}"
  [[ ! -f ${logFile} ]] && echo "cannot write logfile $logFile" && return 1
  echo "logFile = $logFile"

  #check lock
  lockFile=${logDirectory}/runQA.lock
  [[ -f ${lockFile} ]] && echo "lock ${lockFile} exists!" | tee "${logFile}" && return 1
  touch "${lockFile}"
  [[ ! -f ${lockFile} ]] && echo "cannot lock $lockFile" | tee "${logFile}" && return 1

  #from here on everything goes to the log file
  exec &>"${logFile}"

  ################################################################
  #ze detector loop
  for detectorScript in "${ALICE_ROOT}"/PWGPP/QA/detectorQAscripts/*; do
    echo
    echo "##############################################"
    unset planB
    [[ ${detectorScript} == *.sh ]] || continue
    detector=${detectorScript%.sh}
    detector=${detector##*/}

    #skip if excluded (substring match against the configured list)
    if [[ "${excludeDetectors}" =~ ${detector} ]]; then
      echo "${detector} is excluded in config, skipping..."
      continue
    fi

    #if includeDetectors set, only process those detectors specified there
    if [[ -n ${includeDetectors} && ! "${includeDetectors}" =~ ${detector} ]]; then
      echo "${detector} not included in includeDetectors, skipping..."
      continue
    fi

    logSummary=${logDirectory}/summary-${detector}-${dateString}.log
    outputDir=$(substituteDetectorName "${detector}" "${outputDirectory}")
    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}-${dateString}
    if ! mkdir -p "${tmpDetectorRunDir}"; then
      echo "cannot create the temp dir $tmpDetectorRunDir"
      continue
    fi
    if ! cd "${tmpDetectorRunDir}"; then
      echo "cannot enter the temp dir $tmpDetectorRunDir"
      continue
    fi

    tmpPrefix=${tmpDetectorRunDir}/${outputDir}
    echo "running QA for ${detector}"
    echo "  outputDir=$outputDir"
    echo "  tmpPrefix=$tmpPrefix"

    #forget the hooks of the previous detector before loading the new ones
    unset -f runLevelQA
    unset -f periodLevelQA
    unset -f runLevelHighPtTreeQA
    unset -f periodLevelHighPtTreeQA
    source "${detectorScript}"

    #################################################################
    #produce the QA and trending tree for each file (run)
    unset arrOfTouchedProductions
    declare -A arrOfTouchedProductions
    while read -r qaFile; do
      echo

      #first check if input file exists
      if [[ ! -f ${qaFile%\#*} ]]; then
        echo "file ${qaFile%\#*} not accessible"
        continue
      fi

      if ! guessRunData "${qaFile}"; then
        echo "could not guess run data from ${qaFile}"
        continue
      fi

      tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
      tmpRunDir=${tmpProductionDir}/000${runNumber}
      #never run the QA in the wrong directory
      if ! { mkdir -p "${tmpRunDir}" && cd "${tmpRunDir}"; }; then
        echo "cannot use the temp run dir ${tmpRunDir}"
        continue
      fi

      #by default we expect to have everything in the same archive
      highPtTree=${qaFile}

      #maybe the input is not an archive, but a file
      [[ "${qaFile}" =~ QAresults.root$ ]] && highPtTree=""
      [[ "${qaFile}" =~ FilterEvents_Trees.root$ ]] && qaFile=""

      #it is possible we get the highPt trees from somewhere else
      #search the list of high pt trees for the proper run number
      if [[ -n ${inputListHighPtTrees} ]]; then
        highPtTree=$(grep -E -m1 -- "${runNumber}" "${inputListHighPtTrees}")
        echo "loaded the highPtTree ${highPtTree} from external file ${inputListHighPtTrees}"
      fi

      echo "qaFile=${qaFile}"
      echo "highPtTree=${highPtTree}"

      #what if we have a zip archive?
      if [[ "${qaFile}" =~ \.zip$ ]]; then
        if unzip -l "${qaFile}" | grep -E "QAresults.root" &>/dev/null; then
          qaFile="${qaFile}#QAresults.root"
        else
          qaFile=""
        fi
      fi
      if [[ "${highPtTree}" =~ \.zip$ ]]; then
        if unzip -l "${highPtTree}" | grep -E "FilterEvents_Trees.root" &>/dev/null; then
          highPtTree="${highPtTree}#FilterEvents_Trees.root"
        else
          highPtTree=""
        fi
      fi

      if [[ -n ${qaFile} && $(type -t runLevelQA) =~ "function" ]]; then
        echo "running ${detector} runLevelQA for run ${runNumber} from ${qaFile}"
        runLevelQA "${qaFile}" &> runLevelQA.log
        #perform some default actions:
        #if trending.root not created, create a default one
        if [[ ! -f trending.root ]]; then
          aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" 2>&1 | tee -a runLevelQA.log
        fi
        if [[ -f trending.root ]]; then
          arrOfTouchedProductions[${tmpProductionDir}]=1
        else
          echo "trending.root not created"
        fi
      fi
      #expert QA based on high pt trees
      if [[ -n ${highPtTree} && $(type -t runLevelHighPtTreeQA) =~ "function" ]]; then
        echo "running ${detector} runLevelHighPtTreeQA for run ${runNumber} from ${highPtTree}"
        runLevelHighPtTreeQA "${highPtTree}" &> runLevelHighPtTreeQA.log
        arrOfTouchedProductions[${tmpProductionDir}]=1
      fi

      cd "${tmpDetectorRunDir}"

    done < "${inputList}"

    #################################################################
    #cache which productions were (re)done
    echo "list of processed productions:"
    echo "    ${!arrOfTouchedProductions[@]}"
    echo

    #################################################################
    #(re)do the merging/trending
    for tmpProductionDir in "${!arrOfTouchedProductions[@]}"; do
      if ! cd "${tmpProductionDir}"; then
        echo "cannot enter ${tmpProductionDir}"
        continue
      fi
      echo
      echo "running period level stuff in ${tmpProductionDir}"

      #quoted prefix: strip it literally, not as a glob pattern
      productionDir=${outputDir}/${tmpProductionDir#"${tmpPrefix}"}
      echo "productionDir=${productionDir}"

      mkdir -p "${productionDir}"
      if [[ ! -d ${productionDir} ]]; then
        echo "cannot make productionDir $productionDir"
        continue
      fi

      #move runs to final destination
      for dir in "${tmpProductionDir}"/000*; do
        echo
        oldRunDir=${outputDir}/${dir#"${tmpPrefix}"}
        if ! guessRunData "${dir}/dummyName"; then
          echo "could not guess run data from ${dir}"
          continue
        fi

        #before moving - VALIDATE!!!
        if ! validate "${dir}"; then
          continue
        fi

        if [[ -d ${oldRunDir} ]]; then
          echo "removing old ${oldRunDir}"
          rm -rf "${oldRunDir}"
        fi
        echo "moving new ${runNumber} to ${productionDir}"
        mv -f "${dir}" "${productionDir}"
      done

      #go to a temp dir to do the period level stuff in a completely clean dir
      tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
      echo
      echo "tmpPeriodLevelQAdir=${tmpPeriodLevelQAdir}"
      if ! mkdir -p "${tmpPeriodLevelQAdir}"; then continue; fi
      if ! cd "${tmpPeriodLevelQAdir}"; then continue; fi

      #link the final list of per-run dirs here, just the dirs
      #to have a clean working directory
      unset linkedStuff
      declare -a linkedStuff
      for x in "${productionDir}"/000*; do
        [[ -d ${x} ]] && ln -s "${x}" && linkedStuff+=("${x##*/}")
      done

      #merge trending files if any
      if /bin/ls 000*/trending.root &>/dev/null; then
        hadd trending.root 000*/trending.root &> periodLevelQA.log
      fi

      #run the period level trending/QA
      if [[ -f "trending.root" && $(type -t periodLevelQA) =~ "function" ]]; then
        echo "running ${detector} periodLevelQA for production ${period}/${pass}"
        periodLevelQA trending.root &>> periodLevelQA.log
      else
        echo "WARNING: not running ${detector} periodLevelQA for production ${period}/${pass}, no trending.root"
      fi

      if ! validate "${PWD}"; then continue; fi

      #here we are validated so move the produced QA to the final place
      #clean up linked stuff first
      if (( ${#linkedStuff[@]} > 0 )); then
        rm "${linkedStuff[@]}"
      fi
      #some of the output could be a directory, so handle that
      #TODO: maybe use rsync?
      for x in "${tmpPeriodLevelQAdir}"/*; do
        if [[ -d ${x} ]]; then
          echo "removing ${productionDir}/${x##*/}"
          rm -rf "${productionDir}/${x##*/}"
          echo "moving ${x} to ${productionDir}"
          mv "${x}" "${productionDir}"
        fi
        if [[ -f ${x} ]]; then
          echo "moving ${x} to ${productionDir}"
          mv -f "${x}" "${productionDir}"
        fi
      done

      #remove the temp dir
      rm -rf "${tmpPeriodLevelQAdir}"

    done

    cd "${workingDirectory}"

    #keep the temp dir for inspection when validation failed (planB set)
    if [[ -z ${planB} ]]; then
      echo
      echo "removing ${tmpDetectorRunDir}"
      rm -rf "${tmpDetectorRunDir}"
    else
      executePlanB
    fi
  done #end of detector loop

  #remove lock
  rm -f "${lockFile}"
}
297
# In case of emergency: mail the log summary to the people listed in MAILTO.
# Globals:  MAILTO (read) - whitespace separated list of recipients; nothing
#                           happens when it is empty
#           logSummary (read) - file sent as the mail body
# Outputs:  a short notice on stdout when a mail is sent
# Returns:  0 when MAILTO is empty, otherwise the exit status of mail
executePlanB()
{
  [[ -n ${MAILTO} ]] || return 0

  echo
  echo "trouble detected, sending email to ${MAILTO}"

  # still send the alert (with an empty body) when the summary is missing
  local summary=${logSummary}
  [[ -r ${summary} ]] || summary=/dev/null

  # split the recipients explicitly: keeps several addresses working without
  # exposing them to pathname expansion
  local -a recipients
  read -r -a recipients <<< "${MAILTO}"

  mail -s "qa in need of assistance" "${recipients[@]}" < "${summary}"
}
308
# Validate a directory by summarizing its logs.
# Arguments: $1 - directory handed to summarizeLogs
# Globals:   logSummary (read)  - file the summary is appended to
#            logStatus (written) - exit status of summarizeLogs
#            planB (written)     - set to 1 when the validation fails
# Outputs:   a warning on stdout when the directory is not validated
# Returns:   0 when summarizeLogs succeeded, 1 otherwise
validate()
{
  # quoted: directory and summary file may contain whitespace
  summarizeLogs "${1}" >> "${logSummary}"
  logStatus=$?
  if [[ ${logStatus} -ne 0 ]]; then
    echo "WARNING not validated: ${1}"
    planB=1
    return 1
  fi
  return 0
}
320
# Print a one-line verdict for every log file of a directory and produce
# stack traces for core files.
# Arguments: $1 - directory to inspect; the current directory is used when
#                 it is not a directory
# Globals:   pretend (read) - when set, rec.log is randomly reported as BAD
# Outputs:   "<log> OK", "<log> BAD <errors>" or "<log> OK MWAH <warnings>"
#            per log on stdout, followed by the path of each core file
#            found; stacktrace_*.log files are written to the current
#            directory
# Returns:   0 when validateLog returned 0 for every log, 1 otherwise
summarizeLogs()
{
  local dir=$1
  [[ ! -d ${dir} ]] && dir=${PWD}

  # names (globs) of the files that are treated as logs
  local -a logPatterns=(
      "*.log"
      "stdout"
      "stderr"
  )

  #check logs
  local logstatus=0
  local pattern log finallog errorSummary validationStatus
  for pattern in "${logPatterns[@]}"; do
    # every pattern is looked up inside ${dir}; ${pattern} stays unquoted
    # on purpose so that the glob is expanded
    for log in "${dir}"/${pattern}; do
      [[ ! -f ${log} ]] && continue
      # report an absolute path without prepending the cwd to one that
      # already is absolute
      if [[ ${log} == /* ]]; then
        finallog=${log}
      else
        finallog=${PWD%/}/${log}
      fi
      errorSummary=$(validateLog "${log}")
      validationStatus=$?
      # NOTE(review): warnings (status 2) are printed as "OK MWAH" but still
      # fail the validation - kept as is, confirm that this is intended
      [[ ${validationStatus} -ne 0 ]] && logstatus=1
      if [[ ${validationStatus} -eq 0 ]]; then
        #in pretend mode randomly report an error in rec.log some cases
        if [[ -n ${pretend} && "${log##*/}" == "rec.log" ]]; then
          if (( RANDOM % 2 >= 1 )); then
            echo "${finallog} BAD random error"
          else
            echo "${finallog} OK"
          fi
        else
          echo "${finallog} OK"
        fi
      elif [[ ${validationStatus} -eq 1 ]]; then
        echo "${finallog} BAD ${errorSummary}"
      elif [[ ${validationStatus} -eq 2 ]]; then
        echo "${finallog} OK MWAH ${errorSummary}"
      fi
    done
  done

  #report core files found below and in the current directory
  local core
  for core in "${PWD}"/*/core "${PWD}"/core; do
    [[ -e ${core} ]] || continue
    echo "${core}"
    chmod 644 "${core}"
    gdb --batch --quiet -ex "bt" -ex "quit" aliroot "${core}" > "stacktrace_${core//\//_}.log"
  done

  return ${logstatus}
}
364
# Scan a log file for known error and warning signatures.
# Arguments: $1 - log file to check
# Outputs:   the first matching line of every matching condition, each
#            followed by " : ", concatenated on one line; errors take
#            precedence over warnings
# Returns:   1 when an error condition matched, 2 when only a warning
#            condition matched, 0 otherwise
validateLog()
{
  local log=${1}

  # basic regular expressions handed to grep
  local -a errorConditions=(
            'There was a crash'
            'floating'
            'error while loading shared libraries'
            'std::bad_alloc'
            's_err_syswatch_'
            'Thread [0-9]* (Thread'
            'AliFatal'
            'core dumped'
            '\.C.*error:.*\.h: No such file'
            'segmentation'
            'Interpreter error recovered'
  )

  local -a warningConditions=(
            'This is serious'
  )

  local errorSummary=""
  local warningSummary=""
  local condition match

  for condition in "${errorConditions[@]}"; do
    match=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${match} ]] && errorSummary+="${match} : "
  done

  for condition in "${warningConditions[@]}"; do
    match=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${match} ]] && warningSummary+="${match} : "
  done

  if [[ -n ${errorSummary} ]]; then
    echo "${errorSummary}"
    return 1
  fi

  if [[ -n ${warningSummary} ]]; then
    echo "${warningSummary}"
    return 2
  fi

  return 0
}
414
# Set the configuration: defaults first, then the config file (if any),
# then the command line options, which override the config file.
# Arguments: option=value words; a leading "--" is accepted and ignored
# Globals:   writes configFile, inputList, workingDirectory, outputDirectory,
#            excludeDetectors, logDirectory and every variable named by an
#            option or set by the sourced config file
# Outputs:   an error message on stdout when the config file is missing or
#            an option is badly formatted
# Returns:   0 on success, 1 on a missing config file or a bad option
parseConfig()
{
  #config file
  configFile=""
  #where to search for qa files
  inputList=file.list
  #working directory
  workingDirectory="${PWD}"
  #where to place the final qa plots
  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
  outputDirectory="${workingDirectory}/%DET"
  #filter out detector option
  excludeDetectors="EXAMPLE"
  #logs
  logDirectory=${workingDirectory}/logs
  #OCDB storage
  #ocdbStorage="raw://"
  #email to
  #MAILTO="fbellini@cern.ch"

  local _opt _name _value

  #first, check if the config file is configured
  #if yes - source it so that other options can override it
  for _opt in "$@"; do
    _opt=${_opt#--}
    if [[ ${_opt} == configFile=* ]]; then
      configFile=${_opt#configFile=}
      if [[ ! -f ${configFile} ]]; then
        echo "configFile ${configFile} not found, exiting..."
        return 1
      fi
      source "${configFile}"
      break
    fi
  done

  #then, parse the options as they override the options from file
  while [[ -n ${1} ]]; do
    _opt=${1#--}
    _name=${_opt%%=*}
    _value=${_opt#*=}
    # the value is assigned literally (no eval): whitespace and shell
    # metacharacters in it are data, never executed
    if [[ ${_opt} == *=* && ${_name} =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      printf -v "${_name}" '%s' "${_value}"
    else
      echo "badly formatted option ${_opt}, should be: option=value, stopping..."
      return 1
    fi
    shift
  done
}
459
# Guess the run data from the components of a file path, scanning from the
# rightmost component to the left; for every quantity the leftmost matching
# component wins.
# Arguments: $1 - path of a QA file
# Globals:   writes period, runNumber, year, pass, legoTrainRunNumber and
#            dataType
# Returns:   0 when a run number was found, 1 otherwise
guessRunData()
{
  period=""
  runNumber=""
  year=""
  pass=""
  legoTrainRunNumber=""
  dataType=""

  local shortRunNumber=""
  local -a path
  # split on "/" without exposing the components to pathname expansion
  IFS="/" read -r -a path <<< "$1"
  local dirDepth=$(( ${#path[@]} - 1 ))

  local x field fieldPrev fieldNext
  for ((x = dirDepth; x >= 0; x--)); do
    # no previous component at the leftmost position: do not reuse the
    # value of the preceding iteration
    fieldPrev=""
    (( x >= 1 )) && fieldPrev=${path[x-1]}
    field=${path[x]}
    fieldNext=${path[x+1]}

    # a purely numeric directory directly holding the file
    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
    [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
  done

  # without a lego train directory the pass is the directory of the file
  if [[ -z ${legoTrainRunNumber} ]] && (( dirDepth >= 1 )); then
    pass=${path[dirDepth-1]}
  fi
  # for "sim" paths the pass is fixed and the six-digit directory is the run
  [[ "${dataType}" =~ ^sim$ ]] && pass="passMC" && runNumber=${shortRunNumber}

  #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber} || -z ${pass} ]];
  if [[ -z ${runNumber} ]]; then
    #error condition
    return 1
  fi
  #ALL OK
  return 0
}
503
# Insert the detector name into a directory template.
# Arguments: $1 - detector name
#            $2 - template; %det is replaced by the lower-cased detector
#                 name, %DET by the name as given
# Outputs:   the resulting directory on stdout, always exactly one line; a
#            template without a placeholder is printed unchanged, so the
#            caller never ends up with an empty output directory
# Returns:   0
substituteDetectorName()
{
  local det=$1
  local dir=$2
  # the % is escaped: a pattern starting with a bare % would anchor the
  # match at the end of the string
  dir=${dir//\%det/${det,,}}
  dir=${dir//\%DET/${det}}
  echo "${dir}"
}
511
# Print the absolute path of an existing regular file.
# The directory part is made absolute by entering it; the current directory
# of the caller is left untouched. NOTE: links are not resolved explicitly.
# Arguments: $1 - path of the file
# Outputs:   the absolute path on stdout
# Returns:   0 on success, 1 when $1 is not an existing regular file
get_realpath()
{
  # file *must* exist
  [[ -f "$1" ]] || return 1

  local fileName=${1##*/}
  local parentDir
  case "$1" in
    */*)
      parentDir=${1%/*}
      # a file directly below the root directory
      [[ -z ${parentDir} ]] && parentDir=/
      ;;
    *)
      # file *must* be local
      parentDir=.
      ;;
  esac

  # resolve in a subshell so that neither PWD nor OLDPWD of the caller
  # change; CDPATH is emptied so that cd cannot jump somewhere else
  local absoluteDir
  if ! absoluteDir=$(CDPATH= cd -- "${parentDir}" &>/dev/null && echo "${PWD}"); then
    absoluteDir=${PWD}
  fi

  # reassemble realpath
  echo "${absoluteDir%/}/${fileName}"
  return 0 # success
}
536
# hand the command line over untouched: quoted so that options whose value
# contains whitespace are not split into several words
main "$@"