]> git.uio.no Git - u/mrichter/AliRoot.git/blob - PWGPP/QA/scripts/runQA.sh
Merge branch 'TPCdev' of https://git.cern.ch/reps/AliRoot into TPCdev
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh
1 #!/bin/bash
main()
{
  # Entry point of the script.
  # Arguments: option=value pairs (see the usage text below); they are
  #            parsed by parseConfig and handed on to updateQA.
  # Returns:   1 when called without arguments, when the options cannot be
  #            parsed or when ALICE_ROOT is not set; otherwise the status
  #            of updateQA.
  if [[ -z $1 ]]; then
    echo "Usage: "
    echo "  ${0##*/} option=value [option=value]"
    echo "  at least inputList should be specified, or configFile containing it:"
    echo "  ${0##*/} inputList=file.list"
    echo "  options override config file (if any), e.g.:"
    echo "  ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
    return 1
  fi

  if ! parseConfig "$@"; then
    # show the usage text; calling main without arguments prints it and does
    # not depend on ${0} being executable or reachable through PATH
    main
    return 1
  fi

  if [[ -z ${ALICE_ROOT} ]]; then
    echo "ALICE_ROOT not defined"
    return 1
  fi

  # an OCDB storage containing raw:// triggers alien-token-init
  local ocdbregex='raw://'
  if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
    alien-token-init
  fi

  # "$@" keeps every option=value pair as one word, even with spaces inside
  updateQA "$@"
}
28
updateQA()
{
  # Runs the QA of every detector script found in
  # ${ALICE_ROOT}/PWGPP/QA/detectorQAscripts over the files listed in
  # ${inputList} and moves the validated output below the output directory.
  #
  # Arguments: option=value pairs, handed to parseConfig.
  # Hooks:     each detector script is sourced and may define runLevelQA,
  #            periodLevelQA and runLevelHighPtTreeQA; a hook only runs when
  #            it is defined as a function.
  # Globals:   the working variables (detector, outputDir, tmpRunDir,
  #            logSummary, ...) are intentionally NOT local: validate and
  #            executePlanB read logSummary, and the sourced hooks may rely
  #            on the others (not visible from here).
  # Returns:   1 when the setup (config, input list, directories, log file,
  #            lock) fails, otherwise the status of removing the lock file.
  umask 0002
  parseConfig "$@" || return 1

  #be paranoid and make some full paths
  if [[ ! -f ${inputList} ]]; then
    echo "no input list: ${inputList}"
    return 1
  fi
  inputList=$(get_realpath "${inputList}")
  local requestedWorkingDirectory=${workingDirectory}
  mkdir -p "${workingDirectory}"
  # resolve to an absolute path by entering the directory itself; this also
  # works for relative single-component names and for directories below /
  workingDirectory=$(CDPATH= cd -- "${workingDirectory}" 2>/dev/null && pwd)
  if [[ -z ${workingDirectory} || ! -d ${workingDirectory} ]]; then
    echo "working dir ${requestedWorkingDirectory} does not exist and cannot be created"
    return 1
  fi
  cd "${workingDirectory}" || return 1

  echo JOB config:
  echo "inputList=${inputList}"
  echo "outputDirectory=${outputDirectory}"
  echo

  dateString=$(date +%Y-%m-%d-%H-%M)
  echo "Start time QA process: $dateString"

  #logging
  mkdir -p "${logDirectory}"
  if [[ ! -d ${logDirectory} ]]; then
    echo "no log dir $logDirectory"
    return 1
  fi
  logFile="$logDirectory/${0##*/}.${dateString}.log"
  touch "${logFile}"
  if [[ ! -f ${logFile} ]]; then
    echo "cannot write logfile $logFile"
    return 1
  fi
  echo "logFile = $logFile"

  #check lock
  lockFile=${logDirectory}/runQA.lock
  if [[ -f ${lockFile} ]]; then
    echo "lock ${lockFile} exists!" | tee "${logFile}"
    return 1
  fi
  touch "${lockFile}"
  if [[ ! -f ${lockFile} ]]; then
    echo "cannot lock $lockFile" | tee "${logFile}"
    return 1
  fi

  # from here on everything (stdout and stderr) goes to the log file
  exec &>"${logFile}"

  ################################################################
  #ze detector loop
  for detectorScript in "${ALICE_ROOT}"/PWGPP/QA/detectorQAscripts/*; do
    # planB is set by validate when something fails for this detector
    unset planB
    [[ ${detectorScript} =~ .*\.sh$ ]] || continue
    detector=${detectorScript%.sh}
    detector=${detector##*/}

    #skip if excluded
    if [[ "${excludeDetectors}" =~ ${detector} ]]; then
      echo "${detector} is excluded in config, skipping..."
      continue
    fi

    #if includeDetectors set, only process the detectors specified there
    if [[ -n ${includeDetectors} && ! "${includeDetectors}" =~ ${detector} ]]; then
      echo "${detector} not included in includeDetectors, skipping..."
      continue
    fi

    logSummary=${logDirectory}/summary-${detector}-${dateString}.log
    outputDir=$(substituteDetectorName "${detector}" "${outputDirectory}")
    # never continue with an empty output dir: the production dirs (and the
    # rm -rf / mv below) would end up directly under the filesystem root
    [[ -z ${outputDir} ]] && outputDir=${outputDirectory}
    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}-${dateString}
    if ! mkdir -p "${tmpDetectorRunDir}"; then
      echo "cannot create the temp dir $tmpDetectorRunDir"
      continue
    fi
    cd "${tmpDetectorRunDir}" || continue

    tmpPrefix=${tmpDetectorRunDir}/${outputDir}
    echo
    echo "##############################################"
    echo "running QA for ${detector}"
    echo "  outputDir=$outputDir"
    echo "  tmpPrefix=$tmpPrefix"

    # forget the hooks of the previous detector before loading the next ones
    unset -f runLevelQA
    unset -f periodLevelQA
    unset -f runLevelHighPtTreeQA
    unset -f periodLevelHighPtTreeQA
    source "${detectorScript}"

    #################################################################
    #produce the QA and trending tree for each file (run)
    unset arrOfTouchedProductions
    declare -A arrOfTouchedProductions
    while read -r qaFile; do
      echo

      if ! guessRunData "${qaFile}"; then
        echo "could not guess run data from ${qaFile}"
        continue
      fi

      tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
      tmpRunDir=${tmpProductionDir}/000${runNumber}
      mkdir -p "${tmpRunDir}"
      cd "${tmpRunDir}" || continue

      #by default we expect to have everything in the same archive
      highPtTree=${qaFile}

      #maybe the input is not an archive, but a file
      [[ "${qaFile}" =~ "QAresults.root" ]] && highPtTree=""
      [[ "${qaFile}" =~ "FilterEvents_Trees.root" ]] && qaFile=""

      #it is possible we get the highPt trees from somewhere else
      #search the list of high pt trees for the proper run number
      if [[ -n ${inputListHighPtTrees} ]]; then
        highPtTree=$(grep -E -m1 -- "${runNumber}" "${inputListHighPtTrees}")
        echo "loaded the highPtTree ${highPtTree} from external file ${inputListHighPtTrees}"
      fi

      echo "qaFile=${qaFile}"
      echo "highPtTree=${highPtTree}"

      #what if we have a zip archive?
      #address the member inside the archive, or drop the input if missing
      if [[ "${qaFile}" =~ \.zip$ ]]; then
        if unzip -l "${qaFile}" | grep -E "QAresults.root" &>/dev/null; then
          qaFile="${qaFile}#QAresults.root"
        else
          qaFile=""
        fi
      fi
      if [[ "${highPtTree}" =~ \.zip$ ]]; then
        if unzip -l "${highPtTree}" | grep -E "FilterEvents_Trees.root" &>/dev/null; then
          highPtTree="${highPtTree}#FilterEvents_Trees.root"
        else
          highPtTree=""
        fi
      fi

      if [[ -n ${qaFile} && $(type -t runLevelQA) =~ "function" ]]; then
        echo "running ${detector} runLevelQA for run ${runNumber} from ${qaFile}"
        runLevelQA "${qaFile}" &> runLevelQA.log
        #perform some default actions:
        #if trending.root not created, create a default one
        if [[ ! -f trending.root ]]; then
          aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" &>> runLevelQA.log
        fi
        arrOfTouchedProductions[${tmpProductionDir}]=1
      fi
      #expert QA based on high pt trees
      if [[ -n ${highPtTree} && $(type -t runLevelHighPtTreeQA) =~ "function" ]]; then
        echo "running ${detector} runLevelHighPtTreeQA for run ${runNumber} from ${highPtTree}"
        runLevelHighPtTreeQA "${highPtTree}" &> runLevelHighPtTreeQA.log
        arrOfTouchedProductions[${tmpProductionDir}]=1
      fi

      cd "${tmpDetectorRunDir}"

    done < "${inputList}"

    #################################################################
    #cache which productions were (re)done
    echo "list of processed productions:"
    echo "    ${!arrOfTouchedProductions[*]}"
    echo

    #################################################################
    #(re)do the merging/trending
    for tmpProductionDir in "${!arrOfTouchedProductions[@]}"; do
      cd "${tmpProductionDir}" || continue
      echo
      echo "running period level stuff in ${tmpProductionDir}"

      # same path as in the temp area, but below the final output dir
      productionDir=${outputDir}/${tmpProductionDir#"${tmpPrefix}"}
      echo "productionDir=${productionDir}"

      mkdir -p "${productionDir}"
      if [[ ! -d ${productionDir} ]]; then
        echo "cannot make productionDir $productionDir"
        continue
      fi

      #move runs to final destination
      for dir in "${tmpProductionDir}"/000*; do
        # an unmatched glob stays literal, skip it
        [[ -d ${dir} ]] || continue
        echo
        oldRunDir=${outputDir}/${dir#"${tmpPrefix}"}
        if ! guessRunData "${dir}/dummyName"; then
          echo "could not guess run data from ${dir}"
          continue
        fi

        #before moving - VALIDATE!!!
        if ! validate "${dir}"; then
          continue
        fi

        if [[ -d ${oldRunDir} ]]; then
          echo "removing old ${oldRunDir}"
          rm -rf "${oldRunDir}"
        fi
        echo "moving new ${runNumber} to ${productionDir}"
        mv -f "${dir}" "${productionDir}"
      done

      #go to a temp dir to do the period level stuff
      tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
      echo
      echo "tmpPeriodLevelQAdir=${tmpPeriodLevelQAdir}"
      if ! mkdir -p "${tmpPeriodLevelQAdir}"; then continue; fi
      cd "${tmpPeriodLevelQAdir}" || continue

      #link the final list of per-run dirs here, just the dirs
      #to have a clean working directory
      unset linkedStuff
      declare -a linkedStuff
      for x in "${productionDir}"/000*; do
        [[ -d ${x} ]] || continue
        ln -s "${x}" && linkedStuff+=("${x##*/}")
      done

      #merge trending files if any
      if /bin/ls 000*/trending.root &>/dev/null; then
        hadd trending.root 000*/trending.root &> periodLevelQA.log
      fi

      #run the period level trending/QA
      if [[ -f "trending.root" && $(type -t periodLevelQA) =~ "function" ]]; then
        echo "running ${detector} periodLevelQA for production ${period}/${pass}"
        periodLevelQA trending.root &>> periodLevelQA.log
      else
        echo "WARNING: not running ${detector} periodLevelQA for production ${period}/${pass}, no trending.root"
      fi

      if ! validate "${PWD}"; then continue; fi

      #here we are validated so move the produced QA to the final place
      #clean up linked stuff first
      if (( ${#linkedStuff[@]} > 0 )); then
        rm -- "${linkedStuff[@]}"
      fi
      #some of the output could be a directory, so handle that
      #TODO: maybe use rsync?
      for x in "${tmpPeriodLevelQAdir}"/*; do
        if [[ -d ${x} ]]; then
          echo "removing ${productionDir}/${x##*/}"
          rm -rf "${productionDir}/${x##*/}"
          echo "moving ${x} to ${productionDir}"
          mv "${x}" "${productionDir}"
        elif [[ -f ${x} ]]; then
          echo "moving ${x} to ${productionDir}"
          mv -f "${x}" "${productionDir}"
        fi
      done

      #remove the temp dir
      rm -rf "${tmpPeriodLevelQAdir}"

    done

    cd "${workingDirectory}"

    # keep the temp area for inspection when something was not validated
    if [[ -z ${planB} ]]; then
      echo
      echo "removing ${tmpDetectorRunDir}"
      rm -rf "${tmpDetectorRunDir}"
    else
      executePlanB
    fi
  done

  #remove lock
  rm -f "${lockFile}"
}
290
executePlanB()
{
  # In case of emergency: mails the detector log summary to ${MAILTO}.
  # Globals: MAILTO (read) - recipient(s); nothing happens when it is empty
  #          logSummary (read) - file whose content becomes the mail body
  # Returns: 0 when MAILTO is empty, otherwise the status of mail.
  if [[ -n ${MAILTO} ]]; then
    echo
    echo "trouble detected, sending email to ${MAILTO}"

    # still send the alert (with an empty body) when the summary is missing
    local summary=${logSummary}
    [[ -r ${summary} ]] || summary=/dev/null

    # MAILTO is left unquoted on purpose so that a space separated list of
    # recipients is passed as separate addresses
    # shellcheck disable=SC2086
    mail -s "qa in need of assistance" ${MAILTO} < "${summary}"
  fi
}
301
validate()
{
  # Validates the logs of a directory and records the outcome.
  # Arguments: $1 - directory to check (handed to summarizeLogs)
  # Globals:   logSummary (read) - file the summary is appended to
  #            planB (written) - set to 1 when the validation fails
  # Outputs:   a warning on stdout when the directory is not validated
  # Returns:   0 when summarizeLogs succeeds, 1 otherwise
  local logStatus=0
  # quoted so that directories and summary paths with spaces stay one word
  summarizeLogs "${1}" >> "${logSummary}" || logStatus=$?
  if [[ ${logStatus} -ne 0 ]]; then
    echo "WARNING not validated: ${1}"
    planB=1
    return 1
  fi
  return 0
}
313
summarizeLogs()
{
  # Prints a one-line verdict for every log file of a directory and dumps a
  # stack trace for every core file found below the current directory.
  # Arguments: $1 - directory holding the logs; ${PWD} is used when it is
  #                 not a directory
  # Globals:   pretend (read) - when set, rec.log randomly reports an error
  # Outputs:   "<log> OK", "<log> BAD <errors>" or "<log> OK MWAH <warnings>"
  #            per log file and the path of every core file, on stdout
  # Returns:   1 when validateLog reported a non-zero status for any log
  #            (warnings included), 0 otherwise
  local dir=$1
  [[ ! -d ${dir} ]] && dir=${PWD}

  #print a summary of logs
  local -a logFiles=(
      "*.log"
      "stdout"
      "stderr"
  )

  #check logs
  local logstatus=0
  local pattern log finallog errorSummary validationStatus
  # every pattern is looked up inside ${dir}, not only the first one
  for pattern in "${logFiles[@]}"; do
    # ${pattern} stays unquoted so that "*.log" is expanded by the shell
    for log in "${dir}"/${pattern}; do
      [[ -f ${log} ]] || continue
      # report an absolute path, without prefixing one that already is
      if [[ ${log} == /* ]]; then
        finallog=${log}
      else
        finallog=${PWD%/}/${log}
      fi
      errorSummary=$(validateLog "${log}")
      validationStatus=$?
      [[ ${validationStatus} -ne 0 ]] && logstatus=1
      if [[ ${validationStatus} -eq 0 ]]; then
        #in pretend mode randomly report an error in rec.log some cases
        if [[ -n ${pretend} && "${log##*/}" == "rec.log" ]]; then
          if (( RANDOM % 2 >= 1 )); then
            echo "${finallog} BAD random error"
          else
            echo "${finallog} OK"
          fi
        else
          echo "${finallog} OK"
        fi
      elif [[ ${validationStatus} -eq 1 ]]; then
        echo "${finallog} BAD ${errorSummary}"
      elif [[ ${validationStatus} -eq 2 ]]; then
        echo "${finallog} OK MWAH ${errorSummary}"
      fi
    done
  done

  #report core files
  local core
  for core in "${PWD}"/*/core "${PWD}"/core; do
    # unmatched globs stay literal, only real files are core dumps
    [[ -f ${core} ]] || continue
    echo "${core}"
    chmod 644 "${core}"
    gdb --batch --quiet -ex "bt" -ex "quit" aliroot "${core}" > "stacktrace_${core//\//_}.log"
  done

  return ${logstatus}
}
357
validateLog()
{
  # Searches a log file for known error and warning messages.
  # Arguments: $1 - log file to check
  # Outputs:   the first matching line of every error condition (or, when
  #            there is no error, of every warning condition), each
  #            followed by " : ", on stdout
  # Returns:   1 when an error was found, 2 when only a warning was found,
  #            0 when the log is clean
  local log=${1}
  local -a errorConditions=(
            'There was a crash'
            'floating'
            'error while loading shared libraries'
            'std::bad_alloc'
            's_err_syswatch_'
            'Thread [0-9]* (Thread'
            'AliFatal'
            'core dumped'
            '\.C.*error:.*\.h: No such file'
            'segmentation'
            'Interpreter error recovered'
  )

  local -a warningConditions=(
            'This is serious'
  )

  local errorSummary=""
  local warningSummary=""
  local condition hit

  for condition in "${errorConditions[@]}"; do
    # -m1: the first occurrence is enough for the summary
    hit=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${hit} ]] && errorSummary+="${hit} : "
  done

  for condition in "${warningConditions[@]}"; do
    hit=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${hit} ]] && warningSummary+="${hit} : "
  done

  if [[ -n ${errorSummary} ]]; then
    echo "${errorSummary}"
    return 1
  fi

  if [[ -n ${warningSummary} ]]; then
    echo "${warningSummary}"
    return 2
  fi

  return 0
}
407
parseConfig()
{
  # Sets the configuration variables: defaults first, then the config file
  # (if a configFile option is given), then the options themselves.
  # Arguments: option=value pairs; a leading "--" on an option is ignored
  # Globals:   configFile, inputList, workingDirectory, outputDirectory,
  #            excludeDetectors, logDirectory (written), plus whatever the
  #            config file and the options assign
  # Outputs:   an error message on stdout when parsing fails
  # Returns:   1 when the config file is missing or an option is not of the
  #            form name=value, 0 otherwise

  #config file
  configFile=""
  #where to search for qa files
  inputList=file.list
  #working directory
  workingDirectory="${PWD}"
  #where to place the final qa plots
  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
  outputDirectory="${workingDirectory}/%DET"
  #filter out detector option
  excludeDetectors="EXAMPLE"
  #logs
  logDirectory=${workingDirectory}/logs
  #OCDB storage
  #ocdbStorage="raw://"
  #email to
  #MAILTO="fbellini@cern.ch"

  local _opt

  #first, check if the config file is configured
  #if yes - source it so that other options can override it
  #if any
  for _opt in "$@"; do
    # accept --configFile=... as well, like the option loop below does
    _opt=${_opt#--}
    if [[ ${_opt} == configFile=* ]]; then
      configFile=${_opt#configFile=}
      if [[ ! -f ${configFile} ]]; then
        echo "configFile ${configFile} not found, exiting..."
        return 1
      fi
      source "${configFile}"
      break
    fi
  done

  #then, parse the options as they override the options from file
  while [[ -n ${1} ]]; do
    _opt=${1#--}
    if [[ ${_opt} =~ ^[A-Za-z_][A-Za-z0-9_]*= ]]; then
      # assign literally instead of eval-ing the argument: values with
      # spaces or shell metacharacters are stored as given, never executed
      printf -v "${_opt%%=*}" '%s' "${_opt#*=}"
    else
      echo "badly formatted option ${_opt}, should be: option=value, stopping..."
      return 1
    fi
    shift
  done
  return 0
}
452
guessRunData()
{
  # Guesses the run properties from the components of a path; when a
  # property occurs more than once the leftmost match wins, as the path is
  # scanned from right to left and later matches overwrite earlier ones.
  # Arguments: $1 - path of a QA file
  # Globals:   period, runNumber, year, pass, legoTrainRunNumber, dataType
  #            (all written, empty when not found)
  # Returns:   0 when a run number was found, 1 otherwise
  period=""
  runNumber=""
  year=""
  pass=""
  legoTrainRunNumber=""
  dataType=""

  local shortRunNumber=""
  local -a path
  # split on "/" without exposing the components to glob expansion
  IFS=/ read -r -a path <<< "$1"
  local dirDepth=$(( ${#path[@]} - 1 ))
  local x field fieldNext
  local fieldPrev=""
  for (( x = dirDepth; x >= 0; x-- )); do

    (( x >= 1 )) && fieldPrev=${path[x-1]}
    field=${path[x]}
    fieldNext=${path[x+1]}

    # a purely numeric directory right above a .zip/.root file
    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
    [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
  done

  # without a lego train number the pass is the directory holding the file;
  # the depth check avoids a negative array subscript on short paths
  if [[ -z ${legoTrainRunNumber} ]] && (( dirDepth >= 1 )); then
    pass=${path[dirDepth-1]}
  fi
  if [[ "${dataType}" =~ ^sim$ ]]; then
    pass="passMC"
    # prefer the six digit run number, but keep the one taken from a
    # 000-prefixed directory when there is no short one
    [[ -n ${shortRunNumber} ]] && runNumber=${shortRunNumber}
  fi

  #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber} || -z ${pass} ]];
  if [[ -z ${runNumber} ]]; then
    #error condition
    return 1
  fi
  #ALL OK
  return 0
}
496
substituteDetectorName()
{
  # Fills the detector name into a path template.
  # Arguments: $1 - detector name
  #            $2 - path, may contain %det (replaced by the lower case
  #                 detector name) and/or %DET (replaced by the name as is)
  # Outputs:   the resulting path on stdout, exactly one line; a path
  #            without placeholder is printed unchanged
  # Returns:   0
  local det=$1
  local dir=$2
  # the % is escaped: an unescaped leading % would anchor the pattern to the
  # end of the string
  dir=${dir//\%det/${det,,}}
  dir=${dir//\%DET/${det}}
  printf '%s\n' "${dir}"
}
504
get_realpath()
{
  # Prints the absolute path of an existing file (symbolic links are not
  # resolved).
  # Arguments: $1 - path of a regular file, absolute or relative to ${PWD}
  # Outputs:   the absolute path on stdout
  # Returns:   1 when $1 is not an existing regular file, 0 otherwise
  local file=$1
  local parent absDir

  # file *must* exist
  [[ -f "${file}" ]] || return 1

  if [[ "${file}" != */* ]]; then
    # no directory part: the file is local
    absDir=${PWD}
  else
    parent=${file%/*}
    # "/file" leaves an empty parent, which stands for the root directory
    [[ -z ${parent} ]] && parent=/
    # resolve in a subshell so the caller's PWD and OLDPWD are not touched
    absDir=$(CDPATH= cd -- "${parent}" 2>/dev/null && pwd) || absDir=${PWD}
  fi

  # reassemble realpath
  printf '%s\n' "${absDir%/}/${file##*/}"
  return 0 # success
}
529
# hand the command line over unchanged; "$@" keeps arguments with spaces intact
main "$@"