]> git.uio.no Git - u/mrichter/AliRoot.git/blob - PWGPP/QA/scripts/runQA.sh
3ad2e9f50aaaffd3459455086d3ff819aa96fd47
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh
1 #!/bin/bash
# Entry point of the script.
# Arguments: option=value pairs (optionally prefixed with --), handed to
#            parseConfig and then to updateQA; at least one is required.
# Outputs:   the usage text on stdout when no argument is given or when
#            parseConfig rejects the arguments.
# Returns:   1 on missing/bad arguments or when ALICE_ROOT is empty,
#            otherwise the exit status of updateQA.
main()
{
  # Print the usage text directly instead of re-executing ${0}: the re-exec
  # only works when the script is executable and reachable under that name.
  if [[ -z $1 ]] || ! parseConfig "$@"; then
    echo "Usage: "
    echo "  ${0##*/} option=value [option=value]"
    echo "  at least inputList should be specified, or configFile containing it:"
    echo "  ${0##*/} inputList=file.list"
    echo "  options override config file (if any), e.g.:"
    echo "  ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
    return 1
  fi

  [[ -z $ALICE_ROOT ]] && echo "ALICE_ROOT not defined" && return 1

  # when the configured OCDB storage is a raw:// URI, run alien-token-init first
  ocdbregex='raw://'
  if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
    alien-token-init
  fi

  # "$@" keeps option values containing whitespace in one piece
  updateQA "$@"
}
28
# Runs the QA for every detector script found in
# $ALICE_ROOT/PWGPP/QA/detectorQAscripts and moves validated results to the
# output directory.
# Arguments: the same option=value pairs accepted by parseConfig.
# Globals read:    ALICE_ROOT and the configuration set by parseConfig
#                  (inputList, workingDirectory, outputDirectory, logDirectory,
#                  excludeDetectors, includeDetectors, inputListHighPtTrees).
# Globals written: inputList, workingDirectory, dateString, logFile, lockFile,
#                  logSummary, detector, outputDir, tmp* directories, qaFile,
#                  highPtTree, productionDir, planB (via validate) and the
#                  variables set by guessRunData. They are deliberately left
#                  global because the sourced detector scripts run in this
#                  shell and may read them.
# Side effects:    changes the working directory, sets umask 0002, redirects
#                  stdout/stderr of the shell to the log file, creates and
#                  removes ${logDirectory}/runQA.lock.
# Returns:   1 when the configuration, input list, working/log directory or
#            lock cannot be set up; otherwise the status of the final rm.
updateQA()
{
  umask 0002
  parseConfig "$@" || return 1

  #be paranoid and make some full paths
  [[ ! -f ${inputList} ]] && echo "no input list: ${inputList}" && return 1
  inputList=$(get_realpath "${inputList}")
  mkdir -p "${workingDirectory}"
  # Resolve the directory itself: stripping the last path component first
  # breaks for relative names without a slash ("work" -> "$PWD/work/work").
  local absWorkingDirectory
  absWorkingDirectory=$(cd "${workingDirectory}" 2>/dev/null && pwd)
  if [[ -z ${absWorkingDirectory} || ! -d ${absWorkingDirectory} ]]; then
    echo "working dir $workingDirectory does not exist and cannot be created"
    return 1
  fi
  workingDirectory=${absWorkingDirectory}
  cd "${workingDirectory}" || return 1

  echo JOB config:
  echo "inputList=$inputList"
  echo "outputDirectory=$outputDirectory"
  echo

  dateString=$(date +%Y-%m-%d-%H-%M)
  echo "Start time QA process: $dateString"

  #logging
  mkdir -p "${logDirectory}"
  [[ ! -d ${logDirectory} ]] && echo "no log dir $logDirectory" && return 1
  logFile="$logDirectory/${0##*/}.${dateString}.log"
  touch "${logFile}"
  [[ ! -f ${logFile} ]] && echo "cannot write logfile $logFile" && return 1
  echo "logFile = $logFile"

  #check lock
  lockFile=${logDirectory}/runQA.lock
  [[ -f ${lockFile} ]] && echo "lock ${lockFile} exists!" | tee "${logFile}" && return 1
  touch "${lockFile}"
  [[ ! -f ${lockFile} ]] && echo "cannot lock $lockFile" | tee "${logFile}" && return 1

  # from here on everything printed goes to the log file
  exec &>"${logFile}"

  ################################################################
  #ze detector loop
  for detectorScript in "${ALICE_ROOT}"/PWGPP/QA/detectorQAscripts/*; do
    unset planB
    [[ ! ${detectorScript} =~ .*\.sh$ ]] && continue
    detector=${detectorScript%.sh}
    detector=${detector##*/}

    #skip if excluded
    if [[ "${excludeDetectors}" =~ ${detector} ]]; then
      echo "${detector} is excluded in config, skipping..."
      continue
    fi

    #if includeDetectors set, only process those detectors specified there
    if [[ -n ${includeDetectors} && ! "${includeDetectors}" =~ ${detector} ]]; then
      echo "${detector} not included in includeDetectors, skipping..."
      continue
    fi

    logSummary=${logDirectory}/summary-${detector}-${dateString}.log
    outputDir=$(substituteDetectorName "${detector}" "${outputDirectory}")
    # never continue with an empty output dir: the rm -rf / mv targets below
    # are built from it and would otherwise become root-relative paths
    [[ -z ${outputDir} ]] && outputDir=${outputDirectory}
    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}-${dateString}
    if ! mkdir -p "${tmpDetectorRunDir}"; then
      echo "cannot create the temp dir $tmpDetectorRunDir"
      continue
    fi
    cd "${tmpDetectorRunDir}"

    tmpPrefix=${tmpDetectorRunDir}/${outputDir}
    echo
    echo "##############################################"
    echo "running QA for ${detector}"
    echo "  outputDir=$outputDir"
    echo "  tmpPrefix=$tmpPrefix"

    # drop the hooks of the previous detector before loading the next script,
    # so the "is it a function" checks below only see what this script defines
    unset -f runLevelQA
    unset -f periodLevelQA
    unset -f runLevelHighPtTreeQA
    unset -f periodLevelHighPtTreeQA
    source "${detectorScript}"

    #################################################################
    #produce the QA and trending tree for each file (run)
    unset arrOfTouchedProductions
    declare -A arrOfTouchedProductions
    while read -r qaFile; do
      echo

      if ! guessRunData "${qaFile}"; then
        echo "could not guess run data from ${qaFile}"
        continue
      fi

      tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
      tmpRunDir=${tmpProductionDir}/000${runNumber}
      mkdir -p "${tmpRunDir}"
      cd "${tmpRunDir}"

      #by default we expect to have everything in the same archive
      highPtTree=${qaFile}

      #maybe the input is not an archive, but a file
      [[ "${qaFile}" =~ "QAresults.root" ]] && highPtTree=""
      [[ "${qaFile}" =~ "FilterEvents_Trees.root" ]] && qaFile=""

      #it is possible we get the highPt trees from somewhere else
      #search the list of high pt trees for the proper run number
      if [[ -n ${inputListHighPtTrees} ]]; then
        highPtTree=$(grep -E -m1 -- "${runNumber}" "${inputListHighPtTrees}")
        echo "loaded the highPtTree ${highPtTree} from external file ${inputListHighPtTrees}"
      fi

      echo "qaFile=$qaFile"
      echo "highPtTree=$highPtTree"

      #what if we have a zip archive?
      if [[ "$qaFile" =~ .*\.zip$ ]]; then
        if unzip -l "${qaFile}" | grep -E "QAresults.root" &>/dev/null; then
          qaFile="${qaFile}#QAresults.root"
        else
          qaFile=""
        fi
      fi
      if [[ "$highPtTree" =~ .*\.zip$ ]]; then
        if unzip -l "${highPtTree}" | grep -E "FilterEvents_Trees.root" &>/dev/null; then
          highPtTree="${highPtTree}#FilterEvents_Trees.root"
        else
          highPtTree=""
        fi
      fi

      if [[ -n ${qaFile} && $(type -t runLevelQA) =~ "function" ]]; then
        echo "running ${detector} runLevelQA for run ${runNumber} from ${qaFile}"
        runLevelQA "${qaFile}" &> runLevelQA.log
        #perform some default actions:
        #if trending.root not created, create a default one
        if [[ ! -f trending.root ]]; then
          aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" &>> runLevelQA.log
        fi
        arrOfTouchedProductions[${tmpProductionDir}]=1
      fi
      #expert QA based on high pt trees
      if [[ -n ${highPtTree} && $(type -t runLevelHighPtTreeQA) =~ "function" ]]; then
        echo "running ${detector} runLevelHighPtTreeQA for run ${runNumber} from ${highPtTree}"
        runLevelHighPtTreeQA "${highPtTree}" &> runLevelHighPtTreeQA.log
        arrOfTouchedProductions[${tmpProductionDir}]=1
      fi

      cd "${tmpDetectorRunDir}"

    done < "${inputList}"

    #################################################################
    #cache which productions were (re)done
    echo "list of processed productions:"
    echo "    ${!arrOfTouchedProductions[@]}"
    echo

    #################################################################
    #(re)do the merging/trending
    for tmpProductionDir in "${!arrOfTouchedProductions[@]}"; do
      cd "${tmpProductionDir}"
      echo
      echo "running period level stuff in ${tmpProductionDir}"

      productionDir=${outputDir}/${tmpProductionDir#"${tmpPrefix}"}
      echo "productionDir=${productionDir}"

      mkdir -p "${productionDir}"
      if [[ ! -d ${productionDir} ]]; then
        echo "cannot make productionDir $productionDir" && continue
      fi

      #move runs to final destination
      for dir in "${tmpProductionDir}"/000*; do
        echo
        oldRunDir=${outputDir}/${dir#"${tmpPrefix}"}
        if ! guessRunData "${dir}/dummyName"; then
          echo "could not guess run data from ${dir}"
          continue
        fi

        #before moving - VALIDATE!!!
        if ! validate "${dir}"; then
          continue
        fi

        if [[ -d ${oldRunDir} ]]; then
          echo "removing old ${oldRunDir}"
          rm -rf "${oldRunDir}"
        fi
        echo "moving new ${runNumber} to ${productionDir}"
        mv -f "${dir}" "${productionDir}"
      done

      #go to a temp dir to do the period level stuff
      tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
      echo
      echo "tmpPeriodLevelQAdir=${tmpPeriodLevelQAdir}"
      if ! mkdir -p "${tmpPeriodLevelQAdir}"; then continue; fi
      cd "${tmpPeriodLevelQAdir}"

      #link the final list of per-run dirs here, just the dirs
      #to have a clean working directory
      unset linkedStuff
      declare -a linkedStuff
      for x in "${productionDir}"/000*; do
        [[ -d $x ]] && ln -s "$x" && linkedStuff+=("${x##*/}")
      done
      ls

      #merge trending files if any
      if /bin/ls 000*/trending.root &>/dev/null; then
        hadd trending.root 000*/trending.root &> periodLevelQA.log
      fi

      #run the period level trending/QA
      if [[ -f "trending.root" && $(type -t periodLevelQA) =~ "function" ]]; then
        echo "running ${detector} periodLevelQA for production ${period}/${pass}"
        periodLevelQA trending.root &>> periodLevelQA.log
      else
        echo "WARNING: not running ${detector} periodLevelQA for production ${period}/${pass}, no trending.root"
      fi

      if ! validate "${PWD}"; then continue; fi

      #here we are validated so move the produced QA to the final place
      #clean up linked stuff first
      [[ ${#linkedStuff[@]} -gt 0 ]] && rm "${linkedStuff[@]}"
      #some of the output could be a directory, so handle that
      #TODO: maybe use rsync?
      for x in "${tmpPeriodLevelQAdir}"/*; do
        if [[ -d ${x} ]]; then
          echo "removing ${productionDir}/${x##*/}"
          rm -rf "${productionDir}/${x##*/}"
          echo "moving ${x} to ${productionDir}"
          mv "${x}" "${productionDir}"
        fi
        if [[ -f ${x} ]]; then
          echo "moving ${x} to ${productionDir}"
          mv -f "${x}" "${productionDir}"
        fi
      done

      #remove the temp dir
      rm -rf "${tmpPeriodLevelQAdir}"

    done

    cd "${workingDirectory}"

    # planB is set by validate when something did not validate; in that case
    # the temp dir is kept and executePlanB is called instead
    if [[ -z ${planB} ]]; then
      echo
      echo "removing ${tmpDetectorRunDir}"
      rm -rf "${tmpDetectorRunDir}"
    else
      executePlanB
    fi
  done

  #remove lock
  rm -f "${lockFile}"
}
291
# Emergency handler: when MAILTO is set, mails the contents of ${logSummary}
# to it with the subject "qa in need of assistance". Does nothing otherwise.
# Globals read: MAILTO, logSummary
# Returns: 0 when MAILTO is empty, else the status of the mail pipeline.
executePlanB()
{
  #nobody to notify
  [[ -z ${MAILTO} ]] && return 0

  #in case of emergency
  printf '\n%s\n' "trouble detected, sending email to ${MAILTO}"
  cat ${logSummary} | mail -s "qa in need of assistance" ${MAILTO}
}
302
# Validates a directory by appending the output of summarizeLogs for it to
# ${logSummary}.
# Arguments: $1 - directory to validate (passed on to summarizeLogs)
# Globals read:    logSummary
# Globals written: logStatus (exit status of summarizeLogs),
#                  planB (set to 1 when validation fails)
# Outputs:   a warning line on stdout when validation fails
# Returns:   0 when summarizeLogs succeeded, 1 otherwise
validate()
{
  # quoted so that directories and summary paths with whitespace survive
  summarizeLogs "${1}" >> "${logSummary}"
  logStatus=$?
  if [[ ${logStatus} -ne 0 ]]; then
    echo "WARNING not validated: ${1}"
    planB=1
    return 1
  fi
  return 0
}
314
# Prints one status line per log file found in a directory and produces gdb
# stack traces for core files.
# Arguments: $1 - directory to inspect; falls back to ${PWD} when it is not
#                 a directory
# Globals read: pretend (when non-empty, rec.log randomly reports an error)
# Outputs:   "<path> OK", "<path> BAD <errors>" or "<path> OK MWAH <warnings>"
#            for every *.log, stdout and stderr file in the directory, plus
#            the path of every core file found in ${PWD}/*/core and ${PWD}/core
# Side effects: chmod 644 on core files; writes stacktrace_<path>.log files
#            into the current directory
# Returns:   0 when validateLog returned 0 for every log, 1 otherwise (the
#            warning status 2 also counts as a failure here)
summarizeLogs()
{
  local dir=$1
  [[ ! -d ${dir} ]] && dir=${PWD}

  #print a summary of logs
  local logFiles=(
      "*.log"
      "stdout"
      "stderr"
  )

  #check logs
  local logstatus=0
  local pattern log finallog errorSummary validationStatus
  # every pattern is looked up inside ${dir}; joining the array into a single
  # word would prefix only the first pattern with the directory
  for pattern in "${logFiles[@]}"; do
    for log in "${dir}"/${pattern}; do
      [[ ! -f ${log} ]] && continue
      # report an absolute path without prepending ${PWD} a second time
      if [[ ${log} == /* ]]; then
        finallog=${log}
      else
        finallog=${PWD%/}/${log}
      fi
      errorSummary=$(validateLog "${log}")
      validationStatus=$?
      [[ ${validationStatus} -ne 0 ]] && logstatus=1
      if [[ ${validationStatus} -eq 0 ]]; then
        #in pretend mode randomly report an error in rec.log some cases
        if [[ -n ${pretend} && "${log##*/}" == "rec.log" ]]; then
          if (( RANDOM % 2 >= 1 )); then
            echo "${finallog} BAD random error"
          else
            echo "${finallog} OK"
          fi
        else
          echo "${finallog} OK"
        fi
      elif [[ ${validationStatus} -eq 1 ]]; then
        echo "${finallog} BAD ${errorSummary}"
      elif [[ ${validationStatus} -eq 2 ]]; then
        echo "${finallog} OK MWAH ${errorSummary}"
      fi
    done
  done

  #report core files
  local core
  for core in "${PWD}"/*/core "${PWD}"/core; do
    [[ -f ${core} ]] || continue
    echo "${core}"
    chmod 644 "${core}"
    gdb --batch --quiet -ex "bt" -ex "quit" aliroot "${core}" > "stacktrace_${core//\//_}.log"
  done

  return ${logstatus}
}
358
# Scans one log file for known error and warning patterns.
# Arguments: $1 - path of the log file
# Outputs:   the first matching line for each pattern that matched, each
#            followed by " : " (errors take precedence over warnings)
# Returns:   1 when an error pattern matched, 2 when only a warning pattern
#            matched, 0 when nothing matched
validateLog()
{
  local log=${1}
  local errorConditions=(
            'There was a crash'
            'floating'
            'error while loading shared libraries'
            'std::bad_alloc'
            's_err_syswatch_'
            'Thread [0-9]* (Thread'
            'AliFatal'
            'core dumped'
            '\.C.*error:.*\.h: No such file'
            'segmentation'
            'Interpreter error recovered'
  )

  local warningConditions=(
            'This is serious'
  )

  local errorSummary=""
  local warningSummary=""
  local condition tmp

  # the log path is quoted: an unquoted path with whitespace makes grep look
  # at the wrong files and the log would be reported as clean
  for condition in "${errorConditions[@]}"; do
    tmp=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${tmp} ]] && errorSummary+="${tmp} : "
  done

  for condition in "${warningConditions[@]}"; do
    tmp=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${tmp} ]] && warningSummary+="${tmp} : "
  done

  if [[ -n ${errorSummary} ]]; then
    echo "${errorSummary}"
    return 1
  fi

  if [[ -n ${warningSummary} ]]; then
    echo "${warningSummary}"
    return 2
  fi

  return 0
}
408
# Sets the configuration variables: built-in defaults first, then the
# config file (if a configFile option is given), then the command line
# options, which override both.
# Arguments: option=value pairs, optionally prefixed with "--"
# Globals written: configFile, inputList, workingDirectory, outputDirectory,
#                  excludeDetectors, logDirectory and every variable named by
#                  an option or assigned in the sourced config file
# Outputs:   an error message on stdout for a missing config file or a
#            badly formatted option
# Returns:   0 on success, 1 on error
# Note: option values are assigned literally (no eval), so they may contain
#       whitespace and are never executed as shell code.
parseConfig()
{
  #config file
  configFile=""
  #where to search for qa files
  inputList=file.list
  #working directory
  workingDirectory="${PWD}"
  #where to place the final qa plots
  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
  outputDirectory="${workingDirectory}/%DET"
  #filter out detector option
  excludeDetectors="EXAMPLE"
  #logs
  logDirectory=${workingDirectory}/logs
  #set aliroot
  #alirootEnv="/home/mkrzewic/alisoft/balice_master.sh"
  #OCDB storage
  #ocdbStorage="raw://"
  #email to
  #MAILTO="fbellini@cern.ch"

  local cfgOption cfgName

  #first, check if the config file is configured
  #if yes - source it so that other options can override it
  #if any
  for cfgOption in "$@"; do
    cfgOption=${cfgOption#--}
    if [[ ${cfgOption} == configFile=* ]]; then
      configFile=${cfgOption#configFile=}
      [[ ! -f ${configFile} ]] && echo "configFile ${configFile} not found, exiting..." && return 1
      source "${configFile}"
      break
    fi
  done

  #then, parse the options as they override the options from file
  while [[ -n ${1} ]]; do
    cfgOption=${1#--}
    cfgName=${cfgOption%%=*}
    # only plain variable names are accepted on the left of the "="
    if [[ ${cfgOption} == *=* && ${cfgName} =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      printf -v "${cfgName}" '%s' "${cfgOption#*=}"
    else
      echo "badly formatted option ${cfgOption}, should be: option=value, stopping..."
      return 1
    fi
    shift
  done
  return 0
}
455
# Guesses run information from the components of a file path.
# Arguments: $1 - path of a QA file
# Globals written: period, runNumber, year, pass, legoTrainRunNumber, dataType
#   runNumber          - from a field 000NNNNNN (prefix stripped); for sim
#                        data a plain 6-digit field takes precedence
#   period             - from a field starting with LHCnnx (text after the
#                        last "_" removed)
#   year               - from a field 20nn
#   dataType           - from a field "sim" or "data"
#   legoTrainRunNumber - an all-digit field directly followed by a *.zip or
#                        *.root field
#   pass               - the field before the lego train number if there is
#                        one, else the field before the last one; always
#                        "passMC" for sim
# Fields are scanned from right to left, so a field further to the left
# overwrites an earlier match.
# Returns:   0 when a run number was found, 1 otherwise
guessRunData()
{
  period=""
  runNumber=""
  year=""
  pass=""
  legoTrainRunNumber=""
  dataType=""

  local shortRunNumber=""
  # split on "/" without subjecting the fields to pathname expansion
  local -a path=()
  IFS="/" read -r -a path <<< "$1"
  local dirDepth=$(( ${#path[@]}-1 ))
  local x field fieldPrev fieldNext
  for ((x=dirDepth; x>=0; x--)); do

    if (( x > 0 )); then
      fieldPrev=${path[x-1]}
    else
      fieldPrev=""
    fi
    field=${path[x]}
    fieldNext=${path[x+1]}

    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
    [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
  done
  [[ -z ${legoTrainRunNumber} && ${dirDepth} -ge 1 ]] && pass=${path[dirDepth-1]}
  if [[ "${dataType}" =~ ^sim$ ]]; then
    pass="passMC"
    # keep a run number taken from a 000NNNNNN field when the path has no
    # plain 6-digit field
    [[ -n ${shortRunNumber} ]] && runNumber=${shortRunNumber}
  fi

  #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber} || -z ${pass} ]];
  if [[ -z ${runNumber} ]]; then
    #error condition
    return 1
  fi
  #ALL OK
  return 0
}
499
# Fills the detector placeholders of a directory template.
# Arguments: $1 - detector name
#            $2 - directory template; every %det is replaced by the
#                 lower-cased detector name, every %DET by the name as given
# Outputs:   the resulting directory on stdout, exactly one line; a template
#            without placeholders is printed unchanged
# Returns:   0
substituteDetectorName()
{
  local det=$1
  local dir=$2
  dir=${dir//\%det/${det,,}}
  dir=${dir//\%DET/${det}}
  printf '%s\n' "${dir}"
}
507
# Prints the absolute path of an existing regular file.
# Arguments: $1 - path of the file, absolute or relative to ${PWD}
# Outputs:   <absolute directory>/<file name> on stdout
# Returns:   0 on success, 1 when $1 is not a regular file or its directory
#            cannot be entered
# The directory is resolved in a subshell, so the working directory and
# OLDPWD of the caller stay untouched.
get_realpath()
{
  # file *must* exist
  [[ -f "$1" ]] || return 1

  # directory part of the argument: "." for a bare file name,
  # "/" for a file directly below the root
  local parent
  case "$1" in
    */*) parent=${1%/*} ;;
    *)   parent="." ;;
  esac
  [[ -z ${parent} ]] && parent="/"

  local tmppwd
  # CDPATH is emptied so that a relative directory is never looked up elsewhere
  tmppwd=$(CDPATH= cd -- "${parent}" &>/dev/null && pwd) || return 1

  # reassemble realpath (no double slash for the root directory)
  echo "${tmppwd%/}/${1##*/}"
  return 0 # success
}
532
# run the script; "$@" hands every command line argument to main unchanged,
# including arguments that contain whitespace
main "$@"