]> git.uio.no Git - u/mrichter/AliRoot.git/blob - PWGPP/QA/scripts/runQA.sh
Merge branch 'master' of https://git.cern.ch/reps/AliRoot
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh
1 #!/bin/bash
#######################################
# Print the command line usage summary to stdout.
#######################################
printUsage()
{
  echo "Usage: "
  echo "  ${0##*/} option=value [option=value]"
  echo "  at least inputList should be specified, or configFile containing it:"
  echo "  ${0##*/} inputList=file.list"
  echo "  options override config file (if any), e.g.:"
  echo "  ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
}

#######################################
# Script entry point: parse the options, make sure the environment is
# usable, optionally set up the alien environment and run the QA update.
# Globals:   ALICE_ROOT (read), ocdbStorage (read), alienUserName (read)
# Arguments: option=value pairs, forwarded to parseConfig and updateQA
# Returns:   1 on missing/bad options or missing ALICE_ROOT,
#            otherwise the status of updateQA
#######################################
main()
{
  if [[ -z $1 ]]; then
    printUsage
    return 1
  fi

  #"$@" keeps option values containing whitespace in one piece
  if ! parseConfig "$@"; then
    #print the usage directly; re-executing ${0} fails when the script
    #is not executable or was started as "bash runQA.sh"
    printUsage
    return 1
  fi

  if [[ -z ${ALICE_ROOT} ]]; then
    echo "ALICE_ROOT not defined"
    return 1
  fi

  local ocdbregex='raw://'
  if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
    #pass the user name only if one is configured
    alien-token-init ${alienUserName:+"${alienUserName}"}
    #this is a hack! alien-token init seems not enough
    #but the gclient_env script messes up the LD_LIBRARY_PATH
    #NOTE(review): this eval's the content of a file in /tmp, the file
    #is trusted to be the one written by alien-token-init
    local envLine
    while read -r envLine; do
      eval "${envLine}"
    done < <(grep -v "LD_LIBRARY_PATH" "/tmp/gclient_env_${UID}")
  fi

  updateQA "$@"
}
33
#######################################
# Run the QA for every detector script found in
# ${ALICE_ROOT}/PWGPP/QA/detectorQAscripts/*.sh:
#   1. for every line of ${inputList} run the run level QA in a temporary
#      directory (runLevelQA / runLevelHighPtTreeQA, if the sourced
#      detector script defines them),
#   2. for every production touched in step 1 validate the per-run output,
#      move it to the output directory, merge the trending files and run
#      periodLevelQA (if defined),
#   3. remove the temporary directory, or call executePlanB if any
#      validation failed.
# After the lock is taken all output of the shell is redirected to the
# log file.
# Globals:   set by parseConfig (inputList, workingDirectory,
#            outputDirectory, logDirectory, excludeDetectors,
#            includeDetectors, inputListHighPtTrees), ALICE_ROOT (read);
#            writes dateString, logFile, lockFile, logSummary, detector,
#            outputDir and the tmp*/productionDir helpers used by the
#            sourced detector scripts
# Arguments: option=value pairs, forwarded to parseConfig
# Returns:   1 if the input list, working dir, log dir, log file or lock
#            cannot be set up, 0 otherwise
#######################################
updateQA()
{
  umask 0002
  parseConfig "$@"

  #be paranoid and make some full paths
  if [[ ! -f ${inputList} ]]; then
    echo "no input list: ${inputList}"
    return 1
  fi
  inputList=$(get_realpath "${inputList}")
  mkdir -p "${workingDirectory}"
  #resolve to an absolute path by entering the directory itself; taking
  #the parent plus the basename breaks for a relative name without a slash
  local requestedWorkingDirectory=${workingDirectory}
  workingDirectory=$(cd -- "${requestedWorkingDirectory}" &>/dev/null && pwd)
  if [[ -z ${workingDirectory} || ! -d ${workingDirectory} ]]; then
    workingDirectory=${requestedWorkingDirectory}
    echo "working dir ${workingDirectory} does not exist and cannot be created"
    return 1
  fi
  cd "${workingDirectory}" || return 1

  echo JOB config:
  echo "inputList=${inputList}"
  echo "outputDirectory=${outputDirectory}"
  echo

  dateString=$(date +%Y-%m-%d-%H-%M)
  echo "Start time QA process: ${dateString}"

  #logging
  mkdir -p "${logDirectory}"
  if [[ ! -d ${logDirectory} ]]; then
    echo "no log dir ${logDirectory}"
    return 1
  fi
  logFile="${logDirectory}/${0##*/}.${dateString}.log"
  touch "${logFile}"
  if [[ ! -f ${logFile} ]]; then
    echo "cannot write logfile ${logFile}"
    return 1
  fi
  echo "logFile = ${logFile}"

  #check lock
  lockFile=${logDirectory}/runQA.lock
  if [[ -f ${lockFile} ]]; then
    echo "lock ${lockFile} exists!" | tee "${logFile}"
    return 1
  fi
  touch "${lockFile}"
  if [[ ! -f ${lockFile} ]]; then
    echo "cannot lock ${lockFile}" | tee "${logFile}"
    return 1
  fi

  #from here on everything goes to the log file
  exec &>"${logFile}"

  ################################################################
  #ze detector loop
  local runDirLink periodLevelItem
  for detectorScript in "${ALICE_ROOT}"/PWGPP/QA/detectorQAscripts/*; do
    echo
    echo "##############################################"
    unset planB
    [[ ${detectorScript} == *.sh ]] || continue
    detector=${detectorScript%.sh}
    detector=${detector##*/}

    #skip if excluded
    if [[ "${excludeDetectors}" =~ ${detector} ]]; then
      echo "${detector} is excluded in config, skipping..."
      continue
    fi

    #if includeDetectors set, only process those detectors specified there
    if [[ -n ${includeDetectors} && ! "${includeDetectors}" =~ ${detector} ]]; then
      echo "${detector} not included in includeDetectors, skipping..."
      continue
    fi

    logSummary=${logDirectory}/summary-${detector}-${dateString}.log
    outputDir=$(substituteDetectorName "${detector}" "${outputDirectory}")
    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}-${dateString}
    if ! mkdir -p "${tmpDetectorRunDir}"; then
      echo "cannot create the temp dir ${tmpDetectorRunDir}"
      continue
    fi
    cd "${tmpDetectorRunDir}" || continue

    tmpPrefix=${tmpDetectorRunDir}/${outputDir}
    echo "running QA for ${detector}"
    echo "  outputDir=${outputDir}"
    echo "  tmpPrefix=${tmpPrefix}"

    #forget the hooks of the previous detector before loading the new ones
    unset -f runLevelQA
    unset -f periodLevelQA
    unset -f runLevelHighPtTreeQA
    unset -f periodLevelHighPtTreeQA
    source "${detectorScript}"

    #################################################################
    #produce the QA and trending tree for each file (run)
    unset arrOfTouchedProductions
    declare -A arrOfTouchedProductions
    while read -r qaFile; do
      echo

      #first check if input file exists
      if [[ ! -f ${qaFile%\#*} ]]; then
        echo "file ${qaFile%\#*} not accessible"
        continue
      fi

      if ! guessRunData "${qaFile}"; then
        echo "could not guess run data from ${qaFile}"
        continue
      fi

      tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
      tmpRunDir=${tmpProductionDir}/000${runNumber}
      mkdir -p "${tmpRunDir}"
      if ! cd "${tmpRunDir}"; then
        #never run the QA in (and pollute) the wrong directory
        echo "cannot enter the temp run dir ${tmpRunDir}"
        continue
      fi

      #by default we expect to have everything in the same archive
      highPtTree=${qaFile}

      #maybe the input is not an archive, but a file
      [[ "${qaFile}" =~ QAresults.root$ ]] && highPtTree=""
      [[ "${qaFile}" =~ FilterEvents_Trees.root$ ]] && qaFile=""

      #it is possible we get the highPt trees from somewhere else
      #search the list of high pt trees for the proper run number
      if [[ -n ${inputListHighPtTrees} ]]; then
        highPtTree=$(grep -E -m1 -- "${runNumber}" "${inputListHighPtTrees}")
        echo "loaded the highPtTree ${highPtTree} from external file ${inputListHighPtTrees}"
      fi

      echo "qaFile=${qaFile}"
      echo "highPtTree=${highPtTree}"

      #what if we have a zip archive?
      if [[ ${qaFile} == *.zip ]]; then
        if unzip -l "${qaFile}" | grep -E "QAresults.root" &>/dev/null; then
          qaFile="${qaFile}#QAresults.root"
        else
          qaFile=""
        fi
      fi
      if [[ ${highPtTree} == *.zip ]]; then
        if unzip -l "${highPtTree}" | grep -E "FilterEvents_Trees.root" &>/dev/null; then
          highPtTree="${highPtTree}#FilterEvents_Trees.root"
        else
          highPtTree=""
        fi
      fi

      if [[ -n ${qaFile} && $(type -t runLevelQA) =~ "function" ]]; then
        echo "running ${detector} runLevelQA for run ${runNumber} from ${qaFile}"
        runLevelQA "${qaFile}" &> runLevelQA.log
        #perform some default actions:
        #if trending.root not created, create a default one
        if [[ ! -f trending.root ]]; then
          aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" 2>&1 | tee -a runLevelQA.log
        fi
        if [[ -f trending.root ]]; then
          arrOfTouchedProductions["${tmpProductionDir}"]=1
        else
          echo "trending.root not created"
        fi
      fi
      #expert QA based on high pt trees
      if [[ -n ${highPtTree} && $(type -t runLevelHighPtTreeQA) =~ "function" ]]; then
        echo "running ${detector} runLevelHighPtTreeQA for run ${runNumber} from ${highPtTree}"
        runLevelHighPtTreeQA "${highPtTree}" &> runLevelHighPtTreeQA.log
        arrOfTouchedProductions["${tmpProductionDir}"]=1
      fi

      cd "${tmpDetectorRunDir}"

    done < "${inputList}"

    #################################################################
    #cache which productions were (re)done
    echo "list of processed productions:"
    echo "    ${!arrOfTouchedProductions[*]}"
    echo

    #################################################################
    #(re)do the merging/trending
    for tmpProductionDir in "${!arrOfTouchedProductions[@]}"; do
      cd "${tmpProductionDir}" || continue
      echo
      echo "running period level stuff in ${tmpProductionDir}"

      #the prefix is quoted so that it is stripped literally, not as a glob
      productionDir=${outputDir}/${tmpProductionDir#"${tmpPrefix}"}
      echo "productionDir=${productionDir}"

      mkdir -p "${productionDir}"
      if [[ ! -d ${productionDir} ]]; then
        echo "cannot make productionDir ${productionDir}"
        continue
      fi

      #move runs to final destination
      for dir in "${tmpProductionDir}"/000*; do
        #an unmatched glob stays literal, there is nothing to move then
        [[ -d ${dir} ]] || continue
        echo
        oldRunDir=${outputDir}/${dir#"${tmpPrefix}"}
        if ! guessRunData "${dir}/dummyName"; then
          echo "could not guess run data from ${dir}"
          continue
        fi

        #before moving - VALIDATE!!!
        if ! validate "${dir}"; then
          continue
        fi

        if [[ -d ${oldRunDir} ]]; then
          echo "removing old ${oldRunDir}"
          rm -rf -- "${oldRunDir}"
        fi
        echo "moving new ${runNumber} to ${productionDir}"
        mv -f -- "${dir}" "${productionDir}"
      done

      #go to a temp dir to do the period level stuff in a completely clean dir
      tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
      echo
      echo "tmpPeriodLevelQAdir=${tmpPeriodLevelQAdir}"
      if ! mkdir -p "${tmpPeriodLevelQAdir}"; then continue; fi
      cd "${tmpPeriodLevelQAdir}" || continue

      #link the final list of per-run dirs here, just the dirs
      #to have a clean working directory
      unset linkedStuff
      declare -a linkedStuff
      for runDirLink in "${productionDir}"/000*; do
        [[ -d ${runDirLink} ]] && ln -s "${runDirLink}" && linkedStuff+=("${runDirLink##*/}")
      done

      #merge trending files if any
      if compgen -G "000*/trending.root" >/dev/null; then
        hadd trending.root 000*/trending.root &> periodLevelQA.log
      fi

      #run the period level trending/QA
      if [[ -f "trending.root" && $(type -t periodLevelQA) =~ "function" ]]; then
        echo "running ${detector} periodLevelQA for production ${period}/${pass}"
        periodLevelQA trending.root &>> periodLevelQA.log
      else
        echo "WARNING: not running ${detector} periodLevelQA for production ${period}/${pass}, no trending.root"
      fi

      if ! validate "${PWD}"; then continue; fi

      #here we are validated so move the produced QA to the final place
      #clean up linked stuff first
      if (( ${#linkedStuff[@]} > 0 )); then
        rm -- "${linkedStuff[@]}"
      fi
      #some of the output could be a directory, so handle that
      #TODO: maybe use rsync?
      for periodLevelItem in "${tmpPeriodLevelQAdir}"/*; do
        if [[ -d ${periodLevelItem} ]]; then
          echo "removing ${productionDir}/${periodLevelItem##*/}"
          rm -rf -- "${productionDir}/${periodLevelItem##*/}"
          echo "moving ${periodLevelItem} to ${productionDir}"
          mv -- "${periodLevelItem}" "${productionDir}"
        fi
        if [[ -f ${periodLevelItem} ]]; then
          echo "moving ${periodLevelItem} to ${productionDir}"
          mv -f -- "${periodLevelItem}" "${productionDir}"
        fi
      done

      #remove the temp dir
      rm -rf -- "${tmpPeriodLevelQAdir}"

    done

    cd "${workingDirectory}"

    #planB is set by validate when something did not validate
    if [[ -z ${planB} ]]; then
      echo
      echo "removing ${tmpDetectorRunDir}"
      rm -rf -- "${tmpDetectorRunDir}"
    else
      executePlanB
    fi
  done #end of detector loop

  #remove lock
  rm -f -- "${lockFile}"
}
302
#######################################
# In case of emergency: mail the log summary of the current detector.
# Does nothing when MAILTO is empty.
# Globals:   MAILTO (read, whitespace separated list of recipients),
#            logSummary (read, file sent as the mail body)
# Outputs:   a notice on stdout when a mail is sent
#######################################
executePlanB()
{
  #in case of emergency
  [[ -n ${MAILTO} ]] || return 0

  echo
  echo "trouble detected, sending email to ${MAILTO}"

  #split the recipient list into words without exposing it to globbing
  local -a recipients=()
  read -r -a recipients <<< "${MAILTO}"

  #cat (not a redirect) so that the mail is still sent, with an empty
  #body, when the summary is missing
  cat -- "${logSummary}" | mail -s "qa in need of assistance" "${recipients[@]}"
}
313
#######################################
# Validate a directory by summarizing its logs; the summary is appended
# to ${logSummary}.
# Globals:   logSummary (read), logStatus (written, status of
#            summarizeLogs), planB (set to 1 on failure)
# Arguments: $1 - directory to validate
# Returns:   0 if validated, 1 otherwise
#######################################
validate()
{
  #quoted: a path with whitespace would otherwise be split, and make the
  #redirection ambiguous
  summarizeLogs "${1}" >> "${logSummary}"
  logStatus=$?
  if (( logStatus != 0 )); then
    echo "WARNING not validated: ${1}"
    planB=1
    return 1
  fi
  return 0
}
325
#######################################
# Print a one line verdict (OK / BAD / OK MWAH) for every log file
# (*.log, stdout, stderr) found in a directory and produce a stack trace
# for every core file found in ${PWD} and its direct subdirectories.
# Globals:   pretend (read; if set, rec.log randomly reports an error)
# Arguments: $1 - directory with the logs, ${PWD} if not a directory
# Outputs:   the per-log verdicts and the core file names on stdout,
#            stacktrace_*.log files in the current directory
# Returns:   0 if all logs validate (warnings included), 1 otherwise
#######################################
summarizeLogs()
{
  local dir=$1
  [[ ! -d ${dir} ]] && dir=${PWD}

  #print a summary of logs
  local -a logFiles=(
      "*.log"
      "stdout"
      "stderr"
  )

  #check logs
  local logstatus=0
  local pattern log finallog errorSummary validationStatus
  for pattern in "${logFiles[@]}"; do
    #every pattern is looked up in ${dir}; the pattern itself is left
    #unquoted on purpose so that the glob is expanded
    for log in "${dir}"/${pattern}; do
      [[ -f ${log} ]] || continue
      #report the full path, do not prepend ${PWD} to an absolute one
      if [[ ${log} == /* ]]; then
        finallog=${log}
      else
        finallog=${PWD%/}/${log}
      fi
      errorSummary=$(validateLog "${log}")
      validationStatus=$?
      (( validationStatus != 0 )) && logstatus=1
      case ${validationStatus} in
        0)
          #in pretend mode randomly report an error in rec.log some cases
          if [[ -n ${pretend} && "${log##*/}" == "rec.log" ]]; then
            if (( RANDOM % 2 >= 1 )); then
              echo "${finallog} BAD random error"
            else
              echo "${finallog} OK"
            fi
          else
            echo "${finallog} OK"
          fi
          ;;
        1)
          echo "${finallog} BAD ${errorSummary}"
          ;;
        2)
          echo "${finallog} OK MWAH ${errorSummary}"
          ;;
      esac
    done
  done

  #report core files
  local coreFile
  for coreFile in "${PWD}"/*/core "${PWD}"/core; do
    [[ -f ${coreFile} ]] || continue
    echo "${coreFile}"
    chmod 644 "${coreFile}"
    gdb --batch --quiet -ex "bt" -ex "quit" aliroot "${coreFile}" > "stacktrace_${coreFile//\//_}.log"
  done

  return ${logstatus}
}
369
#######################################
# Search a log file for known error and warning messages.
# Arguments: $1 - log file to check
# Outputs:   on error (or warning) the first matching line of every
#            matching condition, each followed by " : ", on one line
# Returns:   0 if nothing was found, 1 on errors, 2 on warnings only
#######################################
validateLog()
{
  local logPath=${1}
  local -a errorConditions=(
            'There was a crash'
            'floating'
            'error while loading shared libraries'
            'std::bad_alloc'
            's_err_syswatch_'
            'Thread [0-9]* (Thread'
            'AliFatal'
            'core dumped'
            '\.C.*error:.*\.h: No such file'
            'segmentation'
            'Interpreter error recovered'
  )

  local -a warningConditions=(
            'This is serious'
  )

  local errorSummary=""
  local warningSummary=""
  local condition firstMatch

  #collect the first hit of every error condition
  for condition in "${errorConditions[@]}"; do
    firstMatch=$(grep -m1 -e "${condition}" -- "${logPath}")
    [[ -n ${firstMatch} ]] && errorSummary+="${firstMatch} : "
  done

  #same for the warnings
  for condition in "${warningConditions[@]}"; do
    firstMatch=$(grep -m1 -e "${condition}" -- "${logPath}")
    [[ -n ${firstMatch} ]] && warningSummary+="${firstMatch} : "
  done

  #errors take precedence over warnings
  if [[ -n ${errorSummary} ]]; then
    echo "${errorSummary}"
    return 1
  fi

  if [[ -n ${warningSummary} ]]; then
    echo "${warningSummary}"
    return 2
  fi

  return 0
}
419
#######################################
# Set the default configuration, then source the config file (if one is
# given with configFile=...) and finally apply the command line options,
# which override both.
# Globals:   writes configFile, inputList, workingDirectory,
#            outputDirectory, excludeDetectors, logDirectory and any
#            variable named in the config file or on the command line
# Arguments: option=value pairs, an optional leading "--" is stripped
# Outputs:   an error message on stdout for a missing config file or a
#            badly formatted option
# Returns:   0 on success, 1 on error
#######################################
parseConfig()
{
  #config file
  configFile=""
  #where to search for qa files
  inputList=file.list
  #working directory
  workingDirectory="${PWD}"
  #where to place the final qa plots
  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
  outputDirectory="${workingDirectory}/%DET"
  #filter out detector option
  excludeDetectors="EXAMPLE"
  #logs
  logDirectory=${workingDirectory}/logs
  #OCDB storage
  #ocdbStorage="raw://"
  #email to
  #MAILTO="fbellini@cern.ch"

  #first, check if the config file is configured
  #if yes - source it so that other options can override it
  #if any
  local _opt
  for _opt in "$@"; do
    #accept --configFile=... like every other option
    _opt=${_opt#--}
    #anchored: an option merely containing "configFile=" is not the config file
    if [[ ${_opt} == configFile=* ]]; then
      eval "${_opt}"
      if [[ ! -f ${configFile} ]]; then
        echo "configFile ${configFile} not found, exiting..."
        return 1
      fi
      source "${configFile}"
      break
    fi
  done

  #then, parse the options as they override the options from file
  #NOTE(review): the options are eval'ed, so the value is subject to shell
  #expansion and a value with unquoted whitespace is run as a command;
  #only pass trusted input here
  local _arg
  for _arg in "$@"; do
    #empty arguments carry no option
    [[ -z ${_arg} ]] && continue
    _arg=${_arg#--}
    #the left hand side has to be a variable name, anything else would be
    #executed as a command by eval
    if [[ ${_arg} =~ ^[A-Za-z_][A-Za-z0-9_]*= ]]; then
      eval "${_arg}"
    else
      echo "badly formatted option ${_arg}, should be: option=value, stopping..."
      return 1
    fi
  done
  return 0
}
464
#######################################
# Guess the run data from the components of a file path.
# The path is scanned from the last component to the first one and every
# matching component overwrites the previous guess, so for period,
# runNumber, year and dataType the leftmost match wins.
# Globals:   writes period, runNumber, year, pass, legoTrainRunNumber,
#            dataType
# Arguments: $1 - path of the QA file (need not exist)
# Returns:   0 if a run number was found, 1 otherwise
#######################################
guessRunData()
{
  period=""
  runNumber=""
  year=""
  pass=""
  legoTrainRunNumber=""
  dataType=""

  local shortRunNumber=""
  #split the path on "/" without exposing it to glob expansion
  local -a path=()
  IFS='/' read -r -a path <<< "$1"
  local dirDepth=$(( ${#path[@]} - 1 ))
  local x field fieldPrev fieldNext
  for (( x = dirDepth; x >= 0; x-- )); do

    (( x >= 1 )) && fieldPrev=${path[x-1]}
    field=${path[x]}
    fieldNext=${path[x+1]}

    #an all-digit directory directly above the file is a lego train run
    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
    [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
  done

  #without a lego train the pass is the directory holding the file
  if [[ -z ${legoTrainRunNumber} ]] && (( dirDepth >= 1 )); then
    pass=${path[dirDepth-1]}
  fi

  if [[ "${dataType}" =~ ^sim$ ]]; then
    pass="passMC"
    #simulation uses the short run number; keep a 000-prefixed one if
    #that is all the path provides
    [[ -n ${shortRunNumber} ]] && runNumber=${shortRunNumber}
  fi

  #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber} || -z ${pass} ]];
  if [[ -z ${runNumber} ]]; then
    #error condition
    return 1
  fi
  #ALL OK
  return 0
}
508
#######################################
# Replace the detector placeholder in a path: the first %det by the
# lower case detector name, the first %DET by the detector name as given.
# A path without a placeholder is printed unchanged.
# Arguments: $1 - detector name
#            $2 - path, possibly containing %det and/or %DET
# Outputs:   the resulting path on stdout (always exactly one line)
# Returns:   0
#######################################
substituteDetectorName()
{
  local det=$1
  local dir=$2
  #the % is escaped, a leading % would anchor the pattern to the end
  dir=${dir/\%det/${det,,}}
  dir=${dir/\%DET/${det}}
  printf '%s\n' "${dir}"
}
516
#######################################
# Print the absolute path of an existing regular file.
# The directory part is made absolute, the file name is kept as given.
# The current directory of the caller is left untouched.
# Arguments: $1 - path of the file
# Outputs:   the absolute path on stdout
# Returns:   0 on success, 1 if $1 is not an existing regular file
#######################################
get_realpath()
{
  # file *must* exist
  [[ -f "$1" ]] || return 1

  local fileName=${1##*/}
  local fileDir
  case "$1" in
    */*)
      fileDir=${1%/*}
      # a file directly below the root directory
      [[ -z ${fileDir} ]] && fileDir=/
      ;;
    *)
      # no directory part, the file is local
      fileDir=.
      ;;
  esac

  # resolve in a subshell so that neither PWD nor OLDPWD of the caller change
  local absDir
  absDir=$(cd -- "${fileDir}" &>/dev/null && pwd) || absDir=${PWD}

  # reassemble realpath
  echo "${absDir%/}/${fileName}"
  return 0 # success
}
541
#run main with the command line arguments; "$@" keeps every argument in
#one piece, even if it contains whitespace
main "$@"