]> git.uio.no Git - u/mrichter/AliRoot.git/blob - PWGPP/QA/scripts/runQA.sh
update comments, add some possible TODOs
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh
1 #!/bin/bash
#entry point: print the usage when called without arguments, otherwise
#parse the options, check the environment and run the QA update
#  arguments: option=value [option=value ...]
#  returns:   1 on missing arguments, bad options or missing ALICE_ROOT,
#             otherwise the status of updateQA
main()
{
  if [[ -z $1 ]]; then
    echo "Usage: "
    echo "  ${0##*/} option=value [option=value]"
    echo "  at least inputList should be specified, or configFile containing it:"
    echo "  ${0##*/} inputList=file.list"
    echo "  options override config file (if any), e.g.:"
    echo "  ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
    return 1
  fi

  #"$@" keeps every option as one word, even if its value has whitespace
  if ! parseConfig "$@"; then
    #print the usage by calling ourselves without arguments; this does not
    #depend on ${0} being executable or still resolvable
    main
    return 1
  fi

  [[ -z ${ALICE_ROOT} ]] && echo "ALICE_ROOT not defined" && return 1

  #a raw:// OCDB storage needs a grid token
  ocdbregex='raw://'
  if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
    alien-token-init
  fi

  updateQA "$@"
}
28
#run the QA of every detector script over the files of the input list
#and move the validated output to the output directory
#  arguments: option=value ... (handed to parseConfig)
#  reads:     ALICE_ROOT and the options set by parseConfig
#  writes:    inputList and workingDirectory (made absolute), dateString,
#             logFile, lockFile, logSummary and the per detector/run
#             variables used inside the loops
#  returns:   1 when the input list, the working dir, the log dir, the log
#             file or the lock cannot be set up
#  note:      after the log file is created all output of this shell goes
#             to the log file (exec &>)
updateQA()
{
  umask 0002
  parseConfig "$@"

  #be paranoid and make some full paths
  [[ ! -f ${inputList} ]] && echo "no input list: ${inputList}" && return 1
  inputList=$(get_realpath "${inputList}")
  mkdir -p "${workingDirectory}"
  #resolve the directory itself; going through its parent breaks for a
  #relative name without any slash (cd work; echo $PWD/work)
  local requestedWorkingDirectory=${workingDirectory}
  workingDirectory=$(cd "${workingDirectory}" 2>/dev/null && pwd)
  if [[ -z ${workingDirectory} || ! -d ${workingDirectory} ]]; then
    echo "working dir ${requestedWorkingDirectory} does not exist and cannot be created"
    return 1
  fi
  cd "${workingDirectory}" || return 1

  echo JOB config:
  echo "inputList=${inputList}"
  echo "outputDirectory=${outputDirectory}"
  echo

  dateString=$(date +%Y-%m-%d-%H-%M)
  echo "Start time QA process: $dateString"

  #logging
  mkdir -p "${logDirectory}"
  [[ ! -d ${logDirectory} ]] && echo "no log dir $logDirectory" && return 1
  logFile="${logDirectory}/${0##*/}.${dateString}.log"
  touch "${logFile}"
  [[ ! -f ${logFile} ]] && echo "cannot write logfile ${logFile}" && return 1
  echo "logFile = $logFile"
  #from here on stdout and stderr go to the log file
  exec &>"${logFile}"

  #check lock
  lockFile=${logDirectory}/runQA.lock
  [[ -f ${lockFile} ]] && echo "lock ${lockFile} exists!" && return 1
  touch "${lockFile}"
  [[ ! -f ${lockFile} ]] && echo "cannot lock $lockFile" && return 1

  ################################################################
  #ze detector loop
  for detectorScript in "${ALICE_ROOT}"/PWGPP/QA/detectorQAscripts/*; do

    #only files ending in .sh are detector scripts (not e.g. x.sh.bak)
    [[ ! ${detectorScript} =~ \.sh$ ]] && continue
    detector=${detectorScript%.sh}
    detector=${detector##*/}

    #skip if excluded
    if [[ "${excludeDetectors}" =~ ${detector} ]]; then
      echo "${detector} is excluded in config, skipping..."
      continue
    fi

    #if includeDetectors set, only process the detectors specified there
    if [[ -n ${includeDetectors} && ! "${includeDetectors}" =~ ${detector} ]]; then
      echo "${detector} not included in includeDetectors, skipping..."
      continue
    fi

    logSummary=${logDirectory}/summary-${detector}-${dateString}.log
    outputDir=$(substituteDetectorName "${detector}" "${outputDirectory}")
    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}
    if ! mkdir -p "${tmpDetectorRunDir}"; then
      echo "cannot create the temp dir $tmpDetectorRunDir"
      continue
    fi
    cd "${tmpDetectorRunDir}" || continue

    tmpPrefix=${tmpDetectorRunDir}/${outputDir}
    echo
    echo "##############################################"
    echo "running QA for ${detector}"
    echo "  outputDir=$outputDir"
    echo "  tmpPrefix=$tmpPrefix"

    #forget the hooks of the previous detector before loading the new ones
    unset -f runLevelQA
    unset -f periodLevelQA
    unset -f runLevelHighPtTreeQA
    unset -f periodLevelHighPtTreeQA
    source "${detectorScript}"

    #################################################################
    #produce the QA and trending tree for each file (run)
    unset arrOfTouchedProductions
    declare -A arrOfTouchedProductions
    while read -r qaFile; do
      echo

      if ! guessRunData "${qaFile}"; then
        echo "could not guess run data from ${qaFile}"
        continue
      fi

      tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
      tmpRunDir=${tmpProductionDir}/000${runNumber}
      mkdir -p "${tmpRunDir}"
      #never run the QA in the wrong directory
      if ! cd "${tmpRunDir}"; then
        echo "cannot enter ${tmpRunDir}"
        continue
      fi

      #by default we expect to have everything in the same archive
      highPtTree=${qaFile}

      #maybe the input is not an archive, but a file
      [[ "${qaFile}" =~ "QAresults.root" ]] && highPtTree=""
      [[ "${qaFile}" =~ "FilterEvents_Trees.root" ]] && qaFile=""

      #it is possible we get the highPt trees from somewhere else
      #search the list of high pt trees for the proper run number
      if [[ -n ${inputListHighPtTrees} ]]; then
        highPtTree=$(grep -E -m1 -- "${runNumber}" "${inputListHighPtTrees}")
        echo "loaded the highPtTree ${highPtTree} from external file ${inputListHighPtTrees}"
      fi

      echo "qaFile=${qaFile}"
      echo "highPtTree=${highPtTree}"

      #what if we have a zip archive?
      if [[ "${qaFile}" =~ \.zip$ ]]; then
        if unzip -l "${qaFile}" | grep -qF "QAresults.root"; then
          qaFile="${qaFile}#QAresults.root"
        else
          qaFile=""
        fi
      fi
      if [[ "${highPtTree}" =~ \.zip$ ]]; then
        if unzip -l "${highPtTree}" | grep -qF "FilterEvents_Trees.root"; then
          highPtTree="${highPtTree}#FilterEvents_Trees.root"
        else
          highPtTree=""
        fi
      fi

      if [[ -n ${qaFile} && $(type -t runLevelQA) =~ "function" ]]; then
        echo "running ${detector} runLevelQA for run ${runNumber} from ${qaFile}"
        runLevelQA "${qaFile}" &> runLevelQA.log
        #perform some default actions:
        #if trending.root not created, create a default one
        if [[ ! -f trending.root ]]; then
          aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" &>> runLevelQA.log
        fi
        arrOfTouchedProductions[${tmpProductionDir}]=1
      fi
      #expert QA based on high pt trees
      if [[ -n ${highPtTree} && $(type -t runLevelHighPtTreeQA) =~ "function" ]]; then
        echo "running ${detector} runLevelHighPtTreeQA for run ${runNumber} from ${highPtTree}"
        runLevelHighPtTreeQA "${highPtTree}" &> runLevelHighPtTreeQA.log
        arrOfTouchedProductions[${tmpProductionDir}]=1
      fi

      cd "${tmpDetectorRunDir}"

    done < "${inputList}"

    #################################################################
    #cache which productions were (re)done
    echo "list of processed productions:"
    echo "    ${!arrOfTouchedProductions[@]}"
    echo

    #################################################################
    #(re)do the merging/trending in the final destination
    for tmpProductionDir in "${!arrOfTouchedProductions[@]}"; do
      echo
      echo "running period level stuff in ${tmpProductionDir}"

      #same relative path as in the tmp area, but below the output dir
      productionDir=${outputDir}/${tmpProductionDir#"${tmpPrefix}"}

      mkdir -p "${productionDir}"
      if [[ ! -d ${productionDir} ]]; then
        echo "cannot make productionDir $productionDir" && continue
      fi
      #the relative rm/hadd below must only ever run inside productionDir
      cd "${productionDir}" || continue

      #move to final destination
      for dir in "${tmpProductionDir}"/*; do
        oldRunDir=${outputDir}/${dir#"${tmpPrefix}"}
        if ! guessRunData "${dir}/dummyName"; then
          echo "could not guess run data from ${dir}"
          continue
        fi

        #before moving - VALIDATE!!!
        if ! validate "${dir}"; then continue; fi

        if [[ -d ${oldRunDir} ]]; then
          echo "removing old ${oldRunDir}"
          rm -rf -- "${oldRunDir}"
        fi
        echo "moving new ${runNumber} to ${productionDir}"
        mv -f -- "${dir}" "${productionDir}"
      done

      #here we are in the updated period dir, all runs there
      #TODO: maybe cleanup all old output first?
      rm -f trending.root

      #merge trending files if any
      if /bin/ls 000*/trending.root &>/dev/null; then
        hadd trending.root 000*/trending.root &> periodLevelQA.log
      fi

      #TODO: maybe run this in a tmp dir (with links to run dirs) and only move
      #the plots after validation
      if [[ -f "trending.root" && $(type -t periodLevelQA) =~ "function" ]]; then
        echo "running ${detector} periodLevelQA for production ${period}/${pass}"
        periodLevelQA trending.root &>> periodLevelQA.log
      else
        echo "WARNING: not running ${detector} periodLevelQA for production ${period}/${pass}, no trending.root"
      fi

      if ! validate "${PWD}"; then continue; fi

      cd "${tmpDetectorRunDir}"

    done

    cd "${workingDirectory}"

    #planB is set by validate as soon as something did not validate;
    #in that case the tmp dir is kept for inspection
    if [[ -z ${planB} ]]; then
      echo
      echo "removing ${tmpDetectorRunDir}"
      rm -rf -- "${tmpDetectorRunDir}"
    else
      executePlanB
    fi
  done

  #remove lock
  rm -f -- "${lockFile}"
}
259
#mail the summary log of the current detector to ${MAILTO};
#does nothing when MAILTO is empty
#  reads:   MAILTO, logSummary
#  returns: 0 when MAILTO is empty, otherwise the status of the mail pipeline
executePlanB()
{
  #in case of emergency
  if [[ -n ${MAILTO} ]]; then
    echo
    echo "trouble detected, sending email to ${MAILTO}"

    #cat instead of a redirect: the mail is still sent (with an empty body)
    #when the summary is missing. MAILTO is left unquoted on purpose to keep
    #the original word splitting of its value.
    cat "${logSummary}" | mail -s "qa in need of assistance" ${MAILTO}
  fi
}
270
#check the logs of a directory and append the result to ${logSummary}
#  arguments: $1 - directory handed to summarizeLogs
#  reads:     logSummary
#  writes:    logStatus (status of summarizeLogs), planB=1 on failure
#  returns:   0 if summarizeLogs succeeded, 1 otherwise
validate()
{
  #quoted so that paths with whitespace stay in one piece; an unquoted
  #redirect target would fail with "ambiguous redirect"
  summarizeLogs "${1}" >> "${logSummary}"
  logStatus=$?
  if [[ ${logStatus} -ne 0 ]]; then
    echo "WARNING not validated: ${1}"
    planB=1
    return 1
  fi
  return 0
}
282
#print one status line per log file (*.log, stdout, stderr) found in a
#directory and write a gdb stack trace for core files below ${PWD}
#  arguments: $1 - directory with the logs; ${PWD} is used if it is not a
#                  directory
#  reads:     pretend
#  outputs:   "<log> OK", "<log> BAD <errors>" or "<log> OK MWAH <warnings>"
#             per log, followed by the paths of the core files
#  returns:   1 if validateLog returned non zero (error or warning) for at
#             least one log, otherwise 0
summarizeLogs()
{
  local dir=$1
  [[ ! -d ${dir} ]] && dir=${PWD}

  #print a summary of logs
  local logFiles=(
      "*.log"
      "stdout"
      "stderr"
  )

  #check logs
  local logstatus=0
  local pattern log finallog errorSummary validationStatus
  for pattern in "${logFiles[@]}"; do
    #every pattern is looked up inside ${dir}; the directory is quoted, the
    #pattern is not, so that *.log is still expanded
    for log in "${dir}"/${pattern}; do
      #NOTE(review): ${log} already starts with ${dir}, so this yields
      #${PWD}/${dir}/...; kept as is so the summary format does not change
      finallog=${PWD%/}/${log}
      [[ ! -f ${log} ]] && continue
      errorSummary=$(validateLog "${log}")
      validationStatus=$?
      [[ ${validationStatus} -ne 0 ]] && logstatus=1
      if [[ ${validationStatus} -eq 0 ]]; then
        #in pretend mode randomly report an error in rec.log some cases
        if [[ -n ${pretend} && "${log}" == "rec.log" ]]; then
          [[ $(( ${RANDOM}%2 )) -ge 1 ]] && echo "${finallog} BAD random error" || echo "${finallog} OK"
        else
          echo "${finallog} OK"
        fi
      elif [[ ${validationStatus} -eq 1 ]]; then
        echo "${finallog} BAD ${errorSummary}"
      elif [[ ${validationStatus} -eq 2 ]]; then
        echo "${finallog} OK MWAH ${errorSummary}"
      fi
    done
  done

  #report core files (plain globs instead of parsing the output of ls)
  local core
  for core in "${PWD}"/*/core "${PWD}"/core; do
    [[ -f ${core} ]] || continue
    echo "${core}"
    chmod 644 "${core}"
    gdb --batch --quiet -ex "bt" -ex "quit" aliroot "${core}" > "stacktrace_${core//\//_}.log"
  done

  return ${logstatus}
}
326
#search a log file for known error and warning messages
#  arguments: $1 - log file
#  outputs:   the first matching line of every condition that matched, each
#             followed by " : " (errors if there are any, else warnings)
#  returns:   1 if an error condition matched, 2 if only a warning
#             condition matched, 0 otherwise
validateLog()
{
  local log=${1}
  #grep (basic) regular expressions
  local errorConditions=(
            'There was a crash'
            'floating'
            'error while loading shared libraries'
            'std::bad_alloc'
            's_err_syswatch_'
            'Thread [0-9]* (Thread'
            'AliFatal'
            'core dumped'
            '\.C.*error:.*\.h: No such file'
            'segmentation'
            'Interpreter error recovered'
  )

  local warningConditions=(
            'This is serious'
  )

  local errorSummary=""
  local warningSummary=""
  local condition match

  for condition in "${errorConditions[@]}"; do
    #the log is quoted so that a path with whitespace is still searched
    match=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${match} ]] && errorSummary+="${match} : "
  done

  for condition in "${warningConditions[@]}"; do
    match=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${match} ]] && warningSummary+="${match} : "
  done

  if [[ -n ${errorSummary} ]]; then
    echo "${errorSummary}"
    return 1
  fi

  if [[ -n ${warningSummary} ]]; then
    echo "${warningSummary}"
    return 2
  fi

  return 0
}
376
#set the default options, then source the config file (if one is given),
#then apply the options from the command line on top
#  arguments: option=value or --option=value ...
#  writes:    configFile, inputList, workingDirectory, outputDirectory,
#             excludeDetectors, logDirectory and whatever the config file
#             and the options assign
#  returns:   1 if the config file is missing or an option is not of the
#             form option=value, 0 otherwise
parseConfig()
{
  #config file
  configFile=""
  #where to search for qa files
  inputList=file.list
  #working directory
  workingDirectory="${PWD}"
  #where to place the final qa plots
  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
  outputDirectory="${workingDirectory}/%DET"
  #filter out detector option
  excludeDetectors="EXAMPLE"
  #logs
  logDirectory=${workingDirectory}/logs
  #set aliroot
  #alirootEnv="/home/mkrzewic/alisoft/balice_master.sh"
  #OCDB storage
  #ocdbStorage="raw://"
  #email to
  #MAILTO="fbellini@cern.ch"

  #first, check if the config file is configured
  #if yes - source it so that other options can override it
  #if any
  local opt
  for opt in "$@"; do
    #accept --configFile=... like every other option
    opt=${opt#--}
    if [[ ${opt} =~ ^configFile= ]]; then
      #NOTE(review): eval executes the option text as shell code (values
      #with whitespace or shell syntax are not safe); kept for compatibility
      eval "${opt}"
      [[ ! -f ${configFile} ]] && echo "configFile ${configFile} not found, exiting..." && return 1
      source "${configFile}"
      break
    fi
  done

  #then, parse the options as they override the options from file
  while [[ -n ${1} ]]; do
    local var=${1#--}
    if [[ ${var} =~ .*=.* ]]; then
      #NOTE(review): same eval caveat as above
      eval "${var}"
    else
      echo "badly formatted option ${var}, should be: option=value, stopping..."
      return 1
    fi
    shift
  done
  return 0
}
423
#guess the run data from the components of a path, scanning from the
#rightmost component to the left
#  arguments: $1 - path of a qa file
#  writes:    period, runNumber, year, pass, legoTrainRunNumber, dataType
#  returns:   0 if a run number was found, 1 otherwise
guessRunData()
{
  #guess the period from the path, pick the rightmost one
  period=""
  runNumber=""
  year=""
  pass=""
  legoTrainRunNumber=""
  dataType=""

  local shortRunNumber=""
  local IFS="/"
  #split on "/" without exposing the components to pathname expansion
  local -a path
  read -r -a path <<< "$1"
  local dirDepth=$(( ${#path[@]}-1 ))
  local x field fieldPrev fieldNext
  for ((x=dirDepth; x>=0; x--)); do

    #for the leftmost component fieldPrev keeps its previous value
    (( x > 0 )) && fieldPrev=${path[x-1]}
    field=${path[x]}
    fieldNext=${path[x+1]}

    #a purely numeric directory right before the file name
    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
    [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
  done
  #without a lego train number the pass is the directory holding the file
  [[ -z ${legoTrainRunNumber} && ${dirDepth} -gt 0 ]] && pass=${path[dirDepth-1]}
  if [[ "${dataType}" =~ ^sim$ ]]; then
    pass="passMC"
    #prefer the 6 digit run number, but do not throw away a 000 prefixed
    #one when no 6 digit component is in the path
    [[ -n ${shortRunNumber} ]] && runNumber=${shortRunNumber}
  fi

  #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber} || -z ${pass} ]];
  if [[ -z ${runNumber} ]]; then
    #error condition
    return 1
  fi
  #ALL OK
  return 0
}
467
#replace the detector placeholders in a directory name
#  arguments: $1 - detector name
#             $2 - directory; %det is replaced by the lower case detector
#                  name, %DET by the detector name as given
#  outputs:   the resulting directory on one line; a directory without any
#             placeholder is printed unchanged
substituteDetectorName()
{
  local det=$1
  local dir=$2
  #the \ keeps % from being read as the "match at the end" anchor
  dir=${dir//\%det/${det,,}}
  dir=${dir//\%DET/${det}}
  printf '%s\n' "${dir}"
}
475
#print the absolute path of an existing regular file
#  arguments: $1 - path of the file, relative or absolute
#  outputs:   <absolute directory>/<file name>
#  returns:   1 if $1 is not an existing regular file, 0 otherwise
get_realpath()
{
  # file *must* exist
  [[ -f "$1" ]] || return 1

  # a name without any slash is local to the current directory
  local dirPart absDir="${PWD}"
  if [[ "$1" == */* ]]; then
    dirPart="${1%/*}"
    # a file directly below / leaves an empty directory part
    [[ -z ${dirPart} ]] && dirPart="/"
    # resolve the directory in a subshell so that PWD and OLDPWD of the
    # caller stay untouched; fall back to the current directory if it
    # cannot be entered
    absDir=$(cd -- "${dirPart}" &>/dev/null && printf '%s' "${PWD}") || absDir="${PWD}"
  fi

  # reassemble realpath
  printf '%s/%s\n' "${absDir%/}" "${1##*/}"
  return 0 # success
}
500
#hand the command line over unchanged, one word per argument
main "$@"