]> git.uio.no Git - u/mrichter/AliRoot.git/blob - PWGPP/QA/scripts/runQA.sh
make it work on MacOSX
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh
1 #!/bin/bash
#entry point: checks the command line, loads the configuration and
#starts the QA update
# Globals:   ALICE_ROOT (read), ocdbStorage (read), ocdbregex (written)
# Arguments: option=value pairs, handed unchanged to parseConfig and updateQA
# Outputs:   the usage text on stdout when called without arguments or
#            when parseConfig rejects the arguments
# Returns:   1 on usage, configuration or environment errors, otherwise
#            the status of updateQA
main()
{
  if [[ -z $1 ]]; then
    echo "Usage: "
    echo "  ${0##*/} option=value [option=value]"
    echo "  at least inputList should be specified, or configFile containing it:"
    echo "  ${0##*/} inputList=file.list"
    echo "  options override config file (if any), e.g.:"
    echo "  ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
    return 1
  fi

  #"$@" keeps every argument as one word, also when it contains blanks
  if ! parseConfig "$@"; then
    #print the usage in-process: re-executing ${0} fails when the script
    #is not executable or was started as "bash runQA.sh"
    main
    return 1
  fi

  if [[ -z ${ALICE_ROOT} ]]; then
    echo "ALICE_ROOT not defined"
    return 1
  fi

  #a token is requested only when the OCDB storage points to raw://
  ocdbregex='raw://'
  if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
    alien-token-init
  fi

  updateQA "$@"
}
28
#runs the QA of every detector script found in
#$ALICE_ROOT/PWGPP/QA/detectorQAscripts: run level QA in a temporary
#directory, validation of the logs, move to the output directory and
#period level merging/trending
# Globals:   reads the configuration set by parseConfig (inputList,
#            workingDirectory, outputDirectory, logDirectory,
#            excludeDetectors, includeDetectors), ALICE_ROOT and planB;
#            the working variables (detector, qaFile, highPtTree,
#            tmpRunDir, productionDir, ...) stay global
#            NOTE(review): presumably the sourced detector scripts read
#            them - confirm before making any of them local
# Arguments: option=value pairs, forwarded to parseConfig
# Outputs:   once the log file is open, stdout and stderr go to that file
# Returns:   1 when the input list, working dir, log dir, log file or
#            lock cannot be used
updateQA()
{
  umask 0002
  parseConfig "$@"

  #be paranoid and make some full paths
  if [[ ! -f ${inputList} ]]; then
    echo "no input list: ${inputList}"
    return 1
  fi
  inputList=$(get_realpath "${inputList}")
  mkdir -p "${workingDirectory}"
  #resolve by entering the directory itself; going through its parent
  #breaks for names without a parent component ("work", "/work")
  local absWorkingDirectory
  if ! absWorkingDirectory=$(cd "${workingDirectory}" 2>/dev/null && pwd); then
    echo "working dir $workingDirectory does not exist and cannot be created"
    return 1
  fi
  workingDirectory=${absWorkingDirectory}
  if [[ ! -d ${workingDirectory} ]]; then
    echo "working dir $workingDirectory does not exist and cannot be created"
    return 1
  fi
  cd "${workingDirectory}"

  echo JOB config:
  echo inputList=$inputList
  echo outputDirectory=$outputDirectory
  echo

  dateString=$(date +%Y-%m-%d-%H-%M)
  echo "Start time QA process: $dateString"

  #logging
  mkdir -p "${logDirectory}"
  if [[ ! -d ${logDirectory} ]]; then
    echo "no log dir $logDirectory"
    return 1
  fi
  logFile="${logDirectory}/${0##*/}.${dateString}.log"
  touch "${logFile}"
  if [[ ! -f ${logFile} ]]; then
    echo "cannot write logfile $logFile"
    return 1
  fi
  echo "logFile = $logFile"
  #from here on everything printed ends up in the log file
  exec &>"${logFile}"

  #check lock
  lockFile=${logDirectory}/runQA.lock
  if [[ -f ${lockFile} ]]; then
    echo "lock ${lockFile} exists!"
    return 1
  fi
  touch "${lockFile}"
  if [[ ! -f ${lockFile} ]]; then
    echo "cannot lock $lockFile"
    return 1
  fi

  ################################################################
  #ze detector loop
  for detectorScript in "${ALICE_ROOT}"/PWGPP/QA/detectorQAscripts/*; do

    #only files ending in .sh are detector scripts (no backups like X.sh~)
    [[ ${detectorScript} =~ \.sh$ ]] || continue
    detector=${detectorScript%.sh}
    detector=${detector##*/}

    #skip if excluded
    if [[ "${excludeDetectors}" =~ ${detector} ]]; then
      echo "${detector} is excluded in config, skipping..."
      continue
    fi

    #if includeDetectors set, only process those detectors specified there
    if [[ -n ${includeDetectors} && ! "${includeDetectors}" =~ ${detector} ]]; then
      echo "${detector} not included in includeDetectors, skipping..."
      continue
    fi

    logSummary=${logDirectory}/summary-${detector}-${dateString}.log
    outputDir=$(substituteDetectorName "${detector}" "${outputDirectory}")
    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}
    if ! mkdir -p "${tmpDetectorRunDir}"; then
      echo "cannot create the temp dir $tmpDetectorRunDir"
      continue
    fi
    cd "${tmpDetectorRunDir}"

    tmpPrefix=${tmpDetectorRunDir}/${outputDir}
    echo
    echo "##############################################"
    echo "running QA for ${detector}"
    echo "  outputDir=$outputDir"
    echo "  tmpPrefix=$tmpPrefix"

    #drop the hooks of the previous detector before loading the new ones
    unset -f runLevelQA
    unset -f periodLevelQA
    source "${detectorScript}"

    #################################################################
    #produce the QA and trending tree for each file (run)
    unset arrOfTouchedProductions
    declare -A arrOfTouchedProductions
    #the second test keeps a last line that has no trailing newline
    while read -r qaFile || [[ -n ${qaFile} ]]; do
      echo

      if ! guessRunData "${qaFile}"; then
        echo "could not guess run data from ${qaFile}"
        continue
      fi

      tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
      arrOfTouchedProductions[${tmpProductionDir}]=1
      tmpRunDir=${tmpProductionDir}/000${runNumber}
      mkdir -p "${tmpRunDir}"
      cd "${tmpRunDir}"

      #handle the case of a zip archive
      #highPtTree has to be derived before qaFile gets its suffix,
      #afterwards qaFile does not end in .zip any more
      if [[ "${qaFile}" =~ \.zip$ ]]; then
        highPtTree="${qaFile}#FilterEvents_Trees.root"
        qaFile="${qaFile}#QAresults.root"
      fi

      echo running ${detector} runLevelQA for run ${runNumber} from ${qaFile}
      runLevelQA "${qaFile}" &> runLevelQA.log

      #perform some default actions:
      #if trending.root not created, create a default one
      if [[ ! -f trending.root ]]; then
        aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" &>> runLevelQA.log
      fi
      cd "${tmpDetectorRunDir}"

    done < "${inputList}"

    #################################################################
    #cache which productions were (re)done
    echo "list of processed productions:"
    echo "    ${!arrOfTouchedProductions[@]}"
    echo

    #################################################################
    #(re)do the merging/trending in the final destination
    for tmpProductionDir in "${!arrOfTouchedProductions[@]}"; do
      echo
      echo "running period level stuff in ${tmpProductionDir}"

      #quoting the prefix strips it literally, not as a glob pattern
      productionDir=${outputDir}/${tmpProductionDir#"${tmpPrefix}"}

      mkdir -p "${productionDir}"
      if [[ ! -d ${productionDir} ]]; then
        echo "cannot make productionDir $productionDir" && continue
      fi
      cd "${productionDir}"

      #move to final destination
      for dir in "${tmpProductionDir}"/*; do
        oldRunDir=${outputDir}/${dir#"${tmpPrefix}"}
        if ! guessRunData "${dir}/dummyName"; then
          echo "could not guess run data from ${dir}"
          continue
        fi

        #before moving - VALIDATE!!!
        if ! validate "${dir}"; then continue; fi

        if [[ -d ${oldRunDir} ]]; then
          echo "removing old ${oldRunDir}"
          rm -rf "${oldRunDir}"
        fi
        echo "moving new ${runNumber} to ${productionDir}"
        mv -f "${dir}" "${productionDir}"
      done

      echo running ${detector} periodLevelQA for production ${period}/${pass}
      rm -f trending.root

      #merge trending files if any
      if /bin/ls 000*/trending.root &>/dev/null; then
        hadd trending.root 000*/trending.root &> periodLevelQA.log
        periodLevelQA trending.root &>> periodLevelQA.log
      fi

      if ! validate "${PWD}"; then continue; fi

      cd "${tmpDetectorRunDir}"

    done

    cd "${workingDirectory}"

    #planB is set by validate as soon as one validation fails
    if [[ -z ${planB} ]]; then
      echo
      echo removing ${tmpDetectorRunDir}
      rm -rf "${tmpDetectorRunDir}"
    else
      executePlanB
    fi
  done

  #remove lock
  rm -f "${lockFile}"
}
210
#emergency hook: mails the log summary to ${MAILTO}
# Globals:   MAILTO (read), logSummary (read)
# Outputs:   a notice on stdout before the mail is handed to "mail"
# Returns:   0 when MAILTO is empty, otherwise the status of the mail pipeline
executePlanB()
{
  #without a recipient there is nothing to do
  [[ -n ${MAILTO} ]] || return 0

  printf '\n%s\n' "trouble detected, sending email to ${MAILTO}"

  #the summary file becomes the body of the message
  cat ${logSummary} | mail -s "qa in need of assistance" ${MAILTO}
}
221
#validates the logs of one directory and appends the report to the
#log summary
# Globals:   logSummary (read), logStatus (written), planB (set to 1 on failure)
# Arguments: $1 - directory to check, passed on to summarizeLogs
# Outputs:   a warning on stdout when the directory is not validated
# Returns:   0 when summarizeLogs succeeds, 1 otherwise
validate()
{
  #quoted so that paths with blanks stay one word and the redirection
  #target is never ambiguous
  summarizeLogs "${1}" >> "${logSummary}"
  logStatus=$?
  if (( logStatus != 0 )); then
    echo "WARNING not validated: ${1}"
    #remember the failure, updateQA reacts to it after the detector is done
    planB=1
    return 1
  fi
  return 0
}
233
#prints one status line per log file found in a directory and runs gdb
#on core files found under the current directory
# Globals:   pretend (read), PWD (read)
# Arguments: $1 - directory holding the logs; the current directory is
#            used when it is not a directory
# Outputs:   "<log> OK", "<log> BAD <errors>" or "<log> OK MWAH <warnings>"
#            per log, plus the path of every core file
# Returns:   1 when validateLog reported a non-zero status for any log
#            (warnings included), 0 otherwise
summarizeLogs()
{
  local dir=$1
  [[ ! -d ${dir} ]] && dir=${PWD}

  #print a summary of logs
  local -a logFiles=(
      "*.log"
      "stdout"
      "stderr"
  )

  #check logs
  local logstatus=0
  local pattern log finallog errorSummary validationStatus
  #every pattern is searched inside ${dir}, not only the first one
  for pattern in "${logFiles[@]}"; do
    #${pattern} stays unquoted on purpose so that "*.log" is expanded
    for log in "${dir}"/${pattern}; do
      [[ -f ${log} ]] || continue

      #report a full path; only a relative one needs the current directory
      if [[ ${log} == /* ]]; then
        finallog=${log}
      else
        finallog=${PWD%/}/${log}
      fi

      errorSummary=$(validateLog "${log}")
      validationStatus=$?
      (( validationStatus != 0 )) && logstatus=1

      case ${validationStatus} in
        0)
          #in pretend mode randomly report an error in rec.log some cases
          if [[ -n ${pretend} && "${log##*/}" == "rec.log" ]]; then
            if (( RANDOM % 2 >= 1 )); then
              echo "${finallog} BAD random error"
            else
              echo "${finallog} OK"
            fi
          else
            echo "${finallog} OK"
          fi
          ;;
        1)
          echo "${finallog} BAD ${errorSummary}"
          ;;
        2)
          echo "${finallog} OK MWAH ${errorSummary}"
          ;;
      esac
    done
  done

  #report core files
  #NOTE(review): cores are searched under the current directory, not
  #under ${dir}, as before - confirm that this is intended
  local core
  for core in "${PWD}"/*/core "${PWD}"/core; do
    [[ -f ${core} ]] || continue
    echo "${core}"
    chmod 644 "${core}"
    gdb --batch --quiet -ex "bt" -ex "quit" aliroot "${core}" > "stacktrace_${core//\//_}.log"
  done

  return ${logstatus}
}
277
#searches one log file for known error and warning messages
# Arguments: $1 - log file to scan
# Outputs:   the first matching line of every condition that matched, each
#            followed by " : "; errors take precedence over warnings
# Returns:   1 when an error condition matched, 2 when only a warning
#            condition matched, 0 otherwise
validateLog()
{
  #everything is local so the caller's variables (e.g. its loop variable
  #"log") are never overwritten
  local log=${1}
  local -a errorConditions=(
            'There was a crash'
            'floating'
            'error while loading shared libraries'
            'std::bad_alloc'
            's_err_syswatch_'
            'Thread [0-9]* (Thread'
            'AliFatal'
            'core dumped'
            '\.C.*error:.*\.h: No such file'
            'segmentation'
  )

  local -a warningConditions=(
            'This is serious'
  )

  local errorSummary=""
  local warningSummary=""
  local condition match

  for condition in "${errorConditions[@]}"; do
    #-m1: the first matching line is enough for the summary
    match=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${match} ]] && errorSummary+="${match} : "
  done

  for condition in "${warningConditions[@]}"; do
    match=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${match} ]] && warningSummary+="${match} : "
  done

  if [[ -n ${errorSummary} ]]; then
    echo "${errorSummary}"
    return 1
  fi

  if [[ -n ${warningSummary} ]]; then
    echo "${warningSummary}"
    return 2
  fi

  return 0
}
326
#sets the default configuration, then applies the config file (if one
#is given) and finally the command line options, which win
# Globals:   written: configFile, inputList, workingDirectory,
#            outputDirectory, excludeDetectors, logDirectory and any
#            variable named by an option or set in the config file
# Arguments: option=value words, an optional leading "--" is accepted
# Outputs:   an error message on stdout for a missing config file or a
#            badly formatted option
# Returns:   1 on those errors, 0 otherwise
parseConfig()
{
  #config file
  configFile=""
  #where to search for qa files
  inputList=file.list
  #working directory
  workingDirectory="${PWD}"
  #where to place the final qa plots
  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
  outputDirectory="${workingDirectory}/%DET"
  #filter out detector option
  excludeDetectors="EXAMPLE"
  #logs
  logDirectory=${workingDirectory}/logs
  #set aliroot
  #alirootEnv="/home/mkrzewic/alisoft/balice_master.sh"
  #OCDB storage
  #ocdbStorage="raw://"
  #email to
  #MAILTO="fbellini@cern.ch"

  #NOTE(review): options are applied with eval, so their text is executed
  #as shell code; only pass trusted arguments

  #first, check if the config file is configured
  #if yes - source it so that other options can override it
  #if any
  local opt
  for opt in "$@"; do
    #accept --configFile=... here as well, like the option loop below
    opt=${opt#--}
    #anchored: an option whose value merely contains "configFile=" is
    #not a config file request
    if [[ ${opt} =~ ^configFile= ]]; then
      eval "${opt}"
      if [[ ! -f ${configFile} ]]; then
        echo "configFile ${configFile} not found, exiting..."
        return 1
      fi
      source "${configFile}"
      break
    fi
  done

  #then, parse the options as they override the options from file
  while [[ -n ${1} ]]; do
    local var=${1#--}
    if [[ ${var} =~ .*=.* ]]; then
      eval "${var}"
    else
      echo "badly formatted option ${var}, should be: option=value, stopping..."
      return 1
    fi
    shift
  done
  return 0
}
373
#guesses the run data from the components of a path
# Globals:   written: period, runNumber, year, pass, legoTrainRunNumber,
#            dataType (all reset to "" first)
# Arguments: $1 - path of a QA file
# Returns:   0 when a run number was found, 1 otherwise
guessRunData()
{
  #guess the period from the path, pick the rightmost one
  period=""
  runNumber=""
  year=""
  pass=""
  legoTrainRunNumber=""
  dataType=""

  local shortRunNumber=""
  #split the path at the slashes
  local IFS="/"
  local -a path=( $1 )
  local dirDepth=$(( ${#path[*]}-1 ))
  local x field fieldPrev fieldNext
  #walk from the file name towards the root
  for ((x=dirDepth; x>=0; x--)); do

    (( x >= 1 )) && fieldPrev=${path[x-1]}
    field=${path[x]}
    fieldNext=${path[x+1]}

    #a purely numeric directory right above a .zip/.root file
    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
    [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
  done

  #without a lego train number the pass is the directory holding the
  #file; guarded so that short paths never produce a negative subscript
  if [[ -z ${legoTrainRunNumber} ]] && (( dirDepth >= 1 )); then
    pass=${path[dirDepth-1]}
  fi

  if [[ "${dataType}" =~ ^sim$ ]]; then
    pass="passMC"
    #prefer the 6 digit run number, but keep a 000-prefixed one when it
    #is the only one in the path
    [[ -n ${shortRunNumber} ]] && runNumber=${shortRunNumber}
  fi

  #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber} || -z ${pass} ]];
  if [[ -z ${runNumber} ]]; then
    #error condition
    return 1
  fi
  #ALL OK
  return 0
}
417
#replaces the detector placeholders in a directory template
# Arguments: $1 - detector name
#            $2 - template; the first %det becomes the lower case name,
#                 the first %DET the name as given
# Outputs:   the resulting directory on stdout, exactly one line; a
#            template without placeholder is printed unchanged
substituteDetectorName()
{
  local det=$1
  local dir=$2
  local detLower=${det,,}
  #the backslash keeps % literal; a leading bare % would anchor the
  #pattern to the end of the string
  dir=${dir/\%det/"${detLower}"}
  dir=${dir/\%DET/"${det}"}
  printf '%s\n' "${dir}"
}
425
#prints the full path of an existing file (symbolic links are not resolved)
# Arguments: $1 - path of the file, absolute or relative
# Outputs:   <full directory>/<file name> on stdout
# Returns:   0 on success, 1 when $1 is not an existing regular file or
#            its directory cannot be entered
get_realpath()
{
  # file *must* exist
  [[ -f "$1" ]] || return 1

  local fileDir
  case "$1" in
    */*)
      fileDir=${1%/*}
      # a file directly under / leaves an empty directory part
      [[ -z ${fileDir} ]] && fileDir="/"
      # the cd happens in a subshell, the caller's directory and OLDPWD
      # stay untouched
      fileDir=$(cd -- "${fileDir}" &>/dev/null && printf '%s' "${PWD}") || return 1
      ;;
    *)
      # no directory part: the file is local
      fileDir=${PWD}
      ;;
  esac

  # reassemble realpath
  echo "${fileDir%/}/${1##*/}"
  return 0 # success
}
450
#"$@" hands every command line argument to main as one word each, so
#values containing blanks are not split
main "$@"