git.uio.no Git - u/mrichter/AliRoot.git/blob - PWGPP/QA/scripts/runQA.sh
make a default trending tree if a detector does not generate its own
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh
1 #!/bin/bash
# Entry point: print the usage text when called without arguments, otherwise
# parse the options, make sure ALICE_ROOT is set and run updateQA.
#
# Arguments: option=value words, passed on unchanged to parseConfig and
#            updateQA
# Returns:   1 when no argument is given, when parseConfig fails or when
#            ALICE_ROOT is not defined; otherwise the status of updateQA
main()
{
  if [[ -z $1 ]]; then
    echo "Usage: "
    echo "  ${0##*/} option=value [option=value]"
    echo "  at least one option, either inputList or configFile should be specified,"
    echo "  options override config file (if any), e.g.:"
    echo "  ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
    return 1
  fi

  if ! parseConfig "$@"; then
    #print the usage text by re-entering main without arguments; this does
    #not depend on the script being executable or reachable through PATH
    main
    return 1
  fi

  #only source the environment script when one is actually configured
  if [[ -z ${ALICE_ROOT} && -n ${alirootEnv} ]]; then
    source "${alirootEnv}"
  fi
  if [[ -z ${ALICE_ROOT} ]]; then
    echo "ALICE_ROOT not defined"
    return 1
  fi

  #an OCDB storage matching raw:// triggers alien-token-init
  local ocdbregex='raw://'
  if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
    alien-token-init
  fi

  #"$@" keeps every option as one word, even if its value contains spaces
  updateQA "$@"
}
28
# Run the QA of every detector script and move the results to the output
# directory.
#
# Steps, in order: parse the options, turn inputList/workingDirectory into
# full paths, redirect all further output to a time-stamped log file, take
# the lock file, then for each *.sh script in
# $ALICE_ROOT/PWGPP/QA/detectorQAscripts:
#   1. source the script (runLevelQA and periodLevelQA are unset first, so
#      the functions called later are the ones this script defines),
#   2. call runLevelQA for every file listed in ${inputList} inside a
#      temporary per-run directory and create a default trending.root when
#      none was produced,
#   3. validate every run directory and move it to the final production
#      directory, replacing an older copy,
#   4. merge the per-run trending.root files with hadd and call
#      periodLevelQA.
# The temporary detector directory is removed unless planB is set (validate
# sets it on failure); in that case executePlanB is called instead.
#
# Arguments: the option=value words understood by parseConfig
# Returns:   1 when the options, input list, working directory, log
#            directory, log file or lock cannot be set up; otherwise the
#            status of the final lock removal
# Note: the working variables are left global on purpose. validate and
#       executePlanB read logSummary; the sourced detector scripts presumably
#       read others (detector, qaFile, ...) - confirm before making any of
#       them local.
updateQA()
{
  umask 0002
  parseConfig "$@" || return 1

  #be paranoid and make some full paths
  inputList=$(readlink -f "${inputList}")
  if [[ ! -f ${inputList} ]]; then
    echo "no input list: ${inputList}"
    return 1
  fi
  workingDirectory=$(readlink -f "${workingDirectory}")
  mkdir -p "${workingDirectory}"
  if [[ ! -d ${workingDirectory} ]]; then
    echo "working dir $workingDirectory does not exist and cannot be created"
    return 1
  fi
  if ! cd "${workingDirectory}"; then
    echo "cannot enter working dir ${workingDirectory}"
    return 1
  fi

  echo JOB config:
  echo "inputList=$inputList"
  echo "outputDirectory=$outputDirectory"
  echo

  dateString=$(date +%Y-%m-%d-%H-%M)
  echo "Start time QA process: $dateString"

  #logging
  mkdir -p "$logDirectory"
  if [[ ! -d $logDirectory ]]; then
    echo "no log dir $logDirectory"
    return 1
  fi
  #the log paths are used after several cd calls, so they must not stay
  #relative to the current directory
  logDirectory=$(readlink -f "${logDirectory}")
  logFile="$logDirectory/${0##*/}.${dateString}.log"
  touch "${logFile}"
  if [[ ! -f ${logFile} ]]; then
    echo "cannot write logfile $logFile"
    return 1
  fi
  echo "logFile = $logFile"
  #from here on stdout and stderr of the whole shell go to the log file
  exec &> "${logFile}"

  #check lock
  lockFile=${logDirectory}/runQA.lock
  if [[ -f ${lockFile} ]]; then
    echo "lock ${lockFile} exists!"
    return 1
  fi
  touch "${lockFile}"
  if [[ ! -f ${lockFile} ]]; then
    echo "cannot lock $lockFile"
    return 1
  fi

  #exclude patterns are a whitespace separated list; split them once without
  #exposing regex characters such as * to pathname expansion
  local -a excludedList=()
  IFS=$' \t\n' read -r -d '' -a excludedList <<< "${excludeDetectors}"
  local -a trendingFiles=()

  ################################################################
  #ze detector loop
  for detectorScript in "${ALICE_ROOT}"/PWGPP/QA/detectorQAscripts/*; do

    [[ ${detectorScript} =~ \.sh$ ]] || continue
    detector=${detectorScript%.sh}
    detector=${detector##*/}

    #skip if excluded
    skipDetector=0
    for excluded in "${excludedList[@]}"; do
      if [[ ${detector} =~ ${excluded} ]]; then
        echo "${detector} is excluded in config, skipping..."
        skipDetector=1
        break
      fi
    done
    [[ ${skipDetector} -eq 1 ]] && continue

    logSummary=${logDirectory}/summary-${detector}-${dateString}.log
    outputDir=$(substituteDetectorName "${detector}" "${outputDirectory}")
    if [[ -z ${outputDir} ]]; then
      #an empty output dir would turn the paths below into /<dataType>/...
      echo "cannot determine the output dir for ${detector}, skipping..."
      continue
    fi
    #a relative output dir would be resolved against the temporary run dir
    #(the current directory further down) and collide with the temporary
    #results; anchor it to the working directory instead
    [[ ${outputDir} != /* ]] && outputDir=${workingDirectory}/${outputDir}

    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}
    if ! mkdir -p "${tmpDetectorRunDir}"; then
      echo "cannot create the temp dir $tmpDetectorRunDir"
      continue
    fi
    if ! cd "${tmpDetectorRunDir}"; then
      echo "cannot enter the temp dir $tmpDetectorRunDir"
      continue
    fi

    tmpPrefix=${tmpDetectorRunDir}/${outputDir}
    echo
    echo "##############################################"
    echo "running QA for ${detector}"
    echo "  outputDir=$outputDir"
    echo "  tmpPrefix=$tmpPrefix"

    unset -f runLevelQA
    unset -f periodLevelQA
    source "${detectorScript}"

    #################################################################
    #produce the QA and trending tree for each file (run)
    unset arrOfTouchedProductions
    declare -A arrOfTouchedProductions
    #the list is read on fd 3 so that commands started inside the loop
    #cannot consume it through their stdin
    while read -r qaFile <&3 || [[ -n ${qaFile} ]]; do
      [[ -z ${qaFile} ]] && continue
      echo

      if ! guessRunData "${qaFile}"; then
        echo "could not guess run data from ${qaFile}"
        continue
      fi

      tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
      arrOfTouchedProductions[${tmpProductionDir}]=1
      tmpRunDir=${tmpProductionDir}/000${runNumber}
      if ! mkdir -p "${tmpRunDir}" || ! cd "${tmpRunDir}"; then
        echo "cannot use the temp run dir ${tmpRunDir}"
        continue
      fi

      #handle the case of a zip archive
      [[ "$qaFile" =~ \.zip$ ]] && qaFile="${qaFile}#QAresults.root"

      echo "running ${detector} runLevelQA for run ${runNumber} from ${qaFile}"
      runLevelQA "${qaFile}" &> runLevelQA.log

      #perform some default actions:
      #if trending.root not created, create a default one
      if [[ ! -f trending.root ]]; then
        aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" &>> runLevelQA.log
      fi
      cd "${tmpDetectorRunDir}"

    done 3< "${inputList}"

    #################################################################
    #cache which productions were (re)done
    echo "list of processed productions:"
    echo "    ${!arrOfTouchedProductions[*]}"
    echo

    #################################################################
    #(re)do the merging/trending in the final destination
    for tmpProductionDir in "${!arrOfTouchedProductions[@]}"; do
      echo
      echo "running period level stuff in ${tmpProductionDir}"

      productionDir=${outputDir}/${tmpProductionDir#"${tmpPrefix}"}

      mkdir -p "${productionDir}"
      if [[ ! -d ${productionDir} ]]; then
        echo "cannot make productionDir $productionDir"
        continue
      fi
      if ! cd "${productionDir}"; then
        echo "cannot enter productionDir $productionDir"
        continue
      fi

      #move to final destination
      for dir in "${tmpProductionDir}"/*; do
        #an unmatched glob stays literal; there is nothing to move then
        [[ -d ${dir} ]] || continue
        oldRunDir=${outputDir}/${dir#"${tmpPrefix}"}
        if ! guessRunData "${dir}/dummyName"; then
          echo "could not guess run data from ${dir}"
          continue
        fi

        #before moving - VALIDATE!!!
        if ! validate "${dir}"; then continue; fi

        if [[ -d ${oldRunDir} ]]; then
          echo "removing old ${oldRunDir}"
          rm -rf -- "${oldRunDir}"
        fi
        echo "moving new ${runNumber} to ${productionDir}"
        mv -f "${dir}" "${productionDir}"
      done

      echo "running ${detector} periodLevelQA for production ${period}/${pass}"
      rm -f trending.root

      #merge trending files if any
      trendingFiles=( 000*/trending.root )
      if [[ -f ${trendingFiles[0]} ]]; then
        hadd trending.root "${trendingFiles[@]}" &> periodLevelQA.log
        periodLevelQA trending.root &>> periodLevelQA.log
      fi

      #a failed validation only sets planB (inside validate); the loop
      #carries on with the next production either way
      validate "${PWD}"

      cd "${tmpDetectorRunDir}"

    done

    cd "${workingDirectory}"

    if [[ -z ${planB} ]]; then
      echo
      echo "removing ${tmpDetectorRunDir}"
      rm -rf -- "${tmpDetectorRunDir}"
    else
      executePlanB
    fi
  done

  #remove lock
  rm -f "${lockFile}"
}
208
# In case of emergency: mail the detector log summary to ${MAILTO}.
#
# Globals:   MAILTO (read)     - whitespace separated recipients; when empty
#                                the function does nothing
#            logSummary (read) - file used as the message body; an empty body
#                                is sent when it cannot be read
# Returns:   0 when MAILTO is empty, otherwise the status of mail
executePlanB()
{
  [[ -n ${MAILTO} ]] || return 0

  echo
  echo "trouble detected, sending email to ${MAILTO}"

  #split the recipients on whitespace without pathname expansion
  local -a recipients=()
  read -r -a recipients <<< "${MAILTO}"

  #never let mail fall back to reading this shell's stdin
  local body=/dev/null
  [[ -r ${logSummary} ]] && body=${logSummary}

  mail -s "qa in need of assistance" "${recipients[@]}" < "${body}"
}
219
# Append the log summary of a directory to ${logSummary} and report whether
# the directory passed.
#
# Arguments: $1 - directory handed to summarizeLogs
# Globals:   logSummary (read) - file the summary is appended to
#            planB (written)   - set to 1 when the directory is not validated
# Outputs:   a WARNING line on stdout when not validated
# Returns:   0 when summarizeLogs succeeds, 1 otherwise
validate()
{
  #quoted so that paths containing blanks stay a single word; an unquoted
  #redirect target with a blank is rejected as ambiguous by bash
  if summarizeLogs "${1}" >> "${logSummary}"; then
    return 0
  fi

  echo "WARNING not validated: ${1}"
  planB=1
  return 1
}
231
# Print one status line per log file found in a directory and create stack
# traces for core files.
#
# Arguments: $1 - directory to inspect; the current directory is used when
#                 $1 is not a directory
# Globals:   pretend (read) - when non-empty, a clean rec.log is randomly
#                             reported as BAD
# Outputs:   "<log> OK", "<log> BAD <summary>" or "<log> OK MWAH <summary>"
#            per log on stdout, followed by the path of every core file found
# Returns:   1 when validateLog returned non-zero for at least one log
#            (warnings included), 0 otherwise
summarizeLogs()
{
  local dir=$1
  [[ ! -d ${dir} ]] && dir=${PWD}

  #logs to look at
  local -a logFilePatterns=(
      "*.log"
      "stdout"
      "stderr"
  )

  #every pattern is anchored to ${dir}; the patterns are left unquoted on
  #purpose so that *.log is expanded
  local -a candidates=()
  local pattern
  for pattern in "${logFilePatterns[@]}"; do
    candidates+=( "${dir}"/${pattern} )
  done

  #check logs
  local logstatus=0
  local log finallog errorSummary validationStatus
  for log in "${candidates[@]}"; do
    [[ -f ${log} ]] || continue

    #only relative paths need the current directory in front
    if [[ ${log} == /* ]]; then
      finallog=${log}
    else
      finallog=${PWD%/}/${log}
    fi

    errorSummary=$(validateLog "${log}")
    validationStatus=$?
    (( validationStatus != 0 )) && logstatus=1

    case ${validationStatus} in
      0)
        #in pretend mode randomly report an error in rec.log some cases
        if [[ -n ${pretend} && "${log##*/}" == "rec.log" ]]; then
          if (( RANDOM % 2 >= 1 )); then
            echo "${finallog} BAD random error"
          else
            echo "${finallog} OK"
          fi
        else
          echo "${finallog} OK"
        fi
        ;;
      1)
        echo "${finallog} BAD ${errorSummary}"
        ;;
      2)
        echo "${finallog} OK MWAH ${errorSummary}"
        ;;
    esac
  done

  #report core files
  #NOTE(review): cores are searched below the current directory, not below
  #${dir} - confirm this is intended
  local core
  for core in "${PWD}"/*/core "${PWD}"/core; do
    [[ -f ${core} ]] || continue
    echo "${core}"
    chmod 644 "${core}"
    gdb --batch --quiet -ex "bt" -ex "quit" aliroot "${core}" > "stacktrace_${core//\//_}.log"
  done

  return ${logstatus}
}
275
# Search a log file for known error and warning messages.
#
# Arguments: $1 - log file to check
# Outputs:   the first matching line of every matching condition, each
#            followed by " : ", on one line; error matches are printed when
#            there are any, warning matches only when there is no error
# Returns:   1 when an error condition matched, 2 when only a warning
#            condition matched, 0 otherwise
validateLog()
{
  local log=${1}
  #grep patterns, tried one by one in this order
  local -a errorConditions=(
            'There was a crash'
            'floating'
            'error while loading shared libraries'
            'std::bad_alloc'
            's_err_syswatch_'
            'Thread [0-9]* (Thread'
            'AliFatal'
            'core dumped'
            '\.C.*error:.*\.h: No such file'
            'segmentation'
  )

  local -a warningConditions=(
            'This is serious'
  )

  local errorSummary=""
  local warningSummary=""
  local condition match

  for condition in "${errorConditions[@]}"; do
    #-m1: only the first matching line of each condition is reported
    match=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${match} ]] && errorSummary+="${match} : "
  done

  for condition in "${warningConditions[@]}"; do
    match=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${match} ]] && warningSummary+="${match} : "
  done

  if [[ -n ${errorSummary} ]]; then
    echo "${errorSummary}"
    return 1
  fi

  if [[ -n ${warningSummary} ]]; then
    echo "${warningSummary}"
    return 2
  fi

  return 0
}
324
# Print an option value with one pair of enclosing quotes removed, so that
# values written as option="a b" or option='a b' keep working.
_parseConfigValue()
{
  local raw=$1
  case ${raw} in
    \"*\") raw=${raw#\"}; raw=${raw%\"} ;;
    \'*\') raw=${raw#\'}; raw=${raw%\'} ;;
  esac
  printf '%s' "${raw}"
}

# Set the configuration variables: defaults first, then the config file (if
# a configFile=... option is present), then the command line options, which
# therefore override the file.
#
# Arguments: option=value words, optionally prefixed with --; empty words
#            are skipped
# Globals:   every option is assigned to the global variable of the same
#            name; the defaults set here are configFile, inputList,
#            workingDirectory, outputDirectory, excludeDetectors and
#            logDirectory
# Outputs:   an error message on stdout for a malformed option or an
#            unreadable config file
# Returns:   0 on success, 1 on a malformed option or unreadable config file
parseConfig()
{
  #config file
  configFile=""
  #where to search for qa files
  inputList=file.list
  #working directory
  workingDirectory="${PWD}"
  #where to place the final qa plots
  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
  outputDirectory="${workingDirectory}/%DET"
  #filter out detector option
  excludeDetectors="EXAMPLE"
  #logs
  logDirectory=${workingDirectory}/logs
  #set aliroot
  #alirootEnv="/home/mkrzewic/alisoft/balice_master.sh"
  #OCDB storage
  #ocdbStorage="raw://"
  #email to
  #MAILTO="fbellini@cern.ch"

  #the locals carry a prefix so that they cannot shadow an option name
  local _pcArg _pcName _pcValue

  #first, check if the config file is configured
  #if yes - source it so that other options can override it
  for _pcArg in "$@"; do
    _pcArg=${_pcArg#--}
    [[ ${_pcArg} == configFile=* ]] || continue
    _pcValue=$(_parseConfigValue "${_pcArg#configFile=}")
    configFile=$(readlink -f "${_pcValue}")
    if [[ ! -r ${configFile} ]]; then
      echo "cannot read config file ${_pcValue}, stopping..."
      return 1
    fi
    source "${configFile}"
    break
  done

  #then, parse the options as they override the options from file
  for _pcArg in "$@"; do
    [[ -z ${_pcArg} ]] && continue
    _pcArg=${_pcArg#--}
    _pcName=${_pcArg%%=*}
    if [[ ${_pcArg} != *=* || ! ${_pcName} =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      echo "badly formatted option ${_pcArg}, should be: option=value, stopping..."
      return 1
    fi
    _pcValue=$(_parseConfigValue "${_pcArg#*=}")
    #plain assignment instead of eval: the value is never executed and may
    #contain blanks
    printf -v "${_pcName}" '%s' "${_pcValue}"
  done

  return 0
}
371
# Guess the run data from the components of a file path.
#
# The path is split on "/" and scanned from the last component to the first;
# a later match overwrites an earlier one, so for period, runNumber, year and
# dataType the match closest to the start of the path is kept.
# NOTE(review): an older comment here said "pick the rightmost one" - confirm
# which of the two is wanted.
#
# Arguments: $1 - path of the QA file
# Globals (written):
#   period             - LHCnnx... component, cut at its last underscore
#   runNumber          - 000nnnnnn component without the leading 000; for
#                        sim the plain 6-digit component when there is one
#   year               - 20nn component
#   pass               - component before the lego train number when one was
#                        found, else the second to last component; always
#                        "passMC" for sim
#   legoTrainRunNumber - all-digit component directly followed by a *.zip or
#                        *.root component
#   dataType           - "sim" or "data"
# Returns:   0 when a run number was found, 1 otherwise
guessRunData()
{
  period=""
  runNumber=""
  year=""
  pass=""
  legoTrainRunNumber=""
  dataType=""

  local shortRunNumber=""
  local IFS="/"
  #read splits on IFS like an unquoted expansion would, but without
  #pathname expansion of the components
  local -a path=()
  read -r -a path <<< "$1"
  local dirDepth=$(( ${#path[*]}-1 ))

  local x field fieldNext fieldPrev=""
  for (( x = dirDepth; x >= 0; x-- )); do

    #the first component has no predecessor; fieldPrev keeps its last value
    (( x >= 1 )) && fieldPrev=${path[x-1]}
    field=${path[x]}
    fieldNext=${path[x+1]}

    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
    [[ ${field} =~ ^(sim|data)$ ]] && dataType=${field}
  done

  #without a lego train number the pass is the second to last component
  if [[ -z ${legoTrainRunNumber} ]] && (( dirDepth >= 1 )); then
    pass=${path[dirDepth-1]}
  fi

  if [[ ${dataType} == "sim" ]]; then
    pass="passMC"
    #keep a run number taken from a 000nnnnnn component when the path has
    #no plain 6-digit one
    [[ -n ${shortRunNumber} ]] && runNumber=${shortRunNumber}
  fi

  #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber} || -z ${pass} ]];
  if [[ -z ${runNumber} ]]; then
    #error condition
    return 1
  fi

  #ALL OK
  return 0
}
415
# Print a directory name with the detector placeholders filled in.
#
# Arguments: $1 - detector name
#            $2 - directory; %det is replaced by the lower-cased detector
#                 name, %DET by the name as given
# Outputs:   the resulting directory on stdout, always exactly one line; a
#            directory without placeholder is printed unchanged
# Returns:   0
substituteDetectorName()
{
  local det=$1
  local dir=$2

  #the % is escaped because a leading % would anchor the pattern to the end
  #of the string
  dir=${dir//\%det/${det,,}}
  dir=${dir//\%DET/${det}}

  printf '%s\n' "${dir}"
}
423
#hand the command line over unchanged; "$@" keeps options whose value
#contains blanks in one piece
main "$@"