]> git.uio.no Git - u/mrichter/AliRoot.git/blob - PWGPP/QA/scripts/runQA.sh
fix excluding of detectors
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh
1 #!/bin/bash
#######################################
# Entry point: parse the options, make sure the AliRoot environment is
# available and run the QA update.
# Globals read: ALICE_ROOT, alirootEnv, ocdbStorage (the latter two are
#               expected to come from parseConfig / the config file)
# Arguments:    option=value [option=value ...]
# Returns:      1 on usage/config/environment errors, otherwise the
#               status of updateQA
#######################################
main()
{
  if [[ -z $1 ]]; then
    echo "Usage: "
    echo "  ${0##*/} option=value [option=value]"
    echo "  at least one option, either inputList or configFile should be specified,"
    echo "  options override config file (if any), e.g.:"
    echo "  ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
    return 1
  fi

  #"$@" keeps every option as one word, even when its value has spaces
  if ! parseConfig "$@"; then
    #re-run ourselves without arguments, this only prints the usage text
    "${0}"
    return 1
  fi

  #only source the environment script if ALICE_ROOT is not there yet and
  #an environment script was actually configured
  if [[ -z ${ALICE_ROOT} && -n ${alirootEnv} ]]; then
    source "${alirootEnv}"
  fi
  [[ -z ${ALICE_ROOT} ]] && echo "ALICE_ROOT not defined" && return 1

  #a raw:// OCDB storage triggers alien-token-init before running
  ocdbregex='raw://'
  if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
    alien-token-init
  fi

  updateQA "$@"
}
28
#######################################
# Run the QA for every detector script found in
# $ALICE_ROOT/PWGPP/QA/detectorQAscripts: first per input file (run level)
# in a temporary area, then move validated run directories to the output
# directory and redo the period level merging/trending there.
# Globals read:    ALICE_ROOT, excludeDetectors, outputDirectory,
#                  logDirectory, inputList, workingDirectory (set by
#                  parseConfig), planB (set by validate)
# Globals written: the working variables below are intentionally NOT local,
#                  the sourced detector scripts and validate/executePlanB
#                  read them (e.g. logSummary, qaFile, highPtTree, ...)
# Arguments:       option=value [option=value ...], passed to parseConfig
# Returns:         1 if the setup (config, input list, working dir, log dir,
#                  lock) fails, otherwise the status of the final lock removal
#######################################
updateQA()
{
  umask 0002
  parseConfig "$@" || return 1

  #be paranoid and make some full paths
  inputList=$(readlink -f "${inputList}")
  [[ ! -f ${inputList} ]] && echo "no input list: ${inputList}" && return 1
  workingDirectory=$(readlink -f "${workingDirectory}")
  mkdir -p "${workingDirectory}"
  if [[ ! -d ${workingDirectory} ]]; then
    echo "working dir $workingDirectory does not exist and cannot be created"
    return 1
  fi
  cd "${workingDirectory}" || return 1

  echo JOB config:
  echo "inputList=$inputList"
  echo "outputDirectory=$outputDirectory"
  echo

  dateString=$(date +%Y-%m-%d-%H-%M)
  echo "Start time QA process: $dateString"

  #logging
  mkdir -p "${logDirectory}"
  [[ ! -d ${logDirectory} ]] && echo "no log dir $logDirectory" && return 1
  logFile="$logDirectory/${0##*/}.${dateString}.log"
  touch "${logFile}"
  [[ ! -f ${logFile} ]] && echo "cannot write logfile $logFile" && return 1
  echo "logFile = $logFile"
  #from here on all stdout/stderr of this shell goes to the log file
  exec &>"${logFile}"

  #check lock
  lockFile=${logDirectory}/runQA.lock
  [[ -f ${lockFile} ]] && echo "lock ${lockFile} exists!" && return 1
  touch "${lockFile}"
  [[ ! -f ${lockFile} ]] && echo "cannot lock $lockFile" && return 1

  ################################################################
  #ze detector loop
  for detectorScript in "${ALICE_ROOT}"/PWGPP/QA/detectorQAscripts/*; do

    #only files ending in .sh are detector scripts
    [[ ${detectorScript} == *.sh ]] || continue
    detector=${detectorScript%.sh}
    detector=${detector##*/}

    #skip if excluded (the detector name is matched as a regex against the
    #whole excludeDetectors string)
    if [[ "${excludeDetectors}" =~ ${detector} ]]; then
      echo "${detector} is excluded in config, skipping..."
      continue
    fi

    logSummary=${logDirectory}/summary-${detector}-${dateString}.log
    outputDir=$(substituteDetectorName "${detector}" "${outputDirectory}")
    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}
    if ! mkdir -p "${tmpDetectorRunDir}"; then
      echo "cannot create the temp dir $tmpDetectorRunDir"
      continue
    fi
    cd "${tmpDetectorRunDir}" || continue

    tmpPrefix=${tmpDetectorRunDir}/${outputDir}
    echo
    echo "##############################################"
    echo "running QA for ${detector}"
    echo "  outputDir=$outputDir"
    echo "  tmpPrefix=$tmpPrefix"

    #drop the functions of the previous detector before sourcing the next
    #script (presumably each script provides runLevelQA/periodLevelQA)
    unset -f runLevelQA
    unset -f periodLevelQA
    source "${detectorScript}"

    #################################################################
    #produce the QA and trending tree for each file (run)
    unset arrOfTouchedProductions
    declare -A arrOfTouchedProductions
    while read -r qaFile; do
      echo

      if ! guessRunData "${qaFile}"; then
        echo "could not guess run data from ${qaFile}"
        continue
      fi

      tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
      arrOfTouchedProductions[${tmpProductionDir}]=1
      tmpRunDir=${tmpProductionDir}/000${runNumber}
      mkdir -p "${tmpRunDir}"
      cd "${tmpRunDir}" || continue

      #handle the case of a zip archive: address the members inside it.
      #Both names are derived from the archive path BEFORE qaFile is
      #rewritten, otherwise the second .zip test can never match.
      #highPtTree is cleared first so no value leaks from a previous file.
      highPtTree=""
      if [[ "${qaFile}" =~ \.zip$ ]]; then
        highPtTree="${qaFile}#FilterEvents_Trees.root"
        qaFile="${qaFile}#QAresults.root"
      fi

      echo running ${detector} runLevelQA for run ${runNumber} from ${qaFile}
      runLevelQA "${qaFile}" &> runLevelQA.log

      #perform some default actions:
      #if trending.root not created, create a default one
      if [[ ! -f trending.root ]]; then
        aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" &>> runLevelQA.log
      fi
      cd "${tmpDetectorRunDir}"

    done < "${inputList}"

    #################################################################
    #cache which productions were (re)done
    echo "list of processed productions:"
    echo "    ${!arrOfTouchedProductions[@]}"
    echo

    #################################################################
    #(re)do the merging/trending in the final destination
    for tmpProductionDir in "${!arrOfTouchedProductions[@]}"; do
      echo
      echo "running period level stuff in ${tmpProductionDir}"

      #the prefix is quoted so it is stripped literally, not as a glob
      productionDir=${outputDir}/${tmpProductionDir#"${tmpPrefix}"}

      mkdir -p "${productionDir}"
      if [[ ! -d ${productionDir} ]]; then
        echo "cannot make productionDir $productionDir" && continue
      fi
      cd "${productionDir}" || continue

      #move to final destination
      for dir in "${tmpProductionDir}"/*; do
        oldRunDir=${outputDir}/${dir#"${tmpPrefix}"}
        if ! guessRunData "${dir}/dummyName"; then
          echo "could not guess run data from ${dir}"
          continue
        fi

        #before moving - VALIDATE!!!
        if ! validate "${dir}"; then continue; fi

        if [[ -d ${oldRunDir} ]]; then
          echo "removing old ${oldRunDir}"
          rm -rf "${oldRunDir}"
        fi
        echo "moving new ${runNumber} to ${productionDir}"
        mv -f "${dir}" "${productionDir}"
      done

      echo running ${detector} periodLevelQA for production ${period}/${pass}
      rm -f trending.root

      #merge trending files if any
      if /bin/ls 000*/trending.root &>/dev/null; then
        hadd trending.root 000*/trending.root &> periodLevelQA.log
        periodLevelQA trending.root &>> periodLevelQA.log
      fi

      if ! validate "${PWD}"; then continue; fi

      cd "${tmpDetectorRunDir}"

    done

    cd "${workingDirectory}"

    #keep the temporary area for inspection if anything failed validation
    if [[ -z ${planB} ]]; then
      echo
      echo removing ${tmpDetectorRunDir}
      rm -rf "${tmpDetectorRunDir}"
    else
      executePlanB
    fi
  done

  #remove lock
  rm -f "${lockFile}"
}
204
#######################################
# Emergency action: mail the per-detector log summary to ${MAILTO}.
# Does nothing when MAILTO is empty.
# Globals read: MAILTO, logSummary
#######################################
executePlanB()
{
  #nobody to notify - nothing to do
  [[ -n ${MAILTO} ]] || return 0

  printf '\n%s\n' "trouble detected, sending email to ${MAILTO}"

  #MAILTO is left unquoted so that it can hold several recipients
  cat ${logSummary} | mail -s "qa in need of assistance" ${MAILTO}
}
215
#######################################
# Validate a directory: append the log summary produced by summarizeLogs
# to ${logSummary} and flag the need for "plan B" if it reports a problem.
# Globals read:    logSummary
# Globals written: logStatus (status of summarizeLogs), planB (set to 1 on
#                  failure, never reset here)
# Arguments:       $1 - directory to validate
# Returns:         0 if summarizeLogs succeeded, 1 otherwise
#######################################
validate()
{
  #quoted so that directories/summary files with spaces work
  summarizeLogs "${1}" >> "${logSummary}"
  logStatus=$?
  if (( logStatus != 0 )); then
    echo "WARNING not validated: ${1}"
    planB=1
    return 1
  fi
  return 0
}
227
#######################################
# Print a one-line verdict for every log file in a directory and report
# core files found below the current working directory.
# Globals read:    pretend (non-empty: randomly report rec.log as bad)
# Globals written: logFiles
# Arguments:       $1 - directory holding the logs (defaults to ${PWD} if it
#                  is not a directory)
# Outputs:         "<log> OK", "<log> BAD <summary>" or
#                  "<log> OK MWAH <summary>" per log, plus the core files
# Returns:         0 if validateLog returned 0 for every log, 1 otherwise
#######################################
summarizeLogs()
{
  local dir=$1
  [[ ! -d ${dir} ]] && dir=${PWD}

  #log files to check, as glob patterns relative to ${dir}
  logFiles=(
      "*.log"
      "stdout"
      "stderr"
  )

  #check logs
  local logstatus=0
  local pattern log finallog errorSummary validationStatus
  for pattern in "${logFiles[@]}"; do
    #every pattern is prefixed with the directory; the pattern itself is
    #left unquoted on purpose so that "*.log" is expanded by the shell
    for log in "${dir}"/${pattern}; do
      [[ ! -f ${log} ]] && continue

      #report an absolute path, without doubling an already absolute one
      if [[ ${log} == /* ]]; then
        finallog=${log}
      else
        finallog=${PWD%/}/${log}
      fi

      errorSummary=$(validateLog "${log}")
      validationStatus=$?
      #any non-zero validation status (error or warning) fails the summary
      (( validationStatus != 0 )) && logstatus=1

      case ${validationStatus} in
        0)
          #in pretend mode randomly report an error in rec.log some cases
          if [[ -n ${pretend} && "${log##*/}" == "rec.log" ]]; then
            if (( RANDOM % 2 >= 1 )); then
              echo "${finallog} BAD random error"
            else
              echo "${finallog} OK"
            fi
          else
            echo "${finallog} OK"
          fi
          ;;
        1)
          echo "${finallog} BAD ${errorSummary}"
          ;;
        2)
          echo "${finallog} OK MWAH ${errorSummary}"
          ;;
      esac
    done
  done

  #report core files (searched in the current working directory and its
  #direct subdirectories) and dump a stack trace for each
  local core
  for core in "${PWD}"/*/core "${PWD}"/core; do
    [[ -f ${core} ]] || continue
    echo "${core}"
    chmod 644 "${core}"
    gdb --batch --quiet -ex "bt" -ex "quit" aliroot "${core}" > "stacktrace_${core//\//_}.log"
  done

  return ${logstatus}
}
271
#######################################
# Scan a log file for known error and warning signatures.
# Globals written: errorConditions, warningConditions (kept global as
#                  before)
# Arguments:       $1 - log file to check
# Outputs:         the first matching line per signature, each followed by
#                  " : " (errors take precedence over warnings)
# Returns:         1 if an error signature matched, 2 if only a warning
#                  signature matched, 0 otherwise
#######################################
validateLog()
{
  local log=${1}

  #grep (basic regex) patterns that mark a log as bad
  errorConditions=(
            'There was a crash'
            'floating'
            'error while loading shared libraries'
            'std::bad_alloc'
            's_err_syswatch_'
            'Thread [0-9]* (Thread'
            'AliFatal'
            'core dumped'
            '\.C.*error:.*\.h: No such file'
            'segmentation'
  )

  #patterns that only produce a warning
  warningConditions=(
            'This is serious'
  )

  local errorSummary=""
  local warningSummary=""
  local condition hit

  #only the first hit per pattern is collected
  for condition in "${errorConditions[@]}"; do
    hit=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${hit} ]] && errorSummary+="${hit} : "
  done

  for condition in "${warningConditions[@]}"; do
    hit=$(grep -m1 -e "${condition}" -- "${log}")
    [[ -n ${hit} ]] && warningSummary+="${hit} : "
  done

  if [[ -n ${errorSummary} ]]; then
    echo "${errorSummary}"
    return 1
  fi

  if [[ -n ${warningSummary} ]]; then
    echo "${warningSummary}"
    return 2
  fi

  return 0
}
320
#######################################
# Set the default configuration, then apply the config file (if one is
# given with configFile=...) and finally the command line options, which
# override the file.
# Globals written: configFile, inputList, workingDirectory,
#                  outputDirectory, excludeDetectors, logDirectory and
#                  whatever the config file / options assign
# Arguments:       option=value [option=value ...], an optional leading
#                  "--" on each option is ignored
# Returns:         0 on success, 1 on a malformed option or a config file
#                  that cannot be found
# Note:            outputDirectory and logDirectory defaults are derived
#                  from the DEFAULT workingDirectory, a later
#                  workingDirectory=... does not move them
#######################################
parseConfig()
{
  #config file
  configFile=""
  #where to search for qa files
  inputList=file.list
  #working directory
  workingDirectory="${PWD}"
  #where to place the final qa plots
  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
  outputDirectory="${workingDirectory}/%DET"
  #filter out detector option
  excludeDetectors="EXAMPLE"
  #logs
  logDirectory=${workingDirectory}/logs
  #set aliroot
  #alirootEnv="/home/mkrzewic/alisoft/balice_master.sh"
  #OCDB storage
  #ocdbStorage="raw://"
  #email to
  #MAILTO="fbellini@cern.ch"

  #NOTE(review): options are applied with eval, so a value is subject to
  #shell expansion and could run arbitrary code - acceptable only because
  #the options come from the person running the script. The option NAME is
  #checked to be a plain identifier before anything is evaluated.

  #first, check if the config file is configured
  #if yes - source it so that other options can override it
  local opt requestedConfig
  for opt in "$@"; do
    opt=${opt#--}
    if [[ ${opt} =~ ^configFile=.* ]]; then
      eval "${opt}"
      requestedConfig=${configFile}
      configFile=$(readlink -f "${configFile}")
      if [[ ! -f ${configFile} ]]; then
        echo "config file ${requestedConfig} not found, stopping..."
        return 1
      fi
      source "${configFile}"
      break
    fi
  done

  #then, parse the options as they override the options from file
  local var
  for var in "$@"; do
    #an empty argument carries no option, skip it instead of silently
    #dropping everything that follows
    [[ -z ${var} ]] && continue
    var=${var#--}
    if [[ ${var} =~ ^[A-Za-z_][A-Za-z0-9_]*=.* ]]; then
      eval "${var}"
    else
      echo "badly formatted option ${var}, should be: option=value, stopping..."
      return 1
    fi
  done

  return 0
}
367
#######################################
# Guess the run data from the components of a file path; when several
# components match the same rule, the leftmost one wins because the path
# is scanned from right to left and later matches overwrite earlier ones.
# Globals written: period, runNumber, year, pass, legoTrainRunNumber,
#                  dataType (all reset to "" first)
# Arguments:       $1 - path of the QA file, e.g.
#                  /alice/data/2013/LHC13b/000195344/cpass1/QAresults.root
# Returns:         0 if a run number was found, 1 otherwise
#######################################
guessRunData()
{
  period=""
  runNumber=""
  year=""
  pass=""
  legoTrainRunNumber=""
  dataType=""

  local shortRunNumber=""

  #split the path on "/" without globbing and without touching the
  #caller's IFS
  local -a path=()
  IFS="/" read -r -a path <<< "$1"
  local dirDepth=$(( ${#path[@]}-1 ))

  local x field fieldPrev fieldNext
  for ((x=dirDepth; x>=0; x--)); do

    #neighbours of the current component, empty at the path boundaries
    fieldPrev=""
    (( x > 0 )) && fieldPrev=${path[x-1]}
    field=${path[x]}
    fieldNext=${path[x+1]}

    #a purely numeric directory directly above the .zip/.root file is taken
    #as a lego train run number, the directory above it as the pass
    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
    [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
  done

  #without a lego train number the pass is the directory holding the file
  [[ -z ${legoTrainRunNumber} && ${dirDepth} -ge 1 ]] && pass=${path[dirDepth-1]}
  #simulations use a fixed pass name and the 6 digit run number
  [[ "${dataType}" =~ ^sim$ ]] && pass="passMC" && runNumber=${shortRunNumber}

  #a stricter check would also require dataType, year, period and pass
  if [[ -z ${runNumber} ]]; then
    #error condition
    return 1
  fi

  #ALL OK
  return 0
}
411
#######################################
# Fill the detector name into a directory template.
# Arguments: $1 - detector name
#            $2 - directory template; every %det is replaced by the lower
#                 case detector name, every %DET by the name as given
# Outputs:   the resulting directory on stdout, exactly one line; a
#            template without placeholder is printed unchanged
# Returns:   0
#######################################
substituteDetectorName()
{
  local det=$1
  local dir=$2
  local detLower=${det,,}

  #[%] keeps the leading % from being read as the "match at end" anchor
  #of the ${var/pattern/string} expansion
  dir=${dir//[%]det/${detLower}}
  dir=${dir//[%]DET/${det}}

  printf '%s\n' "${dir}"
}
419
#pass the command line on unchanged; "$@" keeps options with spaces intact
main "$@"