git.uio.no Git - u/mrichter/AliRoot.git/blob - PWGPP/QA/scripts/runQA.sh
Merge branch 'master' into TPCdev
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh
1 #!/bin/bash
#######################################
# Print the command line help of this script to stdout.
#######################################
_runQAusage()
{
  echo "Usage: "
  echo "  ${0##*/} option=value [option=value]"
  echo "  at least inputList should be specified, or configFile containing it:"
  echo "  ${0##*/} inputList=file.list"
  echo "  options override config file (if any), e.g.:"
  echo "  ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
}

#######################################
# Entry point: parse the options, prepare the alien environment when the
# OCDB storage points to raw:// and hand over to updateQA.
# Globals:   ALICE_ROOT, ocdbStorage, alienUserName, UID (read)
# Arguments: option=value pairs, passed on to parseConfig and updateQA
# Returns:   1 on missing/invalid options or undefined ALICE_ROOT,
#            otherwise the status of updateQA
#######################################
main()
{
  if [[ -z $1 ]]; then
    _runQAusage
    return 1
  fi

  #print the usage directly instead of re-executing ${0}: re-execution
  #starts a second process and fails when the script is sourced or its
  #path contains blanks
  if ! parseConfig "$@"; then
    _runQAusage
    return 1
  fi

  [[ -z ${ALICE_ROOT} ]] && echo "ALICE_ROOT not defined" && return 1

  local ocdbregex='raw://'
  if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
    #alienUserName is left unquoted on purpose: when it is empty no
    #argument at all must be passed
    alien-token-init ${alienUserName}
    #this is a hack! alien-token init seems not enough
    #but the gclient_env script messes up the LD_LIBRARY_PATH
    #NOTE: the lines of the gclient_env file are eval'ed as shell code,
    #the file has to be trusted
    local envLine
    while IFS= read -r envLine; do
      eval "${envLine}"
    done < <(grep -v "LD_LIBRARY_PATH" "/tmp/gclient_env_${UID}")
  fi

  updateQA "$@"
}
33
#######################################
# Run the QA of all detectors for the files given in the input list.
#
# For every *.sh script in $ALICE_ROOT/PWGPP/QA/detectorQAscripts the
# script is sourced and the functions it defines (runLevelQA,
# runLevelHighPtTreeQA, periodLevelQA) are called: first per input file in
# a temporary run directory, then per touched production after the
# validated run directories were moved to the output directory.
#
# Globals:   inputList, workingDirectory, outputDirectory, logDirectory,
#            excludeDetectors, includeDetectors, inputListHighPtTrees,
#            ocdbStorage, ALICE_ROOT (read); many helper variables such as
#            logFile, lockFile, detector, logSummary, planB are set
# Arguments: option=value pairs, passed on to parseConfig
# Outputs:   after the log file is opened all stdout/stderr of the shell
#            is redirected to it (exec)
# Returns:   1 if the input list, working directory, log file or lock
#            cannot be set up or the lock exists
#######################################
updateQA()
{
  umask 0002
  parseConfig "$@"

  #be paranoid and make some full paths
  [[ ! -f ${inputList} ]] && echo "no input list: ${inputList}" && return 1
  inputList=$(get_realpath "${inputList}")
  mkdir -p "${workingDirectory}"
  #resolve the full path by entering the directory itself; entering the
  #"parent" obtained by stripping the last component breaks for a
  #relative path without any slash (it would yield <dir>/<dir>)
  local absWorkingDirectory
  if absWorkingDirectory=$(cd "${workingDirectory}" &>/dev/null && echo "${PWD}"); then
    workingDirectory=${absWorkingDirectory}
  fi
  if [[ ! -d ${workingDirectory} ]]; then
    echo "working dir $workingDirectory does not exist and cannot be created"
    return 1
  fi
  cd "${workingDirectory}"

  echo JOB config:
  echo inputList=$inputList
  echo outputDirectory=$outputDirectory
  echo

  dateString=$(date +%Y-%m-%d-%H-%M-%S-%N)
  echo "Start time QA process: $dateString"

  #logging
  mkdir -p "${logDirectory}"
  [[ ! -d ${logDirectory} ]] && echo "no log dir $logDirectory" && return 1
  logFile="${logDirectory}/${0##*/}.${dateString}.log"
  touch "${logFile}"
  [[ ! -f ${logFile} ]] && echo "cannot write logfile $logFile" && return 1
  echo "logFile = $logFile"

  #check lock
  lockFile=${workingDirectory}/runQA.lock
  [[ -f ${lockFile} ]] && echo "lock ${lockFile} exists!" | tee "${logFile}" && return 1
  touch "${lockFile}"
  [[ ! -f ${lockFile} ]] && echo "cannot lock $lockFile" | tee "${logFile}" && return 1

  #from here on everything goes to the log file
  exec &>"${logFile}"

  ################################################################
  #ze detector loop
  for detectorScript in "${ALICE_ROOT}"/PWGPP/QA/detectorQAscripts/*; do
    echo
    echo "##############################################"
    echo $(date)
    unset planB
    [[ ! ${detectorScript} =~ .*\.sh$ ]] && continue
    detector=${detectorScript%.sh}
    detector=${detector##*/}

    #skip if excluded
    if [[ "${excludeDetectors}" =~ ${detector} ]]; then
      echo "${detector} is excluded in config, skipping..."
      continue
    fi

    #if includeDetectors set, only process the detectors specified there
    if [[ -n ${includeDetectors} && ! "${includeDetectors}" =~ ${detector} ]]; then
      echo "${detector} not included in includeDetectors, skipping..."
      continue
    fi

    logSummary=${logDirectory}/summary-${detector}-${dateString}.log
    hostInfo >> "${logSummary}"
    outputDir=$(substituteDetectorName "${detector}" "${outputDirectory}")
    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}-${dateString}
    if ! mkdir -p "${tmpDetectorRunDir}"; then
      echo "cannot create the temp dir $tmpDetectorRunDir"
      continue
    fi
    cd "${tmpDetectorRunDir}" || continue

    tmpPrefix=${tmpDetectorRunDir}/${outputDir}
    echo "running QA for ${detector}"
    echo "  outputDir=$outputDir"
    echo "  tmpPrefix=$tmpPrefix"

    #unset the detector functions from previous iterations (detectors)
    unset -f runLevelQA
    unset -f periodLevelQA
    unset -f runLevelHighPtTreeQA
    unset -f periodLevelHighPtTreeQA
    source "${detectorScript}"

    #################################################################
    #produce the QA and trending tree for each file (run)
    unset arrOfTouchedProductions
    declare -A arrOfTouchedProductions
    #the "|| [[ -n ... ]]" part processes a last line without newline
    while read -r qaFile || [[ -n ${qaFile} ]]; do
      echo
      echo $(date)

      #first check if input file exists
      [[ ! -f ${qaFile%\#*} ]] && echo "file ${qaFile%\#*} not accessible" && continue

      if ! guessRunData "${qaFile}"; then
        echo "could not guess run data from ${qaFile}"
        continue
      fi
      echo "anchorYear for ${originalPeriod} is: ${anchorYear}"

      tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
      tmpRunDir=${tmpProductionDir}/000${runNumber}
      #never run the QA in the wrong directory
      if ! mkdir -p "${tmpRunDir}" || ! cd "${tmpRunDir}"; then
        echo "cannot create the temp run dir ${tmpRunDir}"
        continue
      fi

      #by default we expect to have everything in the same archive
      highPtTree=${qaFile}

      #maybe the input is not an archive, but a file
      [[ "${qaFile}" =~ QAresults.root$ ]] && highPtTree=""
      [[ "${qaFile}" =~ FilterEvents_Trees.root$ ]] && qaFile=""

      #it is possible we get the highPt trees from somewhere else
      #search the list of high pt trees for the proper run number
      if [[ -n ${inputListHighPtTrees} ]]; then
        highPtTree=$(grep -E -m1 -- "${runNumber}" "${inputListHighPtTrees}")
        echo "loaded the highPtTree ${highPtTree} from external file ${inputListHighPtTrees}"
      fi

      echo qaFile=$qaFile
      echo highPtTree=$highPtTree
      echo ocdbStorage=${ocdbStorage}
      echo

      #what if we have a zip archive?
      if [[ "$qaFile" =~ .*.zip$ ]]; then
        if unzip -l "${qaFile}" | grep -E "QAresults.root" &>/dev/null; then
          qaFile="${qaFile}#QAresults.root"
        else
          qaFile=""
        fi
      fi
      if [[ "$highPtTree" =~ .*.zip$ ]]; then
        if unzip -l "${highPtTree}" | grep -E "FilterEvents_Trees.root" &>/dev/null; then
          highPtTree="${highPtTree}#FilterEvents_Trees.root"
        else
          highPtTree=""
        fi
      fi

      if [[ -n ${qaFile} && $(type -t runLevelQA) =~ "function" ]]; then
        echo running ${detector} runLevelQA for run ${runNumber} from ${qaFile}
        runLevelQA "${qaFile}" &> runLevelQA.log
        #perform some default actions:
        #if trending.root not created, create a default one
        if [[ ! -f trending.root ]]; then
          aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" 2>&1 | tee -a runLevelQA.log
        fi
        if [[ -f trending.root ]]; then
          arrOfTouchedProductions[${tmpProductionDir}]=1
        else
          echo "trending.root not created"
        fi
      fi
      #expert QA based on high pt trees
      if [[ -n ${highPtTree} && $(type -t runLevelHighPtTreeQA) =~ "function" ]]; then
        echo running ${detector} runLevelHighPtTreeQA for run ${runNumber} from ${highPtTree}
        runLevelHighPtTreeQA "${highPtTree}" &> runLevelHighPtTreeQA.log
        arrOfTouchedProductions[${tmpProductionDir}]=1
      fi

      cd "${tmpDetectorRunDir}"

    done < "${inputList}"

    #################################################################
    #cache which productions were (re)done
    echo "list of processed productions:"
    echo "    ${!arrOfTouchedProductions[@]}"
    echo

    #################################################################
    #(re)do the merging/trending
    for tmpProductionDir in "${!arrOfTouchedProductions[@]}"; do
      cd "${tmpProductionDir}"
      echo
      echo "running period level stuff in ${tmpProductionDir}"
      echo $(date)

      productionDir=${outputDir}/${tmpProductionDir#"${tmpPrefix}"}
      echo "productionDir=${productionDir}"

      mkdir -p "${productionDir}"
      if [[ ! -d ${productionDir} ]]; then
        echo "cannot make productionDir $productionDir" && continue
      fi

      #move runs to final destination
      for dir in "${tmpProductionDir}"/000*; do
        #an unmatched glob stays literal, skip it
        [[ -d ${dir} ]] || continue
        echo
        oldRunDir=${outputDir}/${dir#"${tmpPrefix}"}
        if ! guessRunData "${dir}/dummyName"; then
          echo "could not guess run data from ${dir}"
          continue
        fi

        #before moving - VALIDATE!!!
        if ! validate "${dir}"; then
          continue
        fi

        #moving a dir is an atomic operation, no locking necessary
        if [[ -d ${oldRunDir} ]]; then
          echo "removing old ${oldRunDir}"
          rm -rf "${oldRunDir}"
        fi
        echo "moving new ${runNumber} to ${productionDir}"
        mv -f "${dir}" "${productionDir}"
      done

      #go to a temp dir to do the period level stuff in a completely clean dir
      tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
      echo
      echo tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
      if ! mkdir -p "${tmpPeriodLevelQAdir}" || ! cd "${tmpPeriodLevelQAdir}"; then continue; fi

      #link the final list of per-run dirs here, just the dirs
      #to have a clean working directory
      unset linkedStuff
      declare -a linkedStuff
      for x in "${productionDir}"/000*; do
        [[ -d ${x} ]] && ln -s "${x}" && linkedStuff+=("${x##*/}")
      done

      #merge trending files if any
      if /bin/ls 000*/trending.root &>/dev/null; then
        hadd trending.root 000*/trending.root &> periodLevelQA.log
      fi

      #run the period level trending/QA
      if [[ -f "trending.root" && $(type -t periodLevelQA) =~ "function" ]]; then
        echo running ${detector} periodLevelQA for production ${period}/${pass}
        periodLevelQA trending.root &>> periodLevelQA.log
      else
        echo "WARNING: not running ${detector} periodLevelQA for production ${period}/${pass}, no trending.root"
      fi

      if ! validate "${PWD}"; then continue; fi

      #here we are validated so move the produced QA to the final place
      #clean up linked stuff first
      (( ${#linkedStuff[@]} > 0 )) && rm "${linkedStuff[@]}"
      periodLevelLock=${productionDir}/runQA.lock
      if [[ ! -f ${periodLevelLock} ]]; then
        #some of the output could be a directory, so handle that
        #TODO: maybe use rsync?
        #lock to avoid conflicts:
        echo "${HOSTNAME} ${dateString}" > "${periodLevelLock}"
        for x in "${tmpPeriodLevelQAdir}"/*; do
          if [[ -d ${x} ]]; then
            echo "removing ${productionDir}/${x##*/}"
            rm -rf "${productionDir}/${x##*/}"
            echo "moving ${x} to ${productionDir}"
            mv "${x}" "${productionDir}"
          fi
          if [[ -f ${x} ]]; then
            echo "moving ${x} to ${productionDir}"
            mv -f "${x}" "${productionDir}"
          fi
        done
        rm -f "${periodLevelLock}"
        #remove the temp dir
        rm -rf "${tmpPeriodLevelQAdir}"
      else
        echo "ERROR: cannot move to destination"                     >> "${logSummary}"
        echo "production dir ${productionDir} locked!"               >> "${logSummary}"
        echo "check and maybe manually do:"                          >> "${logSummary}"
        echo " rm ${periodLevelLock}"                                >> "${logSummary}"
        echo " rsync -av ${tmpPeriodLevelQAdir}/ ${productionDir}/"  >> "${logSummary}"
        planB=1
      fi

    done

    cd "${workingDirectory}"

    #keep the temp dir for inspection when something went wrong
    if [[ -z ${planB} ]]; then
      echo
      echo removing ${tmpDetectorRunDir}
      rm -rf "${tmpDetectorRunDir}"
    else
      executePlanB
    fi
  done #end of detector loop

  #remove lock
  rm -f "${lockFile}"
}
323
#######################################
# In case of emergency: mail the summary log of the current detector.
# Does nothing when MAILTO is not set.
# Globals:   MAILTO, logSummary (read)
#######################################
executePlanB()
{
  #in case of emergency
  if [[ -n ${MAILTO} ]]; then
    echo
    echo "trouble detected, sending email to ${MAILTO}"
    #MAILTO is left unquoted on purpose so that it may hold several
    #addresses; the summary path is quoted to survive blanks and to
    #never make cat read from stdin when it is empty
    cat "${logSummary}" | mail -s "qa in need of assistance" ${MAILTO}
  fi
}
333
#######################################
# Validate the logs of a directory and append the result to the summary log.
# Globals:   logSummary (read), logStatus (set to the status of
#            summarizeLogs), planB (set to 1 on failure)
# Arguments: $1 - directory to validate
# Returns:   0 if summarizeLogs succeeds, 1 otherwise
#######################################
validate()
{
  #quoted, so that directories and summary paths with blanks work
  summarizeLogs "${1}" >> "${logSummary}"
  logStatus=$?
  if [[ ${logStatus} -ne 0 ]]; then
    echo "WARNING not validated: ${1}"
    planB=1
    return 1
  fi
  return 0
}
345
#######################################
# Print a one-line verdict for every log file of a directory and create
# stack traces for core files.
# Checked are *.log, stdout and stderr inside the given directory; each
# file is passed to validateLog.
# Globals:   logFiles (set), pretend (read)
# Arguments: $1 - directory holding the logs; the current directory is
#                 used if it is not a directory
# Outputs:   "<log> OK", "<log> BAD <errors>" or "<log> OK MWAH <warnings>"
#            per log file, the names of the core files found
# Returns:   1 if validateLog returned non-zero for any log, else 0
#######################################
summarizeLogs()
{
  local dir=$1
  [[ ! -d ${dir} ]] && dir=${PWD}

  #names (glob patterns) of the logs to check, relative to ${dir}
  logFiles=(
      "*.log"
      "stdout"
      "stderr"
  )

  #check logs
  local logstatus=0
  local pattern log errorSummary validationStatus
  for pattern in "${logFiles[@]}"; do
    #every pattern gets the directory prefix (not only the first one);
    #${pattern} stays unquoted so that "*.log" is expanded
    for log in "${dir}"/${pattern}; do
      [[ ! -f ${log} ]] && continue
      errorSummary=$(validateLog "${log}")
      validationStatus=$?
      #NOTE(review): warnings (status 2) are printed as "OK MWAH" but
      #still make the summary fail, as in the original code - confirm
      [[ ${validationStatus} -ne 0 ]] && logstatus=1
      if [[ ${validationStatus} -eq 0 ]]; then
        #in pretend mode randomly report an error in rec.log some cases
        #(compare the file name, ${log} carries the directory)
        if [[ -n ${pretend} && "${log##*/}" == "rec.log" ]]; then
          if [[ $(( RANDOM%2 )) -ge 1 ]]; then
            echo "${log} BAD random error"
          else
            echo "${log} OK"
          fi
        else
          echo "${log} OK"
        fi
      elif [[ ${validationStatus} -eq 1 ]]; then
        echo "${log} BAD ${errorSummary}"
      elif [[ ${validationStatus} -eq 2 ]]; then
        echo "${log} OK MWAH ${errorSummary}"
      fi
    done
  done

  #report core files
  #NOTE(review): cores are searched below the current directory, not
  #below ${dir}, as in the original code
  local core
  for core in "${PWD}"/*/core "${PWD}"/core; do
    [[ -f ${core} ]] || continue
    echo "${core}"
    chmod 644 "${core}"
    gdb --batch --quiet -ex "bt" -ex "quit" aliroot "${core}" > "stacktrace_${core//\//_}.log"
  done

  return ${logstatus}
}
388
#######################################
# Search a log file for known error and warning messages.
# Arguments: $1 - log file to check
# Outputs:   the first matching line of every error condition, each
#            followed by " : "; if there is no error the same for the
#            warning conditions
# Returns:   1 if an error was found, 2 if only a warning was found,
#            0 otherwise
#######################################
validateLog()
{
  local log=${1}
  #patterns are grep basic regular expressions
  local -a errorConditions=(
            'There was a crash'
            'floating'
            'error while loading shared libraries'
            'std::bad_alloc'
            's_err_syswatch_'
            'Thread [0-9]* (Thread'
            'AliFatal'
            'core dumped'
            '\.C.*error:.*\.h: No such file'
            'segmentation'
            'Interpreter error recovered'
  )

  local -a warningConditions=(
            'This is serious'
  )

  local errorSummary=""
  local warningSummary=""
  local condition
  local tmp

  for condition in "${errorConditions[@]}"; do
    #only the first match per condition is reported
    tmp=$(grep -m1 -e "${condition}" "${log}")
    [[ -n ${tmp} ]] && errorSummary+="${tmp} : "
  done

  for condition in "${warningConditions[@]}"; do
    tmp=$(grep -m1 -e "${condition}" "${log}")
    [[ -n ${tmp} ]] && warningSummary+="${tmp} : "
  done

  #errors take precedence over warnings
  if [[ -n ${errorSummary} ]]; then
    echo "${errorSummary}"
    return 1
  fi

  if [[ -n ${warningSummary} ]]; then
    echo "${warningSummary}"
    return 2
  fi

  return 0
}
438
#######################################
# Set the default configuration, source the config file (if given) and
# apply the command line options on top.
# Globals:   args, configFile, inputList, workingDirectory,
#            outputDirectory, excludeDetectors, logDirectory, ocdbStorage
#            (set); every option=value argument is exported as a variable
# Arguments: option=value pairs
# Outputs:   the config file used and every option=value pair applied
# Returns:   1 if the config file is not found or an argument is not of
#            the form option=value
#######################################
parseConfig()
{
  args=("$@")

  #config file
  configFile=""
  #where to search for qa files
  inputList=file.list
  #working directory
  workingDirectory="${PWD}"
  #where to place the final qa plots
  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
  outputDirectory="${workingDirectory}/%DET"
  #filter out detector option
  excludeDetectors="EXAMPLE"
  #logs
  logDirectory=${workingDirectory}/logs
  #OCDB storage
  ocdbStorage="raw://"
  #email to
  #MAILTO="fbellini@cern.ch"

  local opt

  #first, check if the config file is configured
  #if yes - source it so that other options can override it
  #if any
  for opt in "${args[@]}"; do
    if [[ ${opt} == configFile=* ]]; then
      #plain assignment instead of eval: the value is user input and
      #may contain blanks or shell meta characters
      configFile="${opt#configFile=}"
      [[ ! -f ${configFile} ]] && echo "configFile ${configFile} not found, exiting..." && return 1
      echo "using config file: ${configFile}"
      source "${configFile}"
      break
    fi
  done

  #then, parse the options as they override the options from file
  local var
  local value
  for opt in "${args[@]}"; do
    if [[ ! "${opt}" =~ .*=.* ]]; then
      echo "badly formatted option ${opt}, should be: option=value, stopping..."
      return 1
    fi
    var="${opt%%=*}"
    value="${opt#*=}"
    echo "${var}=${value}"
    export "${var}=${value}"
  done
}
486
#######################################
# Guess the run data (period, run number, year, pass, data type) from the
# components of a file path.
# The path is scanned from the file name towards the root; when several
# components match the same pattern the leftmost one is kept.
# Globals:   period, runNumber, year, pass, legoTrainRunNumber, dataType,
#            originalPass, originalPeriod, anchorYear, shortRunNumber
#            (set); ocdbStorage (updated via setYear);
#            mcProductionMap (read)
# Arguments: $1 - path of the input file
# Returns:   0 if a run number was found, 1 otherwise
#######################################
guessRunData()
{
  period=""
  runNumber=""
  year=""
  pass=""
  legoTrainRunNumber=""
  dataType=""
  originalPass=""
  originalPeriod=""
  anchorYear=""

  shortRunNumber=""
  #split at "/" without exposing the components to pathname expansion
  local -a path
  IFS="/" read -r -a path <<< "${1}"
  local dirDepth=$(( ${#path[*]}-1 ))
  local idx
  local fieldPrev
  local field
  local fieldNext
  for ((idx=dirDepth; idx>=0; idx--)); do

    #for the first component fieldPrev keeps its previous value
    [[ $((idx-1)) -ge 0 ]] && fieldPrev=${path[$((idx-1))]}
    field=${path[${idx}]}
    fieldNext=${path[$((idx+1))]}

    #a numeric directory directly holding the file is taken as the
    #lego train run number, the directory above it as the pass
    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*} && originalPeriod=${field}
    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
    [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
  done
  originalPass=${pass}
  [[ -n ${shortRunNumber} && "${legoTrainRunNumber}" =~ ${shortRunNumber} ]] && legoTrainRunNumber=""
  [[ -z ${legoTrainRunNumber} ]] && pass=${path[$((dirDepth-1))]}
  if [[ "${dataType}" =~ ^sim$ ]]; then
    #for MC not from lego, the runnumber is identified as lego train
    #number, thus needs to be nulled
    pass="passMC"
    #keep a run number found with the 000 prefix when there is no short
    #one (e.g. in the temporary run directories)
    [[ -n ${shortRunNumber} ]] && runNumber=${shortRunNumber}
    originalPass=""
  fi
  [[ -n ${legoTrainRunNumber} ]] && pass+="_lego${legoTrainRunNumber}"

  #modify the OCDB: set the year
  #the arguments are quoted: an empty year must not shift the storage
  #into the first argument of setYear
  if [[ ${dataType} =~ sim ]]; then
    local entry
    #mcProductionMap is split on whitespace on purpose
    anchorYear=$(for entry in ${mcProductionMap}; do [[ "${entry}" =~ ${originalPeriod} ]] && echo "${entry}" && break; done)
    anchorYear=${anchorYear#*=}
    ocdbStorage=$(setYear "${anchorYear}" "${ocdbStorage}")
  else
    ocdbStorage=$(setYear "${year}" "${ocdbStorage}")
  fi

  #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber} || -z ${pass} ]];
  if [[ -z ${runNumber} ]]
  then
    #error condition
    return 1
  else
    #ALL OK
    return 0
  fi
}
547
#######################################
# Replace the detector placeholder in a path by the detector name.
# Arguments: $1 - detector name
#            $2 - path, may contain %det (replaced by the lower case
#                 detector name) and/or %DET (replaced by the name as given)
# Outputs:   the path with the placeholders replaced; a path without
#            placeholder is printed unchanged
#######################################
substituteDetectorName()
{
  local det=$1
  local dir=$2
  #only the first occurrence of each placeholder is replaced
  dir=${dir/\%det/${det,,}}
  dir=${dir/\%DET/${det}}
  echo "${dir}"
}
555
#######################################
# Print the full path of an existing file.
# The directory part is resolved by entering it in a subshell, the
# working directory of the caller is not touched.
# Arguments: $1 - path of the file
# Outputs:   <full path of the directory>/<file name>
# Returns:   1 if $1 is not an existing regular file, 0 otherwise
#######################################
get_realpath()
{
  # file *must* exist
  [[ -f "$1" ]] || return 1

  local dirPart
  if [[ "$1" == */* ]]; then
    dirPart=${1%/*}
    # a file directly below the root directory, e.g. /file.ext
    [[ -z ${dirPart} ]] && dirPart="/"
  else
    # file is local
    dirPart="."
  fi

  local absDir
  # fall back to the current directory if the directory cannot be entered
  absDir=$(cd -- "${dirPart}" &>/dev/null && echo "${PWD}") || absDir=${PWD}

  # reassemble realpath, avoiding a double slash below the root directory
  echo "${absDir%/}/${1##*/}"
  return 0 # success
}
580
#######################################
# Set the year in a path.
# The year component of ${2} (as found by guessYear) is replaced by the
# year found in ${1}; if either of them has no year the path is left as is.
# Arguments: ${1} - year to be set (a year or a path containing one)
#            ${2} - where to set the year
# Outputs:   the (modified) path
#######################################
setYear()
{
  local year1
  local year2
  #quoted, so that paths with blanks reach guessYear in one piece
  year1=$(guessYear "${1}")
  year2=$(guessYear "${2}")
  local path=${2}
  if [[ -n ${year1} && -n ${year2} && ${year1} -ne ${year2} ]]; then
    #only the first "/<year2>/" component is replaced
    path=${2/\/${year2}\//\/${year1}\/}
  fi
  echo "${path}"
  return 0
}
593
#######################################
# Guess the year from a path, pick the rightmost one.
# Arguments: ${1} - path (or plain year)
# Outputs:   the last path component of the form 20XX, an empty line if
#            there is none
#######################################
guessYear()
{
  local -a pathArray
  local field
  local year=""
  #read splits at "/" without pathname expansion, so a component like
  #"*" cannot pull in file names from the current directory
  IFS="/" read -r -a pathArray <<< "${1}"
  for field in "${pathArray[@]}"; do
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
  done
  echo "${year}"
  return 0
}
607
hostInfo()
{
  #
  # Hallo world - print host, ROOT and AliRoot system info followed by
  # the name and a long listing of the current directory
  #
  local rule="--------------------------------------"

  #
  # HOST info
  #
  echo ${rule}
  echo
  echo HOSTINFO
  echo
  echo HOSTINFO HOSTNAME"      "$HOSTNAME
  echo HOSTINFO DATE"          "$(date)
  echo HOSTINFO gccpath"       "$(which gcc)
  echo HOSTINFO gcc version"   "$(gcc --version | grep gcc)
  echo ${rule}

  #
  # ROOT info
  #
  echo ${rule}
  echo
  echo ROOTINFO
  echo
  echo ROOTINFO ROOT"           "$(which root)
  echo ROOTINFO VERSION"        "$(root-config --version)
  echo
  echo ${rule}

  #
  # ALIROOT info
  #
  echo ${rule}
  echo
  echo ALIROOTINFO
  echo
  echo ALIROOTINFO ALIROOT"        "$(which aliroot)
  echo ALIROOTINFO VERSION"        "$(echo $ALICE_LEVEL)
  echo ALIROOTINFO TARGET"         "$(echo $ALICE_TARGET)
  echo
  echo ${rule}

  #
  # Alien info (disabled)
  #
  #echo --------------------------------------
  #echo
  #echo ALIENINFO
  #for a in `alien --printenv`; do echo ALIENINFO $a; done
  #echo
  #echo --------------------------------------

  #
  # Local Info
  #
  echo PWD $(pwd)
  echo Dir
  ls -al
  echo
  echo
  echo
}
672
673 main "$@"