git.uio.no Git - u/mrichter/AliRoot.git/blobdiff - PWGPP/QA/scripts/runQA.sh
explicitly return a return code
[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh
index 6761f8854ded9dd3a1ad68401d70465f1ee1078c..b141870dce0ae04e4a278dc9a7d9a3f6d1f7ef4b 100755 (executable)
@@ -1,4 +1,9 @@
 #!/bin/bash
+if [ "${BASH_VERSINFO:-0}" -lt 4 ]; then #the :-0 default makes a shell without BASH_VERSINFO fail this check instead of erroring past it
+  echo "bash version >= 4 needed, you have ${BASH_VERSION}, exiting..."
+  exit 1
+fi
+
 main()
 {
   if [[ -z $1 ]]; then
@@ -10,8 +15,8 @@ main()
     echo "  ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
     return 1
   fi
-  if ! parseConfig $@; then
+
+  if ! parseConfig "$@"; then
     ${0}
     return 1
   fi
@@ -20,16 +25,22 @@ main()
 
   ocdbregex='raw://'
   if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
-    alien-token-init
+    alien-token-init ${alienUserName}
+    #this is a hack! alien-token init seems not enough
+    #but the gclient_env script messes up the LD_LIBRARY_PATH
+    while read x; do
+      eval ${x};
+    done < <(grep -v "LD_LIBRARY_PATH" /tmp/gclient_env_${UID})
   fi
 
-  updateQA $@
+  updateQA "$@"
+  return 0
 }
 
 updateQA()
 {
   umask 0002
-  parseConfig $@
+  parseConfig "$@"
 
   #be paranoid and make some full paths
   [[ ! -f ${inputList} ]] && echo "no input list: ${inputList}" && return 1
@@ -47,7 +58,7 @@ updateQA()
   echo outputDirectory=$outputDirectory
   echo
 
-  dateString=$(date +%Y-%m-%d-%H-%M)
+  dateString=$(date +%Y-%m-%d-%H-%M-%S-%N)
   echo "Start time QA process: $dateString"
 
   #logging
@@ -57,19 +68,23 @@ updateQA()
   touch ${logFile}
   [[ ! -f ${logFile} ]] && echo "cannot write logfile $logfile" && return 1
   echo "logFile = $logFile"
-  exec &>${logFile}
 
   #check lock
-  lockFile=${logDirectory}/runQA.lock
-  [[ -f ${lockFile} ]] && echo "lock ${lockFile} exists!" && return 1
+  lockFile=${workingDirectory}/runQA.lock
+  [[ -f ${lockFile} ]] && echo "lock ${lockFile} exists!" | tee ${logFile} && return 1
   touch ${lockFile}
-  [[ ! -f ${lockFile} ]] && echo "cannot lock $lockFile" && return 1
+  [[ ! -f ${lockFile} ]] && echo "cannot lock $lockFile" | tee ${logFile} && return 1
   
+  exec &>${logFile}
+
   ################################################################
   #ze detector loop
   for detectorScript in $ALICE_ROOT/PWGPP/QA/detectorQAscripts/*; do
-
-    [[ ! ${detectorScript} =~ .*\.sh ]] && continue
+    echo
+    echo "##############################################"
+    echo $(date)
+    unset planB
+    [[ ! ${detectorScript} =~ .*\.sh$ ]] && continue
     detector=${detectorScript%.sh}
     detector=${detector##*/}
     
@@ -86,8 +101,9 @@ updateQA()
     fi
 
     logSummary=${logDirectory}/summary-${detector}-${dateString}.log
+    hostInfo >> ${logSummary}
     outputDir=$(substituteDetectorName ${detector} ${outputDirectory})
-    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}
+    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}-${dateString}
     if ! mkdir -p ${tmpDetectorRunDir}; then
       echo "cannot create the temp dir $tmpDetectorRunDir"
       continue
@@ -95,12 +111,11 @@ updateQA()
     cd ${tmpDetectorRunDir}
 
     tmpPrefix=${tmpDetectorRunDir}/${outputDir}
-    echo
-    echo "##############################################"
     echo "running QA for ${detector}"
     echo "  outputDir=$outputDir"
     echo "  tmpPrefix=$tmpPrefix"
     
+    #unset the detector functions from previous iterations (detectors)
     unset -f runLevelQA
     unset -f periodLevelQA
     unset -f runLevelHighPtTreeQA
@@ -113,11 +128,16 @@ updateQA()
     declare -A arrOfTouchedProductions
     while read qaFile; do
       echo
+      echo $(date)
+      
+      #first check if input file exists
+      [[ ! -f ${qaFile%\#*} ]] && echo "file ${qaFile%\#*} not accessible" && continue
 
       if ! guessRunData ${qaFile}; then
         echo "could not guess run data from ${qaFile}"
         continue
       fi
+      echo "anchorYear for ${originalPeriod} is: ${anchorYear}"
 
       tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
       tmpRunDir=${tmpProductionDir}/000${runNumber}
@@ -128,8 +148,8 @@ updateQA()
       highPtTree=${qaFile}
 
       #maybe the input is not an archive, but a file
-      [[ "${qaFile}" =~ "QAresults.root" ]] && highPtTree=""
-      [[ "${qaFile}" =~ "FilterEvents_Trees.root" ]] && qaFile=""
+      [[ "${qaFile}" =~ QAresults.root$ ]] && highPtTree=""
+      [[ "${qaFile}" =~ FilterEvents_Trees.root$ ]] && qaFile=""
 
       #it is possible we get the highPt trees from somewhere else
       #search the list of high pt trees for the proper run number
@@ -140,11 +160,15 @@ updateQA()
       
       echo qaFile=$qaFile
       echo highPtTree=$highPtTree
+      echo ocdbStorage=${ocdbStorage}
+      echo
 
       #what if we have a zip archive?
       if [[ "$qaFile" =~ .*.zip$ ]]; then
         if unzip -l ${qaFile} | egrep "QAresults.root" &>/dev/null; then
           qaFile="${qaFile}#QAresults.root"
+        elif unzip -l ${qaFile} | egrep "QAresults_barrel.root" &>/dev/null; then
+          qaFile="${qaFile}#QAresults_barrel.root"
         else
           qaFile=""
         fi
@@ -163,9 +187,14 @@ updateQA()
         #perform some default actions:
         #if trending.root not created, create a default one
         if [[ ! -f trending.root ]]; then
-          aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" &>> runLevelQA.log
+          aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" 2>&1 | tee -a runLevelQA.log
+        fi
+        if [[ -f trending.root ]]; then
+          #cache the touched production + an example file to guarantee consistent run data parsing
+          arrOfTouchedProductions[${tmpProductionDir}]="${qaFile%\#*}"
+        else
+          echo "trending.root not created"
         fi
-        arrOfTouchedProductions[${tmpProductionDir}]=1
       fi
       #expert QA based on high pt trees
       if [[ -n ${highPtTree} && $(type -t runLevelHighPtTreeQA) =~ "function" ]]; then
@@ -185,30 +214,36 @@ updateQA()
     echo
 
     #################################################################
-    #(re)do the merging/trending in the final destination
+    #(re)do the merging/trending 
     for tmpProductionDir in ${!arrOfTouchedProductions[@]}; do
+      cd ${tmpProductionDir}
       echo
       echo "running period level stuff in ${tmpProductionDir}"
+      echo $(date)
     
       productionDir=${outputDir}/${tmpProductionDir#${tmpPrefix}}
+      echo productionDir=${outputDir}/${tmpProductionDir#${tmpPrefix}}
 
       mkdir -p ${productionDir}
       if [[ ! -d ${productionDir} ]]; then 
         echo "cannot make productionDir $productionDir" && continue
       fi
-      cd ${productionDir}
       
-      #move to final destination
-      for dir in ${tmpProductionDir}/*; do
+      #move runs to final destination
+      for dir in ${tmpProductionDir}/000*; do
+        echo 
         oldRunDir=${outputDir}/${dir#${tmpPrefix}}
-        if ! guessRunData "${dir}/dummyName"; then
+        if ! guessRunData "${arrOfTouchedProductions[${tmpProductionDir}]}"; then
           echo "could not guess run data from ${dir}"
           continue
         fi
 
         #before moving - VALIDATE!!!
-        if ! validate ${dir}; then continue; fi
+        if ! validate ${dir}; then 
+          continue
+        fi
 
+        #moving a dir is an atomic operation, no locking necessary
         if [[ -d ${oldRunDir} ]]; then
           echo "removing old ${oldRunDir}"
           rm -rf ${oldRunDir}
@@ -216,14 +251,26 @@ updateQA()
         echo "moving new ${runNumber} to ${productionDir}"
         mv -f ${dir} ${productionDir}
       done
-    
-      rm -f trending.root
-      
+   
+      #go to a temp dir to do the period level stuff in a completely clean dir
+      tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
+      echo
+      echo tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
+      if ! mkdir -p ${tmpPeriodLevelQAdir}; then continue; fi
+      cd ${tmpPeriodLevelQAdir}
+
+      #link the final list of per-run dirs here, just the dirs
+      #to have a clean working directory
+      unset linkedStuff
+      declare -a linkedStuff
+      for x in ${productionDir}/000*; do [[ -d $x ]] && ln -s $x && linkedStuff+=(${x##*/}); done
+
       #merge trending files if any
       if /bin/ls 000*/trending.root &>/dev/null; then
         hadd trending.root 000*/trending.root &> periodLevelQA.log
       fi
       
+      #run the period level trending/QA
       if [[ -f "trending.root" && $(type -t periodLevelQA) =~ "function" ]]; then
         echo running ${detector} periodLevelQA for production ${period}/${pass}
         periodLevelQA trending.root &>> periodLevelQA.log
@@ -231,11 +278,41 @@ updateQA()
         echo "WARNING: not running ${detector} periodLevelQA for production ${period}/${pass}, no trending.root"
       fi
 
-
       if ! validate ${PWD}; then continue; fi
 
-      cd ${tmpDetectorRunDir}
-    
+      #here we are validated so move the produced QA to the final place
+      #clean up linked stuff first
+      [[ -n ${linkedStuff[@]} ]] && rm ${linkedStuff[@]}
+      periodLevelLock=${productionDir}/runQA.lock
+      if [[ ! -f ${periodLevelLock} ]]; then
+        #some of the output could be a directory, so handle that
+        #TODO: maybe use rsync?
+        #lock to avoid conflicts:
+        echo "${HOSTNAME} ${dateString}" > ${periodLevelLock}
+        for x in ${tmpPeriodLevelQAdir}/*; do  
+          if [[ -d ${x} ]]; then
+            echo "removing ${productionDir}/${x##*/}"
+            rm -rf ${productionDir}/${x##*/}
+            echo "moving ${x} to ${productionDir}"
+            mv ${x} ${productionDir}
+          fi
+          if [[ -f ${x} ]]; then
+            echo "moving ${x} to ${productionDir}"
+            mv -f ${x} ${productionDir} 
+          fi
+        done
+        rm -f ${periodLevelLock}
+        #remove the temp dir
+        rm -rf ${tmpPeriodLevelQAdir}
+      else
+        echo "ERROR: cannot move to destination"                     >> ${logSummary}
+        echo "production dir ${productionDir} locked!"               >> ${logSummary}
+        echo "check and maybe manually do:"                          >> ${logSummary}
+        echo " rm ${periodLevelLock}"                                >> ${logSummary}
+        echo " rsync -av ${tmpPeriodLevelQAdir}/ ${productionDir}/"  >> ${logSummary}
+        planB=1
+      fi
+
     done
 
     cd ${workingDirectory}
@@ -247,21 +324,27 @@ updateQA()
     else
       executePlanB
     fi
-  done
+  done #end of detector loop
 
   #remove lock
   rm -f ${lockFile}
+  return 0
 }
 
 executePlanB()
 {
   #in case of emergency
-  if [[ -n ${MAILTO} ]]; then 
+  #build the recipient list: ${MAILTO} plus, if the variable named
+  #MAILTO_<detector> is set and non-empty, its value (indirect expansion)
+  local mailTo=${MAILTO}
+  local detExpertEmailVar="MAILTO_${detector}"
+  [[ -n "${!detExpertEmailVar}" ]] && mailTo+=" ${!detExpertEmailVar}"
+  if [[ -n ${mailTo} ]]; then 
     echo
-    echo "trouble detected, sending email to ${MAILTO}"
-
-    cat ${logSummary} | mail -s "qa in need of assistance" ${MAILTO}
+    echo "trouble detected, sending email to ${mailTo}"
+    cat ${logSummary} | mail -s "${detector} QA in need of assistance" ${mailTo}
   fi
+  return 0
 }
 
 validate()
@@ -291,7 +374,6 @@ summarizeLogs()
   #check logs
   local logstatus=0
   for log in ${dir}/${logFiles[*]}; do
-    finallog=${PWD%/}/${log}
     [[ ! -f ${log} ]] && continue
     errorSummary=$(validateLog ${log})
     validationStatus=$?
@@ -299,14 +381,14 @@ summarizeLogs()
     if [[ ${validationStatus} -eq 0 ]]; then 
       #in pretend mode randomly report an error in rec.log some cases
       if [[ -n ${pretend} && "${log}" == "rec.log" ]]; then
-        [[ $(( ${RANDOM}%2 )) -ge 1 ]] && echo "${finallog} BAD random error" || echo "${finallog} OK"
+        [[ $(( ${RANDOM}%2 )) -ge 1 ]] && echo "${log} BAD random error" || echo "${log} OK"
       else
-        echo "${finallog} OK"
+        echo "${log} OK"
       fi
     elif [[ ${validationStatus} -eq 1 ]]; then
-      echo "${finallog} BAD ${errorSummary}"
+      echo "${log} BAD ${errorSummary}"
     elif [[ ${validationStatus} -eq 2 ]]; then
-      echo "${finallog} OK MWAH ${errorSummary}"
+      echo "${log} OK MWAH ${errorSummary}"
     fi
   done
 
@@ -372,6 +454,8 @@ validateLog()
 
 parseConfig()
 {
+  args=("$@")
+
   #config file
   configFile=""
   #where to search for qa files
@@ -385,36 +469,36 @@ parseConfig()
   excludeDetectors="EXAMPLE"
   #logs
   logDirectory=${workingDirectory}/logs
-  #set aliroot
-  #alirootEnv="/home/mkrzewic/alisoft/balice_master.sh"
   #OCDB storage
-  #ocdbStorage="raw://"
+  ocdbStorage="raw://"
   #email to
   #MAILTO="fbellini@cern.ch"
 
   #first, check if the config file is configured
   #is yes - source it so that other options can override it
   #if any
-  for opt in $@; do
+  for opt in "${args[@]}"; do
     if [[ ${opt} =~ configFile=.* ]]; then
       eval "${opt}"
       [[ ! -f ${configFile} ]] && echo "configFile ${configFile} not found, exiting..." && return 1
+      echo "using config file: ${configFile}"
       source "${configFile}"
       break
     fi
   done
 
   #then, parse the options as they override the options from file
-  while [[ -n ${1} ]]; do
-    local var=${1#--}
-    if [[ ${var} =~ .*=.* ]]; then
-      eval "${var}"
-    else
+  for opt in "${args[@]}"; do
+    if [[ ! "${opt}" =~ .*=.* ]]; then
       echo "badly formatted option ${var}, should be: option=value, stopping..."
       return 1
     fi
-    shift
+    local var="${opt%%=*}"
+    local value="${opt#*=}"
+    echo "${var}=${value}"
+    export ${var}="${value}"
   done
+  return 0
 }
 
 guessRunData()
@@ -426,10 +510,15 @@ guessRunData()
   pass=""
   legoTrainRunNumber=""
   dataType=""
+  originalPass=""
+  originalPeriod=""
+  anchorYear=""
 
-  local shortRunNumber=""
+  shortRunNumber=""
+  oldIFS=${IFS}
   local IFS="/"
   declare -a path=( $1 )
+  IFS="${oldIFS}"
   local dirDepth=$(( ${#path[*]}-1 ))
   i=0
   for ((x=${dirDepth};x>=0;x--)); do
@@ -440,25 +529,58 @@ guessRunData()
 
     [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
     [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
-    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
+    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*} && originalPeriod=${field}
     [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
     [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
     [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
     [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
     (( i++ ))
   done
+  originalPass=${pass}
+  [[ -n ${shortRunNumber} && "${legoTrainRunNumber}" =~ ${shortRunNumber} ]] && legoTrainRunNumber=""
   [[ -z ${legoTrainRunNumber} ]] && pass=${path[$((dirDepth-1))]}
-  [[ "${dataType}" =~ ^sim$ ]] && pass="passMC" && runNumber=${shortRunNumber}
+  [[ "${dataType}" =~ ^sim$ ]] && pass="passMC" && runNumber=${shortRunNumber} && originalPass="" #for MC not from lego, the runnumber is identified as lego train number, thus needs to be nulled
+  [[ -n ${legoTrainRunNumber} ]] && pass+="_lego${legoTrainRunNumber}"
   
+  #modify the OCDB: set the year
+  if [[ ${dataType} =~ sim ]]; then 
+    anchorYear=$(run2year $runNumber)
+    if [[ -z "${anchorYear}" ]]; then
+      echo "WARNING: anchorYear not available for this production: ${originalPeriod}, runNumber: ${runNumber}. Cannot set the OCDB."
+      return 1
+    fi
+    ocdbStorage=$(setYear ${anchorYear} ${ocdbStorage})
+  else
+    ocdbStorage=$(setYear ${year} ${ocdbStorage})
+  fi
+
   #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber}} || -z ${pass} ]];
-  if [[ -z ${runNumber}} ]];
+  if [[ -z ${runNumber} ]]
   then
     #error condition
     return 1
-  else
-    #ALL OK
-    return 0
   fi
+  
+  #ALL OK
+  return 0
+}
+
+run2year()
+{
+  #print the year a run belongs to; prints nothing if no range contains it
+  #  ${1} - run number (returns 1 if empty, 0 otherwise)
+  #the run-year table is ${runMap} (a string) defined in the config file
+  #one line per year, format: year runMin runMax
+  local run=$1
+  [[ -z ${run} ]] && return 1
+  local year="" runMin="" runMax="" rest=""
+  #read -r: keep backslashes literal; rest: absorbs any extra columns
+  while read -r year runMin runMax rest; do
+    #skip blank/short lines and lines whose limits are not plain integers
+    [[ ${runMin} =~ ^[0-9]+$ && ${runMax} =~ ^[0-9]+$ ]] || continue
+    [[ ${run} -ge ${runMin} && ${run} -le ${runMax} ]] && printf '%s\n' "${year}" && break
+  done <<< "${runMap}"
+  return 0
 }
 
 substituteDetectorName()
@@ -467,6 +589,7 @@ substituteDetectorName()
   local dir=$2
   [[ ${dir} =~ \%det ]] && det=${det,,} && echo ${dir/\%det/${det}}
   [[ ${dir} =~ \%DET ]] && det=${det} && echo ${dir/\%DET/${det}}
+  return 0
 }
 
 get_realpath() 
@@ -494,4 +617,98 @@ get_realpath()
   return 0 # success
 }
 
-main $@
+setYear()
+{
+  #print ${2} with its first "/<year>/" component replaced by the year of ${1}
+  #  ${1} - year to be set (guessYear extracts it, so a path works too)
+  #  ${2} - where to set the year; printed unchanged unless both years are found and differ
+  local year1 year2 path=${2}
+  year1=$(guessYear "${1}")
+  year2=$(guessYear "${2}")
+  [[ -n ${year1} && -n ${year2} && ${year1} -ne ${year2} ]] && path=${2/\/${year2}\//\/${year1}\/}
+  printf '%s\n' "${path}"
+  return 0
+}
+
+guessYear()
+{
+  #print the rightmost "/"-separated component of ${1} that looks like
+  #a year (20NN); prints an empty line if there is none
+  local field year="" pathArray=()
+  #read -a splits on "/" for this one command only and does no pathname expansion
+  IFS="/" read -r -a pathArray <<< "${1}"
+  for field in "${pathArray[@]}"; do
+    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
+  done
+  printf '%s\n' "${year}"
+  return 0
+}
+
+hostInfo(){
+  # Print a summary of the host and software environment to stdout:
+  # host/date/gcc, ROOT, AliRoot, then a listing of the current directory.
+  # Takes no arguments and always returns 0.
+  local rule=--------------------------------------
+
+  #
+  # HOST info
+  #
+  echo "${rule}"
+  echo
+  echo HOSTINFO
+  echo
+  echo HOSTINFO HOSTNAME"      "$HOSTNAME
+  echo HOSTINFO DATE"          "$(date)
+  echo HOSTINFO gccpath"       "$(which gcc)
+  echo HOSTINFO gcc version"   "$(gcc --version | grep gcc)
+  echo "${rule}"
+
+  #
+  # ROOT info
+  #
+  echo "${rule}"
+  echo
+  echo ROOTINFO
+  echo
+  echo ROOTINFO ROOT"           "$(which root)
+  echo ROOTINFO VERSION"        "$(root-config --version)
+  echo
+  echo "${rule}"
+
+  #
+  # ALIROOT info
+  #
+  echo "${rule}"
+  echo
+  echo ALIROOTINFO
+  echo
+  echo ALIROOTINFO ALIROOT"        "$(which aliroot)
+  echo ALIROOTINFO VERSION"        "$(echo $ALICE_LEVEL)
+  echo ALIROOTINFO TARGET"         "$(echo $ALICE_TARGET)
+  echo
+  echo "${rule}"
+
+  #
+  # Alien info - disabled, the commands are kept for reference
+  #
+  #echo --------------------------------------
+  #echo
+  #echo ALIENINFO
+  #for a in `alien --printenv`; do echo ALIENINFO $a; done
+  #echo
+  #echo --------------------------------------
+
+  #
+  # Local info: the working directory and its contents
+  #
+  echo PWD $(pwd)
+  echo Dir
+  ls -al
+  echo
+  echo
+  echo
+
+  return 0
+}
+
+main "$@"