[u/mrichter/AliRoot.git] / PWGPP / QA / scripts / runQA.sh

#!/bin/bash
# Print the command line help to stdout.
printUsage()
{
  echo "Usage: "
  echo "  ${0##*/} option=value [option=value]"
  echo "  at least inputList should be specified, or configFile containing it:"
  echo "  ${0##*/} inputList=file.list"
  echo "  options override config file (if any), e.g.:"
  echo "  ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
}

# Entry point of the script.
# Arguments: option=value pairs, handed unchanged to parseConfig and updateQA.
# Returns:   1 if no option is given, the options cannot be parsed or
#            ALICE_ROOT is not set; otherwise the status of updateQA.
main()
{
  if [[ -z $1 ]]; then
    printUsage
    return 1
  fi

  #print the usage directly: re-executing ${0} only works when the script is
  #executable and reachable under the name it was started with
  if ! parseConfig "$@"; then
    printUsage
    return 1
  fi

  [[ -z $ALICE_ROOT ]] && echo "ALICE_ROOT not defined" && return 1

  ocdbregex='raw://'
  if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
    #alienUserName is deliberately unquoted: when it is empty no argument is passed
    alien-token-init ${alienUserName}
    #this is a hack! alien-token init seems not enough
    #but the gclient_env script messes up the LD_LIBRARY_PATH
    #NOTE(review): every remaining line of the file is evaluated as shell code,
    #and the file sits under a predictable name in /tmp
    while read -r x; do
      eval "${x}"
    done < <(grep -v "LD_LIBRARY_PATH" /tmp/gclient_env_${UID})
  fi

  updateQA "$@"
}

# Run the QA of all detectors over the files listed in ${inputList}.
#
# Steps, in order:
#   1. parse the options, turn inputList/workingDirectory into full paths
#   2. create the log file and the lock file in ${logDirectory}, then send
#      all further output of this shell to the log file
#   3. for every *.sh file in $ALICE_ROOT/PWGPP/QA/detectorQAscripts:
#      - source it and call the functions it defines (runLevelQA,
#        runLevelHighPtTreeQA) once per input file, inside a temporary run dir
#      - for every production that got new output: validate the run dirs,
#        move them to the output directory, merge the trending.root files
#        with hadd, call periodLevelQA and move its output as well
#      - remove the temporary dir, or call executePlanB if anything went wrong
#   4. remove the lock file
#
# Arguments: the option=value pairs understood by parseConfig.
# Returns:   1 if the input list, working dir, log dir, log file or lock
#            cannot be used; otherwise the status of the final lock removal.
updateQA()
{
  umask 0002
  parseConfig "$@"

  #be paranoid and make some full paths
  [[ ! -f ${inputList} ]] && echo "no input list: ${inputList}" && return 1
  inputList=$(get_realpath ${inputList})
  mkdir -p ${workingDirectory}
  #resolve the parent directory in a subshell and re-attach the last path component
  workingDirectory=$(workingDirectory=${workingDirectory%/}; cd ${workingDirectory%/*}; echo "${PWD}/${workingDirectory##*/}")
  if [[ ! -d ${workingDirectory} ]]; then
    echo "working dir $workingDirectory does not exist and cannot be created"
    return 1
  fi
  cd ${workingDirectory}

  echo JOB config:
  echo inputList=$inputList
  echo outputDirectory=$outputDirectory
  echo

  dateString=$(date +%Y-%m-%d-%H-%M)
  echo "Start time QA process: $dateString"

  #logging
  mkdir -p $logDirectory
  [[ ! -d $logDirectory ]] && echo "no log dir $logDirectory" && return 1
  logFile="$logDirectory/${0##*/}.${dateString}.log"
  touch ${logFile}
  #report the path that could not be written (the variable is logFile, not logfile)
  [[ ! -f ${logFile} ]] && echo "cannot write logfile $logFile" && return 1
  echo "logFile = $logFile"

  #check lock
  lockFile=${logDirectory}/runQA.lock
  [[ -f ${lockFile} ]] && echo "lock ${lockFile} exists!" | tee ${logFile} && return 1
  touch ${lockFile}
  [[ ! -f ${lockFile} ]] && echo "cannot lock $lockFile" | tee ${logFile} && return 1
  
  #from here on stdout and stderr of this shell go to the log file
  exec &>${logFile}

  ################################################################
  #ze detector loop
  for detectorScript in $ALICE_ROOT/PWGPP/QA/detectorQAscripts/*; do
    echo
    echo "##############################################"
    echo $(date)
    unset planB
    [[ ! ${detectorScript} =~ .*\.sh$ ]] && continue
    #the detector name is the script file name without directory and .sh
    detector=${detectorScript%.sh}
    detector=${detector##*/}
    
    #skip if excluded
    #(regex match: the detector name only has to occur somewhere in the list)
    if [[ "${excludeDetectors}" =~ ${detector} ]]; then
      echo "${detector} is excluded in config, skipping..."
      continue
    fi

    #if includeDetectors set, only process those detectors specified there
    if [[ -n ${includeDetectors} && ! "${includeDetectors}" =~ ${detector} ]]; then
      echo "${detector} not included in includeDetectors, skipping..."
      continue
    fi

    logSummary=${logDirectory}/summary-${detector}-${dateString}.log
    outputDir=$(substituteDetectorName ${detector} ${outputDirectory})
    tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}-${dateString}
    if ! mkdir -p ${tmpDetectorRunDir}; then
      echo "cannot create the temp dir $tmpDetectorRunDir"
      continue
    fi
    cd ${tmpDetectorRunDir}

    #the output tree is first built below tmpPrefix and moved to outputDir later
    tmpPrefix=${tmpDetectorRunDir}/${outputDir}
    echo "running QA for ${detector}"
    echo "  outputDir=$outputDir"
    echo "  tmpPrefix=$tmpPrefix"
    
    #unset the detector functions from previous iterations (detectors)
    unset -f runLevelQA
    unset -f periodLevelQA
    unset -f runLevelHighPtTreeQA
    unset -f periodLevelHighPtTreeQA
    source ${detectorScript}

    #################################################################
    #produce the QA and trending tree for each file (run)
    #the keys of this array are the temporary production dirs with new output
    unset arrOfTouchedProductions
    declare -A arrOfTouchedProductions
    while read qaFile; do
      echo
      echo $(date)
      
      #first check if input file exists
      #(anything after a '#' is stripped from the entry before the check)
      [[ ! -f ${qaFile%\#*} ]] && echo "file ${qaFile%\#*} not accessible" && continue

      if ! guessRunData ${qaFile}; then
        echo "could not guess run data from ${qaFile}"
        continue
      fi

      tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
      tmpRunDir=${tmpProductionDir}/000${runNumber}
      mkdir -p ${tmpRunDir}
      cd ${tmpRunDir}

      #by default we expect to have everything in the same archive
      highPtTree=${qaFile}

      #maybe the input is not an archive, but a file
      [[ "${qaFile}" =~ QAresults.root$ ]] && highPtTree=""
      [[ "${qaFile}" =~ FilterEvents_Trees.root$ ]] && qaFile=""

      #it is possible we get the highPt trees from somewhere else
      #search the list of high pt trees for the proper run number
      if [[ -n ${inputListHighPtTrees} ]]; then
        highPtTree=$(egrep -m1 ${runNumber} ${inputListHighPtTrees})
        echo "loaded the highPtTree ${highPtTree} from external file ${inputListHighPtTrees}"
      fi
      
      echo qaFile=$qaFile
      echo highPtTree=$highPtTree
      echo ocdbStorage=${ocdbStorage}
      echo

      #what if we have a zip archive?
      #address the member as archive#member, or drop the input if the member is missing
      if [[ "$qaFile" =~ .*.zip$ ]]; then
        if unzip -l ${qaFile} | egrep "QAresults.root" &>/dev/null; then
          qaFile="${qaFile}#QAresults.root"
        else
          qaFile=""
        fi
      fi
      if [[ "$highPtTree" =~ .*.zip$ ]]; then
        if unzip -l ${highPtTree} | egrep "FilterEvents_Trees.root" &>/dev/null; then
          highPtTree="${highPtTree}#FilterEvents_Trees.root"
        else
          highPtTree=""
        fi
      fi
     
      #run level QA, only if the detector script defines runLevelQA
      if [[ -n ${qaFile} && $(type -t runLevelQA) =~ "function" ]]; then
        echo running ${detector} runLevelQA for run ${runNumber} from ${qaFile}
        runLevelQA "${qaFile}" &> runLevelQA.log
        #perform some default actions:
        #if trending.root not created, create a default one
        if [[ ! -f trending.root ]]; then
          aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" 2>&1 | tee -a runLevelQA.log
        fi
        if [[ -f trending.root ]]; then
          arrOfTouchedProductions[${tmpProductionDir}]=1
        else
          echo "trending.root not created"
        fi
      fi
      #expert QA based on high pt trees
      if [[ -n ${highPtTree} && $(type -t runLevelHighPtTreeQA) =~ "function" ]]; then
        echo running ${detector} runLevelHighPtTreeQA for run ${runNumber} from ${highPtTree}
        runLevelHighPtTreeQA "${highPtTree}" &> runLevelHighPtTreeQA.log
        arrOfTouchedProductions[${tmpProductionDir}]=1
      fi

      cd ${tmpDetectorRunDir}
    
    done < ${inputList}

    #################################################################
    #cache which productions were (re)done
    echo "list of processed productions:"
    echo "    ${!arrOfTouchedProductions[@]}"
    echo

    #################################################################
    #(re)do the merging/trending 
    for tmpProductionDir in ${!arrOfTouchedProductions[@]}; do
      cd ${tmpProductionDir}
      echo
      echo "running period level stuff in ${tmpProductionDir}"
      echo $(date)
    
      #final location: the same relative path, below outputDir instead of tmpPrefix
      productionDir=${outputDir}/${tmpProductionDir#${tmpPrefix}}
      echo productionDir=${outputDir}/${tmpProductionDir#${tmpPrefix}}

      mkdir -p ${productionDir}
      if [[ ! -d ${productionDir} ]]; then 
        echo "cannot make productionDir $productionDir" && continue
      fi
      
      #move runs to final destination
      for dir in ${tmpProductionDir}/000*; do
        echo 
        oldRunDir=${outputDir}/${dir#${tmpPrefix}}
        #guessRunData takes a file path, so a dummy file name is appended
        if ! guessRunData "${dir}/dummyName"; then
          echo "could not guess run data from ${dir}"
          continue
        fi

        #before moving - VALIDATE!!!
        if ! validate ${dir}; then 
          continue
        fi

        #moving a dir is an atomic operation, no locking necessary
        if [[ -d ${oldRunDir} ]]; then
          echo "removing old ${oldRunDir}"
          rm -rf ${oldRunDir}
        fi
        echo "moving new ${runNumber} to ${productionDir}"
        mv -f ${dir} ${productionDir}
      done
   
      #go to a temp dir to do the period level stuff in a completely clean dir
      tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
      echo
      echo tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
      if ! mkdir -p ${tmpPeriodLevelQAdir}; then continue; fi
      cd ${tmpPeriodLevelQAdir}

      #link the final list of per-run dirs here, just the dirs
      #to have a clean working directory
      unset linkedStuff
      declare -a linkedStuff
      for x in ${productionDir}/000*; do [[ -d $x ]] && ln -s $x && linkedStuff+=(${x##*/}); done

      #merge trending files if any
      if /bin/ls 000*/trending.root &>/dev/null; then
        hadd trending.root 000*/trending.root &> periodLevelQA.log
      fi
      
      #run the period level trending/QA
      if [[ -f "trending.root" && $(type -t periodLevelQA) =~ "function" ]]; then
        echo running ${detector} periodLevelQA for production ${period}/${pass}
        periodLevelQA trending.root &>> periodLevelQA.log
      else 
        echo "WARNING: not running ${detector} periodLevelQA for production ${period}/${pass}, no trending.root"
      fi

      if ! validate ${PWD}; then continue; fi

      #here we are validated so move the produced QA to the final place
      #clean up linked stuff first
      [[ -n ${linkedStuff[@]} ]] && rm ${linkedStuff[@]}
      periodLevelLock=${productionDir}/runQA.lock
      if [[ ! -f ${periodLevelLock} ]]; then
        #some of the output could be a directory, so handle that
        #TODO: maybe use rsync?
        #lock to avoid conflicts:
        echo "${HOSTNAME} ${dateString}" > ${periodLevelLock}
        for x in ${tmpPeriodLevelQAdir}/*; do  
          if [[ -d ${x} ]]; then
            echo "removing ${productionDir}/${x##*/}"
            rm -rf ${productionDir}/${x##*/}
            echo "moving ${x} to ${productionDir}"
            mv ${x} ${productionDir}
          fi
          if [[ -f ${x} ]]; then
            echo "moving ${x} to ${productionDir}"
            mv -f ${x} ${productionDir} 
          fi
        done
        rm -f ${periodLevelLock}
        #remove the temp dir
        rm -rf ${tmpPeriodLevelQAdir}
      else
        #the production dir is locked: leave the output in the temp dir
        #and write instructions to the summary log
        echo "ERROR: cannot move to destination"                     >> ${logSummary}
        echo "production dir ${productionDir} locked!"               >> ${logSummary}
        echo "check and maybe manually do:"                          >> ${logSummary}
        echo " rm ${periodLevelLock}"                                >> ${logSummary}
        echo " rsync -av ${tmpPeriodLevelQAdir}/ ${productionDir}/"  >> ${logSummary}
        planB=1
      fi

    done

    cd ${workingDirectory}

    #planB is set here or by validate whenever something could not be published;
    #in that case the temporary dir is kept
    if [[ -z ${planB} ]]; then
      echo
      echo removing ${tmpDetectorRunDir}
      rm -rf ${tmpDetectorRunDir}
    else
      executePlanB
    fi
  done #end of detector loop

  #remove lock
  rm -f ${lockFile}
}

# Emergency handler: mail the BAD lines of ${logSummary} to ${MAILTO}.
# Does nothing (and succeeds) when MAILTO is empty.
executePlanB()
{
  #nobody to notify
  [[ -z ${MAILTO} ]] && return 0

  echo
  echo "trouble detected, sending email to ${MAILTO}"

  #MAILTO stays unquoted, it is word-split into the arguments of mail
  grep BAD ${logSummary} | mail -s "qa in need of assistance" ${MAILTO}
}

# Check the logs of a directory and record the outcome.
#   ${1} - directory handed to summarizeLogs
# The report of summarizeLogs is appended to ${logSummary}; its exit status
# is kept in the global logStatus. On failure a warning is printed and the
# global planB is set.
# Returns 0 if summarizeLogs succeeded, 1 otherwise.
validate()
{
  local target=${1}

  summarizeLogs ${target} >> ${logSummary}
  logStatus=$?

  #all logs fine, nothing more to do
  [[ ${logStatus} -eq 0 ]] && return 0

  echo "WARNING not validated: ${target}"
  planB=1
  return 1
}

# Print one status line per log file found in a directory and produce
# stack traces for core files.
#   ${1} - directory to check; the current directory is used when ${1}
#          is not a directory
# Checked files: *.log, stdout and stderr inside that directory. Each one is
# handed to validateLog and reported as "<path> OK", "<path> BAD <errors>"
# or "<path> OK MWAH <warnings>".
# Core files (core, */core) are looked up relative to the current directory;
# for each of them a gdb backtrace is written to stacktrace_<path>.log in
# the current directory.
# Returns 0 if validateLog succeeded for every log, 1 otherwise.
summarizeLogs()
{
  local dir=$1
  [[ ! -d ${dir} ]] && dir=${PWD}

  #print a summary of logs
  logFiles=(
      "*.log"
      "stdout"
      "stderr"
  )

  #check logs
  local logstatus=0
  local pattern log finallog errorSummary validationStatus
  #prefix every pattern with the directory; joining the whole list after a
  #single ${dir}/ would leave stdout and stderr relative to the current dir
  for pattern in "${logFiles[@]}"; do
    #pattern is unquoted on purpose so that *.log is expanded
    for log in "${dir}"/${pattern}; do
      #also skips a pattern that matched nothing
      [[ ! -f ${log} ]] && continue

      #report an absolute path without prepending PWD to an absolute one
      if [[ ${log} == /* ]]; then
        finallog=${log}
      else
        finallog=${PWD%/}/${log}
      fi

      errorSummary=$(validateLog "${log}")
      validationStatus=$?
      #NOTE(review): a log with warnings only (status 2) is reported as
      #"OK MWAH" but still makes this function return 1 - confirm this is wanted
      [[ ${validationStatus} -ne 0 ]] && logstatus=1
      if [[ ${validationStatus} -eq 0 ]]; then
        #in pretend mode randomly report an error in rec.log some cases
        if [[ -n ${pretend} && "${log##*/}" == "rec.log" ]]; then
          if (( RANDOM % 2 >= 1 )); then
            echo "${finallog} BAD random error"
          else
            echo "${finallog} OK"
          fi
        else
          echo "${finallog} OK"
        fi
      elif [[ ${validationStatus} -eq 1 ]]; then
        echo "${finallog} BAD ${errorSummary}"
      elif [[ ${validationStatus} -eq 2 ]]; then
        echo "${finallog} OK MWAH ${errorSummary}"
      fi
    done
  done

  #report core files
  local core
  for core in "${PWD}"/*/core "${PWD}/core"; do
    [[ -f ${core} ]] || continue
    echo "${core}"
    chmod 644 "${core}"
    #the slashes of the path are turned into underscores for the file name
    gdb --batch --quiet -ex "bt" -ex "quit" aliroot "${core}" > "stacktrace_${core//\//_}.log"
  done

  return ${logstatus}
}

# Search one log file for known error and warning messages.
#   ${1} - log file to check
# For every condition the first matching line (grep -m1) is collected,
# each followed by " : ".
# Prints the collected error lines and returns 1 if any error condition
# matched; otherwise prints the collected warning lines and returns 2 if any
# warning condition matched; otherwise prints nothing and returns 0.
# The globals log, errorConditions, warningConditions and i are (re)set.
validateLog()
{
  log=${1}

  #messages that mark a log as bad (grep patterns)
  errorConditions=(
            'There was a crash'
            'floating'
            'error while loading shared libraries'
            'std::bad_alloc'
            's_err_syswatch_'
            'Thread [0-9]* (Thread'
            'AliFatal'
            'core dumped'
            '\.C.*error:.*\.h: No such file'
            'segmentation'
            'Interpreter error recovered'
  )

  #messages that are only reported
  warningConditions=(
            'This is serious'
  )

  local errorSummary=""
  local warningSummary=""
  local hit

  #collect the first hit of every error condition
  i=0
  while (( i < ${#errorConditions[@]} )); do
    hit=$(grep -m1 -e "${errorConditions[${i}]}" ${log})
    if [[ -n ${hit} ]]; then
      errorSummary+="${hit} : "
    fi
    (( i++ ))
  done

  #same for the warning conditions
  i=0
  while (( i < ${#warningConditions[@]} )); do
    hit=$(grep -m1 -e "${warningConditions[${i}]}" ${log})
    if [[ -n ${hit} ]]; then
      warningSummary+="${hit} : "
    fi
    (( i++ ))
  done

  #errors take precedence over warnings
  if [[ -n ${errorSummary} ]]; then
    echo "${errorSummary}"
    return 1
  elif [[ -n ${warningSummary} ]]; then
    echo "${warningSummary}"
    return 2
  fi

  return 0
}

# Set the configuration variables of the script.
# Order of precedence (lowest first):
#   1. the defaults assigned below
#   2. the file given by a configFile=<file> option, which is sourced
#   3. the option=value arguments, each of which is exported as a variable
# Arguments: option=value pairs
# Returns:   1 if the config file does not exist or an argument is not of
#            the form option=value, 0 otherwise.
parseConfig()
{
  args=("$@")

  #config file
  configFile=""
  #where to search for qa files
  inputList=file.list
  #working directory
  workingDirectory="${PWD}"
  #where to place the final qa plots
  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
  outputDirectory="${workingDirectory}/%DET"
  #filter out detector option
  excludeDetectors="EXAMPLE"
  #logs
  logDirectory=${workingDirectory}/logs
  #OCDB storage
  ocdbStorage="raw://"
  #email to
  #MAILTO="fbellini@cern.ch"

  #first, check if the config file is configured
  #if yes - source it so that other options can override it
  #if any
  for opt in "${args[@]}"; do
    if [[ ${opt} =~ ^configFile=.* ]]; then
      #plain assignment: the value is taken literally, it is never run as code
      configFile="${opt#configFile=}"
      [[ ! -f ${configFile} ]] && echo "configFile ${configFile} not found, exiting..." && return 1
      echo "using config file: ${configFile}"
      source "${configFile}"
      break
    fi
  done

  #then, parse the options as they override the options from file
  for opt in "${args[@]}"; do
    if [[ ! "${opt}" =~ .*=.* ]]; then
      #name the argument that is actually wrong
      echo "badly formatted option ${opt}, should be: option=value, stopping..."
      return 1
    fi
    #split at the first '=': the value may itself contain '='
    local var="${opt%%=*}"
    local value="${opt#*=}"
    echo "${var}=${value}"
    export ${var}="${value}"
  done
  return 0
}

# Guess the run parameters from the components of a file path.
#   ${1} - path of the input file
# Sets the globals period, runNumber, year, pass, legoTrainRunNumber and
# dataType, and rewrites the year in the global ocdbStorage via setYear.
# The path is scanned from the last component to the first and every match
# overwrites the previous one, so for period, run number, year and data
# type the leftmost matching component is the one that is kept.
# Returns 0 if a run number was found, 1 otherwise.
guessRunData()
{
  period=""
  runNumber=""
  year=""
  pass=""
  legoTrainRunNumber=""
  dataType=""

  local shortRunNumber=""
  #split at '/' without glob-expanding the components
  local -a path=()
  IFS="/" read -r -a path <<< "${1}"
  local dirDepth=$(( ${#path[*]}-1 ))
  local x field fieldPrev fieldNext
  for ((x=dirDepth; x>=0; x--)); do

    [[ $((x-1)) -ge 0 ]] && fieldPrev=${path[$((x-1))]}
    field=${path[${x}]}
    fieldNext=${path[$((x+1))]}

    #a purely numeric directory directly above the zip/root file is taken
    #as lego train run number, the directory before it as pass
    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
    #the period is the LHCxxy... component up to its last underscore
    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
    #run number: 000 followed by six digits, stored without the 000
    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
    #plain six digit component, used for simulated data below
    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
    [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
  done

  #without a lego train the pass is the directory holding the file
  [[ -z ${legoTrainRunNumber} && ${dirDepth} -ge 1 ]] && pass=${path[$((dirDepth-1))]}
  if [[ "${dataType}" =~ ^sim$ ]]; then
    pass="passMC"
    #prefer the six digit run number, but keep a 000xxxxxx one if that is
    #all the path contains
    [[ -n ${shortRunNumber} ]] && runNumber=${shortRunNumber}
  fi
  [[ -n ${legoTrainRunNumber} ]] && pass+="_lego${legoTrainRunNumber}"
  
  #modify the OCDB: set the year
  #quoted, so that an empty year does not shift ocdbStorage into the first argument
  ocdbStorage=$(setYear "${year}" "${ocdbStorage}")

  #error condition: no run number in the path
  [[ -z ${runNumber} ]] && return 1

  #ALL OK
  return 0
}

# Insert the detector name into a directory template.
#   ${1} - detector name
#   ${2} - directory, may contain the placeholders %det and %DET
# %det is replaced by the lower-cased detector name, %DET by the name as
# given. A directory without placeholder is printed unchanged, so that the
# caller never ends up with an empty output directory.
# Prints exactly one line with the resulting directory.
substituteDetectorName()
{
  local det=$1
  local dir=$2
  #the % is escaped so that it is not taken as an "anchor at end" marker
  dir=${dir//\%det/${det,,}}
  dir=${dir//\%DET/${det}}
  echo "${dir}"
}

# Print the absolute path of an existing regular file.
#   ${1} - path of the file, absolute or relative to the current directory
# The directory part is resolved by changing into it in a subshell, so the
# working directory and OLDPWD of the caller are left alone. Symbolic links
# are not resolved beyond what the shell's PWD reports.
# Returns 1 (printing nothing) if ${1} is not a regular file, 0 otherwise.
get_realpath() 
{
  # file *must* exist
  [[ -f "$1" ]] || return 1

  # file without directory part is local
  local tmppwd="${PWD}"
  local dir=""
  if [[ "$1" == */* ]]; then
    dir="${1%/*}"
    # a file directly below the root directory
    [[ -z ${dir} ]] && dir="/"
  fi

  if [[ -n ${dir} ]]; then
    local resolved
    # CDPATH is emptied for the cd so that a relative directory is only
    # looked up below the current directory
    if resolved=$(CDPATH="" cd -- "${dir}" 2>/dev/null && echo "${PWD}"); then
      tmppwd="${resolved}"
    fi
  fi

  # reassemble realpath
  echo "${tmppwd%/}/${1##*/}"
  return 0 # success
}

# Replace the year directory of a path by another year.
#   ${1} - path (or plain year) the new year is taken from
#   ${2} - path in which the year is to be replaced
# Both years are determined with guessYear. The path ${2} is printed with
# its /<year>/ component swapped only when both years are known and differ;
# in every other case ${2} is printed as it is.
# Always returns 0.
setYear()
{
  local wantedYear=$(guessYear ${1})
  local currentYear=$(guessYear ${2})
  local path=${2}

  if [[ -n ${wantedYear} && -n ${currentYear} && ${wantedYear} -ne ${currentYear} ]]; then
    #only a year enclosed in slashes is replaced
    path=${2/\/${currentYear}\//\/${wantedYear}\/}
  fi

  echo ${path}
  return 0
}

# Print the year found in a path.
#   ${1} - path, split at '/'
# A component counts as year if it consists of 20 followed by two digits.
# If several components qualify the rightmost one is printed; if none
# does, an empty line is printed.
# Always returns 0.
guessYear()
{
  local IFS="/"
  local -a components=( ${1} )
  local year=""
  local idx

  #walk backwards, the first hit is the rightmost year
  for (( idx=${#components[@]}-1; idx>=0; idx-- )); do
    if [[ ${components[idx]} =~ ^20[0-9][0-9]$ ]]; then
      year=${components[idx]}
      break
    fi
  done

  echo ${year}
  return 0
}

#all functions are defined at this point: start the script, handing the
#command line arguments to main unchanged
main "$@"
Commit	Line	Data
	1	#!/bin/bash
	2	main()
	3	{
	4	if [[ -z $1 ]]; then
	5	echo "Usage: "
	6	echo " ${0##*/} option=value [option=value]"
	7	echo " at least inputList should be specified, or configFile containing it:"
	8	echo " ${0##*/} inputList=file.list"
	9	echo " options override config file (if any), e.g.:"
	10	echo " ${0##*/} configFile=runQA.config inputList=file.list outputDirectory=%det"
	11	return 1
	12	fi
	13
	14	if ! parseConfig "$@"; then
	15	${0}
	16	return 1
	17	fi
	18
	19	[[ -z $ALICE_ROOT ]] && echo "ALICE_ROOT not defined" && return 1
	20
	21	ocdbregex='raw://'
	22	if [[ ${ocdbStorage} =~ ${ocdbregex} ]]; then
	23	alien-token-init ${alienUserName}
	24	#this is a hack! alien-token init seems not enough
	25	#but the gclient_env script messes up the LD_LIBRARY_PATH
	26	while read x; do
	27	eval ${x};
	28	done < <(grep -v "LD_LIBRARY_PATH" /tmp/gclient_env_${UID})
	29	fi
	30
	31	updateQA "$@"
	32	}
	33
	34	updateQA()
	35	{
	36	umask 0002
	37	parseConfig "$@"
	38
	39	#be paranoid and make some full paths
	40	[[ ! -f ${inputList} ]] && echo "no input list: ${inputList}" && return 1
	41	inputList=$(get_realpath ${inputList})
	42	mkdir -p ${workingDirectory}
	43	workingDirectory=$(workingDirectory=${workingDirectory%/}; cd ${workingDirectory%/}; echo "${PWD}/${workingDirectory##/}")
	44	if [[ ! -d ${workingDirectory} ]]; then
	45	echo "working dir $workingDirectory does not exist and cannot be created"
	46	return 1
	47	fi
	48	cd ${workingDirectory}
	49
	50	echo JOB config:
	51	echo inputList=$inputList
	52	echo outputDirectory=$outputDirectory
	53	echo
	54
	55	dateString=$(date +%Y-%m-%d-%H-%M)
	56	echo "Start time QA process: $dateString"
	57
	58	#logging
	59	mkdir -p $logDirectory
	60	[[ ! -d $logDirectory ]] && echo "no log dir $logDirectory" && return 1
	61	logFile="$logDirectory/${0##*/}.${dateString}.log"
	62	touch ${logFile}
	63	[[ ! -f ${logFile} ]] && echo "cannot write logfile $logfile" && return 1
	64	echo "logFile = $logFile"
	65
	66	#check lock
	67	lockFile=${logDirectory}/runQA.lock
	68	[[ -f ${lockFile} ]] && echo "lock ${lockFile} exists!" \| tee ${logFile} && return 1
	69	touch ${lockFile}
	70	[[ ! -f ${lockFile} ]] && echo "cannot lock $lockFile" \| tee ${logFile} && return 1
	71
	72	exec &>${logFile}
	73
	74	################################################################
	75	#ze detector loop
	76	for detectorScript in $ALICE_ROOT/PWGPP/QA/detectorQAscripts/*; do
	77	echo
	78	echo "##############################################"
	79	echo $(date)
	80	unset planB
	81	[[ ! ${detectorScript} =~ .*\.sh$ ]] && continue
	82	detector=${detectorScript%.sh}
	83	detector=${detector##*/}
	84
	85	#skip if excluded
	86	if [[ "${excludeDetectors}" =~ ${detector} ]]; then
	87	echo "${detector} is excluded in config, skipping..."
	88	continue
	89	fi
	90
	91	#if includeDetectors set, only process thoe detectors specified there
	92	if [[ -n ${includeDetectors} && ! "${includeDetectors}" =~ ${detector} ]]; then
	93	echo "${detector} not included in includeDetectors, skipping..."
	94	continue
	95	fi
	96
	97	logSummary=${logDirectory}/summary-${detector}-${dateString}.log
	98	outputDir=$(substituteDetectorName ${detector} ${outputDirectory})
	99	tmpDetectorRunDir=${workingDirectory}/tmpQAtmpRunDir${detector}-${dateString}
	100	if ! mkdir -p ${tmpDetectorRunDir}; then
	101	echo "cannot create the temp dir $tmpDetectorRunDir"
	102	continue
	103	fi
	104	cd ${tmpDetectorRunDir}
	105
	106	tmpPrefix=${tmpDetectorRunDir}/${outputDir}
	107	echo "running QA for ${detector}"
	108	echo " outputDir=$outputDir"
	109	echo " tmpPrefix=$tmpPrefix"
	110
	111	#unset the detector functions from previous iterations (detectors)
	112	unset -f runLevelQA
	113	unset -f periodLevelQA
	114	unset -f runLevelHighPtTreeQA
	115	unset -f periodLevelHighPtTreeQA
	116	source ${detectorScript}
	117
	118	#################################################################
	119	#produce the QA and trending tree for each file (run)
	120	unset arrOfTouchedProductions
	121	declare -A arrOfTouchedProductions
	122	while read qaFile; do
	123	echo
	124	echo $(date)
	125
	126	#first check if input file exists
	127	[[ ! -f ${qaFile%\#} ]] && echo "file ${qaFile%\#} not accessible" && continue
	128
	129	if ! guessRunData ${qaFile}; then
	130	echo "could not guess run data from ${qaFile}"
	131	continue
	132	fi
	133
	134	tmpProductionDir=${tmpPrefix}/${dataType}/${year}/${period}/${pass}
	135	tmpRunDir=${tmpProductionDir}/000${runNumber}
	136	mkdir -p ${tmpRunDir}
	137	cd ${tmpRunDir}
	138
	139	#by default we expect to have everything in the same archive
	140	highPtTree=${qaFile}
	141
	142	#maybe the input is not an archive, but a file
	143	[[ "${qaFile}" =~ QAresults.root$ ]] && highPtTree=""
	144	[[ "${qaFile}" =~ FilterEvents_Trees.root$ ]] && qaFile=""
	145
	146	#it is possible we get the highPt trees from somewhere else
	147	#search the list of high pt trees for the proper run number
	148	if [[ -n ${inputListHighPtTrees} ]]; then
	149	highPtTree=$(egrep -m1 ${runNumber} ${inputListHighPtTrees})
	150	echo "loaded the highPtTree ${highPtTree} from external file ${inputListHighPtTrees}"
	151	fi
	152
	153	echo qaFile=$qaFile
	154	echo highPtTree=$highPtTree
	155	echo ocdbStorage=${ocdbStorage}
	156	echo
	157
	158	#what if we have a zip archive?
	159	if [[ "$qaFile" =~ .*.zip$ ]]; then
	160	if unzip -l ${qaFile} \| egrep "QAresults.root" &>/dev/null; then
	161	qaFile="${qaFile}#QAresults.root"
	162	else
	163	qaFile=""
	164	fi
	165	fi
	166	if [[ "$highPtTree" =~ .*.zip$ ]]; then
	167	if unzip -l ${highPtTree} \| egrep "FilterEvents_Trees.root" &>/dev/null; then
	168	highPtTree="${highPtTree}#FilterEvents_Trees.root"
	169	else
	170	highPtTree=""
	171	fi
	172	fi
	173
	174	if [[ -n ${qaFile} && $(type -t runLevelQA) =~ "function" ]]; then
	175	echo running ${detector} runLevelQA for run ${runNumber} from ${qaFile}
	176	runLevelQA "${qaFile}" &> runLevelQA.log
	177	#perform some default actions:
	178	#if trending.root not created, create a default one
	179	if [[ ! -f trending.root ]]; then
	180	aliroot -b -q -l "$ALICE_ROOT/PWGPP/macros/simpleTrending.C(\"${qaFile}\",${runNumber},\"${detector}\",\"trending.root\",\"trending\",\"recreate\")" 2>&1 \| tee -a runLevelQA.log
	181	fi
	182	if [[ -f trending.root ]]; then
	183	arrOfTouchedProductions[${tmpProductionDir}]=1
	184	else
	185	echo "trending.root not created"
	186	fi
	187	fi
	188	#expert QA based on high pt trees
	189	if [[ -n ${highPtTree} && $(type -t runLevelHighPtTreeQA) =~ "function" ]]; then
	190	echo running ${detector} runLevelHighPtTreeQA for run ${runNumber} from ${highPtTree}
	191	runLevelHighPtTreeQA "${highPtTree}" &> runLevelHighPtTreeQA.log
	192	arrOfTouchedProductions[${tmpProductionDir}]=1
	193	fi
	194
	195	cd ${tmpDetectorRunDir}
	196
	197	done < ${inputList}
	198
	199	#################################################################
	200	#cache which productions were (re)done
	201	echo "list of processed productions:"
	202	echo " ${!arrOfTouchedProductions[@]}"
	203	echo
	204
	205	#################################################################
	206	#(re)do the merging/trending
	207	for tmpProductionDir in ${!arrOfTouchedProductions[@]}; do
	208	cd ${tmpProductionDir}
	209	echo
	210	echo "running period level stuff in ${tmpProductionDir}"
	211	echo $(date)
	212
	213	productionDir=${outputDir}/${tmpProductionDir#${tmpPrefix}}
	214	echo productionDir=${outputDir}/${tmpProductionDir#${tmpPrefix}}
	215
	216	mkdir -p ${productionDir}
	217	if [[ ! -d ${productionDir} ]]; then
	218	echo "cannot make productionDir $productionDir" && continue
	219	fi
	220
	221	#move runs to final destination
	222	for dir in ${tmpProductionDir}/000*; do
	223	echo
	224	oldRunDir=${outputDir}/${dir#${tmpPrefix}}
	225	if ! guessRunData "${dir}/dummyName"; then
	226	echo "could not guess run data from ${dir}"
	227	continue
	228	fi
	229
	230	#before moving - VALIDATE!!!
	231	if ! validate ${dir}; then
	232	continue
	233	fi
	234
	235	#moving a dir is an atomic operation, no locking necessary
	236	if [[ -d ${oldRunDir} ]]; then
	237	echo "removing old ${oldRunDir}"
	238	rm -rf ${oldRunDir}
	239	fi
	240	echo "moving new ${runNumber} to ${productionDir}"
	241	mv -f ${dir} ${productionDir}
	242	done
	243
	244	#go to a temp dir to do the period level stuff in a completely clean dir
	245	tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
	246	echo
	247	echo tmpPeriodLevelQAdir="${tmpProductionDir}/periodLevelQA"
	248	if ! mkdir -p ${tmpPeriodLevelQAdir}; then continue; fi
	249	cd ${tmpPeriodLevelQAdir}
	250
	251	#link the final list of per-run dirs here, just the dirs
	252	#to have a clean working directory
	253	unset linkedStuff
	254	declare -a linkedStuff
	255	for x in ${productionDir}/000; do [[ -d $x ]] && ln -s $x && linkedStuff+=(${x##/}); done
	256
	257	#merge trending files if any
	258	if /bin/ls 000*/trending.root &>/dev/null; then
	259	hadd trending.root 000*/trending.root &> periodLevelQA.log
	260	fi
	261
	262	#run the period level trending/QA
	263	if [[ -f "trending.root" && $(type -t periodLevelQA) =~ "function" ]]; then
	264	echo running ${detector} periodLevelQA for production ${period}/${pass}
	265	periodLevelQA trending.root &>> periodLevelQA.log
	266	else
	267	echo "WARNING: not running ${detector} periodLevelQA for production ${period}/${pass}, no trending.root"
	268	fi
	269
	270	if ! validate ${PWD}; then continue; fi
	271
	272	#here we are validated so move the produced QA to the final place
	273	#clean up linked stuff first
	274	[[ -n ${linkedStuff[@]} ]] && rm ${linkedStuff[@]}
	275	periodLevelLock=${productionDir}/runQA.lock
	276	if [[ ! -f ${periodLevelLock} ]]; then
	277	#some of the output could be a directory, so handle that
	278	#TODO: maybe use rsync?
	279	#lock to avoid conflicts:
	280	echo "${HOSTNAME} ${dateString}" > ${periodLevelLock}
	281	for x in ${tmpPeriodLevelQAdir}/*; do
	282	if [[ -d ${x} ]]; then
	283	echo "removing ${productionDir}/${x##*/}"
	284	rm -rf ${productionDir}/${x##*/}
	285	echo "moving ${x} to ${productionDir}"
	286	mv ${x} ${productionDir}
	287	fi
	288	if [[ -f ${x} ]]; then
	289	echo "moving ${x} to ${productionDir}"
	290	mv -f ${x} ${productionDir}
	291	fi
	292	done
	293	rm -f ${periodLevelLock}
	294	#remove the temp dir
	295	rm -rf ${tmpPeriodLevelQAdir}
	296	else
	297	echo "ERROR: cannot move to destination" >> ${logSummary}
	298	echo "production dir ${productionDir} locked!" >> ${logSummary}
	299	echo "check and maybe manually do:" >> ${logSummary}
	300	echo " rm ${periodLevelLock}" >> ${logSummary}
	301	echo " rsync -av ${tmpPeriodLevelQAdir}/ ${productionDir}/" >> ${logSummary}
	302	planB=1
	303	fi
	304
	305	done
	306
	307	cd ${workingDirectory}
	308
	309	if [[ -z ${planB} ]]; then
	310	echo
	311	echo removing ${tmpDetectorRunDir}
	312	rm -rf ${tmpDetectorRunDir}
	313	else
	314	executePlanB
	315	fi
	316	done #end of detector loop
	317
	318	#remove lock
	319	rm -f ${lockFile}
	320	}
	321
	322	executePlanB()
	323	{
	324	#in case of emergency
	325	if [[ -n ${MAILTO} ]]; then
	326	echo
	327	echo "trouble detected, sending email to ${MAILTO}"
	328
	329	grep BAD ${logSummary} \| mail -s "qa in need of assistance" ${MAILTO}
	330	fi
	331	}
	332
	333	validate()
	334	{
	335	summarizeLogs ${1} >> ${logSummary}
	336	logStatus=$?
	337	if [[ ${logStatus} -ne 0 ]]; then
	338	echo "WARNING not validated: ${1}"
	339	planB=1
	340	return 1
	341	fi
	342	return 0
	343	}
	344
	345	summarizeLogs()
	346	{
	347	local dir=$1
	348	[[ ! -d ${dir} ]] && dir=${PWD}
	349
	350	#print a summary of logs
	351	logFiles=(
	352	"*.log"
	353	"stdout"
	354	"stderr"
	355	)
	356
	357	#check logs
	358	local logstatus=0
	359	for log in ${dir}/${logFiles[*]}; do
	360	finallog=${PWD%/}/${log}
	361	[[ ! -f ${log} ]] && continue
	362	errorSummary=$(validateLog ${log})
	363	validationStatus=$?
	364	[[ validationStatus -ne 0 ]] && logstatus=1
	365	if [[ ${validationStatus} -eq 0 ]]; then
	366	#in pretend mode randomly report an error in rec.log some cases
	367	if [[ -n ${pretend} && "${log}" == "rec.log" ]]; then
	368	[[ $(( ${RANDOM}%2 )) -ge 1 ]] && echo "${finallog} BAD random error" \|\| echo "${finallog} OK"
	369	else
	370	echo "${finallog} OK"
	371	fi
	372	elif [[ ${validationStatus} -eq 1 ]]; then
	373	echo "${finallog} BAD ${errorSummary}"
	374	elif [[ ${validationStatus} -eq 2 ]]; then
	375	echo "${finallog} OK MWAH ${errorSummary}"
	376	fi
	377	done
	378
	379	#report core files
	380	while read x; do
	381	echo ${x}
	382	chmod 644 ${x}
	383	gdb --batch --quiet -ex "bt" -ex "quit" aliroot ${x} > stacktrace_${x//\//_}.log
	384	done < <(/bin/ls ${PWD}/*/core 2>/dev/null; /bin/ls ${PWD}/core 2>/dev/null)
	385
	386	return ${logstatus}
	387	}
	388
	389	validateLog()
	390	{
	391	log=${1}
	392	errorConditions=(
	393	'There was a crash'
	394	'floating'
	395	'error while loading shared libraries'
	396	'std::bad_alloc'
	397	's_err_syswatch_'
	398	'Thread [0-9]* (Thread'
	399	'AliFatal'
	400	'core dumped'
	401	'\.C.error:.\.h: No such file'
	402	'segmentation'
	403	'Interpreter error recovered'
	404	)
	405
	406	warningConditions=(
	407	'This is serious'
	408	)
	409
	410	local logstatus=0
	411	local errorSummary=""
	412	local warningSummary=""
	413
	414	for ((i=0; i<${#errorConditions[@]};i++)); do
	415	local tmp=$(grep -m1 -e "${errorConditions[${i}]}" ${log})
	416	[[ -n ${tmp} ]] && tmp+=" : "
	417	errorSummary+=${tmp}
	418	done
	419
	420	for ((i=0; i<${#warningConditions[@]};i++)); do
	421	local tmp=$(grep -m1 -e "${warningConditions[${i}]}" ${log})
	422	[[ -n ${tmp} ]] && tmp+=" : "
	423	warningSummary+=${tmp}
	424	done
	425
	426	if [[ -n ${errorSummary} ]]; then
	427	echo "${errorSummary}"
	428	return 1
	429	fi
	430
	431	if [[ -n ${warningSummary} ]]; then
	432	echo "${warningSummary}"
	433	return 2
	434	fi
	435
	436	return 0
	437	}
	438
	#Set the default configuration, then apply an optional config file and
	#finally the option=value arguments (arguments win over the config file).
	#  "$@" - options in the form option=value; configFile=<file> names a
	#         shell fragment that is sourced before the other options are applied
	#Every option is echoed as option=value and exported under its own name.
	#The argument list is kept in the global array "args".
	#Returns 1 if the config file does not exist or an argument is not of the
	#form option=value (a non-empty name and a non-empty value are required).
	parseConfig()
	{
	  args=("$@")
	  local opt
	  local var
	  local value

	  #config file
	  configFile=""
	  #where to search for qa files
	  inputList=file.list
	  #working directory
	  workingDirectory="${PWD}"
	  #where to place the final qa plots
	  #outputDirectory="/afs/cern.ch/work/a/aliqa%det/www/"
	  outputDirectory="${workingDirectory}/%DET"
	  #filter out detector option
	  excludeDetectors="EXAMPLE"
	  #logs
	  logDirectory=${workingDirectory}/logs
	  #OCDB storage
	  ocdbStorage="raw://"
	  #email to
	  #MAILTO="fbellini@cern.ch"

	  #first, check if the config file is configured
	  #if yes - source it so that other options can override it
	  #the value is assigned directly (no eval): nothing from the command line
	  #is executed and file names containing blanks work
	  for opt in "${args[@]}"; do
	    if [[ ${opt} =~ ^configFile=.* ]]; then
	      configFile="${opt#*=}"
	      if [[ ! -f ${configFile} ]]; then
	        echo "configFile ${configFile} not found, exiting..."
	        return 1
	      fi
	      echo "using config file: ${configFile}"
	      source "${configFile}"
	      break
	    fi
	  done

	  #then, parse the options as they override the options from file
	  for opt in "${args[@]}"; do
	    if [[ ! "${opt}" =~ .=. ]]; then
	      #report the offending argument itself
	      echo "badly formatted option ${opt}, should be: option=value, stopping..."
	      return 1
	    fi
	    var="${opt%%=*}"
	    value="${opt#*=}"
	    echo "${var}=${value}"
	    export "${var}=${value}"
	  done
	}
	486
	#Guess the run metadata from the components of a file path.
	#  ${1} - path of a QA file, components separated by "/"
	#Sets the globals period, runNumber, year, pass, legoTrainRunNumber and
	#dataType, and rewrites the global ocdbStorage through setYear.
	#NOTE(review): the path is walked from the last component to the first and
	#every match overwrites the previous one, so for period, runNumber, year
	#and dataType the match closest to the start of the path is what remains.
	#Returns 0 if a run number was found, 1 otherwise.
	guessRunData()
	{
	  period=""
	  runNumber=""
	  year=""
	  pass=""
	  legoTrainRunNumber=""
	  dataType=""

	  local shortRunNumber=""
	  #split on "/" with read: unlike an unquoted expansion this never globs
	  local -a path=()
	  IFS="/" read -r -a path <<< "${1}"
	  local dirDepth=$(( ${#path[@]} - 1 ))
	  local x
	  local field
	  local fieldPrev
	  local fieldNext

	  for (( x = dirDepth; x >= 0; x-- )); do

	    #the first component has no predecessor: do not reuse a stale value
	    fieldPrev=""
	    (( x > 0 )) && fieldPrev=${path[x-1]}
	    field=${path[x]}
	    fieldNext=${path[x+1]}

	    #a purely numeric directory right above a .zip/.root file is a lego train number
	    [[ ${field} =~ ^[0-9]*$ && ${fieldNext} =~ (.*\.zip$|.*\.root$) ]] && legoTrainRunNumber=${field}
	    [[ -n ${legoTrainRunNumber} && -z ${pass} ]] && pass=${fieldPrev}
	    #the period is the LHCyyx part, anything from the last "_" on is dropped
	    [[ ${field} =~ ^LHC[0-9][0-9][a-z].*$ ]] && period=${field%_*}
	    [[ ${field} =~ ^000[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && runNumber=${field#000}
	    [[ ${field} =~ ^[0-9][0-9][0-9][0-9][0-9][0-9]$ ]] && shortRunNumber=${field}
	    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
	    [[ ${field} =~ ^(^sim$|^data$) ]] && dataType=${field}
	  done

	  #without a lego train number the pass is the directory holding the file
	  #(guarded: a path with a single component has no such directory)
	  if [[ -z ${legoTrainRunNumber} ]]; then
	    pass=""
	    (( dirDepth >= 1 )) && pass=${path[dirDepth-1]}
	  fi
	  [[ "${dataType}" =~ ^sim$ ]] && pass="passMC" && runNumber=${shortRunNumber}
	  [[ -n ${legoTrainRunNumber} ]] && pass+="_lego${legoTrainRunNumber}"

	  #modify the OCDB: set the year
	  #both arguments are quoted: an empty year must stay the first argument,
	  #otherwise the storage string moves into its place and the result is empty
	  ocdbStorage=$(setYear "${year}" "${ocdbStorage}")

	  #if [[ -z ${dataType} || -z ${year} || -z ${period} || -z ${runNumber} || -z ${pass} ]];
	  if [[ -z ${runNumber} ]]; then
	    #error condition
	    return 1
	  fi
	  #ALL OK
	  return 0
	}
	536
	#Insert a detector name into a directory template.
	#  ${1} - detector name
	#  ${2} - directory template; the first %det is replaced by the lower-cased
	#         detector name, the first %DET by the detector name as given
	#Prints the resulting directory (one line) and returns 0.
	#If the template holds neither placeholder nothing is printed and 1 is returned.
	substituteDetectorName()
	{
	  local det=$1
	  local dir=$2
	  local substituted=""

	  #the lower-cased name is used for %det only; det itself stays untouched
	  #so that %DET still gets the name as given
	  if [[ ${dir} == *%det* ]]; then
	    dir=${dir/\%det/${det,,}}
	    substituted=1
	  fi
	  if [[ ${dir} == *%DET* ]]; then
	    dir=${dir/\%DET/${det}}
	    substituted=1
	  fi

	  [[ -n ${substituted} ]] || return 1
	  #printed verbatim: no word splitting, no globbing
	  printf '%s\n' "${dir}"
	  return 0
	}
	544
	#Print the absolute path of an existing regular file.
	#  ${1} - path of the file, relative or absolute
	#The directory part is resolved with cd in a subshell, so the caller's
	#working directory and OLDPWD are left untouched.
	#Returns 0 and prints <absolute directory>/<file name> on success,
	#returns 1 (printing nothing) if ${1} is not a regular file or its
	#directory cannot be entered.
	get_realpath()
	{
	  # file must exist
	  [[ -f "$1" ]] || return 1

	  local dirPart
	  if [[ "$1" == */* ]]; then
	    dirPart=${1%/*}
	    # a file directly under the root directory leaves nothing before the "/"
	    [[ -n ${dirPart} ]] || dirPart=/
	  else
	    # file is local
	    dirPart=.
	  fi

	  # CDPATH is emptied so that a relative directory is never looked up elsewhere
	  local absDir
	  absDir=$(CDPATH="" cd -- "${dirPart}" 2>/dev/null && printf '%s' "${PWD}") || return 1

	  # reassemble realpath (a trailing "/" is dropped so that "/" does not give "//name")
	  printf '%s\n' "${absDir%/}/${1##*/}"
	  return 0 # success
	}
	569
	#Set the year in a path.
	#  ${1} - year to be set (or a path the year is guessed from)
	#  ${2} - where to set the year
	#The year of each argument is determined with guessYear. If both are found
	#and differ, the first "/<year of ${2}>/" in ${2} is replaced by
	#"/<year of ${1}>/". A year that is not enclosed in slashes on both sides
	#is not replaced.
	#Prints the resulting path (or ${2} unchanged); always returns 0.
	setYear()
	{
	  local year1
	  local year2
	  year1=$(guessYear "${1}")
	  year2=$(guessYear "${2}")
	  local path=${2}

	  if [[ -n ${year1} && -n ${year2} && ${year1} != "${year2}" ]]; then
	    local from="/${year2}/"
	    local to="/${year1}/"
	    #the quoted pattern is matched literally
	    path=${2/"${from}"/${to}}
	  fi

	  #printed verbatim: no word splitting, no globbing on e.g. "?" in a storage URI
	  printf '%s\n' "${path}"
	  return 0
	}
	582
	#Guess the year from a path, pick the rightmost one.
	#  ${1} - path (or plain string) whose "/"-separated components are inspected
	#A component counts as a year if it is exactly 20 followed by two digits.
	#Prints the last such component, or an empty line if there is none;
	#always returns 0.
	guessYear()
	{
	  #split on "/" with read: unlike an unquoted expansion this never
	  #glob-expands a component against the files in the current directory
	  local -a pathArray=()
	  IFS="/" read -r -a pathArray <<< "${1}"
	  local field
	  local year=""
	  for field in "${pathArray[@]}"; do
	    [[ ${field} =~ ^20[0-9][0-9]$ ]] && year=${field}
	  done
	  printf '%s\n' "${year}"
	  return 0
	}
	596
	#script entry point: pass all command line arguments on to main (defined at the top of this script)
	597	main "$@"