flat friends update

[u/mrichter/AliRoot.git] / PWGPP / benchmark / alirelval
diff --git a/PWGPP/benchmark/alirelval b/PWGPP/benchmark/alirelval

index 476ce2d4d6a3432eb90ca1940c292f3fd8496ab1..bd2c9d06b33c7a2c8bfacbd0142ff50495a40b28 100755 (executable)
--- a/PWGPP/benchmark/alirelval
+++ b/PWGPP/benchmark/alirelval
@@ -1,7 +1,7 @@
  #!/bin/bash
  
  #
-# launch-relval.sh -- by Dario Berzano <dario.berzano@cern.ch>
+# alirelval -- by Dario Berzano <dario.berzano@cern.ch>
  #
  # Controls the release validation submission by managing the validation virtual
  # cluster.
@@ -25,6 +25,7 @@ errStatusUnavailable=10
  errPickSession=11
  errCopyKey=12
  errAttachScreen=13
+errRecycleSession=14
  
  # error codes not treated as errors (100 to 140)
  errStatusRunning=100
@@ -33,15 +34,15 @@ errStatusDoneOk=102
  errStatusDoneFail=103
  
  # thresholds
-maxVmLaunchAttempts=10
+maxVmLaunchAttempts=100
  maxSshConnectAttempts=400
-maxVmAddressWait=120
+maxVmAddressWait=200
  
  # working directory prefix
-sessionPrefix='/tmp/alirelval'
+sessionPrefix="$HOME/.alice-release-validation"
  
-# screen name for the validation
-screenName='AliceReleaseValidation'
+# screen name: <screenPrefix>-<sessionTag>
+screenPrefix='AliRelVal'
  
  # program name
  Prog=$(basename "$0")
@@ -96,7 +97,7 @@ function RunVM() {
    userData="$3"
    keyName="$4"
    keyFile="$5"
-  local raw iip iid ret attempt createdKeypair
+  local raw iip iid ret attempt createdKeypair error
  
    # keypair part: if file does not exist, create keypair
    if [ ! -e "$keyFile" ] ; then
@@ -121,9 +122,13 @@ function RunVM() {
          ( euca-delete-keypair "$keyName" ; rm -f "$keyFile" ) > /dev/null 2>&1
        fi
        return $errRunVm
+    elif [ $attempt != 1 ] ; then
+      pr " * Pausing between retries"
+      sleep 5
      fi
  
      pr -n " * Launching VM (attempt #$attempt/$maxVmLaunchAttempts)..."
+    error=0
  
      raw=$( euca-run-instances "$imageId" -t "$profile" -d "$userData" -k "$keyName" 2>&1 )
      ret=$?
@@ -150,7 +155,8 @@ function RunVM() {
        # error state?
        echo "$raw" | grep -i error -q
        if [ $? == 0 ] ; then
-        pr ; pr -n " * VM went to error state"
+        pr ; pr " * VM went to error state"
+        error=1
          break
        fi
  
@@ -173,7 +179,8 @@ function RunVM() {
      fi
  
      # we don't: terminate (timeout)
-    pr 'timeout' ; pr " * Terminating instance $iid"
+    [ "$error" != 1 ] && pr 'timeout'
+    pr " * Terminating instance $iid"
      euca-terminate-instances "$iid" > /dev/null 2>&1
  
    done
@@ -187,13 +194,20 @@ function RunVM() {
  
  # Prepare the validation session directory. Syntax:
  #
-#   PrepareSession <aliroot_tag>
+#   PrepareSession <aliroot_tag> <new_session_name>
  #
  # Returns 0 on success, nonzero on failure. Session tag returned on stdout.
  function PrepareSession() {
    local aliRootTag sessionTag sessionDir
    aliRootTag="$1"
-  sessionTag="${aliRootTag}_$(ndate)"
+
+  # session tag can be "auto" or any user-specified value
+  if [ "$2" != 'auto' ] ; then
+    sessionTag="$2"
+  else
+    sessionTag="${aliRootTag}_$(ndate)"
+  fi
+  shift 2
    sessionDir="$sessionPrefix/$sessionTag"
  
    # session directory already exists? abort
@@ -212,10 +226,11 @@ function PrepareSession() {
    # aliroot version written to a file
    echo "$aliRootTag" > "$sessionDir/aliroot-version.txt"
  
-  # benchmark script, configuration and file list
-  cp benchmark.sh benchmark.config files.list "$sessionDir/"
+  # benchmark script, benchmark config, cloud config and file list
+  cp -L benchmark.sh cloud.config benchmark.config files.list "$sessionDir/"
    if [ $? != 0 ] ; then
-    pr "Cannot copy benchmark configuration and script to $sessionDir"
+    pr "Cannot copy configuration files to $sessionDir"
+    rm -rf "$sessionDir"
      return $errSessionDir
    fi
  
@@ -229,6 +244,24 @@ function PrepareSession() {
      ) >> "$sessionDir/benchmark.config"
    done
  
+  # command-line options override the configuration
+  if [ $# != 0 ] ; then
+    pr "Note: the following command-line options will override the corresponding ones in the config files:"
+    ( echo ''
+      echo "### from the command line ###"
+      while [ $# -gt 0 ] ; do
+        extraName="${1%%=*}"
+        extraVal="${1#*=}"
+        if [ "$extraName" != "$1" ] ; then
+          pr " * $extraName = $extraVal"
+          echo "$1"
+        fi
+        shift
+      done
+      echo ''
+    ) >> "$sessionDir/benchmark.config"
+  fi
+
    # success: return the session tag and move to the session directory
    pr "*** Creating new working session: $sessionTag ***"
    pr "*** Use this name for future session operations ***"
@@ -241,6 +274,24 @@ function PrepareSession_Undo() {
    rm -rf "$sessionPrefix/$1"
  }
  
+# Recycle the VM of an existing session: copy its instance id, address and key
+# into the current directory. Args: <session_tag> (unused) <from_session_tag>.
+# Returns 0 on success, $errRecycleSession as soon as one file cannot be copied.
+function RecycleSession() {
+  local fromSessionDir="$sessionPrefix/$2"
+  local f
+
+  for f in 'instance-id.txt' 'instance-address.txt' 'key.pem' ; do
+    # -L: copy the file itself even if the source session holds a symlink
+    if ! cp -L "$fromSessionDir/$f" "$f" > /dev/null 2>&1 ; then
+      pr "Cannot copy $f from the source session dir $fromSessionDir"
+      return $errRecycleSession
+    fi
+  done
+
+  return 0
+}
+
  # Move into the session tag directory. Usage:
  #
  #   MoveToSessionDir <session_tag>
@@ -259,7 +310,7 @@ function MoveToSessionDir_Undo() {
  
  # Load the benchmark configuration
  function LoadConfig() {
-  source benchmark.config > /dev/null 2>&1
+  source cloud.config > /dev/null 2>&1
    if [ $? != 0 ] ; then
      pr "Cannot load benchmark configuration"
      return $errCfg
@@ -290,7 +341,8 @@ function InstantiateValidationVM() {
    elif [ -e "$cloudKeyFile" ] ; then
      # copy key to session dir
      pr -n "Copying private key $cloudKeyFile to session directory..."
-    cp "$cloudKeyFile" 'key.pem' 2> /dev/null
+    rm -f 'key.pem'
+    cp -L "$cloudKeyFile" 'key.pem' 2> /dev/null
      if [ $? != 0 ] ; then
        pr 'error'
        return $errCopyKey
@@ -357,8 +409,9 @@ function Shell() {
  
  # Checks status of the validation
  function Status() {
-  local raw ret screen exitcode
-  raw=$( VMSSH -t "screen -ls 2> /dev/null | grep -q .$screenName && echo -n 'screen_yes ' || echo -n 'screen_no ' ; cat alirelval/validation.done 2> /dev/null || echo 'not_done' ; true" 2> /dev/null )
+  local raw ret screen exitcode sessionTag
+  sessionTag="$1"
+  raw=$( VMSSH -t "screen -ls 2> /dev/null | grep -q .${screenPrefix}-${sessionTag} && echo -n 'screen_yes ' || echo -n 'screen_no ' ; cat $sessionTag/validation.done 2> /dev/null || echo 'not_done' ; true" 2> /dev/null )
    raw=$( echo "$raw" | tr -cd '[:alnum:]_ ' ) # garbage removal
    ret=$?
  
@@ -419,10 +472,12 @@ function Validate() {
    # create helper script to launch benchmark
    cat > run-benchmark.sh <<_EoF_
  #!/bin/bash
+export LANG=C
  cd \$(dirname "\$0")
  v=validation.done
  rm -f \$v
-env ALIROOT_VERSION=$(cat aliroot-version.txt) ./benchmark.sh run $sessionTag files.list benchmark.config
+env ALIROOT_VERSION=$(cat aliroot-version.txt) ./benchmark.sh run $sessionTag files.list benchmark.config 2>&1 | tee run-benchmark.log
  #sleep 1000
-ret=\$?
+# after the pipe \$? is the status of tee: use the one of benchmark.sh instead
+ret=\${PIPESTATUS[0]}
  echo \$ret > \$v
@@ -435,12 +489,12 @@ _EoF_
  
    # transfer files
    pr 'Transferring files to the VM'
-  rsync -av -e "$(VMSSH --rsync-cmd)" $PWD/ $cloudUserName@$instanceIp:alirelval/ || return $errLaunchValidation
+  rsync -av -e "$(VMSSH --rsync-cmd)" $PWD/ $cloudUserName@$instanceIp:$sessionTag/ || return $errLaunchValidation
  
    # open a screen that does something; note that the command is not executed if
    # the screen already exists, which is what we want
    # note: sleep necessary to avoid "dead" screens
-  VMSSH -t "screen -wipe > /dev/null 2>&1 ; if screen -ls | grep -q $screenName ; then ret=42 ; else screen -dmS AliceReleaseValidation alirelval/run-benchmark.sh ; ret=0 ; sleep 3 ; fi ; exit \$ret"
+  VMSSH -t "screen -wipe > /dev/null 2>&1 ; if screen -ls | grep -q ${screenPrefix}-${sessionTag} ; then ret=42 ; else screen -dmS ${screenPrefix}-${sessionTag} $sessionTag/run-benchmark.sh ; ret=0 ; sleep 3 ; fi ; exit \$ret"
    ret=$?
  
    # message
@@ -468,7 +522,7 @@ function Attach() {
    local sessionTag
    sessionTag="$1"
  
-  VMSSH -t "( screen -wipe ; screen -rx $screenName ) > /dev/null 2>&1"
+  VMSSH -t "( screen -wipe ; screen -rx ${screenPrefix}-${sessionTag} ) > /dev/null 2>&1"
  
    if [ $? != 0 ] ; then
      pr "Cannot attach screen: check if validation is running with:"
@@ -483,7 +537,8 @@ function Attach() {
  
  # Pick session interactively
  function PickSession() {
-  local sessionTag sess listSessions
+  local sessionTag sess listSessions mess
+  mess="$1"
    listSessions=()
    mkdir -p "$sessionPrefix"
  
@@ -497,6 +552,9 @@ function PickSession() {
      return $errPickSession
    fi
  
+  # print user message if provided
+  [ "$mess" != '' ] && pr "$mess"
+
    pr 'Available sessions (most recent first):'
    for ((i=0; i<${#listSessions[@]}; i++)) ; do
      pr "$( printf "  % 2d. ${listSessions[$i]}" $((i+1)) )"
@@ -535,15 +593,24 @@ function Help() {
    pr "$Prog -- by Dario Berzano <dario.berzano@cern.ch>"
    pr 'Controls the Release Validation workflow on the cloud for AliRoot.'
    pr
-  pr "Usage 1: $Prog [--prepare|--launch] --aliroot <aliroot_tag>"
+  pr "Usage 1: $Prog [--prepare|--launch|--recycle] [--from-session <source_session_tag>] --aliroot <aliroot_tag> [--session <custom_session_tag>] [-- arbitraryOpt1=value [arbitraryOpt2=value2...]]"
    pr
    pr 'A new session is created to validate the specified AliRoot tag.'
    pr
    pr '  --prepare  : prepares the session directory containing the files needed'
-  pr '              for the validation'
+  pr '               for the validation'
+  pr '  --recycle  : prepares a new session by recycling the head node from an'
+  pr '               existing one. Source session is specified via the'
+  pr '               --from-session switch or it can be interactively selected'
    pr '  --launch   : launches the full validation process: prepares session,'
-  pr '              runs the virtual machine, launches the validation program'
+  pr '               runs the virtual machine, launches the validation program'
    pr '  --aliroot  : the AliRoot tag to validate, in the form "vAN-20140610"'
+  pr '  --session  : custom session name to provide to the validation session:'
+  pr '               if omitted, defaults to <aliroot_tag>_<utc_datetime_now>'
+  pr
+  pr 'Arbitrary options (in the form variable=value) can be specified after the'
+  pr 'double dash and will override the corresponding options in any of the'
+  pr 'configuration files.'
    pr ; pr
    pr "Usage 2: $Prog [--runvm|--validate|--shell|--status] --session <session_tag>"
    pr
@@ -580,7 +647,7 @@ function Help() {
  function Main() {
  
    # local variables
-  local Args aliRootTag EnterShell Actions sessionTag
+  local Args aliRootTag EnterShell Actions sessionTag fromSessionTag
    Actions=()
  
    # parse command line options
@@ -596,6 +663,10 @@ function Main() {
          sessionTag="$2"
          shift 2
        ;;
+      --from-session)
+        fromSessionTag="$2"
+        shift 2
+      ;;
  
        # actions
        --launch)
@@ -607,6 +678,10 @@ function Main() {
          Actions=( PrepareSession MoveToSessionDir )
          shift
        ;;
+      --recycle)
+        Actions=( PrepareSession MoveToSessionDir RecycleSession )
+        shift
+      ;;
        --runvm)
          Actions=( MoveToSessionDir LoadConfig InstantiateValidationVM )
          shift
@@ -634,13 +709,18 @@ function Main() {
          exit 0
        ;;
  
+      # end of options
+      --)
+        shift
+        break
+      ;;
+
        *)
          pr "Invalid option: $1. Use --help for assistance."
          return $errInvalidOpt
        ;;
      esac
    done
-  shift # --
  
    # check for the presence of the required tools in the $PATH
    for T in euca-describe-instances euca-describe-regions euca-run-instances euca-create-keypair euca-delete-keypair rsync ; do
@@ -679,17 +759,23 @@ function Main() {
          pr 'Specify an AliRoot version with --aliroot <tag>'
          return $errInvalidOpt
        fi
-      if [ "$sessionTag" != '' ] ; then
-        pr 'Cannot use --session with --prepare. Use --help for assistance.'
-        return $errInvalidOpt
+      [ "$sessionTag" == '' ] && sessionTag='auto'
+      sessionTag=$( RunAction "$A" "$aliRootTag" "$sessionTag" "$@" )
+      ret=$?
+    elif [ "$A" == 'RecycleSession' ] ; then
+      # special action requiring additional parameters
+      if [ "$fromSessionTag" == '' ] ; then
+        fromSessionTag=$( PickSession 'Select a source session to recycle.' )
+        ret=$?
+        [ $ret != 0 ] && break
        fi
-      sessionTag=$( RunAction "$A" "$aliRootTag" )
+      RunAction "$A" "$sessionTag" "$fromSessionTag"
        ret=$?
      else
        if [ "$sessionTag" == '' ] ; then
          sessionTag=$( PickSession )
          ret=$?
-        [ $ret != 0 ] && return $ret
+        [ $ret != 0 ] && break
        fi
        RunAction "$A" "$sessionTag"
        ret=$?