#!/bin/bash

#
# alirelval -- by Dario Berzano <dario.berzano@cern.ch>
#
# Controls the release validation submission by managing the validation virtual
# cluster.
#

#
# Variables
#

# error codes: used as return values by the functions below and, through Main,
# as the exit status of the script
errCfg=1
errMissingCmd=2
errEc2Auth=3
errInvalidOpt=4
errSessionDir=5
errCreateKey=6
errRunVm=7
errLaunchValidation=8
errSshNotReady=9
errStatusUnavailable=10
errPickSession=11
errCopyKey=12
errAttachScreen=13
errRecycleSession=14

# error codes not treated as errors (100 to 140): returned by Status to report
# the state of the validation
errStatusRunning=100
errStatusNotRunning=101
errStatusDoneOk=102
errStatusDoneFail=103

# thresholds:
#  - maxVmLaunchAttempts: launch attempts made by RunVM before giving up
#  - maxSshConnectAttempts: SSH connection retries made by WaitSsh
#  - maxVmAddressWait: one-second polls made by RunVM while waiting for the
#    address of a freshly launched VM
maxVmLaunchAttempts=100
maxSshConnectAttempts=400
maxVmAddressWait=200

# working directory prefix: each session lives in <sessionPrefix>/<sessionTag>
sessionPrefix="$HOME/.alice-release-validation"

# screen name: <screenPrefix>-<sessionTag>
screenPrefix='AliRelVal'

# program name (used in the help and hint messages)
Prog=$(basename "$0")

#
# Functions
#

# Pretty print: writes a message in bold on stderr. Syntax:
#
#   pr [-n] [<message>...]
#
# With -n as first argument the trailing newline is suppressed. Several message
# arguments are joined with single spaces. Backslash escapes contained in the
# message are interpreted (echo -e). Without a message an empty bold line is
# printed. Nothing is ever written on stdout.
function pr() {
  local IFS=' '   # "$*" joins on the first character of IFS
  local -a echoOpts
  echoOpts=( -e )
  if [ "$1" == '-n' ] ; then
    echoOpts=( -n -e )
    shift
  fi
  # "$*" and not "$@": the arguments are embedded in one single string
  echo "${echoOpts[@]}" "\033[1m$*\033[m" >&2
}

# Current UTC date and time in a compact form usable inside names, e.g.
# 20140612-123047-utc. Printed on stdout.
function ndate() {
  local stampFormat='+%Y%m%d-%H%M%S-utc'
  date -u "$stampFormat"
}

# Creates an empty temporary file under /tmp and prints its path on stdout.
# The return value is the one of mktemp.
function tmpf() {
  local template='/tmp/alirelval-XXXX'
  mktemp "$template"
}

# Runs the given command keeping it quiet: stdout and stderr are collected in
# a temporary file which is dumped on stderr, after a notice, only when the
# command fails. Syntax:
#
#   swallow <command> [<arg>...]
#
# Returns the exit status of the command.
function swallow() {
  local logFile rc
  logFile=$(tmpf)
  if "$@" > "$logFile" 2>&1 ; then
    rc=0
  else
    rc=$?
    pr "Command failed (exit status: $rc): $@"
    cat "$logFile" >&2
  fi
  rm -f "$logFile"
  return $rc
}

# Launch a VM. Create the keypair if the given keyfile does not exist. Syntax:
#
#   RunVM <image_id> <profile> <user_data> <key_name> <key_file>
#
# Returns 0 on success, $errCreateKey if the keypair cannot be created and
# $errRunVm when $maxVmLaunchAttempts launch attempts have been used up. On
# success "<instance_id> <ip_address>" is written on stdout and must be parsed
# by the caller; progress messages are printed through pr.
function RunVM() {
  local imageId profile userData keyName keyFile
  imageId="$1"
  profile="$2"
  userData="$3"
  keyName="$4"
  keyFile="$5"
  local raw iip iid ret attempt createdKeypair error i

  # keypair part: if file does not exist, create keypair
  if [ ! -e "$keyFile" ] ; then
    pr "Creating a new keypair: $keyName (private key: $keyFile)"
    swallow euca-create-keypair -f "$keyFile" "$keyName"
    if [ $? != 0 ] ; then
      pr 'Problems creating the keypair'
      return $errCreateKey
    fi
    createdKeypair=1
  fi

  attempt=0
  pr 'Attempting to run virtual machine'

  # resubmit loop
  while true ; do

    if [ $((++attempt)) -gt $maxVmLaunchAttempts ] ; then
      pr " * Reached maximum number of attempts, giving up"
      # a keypair created by this call is of no use without a VM: remove both
      # the cloud-side keypair and the private key file
      if [ "$createdKeypair" == 1 ] ; then
        ( euca-delete-keypair "$keyName" ; rm -f "$keyFile" ) > /dev/null 2>&1
      fi
      return $errRunVm
    elif [ $attempt != 1 ] ; then
      pr " * Pausing between retries"
      sleep 5
    fi

    pr -n " * Launching VM (attempt #$attempt/$maxVmLaunchAttempts)..."
    error=0

    raw=$( euca-run-instances "$imageId" -t "$profile" -d "$userData" -k "$keyName" 2>&1 )
    ret=$?
    # the instance ID is the second field of the first INSTANCE line
    iid=$( echo "$raw" | grep -E '^INSTANCE' | head -n1 | awk '{ print $2 }' )
    if [ $ret != 0 ] || [ "$iid" == '' ] ; then
      # 'hard' error, but can be temporary
      pr 'error: message follows'
      echo "$raw" >&2
      sleep 1
      continue
    else
      pr 'ok'
    fi

    pr " * VM has instance ID $iid"
    pr -n " * Waiting for IP address..."

    # wait for address loop
    iip=''
    for ((i=0; i<$maxVmAddressWait; i++)) ; do
      sleep 1
      raw=$( euca-describe-instances 2>&1 | grep -E '^INSTANCE' | grep "$iid" | head -n1 )

      # error state?
      echo "$raw" | grep -i error -q
      if [ $? == 0 ] ; then
        pr ; pr " * VM went to error state"
        error=1
        break
      fi

      # no error: try to parse address (NOTE: only IPv4 for the moment)
      iip=$( echo "$raw" | grep -oE '([0-9]{1,3}\.){3}[0-9]{1,3}' )
      if [ "$iip" != '' ] ; then
        pr
        break
      fi

      # no address
      pr -n '.'

    done

    # do we have address?
    if [ "$iip" != '' ] ; then
      pr " * VM has address $iip"
      break
    fi

    # we don't: terminate (timeout)
    [ "$error" != 1 ] && pr 'timeout'
    pr " * Terminating instance $iid"
    euca-terminate-instances "$iid" > /dev/null 2>&1

  done

  # success: the cloud-side keypair created by this call is removed, the
  # private key file is kept
  [ "$createdKeypair" == 1 ] && euca-delete-keypair "$keyName" > /dev/null 2>&1
  echo "$iid $iip" # must be parsed
  return 0

}

# Prepare the validation session directory. Syntax:
#
#   PrepareSession <aliroot_tag> <new_session_name> [<name>=<value>...]
#
# <new_session_name> can be "auto" (or empty): the session is then named
# <aliroot_tag>_<utc_datetime_now>. The configuration files are taken from the
# current directory. Optional <name>=<value> arguments are appended to the
# session's benchmark.config; arguments without a "=" are ignored.
#
# Returns 0 on success, $errSessionDir on failure. Session tag returned on
# stdout.
function PrepareSession() {
  local aliRootTag sessionTag sessionDir f extraName extraVal
  aliRootTag="$1"

  # session tag can be "auto" (or empty) or any user-specified value
  if [ "$2" != 'auto' ] && [ "$2" != '' ] ; then
    sessionTag="$2"
  else
    sessionTag="${aliRootTag}_$(ndate)"
  fi
  shift 2
  sessionDir="$sessionPrefix/$sessionTag"

  # session directory already exists? abort
  if [ -d "$sessionDir" ] ; then
    pr "Session directory already exists, aborting"
    return $errSessionDir
  fi

  # create working directory
  mkdir -p "$sessionDir"
  if [ $? != 0 ] ; then
    pr "Fatal: cannot create session directory $sessionDir"
    return $errSessionDir
  fi

  # aliroot version written to a file
  echo "$aliRootTag" > "$sessionDir/aliroot-version.txt"

  # benchmark script, benchmark config, cloud config and file list
  cp -L benchmark.sh cloud.config benchmark.config files.list "$sessionDir/"
  if [ $? != 0 ] ; then
    pr "Cannot copy configuration files to $sessionDir"
    rm -rf "$sessionDir"
    return $errSessionDir
  fi

  # append local files to the configuration
  for f in benchmark.config.d/*.config ; do
    # an unmatched glob stays literal: skip it
    [ ! -e "$f" ] && continue
    ( echo ''
      echo "### from $f ###"
      cat "$f"
      echo ''
    ) >> "$sessionDir/benchmark.config"
  done

  # command-line options override the configuration
  if [ $# != 0 ] ; then
    pr "Note: the following command-line options will override the corresponding ones in the config files:"
    ( echo ''
      echo "### from the command line ###"
      while [ $# -gt 0 ] ; do
        extraName="${1%%=*}"
        extraVal="${1#*=}"
        # name differs from the whole argument only if a "=" is present
        if [ "$extraName" != "$1" ] ; then
          pr " * $extraName = $extraVal"
          echo "$1"
        fi
        shift
      done
      echo ''
    ) >> "$sessionDir/benchmark.config"
  fi

  # success: return the session tag on stdout (moving to the session directory
  # is left to the caller)
  pr "*** Creating new working session: $sessionTag ***"
  pr "*** Use this name for future session operations ***"
  echo "$sessionTag"
  return 0
}

# Undo the previous action: removes the directory of the given session. Syntax:
#
#   PrepareSession_Undo <session_tag>
#
# Nothing is done when the session tag is empty.
function PrepareSession_Undo() {
  # with an empty tag the path would be the session prefix itself, and all
  # the existing sessions would be wiped out
  if [ "$1" == '' ] ; then
    return 0
  fi
  rm -rf "$sessionPrefix/$1"
}

# Recycle the VM from an existing session: the instance ID, the instance
# address and the private key of the source session are copied into the
# current directory. Syntax:
#
#   RecycleSession <session_tag> <from_session_tag>
#
# Returns 0 on success, $errRecycleSession if one of the files cannot be copied.
function RecycleSession() {
  local sessionTag fromSessionTag fromSessionDir f
  sessionTag="$1"
  fromSessionTag="$2"
  fromSessionDir="$sessionPrefix/$fromSessionTag"

  for f in 'instance-id.txt' 'instance-address.txt' 'key.pem' ; do
    if ! cp -L "$fromSessionDir/$f" "$f" > /dev/null 2>&1 ; then
      pr "Cannot copy $f from the source session dir $fromSessionDir"
      return $errRecycleSession
    fi
  done

  return 0
}

# Move into the session tag directory. Usage:
#
#   MoveToSessionDir <session_tag>
#
# The directory we come from is saved in $originalWorkDir. When no argument is
# given the $sessionTag variable of the caller is used instead.
#
# Returns 0 on success, $errSessionDir on error.
function MoveToSessionDir() {
  # honour the argument; $sessionTag is only a fallback for callers relying on
  # the variable being visible here
  local tag="${1:-$sessionTag}"
  originalWorkDir="$PWD"
  # an empty tag would silently move to the session prefix itself
  if [ "$tag" == '' ] ; then
    return $errSessionDir
  fi
  cd "$sessionPrefix/$tag" || return $errSessionDir
  return 0
}

# Undo the previous action: goes back to the directory saved in
# $originalWorkDir. The return value is the one of cd.
function MoveToSessionDir_Undo() {
  local previousDir="$originalWorkDir"
  cd "$previousDir"
}

# Load the cloud configuration (cloud.config) found in the current directory
# into the current shell. Returns 0 on success, $errCfg if the file cannot be
# sourced or if sourcing it returns a nonzero status.
function LoadConfig() {
  # explicit ./ prefix: a name without slashes is looked up in $PATH first,
  # and a cloud.config found there would be loaded in place of ours
  if ! source ./cloud.config > /dev/null 2>&1 ; then
    pr "Cannot load benchmark configuration"
    return $errCfg
  fi
  return 0
}

# Instantiate the validation VM for the session living in the current
# directory. Syntax:
#
#   InstantiateValidationVM <session_tag>
#
# Nothing is done, and 0 is returned, if instance-id.txt already holds an
# instance ID. Otherwise the private key is put in place as key.pem, the VM is
# launched through RunVM and its ID and address are saved in instance-id.txt
# and instance-address.txt. $cloudKeyName and $cloudKeyFile are updated.
#
# Returns 0 on success, $errCopyKey if the private key is not available, or
# the return value of RunVM.
function InstantiateValidationVM() {
  local sessionTag instanceId instanceIp ret raw
  sessionTag="$1"

  # a non-empty instance ID file means that we have a VM already
  instanceId="$(cat instance-id.txt 2> /dev/null)"
  if [ "$instanceId" != '' ] ; then
    pr "Virtual machine $instanceId is already running"
    return 0 # consider it a success
  fi
  rm -f instance-id.txt instance-address.txt

  # which key: a brand new one, or the one given in the configuration?
  if [ "$cloudKeyName" == '' ] ; then
    # no key configured: RunVM creates the keypair since the file is missing
    pr "Note: temporary SSH keys will be created for this VM"
    cloudKeyName="$sessionTag"
    cloudKeyFile="$PWD/key.pem"
    rm -f "$cloudKeyFile"
  elif [ ! -e "$cloudKeyFile" ] ; then
    pr "Cannot find private key to access virtual machines: $cloudKeyFile"
    return $errCopyKey
  else
    # the session directory gets its own copy of the configured key
    pr -n "Copying private key $cloudKeyFile to session directory..."
    rm -f 'key.pem'
    if cp -L "$cloudKeyFile" 'key.pem' 2> /dev/null ; then
      pr 'ok'
    else
      pr 'error'
      return $errCopyKey
    fi
    cloudKeyFile="$PWD/key.pem"
  fi

  # launch the VM: RunVM gives back "<instance_id> <address>"
  raw=$( RunVM "$cloudImageId" "$cloudProfile" "$cloudUserData" "$cloudKeyName" "$cloudKeyFile" )
  ret=$?
  if [ $ret != 0 ] ; then
    return $ret
  fi

  # $raw is deliberately unquoted: the output is flattened on a single line
  # before the first two fields are picked
  instanceId=$( echo $raw | cut -d' ' -f1 )
  instanceIp=$( echo $raw | cut -d' ' -f2 )

  # both parameters are saved for the next actions
  echo $instanceId > 'instance-id.txt'
  echo $instanceIp > 'instance-address.txt'
  return 0
}

# Undo the previous action: terminates the instance recorded in
# instance-id.txt, if any. The instance files and key.pem are removed from the
# current directory only when the termination command succeeds.
function InstantiateValidationVM_Undo() {
  local sessionTag="$1"

  # no instance recorded: nothing to undo
  if [ ! -e 'instance-id.txt' ] ; then
    return 0
  fi

  if swallow euca-terminate-instances $(cat instance-id.txt) ; then
    rm -f instance-id.txt instance-address.txt key.pem
  fi
}

# Generic SSH function to the VM. Syntax:
#
#   VMSSH [<ssh_arg>...]
#   VMSSH --rsync-cmd [<ssh_arg>...]
#
# First form: connects as $cloudUserName to the address found in
# instance-address.txt using the key.pem of the current directory, passing the
# extra arguments to ssh; returns the exit status of ssh.
# Second form: no connection is made, the ssh command line (without the
# destination) is printed on stdout to be used as rsync's -e option; returns 0.
function VMSSH() {
  local instanceIp ret keyPath keyForRsync
  local -a sshParams
  instanceIp=$(cat instance-address.txt 2> /dev/null)
  keyPath="$PWD/key.pem"
  # an array keeps each option, and the key path, a single word
  sshParams=( -oUserKnownHostsFile=/dev/null -oStrictHostKeyChecking=no -oPasswordAuthentication=no )

  if [ "$1" == '--rsync-cmd' ] ; then
    shift
    # rsync splits the -e string on spaces but honours quotes: protect the key
    # path only when needed, so that the usual output stays unchanged
    case "$keyPath" in
      *[[:space:]]*) keyForRsync="\"$keyPath\"" ;;
      *)             keyForRsync="$keyPath" ;;
    esac
    echo ssh "${sshParams[@]}" -i "$keyForRsync" "$@"
    ret=0
  else
    ssh "${sshParams[@]}" -i "$keyPath" "$cloudUserName"@"$instanceIp" "$@"
    ret=$?
  fi
  return $ret
}

# Opens a shell to the remote VM: plain VMSSH without any extra argument. The
# return value is the one of VMSSH. Syntax:
#
#   Shell <session_tag>
function Shell() {
  local sessionTag="$1"
  VMSSH
}

# Checks status of the validation. Syntax:
#
#   Status <session_tag>
#
# The VM is asked whether the screen of the session exists and what the
# content of <session_tag>/validation.done is. Returns:
#
#   $errStatusUnavailable : the remote command could not be run
#   $errStatusRunning     : the screen of the session exists
#   $errStatusNotRunning  : no screen and no validation.done
#   $errStatusDoneOk      : no screen, validation.done contains 0
#   $errStatusDoneFail    : no screen, validation.done contains something else
function Status() {
  local raw ret screen exitcode sessionTag
  sessionTag="$1"
  raw=$( VMSSH -t "screen -ls 2> /dev/null | grep -q .${screenPrefix}-${sessionTag} && echo -n 'screen_yes ' || echo -n 'screen_no ' ; cat $sessionTag/validation.done 2> /dev/null || echo 'not_done' ; true" 2> /dev/null )
  # status of VMSSH: to be saved before the cleanup pipeline overwrites $?
  ret=$?
  raw=$( echo "$raw" | tr -cd '[:alnum:]_ ' ) # garbage removal

  if [ "$ret" != 0 ] ; then
    pr "Cannot get status"
    return $errStatusUnavailable
  fi

  # cleaned output is "<screen_yes|screen_no> <exitcode|not_done>"
  screen="${raw%% *}"
  exitcode="${raw#* }"

  if [ "$screen" == 'screen_yes' ] ; then
    pr 'Status: validation still running'
    return $errStatusRunning
  else
    if [ "$exitcode" == 'not_done' ] ; then
      pr 'Status: validation not running'
      return $errStatusNotRunning
    elif [ "$exitcode" == 0 ] ; then
      pr 'Status: validation completed successfully'
      return $errStatusDoneOk
    else
      pr "Status: validation finished with errors (exitcode: $exitcode)"
      return $errStatusDoneFail
    fi
  fi

}

# Wait for host to be ready: an SSH connection is attempted until it succeeds,
# pausing 3 seconds after each failure, and giving up after
# $maxSshConnectAttempts retries. Returns 0 when the VM accepts connections,
# $errSshNotReady on timeout.
function WaitSsh() {
  local retries=0
  pr -n 'Waiting for the VM to accept SSH connections...'

  until VMSSH -Tq true > /dev/null 2>&1 ; do
    retries=$((retries+1))
    if [ $retries -gt $maxSshConnectAttempts ] ; then
      pr 'timeout'
      return $errSshNotReady
    fi
    pr -n '.'
    sleep 3
  done

  pr 'ok'
  return 0
}

# Run the validation for the session living in the current directory. Syntax:
#
#   Validate <session_tag>
#
# A helper script (run-benchmark.sh) is generated, the whole session directory
# is transferred to <session_tag>/ on the VM, and the helper is started there
# inside a detached screen, unless a screen for this session exists already.
#
# Returns $errLaunchValidation if the transfer fails, 0 otherwise: errors of
# the SSH command starting the screen are ignored.
function Validate() {
  local instanceIp sessionTag ret
  sessionTag="$1"
  instanceIp=$(cat instance-address.txt 2> /dev/null)

  # create helper script to launch benchmark: escaped \$ are expanded on the
  # VM, the others (AliRoot version, session tag) when the file is written
  cat > run-benchmark.sh <<_EoF_
#!/bin/bash
cd \$(dirname "\$0")
v=validation.done
rm -f \$v
env ALIROOT_VERSION=$(cat aliroot-version.txt) ./benchmark.sh run $sessionTag files.list benchmark.config
#sleep 1000
ret=\$?
echo \$ret > \$v
echo ; echo ; echo
echo "*** Validation finished with exitcode \$ret ***"
echo ; echo ; echo
read -p 'Press ENTER to dismiss: automatic dismiss in 60 seconds...' -t 60
_EoF_
  chmod +x run-benchmark.sh

  # transfer files (quoted: the local path may contain spaces)
  pr 'Transferring files to the VM'
  rsync -av -e "$(VMSSH --rsync-cmd)" "$PWD/" "$cloudUserName@$instanceIp:$sessionTag/" || return $errLaunchValidation

  # open a screen that does something; note that the command is not executed if
  # the screen already exists, which is what we want
  # note: sleep necessary to avoid "dead" screens
  VMSSH -t "screen -wipe > /dev/null 2>&1 ; if screen -ls | grep -q ${screenPrefix}-${sessionTag} ; then ret=42 ; else screen -dmS ${screenPrefix}-${sessionTag} $sessionTag/run-benchmark.sh ; ret=0 ; sleep 3 ; fi ; exit \$ret"
  ret=$?

  # message: 42 is the exit code chosen above for "screen already there"
  if [ $ret == 42 ] ; then
    pr 'Validation already running inside a screen.'
  else
    pr 'Validation launched inside a screen.'
  fi

  pr
  pr 'Check the progress status with:'
  pr "  $Prog --session $sessionTag --status"
  pr 'Attach to the screen for debug:'
  pr "  $Prog --session $sessionTag --attach"
  pr 'Open a shell to the virtual machine:'
  pr "  $Prog --session $sessionTag --shell"
  pr

  # ignore ssh errors
  return 0
}

# Attach current validation screen, if possible. Syntax:
#
#   Attach <session_tag>
#
# Returns 0 on success, $errAttachScreen if the remote command fails: hints on
# how to investigate are printed in this case.
function Attach() {
  local sessionTag
  sessionTag="$1"

  # -rx attaches to the screen of the session, even if attached elsewhere
  VMSSH -t "( screen -wipe ; screen -rx ${screenPrefix}-${sessionTag} ) > /dev/null 2>&1"

  if [ $? != 0 ] ; then
    pr "Cannot attach screen: check if validation is running with:"
    pr "  $Prog --session $sessionTag --status"
    pr "or connect manually to the VM for debug:"
    pr "  $Prog --session $sessionTag --shell"
    return $errAttachScreen
  fi

  return 0
}

# Pick session interactively. Syntax:
#
#   PickSession [<message>]
#
# The directories found under $sessionPrefix are listed, most recently
# modified first, preceded by the optional message; the number typed by the
# user is read from stdin. The prompt goes through pr, so that stdout only
# carries the result.
#
# Returns 0 with the chosen session tag on stdout, or $errPickSession if there
# is no session or if the answer is not a number from the list.
function PickSession() {
  local sess listSessions mess i choice
  mess="$1"
  listSessions=()
  mkdir -p "$sessionPrefix"

  # ls -1t gives one entry per line, newest first; only directories are kept
  while IFS= read -r sess ; do
    [ ! -d "$sessionPrefix/$sess" ] && continue
    listSessions+=( "$sess" )
  done < <( cd "$sessionPrefix" && ls -1t )

  if [ ${#listSessions[@]} == 0 ] ; then
    pr "No session available in session directory $sessionPrefix"
    return $errPickSession
  fi

  # print user message if provided
  [ "$mess" != '' ] && pr "$mess"

  pr 'Available sessions (most recent first):'
  for ((i=0; i<${#listSessions[@]}; i++)) ; do
    # the name is passed as an argument and never used as the printf format
    pr "$( printf '  % 2d. %s' $((i+1)) "${listSessions[$i]}" )"
  done
  pr -n 'Pick one: '
  read -r choice

  # only plain decimal numbers are accepted
  case "$choice" in
    ''|*[!0-9]*)
      pr 'Invalid session'
      return $errPickSession
    ;;
  esac

  # 10# forces base ten: a leading zero must not be taken as octal
  i=$(( 10#$choice - 1 ))
  if [ "$i" -lt 0 ] || [ "$i" -ge ${#listSessions[@]} ] ; then
    pr 'Invalid session'
    return $errPickSession
  fi

  sess="${listSessions[$i]}"
  pr "You chose session $sess"
  echo "$sess"
  return 0
}

# Run an action. Syntax:
#
#   RunAction <action_name> [<arg>...]
#
# If <action_name> is known to the shell it is invoked with the given
# arguments, passed through untouched, and its return value is returned.
# Unknown actions (e.g. a missing <action>_Undo) are silently skipped and 0 is
# returned.
function RunAction() {
  local ret
  if type "$1" > /dev/null 2>&1 ; then
    #pr "--> $1 (wd: $PWD)"
    # direct invocation, no eval: the arguments (session tags, user options)
    # must not be parsed again by the shell
    "$@"
    ret=$?
    #pr "<-- $1 (ret: $ret, wd: $PWD)"
    return $ret
  fi
  return 0
}

# Print help screen: every line of the text below is printed through pr.
function Help() {
  local line
  # the here-document is unquoted on purpose: $Prog is expanded in the text
  while IFS= read -r line ; do
    pr "$line"
  done <<_EoF_
$Prog -- by Dario Berzano <dario.berzano@cern.ch>
Controls the Release Validation workflow on the cloud for AliRoot.

Usage 1: $Prog [--prepare|--launch|--recycle] [--from-session <source_session_tag>] --aliroot <aliroot_tag> [--session <custom_session_tag>] [-- arbitraryOpt1=value [arbitraryOpt2=value2...]]

A new session is created to validate the specified AliRoot tag.

  --prepare  : prepares the session directory containing the files needed
               for the validation
  --recycle  : prepares a new session by recycling the head node from an
               existing one. Source session is specified via the
               --from-session switch or it can be interactively selected
  --launch   : launches the full validation process: prepares session,
               runs the virtual machine, launches the validation program
  --aliroot  : the AliRoot tag to validate, in the form "vAN-20140610"
  --session  : custom session name to provide to the validation session:
               if omitted, defaults to <aliroot_tag>_<utc_datetime_now>

Arbitrary options (in the form variable=value) can be specified after the
double dash and will override the corresponding options in any of the
configuration files.


Usage 2: $Prog [--runvm|--validate|--attach|--shell|--status] [--session <session_tag>]

Runs the validation step by step after a session is created with
--prepare, and runs other actions on a certain session.

  --session  : session identifier, e.g. vAN-20140610_20140612-123047-utc:
               if no session is specified an interactive prompt is
               presented
  --runvm    : instantiates the head node of the validation cluster on
               the cloud
  --validate : runs the validation script on the head node for the
               current session. Head node must be already up, or it
               should be created with --runvm. If validation is running
               already, connects to the existing validation shell
  --attach   : attach a currently running validation screen; remember to
               detach with Ctrl+A+D (and *not* Ctrl-C)
  --shell    : does SSH on the head node
  --status   : returns the status of the validation


Example 1: run the validation of AliRoot tag vAN-20140610:

  $Prog --aliroot vAN-20140610 --launch

Example 2: do the same thing step-by-step:

  $Prog --aliroot vAN-20140610 --prepare
  $Prog --runvm
  $Prog --validate

_EoF_
}

# The main function. Syntax:
#
#   Main <command_line_argument>...
#
# Parses the command line, checks that the required tools are available and
# runs, in order, the list of actions associated to the requested operation.
# If an action fails the actions already completed are undone in reverse order
# through their <action>_Undo counterpart, where one exists.
#
# Returns the value returned by the last action run: 0 on success, one of the
# err* codes otherwise. Values from 100 to 140 are not errors: they do not
# trigger the undo.
function Main() {

  # local variables
  local aliRootTag Actions sessionTag fromSessionTag
  local i A T ret
  Actions=()
  ret=0

  # parse command line options
  while [ $# -gt 0 ] ; do

    # options taking a value: with the value missing "shift 2" would fail
    # leaving the option in place, and this loop would never end
    case "$1" in
      --aliroot|-a|--session|--from-session)
        if [ $# -lt 2 ] ; then
          pr "Option $1 requires an argument. Use --help for assistance."
          return $errInvalidOpt
        fi
      ;;
    esac

    case "$1" in

      # options
      --aliroot|-a)
        aliRootTag="$2"
        shift 2
      ;;
      --session)
        sessionTag="$2"
        shift 2
      ;;
      --from-session)
        fromSessionTag="$2"
        shift 2
      ;;

      # actions
      --launch)
        # all actions
        Actions=( PrepareSession MoveToSessionDir LoadConfig InstantiateValidationVM WaitSsh Validate )
        shift
      ;;
      --prepare)
        Actions=( PrepareSession MoveToSessionDir )
        shift
      ;;
      --recycle)
        Actions=( PrepareSession MoveToSessionDir RecycleSession )
        shift
      ;;
      --runvm)
        Actions=( MoveToSessionDir LoadConfig InstantiateValidationVM )
        shift
      ;;
      --validate)
        Actions=( MoveToSessionDir LoadConfig WaitSsh Validate )
        shift
      ;;
      --attach)
        Actions=( MoveToSessionDir LoadConfig WaitSsh Attach )
        shift
      ;;

      # extra actions
      --shell)
        Actions=( MoveToSessionDir LoadConfig WaitSsh Shell )
        shift
      ;;
      --status)
        Actions=( MoveToSessionDir LoadConfig WaitSsh Status )
        shift
      ;;
      --help)
        Help
        exit 0
      ;;

      # end of options: what follows is passed to PrepareSession
      --)
        shift
        break
      ;;

      *)
        pr "Invalid option: $1. Use --help for assistance."
        return $errInvalidOpt
      ;;
    esac
  done

  # check for the presence of the required tools in the $PATH
  for T in euca-describe-instances euca-describe-regions euca-run-instances euca-create-keypair euca-delete-keypair rsync ; do
    command -v "$T" > /dev/null 2>&1
    if [ $? != 0 ] ; then
      pr "Cannot find one of the required commands: $T"
      return $errMissingCmd
    fi
  done

  # test EC2 credentials
  # euca-describe-regions > /dev/null 2>&1
  # if [ $? != 0 ] ; then
  #   pr 'Cannot authenticate to EC2.'
  #   pr 'Note: you must have at least the following variables properly set in your environment:'
  #   pr "  * EC2_URL (current value: ${EC2_URL-<not set>})"
  #   pr "  * EC2_ACCESS_KEY (current value: ${EC2_ACCESS_KEY-<not set>})"
  #   pr "  * EC2_SECRET_KEY (current value: ${EC2_SECRET_KEY-<not set>})"
  #   return $errEc2Auth
  # fi

  # what to do?
  if [ ${#Actions[@]} == 0 ] ; then
    pr 'Nothing to do. Use --help for assistance.'
    return $errInvalidOpt
  fi

  # run actions
  for ((i=0; i<${#Actions[@]}; i++)) ; do

    A=${Actions[$i]}

    if [ "$A" == 'PrepareSession' ] ; then
      # special action returning the session tag
      if [ "$aliRootTag" == '' ] ; then
        pr 'Specify an AliRoot version with --aliroot <tag>'
        return $errInvalidOpt
      fi
      [ "$sessionTag" == '' ] && sessionTag='auto'
      sessionTag=$( RunAction "$A" "$aliRootTag" "$sessionTag" "$@" )
      ret=$?
    elif [ "$A" == 'RecycleSession' ] ; then
      # special action requiring additional parameters
      if [ "$fromSessionTag" == '' ] ; then
        fromSessionTag=$( PickSession 'Select a source session to recycle.' )
        ret=$?
        [ $ret != 0 ] && break
      fi
      RunAction "$A" "$sessionTag" "$fromSessionTag"
      ret=$?
    else
      if [ "$sessionTag" == '' ] ; then
        sessionTag=$( PickSession )
        ret=$?
        [ $ret != 0 ] && break
      fi
      RunAction "$A" "$sessionTag"
      ret=$?
    fi

    # stop at the first error; 100 to 140 --> not errors
    if [ $ret != 0 ] && ( [ $ret -lt 100 ] || [ $ret -gt 140 ] ) ; then
      break
    fi

  done

  # undo actions: on error $i is the index of the failed action, and the undo
  # starts from the one before it
  i=$((i-1))
  if [ $ret != 0 ] && ( [ $ret -lt 100 ] || [ $ret -gt 140 ] ) ; then
    for ((; i>=0; i--)) ; do
      RunAction "${Actions[$i]}_Undo" "$sessionTag"
    done
  fi

  # return last value
  return $ret

}

#
# Entry point
#

# Main gets the command line untouched; a nonzero return value of Main becomes
# the exit status of the script
Main "$@" || exit $?