#!/bin/bash
#
# alirelval -- by Dario Berzano
#
# Controls the release validation submission by managing the validation virtual
# cluster.
#

#
# Variables
#

# error codes
errCfg=1
errMissingCmd=2
errEc2Auth=3
errInvalidOpt=4
errSessionDir=5
errCreateKey=6
errRunVm=7
errLaunchValidation=8
errSshNotReady=9
errStatusUnavailable=10
errPickSession=11
errCopyKey=12
errAttachScreen=13
errRecycleSession=14

# error codes not treated as errors (100 to 140)
errStatusRunning=100
errStatusNotRunning=101
errStatusDoneOk=102
errStatusDoneFail=103

# thresholds
maxVmLaunchAttempts=100
maxSshConnectAttempts=400
maxVmAddressWait=200

# working directory prefix
sessionPrefix="$HOME/.alice-release-validation"

# screen name: <screenPrefix>-<sessionTag>
screenPrefix='AliRelVal'

# program name
Prog=$(basename "$0")

#
# Functions
#

# Pretty print: writes its arguments in bold on stderr, so that stdout stays
# free for values returned by functions. An optional leading '-n' suppresses
# the trailing newline. Backslash escapes in the message are interpreted.
function pr() {
  local nl=''
  if [[ "$1" == '-n' ]] ; then
    nl='-n'
    shift
  fi
  # $nl is deliberately unquoted: when empty it must expand to nothing
  echo $nl -e "\033[1m$*\033[m" >&2
}

# Nice date in UTC
function ndate() {
  date -u +%Y%m%d-%H%M%S-utc
}

# Temporary file: creates it and prints its path on stdout
function tmpf() {
  mktemp /tmp/alirelval-XXXX
}

# Swallow output. Show only if something goes wrong. Syntax:
#
#   swallow <command> [<arg> ...]
#
# Returns the exit status of the command; when nonzero, the captured stdout and
# stderr of the command are replayed on stderr.
function swallow() {
  local tout ret
  tout=$(tmpf)
  if [[ $? != 0 || "$tout" == '' ]] ; then
    pr 'Cannot create a temporary file'
    return 1
  fi
  "$@" > "$tout" 2>&1
  ret=$?
  if [[ $ret != 0 ]] ; then
    pr "Command failed (exit status: $ret): $*"
    cat "$tout" >&2
  fi
  rm -f "$tout"
  return $ret
}

# Launch a VM. Create the keypair if the given keyfile does not exist. Syntax:
#
#   RunVM <image_id> <profile> <user_data> <key_name> <key_file>
#
# Returns 0 on success, nonzero on failure. On success a single line of the
# form "<instance_id> <ip_address>" is printed on stdout.
function RunVM() {
  local imageId profile userData keyName keyFile
  imageId="$1"
  profile="$2"
  userData="$3"
  keyName="$4"
  keyFile="$5"
  local raw iip iid ret attempt createdKeypair error i

  # keypair part: if file does not exist, create keypair
  if [[ ! -e "$keyFile" ]] ; then
    pr "Creating a new keypair: $keyName (private key: $keyFile)"
    if ! swallow euca-create-keypair -f "$keyFile" "$keyName" ; then
      pr 'Problems creating the keypair'
      return $errCreateKey
    fi
    createdKeypair=1
  fi

  attempt=0
  pr 'Attempting to run virtual machine'

  # resubmit loop
  while true ; do

    if (( ++attempt > maxVmLaunchAttempts )) ; then
      pr " * Reached maximum number of attempts, giving up"
      # do not leave behind a keypair created by this very call
      if [[ "$createdKeypair" == 1 ]] ; then
        ( euca-delete-keypair "$keyName" ; rm -f "$keyFile" ) > /dev/null 2>&1
      fi
      return $errRunVm
    elif (( attempt != 1 )) ; then
      pr " * Pausing between retries"
      sleep 5
    fi

    pr -n " * Launching VM (attempt #$attempt/$maxVmLaunchAttempts)..."
    error=0
    raw=$( euca-run-instances "$imageId" -t "$profile" -d "$userData" -k "$keyName" 2>&1 )
    ret=$?
    # the instance ID is the second field of the first INSTANCE line
    iid=$( echo "$raw" | grep -E '^INSTANCE' | head -n1 | awk '{ print $2 }' )
    if [[ $ret != 0 || "$iid" == '' ]] ; then
      # 'hard' error, but can be temporary
      pr 'error: message follows'
      echo "$raw" >&2
      sleep 1
      continue
    else
      pr 'ok'
    fi

    pr " * VM has instance ID $iid"
    pr -n " * Waiting for IP address..."

    # wait for address loop
    iip=''
    for (( i = 0; i < maxVmAddressWait; i++ )) ; do
      sleep 1
      raw=$( euca-describe-instances 2>&1 | grep -E '^INSTANCE' | grep "$iid" | head -n1 )

      # error state?
      if echo "$raw" | grep -qi error ; then
        pr ; pr " * VM went to error state"
        error=1
        break
      fi

      # no error: try to parse address (NOTE: only IPv4 for the moment)
      iip=$( echo "$raw" | grep -oE '([0-9]{1,3}\.){3}[0-9]{1,3}' )
      if [[ "$iip" != '' ]] ; then
        pr
        break
      fi

      # no address
      pr -n '.'
    done

    # do we have address?
    if [[ "$iip" != '' ]] ; then
      pr " * VM has address $iip"
      break
    fi

    # we don't: terminate (timeout)
    [[ "$error" != 1 ]] && pr 'timeout'
    pr " * Terminating instance $iid"
    euca-terminate-instances "$iid" > /dev/null 2>&1

  done

  # success: a keypair created by this call is removed from the cloud side,
  # while the private key file is left in place
  [[ "$createdKeypair" == 1 ]] && euca-delete-keypair "$keyName" > /dev/null 2>&1
  echo "$iid $iip"  # must be parsed
  return 0
}

# Prepare the validation session directory. Syntax:
#
#   PrepareSession <aliroot_tag> <session_tag|auto> [<name>=<value> ...]
#
# Returns 0 on success, nonzero on failure. Session tag returned on stdout.
function PrepareSession() {
  local aliRootTag sessionTag sessionDir f opt extraName extraVal
  aliRootTag="$1"

  # session tag can be "auto" or any user-specified value
  if [[ "$2" != 'auto' ]] ; then
    sessionTag="$2"
  else
    sessionTag="${aliRootTag}_$(ndate)"
  fi
  shift 2

  # an empty tag would make the session directory coincide with the prefix
  # holding all the sessions, which is removed on failure below
  if [[ "$sessionTag" == '' ]] ; then
    pr 'Fatal: empty session name'
    return $errSessionDir
  fi

  sessionDir="$sessionPrefix/$sessionTag"

  # session directory already exists? abort
  if [[ -d "$sessionDir" ]] ; then
    pr "Session directory already exists, aborting"
    return $errSessionDir
  fi

  # create working directory
  if ! mkdir -p "$sessionDir" ; then
    pr "Fatal: cannot create session directory $sessionDir"
    return $errSessionDir
  fi

  # aliroot version written to a file
  echo "$aliRootTag" > "$sessionDir/aliroot-version.txt"

  # benchmark script, benchmark config, cloud config and file list: they are
  # taken from the current working directory
  if ! cp -L benchmark.sh cloud.config benchmark.config files.list "$sessionDir/" ; then
    pr "Cannot copy configuration files to $sessionDir"
    rm -rf "$sessionDir"
    return $errSessionDir
  fi

  # append local files to the configuration
  for f in benchmark.config.d/*.config ; do
    # the unexpanded pattern is what we get when nothing matches
    [[ -e "$f" ]] || continue
    {
      echo ''
      echo "### from $f ###"
      cat "$f"
      echo ''
    } >> "$sessionDir/benchmark.config"
  done

  # command-line options override the configuration
  if (( $# != 0 )) ; then
    pr "Note: the following command-line options will override the corresponding ones in the config files:"
    {
      echo ''
      echo "### from the command line ###"
      for opt in "$@" ; do
        extraName="${opt%%=*}"
        extraVal="${opt#*=}"
        # arguments without a '=' are silently skipped
        if [[ "$extraName" != "$opt" ]] ; then
          pr " * $extraName = $extraVal"
          echo "$opt"
        fi
      done
      echo ''
    } >> "$sessionDir/benchmark.config"
  fi

  # success: return the session tag and move to the session directory
  pr "*** Creating new working session: $sessionTag ***"
  pr "*** Use this name for future session operations ***"
  echo "$sessionTag"

  return 0
}

# Undo the previous action: removes the directory of session <session_tag>.
# Does nothing with an empty tag, which would otherwise wipe every session.
function PrepareSession_Undo() {
  [[ "$1" != '' ]] || return 0
  rm -rf "$sessionPrefix/$1"
}

# Recycle the VM from an existing session. Syntax:
#
#   RecycleSession <session_tag> <from_session_tag>
#
# Copies instance ID, instance address and private key of the source session
# into the current working directory. Returns 0 on success, nonzero on error.
function RecycleSession() {
  local fromSessionTag="$2"
  local fromSessionDir="$sessionPrefix/$fromSessionTag"
  local f
  for f in 'instance-id.txt' 'instance-address.txt' 'key.pem' ; do
    if ! cp -L "$fromSessionDir/$f" "$f" > /dev/null 2>&1 ; then
      pr "Cannot copy $f from the source session dir $fromSessionDir"
      return $errRecycleSession
    fi
  done
  return 0
}

# Move into the session tag directory. Usage:
#
#   MoveToSessionDir <session_tag>
#
# Returns 0 on success, nonzero on error. The previous working directory is
# saved in the global originalWorkDir.
function MoveToSessionDir() {
  # fall back to the caller's sessionTag when invoked without arguments
  local tag="${1:-$sessionTag}"
  originalWorkDir="$PWD"
  cd "$sessionPrefix/$tag" || return $errSessionDir
  return 0
}

# Undo the previous action
function MoveToSessionDir_Undo() {
  cd "$originalWorkDir"
}

# Load the cloud configuration from the current (session) directory
function LoadConfig() {
  # explicit ./ prevents the source builtin from searching $PATH first
  if ! source ./cloud.config > /dev/null 2>&1 ; then
    pr "Cannot load benchmark configuration"
    return $errCfg
  fi
  return 0
}

# Instantiate the validation VM. Syntax:
#
#   InstantiateValidationVM <session_tag>
#
# Reads cloudKeyName, cloudKeyFile, cloudImageId, cloudProfile and
# cloudUserData; writes instance-id.txt, instance-address.txt and key.pem in
# the current directory. Returns 0 on success, nonzero on failure.
function InstantiateValidationVM() {
  local sessionTag instanceId instanceIp ret raw
  sessionTag="$1"

  # check if we already have a vm
  instanceId="$(cat instance-id.txt 2> /dev/null)"
  if [[ "$instanceId" != '' ]] ; then
    pr "Virtual machine $instanceId is already running"
    return 0  # consider it a success
  else
    rm -f instance-id.txt instance-address.txt
  fi

  # do we need to create a keypair?
  if [[ "$cloudKeyName" == '' ]] ; then
    pr "Note: temporary SSH keys will be created for this VM"
    cloudKeyName="$sessionTag"
    cloudKeyFile="$PWD/key.pem"
    rm -f "$cloudKeyFile"
  elif [[ -e "$cloudKeyFile" ]] ; then
    # copy key to session dir
    pr -n "Copying private key $cloudKeyFile to session directory..."
    if [[ "$cloudKeyFile" -ef 'key.pem' ]] ; then
      # already in place: removing it first would destroy the only copy
      pr 'ok'
    else
      rm -f 'key.pem'
      if ! cp -L "$cloudKeyFile" 'key.pem' 2> /dev/null ; then
        pr 'error'
        return $errCopyKey
      fi
      pr 'ok'
    fi
    cloudKeyFile="$PWD/key.pem"
  else
    pr "Cannot find private key to access virtual machines: $cloudKeyFile"
    return $errCopyKey
  fi

  # launch virtual machine and get its address
  raw=$( RunVM "$cloudImageId" "$cloudProfile" "$cloudUserData" "$cloudKeyName" "$cloudKeyFile" )
  ret=$?

  if [[ $ret == 0 ]] ; then
    # first two words of the first line: instance ID and address
    read -r instanceId instanceIp _ <<< "$raw"

    # write both parameters to files
    echo "$instanceId" > 'instance-id.txt'
    echo "$instanceIp" > 'instance-address.txt'
  fi

  return $ret
}

# Undo the previous action: terminates the instance recorded in
# instance-id.txt and, if that succeeds, removes the VM-related files
function InstantiateValidationVM_Undo() {
  local instanceId
  if [[ -e 'instance-id.txt' ]] ; then
    instanceId=$(cat instance-id.txt)
    if swallow euca-terminate-instances "$instanceId" ; then
      rm -f instance-id.txt instance-address.txt key.pem
    fi
  fi
}

# Generic SSH function to the VM. Syntax:
#
#   VMSSH [<ssh_arg> ...]              runs ssh to the VM, returns its status
#   VMSSH --rsync-cmd [<ssh_arg> ...]  prints the ssh command line on stdout
#                                      (to be used with rsync -e), returns 0
#
# Address and key are read from instance-address.txt and key.pem in the
# current directory; the remote user is cloudUserName.
function VMSSH() {
  local instanceIp
  local -a sshParams
  instanceIp=$(cat instance-address.txt 2> /dev/null)
  sshParams=( -oUserKnownHostsFile=/dev/null -oStrictHostKeyChecking=no -oPasswordAuthentication=no -i "$PWD/key.pem" )

  if [[ "$1" == '--rsync-cmd' ]] ; then
    shift
    echo ssh "${sshParams[@]}" "$@"
    return 0
  fi

  ssh "${sshParams[@]}" "$cloudUserName"@"$instanceIp" "$@"
}

# Opens a shell to the remote VM
function Shell() {
  VMSSH
}

# Checks status of the validation. Syntax:
#
#   Status <session_tag>
#
# Returns errStatusUnavailable if the VM cannot be queried, otherwise one of
# the informational codes errStatusRunning, errStatusNotRunning,
# errStatusDoneOk, errStatusDoneFail.
function Status() {
  local raw ret screen exitcode sessionTag
  sessionTag="$1"

  raw=$( VMSSH -t "screen -ls 2> /dev/null | grep -q .${screenPrefix}-${sessionTag} && echo -n 'screen_yes ' || echo -n 'screen_no ' ; cat $sessionTag/validation.done 2> /dev/null || echo 'not_done' ; true" 2> /dev/null )
  # the status of ssh must be taken before any post-processing; the remote
  # command always ends with 'true', so nonzero means ssh itself failed
  ret=$?

  if [[ "$ret" != 0 ]] ; then
    pr "Cannot get status"
    return $errStatusUnavailable
  fi

  raw=$( echo "$raw" | tr -cd '[:alnum:]_ ' )  # garbage removal

  # first word: screen presence; remainder: content of validation.done
  screen="${raw%% *}"
  exitcode="${raw#* }"

  if [[ "$screen" == 'screen_yes' ]] ; then
    pr 'Status: validation still running'
    return $errStatusRunning
  fi

  if [[ "$exitcode" == 'not_done' ]] ; then
    pr 'Status: validation not running'
    return $errStatusNotRunning
  elif [[ "$exitcode" == 0 ]] ; then
    pr 'Status: validation completed successfully'
    return $errStatusDoneOk
  fi

  pr "Status: validation finished with errors (exitcode: $exitcode)"
  return $errStatusDoneFail
}

# Wait for host to be ready: retries an SSH no-op every 3 seconds, up to
# maxSshConnectAttempts times. Returns 0 when SSH answers, errSshNotReady on
# timeout.
function WaitSsh() {
  local attempt=0
  pr -n 'Waiting for the VM to accept SSH connections...'
  while ! VMSSH -Tq true > /dev/null 2>&1 ; do
    if (( ++attempt > maxSshConnectAttempts )) ; then
      pr 'timeout'
      return $errSshNotReady
    fi
    pr -n '.'
    sleep 3
  done
  pr 'ok'
  return 0
}

# Run the validation. Syntax:
#
#   Validate <session_tag>
#
# Generates run-benchmark.sh in the current directory, copies the directory to
# the VM and starts the script inside a detached screen. Returns
# errLaunchValidation if the transfer fails, 0 otherwise.
function Validate() {
  local instanceIp sessionTag ret
  sessionTag="$1"
  instanceIp=$(cat instance-address.txt 2> /dev/null)

  # create helper script to launch benchmark; unescaped expansions are
  # resolved now, escaped ones when the script runs on the VM
  cat > run-benchmark.sh <<_EoF_
#!/bin/bash
cd \$(dirname "\$0")
v=validation.done
rm -f \$v
env ALIROOT_VERSION=$(cat aliroot-version.txt) ./benchmark.sh run $sessionTag files.list benchmark.config #sleep 1000
ret=\$?
echo \$ret > \$v
echo ; echo ; echo
echo "*** Validation finished with exitcode \$ret ***"
echo ; echo ; echo
read -p 'Press ENTER to dismiss: automatic dismiss in 60 seconds...' -t 60
_EoF_
  chmod +x run-benchmark.sh

  # transfer files
  pr 'Transferring files to the VM'
  rsync -av -e "$(VMSSH --rsync-cmd)" "$PWD/" "$cloudUserName@$instanceIp:$sessionTag/" || return $errLaunchValidation

  # open a screen that does something; note that the command is not executed if
  # the screen already exists, which is what we want
  # note: sleep necessary to avoid "dead" screens
  VMSSH -t "screen -wipe > /dev/null 2>&1 ; if screen -ls | grep -q ${screenPrefix}-${sessionTag} ; then ret=42 ; else screen -dmS ${screenPrefix}-${sessionTag} $sessionTag/run-benchmark.sh ; ret=0 ; sleep 3 ; fi ; exit \$ret"
  ret=$?

  # message
  if [[ $ret == 42 ]] ; then
    pr 'Validation already running inside a screen.'
  else
    pr 'Validation launched inside a screen.'
  fi

  pr
  pr 'Check the progress status with:'
  pr " $Prog --session $sessionTag --status"
  pr 'Attach to the screen for debug:'
  pr " $Prog --session $sessionTag --attach"
  pr 'Open a shell to the virtual machine:'
  pr " $Prog --session $sessionTag --shell"
  pr

  # ignore ssh errors
  return 0
}

# Attach current validation screen, if possible. Syntax:
#
#   Attach <session_tag>
#
# Returns 0 on success, errAttachScreen otherwise.
function Attach() {
  local sessionTag
  sessionTag="$1"
  if ! VMSSH -t "( screen -wipe ; screen -rx ${screenPrefix}-${sessionTag} ) > /dev/null 2>&1" ; then
    pr "Cannot attach screen: check if validation is running with:"
    pr " $Prog --session $sessionTag --status"
    pr "or connect manually to the VM for debug:"
    pr " $Prog --session $sessionTag --shell"
    return $errAttachScreen
  fi
  return 0
}

# Pick session interactively. Syntax:
#
#   PickSession [<message>]
#
# The menu goes to stderr (through pr) and the answer is read from stdin, so
# the function can be used inside a command substitution. Returns 0 and prints
# the chosen session tag on stdout, or returns errPickSession.
function PickSession() {
  local sess mess i choice
  local -a listSessions=()
  mess="$1"
  mkdir -p "$sessionPrefix"

  # ls is used on purpose: sessions must be listed most recent first
  while read -r sess ; do
    [[ -d "$sessionPrefix/$sess" ]] || continue
    listSessions+=( "$sess" )
  done < <( cd "$sessionPrefix" && ls -1t )

  if (( ${#listSessions[@]} == 0 )) ; then
    pr "No session available in session directory $sessionPrefix"
    return $errPickSession
  fi

  # print user message if provided
  [[ "$mess" != '' ]] && pr "$mess"

  pr 'Available sessions (most recent first):'
  for (( i = 0; i < ${#listSessions[@]}; i++ )) ; do
    # the session name is data, not part of the format string
    pr "$( printf " % 2d. %s" $((i+1)) "${listSessions[$i]}" )"
  done
  pr -n 'Pick one: '
  read -r choice

  # accept plain decimal numbers only: anything else must never reach an
  # arithmetic context; 10# avoids octal interpretation of leading zeros
  if ! [[ "$choice" =~ ^[0-9]+$ ]] ||
     (( 10#$choice < 1 || 10#$choice > ${#listSessions[@]} )) ; then
    pr 'Invalid session'
    return $errPickSession
  fi

  sess="${listSessions[$(( 10#$choice - 1 ))]}"
  pr "You chose session $sess"
  echo "$sess"

  return 0
}

# Run an action. Syntax:
#
#   RunAction <name> [<arg> ...]
#
# Invokes <name> with the given arguments and returns its exit status. Returns
# 0 without doing anything if <name> is not a known command, which is what
# makes the *_Undo counterparts optional.
function RunAction() {
  if type "$1" > /dev/null 2>&1 ; then
    # direct invocation: every argument reaches the action verbatim
    "$@"
    return $?
  fi
  return 0
}

# Print help screen
function Help() {
  pr "$Prog -- by Dario Berzano "
  pr 'Controls the Release Validation workflow on the cloud for AliRoot.'
  pr
  pr "Usage 1: $Prog [--prepare|--launch|--recycle] [--from-session <session>] --aliroot <tag> [--session <name>] [-- arbitraryOpt1=value [arbitraryOpt2=value2...]]"
  pr
  pr 'A new session is created to validate the specified AliRoot tag.'
  pr
  pr '  --prepare  : prepares the session directory containing the files needed'
  pr '               for the validation'
  pr '  --recycle  : prepares a new session by recycling the head node from an'
  pr '               existing one. Source session is specified via the'
  pr '               --from-session switch or it can be interactively selected'
  pr '  --launch   : launches the full validation process: prepares session,'
  pr '               runs the virtual machine, launches the validation program'
  pr '  --aliroot  : the AliRoot tag to validate, in the form "vAN-20140610"'
  pr '  --session  : custom session name to provide to the validation session:'
  pr '               if omitted, defaults to <aliroot_tag>_<utc_timestamp>'
  pr
  pr 'Arbitrary options (in the form variable=value) can be specified after the'
  pr 'double dash and will override the corresponding options in any of the'
  pr 'configuration files.'
  pr ; pr
  pr "Usage 2: $Prog [--runvm|--validate|--attach|--shell|--status] --session <name>"
  pr
  pr 'Runs the validation step by step after a session is created with'
  pr '--prepare, and runs other actions on a certain session.'
  pr
  pr '  --session  : session identifier, e.g. vAN-20140610_20140612-123047-utc:'
  pr '               if no session is specified an interactive prompt is'
  pr '               presented'
  pr '  --runvm    : instantiates the head node of the validation cluster on'
  pr '               the cloud'
  pr '  --validate : runs the validation script on the head node for the'
  pr '               current session. Head node must be already up, or it'
  pr '               should be created with --runvm. If validation is running'
  pr '               already, connects to the existing validation shell'
  pr '  --attach   : attach a currently running validation screen; remember to'
  pr '               detach with Ctrl+A+D (and *not* Ctrl-C)'
  pr '  --shell    : does SSH on the head node'
  pr '  --status   : returns the status of the validation'
  pr ; pr
  pr 'Example 1: run the validation of AliRoot tag vAN-20140610:'
  pr
  pr " $Prog --aliroot vAN-20140610 --launch"
  pr
  pr 'Example 2: do the same thing step-by-step:'
  pr
  pr " $Prog --aliroot vAN-20140610 --prepare"
  pr " $Prog --runvm"
  pr " $Prog --validate"
  pr
}

# The main function: parses the command line, then runs the selected chain of
# actions; if one of them fails with a real error, the *_Undo counterparts of
# the actions preceding it are run in reverse order. Returns the status of the
# last action run (codes from 100 to 140 are informational, not errors).
function Main() {

  # local variables
  local aliRootTag sessionTag fromSessionTag A T i
  local ret=0
  local -a Actions=()

  # parse command line options
  while (( $# > 0 )) ; do
    case "$1" in

      # options taking a value
      --aliroot|-a|--session|--from-session)
        # 'shift 2' fails without shifting when the value is missing, which
        # would make this loop spin forever
        if (( $# < 2 )) ; then
          pr "Option $1 requires a value. Use --help for assistance."
          return $errInvalidOpt
        fi
        case "$1" in
          --aliroot|-a)   aliRootTag="$2" ;;
          --session)      sessionTag="$2" ;;
          --from-session) fromSessionTag="$2" ;;
        esac
        shift 2
      ;;

      # actions
      --launch)
        # all actions
        Actions=( PrepareSession MoveToSessionDir LoadConfig InstantiateValidationVM WaitSsh Validate )
        shift
      ;;
      --prepare)
        Actions=( PrepareSession MoveToSessionDir )
        shift
      ;;
      --recycle)
        Actions=( PrepareSession MoveToSessionDir RecycleSession )
        shift
      ;;
      --runvm)
        Actions=( MoveToSessionDir LoadConfig InstantiateValidationVM )
        shift
      ;;
      --validate)
        Actions=( MoveToSessionDir LoadConfig WaitSsh Validate )
        shift
      ;;
      --attach)
        Actions=( MoveToSessionDir LoadConfig WaitSsh Attach )
        shift
      ;;

      # extra actions
      --shell)
        Actions=( MoveToSessionDir LoadConfig WaitSsh Shell )
        shift
      ;;
      --status)
        Actions=( MoveToSessionDir LoadConfig WaitSsh Status )
        shift
      ;;
      --help)
        Help
        exit 0
      ;;

      # end of options: what follows is passed to PrepareSession
      --)
        shift
        break
      ;;
      *)
        pr "Invalid option: $1. Use --help for assistance."
        return $errInvalidOpt
      ;;

    esac
  done

  # check for the presence of the required tools in the $PATH
  for T in euca-describe-instances euca-describe-regions euca-run-instances euca-create-keypair euca-delete-keypair rsync ; do
    if ! command -v "$T" > /dev/null 2>&1 ; then
      pr "Cannot find one of the required commands: $T"
      return $errMissingCmd
    fi
  done

  # test EC2 credentials
  # euca-describe-regions > /dev/null 2>&1
  # if [ $? != 0 ] ; then
  #   pr 'Cannot authenticate to EC2.'
  #   pr 'Note: you must have at least the following variables properly set in your environment:'
  #   pr " * EC2_URL (current value: ${EC2_URL-})"
  #   pr " * EC2_ACCESS_KEY (current value: ${EC2_ACCESS_KEY-})"
  #   pr " * EC2_SECRET_KEY (current value: ${EC2_SECRET_KEY-})"
  #   return $errEc2Auth
  # fi

  # what to do?
  if (( ${#Actions[@]} == 0 )) ; then
    pr 'Nothing to do. Use --help for assistance.'
    return $errInvalidOpt
  fi

  # run actions
  for (( i = 0; i < ${#Actions[@]}; i++ )) ; do

    A=${Actions[$i]}

    if [[ "$A" == 'PrepareSession' ]] ; then
      # special action returning the session tag
      if [[ "$aliRootTag" == '' ]] ; then
        pr 'Specify an AliRoot version with --aliroot <tag>'
        return $errInvalidOpt
      fi
      [[ "$sessionTag" == '' ]] && sessionTag='auto'
      sessionTag=$( RunAction "$A" "$aliRootTag" "$sessionTag" "$@" )
      ret=$?
    elif [[ "$A" == 'RecycleSession' ]] ; then
      # special action requiring additional parameters
      if [[ "$fromSessionTag" == '' ]] ; then
        fromSessionTag=$( PickSession 'Select a source session to recycle.' )
        ret=$?
        [[ $ret != 0 ]] && break
      fi
      RunAction "$A" "$sessionTag" "$fromSessionTag"
      ret=$?
    else
      if [[ "$sessionTag" == '' ]] ; then
        sessionTag=$( PickSession )
        ret=$?
        [[ $ret != 0 ]] && break
      fi
      RunAction "$A" "$sessionTag"
      ret=$?
    fi

    # 100 to 140 --> not errors: stop only on real errors
    if (( ret != 0 && ( ret < 100 || ret > 140 ) )) ; then
      break
    fi

  done

  # undo actions: start from the one preceding the failed action
  i=$(( i - 1 ))
  if (( ret != 0 && ( ret < 100 || ret > 140 ) )) ; then
    for (( ; i >= 0; i-- )) ; do
      RunAction "${Actions[$i]}_Undo" "$sessionTag"
    done
  fi

  # return last value
  return $ret
}

#
# Entry point
#

Main "$@" || exit $?