4 # launch-relval.sh -- by Dario Berzano <dario.berzano@cern.ch>
6 # Controls the release validation submission by managing the validation virtual
24 errStatusUnavailable=10
29 # error codes not treated as errors (100 to 140)
31 errStatusNotRunning=101
36 maxVmLaunchAttempts=10 # upper bound on VM launch attempts before giving up
37 maxSshConnectAttempts=400 # upper bound on SSH connection attempts while waiting for the VM
40 # working directory prefix
41 sessionPrefix="$HOME/.alice-release-validation"
43 # screen name for the validation
44 screenName='AliceReleaseValidation'
56 if [ "$1" == '-n' ] ; then
60 echo $nl -e "\033[1m$@\033[m" >&2
65 date -u +%Y%m%d-%H%M%S-utc
70 mktemp /tmp/alirelval-XXXX
73 # Swallow output. Show only if something goes wrong
79 if [ $ret != 0 ] ; then
80 pr "Command failed (exit status: $ret): $@"
87 # Launch a VM. Create the keypair if the given keyfile does not exist. Syntax:
89 # RunVM <image_id> <profile> <user_data> <key_name> <key_file>
91 # Returns 0 on success, nonzero on failure. On success, the instance ID and the IP address are returned on stdout, separated by a space.
93 local imageId profile userData keyName
99 local raw iip iid ret attempt createdKeypair error
101 # keypair part: if file does not exist, create keypair
102 if [ ! -e "$keyFile" ] ; then
103 pr "Creating a new keypair: $keyName (private key: $keyFile)"
104 swallow euca-create-keypair -f "$keyFile" "$keyName"
105 if [ $? != 0 ] ; then
106 pr 'Problems creating the keypair'
113 pr 'Attempting to run virtual machine'
118 if [ $((++attempt)) -gt $maxVmLaunchAttempts ] ; then
119 pr " * Reached maximum number of attempts, giving up"
120 if [ "$createdKeypair" == 1 ] ; then
121 ( euca-delete-keypair "$keyName" ; rm -f "$keyFile" ) > /dev/null 2>&1
126 pr -n " * Launching VM (attempt #$attempt/$maxVmLaunchAttempts)..."
129 raw=$( euca-run-instances "$imageId" -t "$profile" -d "$userData" -k "$keyName" 2>&1 )
131 iid=$( echo "$raw" | grep -E '^INSTANCE' | head -n1 | awk '{ print $2 }' ) # instance ID: second field of the first INSTANCE line
132 if [ $ret != 0 ] || [ "$iid" == '' ] ; then
133 # 'hard' error, but can be temporary
134 pr 'error: message follows'
142 pr " * VM has instance ID $iid"
143 pr -n " * Waiting for IP address..."
145 # wait for address loop
147 for ((i=0; i<$maxVmAddressWait; i++)) ; do
149 raw=$( euca-describe-instances 2>&1 | grep -E '^INSTANCE' | grep "$iid" | head -n1 )
152 echo "$raw" | grep -i error -q
153 if [ $? == 0 ] ; then
154 pr ; pr " * VM went to error state"
159 # no error: try to parse address (NOTE: only IPv4 for the moment)
160 iip=$( echo "$raw" | grep -oE '([0-9]{1,3}\.){3}[0-9]{1,3}' )
161 if [ "$iip" != '' ] ; then
171 # do we have address?
172 if [ "$iip" != '' ] ; then
173 pr " * VM has address $iip"
177 # we don't: terminate (timeout)
178 [ "$error" != 1 ] && pr 'timeout'
179 pr " * Terminating instance $iid"
180 euca-terminate-instances "$iid" > /dev/null 2>&1
185 [ "$createdKeypair" == 1 ] && euca-delete-keypair "$keyName" > /dev/null 2>&1
186 echo "$iid $iip" # must be parsed
191 # Prepare the validation session directory. Syntax:
193 # PrepareSession <aliroot_tag>
195 # Returns 0 on success, nonzero on failure. Session tag returned on stdout.
196 function PrepareSession() {
197 local aliRootTag sessionTag sessionDir
200 sessionTag="${aliRootTag}_$(ndate)"
201 sessionDir="$sessionPrefix/$sessionTag"
203 # session directory already exists? abort
204 if [ -d "$sessionDir" ] ; then
205 pr "Session directory already exists, aborting"
206 return $errSessionDir
209 # create working directory
210 mkdir -p "$sessionDir"
211 if [ $? != 0 ] ; then
212 pr "Fatal: cannot create session directory $sessionDir"
213 return $errSessionDir
216 # aliroot version written to a file
217 echo "$aliRootTag" > "$sessionDir/aliroot-version.txt"
219 # benchmark script, configuration and file list
220 cp benchmark.sh benchmark.config files.list "$sessionDir/"
221 if [ $? != 0 ] ; then
222 pr "Cannot copy benchmark configuration and script to $sessionDir"
223 return $errSessionDir
226 # append local files to the configuration
227 for f in benchmark.config.d/*.config ; do
228 [ ! -e "$f" ] && continue
230 echo "### from $f ###"
233 ) >> "$sessionDir/benchmark.config"
236 # command-line options override the configuration
237 if [ $# != 0 ] ; then
238 pr "Note: the following command-line options will override the corresponding ones in the config files:"
240 echo "### from the command line ###"
241 while [ $# -gt 0 ] ; do
244 if [ "$extraName" != "$1" ] ; then
245 pr " * $extraName = $extraVal"
251 ) >> "$sessionDir/benchmark.config"
254 # success: return the session tag and move to the session directory
255 pr "*** Creating new working session: $sessionTag ***"
256 pr "*** Use this name for future session operations ***"
261 # Undo the previous action
262 function PrepareSession_Undo() {
263 [ "$1" == '' ] || rm -rf "$sessionPrefix/$1" # no-op on an empty tag: "$sessionPrefix/" alone would wipe every session
266 # Move into the session tag directory. Usage:
268 # MoveToSessionDir <session_tag>
270 # Returns 0 on success, nonzero on error.
271 function MoveToSessionDir() {
272 originalWorkDir="$PWD"
273 cd "$sessionPrefix/$sessionTag" || return $errSessionDir
277 # Undo the previous action
278 function MoveToSessionDir_Undo() {
279 cd "$originalWorkDir"
282 # Load the benchmark configuration
283 function LoadConfig() {
284 source benchmark.config > /dev/null 2>&1
285 if [ $? != 0 ] ; then
286 pr "Cannot load benchmark configuration"
292 # Instantiate the validation VM
293 function InstantiateValidationVM() {
294 local sessionTag instanceId instanceIp ret raw
297 # check if we already have a vm
298 instanceId="$(cat instance-id.txt 2> /dev/null)"
299 if [ "$instanceId" != '' ] ; then
300 pr "Virtual machine $instanceId is already running"
301 return 0 # consider it a success
303 rm -f instance-id.txt instance-address.txt
306 # do we need to create a keypair?
307 if [ "$cloudKeyName" == '' ] ; then
308 pr "Note: temporary SSH keys will be created for this VM"
309 cloudKeyName="$sessionTag"
310 cloudKeyFile="$PWD/key.pem"
311 rm -f "$cloudKeyFile"
312 elif [ -e "$cloudKeyFile" ] ; then
313 # copy key to session dir
314 pr -n "Copying private key $cloudKeyFile to session directory..."
315 cp "$cloudKeyFile" 'key.pem' 2> /dev/null
316 if [ $? != 0 ] ; then
322 cloudKeyFile="$PWD/key.pem"
324 pr "Cannot find private key to access virtual machines: $cloudKeyFile"
328 # launch virtual machine and get its address
329 raw=$( RunVM "$cloudImageId" "$cloudProfile" "$cloudUserData" "$cloudKeyName" "$cloudKeyFile" )
332 if [ $ret == 0 ] ; then
333 instanceId=$( echo $raw | cut -d' ' -f1 )
334 instanceIp=$( echo $raw | cut -d' ' -f2 )
336 # write both parameters to files
337 echo $instanceId > 'instance-id.txt'
338 echo $instanceIp > 'instance-address.txt'
344 # Undo the previous action
345 function InstantiateValidationVM_Undo() {
348 if [ -e 'instance-id.txt' ] ; then
349 swallow euca-terminate-instances $(cat instance-id.txt)
350 if [ $? == 0 ] ; then
351 rm -f instance-id.txt instance-address.txt key.pem
356 # Generic SSH function to the VM
358 local instanceIp sshParams ret
359 instanceIp=$(cat instance-address.txt 2> /dev/null)
360 sshParams="-oUserKnownHostsFile=/dev/null -oStrictHostKeyChecking=no -oPasswordAuthentication=no -i $PWD/key.pem"
362 if [ "$1" == '--rsync-cmd' ] ; then
364 echo ssh $sshParams "$@"
367 ssh $sshParams "$cloudUserName"@"$instanceIp" "$@"
373 # Opens a shell to the remote VM
380 # Checks status of the validation
382 local raw ret screen exitcode
383 raw=$( VMSSH -t "screen -ls 2> /dev/null | grep -q .$screenName && echo -n 'screen_yes ' || echo -n 'screen_no ' ; cat alirelval/validation.done 2> /dev/null || echo 'not_done' ; true" 2> /dev/null )
384 raw=$( echo "$raw" | tr -cd '[:alnum:]_ ' ) # garbage removal
387 if [ "$ret" != 0 ] ; then
388 pr "Cannot get status"
389 return $errStatusUnavailable
395 if [ "$screen" == 'screen_yes' ] ; then
396 pr 'Status: validation still running'
397 return $errStatusRunning
399 if [ "$exitcode" == 'not_done' ] ; then
400 pr 'Status: validation not running'
401 return $errStatusNotRunning
402 elif [ "$exitcode" == 0 ] ; then
403 pr 'Status: validation completed successfully'
404 return $errStatusDoneOk
406 pr "Status: validation finished with errors (exitcode: $exitcode)"
407 return $errStatusDoneFail
413 # Wait for host to be ready
417 pr -n 'Waiting for the VM to accept SSH connections...'
419 while ! VMSSH -Tq true > /dev/null 2>&1 ; do
420 if [ $((++attempt)) -gt $maxSshConnectAttempts ] ; then
429 [ "$error" == 1 ] && return $errSshNotReady
435 function Validate() {
436 local instanceIp sshParams sessionTag
438 instanceIp=$(cat instance-address.txt 2> /dev/null)
439 sshParams="-oUserKnownHostsFile=/dev/null -oStrictHostKeyChecking=no -oPasswordAuthentication=no -i $PWD/key.pem"
441 # create helper script to launch benchmark
442 cat > run-benchmark.sh <<_EoF_
447 env ALIROOT_VERSION=$(cat aliroot-version.txt) ./benchmark.sh run $sessionTag files.list benchmark.config
452 echo "*** Validation finished with exitcode \$ret ***"
454 read -p 'Press ENTER to dismiss: automatic dismiss in 60 seconds...' -t 60
456 chmod +x run-benchmark.sh
459 pr 'Transferring files to the VM'
460 rsync -av -e "$(VMSSH --rsync-cmd)" $PWD/ $cloudUserName@$instanceIp:alirelval/ || return $errLaunchValidation
462 # open a screen that does something; note that the command is not executed if
463 # the screen already exists, which is what we want
464 # note: sleep necessary to avoid "dead" screens
465 VMSSH -t "screen -wipe > /dev/null 2>&1 ; if screen -ls | grep -q $screenName ; then ret=42 ; else screen -dmS $screenName alirelval/run-benchmark.sh ; ret=0 ; sleep 3 ; fi ; exit \$ret" # 42 = screen already exists; same \$screenName for check and creation
469 if [ $ret == 42 ] ; then
470 pr 'Validation already running inside a screen.'
472 pr 'Validation launched inside a screen.'
476 pr 'Check the progress status with:'
477 pr " $Prog --session $sessionTag --status"
478 pr 'Attach to the screen for debug:'
479 pr " $Prog --session $sessionTag --attach"
480 pr 'Open a shell to the virtual machine:'
481 pr " $Prog --session $sessionTag --shell"
488 # Attach current validation screen, if possible
493 VMSSH -t "( screen -wipe ; screen -rx $screenName ) > /dev/null 2>&1"
495 if [ $? != 0 ] ; then
496 pr "Cannot attach screen: check if validation is running with:"
497 pr " $Prog --session $sessionTag --status"
498 pr "or connect manually to the VM for debug:"
499 pr " $Prog --session $sessionTag --shell" # manual debug means a plain shell, not the attach that just failed
500 return $errAttachScreen
506 # Pick session interactively
507 function PickSession() {
508 local sessionTag sess listSessions
510 mkdir -p "$sessionPrefix"
513 [ ! -d "$sessionPrefix/$sess" ] && continue # keep only entries that are session directories
514 listSessions+=( "$sess" ) # quoted: one array element per entry, no word splitting or globbing
515 done < <( cd "$sessionPrefix" && ls -1t ) # most recent first; list nothing if the prefix cannot be entered
517 if [ ${#listSessions[@]} == 0 ] ; then
518 pr "No session available in session directory $sessionPrefix"
519 return $errPickSession
522 pr 'Available sessions (most recent first):'
523 for ((i=0; i<${#listSessions[@]}; i++)) ; do
524 pr "$( printf ' % 2d. %s' $((i+1)) "${listSessions[$i]}" )" # session name passed as data, never as the printf format
530 if [ "$i" -lt 0 ] || [ "${listSessions[$i]}" == '' ] ; then
532 return $errPickSession
535 sess="${listSessions[$i]}"
536 pr "You chose session $sess"
542 function RunAction() {
544 type "$1" > /dev/null 2>&1
545 if [ $? == 0 ] ; then
546 #pr "--> $1 (wd: $PWD)"
549 #pr "<-- $1 (ret: $ret, wd: $PWD)"
557 pr "$Prog -- by Dario Berzano <dario.berzano@cern.ch>"
558 pr 'Controls the Release Validation workflow on the cloud for AliRoot.'
560 pr "Usage 1: $Prog [--prepare|--launch] --aliroot <aliroot_tag> [-- arbitraryOpt1=value [arbitraryOpt2=value2...]]"
562 pr 'A new session is created to validate the specified AliRoot tag.'
564 pr ' --prepare : prepares the session directory containing the files needed'
565 pr ' for the validation'
566 pr ' --launch : launches the full validation process: prepares session,'
567 pr ' runs the virtual machine, launches the validation program'
568 pr ' --aliroot : the AliRoot tag to validate, in the form "vAN-20140610"'
570 pr 'Arbitrary options (in the form variable=value) can be specified after the'
571 pr 'double dash and will override the corresponding options in any of the'
572 pr 'configuration files.'
574 pr "Usage 2: $Prog [--runvm|--validate|--attach|--shell|--status] --session <session_tag>"
576 pr 'Runs the validation step by step after a session is created with'
577 pr '--prepare, and runs other actions on a certain session.'
579 pr ' --session : session identifier, e.g. vAN-20140610_20140612-123047-utc:'
580 pr ' if no session is specified an interactive prompt is'
582 pr ' --runvm : instantiates the head node of the validation cluster on'
584 pr ' --validate : runs the validation script on the head node for the'
585 pr ' current session. Head node must be already up, or it'
586 pr ' should be created with --runvm. If validation is running'
587 pr ' already, connects to the existing validation shell'
588 pr ' --attach : attach a currently running validation screen; remember to'
589 pr ' detach with Ctrl+A+D (and *not* Ctrl-C)'
590 pr ' --shell : does SSH on the head node'
591 pr ' --status : returns the status of the validation'
593 pr 'Example 1: run the validation of AliRoot tag vAN-20140610:'
595 pr " $Prog --aliroot vAN-20140610 --launch"
597 pr 'Example 2: do the same thing step-by-step:'
599 pr " $Prog --aliroot vAN-20140610 --prepare"
601 pr " $Prog --validate"
609 local Args aliRootTag EnterShell Actions sessionTag
612 # parse command line options
613 while [ $# -gt 0 ] ; do
629 Actions=( PrepareSession MoveToSessionDir LoadConfig InstantiateValidationVM WaitSsh Validate )
633 Actions=( PrepareSession MoveToSessionDir )
637 Actions=( MoveToSessionDir LoadConfig InstantiateValidationVM )
641 Actions=( MoveToSessionDir LoadConfig WaitSsh Validate )
645 Actions=( MoveToSessionDir LoadConfig WaitSsh Attach )
651 Actions=( MoveToSessionDir LoadConfig WaitSsh Shell )
655 Actions=( MoveToSessionDir LoadConfig WaitSsh Status )
670 pr "Invalid option: $1. Use --help for assistance."
671 return $errInvalidOpt
676 # check for the presence of the required tools in the $PATH
677 for T in euca-describe-instances euca-describe-regions euca-run-instances euca-terminate-instances euca-create-keypair euca-delete-keypair rsync ssh ; do # every external command this script invokes
678 command -v "$T" > /dev/null 2>&1 # shell builtin lookup; no dependency on an external 'which'
679 if [ $? != 0 ] ; then
680 pr "Cannot find one of the required commands: $T"
681 return $errMissingCmd
685 # test EC2 credentials
686 # euca-describe-regions > /dev/null 2>&1
687 # if [ $? != 0 ] ; then
688 # pr 'Cannot authenticate to EC2.'
689 # pr 'Note: you must have at least the following variables properly set in your environment:'
690 # pr " * EC2_URL (current value: ${EC2_URL-<not set>})"
691 # pr " * EC2_ACCESS_KEY (current value: ${EC2_ACCESS_KEY-<not set>})"
692 # pr " * EC2_SECRET_KEY (current value: ${EC2_SECRET_KEY-<not set>})"
697 if [ ${#Actions[@]} == 0 ] ; then
698 pr 'Nothing to do. Use --help for assistance.'
699 return $errInvalidOpt
703 for ((i=0; i<${#Actions[@]}; i++)) ; do
707 if [ "$A" == 'PrepareSession' ] ; then
708 # special action returning the session tag
709 if [ "$aliRootTag" == '' ] ; then
710 pr 'Specify an AliRoot version with --aliroot <tag>'
711 return $errInvalidOpt
713 if [ "$sessionTag" != '' ] ; then
714 pr 'Cannot use --session with --prepare. Use --help for assistance.'
715 return $errInvalidOpt
717 sessionTag=$( RunAction "$A" "$aliRootTag" "$@" )
720 if [ "$sessionTag" == '' ] ; then
721 sessionTag=$( PickSession )
723 [ $ret != 0 ] && return $ret
725 RunAction "$A" "$sessionTag"
729 # 100 to 140 --> not errors
730 ( [ $ret != 0 ] && ( [ $ret -ge 100 ] || [ $ret -le 140 ] ) ) && break
736 if [ $ret != 0 ] && ( [ $ret -lt 100 ] || [ $ret -gt 140 ] ) ; then # undo only on real errors: codes 100..140 are status results, not failures
737 for ((; i>=0; i--)) ; do
738 RunAction "${Actions[$i]}_Undo" "$sessionTag"