4 # alirelval -- by Dario Berzano <dario.berzano@cern.ch>
6 # Controls the release validation submission by managing the validation virtual
# Returned by the status check when the remote status query fails
# ("Cannot get status").
24 errStatusUnavailable=10
30 # error codes not treated as errors (100 to 140)
# Returned by the status check when the remote side answers 'not_done', i.e.
# <sessionTag>/validation.done could not be read.
32 errStatusNotRunning=101
# Upper bound on VM launch attempts before giving up.
37 maxVmLaunchAttempts=100
# Upper bound on SSH connection probes while waiting for the VM.
38 maxSshConnectAttempts=400
41 # working directory prefix
42 sessionPrefix="$HOME/.alice-release-validation"
44 # screen name: <screenPrefix>-<sessionTag>
45 screenPrefix='AliRelVal'
# Fragment of the message printer: a leading '-n' argument is detected here
# (presumably it sets $nl to suppress the trailing newline -- TODO confirm).
57 if [ "$1" == '-n' ] ; then
# Messages are printed in bold (ANSI escape) on stderr, so they do not mix with
# values that callers capture from stdout with $( ... ).
61 echo $nl -e "\033[1m$@\033[m" >&2
# UTC timestamp in the form YYYYmmdd-HHMMSS-utc.
66 date -u +%Y%m%d-%H%M%S-utc
# Creates a temporary file under /tmp and prints its name.
71 mktemp /tmp/alirelval-XXXX
74 # Swallow output. Show only if something goes wrong
80 if [ $ret != 0 ] ; then
81 pr "Command failed (exit status: $ret): $@"
88 # Launch a VM. Create the keypair if the given keyfile does not exist. Syntax:
90 # RunVM <image_id> <profile> <user_data> <key_name> <key_file>
92 # Returns 0 on success, nonzero on failure. Instance ID and IP address are returned on stdout as "<instance_id> <ip_address>".
94 local imageId profile userData keyName
100 local raw iip iid ret attempt createdKeypair error
102 # keypair part: if file does not exist, create keypair
103 if [ ! -e "$keyFile" ] ; then
104 pr "Creating a new keypair: $keyName (private key: $keyFile)"
105 swallow euca-create-keypair -f "$keyFile" "$keyName"
106 if [ $? != 0 ] ; then
107 pr 'Problems creating the keypair'
114 pr 'Attempting to run virtual machine'
# the number of launch attempts is bounded by maxVmLaunchAttempts
119 if [ $((++attempt)) -gt $maxVmLaunchAttempts ] ; then
120 pr " * Reached maximum number of attempts, giving up"
# a keypair created by this call is removed again, together with its key file
121 if [ "$createdKeypair" == 1 ] ; then
122 ( euca-delete-keypair "$keyName" ; rm -f "$keyFile" ) > /dev/null 2>&1
125 elif [ $attempt != 1 ] ; then
126 pr " * Pausing between retries"
130 pr -n " * Launching VM (attempt #$attempt/$maxVmLaunchAttempts)..."
# stderr is merged into stdout so that error text ends up in $raw
133 raw=$( euca-run-instances "$imageId" -t "$profile" -d "$userData" -k "$keyName" 2>&1 )
# instance ID: second field of the first output line starting with INSTANCE
135 iid=$( echo "$raw" | egrep '^INSTANCE' | head -n1 | awk '{ print $2 }' )
136 if [ $ret != 0 ] || [ "$iid" == '' ] ; then
137 # 'hard' error, but can be temporary
138 pr 'error: message follows'
146 pr " * VM has instance ID $iid"
147 pr -n " * Waiting for IP address..."
149 # wait for address loop
# NOTE(review): maxVmAddressWait is not defined in the visible part of the file,
# and the counter i is not in the visible local lists -- confirm both.
151 for ((i=0; i<$maxVmAddressWait; i++)) ; do
# all instances are listed, then filtered by instance ID
153 raw=$( euca-describe-instances 2>&1 | grep -E '^INSTANCE' | grep "$iid" | head -n1 )
# a case-insensitive "error" anywhere in the instance line counts as error state
156 echo "$raw" | grep -i error -q
157 if [ $? == 0 ] ; then
158 pr ; pr " * VM went to error state"
163 # no error: try to parse address (NOTE: only IPv4 for the moment)
# grep -o prints every dotted-quad match on its own line
164 iip=$( echo "$raw" | grep -oE '([0-9]{1,3}\.){3}[0-9]{1,3}' )
165 if [ "$iip" != '' ] ; then
175 # do we have address?
176 if [ "$iip" != '' ] ; then
177 pr " * VM has address $iip"
181 # we don't: terminate (timeout)
182 [ "$error" != 1 ] && pr 'timeout'
183 pr " * Terminating instance $iid"
184 euca-terminate-instances "$iid" > /dev/null 2>&1
189 [ "$createdKeypair" == 1 ] && euca-delete-keypair "$keyName" > /dev/null 2>&1
# the caller splits this line on spaces: field 1 is the ID, field 2 the address
190 echo "$iid $iip" # must be parsed
195 # Prepare the validation session directory. Syntax:
197 # PrepareSession <aliroot_tag> <new_session_name>
199 # Returns 0 on success, nonzero on failure. Session tag returned on stdout.
200 function PrepareSession() {
201 local aliRootTag sessionTag sessionDir
204 # session tag can be "auto" or any user-specified value
205 if [ "$2" != 'auto' ] ; then
# automatic tag: <aliroot_tag>_<UTC timestamp printed by ndate>
208 sessionTag="${aliRootTag}_$(ndate)"
211 sessionDir="$sessionPrefix/$sessionTag"
213 # session directory already exists? abort
214 if [ -d "$sessionDir" ] ; then
215 pr "Session directory already exists, aborting"
216 return $errSessionDir
219 # create working directory
220 mkdir -p "$sessionDir"
221 if [ $? != 0 ] ; then
222 pr "Fatal: cannot create session directory $sessionDir"
223 return $errSessionDir
226 # aliroot version written to a file
227 echo "$aliRootTag" > "$sessionDir/aliroot-version.txt"
229 # benchmark script, benchmark config, cloud config and file list
# the four sources are looked up in the current directory; -L copies the
# targets of symbolic links instead of the links themselves
230 cp -L benchmark.sh cloud.config benchmark.config files.list "$sessionDir/"
231 if [ $? != 0 ] ; then
232 pr "Cannot copy configuration files to $sessionDir"
234 return $errSessionDir
237 # append local files to the configuration
238 for f in benchmark.config.d/*.config ; do
# an unmatched glob stays literal and does not exist: skip it
239 [ ! -e "$f" ] && continue
241 echo "### from $f ###"
244 ) >> "$sessionDir/benchmark.config"
247 # command-line options override the configuration
248 if [ $# != 0 ] ; then
249 pr "Note: the following command-line options will override the corresponding ones in the config files:"
251 echo "### from the command line ###"
252 while [ $# -gt 0 ] ; do
# NOTE(review): extraName/extraVal are assigned on lines not visible here; an
# argument for which extraName equals the whole argument is not reported
255 if [ "$extraName" != "$1" ] ; then
256 pr " * $extraName = $extraVal"
262 ) >> "$sessionDir/benchmark.config"
265 # success: return the session tag and move to the session directory
266 pr "*** Creating new working session: $sessionTag ***"
267 pr "*** Use this name for future session operations ***"
272 # Undo the previous action: remove the session directory $sessionPrefix/$1.
# $1 is the session tag. An empty tag is ignored: "$sessionPrefix/" on its own
# is the directory that holds every session, and the caller passes whatever it
# captured from PrepareSession's stdout, which can be empty when that step
# failed.
273 function PrepareSession_Undo() {
274 if [ -n "$1" ] ; then rm -rf "$sessionPrefix/$1" ; fi
277 # Recycle the VM from an existing session
# Arguments: $1 = new session tag, $2 = tag of the session to recycle from.
# Copies instance-id.txt, instance-address.txt and key.pem from the source
# session directory into the current directory. Returns $errRecycleSession as
# soon as one of them cannot be copied.
278 function RecycleSession() {
279 local sessionTag="$1"
280 local fromSessionTag="$2"
281 local fromSessionDir="$sessionPrefix/$fromSessionTag"
284 for f in 'instance-id.txt' 'instance-address.txt' 'key.pem' ; do
# destination is a relative name: the file lands in the current directory
285 cp -L "$fromSessionDir/$f" "$f" > /dev/null 2>&1
286 if [ $? != 0 ] ; then
287 pr "Cannot copy $f from the source session dir $fromSessionDir"
288 return $errRecycleSession
295 # Move into the session tag directory. Usage:
297 # MoveToSessionDir <session_tag>
299 # Returns 0 on success, nonzero on error.
300 function MoveToSessionDir() {
# remember the current directory so that MoveToSessionDir_Undo can go back
301 originalWorkDir="$PWD"
# use the documented argument; when it is missing or empty fall back to the
# caller's $sessionTag, which is the only value this function used to read
302 cd "$sessionPrefix/${1:-$sessionTag}" || return $errSessionDir
306 # Undo the previous action: go back to the directory that MoveToSessionDir
# saved in originalWorkDir.
307 function MoveToSessionDir_Undo() {
308 cd "$originalWorkDir"
311 # Load the cloud configuration: sources cloud.config from the current
# directory into this shell and discards anything it prints.
312 function LoadConfig() {
313 source cloud.config > /dev/null 2>&1
314 if [ $? != 0 ] ; then
315 pr "Cannot load benchmark configuration"
321 # Instantiate the validation VM
# Works in the current directory: instance-id.txt, instance-address.txt and
# key.pem are read and written there. Uses the cloudKeyName, cloudKeyFile,
# cloudImageId, cloudProfile and cloudUserData variables (presumably provided
# by cloud.config -- TODO confirm).
322 function InstantiateValidationVM() {
323 local sessionTag instanceId instanceIp ret raw
326 # check if we already have a vm
# only a non-empty instance-id.txt is checked at this point
327 instanceId="$(cat instance-id.txt 2> /dev/null)"
328 if [ "$instanceId" != '' ] ; then
329 pr "Virtual machine $instanceId is already running"
330 return 0 # consider it a success
332 rm -f instance-id.txt instance-address.txt
335 # do we need to create a keypair?
336 if [ "$cloudKeyName" == '' ] ; then
337 pr "Note: temporary SSH keys will be created for this VM"
# the keypair is named after the session; any stale key file is removed so that
# RunVM finds it missing and creates the keypair
338 cloudKeyName="$sessionTag"
339 cloudKeyFile="$PWD/key.pem"
340 rm -f "$cloudKeyFile"
341 elif [ -e "$cloudKeyFile" ] ; then
342 # copy key to session dir
343 pr -n "Copying private key $cloudKeyFile to session directory..."
345 cp -L "$cloudKeyFile" 'key.pem' 2> /dev/null
346 if [ $? != 0 ] ; then
# from here on the copy inside the session directory is the key in use
352 cloudKeyFile="$PWD/key.pem"
354 pr "Cannot find private key to access virtual machines: $cloudKeyFile"
358 # launch virtual machine and get its address
359 raw=$( RunVM "$cloudImageId" "$cloudProfile" "$cloudUserData" "$cloudKeyName" "$cloudKeyFile" )
362 if [ $ret == 0 ] ; then
# NOTE: $raw is expanded unquoted, so word splitting turns any run of
# whitespace (newlines included) into single spaces before cut sees it; quoting
# it would change the result when $raw spans several lines
363 instanceId=$( echo $raw | cut -d' ' -f1 )
364 instanceIp=$( echo $raw | cut -d' ' -f2 )
366 # write both parameters to files
367 echo $instanceId > 'instance-id.txt'
368 echo $instanceIp > 'instance-address.txt'
374 # Undo the previous action: terminate the instance recorded in
# instance-id.txt and, only if termination succeeded, remove the instance files
# and key.pem from the current directory.
375 function InstantiateValidationVM_Undo() {
378 if [ -e 'instance-id.txt' ] ; then
379 swallow euca-terminate-instances $(cat instance-id.txt)
380 if [ $? == 0 ] ; then
381 rm -f instance-id.txt instance-address.txt key.pem
386 # Generic SSH function to the VM
# The address is read from instance-address.txt and the identity from key.pem,
# both in the current directory; the remote user is $cloudUserName. Host key
# checking and password authentication are switched off. With --rsync-cmd as
# first argument the ssh command line is printed instead of being executed
# (Validate hands it to rsync -e).
388 local instanceIp sshParams ret
389 instanceIp=$(cat instance-address.txt 2> /dev/null)
390 sshParams="-oUserKnownHostsFile=/dev/null -oStrictHostKeyChecking=no -oPasswordAuthentication=no -i $PWD/key.pem"
# NOTE: $sshParams is expanded unquoted below so that it splits into separate
# options; a $PWD containing whitespace would therefore break the -i argument
392 if [ "$1" == '--rsync-cmd' ] ; then
394 echo ssh $sshParams "$@"
397 ssh $sshParams "$cloudUserName"@"$instanceIp" "$@"
403 # Opens a shell to the remote VM
410 # Checks status of the validation
# A single remote command prints two words: screen_yes or screen_no (is a
# screen named <screenPrefix>-<sessionTag> listed?), followed by the content of
# <sessionTag>/validation.done, or not_done when that file cannot be read.
# The outcome is reported through pr and through the errStatus* return codes.
412 local raw ret screen exitcode sessionTag
414 raw=$( VMSSH -t "screen -ls 2> /dev/null | grep -q .${screenPrefix}-${sessionTag} && echo -n 'screen_yes ' || echo -n 'screen_no ' ; cat $sessionTag/validation.done 2> /dev/null || echo 'not_done' ; true" 2> /dev/null )
# keep only alphanumerics, underscores and spaces
415 raw=$( echo "$raw" | tr -cd '[:alnum:]_ ' ) # garbage removal
418 if [ "$ret" != 0 ] ; then
419 pr "Cannot get status"
420 return $errStatusUnavailable
# a listed screen is checked before the content of validation.done
426 if [ "$screen" == 'screen_yes' ] ; then
427 pr 'Status: validation still running'
428 return $errStatusRunning
430 if [ "$exitcode" == 'not_done' ] ; then
431 pr 'Status: validation not running'
432 return $errStatusNotRunning
433 elif [ "$exitcode" == 0 ] ; then
434 pr 'Status: validation completed successfully'
435 return $errStatusDoneOk
437 pr "Status: validation finished with errors (exitcode: $exitcode)"
438 return $errStatusDoneFail
444 # Wait for host to be ready
# Probes the VM with a no-op SSH command (VMSSH -Tq true) until it succeeds or
# more than maxSshConnectAttempts probes have been made; returns
# $errSshNotReady when the error flag has been set.
448 pr -n 'Waiting for the VM to accept SSH connections...'
450 while ! VMSSH -Tq true > /dev/null 2>&1 ; do
451 if [ $((++attempt)) -gt $maxSshConnectAttempts ] ; then
460 [ "$error" == 1 ] && return $errSshNotReady
# Launch the validation on the VM: generate run-benchmark.sh, copy the current
# directory to <sessionTag>/ on the VM with rsync (failure returns
# $errLaunchValidation), then start the script inside a detached screen unless
# a screen with the same name is already listed.
466 function Validate() {
467 local instanceIp sshParams sessionTag
469 instanceIp=$(cat instance-address.txt 2> /dev/null)
470 sshParams="-oUserKnownHostsFile=/dev/null -oStrictHostKeyChecking=no -oPasswordAuthentication=no -i $PWD/key.pem"
472 # create helper script to launch benchmark
# the here-document delimiter is unquoted: $(...) and $sessionTag are expanded
# while the script is generated, the escaped \$ret is left for the script itself
473 cat > run-benchmark.sh <<_EoF_
479 env ALIROOT_VERSION=$(cat aliroot-version.txt) ./benchmark.sh run $sessionTag files.list benchmark.config 2>&1 | tee run-benchmark.log
484 echo "*** Validation finished with exitcode \$ret ***"
486 read -p 'Press ENTER to dismiss: automatic dismiss in 60 seconds...' -t 60
488 chmod +x run-benchmark.sh
491 pr 'Transferring files to the VM'
# rsync runs over the ssh command line printed by VMSSH --rsync-cmd
492 rsync -av -e "$(VMSSH --rsync-cmd)" $PWD/ $cloudUserName@$instanceIp:$sessionTag/ || return $errLaunchValidation
494 # open a screen that does something; note that the command is not executed if
495 # the screen already exists, which is what we want
496 # note: sleep necessary to avoid "dead" screens
497 VMSSH -t "screen -wipe > /dev/null 2>&1 ; if screen -ls | grep -q ${screenPrefix}-${sessionTag} ; then ret=42 ; else screen -dmS ${screenPrefix}-${sessionTag} $sessionTag/run-benchmark.sh ; ret=0 ; sleep 3 ; fi ; exit \$ret"
# 42 is the exit status the remote command uses for "screen already exists"
501 if [ $ret == 42 ] ; then
502 pr 'Validation already running inside a screen.'
504 pr 'Validation launched inside a screen.'
508 pr 'Check the progress status with:'
509 pr " $Prog --session $sessionTag --status"
510 pr 'Attach to the screen for debug:'
511 pr " $Prog --session $sessionTag --attach"
512 pr 'Open a shell to the virtual machine:'
513 pr " $Prog --session $sessionTag --shell"
520 # Attach current validation screen, if possible
# Wipes dead screens on the VM, then attaches to the screen named
# <screenPrefix>-<sessionTag>. On failure prints how to check the status or
# open a shell, and returns $errAttachScreen.
525 VMSSH -t "( screen -wipe ; screen -rx ${screenPrefix}-${sessionTag} ) > /dev/null 2>&1"
527 if [ $? != 0 ] ; then
528 pr "Cannot attach screen: check if validation is running with:"
529 pr " $Prog --session $sessionTag --status"
530 pr "or connect manually to the VM for debug:"
# --shell, not --attach: attaching is the operation that has just failed
531 pr " $Prog --session $sessionTag --shell"
532 return $errAttachScreen
538 # Pick session interactively
# Lists the directories found under $sessionPrefix, most recently modified
# first, shows an optional message, and reports the chosen session through pr.
# Returns $errPickSession when there is no session or the choice is not valid.
539 function PickSession() {
540 local sessionTag sess listSessions mess
543 mkdir -p "$sessionPrefix"
546 [ ! -d "$sessionPrefix/$sess" ] && continue
# quoted: a name is stored as one array element, without glob expansion
547 listSessions+=( "$sess" )
# quoted path, and no listing at all if cd fails (ls would otherwise list the
# current directory)
548 done < <( cd "$sessionPrefix" && ls -1t )
550 if [ ${#listSessions[@]} == 0 ] ; then
551 pr "No session available in session directory $sessionPrefix"
552 return $errPickSession
555 # print user message if provided
556 [ "$mess" != '' ] && pr "$mess"
558 pr 'Available sessions (most recent first):'
559 for ((i=0; i<${#listSessions[@]}; i++)) ; do
# the session name is passed as data, not as part of the printf format, so a
# '%' or backslash in a name is printed literally
560 pr "$( printf ' % 2d. %s' "$((i+1))" "${listSessions[$i]}" )"
566 if [ "$i" -lt 0 ] || [ "${listSessions[$i]}" == '' ] ; then
568 return $errPickSession
571 sess="${listSessions[$i]}"
572 pr "You chose session $sess"
# Run the action named by $1, but only when `type` can resolve that name.
# Presumably this is what allows the undo loop to request <Action>_Undo for
# every action, including those without an undo function -- TODO confirm.
578 function RunAction() {
580 type "$1" > /dev/null 2>&1
581 if [ $? == 0 ] ; then
582 #pr "--> $1 (wd: $PWD)"
585 #pr "<-- $1 (ret: $ret, wd: $PWD)"
# Help text, printed through pr.
593 pr "$Prog -- by Dario Berzano <dario.berzano@cern.ch>"
594 pr 'Controls the Release Validation workflow on the cloud for AliRoot.'
596 pr "Usage 1: $Prog [--prepare|--launch|--recycle] [--from-session] --aliroot <aliroot_tag> [--session <custom_session_tag>] [-- arbitraryOpt1=value [arbitraryOpt2=value2...]]"
598 pr 'A new session is created to validate the specified AliRoot tag.'
600 pr ' --prepare : prepares the session directory containing the files needed'
601 pr ' for the validation'
602 pr ' --recycle : prepares a new session by recycling the head node from an'
603 pr ' existing one. Source session is specified via the'
604 pr ' --from-session switch or it can be interactively selected'
605 pr ' --launch : launches the full validation process: prepares session,'
606 pr ' runs the virtual machine, launches the validation program'
607 pr ' --aliroot : the AliRoot tag to validate, in the form "vAN-20140610"'
608 pr ' --session : custom session name to provide to the validation session:'
609 pr ' if omitted, defaults to <aliroot_tag>_<utc_datetime_now>'
611 pr 'Arbitrary options (in the form variable=value) can be specified after the'
612 pr 'double dash and will override the corresponding options in any of the'
613 pr 'configuration files.'
# --attach is listed too: it is described below and is a per-session action
615 pr "Usage 2: $Prog [--runvm|--validate|--attach|--shell|--status] --session <session_tag>"
617 pr 'Runs the validation step by step after a session is created with'
618 pr '--prepare, and runs other actions on a certain session.'
620 pr ' --session : session identifier, e.g. vAN-20140610_20140612-123047-utc:'
621 pr ' if no session is specified an interactive prompt is'
623 pr ' --runvm : instantiates the head node of the validation cluster on'
625 pr ' --validate : runs the validation script on the head node for the'
626 pr ' current session. Head node must be already up, or it'
627 pr ' should be created with --runvm. If validation is running'
628 pr ' already, connects to the existing validation shell'
629 pr ' --attach : attach a currently running validation screen; remember to'
630 pr ' detach with Ctrl+A+D (and *not* Ctrl-C)'
631 pr ' --shell : does SSH on the head node'
632 pr ' --status : returns the status of the validation'
634 pr 'Example 1: run the validation of AliRoot tag vAN-20140610:'
636 pr " $Prog --aliroot vAN-20140610 --launch"
638 pr 'Example 2: do the same thing step-by-step:'
640 pr " $Prog --aliroot vAN-20140610 --prepare"
642 pr " $Prog --validate"
650 local Args aliRootTag EnterShell Actions sessionTag fromSessionTag
653 # parse command line options
654 while [ $# -gt 0 ] ; do
# Each assignment below belongs to one command-line switch (the switch labels
# are on lines not visible here) and selects an ordered list of step functions.
# Lists that begin with PrepareSession create a new session; the others start
# by moving into an existing session directory.
674 Actions=( PrepareSession MoveToSessionDir LoadConfig InstantiateValidationVM WaitSsh Validate )
678 Actions=( PrepareSession MoveToSessionDir )
682 Actions=( PrepareSession MoveToSessionDir RecycleSession )
686 Actions=( MoveToSessionDir LoadConfig InstantiateValidationVM )
690 Actions=( MoveToSessionDir LoadConfig WaitSsh Validate )
694 Actions=( MoveToSessionDir LoadConfig WaitSsh Attach )
700 Actions=( MoveToSessionDir LoadConfig WaitSsh Shell )
704 Actions=( MoveToSessionDir LoadConfig WaitSsh Status )
719 pr "Invalid option: $1. Use --help for assistance."
720 return $errInvalidOpt
725 # check for the presence of the required tools in the $PATH
# euca-terminate-instances and ssh are part of the list because this script
# invokes both; command -v is the shell's own lookup and needs no external
# `which` program
726 for T in euca-describe-instances euca-describe-regions euca-run-instances euca-terminate-instances euca-create-keypair euca-delete-keypair rsync ssh ; do
727 command -v "$T" > /dev/null 2>&1
728 if [ $? != 0 ] ; then
729 pr "Cannot find one of the required commands: $T"
730 return $errMissingCmd
734 # test EC2 credentials
735 # euca-describe-regions > /dev/null 2>&1
736 # if [ $? != 0 ] ; then
737 # pr 'Cannot authenticate to EC2.'
738 # pr 'Note: you must have at least the following variables properly set in your environment:'
739 # pr " * EC2_URL (current value: ${EC2_URL-<not set>})"
740 # pr " * EC2_ACCESS_KEY (current value: ${EC2_ACCESS_KEY-<not set>})"
741 # pr " * EC2_SECRET_KEY (current value: ${EC2_SECRET_KEY-<not set>})"
746 if [ ${#Actions[@]} == 0 ] ; then
747 pr 'Nothing to do. Use --help for assistance.'
748 return $errInvalidOpt
# run the selected steps in order; the counter i is reused after the loop as
# the starting index of the undo loop
752 for ((i=0; i<${#Actions[@]}; i++)) ; do
756 if [ "$A" == 'PrepareSession' ] ; then
757 # special action returning the session tag
758 if [ "$aliRootTag" == '' ] ; then
759 pr 'Specify an AliRoot version with --aliroot <tag>'
760 return $errInvalidOpt
762 [ "$sessionTag" == '' ] && sessionTag='auto'
# stdout of the step becomes the session tag; the remaining command-line
# arguments are forwarded to it
763 sessionTag=$( RunAction "$A" "$aliRootTag" "$sessionTag" "$@" )
765 elif [ "$A" == 'RecycleSession' ] ; then
766 # special action requiring additional parameters
767 if [ "$fromSessionTag" == '' ] ; then
# no source session given: ask the user; the answer is read from stdout
768 fromSessionTag=$( PickSession 'Select a source session to recycle.' )
770 [ $ret != 0 ] && break
772 RunAction "$A" "$sessionTag" "$fromSessionTag"
# every other step receives only the session tag, picked interactively when
# none is known yet
775 if [ "$sessionTag" == '' ] ; then
776 sessionTag=$( PickSession )
778 [ $ret != 0 ] && break
780 RunAction "$A" "$sessionTag"
784 # 100 to 140 --> not errors
# stop at the first real error: nonzero and outside the 100..140 range (the
# former test "-ge 100 || -le 140" was true for every value)
785 ( [ $ret != 0 ] && ( [ $ret -lt 100 ] || [ $ret -gt 140 ] ) ) && break
# undo the steps in reverse order, starting from the one that failed, only
# after a real error (same range test as above)
791 if [ $ret != 0 ] && ( [ $ret -lt 100 ] || [ $ret -gt 140 ] ) ; then
792 for ((; i>=0; i--)) ; do
793 RunAction "${Actions[$i]}_Undo" "$sessionTag"