4 # alirelval -- by Dario Berzano <dario.berzano@cern.ch>
6 # Controls the release validation submission by managing the validation virtual
24 errStatusUnavailable=10
30 # error codes not treated as errors (100 to 140)
32 errStatusNotRunning=101
37 maxVmLaunchAttempts=100
38 maxSshConnectAttempts=400
41 # working directory prefix
42 sessionPrefix="$HOME/.alice-release-validation"
44 # screen name: <screenPrefix>-<sessionTag>
45 screenPrefix='AliRelVal'
# Fragment of the message helper that the rest of the file calls as `pr` (its
# header is not in view): echoes its arguments in bold on stderr. A leading -n
# is special-cased; presumably it sets $nl so the newline is suppressed --
# the assignment is not visible here, confirm.
57 if [ "$1" == '-n' ] ; then
# NOTE(review): "$@" embedded in a longer string attaches the bold prefix to
# the first argument and the reset suffix to the last (ShellCheck SC2145);
# "$*" expresses the intended single string.
61 echo $nl -e "\033[1m$@\033[m" >&2
66 date -u +%Y%m%d-%H%M%S-utc
71 mktemp /tmp/alirelval-XXXX
74 # Swallow output. Show only if something goes wrong
# NOTE(review): the header and the command invocation are not in view; $ret is
# presumably the exit status of the wrapped command -- confirm.
80 if [ $ret != 0 ] ; then
# NOTE(review): "$@" inside a longer string (ShellCheck SC2145); "$*" would
# join the command words into the one message that is intended.
81 pr "Command failed (exit status: $ret): $@"
88 # Launch a VM. Create the keypair if the given keyfile does not exist. Syntax:
90 # RunVM <image_id> <profile> <user_data> <key_name> <key_file>
92 # Returns 0 on success, nonzero on failure. IP address is returned on stdout.
# NOTE(review): stdout actually carries "<instance_id> <ip>" (see the echo
# marked "must be parsed" below); InstantiateValidationVM splits it with cut.
# All progress messages go through pr, i.e. to stderr, so stdout stays clean.
94 local imageId profile userData keyName
# NOTE(review): keyFile and the loop index i used below do not appear in
# either visible `local` list -- confirm they are declared in lines not shown.
100 local raw iip iid ret attempt createdKeypair error
102 # keypair part: if file does not exist, create keypair
103 if [ ! -e "$keyFile" ] ; then
104 pr "Creating a new keypair: $keyName (private key: $keyFile)"
105 swallow euca-create-keypair -f "$keyFile" "$keyName"
106 if [ $? != 0 ] ; then
107 pr 'Problems creating the keypair'
114 pr 'Attempting to run virtual machine'
# Launch retry loop: bounded by maxVmLaunchAttempts; when the limit is hit, a
# keypair created by this call is deleted again (cloud side and local file).
119 if [ $((++attempt)) -gt $maxVmLaunchAttempts ] ; then
120 pr " * Reached maximum number of attempts, giving up"
121 if [ "$createdKeypair" == 1 ] ; then
122 ( euca-delete-keypair "$keyName" ; rm -f "$keyFile" ) > /dev/null 2>&1
125 elif [ $attempt != 1 ] ; then
126 pr " * Pausing between retries"
130 pr -n " * Launching VM (attempt #$attempt/$maxVmLaunchAttempts)..."
133 raw=$( euca-run-instances "$imageId" -t "$profile" -d "$userData" -k "$keyName" 2>&1 )
# The instance ID is the second field of the first line starting with INSTANCE.
# NOTE(review): `egrep` is deprecated in GNU grep; the address lookup further
# down already uses `grep -E`, so this line is inconsistent with it.
135 iid=$( echo "$raw" | egrep '^INSTANCE' | head -n1 | awk '{ print $2 }' )
136 if [ $ret != 0 ] || [ "$iid" == '' ] ; then
137 # 'hard' error, but can be temporary
138 pr 'error: message follows'
146 pr " * VM has instance ID $iid"
147 pr -n " * Waiting for IP address..."
149 # wait for address loop
151 for ((i=0; i<$maxVmAddressWait; i++)) ; do
153 raw=$( euca-describe-instances 2>&1 | grep -E '^INSTANCE' | grep "$iid" | head -n1 )
# Any case-insensitive "error" in the instance line is treated as a failed VM.
156 echo "$raw" | grep -i error -q
157 if [ $? == 0 ] ; then
158 pr ; pr " * VM went to error state"
163 # no error: try to parse address (NOTE: only IPv4 for the moment)
# NOTE(review): grep -o prints every match on its own line, so $iip may hold
# several addresses if the INSTANCE line lists more than one; the caller's
# unquoted `echo $raw | cut -f2` then keeps only the first -- confirm intended.
164 iip=$( echo "$raw" | grep -oE '([0-9]{1,3}\.){3}[0-9]{1,3}' )
165 if [ "$iip" != '' ] ; then
175 # do we have address?
176 if [ "$iip" != '' ] ; then
177 pr " * VM has address $iip"
181 # we don't: terminate (timeout)
182 [ "$error" != 1 ] && pr 'timeout'
183 pr " * Terminating instance $iid"
184 euca-terminate-instances "$iid" > /dev/null 2>&1
189 [ "$createdKeypair" == 1 ] && euca-delete-keypair "$keyName" > /dev/null 2>&1
190 echo "$iid $iip" # must be parsed
195 # Prepare the validation session directory. Syntax:
197 # PrepareSession <aliroot_tag>
199 # Returns 0 on success, nonzero on failure. Session tag returned on stdout.
200 function PrepareSession() {
201 local aliRootTag sessionTag sessionDir
# The session tag is "<aliroot_tag>_<output of ndate>"; the session directory
# lives under $sessionPrefix.
204 sessionTag="${aliRootTag}_$(ndate)"
205 sessionDir="$sessionPrefix/$sessionTag"
207 # session directory already exists? abort
208 if [ -d "$sessionDir" ] ; then
209 pr "Session directory already exists, aborting"
210 return $errSessionDir
213 # create working directory
214 mkdir -p "$sessionDir"
215 if [ $? != 0 ] ; then
216 pr "Fatal: cannot create session directory $sessionDir"
217 return $errSessionDir
220 # aliroot version written to a file
221 echo "$aliRootTag" > "$sessionDir/aliroot-version.txt"
223 # benchmark script, configuration and file list
# NOTE(review): the three source files are resolved relative to the caller's
# current directory. On failure the visible lines return without removing the
# just-created $sessionDir -- confirm whether cleanup happens elsewhere.
224 cp benchmark.sh benchmark.config files.list "$sessionDir/"
225 if [ $? != 0 ] ; then
226 pr "Cannot copy benchmark configuration and script to $sessionDir"
227 return $errSessionDir
230 # append local files to the configuration
231 for f in benchmark.config.d/*.config ; do
# Skips the literal pattern when the glob matches nothing.
232 [ ! -e "$f" ] && continue
234 echo "### from $f ###"
237 ) >> "$sessionDir/benchmark.config"
240 # command-line options override the configuration
241 if [ $# != 0 ] ; then
242 pr "Note: the following command-line options will override the corresponding ones in the config files:"
244 echo "### from the command line ###"
245 while [ $# -gt 0 ] ; do
# NOTE(review): extraName/extraVal are derived in lines not shown; this test
# presumably filters out arguments that are not of the form name=value.
248 if [ "$extraName" != "$1" ] ; then
249 pr " * $extraName = $extraVal"
255 ) >> "$sessionDir/benchmark.config"
258 # success: return the session tag and move to the session directory
259 pr "*** Creating new working session: $sessionTag ***"
260 pr "*** Use this name for future session operations ***"
265 # Undo the previous action
# Removes the session directory named by $1 from under $sessionPrefix.
266 function PrepareSession_Undo() {
# NOTE(review): with an empty $1 this expands to "$sessionPrefix/" and deletes
# EVERY session. Main passes "$sessionTag", which is taken from PrepareSession's
# stdout and is presumably empty when PrepareSession fails early -- guard with
# "${1:?}" or an explicit emptiness check before removing anything.
267 rm -rf "$sessionPrefix/$1"
270 # Recycle the VM from an existing session
# Arguments: $1 = new session tag, $2 = tag of the session to copy from.
# Returns $errRecycleSession as soon as one of the three files cannot be copied.
271 function RecycleSession() {
# NOTE(review): sessionTag is assigned but not used in the visible lines.
272 local sessionTag="$1"
273 local fromSessionTag="$2"
274 local fromSessionDir="$sessionPrefix/$fromSessionTag"
# The destination is a bare file name, so the copy lands in the current
# directory; Main runs MoveToSessionDir before this action.
277 for f in 'instance-id.txt' 'instance-address.txt' 'key.pem' ; do
278 cp "$fromSessionDir/$f" "$f" > /dev/null 2>&1
279 if [ $? != 0 ] ; then
280 pr "Cannot copy $f from the source session dir $fromSessionDir"
281 return $errRecycleSession
288 # Move into the session tag directory. Usage:
290 # MoveToSessionDir <session_tag>
292 # Returns 0 on success, nonzero on error.
293 function MoveToSessionDir() {
# Saves the starting directory in the global originalWorkDir so that
# MoveToSessionDir_Undo can return to it.
294 originalWorkDir="$PWD"
# NOTE(review): the documented <session_tag> argument ($1) is never read; the
# cd uses $sessionTag, which is not assigned in this function and so resolves
# to the caller's variable through dynamic scoping. It works when called from
# Main, but "$sessionPrefix/$1" would match the documented usage.
295 cd "$sessionPrefix/$sessionTag" || return $errSessionDir
299 # Undo the previous action
# Returns to the directory that MoveToSessionDir stored in originalWorkDir.
300 function MoveToSessionDir_Undo() {
301 cd "$originalWorkDir"
304 # Load the benchmark configuration
# Sources benchmark.config from the current directory into this shell, so
# whatever it assigns becomes visible to the later actions.
305 function LoadConfig() {
# NOTE(review): stdout and stderr of the sourced file are discarded, so a
# syntax error in the configuration is reported only by the generic message.
306 source benchmark.config > /dev/null 2>&1
307 if [ $? != 0 ] ; then
308 pr "Cannot load benchmark configuration"
314 # Instantiate the validation VM
# Works in the current (session) directory: reads and writes instance-id.txt,
# instance-address.txt and key.pem there. Uses the cloud* variables, which are
# not assigned in this file's visible lines (presumably set by benchmark.config).
315 function InstantiateValidationVM() {
316 local sessionTag instanceId instanceIp ret raw
319 # check if we already have a vm
# NOTE(review): a non-empty instance-id.txt is taken as proof that the VM is
# running; the cloud is not queried, so a stale file short-circuits the launch.
320 instanceId="$(cat instance-id.txt 2> /dev/null)"
321 if [ "$instanceId" != '' ] ; then
322 pr "Virtual machine $instanceId is already running"
323 return 0 # consider it a success
325 rm -f instance-id.txt instance-address.txt
328 # do we need to create a keypair?
# No key name configured: use the session tag as key name and a fresh key.pem
# in the session directory (RunVM creates the keypair when the file is absent).
329 if [ "$cloudKeyName" == '' ] ; then
330 pr "Note: temporary SSH keys will be created for this VM"
331 cloudKeyName="$sessionTag"
332 cloudKeyFile="$PWD/key.pem"
333 rm -f "$cloudKeyFile"
334 elif [ -e "$cloudKeyFile" ] ; then
335 # copy key to session dir
336 pr -n "Copying private key $cloudKeyFile to session directory..."
338 cp "$cloudKeyFile" 'key.pem' 2> /dev/null
339 if [ $? != 0 ] ; then
345 cloudKeyFile="$PWD/key.pem"
347 pr "Cannot find private key to access virtual machines: $cloudKeyFile"
351 # launch virtual machine and get its address
352 raw=$( RunVM "$cloudImageId" "$cloudProfile" "$cloudUserData" "$cloudKeyName" "$cloudKeyFile" )
355 if [ $ret == 0 ] ; then
# RunVM prints "<instance_id> <ip>". $raw is deliberately left unquoted here:
# word splitting collapses any newlines or extra blanks before cut sees them.
356 instanceId=$( echo $raw | cut -d' ' -f1 )
357 instanceIp=$( echo $raw | cut -d' ' -f2 )
359 # write both parameters to files
360 echo $instanceId > 'instance-id.txt'
361 echo $instanceIp > 'instance-address.txt'
367 # Undo the previous action
# Terminates the instance recorded in instance-id.txt and, only if that
# succeeds, removes the instance files and key.pem from the session directory.
368 function InstantiateValidationVM_Undo() {
371 if [ -e 'instance-id.txt' ] ; then
# NOTE(review): the command substitution is unquoted, and an empty file would
# call euca-terminate-instances with no instance argument.
372 swallow euca-terminate-instances $(cat instance-id.txt)
373 if [ $? == 0 ] ; then
# NOTE(review): key.pem is deleted locally, but no euca-delete-keypair is
# visible here for a keypair that was created temporarily -- confirm.
374 rm -f instance-id.txt instance-address.txt key.pem
379 # Generic SSH function to the VM
# Reads the target address from instance-address.txt in the current directory
# and authenticates with ./key.pem. With --rsync-cmd as first argument it only
# prints the ssh command line (used as rsync's -e value) instead of connecting.
381 local instanceIp sshParams ret
382 instanceIp=$(cat instance-address.txt 2> /dev/null)
# Host key checking and password authentication are switched off.
# NOTE(review): the options are kept in one string and expanded unquoted below,
# so a session path containing whitespace would split the -i argument; an
# array would avoid that.
383 sshParams="-oUserKnownHostsFile=/dev/null -oStrictHostKeyChecking=no -oPasswordAuthentication=no -i $PWD/key.pem"
385 if [ "$1" == '--rsync-cmd' ] ; then
387 echo ssh $sshParams "$@"
390 ssh $sshParams "$cloudUserName"@"$instanceIp" "$@"
396 # Opens a shell to the remote VM
403 # Checks status of the validation
# Returns one of: errStatusUnavailable (cannot query), errStatusRunning,
# errStatusNotRunning, errStatusDoneOk, errStatusDoneFail.
405 local raw ret screen exitcode sessionTag
# The remote command prints two words: screen_yes/screen_no (is a screen named
# <screenPrefix>-<sessionTag> listed?) followed by the content of
# <sessionTag>/validation.done, or not_done when that file is missing.
407 raw=$( VMSSH -t "screen -ls 2> /dev/null | grep -q .${screenPrefix}-${sessionTag} && echo -n 'screen_yes ' || echo -n 'screen_no ' ; cat $sessionTag/validation.done 2> /dev/null || echo 'not_done' ; true" 2> /dev/null )
# Keeps only alphanumerics, underscore and blank (strips CR/LF added by the tty).
# NOTE(review): this also drops a '-' sign from the exit code; and $? of the
# VMSSH call must be saved before this pipeline runs -- the assignment of ret
# is not visible here, confirm its position.
408 raw=$( echo "$raw" | tr -cd '[:alnum:]_ ' ) # garbage removal
411 if [ "$ret" != 0 ] ; then
412 pr "Cannot get status"
413 return $errStatusUnavailable
# A live screen wins over whatever validation.done says.
419 if [ "$screen" == 'screen_yes' ] ; then
420 pr 'Status: validation still running'
421 return $errStatusRunning
423 if [ "$exitcode" == 'not_done' ] ; then
424 pr 'Status: validation not running'
425 return $errStatusNotRunning
426 elif [ "$exitcode" == 0 ] ; then
427 pr 'Status: validation completed successfully'
428 return $errStatusDoneOk
430 pr "Status: validation finished with errors (exitcode: $exitcode)"
431 return $errStatusDoneFail
437 # Wait for host to be ready
# Polls the VM with a no-op SSH command until it succeeds, giving up after
# maxSshConnectAttempts tries; returns errSshNotReady when the error flag is set.
441 pr -n 'Waiting for the VM to accept SSH connections...'
443 while ! VMSSH -Tq true > /dev/null 2>&1 ; do
444 if [ $((++attempt)) -gt $maxSshConnectAttempts ] ; then
453 [ "$error" == 1 ] && return $errSshNotReady
# Copies the session directory to the VM and starts run-benchmark.sh there
# inside a detached screen named <screenPrefix>-<sessionTag>, unless such a
# screen already exists (remote exit code 42).
459 function Validate() {
460 local instanceIp sshParams sessionTag
# NOTE(review): sshParams duplicates the string built in VMSSH and is not used
# in the visible lines of this function (rsync goes through VMSSH --rsync-cmd).
462 instanceIp=$(cat instance-address.txt 2> /dev/null)
463 sshParams="-oUserKnownHostsFile=/dev/null -oStrictHostKeyChecking=no -oPasswordAuthentication=no -i $PWD/key.pem"
# The heredoc delimiter below is unquoted: $(cat aliroot-version.txt) and
# $sessionTag are expanded now, while generating the script, whereas the
# escaped \$ret is left for the generated script to evaluate on the VM.
465 # create helper script to launch benchmark
466 cat > run-benchmark.sh <<_EoF_
471 env ALIROOT_VERSION=$(cat aliroot-version.txt) ./benchmark.sh run $sessionTag files.list benchmark.config
476 echo "*** Validation finished with exitcode \$ret ***"
478 read -p 'Press ENTER to dismiss: automatic dismiss in 60 seconds...' -t 60
480 chmod +x run-benchmark.sh
483 pr 'Transferring files to the VM'
# NOTE(review): $PWD, $cloudUserName, $instanceIp and $sessionTag are unquoted
# in the rsync arguments; a path with whitespace would be split.
484 rsync -av -e "$(VMSSH --rsync-cmd)" $PWD/ $cloudUserName@$instanceIp:$sessionTag/ || return $errLaunchValidation
486 # open a screen that does something; note that the command is not executed if
487 # the screen already exists, which is what we want
488 # note: sleep necessary to avoid "dead" screens
489 VMSSH -t "screen -wipe > /dev/null 2>&1 ; if screen -ls | grep -q ${screenPrefix}-${sessionTag} ; then ret=42 ; else screen -dmS ${screenPrefix}-${sessionTag} $sessionTag/run-benchmark.sh ; ret=0 ; sleep 3 ; fi ; exit \$ret"
493 if [ $ret == 42 ] ; then
494 pr 'Validation already running inside a screen.'
496 pr 'Validation launched inside a screen.'
500 pr 'Check the progress status with:'
501 pr " $Prog --session $sessionTag --status"
502 pr 'Attach to the screen for debug:'
503 pr " $Prog --session $sessionTag --attach"
504 pr 'Open a shell to the virtual machine:'
505 pr " $Prog --session $sessionTag --shell"
512 # Attach current validation screen, if possible
# Wipes dead screens on the VM, then attaches (shared, -rx) to the screen named
# <screenPrefix>-<sessionTag>; returns errAttachScreen when that fails.
517 VMSSH -t "( screen -wipe ; screen -rx ${screenPrefix}-${sessionTag} ) > /dev/null 2>&1"
519 if [ $? != 0 ] ; then
520 pr "Cannot attach screen: check if validation is running with:"
521 pr " $Prog --session $sessionTag --status"
522 pr "or connect manually to the VM for debug:"
# NOTE(review): this hint repeats --attach, the very action that just failed.
# The command that opens a shell on the VM is --shell (see the messages printed
# by Validate and the usage text), so the suggestion should read "--shell".
523 pr " $Prog --session $sessionTag --attach"
524 return $errAttachScreen
530 # Pick session interactively
# Lists the directories under $sessionPrefix, newest first, and lets the user
# choose one. Messages go through pr (stderr); callers capture stdout with
# $( ... ), so only the chosen tag may be written there.
531 function PickSession() {
# NOTE(review): the index i used below is not declared local; this is harmless
# only while callers invoke the function inside a command substitution.
532 local sessionTag sess listSessions mess
535 mkdir -p "$sessionPrefix"
538 [ ! -d "$sessionPrefix/$sess" ] && continue
# NOTE(review): $sess and $sessionPrefix are unquoted and the list comes from
# parsing `ls -1t`; names with whitespace or glob characters would be mangled.
539 listSessions+=( $sess )
540 done < <( cd $sessionPrefix ; ls -1t )
542 if [ ${#listSessions[@]} == 0 ] ; then
543 pr "No session available in session directory $sessionPrefix"
544 return $errPickSession
547 # print user message if provided
548 [ "$mess" != '' ] && pr "$mess"
550 pr 'Available sessions (most recent first):'
551 for ((i=0; i<${#listSessions[@]}; i++)) ; do
# NOTE(review): the session name is spliced into the printf format string; a
# '%' or backslash in a name would be interpreted. Passing it as a %s argument
# avoids that.
552 pr "$( printf " % 2d. ${listSessions[$i]}" $((i+1)) )"
# NOTE(review): $i now holds the user's choice (read in lines not shown);
# `[ ... -lt 0 ]` prints an error instead of rejecting non-numeric input
# cleanly -- confirm the input is validated beforehand.
558 if [ "$i" -lt 0 ] || [ "${listSessions[$i]}" == '' ] ; then
560 return $errPickSession
563 sess="${listSessions[$i]}"
564 pr "You chose session $sess"
# Runs the action named by $1 only if `type` can resolve that name; Main relies
# on this to call "<Action>_Undo" for actions that may not define an undo step.
# NOTE(review): the invocation itself and the return value are in lines not
# shown; what happens for an unknown name cannot be confirmed from here.
570 function RunAction() {
572 type "$1" > /dev/null 2>&1
573 if [ $? == 0 ] ; then
574 #pr "--> $1 (wd: $PWD)"
577 #pr "<-- $1 (ret: $ret, wd: $PWD)"
# Usage text, printed line by line through pr (i.e. on stderr).
585 pr "$Prog -- by Dario Berzano <dario.berzano@cern.ch>"
586 pr 'Controls the Release Validation workflow on the cloud for AliRoot.'
588 pr "Usage 1: $Prog [--prepare|--launch|--recycle] [--from-session] --aliroot <aliroot_tag> [-- arbitraryOpt1=value [arbitraryOpt2=value2...]]"
590 pr 'A new session is created to validate the specified AliRoot tag.'
592 pr ' --prepare : prepares the session directory containing the files needed'
593 pr ' for the validation'
594 pr ' --recycle : prepares a new session by recycling the head node from an'
595 pr ' existing one. Source session is specified via the'
596 pr ' --from-session switch or it can be interactively selected'
597 pr ' --launch : launches the full validation process: prepares session,'
598 pr ' runs the virtual machine, launches the validation program'
599 pr ' --aliroot : the AliRoot tag to validate, in the form "vAN-20140610"'
601 pr 'Arbitrary options (in the form variable=value) can be specified after the'
602 pr 'double dash and will override the corresponding options in any of the'
603 pr 'configuration files.'
# NOTE(review): the synopsis below omits --attach, although the option is
# described a few lines further down and is handled by the option parser.
605 pr "Usage 2: $Prog [--runvm|--validate|--shell|--status] --session <session_tag>"
607 pr 'Runs the validation step by step after a session is created with'
608 pr '--prepare, and runs other actions on a certain session.'
610 pr ' --session : session identifier, e.g. vAN-20140610_20140612-123047-utc:'
611 pr ' if no session is specified an interactive prompt is'
613 pr ' --runvm : instantiates the head node of the validation cluster on'
615 pr ' --validate : runs the validation script on the head node for the'
616 pr ' current session. Head node must be already up, or it'
617 pr ' should be created with --runvm. If validation is running'
618 pr ' already, connects to the existing validation shell'
619 pr ' --attach : attach a currently running validation screen; remember to'
620 pr ' detach with Ctrl+A+D (and *not* Ctrl-C)'
621 pr ' --shell : does SSH on the head node'
622 pr ' --status : returns the status of the validation'
624 pr 'Example 1: run the validation of AliRoot tag vAN-20140610:'
626 pr " $Prog --aliroot vAN-20140610 --launch"
628 pr 'Example 2: do the same thing step-by-step:'
630 pr " $Prog --aliroot vAN-20140610 --prepare"
632 pr " $Prog --validate"
# Entry point body: maps the selected command-line mode to an ordered list of
# actions, runs them in sequence through RunAction, and on failure walks the
# list backwards calling each action's "_Undo" counterpart.
640 local Args aliRootTag EnterShell Actions sessionTag fromSessionTag
643 # parse command line options
644 while [ $# -gt 0 ] ; do
# One Actions list per mode; the matching option patterns are in lines not shown.
664 Actions=( PrepareSession MoveToSessionDir LoadConfig InstantiateValidationVM WaitSsh Validate )
668 Actions=( PrepareSession MoveToSessionDir )
672 Actions=( PrepareSession MoveToSessionDir RecycleSession )
676 Actions=( MoveToSessionDir LoadConfig InstantiateValidationVM )
680 Actions=( MoveToSessionDir LoadConfig WaitSsh Validate )
684 Actions=( MoveToSessionDir LoadConfig WaitSsh Attach )
690 Actions=( MoveToSessionDir LoadConfig WaitSsh Shell )
694 Actions=( MoveToSessionDir LoadConfig WaitSsh Status )
709 pr "Invalid option: $1. Use --help for assistance."
710 return $errInvalidOpt
715 # check for the presence of the required tools in the $PATH
# NOTE(review): euca-terminate-instances (called by RunVM and by
# InstantiateValidationVM_Undo) and ssh are used by this script but are not in
# the list. `command -v` is the portable replacement for `which`.
716 for T in euca-describe-instances euca-describe-regions euca-run-instances euca-create-keypair euca-delete-keypair rsync ; do
717 which "$T" > /dev/null 2>&1
718 if [ $? != 0 ] ; then
719 pr "Cannot find one of the required commands: $T"
720 return $errMissingCmd
724 # test EC2 credentials
725 # euca-describe-regions > /dev/null 2>&1
726 # if [ $? != 0 ] ; then
727 # pr 'Cannot authenticate to EC2.'
728 # pr 'Note: you must have at least the following variables properly set in your environment:'
729 # pr " * EC2_URL (current value: ${EC2_URL-<not set>})"
730 # pr " * EC2_ACCESS_KEY (current value: ${EC2_ACCESS_KEY-<not set>})"
731 # pr " * EC2_SECRET_KEY (current value: ${EC2_SECRET_KEY-<not set>})"
736 if [ ${#Actions[@]} == 0 ] ; then
737 pr 'Nothing to do. Use --help for assistance.'
738 return $errInvalidOpt
742 for ((i=0; i<${#Actions[@]}; i++)) ; do
746 if [ "$A" == 'PrepareSession' ] ; then
747 # special action returning the session tag
748 if [ "$aliRootTag" == '' ] ; then
749 pr 'Specify an AliRoot version with --aliroot <tag>'
750 return $errInvalidOpt
752 if [ "$sessionTag" != '' ] ; then
753 pr 'Cannot use --session with --prepare. Use --help for assistance.'
754 return $errInvalidOpt
# The session tag is whatever PrepareSession writes to stdout; it stays empty
# if PrepareSession returns before printing it.
756 sessionTag=$( RunAction "$A" "$aliRootTag" "$@" )
758 elif [ "$A" == 'RecycleSession' ] ; then
759 # special action requiring additional parameters
760 if [ "$fromSessionTag" == '' ] ; then
761 fromSessionTag=$( PickSession 'Select a source session to recycle.' )
763 [ $ret != 0 ] && break
765 RunAction "$A" "$sessionTag" "$fromSessionTag"
768 if [ "$sessionTag" == '' ] ; then
769 sessionTag=$( PickSession )
771 [ $ret != 0 ] && break
773 RunAction "$A" "$sessionTag"
777 # 100 to 140 --> not errors
# NOTE(review): `[ $ret -ge 100 ] || [ $ret -le 140 ]` is true for every
# integer, so this reduces to "break on any nonzero status". That may be the
# intent here (stop the chain on errors and on informational statuses alike).
778 ( [ $ret != 0 ] && ( [ $ret -ge 100 ] || [ $ret -le 140 ] ) ) && break
# NOTE(review): the same always-true range test guards the rollback, so the
# informational statuses 100..140 (e.g. errStatusNotRunning=101) trigger the
# undo chain exactly like real errors. Excluding that range needs
# `[ $ret -lt 100 ] || [ $ret -gt 140 ]`. Also note that the rollback hands a
# possibly empty $sessionTag to PrepareSession_Undo.
784 if [ $ret != 0 ] && ( [ $ret -ge 100 ] || [ $ret -le 140 ] ) ; then
785 for ((; i>=0; i--)) ; do
786 RunAction "${Actions[$i]}_Undo" "$sessionTag"