Adding the option to zero shared entries below threshold
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttleTrigger.cxx
index 8599c4b..9105d35 100644 (file)
@@ -15,6 +15,9 @@
 
 /*
  $Log$
+ Revision 1.15  2007/12/10 18:29:23  acolla
+ Some log added to the listen mode
+
  Revision 1.14  2007/12/07 19:14:36  acolla
  in AliShuttleTrigger:
 
 #include "AliShuttleTrigger.h"
 
 #include <TSystem.h>
+#include <TObjString.h>
 
 #include "AliLog.h"
 #include "AliShuttleConfig.h"
 #include "AliShuttle.h"
 #include "DATENotifier.h"
 
+#include <fstream>
+
 ClassImp(TerminateSignalHandler)
 ClassImp(AliShuttleTrigger)
 
@@ -126,13 +132,13 @@ Bool_t TerminateSignalHandler::Notify()
 }
 
 //______________________________________________________________________________________________
-AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config,
-               UInt_t timeout, Int_t retries):
+AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config):
        fConfig(config), fShuttle(NULL),
        fNotified(kFALSE), fTerminate(kFALSE),
        fMutex(), fCondition(&fMutex),
        fQuitSignalHandler(0),
-       fInterruptSignalHandler(0)
+       fInterruptSignalHandler(0),
+       fLastMailDiskSpace(0)
 {
        //
        // config - pointer to the AliShuttleConfig object which represents
@@ -142,10 +148,12 @@ AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config,
        //
 
        if (!fConfig->IsValid()) AliFatal("********** !!!!! Invalid configuration !!!!! **********");
+       UInt_t timeout = fConfig->GetDCSTimeOut();
+       Int_t retries = fConfig->GetDCSRetries();
        fShuttle = new AliShuttle(config, timeout, retries);
 
-       TerminateSignalHandler* fQuitSignalHandler = new TerminateSignalHandler(this, kSigQuit);
-       TerminateSignalHandler* fInterruptSignalHandler = new TerminateSignalHandler(this, kSigInterrupt);
+       fQuitSignalHandler = new TerminateSignalHandler(this, kSigQuit);
+       fInterruptSignalHandler = new TerminateSignalHandler(this, kSigInterrupt);
 
        gSystem->AddSignalHandler(fQuitSignalHandler);
        gSystem->AddSignalHandler(fInterruptSignalHandler);
@@ -199,6 +207,24 @@ void AliShuttleTrigger::Terminate() {
 }
 
 //______________________________________________________________________________________________
+void AliShuttleTrigger::CheckTerminate() 
+{
+       //
+       // Looks for the terminate file named in the configuration and raises the terminate flag
+       // when it is present (file-based alternative to signals, which cause problems with the API libraries)
+       //
+
+       const Bool_t pathConfigured = (strlen(fConfig->GetTerminateFilePath()) != 0);
+       if (!pathConfigured) return;
+
+       // AccessPathName() yields kFALSE when the path is accessible, i.e. the file exists
+       const Bool_t fileMissing = gSystem->AccessPathName(fConfig->GetTerminateFilePath());
+       if (fileMissing) return;
+       AliInfo("Terminate file exists. Terminating Shuttle...");
+       fTerminate = kTRUE;
+}
+
+//______________________________________________________________________________________________
 void AliShuttleTrigger::Run() {
        //
        // AliShuttleTrigger main loop for asynchronized (listen) mode.
@@ -208,7 +234,7 @@ void AliShuttleTrigger::Run() {
 
        fTerminate = kFALSE;
 
-       DATENotifier* notifier = new DATENotifier(this, "/DATE/LOGBOOK/UPDATE");
+       DATENotifier* notifier = new DATENotifier(this, "/LOGBOOK/SUBSCRIBE/ECS_EOR");
 
        Int_t nTry=0; 
        Int_t nMaxTry = fConfig->GetMaxRetries()+1;
@@ -222,6 +248,7 @@ void AliShuttleTrigger::Run() {
 
                while (!(fNotified || fTerminate)) {
                        received=fCondition.TimedWaitRelative(1000*fConfig->GetTriggerWait());
+                       CheckTerminate();
                        if (received==1) break; // 1 = timeout
                }
 
@@ -246,10 +273,11 @@ void AliShuttleTrigger::Run() {
                        break;
                }
                
+               nTry++;
+               AliInfo(Form("Received %d triggers so far", nTry));
+               
                if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
                {
-                       nTry++;
-                       AliInfo(Form("Received %d triggers so far", nTry));
                        if(nTry>=nMaxTry)
                        {
                                AliInfo(Form("Collect() ran more than %d times -> Exiting!", 
@@ -259,6 +287,7 @@ void AliShuttleTrigger::Run() {
                }
        
                Collect();
+               CheckTerminate();
        }
 
        delete notifier;
@@ -272,5 +301,119 @@ Bool_t AliShuttleTrigger::Collect(Int_t run)
        // then it checks if the shuttle is still running by checking the monitoring functions of the shuttle
        //
 
-  return fShuttle->Collect(run);
+       // first checking disk space
+       Long_t id = 0;
+       Long_t bsize = 0;
+       Long_t blocks = 0;
+       Long_t bfree = 0;
+
+       gSystem->GetFsInfo(fConfig->GetShuttleFileSystem(), &id, &bsize, &blocks, &bfree);
+
+       AliInfo(Form("n. of free blocks = %ld, total n. of blocks = %ld",bfree,blocks));
+       Int_t spaceFree = (Int_t)(((Float_t)bfree/(Float_t)blocks)*100);
+
+       if (spaceFree < fConfig->GetFreeDiskWarningThreshold()) {
+               AliWarning(Form("************** Free space left = %d%%, below the Warning Threshold (%d%%)",spaceFree,fConfig->GetFreeDiskWarningThreshold()));
+               if (TMath::Abs(time(0) - fLastMailDiskSpace) >= 86400){   // 86400 = n. of seconds in 1 d
+                       SendMailDiskSpace(fConfig->GetFreeDiskWarningThreshold());
+                       fLastMailDiskSpace = time(0);  // resetting fLastMailDiskSpace to time(0) = now
+               }
+               if (spaceFree < fConfig->GetFreeDiskFatalThreshold()){
+                       AliError(Form("*************** Free space left = %d%%, below the Fatal Threshold (%d%%), terminating....",spaceFree,fConfig->GetFreeDiskFatalThreshold()));
+                       SendMailDiskSpace(fConfig->GetFreeDiskFatalThreshold());
+                       fTerminate = kTRUE; // terminating....
+               }
+       }       
+
+       if (fTerminate) {
+               return kFALSE;
+       }
+
+       return fShuttle->Collect(run);
+}
+//______________________________________________________________________________________________
+Bool_t AliShuttleTrigger::SendMailDiskSpace(Short_t percentage)
+{
+       //
+       // Sends a mail to the shuttle experts in case of free disk space < threshold.
+       //
+       // percentage - free disk space threshold (in %) that triggered the mail; the mail
+       //              text quotes the corresponding usage, 100 - percentage
+       //
+       // Returns kTRUE when fConfig->SendMail() is false (nothing is sent) or when the
+       // mail command exits with status 0. Returns kFALSE when the log directory cannot
+       // be created, no recipient is configured, the body file cannot be written, or
+       // the mail command fails.
+       //
+
+       AliInfo("******************* Sending the Mail!! *********************");
+       if (!fConfig->SendMail()) 
+               return kTRUE;
+
+       Int_t runMode = (Int_t)fConfig->GetRunMode();
+       TString tmpStr;
+       if (runMode == 0) tmpStr = " Nightly Test:";
+       else tmpStr = " Data Taking:"; 
+
+       // the mail body file is written into the shuttle log directory: create it if missing
+       void* dir = gSystem->OpenDirectory(fShuttle->GetShuttleLogDir());
+       if (dir == NULL)
+       {
+               if (gSystem->mkdir(fShuttle->GetShuttleLogDir(), kTRUE))
+               {
+                       AliWarning(Form("SendMail - Can't open directory <%s>", fShuttle->GetShuttleLogDir()));
+                       return kFALSE;
+               }
+
+       } else {
+               gSystem->FreeDirectory(dir);
+       }
+
+       // SHUTTLE responsibles in to (comma-separated, trailing comma stripped)
+       TString to="";
+       TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
+       TObjString *anAdmin=0;
+       while ((anAdmin = (TObjString*) iterAdmins.Next()))
+       {
+               to += Form("%s,", anAdmin->GetName());
+       }
+       if (to.Length() > 0)
+               to.Remove(to.Length()-1);
+       AliDebug(2, Form("to: %s",to.Data()));
+
+       if (to.Length() == 0)
+       {
+               // without a recipient the mail command line below would be malformed
+               AliWarning("SendMailDiskSpace - No recipient configured, mail not sent");
+               return kFALSE;
+       }
+
+       // mail body 
+       TString bodyFileName;
+       bodyFileName.Form("%s/mail.body", fShuttle->GetShuttleLogDir());
+       gSystem->ExpandPathName(bodyFileName);
+
+       ofstream mailBody;
+       mailBody.open(bodyFileName, ofstream::out);
+
+       if (!mailBody.is_open())
+       {
+               AliWarning(Form("Could not open mail body file %s", bodyFileName.Data()));
+               return kFALSE;
+       }
+
+       TString subject;
+       TString body;
+
+       // the mail reports disk usage, i.e. the complement of the free-space threshold
+       Int_t percentage_used = 100 - percentage;       
+       subject = Form("%s CRITICAL Disk Space usage exceeds %d%c!",
+                      tmpStr.Data(),percentage_used,'%');
+       AliDebug(2, Form("subject: %s", subject.Data()));
+
+       body = "Dear SHUTTLE experts, \n\n";
+       body += "The usage of the disk space on the shuttle machine has overcome \n"; 
+       body += Form("the threshold of %d%%. \n \n",percentage_used);
+       body += "Please check! \n \n";
+       body += "Please do not answer this message directly, it is automatically generated.\n\n";
+       body += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
+
+       AliDebug(2, Form("Body : %s", body.Data()));
+
+       mailBody << body.Data();
+       mailBody.close();
+
+       // do not mail an empty or truncated body if writing or closing the file failed
+       if (mailBody.fail())
+       {
+               AliWarning(Form("Could not write mail body file %s", bodyFileName.Data()));
+               return kFALSE;
+       }
+
+       // send mail!
+       TString mailCommand = Form("mail -s \"%s\" %s < %s",
+                                               subject.Data(),
+                                               to.Data(),
+                                               bodyFileName.Data());
+       AliDebug(2, Form("mail command: %s", mailCommand.Data()));
+
+       // TSystem::Exec() returns an integer exit status: keep it as Int_t, 0 means success
+       Int_t result = gSystem->Exec(mailCommand.Data());
+
+       return result == 0;
 }