Added new parameters in the LDAP configuration (AliShuttleConfig) to check the
author zampolli <zampolli@f7af4fe6-9843-0410-8265-dc069ae4e863>
Fri, 12 Dec 2008 09:45:49 +0000 (09:45 +0000)
committer zampolli <zampolli@f7af4fe6-9843-0410-8265-dc069ae4e863>
Fri, 12 Dec 2008 09:45:49 +0000 (09:45 +0000)
status of the free disk on the shuttle machine (AliShuttleTrigger). Proper
actions taken (sending mail/terminating) in case of reaching the different
thresholds.

SHUTTLE/AliShuttleConfig.cxx
SHUTTLE/AliShuttleConfig.h
SHUTTLE/AliShuttleTrigger.cxx
SHUTTLE/AliShuttleTrigger.h

index d4ad90f..4eac7cc 100644 (file)
@@ -266,137 +266,6 @@ fIsValid(kFALSE)
        
        fIsValid = kTRUE;
 }
-
-//______________________________________________________________________________________________
-AliShuttleConfig::AliShuttleConfig(const AliShuttleConfig & other):
-       TObject(),
-       fConfigHost(other.fConfigHost),
-       fAlienPath(other.fAlienPath),
-       fDAQlbHost(other.fDAQlbHost),
-       fDAQlbPort(other.fDAQlbPort),
-       fDAQlbUser(other.fDAQlbUser),
-       fDAQlbPass(other.fDAQlbPass),
-       fDAQlbDB(other.fDAQlbDB),
-       fDAQlbTable(other.fDAQlbTable),
-       fShuttlelbTable(other.fShuttlelbTable),
-       fRunTypelbTable(other.fRunTypelbTable),
-       fPasswdFilePath(other.fPasswdFilePath),
-       fMaxRetries(other.fMaxRetries),
-       fPPTimeOut(other.fPPTimeOut),
-       fDCSTimeOut(other.fDCSTimeOut),
-       fDCSRetries(other.fDCSRetries),
-       fDCSQueryOffset(other.fDCSQueryOffset),
-       fDCSDelay(other.fDCSDelay),
-       fPPMaxMem(other.fPPMaxMem),
-       fMonitorHost(other.fMonitorHost),
-       fMonitorTable(other.fMonitorTable),
-       fTriggerWait(other.fTriggerWait),
-       fRunMode(other.fRunMode),
-       fDetectorMap(),
-       fDetectorList(other.fDetectorList),
-       fShuttleInstanceHost(other.fShuttleInstanceHost),
-       fProcessedDetectors(other.fProcessedDetectors),
-       fKeepDCSMap(other.fKeepDCSMap),
-       fKeepTempFolder(other.fKeepTempFolder),
-       fSendMail(other.fSendMail),
-       fProcessAll(other.fProcessAll),
-       fIsValid(other.fIsValid)
-{
-       //
-       // copy ctor
-       //
-       for (Int_t i = 0; i<3; i++){
-               fFXSHost[i]=other.fFXSHost[i];
-               fFXSPort[i]=other.fFXSPort[i];
-               fFXSUser[i]=other.fFXSUser[i];
-               fFXSPass[i]=other.fFXSPass[i];
-               fFXSBaseFolder[i]=other.fFXSBaseFolder[i];
-               fFXSdbHost[i]=other.fFXSdbHost[i];
-               fFXSdbPort[i]=other.fFXSdbPort[i];
-               fFXSdbUser[i]=other.fFXSdbUser[i];
-               fFXSdbPass[i]=other.fFXSdbPass[i];
-               fFXSdbName[i]=other.fFXSdbName[i];
-               fFXSdbTable[i]=other.fFXSdbTable[i];
-       }
-       for (Int_t i = 0; i<5; i++){
-               fAdmin[i] = new TObjArray();
-               fAdmin[i]->AddAt(other.fAdmin[i]->At(i),i);
-       }
-
-       TIter iter((other.fDetectorMap).GetTable());
-       TPair* aPair = 0;
-       
-       while ((aPair = (TPair*) iter.Next())) {
-               AliShuttleDetConfigHolder *holder =(AliShuttleDetConfigHolder *)aPair->Value();
-               TKey *key = (TKey*)aPair->Key();
-               fDetectorMap.Add(key,holder);
-       }
-
-
-} 
-//_____________________________________________________________________________________________                
-AliShuttleConfig& AliShuttleConfig::operator=(const AliShuttleConfig &other) 
-{
-       //
-       //assignment operator
-       //
-       this->fConfigHost=other.fConfigHost;
-       this->fDAQlbHost=other.fDAQlbHost;
-       this->fDAQlbPort=other.fDAQlbPort;
-       this->fDAQlbUser=other.fDAQlbUser;
-       this->fDAQlbPass=other.fDAQlbPass;
-       this->fDAQlbDB=other.fDAQlbDB;
-       this->fDAQlbTable=other.fDAQlbTable;
-       this->fShuttlelbTable=other.fShuttlelbTable;
-       this->fRunTypelbTable=other.fRunTypelbTable;
-       this->fPasswdFilePath=other.fPasswdFilePath;
-       this->fMaxRetries=other.fMaxRetries;
-       this->fPPTimeOut=other.fPPTimeOut;
-       this->fDCSTimeOut=other.fDCSTimeOut;
-       this->fDCSRetries=other.fDCSRetries;
-       this->fDCSQueryOffset=other.fDCSQueryOffset;
-        this->fDCSDelay=other.fDCSDelay;
-       this->fPPMaxMem=other.fPPMaxMem;
-       this->fMonitorHost=other.fMonitorHost;
-       this->fMonitorTable=other.fMonitorTable;
-       this->fTriggerWait=other.fTriggerWait;
-       this->fRunMode=other.fRunMode;
-       this->fDetectorList=other.fDetectorList;
-       this->fShuttleInstanceHost=other.fShuttleInstanceHost;
-       this->fProcessedDetectors=other.fProcessedDetectors;
-       this->fKeepDCSMap=other.fKeepDCSMap;
-       this->fKeepTempFolder=other.fKeepTempFolder;
-       this->fSendMail=other.fSendMail;
-       this->fProcessAll=other.fProcessAll;
-       this->fIsValid=other.fIsValid;
-       for (Int_t i = 0; i<3; i++){
-               this->fFXSHost[i]=other.fFXSHost[i];
-               this->fFXSPort[i]=other.fFXSPort[i];
-               this->fFXSUser[i]=other.fFXSUser[i];
-               this->fFXSPass[i]=other.fFXSPass[i];
-               this->fFXSBaseFolder[i]=other.fFXSBaseFolder[i];
-               this->fFXSdbHost[i]=other.fFXSdbHost[i];
-               this->fFXSdbPort[i]=other.fFXSdbPort[i];
-               this->fFXSdbUser[i]=other.fFXSdbUser[i];
-               this->fFXSdbPass[i]=other.fFXSdbPass[i];
-               this->fFXSdbName[i]=other.fFXSdbName[i];
-               this->fFXSdbTable[i]=other.fFXSdbTable[i];
-       }
-       for (Int_t i = 0; i<5; i++){
-               this->fAdmin[i] = new TObjArray();
-               this->fAdmin[i]->AddAt(other.fAdmin[i]->At(i),i);
-       }
-
-       TIter iter((other.fDetectorMap).GetTable());
-       TPair* aPair = 0;
-       
-       while ((aPair = (TPair*) iter.Next())) {
-               AliShuttleDetConfigHolder *holder =(AliShuttleDetConfigHolder *)aPair->Value();
-               TKey *key = (TKey*)aPair->Key();
-               this->fDetectorMap.Add(key,holder);
-       }
-       return *this;
-} 
 //______________________________________________________________________________________________
 void AliShuttleConfig::AliShuttleDCSConfigHolder::ExpandAndAdd(TObjArray* target, const char* entry)
 {
@@ -678,6 +547,9 @@ AliShuttleConfig::AliShuttleConfig(const char* host, Int_t port,
        fMonitorHost(""), 
        fMonitorTable(""), 
        fTriggerWait(3600),
+       fShuttleFileSystem("/"),
+       fFreeDiskWarningThreshold(20),
+       fFreeDiskFatalThreshold(10),
        fRunMode(kTest),
        fDetectorMap(), 
        fDetectorList(),
@@ -1182,11 +1054,31 @@ UInt_t AliShuttleConfig::SetGlobalConfig(TList* list)
 
        anAttribute = anEntry->GetAttribute("triggerWait"); // MAY
        if (!anAttribute) {
-               AliWarning(Form("triggerWait not set! default = ", fTriggerWait));
+               AliWarning(Form("triggerWait not set! default = %d", fTriggerWait));
        }
        tmpStr = anAttribute->GetValue();
        fTriggerWait = tmpStr.Atoi();
-       
+
+        anAttribute = anEntry->GetAttribute("ShuttleFileSystem"); 
+       if (!anAttribute) {
+               AliWarning(Form("ShuttleFileSystem not set! default = %s", fShuttleFileSystem.Data()));
+       }
+       fShuttleFileSystem = anAttribute->GetValue();
+
+       anAttribute = anEntry->GetAttribute("FreeDiskWarningThreshold"); // MAY
+       if (!anAttribute) {
+               AliWarning(Form("FreeDiskWarningThreshold not set! default = %d", fFreeDiskWarningThreshold));
+       }
+       tmpStr = anAttribute->GetValue();
+       fFreeDiskWarningThreshold = tmpStr.Atoi();
+
+       anAttribute = anEntry->GetAttribute("FreeDiskFatalThreshold"); // MAY
+       if (!anAttribute) {
+               AliWarning(Form("FreeDiskFatalThreshold not set! default = %d", fFreeDiskFatalThreshold));
+       }
+       tmpStr = anAttribute->GetValue();
+       fFreeDiskFatalThreshold = tmpStr.Atoi();                
+
        anAttribute = anEntry->GetAttribute("mode");
        if (!anAttribute) {
                AliWarning("Run mode not set! Running in test mode.");
index af9417f..0d4e394 100644 (file)
@@ -27,8 +27,6 @@ public:
                        const char* binddn = 0, const char* password = 0,
                        const char* basedn = "o=alice,dc=cern,dc=ch");
        virtual ~AliShuttleConfig();
-       AliShuttleConfig(const AliShuttleConfig & other);
-       AliShuttleConfig& operator= (const AliShuttleConfig& other);
 
        Bool_t IsValid() const {return fIsValid;};
 
@@ -79,6 +77,9 @@ public:
        const char* GetMonitorTable() const {return fMonitorTable.Data();}
 
        Int_t GetTriggerWait() const {return fTriggerWait;}
+       const char* GetShuttleFileSystem() const {return fShuttleFileSystem.Data();}
+       Int_t GetFreeDiskWarningThreshold() const {return fFreeDiskWarningThreshold;}
+       Int_t GetFreeDiskFatalThreshold() const {return fFreeDiskFatalThreshold;}
        
        RunMode GetRunMode() const {return fRunMode;}
        
@@ -183,6 +184,8 @@ private:
        };
 
 
+       AliShuttleConfig(const AliShuttleConfig& other);
+       AliShuttleConfig& operator= (const AliShuttleConfig& other);
        UInt_t SetGlobalConfig(TList* globalList);
        UInt_t SetSysConfig(TList* sysList);
        UInt_t SetPasswords();
@@ -230,6 +233,9 @@ private:
        TString fMonitorTable;          // Monalisa's SHUTTLE table name
        
        Int_t fTriggerWait;             // time to wait for DIM trigger before starting new collection 
+       TString fShuttleFileSystem;     // path of the Shuttle file system
+       Int_t fFreeDiskWarningThreshold;   // threshold for free space in the Shuttle file system to send a mail to the responsibles
+       Int_t fFreeDiskFatalThreshold;     // threshold for free space in the Shuttle file system to send a mail to the responsibles and terminate the Shuttle
        RunMode  fRunMode;              // Working mode (0=test; 1=prod)
 
        TMap fDetectorMap;              // Map of the detector-by-detector configuration
index 18730e0..149bef7 100644 (file)
 #include "AliShuttleTrigger.h"
 
 #include <TSystem.h>
+#include <TObjString.h>
 
 #include "AliLog.h"
 #include "AliShuttleConfig.h"
 #include "AliShuttle.h"
 #include "DATENotifier.h"
 
+#include <fstream>
+
 ClassImp(TerminateSignalHandler)
 ClassImp(AliShuttleTrigger)
 
@@ -134,7 +137,8 @@ AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config):
        fNotified(kFALSE), fTerminate(kFALSE),
        fMutex(), fCondition(&fMutex),
        fQuitSignalHandler(0),
-       fInterruptSignalHandler(0)
+       fInterruptSignalHandler(0),
+       fLastMailDiskSpace(0)
 {
        //
        // config - pointer to the AliShuttleConfig object which represents
@@ -277,5 +281,119 @@ Bool_t AliShuttleTrigger::Collect(Int_t run)
        // then it checks if the shuttle is still running by checking the monitoring functions of the shuttle
        //
 
-  return fShuttle->Collect(run);
+       // first checking disk space
+       Long_t id = 0;
+       Long_t bsize = 0;
+       Long_t blocks = 0;
+       Long_t bfree = 0;
+
+       gSystem->GetFsInfo(fConfig->GetShuttleFileSystem(), &id, &bsize, &blocks, &bfree);
+
+       AliInfo(Form("n. of free blocks = %d, total n. of blocks = %d",bfree,blocks));
+       Int_t spaceFree = (Int_t)(((Float_t)bfree/(Float_t)blocks)*100);
+
+       if (spaceFree < fConfig->GetFreeDiskWarningThreshold()) {
+               AliWarning(Form("************** Free space left = %d%%, below the Warning Threshold (%d%%)",spaceFree,fConfig->GetFreeDiskWarningThreshold()));
+               if (TMath::Abs(time(0) - fLastMailDiskSpace) >= 86400){   // 86400 = n. of seconds in 1 d
+                       SendMailDiskSpace(fConfig->GetFreeDiskWarningThreshold());
+                       fLastMailDiskSpace = time(0);  // resetting fLastMailDiskSpace to time(0) = now
+               }
+               if (spaceFree < fConfig->GetFreeDiskFatalThreshold()){
+                       AliError(Form("*************** Free space left = %d%%, below the Fatal Threshold (%d%%), terminating....",spaceFree,fConfig->GetFreeDiskFatalThreshold()));
+                       SendMailDiskSpace(fConfig->GetFreeDiskFatalThreshold());
+                       fTerminate = kTRUE; // terminating....
+               }
+       }       
+
+       if (fTerminate) {
+               return kFALSE;
+       }
+
+       return fShuttle->Collect(run);
+}
+//______________________________________________________________________________________________
+Bool_t AliShuttleTrigger::SendMailDiskSpace(Short_t percentage)
+{
+       //
+       // Mails the global SHUTTLE admins that the free disk space fell below "percentage" (in %).
+       // Subject and body quote the matching usage (100 - percentage). Returns kTRUE if mail
+       // sending is disabled or the mail command exits with 0, kFALSE on any failure.
+       //
+       AliInfo("******************* Sending the Mail!! *********************");
+       if (!fConfig->SendMail()) 
+               return kTRUE;
+
+       Int_t runMode = (Int_t)fConfig->GetRunMode();
+       TString tmpStr;
+       if (runMode == 0) tmpStr = " Nightly Test:";
+       else tmpStr = " Data Taking:"; 
+       void* dir = gSystem->OpenDirectory(fShuttle->GetShuttleLogDir());
+       if (dir == NULL)
+       {
+               // log dir could not be opened: try to create it; non-zero from mkdir is a failure
+               if (gSystem->mkdir(fShuttle->GetShuttleLogDir(), kTRUE))
+               {
+                       AliWarning(Form("SendMail - Can't open directory <%s>", fShuttle->GetShuttleLogDir()));
+                       return kFALSE;
+               }
+       } else {
+               gSystem->FreeDirectory(dir);
+       }
+
+       // SHUTTLE responsibles in to
+       TString to="";
+       TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal));
+       TObjString *anAdmin=0;
+       while ((anAdmin = (TObjString*) iterAdmins.Next()))
+       {
+               to += Form("%s,", anAdmin->GetName());
+       }
+       if (to.Length() == 0)
+       {
+               // running "mail" without any addressee would not deliver the alert
+               AliWarning("SendMailDiskSpace - no recipients configured, mail not sent");
+               return kFALSE;
+       }
+       to.Remove(to.Length()-1); // strip the trailing comma
+       AliDebug(2, Form("to: %s",to.Data()));
+
+       // mail body 
+       TString bodyFileName;
+       bodyFileName.Form("%s/mail.body", fShuttle->GetShuttleLogDir());
+       gSystem->ExpandPathName(bodyFileName);
+
+       ofstream mailBody;
+       mailBody.open(bodyFileName, ofstream::out);
+       if (!mailBody.is_open())
+       {
+               AliWarning(Form("Could not open mail body file %s", bodyFileName.Data()));
+               return kFALSE;
+       }
+
+       // the argument is a threshold on FREE space; subject and body both report USED space
+       Int_t percentage_used = 100 - percentage;
+
+       TString subject = Form("%s CRITICAL Disk Space usage exceeds %d%c!",
+                      tmpStr.Data(),percentage_used,'%');
+       AliDebug(2, Form("subject: %s", subject.Data()));
+
+       TString body = "Dear SHUTTLE experts, \n\n";
+       body += "The usage of the disk space on the shuttle machine has overcome \n"; 
+       body += Form("the threshold of %d%%. \n \n",percentage_used);
+       body += "Please check! \n \n";
+       body += "Please do not answer this message directly, it is automatically generated.\n\n";
+       body += "Greetings,\n\n \t\t\tthe SHUTTLE\n";
+       AliDebug(2, Form("Body : %s", body.Data()));
+       mailBody << body.Data();
+       mailBody.close();
+
+       // send mail!
+       TString mailCommand = Form("mail -s \"%s\" %s < %s",
+                                               subject.Data(),
+                                               to.Data(),
+                                               bodyFileName.Data());
+       AliDebug(2, Form("mail command: %s", mailCommand.Data()));
+
+       Int_t result = gSystem->Exec(mailCommand.Data()); // keep the full exit status
+       return result == 0;
 }
index 827f210..4a1fb66 100644 (file)
@@ -62,6 +62,8 @@ private:
        AliShuttleTrigger(const AliShuttleTrigger& other);
        AliShuttleTrigger& operator= (const AliShuttleTrigger& other);
 
+       Bool_t SendMailDiskSpace(Short_t percentage);
+
        const AliShuttleConfig* fConfig;
 
        AliShuttle* fShuttle;           // Pointer to the actual Shuttle instance
@@ -75,6 +77,8 @@ private:
        TerminateSignalHandler* fQuitSignalHandler;             // Quit signal
        TerminateSignalHandler* fInterruptSignalHandler;        // Interrupt signal
 
+       time_t fLastMailDiskSpace;       // timestamp when the last mail was sent
+
 
        ClassDef(AliShuttleTrigger, 0)
 };