From fb2975a2a0a25aae3e528d08ac2d46e12d505d8a Mon Sep 17 00:00:00 2001 From: zampolli Date: Fri, 12 Dec 2008 09:45:49 +0000 Subject: [PATCH] Added new parameters in the LDAP configuration (AliShuttleConfig) to check the status of the free disk space on the shuttle machine (AliShuttleTrigger). Appropriate actions are taken (sending mail/terminating) when the different thresholds are reached. --- SHUTTLE/AliShuttleConfig.cxx | 158 ++++++---------------------------- SHUTTLE/AliShuttleConfig.h | 10 ++- SHUTTLE/AliShuttleTrigger.cxx | 122 +++++++++++++++++++++++++- SHUTTLE/AliShuttleTrigger.h | 4 + 4 files changed, 157 insertions(+), 137 deletions(-) diff --git a/SHUTTLE/AliShuttleConfig.cxx b/SHUTTLE/AliShuttleConfig.cxx index d4ad90f8bf6..4eac7ccbf33 100644 --- a/SHUTTLE/AliShuttleConfig.cxx +++ b/SHUTTLE/AliShuttleConfig.cxx @@ -266,137 +266,6 @@ fIsValid(kFALSE) fIsValid = kTRUE; } - -//______________________________________________________________________________________________ -AliShuttleConfig::AliShuttleConfig(const AliShuttleConfig & other): - TObject(), - fConfigHost(other.fConfigHost), - fAlienPath(other.fAlienPath), - fDAQlbHost(other.fDAQlbHost), - fDAQlbPort(other.fDAQlbPort), - fDAQlbUser(other.fDAQlbUser), - fDAQlbPass(other.fDAQlbPass), - fDAQlbDB(other.fDAQlbDB), - fDAQlbTable(other.fDAQlbTable), - fShuttlelbTable(other.fShuttlelbTable), - fRunTypelbTable(other.fRunTypelbTable), - fPasswdFilePath(other.fPasswdFilePath), - fMaxRetries(other.fMaxRetries), - fPPTimeOut(other.fPPTimeOut), - fDCSTimeOut(other.fDCSTimeOut), - fDCSRetries(other.fDCSRetries), - fDCSQueryOffset(other.fDCSQueryOffset), - fDCSDelay(other.fDCSDelay), - fPPMaxMem(other.fPPMaxMem), - fMonitorHost(other.fMonitorHost), - fMonitorTable(other.fMonitorTable), - fTriggerWait(other.fTriggerWait), - fRunMode(other.fRunMode), - fDetectorMap(), - fDetectorList(other.fDetectorList), - fShuttleInstanceHost(other.fShuttleInstanceHost), - fProcessedDetectors(other.fProcessedDetectors), - 
fKeepDCSMap(other.fKeepDCSMap), - fKeepTempFolder(other.fKeepTempFolder), - fSendMail(other.fSendMail), - fProcessAll(other.fProcessAll), - fIsValid(other.fIsValid) -{ - // - // copy ctor - // - for (Int_t i = 0; i<3; i++){ - fFXSHost[i]=other.fFXSHost[i]; - fFXSPort[i]=other.fFXSPort[i]; - fFXSUser[i]=other.fFXSUser[i]; - fFXSPass[i]=other.fFXSPass[i]; - fFXSBaseFolder[i]=other.fFXSBaseFolder[i]; - fFXSdbHost[i]=other.fFXSdbHost[i]; - fFXSdbPort[i]=other.fFXSdbPort[i]; - fFXSdbUser[i]=other.fFXSdbUser[i]; - fFXSdbPass[i]=other.fFXSdbPass[i]; - fFXSdbName[i]=other.fFXSdbName[i]; - fFXSdbTable[i]=other.fFXSdbTable[i]; - } - for (Int_t i = 0; i<5; i++){ - fAdmin[i] = new TObjArray(); - fAdmin[i]->AddAt(other.fAdmin[i]->At(i),i); - } - - TIter iter((other.fDetectorMap).GetTable()); - TPair* aPair = 0; - - while ((aPair = (TPair*) iter.Next())) { - AliShuttleDetConfigHolder *holder =(AliShuttleDetConfigHolder *)aPair->Value(); - TKey *key = (TKey*)aPair->Key(); - fDetectorMap.Add(key,holder); - } - - -} -//_____________________________________________________________________________________________ -AliShuttleConfig& AliShuttleConfig::operator=(const AliShuttleConfig &other) -{ - // - //assignment operator - // - this->fConfigHost=other.fConfigHost; - this->fDAQlbHost=other.fDAQlbHost; - this->fDAQlbPort=other.fDAQlbPort; - this->fDAQlbUser=other.fDAQlbUser; - this->fDAQlbPass=other.fDAQlbPass; - this->fDAQlbDB=other.fDAQlbDB; - this->fDAQlbTable=other.fDAQlbTable; - this->fShuttlelbTable=other.fShuttlelbTable; - this->fRunTypelbTable=other.fRunTypelbTable; - this->fPasswdFilePath=other.fPasswdFilePath; - this->fMaxRetries=other.fMaxRetries; - this->fPPTimeOut=other.fPPTimeOut; - this->fDCSTimeOut=other.fDCSTimeOut; - this->fDCSRetries=other.fDCSRetries; - this->fDCSQueryOffset=other.fDCSQueryOffset; - this->fDCSDelay=other.fDCSDelay; - this->fPPMaxMem=other.fPPMaxMem; - this->fMonitorHost=other.fMonitorHost; - this->fMonitorTable=other.fMonitorTable; - 
this->fTriggerWait=other.fTriggerWait; - this->fRunMode=other.fRunMode; - this->fDetectorList=other.fDetectorList; - this->fShuttleInstanceHost=other.fShuttleInstanceHost; - this->fProcessedDetectors=other.fProcessedDetectors; - this->fKeepDCSMap=other.fKeepDCSMap; - this->fKeepTempFolder=other.fKeepTempFolder; - this->fSendMail=other.fSendMail; - this->fProcessAll=other.fProcessAll; - this->fIsValid=other.fIsValid; - for (Int_t i = 0; i<3; i++){ - this->fFXSHost[i]=other.fFXSHost[i]; - this->fFXSPort[i]=other.fFXSPort[i]; - this->fFXSUser[i]=other.fFXSUser[i]; - this->fFXSPass[i]=other.fFXSPass[i]; - this->fFXSBaseFolder[i]=other.fFXSBaseFolder[i]; - this->fFXSdbHost[i]=other.fFXSdbHost[i]; - this->fFXSdbPort[i]=other.fFXSdbPort[i]; - this->fFXSdbUser[i]=other.fFXSdbUser[i]; - this->fFXSdbPass[i]=other.fFXSdbPass[i]; - this->fFXSdbName[i]=other.fFXSdbName[i]; - this->fFXSdbTable[i]=other.fFXSdbTable[i]; - } - for (Int_t i = 0; i<5; i++){ - this->fAdmin[i] = new TObjArray(); - this->fAdmin[i]->AddAt(other.fAdmin[i]->At(i),i); - } - - TIter iter((other.fDetectorMap).GetTable()); - TPair* aPair = 0; - - while ((aPair = (TPair*) iter.Next())) { - AliShuttleDetConfigHolder *holder =(AliShuttleDetConfigHolder *)aPair->Value(); - TKey *key = (TKey*)aPair->Key(); - this->fDetectorMap.Add(key,holder); - } - return *this; -} //______________________________________________________________________________________________ void AliShuttleConfig::AliShuttleDCSConfigHolder::ExpandAndAdd(TObjArray* target, const char* entry) { @@ -678,6 +547,9 @@ AliShuttleConfig::AliShuttleConfig(const char* host, Int_t port, fMonitorHost(""), fMonitorTable(""), fTriggerWait(3600), + fShuttleFileSystem("/"), + fFreeDiskWarningThreshold(20), + fFreeDiskFatalThreshold(10), fRunMode(kTest), fDetectorMap(), fDetectorList(), @@ -1182,11 +1054,31 @@ UInt_t AliShuttleConfig::SetGlobalConfig(TList* list) anAttribute = anEntry->GetAttribute("triggerWait"); // MAY if (!anAttribute) { - 
AliWarning(Form("triggerWait not set! default = ", fTriggerWait)); + AliWarning(Form("triggerWait not set! default = %d", fTriggerWait)); } tmpStr = anAttribute->GetValue(); fTriggerWait = tmpStr.Atoi(); - + + anAttribute = anEntry->GetAttribute("ShuttleFileSystem"); + if (!anAttribute) { + AliWarning(Form("ShuttleFileSystem not set! default = %s", fShuttleFileSystem.Data())); + } + fShuttleFileSystem = anAttribute->GetValue(); + + anAttribute = anEntry->GetAttribute("FreeDiskWarningThreshold"); // MAY + if (!anAttribute) { + AliWarning(Form("FreeDiskWarningThreshold not set! default = %d", fFreeDiskWarningThreshold)); + } + tmpStr = anAttribute->GetValue(); + fFreeDiskWarningThreshold = tmpStr.Atoi(); + + anAttribute = anEntry->GetAttribute("FreeDiskFatalThreshold"); // MAY + if (!anAttribute) { + AliWarning(Form("FreeDiskFatalThreshold not set! default = %d", fFreeDiskFatalThreshold)); + } + tmpStr = anAttribute->GetValue(); + fFreeDiskFatalThreshold = tmpStr.Atoi(); + anAttribute = anEntry->GetAttribute("mode"); if (!anAttribute) { AliWarning("Run mode not set! 
Running in test mode."); diff --git a/SHUTTLE/AliShuttleConfig.h b/SHUTTLE/AliShuttleConfig.h index af9417f7e6c..0d4e39400f0 100644 --- a/SHUTTLE/AliShuttleConfig.h +++ b/SHUTTLE/AliShuttleConfig.h @@ -27,8 +27,6 @@ public: const char* binddn = 0, const char* password = 0, const char* basedn = "o=alice,dc=cern,dc=ch"); virtual ~AliShuttleConfig(); - AliShuttleConfig(const AliShuttleConfig & other); - AliShuttleConfig& operator= (const AliShuttleConfig& other); Bool_t IsValid() const {return fIsValid;}; @@ -79,6 +77,9 @@ public: const char* GetMonitorTable() const {return fMonitorTable.Data();} Int_t GetTriggerWait() const {return fTriggerWait;} + const char* GetShuttleFileSystem() const {return fShuttleFileSystem.Data();} + Int_t GetFreeDiskWarningThreshold() const {return fFreeDiskWarningThreshold;} + Int_t GetFreeDiskFatalThreshold() const {return fFreeDiskFatalThreshold;} RunMode GetRunMode() const {return fRunMode;} @@ -183,6 +184,8 @@ private: }; + AliShuttleConfig(const AliShuttleConfig& other); + AliShuttleConfig& operator= (const AliShuttleConfig& other); UInt_t SetGlobalConfig(TList* globalList); UInt_t SetSysConfig(TList* sysList); UInt_t SetPasswords(); @@ -230,6 +233,9 @@ private: TString fMonitorTable; // Monalisa's SHUTTLE table name Int_t fTriggerWait; // time to wait for DIM trigger before starting new collection + TString fShuttleFileSystem; // path of the Shuttle file system + Int_t fFreeDiskWarningThreshold; // threshold for free space in the Shuttle file system to send a mail to the responsibles + Int_t fFreeDiskFatalThreshold; // threshold for free space in the Shuttle file system to send a mail to the responsibles and terminate the Shuttle RunMode fRunMode; // Working mode (0=test; 1=prod) TMap fDetectorMap; // Map of the detector-by-detector configuration diff --git a/SHUTTLE/AliShuttleTrigger.cxx b/SHUTTLE/AliShuttleTrigger.cxx index 18730e00f19..149bef760d0 100644 --- a/SHUTTLE/AliShuttleTrigger.cxx +++ b/SHUTTLE/AliShuttleTrigger.cxx @@ 
-108,12 +108,15 @@ #include "AliShuttleTrigger.h" #include +#include #include "AliLog.h" #include "AliShuttleConfig.h" #include "AliShuttle.h" #include "DATENotifier.h" +#include + ClassImp(TerminateSignalHandler) ClassImp(AliShuttleTrigger) @@ -134,7 +137,8 @@ AliShuttleTrigger::AliShuttleTrigger(const AliShuttleConfig* config): fNotified(kFALSE), fTerminate(kFALSE), fMutex(), fCondition(&fMutex), fQuitSignalHandler(0), - fInterruptSignalHandler(0) + fInterruptSignalHandler(0), + fLastMailDiskSpace(0) { // // config - pointer to the AliShuttleConfig object which represents @@ -277,5 +281,119 @@ Bool_t AliShuttleTrigger::Collect(Int_t run) // then it checks if the shuttle is still running by checking the monitoring functions of the shuttle // - return fShuttle->Collect(run); + // first checking disk space + Long_t id = 0; + Long_t bsize = 0; + Long_t blocks = 0; + Long_t bfree = 0; + + gSystem->GetFsInfo(fConfig->GetShuttleFileSystem(), &id, &bsize, &blocks, &bfree); + + AliInfo(Form("n. of free blocks = %d, total n. of blocks = %d",bfree,blocks)); + Int_t spaceFree = (Int_t)(((Float_t)bfree/(Float_t)blocks)*100); + + if (spaceFree < fConfig->GetFreeDiskWarningThreshold()) { + AliWarning(Form("************** Free space left = %d%%, below the Warning Threshold (%d%%)",spaceFree,fConfig->GetFreeDiskWarningThreshold())); + if (TMath::Abs(time(0) - fLastMailDiskSpace) >= 86400){ // 86400 = n. of seconds in 1 d + SendMailDiskSpace(fConfig->GetFreeDiskWarningThreshold()); + fLastMailDiskSpace = time(0); // resetting fLastMailDiskSpace to time(0) = now + } + if (spaceFree < fConfig->GetFreeDiskFatalThreshold()){ + AliError(Form("*************** Free space left = %d%%, below the Fatal Threshold (%d%%), terminating....",spaceFree,fConfig->GetFreeDiskFatalThreshold())); + SendMailDiskSpace(fConfig->GetFreeDiskFatalThreshold()); + fTerminate = kTRUE; // terminating.... 
+ } + } + + if (fTerminate) { + return kFALSE; + } + + return fShuttle->Collect(run); +} +//______________________________________________________________________________________________ +Bool_t AliShuttleTrigger::SendMailDiskSpace(Short_t percentage) +{ + // + // sends a mail to the shuttle experts in case of free disk space < theshold + // + + + AliInfo("******************* Sending the Mail!! *********************"); + if (!fConfig->SendMail()) + return kTRUE; + + Int_t runMode = (Int_t)fConfig->GetRunMode(); + TString tmpStr; + if (runMode == 0) tmpStr = " Nightly Test:"; + else tmpStr = " Data Taking:"; + void* dir = gSystem->OpenDirectory(fShuttle->GetShuttleLogDir()); + if (dir == NULL) + { + if (gSystem->mkdir(fShuttle->GetShuttleLogDir(), kTRUE)) + { + AliWarning(Form("SendMail - Can't open directory <%s>", fShuttle->GetShuttleLogDir())); + return kFALSE; + } + + } else { + gSystem->FreeDirectory(dir); + } + + // SHUTTLE responsibles in to + TString to=""; + TIter iterAdmins(fConfig->GetAdmins(AliShuttleConfig::kGlobal)); + TObjString *anAdmin=0; + while ((anAdmin = (TObjString*) iterAdmins.Next())) + { + to += Form("%s,", anAdmin->GetName()); + } + if (to.Length() > 0) + to.Remove(to.Length()-1); + AliDebug(2, Form("to: %s",to.Data())); + + // mail body + TString bodyFileName; + bodyFileName.Form("%s/mail.body", fShuttle->GetShuttleLogDir()); + gSystem->ExpandPathName(bodyFileName); + + ofstream mailBody; + mailBody.open(bodyFileName, ofstream::out); + + if (!mailBody.is_open()) + { + AliWarning(Form("Could not open mail body file %s", bodyFileName.Data())); + return kFALSE; + } + + TString subject; + TString body; + + subject = Form("%s CRITICAL Disk Space usage exceeds %d%c!", + tmpStr.Data(),percentage,'%'); + AliDebug(2, Form("subject: %s", subject.Data())); + Int_t percentage_used = 100 - percentage; + + body = "Dear SHUTTLE experts, \n\n"; + body += "The usage of the disk space on the shuttle machine has overcome \n"; + body += Form("the threshold of 
%d%%. \n \n",percentage_used); + body += "Please check! \n \n"; + body += "Please do not answer this message directly, it is automatically generated.\n\n"; + body += "Greetings,\n\n \t\t\tthe SHUTTLE\n"; + + AliDebug(2, Form("Body : %s", body.Data())); + + mailBody << body.Data(); + mailBody.close(); + + // send mail! + TString mailCommand = Form("mail -s \"%s\" %s < %s", + subject.Data(), + to.Data(), + bodyFileName.Data()); + AliDebug(2, Form("mail command: %s", mailCommand.Data())); + + Bool_t result = gSystem->Exec(mailCommand.Data()); + + return result == 0; } diff --git a/SHUTTLE/AliShuttleTrigger.h b/SHUTTLE/AliShuttleTrigger.h index 827f2104c83..4a1fb66d3f3 100644 --- a/SHUTTLE/AliShuttleTrigger.h +++ b/SHUTTLE/AliShuttleTrigger.h @@ -62,6 +62,8 @@ private: AliShuttleTrigger(const AliShuttleTrigger& other); AliShuttleTrigger& operator= (const AliShuttleTrigger& other); + Bool_t SendMailDiskSpace(Short_t percentage); + const AliShuttleConfig* fConfig; AliShuttle* fShuttle; // Pointer to the actual Shuttle instance @@ -75,6 +77,8 @@ private: TerminateSignalHandler* fQuitSignalHandler; // Quit signal TerminateSignalHandler* fInterruptSignalHandler; // Interrupt signal + time_t fLastMailDiskSpace; // timestamp when the last mail was sent + ClassDef(AliShuttleTrigger, 0) }; -- 2.39.3