implementation of sending mail to subdetector expert in case the preprocessor fails.
author acolla <acolla@f7af4fe6-9843-0410-8265-dc069ae4e863>
Mon, 15 Jan 2007 18:27:11 +0000 (18:27 +0000)
committer acolla <acolla@f7af4fe6-9843-0410-8265-dc069ae4e863>
Mon, 15 Jan 2007 18:27:11 +0000 (18:27 +0000)
shuttle.schema updated with expert's email entry

SHUTTLE/AliShuttle.cxx
SHUTTLE/AliShuttle.h
SHUTTLE/AliShuttleConfig.cxx
SHUTTLE/AliShuttleConfig.h

index bd512ca..13474e7 100644 (file)
@@ -448,6 +448,16 @@ Bool_t AliShuttle::ContinueProcessing()
 // checks if the processing should be continued
 // if yes it returns kTRUE and updates the AliShuttleStatus with nextStatus
 
+       if (!fConfig->HostProcessDetector(fCurrentDetector)) return kFALSE;
+
+       AliPreprocessor* aPreprocessor =
+               dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
+       if (!aPreprocessor)
+       {
+               AliInfo(Form("%s: no preprocessor registered", fCurrentDetector.Data()));
+               return kFALSE;
+       }
+
        AliShuttleLogbookEntry::Status entryStatus =
                fLogbookEntry->GetDetectorStatus(fCurrentDetector);
 
@@ -506,24 +516,29 @@ Bool_t AliShuttle::ContinueProcessing()
        }
 
        // if we get here, there is a restart
+       Bool_t cont = kFALSE;
 
        // abort conditions
        if (status->GetCount() >= fConfig->GetMaxRetries()) {
-               Log("SHUTTLE",
-                       Form("ContinueProcessing - %s failed %d times in status %s - Updating Shuttle Logbook",
-                               fCurrentDetector.Data(),
+               Log("SHUTTLE", Form("ContinueProcessing - %s failed %d times in status %s - "
+                               "Updating Shuttle Logbook", fCurrentDetector.Data(),
                                status->GetCount(), status->GetStatusName()));
                UpdateShuttleLogbook(fCurrentDetector.Data(), "FAILED");
-               return kFALSE;
+       } else {
+               Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. "
+                               "Aborted before with %s. Retry number %d.", fCurrentDetector.Data(),
+                               status->GetStatusName(), status->GetCount()));
+               UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
+               cont = kTRUE;
        }
 
-       Log("SHUTTLE", Form("ContinueProcessing - %s: restarting. Aborted before with %s. Retry number %d.",
-                       fCurrentDetector.Data(),
-                       status->GetStatusName(), status->GetCount()));
-
-       UpdateShuttleStatus(AliShuttleStatus::kStarted, kTRUE);
+       // Send mail to detector expert!
+       AliInfo(Form("Sending mail to %s expert...", fCurrentDetector.Data()));
+       if (!SendMail())
+               Log("SHUTTLE", Form("ContinueProcessing - Could not send mail to %s expert",
+                               fCurrentDetector.Data()));
 
-       return kTRUE;
+       return cont;
 }
 
 //______________________________________________________________________________________________
@@ -570,19 +585,8 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
        {
                fCurrentDetector = aDetector->String();
 
-               if (!fConfig->HostProcessDetector(fCurrentDetector)) continue;
-
                if (ContinueProcessing() == kFALSE) continue;
 
-               AliPreprocessor* aPreprocessor =
-                       dynamic_cast<AliPreprocessor*> (fPreprocessorMap.GetValue(fCurrentDetector));
-               if (!aPreprocessor)
-               {
-                       Log("SHUTTLE",Form("Process - %s: no preprocessor registered. Skipping",
-                                                       fCurrentDetector.Data()));
-                       continue;
-               }
-
                AliInfo(Form("\n\n \t\t\t****** run %d - %s: START  ******",
                                                GetCurrentRun(), aDetector->GetName()));
 
@@ -2042,3 +2046,93 @@ const char* AliShuttle::GetRunParameter(const char* param)
 
        return fLogbookEntry->GetRunParameter(param);
 }
+
+//______________________________________________________________________________________________
+Bool_t AliShuttle::SendMail()
+{
+// sends a mail to the subdetector expert(s) in case of preprocessor error.
+// The text is assembled in the file mail.body inside fgkShuttleLogDir (introduction,
+// last 10 lines of the detector log file, closing notes) and is then handed to the
+// system "mail" command.
+// Returns kTRUE if the mail command exits with status 0, kFALSE otherwise
+// (including when no recipient is configured or the body file cannot be written).
+
+       // without recipients there is nothing to send; this also guarantees that the
+       // trailing-comma removal below never runs on an empty string
+       const TObjArray* experts = fConfig->GetResponsibles(fCurrentDetector);
+       if (!experts || experts->GetEntries() == 0)
+       {
+               AliError(Form("No expert mail address found for %s", fCurrentDetector.Data()));
+               return kFALSE;
+       }
+
+       // comma-separated list of recipients
+       TString to="";
+       TIter iterExperts(experts);
+       TObjString *anExpert=0;
+       while ((anExpert = (TObjString*) iterExperts.Next()))
+       {
+               to += Form("%s,", anExpert->GetName());
+       }
+       to.Remove(to.Length()-1); // drop the trailing comma
+       AliInfo(Form("to: %s",to.Data()));
+
+       // the body file is written in the log directory: create it if it cannot be opened
+       void* dir = gSystem->OpenDirectory(fgkShuttleLogDir);
+       if (dir == NULL)
+       {
+               if (gSystem->mkdir(fgkShuttleLogDir, kTRUE))
+               {
+                       AliError(Form("Can't open directory <%s>", fgkShuttleLogDir));
+                       return kFALSE;
+               }
+
+       } else {
+               gSystem->FreeDirectory(dir);
+       }
+
+       TString bodyFileName;
+       bodyFileName.Form("%s/mail.body", fgkShuttleLogDir);
+       gSystem->ExpandPathName(bodyFileName);
+
+       ofstream mailBody;
+       mailBody.open(bodyFileName, ofstream::out);
+
+       if (!mailBody.is_open())
+       {
+               AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
+               return kFALSE;
+       }
+
+       TString cc="alberto.colla@cern.ch";
+
+       TString subject = Form("%s Shuttle preprocessor error in run %d !",
+                               fCurrentDetector.Data(), GetCurrentRun());
+       AliInfo(Form("subject: %s", subject.Data()));
+
+       TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
+       body += Form("SHUTTLE just detected that your preprocessor "
+                       "exited with ERROR state in run %d !!\n\n", GetCurrentRun());
+       body += Form("Please check %s status on the web page asap!\n\n", fCurrentDetector.Data());
+       body += Form("The last 10 lines of %s log file are following:\n\n",
+                       fCurrentDetector.Data());
+
+       AliInfo(Form("Body begin: %s", body.Data()));
+
+       // write and close the first part, so that the stream does not hold the file
+       // while the shell command below appends the log excerpt to it
+       mailBody << body.Data();
+       mailBody.close();
+
+       TString logFileName = Form("%s/%s.log", fgkShuttleLogDir, fCurrentDetector.Data());
+       TString tailCommand = Form("tail -n 10 %s >> %s", logFileName.Data(), bodyFileName.Data());
+       Bool_t logAppended = (gSystem->Exec(tailCommand.Data()) == 0);
+
+       // reopen in append mode to add the closing part after the log excerpt
+       mailBody.open(bodyFileName, ofstream::out | ofstream::app);
+       if (!mailBody.is_open())
+       {
+               AliError(Form("Could not open mail body file %s", bodyFileName.Data()));
+               return kFALSE;
+       }
+
+       if (!logAppended)
+       {
+               mailBody << Form("%s log file not found ...\n\n", fCurrentDetector.Data());
+       }
+
+       TString endBody = "------------------------------------------------------\n\n";
+       endBody += "In case of problems please contact the SHUTTLE core team!\n\n";
+       endBody += "Please do not answer this message directly, it is automatically generated!\n\n";
+       endBody += "Sincerely yours,\n\n \t\t\tthe SHUTTLE\n";
+
+       AliInfo(Form("Body end: %s", endBody.Data()));
+
+       mailBody << endBody.Data();
+
+       mailBody.close();
+
+       // send mail!
+       TString mailCommand = Form("mail -s \"%s\" -c %s %s < %s",
+                                               subject.Data(),
+                                               cc.Data(),
+                                               to.Data(),
+                                               bodyFileName.Data());
+       AliInfo(Form("mail command: %s", mailCommand.Data()));
+
+       // keep the integer status of the command: only 0 is reported as success
+       Int_t result = gSystem->Exec(mailCommand.Data());
+
+       return result == 0;
+}
index e6af920..d48fef4 100644 (file)
@@ -96,15 +96,16 @@ private:
        Bool_t RetrieveDAQFile(const char* daqFileName, const char* localFileName);
        TList* GetDAQFileSources(const char* detector, const char* id);
        Bool_t UpdateDAQTable();
-       Bool_t UpdateHLTTable();
 
        const char* GetDCSFileName(const char* detector, const char* id, const char* source);
 //     Bool_t RetrieveDCSFile(const char* daqFileName const char* localFileName);
        TList* GetDCSFileSources(const char* detector, const char* id);
+//     Bool_t UpdateDCSTable();
 
        const char* GetHLTFileName(const char* detector, const char* id, const char* source);
        Bool_t RetrieveHLTFile(const char* hltFileName, const char* localFileName);
        TList* GetHLTFileSources(const char* detector, const char* id);
+       Bool_t UpdateHLTTable();
 
        UInt_t WriteToCDB(const char* mainUri, const char* localUri,
                                const AliCDBPath& path, TObject* object, AliCDBMetaData* metaData,
@@ -118,7 +119,8 @@ private:
        Bool_t ContinueProcessing();
        void UpdateShuttleStatus(AliShuttleStatus::Status newStatus, Bool_t increaseCount = kFALSE);
        Bool_t UpdateShuttleLogbook(const char* detector, const char* status=0);
-       
+       Bool_t SendMail();
+
        void SetLastAction(const char* action);
 
        const AliShuttleConfig* fConfig;        // pointer to configuration object
index f147632..9cd95c8 100644 (file)
@@ -120,6 +120,7 @@ fDCSHost(""),
 fDCSPort(0),
 fDCSAliases(0),
 fDCSDataPoints(0),
+fResponsibles(0),
 fIsValid(kFALSE),
 fSkipDCSQuery(kFALSE),
 fStrictRunOrder(kFALSE)
@@ -131,6 +132,8 @@ fStrictRunOrder(kFALSE)
        fDCSAliases->SetOwner(1);
        fDCSDataPoints = new TObjArray();
        fDCSDataPoints->SetOwner(1);
+       fResponsibles = new TObjArray();
+       fResponsibles->SetOwner(1);
 
        anAttribute = entry->GetAttribute("det"); // MUST
         if (!anAttribute)
@@ -155,10 +158,22 @@ fStrictRunOrder(kFALSE)
                fStrictRunOrder = (Bool_t) strictRunStr.Atoi();
        }
 
+       anAttribute = entry->GetAttribute("responsible"); // MUST
+        if (!anAttribute)
+       {
+               AliError(Form("Invalid configuration! No \"responsible\" attribute!"));
+               return;
+        }
+       const char* aResponsible;
+       while ((aResponsible = anAttribute->GetValue()))
+       {
+               fResponsibles->AddLast(new TObjString(aResponsible));
+       }
+
        anAttribute = entry->GetAttribute("DCSHost"); // MAY
        if (!anAttribute)
        {
-               AliWarning(
+               AliDebug(2,
                        Form("%s has not DCS host entry - Shuttle will skip DCS data query!",
                                fDetector.Data()));
                fIsValid = kTRUE;
@@ -199,8 +214,6 @@ fStrictRunOrder(kFALSE)
        }
 
        fIsValid = kTRUE;
-
-
 }
 
 //______________________________________________________________________________________________
@@ -210,6 +223,7 @@ AliShuttleConfig::AliShuttleConfigHolder::~AliShuttleConfigHolder()
 
        delete fDCSAliases;
        delete fDCSDataPoints;
+       delete fResponsibles;
 }
 
 ClassImp(AliShuttleConfig)
@@ -217,7 +231,7 @@ ClassImp(AliShuttleConfig)
 //______________________________________________________________________________________________
 AliShuttleConfig::AliShuttleConfig(const char* host, Int_t port,
        const char* binddn, const char* password, const char* basedn):
-       fIsValid(kFALSE),
+       fIsValid(kFALSE), fConfigHost(host),
        fDAQlbHost(""), fDAQlbPort(), fDAQlbUser(""), fDAQlbPass(""),
        fDAQlbDB(""), fDAQlbTable(""),
        fMaxRetries(0), fPPTimeOut(0), fDetectorMap(), fDetectorList(),
@@ -615,6 +629,24 @@ const TObjArray* AliShuttleConfig::GetDCSDataPoints(const char* detector) const
 }
 
 //______________________________________________________________________________________________
+const TObjArray* AliShuttleConfig::GetResponsibles(const char* detector) const
+{
+       //
+       // Looks up the configuration holder of the given detector and returns its
+       // collection of TObjString, i.e. the list of mail addresses of the detector's
+       // responsible(s). Logs an error and returns NULL if the detector has no entry
+       // in the detector map.
+       //
+
+       AliShuttleConfigHolder* holder = (AliShuttleConfigHolder*) fDetectorMap.GetValue(detector);
+       if (holder) return holder->GetResponsibles();
+
+       AliError(Form("There isn't configuration for detector: %s", detector));
+       return NULL;
+}
+
+//______________________________________________________________________________________________
 Bool_t AliShuttleConfig::HostProcessDetector(const char* detector) const
 {
        // return TRUE if detector is handled by host or if fProcessAll is TRUE
@@ -652,10 +684,13 @@ void AliShuttleConfig::Print(Option_t* /*option*/) const
        TString result;
        result += '\n';
 
-       result += Form("\nShuttle running on %s \n\n", fShuttleInstanceHost.Data());
+       result += "####################################################\n";
+       result += Form(" Shuttle configuration from %s \n", fConfigHost.Data());
+       result += "####################################################\n";
+       result += Form("\nShuttle running on %s \n", fShuttleInstanceHost.Data());
 
        if(fProcessAll) {
-               result += Form("All detectors will be processed! \n\n");
+               result += Form("All detectors will be processed! \n");
        } else {
                result += "Detectors processed by this host: ";
                TIter it(&fProcessedDetectors);
@@ -663,12 +698,13 @@ void AliShuttleConfig::Print(Option_t* /*option*/) const
                while ((aDet = (TObjString*) it.Next())) {
                        result += Form("%s ", aDet->String().Data());
                }
-               result += "\n\n";
+               result += "\n";
        }
 
        result += Form("PP time out = %d - Max total retries = %d\n\n", fPPTimeOut, fMaxRetries);
+       result += "------------------------------------------------------\n";
 
-       result += Form("DAQ Logbook Configuration \n \tHost: %s:%d; \tUser: %s; ",
+       result += Form("Logbook Configuration \n\n \tHost: %s:%d; \tUser: %s; ",
                fDAQlbHost.Data(), fDAQlbPort, fDAQlbUser.Data());
 
 //     result += "Password: ";
@@ -678,8 +714,11 @@ void AliShuttleConfig::Print(Option_t* /*option*/) const
 
        result += "\n\n";
 
+       result += "------------------------------------------------------\n";
+       result += "FXS configuration\n\n";
+
        for(int iSys=0;iSys<3;iSys++){
-               result += Form("FXS Configuration for %s system\n", AliShuttleInterface::GetSystemName(iSys));
+               result += Form("*** %s ***\n", AliShuttleInterface::GetSystemName(iSys));
                result += Form("\tDB  host: %s:%d; \tUser: %s; \tName: %s; \tTable: %s\n",
                                                fFXSdbHost[iSys].Data(), fFXSdbPort[iSys], fFXSdbUser[iSys].Data(),
                                                fFXSdbName[iSys].Data(), fFXSdbTable[iSys].Data());
@@ -689,12 +728,28 @@ void AliShuttleConfig::Print(Option_t* /*option*/) const
                // result += Form("FXS Password:",fFXSPass[iSys].Data());
        }
 
+       result += "------------------------------------------------------\n";
+       result += "Detector-specific configuration\n\n";
        TIter iter(fDetectorMap.GetTable());
        TPair* aPair;
        while ((aPair = (TPair*) iter.Next())) {
                AliShuttleConfigHolder* aHolder = (AliShuttleConfigHolder*) aPair->Value();
-               result += Form("Detector-specific configuration: *** %s *** \n", aHolder->GetDetector());
-               result += Form("\tStrict run ordering flag: %s \n", aHolder->StrictRunOrder() ? "TRUE" : "FALSE");
+               result += Form("*** %s *** \n", aHolder->GetDetector());
+
+               const TObjArray* responsibles = aHolder->GetResponsibles();
+               if (responsibles->GetEntries() != 0)
+               {
+                       result += "\tDetector responsible(s): ";
+                       TIter it(responsibles);
+                       TObjString* aResponsible;
+                       while ((aResponsible = (TObjString*) it.Next()))
+                       {
+                               result += Form("%s ", aResponsible->String().Data());
+                       }
+                       result += "\n";
+               }
+
+               result += Form("\tStrict run ordering: %s \n", aHolder->StrictRunOrder() ? "YES" : "NO");
                if(aHolder->SkipDCSQuery())
                {
                        result += "\n";
@@ -715,7 +770,6 @@ void AliShuttleConfig::Print(Option_t* /*option*/) const
                        result += "\n";
                }
 
-
                const TObjArray* dataPoints = aHolder->GetDCSDataPoints();
                if (dataPoints->GetEntries() != 0)
                {
@@ -728,7 +782,6 @@ void AliShuttleConfig::Print(Option_t* /*option*/) const
                                result += "\n";
                }
                result += "\n";
-               
        }
 
        if(!fIsValid) result += "\n\n********** !!!!! Configuration is INVALID !!!!! **********\n";
index a9e92ef..9d2284f 100644 (file)
@@ -27,6 +27,8 @@ public:
 
        Bool_t IsValid() const {return fIsValid;};
 
+       const char* GetConfigHost() const {return fConfigHost.Data();}
+
        const char* GetDAQlbHost() const {return fDAQlbHost.Data();}
        UInt_t      GetDAQlbPort() const {return fDAQlbPort;}
        const char* GetDAQlbUser() const {return fDAQlbUser.Data();}
@@ -57,6 +59,7 @@ public:
        Int_t GetDCSPort(const char* detector) const;
        const TObjArray* GetDCSAliases(const char* detector) const;
        const TObjArray* GetDCSDataPoints(const char* detector) const;
+       const TObjArray* GetResponsibles(const char* detector) const;
        Bool_t StrictRunOrder(const char* detector) const;
 
        void SetProcessAll(Bool_t flag=kTRUE) {fProcessAll=flag;}
@@ -78,6 +81,7 @@ private:
                Int_t GetDCSPort() const {return fDCSPort;}
                const TObjArray* GetDCSAliases() const {return fDCSAliases;}
                const TObjArray* GetDCSDataPoints() const {return fDCSDataPoints;}
+               const TObjArray* GetResponsibles() const {return fResponsibles;}
 
                Bool_t IsValid() const {return fIsValid;}
                Bool_t SkipDCSQuery() const {return fSkipDCSQuery;}
@@ -89,6 +93,7 @@ private:
                Int_t   fDCSPort;       // port of the DCS server
                TObjArray* fDCSAliases; // List of DCS aliases to be retrieved
                TObjArray* fDCSDataPoints; // List of DCS data points to be retrieved
+               TObjArray* fResponsibles; // List of email addresses of the detector's responsible(s)
                Bool_t fIsValid;        // flag for the validity of the configuration
                Bool_t fSkipDCSQuery;   // flag - if TRUE (-> DCS config empty) skip DCS archive data query
                Bool_t fStrictRunOrder; // flag - if TRUE connect data in a strict run ordering
@@ -100,6 +105,8 @@ private:
 
        Bool_t fIsValid;                //! flag for the validity of the configuration
 
+       TString fConfigHost;            //! Host of the Shuttle configuration LDAP server
+
        TString fDAQlbHost;             //! Host of the DAQ logbook MySQL Server
        UInt_t  fDAQlbPort;             //! port of the DAQ logbook MySQL Server
        TString fDAQlbUser;             //! username of the DAQ logbook MySQL Server