]> git.uio.no Git - u/mrichter/AliRoot.git/blobdiff - SHUTTLE/AliShuttle.cxx
several bugfixes
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
index 17a60660c7144422ec7800ac5b1a0112c80d3b51..df9b23b978d7bc40c80aa50746862a597f820f15 100644 (file)
 
 /*
 $Log$
+Revision 1.73  2007/12/14 19:31:36  acolla
+Sending email to DCS experts is temporarily commented out
+
+Revision 1.72  2007/12/13 15:44:28  acolla
+Run type added in mail sent to detector expert (eases understanding)
+
+Revision 1.71  2007/12/12 14:56:14  jgrosseo
+sending shuttle_ignore to ML also in case of 0 events
+
+Revision 1.70  2007/12/12 13:45:35  acolla
+Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing.
+
+Revision 1.69  2007/12/12 10:06:29  acolla
+in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times:
+
+time_start==0 && time_end==0
+
+logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing.
+
+Revision 1.68  2007/12/11 10:15:17  acolla
+Added marking SHUTTLE=DONE for invalid runs
+(invalid start time or end time) and runs with totalEvents < 1
+
+Revision 1.67  2007/12/07 19:14:36  acolla
+in AliShuttleTrigger:
+
+Added automatic collection of new runs on a regular time basis (settable from the configuration)
+
+in AliShuttleConfig: new members
+
+- triggerWait: time to wait for DIM trigger (s) before starting automatic collection of new runs
+- mode: run mode (test, prod) -> used to build log folder (logs or logs_PROD)
+
+in AliShuttle:
+
+- logs now stored in logs/#RUN/DET_#RUN.log
+
 Revision 1.66  2007/12/05 10:45:19  jgrosseo
 changed order of arguments to TMonaLisaWriter
 
@@ -620,6 +657,7 @@ Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
                        Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
                                                "there are previous unprocessed runs!",
                                                fCurrentDetector.Data(), aLocId.GetPath().Data()));
+                       result = kFALSE;
                        continue;
                }
 
@@ -1246,7 +1284,9 @@ void AliShuttle::SendMLInfo()
        mlList.Add(&mlStatus);
        mlList.Add(&mlRetryCount);
 
-       fMonaLisa->SendParameters(&mlList);
+       TString mlID;
+       mlID.Form("%d", GetCurrentRun());
+       fMonaLisa->SendParameters(&mlList, mlID);
 }
 
 //______________________________________________________________________________________________
@@ -1328,8 +1368,6 @@ Bool_t AliShuttle::ContinueProcessing()
                        Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
                                "successfully stored into main storage",
                                fCurrentDetector.Data()));
-                       UpdateShuttleStatus(AliShuttleStatus::kDone);
-                       UpdateShuttleLogbook(fCurrentDetector.Data(), "DONE");
                } else {
                        Log("SHUTTLE",
                                Form("ContinueProcessing - %s: Grid storage failed again",
@@ -1398,9 +1436,6 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
        Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
                                        GetCurrentRun()));
 
-       // create ML instance that monitors this run
-       fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable(), Form("%d", GetCurrentRun()));
-
        // Send the information to ML
        TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
        TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
@@ -1409,7 +1444,9 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
        mlList.Add(&mlStatus);
        mlList.Add(&mlRunType);
 
-       fMonaLisa->SendParameters(&mlList);
+       TString mlID;
+       mlID.Form("%d", GetCurrentRun());
+       fMonaLisa->SendParameters(&mlList, mlID);
 
        if (fLogbookEntry->IsDone())
        {
@@ -1473,20 +1510,27 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", 
                                        GetCurrentYear(), lhcPeriod.Data());
        
-       AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
-       if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
-       AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
-       if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
-
        // Loop on detectors in the configuration
        TIter iter(fConfig->GetDetectors());
        TObjString* aDetector = 0;
 
+       Bool_t first = kTRUE;
+
        while ((aDetector = (TObjString*) iter.Next()))
        {
                fCurrentDetector = aDetector->String();
 
                if (ContinueProcessing() == kFALSE) continue;
+               
+               if (first)
+               {
+                 // call QueryCDB only if at least one detector needs processing, and only once per Process() call
+                 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
+                 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
+                 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
+                 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
+                 first = kFALSE;
+               }
 
                Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START  ******",
                                                GetCurrentRun(), aDetector->GetName()));
@@ -1563,9 +1607,12 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                                        }
                                        
                                        if (expiredTime % 60 == 0)
+                                       {
                                                Log("SHUTTLE", Form("Process - %s: Checking process. "
                                                        "Run time: %d seconds - Memory consumption: %d KB",
                                                        fCurrentDetector.Data(), expiredTime, mem));
+                                               SendAlive();
+                                       }
                                        
                                        if (mem > fConfig->GetPPMaxMem())
                                        {
@@ -1702,7 +1749,8 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
        TObjArray checkEntryArray;
        checkEntryArray.SetOwner(1);
        TString whereClause = Form("where run=%d", GetCurrentRun());
-       if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || checkEntryArray.GetEntries() == 0) {
+       if (!QueryShuttleLogbook(whereClause.Data(), checkEntryArray) || 
+                       checkEntryArray.GetEntries() == 0) {
                Log("SHUTTLE", Form("Process - Warning: Cannot check status of run %d on Shuttle logbook!",
                                                GetCurrentRun()));
                return hasError == kFALSE;
@@ -1732,10 +1780,6 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                }
        }
 
-       // remove ML instance
-       delete fMonaLisa;
-       fMonaLisa = 0;
-
        fLogbookEntry = 0;
 
        return hasError == kFALSE;
@@ -1818,8 +1862,8 @@ Bool_t AliShuttle::ProcessCurrentDetector()
                                                        " Sending mail to DCS experts!", host.Data()));
                                        UpdateShuttleStatus(AliShuttleStatus::kDCSError);
                                        
-                                       if (!SendMailToDCS())
-                                               Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+                                       //if (!SendMailToDCS())
+                                       //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
 
                                        delete dcsMap;
                                        return kFALSE;
@@ -1839,8 +1883,8 @@ Bool_t AliShuttle::ProcessCurrentDetector()
                                                        " Sending mail to DCS experts!", host.Data()));
                                        UpdateShuttleStatus(AliShuttleStatus::kDCSError);
                                        
-                                       if (!SendMailToDCS())
-                                               Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+                                       //if (!SendMailToDCS())
+                                       //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
                                        
                                        if (aliasMap) delete aliasMap;
                                        delete dcsMap;
@@ -1872,11 +1916,11 @@ Bool_t AliShuttle::ProcessCurrentDetector()
        }
        
        // save map into file, to help debugging in case of preprocessor error
-       TFile* f = TFile::Open("DCSMap.root","recreate");
+       /*TFile* f = TFile::Open("DCSMap.root","recreate");
        f->cd();
        dcsMap->Write("DCSMap", TObject::kSingleKey);
        f->Close();
-       delete f;
+       delete f;*/
        
        // DCS Archive DB processing successful. Call Preprocessor!
        UpdateShuttleStatus(AliShuttleStatus::kPPStarted);
@@ -2012,10 +2056,106 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
        UInt_t startTime = entry->GetStartTime();
        UInt_t endTime = entry->GetEndTime();
 
-       if (!startTime || !endTime || startTime > endTime) {
+//     if (!startTime || !endTime || startTime > endTime) 
+//     {
+//             Log("SHUTTLE",
+//                     Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
+//                             run, startTime, endTime));              
+//             
+//             Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+//             fLogbookEntry = entry;  
+//             if (!UpdateShuttleLogbook("shuttle_done"))
+//             {
+//                     AliError(Form("Could not update logbook for run %d !", run));
+//             }
+//             fLogbookEntry = 0;
+//                             
+//             delete entry;
+//             delete aRow;
+//             delete aResult;
+//             return 0;
+//     }
+
+       if (!startTime) 
+       {
+               Log("SHUTTLE",
+                       Form("QueryRunParameters - Invalid parameters for Run %d: " 
+                               "startTime = %d, endTime = %d. Skipping!",
+                                       run, startTime, endTime));              
+               
+               Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+               fLogbookEntry = entry;  
+               if (!UpdateShuttleLogbook("shuttle_ignored"))
+               {
+                       AliError(Form("Could not update logbook for run %d !", run));
+               }
+               fLogbookEntry = 0;
+                               
+               delete entry;
+               delete aRow;
+               delete aResult;
+               return 0;
+       }
+       
+       if (startTime && !endTime) 
+       {
+               // TODO Here we don't mark SHUTTLE done, because this may mean 
+               //the run is still ongoing!!            
+               Log("SHUTTLE",
+                       Form("QueryRunParameters - Invalid parameters for Run %d: "
+                            "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
+                                       run, startTime, endTime));              
+               
+               //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+               //fLogbookEntry = entry;        
+               //if (!UpdateShuttleLogbook("shuttle_done"))
+               //{
+               //      AliError(Form("Could not update logbook for run %d !", run));
+               //}
+               //fLogbookEntry = 0;
+                               
+               delete entry;
+               delete aRow;
+               delete aResult;
+               return 0;
+       }
+                       
+       if (startTime && endTime && (startTime > endTime)) 
+       {
+               Log("SHUTTLE",
+                       Form("QueryRunParameters - Invalid parameters for Run %d: "
+                               "startTime = %d, endTime = %d. Skipping!",
+                                       run, startTime, endTime));              
+               
+               Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+               fLogbookEntry = entry;  
+               if (!UpdateShuttleLogbook("shuttle_ignored"))
+               {
+                       AliError(Form("Could not update logbook for run %d !", run));
+               }
+               fLogbookEntry = 0;
+                               
+               delete entry;
+               delete aRow;
+               delete aResult;
+               return 0;
+       }
+                       
+       TString totEventsStr = entry->GetRunParameter("totalEvents");  
+       Int_t totEvents = totEventsStr.Atoi();
+       if (totEvents < 1) 
+       {
                Log("SHUTTLE",
-                       Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d",
-                               run, startTime, endTime));
+                       Form("QueryRunParameters - Run %d has 0 events - Skipping!", run));             
+               
+               Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));           
+               fLogbookEntry = entry;  
+               if (!UpdateShuttleLogbook("shuttle_ignored"))
+               {
+                       AliError(Form("Could not update logbook for run %d !", run));
+               }
+               fLogbookEntry = 0;
+                               
                delete entry;
                delete aRow;
                delete aResult;
@@ -2659,17 +2799,22 @@ Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status
 
        TString detName(detector);
        TString setClause;
-       if(detName == "shuttle_done")
+       if (detName == "shuttle_done" || detName == "shuttle_ignored")
        {
                setClause = "set shuttle_done=1";
 
-               // Send the information to ML
-               TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
-
-               TList mlList;
-               mlList.Add(&mlStatus);
+               if (detName == "shuttle_done")
+               {
+                       // Send the information to ML
+                       TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
 
-               fMonaLisa->SendParameters(&mlList);
+                       TList mlList;
+                       mlList.Add(&mlStatus);
+               
+                       TString mlID;
+                       mlID.Form("%d", GetCurrentRun());
+                       fMonaLisa->SendParameters(&mlList, mlID);
+               }
        } else {
                TString statusStr(status);
                if(statusStr.Contains("done", TString::kIgnoreCase) ||
@@ -2830,6 +2975,19 @@ TString AliShuttle::GetLogFileName(const char* detector) const
        return fileName;
 }
 
+//______________________________________________________________________________________________
+void AliShuttle::SendAlive()
+{
+       // Sends SHUTTLE_status="Alive" to MonaLisa (ML) under the fixed ID "__PROCESSINGINFO__" instead of a run number; NOTE(review): fMonaLisa is dereferenced unchecked - confirm it is always created (see Collect) before this is called
+       
+       TMonaLisaText mlStatus("SHUTTLE_status", "Alive");
+
+       TList mlList;
+       mlList.Add(&mlStatus);
+
+       fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
+}
+
 //______________________________________________________________________________________________
 Bool_t AliShuttle::Collect(Int_t run)
 {
@@ -2847,6 +3005,13 @@ Bool_t AliShuttle::Collect(Int_t run)
 
        SetLastAction("Starting");
 
+       // create ML instance
+       if (!fMonaLisa)
+               fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
+               
+
+       SendAlive();
+
        TString whereClause("where shuttle_done=0");
        if (run != -1)
                whereClause += Form(" and run=%d", run);
@@ -3048,19 +3213,6 @@ Bool_t AliShuttle::SendMail()
                gSystem->FreeDirectory(dir);
        }
 
-       TString bodyFileName;
-       bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
-       gSystem->ExpandPathName(bodyFileName);
-
-       ofstream mailBody;
-       mailBody.open(bodyFileName, ofstream::out);
-
-       if (!mailBody.is_open())
-       {
-               Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
-               return kFALSE;
-       }
-
        TString to="";
        TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
        TObjString *anExpert=0;
@@ -3068,7 +3220,8 @@ Bool_t AliShuttle::SendMail()
        {
                to += Form("%s,", anExpert->GetName());
        }
-       to.Remove(to.Length()-1);
+       if (to.Length() > 0)
+         to.Remove(to.Length()-1);
        AliDebug(2, Form("to: %s",to.Data()));
 
        if (to.IsNull()) {
@@ -3076,18 +3229,38 @@ Bool_t AliShuttle::SendMail()
                return kFALSE;
        }
 
+       TString bodyFileName;
+       bodyFileName.Form("%s/mail.body", GetShuttleLogDir());
+       gSystem->ExpandPathName(bodyFileName);
+
+       ofstream mailBody;
+       mailBody.open(bodyFileName, ofstream::out);
+
+       if (!mailBody.is_open())
+       {
+               Log("SHUTTLE", Form("Could not open mail body file %s", bodyFileName.Data()));
+               return kFALSE;
+       }
+
        TString cc="alberto.colla@cern.ch";
 
-       TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
-                               fCurrentDetector.Data(), GetCurrentRun());
+       TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
+                               fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
        AliDebug(2, Form("subject: %s", subject.Data()));
 
        TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
        body += Form("SHUTTLE just detected that your preprocessor "
-                       "failed processing run %d!!\n\n", GetCurrentRun());
+                       "failed processing run %d (run type = %s)!!\n\n", 
+                                       GetCurrentRun(), GetRunType());
        body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
                                fCurrentDetector.Data());
-       body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+       if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
+       {
+               body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+       } else {
+               body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
+       }
+       
        
        TString logFolder = "logs";
        if (fConfig->GetRunMode() == AliShuttleConfig::kProd) 
@@ -3194,7 +3367,12 @@ Bool_t AliShuttle::SendMailToDCS()
                        "in run %d!!\n\n", fCurrentDetector.Data(), GetCurrentRun());
        body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
                                fCurrentDetector.Data());
-       body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+       if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
+       {
+               body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
+       } else {
+               body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
+       }
 
        TString logFolder = "logs";
        if (fConfig->GetRunMode() == AliShuttleConfig::kProd)