git.uio.no Git - u/mrichter/AliRoot.git/blobdiff - SHUTTLE/AliShuttle.cxx
bug fix in the name of the raw tag files (Raw instead of raw)
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
index af34760896b6a509f701e5585efd4f21e1b59597..02a723f4e0bd2cab204aa03a01ae174018434be9 100644 (file)
 
 /*
 $Log$
+Revision 1.75  2007/12/18 15:42:14  jgrosseo
+adding number of open runs to monitoring
+
+Revision 1.74  2007/12/17 03:23:32  jgrosseo
+several bugfixes
+added "empty preprocessor" as placeholder for Acorde in FDR
+
+Revision 1.73  2007/12/14 19:31:36  acolla
+Sending email to DCS experts is temporarily commented
+
+Revision 1.72  2007/12/13 15:44:28  acolla
+Run type added in mail sent to detector expert (eases understanding)
+
+Revision 1.71  2007/12/12 14:56:14  jgrosseo
+sending shuttle_ignore to ML also in case of 0 events
+
+Revision 1.70  2007/12/12 13:45:35  acolla
+Monalisa started in Collect() function. Alive message to monitor is sent at each Collect and every minute during preprocessor processing.
+
+Revision 1.69  2007/12/12 10:06:29  acolla
+in AliShuttle.cxx: SHUTTLE logbook is updated in case of invalid run times:
+
+time_start==0 && time_end==0
+
+logbook is NOT updated if time_start != 0 && time_end == 0, because it may mean that the run is still ongoing.
+
+Revision 1.68  2007/12/11 10:15:17  acolla
+Added marking SHUTTLE=DONE for invalid runs
+(invalid start time or end time) and runs with totalEvents < 1
+
 Revision 1.67  2007/12/07 19:14:36  acolla
 in AliShuttleTrigger:
 
@@ -634,6 +664,7 @@ Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
                        Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
                                                "there are previous unprocessed runs!",
                                                fCurrentDetector.Data(), aLocId.GetPath().Data()));
+                       result = kFALSE;
                        continue;
                }
 
@@ -832,7 +863,7 @@ Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridF
        TString localBaseFolder = sto->GetBaseFolder();
        
        // Build Run level folder
-       // folder = /alice/data/year/lhcPeriod/runNb/Raw
+       // folder = /alice/data/year/lhcPeriod/runNb/raw
        
                
        TString lhcPeriod = GetLHCPeriod();     
@@ -842,7 +873,7 @@ Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridF
                return 0;
        }
        
-       TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw/%s", 
+       TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw/%s", 
                                localBaseFolder.Data(), GetCurrentYear(), 
                                lhcPeriod.Data(), GetCurrentRun(), gridFileName);
                                        
@@ -954,7 +985,7 @@ Bool_t AliShuttle::CopyFilesToGrid(const char* type)
                        return 0;
                }
                
-               dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw", 
+               dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw", 
                                localBaseFolder.Data(), GetCurrentYear(), 
                                lhcPeriod.Data(), GetCurrentRun());
                alienDir = dir(dir.Index("/alice/data/"), dir.Length());
@@ -1260,7 +1291,9 @@ void AliShuttle::SendMLInfo()
        mlList.Add(&mlStatus);
        mlList.Add(&mlRetryCount);
 
-       fMonaLisa->SendParameters(&mlList);
+       TString mlID;
+       mlID.Form("%d", GetCurrentRun());
+       fMonaLisa->SendParameters(&mlList, mlID);
 }
 
 //______________________________________________________________________________________________
@@ -1410,9 +1443,6 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
        Log("SHUTTLE", Form("\t\t\t^*^*^*^*^*^*^*^*^*^*^*^* run %d: START ^*^*^*^*^*^*^*^*^*^*^*^*",
                                        GetCurrentRun()));
 
-       // create ML instance that monitors this run
-       fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable(), Form("%d", GetCurrentRun()));
-
        // Send the information to ML
        TMonaLisaText  mlStatus("SHUTTLE_status", "Processing");
        TMonaLisaText  mlRunType("SHUTTLE_runtype", Form("%s (%s)", entry->GetRunType(), entry->GetRunParameter("log")));
@@ -1421,7 +1451,9 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
        mlList.Add(&mlStatus);
        mlList.Add(&mlRunType);
 
-       fMonaLisa->SendParameters(&mlList);
+       TString mlID;
+       mlID.Form("%d", GetCurrentRun());
+       fMonaLisa->SendParameters(&mlList, mlID);
 
        if (fLogbookEntry->IsDone())
        {
@@ -1485,20 +1517,27 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", 
                                        GetCurrentYear(), lhcPeriod.Data());
        
-       AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
-       if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
-       AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
-       if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
-
        // Loop on detectors in the configuration
        TIter iter(fConfig->GetDetectors());
        TObjString* aDetector = 0;
 
+       Bool_t first = kTRUE;
+
        while ((aDetector = (TObjString*) iter.Next()))
        {
                fCurrentDetector = aDetector->String();
 
                if (ContinueProcessing() == kFALSE) continue;
+               
+               if (first)
+               {
+                 // only read QueryCDB when needed and only once
+                 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
+                 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
+                 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
+                 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
+                 first = kFALSE;
+               }
 
                Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START  ******",
                                                GetCurrentRun(), aDetector->GetName()));
@@ -1575,9 +1614,12 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                                        }
                                        
                                        if (expiredTime % 60 == 0)
+                                       {
                                                Log("SHUTTLE", Form("Process - %s: Checking process. "
                                                        "Run time: %d seconds - Memory consumption: %d KB",
                                                        fCurrentDetector.Data(), expiredTime, mem));
+                                               SendAlive();
+                                       }
                                        
                                        if (mem > fConfig->GetPPMaxMem())
                                        {
@@ -1662,7 +1704,8 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                        if (success) // Preprocessor finished successfully!
                        { 
                                // remove temporary folder
-                               gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
+                                // temporarily commented out (JF)
+                               //gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
                                
                                // Update time_processed field in FXS DB
                                if (UpdateTable() == kFALSE)
@@ -1745,10 +1788,6 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                }
        }
 
-       // remove ML instance
-       delete fMonaLisa;
-       fMonaLisa = 0;
-
        fLogbookEntry = 0;
 
        return hasError == kFALSE;
@@ -1831,8 +1870,8 @@ Bool_t AliShuttle::ProcessCurrentDetector()
                                                        " Sending mail to DCS experts!", host.Data()));
                                        UpdateShuttleStatus(AliShuttleStatus::kDCSError);
                                        
-                                       if (!SendMailToDCS())
-                                               Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+                                       //if (!SendMailToDCS())
+                                       //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
 
                                        delete dcsMap;
                                        return kFALSE;
@@ -1852,8 +1891,8 @@ Bool_t AliShuttle::ProcessCurrentDetector()
                                                        " Sending mail to DCS experts!", host.Data()));
                                        UpdateShuttleStatus(AliShuttleStatus::kDCSError);
                                        
-                                       if (!SendMailToDCS())
-                                               Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+                                       //if (!SendMailToDCS())
+                                       //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
                                        
                                        if (aliasMap) delete aliasMap;
                                        delete dcsMap;
@@ -1917,6 +1956,58 @@ Bool_t AliShuttle::ProcessCurrentDetector()
        return kTRUE;
 }
 
+//______________________________________________________________________________________________
+void AliShuttle::CountOpenRuns()
+{
+       // Queries DAQ's Shuttle logbook for the number of runs with shuttle_done=0,
+       // logs it and sends it to ML as "SHUTTLE_openruns".
+       // On any failure (no connection, failed query, unreadable result) an error
+       // is reported and nothing is sent.
+       
+       // check connection, in case connect
+       if (!Connect(3)) 
+               return;
+
+       TString sqlQuery;
+       sqlQuery = Form("select count(*) from %s where shuttle_done=0", fConfig->GetShuttlelbTable());
+       
+       TSQLResult* aResult = fServer[3]->Query(sqlQuery);
+       if (!aResult) {
+               AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
+               return;
+       }
+
+       AliDebug(2,Form("Query = %s", sqlQuery.Data()));
+       
+       // From here on aResult has to be deleted on every exit path,
+       // not only on the successful one.
+       if (aResult->GetRowCount() == 0) {
+               AliError(Form("No result for query %s received", sqlQuery.Data()));
+               delete aResult;
+               return;
+       }
+
+       if (aResult->GetFieldCount() != 1) {
+               AliError(Form("Invalid field count for query %s received", sqlQuery.Data()));
+               delete aResult;
+               return;
+       }
+
+       TSQLRow* aRow = aResult->Next();
+       if (!aRow) {
+               AliError(Form("Could not receive result of query %s", sqlQuery.Data()));
+               delete aResult;
+               return;
+       }
+       
+       // the single field of the single row holds the count as text
+       TString result(aRow->GetField(0), aRow->GetFieldLength(0));
+       Int_t count = result.Atoi();
+       
+       Log("SHUTTLE", Form("%d unprocessed runs", count));
+       
+       delete aRow;
+       delete aResult;
+
+       // nothing to send to if the ML writer has not been created
+       if (!fMonaLisa)
+               return;
+
+       TMonaLisaValue mlStatus("SHUTTLE_openruns", count);
+
+       TList mlList;
+       mlList.Add(&mlStatus);
+
+       fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
+}
+
 //______________________________________________________________________________________________
 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
                TObjArray& entries)
@@ -1928,7 +2019,7 @@ Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
        entries.SetOwner(1);
 
        // check connection, in case connect
-       if(!Connect(3)) return kFALSE;
+       if (!Connect(3)) return kFALSE;
 
        TString sqlQuery;
        sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
@@ -2025,15 +2116,36 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
        UInt_t startTime = entry->GetStartTime();
        UInt_t endTime = entry->GetEndTime();
 
-       if (!startTime || !endTime || startTime > endTime) 
+//     if (!startTime || !endTime || startTime > endTime) 
+//     {
+//             Log("SHUTTLE",
+//                     Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
+//                             run, startTime, endTime));              
+//             
+//             Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+//             fLogbookEntry = entry;  
+//             if (!UpdateShuttleLogbook("shuttle_done"))
+//             {
+//                     AliError(Form("Could not update logbook for run %d !", run));
+//             }
+//             fLogbookEntry = 0;
+//                             
+//             delete entry;
+//             delete aRow;
+//             delete aResult;
+//             return 0;
+//     }
+
+       if (!startTime) 
        {
                Log("SHUTTLE",
-                       Form("QueryRunParameters - Invalid parameters for Run %d: startTime = %d, endTime = %d. Skipping!",
-                               run, startTime, endTime));              
+                       Form("QueryRunParameters - Invalid parameters for Run %d: " 
+                               "startTime = %d, endTime = %d. Skipping!",
+                                       run, startTime, endTime));              
                
                Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
                fLogbookEntry = entry;  
-               if (!UpdateShuttleLogbook("shuttle_done"))
+               if (!UpdateShuttleLogbook("shuttle_ignored"))
                {
                        AliError(Form("Could not update logbook for run %d !", run));
                }
@@ -2045,6 +2157,50 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
                return 0;
        }
        
+       if (startTime && !endTime) 
+       {
+               // TODO Here we don't mark SHUTTLE done, because this may mean 
+               //the run is still ongoing!!            
+               Log("SHUTTLE",
+                       Form("QueryRunParameters - Invalid parameters for Run %d: "
+                            "startTime = %d, endTime = %d. Skipping (Shuttle won't be marked as DONE)!",
+                                       run, startTime, endTime));              
+               
+               //Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+               //fLogbookEntry = entry;        
+               //if (!UpdateShuttleLogbook("shuttle_done"))
+               //{
+               //      AliError(Form("Could not update logbook for run %d !", run));
+               //}
+               //fLogbookEntry = 0;
+                               
+               delete entry;
+               delete aRow;
+               delete aResult;
+               return 0;
+       }
+                       
+       if (startTime && endTime && (startTime > endTime)) 
+       {
+               Log("SHUTTLE",
+                       Form("QueryRunParameters - Invalid parameters for Run %d: "
+                               "startTime = %d, endTime = %d. Skipping!",
+                                       run, startTime, endTime));              
+               
+               Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));
+               fLogbookEntry = entry;  
+               if (!UpdateShuttleLogbook("shuttle_ignored"))
+               {
+                       AliError(Form("Could not update logbook for run %d !", run));
+               }
+               fLogbookEntry = 0;
+                               
+               delete entry;
+               delete aRow;
+               delete aResult;
+               return 0;
+       }
+                       
        TString totEventsStr = entry->GetRunParameter("totalEvents");  
        Int_t totEvents = totEventsStr.Atoi();
        if (totEvents < 1) 
@@ -2054,7 +2210,7 @@ AliShuttleLogbookEntry* AliShuttle::QueryRunParameters(Int_t run)
                
                Log("SHUTTLE", Form("Marking SHUTTLE done for run %d", run));           
                fLogbookEntry = entry;  
-               if (!UpdateShuttleLogbook("shuttle_done"))
+               if (!UpdateShuttleLogbook("shuttle_ignored"))
                {
                        AliError(Form("Could not update logbook for run %d !", run));
                }
@@ -2703,11 +2859,11 @@ Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status
 
        TString detName(detector);
        TString setClause;
-       if(detName == "shuttle_done")
+       if (detName == "shuttle_done" || detName == "shuttle_ignored")
        {
                setClause = "set shuttle_done=1";
 
-               if (fMonaLisa)
+               if (detName == "shuttle_done")
                {
                        // Send the information to ML
                        TMonaLisaText  mlStatus("SHUTTLE_status", "Done");
@@ -2715,7 +2871,9 @@ Bool_t AliShuttle::UpdateShuttleLogbook(const char* detector, const char* status
                        TList mlList;
                        mlList.Add(&mlStatus);
                
-                       fMonaLisa->SendParameters(&mlList);
+                       TString mlID;
+                       mlID.Form("%d", GetCurrentRun());
+                       fMonaLisa->SendParameters(&mlList, mlID);
                }
        } else {
                TString statusStr(status);
@@ -2877,6 +3035,19 @@ TString AliShuttle::GetLogFileName(const char* detector) const
        return fileName;
 }
 
+//______________________________________________________________________________________________
+void AliShuttle::SendAlive()
+{
+       // Sends an alive message ("SHUTTLE_status" = "Alive") to ML under the
+       // "__PROCESSINGINFO__" identifier.
+       // Does nothing if the ML writer has not been created.
+       
+       if (!fMonaLisa)
+               return;
+
+       TMonaLisaText mlStatus("SHUTTLE_status", "Alive");
+
+       TList mlList;
+       mlList.Add(&mlStatus);
+
+       fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
+}
+
 //______________________________________________________________________________________________
 Bool_t AliShuttle::Collect(Int_t run)
 {
@@ -2894,6 +3065,13 @@ Bool_t AliShuttle::Collect(Int_t run)
 
        SetLastAction("Starting");
 
+       // create ML instance
+       if (!fMonaLisa)
+               fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
+               
+       SendAlive();
+       CountOpenRuns();
+
        TString whereClause("where shuttle_done=0");
        if (run != -1)
                whereClause += Form(" and run=%d", run);
@@ -3082,6 +3260,22 @@ Bool_t AliShuttle::SendMail()
        if (fTestMode != kNone)
                return kTRUE;
 
+       TString to="";
+       TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
+       TObjString *anExpert=0;
+       while ((anExpert = (TObjString*) iterExperts.Next()))
+       {
+               to += Form("%s,", anExpert->GetName());
+       }
+       if (to.Length() > 0)
+         to.Remove(to.Length()-1);
+       AliDebug(2, Form("to: %s",to.Data()));
+
+       if (to.IsNull()) {
+               Log("SHUTTLE", "List of detector responsibles not yet set!");
+               return kFALSE;
+       }
+
        void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
        if (dir == NULL)
        {
@@ -3108,37 +3302,23 @@ Bool_t AliShuttle::SendMail()
                return kFALSE;
        }
 
-       TString to="";
-       TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
-       TObjString *anExpert=0;
-       while ((anExpert = (TObjString*) iterExperts.Next()))
-       {
-               to += Form("%s,", anExpert->GetName());
-       }
-       to.Remove(to.Length()-1);
-       AliDebug(2, Form("to: %s",to.Data()));
-
-       if (to.IsNull()) {
-               Log("SHUTTLE", "List of detector responsibles not yet set!");
-               return kFALSE;
-       }
-
        TString cc="alberto.colla@cern.ch";
 
-       TString subject = Form("%s Shuttle preprocessor FAILED in run %d !",
-                               fCurrentDetector.Data(), GetCurrentRun());
+       TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
+                               fCurrentDetector.Data(), GetCurrentRun(), GetRunType());
        AliDebug(2, Form("subject: %s", subject.Data()));
 
        TString body = Form("Dear %s expert(s), \n\n", fCurrentDetector.Data());
        body += Form("SHUTTLE just detected that your preprocessor "
-                       "failed processing run %d!!\n\n", GetCurrentRun());
+                       "failed processing run %d (run type = %s)!!\n\n", 
+                                       GetCurrentRun(), GetRunType());
        body += Form("Please check %s status on the SHUTTLE monitoring page: \n\n", 
                                fCurrentDetector.Data());
        if (fConfig->GetRunMode() == AliShuttleConfig::kTest)
        {
                body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
        } else {
-               body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
+               body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
        }