]> git.uio.no Git - u/mrichter/AliRoot.git/blobdiff - SHUTTLE/AliShuttle.cxx
Bug fix (Jan Fiete): recovering from StoreError, if the store to OCDB is successful,
[u/mrichter/AliRoot.git] / SHUTTLE / AliShuttle.cxx
index 5a064fe971f1cbe5cec781742413f7d27f91d4ad..0ffb9dd9177152e95f42a5ba70336a4f2f2756af 100644 (file)
 
 /*
 $Log$
+Revision 1.79  2007/12/19 14:03:01  acolla
+
+detector name to build the lhcPeriod_DET is to be looked in "detector" column, not "partition"
+
+Revision 1.78  2007/12/19 11:50:41  acolla
+
+Raw data tag merged files is written in /alice/data/.../lhcPeriod_DET/runNb/raw if partition is made of DET only
+
+Revision 1.77  2007/12/19 11:16:16  acolla
+More meaningful log message added in GetFileSources
+
+Revision 1.76  2007/12/19 07:45:20  acolla
+bug fix in the name of the raw tag files (Raw instead of raw)
+
+Revision 1.75  2007/12/18 15:42:14  jgrosseo
+adding number of open runs to monitoring
+
+Revision 1.74  2007/12/17 03:23:32  jgrosseo
+several bugfixes
+added "empty preprocessor" as placeholder for Acorde in FDR
+
+Revision 1.73  2007/12/14 19:31:36  acolla
+Sending email to DCS experts is temporarily commented
+
+Revision 1.72  2007/12/13 15:44:28  acolla
+Run type added in mail sent to detector expert (eases understanding)
+
 Revision 1.71  2007/12/12 14:56:14  jgrosseo
 sending shuttle_ignore to ML also in case of 0 events
 
@@ -651,6 +678,7 @@ Bool_t AliShuttle::StoreOCDB(const TString& gridURI)
                        Log("SHUTTLE", Form("StoreOCDB - %s: object %s has validity infinite but "
                                                "there are previous unprocessed runs!",
                                                fCurrentDetector.Data(), aLocId.GetPath().Data()));
+                       result = kFALSE;
                        continue;
                }
 
@@ -849,7 +877,7 @@ Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridF
        TString localBaseFolder = sto->GetBaseFolder();
        
        // Build Run level folder
-       // folder = /alice/data/year/lhcPeriod/runNb/Raw
+       // folder = /alice/data/year/lhcPeriod/runNb/raw
        
                
        TString lhcPeriod = GetLHCPeriod();     
@@ -859,7 +887,17 @@ Bool_t AliShuttle::StoreRunMetadataFile(const char* localFile, const char* gridF
                return 0;
        }
        
-       TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw/%s", 
+       // TODO partitions with one detector only write data into LHCperiod_DET
+       TString partition = GetRunParameter("detector");
+       
+       if (partition.Length() > 0 && partition != "ALICE")
+       {
+               lhcPeriod.Append(Form("_%s", partition.Data()));
+               Log(fCurrentDetector, Form("Run data tags merged file will be written in %s", 
+                               lhcPeriod.Data()));
+       }
+               
+       TString target = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw/%s", 
                                localBaseFolder.Data(), GetCurrentYear(), 
                                lhcPeriod.Data(), GetCurrentRun(), gridFileName);
                                        
@@ -882,7 +920,7 @@ Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
        void* dir = gSystem->OpenDirectory(targetDir.Data());
        if (dir == NULL) {
                if (gSystem->mkdir(targetDir.Data(), kTRUE)) {
-                       Log("SHUTTLE", Form("StoreFileLocally - Can't open directory <%s>", targetDir.Data()));
+                       Log("SHUTTLE", Form("CopyFileLocally - Can't open directory <%s>", targetDir.Data()));
                        return kFALSE;
                }
 
@@ -895,17 +933,17 @@ Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
        result = gSystem->GetPathInfo(localFile, 0, (Long64_t*) 0, 0, 0);
        if (result)
        {
-               Log("SHUTTLE", Form("StoreFileLocally - %s does not exist", localFile));
+               Log("SHUTTLE", Form("CopyFileLocally - %s does not exist", localFile));
                return kFALSE;
        }
 
        result = gSystem->GetPathInfo(target, 0, (Long64_t*) 0, 0, 0);
        if (!result)
        {
-               Log("SHUTTLE", Form("StoreFileLocally - target file %s already exist, removing...", target.Data()));
+               Log("SHUTTLE", Form("CopyFileLocally - target file %s already exist, removing...", target.Data()));
                if (gSystem->Unlink(target.Data()))
                {
-                       Log("SHUTTLE", Form("StoreFileLocally - Could not remove existing target file %s!", target.Data()));
+                       Log("SHUTTLE", Form("CopyFileLocally - Could not remove existing target file %s!", target.Data()));
                        return kFALSE;
                }
        }       
@@ -914,12 +952,12 @@ Bool_t AliShuttle::CopyFileLocally(const char* localFile, const TString& target)
 
        if (result == 0)
        {
-               Log("SHUTTLE", Form("StoreFileLocally - File %s stored locally to %s", localFile, target.Data()));
+               Log("SHUTTLE", Form("CopyFileLocally - File %s stored locally to %s", localFile, target.Data()));
                return kTRUE;
        }
        else
        {
-               Log("SHUTTLE", Form("StoreFileLocally - Could not store file %s to %s! Error code = %d", 
+               Log("SHUTTLE", Form("CopyFileLocally - Could not store file %s to %s! Error code = %d", 
                                localFile, target.Data(), result));
                return kFALSE;
        }       
@@ -971,7 +1009,15 @@ Bool_t AliShuttle::CopyFilesToGrid(const char* type)
                        return 0;
                }
                
-               dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/Raw", 
+               // TODO partitions with one detector only write data into LHCperiod_DET
+               TString partition = GetRunParameter("detector");
+       
+               if (partition.Length() > 0 && partition != "ALICE")
+               {
+                       lhcPeriod.Append(Form("_%s", partition.Data()));
+               }
+               
+               dir = Form("%s/GRP/RunMetadata/alice/data/%d/%s/%09d/raw", 
                                localBaseFolder.Data(), GetCurrentYear(), 
                                lhcPeriod.Data(), GetCurrentRun());
                alienDir = dir(dir.Index("/alice/data/"), dir.Length());
@@ -1361,6 +1407,8 @@ Bool_t AliShuttle::ContinueProcessing()
                        Log("SHUTTLE", Form("ContinueProcessing - %s: all objects "
                                "successfully stored into main storage",
                                fCurrentDetector.Data()));
+                       UpdateShuttleStatus(AliShuttleStatus::kDone);
+                       UpdateShuttleLogbook(fCurrentDetector, "DONE");
                } else {
                        Log("SHUTTLE",
                                Form("ContinueProcessing - %s: Grid storage failed again",
@@ -1503,20 +1551,27 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                fgkMainRefStorage = Form("alien://folder=/alice/data/%d/%s/Reference?user=alidaq?cacheFold=/tmp/OCDBCache", 
                                        GetCurrentYear(), lhcPeriod.Data());
        
-       AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
-       if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
-       AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
-       if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
-
        // Loop on detectors in the configuration
        TIter iter(fConfig->GetDetectors());
        TObjString* aDetector = 0;
 
+       Bool_t first = kTRUE;
+
        while ((aDetector = (TObjString*) iter.Next()))
        {
                fCurrentDetector = aDetector->String();
 
                if (ContinueProcessing() == kFALSE) continue;
+               
+               if (first)
+               {
+                 // only read QueryCDB when needed and only once
+                 AliCDBStorage *mainCDBSto = AliCDBManager::Instance()->GetStorage(fgkMainCDB);
+                 if(mainCDBSto) mainCDBSto->QueryCDB(GetCurrentRun());
+                 AliCDBStorage *mainRefSto = AliCDBManager::Instance()->GetStorage(fgkMainRefStorage);
+                 if(mainRefSto) mainRefSto->QueryCDB(GetCurrentRun());
+                 first = kFALSE;
+               }
 
                Log("SHUTTLE", Form("\t\t\t****** run %d - %s: START  ******",
                                                GetCurrentRun(), aDetector->GetName()));
@@ -1683,7 +1738,8 @@ Bool_t AliShuttle::Process(AliShuttleLogbookEntry* entry)
                        if (success) // Preprocessor finished successfully!
                        { 
                                // remove temporary folder
-                               gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
+                                // temporary commented (JF)
+                               //gSystem->Exec(Form("rm -rf %s",tmpDir.Data()));
                                
                                // Update time_processed field in FXS DB
                                if (UpdateTable() == kFALSE)
@@ -1848,8 +1904,8 @@ Bool_t AliShuttle::ProcessCurrentDetector()
                                                        " Sending mail to DCS experts!", host.Data()));
                                        UpdateShuttleStatus(AliShuttleStatus::kDCSError);
                                        
-                                       if (!SendMailToDCS())
-                                               Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+                                       //if (!SendMailToDCS())
+                                       //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
 
                                        delete dcsMap;
                                        return kFALSE;
@@ -1869,8 +1925,8 @@ Bool_t AliShuttle::ProcessCurrentDetector()
                                                        " Sending mail to DCS experts!", host.Data()));
                                        UpdateShuttleStatus(AliShuttleStatus::kDCSError);
                                        
-                                       if (!SendMailToDCS())
-                                               Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
+                                       //if (!SendMailToDCS())
+                                       //      Log("SHUTTLE", Form("ProcessCurrentDetector - Could not send mail to DCS experts!"));
                                        
                                        if (aliasMap) delete aliasMap;
                                        delete dcsMap;
@@ -1934,6 +1990,58 @@ Bool_t AliShuttle::ProcessCurrentDetector()
        return kTRUE;
 }
 
+//______________________________________________________________________________________________
+void AliShuttle::CountOpenRuns()
+{
+       // Queries DAQ's Shuttle logbook for the number of runs with shuttle_done=0
+       // and sends that count to ML as the parameter "SHUTTLE_openruns".
+       // If the connection or the query fails, nothing is sent.
+       
+       // check connection, in case connect
+       if (!Connect(3)) 
+               return;
+
+       TString sqlQuery = Form("select count(*) from %s where shuttle_done=0", fConfig->GetShuttlelbTable());
+       
+       TSQLResult* aResult = fServer[3]->Query(sqlQuery);
+       if (!aResult) {
+               AliError(Form("Can't execute query <%s>!", sqlQuery.Data()));
+               return;
+       }
+       AliDebug(2,Form("Query = %s", sqlQuery.Data()));
+       
+       if (aResult->GetRowCount() == 0) {
+               AliError(Form("No result for query %s received", sqlQuery.Data()));
+               delete aResult; // the result set is owned here; release it on every exit path
+               return;
+       }
+
+       if (aResult->GetFieldCount() != 1) {
+               AliError(Form("Invalid field count for query %s received", sqlQuery.Data()));
+               delete aResult;
+               return;
+       }
+
+       TSQLRow* aRow = aResult->Next();
+       if (!aRow) {
+               AliError(Form("Could not receive result of query %s", sqlQuery.Data()));
+               delete aResult;
+               return;
+       }
+       
+       TString result(aRow->GetField(0), aRow->GetFieldLength(0));
+       Int_t count = result.Atoi();
+       delete aRow; // the field was copied into 'result'; row and result set are no longer needed
+       delete aResult;
+
+       Log("SHUTTLE", Form("%d unprocessed runs", count));
+
+       TMonaLisaValue mlStatus("SHUTTLE_openruns", count);
+       TList mlList;
+       mlList.Add(&mlStatus);
+       fMonaLisa->SendParameters(&mlList, "__PROCESSINGINFO__");
+}
+
 //______________________________________________________________________________________________
 Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
                TObjArray& entries)
@@ -1945,7 +2053,7 @@ Bool_t AliShuttle::QueryShuttleLogbook(const char* whereClause,
        entries.SetOwner(1);
 
        // check connection, in case connect
-       if(!Connect(3)) return kFALSE;
+       if (!Connect(3)) return kFALSE;
 
        TString sqlQuery;
        sqlQuery = Form("select * from %s %s order by run", fConfig->GetShuttlelbTable(), whereClause);
@@ -2419,7 +2527,12 @@ TList* AliShuttle::GetFileSources(Int_t system, const char* detector, const char
        // if id is NULL all sources are returned (distinct)
        //
 
-       Log(detector, Form("GetFileSources - Retrieving sources with id %s from %s", id, GetSystemName(system)));
+       if (id)
+       {
+               Log(detector, Form("GetFileSources - Querying %s FXS for files with id %s produced by %s", GetSystemName(system), id, detector));
+       } else {
+               Log(detector, Form("GetFileSources - Querying %s FXS for files produced by %s", GetSystemName(system), detector));
+       }
        
        // check if test mode should simulate a FXS error
        if (fTestMode & kErrorFXSSources)
@@ -2995,8 +3108,8 @@ Bool_t AliShuttle::Collect(Int_t run)
        if (!fMonaLisa)
                fMonaLisa = new TMonaLisaWriter(fConfig->GetMonitorHost(), fConfig->GetMonitorTable());
                
-
        SendAlive();
+       CountOpenRuns();
 
        TString whereClause("where shuttle_done=0");
        if (run != -1)
@@ -3186,6 +3299,22 @@ Bool_t AliShuttle::SendMail()
        if (fTestMode != kNone)
                return kTRUE;
 
+       TString to="";
+       TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
+       TObjString *anExpert=0;
+       while ((anExpert = (TObjString*) iterExperts.Next()))
+       {
+               to += Form("%s,", anExpert->GetName());
+       }
+       if (to.Length() > 0)
+         to.Remove(to.Length()-1);
+       AliDebug(2, Form("to: %s",to.Data()));
+
+       if (to.IsNull()) {
+               Log("SHUTTLE", "List of detector responsibles not yet set!");
+               return kFALSE;
+       }
+
        void* dir = gSystem->OpenDirectory(GetShuttleLogDir());
        if (dir == NULL)
        {
@@ -3212,21 +3341,6 @@ Bool_t AliShuttle::SendMail()
                return kFALSE;
        }
 
-       TString to="";
-       TIter iterExperts(fConfig->GetResponsibles(fCurrentDetector));
-       TObjString *anExpert=0;
-       while ((anExpert = (TObjString*) iterExperts.Next()))
-       {
-               to += Form("%s,", anExpert->GetName());
-       }
-       to.Remove(to.Length()-1);
-       AliDebug(2, Form("to: %s",to.Data()));
-
-       if (to.IsNull()) {
-               Log("SHUTTLE", "List of detector responsibles not yet set!");
-               return kFALSE;
-       }
-
        TString cc="alberto.colla@cern.ch";
 
        TString subject = Form("%s Shuttle preprocessor FAILED in run %d (run type = %s)!",
@@ -3243,7 +3357,7 @@ Bool_t AliShuttle::SendMail()
        {
                body += Form("\thttp://pcalimonitor.cern.ch:8889/shuttle.jsp?time=168 \n\n");
        } else {
-               body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD?time=168 \n\n");
+               body += Form("\thttp://pcalimonitor.cern.ch/shuttle.jsp?instance=PROD&time=168 \n\n");
        }